Evaluation

physXAI.evaluation.metrics

Classes

Metrics

A class to calculate and store regression metrics (MSE, RMSE, R2) for training, validation, and test datasets.

Source code in physXAI/evaluation/metrics.py
class Metrics:
    """
    A class to calculate and store regression metrics (MSE, RMSE, R2)
    for training, validation, and test datasets.
    """

    print_evaluate = True

    def __init__(self, td: TrainingDataGeneric):
        """
        Initializes the Metrics object by calculating metrics for train, validation (if available),
        and test sets.

        Args:
            td (TrainingDataGeneric): An object containing the training data.
        """

        self.train_kpis = self.evaluate(td.y_train_single, td.y_train_pred, label='Train')
        if td.y_val_single is not None:
            self.val_kpis = self.evaluate(td.y_val_single, td.y_val_pred, label='Val')
        else:
            self.val_kpis = None
        self.test_kpis = self.evaluate(td.y_test_single, td.y_test_pred, label='Test')

    @staticmethod
    def evaluate(y_true: np.ndarray, y_pred: np.ndarray, label: str = '') -> dict[str, float]:
        """
        Calculates Mean Squared Error (MSE), Root Mean Squared Error (RMSE), and R-squared (R2) score.

        Args:
            y_true (np.ndarray): The true target values.
            y_pred (np.ndarray): The predicted target values.
            label (str, optional): A label for the dataset being evaluated (e.g., 'Train', 'Test').
                                   Defaults to ''.

        Returns:
            dict: A dict containing MSE, RMSE, and R2 scores.
        """

        mse = mean_squared_error(y_true, y_pred)
        rmse = math.sqrt(mse)
        r2 = r2_score(y_true, y_pred)

        kpis = dict()
        kpis['MSE' + ' ' + label] = mse
        kpis['RMSE' + ' ' + label] = rmse
        kpis['R2' + ' ' + label] = r2

        if Metrics.print_evaluate:
            # print(f"{label} MSE: {mse:.2f}")
            print(f"{label} RMSE: {rmse:.2f}")
            print(f"{label} R2: {r2:.2f}")

        return kpis

    def get_metrics(self, nround: int = 2) -> tuple[list[str], list[float]]:
        """
        Returns a list of metric labels and their corresponding rounded values.

        Args:
            nround (int, optional): The number of decimal places to round the metric values to.
                                    Defaults to 2.

        Returns:
            tuple[list[str], list[float]]: A tuple containing a list of metric labels and a list of
                                           their corresponding rounded values.
        """

        kpis = self.train_kpis
        if self.val_kpis is not None:
            kpis = kpis | self.val_kpis
        kpis = kpis | self.test_kpis
        return list(kpis.keys()), [round(v, nround) for v in kpis.values()]

    def get_config(self) -> dict:
        return {
            'train_kpis': self.train_kpis,
            'val_kpis': self.val_kpis,
            'test_kpis': self.test_kpis,
        }
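
The following minimal usage sketch is not part of the library source; it assumes physXAI is importable and uses a SimpleNamespace as a hypothetical stand-in for a TrainingDataGeneric object, supplying only the attributes that Metrics reads.

import numpy as np
from types import SimpleNamespace
from physXAI.evaluation.metrics import Metrics

# Hypothetical stand-in for TrainingDataGeneric: only the attributes read by
# Metrics.__init__ are provided (y_*_single for targets, y_*_pred for predictions).
rng = np.random.default_rng(0)
y_train = rng.normal(size=100)
y_test = rng.normal(size=20)
td = SimpleNamespace(
    y_train_single=y_train, y_train_pred=y_train + rng.normal(scale=0.1, size=100),
    y_val_single=None, y_val_pred=None,            # no validation split
    y_test_single=y_test, y_test_pred=y_test + rng.normal(scale=0.1, size=20),
)

metrics = Metrics(td)                              # prints RMSE and R2 for Train and Test
labels, values = metrics.get_metrics(nround=3)     # e.g. ['MSE Train', 'RMSE Train', ...]
print(dict(zip(labels, values)))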
Attributes
print_evaluate = True class-attribute instance-attribute
train_kpis = self.evaluate(td.y_train_single, td.y_train_pred, label='Train') instance-attribute
val_kpis = self.evaluate(td.y_val_single, td.y_val_pred, label='Val') instance-attribute
test_kpis = self.evaluate(td.y_test_single, td.y_test_pred, label='Test') instance-attribute
Functions
__init__(td: TrainingDataGeneric)

Initializes the Metrics object by calculating metrics for train, validation (if available), and test sets.

Parameters:

Name  Type                 Description                               Default
td    TrainingDataGeneric  An object containing the training data.   required
Source code in physXAI/evaluation/metrics.py
def __init__(self, td: TrainingDataGeneric):
    """
    Initializes the Metrics object by calculating metrics for train, validation (if available),
    and test sets.

    Args:
        td (TrainingDataGeneric): An object containing the training data.
    """

    self.train_kpis = self.evaluate(td.y_train_single, td.y_train_pred, label='Train')
    if td.y_val_single is not None:
        self.val_kpis = self.evaluate(td.y_val_single, td.y_val_pred, label='Val')
    else:
        self.val_kpis = None
    self.test_kpis = self.evaluate(td.y_test_single, td.y_test_pred, label='Test')
evaluate(y_true: np.ndarray, y_pred: np.ndarray, label: str = '') -> dict[str, float] staticmethod

Calculates Mean Squared Error (MSE), Root Mean Squared Error (RMSE), and R-squared (R2) score.

Parameters:

Name    Type     Description                                                        Default
y_true  ndarray  The true target values.                                            required
y_pred  ndarray  The predicted target values.                                       required
label   str      A label for the dataset being evaluated (e.g., 'Train', 'Test').   ''

Returns:

Name  Type              Description
dict  dict[str, float]  A dict containing MSE, RMSE, and R2 scores.

Source code in physXAI/evaluation/metrics.py
@staticmethod
def evaluate(y_true: np.ndarray, y_pred: np.ndarray, label: str = '') -> dict[str, float]:
    """
    Calculates Mean Squared Error (MSE), Root Mean Squared Error (RMSE), and R-squared (R2) score.

    Args:
        y_true (np.ndarray): The true target values.
        y_pred (np.ndarray): The predicted target values.
        label (str, optional): A label for the dataset being evaluated (e.g., 'Train', 'Test').
                               Defaults to ''.

    Returns:
        dict: A dict containing MSE, RMSE, and R2 scores.
    """

    mse = mean_squared_error(y_true, y_pred)
    rmse = math.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    kpis = dict()
    kpis['MSE' + ' ' + label] = mse
    kpis['RMSE' + ' ' + label] = rmse
    kpis['R2' + ' ' + label] = r2

    if Metrics.print_evaluate:
        # print(f"{label} MSE: {mse:.2f}")
        print(f"{label} RMSE: {rmse:.2f}")
        print(f"{label} R2: {r2:.2f}")

    return kpis
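
Because evaluate is a staticmethod, it can also be called directly on arrays. A short sketch (the array values are illustrative only):

import numpy as np
from physXAI.evaluation.metrics import Metrics

y_true = np.array([1.0, 2.0, 3.0, 4.0])
y_pred = np.array([1.1, 1.9, 3.2, 3.8])

Metrics.print_evaluate = False                     # silence the console output
kpis = Metrics.evaluate(y_true, y_pred, label='Test')
print(kpis)                                        # {'MSE Test': ..., 'RMSE Test': ..., 'R2 Test': ...}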
get_metrics(nround: int = 2) -> tuple[list[str], list[float]]

Returns a list of metric labels and their corresponding rounded values.

Parameters:

Name    Type  Description                                                    Default
nround  int   The number of decimal places to round the metric values to.   2

Returns:

Type                           Description
tuple[list[str], list[float]]  A tuple containing a list of metric labels and a list of their corresponding rounded values.

Source code in physXAI/evaluation/metrics.py
def get_metrics(self, nround: int = 2) -> tuple[list[str], list[float]]:
    """
    Returns a list of metric labels and their corresponding rounded values.

    Args:
        nround (int, optional): The number of decimal places to round the metric values to.
                                Defaults to 2.

    Returns:
        tuple[list[str], list[float]]: A tuple containing a list of metric labels and a list of
                                       their corresponding rounded values.
    """

    kpis = self.train_kpis
    if self.val_kpis is not None:
        kpis = kpis | self.val_kpis
    kpis = kpis | self.test_kpis
    return list(kpis.keys()), [round(v, nround) for v in kpis.values()]
get_config() -> dict
Source code in physXAI/evaluation/metrics.py
def get_config(self) -> dict:
    return {
        'train_kpis': self.train_kpis,
        'val_kpis': self.val_kpis,
        'test_kpis': self.test_kpis,
    }

MetricsPINN

Bases: Metrics

A class to calculate and store metrics for Physics-Informed Neural Networks (PINNs). It evaluates performance using a list of provided loss functions.

Source code in physXAI/evaluation/metrics.py
class MetricsPINN(Metrics):
    """
    A class to calculate and store metrics for Physics-Informed Neural Networks (PINNs).
    It evaluates performance using a list of provided loss functions.
    """

    def __init__(self, td: TrainingDataGeneric, pinn_losses: list):
        """
        Initializes the MetricsPINN object by calculating metrics for train,
        validation (if available), and test sets using specified PINN loss functions.

        Args:
            td (TrainingDataGeneric): An object containing the true and predicted values.
            pinn_losses (list): A list of loss functions to be used for evaluation.
        """

        self.train_kpis = self.evaluate(td.y_train_single, td.y_train_pred_single, label='Train',
                                        pinn_losses=pinn_losses)
        if td.y_val is not None:
            self.val_kpis = self.evaluate(td.y_val_single, td.y_val_pred_single, label='Val', pinn_losses=pinn_losses)
        else:
            self.val_kpis = None
        self.test_kpis = self.evaluate(td.y_test_single, td.y_test_pred_single, label='Test', pinn_losses=pinn_losses)

    @staticmethod
    def evaluate(y_true: np.ndarray, y_pred: np.ndarray, label: str = '', **kwargs) -> dict[str, float]:
        """
        Calculates metrics based on the provided list of PINN loss functions.

        Args:
            y_true (np.ndarray): The true target values.
            y_pred (np.ndarray): The predicted target values.
            label (str, optional): A label for the dataset being evaluated (e.g., 'Train', 'Test').
                                   Defaults to ''.
            **kwargs: Must contain 'pinn_losses', a list of loss callables applied to (y_true, y_pred).

        Returns:
            dict: A dictionary where keys are loss function names appended with the label,
                  and values are the calculated loss values.
        """

        kpis = dict()
        for loss in kwargs['pinn_losses']:
            val = float(loss(y_true, y_pred))
            kpis[loss.__name__ + ' ' + label] = val
            print(f"{loss.__name__ + ' ' + label}: {val:.2f}")

        return kpis
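
A minimal sketch of how MetricsPINN can be used, not taken from the library itself: data_mse is a hypothetical loss callable, and SimpleNamespace stands in for the training-data object, supplying only the attributes the constructor reads.

import numpy as np
from types import SimpleNamespace
from physXAI.evaluation.metrics import MetricsPINN

def data_mse(y_true, y_pred):                      # hypothetical loss: any callable with a
    return np.mean((y_true - y_pred) ** 2)         # __name__ that returns a scalar works

rng = np.random.default_rng(1)
y_train = rng.normal(size=50)
y_test = rng.normal(size=10)
td = SimpleNamespace(
    y_train_single=y_train, y_train_pred_single=y_train + 0.1,
    y_val=None,                                    # no validation split
    y_test_single=y_test, y_test_pred_single=y_test - 0.1,
)

m = MetricsPINN(td, pinn_losses=[data_mse])        # prints 'data_mse Train' and 'data_mse Test'
print(m.get_config())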
Attributes
train_kpis = self.evaluate(td.y_train_single, td.y_train_pred_single, label='Train', pinn_losses=pinn_losses) instance-attribute
val_kpis = self.evaluate(td.y_val_single, td.y_val_pred_single, label='Val', pinn_losses=pinn_losses) instance-attribute
test_kpis = self.evaluate(td.y_test_single, td.y_test_pred_single, label='Test', pinn_losses=pinn_losses) instance-attribute
Functions
__init__(td: TrainingDataGeneric, pinn_losses: list)

Initializes the MetricsPINN object by calculating metrics for train, validation (if available), and test sets using specified PINN loss functions.

Parameters:

Name         Type                 Description                                            Default
td           TrainingDataGeneric  An object containing the true and predicted values.    required
pinn_losses  list                 A list of loss functions to be used for evaluation.    required
Source code in physXAI/evaluation/metrics.py
def __init__(self, td: TrainingDataGeneric, pinn_losses: list):
    """
    Initializes the MetricsPINN object by calculating metrics for train,
    validation (if available), and test sets using specified PINN loss functions.

    Args:
        td (TrainingDataGeneric): An object containing the true and predicted values.
        pinn_losses (list): A list of loss functions to be used for evaluation.
    """

    self.train_kpis = self.evaluate(td.y_train_single, td.y_train_pred_single, label='Train',
                                    pinn_losses=pinn_losses)
    if td.y_val is not None:
        self.val_kpis = self.evaluate(td.y_val_single, td.y_val_pred_single, label='Val', pinn_losses=pinn_losses)
    else:
        self.val_kpis = None
    self.test_kpis = self.evaluate(td.y_test_single, td.y_test_pred_single, label='Test', pinn_losses=pinn_losses)
evaluate(y_true: np.ndarray, y_pred: np.ndarray, label: str = '', **kwargs) -> dict[str, float] staticmethod

Calculates metrics based on the provided list of PINN loss functions.

Parameters:

Name      Type     Description                                                                          Default
y_true    ndarray  The true target values.                                                              required
y_pred    ndarray  The predicted target values.                                                         required
label     str      A label for the dataset being evaluated (e.g., 'Train', 'Test').                     ''
**kwargs           Must contain 'pinn_losses', a list of loss callables applied to (y_true, y_pred).    required

Returns:

Name  Type              Description
dict  dict[str, float]  A dictionary where keys are loss function names appended with the label, and values are the calculated loss values.

Source code in physXAI/evaluation/metrics.py
@staticmethod
def evaluate(y_true: np.ndarray, y_pred: np.ndarray, label: str = '', **kwargs) -> dict[str, float]:
    """
    Calculates metrics based on the provided list of PINN loss functions.

    Args:
        y_true (np.ndarray): The true target values.
        y_pred (np.ndarray): The predicted target values.
        label (str, optional): A label for the dataset being evaluated (e.g., 'Train', 'Test').
                               Defaults to ''.
        **kwargs: Must contain 'pinn_losses', a list of loss callables applied to (y_true, y_pred).

    Returns:
        dict: A dictionary where keys are loss function names appended with the label,
              and values are the calculated loss values.
    """

    kpis = dict()
    for loss in kwargs['pinn_losses']:
        val = float(loss(y_true, y_pred))
        kpis[loss.__name__ + ' ' + label] = val
        print(f"{loss.__name__ + ' ' + label}: {val:.2f}")

    return kpis
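
The staticmethod can likewise be called directly; boundary_residual below is a hypothetical physics-based loss, not part of physXAI.

import numpy as np
from physXAI.evaluation.metrics import MetricsPINN

def boundary_residual(y_true, y_pred):             # hypothetical physics-based loss
    return np.mean(np.abs(y_true - y_pred))

kpis = MetricsPINN.evaluate(np.ones(10), np.full(10, 1.2),
                            label='Val', pinn_losses=[boundary_residual])
print(kpis)                                        # approximately {'boundary_residual Val': 0.2}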

MetricsMultiStep

Bases: Metrics

A class to calculate and store regression metrics (MSE, RMSE, R2) for multi-step time series forecasting models. It evaluates overall performance and performance at each step.

Source code in physXAI/evaluation/metrics.py
class MetricsMultiStep(Metrics):
    """
    A class to calculate and store regression metrics (MSE, RMSE, R2) for multi-step
    time series forecasting models. It evaluates overall performance and performance at each step.
    """

    def __init__(self, td: TrainingDataMultiStep):
        """
        Initializes the MetricsMultiStep object. Calculates overall metrics for train,
        validation (if available), and test sets, as well as step-wise RMSE for each set.

        Args:
            td (TrainingDataMultiStep): An object containing the true and predicted values
                                        for multi-step forecasts.
        """

        self.train_kpis = self.evaluate(td.y_train.reshape(-1, 1), td.y_train_pred.reshape(-1, 1), label='Train')
        if td.y_val is not None:
            self.val_kpis = self.evaluate(td.y_val.reshape(-1, 1), td.y_val_pred.reshape(-1, 1), label='Val')
        else:
            self.val_kpis = None
        self.test_kpis = self.evaluate(td.y_test.reshape(-1, 1), td.y_test_pred.reshape(-1, 1), label='Test')

        # Stepwise RMSE
        rmse_train_l = list[float]()
        rmse_val_l = list[float]()
        rmse_test_l = list[float]()
        for i in range(td.y_train.shape[1]):
            _, rmse_train, _ = self.evaluate_step(td.y_train, td.y_train_pred, i)
            _, rmse_test, _ = self.evaluate_step(td.y_test, td.y_test_pred, i)
            rmse_train_l.append(rmse_train)
            rmse_test_l.append(rmse_test)
            if td.y_val is not None:
                _, rmse_val, _ = self.evaluate_step(td.y_val, td.y_val_pred, i)
                rmse_val_l.append(rmse_val)
        self.rmse_train_l = rmse_train_l
        if td.y_val is not None:
            self.rmse_val_l = rmse_val_l
        else:
            self.rmse_val_l = None
        self.rmse_test_l = rmse_test_l

    @staticmethod
    def evaluate_step(y_true: np.ndarray, y_pred: np.ndarray, step: int) -> tuple[float, float, float]:
        """
        Calculates Mean Squared Error (MSE), Root Mean Squared Error (RMSE),
        and R-squared (R2) score for a specific step in multi-step predictions.

        Args:
            y_true (np.ndarray): The true target values (samples, steps, features).
            y_pred (np.ndarray): The predicted target values (samples, steps, features).
            step (int): The specific forecast step to evaluate (0-indexed).

        Returns:
            tuple[float, float, float]: A tuple containing MSE, RMSE, and R2 score for the specified step.
        """

        mse = mean_squared_error(y_true[:, step, :].reshape(-1, 1), y_pred[:, step, :].reshape(-1, 1))
        rmse = math.sqrt(mse)
        r2 = r2_score(y_true[:, step, :].reshape(-1, 1), y_pred[:, step, :].reshape(-1, 1))

        return mse, rmse, r2

    def get_config(self) -> dict:
        c = super().get_config()
        c.update({
            'rmse_train_l': self.rmse_train_l,
            'rmse_val_l': self.rmse_val_l,
            'rmse_test_l': self.rmse_test_l,
        })
        return c
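
A minimal multi-step sketch, assuming data shaped (samples, steps, features) and using SimpleNamespace as a hypothetical stand-in for TrainingDataMultiStep.

import numpy as np
from types import SimpleNamespace
from physXAI.evaluation.metrics import MetricsMultiStep

rng = np.random.default_rng(2)
y_train = rng.normal(size=(40, 5, 1))              # 40 samples, 5 forecast steps, 1 feature
y_test = rng.normal(size=(10, 5, 1))
td = SimpleNamespace(
    y_train=y_train, y_train_pred=y_train + rng.normal(scale=0.05, size=y_train.shape),
    y_val=None, y_val_pred=None,                   # no validation split
    y_test=y_test, y_test_pred=y_test + rng.normal(scale=0.05, size=y_test.shape),
)

m = MetricsMultiStep(td)                           # overall KPIs plus step-wise RMSE
print(m.rmse_train_l)                              # one RMSE per forecast step
print(m.rmse_test_l)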
Attributes
train_kpis = self.evaluate(td.y_train.reshape(-1, 1), td.y_train_pred.reshape(-1, 1), label='Train') instance-attribute
val_kpis = self.evaluate(td.y_val.reshape(-1, 1), td.y_val_pred.reshape(-1, 1), label='Val') instance-attribute
test_kpis = self.evaluate(td.y_test.reshape(-1, 1), td.y_test_pred.reshape(-1, 1), label='Test') instance-attribute
rmse_train_l = rmse_train_l instance-attribute
rmse_val_l = rmse_val_l instance-attribute
rmse_test_l = rmse_test_l instance-attribute
Functions
__init__(td: TrainingDataMultiStep)

Initializes the MetricsMultiStep object. Calculates overall metrics for train, validation (if available), and test sets, as well as step-wise RMSE for each set.

Parameters:

Name  Type                   Description                                                                     Default
td    TrainingDataMultiStep  An object containing the true and predicted values for multi-step forecasts.    required
Source code in physXAI/evaluation/metrics.py
def __init__(self, td: TrainingDataMultiStep):
    """
    Initializes the MetricsMultiStep object. Calculates overall metrics for train,
    validation (if available), and test sets, as well as step-wise RMSE for each set.

    Args:
        td (TrainingDataMultiStep): An object containing the true and predicted values
                                    for multi-step forecasts.
    """

    self.train_kpis = self.evaluate(td.y_train.reshape(-1, 1), td.y_train_pred.reshape(-1, 1), label='Train')
    if td.y_val is not None:
        self.val_kpis = self.evaluate(td.y_val.reshape(-1, 1), td.y_val_pred.reshape(-1, 1), label='Val')
    else:
        self.val_kpis = None
    self.test_kpis = self.evaluate(td.y_test.reshape(-1, 1), td.y_test_pred.reshape(-1, 1), label='Test')

    # Stepwise RMSE
    rmse_train_l = list[float]()
    rmse_val_l = list[float]()
    rmse_test_l = list[float]()
    for i in range(td.y_train.shape[1]):
        _, rmse_train, _ = self.evaluate_step(td.y_train, td.y_train_pred, i)
        _, rmse_test, _ = self.evaluate_step(td.y_test, td.y_test_pred, i)
        rmse_train_l.append(rmse_train)
        rmse_test_l.append(rmse_test)
        if td.y_val is not None:
            _, rmse_val, _ = self.evaluate_step(td.y_val, td.y_val_pred, i)
            rmse_val_l.append(rmse_val)
    self.rmse_train_l = rmse_train_l
    if td.y_val is not None:
        self.rmse_val_l = rmse_val_l
    else:
        self.rmse_val_l = None
    self.rmse_test_l = rmse_test_l
evaluate_step(y_true: np.ndarray, y_pred: np.ndarray, step: int) -> tuple[float, float, float] staticmethod

Calculates Mean Squared Error (MSE), Root Mean Squared Error (RMSE), and R-squared (R2) score for a specific step in multi-step predictions.

Parameters:

Name    Type     Description                                                Default
y_true  ndarray  The true target values (samples, steps, features).        required
y_pred  ndarray  The predicted target values (samples, steps, features).   required
step    int      The specific forecast step to evaluate (0-indexed).       required

Returns:

Type                        Description
tuple[float, float, float]  A tuple containing MSE, RMSE, and R2 score for the specified step.

Source code in physXAI/evaluation/metrics.py
@staticmethod
def evaluate_step(y_true: np.ndarray, y_pred: np.ndarray, step: int) -> tuple[float, float, float]:
    """
    Calculates Mean Squared Error (MSE), Root Mean Squared Error (RMSE),
    and R-squared (R2) score for a specific step in multi-step predictions.

    Args:
        y_true (np.ndarray): The true target values (samples, steps, features).
        y_pred (np.ndarray): The predicted target values (samples, steps, features).
        step (int): The specific forecast step to evaluate (0-indexed).

    Returns:
        tuple[float, float, float]: A tuple containing MSE, RMSE, and R2 score for the specified step.
    """

    mse = mean_squared_error(y_true[:, step, :].reshape(-1, 1), y_pred[:, step, :].reshape(-1, 1))
    rmse = math.sqrt(mse)
    r2 = r2_score(y_true[:, step, :].reshape(-1, 1), y_pred[:, step, :].reshape(-1, 1))

    return mse, rmse, r2
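
evaluate_step can also be called on its own for a single forecast step; the arrays below are purely illustrative.

import numpy as np
from physXAI.evaluation.metrics import MetricsMultiStep

y_true = np.arange(24, dtype=float).reshape(4, 3, 2)   # (samples, steps, features)
y_pred = y_true + 0.5                                  # constant offset of 0.5

mse, rmse, r2 = MetricsMultiStep.evaluate_step(y_true, y_pred, step=0)
print(mse, rmse, r2)                                   # 0.25, 0.5, R2 close to 1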
get_config() -> dict
Source code in physXAI/evaluation/metrics.py
def get_config(self) -> dict:
    c = super().get_config()
    c.update({
        'rmse_train_l': self.rmse_train_l,
        'rmse_val_l': self.rmse_val_l,
        'rmse_test_l': self.rmse_test_l,
    })
    return c