Coverage for addmo/s3_model_tuning/scoring/validator.py: 87%
15 statements
coverage.py v7.4.4, created at 2025-08-31 13:05 +0000
import numpy as np
from sklearn.model_selection import cross_validate

from addmo.s3_model_tuning.scoring.abstract_scorer import ValidationScoring
from addmo.s3_model_tuning.models.abstract_model import AbstractMLModel

from addmo.util.experiment_logger import WandbLogger

class NoValidation(ValidationScoring):
    def __init__(self, *args, **kwargs):
        pass

    def score_validation(self, model: AbstractMLModel, x, y):
        """Return 0 without fitting or validating, e.g. in the case of NoTuning."""
        return 0

class CrossValidation(ValidationScoring):
    def score_validation(self, model: AbstractMLModel, x, y):
        """Return a positive float score; the higher, the better.

        x and y cover both the train and the evaluation period. The
        cross-validation splitter does not shuffle by default, so the
        splits are identical across calls.
        """
        cv_info = cross_validate(
            model.to_scikit_learn(x),
            x.values.astype(np.float32),
            y.values.astype(np.float32),
            scoring=self.metric,
            cv=self.splitter,
            return_indices=True,
        )
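
        # Note: cv_info maps "fit_time", "score_time", and "test_score" to
        # per-split arrays; with return_indices=True (available in
        # scikit-learn >= 1.3) it also carries "indices" with the positional
        # train/test indices of each split.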
        # Log the dataset splits for specific splitters which are important
        # to check:
        # if self.splitter.__class__.__name__ == "UnivariateSplitter":
        #     splitter_indices: dict = cv_info["indices"]
        #     # Convert positional indices to datetime indices.
        #     splitter_indices["train"] = [
        #         x.iloc[splitter_indices["train"][i]].index
        #         for i in range(len(splitter_indices["train"]))
        #     ]
        #     splitter_indices["test"] = [
        #         x.iloc[splitter_indices["test"][i]].index
        #         for i in range(len(splitter_indices["test"]))
        #     ]
        #     # WandbLogger(splitter_indices)

        scores = cv_info["test_score"]
        return scores.mean()
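

# --- Usage sketch (illustration only, not part of validator.py) ---
# A minimal, hedged example of driving CrossValidation.score_validation.
# Assumptions: ValidationScoring (not shown in this file) exposes the
# `metric` and `splitter` attributes consumed above, and a no-arg
# constructor is available; here we set the attributes directly for the
# demo. _DummyModel and its to_scikit_learn() are hypothetical stand-ins
# for an AbstractMLModel implementation.
if __name__ == "__main__":
    import pandas as pd
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import TimeSeriesSplit

    class _DummyModel:
        def to_scikit_learn(self, x):
            # cross_validate clones and fits the estimator on each split.
            return LinearRegression()

    idx = pd.date_range("2024-01-01", periods=50, freq="h")
    x = pd.DataFrame({"feature": np.arange(50, dtype=float)}, index=idx)
    y = pd.Series(2.0 * x["feature"] + 1.0, index=idx)

    scorer = CrossValidation()                     # assumes a no-arg constructor
    scorer.metric = "r2"                           # scoring passed to cross_validate
    scorer.splitter = TimeSeriesSplit(n_splits=3)  # shuffle-free, reproducible splits

    print(scorer.score_validation(_DummyModel(), x, y))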