Coverage for addmo/s3_model_tuning/scoring/validator.py: 87%

15 statements  

coverage.py v7.4.4, created at 2025-08-31 13:05 +0000

import numpy as np
from sklearn.model_selection import cross_validate

from addmo.s3_model_tuning.scoring.abstract_scorer import ValidationScoring
from addmo.s3_model_tuning.models.abstract_model import AbstractMLModel

from addmo.util.experiment_logger import WandbLogger


class NoValidation(ValidationScoring):
    def __init__(self, *args, **kwargs):
        pass

    def score_validation(self, model: AbstractMLModel, x, y):
        """Returns 0. Avoids fitting and validation, e.g. in the case of NoTuning."""
        return 0


class CrossValidation(ValidationScoring):
    def score_validation(self, model: AbstractMLModel, x, y):
        """Returns a positive float value; the higher, the better.
        x and y cover both the train and the evaluation period.
        CV uses shuffle=False by default, so the splits are identical across calls."""

        cv_info = cross_validate(
            model.to_scikit_learn(x),
            x.values.astype(np.float32),
            y.values.astype(np.float32),
            scoring=self.metric,
            cv=self.splitter,
            return_indices=True,
        )
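        # cross_validate returns a dict of per-fold arrays ("fit_time",
        # "score_time", "test_score"); with return_indices=True (available
        # since scikit-learn 1.3) it also contains "indices", a dict of
        # per-fold "train"/"test" positional index arrays.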

        # Log the dataset splits for specific splitters whose splits are
        # important to check:
        # if self.splitter.__class__.__name__ == "UnivariateSplitter":
        #     splitter_indices: dict = cv_info["indices"]
        #     # convert positional indices to datetime indices
        #     splitter_indices["train"] = [
        #         x.iloc[splitter_indices["train"][i]].index
        #         for i in range(len(splitter_indices["train"]))
        #     ]
        #     splitter_indices["test"] = [
        #         x.iloc[splitter_indices["test"][i]].index
        #         for i in range(len(splitter_indices["test"]))
        #     ]
        #     # WandbLogger(splitter_indices)
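        #     # (Note: this commented-out call is the only reference to the
        #     # WandbLogger import at the top of the module.)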

        scores = cv_info["test_score"]
        return scores.mean()
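
For context, here is a minimal, self-contained sketch of the cross-validation flow that CrossValidation.score_validation wraps. The estimator, splitter, and metric below (LinearRegression, TimeSeriesSplit, "r2") are illustrative stand-ins, since the concrete values of model.to_scikit_learn(x), self.splitter, and self.metric are defined outside this file:

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import TimeSeriesSplit, cross_validate

# Hypothetical time-indexed data standing in for the train + evaluation period.
index = pd.date_range("2024-01-01", periods=100, freq="h")
x = pd.DataFrame({"feature": np.arange(100.0)}, index=index)
y = pd.Series(2.0 * np.arange(100.0), index=index)

cv_info = cross_validate(
    LinearRegression(),              # stands in for model.to_scikit_learn(x)
    x.values.astype(np.float32),
    y.values.astype(np.float32),
    scoring="r2",                    # stands in for self.metric
    cv=TimeSeriesSplit(n_splits=5),  # stands in for self.splitter; ordered, no shuffling
    return_indices=True,
)

# Per-fold positional indices can be mapped back to timestamps, as in the
# commented-out logging block above:
first_test_times = x.iloc[cv_info["indices"]["test"][0]].index

print(cv_info["test_score"].mean())  # the value score_validation would return

Because TimeSeriesSplit never shuffles, repeated calls with the same data produce identical folds, which matches the behavior noted in the docstring.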