Coverage for addmo/s2_data_tuning/data_tuner_fixed.py: 86%
29 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-31 13:05 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-31 13:05 +0000
1import pandas as pd
3from addmo.s2_data_tuning.config.data_tuning_config import DataTuningFixedConfig
4from addmo.s2_data_tuning import feature_constructor as fc
5from addmo.util.load_save import load_data
6from addmo.util.experiment_logger import ExperimentLogger
class DataTunerByConfig:
    """Tunes the system_data in a fixed manner. Without randomness.

    The set of features to produce is read from ``config.features``. Besides
    the stateless ``tune_fixed`` transformation, the tuner keeps a bounded
    buffer of raw data (``xy_raw``) that can be fed incrementally, e.g. in
    online environments or for recursive predictions.
    """

    # Maximum number of rows kept in ``xy_raw`` by ``update_x_raw``.
    max_raw_rows = 100

    def __init__(self, config: "DataTuningFixedConfig"):
        self.config = config
        # Start with an empty buffer so the first update call has something
        # to merge into instead of failing with AttributeError.
        self.xy_raw = pd.DataFrame()

    def _merge_sample(self, sample: pd.DataFrame) -> pd.DataFrame:
        """Return the raw-data buffer merged with ``sample``.

        Non-null values of ``sample`` take precedence over buffered values on
        matching index/column positions; rows and columns that only exist on
        one side are kept. Null entries in ``sample`` are filled from the
        buffer.
        """
        current = getattr(self, "xy_raw", None)
        if current is None or current.empty:
            # Nothing buffered yet: the sample itself becomes the buffer.
            return sample.copy()
        # The calling frame wins in combine_first, so the *sample* must be
        # the caller for new values to overwrite the buffered ones.
        return sample.combine_first(current)

    def update_x_raw(self, x_sample: pd.DataFrame):
        """
        Update the buffered raw data (``xy_raw``) with new system_data.
        E.g. for online environments or recursive predictions. #todo: recursive

        The input DataFrame must have a DateTimeIndex in equal resolution.
        It can contain features and/or target values. This method either overwrites
        existing values or appends new system_data depending on the index match.

        Afterwards only the last ``max_raw_rows`` rows (default 100) are kept.
        """
        # Overwrite existing system_data or append new system_data
        merged = self._merge_sample(x_sample)

        # limit the maximum length of the df
        self.xy_raw = merged.tail(self.max_raw_rows)

    def update_y(self, y_sample: pd.DataFrame):  # TODO: is this method necessary?
        """Merge target values into ``xy_raw`` (used for recursive prediction).

        Existing values are overwritten and new rows/columns are appended,
        depending on the index match. Unlike ``update_x_raw`` the buffer is
        not truncated here.
        """
        # Overwrite existing system_data or append new system_data
        self.xy_raw = self._merge_sample(y_sample)

    def tune_fixed(self, xy_raw: pd.DataFrame) -> pd.DataFrame:
        """Build the feature frame described by ``config.features``.

        Each configured feature name is handled as follows:

        * ``<column>___lag<N>``: ``fc.create_lag(column_series, N)`` is called.
        * ``<column>___<modification>``: ``fc.create_<modification>(column_series)``
          is looked up dynamically in the feature constructor module and called.
        * a plain name that is a column of ``xy_raw``: the column is copied as is.
        * anything else: a message is printed and the feature is skipped.

        Constructed features are stored under the ``name`` of the series
        returned by the feature constructor.

        Args:
            xy_raw: Raw data containing the columns the configured features
                are based on.

        Returns:
            A DataFrame with the index of ``xy_raw`` and one column per
            feature that could be produced.

        Raises:
            KeyError: If the base column of a constructed feature is missing.
            AttributeError: If no ``create_<modification>`` function exists.
        """
        x_processed = pd.DataFrame(index=xy_raw.index)

        for feature_name in self.config.features:
            if "___" in feature_name:
                # Split on the last separator only, so base column names that
                # themselves contain '___' do not break the unpacking.
                original_name, modification = feature_name.rsplit("___", 1)
                var = xy_raw[original_name]

                if modification.startswith("lag"):
                    # The lag order is encoded right after the 'lag' prefix.
                    lag = int(modification[3:])
                    series = fc.create_lag(var, lag)
                else:
                    # get the other methods dynamically from module
                    method = getattr(fc, "create_" + modification)
                    series = method(var)
                x_processed[series.name] = series

            # keep desired raw features
            elif feature_name in xy_raw.columns:
                x_processed[feature_name] = xy_raw[feature_name]

            else:
                print(f"Feature <{feature_name}> not present in loaded system_data.")

        return x_processed