Coverage for addmo/s1_data_tuning_auto/feature_construction.py: 57%
30 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-31 13:05 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-31 13:05 +0000
1import pandas as pd
3from addmo.s2_data_tuning import feature_constructor
4from addmo.util.data_handling import split_target_features
5from addmo.s3_model_tuning.scoring.validator_factory import ValidatorFactory
6from addmo.s3_model_tuning.models.model_factory import ModelFactory
7from addmo.s3_model_tuning.scoring.abstract_scorer import ValidationScoring
8from addmo.s3_model_tuning.models.abstract_model import AbstractMLModel
9from addmo.s1_data_tuning_auto.config.data_tuning_auto_config import DataTuningAutoSetup
10from addmo.s3_model_tuning.model_tuner import ModelTuner
def create_difference(config: DataTuningAutoSetup, xy):
    """
    Build difference features for every column of ``xy`` except the target.

    Each column whose label differs from ``config.name_of_target`` is handed
    to ``feature_constructor.create_diff``. The series it returns is stored
    in the result under that series' own ``name``.

    Returns a new DataFrame holding only the constructed features; ``xy``
    itself is not assigned to here.
    """
    diff_features = pd.DataFrame()

    for column in xy.columns:
        # The target variable is never differenced.
        if column == config.name_of_target:
            continue
        diffed = feature_constructor.create_diff(xy[column])
        diff_features[diffed.name] = diffed

    return diff_features
def manual_target_lags(config: DataTuningAutoSetup, xy):
    """
    Build the manually configured lag features of the target variable.

    ``config.target_lag`` is iterated as a collection of lags, e.g.
    ``[first lag (int), second lag (int)]``. For each entry the target
    column ``xy[config.name_of_target]`` is passed together with the lag to
    ``feature_constructor.create_lag``, and the returned series is stored in
    the result under that series' own ``name``.

    Returns a new DataFrame holding only the constructed lag features.
    """
    lag_features = pd.DataFrame()

    for lag_step in config.target_lag:
        # Look the target column up per iteration so that an empty lag
        # list never touches ``xy`` at all.
        target_column = xy[config.name_of_target]
        lagged = feature_constructor.create_lag(target_column, lag_step)
        lag_features[lagged.name] = lagged

    return lag_features
41# def automatic_timeseries_target_lag_constructor(config: DataTuningAutoSetup, xy):
42# """
43# Automatically generates target lags based on model performance improvement.
44# """
45# x_created = pd.DataFrame()
46#
47# tuner = ModelTuner(config._config_model_tuning)
48#
49# # prepare system_data
50# x, y = split_target_features(config.name_of_target, xy)
51#
52# model = tuner.tune_model(config._config_model_tuning.models[0], x, y)
53#
54# old_score = tuner.scorer.score_validation(model, x, y)
55#
56# # loop through to create lags as long as they improve the result
57# for i in range(config.minimum_target_lag, len(x)):
58# series = feature_constructor.create_lag(y, i)
59# x_processed = pd.concat([x, series], axis=1, join="inner").bfill()
60#
61# new_model = tuner.tune_model(config._config_model_tuning.models[0], x_processed, y)
62# new_score = tuner.scorer.score_validation(new_model, x_processed, y)
63#
64# if new_score <= old_score + config.min_increase_4_wrapper:
65# break
66# else:
67# x_created[series.name] = series
68# old_score = new_score
69#
70# return x_created
def manual_feature_lags(config: DataTuningAutoSetup, xy):
    """
    Build the manually configured lag features of selected variables.

    ``config.feature_lags`` is read as a mapping of the form
    ``{var_name: [first lag (int), second lag (int)]}``. Entries keyed by
    ``config.name_of_target`` are skipped. For every remaining variable and
    each of its lags, ``feature_constructor.create_lag`` is called on
    ``xy[var_name]`` and the returned series is stored in the result under
    that series' own ``name``.

    Returns a new DataFrame holding only the constructed lag features.
    """
    lag_features = pd.DataFrame()

    for feature_name, lag_steps in config.feature_lags.items():
        # Entries for the target variable are ignored by this function.
        if feature_name == config.name_of_target:
            continue
        for lag_step in lag_steps:
            lagged = feature_constructor.create_lag(xy[feature_name], lag_step)
            lag_features[lagged.name] = lagged

    return lag_features
90# def automatic_feature_lag_constructor(config: DataTuningAutoSetup, xy):
91# """
92# Automatically generates feature lags based on model performance improvement.
93# """
94# x_created = pd.DataFrame()
95#
96# tuner = ModelTuner(config._config_model_tuning)
97#
98# # prepare system_data
99# x, y = split_target_features(config.name_of_target, xy)
100#
101# model = tuner.tune_model(config._config_model_tuning.models[0], x, y)
102#
103# old_score = tuner.scorer.score_validation(model, x, y)
104#
105# # Loop through to create feature lags as long as they improve the result
106# for column in x:
107# temp_score = old_score
108# for i in range(config.minimum_feature_lag, config.maximum_feature_lag + 1):
109# series = feature_constructor.create_lag(x[column], i)
110# series = series[0:]
111# x_processed = pd.concat([x, series], axis=1, join="inner")
112#
113# new_model = tuner.tune_model(config._config_model_tuning.models[0], x_processed, y)
114# new_score = tuner.scorer.score_validation(new_model, x_processed, y)
115#
116# # choose the best lag for that feature
117# if new_score > temp_score:
118# temp_score = new_score
119# x_best_lag = series
120#
121# # add best lag to feature space if good enough
122# if temp_score >= old_score + config.min_increase_4_wrapper:
123# x_created[x_best_lag.name] = x_best_lag
124#
125# return x_created