Coverage for addmo/s1_data_tuning_auto/feature_construction.py: 57%

30 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-08-31 13:05 +0000

1import pandas as pd 

2 

3from addmo.s2_data_tuning import feature_constructor 

4from addmo.util.data_handling import split_target_features 

5from addmo.s3_model_tuning.scoring.validator_factory import ValidatorFactory 

6from addmo.s3_model_tuning.models.model_factory import ModelFactory 

7from addmo.s3_model_tuning.scoring.abstract_scorer import ValidationScoring 

8from addmo.s3_model_tuning.models.abstract_model import AbstractMLModel 

9from addmo.s1_data_tuning_auto.config.data_tuning_auto_config import DataTuningAutoSetup 

10from addmo.s3_model_tuning.model_tuner import ModelTuner 

11 

12 

def create_difference(config: DataTuningAutoSetup, xy):
    """
    Build difference-based features for every non-target column of ``xy``.

    Each column whose name differs from ``config.name_of_target`` is passed
    to ``feature_constructor.create_diff``; the returned series is stored in
    the result under that series' own ``name``.

    :param config: setup object; only ``name_of_target`` is read here.
    :param xy: data frame holding the features together with the target column.
    :return: data frame with one column per created series (empty if ``xy``
        has no non-target columns).
    """
    diff_features = pd.DataFrame()

    # Lazily walk the columns, skipping the target itself.
    feature_names = (
        name for name in xy.columns if name != config.name_of_target
    )
    for name in feature_names:
        diffed = feature_constructor.create_diff(xy[name])
        diff_features[diffed.name] = diffed

    return diff_features

25 

26 

def manual_target_lags(config: DataTuningAutoSetup, xy):
    """
    Build the manually configured lag features of the target variable.

    For every entry of ``config.target_lag`` the target column
    ``xy[config.name_of_target]`` is passed to
    ``feature_constructor.create_lag``; the returned series is stored in the
    result under that series' own ``name``.

    :param config: setup object; ``target_lag`` and ``name_of_target`` are read.
    :param xy: data frame holding the features together with the target column.
    :return: data frame with one column per created series (empty if
        ``config.target_lag`` is empty).
    """
    # config.target_lag is given as [first lag (int), second lag (int)]
    lagged_target = pd.DataFrame()

    # The generator is lazy: each lag series is created right before it is stored.
    lag_series = (
        feature_constructor.create_lag(xy[config.name_of_target], shift)
        for shift in config.target_lag
    )
    for lagged in lag_series:
        lagged_target[lagged.name] = lagged

    return lagged_target

39 

40 

41# def automatic_timeseries_target_lag_constructor(config: DataTuningAutoSetup, xy): 

42# """ 

43# Automatically generates target lags based on model performance improvement. 

44# """ 

45# x_created = pd.DataFrame() 

46# 

47# tuner = ModelTuner(config._config_model_tuning) 

48# 

49# # prepare system_data 

50# x, y = split_target_features(config.name_of_target, xy) 

51# 

52# model = tuner.tune_model(config._config_model_tuning.models[0], x, y) 

53# 

54# old_score = tuner.scorer.score_validation(model, x, y) 

55# 

56# # loop through to create lags as long as they improve the result 

57# for i in range(config.minimum_target_lag, len(x)): 

58# series = feature_constructor.create_lag(y, i) 

59# x_processed = pd.concat([x, series], axis=1, join="inner").bfill() 

60# 

61# new_model = tuner.tune_model(config._config_model_tuning.models[0], x_processed, y) 

62# new_score = tuner.scorer.score_validation(new_model, x_processed, y) 

63# 

64# if new_score <= old_score + config.min_increase_4_wrapper: 

65# break 

66# else: 

67# x_created[series.name] = series 

68# old_score = new_score 

69# 

70# return x_created 

71 

72 

def manual_feature_lags(config: DataTuningAutoSetup, xy):
    """
    Build the manually configured lag features of selected variables.

    ``config.feature_lags`` maps a variable name to the lags requested for
    it. Entries keyed by ``config.name_of_target`` are skipped. For every
    remaining (variable, lag) pair ``feature_constructor.create_lag`` is
    called on ``xy[variable]``; the returned series is stored in the result
    under that series' own ``name``.

    :param config: setup object; ``feature_lags`` and ``name_of_target`` are read.
    :param xy: data frame holding the features together with the target column.
    :return: data frame with one column per created series (empty if no
        non-target lags are configured).
    """
    # config.feature_lags is given as {var_name: [first lag (int), second lag (int)]}
    lagged_features = pd.DataFrame()

    # Flatten the mapping into (variable, lag) pairs, leaving out the target.
    lag_requests = (
        (name, shift)
        for name, shifts in config.feature_lags.items()
        if name != config.name_of_target
        for shift in shifts
    )
    for name, shift in lag_requests:
        lagged = feature_constructor.create_lag(xy[name], shift)
        lagged_features[lagged.name] = lagged

    return lagged_features

88 

89 

90# def automatic_feature_lag_constructor(config: DataTuningAutoSetup, xy): 

91# """ 

92# Automatically generates feature lags based on model performance improvement. 

93# """ 

94# x_created = pd.DataFrame() 

95# 

96# tuner = ModelTuner(config._config_model_tuning) 

97# 

98# # prepare system_data 

99# x, y = split_target_features(config.name_of_target, xy) 

100# 

101# model = tuner.tune_model(config._config_model_tuning.models[0], x, y) 

102# 

103# old_score = tuner.scorer.score_validation(model, x, y) 

104# 

105# # Loop through to create feature lags as long as they improve the result 

106# for column in x: 

107# temp_score = old_score 

108# for i in range(config.minimum_feature_lag, config.maximum_feature_lag + 1): 

109# series = feature_constructor.create_lag(x[column], i) 

110# series = series[0:] 

111# x_processed = pd.concat([x, series], axis=1, join="inner") 

112# 

113# new_model = tuner.tune_model(config._config_model_tuning.models[0], x_processed, y) 

114# new_score = tuner.scorer.score_validation(new_model, x_processed, y) 

115# 

116# # choose the best lag for that feature 

117# if new_score > temp_score: 

118# temp_score = new_score 

119# x_best_lag = series 

120# 

121# # add best lag to feature space if good enough 

122# if temp_score >= old_score + config.min_increase_4_wrapper: 

123# x_created[x_best_lag.name] = x_best_lag 

124# 

125# return x_created