def recursive_feature_elimination(file_path: str, preprocessing: PreprocessingData,
model: SingleStepModel, ascending_lag_order: bool = True,
use_multi_step_error: bool = True, save_models: bool = False,
                                  fixed_inputs: list[str] | None = None):
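    """Recursive feature elimination driven by validation RMSE.

    Starting from the full input set of ``preprocessing``, each round retrains
    ``model`` on every candidate input set with one feature removed and keeps the
    candidate with the lowest validation RMSE, until only one feature remains or
    no further feature may be removed.

    Args:
        file_path: Path to the data file passed to ``preprocessing.pipeline``.
        preprocessing: Preprocessing configuration; ``val_size`` must be > 0.
        model: Model whose ``pipeline`` is run for every candidate input set.
        ascending_lag_order: If True, a feature may only be removed when no higher
            lag of the same variable is still present, keeping lag orders contiguous.
        use_multi_step_error: For multi-step preprocessing, select candidates by the
            multi-step RMSE; if False, select by the single-step RMSE instead.
        save_models: If True, save every trained candidate model.
        fixed_inputs: Features that are never considered for removal.

    Returns:
        Dict mapping the number of remaining features to the list of evaluated
        candidates (inputs and validation KPIs) for that elimination step.
    """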
    assert preprocessing.val_size > 0, 'For feature selection, preprocessing.val_size must be > 0.'
if fixed_inputs is None:
fixed_inputs = list()
print('Feature Selection')
Metrics.print_evaluate = False
if Logger._logger is None:
Logger.setup_logger()
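    # Keep the original input list so it can be restored once the search is done.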
org_inputs = preprocessing.inputs
inputs = preprocessing.inputs
input_length = len(inputs)
runs = dict()
# Train original model
td = preprocessing.pipeline(file_path)
path = f'model_{input_length}'
p = os.path.join(Logger._logger, path)
model.pipeline(td, save_path=p, plot=False, save_model=save_models)
val_kpi = td.metrics.val_kpis['RMSE Val']
    # Record baseline validation KPIs for the full feature set
if isinstance(preprocessing, PreprocessingSingleStep):
runs[input_length] = [{'inputs': inputs, 'kpi': val_kpi}]
elif isinstance(td, TrainingDataMultiStep):
val_kpi_single = td.single_step_metrics.val_kpis['RMSE Val']
runs[input_length] = [{'inputs': inputs, 'kpi': val_kpi, 'kpi_single_step': val_kpi_single}]
else:
raise NotImplementedError
# Recursive feature elimination
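    # Each round j evaluates all candidate sets with one feature removed and keeps
    # the best-performing one; runs[j] stores every candidate evaluated in round j.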
for j in range(input_length - 1, 0, -1):
print(f'Features {j + 1}')
print(inputs)
# Reduced input features
new_inputs = list()
for i, v in enumerate(inputs):
if isinstance(preprocessing, PreprocessingMultiStep) and preprocessing.init_features[0] == v:
continue
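            # With ascending_lag_order, a variable may only be dropped starting from
            # its highest lag, so the remaining lags stay contiguous.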
if ascending_lag_order:
if '_lag' not in v:
if v + '_lag1' in inputs:
continue
else:
match = int(re.search(r"_lag(\d+)", v).group(1))
if v.replace(f'_lag{match}', f'_lag{match + 1}') in inputs: # pragma: no cover
continue # pragma: no cover
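            # Features pinned via fixed_inputs are never candidates for removal.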
if v in fixed_inputs:
continue
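            # Candidate input set: all current inputs except v.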
new_inputs.append([item for item in inputs if item != v])
if len(new_inputs) == 0:
break
        # Evaluate KPIs for each candidate input set
kpis = dict()
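        # For multi-step preprocessing, kpis_add additionally collects the
        # single-step validation RMSE of each candidate.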
kpis_add = dict()
for i, v in enumerate(new_inputs):
preprocessing.inputs = v
td = preprocessing.pipeline(file_path)
path = f'model_{j}_{i}'
p = os.path.join(Logger._logger, path)
model.pipeline(td, save_path=p, plot=False, save_model=save_models)
val_kpi = td.metrics.val_kpis['RMSE Val']
kpis[i] = val_kpi
if isinstance(preprocessing, PreprocessingMultiStep):
val_kpi = td.single_step_metrics.val_kpis['RMSE Val']
kpis_add[i] = val_kpi
if isinstance(preprocessing, PreprocessingSingleStep):
run = [{'inputs': new_inputs[i], 'kpi': kpis[i]} for i in range(len(new_inputs))]
else:
run = [{'inputs': new_inputs[i], 'kpi': kpis[i], 'kpi_single_step': kpis_add[i]}
for i in range(len(new_inputs))]
        # Keep the candidate with the lowest validation RMSE
if isinstance(preprocessing, PreprocessingMultiStep) and not use_multi_step_error:
key_filter = int(min(kpis_add, key=kpis_add.get))
else:
key_filter = int(min(kpis, key=kpis.get))
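        # Continue the next round from the winning candidate.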
inputs = new_inputs[key_filter]
runs[j] = run
    print('Features 1')
print(inputs)
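    # Restore the original input configuration on the preprocessing object.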
preprocessing.inputs = org_inputs
return runs