Skip to content

Feature Selection

physXAI.feature_selection.recursive_feature_elimination

Classes

Functions

search_best_features(runs: dict, multi_step: bool, use_multi_step_error: bool)

Source code in physXAI/feature_selection/recursive_feature_elimination.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def search_best_features(runs: dict, multi_step: bool, use_multi_step_error: bool):
    """Select the final input feature set from the results of a feature elimination.

    For every entry of ``runs`` the candidate with the lowest KPI is determined. The user is then
    prompted for a number of features. If the answer is a key of ``runs``, the best candidate stored
    under that key is returned. In every other case (no number entered, no input available, or a
    number for which no run exists) the candidate with the overall lowest KPI is returned.

    Args:
        runs: Maps a key (looked up with the number entered by the user) to a list of candidate dicts.
            Each candidate provides 'inputs' and 'kpi', and additionally 'kpi_single_step' when that
            KPI is the one being compared.
        multi_step: Together with ``use_multi_step_error`` decides which KPI is compared.
        use_multi_step_error: If ``multi_step`` is True and this flag is False, 'kpi_single_step' is
            compared; otherwise 'kpi' is compared.

    Returns:
        The 'inputs' entry of the selected candidate.

    Raises:
        ValueError: If the selection has to be based on the KPI but no run has a KPI below infinity
            (e.g. ``runs`` is empty), or if a run contains no candidates.
    """
    kpi_key = 'kpi_single_step' if multi_step and not use_multi_step_error else 'kpi'

    # Best candidate per key, plus the key holding the overall lowest KPI
    sorted_kpis = dict()
    min_value = np.inf
    min_index = None
    for k, v in runs.items():
        values = [f[kpi_key] for f in v]
        index = values.index(min(values))
        sorted_kpis[k] = {
            'inputs': v[index]['inputs'],
            'kpi': values[index],
        }
        if values[index] < min_value:
            min_value = values[index]
            min_index = k

    # None means: select based on the KPI
    try:
        max_features = int(input("Enter number of features. Otherwise features are selected based on RMSE."))
    except (ValueError, EOFError):
        # Non-numeric answer or no input available (e.g. closed stdin)
        max_features = None

    # A number without a matching run used to end in a KeyError after all models had been trained.
    if max_features is not None and max_features not in sorted_kpis:
        print(f'No run with {max_features} features available (available: {list(sorted_kpis)}). '
              f'Features are selected based on RMSE.')
        max_features = None

    if max_features is None:
        if min_index is None:
            raise ValueError('Cannot select features based on RMSE: no run with a valid KPI found.')
        inputs = sorted_kpis[min_index]['inputs']
    else:
        inputs = sorted_kpis[max_features]['inputs']

    print('Selected features:')
    print(inputs)
    return inputs

recursive_feature_elimination(file_path: str, preprocessing: PreprocessingData, model: SingleStepModel, ascending_lag_order: bool = True, use_multi_step_error: bool = True, save_models: bool = False, fixed_inputs: list[str] = None)

Source code in physXAI/feature_selection/recursive_feature_elimination.py
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def recursive_feature_elimination(file_path: str, preprocessing: PreprocessingData,
                                  model: SingleStepModel, ascending_lag_order: bool = True,
                                  use_multi_step_error: bool = True, save_models: bool = False,
                                  fixed_inputs: list[str] = None):
    """Remove input features one at a time and record the validation KPI of every candidate model.

    A reference model is trained on all inputs of ``preprocessing``. Then, per round, every removable
    feature is dropped in turn, a model is trained on the remaining inputs, and the candidate with the
    lowest 'RMSE Val' becomes the input set of the next round. The loop stops when one feature is left
    or when no feature can be removed any more.

    Args:
        file_path: Passed to ``preprocessing.pipeline`` for every training run.
        preprocessing: Preprocessing object; its ``inputs`` attribute is overwritten during the search
            and set back to its original value before the function returns or raises.
        model: Model whose ``pipeline`` method is called for every candidate input set.
        ascending_lag_order: If True, a feature is only removable while its next-higher lag
            (``<name>_lag1`` for an un-lagged feature, ``_lag<N+1>`` for ``_lag<N>``) is not among
            the current inputs.
        use_multi_step_error: For multi-step preprocessing, rank candidates by the multi-step KPI
            (True) or by the single-step KPI (False).
        save_models: Forwarded to ``model.pipeline`` as ``save_model``.
        fixed_inputs: Features that are never removed. Defaults to no fixed features.

    Returns:
        dict mapping a number of features to the list of evaluated candidates. Each candidate is a
        dict with 'inputs' and 'kpi', plus 'kpi_single_step' for multi-step training data.

    Raises:
        AssertionError: If ``preprocessing.val_size`` is not greater than 0.
        NotImplementedError: If ``preprocessing`` is not single-step and the training data is not
            multi-step.
    """
    assert preprocessing.val_size > 0, 'Value Error: For Feature Selection, preprocessing.val_size must be > 0.'

    if fixed_inputs is None:
        fixed_inputs = list()

    print('Feature Selection')
    # NOTE: class-level flag; it is not reset when this function returns.
    Metrics.print_evaluate = False

    if Logger._logger is None:
        Logger.setup_logger()

    org_inputs = preprocessing.inputs
    inputs = preprocessing.inputs
    input_length = len(inputs)

    runs = dict()

    try:
        # Train original model
        td = preprocessing.pipeline(file_path)
        p = os.path.join(Logger._logger, f'model_{input_length}')
        model.pipeline(td, save_path=p, plot=False, save_model=save_models)
        val_kpi = td.metrics.val_kpis['RMSE Val']

        # Evaluate model
        if isinstance(preprocessing, PreprocessingSingleStep):
            runs[input_length] = [{'inputs': inputs, 'kpi': val_kpi}]
        elif isinstance(td, TrainingDataMultiStep):
            val_kpi_single = td.single_step_metrics.val_kpis['RMSE Val']
            runs[input_length] = [{'inputs': inputs, 'kpi': val_kpi, 'kpi_single_step': val_kpi_single}]
        else:
            raise NotImplementedError

        # Recursive feature elimination: round j reduces the input set to j features
        for j in range(input_length - 1, 0, -1):
            print(f'Features {j + 1}')
            print(inputs)

            # Reduced input features: one candidate list per removable feature
            new_inputs = list()
            for v in inputs:
                # The first init feature of a multi-step setup is never removed
                if isinstance(preprocessing, PreprocessingMultiStep) and preprocessing.init_features[0] == v:
                    continue
                if ascending_lag_order:
                    lag = re.search(r"_lag(\d+)", v)
                    if lag is None:
                        # Un-lagged feature (also covers names containing '_lag' without a number,
                        # which previously raised an AttributeError)
                        next_lag = v + '_lag1'
                    else:
                        number = int(lag.group(1))
                        next_lag = v.replace(f'_lag{number}', f'_lag{number + 1}')
                    # Higher lags have to be removed first
                    if next_lag in inputs:
                        continue
                if v in fixed_inputs:
                    continue
                new_inputs.append([item for item in inputs if item != v])

            if len(new_inputs) == 0:
                break

            # Evaluate Kpis for new inputs
            kpis = dict()
            kpis_add = dict()

            for i, v in enumerate(new_inputs):
                preprocessing.inputs = v
                td = preprocessing.pipeline(file_path)
                p = os.path.join(Logger._logger, f'model_{j}_{i}')
                model.pipeline(td, save_path=p, plot=False, save_model=save_models)

                kpis[i] = td.metrics.val_kpis['RMSE Val']
                if isinstance(preprocessing, PreprocessingMultiStep):
                    kpis_add[i] = td.single_step_metrics.val_kpis['RMSE Val']

            if isinstance(preprocessing, PreprocessingSingleStep):
                run = [{'inputs': candidate, 'kpi': kpis[i]} for i, candidate in enumerate(new_inputs)]
            else:
                run = [{'inputs': candidate, 'kpi': kpis[i], 'kpi_single_step': kpis_add[i]}
                       for i, candidate in enumerate(new_inputs)]

            # Choose best models
            if isinstance(preprocessing, PreprocessingMultiStep) and not use_multi_step_error:
                key_filter = int(min(kpis_add, key=kpis_add.get))
            else:
                key_filter = int(min(kpis, key=kpis.get))
            inputs = new_inputs[key_filter]
            runs[j] = run

        # Report the actual size: the loop may stop early when no feature is removable any more
        print(f'Features {len(inputs)}')
        print(inputs)
    finally:
        # Hand the preprocessing object back unchanged, even if a training run failed
        preprocessing.inputs = org_inputs

    return runs

recursive_feature_elimination_pipeline(file_path: str, preprocessing: PreprocessingData, model: SingleStepModel, ascending_lag_order: bool = True, use_multi_step_error: bool = True, save_models: bool = False, fixed_inputs: list[str] = None)

Source code in physXAI/feature_selection/recursive_feature_elimination.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def recursive_feature_elimination_pipeline(file_path: str,
                                           preprocessing: PreprocessingData,
                                           model: SingleStepModel, ascending_lag_order: bool = True,
                                           use_multi_step_error: bool = True, save_models: bool = False,
                                           fixed_inputs: list[str] = None):
    """Run the recursive feature elimination, plot its results and select the final features.

    All arguments are forwarded unchanged to ``recursive_feature_elimination``. The resulting runs are
    passed to ``plot_recFeatureSelection`` and then to ``search_best_features``, which prompts the user.

    Returns:
        Tuple ``(runs, inputs)``: the runs returned by ``recursive_feature_elimination`` and the
        inputs returned by ``search_best_features``.
    """
    # Plotting and selection both need to know whether the preprocessing is multi-step
    is_multi_step = isinstance(preprocessing, PreprocessingMultiStep)

    runs = recursive_feature_elimination(
        file_path=file_path,
        preprocessing=preprocessing,
        model=model,
        ascending_lag_order=ascending_lag_order,
        use_multi_step_error=use_multi_step_error,
        save_models=save_models,
        fixed_inputs=fixed_inputs,
    )

    plot_recFeatureSelection(runs, is_multi_step, use_multi_step_error)
    selected_inputs = search_best_features(runs, is_multi_step, use_multi_step_error)

    return runs, selected_inputs