Coverage for addmo/s3_model_tuning/models/scikit_learn_models.py: 70%
131 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-31 13:05 +0000
1import numpy as np
2import joblib
3import sklearn
4from abc import ABC
5from sklearn.neural_network import MLPRegressor
6from sklearn.svm import SVR
7from sklearn.pipeline import Pipeline
8from sklearn.preprocessing import StandardScaler, MaxAbsScaler
9from sklearn.compose import TransformedTargetRegressor
10from skl2onnx import to_onnx
11from addmo.s3_model_tuning.models.abstract_model import AbstractMLModel
12from addmo.s3_model_tuning.models.abstract_model import ModelMetadata
13from sklearn.linear_model import LinearRegression
14from addmo.util.load_save_utils import create_path_or_ask_to_override
class BaseScikitLearnModel(AbstractMLModel, ABC):
    """
    Base class for scikit-learn models.

    This class extends the AbstractMLModel, providing concrete implementations of
    common functionalities specific to scikit-learn models.

    Attributes:
        regressor (Pipeline): A scikit-learn Pipeline object containing the scaler
            and the provided model.
    """

    def __init__(self, regressor):
        """
        Create an instance of the scikit-learn model including a feature scaler.
        """
        # Scaling the target variable through TransformedTargetRegressor
        # is not compatible with ONNX, so only the features are scaled here.
        self.regressor = Pipeline(
            steps=[
                ("scaler", StandardScaler()),  # scale the features
                ("model", regressor),
            ]
        )

    def fit(self, x, y):
        """
        Train the model.

        The training data is kept on the instance because metadata creation and
        ONNX export need the target name, feature ordering and a sample input.
        """
        self.x_fit = x
        self.y_fit = y
        # float32 keeps the fitted pipeline consistent with the ONNX export dtype
        self.regressor.fit(x.values.astype(np.float32), y.values.astype(np.float32))

    def predict(self, x):
        """
        Make predictions.
        """
        return self.regressor.predict(x.values.astype(np.float32))

    def _define_metadata(self):
        """
        Define metadata describing the trained model for serialisation.
        """
        self.metadata = ModelMetadata(
            addmo_class=type(self).__name__,
            addmo_commit_id=ModelMetadata.get_commit_id(),
            library=sklearn.__name__,
            library_model_type=type(self.regressor.named_steps['model']).__name__,
            library_version=sklearn.__version__,
            target_name=self.y_fit.name,
            features_ordered=list(self.x_fit.columns),
            preprocessing=['StandardScaler for all features'])

    def save_regressor(self, directory, regressor_filename, file_type='joblib'):
        """
        Save regressor as .joblib or .onnx including scaler to a file.

        Raises:
            ValueError: If an unsupported file type is requested.
        """
        full_filename = f"{regressor_filename}.{file_type}"
        path = create_path_or_ask_to_override(full_filename, directory)

        if file_type == 'joblib':
            joblib.dump(self.regressor, path)

        elif file_type == 'onnx':
            # Cast the sample input to float32 so the inferred ONNX graph input
            # dtype matches the dtype the pipeline was fitted with.
            onnx_model = to_onnx(self.regressor, self.x_fit.values.astype(np.float32))
            with open(path, "wb") as f:
                f.write(onnx_model.SerializeToString())
        else:
            raise ValueError('The supported file types for saving the model are: .joblib and .onnx')

        # Saving metadata
        self._define_metadata()
        self._save_metadata(directory, regressor_filename)

        print(f"Model saved to {path}.")
        return file_type

    def load_regressor(self, regressor):
        """
        Load trained model for serialisation.
        """
        self.regressor = regressor

    def to_scikit_learn(self, x=None):
        """
        Return the underlying scikit-learn pipeline.
        """
        return self.regressor

    def set_params(self, hyperparameters):
        """
        Set the hyperparameters of the model step within the pipeline.
        """
        self.regressor.named_steps["model"].set_params(**hyperparameters)

    def get_params(self, deep=True):
        """
        Get the hyperparameters of the model step within the pipeline.
        """
        return self.regressor.named_steps["model"].get_params(deep=deep)
class ScikitMLP(BaseScikitLearnModel):
    """Scikit-learn MLPRegressor model."""

    def __init__(self):
        super().__init__(MLPRegressor())
        self.set_params(self.default_hyperparameter())

    def optuna_hyperparameter_suggest(self, trial):
        """
        Suggest hyperparameters for optimization.
        """
        # Dynamic hidden layer sizes based on the suggested number of layers
        layer_count = trial.suggest_int("n_layers", 1, 2)
        sizes = []
        for layer_index in range(layer_count):
            sizes.append(trial.suggest_int(f"n_units_l{layer_index}", 1, 1000))
        return {"hidden_layer_sizes": tuple(sizes)}

    def grid_search_hyperparameter(self):
        """
        Suggest hyperparameters for optimization.
        """
        return {
            "hidden_layer_sizes": [(50,), (100,), (50, 50), (100, 100)],
            "activation": ["tanh", "relu"],
            "solver": ["sgd", "adam"],
            "alpha": [0.0001, 0.05],
            "learning_rate": ["constant", "adaptive"],
        }

    def default_hyperparameter(self):
        """
        Return default hyperparameters.
        """
        defaults = MLPRegressor().get_params()
        # raise the iteration cap and enable early stopping for training stability
        defaults.update(max_iter=5000, early_stopping=True)
        return defaults
class ScikitMLP_TargetTransformed(ScikitMLP):
    def __init__(self):
        """
        Create an instance of the scikit-learn model including a scaler.
        """
        # Wrapping the MLP in TransformedTargetRegressor additionally scales the
        # target variable; note this wrapper is not compatible with ONNX export.
        steps = [
            ("scaler", StandardScaler()),  # scale the features
            ("model", TransformedTargetRegressor(regressor=MLPRegressor())),
        ]
        self.regressor = Pipeline(steps=steps)
        self.set_params(self.default_hyperparameter())

    def set_params(self, hyperparameters):
        """
        Set hyperparameters on the regressor wrapped inside the TransformedTargetRegressor.
        """
        wrapped = self.regressor.named_steps["model"].regressor
        wrapped.set_params(**hyperparameters)

    def get_params(self, deep=True):
        """
        Get the hyperparameters of the wrapped regressor.
        """
        wrapped = self.regressor.named_steps["model"].regressor
        return wrapped.get_params(deep=deep)
class ScikitLinearReg(BaseScikitLearnModel):
    """Linear Regression model"""

    def __init__(self):
        super().__init__(LinearRegression())

    def grid_search_hyperparameter(self):
        """Linear regression has no hyperparameters to grid-search."""
        pass

    def optuna_hyperparameter_suggest(self, trial):
        """Linear regression has no hyperparameters to tune."""
        pass

    def default_hyperparameter(self):
        """
        Return default hyperparameters.
        """
        return LinearRegression().get_params()
class ScikitLinearRegNoScaler(ScikitLinearReg):
    def __init__(self):
        """
        Create an instance of the scikit-learn model without any scaler.
        """
        self.regressor = Pipeline(
            steps=[
                ("model", LinearRegression())
            ]
        )

    def fit(self, x, y):
        """
        Train the model on the raw (unscaled) data.
        """
        self.x_fit = x
        self.y_fit = y
        self.regressor.fit(x, y)

    def predict(self, x):
        """
        Make predictions on the raw (unscaled) data.
        """
        return self.regressor.predict(x)

    def get_params(self, deep=True):
        """
        Get the hyperparameters of the model plus informational entries.
        """
        # get model parameter
        param = self.regressor.named_steps["model"].get_params(deep=deep)

        # just info params
        param['model_complexity'] = 1
        param['hidden_layer_sizes'] = []
        return param

    def _define_metadata(self):
        """
        Define metadata.
        """
        self.metadata = ModelMetadata(
            addmo_class=type(self).__name__,
            addmo_commit_id=ModelMetadata.get_commit_id(),
            library=sklearn.__name__,
            # record the actual model step type (consistent with the base class),
            # not the surrounding Pipeline
            library_model_type=type(self.regressor.named_steps['model']).__name__,
            library_version=sklearn.__version__,
            target_name=self.y_fit.name,
            features_ordered=list(self.x_fit.columns),
            preprocessing=['No scaling'])
class ScikitSVR(BaseScikitLearnModel):
    """Scikit-learn Support Vector Regressor (SVR) model."""

    def __init__(self):
        super().__init__(SVR())

    def _define_metadata(self):
        """
        Define metadata.
        """
        self.metadata = ModelMetadata(
            addmo_class=type(self).__name__,
            addmo_commit_id=ModelMetadata.get_commit_id(),
            library=sklearn.__name__,
            # record the actual SVR step type (consistent with the base class),
            # not the surrounding Pipeline
            library_model_type=type(self.regressor.named_steps['model']).__name__,
            library_version=sklearn.__version__,
            target_name=self.y_fit.name,
            features_ordered=list(self.x_fit.columns),
            preprocessing=['Scaling'])

    def grid_search_hyperparameter(self):
        """No grid is defined for SVR; optuna-based search is used instead."""
        pass

    def optuna_hyperparameter_suggest(self, trial):
        """
        Suggest hyperparameters for optimization.
        """
        hyperparameters = {}

        hyperparameters["C"] = trial.suggest_float("C", 1e-2, 1e1, log=True)  # regularizer
        hyperparameters["epsilon"] = trial.suggest_float("epsilon", 1e-3, 1.0, log=True)  # distance of tube
        hyperparameters["kernel"] = trial.suggest_categorical("kernel", ["linear", "poly", "rbf", "sigmoid"])
        hyperparameters["tol"] = trial.suggest_float("tol", 1e-5, 1e-1, log=True)

        # kernel-specific hyperparameters
        if hyperparameters["kernel"] in ["poly", "rbf", "sigmoid"]:
            hyperparameters["gamma"] = trial.suggest_categorical("gamma", ["scale", "auto"])

        if hyperparameters["kernel"] == "poly":
            hyperparameters["degree"] = trial.suggest_int("degree", 2, 5)
            hyperparameters["coef0"] = trial.suggest_float("coef0", 0.0, 1.0)

        if hyperparameters["kernel"] == "sigmoid":
            hyperparameters["coef0"] = trial.suggest_float("coef0", 0.0, 1.0)

        return hyperparameters

    def default_hyperparameter(self):
        """
        Return default hyperparameters.
        """
        hyperparameter = SVR().get_params()
        # cap iterations and loosen tolerance to bound training time
        hyperparameter["max_iter"] = 500
        hyperparameter["tol"] = 1e-2
        return hyperparameter

    def set_params(self, hyperparameters):
        """
        Set hyperparameters on the SVR step of the pipeline.
        """
        prefixed = {f"model__{key}": value for key, value in hyperparameters.items()}
        self.regressor.set_params(**prefixed)

    def get_params(self, deep=True):
        """
        Get the hyperparameters of the SVR step.

        The 'model__' pipeline prefix is stripped so the returned dict uses plain
        parameter names (consistent with the sibling classes) and can be passed
        back to set_params without double-prefixing.
        """
        prefix = "model__"
        param = self.regressor.get_params(deep=deep)
        return {key[len(prefix):]: value for key, value in param.items() if key.startswith(prefix)}