Coverage for addmo/s3_model_tuning/models/scikit_learn_models.py: 70%

131 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-08-31 13:05 +0000

1import numpy as np 

2import joblib 

3import sklearn 

4from abc import ABC 

5from sklearn.neural_network import MLPRegressor 

6from sklearn.svm import SVR 

7from sklearn.pipeline import Pipeline 

8from sklearn.preprocessing import StandardScaler, MaxAbsScaler 

9from sklearn.compose import TransformedTargetRegressor 

10from skl2onnx import to_onnx 

11from addmo.s3_model_tuning.models.abstract_model import AbstractMLModel 

12from addmo.s3_model_tuning.models.abstract_model import ModelMetadata 

13from sklearn.linear_model import LinearRegression 

14from addmo.util.load_save_utils import create_path_or_ask_to_override 

15 

16 

class BaseScikitLearnModel(AbstractMLModel, ABC):
    """
    Base class for scikit-learn models.

    This class extends the AbstractMLModel, providing concrete implementations of
    common functionalities specific to scikit-learn models.

    Attributes:
        regressor (Pipeline): A scikit-learn Pipeline object containing the scaler
            and the provided model.
    """

    def __init__(self, regressor):
        """
        Create an instance of the scikit-learn model including a scaler.

        Args:
            regressor: Any scikit-learn compatible regressor to wrap.
        """
        self.regressor = Pipeline(
            steps=[
                ("scaler", StandardScaler()),  # scale the features
                ("model", regressor)  # scaling the target variable through TransformedTargetRegressor
                # is not compatible with ONNX
            ]
        )

    def fit(self, x, y):
        """
        Train the model.

        The inputs are cast to float32 so the fitted pipeline matches the
        dtype used later by the ONNX export in save_regressor.

        Args:
            x: Feature DataFrame (column order is recorded for the metadata).
            y: Target Series (its name is recorded for the metadata).
        """
        self.x_fit = x
        self.y_fit = y
        self.regressor.fit(x.values.astype(np.float32), y.values.astype(np.float32))

    def predict(self, x):
        """
        Make predictions.

        Args:
            x: Feature DataFrame with the same columns as used in fit.

        Returns:
            Array of predictions.
        """
        return self.regressor.predict(x.values.astype(np.float32))

    def _define_metadata(self):
        """
        Define metadata describing the fitted model (class, library version,
        target, feature order and preprocessing).
        """
        self.metadata = ModelMetadata(
            addmo_class=type(self).__name__,
            addmo_commit_id=ModelMetadata.get_commit_id(),
            library=sklearn.__name__,
            # record the actual model type, not the surrounding Pipeline
            library_model_type=type(self.regressor.named_steps['model']).__name__,
            library_version=sklearn.__version__,
            target_name=self.y_fit.name,
            features_ordered=list(self.x_fit.columns),
            preprocessing=['StandardScaler for all features'])

    def save_regressor(self, directory, regressor_filename, file_type='joblib'):
        """
        Save regressor as .joblib or .onnx including scaler to a file.

        Args:
            directory: Target directory for the serialized model.
            regressor_filename: File name without extension.
            file_type: Either 'joblib' or 'onnx'.

        Returns:
            str: The file type the model was saved as.

        Raises:
            ValueError: If file_type is neither 'joblib' nor 'onnx'.
        """
        full_filename = f"{regressor_filename}.{file_type}"
        path = create_path_or_ask_to_override(full_filename, directory)

        if file_type == 'joblib':
            joblib.dump(self.regressor, path)

        elif file_type == 'onnx':
            # x_fit provides the input signature (shape/dtype) for the ONNX graph
            onnx_model = to_onnx(self.regressor, self.x_fit.values)
            with open(path, "wb") as f:
                f.write(onnx_model.SerializeToString())
        else:
            raise ValueError(
                f"Unsupported file type '{file_type}'. "
                f"The supported file types for saving the model are: .joblib and .onnx")

        # Saving metadata alongside the serialized regressor
        self._define_metadata()
        self._save_metadata(directory, regressor_filename)

        print(f"Model saved to {path}.")
        return file_type

    def load_regressor(self, regressor):
        """
        Load trained model for serialisation.

        Args:
            regressor: A previously fitted pipeline/regressor to adopt.
        """
        self.regressor = regressor

    def to_scikit_learn(self, x=None):
        """
        Return the underlying scikit-learn pipeline.
        """
        return self.regressor

    def set_params(self, hyperparameters):
        """
        Set the hyperparameters of the model step within the pipeline.

        Args:
            hyperparameters (dict): Unprefixed hyperparameter names and values.
        """
        self.regressor.named_steps["model"].set_params(**hyperparameters)

    def get_params(self, deep=True):
        """
        Get the hyperparameters of the model step within the pipeline.

        Returns:
            dict: Unprefixed hyperparameter names and values.
        """
        return self.regressor.named_steps["model"].get_params(deep=deep)

class ScikitMLP(BaseScikitLearnModel):
    """Scikit-learn MLPRegressor model."""

    def __init__(self):
        super().__init__(MLPRegressor())
        # apply the project defaults (longer training, early stopping) immediately
        self.set_params(self.default_hyperparameter())

    def optuna_hyperparameter_suggest(self, trial):
        """
        Suggest hyperparameters for optuna optimization.

        Args:
            trial: An optuna trial object.

        Returns:
            dict: Suggested hyperparameters with a dynamically sized
            'hidden_layer_sizes' tuple.
        """
        hyperparameters = {}

        # Suggest the number of layers first, then one width per layer
        n_layers = trial.suggest_int("n_layers", 1, 2)
        hidden_layer_sizes = tuple(
            trial.suggest_int(f"n_units_l{i}", 1, 1000) for i in range(n_layers)
        )

        # Dynamic hidden layer sizes based on the number of layers
        hyperparameters["hidden_layer_sizes"] = hidden_layer_sizes

        return hyperparameters

    def grid_search_hyperparameter(self):
        """
        Return the hyperparameter grid for grid-search optimization.

        Returns:
            dict: Parameter names mapped to lists of candidate values.
        """
        hyperparameter_grid = {
            "hidden_layer_sizes": [(50,), (100,), (50, 50), (100, 100)],
            "activation": ["tanh", "relu"],
            "solver": ["sgd", "adam"],
            "alpha": [0.0001, 0.05],
            "learning_rate": ["constant", "adaptive"],
        }
        return hyperparameter_grid

    def default_hyperparameter(self):
        """
        Return default hyperparameters.

        Starts from the scikit-learn defaults and overrides the iteration
        budget and early stopping.
        """
        hyperparameter = MLPRegressor().get_params()
        hyperparameter["max_iter"] = 5000
        hyperparameter["early_stopping"] = True
        return hyperparameter

class ScikitMLP_TargetTransformed(ScikitMLP):
    """MLP model that additionally scales the target via TransformedTargetRegressor."""

    def __init__(self):
        """
        Create an instance of the scikit-learn model including a scaler.

        NOTE(review): target scaling through TransformedTargetRegressor is not
        compatible with ONNX export — presumably this variant is joblib-only.
        """
        wrapped_mlp = TransformedTargetRegressor(regressor=MLPRegressor())
        self.regressor = Pipeline(
            steps=[
                ("scaler", StandardScaler()),  # scale the features
                ("model", wrapped_mlp),  # target scaling happens inside the wrapper
            ]
        )
        self.set_params(self.default_hyperparameter())

    def set_params(self, hyperparameters):
        """
        Set hyperparameters on the MLP nested inside the TransformedTargetRegressor.
        """
        inner_mlp = self.regressor.named_steps["model"].regressor
        inner_mlp.set_params(**hyperparameters)

    def get_params(self, deep=True):
        """
        Get hyperparameters of the MLP nested inside the TransformedTargetRegressor.
        """
        inner_mlp = self.regressor.named_steps["model"].regressor
        return inner_mlp.get_params(deep=deep)

class ScikitLinearReg(BaseScikitLearnModel):
    """Linear Regression model."""

    def __init__(self):
        super().__init__(LinearRegression())

    def grid_search_hyperparameter(self):
        """Linear regression has no hyperparameters to tune; nothing to return."""
        pass

    def optuna_hyperparameter_suggest(self, trial):
        """Linear regression has no hyperparameters to tune; nothing to suggest."""
        pass

    def default_hyperparameter(self):
        """
        Return default hyperparameters (the scikit-learn defaults).
        """
        return LinearRegression().get_params()

class ScikitLinearRegNoScaler(ScikitLinearReg):
    """Linear Regression model without feature scaling."""

    def __init__(self):
        """
        Create an instance of the scikit-learn model WITHOUT a scaler.

        Deliberately does not call super().__init__() so the pipeline contains
        only the model step.
        """
        self.regressor = Pipeline(
            steps=[
                ("model", LinearRegression())
            ]
        )

    def fit(self, x, y):
        """
        Train the model on the raw (unscaled, uncast) data.
        """
        self.x_fit = x
        self.y_fit = y
        self.regressor.fit(x, y)

    def predict(self, x):
        """
        Make predictions on the raw (unscaled, uncast) data.
        """
        return self.regressor.predict(x)

    def get_params(self, deep=True):
        """
        Get the hyperparameters of the model.

        Returns:
            dict: Model hyperparameters plus two informational entries
            ('model_complexity', 'hidden_layer_sizes') for reporting.
        """
        # get model parameters
        param = self.regressor.named_steps["model"].get_params(deep=deep)

        # just info params, not real LinearRegression hyperparameters
        param['model_complexity'] = 1
        param['hidden_layer_sizes'] = []
        return param

    def _define_metadata(self):
        """
        Define metadata; records that no scaling is applied.
        """
        self.metadata = ModelMetadata(
            addmo_class=type(self).__name__,
            addmo_commit_id=ModelMetadata.get_commit_id(),
            library=sklearn.__name__,
            # fix: record the model step's type, not the Pipeline wrapper
            library_model_type=type(self.regressor.named_steps['model']).__name__,
            library_version=sklearn.__version__,
            target_name=self.y_fit.name,
            features_ordered=list(self.x_fit.columns),
            preprocessing=['No scaling'])

class ScikitSVR(BaseScikitLearnModel):
    """Scikit-learn Support Vector Regressor (SVR) model."""

    def __init__(self):
        super().__init__(SVR())

    def _define_metadata(self):
        """
        Define metadata describing the fitted model.
        """
        self.metadata = ModelMetadata(
            addmo_class=type(self).__name__,
            addmo_commit_id=ModelMetadata.get_commit_id(),
            library=sklearn.__name__,
            # fix: record the model step's type, not the Pipeline wrapper
            library_model_type=type(self.regressor.named_steps['model']).__name__,
            library_version=sklearn.__version__,
            target_name=self.y_fit.name,
            features_ordered=list(self.x_fit.columns),
            preprocessing=['Scaling'])

    def grid_search_hyperparameter(self):
        """No grid defined for SVR; use optuna_hyperparameter_suggest instead."""
        pass

    def optuna_hyperparameter_suggest(self, trial):
        """
        Suggest hyperparameters for optuna optimization.

        Args:
            trial: An optuna trial object.

        Returns:
            dict: Suggested SVR hyperparameters, including kernel-specific ones.
        """
        hyperparameters = {}

        hyperparameters["C"] = trial.suggest_float("C", 1e-2, 1e1, log=True)  # regularizer
        hyperparameters["epsilon"] = trial.suggest_float("epsilon", 1e-3, 1.0, log=True)  # distance of tube
        hyperparameters["kernel"] = trial.suggest_categorical("kernel", ["linear", "poly", "rbf", "sigmoid"])
        hyperparameters["tol"] = trial.suggest_float("tol", 1e-5, 1e-1, log=True)

        # kernel-specific hyperparameters
        if hyperparameters["kernel"] in ["poly", "rbf", "sigmoid"]:
            hyperparameters["gamma"] = trial.suggest_categorical("gamma", ["scale", "auto"])

        if hyperparameters["kernel"] == "poly":
            hyperparameters["degree"] = trial.suggest_int("degree", 2, 5)
            hyperparameters["coef0"] = trial.suggest_float("coef0", 0.0, 1.0)

        if hyperparameters["kernel"] == "sigmoid":
            hyperparameters["coef0"] = trial.suggest_float("coef0", 0.0, 1.0)

        return hyperparameters

    def default_hyperparameter(self):
        """
        Return default hyperparameters.

        Starts from the scikit-learn defaults and caps the iteration budget
        with a looser tolerance.
        """
        hyperparameter = SVR().get_params()
        hyperparameter["max_iter"] = 500
        hyperparameter["tol"] = 1e-2
        return hyperparameter

    def set_params(self, hyperparameters):
        """
        Set the hyperparameters of the SVR step within the pipeline.

        Args:
            hyperparameters (dict): Unprefixed SVR hyperparameter names/values.
        """
        hyperparameters = {f"model__{key}": value for key, value in hyperparameters.items()}

        self.regressor.set_params(**hyperparameters)

    def get_params(self, deep=True):
        """
        Get the hyperparameters of the SVR step within the pipeline.

        Returns:
            dict: Unprefixed SVR hyperparameters — the 'model__' pipeline
            prefix is stripped so the result is symmetric with set_params
            and consistent with the base-class contract.
        """
        param = self.regressor.get_params(deep=deep)

        prefix = "model__"
        model_param = {key[len(prefix):]: value
                       for key, value in param.items() if key.startswith(prefix)}
        return model_param