Coverage for addmo/s3_model_tuning/models/abstract_model.py: 65%

104 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-08-31 13:05 +0000

1import os 

2import json 

3import warnings 

4import onnxruntime as rt 

5import numpy as np 

6import subprocess 

7from abc import ABC, abstractmethod 

8import pandas as pd 

9from pydantic import BaseModel, Field 

10from addmo.util.load_save_utils import create_path_or_ask_to_override 

11 

12 

class ModelMetadata(BaseModel):
    """Metadata associated with a trained machine learning model.

    The metadata describes the saved regressor (ADDMo class, originating
    library, model type and version), the target and the ordered feature
    names, and the preprocessing applied, for models saved in joblib format.
    """

    addmo_class: str = Field(
        description="ADDMo model class type, from which the regressor was saved."
    )
    addmo_commit_id: str = Field(
        description="Current commit id when the model is saved."
    )
    library: str = Field(description="ML library origin of the regressor")
    library_model_type: str = Field(description="Type of regressor within library")
    library_version: str = Field(description="library version used")
    target_name: str = Field(description="Name of the target variable")
    features_ordered: list = Field(description="Name and order of features")
    preprocessing: list = Field(
        description="Preprocessing steps applied to the features."
    )
    instructions: str = Field(
        "Pass a single or multiple observations with features in the order listed above",
        description="Instructions for passing input system_data for making predictions.",
    )

    @staticmethod
    def get_commit_id():
        """Get the commit id for the metadata when a model is saved.

        Runs ``git describe --always`` in the current working directory.

        Returns:
            The stripped, decoded output of the git command, or ``'Unknown'``
            when git exits with an error or cannot be started at all.
        """
        try:
            raw_output = subprocess.check_output(
                ["git", "describe", "--always"],
                # Keep git's "fatal: ..." message off the console; the
                # failure is reported through the 'Unknown' fallback instead.
                stderr=subprocess.DEVNULL,
            )
        except (subprocess.CalledProcessError, OSError):
            # CalledProcessError: git ran but failed (e.g. not a repository).
            # OSError: git could not be executed (e.g. it is not installed).
            return 'Unknown'
        return raw_output.strip().decode()

45 

46 

class AbstractMLModel(ABC):
    """
    Abstract base class for machine learning models.

    This class provides an interface for all machine learning models, potentially including
    a scaler.

    Attributes:
        regressor: An instance of the machine learning model, usually including the scaler.
        x_fit: Initialised to None by ``__init__``.
        y_fit: Initialised to None by ``__init__``.
    """

    @abstractmethod
    def __init__(self):
        """Initializes the machine learning model."""
        self.regressor = None
        self.x_fit: pd.DataFrame = None
        self.y_fit: pd.DataFrame = None

    @abstractmethod
    def fit(self, x: pd.DataFrame, y: pd.Series):
        """
        Train the model on the provided system_data.

        Args:
            x: Features used for training.
            y: Target values used for training.
        """

    @abstractmethod
    def predict(self, x: pd.DataFrame):
        """
        Make predictions on the given input system_data.

        Args:
            x: Input system_data for making predictions.

        Returns:
            Predicted values, scaled back to the original scale if applicable.
        """

    @abstractmethod
    def save_regressor(self, directory, regressor_filename, file_type):
        """
        Save the trained model and metadata to the specified file path in the given file format.

        Args:
            directory: directory where the trained model is saved.
            regressor_filename: file name used for saving the model.
            file_type: file type used for saving the model.
        """

    @abstractmethod
    def _define_metadata(self) -> "ModelMetadata":
        """
        Define metadata for the model.
        """

    def _save_metadata(self, directory, regressor_filename):
        """
        Save metadata for the model. To be saved with save_regressor as json file.

        Reads ``self.metadata``, which this base class never assigns.
        NOTE(review): presumably subclasses set it (e.g. from
        ``_define_metadata``) before saving - confirm against the subclasses.

        Args:
            directory: directory where the trained model is saved.
            regressor_filename: file name used for saving the model.
        """
        metadata = self.metadata
        # pydantic v2 renamed ``dict()`` to ``model_dump()`` and deprecated the
        # old name; prefer the new method when the installed version has it.
        to_dict = getattr(metadata, "model_dump", None) or metadata.dict
        # Serialise before the file is opened so that a failure here cannot
        # leave an empty or truncated metadata file behind.
        payload = to_dict()

        metadata_path = create_path_or_ask_to_override(
            regressor_filename + '_metadata.json', directory
        )
        with open(metadata_path, 'w', encoding='utf-8') as f:
            json.dump(payload, f)

    def load_regressor(self, model_instance, input_shape=None):
        """
        Load a model including scaler.

        This base implementation only stores ``model_instance`` as the
        regressor; ``input_shape`` is accepted but not used here.

        Args:
            model_instance: model that is loaded.
            input_shape: input system_data shape which is used to initialize loaded model.
        """
        self.regressor = model_instance

    @abstractmethod
    def to_scikit_learn(self, x=None):
        """
        Convert the model including scaler to a scikit-learn compatible model.
        E.g. a scikit-learn pipeline.

        Most ML frameworks provide a converter to adapt models for scikit-learn specific tasks.

        Args:
            x: Input system_data used for building the regressor. (Only needed for Keras)

        Returns:
            A scikit-learn compatible version of the model including scaler.
        """

    @abstractmethod
    def set_params(self, **params):
        """
        Set the hyperparameters of the ML model.

        Args:
            **params: Variable length keyword arguments for hyperparameters.
        """

    @abstractmethod
    def get_params(self):
        """
        Get the hyperparameters of the ML model.

        Returns:
            A dictionary of the current hyperparameters.
        """

    @abstractmethod
    def optuna_hyperparameter_suggest(self, trial):
        """
        Suggest hyperparameters using Optuna for hyperparameter optimization.

        Args:
            trial: An Optuna trial object used to suggest hyperparameters.

        Returns:
            A dictionary of hyperparameters with Optuna distributions.
        """

    @abstractmethod
    def grid_search_hyperparameter(self):
        """
        Define the hyperparameters for grid search.

        Returns:
            A dictionary representing a hyperparameter grid for grid search.
        """

    @abstractmethod
    def default_hyperparameter(self):
        """
        Define the default hyperparameters of the model.

        Returns:
            A dictionary with a default set of hyperparameters.
        """

198 

199 

class PredictorOnnx(AbstractMLModel, ABC):
    """Overwrites the predict and load functions for the ONNX format.

    Only ``load_regressor`` and ``predict`` do real work. Every other method
    of the model interface just emits a warning and returns None.

    Attributes:
        model: The ``onnxruntime.InferenceSession`` created by ``load_regressor``.
        inputs: Name of the first input of the loaded model.
        labels: Name of the first output of the loaded model.
    """

    # Single source for the warning text shared by all unsupported methods.
    _NOT_IMPLEMENTED_MSG = "This function is not implemented for ONNX models"

    def __init__(self):
        super().__init__()
        self.labels = None
        self.inputs = None
        self.model = None

    def load_regressor(self, path, input_shape=None):
        """Open an ONNX model file and remember its first input and output names.

        Args:
            path: Passed as the first argument to ``onnxruntime.InferenceSession``.
            input_shape: Unused; accepted only so the method can be called
                with the same arguments as the base class method.
        """
        self.model = rt.InferenceSession(path, providers=["CPUExecutionProvider"])
        self.inputs = self.model.get_inputs()[0].name
        self.labels = self.model.get_outputs()[0].name

    def predict(self, x):
        """Run the loaded ONNX session on ``x``.

        Args:
            x: Input observations; a DataFrame or any other array-like that
                numpy can convert to a double-precision array.

        Returns:
            The first element of the list returned by the session's ``run``
            call, i.e. the values of the output named ``self.labels``.
        """
        # np.asarray accepts DataFrames as well as plain arrays or lists,
        # whereas ``x.values`` would only work for pandas objects.
        x_onnx = np.asarray(x, dtype=np.double)
        return self.model.run([self.labels], {self.inputs: x_onnx})[0]

    # The methods below exist to satisfy the abstract interface. Trailing
    # parameters default to None so that calls using the base-class signatures
    # produce the warning instead of a TypeError. ``stacklevel=2`` makes the
    # warning point at the caller rather than at this file.

    def default_hyperparameter(self):
        """Not implemented for ONNX models; only emits a warning."""
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=2)

    def fit(self, x, y):
        """Not implemented for ONNX models; only emits a warning."""
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=2)

    def get_params(self):
        """Not implemented for ONNX models; only emits a warning."""
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=2)

    def grid_search_hyperparameter(self):
        """Not implemented for ONNX models; only emits a warning."""
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=2)

    def optuna_hyperparameter_suggest(self, trial):
        """Not implemented for ONNX models; only emits a warning."""
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=2)

    def save_regressor(self, path, regressor_filename=None, file_type=None):
        """Not implemented for ONNX models; only emits a warning.

        ``regressor_filename`` and ``file_type`` are ignored; they allow a
        positional call with the three arguments of the base class method.
        """
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=2)

    def set_params(self, **params):
        """Not implemented for ONNX models; only emits a warning."""
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=2)

    def to_scikit_learn(self, x=None):
        """Not implemented for ONNX models; only emits a warning."""
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=2)

    def _define_metadata(self):
        """Not implemented for ONNX models; only emits a warning."""
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=2)

    def _save_regressor(self, path, file_type):
        """Not implemented for ONNX models; only emits a warning."""
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=2)

    def default_file_type(self):
        """Not implemented for ONNX models; only emits a warning."""
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=2)