Coverage for addmo/s3_model_tuning/models/abstract_model.py: 65%
104 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-31 13:05 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-31 13:05 +0000
1import os
2import json
3import warnings
4import onnxruntime as rt
5import numpy as np
6import subprocess
7from abc import ABC, abstractmethod
8import pandas as pd
9from pydantic import BaseModel, Field
10from addmo.util.load_save_utils import create_path_or_ask_to_override
class ModelMetadata(BaseModel):
    """Metadata associated with a trained machine learning model when it is
    saved in joblib format.

    The fields record where the regressor came from (ADDMo class, commit id,
    ML library, model type and library version) and how it has to be fed
    (target name, feature order, preprocessing steps, usage instructions).
    """

    addmo_class: str = Field(
        description="ADDMo model class type, from which the regressor was saved."
    )
    addmo_commit_id: str = Field(
        description="Current commit id when the model is saved."
    )
    library: str = Field(description="ML library origin of the regressor")
    library_model_type: str = Field(description="Type of regressor within library")
    library_version: str = Field(description="library version used")
    target_name: str = Field(description="Name of the target variable")
    features_ordered: list = Field(description="Name and order of features")
    preprocessing: list = Field(
        description="Preprocessing steps applied to the features."
    )
    # The only field with a default value: a fixed usage hint for consumers.
    instructions: str = Field(
        "Pass a single or multiple observations with features in the order listed above",
        description="Instructions for passing input system_data for making predictions.",
    )

    @staticmethod
    def get_commit_id():
        """Get the commit id for metadata when model is saved.

        Runs ``git describe --always`` and returns its stripped, decoded output.

        Returns:
            The output of ``git describe --always``, or ``'Unknown'`` when git
            exits with a non-zero status or the git executable cannot be
            started (e.g. git is not installed).
        """
        # NOTE(review): no ``cwd`` is passed, so git inspects the process's
        # current working directory - confirm this is always the ADDMo checkout.
        try:
            commit_id = subprocess.check_output(["git", "describe", "--always"]).strip().decode()
        except (subprocess.CalledProcessError, OSError):
            # CalledProcessError: git ran but failed (e.g. not a repository).
            # OSError (incl. FileNotFoundError): git could not be executed at
            # all. Saving a model must not fail just because git is missing.
            commit_id = 'Unknown'
        return commit_id
class AbstractMLModel(ABC):
    """
    Common interface that every machine learning model wrapper implements.

    A concrete model wraps a regressor (potentially together with a scaler) and
    provides training, prediction, persistence and hyperparameter handling.

    Attributes:
        regressor: The wrapped machine learning model, usually including the scaler.
        x_fit: Initialised to None by the base constructor.
        y_fit: Initialised to None by the base constructor.
    """

    @abstractmethod
    def __init__(self):
        """Set up the model with empty regressor and fit-data attributes."""
        self.regressor = None
        self.x_fit: pd.DataFrame = None
        self.y_fit: pd.DataFrame = None

    @abstractmethod
    def fit(self, x: pd.DataFrame, y: pd.Series):
        """
        Fit the model to the given system_data.

        Args:
            x: Training features.
            y: Training target values.
        """

    @abstractmethod
    def predict(self, x: pd.DataFrame):
        """
        Predict target values for the given input system_data.

        Args:
            x: Features to predict for.

        Returns:
            The predictions, scaled back to the original scale if applicable.
        """

    @abstractmethod
    def save_regressor(self, directory, regressor_filename, file_type):
        """
        Persist the trained model and its metadata in the requested file format.

        Args:
            directory: Directory the trained model is written to.
            regressor_filename: File name under which the model is stored.
            file_type: File type in which the model is stored.
        """

    @abstractmethod
    def _define_metadata(self) -> ModelMetadata:
        """
        Build the metadata describing this model.
        """

    def _save_metadata(self, directory, regressor_filename):
        """
        Write the model metadata as a json file. Meant to be called from save_regressor.

        The file is named ``<regressor_filename>_metadata.json`` and its content
        is taken from ``self.metadata``.

        Args:
            directory: Directory the trained model is written to.
            regressor_filename: File name under which the model is stored.
        """
        metadata_filename = regressor_filename + '_metadata.json'
        metadata_path = create_path_or_ask_to_override(metadata_filename, directory)
        with open(metadata_path, 'w') as metadata_file:
            json.dump(self.metadata.dict(), metadata_file)

    def load_regressor(self, model_instance, input_shape=None):
        """
        Attach an already loaded model (including scaler) to this wrapper.

        Args:
            model_instance: The loaded model; stored as ``self.regressor``.
            input_shape: Shape of the input system_data used to initialise the
                loaded model. Not used by this base implementation.
        """
        self.regressor = model_instance

    @abstractmethod
    def to_scikit_learn(self, x=None):
        """
        Provide the model including scaler as a scikit-learn compatible model,
        e.g. a scikit-learn pipeline.

        Most ML frameworks ship a converter that adapts their models for
        scikit-learn specific tasks.

        Args:
            x: Input system_data used for building the regressor. (Only needed for Keras)

        Returns:
            A scikit-learn compatible version of the model including scaler.
        """

    @abstractmethod
    def set_params(self, **params):
        """
        Apply hyperparameters to the ML model.

        Args:
            **params: Hyperparameters given as keyword arguments.
        """

    @abstractmethod
    def get_params(self):
        """
        Read the hyperparameters of the ML model.

        Returns:
            A dictionary of the current hyperparameters.
        """

    @abstractmethod
    def optuna_hyperparameter_suggest(self, trial):
        """
        Let Optuna suggest hyperparameters for hyperparameter optimization.

        Args:
            trial: The Optuna trial object that suggests the hyperparameters.

        Returns:
            A dictionary of hyperparameters with Optuna distributions.
        """

    @abstractmethod
    def grid_search_hyperparameter(self):
        """
        Provide the hyperparameter grid used for grid search.

        Returns:
            A dictionary representing a hyperparameter grid for grid search.
        """

    @abstractmethod
    def default_hyperparameter(self):
        """
        Provide the default hyperparameters of the model.

        Returns:
            A dictionary with a default set of hyperparameters.
        """
class PredictorOnnx(AbstractMLModel, ABC):
    """Predictor for models stored in ONNX format.

    Overrides the load and predict functions so that inference runs through an
    ONNX Runtime session. Every other operation of the model interface is
    unavailable for ONNX models: calling it only emits a warning and returns
    None.

    Attributes:
        model: The ONNX Runtime inference session, None until load_regressor ran.
        inputs: Name of the first input of the loaded session.
        labels: Name of the first output of the loaded session.
    """

    # Single source for the message emitted by all unavailable operations.
    _NOT_IMPLEMENTED_MSG = "This function is not implemented for ONNX models"

    def __init__(self):
        """Initialise the predictor without a loaded session."""
        super().__init__()
        self.labels = None
        self.inputs = None
        self.model = None

    def _warn_not_implemented(self):
        """Warn that the calling operation is not available for ONNX models."""
        # stacklevel=3 skips this helper and the public method, so the warning
        # is attributed to the user code that made the unsupported call.
        warnings.warn(self._NOT_IMPLEMENTED_MSG, stacklevel=3)

    def load_regressor(self, path, input_shape=None):
        """
        Open the ONNX model at the given path with the CPU execution provider.

        Args:
            path: Location of the ONNX model, handed to the inference session.
            input_shape: Ignored; accepted only so the signature is compatible
                with the base class interface.
        """
        self.model = rt.InferenceSession(path, providers=["CPUExecutionProvider"])
        # Only the first input and the first output of the session are used.
        self.inputs = self.model.get_inputs()[0].name
        self.labels = self.model.get_outputs()[0].name

    def predict(self, x):
        """
        Run the loaded ONNX session on the given input system_data.

        Args:
            x: Dataframe of features; its values are cast to double precision.

        Returns:
            The first output returned by the inference session.
        """
        x_onnx = x.values  # Converts dataframe to numpy array
        return self.model.run([self.labels], {self.inputs: x_onnx.astype(np.double)})[0]

    def default_hyperparameter(self):
        """Not available for ONNX models; only emits a warning."""
        self._warn_not_implemented()

    def fit(self, x, y):
        """Not available for ONNX models; only emits a warning."""
        self._warn_not_implemented()

    def get_params(self):
        """Not available for ONNX models; only emits a warning."""
        self._warn_not_implemented()

    def grid_search_hyperparameter(self):
        """Not available for ONNX models; only emits a warning."""
        self._warn_not_implemented()

    def optuna_hyperparameter_suggest(self, trial):
        """Not available for ONNX models; only emits a warning."""
        self._warn_not_implemented()

    def save_regressor(self, path, regressor_filename=None, file_type=None):
        """Not available for ONNX models; only emits a warning.

        The optional trailing arguments are ignored. They exist so that a call
        using the three-argument form of the base class interface results in
        the warning instead of a TypeError.
        """
        self._warn_not_implemented()

    def set_params(self, **params):
        """Not available for ONNX models; only emits a warning."""
        self._warn_not_implemented()

    def to_scikit_learn(self, x=None):
        """Not available for ONNX models; only emits a warning.

        ``x`` is ignored; it is accepted for compatibility with the base class
        interface.
        """
        self._warn_not_implemented()

    def _define_metadata(self):
        """Not available for ONNX models; only emits a warning."""
        self._warn_not_implemented()

    def _save_regressor(self, path, file_type):
        """Not available for ONNX models; only emits a warning."""
        self._warn_not_implemented()

    def default_file_type(self):
        """Not available for ONNX models; only emits a warning."""
        self._warn_not_implemented()