
Model Construction

ANN

physXAI.models.ann.model_construction.ann_models

Functions

ClassicalANNConstruction(config: dict, td: TrainingDataGeneric)

Constructs a classical Artificial Neural Network (ANN) model using Keras.

Parameters:

    config (dict, required): A dictionary containing the configuration parameters for the ANN. This will be validated against ClassicalANNConstruction_config.
    td (TrainingDataGeneric, required): An object containing the training data, used for adapting normalization and determining input/output shapes.

Returns:

    keras.Model: The constructed Keras sequential model.

Source code in physXAI/models/ann/model_construction/ann_models.py
def ClassicalANNConstruction(config: dict, td: TrainingDataGeneric):
    """
    Constructs a classical Artificial Neural Network (ANN) model using Keras.

    Args:
        config (dict): A dictionary containing the configuration parameters for the ANN.
                       This will be validated against `ClassicalANNConstruction_config`.
        td (TrainingDataGeneric): An object containing the training data,
                           used for adapting normalization and determining input/output shapes.

    Returns:
        keras.Model: The constructed Keras sequential model.
    """

    # Validate the input configuration dictionary using the Pydantic model and convert it to a dictionary
    config = ClassicalANNConstruction_config.model_validate(config).model_dump()

    # Get config
    n_layers = config['n_layers']
    n_neurons = config['n_neurons']
    # If n_neurons is a single integer, replicate it for all layers
    if isinstance(n_neurons, int):
        n_neurons = [n_neurons] * n_layers
    else:
        assert len(n_neurons) == n_layers
    n_features = td.X_train_single.shape[1]
    activation_function = config['activation_function']
    # If activation_function is a single string, replicate it for all layers
    if isinstance(activation_function, str):
        activation_function = [activation_function] * n_layers
    else:
        assert len(activation_function) == n_layers

    # Rescaling for output layer
    rescale_min = float(td.y_train_single.min())
    rescale_max = float(td.y_train_single.max())

    # Build artificial neural network as Sequential
    model = keras.Sequential()

    # Add input layer
    model.add(keras.layers.Input(shape=(n_features,)))

    # Add normalization layer
    normalization = keras.layers.Normalization()
    normalization.adapt(td.X_train_single)
    model.add(normalization)

    for i in range(0, n_layers):
        # For each layer add dense
        model.add(keras.layers.Dense(n_neurons[i], activation=activation_function[i]))
    # Add output layer
    model.add(keras.layers.Dense(1, activation='linear'))
    # Add rescaling
    if config['rescale_output']:
        model.add(keras.layers.Rescaling(scale=rescale_max - rescale_min, offset=rescale_min))

    model.summary()

    return model
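
Example (illustrative). A minimal usage sketch, assuming physXAI is installed and that ClassicalANNConstruction_config supplies defaults for any keys not shown; the SimpleNamespace is a hypothetical stand-in for a TrainingDataGeneric instance, providing only the attributes this function reads:

import numpy as np
from types import SimpleNamespace
from physXAI.models.ann.model_construction.ann_models import ClassicalANNConstruction

rng = np.random.default_rng(0)
# Stand-in for TrainingDataGeneric: only the attributes used above
td = SimpleNamespace(
    X_train_single=rng.normal(size=(256, 4)).astype('float32'),
    y_train_single=rng.uniform(18.0, 26.0, size=(256, 1)).astype('float32'),
)

config = {
    'n_layers': 2,
    'n_neurons': [32, 16],          # or a single int, replicated for all layers
    'activation_function': 'relu',  # or a list with one entry per layer
    'rescale_output': True,
}
model = ClassicalANNConstruction(config, td)  # prints model.summary()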

CMNNModelConstruction(config: dict, td: TrainingDataGeneric)

Constructs a Constrained Monotonic Neural Network (CMNN) model using the Keras Functional API. This type of network can enforce monotonicity constraints on the input features.

Parameters:

    config (dict, required): A dictionary containing the configuration parameters for the CMNN. Validated against CMNNModelConstruction_config.
    td (TrainingDataGeneric, required): An object containing the training data, used for normalization, input shape, and determining monotonicity constraints based on column names.

Returns:

    keras.Model: The constructed Keras functional model.

Source code in physXAI/models/ann/model_construction/ann_models.py
def CMNNModelConstruction(config: dict, td: TrainingDataGeneric):
    """
    Constructs a Constrained Monotonic Neural Network (CMNN) model using the Keras Functional API.
    This type of network can enforce monotonicity constraints on the input features.

    Args:
        config (dict): A dictionary containing the configuration parameters for the CMNN.
                       Validated against `CMNNModelConstruction_config`.
        td (TrainingDataGeneric): An object containing the training data, used for normalization,
                           input shape, and determining monotonicity constraints based on column names.

    Returns:
        keras.Model: The constructed Keras functional model.
    """

    # Validate the input configuration dictionary and convert it to a dictionary
    config = CMNNModelConstruction_config.model_validate(config).model_dump()

    # Get config
    n_layers = config['n_layers']
    n_neurons = config['n_neurons']
    # If n_neurons is a single integer, replicate it for all layers
    if isinstance(n_neurons, int):
        n_neurons = [n_neurons] * n_layers
    else:
        assert len(n_neurons) == n_layers
    n_features = td.X_train_single.shape[1]
    activation_function = config['activation_function']
    # If activation_function is a single string, replicate it for all layers
    if isinstance(activation_function, str):
        activation_function = [activation_function] * n_layers
    else:
        assert len(activation_function) == n_layers

    # Get monotonicity constraints
    mono = config['monotonicities']
    if mono is None:
        monotonicities = [0] * n_features
    else:
        monotonicities = [0 if name not in mono.keys() else mono[name] for name in td.columns]

    # Rescaling for output layer
    rescale_min = float(td.y_train_single.min())
    rescale_max = float(td.y_train_single.max())

    # Add input layer
    input_layer = keras.layers.Input(shape=(n_features,))

    # Add normalization layer
    normalization = keras.layers.Normalization()
    normalization.adapt(td.X_train_single)
    x = normalization(input_layer)

    # Add dense layer
    activation_split = config['activation_split']
    # Determine activation split
    if activation_split is None:
        if mono is None:
            activation_split = [1, 0, 0]
        else:
            activation_split = [1, 1, 1]
    # First layer has partial constraints based on monotonicities
    kernel_constraint = NonNegPartial(monotonicities)
    for i in range(0, n_layers):
        x_split = list()
        # Convex activation
        if activation_split[0] > 0:
            x1 = keras.layers.Dense(int(n_neurons[i] * activation_split[0] / sum(activation_split)),
                                    activation=activation_function[i], kernel_constraint=kernel_constraint)(x)
            x_split.append(x1)
        # Concave activation
        if activation_split[1] > 0:
            x2 = keras.layers.Dense(int(n_neurons[i] * activation_split[1] / sum(activation_split)),
                                    activation=ConcaveActivation(activation_function[i]),
                                    kernel_constraint=kernel_constraint)(x)
            x_split.append(x2)
        # Saturated activation
        if activation_split[2] > 0:
            x3 = keras.layers.Dense(int(n_neurons[i] * activation_split[2] / sum(activation_split)),
                                    activation=SaturatedActivation(activation_function[i]),
                                    kernel_constraint=kernel_constraint)(x)
            x_split.append(x3)
        # Concatenate activations
        if len(x_split) > 1:
            x = keras.layers.concatenate(x_split)
        else:
            x = x_split[0]

        # After the partial monotonicity constraint has been applied in the first layer,
        # all following layers must use non-negative weights to preserve monotonicity.
        kernel_constraint = keras.constraints.NonNeg()

    # Add output layer
    x = keras.layers.Dense(1, activation='linear', kernel_constraint=keras.constraints.NonNeg())(x)

    # Add rescaling
    if config['rescale_output']:
        x = keras.layers.Rescaling(scale=rescale_max - rescale_min, offset=rescale_min)(x)

    # # Add min / max constraints
    # min_value = config['min_value']
    # max_value = config['max_value']
    # if min_value is not None or max_value is not None:
    #     d = keras.layers.Dense(1, activation=LimitedActivation(max_value, min_value),
    #                            kernel_initializer=keras.initializers.Ones(), use_bias=False)
    #     d.trainable = False
    #     x = d(x)

    model = keras.models.Model(inputs=input_layer, outputs=x)

    model.summary()

    return model
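
Example (illustrative). A hedged sketch of a monotonically constrained fit; the column names are hypothetical, and the monotonicities mapping assumes +1/-1 encode increasing/decreasing constraints for the named columns (unlisted columns get 0, i.e. unconstrained), matching the lookup against td.columns above:

import numpy as np
from types import SimpleNamespace
from physXAI.models.ann.model_construction.ann_models import CMNNModelConstruction

rng = np.random.default_rng(0)
td = SimpleNamespace(
    X_train_single=rng.normal(size=(256, 3)).astype('float32'),
    y_train_single=rng.normal(size=(256, 1)).astype('float32'),
    columns=['T_amb', 'solar_rad', 'wind_speed'],  # hypothetical feature names
)

config = {
    'n_layers': 2,
    'n_neurons': 16,
    'activation_function': 'relu',
    'monotonicities': {'T_amb': 1, 'wind_speed': -1},
    'activation_split': None,  # with monotonicities set, defaults to [1, 1, 1]
    'rescale_output': True,
}
model = CMNNModelConstruction(config, td)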

Radial Basis Function Network (RBF)

physXAI.models.ann.model_construction.rbf_models

Functions

RBFModelConstruction(config: dict, td: TrainingDataGeneric)

Constructs a Radial Basis Function (RBF) Network model using Keras.

The first RBF layer's centers can be initialized using K-Means clustering on the training data. Subsequent RBF layers (if any) will have their centers initialized by the RBFLayer's default mechanism or as specified.

Parameters:

    config (dict, required): A dictionary containing the configuration parameters for the RBF network. This is validated against RBFConstruction_config.
    td (TrainingDataGeneric, required): An object containing the training data, used for adapting normalization, K-Means clustering, and determining input/output shapes.

Returns:

    keras.Model: The constructed Keras functional model representing the RBF network.

Source code in physXAI/models/ann/model_construction/rbf_models.py
def RBFModelConstruction(config: dict, td: TrainingDataGeneric):
    """
    Constructs a Radial Basis Function (RBF) Network model using Keras.

    The first RBF layer's centers can be initialized using K-Means clustering
    on the training data. Subsequent RBF layers (if any) will have their
    centers initialized by the RBFLayer's default mechanism or as specified.

    Args:
        config (dict): A dictionary containing the configuration parameters for the RBF network.
                       This is validated against `RBFConstruction_config`.
        td (TrainingDataGeneric): An object containing the training data,
                           used for adapting normalization, K-Means clustering, and
                           determining input/output shapes.

    Returns:
        keras.Model: The constructed Keras functional model representing the RBF network.
    """

    # Validate the input configuration dictionary using the Pydantic model and convert it to a dictionary
    config = RBFConstruction_config.model_validate(config).model_dump()

    # Get config
    n_layers = config['n_layers']
    n_neurons = config['n_neurons']
    # If n_neurons is a single integer, replicate it for all layers
    if isinstance(n_neurons, int):
        n_neurons = [n_neurons] * n_layers
    else:
        assert len(n_neurons) == n_layers
    n_features = td.X_train_single.shape[1]

    # Rescaling for output layer
    # Custom rescaling
    if 'rescale_scale' in config.keys() and config['rescale_scale'] is not None:
        if 'rescale_offset' in config.keys() and config['rescale_offset'] is not None:
            offset = config['rescale_offset']
        else:
            offset = 0
        rescale_scale = config['rescale_scale']
        rescale_min = offset
        rescale_max = offset + rescale_scale
    # Standard rescaling
    else:
        rescale_min = float(td.y_train_single.min())
        rescale_max = float(td.y_train_single.max())

    # Add input layer
    input_layer = keras.layers.Input(shape=(n_features,))
    # Add normalization layer
    normalization = keras.layers.Normalization()
    normalization.adapt(td.X_train_single)
    x = normalization(input_layer)

    for i in range(0, n_layers):
        # For each layer add RBF

        # Determine initial rbf centers
        if i == 0:
            # Apply KMeans Clustering for rbf centers
            kmeans = KMeans(n_clusters=n_neurons[i], random_state=config['random_state'], n_init='auto')
            kmeans.fit(normalization(td.X_train_single))
            initial_centers_kmeans = kmeans.cluster_centers_
            x = RBFLayer(n_neurons[i], initial_centers=initial_centers_kmeans, gamma=1)(x)
        else:
            x = RBFLayer(n_neurons[i], gamma=1)(x)

    # Add output layer
    x = keras.layers.Dense(1, activation='linear')(x)

    # Add rescaling
    if config['rescale_output']:
        x = keras.layers.Rescaling(scale=rescale_max - rescale_min, offset=rescale_min)(x)

    model = keras.Model(inputs=input_layer, outputs=x)

    model.summary()

    return model
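
Example (illustrative). A usage sketch under the same assumptions as above; random_state seeds the K-Means initialization of the first layer's centers, and rescale_scale / rescale_offset may be supplied to override the min/max rescaling:

import numpy as np
from types import SimpleNamespace
from physXAI.models.ann.model_construction.rbf_models import RBFModelConstruction

rng = np.random.default_rng(0)
td = SimpleNamespace(
    X_train_single=rng.normal(size=(256, 4)).astype('float32'),
    y_train_single=rng.normal(size=(256, 1)).astype('float32'),
)

config = {
    'n_layers': 1,
    'n_neurons': 20,         # number of RBF centers
    'random_state': 42,      # seeds the K-Means center initialization
    'rescale_output': True,
}
model = RBFModelConstruction(config, td)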

Residual Model

physXAI.models.ann.model_construction.residual_models

Functions

LinResidualANNConstruction(config: dict, td: TrainingDataGeneric, lin_model: LinearRegression)

Constructs a hybrid Keras model that combines a pre-trained linear regression model with a Radial Basis Function (RBF) network. The RBF network is trained to model the residuals (errors) of the linear regression model.

The final prediction is the sum of the linear model's prediction and the RBF network's prediction (which learns the residuals).

Parameters:

    config (dict, required): A dictionary containing configuration parameters, primarily for the RBF network part of the model. This config will be modified to set rescaling parameters for the RBF network based on the linear model's residuals.
    td (TrainingDataGeneric, required): An object containing the training data. Used to calculate residuals and for the RBF model construction.
    lin_model (LinearRegression, required): A pre-trained scikit-learn LinearRegression model.

Returns:

    keras.Model: The constructed Keras functional model, which combines the linear model and the residual-fitting RBF network.

Source code in physXAI/models/ann/model_construction/residual_models.py
def LinResidualANNConstruction(config: dict, td: TrainingDataGeneric, lin_model: LinearRegression):
    """
        Constructs a hybrid Keras model that combines a pre-trained linear regression model
        with a Radial Basis Function (RBF) network. The RBF network is trained to model
        the residuals (errors) of the linear regression model.

        The final prediction is the sum of the linear model's prediction and the RBF network's
        prediction (which learns the residuals).

        Args:
            config (dict): A dictionary containing configuration parameters, primarily for
                           the RBF network part of the model. This config will be modified
                           to set rescaling parameters for the RBF network based on the
                           linear model's residuals.
            td (TrainingDataGeneric): An object containing the training data.
                               Used to calculate residuals and for the RBF model construction.
            lin_model (sklearn.linear_model.LinearRegression): A pre-trained scikit-learn
                                                               LinearRegression model.

        Returns:
            keras.Model: The constructed Keras functional model, which combines the linear
                         model and the residual-fitting RBF network.
    """

    # Determine predictions of linear regression for rescaling
    y_train_pred = lin_model.predict(td.X_train_single)
    max_error = float(np.abs(td.y_train_single - y_train_pred).max())
    config['rescale_scale'] = max_error
    config['rescale_offset'] = 0

    # Add linear regression as dense keras layer
    lin = keras.layers.Dense(1, activation='linear')

    # Add input layer
    n_features = td.X_train_single.shape[1]
    inputs = keras.layers.Input(shape=(n_features,))

    # Construct rbf model
    rbf_model = RBFModelConstruction(config, td)

    # Combine linear layer and rbf model
    output = keras.layers.Add()([rbf_model(inputs), lin(inputs)])

    # Create model
    model = keras.Model(inputs, output)

    # Fix the weights of linear regression
    lin.set_weights([lin_model.coef_.reshape(-1, 1), np.array(lin_model.intercept_)])
    lin.trainable = False

    model.summary()

    return model
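
Example (illustrative). The intended workflow, sketched with synthetic data: fit the linear model first, then pass it in together with an RBF config; the function itself injects rescale_scale and rescale_offset from the residuals, so the config only needs the RBF keys:

import numpy as np
from types import SimpleNamespace
from sklearn.linear_model import LinearRegression
from physXAI.models.ann.model_construction.residual_models import LinResidualANNConstruction

rng = np.random.default_rng(0)
X = rng.normal(size=(256, 4)).astype('float32')
y = (X @ rng.normal(size=(4, 1)) + 0.1 * rng.normal(size=(256, 1))).astype('float32')
td = SimpleNamespace(X_train_single=X, y_train_single=y)

lin_model = LinearRegression().fit(X, y)  # pre-trained linear part

rbf_config = {'n_layers': 1, 'n_neurons': 10, 'random_state': 42, 'rescale_output': True}
model = LinResidualANNConstruction(rbf_config, td, lin_model)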

RNN

physXAI.models.ann.model_construction.rnn_models

Functions

RNNModelConstruction(config: dict, td: TrainingDataMultiStep)

Constructs a Recurrent Neural Network (RNN) model for multi-step time series forecasting. The model can optionally use a "warmup" sequence to initialize the RNN's hidden state.

Parameters:

    config (dict, required): A dictionary containing configuration parameters for the RNN model. Validated against RNNModelConstruction_config.
    td (TrainingDataMultiStep, required): An object containing the multi-step training data. It provides shapes for inputs, outputs, and warmup sequences.

Returns:

    keras.Model: The constructed Keras functional model for RNN-based forecasting.

Source code in physXAI/models/ann/model_construction/rnn_models.py
def RNNModelConstruction(config: dict, td: TrainingDataMultiStep):
    """
    Constructs a Recurrent Neural Network (RNN) model for multi-step time series forecasting.
    The model can optionally use a "warmup" sequence to initialize the RNN's hidden state.

    Args:
        config (dict): A dictionary containing configuration parameters for the RNN model.
                       Validated against `RNNModelConstruction_config`.
        td (TrainingDataMultiStep): An object containing the multi-step training data.
                                    It provides shapes for inputs, outputs, and warmup sequences.

    Returns:
        keras.Model: The constructed Keras functional model for RNN-based forecasting.
    """

    # Validate the input configuration dictionary using the Pydantic model and convert it to a dictionary
    config = RNNModelConstruction_config.model_validate(config).model_dump()

    # Get boundary conditions from training data
    # With initialization data
    if isinstance(td.X_train, tuple):
        warmup = True
        out_steps = td.X_train[0].shape[1]
        warmup_width = td.X_train[1].shape[1]
        num_features = td.X_train[0].shape[2]
        num_warmup_features = td.X_train[1].shape[2]
    # Without initialization data
    else:
        warmup = False
        out_steps = td.X_train.shape[1]
        warmup_width = 0
        num_features = td.X_train.shape[2]
        num_warmup_features = 0
    num_outputs = td.y_train.shape[2]

    # Get config
    rnn_units = config['rnn_units']
    init_layer = config['init_layer']
    rnn_layer = config['rnn_layer']

    # Rescaling for output layer
    rescale_min = keras.ops.cast(keras.ops.min(td.y_train), dtype="float32")
    rescale_max = keras.ops.cast(keras.ops.max(td.y_train), dtype="float32")

    # Input layer
    inputs = keras.Input(shape=(out_steps, num_features))

    # Output rnn model
    o_model = out_model(td.X_train[0].reshape(-1, num_features), num_features, rnn_layer, rnn_units, num_outputs,
                        rescale_min, rescale_max)

    # Warmup
    if warmup:
        # Create warmup model
        initial_value_layer = keras.Input(shape=(warmup_width, num_warmup_features))
        int_model = init_model(td.X_train[1].reshape(-1, num_warmup_features), warmup_width, num_warmup_features,
                               init_layer, rnn_layer, rnn_units)
        state = int_model(initial_value_layer)

    # No warmup
    else:
        # Initialize models with zeros
        initial_value_layer = None
        int_model = init_zeros(num_features, rnn_units, out_steps)
        if rnn_layer == "LSTM":
            state = [int_model(inputs), int_model(inputs)]
        else:
            state = [int_model(inputs)]

    # Get output predictions
    prediction, *_ = o_model([inputs, state])

    # Reshape output
    outputs = keras.layers.Reshape((out_steps, num_outputs))(prediction)

    # Define the model
    if warmup:
        model = keras.Model([inputs, initial_value_layer], outputs)
    else:
        model = keras.Model(inputs, outputs)

    model.summary()
    if warmup:
        int_model.summary()
    o_model.summary()

    return model
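
Example (illustrative). A sketch of the warmup variant with hypothetical shapes; X_train is a tuple of (main sequence, warmup sequence) and y_train has shape (samples, out_steps, num_outputs), matching the branches above:

import numpy as np
from types import SimpleNamespace
from physXAI.models.ann.model_construction.rnn_models import RNNModelConstruction

rng = np.random.default_rng(0)
td = SimpleNamespace(
    X_train=(rng.normal(size=(128, 24, 5)).astype('float32'),   # 24 steps, 5 features
             rng.normal(size=(128, 6, 3)).astype('float32')),   # 6 warmup steps, 3 features
    y_train=rng.normal(size=(128, 24, 1)).astype('float32'),
)

config = {'rnn_units': 32, 'init_layer': 'GRU', 'rnn_layer': 'GRU'}
model = RNNModelConstruction(config, td)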

init_model(warmup_df: np.ndarray, warmup_width: int, num_warmup_features: int, init_layer: str, rnn_layer: str, rnn_units: int)

Creates a Keras model to initialize the RNN state using a warmup sequence.

Parameters:

    warmup_df (np.ndarray, required): The warmup sequence data used to adapt the normalization layer. Shape (samples, warmup_width, num_warmup_features).
    warmup_width (int, required): Number of time steps in the warmup sequence.
    num_warmup_features (int, required): Number of features in the warmup sequence.
    init_layer (str, required): Type of layer to process the warmup sequence ('dense', 'GRU', 'RNN', 'LSTM').
    rnn_layer (str, required): Type of the main RNN layer ('LSTM', 'GRU', 'RNN'), used to determine the number of state tensors needed if init_layer is 'dense'.
    rnn_units (int, required): Number of units for the RNN/Dense layers used in initialization.

Returns:

    keras.Model: A Keras model that takes a warmup sequence and returns the initial RNN state(s).

Source code in physXAI/models/ann/model_construction/rnn_models.py
def init_model(warmup_df: np.ndarray, warmup_width: int, num_warmup_features: int, init_layer: str,
               rnn_layer: str, rnn_units: int):
    """
    Creates a Keras model to initialize the RNN state using a warmup sequence.

    Args:
        warmup_df (np.ndarray): The warmup sequence data used to adapt the normalization layer.
                                Shape (samples, warmup_width, num_warmup_features).
        warmup_width (int): Number of time steps in the warmup sequence.
        num_warmup_features (int): Number of features in the warmup sequence.
        init_layer (str): Type of layer to process the warmup sequence ('dense', 'GRU', 'RNN', 'LSTM').
        rnn_layer (str): Type of the main RNN layer ('LSTM', 'GRU', 'RNN'), used to determine
                             the number of state tensors needed if init_layer is 'dense'.
        rnn_units (int): Number of units for the RNN/Dense layers used in initialization.

    Returns:
        keras.Model: A Keras model that takes a warmup sequence and returns the initial RNN state(s).
    """

    # Input layer
    inputs = keras.Input(shape=(warmup_width, num_warmup_features))

    # Normalization layer
    normalization_layer = keras.layers.Normalization()
    normalization_layer.adapt(warmup_df)
    normalized_inputs = normalization_layer(inputs)
    normalized_inputs = keras.layers.Reshape((warmup_width, num_warmup_features))(normalized_inputs)

    # Init layer
    if init_layer == 'dense':
        dense_init = keras.layers.Dense(units=rnn_units, activation='softplus')
        normalized_inputs = keras.layers.Flatten()(normalized_inputs)
        if rnn_layer == 'LSTM':  # For LSTM, creating two Dense layers
            dense_init2 = keras.layers.Dense(units=rnn_units, activation='softplus')
            state = [dense_init(normalized_inputs), dense_init2(normalized_inputs)]
        else:
            state = [dense_init(normalized_inputs)]
    elif init_layer == 'GRU':
        rnn_init = keras.layers.GRU(units=rnn_units, return_state=True)
        _, *state = rnn_init(normalized_inputs)
    elif init_layer == 'RNN':
        rnn_init = keras.layers.SimpleRNN(units=rnn_units, return_state=True)
        _, *state = rnn_init(normalized_inputs)
    elif init_layer == "LSTM":
        rnn_init = keras.layers.LSTM(units=rnn_units, return_state=True)
        _, *state = rnn_init(normalized_inputs)
    else:
        raise NotImplementedError(f'Not implemented {init_layer}')

    return keras.Model(inputs, state, name='init_model')
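
Example (illustrative). The helper can be exercised on its own; for 'LSTM' it returns two state tensors (hidden and cell state), for 'GRU'/'RNN' one. Note that the warmup data is passed in flattened to two dimensions for adapting the normalization layer, as in RNNModelConstruction above:

import numpy as np
from physXAI.models.ann.model_construction.rnn_models import init_model

warmup = np.random.rand(64, 6, 3).astype('float32')
m = init_model(warmup.reshape(-1, 3), warmup_width=6, num_warmup_features=3,
               init_layer='LSTM', rnn_layer='LSTM', rnn_units=16)
states = m.predict(warmup, verbose=0)  # two arrays of shape (64, 16)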

init_zeros(num_features: int, rnn_units: int, out_steps: int)

Creates a Keras model that generates a zero initial state for an RNN. The state's batch size dimension will match the input batch size.

Parameters:

    num_features (int, required): Number of features in the main input sequence (used by the input layer).
    rnn_units (int, required): The number of units in the RNN, determining the size of the zero state.
    out_steps (int, required): Number of time steps in the main input sequence.

Returns:

    keras.Model: A Keras model that takes a dummy input (main sequence shape) and returns a zero tensor suitable as an initial RNN hidden state.

Source code in physXAI/models/ann/model_construction/rnn_models.py
def init_zeros(num_features: int, rnn_units: int, out_steps: int):
    """
    Creates a Keras model that generates a zero initial state for an RNN.
    The state's batch size dimension will match the input batch size.

    Args:
        num_features (int): Number of features in the main input sequence (used by the input layer).
        rnn_units (int): The number of units in the RNN, determining the size of the zero state.
        out_steps (int): Number of time steps in the main input sequence.

    Returns:
        keras.Model: A Keras model that takes a dummy input (main sequence shape) and
                     returns a zero tensor suitable as an initial RNN hidden state.
    """
    initial_value_layer = keras.Input(shape=(out_steps, num_features))
    crop = keras.layers.Cropping1D(cropping=(0, out_steps-1))
    dense_zeros = keras.layers.Dense(rnn_units, activation='linear', use_bias=False,
                                     kernel_initializer=keras.initializers.Zeros())
    dense_zeros.trainable = False
    cropped = crop(initial_value_layer)
    zeros = keras.layers.Reshape((1, num_features))(cropped)
    zeros = keras.layers.Flatten()(zeros)
    zeros = dense_zeros(zeros)
    return keras.Model(inputs=initial_value_layer, outputs=zeros, name='init_zeros')
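
Note the construction: instead of calling a backend zeros op, the input sequence is cropped to a single time step, flattened, and passed through a frozen Dense layer whose kernel is initialized to zeros, yielding a (batch, rnn_units) zero tensor whose batch dimension automatically tracks the input. A quick illustrative check:

import numpy as np
from physXAI.models.ann.model_construction.rnn_models import init_zeros

m = init_zeros(num_features=3, rnn_units=8, out_steps=5)
state = m.predict(np.random.rand(2, 5, 3).astype('float32'), verbose=0)
print(state.shape, state.max())  # (2, 8) 0.0 -- all-zero initial state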

out_model(inputs_df: np.ndarray, num_features: int, rnn_layer: str, rnn_units: int, num_outputs: int, rescale_min: float, rescale_max: float)

Creates the main Keras model that processes an input sequence with an initial RNN state to produce predictions and the final RNN state.

Parameters:

    inputs_df (np.ndarray, required): The main input sequence data used to adapt the normalization layer. Shape (samples, steps, features).
    num_features (int, required): Number of features in the main input sequence.
    rnn_layer (str, required): Type of RNN layer to use ('GRU', 'RNN', 'LSTM').
    rnn_units (int, required): Number of units in the RNN layer.
    num_outputs (int, required): Number of output features to predict at each time step.
    rescale_min (float, required): Minimum value used by a Rescaling layer applied to the predictions.
    rescale_max (float, required): Maximum value used by a Rescaling layer applied to the predictions.

Returns:

    keras.Model: A Keras model that takes [main_input_sequence, initial_state(s)] and returns [prediction_sequence, final_state(s)].

Source code in physXAI/models/ann/model_construction/rnn_models.py
def out_model(inputs_df: np.ndarray, num_features: int, rnn_layer: str, rnn_units: int, num_outputs: int,
              rescale_min: float, rescale_max: float):
    """
    Creates the main Keras model that processes an input sequence with an initial RNN state
    to produce predictions and the final RNN state.

    Args:
        inputs_df (np.ndarray): The main input sequence data used to adapt the normalization layer.
                                Shape (samples, steps, features).
        num_features (int): Number of features in the main input sequence.
        rnn_layer (str): Type of RNN layer to use ('GRU', 'RNN', 'LSTM').
        rnn_units (int): Number of units in the RNN layer.
        num_outputs (int): Number of output features to predict at each time step.
        rescale_min (float): Minimum value used by a Rescaling layer applied to the predictions.
        rescale_max (float): Maximum value used by a Rescaling layer applied to the predictions.

    Returns:
        keras.Model: A Keras model that takes [main_input_sequence, initial_state(s)]
                     and returns [prediction_sequence, final_state(s)].
    """
    # Input layer
    inputs = keras.Input(shape=(None, num_features))

    # Normalization layer
    normalization_layer = keras.layers.Normalization()
    normalization_layer.adapt(inputs_df)
    normalized_inputs = normalization_layer(inputs)

    # RNN layer
    if rnn_layer == "GRU":
        input_init = keras.Input(shape=(rnn_units,))
        rnn = keras.layers.GRU(rnn_units, return_state=True, return_sequences=True)
    elif rnn_layer == "RNN":
        input_init = keras.Input(shape=(rnn_units,))
        rnn = keras.layers.SimpleRNN(rnn_units, return_state=True, return_sequences=True)
    elif rnn_layer == "LSTM":
        input_init = [keras.Input(shape=(rnn_units,)) for _ in range(2)]  # List of two inputs for LSTM states
        rnn = keras.layers.LSTM(rnn_units, return_state=True, return_sequences=True)
    else:
        raise NotImplementedError(f'Not implemented {rnn_layer}')

    # Predict outputs and states
    pred, *state = rnn(normalized_inputs, initial_state=input_init)

    # Final dense Layer
    dense = keras.layers.Dense(num_outputs)
    pred = dense(pred)

    # Rescaling layer
    rescaling_layer = keras.layers.Rescaling(scale=rescale_max - rescale_min, offset=rescale_min)
    pred = rescaling_layer(pred)

    return keras.Model([inputs, input_init], [pred, *state], name='out_model')
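
Example (illustrative). Because out_model returns both the rescaled predictions and the final RNN state, it can be stepped forward chunk by chunk, feeding each final state back in as the next initial state. A standalone sketch for the GRU case, with hypothetical shapes and a zero initial state:

import numpy as np
from physXAI.models.ann.model_construction.rnn_models import out_model

seq = np.random.rand(32, 24, 5).astype('float32')
m = out_model(seq.reshape(-1, 5), num_features=5, rnn_layer='GRU', rnn_units=16,
              num_outputs=1, rescale_min=0.0, rescale_max=1.0)
pred, final_state = m.predict([seq, np.zeros((32, 16), dtype='float32')], verbose=0)
print(pred.shape, final_state.shape)  # (32, 24, 1) (32, 16)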