Source code for agentlib_mpc.machine_learning_plugins.physXAI.model_generation

import importlib
import json
import os
import pathlib
import shutil
from typing import Union
from agentlib_mpc.machine_learning_plugins.physXAI.model_config_creation import physXAI_2_agentlib_json
from agentlib.core.errors import OptionalDependencyError
try:
    from physXAI import models  # Keep this import to ensure physXAI models are registered
except ImportError:
    raise OptionalDependencyError(dependency_name="physXAI", dependency_install="git+https://github.com/RWTH-EBC/physXAI.git", used_object="physXAI")


# Directory in which machine learning models are stored. The path is relative;
# generate_physxai_model resolves it with os.path.abspath (i.e. against the
# current working directory) and places each run in a sub-folder named after
# its run id.
model_save_path_rel: str = 'models'  # Relative path in agentlib_mpc to save machine learning models


def use_existing_models(old_id: str, new_id: str, model_save_path: str) -> list[str]:
    """Reuse existing physXAI models by copying them into a folder for a new run id.

    The folder ``<model_save_path>/<old_id>`` is copied recursively to
    ``<model_save_path>/<new_id>``; files already present in the target folder
    are overwritten.

    Args:
        old_id (str): Existing model run identifier
        new_id (str): New model run identifier
        model_save_path (str): Path where models are saved

    Returns:
        List[str]: Sorted list of the ``*.json`` file paths located directly in
        the new run folder.

    Raises:
        ValueError: If ``<model_save_path>/<old_id>`` is not an existing directory.
    """
    old_path = pathlib.Path(os.path.join(model_save_path, old_id))
    # Validate the source first so that an invalid id does not leave an empty
    # target folder behind.
    if not old_path.is_dir():
        raise ValueError(f"Error: If a single string is given, it is assumed to be an id to an existing model folder. {str(old_path)} is not a valid directory.")

    new_path = pathlib.Path(os.path.join(model_save_path, new_id))
    os.makedirs(new_path, exist_ok=True)

    try:
        shutil.copytree(old_path, new_path, dirs_exist_ok=True)
    except Exception as e:
        # Best effort: a failed copy is reported but not fatal, so whatever
        # model files did reach the new folder are still returned below.
        print(f"An error occurred: {e}")

    # Sorted so that the result does not depend on directory listing order.
    return sorted(str(p) for p in new_path.glob('*.json') if p.is_file())


def _run_physxai_training_script(script_name: str, scripts_path: str, **train_kwargs):
    """Load a physXAI training script from file and call its ``train_model`` function.

    Args:
        script_name (str): Script filename, with or without the ``.py`` ending
        scripts_path (str): Folder containing the script
        **train_kwargs: Keyword arguments forwarded to ``train_model``

    Returns:
        Whatever the script's ``train_model`` returns.

    Raises:
        ImportError: If no import spec can be created for the script file.
    """
    # A bare ``import importlib`` does not guarantee that the ``util``
    # submodule is loaded, so import it explicitly where it is used.
    import importlib.util

    if not script_name.endswith('.py'):
        script_name += '.py'
    script_file = os.path.join(scripts_path, script_name)

    spec = importlib.util.spec_from_file_location("train_model", script_file)
    if spec is None or spec.loader is None:
        raise ImportError(f"Could not load physXAI training script: {script_file}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module.train_model(**train_kwargs)


def _convert_physxai_output(run_dir: str, run_id: str, name: str) -> str:
    """Convert the physXAI files of one model to a single agentlib_mpc json file.

    Reads ``<name>_preprocessing.json`` (required) as well as ``<name>_model.json``
    and ``<name>_training_data.json`` (both optional) from ``run_dir``, writes
    ``<name>.json`` and afterwards removes the intermediate physXAI files.

    Returns:
        str: Path of the written agentlib_mpc json file
    """
    intermediate_files = {
        key: os.path.join(run_dir, f"{name}_{suffix}")
        for key, suffix in (
            ("preprocessing", "preprocessing.json"),
            ("constructed", "constructed.json"),
            ("model", "model.json"),
            ("training_data", "training_data.json"),
            ("training_data_pkl", "training_data.pkl"),
        )
    }

    def load_optional(path: str):
        # Optional config files are passed on as None when they do not exist.
        if not os.path.exists(path):
            return None
        with open(path, "r") as f:
            return json.load(f)

    with open(intermediate_files["preprocessing"], "r") as f:
        preprocessing = json.load(f)
    model_cfg = load_optional(intermediate_files["model"])
    training_data = load_optional(intermediate_files["training_data"])

    # Convert physXAI config files to agentlib_mpc json format
    model_config = physXAI_2_agentlib_json(run_id, preprocessing, model_cfg, training_data, model_name=name)
    os.makedirs(run_dir, exist_ok=True)
    out_file = os.path.join(run_dir, f"{name}.json")
    with open(out_file, 'w') as f:
        json.dump(model_config, f)

    # Clean up only after the converted file has been written, so that a failed
    # conversion does not destroy the training output.
    for path in intermediate_files.values():
        if os.path.exists(path):
            os.remove(path)

    return str(out_file)


def generate_physxai_model(models: Union[list[str], dict[str, str], str], physXAI_scripts_path: str,
                           training_data_path: str, run_id: str, time_step: int = 900) -> list[str]:
    """Generate physXAI models

    Args:
        models (Union[list[str], dict[str, str], str]): Define Models to be generated by physXAI.
            If a single string is given, it is assumed to be an id to an existing model folder. In this case, the existing models are copied to a new folder with the given new run_id.
            If a list of strings is given, each string is assumed to be a physXAI script filename (with or without .py ending) to be executed for model training. The output model names will be determined by the physXAI scripts.
            If a dict is given, each key is the desired output model name, and each value is the physXAI script filename (with or without .py ending) to be executed for model training.
        physXAI_scripts_path (str): Base path to physXAI scripts
        training_data_path (str): Path to training data csv file
        run_id (str): Run identifier
        time_step (int, optional): Time step for training. Defaults to 900.

    Returns:
        List[str]: List of generated model file paths
    """
    # A single string is an id of an existing model folder: copy it to the new run_id.
    if isinstance(models, str):
        return use_existing_models(models, run_id, model_save_path_rel)

    model_save_path = os.path.abspath(model_save_path_rel)
    train_kwargs = {
        "base_path": model_save_path,
        "folder_name": run_id,
        "training_data_path": os.path.abspath(training_data_path),
        "time_step": time_step,
    }

    if isinstance(models, list):
        # List of scripts: each script decides on (and returns) its model name.
        model_names = [
            _run_physxai_training_script(script, physXAI_scripts_path, **train_kwargs)
            for script in models
        ]
    else:
        # Dict: each key is the desired output model name, each value the script.
        model_names = []
        for model_name, script in models.items():
            _run_physxai_training_script(script, physXAI_scripts_path,
                                         output_name=model_name, **train_kwargs)
            model_names.append(model_name)

    # Convert physXAI config files to agentlib_mpc json format and clean up intermediate files
    run_dir = os.path.join(model_save_path, run_id)
    return [_convert_physxai_output(run_dir, run_id, name) for name in model_names]