Source code for ebcpy.utils.conversion

"""
Module with functions to convert
certain formats into other formats.
"""
from pathlib import Path
from typing import Union

import scipy.io as spio
import numpy as np
import pandas as pd

from ebcpy.data_types import index_is_numeric, datetime_indexes, TimeSeriesData


def convert_tsd_to_modelica_mat(
        tsd: Union[pd.DataFrame, TimeSeriesData],
        save_path_file: Union[str, Path],
        **kwargs):
    """
    Function to convert a tsd to a mat-file readable within Dymola.

    :param pd.DataFrame,TimeSeriesData tsd:
        Dataframe or TimeSeriesData object with data to convert
    :param str,os.path.normpath save_path_file:
        File path and name where to store the output .mat file.
    :keyword list columns:
        A list with names of columns that should be saved to the .mat file.
        If no list is provided, all columns are converted.
    :keyword float offset:
        Offset for time in seconds, default 0
    :return: str,os.path.normpath:
        Path where the version 4 .mat file is saved.
        Equal to save_path_file

    Examples:

    >>> import os
    >>> from ebcpy import load_time_series_data
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.mat")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = load_time_series_data(example_file, sep=";")
    >>> filepath = convert_tsd_to_modelica_mat(tsd,
    ...                                        save_path, columns=cols)
    >>> os.remove(filepath)
    """
    if not isinstance(save_path_file, Path):
        save_path_file = Path(save_path_file)
    if not save_path_file.suffix == ".mat":
        raise ValueError("Given save_path_file is not a .mat file!")
    # Load the relevant part of the df
    df_sub, _ = _convert_to_subset(
        df=tsd,
        columns=kwargs.get("columns", None),
        offset=kwargs.get("offset", 0)
    )
    # Convert np.array into a list and create a dict with 'table' as matrix name
    new_mat = {'table': df_sub.values.tolist()}
    # Save matrix as a MATLAB *.mat file, which is readable by Modelica.
    spio.savemat(save_path_file, new_mat, format="4")
    # Return the path so the caller knows where the file was written.
    return save_path_file
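
# Illustrative usage sketch (not part of the original module): a minimal round
# trip through convert_tsd_to_modelica_mat using a plain DataFrame whose index
# is simulation time in seconds (assuming index_is_numeric accepts a float
# index). The column name and file location are placeholders.
def _demo_convert_tsd_to_modelica_mat():
    import tempfile
    # Three samples of a constant signal, indexed by time in seconds.
    df = pd.DataFrame({"sine.freqHz / Hz": [1.0, 1.0, 1.0]},
                      index=[0.0, 10.0, 20.0])
    with tempfile.TemporaryDirectory() as tmp_dir:
        mat_path = convert_tsd_to_modelica_mat(
            df, Path(tmp_dir) / "demo_input.mat", offset=0)
        # The file now holds the matrix 'table' in MATLAB v4 format,
        # with time as the first column.
        print(mat_path)
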

def convert_tsd_to_clustering_txt(
        tsd: Union[pd.DataFrame, TimeSeriesData],
        save_path_file: Union[str, Path],
        columns: list = None):
    """
    Function to convert a TimeSeriesData object
    to a txt-file readable within the TICC-module.

    :param pd.DataFrame,TimeSeriesData tsd:
        Dataframe or TimeSeriesData object with data to convert
    :param str,os.path.normpath save_path_file:
        File path and name where to store the output .txt file.
    :param list columns:
        A list with names of columns that should be saved to the .txt file.
        If no list is provided, all columns are converted.
    :return: str,os.path.normpath:
        Path where the data is saved.
        Equal to save_path_file

    Examples:

    >>> import os
    >>> from ebcpy import load_time_series_data
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.txt")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = load_time_series_data(example_file, sep=";")
    >>> filepath = convert_tsd_to_clustering_txt(tsd,
    ...                                          save_path, columns=cols)
    >>> os.remove(filepath)
    """
    if not isinstance(save_path_file, Path):
        save_path_file = Path(save_path_file)
    if not save_path_file.suffix == ".txt":
        raise ValueError("Given save_path_file is not a .txt file!")
    # Get the subset of the dataFrame
    df_sub, _ = _convert_to_subset(df=tsd, columns=columns, offset=0)
    # Save matrix as a *.txt file, which is readable by TICC.
    np.savetxt(save_path_file, df_sub, delimiter=',', fmt='%.4f')
    # Return the path so the caller knows where the file was written.
    return save_path_file
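
# Illustrative usage sketch (not part of the original module): writing a small
# DataFrame to the comma-separated .txt layout expected by TICC. Column names
# and the target directory are placeholders.
def _demo_convert_tsd_to_clustering_txt():
    import tempfile
    df = pd.DataFrame({"T_room / K": [293.15, 293.65, 294.15],
                       "P_el / W": [100.0, 120.0, 110.0]},
                      index=[0.0, 60.0, 120.0])  # time in seconds
    with tempfile.TemporaryDirectory() as tmp_dir:
        txt_path = convert_tsd_to_clustering_txt(
            df, Path(tmp_dir) / "demo_clustering.txt")
        # Each row holds time plus the selected columns, rounded to 4 decimals.
        print(txt_path.read_text().splitlines()[0])
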

def convert_tsd_to_modelica_txt(
        tsd: Union[pd.DataFrame, TimeSeriesData],
        table_name: str,
        save_path_file: Union[str, Path],
        **kwargs
):
    """
    Convert a TimeSeriesData object to modelica readable text.
    This is especially useful for generating input data for
    a modelica simulation.

    :param pd.DataFrame,TimeSeriesData tsd:
        Dataframe or TimeSeriesData object with data to convert
    :param str table_name:
        Name of the table for modelica.
        Needed in Modelica to correctly load the file.
    :param str,os.path.normpath save_path_file:
        File path and name where to store the output .txt file.
    :keyword list columns:
        A list with names of columns that should be saved to the .txt file.
        If no list is provided, all columns are converted.
    :keyword float offset:
        Offset for time in seconds, default 0
    :keyword str sep:
        Separator used to separate values between columns
    :keyword Boolean with_tag:
        If True, each variable and tag is written to the file.
        If False, only the variable name is written to the file.
    :return: str,os.path.normpath:
        Path where the data is saved.
        Equal to save_path_file

    Examples:

    >>> import os
    >>> from ebcpy import load_time_series_data
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.txt")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = load_time_series_data(example_file, sep=";")
    >>> filepath = convert_tsd_to_modelica_txt(tsd, "dummy_input_data", save_path, columns=cols)
    >>> os.remove(filepath)
    """
    if not isinstance(save_path_file, Path):
        save_path_file = Path(save_path_file)
    if not save_path_file.suffix == ".txt":
        raise ValueError("Given save_path_file is not a .txt file!")
    # Load the relevant part of the df
    df_sub, header_names = _convert_to_subset(
        df=tsd,
        columns=kwargs.get("columns", None),
        offset=kwargs.get("offset", 0),
        with_tag=kwargs.get("with_tag", False)
    )
    # Unpack kwargs
    sep = kwargs.get("sep", "\t")

    n_cols = len(header_names)
    n_rows = len(df_sub.index)
    # Comment header line
    content_as_lines = [f"#{sep.join(header_names)}\n"]
    content_as_lines.insert(0, f"double {table_name}({n_rows}, {n_cols})\n")
    content_as_lines.insert(0, "#1\n")  # Modelica table format marker

    # Open file and write the header
    with open(file=save_path_file, mode="a+", encoding="utf-8") as file:
        file.seek(0)
        file.truncate()  # Delete possible old content
        file.writelines(content_as_lines)

    # Append the data directly using to_csv from pandas
    df_sub.to_csv(save_path_file, header=None, index=None, sep=sep, mode="a")

    return save_path_file
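
# Illustrative usage sketch (not part of the original module): the generated
# file starts with '#1', then the 'double <table_name>(<rows>, <cols>)' line
# that Modelica expects, followed by a commented header and the data rows.
# Names and paths below are placeholders.
def _demo_convert_tsd_to_modelica_txt():
    import tempfile
    df = pd.DataFrame({"TDryBul / K": [273.15, 274.15, 275.15]},
                      index=[0.0, 3600.0, 7200.0])  # time in seconds
    with tempfile.TemporaryDirectory() as tmp_dir:
        txt_path = convert_tsd_to_modelica_txt(
            df, table_name="demo_table",
            save_path_file=Path(tmp_dir) / "demo_modelica.txt")
        with open(txt_path, encoding="utf-8") as file:
            print(file.readline().strip())  # '#1'
            print(file.readline().strip())  # 'double demo_table(3, 2)'
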

def _convert_to_subset(
        df: Union[pd.DataFrame, TimeSeriesData],
        columns: list,
        offset: float,
        with_tag: bool = False
) -> (pd.DataFrame, list):
    """
    Private function to ensure lean conversion to either mat or txt.

    :param pd.DataFrame,TimeSeriesData df:
        Dataframe or TimeSeriesData object with data to convert
    :param list columns:
        A list with names of columns that should be saved.
        If no list is provided, all columns are converted.
    :param float offset:
        Offset for time in seconds, default 0
    :param Boolean with_tag:
        If True, each variable and tag is written to the file.
        If False, only the variable name is written to the file.
    """
    df = df.copy()
    if columns:
        if isinstance(columns, str):
            columns = [columns]  # Must be a list
        headers = df[columns].columns.values.tolist()
    else:
        headers = df.columns.values.tolist()

    if isinstance(df, TimeSeriesData) and isinstance(df.columns, pd.MultiIndex):
        _time_header = ('time', 'in_s')
        if with_tag:
            header_names = [
                variable_tag if not isinstance(variable_tag, tuple)
                else "_".join(variable_tag)
                for variable_tag in headers
            ]
        else:
            header_names = [
                variable_tag if not isinstance(variable_tag, tuple)
                else variable_tag[0]
                for variable_tag in headers
            ]
    else:
        _time_header = 'time_in_s'
        header_names = headers.copy()
    header_names.insert(0, _time_header)  # Ensure time will be at first place
    headers.insert(0, _time_header)

    if isinstance(df.index, tuple(datetime_indexes)):
        df.index = df.index - df.iloc[0].name.to_datetime64()  # Make index zero-based
        df[_time_header] = df.index.total_seconds() + offset
    elif index_is_numeric(df.index):
        df[_time_header] = df.index - df.iloc[0].name + offset
    else:
        # Should not happen as an error is raised in data_types. But just to be sure:
        raise IndexError(f"Given index of type {type(df.index)} is not supported.")
    # Avoid 1e-8 errors in timedelta calculation.
    df[_time_header] = df[_time_header].round(4)

    # Check if nan values occur
    if df.loc[:, headers].isnull().values.sum() > 0:
        raise ValueError("Selected columns contain NaN values. This would lead to errors "
                         "in the simulation environment.")

    return df.loc[:, headers], header_names
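
# Illustrative sketch (not part of the original module) of what the private
# helper returns: the time column is prepended, shifted so it starts at the
# given offset, and the header names are returned separately. Values and the
# column name are placeholders.
def _demo_convert_to_subset():
    df = pd.DataFrame({"u / -": [0.0, 0.5, 1.0]}, index=[100.0, 110.0, 120.0])
    df_sub, header_names = _convert_to_subset(df=df, columns=None, offset=0)
    # The index 100..120 becomes a zero-based time column: 0.0, 10.0, 20.0
    print(header_names)                   # ['time_in_s', 'u / -']
    print(df_sub["time_in_s"].tolist())   # [0.0, 10.0, 20.0]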