Source code for ebcpy.utils.conversion

"""
Module with functions to convert
time series data into other file formats.
"""
import pathlib
import scipy.io as spio
import numpy as np
import pandas as pd

from ebcpy.data_types import index_is_numeric, datetime_indexes


def convert_tsd_to_modelica_mat(tsd, save_path_file, **kwargs):
    """
    Function to convert a tsd to a mat-file readable within Dymola.

    :param TimeSeriesData tsd:
        TimeSeriesData object
    :param str,os.path.normpath save_path_file:
        File path and name where to store the output .mat file.
        A ``pathlib.Path`` is accepted and converted to ``str``.
    :keyword list columns:
        A list with names of columns that should be saved to .mat file.
        If no list is provided, all columns are converted.
    :keyword float offset:
        Offset for time in seconds, default 0
    :return: str,os.path.normpath:
        Path where the version 4 mat-file is saved.
        Equal to save_path_file (as a string).
    :raises ValueError:
        If save_path_file does not end with ".mat".

    Examples:

    >>> import os
    >>> from ebcpy import TimeSeriesData
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.mat")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = TimeSeriesData(example_file, sep=";")
    >>> filepath = convert_tsd_to_modelica_mat(tsd,
    ...                                        save_path, columns=cols)
    >>> os.remove(filepath)
    """
    if isinstance(save_path_file, pathlib.Path):
        save_path_file = str(save_path_file)

    if not save_path_file.endswith(".mat"):
        raise ValueError("Given savepath for mat-file is not a .mat file!")

    # Load the relevant part of the df (time column first)
    df_sub, _ = _convert_to_subset(
        df=tsd,
        columns=kwargs.get("columns", None),
        offset=kwargs.get("offset", 0)
    )

    # Convert np.array into a list and create a dict with 'table' as matrix name
    new_mat = {'table': df_sub.values.tolist()}
    # Save matrix as a MATLAB *.mat file (version 4), which is readable by Modelica.
    spio.savemat(save_path_file, new_mat, format="4")
    return save_path_file
def convert_tsd_to_clustering_txt(tsd, save_path_file, columns=None):
    """
    Function to convert a TimeSeriesData object to a txt-file
    readable within the TICC-module.

    :param TimeSeriesData tsd:
        TimeSeriesData object
    :param str,os.path.normpath save_path_file:
        File path and name where to store the output .txt file.
    :param list columns:
        A list with names of columns that should be saved to the .txt file.
        If no list is provided, all columns are converted.
    :return: str,os.path.normpath:
        Path where the data is saved.
        Equal to save_path_file

    Examples:

    >>> import os
    >>> from ebcpy import TimeSeriesData
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.txt")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = TimeSeriesData(example_file, sep=";")
    >>> filepath = convert_tsd_to_clustering_txt(tsd,
    ...                                          save_path, columns=cols)
    >>> os.remove(filepath)
    """
    # Get the subset of the DataFrame (time column first, no time offset)
    df_sub, _ = _convert_to_subset(df=tsd, columns=columns, offset=0)
    # Save the values as a comma separated *.txt file with four decimals,
    # which is readable by TICC.
    np.savetxt(save_path_file, df_sub, delimiter=',', fmt='%.4f')
    return save_path_file
def convert_tsd_to_modelica_txt(tsd, table_name, save_path_file, **kwargs):
    """
    Convert a TimeSeriesData object to modelica readable text. This is especially useful
    for generating input data for a modelica simulation.

    :param TimeSeriesData tsd:
        TimeSeriesData object
    :param str table_name:
        Name of the table for modelica.
        Needed in Modelica to correctly load the file.
    :param str,os.path.normpath save_path_file:
        File path and name where to store the output .txt file.
        A ``pathlib.Path`` is accepted and converted to ``str``.
    :keyword list columns:
        A list with names of columns that should be saved to the .txt file.
        If no list is provided, all columns are converted.
    :keyword float offset:
        Offset for time in seconds, default 0
    :keyword str sep:
        Separator used to separate values between columns.
        Default is a tab character.
    :keyword Boolean with_tag:
        If True (default), each variable and its tag are joined with "_"
        in the header comment line.
        If False, only the variable name is written; the time column
        always keeps its tag because the unit is important.
    :return: str,os.path.normpath:
        Path where the data is saved.
        Equal to save_path_file (as a string).
    :raises ValueError:
        If save_path_file does not end with ".txt".

    Examples:

    >>> import os
    >>> from ebcpy import TimeSeriesData
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.txt")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = TimeSeriesData(example_file, sep=";")
    >>> filepath = convert_tsd_to_modelica_txt(tsd, "dummy_input_data", save_path, columns=cols)
    >>> os.remove(filepath)
    """
    if isinstance(save_path_file, pathlib.Path):
        save_path_file = str(save_path_file)

    if not save_path_file.endswith(".txt"):
        raise ValueError("Given savepath for txt-file is not a .txt file!")

    # Load the relevant part of the df (time column first)
    df_sub, headers = _convert_to_subset(
        df=tsd,
        columns=kwargs.get("columns", None),
        offset=kwargs.get("offset", 0)
    )

    # Unpack kwargs
    sep = kwargs.get("sep", "\t")

    n_cols = len(headers)
    n_rows = len(df_sub.index)

    if kwargs.get("with_tag", True):
        # Convert ("variable", "tag") to "variable_tag"
        header_names = ["_".join(variable_tag) for variable_tag in headers]
    else:
        # The first header is the time: keep its tag as the unit is important.
        # All other headers are reduced from ("variable", "tag") to "variable".
        header_names = [
            "_".join(var) if idx == 0 else var[0]
            for idx, var in enumerate(headers)
        ]

    content_as_lines = [
        "#1\n",  # Modelica table format marker
        f"double {table_name}({n_rows}, {n_cols})\n",
        f"#{sep.join(header_names)}\n",  # Comment header line
    ]

    # Write the header; mode "w" discards possible old content
    with open(file=save_path_file, mode="w", encoding="utf-8") as file:
        file.writelines(content_as_lines)

    # Append the data directly using to_csv from pandas
    df_sub.to_csv(save_path_file, header=False, index=False, sep=sep, mode="a")
    return save_path_file
def _convert_to_subset(df, columns, offset):
    """
    Private function to ensure lean conversion to either mat or txt.

    Works on a copy of ``df``, adds a zero-based time column in seconds
    under the header ``('time', 'in_s')`` and returns the selected columns
    with the time column in first place.

    :param df:
        Data to convert. Its index must either be one of ``datetime_indexes``
        or be accepted by ``index_is_numeric``.
    :param str,list columns:
        Name(s) of the columns to keep. A single string is wrapped in a list.
        If empty or None, all columns are used.
    :param float offset:
        Offset in seconds added to the zero-based time.
    :return: tuple:
        The subset of the data (time column first) and the list of its
        headers, each converted to a tuple.
    :raises IndexError:
        If the index type is not supported.
    :raises ValueError:
        If the selected columns contain NaN values.
    """
    df = df.copy()
    if columns:
        if isinstance(columns, str):
            columns = [columns]  # Must be a list
        headers = df[columns].columns.values.tolist()
    else:
        headers = df.columns.values.tolist()

    _time_header = ('time', 'in_s')
    headers.insert(0, _time_header)  # Ensure time will be at first place

    if isinstance(df.index, tuple(datetime_indexes)):
        # Make index zero based; the first index label is read directly
        # instead of materialising the whole first row.
        df.index = df.index - df.index[0]
        df[_time_header] = df.index.total_seconds() + offset
    elif index_is_numeric(df.index):
        df[_time_header] = df.index - df.index[0] + offset
    else:
        # Should not happen as error is raised in data_types. But just to be sure:
        raise IndexError(f"Given index of type {type(df.index)} is not supported.")

    # Avoid 1e-8 errors in timedelta calculation.
    df[_time_header] = df[_time_header].round(4)

    df_sub = df.loc[:, headers]
    # Check if nan values occur
    if df_sub.isnull().values.any():
        raise ValueError("Selected columns contain NaN values. This would lead to errors "
                         "in the simulation environment.")

    # Convert cases with no tag to tuple
    headers_as_tuple = [
        header if isinstance(header, tuple) else (header, )
        for header in headers
    ]
    return df_sub, headers_as_tuple