Coverage for ebcpy/utils/conversion.py: 91%
68 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-26 09:12 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-26 09:12 +0000
1"""
2Module with functions to convert
3certain format into other formats.
4"""
5from pathlib import Path
6from typing import Union
8import scipy.io as spio
9import numpy as np
10import pandas as pd
12from ebcpy.data_types import index_is_numeric, datetime_indexes, TimeSeriesData
def convert_tsd_to_modelica_mat(
        tsd: Union[pd.DataFrame, TimeSeriesData], save_path_file: Union[str, Path], **kwargs):
    """
    Function to convert a tsd to a mat-file readable within Dymola.

    The selected columns are stored, preceded by a time column in seconds,
    as one matrix named ``table`` in a version 4 mat-file.

    :param pd.DataFrame,TimeSeriesData tsd:
        Dataframe or TimeSeriesData object with data to convert
    :param str,pathlib.Path save_path_file:
        File path and name where to store the output .mat file.
    :keyword list columns:
        A list with names of columns that should be saved to .mat file.
        If no list is provided, all columns are converted.
    :keyword float offset:
        Offset for time in seconds, default 0

    :return:
        pathlib.Path:
            Path where the data is saved.
            Equal to save_path_file (converted to a Path object).
    :raises ValueError:
        If save_path_file does not have the suffix .mat.

    Examples:

    >>> import os
    >>> from ebcpy import load_time_series_data
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.mat")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = load_time_series_data(example_file, sep=";")
    >>> filepath = convert_tsd_to_modelica_mat(tsd,
    ...                                        save_path, columns=cols)
    >>> os.remove(filepath)
    """
    save_path_file = Path(save_path_file)
    if save_path_file.suffix != ".mat":
        raise ValueError("Given savepath for mat-file is not a .mat file!")

    # Load the relevant part of the df; the header names are not needed
    # because a mat-file table carries no column labels.
    df_sub, _ = _convert_to_subset(
        df=tsd,
        columns=kwargs.get("columns", None),
        offset=kwargs.get("offset", 0)
    )

    # Convert np.array into a list and create a dict with 'table' as matrix name
    new_mat = {'table': df_sub.values.tolist()}
    # Save matrix as a MATLAB *.mat file (version 4), which is readable by Modelica.
    spio.savemat(save_path_file, new_mat, format="4")
    return save_path_file
def convert_tsd_to_clustering_txt(
        tsd: Union[pd.DataFrame, TimeSeriesData],
        save_path_file: Union[str, Path],
        columns: list = None):
    """
    Function to convert a TimeSeriesData object
    to a txt-file readable within the TICC-module.

    The selected columns are written, preceded by a time column in seconds,
    as comma-separated values with four decimal places and without a header.

    :param pd.DataFrame,TimeSeriesData tsd:
        Dataframe or TimeSeriesData object with data to convert
    :param str,pathlib.Path save_path_file:
        File path and name where to store the output .txt file.
    :param list columns:
        A list with names of columns that should be saved to .txt file.
        If no list is provided, all columns are converted.

    :return:
        pathlib.Path:
            Path where the data is saved.
            Equal to save_path_file (converted to a Path object).
    :raises ValueError:
        If save_path_file does not have the suffix .txt.

    Examples:

    >>> import os
    >>> from ebcpy import load_time_series_data
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.txt")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = load_time_series_data(example_file, sep=";")
    >>> filepath = convert_tsd_to_clustering_txt(tsd,
    ...                                          save_path, columns=cols)
    >>> os.remove(filepath)
    """
    save_path_file = Path(save_path_file)
    if save_path_file.suffix != ".txt":
        raise ValueError("Given savepath for txt-file is not a .txt file!")

    # Get the subset of the dataFrame; no time offset is applied here.
    df_sub, _ = _convert_to_subset(df=tsd, columns=columns, offset=0)

    # Save matrix as a *.txt file, which is readable by TICC.
    np.savetxt(save_path_file, df_sub, delimiter=',', fmt='%.4f')
    return save_path_file
def convert_tsd_to_modelica_txt(
        tsd: Union[pd.DataFrame, TimeSeriesData],
        table_name: str,
        save_path_file: Union[str, Path],
        **kwargs
):
    """
    Convert a TimeSeriesData object to modelica readable text. This is especially useful
    for generating input data for a modelica simulation.

    The written file starts with three header lines (``#1``, the table
    declaration ``double <table_name>(<rows>, <cols>)`` and a commented
    line with the column names), followed by the data rows.

    :param pd.DataFrame,TimeSeriesData tsd:
        Dataframe or TimeSeriesData object with data to convert
    :param str table_name:
        Name of the table for modelica.
        Needed in Modelica to correctly load the file.
    :param str,pathlib.Path save_path_file:
        File path and name where to store the output .txt file.
    :keyword list columns:
        A list with names of columns that should be saved to .txt file.
        If no list is provided, all columns are converted.
    :keyword float offset:
        Offset for time in seconds, default 0
    :keyword str sep:
        Separator used to separate values between columns, default tab
    :keyword Boolean with_tag:
        Use True each variable and tag is written to the file
        If False, only the variable name is written to the file.
        Default is False.

    :return:
        pathlib.Path:
            Path where the data is saved.
            Equal to save_path_file (converted to a Path object).
    :raises ValueError:
        If save_path_file does not have the suffix .txt.

    Examples:

    >>> import os
    >>> from ebcpy import load_time_series_data
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.txt")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = load_time_series_data(example_file, sep=";")
    >>> filepath = convert_tsd_to_modelica_txt(tsd, "dummy_input_data", save_path, columns=cols)
    >>> os.remove(filepath)
    """
    save_path_file = Path(save_path_file)
    if save_path_file.suffix != ".txt":
        raise ValueError("Given savepath for txt-file is not a .txt file!")

    # Unpack kwargs
    sep = kwargs.get("sep", "\t")

    # Load the relevant part of the df. with_tag is forwarded so that the
    # documented keyword actually influences the header names.
    df_sub, header_names = _convert_to_subset(
        df=tsd,
        columns=kwargs.get("columns", None),
        offset=kwargs.get("offset", 0),
        with_tag=kwargs.get("with_tag", False)
    )

    # str.join only accepts strings: flatten tuple labels and stringify
    # any other non-string label (e.g. integer column names).
    printable_names = [
        "_".join(map(str, name)) if isinstance(name, tuple) else str(name)
        for name in header_names
    ]

    n_cols = len(header_names)
    n_rows = len(df_sub.index)
    header_lines = [
        "#1\n",  # Print Modelica table no
        f"double {table_name}({n_rows}, {n_cols})\n",
        f"#{sep.join(printable_names)}\n",  # Comment header line
    ]

    # Mode "w" discards possible old content before the header is written
    with open(file=save_path_file, mode="w", encoding="utf-8") as file:
        file.writelines(header_lines)

    # Append the data directly using to_csv from pandas
    df_sub.to_csv(save_path_file, header=None, index=None, sep=sep, mode="a")

    return save_path_file
def _convert_to_subset(
        df: Union[pd.DataFrame, TimeSeriesData],
        columns: list,
        offset: float,
        with_tag: bool = False
) -> (pd.DataFrame, list):
    """
    Private function to ensure lean conversion to either mat or txt.

    Works on a copy of the given data, adds a zero-based time column in
    seconds (plus offset) and returns that column followed by the
    selected columns.

    :param pd.DataFrame,TimeSeriesData df:
        Dataframe or TimeSeriesData object with data to convert
    :param list columns:
        A list with names of columns that should be saved to the file.
        A single column name given as string is accepted as well.
        If no list is provided, all columns are converted.
    :param float offset:
        Offset for time in seconds
    :param Boolean with_tag:
        Only relevant for TimeSeriesData with MultiIndex columns.
        Use True each variable and tag is joined by "_" in the header name.
        If False, only the variable name is used as header name.

    :return:
        pd.DataFrame, list:
            The subset of the data with the time column at first place and
            the list of header names matching the columns of the subset.
    :raises IndexError:
        If the index is neither a datetime index nor numeric.
    :raises ValueError:
        If the selected columns contain NaN values.
    """
    df = df.copy()

    if columns:
        if isinstance(columns, str):
            columns = [columns]  # Must be a list
        headers = df[columns].columns.values.tolist()
    else:
        headers = df.columns.values.tolist()

    has_tags = isinstance(df, TimeSeriesData) and isinstance(df.columns, pd.MultiIndex)
    # The time column label must match the column structure of df
    _time_header = ('time', 'in_s') if has_tags else 'time_in_s'
    headers.insert(0, _time_header)  # Ensure time will be at first place

    # Derive the names from the full header list, so the time header is
    # flattened exactly like every other (variable, tag) tuple.
    if not has_tags:
        header_names = headers.copy()
    elif with_tag:
        header_names = [
            variable_tag if not isinstance(variable_tag, tuple) else "_".join(variable_tag)
            for variable_tag in headers
        ]
    else:
        header_names = [
            variable_tag if not isinstance(variable_tag, tuple) else variable_tag[0]
            for variable_tag in headers
        ]

    first_index = df.index[0]
    if isinstance(df.index, tuple(datetime_indexes)):
        df.index = df.index - first_index.to_datetime64()  # Make index zero based
        df[_time_header] = df.index.total_seconds() + offset
    elif index_is_numeric(df.index):
        df[_time_header] = df.index - first_index + offset
    else:
        # Should not happen as error is raised in data_types. But just to be sure:
        raise IndexError(f"Given index of type {type(df.index)} is not supported.")
    # Avoid 1e-8 errors in timedelta calculation.
    df[_time_header] = df[_time_header].round(4)

    df_sub = df.loc[:, headers]
    # Check if nan values occur
    if df_sub.isnull().values.any():
        raise ValueError("Selected columns contain NaN values. This would lead to errors "
                         "in the simulation environment.")

    return df_sub, header_names