Coverage for ebcpy/utils/conversion.py: 91%

68 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-08-26 09:12 +0000

1""" 

2Module with functions to convert time series data

3into file formats readable by other tools.

4""" 

5from pathlib import Path 

6from typing import Union 

7 

8import scipy.io as spio 

9import numpy as np 

10import pandas as pd 

11 

12from ebcpy.data_types import index_is_numeric, datetime_indexes, TimeSeriesData 

13 

14 

def convert_tsd_to_modelica_mat(
        tsd: Union[pd.DataFrame, TimeSeriesData], save_path_file: Union[str, Path], **kwargs):
    """
    Convert time series data to a version 4 mat-file readable within Dymola.

    The selected columns are written, together with a leading time column,
    as a single matrix named ``table``.

    :param pd.DataFrame,TimeSeriesData tsd:
        Dataframe or TimeSeriesData object with data to convert
    :param str,pathlib.Path save_path_file:
        File path and name where to store the output .mat file.
    :keyword list columns:
        A list with names of columns that should be saved to .mat file.
        If no list is provided, all columns are converted.
    :keyword float offset:
        Offset for time in seconds, default 0

    :return:
        pathlib.Path:
            Path where the data is saved.
            Equal to save_path_file, converted to a ``Path``.
    :raises ValueError:
        If save_path_file does not have the suffix ".mat".

    Examples:

    >>> import os
    >>> from ebcpy import load_time_series_data
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.mat")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = load_time_series_data(example_file, sep=";")
    >>> filepath = convert_tsd_to_modelica_mat(tsd,
    ...                                        save_path, columns=cols)
    >>> os.remove(filepath)
    """
    # Path() accepts both str and Path, so no isinstance check is required.
    save_path_file = Path(save_path_file)

    if save_path_file.suffix != ".mat":
        raise ValueError("Given savepath for mat-file is not a .mat file!")

    # Load the relevant part of the df; the flattened header names are not
    # needed because the mat-file only stores the numeric matrix.
    df_sub, _ = _convert_to_subset(
        df=tsd,
        columns=kwargs.get("columns", None),
        offset=kwargs.get("offset", 0)
    )

    # Convert np.array into a list and create a dict with 'table' as matrix name
    new_mat = {'table': df_sub.values.tolist()}
    # Save matrix as a MATLAB *.mat file, which is readable by Modelica.
    spio.savemat(save_path_file, new_mat, format="4")
    return save_path_file

69 

70 

def convert_tsd_to_clustering_txt(
        tsd: Union[pd.DataFrame, TimeSeriesData],
        save_path_file: Union[str, Path],
        columns: list = None):
    """
    Convert time series data to a txt-file readable within the TICC-module.

    The selected columns are written, together with a leading time column,
    as comma separated values with four decimal places and without a header.

    :param pd.DataFrame,TimeSeriesData tsd:
        Dataframe or TimeSeriesData object with data to convert
    :param str,pathlib.Path save_path_file:
        File path and name where to store the output .txt file.
    :param list columns:
        A list with names of columns that should be saved to .txt file.
        If no list is provided, all columns are converted.

    :return:
        pathlib.Path:
            Path where the data is saved.
            Equal to save_path_file, converted to a ``Path``.
    :raises ValueError:
        If save_path_file does not have the suffix ".txt".

    Examples:

    >>> import os
    >>> from ebcpy import load_time_series_data
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.txt")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = load_time_series_data(example_file, sep=";")
    >>> filepath = convert_tsd_to_clustering_txt(tsd,
    ...                                          save_path, columns=cols)
    >>> os.remove(filepath)
    """
    # Path() accepts both str and Path, so no isinstance check is required.
    save_path_file = Path(save_path_file)
    if save_path_file.suffix != ".txt":
        raise ValueError("Given savepath for txt-file is not a .txt file!")

    # Get the subset of the dataFrame; no time offset is applied here.
    df_sub, _ = _convert_to_subset(df=tsd, columns=columns, offset=0)

    # Save matrix as a *.txt file, which is readable by TICC.
    # np.savetxt consumes the frame directly, so no list conversion is needed.
    np.savetxt(save_path_file, df_sub, delimiter=',', fmt='%.4f')
    return save_path_file

120 

121 

def convert_tsd_to_modelica_txt(
        tsd: Union[pd.DataFrame, TimeSeriesData],
        table_name: str,
        save_path_file: Union[str, Path],
        **kwargs
):
    """
    Convert a TimeSeriesData object to modelica readable text. This is especially useful
    for generating input data for a modelica simulation.

    The file starts with three header lines (``#1``, the table declaration
    ``double <table_name>(<rows>, <cols>)`` and a commented line with the
    column names) followed by the data rows.

    :param pd.DataFrame,TimeSeriesData tsd:
        Dataframe or TimeSeriesData object with data to convert
    :param str table_name:
        Name of the table for modelica.
        Needed in Modelica to correctly load the file.
    :param str,pathlib.Path save_path_file:
        File path and name where to store the output .txt file.
    :keyword list columns:
        A list with names of columns that should be saved to .txt file.
        If no list is provided, all columns are converted.
    :keyword float offset:
        Offset for time in seconds, default 0
    :keyword str sep:
        Separator used to separate values between columns, default tab
    :keyword Boolean with_tag:
        Use True each variable and tag is written to the file
        If False, only the variable name is written to the file.
        Default is False.

    :return:
        pathlib.Path:
            Path where the data is saved.
            Equal to save_path_file, converted to a ``Path``.
    :raises ValueError:
        If save_path_file does not have the suffix ".txt".

    Examples:

    >>> import os
    >>> from ebcpy import load_time_series_data
    >>> project_dir = os.path.dirname(os.path.dirname(__file__))
    >>> example_file = os.path.normpath(project_dir + "//tests//data//example_data.csv")
    >>> save_path = os.path.normpath(project_dir + "//tests//data//example_data_converted.txt")
    >>> cols = ["sine.freqHz / Hz"]
    >>> tsd = load_time_series_data(example_file, sep=";")
    >>> filepath = convert_tsd_to_modelica_txt(tsd, "dummy_input_data", save_path, columns=cols)
    >>> os.remove(filepath)
    """
    # Path() accepts both str and Path, so no isinstance check is required.
    save_path_file = Path(save_path_file)
    if save_path_file.suffix != ".txt":
        raise ValueError("Given savepath for txt-file is not a .txt file!")

    # Unpack kwargs
    sep = kwargs.get("sep", "\t")

    # Load the relevant part of the df. with_tag is forwarded so that the
    # documented keyword actually influences the header names.
    df_sub, header_names = _convert_to_subset(
        df=tsd,
        columns=kwargs.get("columns", None),
        offset=kwargs.get("offset", 0),
        with_tag=kwargs.get("with_tag", False)
    )

    n_cols = len(header_names)
    n_rows = len(df_sub.index)

    # str.join only accepts strings, but column labels may be tuples
    # (e.g. a multi-index time header) or numbers.
    header_labels = [
        "_".join(map(str, name)) if isinstance(name, tuple) else str(name)
        for name in header_names
    ]
    content_as_lines = [
        "#1\n",  # Print Modelica table no
        f"double {table_name}({n_rows}, {n_cols})\n",
        f"#{sep.join(header_labels)}\n",  # Comment header line
    ]

    # Mode "w" creates the file or deletes possible old content.
    with open(file=save_path_file, mode="w", encoding="utf-8") as file:
        file.writelines(content_as_lines)

    # Append the data directly using to_csv from pandas
    df_sub.to_csv(save_path_file, header=None, index=None, sep=sep, mode="a")

    return save_path_file

199 

200 

def _convert_to_subset(
        df: Union[pd.DataFrame, TimeSeriesData],
        columns: list,
        offset: float,
        with_tag: bool = False
) -> (pd.DataFrame, list):
    """
    Private function to ensure lean conversion to either mat or txt.

    Works on a copy of the input, adds a zero-based time column in seconds
    (plus ``offset``, rounded to 4 decimals) and returns the selected
    columns with the time column in first place.

    :param pd.DataFrame,TimeSeriesData df:
        Dataframe or TimeSeriesData object with data to convert
    :param list columns:
        A list with names of columns that should be converted.
        A single string is treated as a list with one entry.
        If no list is provided, all columns are converted.
    :param float offset:
        Offset for time in seconds
    :param Boolean with_tag:
        Only relevant for TimeSeriesData with multi-index columns.
        Use True each variable and tag is joined with "_" in the header name.
        If False, only the variable name is used as header name.

    :return:
        pd.DataFrame, list:
            The subset including the time column and the list of header
            names, both in the same column order.
    :raises IndexError:
        If the index is neither a datetime index nor numeric.
    :raises ValueError:
        If the selected columns contain NaN values.
    """
    df = df.copy()

    if columns:
        if isinstance(columns, str):
            columns = [columns]  # Must be a list
        headers = df[columns].columns.values.tolist()
    else:
        headers = df.columns.values.tolist()

    def _flatten(variable_tag):
        """Turn a (variable, tag) tuple into a plain header name."""
        if not isinstance(variable_tag, tuple):
            return variable_tag
        return "_".join(variable_tag) if with_tag else variable_tag[0]

    if isinstance(df, TimeSeriesData) and isinstance(df.columns, pd.MultiIndex):
        _time_header = ('time', 'in_s')
        # Flatten the time header like every other column so that
        # header_names does not mix strings and tuples.
        header_names = [_flatten(variable_tag) for variable_tag in [_time_header] + headers]
    else:
        _time_header = 'time_in_s'
        header_names = [_time_header] + headers

    # Ensure time will be at first place
    headers = [_time_header] + headers

    if isinstance(df.index, tuple(datetime_indexes)):
        df.index = df.index - df.index[0]  # Make index zero based
        df[_time_header] = df.index.total_seconds() + offset
    elif index_is_numeric(df.index):
        df[_time_header] = df.index - df.index[0] + offset
    else:
        # Should not happen as error is raised in data_types. But just to be sure:
        raise IndexError(f"Given index of type {type(df.index)} is not supported.")
    # Avoid 1e-8 errors in timedelta calculation.
    df[_time_header] = df[_time_header].round(4)

    df_sub = df.loc[:, headers]
    # Check if nan values occur
    if df_sub.isnull().values.any():
        raise ValueError("Selected columns contain NaN values. This would lead to errors "
                         "in the simulation environment.")

    return df_sub, header_names