Coverage for aixweather/core_data_format_2_output_file/to_epw_energyplus.py: 97%

194 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-12-31 11:58 +0000

1""" 

2convert core data to epw (EnergyPlus) data 

3""" 

4 

5import csv 

6import datetime as dt 

7import logging 

8 

9import pandas as pd 

10import numpy as np 

11 

12from aixweather import definitions 

13from aixweather.imports.utils_import import MetaData 

14from aixweather.transformation_functions import auxiliary, time_observation_transformations, pass_through_handling 

15from aixweather.transformation_to_core_data.EPW import EPWFormat 

16 

17logger = logging.getLogger(__name__) 

18 

19 

def to_epw(
    core_df: pd.DataFrame,
    meta: MetaData,
    start: dt.datetime,
    stop: dt.datetime,
    fillna: bool,
    result_folder: str = None,
    filename: str = None,
    export_in_utc: bool = False
) -> (pd.DataFrame, str):
    """Create an EPW file from the core data.

    Args:
        core_df (pd.DataFrame): DataFrame containing core data.
        meta (MetaData): Metadata associated with the weather data.
        start (dt.datetime): Timestamp for the start of the EPW file.
        stop (dt.datetime): Timestamp for the end of the EPW file.
        fillna (bool): Boolean indicating whether NaN values should be filled.
        result_folder (str):
            Path to the folder where to save the file. Default will use
            the `results_file_path` method.
        filename (str): Name of the file to be saved. The default is constructed
            based on the meta-data as well as start and stop time.
        export_in_utc (bool): Timezone to be used for the export.
            True to export in UTC+0 (the timezone of the core data),
            False (default) to use the timezone from the metadata.

    Returns:
        pd.DataFrame: DataFrame containing the weather data formatted for EPW export,
            excluding metadata.
        str: Path to the exported file.
    """
    # UTC export keeps the core data's UTC+0 index; otherwise shift to local time.
    timezone = 0 if export_in_utc else meta.timezone

    ### evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=EPWFormat.export_format()
    )

    df = core_df.copy()

    # format data to epw
    df_epw_as_list, df_epw = _format_data(
        df=df, start=start, stop=stop, timezone=timezone, fillna=fillna
    )

    # get final start and stop time (differs from start, stop due to filling to full days)
    start_epw = pd.to_datetime(df_epw.iloc[[0]][['Year', 'Month', 'Day', 'Hour']]).iloc[0]
    stop_epw = pd.to_datetime(df_epw.iloc[[-1]][['Year', 'Month', 'Day', 'Hour']]).iloc[-1]
    # truncate core data for other calculations
    df_truncated = time_observation_transformations.truncate_data_from_start_to_stop(
        df, start_epw, stop_epw
    )

    # keep regular start stop in the filename for the unit tests
    if filename is None:
        _utc_flag = "_utc" if export_in_utc else ""
        filename = (
            f"{meta.station_id}_{start.strftime('%Y%m%d')}_{stop.strftime('%Y%m%d')}"
            f"_{meta.station_name}{_utc_flag}.epw"
        )
    # get the file path to save the data to
    file_path = definitions.results_file_path(filename, result_folder)

    ### merge all header lines and the data to be saved in a .epw file
    with open(file_path, "w", newline="", encoding="latin1") as file:
        writer = csv.writer(file)
        writer.writerows(
            [
                _line1_location(meta=meta, timezone=timezone),
                _line2_design_cond(),
                _line3_typ_ext_period(df_truncated),
                _line4_ground_temp(df_truncated),
                _line5_holiday_dl_saving(df_truncated),
                _line6_comment_1(),
                _line7_comment_2(),
                _line8_data_periods(df_truncated),
            ]
        )
        writer.writerows(df_epw_as_list)

    logger.info("EPW file saved to %s.", file_path)

    return df, file_path

104 

105 

106### create header lines 

### create header lines
def _line1_location(
    meta: MetaData,
    timezone: int
):
    """
    Build the first EPW header line (LOCATION) from the station metadata.

    return:
        location: list with the LOCATION header fields (station name, state,
        country, data type, station id, latitude, longitude, timezone, altitude)
    """
    return [
        "LOCATION",
        meta.station_name,
        "State",
        "country",
        "",  # data type (not available)
        meta.station_id,
        str(meta.latitude),
        str(meta.longitude),
        timezone,
        str(meta.altitude),
    ]

135 

136 

137def _line2_design_cond(): 

138 """ 

139 Erstellen zweite Zeile der epw. 

140 

141 return: 

142 design_cond: List Zweite Zeile(Design Condition) von epw Daten als List 

143 """ 

144 design_cond = [ 

145 "DESIGN CONDITIONS", 

146 0, # number of design condition 

147 ] 

148 

149 return design_cond 

150 

151 

152def _line3_typ_ext_period(df): 

153 """ 

154 Parsen von weatherdata um typische und extreme Perioden zu holen. 

155 

156 Typische Perioden sind Wochen mit Temperatur, die der Durchschnittstemperatur der Saison 

157 am nächsten kommt. 

158 Extreme Perioden sind Wochen mit Temperatur, die der Maximum-/Minimumtemperatur der Saison 

159 am nächsten kommt. 

160 

161 return: 

162 typical_extreme_period: List Dritte Zeile(TYPICAL/EXTREME PERIODS) 

163 von epw Daten als List 

164 """ 

165 

166 typical_extreme_period = [ 

167 "TYPICAL/EXTREME PERIODS", 

168 ] 

169 

170 season_dict = { 

171 11: "Autumn", 

172 12: "Winter", 

173 1: "Winter", 

174 2: "Winter", 

175 3: "Spring", 

176 4: "Spring", 

177 5: "Spring", 

178 6: "Summer", 

179 7: "Summer", 

180 8: "Summer", 

181 9: "Autumn", 

182 10: "Autumn", 

183 } # Monaten in Saisons zuweisen 

184 

185 def group_func(input): 

186 """Gruppefunktion für .groupby()""" 

187 return season_dict[input.month] 

188 

189 df_temp_ambient = df["DryBulbTemp"] # Temperature_Ambient von weatherdata holen 

190 number_of_periods = ( 

191 df_temp_ambient.groupby(group_func).mean().shape[0] 

192 ) # Zahl von der Saisons rechnen als Zahl von Perioden 

193 typical_extreme_period.append(number_of_periods) 

194 

195 # Gruppierung per Saison 

196 try: 

197 summer_temp = df_temp_ambient.groupby(group_func).get_group("Summer") 

198 except KeyError: 

199 summer_temp = pd.DataFrame() 

200 try: 

201 spring_temp = df_temp_ambient.groupby(group_func).get_group("Spring") 

202 except KeyError: 

203 spring_temp = pd.DataFrame() 

204 try: 

205 autumn_temp = df_temp_ambient.groupby(group_func).get_group("Autumn") 

206 except KeyError: 

207 autumn_temp = pd.DataFrame() 

208 try: 

209 winter_temp = df_temp_ambient.groupby(group_func).get_group("Winter") 

210 except KeyError: 

211 winter_temp = pd.DataFrame() 

212 

213 if not summer_temp.empty: 

214 typical_extreme_period[1] = ( 

215 typical_extreme_period[1] + 1 

216 ) # Summer und Winter haben extreme Periode. 

217 max_temp_summer = summer_temp.max() 

218 typ_temp_summer = summer_temp.mean() 

219 summer_temp_w = summer_temp.resample( 

220 "W", label="left" 

221 ).mean() # Resample in wochentliche Interval 

222 

223 # Datenpunkt(typisch und extreme) finden 

224 idx_mean_summer = summer_temp_w.sub(typ_temp_summer).abs().idxmin() 

225 idx_max_summer = summer_temp_w.sub(max_temp_summer).abs().idxmin() 

226 week_closest2mean_summer = summer_temp_w.loc[[idx_mean_summer]] # Starttag 

227 week_closest2max_summer = summer_temp_w.loc[[idx_max_summer]] # Starttag 

228 

229 # Endtag berechnen 

230 weekend_max_summer = week_closest2max_summer.index + dt.timedelta(days=6) 

231 weekend_mean_summer = week_closest2mean_summer.index + dt.timedelta(days=6) 

232 

233 # List für die Saison erstellen 

234 summer = [ 

235 "Summer - Week Nearest Max Temperature For Period", 

236 "Extreme", 

237 str(week_closest2max_summer.index.month[0]) 

238 + "/" 

239 + str(week_closest2max_summer.index.day[0]), 

240 str(weekend_max_summer.month[0]) + "/" + str(weekend_max_summer.day[0]), 

241 "Summer - Week Nearest Average Temperature For Period", 

242 "Typical", 

243 str(week_closest2mean_summer.index.month[0]) 

244 + "/" 

245 + str(week_closest2mean_summer.index.day[0]), 

246 str(weekend_mean_summer.month[0]) 

247 + "/" 

248 + str(weekend_mean_summer.day[0]), 

249 ] 

250 

251 typical_extreme_period = ( 

252 typical_extreme_period + summer 

253 ) # Liste zusammensetzen 

254 

255 # für alle Saison wiederholen 

256 if not winter_temp.empty: 

257 typical_extreme_period[1] = typical_extreme_period[1] + 1 

258 min_temp_winter = winter_temp.min() 

259 typ_temp_winter = winter_temp.mean() 

260 winter_temp_w = winter_temp.resample("W", label="left").mean() 

261 idx_mean_winter = winter_temp_w.sub(typ_temp_winter).abs().idxmin() 

262 idx_min_winter = winter_temp_w.sub(min_temp_winter).abs().idxmin() 

263 week_closest2mean_winter = winter_temp_w.loc[[idx_mean_winter]] 

264 week_closest2min_winter = winter_temp_w.loc[[idx_min_winter]] 

265 weekend_min_winter = week_closest2min_winter.index + dt.timedelta(days=6) 

266 weekend_mean_winter = week_closest2mean_winter.index + dt.timedelta(days=6) 

267 winter = [ 

268 "Winter - Week Nearest Min Temperature For Period", 

269 "Extreme", 

270 str(week_closest2min_winter.index.month[0]) 

271 + "/" 

272 + str(week_closest2min_winter.index.day[0]), 

273 str(weekend_min_winter.month[0]) + "/" + str(weekend_min_winter.day[0]), 

274 "Winter - Week Nearest Average Temperature For Period", 

275 "Typical", 

276 str(week_closest2mean_winter.index.month[0]) 

277 + "/" 

278 + str(week_closest2mean_winter.index.day[0]), 

279 str(weekend_mean_winter.month[0]) 

280 + "/" 

281 + str(weekend_mean_winter.day[0]), 

282 ] 

283 

284 typical_extreme_period = typical_extreme_period + winter 

285 

286 if not autumn_temp.empty: 

287 typ_temp_autumn = autumn_temp.mean() 

288 autumn_temp_w = autumn_temp.resample("W", label="left").mean() 

289 idx_mean_autumn = autumn_temp_w.sub(typ_temp_autumn).abs().idxmin() 

290 week_closest2mean_autumn = autumn_temp_w.loc[[idx_mean_autumn]] 

291 weekend_mean_autumn = week_closest2mean_autumn.index + dt.timedelta(days=6) 

292 autumn = [ 

293 "Autumn - Week Nearest Average Temperature For Period", 

294 "Typical", 

295 str(week_closest2mean_autumn.index.month[0]) 

296 + "/" 

297 + str(week_closest2mean_autumn.index.day[0]), 

298 str(weekend_mean_autumn.month[0]) 

299 + "/" 

300 + str(weekend_mean_autumn.day[0]), 

301 ] 

302 

303 typical_extreme_period = typical_extreme_period + autumn 

304 

305 if not spring_temp.empty: 

306 typ_temp_spring = spring_temp.mean() 

307 spring_temp_w = spring_temp.resample("W", label="left").mean() 

308 idx_mean_spring = spring_temp_w.sub(typ_temp_spring).abs().idxmin() 

309 week_closest2mean_spring = spring_temp_w.loc[[idx_mean_spring]] 

310 weekend_mean_spring = week_closest2mean_spring.index + dt.timedelta(days=6) 

311 spring = [ 

312 "Spring - Week Nearest Average Temperature For Period", 

313 "Typical", 

314 str(week_closest2mean_spring.index.month[0]) 

315 + "/" 

316 + str(week_closest2mean_spring.index.day[0]), 

317 str(weekend_mean_spring.month[0]) 

318 + "/" 

319 + str(weekend_mean_spring.day[0]), 

320 ] 

321 

322 typical_extreme_period = typical_extreme_period + spring 

323 

324 return typical_extreme_period 

325 

326 

327def _line4_ground_temp(df): 

328 """ 

329 Parsen von weatherdata, um Bodentemperaturen zu holen. 

330 

331 #Todo: Not checked yet if this is calculation is correct 

332 

333 return: 

334 ground_temp: List Vierte Zeile(GROUND TEMPERATURES) von epw Daten als List 

335 """ 

336 

337 ground_temp = [ 

338 "GROUND TEMPERATURES", 

339 ] 

340 

341 df_4_ground_temp = df.copy() 

342 

343 df_w_ground = ( 

344 df_4_ground_temp.resample("M").mean().round(decimals=1) 

345 ) # Resample in monatliche Interval 

346 try: 

347 ground_t = df_w_ground[ 

348 [ 

349 "Soil_Temperature_5cm", 

350 "Soil_Temperature_10cm", 

351 "Soil_Temperature_20cm", 

352 "Soil_Temperature_50cm", 

353 "Soil_Temperature_1m", 

354 ] 

355 ].to_numpy() # Dataframe2Array 

356 # Array zu Liste umwandeln -> Zusammensetzen 

357 ground_temp = ( 

358 ground_temp 

359 + [5] # ground layers 

360 + [0.05, None, None, None] 

361 + ground_t[:, 0].tolist() 

362 + [0.1, None, None, None] 

363 + ground_t[:, 1].tolist() 

364 + [0.2, None, None, None] 

365 + ground_t[:, 2].tolist() 

366 + [0.5, None, None, None] 

367 + ground_t[:, 3].tolist() 

368 + [1, None, None, None] 

369 + ground_t[:, 4].tolist() 

370 ) 

371 return ground_temp 

372 except KeyError as err: 

373 logger.warn( 

374 "Error while adding the probably unnecessary ground temperature to the .epw file " 

375 "header. A placeholder will be used. Error: %s", err 

376 ) 

377 ground_temp = ground_temp + [0] # 0 ground layers 

378 

379 return ground_temp 

380 

381 

382def _line5_holiday_dl_saving(df): 

383 """ 

384 Erstellen der 5. Zeile der epw. 

385 

386 return: 

387 holiday_dl_saving: List 5.Zeile(HOLIDAYS/DAYLIGHT SAVINGS) von epw Daten als List 

388 """ 

389 

390 if True in df.index.is_leap_year: 

391 isLeap = "Yes" 

392 else: 

393 isLeap = "No" 

394 holiday_dl_saving = [ 

395 "HOLIDAYS/DAYLIGHT SAVINGS", 

396 isLeap, # Leap Year Observed 

397 0, # Daylight Saving Start Date 

398 0, # Daylight Saving End Date 

399 0, # Number of Holidays 

400 ] 

401 return holiday_dl_saving 

402 

403 

404def _line6_comment_1(): 

405 """ 

406 Erstellen der 6. Zeile der epw. 

407 

408 return: 

409 comment_1: List 6.Zeile(COMMENTS 1) von epw Daten als List 

410 """ 

411 return [ 

412 "COMMENTS 1", 

413 "For data format information see the code or check: " 

414 "https://designbuilder.co.uk/cahelp/Content/EnergyPlusWeatherFileFormat.htm", 

415 ] 

416 

417 

418def _line7_comment_2(comment2=None): 

419 """ 

420 Erstellen der 7. Zeile der epw. 

421 

422 return: 

423 comment_2: List 7.Zeile(COMMENTS 2) von epw Daten als List 

424 """ 

425 return ["COMMENTS 2", comment2] 

426 

427 

428def _line8_data_periods(df): 

429 """ 

430 Parsen von weatherdata, um Start- und Enddatenpunkt zu holen 

431 

432 return: 

433 data_periods: List 8.Zeile(DATA PERIODS) von epw Daten als List 

434 """ 

435 start_dp = df.index[0] 

436 end_dp = df.index[-1] 

437 data_periods = [ 

438 "DATA PERIODS", 

439 1, # Anzahl von Datenperioden 

440 1, # Anzahl von Intervale in einer Stunde 

441 "Data", # DP Name oder Beschreibung 

442 start_dp.strftime("%A"), # DP Starttag 

443 start_dp.strftime("%m/%d"), # DP Startdatum 

444 end_dp.strftime("%m/%d"), # DP Enddatum 

445 ] 

446 return data_periods 

447 

def _format_data(df, start, stop, timezone, fillna):
    """
    Format the core weather data for the EPW export.

    Shifts measurement times, truncates to the desired period, applies the
    timezone, splits the index into date/time columns, extends the data to
    full days and formats all floats with one decimal digit.

    Args:
        df: core weather data with a DatetimeIndex.
        start, stop: desired period of the export.
        timezone: hour offset applied to the UTC-based core data.
        fillna: whether NaN values should be filled.

    return:
        data_list: the EPW data records as a list of rows
        df: the formatted DataFrame
    """
    ### measurement time conversion
    df = time_observation_transformations.shift_time_by_dict(EPWFormat.export_format(), df)

    ### if possible avoid back and forth interpolating -> pass through
    ### variables without shifting
    df = pass_through_handling.pass_through_measurements_with_back_and_forth_interpolating(
        df, EPWFormat.export_format()
    )

    ### select only desired period
    df = time_observation_transformations.truncate_data_from_start_to_stop(
        df, start, stop
    )

    ### shift to desired timezone
    df = df.shift(periods=timezone, freq="h", axis=0)

    ### select the desired columns
    df = auxiliary.force_data_variable_convention(df, EPWFormat.export_format())

    # fill newly created variables of desired output format:
    # split the DatetimeIndex into separate date/time columns
    df["Year"] = pd.DatetimeIndex(df.index).year
    df["Month"] = pd.DatetimeIndex(df.index).month
    df["Day"] = pd.DatetimeIndex(df.index).day
    df["Hour"] = pd.DatetimeIndex(df.index).hour
    df["Minute"] = pd.DatetimeIndex(df.index).minute

    ### meet special epw requirements
    # EPW uses hours 1..24: convert hour 0 to hour 24 of the previous day
    df["Hour"] = df["Hour"].replace([0], 24)
    # if the day is not the 1st -> subtract 1 from the day
    df.loc[(df["Hour"] == 24) & (df["Day"] != 1), "Day"] = df.loc[
        (df["Hour"] == 24) & (df["Day"] != 1), "Day"
    ].sub(1)
    # if the day is the 1st -> clear year, month and day, then forward/backward fill
    df.loc[
        (df["Hour"] == 24) & (df["Day"] == 1),
        ["Year", "Month", "Day"]
    ] = np.nan
    df["Year"] = df["Year"].ffill().bfill().astype(int)
    df["Month"] = df["Month"].ffill().bfill().astype(int)
    df["Day"] = df["Day"].ffill().bfill().astype(int)
    df.reset_index(drop=True, inplace=True)

    # data should always contain full days
    df, first_day_added_rows = fill_full_first_day(df)
    df, last_day_added_rows = fill_full_last_day(df)

    # ensure data type where required
    columns_to_convert = ["Year", "Month", "Day", "Hour", "Minute"]
    for col in columns_to_convert:
        df[col] = df[col].astype(float).astype(int)

    ### fill NaNs
    if fillna:
        # forward-fill the rows added at the end of df
        # (guard against last_day_added_rows == 0: "df.iloc[-0:]" selects the
        # WHOLE frame, not zero rows, which would forward-fill everything)
        if last_day_added_rows:
            df.iloc[-last_day_added_rows:, :] = df.ffill().iloc[
                -last_day_added_rows:, :
            ]
        # backward-fill the rows added at the beginning of df
        if first_day_added_rows:
            df.iloc[:first_day_added_rows, :] = df.bfill().iloc[
                :first_day_added_rows, :
            ]

        # fill first and last lines nans (possibly lost through shifting)
        df.iloc[first_day_added_rows + 1, :] = df.bfill().iloc[
            first_day_added_rows + 1, :
        ]
        df.iloc[-1 - last_day_added_rows, :] = df.ffill().iloc[
            -1 - last_day_added_rows, :
        ]

        # fill default nans to the rest
        df = auxiliary.fill_nan_from_format_dict(df, EPWFormat.export_format())

    # cut off float digits (required for EnergyPlus);
    # DataFrame.applymap is deprecated -> element-wise formatting per column
    df = df.apply(
        lambda col: col.map(lambda x: f"{x:.1f}" if isinstance(x, float) else x)
    )

    # again make sure correct order and variables are applied
    # (processing might have mixed it up)
    df = auxiliary.force_data_variable_convention(df, EPWFormat.export_format())

    ### format dataframe to list
    data_list = df[EPWFormat.export_format().keys()].to_numpy().tolist()

    return data_list, df

546 

547 

def fill_full_first_day(df):
    """
    Prepend rows so that the DataFrame starts at hour 1 of its first day.

    Mirrors `fill_full_last_day`: the missing rows are built in one batch
    instead of concatenating row by row. Added rows only contain the
    date/time columns; all other columns are left as NaN to be filled later.

    Args:
        df: DataFrame with "Year", "Month", "Day", "Hour", "Minute" columns.

    return:
        df: the possibly extended DataFrame
        rows_to_add: number of rows that were prepended
    """
    # identify the first timestamp of the DataFrame
    first_row = df.iloc[0]
    rows_to_add = 0

    # if the first hour is not 1, add rows to start with hour 1
    if first_row["Hour"] != 1:
        # if the first hour is 24 we don't want to add a full extra day, just
        # delete the line so that the data frame starts with hour 1
        if first_row["Hour"] == 24:
            df = df.drop(df.index[0])
        else:
            # calculate how many rows to add
            rows_to_add = int(first_row["Hour"]) - 1

            # build all missing rows at once (avoids repeated concat in a loop)
            new_rows = pd.DataFrame(
                {
                    "Minute": [first_row["Minute"]] * rows_to_add,
                    "Hour": list(range(1, rows_to_add + 1)),
                    "Day": [first_row["Day"]] * rows_to_add,
                    "Month": [first_row["Month"]] * rows_to_add,
                    "Year": [first_row["Year"]] * rows_to_add,
                }
            )
            df = pd.concat([new_rows, df]).reset_index(drop=True)
    return df, rows_to_add

580 

581 

def fill_full_last_day(df):
    """
    Append rows so that the DataFrame ends at hour 24 of its last day.

    Added rows only contain the date/time columns; all other columns are
    left as NaN to be filled later.

    Args:
        df: DataFrame with "Year", "Month", "Day", "Hour", "Minute" columns.

    return:
        df: the possibly extended DataFrame
        rows_to_add: number of rows that were appended
    """
    # identify the last timestamp of the DataFrame
    final_row = df.iloc[-1]
    rows_to_add = 0

    # if the last hour is not 24, add rows to reach hour 24
    if final_row["Hour"] != 24:
        # if the last hour is 0 we don't want to add a full extra day, just
        # delete the line so that the data frame ends with hour 24
        if final_row["Hour"] == 0:
            df = df.drop(df.index[-1])
        else:
            # calculate how many rows to add
            rows_to_add = 24 - int(final_row["Hour"])

            # build all missing rows at once and append them
            filler = [
                {
                    "Minute": final_row["Minute"],
                    "Hour": final_row["Hour"] + offset,
                    "Day": final_row["Day"],
                    "Month": final_row["Month"],
                    "Year": final_row["Year"],
                }
                for offset in range(1, rows_to_add + 1)
            ]
            df = pd.concat([df, pd.DataFrame(filler)], ignore_index=True)
    return df, rows_to_add