Coverage for aixweather/core_data_format_2_output_file/to_epw_energyplus.py: 97%

194 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-12-31 11:58 +0000

1""" 

2convert core data to epw (EnergyPlus) data 

3""" 

4 

5import csv 

6import datetime as dt 

7import logging 

8 

9import pandas as pd 

10import numpy as np 

11 

12from aixweather import definitions 

13from aixweather.imports.utils_import import MetaData 

14from aixweather.transformation_functions import auxiliary, time_observation_transformations, pass_through_handling 

15from aixweather.transformation_to_core_data.EPW import EPWFormat 

16 

17logger = logging.getLogger(__name__) 

18 

19 

def to_epw(
    core_df: pd.DataFrame,
    meta: MetaData,
    start: dt.datetime,
    stop: dt.datetime,
    fillna: bool,
    result_folder: str = None,
    filename: str = None,
    export_in_utc: bool = False
) -> (pd.DataFrame, str):
    """Create an EPW file from the core data.

    Args:
        core_df (pd.DataFrame): DataFrame containing core data.
        meta (MetaData): Metadata associated with the weather data.
        start (dt.datetime): Timestamp for the start of the EPW file.
        stop (dt.datetime): Timestamp for the end of the EPW file.
        fillna (bool): Boolean indicating whether NaN values should be filled.
        result_folder (str):
            Path to the folder where to save the file. Default will use
            the `results_file_path` method.
        filename (str): Name of the file to be saved. The default is constructed
            based on the meta-data as well as start and stop time.
        export_in_utc (bool): Timezone to be used for the export.
            True to export in UTC+0 (the timezone of the core data),
            False (default) to use the timezone from the metadata.

    Returns:
        pd.DataFrame: DataFrame containing the weather data formatted for EPW export,
            excluding metadata.
        str: Path to the exported file.
    """
    # UTC export keeps the core data's UTC+0 index; otherwise shift to local time.
    timezone = 0 if export_in_utc else meta.timezone

    ### evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=EPWFormat.export_format()
    )

    df = core_df.copy()

    # format data to epw
    df_epw_as_list, df_epw = _format_data(
        df=df, start=start, stop=stop, timezone=timezone, fillna=fillna
    )

    # get final start and stop time (differs from start, stop due to filling to full days)
    start_epw = pd.to_datetime(df_epw.iloc[[0]][['Year', 'Month', 'Day', 'Hour']]).iloc[0]
    stop_epw = pd.to_datetime(df_epw.iloc[[-1]][['Year', 'Month', 'Day', 'Hour']]).iloc[-1]
    # truncate core data for other calculations
    df_truncated = time_observation_transformations.truncate_data_from_start_to_stop(
        df, start_epw, stop_epw
    )

    # keep regular start stop in the filename for the unit tests
    if filename is None:
        _utc_flag = "_utc" if export_in_utc else ""
        filename = (
            f"{meta.station_id}_{start.strftime('%Y%m%d')}_{stop.strftime('%Y%m%d')}"
            f"_{meta.station_name}{_utc_flag}.epw"
        )
    # get the file path to save the data to
    file_path = definitions.results_file_path(filename, result_folder)

    ### merge all header lines and the data to be saved in a .epw file
    with open(file_path, "w", newline="", encoding="latin1") as file:
        writer = csv.writer(file)
        writer.writerows(
            [
                _line1_location(meta=meta, timezone=timezone),
                _line2_design_cond(),
                _line3_typ_ext_period(df_truncated),
                _line4_ground_temp(df_truncated),
                _line5_holiday_dl_saving(df_truncated),
                _line6_comment_1(),
                _line7_comment_2(),
                _line8_data_periods(df_truncated),
            ]
        )
        writer.writerows(df_epw_as_list)

    logger.info("EPW file saved to %s.", file_path)

    return df, file_path

104 

105 

106### create header lines 

### create header lines
def _line1_location(
    meta: MetaData,
    timezone: int
):
    """
    Build the first EPW header line (LOCATION) from the station metadata.

    return:
        location: list with the LOCATION header fields (station name, state,
        country, data type, station id, latitude, longitude, timezone, altitude)
    """
    return [
        "LOCATION",
        meta.station_name,
        "State",
        "country",
        "",  # data type (not available)
        meta.station_id,
        str(meta.latitude),
        str(meta.longitude),
        timezone,
        str(meta.altitude),
    ]

135 

136 

137def _line2_design_cond(): 

138 """ 

139 Erstellen zweite Zeile der epw. 

140 

141 return: 

142 design_cond: List Zweite Zeile(Design Condition) von epw Daten als List 

143 """ 

144 design_cond = [ 

145 "DESIGN CONDITIONS", 

146 0, # number of design condition 

147 ] 

148 

149 return design_cond 

150 

151 

152def _line3_typ_ext_period(df): 

153 """ 

154 Parsen von weatherdata um typische und extreme Perioden zu holen. 

155 

156 Typische Perioden sind Wochen mit Temperatur, die der Durchschnittstemperatur der Saison 

157 am nächsten kommt. 

158 Extreme Perioden sind Wochen mit Temperatur, die der Maximum-/Minimumtemperatur der Saison 

159 am nächsten kommt. 

160 

161 return: 

162 typical_extreme_period: List Dritte Zeile(TYPICAL/EXTREME PERIODS) 

163 von epw Daten als List 

164 """ 

165 

166 typical_extreme_period = [ 

167 "TYPICAL/EXTREME PERIODS", 

168 ] 

169 

170 season_dict = { 

171 11: "Autumn", 

172 12: "Winter", 

173 1: "Winter", 

174 2: "Winter", 

175 3: "Spring", 

176 4: "Spring", 

177 5: "Spring", 

178 6: "Summer", 

179 7: "Summer", 

180 8: "Summer", 

181 9: "Autumn", 

182 10: "Autumn", 

183 } # Monaten in Saisons zuweisen 

184 

185 def group_func(input): 

186 """Gruppefunktion für .groupby()""" 

187 return season_dict[input.month] 

188 

189 df_temp_ambient = df["DryBulbTemp"] # Temperature_Ambient von weatherdata holen 

190 number_of_periods = ( 

191 df_temp_ambient.groupby(group_func).mean().shape[0] 

192 ) # Zahl von der Saisons rechnen als Zahl von Perioden 

193 typical_extreme_period.append(number_of_periods) 

194 

195 # Gruppierung per Saison 

196 try: 

197 summer_temp = df_temp_ambient.groupby(group_func).get_group("Summer") 

198 except KeyError: 

199 summer_temp = pd.DataFrame() 

200 try: 

201 spring_temp = df_temp_ambient.groupby(group_func).get_group("Spring") 

202 except KeyError: 

203 spring_temp = pd.DataFrame() 

204 try: 

205 autumn_temp = df_temp_ambient.groupby(group_func).get_group("Autumn") 

206 except KeyError: 

207 autumn_temp = pd.DataFrame() 

208 try: 

209 winter_temp = df_temp_ambient.groupby(group_func).get_group("Winter") 

210 except KeyError: 

211 winter_temp = pd.DataFrame() 

212 

213 if not summer_temp.empty: 

214 typical_extreme_period[1] = ( 

215 typical_extreme_period[1] + 1 

216 ) # Summer und Winter haben extreme Periode. 

217 max_temp_summer = summer_temp.max() 

218 typ_temp_summer = summer_temp.mean() 

219 summer_temp_w = summer_temp.resample( 

220 "W", label="left" 

221 ).mean() # Resample in wochentliche Interval 

222 

223 # Datenpunkt(typisch und extreme) finden 

224 idx_mean_summer = summer_temp_w.sub(typ_temp_summer).abs().idxmin() 

225 idx_max_summer = summer_temp_w.sub(max_temp_summer).abs().idxmin() 

226 week_closest2mean_summer = summer_temp_w.loc[[idx_mean_summer]] # Starttag 

227 week_closest2max_summer = summer_temp_w.loc[[idx_max_summer]] # Starttag 

228 

229 # Endtag berechnen 

230 weekend_max_summer = week_closest2max_summer.index + dt.timedelta(days=6) 

231 weekend_mean_summer = week_closest2mean_summer.index + dt.timedelta(days=6) 

232 

233 # List für die Saison erstellen 

234 summer = [ 

235 "Summer - Week Nearest Max Temperature For Period", 

236 "Extreme", 

237 str(week_closest2max_summer.index.month[0]) 

238 + "/" 

239 + str(week_closest2max_summer.index.day[0]), 

240 str(weekend_max_summer.month[0]) + "/" + str(weekend_max_summer.day[0]), 

241 "Summer - Week Nearest Average Temperature For Period", 

242 "Typical", 

243 str(week_closest2mean_summer.index.month[0]) 

244 + "/" 

245 + str(week_closest2mean_summer.index.day[0]), 

246 str(weekend_mean_summer.month[0]) 

247 + "/" 

248 + str(weekend_mean_summer.day[0]), 

249 ] 

250 

251 typical_extreme_period = ( 

252 typical_extreme_period + summer 

253 ) # Liste zusammensetzen 

254 

255 # für alle Saison wiederholen 

256 if not winter_temp.empty: 

257 typical_extreme_period[1] = typical_extreme_period[1] + 1 

258 min_temp_winter = winter_temp.min() 

259 typ_temp_winter = winter_temp.mean() 

260 winter_temp_w = winter_temp.resample("W", label="left").mean() 

261 idx_mean_winter = winter_temp_w.sub(typ_temp_winter).abs().idxmin() 

262 idx_min_winter = winter_temp_w.sub(min_temp_winter).abs().idxmin() 

263 week_closest2mean_winter = winter_temp_w.loc[[idx_mean_winter]] 

264 week_closest2min_winter = winter_temp_w.loc[[idx_min_winter]] 

265 weekend_min_winter = week_closest2min_winter.index + dt.timedelta(days=6) 

266 weekend_mean_winter = week_closest2mean_winter.index + dt.timedelta(days=6) 

267 winter = [ 

268 "Winter - Week Nearest Min Temperature For Period", 

269 "Extreme", 

270 str(week_closest2min_winter.index.month[0]) 

271 + "/" 

272 + str(week_closest2min_winter.index.day[0]), 

273 str(weekend_min_winter.month[0]) + "/" + str(weekend_min_winter.day[0]), 

274 "Winter - Week Nearest Average Temperature For Period", 

275 "Typical", 

276 str(week_closest2mean_winter.index.month[0]) 

277 + "/" 

278 + str(week_closest2mean_winter.index.day[0]), 

279 str(weekend_mean_winter.month[0]) 

280 + "/" 

281 + str(weekend_mean_winter.day[0]), 

282 ] 

283 

284 typical_extreme_period = typical_extreme_period + winter 

285 

286 if not autumn_temp.empty: 

287 typ_temp_autumn = autumn_temp.mean() 

288 autumn_temp_w = autumn_temp.resample("W", label="left").mean() 

289 idx_mean_autumn = autumn_temp_w.sub(typ_temp_autumn).abs().idxmin() 

290 week_closest2mean_autumn = autumn_temp_w.loc[[idx_mean_autumn]] 

291 weekend_mean_autumn = week_closest2mean_autumn.index + dt.timedelta(days=6) 

292 autumn = [ 

293 "Autumn - Week Nearest Average Temperature For Period", 

294 "Typical", 

295 str(week_closest2mean_autumn.index.month[0]) 

296 + "/" 

297 + str(week_closest2mean_autumn.index.day[0]), 

298 str(weekend_mean_autumn.month[0]) 

299 + "/" 

300 + str(weekend_mean_autumn.day[0]), 

301 ] 

302 

303 typical_extreme_period = typical_extreme_period + autumn 

304 

305 if not spring_temp.empty: 

306 typ_temp_spring = spring_temp.mean() 

307 spring_temp_w = spring_temp.resample("W", label="left").mean() 

308 idx_mean_spring = spring_temp_w.sub(typ_temp_spring).abs().idxmin() 

309 week_closest2mean_spring = spring_temp_w.loc[[idx_mean_spring]] 

310 weekend_mean_spring = week_closest2mean_spring.index + dt.timedelta(days=6) 

311 spring = [ 

312 "Spring - Week Nearest Average Temperature For Period", 

313 "Typical", 

314 str(week_closest2mean_spring.index.month[0]) 

315 + "/" 

316 + str(week_closest2mean_spring.index.day[0]), 

317 str(weekend_mean_spring.month[0]) 

318 + "/" 

319 + str(weekend_mean_spring.day[0]), 

320 ] 

321 

322 typical_extreme_period = typical_extreme_period + spring 

323 

324 return typical_extreme_period 

325 

326 

327def _line4_ground_temp(df): 

328 """ 

329 Parsen von weatherdata, um Bodentemperaturen zu holen. 

330 

331 #Todo: Not checked yet if this is calculation is correct 

332 

333 return: 

334 ground_temp: List Vierte Zeile(GROUND TEMPERATURES) von epw Daten als List 

335 """ 

336 

337 ground_temp = [ 

338 "GROUND TEMPERATURES", 

339 ] 

340 

341 df_4_ground_temp = df.copy() 

342 

343 df_w_ground = ( 

344 df_4_ground_temp.resample("M").mean().round(decimals=1) 

345 ) # Resample in monatliche Interval 

346 try: 

347 ground_t = df_w_ground[ 

348 [ 

349 "Soil_Temperature_5cm", 

350 "Soil_Temperature_10cm", 

351 "Soil_Temperature_20cm", 

352 "Soil_Temperature_50cm", 

353 "Soil_Temperature_1m", 

354 ] 

355 ].to_numpy() # Dataframe2Array 

356 # Array zu Liste umwandeln -> Zusammensetzen 

357 ground_temp = ( 

358 ground_temp 

359 + [5] # ground layers 

360 + [0.05, None, None, None] 

361 + ground_t[:, 0].tolist() 

362 + [0.1, None, None, None] 

363 + ground_t[:, 1].tolist() 

364 + [0.2, None, None, None] 

365 + ground_t[:, 2].tolist() 

366 + [0.5, None, None, None] 

367 + ground_t[:, 3].tolist() 

368 + [1, None, None, None] 

369 + ground_t[:, 4].tolist() 

370 ) 

371 return ground_temp 

372 except KeyError as err: 

373 logger.warn( 

374 "Error while adding the probably unnecessary ground temperature to the .epw file " 

375 "header. A placeholder will be used. Error: %s", err 

376 ) 

377 ground_temp = ground_temp + [0] # 0 ground layers 

378 

379 return ground_temp 

380 

381 

382def _line5_holiday_dl_saving(df): 

383 """ 

384 Erstellen der 5. Zeile der epw. 

385 

386 return: 

387 holiday_dl_saving: List 5.Zeile(HOLIDAYS/DAYLIGHT SAVINGS) von epw Daten als List 

388 """ 

389 

390 if True in df.index.is_leap_year: 

391 isLeap = "Yes" 

392 else: 

393 isLeap = "No" 

394 holiday_dl_saving = [ 

395 "HOLIDAYS/DAYLIGHT SAVINGS", 

396 isLeap, # Leap Year Observed 

397 0, # Daylight Saving Start Date 

398 0, # Daylight Saving End Date 

399 0, # Number of Holidays 

400 ] 

401 return holiday_dl_saving 

402 

403 

404def _line6_comment_1(): 

405 """ 

406 Erstellen der 6. Zeile der epw. 

407 

408 return: 

409 comment_1: List 6.Zeile(COMMENTS 1) von epw Daten als List 

410 """ 

411 return [ 

412 "COMMENTS 1", 

413 "For data format information see the code or check: " 

414 "https://designbuilder.co.uk/cahelp/Content/EnergyPlusWeatherFileFormat.htm", 

415 ] 

416 

417 

418def _line7_comment_2(comment2=None): 

419 """ 

420 Erstellen der 7. Zeile der epw. 

421 

422 return: 

423 comment_2: List 7.Zeile(COMMENTS 2) von epw Daten als List 

424 """ 

425 return ["COMMENTS 2", comment2] 

426 

427 

428def _line8_data_periods(df): 

429 """ 

430 Parsen von weatherdata, um Start- und Enddatenpunkt zu holen 

431 

432 return: 

433 data_periods: List 8.Zeile(DATA PERIODS) von epw Daten als List 

434 """ 

435 start_dp = df.index[0] 

436 end_dp = df.index[-1] 

437 data_periods = [ 

438 "DATA PERIODS", 

439 1, # Anzahl von Datenperioden 

440 1, # Anzahl von Intervale in einer Stunde 

441 "Data", # DP Name oder Beschreibung 

442 start_dp.strftime("%A"), # DP Starttag 

443 start_dp.strftime("%m/%d"), # DP Startdatum 

444 end_dp.strftime("%m/%d"), # DP Enddatum 

445 ] 

446 return data_periods 

447 

def _format_data(df, start, stop, timezone, fillna):
    """
    Format the core weather data for the EPW export.

    Shifts measurement times, truncates to the desired period, applies the
    timezone, splits the index into date/time columns, extends the data to
    full days and formats all floats with one decimal digit.

    Args:
        df: core weather data with a DatetimeIndex.
        start, stop: desired period of the export.
        timezone: hour offset applied to the UTC-based core data.
        fillna: whether NaN values should be filled.

    return:
        data_list: the EPW data records as a list of rows
        df: the formatted DataFrame
    """
    ### measurement time conversion
    df = time_observation_transformations.shift_time_by_dict(EPWFormat.export_format(), df)

    ### if possible avoid back and forth interpolating -> pass through
    ### variables without shifting
    df = pass_through_handling.pass_through_measurements_with_back_and_forth_interpolating(
        df, EPWFormat.export_format()
    )

    ### select only desired period
    df = time_observation_transformations.truncate_data_from_start_to_stop(
        df, start, stop
    )

    ### shift to desired timezone
    df = df.shift(periods=timezone, freq="h", axis=0)

    ### select the desired columns
    df = auxiliary.force_data_variable_convention(df, EPWFormat.export_format())

    # fill newly created variables of desired output format:
    # split the DatetimeIndex into separate date/time columns
    df["Year"] = pd.DatetimeIndex(df.index).year
    df["Month"] = pd.DatetimeIndex(df.index).month
    df["Day"] = pd.DatetimeIndex(df.index).day
    df["Hour"] = pd.DatetimeIndex(df.index).hour
    df["Minute"] = pd.DatetimeIndex(df.index).minute

    ### meet special epw requirements
    # EPW uses hours 1..24: convert hour 0 to hour 24 of the previous day
    df["Hour"] = df["Hour"].replace([0], 24)
    # if the day is not the 1st -> subtract 1 from the day
    df.loc[(df["Hour"] == 24) & (df["Day"] != 1), "Day"] = df.loc[
        (df["Hour"] == 24) & (df["Day"] != 1), "Day"
    ].sub(1)
    # if the day is the 1st -> clear year, month and day, then forward/backward fill
    df.loc[
        (df["Hour"] == 24) & (df["Day"] == 1),
        ["Year", "Month", "Day"]
    ] = np.nan
    df["Year"] = df["Year"].ffill().bfill().astype(int)
    df["Month"] = df["Month"].ffill().bfill().astype(int)
    df["Day"] = df["Day"].ffill().bfill().astype(int)
    df.reset_index(drop=True, inplace=True)

    # data should always contain full days
    df, first_day_added_rows = fill_full_first_day(df)
    df, last_day_added_rows = fill_full_last_day(df)

    # ensure data type where required
    columns_to_convert = ["Year", "Month", "Day", "Hour", "Minute"]
    for col in columns_to_convert:
        df[col] = df[col].astype(float).astype(int)

    ### fill NaNs
    if fillna:
        # forward-fill the rows added at the end of df
        # (guard against last_day_added_rows == 0: "df.iloc[-0:]" selects the
        # WHOLE frame, not zero rows, which would forward-fill everything)
        if last_day_added_rows:
            df.iloc[-last_day_added_rows:, :] = df.ffill().iloc[
                -last_day_added_rows:, :
            ]
        # backward-fill the rows added at the beginning of df
        if first_day_added_rows:
            df.iloc[:first_day_added_rows, :] = df.bfill().iloc[
                :first_day_added_rows, :
            ]

        # fill first and last lines nans (possibly lost through shifting)
        df.iloc[first_day_added_rows + 1, :] = df.bfill().iloc[
            first_day_added_rows + 1, :
        ]
        df.iloc[-1 - last_day_added_rows, :] = df.ffill().iloc[
            -1 - last_day_added_rows, :
        ]

        # fill default nans to the rest
        df = auxiliary.fill_nan_from_format_dict(df, EPWFormat.export_format())

    # cut off float digits (required for EnergyPlus);
    # DataFrame.applymap is deprecated -> element-wise formatting per column
    df = df.apply(
        lambda col: col.map(lambda x: f"{x:.1f}" if isinstance(x, float) else x)
    )

    # again make sure correct order and variables are applied
    # (processing might have mixed it up)
    df = auxiliary.force_data_variable_convention(df, EPWFormat.export_format())

    ### format dataframe to list
    data_list = df[EPWFormat.export_format().keys()].to_numpy().tolist()

    return data_list, df

546 

547 

def fill_full_first_day(df):
    """
    Prepend rows so that the DataFrame starts at hour 1 of its first day.

    Mirrors `fill_full_last_day`: the missing rows are built in one batch
    instead of concatenating row by row. Added rows only contain the
    date/time columns; all other columns are left as NaN to be filled later.

    Args:
        df: DataFrame with "Year", "Month", "Day", "Hour", "Minute" columns.

    return:
        df: the possibly extended DataFrame
        rows_to_add: number of rows that were prepended
    """
    # identify the first timestamp of the DataFrame
    first_row = df.iloc[0]
    rows_to_add = 0

    # if the first hour is not 1, add rows to start with hour 1
    if first_row["Hour"] != 1:
        # if the first hour is 24 we don't want to add a full extra day, just
        # delete the line so that the data frame starts with hour 1
        if first_row["Hour"] == 24:
            df = df.drop(df.index[0])
        else:
            # calculate how many rows to add
            rows_to_add = int(first_row["Hour"]) - 1

            # build all missing rows at once (avoids repeated concat in a loop)
            new_rows = pd.DataFrame(
                {
                    "Minute": [first_row["Minute"]] * rows_to_add,
                    "Hour": list(range(1, rows_to_add + 1)),
                    "Day": [first_row["Day"]] * rows_to_add,
                    "Month": [first_row["Month"]] * rows_to_add,
                    "Year": [first_row["Year"]] * rows_to_add,
                }
            )
            df = pd.concat([new_rows, df]).reset_index(drop=True)
    return df, rows_to_add

580 

581 

def fill_full_last_day(df):
    """
    Append rows so that the DataFrame ends at hour 24 of its last day.

    Added rows only contain the date/time columns; all other columns are
    left as NaN to be filled later.

    Args:
        df: DataFrame with "Year", "Month", "Day", "Hour", "Minute" columns.

    return:
        df: the possibly extended DataFrame
        rows_to_add: number of rows that were appended
    """
    # identify the last timestamp of the DataFrame
    final_row = df.iloc[-1]
    rows_to_add = 0

    # if the last hour is not 24, add rows to reach hour 24
    if final_row["Hour"] != 24:
        # if the last hour is 0 we don't want to add a full extra day, just
        # delete the line so that the data frame ends with hour 24
        if final_row["Hour"] == 0:
            df = df.drop(df.index[-1])
        else:
            # calculate how many rows to add
            rows_to_add = 24 - int(final_row["Hour"])

            # build all missing rows at once and append them
            filler = [
                {
                    "Minute": final_row["Minute"],
                    "Hour": final_row["Hour"] + offset,
                    "Day": final_row["Day"],
                    "Month": final_row["Month"],
                    "Year": final_row["Year"],
                }
                for offset in range(1, rows_to_add + 1)
            ]
            df = pd.concat([df, pd.DataFrame(filler)], ignore_index=True)
    return df, rows_to_add