Coverage for aixweather/core_data_format_2_output_file/to_epw_energyplus.py: 97%
194 statements
coverage.py v7.4.4, created at 2025-12-31 11:58 +0000
1"""
2convert core data to epw (EnergyPlus) data
3"""

import csv
import datetime as dt
import logging

import pandas as pd
import numpy as np

from aixweather import definitions
from aixweather.imports.utils_import import MetaData
from aixweather.transformation_functions import auxiliary, time_observation_transformations, pass_through_handling
from aixweather.transformation_to_core_data.EPW import EPWFormat

logger = logging.getLogger(__name__)


def to_epw(
    core_df: pd.DataFrame,
    meta: MetaData,
    start: dt.datetime,
    stop: dt.datetime,
    fillna: bool,
    result_folder: str = None,
    filename: str = None,
    export_in_utc: bool = False
) -> (pd.DataFrame, str):
    """Create an EPW file from the core data.

    Args:
        core_df (pd.DataFrame): DataFrame containing core data.
        meta (MetaData): Metadata associated with the weather data.
        start (dt.datetime): Timestamp for the start of the EPW file.
        stop (dt.datetime): Timestamp for the end of the EPW file.
        fillna (bool): Boolean indicating whether NaN values should be filled.
        result_folder (str):
            Path to the folder where to save the file. Default will use
            the `results_file_path` method.
        filename (str): Name of the file to be saved. The default is constructed
            based on the metadata as well as start and stop time.
        export_in_utc (bool): Timezone to be used for the export.
            True to use the core_df timezone (UTC+0),
            False (default) to use the timezone from the metadata.

    Returns:
        pd.DataFrame: DataFrame containing the weather data formatted for EPW export,
        excluding metadata.
        str: Path to the exported file.
    """
    timezone = 0 if export_in_utc else meta.timezone

    ### evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=EPWFormat.export_format()
    )

    df = core_df.copy()

    # format data to epw
    df_epw_as_list, df_epw = _format_data(
        df=df, start=start, stop=stop, timezone=timezone, fillna=fillna
    )

    # get final start and stop time (differs from start, stop due to filling to full days)
    start_epw = pd.to_datetime(df_epw.iloc[[0]][['Year', 'Month', 'Day', 'Hour']]).iloc[0]
    stop_epw = pd.to_datetime(df_epw.iloc[[-1]][['Year', 'Month', 'Day', 'Hour']]).iloc[-1]
    # truncate core data for other calculations
    df_truncated = time_observation_transformations.truncate_data_from_start_to_stop(
        df, start_epw, stop_epw
    )

    # keep the regular start and stop in the filename for the unit tests
    if filename is None:
        _utc_flag = "_utc" if export_in_utc else ""
        filename = (
            f"{meta.station_id}_{start.strftime('%Y%m%d')}_{stop.strftime('%Y%m%d')}"
            f"_{meta.station_name}{_utc_flag}.epw"
        )
    # get the file path to save the data to
    file_path = definitions.results_file_path(filename, result_folder)

    ### merge all header lines and the data to be saved in a .epw file
    with open(file_path, "w", newline="", encoding="latin1") as file:
        writer = csv.writer(file)
        writer.writerows(
            [
                _line1_location(meta=meta, timezone=timezone),
                _line2_design_cond(),
                _line3_typ_ext_period(df_truncated),
                _line4_ground_temp(df_truncated),
                _line5_holiday_dl_saving(df_truncated),
                _line6_comment_1(),
                _line7_comment_2(),
                _line8_data_periods(df_truncated),
            ]
        )
        writer.writerows(df_epw_as_list)

    logger.info("EPW file saved to %s.", file_path)

    return df, file_path
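
# A minimal usage sketch (hedged: `core_df` and `meta` are placeholders that
# would come from one of the aixweather import/transformation pipelines):
#
#     df, epw_path = to_epw(
#         core_df=core_df,
#         meta=meta,
#         start=dt.datetime(2022, 1, 1),
#         stop=dt.datetime(2022, 12, 31),
#         fillna=True,
#     )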

### create header lines
def _line1_location(
    meta: MetaData,
    timezone: int
):
    """
    Get location metadata (station name, state, country, data_type,
    stationID, lat, lon, TZ, alt).

    return:
        location: first line (LOCATION) of the EPW data as a list
    """
    data_type = ""

    location = [
        "LOCATION",
        meta.station_name,
        "State",
        "country",
        data_type,
        meta.station_id,
        str(meta.latitude),
        str(meta.longitude),
        timezone,
        str(meta.altitude),
    ]

    return location
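
# Illustrative output (hypothetical station values):
#   ["LOCATION", "Aachen", "State", "country", "", "15000", "50.78", "6.09", 1, "231.0"]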

def _line2_design_cond():
    """
    Create the second line of the EPW file.

    return:
        design_cond: second line (DESIGN CONDITIONS) of the EPW data as a list
    """
    design_cond = [
        "DESIGN CONDITIONS",
        0,  # number of design conditions
    ]

    return design_cond


def _line3_typ_ext_period(df):
    """
    Parse the weather data to get typical and extreme periods.

    Typical periods are the weeks whose temperature is closest to the
    seasonal average temperature.
    Extreme periods are the weeks whose temperature is closest to the
    seasonal maximum/minimum temperature.

    return:
        typical_extreme_period: third line (TYPICAL/EXTREME PERIODS)
        of the EPW data as a list
    """
    typical_extreme_period = [
        "TYPICAL/EXTREME PERIODS",
    ]

    season_dict = {
        11: "Autumn",
        12: "Winter",
        1: "Winter",
        2: "Winter",
        3: "Spring",
        4: "Spring",
        5: "Spring",
        6: "Summer",
        7: "Summer",
        8: "Summer",
        9: "Autumn",
        10: "Autumn",
    }  # map months to seasons

    def group_func(timestamp):
        """Grouping function for .groupby()."""
        return season_dict[timestamp.month]

    df_temp_ambient = df["DryBulbTemp"]  # get the ambient dry-bulb temperature
    number_of_periods = (
        df_temp_ambient.groupby(group_func).mean().shape[0]
    )  # the number of seasons equals the number of periods
    typical_extreme_period.append(number_of_periods)

    # group by season
    try:
        summer_temp = df_temp_ambient.groupby(group_func).get_group("Summer")
    except KeyError:
        summer_temp = pd.DataFrame()
    try:
        spring_temp = df_temp_ambient.groupby(group_func).get_group("Spring")
    except KeyError:
        spring_temp = pd.DataFrame()
    try:
        autumn_temp = df_temp_ambient.groupby(group_func).get_group("Autumn")
    except KeyError:
        autumn_temp = pd.DataFrame()
    try:
        winter_temp = df_temp_ambient.groupby(group_func).get_group("Winter")
    except KeyError:
        winter_temp = pd.DataFrame()

    if not summer_temp.empty:
        typical_extreme_period[1] = (
            typical_extreme_period[1] + 1
        )  # summer and winter each add an extra extreme period
        max_temp_summer = summer_temp.max()
        typ_temp_summer = summer_temp.mean()
        summer_temp_w = summer_temp.resample(
            "W", label="left"
        ).mean()  # resample to weekly intervals

        # find the typical and extreme data points
        idx_mean_summer = summer_temp_w.sub(typ_temp_summer).abs().idxmin()
        idx_max_summer = summer_temp_w.sub(max_temp_summer).abs().idxmin()
        week_closest2mean_summer = summer_temp_w.loc[[idx_mean_summer]]  # start day
        week_closest2max_summer = summer_temp_w.loc[[idx_max_summer]]  # start day

        # calculate the end day
        weekend_max_summer = week_closest2max_summer.index + dt.timedelta(days=6)
        weekend_mean_summer = week_closest2mean_summer.index + dt.timedelta(days=6)

        # create the list for this season
        summer = [
            "Summer - Week Nearest Max Temperature For Period",
            "Extreme",
            f"{week_closest2max_summer.index.month[0]}/{week_closest2max_summer.index.day[0]}",
            f"{weekend_max_summer.month[0]}/{weekend_max_summer.day[0]}",
            "Summer - Week Nearest Average Temperature For Period",
            "Typical",
            f"{week_closest2mean_summer.index.month[0]}/{week_closest2mean_summer.index.day[0]}",
            f"{weekend_mean_summer.month[0]}/{weekend_mean_summer.day[0]}",
        ]

        typical_extreme_period = (
            typical_extreme_period + summer
        )  # concatenate the lists

    # repeat for the remaining seasons
    if not winter_temp.empty:
        typical_extreme_period[1] = typical_extreme_period[1] + 1
        min_temp_winter = winter_temp.min()
        typ_temp_winter = winter_temp.mean()
        winter_temp_w = winter_temp.resample("W", label="left").mean()
        idx_mean_winter = winter_temp_w.sub(typ_temp_winter).abs().idxmin()
        idx_min_winter = winter_temp_w.sub(min_temp_winter).abs().idxmin()
        week_closest2mean_winter = winter_temp_w.loc[[idx_mean_winter]]
        week_closest2min_winter = winter_temp_w.loc[[idx_min_winter]]
        weekend_min_winter = week_closest2min_winter.index + dt.timedelta(days=6)
        weekend_mean_winter = week_closest2mean_winter.index + dt.timedelta(days=6)
        winter = [
            "Winter - Week Nearest Min Temperature For Period",
            "Extreme",
            f"{week_closest2min_winter.index.month[0]}/{week_closest2min_winter.index.day[0]}",
            f"{weekend_min_winter.month[0]}/{weekend_min_winter.day[0]}",
            "Winter - Week Nearest Average Temperature For Period",
            "Typical",
            f"{week_closest2mean_winter.index.month[0]}/{week_closest2mean_winter.index.day[0]}",
            f"{weekend_mean_winter.month[0]}/{weekend_mean_winter.day[0]}",
        ]

        typical_extreme_period = typical_extreme_period + winter

    if not autumn_temp.empty:
        typ_temp_autumn = autumn_temp.mean()
        autumn_temp_w = autumn_temp.resample("W", label="left").mean()
        idx_mean_autumn = autumn_temp_w.sub(typ_temp_autumn).abs().idxmin()
        week_closest2mean_autumn = autumn_temp_w.loc[[idx_mean_autumn]]
        weekend_mean_autumn = week_closest2mean_autumn.index + dt.timedelta(days=6)
        autumn = [
            "Autumn - Week Nearest Average Temperature For Period",
            "Typical",
            f"{week_closest2mean_autumn.index.month[0]}/{week_closest2mean_autumn.index.day[0]}",
            f"{weekend_mean_autumn.month[0]}/{weekend_mean_autumn.day[0]}",
        ]

        typical_extreme_period = typical_extreme_period + autumn

    if not spring_temp.empty:
        typ_temp_spring = spring_temp.mean()
        spring_temp_w = spring_temp.resample("W", label="left").mean()
        idx_mean_spring = spring_temp_w.sub(typ_temp_spring).abs().idxmin()
        week_closest2mean_spring = spring_temp_w.loc[[idx_mean_spring]]
        weekend_mean_spring = week_closest2mean_spring.index + dt.timedelta(days=6)
        spring = [
            "Spring - Week Nearest Average Temperature For Period",
            "Typical",
            f"{week_closest2mean_spring.index.month[0]}/{week_closest2mean_spring.index.day[0]}",
            f"{weekend_mean_spring.month[0]}/{weekend_mean_spring.day[0]}",
        ]

        typical_extreme_period = typical_extreme_period + spring

    return typical_extreme_period
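
# Numeric sketch of the nearest-week selection above: with weekly means of
# [18.0, 21.5, 24.0] degC and a seasonal mean of 21.0 degC,
# .sub(21.0).abs() yields [3.0, 0.5, 3.0], so .idxmin() returns the index
# (the start date) of the middle week.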

def _line4_ground_temp(df):
    """
    Parse the weather data to get the ground temperatures.

    # Todo: it has not been checked yet whether this calculation is correct.

    return:
        ground_temp: fourth line (GROUND TEMPERATURES) of the EPW data as a list
    """
    ground_temp = [
        "GROUND TEMPERATURES",
    ]

    df_4_ground_temp = df.copy()

    df_w_ground = (
        df_4_ground_temp.resample("M").mean().round(decimals=1)
    )  # resample to monthly intervals
    try:
        ground_t = df_w_ground[
            [
                "Soil_Temperature_5cm",
                "Soil_Temperature_10cm",
                "Soil_Temperature_20cm",
                "Soil_Temperature_50cm",
                "Soil_Temperature_1m",
            ]
        ].to_numpy()  # DataFrame to array
        # convert the array to a list and concatenate
        ground_temp = (
            ground_temp
            + [5]  # ground layers
            + [0.05, None, None, None]
            + ground_t[:, 0].tolist()
            + [0.1, None, None, None]
            + ground_t[:, 1].tolist()
            + [0.2, None, None, None]
            + ground_t[:, 2].tolist()
            + [0.5, None, None, None]
            + ground_t[:, 3].tolist()
            + [1, None, None, None]
            + ground_t[:, 4].tolist()
        )
        return ground_temp
    except KeyError as err:
        logger.warning(
            "Error while adding the probably unnecessary ground temperatures to the .epw "
            "file header. A placeholder will be used. Error: %s", err
        )
        ground_temp = ground_temp + [0]  # 0 ground layers

    return ground_temp
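
# Sketch of the resulting header line: a layer count, then per layer its depth
# in m followed by three unused fields and one monthly mean per month of the
# exported period, e.g. (illustrative values for two months):
#   ["GROUND TEMPERATURES", 5, 0.05, None, None, None, 3.1, 4.2, 0.1, ...]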

def _line5_holiday_dl_saving(df):
    """
    Create the fifth line of the EPW file.

    return:
        holiday_dl_saving: fifth line (HOLIDAYS/DAYLIGHT SAVINGS) of the EPW data as a list
    """
    is_leap = "Yes" if df.index.is_leap_year.any() else "No"
    holiday_dl_saving = [
        "HOLIDAYS/DAYLIGHT SAVINGS",
        is_leap,  # Leap Year Observed
        0,  # Daylight Saving Start Date
        0,  # Daylight Saving End Date
        0,  # Number of Holidays
    ]
    return holiday_dl_saving
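
# Example output for a period that touches a leap year:
#   ["HOLIDAYS/DAYLIGHT SAVINGS", "Yes", 0, 0, 0]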

def _line6_comment_1():
    """
    Create the sixth line of the EPW file.

    return:
        comment_1: sixth line (COMMENTS 1) of the EPW data as a list
    """
    return [
        "COMMENTS 1",
        "For data format information see the code or check: "
        "https://designbuilder.co.uk/cahelp/Content/EnergyPlusWeatherFileFormat.htm",
    ]


def _line7_comment_2(comment2=None):
    """
    Create the seventh line of the EPW file.

    return:
        comment_2: seventh line (COMMENTS 2) of the EPW data as a list
    """
    return ["COMMENTS 2", comment2]


def _line8_data_periods(df):
    """
    Parse the weather data to get the start and end data points.

    return:
        data_periods: eighth line (DATA PERIODS) of the EPW data as a list
    """
    start_dp = df.index[0]
    end_dp = df.index[-1]
    data_periods = [
        "DATA PERIODS",
        1,  # number of data periods
        1,  # number of intervals per hour
        "Data",  # data period name or description
        start_dp.strftime("%A"),  # data period start day of week
        start_dp.strftime("%m/%d"),  # data period start date
        end_dp.strftime("%m/%d"),  # data period end date
    ]
    return data_periods
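
# Example of the resulting line (illustrative dates):
#   ["DATA PERIODS", 1, 1, "Data", "Saturday", "01/01", "12/31"]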

def _format_data(df, start, stop, timezone, fillna):
    """
    Parse the actual weather data for export.

    return:
        data_list: the EPW data records as a list
    """
    ### measurement time conversion
    df = time_observation_transformations.shift_time_by_dict(EPWFormat.export_format(), df)

    ### if possible avoid back and forth interpolating -> pass through
    ### variables without shifting
    df = pass_through_handling.pass_through_measurements_with_back_and_forth_interpolating(
        df, EPWFormat.export_format()
    )

    ### select only the desired period
    df = time_observation_transformations.truncate_data_from_start_to_stop(
        df, start, stop
    )

    ### shift to the desired timezone
    df = df.shift(periods=timezone, freq="h", axis=0)

    ### select the desired columns
    df = auxiliary.force_data_variable_convention(df, EPWFormat.export_format())

    # fill newly created variables of the desired output format
    # split the DataFrame index into separate date/time columns
    df["Year"] = pd.DatetimeIndex(df.index).year
    df["Month"] = pd.DatetimeIndex(df.index).month
    df["Day"] = pd.DatetimeIndex(df.index).day
    df["Hour"] = pd.DatetimeIndex(df.index).hour
    df["Minute"] = pd.DatetimeIndex(df.index).minute

    ### meet special epw requirements
    # convert hour 0 to hour 24 of the previous day
    df["Hour"] = df["Hour"].replace([0], 24)
    # if the day is not 1 -> subtract 1 from the day
    df.loc[(df["Hour"] == 24) & (df["Day"] != 1), "Day"] = df.loc[
        (df["Hour"] == 24) & (df["Day"] != 1), "Day"
    ].sub(1)
    # if the day is 1 -> clear year, month and day -> refill with ffill
    df.loc[
        (df["Hour"] == 24) & (df["Day"] == 1),
        ["Year", "Month", "Day"]
    ] = np.nan
    df["Year"] = (
        df["Year"].ffill().bfill().astype(int)
    )
    df["Month"] = (
        df["Month"].ffill().bfill().astype(int)
    )
    df["Day"] = df["Day"].ffill().bfill().astype(int)
    df.reset_index(drop=True, inplace=True)

    # data should always contain full days
    df, first_day_added_rows = fill_full_first_day(df)
    df, last_day_added_rows = fill_full_last_day(df)

    # ensure data types where required
    columns_to_convert = ["Year", "Month", "Day", "Hour", "Minute"]
    for col in columns_to_convert:
        df[col] = df[col].astype(float).astype(int)

    ### fill NaNs
    if fillna:
        # forward-fill the rows added at the end of df
        # (guard against last_day_added_rows == 0, where `.iloc[-0:]` would
        # select and fill the whole frame)
        if last_day_added_rows:
            df.iloc[-last_day_added_rows:, :] = df.ffill().iloc[
                -last_day_added_rows:, :
            ]
        # fill the rows added at the beginning of df
        df.iloc[:first_day_added_rows, :] = df.bfill().iloc[
            :first_day_added_rows, :
        ]

        # fill NaNs in the first and last lines (possibly lost through shifting)
        df.iloc[0 + first_day_added_rows + 1, :] = df.bfill().iloc[
            0 + first_day_added_rows + 1, :
        ]
        df.iloc[-1 - last_day_added_rows, :] = df.ffill().iloc[
            -1 - last_day_added_rows, :
        ]

        # fill the remaining NaNs with defaults
        df = auxiliary.fill_nan_from_format_dict(df, EPWFormat.export_format())

    # cut off float digits (required for EnergyPlus)
    df = df.applymap(lambda x: (f"{x:.1f}") if isinstance(x, float) else x)

    # again make sure the correct order and variables are applied
    # (processing might have mixed them up)
    df = auxiliary.force_data_variable_convention(df, EPWFormat.export_format())

    ### format the dataframe to a list
    data_list = df[EPWFormat.export_format().keys()].to_numpy().tolist()

    return data_list, df
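
# Illustration of the EPW hour convention applied above: a core timestamp of
# 2022-01-02 00:00 becomes Year=2022, Month=1, Day=1, Hour=24, i.e. midnight
# is written as hour 24 of the previous day, as EPW requires.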

def fill_full_first_day(df):
    # identify the first hour and date of the DataFrame
    first_minute = df.iloc[0]["Minute"]
    first_hour = df.iloc[0]["Hour"]
    first_day = df.iloc[0]["Day"]
    first_month = df.iloc[0]["Month"]
    first_year = df.iloc[0]["Year"]
    rows_to_add = 0

    # if the first hour is not 1, add rows so that the data starts with hour 1
    if first_hour != 1:
        # if the first hour is 24, we don't want to add a full extra day; just
        # delete the line so that the data frame starts with hour 1
        if first_hour == 24:
            df = df.drop(df.index[0])
        else:
            # calculate how many rows to add
            rows_to_add = int(first_hour) - 1

            # generate the new rows
            for i in range(rows_to_add, 0, -1):
                new_row = pd.DataFrame(
                    {
                        "Minute": [first_minute],
                        "Hour": [i],
                        "Day": [first_day],
                        "Month": [first_month],
                        "Year": [first_year],
                    }
                )
                df = pd.concat([new_row, df]).reset_index(drop=True)
    return df, rows_to_add
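
# Sketch: a frame whose first row has Hour == 5 gets four synthetic rows
# (Hour 1..4, with the same date and all data columns NaN) prepended,
# and rows_to_add == 4.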

def fill_full_last_day(df):
    # identify the last hour and date of the DataFrame
    last_hour = df.iloc[-1]["Hour"]
    last_day = df.iloc[-1]["Day"]
    last_month = df.iloc[-1]["Month"]
    last_year = df.iloc[-1]["Year"]
    last_minute = df.iloc[-1]["Minute"]
    rows_to_add = 0

    # if the last hour is not 24, add rows to reach hour 24
    if last_hour != 24:
        # if the last hour is 0, we don't want to add a full extra day; just
        # delete the line so that the data frame ends with hour 24
        if last_hour == 0:
            df = df.drop(df.index[-1])
        else:
            # calculate how many rows to add
            rows_to_add = 24 - int(last_hour)

            # generate the new rows
            new_rows = []
            for i in range(1, rows_to_add + 1):
                new_row = {
                    "Minute": last_minute,
                    "Hour": last_hour + i,
                    "Day": last_day,
                    "Month": last_month,
                    "Year": last_year,
                }
                new_rows.append(new_row)

            # append the new rows to the DataFrame
            df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
    return df, rows_to_add
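
# Sketch: a frame whose last row has Hour == 21 gets three synthetic rows
# (Hour 22..24, with the same date and all data columns NaN) appended,
# and rows_to_add == 3.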