Coverage for aixweather/transformation_to_core_data/EPW.py: 98%
60 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-12-31 11:58 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-12-31 11:58 +0000
1"""
2This module includes a function to transform EPW data to core data format.
3"""
5import pandas as pd
6from copy import deepcopy
7import logging
9from aixweather import definitions
10from aixweather.imports.utils_import import MetaData
11from aixweather.transformation_functions import (
12 auxiliary,
13 time_observation_transformations,
14 variable_transformations,
15 pass_through_handling,
16)
18logger = logging.getLogger(__name__)
class EPWFormat:
    """
    Information on EPW format:
    Online sources for EPW data: https://climate.onebuilding.org/default.html and
    https://www.ladybug.tools/epwmap/

    Format info:
    - key = output data point name
    - core_name = corresponding name matching the format_core_data
    - time_of_meas_shift = desired 30min shifting+interpolation to convert the value that is "at
      indicated time" to "average of preceding hour" (ind2prec).
    - unit = unit of the output data following the naming convention of format_core_data
    - nan = The default values stated from the format information, those values are
      filled if nan.

    All changes here automatically change the calculations.
    Exception: unit conversions have to be added manually.

    Information for shifting:
    Hour: This is the hour of the data. (1 - 24). Hour 1 is 00:01 to 01:00.
    Cannot be missing. but e.g.:
    DryBulbTemp: This is the dry bulb temperature in C at the time indicated. and:
    GlobHorRad: received on a horizontal surface during the hour preceding the time indicated.
    ----> Hence, we assume that hour 1 should show the DryBulbTemp from
    0:30 to 1:30, i.e. the Temp at indicated time.

    time of measurement checked by Martin Rätz (07.08.2023)
    units checked by Martin Rätz (07.08.2023)
    """

    @classmethod
    def import_format(cls) -> dict:
        """
        Inverts the export format from core2export to import2core.

        Returns:
            dict: A deep copy of ``export_format()`` in which every
            ``time_of_meas_shift`` of "ind2prec" is replaced by "prec2ind" and
            every "ind2foll" by "foll2ind". All other entries (including
            ``None``) are left unchanged.
        """
        # Direction of each export shift mapped to its import counterpart.
        inverse_shift = {"ind2prec": "prec2ind", "ind2foll": "foll2ind"}

        # Deep copy so the nested per-variable dicts of the export format are
        # never mutated, even if export_format() returns a shared object.
        import_format = deepcopy(cls.export_format())
        for value in import_format.values():
            time_shift = value["time_of_meas_shift"]
            # Shifts without a known inverse (e.g. None) pass through as-is.
            value["time_of_meas_shift"] = inverse_shift.get(time_shift, time_shift)
        return import_format

    @classmethod
    def export_format(cls) -> dict:
        """
        Return the EPW column specification used for exporting core data.

        The dictionary is ordered like the data columns of an EPW file; each
        key maps to a dict with the entries ``core_name``, ``unit``,
        ``time_of_meas_shift`` and ``nan`` described in the class docstring.

        Returns:
            dict: A newly built format dictionary on every call.
        """
        return {
            "Year": {"core_name": "", "unit": "year", "time_of_meas_shift": None, "nan": None},
            "Month": {"core_name": "", "unit": "month", "time_of_meas_shift": None, "nan": None},
            "Day": {"core_name": "", "unit": "day", "time_of_meas_shift": None, "nan": None},
            "Hour": {"core_name": "", "unit": "hour", "time_of_meas_shift": None, "nan": None},
            "Minute": {"core_name": "", "unit": "minute", "time_of_meas_shift": None, "nan": None},
            "Data Source and Uncertainty Flags": {"core_name": "", "unit": None, "time_of_meas_shift": None, "nan": "?"},
            "DryBulbTemp": {"core_name": "DryBulbTemp", "unit": "degC", "time_of_meas_shift": None, "nan": 99.9},
            "DewPointTemp": {"core_name": "DewPointTemp", "unit": "degC", "time_of_meas_shift": None, "nan": 99.9},
            "RelHum": {"core_name": "RelHum", "unit": "percent", "time_of_meas_shift": None, "nan": 999.0},
            "AtmPressure": {"core_name": "AtmPressure", "unit": "Pa", "time_of_meas_shift": None, "nan": 999999.0},
            "ExtHorRad": {"core_name": "ExtHorRad", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "ExtDirNormRad": {"core_name": "ExtDirNormRad", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "HorInfra": {"core_name": "HorInfra", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "GlobHorRad": {"core_name": "GlobHorRad", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "DirNormRad": {"core_name": "DirNormRad", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "DiffHorRad": {"core_name": "DiffHorRad", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "GlobHorIll": {"core_name": "GlobHorIll", "unit": "lux", "time_of_meas_shift": "ind2prec", "nan": 999999.0},
            "DirecNormIll": {"core_name": "DirecNormIll", "unit": "lux", "time_of_meas_shift": "ind2prec", "nan": 999999.0},
            "DiffuseHorIll": {"core_name": "DiffuseHorIll", "unit": "lux", "time_of_meas_shift": "ind2prec", "nan": 999999.0},
            "ZenithLum": {"core_name": "ZenithLum", "unit": "Cd/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "WindDir": {"core_name": "WindDir", "unit": "deg", "time_of_meas_shift": None, "nan": 999.0},
            "WindSpeed": {"core_name": "WindSpeed", "unit": "m/s", "time_of_meas_shift": None, "nan": 999.0},
            "TotalSkyCover": {"core_name": "TotalSkyCover", "unit": "1tenth", "time_of_meas_shift": None, "nan": 99},
            "OpaqueSkyCover": {"core_name": "OpaqueSkyCover", "unit": "1tenth", "time_of_meas_shift": None, "nan": 99},
            "Visibility": {"core_name": "Visibility", "unit": "km", "time_of_meas_shift": None, "nan": 9999.0},
            "CeilingH": {"core_name": "CeilingH", "unit": "m", "time_of_meas_shift": None, "nan": 99999},
            "WeatherObs": {"core_name": "", "unit": "None", "time_of_meas_shift": None, "nan": 9},
            "WeatherCode": {"core_name": "", "unit": "None", "time_of_meas_shift": None, "nan": 999999999},
            "PrecWater": {"core_name": "PrecWater", "unit": "mm", "time_of_meas_shift": None, "nan": 999.0},
            "Aerosol": {"core_name": "Aerosol", "unit": "1thousandth", "time_of_meas_shift": None, "nan": 0.999},
            "Snow": {"core_name": "", "unit": "cm", "time_of_meas_shift": None, "nan": 999.0},
            "DaysSinceSnow": {"core_name": "", "unit": "days", "time_of_meas_shift": None, "nan": 99},
            "Albedo": {"core_name": "", "unit": "None", "time_of_meas_shift": None, "nan": 999},
            "LiquidPrecD": {"core_name": "LiquidPrecD", "unit": "mm/h", "time_of_meas_shift": None, "nan": 999},
            "LiquidPrepQuant": {"core_name": "", "unit": "hours", "time_of_meas_shift": None, "nan": 99},
        }
def EPW_to_core_data(df_import: pd.DataFrame, meta: MetaData) -> pd.DataFrame:
    """
    Transform imported EPW (EnergyPlus Weather) data into core data format.

    Args:
        df_import (pd.DataFrame): The DataFrame containing imported EPW weather data.
            Columns are addressed by position: the first four are read as
            year, month, day and hour, and the full column order must match
            the keys of ``EPWFormat.import_format()``. The input is copied
            and not modified.
        meta (MetaData): Metadata associated with the data. ``meta.timezone``
            is read as an hourly offset for the shift to UTC+0, and
            ``meta.executed_transformations`` is set by this function.

    Returns:
        pd.DataFrame: The transformed DataFrame in the core data format.
    """
    format_epw = EPWFormat.import_format()

    # evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=format_epw
    )

    def epw_to_datetimeindex(df):
        """
        Convert the first 4 columns of the DataFrame (year, month, day, hour)
        to a DatetimeIndex and set it as the index, named 'datetime'.
        """
        # loop one by one to avoid faults with non-continuous data
        datetime_list = []
        # Only the first four columns are needed; itertuples avoids building
        # a full Series per row and does not depend on the column labels.
        date_parts = df.iloc[:, :4].itertuples(index=False, name=None)
        for year, month, day, hour in date_parts:
            if hour == 24:
                # Hour 24 is hour 0 of the following day: build the timestamp
                # at hour 0 and increment the day by one.
                row_datetime = pd.Timestamp(year, month, day, 0) + pd.Timedelta(days=1)
            else:
                row_datetime = pd.Timestamp(year, month, day, hour)
            datetime_list.append(row_datetime)

        # Setting datetime column as index with name 'datetime'
        df.index = pd.DatetimeIndex(datetime_list, name="datetime")

        return df

    def if_TMY_convert_to_one_year(df):
        """TMY (typical meteorological year) data in .epw files often contains data for a period
        of one year but each month is from a different year. This will lead to several years of
        nan data in between. As the year is irrelevant in tmy data, we set all dates to the year
        of februaries data. February is chosen to avoid leap year issues.

        It is automatically detected whether it is a TMY through the following criteria:
        - the available data covers exactly 8760 data points (one non-leap year)
        - the year column spans a range greater than one (max - min > 1)
        - the first date is the first of January at hour 1

        This will lead to an info log message if the data is transformed."""
        years = df.iloc[:, 0]
        months = df.iloc[:, 1]
        if (
            len(df) == 8760  # exactly one year of data
            and years.max() - years.min() > 1  # spanning over more than one year
            and df.iloc[0, 1] == 1  # first month is January
            and df.iloc[0, 2] == 1  # first day is one
            and df.iloc[0, 3] == 1  # first hour is one
        ):
            # Positional access throughout, so this also works when the
            # columns are not labelled 0..n.
            year_of_february = years[months == 2].iloc[0]
            # Replace the year component with the year of February
            df.iloc[:, 0] = year_of_february
            logger.info(
                "The data was transformed to one year of data as it seems to be TMY data. "
                "The year is irrelevant for TMY data."
            )
        return df

    ### preprocessing raw data for further operations
    df = df_import.copy()
    df = if_TMY_convert_to_one_year(df)
    df = epw_to_datetimeindex(df)
    # Resample the DataFrame to make the DatetimeIndex complete and monotonic
    df = df.resample("h").asfreq()
    # give names to columns according to documentation of import data
    df.columns = list(format_epw)
    # rename available variables to core data format
    df = auxiliary.rename_columns(df, format_epw)
    # delete dummy values from EPW
    df = auxiliary.replace_dummy_with_nan(df, format_epw)

    ### convert timezone to UTC+0
    df = df.shift(periods=-meta.timezone, freq="h", axis=0)

    ### shift and interpolate data forward 30mins or backward -30mins
    # keep an unshifted copy for the pass-through variables created below
    df_no_shift = df.copy()
    df = time_observation_transformations.shift_time_by_dict(format_epw, df)

    def transform(df):
        """Apply the core-data naming convention and derive missing variables.

        Returns the transformed DataFrame and the overview returned by
        ``variable_transformations.variable_transform_all``.
        """
        ### force variable naming format_core_data
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)
        ### unit conversion
        # all units correct
        ### impute missing variables from other available ones
        df, calc_overview = variable_transformations.variable_transform_all(df, meta)
        return df, calc_overview

    df, meta.executed_transformations = transform(df)

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back and forth interpolating
    df = pass_through_handling.create_pass_through_variables(
        df_shifted=df,
        df_no_shift=df_no_shift,
        format=format_epw,
        transform_func=transform,
        meta=meta,
    )

    return df