Coverage for aixweather/transformation_to_core_data/EPW.py: 98%

60 statements  

coverage.py v7.4.4, created at 2025-12-31 11:58 +0000

1""" 

2This module includes a function to transform EPW data to core data format. 

3""" 

4 

5import pandas as pd 

6from copy import deepcopy 

7import logging 

8 

9from aixweather import definitions 

10from aixweather.imports.utils_import import MetaData 

11from aixweather.transformation_functions import ( 

12 auxiliary, 

13 time_observation_transformations, 

14 variable_transformations, 

15 pass_through_handling, 

16) 

17 

18logger = logging.getLogger(__name__) 

19 

20 

class EPWFormat:
    """
    Information on the EPW format:
    Online sources for EPW data: https://climate.onebuilding.org/default.html and
    https://www.ladybug.tools/epwmap/

    Format info:
    - key = output data point name
    - core_name = corresponding name matching the format_core_data
    - time_of_meas_shift = desired 30 min shifting+interpolation to convert a value
      that is "at indicated time" to the "average of the preceding hour" (ind2prec)
    - unit = unit of the output data following the naming convention of format_core_data
    - nan = the default missing-data values defined by the EPW format specification;
      entries that are NaN are filled with these values, and on import they are
      treated as missing values

    All changes made here automatically propagate to the calculations.
    Exception: unit conversions have to be added manually.

    Information for shifting:
    Hour: "This is the hour of the data. (1 - 24). Hour 1 is 00:01 to 01:00.
    Cannot be missing." But, e.g.:
    DryBulbTemp: "This is the dry bulb temperature in C at the time indicated." And:
    GlobHorRad: "received on a horizontal surface during the hour preceding the time indicated."
    ----> Hence, we assume that hour 1 should show the DryBulbTemp from
    0:30 to 1:30, i.e. the temperature at the indicated time.

    Time of measurement checked by Martin Rätz (07.08.2023).
    Units checked by Martin Rätz (07.08.2023).
    """

    @classmethod
    def import_format(cls) -> dict:
        """
        Inverts the export format from core2export to import2core.
        """
        export_format = cls.export_format()
        import_format = deepcopy(export_format)
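        # Invert the time-of-measurement shift direction: variables exported with
        # "ind2prec" (indicated time -> preceding-hour average) are imported with
        # "prec2ind", and "ind2foll" correspondingly becomes "foll2ind".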

        for key, value in import_format.items():
            time_shift = value["time_of_meas_shift"]
            if time_shift == "ind2prec":
                value["time_of_meas_shift"] = "prec2ind"
            elif time_shift == "ind2foll":
                value["time_of_meas_shift"] = "foll2ind"
        return import_format

    @classmethod
    def export_format(cls) -> dict:
        return {
            "Year": {"core_name": "", "unit": "year", "time_of_meas_shift": None, "nan": None},
            "Month": {"core_name": "", "unit": "month", "time_of_meas_shift": None, "nan": None},
            "Day": {"core_name": "", "unit": "day", "time_of_meas_shift": None, "nan": None},
            "Hour": {"core_name": "", "unit": "hour", "time_of_meas_shift": None, "nan": None},
            "Minute": {"core_name": "", "unit": "minute", "time_of_meas_shift": None, "nan": None},
            "Data Source and Uncertainty Flags": {"core_name": "", "unit": None, "time_of_meas_shift": None, "nan": "?"},
            "DryBulbTemp": {"core_name": "DryBulbTemp", "unit": "degC", "time_of_meas_shift": None, "nan": 99.9},
            "DewPointTemp": {"core_name": "DewPointTemp", "unit": "degC", "time_of_meas_shift": None, "nan": 99.9},
            "RelHum": {"core_name": "RelHum", "unit": "percent", "time_of_meas_shift": None, "nan": 999.0},
            "AtmPressure": {"core_name": "AtmPressure", "unit": "Pa", "time_of_meas_shift": None, "nan": 999999.0},
            "ExtHorRad": {"core_name": "ExtHorRad", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "ExtDirNormRad": {"core_name": "ExtDirNormRad", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "HorInfra": {"core_name": "HorInfra", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "GlobHorRad": {"core_name": "GlobHorRad", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "DirNormRad": {"core_name": "DirNormRad", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "DiffHorRad": {"core_name": "DiffHorRad", "unit": "Wh/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "GlobHorIll": {"core_name": "GlobHorIll", "unit": "lux", "time_of_meas_shift": "ind2prec", "nan": 999999.0},
            "DirecNormIll": {"core_name": "DirecNormIll", "unit": "lux", "time_of_meas_shift": "ind2prec", "nan": 999999.0},
            "DiffuseHorIll": {"core_name": "DiffuseHorIll", "unit": "lux", "time_of_meas_shift": "ind2prec", "nan": 999999.0},
            "ZenithLum": {"core_name": "ZenithLum", "unit": "Cd/m2", "time_of_meas_shift": "ind2prec", "nan": 9999.0},
            "WindDir": {"core_name": "WindDir", "unit": "deg", "time_of_meas_shift": None, "nan": 999.0},
            "WindSpeed": {"core_name": "WindSpeed", "unit": "m/s", "time_of_meas_shift": None, "nan": 999.0},
            "TotalSkyCover": {"core_name": "TotalSkyCover", "unit": "1tenth", "time_of_meas_shift": None, "nan": 99},
            "OpaqueSkyCover": {"core_name": "OpaqueSkyCover", "unit": "1tenth", "time_of_meas_shift": None, "nan": 99},
            "Visibility": {"core_name": "Visibility", "unit": "km", "time_of_meas_shift": None, "nan": 9999.0},
            "CeilingH": {"core_name": "CeilingH", "unit": "m", "time_of_meas_shift": None, "nan": 99999},
            "WeatherObs": {"core_name": "", "unit": "None", "time_of_meas_shift": None, "nan": 9},
            "WeatherCode": {"core_name": "", "unit": "None", "time_of_meas_shift": None, "nan": 999999999},
            "PrecWater": {"core_name": "PrecWater", "unit": "mm", "time_of_meas_shift": None, "nan": 999.0},
            "Aerosol": {"core_name": "Aerosol", "unit": "1thousandth", "time_of_meas_shift": None, "nan": 0.999},
            "Snow": {"core_name": "", "unit": "cm", "time_of_meas_shift": None, "nan": 999.0},
            "DaysSinceSnow": {"core_name": "", "unit": "days", "time_of_meas_shift": None, "nan": 99},
            "Albedo": {"core_name": "", "unit": "None", "time_of_meas_shift": None, "nan": 999},
            "LiquidPrecD": {"core_name": "LiquidPrecD", "unit": "mm/h", "time_of_meas_shift": None, "nan": 999},
            "LiquidPrepQuant": {"core_name": "", "unit": "hours", "time_of_meas_shift": None, "nan": 99},
        }


def EPW_to_core_data(df_import: pd.DataFrame, meta: MetaData) -> pd.DataFrame:
    """
    Transform imported EPW (EnergyPlus Weather) data into the core data format.

    Args:
        df_import (pd.DataFrame): The DataFrame containing imported EPW weather data.
        meta (MetaData): Metadata associated with the data.

    Returns:
        pd.DataFrame: The transformed DataFrame in the core data format.
    """
    format_epw = EPWFormat.import_format()

    # evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=format_epw
    )

    def epw_to_datetimeindex(df):
        """
        Convert the first 4 columns of the DataFrame to a DatetimeIndex and set it
        as the index.
        """
        # loop one by one to avoid faults with non-continuous data
        datetime_list = []
        for _, row in df.iterrows():
            year, month, day, hour = row[:4]
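            # EPW columns 0-3 hold Year, Month, Day, Hour; hours run from 1 to 24,
            # where hour 24 is midnight at the end of the day (hour 0 of the
            # following day in pandas' 0-23 convention).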

            if hour == 24:
                hour = 0
                # Increment the day by one for those rows where hour
                # was originally 24
                row_datetime = pd.Timestamp(year, month, day, hour) + pd.Timedelta(days=1)
            else:
                row_datetime = pd.Timestamp(year, month, day, hour)
            datetime_list.append(row_datetime)

        # Set the datetime list as index with the name 'datetime'
        df.index = datetime_list
        df.index = df.index.rename("datetime")

        return df

    def if_TMY_convert_to_one_year(df):
        """TMY (typical meteorological year) data in .epw files often contains data for a period
        of one year, but each month is taken from a different year. This would lead to several
        years of NaN data in between. As the year is irrelevant in TMY data, we set all dates to
        the year of February's data. February is chosen to avoid leap-year issues.

        Whether the data is a TMY is detected automatically through the following criteria:
        - the available data covers exactly 8760 data points (one non-leap year)
        - the period covered by the timestamps spans more than one year
        - the first date is the first of January at hour 1

        An info log message is emitted if the data is transformed."""
        if (
            len(df) == 8760  # exactly one year of data
            and df.iloc[:, 0].max() - df.iloc[:, 0].min() > 1  # spanning over more than one year
            and df.iloc[0, 1] == 1  # first month is January
            and df.iloc[0, 2] == 1  # first day is one
            and df.iloc[0, 3] == 1  # first hour is one
        ):
            year_of_february = df.loc[df.iloc[:, 1] == 2, 0].iloc[0]
            # Replace the year component with the year of February
            df.iloc[:, 0] = year_of_february
            logger.info(
                "The data was transformed to one year of data as it seems to be TMY data. "
                "The year is irrelevant for TMY data."
            )
        return df

    ### preprocessing raw data for further operations
    df = df_import.copy()
    df = if_TMY_convert_to_one_year(df)
    df = epw_to_datetimeindex(df)
    # Resample the DataFrame to make the DatetimeIndex complete and monotonic
    df = df.resample("h").asfreq()
    # give names to columns according to the documentation of the import data
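    # (this positional assignment relies on the raw EPW columns arriving in the
    #  order of the format_epw keys, i.e. the EPW specification order)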

    df.columns = list(format_epw.keys())
    # rename available variables to core data format
    df = auxiliary.rename_columns(df, format_epw)
    # delete dummy values from EPW
    df = auxiliary.replace_dummy_with_nan(df, format_epw)

    ### convert timezone to UTC+0
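    # meta.timezone holds the local UTC offset in hours, so shifting the index by
    # -meta.timezone converts the local timestamps to UTC.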

    df = df.shift(periods=-meta.timezone, freq="h", axis=0)

    ### shift and interpolate data forward or backward by 30 min
    df_no_shift = df.copy()
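    # keep an unshifted copy; it is reused for the pass-through variables further below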

    df = time_observation_transformations.shift_time_by_dict(format_epw, df)

    def transform(df):
        ### force variable naming to follow format_core_data
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)
        ### unit conversion
        # all units are already correct
        ### impute missing variables from other available ones
        df, calc_overview = variable_transformations.variable_transform_all(df, meta)
        return df, calc_overview

    df, meta.executed_transformations = transform(df)

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back-and-forth interpolation
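    # transform() is passed along so that the pass-through handling can apply the
    # same variable transformations to the unshifted data as well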

    df = pass_through_handling.create_pass_through_variables(
        df_shifted=df,
        df_no_shift=df_no_shift,
        format=format_epw,
        transform_func=transform,
        meta=meta,
    )

    return df

217 return df