Coverage for aixweather/transformation_to_core_data/DWD.py: 100%

60 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-01-06 16:01 +0000

1from datetime import datetime, timedelta 

2import pandas as pd 

3 

4from aixweather import definitions 

5from aixweather.imports.utils_import import MetaData 

6from aixweather.transformation_functions import auxiliary, time_observation_transformations, variable_transformations, \ 

7 pass_through_handling, unit_conversions 

8 

9""" 

format_DWD_historical information:
see readme

Format info:
key = raw data point name
core_name = corresponding name matching the format_core_data
time_of_meas_shift = desired 30-minute shift plus interpolation that converts a value
representing e.g. the "average of the preceding hour" into a value at the
"indicated time" (prec2ind). None means that no shift is requested.
unit = unit of the raw data, following the naming convention of format_core_data
resample (optional) = if this field is present, the variable is resampled by sum
instead of mean

All changes here automatically change the calculations.
Exception: unit conversions have to be added manually.

checked by Martin Rätz (08.08.2023)

24""" 

# Maps each raw DWD observation column to its core-data variable name, the
# requested time-of-measurement shift and the unit of the raw values.
# An optional "resample" field marks variables that are aggregated by sum
# (see variables_to_sum_DWD_historical); an optional "nan" field lists extra
# values that are presumably treated as missing-data dummies -- confirm
# against auxiliary.replace_dummy_with_nan.
format_DWD_historical = {
    # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/
    # 10_minutes/air_temperature/DESCRIPTION_obsgermany_climate_10min_air_temperature_en.pdf
    "RF_10": {
        "core_name": "RelHum",
        "time_of_meas_shift": "foll2ind",
        "unit": "percent",
    },
    "TT_10": {
        "core_name": "DryBulbTemp",
        "time_of_meas_shift": "foll2ind",
        "unit": "degC",
    },
    "TD_10": {
        "core_name": "DewPointTemp",
        "time_of_meas_shift": "foll2ind",
        "unit": "degC",
    },
    "PP_10": {
        "core_name": "AtmPressure",
        "time_of_meas_shift": "foll2ind",
        "unit": "hPa",
    },
    # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/solar/BESCHREIBUNG_obsgermany_climate_10min_solar_de.pdf
    "DS_10": {
        "core_name": "DiffHorRad",
        "time_of_meas_shift": "foll2ind",
        "unit": "J/cm2",
        "resample": "sum",
    },
    # https://de.wikipedia.org/wiki/Globalstrahlung
    "GS_10": {
        "core_name": "GlobHorRad",
        "time_of_meas_shift": "foll2ind",
        "unit": "J/cm2",
        "resample": "sum",
    },
    # https://de.wikipedia.org/wiki/Atmosph%C3%A4rische_Gegenstrahlung
    "LS_10": {
        "core_name": "HorInfra",
        "time_of_meas_shift": "foll2ind",
        "unit": "J/cm2",
        "resample": "sum",
        "nan": [990, -999],
    },
    # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/wind/BESCHREIBUNG_obsgermany_climate_10min_wind_de.pdf
    "FF_10": {
        "core_name": "WindSpeed",
        "time_of_meas_shift": "prec2ind",
        "unit": "m/s",
    },
    "DD_10": {
        "core_name": "WindDir",
        "time_of_meas_shift": "prec2ind",
        "unit": "deg",
    },
    # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/precipitation/BESCHREIBUNG_obsgermany_climate_10min_precipitation_de.pdf
    "RWS_10": {
        "core_name": "LiquidPrecD",
        "time_of_meas_shift": "prec2ind",
        "unit": "mm/h",
        "resample": "sum",
    },
    # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/soil_temperature/BESCHREIBUNG_obsgermany_climate_hourly_soil_temperature_de.pdf
    "V_TE100": {
        "core_name": "Soil_Temperature_1m",
        "time_of_meas_shift": None,
        "unit": "degC",
    },
    "V_TE050": {
        "core_name": "Soil_Temperature_50cm",
        "time_of_meas_shift": None,
        "unit": "degC",
    },
    "V_TE020": {
        "core_name": "Soil_Temperature_20cm",
        "time_of_meas_shift": None,
        "unit": "degC",
    },
    "V_TE010": {
        "core_name": "Soil_Temperature_10cm",
        "time_of_meas_shift": None,
        "unit": "degC",
    },
    "V_TE005": {
        "core_name": "Soil_Temperature_5cm",
        "time_of_meas_shift": None,
        "unit": "degC",
    },
    # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/cloud_type/BESCHREIBUNG_obsgermany_climate_hourly_cloud_type_de.pdf
    # NOTE(review): the leading whitespace of this key has to match the raw
    # DWD column header exactly -- confirm it against the imported data.
    " V_N": {
        "core_name": "TotalSkyCover",
        "time_of_meas_shift": None,
        "unit": "1eighth",
    },
    # Hourly measurements currently unused due to doubling with 10
    # minute data and conflicting time shifting and units
    # "RF_TU": "RelHum",
    # "TT_TU": "DryBulbTemp",
    # " P0": "AtmPressure",
    # " P": "Pressure_Sea_Level",
    # " F": "WindSpeed",
    # " D": "WindDir",
    # " R1": "LiquidPrecD",
}

# Raw variable names whose format entry carries a "resample" field; these are
# aggregated by sum instead of mean when resampling to hourly values.
variables_to_sum_DWD_historical = [
    raw_name
    for raw_name, spec in format_DWD_historical.items()
    if "resample" in spec
]

65 

66""" 

format_DWD_forecast information:

Variable definitions: https://opendata.dwd.de/weather/lib/MetElementDefinition.xml or
https://wetterdienst.readthedocs.io/en/latest/data/coverage/dwd/mosmix/hourly.html
(given in the original units)

checked by Martin Rätz (18.08.2023)

73""" 

# Maps each raw DWD forecast parameter to its core-data variable name, the
# requested time-of-measurement shift and the unit of the raw values.
# Parameters that are commented out are available in the raw data but are
# not mapped to a core variable.
format_DWD_forecast = {
    # "cloud_cover_above_7_km": None,
    # "cloud_cover_below_1000_ft": None,
    # "cloud_cover_below_500_ft": None,
    # "cloud_cover_between_2_to_7_km": None,
    "cloud_cover_effective": {
        "core_name": "OpaqueSkyCover",
        "time_of_meas_shift": None,
        "unit": "%",
    },
    "cloud_cover_total": {
        "core_name": "TotalSkyCover",
        "time_of_meas_shift": None,
        "unit": "%",
    },
    # "precipitation_height_significant_weather_last_1h": None,
    # "precipitation_height_significant_weather_last_3h": None,
    "pressure_air_site_reduced": {
        "core_name": "AtmPressure",
        "time_of_meas_shift": None,
        "unit": "Pa",
    },
    # "probability_fog_last_12h": None,
    # "probability_fog_last_1h": None,
    # "probability_fog_last_6h": None,
    # "probability_precipitation_height_gt_0_0_mm_last_12h": None,
    # "probability_precipitation_height_gt_0_2_mm_last_12h": None,
    # "probability_precipitation_height_gt_0_2_mm_last_24h": None,
    # "probability_precipitation_height_gt_0_2_mm_last_6h": None,
    # "probability_precipitation_height_gt_1_0_mm_last_12h": None,
    # "probability_precipitation_height_gt_5_0_mm_last_12h": None,
    # "probability_precipitation_height_gt_5_0_mm_last_24h": None,
    # "probability_precipitation_height_gt_5_0_mm_last_6h": None,
    # "probability_wind_gust_ge_25_kn_last_12h": None,
    # "probability_wind_gust_ge_40_kn_last_12h": None,
    # "probability_wind_gust_ge_55_kn_last_12h": None,
    # is actually balance during the last 3 hours:
    "radiation_global": {
        "core_name": "GlobHorRad",
        "time_of_meas_shift": "prec2ind",
        "unit": "kJ/m2",
    },
    # "sunshine_duration": None,
    # "temperature_air_max_200": None,
    # "temperature_air_mean_005": None,
    # no information if temperature is drybulb or something else:
    "temperature_air_mean_200": {
        "core_name": "DryBulbTemp",
        "time_of_meas_shift": None,
        "unit": "K",
    },
    # "temperature_air_min_200": None,
    "temperature_dew_point_mean_200": {
        "core_name": "DewPointTemp",
        "time_of_meas_shift": None,
        "unit": "K",
    },
    "visibility_range": {
        "core_name": "Visibility",
        "time_of_meas_shift": None,
        "unit": "m",
    },
    # "water_equivalent_snow_depth_new_last_1h": None,
    # "water_equivalent_snow_depth_new_last_3h": None,
    # "weather_last_6h": None,
    # "weather_significant": None,
    "wind_direction": {
        "core_name": "WindDir",
        "time_of_meas_shift": None,
        "unit": "deg",
    },
    # "wind_gust_max_last_12h": None,
    # "wind_gust_max_last_1h": None,
    # "wind_gust_max_last_3h": None,
    "wind_speed": {
        "core_name": "WindSpeed",
        "time_of_meas_shift": None,
        "unit": "m/s",
    },
}

118 

119 

def DWD_historical_to_core_data(
    df_import: pd.DataFrame, start: datetime, stop: datetime, meta: MetaData
) -> pd.DataFrame:
    """
    Transform imported weather data from DWD historical format into core data format.

    The raw data is parsed to a datetime index, truncated to the requested
    period (extended by one day on each side), reduced to numeric columns,
    resampled to hourly values, renamed, time-shifted and unit-converted.
    As a side effect, ``meta.executed_transformations`` is set to the
    overview returned by the variable transformations.

    Args:
        df_import (pd.DataFrame): The DataFrame containing imported weather data from DWD.
            Its index is parsed with the format ``%Y%m%d%H%M``.
        start (datetime): The timestamp for the start of the desired data range
            (will be extended for interpolation).
        stop (datetime): The timestamp for the end of the desired data range
            (will be extended for interpolation).
        meta (MetaData): Metadata associated with the data.

    Returns:
        pd.DataFrame: The transformed DataFrame in the core data format.
    """

    ### evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=format_DWD_historical
    )

    ### format raw data for further operations
    df = df_import.copy()
    # to datetime; account for different time-formats
    date_format = "%Y%m%d%H%M"
    df.index = pd.to_datetime(df.index, format=date_format)
    # sort by time
    df = df.sort_index()

    # reduce time period to extended period for working interpolation and for faster operation
    df = time_observation_transformations.truncate_data_from_start_to_stop(
        df, start - timedelta(days=1), stop + timedelta(days=1)
    )

    # select only numeric columns
    df = df.select_dtypes(include=["number"])

    # delete dummy values from DWD
    df = auxiliary.replace_dummy_with_nan(df, format_DWD_historical)

    # resample some via sum some via mean -> results in average of following hour
    # The aggregation is done on the resampler and collected in a new hourly
    # frame. Writing the hourly series back into the 10-minute frame would
    # align on the index and silently drop every hour for which no raw row
    # exists at exactly HH:00, even if other samples of that hour are valid.
    hourly = df.resample("h")
    df_hourly = hourly.mean()  # nan only if all values of the hour are nan
    for var in variables_to_sum_DWD_historical:
        if var in df_hourly.columns:
            # min_count=1: nan only if the hour holds no valid value at all
            df_hourly[var] = hourly[var].sum(min_count=1)
    df = df_hourly

    # rename available variables to core data format
    df = auxiliary.rename_columns(df, format_DWD_historical)

    ### convert timezone to UTC
    # the data is for most stations and datasets, as well as for more recent
    # data (several years) in UTC. For more sophisticated handling pull meta
    # and respect time zone or implement dwd_pulling repo from github

    ### shift and interpolate data forward 30mins or backward -30mins
    df_no_shift = df.copy()
    df = time_observation_transformations.shift_time_by_dict(format_DWD_historical, df)

    def transform_DWD_historical(df):
        """Reduce to core variables, convert units and derive missing variables.

        Returns the transformed DataFrame together with the overview of the
        executed variable transformations.
        """
        # drop unnecessary variables
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)

        ### convert units
        df["AtmPressure"] = unit_conversions.hPa_to_Pa(df["AtmPressure"])
        df["DiffHorRad"] = unit_conversions.Jcm2_to_Whm2(df["DiffHorRad"])
        df["GlobHorRad"] = unit_conversions.Jcm2_to_Whm2(df["GlobHorRad"])
        df["HorInfra"] = unit_conversions.Jcm2_to_Whm2(df["HorInfra"])
        df["TotalSkyCover"] = unit_conversions.eigth_to_tenth(df["TotalSkyCover"])

        ### impute missing variables from other available ones
        df, calc_overview = variable_transformations.variable_transform_all(df, meta)

        return df, calc_overview

    df, meta.executed_transformations = transform_DWD_historical(df)

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back and forth interpolating
    df = pass_through_handling.create_pass_through_variables(
        df_shifted=df,
        df_no_shift=df_no_shift,
        format=format_DWD_historical,
        transform_func=transform_DWD_historical,
        meta=meta,
    )

    return df

209 

210 

def DWD_forecast_2_core_data(df_import: pd.DataFrame, meta: MetaData) -> pd.DataFrame:
    """
    Convert an imported DWD weather forecast into the core data format.

    The forecast is put on a complete hourly index, stripped of its timezone
    information, renamed, time-shifted and unit-converted. As a side effect,
    ``meta.executed_transformations`` is set to the overview returned by the
    variable transformations.

    Args:
        df_import (pd.DataFrame): The DataFrame containing imported weather forecast data from DWD.
        meta (MetaData): Metadata associated with the data.

    Returns:
        pd.DataFrame: The transformed DataFrame in the core data format.
    """

    # make sure the forecast mapping is consistent with the core format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data,
        other_format=format_DWD_forecast,
    )

    # Work on a copy: complete, monotonic hourly index without timezone info.
    # No timezone conversion is done; the data pulled by Wetterdienst is
    # already UTC.
    hourly = df_import.copy().resample("h").asfreq().tz_localize(None)

    # raw parameter names -> core data names
    renamed = auxiliary.rename_columns(hourly, format_DWD_forecast)

    # keep an untouched copy before shifting/interpolating by +-30 minutes
    unshifted = renamed.copy()
    shifted = time_observation_transformations.shift_time_by_dict(
        format_DWD_forecast, renamed
    )

    def transform_DWD_forecast(df):
        """Reduce to core variables, convert units and derive missing variables."""
        # keep only the variables of the core data format
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)

        # unit conversions, applied in this order
        conversions = (
            ("OpaqueSkyCover", unit_conversions.percent_to_tenth),
            ("TotalSkyCover", unit_conversions.percent_to_tenth),
            ("GlobHorRad", unit_conversions.kJm2_to_Whm2),
            ("DryBulbTemp", unit_conversions.kelvin_to_celcius),
            ("DewPointTemp", unit_conversions.kelvin_to_celcius),
            ("Visibility", unit_conversions.divide_by_1000),
        )
        for column, convert in conversions:
            df[column] = convert(df[column])

        # derive missing variables from the available ones
        transformed, overview = variable_transformations.variable_transform_all(df, meta)
        return transformed, overview

    core, meta.executed_transformations = transform_DWD_forecast(shifted)

    # attach the unshifted data for possible direct use (pass-through), which
    # avoids interpolating back and forth
    return pass_through_handling.create_pass_through_variables(
        df_shifted=core,
        df_no_shift=unshifted,
        format=format_DWD_forecast,
        transform_func=transform_DWD_forecast,
        meta=meta,
    )