Coverage for aixweather/transformation_to_core_data/DWD.py: 100%

66 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-12-31 11:58 +0000

1from datetime import datetime, timedelta 

2import pandas as pd 

3 

4from aixweather import definitions 

5from aixweather.imports.utils_import import MetaData 

6from aixweather.transformation_functions import auxiliary, time_observation_transformations, variable_transformations, \ 

7 pass_through_handling, unit_conversions 

8 

9 

class DWDHistoricalFormat:
    """
    Mapping of DWD historical (observation) raw columns to core data names.

    Information on DWD historical data: see readme.

    Every entry of the mapping returned by ``import_format`` has the form
    ``raw data point name -> description`` where the description holds:

    core_name
        Corresponding name matching the format_core_data.
    time_of_meas_shift
        Desired 30min shifting+interpolation to convert a value that is e.g.
        the "average of preceding hour" to "indicated time" (prec2ind).
        ``None`` means no shift is requested.
    unit
        Unit of the raw data following the naming convention of
        format_core_data.
    resample (optional)
        If the key is present, DWD_historical_to_core_data sums the
        sub-hourly values when resampling to hourly data instead of
        averaging them.
    nan
        Dummy/invalid value markers: plain numbers and ``{"<": x}`` /
        ``{">": x}`` bounds. Presumably consumed by
        ``auxiliary.replace_dummy_with_nan`` - confirm there.

    All changes here automatically change the calculations.
    Exception: unit conversions have to be added manually.

    checked by Martin Rätz (08.08.2023)
    """

    @classmethod
    def import_format(cls) -> dict:
        """Return a newly built raw-name -> description mapping on every call."""
        # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/
        # 10_minutes/air_temperature/DESCRIPTION_obsgermany_climate_10min_air_temperature_en.pdf
        air_temperature = {
            "RF_10": {
                "core_name": "RelHum",
                "time_of_meas_shift": "foll2ind",
                "unit": "percent",
                "nan": [-999, {"<": 0}, {">": 100}],
            },
            "TT_10": {
                "core_name": "DryBulbTemp",
                "time_of_meas_shift": "foll2ind",
                "unit": "degC",
                "nan": [-999, {"<": -98}],
            },
            "TD_10": {
                "core_name": "DewPointTemp",
                "time_of_meas_shift": "foll2ind",
                "unit": "degC",
                "nan": [-999, {"<": -98}],
            },
            "PP_10": {
                "core_name": "AtmPressure",
                "time_of_meas_shift": "foll2ind",
                "unit": "hPa",
                "nan": [-999],
            },
        }

        # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/solar/BESCHREIBUNG_obsgermany_climate_10min_solar_de.pdf
        solar = {
            "DS_10": {
                "core_name": "DiffHorRad",
                "time_of_meas_shift": "foll2ind",
                "unit": "J/cm2",
                "resample": "sum",
                "nan": [-999, {"<": -10}],
            },
            # https://de.wikipedia.org/wiki/Globalstrahlung
            "GS_10": {
                "core_name": "GlobHorRad",
                "time_of_meas_shift": "foll2ind",
                "unit": "J/cm2",
                "resample": "sum",
                "nan": [-999, {"<": -10}],
            },
            # https://de.wikipedia.org/wiki/Atmosph%C3%A4rische_Gegenstrahlung
            "LS_10": {
                "core_name": "HorInfra",
                "time_of_meas_shift": "foll2ind",
                "unit": "J/cm2",
                "resample": "sum",
                "nan": [990, -999, {"<": -10}],
            },
        }

        # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/wind/BESCHREIBUNG_obsgermany_climate_10min_wind_de.pdf
        wind = {
            "FF_10": {
                "core_name": "WindSpeed",
                "time_of_meas_shift": "foll2ind",
                "unit": "m/s",
                "nan": [-999, {"<": -10}],
            },
            "DD_10": {
                "core_name": "WindDir",
                "time_of_meas_shift": "foll2ind",
                "unit": "deg",
                "nan": [-999, {"<": 0}, {">": 360}],
            },
        }

        # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/precipitation/DESCRIPTION_obsgermany-climate-10min-precipitation_en.pdf
        precipitation = {
            "RWS_10": {
                "core_name": "LiquidPrecD",
                "time_of_meas_shift": "prec2ind",
                "unit": "mm/h",
                "resample": "sum",
                "nan": [-999, {"<": -10}],
            },
        }

        # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/soil_temperature/BESCHREIBUNG_obsgermany_climate_hourly_soil_temperature_de.pdf
        soil_temperature = {
            "V_TE100": {
                "core_name": "Soil_Temperature_1m",
                "time_of_meas_shift": None,
                "unit": "degC",
                "nan": [-999, {"<": -98}],
            },
            "V_TE050": {
                "core_name": "Soil_Temperature_50cm",
                "time_of_meas_shift": None,
                "unit": "degC",
                "nan": [-999, {"<": -98}],
            },
            "V_TE020": {
                "core_name": "Soil_Temperature_20cm",
                "time_of_meas_shift": None,
                "unit": "degC",
                "nan": [-999, {"<": -98}],
            },
            "V_TE010": {
                "core_name": "Soil_Temperature_10cm",
                "time_of_meas_shift": None,
                "unit": "degC",
                "nan": [-999, {"<": -98}],
            },
            "V_TE005": {
                "core_name": "Soil_Temperature_5cm",
                "time_of_meas_shift": None,
                "unit": "degC",
                "nan": [-999, {"<": -98}],
            },
        }

        # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/cloud_type/BESCHREIBUNG_obsgermany_climate_hourly_cloud_type_de.pdf
        cloud_type = {
            # The leading blank belongs to the raw column name.
            " V_N": {
                "core_name": "TotalSkyCover",
                "time_of_meas_shift": None,
                "unit": "1eighth",
                "nan": [-999, {"<": 0}],
            },
        }

        # Hourly measurements currently unused due to doubling with 10
        # minute data and conflicting time shifting and units
        # "RF_TU": "RelHum",
        # "TT_TU": "DryBulbTemp",
        # " P0": "AtmPressure",
        # " P": "Pressure_Sea_Level",
        # " F": "WindSpeed",
        # " D": "WindDir",
        # " R1": "LiquidPrecD",

        # Merge in the same order in which the columns were listed originally.
        return {
            **air_temperature,
            **solar,
            **wind,
            **precipitation,
            **soil_temperature,
            **cloud_type,
        }

66 

67 

class DWDForecast:
    """
    Mapping of DWD forecast parameters to core data names.

    Information on DWD forecast:

    Variable definitions: https://opendata.dwd.de/weather/lib/MetElementDefinition.xml or
    https://wetterdienst.readthedocs.io/en/latest/data/coverage/dwd/mosmix/hourly.html (in origin unit)

    Each entry of the mapping returned by ``import_format`` holds the keys
    ``core_name``, ``time_of_meas_shift`` and ``unit``. Parameters that are
    available but unused are kept as commented-out lines.

    checked by Martin Rätz 18.08.2023
    """

    @classmethod
    def import_format(cls):
        """Return a newly built forecast-parameter -> description mapping."""

        def entry(core_name, unit, shift=None):
            # Key order matches the hand-written literals this replaces.
            return {"core_name": core_name, "time_of_meas_shift": shift, "unit": unit}

        return {
            # "cloud_cover_above_7_km": None,
            # "cloud_cover_below_1000_ft": None,
            # "cloud_cover_below_500_ft": None,
            # "cloud_cover_between_2_to_7_km": None,
            "cloud_cover_effective": entry("OpaqueSkyCover", "%"),
            "cloud_cover_total": entry("TotalSkyCover", "%"),
            # "precipitation_height_significant_weather_last_1h": None,
            # "precipitation_height_significant_weather_last_3h": None,
            "pressure_air_site_reduced": entry("AtmPressure", "Pa"),
            # "probability_fog_last_12h": None,
            # "probability_fog_last_1h": None,
            # "probability_fog_last_6h": None,
            # "probability_precipitation_height_gt_0_0_mm_last_12h": None,
            # "probability_precipitation_height_gt_0_2_mm_last_12h": None,
            # "probability_precipitation_height_gt_0_2_mm_last_24h": None,
            # "probability_precipitation_height_gt_0_2_mm_last_6h": None,
            # "probability_precipitation_height_gt_1_0_mm_last_12h": None,
            # "probability_precipitation_height_gt_5_0_mm_last_12h": None,
            # "probability_precipitation_height_gt_5_0_mm_last_24h": None,
            # "probability_precipitation_height_gt_5_0_mm_last_6h": None,
            # "probability_wind_gust_ge_25_kn_last_12h": None,
            # "probability_wind_gust_ge_40_kn_last_12h": None,
            # "probability_wind_gust_ge_55_kn_last_12h": None,
            # is actually balance during the last 3 hours:
            "radiation_global": entry("GlobHorRad", "kJ/m2", shift="prec2ind"),
            # "sunshine_duration": None,
            # "temperature_air_max_200": None,
            # "temperature_air_mean_005": None,
            # no information if temperature is drybulb or something else:
            "temperature_air_mean_200": entry("DryBulbTemp", "K"),
            # "temperature_air_min_200": None,
            "temperature_dew_point_mean_200": entry("DewPointTemp", "K"),
            "visibility_range": entry("Visibility", "m"),
            # "water_equivalent_snow_depth_new_last_1h": None,
            # "water_equivalent_snow_depth_new_last_3h": None,
            # "weather_last_6h": None,
            # "weather_significant": None,
            "wind_direction": entry("WindDir", "deg"),
            # "wind_gust_max_last_12h": None,
            # "wind_gust_max_last_1h": None,
            # "wind_gust_max_last_3h": None,
            "wind_speed": entry("WindSpeed", "m/s"),
        }

124 

125 

def DWD_historical_to_core_data(
    df_import: pd.DataFrame, start: datetime, stop: datetime, meta: MetaData
) -> pd.DataFrame:
    """
    Transform imported weather data from DWD historical format into core data format.

    Steps: validate the format definition, parse and sort the time index,
    truncate to the requested period (extended by one day on each side),
    replace DWD dummy values, resample to hourly values, rename the columns
    to core names, apply the time-of-measurement shift, convert units,
    derive missing variables and finally add the unshifted pass-through
    variables.

    Args:
        df_import (pd.DataFrame): The DataFrame containing imported weather data from DWD.
            Its index must be parseable with the format ``%Y%m%d%H%M``.
        start (datetime): The timestamp for the start of the desired data range
            (will be extended for interpolation).
        stop (datetime): The timestamp for the end of the desired data range
            (will be extended for interpolation).
        meta (MetaData): Metadata associated with the data. Its
            ``executed_transformations`` attribute is overwritten here.

    Returns:
        pd.DataFrame: The transformed DataFrame in the core data format.
    """
    format_DWD_historical = DWDHistoricalFormat.import_format()

    ### evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=format_DWD_historical
    )

    ### format raw data for further operations
    df = df_import.copy()
    # to datetime; the raw index is formatted as YYYYMMDDhhmm
    df.index = pd.to_datetime(df.index, format="%Y%m%d%H%M")
    # sort by time
    df = df.sort_index()

    # reduce time period to extended period for working interpolation and for faster operation
    df = time_observation_transformations.truncate_data_from_start_to_stop(
        df, start - timedelta(days=1), stop + timedelta(days=1)
    )

    # select only numeric columns
    df = df.select_dtypes(include=["number"])

    # delete dummy values from DWD
    df = auxiliary.replace_dummy_with_nan(df, format_DWD_historical)

    # variables that should be resampled by sum instead of mean
    variables_to_sum_DWD_historical = {
        key for key, value in format_DWD_historical.items() if "resample" in value
    }

    # Resample some via sum, some via mean -> results in value of following hour.
    # The result is assembled directly on the hourly index. Writing the hourly
    # series back into the sub-hourly frame (index-aligned assignment) would
    # silently drop every hour whose full-hour timestamp is missing in the raw
    # data, even if other values exist within that hour.
    df_hourly = df.resample("h").first()  # provides hourly index and column layout
    for var in df.columns:
        hourly_bins = df[var].resample("h")
        if var in variables_to_sum_DWD_historical:
            # min_count=1: NaN only if the hour contains no valid value at all
            df_hourly[var] = hourly_bins.sum(min_count=1)
        else:
            # mean ignores NaN: NaN only if all values of the hour are NaN
            df_hourly[var] = hourly_bins.mean()
    df = df_hourly

    # rename available variables to core data format
    df = auxiliary.rename_columns(df, format_DWD_historical)

    ### convert timezone to UTC
    # the data is for most stations and datasets, as well as for more recent
    # data (several years) in UTC. For more sophisticated handling pull meta
    # and respect time zone or implement dwd_pulling repo from github

    ### shift and interpolate data forward 30mins or backward -30mins
    df_no_shift = df.copy()
    df = time_observation_transformations.shift_time_by_dict(format_DWD_historical, df)

    def transform_DWD_historical(df):
        """Convert units and derive missing variables; returns (df, overview)."""
        # drop unnecessary variables
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)

        ### convert units
        df["AtmPressure"] = unit_conversions.hPa_to_Pa(df["AtmPressure"])
        df["DiffHorRad"] = unit_conversions.Jcm2_to_Whm2(df["DiffHorRad"])
        df["GlobHorRad"] = unit_conversions.Jcm2_to_Whm2(df["GlobHorRad"])
        df["HorInfra"] = unit_conversions.Jcm2_to_Whm2(df["HorInfra"])
        df["TotalSkyCover"] = unit_conversions.eigth_to_tenth(df["TotalSkyCover"])

        ### impute missing variables from other available ones
        df, calc_overview = variable_transformations.variable_transform_all(df, meta)

        return df, calc_overview

    df, meta.executed_transformations = transform_DWD_historical(df)

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back and forth interpolating
    df = pass_through_handling.create_pass_through_variables(
        df_shifted=df,
        df_no_shift=df_no_shift,
        format=format_DWD_historical,
        transform_func=transform_DWD_historical,
        meta=meta,
    )

    return df

221 

222 

def DWD_forecast_2_core_data(df_import: pd.DataFrame, meta: MetaData) -> pd.DataFrame:
    """
    Transform imported weather forecast data from DWD into core data format.

    The forecast is put on a complete hourly index, stripped of its timezone
    information, renamed to core names, time-shifted according to the format
    definition, unit-converted and completed with derived variables. The
    unshifted data is added afterwards as pass-through variables.

    Args:
        df_import (pd.DataFrame): The DataFrame containing imported weather forecast data from DWD.
        meta (MetaData): Metadata associated with the data. Its
            ``executed_transformations`` attribute is overwritten here.

    Returns:
        pd.DataFrame: The transformed DataFrame in the core data format.
    """
    forecast_format = DWDForecast.import_format()

    ### evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=forecast_format
    )

    ### format raw data for further operations
    hourly = df_import.copy()
    # complete, monotonic hourly DatetimeIndex
    hourly = hourly.resample("h").asfreq()
    # drop timezone information
    hourly = hourly.tz_localize(None)
    # rename available variables to core data format
    hourly = auxiliary.rename_columns(hourly, forecast_format)

    ### convert timezone to UTC
    # the data pulled by Wetterdienst is already UTC

    ### shift and interpolate data forward 30mins or backward -30mins
    df_unshifted = hourly.copy()
    df_shifted = time_observation_transformations.shift_time_by_dict(
        forecast_format, hourly
    )

    def transform_DWD_forecast(df):
        """Convert units and derive missing variables; returns (df, overview)."""
        # drop unnecessary variables
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)

        ### convert units (applied in this order)
        conversions = (
            ("OpaqueSkyCover", unit_conversions.percent_to_tenth),
            ("TotalSkyCover", unit_conversions.percent_to_tenth),
            ("GlobHorRad", unit_conversions.kJm2_to_Whm2),
            ("DryBulbTemp", unit_conversions.kelvin_to_celcius),
            ("DewPointTemp", unit_conversions.kelvin_to_celcius),
            ("Visibility", unit_conversions.divide_by_1000),
        )
        for column, convert in conversions:
            df[column] = convert(df[column])

        ### impute missing variables from other available ones
        df, calc_overview = variable_transformations.variable_transform_all(df, meta)

        return df, calc_overview

    df_core, executed = transform_DWD_forecast(df_shifted)
    meta.executed_transformations = executed

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back and forth interpolating
    return pass_through_handling.create_pass_through_variables(
        df_shifted=df_core,
        df_no_shift=df_unshifted,
        format=forecast_format,
        transform_func=transform_DWD_forecast,
        meta=meta,
    )