Coverage for aixweather/transformation_to_core_data/DWD.py: 100%
66 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-12-31 11:58 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-12-31 11:58 +0000
1from datetime import datetime, timedelta
2import pandas as pd
4from aixweather import definitions
5from aixweather.imports.utils_import import MetaData
6from aixweather.transformation_functions import auxiliary, time_observation_transformations, variable_transformations, \
7 pass_through_handling, unit_conversions
class DWDHistoricalFormat:
    """
    Import format of the DWD historical data (for background on the data
    itself see the readme).

    Layout of the dict returned by ``import_format``:
        key = raw data point name
        core_name = corresponding name matching the format_core_data
        time_of_meas_shift = desired 30min shifting+interpolation to convert
            a value that is e.g. the "average of preceding hour" to
            "indicated time" (prec2ind).
        unit = unit of the raw data following the naming convention of
            format_core_data
        resample = optional; variables carrying this key are summed instead
            of averaged when DWD_historical_to_core_data aggregates to hours
        nan = values handed to auxiliary.replace_dummy_with_nan
            (presumably plain dummy values plus {"<": x} / {">": x}
            threshold rules - confirm against that helper)

    All changes here automatically change the calculations.
    Exception: unit conversions have to be added manually.

    checked by Martin Rätz (08.08.2023)
    """

    @classmethod
    def import_format(cls) -> dict:
        """Return a newly built format dict for the DWD historical raw variables."""

        def spec(core_name, shift, unit, nan, resample=None):
            # Key order is kept as: core_name, time_of_meas_shift, unit,
            # (resample), nan.
            entry = {
                "core_name": core_name,
                "time_of_meas_shift": shift,
                "unit": unit,
            }
            if resample is not None:
                entry["resample"] = resample
            entry["nan"] = nan
            return entry

        return {
            # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/
            # 10_minutes/air_temperature/DESCRIPTION_obsgermany_climate_10min_air_temperature_en.pdf
            "RF_10": spec("RelHum", "foll2ind", "percent", [-999, {"<": 0}, {">": 100}]),
            "TT_10": spec("DryBulbTemp", "foll2ind", "degC", [-999, {"<": -98}]),
            "TD_10": spec("DewPointTemp", "foll2ind", "degC", [-999, {"<": -98}]),
            "PP_10": spec("AtmPressure", "foll2ind", "hPa", [-999]),
            # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/solar/BESCHREIBUNG_obsgermany_climate_10min_solar_de.pdf
            "DS_10": spec("DiffHorRad", "foll2ind", "J/cm2", [-999, {"<": -10}], resample="sum"),
            # https://de.wikipedia.org/wiki/Globalstrahlung
            "GS_10": spec("GlobHorRad", "foll2ind", "J/cm2", [-999, {"<": -10}], resample="sum"),
            # https://de.wikipedia.org/wiki/Atmosph%C3%A4rische_Gegenstrahlung
            "LS_10": spec("HorInfra", "foll2ind", "J/cm2", [990, -999, {"<": -10}], resample="sum"),
            # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/wind/BESCHREIBUNG_obsgermany_climate_10min_wind_de.pdf
            "FF_10": spec("WindSpeed", "foll2ind", "m/s", [-999, {"<": -10}]),
            "DD_10": spec("WindDir", "foll2ind", "deg", [-999, {"<": 0}, {">": 360}]),
            # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/precipitation/DESCRIPTION_obsgermany-climate-10min-precipitation_en.pdf
            "RWS_10": spec("LiquidPrecD", "prec2ind", "mm/h", [-999, {"<": -10}], resample="sum"),
            # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/soil_temperature/BESCHREIBUNG_obsgermany_climate_hourly_soil_temperature_de.pdf
            "V_TE100": spec("Soil_Temperature_1m", None, "degC", [-999, {"<": -98}]),
            "V_TE050": spec("Soil_Temperature_50cm", None, "degC", [-999, {"<": -98}]),
            "V_TE020": spec("Soil_Temperature_20cm", None, "degC", [-999, {"<": -98}]),
            "V_TE010": spec("Soil_Temperature_10cm", None, "degC", [-999, {"<": -98}]),
            "V_TE005": spec("Soil_Temperature_5cm", None, "degC", [-999, {"<": -98}]),
            # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/cloud_type/BESCHREIBUNG_obsgermany_climate_hourly_cloud_type_de.pdf
            " V_N": spec("TotalSkyCover", None, "1eighth", [-999, {"<": 0}]),
            # Hourly measurements currently unused due to doubling with 10
            # minute data and conflicting time shifting and units
            # "RF_TU": "RelHum",
            # "TT_TU": "DryBulbTemp",
            # " P0": "AtmPressure",
            # " P": "Pressure_Sea_Level",
            # " F": "WindSpeed",
            # " D": "WindDir",
            # " R1": "LiquidPrecD",
        }
class DWDForecast:
    """
    Information on DWD forecast:

    Variable definitions: https://opendata.dwd.de/weather/lib/MetElementDefinition.xml or
    https://wetterdienst.readthedocs.io/en/latest/data/coverage/dwd/mosmix/hourly.html (in origin unit)

    Format info (same layout as used for the DWD historical format):
        key = raw data point name
        core_name = corresponding name matching the format_core_data
        time_of_meas_shift = desired time shifting identifier, or None
        unit = unit of the raw data

    Raw variables that are available but unused are kept as comments.

    checked by Martin Rätz 18.08.2023
    """

    @classmethod
    def import_format(cls) -> dict:
        """Return a newly built format dict for the used DWD forecast variables."""
        return {
            # "cloud_cover_above_7_km": None,
            # "cloud_cover_below_1000_ft": None,
            # "cloud_cover_below_500_ft": None,
            # "cloud_cover_between_2_to_7_km": None,
            "cloud_cover_effective": {
                "core_name": "OpaqueSkyCover",
                "time_of_meas_shift": None,
                "unit": "%",
            },
            "cloud_cover_total": {
                "core_name": "TotalSkyCover",
                "time_of_meas_shift": None,
                "unit": "%",
            },
            # "precipitation_height_significant_weather_last_1h": None,
            # "precipitation_height_significant_weather_last_3h": None,
            "pressure_air_site_reduced": {
                "core_name": "AtmPressure",
                "time_of_meas_shift": None,
                "unit": "Pa",
            },
            # "probability_fog_last_12h": None,
            # "probability_fog_last_1h": None,
            # "probability_fog_last_6h": None,
            # "probability_precipitation_height_gt_0_0_mm_last_12h": None,
            # "probability_precipitation_height_gt_0_2_mm_last_12h": None,
            # "probability_precipitation_height_gt_0_2_mm_last_24h": None,
            # "probability_precipitation_height_gt_0_2_mm_last_6h": None,
            # "probability_precipitation_height_gt_1_0_mm_last_12h": None,
            # "probability_precipitation_height_gt_5_0_mm_last_12h": None,
            # "probability_precipitation_height_gt_5_0_mm_last_24h": None,
            # "probability_precipitation_height_gt_5_0_mm_last_6h": None,
            # "probability_wind_gust_ge_25_kn_last_12h": None,
            # "probability_wind_gust_ge_40_kn_last_12h": None,
            # "probability_wind_gust_ge_55_kn_last_12h": None,
            # is actually balance during the last 3 hours:
            "radiation_global": {
                "core_name": "GlobHorRad",
                "time_of_meas_shift": "prec2ind",
                "unit": "kJ/m2",
            },
            # "sunshine_duration": None,
            # "temperature_air_max_200": None,
            # "temperature_air_mean_005": None,
            # no information if temperature is drybulb or something else:
            "temperature_air_mean_200": {
                "core_name": "DryBulbTemp",
                "time_of_meas_shift": None,
                "unit": "K",
            },
            # "temperature_air_min_200": None,
            "temperature_dew_point_mean_200": {
                "core_name": "DewPointTemp",
                "time_of_meas_shift": None,
                "unit": "K",
            },
            "visibility_range": {
                "core_name": "Visibility",
                "time_of_meas_shift": None,
                "unit": "m",
            },
            # "water_equivalent_snow_depth_new_last_1h": None,
            # "water_equivalent_snow_depth_new_last_3h": None,
            # "weather_last_6h": None,
            # "weather_significant": None,
            "wind_direction": {
                "core_name": "WindDir",
                "time_of_meas_shift": None,
                "unit": "deg",
            },
            # "wind_gust_max_last_12h": None,
            # "wind_gust_max_last_1h": None,
            # "wind_gust_max_last_3h": None,
            "wind_speed": {
                "core_name": "WindSpeed",
                "time_of_meas_shift": None,
                "unit": "m/s",
            },
        }
def DWD_historical_to_core_data(
    df_import: pd.DataFrame, start: datetime, stop: datetime, meta: MetaData
) -> pd.DataFrame:
    """
    Transform imported weather data from DWD historical format into core data format.

    Steps: validate the import format, parse and sort the time index, truncate
    to the requested period extended by one day on each side, keep numeric
    columns, replace dummy values by NaN, aggregate to hourly values, rename to
    core names, apply the time shift, convert units, run the variable
    transformations and finally add the pass-through variables built from the
    unshifted data.

    Args:
        df_import (pd.DataFrame): The DataFrame containing imported weather data from DWD.
            Its index is parsed with the format "%Y%m%d%H%M".
        start (datetime): The timestamp for the start of the desired data range (will be extended for interpolation).
        stop (datetime): The timestamp for the end of the desired data range (will be extended for interpolation).
        meta (MetaData): Metadata associated with the data. Its attribute
            ``executed_transformations`` is overwritten with the overview
            returned by the variable transformations.

    Returns:
        pd.DataFrame: The transformed DataFrame in the core data format.
    """
    format_DWD_historical = DWDHistoricalFormat.import_format()

    ### evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=format_DWD_historical
    )

    ### format raw data for further operations
    df = df_import.copy()
    # to datetime; account for different time-formats
    date_format = "%Y%m%d%H%M"
    df.index = pd.to_datetime(df.index, format=date_format)
    # sort by time
    df = df.sort_index()

    # reduce time period to extended period for working interpolation and for faster operation
    df = time_observation_transformations.truncate_data_from_start_to_stop(
        df, start - timedelta(days=1), stop + timedelta(days=1)
    )

    # select only numeric columns
    df = df.select_dtypes(include=["number"])

    # delete dummy values from DWD
    df = auxiliary.replace_dummy_with_nan(df, format_DWD_historical)

    # get variables that should be resampled by sum instead of mean
    variables_to_sum_DWD_historical = {
        key for key, value in format_DWD_historical.items() if "resample" in value
    }

    # resample some via sum some via mean -> results in average of following hour
    # The hourly frame is built directly from the resample results. Writing an
    # hourly Series back into the sub-hourly frame (df[var] = ...) aligns on the
    # existing index and would drop the value of every hour that has no raw
    # row exactly on the full hour.
    df_hourly = df.resample("h").mean()  # nan only if all values in the interval are nan
    for var in df.columns:
        if var in variables_to_sum_DWD_historical:
            # min_count=1: nan (instead of 0) if the interval holds no valid value
            df_hourly[var] = df[var].resample("h").sum(min_count=1)
    df = df_hourly

    # rename available variables to core data format
    df = auxiliary.rename_columns(df, format_DWD_historical)

    ### convert timezone to UTC
    # the data is for most stations and datasets, as well as for more recent
    # data (several years) in UTC. For more sophisticated handling pull meta
    # and respect time zone or implement dwd_pulling repo from github

    ### shift and interpolate data forward 30mins or backward -30mins
    df_no_shift = df.copy()
    df = time_observation_transformations.shift_time_by_dict(format_DWD_historical, df)

    def transform_DWD_historical(df):
        """Apply variable convention, unit conversions and variable transformations."""
        # drop unnecessary variables
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)

        ### convert units
        df["AtmPressure"] = unit_conversions.hPa_to_Pa(df["AtmPressure"])
        df["DiffHorRad"] = unit_conversions.Jcm2_to_Whm2(df["DiffHorRad"])
        df["GlobHorRad"] = unit_conversions.Jcm2_to_Whm2(df["GlobHorRad"])
        df["HorInfra"] = unit_conversions.Jcm2_to_Whm2(df["HorInfra"])
        df["TotalSkyCover"] = unit_conversions.eigth_to_tenth(df["TotalSkyCover"])

        ### impute missing variables from other available ones
        df, calc_overview = variable_transformations.variable_transform_all(df, meta)

        return df, calc_overview

    df, meta.executed_transformations = transform_DWD_historical(df)

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back and forth interpolating
    df = pass_through_handling.create_pass_through_variables(
        df_shifted=df,
        df_no_shift=df_no_shift,
        format=format_DWD_historical,
        transform_func=transform_DWD_historical,
        meta=meta,
    )

    return df
def DWD_forecast_2_core_data(df_import: pd.DataFrame, meta: MetaData) -> pd.DataFrame:
    """
    Convert imported DWD weather forecast data into the core data format.

    Args:
        df_import (pd.DataFrame): Imported weather forecast data from DWD.
        meta (MetaData): Metadata associated with the data; its
            ``executed_transformations`` attribute is overwritten here.

    Returns:
        pd.DataFrame: The forecast data in the core data format.
    """
    forecast_format = DWDForecast.import_format()

    ### evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=forecast_format
    )

    ### format raw data for further operations
    # hourly resampling makes the DatetimeIndex complete and monotonic,
    # afterwards the timezone information is removed
    df = df_import.copy().resample("h").asfreq().tz_localize(None)
    # rename available variables to core data format
    df = auxiliary.rename_columns(df, forecast_format)

    ### convert timezone to UTC
    # the data pulled by Wetterdienst is already UTC

    ### shift and interpolate data forward 30mins or backward -30mins
    df_no_shift = df.copy()
    df = time_observation_transformations.shift_time_by_dict(forecast_format, df)

    def transform_DWD_forecast(df):
        """Apply variable convention, unit conversions and variable transformations."""
        # drop unnecessary variables
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)

        ### convert units (applied in the listed order)
        converters = (
            ("OpaqueSkyCover", unit_conversions.percent_to_tenth),
            ("TotalSkyCover", unit_conversions.percent_to_tenth),
            ("GlobHorRad", unit_conversions.kJm2_to_Whm2),
            ("DryBulbTemp", unit_conversions.kelvin_to_celcius),
            ("DewPointTemp", unit_conversions.kelvin_to_celcius),
            ("Visibility", unit_conversions.divide_by_1000),
        )
        for column, convert in converters:
            df[column] = convert(df[column])

        ### impute missing variables from other available ones
        return variable_transformations.variable_transform_all(df, meta)

    df, executed = transform_DWD_forecast(df)
    meta.executed_transformations = executed

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back and forth interpolating
    return pass_through_handling.create_pass_through_variables(
        df_shifted=df,
        df_no_shift=df_no_shift,
        format=forecast_format,
        transform_func=transform_DWD_forecast,
        meta=meta,
    )