Coverage for aixweather/transformation_to_core_data/DWD.py: 100%
60 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-01-06 16:01 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-01-06 16:01 +0000
1from datetime import datetime, timedelta
2import pandas as pd
4from aixweather import definitions
5from aixweather.imports.utils_import import MetaData
6from aixweather.transformation_functions import auxiliary, time_observation_transformations, variable_transformations, \
7 pass_through_handling, unit_conversions
9"""
10format_DWD_historical information:
11see readme
13Format info:
14key = raw data point name
15core_name = corresponding name matching the format_core_data
16time_of_meas_shift = desired 30min shifting+interpolation to convert a value that is e.g. the
17"average of preceding hour" to "indicated time" (prec2ind).
18unit = unit of the raw data following the naming convention of format_core_data
20All changes here automatically change the calculations.
21Exception: unit conversions have to be added manually.
23checked by Martin Rätz (08.08.2023)
24"""
# Lookup table describing every raw DWD historical data point that is used.
# key                 -> raw data point name as delivered by DWD
# "core_name"         -> corresponding variable name in format_core_data
# "time_of_meas_shift"-> requested 30 min shift + interpolation towards the
#                        indicated time (e.g. "prec2ind"); None presumably means
#                        "no shift" - confirm in shift_time_by_dict
# "unit"              -> unit of the raw data
# "resample"          -> optional; its mere presence puts the variable into
#                        variables_to_sum_DWD_historical (hourly sum instead of mean)
# "nan"               -> optional; presumably extra dummy values that are treated
#                        as missing - verify against auxiliary.replace_dummy_with_nan
format_DWD_historical = {
    # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/
    # 10_minutes/air_temperature/DESCRIPTION_obsgermany_climate_10min_air_temperature_en.pdf
    "RF_10": {"core_name": "RelHum", "time_of_meas_shift": "foll2ind", "unit": "percent"},
    "TT_10": {"core_name": "DryBulbTemp", "time_of_meas_shift": "foll2ind", "unit": "degC"},
    "TD_10": {"core_name": "DewPointTemp", "time_of_meas_shift": "foll2ind", "unit": "degC"},
    "PP_10": {"core_name": "AtmPressure", "time_of_meas_shift": "foll2ind", "unit": "hPa"},
    #https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/solar/BESCHREIBUNG_obsgermany_climate_10min_solar_de.pdf
    "DS_10": {"core_name": "DiffHorRad", "time_of_meas_shift": "foll2ind", "unit": "J/cm2", "resample": "sum"},
    #https://de.wikipedia.org/wiki/Globalstrahlung
    "GS_10": {"core_name": "GlobHorRad", "time_of_meas_shift": "foll2ind", "unit": "J/cm2", "resample": "sum"},
    #https://de.wikipedia.org/wiki/Atmosph%C3%A4rische_Gegenstrahlung
    "LS_10": {"core_name": "HorInfra", "time_of_meas_shift": "foll2ind", "unit": "J/cm2", "resample": "sum", "nan":[990, -999]},
    #https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/wind/BESCHREIBUNG_obsgermany_climate_10min_wind_de.pdf
    "FF_10": {"core_name": "WindSpeed", "time_of_meas_shift": "prec2ind", "unit": "m/s"},
    "DD_10": {"core_name": "WindDir", "time_of_meas_shift": "prec2ind", "unit": "deg"},
    # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/precipitation/BESCHREIBUNG_obsgermany_climate_10min_precipitation_de.pdf
    "RWS_10": {"core_name": "LiquidPrecD", "time_of_meas_shift": "prec2ind", "unit": "mm/h", "resample": "sum"},
    # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/soil_temperature/BESCHREIBUNG_obsgermany_climate_hourly_soil_temperature_de.pdf
    "V_TE100": {"core_name": "Soil_Temperature_1m", "time_of_meas_shift": None, "unit": "degC"},
    "V_TE050": {"core_name": "Soil_Temperature_50cm", "time_of_meas_shift": None, "unit": "degC"},
    "V_TE020": {"core_name": "Soil_Temperature_20cm", "time_of_meas_shift": None, "unit": "degC"},
    "V_TE010": {"core_name": "Soil_Temperature_10cm", "time_of_meas_shift": None, "unit": "degC"},
    "V_TE005": {"core_name": "Soil_Temperature_5cm", "time_of_meas_shift": None, "unit": "degC"},
    # https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/cloud_type/BESCHREIBUNG_obsgermany_climate_hourly_cloud_type_de.pdf
    # NOTE(review): the leading whitespace is part of this key; confirm it matches
    # the raw DWD column header character for character.
    " V_N": {"core_name": "TotalSkyCover", "time_of_meas_shift": None, "unit": "1eighth"},
    # Hourly measurements currently unused due to doubling with 10
    # minute data and conflicting time shifting and units
    # "RF_TU": "RelHum",
    # "TT_TU": "DryBulbTemp",
    # " P0": "AtmPressure",
    # " P": "Pressure_Sea_Level",
    # " F": "WindSpeed",
    # " D": "WindDir",
    # " R1": "LiquidPrecD",
}
# Raw variable names that carry a "resample" entry in format_DWD_historical;
# these are aggregated per hour by sum, all remaining variables by mean.
variables_to_sum_DWD_historical = [
    raw_name
    for raw_name, description in format_DWD_historical.items()
    if "resample" in description
]
66"""
67format_DWD_forecast information:
69Variable definitions: https://opendata.dwd.de/weather/lib/MetElementDefinition.xml or
70https://wetterdienst.readthedocs.io/en/latest/data/coverage/dwd/mosmix/hourly.html (in origin unit)
72checked by Martin Rätz 18.08.2023
73"""
# Lookup table describing every DWD forecast variable that is used.
# key                  -> variable name in the imported forecast data
# "core_name"          -> corresponding variable name in format_core_data
# "time_of_meas_shift" -> requested shift + interpolation towards the indicated
#                         time (e.g. "prec2ind"); None presumably means "no shift"
#                         - confirm in shift_time_by_dict
# "unit"               -> unit of the raw data
# Commented-out names are forecast variables that are currently not mapped.
# NOTE(review): this table spells the unit "%" while format_DWD_historical uses
# "percent" - confirm which spelling the core format convention expects.
format_DWD_forecast = {
    # "cloud_cover_above_7_km": None,
    # "cloud_cover_below_1000_ft": None,
    # "cloud_cover_below_500_ft": None,
    # "cloud_cover_between_2_to_7_km": None,
    "cloud_cover_effective": {"core_name": "OpaqueSkyCover", "time_of_meas_shift": None, "unit": "%"},
    "cloud_cover_total": {"core_name": "TotalSkyCover", "time_of_meas_shift": None, "unit": "%"},
    # "precipitation_height_significant_weather_last_1h": None,
    # "precipitation_height_significant_weather_last_3h": None,
    "pressure_air_site_reduced": {"core_name": "AtmPressure", "time_of_meas_shift": None, "unit": "Pa"},
    # "probability_fog_last_12h": None,
    # "probability_fog_last_1h": None,
    # "probability_fog_last_6h": None,
    # "probability_precipitation_height_gt_0_0_mm_last_12h": None,
    # "probability_precipitation_height_gt_0_2_mm_last_12h": None,
    # "probability_precipitation_height_gt_0_2_mm_last_24h": None,
    # "probability_precipitation_height_gt_0_2_mm_last_6h": None,
    # "probability_precipitation_height_gt_1_0_mm_last_12h": None,
    # "probability_precipitation_height_gt_5_0_mm_last_12h": None,
    # "probability_precipitation_height_gt_5_0_mm_last_24h": None,
    # "probability_precipitation_height_gt_5_0_mm_last_6h": None,
    # "probability_wind_gust_ge_25_kn_last_12h": None,
    # "probability_wind_gust_ge_40_kn_last_12h": None,
    # "probability_wind_gust_ge_55_kn_last_12h": None,
    # is actually balance during the last 3 hours:
    "radiation_global": {"core_name": "GlobHorRad", "time_of_meas_shift": "prec2ind", "unit": "kJ/m2"},
    # "sunshine_duration": None,
    # "temperature_air_max_200": None,
    # "temperature_air_mean_005": None,
    # no information if temperature is drybulb or something else:
    "temperature_air_mean_200": {"core_name": "DryBulbTemp", "time_of_meas_shift": None, "unit": "K"},
    # "temperature_air_min_200": None,
    "temperature_dew_point_mean_200": {"core_name": "DewPointTemp", "time_of_meas_shift": None, "unit": "K"},
    "visibility_range": {"core_name": "Visibility", "time_of_meas_shift": None, "unit": "m"},
    # "water_equivalent_snow_depth_new_last_1h": None,
    # "water_equivalent_snow_depth_new_last_3h": None,
    # "weather_last_6h": None,
    # "weather_significant": None,
    "wind_direction": {"core_name": "WindDir", "time_of_meas_shift": None, "unit": "deg"},
    # "wind_gust_max_last_12h": None,
    # "wind_gust_max_last_1h": None,
    # "wind_gust_max_last_3h": None,
    "wind_speed": {"core_name": "WindSpeed", "time_of_meas_shift": None, "unit": "m/s"}
}
def DWD_historical_to_core_data(
    df_import: pd.DataFrame, start: datetime, stop: datetime, meta: MetaData
) -> pd.DataFrame:
    """
    Transform imported weather data from DWD historical format into core data format.

    The raw data is parsed to a datetime index, truncated to the requested
    period (extended by one day on each side), reduced to numeric columns,
    cleaned from dummy values, aggregated to hourly values, renamed, time
    shifted, unit converted and finally complemented with pass-through
    variables built from the unshifted data.

    Args:
        df_import (pd.DataFrame): The DataFrame containing imported weather data from DWD.
            Its index is parsed with the format ``%Y%m%d%H%M``.
        start (datetime): The timestamp for the start of the desired data range
            (will be extended for interpolation).
        stop (datetime): The timestamp for the end of the desired data range
            (will be extended for interpolation).
        meta (MetaData): Metadata associated with the data. Its attribute
            ``executed_transformations`` is set by this function.

    Returns:
        pd.DataFrame: The transformed DataFrame in the core data format.
    """

    ### evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=format_DWD_historical
    )

    ### format raw data for further operations
    df = df_import.copy()
    # to datetime
    date_format = "%Y%m%d%H%M"
    df.index = pd.to_datetime(df.index, format=date_format)
    # sort by time
    df = df.sort_index()

    # reduce time period to extended period for working interpolation and for faster operation
    df = time_observation_transformations.truncate_data_from_start_to_stop(
        df, start - timedelta(days=1), stop + timedelta(days=1)
    )

    # select only numeric columns
    df = df.select_dtypes(include=["number"])

    # delete dummy values from DWD
    df = auxiliary.replace_dummy_with_nan(df, format_DWD_historical)

    # resample some via sum some via mean -> results in average of following hour.
    # The hourly frame is built directly from the resampler. Writing the hourly
    # series back into the finer-grained frame would align on index labels and
    # silently drop the aggregate of every hour that has no row exactly at HH:00.
    hourly = df.resample("h")
    # mean is NaN only if all values of the interval are NaN
    df_hourly = hourly.mean()
    for var in df.columns:
        if var in variables_to_sum_DWD_historical:
            # min_count=1: sum is NaN only if the interval holds no valid value
            df_hourly[var] = hourly[var].sum(min_count=1)
    df = df_hourly

    # rename available variables to core data format
    df = auxiliary.rename_columns(df, format_DWD_historical)

    ### convert timezone to UTC
    # the data is for most stations and datasets, as well as for more recent
    # data (several years) in UTC. For more sophisticated handling pull meta
    # and respect time zone or implement dwd_pulling repo from github

    ### shift and interpolate data forward 30mins or backward -30mins
    df_no_shift = df.copy()
    df = time_observation_transformations.shift_time_by_dict(format_DWD_historical, df)

    def transform_DWD_historical(df):
        """Apply variable convention, unit conversions and variable imputation.

        Returns the transformed DataFrame together with the overview returned
        by ``variable_transformations.variable_transform_all``.
        """
        # drop unnecessary variables
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)

        ### convert units
        df["AtmPressure"] = unit_conversions.hPa_to_Pa(df["AtmPressure"])
        df["DiffHorRad"] = unit_conversions.Jcm2_to_Whm2(df["DiffHorRad"])
        df["GlobHorRad"] = unit_conversions.Jcm2_to_Whm2(df["GlobHorRad"])
        df["HorInfra"] = unit_conversions.Jcm2_to_Whm2(df["HorInfra"])
        df["TotalSkyCover"] = unit_conversions.eigth_to_tenth(df["TotalSkyCover"])

        ### impute missing variables from other available ones
        df, calc_overview = variable_transformations.variable_transform_all(df, meta)

        return df, calc_overview

    df, meta.executed_transformations = transform_DWD_historical(df)

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back and forth interpolating
    df = pass_through_handling.create_pass_through_variables(
        df_shifted=df,
        df_no_shift=df_no_shift,
        format=format_DWD_historical,
        transform_func=transform_DWD_historical,
        meta=meta,
    )

    return df
def DWD_forecast_2_core_data(df_import: pd.DataFrame, meta: MetaData) -> pd.DataFrame:
    """
    Convert an imported DWD forecast DataFrame into the core data format.

    Args:
        df_import (pd.DataFrame): Imported weather forecast data from DWD.
        meta (MetaData): Metadata associated with the data. Its attribute
            ``executed_transformations`` is set by this function.

    Returns:
        pd.DataFrame: The forecast data in the core data format.
    """

    ### make sure the forecast mapping is consistent with the core format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=format_DWD_forecast
    )

    ### prepare raw data
    # copy, resample to a complete and monotonic hourly DatetimeIndex,
    # then delete the timezone information
    forecast = df_import.copy().resample('h').asfreq().tz_localize(None)
    # rename available variables to core data format
    forecast = auxiliary.rename_columns(forecast, format_DWD_forecast)

    ### timezone
    # no conversion necessary: the data pulled by Wetterdienst is already UTC

    ### keep an unshifted copy, then shift and interpolate by 30mins
    unshifted = forecast.copy()
    shifted = time_observation_transformations.shift_time_by_dict(
        format_DWD_forecast, forecast
    )

    def transform_DWD_forecast(df):
        """Apply variable convention, unit conversions and variable imputation."""
        # drop unnecessary variables
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)

        ### convert units; each converter only touches its own column
        unit_converters = (
            ("OpaqueSkyCover", unit_conversions.percent_to_tenth),
            ("TotalSkyCover", unit_conversions.percent_to_tenth),
            ("GlobHorRad", unit_conversions.kJm2_to_Whm2),
            ("DryBulbTemp", unit_conversions.kelvin_to_celcius),
            ("DewPointTemp", unit_conversions.kelvin_to_celcius),
            ("Visibility", unit_conversions.divide_by_1000),
        )
        for column, convert in unit_converters:
            df[column] = convert(df[column])

        ### impute missing variables from other available ones
        df, overview = variable_transformations.variable_transform_all(df, meta)
        return df, overview

    core_data, meta.executed_transformations = transform_DWD_forecast(shifted)

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back and forth interpolating
    return pass_through_handling.create_pass_through_variables(
        df_shifted=core_data,
        df_no_shift=unshifted,
        format=format_DWD_forecast,
        transform_func=transform_DWD_forecast,
        meta=meta,
    )