Coverage for aixweather/imports/TRY.py: 69%
88 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-12-31 11:58 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-12-31 11:58 +0000
1"""
2import DWD TRY data
3"""
5import logging
6import re
7import random
8import pandas as pd
10from aixweather.imports.utils_import import MetaData
12logger = logging.getLogger(__name__)
def _handle_TRY_type(path: str) -> tuple:
    """
    Determine the TRY format type based on the provided file path.

    The TRY year is inferred from a marker in the file path. The header row is
    located by scanning the file for the first line containing ``***``; the
    line directly above it is taken as the column header.

    Args:
        path (str): The file path to the TRY dataset file.

    Returns:
        tuple: A tuple containing the TRY year (int) and the header row number
            (int, counted from 0).

    Raises:
        ValueError: If the TRY format cannot be detected through the file name,
            is not supported, or no usable ``***`` separator line is found.
    """
    # Marker expected in the file name -> TRY year it stands for.
    TRY_file_naming = {
        "TRY2004": {"year": 2004},
        "TRY2010": {"year": 2010},
        "TRY2015": {"year": 2015},
        "TRY2045": {"year": 2045},
    }

    if not path.endswith(".dat"):
        # Previously this case fell through to the return statement and
        # failed with an UnboundLocalError instead of the documented error.
        raise ValueError(
            f"TRY format could not be detected through file name,"
            f" expected a '.dat' file but got '{path}'."
        )

    ### get type of TRY, i.e. the year of the TRY
    TRY_year = None
    for key, info in TRY_file_naming.items():
        if key in path:
            TRY_year = info["year"]
            break

    if TRY_year is None:
        raise ValueError(
            f"TRY format could not be detected through file name,"
            f" expected {list(TRY_file_naming)} in the file name."
        )
    if TRY_year in (2004, 2010):
        raise ValueError(f"TRY format {TRY_year} is not supported.")

    # Header rows are the rows with general information of the dataset,
    # they are skipped until the variable declaration.
    # The encoding matches the one used when loading the data itself, so
    # non-ASCII bytes in the header cannot break the scan.
    with open(path, "r", encoding="latin") as file:
        for line_number, line in enumerate(file, start=1):
            if "***" in line:
                # -1 for header above *** and -1 for start to count at 0
                header_row = line_number - 1 - 1
                if header_row < 0:
                    raise ValueError(
                        f"TRY file '{path}' has no header line above the"
                        f" '***' separator."
                    )
                return TRY_year, header_row

    raise ValueError(
        f"TRY file '{path}' contains no '***' separator line,"
        f" the header row could not be determined."
    )
def _read_header_meters(header_lines: list, keyword: str) -> int:
    """Return the integer ``: <value> Meter`` entry of the first matching header line."""
    for line in header_lines:
        if keyword not in line:
            continue
        match = re.search(r":\s*(-?\d+) Meter", line)
        if match is not None:
            return int(match.group(1))
    raise ValueError(
        f"Could not find a '{keyword} : <value> Meter' entry in the TRY file header."
    )


def load_try_meta_from_file(path: str) -> MetaData:
    """
    Load a TRY file from a specified path and parse the header for metadata.

    Easting ("Rechtswert"), northing ("Hochwert") and altitude ("Hoehenlage")
    are read from the header lines above the column header. The coordinates
    are transformed from EPSG:3034 to WGS 84 and a city name is looked up for
    the resulting location.

    Args:
        path (str): The file path to the TRY file to be loaded.

    Returns:
        MetaData: An object containing the parsed metadata from the TRY file.

    Raises:
        ValueError: If the TRY type cannot be determined or a required header
            entry is missing or malformed.
        ImportError: If the optional geo dependencies are not installed.
    """
    meta = MetaData()
    TRY_year, header_row = _handle_TRY_type(path)

    ### load file to python
    # Only the general-information lines above the column header are needed.
    # The encoding matches the one used when loading the weather data itself.
    with open(path, "r", encoding="latin") as file:
        header_lines = [line for _, line in zip(range(header_row), file)]

    ### read raw meta data
    rechtswert = _read_header_meters(header_lines, "Rechtswert")  # easting
    hochwert = _read_header_meters(header_lines, "Hochwert")  # northing
    hoehenlage = _read_header_meters(header_lines, "Hoehenlage")  # altitude

    try:
        import geopandas as gpd
        from shapely.geometry import Point
    except ImportError as err:
        raise ImportError("Optional dependency 'TRY' not installed. Conversion of longitude and "
                          "latitude not possible and hence no radiation transformation.") from err

    ### convert latitude and longitude
    # Create a GeoDataFrame with the provided coordinates
    # (using pyproj directly led to wrong calculation)
    gdf = gpd.GeoDataFrame(
        {"geometry": [Point(rechtswert, hochwert)]},
        crs="EPSG:3034",  # Original coordinate system
    )

    # Transform to WGS 84
    gdf_wgs84 = gdf.to_crs("EPSG:4326")

    # In the transformed point, x is the longitude and y is the latitude.
    point_wgs84 = gdf_wgs84.geometry.iloc[0]
    longitude_wgs84 = point_wgs84.x
    latitude_wgs84 = point_wgs84.y

    ### try to get city of the location
    city = get_city_from_location(
        longitude_wgs84=longitude_wgs84,
        latitude_wgs84=latitude_wgs84,
        meta=meta,
    )

    meta.station_name = city
    meta.input_source = f"TRY{TRY_year}"
    meta.try_year = TRY_year
    meta.altitude = hoehenlage
    meta.longitude = longitude_wgs84
    meta.latitude = latitude_wgs84
    meta.set_imported_timezone(1)  # Always in TRY, used for later export

    return meta
def get_city_from_location(latitude_wgs84, longitude_wgs84, meta: MetaData):
    """
    Get the city of the given latitude and longitude via reverse geocoding.

    The lookup is best effort. meta.station_name is used as the city if:
    - the optional dependency is not installed,
    - the geocoding request fails or yields no result, or
    - the address is malformatted (i.e. not a city, town, village, o.s.).

    Args:
        latitude_wgs84: Latitude of the location in WGS 84.
        longitude_wgs84: Longitude of the location in WGS 84.
        meta (MetaData): Metadata object providing the fallback station_name.

    Returns:
        The name found for the location, or meta.station_name as fallback.
    """
    try:
        from geopy.exc import GeopyError
        from geopy.geocoders import Nominatim
    except ImportError:
        logger.warning(
            "Optional dependency 'TRY' not installed. "
            "Not possible to extract city name, using station_name %s.",
            meta.station_name
        )
        return meta.station_name

    # Initialize Nominatim geolocator
    user_agent = f"aixweather_{str(random.randint(1, 1000))}"
    geolocator = Nominatim(user_agent=user_agent)

    # Perform reverse geocoding; a failing web service must not abort the
    # import of a local weather file.
    try:
        location = geolocator.reverse((latitude_wgs84, longitude_wgs84))
    except GeopyError as err:
        logger.warning(
            "Reverse geocoding failed (%s), using station_name %s.",
            err,
            meta.station_name
        )
        return meta.station_name

    # reverse() returns None when no result is found for the coordinates.
    if location is None:
        return meta.station_name

    address = location.raw.get("address", {})
    # Address components in order of preference, the first one present wins.
    for key in ("city", "town", "village", "hamlet", "suburb", "locality"):
        if key in address:
            return address[key]
    return meta.station_name
def load_try_from_file(path: str) -> pd.DataFrame:
    """
    Import data from a TRY file and convert it into a DataFrame.

    Args:
        path (str): The absolute path to the TRY file.

    Returns:
        pd.DataFrame: A DataFrame containing the imported data from the TRY file.
    """
    # Only the header row is needed here, the TRY year is not used.
    _, header_row = _handle_TRY_type(path)

    ### load file to Dataframe
    weather_df = pd.read_table(
        filepath_or_buffer=path,
        header=header_row,
        sep=r"\s+",  # raw string, "\s" is not a valid string escape sequence
        # header_row was counted over all physical lines of the file, so blank
        # lines must not be skipped here.
        skip_blank_lines=False,
        encoding="latin",
    )
    # Drop the first row: it is the line directly below the column header
    # (the "***" separator located by _handle_TRY_type) and holds no data.
    return weather_df.iloc[1:]