Coverage for aixweather/transformation_to_core_data/EPW.py: 98%
55 statements
coverage.py v7.4.4, created at 2025-01-06 16:01 +0000
1"""
2This module includes a function to transform EPW data to core data format.
3"""
5import pandas as pd
6from copy import deepcopy
7import logging
9from aixweather import definitions
10from aixweather.imports.utils_import import MetaData
11from aixweather.transformation_functions import (
12 auxiliary,
13 time_observation_transformations,
14 variable_transformations,
15 pass_through_handling,
16)
17from aixweather.core_data_format_2_output_file.to_epw_energyplus import (
18 format_epw as format_epw_export,
19)
21logger = logging.getLogger(__name__)


def EPW_to_core_data(df_import: pd.DataFrame, meta: MetaData) -> pd.DataFrame:
    """
    Transform imported EPW (EnergyPlus Weather) data into core data format.

    Args:
        df_import (pd.DataFrame): The DataFrame containing imported EPW weather data.
        meta (MetaData): Metadata associated with the data.

    Returns:
        pd.DataFrame: The transformed DataFrame in the core data format.
    """

    # invert format_epw from core2export to import2core
    format_epw = deepcopy(format_epw_export)
    for key, value in format_epw.items():
        time_shift = value["time_of_meas_shift"]
        if time_shift == "ind2prec":
            value["time_of_meas_shift"] = "prec2ind"
        elif time_shift == "ind2foll":
            value["time_of_meas_shift"] = "foll2ind"
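    # (e.g. an entry whose "time_of_meas_shift" is "ind2prec" in the export mapping
    # becomes "prec2ind" here, so the same dict describes the inverse shift on import)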

    # evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=format_epw
    )

    def epw_to_datetimeindex(df):
        """
        Convert the first 4 columns of the DataFrame to a DatetimeIndex and set it as the
        index."""
        # The first 4 columns represent year, month, day, and hour respectively,
        # but with hour 24 instead of hour 0.
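        # For example, a row (2021, 1, 31, 24) is interpreted as 2021-02-01 00:00,
        # i.e. hour 24 of a day maps to hour 0 of the following day.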
        hour = df.iloc[:, 3].copy()
        mask_24hr = hour == 24
        hour.loc[mask_24hr] = 0

        # loop one by one to avoid faults with non-continuous data
        datetime_list = []
        for index, row in df.iterrows():
            year, month, day, hour = row[:4]
            if hour == 24:
                hour = 0
                # Increment the day by one for those rows where hour
                # was originally 24
                row_datetime = pd.Timestamp(year, month, day, hour) + pd.Timedelta(days=1)
            else:
                row_datetime = pd.Timestamp(year, month, day, hour)
            datetime_list.append(row_datetime)

        # Setting datetime column as index with name 'datetime'
        df.index = datetime_list
        df.index = df.index.rename('datetime')

        return df

    def if_TMY_convert_to_one_year(df):
        """TMY (typical meteorological year) data in .epw files often contains data for a period
        of one year, but each month stems from a different year. This would lead to several years
        of NaN data in between. As the year is irrelevant in TMY data, we set all dates to the
        year of February's data. February is chosen to avoid leap year issues.

        It is automatically detected whether the data is a TMY through the following criteria:
        - the available data covers exactly 8760 data points (one non-leap year)
        - the period covered by the timestamps spans more than one year
        - the first date is the first of January at hour 1

        An info log message is emitted if the data is transformed."""
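        # (e.g. a TMY whose January rows stem from 2009 and whose February rows stem from
        # 2015 gets all rows assigned the year 2015, yielding one continuous year)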
        if (
            len(df) == 8760  # exactly one year of data
            and df.iloc[:, 0].max() - df.iloc[:, 0].min() > 1  # spanning more than one year
            and df.iloc[0, 1] == 1  # first month is January
            and df.iloc[0, 2] == 1  # first day is one
            and df.iloc[0, 3] == 1  # first hour is one
        ):
            year_of_february = df.loc[df.iloc[:, 1] == 2, 0].iloc[0]
            # Replace the year component with the year of February
            df.iloc[:, 0] = year_of_february
            logger.info(
                "The data was transformed to one year of data as it seems to be TMY data. "
                "The year is irrelevant for TMY data."
            )
        return df

    ### preprocessing raw data for further operations
    df = df_import.copy()
    df = if_TMY_convert_to_one_year(df)
    df = epw_to_datetimeindex(df)
    # Resample the DataFrame to make the DatetimeIndex complete and monotonic
    df = df.resample("h").asfreq()
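    # (asfreq inserts NaN rows for any hours missing in the raw file, so downstream
    # operations can rely on a gap-free hourly index)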
    # give names to columns according to documentation of import data
    df.columns = [key for key in format_epw.keys()]
    # rename available variables to core data format
    df = auxiliary.rename_columns(df, format_epw)
    # delete dummy values from EPW
    df = auxiliary.replace_dummy_with_nan(df, format_epw)

    ### convert timezone to UTC+0
    df = df.shift(periods=-meta.timezone, freq="h", axis=0)
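    # (assuming meta.timezone is the UTC offset in hours: e.g. meta.timezone == 1 for UTC+1
    # shifts the index back by one hour, so the resulting timestamps are in UTC+0)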

    ### shift and interpolate data forward or backward by 30 min
    df_no_shift = df.copy()
    df = time_observation_transformations.shift_time_by_dict(format_epw, df)

    def transform(df):
        ### force variable naming format_core_data
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)
        ### unit conversion
        # all units correct
        ### impute missing variables from other available ones
        df, calc_overview = variable_transformations.variable_transform_all(df, meta)
        return df, calc_overview

    df, meta.executed_transformations = transform(df)

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back and forth interpolating
    df = pass_through_handling.create_pass_through_variables(
        df_shifted=df,
        df_no_shift=df_no_shift,
        format=format_epw,
        transform_func=transform,
        meta=meta,
    )

    return df
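

# Illustrative usage sketch (not part of the module): assuming `df_raw` is the raw EPW
# table with year, month, day, and hour in its first four columns, and `meta` is a
# MetaData object whose `timezone` attribute holds the UTC offset in hours (both would
# normally come from the aixweather import step):
#
#     core_df = EPW_to_core_data(df_raw, meta)
#     print(meta.executed_transformations)  # record of the variable transformations applied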