Coverage for aixweather/transformation_to_core_data/EPW.py: 98%
55 statements
coverage.py v7.4.4, created at 2025-01-06 16:01 +0000
1"""
2This module includes a function to transform EPW data to core data format.
3"""
5import pandas as pd
6from copy import deepcopy
7import logging
9from aixweather import definitions
10from aixweather.imports.utils_import import MetaData
11from aixweather.transformation_functions import (
12 auxiliary,
13 time_observation_transformations,
14 variable_transformations,
15 pass_through_handling,
16)
17from aixweather.core_data_format_2_output_file.to_epw_energyplus import (
18 format_epw as format_epw_export,
19)
21logger = logging.getLogger(__name__)


def EPW_to_core_data(df_import: pd.DataFrame, meta: MetaData) -> pd.DataFrame:
    """
    Transform imported EPW (EnergyPlus Weather) data into core data format.

    Args:
        df_import (pd.DataFrame): The DataFrame containing imported EPW weather data.
        meta (MetaData): Metadata associated with the data.

    Returns:
        pd.DataFrame: The transformed DataFrame in the core data format.
    """

    # invert format_epw from core2export to import2core
    format_epw = deepcopy(format_epw_export)
    for key, value in format_epw.items():
        time_shift = value["time_of_meas_shift"]
        if time_shift == "ind2prec":
            value["time_of_meas_shift"] = "prec2ind"
        elif time_shift == "ind2foll":
            value["time_of_meas_shift"] = "foll2ind"
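    # (e.g. an entry whose "time_of_meas_shift" is "ind2prec" in the export mapping
    # becomes "prec2ind" here, so the same dict describes the inverse shift on import)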

    # evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=format_epw
    )

    def epw_to_datetimeindex(df):
        """
        Convert the first 4 columns of the DataFrame to a DatetimeIndex and set it as the
        index."""
        # The first 4 columns represent year, month, day, and hour respectively,
        # but with hour 24 instead of hour 0.
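        # For example, a row (2021, 1, 31, 24) is interpreted as 2021-02-01 00:00,
        # i.e. hour 24 of a day maps to hour 0 of the following day.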
        hour = df.iloc[:, 3].copy()
        mask_24hr = hour == 24
        hour.loc[mask_24hr] = 0

        # loop one by one to avoid faults with non-continuous data
        datetime_list = []
        for index, row in df.iterrows():
            year, month, day, hour = row[:4]
            if hour == 24:
                hour = 0
                # Increment the day by one for those rows where hour
                # was originally 24
                row_datetime = pd.Timestamp(year, month, day, hour) + pd.Timedelta(days=1)
            else:
                row_datetime = pd.Timestamp(year, month, day, hour)
            datetime_list.append(row_datetime)

        # Setting datetime column as index with name 'datetime'
        df.index = datetime_list
        df.index = df.index.rename('datetime')

        return df

    def if_TMY_convert_to_one_year(df):
        """TMY (typical meteorological year) data in .epw files often contains data for a period
        of one year, but each month stems from a different year. This would lead to several years
        of NaN data in between. As the year is irrelevant in TMY data, we set all dates to the
        year of February's data. February is chosen to avoid leap year issues.

        It is automatically detected whether the data is a TMY through the following criteria:
        - the available data covers exactly 8760 data points (one non-leap year)
        - the period covered by the timestamps spans more than one year
        - the first date is the first of January at hour 1

        An info log message is emitted if the data is transformed."""
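        # (e.g. a TMY whose January rows stem from 2009 and whose February rows stem from
        # 2015 gets all rows assigned the year 2015, yielding one continuous year)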
        if (
            len(df) == 8760  # exactly one year of data
            and df.iloc[:, 0].max() - df.iloc[:, 0].min() > 1  # spanning more than one year
            and df.iloc[0, 1] == 1  # first month is January
            and df.iloc[0, 2] == 1  # first day is one
            and df.iloc[0, 3] == 1  # first hour is one
        ):
            year_of_february = df.loc[df.iloc[:, 1] == 2, 0].iloc[0]
            # Replace the year component with the year of February
            df.iloc[:, 0] = year_of_february
            logger.info(
                "The data was transformed to one year of data as it seems to be TMY data. "
                "The year is irrelevant for TMY data."
            )
        return df

    ### preprocessing raw data for further operations
    df = df_import.copy()
    df = if_TMY_convert_to_one_year(df)
    df = epw_to_datetimeindex(df)
    # Resample the DataFrame to make the DatetimeIndex complete and monotonic
    df = df.resample("h").asfreq()
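    # (asfreq inserts NaN rows for any hours missing in the raw file, so downstream
    # operations can rely on a gap-free hourly index)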
    # give names to columns according to documentation of import data
    df.columns = [key for key in format_epw.keys()]
    # rename available variables to core data format
    df = auxiliary.rename_columns(df, format_epw)
    # delete dummy values from EPW
    df = auxiliary.replace_dummy_with_nan(df, format_epw)

    ### convert timezone to UTC+0
    df = df.shift(periods=-meta.timezone, freq="h", axis=0)
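    # (assuming meta.timezone is the UTC offset in hours: e.g. meta.timezone == 1 for UTC+1
    # shifts the index back by one hour, so the resulting timestamps are in UTC+0)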

    ### shift and interpolate data forward or backward by 30 min
    df_no_shift = df.copy()
    df = time_observation_transformations.shift_time_by_dict(format_epw, df)

    def transform(df):
        ### force variable naming format_core_data
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)
        ### unit conversion
        # all units correct
        ### impute missing variables from other available ones
        df, calc_overview = variable_transformations.variable_transform_all(df, meta)
        return df, calc_overview

    df, meta.executed_transformations = transform(df)

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back and forth interpolating
    df = pass_through_handling.create_pass_through_variables(
        df_shifted=df,
        df_no_shift=df_no_shift,
        format=format_epw,
        transform_func=transform,
        meta=meta,
    )

    return df
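

# Illustrative usage sketch (not part of the module): assuming `df_raw` is the raw EPW
# table with year, month, day, and hour in its first four columns, and `meta` is a
# MetaData object whose `timezone` attribute holds the UTC offset in hours (both would
# normally come from the aixweather import step):
#
#     core_df = EPW_to_core_data(df_raw, meta)
#     print(meta.executed_transformations)  # record of the variable transformations applied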