Coverage for aixweather/transformation_to_core_data/EPW.py: 98%

55 statements  

coverage.py v7.4.4, created at 2025-01-06 16:01 +0000

1""" 

2This module includes a function to transform EPW data to core data format. 

3""" 

4 

5import pandas as pd 

6from copy import deepcopy 

7import logging 

8 

9from aixweather import definitions 

10from aixweather.imports.utils_import import MetaData 

11from aixweather.transformation_functions import ( 

12 auxiliary, 

13 time_observation_transformations, 

14 variable_transformations, 

15 pass_through_handling, 

16) 

17from aixweather.core_data_format_2_output_file.to_epw_energyplus import ( 

18 format_epw as format_epw_export, 

19) 

20 

21logger = logging.getLogger(__name__) 

22 

23 

def EPW_to_core_data(df_import: pd.DataFrame, meta: MetaData) -> pd.DataFrame:
    """
    Transform imported EPW (EnergyPlus Weather) data into core data format.

    Args:
        df_import (pd.DataFrame): The DataFrame containing imported EPW weather data.
        meta (MetaData): Metadata associated with the data.

    Returns:
        pd.DataFrame: The transformed DataFrame in the core data format.
    """

    # invert format_epw from core2export to import2core
    format_epw = deepcopy(format_epw_export)
    for key, value in format_epw.items():
        time_shift = value["time_of_meas_shift"]
        if time_shift == "ind2prec":
            value["time_of_meas_shift"] = "prec2ind"
        elif time_shift == "ind2foll":
            value["time_of_meas_shift"] = "foll2ind"

    # evaluate correctness of format
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=format_epw
    )

    def epw_to_datetimeindex(df):
        """Convert the first 4 columns of the DataFrame to a DatetimeIndex and set it
        as the index."""
        # The first 4 columns represent year, month, day, and hour respectively,
        # but with hour 24 instead of hour 0.
        hour = df.iloc[:, 3].copy()
        mask_24hr = hour == 24
        hour.loc[mask_24hr] = 0

        # loop one by one to avoid faults with non-continuous data
        datetime_list = []
        for index, row in df.iterrows():
            year, month, day, hour = row[:4]
            if hour == 24:
                hour = 0
                # Increment the day by one for those rows where hour
                # was originally 24
                row_datetime = pd.Timestamp(year, month, day, hour) + pd.Timedelta(days=1)
            else:
                row_datetime = pd.Timestamp(year, month, day, hour)
            datetime_list.append(row_datetime)

        # Setting datetime column as index with name 'datetime'
        df.index = datetime_list
        df.index = df.index.rename('datetime')

        return df

    def if_TMY_convert_to_one_year(df):
        """TMY (typical meteorological year) data in .epw files often contains data for a
        period of one year, but each month stems from a different year. This would lead to
        several years of NaN data in between. As the year is irrelevant in TMY data, we set
        all dates to the year of February's data. February is chosen to avoid leap-year
        issues.

        Whether the data is a TMY is detected automatically through the following criteria:
        - the available data covers exactly 8760 data points (one non-leap year)
        - the period covered by the timestamps spans more than one year
        - the first date is the first of January at hour 1

        An info log message is emitted if the data is transformed."""
        if (
            len(df) == 8760  # exactly one year of data
            and df.iloc[:, 0].max() - df.iloc[:, 0].min() > 1  # spanning over more than one year
            and df.iloc[0, 1] == 1  # first month is January
            and df.iloc[0, 2] == 1  # first day is one
            and df.iloc[0, 3] == 1  # first hour is one
        ):
            year_of_february = df.loc[df.iloc[:, 1] == 2, 0].iloc[0]
            # Replace the year component with the year of February
            df.iloc[:, 0] = year_of_february
            logger.info(
                "The data was transformed to one year of data as it seems to be TMY data. "
                "The year is irrelevant for TMY data."
            )
        return df

    ### preprocessing raw data for further operations
    df = df_import.copy()
    df = if_TMY_convert_to_one_year(df)
    df = epw_to_datetimeindex(df)
    # Resample the DataFrame to make the DatetimeIndex complete and monotonic
    df = df.resample("h").asfreq()
    # give names to columns according to documentation of import data
    df.columns = list(format_epw.keys())
    # rename available variables to core data format
    df = auxiliary.rename_columns(df, format_epw)
    # delete dummy values from EPW
    df = auxiliary.replace_dummy_with_nan(df, format_epw)

    ### convert timezone to UTC+0
    df = df.shift(periods=-meta.timezone, freq="h", axis=0)

    ### shift and interpolate data forward or backward by 30 minutes
    df_no_shift = df.copy()
    df = time_observation_transformations.shift_time_by_dict(format_epw, df)

    def transform(df):
        ### force variable naming format_core_data
        df = auxiliary.force_data_variable_convention(df, definitions.format_core_data)
        ### unit conversion
        # all units correct
        ### impute missing variables from other available ones
        df, calc_overview = variable_transformations.variable_transform_all(df, meta)
        return df, calc_overview

    df, meta.executed_transformations = transform(df)

    ### add unshifted data for possible later direct use (pass-through),
    ### to avoid back-and-forth interpolation
    df = pass_through_handling.create_pass_through_variables(
        df_shifted=df,
        df_no_shift=df_no_shift,
        format=format_epw,
        transform_func=transform,
        meta=meta,
    )

    return df
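
For context, here is a minimal usage sketch; it is not part of the module above. It assumes MetaData is default-constructible and that the raw .epw file has already been read into a DataFrame whose columns follow the EPW column order (year, month, day, hour, ...); the file name, the timezone value, and the plain pandas read used as a stand-in for the project's own import routine are illustrative assumptions.

import pandas as pd

from aixweather.imports.utils_import import MetaData
from aixweather.transformation_to_core_data.EPW import EPW_to_core_data

# Stand-in read: EPW files carry 8 header lines before the hourly records.
# (Assumption: the project's import helpers would normally perform this step.)
df_import = pd.read_csv("weather.epw", skiprows=8, header=None)

meta = MetaData()   # assumed default-constructible; populate as needed
meta.timezone = 1   # UTC offset (hours) of the raw data; EPW_to_core_data shifts by -meta.timezone

df_core = EPW_to_core_data(df_import, meta)
print(df_core.head())
print(meta.executed_transformations)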