Coverage for aixweather/transformation_functions/time_observation_transformations.py: 93%

43 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-01-06 16:01 +0000

1""" 

2Includes functions to execute time shift operations. It also includes a 

3function to truncate data in a given interval.

4""" 

5 

6import datetime 

7import logging 

8 

9import pandas as pd 

10 

11 

12logger = logging.getLogger(__name__) 

13 

14 

def shift_timestamps_and_interpolate(df: pd.DataFrame, backward: bool) -> pd.DataFrame:
    """
    Move the values of `df` by half an hour in time and re-sample them
    onto the original timestamps via linear interpolation.

    The data is cast to float, its index is shifted by 30 minutes in the
    requested direction, the shifted data is resampled on a 30-minute grid
    and gaps of at most one consecutive step are filled linearly. Finally
    only the rows at the original timestamps are kept; original timestamps
    for which no value is available after the shift (e.g. at the edge of
    the record) come back as NaN.

    NOTE(review): the 30-minute grid with `limit=1` presumably targets
    hourly input data -- confirm against callers.

    Args:
        df (pd.DataFrame): Time-indexed data to be shifted. Values must be
            convertible to float.
        backward (bool): True shifts the timestamps 30 minutes backward,
            False shifts them 30 minutes forward.

    Returns:
        pd.DataFrame: Float data on the original index, holding the shifted
        and interpolated values.
    """
    # backward: prec2ind / ind2foll, forward: foll2ind / ind2prec
    offset = "-30min" if backward else "30min"

    numeric = df.astype(float)

    # Shift the index, then fill the half-hour grid points in between.
    half_hourly = (
        numeric.shift(freq=offset)
        .resample("30min")
        .interpolate(method="linear", limit=1)
    )

    # Drop the helper grid points again: only original timestamps remain.
    return half_hourly.reindex(numeric.index)

47 

48 

def avg_preceding_hour_2_indicated_time(df):
    """
    Shift the data 30 minutes backward (short keyword: prec2ind).

    Thin wrapper that delegates to shift_timestamps_and_interpolate with
    backward=True and returns its result unchanged.
    """
    return shift_timestamps_and_interpolate(df, backward=True)

55 

56 

def indicated_time_2_avg_following_hour(df):
    """
    Shift the data 30 minutes backward (short keyword: ind2foll).

    Thin wrapper that delegates to shift_timestamps_and_interpolate with
    backward=True and returns its result unchanged.
    """
    return shift_timestamps_and_interpolate(df, backward=True)

63 

64 

def avg_following_hour_2_indicated_time(df):
    """
    Shift the data 30 minutes forward (short keyword: foll2ind).

    Thin wrapper that delegates to shift_timestamps_and_interpolate with
    backward=False and returns its result unchanged.
    """
    return shift_timestamps_and_interpolate(df, backward=False)

70 

71 

def indicated_time_2_avg_preceding_hour(df):
    """
    Shift the data 30 minutes forward (short keyword: ind2prec).

    Thin wrapper that delegates to shift_timestamps_and_interpolate with
    backward=False and returns its result unchanged.
    """
    return shift_timestamps_and_interpolate(df, backward=False)

77 

78 

def shift_time_by_dict(format_dict: dict, df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply the per-variable time-of-measurement shift described by a format dictionary.

    Every entry of `format_dict` is read for two fields: "core_name" (the
    column of `df` it refers to) and "time_of_meas_shift" (the shift keyword).
    Depending on the keyword the column is overwritten with the result of the
    matching shift function:

        "prec2ind" -> avg_preceding_hour_2_indicated_time
        "ind2foll" -> indicated_time_2_avg_following_hour
        "foll2ind" -> avg_following_hour_2_indicated_time
        "ind2prec" -> indicated_time_2_avg_preceding_hour
        None       -> column is left untouched

    Entries whose non-empty core name is not a column of `df` are skipped
    with a debug log message.

    Args:
        format_dict (dict): Maps a variable key to a dict holding at least
            "core_name" and "time_of_meas_shift".
        df (pd.DataFrame): Time-indexed data whose columns are core names.
            The affected columns are overwritten on this object.

    Returns:
        pd.DataFrame: The DataFrame that was passed in, with the requested
        columns shifted and interpolated.

    Raises:
        ValueError: If a shift keyword is neither None nor one of the four
            keywords listed above.
    """
    meas_key = "time_of_meas_shift"
    core_name = "core_name"

    for var_key, spec in format_dict.items():
        column = spec[core_name]

        # A missing column means there are no measurements for it. The
        # trailing truthiness check keeps empty core names (as used when
        # this function is called in 2output) from triggering the message.
        if column not in df.columns and column:
            logger.debug("No measurements for %s.", column)
            continue

        shift_kind = spec[meas_key]
        if shift_kind is None:
            # Nothing to shift for this variable.
            continue

        if shift_kind == "prec2ind":
            shifter = avg_preceding_hour_2_indicated_time
        elif shift_kind == "ind2foll":
            shifter = indicated_time_2_avg_following_hour
        elif shift_kind == "foll2ind":
            shifter = avg_following_hour_2_indicated_time
        elif shift_kind == "ind2prec":
            shifter = indicated_time_2_avg_preceding_hour
        else:
            raise ValueError(
                f"Invalid keyword for {meas_key} for {var_key}: '{shift_kind}' is not valid."
            )

        df.loc[:, column] = shifter(df[column])

    return df

124 

125 

def truncate_data_from_start_to_stop(
    df: pd.DataFrame, start: datetime.datetime, stop: datetime.datetime
) -> pd.DataFrame:
    """
    Truncate a DataFrame to the rows whose index lies between start and stop.

    Both bounds are inclusive. The input DataFrame is not modified; the
    selected rows are returned as a new object.

    Args:
        df (pd.DataFrame): The DataFrame containing timestamped data. Its
            index must be comparable with `start` and `stop`.
        start (datetime.datetime): First timestamp to keep (inclusive).
        stop (datetime.datetime): Last timestamp to keep (inclusive).

    Returns:
        pd.DataFrame: The rows of `df` whose index satisfies
        start <= index <= stop.
    """
    # Boolean row mask; `&` combines the two element-wise comparisons.
    in_range = (df.index >= start) & (df.index <= stop)
    return df.loc[in_range]