Coverage for aixweather/transformation_functions/time_observation_transformations.py: 93%
43 statements
coverage.py v7.4.4, created at 2025-01-06 16:01 +0000
"""
Includes functions to execute time shift operations. It also includes a
function to truncate data in a given interval.
"""
6import datetime
7import logging
9import pandas as pd
12logger = logging.getLogger(__name__)
def shift_timestamps_and_interpolate(df: pd.DataFrame, backward: bool) -> pd.DataFrame:
    """
    Shift the data by 30 minutes and interpolate it back onto the original timestamps.

    The values are cast to float, the index is moved 30 minutes backward or
    forward, the shifted data is resampled onto a 30-minute grid with linear
    interpolation (at most one consecutive missing value is filled), and the
    result is finally reduced to the timestamps of the input again.

    Timestamps of the input that are not covered by the shifted and
    interpolated data (e.g. at the edge of the time range) are NaN in the
    result.

    Args:
        df (pd.DataFrame): Timestamp-indexed data. A single column (pd.Series)
            is handled the same way; this is how shift_time_by_dict calls it.
        backward (bool): If True, shift timestamps 30 minutes backward.
            If False, shift them 30 minutes forward.

    Returns:
        pd.DataFrame: Float data on the original index, shifted and
        interpolated as specified. The input object is not modified.
    """
    # backward: avg_preceding_hour_2_indicated_time or indicated_time_2_avg_following_hour
    # forward:  avg_following_hour_2_indicated_time or indicated_time_2_avg_preceding_hour
    offset = "-30min" if backward else "30min"

    # astype returns a new object, so the caller's data stays untouched.
    values = df.astype(float)

    # Move the index, then fill the half-hour grid points in between.
    shifted = values.shift(freq=offset)
    half_hourly = shifted.resample("30min").interpolate(method="linear", limit=1)

    # Keep only the original timestamps.
    return half_hourly.reindex(df.index)
def avg_preceding_hour_2_indicated_time(df):
    """
    Apply a backward shift via shift_timestamps_and_interpolate.

    Short keyword for this conversion: prec2ind
    """
    return shift_timestamps_and_interpolate(df, backward=True)
def indicated_time_2_avg_following_hour(df):
    """
    Apply a backward shift via shift_timestamps_and_interpolate.

    Short keyword for this conversion: ind2foll
    """
    return shift_timestamps_and_interpolate(df, backward=True)
def avg_following_hour_2_indicated_time(df):
    """
    Apply a forward shift via shift_timestamps_and_interpolate.

    Short keyword for this conversion: foll2ind
    """
    return shift_timestamps_and_interpolate(df, backward=False)
def indicated_time_2_avg_preceding_hour(df):
    """
    Apply a forward shift via shift_timestamps_and_interpolate.

    Short keyword for this conversion: ind2prec
    """
    return shift_timestamps_and_interpolate(df, backward=False)
def shift_time_by_dict(format_dict: dict, df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply the time shift requested in a format dictionary to each column.

    Every entry of `format_dict` is read for two items: "core_name" (the
    column of `df` to work on) and "time_of_meas_shift" (the shift keyword).
    Depending on the keyword the column is passed through the matching
    conversion function and written back into `df`.

    Args:
        format_dict (dict): Maps a variable name to a mapping that provides
            "core_name" and "time_of_meas_shift". Valid shift keywords are
            "prec2ind", "ind2foll", "foll2ind", "ind2prec" or None (no shift).
        df (pd.DataFrame): Timestamped data whose columns are core names.
            It is modified in place.

    Returns:
        pd.DataFrame: The same `df` object with the shifted columns.

    Raises:
        ValueError: If a shift keyword is none of the valid options.
    """
    meas_key = "time_of_meas_shift"
    core_name = "core_name"

    # Checked in this order with ==, mirroring a plain if/elif chain.
    conversions = (
        ("prec2ind", avg_preceding_hour_2_indicated_time),
        ("ind2foll", indicated_time_2_avg_following_hour),
        ("foll2ind", avg_following_hour_2_indicated_time),
        ("ind2prec", indicated_time_2_avg_preceding_hour),
    )

    for key, value in format_dict.items():
        column = value[core_name]

        # A named column that is absent means there is nothing to shift.
        # An empty name (as used for output formats) must not end up here,
        # so its shift keyword is still validated below.
        if column not in df.columns and column:
            logger.debug("No measurements for %s.", column)
            continue

        requested = value[meas_key]
        convert = next(
            (func for keyword, func in conversions if requested == keyword),
            None,
        )

        if convert is not None:
            df.loc[:, column] = convert(df[column])
        elif requested is not None:
            raise ValueError(
                f"Invalid keyword for {meas_key} for {key}: '{value[meas_key]}' is not valid."
            )
        # requested is None: the variable is left unshifted.

    return df
def truncate_data_from_start_to_stop(
    df: pd.DataFrame, start: datetime.datetime, stop: datetime.datetime
) -> pd.DataFrame:
    """
    Truncate a DataFrame to the rows between a start and a stop timestamp.

    Both bounds are inclusive: rows whose index equals `start` or `stop`
    are kept.

    Args:
        df (pd.DataFrame): The DataFrame containing timestamped data; its
            index is compared against `start` and `stop`.
        start (datetime.datetime): First timestamp to keep.
        stop (datetime.datetime): Last timestamp to keep.

    Returns:
        pd.DataFrame: The rows of `df` within the specified time range. The
        DataFrame passed in is not modified.
    """
    # Boolean row mask over the index, inclusive on both ends.
    in_range = (df.index >= start) & (df.index <= stop)
    return df.loc[in_range]