Coverage for agentlib_flexquant/utils/data_handling.py: 75%
44 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-01 15:10 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-08-01 15:10 +0000
1from typing import Literal
2import pandas as pd
3from agentlib_mpc.utils import TimeConversionTypes, TIME_CONVERSION
# Identifiers of the supported NaN-filling strategies (see fill_nans).
MEAN: str = "mean"
INTERPOLATE: str = "interpolate"
# Static type checkers only accept literal expressions inside Literal[...]
# (PEP 586), so the values are spelled out here instead of referencing the
# constants above. Keep both in sync when adding a new method.
FillNansMethods = Literal["mean", "interpolate"]
def fill_nans(series: pd.Series, method: FillNansMethods) -> pd.Series:
    """
    Fill NaN values in the series with the given method.

    Implemented methods:
    - mean: fill NaN values with the mean of the following values.
    - interpolate: interpolate missing values.

    Keyword arguments:
    series -- The series whose NaN values should be filled
    method -- The filling method, either MEAN or INTERPOLATE

    Returns the filled series. With the mean method a NaN in the last
    position is dropped by _set_mean_values, so the result may be one entry
    shorter than the input.

    Raises a ValueError if the method is not supported or if NaN values
    remain after filling.
    """
    if method == MEAN:
        series = _set_mean_values(series=series)
    elif method == INTERPOLATE:
        # Interpolate missing values, using the index values as x-axis
        series = series.interpolate(method="index", limit_direction="both")
    else:
        # An unknown method used to be ignored silently, which either returned
        # the series unfilled or produced a misleading error message below.
        raise ValueError(
            f"Unsupported method {method!r} for filling NaN values. "
            f"Supported methods are {MEAN!r} and {INTERPOLATE!r}."
        )

    if series.isna().any():
        raise ValueError(f"NaN values are still present in the series after filling them with the method {method}\n{series}")
    return series
def _set_mean_values(series: pd.Series) -> pd.Series:
    """ Fills intervals including the nan with the mean of the following values.

    Every pair of consecutive NaN entries spans a left-closed interval
    [left, right) on the index. The NaN located at ``left`` is replaced by the
    mean of the non-NaN values inside that interval. A NaN in the last
    position has no following values and is therefore removed.

    The passed series is not modified; the filled values are written to a copy.
    An empty series is returned as an (empty) copy.
    """
    def _get_intervals_for_mean(s: pd.Series) -> list[pd.Interval]:
        """ Returns the intervals between each two consecutive NaN entries of s. """
        intervals = []
        start = None
        for index, value in s.items():
            if pd.isna(value):
                if pd.isna(start):
                    # first NaN found: it only opens the first interval
                    start = index
                else:
                    # every further NaN closes the open interval and opens the next
                    end = index
                    intervals.append(pd.Interval(left=start, right=end, closed="left"))
                    start = end
        return intervals

    # work on a copy so the series of the caller is not changed as a side effect
    filled = series.copy()

    for interval in _get_intervals_for_mean(filled):
        in_interval = (interval.left <= filled.index) & (filled.index < interval.right)
        # the NaN at the left bound is part of the selection and skipped by skipna
        filled.loc[interval.left] = filled[in_interval].mean(skipna=True)

    # remove last entry if nan, e.g. with collocation
    # (guarded, since iloc[-1] raises an IndexError on an empty series)
    if not filled.empty and pd.isna(filled.iloc[-1]):
        filled = filled.iloc[:-1]

    return filled
def strip_multi_index(series: pd.Series) -> pd.Series:
    """ Reduce the two-level index of a series to its second level as floats

    If the index labels are strings, they are first parsed into tuples and
    converted into a MultiIndex. The index of the passed series is replaced
    in place and the same series object is returned. An empty series gets an
    empty float index.

    Keyword arguments:
    series -- The series with a MultiIndex or with stringified tuples as index
    """
    if series.empty:
        # series.index[0] below would raise an IndexError for an empty series
        series.index = pd.Index([], dtype=float)
        return series

    # Convert the index (communicated as string) into a MultiIndex
    if isinstance(series.index[0], str):
        # NOTE(review): eval executes arbitrary expressions. If the index
        # strings can come from an untrusted source, this should be replaced
        # by ast.literal_eval -- confirm that all labels are plain literals.
        series.index = series.index.map(lambda x: eval(x))
        series.index = pd.MultiIndex.from_tuples(series.index)
    # vals is multicolumn so get rid of first value (start time of predictions)
    series.index = series.index.get_level_values(1).astype(float)
    return series
def convert_timescale_of_index(df: pd.DataFrame, from_unit: TimeConversionTypes, to_unit: TimeConversionTypes) -> pd.DataFrame:
    """ Convert the timescale of a dataframe index from one time unit to another

    The index values are multiplied with
    TIME_CONVERSION[from_unit] / TIME_CONVERSION[to_unit]. For a MultiIndex,
    every level is converted. The index of the passed dataframe is replaced
    in place and the same dataframe is returned.

    Keyword arguments:
    df -- The dataframe whose index is converted
    from_unit -- The time unit of the current index (key of TIME_CONVERSION)
    to_unit -- The time unit to convert the index to (key of TIME_CONVERSION)
    """
    time_conversion_factor = TIME_CONVERSION[from_unit] / TIME_CONVERSION[to_unit]
    if isinstance(df.index, pd.MultiIndex):
        # a MultiIndex cannot be scaled as a whole, so rebuild it level by level
        df.index = pd.MultiIndex.from_arrays(
            [df.index.get_level_values(level) * time_conversion_factor for level in range(df.index.nlevels)]
        )
    else:
        df.index = df.index * time_conversion_factor
    return df