Coverage for agentlib_flexquant/utils/data_handling.py: 75%

44 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-08-01 15:10 +0000

1from typing import Literal 

2import pandas as pd 

3from agentlib_mpc.utils import TimeConversionTypes, TIME_CONVERSION 

4 

5 

# Identifiers of the supported NaN-filling strategies (see ``fill_nans``).
MEAN: str = "mean"
INTERPOLATE: str = "interpolate"
# ``Literal`` has to be parameterized with literal values (not variables) to be
# valid for static type checkers; keep these in sync with the constants above.
FillNansMethods = Literal["mean", "interpolate"]

9 

10 

def fill_nans(series: pd.Series, method: FillNansMethods) -> pd.Series:
    """
    Replace the NaN entries of ``series`` using the selected strategy.

    Supported strategies:
    - mean: each NaN is replaced by the mean of the values following it
      (delegated to ``_set_mean_values``).
    - interpolate: NaNs are interpolated along the index, in both directions.

    Any other ``method`` leaves the series unchanged.

    Raises:
        ValueError: if the series still contains NaN values afterwards.
    """
    if method == INTERPOLATE:
        # Index-based interpolation; "both" also fills leading/trailing NaNs.
        series = series.interpolate(method="index", limit_direction="both")
    elif method == MEAN:
        series = _set_mean_values(series=series)

    nans_remaining = series.isna().any()
    if not nans_remaining:
        return series
    raise ValueError(f"NaN values are still present in the series after filling them with the method {method}\n{series}")

28 

29 

30def _set_mean_values(series: pd.Series) -> pd.Series: 

31 """ Fills intervals including the nan with the mean of the following values. """ 

32 def _get_intervals_for_mean(s: pd.Series) -> list[pd.Interval]: 

33 intervals = [] 

34 start = None 

35 for index, value in s.items(): 

36 if pd.isna(value): 

37 if pd.isna(start): 

38 start = index 

39 else: 

40 end = index 

41 intervals.append(pd.Interval(left=start, right=end, closed="left")) 

42 start = end 

43 return intervals 

44 

45 for interval in _get_intervals_for_mean(series): 

46 interval_index = (interval.left <= series.index) & (series.index < interval.right) 

47 series[interval.left] = series[interval_index].mean(skipna=True) 

48 

49 # remove last entry if nan, e.g. with collocation 

50 if pd.isna(series.iloc[-1]): 

51 series = series.iloc[:-1] 

52 

53 return series 

54 

55 

def strip_multi_index(series: pd.Series) -> pd.Series:
    """Reduce a two-level index to its second level, converted to float.

    If the index entries are strings (a MultiIndex that was communicated as
    text, e.g. ``"(0, 10.0)"``), they are first parsed back into tuples and
    turned into a ``pd.MultiIndex``.

    Note: the index of the passed ``series`` is replaced in place; the same
    object is returned.

    Args:
        series: Series whose index is a MultiIndex or its string form.

    Returns:
        The series, indexed by the float values of index level 1.

    Raises:
        ValueError, SyntaxError: if a string index entry is not a valid
            Python literal.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    # Convert the index (communicated as string) into a MultiIndex.
    # ``ast.literal_eval`` only accepts Python literals, so index strings that
    # come from another process cannot execute code the way ``eval`` would.
    if len(series.index) > 0 and isinstance(series.index[0], str):
        series.index = pd.MultiIndex.from_tuples(
            [ast.literal_eval(label) for label in series.index]
        )
    # vals is multicolumn so get rid of first value (start time of predictions)
    series.index = series.index.get_level_values(1).astype(float)
    return series

64 

65 

def convert_timescale_of_index(df: pd.DataFrame, from_unit: TimeConversionTypes, to_unit: TimeConversionTypes) -> pd.DataFrame:
    """Rescale the index of a dataframe from one time unit to another.

    The index values are multiplied by
    ``TIME_CONVERSION[from_unit] / TIME_CONVERSION[to_unit]``. For a
    ``pd.MultiIndex`` every level is rescaled.

    Note: the index of the passed ``df`` is replaced in place; the same object
    is returned.

    Keyword arguments:
    df -- The dataframe whose index is converted
    from_unit -- The time unit the index is currently expressed in
    to_unit -- The time unit to convert the index to
    """
    # NOTE(review): presumably TIME_CONVERSION maps each unit to its length in
    # a common base unit, making this ratio the from->to factor -- confirm.
    time_conversion_factor = TIME_CONVERSION[from_unit] / TIME_CONVERSION[to_unit]
    if isinstance(df.index, pd.MultiIndex):
        # Rebuild the MultiIndex level by level with the scaled values.
        df.index = pd.MultiIndex.from_arrays(
            [df.index.get_level_values(level) * time_conversion_factor for level in range(df.index.nlevels)]
        )
    else:
        df.index = df.index * time_conversion_factor
    return df