Coverage for addmo/util/load_save.py: 66%

44 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-08-31 13:05 +0000

1import csv 

2 

3import pandas as pd 

4import json 

5import datetime 

6from pathlib import Path 

7from pydantic import FilePath, BaseModel 

8from typing import Type, TypeVar, Union 

9import datetime 

# Generic type variable for config models: bound to pydantic's BaseModel so the
# loader/saver signatures below can express "returns the same model type that
# was requested".
ConfigT = TypeVar("ConfigT", bound=BaseModel)

11 

12 

def load_config_from_json(
    config: Union[ConfigT, FilePath, str, dict], config_type: Type[ConfigT]
) -> ConfigT:
    """
    Generic config loader, either accepting a path to a json file, a json string, a
    dict or passing through a valid config object.

    Args:
        config: One of
            * a ``str`` / ``Path`` pointing to an existing JSON file,
            * a ``str`` containing a JSON document,
            * a ``dict`` or an already constructed config object.
        config_type: The model class the input is validated against via
            ``config_type.model_validate``.

    Returns:
        The result of ``config_type.model_validate`` on the loaded data.

    Raises:
        TypeError: If ``config`` is a string that is neither an existing file
            nor valid JSON, or a ``Path`` that does not point to an existing file.
        Any exception raised by ``config_type.model_validate`` propagates unchanged.
    """
    if isinstance(config, (str, Path)):
        # A str / path may be a file location or (for str) an inline JSON document.
        try:
            is_file = Path(config).is_file()
        except OSError:
            # Probing a long JSON document as a path can fail at the OS level
            # (e.g. "File name too long"); such a value is certainly not a file.
            is_file = False

        if is_file:
            # Valid file pointer: load its content.
            with open(config, "r") as f:
                config = json.load(f)
        elif isinstance(config, Path):
            # A Path object can never be an inline JSON document, so report the
            # missing file directly instead of failing inside json.loads.
            raise TypeError(
                f"The config path '{config}' does not point to an existing file."
            )
        else:
            # Not a file path, so assume it is a JSON document and try to parse it.
            try:
                config = json.loads(config)
            except json.JSONDecodeError as e:
                # Notify the user of both accepted interpretations of a string.
                raise TypeError(
                    f"The config '{config:.100}' is neither an existing file path, nor a "
                    f"valid json document."
                ) from e
    return config_type.model_validate(config)

38 

39 

def save_config_to_json(config: ConfigT, path: str):
    """
    Write the config to ``path`` as a JSON document indented by four spaces.

    The config is serialized via its ``model_dump_json`` method before the
    target file is opened, so a serialization failure leaves the file untouched.
    """
    serialized = config.model_dump_json(indent=4)
    with open(path, mode="w") as handle:
        handle.write(serialized)

47 

48 

def _sniff_delimiter(path: str, encoding: str = "latin1") -> str:
    """Return the delimiter (';' or ',') detected in the first 1024 characters of *path*."""
    # Decode the sample with the same encoding pandas uses for the full read so
    # non-ASCII header characters cannot raise UnicodeDecodeError, and close the
    # handle deterministically via the context manager.
    with open(path, encoding=encoding) as f:
        sample = f.read(1024)
    return csv.Sniffer().sniff(sample, delimiters=";,").delimiter


def _to_datetime_index(index: pd.Index, origin: datetime.datetime, fmt: str) -> pd.Index:
    """Convert *index* to datetimes: by *fmt*, else as seconds since *origin*, else a daily range."""
    try:
        return pd.to_datetime(index, format=fmt)
    except (ValueError, TypeError):
        pass

    try:
        secs = pd.to_numeric(index, errors="coerce")
        # errors="coerce" never raises for unparsable labels; it yields NaN. If
        # nothing is numeric the conversion would silently give an all-NaT
        # index, so treat that case as a failure and use the range fallback.
        if not pd.isna(secs).all():
            return pd.to_datetime(secs, unit="s", origin=origin)
    except Exception:
        # Deliberate best effort: any conversion problem leads to the fallback.
        pass

    # Final fallback: simple range-based datetime index
    return pd.date_range(start=origin, periods=len(index), freq="D")


def load_data(abs_path: Union[str, Path], origin: datetime.datetime = datetime.datetime(2019, 1, 1),
              fmt: str = "%Y-%m-%d %H:%M:%S") -> pd.DataFrame:
    """
    Load data from absolute file path.

    The first column is used as index and the first row as header. CSV files
    are read as latin1 with the delimiter (";" or ",") sniffed from the start
    of the file.

    Args:
        abs_path: Path to a ``.csv`` or ``.xlsx`` file (extension is matched
            case-insensitively).
        origin: Reference time used when the index holds numeric seconds, and
            start of the daily range used as last-resort index.
        fmt: ``strftime`` format tried first when parsing the index.

    Returns:
        The loaded frame. If the index was not already datetime-typed it is
        converted by ``fmt``, else interpreted as seconds since ``origin``,
        else replaced by a daily range starting at ``origin``.

    Raises:
        ValueError: If the file extension is neither ``.csv`` nor ``.xlsx``.
        csv.Error: If no delimiter can be determined for a CSV file.
    """
    abs_path = str(abs_path)
    lowered = abs_path.lower()

    if lowered.endswith(".csv"):
        # Read the CSV file
        df = pd.read_csv(
            abs_path,
            delimiter=_sniff_delimiter(abs_path, encoding="latin1"),
            index_col=0,
            encoding="latin1",
            header=0,
        )
    elif lowered.endswith(".xlsx"):
        df = pd.read_excel(abs_path, index_col=0, header=0)
    else:
        raise ValueError("Unsupported file format: must be .csv or .xlsx")

    # Convert the index to datetime
    if not pd.api.types.is_datetime64_any_dtype(df.index):
        df.index = _to_datetime_index(df.index, origin, fmt)
    return df

75 

76 

def write_data(df: pd.DataFrame, abs_path: str):
    """
    Write data to absolute file path.

    Args:
        df: The frame to persist (its index is written as first column).
        abs_path: Target path ending in ``.csv`` or ``.xlsx`` (extension is
            matched case-insensitively). CSV output uses ";" as separator and
            latin1 encoding.

    Raises:
        ValueError: If the file extension is neither ``.csv`` nor ``.xlsx``.
            Raising here avoids silently discarding the data.
    """
    abs_path = str(abs_path)
    lowered = abs_path.lower()

    if lowered.endswith(".csv"):
        # Write the CSV file
        df.to_csv(abs_path, sep=";", encoding="latin1")
    elif lowered.endswith(".xlsx"):
        # Write the Excel file
        df.to_excel(abs_path)
    else:
        raise ValueError("Unsupported file format: must be .csv or .xlsx")

87 

88