Coverage for aixweather/transformation_to_core_data/custom_file.py: 30%

23 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-01-06 16:01 +0000

1""" 

2Change this file to your custom requirements. See the info file in the same 

3directory for requirements of the returned df. 

4""" 

5 

6import pandas as pd 

7 

8from aixweather import definitions 

9from aixweather.imports.utils_import import MetaData 

10from aixweather.transformation_functions import auxiliary, time_observation_transformations, variable_transformations, \ 

11 pass_through_handling, unit_conversions 

12 

13''' 

14Format info: 

15key = raw data point name 

16core_name = corresponding name matching the format_core_data 

17time_of_meas_shift = desired 30min shifting+interpolation to convert a value that is e.g. the  

18"average of preceding hour" to "indicated time" (prec2ind).  

19unit = unit of the raw data following the naming convention of format_core_data 

20''' 

21 

# Translation table from the raw column names of the custom data set to the
# core data format. Each top-level key is a raw data point name; replace the
# placeholder entry with one entry per variable in your data.
format_costum = {
    "variable_name_from_your_costum_data": {
        # Corresponding variable name in definitions.format_core_data.
        "core_name": "core_name to which it translates (definitions.format_core_data)",
        # Desired 30 min shift + interpolation, e.g. to turn an "average of
        # preceding hour" value into an "indicated time" value (prec2ind).
        "time_of_meas_shift": "define if the variable needs to be shifted",
        # Unit of the raw data, named as in definitions.format_core_data.
        "unit": "see definitions.format_core_data for naming",
    },
}

29 

30 

def custom_to_core_data(df_import: pd.DataFrame, meta: MetaData) -> pd.DataFrame:
    """
    Convert imported custom weather data to the core_data format.

    The pipeline checks ``format_costum`` against
    ``definitions.format_core_data``, resamples a copy of the input to an
    hourly grid, renames the columns, applies the measurement-time shifts
    described by ``format_costum``, runs the unit conversions and variable
    transformations, and finally lets the pass-through handling combine the
    shifted and unshifted data.

    Args:
        df_import: Raw imported data. It is copied, not modified here.
            NOTE(review): ``resample('h')`` needs a datetime-like index -
            confirm with the import step.
        meta: Metadata object. It is handed to the transformation helpers and
            its ``executed_transformations`` attribute is overwritten with
            the overview returned by ``variable_transform_all``.

    Returns:
        The DataFrame returned by
        ``pass_through_handling.create_pass_through_variables``.
    """

    def transform_custom(df):
        # Bring the frame to the core data variable convention
        # (drops unnecessary variables).
        df = auxiliary.force_data_variable_convention(
            df, definitions.format_core_data
        )

        # Unit conversions - adapt these to your data; the two below are
        # examples only.
        df["TotalSkyCover"] = unit_conversions.eigth_to_tenth(df["TotalSkyCover"])
        df["AtmPressure"] = unit_conversions.hPa_to_Pa(df["AtmPressure"])

        # Impute missing variables from the available ones. Additionally
        # required transformations can be added to
        # variable_transformations.variable_transform_all or applied here.
        transformed, overview = variable_transformations.variable_transform_all(
            df, meta
        )
        return transformed, overview

    # Check that format_costum is consistent with the core data format.
    auxiliary.evaluate_transformations(
        core_format=definitions.format_core_data, other_format=format_costum
    )

    # Work on a copy; resampling makes the DatetimeIndex complete and monotonic.
    hourly = df_import.copy().resample("h").asfreq()

    # Rename the available variables to their core data names.
    renamed = auxiliary.rename_columns(hourly, format_costum)

    # Time zone conversion to UTC+0: change `periods` to the offset of your
    # data (0 hours leaves the timestamps as they are).
    in_utc = renamed.shift(periods=0, freq="h", axis=0)

    # Keep an unshifted copy before the 30 min forward/backward shift and
    # interpolation is applied.
    df_no_shift = in_utc.copy()
    shifted = time_observation_transformations.shift_time_by_dict(
        format_costum, in_utc
    )

    transformed_shifted, meta.executed_transformations = transform_custom(shifted)

    # Add the unshifted data for possible later direct use (pass-through), to
    # avoid interpolating back and forth.
    return pass_through_handling.create_pass_through_variables(
        df_shifted=transformed_shifted,
        df_no_shift=df_no_shift,
        format=format_costum,
        transform_func=transform_custom,
        meta=meta,
    )