Coverage for aixweather/transformation_functions/auxiliary.py: 98%
41 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-01-06 16:01 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-01-06 16:01 +0000
1"""
2includes auxiliary functions for data handling and transformation
3"""
4import logging
5import pandas as pd
6import numpy as np
8logger = logging.getLogger(__name__)
def force_data_variable_convention(
    df: pd.DataFrame, format_desired: dict
) -> pd.DataFrame:
    """
    Reduce a DataFrame to exactly the columns named by a format definition.

    Columns of ``df`` that are not keys of ``format_desired`` are dropped,
    keys without a matching column are added as new columns (left empty by
    ``reindex``), and the result follows the key order of ``format_desired``.

    Args:
        df (pd.DataFrame): The DataFrame containing the data to be formatted.
        format_desired (dict): The desired format; only its keys (the
            variable names) and their order are used here.

    Returns:
        pd.DataFrame: A new DataFrame holding all and only the desired
        variables, in the desired order.
    """
    # Drop everything the target format does not know about first, so that
    # unrelated columns never reach the reindex step.
    wanted = set(format_desired)
    is_wanted = df.columns.isin(wanted)
    present = df.loc[:, is_wanted]

    # reindex creates the still-missing columns and enforces the column order.
    return present.reindex(columns=format_desired.keys())
def rename_columns(df: pd.DataFrame, format_dict: dict) -> pd.DataFrame:
    """
    Rename DataFrame columns to the core names given by a format dictionary.

    Args:
        df (pd.DataFrame): The DataFrame whose columns need to be renamed.
        format_dict (dict): Maps current column names to an entry dict; the
            entry's ``"core_name"`` value is the new column name.

    Returns:
        pd.DataFrame: A renamed copy of ``df``. Columns that are not keys of
        ``format_dict`` keep their name.
    """
    old_to_core = {}
    for current_name, entry in format_dict.items():
        old_to_core[current_name] = entry["core_name"]
    renamed = df.rename(columns=old_to_core)
    return renamed
def fill_nan_from_format_dict(df: pd.DataFrame, format_data: dict) -> pd.DataFrame:
    """
    Fill NaN values in a DataFrame with the per-column values of a format.

    Args:
        df (pd.DataFrame): The DataFrame containing the data to be processed.
            Its columns are overwritten in place.
        format_data (dict): Maps column names to an entry dict; the entry's
            ``"nan"`` value is the replacement for missing data in that
            column. A replacement of ``None`` leaves the column untouched.

    Returns:
        pd.DataFrame: The same DataFrame object, with NaN values filled as
        per the format data.

    Raises:
        KeyError: If an entry has no ``"nan"`` key, or if a column with a
            non-``None`` replacement is missing from ``df``.
    """
    nan_key = "nan"
    for column, entry in format_data.items():
        fill_value = entry[nan_key]
        if fill_value is None:
            continue
        # Assign the filled column back instead of calling
        # ``df[column].fillna(..., inplace=True)``: the in-place call works on
        # a temporary selection, which is deprecated in pandas 2.x and does
        # not update ``df`` at all once Copy-on-Write is active.
        df[column] = df[column].fillna(fill_value)
    return df
def replace_dummy_with_nan(df: pd.DataFrame, format_dict: dict) -> pd.DataFrame:
    """
    Turn placeholder ("dummy") values into NaN, column by column.

    Reason: some sources, e.g. the DWD, mark a missing value with a dummy
    value such as 99, which hides where data is missing and might affect the
    simulation.

    Args:
        df (pd.DataFrame): The DataFrame to be processed. Its columns are
            overwritten in place.
        format_dict (dict): Maps column names to an entry dict; the entry's
            ``"nan"`` value is either one dummy value or a list of dummy
            values. Entries without ``"nan"`` and columns absent from ``df``
            are skipped.

    Returns:
        pd.DataFrame: The same DataFrame object, with the dummy values
        replaced by NaN.
    """
    for column, entry in format_dict.items():
        if "nan" not in entry:
            continue
        if column not in df.columns:
            continue

        declared = entry["nan"]
        # A single dummy value is handled like a one-element list.
        dummies = declared if isinstance(declared, list) else [declared]
        for dummy in dummies:
            df[column] = df[column].replace(dummy, np.nan)
    return df
def evaluate_transformations(core_format: dict, other_format: dict):
    """
    Check another format against the core format and log unit mismatches.

    Every entry of ``other_format`` is looked up in ``core_format`` by its
    ``"core_name"``. If the units differ, a debug message stating the required
    unit transformation is logged. Entries with an empty ``"core_name"`` that
    is not part of the core format are ignored.

    Args:
        core_format (dict): The core format, mapping core variable names to
            an entry dict with a ``"unit"`` value.
        other_format (dict): The format to compare, mapping variable names to
            an entry dict with ``"core_name"`` and ``"unit"`` values.

    Raises:
        ValueError: If an entry names a non-empty core variable that does not
            exist in ``core_format``.
    """
    logger.debug("Evaluate format.")
    for import_name, entry in other_format.items():
        core_name = entry["core_name"]

        if core_name not in core_format:
            # An empty core name means the variable is intentionally unmapped.
            if core_name:
                raise ValueError(
                    f"The core variable '{core_name}' of import variable"
                    f" {import_name} does not fit the core variable format"
                )
            continue

        # compare units
        core_unit = core_format[core_name]["unit"]
        if entry["unit"] != core_unit:
            logger.debug(
                "Unit transformation required for %s from %s to %s.",
                core_name, entry["unit"], core_unit
            )
def select_entry_by_core_name(format_dict: dict, core_name_to_match: str):
    """
    Look up the first entry of a format dictionary with a given core name.

    Args:
        format_dict (dict): The format dictionary to search; each value is an
            entry dict with a ``"core_name"`` value.
        core_name_to_match (str): The core name to look for.

    Returns:
        dict: The first entry (in dictionary order) whose ``"core_name"``
        equals ``core_name_to_match``, or None if there is no such entry.
    """
    matching_entries = (
        entry
        for entry in format_dict.values()
        if entry["core_name"] == core_name_to_match
    )
    return next(matching_entries, None)