Coverage for aixweather/data_quality_checks.py: 0%
22 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-01-06 16:01 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-01-06 16:01 +0000
1"""
2This module includes functions for analyzing and visualizing missing values.
3"""
5import matplotlib.pyplot as plt
7import seaborn as sns
def plot_heatmap_missing_values(df):
    """
    Generate a heatmap to visualize missing values in a DataFrame.

    The data is resampled into daily, weekly or monthly buckets (chosen from
    the number of rows in ``df``) and the fraction of missing values per
    column and bucket is drawn as a heatmap.

    Args:
        df (pd.DataFrame): The DataFrame to be analyzed for missing values.
            It must have a datetime-like index, because it is resampled and
            the bucket dates are used as y-axis labels.

    Returns:
        module: The ``matplotlib.pyplot`` module, with the heatmap drawn on
            a newly created current figure (e.g. call ``.show()`` or
            ``.savefig()`` on it).
    """

    # define resolution depending on the length of the data set
    # (thresholds are row counts: 24 * 60 = 1440 and 24 * 7 * 60 = 10080;
    # presumably tuned for hourly data -- TODO confirm)
    row_count = len(df)
    if row_count <= (24 * 60):
        resolution = "D"
        res_name = "daily"
    elif row_count <= (24 * 7 * 60):
        resolution = "W"
        res_name = "weekly"
    else:
        resolution = "M"
        res_name = "monthly"

    # Group by resolution and compute the fraction of missing values
    # (0.0 = nothing missing, 1.0 = everything missing) in each period
    missing_data = df.resample(resolution).apply(lambda x: x.isnull().mean())

    # Determine the number rows to plot
    num_rows = missing_data.shape[0]

    # Set the height of the figure based on the number of rows, and a fixed width
    plt.figure(figsize=(14, num_rows * 0.15 + 3))

    sns.heatmap(
        missing_data,
        cmap="Greens_r",
        # Pin the colour scale to the full 0..1 range so that the legend in
        # the title (white = 100% missing, dark green = 0% missing) holds
        # regardless of the range actually present in the data.
        vmin=0,
        vmax=1,
        cbar=True,
        yticklabels=False,  # Remove y-axis labels, they are set below
    )

    # Set y-tick labels to represent each period. Heatmap cells are centred
    # at i + 0.5, so offset the ticks to align each label with its row.
    tick_positions = [row + 0.5 for row in range(num_rows)]
    plt.yticks(tick_positions, missing_data.index.date, rotation=0)

    plt.title("Heatmap of data availability\n"
              "From white (100% data missing) to dark green (0% data missing)\n"
              f"Bucket size = {res_name}")
    plt.tight_layout()

    return plt
def print_df_info(df):
    """
    Print a concise summary of a DataFrame for intermediate checks or debugging.

    Args:
        df (pd.DataFrame): The DataFrame whose summary is printed.

    Returns:
        None
    """
    # DataFrame.info() writes the summary to stdout itself and returns None,
    # so wrapping its result in print() would only add a stray "None" line.
    df.info()