Coverage for aixweather/data_quality_checks.py: 0%

22 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-01-06 16:01 +0000

1""" 

2This module includes functions for analyzing and visualizing missing values. 

3""" 

4 

5import matplotlib.pyplot as plt 

6 

7import seaborn as sns 

8 

9 

def plot_heatmap_missing_values(df):
    """
    Generate a heatmap to visualize missing values in a DataFrame.

    The data is resampled into daily, weekly or monthly buckets (chosen from
    the number of rows) and, for every bucket and column, the share of
    missing values is drawn as one heatmap cell.

    Args:
        df (pd.DataFrame): The DataFrame to be analyzed for missing values.
            It is resampled and its resampled index is read via ``.date``,
            so it needs a datetime index.

    Returns:
        plt: The ``matplotlib.pyplot`` module, with the heatmap of missing
        values drawn on the current (newly created) figure.
    """

    # define resolution depending on the length of the data set
    # NOTE(review): the thresholds look like 60 days / 60 weeks of hourly
    # rows -- confirm the expected sampling rate of df.
    if len(df) <= (24 * 60):
        resolution = "D"
        res_name = "daily"
    elif len(df) <= (24 * 7 * 60):
        resolution = "W"
        res_name = "weekly"
    else:
        resolution = "M"
        res_name = "monthly"

    # Group by resolution and compute the share of missing values per period
    missing_data = df.resample(resolution).apply(lambda x: x.isnull().mean())

    # Determine the number of rows to plot
    num_rows = missing_data.shape[0]

    # Set the height of the figure based on the number of rows, and a fixed width
    plt.figure(figsize=(14, num_rows * 0.15 + 3))

    sns.heatmap(
        missing_data,
        cmap="Greens_r",
        cbar=True,
        # Pin the colour scale to the full 0..1 range so the colours match
        # the title (white = 100% missing, dark green = 0% missing) instead
        # of being stretched to the min/max found in the data.
        vmin=0,
        vmax=1,
        yticklabels=False,  # Remove default y-axis labels, set below
    )

    # Set y-tick labels to represent each period. Heatmap cell i spans
    # [i, i + 1] on the y-axis, so the label belongs at the cell centre.
    plt.yticks(
        [row + 0.5 for row in range(num_rows)],
        missing_data.index.date,
        rotation=0,
    )

    plt.title("Heatmap of data availability\n"
              "From white (100% data missing) to dark green (0% data missing)\n"
              f"Bucket size = {res_name}")
    plt.tight_layout()

    return plt

57 

58 

def print_df_info(df):
    """
    Print a summary of a DataFrame for intermediate checks or debugging.

    Args:
        df (pd.DataFrame): The DataFrame whose ``info()`` summary is printed.
    """
    # DataFrame.info() writes the summary to stdout itself and returns None,
    # so its result must not be printed (that would add a stray "None" line).
    df.info()