timeseries_shaper.stats.boolean_stats
import pandas as pd
from typing import Dict, Union
from ..base import Base


class BooleanStatistics(Base):
    """
    Provides class methods to calculate statistics on a boolean column in a pandas DataFrame.

    Every method receives the DataFrame explicitly together with the name of
    the column to analyse, which defaults to ``'value_bool'``.
    """

    @classmethod
    def count_true(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
        """Returns the count of True values in the boolean column.

        The column is summed, so each True contributes 1 and each False 0.
        """
        # Convert the pandas/NumPy scalar so the result is a built-in int,
        # as the annotation promises.
        return int(dataframe[column_name].sum())

    @classmethod
    def count_false(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
        """Returns the count of False values in the boolean column.

        Only entries that compare equal to False are counted, so null
        entries are not included.
        """
        return int(dataframe[column_name].eq(False).sum())

    @classmethod
    def count_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
        """Returns the count of null (NaN) values in the boolean column."""
        return int(dataframe[column_name].isna().sum())

    @classmethod
    def count_not_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
        """Returns the count of non-null (True or False) values in the boolean column."""
        return int(dataframe[column_name].notna().sum())

    @classmethod
    def true_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
        """Returns the percentage of True values in the boolean column.

        The percentage is relative to the number of non-null entries;
        0.0 is returned when the column has no non-null entries.
        """
        true_count = cls.count_true(dataframe, column_name)
        total_count = cls.count_not_null(dataframe, column_name)
        return (true_count / total_count) * 100 if total_count > 0 else 0.0

    @classmethod
    def false_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
        """Returns the percentage of False values in the boolean column.

        The percentage is relative to the number of non-null entries;
        0.0 is returned when the column has no non-null entries.
        """
        false_count = cls.count_false(dataframe, column_name)
        total_count = cls.count_not_null(dataframe, column_name)
        return (false_count / total_count) * 100 if total_count > 0 else 0.0

    @classmethod
    def mode(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> Union[bool, None]:
        """Returns the mode (most common value) of the specified boolean column.

        Returns None when the column yields no mode at all (for example an
        empty column) instead of failing on the lookup of the first entry.
        When several values are tied, the first one reported by
        ``Series.mode`` is returned.
        """
        modes = dataframe[column_name].mode()
        if modes.empty:
            return None
        # Positional access plus bool() gives a built-in bool regardless of
        # the index or scalar type pandas hands back.
        return bool(modes.iloc[0])

    @classmethod
    def is_balanced(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> bool:
        """Indicates if the distribution is balanced (50% True and 50% False) in the specified boolean column.

        The check compares the column mean with 0.5. A column whose mean is
        null (for example an empty column) is reported as not balanced.
        """
        true_fraction = dataframe[column_name].mean()
        if pd.isna(true_fraction):
            return False
        return bool(true_fraction == 0.5)

    @classmethod
    def summary_as_dict(
        cls, dataframe: pd.DataFrame, column_name: str = 'value_bool'
    ) -> Dict[str, Union[int, float, bool, None]]:
        """Returns a summary of boolean statistics for the specified column as a dictionary.

        Keys: ``true_count``, ``false_count``, ``true_percentage``,
        ``false_percentage``, ``mode`` and ``is_balanced``.
        """
        return {
            'true_count': cls.count_true(dataframe, column_name),
            'false_count': cls.count_false(dataframe, column_name),
            'true_percentage': cls.true_percentage(dataframe, column_name),
            'false_percentage': cls.false_percentage(dataframe, column_name),
            'mode': cls.mode(dataframe, column_name),
            'is_balanced': cls.is_balanced(dataframe, column_name),
        }

    @classmethod
    def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> pd.DataFrame:
        """Returns a summary of boolean statistics for the specified column as a DataFrame.

        The result has a single row whose columns are the keys produced by
        ``summary_as_dict``.
        """
        summary_data = cls.summary_as_dict(dataframe, column_name)
        return pd.DataFrame([summary_data])
class BooleanStatistics(Base):
    """
    Provides class methods to calculate statistics on a boolean column in a pandas DataFrame.

    Every method receives the DataFrame explicitly together with the name of
    the column to analyse, which defaults to ``'value_bool'``.
    """

    @classmethod
    def count_true(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
        """Returns the count of True values in the boolean column.

        The column is summed, so each True contributes 1 and each False 0.
        """
        # Convert the pandas/NumPy scalar so the result is a built-in int,
        # as the annotation promises.
        return int(dataframe[column_name].sum())

    @classmethod
    def count_false(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
        """Returns the count of False values in the boolean column.

        Only entries that compare equal to False are counted, so null
        entries are not included.
        """
        return int(dataframe[column_name].eq(False).sum())

    @classmethod
    def count_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
        """Returns the count of null (NaN) values in the boolean column."""
        return int(dataframe[column_name].isna().sum())

    @classmethod
    def count_not_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
        """Returns the count of non-null (True or False) values in the boolean column."""
        return int(dataframe[column_name].notna().sum())

    @classmethod
    def true_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
        """Returns the percentage of True values in the boolean column.

        The percentage is relative to the number of non-null entries;
        0.0 is returned when the column has no non-null entries.
        """
        true_count = cls.count_true(dataframe, column_name)
        total_count = cls.count_not_null(dataframe, column_name)
        return (true_count / total_count) * 100 if total_count > 0 else 0.0

    @classmethod
    def false_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
        """Returns the percentage of False values in the boolean column.

        The percentage is relative to the number of non-null entries;
        0.0 is returned when the column has no non-null entries.
        """
        false_count = cls.count_false(dataframe, column_name)
        total_count = cls.count_not_null(dataframe, column_name)
        return (false_count / total_count) * 100 if total_count > 0 else 0.0

    @classmethod
    def mode(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> Union[bool, None]:
        """Returns the mode (most common value) of the specified boolean column.

        Returns None when the column yields no mode at all (for example an
        empty column) instead of failing on the lookup of the first entry.
        When several values are tied, the first one reported by
        ``Series.mode`` is returned.
        """
        modes = dataframe[column_name].mode()
        if modes.empty:
            return None
        # Positional access plus bool() gives a built-in bool regardless of
        # the index or scalar type pandas hands back.
        return bool(modes.iloc[0])

    @classmethod
    def is_balanced(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> bool:
        """Indicates if the distribution is balanced (50% True and 50% False) in the specified boolean column.

        The check compares the column mean with 0.5. A column whose mean is
        null (for example an empty column) is reported as not balanced.
        """
        true_fraction = dataframe[column_name].mean()
        if pd.isna(true_fraction):
            return False
        return bool(true_fraction == 0.5)

    @classmethod
    def summary_as_dict(
        cls, dataframe: pd.DataFrame, column_name: str = 'value_bool'
    ) -> Dict[str, Union[int, float, bool, None]]:
        """Returns a summary of boolean statistics for the specified column as a dictionary.

        Keys: ``true_count``, ``false_count``, ``true_percentage``,
        ``false_percentage``, ``mode`` and ``is_balanced``.
        """
        return {
            'true_count': cls.count_true(dataframe, column_name),
            'false_count': cls.count_false(dataframe, column_name),
            'true_percentage': cls.true_percentage(dataframe, column_name),
            'false_percentage': cls.false_percentage(dataframe, column_name),
            'mode': cls.mode(dataframe, column_name),
            'is_balanced': cls.is_balanced(dataframe, column_name),
        }

    @classmethod
    def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> pd.DataFrame:
        """Returns a summary of boolean statistics for the specified column as a DataFrame.

        The result has a single row whose columns are the keys produced by
        ``summary_as_dict``.
        """
        summary_data = cls.summary_as_dict(dataframe, column_name)
        return pd.DataFrame([summary_data])
Provides class methods to calculate statistics on a boolean column in a pandas DataFrame.
@classmethod
def count_true(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
    """Returns the count of True values in the boolean column.

    The column is summed, so each True contributes 1 and each False 0.

    Args:
        dataframe: DataFrame holding the column to inspect.
        column_name: Name of the boolean column (default ``'value_bool'``).

    Returns:
        The number of True entries as a built-in ``int``.
    """
    # Convert the pandas/NumPy scalar so the result matches the annotated
    # return type (and stays JSON-serialisable).
    return int(dataframe[column_name].sum())
Returns the count of True values in the boolean column.
@classmethod
def count_false(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
    """Returns the count of False values in the boolean column.

    Only entries that compare equal to False are counted, so null entries
    are not included.

    Args:
        dataframe: DataFrame holding the column to inspect.
        column_name: Name of the boolean column (default ``'value_bool'``).

    Returns:
        The number of False entries as a built-in ``int``.
    """
    is_false = dataframe[column_name].eq(False)
    # Convert the pandas/NumPy scalar so the result matches the annotated
    # return type.
    return int(is_false.sum())
Returns the count of False values in the boolean column.
@classmethod
def count_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
    """Returns the count of null (NaN) values in the boolean column.

    Args:
        dataframe: DataFrame holding the column to inspect.
        column_name: Name of the boolean column (default ``'value_bool'``).

    Returns:
        The number of entries flagged by ``isna()`` as a built-in ``int``.
    """
    # Convert the pandas/NumPy scalar so the result matches the annotated
    # return type.
    return int(dataframe[column_name].isna().sum())
Returns the count of null (NaN) values in the boolean column.
@classmethod
def count_not_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
    """Returns the count of non-null (True or False) values in the boolean column.

    Args:
        dataframe: DataFrame holding the column to inspect.
        column_name: Name of the boolean column (default ``'value_bool'``).

    Returns:
        The number of entries flagged by ``notna()`` as a built-in ``int``.
    """
    # Convert the pandas/NumPy scalar so the result matches the annotated
    # return type.
    return int(dataframe[column_name].notna().sum())
Returns the count of non-null (True or False) values in the boolean column.
@classmethod
def true_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
    """Returns the percentage of True values in the boolean column.

    The share is computed against the number of non-null entries; when
    there are none, 0.0 is returned instead of dividing by zero.
    """
    hits = cls.count_true(dataframe, column_name)
    non_null = cls.count_not_null(dataframe, column_name)
    # Guard clause: nothing to divide by.
    if not non_null > 0:
        return 0.0
    return (hits / non_null) * 100
Returns the percentage of True values in the boolean column.
@classmethod
def false_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
    """Returns the percentage of False values in the boolean column.

    The share is computed against the number of non-null entries; when
    there are none, 0.0 is returned instead of dividing by zero.
    """
    misses = cls.count_false(dataframe, column_name)
    non_null = cls.count_not_null(dataframe, column_name)
    # Guard clause: nothing to divide by.
    if not non_null > 0:
        return 0.0
    return (misses / non_null) * 100
Returns the percentage of False values in the boolean column.
@classmethod
def mode(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> Union[bool, None]:
    """Returns the mode (most common value) of the specified boolean column.

    Args:
        dataframe: DataFrame holding the column to inspect.
        column_name: Name of the boolean column (default ``'value_bool'``).

    Returns:
        The most common value as a built-in ``bool``, or None when the
        column yields no mode at all (for example an empty column). When
        several values are tied, the first one reported by ``Series.mode``
        is returned.
    """
    modes = dataframe[column_name].mode()
    # An empty result has no first element; report "no mode" rather than
    # failing on the lookup.
    if modes.empty:
        return None
    return bool(modes.iloc[0])
Returns the mode (most common value) of the specified boolean column.
@classmethod
def is_balanced(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> bool:
    """Indicates if the distribution is balanced (50% True and 50% False) in the specified boolean column.

    Args:
        dataframe: DataFrame holding the column to inspect.
        column_name: Name of the boolean column (default ``'value_bool'``).

    Returns:
        True when the column mean equals 0.5, otherwise False. A column
        whose mean is null (for example an empty column) is reported as
        not balanced.
    """
    # The mean of a boolean column is the fraction of True values (0..1),
    # not a percentage.
    true_fraction = dataframe[column_name].mean()
    if pd.isna(true_fraction):
        return False
    # bool() turns the NumPy comparison result into a built-in bool.
    return bool(true_fraction == 0.5)
Indicates whether the distribution is balanced (50% True and 50% False) in the specified boolean column.
@classmethod
def summary_as_dict(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> Dict[str, Union[int, float, bool]]:
    """Returns a summary of boolean statistics for the specified column as a dictionary.

    Args:
        dataframe: DataFrame holding the column to inspect.
        column_name: Name of the boolean column (default ``'value_bool'``,
            matching the counting and percentage methods of this class).

    Returns:
        A dictionary with the keys ``true_count``, ``false_count``,
        ``true_percentage``, ``false_percentage``, ``mode`` and
        ``is_balanced``, each filled by the class method of the same name
        (``true_count``/``false_count`` by ``count_true``/``count_false``).
    """
    return {
        'true_count': cls.count_true(dataframe, column_name),
        'false_count': cls.count_false(dataframe, column_name),
        'true_percentage': cls.true_percentage(dataframe, column_name),
        'false_percentage': cls.false_percentage(dataframe, column_name),
        'mode': cls.mode(dataframe, column_name),
        'is_balanced': cls.is_balanced(dataframe, column_name),
    }
Returns a summary of boolean statistics for the specified column as a dictionary.
@classmethod
def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> pd.DataFrame:
    """Returns a summary of boolean statistics for the specified column as a DataFrame.

    Args:
        dataframe: DataFrame holding the column to inspect.
        column_name: Name of the boolean column (default ``'value_bool'``,
            matching the counting and percentage methods of this class).

    Returns:
        A single-row DataFrame whose columns are the keys of the
        dictionary produced by ``summary_as_dict``.
    """
    summary_data = cls.summary_as_dict(dataframe, column_name)
    # Wrapping the dict in a list makes it one row rather than one column
    # per scalar.
    return pd.DataFrame([summary_data])
Returns a summary of boolean statistics for the specified column as a DataFrame.
Inherited Members
- timeseries_shaper.base.Base
- Base
- dataframe
- get_dataframe