timeseries_shaper.stats.boolean_stats

 1import pandas as pd
 2from typing import Dict, Union
 3from ..base import Base
 4
 5class BooleanStatistics(Base):
 6    """
 7    Provides class methods to calculate statistics on a boolean column in a pandas DataFrame.
 8    """
 9
10    @classmethod
11    def count_true(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
12        """Returns the count of True values in the boolean column."""
13        return dataframe[column_name].sum()
14
15    @classmethod
16    def count_false(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
17        """Returns the count of False values in the boolean column."""
18        return (dataframe[column_name] == False).sum()
19
20    @classmethod
21    def count_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
22        """Returns the count of null (NaN) values in the boolean column."""
23        return dataframe[column_name].isna().sum()
24
25    @classmethod
26    def count_not_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
27        """Returns the count of non-null (True or False) values in the boolean column."""
28        return dataframe[column_name].notna().sum()
29
30    @classmethod
31    def true_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
32        """Returns the percentage of True values in the boolean column."""
33        true_count = cls.count_true(dataframe, column_name)
34        total_count = cls.count_not_null(dataframe, column_name)
35        return (true_count / total_count) * 100 if total_count > 0 else 0.0
36
37    @classmethod
38    def false_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
39        """Returns the percentage of False values in the boolean column."""
40        false_count = cls.count_false(dataframe, column_name)
41        total_count = cls.count_not_null(dataframe, column_name)
42        return (false_count / total_count) * 100 if total_count > 0 else 0.0
43
44    @classmethod
45    def mode(cls, dataframe: pd.DataFrame, column_name: str) -> bool:
46        """Returns the mode (most common value) of the specified boolean column."""
47        return dataframe[column_name].mode()[0]
48
49    @classmethod
50    def is_balanced(cls, dataframe: pd.DataFrame, column_name: str) -> bool:
51        """Indicates if the distribution is balanced (50% True and False) in the specified boolean column."""
52        true_percentage = dataframe[column_name].mean()
53        return true_percentage == 0.5
54
55    @classmethod
56    def summary_as_dict(cls, dataframe: pd.DataFrame, column_name: str) -> Dict[str, Union[int, float, bool]]:
57        """Returns a summary of boolean statistics for the specified column as a dictionary."""
58        return {
59            'true_count': cls.count_true(dataframe, column_name),
60            'false_count': cls.count_false(dataframe, column_name),
61            'true_percentage': cls.true_percentage(dataframe, column_name),
62            'false_percentage': cls.false_percentage(dataframe, column_name),
63            'mode': cls.mode(dataframe, column_name),
64            'is_balanced': cls.is_balanced(dataframe, column_name)
65        }
66
67    @classmethod
68    def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str) -> pd.DataFrame:
69        """Returns a summary of boolean statistics for the specified column as a DataFrame."""
70        summary_data = cls.summary_as_dict(dataframe, column_name)
71        return pd.DataFrame([summary_data])
class BooleanStatistics(timeseries_shaper.base.Base):
 6class BooleanStatistics(Base):
 7    """
 8    Provides class methods to calculate statistics on a boolean column in a pandas DataFrame.
 9    """
10
11    @classmethod
12    def count_true(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
13        """Returns the count of True values in the boolean column."""
14        return dataframe[column_name].sum()
15
16    @classmethod
17    def count_false(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
18        """Returns the count of False values in the boolean column."""
19        return (dataframe[column_name] == False).sum()
20
21    @classmethod
22    def count_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
23        """Returns the count of null (NaN) values in the boolean column."""
24        return dataframe[column_name].isna().sum()
25
26    @classmethod
27    def count_not_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
28        """Returns the count of non-null (True or False) values in the boolean column."""
29        return dataframe[column_name].notna().sum()
30
31    @classmethod
32    def true_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
33        """Returns the percentage of True values in the boolean column."""
34        true_count = cls.count_true(dataframe, column_name)
35        total_count = cls.count_not_null(dataframe, column_name)
36        return (true_count / total_count) * 100 if total_count > 0 else 0.0
37
38    @classmethod
39    def false_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
40        """Returns the percentage of False values in the boolean column."""
41        false_count = cls.count_false(dataframe, column_name)
42        total_count = cls.count_not_null(dataframe, column_name)
43        return (false_count / total_count) * 100 if total_count > 0 else 0.0
44
45    @classmethod
46    def mode(cls, dataframe: pd.DataFrame, column_name: str) -> bool:
47        """Returns the mode (most common value) of the specified boolean column."""
48        return dataframe[column_name].mode()[0]
49
50    @classmethod
51    def is_balanced(cls, dataframe: pd.DataFrame, column_name: str) -> bool:
52        """Indicates if the distribution is balanced (50% True and False) in the specified boolean column."""
53        true_percentage = dataframe[column_name].mean()
54        return true_percentage == 0.5
55
56    @classmethod
57    def summary_as_dict(cls, dataframe: pd.DataFrame, column_name: str) -> Dict[str, Union[int, float, bool]]:
58        """Returns a summary of boolean statistics for the specified column as a dictionary."""
59        return {
60            'true_count': cls.count_true(dataframe, column_name),
61            'false_count': cls.count_false(dataframe, column_name),
62            'true_percentage': cls.true_percentage(dataframe, column_name),
63            'false_percentage': cls.false_percentage(dataframe, column_name),
64            'mode': cls.mode(dataframe, column_name),
65            'is_balanced': cls.is_balanced(dataframe, column_name)
66        }
67
68    @classmethod
69    def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str) -> pd.DataFrame:
70        """Returns a summary of boolean statistics for the specified column as a DataFrame."""
71        summary_data = cls.summary_as_dict(dataframe, column_name)
72        return pd.DataFrame([summary_data])

Provides class methods to calculate statistics on a boolean column in a pandas DataFrame.

@classmethod
def count_true( cls, dataframe: pandas.core.frame.DataFrame, column_name: str = 'value_bool') -> int:
11    @classmethod
12    def count_true(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
13        """Returns the count of True values in the boolean column."""
14        return dataframe[column_name].sum()

Returns the count of True values in the boolean column.

@classmethod
def count_false( cls, dataframe: pandas.core.frame.DataFrame, column_name: str = 'value_bool') -> int:
16    @classmethod
17    def count_false(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
18        """Returns the count of False values in the boolean column."""
19        return (dataframe[column_name] == False).sum()

Returns the count of False values in the boolean column.

@classmethod
def count_null( cls, dataframe: pandas.core.frame.DataFrame, column_name: str = 'value_bool') -> int:
21    @classmethod
22    def count_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
23        """Returns the count of null (NaN) values in the boolean column."""
24        return dataframe[column_name].isna().sum()

Returns the count of null (NaN) values in the boolean column.

@classmethod
def count_not_null( cls, dataframe: pandas.core.frame.DataFrame, column_name: str = 'value_bool') -> int:
26    @classmethod
27    def count_not_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
28        """Returns the count of non-null (True or False) values in the boolean column."""
29        return dataframe[column_name].notna().sum()

Returns the count of non-null (True or False) values in the boolean column.

@classmethod
def true_percentage( cls, dataframe: pandas.core.frame.DataFrame, column_name: str = 'value_bool') -> float:
31    @classmethod
32    def true_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
33        """Returns the percentage of True values in the boolean column."""
34        true_count = cls.count_true(dataframe, column_name)
35        total_count = cls.count_not_null(dataframe, column_name)
36        return (true_count / total_count) * 100 if total_count > 0 else 0.0

Returns the percentage of True values in the boolean column.

@classmethod
def false_percentage( cls, dataframe: pandas.core.frame.DataFrame, column_name: str = 'value_bool') -> float:
38    @classmethod
39    def false_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
40        """Returns the percentage of False values in the boolean column."""
41        false_count = cls.count_false(dataframe, column_name)
42        total_count = cls.count_not_null(dataframe, column_name)
43        return (false_count / total_count) * 100 if total_count > 0 else 0.0

Returns the percentage of False values in the boolean column.

@classmethod
def mode(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> bool:
45    @classmethod
46    def mode(cls, dataframe: pd.DataFrame, column_name: str) -> bool:
47        """Returns the mode (most common value) of the specified boolean column."""
48        return dataframe[column_name].mode()[0]

Returns the mode (most common value) of the specified boolean column.

@classmethod
def is_balanced(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> bool:
50    @classmethod
51    def is_balanced(cls, dataframe: pd.DataFrame, column_name: str) -> bool:
52        """Indicates if the distribution is balanced (50% True and False) in the specified boolean column."""
53        true_percentage = dataframe[column_name].mean()
54        return true_percentage == 0.5

Indicates whether the distribution is balanced (exactly 50% True and 50% False) in the specified boolean column.

@classmethod
def summary_as_dict( cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> Dict[str, Union[int, float, bool]]:
56    @classmethod
57    def summary_as_dict(cls, dataframe: pd.DataFrame, column_name: str) -> Dict[str, Union[int, float, bool]]:
58        """Returns a summary of boolean statistics for the specified column as a dictionary."""
59        return {
60            'true_count': cls.count_true(dataframe, column_name),
61            'false_count': cls.count_false(dataframe, column_name),
62            'true_percentage': cls.true_percentage(dataframe, column_name),
63            'false_percentage': cls.false_percentage(dataframe, column_name),
64            'mode': cls.mode(dataframe, column_name),
65            'is_balanced': cls.is_balanced(dataframe, column_name)
66        }

Returns a summary of boolean statistics for the specified column as a dictionary.

@classmethod
def summary_as_dataframe( cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> pandas.core.frame.DataFrame:
68    @classmethod
69    def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str) -> pd.DataFrame:
70        """Returns a summary of boolean statistics for the specified column as a DataFrame."""
71        summary_data = cls.summary_as_dict(dataframe, column_name)
72        return pd.DataFrame([summary_data])

Returns a summary of boolean statistics for the specified column as a DataFrame.

Inherited Members
timeseries_shaper.base.Base
Base
dataframe
get_dataframe