Skip to content

ts_shape.features.stats.boolean_stats ¤

Classes:

  • BooleanStatistics

    Provides class methods to calculate statistics on a boolean column in a pandas DataFrame.

BooleanStatistics ¤

BooleanStatistics(dataframe: DataFrame, column_name: str = 'systime')

Bases: Base

Provides class methods to calculate statistics on a boolean column in a pandas DataFrame.

Parameters:

  • dataframe ¤

    (DataFrame) –

    The DataFrame to be processed.

  • column_name ¤

    (str, default: 'systime' ) –

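    The column to sort by. Default is 'systime'. If the column exists, it is converted to datetime (values that cannot be parsed become NaT) and used for sorting. If it is not found, every column whose name contains 'time' or 'date' is converted to datetime and the first such column, if any, is used for sorting.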

Methods:

  • count_false

    Returns the count of False values in the boolean column.

  • count_not_null

    Returns the count of non-null (True or False) values in the boolean column.

  • count_null

    Returns the count of null (NaN) values in the boolean column.

  • count_true

    Returns the count of True values in the boolean column.

  • false_percentage

    Returns the percentage of False values in the boolean column.

  • get_dataframe

    Returns the processed DataFrame.

  • is_balanced

    Indicates whether the values in the specified boolean column are evenly split (exactly 50% True and 50% False).

  • mode

    Returns the mode (most common value) of the specified boolean column.

  • summary_as_dataframe

    Returns a summary of boolean statistics for the specified column as a DataFrame.

  • summary_as_dict

    Returns a summary of boolean statistics for the specified column as a dictionary.

  • true_percentage

    Returns the percentage of True values in the boolean column.

Source code in src/ts_shape/utils/base.py
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
def __init__(self, dataframe: pd.DataFrame, column_name: str = 'systime') -> None:
    """
    Initializes the Base with a copy of the DataFrame, converts time columns to datetime,
    and sorts the copy by the specified column (or by a detected time column).

    If ``column_name`` exists, it is converted to datetime (values that cannot be parsed
    become NaT) and used for sorting. Otherwise every column whose name contains 'time'
    or 'date' (case-insensitive) is converted to datetime and the first of them, if any,
    is used for sorting. When no such column exists, the copy is left unsorted.

    Args:
        dataframe (pd.DataFrame): The DataFrame to be processed. It is copied, so the caller's object is not modified.
        column_name (str): The column to sort by. Default is 'systime'.
    """
    self.dataframe = dataframe.copy()

    if column_name in self.dataframe.columns:
        # The requested column exists: coerce it to datetime, turning unparseable values into NaT
        self.dataframe[column_name] = pd.to_datetime(self.dataframe[column_name], errors='coerce')
    else:
        # Fall back to name-based detection. Column labels are not guaranteed to be
        # strings (e.g. integer labels), so go through str() before matching.
        time_columns = [
            col for col in self.dataframe.columns
            if 'time' in str(col).lower() or 'date' in str(col).lower()
        ]

        # Convert all detected time columns to datetime, if any
        for col in time_columns:
            self.dataframe[col] = pd.to_datetime(self.dataframe[col], errors='coerce')

        # Sort by the first detected time column; with none detected, nothing is sorted
        if time_columns:
            column_name = time_columns[0]

    # Sort by the datetime column (either specified or detected)
    if column_name in self.dataframe.columns:
        self.dataframe = self.dataframe.sort_values(by=column_name)

count_false classmethod ¤

count_false(dataframe: DataFrame, column_name: str = 'value_bool') -> int

Returns the count of False values in the boolean column.

Source code in src/ts_shape/features/stats/boolean_stats.py
15
16
17
18
@classmethod
def count_false(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
    """Returns the count of False values in the boolean column.

    Null entries are not counted, since they do not compare equal to False.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the column.
        column_name (str): The column to inspect. Default is 'value_bool'.

    Returns:
        int: Number of entries equal to False, as a built-in int.
    """
    # The element-wise `== False` is intentional: it builds a mask over the Series.
    # int() converts the NumPy integer produced by sum() so the result matches the
    # annotated return type and stays serialisable (e.g. with json).
    return int((dataframe[column_name] == False).sum())

count_not_null classmethod ¤

count_not_null(dataframe: DataFrame, column_name: str = 'value_bool') -> int

Returns the count of non-null (True or False) values in the boolean column.

Source code in src/ts_shape/features/stats/boolean_stats.py
25
26
27
28
@classmethod
def count_not_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
    """Returns the count of non-null (True or False) values in the boolean column.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the column.
        column_name (str): The column to inspect. Default is 'value_bool'.

    Returns:
        int: Number of entries for which notna() is True, as a built-in int.
    """
    # int() converts the NumPy integer produced by sum() so the result matches the
    # annotated return type and stays serialisable (e.g. with json).
    return int(dataframe[column_name].notna().sum())

count_null classmethod ¤

count_null(dataframe: DataFrame, column_name: str = 'value_bool') -> int

Returns the count of null (NaN) values in the boolean column.

Source code in src/ts_shape/features/stats/boolean_stats.py
20
21
22
23
@classmethod
def count_null(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
    """Returns the count of null (NaN) values in the boolean column.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the column.
        column_name (str): The column to inspect. Default is 'value_bool'.

    Returns:
        int: Number of entries for which isna() is True, as a built-in int.
    """
    # int() converts the NumPy integer produced by sum() so the result matches the
    # annotated return type and stays serialisable (e.g. with json).
    return int(dataframe[column_name].isna().sum())

count_true classmethod ¤

count_true(dataframe: DataFrame, column_name: str = 'value_bool') -> int

Returns the count of True values in the boolean column.

Source code in src/ts_shape/features/stats/boolean_stats.py
10
11
12
13
@classmethod
def count_true(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> int:
    """Returns the count of True values in the boolean column.

    The count is obtained by summing the column, so each True contributes 1 and
    each False contributes 0.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the column.
        column_name (str): The column to inspect. Default is 'value_bool'.

    Returns:
        int: Sum of the column, as a built-in int.
    """
    # int() converts the NumPy number produced by sum() so the result matches the
    # annotated return type and stays serialisable (e.g. with json).
    return int(dataframe[column_name].sum())

false_percentage classmethod ¤

false_percentage(dataframe: DataFrame, column_name: str = 'value_bool') -> float

Returns the percentage of False values in the boolean column.

Source code in src/ts_shape/features/stats/boolean_stats.py
37
38
39
40
41
42
@classmethod
def false_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
    """Computes the share of False values among the non-null entries, as a percentage.

    The result is 0.0 when the column has no non-null entries.
    """
    falses = cls.count_false(dataframe, column_name)
    non_null = cls.count_not_null(dataframe, column_name)
    if non_null > 0:
        return (falses / non_null) * 100
    # Nothing to divide by: report zero instead of raising
    return 0.0

get_dataframe ¤

get_dataframe() -> DataFrame

Returns the processed DataFrame.

Source code in src/ts_shape/utils/base.py
34
35
36
def get_dataframe(self) -> pd.DataFrame:
    """Hands back the DataFrame stored on this instance (the same object, not a copy)."""
    frame = self.dataframe
    return frame

is_balanced classmethod ¤

is_balanced(dataframe: DataFrame, column_name: str) -> bool

Indicates if the distribution is balanced (50% True and False) in the specified boolean column.

Source code in src/ts_shape/features/stats/boolean_stats.py
49
50
51
52
53
@classmethod
def is_balanced(cls, dataframe: pd.DataFrame, column_name: str) -> bool:
    """Indicates if the distribution is balanced (50% True and 50% False) in the specified boolean column.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the column.
        column_name (str): The column to inspect.

    Returns:
        bool: True when the mean of the column is exactly 0.5; False otherwise,
        including when the mean is undefined because the column has no non-null entries.
    """
    true_share = dataframe[column_name].mean()
    # With no non-null entries the mean is NaN (or pd.NA for nullable dtypes).
    # Comparing pd.NA with 0.5 would yield pd.NA rather than a boolean, so the
    # undefined case is answered explicitly.
    if pd.isna(true_share):
        return False
    # bool() converts the NumPy boolean so the result matches the annotated return type
    return bool(true_share == 0.5)

mode classmethod ¤

mode(dataframe: DataFrame, column_name: str) -> bool

Returns the mode (most common value) of the specified boolean column.

Source code in src/ts_shape/features/stats/boolean_stats.py
44
45
46
47
@classmethod
def mode(cls, dataframe: pd.DataFrame, column_name: str) -> bool:
    """Finds the most common value in the specified boolean column.

    The value returned is the entry labelled 0 in the result of the column's mode().
    """
    most_common = dataframe[column_name].mode()
    return most_common[0]

summary_as_dataframe classmethod ¤

summary_as_dataframe(dataframe: DataFrame, column_name: str) -> DataFrame

Returns a summary of boolean statistics for the specified column as a DataFrame.

Source code in src/ts_shape/features/stats/boolean_stats.py
67
68
69
70
71
@classmethod
def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str) -> pd.DataFrame:
    """Builds a one-row DataFrame whose columns are the entries produced by summary_as_dict."""
    # A single-element list of dicts becomes one row, with the dict keys as column names
    return pd.DataFrame([cls.summary_as_dict(dataframe, column_name)])

summary_as_dict classmethod ¤

summary_as_dict(dataframe: DataFrame, column_name: str) -> Dict[str, Union[int, float, bool]]

Returns a summary of boolean statistics for the specified column as a dictionary.

Source code in src/ts_shape/features/stats/boolean_stats.py
55
56
57
58
59
60
61
62
63
64
65
@classmethod
def summary_as_dict(cls, dataframe: pd.DataFrame, column_name: str) -> Dict[str, Union[int, float, bool]]:
    """Collects the boolean statistics of the specified column into a dictionary.

    The dictionary holds the True/False counts, the True/False percentages,
    the mode and the balance flag, each taken from the matching class method.
    """
    summary = {}
    summary['true_count'] = cls.count_true(dataframe, column_name)
    summary['false_count'] = cls.count_false(dataframe, column_name)
    summary['true_percentage'] = cls.true_percentage(dataframe, column_name)
    summary['false_percentage'] = cls.false_percentage(dataframe, column_name)
    summary['mode'] = cls.mode(dataframe, column_name)
    summary['is_balanced'] = cls.is_balanced(dataframe, column_name)
    return summary

true_percentage classmethod ¤

true_percentage(dataframe: DataFrame, column_name: str = 'value_bool') -> float

Returns the percentage of True values in the boolean column.

Source code in src/ts_shape/features/stats/boolean_stats.py
30
31
32
33
34
35
@classmethod
def true_percentage(cls, dataframe: pd.DataFrame, column_name: str = 'value_bool') -> float:
    """Computes the share of True values among the non-null entries, as a percentage.

    The result is 0.0 when the column has no non-null entries.
    """
    trues = cls.count_true(dataframe, column_name)
    non_null = cls.count_not_null(dataframe, column_name)
    if non_null > 0:
        return (trues / non_null) * 100
    # Nothing to divide by: report zero instead of raising
    return 0.0