ts_shape.features.stats.numeric_stats ¤

Classes:

NumericStatistics –

Provides class methods to calculate statistics on numeric columns in a pandas DataFrame.

NumericStatistics ¤

NumericStatistics(dataframe: DataFrame, column_name: str = 'systime')

Bases: Base

Provides class methods to calculate statistics on numeric columns in a pandas DataFrame.

Parameters:

dataframe ¤
(DataFrame) –

The DataFrame to be processed.
column_name ¤
(str, default: 'systime' ) –

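The column to sort by. Default is 'systime'. If the column is present, it is converted to datetime (values that cannot be parsed become NaT) and used for sorting. If the column is not found, the class attempts to detect other time columns (names containing 'time' or 'date') and sorts by the first one detected.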

Methods:

coefficient_of_variation –

Calculate the coefficient of variation of the column.
column_iqr –

Calculate the interquartile range of the column.
column_kurtosis –

Calculate the kurtosis of a specified column.
column_mad –

Calculate the mean absolute deviation of the column.
column_max –

Calculate the maximum value of a specified column.
column_mean –

Calculate the mean of a specified column.
column_median –

Calculate the median of a specified column.
column_min –

Calculate the minimum value of a specified column.
column_quantile –

Calculate a specific quantile of the column.
column_range –

Calculate the range of the column.
column_skewness –

Calculate the skewness of a specified column.
column_std –

Calculate the standard deviation of a specified column.
column_sum –

Calculate the sum of a specified column.
column_variance –

Calculate the variance of a specified column.
describe –

Provide a statistical summary for numeric columns in the DataFrame.
get_dataframe –

Returns the processed DataFrame.
standard_error_mean –

Calculate the standard error of the mean for the column.
summary_as_dataframe –

Returns a DataFrame with comprehensive numeric statistics for the specified column.
summary_as_dict –

Returns a dictionary with comprehensive numeric statistics for the specified column.

Source code in src/ts_shape/utils/base.py

def __init__(self, dataframe: pd.DataFrame, column_name: str = 'systime') -> None:
    """
    Initializes the Base with a copy of the DataFrame, converts its time column(s) to
    datetime, and sorts the copy by the specified (or detected) time column.

    Args:
        dataframe (pd.DataFrame): The DataFrame to be processed. It is copied, so the
            caller's object is never modified.
        column_name (str): The column to sort by. Default is 'systime'. If the column
            exists it is converted with ``pd.to_datetime(..., errors='coerce')``, so
            values that cannot be parsed become NaT. If the column does not exist, every
            column whose (string) name contains 'time' or 'date', case-insensitively, is
            converted instead and the first such column is used for sorting. If no
            candidate is found the data is left unsorted.
    """
    self.dataframe = dataframe.copy()

    if column_name in self.dataframe.columns:
        # The requested column exists: coerce it to datetime in place.
        self.dataframe[column_name] = pd.to_datetime(self.dataframe[column_name], errors='coerce')
    else:
        # Fall back to name-based detection. Non-string labels (e.g. integer column
        # names) cannot be matched by name and are skipped instead of raising.
        time_columns = [
            col for col in self.dataframe.columns
            if isinstance(col, str) and ('time' in col.lower() or 'date' in col.lower())
        ]

        # Convert all detected time columns to datetime, if any
        for col in time_columns:
            self.dataframe[col] = pd.to_datetime(self.dataframe[col], errors='coerce')

        # Sort by the first detected time column; with none detected, nothing is sorted
        if time_columns:
            column_name = time_columns[0]

    # Sort by the datetime column (either specified or detected)
    if column_name in self.dataframe.columns:
        self.dataframe = self.dataframe.sort_values(by=column_name)

coefficient_of_variation `classmethod` ¤

coefficient_of_variation(dataframe: DataFrame, column_name: str) -> float

Calculate the coefficient of variation of the column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def coefficient_of_variation(cls, dataframe: pd.DataFrame, column_name: str) -> Union[float, None]:
    """Calculate the coefficient of variation (standard deviation / mean) of the column.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        Union[float, None]: ``column_std / column_mean``, or ``None`` when the mean is
        exactly zero (the ratio is undefined in that case).
    """
    mean = cls.column_mean(dataframe, column_name)
    if mean == 0:
        # Avoid dividing by zero; callers receive None rather than inf/NaN.
        return None
    return cls.column_std(dataframe, column_name) / mean

column_iqr `classmethod` ¤

column_iqr(dataframe: DataFrame, column_name: str) -> float

Calculate the interquartile range of the column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_iqr(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the interquartile range of ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: The value computed by ``scipy.stats.iqr`` with its default arguments.
    """
    values = dataframe[column_name]
    return stats.iqr(values)

column_kurtosis `classmethod` ¤

column_kurtosis(dataframe: DataFrame, column_name: str) -> float

Calculate the kurtosis of a specified column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_kurtosis(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the kurtosis of ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: The result of pandas' ``Series.kurt()`` with default arguments.
    """
    selected = dataframe[column_name]
    return selected.kurt()

column_mad `classmethod` ¤

column_mad(dataframe: DataFrame, column_name: str) -> float

Calculate the mean absolute deviation of the column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_mad(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Calculate the mean absolute deviation of the column.

    The value is computed explicitly as ``mean(|x - mean(x)|)`` because
    ``Series.mad()`` was deprecated in pandas 1.5 and removed in pandas 2.0,
    so relying on it raises ``AttributeError`` on current pandas releases.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: Mean absolute deviation around the mean. Missing values are skipped,
        following the default behaviour of ``Series.mean()``.
    """
    series = dataframe[column_name]
    deviations = (series - series.mean()).abs()
    return deviations.mean()

column_max `classmethod` ¤

column_max(dataframe: DataFrame, column_name: str) -> float

Calculate the maximum value of a specified column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_max(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the largest value found in ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: The result of pandas' ``Series.max()``.
    """
    selected = dataframe[column_name]
    return selected.max()

column_mean `classmethod` ¤

column_mean(dataframe: DataFrame, column_name: str) -> float

Calculate the mean of a specified column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the arithmetic mean of ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: The result of pandas' ``Series.mean()``.
    """
    selected = dataframe[column_name]
    return selected.mean()

column_median `classmethod` ¤

column_median(dataframe: DataFrame, column_name: str) -> float

Calculate the median of a specified column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_median(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the median of ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: The result of pandas' ``Series.median()``.
    """
    selected = dataframe[column_name]
    return selected.median()

column_min `classmethod` ¤

column_min(dataframe: DataFrame, column_name: str) -> float

Calculate the minimum value of a specified column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_min(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the smallest value found in ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: The result of pandas' ``Series.min()``.
    """
    selected = dataframe[column_name]
    return selected.min()

column_quantile `classmethod` ¤

column_quantile(dataframe: DataFrame, column_name: str, quantile: float) -> float

Calculate a specific quantile of the column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_quantile(cls, dataframe: pd.DataFrame, column_name: str, quantile: float) -> float:
    """Return the requested quantile of ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.
        quantile (float): Quantile to compute, passed straight to ``Series.quantile``.

    Returns:
        float: The result of pandas' ``Series.quantile(quantile)``.
    """
    selected = dataframe[column_name]
    return selected.quantile(q=quantile)

column_range `classmethod` ¤

column_range(dataframe: DataFrame, column_name: str) -> float

Calculate the range of the column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_range(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the spread (maximum minus minimum) of ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: ``column_max`` minus ``column_min`` for the column.
    """
    highest = cls.column_max(dataframe, column_name)
    lowest = cls.column_min(dataframe, column_name)
    return highest - lowest

column_skewness `classmethod` ¤

column_skewness(dataframe: DataFrame, column_name: str) -> float

Calculate the skewness of a specified column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_skewness(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the skewness of ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: The result of pandas' ``Series.skew()`` with default arguments.
    """
    selected = dataframe[column_name]
    return selected.skew()

column_std `classmethod` ¤

column_std(dataframe: DataFrame, column_name: str) -> float

Calculate the standard deviation of a specified column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_std(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the standard deviation of ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: The result of pandas' ``Series.std()`` with default arguments.
    """
    selected = dataframe[column_name]
    return selected.std()

column_sum `classmethod` ¤

column_sum(dataframe: DataFrame, column_name: str) -> float

Calculate the sum of a specified column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_sum(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the total of all values in ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: The result of pandas' ``Series.sum()``.
    """
    selected = dataframe[column_name]
    return selected.sum()

column_variance `classmethod` ¤

column_variance(dataframe: DataFrame, column_name: str) -> float

Calculate the variance of a specified column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def column_variance(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the variance of ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: The result of pandas' ``Series.var()`` with default arguments.
    """
    selected = dataframe[column_name]
    return selected.var()

describe `classmethod` ¤

describe(dataframe: DataFrame) -> DataFrame

Provide a statistical summary for numeric columns in the DataFrame.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def describe(cls, dataframe: pd.DataFrame) -> pd.DataFrame:
    """Return pandas' descriptive-statistics table for the DataFrame.

    Args:
        dataframe (pd.DataFrame): The DataFrame to summarise.

    Returns:
        pd.DataFrame: The output of ``DataFrame.describe()`` with default arguments.
    """
    summary = dataframe.describe()
    return summary

get_dataframe ¤

get_dataframe() -> DataFrame

Returns the processed DataFrame.

Source code in src/ts_shape/utils/base.py

def get_dataframe(self) -> pd.DataFrame:
    """Return the DataFrame currently stored on this instance.

    Returns:
        pd.DataFrame: The object held in ``self.dataframe`` (not a copy).
    """
    processed = self.dataframe
    return processed

standard_error_mean `classmethod` ¤

standard_error_mean(dataframe: DataFrame, column_name: str) -> float

Calculate the standard error of the mean for the column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def standard_error_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the standard error of the mean of ``column_name``.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to evaluate.

    Returns:
        float: The result of pandas' ``Series.sem()`` with default arguments.
    """
    selected = dataframe[column_name]
    return selected.sem()

summary_as_dataframe `classmethod` ¤

summary_as_dataframe(dataframe: DataFrame, column_name: str) -> DataFrame

Returns a DataFrame with comprehensive numeric statistics for the specified column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str) -> pd.DataFrame:
    """Return the column's numeric statistics as a single-row DataFrame.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to summarise.

    Returns:
        pd.DataFrame: One row whose columns are the keys produced by
        ``summary_as_dict`` for the same arguments.
    """
    stats_row = cls.summary_as_dict(dataframe, column_name)
    rows = [stats_row]
    return pd.DataFrame(rows)

summary_as_dict `classmethod` ¤

summary_as_dict(dataframe: DataFrame, column_name: str) -> Dict[str, Union[float, int]]

Returns a dictionary with comprehensive numeric statistics for the specified column.

Source code in src/ts_shape/features/stats/numeric_stats.py

@classmethod
def summary_as_dict(cls, dataframe: pd.DataFrame, column_name: str) -> Dict[str, Union[float, int]]:
    """Returns a dictionary with comprehensive numeric statistics for the specified column.

    Args:
        dataframe (pd.DataFrame): The DataFrame holding the data.
        column_name (str): Name of the column to summarise.

    Returns:
        Dict[str, Union[float, int]]: Statistics keyed by 'min', 'max', 'mean', 'median',
        'std', 'var', 'sum', 'kurtosis', 'skewness', 'q1', 'q3', 'iqr', 'range', 'mad',
        'coeff_var', 'sem', 'mode', 'percentile_90' and 'percentile_10'. 'coeff_var' is
        whatever ``coefficient_of_variation`` returns, and 'mode' is ``None`` when the
        fallback below finds no mode (e.g. an empty column).
    """
    series = dataframe[column_name]

    # NOTE(review): no `column_mode` classmethod is visible on this class, so the
    # original `cls.column_mode(...)` call raised AttributeError. Use such a method
    # when the class does provide one; otherwise take the first (smallest) mode
    # reported by pandas.
    mode_func = getattr(cls, 'column_mode', None)
    if callable(mode_func):
        mode = mode_func(dataframe, column_name)
    else:
        modes = series.mode()
        mode = modes.iloc[0] if not modes.empty else None

    return {
        'min': cls.column_min(dataframe, column_name),
        'max': cls.column_max(dataframe, column_name),
        'mean': cls.column_mean(dataframe, column_name),
        'median': cls.column_median(dataframe, column_name),
        'std': cls.column_std(dataframe, column_name),
        'var': cls.column_variance(dataframe, column_name),
        'sum': cls.column_sum(dataframe, column_name),
        'kurtosis': cls.column_kurtosis(dataframe, column_name),
        'skewness': cls.column_skewness(dataframe, column_name),
        'q1': cls.column_quantile(dataframe, column_name, 0.25),
        'q3': cls.column_quantile(dataframe, column_name, 0.75),
        'iqr': cls.column_iqr(dataframe, column_name),
        'range': cls.column_range(dataframe, column_name),
        'mad': cls.column_mad(dataframe, column_name),
        'coeff_var': cls.coefficient_of_variation(dataframe, column_name),
        'sem': cls.standard_error_mean(dataframe, column_name),
        'mode': mode,
        'percentile_90': cls.column_quantile(dataframe, column_name, 0.90),
        'percentile_10': cls.column_quantile(dataframe, column_name, 0.10),
    }

ts_shape.features.stats.numeric_stats ¤

NumericStatistics ¤

`dataframe` ¤

`column_name` ¤

coefficient_of_variation `classmethod` ¤

column_iqr `classmethod` ¤

column_kurtosis `classmethod` ¤

column_mad `classmethod` ¤

column_max `classmethod` ¤

column_mean `classmethod` ¤

column_median `classmethod` ¤

column_min `classmethod` ¤

column_quantile `classmethod` ¤

column_range `classmethod` ¤

column_skewness `classmethod` ¤

column_std `classmethod` ¤

column_sum `classmethod` ¤

column_variance `classmethod` ¤

describe `classmethod` ¤

get_dataframe ¤

standard_error_mean `classmethod` ¤

summary_as_dataframe `classmethod` ¤

summary_as_dict `classmethod` ¤

ts_shape.features.stats.numeric_stats ¤

NumericStatistics ¤

dataframe ¤

column_name ¤

coefficient_of_variation classmethod ¤

column_iqr classmethod ¤

column_kurtosis classmethod ¤

column_mad classmethod ¤

column_max classmethod ¤

column_mean classmethod ¤

column_median classmethod ¤

column_min classmethod ¤

column_quantile classmethod ¤

column_range classmethod ¤

column_skewness classmethod ¤

column_std classmethod ¤

column_sum classmethod ¤

column_variance classmethod ¤

describe classmethod ¤

get_dataframe ¤

standard_error_mean classmethod ¤

summary_as_dataframe classmethod ¤

summary_as_dict classmethod ¤

`dataframe` ¤

`column_name` ¤

coefficient_of_variation `classmethod` ¤

column_iqr `classmethod` ¤

column_kurtosis `classmethod` ¤

column_mad `classmethod` ¤

column_max `classmethod` ¤

column_mean `classmethod` ¤

column_median `classmethod` ¤

column_min `classmethod` ¤

column_quantile `classmethod` ¤

column_range `classmethod` ¤

column_skewness `classmethod` ¤

column_std `classmethod` ¤

column_sum `classmethod` ¤

column_variance `classmethod` ¤

describe `classmethod` ¤

standard_error_mean `classmethod` ¤

summary_as_dataframe `classmethod` ¤

summary_as_dict `classmethod` ¤