ts_shape.features.time_stats.time_stats_numeric ¤

Classes:

TimeGroupedStatistics –

A class for calculating time-grouped statistics on numeric data, with class methods to apply various statistical functions.

TimeGroupedStatistics ¤

TimeGroupedStatistics(dataframe: DataFrame, column_name: str = 'systime')

Bases: Base

A class for calculating time-grouped statistics on numeric data, with class methods to apply various statistical functions.

Parameters:

dataframe ¤
(DataFrame) –

The DataFrame to be processed.
column_name ¤
(str, default: 'systime' ) –

The column to sort by. Default is 'systime'. If the column is not found in the DataFrame, every column whose name contains 'time' or 'date' is converted to datetime and the first of them is used for sorting.

Methods:

calculate_custom_func –

Apply a custom aggregation function on the value column over the grouped time intervals.
calculate_statistic –

Calculate a specified statistic on the value column over the grouped time intervals.
calculate_statistics –

Calculate multiple specified statistics on the value column over the grouped time intervals.
get_dataframe –

Returns the processed DataFrame.

Source code in src/ts_shape/utils/base.py

def __init__(self, dataframe: pd.DataFrame, column_name: str = 'systime') -> None:
    """
    Store a copy of the DataFrame, convert its time column(s) to datetime and sort by time.

    The caller's DataFrame is never modified; all work happens on a copy kept in
    ``self.dataframe``.

    Args:
        dataframe (pd.DataFrame): The DataFrame to be processed.
        column_name (str): The column to sort by. Default is 'systime'. If this column is
            present it is converted with ``pd.to_datetime`` (unparseable values become NaT).
            If it is absent, every column whose name contains 'time' or 'date'
            (case-insensitive) is converted instead and the first such column is used
            for sorting. If no such column exists, the copy is left unsorted.
    """
    self.dataframe = dataframe.copy()

    if column_name in self.dataframe.columns:
        # The requested column exists: coerce it to datetime in place.
        self.dataframe[column_name] = pd.to_datetime(self.dataframe[column_name], errors='coerce')
    else:
        # Fall back to name-based detection. Only string labels can be matched by name;
        # the isinstance guard keeps non-string labels (e.g. integer column names) from
        # raising AttributeError on .lower().
        time_columns = [
            col for col in self.dataframe.columns
            if isinstance(col, str) and ('time' in col.lower() or 'date' in col.lower())
        ]

        # Convert all detected time columns to datetime, if any
        for col in time_columns:
            self.dataframe[col] = pd.to_datetime(self.dataframe[col], errors='coerce')

        # Sort by the first detected time column; with none detected, column_name stays
        # unchanged and the membership check below skips sorting.
        if time_columns:
            column_name = time_columns[0]

    # Sort by the datetime column (either specified or detected)
    if column_name in self.dataframe.columns:
        self.dataframe = self.dataframe.sort_values(by=column_name)

calculate_custom_func `classmethod` ¤

calculate_custom_func(dataframe: DataFrame, time_column: str, value_column: str, freq: str, func) -> DataFrame

Apply a custom aggregation function on the value column over the grouped time intervals.

Parameters:

dataframe ¤
(DataFrame) –

The DataFrame containing the data.
time_column ¤
(str) –

The name of the time column to group and sort by.
value_column ¤
(str) –

The name of the numeric column to calculate statistics on.
freq ¤
(str) –

Frequency string for time grouping (e.g., 'H' for hourly, 'D' for daily).
func ¤
(callable) –

Custom function to apply to each group.

Returns:

DataFrame –

pd.DataFrame: A DataFrame with the custom calculated statistics.

Source code in src/ts_shape/features/time_stats/time_stats_numeric.py

@classmethod
def calculate_custom_func(cls, dataframe: pd.DataFrame, time_column: str, value_column: str, freq: str, func) -> pd.DataFrame:
    """
    Aggregate one column per time interval with a caller-supplied function.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing the data.
        time_column (str): Column that becomes the index and defines the time intervals.
        value_column (str): Column whose values are handed to ``func`` for each interval.
        freq (str): Frequency string passed to ``resample`` (e.g., 'D' for daily).
        func (callable): Function applied to the values of each interval.

    Returns:
        pd.DataFrame: A single-column DataFrame named 'custom', indexed by time interval.
    """
    # Resampling works on the index, so the time column has to be moved there first.
    time_indexed = dataframe.set_index(time_column)
    per_interval = time_indexed.resample(freq)[value_column].apply(func)
    return per_interval.to_frame('custom')

calculate_statistic `classmethod` ¤

calculate_statistic(dataframe: DataFrame, time_column: str, value_column: str, freq: str, stat_method: str) -> DataFrame

Calculate a specified statistic on the value column over the grouped time intervals.

Parameters:

dataframe ¤
(DataFrame) –

The DataFrame containing the data.
time_column ¤
(str) –

The name of the time column to group and sort by.
value_column ¤
(str) –

The name of the numeric column to calculate statistics on.
freq ¤
(str) –

Frequency string for time grouping (e.g., 'H' for hourly, 'D' for daily).
stat_method ¤
(str) –

The statistical method to apply ('mean', 'sum', 'min', 'max', 'diff', 'range').

Returns:

DataFrame –

pd.DataFrame: A DataFrame with the time intervals and the calculated statistics.

Source code in src/ts_shape/features/time_stats/time_stats_numeric.py

@classmethod
def calculate_statistic(cls, dataframe: pd.DataFrame, time_column: str, value_column: str, freq: str, stat_method: str) -> pd.DataFrame:
    """
    Compute one statistic of a column for every time interval.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing the data.
        time_column (str): Column that becomes the index and defines the time intervals.
        value_column (str): Numeric column the statistic is computed on.
        freq (str): Frequency string passed to ``resample`` (e.g., 'D' for daily).
        stat_method (str): One of 'mean', 'sum', 'min', 'max', 'diff', 'range'.

    Returns:
        pd.DataFrame: A single-column DataFrame indexed by time interval. The column is
        named after the method, except for 'diff', whose column is named 'difference'.

    Raises:
        ValueError: If ``stat_method`` is not one of the supported names.
    """
    # The resampler is built before the method name is checked, so problems with the
    # time column or frequency surface first.
    intervals = dataframe.set_index(time_column).resample(freq)

    # (method name, output column label, reduction over the resampled value column)
    reductions = (
        ('mean', 'mean', lambda col: col.mean()),
        ('sum', 'sum', lambda col: col.sum()),
        ('min', 'min', lambda col: col.min()),
        ('max', 'max', lambda col: col.max()),
        # diff: last value minus first value within each interval
        ('diff', 'difference', lambda col: col.last() - col.first()),
        # range: max value minus min value within each interval
        ('range', 'range', lambda col: col.max() - col.min()),
    )

    for method_name, column_label, reduce_interval in reductions:
        if stat_method == method_name:
            # The value column is selected only once a method has matched.
            return reduce_interval(intervals[value_column]).to_frame(column_label)

    raise ValueError("Invalid stat_method. Choose from 'mean', 'sum', 'min', 'max', 'diff', 'range'.")

calculate_statistics `classmethod` ¤

calculate_statistics(dataframe: DataFrame, time_column: str, value_column: str, freq: str, stat_methods: list) -> DataFrame

Calculate multiple specified statistics on the value column over the grouped time intervals.

Parameters:

dataframe ¤
(DataFrame) –

The DataFrame containing the data.
time_column ¤
(str) –

The name of the time column to group and sort by.
value_column ¤
(str) –

The name of the numeric column to calculate statistics on.
freq ¤
(str) –

Frequency string for time grouping (e.g., 'H' for hourly, 'D' for daily).
stat_methods ¤
(list) –

A list of statistical methods to apply (e.g., ['mean', 'sum', 'diff', 'range']).

Returns:

DataFrame –

pd.DataFrame: A DataFrame with the time intervals and the calculated statistics for each method.

Source code in src/ts_shape/features/time_stats/time_stats_numeric.py

@classmethod
def calculate_statistics(cls, dataframe: pd.DataFrame, time_column: str, value_column: str, freq: str, stat_methods: list) -> pd.DataFrame:
    """
    Calculate multiple specified statistics on the value column over the grouped time intervals.

    Each statistic is computed by ``cls.calculate_statistic`` and the resulting
    single-column frames are placed side by side, aligned on the time-interval index.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing the data.
        time_column (str): The name of the time column to group and sort by.
        value_column (str): The name of the numeric column to calculate statistics on.
        freq (str): Frequency string passed to ``resample`` (e.g., 'D' for daily).
        stat_methods (list): Statistical methods to apply (e.g., ['mean', 'sum', 'diff', 'range']).
            A method listed more than once is computed once; its column appears at the
            position of its first occurrence.

    Returns:
        pd.DataFrame: A DataFrame with the time intervals and one column per requested
        method. An empty DataFrame is returned when ``stat_methods`` is empty.

    Raises:
        ValueError: Propagated from ``calculate_statistic`` for an unsupported method name.
    """
    # De-duplicate while keeping first-seen order: two frames with the same column name
    # would otherwise collide when combined.
    unique_methods = list(dict.fromkeys(stat_methods))
    if not unique_methods:
        return pd.DataFrame()

    stat_frames = [
        cls.calculate_statistic(dataframe, time_column, value_column, freq, method)
        for method in unique_methods
    ]

    # Column-wise concat performs an outer alignment on the index.
    return pd.concat(stat_frames, axis=1)

get_dataframe ¤

get_dataframe() -> DataFrame

Returns the processed DataFrame.

Source code in src/ts_shape/utils/base.py

def get_dataframe(self) -> pd.DataFrame:
    """
    Return the DataFrame held by this instance.

    Returns:
        pd.DataFrame: The object stored in ``self.dataframe``; no copy is made here.
    """
    stored_frame = self.dataframe
    return stored_frame

ts_shape.features.time_stats.time_stats_numeric ¤

TimeGroupedStatistics ¤

`dataframe` ¤

`column_name` ¤

calculate_custom_func `classmethod` ¤

`dataframe` ¤

`time_column` ¤

`value_column` ¤

`freq` ¤

`func` ¤

calculate_statistic `classmethod` ¤

`dataframe` ¤

`time_column` ¤

`value_column` ¤

`freq` ¤

`stat_method` ¤

calculate_statistics `classmethod` ¤

`dataframe` ¤

`time_column` ¤

`value_column` ¤

`freq` ¤

`stat_methods` ¤

get_dataframe ¤

ts_shape.features.time_stats.time_stats_numeric ¤

TimeGroupedStatistics ¤

dataframe ¤

column_name ¤

calculate_custom_func classmethod ¤

dataframe ¤

time_column ¤

value_column ¤

freq ¤

func ¤

calculate_statistic classmethod ¤

dataframe ¤

time_column ¤

value_column ¤

freq ¤

stat_method ¤

calculate_statistics classmethod ¤

dataframe ¤

time_column ¤

value_column ¤

freq ¤

stat_methods ¤

get_dataframe ¤

`dataframe` ¤

`column_name` ¤

calculate_custom_func `classmethod` ¤

`dataframe` ¤

`time_column` ¤

`value_column` ¤

`freq` ¤

`func` ¤

calculate_statistic `classmethod` ¤

`dataframe` ¤

`time_column` ¤

`value_column` ¤

`freq` ¤

`stat_method` ¤

calculate_statistics `classmethod` ¤

`dataframe` ¤

`time_column` ¤

`value_column` ¤

`freq` ¤

`stat_methods` ¤