timeseries_shaper.stats.numeric_stats

  1import pandas as pd
  2from scipy import stats
  3from typing import Dict, Union
  4from ..base import Base
  5
  6class NumericStatistics(Base):
  7    """
  8    Provides class methods to calculate statistics on numeric columns in a pandas DataFrame.
  9    """
 10
 11    @classmethod
 12    def column_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 13        """Calculate the mean of a specified column."""
 14        return dataframe[column_name].mean()
 15
 16    @classmethod
 17    def column_median(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 18        """Calculate the median of a specified column."""
 19        return dataframe[column_name].median()
 20
 21    @classmethod
 22    def column_std(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 23        """Calculate the standard deviation of a specified column."""
 24        return dataframe[column_name].std()
 25
 26    @classmethod
 27    def column_variance(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 28        """Calculate the variance of a specified column."""
 29        return dataframe[column_name].var()
 30
 31    @classmethod
 32    def column_min(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 33        """Calculate the minimum value of a specified column."""
 34        return dataframe[column_name].min()
 35
 36    @classmethod
 37    def column_max(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 38        """Calculate the maximum value of a specified column."""
 39        return dataframe[column_name].max()
 40
 41    @classmethod
 42    def column_sum(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 43        """Calculate the sum of a specified column."""
 44        return dataframe[column_name].sum()
 45
 46    @classmethod
 47    def column_kurtosis(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 48        """Calculate the kurtosis of a specified column."""
 49        return dataframe[column_name].kurt()
 50
 51    @classmethod
 52    def column_skewness(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 53        """Calculate the skewness of a specified column."""
 54        return dataframe[column_name].skew()
 55
 56    @classmethod
 57    def column_quantile(cls, dataframe: pd.DataFrame, column_name: str, quantile: float) -> float:
 58        """Calculate a specific quantile of the column."""
 59        return dataframe[column_name].quantile(quantile)
 60
 61    @classmethod
 62    def column_iqr(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 63        """Calculate the interquartile range of the column."""
 64        return stats.iqr(dataframe[column_name])
 65
 66    @classmethod
 67    def column_range(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 68        """Calculate the range of the column."""
 69        return cls.column_max(dataframe, column_name) - cls.column_min(dataframe, column_name)
 70
 71    @classmethod
 72    def column_mad(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 73        """Calculate the mean absolute deviation of the column."""
 74        return dataframe[column_name].mad()
 75
 76    @classmethod
 77    def coefficient_of_variation(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 78        """Calculate the coefficient of variation of the column."""
 79        mean = cls.column_mean(dataframe, column_name)
 80        return cls.column_std(dataframe, column_name) / mean if mean != 0 else None
 81
 82    @classmethod
 83    def standard_error_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 84        """Calculate the standard error of the mean for the column."""
 85        return dataframe[column_name].sem()
 86
 87    @classmethod
 88    def describe(cls, dataframe: pd.DataFrame) -> pd.DataFrame:
 89        """Provide a statistical summary for numeric columns in the DataFrame."""
 90        return dataframe.describe()
 91    
 92    @classmethod
 93    def summary_as_dict(cls, dataframe: pd.DataFrame, column_name: str) -> Dict[str, Union[float, int]]:
 94        """Returns a dictionary with comprehensive numeric statistics for the specified column."""
 95        series = dataframe[column_name]
 96        return {
 97            'min': cls.column_min(dataframe, column_name),
 98            'max': cls.column_max(dataframe, column_name),
 99            'mean': cls.column_mean(dataframe, column_name),
100            'median': cls.column_median(dataframe, column_name),
101            'std': cls.column_std(dataframe, column_name),
102            'var': cls.column_variance(dataframe, column_name),
103            'sum': cls.column_sum(dataframe, column_name),
104            'kurtosis': cls.column_kurtosis(dataframe, column_name),
105            'skewness': cls.column_skewness(dataframe, column_name),
106            'q1': cls.column_quantile(dataframe, column_name, 0.25),
107            'q3': cls.column_quantile(dataframe, column_name, 0.75),
108            'iqr': cls.column_iqr(dataframe, column_name),
109            'range': cls.column_range(dataframe, column_name),
110            'mad': cls.column_mad(dataframe, column_name),
111            'coeff_var': cls.coefficient_of_variation(dataframe, column_name),
112            'sem': cls.standard_error_mean(dataframe, column_name),
113            'mode': cls.column_mode(dataframe, column_name),
114            'percentile_90': cls.column_quantile(dataframe, column_name, 0.90),
115            'percentile_10': cls.column_quantile(dataframe, column_name, 0.10),
116        }
117    
118    @classmethod
119    def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str) -> pd.DataFrame:
120        """Returns a DataFrame with comprehensive numeric statistics for the specified column."""
121        summary_data = cls.summary_as_dict(dataframe, column_name)
122        return pd.DataFrame([summary_data])
class NumericStatistics(timeseries_shaper.base.Base):
  7class NumericStatistics(Base):
  8    """
  9    Provides class methods to calculate statistics on numeric columns in a pandas DataFrame.
 10    """
 11
 12    @classmethod
 13    def column_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 14        """Calculate the mean of a specified column."""
 15        return dataframe[column_name].mean()
 16
 17    @classmethod
 18    def column_median(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 19        """Calculate the median of a specified column."""
 20        return dataframe[column_name].median()
 21
 22    @classmethod
 23    def column_std(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 24        """Calculate the standard deviation of a specified column."""
 25        return dataframe[column_name].std()
 26
 27    @classmethod
 28    def column_variance(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 29        """Calculate the variance of a specified column."""
 30        return dataframe[column_name].var()
 31
 32    @classmethod
 33    def column_min(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 34        """Calculate the minimum value of a specified column."""
 35        return dataframe[column_name].min()
 36
 37    @classmethod
 38    def column_max(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 39        """Calculate the maximum value of a specified column."""
 40        return dataframe[column_name].max()
 41
 42    @classmethod
 43    def column_sum(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 44        """Calculate the sum of a specified column."""
 45        return dataframe[column_name].sum()
 46
 47    @classmethod
 48    def column_kurtosis(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 49        """Calculate the kurtosis of a specified column."""
 50        return dataframe[column_name].kurt()
 51
 52    @classmethod
 53    def column_skewness(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 54        """Calculate the skewness of a specified column."""
 55        return dataframe[column_name].skew()
 56
 57    @classmethod
 58    def column_quantile(cls, dataframe: pd.DataFrame, column_name: str, quantile: float) -> float:
 59        """Calculate a specific quantile of the column."""
 60        return dataframe[column_name].quantile(quantile)
 61
 62    @classmethod
 63    def column_iqr(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 64        """Calculate the interquartile range of the column."""
 65        return stats.iqr(dataframe[column_name])
 66
 67    @classmethod
 68    def column_range(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 69        """Calculate the range of the column."""
 70        return cls.column_max(dataframe, column_name) - cls.column_min(dataframe, column_name)
 71
 72    @classmethod
 73    def column_mad(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 74        """Calculate the mean absolute deviation of the column."""
 75        return dataframe[column_name].mad()
 76
 77    @classmethod
 78    def coefficient_of_variation(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 79        """Calculate the coefficient of variation of the column."""
 80        mean = cls.column_mean(dataframe, column_name)
 81        return cls.column_std(dataframe, column_name) / mean if mean != 0 else None
 82
 83    @classmethod
 84    def standard_error_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
 85        """Calculate the standard error of the mean for the column."""
 86        return dataframe[column_name].sem()
 87
 88    @classmethod
 89    def describe(cls, dataframe: pd.DataFrame) -> pd.DataFrame:
 90        """Provide a statistical summary for numeric columns in the DataFrame."""
 91        return dataframe.describe()
 92    
 93    @classmethod
 94    def summary_as_dict(cls, dataframe: pd.DataFrame, column_name: str) -> Dict[str, Union[float, int]]:
 95        """Returns a dictionary with comprehensive numeric statistics for the specified column."""
 96        series = dataframe[column_name]
 97        return {
 98            'min': cls.column_min(dataframe, column_name),
 99            'max': cls.column_max(dataframe, column_name),
100            'mean': cls.column_mean(dataframe, column_name),
101            'median': cls.column_median(dataframe, column_name),
102            'std': cls.column_std(dataframe, column_name),
103            'var': cls.column_variance(dataframe, column_name),
104            'sum': cls.column_sum(dataframe, column_name),
105            'kurtosis': cls.column_kurtosis(dataframe, column_name),
106            'skewness': cls.column_skewness(dataframe, column_name),
107            'q1': cls.column_quantile(dataframe, column_name, 0.25),
108            'q3': cls.column_quantile(dataframe, column_name, 0.75),
109            'iqr': cls.column_iqr(dataframe, column_name),
110            'range': cls.column_range(dataframe, column_name),
111            'mad': cls.column_mad(dataframe, column_name),
112            'coeff_var': cls.coefficient_of_variation(dataframe, column_name),
113            'sem': cls.standard_error_mean(dataframe, column_name),
114            'mode': cls.column_mode(dataframe, column_name),
115            'percentile_90': cls.column_quantile(dataframe, column_name, 0.90),
116            'percentile_10': cls.column_quantile(dataframe, column_name, 0.10),
117        }
118    
119    @classmethod
120    def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str) -> pd.DataFrame:
121        """Returns a DataFrame with comprehensive numeric statistics for the specified column."""
122        summary_data = cls.summary_as_dict(dataframe, column_name)
123        return pd.DataFrame([summary_data])

Provides class methods to calculate statistics on numeric columns in a pandas DataFrame.

@classmethod
def column_mean(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
12    @classmethod
13    def column_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
14        """Calculate the mean of a specified column."""
15        return dataframe[column_name].mean()

Calculate the mean of a specified column.

@classmethod
def column_median(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
17    @classmethod
18    def column_median(cls, dataframe: pd.DataFrame, column_name: str) -> float:
19        """Calculate the median of a specified column."""
20        return dataframe[column_name].median()

Calculate the median of a specified column.

@classmethod
def column_std(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
22    @classmethod
23    def column_std(cls, dataframe: pd.DataFrame, column_name: str) -> float:
24        """Calculate the standard deviation of a specified column."""
25        return dataframe[column_name].std()

Calculate the standard deviation of a specified column.

@classmethod
def column_variance(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
27    @classmethod
28    def column_variance(cls, dataframe: pd.DataFrame, column_name: str) -> float:
29        """Calculate the variance of a specified column."""
30        return dataframe[column_name].var()

Calculate the variance of a specified column.

@classmethod
def column_min(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
32    @classmethod
33    def column_min(cls, dataframe: pd.DataFrame, column_name: str) -> float:
34        """Calculate the minimum value of a specified column."""
35        return dataframe[column_name].min()

Calculate the minimum value of a specified column.

@classmethod
def column_max(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
37    @classmethod
38    def column_max(cls, dataframe: pd.DataFrame, column_name: str) -> float:
39        """Calculate the maximum value of a specified column."""
40        return dataframe[column_name].max()

Calculate the maximum value of a specified column.

@classmethod
def column_sum(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
42    @classmethod
43    def column_sum(cls, dataframe: pd.DataFrame, column_name: str) -> float:
44        """Calculate the sum of a specified column."""
45        return dataframe[column_name].sum()

Calculate the sum of a specified column.

@classmethod
def column_kurtosis(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
47    @classmethod
48    def column_kurtosis(cls, dataframe: pd.DataFrame, column_name: str) -> float:
49        """Calculate the kurtosis of a specified column."""
50        return dataframe[column_name].kurt()

Calculate the kurtosis of a specified column.

@classmethod
def column_skewness(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
52    @classmethod
53    def column_skewness(cls, dataframe: pd.DataFrame, column_name: str) -> float:
54        """Calculate the skewness of a specified column."""
55        return dataframe[column_name].skew()

Calculate the skewness of a specified column.

@classmethod
def column_quantile( cls, dataframe: pandas.core.frame.DataFrame, column_name: str, quantile: float) -> float:
57    @classmethod
58    def column_quantile(cls, dataframe: pd.DataFrame, column_name: str, quantile: float) -> float:
59        """Calculate a specific quantile of the column."""
60        return dataframe[column_name].quantile(quantile)

Calculate a specific quantile of the column.

@classmethod
def column_iqr(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
62    @classmethod
63    def column_iqr(cls, dataframe: pd.DataFrame, column_name: str) -> float:
64        """Calculate the interquartile range of the column."""
65        return stats.iqr(dataframe[column_name])

Calculate the interquartile range of the column.

@classmethod
def column_range(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
67    @classmethod
68    def column_range(cls, dataframe: pd.DataFrame, column_name: str) -> float:
69        """Calculate the range of the column."""
70        return cls.column_max(dataframe, column_name) - cls.column_min(dataframe, column_name)

Calculate the range of the column.

@classmethod
def column_mad(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
72    @classmethod
73    def column_mad(cls, dataframe: pd.DataFrame, column_name: str) -> float:
74        """Calculate the mean absolute deviation of the column."""
75        return dataframe[column_name].mad()

Calculate the mean absolute deviation of the column.

@classmethod
def coefficient_of_variation(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
77    @classmethod
78    def coefficient_of_variation(cls, dataframe: pd.DataFrame, column_name: str) -> float:
79        """Calculate the coefficient of variation of the column."""
80        mean = cls.column_mean(dataframe, column_name)
81        return cls.column_std(dataframe, column_name) / mean if mean != 0 else None

Calculate the coefficient of variation of the column.

@classmethod
def standard_error_mean(cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> float:
83    @classmethod
84    def standard_error_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
85        """Calculate the standard error of the mean for the column."""
86        return dataframe[column_name].sem()

Calculate the standard error of the mean for the column.

@classmethod
def describe( cls, dataframe: pandas.core.frame.DataFrame) -> pandas.core.frame.DataFrame:
88    @classmethod
89    def describe(cls, dataframe: pd.DataFrame) -> pd.DataFrame:
90        """Provide a statistical summary for numeric columns in the DataFrame."""
91        return dataframe.describe()

Provide a statistical summary for numeric columns in the DataFrame.

@classmethod
def summary_as_dict( cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> Dict[str, Union[float, int]]:
 93    @classmethod
 94    def summary_as_dict(cls, dataframe: pd.DataFrame, column_name: str) -> Dict[str, Union[float, int]]:
 95        """Returns a dictionary with comprehensive numeric statistics for the specified column."""
 96        series = dataframe[column_name]
 97        return {
 98            'min': cls.column_min(dataframe, column_name),
 99            'max': cls.column_max(dataframe, column_name),
100            'mean': cls.column_mean(dataframe, column_name),
101            'median': cls.column_median(dataframe, column_name),
102            'std': cls.column_std(dataframe, column_name),
103            'var': cls.column_variance(dataframe, column_name),
104            'sum': cls.column_sum(dataframe, column_name),
105            'kurtosis': cls.column_kurtosis(dataframe, column_name),
106            'skewness': cls.column_skewness(dataframe, column_name),
107            'q1': cls.column_quantile(dataframe, column_name, 0.25),
108            'q3': cls.column_quantile(dataframe, column_name, 0.75),
109            'iqr': cls.column_iqr(dataframe, column_name),
110            'range': cls.column_range(dataframe, column_name),
111            'mad': cls.column_mad(dataframe, column_name),
112            'coeff_var': cls.coefficient_of_variation(dataframe, column_name),
113            'sem': cls.standard_error_mean(dataframe, column_name),
114            'mode': cls.column_mode(dataframe, column_name),
115            'percentile_90': cls.column_quantile(dataframe, column_name, 0.90),
116            'percentile_10': cls.column_quantile(dataframe, column_name, 0.10),
117        }

Returns a dictionary with comprehensive numeric statistics for the specified column.

@classmethod
def summary_as_dataframe( cls, dataframe: pandas.core.frame.DataFrame, column_name: str) -> pandas.core.frame.DataFrame:
119    @classmethod
120    def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str) -> pd.DataFrame:
121        """Returns a DataFrame with comprehensive numeric statistics for the specified column."""
122        summary_data = cls.summary_as_dict(dataframe, column_name)
123        return pd.DataFrame([summary_data])

Returns a DataFrame with comprehensive numeric statistics for the specified column.

Inherited Members
timeseries_shaper.base.Base
Base
dataframe
get_dataframe