timeseries_shaper.stats.numeric_stats
import pandas as pd
from scipy import stats
from typing import Dict, Union
from ..base import Base


class NumericStatistics(Base):
    """
    Provides class methods to calculate statistics on numeric columns in a pandas DataFrame.

    Every method is a classmethod that receives the DataFrame explicitly.
    The single-column methods take ``dataframe`` (the table to read) and
    ``column_name`` (the label of the column to reduce) and return one scalar.
    """

    @classmethod
    def column_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the mean of a specified column."""
        return dataframe[column_name].mean()

    @classmethod
    def column_median(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the median of a specified column."""
        return dataframe[column_name].median()

    @classmethod
    def column_std(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the standard deviation of a specified column."""
        return dataframe[column_name].std()

    @classmethod
    def column_variance(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the variance of a specified column."""
        return dataframe[column_name].var()

    @classmethod
    def column_min(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the minimum value of a specified column."""
        return dataframe[column_name].min()

    @classmethod
    def column_max(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the maximum value of a specified column."""
        return dataframe[column_name].max()

    @classmethod
    def column_sum(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the sum of a specified column."""
        return dataframe[column_name].sum()

    @classmethod
    def column_kurtosis(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the kurtosis of a specified column."""
        return dataframe[column_name].kurt()

    @classmethod
    def column_skewness(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the skewness of a specified column."""
        return dataframe[column_name].skew()

    @classmethod
    def column_quantile(cls, dataframe: pd.DataFrame, column_name: str, quantile: float) -> float:
        """Calculate a specific quantile of the column.

        ``quantile`` is passed straight to ``Series.quantile``.
        """
        return dataframe[column_name].quantile(quantile)

    @classmethod
    def column_iqr(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the interquartile range of the column, ignoring NaN values."""
        # scipy propagates NaN unless told otherwise, which would make this
        # disagree with the pandas-based quantiles reported next to it.
        return stats.iqr(dataframe[column_name], nan_policy="omit")

    @classmethod
    def column_range(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the range (maximum minus minimum) of the column."""
        return cls.column_max(dataframe, column_name) - cls.column_min(dataframe, column_name)

    @classmethod
    def column_mad(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the mean absolute deviation of the column."""
        series = dataframe[column_name]
        # Series.mad() was removed in pandas 2.0, so the same quantity is
        # computed explicitly: the mean of |x - mean(x)|.
        return (series - series.mean()).abs().mean()

    @classmethod
    def coefficient_of_variation(
        cls, dataframe: pd.DataFrame, column_name: str
    ) -> Union[float, None]:
        """Calculate the coefficient of variation (std / mean) of the column.

        Returns:
            The ratio, or ``None`` when the mean is exactly zero and the ratio
            is therefore undefined.
        """
        mean = cls.column_mean(dataframe, column_name)
        if mean == 0:
            return None
        return cls.column_std(dataframe, column_name) / mean

    @classmethod
    def standard_error_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the standard error of the mean for the column."""
        return dataframe[column_name].sem()

    @classmethod
    def describe(cls, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Provide a statistical summary for numeric columns in the DataFrame."""
        return dataframe.describe()

    @classmethod
    def summary_as_dict(
        cls, dataframe: pd.DataFrame, column_name: str
    ) -> Dict[str, Union[float, int, None]]:
        """Returns a dictionary with comprehensive numeric statistics for the specified column.

        Returns:
            A dict with the keys ``min``, ``max``, ``mean``, ``median``,
            ``std``, ``var``, ``sum``, ``kurtosis``, ``skewness``, ``q1``,
            ``q3``, ``iqr``, ``range``, ``mad``, ``coeff_var``, ``sem``,
            ``mode``, ``percentile_90`` and ``percentile_10``.
            ``coeff_var`` is ``None`` when the mean is zero. ``mode`` is the
            first value returned by ``Series.mode`` or NaN when there is none.
        """
        series = dataframe[column_name]
        # No `column_mode` method is defined on this class, so the mode is
        # derived here directly from the column.
        modes = series.mode()
        mode = modes.iloc[0] if not modes.empty else float("nan")
        return {
            'min': cls.column_min(dataframe, column_name),
            'max': cls.column_max(dataframe, column_name),
            'mean': cls.column_mean(dataframe, column_name),
            'median': cls.column_median(dataframe, column_name),
            'std': cls.column_std(dataframe, column_name),
            'var': cls.column_variance(dataframe, column_name),
            'sum': cls.column_sum(dataframe, column_name),
            'kurtosis': cls.column_kurtosis(dataframe, column_name),
            'skewness': cls.column_skewness(dataframe, column_name),
            'q1': cls.column_quantile(dataframe, column_name, 0.25),
            'q3': cls.column_quantile(dataframe, column_name, 0.75),
            'iqr': cls.column_iqr(dataframe, column_name),
            'range': cls.column_range(dataframe, column_name),
            'mad': cls.column_mad(dataframe, column_name),
            'coeff_var': cls.coefficient_of_variation(dataframe, column_name),
            'sem': cls.standard_error_mean(dataframe, column_name),
            'mode': mode,
            'percentile_90': cls.column_quantile(dataframe, column_name, 0.90),
            'percentile_10': cls.column_quantile(dataframe, column_name, 0.10),
        }

    @classmethod
    def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str) -> pd.DataFrame:
        """Returns a DataFrame with comprehensive numeric statistics for the specified column.

        The result has a single row whose columns are the keys of
        ``summary_as_dict``.
        """
        summary_data = cls.summary_as_dict(dataframe, column_name)
        return pd.DataFrame([summary_data])
class NumericStatistics(Base):
    """
    Provides class methods to calculate statistics on numeric columns in a pandas DataFrame.

    Every method is a classmethod that receives the DataFrame explicitly.
    The single-column methods take ``dataframe`` (the table to read) and
    ``column_name`` (the label of the column to reduce) and return one scalar.
    """

    @classmethod
    def column_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the mean of a specified column."""
        return dataframe[column_name].mean()

    @classmethod
    def column_median(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the median of a specified column."""
        return dataframe[column_name].median()

    @classmethod
    def column_std(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the standard deviation of a specified column."""
        return dataframe[column_name].std()

    @classmethod
    def column_variance(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the variance of a specified column."""
        return dataframe[column_name].var()

    @classmethod
    def column_min(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the minimum value of a specified column."""
        return dataframe[column_name].min()

    @classmethod
    def column_max(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the maximum value of a specified column."""
        return dataframe[column_name].max()

    @classmethod
    def column_sum(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the sum of a specified column."""
        return dataframe[column_name].sum()

    @classmethod
    def column_kurtosis(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the kurtosis of a specified column."""
        return dataframe[column_name].kurt()

    @classmethod
    def column_skewness(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the skewness of a specified column."""
        return dataframe[column_name].skew()

    @classmethod
    def column_quantile(cls, dataframe: pd.DataFrame, column_name: str, quantile: float) -> float:
        """Calculate a specific quantile of the column.

        ``quantile`` is passed straight to ``Series.quantile``.
        """
        return dataframe[column_name].quantile(quantile)

    @classmethod
    def column_iqr(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the interquartile range of the column, ignoring NaN values."""
        # scipy propagates NaN unless told otherwise, which would make this
        # disagree with the pandas-based quantiles reported next to it.
        return stats.iqr(dataframe[column_name], nan_policy="omit")

    @classmethod
    def column_range(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the range (maximum minus minimum) of the column."""
        return cls.column_max(dataframe, column_name) - cls.column_min(dataframe, column_name)

    @classmethod
    def column_mad(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the mean absolute deviation of the column."""
        series = dataframe[column_name]
        # Series.mad() was removed in pandas 2.0, so the same quantity is
        # computed explicitly: the mean of |x - mean(x)|.
        return (series - series.mean()).abs().mean()

    @classmethod
    def coefficient_of_variation(
        cls, dataframe: pd.DataFrame, column_name: str
    ) -> Union[float, None]:
        """Calculate the coefficient of variation (std / mean) of the column.

        Returns:
            The ratio, or ``None`` when the mean is exactly zero and the ratio
            is therefore undefined.
        """
        mean = cls.column_mean(dataframe, column_name)
        if mean == 0:
            return None
        return cls.column_std(dataframe, column_name) / mean

    @classmethod
    def standard_error_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
        """Calculate the standard error of the mean for the column."""
        return dataframe[column_name].sem()

    @classmethod
    def describe(cls, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Provide a statistical summary for numeric columns in the DataFrame."""
        return dataframe.describe()

    @classmethod
    def summary_as_dict(
        cls, dataframe: pd.DataFrame, column_name: str
    ) -> Dict[str, Union[float, int, None]]:
        """Returns a dictionary with comprehensive numeric statistics for the specified column.

        Returns:
            A dict with the keys ``min``, ``max``, ``mean``, ``median``,
            ``std``, ``var``, ``sum``, ``kurtosis``, ``skewness``, ``q1``,
            ``q3``, ``iqr``, ``range``, ``mad``, ``coeff_var``, ``sem``,
            ``mode``, ``percentile_90`` and ``percentile_10``.
            ``coeff_var`` is ``None`` when the mean is zero. ``mode`` is the
            first value returned by ``Series.mode`` or NaN when there is none.
        """
        series = dataframe[column_name]
        # No `column_mode` method is defined on this class, so the mode is
        # derived here directly from the column.
        modes = series.mode()
        mode = modes.iloc[0] if not modes.empty else float("nan")
        return {
            'min': cls.column_min(dataframe, column_name),
            'max': cls.column_max(dataframe, column_name),
            'mean': cls.column_mean(dataframe, column_name),
            'median': cls.column_median(dataframe, column_name),
            'std': cls.column_std(dataframe, column_name),
            'var': cls.column_variance(dataframe, column_name),
            'sum': cls.column_sum(dataframe, column_name),
            'kurtosis': cls.column_kurtosis(dataframe, column_name),
            'skewness': cls.column_skewness(dataframe, column_name),
            'q1': cls.column_quantile(dataframe, column_name, 0.25),
            'q3': cls.column_quantile(dataframe, column_name, 0.75),
            'iqr': cls.column_iqr(dataframe, column_name),
            'range': cls.column_range(dataframe, column_name),
            'mad': cls.column_mad(dataframe, column_name),
            'coeff_var': cls.coefficient_of_variation(dataframe, column_name),
            'sem': cls.standard_error_mean(dataframe, column_name),
            'mode': mode,
            'percentile_90': cls.column_quantile(dataframe, column_name, 0.90),
            'percentile_10': cls.column_quantile(dataframe, column_name, 0.10),
        }

    @classmethod
    def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str) -> pd.DataFrame:
        """Returns a DataFrame with comprehensive numeric statistics for the specified column.

        The result has a single row whose columns are the keys of
        ``summary_as_dict``.
        """
        summary_data = cls.summary_as_dict(dataframe, column_name)
        return pd.DataFrame([summary_data])
Provides class methods to calculate statistics on numeric columns in a pandas DataFrame.
@classmethod
def column_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the mean of ``dataframe[column_name]``."""
    column = dataframe[column_name]
    return column.mean()
Calculate the mean of a specified column.
@classmethod
def column_median(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the median of ``dataframe[column_name]``."""
    column = dataframe[column_name]
    return column.median()
Calculate the median of a specified column.
@classmethod
def column_std(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the standard deviation of ``dataframe[column_name]``."""
    column = dataframe[column_name]
    return column.std()
Calculate the standard deviation of a specified column.
@classmethod
def column_variance(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the variance of ``dataframe[column_name]``."""
    column = dataframe[column_name]
    return column.var()
Calculate the variance of a specified column.
@classmethod
def column_min(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the smallest value in ``dataframe[column_name]``."""
    column = dataframe[column_name]
    return column.min()
Calculate the minimum value of a specified column.
@classmethod
def column_max(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the largest value in ``dataframe[column_name]``."""
    column = dataframe[column_name]
    return column.max()
Calculate the maximum value of a specified column.
@classmethod
def column_sum(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the total of all values in ``dataframe[column_name]``."""
    column = dataframe[column_name]
    return column.sum()
Calculate the sum of a specified column.
@classmethod
def column_kurtosis(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the kurtosis of ``dataframe[column_name]`` as computed by ``Series.kurt``."""
    column = dataframe[column_name]
    return column.kurt()
Calculate the kurtosis of a specified column.
@classmethod
def column_skewness(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the skewness of ``dataframe[column_name]`` as computed by ``Series.skew``."""
    column = dataframe[column_name]
    return column.skew()
Calculate the skewness of a specified column.
@classmethod
def column_quantile(cls, dataframe: pd.DataFrame, column_name: str, quantile: float) -> float:
    """Return the requested quantile of ``dataframe[column_name]``.

    ``quantile`` is forwarded unchanged to ``Series.quantile``.
    """
    column = dataframe[column_name]
    return column.quantile(quantile)
Calculate a specific quantile of the column.
@classmethod
def column_iqr(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Calculate the interquartile range of the column, ignoring NaN values.

    Args:
        dataframe: Table containing the column.
        column_name: Label of the column to reduce.

    Returns:
        The difference between the 75th and 25th percentiles of the
        non-missing values.
    """
    # scipy's default NaN policy propagates missing values, so a single NaN
    # would turn the result into NaN; "omit" drops them instead.
    return stats.iqr(dataframe[column_name], nan_policy="omit")
Calculate the interquartile range of the column.
@classmethod
def column_range(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the spread of the column: its maximum minus its minimum."""
    highest = cls.column_max(dataframe, column_name)
    lowest = cls.column_min(dataframe, column_name)
    return highest - lowest
Calculate the range of the column.
@classmethod
def column_mad(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Calculate the mean absolute deviation of the column.

    Args:
        dataframe: Table containing the column.
        column_name: Label of the column to reduce.

    Returns:
        The mean of the absolute differences between each value and the
        column mean.
    """
    series = dataframe[column_name]
    # Series.mad() was removed in pandas 2.0, so the same quantity is
    # computed explicitly instead of calling the removed method.
    return (series - series.mean()).abs().mean()
Calculate the mean absolute deviation of the column.
@classmethod
def coefficient_of_variation(
    cls, dataframe: pd.DataFrame, column_name: str
) -> Union[float, None]:
    """Calculate the coefficient of variation (std / mean) of the column.

    Args:
        dataframe: Table containing the column.
        column_name: Label of the column to reduce.

    Returns:
        The standard deviation divided by the mean, or ``None`` when the
        mean is exactly zero and the ratio is therefore undefined.
    """
    mean = cls.column_mean(dataframe, column_name)
    # Guard first so the standard deviation is only computed when it is used.
    if mean == 0:
        return None
    return cls.column_std(dataframe, column_name) / mean
Calculate the coefficient of variation of the column.
@classmethod
def standard_error_mean(cls, dataframe: pd.DataFrame, column_name: str) -> float:
    """Return the standard error of the mean of ``dataframe[column_name]``."""
    column = dataframe[column_name]
    return column.sem()
Calculate the standard error of the mean for the column.
@classmethod
def describe(cls, dataframe: pd.DataFrame) -> pd.DataFrame:
    """Return the summary table produced by ``DataFrame.describe`` for ``dataframe``."""
    overview = dataframe.describe()
    return overview
Provide a statistical summary for numeric columns in the DataFrame.
@classmethod
def summary_as_dict(
    cls, dataframe: pd.DataFrame, column_name: str
) -> Dict[str, Union[float, int, None]]:
    """Returns a dictionary with comprehensive numeric statistics for the specified column.

    Args:
        dataframe: Table containing the column.
        column_name: Label of the column to summarise.

    Returns:
        A dict with the keys ``min``, ``max``, ``mean``, ``median``, ``std``,
        ``var``, ``sum``, ``kurtosis``, ``skewness``, ``q1``, ``q3``, ``iqr``,
        ``range``, ``mad``, ``coeff_var``, ``sem``, ``mode``,
        ``percentile_90`` and ``percentile_10``. ``coeff_var`` is ``None``
        when the mean is zero. ``mode`` is the first value returned by
        ``Series.mode`` or NaN when there is none.
    """
    series = dataframe[column_name]
    # No `column_mode` method is defined on this class, so calling it raised
    # AttributeError; the mode is derived here directly from the column.
    modes = series.mode()
    mode = modes.iloc[0] if not modes.empty else float("nan")
    return {
        'min': cls.column_min(dataframe, column_name),
        'max': cls.column_max(dataframe, column_name),
        'mean': cls.column_mean(dataframe, column_name),
        'median': cls.column_median(dataframe, column_name),
        'std': cls.column_std(dataframe, column_name),
        'var': cls.column_variance(dataframe, column_name),
        'sum': cls.column_sum(dataframe, column_name),
        'kurtosis': cls.column_kurtosis(dataframe, column_name),
        'skewness': cls.column_skewness(dataframe, column_name),
        'q1': cls.column_quantile(dataframe, column_name, 0.25),
        'q3': cls.column_quantile(dataframe, column_name, 0.75),
        'iqr': cls.column_iqr(dataframe, column_name),
        'range': cls.column_range(dataframe, column_name),
        'mad': cls.column_mad(dataframe, column_name),
        'coeff_var': cls.coefficient_of_variation(dataframe, column_name),
        'sem': cls.standard_error_mean(dataframe, column_name),
        'mode': mode,
        'percentile_90': cls.column_quantile(dataframe, column_name, 0.90),
        'percentile_10': cls.column_quantile(dataframe, column_name, 0.10),
    }
Returns a dictionary with comprehensive numeric statistics for the specified column.
@classmethod
def summary_as_dataframe(cls, dataframe: pd.DataFrame, column_name: str) -> pd.DataFrame:
    """Return the statistics from ``summary_as_dict`` as a one-row DataFrame.

    Each key of the summary dictionary becomes a column of the result.
    """
    stats_by_name = cls.summary_as_dict(dataframe, column_name)
    single_row = [stats_by_name]
    return pd.DataFrame(single_row)
Returns a DataFrame with comprehensive numeric statistics for the specified column.
Inherited Members
- timeseries_shaper.base.Base
- Base
- dataframe
- get_dataframe