diff options
| author | Charles <sircharlesaze@gmail.com> | 2020-01-25 13:06:10 +0100 |
|---|---|---|
| committer | Charles <sircharlesaze@gmail.com> | 2020-01-25 13:06:10 +0100 |
| commit | dea0f4cdec5bdf24962c8ab3ab2a6473e202259a (patch) | |
| tree | a2a703a50b2d744e103a657d50ea793743ce1ff5 /src/analysis.py | |
| parent | d5e51613d3582e18e858055cf4874507a0df452f (diff) | |
| download | dslr-dea0f4cdec5bdf24962c8ab3ab2a6473e202259a.tar.gz dslr-dea0f4cdec5bdf24962c8ab3ab2a6473e202259a.tar.bz2 dslr-dea0f4cdec5bdf24962c8ab3ab2a6473e202259a.zip | |
Custom statistics module, describe program
Diffstat (limited to 'src/analysis.py')
| -rw-r--r-- | src/analysis.py | 30 |
1 files changed, 23 insertions, 7 deletions
diff --git a/src/analysis.py b/src/analysis.py index 64ba100..abc0ffb 100644 --- a/src/analysis.py +++ b/src/analysis.py @@ -1,10 +1,26 @@ -class Analysis(Dataset): - def __init__(self, path): - self.dataset_path = path - super().__init__(path) +import numpy as np +import pandas as pd + +import dslr_stat - def describe(self): - for title in self.df. - pass +class Analysis: + def __init__(self, df): + self.df = df + def describe(self): + desc_df = pd.DataFrame( + dtype=np.float64, + columns=[c for c, t in zip(self.df.columns, self.df.dtypes) if t == np.float64], + index=['Count', 'Mean', 'Std', 'Min', '25%', '50%', '75%', 'Max'] + ) + for col in desc_df.columns: + desc_df.loc['Count', col] = len(self.df[col]) + desc_df.loc['Mean', col] = dslr_stat.mean(self.df[col]) + desc_df.loc['Std', col] = dslr_stat.std(self.df[col]) + desc_df.loc['Min', col] = dslr_stat.min(self.df[col]) + desc_df.loc['25%', col] = dslr_stat.q25(self.df[col]) + desc_df.loc['50%', col] = dslr_stat.median(self.df[col]) + desc_df.loc['75%', col] = dslr_stat.q75(self.df[col]) + desc_df.loc['Max', col] = dslr_stat.max(self.df[col]) + print(desc_df) |
