From dea0f4cdec5bdf24962c8ab3ab2a6473e202259a Mon Sep 17 00:00:00 2001 From: Charles Date: Sat, 25 Jan 2020 13:06:10 +0100 Subject: Custom statistics module, describe program --- src/analysis.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'src/analysis.py') diff --git a/src/analysis.py b/src/analysis.py index 64ba100..abc0ffb 100644 --- a/src/analysis.py +++ b/src/analysis.py @@ -1,10 +1,26 @@ -class Analysis(Dataset): - def __init__(self, path): - self.dataset_path = path - super().__init__(path) +import numpy as np +import pandas as pd + +import dslr_stat - def describe(self): - for title in self.df. - pass +class Analysis: + def __init__(self, df): + self.df = df + def describe(self): + desc_df = pd.DataFrame( + dtype=np.float64, + columns=[c for c, t in zip(self.df.columns, self.df.dtypes) if t == np.float64], + index=['Count', 'Mean', 'Std', 'Min', '25%', '50%', '75%', 'Max'] + ) + for col in desc_df.columns: + desc_df.loc['Count', col] = len(self.df[col]) + desc_df.loc['Mean', col] = dslr_stat.mean(self.df[col]) + desc_df.loc['Std', col] = dslr_stat.std(self.df[col]) + desc_df.loc['Min', col] = dslr_stat.min(self.df[col]) + desc_df.loc['25%', col] = dslr_stat.q25(self.df[col]) + desc_df.loc['50%', col] = dslr_stat.median(self.df[col]) + desc_df.loc['75%', col] = dslr_stat.q75(self.df[col]) + desc_df.loc['Max', col] = dslr_stat.max(self.df[col]) + print(desc_df) -- cgit