about | summary | refs | log | tree | commit | diff
path: root/src/analysis.py
diff options
context:
space:
mode:
author: Charles <sircharlesaze@gmail.com> 2020-01-25 13:06:10 +0100
committer: Charles <sircharlesaze@gmail.com> 2020-01-25 13:06:10 +0100
commit: dea0f4cdec5bdf24962c8ab3ab2a6473e202259a (patch)
tree: a2a703a50b2d744e103a657d50ea793743ce1ff5 /src/analysis.py
parent: d5e51613d3582e18e858055cf4874507a0df452f (diff)
download: dslr-dea0f4cdec5bdf24962c8ab3ab2a6473e202259a.tar.gz
dslr-dea0f4cdec5bdf24962c8ab3ab2a6473e202259a.tar.bz2
dslr-dea0f4cdec5bdf24962c8ab3ab2a6473e202259a.zip
Custom statistics module, describe program
Diffstat (limited to 'src/analysis.py')
-rw-r--r-- src/analysis.py 30
1 files changed, 23 insertions, 7 deletions
diff --git a/src/analysis.py b/src/analysis.py
index 64ba100..abc0ffb 100644
--- a/src/analysis.py
+++ b/src/analysis.py
@@ -1,10 +1,26 @@
-class Analysis(Dataset):
- def __init__(self, path):
- self.dataset_path = path
- super().__init__(path)
+import numpy as np
+import pandas as pd
+
+import dslr_stat
- def describe(self):
- for title in self.df.
- pass
+class Analysis:
+ def __init__(self, df):
+ self.df = df
+ def describe(self):
+ desc_df = pd.DataFrame(
+ dtype=np.float64,
+ columns=[c for c, t in zip(self.df.columns, self.df.dtypes) if t == np.float64],
+ index=['Count', 'Mean', 'Std', 'Min', '25%', '50%', '75%', 'Max']
+ )
+ for col in desc_df.columns:
+ desc_df.loc['Count', col] = len(self.df[col])
+ desc_df.loc['Mean', col] = dslr_stat.mean(self.df[col])
+ desc_df.loc['Std', col] = dslr_stat.std(self.df[col])
+ desc_df.loc['Min', col] = dslr_stat.min(self.df[col])
+ desc_df.loc['25%', col] = dslr_stat.q25(self.df[col])
+ desc_df.loc['50%', col] = dslr_stat.median(self.df[col])
+ desc_df.loc['75%', col] = dslr_stat.q75(self.df[col])
+ desc_df.loc['Max', col] = dslr_stat.max(self.df[col])
+ print(desc_df)