aboutsummaryrefslogtreecommitdiff
path: root/src/analysis.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/analysis.py')
-rw-r--r--src/analysis.py49
1 files changed, 0 insertions, 49 deletions
diff --git a/src/analysis.py b/src/analysis.py
deleted file mode 100644
index b6c9eb9..0000000
--- a/src/analysis.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-
-from dataset import Dataset
-import dslr_stat
-
-
-class Analysis(Dataset):
- def __init__(self, path):
- super().__init__(path)
-
- def describe(self):
- desc_df = pd.DataFrame(
- dtype=np.float64,
- columns=[c for c, t in zip(self.df.columns, self.df.dtypes) if t == np.float64],
- index=['Count', 'Mean', 'Std', 'Min', '25%', '50%', '75%', 'Max']
- )
- for col in desc_df.columns:
- desc_df.loc['Count', col] = len(self.df[col])
- desc_df.loc['Mean', col] = dslr_stat.mean(self.df[col])
- desc_df.loc['Std', col] = dslr_stat.std(self.df[col])
- desc_df.loc['Min', col] = dslr_stat.min(self.df[col])
- desc_df.loc['25%', col] = dslr_stat.q25(self.df[col])
- desc_df.loc['50%', col] = dslr_stat.median(self.df[col])
- desc_df.loc['75%', col] = dslr_stat.q75(self.df[col])
- desc_df.loc['Max', col] = dslr_stat.max(self.df[col])
- print(desc_df)
-
- def hist(self):
- pass
-
- def scatter(self):
- plt.scatter(self.df['astronomy'], self.df['defense_against_the_dark_arts'])
- plt.show()
-
- def pair_plot(self):
- scores = self.df_scores
- fig, axis = plt.subplots(nrows=scores.shape[1],
- ncols=scores.shape[1])
- for i, col in enumerate(scores.columns):
- for j, pair_col in enumerate(scores.columns):
- ax = axis[i, j]
- if pair_col == col:
- ax.hist(scores)
- continue
- ax.scatter(scores[col], scores[pair_col])
- plt.tight_layout()
- plt.show()