diff options
| author | Charles Cabergs <me@cacharle.xyz> | 2020-08-29 17:13:57 +0200 |
|---|---|---|
| committer | Charles Cabergs <me@cacharle.xyz> | 2020-08-29 17:13:57 +0200 |
| commit | a1c7fe1b6d95e2560e62c12453da287d36d4a714 (patch) | |
| tree | 72ce0f3a7dcf3627aba0b019665a23dcaec9f4f5 /src/analysis.py | |
| parent | c766a4481526215057cac928d09d62319f290fe4 (diff) | |
| download | dslr-a1c7fe1b6d95e2560e62c12453da287d36d4a714.tar.gz dslr-a1c7fe1b6d95e2560e62c12453da287d36d4a714.tar.bz2 dslr-a1c7fe1b6d95e2560e62c12453da287d36d4a714.zip | |
Removed bloat, Added logreg_train
Diffstat (limited to 'src/analysis.py')
| -rw-r--r-- | src/analysis.py | 49 |
1 files changed, 0 insertions, 49 deletions
diff --git a/src/analysis.py b/src/analysis.py
deleted file mode 100644
index b6c9eb9..0000000
--- a/src/analysis.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-
-from dataset import Dataset
-import dslr_stat
-
-
-class Analysis(Dataset):
-    def __init__(self, path):
-        super().__init__(path)
-
-    def describe(self):
-        desc_df = pd.DataFrame(
-            dtype=np.float64,
-            columns=[c for c, t in zip(self.df.columns, self.df.dtypes) if t == np.float64],
-            index=['Count', 'Mean', 'Std', 'Min', '25%', '50%', '75%', 'Max']
-        )
-        for col in desc_df.columns:
-            desc_df.loc['Count', col] = len(self.df[col])
-            desc_df.loc['Mean', col] = dslr_stat.mean(self.df[col])
-            desc_df.loc['Std', col] = dslr_stat.std(self.df[col])
-            desc_df.loc['Min', col] = dslr_stat.min(self.df[col])
-            desc_df.loc['25%', col] = dslr_stat.q25(self.df[col])
-            desc_df.loc['50%', col] = dslr_stat.median(self.df[col])
-            desc_df.loc['75%', col] = dslr_stat.q75(self.df[col])
-            desc_df.loc['Max', col] = dslr_stat.max(self.df[col])
-        print(desc_df)
-
-    def hist(self):
-        pass
-
-    def scatter(self):
-        plt.scatter(self.df['astronomy'], self.df['defense_against_the_dark_arts'])
-        plt.show()
-
-    def pair_plot(self):
-        scores = self.df_scores
-        fig, axis = plt.subplots(nrows=scores.shape[1],
-                                 ncols=scores.shape[1])
-        for i, col in enumerate(scores.columns):
-            for j, pair_col in enumerate(scores.columns):
-                ax = axis[i, j]
-                if pair_col == col:
-                    ax.hist(scores)
-                    continue
-                ax.scatter(scores[col], scores[pair_col])
-        plt.tight_layout()
-        plt.show()
