diff options
| author | Charles <sircharlesaze@gmail.com> | 2020-01-25 14:16:00 +0100 |
|---|---|---|
| committer | Charles <sircharlesaze@gmail.com> | 2020-01-25 14:16:00 +0100 |
| commit | 9f04fafe42fd6436bec09696e1bc8b2abc496cc4 (patch) | |
| tree | efeba48e71f0053e63578d35204542f61118ff1b /src/dataset.py | |
| parent | dea0f4cdec5bdf24962c8ab3ab2a6473e202259a (diff) | |
| download | dslr-9f04fafe42fd6436bec09696e1bc8b2abc496cc4.tar.gz dslr-9f04fafe42fd6436bec09696e1bc8b2abc496cc4.tar.bz2 dslr-9f04fafe42fd6436bec09696e1bc8b2abc496cc4.zip | |
Dataset parent of Analysis, scatter plot and pair_plot dirty scripts
Diffstat (limited to 'src/dataset.py')
| -rw-r--r-- | src/dataset.py | 21 |
1 files changed, 21 insertions, 0 deletions
# src/dataset.py -- new file added by this commit
# (new file mode 100644, index 0000000..650d334)
import pandas as pd


class Dataset:
    """Load a CSV dataset into a cleaned pandas DataFrame.

    On construction the CSV at ``path`` is read and normalised:

    * the ``Index`` column is dropped,
    * every row containing at least one missing value is dropped,
    * column names are lower-cased and spaces become underscores,
    * the ``hogwarts_house`` column is renamed to ``house``.

    Attributes:
        path: The path that was passed to the constructor.
        df: The cleaned DataFrame.
    """

    def __init__(self, path):
        """Read and clean the dataset stored at ``path``.

        Args:
            path: Location of the CSV file, passed as-is to
                ``pandas.read_csv``.

        Raises:
            FileNotFoundError: If no file exists at ``path``.
            KeyError: If the CSV has no ``Index`` column to drop.
        """
        self.path = path
        try:
            frame = pd.read_csv(path)
        except FileNotFoundError as err:
            # Raising a bare string is itself a TypeError in Python 3
            # ("exceptions must derive from BaseException"), which hid the
            # intended message. Raise a real exception and keep the cause.
            raise FileNotFoundError(
                "Couldn't find dataset at: {}".format(path)
            ) from err

        frame = frame.drop(columns=['Index']).dropna()
        # Normalise headers, e.g. "Hogwarts House" -> "hogwarts_house".
        frame.columns = frame.columns.str.lower().str.replace(' ', '_')
        self.df = frame.rename(columns={'hogwarts_house': 'house'})

    @property
    def df_scores(self):
        """Return the columns from ``arithmancy`` through ``flying``.

        The selection is a label-based ``.loc`` slice, so both end columns
        are included along with every column positioned between them.
        """
        return self.df.loc[:, 'arithmancy':'flying']
