diff options
Diffstat (limited to 'src/dataset.py')
| -rwxr-xr-x[-rw-r--r--] | src/dataset.py | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/src/dataset.py b/src/dataset.py
index 650d334..e9f4b44 100644..100755
--- a/src/dataset.py
+++ b/src/dataset.py
@@ -1,3 +1,7 @@
+#!/bin/python3
+
+import sys
+
 import pandas as pd
 
 
@@ -9,13 +13,19 @@ class Dataset:
         except FileNotFoundError:
             raise "Couldn't find dataset at: {}".format(path)
         self.df.drop(columns=['Index'], inplace=True)
+        self.df.dropna(axis=1, how="all", inplace=True)
         self.df.dropna(inplace=True)
         self.df.columns = self.df.columns.str.lower()
         self.df.columns = self.df.columns.str.replace(' ', '_')
         self.df.rename(columns={'hogwarts_house': 'house'}, inplace=True)
+        self.df.rename(columns={'care_of_magical_creatures': 'magical_creatures'}, inplace=True)
+        self.df.rename(columns={'defense_against_the_dark_arts': 'defense_dark_arts'}, inplace=True)
 
     @property
     def df_scores(self):
         return self.df.loc[:, 'arithmancy':'flying']
 
+if __name__ == "__main__":
+    d = Dataset(sys.argv[1])
+    print(d.df)
