about | summary | refs | log | tree | commit | diff
path: root/src/dataset.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/dataset.py')
-rwxr-xr-x [-rw-r--r--] src/dataset.py 10
1 files changed, 10 insertions, 0 deletions
diff --git a/src/dataset.py b/src/dataset.py
index 650d334..e9f4b44 100644..100755
--- a/src/dataset.py
+++ b/src/dataset.py
@@ -1,3 +1,7 @@
+#!/bin/python3
+
+import sys
+
import pandas as pd
@@ -9,13 +13,19 @@ class Dataset:
except FileNotFoundError:
raise "Couldn't find dataset at: {}".format(path)
self.df.drop(columns=['Index'], inplace=True)
+ self.df.dropna(axis=1, how="all", inplace=True)
self.df.dropna(inplace=True)
self.df.columns = self.df.columns.str.lower()
self.df.columns = self.df.columns.str.replace(' ', '_')
self.df.rename(columns={'hogwarts_house': 'house'}, inplace=True)
+ self.df.rename(columns={'care_of_magical_creatures': 'magical_creatures'}, inplace=True)
+ self.df.rename(columns={'defense_against_the_dark_arts': 'defense_dark_arts'}, inplace=True)
# Read-only property exposing a column slice of the frame held in self.df.
@property
def df_scores(self):
# .loc slices by label and includes both endpoints, so this returns every row
# and every column from 'arithmancy' through 'flying', in the frame's column order.
# NOTE(review): presumably these are the per-course score columns — confirm against the dataset layout.
return self.df.loc[:, 'arithmancy':'flying']
+if __name__ == "__main__":
+    # Script entry point: load the dataset named on the command line and
+    # print the resulting DataFrame.
+    # Exit with a usage message (status 1, written to stderr) instead of an
+    # unhandled IndexError when no path argument is supplied.
+    if len(sys.argv) < 2:
+        sys.exit("usage: {} <dataset-path>".format(sys.argv[0]))
+    d = Dataset(sys.argv[1])
+    print(d.df)