From a1c7fe1b6d95e2560e62c12453da287d36d4a714 Mon Sep 17 00:00:00 2001 From: Charles Cabergs Date: Sat, 29 Aug 2020 17:13:57 +0200 Subject: Removed bloat, Added logreg_train --- src/dataset.py | 10 ++++++++++ 1 file changed, 10 insertions(+) mode change 100644 => 100755 src/dataset.py (limited to 'src/dataset.py') diff --git a/src/dataset.py b/src/dataset.py old mode 100644 new mode 100755 index 650d334..e9f4b44 --- a/src/dataset.py +++ b/src/dataset.py @@ -1,3 +1,7 @@ +#!/bin/python3 + +import sys + import pandas as pd @@ -9,13 +13,19 @@ class Dataset: except FileNotFoundError: raise "Couldn't find dataset at: {}".format(path) self.df.drop(columns=['Index'], inplace=True) + self.df.dropna(axis=1, how="all", inplace=True) self.df.dropna(inplace=True) self.df.columns = self.df.columns.str.lower() self.df.columns = self.df.columns.str.replace(' ', '_') self.df.rename(columns={'hogwarts_house': 'house'}, inplace=True) + self.df.rename(columns={'care_of_magical_creatures': 'magical_creatures'}, inplace=True) + self.df.rename(columns={'defense_against_the_dark_arts': 'defense_dark_arts'}, inplace=True) @property def df_scores(self): return self.df.loc[:, 'arithmancy':'flying'] +if __name__ == "__main__": + d = Dataset(sys.argv[1]) + print(d.df) -- cgit