diff options
| author | Charles Cabergs <me@cacharle.xyz> | 2020-08-29 17:13:57 +0200 |
|---|---|---|
| committer | Charles Cabergs <me@cacharle.xyz> | 2020-08-29 17:13:57 +0200 |
| commit | a1c7fe1b6d95e2560e62c12453da287d36d4a714 (patch) | |
| tree | 72ce0f3a7dcf3627aba0b019665a23dcaec9f4f5 /src/dataset.py | |
| parent | c766a4481526215057cac928d09d62319f290fe4 (diff) | |
| download | dslr-a1c7fe1b6d95e2560e62c12453da287d36d4a714.tar.gz dslr-a1c7fe1b6d95e2560e62c12453da287d36d4a714.tar.bz2 dslr-a1c7fe1b6d95e2560e62c12453da287d36d4a714.zip | |
Removed bloat, Added logreg_train
Diffstat (limited to 'src/dataset.py')
| -rwxr-xr-x[-rw-r--r--] | src/dataset.py | 10 |
1 files changed, 10 insertions, 0 deletions
```diff
diff --git a/src/dataset.py b/src/dataset.py
index 650d334..e9f4b44 100644..100755
--- a/src/dataset.py
+++ b/src/dataset.py
@@ -1,3 +1,7 @@
+#!/bin/python3
+
+import sys
+
 import pandas as pd
 
 
@@ -9,13 +13,19 @@ class Dataset:
         except FileNotFoundError:
             raise "Couldn't find dataset at: {}".format(path)
         self.df.drop(columns=['Index'], inplace=True)
+        self.df.dropna(axis=1, how="all", inplace=True)
         self.df.dropna(inplace=True)
         self.df.columns = self.df.columns.str.lower()
         self.df.columns = self.df.columns.str.replace(' ', '_')
         self.df.rename(columns={'hogwarts_house': 'house'}, inplace=True)
+        self.df.rename(columns={'care_of_magical_creatures': 'magical_creatures'}, inplace=True)
+        self.df.rename(columns={'defense_against_the_dark_arts': 'defense_dark_arts'}, inplace=True)
 
     @property
     def df_scores(self):
         return self.df.loc[:, 'arithmancy':'flying']
 
 
+if __name__ == "__main__":
+    d = Dataset(sys.argv[1])
+    print(d.df)
```
