aboutsummaryrefslogtreecommitdiff
path: root/src/dataset.py
blob: e9f4b44e983324d4c2df6e9cd05b03b187d36506 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/bin/python3

import sys

import pandas as pd


class Dataset:
    """Load a CSV dataset into a cleaned pandas DataFrame.

    On construction the CSV at ``path`` is read and normalised:

    * the ``Index`` column is dropped,
    * columns that are entirely empty are dropped,
    * any remaining row containing a missing value is dropped,
    * column names are lower-cased and spaces become underscores,
    * a few long column names are shortened (see ``_SHORT_NAMES``).

    Attributes:
        path: The path the dataset was loaded from, as given by the caller.
        df: The cleaned DataFrame.
    """

    # Applied after lower-casing / underscore substitution, so the keys are
    # the already-normalised names.
    _SHORT_NAMES = {
        'hogwarts_house': 'house',
        'care_of_magical_creatures': 'magical_creatures',
        'defense_against_the_dark_arts': 'defense_dark_arts',
    }

    def __init__(self, path):
        """Read and clean the CSV file located at ``path``.

        Raises:
            FileNotFoundError: If no file exists at ``path``.
            KeyError: If the CSV has no ``Index`` column to drop.
        """
        self.path = path
        try:
            self.df = pd.read_csv(path)
        except FileNotFoundError as err:
            # Raising a bare string is a TypeError in Python 3; raise a real
            # exception that carries the intended message instead.
            raise FileNotFoundError(
                "Couldn't find dataset at: {}".format(path)
            ) from err
        self.df.drop(columns=['Index'], inplace=True)
        # Remove fully-empty columns first so they do not cause every row to
        # be discarded by the row-wise dropna that follows.
        self.df.dropna(axis=1, how="all", inplace=True)
        self.df.dropna(inplace=True)
        self.df.columns = self.df.columns.str.lower()
        self.df.columns = self.df.columns.str.replace(' ', '_')
        self.df.rename(columns=self._SHORT_NAMES, inplace=True)

    @property
    def df_scores(self):
        """Return the columns from ``arithmancy`` through ``flying``, inclusive.

        This is a label-based slice, so it depends on the column order of the
        loaded file.
        """
        return self.df.loc[:, 'arithmancy':'flying']


if __name__ == "__main__":
    # Script entry point: load the CSV named on the command line and print
    # the cleaned DataFrame.
    if len(sys.argv) < 2:
        # Without this guard a missing argument surfaces as a bare IndexError.
        sys.exit("usage: {} <dataset.csv>".format(sys.argv[0]))
    d = Dataset(sys.argv[1])
    print(d.df)