import pandas as pd


class Dataset:
    """Load a CSV file into a cleaned, normalized ``pandas.DataFrame``.

    The frame is stored on ``self.df`` after these steps, in order:

    1. the ``Index`` column is dropped,
    2. every row containing a missing value is dropped,
    3. column names are lower-cased and spaces become underscores,
    4. the ``hogwarts_house`` column is renamed to ``house``.

    Attributes:
        path: The value passed to the constructor, kept unchanged.
        df: The cleaned ``DataFrame``.
    """

    def __init__(self, path):
        """Read and clean the CSV located at *path*.

        Args:
            path: Anything ``pandas.read_csv`` accepts as its first argument.

        Raises:
            FileNotFoundError: If ``pandas.read_csv`` cannot find *path*.
            KeyError: If the CSV has no ``Index`` column (raised by ``drop``).
        """
        self.path = path
        try:
            self.df = pd.read_csv(path)
        except FileNotFoundError as err:
            # A bare string cannot be raised in Python 3 (it triggers a
            # TypeError), so raise a real exception carrying the message.
            raise FileNotFoundError(
                "Couldn't find dataset at: {}".format(path)
            ) from err
        self.df.drop(columns=['Index'], inplace=True)
        self.df.dropna(inplace=True)
        # Normalize headers: "Hogwarts House" -> "hogwarts_house".
        self.df.columns = self.df.columns.str.lower()
        # regex=False: the space is a literal, whatever the pandas default is.
        self.df.columns = self.df.columns.str.replace(' ', '_', regex=False)
        self.df.rename(columns={'hogwarts_house': 'house'}, inplace=True)

    @property
    def df_scores(self):
        """Return the columns from ``arithmancy`` through ``flying``.

        This is a label-based ``.loc`` slice, so both end columns are
        included, along with every column positioned between them.
        Presumably these are the per-course score columns; confirm against
        the dataset layout.
        """
        return self.df.loc[:, 'arithmancy':'flying']