about summary refs log tree commit diff
path: root/src/dataset.py
diff options
context:
space:
mode:
author Charles <sircharlesaze@gmail.com> 2020-01-25 14:16:00 +0100
committer Charles <sircharlesaze@gmail.com> 2020-01-25 14:16:00 +0100
commit 9f04fafe42fd6436bec09696e1bc8b2abc496cc4 (patch)
tree efeba48e71f0053e63578d35204542f61118ff1b /src/dataset.py
parent dea0f4cdec5bdf24962c8ab3ab2a6473e202259a (diff)
download dslr-9f04fafe42fd6436bec09696e1bc8b2abc496cc4.tar.gz
dslr-9f04fafe42fd6436bec09696e1bc8b2abc496cc4.tar.bz2
dslr-9f04fafe42fd6436bec09696e1bc8b2abc496cc4.zip
Dataset parent of Analysis, scatter plot and pair_plot dirty scripts
Diffstat (limited to 'src/dataset.py')
-rw-r--r-- src/dataset.py 21
1 files changed, 21 insertions, 0 deletions
diff --git a/src/dataset.py b/src/dataset.py
new file mode 100644
index 0000000..650d334
--- /dev/null
+++ b/src/dataset.py
@@ -0,0 +1,21 @@
+import pandas as pd
+
+
+class Dataset:
+    """Load a CSV file into a cleaned pandas DataFrame.
+
+    On construction the file is read, the ``Index`` column is dropped,
+    every row containing a missing value is removed, and the column names
+    are normalised to lower case with underscores instead of spaces
+    (``hogwarts_house`` is additionally shortened to ``house``).
+
+    Attributes:
+        path: The location passed to the constructor, stored unchanged.
+        df: The cleaned ``pandas.DataFrame``.
+    """
+
+    def __init__(self, path):
+        """Read and clean the dataset stored at *path*.
+
+        Args:
+            path: Location of the CSV file, handed to ``pandas.read_csv``.
+
+        Raises:
+            FileNotFoundError: If no file exists at *path*.
+            KeyError: If the file has no ``Index`` column to drop.
+        """
+        self.path = path
+        try:
+            df = pd.read_csv(path)
+        except FileNotFoundError:
+            # Raising a bare string is itself a TypeError on Python 3 and
+            # would hide this message; raise a real exception instead.
+            raise FileNotFoundError(
+                "Couldn't find dataset at: {}".format(path)
+            )
+        # Drop the 'Index' column, then every row with a missing value.
+        df = df.drop(columns=['Index']).dropna()
+        # Normalise headers, e.g. 'Hogwarts House' -> 'hogwarts_house'.
+        df.columns = df.columns.str.lower().str.replace(' ', '_')
+        self.df = df.rename(columns={'hogwarts_house': 'house'})
+
+    @property
+    def df_scores(self):
+        """Return the columns from ``arithmancy`` through ``flying``.
+
+        The slice is label-based, so both end columns are included and the
+        selection follows the column order of the loaded file.
+        """
+        return self.df.loc[:, 'arithmancy':'flying']
+
+