aboutsummaryrefslogtreecommitdiff
path: root/src/histogram.py
diff options
context:
space:
mode:
author Charles Cabergs <me@cacharle.xyz> 2020-08-29 17:13:57 +0200
committer Charles Cabergs <me@cacharle.xyz> 2020-08-29 17:13:57 +0200
commit a1c7fe1b6d95e2560e62c12453da287d36d4a714 (patch)
tree 72ce0f3a7dcf3627aba0b019665a23dcaec9f4f5 /src/histogram.py
parent c766a4481526215057cac928d09d62319f290fe4 (diff)
download dslr-a1c7fe1b6d95e2560e62c12453da287d36d4a714.tar.gz
dslr-a1c7fe1b6d95e2560e62c12453da287d36d4a714.tar.bz2
dslr-a1c7fe1b6d95e2560e62c12453da287d36d4a714.zip
Removed bloat, Added logreg_train
Diffstat (limited to 'src/histogram.py')
-rwxr-xr-x src/histogram.py 28
1 files changed, 28 insertions, 0 deletions
diff --git a/src/histogram.py b/src/histogram.py
new file mode 100755
index 0000000..1200233
--- /dev/null
+++ b/src/histogram.py
@@ -0,0 +1,28 @@
+#!/bin/python3
+
+import sys
+
+import matplotlib.pyplot as plt
+
+from dataset import Dataset
+
+
+def house_hist(ax, d, house_name):
+    """Plot on ax a 40-bin histogram of one house's min-max scaled scores."""
+    marks = d.df.loc[d.df["house"] == house_name, "arithmancy":"flying"]
+    lo, span = marks.min(), marks.max() - marks.min()  # per column if d.df is a pandas DataFrame (presumed)
+    ax.hist(((marks - lo) / span).values.flatten(), bins=40, rwidth=0.8)  # all selected columns pooled
+    ax.set_title(house_name)
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        # Raising a plain str is itself a TypeError; exit with the usage text.
+        sys.exit("Usage: {} dataset_path".format(sys.argv[0]))
+    d = Dataset(sys.argv[1])
+    # One histogram per house on a 2x2 grid of subplots sharing the y axis.
+    fig, axs = plt.subplots(2, 2, sharey=True, tight_layout=True)
+    houses = ("Gryffindor", "Slytherin", "Ravenclaw", "Hufflepuff")
+    for ax, house in zip(axs.flat, houses):  # row-major: top row first
+        house_hist(ax, d, house)
+    plt.show()
+