aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rwxr-xr-x src/logreg_predct.py 8
-rwxr-xr-x src/logreg_predict.py 44
-rwxr-xr-x src/logreg_train.py 6
3 files changed, 47 insertions, 11 deletions
diff --git a/src/logreg_predct.py b/src/logreg_predct.py
deleted file mode 100755
index 0f09c31..0000000
--- a/src/logreg_predct.py
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/python3
-
-import sys
-
-if __name__ == '__main__':
- if len(sys.argv) != 2:
- raise 'Usage: {} dataset_path'.path(sys.argv[0])
-
diff --git a/src/logreg_predict.py b/src/logreg_predict.py
new file mode 100755
index 0000000..9982c5b
--- /dev/null
+++ b/src/logreg_predict.py
@@ -0,0 +1,44 @@
+#!/bin/python3
+
+import sys
+
+import numpy as np
+
+from dataset import Dataset
+
+
+def sigmoid(x):
+    return 1.0 / (1.0 + np.exp(-x))  # logistic function: 1 / (1 + e^-x)
+
+def hypothesis(x, theta):
+    return sigmoid(np.dot(x, theta))  # sigmoid of the dot product of x and theta
+
+if __name__ == '__main__':
+    # A plain str cannot be raised in Python 3 (TypeError); SystemExit prints the usage and exits 1.
+    if len(sys.argv) != 3:
+        raise SystemExit('Usage: {} dataset_path weights_path'.format(sys.argv[0]))
+
+    # Each line of the weights file is "<label>: w0,w1,...", one line per label.
+    labels = []
+    thetas = []
+    with open(sys.argv[2]) as f:
+        for line in f:
+            label_end = line.find(":")
+            labels.append(line[:label_end])
+            thetas.append(np.array([float(s) for s in line[label_end + 2:].split(",")]))
+
+    d = Dataset(sys.argv[1])
+
+    # Min-max scale the scores, then append a column of ones so that the last
+    # weight of each theta acts as the intercept.
+    X = d.df_scores
+    X = (X - X.min()) / (X.max() - X.min())
+    X = np.hstack([X, np.ones((X.shape[0], 1))])
+
+    with open("houses.csv", "w") as houses_file:
+        houses_file.write("Index,Hogwarts House\n")
+        for i, x in enumerate(X):
+            # Score the row against every label and keep the highest-scoring one.
+            hs = [(label, hypothesis(x, t)) for label, t in zip(labels, thetas)]
+            predicted, _ = max(hs, key=lambda pair: pair[1])
+            houses_file.write("{},{}\n".format(i, predicted))
diff --git a/src/logreg_train.py b/src/logreg_train.py
index e02e101..6254494 100755
--- a/src/logreg_train.py
+++ b/src/logreg_train.py
@@ -35,7 +35,7 @@ def train(ys, xs):
ys_ally = ys.copy()
ys_ally[ys == trained] = 0 # opposite?
ys_ally[ys != trained] = 1
- thetas.append((trained, gradient_descent(ys_ally, xs, 1, 2)))
+ thetas.append((trained, gradient_descent(ys_ally, xs, 1, 100)))
return thetas
if __name__ == '__main__':
@@ -51,5 +51,5 @@ if __name__ == '__main__':
thetas = train(Y, X)
with open("weights", "w") as f:
- for name, t in thetas:
- f.write("{}: {}\n".format(name, ','.join([str(x) for x in t])))
+ for label, t in thetas:
+ f.write("{}: {}\n".format(label, ','.join([str(x) for x in t])))