From 5a2fb0139e5797a48afa1ca646a26c09c7a56936 Mon Sep 17 00:00:00 2001
From: Charles Cabergs
Date: Sat, 29 Aug 2020 17:49:39 +0200
Subject: Added logreg_predict

---
 src/logreg_predct.py  |  8 --------
 src/logreg_predict.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 src/logreg_train.py   |  6 +++---
 3 files changed, 47 insertions(+), 11 deletions(-)
 delete mode 100755 src/logreg_predct.py
 create mode 100755 src/logreg_predict.py

(limited to 'src')

diff --git a/src/logreg_predct.py b/src/logreg_predct.py
deleted file mode 100755
index 0f09c31..0000000
--- a/src/logreg_predct.py
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/python3
-
-import sys
-
-if __name__ == '__main__':
-    if len(sys.argv) != 2:
-        raise 'Usage: {} dataset_path'.path(sys.argv[0])
-
diff --git a/src/logreg_predict.py b/src/logreg_predict.py
new file mode 100755
index 0000000..9982c5b
--- /dev/null
+++ b/src/logreg_predict.py
@@ -0,0 +1,44 @@
+#!/bin/python3
+
+import sys
+
+import numpy as np
+
+from dataset import Dataset
+
+
+def sigmoid(x):
+    return 1.0 / (1.0 + np.exp(-x))  # logistic function: 1 / (1 + e^-x)
+
+def hypothesis(x, theta):
+    return sigmoid(x.dot(theta))  # sigmoid of the dot product of x and theta
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        sys.exit('Usage: {} dataset_path weights_path'.format(sys.argv[0]))
+
+    labels = []
+    thetas = []
+    with open(sys.argv[2]) as f:
+        for line in f:  # each line is parsed as "<label>: <w0>,<w1>,..."
+            label_end = line.find(":")
+            labels.append(line[:label_end])
+            thetas.append(np.array([float(s) for s in line[label_end + 2:].split(",")]))
+    # print(labels)
+    # print(thetas)
+
+    d = Dataset(sys.argv[1])
+
+    X = d.df_scores
+    X = (X - X.min()) / (X.max() - X.min())
+    X = np.hstack([X, np.ones((X.shape[0], 1))])  # append a column of ones
+
+    with open("houses.csv", "w") as houses_file:
+        houses_file.write("Index,Hogwarts House\n")
+        for i, x in enumerate(X):
+            hs = []
+            for l, t in zip(labels, thetas):
+                hs.append((l, hypothesis(x, t)))
+
+            predicted, _ = max(hs, key=lambda h: h[1])  # label with the highest score
+            houses_file.write("{},{}\n".format(i, predicted))
diff --git a/src/logreg_train.py b/src/logreg_train.py
index e02e101..6254494 100755
--- a/src/logreg_train.py
+++ b/src/logreg_train.py
@@ -35,7 +35,7 @@ def train(ys, xs):
         ys_ally = ys.copy()
         ys_ally[ys == trained] = 0 # opposite?
         ys_ally[ys != trained] = 1
-        thetas.append((trained, gradient_descent(ys_ally, xs, 1, 2)))
+        thetas.append((trained, gradient_descent(ys_ally, xs, 1, 100)))
     return thetas
 
 if __name__ == '__main__':
@@ -51,5 +51,5 @@ if __name__ == '__main__':
     thetas = train(Y, X)
 
     with open("weights", "w") as f:
-        for name, t in thetas:
-            f.write("{}: {}\n".format(name, ','.join([str(x) for x in t])))
+        for label, t in thetas:
+            f.write("{}: {}\n".format(label, ','.join([str(x) for x in t])))
-- 
cgit