diff options
Diffstat (limited to 'src/logreg_predict.py')
| -rwxr-xr-x | src/logreg_predict.py | 44 |
1 files changed, 44 insertions, 0 deletions
#!/bin/python3
"""Predict a label for every dataset row using one-vs-all logistic regression.

Usage: logreg_predict.py dataset_path weights_path

The weights file holds one classifier per line in the form
``label: w1,w2,...,wn``. Predictions are written to ``houses.csv`` in the
current working directory.
"""

import sys

import numpy as np


def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, element-wise.

    Accepts a scalar or a NumPy array; the result always lies in [0, 1].
    """
    return 1.0 / (1.0 + np.exp(-x))


def hypothesis(x, theta):
    """Return the logistic-regression estimate for features *x* and weights *theta*."""
    return sigmoid(x.dot(theta))


def parse_weights(lines):
    """Parse ``label: w1,w2,...`` lines into parallel lists.

    Args:
        lines: iterable of text lines (for example an open file).

    Returns:
        A ``(labels, thetas)`` tuple: ``labels`` is a list of strings and
        ``thetas`` a list of 1-D float arrays, in input order.

    Raises:
        ValueError: if a non-blank line has no ``:`` separator or contains a
            value that is not a float.
    """
    labels = []
    thetas = []
    for line_no, line in enumerate(lines, start=1):
        if not line.strip():
            # Tolerate blank lines (typically a trailing newline at EOF).
            continue
        label, sep, values = line.partition(":")
        if not sep:
            raise ValueError(
                "line {}: expected 'label: w1,w2,...'".format(line_no)
            )
        labels.append(label.strip())
        # float() ignores surrounding whitespace, so both "a:1,2" and
        # "a: 1, 2\n" are accepted.
        thetas.append(np.array([float(s) for s in values.split(",")]))
    return labels, thetas


def predict_labels(X, labels, thetas):
    """Return, for each row of *X*, the label whose classifier scores highest.

    Args:
        X: iterable of 1-D feature vectors (rows of a 2-D array).
        labels: class labels, parallel to *thetas*.
        thetas: one weight vector per label.

    Ties are resolved in favour of the label that appears first.
    """
    predictions = []
    for x in X:
        best_label, _ = max(
            ((label, hypothesis(x, theta))
             for label, theta in zip(labels, thetas)),
            key=lambda pair: pair[1],
        )
        predictions.append(best_label)
    return predictions


def main(argv):
    """Run the prediction script with command-line arguments *argv*."""
    if len(argv) != 3:
        prog = argv[0] if argv else 'logreg_predict.py'
        # sys.exit(str) prints the message to stderr and exits with status 1.
        sys.exit('Usage: {} dataset_path weights_path'.format(prog))

    # Project-local import kept inside main() so the pure helpers above can be
    # imported (and tested) without the dataset module on the path.
    from dataset import Dataset

    with open(argv[2]) as f:
        labels, thetas = parse_weights(f)
    if not labels:
        sys.exit('No weights found in {}'.format(argv[2]))

    d = Dataset(argv[1])

    # Min-max scale the scores, then append a constant column of ones so the
    # last weight of each theta acts as the bias term.
    # NOTE(review): the scaling uses this dataset's own min/max, and a
    # constant column would divide by zero -- confirm this matches how the
    # weights were trained.
    X = d.df_scores
    X = (X - X.min()) / (X.max() - X.min())
    X = np.hstack([X, np.ones((X.shape[0], 1))])

    predictions = predict_labels(X, labels, thetas)

    with open("houses.csv", "w") as houses_file:
        houses_file.write("Index,Hogwarts House\n")
        for i, predicted in enumerate(predictions):
            houses_file.write("{},{}\n".format(i, predicted))


if __name__ == '__main__':
    main(sys.argv)
