diff options
| author | Charles Cabergs <me@cacharle.xyz> | 2020-08-29 17:49:39 +0200 |
|---|---|---|
| committer | Charles Cabergs <me@cacharle.xyz> | 2020-08-29 17:52:47 +0200 |
| commit | 5a2fb0139e5797a48afa1ca646a26c09c7a56936 (patch) | |
| tree | 516a288bb53064bf212d7a4e03663b479ed43442 /src/logreg_predict.py | |
| parent | a1c7fe1b6d95e2560e62c12453da287d36d4a714 (diff) | |
| download | dslr-5a2fb0139e5797a48afa1ca646a26c09c7a56936.tar.gz dslr-5a2fb0139e5797a48afa1ca646a26c09c7a56936.tar.bz2 dslr-5a2fb0139e5797a48afa1ca646a26c09c7a56936.zip | |
Diffstat (limited to 'src/logreg_predict.py')
| -rwxr-xr-x | src/logreg_predict.py | 44 |
1 files changed, 44 insertions, 0 deletions
#!/bin/python3
"""Predict a label for every dataset row from one-vs-all logistic weights.

Usage: logreg_predict.py dataset_path weights_path

The weights file holds one line per label, ``label: w0,w1,...,wn``.
Predictions are written to ``houses.csv`` in the current directory.
"""

import sys

import numpy as np


def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of *x*, element-wise."""
    # The denominator is a sum; a product here would collapse to exp(x).
    return 1.0 / (1.0 + np.exp(-x))


def hypothesis(x, theta):
    """Return the logistic-regression estimate ``sigmoid(x . theta)``."""
    return sigmoid(x.dot(theta))


def load_weights(path):
    """Read the per-label weight vectors stored in *path*.

    Each non-blank line must look like ``label: w0,w1,...,wn``; whitespace
    around the individual weights is ignored.

    Returns:
        A ``(labels, thetas)`` pair: the label strings and, in the same
        order, one 1-D float array of weights per label.

    Raises:
        ValueError: if a line has no ``:`` separator or a weight is not a
            valid float.
    """
    labels = []
    thetas = []
    with open(path) as weights_file:
        for lineno, line in enumerate(weights_file, start=1):
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            label, sep, weights = line.partition(":")
            if not sep:
                raise ValueError(
                    "{}:{}: expected 'label: w0,w1,...'".format(path, lineno)
                )
            labels.append(label)
            thetas.append(np.array([float(w) for w in weights.split(",")]))
    return labels, thetas


def predict(X, labels, thetas):
    """Return, for each row of *X*, the label with the highest hypothesis.

    Args:
        X: 2-D array-like of feature rows (a single 1-D row is accepted).
            Each row must have as many entries as each weight vector.
        labels: sequence of label names.
        thetas: sequence of weight vectors, parallel to *labels*.

    Returns:
        A list with one label per row of *X*. On ties the label that comes
        first in *labels* wins.

    Raises:
        ValueError: if *labels* is empty or its length differs from *thetas*.
    """
    if not labels or len(labels) != len(thetas):
        raise ValueError("labels and thetas must be non-empty and parallel")
    rows = np.atleast_2d(np.asarray(X, dtype=float))
    # One column of weights per label, so a single matrix product scores
    # every row against every label at once.
    scores = hypothesis(rows, np.column_stack(thetas))
    return [labels[i] for i in np.argmax(scores, axis=1)]


def main(argv):
    """Run the prediction for ``argv = [prog, dataset_path, weights_path]``."""
    if len(argv) != 3:
        # sys.exit with a string prints it to stderr and exits with status 1;
        # raising a bare string is itself a TypeError in Python 3.
        sys.exit('Usage: {} dataset_path weights_path'.format(argv[0]))

    # Imported here so the pure helpers above stay importable without the
    # project-local dataset module.
    from dataset import Dataset

    labels, thetas = load_weights(argv[2])

    d = Dataset(argv[1])

    # assumes d.df_scores is a numeric 2-D table whose min()/max() reduce
    # column-wise (e.g. a pandas DataFrame) -- TODO confirm against Dataset.
    X = d.df_scores
    # NOTE(review): min-max scaling uses the extrema of the data being
    # predicted; presumably training scaled its own data the same way --
    # confirm the two stay comparable. A constant column divides by zero.
    X = (X - X.min()) / (X.max() - X.min())
    # Append a column of ones so the last weight acts as the bias term.
    X = np.hstack([X, np.ones((X.shape[0], 1))])

    predictions = predict(X, labels, thetas)

    with open("houses.csv", "w") as houses_file:
        houses_file.write("Index,Hogwarts House\n")
        houses_file.writelines(
            "{},{}\n".format(i, predicted)
            for i, predicted in enumerate(predictions)
        )


if __name__ == '__main__':
    main(sys.argv)
