1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
#!/bin/python3
import sys
import numpy as np
from dataset import Dataset
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    ``x`` is handed directly to ``np.exp``, so both scalars and NumPy arrays
    are accepted; arrays are transformed element-wise.
    """
    # The denominator is a sum, not a product: ``1 / (1 * exp(-x))`` would
    # collapse to ``exp(x)``, which is unbounded instead of lying in (0, 1).
    return 1.0 / (1.0 + np.exp(-x))
def hypothesis(x, theta):
    """Apply ``sigmoid`` to the dot product of ``x`` with ``theta``."""
    linear_score = x.dot(theta)
    return sigmoid(linear_score)
def main():
    """Predict a label for every dataset row and write them to ``houses.csv``.

    Expects exactly two command-line arguments: the dataset path and the
    weights path. Every non-blank line of the weights file is parsed as
    ``label: w1,w2,...`` (comma-separated floats after the first colon).
    For each row the label whose weights give the highest ``hypothesis``
    score is written next to the row index.

    Raises:
        SystemExit: if the argument count is wrong (usage goes to stderr).
        ValueError: if a weights line has no ``:`` or a non-numeric weight.
    """
    if len(sys.argv) != 3:
        # Raising a plain string is a TypeError in Python 3; exit with the
        # usage message on stderr and a non-zero status instead.
        sys.exit('Usage: {} dataset_path weights_path'.format(sys.argv[0]))

    labels = []
    thetas = []
    with open(sys.argv[2]) as weights_file:
        for line in weights_file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            label, separator, values = line.partition(":")
            if not separator:
                raise ValueError(
                    'Malformed weights line (missing ":"): {!r}'.format(line))
            labels.append(label)
            # float() ignores surrounding whitespace, so the space after the
            # colon and the trailing newline need no special handling.
            thetas.append(np.array([float(v) for v in values.split(",")]))

    dataset = Dataset(sys.argv[1])
    features = dataset.df_scores
    # Min-max scaling using this dataset's own min/max.
    # NOTE(review): presumably this must match the scaling applied when the
    # weights were trained -- confirm against the training script.
    features = (features - features.min()) / (features.max() - features.min())
    # Append a column of ones (presumably pairs with a bias weight stored
    # last in each theta -- confirm against the training script).
    features = np.hstack([features, np.ones((features.shape[0], 1))])

    with open("houses.csv", "w") as houses_file:
        houses_file.write("Index,Hogwarts House\n")
        for index, row in enumerate(features):
            scores = [
                (label, hypothesis(row, theta))
                for label, theta in zip(labels, thetas)
            ]
            # max() keeps the first entry on ties, i.e. weights-file order.
            predicted, _ = max(scores, key=lambda pair: pair[1])
            houses_file.write("{},{}\n".format(index, predicted))


if __name__ == '__main__':
    main()
|