about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/logreg_predct.py 11
-rw-r--r-- src/logreg_train.py 11
-rw-r--r-- src/model.py 54
3 files changed, 76 insertions, 0 deletions
diff --git a/src/logreg_predct.py b/src/logreg_predct.py
new file mode 100644
index 0000000..ae416b0
--- /dev/null
+++ b/src/logreg_predct.py
@@ -0,0 +1,11 @@
+import sys
+
+from model import Model
+
+
def main(argv):
    """Run the prediction script.

    Args:
        argv: Full argument vector, program name first; expected to be
            ``[prog, dataset_path, weights_path]``.

    Raises:
        SystemExit: With a usage message (exit status 1) when the argument
            count is wrong.
    """
    if len(argv) != 3:
        # sys.exit(str) prints the message to stderr and exits with status 1;
        # a bare string cannot be raised in Python 3.
        sys.exit('Usage: {} dataset_path weights_path'.format(argv[0]))
    m = Model(argv[2])
    # NOTE(review): dataset_path (argv[1]) is never loaded, and predict() is
    # called with no input -- data loading still needs to be wired up here.
    m.predict()


if __name__ == '__main__':
    main(sys.argv)
+
diff --git a/src/logreg_train.py b/src/logreg_train.py
new file mode 100644
index 0000000..8bc9a25
--- /dev/null
+++ b/src/logreg_train.py
@@ -0,0 +1,11 @@
+import sys
+
+from model import Model
+
+
def main(argv):
    """Run the training script.

    Args:
        argv: Full argument vector, program name first; expected to be
            ``[prog, dataset_path]``.

    Raises:
        SystemExit: With a usage message (exit status 1) when the argument
            count is wrong.
    """
    if len(argv) != 2:
        # sys.exit(str) prints the message to stderr and exits with status 1;
        # a bare string cannot be raised in Python 3.
        sys.exit('Usage: {} dataset_path'.format(argv[0]))
    m = Model()
    # NOTE(review): train() requires xs and ys, but dataset_path (argv[1]) is
    # never loaded -- data loading still needs to be wired up here.
    m.train()
    # TODO: write the trained weights to disk.


if __name__ == '__main__':
    main(sys.argv)
diff --git a/src/model.py b/src/model.py
index e69de29..f777b16 100644
--- a/src/model.py
+++ b/src/model.py
@@ -0,0 +1,54 @@
import numpy as np


class Model:
    """Binary logistic-regression classifier fitted by batch gradient descent.

    Parameters live in ``self.theta``: ``theta[0]`` is the intercept and
    ``theta[j]`` (``j >= 1``) is the coefficient of feature ``x[j - 1]``.
    ``theta`` stays ``None`` until ``train`` or ``_read_weights`` has run.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so a
    # saturated sigmoid (exactly 0.0 or 1.0 in floating point) never yields
    # log(0).
    _EPS = 1e-15

    def __init__(self, weights_filename='weights'):
        """Create an untrained model.

        Args:
            weights_filename: Path used by ``_read_weights`` and
                ``_write_weights``.
        """
        self.weights_filename = weights_filename
        self.theta = None

    @property
    def weights(self):
        """Alias of ``theta``, kept for code that uses the name ``weights``."""
        return self.theta

    @weights.setter
    def weights(self, value):
        self.theta = np.asarray(value, dtype=float)

    def train(self, xs, ys, alpha=1, epoch=1000):
        """Fit ``theta`` to the samples with batch gradient descent.

        Args:
            xs: 2-D array-like of shape (samples, features).
            ys: 1-D array-like of 0/1 labels, one per sample.
            alpha: Learning rate.
            epoch: Number of gradient steps.

        Raises:
            ValueError: If ``xs`` is not a non-empty 2-D array or the number
                of labels differs from the number of samples.
        """
        xs = np.asarray(xs, dtype=float)
        ys = np.asarray(ys, dtype=float)
        if xs.ndim != 2 or len(xs) == 0:
            raise ValueError('xs must be a non-empty 2-D array')
        if len(xs) != len(ys):
            raise ValueError('xs and ys must have the same length')
        # One extra slot for the intercept term theta[0].
        self.theta = np.zeros(xs.shape[1] + 1)
        for _ in range(epoch):
            self.theta = self.theta - alpha * self.gradient(xs, ys)

    def gradient(self, xs, ys):
        """Return the gradient of the mean log-loss with respect to ``theta``.

        Element ``j`` equals ``partial(xs, ys, j)``; the whole vector is
        computed in one pass instead of one pass per parameter.
        """
        xs = np.asarray(xs, dtype=float)
        ys = np.asarray(ys, dtype=float)
        errors = self.hypothesis(xs) - ys
        return np.concatenate(([errors.mean()], errors.dot(xs) / len(xs)))

    def partial(self, xs, ys, theta_j):
        """Return the partial derivative of the mean log-loss.

        Args:
            xs: 2-D array-like of shape (samples, features).
            ys: 1-D array-like of 0/1 labels.
            theta_j: Index into ``theta``; 0 selects the intercept, ``j >= 1``
                selects the coefficient of feature column ``j - 1``.
        """
        xs = np.asarray(xs, dtype=float)
        ys = np.asarray(ys, dtype=float)
        errors = self.hypothesis(xs) - ys
        if theta_j != 0:
            errors = errors * xs[:, theta_j - 1]
        return errors.sum() / len(xs)

    def hypothesis(self, x):
        """Return the predicted probability that the label is 1.

        Args:
            x: One sample (1-D, length = features) or a batch (2-D, one row
                per sample).

        Returns:
            A float for a single sample, or a 1-D array for a batch.

        Raises:
            RuntimeError: If the model has no parameters yet.
        """
        theta = self._require_theta()
        x = np.asarray(x, dtype=float)
        # theta[0] is the intercept, so only theta[1:] lines up with x.
        return self._sigmoid(theta[0] + x.dot(theta[1:]))

    def predict(self, xs):
        """Return hard 0/1 labels: 1 where ``hypothesis(xs) >= 0.5``."""
        return (self.hypothesis(xs) >= 0.5).astype(int)

    def logloss(self, x, y):
        """Return the log-loss of one sample ``x`` with true label ``y``.

        Raises:
            ValueError: If ``y`` is neither 0 nor 1.
        """
        p = np.clip(self.hypothesis(x), self._EPS, 1 - self._EPS)
        if y == 1:
            return -np.log(p)
        elif y == 0:
            return -np.log(1 - p)
        else:
            raise ValueError("y != 1 and y != 0")

    def cost(self, xs, ys):
        """Return the mean log-loss over all samples.

        Raises:
            ValueError: If ``xs`` is empty or any label is neither 0 nor 1.
        """
        xs = np.asarray(xs, dtype=float)
        if len(xs) == 0:
            raise ValueError('xs must not be empty')
        return sum(self.logloss(x, y) for x, y in zip(xs, ys)) / len(xs)

    def _require_theta(self):
        """Return ``theta``, failing loudly if the model is still untrained."""
        if self.theta is None:
            raise RuntimeError(
                'model has no weights; call train() or _read_weights() first')
        return self.theta

    def _sigmoid(self, x):
        """Return the logistic function ``1 / (1 + e**-x)`` of ``x``."""
        return 1 / (1 + np.exp(-x))

    def _normalize(self, x):
        """Min-max scale ``x`` into [0, 1] using its global min and max.

        A constant input has zero range; it maps to all zeros instead of
        dividing by zero.
        """
        low = x.min()
        span = x.max() - low
        if span == 0:
            return x - low
        return (x - low) / span

    def _read_weights(self):
        """Load ``theta`` from the comma-separated file ``weights_filename``.

        Raises:
            IOError: If the file cannot be opened or read.
            ValueError: If the content is not a comma-separated float list.
        """
        try:
            with open(self.weights_filename, 'r') as file:
                content = file.read()
        except IOError as err:
            raise IOError('Couldn\'t read weights file at: {}'.format(
                self.weights_filename)) from err
        self.theta = np.array([float(s) for s in content.strip().split(',')])

    def _write_weights(self):
        """Write ``theta`` to ``weights_filename`` as comma-separated floats.

        Raises:
            RuntimeError: If the model has no parameters yet.
            IOError: If the file cannot be opened or written.
        """
        theta = self._require_theta()
        # repr(float) round-trips exactly through float() when read back.
        line = ','.join(repr(float(w)) for w in theta)
        try:
            with open(self.weights_filename, 'w') as file:
                file.write(line)
        except IOError as err:
            raise IOError('Couldn\'t write weights file at: {}'.format(
                self.weights_filename)) from err