From 2deef78aa7138e41dae0c7ffec93fc33e4e2fc51 Mon Sep 17 00:00:00 2001
From: Charles
Date: Sat, 25 Jan 2020 18:44:25 +0100
Subject: model template logreg_train and logreg_predict call model

---
Review note: this patch was re-flowed from a whitespace-collapsed copy and
its added lines were corrected (see the comments in each file). The
post-image blob ids on the `index` lines are those of the original commit
and no longer match the contents below.

 src/logreg_predct.py |  14 +++++++
 src/logreg_train.py  |  14 +++++++
 src/model.py         | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 130 insertions(+)
 create mode 100644 src/logreg_predct.py
 create mode 100644 src/logreg_train.py

(limited to 'src')

diff --git a/src/logreg_predct.py b/src/logreg_predct.py
new file mode 100644
index 0000000..ae416b0
--- /dev/null
+++ b/src/logreg_predct.py
@@ -0,0 +1,14 @@
+"""Predict labels for a dataset using previously trained weights."""
+import sys
+
+from model import Model
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        # A str cannot be raised in Python 3 and str has no .path();
+        # exit with the usage message (sys.exit prints it to stderr).
+        sys.exit('Usage: {} dataset_path weights_path'.format(sys.argv[0]))
+    m = Model(weights_filename=sys.argv[2])
+    # TODO(review): load the samples from sys.argv[1]; predict() needs them.
+    m.predict()
diff --git a/src/logreg_train.py b/src/logreg_train.py
new file mode 100644
index 0000000..8bc9a25
--- /dev/null
+++ b/src/logreg_train.py
@@ -0,0 +1,14 @@
+"""Train the logistic-regression model on a dataset."""
+import sys
+
+from model import Model
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        # A str cannot be raised in Python 3; exit with the usage message.
+        sys.exit('Usage: {} dataset_path'.format(sys.argv[0]))
+    m = Model()
+    # TODO(review): load xs and ys from sys.argv[1]; train() requires both.
+    m.train()
+    # write
diff --git a/src/model.py b/src/model.py
index e69de29..f777b16 100644
--- a/src/model.py
+++ b/src/model.py
@@ -0,0 +1,102 @@
+"""Binary logistic regression trained with batch gradient descent."""
+import numpy as np
+
+
+class Model:
+    """Binary logistic-regression classifier.
+
+    ``theta[0]`` is the intercept and ``theta[j]`` (j >= 1) is the weight of
+    feature ``j - 1``, so samples are passed without a bias column.
+    """
+
+    def __init__(self, weights_filename='weights'):
+        self.weights_filename = weights_filename
+        self.theta = None  # set by train() or _read_weights()
+
+    def train(self, xs, ys, alpha=1, epoch=1000):
+        """Fit ``theta`` to samples ``xs`` and 0/1 labels ``ys``.
+
+        Runs ``epoch`` steps of gradient descent with learning rate ``alpha``,
+        starting from all-zero weights.
+        """
+        xs = np.asarray(xs, dtype=float)
+        ys = np.asarray(ys, dtype=float)
+        if xs.ndim != 2 or len(xs) == 0 or len(xs) != len(ys):
+            raise ValueError('xs must be a non-empty 2-D array matching ys')
+        self.theta = np.zeros(xs.shape[1] + 1)
+        for _ in range(epoch):
+            self.theta = self.theta - alpha * self.gradient(xs, ys)
+
+    def predict(self, xs):
+        """Return the 0/1 class of each sample, loading weights if needed."""
+        if self.theta is None:
+            self._read_weights()
+        return (self.hypothesis(xs) >= 0.5).astype(int)
+
+    def gradient(self, xs, ys):
+        """Return the gradient of the cost with respect to every weight."""
+        return np.array(
+            [self.partial(xs, ys, j) for j in range(len(self.theta))])
+
+    def partial(self, xs, ys, theta_j):
+        """Return d(cost)/d(theta[theta_j]) averaged over the samples."""
+        total = 0
+        for x_i, y_i in zip(xs, ys):
+            temp = self.hypothesis(x_i) - y_i
+            if theta_j != 0:  # index 0 is the intercept: no feature factor
+                temp *= x_i[theta_j - 1]
+            total += temp
+        return total / len(xs)
+
+    def hypothesis(self, x):
+        """Return the predicted probability that ``x`` belongs to class 1.
+
+        The value must stay a probability (not a thresholded 0/1) so that
+        logloss() and the gradient are well defined.
+        """
+        x = np.asarray(x, dtype=float)
+        return self._sigmoid(self.theta[0] + x.dot(self.theta[1:]))
+
+    def logloss(self, x, y):
+        """Return the log loss of sample ``x`` against its label ``y``."""
+        if y == 1:
+            return -np.log(self.hypothesis(x))
+        elif y == 0:
+            return -np.log(1 - self.hypothesis(x))
+        else:
+            raise ValueError("y != 1 and y != 0")
+
+    def cost(self, xs, ys):
+        """Return the mean log loss over all samples."""
+        return sum(self.logloss(x, y) for x, y in zip(xs, ys)) / len(xs)
+
+    def _sigmoid(self, x):
+        return 1 / (1 + np.exp(-x))
+
+    def _normalize(self, x):
+        """Min-max scale ``x`` to [0, 1]; constant input maps to zeros."""
+        span = x.max() - x.min()
+        if span == 0:
+            return np.zeros_like(x, dtype=float)
+        return (x - x.min()) / span
+
+    def _read_weights(self):
+        """Load comma-separated weights from ``weights_filename``."""
+        try:
+            with open(self.weights_filename, 'r') as file:
+                self.theta = np.array(
+                    [float(s) for s in file.read().strip().split(',')])
+        except IOError as err:
+            raise IOError('Couldn\'t read weights file at: {}'.format(
+                self.weights_filename)) from err
+
+    def _write_weights(self):
+        """Save the weights, comma-separated, to ``weights_filename``."""
+        if self.theta is None:
+            raise ValueError('no weights to write: train the model first')
+        try:
+            with open(self.weights_filename, 'w') as file:
+                file.write(','.join(str(w) for w in self.theta))
+        except IOError as err:
+            raise IOError('Couldn\'t write weights file at: {}'.format(
+                self.weights_filename)) from err
-- 
cgit