aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles <sircharlesaze@gmail.com>2020-06-25 18:17:41 +0200
committerCharles <sircharlesaze@gmail.com>2020-06-25 18:17:41 +0200
commit063afb1650160c8973de51603be0acca53b39e5d (patch)
tree4e7b41397bba9a048b32f26bf3259b266c3da513
downloadhuffman-063afb1650160c8973de51603be0acca53b39e5d.tar.gz
huffman-063afb1650160c8973de51603be0acca53b39e5d.tar.bz2
huffman-063afb1650160c8973de51603be0acca53b39e5d.zip
Initial commit: Rewrite in Rust
-rw-r--r--.gitignore2
-rw-r--r--Cargo.lock5
-rw-r--r--Cargo.toml9
-rw-r--r--README.md9
-rwxr-xr-xdraft.py112
-rw-r--r--src/main.rs12
-rw-r--r--src/symbols.rs122
-rw-r--r--src/tree.rs22
8 files changed, 293 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..eed3cc5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+*.txt
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..e3b5bc6
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,5 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+[[package]]
+name = "huffman"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..192f14f
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "huffman"
+version = "0.1.0"
+authors = ["Charles <sircharlesaze@gmail.com>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..74ef205
--- /dev/null
+++ b/README.md
@@ -0,0 +1,9 @@
+# huffman
+
+Implementation of [Huffman coding](https://en.wikipedia.org/wiki/Huffman_coding?oldformat=true) made for educational purposes.
+
+## Usage
+
+`python3 draft.py [file_name]` to run the python draft.
+
+`cargo run [file_name]` for the real thing.
diff --git a/draft.py b/draft.py
new file mode 100755
index 0000000..f822362
--- /dev/null
+++ b/draft.py
@@ -0,0 +1,112 @@
+#!/usr/bin/python3
+
+import sys
+import heapq
+import collections
+import copy
+
+
+class BitRepr:
+ def __init__(self, x: int, length: int):
+ self.x = x
+ self.length = length
+
+ def add_zero(self):
+ self.x = self.x << 1
+ self.length += 1
+
+ def add_one(self):
+ self.x = self.x << 1
+ self.length += 1
+ self.x = self.x | 1
+
+ def __repr__(self):
+ return "{0:b}".format(self.x).zfill(self.length)
+
+
+class HuffmanNode:
+ def __init__(
+ self, occurence: int, is_leaf: bool,
+ symbol: int = None, left: 'HuffmanNode' = None, right: 'HuffmanNode' = None
+ ):
+ self.occurence = occurence
+ self.is_leaf = is_leaf
+ if is_leaf:
+ self.symbol = symbol
+ else:
+ self.left = left
+ self.right = right
+
+ @staticmethod
+ def leaf(occurence: int, symbol: int):
+ return HuffmanNode(occurence, is_leaf=True, symbol=symbol)
+
+ @staticmethod
+ def node(left: 'HuffmanNode', right: 'HuffmanNode'):
+ return HuffmanNode(left.occurence + right.occurence, is_leaf=False, left=left, right=right)
+
+ def __gt__(self, other: 'HuffmanNode') -> bool:
+ return self.occurence > other.occurence
+
+ def __lt__(self, other: 'HuffmanNode') -> bool:
+ return self.occurence < other.occurence
+
+ def __repr__(self) -> str:
+ if self.is_leaf:
+ return f"({self.symbol} {self.occurence})"
+ else:
+ return f"(NODE {self.occurence})" # left {str(self.left)} right {str(self.right)})"
+
+ def put_tree(self, level: int = 0):
+ if self.is_leaf:
+ print(f"({self.symbol} {self.occurence})")
+ return
+ print(f"NODE {self.occurence}")
+ print(" " * level * 4, "left ", end="")
+ self.left.put_tree(level + 1)
+ print(" " * level * 4, "right ", end="")
+ self.right.put_tree(level + 1)
+
+ def to_bit_repr(self, parent_bits: 'BitRepr' = BitRepr(0, 0)) -> {int: 'BitRepr'}:
+ if self.is_leaf:
+ return {self.symbol: parent_bits}
+ left_bits = copy.deepcopy(parent_bits)
+ left_bits.add_zero()
+ right_bits = copy.deepcopy(parent_bits)
+ right_bits.add_one()
+ return {
+ **self.left.to_bit_repr(left_bits),
+ **self.right.to_bit_repr(right_bits)
+ }
+
+if __name__ == "__main__":
+ if len(sys.argv) != 2:
+ print("missing file name")
+ sys.exit(1)
+
+ file_name = sys.argv[1]
+ # print(file_name)
+
+ content = None
+ with open(file_name, "rb") as f:
+ content = f.read()
+ counter = collections.Counter(content)
+
+ huffman_nodes = [HuffmanNode.leaf(v, k) for k, v in dict(counter).items()]
+ heapq.heapify(huffman_nodes)
+
+ while len(huffman_nodes) >= 2:
+ # print(huffman_nodes)
+ left = heapq.heappop(huffman_nodes)
+ right = heapq.heappop(huffman_nodes)
+ node = HuffmanNode.node(left, right)
+ heapq.heappush(huffman_nodes, node)
+ # print(huffman_nodes)
+
+ huffman_tree = huffman_nodes[0]
+ huffman_tree.put_tree()
+ bit_repr = huffman_tree.to_bit_repr()
+ print(bit_repr)
+
+ huffman_content = ''.join([str(bit_repr[s]) for s in content])
+ print(huffman_content)
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..2b4e6c5
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,12 @@
+use std::env;
+
+pub mod symbols;
+// pub mod tree;
+//
+use symbols::Tree;
+
+fn main() {
+ let file_path = env::args().nth(1).unwrap();
+ let tree = Tree::from_file(&file_path).unwrap();
+ tree.put();
+}
diff --git a/src/symbols.rs b/src/symbols.rs
new file mode 100644
index 0000000..cd8fd52
--- /dev/null
+++ b/src/symbols.rs
@@ -0,0 +1,122 @@
+use std::fs;
+use std::io;
+use std::io::Read;
+use std::collections::{BinaryHeap, HashMap};
+
+pub struct Node {
+ occurences: usize,
+ content: Content,
+}
+
+impl Node {
+ fn join(left: Node, right: Node) -> Node {
+ Node {
+ occurences: left.occurences + right.occurences,
+ content: Content::Parent {
+ left: Box::new(left),
+ right: Box::new(right),
+ }
+ }
+ }
+}
+
+enum Content {
+ Leaf(u8),
+ Parent {
+ left: Box<Node>,
+ right: Box<Node>,
+ },
+}
+
+pub type Tree = Node;
+
+impl Tree {
+ pub fn from_file(file_path: &str) -> io::Result<Tree> {
+ let mut symbols = Symbols::from_file(file_path)?;
+ Ok(Tree::from_symbols(&mut symbols))
+ }
+
+ fn from_symbols(symbols: &mut Symbols) -> Tree {
+ while symbols.0.len() >= 2 {
+ let first = symbols.0.pop().unwrap();
+ let second = symbols.0.pop().unwrap();
+ let n = Node::join(first, second);
+ symbols.0.push(n);
+ }
+ let ret = symbols.0.pop().unwrap();
+ ret
+ }
+
+ pub fn put(&self) {
+ self.put_with_level(0);
+ }
+
+ fn put_with_level(&self, level: usize) {
+ match &self.content {
+ Content::Leaf(b) => {
+ // Tree::put_spaces(level);
+ print!("{} {}\n", self.occurences, b);
+ },
+ Content::Parent { left, right } => {
+ print!("NODE {}\n", self.occurences);
+ Tree::put_spaces(level);
+ print!("left: ");
+ left.put_with_level(level + 1);
+ Tree::put_spaces(level);
+ print!("right: ");
+ right.put_with_level(level + 1);
+ },
+ }
+ }
+
+ fn put_spaces(n: usize) {
+ for _ in 0..(n * 2) {
+ print!(" ");
+ }
+ }
+}
+
+struct Symbols(BinaryHeap<Node>);
+
+impl Symbols {
+ fn from_file(file_path: &str) -> io::Result<Symbols> {
+ let f = fs::File::open(file_path)?;
+ let mut counter: HashMap<u8, usize> = HashMap::new();
+ for b in f.bytes() {
+ let k = b?;
+ let v = match counter.get(&k) {
+ Some(count) => count + 1,
+ None => 1,
+ };
+ counter.insert(k, v);
+ }
+ let heap: BinaryHeap<Node> = counter
+ .iter()
+ .map(|(k, v)| Node { occurences: *v, content: Content::Leaf(*k) })
+ .collect();
+ Ok(Symbols(heap))
+ }
+}
+
+use std::cmp::Ordering;
+
+impl Ord for Node {
+ fn cmp(&self, other: &Self) -> Ordering {
+ // self.occurences.cmp(&other.occurences)
+ other.occurences.cmp(&self.occurences) // dirty hack for min-heap
+ }
+}
+
+impl PartialOrd for Node {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Eq for Node {}
+
+impl PartialEq for Node {
+ fn eq(&self, other: &Self) -> bool {
+ self.occurences == other.occurences
+ }
+}
diff --git a/src/tree.rs b/src/tree.rs
new file mode 100644
index 0000000..d3cca22
--- /dev/null
+++ b/src/tree.rs
@@ -0,0 +1,22 @@
+use super::symbols::Symbols;
+
+struct Node {
+ occurence: usize,
+ content: Content,
+}
+
+enum Content {
+ Leaf(i8),
+ Parent {
+ left: Box<Node>,
+ right: Box<Node>,
+ },
+}
+
+type Tree = Node;
+
+// impl Tree {
+// fn from_symbols(symbols: Symbols) -> Tree {
+// symbols.0
+// }
+// }