diff options
| author | Charles <sircharlesaze@gmail.com> | 2020-06-26 08:36:42 +0200 |
|---|---|---|
| committer | Charles <sircharlesaze@gmail.com> | 2020-06-26 08:36:42 +0200 |
| commit | c4184968ec1cf9bcf8a9305ce858d5a56d34468d (patch) | |
| tree | a7bd0aa2a1797222bac61c251911f7451b78c34d | |
| parent | 063afb1650160c8973de51603be0acca53b39e5d (diff) | |
| download | huffman-c4184968ec1cf9bcf8a9305ce858d5a56d34468d.tar.gz huffman-c4184968ec1cf9bcf8a9305ce858d5a56d34468d.tar.bz2 huffman-c4184968ec1cf9bcf8a9305ce858d5a56d34468d.zip | |
Added tree to bits
| -rw-r--r-- | src/bits.rs | 68 | ||||
| -rw-r--r-- | src/main.rs | 28 | ||||
| -rw-r--r-- | src/symbols.rs | 122 | ||||
| -rw-r--r-- | src/tree.rs | 126 |
4 files changed, 206 insertions, 138 deletions
diff --git a/src/bits.rs b/src/bits.rs new file mode 100644 index 0000000..b479cc9 --- /dev/null +++ b/src/bits.rs @@ -0,0 +1,68 @@ +#[derive(Clone)] +pub struct BitSet { + data: Vec<u8>, + len: usize, +} + +impl BitSet { + pub fn new() -> BitSet { + BitSet { data: Vec::new(), len: 0 } + } + + pub fn push_front_bit(&mut self, bit: u8) { + if bit != 0 && bit != 1 { + panic!("bit should be 1 or 0"); + } + *self >>= 1; + self.data[0] |= bit << 7; + } + + fn shift_right_once(&mut self) { + if self.len == self.data.len() * 8 { + self.data.push(0); + } + for i in (1..self.data.len()).rev() { + self.data[i] >>= 1; + self.data[i] |= (self.data[i - 1] & 1) << 7; + } + self.data[0] >>= 1; + self.len += 1; + } + + pub fn concat(&mut self, other: &BitSet) { + if self.len % 8 == 0 { + self.data.extend(other.data.iter()); + self.len += other.len; + return; + } + + let mut other_mut = other.clone(); + let len_pre_shift = other_mut.len; + other_mut >>= self.len % 8; + *self.data.last_mut().unwrap() |= other_mut.data[0]; + self.data.extend(other_mut.data.iter().skip(1)); + self.len += len_pre_shift; + } +} + +use std::ops; + +impl ops::ShrAssign<usize> for BitSet { + fn shr_assign(&mut self, shift_size: usize) { + for _ in 0..shift_size { + self.shift_right_once(); + } + } +} + +use std::fmt; + +impl fmt::Debug for BitSet { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "len: {:2} data: ", self.len)?; + for chunk in &self.data { + write!(f, "{:08b} ", chunk)?; + } + Ok(()) + } +} diff --git a/src/main.rs b/src/main.rs index 2b4e6c5..920aee2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,30 @@ use std::env; +use std::fs; +use std::io::Read; -pub mod symbols; -// pub mod tree; -// -use symbols::Tree; +pub mod tree; +pub mod bits; + +use tree::Tree; +use bits::BitSet; fn main() { let file_path = env::args().nth(1).unwrap(); - let tree = Tree::from_file(&file_path).unwrap(); + + let f = fs::File::open(file_path).unwrap(); + let data: Vec<u8> = f.bytes().map(|x| x.unwrap()).collect(); + + let tree = Tree::from_data(&data); tree.put(); + + let bitmap = tree.to_bit_map(); + for (k, v) in bitmap.iter() { + println!("{:4} {:?}", format!("{:?}", *k as char), v); + } + + let mut bitset = BitSet::new(); + for byte in data { + bitset.concat(&bitmap[&byte]); + } + println!("{:?}", bitset); } diff --git a/src/symbols.rs b/src/symbols.rs deleted file mode 100644 index cd8fd52..0000000 --- a/src/symbols.rs +++ /dev/null @@ -1,122 +0,0 @@ -use std::fs; -use std::io; -use std::io::Read; -use std::collections::{BinaryHeap, HashMap}; - -pub struct Node { - occurences: usize, - content: Content, -} - -impl Node { - fn join(left: Node, right: Node) -> Node { - Node { - occurences: left.occurences + right.occurences, - content: Content::Parent { - left: Box::new(left), - right: Box::new(right), - } - } - } -} - -enum Content { - Leaf(u8), - Parent { - left: Box<Node>, - right: Box<Node>, - }, -} - -pub type Tree = Node; - -impl Tree { - pub fn from_file(file_path: &str) -> io::Result<Tree> { - let mut symbols = Symbols::from_file(file_path)?; - Ok(Tree::from_symbols(&mut symbols)) - } - - fn from_symbols(symbols: &mut Symbols) -> Tree { - while symbols.0.len() >= 2 { - let first = symbols.0.pop().unwrap(); - let second = symbols.0.pop().unwrap(); - let n = Node::join(first, second); - symbols.0.push(n); - } - let ret = symbols.0.pop().unwrap(); - ret - } - - pub fn put(&self) { - self.put_with_level(0); - } - - fn put_with_level(&self, level: usize) { - match &self.content { - Content::Leaf(b) => { - // Tree::put_spaces(level); - print!("{} {}\n", self.occurences, b); - }, - Content::Parent { left, right } => { - print!("NODE {}\n", self.occurences); - Tree::put_spaces(level); - print!("left: "); - left.put_with_level(level + 1); - Tree::put_spaces(level); - print!("right: "); - right.put_with_level(level + 1); - }, - } - } - - fn put_spaces(n: usize) { - for _ in 0..(n * 2) { - print!(" "); - } - } -} - -struct Symbols(BinaryHeap<Node>); - -impl Symbols { - fn from_file(file_path: &str) -> io::Result<Symbols> { - let f = fs::File::open(file_path)?; - let mut counter: HashMap<u8, usize> = HashMap::new(); - for b in f.bytes() { - let k = b?; - let v = match counter.get(&k) { - Some(count) => count + 1, - None => 1, - }; - counter.insert(k, v); - } - let heap: BinaryHeap<Node> = counter - .iter() - .map(|(k, v)| Node { occurences: *v, content: Content::Leaf(*k) }) - .collect(); - Ok(Symbols(heap)) - } -} - -use std::cmp::Ordering; - -impl Ord for Node { - fn cmp(&self, other: &Self) -> Ordering { - // self.occurences.cmp(&other.occurences) - other.occurences.cmp(&self.occurences) // dirty hack for min-heap - } -} - -impl PartialOrd for Node { - fn partial_cmp(&self, other: &Self) -> Option<Ordering> { - Some(self.cmp(other)) - } -} - -impl Eq for Node {} - -impl PartialEq for Node { - fn eq(&self, other: &Self) -> bool { - self.occurences == other.occurences - } -} diff --git a/src/tree.rs b/src/tree.rs index d3cca22..c7e8aae 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,22 +1,126 @@ -use super::symbols::Symbols; +use std::collections::{BinaryHeap, HashMap}; -struct Node { - occurence: usize, +use super::bits::BitSet; + +pub struct Node { + occurences: usize, content: Content, } +impl Node { + fn join(left: Node, right: Node) -> Node { + Node { + occurences: left.occurences + right.occurences, + content: Content::Parent { + left: Box::new(left), + right: Box::new(right), + } + } + } +} + enum Content { - Leaf(i8), + Leaf(u8), Parent { - left: Box<Node>, + left: Box<Node>, right: Box<Node>, }, } -type Tree = Node; +pub type Tree = Node; + +impl Tree { + pub fn from_data(data: &[u8]) -> Tree { + let mut counter: HashMap<u8, usize> = HashMap::new(); + for k in data { + let v = match counter.get(&k) { + Some(count) => count + 1, + None => 1, + }; + counter.insert(*k, v); + } + let mut heap: BinaryHeap<Node> = counter + .iter() + .map(|(k, v)| Node { occurences: *v, content: Content::Leaf(*k) }) + .collect(); + while heap.len() >= 2 { + let first = heap.pop().unwrap(); + let second = heap.pop().unwrap(); + let n = Node::join(first, second); + heap.push(n); + } + heap.pop().unwrap() + } + + pub fn to_bit_map(&self) -> HashMap<u8, BitSet> { + match &self.content { + Content::Leaf(b) => { + let mut hm = HashMap::with_capacity(1); + hm.insert(*b, BitSet::new()); + hm + }, + Content::Parent { left, right } => { + let mut left_hm = left.to_bit_map(); + for (_, bits) in left_hm.iter_mut() { + bits.push_front_bit(0); + } + let mut right_hm = right.to_bit_map(); + for (_, bits) in right_hm.iter_mut() { + bits.push_front_bit(1); + } + left_hm.extend(right_hm); + left_hm + }, + } + } + + pub fn put(&self) { + self.put_with_level(0); + } + + fn put_with_level(&self, level: usize) { + match &self.content { + Content::Leaf(b) => { + print!("{} {}\n", self.occurences, b); + }, + Content::Parent { left, right } => { + print!("NODE {}\n", self.occurences); + Tree::put_spaces(level); + print!("left: "); + left.put_with_level(level + 1); + Tree::put_spaces(level); + print!("right: "); + right.put_with_level(level + 1); + }, + } + } + + fn put_spaces(n: usize) { + for _ in 0..(n * 2) { + print!(" "); + } + } +} + +use std::cmp::Ordering; + +impl Ord for Node { + fn cmp(&self, other: &Self) -> Ordering { + // self.occurences.cmp(&other.occurences) + other.occurences.cmp(&self.occurences) // dirty hack to fake min-heap + } +} + +impl PartialOrd for Node { + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Eq for Node {} -// impl Tree { -// fn from_symbols(symbols: Symbols) -> Tree { -// symbols.0 -// } -// } +impl PartialEq for Node { + fn eq(&self, other: &Self) -> bool { + self.occurences == other.occurences + } +} |
