aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--README.md18
-rw-r--r--src/bits.rs4
-rw-r--r--src/conversion.rs48
-rw-r--r--src/main.rs40
-rw-r--r--src/tree.rs45
6 files changed, 117 insertions, 39 deletions
diff --git a/.gitignore b/.gitignore
index eed3cc5..104a166 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
/target
*.txt
+*.huffman
diff --git a/README.md b/README.md
index 74ef205..75fb0ad 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,23 @@
# huffman
-Implementation of [Huffman coding](https://en.wikipedia.org/wiki/Huffman_coding?oldformat=true) made for educational purposes.
+Implementation of the [Huffman coding](https://en.wikipedia.org/wiki/Huffman_coding?oldformat=true) made for educational purposes.
## Usage
+* compress: `cargo run < input_file > output_file.huffman`
+* decompress: `cargo run d < input_file.huffman > output_file`
+
`python3 draft.py [file_name]` to run the python draft.
-`cargo run [file_name]` for the real thing.
+## File format
+
+Compress to a custom `.huffman` file format which is a header of the huffman coding tree followed by the compressed content.
+
+
+### Header format
+
+4 byte unsigned int: header size (including this field)
+Conversion table where each entry's format is:
+ 1 byte for the actual byte value
+ 1 byte for size of representation in bits
+ the representation aligned on a 8-bit boundary
diff --git a/src/bits.rs b/src/bits.rs
index b479cc9..0f944ca 100644
--- a/src/bits.rs
+++ b/src/bits.rs
@@ -1,7 +1,7 @@
#[derive(Clone)]
pub struct BitSet {
- data: Vec<u8>,
- len: usize,
+ pub data: Vec<u8>,
+ pub len: usize,
}
impl BitSet {
diff --git a/src/conversion.rs b/src/conversion.rs
new file mode 100644
index 0000000..ad7c320
--- /dev/null
+++ b/src/conversion.rs
@@ -0,0 +1,48 @@
+use std::collections::HashMap;
+
+use super::bits::BitSet;
+use super::tree::Tree;
+
+pub struct Table(pub HashMap<u8, BitSet>);
+
+impl Table {
+ pub fn from_tree(tree: &Tree) -> Table {
+ Table(tree.to_hash_map())
+ }
+
+ pub fn convert(&self, data: Vec<u8>) -> Vec<u8> {
+ let mut bitset = BitSet::new();
+ for byte in data {
+ bitset.concat(&self.0[&byte]);
+ }
+ bitset.data
+ }
+
+ pub fn serialize(&self) -> Vec<u8> {
+ let mut out: Vec<u8> = Vec::new();
+ let size: u32 = self.0.iter()
+ .fold(0, |acc, (_, v)| acc + 2 + v.data.len() as u32);
+ out.push(((size & 0xff000000) >> 24) as u8);
+ out.push(((size & 0x00ff0000) >> 16) as u8);
+ out.push(((size & 0x0000ff00) >> 8) as u8);
+ out.push(((size & 0x000000ff) >> 0) as u8);
+
+ for (k, v) in &self.0 {
+ out.push(*k);
+ out.push(v.len as u8);
+ out.extend(v.data.iter());
+ }
+ out
+ }
+}
+
+use std::fmt;
+
+impl fmt::Debug for Table {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ for (k, v) in self.0.iter() {
+ write!(f, "{:4} {:?}\n", format!("{:?}", *k as char), v)?;
+ }
+ Ok(())
+ }
+}
diff --git a/src/main.rs b/src/main.rs
index 920aee2..a54a3bd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,30 +1,40 @@
use std::env;
-use std::fs;
+// use std::fs;
+use std::io;
use std::io::Read;
+use std::io::Write;
pub mod tree;
pub mod bits;
+pub mod conversion;
use tree::Tree;
-use bits::BitSet;
fn main() {
- let file_path = env::args().nth(1).unwrap();
+ if let Some(s) = env::args().nth(1) {
+ if s != "d" {
+ return
+ }
+ // let data: Vec<u8> = io::stdin().bytes().map(|x| x.unwrap()).collect();
+ // deserialize
- let f = fs::File::open(file_path).unwrap();
- let data: Vec<u8> = f.bytes().map(|x| x.unwrap()).collect();
+ } else {
+ // let f = fs::File::open(file_path).unwrap();
- let tree = Tree::from_data(&data);
- tree.put();
+ let data: Vec<u8> = io::stdin().bytes().map(|x| x.unwrap()).collect();
- let bitmap = tree.to_bit_map();
- for (k, v) in bitmap.iter() {
- println!("{:4} {:?}", format!("{:?}", *k as char), v);
- }
+ let tree = Tree::from_data(&data);
+ // print!("{:?}", tree);
+
+ let table = conversion::Table::from_tree(&tree);
+ // print!("{:?}", table);
+
+ let converted_data = table.convert(data);
+ // println!("{:?}", converted_data);
+
+ let header = table.serialize();
- let mut bitset = BitSet::new();
- for byte in data {
- bitset.concat(&bitmap[&byte]);
+ io::stdout().write_all(&header).unwrap();
+ io::stdout().write_all(&converted_data).unwrap();
}
- println!("{:?}", bitset);
}
diff --git a/src/tree.rs b/src/tree.rs
index c7e8aae..7d40979 100644
--- a/src/tree.rs
+++ b/src/tree.rs
@@ -1,3 +1,4 @@
+use std::fmt;
use std::collections::{BinaryHeap, HashMap};
use super::bits::BitSet;
@@ -52,7 +53,7 @@ impl Tree {
heap.pop().unwrap()
}
- pub fn to_bit_map(&self) -> HashMap<u8, BitSet> {
+ pub fn to_hash_map(&self) -> HashMap<u8, BitSet> {
match &self.content {
Content::Leaf(b) => {
let mut hm = HashMap::with_capacity(1);
@@ -60,11 +61,11 @@ impl Tree {
hm
},
Content::Parent { left, right } => {
- let mut left_hm = left.to_bit_map();
+ let mut left_hm = left.to_hash_map();
for (_, bits) in left_hm.iter_mut() {
bits.push_front_bit(0);
}
- let mut right_hm = right.to_bit_map();
+ let mut right_hm = right.to_hash_map();
for (_, bits) in right_hm.iter_mut() {
bits.push_front_bit(1);
}
@@ -74,31 +75,29 @@ impl Tree {
}
}
- pub fn put(&self) {
- self.put_with_level(0);
+ fn fmt_spaces(f: &mut fmt::Formatter, level: usize) -> fmt::Result {
+ for _ in 0..level {
+ write!(f, " ")?;
+ }
+ Ok(())
}
- fn put_with_level(&self, level: usize) {
+ fn fmt_with_level(&self, f: &mut fmt::Formatter, level: usize) -> fmt::Result {
match &self.content {
Content::Leaf(b) => {
- print!("{} {}\n", self.occurences, b);
+ write!(f, "{} {}\n", self.occurences, b)?;
},
Content::Parent { left, right } => {
- print!("NODE {}\n", self.occurences);
- Tree::put_spaces(level);
- print!("left: ");
- left.put_with_level(level + 1);
- Tree::put_spaces(level);
- print!("right: ");
- right.put_with_level(level + 1);
+ write!(f, "NODE {}\n", self.occurences)?;
+ Tree::fmt_spaces(f, level)?;
+ write!(f, "left: ")?;
+ left.fmt_with_level(f, level + 1)?;
+ Tree::fmt_spaces(f, level)?;
+ write!(f, "right: ")?;
+ right.fmt_with_level(f, level + 1)?;
},
}
- }
-
- fn put_spaces(n: usize) {
- for _ in 0..(n * 2) {
- print!(" ");
- }
+ Ok(())
}
}
@@ -124,3 +123,9 @@ impl PartialEq for Node {
self.occurences == other.occurences
}
}
+
+impl fmt::Debug for Tree {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.fmt_with_level(f, 0)
+ }
+}