aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/bits.rs68
-rw-r--r--src/main.rs28
-rw-r--r--src/symbols.rs122
-rw-r--r--src/tree.rs126
4 files changed, 206 insertions, 138 deletions
diff --git a/src/bits.rs b/src/bits.rs
new file mode 100644
index 0000000..b479cc9
--- /dev/null
+++ b/src/bits.rs
@@ -0,0 +1,68 @@
+#[derive(Clone)]
+pub struct BitSet {
+ data: Vec<u8>,
+ len: usize,
+}
+
+impl BitSet {
+ pub fn new() -> BitSet {
+ BitSet { data: Vec::new(), len: 0 }
+ }
+
+ pub fn push_front_bit(&mut self, bit: u8) {
+ if bit != 0 && bit != 1 {
+ panic!("bit should be 1 or 0");
+ }
+ *self >>= 1;
+ self.data[0] |= bit << 7;
+ }
+
+ fn shift_right_once(&mut self) {
+ if self.len == self.data.len() * 8 {
+ self.data.push(0);
+ }
+ for i in (1..self.data.len()).rev() {
+ self.data[i] >>= 1;
+ self.data[i] |= (self.data[i - 1] & 1) << 7;
+ }
+ self.data[0] >>= 1;
+ self.len += 1;
+ }
+
+ pub fn concat(&mut self, other: &BitSet) {
+ if self.len % 8 == 0 {
+ self.data.extend(other.data.iter());
+ self.len += other.len;
+ return;
+ }
+
+ let mut other_mut = other.clone();
+ let len_pre_shift = other_mut.len;
+ other_mut >>= self.len % 8;
+ *self.data.last_mut().unwrap() |= other_mut.data[0];
+ self.data.extend(other_mut.data.iter().skip(1));
+ self.len += len_pre_shift;
+ }
+}
+
+use std::ops;
+
+impl ops::ShrAssign<usize> for BitSet {
+ fn shr_assign(&mut self, shift_size: usize) {
+ for _ in 0..shift_size {
+ self.shift_right_once();
+ }
+ }
+}
+
+use std::fmt;
+
+impl fmt::Debug for BitSet {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "len: {:2} data: ", self.len)?;
+ for chunk in &self.data {
+ write!(f, "{:08b} ", chunk)?;
+ }
+ Ok(())
+ }
+}
diff --git a/src/main.rs b/src/main.rs
index 2b4e6c5..920aee2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,12 +1,30 @@
use std::env;
+use std::fs;
+use std::io::Read;
-pub mod symbols;
-// pub mod tree;
-//
-use symbols::Tree;
+pub mod tree;
+pub mod bits;
+
+use tree::Tree;
+use bits::BitSet;
fn main() {
let file_path = env::args().nth(1).unwrap();
- let tree = Tree::from_file(&file_path).unwrap();
+
+ let f = fs::File::open(file_path).unwrap();
+ let data: Vec<u8> = f.bytes().map(|x| x.unwrap()).collect();
+
+ let tree = Tree::from_data(&data);
tree.put();
+
+ let bitmap = tree.to_bit_map();
+ for (k, v) in bitmap.iter() {
+ println!("{:4} {:?}", format!("{:?}", *k as char), v);
+ }
+
+ let mut bitset = BitSet::new();
+ for byte in data {
+ bitset.concat(&bitmap[&byte]);
+ }
+ println!("{:?}", bitset);
}
diff --git a/src/symbols.rs b/src/symbols.rs
deleted file mode 100644
index cd8fd52..0000000
--- a/src/symbols.rs
+++ /dev/null
@@ -1,122 +0,0 @@
-use std::fs;
-use std::io;
-use std::io::Read;
-use std::collections::{BinaryHeap, HashMap};
-
-pub struct Node {
- occurences: usize,
- content: Content,
-}
-
-impl Node {
- fn join(left: Node, right: Node) -> Node {
- Node {
- occurences: left.occurences + right.occurences,
- content: Content::Parent {
- left: Box::new(left),
- right: Box::new(right),
- }
- }
- }
-}
-
-enum Content {
- Leaf(u8),
- Parent {
- left: Box<Node>,
- right: Box<Node>,
- },
-}
-
-pub type Tree = Node;
-
-impl Tree {
- pub fn from_file(file_path: &str) -> io::Result<Tree> {
- let mut symbols = Symbols::from_file(file_path)?;
- Ok(Tree::from_symbols(&mut symbols))
- }
-
- fn from_symbols(symbols: &mut Symbols) -> Tree {
- while symbols.0.len() >= 2 {
- let first = symbols.0.pop().unwrap();
- let second = symbols.0.pop().unwrap();
- let n = Node::join(first, second);
- symbols.0.push(n);
- }
- let ret = symbols.0.pop().unwrap();
- ret
- }
-
- pub fn put(&self) {
- self.put_with_level(0);
- }
-
- fn put_with_level(&self, level: usize) {
- match &self.content {
- Content::Leaf(b) => {
- // Tree::put_spaces(level);
- print!("{} {}\n", self.occurences, b);
- },
- Content::Parent { left, right } => {
- print!("NODE {}\n", self.occurences);
- Tree::put_spaces(level);
- print!("left: ");
- left.put_with_level(level + 1);
- Tree::put_spaces(level);
- print!("right: ");
- right.put_with_level(level + 1);
- },
- }
- }
-
- fn put_spaces(n: usize) {
- for _ in 0..(n * 2) {
- print!(" ");
- }
- }
-}
-
-struct Symbols(BinaryHeap<Node>);
-
-impl Symbols {
- fn from_file(file_path: &str) -> io::Result<Symbols> {
- let f = fs::File::open(file_path)?;
- let mut counter: HashMap<u8, usize> = HashMap::new();
- for b in f.bytes() {
- let k = b?;
- let v = match counter.get(&k) {
- Some(count) => count + 1,
- None => 1,
- };
- counter.insert(k, v);
- }
- let heap: BinaryHeap<Node> = counter
- .iter()
- .map(|(k, v)| Node { occurences: *v, content: Content::Leaf(*k) })
- .collect();
- Ok(Symbols(heap))
- }
-}
-
-use std::cmp::Ordering;
-
-impl Ord for Node {
- fn cmp(&self, other: &Self) -> Ordering {
- // self.occurences.cmp(&other.occurences)
- other.occurences.cmp(&self.occurences) // dirty hack for min-heap
- }
-}
-
-impl PartialOrd for Node {
- fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
- Some(self.cmp(other))
- }
-}
-
-impl Eq for Node {}
-
-impl PartialEq for Node {
- fn eq(&self, other: &Self) -> bool {
- self.occurences == other.occurences
- }
-}
diff --git a/src/tree.rs b/src/tree.rs
index d3cca22..c7e8aae 100644
--- a/src/tree.rs
+++ b/src/tree.rs
@@ -1,22 +1,126 @@
-use super::symbols::Symbols;
+use std::collections::{BinaryHeap, HashMap};
-struct Node {
- occurence: usize,
+use super::bits::BitSet;
+
+pub struct Node {
+ occurences: usize,
content: Content,
}
+impl Node {
+ fn join(left: Node, right: Node) -> Node {
+ Node {
+ occurences: left.occurences + right.occurences,
+ content: Content::Parent {
+ left: Box::new(left),
+ right: Box::new(right),
+ }
+ }
+ }
+}
+
enum Content {
- Leaf(i8),
+ Leaf(u8),
Parent {
- left: Box<Node>,
+ left: Box<Node>,
right: Box<Node>,
},
}
-type Tree = Node;
+pub type Tree = Node;
+
+impl Tree {
+ pub fn from_data(data: &[u8]) -> Tree {
+ let mut counter: HashMap<u8, usize> = HashMap::new();
+ for k in data {
+ let v = match counter.get(&k) {
+ Some(count) => count + 1,
+ None => 1,
+ };
+ counter.insert(*k, v);
+ }
+ let mut heap: BinaryHeap<Node> = counter
+ .iter()
+ .map(|(k, v)| Node { occurences: *v, content: Content::Leaf(*k) })
+ .collect();
+ while heap.len() >= 2 {
+ let first = heap.pop().unwrap();
+ let second = heap.pop().unwrap();
+ let n = Node::join(first, second);
+ heap.push(n);
+ }
+ heap.pop().unwrap()
+ }
+
+ pub fn to_bit_map(&self) -> HashMap<u8, BitSet> {
+ match &self.content {
+ Content::Leaf(b) => {
+ let mut hm = HashMap::with_capacity(1);
+ hm.insert(*b, BitSet::new());
+ hm
+ },
+ Content::Parent { left, right } => {
+ let mut left_hm = left.to_bit_map();
+ for (_, bits) in left_hm.iter_mut() {
+ bits.push_front_bit(0);
+ }
+ let mut right_hm = right.to_bit_map();
+ for (_, bits) in right_hm.iter_mut() {
+ bits.push_front_bit(1);
+ }
+ left_hm.extend(right_hm);
+ left_hm
+ },
+ }
+ }
+
+ pub fn put(&self) {
+ self.put_with_level(0);
+ }
+
+ fn put_with_level(&self, level: usize) {
+ match &self.content {
+ Content::Leaf(b) => {
+ print!("{} {}\n", self.occurences, b);
+ },
+ Content::Parent { left, right } => {
+ print!("NODE {}\n", self.occurences);
+ Tree::put_spaces(level);
+ print!("left: ");
+ left.put_with_level(level + 1);
+ Tree::put_spaces(level);
+ print!("right: ");
+ right.put_with_level(level + 1);
+ },
+ }
+ }
+
+ fn put_spaces(n: usize) {
+ for _ in 0..(n * 2) {
+ print!(" ");
+ }
+ }
+}
+
+use std::cmp::Ordering;
+
+impl Ord for Node {
+ fn cmp(&self, other: &Self) -> Ordering {
+ // self.occurences.cmp(&other.occurences)
+ other.occurences.cmp(&self.occurences) // dirty hack to fake min-heap
+ }
+}
+
+impl PartialOrd for Node {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Eq for Node {}
-// impl Tree {
-// fn from_symbols(symbols: Symbols) -> Tree {
-// symbols.0
-// }
-// }
+impl PartialEq for Node {
+ fn eq(&self, other: &Self) -> bool {
+ self.occurences == other.occurences
+ }
+}