-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
64 additions
and
173 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,202 +1,93 @@ | ||
use std::collections::VecDeque; | ||
|
||
const INDEX_BIT_COUNT: usize = 12; | ||
const LENGTH_BIT_COUNT: usize = 4; | ||
const WINDOW_SIZE: usize = 1 << INDEX_BIT_COUNT; | ||
const RAW_LOOK_AHEAD_SIZE: usize = 1 << LENGTH_BIT_COUNT; | ||
const BREAK_EVEN: usize = ((1 + INDEX_BIT_COUNT + LENGTH_BIT_COUNT) / 9) as usize; | ||
const LOOK_AHEAD_SIZE: usize = RAW_LOOK_AHEAD_SIZE + BREAK_EVEN; | ||
const TREE_ROOT: usize = WINDOW_SIZE; | ||
const UNUSED: usize = i32::MAX as usize; | ||
|
||
#[derive(Clone, Copy)] | ||
enum Child { | ||
Smaller = 0, | ||
Larger = 1, | ||
} | ||
|
||
#[derive(Clone)] | ||
struct Node { | ||
parent: usize, | ||
children: [usize; 2], | ||
} | ||
|
||
impl Node { | ||
fn new() -> Node { | ||
Node { | ||
parent: UNUSED, | ||
children: [UNUSED; 2], | ||
} | ||
} | ||
} | ||
|
||
impl From<u8> for Node { | ||
fn from(value: u8) -> Self { | ||
Node { parent: value as usize, children: [0, 0] } | ||
} | ||
} | ||
|
||
fn init_tree(tree: &mut [Node]) { | ||
for node in tree.iter_mut() { | ||
node.parent = UNUSED; | ||
node.children = [UNUSED, UNUSED]; | ||
} | ||
tree[TREE_ROOT].children[Child::Larger as usize] = WINDOW_SIZE + 1; | ||
tree[WINDOW_SIZE + 1].parent = TREE_ROOT; | ||
} | ||
|
||
pub(crate) fn compress(input_data: &[u8]) -> Vec<Node> { | ||
pub(crate) fn compress(input_data: &[u8]) -> Vec<u8> { | ||
let mut output_data = Vec::new(); | ||
let mut window = vec![Node::new(); WINDOW_SIZE * 5]; | ||
init_tree(&mut window); | ||
let mut current_pos = 0; | ||
let mut match_position = 0; | ||
let mut len = 0; | ||
let mut count_bits = 0; | ||
let mut mask = 1; | ||
let mut window: VecDeque<u8> = VecDeque::with_capacity(WINDOW_SIZE); | ||
let mut flag_byte: u8 = 0; | ||
let mut bit_count: u8 = 0; | ||
let mut input_pos: usize = 0; | ||
|
||
while input_pos < input_data.len() { | ||
// Ensure the window does not exceed WINDOW_SIZE | ||
if window.len() >= WINDOW_SIZE { | ||
window.pop_front(); | ||
} | ||
|
||
while len < input_data.len() { | ||
let mut match_length = 0; | ||
let mut test_node = TREE_ROOT; | ||
let mut delta = 0; | ||
// Find the longest match in the window | ||
let mut best_match_len = 0; | ||
let mut best_match_pos = 0; | ||
let max_search_len = std::cmp::min(LOOK_AHEAD_SIZE, input_data.len() - input_pos); | ||
|
||
loop { | ||
for i in 0..LOOK_AHEAD_SIZE { | ||
delta = window[current_pos + i].parent as isize - window[test_node + i].parent as isize; | ||
if delta != 0 { | ||
break; | ||
} | ||
for pos in 0..window.len() { | ||
let mut match_len = 0; | ||
while match_len < max_search_len && window.get(pos + match_len) == Some(&input_data[input_pos + match_len]) { | ||
match_len += 1; | ||
} | ||
|
||
if match_length >= LOOK_AHEAD_SIZE { | ||
replace_node(test_node, current_pos, &mut window); | ||
break; | ||
} | ||
|
||
if match_length > 0 && delta == 0 { | ||
match_length += 1; | ||
} else { | ||
match_length = 0; | ||
if match_len > best_match_len { | ||
best_match_len = match_len; | ||
best_match_pos = pos; | ||
} | ||
|
||
let child_prop = if delta >= 0 { Child::Larger } else { Child::Smaller }; | ||
if let Some(child) = get_child(test_node, child_prop, &window) { | ||
test_node = child; | ||
} else { | ||
add_node(test_node, current_pos, child_prop, &mut window); | ||
if best_match_len >= LOOK_AHEAD_SIZE { | ||
break; | ||
} | ||
} | ||
|
||
let mut replace_count = 0; | ||
if match_length <= BREAK_EVEN { | ||
replace_count = 1; | ||
output_data.push(mask as u8); | ||
output_data.push(input_data[current_pos]); | ||
len += 1; | ||
} else { | ||
len += 2; | ||
let value = ((current_pos - match_position - 1) << LENGTH_BIT_COUNT) | (match_length - BREAK_EVEN - 1); | ||
let high = (value & 0xFF) as u8; | ||
let low = ((value >> 8) & 0xFF) as u8; | ||
if best_match_len > BREAK_EVEN { | ||
// Compressed reference | ||
let offset = best_match_pos; | ||
let length = best_match_len - BREAK_EVEN - 1; | ||
|
||
// Pack offset and length into 2 bytes | ||
let value = ((offset << LENGTH_BIT_COUNT) | length) as u16; | ||
let high = (value >> 8) as u8; | ||
let low = (value & 0xFF) as u8; | ||
|
||
// Set flag bit to 0 for compressed | ||
flag_byte &= !(1 << (7 - bit_count)); | ||
bit_count += 1; | ||
output_data.push(high); | ||
output_data.push(low); | ||
replace_count = match_length; | ||
} | ||
|
||
if count_bits == 8 { | ||
output_data.push(0); | ||
count_bits = 0; | ||
mask = 1; | ||
len += 1; | ||
// Add matched bytes to the window | ||
for i in 0..best_match_len { | ||
window.push_back(input_data[input_pos + i]); | ||
} | ||
input_pos += best_match_len; | ||
} else { | ||
mask = (mask << 1) & 0xFF; | ||
count_bits += 1; | ||
// Raw byte | ||
let byte = input_data[input_pos]; | ||
|
||
// Set flag bit to 1 for raw | ||
flag_byte |= 1 << (7 - bit_count); | ||
bit_count += 1; | ||
output_data.push(byte); | ||
|
||
// Add the byte to the window | ||
window.push_back(byte); | ||
input_pos += 1; | ||
} | ||
|
||
for _ in 0..replace_count { | ||
delete_node(current_pos, &mut window); | ||
// Convert input_data[len] to Node and assign it to window | ||
window[current_pos] = Node::from(input_data[len]); | ||
len += 1; | ||
current_pos = (current_pos + 1) % WINDOW_SIZE; | ||
// Output flag byte every 8 data elements | ||
if bit_count == 8 { | ||
output_data.push(flag_byte); | ||
flag_byte = 0; | ||
bit_count = 0; | ||
} | ||
} | ||
|
||
if count_bits == 0 { | ||
output_data.pop(); | ||
// Output remaining flag byte if any | ||
if bit_count > 0 { | ||
// Zero-fill the remaining bits | ||
flag_byte <<= (8 - bit_count); | ||
output_data.push(flag_byte); | ||
} | ||
|
||
output_data | ||
} | ||
|
||
|
||
|
||
fn get_child(node: usize, child_prop: Child, window: &Vec<Node>) -> Option<usize> { | ||
let child_index = child_prop as usize; | ||
let child = window[node + 2 + child_index]; | ||
if child != UNUSED { | ||
Some(child) | ||
} else { | ||
None | ||
} | ||
} | ||
|
||
fn add_node(parent: usize, child: usize, child_prop: Child, window: &mut Vec<Node>) { | ||
let child_index = child_prop as usize; | ||
window[parent + 2 + child_index] = child; | ||
window[child] = UNUSED; | ||
} | ||
|
||
fn replace_node(old_node: usize, new_node: usize, window: &mut Vec<Node>) { | ||
let parent = window[old_node].parent; | ||
let parent_node = &mut window[parent]; | ||
|
||
if parent_node.children[Child::Smaller as usize] == old_node { | ||
parent_node.children[Child::Smaller as usize] = new_node; | ||
} else { | ||
parent_node.children[Child::Larger as usize] = new_node; | ||
} | ||
|
||
window[new_node].parent = parent; | ||
window[old_node].parent = UNUSED; | ||
} | ||
|
||
fn delete_node(node: usize, window: &mut Vec<Node>) { | ||
let parent = window[node]; | ||
if parent == UNUSED { | ||
return; | ||
} | ||
let mut replacement = UNUSED; | ||
let smaller_child = window[parent + 2]; | ||
let larger_child = window[parent + 3]; | ||
if smaller_child == UNUSED { | ||
replacement = larger_child; | ||
contract_node(node, replacement, window); | ||
} else if larger_child == UNUSED { | ||
replacement = smaller_child; | ||
contract_node(node, replacement, window); | ||
} else { | ||
replacement = find_next_node(node, window); | ||
delete_node(replacement, window); | ||
replace_node(node, replacement, window); | ||
} | ||
} | ||
|
||
fn contract_node(old_node: usize, new_node: usize, window: &mut Vec<usize>) { | ||
let parent = window[old_node]; | ||
if parent != UNUSED { | ||
let parent_index = parent; | ||
if window[parent_index + 2] == old_node { | ||
window[parent_index + 2] = new_node; | ||
} else { | ||
window[parent_index + 3] = new_node; | ||
} | ||
} | ||
} | ||
|
||
fn find_next_node(node: usize, window: &[usize]) -> usize { | ||
let mut next = window[node + 3]; | ||
while window[next] != UNUSED { | ||
next = window[next + 2]; | ||
} | ||
next | ||
} |