use std::collections::VecDeque;

/// Number of bits of a packed reference that hold the window offset.
const INDEX_BIT_COUNT: usize = 12;
/// Number of bits of a packed reference that hold the encoded match length.
const LENGTH_BIT_COUNT: usize = 4;
/// Upper bound (exclusive) on offsets representable in `INDEX_BIT_COUNT` bits.
const WINDOW_SIZE: usize = 1 << INDEX_BIT_COUNT;
/// Number of distinct values representable in `LENGTH_BIT_COUNT` bits.
const RAW_LOOK_AHEAD_SIZE: usize = 1 << LENGTH_BIT_COUNT;
/// A match must be strictly longer than this to be emitted as a reference.
const BREAK_EVEN: usize = (1 + INDEX_BIT_COUNT + LENGTH_BIT_COUNT) / 9;
/// Longest match that can be encoded (lengths are stored minus `BREAK_EVEN + 1`).
const LOOK_AHEAD_SIZE: usize = RAW_LOOK_AHEAD_SIZE + BREAK_EVEN;

/// Finds the longest prefix of `look_ahead` that occurs in `window`.
///
/// Returns `(position, length)`, where `position` is the index in `window`
/// at which the match starts. The earliest position wins on ties, and
/// `(0, 0)` is returned when nothing matches. A match never runs past the
/// end of `window`.
fn find_longest_match(window: &VecDeque<u8>, look_ahead: &[u8]) -> (usize, usize) {
    let mut best_pos = 0;
    let mut best_len = 0;

    for pos in 0..window.len() {
        let len = window
            .range(pos..)
            .zip(look_ahead)
            .take_while(|(w, i)| w == i)
            .count();
        if len > best_len {
            best_len = len;
            best_pos = pos;
            // The whole look-ahead matched; no later position can do better.
            if best_len == look_ahead.len() {
                break;
            }
        }
    }

    (best_pos, best_len)
}

/// Compresses `input_data` with a sliding-window (LZSS-style) scheme.
///
/// The output is a sequence of groups of up to eight items, each group
/// followed by one flag byte. An item is either
///
/// * a raw byte (flag bit `1`), or
/// * a two-byte big-endian reference (flag bit `0`) packed as
///   `offset << LENGTH_BIT_COUNT | (length - BREAK_EVEN - 1)`, where `offset`
///   is the index of the match inside the current window.
///
/// Flag bits are assigned most-significant-bit first; unused low bits of the
/// final flag byte are zero. Empty input produces empty output.
///
/// NOTE(review): the flag byte is written *after* the items it describes, so
/// a decoder cannot parse the stream strictly front-to-back — confirm this
/// layout against the decompressor.
pub(crate) fn compress(input_data: &[u8]) -> Vec<u8> {
    let mut output_data = Vec::new();
    let mut window: VecDeque<u8> = VecDeque::with_capacity(WINDOW_SIZE);
    let mut flag_byte: u8 = 0;
    let mut bit_count: u8 = 0;
    let mut input_pos: usize = 0;

    while input_pos < input_data.len() {
        // A match can append several bytes at once, so trim in a loop: every
        // offset found below must fit in INDEX_BIT_COUNT bits.
        while window.len() >= WINDOW_SIZE {
            window.pop_front();
        }

        let look_ahead_len = LOOK_AHEAD_SIZE.min(input_data.len() - input_pos);
        let look_ahead = &input_data[input_pos..input_pos + look_ahead_len];
        let (match_pos, match_len) = find_longest_match(&window, look_ahead);

        let consumed = if match_len > BREAK_EVEN {
            // Reference: the flag bit stays 0 (flag_byte starts each group zeroed).
            debug_assert!(match_pos < WINDOW_SIZE);
            debug_assert!(match_len - BREAK_EVEN - 1 < RAW_LOOK_AHEAD_SIZE);
            // Cannot truncate: offset < 2^12 and encoded length < 2^4.
            let packed = ((match_pos << LENGTH_BIT_COUNT) | (match_len - BREAK_EVEN - 1)) as u16;
            output_data.extend_from_slice(&packed.to_be_bytes());
            match_len
        } else {
            // Raw byte: mark it with a 1 bit.
            flag_byte |= 1 << (7 - bit_count);
            output_data.push(look_ahead[0]);
            1
        };
        bit_count += 1;

        // Everything just encoded becomes searchable history.
        window.extend(look_ahead[..consumed].iter().copied());
        input_pos += consumed;

        // A group is complete after eight items: emit its flags.
        if bit_count == 8 {
            output_data.push(flag_byte);
            flag_byte = 0;
            bit_count = 0;
        }
    }

    // Flush the flags of a partial final group. The bits are already in their
    // MSB-first positions and the unused low bits are zero, so no shift is
    // needed.
    if bit_count > 0 {
        output_data.push(flag_byte);
    }

    output_data
}