1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
use std::u16;

use lzvalue::LZValue;
use huffman_table::{NUM_LITERALS_AND_LENGTHS, NUM_DISTANCE_CODES, END_OF_BLOCK_POSITION,
                    get_distance_code, get_length_code};

/// The type used for representing how many times a literal, length or distance code has been ouput
/// to the current buffer.
/// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using
/// 16-bit values.
pub type FrequencyType = u16;

/// The maximum number of literals/lengths in the buffer, which in practice also means the maximum
/// number of literals/lengths output before a new block is started.
/// This should not be larger than the maximum value `FrequencyType` can represent to prevent
/// overflowing (which would degrade, or in the worst case break compression).
pub const MAX_BUFFER_LENGTH: usize = 1024 * 31;

#[derive(Debug, PartialEq)]
pub enum BufferStatus {
    NotFull,
    Full,
}

/// A trait used by the lz77 compression function to write output.
/// Used to use the same function for compression with both fixed and dynamic huffman codes
/// (When fixed codes are used, there is no need to store frequency information)
pub trait OutputWriter {
    fn write_literal(&mut self, literal: u8) -> BufferStatus;
    fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus;
    fn buffer_length(&self) -> usize;
    fn clear_buffer(&mut self);
    fn get_buffer(&self) -> &[LZValue];
}

pub struct _DummyWriter {
    written: usize,
}

impl OutputWriter for _DummyWriter {
    fn write_literal(&mut self, _: u8) -> BufferStatus {
        self.written += 1;
        BufferStatus::NotFull
    }
    fn write_length_distance(&mut self, _: u16, _: u16) -> BufferStatus {
        self.written += 2;
        BufferStatus::NotFull
    }
    fn buffer_length(&self) -> usize {
        self.written
    }
    fn clear_buffer(&mut self) {}

    fn get_buffer(&self) -> &[LZValue] {
        &[]
    }
}

fn check_buffer_length(buffer: &[LZValue]) -> BufferStatus {
    if buffer.len() >= MAX_BUFFER_LENGTH {
        BufferStatus::Full
    } else {
        BufferStatus::NotFull
    }
}

/// `OutputWriter` that doesn't store frequency information
#[derive(Debug)]
pub struct FixedWriter {
    pub buffer: Vec<LZValue>,
}

impl FixedWriter {
    pub fn new() -> FixedWriter {
        FixedWriter { buffer: Vec::with_capacity(MAX_BUFFER_LENGTH) }
    }
}

impl OutputWriter for FixedWriter {
    fn write_literal(&mut self, literal: u8) -> BufferStatus {
        debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH);
        self.buffer.push(LZValue::literal(literal));
        check_buffer_length(&self.buffer)
    }

    fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus {
        debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH);
        self.buffer.push(LZValue::length_distance(length, distance));
        check_buffer_length(&self.buffer)
    }

    fn buffer_length(&self) -> usize {
        self.buffer.len()
    }

    fn clear_buffer(&mut self) {
        self.buffer.clear();
    }

    fn get_buffer(&self) -> &[LZValue] {
        &self.buffer
    }
}

// `OutputWriter` that keeps track of the usage of different codes
pub struct DynamicWriter {
    fixed_writer: FixedWriter,
    // The two last length codes are not actually used, but only participates in code construction
    // Therefore, we ignore them to get the correct number of lengths
    frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS],
    distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES],
}

impl OutputWriter for DynamicWriter {
    fn write_literal(&mut self, literal: u8) -> BufferStatus {
        let ret = self.fixed_writer.write_literal(literal);
        self.frequencies[usize::from(literal)] += 1;
        ret
    }

    fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus {
        let ret = self.fixed_writer.write_length_distance(length, distance);
        let l_code_num = get_length_code(length);
        // As we limit the buffer to 2^16 values, this should be safe from overflowing.
        self.frequencies[l_code_num] += 1;
        let d_code_num = get_distance_code(distance);
        self.distance_frequencies[usize::from(d_code_num)] += 1;
        ret
    }

    fn buffer_length(&self) -> usize {
        self.fixed_writer.buffer_length()
    }

    fn clear_buffer(&mut self) {
        self.clear_data();
        self.clear();
    }

    fn get_buffer(&self) -> &[LZValue] {
        self.fixed_writer.get_buffer()
    }
}

impl DynamicWriter {
    pub fn new() -> DynamicWriter {
        let mut w = DynamicWriter {
            fixed_writer: FixedWriter::new(),
            frequencies: [0; NUM_LITERALS_AND_LENGTHS],
            distance_frequencies: [0; NUM_DISTANCE_CODES],
        };
        // This will always be 1,
        // since there will always only be one end of block marker in each block
        w.frequencies[END_OF_BLOCK_POSITION] = 1;
        w
    }

    /// Special output function used with RLE compression
    /// that avoids bothering to lookup a distance code.
    pub fn write_length_rle(&mut self, length: u16) -> BufferStatus {
        let ret = self.fixed_writer.write_length_distance(length, 1);
        let l_code_num = get_length_code(length);
        // As we limit the buffer to 2^16 values, this should be safe from overflowing.
        self.frequencies[l_code_num] += 1;
        self.distance_frequencies[0] += 1;
        ret
    }

    pub fn get_frequencies(&self) -> (&[u16], &[u16]) {
        (&self.frequencies, &self.distance_frequencies)
    }

    pub fn clear_frequencies(&mut self) {
        self.frequencies = [0; NUM_LITERALS_AND_LENGTHS];
        self.distance_frequencies = [0; NUM_DISTANCE_CODES];
        self.frequencies[END_OF_BLOCK_POSITION] = 1;
    }

    pub fn clear_data(&mut self) {
        self.fixed_writer.clear_buffer();
    }

    pub fn clear(&mut self) {
        self.clear_frequencies();
        self.clear_data();
    }
}