libStatGen Software  1
MiniDeflate.h
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __MINIDEFLATE_H__
19 #define __MINIDEFLATE_H__
20 
21 #include <stdio.h>
22 
23 // MiniDeflate reads and writes files in a simple Deflate like format
24 // A quick overview of this format follows, at the bottom of this file
25 //
26 
27 // Performance tuning constants
28 //
29 
30 // Hash table size is HASH_SIZE (a prime)
31 #define HASH_SIZE 4093
32 // Hash table depth is HASH_DEPTH (a power of 2)
33 #define HASH_DEPTH 8
34 // Matches that are not at least OKAY_MATCH chars are added to hash table
35 #define OKAY_MATCH 32
36 // Buffer size for FILE I/O
37 #define BUFFER_SIZE (32 * 1024)
38 
40 {
41 public:
42  MiniDeflate();
43  ~MiniDeflate();
44 
45  void Deflate(FILE * output, void * input, size_t bytes);
46  void Inflate(FILE * input, void * ouput, size_t bytes);
47 
48 private:
49  unsigned char * buffer;
50  unsigned char * hash_keys;
51  unsigned char ** hash_values;
52 
53  // Inline functions used during file compression
54  inline void EvaluateMatch(unsigned char * in, int len, int hash,
55  unsigned char * & best_pos, int & best_match);
56  inline void QuoteLiterals(unsigned char * & in, int literal,
57  unsigned char * & out, int & buffer_len,
58  FILE * output);
59  inline void OutputLiterals(unsigned char * & in, int literal,
60  unsigned char * & out, int & buffer_len,
61  FILE * output);
62  inline void CiteLiteral(unsigned char * & out, int literal,
63  unsigned char * & in, int & buffer_len,
64  FILE * input);
65 };
66 
67 // Format specification for deflate files
68 //
69 // A compressed file is a sequence of bytes {0 .. N}.
70 // Each byte is a sequence of bits [0 .. 7] with 0 as the Most Significant Bit.
71 //
72 // The following tokens are recognized:
73 //
74 // Literal quotes -- refer to unique strings
75 //
76 // BYTE0 BYTE1 BYTE2 Description
77 // 0 HI LO Quote of 31 bytes of more
78 // Followed by (HI << 8 + LO + 31) quoted chars
79 // 0:4|LEN Quote of up to 1-15 bytes
80 // Followed by LEN quoted chars
81 //
82 // String matches -- refer to previous strings in the input stream
83 //
84 // BYTE0 BYTE1 BYTE2 BYTE3 BYTE4 Description
85 // 1:4|OFF OFF1 OFF2:2|0 HI LO Long match of > 66 bytes
86 // Offset of OFF|OFF1|OFF2 + 1
87 // Length of HI|LO + 66
88 // 1:4|OFF OFF1 OFF2:2|LEN Distant match of < 66 bytes
89 // Offset of OFF|OFF1|OFF2 + 1
90 // Length of LEN + 2
91 // LEN|OFF OFF1 Nearby short match
92 // Offset OFF|OFF1 + 1
93 // Length LEN
94 //
95 
96 // NOTE: When partitioning bytes, I use the notation X:n|Y so that
97 // X takes the n MSB bits of byte and Y takes the remaining bits.
98 
99 
100 #endif
101 
102