zlib usage

zlib usage

以下是zlib的官方示例,这里对代码进行一些说明。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/* zpipe.c: example of proper use of zlib's inflate() and deflate()
Not copyrighted -- provided to the public domain
Version 1.4 11 December 2005 Mark Adler */

/* Version history:
1.0 30 Oct 2004 First version
1.1 8 Nov 2004 Add void casting for unused return values
Use switch statement for inflate() return values
1.2 9 Nov 2004 Add assertions to document zlib guarantees
1.3 6 Apr 2005 Remove incorrect assertion in inf()
1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions
Avoid some compiler warnings for input and output buffers
*/

#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "zlib.h"

#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
# include <fcntl.h>
# include <io.h>
# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
#else
# define SET_BINARY_MODE(file)
#endif

#define CHUNK 16384

/* Compress from file source to file dest until EOF on source.
def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
allocated for processing, Z_STREAM_ERROR if an invalid compression
level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
version of the library linked do not match, or Z_ERRNO if there is
an error reading or writing the files. */
int def(FILE *source, FILE *dest, int level)
{
int ret, flush;
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
unsigned char out[CHUNK];

/* allocate deflate state */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
ret = deflateInit(&strm, level);
if (ret != Z_OK)
return ret;

/* compress until end of file */
do {
strm.avail_in = fread(in, 1, CHUNK, source);
if (ferror(source)) {
(void)deflateEnd(&strm);
return Z_ERRNO;
}
flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
strm.next_in = in;

/* run deflate() on input until output buffer not full, finish
compression if all of source has been read in */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = deflate(&strm, flush); /* no bad return value */
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
have = CHUNK - strm.avail_out;
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
(void)deflateEnd(&strm);
return Z_ERRNO;
}
} while (strm.avail_out == 0);
assert(strm.avail_in == 0); /* all input will be used */

/* done when last data in file processed */
} while (flush != Z_FINISH);
assert(ret == Z_STREAM_END); /* stream will be complete */

/* clean up and return */
(void)deflateEnd(&strm);
return Z_OK;
}

/* Decompress from file source to file dest until stream ends or EOF.
inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
allocated for processing, Z_DATA_ERROR if the deflate data is
invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
the version of the library linked do not match, or Z_ERRNO if there
is an error reading or writing the files. */
int inf(FILE *source, FILE *dest)
{
int ret;
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
unsigned char out[CHUNK];

/* allocate inflate state */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit(&strm);
if (ret != Z_OK)
return ret;

/* decompress until deflate stream ends or end of file */
do {
strm.avail_in = fread(in, 1, CHUNK, source);
if (ferror(source)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
if (strm.avail_in == 0)
break;
strm.next_in = in;

/* run inflate() on input until output buffer not full */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
case Z_NEED_DICT:
ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR:
case Z_MEM_ERROR:
(void)inflateEnd(&strm);
return ret;
}
have = CHUNK - strm.avail_out;
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
} while (strm.avail_out == 0);

/* done when inflate() says it's done */
} while (ret != Z_STREAM_END);

/* clean up and return */
(void)inflateEnd(&strm);
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
}

/* report a zlib or i/o error */
void zerr(int ret)
{
fputs("zpipe: ", stderr);
switch (ret) {
case Z_ERRNO:
if (ferror(stdin))
fputs("error reading stdin\n", stderr);
if (ferror(stdout))
fputs("error writing stdout\n", stderr);
break;
case Z_STREAM_ERROR:
fputs("invalid compression level\n", stderr);
break;
case Z_DATA_ERROR:
fputs("invalid or incomplete deflate data\n", stderr);
break;
case Z_MEM_ERROR:
fputs("out of memory\n", stderr);
break;
case Z_VERSION_ERROR:
fputs("zlib version mismatch!\n", stderr);
}
}

/* compress or decompress from stdin to stdout */
int main(int argc, char **argv)
{
int ret;

/* avoid end-of-line conversions */
SET_BINARY_MODE(stdin);
SET_BINARY_MODE(stdout);

/* do compression if no arguments */
if (argc == 1) {
ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
if (ret != Z_OK)
zerr(ret);
return ret;
}

/* do decompression if -d specified */
else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
ret = inf(stdin, stdout);
if (ret != Z_OK)
zerr(ret);
return ret;
}

/* otherwise, report usage */
else {
fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
return 1;
}
}

这个程序的输入是文件名,作用是输入的文件压缩成压缩文件或者是将压缩文件解压。根据输入参数的不同,相应地调到def函数或是inf函数,分别代表压缩和解压。def函数和inf函数的输入输出均是文件指针。

压缩的过程如下:

  1. z_stream保存了当前压缩的上下文状态。压缩的过程首先对z_stream进行初始化,设置压缩等级,其中的几个函数等。

  2. 用一个大循环从输入文件中最多读出CHUNK字节的数据到in这个buffer中。

  3. 判断文件是否已经到最后了,以判断是否需要进行flush操作。

  4. 设置z_stream的next_in为in这个buffer,设置z_stream的avail_in为从文件读到in中的字节数。

  5. 然后设置z_stream的avail_out为CHUNK,next_out为out这个buffer。

  6. 然后对z_stream调用deflate,如果已经到了文件的结尾,需要告诉deflate函数。

  7. 计算出本次deflate操作产出的数据量,have = CHUNK - strm.avail_out;

  8. 将本次操作产出的压缩数据写到压缩文件中。

  9. 内部这个while循环有些不好理解:

    1. deflate这个函数完成后会更新z_stream中的avail_out, next_out。我们要保证next_out有空间给我们来输出,也就是avail_out不为0。如果输出空间不够了,deflate会停止压缩。
    2. Z_NO_FLUSH这个标志使deflate可能不会完全将输入一一对应地压缩出来,deflate中会有一些残留的输入还没有输出。这样做是为了提高压缩率。
    3. 由于2中的特征,一次deflate可能不能将输入next_in压缩完,因为输出buffer满了。这种情况下我们要判断输出buffer是否已经满了,即avail_out为0。如果是的话,就要重复循环中的过程,确保输入被压缩完。
  10. 如果到了文件的结尾,对deflate的调用有Z_FINISH这个标志,使deflate将数据完全压缩后输出到文件。

解压过程和压缩过程区别在于deflate被换成了inflate,过程和压缩过程完全一样。