1 /**
2  * Compress/decompress data using the zlib library.
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  */
13 
14 module ae.utils.zlib;
15 
16 import etc.c.zlib;
17 import std.algorithm.mutation : move;
18 import std.conv;
19 import std.exception;
20 
21 import ae.sys.data;
22 import ae.sys.dataset : DataVec, joinData;
23 import ae.utils.array;
24 
25 /// Thrown on zlib errors.
/// Exception type raised when a zlib call reports a failure.
class ZlibException : Exception
{
	// Translate a zlib return code into a short fixed description.
	// Codes not listed here are reported as "unknown error".
	private static string getmsg(int err) nothrow @nogc pure @safe
	{
		string text = "unknown error";
		switch (err)
		{
			case Z_VERSION_ERROR: text = "version error"; break;
			case Z_BUF_ERROR:     text = "buf error";     break;
			case Z_MEM_ERROR:     text = "mem error";     break;
			case Z_DATA_ERROR:    text = "data error";    break;
			case Z_STREAM_ERROR:  text = "stream error";  break;
			case Z_ERRNO:         text = "errno";         break;
			case Z_NEED_DICT:     text = "need dict";     break;
			case Z_STREAM_END:    text = "stream end";    break;
			default:              break;
		}
		return text;
	}

	/// Build an exception from a zlib return code.
	/// Params:
	///   err = The code returned by the failing zlib call.
	///   zs  = The stream the call operated on. Its `msg` field, when set,
	///         is used as the exception message in preference to the
	///         generic description of `err`.
	this(int err, z_stream* zs)
	{
		super(zs.msg ? to!string(zs.msg) : getmsg(err));
	}

	/// Build an exception carrying the given message verbatim.
	this(string msg)
	{
		super(msg);
	}
}
54 
55 /// File format.
/// File format.
///
/// Selects how `ZlibOptions.windowBits` is encoded before it is handed to
/// zlib's `deflateInit2` / `inflateInit2`.
enum ZlibMode
{
	/// Normal deflate stream.
	/// The window size is passed to zlib unmodified.
	normal,

	/// Raw deflate stream.
	/// The window size is passed to zlib negated.
	raw,

	/// gzip deflate stream. Require gzip input.
	/// The window size is passed to zlib with 16 added.
	gzipOnly,

	/// Output and detect gzip, but do not require it.
	/// The window size is passed to zlib with 32 added.
	gzipAuto,
}
63 
64 /// Compression/decompression options.
/// Settings controlling how a stream is compressed or decompressed.
struct ZlibOptions
{
	/// Compression level: `Z_DEFAULT_COMPRESSION`, or 0 through 9.
	int deflateLevel = Z_DEFAULT_COMPRESSION;

	/// Window size (8..15) - the plain value, without the sign/offset
	/// encoding zlib uses to select the stream format.
	int windowBits = 15;

	/// File format.
	ZlibMode mode;

	invariant()
	{
		immutable levelIsDefault = deflateLevel == Z_DEFAULT_COMPRESSION;
		immutable levelInRange   = 0 <= deflateLevel && deflateLevel <= 9;
		assert(levelIsDefault || levelInRange);
		assert(8 <= windowBits && windowBits <= 15);
	}

private:
	// The `windowBits` argument in the encoded form passed to zlib:
	// `windowBits` combined with the sign/offset that selects `mode`.
	@property
	int zwindowBits()
	{
		with (ZlibMode) final switch (mode)
		{
			case gzipAuto: return 32 + windowBits;
			case gzipOnly: return 16 + windowBits;
			case raw:      return -windowBits;
			case normal:   return windowBits;
		}
	}
}
94 
95 /// Implements a zlib compression or decompression process.
/// Implements a zlib compression or decompression process.
///
/// Typical use: call `init` once, feed input with `processChunk` any number
/// of times, then call `flush` to finish the stream and collect the output.
/// The static `process` overloads wrap this sequence for one-shot use.
///
/// Params:
///   COMPRESSING = `true` to deflate (compress), `false` to inflate (decompress).
struct ZlibProcess(bool COMPRESSING)
{
	/// Initialize zlib.
	/// Params:
	///   options = Compression level, window size and stream format.
	/// Throws: `ZlibException` if zlib fails to initialize the stream.
	void init(ZlibOptions options = ZlibOptions.init)
	{
		static if (COMPRESSING)
		{
			// deflateInit2 only accepts the "+16" (gzip wrapper) offset; the
			// "+32" auto-detection offset is understood by inflateInit2 only
			// and makes deflateInit2 fail with Z_STREAM_ERROR. When
			// compressing, gzipAuto means "output gzip", i.e. +16.
			immutable int zlibWindowBits = options.mode == ZlibMode.gzipAuto
				? 16 + options.windowBits
				: options.zwindowBits;
			zenforce(deflateInit2(&zs, options.deflateLevel, Z_DEFLATED, zlibWindowBits, 8, Z_DEFAULT_STRATEGY));
		}
		else
			zenforce(inflateInit2(&zs, options.zwindowBits));

		// Only reached when zlib reported success; lets the destructor know
		// there is stream state to release.
		initialized = true;
	}

	/// Process one chunk of data.
	///
	/// All of `chunk` is consumed before this function returns; output
	/// produced so far is accumulated internally and handed out by `flush`.
	/// Empty chunks are ignored.
	///
	/// Throws: `ZlibException` on zlib errors, or with the message
	/// "Trailing data" if the stream ended before all input was consumed.
	void processChunk(ref const Data chunk)
	{
		if (!chunk.length)
			return;

		assert(zs.avail_in == 0);
		// zlib will consume all data, so unsafeContents is OK to use here
		scope(success) assert(zs.avail_in == 0);
		// cast+unsafeContents because const(Data) is not copyable, can't use asDataOf, does this even make sense?
		auto inputPtr = cast(ubyte*) chunk.unsafeContents.ptr;
		size_t inputLeft = chunk.unsafeContents.length;

		// z_stream.avail_in is a 32-bit counter, so feed the chunk in pieces
		// of at most uint.max bytes instead of failing on larger chunks.
		while (inputLeft)
		{
			immutable uint piece = inputLeft > uint.max ? uint.max : cast(uint) inputLeft;
			zs.next_in  = inputPtr;
			zs.avail_in = piece;
			inputPtr  += piece;
			inputLeft -= piece;

			do
			{
				if (zs.avail_out == 0)
					allocChunk(adjustSize(zs.avail_in));

				assert(zs.avail_in  && zs.next_in );
				assert(zs.avail_out && zs.next_out);
				// Once the stream has ended, any input left over is an error.
				if (zend(processFunc(&zs, Z_NO_FLUSH)))
					enforce(zs.avail_in==0, new ZlibException("Trailing data"));
			} while (zs.avail_in);
		}
	}

	/// Signal end of input and flush.
	/// Returns: All output produced since `init`, as a list of chunks.
	/// Throws: `ZlibException` on zlib errors (including incomplete input
	/// when decompressing).
	DataVec flush()
	{
		if (zs.avail_out == 0)
			allocChunk(adjustSize(zs.avail_in));

		for (;;)
		{
			immutable ret = processFunc(&zs, Z_FINISH);
			if (ret == Z_STREAM_END)
				break;

			// With Z_FINISH, zlib reports "ran out of output space" as
			// Z_BUF_ERROR (inflate) or Z_OK (deflate); neither is fatal and
			// both just need another call with more room. Z_BUF_ERROR with
			// space still available means no progress is possible (e.g.
			// truncated input) and is reported as an error.
			immutable needsMoreOutput = ret == Z_BUF_ERROR && zs.avail_out == 0;
			if (!needsMoreOutput)
				zenforce(ret);

			// allocChunk stores what has been written so far and expands the
			// new chunk to its full capacity, so the requested size is only
			// a lower bound.
			allocChunk(cast(size_t) zs.avail_out * 2 + 1);
		}

		saveChunk();
		return move(outputChunks);
	}

	/// Process all input.
	/// Params:
	///   input   = Input chunks, processed in order as one stream.
	///   options = Compression level, window size and stream format.
	/// Returns: The output as a list of chunks.
	static DataVec process(scope const(Data)[] input, ZlibOptions options = ZlibOptions.init)
	{
		typeof(this) zp;
		zp.init(options);
		foreach (ref chunk; input)
			zp.processChunk(chunk);
		return zp.flush();
	}

	/// Process input and return output as a single contiguous `Data`.
	static Data process(Data input, ZlibOptions options = ZlibOptions.init)
	{
		return process(input.asSlice, options).joinData();
	}

	~this()
	{
		// If init was never called, or failed, there is no zlib state to
		// release; calling the end function would only yield a spurious
		// "stream error".
		if (initialized)
			zenforce(endFunc(&zs));
	}

private:
	z_stream zs;            // zlib stream state
	bool initialized;       // true once init succeeded (zs owns zlib state)
	Data currentChunk;      // output buffer zlib is currently writing into
	DataVec outputChunks;   // completed output buffers, in order

	static if (COMPRESSING)
	{
		alias deflate processFunc;
		alias deflateEnd endFunc;

		// Output buffer size to request for `sz` bytes of pending input:
		// a quarter of the input size, plus one.
		size_t adjustSize(size_t sz) { return sz / 4 + 1; }
	}
	else
	{
		alias inflate processFunc;
		alias inflateEnd endFunc;

		// Output buffer size to request for `sz` bytes of pending input:
		// four times the input size, plus one.
		size_t adjustSize(size_t sz) { return sz * 4 + 1; }
	}

	// Throw a ZlibException unless `ret` is Z_OK.
	void zenforce(int ret)
	{
		if (ret != Z_OK)
			throw new ZlibException(ret, &zs);
	}

	// Returns true if `ret` signals end of stream, false if it is Z_OK;
	// throws for anything else.
	bool zend(int ret)
	{
		if (ret == Z_STREAM_END)
			return true;
		zenforce(ret);
		return false;
	}

	// Append the written part of the current output buffer (if any) to
	// outputChunks and detach zlib from it.
	void saveChunk()
	{
		if (zs.next_out && zs.next_out != currentChunk.unsafeContents.ptr)
		{
			outputChunks ~= currentChunk[0..zs.next_out-cast(ubyte*)currentChunk.unsafeContents.ptr];
			currentChunk = Data();
		}
		zs.next_out = null;
	}

	// Store the current output buffer and point zlib at a fresh one of at
	// least `sz` bytes.
	void allocChunk(size_t sz)
	{
		saveChunk();

		// z_stream.avail_out is a 32-bit counter: never request, or
		// advertise to zlib, more than it can represent.
		if (sz > uint.max)
			sz = uint.max;

		currentChunk = Data(sz);
		currentChunk.length = currentChunk.capacity;
		zs.next_out  = cast(ubyte*)currentChunk.unsafeContents.ptr;
		immutable size_t available = currentChunk.length;
		zs.avail_out = available > uint.max ? uint.max : cast(uint) available;
	}
}
221 
alias ZlibDeflater = ZlibProcess!true;  /// ditto
alias ZlibInflater = ZlibProcess!false; /// ditto

/// One-shot compression: shorthand for `ZlibDeflater.process`.
alias compress = ZlibDeflater.process;
/// One-shot decompression: shorthand for `ZlibInflater.process`.
alias uncompress = ZlibInflater.process;
227 
228 /// Shorthand for compressing at a certain level.
Data compress(Data input, int level)
{
	// Default options with only the compression level overridden.
	auto options = ZlibOptions(level);
	return compress(input, options);
}
233 
unittest
{
	// Compress `original`, decompress the result, and check that the
	// bytes survive the round trip unchanged.
	void testRoundtrip(ubyte[] original)
	{
		ubyte[] packed   =   compress(Data(original)).asDataOf!ubyte.toGC;
		ubyte[] unpacked = uncompress(Data(packed)).asDataOf!ubyte.toGC;
		assert(unpacked == original);
	}

	// Repetitive text
	testRoundtrip(cast(ubyte[])
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
");
	// A single byte
	testRoundtrip([0]);
	// Empty input
	testRoundtrip(null);
}