1 /**
2  * ae.utils.gzip
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  *   Simon Arlott
13  */
14 
15 module ae.utils.gzip;
16 
17 // TODO: recent zlib versions support gzip headers,
18 // reimplement this module as zlib flags
19 
20 import std.exception;
21 import std.conv;
22 import std.range.primitives : ElementType;
23 debug import std.stdio, std.file;
24 
25 import ae.sys.data;
26 import ae.sys.dataset : DataVec, bytes, joinData, copyTo, joinToGC;
27 import ae.utils.array;
28 import ae.utils.bitmanip;
29 
30 static import zlib = ae.utils.zlib;
31 public import ae.utils.zlib : ZlibOptions, ZlibMode;
32 import std.digest.crc;
33 
// Bit masks for the flag byte of a Gzip header (the byte at offset 3,
// which gzipToRawDeflate reads into `flg`).
private enum
{
	FTEXT    = 1 << 0, // not inspected anywhere in this module
	FHCRC    = 1 << 1, // rejected by gzipToRawDeflate
	FEXTRA   = 1 << 2, // rejected by gzipToRawDeflate
	FNAME    = 1 << 3, // NUL-terminated name follows the fixed header
	FCOMMENT = 1 << 4, // rejected by gzipToRawDeflate
}
42 
/// Calculate CRC32 from `Data[]`
///
/// The checksum is computed over the contents of all elements in order,
/// as if they were one contiguous buffer.
///
/// Params:
///   data = range whose elements convert to `const(Data)`
/// Returns: the CRC32 as a host-order `uint`, independent of the
///   byte order of the machine.
uint crc32(R)(R data)
if (is(ElementType!R : const(Data)))
{
	import std.bitmanip : littleEndianToNative;

	CRC32 crc;
	foreach (ref d; data)
		d.asDataOf!ubyte.enter((scope contents) {
			crc.put(contents);
		});

	// The digest comes back as four bytes, least significant first.
	// Decode it explicitly rather than reinterpreting the memory as a
	// uint, which only yields the right number on little-endian hosts.
	ubyte[4] digest = crc.finish();
	return littleEndianToNative!uint(digest);
}
55 
unittest
{
	// The checksum state must carry across chunk boundaries:
	// "ab" followed by "c" has to hash like the single string "abc".
	auto chunks = [Data("ab".asBytes), Data("c".asBytes)];
	immutable uint expected = 0x352441C2;
	assert(crc32(chunks) == expected);
}
60 
/// Add a Gzip header to deflated data.
///
/// Params:
///   compressed = zlib-framed stream (as produced by `zlib.compress`);
///                its first 2 and last 4 bytes are dropped and replaced
///                with Gzip framing
///   dataCrc    = CRC32 of the uncompressed data
///   dataLength = length of the uncompressed data in bytes; only the low
///                32 bits are stored, matching the truncating comparison
///                performed by `uncompress`
/// Returns: the Gzip header, the raw deflate payload and the 8-byte trailer.
/// Throws: `Exception` if `compressed` is shorter than the 6 bytes of
///   framing that are stripped from it.
DataVec deflate2gzip(scope Data[] compressed, uint dataCrc, size_t dataLength)
{
	import std.bitmanip : nativeToLittleEndian;

	auto stream = compressed.bytes;
	// Guard the [2 .. $ - 4] slice below against index underflow.
	enforce(stream.length >= 6, "Zlib stream too short");

	ubyte[] header = [
		0x1F, 0x8B, // signature
		0x08,       // compression method
		0,          // flags: no optional fields
		0, 0, 0, 0, // TODO: set MTIME
		4,          // extra flags
		3,          // TODO: set OS
	];

	// Both trailer fields are little-endian on the wire regardless of the
	// host byte order. The length is deliberately truncated to 32 bits
	// instead of throwing for inputs of 4 GiB or more.
	ubyte[8] footer;
	footer[0 .. 4] = nativeToLittleEndian(dataCrc);
	footer[4 .. 8] = nativeToLittleEndian(cast(uint)dataLength);

	return DataVec(
		Data(header),
		stream[2 .. $ - 4],
		Data(footer[]),
	);
}
80 
/// Compress data to Gzip.
///
/// The input is compressed with `zlib.compress` using `options`, and the
/// result is re-framed by `deflate2gzip` together with the CRC32 and the
/// byte length of the uncompressed input.
DataVec compress(scope Data[] data, ZlibOptions options = ZlibOptions.init)
{
	auto deflated = zlib.compress(data, options);
	immutable checksum = crc32(data);
	immutable rawLength = data.bytes.length;
	return deflate2gzip(deflated[], checksum, rawLength);
}
86 
/// ditto
Data compress(Data input)
{
	// Treat the single Data as a one-element array, then join the
	// resulting pieces back into one Data.
	return compress(input.asSlice).joinData();
}
88 
/// Strip the Gzip header from `data`.
///
/// Validates the fixed 10-byte header, skips the optional file name if
/// the FNAME flag is set, and returns everything between the header and
/// the final 8 bytes (the trailer).
///
/// Params:
///   data = a complete Gzip stream
/// Returns: the payload between header and trailer.
/// Throws: `Exception` if the stream is truncated, has a bad signature,
///   uses a compression method other than 8, sets FHCRC, FEXTRA or
///   FCOMMENT, or has a file name without a terminator.
DataVec gzipToRawDeflate(scope Data[] data)
{
	enum size_t headerSize = 10;
	enum size_t trailerSize = 8;

	auto bytes = data.bytes;
	enforce(bytes.length >= headerSize, "Gzip too short");
	enforce(bytes[0] == 0x1F && bytes[1] == 0x8B, "Invalid Gzip signature");
	enforce(bytes[2] == 0x08, "Unsupported Gzip compression method");
	ubyte flg = bytes[3];
	enforce((flg & FHCRC)==0, "FHCRC not supported");
	enforce((flg & FEXTRA)==0, "FEXTRA not supported");
	enforce((flg & FCOMMENT)==0, "FCOMMENT not supported");

	// The trailer must fit after the header, otherwise the final slice
	// would have its end before its start.
	enforce(bytes.length >= headerSize + trailerSize, "Gzip too short");
	immutable size_t payloadEnd = bytes.length - trailerSize;

	size_t start = headerSize;
	if (flg & FNAME)
	{
		// Look for the terminating NUL, but never scan into the trailer
		// or past the end of a truncated stream.
		while (start < payloadEnd && bytes[start]) start++;
		enforce(start < payloadEnd, "Unterminated Gzip file name");
		start++; // step over the NUL itself
	}
	return bytes[start .. payloadEnd];
}
108 
/// Uncompress Gzip-compressed data.
///
/// Params:
///   data = a complete Gzip stream
/// Returns: the decompressed data.
/// Throws: `Exception` if the stream is too short, if the header is
///   rejected by `gzipToRawDeflate`, or if the length or CRC32 recorded
///   in the trailer does not match the decompressed data.
DataVec uncompress(scope Data[] data)
{
	auto bytes = data.bytes;
	enforce(bytes.length >= 4, "No data to decompress");

	ZlibOptions options; options.mode = ZlibMode.raw;
	DataVec uncompressed = zlib.uncompress(gzipToRawDeflate(data)[], options);

	// Trailer layout: CRC32 of the original data, then its length, both
	// little-endian. gzipToRawDeflate has already enforced a minimum of
	// 10 bytes, so both 4-byte reads are in bounds.
	LittleEndian!uint storedCrc, size;
	bytes[$-8 .. $-4].copyTo(storedCrc.asSlice.asBytes);
	bytes[$-4 .. $].copyTo(size.asSlice.asBytes);

	// Cheap check first; the CRC needs a full pass over the output.
	enforce(cast(uint)uncompressed.bytes.length == size, "Decompressed data length mismatch");
	enforce(crc32(uncompressed[]) == storedCrc, "Decompressed data CRC mismatch");

	return uncompressed;
}
124 
/// ditto
Data uncompress(Data input)
{
	// Treat the single Data as a one-element array, then join the
	// decompressed pieces back into one Data.
	return uncompress(input.asSlice).joinData();
}
126 
unittest
{
	// Compress and decompress `src`, once as a single contiguous Data and
	// once split into one-byte chunks, expecting the input back both times.
	void testRoundtrip(ubyte[] src)
	{
		ubyte[] packed = compress(Data(src)).asDataOf!ubyte.toGC();
		ubyte[] unpacked = uncompress(Data(packed)).asDataOf!ubyte.toGC();
		assert(unpacked == src);

		DataVec pieces;
		foreach (b; src)
			pieces ~= Data([b]);
		auto gzipped = compress(pieces[]);
		unpacked = uncompress(gzipped[]).joinToGC;
		assert(unpacked == src);
	}

	testRoundtrip(cast(ubyte[])
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
");
	testRoundtrip([0]);
	testRoundtrip(null);

	// Decompress a fixed Gzip stream and compare against the known plaintext.
	void testUncompress(ubyte[] src, ubyte[] dst)
	{
		auto actual = cast(ubyte[])uncompress(Data(src)).toGC();
		assert(actual == dst);
	}

	// Fixture with the FNAME flag set; the embedded name is "test.txt".
	ubyte[] sample = [
		0x1F, 0x8B, 0x08, 0x08, 0xD3, 0xB2, 0x6E, 0x4F, 0x02, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2E, 0x74,
		0x78, 0x74, 0x00, 0x2B, 0xC9, 0x48, 0x55, 0x28, 0x2C, 0xCD, 0x4C, 0xCE, 0x56, 0x48, 0x2A, 0xCA,
		0x2F, 0xCF, 0x53, 0x48, 0xCB, 0xAF, 0x50, 0xC8, 0x2A, 0xCD, 0x2D, 0x28, 0x56, 0xC8, 0x2F, 0x4B,
		0x2D, 0x52, 0x00, 0x49, 0xE7, 0x24, 0x56, 0x55, 0x2A, 0xA4, 0xE4, 0xA7, 0x03, 0x00, 0x14, 0x51,
		0x0C, 0xCE, 0x2B, 0x00, 0x00, 0x00];
	testUncompress(sample, cast(ubyte[])"the quick brown fox jumps over the lazy dog");
}