1 /**
2  * ae.utils.gzip
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  *   Simon Arlott
13  */
14 
15 module ae.utils.gzip;
16 
17 // TODO: recent zlib versions support gzip headers,
18 // reimplement this module as zlib flags
19 
20 import std.exception;
21 import std.conv;
22 import std.range.primitives : ElementType;
23 debug import std.stdio, std.file;
24 
25 import ae.sys.data;
26 import ae.utils.array;
27 import ae.utils.bitmanip;
28 
29 static import zlib = ae.utils.zlib;
30 public import ae.utils.zlib : ZlibOptions, ZlibMode;
31 import std.digest.crc;
32 
// Bit masks tested against the flags byte (offset 3) of a Gzip header.
private enum
{
	FTEXT    = 1 << 0, // not referenced in this module
	FHCRC    = 1 << 1, // rejected by gzipToRawDeflate
	FEXTRA   = 1 << 2, // rejected by gzipToRawDeflate
	FNAME    = 1 << 3, // zero-terminated name follows the fixed header
	FCOMMENT = 1 << 4, // rejected by gzipToRawDeflate
}
41 
/// Calculate CRC32 from `Data[]`
/// Params:
///   data = range of `Data` chunks; they are hashed in order, as if
///          they were one contiguous buffer
/// Returns: the CRC32 of all chunk contents as a number, independent
///          of the byte order of the host.
uint crc32(R)(R data)
if (is(ElementType!R : const(Data)))
{
	import std.bitmanip : littleEndianToNative;

	CRC32 crc;
	foreach (ref d; data)
		crc.put(cast(ubyte[])d.contents);
	// `finish` hands back the digest as four bytes, least significant
	// first. Decode them explicitly rather than reinterpreting the
	// memory, which would yield a byte-swapped value on big-endian hosts.
	ubyte[4] digest = crc.finish();
	return littleEndianToNative!uint(digest);
}
52 
// The checksum of "abc" must come out the same when the input is
// supplied as several chunks.
unittest
{
	auto chunks = [Data("ab"), Data("c")];
	assert(crc32(chunks) == 0x352441C2);
}
57 
/// Add a Gzip header to deflated data.
/// Params:
///   compressed = compressed stream whose first 2 and last 4 bytes are
///                dropped and replaced with Gzip framing (presumably
///                a zlib-wrapped stream, as produced by `zlib.compress`)
///   dataCrc    = CRC32 of the uncompressed data
///   dataLength = length of the uncompressed data, in bytes
/// Returns: the 10-byte Gzip header, the stripped stream, and the
///          8-byte footer (CRC32, then length modulo 2^32).
/// Throws: `Exception` if `compressed` is shorter than the 6 bytes
///         that are stripped from it.
DataVec deflate2gzip(scope Data[] compressed, uint dataCrc, size_t dataLength)
{
	auto stream = compressed.bytes;
	// Validate before slicing, so that a truncated stream is reported
	// as an exception instead of a range violation.
	enforce(stream.length >= 6, "Deflated data too short");

	ubyte[] header;
	header.length = 10;
	header[0] = 0x1F;
	header[1] = 0x8B;
	header[2] = 0x08;
	header[3..8] = 0;  // TODO: set MTIME
	header[8] = 4;
	header[9] = 3;     // TODO: set OS

	// The length field holds the size modulo 2^32, so truncate
	// instead of throwing on inputs of 4 GiB or more.
	uint[2] footer = [dataCrc, cast(uint)dataLength];
	// Both footer fields are stored least significant byte first.
	version (BigEndian)
	{
		import core.bitop : bswap;
		foreach (ref field; footer)
			field = bswap(field);
	}

	return DataVec(
		Data(header),
		stream[2 .. $ - 4],
		Data(footer),
	);
}
77 
/// Compress data to Gzip.
/// Params:
///   data    = chunks of the data to compress
///   options = settings passed on to `zlib.compress`
/// Returns: the compressed data with Gzip framing, as built by
///          `deflate2gzip`.
DataVec compress(scope Data[] data, ZlibOptions options = ZlibOptions.init)
{
	auto deflated = zlib.compress(data, options);
	immutable checksum = crc32(data);
	immutable originalLength = data.bytes.length;
	return deflate2gzip(deflated[], checksum, originalLength);
}
83 
/// ditto
Data compress(Data input)
{
	// Treat the single buffer as a one-element chunk list, then
	// join the resulting chunks back into one `Data`.
	return compress(input.toArray).joinData();
}
85 
/// Strip the Gzip header from `data`.
/// Params:
///   data = chunks of a Gzip file
/// Returns: the bytes between the header (including the file name,
///          if present) and the 8-byte footer.
/// Throws: `Exception` if the input is truncated, does not carry the
///         Gzip signature, uses a compression method other than 8,
///         or sets one of the unsupported FHCRC / FEXTRA / FCOMMENT flags.
DataVec gzipToRawDeflate(scope Data[] data)
{
	enum size_t headerLength = 10; // fixed-size part of the header
	enum size_t footerLength = 8;  // trailing bytes excluded from the result

	auto bytes = data.bytes;
	immutable total = bytes.length;
	enforce(total >= headerLength, "Gzip too short");
	enforce(bytes[0] == 0x1F && bytes[1] == 0x8B, "Invalid Gzip signature");
	enforce(bytes[2] == 0x08, "Unsupported Gzip compression method");
	ubyte flg = bytes[3];
	enforce((flg & FHCRC)==0, "FHCRC not supported");
	enforce((flg & FEXTRA)==0, "FEXTRA not supported");
	enforce((flg & FCOMMENT)==0, "FCOMMENT not supported");

	size_t start = headerLength;
	if (flg & FNAME)
	{
		// Skip the zero-terminated file name. The input is untrusted,
		// so never read past its end while looking for the terminator.
		while (true)
		{
			enforce(start < total, "Unterminated Gzip file name");
			if (bytes[start++] == 0)
				break;
		}
	}

	// `start <= total` holds here, so the subtraction cannot wrap.
	enforce(total - start >= footerLength, "Gzip too short");
	return bytes[start .. total - footerLength];
}
105 
/// Uncompress Gzip-compressed data.
/// Params:
///   data = chunks of a Gzip file
/// Returns: the decompressed data.
/// Throws: `Exception` if the input is shorter than 4 bytes, or if the
///         decompressed length does not match the length recorded in
///         the last 4 bytes of the input.
DataVec uncompress(scope Data[] data)
{
	auto stream = data.bytes;
	enforce(stream.length >= 4, "No data to decompress");

	// gzipToRawDeflate removes the Gzip framing, so zlib is run in raw mode.
	ZlibOptions rawOptions;
	rawOptions.mode = ZlibMode.raw;
	DataVec inflated = zlib.uncompress(gzipToRawDeflate(data)[], rawOptions);

	// The final four bytes hold the expected length as a little-endian uint.
	LittleEndian!uint expectedSize;
	stream[$-4 .. $].copyTo(expectedSize.toArray);
	enforce(cast(uint)inflated.bytes.length == expectedSize, "Decompressed data length mismatch");

	return inflated;
}
121 
/// ditto
Data uncompress(Data input)
{
	// Treat the single buffer as a one-element chunk list, then
	// join the resulting chunks back into one `Data`.
	return uncompress(input.toArray).joinData();
}
123 
unittest
{
	// Compress and decompress `src`, first as one contiguous buffer and
	// then as a chain of one-byte chunks; both must give `src` back.
	void testRoundtrip(ubyte[] src)
	{
		ubyte[] packed   = cast(ubyte[])  compress(Data(src)).toHeap;
		ubyte[] unpacked = cast(ubyte[])uncompress(Data(packed)).toHeap;
		assert(unpacked == src);

		DataVec chunks;
		foreach (b; src)
			chunks ~= Data([b]);
		unpacked = cast(ubyte[])uncompress(compress(chunks[])[]).joinToHeap;
		assert(unpacked == src);
	}

	// Decompress the Gzip file `src` and compare the output with `dst`.
	void testUncompress(ubyte[] src, ubyte[] dst)
	{
		assert(cast(ubyte[])uncompress(Data(src)).toHeap == dst);
	}

	// Repetitive text, a single zero byte, and empty input.
	testRoundtrip(cast(ubyte[])
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
");
	testRoundtrip([0]);
	testRoundtrip(null);

	// A pre-built Gzip file with the FNAME flag set and the name "test.txt".
	testUncompress([
		0x1F, 0x8B, 0x08, 0x08, 0xD3, 0xB2, 0x6E, 0x4F, 0x02, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2E, 0x74,
		0x78, 0x74, 0x00, 0x2B, 0xC9, 0x48, 0x55, 0x28, 0x2C, 0xCD, 0x4C, 0xCE, 0x56, 0x48, 0x2A, 0xCA,
		0x2F, 0xCF, 0x53, 0x48, 0xCB, 0xAF, 0x50, 0xC8, 0x2A, 0xCD, 0x2D, 0x28, 0x56, 0xC8, 0x2F, 0x4B,
		0x2D, 0x52, 0x00, 0x49, 0xE7, 0x24, 0x56, 0x55, 0x2A, 0xA4, 0xE4, 0xA7, 0x03, 0x00, 0x14, 0x51,
		0x0C, 0xCE, 0x2B, 0x00, 0x00, 0x00], cast(ubyte[])"the quick brown fox jumps over the lazy dog");
}