1 /**
2  * ae.utils.gzip
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <vladimir@thecybershadow.net>
12  *   Simon Arlott
13  */
14 
15 module ae.utils.gzip;
16 
17 // TODO: recent zlib versions support gzip headers,
18 // reimplement this module as zlib flags
19 
20 import std.exception;
21 import std.conv;
22 debug import std.stdio, std.file;
23 
24 import ae.sys.data;
25 
26 static import zlib = ae.utils.zlib;
27 public import ae.utils.zlib : ZlibOptions, ZlibMode;
28 import std.digest.crc;
29 
/// Bit masks for the flag byte (offset 3) of a gzip member header.
/// Each constant occupies a single, distinct bit.
private enum
{
	FTEXT    = 1 << 0,
	FHCRC    = 1 << 1,
	FEXTRA   = 1 << 2,
	FNAME    = 1 << 3, /// a NUL-terminated file name follows the fixed header
	FCOMMENT = 1 << 4,
}
38 
/// Computes the CRC-32 checksum over the concatenated contents of `data`.
///
/// Params:
///   data = the buffers to checksum, processed in order as one stream
/// Returns: the checksum as a native integer (0x352441C2 for "abc")
uint crc32(Data[] data)
{
	import std.bitmanip : littleEndianToNative;

	CRC32 crc;
	foreach (ref d; data)
		crc.put(cast(ubyte[])d.contents);
	// finish() yields the digest as four bytes, least significant first.
	// Decode it explicitly instead of reinterpreting the memory, so the
	// numeric value does not depend on the host's byte order.
	ubyte[4] digest = crc.finish();
	return littleEndianToNative!uint(digest);
}
47 
// The checksum must be the same no matter how the input is split into chunks.
unittest
{
	enum uint expected = 0x352441C2;
	auto chunks = [Data("ab"), Data("c")];
	assert(crc32(chunks) == expected);
}
52 
/// Re-wraps a zlib stream as a gzip member.
///
/// The first 2 bytes and last 4 bytes of `compressed` (the zlib header and
/// trailer) are dropped, and a fixed 10-byte gzip header plus an 8-byte
/// footer (CRC-32 followed by the uncompressed size) are added around the
/// remaining payload.
///
/// Params:
///   compressed = zlib-wrapped deflate stream
///   dataCrc    = CRC-32 of the uncompressed data
///   dataLength = length of the uncompressed data, in bytes
/// Returns: the chunks making up the gzip stream
/// Throws: Exception if `compressed` is too short to contain the zlib
///         header and trailer.
Data[] deflate2gzip(Data[] compressed, uint dataCrc, size_t dataLength)
{
	// 2 bytes of header + 4 bytes of trailer are about to be stripped.
	enforce(compressed.bytes.length >= 6, "Zlib stream too short");

	ubyte[] header;
	header.length = 10;
	header[0] = 0x1F;  // signature
	header[1] = 0x8B;
	header[2] = 0x08;  // compression method
	header[3..8] = 0;  // flags + MTIME; TODO: set MTIME
	header[8] = 4;     // extra flags
	header[9] = 3;     // TODO: set OS

	// The gzip size field holds the length modulo 2^32, so inputs of 4 GiB
	// or more must wrap around rather than fail the conversion.
	uint isize = cast(uint)(dataLength & uint.max);

	// Both footer fields are stored least significant byte first.
	version (BigEndian)
	{
		import core.bitop : bswap;
		dataCrc = bswap(dataCrc);
		isize = bswap(isize);
	}
	uint[2] footer = [dataCrc, isize];

	compressed = compressed.bytes[2..compressed.bytes.length-4];

	return [Data(header)] ~ compressed ~ [Data(footer)];
}
69 
/// Compresses `data` and returns it as a gzip stream.
///
/// Params:
///   data    = the chunks to compress
///   options = settings forwarded to the zlib compressor
/// Returns: the chunks making up the gzip stream
Data[] compress(Data[] data, ZlibOptions options = ZlibOptions.init)
{
	auto zlibStream = zlib.compress(data, options);
	immutable checksum = crc32(data);
	immutable rawSize = data.bytes.length;
	return deflate2gzip(zlibStream, checksum, rawSize);
}
74 
/// Single-buffer convenience overload: compresses `input` and joins the
/// resulting chunks into one `Data`.
Data compress(Data input)
{
	Data[] chunks = compress([input]);
	return chunks.joinData();
}
76 
/// Strips the gzip header and the 8-byte footer from `data`, returning the
/// payload between them.
///
/// Only the fixed 10-byte header, optionally followed by a NUL-terminated
/// file name (FNAME), is understood; streams using FHCRC, FEXTRA or
/// FCOMMENT are rejected.
///
/// Params:
///   data = the chunks of a complete gzip stream
/// Returns: the chunks covering the payload
/// Throws: Exception if the stream is too short, has a bad signature,
///         uses an unsupported method or flag, or has an unterminated
///         file name.
Data[] gzipToRawDeflate(Data[] data)
{
	enum headerSize = 10;
	enum footerSize = 8;

	auto bytes = data.bytes;
	immutable total = bytes.length;
	// Anything shorter cannot hold both the header and the footer, and
	// would make the final slice bounds invalid.
	enforce(total >= headerSize + footerSize, "Gzip too short");
	enforce(bytes[0] == 0x1F && bytes[1] == 0x8B, "Invalid Gzip signature");
	enforce(bytes[2] == 0x08, "Unsupported Gzip compression method");
	ubyte flg = bytes[3];
	enforce((flg & FHCRC)==0, "FHCRC not supported");
	enforce((flg & FEXTRA)==0, "FEXTRA not supported");
	enforce((flg & FCOMMENT)==0, "FCOMMENT not supported");

	immutable payloadEnd = total - footerSize;
	size_t start = headerSize;
	if (flg & FNAME)
	{
		// Skip the file name, including its terminating NUL. The scan is
		// bounded so a missing terminator cannot run into the footer or
		// past the end of the input.
		while (true)
		{
			enforce(start < payloadEnd, "Unterminated Gzip file name");
			if (bytes[start++] == 0)
				break;
		}
	}
	return bytes[start..payloadEnd];
}
95 
/// Decompresses a gzip stream.
///
/// The size recorded in the last four bytes of the stream is checked
/// against the length of the decompressed output.
///
/// Params:
///   data = the chunks of a complete gzip stream
/// Returns: the chunks of decompressed data
/// Throws: Exception if there is no input, the gzip framing is rejected
///         by `gzipToRawDeflate`, or the recorded size does not match.
Data[] uncompress(Data[] data)
{
	import std.bitmanip : littleEndianToNative;

	// Merge trailing chunks until the last one holds the whole 4-byte
	// size field.
	while (data.length >= 2 && data[$-1].length < 4)
		data = data[0..$-2] ~ [data[$-2] ~ data[$-1]];
	enforce(data.length && data[$-1].length >= 4, "No data to decompress");
	ZlibOptions options; options.mode = ZlibMode.raw;
	Data[] uncompressed = zlib.uncompress(gzipToRawDeflate(data), options);

	// The size field is stored least significant byte first; decode it
	// explicitly so the check does not depend on the host's byte order.
	auto tail = cast(const(ubyte)[])data[$-1].contents;
	ubyte[4] sizeField = tail[$-4..$];
	immutable uint expectedSize = littleEndianToNative!uint(sizeField);

	// The field holds the length modulo 2^32, so compare the low 32 bits.
	immutable uint actualSize = cast(uint)(uncompressed.bytes.length & uint.max);
	enforce(actualSize == expectedSize, "Decompressed data length mismatch");
	return uncompressed;
}
106 
/// Single-buffer convenience overload: decompresses `input` and joins the
/// resulting chunks into one `Data`.
Data uncompress(Data input)
{
	Data[] chunks = uncompress([input]);
	return chunks.joinData();
}
108 
// Round-trip and known-stream decoding tests.
unittest
{
	// Compress then decompress `src`, first as one buffer and then as a
	// sequence of one-byte chunks; both must reproduce the input.
	void testRoundtrip(ubyte[] src)
	{
		auto packed = cast(ubyte[])compress(Data(src)).toHeap;
		auto unpacked = cast(ubyte[])uncompress(Data(packed)).toHeap;
		assert(unpacked == src);

		Data[] pieces;
		foreach (b; src)
			pieces ~= Data([b]);
		auto rejoined = cast(ubyte[])uncompress(compress(pieces)).joinToHeap;
		assert(rejoined == src);
	}

	testRoundtrip(cast(ubyte[])
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
");
	testRoundtrip([0]);
	testRoundtrip(null);

	// Decompress a pre-built gzip stream and compare with the expected bytes.
	void testUncompress(ubyte[] src, ubyte[] dst)
	{
		auto decoded = cast(ubyte[])uncompress(Data(src)).toHeap;
		assert(decoded == dst);
	}

	// This stream has the FNAME flag set and carries the name "test.txt".
	testUncompress([
		0x1F, 0x8B, 0x08, 0x08, 0xD3, 0xB2, 0x6E, 0x4F, 0x02, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2E, 0x74,
		0x78, 0x74, 0x00, 0x2B, 0xC9, 0x48, 0x55, 0x28, 0x2C, 0xCD, 0x4C, 0xCE, 0x56, 0x48, 0x2A, 0xCA,
		0x2F, 0xCF, 0x53, 0x48, 0xCB, 0xAF, 0x50, 0xC8, 0x2A, 0xCD, 0x2D, 0x28, 0x56, 0xC8, 0x2F, 0x4B,
		0x2D, 0x52, 0x00, 0x49, 0xE7, 0x24, 0x56, 0x55, 0x2A, 0xA4, 0xE4, 0xA7, 0x03, 0x00, 0x14, 0x51,
		0x0C, 0xCE, 0x2B, 0x00, 0x00, 0x00], cast(ubyte[])"the quick brown fox jumps over the lazy dog");
}