1 /**
2  * ae.utils.gzip
3  *
4  * License:
5  *   This Source Code Form is subject to the terms of
6  *   the Mozilla Public License, v. 2.0. If a copy of
7  *   the MPL was not distributed with this file, You
8  *   can obtain one at http://mozilla.org/MPL/2.0/.
9  *
10  * Authors:
11  *   Vladimir Panteleev <ae@cy.md>
12  *   Simon Arlott
13  */
14 
15 module ae.utils.gzip;
16 
17 // TODO: recent zlib versions support gzip headers,
18 // reimplement this module as zlib flags
19 
20 import std.exception;
21 import std.conv;
22 debug import std.stdio, std.file;
23 
24 import ae.sys.data;
25 
26 static import zlib = ae.utils.zlib;
27 public import ae.utils.zlib : ZlibOptions, ZlibMode;
28 import std.digest.crc;
29 
/// Bit masks for the FLG byte (offset 3) of a Gzip member header.
/// Names follow the Gzip file format specification (RFC 1952).
private enum
{
	FTEXT    = 1 << 0, /// Not consulted anywhere in this module.
	FHCRC    = 1 << 1, /// Rejected by `gzipToRawDeflate`.
	FEXTRA   = 1 << 2, /// Rejected by `gzipToRawDeflate`.
	FNAME    = 1 << 3, /// A zero-terminated field follows the fixed header.
	FCOMMENT = 1 << 4, /// Rejected by `gzipToRawDeflate`.
}
38 
/// Calculate CRC32 from `Data[]`
///
/// The chunks are fed to the digest in order, so the result is the
/// checksum of their concatenation.
///
/// Params:
///   data = the chunks to hash; an empty array hashes zero bytes.
///
/// Returns: the CRC32 as a number, independent of the host's byte order.
uint crc32(Data[] data)
{
	import std.bitmanip : littleEndianToNative;

	CRC32 crc;
	foreach (ref d; data)
		crc.put(cast(ubyte[])d.contents);

	// `finish` yields the digest as four bytes, least significant byte
	// first. Decode them explicitly rather than reinterpreting the memory
	// as a `uint`, which would only give the right value on little-endian
	// hosts.
	ubyte[4] digest = crc.finish();
	return littleEndianToNative!uint(digest);
}
48 
unittest
{
	// The input is deliberately split across two chunks.
	auto chunks = [Data("ab"), Data("c")];
	immutable uint expected = 0x352441C2;
	assert(crc32(chunks) == expected);
}
53 
/// Add a Gzip header to deflated data.
///
/// The first 2 and the last 4 bytes of `compressed` are discarded
/// (presumably the zlib stream header and its checksum trailer), and the
/// remainder is wrapped in a fixed 10-byte Gzip header and an 8-byte
/// footer holding `dataCrc` and the uncompressed length.
///
/// Params:
///   compressed = the compressed stream to re-wrap; at least 6 bytes long.
///   dataCrc    = CRC32 of the uncompressed data.
///   dataLength = length of the uncompressed data, in bytes.
///
/// Returns: the chunks of the resulting Gzip member.
///
/// Throws: `Exception` if `compressed` is too short to strip.
Data[] deflate2gzip(Data[] compressed, uint dataCrc, size_t dataLength)
{
	import std.bitmanip : nativeToLittleEndian;

	enum strippedPrefix = 2;
	enum strippedSuffix = 4;

	ubyte[] header;
	header.length = 10;
	header[0] = 0x1F;  // signature
	header[1] = 0x8B;
	header[2] = 0x08;  // compression method
	header[3..8] = 0;  // FLG = 0; TODO: set MTIME
	header[8] = 4;     // XFL
	header[9] = 3;     // TODO: set OS

	// Both footer fields are stored least significant byte first, and the
	// size field holds the length modulo 2^32, so inputs of 4 GiB and
	// larger must wrap around rather than fail.
	ubyte[4] crcField = nativeToLittleEndian(dataCrc);
	ubyte[4] sizeField = nativeToLittleEndian(cast(uint)(dataLength & uint.max));
	ubyte[] footer = crcField[] ~ sizeField[];

	auto bytes = compressed.bytes;
	immutable size_t total = bytes.length;
	// Guard the subtraction below against unsigned wrap-around.
	enforce(total >= strippedPrefix + strippedSuffix, "Compressed stream too short");
	compressed = bytes[strippedPrefix .. total - strippedSuffix];

	return [Data(header)] ~ compressed ~ [Data(footer)];
}
71 
/// Compress data to Gzip.
///
/// Runs `zlib.compress` over `data` with the given `options`, then
/// re-wraps the result with `deflate2gzip`, passing the CRC32 and the
/// total byte length of the input.
Data[] compress(Data[] data, ZlibOptions options = ZlibOptions.init)
{
	auto deflated = zlib.compress(data, options);
	immutable uint checksum = crc32(data);
	immutable size_t inputLength = data.bytes.length;
	return deflate2gzip(deflated, checksum, inputLength);
}
77 
/// ditto
Data compress(Data input)
{
	// Wrap the single buffer as a one-element set, then flatten the result.
	Data[] gzipped = compress([input]);
	return gzipped.joinData();
}
79 
/// Strip the Gzip header and footer from `data`.
///
/// Only the fixed 10-byte header, optionally followed by a
/// zero-terminated FNAME field, is understood. The last 8 bytes of the
/// input are treated as the footer and dropped as well.
///
/// Params:
///   data = the chunks of a complete Gzip member.
///
/// Returns: the chunks between the header and the 8-byte footer.
///
/// Throws: `Exception` if the input is too short, has a wrong signature or
/// compression method, uses the FHCRC, FEXTRA or FCOMMENT flags, or has an
/// FNAME field which is not terminated before the footer.
Data[] gzipToRawDeflate(Data[] data)
{
	enum headerLength = 10;
	enum footerLength = 8;

	auto bytes = data.bytes;
	immutable size_t total = bytes.length;
	// Require room for the footer too, so that `total - footerLength`
	// below cannot wrap around.
	enforce(total >= headerLength + footerLength, "Gzip too short");
	enforce(bytes[0] == 0x1F && bytes[1] == 0x8B, "Invalid Gzip signature");
	enforce(bytes[2] == 0x08, "Unsupported Gzip compression method");
	ubyte flg = bytes[3];
	enforce((flg & FHCRC)==0, "FHCRC not supported");
	enforce((flg & FEXTRA)==0, "FEXTRA not supported");
	enforce((flg & FCOMMENT)==0, "FCOMMENT not supported");

	size_t start = headerLength;
	immutable size_t end = total - footerLength;
	if (flg & FNAME)
	{
		// Skip the zero-terminated name without running into the footer
		// (or past the end of the input) when the terminator is missing.
		while (start < end && bytes[start]) start++;
		enforce(start < end, "Unterminated Gzip file name");
		start++; // skip the terminator itself
	}
	return bytes[start..end];
}
99 
/// Uncompress Gzip-compressed data.
///
/// The header and footer are removed with `gzipToRawDeflate`, the rest is
/// inflated as a raw stream, and the resulting length is checked against
/// the size field stored in the last 4 bytes of the input.
/// Note that the stored CRC32 is not verified here.
///
/// Params:
///   data = the chunks of a complete Gzip member.
///
/// Returns: the chunks of the uncompressed data.
///
/// Throws: `Exception` if there are fewer than 4 bytes of input, if the
/// header is rejected by `gzipToRawDeflate`, or if the decompressed length
/// does not match the stored size.
Data[] uncompress(Data[] data)
{
	import std.bitmanip : littleEndianToNative;

	// Merge trailing chunks until the last one holds the entire 4-byte
	// size field, so that it can be read from a single buffer.
	while (data.length >= 2 && data[$-1].length < 4)
		data = data[0..$-2] ~ [data[$-2] ~ data[$-1]];
	enforce(data.length && data[$-1].length >= 4, "No data to decompress");

	ZlibOptions options; options.mode = ZlibMode.raw;
	Data[] uncompressed = zlib.uncompress(gzipToRawDeflate(data), options);

	// Copy the field out byte by byte and decode it as little-endian: the
	// bytes may sit at an unaligned address, and must not be interpreted
	// in host byte order.
	auto tail = cast(const(ubyte)[])data[$-1].contents;
	ubyte[4] sizeField = tail[$-4 .. $];
	immutable uint storedSize = littleEndianToNative!uint(sizeField);

	// The stored size is the length modulo 2^32, so compare only the low
	// 32 bits; otherwise valid streams of 4 GiB and larger are rejected.
	immutable uint actualSize = cast(uint)(uncompressed.bytes.length & uint.max);
	enforce(actualSize == storedSize, "Decompressed data length mismatch");
	return uncompressed;
}
111 
/// ditto
Data uncompress(Data input)
{
	// Wrap the single buffer as a one-element set, then flatten the result.
	Data[] inflated = uncompress([input]);
	return inflated.joinData();
}
113 
unittest
{
	// Compress and decompress `src`, first as one contiguous buffer and
	// then as one chunk per byte, expecting the input back both times.
	void testRoundtrip(ubyte[] src)
	{
		auto packed = cast(ubyte[])compress(Data(src)).toHeap;
		auto unpacked = cast(ubyte[])uncompress(Data(packed)).toHeap;
		assert(unpacked == src);

		Data[] pieces;
		foreach (b; src)
			pieces ~= Data([b]);
		unpacked = cast(ubyte[])uncompress(compress(pieces)).joinToHeap;
		assert(unpacked == src);
	}

	testRoundtrip(cast(ubyte[])
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
");
	testRoundtrip([0]);
	testRoundtrip(null);

	// Decompress a fixed Gzip stream and compare with the expected bytes.
	void testUncompress(ubyte[] src, ubyte[] dst)
	{
		auto actual = cast(ubyte[])uncompress(Data(src)).toHeap;
		assert(actual == dst);
	}

	// This sample has the FNAME flag set (byte 3 is 0x08).
	testUncompress([
		0x1F, 0x8B, 0x08, 0x08, 0xD3, 0xB2, 0x6E, 0x4F, 0x02, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2E, 0x74,
		0x78, 0x74, 0x00, 0x2B, 0xC9, 0x48, 0x55, 0x28, 0x2C, 0xCD, 0x4C, 0xCE, 0x56, 0x48, 0x2A, 0xCA,
		0x2F, 0xCF, 0x53, 0x48, 0xCB, 0xAF, 0x50, 0xC8, 0x2A, 0xCD, 0x2D, 0x28, 0x56, 0xC8, 0x2F, 0x4B,
		0x2D, 0x52, 0x00, 0x49, 0xE7, 0x24, 0x56, 0x55, 0x2A, 0xA4, 0xE4, 0xA7, 0x03, 0x00, 0x14, 0x51,
		0x0C, 0xCE, 0x2B, 0x00, 0x00, 0x00], cast(ubyte[])"the quick brown fox jumps over the lazy dog");
}