/**
 * ae.utils.gzip
 *
 * Gzip framing (header/footer) around raw deflate streams
 * produced and consumed by `ae.utils.zlib`.
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <ae@cy.md>
 *   Simon Arlott
 */

module ae.utils.gzip;

// TODO: recent zlib versions support gzip headers,
// reimplement this module as zlib flags

import std.exception;
import std.conv;
import std.range.primitives : ElementType;
debug import std.stdio, std.file;

import ae.sys.data;
import ae.utils.array;
import ae.utils.bitmanip;

static import zlib = ae.utils.zlib;
public import ae.utils.zlib : ZlibOptions, ZlibMode;
import std.digest.crc;

/// Bits of the FLG byte in the Gzip header.
private enum
{
	FTEXT = 1,
	FHCRC = 2,
	FEXTRA = 4,
	FNAME = 8,
	FCOMMENT = 16
}

/// Size of the fixed part of a Gzip header (ID1, ID2, CM, FLG, MTIME, XFL, OS).
private enum gzipHeaderSize = 10;

/// Size of the Gzip footer (CRC32 followed by ISIZE, 4 bytes each).
private enum gzipFooterSize = 8;

/// Calculate CRC32 from `Data[]`
/// Params:
///   data = range of `Data` chunks; they are hashed in order,
///          as if they were one contiguous buffer.
/// Returns: the CRC32 of the concatenated contents.
uint crc32(R)(R data)
	if (is(ElementType!R : const(Data)))
{
	CRC32 crc;
	foreach (ref d; data)
		crc.put(cast(ubyte[])d.contents);
	auto result = crc.finish();
	// The 4 digest bytes are reinterpreted as a host-order uint.
	return *cast(uint*)result.ptr;
}

unittest
{
	assert(crc32([Data("ab"), Data("c")]) == 0x352441C2);
}

/// Add a Gzip header to deflated data.
/// Params:
///   compressed = zlib-wrapped deflate stream (as produced by `zlib.compress`);
///                its 2 leading and 4 trailing bytes are dropped.
///   dataCrc    = CRC32 of the uncompressed data.
///   dataLength = length of the uncompressed data, in bytes.
/// Returns: Gzip header, raw deflate stream and Gzip footer.
DataVec deflate2gzip(scope Data[] compressed, uint dataCrc, size_t dataLength)
{
	ubyte[] header;
	header.length = gzipHeaderSize;
	header[0] = 0x1F;
	header[1] = 0x8B;
	header[2] = 0x08;
	header[3..8] = 0;  // TODO: set MTIME
	header[8] = 4;
	header[9] = 3;     // TODO: set OS

	// The footer's size field holds the input size modulo 2^32 (RFC 1952),
	// so truncate rather than throwing on inputs of 4 GiB or more.
	// This also matches the comparison performed by `uncompress`.
	uint[2] footer = [dataCrc, cast(uint)dataLength];

	return DataVec(
		Data(header),
		compressed.bytes[2 .. $ - 4],
		Data(footer),
	);
}

/// Compress data to Gzip.
DataVec compress(scope Data[] data, ZlibOptions options = ZlibOptions.init)
{
	return deflate2gzip(zlib.compress(data, options)[], crc32(data), data.bytes.length);
}

Data compress(Data input) { return compress(input.toArray).joinData(); } /// ditto

/// Strip the Gzip header from `data`.
/// Only headers without FHCRC, FEXTRA and FCOMMENT are supported;
/// an FNAME field, if present, is skipped.
/// Returns: the bytes between the header and the 8-byte footer.
/// Throws: `Exception` if the input is too short, is not Gzip,
///         uses an unsupported feature, or has an unterminated file name.
DataVec gzipToRawDeflate(scope Data[] data)
{
	auto bytes = data.bytes;
	// Both the fixed header and the footer must be present,
	// otherwise the slice below would be out of range.
	enforce(bytes.length >= gzipHeaderSize + gzipFooterSize, "Gzip too short");
	enforce(bytes[0] == 0x1F && bytes[1] == 0x8B, "Invalid Gzip signature");
	enforce(bytes[2] == 0x08, "Unsupported Gzip compression method");
	ubyte flg = bytes[3];
	enforce((flg & FHCRC)==0, "FHCRC not supported");
	enforce((flg & FEXTRA)==0, "FEXTRA not supported");
	enforce((flg & FCOMMENT)==0, "FCOMMENT not supported");

	size_t start = gzipHeaderSize;
	size_t deflateEnd = bytes.length - gzipFooterSize;
	if (flg & FNAME)
	{
		// Skip the zero-terminated file name, never reading into the footer
		// or past the end of the input.
		while (start < deflateEnd && bytes[start])
			start++;
		enforce(start < deflateEnd, "Unterminated Gzip file name");
		start++; // skip the terminating zero byte
	}
	return bytes[start..deflateEnd];
}

/// Uncompress Gzip-compressed data.
/// Throws: `Exception` if the Gzip framing is invalid or the decompressed
///         length does not match the size recorded in the footer.
DataVec uncompress(scope Data[] data)
{
	auto bytes = data.bytes;
	enforce(bytes.length >= 4, "No data to decompress");

	ZlibOptions options; options.mode = ZlibMode.raw;
	DataVec uncompressed = zlib.uncompress(gzipToRawDeflate(data)[], options);

	// The last 4 bytes hold the little-endian size of the original data;
	// only its low 32 bits are compared.
	LittleEndian!uint size;
	bytes[$-4 .. $].copyTo(size.toArray);
	enforce(cast(uint)uncompressed.bytes.length == size, "Decompressed data length mismatch");

	return uncompressed;
}

Data uncompress(Data input) { return uncompress(input.toArray).joinData(); } /// ditto

unittest
{
	void testRoundtrip(ubyte[] src)
	{
		ubyte[] def = cast(ubyte[])  compress(Data(src)).toHeap;
		ubyte[] res = cast(ubyte[])uncompress(Data(def)).toHeap;
		assert(res == src);

		DataVec srcData;
		foreach (c; src)
			srcData ~= Data([c]);
		res = cast(ubyte[])uncompress(compress(srcData[])[]).joinToHeap;
		assert(res == src);
	}

	testRoundtrip(cast(ubyte[])
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
");
	testRoundtrip([0]);
	testRoundtrip(null);

	void testUncompress(ubyte[] src, ubyte[] dst)
	{
		assert(cast(ubyte[])uncompress(Data(src)).toHeap == dst);
	}

	testUncompress([
		0x1F, 0x8B, 0x08, 0x08, 0xD3, 0xB2, 0x6E, 0x4F, 0x02, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2E, 0x74,
		0x78, 0x74, 0x00, 0x2B, 0xC9, 0x48, 0x55, 0x28, 0x2C, 0xCD, 0x4C, 0xCE, 0x56, 0x48, 0x2A, 0xCA,
		0x2F, 0xCF, 0x53, 0x48, 0xCB, 0xAF, 0x50, 0xC8, 0x2A, 0xCD, 0x2D, 0x28, 0x56, 0xC8, 0x2F, 0x4B,
		0x2D, 0x52, 0x00, 0x49, 0xE7, 0x24, 0x56, 0x55, 0x2A, 0xA4, 0xE4, 0xA7, 0x03, 0x00, 0x14, 0x51,
		0x0C, 0xCE, 0x2B, 0x00, 0x00, 0x00], cast(ubyte[])"the quick brown fox jumps over the lazy dog");

	// Malformed input must be reported with an exception,
	// not with an out-of-range access.
	void testRejected(ubyte[] src)
	{
		assertThrown!Exception(uncompress(Data(src)));
	}

	// Valid header, but nothing after it (no room for the footer).
	testRejected([0x1F, 0x8B, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03]);
	// FNAME set, but the name is never terminated.
	testRejected([
		0x1F, 0x8B, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03,
		0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61]);
}