/**
 * ae.utils.gzip
 *
 * License:
 *   This Source Code Form is subject to the terms of
 *   the Mozilla Public License, v. 2.0. If a copy of
 *   the MPL was not distributed with this file, You
 *   can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Authors:
 *   Vladimir Panteleev <ae@cy.md>
 *   Simon Arlott
 */

module ae.utils.gzip;

// TODO: recent zlib versions support gzip headers,
// reimplement this module as zlib flags

import std.exception;
import std.conv;
import std.range.primitives : ElementType;
debug import std.stdio, std.file;

import ae.sys.data;
import ae.sys.dataset : DataVec, bytes, joinData, copyTo, joinToGC;
import ae.utils.array;
import ae.utils.bitmanip;

static import zlib = ae.utils.zlib;
public import ae.utils.zlib : ZlibOptions, ZlibMode;
import std.digest.crc;

// Bit masks for the FLG byte of a Gzip member header (RFC 1952).
private enum
{
	FTEXT = 1,
	FHCRC = 2,
	FEXTRA = 4,
	FNAME = 8,
	FCOMMENT = 16
}

/// Calculate CRC32 from `Data[]`
///
/// Params:
///   data = range whose elements are `Data` chunks; the chunks are
///          fed to the digest in order, as one contiguous stream.
/// Returns: the CRC32 of the concatenated chunk contents, as a number.
uint crc32(R)(R data)
	if (is(ElementType!R : const(Data)))
{
	import std.bitmanip : littleEndianToNative;

	CRC32 crc;
	foreach (ref d; data)
		d.asDataOf!ubyte.enter((scope contents) {
			crc.put(contents);
		});
	// `finish` yields the digest as four bytes, least-significant first.
	// Decode them explicitly instead of reinterpreting the bytes through a
	// `uint*`, which is only correct on little-endian hosts.
	ubyte[4] result = crc.finish();
	return littleEndianToNative!uint(result);
}

unittest
{
	assert(crc32([Data("ab".asBytes), Data("c".asBytes)]) == 0x352441C2);
}

/// Add a Gzip header to deflated data.
///
/// Params:
///   compressed = output of `zlib.compress`; the first 2 and the last 4
///                bytes are discarded (presumably the zlib header and
///                Adler-32 trailer - this assumes a zlib-wrapped stream).
///   dataCrc    = CRC32 of the uncompressed data.
///   dataLength = length of the uncompressed data, in bytes. Stored
///                modulo 2^32, as the Gzip format prescribes.
/// Returns: header, raw deflate stream and trailer, as a `DataVec`.
/// Throws: `Exception` if `compressed` is too short to strip.
DataVec deflate2gzip(scope Data[] compressed, uint dataCrc, size_t dataLength)
{
	import std.bitmanip : nativeToLittleEndian;

	auto stream = compressed.bytes;
	// Guard the [2 .. $ - 4] slice below against underflow.
	enforce(stream.length >= 6, "Zlib stream too short");

	ubyte[] header = new ubyte[10];
	header[0] = 0x1F;  // ID1
	header[1] = 0x8B;  // ID2
	header[2] = 0x08;  // CM: deflate
	header[3..8] = 0;  // FLG (no optional fields) and MTIME. TODO: set MTIME
	header[8] = 4;     // XFL
	header[9] = 3;     // OS. TODO: set OS

	// The trailer is CRC32 followed by ISIZE, both little-endian regardless
	// of the host byte order. ISIZE is the input size modulo 2^32, so
	// truncate rather than throw on inputs of 4 GiB or more.
	ubyte[8] footer;
	footer[0 .. 4] = nativeToLittleEndian(dataCrc);
	footer[4 .. 8] = nativeToLittleEndian(cast(uint) dataLength);

	return DataVec(
		Data(header),
		stream[2 .. $ - 4],
		Data(footer[]),
	);
}

/// Compress data to Gzip.
DataVec compress(scope Data[] data, ZlibOptions options = ZlibOptions.init)
{
	// Compress with zlib, then let deflate2gzip replace the zlib framing
	// with a Gzip header and a CRC32 / length trailer.
	return deflate2gzip(zlib.compress(data, options)[], crc32(data), data.bytes.length);
}

Data compress(Data input) { return compress(input.asSlice).joinData(); } /// ditto

/// Strip the Gzip header from `data`.
///
/// The 8-byte trailer (CRC32 and length) is removed as well, leaving only
/// the raw deflate stream.
///
/// Params:
///   data = a complete Gzip member, possibly split across several chunks.
/// Returns: the bytes between the header and the trailer.
/// Throws: `Exception` if the input is truncated, has a bad signature or
///         compression method, uses the unsupported FHCRC / FEXTRA /
///         FCOMMENT fields, or has an unterminated file name.
DataVec gzipToRawDeflate(scope Data[] data)
{
	auto bytes = data.bytes;
	enforce(bytes.length >= 10, "Gzip too short");
	enforce(bytes[0] == 0x1F && bytes[1] == 0x8B, "Invalid Gzip signature");
	enforce(bytes[2] == 0x08, "Unsupported Gzip compression method");
	ubyte flg = bytes[3];
	enforce((flg & FHCRC)==0, "FHCRC not supported");
	enforce((flg & FEXTRA)==0, "FEXTRA not supported");
	enforce((flg & FCOMMENT)==0, "FCOMMENT not supported");

	size_t start = 10;
	if (flg & FNAME)
	{
		// Skip the zero-terminated file name. Stop at the end of the input
		// so that a missing terminator is reported instead of reading out
		// of bounds.
		while (start < bytes.length && bytes[start])
			start++;
		enforce(start < bytes.length, "Unterminated Gzip file name");
		start++; // step over the terminator
	}

	// There must still be room for the 8-byte trailer after the header;
	// otherwise the slice below would have its end before its start.
	enforce(bytes.length - start >= 8, "Gzip too short");
	return bytes[start..bytes.length-8];
}

/// Uncompress Gzip-compressed data.
///
/// Params:
///   data = a complete Gzip member, possibly split across several chunks.
/// Returns: the decompressed data.
/// Throws: `Exception` if the container is malformed (see
///         `gzipToRawDeflate`), or if the decompressed length or CRC32
///         does not match the values stored in the trailer.
DataVec uncompress(scope Data[] data)
{
	auto bytes = data.bytes;
	enforce(bytes.length >= 4, "No data to decompress");

	ZlibOptions options; options.mode = ZlibMode.raw;
	// gzipToRawDeflate also guarantees that the 8-byte trailer read below
	// is present.
	DataVec uncompressed = zlib.uncompress(gzipToRawDeflate(data)[], options);

	// Last four bytes: uncompressed length modulo 2^32, little-endian.
	LittleEndian!uint size;
	bytes[$-4 .. $].copyTo(size.asSlice.asBytes);
	enforce(cast(uint)uncompressed.bytes.length == size, "Decompressed data length mismatch");

	// Preceding four bytes: CRC32 of the uncompressed data. Checking it
	// catches corruption that leaves the length intact.
	LittleEndian!uint storedCrc;
	bytes[$-8 .. $-4].copyTo(storedCrc.asSlice.asBytes);
	enforce(crc32(uncompressed[]) == storedCrc, "Decompressed data CRC mismatch");

	return uncompressed;
}

Data uncompress(Data input) { return uncompress(input.asSlice).joinData(); } /// ditto

unittest
{
	void testRoundtrip(ubyte[] src)
	{
		ubyte[] def = compress(Data(src)).asDataOf!ubyte.toGC();
		ubyte[] res = uncompress(Data(def)).asDataOf!ubyte.toGC();
		assert(res == src);

		// Same again, with the input split into one chunk per byte.
		DataVec srcData;
		foreach (c; src)
			srcData ~= Data([c]);
		res = uncompress(compress(srcData[])[]).joinToGC;
		assert(res == src);
	}

	testRoundtrip(cast(ubyte[])
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
");
	testRoundtrip([0]);
	testRoundtrip(null);

	void testUncompress(ubyte[] src, ubyte[] dst)
	{
		assert(cast(ubyte[])uncompress(Data(src)).toGC() == dst);
	}

	// A Gzip file carrying a file name (FNAME set).
	testUncompress([
		0x1F, 0x8B, 0x08, 0x08, 0xD3, 0xB2, 0x6E, 0x4F, 0x02, 0x00, 0x74, 0x65, 0x73, 0x74, 0x2E, 0x74,
		0x78, 0x74, 0x00, 0x2B, 0xC9, 0x48, 0x55, 0x28, 0x2C, 0xCD, 0x4C, 0xCE, 0x56, 0x48, 0x2A, 0xCA,
		0x2F, 0xCF, 0x53, 0x48, 0xCB, 0xAF, 0x50, 0xC8, 0x2A, 0xCD, 0x2D, 0x28, 0x56, 0xC8, 0x2F, 0x4B,
		0x2D, 0x52, 0x00, 0x49, 0xE7, 0x24, 0x56, 0x55, 0x2A, 0xA4, 0xE4, 0xA7, 0x03, 0x00, 0x14, 0x51,
		0x0C, 0xCE, 0x2B, 0x00, 0x00, 0x00], cast(ubyte[])"the quick brown fox jumps over the lazy dog");

	void testInvalid(ubyte[] src)
	{
		assertThrown(uncompress(Data(src)));
	}

	// Header only, no room for the trailer.
	testInvalid([0x1F, 0x8B, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03]);
	// FNAME set, but the name is never terminated.
	testInvalid([0x1F, 0x8B, 0x08, 0x08, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03, 0x61]);

	// Corrupting the stored CRC32 must be detected.
	ubyte[] corrupt = compress(Data(cast(ubyte[])"abc".dup)).asDataOf!ubyte.toGC();
	corrupt[$ - 5] ^= 0xFF;
	testInvalid(corrupt);
}