26 #error "This file cannot be used when building without GZip-support."
34 namespace seqan3::contrib
// Thread-count setting whose default is whatever std::thread::hardware_concurrency() reports
// (the standard allows that function to return 0 when the value cannot be determined).
// NOTE(review): the consumers of this variable are not visible in this excerpt; presumably the
// BGZF stream buffers read it to decide how many worker threads to start — confirm.
40 inline static uint64_t bgzf_thread_count = std::thread::hardware_concurrency();
// The 28 bytes of an empty BGZF block, used as the end-of-file marker.
// Read against the gzip member layout (RFC 1952) and the BGZF extra field, the bytes are:
//   1f 8b        gzip magic
//   08           compression method (deflate)
//   04           flags: FEXTRA set
//   00 00 00 00  modification time
//   00 ff        extra flags, operating system (0xff = unknown)
//   06 00        length of the extra field (6, little endian)
//   42 43        extra subfield identifier 'B' 'C'
//   02 00        subfield payload length (2)
//   1b 00        subfield payload 0x001b = 27, i.e. total block size (28) minus one
//   03 00        an empty deflate stream
//   00 00 00 00  CRC32 of the (empty) uncompressed data
//   00 00 00 00  uncompressed size (0)
52 static constexpr std::array<char, 28> BGZF_END_OF_FILE_MARKER {{
'\x1f',
'\x8b',
'\x08',
'\x04',
53 '\x00',
'\x00',
'\x00',
'\x00',
54 '\x00',
'\xff',
'\x06',
'\x00',
55 '\x42',
'\x43',
'\x02',
'\x00',
56 '\x1b',
'\x00',
'\x03',
'\x00',
57 '\x00',
'\x00',
'\x00',
'\x00',
58 '\x00',
'\x00',
'\x00',
'\x00'}};
// Primary template: an empty struct parameterised on a compression tag type.
// The per-algorithm state lives in the specialisations for detail::gz_compression and
// detail::bgzf_compression that appear further down in this file.
60 template <
typename TAlgTag>
61 struct CompressionContext {};
// Primary template, declared but intentionally left undefined here: only specialisations
// (e.g. for detail::bgzf_compression below) provide the actual constants.
63 template <
typename TAlgTag>
64 struct DefaultPageSize;
// Specialisation holding the zlib state for gz (de)compression.
// NOTE(review): only a fragment is visible in this excerpt. The declaration of the `strm`
// member and the function enclosing the memset (presumably the constructor) are not shown.
67 struct CompressionContext<detail::gz_compression>
// Zero-fill the entire z_stream so that every field starts from a known state.
73 std::memset(&strm, 0,
sizeof(z_stream));
// BGZF context: inherits everything from the gz context and adds BGZF-specific data.
78 struct CompressionContext<detail::bgzf_compression>:
79 CompressionContext<detail::gz_compression>
// Length of the BGZF block header, taken from the size of the magic header constant.
81 static constexpr
size_t BLOCK_HEADER_LENGTH = detail::bgzf_compression::magic_header.size();
// NOTE(review): no use of headerPos is visible in this excerpt; presumably a cursor into the
// block header while it is being read or written — confirm against the stream buffers.
82 unsigned char headerPos;
// Size constants for BGZF blocks. VALUE is the number of bytes that remain in a block of
// MAX_BLOCK_SIZE after subtracting the header, the footer and ZLIB_BLOCK_OVERHEAD.
86 struct DefaultPageSize<detail::bgzf_compression>
// Upper bound for a complete block: 64 KiB.
88 static const unsigned MAX_BLOCK_SIZE = 64 * 1024;
// The footer is two 32-bit fields; _compressBlock in this file writes a CRC32 followed by the
// uncompressed byte count there.
89 static const unsigned BLOCK_FOOTER_LENGTH = 8;
// NOTE(review): presumably the 5-byte overhead of a stored (uncompressed) deflate block,
// see RFC 1951 section 3.2.4 — confirm.
91 static const unsigned ZLIB_BLOCK_OVERHEAD = 5;
// Re-export the header length of the BGZF compression context as a compile-time constant.
95 enum { BLOCK_HEADER_LENGTH = CompressionContext<detail::bgzf_compression>::BLOCK_HEADER_LENGTH };
96 static const unsigned VALUE = MAX_BLOCK_SIZE - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH - ZLIB_BLOCK_OVERHEAD;
// Prepares ctx.strm for compression by calling zlib's deflateInit2().
// Throws io_error when initialisation fails.
// NOTE(review): the status check guarding the throw is not visible in this excerpt.
108 compressInit(CompressionContext<detail::gz_compression> & ctx)
// A negative windowBits value makes zlib produce a raw deflate stream (no zlib/gzip header or
// trailer) with a window of 2^15 bytes, despite the constant's name.
110 const int GZIP_WINDOW_BITS = -15;
111 const int Z_DEFAULT_MEM_LEVEL = 8;
// NULL allocator callbacks tell zlib to fall back to its default allocation functions.
113 ctx.strm.zalloc = NULL;
114 ctx.strm.zfree = NULL;
// Z_BEST_SPEED selects the fastest (least thorough) compression level.
120 int status = deflateInit2(&ctx.strm, Z_BEST_SPEED, Z_DEFLATED,
121 GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
123 throw io_error(
"Calling deflateInit2() failed for gz file.");
// BGZF overload: forwards to the gz overload by casting the context to its base class, so
// both formats share the same deflate initialisation.
// NOTE(review): any additional BGZF-specific setup is not visible in this excerpt.
131 compressInit(CompressionContext<detail::bgzf_compression> & ctx)
133 compressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Reads a 16-bit unsigned value from the first sizeof(uint16_t) bytes at `buffer`.
// The bytes are copied into a local integer (so `buffer` needs no particular alignment) and
// the result is passed through detail::to_little_endian before being returned.
// NOTE(review): presumably to_little_endian swaps bytes only on big-endian hosts, so the
// buffer is interpreted as little endian — confirm against its definition.
142 _bgzfUnpack16(
char const * buffer)
145 std::uninitialized_copy(buffer, buffer +
sizeof(uint16_t),
reinterpret_cast<char *
>(&tmp));
146 return detail::to_little_endian(tmp);
// Reads a 32-bit unsigned value from the first sizeof(uint32_t) bytes at `buffer`.
// Same scheme as the 16-bit variant: byte-wise copy into a local integer, then
// detail::to_little_endian on the result.
// NOTE(review): presumably interprets the buffer as little endian — confirm against
// the definition of to_little_endian.
150 _bgzfUnpack32(
char const * buffer)
153 std::uninitialized_copy(buffer, buffer +
sizeof(uint32_t),
reinterpret_cast<char *
>(&tmp));
154 return detail::to_little_endian(tmp);
// Serialises a 16-bit value byte by byte.
// NOTE(review): the destination argument of the copy is cut off in this excerpt; presumably
// the bytes are written to `buffer` — confirm.
162 _bgzfPack16(
char * buffer, uint16_t value)
// Convert the by-value parameter first, so the raw bytes copied below are in the converted order.
164 value = detail::to_little_endian(value);
165 std::uninitialized_copy(
reinterpret_cast<char *
>(&value),
166 reinterpret_cast<char *
>(&value) +
sizeof(uint16_t),
// Serialises a 32-bit value byte by byte.
// NOTE(review): the destination argument of the copy is cut off in this excerpt; presumably
// the bytes are written to `buffer` — confirm.
171 _bgzfPack32(
char * buffer, uint32_t value)
// Convert the by-value parameter first, so the raw bytes copied below are in the converted order.
173 value = detail::to_little_endian(value);
174 std::uninitialized_copy(
reinterpret_cast<char *
>(&value),
175 reinterpret_cast<char *
>(&value) +
sizeof(uint32_t),
// Compresses one source buffer into a single BGZF block written at dstBegin.
// Resulting layout: magic header | deflate payload | 8-byte footer (CRC32, uncompressed size).
//
// dstBegin / dstCapacity  output buffer and its capacity; the element type must be one byte wide.
// srcBegin / srcLength    input elements; srcLength * sizeof(TSourceValue) bytes are compressed.
// ctx                     BGZF context whose z_stream is used; deflateEnd() is called on it on
//                         every path through this function.
// Returns the total number of bytes written to the destination (header + payload + footer).
// Throws io_error if deflate() does not end with Z_STREAM_END, or when deflateEnd() fails.
// NOTE(review): the call that initialises ctx.strm and the status check before the second
// throw are not visible in this excerpt.
183 template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
185 _compressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
186 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
188 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
189 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions, checked in debug builds only: room for header and footer, a byte-sized
// destination element type and a 32-bit unsigned.
191 assert(dstCapacity > BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH);
192 assert(
sizeof(TDestValue) == 1u);
193 assert(
sizeof(
unsigned) == 4u);
// Start the block with the constant header; its size field is patched in further down.
196 std::ranges::copy(detail::bgzf_compression::magic_header, dstBegin);
// Point zlib at the input and at the space between header and footer.
200 ctx.strm.next_in = (Bytef *)(srcBegin);
201 ctx.strm.next_out = (Bytef *)(dstBegin + BLOCK_HEADER_LENGTH);
202 ctx.strm.avail_in = srcLength *
sizeof(TSourceValue);
// Reserve the footer up front so the payload can never overwrite it.
203 ctx.strm.avail_out = dstCapacity - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
// Z_FINISH: the whole input must be compressed in this single call; any result other than
// Z_STREAM_END (e.g. the output space ran out) is treated as an error.
205 int status = deflate(&ctx.strm, Z_FINISH);
206 if (status != Z_STREAM_END)
208 deflateEnd(&ctx.strm);
209 throw io_error(
"Deflation failed. Compressed BGZF data is too big.");
212 status = deflateEnd(&ctx.strm);
214 throw io_error(
"BGZF deflateEnd() failed.");
// avail_out started at capacity minus header and footer, so this difference is the size of
// the whole block: header + compressed payload + footer.
221 size_t len = dstCapacity - ctx.strm.avail_out;
// Store (block size - 1) as a 16-bit value at byte offset 16 of the header.
222 _bgzfPack16(dstBegin + 16, len - 1);
// Advance to the footer: CRC32 of the uncompressed input, then its size in bytes.
224 dstBegin += len - BLOCK_FOOTER_LENGTH;
225 _bgzfPack32(dstBegin, crc32(crc32(0u, NULL, 0u), (Bytef *)(srcBegin), srcLength *
sizeof(TSourceValue)));
226 _bgzfPack32(dstBegin + 4, srcLength *
sizeof(TSourceValue));
228 return dstCapacity - ctx.strm.avail_out;
// Prepares ctx.strm for decompression by calling zlib's inflateInit2().
// Throws io_error when initialisation fails.
// NOTE(review): the status check guarding the throw is not visible in this excerpt.
236 decompressInit(CompressionContext<detail::gz_compression> & ctx)
// A negative windowBits value makes zlib expect a raw deflate stream (no zlib/gzip header or
// trailer), despite the constant's name.
238 const int GZIP_WINDOW_BITS = -15;
// NULL allocator callbacks tell zlib to fall back to its default allocation functions.
240 ctx.strm.zalloc = NULL;
241 ctx.strm.zfree = NULL;
242 int status = inflateInit2(&ctx.strm, GZIP_WINDOW_BITS);
244 throw io_error(
"GZip inflateInit2() failed.");
// BGZF overload: forwards to the gz overload by casting the context to its base class, so
// both formats share the same inflate initialisation.
// NOTE(review): any additional BGZF-specific setup is not visible in this excerpt.
252 decompressInit(CompressionContext<detail::bgzf_compression> & ctx)
254 decompressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Validates and decompresses exactly one BGZF block.
//
// dstBegin / dstCapacity  output buffer and its capacity in elements of TDestValue.
// srcBegin / srcLength    the complete block (header, payload, footer); the element type must
//                         be one byte wide.
// ctx                     BGZF context whose z_stream is used; inflateEnd() is called on it on
//                         every path that reaches inflate().
// Returns the number of TDestValue elements produced.
// Throws io_error when the block is too short, the header is invalid, the size stored in the
// header disagrees with srcLength, inflation does not end with Z_STREAM_END, inflateEnd()
// fails, or the CRC32 / size stored in the footer do not match the decompressed data.
// NOTE(review): the call that initialises ctx.strm and the status check before the
// "inflateEnd() failed" throw are not visible in this excerpt.
262 template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
264 _decompressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
265 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
267 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
268 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions, checked in debug builds only: byte-sized source elements, 32-bit unsigned.
270 assert(
sizeof(TSourceValue) == 1u);
271 assert(
sizeof(
unsigned) == 4u);
// A block consisting of nothing but header and footer (or less) is rejected.
275 if (srcLength <= BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH)
276 throw io_error(
"BGZF block too short.");
278 if (!detail::bgzf_compression::validate_header(std::span{srcBegin, srcLength}))
279 throw io_error(
"Invalid BGZF block header.");
// The header stores (block size - 1) at byte offset 16; it must match the bytes handed in.
281 size_t compressedLen = _bgzfUnpack16(srcBegin + 16) + 1u;
282 if (compressedLen != srcLength)
283 throw io_error(
"BGZF compressed size mismatch.");
// Feed zlib only the payload between header and footer.
289 ctx.strm.next_in = (Bytef *)(srcBegin + BLOCK_HEADER_LENGTH);
290 ctx.strm.next_out = (Bytef *)(dstBegin);
291 ctx.strm.avail_in = srcLength - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
292 ctx.strm.avail_out = dstCapacity *
sizeof(TDestValue);
// Z_FINISH: the whole payload must inflate in this single call. Note that every status other
// than Z_STREAM_END ends up in the "too big" error below, whatever its actual cause.
294 int status = inflate(&ctx.strm, Z_FINISH);
295 if (status != Z_STREAM_END)
297 inflateEnd(&ctx.strm);
298 throw io_error(
"Inflation failed. Decompressed BGZF data is too big.");
301 status = inflateEnd(&ctx.strm);
303 throw io_error(
"BGZF inflateEnd() failed.");
// Checksum over what was produced, computed from the initial capacity and the remaining
// avail_out.
310 unsigned crc = crc32(crc32(0u, NULL, 0u), (Bytef *)(dstBegin), dstCapacity - ctx.strm.avail_out);
// Advance to the footer: stored CRC32 first, stored uncompressed size second.
312 srcBegin += compressedLen - BLOCK_FOOTER_LENGTH;
313 if (_bgzfUnpack32(srcBegin) != crc)
314 throw io_error(
"BGZF wrong checksum.");
316 if (_bgzfUnpack32(srcBegin + 4) != dstCapacity - ctx.strm.avail_out)
317 throw io_error(
"BGZF size mismatch.");
319 return (dstCapacity - ctx.strm.avail_out) /
sizeof(TDestValue);
Adaptations of algorithms from the Ranges TS.
Provides various transformation traits used by the range module.
Provides exceptions used in the I/O module.
Provides C++20 additions to the <memory> header.
Provides std::span from the C++20 standard library.
Provides utility functions for bit twiddling.