00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <shogun/lib/Compressor.h>
00011 #include <shogun/mathematics/Math.h>
00012 #include <string.h>
00013
00014 #ifdef USE_LZO
00015 #include <lzo/lzoconf.h>
00016 #include <lzo/lzoutil.h>
00017 #include <lzo/lzo1x.h>
00018 #endif
00019
00020 #ifdef USE_GZIP
00021 #include <zlib.h>
00022 #endif
00023
00024 #ifdef USE_BZIP2
00025 #include <bzlib.h>
00026 #endif
00027
00028 #ifdef USE_LZMA
00029 #include <lzma.h>
00030 #endif
00031
00032 #ifdef USE_SNAPPY
00033 #include <snappy.h>
00034 #endif
00035
00036 using namespace shogun;
00037
00038 CCompressor::CCompressor(void)
00039 :CSGObject(), compression_type(UNCOMPRESSED)
00040 {
00041 SG_UNSTABLE("CCompressor::CCompressor(void)", "\n");
00042 }
00043
00044 void CCompressor::compress(uint8_t* uncompressed, uint64_t uncompressed_size,
00045 uint8_t* &compressed, uint64_t &compressed_size, int32_t level)
00046 {
00047 uint64_t initial_buffer_size=0;
00048
00049 if (uncompressed_size==0)
00050 {
00051 compressed=NULL;
00052 compressed_size=0;
00053 return;
00054 }
00055
00056 switch (compression_type)
00057 {
00058 case UNCOMPRESSED:
00059 {
00060 initial_buffer_size=uncompressed_size;
00061 compressed_size=uncompressed_size;
00062 compressed=SG_MALLOC(uint8_t, compressed_size);
00063 memcpy(compressed, uncompressed, uncompressed_size);
00064 break;
00065 }
00066 #ifdef USE_LZO
00067 case LZO:
00068 {
00069 if (lzo_init() != LZO_E_OK)
00070 SG_ERROR("Error initializing LZO Compression\n");
00071
00072 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
00073 if (!lzo_wrkmem)
00074 SG_ERROR("Error allocating LZO workmem\n");
00075
00076 initial_buffer_size=uncompressed_size +
00077 uncompressed_size / 16+ 64 + 3;
00078
00079 compressed_size=initial_buffer_size;
00080 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00081
00082 lzo_uint lzo_size=compressed_size;
00083
00084 int ret;
00085 if (level<9)
00086 {
00087 ret=lzo1x_1_15_compress(uncompressed, uncompressed_size,
00088 compressed, &lzo_size, lzo_wrkmem);
00089 }
00090 else
00091 {
00092 ret=lzo1x_999_compress(uncompressed, uncompressed_size,
00093 compressed, &lzo_size, lzo_wrkmem);
00094 }
00095
00096 compressed_size=lzo_size;
00097 lzo_free(lzo_wrkmem);
00098
00099 if (ret!= LZO_E_OK)
00100 SG_ERROR("Error lzo-compressing data\n");
00101
00102 break;
00103 }
00104 #endif
00105 #ifdef USE_GZIP
00106 case GZIP:
00107 {
00108 initial_buffer_size=1.001*uncompressed_size + 12;
00109 compressed_size=initial_buffer_size;
00110 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00111 uLongf gz_size=compressed_size;
00112
00113 if (compress2(compressed, &gz_size, uncompressed,
00114 uncompressed_size, level) != Z_OK)
00115 {
00116 SG_ERROR("Error gzip-compressing data\n");
00117 }
00118 compressed_size=gz_size;
00119 break;
00120 }
00121 #endif
00122 #ifdef USE_BZIP2
00123 case BZIP2:
00124 {
00125 bz_stream strm;
00126 strm.bzalloc=NULL;
00127 strm.bzfree=NULL;
00128 strm.opaque=NULL;
00129 initial_buffer_size=1.01*uncompressed_size + 600;
00130 compressed_size=initial_buffer_size;
00131 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00132 if (BZ2_bzCompressInit(&strm, level, 0, 0)!=BZ_OK)
00133 SG_ERROR("Error initializing bzip2 compressor\n");
00134
00135 strm.next_in=(char*) uncompressed;
00136 strm.avail_in=(unsigned int) uncompressed_size;
00137 strm.next_out=(char*) compressed;
00138 strm.avail_out=(unsigned int) compressed_size;
00139 if (BZ2_bzCompress(&strm, BZ_RUN) != BZ_RUN_OK)
00140 SG_ERROR("Error bzip2-compressing data (BZ_RUN)\n");
00141
00142 int ret=0;
00143 while (true)
00144 {
00145 ret=BZ2_bzCompress(&strm, BZ_FINISH);
00146 if (ret==BZ_FINISH_OK)
00147 continue;
00148 if (ret==BZ_STREAM_END)
00149 break;
00150 else
00151 SG_ERROR("Error bzip2-compressing data (BZ_FINISH)\n");
00152 }
00153 BZ2_bzCompressEnd(&strm);
00154 compressed_size=(((uint64_t) strm.total_out_hi32) << 32) + strm.total_out_lo32;
00155 break;
00156 }
00157 #endif
00158 #ifdef USE_LZMA
00159 case LZMA:
00160 {
00161 lzma_stream strm = LZMA_STREAM_INIT;
00162 initial_buffer_size = lzma_stream_buffer_bound(uncompressed_size);
00163 compressed_size=initial_buffer_size;
00164 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00165 strm.next_in=uncompressed;
00166 strm.avail_in=(size_t) uncompressed_size;
00167 strm.next_out=compressed;
00168 strm.avail_out=(size_t) compressed_size;
00169
00170 if (lzma_easy_encoder(&strm, level, LZMA_CHECK_CRC32) != LZMA_OK)
00171 SG_ERROR("Error initializing lzma compressor\n");
00172 if (lzma_code(&strm, LZMA_RUN) != LZMA_OK)
00173 SG_ERROR("Error lzma-compressing data (LZMA_RUN)\n");
00174
00175 lzma_ret ret;
00176 while (true)
00177 {
00178 ret=lzma_code(&strm, LZMA_FINISH);
00179 if (ret==LZMA_OK)
00180 continue;
00181 if (ret==LZMA_STREAM_END)
00182 break;
00183 else
00184 SG_ERROR("Error lzma-compressing data (LZMA_FINISH)\n");
00185 }
00186 lzma_end(&strm);
00187 compressed_size=strm.total_out;
00188 break;
00189 }
00190 #endif
00191 #ifdef USE_SNAPPY
00192 case SNAPPY:
00193 {
00194 compressed=SG_MALLOC(uint8_t, snappy::MaxCompressedLength((size_t) uncompressed_size));
00195 size_t output_length;
00196 snappy::RawCompress((char*) uncompressed, size_t(uncompressed_size), (char*) compressed, &output_length);
00197 compressed_size=(uint64_t) output_length;
00198 break;
00199 }
00200 #endif
00201 default:
00202 SG_ERROR("Unknown compression type\n");
00203 }
00204
00205 if (compressed)
00206 CMath::resize(compressed, initial_buffer_size, compressed_size);
00207 }
00208
00209 void CCompressor::decompress(uint8_t* compressed, uint64_t compressed_size,
00210 uint8_t* uncompressed, uint64_t& uncompressed_size)
00211 {
00212 if (compressed_size==0)
00213 {
00214 uncompressed_size=0;
00215 return;
00216 }
00217
00218 switch (compression_type)
00219 {
00220 case UNCOMPRESSED:
00221 {
00222 ASSERT(uncompressed_size>=compressed_size);
00223 uncompressed_size=compressed_size;
00224 memcpy(uncompressed, compressed, uncompressed_size);
00225 break;
00226 }
00227 #ifdef USE_LZO
00228 case LZO:
00229 {
00230 if (lzo_init() != LZO_E_OK)
00231 SG_ERROR("Error initializing LZO Compression\n");
00232
00233 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
00234 if (!lzo_wrkmem)
00235 SG_ERROR("Error allocating LZO workmem\n");
00236
00237 lzo_uint lzo_size=uncompressed_size;
00238 if (lzo1x_decompress(compressed, compressed_size, uncompressed,
00239 &lzo_size, NULL) != LZO_E_OK)
00240 {
00241 SG_ERROR("Error uncompressing lzo-data\n");
00242 }
00243 uncompressed_size=lzo_size;
00244
00245 lzo_free(lzo_wrkmem);
00246 break;
00247 }
00248 #endif
00249 #ifdef USE_GZIP
00250 case GZIP:
00251 {
00252 uLongf gz_size=uncompressed_size;
00253 if (uncompress(uncompressed, &gz_size, compressed,
00254 compressed_size) != Z_OK)
00255 {
00256 SG_ERROR("Error uncompressing gzip-data\n");
00257 }
00258 uncompressed_size=gz_size;
00259 break;
00260 }
00261 #endif
00262 #ifdef USE_BZIP2
00263 case BZIP2:
00264 {
00265 bz_stream strm;
00266 strm.bzalloc=NULL;
00267 strm.bzfree=NULL;
00268 strm.opaque=NULL;
00269 if (BZ2_bzDecompressInit(&strm, 0, 0)!=BZ_OK)
00270 SG_ERROR("Error initializing bzip2 decompressor\n");
00271 strm.next_in=(char*) compressed;
00272 strm.avail_in=(unsigned int) compressed_size;
00273 strm.next_out=(char*) uncompressed;
00274 strm.avail_out=(unsigned int) uncompressed_size;
00275 if (BZ2_bzDecompress(&strm) != BZ_STREAM_END || strm.avail_in!=0)
00276 SG_ERROR("Error uncompressing bzip2-data\n");
00277 BZ2_bzDecompressEnd(&strm);
00278 break;
00279 }
00280 #endif
00281 #ifdef USE_LZMA
00282 case LZMA:
00283 {
00284 lzma_stream strm = LZMA_STREAM_INIT;
00285 strm.next_in=compressed;
00286 strm.avail_in=(size_t) compressed_size;
00287 strm.next_out=uncompressed;
00288 strm.avail_out=(size_t) uncompressed_size;
00289
00290 uint64_t memory_limit=lzma_easy_decoder_memusage(9);
00291
00292 if (lzma_stream_decoder(&strm, memory_limit, 0)!= LZMA_OK)
00293 SG_ERROR("Error initializing lzma decompressor\n");
00294 if (lzma_code(&strm, LZMA_RUN) != LZMA_STREAM_END)
00295 SG_ERROR("Error decompressing lzma data\n");
00296 lzma_end(&strm);
00297 break;
00298 }
00299 #endif
00300 #ifdef USE_SNAPPY
00301 case SNAPPY:
00302 {
00303 size_t uncompressed_length;
00304 if (!snappy::GetUncompressedLength( (char*) compressed,
00305 (size_t) compressed_size, &uncompressed_length))
00306 SG_ERROR("Error obtaining uncompressed length\n");
00307
00308 ASSERT(uncompressed_length<=uncompressed_size);
00309 uncompressed_size=uncompressed_length;
00310 if (!snappy::RawUncompress((char*) compressed,
00311 (size_t) compressed_size,
00312 (char*) uncompressed))
00313 SG_ERROR("Error uncompressing snappy data\n");
00314
00315 break;
00316 }
00317 #endif
00318 default:
00319 SG_ERROR("Unknown compression type\n");
00320 }
00321 }