Compressor.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Berlin Institute of Technology
00009  */
00010 #include <shogun/lib/Compressor.h>
00011 #include <shogun/mathematics/Math.h>
00012 #include <string.h>
00013 
00014 #ifdef USE_LZO
00015 #include <lzo/lzoconf.h>
00016 #include <lzo/lzoutil.h>
00017 #include <lzo/lzo1x.h>
00018 #endif
00019 
00020 #ifdef USE_GZIP
00021 #include <zlib.h>
00022 #endif
00023 
00024 #ifdef USE_BZIP2
00025 #include <bzlib.h>
00026 #endif
00027 
00028 #ifdef USE_LZMA
00029 #include <lzma.h>
00030 #endif
00031 
00032 #ifdef USE_SNAPPY
00033 #include <snappy.h>
00034 #endif
00035 
00036 using namespace shogun;
00037 
00038 CCompressor::CCompressor(void)
00039     :CSGObject(), compression_type(UNCOMPRESSED)
00040 {
00041     SG_UNSTABLE("CCompressor::CCompressor(void)", "\n");
00042 }
00043 
00044 void CCompressor::compress(uint8_t* uncompressed, uint64_t uncompressed_size,
00045         uint8_t* &compressed, uint64_t &compressed_size, int32_t level)
00046 {
00047     uint64_t initial_buffer_size=0;
00048 
00049     if (uncompressed_size==0)
00050     {
00051         compressed=NULL;
00052         compressed_size=0;
00053         return;
00054     }
00055 
00056     switch (compression_type)
00057     {
00058         case UNCOMPRESSED:
00059             {
00060                 initial_buffer_size=uncompressed_size;
00061                 compressed_size=uncompressed_size;
00062                 compressed=SG_MALLOC(uint8_t, compressed_size);
00063                 memcpy(compressed, uncompressed, uncompressed_size);
00064                 break;
00065             }
00066 #ifdef USE_LZO
00067         case LZO:
00068             {
00069                 if (lzo_init() != LZO_E_OK)
00070                     SG_ERROR("Error initializing LZO Compression\n");
00071 
00072                 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
00073                 if (!lzo_wrkmem)
00074                     SG_ERROR("Error allocating LZO workmem\n");
00075 
00076                 initial_buffer_size=uncompressed_size +
00077                     uncompressed_size / 16+ 64 + 3;
00078 
00079                 compressed_size=initial_buffer_size;
00080                 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00081 
00082                 lzo_uint lzo_size=compressed_size;
00083 
00084                 int ret;
00085                 if (level<9)
00086                 {
00087                     ret=lzo1x_1_15_compress(uncompressed, uncompressed_size,
00088                                 compressed, &lzo_size, lzo_wrkmem);
00089                 }
00090                 else
00091                 {
00092                     ret=lzo1x_999_compress(uncompressed, uncompressed_size,
00093                                 compressed, &lzo_size, lzo_wrkmem);
00094                 }
00095 
00096                 compressed_size=lzo_size;
00097                 lzo_free(lzo_wrkmem);
00098 
00099                 if (ret!= LZO_E_OK)
00100                     SG_ERROR("Error lzo-compressing data\n");
00101 
00102                 break;
00103             }
00104 #endif
00105 #ifdef USE_GZIP
00106         case GZIP:
00107             {
00108                 initial_buffer_size=1.001*uncompressed_size + 12;
00109                 compressed_size=initial_buffer_size;
00110                 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00111                 uLongf gz_size=compressed_size;
00112 
00113                 if (compress2(compressed, &gz_size, uncompressed,
00114                             uncompressed_size, level) != Z_OK)
00115                 {
00116                     SG_ERROR("Error gzip-compressing data\n");
00117                 }
00118                 compressed_size=gz_size;
00119                 break;
00120             }
00121 #endif
00122 #ifdef USE_BZIP2
00123         case BZIP2:
00124             {
00125                 bz_stream strm;
00126                 strm.bzalloc=NULL;
00127                 strm.bzfree=NULL;
00128                 strm.opaque=NULL;
00129                 initial_buffer_size=1.01*uncompressed_size + 600;
00130                 compressed_size=initial_buffer_size;
00131                 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00132                 if (BZ2_bzCompressInit(&strm, level, 0, 0)!=BZ_OK)
00133                     SG_ERROR("Error initializing bzip2 compressor\n");
00134 
00135                 strm.next_in=(char*) uncompressed;
00136                 strm.avail_in=(unsigned int) uncompressed_size;
00137                 strm.next_out=(char*) compressed;
00138                 strm.avail_out=(unsigned int) compressed_size;
00139                 if (BZ2_bzCompress(&strm, BZ_RUN) != BZ_RUN_OK)
00140                     SG_ERROR("Error bzip2-compressing data (BZ_RUN)\n");
00141 
00142                 int ret=0;
00143                 while (true)
00144                 {
00145                     ret=BZ2_bzCompress(&strm, BZ_FINISH);
00146                     if (ret==BZ_FINISH_OK)
00147                         continue;
00148                     if (ret==BZ_STREAM_END)
00149                         break;
00150                     else
00151                         SG_ERROR("Error bzip2-compressing data (BZ_FINISH)\n");
00152                 }
00153                 BZ2_bzCompressEnd(&strm);
00154                 compressed_size=(((uint64_t) strm.total_out_hi32) << 32) + strm.total_out_lo32;
00155                 break;
00156             }
00157 #endif
00158 #ifdef USE_LZMA
00159         case LZMA:
00160             {
00161                 lzma_stream strm = LZMA_STREAM_INIT;
00162                 initial_buffer_size = lzma_stream_buffer_bound(uncompressed_size);
00163                 compressed_size=initial_buffer_size;
00164                 compressed=SG_MALLOC(uint8_t, initial_buffer_size);
00165                 strm.next_in=uncompressed;
00166                 strm.avail_in=(size_t) uncompressed_size;
00167                 strm.next_out=compressed;
00168                 strm.avail_out=(size_t) compressed_size;
00169 
00170                 if (lzma_easy_encoder(&strm, level, LZMA_CHECK_CRC32) != LZMA_OK)
00171                     SG_ERROR("Error initializing lzma compressor\n");
00172                 if (lzma_code(&strm, LZMA_RUN) != LZMA_OK)
00173                     SG_ERROR("Error lzma-compressing data (LZMA_RUN)\n");
00174 
00175                 lzma_ret ret;
00176                 while (true)
00177                 {
00178                     ret=lzma_code(&strm, LZMA_FINISH);
00179                     if (ret==LZMA_OK)
00180                         continue;
00181                     if (ret==LZMA_STREAM_END)
00182                         break;
00183                     else
00184                         SG_ERROR("Error lzma-compressing data (LZMA_FINISH)\n");
00185                 }
00186                 lzma_end(&strm);
00187                 compressed_size=strm.total_out;
00188                 break;
00189             }
00190 #endif
00191 #ifdef USE_SNAPPY
00192         case SNAPPY:
00193             {
00194                 compressed=SG_MALLOC(uint8_t, snappy::MaxCompressedLength((size_t) uncompressed_size));
00195                 size_t output_length;
00196                 snappy::RawCompress((char*) uncompressed, size_t(uncompressed_size), (char*) compressed, &output_length);
00197                 compressed_size=(uint64_t) output_length;
00198                 break;
00199             }
00200 #endif
00201         default:
00202             SG_ERROR("Unknown compression type\n");
00203     }
00204 
00205     if (compressed)
00206         CMath::resize(compressed, initial_buffer_size, compressed_size);
00207 }
00208 
00209 void CCompressor::decompress(uint8_t* compressed, uint64_t compressed_size,
00210         uint8_t* uncompressed, uint64_t& uncompressed_size)
00211 {
00212     if (compressed_size==0)
00213     {
00214         uncompressed_size=0;
00215         return;
00216     }
00217 
00218     switch (compression_type)
00219     {
00220         case UNCOMPRESSED:
00221             {
00222                 ASSERT(uncompressed_size>=compressed_size);
00223                 uncompressed_size=compressed_size;
00224                 memcpy(uncompressed, compressed, uncompressed_size);
00225                 break;
00226             }
00227 #ifdef USE_LZO
00228         case LZO:
00229             {
00230                 if (lzo_init() != LZO_E_OK)
00231                     SG_ERROR("Error initializing LZO Compression\n");
00232 
00233                 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
00234                 if (!lzo_wrkmem)
00235                     SG_ERROR("Error allocating LZO workmem\n");
00236 
00237                 lzo_uint lzo_size=uncompressed_size;
00238                 if (lzo1x_decompress(compressed, compressed_size, uncompressed,
00239                             &lzo_size, NULL) != LZO_E_OK)
00240                 {
00241                     SG_ERROR("Error uncompressing lzo-data\n");
00242                 }
00243                 uncompressed_size=lzo_size;
00244 
00245                 lzo_free(lzo_wrkmem);
00246                 break;
00247             }
00248 #endif
00249 #ifdef USE_GZIP
00250         case GZIP:
00251             {
00252                 uLongf gz_size=uncompressed_size;
00253                 if (uncompress(uncompressed, &gz_size, compressed,
00254                             compressed_size) != Z_OK)
00255                 {
00256                     SG_ERROR("Error uncompressing gzip-data\n");
00257                 }
00258                 uncompressed_size=gz_size;
00259                 break;
00260             }
00261 #endif
00262 #ifdef USE_BZIP2
00263         case BZIP2:
00264             {
00265                 bz_stream strm;
00266                 strm.bzalloc=NULL;
00267                 strm.bzfree=NULL;
00268                 strm.opaque=NULL;
00269                 if (BZ2_bzDecompressInit(&strm, 0, 0)!=BZ_OK)
00270                     SG_ERROR("Error initializing bzip2 decompressor\n");
00271                 strm.next_in=(char*) compressed;
00272                 strm.avail_in=(unsigned int) compressed_size;
00273                 strm.next_out=(char*) uncompressed;
00274                 strm.avail_out=(unsigned int) uncompressed_size;
00275                 if (BZ2_bzDecompress(&strm) != BZ_STREAM_END || strm.avail_in!=0)
00276                     SG_ERROR("Error uncompressing bzip2-data\n");
00277                 BZ2_bzDecompressEnd(&strm);
00278                 break;
00279             }
00280 #endif
00281 #ifdef USE_LZMA
00282         case LZMA:
00283             {
00284                 lzma_stream strm = LZMA_STREAM_INIT;
00285                 strm.next_in=compressed;
00286                 strm.avail_in=(size_t) compressed_size;
00287                 strm.next_out=uncompressed;
00288                 strm.avail_out=(size_t) uncompressed_size;
00289 
00290                 uint64_t memory_limit=lzma_easy_decoder_memusage(9);
00291 
00292                 if (lzma_stream_decoder(&strm, memory_limit, 0)!= LZMA_OK)
00293                     SG_ERROR("Error initializing lzma decompressor\n");
00294                 if (lzma_code(&strm, LZMA_RUN) != LZMA_STREAM_END)
00295                     SG_ERROR("Error decompressing lzma data\n");
00296                 lzma_end(&strm);
00297                 break;
00298             }
00299 #endif
00300 #ifdef USE_SNAPPY
00301         case SNAPPY:
00302             {
00303                 size_t uncompressed_length;
00304                 if (!snappy::GetUncompressedLength( (char*) compressed,
00305                         (size_t) compressed_size, &uncompressed_length))
00306                     SG_ERROR("Error obtaining uncompressed length\n");
00307 
00308                 ASSERT(uncompressed_length<=uncompressed_size);
00309                 uncompressed_size=uncompressed_length;
00310                 if (!snappy::RawUncompress((char*) compressed,
00311                             (size_t) compressed_size,
00312                             (char*) uncompressed))
00313                     SG_ERROR("Error uncompressing snappy data\n");
00314 
00315                 break;
00316             }
00317 #endif
00318         default:
00319             SG_ERROR("Unknown compression type\n");
00320     }
00321 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation