Compressor.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Berlin Institute of Technology
00009  */
00010 #include "lib/Compressor.h"
00011 #include "lib/Mathematics.h"
00012 #include <string.h>
00013 
00014 #ifdef USE_LZO
00015 #include <lzo/lzoconf.h>
00016 #include <lzo/lzoutil.h>
00017 #include <lzo/lzo1x.h>
00018 #endif
00019 
00020 #ifdef USE_GZIP
00021 #include <zlib.h>
00022 #endif
00023 
00024 #ifdef USE_BZIP2
00025 #include <bzlib.h>
00026 #endif
00027 
00028 #ifdef USE_LZMA
00029 #include <lzma.h>
00030 #endif
00031 
00032 using namespace shogun;
00033 
00034 CCompressor::CCompressor(void)
00035     :CSGObject(), compression_type(UNCOMPRESSED)
00036 {
00037     SG_UNSTABLE("CCompressor::CCompressor(void)", "\n");
00038 }
00039 
00040 void CCompressor::compress(uint8_t* uncompressed, uint64_t uncompressed_size,
00041         uint8_t* &compressed, uint64_t &compressed_size, int32_t level)
00042 {
00043     uint64_t initial_buffer_size=0;
00044 
00045     if (uncompressed_size==0)
00046     {
00047         compressed=NULL;
00048         compressed_size=0;
00049         return;
00050     }
00051 
00052     switch (compression_type)
00053     {
00054         case UNCOMPRESSED:
00055             {
00056                 initial_buffer_size=uncompressed_size;
00057                 compressed_size=uncompressed_size;
00058                 compressed=new uint8_t[compressed_size];
00059                 memcpy(compressed, uncompressed, uncompressed_size);
00060                 break;
00061             }
00062 #ifdef USE_LZO
00063         case LZO:
00064             {
00065                 if (lzo_init() != LZO_E_OK)
00066                     SG_ERROR("Error initializing LZO Compression\n");
00067 
00068                 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
00069                 if (!lzo_wrkmem)
00070                     SG_ERROR("Error allocating LZO workmem\n");
00071 
00072                 initial_buffer_size=uncompressed_size +
00073                     uncompressed_size / 16+ 64 + 3;
00074 
00075                 compressed_size=initial_buffer_size;
00076                 compressed=new uint8_t[initial_buffer_size];
00077 
00078                 lzo_uint lzo_size=compressed_size;
00079 
00080                 int ret;
00081                 if (level<9)
00082                 {
00083                     ret=lzo1x_1_15_compress(uncompressed, uncompressed_size,
00084                                 compressed, &lzo_size, lzo_wrkmem);
00085                 }
00086                 else
00087                 {
00088                     ret=lzo1x_999_compress(uncompressed, uncompressed_size,
00089                                 compressed, &lzo_size, lzo_wrkmem);
00090                 }
00091 
00092                 compressed_size=lzo_size;
00093                 lzo_free(lzo_wrkmem);
00094 
00095                 if (ret!= LZO_E_OK)
00096                     SG_ERROR("Error lzo-compressing data\n");
00097 
00098                 break;
00099             }
00100 #endif
00101 #ifdef USE_GZIP
00102         case GZIP:
00103             {
00104                 initial_buffer_size=1.001*uncompressed_size + 12;
00105                 compressed_size=initial_buffer_size;
00106                 compressed=new uint8_t[initial_buffer_size];
00107                 uLongf gz_size=compressed_size;
00108 
00109                 if (compress2(compressed, &gz_size, uncompressed,
00110                             uncompressed_size, level) != Z_OK)
00111                 {
00112                     SG_ERROR("Error gzip-compressing data\n");
00113                 }
00114                 compressed_size=gz_size;
00115                 break;
00116             }
00117 #endif
00118 #ifdef USE_BZIP2
00119         case BZIP2:
00120             {
00121                 bz_stream strm;
00122                 strm.bzalloc=NULL;
00123                 strm.bzfree=NULL;
00124                 strm.opaque=NULL;
00125                 initial_buffer_size=1.01*uncompressed_size + 600;
00126                 compressed_size=initial_buffer_size;
00127                 compressed=new uint8_t[initial_buffer_size];
00128                 if (BZ2_bzCompressInit(&strm, level, 0, 0)!=BZ_OK)
00129                     SG_ERROR("Error initializing bzip2 compressor\n");
00130 
00131                 strm.next_in=(char*) uncompressed;
00132                 strm.avail_in=(unsigned int) uncompressed_size;
00133                 strm.next_out=(char*) compressed;
00134                 strm.avail_out=(unsigned int) compressed_size;
00135                 if (BZ2_bzCompress(&strm, BZ_RUN) != BZ_RUN_OK)
00136                     SG_ERROR("Error bzip2-compressing data (BZ_RUN)\n");
00137 
00138                 int ret=0;
00139                 while (true)
00140                 {
00141                     ret=BZ2_bzCompress(&strm, BZ_FINISH);
00142                     if (ret==BZ_FINISH_OK)
00143                         continue;
00144                     if (ret==BZ_STREAM_END)
00145                         break;
00146                     else
00147                         SG_ERROR("Error bzip2-compressing data (BZ_FINISH)\n");
00148                 }
00149                 BZ2_bzCompressEnd(&strm);
00150                 compressed_size=(((uint64_t) strm.total_out_hi32) << 32) + strm.total_out_lo32;
00151                 break;
00152             }
00153 #endif
00154 #ifdef USE_LZMA
00155         case LZMA:
00156             {
00157                 lzma_stream strm = LZMA_STREAM_INIT;
00158                 initial_buffer_size = lzma_stream_buffer_bound(uncompressed_size);
00159                 compressed_size=initial_buffer_size;
00160                 compressed=new uint8_t[initial_buffer_size];
00161                 strm.next_in=uncompressed;
00162                 strm.avail_in=(size_t) uncompressed_size;
00163                 strm.next_out=compressed;
00164                 strm.avail_out=(size_t) compressed_size;
00165 
00166                 if (lzma_easy_encoder(&strm, level, LZMA_CHECK_CRC32) != LZMA_OK)
00167                     SG_ERROR("Error initializing lzma compressor\n");
00168                 if (lzma_code(&strm, LZMA_RUN) != LZMA_OK)
00169                     SG_ERROR("Error lzma-compressing data (LZMA_RUN)\n");
00170 
00171                 lzma_ret ret;
00172                 while (true)
00173                 {
00174                     ret=lzma_code(&strm, LZMA_FINISH);
00175                     if (ret==LZMA_OK)
00176                         continue;
00177                     if (ret==LZMA_STREAM_END)
00178                         break;
00179                     else
00180                         SG_ERROR("Error lzma-compressing data (LZMA_FINISH)\n");
00181                 }
00182                 lzma_end(&strm);
00183                 compressed_size=strm.total_out;
00184                 break;
00185             }
00186 #endif
00187         default:
00188             SG_ERROR("Unknown compression type\n");
00189     }
00190 
00191     if (compressed)
00192         CMath::resize(compressed, initial_buffer_size, compressed_size);
00193 }
00194 
00195 void CCompressor::decompress(uint8_t* compressed, uint64_t compressed_size,
00196         uint8_t* uncompressed, uint64_t& uncompressed_size)
00197 {
00198     if (compressed_size==0)
00199     {
00200         uncompressed_size=0;
00201         return;
00202     }
00203 
00204     switch (compression_type)
00205     {
00206         case UNCOMPRESSED:
00207             {
00208                 ASSERT(uncompressed_size>=compressed_size);
00209                 uncompressed_size=compressed_size;
00210                 memcpy(uncompressed, compressed, uncompressed_size);
00211                 break;
00212             }
00213 #ifdef USE_LZO
00214         case LZO:
00215             {
00216                 if (lzo_init() != LZO_E_OK)
00217                     SG_ERROR("Error initializing LZO Compression\n");
00218 
00219                 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
00220                 if (!lzo_wrkmem)
00221                     SG_ERROR("Error allocating LZO workmem\n");
00222 
00223                 lzo_uint lzo_size=uncompressed_size;
00224                 if (lzo1x_decompress(compressed, compressed_size, uncompressed,
00225                             &lzo_size, NULL) != LZO_E_OK)
00226                 {
00227                     SG_ERROR("Error uncompressing lzo-data\n");
00228                 }
00229                 uncompressed_size=lzo_size;
00230 
00231                 lzo_free(lzo_wrkmem);
00232                 break;
00233             }
00234 #endif
00235 #ifdef USE_GZIP
00236         case GZIP:
00237             {
00238                 uLongf gz_size=uncompressed_size;
00239                 if (uncompress(uncompressed, &gz_size, compressed,
00240                             compressed_size) != Z_OK)
00241                 {
00242                     SG_ERROR("Error uncompressing gzip-data\n");
00243                 }
00244                 uncompressed_size=gz_size;
00245                 break;
00246             }
00247 #endif
00248 #ifdef USE_BZIP2
00249         case BZIP2:
00250             {
00251                 bz_stream strm;
00252                 strm.bzalloc=NULL;
00253                 strm.bzfree=NULL;
00254                 strm.opaque=NULL;
00255                 if (BZ2_bzDecompressInit(&strm, 0, 0)!=BZ_OK)
00256                     SG_ERROR("Error initializing bzip2 decompressor\n");
00257                 strm.next_in=(char*) compressed;
00258                 strm.avail_in=(unsigned int) compressed_size;
00259                 strm.next_out=(char*) uncompressed;
00260                 strm.avail_out=(unsigned int) uncompressed_size;
00261                 if (BZ2_bzDecompress(&strm) != BZ_STREAM_END || strm.avail_in!=0)
00262                     SG_ERROR("Error uncompressing bzip2-data\n");
00263                 BZ2_bzDecompressEnd(&strm);
00264                 break;
00265             }
00266 #endif
00267 #ifdef USE_LZMA
00268         case LZMA:
00269             {
00270                 lzma_stream strm = LZMA_STREAM_INIT;
00271                 strm.next_in=compressed;
00272                 strm.avail_in=(size_t) compressed_size;
00273                 strm.next_out=uncompressed;
00274                 strm.avail_out=(size_t) uncompressed_size;
00275 
00276                 uint64_t memory_limit=lzma_easy_decoder_memusage(9);
00277 
00278                 if (lzma_stream_decoder(&strm, memory_limit, 0)!= LZMA_OK)
00279                     SG_ERROR("Error initializing lzma decompressor\n");
00280                 if (lzma_code(&strm, LZMA_RUN) != LZMA_STREAM_END)
00281                     SG_ERROR("Error decompressing lzma data\n");
00282                 lzma_end(&strm);
00283                 break;
00284             }
00285 #endif
00286         default:
00287             SG_ERROR("Unknown compression type\n");
00288     }
00289 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation