SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Compressor.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2009 Soeren Sonnenburg
8  * Copyright (C) 2009 Berlin Institute of Technology
9  */
10 #include <shogun/lib/Compressor.h>
11 #include <shogun/lib/SGVector.h>
13 #include <string.h>
14 
15 #ifdef USE_LZO
16 #include <lzo/lzoconf.h>
17 #include <lzo/lzoutil.h>
18 #include <lzo/lzo1x.h>
19 #endif
20 
21 #ifdef USE_GZIP
22 #include <zlib.h>
23 #endif
24 
25 #ifdef USE_BZIP2
26 #include <bzlib.h>
27 #endif
28 
29 #ifdef USE_LZMA
30 #include <lzma.h>
31 #endif
32 
33 #ifdef USE_SNAPPY
34 #include <snappy.h>
35 #endif
36 
37 using namespace shogun;
38 
40  :CSGObject(), compression_type(UNCOMPRESSED)
41 {
42  SG_UNSTABLE("CCompressor::CCompressor()", "\n")
43 }
44 
45 void CCompressor::compress(uint8_t* uncompressed, uint64_t uncompressed_size,
46  uint8_t* &compressed, uint64_t &compressed_size, int32_t level)
47 {
48  uint64_t initial_buffer_size=0;
49 
50  if (uncompressed_size==0)
51  {
52  compressed=NULL;
53  compressed_size=0;
54  return;
55  }
56 
57  switch (compression_type)
58  {
59  case UNCOMPRESSED:
60  {
61  initial_buffer_size=uncompressed_size;
62  compressed_size=uncompressed_size;
63  compressed=SG_MALLOC(uint8_t, compressed_size);
64  memcpy(compressed, uncompressed, uncompressed_size);
65  break;
66  }
67 #ifdef USE_LZO
68  case LZO:
69  {
70  if (lzo_init() != LZO_E_OK)
71  SG_ERROR("Error initializing LZO Compression\n")
72 
73  lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
74  if (!lzo_wrkmem)
75  SG_ERROR("Error allocating LZO workmem\n")
76 
77  initial_buffer_size=uncompressed_size +
78  uncompressed_size / 16+ 64 + 3;
79 
80  compressed_size=initial_buffer_size;
81  compressed=SG_MALLOC(uint8_t, initial_buffer_size);
82 
83  lzo_uint lzo_size=compressed_size;
84 
85  int ret;
86  if (level<9)
87  {
88  ret=lzo1x_1_15_compress(uncompressed, uncompressed_size,
89  compressed, &lzo_size, lzo_wrkmem);
90  }
91  else
92  {
93  ret=lzo1x_999_compress(uncompressed, uncompressed_size,
94  compressed, &lzo_size, lzo_wrkmem);
95  }
96 
97  compressed_size=lzo_size;
98  lzo_free(lzo_wrkmem);
99 
100  if (ret!= LZO_E_OK)
101  SG_ERROR("Error lzo-compressing data\n")
102 
103  break;
104  }
105 #endif
106 #ifdef USE_GZIP
107  case GZIP:
108  {
109  initial_buffer_size=1.001*uncompressed_size + 12;
110  compressed_size=initial_buffer_size;
111  compressed=SG_MALLOC(uint8_t, initial_buffer_size);
112  uLongf gz_size=compressed_size;
113 
114  if (compress2(compressed, &gz_size, uncompressed,
115  uncompressed_size, level) != Z_OK)
116  {
117  SG_ERROR("Error gzip-compressing data\n")
118  }
119  compressed_size=gz_size;
120  break;
121  }
122 #endif
123 #ifdef USE_BZIP2
124  case BZIP2:
125  {
126  bz_stream strm;
127  strm.bzalloc=NULL;
128  strm.bzfree=NULL;
129  strm.opaque=NULL;
130  initial_buffer_size=1.01*uncompressed_size + 600;
131  compressed_size=initial_buffer_size;
132  compressed=SG_MALLOC(uint8_t, initial_buffer_size);
133  if (BZ2_bzCompressInit(&strm, level, 0, 0)!=BZ_OK)
134  SG_ERROR("Error initializing bzip2 compressor\n")
135 
136  strm.next_in=(char*) uncompressed;
137  strm.avail_in=(unsigned int) uncompressed_size;
138  strm.next_out=(char*) compressed;
139  strm.avail_out=(unsigned int) compressed_size;
140  if (BZ2_bzCompress(&strm, BZ_RUN) != BZ_RUN_OK)
141  SG_ERROR("Error bzip2-compressing data (BZ_RUN)\n")
142 
143  int ret=0;
144  while (true)
145  {
146  ret=BZ2_bzCompress(&strm, BZ_FINISH);
147  if (ret==BZ_FINISH_OK)
148  continue;
149  if (ret==BZ_STREAM_END)
150  break;
151  else
152  SG_ERROR("Error bzip2-compressing data (BZ_FINISH)\n")
153  }
154  BZ2_bzCompressEnd(&strm);
155  compressed_size=(((uint64_t) strm.total_out_hi32) << 32) + strm.total_out_lo32;
156  break;
157  }
158 #endif
159 #ifdef USE_LZMA
160  case LZMA:
161  {
162  lzma_stream strm = LZMA_STREAM_INIT;
163  initial_buffer_size = lzma_stream_buffer_bound(uncompressed_size);
164  compressed_size=initial_buffer_size;
165  compressed=SG_MALLOC(uint8_t, initial_buffer_size);
166  strm.next_in=uncompressed;
167  strm.avail_in=(size_t) uncompressed_size;
168  strm.next_out=compressed;
169  strm.avail_out=(size_t) compressed_size;
170 
171  if (lzma_easy_encoder(&strm, level, LZMA_CHECK_CRC32) != LZMA_OK)
172  SG_ERROR("Error initializing lzma compressor\n")
173  if (lzma_code(&strm, LZMA_RUN) != LZMA_OK)
174  SG_ERROR("Error lzma-compressing data (LZMA_RUN)\n")
175 
176  lzma_ret ret;
177  while (true)
178  {
179  ret=lzma_code(&strm, LZMA_FINISH);
180  if (ret==LZMA_OK)
181  continue;
182  if (ret==LZMA_STREAM_END)
183  break;
184  else
185  SG_ERROR("Error lzma-compressing data (LZMA_FINISH)\n")
186  }
187  lzma_end(&strm);
188  compressed_size=strm.total_out;
189  break;
190  }
191 #endif
192 #ifdef USE_SNAPPY
193  case SNAPPY:
194  {
195  compressed=SG_MALLOC(uint8_t, snappy::MaxCompressedLength((size_t) uncompressed_size));
196  size_t output_length;
197  snappy::RawCompress((char*) uncompressed, size_t(uncompressed_size), (char*) compressed, &output_length);
198  compressed_size=(uint64_t) output_length;
199  break;
200  }
201 #endif
202  default:
203  SG_ERROR("Unknown compression type\n")
204  }
205 
206  if (compressed)
207  compressed = SG_REALLOC(uint8_t, compressed, initial_buffer_size, compressed_size);
208 }
209 
210 void CCompressor::decompress(uint8_t* compressed, uint64_t compressed_size,
211  uint8_t* uncompressed, uint64_t& uncompressed_size)
212 {
213  if (compressed_size==0)
214  {
215  uncompressed_size=0;
216  return;
217  }
218 
219  switch (compression_type)
220  {
221  case UNCOMPRESSED:
222  {
223  ASSERT(uncompressed_size>=compressed_size)
224  uncompressed_size=compressed_size;
225  memcpy(uncompressed, compressed, uncompressed_size);
226  break;
227  }
228 #ifdef USE_LZO
229  case LZO:
230  {
231  if (lzo_init() != LZO_E_OK)
232  SG_ERROR("Error initializing LZO Compression\n")
233 
234  lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
235  if (!lzo_wrkmem)
236  SG_ERROR("Error allocating LZO workmem\n")
237 
238  lzo_uint lzo_size=uncompressed_size;
239  if (lzo1x_decompress(compressed, compressed_size, uncompressed,
240  &lzo_size, NULL) != LZO_E_OK)
241  {
242  SG_ERROR("Error uncompressing lzo-data\n")
243  }
244  uncompressed_size=lzo_size;
245 
246  lzo_free(lzo_wrkmem);
247  break;
248  }
249 #endif
250 #ifdef USE_GZIP
251  case GZIP:
252  {
253  uLongf gz_size=uncompressed_size;
254  if (uncompress(uncompressed, &gz_size, compressed,
255  compressed_size) != Z_OK)
256  {
257  SG_ERROR("Error uncompressing gzip-data\n")
258  }
259  uncompressed_size=gz_size;
260  break;
261  }
262 #endif
263 #ifdef USE_BZIP2
264  case BZIP2:
265  {
266  bz_stream strm;
267  strm.bzalloc=NULL;
268  strm.bzfree=NULL;
269  strm.opaque=NULL;
270  if (BZ2_bzDecompressInit(&strm, 0, 0)!=BZ_OK)
271  SG_ERROR("Error initializing bzip2 decompressor\n")
272  strm.next_in=(char*) compressed;
273  strm.avail_in=(unsigned int) compressed_size;
274  strm.next_out=(char*) uncompressed;
275  strm.avail_out=(unsigned int) uncompressed_size;
276  if (BZ2_bzDecompress(&strm) != BZ_STREAM_END || strm.avail_in!=0)
277  SG_ERROR("Error uncompressing bzip2-data\n")
278  BZ2_bzDecompressEnd(&strm);
279  break;
280  }
281 #endif
282 #ifdef USE_LZMA
283  case LZMA:
284  {
285  lzma_stream strm = LZMA_STREAM_INIT;
286  strm.next_in=compressed;
287  strm.avail_in=(size_t) compressed_size;
288  strm.next_out=uncompressed;
289  strm.avail_out=(size_t) uncompressed_size;
290 
291  uint64_t memory_limit=lzma_easy_decoder_memusage(9);
292 
293  if (lzma_stream_decoder(&strm, memory_limit, 0)!= LZMA_OK)
294  SG_ERROR("Error initializing lzma decompressor\n")
295  if (lzma_code(&strm, LZMA_RUN) != LZMA_STREAM_END)
296  SG_ERROR("Error decompressing lzma data\n")
297  lzma_end(&strm);
298  break;
299  }
300 #endif
301 #ifdef USE_SNAPPY
302  case SNAPPY:
303  {
304  size_t uncompressed_length;
305  if (!snappy::GetUncompressedLength( (char*) compressed,
306  (size_t) compressed_size, &uncompressed_length))
307  SG_ERROR("Error obtaining uncompressed length\n")
308 
309  ASSERT(uncompressed_length<=uncompressed_size)
310  uncompressed_size=uncompressed_length;
311  if (!snappy::RawUncompress((char*) compressed,
312  (size_t) compressed_size,
313  (char*) uncompressed))
314  SG_ERROR("Error uncompressing snappy data\n")
315 
316  break;
317  }
318 #endif
319  default:
320  SG_ERROR("Unknown compression type\n")
321  }
322 }

SHOGUN Machine Learning Toolbox - Documentation