SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Compressor.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2009 Soeren Sonnenburg
8  * Copyright (C) 2009 Berlin Institute of Technology
9  */
10 #include <shogun/lib/Compressor.h>
11 #include <shogun/io/SGIO.h>
12 #include <string.h>
13 
14 #ifdef USE_LZO
15 #include <lzo/lzoconf.h>
16 #include <lzo/lzoutil.h>
17 #include <lzo/lzo1x.h>
18 #endif
19 
20 #ifdef USE_GZIP
21 #include <zlib.h>
22 #endif
23 
24 #ifdef USE_BZIP2
25 #include <bzlib.h>
26 #endif
27 
28 #ifdef USE_LZMA
29 #include <lzma.h>
30 #endif
31 
32 #ifdef USE_SNAPPY
33 #include <snappy.h>
34 #endif
35 
36 using namespace shogun;
37 
39  :CSGObject(), compression_type(UNCOMPRESSED)
40 {
41  SG_UNSTABLE("CCompressor::CCompressor()", "\n")
42 }
43 
44 void CCompressor::compress(uint8_t* uncompressed, uint64_t uncompressed_size,
45  uint8_t* &compressed, uint64_t &compressed_size, int32_t level)
46 {
47  uint64_t initial_buffer_size=0;
48 
49  if (uncompressed_size==0)
50  {
51  compressed=NULL;
52  compressed_size=0;
53  return;
54  }
55 
56  switch (compression_type)
57  {
58  case UNCOMPRESSED:
59  {
60  initial_buffer_size=uncompressed_size;
61  compressed_size=uncompressed_size;
62  compressed=SG_MALLOC(uint8_t, compressed_size);
63  memcpy(compressed, uncompressed, uncompressed_size);
64  break;
65  }
66 #ifdef USE_LZO
67  case LZO:
68  {
69  if (lzo_init() != LZO_E_OK)
70  SG_ERROR("Error initializing LZO Compression\n")
71 
72  lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
73  if (!lzo_wrkmem)
74  SG_ERROR("Error allocating LZO workmem\n")
75 
76  initial_buffer_size=uncompressed_size +
77  uncompressed_size / 16+ 64 + 3;
78 
79  compressed_size=initial_buffer_size;
80  compressed=SG_MALLOC(uint8_t, initial_buffer_size);
81 
82  lzo_uint lzo_size=compressed_size;
83 
84  int ret;
85  if (level<9)
86  {
87  ret=lzo1x_1_15_compress(uncompressed, uncompressed_size,
88  compressed, &lzo_size, lzo_wrkmem);
89  }
90  else
91  {
92  ret=lzo1x_999_compress(uncompressed, uncompressed_size,
93  compressed, &lzo_size, lzo_wrkmem);
94  }
95 
96  compressed_size=lzo_size;
97  lzo_free(lzo_wrkmem);
98 
99  if (ret!= LZO_E_OK)
100  SG_ERROR("Error lzo-compressing data\n")
101 
102  break;
103  }
104 #endif
105 #ifdef USE_GZIP
106  case GZIP:
107  {
108  initial_buffer_size=1.001*uncompressed_size + 12;
109  compressed_size=initial_buffer_size;
110  compressed=SG_MALLOC(uint8_t, initial_buffer_size);
111  uLongf gz_size=compressed_size;
112 
113  if (compress2(compressed, &gz_size, uncompressed,
114  uncompressed_size, level) != Z_OK)
115  {
116  SG_ERROR("Error gzip-compressing data\n")
117  }
118  compressed_size=gz_size;
119  break;
120  }
121 #endif
122 #ifdef USE_BZIP2
123  case BZIP2:
124  {
125  bz_stream strm;
126  strm.bzalloc=NULL;
127  strm.bzfree=NULL;
128  strm.opaque=NULL;
129  initial_buffer_size=1.01*uncompressed_size + 600;
130  compressed_size=initial_buffer_size;
131  compressed=SG_MALLOC(uint8_t, initial_buffer_size);
132  if (BZ2_bzCompressInit(&strm, level, 0, 0)!=BZ_OK)
133  SG_ERROR("Error initializing bzip2 compressor\n")
134 
135  strm.next_in=(char*) uncompressed;
136  strm.avail_in=(unsigned int) uncompressed_size;
137  strm.next_out=(char*) compressed;
138  strm.avail_out=(unsigned int) compressed_size;
139  if (BZ2_bzCompress(&strm, BZ_RUN) != BZ_RUN_OK)
140  SG_ERROR("Error bzip2-compressing data (BZ_RUN)\n")
141 
142  int ret=0;
143  while (true)
144  {
145  ret=BZ2_bzCompress(&strm, BZ_FINISH);
146  if (ret==BZ_FINISH_OK)
147  continue;
148  if (ret==BZ_STREAM_END)
149  break;
150  else
151  SG_ERROR("Error bzip2-compressing data (BZ_FINISH)\n")
152  }
153  BZ2_bzCompressEnd(&strm);
154  compressed_size=(((uint64_t) strm.total_out_hi32) << 32) + strm.total_out_lo32;
155  break;
156  }
157 #endif
158 #ifdef USE_LZMA
159  case LZMA:
160  {
161  lzma_stream strm = LZMA_STREAM_INIT;
162  initial_buffer_size = lzma_stream_buffer_bound(uncompressed_size);
163  compressed_size=initial_buffer_size;
164  compressed=SG_MALLOC(uint8_t, initial_buffer_size);
165  strm.next_in=uncompressed;
166  strm.avail_in=(size_t) uncompressed_size;
167  strm.next_out=compressed;
168  strm.avail_out=(size_t) compressed_size;
169 
170  if (lzma_easy_encoder(&strm, level, LZMA_CHECK_CRC32) != LZMA_OK)
171  SG_ERROR("Error initializing lzma compressor\n")
172  if (lzma_code(&strm, LZMA_RUN) != LZMA_OK)
173  SG_ERROR("Error lzma-compressing data (LZMA_RUN)\n")
174 
175  lzma_ret ret;
176  while (true)
177  {
178  ret=lzma_code(&strm, LZMA_FINISH);
179  if (ret==LZMA_OK)
180  continue;
181  if (ret==LZMA_STREAM_END)
182  break;
183  else
184  SG_ERROR("Error lzma-compressing data (LZMA_FINISH)\n")
185  }
186  lzma_end(&strm);
187  compressed_size=strm.total_out;
188  break;
189  }
190 #endif
191 #ifdef USE_SNAPPY
192  case SNAPPY:
193  {
194  compressed=SG_MALLOC(uint8_t, snappy::MaxCompressedLength((size_t) uncompressed_size));
195  size_t output_length;
196  snappy::RawCompress((char*) uncompressed, size_t(uncompressed_size), (char*) compressed, &output_length);
197  compressed_size=(uint64_t) output_length;
198  break;
199  }
200 #endif
201  default:
202  SG_ERROR("Unknown compression type\n")
203  }
204 
205  if (compressed)
206  compressed = SG_REALLOC(uint8_t, compressed, initial_buffer_size, compressed_size);
207 }
208 
209 void CCompressor::decompress(uint8_t* compressed, uint64_t compressed_size,
210  uint8_t* uncompressed, uint64_t& uncompressed_size)
211 {
212  if (compressed_size==0)
213  {
214  uncompressed_size=0;
215  return;
216  }
217 
218  switch (compression_type)
219  {
220  case UNCOMPRESSED:
221  {
222  ASSERT(uncompressed_size>=compressed_size)
223  uncompressed_size=compressed_size;
224  memcpy(uncompressed, compressed, uncompressed_size);
225  break;
226  }
227 #ifdef USE_LZO
228  case LZO:
229  {
230  if (lzo_init() != LZO_E_OK)
231  SG_ERROR("Error initializing LZO Compression\n")
232 
233  lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
234  if (!lzo_wrkmem)
235  SG_ERROR("Error allocating LZO workmem\n")
236 
237  lzo_uint lzo_size=uncompressed_size;
238  if (lzo1x_decompress(compressed, compressed_size, uncompressed,
239  &lzo_size, NULL) != LZO_E_OK)
240  {
241  SG_ERROR("Error uncompressing lzo-data\n")
242  }
243  uncompressed_size=lzo_size;
244 
245  lzo_free(lzo_wrkmem);
246  break;
247  }
248 #endif
249 #ifdef USE_GZIP
250  case GZIP:
251  {
252  uLongf gz_size=uncompressed_size;
253  if (uncompress(uncompressed, &gz_size, compressed,
254  compressed_size) != Z_OK)
255  {
256  SG_ERROR("Error uncompressing gzip-data\n")
257  }
258  uncompressed_size=gz_size;
259  break;
260  }
261 #endif
262 #ifdef USE_BZIP2
263  case BZIP2:
264  {
265  bz_stream strm;
266  strm.bzalloc=NULL;
267  strm.bzfree=NULL;
268  strm.opaque=NULL;
269  if (BZ2_bzDecompressInit(&strm, 0, 0)!=BZ_OK)
270  SG_ERROR("Error initializing bzip2 decompressor\n")
271  strm.next_in=(char*) compressed;
272  strm.avail_in=(unsigned int) compressed_size;
273  strm.next_out=(char*) uncompressed;
274  strm.avail_out=(unsigned int) uncompressed_size;
275  if (BZ2_bzDecompress(&strm) != BZ_STREAM_END || strm.avail_in!=0)
276  SG_ERROR("Error uncompressing bzip2-data\n")
277  BZ2_bzDecompressEnd(&strm);
278  break;
279  }
280 #endif
281 #ifdef USE_LZMA
282  case LZMA:
283  {
284  lzma_stream strm = LZMA_STREAM_INIT;
285  strm.next_in=compressed;
286  strm.avail_in=(size_t) compressed_size;
287  strm.next_out=uncompressed;
288  strm.avail_out=(size_t) uncompressed_size;
289 
290  uint64_t memory_limit=lzma_easy_decoder_memusage(9);
291 
292  if (lzma_stream_decoder(&strm, memory_limit, 0)!= LZMA_OK)
293  SG_ERROR("Error initializing lzma decompressor\n")
294  if (lzma_code(&strm, LZMA_RUN) != LZMA_STREAM_END)
295  SG_ERROR("Error decompressing lzma data\n")
296  lzma_end(&strm);
297  break;
298  }
299 #endif
300 #ifdef USE_SNAPPY
301  case SNAPPY:
302  {
303  size_t uncompressed_length;
304  if (!snappy::GetUncompressedLength( (char*) compressed,
305  (size_t) compressed_size, &uncompressed_length))
306  SG_ERROR("Error obtaining uncompressed length\n")
307 
308  ASSERT(uncompressed_length<=uncompressed_size)
309  uncompressed_size=uncompressed_length;
310  if (!snappy::RawUncompress((char*) compressed,
311  (size_t) compressed_size,
312  (char*) uncompressed))
313  SG_ERROR("Error uncompressing snappy data\n")
314 
315  break;
316  }
317 #endif
318  default:
319  SG_ERROR("Unknown compression type\n")
320  }
321 }

SHOGUN Machine Learning Toolbox - Documentation