SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CircularBuffer.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evgeniy Andreev (gsomix)
8  */
9 
11 
12 #include <cstdio>
13 #include <cstring>
14 
15 using namespace shogun;
16 
18 {
 // Body of what is presumably the default constructor (its signature line is
 // missing from this listing). init() leaves the object without storage and
 // without a tokenizer, with all positions NULL and all counters zero.
19  init();
20 }
21 
22 CCircularBuffer::CCircularBuffer(int32_t buffer_size)
23 {
24  init();
25 
26  m_buffer=SGVector<char>(buffer_size);
27  m_buffer_end=m_buffer.vector+m_buffer.vlen;
28 
29  m_begin_pos=m_buffer.vector;
30  m_end_pos=m_begin_pos;
31 
32  m_bytes_available=m_buffer.vlen;
33 }
34 
36 {
 // Presumably the destructor body (its signature line is missing from this
 // listing): gives up the reference this object holds on m_tokenizer.
37  SG_UNREF(m_tokenizer);
38 }
39 
41 {
 // Installs `tokenizer` as the tokenizer used by this buffer (the signature
 // line of this function is missing from this listing).
 // The new object is referenced BEFORE the old one is released, so handing in
 // the tokenizer that is already installed cannot release it in between.
42  SG_REF(tokenizer);
43  SG_UNREF(m_tokenizer);
44  m_tokenizer=tokenizer;
45 }
46 
48 {
49  if (source.vector==NULL || source.vlen==0)
50  {
51  SG_ERROR("CCircularBuffer::push(SGVector<char>):: Invalid parameters! Source shouldn't be NULL or zero sized\n");
52  return -1;
53  }
54 
55  int32_t bytes_to_write;
56  if (source.vlen>m_bytes_available)
57  bytes_to_write=m_bytes_available;
58  else
59  bytes_to_write=source.vlen;
60 
61  if (bytes_to_write==0)
62  return 0;
63 
64  // determine which part of the memory block is free to read
65  if (m_end_pos>=m_begin_pos)
66  {
67  int32_t bytes_to_memory_end=m_buffer.vlen-(m_end_pos-m_buffer.vector);
68  if (bytes_to_memory_end<bytes_to_write)
69  {
70  // we need write as at end of memory block and at begin
71  // because logical structure of buffer is ring
72  int32_t first_chunk_size=bytes_to_memory_end;
73  int32_t second_chunk_size=bytes_to_write-first_chunk_size;
74 
75  bytes_to_write=append_chunk(source.vector, first_chunk_size, false);
76  bytes_to_write+=append_chunk(source.vector+first_chunk_size, second_chunk_size, true);
77  }
78  else
79  {
80  bytes_to_write=append_chunk(source.vector, bytes_to_write, false);
81  }
82  }
83  else
84  {
85  bytes_to_write=append_chunk(source.vector, bytes_to_write, false);
86  }
87 
88  return bytes_to_write;
89 }
90 
91 int32_t CCircularBuffer::push(FILE* source, int32_t source_size)
92 {
93  if (source==NULL || source_size==0)
94  {
95  SG_ERROR("CCircularBuffer::push(FILE*, int32_t):: Invalid parameters! Source shouldn't be NULL or zero sized\n");
96  return -1;
97  }
98 
99  int32_t bytes_to_write;
100  if (source_size>m_bytes_available)
101  bytes_to_write=m_bytes_available;
102  else
103  bytes_to_write=source_size;
104 
105  if (bytes_to_write==0)
106  return 0;
107 
108  // determine which part of the memory block is free to read
109  if (m_end_pos>=m_begin_pos)
110  {
111  int32_t bytes_to_memory_end=m_buffer.vlen-(m_end_pos-m_buffer.vector);
112  if (bytes_to_memory_end<bytes_to_write)
113  {
114  // we need write as at end of memory block and at begin
115  // because logical structure of buffer is ring
116  int32_t first_chunk_size=bytes_to_memory_end;
117  int32_t second_chunk_size=bytes_to_write-first_chunk_size;
118 
119  bytes_to_write=append_chunk(source, first_chunk_size, false);
120  bytes_to_write+=append_chunk(source, second_chunk_size, true);
121  }
122  else
123  {
124  bytes_to_write=append_chunk(source, bytes_to_write, false);
125  }
126  }
127  else
128  {
129  bytes_to_write=append_chunk(source, bytes_to_write, false);
130  }
131 
132  return bytes_to_write;
133 }
134 
136 {
137  SGVector<char> result;
138 
139  int32_t bytes_to_read;
140  if (num_bytes>m_bytes_count)
141  bytes_to_read=m_bytes_count;
142  else
143  bytes_to_read=num_bytes;
144 
145  if (bytes_to_read==0)
146  return 0;
147 
148  // determine which part of the memory block will be read
149  if (m_begin_pos>=m_end_pos)
150  {
151  int32_t bytes_to_memory_end=m_buffer.vlen-(m_begin_pos-m_buffer.vector);
152  if (bytes_to_memory_end<bytes_to_read)
153  {
154  // read continious block from end of memory and from begin
155  int32_t first_chunk_size=bytes_to_memory_end;
156  int32_t second_chunk_size=bytes_to_read-first_chunk_size;
157 
158  detach_chunk(&result.vector, &result.vlen, 0, first_chunk_size, false);
159  detach_chunk(&result.vector, &result.vlen, first_chunk_size, second_chunk_size, true);
160  }
161  else
162  {
163  detach_chunk(&result.vector, &result.vlen, 0, bytes_to_read, false);
164  }
165  }
166  else
167  {
168  detach_chunk(&result.vector, &result.vlen, 0, bytes_to_read, false);
169  }
170 
171  return result;
172 }
173 
175 {
176  if (m_tokenizer==NULL)
177  {
178  SG_ERROR("CCircularBuffer::has_next():: Tokenizer is not initialized\n");
179  return false;
180  }
181 
182  if (m_bytes_count==0)
183  return false;
184 
185  int32_t head_length=m_buffer_end-m_begin_pos;
186 
187  // determine position of finder pointer in memory block
188  if (m_last_idx<head_length)
189  {
190  if (m_end_pos>=m_begin_pos && m_bytes_available!=0)
191  {
192  return has_next_locally(m_begin_pos+m_last_idx, m_end_pos);
193  }
194  else
195  {
196  bool temp=false;
197  temp=has_next_locally(m_begin_pos+m_last_idx, m_buffer_end);
198 
199  if (temp)
200  return temp;
201 
202  return has_next_locally(m_buffer.vector+m_last_idx-head_length, m_end_pos);
203  }
204  }
205  else
206  {
207  return has_next_locally(m_buffer.vector+m_last_idx-head_length, m_end_pos);
208  }
209 
210  return false;
211 }
212 
214 {
 // Searches the buffered data for the next token, starting at the logical
 // offset m_last_idx (counted from the read position m_begin_pos).
 // `start` receives the start index of the token, the return value is its
 // end index; both are produced by next_token_idx_locally(), which adds
 // m_last_idx to the tokenizer's local result.
 // 0 is returned when no tokenizer is set or the buffer is empty.
215  index_t end;
216 
217  if (m_tokenizer==NULL)
218  {
219  SG_ERROR("CCircularBuffer::next_token_idx(index_t&):: Tokenizer is not initialized\n");
220  return 0;
221  }
222 
 // empty buffer: m_bytes_count is 0 here, so this returns 0
223  if (m_bytes_count==0)
224  return m_bytes_count;
225 
 // tail_length: bytes between the begin of the memory block and the write position
 // head_length: bytes between the read position and the physical end of the block
226  int32_t tail_length=m_end_pos-m_buffer.vector;
227  int32_t head_length=m_buffer_end-m_begin_pos;
228 
229  // determine position of finder pointer in memory block
230  if (m_last_idx<head_length)
231  {
 // data is one continuous region [m_begin_pos, m_end_pos)
232  if (m_end_pos>=m_begin_pos && m_bytes_available!=0)
233  {
234  end=next_token_idx_locally(start, m_begin_pos+m_last_idx, m_end_pos);
235  if (end<=m_bytes_count)
236  return end;
237  }
238  else
239  {
 // receives the start found by the second (wrapped-around) search
240  index_t temp_start;
241 
242  // in this case we should find first at end of memory block
243  end=next_token_idx_locally(start, m_begin_pos+m_last_idx, m_buffer_end);
244 
 // token ended before the physical end of the block: done
245  if (end<head_length)
246  return end;
247 
248  // and then at begin
 // The first search consumed everything up to the physical end, which
 // leaves m_last_idx equal to head_length, so the pointer below is the
 // begin of the memory block.
249  end=next_token_idx_locally(temp_start, m_buffer.vector+m_last_idx-head_length, m_end_pos);
250 
 // keep the start found in the head part unless it lies past that part
251  if (start>=head_length)
252  start=temp_start;
253 
254  return end;
255  }
256  }
257  else
258  {
 // the search offset already lies in the wrapped-around part
259  end=next_token_idx_locally(start, m_buffer.vector+m_last_idx-head_length, m_end_pos);
260  if (end-head_length<=tail_length)
261  return end;
262  }
263 
 // a found end index outside the stored data is reported as start=0, return 0
264  start=0;
265  return start;
266 }
267 
268 void CCircularBuffer::skip_characters(int32_t num_chars)
269 {
270  move_pointer(&m_begin_pos, m_begin_pos+num_chars);
271 
272  m_last_idx-=num_chars;
273  if (m_last_idx<0)
274  m_last_idx=0;
275 
276  m_bytes_available+=num_chars;
277  m_bytes_count-=num_chars;
278 }
279 
281 {
282  m_begin_pos=m_buffer.vector;
283  m_end_pos=m_begin_pos;
284 
285  m_last_idx=0;
286  m_bytes_available=m_buffer.vlen;
287  m_bytes_count=0;
288 }
289 
290 void CCircularBuffer::init()
291 {
292  m_buffer=SGVector<char>();
293  m_buffer_end=NULL;
294  m_tokenizer=NULL;
295 
296  m_begin_pos=NULL;
297  m_end_pos=NULL;
298 
299  m_last_idx=0;
300  m_bytes_available=0;
301  m_bytes_count=0;
302 }
303 
304 int32_t CCircularBuffer::append_chunk(const char* source, int32_t source_size,
305  bool from_buffer_begin)
306 {
307  if (source==NULL || source_size==0)
308  {
309  SG_ERROR("CCircularBuffer::append_chunk(const char*, int32_t, bool):: Invalid parameters!\
310  Source shouldn't be NULL or zero sized\n");
311  return -1;
312  }
313 
314  if (from_buffer_begin)
315  m_end_pos=m_buffer.vector;
316 
317  memcpy(m_end_pos, source, source_size);
318  move_pointer(&m_end_pos, m_end_pos+source_size);
319 
320  m_bytes_available-=source_size;
321  m_bytes_count+=source_size;
322 
323  return source_size;
324 }
325 
326 int32_t CCircularBuffer::append_chunk(FILE* source, int32_t source_size,
327  bool from_buffer_begin)
328 {
329  int32_t actually_read=fread(m_end_pos, sizeof(char), source_size, source);
330 
331  if (from_buffer_begin && actually_read==source_size)
332  m_end_pos=m_buffer.vector;
333  move_pointer(&m_end_pos, m_end_pos+actually_read);
334 
335  m_bytes_available-=actually_read;
336  m_bytes_count+=actually_read;
337 
338  return actually_read;
339 }
340 
341 void CCircularBuffer::detach_chunk(char** dest, int32_t* dest_size, int32_t dest_offset, int32_t num_bytes,
342  bool from_buffer_begin)
343 {
344  if (dest==NULL || dest_size==NULL)
345  {
346  SG_ERROR("CCircularBuffer::detach_chunk(...):: Invalid parameters! Pointers are NULL\n");
347  return;
348  }
349 
350  if (*dest==NULL)
351  {
352  *dest=SG_MALLOC(char, num_bytes+dest_offset);
353  *dest_size=num_bytes+dest_offset;
354  }
355 
356  if (*dest_size<num_bytes+dest_offset)
357  {
358  *dest=SG_REALLOC(char, *dest, *dest_size, num_bytes+dest_offset);
359  *dest_size=num_bytes+dest_offset;
360  }
361 
362  if (from_buffer_begin)
363  m_begin_pos=m_buffer.vector;
364 
365  memcpy(*dest+dest_offset, m_begin_pos, num_bytes);
366  move_pointer(&m_begin_pos, m_begin_pos+num_bytes);
367 
368  m_last_idx-=num_bytes;
369  if (m_last_idx<0)
370  m_last_idx=0;
371 
372  m_bytes_available+=num_bytes;
373  m_bytes_count-=num_bytes;
374 }
375 
376 bool CCircularBuffer::has_next_locally(char* part_begin, char* part_end)
377 {
378  int32_t num_bytes_to_search=part_end-part_begin;
379 
380  SGVector<char> buffer_part(part_begin, num_bytes_to_search, false);
381  m_tokenizer->set_text(buffer_part);
382 
383  return m_tokenizer->has_next();
384 }
385 
386 index_t CCircularBuffer::next_token_idx_locally(index_t &start, char* part_begin, char* part_end)
387 {
388  index_t end=0;
389  int32_t num_bytes_to_search=part_end-part_begin;
390  if (num_bytes_to_search<=0)
391  {
392  start=0;
393  return m_last_idx;
394  }
395 
396  SGVector<char> buffer_part(part_begin, num_bytes_to_search, false);
397  m_tokenizer->set_text(buffer_part);
398 
399  end=m_tokenizer->next_token_idx(start);
400 
401  start+=m_last_idx;
402  m_last_idx+=end;
403 
404  if (end==num_bytes_to_search)
405  return m_last_idx;
406  else
407  return m_last_idx++;
408 }
409 
410 void CCircularBuffer::move_pointer(char** pointer, char* new_position)
411 {
412  *pointer=new_position;
413  if (*pointer>=m_buffer.vector+m_buffer.vlen)
414  *pointer=m_buffer.vector;
415 }

SHOGUN Machine Learning Toolbox - Documentation