15 using namespace shogun;
// NOTE(review): fragment without its function header. The leading numbers are
// original source line numbers embedded in this listing, and some original
// lines (for example 31) are absent, so only the visible statements are described.
// Read and write positions both start at the first byte of the m_buffer storage.
29 m_begin_pos=m_buffer.
vector;
30 m_end_pos=m_begin_pos;
// The whole length of m_buffer is counted as free space.
32 m_bytes_available=m_buffer.
vlen;
// Stores the tokenizer argument in m_tokenizer.
// NOTE(review): the enclosing function header and any statements around this
// assignment are not part of this listing - presumably the tokenizer setter; confirm.
44 m_tokenizer=tokenizer;
// Fragment of the SGVector<char> overload of push, as named by the error
// message below.
// NOTE(review): this listing embeds original line numbers and omits lines
// (function header, braces, else and some return lines), so the branch
// structure noted here is inferred from the visible statements only.
// Error report for an unusable source; the guarding condition is on a line not shown.
51 SG_ERROR(
"CCircularBuffer::push(SGVector<char>):: Invalid parameters! Source shouldn't be NULL or zero sized\n");
// Clamp the number of bytes to write to the free space tracked in m_bytes_available.
55 int32_t bytes_to_write;
56 if (source.
vlen>m_bytes_available)
57 bytes_to_write=m_bytes_available;
59 bytes_to_write=source.
vlen;
// Zero bytes to write; the action taken is on a line not shown here.
61 if (bytes_to_write==0)
// Write position is at or after the read position.
65 if (m_end_pos>=m_begin_pos)
// Number of bytes between m_end_pos and the end of the m_buffer storage.
67 int32_t bytes_to_memory_end=m_buffer.
vlen-(m_end_pos-m_buffer.
vector);
// The data does not fit before the end of the storage: split it in two chunks.
68 if (bytes_to_memory_end<bytes_to_write)
72 int32_t first_chunk_size=bytes_to_memory_end;
73 int32_t second_chunk_size=bytes_to_write-first_chunk_size;
// First chunk is appended at the current write position.
75 bytes_to_write=append_chunk(source.
vector, first_chunk_size,
false);
// Remainder is appended with from_buffer_begin=true, i.e. starting at m_buffer.vector;
// the two results are summed.
76 bytes_to_write+=append_chunk(source.
vector+first_chunk_size, second_chunk_size,
true);
// Single append at the current write position.
80 bytes_to_write=append_chunk(source.
vector, bytes_to_write,
false);
// Second single-chunk call - presumably the branch where m_end_pos is before
// m_begin_pos; confirm against the full source.
85 bytes_to_write=append_chunk(source.
vector, bytes_to_write,
false);
// Report the byte count produced by the append_chunk call(s).
88 return bytes_to_write;
// Fragment of the FILE* overload of push, as named by the error message below.
// NOTE(review): this listing embeds original line numbers and omits lines
// (function header, braces, else and some return lines), so the branch
// structure noted here is inferred from the visible statements only.
// Reject a NULL stream or a zero requested size.
93 if (source==NULL || source_size==0)
95 SG_ERROR(
"CCircularBuffer::push(FILE*, int32_t):: Invalid parameters! Source shouldn't be NULL or zero sized\n");
// Clamp the number of bytes to write to the free space tracked in m_bytes_available.
99 int32_t bytes_to_write;
100 if (source_size>m_bytes_available)
101 bytes_to_write=m_bytes_available;
103 bytes_to_write=source_size;
// Zero bytes to write; the action taken is on a line not shown here.
105 if (bytes_to_write==0)
// Write position is at or after the read position.
109 if (m_end_pos>=m_begin_pos)
// Number of bytes between m_end_pos and the end of the m_buffer storage.
111 int32_t bytes_to_memory_end=m_buffer.
vlen-(m_end_pos-m_buffer.
vector);
// The data does not fit before the end of the storage: split it in two chunks.
112 if (bytes_to_memory_end<bytes_to_write)
116 int32_t first_chunk_size=bytes_to_memory_end;
117 int32_t second_chunk_size=bytes_to_write-first_chunk_size;
// First chunk is read from the stream at the current write position.
119 bytes_to_write=append_chunk(source, first_chunk_size,
false);
// Remainder is requested with from_buffer_begin=true; the two results are summed.
// The FILE* append_chunk returns the count reported by fread, which may be
// smaller than the requested size.
120 bytes_to_write+=append_chunk(source, second_chunk_size,
true);
// Single read at the current write position.
124 bytes_to_write=append_chunk(source, bytes_to_write,
false);
// Second single-chunk call - presumably the branch where m_end_pos is before
// m_begin_pos; confirm against the full source.
129 bytes_to_write=append_chunk(source, bytes_to_write,
false);
// Report the byte count produced by the append_chunk call(s).
132 return bytes_to_write;
// Fragment that removes up to num_bytes from the buffer into result through
// detach_chunk.
// NOTE(review): the function header, the declaration of result, braces, else
// lines and the return statement are not part of this listing (presumably the
// pop operation - confirm); only the visible statements are described.
// Clamp the number of bytes to read to the stored amount in m_bytes_count.
139 int32_t bytes_to_read;
140 if (num_bytes>m_bytes_count)
141 bytes_to_read=m_bytes_count;
143 bytes_to_read=num_bytes;
// Zero bytes to read; the action taken is on a line not shown here.
145 if (bytes_to_read==0)
// Read position is at or after the write position.
149 if (m_begin_pos>=m_end_pos)
// Number of bytes between m_begin_pos and the end of the m_buffer storage.
151 int32_t bytes_to_memory_end=m_buffer.
vlen-(m_begin_pos-m_buffer.
vector);
// The requested data runs past the end of the storage: detach it in two chunks.
152 if (bytes_to_memory_end<bytes_to_read)
155 int32_t first_chunk_size=bytes_to_memory_end;
156 int32_t second_chunk_size=bytes_to_read-first_chunk_size;
// First chunk lands at offset 0 of the destination.
158 detach_chunk(&result.
vector, &result.
vlen, 0, first_chunk_size,
false);
// Second chunk is taken with from_buffer_begin=true and lands right after the
// first chunk in the destination.
159 detach_chunk(&result.
vector, &result.
vlen, first_chunk_size, second_chunk_size,
true);
// Single detach from the current read position.
163 detach_chunk(&result.
vector, &result.
vlen, 0, bytes_to_read,
false);
// Second single-chunk call - presumably the branch where m_begin_pos is before
// m_end_pos; confirm against the full source.
168 detach_chunk(&result.
vector, &result.
vlen, 0, bytes_to_read,
false);
// Fragment of has_next, as named by the error message below: looks for a
// further token in the stored bytes by delegating to has_next_locally.
// NOTE(review): the function header, braces, else lines, the declaration of
// temp and several return statements are not part of this listing; the branch
// structure noted here is inferred from the visible statements only.
// A tokenizer must have been set before searching.
176 if (m_tokenizer==NULL)
178 SG_ERROR(
"CCircularBuffer::has_next():: Tokenizer is not initialized\n");
// Empty buffer; the action taken is on a line not shown here.
182 if (m_bytes_count==0)
// Number of bytes from the read position up to m_buffer_end.
185 int32_t head_length=m_buffer_end-m_begin_pos;
// m_last_idx still falls inside that head region.
188 if (m_last_idx<head_length)
// Write position at or after the read position and free space remains:
// one range from m_begin_pos+m_last_idx to m_end_pos is searched.
190 if (m_end_pos>=m_begin_pos && m_bytes_available!=0)
192 return has_next_locally(m_begin_pos+m_last_idx, m_end_pos);
// Otherwise the head region up to m_buffer_end is searched first; how temp is
// used afterwards is on lines not shown.
197 temp=has_next_locally(m_begin_pos+m_last_idx, m_buffer_end);
// Then the region at the start of the storage, up to m_end_pos, is searched.
202 return has_next_locally(m_buffer.
vector+m_last_idx-head_length, m_end_pos);
// Same start-of-storage search - presumably the branch where m_last_idx is
// not below head_length; confirm against the full source.
207 return has_next_locally(m_buffer.
vector+m_last_idx-head_length, m_end_pos);
// Fragment of next_token_idx(index_t&), as named by the error message below:
// locates token boundaries by delegating to next_token_idx_locally.
// NOTE(review): the function header, braces, else lines, the declarations of
// end and temp_start, and the bodies of several if statements are not part of
// this listing; only the visible statements are described.
// A tokenizer must have been set before searching.
217 if (m_tokenizer==NULL)
219 SG_ERROR(
"CCircularBuffer::next_token_idx(index_t&):: Tokenizer is not initialized\n");
// Empty buffer: the stored byte count (zero here) is returned.
223 if (m_bytes_count==0)
224 return m_bytes_count;
// Number of bytes from the start of the storage up to the write position.
226 int32_t tail_length=m_end_pos-m_buffer.
vector;
// Number of bytes from the read position up to m_buffer_end.
227 int32_t head_length=m_buffer_end-m_begin_pos;
// m_last_idx still falls inside the head region.
230 if (m_last_idx<head_length)
// Write position at or after the read position and free space remains:
// one range from m_begin_pos+m_last_idx to m_end_pos is searched.
232 if (m_end_pos>=m_begin_pos && m_bytes_available!=0)
234 end=next_token_idx_locally(start, m_begin_pos+m_last_idx, m_end_pos);
// Body of this check is on lines not shown.
235 if (end<=m_bytes_count)
// Otherwise the head region up to m_buffer_end is searched first.
243 end=next_token_idx_locally(start, m_begin_pos+m_last_idx, m_buffer_end);
// Then the region at the start of the storage is searched, with a separate
// start index temp_start.
249 end=next_token_idx_locally(temp_start, m_buffer.
vector+m_last_idx-head_length, m_end_pos);
// Body of this check is on lines not shown.
251 if (start>=head_length)
// Start-of-storage search - presumably the branch where m_last_idx is not
// below head_length; confirm against the full source.
259 end=next_token_idx_locally(start, m_buffer.
vector+m_last_idx-head_length, m_end_pos);
// Body of this check is on lines not shown.
260 if (end-head_length<=tail_length)
// Fragment that drops num_chars bytes from the front of the stored data; no
// copy of the bytes is visible in this fragment.
// NOTE(review): the function header and some original lines (for example 271
// and 273-275) are not part of this listing - presumably the skip operation; confirm.
// Advance the read position by num_chars through move_pointer.
270 move_pointer(&m_begin_pos, m_begin_pos+num_chars);
// Shift m_last_idx back by the same amount.
272 m_last_idx-=num_chars;
// The skipped bytes become free space and no longer count as stored.
276 m_bytes_available+=num_chars;
277 m_bytes_count-=num_chars;
// Fragment that puts the positions back to the start of the storage.
// NOTE(review): the function header and original lines 284-285 are not part
// of this listing - presumably the clear/reset operation; confirm which other
// members are reset there.
// Read and write positions both point at the first byte of the m_buffer storage.
282 m_begin_pos=m_buffer.
vector;
283 m_end_pos=m_begin_pos;
// The whole length of m_buffer is counted as free space again.
286 m_bytes_available=m_buffer.
vlen;
290 void CCircularBuffer::init()
// Copies source_size bytes from source into the buffer at the write position
// (m_end_pos) and updates the bookkeeping counters.
//
// source            - bytes to copy; NULL is reported through SG_ERROR
// source_size       - number of bytes to copy; zero is reported through SG_ERROR
// from_buffer_begin - when true, m_end_pos is reset to m_buffer.vector before
//                     the copy, so the data lands at the start of the storage
//
// After the memcpy, m_end_pos is advanced by source_size through move_pointer,
// m_bytes_available shrinks and m_bytes_count grows by source_size.
// No comparison of source_size against m_bytes_available is visible in this
// block; the push fragments in this file clamp the size before calling.
// NOTE(review): this listing embeds original line numbers and omits lines
// (braces and the return statements), so the returned value is not visible here.
304 int32_t CCircularBuffer::append_chunk(
const char* source, int32_t source_size,
305 bool from_buffer_begin)
307 if (source==NULL || source_size==0)
309 SG_ERROR(
"CCircularBuffer::append_chunk(const char*, int32_t, bool):: Invalid parameters!\
310 Source shouldn't be NULL or zero sized\n");
314 if (from_buffer_begin)
315 m_end_pos=m_buffer.
vector;
317 memcpy(m_end_pos, source, source_size);
318 move_pointer(&m_end_pos, m_end_pos+source_size);
320 m_bytes_available-=source_size;
321 m_bytes_count+=source_size;
// Reads up to source_size bytes from the stream source directly into the
// buffer at the write position (m_end_pos) and updates the bookkeeping counters.
//
// source            - stream to read from
// source_size       - number of bytes requested from fread
// from_buffer_begin - when true and the read was complete, m_end_pos is reset
//                     to m_buffer.vector before it is advanced
// Returns the number of bytes fread reported, which may be less than source_size.
//
// NOTE(review): unlike the const char* overload, no NULL or zero-size check is
// visible in this block.
// NOTE(review): this listing embeds original line numbers and omits lines such
// as braces.
326 int32_t CCircularBuffer::append_chunk(FILE* source, int32_t source_size,
327 bool from_buffer_begin)
// Element size is sizeof(char), so the fread result is a byte count.
// NOTE(review): the data is stored at the current m_end_pos before the
// from_buffer_begin reset below is applied, whereas the const char* overload
// resets m_end_pos before its memcpy - confirm this ordering is intended.
329 int32_t actually_read=fread(m_end_pos,
sizeof(
char), source_size, source);
// The reset to the start of the storage only happens when the full request was read.
331 if (from_buffer_begin && actually_read==source_size)
332 m_end_pos=m_buffer.
vector;
// Advance the write position by the number of bytes read.
333 move_pointer(&m_end_pos, m_end_pos+actually_read);
// The bytes read reduce the free space and increase the stored amount.
335 m_bytes_available-=actually_read;
336 m_bytes_count+=actually_read;
338 return actually_read;
// Copies num_bytes from the read position (m_begin_pos) into *dest starting
// at dest_offset, then releases those bytes from the buffer.
//
// dest              - address of the destination pointer; may be replaced by
//                     a newly allocated or reallocated block
// dest_size         - address of the destination capacity; updated whenever
//                     the destination is (re)allocated
// dest_offset       - position inside *dest where the copied bytes start
// num_bytes         - number of bytes to copy out
// from_buffer_begin - when true, m_begin_pos is reset to m_buffer.vector
//                     before the copy
//
// No comparison of num_bytes against m_bytes_count is visible in this block.
// NOTE(review): this listing embeds original line numbers and omits lines
// (braces, some conditions, any return after the error report).
341 void CCircularBuffer::detach_chunk(
char** dest, int32_t* dest_size, int32_t dest_offset, int32_t num_bytes,
342 bool from_buffer_begin)
// Both output pointers are required.
344 if (dest==NULL || dest_size==NULL)
346 SG_ERROR(
"CCircularBuffer::detach_chunk(...):: Invalid parameters! Pointers are NULL\n");
// Fresh allocation sized for offset plus payload; the guarding condition is on
// a line not shown (presumably that *dest is NULL - confirm).
352 *dest=SG_MALLOC(
char, num_bytes+dest_offset);
353 *dest_size=num_bytes+dest_offset;
// Grow an existing destination that is too small for offset plus payload.
356 if (*dest_size<num_bytes+dest_offset)
358 *dest=SG_REALLOC(
char, *dest, *dest_size, num_bytes+dest_offset);
359 *dest_size=num_bytes+dest_offset;
// Optionally restart reading at the beginning of the storage.
362 if (from_buffer_begin)
363 m_begin_pos=m_buffer.
vector;
// Copy the bytes out and advance the read position through move_pointer.
365 memcpy(*dest+dest_offset, m_begin_pos, num_bytes);
366 move_pointer(&m_begin_pos, m_begin_pos+num_bytes);
// Shift m_last_idx back by the number of bytes removed; lines 369-371 are not shown.
368 m_last_idx-=num_bytes;
// The removed bytes become free space and no longer count as stored.
372 m_bytes_available+=num_bytes;
373 m_bytes_count-=num_bytes;
// Helper working on one contiguous range of the storage, from part_begin up
// to part_end.
// NOTE(review): only the set-up statements are part of this listing; the
// tokenizer call and the return statement are on lines not shown.
376 bool CCircularBuffer::has_next_locally(
char* part_begin,
char* part_end)
// Length of the range in bytes.
378 int32_t num_bytes_to_search=part_end-part_begin;
// Wrap the range in an SGVector over the existing memory; the third argument
// is false - presumably this disables reference counting so the storage is
// not freed by the wrapper; confirm against the SGVector constructor.
380 SGVector<char> buffer_part(part_begin, num_bytes_to_search,
false);
// Helper working on one contiguous range of the storage, from part_begin up
// to part_end; start is passed by reference.
// NOTE(review): the tokenizer call, the declaration of end, the bodies of both
// if statements and the return statement are on lines not shown in this listing.
386 index_t CCircularBuffer::next_token_idx_locally(
index_t &start,
char* part_begin,
char* part_end)
// Length of the range in bytes.
389 int32_t num_bytes_to_search=part_end-part_begin;
// Empty or inverted range; the action taken is on a line not shown here.
390 if (num_bytes_to_search<=0)
// Wrap the range in an SGVector over the existing memory; the third argument
// is false - presumably this disables reference counting so the storage is
// not freed by the wrapper; confirm against the SGVector constructor.
396 SGVector<char> buffer_part(part_begin, num_bytes_to_search,
false);
// The end index reached the end of the searched range; the handling is on
// lines not shown.
404 if (end==num_bytes_to_search)
// Sets the position referenced by pointer to new_position.
// Callers in this file pass the addresses of m_begin_pos and m_end_pos.
// NOTE(review): the listing ends right after the assignment; any wrap-around
// handling for a position that reaches the end of the storage would be on
// lines not shown - confirm against the full source.
410 void CCircularBuffer::move_pointer(
char** pointer,
char* new_position)
412 *pointer=new_position;