31 m_begin_pos=m_buffer.
vector;
32 m_end_pos=m_begin_pos;
34 m_bytes_available=m_buffer.
vlen;
46 m_tokenizer=tokenizer;
53 SG_ERROR(
"CCircularBuffer::push(SGVector<char>):: Invalid parameters! Source shouldn't be NULL or zero sized\n");
57 int32_t bytes_to_write;
58 if (source.
vlen>m_bytes_available)
59 bytes_to_write=m_bytes_available;
61 bytes_to_write=source.
vlen;
63 if (bytes_to_write==0)
67 if (m_end_pos>=m_begin_pos)
69 int32_t bytes_to_memory_end=m_buffer.
vlen-(m_end_pos-m_buffer.
vector);
70 if (bytes_to_memory_end<bytes_to_write)
74 int32_t first_chunk_size=bytes_to_memory_end;
75 int32_t second_chunk_size=bytes_to_write-first_chunk_size;
77 bytes_to_write=append_chunk(source.
vector, first_chunk_size,
false);
78 bytes_to_write+=append_chunk(source.
vector+first_chunk_size, second_chunk_size,
true);
82 bytes_to_write=append_chunk(source.
vector, bytes_to_write,
false);
87 bytes_to_write=append_chunk(source.
vector, bytes_to_write,
false);
90 return bytes_to_write;
95 if (source==NULL || source_size==0)
97 SG_ERROR(
"CCircularBuffer::push(FILE*, int32_t):: Invalid parameters! Source shouldn't be NULL or zero sized\n");
101 int32_t bytes_to_write;
102 if (source_size>m_bytes_available)
103 bytes_to_write=m_bytes_available;
105 bytes_to_write=source_size;
107 if (bytes_to_write==0)
111 if (m_end_pos>=m_begin_pos)
113 int32_t bytes_to_memory_end=m_buffer.
vlen-(m_end_pos-m_buffer.
vector);
114 if (bytes_to_memory_end<bytes_to_write)
118 int32_t first_chunk_size=bytes_to_memory_end;
119 int32_t second_chunk_size=bytes_to_write-first_chunk_size;
121 bytes_to_write=append_chunk(source, first_chunk_size,
false);
122 bytes_to_write+=append_chunk(source, second_chunk_size,
true);
126 bytes_to_write=append_chunk(source, bytes_to_write,
false);
131 bytes_to_write=append_chunk(source, bytes_to_write,
false);
134 return bytes_to_write;
141 int32_t bytes_to_read;
142 if (num_bytes>m_bytes_count)
143 bytes_to_read=m_bytes_count;
145 bytes_to_read=num_bytes;
147 if (bytes_to_read==0)
151 if (m_begin_pos>=m_end_pos)
153 int32_t bytes_to_memory_end=m_buffer.
vlen-(m_begin_pos-m_buffer.
vector);
154 if (bytes_to_memory_end<bytes_to_read)
157 int32_t first_chunk_size=bytes_to_memory_end;
158 int32_t second_chunk_size=bytes_to_read-first_chunk_size;
160 detach_chunk(&result.
vector, &result.
vlen, 0, first_chunk_size,
false);
161 detach_chunk(&result.
vector, &result.
vlen, first_chunk_size, second_chunk_size,
true);
165 detach_chunk(&result.
vector, &result.
vlen, 0, bytes_to_read,
false);
170 detach_chunk(&result.
vector, &result.
vlen, 0, bytes_to_read,
false);
178 if (m_tokenizer==NULL)
180 SG_ERROR(
"CCircularBuffer::has_next():: Tokenizer is not initialized\n");
184 if (m_bytes_count==0)
187 int32_t head_length=m_buffer_end-m_begin_pos;
190 if (m_last_idx<head_length)
192 if (m_end_pos>=m_begin_pos && m_bytes_available!=0)
194 return has_next_locally(m_begin_pos+m_last_idx, m_end_pos);
199 temp=has_next_locally(m_begin_pos+m_last_idx, m_buffer_end);
204 return has_next_locally(m_buffer.
vector+m_last_idx-head_length, m_end_pos);
209 return has_next_locally(m_buffer.
vector+m_last_idx-head_length, m_end_pos);
219 if (m_tokenizer==NULL)
221 SG_ERROR(
"CCircularBuffer::next_token_idx(index_t&):: Tokenizer is not initialized\n");
225 if (m_bytes_count==0)
226 return m_bytes_count;
228 int32_t tail_length=m_end_pos-m_buffer.
vector;
229 int32_t head_length=m_buffer_end-m_begin_pos;
232 if (m_last_idx<head_length)
234 if (m_end_pos>=m_begin_pos && m_bytes_available!=0)
236 end=next_token_idx_locally(start, m_begin_pos+m_last_idx, m_end_pos);
237 if (end<=m_bytes_count)
245 end=next_token_idx_locally(start, m_begin_pos+m_last_idx, m_buffer_end);
251 end=next_token_idx_locally(temp_start, m_buffer.
vector+m_last_idx-head_length, m_end_pos);
253 if (start>=head_length)
261 end=next_token_idx_locally(start, m_buffer.
vector+m_last_idx-head_length, m_end_pos);
262 if (end-head_length<=tail_length)
272 move_pointer(&m_begin_pos, m_begin_pos+num_chars);
274 m_last_idx-=num_chars;
278 m_bytes_available+=num_chars;
279 m_bytes_count-=num_chars;
284 m_begin_pos=m_buffer.
vector;
285 m_end_pos=m_begin_pos;
288 m_bytes_available=m_buffer.
vlen;
292 void CCircularBuffer::init()
306 int32_t CCircularBuffer::append_chunk(
const char* source, int32_t source_size,
307 bool from_buffer_begin)
309 if (source==NULL || source_size==0)
311 SG_ERROR(
"CCircularBuffer::append_chunk(const char*, int32_t, bool):: Invalid parameters!\
312 Source shouldn't be NULL or zero sized\n");
316 if (from_buffer_begin)
317 m_end_pos=m_buffer.
vector;
319 memcpy(m_end_pos, source, source_size);
320 move_pointer(&m_end_pos, m_end_pos+source_size);
322 m_bytes_available-=source_size;
323 m_bytes_count+=source_size;
328 int32_t CCircularBuffer::append_chunk(FILE* source, int32_t source_size,
329 bool from_buffer_begin)
331 int32_t actually_read=fread(m_end_pos,
sizeof(
char), source_size, source);
333 if (from_buffer_begin && actually_read==source_size)
334 m_end_pos=m_buffer.
vector;
335 move_pointer(&m_end_pos, m_end_pos+actually_read);
337 m_bytes_available-=actually_read;
338 m_bytes_count+=actually_read;
340 return actually_read;
343 void CCircularBuffer::detach_chunk(
char** dest, int32_t* dest_size, int32_t dest_offset, int32_t num_bytes,
344 bool from_buffer_begin)
346 if (dest==NULL || dest_size==NULL)
348 SG_ERROR(
"CCircularBuffer::detach_chunk(...):: Invalid parameters! Pointers are NULL\n");
354 *dest=SG_MALLOC(
char, num_bytes+dest_offset);
355 *dest_size=num_bytes+dest_offset;
358 if (*dest_size<num_bytes+dest_offset)
360 *dest=SG_REALLOC(
char, *dest, *dest_size, num_bytes+dest_offset);
361 *dest_size=num_bytes+dest_offset;
364 if (from_buffer_begin)
365 m_begin_pos=m_buffer.
vector;
367 memcpy(*dest+dest_offset, m_begin_pos, num_bytes);
368 move_pointer(&m_begin_pos, m_begin_pos+num_bytes);
370 m_last_idx-=num_bytes;
374 m_bytes_available+=num_bytes;
375 m_bytes_count-=num_bytes;
378 bool CCircularBuffer::has_next_locally(
char* part_begin,
char* part_end)
380 int32_t num_bytes_to_search=part_end-part_begin;
382 SGVector<char> buffer_part(part_begin, num_bytes_to_search,
false);
388 index_t CCircularBuffer::next_token_idx_locally(
index_t &start,
char* part_begin,
char* part_end)
391 int32_t num_bytes_to_search=part_end-part_begin;
392 if (num_bytes_to_search<=0)
398 SGVector<char> buffer_part(part_begin, num_bytes_to_search,
false);
406 if (end==num_bytes_to_search)
412 void CCircularBuffer::move_pointer(
char** pointer,
char* new_position)
414 *pointer=new_position;
virtual void set_text(SGVector< char > txt)
int32_t push(SGVector< char > source)
void skip_characters(int32_t num_chars)
index_t next_token_idx(index_t &start)
The class CTokenizer acts as a base class in order to implement tokenizers. Sub-classes must implemen...
SGVector< char > pop(int32_t num_chars)
virtual bool has_next()=0
all of classes and functions are contained in the shogun namespace
virtual index_t next_token_idx(index_t &start)=0
void set_tokenizer(CTokenizer *tokenizer)