30 m_max_token_length=10*1024*1024;
33 m_tokenizer=tokenizer;
44 m_max_token_length=max_token_length;
47 m_tokenizer=tokenizer;
61 if (m_stream==NULL || m_max_token_length==0 || m_tokenizer==NULL)
63 SG_ERROR(
"CLineReader::has_next():: Class is not initialized\n");
69 SG_ERROR(
"CLineReader::has_next():: Error reading file\n");
81 int32_t bytes_to_skip=0;
82 m_next_token_length=read(bytes_to_skip);
83 if (m_next_token_length==-1)
93 int32_t bytes_to_skip=0;
94 m_next_token_length=read(bytes_to_skip);
95 if (m_next_token_length==-1)
100 line=read_token(m_next_token_length-bytes_to_skip);
116 m_tokenizer=tokenizer;
121 void CLineReader::init()
127 m_max_token_length=0;
128 m_next_token_length=-1;
131 int32_t CLineReader::read(int32_t& bytes_to_skip)
134 int32_t bytes_to_read=0;
135 int32_t temp_bytes_to_skip=0;
139 if (bytes_to_skip==line_end)
152 if (m_buffer->
available() < m_max_token_length)
155 bytes_to_read=m_max_token_length;
160 m_buffer->
push(m_stream, bytes_to_read);
162 if (ferror(m_stream))
164 SG_ERROR(
"CLineReader::read(int32_t&):: Error reading file\n");
177 line=m_buffer->
pop(line_len);
Implementation of a circular buffer. This buffer has a logical structure like a queue (FIFO)...
int32_t push(SGVector< char > source)
int32_t num_bytes_contained() const
void skip_characters(int32_t num_chars)
index_t next_token_idx(index_t &start)
virtual SGVector< char > read_line()
The class CTokenizer acts as a base class in order to implement tokenizers. Sub-classes must implemen...
SGVector< char > pop(int32_t num_chars)
All classes and functions are contained in the shogun namespace.
void set_tokenizer(CTokenizer *tokenizer)
void set_tokenizer(CTokenizer *tokenizer)
int32_t available() const