42 CFile(fname, rw, name)
58 is_data_transposed=value;
65 m_delimiter=delimiter;
73 m_num_to_skip=num_lines;
100 m_line_reader->
reset();
105 void CCSVFile::init()
107 is_data_transposed=
false;
112 m_line_tokenizer=NULL;
117 void CCSVFile::init_with_defaults()
119 is_data_transposed=
false;
137 void CCSVFile::skip_lines(int32_t num_lines)
139 for (int32_t i=0; i<num_lines; i++)
143 #define GET_VECTOR(read_func, sg_type) \
144 void CCSVFile::get_vector(sg_type*& vector, int32_t& len) \
146 if (!m_line_reader->has_next()) \
149 int32_t num_feat=0; \
151 get_matrix(vector, num_feat, num_vec); \
182 #define GET_MATRIX(read_func, sg_type) \
183 void CCSVFile::get_matrix(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \
185 int32_t num_lines=0; \
186 int32_t num_tokens=-1; \
187 int32_t current_line_idx=0; \
188 SGVector<char> line; \
190 skip_lines(m_num_to_skip); \
191 num_lines=get_stats(num_tokens); \
195 matrix=SG_MALLOC(sg_type, num_lines*num_tokens); \
196 skip_lines(m_num_to_skip); \
197 while (m_line_reader->has_next()) \
199 line=m_line_reader->read_line(); \
200 m_parser->set_text(line); \
202 for (int32_t i=0; i<num_tokens; i++) \
204 if (!m_parser->has_next()) \
207 if (!is_data_transposed) \
208 matrix[i+current_line_idx*num_tokens]=m_parser->read_func(); \
210 matrix[current_line_idx+i*num_tokens]=m_parser->read_func(); \
212 current_line_idx++; \
217 if (!is_data_transposed) \
219 num_feat=num_tokens; \
224 num_feat=num_lines; \
225 num_vec=num_tokens; \
243 #define GET_NDARRAY(read_func, sg_type) \
244 void CCSVFile::get_ndarray(sg_type*& array, int32_t*& dims, int32_t& num_dims) \
258 #define GET_SPARSE_MATRIX(read_func, sg_type) \
259 void CCSVFile::get_sparse_matrix( \
260 SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \
278 #undef GET_SPARSE_MATRIX
280 #define SET_VECTOR(format, sg_type) \
281 void CCSVFile::set_vector(const sg_type* vector, int32_t len) \
285 if (!is_data_transposed) \
287 for (int32_t i=0; i<len; i++) \
288 fprintf(file, "%" format "\n", vector[i]); \
293 for (i=0; i<len-1; i++) \
294 fprintf(file, "%" format "%c", vector[i], m_delimiter); \
295 fprintf(file, "%" format "\n", vector[i]); \
315 #define SET_MATRIX(format, sg_type) \
316 void CCSVFile::set_matrix(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \
320 if (!is_data_transposed) \
322 for (int32_t i=0; i<num_vec; i++) \
325 for (j=0; j<num_feat-1; j++) \
326 fprintf(file, "%" format "%c", matrix[j+i*num_feat], m_delimiter); \
327 fprintf(file, "%" format "\n", matrix[j+i*num_feat]); \
332 for (int32_t i=0; i<num_feat; i++) \
335 for (j=0; j<num_vec-1; j++) \
336 fprintf(file, "%" format "%c", matrix[i+j*num_vec], m_delimiter); \
337 fprintf(file, "%" format "\n", matrix[i+j*num_vec]); \
358 #define SET_SPARSE_MATRIX(format, sg_type) \
359 void CCSVFile::set_sparse_matrix( \
360 const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \
378 #undef SET_SPARSE_MATRIX
382 int32_t& max_string_len)
385 int32_t current_line_idx=0;
386 int32_t num_tokens=0;
392 skip_lines(m_num_to_skip);
396 strings[current_line_idx].
slen=line.
vlen;
397 strings[current_line_idx].
string=SG_MALLOC(
char, line.
vlen);
398 for (int32_t i=0; i<line.
vlen; i++)
399 strings[current_line_idx].
string[i]=line[i];
401 if (line.
vlen>max_string_len)
402 max_string_len=line.
vlen;
407 num_str=current_line_idx;
410 #define GET_STRING_LIST(sg_type) \
411 void CCSVFile::get_string_list( \
412 SGString<sg_type>*& strings, int32_t& num_str, \
413 int32_t& max_string_len) \
429 #undef GET_STRING_LIST
434 for (int32_t i=0; i<num_str; i++)
436 for (int32_t j=0; j<strings[i].
slen; j++)
437 fprintf(
file,
"%c", strings[i].
string[j]);
442 #define SET_STRING_LIST(sg_type) \
443 void CCSVFile::set_string_list( \
444 const SGString<sg_type>* strings, int32_t num_str) \
460 #undef SET_STRING_LIST
void set_delimiter(char delimiter)
#define GET_MATRIX(read_func, sg_type)
void set_transpose(bool value)
#define GET_NDARRAY(read_func, sg_type)
virtual index_t next_token_idx(index_t &start)
virtual void set_string_list(const SGString< uint8_t > *strings, int32_t num_str)
#define GET_SPARSE_MATRIX(read_func, sg_type)
#define SET_STRING_LIST(sg_type)
Class for buffered reading from an ASCII file.
void set_lines_to_skip(int32_t num_lines)
int32_t get_stats(int32_t &num_tokens)
virtual SGVector< char > read_line()
#define GET_STRING_LIST(sg_type)
#define SET_MATRIX(format, sg_type)
Class for reading from a string.
A File access base class.
#define SET_VECTOR(format, sg_type)
#define GET_VECTOR(read_func, sg_type)
#define SET_SPARSE_MATRIX(format, sg_type)
void set_tokenizer(CTokenizer *tokenizer)
All classes and functions are contained in the shogun namespace.
The class CDelimiterTokenizer is used to tokenize a SGVector into tokens using custom chars as ...
SGVector< bool > delimiters
virtual void set_text(SGVector< char > txt)
virtual void get_string_list(SGString< uint8_t > *&strings, int32_t &num_str, int32_t &max_string_len)