15 using namespace shogun;
37 CFile(fname, rw, name)
53 is_data_transposed=value;
60 m_delimiter=delimiter;
68 m_num_to_skip=num_lines;
95 m_line_reader->
reset();
100 void CCSVFile::init()
102 is_data_transposed=
false;
107 m_line_tokenizer=NULL;
112 void CCSVFile::init_with_defaults()
114 is_data_transposed=
false;
132 void CCSVFile::skip_lines(int32_t num_lines)
134 for (int32_t i=0; i<num_lines; i++)
138 #define GET_VECTOR(read_func, sg_type) \
139 void CCSVFile::get_vector(sg_type*& vector, int32_t& len) \
141 if (!m_line_reader->has_next()) \
144 int32_t num_feat=0; \
146 get_matrix(vector, num_feat, num_vec); \
177 #define GET_MATRIX(read_func, sg_type) \
178 void CCSVFile::get_matrix(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \
180 int32_t num_lines=0; \
181 int32_t num_tokens=-1; \
182 int32_t current_line_idx=0; \
183 SGVector<char> line; \
185 skip_lines(m_num_to_skip); \
186 num_lines=get_stats(num_tokens); \
190 matrix=SG_MALLOC(sg_type, num_lines*num_tokens); \
191 skip_lines(m_num_to_skip); \
192 while (m_line_reader->has_next()) \
194 line=m_line_reader->read_line(); \
195 m_parser->set_text(line); \
197 for (int32_t i=0; i<num_tokens; i++) \
199 if (!m_parser->has_next()) \
202 if (!is_data_transposed) \
203 matrix[i+current_line_idx*num_tokens]=m_parser->read_func(); \
205 matrix[current_line_idx+i*num_tokens]=m_parser->read_func(); \
207 current_line_idx++; \
212 if (!is_data_transposed) \
214 num_feat=num_tokens; \
219 num_feat=num_lines; \
220 num_vec=num_tokens; \
238 #define GET_NDARRAY(read_func, sg_type) \
239 void CCSVFile::get_ndarray(sg_type*& array, int32_t*& dims, int32_t& num_dims) \
253 #define GET_SPARSE_MATRIX(read_func, sg_type) \
254 void CCSVFile::get_sparse_matrix( \
255 SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \
273 #undef GET_SPARSE_MATRIX
275 #define SET_VECTOR(format, sg_type) \
276 void CCSVFile::set_vector(const sg_type* vector, int32_t len) \
280 if (!is_data_transposed) \
282 for (int32_t i=0; i<len; i++) \
283 fprintf(file, "%" format "\n", vector[i]); \
288 for (i=0; i<len-1; i++) \
289 fprintf(file, "%" format "%c", vector[i], m_delimiter); \
290 fprintf(file, "%" format "\n", vector[i]); \
310 #define SET_MATRIX(format, sg_type) \
311 void CCSVFile::set_matrix(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \
315 if (!is_data_transposed) \
317 for (int32_t i=0; i<num_vec; i++) \
320 for (j=0; j<num_feat-1; j++) \
321 fprintf(file, "%" format "%c", matrix[j+i*num_feat], m_delimiter); \
322 fprintf(file, "%" format "\n", matrix[j+i*num_feat]); \
327 for (int32_t i=0; i<num_feat; i++) \
330 for (j=0; j<num_vec-1; j++) \
331 fprintf(file, "%" format "%c", matrix[i+j*num_vec], m_delimiter); \
332 fprintf(file, "%" format "\n", matrix[i+j*num_vec]); \
353 #define SET_SPARSE_MATRIX(format, sg_type) \
354 void CCSVFile::set_sparse_matrix( \
355 const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \
373 #undef SET_SPARSE_MATRIX
377 int32_t& max_string_len)
380 int32_t current_line_idx=0;
381 int32_t num_tokens=0;
387 skip_lines(m_num_to_skip);
391 strings[current_line_idx].
slen=line.
vlen;
392 strings[current_line_idx].
string=SG_MALLOC(
char, line.
vlen);
393 for (int32_t i=0; i<line.
vlen; i++)
394 strings[current_line_idx].
string[i]=line[i];
396 if (line.
vlen>max_string_len)
397 max_string_len=line.
vlen;
402 num_str=current_line_idx;
405 #define GET_STRING_LIST(sg_type) \
406 void CCSVFile::get_string_list( \
407 SGString<sg_type>*& strings, int32_t& num_str, \
408 int32_t& max_string_len) \
424 #undef GET_STRING_LIST
429 for (int32_t i=0; i<num_str; i++)
431 for (int32_t j=0; j<strings[i].
slen; j++)
432 fprintf(
file,
"%c", strings[i].
string[j]);
437 #define SET_STRING_LIST(sg_type) \
438 void CCSVFile::set_string_list( \
439 const SGString<sg_type>* strings, int32_t num_str) \
455 #undef SET_STRING_LIST
460 char *last = s.
start;
463 if (*s.
start == delim)