16 using namespace shogun;
31 CFile(fname, rw, name)
// Puts the tokenizer members into a known empty state: the statements shown
// here assign NULL to the whitespace, delimiter and line tokenizer pointers.
// NOTE(review): further members may be reset on lines not visible in this
// view -- confirm against the full file.
46 void CLibSVMFile::init()
50 m_whitespace_tokenizer=NULL;
51 m_delimiter_tokenizer=NULL;
52 m_line_tokenizer=NULL;
// Configures the tokenizers used by this reader. The visible statements call
// SG_REF on the whitespace and delimiter tokenizers and set the entry of the
// delimiter tokenizer's `delimiters` table indexed by m_delimiter to 1.
// NOTE(review): the lines that construct the tokenizers (and assign
// m_delimiter) are not visible here; SG_REF is presumably Shogun's
// reference-count increment -- confirm.
57 void CLibSVMFile::init_with_defaults()
63 SG_REF(m_whitespace_tokenizer);
// Register m_delimiter as a separator for the delimiter tokenizer.
66 m_delimiter_tokenizer->
delimiters[m_delimiter]=1;
67 SG_REF(m_delimiter_tokenizer);
// Generates the label-less overload of CLibSVMFile::get_sparse_matrix for one
// element type `sg_type`. The generated function forwards to the five-argument
// overload with a local NULL label pointer and load_labels=false.
// `read_func` is not used in the lines visible here.
// NOTE(review): the macro instantiations between the body and the #undef are
// not visible in this view.
77 #define GET_SPARSE_MATRIX(read_func, sg_type) \
78 void CLibSVMFile::get_sparse_matrix(SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \
/* Placeholder label pointer; labels are not requested (load_labels=false). */ \
80 float64_t* labels=NULL; \
81 get_sparse_matrix(matrix, num_feat, num_vec, labels, false); \
97 #undef GET_SPARSE_MATRIX
// Generates the labeled overload of CLibSVMFile::get_sparse_matrix for one
// element type `sg_type`; `read_func` names the parser method used to read a
// feature value.
//
// Outputs written through the reference parameters (as far as visible here):
//   matrix   - array of num_vec sparse vectors allocated with SG_MALLOC
//   num_feat - raised to the largest feature index read from the file
//   num_vec  - set to the result of get_num_lines()
//   labels   - array of num_vec values allocated with SG_MALLOC; one value is
//              read per line when load_labels is true
//
// NOTE(review): several lines of the macro body are not visible in this view
// (braces, the initial value of num_feat, the declaration of `entry`, the
// update of num_entries, any load_labels guard around the labels allocation).
// The comments below describe only the statements that are shown.
99 #define GET_LABELED_SPARSE_MATRIX(read_func, sg_type) \
100 void CLibSVMFile::get_sparse_matrix(SGSparseVector<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec, \
101 float64_t*& labels, bool load_labels) \
/* The number of vectors is taken from the line count of the file. */ \
105 SG_INFO("counting line numbers in file %s\n", filename) \
106 num_vec=get_num_lines(); \
108 int32_t current_line_ind=0; \
109 SGVector<char> line; \
111 int32_t num_entries=0; \
112 DynArray<SGVector<char> > entries; \
/* One sparse vector and one label slot per line. */ \
114 matrix=SG_MALLOC(SGSparseVector<sg_type>, num_vec); \
116 labels=SG_MALLOC(float64_t, num_vec); \
/* Process the file one line at a time. */ \
120 while (m_line_reader->has_next()) \
123 entries.reset(SGVector<char>(false)); \
124 line=m_line_reader->read_line(); \
/* First pass over the line: split it with the whitespace tokenizer. */ \
126 m_parser->set_tokenizer(m_whitespace_tokenizer); \
127 m_parser->set_text(line); \
/* When labels are requested, the first token of the line is the label. */ \
129 if (load_labels && m_parser->has_next()) \
130 labels[current_line_ind]=m_parser->read_real(); \
/* Every remaining token is kept as a raw string for the second pass. */ \
132 while (m_parser->has_next()) \
134 entries.push_back(m_parser->read_string()); \
/* NOTE(review): num_entries is presumably updated next to the push_back */ \
/* on a line not shown here -- confirm. */ \
138 matrix[current_line_ind]=SGSparseVector<sg_type>(num_entries); \
139 for (int32_t i=0; i<num_entries; i++) \
/* Second pass: split one stored token with the delimiter tokenizer. */ \
141 m_parser->set_tokenizer(m_delimiter_tokenizer); \
142 m_parser->set_text(entries[i]); \
/* The first sub-token is the feature index; it stays 0 when absent. */ \
144 int32_t feat_index=0; \
145 if (m_parser->has_next()) \
146 feat_index=m_parser->read_int(); \
/* The second sub-token is the feature value, read with read_func. */ \
149 if (m_parser->has_next()) \
150 entry=m_parser->read_func(); \
/* Track the largest feature index encountered. */ \
152 if (feat_index>num_feat) \
153 num_feat=feat_index; \
/* The stored index is the index read from the file minus one. */ \
155 matrix[current_line_ind].features[i].feat_index=feat_index-1; \
156 matrix[current_line_ind].features[i].entry=entry; \
/* Advance to the next output slot and report progress. */ \
159 current_line_ind++; \
160 SG_PROGRESS(current_line_ind, 0, num_vec, 1, "LOADING:\t") \
165 SG_INFO("file successfully read\n") \
181 #undef GET_LABELED_SPARSE_MATRIX
// Generates the label-less overload of CLibSVMFile::set_sparse_matrix for one
// element type `sg_type`. The generated function forwards to the labeled
// overload, passing NULL for the labels. `format` is not used in the lines
// visible here.
// NOTE(review): the macro instantiations between the body and the #undef are
// not visible in this view.
183 #define SET_SPARSE_MATRIX(format, sg_type) \
184 void CLibSVMFile::set_sparse_matrix( \
185 const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \
/* NULL labels: delegate to the four-argument overload. */ \
187 set_sparse_matrix(matrix, num_feat, num_vec, NULL); \
203 #undef SET_SPARSE_MATRIX
// Generates the labeled overload of CLibSVMFile::set_sparse_matrix for one
// element type `sg_type`; `format` is the printf conversion (without the
// leading '%') used for a feature value.
//
// For each of the num_vec vectors the visible statements print the label with
// "%lg ", then one "<index><char><value> " group per stored feature entry,
// then a newline. The printed index is the stored feat_index plus one.
//
// NOTE(review): the label fprintf must be guarded against labels==NULL,
// because the label-less overload passes NULL; the guard is not visible in
// this view -- confirm. The argument supplying the "%c" character (presumably
// m_delimiter) and the declaration of `file` are also on lines not shown.
205 #define SET_LABELED_SPARSE_MATRIX(format, sg_type) \
206 void CLibSVMFile::set_sparse_matrix( \
207 const SGSparseVector<sg_type>* matrix, int32_t num_feat, int32_t num_vec, \
208 const float64_t* labels) \
/* One output line per vector. */ \
212 for (int32_t i=0; i<num_vec; i++) \
215 fprintf(file, "%lg ", labels[i]); \
/* One index/value group per stored feature entry of vector i. */ \
217 for (int32_t j=0; j<matrix[i].num_feat_entries; j++) \
219 fprintf(file, "%d%c%" format " ", \
220 matrix[i].features[j].feat_index+1, \
222 matrix[i].features[j].entry); \
/* Terminate the line for vector i. */ \
224 fprintf(file, "\n"); \
243 #undef SET_LABELED_SPARSE_MATRIX
// Returns an int32_t line count for the file (per the name and return type).
// NOTE(review): only the reset of m_line_reader is visible in this view; the
// counting logic and the return statement are on lines not shown -- confirm
// that the reader is left rewound for subsequent reads.
245 int32_t CLibSVMFile::get_num_lines()
253 m_line_reader->
reset();