00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "features/SparseFeatures.h"
00012 #include "lib/File.h"
00013 #include "lib/AsciiFile.h"
00014 #include "lib/Mathematics.h"
00015 #include <ctype.h>
00016
00017 using namespace shogun;
00018
00019 CAsciiFile::CAsciiFile(void)
00020 {
00021 SG_UNSTABLE("CAsciiFile::CAsciiFile(void)", "\n");
00022 }
00023
00024 CAsciiFile::CAsciiFile(FILE* f, const char* name) : CFile(f, name)
00025 {
00026 }
00027
00028 CAsciiFile::CAsciiFile(char* fname, char rw, const char* name) : CFile(fname, rw, name)
00029 {
00030 }
00031
00032 CAsciiFile::~CAsciiFile()
00033 {
00034 }
00035
00036 #define GET_VECTOR(fname, mfname, sg_type) \
00037 void CAsciiFile::fname(sg_type*& vec, int32_t& len) \
00038 { \
00039 vec=NULL; \
00040 len=0; \
00041 int32_t num_feat=0; \
00042 int32_t num_vec=0; \
00043 mfname(vec, num_feat, num_vec); \
00044 if ((num_feat==1) || (num_vec==1)) \
00045 { \
00046 if (num_feat==1) \
00047 len=num_vec; \
00048 else \
00049 len=num_feat; \
00050 } \
00051 else \
00052 { \
00053 delete[] vec; \
00054 vec=NULL; \
00055 len=0; \
00056 SG_ERROR("Could not read vector from" \
00057 " file %s (shape %dx%d found but " \
00058 "vector expected).\n", filename, \
00059 num_vec, num_feat); \
00060 } \
00061 }
00062
00063 GET_VECTOR(get_byte_vector, get_byte_matrix, uint8_t)
00064 GET_VECTOR(get_char_vector, get_char_matrix, char)
00065 GET_VECTOR(get_int_vector, get_int_matrix, int32_t)
00066 GET_VECTOR(get_shortreal_vector, get_shortreal_matrix, float32_t)
00067 GET_VECTOR(get_real_vector, get_real_matrix, float64_t)
00068 GET_VECTOR(get_short_vector, get_short_matrix, int16_t)
00069 GET_VECTOR(get_word_vector, get_word_matrix, uint16_t)
00070 #undef GET_VECTOR
00071
00072 #define GET_MATRIX(fname, conv, sg_type) \
00073 void CAsciiFile::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \
00074 { \
00075 struct stat stats; \
00076 if (stat(filename, &stats)!=0) \
00077 SG_ERROR("Could not get file statistics.\n"); \
00078 \
00079 char* data=new char[stats.st_size+1]; \
00080 memset(data, 0, sizeof(char)*(stats.st_size+1)); \
00081 size_t nread=fread(data, sizeof(char), stats.st_size, file); \
00082 if (nread<=0) \
00083 SG_ERROR("Could not read data from %s.\n", filename); \
00084 \
00085 SG_DEBUG("data read from file:\n%s\n", data); \
00086 \
00087 \
00088 int32_t nf=0; \
00089 num_feat=0; \
00090 num_vec=0; \
00091 char* ptr_item=NULL; \
00092 char* ptr_data=data; \
00093 DynArray<char*>* items=new DynArray<char*>(); \
00094 \
00095 while (*ptr_data) \
00096 { \
00097 if (*ptr_data=='\n') \
00098 { \
00099 if (ptr_item) \
00100 nf++; \
00101 \
00102 if (num_feat!=0 && nf!=num_feat) \
00103 SG_ERROR("Number of features mismatches (%d != %d) in vector" \
00104 " %d in file %s.\n", num_feat, nf, num_vec, filename); \
00105 \
00106 append_item(items, ptr_data, ptr_item); \
00107 num_feat=nf; \
00108 num_vec++; \
00109 nf=0; \
00110 ptr_item=NULL; \
00111 } \
00112 else if (!isblank(*ptr_data) && !ptr_item) \
00113 { \
00114 ptr_item=ptr_data; \
00115 } \
00116 else if (isblank(*ptr_data) && ptr_item) \
00117 { \
00118 append_item(items, ptr_data, ptr_item); \
00119 ptr_item=NULL; \
00120 nf++; \
00121 } \
00122 \
00123 ptr_data++; \
00124 } \
00125 \
00126 SG_DEBUG("num feat: %d, num_vec %d\n", num_feat, num_vec); \
00127 delete[] data; \
00128 \
00129 \
00130 matrix=new sg_type[num_vec*num_feat]; \
00131 for (int32_t i=0; i<num_vec; i++) \
00132 { \
00133 for (int32_t j=0; j<num_feat; j++) \
00134 { \
00135 char* item=items->get_element(i*num_feat+j); \
00136 matrix[i*num_feat+j]=conv(item); \
00137 delete[] item; \
00138 } \
00139 } \
00140 delete items; \
00141 }
00142
00143 GET_MATRIX(get_byte_matrix, atoi, uint8_t)
00144 GET_MATRIX(get_int8_matrix, atoi, int8_t)
00145 GET_MATRIX(get_char_matrix, atoi, char)
00146 GET_MATRIX(get_int_matrix, atoi, int32_t)
00147 GET_MATRIX(get_uint_matrix, atoi, uint32_t)
00148 GET_MATRIX(get_long_matrix, atoll, int64_t)
00149 GET_MATRIX(get_ulong_matrix, atoll, uint64_t)
00150 GET_MATRIX(get_shortreal_matrix, atof, float32_t)
00151 GET_MATRIX(get_real_matrix, atof, float64_t)
00152 GET_MATRIX(get_longreal_matrix, atof, floatmax_t)
00153 GET_MATRIX(get_short_matrix, atoi, int16_t)
00154 GET_MATRIX(get_word_matrix, atoi, uint16_t)
00155 #undef GET_MATRIX
00156
00157 void CAsciiFile::get_byte_ndarray(uint8_t*& array, int32_t*& dims, int32_t& num_dims)
00158 {
00159 }
00160
00161 void CAsciiFile::get_char_ndarray(char*& array, int32_t*& dims, int32_t& num_dims)
00162 {
00163 }
00164
00165 void CAsciiFile::get_int_ndarray(int32_t*& array, int32_t*& dims, int32_t& num_dims)
00166 {
00167 }
00168
00169 void CAsciiFile::get_shortreal_ndarray(float32_t*& array, int32_t*& dims, int32_t& num_dims)
00170 {
00171 }
00172
00173 void CAsciiFile::get_real_ndarray(float64_t*& array, int32_t*& dims, int32_t& num_dims)
00174 {
00175 }
00176
00177 void CAsciiFile::get_short_ndarray(int16_t*& array, int32_t*& dims, int32_t& num_dims)
00178 {
00179 }
00180
00181 void CAsciiFile::get_word_ndarray(uint16_t*& array, int32_t*& dims, int32_t& num_dims)
00182 {
00183 }
00184
00185 #define GET_SPARSEMATRIX(fname, conv, sg_type) \
00186 void CAsciiFile::fname(TSparse<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \
00187 { \
00188 size_t blocksize=1024*1024; \
00189 size_t required_blocksize=blocksize; \
00190 uint8_t* dummy=new uint8_t[blocksize]; \
00191 \
00192 if (file) \
00193 { \
00194 num_vec=0; \
00195 num_feat=0; \
00196 \
00197 SG_INFO("counting line numbers in file %s\n", filename); \
00198 size_t sz=blocksize; \
00199 size_t block_offs=0; \
00200 size_t old_block_offs=0; \
00201 fseek(file, 0, SEEK_END); \
00202 size_t fsize=ftell(file); \
00203 rewind(file); \
00204 \
00205 while (sz == blocksize) \
00206 { \
00207 sz=fread(dummy, sizeof(uint8_t), blocksize, file); \
00208 bool contains_cr=false; \
00209 for (size_t i=0; i<sz; i++) \
00210 { \
00211 block_offs++; \
00212 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) \
00213 { \
00214 num_vec++; \
00215 contains_cr=true; \
00216 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs+1); \
00217 old_block_offs=block_offs; \
00218 } \
00219 } \
00220 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t"); \
00221 } \
00222 \
00223 SG_INFO("found %d feature vectors\n", num_vec); \
00224 delete[] dummy; \
00225 blocksize=required_blocksize; \
00226 dummy = new uint8_t[blocksize+1]; \
00227 matrix=new TSparse<sg_type>[num_vec]; \
00228 \
00229 rewind(file); \
00230 sz=blocksize; \
00231 int32_t lines=0; \
00232 while (sz == blocksize) \
00233 { \
00234 sz=fread(dummy, sizeof(uint8_t), blocksize, file); \
00235 \
00236 size_t old_sz=0; \
00237 for (size_t i=0; i<sz; i++) \
00238 { \
00239 if (i==sz-1 && dummy[i]!='\n' && sz==blocksize) \
00240 { \
00241 size_t len=i-old_sz+1; \
00242 uint8_t* data=&dummy[old_sz]; \
00243 \
00244 for (size_t j=0; j<len; j++) \
00245 dummy[j]=data[j]; \
00246 \
00247 sz=fread(dummy+len, sizeof(uint8_t), blocksize-len, file); \
00248 i=0; \
00249 old_sz=0; \
00250 sz+=len; \
00251 } \
00252 \
00253 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) \
00254 { \
00255 \
00256 size_t len=i-old_sz; \
00257 uint8_t* data=&dummy[old_sz]; \
00258 \
00259 int32_t dims=0; \
00260 for (size_t j=0; j<len; j++) \
00261 { \
00262 if (data[j]==':') \
00263 dims++; \
00264 } \
00265 \
00266 if (dims<=0) \
00267 { \
00268 SG_ERROR("Error in line %d - number of" \
00269 " dimensions is %d line is %d characters" \
00270 " long\n line_content:'%.*s'\n", lines, \
00271 dims, len, len, (const char*) data); \
00272 } \
00273 \
00274 TSparseEntry<sg_type>* feat=new TSparseEntry<sg_type>[dims]; \
00275 \
00276 \
00277 size_t j=0; \
00278 for (; j<len; j++) \
00279 { \
00280 if (data[j]==':') \
00281 { \
00282 j=-1; \
00283 break; \
00284 } \
00285 \
00286 if (data[j]==' ') \
00287 { \
00288 data[j]='\0'; \
00289 \
00290 \
00291 break; \
00292 } \
00293 } \
00294 \
00295 int32_t d=0; \
00296 j++; \
00297 uint8_t* start=&data[j]; \
00298 for (; j<len; j++) \
00299 { \
00300 if (data[j]==':') \
00301 { \
00302 data[j]='\0'; \
00303 \
00304 feat[d].feat_index=(int32_t) atoi((const char*) start)-1; \
00305 num_feat=CMath::max(num_feat, feat[d].feat_index+1); \
00306 \
00307 j++; \
00308 start=&data[j]; \
00309 for (; j<len; j++) \
00310 { \
00311 if (data[j]==' ' || data[j]=='\n') \
00312 { \
00313 data[j]='\0'; \
00314 feat[d].entry=(sg_type) conv((const char*) start); \
00315 d++; \
00316 break; \
00317 } \
00318 } \
00319 \
00320 if (j==len) \
00321 { \
00322 data[j]='\0'; \
00323 feat[dims-1].entry=(sg_type) conv((const char*) start); \
00324 } \
00325 \
00326 j++; \
00327 start=&data[j]; \
00328 } \
00329 } \
00330 \
00331 matrix[lines].vec_index=lines; \
00332 matrix[lines].num_feat_entries=dims; \
00333 matrix[lines].features=feat; \
00334 \
00335 old_sz=i+1; \
00336 lines++; \
00337 SG_PROGRESS(lines, 0, num_vec, 1, "LOADING:\t"); \
00338 } \
00339 } \
00340 } \
00341 \
00342 SG_INFO("file successfully read\n"); \
00343 } \
00344 \
00345 delete[] dummy; \
00346 }
00347
00348 GET_SPARSEMATRIX(get_bool_sparsematrix, atoi, bool)
00349 GET_SPARSEMATRIX(get_byte_sparsematrix, atoi, uint8_t)
00350 GET_SPARSEMATRIX(get_int8_sparsematrix, atoi, int8_t)
00351 GET_SPARSEMATRIX(get_char_sparsematrix, atoi, char)
00352 GET_SPARSEMATRIX(get_int_sparsematrix, atoi, int32_t)
00353 GET_SPARSEMATRIX(get_uint_sparsematrix, atoi, uint32_t)
00354 GET_SPARSEMATRIX(get_long_sparsematrix, atoll, int64_t)
00355 GET_SPARSEMATRIX(get_ulong_sparsematrix, atoll, uint64_t)
00356 GET_SPARSEMATRIX(get_shortreal_sparsematrix, atof, float32_t)
00357 GET_SPARSEMATRIX(get_real_sparsematrix, atof, float64_t)
00358 GET_SPARSEMATRIX(get_longreal_sparsematrix, atof, floatmax_t)
00359 GET_SPARSEMATRIX(get_short_sparsematrix, atoi, int16_t)
00360 GET_SPARSEMATRIX(get_word_sparsematrix, atoi, uint16_t)
00361 #undef GET_SPARSEMATRIX
00362
00363
00364 void CAsciiFile::get_byte_string_list(TString<uint8_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00365 {
00366 size_t blocksize=1024*1024;
00367 size_t required_blocksize=0;
00368 uint8_t* dummy=new uint8_t[blocksize];
00369 uint8_t* overflow=NULL;
00370 int32_t overflow_len=0;
00371
00372 if (file)
00373 {
00374 num_str=0;
00375 max_string_len=0;
00376
00377 SG_INFO("counting line numbers in file %s\n", filename);
00378 size_t sz=blocksize;
00379 size_t block_offs=0;
00380 size_t old_block_offs=0;
00381 fseek(file, 0, SEEK_END);
00382 size_t fsize=ftell(file);
00383 rewind(file);
00384
00385 while (sz == blocksize)
00386 {
00387 sz=fread(dummy, sizeof(uint8_t), blocksize, file);
00388 bool contains_cr=false;
00389 for (size_t i=0; i<sz; i++)
00390 {
00391 block_offs++;
00392 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00393 {
00394 num_str++;
00395 contains_cr=true;
00396 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs);
00397 old_block_offs=block_offs;
00398 }
00399 }
00400 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t");
00401 }
00402
00403 SG_INFO("found %d strings\n", num_str);
00404 SG_DEBUG("block_size=%d\n", required_blocksize);
00405 delete[] dummy;
00406 blocksize=required_blocksize;
00407 dummy=new uint8_t[blocksize];
00408 overflow=new uint8_t[blocksize];
00409 strings=new TString<uint8_t>[num_str];
00410
00411 rewind(file);
00412 sz=blocksize;
00413 int32_t lines=0;
00414 size_t old_sz=0;
00415 while (sz == blocksize)
00416 {
00417 sz=fread(dummy, sizeof(uint8_t), blocksize, file);
00418
00419 old_sz=0;
00420 for (size_t i=0; i<sz; i++)
00421 {
00422 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00423 {
00424 int32_t len=i-old_sz;
00425 max_string_len=CMath::max(max_string_len, len+overflow_len);
00426
00427 strings[lines].length=len+overflow_len;
00428 strings[lines].string=new uint8_t[len+overflow_len];
00429
00430 for (int32_t j=0; j<overflow_len; j++)
00431 strings[lines].string[j]=overflow[j];
00432 for (int32_t j=0; j<len; j++)
00433 strings[lines].string[j+overflow_len]=dummy[old_sz+j];
00434
00435
00436 overflow_len=0;
00437
00438
00439 old_sz=i+1;
00440 lines++;
00441 SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t");
00442 }
00443 }
00444
00445 for (size_t i=old_sz; i<sz; i++)
00446 overflow[i-old_sz]=dummy[i];
00447
00448 overflow_len=sz-old_sz;
00449 }
00450 SG_INFO("file successfully read\n");
00451 SG_INFO("max_string_length=%d\n", max_string_len);
00452 SG_INFO("num_strings=%d\n", num_str);
00453 }
00454
00455 delete[] dummy;
00456 delete[] overflow;
00457 }
00458
00459 void CAsciiFile::get_int8_string_list(TString<int8_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00460 {
00461 size_t blocksize=1024*1024;
00462 size_t required_blocksize=0;
00463 int8_t* dummy=new int8_t[blocksize];
00464 int8_t* overflow=NULL;
00465 int32_t overflow_len=0;
00466
00467 if (file)
00468 {
00469 num_str=0;
00470 max_string_len=0;
00471
00472 SG_INFO("counting line numbers in file %s\n", filename);
00473 size_t sz=blocksize;
00474 size_t block_offs=0;
00475 size_t old_block_offs=0;
00476 fseek(file, 0, SEEK_END);
00477 size_t fsize=ftell(file);
00478 rewind(file);
00479
00480 while (sz == blocksize)
00481 {
00482 sz=fread(dummy, sizeof(int8_t), blocksize, file);
00483 bool contains_cr=false;
00484 for (size_t i=0; i<sz; i++)
00485 {
00486 block_offs++;
00487 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00488 {
00489 num_str++;
00490 contains_cr=true;
00491 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs);
00492 old_block_offs=block_offs;
00493 }
00494 }
00495 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t");
00496 }
00497
00498 SG_INFO("found %d strings\n", num_str);
00499 SG_DEBUG("block_size=%d\n", required_blocksize);
00500 delete[] dummy;
00501 blocksize=required_blocksize;
00502 dummy=new int8_t[blocksize];
00503 overflow=new int8_t[blocksize];
00504 strings=new TString<int8_t>[num_str];
00505
00506 rewind(file);
00507 sz=blocksize;
00508 int32_t lines=0;
00509 size_t old_sz=0;
00510 while (sz == blocksize)
00511 {
00512 sz=fread(dummy, sizeof(int8_t), blocksize, file);
00513
00514 old_sz=0;
00515 for (size_t i=0; i<sz; i++)
00516 {
00517 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00518 {
00519 int32_t len=i-old_sz;
00520 max_string_len=CMath::max(max_string_len, len+overflow_len);
00521
00522 strings[lines].length=len+overflow_len;
00523 strings[lines].string=new int8_t[len+overflow_len];
00524
00525 for (int32_t j=0; j<overflow_len; j++)
00526 strings[lines].string[j]=overflow[j];
00527 for (int32_t j=0; j<len; j++)
00528 strings[lines].string[j+overflow_len]=dummy[old_sz+j];
00529
00530
00531 overflow_len=0;
00532
00533
00534 old_sz=i+1;
00535 lines++;
00536 SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t");
00537 }
00538 }
00539
00540 for (size_t i=old_sz; i<sz; i++)
00541 overflow[i-old_sz]=dummy[i];
00542
00543 overflow_len=sz-old_sz;
00544 }
00545 SG_INFO("file successfully read\n");
00546 SG_INFO("max_string_length=%d\n", max_string_len);
00547 SG_INFO("num_strings=%d\n", num_str);
00548 }
00549
00550 delete[] dummy;
00551 delete[] overflow;
00552 }
00553
00554 void CAsciiFile::get_char_string_list(TString<char>*& strings, int32_t& num_str, int32_t& max_string_len)
00555 {
00556 size_t blocksize=1024*1024;
00557 size_t required_blocksize=0;
00558 char* dummy=new char[blocksize];
00559 char* overflow=NULL;
00560 int32_t overflow_len=0;
00561
00562 if (file)
00563 {
00564 num_str=0;
00565 max_string_len=0;
00566
00567 SG_INFO("counting line numbers in file %s\n", filename);
00568 size_t sz=blocksize;
00569 size_t block_offs=0;
00570 size_t old_block_offs=0;
00571 fseek(file, 0, SEEK_END);
00572 size_t fsize=ftell(file);
00573 rewind(file);
00574
00575 while (sz == blocksize)
00576 {
00577 sz=fread(dummy, sizeof(char), blocksize, file);
00578 bool contains_cr=false;
00579 for (size_t i=0; i<sz; i++)
00580 {
00581 block_offs++;
00582 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00583 {
00584 num_str++;
00585 contains_cr=true;
00586 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs);
00587 old_block_offs=block_offs;
00588 }
00589 }
00590 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t");
00591 }
00592
00593 SG_INFO("found %d strings\n", num_str);
00594 SG_DEBUG("block_size=%d\n", required_blocksize);
00595 delete[] dummy;
00596 blocksize=required_blocksize;
00597 dummy=new char[blocksize];
00598 overflow=new char[blocksize];
00599 strings=new TString<char>[num_str];
00600
00601 rewind(file);
00602 sz=blocksize;
00603 int32_t lines=0;
00604 size_t old_sz=0;
00605 while (sz == blocksize)
00606 {
00607 sz=fread(dummy, sizeof(char), blocksize, file);
00608
00609 old_sz=0;
00610 for (size_t i=0; i<sz; i++)
00611 {
00612 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00613 {
00614 int32_t len=i-old_sz;
00615 max_string_len=CMath::max(max_string_len, len+overflow_len);
00616
00617 strings[lines].length=len+overflow_len;
00618 strings[lines].string=new char[len+overflow_len];
00619
00620 for (int32_t j=0; j<overflow_len; j++)
00621 strings[lines].string[j]=overflow[j];
00622 for (int32_t j=0; j<len; j++)
00623 strings[lines].string[j+overflow_len]=dummy[old_sz+j];
00624
00625
00626 overflow_len=0;
00627
00628
00629 old_sz=i+1;
00630 lines++;
00631 SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t");
00632 }
00633 }
00634
00635 for (size_t i=old_sz; i<sz; i++)
00636 overflow[i-old_sz]=dummy[i];
00637
00638 overflow_len=sz-old_sz;
00639 }
00640 SG_INFO("file successfully read\n");
00641 SG_INFO("max_string_length=%d\n", max_string_len);
00642 SG_INFO("num_strings=%d\n", num_str);
00643 }
00644
00645 delete[] dummy;
00646 delete[] overflow;
00647 }
00648
00649 void CAsciiFile::get_int_string_list(TString<int32_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00650 {
00651 strings=NULL;
00652 num_str=0;
00653 max_string_len=0;
00654 }
00655
00656 void CAsciiFile::get_uint_string_list(TString<uint32_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00657 {
00658 strings=NULL;
00659 num_str=0;
00660 max_string_len=0;
00661 }
00662
00663 void CAsciiFile::get_short_string_list(TString<int16_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00664 {
00665 strings=NULL;
00666 num_str=0;
00667 max_string_len=0;
00668 }
00669
00670 void CAsciiFile::get_word_string_list(TString<uint16_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00671 {
00672 strings=NULL;
00673 num_str=0;
00674 max_string_len=0;
00675 }
00676
00677 void CAsciiFile::get_long_string_list(TString<int64_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00678 {
00679 strings=NULL;
00680 num_str=0;
00681 max_string_len=0;
00682 }
00683
00684 void CAsciiFile::get_ulong_string_list(TString<uint64_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00685 {
00686 strings=NULL;
00687 num_str=0;
00688 max_string_len=0;
00689 }
00690
00691 void CAsciiFile::get_shortreal_string_list(TString<float32_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00692 {
00693 strings=NULL;
00694 num_str=0;
00695 max_string_len=0;
00696 }
00697
00698 void CAsciiFile::get_real_string_list(TString<float64_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00699 {
00700 strings=NULL;
00701 num_str=0;
00702 max_string_len=0;
00703 }
00704
00705 void CAsciiFile::get_longreal_string_list(TString<floatmax_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00706 {
00707 strings=NULL;
00708 num_str=0;
00709 max_string_len=0;
00710 }
00711
00712
00715 #define SET_VECTOR(fname, mfname, sg_type) \
00716 void CAsciiFile::fname(const sg_type* vec, int32_t len) \
00717 { \
00718 mfname(vec, len, 1); \
00719 }
00720 SET_VECTOR(set_byte_vector, set_byte_matrix, uint8_t)
00721 SET_VECTOR(set_char_vector, set_char_matrix, char)
00722 SET_VECTOR(set_int_vector, set_int_matrix, int32_t)
00723 SET_VECTOR(set_shortreal_vector, set_shortreal_matrix, float32_t)
00724 SET_VECTOR(set_real_vector, set_real_matrix, float64_t)
00725 SET_VECTOR(set_short_vector, set_short_matrix, int16_t)
00726 SET_VECTOR(set_word_vector, set_word_matrix, uint16_t)
00727 #undef SET_VECTOR
00728
00729 #define SET_MATRIX(fname, sg_type, fprt_type, type_str) \
00730 void CAsciiFile::fname(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \
00731 { \
00732 if (!(file && matrix)) \
00733 SG_ERROR("File or matrix invalid.\n"); \
00734 \
00735 for (int32_t i=0; i<num_vec; i++) \
00736 { \
00737 for (int32_t j=0; j<num_feat; j++) \
00738 { \
00739 sg_type v=matrix[num_feat*i+j]; \
00740 if (j==num_feat-1) \
00741 fprintf(file, type_str "\n", (fprt_type) v); \
00742 else \
00743 fprintf(file, type_str " ", (fprt_type) v); \
00744 } \
00745 } \
00746 }
00747 SET_MATRIX(set_char_matrix, char, char, "%c")
00748 SET_MATRIX(set_byte_matrix, uint8_t, uint8_t, "%u")
00749 SET_MATRIX(set_int8_matrix, int8_t, int8_t, "%d")
00750 SET_MATRIX(set_int_matrix, int32_t, int32_t, "%i")
00751 SET_MATRIX(set_uint_matrix, uint32_t, uint32_t, "%u")
00752 SET_MATRIX(set_long_matrix, int64_t, long long int, "%lli")
00753 SET_MATRIX(set_ulong_matrix, uint64_t, long long unsigned int, "%llu")
00754 SET_MATRIX(set_short_matrix, int16_t, int16_t, "%i")
00755 SET_MATRIX(set_word_matrix, uint16_t, uint16_t, "%u")
00756 SET_MATRIX(set_shortreal_matrix, float32_t, float32_t, "%f")
00757 SET_MATRIX(set_real_matrix, float64_t, float64_t, "%f")
00758 SET_MATRIX(set_longreal_matrix, floatmax_t, floatmax_t, "%Lf")
00759 #undef SET_MATRIX
00760
00761 #define SET_SPARSEMATRIX(fname, sg_type, fprt_type, type_str) \
00762 void CAsciiFile::fname(const TSparse<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \
00763 { \
00764 if (!(file && matrix)) \
00765 SG_ERROR("File or matrix invalid.\n"); \
00766 \
00767 for (int32_t i=0; i<num_vec; i++) \
00768 { \
00769 TSparseEntry<sg_type>* vec = matrix[i].features; \
00770 int32_t len=matrix[i].num_feat_entries; \
00771 \
00772 for (int32_t j=0; j<len; j++) \
00773 { \
00774 if (j<len-1) \
00775 { \
00776 fprintf(file, "%d:" type_str " ", \
00777 (int32_t) vec[j].feat_index+1, (fprt_type) vec[j].entry); \
00778 } \
00779 else \
00780 { \
00781 fprintf(file, "%d:" type_str "\n", \
00782 (int32_t) vec[j].feat_index+1, (fprt_type) vec[j].entry); \
00783 } \
00784 } \
00785 } \
00786 }
00787 SET_SPARSEMATRIX(set_bool_sparsematrix, bool, uint8_t, "%u")
00788 SET_SPARSEMATRIX(set_char_sparsematrix, char, char, "%c")
00789 SET_SPARSEMATRIX(set_byte_sparsematrix, uint8_t, uint8_t, "%u")
00790 SET_SPARSEMATRIX(set_int8_sparsematrix, int8_t, int8_t, "%d")
00791 SET_SPARSEMATRIX(set_int_sparsematrix, int32_t, int32_t, "%i")
00792 SET_SPARSEMATRIX(set_uint_sparsematrix, uint32_t, uint32_t, "%u")
00793 SET_SPARSEMATRIX(set_long_sparsematrix, int64_t, long long int, "%lli")
00794 SET_SPARSEMATRIX(set_ulong_sparsematrix, uint64_t, long long unsigned int, "%llu")
00795 SET_SPARSEMATRIX(set_short_sparsematrix, int16_t, int16_t, "%i")
00796 SET_SPARSEMATRIX(set_word_sparsematrix, uint16_t, uint16_t, "%u")
00797 SET_SPARSEMATRIX(set_shortreal_sparsematrix, float32_t, float32_t, "%f")
00798 SET_SPARSEMATRIX(set_real_sparsematrix, float64_t, float64_t, "%f")
00799 SET_SPARSEMATRIX(set_longreal_sparsematrix, floatmax_t, floatmax_t, "%Lf")
00800 #undef SET_SPARSEMATRIX
00801
00802 void CAsciiFile::set_byte_string_list(const TString<uint8_t>* strings, int32_t num_str)
00803 {
00804 if (!(file && strings))
00805 SG_ERROR("File or strings invalid.\n");
00806
00807 for (int32_t i=0; i<num_str; i++)
00808 {
00809 int32_t len = strings[i].length;
00810 fwrite(strings[i].string, sizeof(uint8_t), len, file);
00811 fprintf(file, "\n");
00812 }
00813 }
00814
00815 void CAsciiFile::set_int8_string_list(const TString<int8_t>* strings, int32_t num_str)
00816 {
00817 if (!(file && strings))
00818 SG_ERROR("File or strings invalid.\n");
00819
00820 for (int32_t i=0; i<num_str; i++)
00821 {
00822 int32_t len = strings[i].length;
00823 fwrite(strings[i].string, sizeof(int8_t), len, file);
00824 fprintf(file, "\n");
00825 }
00826 }
00827
00828 void CAsciiFile::set_char_string_list(const TString<char>* strings, int32_t num_str)
00829 {
00830 if (!(file && strings))
00831 SG_ERROR("File or strings invalid.\n");
00832
00833 for (int32_t i=0; i<num_str; i++)
00834 {
00835 int32_t len = strings[i].length;
00836 fwrite(strings[i].string, sizeof(char), len, file);
00837 fprintf(file, "\n");
00838 }
00839 }
00840
00841 void CAsciiFile::set_int_string_list(const TString<int32_t>* strings, int32_t num_str)
00842 {
00843 }
00844
00845 void CAsciiFile::set_uint_string_list(const TString<uint32_t>* strings, int32_t num_str)
00846 {
00847 }
00848
00849 void CAsciiFile::set_short_string_list(const TString<int16_t>* strings, int32_t num_str)
00850 {
00851 }
00852
00853 void CAsciiFile::set_word_string_list(const TString<uint16_t>* strings, int32_t num_str)
00854 {
00855 }
00856
00857 void CAsciiFile::set_long_string_list(const TString<int64_t>* strings, int32_t num_str)
00858 {
00859 }
00860
00861 void CAsciiFile::set_ulong_string_list(const TString<uint64_t>* strings, int32_t num_str)
00862 {
00863 }
00864
00865 void CAsciiFile::set_shortreal_string_list(const TString<float32_t>* strings, int32_t num_str)
00866 {
00867 }
00868
00869 void CAsciiFile::set_real_string_list(const TString<float64_t>* strings, int32_t num_str)
00870 {
00871 }
00872
00873 void CAsciiFile::set_longreal_string_list(const TString<floatmax_t>* strings, int32_t num_str)
00874 {
00875 }
00876
00877 template <class T> void CAsciiFile::append_item(
00878 DynArray<T>* items, char* ptr_data, char* ptr_item)
00879 {
00880 size_t len=(ptr_data-ptr_item)/sizeof(char);
00881 char* item=new char[len+1];
00882 memset(item, 0, sizeof(char)*(len+1));
00883 item=strncpy(item, ptr_item, len);
00884
00885 SG_DEBUG("current %c, len %d, item %s\n", *ptr_data, len, item);
00886 items->append_element(item);
00887 }