RealFileFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/features/RealFileFeatures.h>
00012 #include <shogun/features/Features.h>
00013 #include <shogun/io/SGIO.h>
00014 
00015 #include <stdio.h>
00016 #include <string.h>
00017 
00018 using namespace shogun;
00019 
00020 CRealFileFeatures::CRealFileFeatures()
00021 {
00022     SG_UNSTABLE("CRealFileFeatures::CRealFileFeatures()", "\n");
00023 
00024     working_file=NULL;
00025     working_filename=strdup("");
00026     intlen=0;
00027     doublelen=0;
00028     endian=0;
00029     fourcc=0;
00030     preprocd=0;
00031     labels=NULL;
00032     status=false;
00033 }
00034 
00035 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname)
00036 : CDenseFeatures<float64_t>(size)
00037 {
00038     working_file=fopen(fname, "r");
00039     working_filename=strdup(fname);
00040     ASSERT(working_file);
00041     intlen=0;
00042     doublelen=0;
00043     endian=0;
00044     fourcc=0;
00045     preprocd=0;
00046     labels=NULL;
00047     status=load_base_data();
00048 }
00049 
00050 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file)
00051 : CDenseFeatures<float64_t>(size), working_file(file), working_filename(NULL)
00052 {
00053     ASSERT(working_file);
00054     intlen=0;
00055     doublelen=0;
00056     endian=0;
00057     fourcc=0;
00058     preprocd=0;
00059     labels=NULL;
00060     status=load_base_data();
00061 }
00062 
00063 CRealFileFeatures::~CRealFileFeatures()
00064 {
00065     SG_FREE(working_filename);
00066     SG_FREE(labels);
00067 }
00068 
00069 CRealFileFeatures::CRealFileFeatures(const CRealFileFeatures & orig)
00070 : CDenseFeatures<float64_t>(orig), working_file(orig.working_file), status(orig.status)
00071 {
00072     if (orig.working_filename)
00073         working_filename=strdup(orig.working_filename);
00074     if (orig.labels && get_num_vectors())
00075     {
00076         labels=SG_MALLOC(int32_t, get_num_vectors());
00077         memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors());
00078     }
00079 }
00080 
00081 float64_t* CRealFileFeatures::compute_feature_vector(
00082     int32_t num, int32_t &len, float64_t* target)
00083 {
00084     ASSERT(num<num_vectors);
00085     len=num_features;
00086     float64_t* featurevector=target;
00087     if (!featurevector)
00088         featurevector=SG_MALLOC(float64_t, num_features);
00089     ASSERT(working_file);
00090     fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET);
00091     ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features);
00092     return featurevector;
00093 }
00094 
00095 float64_t* CRealFileFeatures::load_feature_matrix()
00096 {
00097     ASSERT(working_file);
00098     fseek(working_file, filepos, SEEK_SET);
00099     free_feature_matrix();
00100 
00101     SG_INFO( "allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0);
00102     free_feature_matrix();
00103     feature_matrix=SGMatrix<float64_t>(num_features,num_vectors);
00104 
00105     SG_INFO( "loading... be patient.\n");
00106 
00107     for (int32_t i=0; i<(int32_t) num_vectors; i++)
00108     {
00109         if (!(i % (num_vectors/10+1)))
00110             SG_PRINT( "%02d%%.", (int) (100.0*i/num_vectors));
00111         else if (!(i % (num_vectors/200+1)))
00112             SG_PRINT( ".");
00113 
00114         ASSERT(fread(&feature_matrix.matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features);
00115     }
00116     SG_DONE();
00117 
00118     return feature_matrix.matrix;
00119 }
00120 
00121 int32_t CRealFileFeatures::get_label(int32_t idx)
00122 {
00123     ASSERT(idx<num_vectors);
00124     if (labels)
00125         return labels[idx];
00126     return 0;
00127 }
00128 
00129 bool CRealFileFeatures::load_base_data()
00130 {
00131     ASSERT(working_file);
00132     uint32_t num_vec=0;
00133     uint32_t num_feat=0;
00134 
00135     ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1);
00136     ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1);
00137     ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1);
00138     ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1);
00139     ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1);
00140     ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1);
00141     ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1);
00142     SG_INFO( "detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd);
00143     filepos=ftell(working_file);
00144     set_num_vectors(num_vec);
00145     set_num_features(num_feat);
00146     fseek(working_file, filepos+num_features*num_vectors*doublelen, SEEK_SET);
00147     SG_FREE(labels);
00148     labels=SG_MALLOC(int, num_vec);
00149     ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec);
00150     return true;
00151 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation