SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RealFileFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
13 #include <shogun/io/SGIO.h>
14 
15 #include <stdio.h>
16 #include <string.h>
17 
18 using namespace shogun;
19 
21 {
22  SG_UNSTABLE("CRealFileFeatures::CRealFileFeatures()", "\n");
23 
24  working_file=NULL;
25  working_filename=strdup("");
26  intlen=0;
27  doublelen=0;
28  endian=0;
29  fourcc=0;
30  preprocd=0;
31  labels=NULL;
32  status=false;
33 }
34 
35 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname)
37 {
38  working_file=fopen(fname, "r");
39  working_filename=strdup(fname);
41  intlen=0;
42  doublelen=0;
43  endian=0;
44  fourcc=0;
45  preprocd=0;
46  labels=NULL;
48 }
49 
50 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file)
51 : CDenseFeatures<float64_t>(size), working_file(file), working_filename(NULL)
52 {
54  intlen=0;
55  doublelen=0;
56  endian=0;
57  fourcc=0;
58  preprocd=0;
59  labels=NULL;
61 }
62 
64 {
66  SG_FREE(labels);
67 }
68 
70 : CDenseFeatures<float64_t>(orig), working_file(orig.working_file), status(orig.status)
71 {
72  if (orig.working_filename)
74  if (orig.labels && get_num_vectors())
75  {
76  labels=SG_MALLOC(int32_t, get_num_vectors());
77  memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors());
78  }
79 }
80 
82  int32_t num, int32_t &len, float64_t* target)
83 {
84  ASSERT(num<num_vectors);
85  len=num_features;
86  float64_t* featurevector=target;
87  if (!featurevector)
88  featurevector=SG_MALLOC(float64_t, num_features);
90  fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET);
91  ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features);
92  return featurevector;
93 }
94 
96 {
98  fseek(working_file, filepos, SEEK_SET);
100 
101  SG_INFO( "allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0);
104 
105  SG_INFO( "loading... be patient.\n");
106 
107  for (int32_t i=0; i<(int32_t) num_vectors; i++)
108  {
109  if (!(i % (num_vectors/10+1)))
110  SG_PRINT( "%02d%%.", (int) (100.0*i/num_vectors));
111  else if (!(i % (num_vectors/200+1)))
112  SG_PRINT( ".");
113 
114  ASSERT(fread(&feature_matrix.matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features);
115  }
116  SG_DONE();
117 
118  return feature_matrix.matrix;
119 }
120 
121 int32_t CRealFileFeatures::get_label(int32_t idx)
122 {
123  ASSERT(idx<num_vectors);
124  if (labels)
125  return labels[idx];
126  return 0;
127 }
128 
130 {
132  uint32_t num_vec=0;
133  uint32_t num_feat=0;
134 
135  ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1);
136  ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1);
137  ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1);
138  ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1);
139  ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1);
140  ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1);
141  ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1);
142  SG_INFO( "detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd);
143  filepos=ftell(working_file);
144  set_num_vectors(num_vec);
145  set_num_features(num_feat);
147  SG_FREE(labels);
148  labels=SG_MALLOC(int, num_vec);
149  ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec);
150  return true;
151 }

SHOGUN Machine Learning Toolbox - Documentation