SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
RealFileFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
13 #include <shogun/io/SGIO.h>
14 #include <shogun/lib/memory.h>
15 
16 #include <stdio.h>
17 #include <string.h>
18 
19 using namespace shogun;
20 
// Body of a constructor whose signature line is absent from this listing;
// judging by the SG_UNSTABLE tag it is the argument-less
// CRealFileFeatures::CRealFileFeatures() — confirm against the header.
// It emits the SG_UNSTABLE notice and then resets all members through init().
22 {
23  SG_UNSTABLE("CRealFileFeatures::CRealFileFeatures()", "\n")
24  init();
25 }
26 
// Constructs the object from a file name: resets the members through init(),
// opens fname for reading and keeps a private copy of the name.
//
// @param size  not used by any statement visible here (the listing skips the
//              line right after the signature, presumably the base-class
//              initializer — TODO confirm)
// @param fname path of the file to open with mode "r"
//
// NOTE(review): init() has already stored get_strdup("") in working_filename;
// that pointer is overwritten below without SG_FREE, which looks like a leak.
// NOTE(review): no check of the fopen() result is visible; the listing skips
// lines before the closing brace, so a check may exist there — confirm.
27 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname)
29 {
30  init();
31 
32  working_file=fopen(fname, "r");
33  working_filename=get_strdup(fname);
36 }
37 
// Constructs the object from an already opened FILE*. The only statement
// visible in this listing is the init() call that resets all members.
//
// @param size  not used by any visible statement
// @param file  not used by any visible statement
//
// NOTE(review): the listing skips lines both after the signature and before
// the closing brace; whatever stores `file` or loads the header would be
// there — verify against the real source before relying on this comment.
38 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file)
40 {
41  init();
42 
45 }
46 
47 void CRealFileFeatures::init()
48 {
49  working_file=NULL;
50  working_filename=get_strdup("");
51  intlen=0;
52  doublelen=0;
53  endian=0;
54  fourcc=0;
55  preprocd=0;
56  labels=NULL;
57  status=false;
58 
59  unset_generic();
60 }
61 
// Body of a member function whose signature line is absent from this listing;
// it releases the two heap buffers this class allocates, so it is presumably
// the destructor — confirm.
// NOTE(review): working_file is not closed here. The file-name constructor
// fopen()s it, so the handle may leak; note however that the copy constructor
// shares the same FILE* between objects, so an unconditional fclose() would
// double-close. Ownership needs to be decided before changing this.
63 {
64  SG_FREE(working_filename);
65  SG_FREE(labels);
66 }
67 
// Initializer list and body of the copy constructor (its signature line is
// absent from this listing). The base class is copy-constructed from orig,
// the FILE* and the status flag are copied as-is (the file handle is shared,
// not duplicated), and the file name and label array are deep-copied.
//
// NOTE(review): init() is not called here, so when orig.working_filename is
// NULL, or when orig.labels is NULL / there are no vectors, working_filename
// resp. labels stay unassigned in the visible code and are later passed to
// SG_FREE — confirm whether they are initialized elsewhere.
// NOTE(review): intlen, doublelen, endian, fourcc, preprocd and filepos are
// not copied by anything visible here.
69 : CDenseFeatures<float64_t>(orig), working_file(orig.working_file), status(orig.status)
70 {
71  if (orig.working_filename)
72  working_filename=get_strdup(orig.working_filename);
73  if (orig.labels && get_num_vectors())
74  {
   // one int32_t label per vector, copied verbatim from orig
75  labels=SG_MALLOC(int32_t, get_num_vectors());
76  memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors());
77  }
78 }
79 
// Parameter list and body of compute_feature_vector (the first line of the
// signature is absent from this listing). Reads one feature vector straight
// from working_file.
//
// @param num    index of the vector to read; asserted to be < num_vectors
// @param len    set to num_features
// @param target destination buffer; when NULL a buffer of num_features
//               float64_t is allocated with SG_MALLOC and returned instead
// @return pointer to the buffer that received the data
//
// NOTE(review): num is only checked against the upper bound; a negative value
// yields a seek before filepos.
// NOTE(review): the buffer holds num_features float64_t, but fread() copies
// num_features items of doublelen bytes; doublelen comes from the file header,
// so a value larger than sizeof(float64_t) would overrun the buffer.
81  int32_t num, int32_t &len, float64_t* target)
82 {
83  ASSERT(num<num_vectors)
84  len=num_features;
85  float64_t* featurevector=target;
86  if (!featurevector)
87  featurevector=SG_MALLOC(float64_t, num_features);
   // vectors are stored back to back after the header, doublelen bytes per value
   // NOTE(review): num_features*doublelen*num may overflow for large files,
   // depending on the operand types — confirm.
89  fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET);
   // the read itself happens inside ASSERT; it is skipped if ASSERT is ever compiled out
90  ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features)
91  return featurevector;
92 }
93 
// Body of load_feature_matrix (signature line and several statements are
// absent from this listing). Rewinds working_file to filepos, then reads all
// num_vectors vectors, num_features values each, into feature_matrix.matrix
// while printing progress, and returns feature_matrix.matrix.
// NOTE(review): the allocation of feature_matrix is not visible here; it is
// presumably on one of the lines the listing skips — confirm.
95 {
97  fseek(working_file, filepos, SEEK_SET);
99 
100  SG_INFO("allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0)
103 
104  SG_INFO("loading... be patient.\n")
105 
106  for (int32_t i=0; i<(int32_t) num_vectors; i++)
107  {
    // progress output: a percentage on every (num_vectors/10+1)-th vector,
    // otherwise a dot on every (num_vectors/200+1)-th; the +1 avoids a zero divisor
108  if (!(i % (num_vectors/10+1)))
109  SG_PRINT("%02d%%.", (int) (100.0*i/num_vectors))
110  else if (!(i % (num_vectors/200+1)))
111  SG_PRINT(".")
112 
    // vector i is written at offset num_features*i; the read sits inside ASSERT
    // NOTE(review): each item read is doublelen bytes (value taken from the
    // file header) — a mismatch with sizeof(float64_t) would corrupt the matrix.
113  ASSERT(fread(&feature_matrix.matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features)
114  }
115  SG_DONE()
116 
117  return feature_matrix.matrix;
118 }
119 
120 int32_t CRealFileFeatures::get_label(int32_t idx)
121 {
122  ASSERT(idx<num_vectors)
123  if (labels)
124  return labels[idx];
125  return 0;
126 }
127 
// Body of the header-loading routine (signature line and some statements are
// absent from this listing; presumably load_base_data — confirm). Reads the
// file header from working_file, records where the feature data starts,
// stores the vector/feature counts and loads the label array.
// Returns true; every failed read trips an ASSERT instead.
129 {
131  uint32_t num_vec=0;
132  uint32_t num_feat=0;
133 
   // the first two bytes give the on-disk sizes of an integer and of a double
134  ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1)
135  ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1)
   // every following header field is read as one item of intlen bytes
   // NOTE(review): intlen comes from the file and is not validated in the
   // visible code; num_vec/num_feat are 4-byte locals, so intlen > 4 would
   // write past them. Treat the file as untrusted input.
136  ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1)
137  ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1)
138  ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1)
139  ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1)
140  ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1)
141  SG_INFO("detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd)
   // remember the offset right after the fixed header fields; the vector
   // readers seek relative to it
142  filepos=ftell(working_file);
143  set_num_vectors(num_vec);
144  set_num_features(num_feat);
   // replace any previous label array with num_vec labels read from the file
   // NOTE(review): the buffer holds num_vec ints but the read copies num_vec
   // items of intlen bytes — overruns if intlen > sizeof(int).
   // NOTE(review): the labels are read at the current file position; how that
   // relates to filepos depends on a statement the listing skips — confirm.
146  SG_FREE(labels);
147  labels=SG_MALLOC(int, num_vec);
148  ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec)
149  return true;
150 }
#define SG_INFO(...)
Definition: SGIO.h:118
#define SG_DONE()
Definition: SGIO.h:157
The class DenseFeatures implements dense feature matrices.
Definition: LDA.h:41
void unset_generic()
Definition: SGObject.cpp:303
int32_t get_label(int32_t idx)
virtual float64_t * load_feature_matrix()
int32_t num_features
number of features in cache
#define SG_PRINT(...)
Definition: SGIO.h:137
#define ASSERT(x)
Definition: SGIO.h:201
virtual int32_t get_num_vectors() const
double float64_t
Definition: common.h:50
virtual float64_t * compute_feature_vector(int32_t num, int32_t &len, float64_t *target=NULL)
SGMatrix< float64_t > feature_matrix
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
The class RealFileFeatures implements a dense double-precision floating point matrix from a file...
int32_t num_vectors
number of vectors in cache
#define SG_UNSTABLE(func,...)
Definition: SGIO.h:132

SHOGUN Machine Learning Toolbox - Documentation