SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
VwRegressor.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
13  */
14 
17 #include <shogun/io/IOBuffer.h>
18 
19 using namespace shogun;
20 
// Constructor initializer list and body. The signature line that precedes it
// is missing from this listing (presumably the default constructor -- confirm).
// Clears weight_vectors, installs a CSquaredLoss as the loss function and then
// calls init(NULL), which creates a default CVwEnvironment and allocates the
// weight vectors.
22  : CSGObject()
23 {
24  weight_vectors = NULL;
// NOTE(review): loss is not SG_REF'd here although the teardown code calls
// SG_UNREF(loss) -- confirm this matches the project's ref-counting rules.
25  loss = new CSquaredLoss();
26  init(NULL);
27 }
28 
// Constructor initializer list and body. The signature line that precedes it
// is missing from this listing (presumably the constructor taking a
// CVwEnvironment* named env_to_use -- confirm).
// Clears weight_vectors, installs a CSquaredLoss as the loss function and then
// hands env_to_use to init(), which stores and SG_REFs it (or creates a
// default environment when it is NULL) and allocates the weight vectors.
30  : CSGObject()
31 {
32  weight_vectors = NULL;
// NOTE(review): loss is not SG_REF'd here although the teardown code calls
// SG_UNREF(loss) -- confirm this matches the project's ref-counting rules.
33  loss = new CSquaredLoss();
34  init(env_to_use);
35 }
36 
38 {
39  // TODO: the number of weight_vectors depends on num_threads
40  // this should be reimplemented using SGVector (for reference counting)
41  SG_FREE(weight_vectors);
42  SG_UNREF(loss);
43  SG_UNREF(env);
44 }
45 
46 void CVwRegressor::init(CVwEnvironment* env_to_use)
47 {
48  if (!env_to_use)
49  env_to_use = new CVwEnvironment();
50 
51  env = env_to_use;
52  SG_REF(env);
53 
54  // For each feature, there should be 'stride' number of
55  // elements in the weight vector
56  vw_size_t length = ((vw_size_t) 1) << env->num_bits;
57  env->thread_mask = (env->stride * (length >> env->thread_bits)) - 1;
58 
59  // Only one learning thread for now
60  vw_size_t num_threads = 1;
61  weight_vectors = SG_MALLOC(float32_t*, num_threads);
62 
63  for (vw_size_t i = 0; i < num_threads; i++)
64  {
65  weight_vectors[i] = SG_CALLOC(float32_t, env->stride * length / num_threads);
66 
67  if (env->random_weights)
68  {
69  for (vw_size_t j = 0; j < length/num_threads; j++)
70  weight_vectors[i][j] = CMath::random(-0.5, 0.5);
71  }
72 
73  if (env->initial_weight != 0.)
74  for (vw_size_t j = 0; j < env->stride*length/num_threads; j+=env->stride)
76 
77  if (env->adaptive)
78  for (vw_size_t j = 1; j < env->stride*length/num_threads; j+=env->stride)
79  weight_vectors[i][j] = 1;
80  }
81 }
82 
83 void CVwRegressor::dump_regressor(char* reg_name, bool as_text)
84 {
85  CIOBuffer io_temp;
86  int32_t f = io_temp.open_file(reg_name,'w');
87 
88  if (f < 0)
89  SG_SERROR("Can't open: %s for writing! Exiting.\n", reg_name)
90 
91  const char* vw_version = env->vw_version;
92  vw_size_t v_length = env->v_length;
93 
94  if (!as_text)
95  {
96  // Write version info
97  io_temp.write_file((char*)&v_length, sizeof(v_length));
98  io_temp.write_file(vw_version,v_length);
99 
100  // Write max and min labels
101  io_temp.write_file((char*)&env->min_label, sizeof(env->min_label));
102  io_temp.write_file((char*)&env->max_label, sizeof(env->max_label));
103 
104  // Write weight vector bits information
105  io_temp.write_file((char *)&env->num_bits, sizeof(env->num_bits));
106  io_temp.write_file((char *)&env->thread_bits, sizeof(env->thread_bits));
107 
108  // For paired namespaces forming quadratic features
109  int32_t len = env->pairs.get_num_elements();
110  io_temp.write_file((char *)&len, sizeof(len));
111 
112  for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
113  io_temp.write_file(env->pairs.get_element(k), 2);
114 
115  // ngram and skips information
116  io_temp.write_file((char*)&env->ngram, sizeof(env->ngram));
117  io_temp.write_file((char*)&env->skips, sizeof(env->skips));
118  }
119  else
120  {
121  // Write as human readable form
122  char buff[512];
123  int32_t len;
124 
125  len = sprintf(buff, "Version %s\n", vw_version);
126  io_temp.write_file(buff, len);
127  len = sprintf(buff, "Min label:%f max label:%f\n", env->min_label, env->max_label);
128  io_temp.write_file(buff, len);
129  len = sprintf(buff, "bits:%d thread_bits:%d\n", (int32_t)env->num_bits, (int32_t)env->thread_bits);
130  io_temp.write_file(buff, len);
131 
132  if (env->pairs.get_num_elements() > 0)
133  {
134  len = sprintf(buff, "\n");
135  io_temp.write_file(buff, len);
136  }
137 
138  len = sprintf(buff, "ngram:%d skips:%d\nindex:weight pairs:\n", (int32_t)env->ngram, (int32_t)env->skips);
139  io_temp.write_file(buff, len);
140  }
141 
142  uint32_t length = 1 << env->num_bits;
143  vw_size_t num_threads = env->num_threads();
144  vw_size_t stride = env->stride;
145 
146  // Write individual weights
147  for(uint32_t i = 0; i < length; i++)
148  {
149  float32_t v;
150  v = weight_vectors[i%num_threads][stride*(i/num_threads)];
151  if (v != 0.)
152  {
153  if (!as_text)
154  {
155  io_temp.write_file((char *)&i, sizeof (i));
156  io_temp.write_file((char *)&v, sizeof (v));
157  }
158  else
159  {
160  char buff[512];
161  int32_t len = sprintf(buff, "%d:%f\n", i, v);
162  io_temp.write_file(buff, len);
163  }
164  }
165  }
166 
167  io_temp.close_file();
168 }
169 
171 {
172  CIOBuffer source;
173  int32_t fd = source.open_file(file, 'r');
174 
175  if (fd < 0)
176  SG_SERROR("Unable to open file for loading regressor!\n")
177 
178  // Read version info
179  vw_size_t v_length;
180  source.read_file((char*)&v_length, sizeof(v_length));
181  char* t = SG_MALLOC(char, v_length);
182  source.read_file(t,v_length);
183  if (strcmp(t,env->vw_version) != 0)
184  {
185  SG_FREE(t);
186  SG_SERROR("Regressor source has an incompatible VW version!\n")
187  }
188  SG_FREE(t);
189 
190  // Read min and max label
191  source.read_file((char*)&env->min_label, sizeof(env->min_label));
192  source.read_file((char*)&env->max_label, sizeof(env->max_label));
193 
194  // Read num_bits, multiple sources are not supported
195  vw_size_t local_num_bits;
196  source.read_file((char *)&local_num_bits, sizeof(local_num_bits));
197 
198  if ((vw_size_t) env->num_bits != local_num_bits)
199  SG_SERROR("Wrong number of bits in regressor source!\n")
200 
201  env->num_bits = local_num_bits;
202 
203  vw_size_t local_thread_bits;
204  source.read_file((char*)&local_thread_bits, sizeof(local_thread_bits));
205 
206  env->thread_bits = local_thread_bits;
207 
208  int32_t len;
209  source.read_file((char *)&len, sizeof(len));
210 
211  // Read paired namespace information
212  DynArray<char*> local_pairs;
213  for (; len > 0; len--)
214  {
215  char pair[3];
216  source.read_file(pair, sizeof(char)*2);
217  pair[2]='\0';
218  local_pairs.push_back(pair);
219  }
220 
221  env->pairs = local_pairs;
222 
223  // Initialize the weight vector
224  if (weight_vectors)
225  SG_FREE(weight_vectors);
226  init(env);
227 
228  vw_size_t local_ngram;
229  source.read_file((char*)&local_ngram, sizeof(local_ngram));
230  vw_size_t local_skips;
231  source.read_file((char*)&local_skips, sizeof(local_skips));
232 
233  env->ngram = local_ngram;
234  env->skips = local_skips;
235 
236  // Read individual weights
237  vw_size_t stride = env->stride;
238  while (true)
239  {
240  uint32_t hash;
241  ssize_t hash_bytes = source.read_file((char *)&hash, sizeof(hash));
242  if (hash_bytes <= 0)
243  break;
244 
245  float32_t w = 0.;
246  ssize_t weight_bytes = source.read_file((char *)&w, sizeof(float32_t));
247  if (weight_bytes <= 0)
248  break;
249 
250  vw_size_t num_threads = env->num_threads();
251 
252  weight_vectors[hash % num_threads][(hash*stride)/num_threads]
253  = weight_vectors[hash % num_threads][(hash*stride)/num_threads] + w;
254  }
255  source.close_file();
256 }

SHOGUN Machine Learning Toolbox - Documentation