SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
VwRegressor.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
13  */
14 
17 #include <shogun/io/IOBuffer.h>
18 
19 using namespace shogun;
20 
// NOTE(review): the signature line of this definition is absent from this
// listing (the embedded numbering jumps from 20 to 22); only the initializer
// list and body are visible here.
//
// Starts with no weight vectors, installs a freshly allocated CSquaredLoss as
// the loss, and passes NULL to init().
22  : CSGObject()
23 {
24  weight_vectors = NULL;
25  loss = new CSquaredLoss();
26  init(NULL);
27 }
28 
// NOTE(review): the signature line of this definition is absent from this
// listing (the embedded numbering jumps from 28 to 30); `env_to_use` is
// presumably its parameter -- confirm against the header.
//
// Starts with no weight vectors, installs a freshly allocated CSquaredLoss as
// the loss, and forwards `env_to_use` to init().
30  : CSGObject()
31 {
32  weight_vectors = NULL;
33  loss = new CSquaredLoss();
34  init(env_to_use);
35 }
36 
// NOTE(review): the signature line and one body line (listing line 39) are
// absent from this listing; only the visible statements are described.
//
// Calls SG_UNREF on the loss object and on the environment.
38 {
40  SG_UNREF(loss);
41  SG_UNREF(env);
42 }
43 
44 void CVwRegressor::init(CVwEnvironment* env_to_use)
45 {
46  if (!env_to_use)
47  env_to_use = new CVwEnvironment();
48 
49  env = env_to_use;
50  SG_REF(env);
51 
52  // For each feature, there should be 'stride' number of
53  // elements in the weight vector
54  vw_size_t length = ((vw_size_t) 1) << env->num_bits;
55  env->thread_mask = (env->stride * (length >> env->thread_bits)) - 1;
56 
57  // Only one learning thread for now
58  vw_size_t num_threads = 1;
59  weight_vectors = SG_MALLOC(float32_t*, num_threads);
60 
61  for (vw_size_t i = 0; i < num_threads; i++)
62  {
63  weight_vectors[i] = SG_CALLOC(float32_t, env->stride * length / num_threads);
64 
65  if (env->random_weights)
66  {
67  for (vw_size_t j = 0; j < length/num_threads; j++)
68  weight_vectors[i][j] = CMath::random(-0.5, 0.5);
69  }
70 
71  if (env->initial_weight != 0.)
72  for (vw_size_t j = 0; j < env->stride*length/num_threads; j+=env->stride)
74 
75  if (env->adaptive)
76  for (vw_size_t j = 1; j < env->stride*length/num_threads; j+=env->stride)
77  weight_vectors[i][j] = 1;
78  }
79 }
80 
81 void CVwRegressor::dump_regressor(char* reg_name, bool as_text)
82 {
83  CIOBuffer io_temp;
84  int32_t f = io_temp.open_file(reg_name,'w');
85 
86  if (f < 0)
87  SG_SERROR("Can't open: %s for writing! Exiting.\n", reg_name);
88 
89  const char* vw_version = env->vw_version;
90  vw_size_t v_length = env->v_length;
91 
92  if (!as_text)
93  {
94  // Write version info
95  io_temp.write_file((char*)&v_length, sizeof(v_length));
96  io_temp.write_file(vw_version,v_length);
97 
98  // Write max and min labels
99  io_temp.write_file((char*)&env->min_label, sizeof(env->min_label));
100  io_temp.write_file((char*)&env->max_label, sizeof(env->max_label));
101 
102  // Write weight vector bits information
103  io_temp.write_file((char *)&env->num_bits, sizeof(env->num_bits));
104  io_temp.write_file((char *)&env->thread_bits, sizeof(env->thread_bits));
105 
106  // For paired namespaces forming quadratic features
107  int32_t len = env->pairs.get_num_elements();
108  io_temp.write_file((char *)&len, sizeof(len));
109 
110  for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
111  io_temp.write_file(env->pairs.get_element(k), 2);
112 
113  // ngram and skips information
114  io_temp.write_file((char*)&env->ngram, sizeof(env->ngram));
115  io_temp.write_file((char*)&env->skips, sizeof(env->skips));
116  }
117  else
118  {
119  // Write as human readable form
120  char buff[512];
121  int32_t len;
122 
123  len = sprintf(buff, "Version %s\n", vw_version);
124  io_temp.write_file(buff, len);
125  len = sprintf(buff, "Min label:%f max label:%f\n", env->min_label, env->max_label);
126  io_temp.write_file(buff, len);
127  len = sprintf(buff, "bits:%d thread_bits:%d\n", (int32_t)env->num_bits, (int32_t)env->thread_bits);
128  io_temp.write_file(buff, len);
129 
130  if (env->pairs.get_num_elements() > 0)
131  {
132  len = sprintf(buff, "\n");
133  io_temp.write_file(buff, len);
134  }
135 
136  len = sprintf(buff, "ngram:%d skips:%d\nindex:weight pairs:\n", (int32_t)env->ngram, (int32_t)env->skips);
137  io_temp.write_file(buff, len);
138  }
139 
140  uint32_t length = 1 << env->num_bits;
141  vw_size_t num_threads = env->num_threads();
142  vw_size_t stride = env->stride;
143 
144  // Write individual weights
145  for(uint32_t i = 0; i < length; i++)
146  {
147  float32_t v;
148  v = weight_vectors[i%num_threads][stride*(i/num_threads)];
149  if (v != 0.)
150  {
151  if (!as_text)
152  {
153  io_temp.write_file((char *)&i, sizeof (i));
154  io_temp.write_file((char *)&v, sizeof (v));
155  }
156  else
157  {
158  char buff[512];
159  int32_t len = sprintf(buff, "%d:%f\n", i, v);
160  io_temp.write_file(buff, len);
161  }
162  }
163  }
164 
165  io_temp.close_file();
166 }
167 
169 {
170  CIOBuffer source;
171  int32_t fd = source.open_file(file, 'r');
172 
173  if (fd < 0)
174  SG_SERROR("Unable to open file for loading regressor!\n");
175 
176  // Read version info
177  vw_size_t v_length;
178  source.read_file((char*)&v_length, sizeof(v_length));
179  char* t = SG_MALLOC(char, v_length);
180  source.read_file(t,v_length);
181  if (strcmp(t,env->vw_version) != 0)
182  {
183  SG_FREE(t);
184  SG_SERROR("Regressor source has an incompatible VW version!\n");
185  }
186  SG_FREE(t);
187 
188  // Read min and max label
189  source.read_file((char*)&env->min_label, sizeof(env->min_label));
190  source.read_file((char*)&env->max_label, sizeof(env->max_label));
191 
192  // Read num_bits, multiple sources are not supported
193  vw_size_t local_num_bits;
194  source.read_file((char *)&local_num_bits, sizeof(local_num_bits));
195 
196  if ((vw_size_t) env->num_bits != local_num_bits)
197  SG_SERROR("Wrong number of bits in regressor source!\n");
198 
199  env->num_bits = local_num_bits;
200 
201  vw_size_t local_thread_bits;
202  source.read_file((char*)&local_thread_bits, sizeof(local_thread_bits));
203 
204  env->thread_bits = local_thread_bits;
205 
206  int32_t len;
207  source.read_file((char *)&len, sizeof(len));
208 
209  // Read paired namespace information
210  DynArray<char*> local_pairs;
211  for (; len > 0; len--)
212  {
213  char pair[3];
214  source.read_file(pair, sizeof(char)*2);
215  pair[2]='\0';
216  local_pairs.push_back(pair);
217  }
218 
219  env->pairs = local_pairs;
220 
221  // Initialize the weight vector
222  if (weight_vectors)
224  init(env);
225 
226  vw_size_t local_ngram;
227  source.read_file((char*)&local_ngram, sizeof(local_ngram));
228  vw_size_t local_skips;
229  source.read_file((char*)&local_skips, sizeof(local_skips));
230 
231  env->ngram = local_ngram;
232  env->skips = local_skips;
233 
234  // Read individual weights
235  vw_size_t stride = env->stride;
236  while (true)
237  {
238  uint32_t hash;
239  ssize_t hash_bytes = source.read_file((char *)&hash, sizeof(hash));
240  if (hash_bytes <= 0)
241  break;
242 
243  float32_t w = 0.;
244  ssize_t weight_bytes = source.read_file((char *)&w, sizeof(float32_t));
245  if (weight_bytes <= 0)
246  break;
247 
248  vw_size_t num_threads = env->num_threads();
249 
250  weight_vectors[hash % num_threads][(hash*stride)/num_threads]
251  = weight_vectors[hash % num_threads][(hash*stride)/num_threads] + w;
252  }
253  source.close_file();
254 }

SHOGUN Machine Learning Toolbox - Documentation