SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
VwNativeCacheWriter.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Adaptation of Vowpal Wabbit v5.1.
13  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
14  */
15 
17 
18 using namespace shogun;
19 
21  : CVwCacheWriter()
22 {
23  init();
24 }
25 
27  : CVwCacheWriter(fname, env_to_use)
28 {
29  init();
30  buf.use_file(fd);
31 
32  write_header();
33 }
34 
36 {
37  buf.flush();
38  buf.close_file();
39 }
40 
42 {
43  if (fd > 0)
44  {
45  buf.flush();
46  buf.close_file();
47  }
48 
49  fd = f;
50  buf.use_file(fd);
51 
52  write_header();
53 }
54 
55 void CVwNativeCacheWriter::init()
56 {
57  neg_1 = 1;
58  general = 2;
59  int_size = 6;
60 }
61 
62 void CVwNativeCacheWriter::write_header()
63 {
64  const char* vw_version = env->vw_version;
65  vw_size_t numbits = env->num_bits;
66  vw_size_t v_length = 4;
67 
68  // Version and numbits info
69  buf.write_file(&v_length, sizeof(vw_size_t));
70  buf.write_file(vw_version,v_length);
71  buf.write_file(&numbits, sizeof(vw_size_t));
72 }
73 
74 char* CVwNativeCacheWriter::run_len_encode(char *p, vw_size_t i)
75 {
76  while (i >= 128)
77  {
78  *(p++) = (i & 127) | 128;
79  i = i >> 7;
80  }
81  *(p++) = (i & 127);
82 
83  return p;
84 }
85 
86 char* CVwNativeCacheWriter::bufcache_label(VwLabel* ld, char* c)
87 {
88  *(float32_t*)c = ld->label;
89  c += sizeof(ld->label);
90  *(float32_t*)c = ld->weight;
91  c += sizeof(ld->weight);
92  *(float32_t*)c = ld->initial;
93  c += sizeof(ld->initial);
94  return c;
95 }
96 
97 void CVwNativeCacheWriter::cache_label(VwLabel* ld)
98 {
99  char *c;
100  buf.buf_write(c, sizeof(ld->label)+sizeof(ld->weight)+sizeof(ld->initial));
101  c = bufcache_label(ld,c);
102 }
103 
104 void CVwNativeCacheWriter::cache_tag(v_array<char> tag)
105 {
106  // Store the size of the tag and the tag itself
107  char *c;
108 
109  buf.buf_write(c, sizeof(vw_size_t)+tag.index());
110  *(vw_size_t*)c = tag.index();
111  c += sizeof(vw_size_t);
112  memcpy(c, tag.begin, tag.index());
113  c += tag.index();
114 
115  buf.set(c);
116 }
117 
118 void CVwNativeCacheWriter::output_byte(unsigned char s)
119 {
120  char *c;
121 
122  buf.buf_write(c, 1);
123  *(c++) = s;
124  buf.set(c);
125 }
126 
127 void CVwNativeCacheWriter::output_features(unsigned char index, VwFeature* begin, VwFeature* end)
128 {
129  char* c;
130  vw_size_t storage = (end-begin) * int_size;
131  for (VwFeature* i = begin; i != end; i++)
132  if (i->x != 1. && i->x != -1.)
133  storage+=sizeof(float32_t);
134 
135  buf.buf_write(c, sizeof(index) + storage + sizeof(vw_size_t));
136  *(unsigned char*)c = index;
137  c += sizeof(index);
138 
139  char *storage_size_loc = c;
140  c += sizeof(vw_size_t);
141 
142  vw_size_t last = 0;
143 
144  // Store the differences in hashed feature indices
145  for (VwFeature* i = begin; i != end; i++)
146  {
147  int32_t s_diff = (i->weight_index - last);
148  vw_size_t diff = ZigZagEncode(s_diff) << 2;
149  last = i->weight_index;
150 
151  if (i->x == 1.)
152  c = run_len_encode(c, diff);
153  else if (i->x == -1.)
154  c = run_len_encode(c, diff | neg_1);
155  else
156  {
157  c = run_len_encode(c, diff | general);
158  *(float32_t*)c = i->x;
159  c += sizeof(float32_t);
160  }
161  }
162  buf.set(c);
163  *(vw_size_t*)storage_size_loc = c - storage_size_loc - sizeof(vw_size_t);
164 }
165 
167 {
168  cache_label(ex->ld);
169  cache_tag(ex->tag);
170  output_byte(ex->indices.index());
171  for (vw_size_t* b = ex->indices.begin; b != ex->indices.end; b++)
172  output_features(*b, ex->atomics[*b].begin,ex->atomics[*b].end);
173 }
174 
uint32_t vw_size_t
vw_size_t typedef to work across platforms
Definition: vw_constants.h:26
CVwCacheWriter is the base class for all VW cache creating classes.
Definition: VwCacheWriter.h:35
T * end
Pointer to last set element in the array.
Definition: v_array.h:160
T * begin
Pointer to first element of the array.
Definition: v_array.h:157
Class CVwEnvironment is the environment used by VW.
Definition: VwEnvironment.h:41
virtual void use_file(int fd)
Definition: IOBuffer.cpp:50
virtual void set_file(int32_t f)
CIOBuffer buf
IOBuffer used for writing.
vw_size_t num_bits
log_2 of the number of features
virtual bool close_file()
Definition: IOBuffer.cpp:126
float32_t label
Label value.
Definition: vw_label.h:92
v_array< vw_size_t > indices
Array of namespaces.
Definition: vw_example.h:84
virtual ssize_t write_file(const void *buf, size_t nbytes)
Definition: IOBuffer.cpp:110
float32_t weight
Weight of example.
Definition: vw_label.h:94
Class VwLabel holds a label object used by VW.
Definition: vw_label.h:34
CVwEnvironment * env
Environment.
Definition: VwCacheWriter.h:99
virtual void flush()
Definition: IOBuffer.cpp:115
v_array< char > tag
Tag.
Definition: vw_example.h:82
Example class for VW.
Definition: vw_example.h:58
int32_t fd
File descriptor.
Definition: VwCacheWriter.h:96
void buf_write(char *&pointer, int n)
Definition: IOBuffer.cpp:170
float float32_t
Definition: common.h:49
float32_t initial
Initial approximation.
Definition: vw_label.h:96
One feature in VW.
Definition: vw_example.h:34
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
VwLabel * ld
Label object.
Definition: vw_example.h:79
const char * vw_version
VW version.
void set(char *p)
Definition: IOBuffer.cpp:82
v_array< VwFeature > atomics[256]
Array of features.
Definition: vw_example.h:86
virtual void cache_example(VwExample *&ex)

SHOGUN Machine Learning Toolbox - Documentation