Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include <shogun/classifier/vw/cache/VwNativeCacheWriter.h>
00017
00018 using namespace shogun;
00019
00020 CVwNativeCacheWriter::CVwNativeCacheWriter()
00021 : CVwCacheWriter()
00022 {
00023 init();
00024 }
00025
00026 CVwNativeCacheWriter::CVwNativeCacheWriter(char * fname, CVwEnvironment* env_to_use)
00027 : CVwCacheWriter(fname, env_to_use)
00028 {
00029 init();
00030 buf.use_file(fd);
00031
00032 write_header();
00033 }
00034
00035 CVwNativeCacheWriter::~CVwNativeCacheWriter()
00036 {
00037 buf.flush();
00038 buf.close_file();
00039 }
00040
00041 void CVwNativeCacheWriter::set_file(int32_t f)
00042 {
00043 if (fd > 0)
00044 {
00045 buf.flush();
00046 buf.close_file();
00047 }
00048
00049 fd = f;
00050 buf.use_file(fd);
00051
00052 write_header();
00053 }
00054
00055 void CVwNativeCacheWriter::init()
00056 {
00057 neg_1 = 1;
00058 general = 2;
00059 int_size = 6;
00060 }
00061
00062 void CVwNativeCacheWriter::write_header()
00063 {
00064 const char* vw_version = env->vw_version;
00065 vw_size_t numbits = env->num_bits;
00066 vw_size_t v_length = 4;
00067
00068
00069 buf.write_file(&v_length, sizeof(vw_size_t));
00070 buf.write_file(vw_version,v_length);
00071 buf.write_file(&numbits, sizeof(vw_size_t));
00072 }
00073
00074 char* CVwNativeCacheWriter::run_len_encode(char *p, vw_size_t i)
00075 {
00076 while (i >= 128)
00077 {
00078 *(p++) = (i & 127) | 128;
00079 i = i >> 7;
00080 }
00081 *(p++) = (i & 127);
00082
00083 return p;
00084 }
00085
00086 char* CVwNativeCacheWriter::bufcache_label(VwLabel* ld, char* c)
00087 {
00088 *(float32_t*)c = ld->label;
00089 c += sizeof(ld->label);
00090 *(float32_t*)c = ld->weight;
00091 c += sizeof(ld->weight);
00092 *(float32_t*)c = ld->initial;
00093 c += sizeof(ld->initial);
00094 return c;
00095 }
00096
00097 void CVwNativeCacheWriter::cache_label(VwLabel* ld)
00098 {
00099 char *c;
00100 buf.buf_write(c, sizeof(ld->label)+sizeof(ld->weight)+sizeof(ld->initial));
00101 c = bufcache_label(ld,c);
00102 }
00103
00104 void CVwNativeCacheWriter::cache_tag(v_array<char> tag)
00105 {
00106
00107 char *c;
00108
00109 buf.buf_write(c, sizeof(vw_size_t)+tag.index());
00110 *(vw_size_t*)c = tag.index();
00111 c += sizeof(vw_size_t);
00112 memcpy(c, tag.begin, tag.index());
00113 c += tag.index();
00114
00115 buf.set(c);
00116 }
00117
00118 void CVwNativeCacheWriter::output_byte(unsigned char s)
00119 {
00120 char *c;
00121
00122 buf.buf_write(c, 1);
00123 *(c++) = s;
00124 buf.set(c);
00125 }
00126
00127 void CVwNativeCacheWriter::output_features(unsigned char index, VwFeature* begin, VwFeature* end)
00128 {
00129 char* c;
00130 vw_size_t storage = (end-begin) * int_size;
00131 for (VwFeature* i = begin; i != end; i++)
00132 if (i->x != 1. && i->x != -1.)
00133 storage+=sizeof(float32_t);
00134
00135 buf.buf_write(c, sizeof(index) + storage + sizeof(vw_size_t));
00136 *(unsigned char*)c = index;
00137 c += sizeof(index);
00138
00139 char *storage_size_loc = c;
00140 c += sizeof(vw_size_t);
00141
00142 vw_size_t last = 0;
00143
00144
00145 for (VwFeature* i = begin; i != end; i++)
00146 {
00147 int32_t s_diff = (i->weight_index - last);
00148 vw_size_t diff = ZigZagEncode(s_diff) << 2;
00149 last = i->weight_index;
00150
00151 if (i->x == 1.)
00152 c = run_len_encode(c, diff);
00153 else if (i->x == -1.)
00154 c = run_len_encode(c, diff | neg_1);
00155 else
00156 {
00157 c = run_len_encode(c, diff | general);
00158 *(float32_t*)c = i->x;
00159 c += sizeof(float32_t);
00160 }
00161 }
00162 buf.set(c);
00163 *(vw_size_t*)storage_size_loc = c - storage_size_loc - sizeof(vw_size_t);
00164 }
00165
00166 void CVwNativeCacheWriter::cache_example(VwExample* &ex)
00167 {
00168 cache_label(ex->ld);
00169 cache_tag(ex->tag);
00170 output_byte(ex->indices.index());
00171 for (vw_size_t* b = ex->indices.begin; b != ex->indices.end; b++)
00172 output_features(*b, ex->atomics[*b].begin,ex->atomics[*b].end);
00173 }
00174