VwNativeCacheWriter.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2009 Yahoo! Inc.  All rights reserved.  The copyrights
00003  * embodied in the content of this file are licensed under the BSD
00004  * (revised) open source license.
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 3 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * Written (W) 2011 Shashwat Lal Das
00012  * Adaptation of Vowpal Wabbit v5.1.
00013  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
00014  */
00015 
00016 #include <shogun/classifier/vw/cache/VwNativeCacheWriter.h>
00017 
00018 using namespace shogun;
00019 
00020 CVwNativeCacheWriter::CVwNativeCacheWriter()
00021     : CVwCacheWriter()
00022 {
00023     init();
00024 }
00025 
00026 CVwNativeCacheWriter::CVwNativeCacheWriter(char * fname, CVwEnvironment* env_to_use)
00027     : CVwCacheWriter(fname, env_to_use)
00028 {
00029     init();
00030     buf.use_file(fd);
00031 
00032     write_header();
00033 }
00034 
00035 CVwNativeCacheWriter::~CVwNativeCacheWriter()
00036 {
00037     buf.flush();
00038     buf.close_file();
00039 }
00040 
00041 void CVwNativeCacheWriter::set_file(int32_t f)
00042 {
00043     if (fd > 0)
00044     {
00045         buf.flush();
00046         buf.close_file();
00047     }
00048 
00049     fd = f;
00050     buf.use_file(fd);
00051 
00052     write_header();
00053 }
00054 
00055 void CVwNativeCacheWriter::init()
00056 {
00057     neg_1 = 1;
00058     general = 2;
00059     int_size = 6;
00060 }
00061 
00062 void CVwNativeCacheWriter::write_header()
00063 {
00064     const char* vw_version = env->vw_version;
00065     vw_size_t numbits = env->num_bits;
00066     vw_size_t v_length = 4;
00067 
00068     // Version and numbits info
00069     buf.write_file(&v_length, sizeof(vw_size_t));
00070     buf.write_file(vw_version,v_length);
00071     buf.write_file(&numbits, sizeof(vw_size_t));
00072 }
00073 
00074 char* CVwNativeCacheWriter::run_len_encode(char *p, vw_size_t i)
00075 {
00076     while (i >= 128)
00077     {
00078         *(p++) = (i & 127) | 128;
00079         i = i >> 7;
00080     }
00081     *(p++) = (i & 127);
00082 
00083     return p;
00084 }
00085 
00086 char* CVwNativeCacheWriter::bufcache_label(VwLabel* ld, char* c)
00087 {
00088     *(float32_t*)c = ld->label;
00089     c += sizeof(ld->label);
00090     *(float32_t*)c = ld->weight;
00091     c += sizeof(ld->weight);
00092     *(float32_t*)c = ld->initial;
00093     c += sizeof(ld->initial);
00094     return c;
00095 }
00096 
00097 void CVwNativeCacheWriter::cache_label(VwLabel* ld)
00098 {
00099     char *c;
00100     buf.buf_write(c, sizeof(ld->label)+sizeof(ld->weight)+sizeof(ld->initial));
00101     c = bufcache_label(ld,c);
00102 }
00103 
00104 void CVwNativeCacheWriter::cache_tag(v_array<char> tag)
00105 {
00106     // Store the size of the tag and the tag itself
00107     char *c;
00108 
00109     buf.buf_write(c, sizeof(vw_size_t)+tag.index());
00110     *(vw_size_t*)c = tag.index();
00111     c += sizeof(vw_size_t);
00112     memcpy(c, tag.begin, tag.index());
00113     c += tag.index();
00114 
00115     buf.set(c);
00116 }
00117 
00118 void CVwNativeCacheWriter::output_byte(unsigned char s)
00119 {
00120     char *c;
00121 
00122     buf.buf_write(c, 1);
00123     *(c++) = s;
00124     buf.set(c);
00125 }
00126 
00127 void CVwNativeCacheWriter::output_features(unsigned char index, VwFeature* begin, VwFeature* end)
00128 {
00129     char* c;
00130     vw_size_t storage = (end-begin) * int_size;
00131     for (VwFeature* i = begin; i != end; i++)
00132         if (i->x != 1. && i->x != -1.)
00133             storage+=sizeof(float32_t);
00134 
00135     buf.buf_write(c, sizeof(index) + storage + sizeof(vw_size_t));
00136     *(unsigned char*)c = index;
00137     c += sizeof(index);
00138 
00139     char *storage_size_loc = c;
00140     c += sizeof(vw_size_t);
00141 
00142     vw_size_t last = 0;
00143 
00144     // Store the differences in hashed feature indices
00145     for (VwFeature* i = begin; i != end; i++)
00146     {
00147         int32_t s_diff = (i->weight_index - last);
00148         vw_size_t diff = ZigZagEncode(s_diff) << 2;
00149         last = i->weight_index;
00150 
00151         if (i->x == 1.)
00152             c = run_len_encode(c, diff);
00153         else if (i->x == -1.)
00154             c = run_len_encode(c, diff | neg_1);
00155         else
00156         {
00157             c = run_len_encode(c, diff | general);
00158             *(float32_t*)c = i->x;
00159             c += sizeof(float32_t);
00160         }
00161     }
00162     buf.set(c);
00163     *(vw_size_t*)storage_size_loc = c - storage_size_loc - sizeof(vw_size_t);
00164 }
00165 
00166 void CVwNativeCacheWriter::cache_example(VwExample* &ex)
00167 {
00168     cache_label(ex->ld);
00169     cache_tag(ex->tag);
00170     output_byte(ex->indices.index());
00171     for (vw_size_t* b = ex->indices.begin; b != ex->indices.end; b++)
00172         output_features(*b, ex->atomics[*b].begin,ex->atomics[*b].end);
00173 }
00174 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation