56 substring example_string = {line, line + num_chars};
62 tokenize(
'|', example_string, channels);
68 feature_start = &channels[0];
74 if (tab_location != label_space.
end)
75 label_space.
start = tab_location+1;
79 if (words.index() > 0 && words.last().end == label_space.
end)
92 for (
substring* i = feature_start; i != channels.end; i++)
97 if (words.begin == words.end)
106 bool new_index =
false;
109 if (channel.
start[0] !=
' ')
115 if (name.index() > 0)
117 index = (
unsigned char)(*name[0].start);
129 index = (
unsigned char)
' ';
138 for (
substring* j = words.begin+feature_offset; j != words.end; j++)
153 if (new_index && ae->
atomics[index].begin != ae->
atomics[index].end)
167 int32_t num_chars = buf->
read_line(line);
172 substring example_string = {line, line + num_chars};
175 tokenize(
' ', example_string, words);
189 for (
substring* i = feature_start; i != words.end; i++)
209 int32_t num_chars = buf->
read_line(line);
214 substring example_string = {line, line + num_chars};
217 tokenize(
' ', example_string, words);
232 for (
substring* i = feature_start; i != words.end; i++)
250 char* file_name = fname;
251 char default_cache_name[] =
"vw_cache.dat.cache";
254 file_name = default_cache_name;
265 SG_ERROR(
"Protocol buffers cache support is not implemented yet.\n")
268 SG_ERROR(
"Unexpected cache type specified!\n")
276 switch (feat_name.
index())
286 SG_SERROR(
"error NaN value for feature %s! Terminating!\n",
290 SG_SERROR(
"Examples with a weird name, i.e., '%s'\n",
298 char *last = s.
start;
301 if (*s.
start == delim)
uint32_t vw_size_t
vw_size_t typedef to work across platforms
ssize_t read_line(char *&pointer)
void feature_value(substring &s, v_array< substring > &name, float32_t &v)
char * safe_index(char *start, char v, char *max)
const uint32_t hash_base
Seed for hash.
void push_many(const T *new_elem, size_t num)
Class CVwEnvironment is the environment used by VW.
int32_t read_features(CIOBuffer *buf, VwExample *&ex)
CVwEnvironment * env
Environment of VW - used by parser.
Class v_array taken directly from JL's implementation.
void set_minmax(float64_t label)
CVwCacheWriter * cache_writer
Object which will be used for writing cache.
int32_t read_dense_features(CIOBuffer *buf, VwExample *&ae)
float64_t sum_feat_sq[256]
Sum of square of features.
struct Substring, specified by start position and end position.
void tokenize(char delim, substring s, v_array< substring > &ret)
void push(const T &new_elem)
bool write_cache
Whether to write cache or not.
float32_t label
Label value.
v_array< vw_size_t > indices
Array of namespaces.
float32_t weight
Weight of example.
Class SGObject is the base class of all shogun objects.
static char * c_string_of_substring(substring s)
int32_t read_svmlight_features(CIOBuffer *buf, VwExample *&ae)
void label_from_substring(v_array< substring > &words)
static float32_t float_of_substring(substring s)
vw_size_t mask
Mask used for hashing.
EVwCacheType cache_type
Type of cache.
float32_t initial
Initial approximation.
virtual void cache_example(VwExample *&ex)=0
static uint32_t MurmurHashString(substring s, uint32_t h)
All classes and functions are contained in the shogun namespace.
static int is_nan(double f)
Checks whether a float is NaN.
VwLabel * ld
Label object.
void init_cache(char *fname, EVwCacheType type=C_NATIVE)
hash_func_t hasher
Hash function to use, of type hash_func_t.
Class CVwNativeCacheWriter writes a cache exactly as that which would be produced by VW's default cac...
v_array< VwFeature > atomics[256]
Array of features.