SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
VwParser.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Adaptation of Vowpal Wabbit v5.1.
13  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
14  */
15 
16 #ifndef _VW_PARSER_H__
17 #define _VW_PARSER_H__
18 
19 #include <shogun/base/SGObject.h>
20 #include <shogun/io/SGIO.h>
21 #include <shogun/lib/Hash.h>
24 
25 namespace shogun
26 {
29 {
30  T_VW = 1,
32  T_DENSE = 3
33 };
34 
46 class CVwParser: public CSGObject
47 {
48 public:
52  CVwParser();
53 
59  CVwParser(CVwEnvironment* env_to_use);
60 
64  virtual ~CVwParser();
65 
72  {
73  SG_REF(env);
74  return env;
75  }
76 
82  void set_env(CVwEnvironment* env_to_use)
83  {
84  env = env_to_use;
85  SG_REF(env);
86  }
87 
94  void set_cache_parameters(char * fname, EVwCacheType type = C_NATIVE)
95  {
96  init_cache(fname, type);
97  }
98 
105  {
106  return cache_type;
107  }
108 
114  void set_write_cache(bool wr_cache)
115  {
116  write_cache = wr_cache;
117  if (wr_cache)
118  init_cache(NULL);
119  else
120  if (cache_writer)
122  }
123 
130  {
131  return write_cache;
132  }
133 
139  void set_mm(float64_t label)
140  {
141  env->min_label = CMath::min(env->min_label, label);
142  if (label != FLT_MAX)
143  env->max_label = CMath::max(env->max_label, label);
144  }
145 
152  void noop_mm(float64_t label) { }
153 
160  void set_minmax(float64_t label)
161  {
162  set_mm(label);
163  }
164 
173  int32_t read_features(CIOBuffer* buf, VwExample*& ex);
174 
183  int32_t read_svmlight_features(CIOBuffer* buf, VwExample*& ae);
184 
193  int32_t read_dense_features(CIOBuffer* buf, VwExample*& ae);
194 
200  virtual const char* get_name() const { return "VwParser"; }
201 
202 protected:
209  void init_cache(char * fname, EVwCacheType type = C_NATIVE);
210 
220 
229  void tokenize(char delim, substring s, v_array<substring> &ret);
230 
241  inline char* safe_index(char *start, char v, char *max)
242  {
243  while (start != max && *start != v)
244  start++;
245  return start;
246  }
247 
248 public:
251 
252 protected:
261 
262 private:
264  v_array<substring> channels;
265  v_array<substring> words;
266  v_array<substring> name;
267 };
268 
269 }
270 #endif // _VW_PARSER_H__

SHOGUN Machine Learning Toolbox - Documentation