CVwParser is the object which provides the functions to parse examples from buffered input.
An instance of this class can be created in CStreamingVwFile and the appropriate read_*_features function called to parse examples from different formats.
It also encapsulates a CVwCacheWriter object which may be used in case a cache file is to be generated simultaneously with parsing.
Definition at line 46 of file VwParser.h.
Public Member Functions | |
CVwParser () | |
CVwParser (CVwEnvironment *env_to_use) | |
virtual | ~CVwParser () |
CVwEnvironment * | get_env () |
void | set_env (CVwEnvironment *env_to_use) |
void | set_cache_parameters (char *fname, EVwCacheType type=C_NATIVE) |
EVwCacheType | get_cache_type () |
void | set_write_cache (bool wr_cache) |
bool | get_write_cache () |
void | set_mm (float64_t label) |
void | noop_mm (float64_t label) |
void | set_minmax (float64_t label) |
int32_t | read_features (CIOBuffer *buf, VwExample *&ex) |
int32_t | read_svmlight_features (CIOBuffer *buf, VwExample *&ae) |
int32_t | read_dense_features (CIOBuffer *buf, VwExample *&ae) |
virtual const char * | get_name () const |
Public Attributes | |
hash_func_t | hasher |
Hash function to use, of type hash_func_t. | |
Protected Member Functions | |
void | init_cache (char *fname, EVwCacheType type=C_NATIVE) |
void | feature_value (substring &s, v_array< substring > &name, float32_t &v) |
void | tokenize (char delim, substring s, v_array< substring > &ret) |
char * | safe_index (char *start, char v, char *max) |
Protected Attributes | |
CVwEnvironment * | env |
Environment of VW - used by parser. | |
CVwCacheWriter * | cache_writer |
Object which will be used for writing cache. | |
EVwCacheType | cache_type |
Type of cache. | |
bool | write_cache |
Whether to write cache or not. |
CVwParser | ( | ) |
Default constructor
Definition at line 21 of file VwParser.cpp.
CVwParser | ( | CVwEnvironment * | env_to_use | ) |
Constructor taking environment as parameter.
env_to_use | CVwEnvironment to use |
Definition at line 30 of file VwParser.cpp.
~CVwParser | ( | ) | [virtual] |
Destructor
Definition at line 42 of file VwParser.cpp.
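void feature_value | ( | substring & | s, | |
v_array< substring > & | name, | |
float32_t & | v | |||
) | [protected] |
Get the value of a feature from a given substring. A default of 1 is assumed if no explicit value is specified.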
s | substring, usually a feature:value string | |
name | returned array of substrings, split into name and value | |
v | value of feature, set by reference |
Definition at line 278 of file VwParser.cpp.
EVwCacheType get_cache_type | ( | ) |
Return the type of cache
Definition at line 104 of file VwParser.h.
CVwEnvironment* get_env | ( | ) |
Get the environment
Definition at line 71 of file VwParser.h.
virtual const char* get_name | ( | void | ) | const [virtual] |
Return the name of the object
Implements CSGObject.
Definition at line 200 of file VwParser.h.
bool get_write_cache | ( | ) |
Return whether cache will be written or not
Definition at line 129 of file VwParser.h.
void init_cache | ( | char * | fname, | |
EVwCacheType | type = C_NATIVE | |||
) | [protected] |
Initialize the cache writer
fname | cache file name | |
type | cache type as EVwCacheType, default is C_NATIVE |
Definition at line 255 of file VwParser.cpp.
void noop_mm | ( | float64_t | label | ) |
A dummy function performing no operation in case training is not to be performed.
label | label |
Definition at line 152 of file VwParser.h.
int32_t read_dense_features | ( | CIOBuffer * | buf, | |
VwExample *& | ae | |||
) |
Read an example from a file with dense vectors
buf | IOBuffer which contains input | |
ae | parsed example |
Definition at line 213 of file VwParser.cpp.
int32_t read_features | ( | CIOBuffer * | buf, | |
VwExample *& | ex | |||
) |
Reads input from the buffer and parses it into a VwExample
buf | IOBuffer which contains input | |
ex | parsed example |
Definition at line 55 of file VwParser.cpp.
int32_t read_svmlight_features | ( | CIOBuffer * | buf, | |
VwExample *& | ae | |||
) |
Read an example from an SVMLight file
buf | IOBuffer which contains input | |
ae | parsed example |
Definition at line 171 of file VwParser.cpp.
char* safe_index | ( | char * | start, | |
char | v, | |||
char * | max | |||
) | [protected] |
Get the index of a character in a memory location taking care not to go beyond the max pointer.
start | start memory location, char* | |
v | character to search for | |
max | last location to look in |
Definition at line 241 of file VwParser.h.
void set_cache_parameters | ( | char * | fname, | |
EVwCacheType | type = C_NATIVE | |||
) |
Set the cache parameters
fname | name of the cache file | |
type | type of cache as one in EVwCacheType |
Definition at line 94 of file VwParser.h.
void set_env | ( | CVwEnvironment * | env_to_use | ) |
Set the environment
env_to_use | environment as CVwEnvironment* |
Definition at line 82 of file VwParser.h.
void set_minmax | ( | float64_t | label | ) |
Function which is actually called to update the min and max labels. It should be set to one of the functions implemented for this purpose.
label | label based on which to update |
Definition at line 160 of file VwParser.h.
void set_mm | ( | float64_t | label | ) |
Update min and max labels seen in the environment
label | current label based on which to update |
Definition at line 139 of file VwParser.h.
void set_write_cache | ( | bool | wr_cache | ) |
Set whether to write cache file or not
wr_cache | write cache or not |
Definition at line 114 of file VwParser.h.
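void tokenize | ( | char | delim, | |
substring | s, | |
v_array< substring > & | ret | |||
) | [protected] |
Split a given substring into an array of substrings based on a specified delimiter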
delim | delimiter to use | |
s | substring to tokenize | |
ret | array of substrings, returned |
Definition at line 302 of file VwParser.cpp.
EVwCacheType cache_type [protected] |
Type of cache.
Definition at line 258 of file VwParser.h.
CVwCacheWriter* cache_writer [protected] |
Object which will be used for writing cache.
Definition at line 256 of file VwParser.h.
CVwEnvironment* env [protected] |
Environment of VW - used by parser.
Definition at line 254 of file VwParser.h.
hash_func_t hasher |
Hash function to use, of type hash_func_t.
Definition at line 250 of file VwParser.h.
bool write_cache [protected] |
Whether to write cache or not.
Definition at line 260 of file VwParser.h.