SHOGUN
v3.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
lib
DelimiterTokenizer.h
Go to the documentation of this file.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2013 Evangelos Anagnostopoulos
8
* Copyright (C) 2013 Evangelos Anagnostopoulos
9
*/
10
11
#ifndef _DELIMITERTOKENIZER__H__
12
#define _DELIMITERTOKENIZER__H__
13
14
#include <
shogun/lib/Tokenizer.h
>
15
16
namespace
shogun
17
{
18
class
CTokenizer;
19
26
class
CDelimiterTokenizer
:
public
CTokenizer
27
{
28
public
:
33
CDelimiterTokenizer
(
bool
skip_delimiters =
false
);
34
39
CDelimiterTokenizer
(
const
CDelimiterTokenizer
& orig);
40
42
virtual
~CDelimiterTokenizer
() {}
43
48
virtual
void
set_text
(
SGVector<char>
txt);
49
55
virtual
bool
has_next
();
56
65
virtual
index_t
next_token_idx
(
index_t
& start);
66
72
virtual
const
char
*
get_name
()
const
;
73
77
void
init_for_whitespace
();
78
79
CDelimiterTokenizer
*
get_copy
();
80
82
void
clear_delimiters
();
83
88
bool
get_skip_delimiters
()
const
;
89
94
void
set_skip_delimiters
(
bool
skip_delimiters);
95
96
private
:
97
void
init();
98
99
public
:
101
SGVector<bool>
delimiters
;
102
103
protected
:
105
index_t
last_idx
;
106
108
bool
skip_consecutive_delimiters
;
109
};
110
}
111
#endif
/* _DELIMITERTOKENIZER__H__ */
112
SHOGUN
Machine Learning Toolbox - Documentation