SHOGUN
v3.0.0
Main Page
Related Pages
Modules
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
src
shogun
lib
NGramTokenizer.h
Go to the documentation of this file.
1
/*
2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 3 of the License, or
5
* (at your option) any later version.
6
*
7
* Written (W) 2013 Evangelos Anagnostopoulos
8
* Copyright (C) 2013 Evangelos Anagnostopoulos
9
*/
10
11
#ifndef _NGRAMTOKENIZER__H__
12
#define _NGRAMTOKENIZER__H__
13
14
#include <
shogun/lib/Tokenizer.h
>
15
16
namespace
shogun
17
{
18
class
CTokenizer;
19
23
class
CNGramTokenizer
:
public
CTokenizer
24
{
25
public
:
30
CNGramTokenizer
(int32_t ns=3);
31
36
CNGramTokenizer
(
const
CNGramTokenizer
& orig);
37
39
virtual
~CNGramTokenizer
() {}
40
45
virtual
void
set_text
(
SGVector<char>
txt);
46
52
virtual
bool
has_next
();
53
60
virtual
index_t
next_token_idx
(
index_t
& start);
61
67
virtual
const
char
*
get_name
()
const
;
68
69
virtual
CNGramTokenizer
*
get_copy
();
70
71
private
:
72
void
init();
73
74
protected
:
75
77
int32_t
n
;
78
80
index_t
last_idx
;
81
};
82
}
83
#endif
/* _NGRAMTOKENIZER__H__ */
84
SHOGUN
Machine Learning Toolbox - Documentation