SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
NGramTokenizer.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evangelos Anagnostopoulos
8  * Copyright (C) 2013 Evangelos Anagnostopoulos
9  */
10 
12 #include <shogun/base/Parameter.h>
13 
14 namespace shogun
15 {
16 
18 {
19  n = ns;
20  last_idx = 0;
21  init();
22 }
23 
25 : CTokenizer(orig)
26 {
28  n = orig.n;
29  init();
30 }
31 
32 void CNGramTokenizer::init() // Registers the members n and last_idx through the SG_ADD macro.
33 {
34  SG_ADD(&n, "n", "Size of n-grams",
36  SG_ADD(&last_idx, "last_idx", "Index of last token",
38 } // NOTE(review): listing lines 35 and 37 (the tails of both SG_ADD calls) are absent from this listing - restore them from the original file before compiling.
39 
41 {
42  last_idx = 0;
44 }
45 
46 const char* CNGramTokenizer::get_name() const // Returns the constant string "NGramTokenizer".
47 {
48  return "NGramTokenizer"; // fixed literal; no member state is read
49 }
50 
52 {
53  return last_idx<=text.size()-n;
54 }
55 
57 {
58  start = last_idx++;
59  return start + n;
60 }
61 
63 {
65  return t;
66 }
67 }

SHOGUN Machine Learning Toolbox - Documentation