SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
NGramTokenizer.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evangelos Anagnostopoulos
8  * Copyright (C) 2013 Evangelos Anagnostopoulos
9  */
10 
11 #ifndef _NGRAMTOKENIZER__H__
12 #define _NGRAMTOKENIZER__H__
13 
14 #include <shogun/lib/Tokenizer.h>
15 
16 namespace shogun
17 {
18 class CTokenizer;
19 
/* NOTE(review): the class-head line is absent from this listing. The
 * constructors and destructor below name the class CNGramTokenizer; it
 * presumably derives from CTokenizer (forward-declared above) - confirm
 * against the original header.
 */
24 {
25 public:
    /** Constructor.
     *
     * @param ns defaults to 3 when omitted; presumably the n-gram size
     *           kept in the member n - confirm in the implementation file
     */
30  CNGramTokenizer(int32_t ns=3);
31 
    /** Copy constructor.
     *
     * @param orig the tokenizer instance to copy from
     */
36  CNGramTokenizer(const CNGramTokenizer& orig);
37 
    /** Destructor; virtual, with an empty inline body. */
39  virtual ~CNGramTokenizer() {}
40 
    /** Hands the tokenizer a character vector (passed by value).
     *
     * @param txt presumably the text that subsequent calls will
     *            tokenize - confirm in the implementation file
     */
45  virtual void set_text(SGVector<char> txt);
46 
    /** Reports a boolean status.
     *
     * @return presumably true while further tokens remain in the current
     *         text - confirm in the implementation file
     */
52  virtual bool has_next();
53 
    /** Advances to the next token.
     *
     * @param start non-const reference, so the callee can write to it;
     *              presumably receives the token's first index - TODO confirm
     * @return an index_t; presumably the token's end index - TODO confirm
     *         whether it is inclusive or exclusive
     */
60  virtual index_t next_token_idx(index_t& start);
61 
    /** @return a C string; presumably the class name - confirm in the
     *          implementation file
     */
67  virtual const char* get_name() const;
68 
    /** @return a pointer to a CNGramTokenizer; presumably a newly created
     *          copy of this object - ownership is not visible here, verify
     *          against the implementation
     */
69  virtual CNGramTokenizer* get_copy();
70 
71 private:
    /** Private helper declared without arguments; presumably shared
     * constructor set-up - its body is not visible in this header.
     */
72  void init();
73 
74 protected:
75 
    /** Presumably the n-gram size supplied to the constructor as ns -
     * confirm in the implementation file.
     */
77  int32_t n;
78 
81 };
82 }
83 #endif /* _NGRAMTOKENIZER__H__ */
84 

SHOGUN Machine Learning Toolbox - Documentation