SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Tokenizer.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evangelos Anagnostopoulos
8  * Copyright (C) 2013 Evangelos Anagnostopoulos
9  */
10 
11 #ifndef _TOKENIZER__H__
12 #define _TOKENIZER__H__
13 
14 #include <shogun/base/SGObject.h>
15 #include <shogun/lib/SGString.h>
16 #include <shogun/lib/SGVector.h>
17 
   /* Everything below is declared inside the shogun namespace. */
18 namespace shogun
19 {
   /* Forward declarations of the base class and of the vector template used
    * in the signatures below.
    * NOTE(review): SGObject.h and SGVector.h are already included above and
    * presumably provide the full definitions, which would make these
    * redundant -- confirm before removing. */
20 class CSGObject;
21 template<class T> class SGVector;
22 
   /** Abstract tokenizer interface.
    *
    * Derives from CSGObject and declares three pure virtual methods
    * (has_next, next_token_idx, get_copy), so it cannot be instantiated
    * directly; concrete subclasses have to implement them.
    */
27 class CTokenizer: public CSGObject
28 {
29 public:
    /** Default constructor (defined out of line, not visible here). */
31  CTokenizer();
32 
    /** Copy constructor (defined out of line, not visible here).
     *
     * @param orig tokenizer to copy from
     */
34  CTokenizer(const CTokenizer& orig);
35 
    /** Virtual destructor with an empty inline body. */
37  virtual ~CTokenizer() { };
38 
    /** Hands a character vector to the tokenizer.
     *
     * Virtual but not pure: a default implementation exists elsewhere.
     * NOTE(review): presumably stores txt as the text to be tokenized --
     * confirm against the implementation.
     *
     * @param txt character vector, passed by value
     */
43  virtual void set_text(SGVector<char> txt);
44 
    /** Pure virtual.
     *
     * NOTE(review): presumably reports whether another token is available
     * in the current text -- confirm against a concrete subclass.
     *
     * @return bool; exact meaning is defined by the subclass
     */
50  virtual bool has_next()=0;
51 
    /** Pure virtual.
     *
     * NOTE(review): judging by the name and the non-const reference
     * parameter, this presumably writes the next token's starting index
     * into start and returns its ending index; whether the end is
     * inclusive or exclusive cannot be told from this header -- confirm.
     *
     * @param start index passed by non-const reference (in/out)
     * @return an index_t; exact meaning is defined by the subclass
     */
58  virtual index_t next_token_idx(index_t& start)=0;
59 
    /** Pure virtual.
     *
     * NOTE(review): presumably returns a new tokenizer of the same runtime
     * type as this object; ownership and reference counting of the
     * returned pointer are not visible here -- confirm.
     *
     * @return pointer to a CTokenizer
     */
64  virtual CTokenizer* get_copy()=0;
65 
66 private:
    /** Private helper, defined out of line.
     * NOTE(review): presumably shared initialisation called by the
     * constructors -- confirm against the implementation. */
67  void init();
68 
69 protected:
    /* NOTE(review): no member is declared in this section in this listing,
     * but the listing's own numbering jumps from 69 to 72, so a declaration
     * may have been dropped when the page was extracted -- check the
     * upstream header. */
72 };
73 }
74 
75 #endif /* _TOKENIZER__H__ */

SHOGUN Machine Learning Toolbox - Documentation