SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Tokenizer.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evangelos Anagnostopoulos
8  * Copyright (C) 2013 Evangelos Anagnostopoulos
9  */
10 
11 #ifndef _TOKENIZER__H__
12 #define _TOKENIZER__H__
13 
14 #include <shogun/lib/config.h>
15 
16 #include <shogun/base/SGObject.h>
17 #include <shogun/lib/SGString.h>
18 #include <shogun/lib/SGVector.h>
19 
20 namespace shogun
21 {
/* Forward declarations of the two library types used by CTokenizer below.
 * NOTE(review): <shogun/base/SGObject.h> and <shogun/lib/SGVector.h> are
 * already included above and presumably provide the full definitions
 * (CTokenizer derives from CSGObject, which requires a complete type), so
 * these declarations look redundant but harmless - confirm before removing. */
22 class CSGObject;
23 template<class T> class SGVector;
24 
/** Base class for tokenizers.
 *
 * has_next(), next_token_idx() and get_copy() are pure virtual, so this class
 * cannot be instantiated directly; concrete tokenizers derive from it and
 * implement those three methods.
 */
29 class CTokenizer: public CSGObject
30 {
31 public:
    /** Default constructor. */
33  CTokenizer();
34 
    /** Copy constructor.
     *
     * @param orig tokenizer to copy from
     */
36  CTokenizer(const CTokenizer& orig);
37 
    /** Virtual destructor with an empty inline body. */
39  virtual ~CTokenizer() { };
40 
    /** Hands a character vector to the tokenizer.
     *
     * Declared here only; the definition is not part of this header.
     * NOTE(review): presumably stores txt as the text to be tokenized -
     * confirm against the implementation file.
     *
     * @param txt character vector, passed by value
     */
45  virtual void set_text(SGVector<char> txt);
46 
    /** Pure virtual; every concrete tokenizer must implement it.
     *
     * NOTE(review): presumably reports whether another token is available -
     * confirm against a concrete subclass.
     *
     * @return bool (meaning defined by the implementing subclass)
     */
52  virtual bool has_next()=0;
53 
    /** Pure virtual; every concrete tokenizer must implement it.
     *
     * start is taken by non-const reference, so an implementation is able to
     * write to it.
     * NOTE(review): presumably start receives the index where the next token
     * begins and the return value marks where it ends - confirm against a
     * concrete subclass.
     *
     * @param start index passed by reference (may be modified by the callee)
     * @return an index_t value (meaning defined by the implementing subclass)
     */
60  virtual index_t next_token_idx(index_t& start)=0;
61 
    /** Pure virtual; every concrete tokenizer must implement it.
     *
     * NOTE(review): ownership of the returned pointer is not visible from
     * this header - confirm before deleting or ref-counting the result.
     *
     * @return pointer to a CTokenizer
     */
66  virtual CTokenizer* get_copy()=0;
67 
68 private:
    /** Private helper; declared here, defined outside this header. */
69  void init();
70 
71 protected:
    /* NOTE(review): this listing shows no members in the protected section
     * (listing lines 72-73 are absent), yet the cross-reference index of this
     * page lists `SGVector< char > text` as defined at Tokenizer.h:73 -
     * confirm against the real header. */
74 };
75 }
76 
77 #endif /* _TOKENIZER__H__ */
int32_t index_t
Definition: common.h:62
virtual void set_text(SGVector< char > txt)
Definition: Tokenizer.cpp:17
virtual ~CTokenizer()
Definition: Tokenizer.h:39
SGVector< char > text
Definition: Tokenizer.h:73
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
The class CTokenizer acts as a base class in order to implement tokenizers. Sub-classes must implement the methods has_next(), next_token_idx() and get_copy().
Definition: Tokenizer.h:29
virtual bool has_next()=0
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
virtual index_t next_token_idx(index_t &start)=0
virtual CTokenizer * get_copy()=0

SHOGUN Machine Learning Toolbox - Documentation