Set.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Evgeniy Andreev (gsomix)
00008  *
00009  * Copyright (C) 2012 Evgeniy Andreev (gsomix)
00010  */
00011 
00012 #ifndef _SET_H_
00013 #define _SET_H_
00014 
00015 #include <shogun/base/SGObject.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/lib/Hash.h>
00018 #include <shogun/base/DynArray.h>
00019 
00020 #include <cstdio>
00021 
00022 namespace shogun
00023 {
00024 
00025 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00026 
00027 template<class T> struct CSetNode
00028 {   
00030     int32_t index;
00031 
00033     bool free;
00034 
00036     T element;
00037 
00039     CSetNode<T> *left;
00040 
00042     CSetNode<T> *right;
00043 };
00044 #endif
00045 
00049 template<class T> class CSet: public CSGObject
00050 {
00051 public:
00053     CSet(int32_t size=41, int32_t reserved=128, bool tracable=true)
00054     {   
00055         hash_size=size;
00056         free_index=0;
00057         num_elements=0;
00058         use_sg_mallocs=tracable;
00059 
00060         if (use_sg_mallocs)
00061             hash_array=SG_CALLOC(CSetNode<T>*, size);
00062         else
00063             hash_array=(CSetNode<T>**) calloc(size, sizeof(CSetNode<T>*));
00064 
00065         for (int32_t i=0; i<size; i++)
00066         {
00067             hash_array[i]=NULL;
00068         }
00069 
00070         array=new DynArray<CSetNode<T>* >(reserved, tracable);
00071     }
00072 
00074     virtual ~CSet()
00075     {
00076         if (array!=NULL)
00077         {
00078             for(int32_t i=0; i<array->get_num_elements(); i++)
00079             {
00080                 if (array->get_element(i)!=NULL)
00081                 {
00082                     if (use_sg_mallocs)
00083                         SG_FREE(array->get_element(i));
00084                     else
00085                         free(array->get_element(i));
00086                 }
00087             }
00088             delete array;
00089         }
00090 
00091         if (hash_array!=NULL)
00092         {
00093             if (use_sg_mallocs)
00094                 SG_FREE(hash_array);
00095             else
00096                 free(hash_array);
00097         }
00098     }
00099 
00101     virtual const char* get_name() const { return "Set"; }
00102 
00107     void add(const T& element)
00108     {
00109         int32_t index=hash(element);
00110         if (chain_search(index, element)==NULL)
00111         {
00112             insert_key(index, element);
00113             num_elements++;
00114         }
00115     }
00116 
00121     bool contains(const T& element)
00122     {
00123         int32_t index=hash(element);
00124         if (chain_search(index, element)!=NULL)
00125             return true; 
00126 
00127         return false;
00128     }
00129 
00134     void remove(const T& element)
00135     {
00136         int32_t index=hash(element);
00137         CSetNode<T>* result=chain_search(index, element);
00138 
00139         if (result!=NULL)       
00140         {
00141             delete_key(index, result);
00142             num_elements--;
00143         }
00144     }
00145 
00151     int32_t index_of(const T& element)
00152     {
00153         int32_t index=hash(element);
00154         CSetNode<T>* result=chain_search(index, element);
00155 
00156         if (result!=NULL)
00157              return result->index;
00158         
00159         return -1;
00160     }
00161 
00166     int32_t get_num_elements() const
00167     {
00168         return num_elements;
00169     }
00170 
00175     int32_t get_array_size() const
00176     {
00177         return array->get_num_elements();
00178     }
00179 
00187     T* get_element_ptr(int32_t index)
00188     {
00189         if (array->get_element(index)!=NULL)
00190             return &(array->get_element(index)->element);
00191         return NULL;
00192     }
00193 
00201     CSetNode<T>* get_node_ptr(int32_t index)
00202     {
00203         return array->get_element(index);
00204     }
00205         
00207     CSetNode<T>** get_array()
00208     {
00209         return array->get_array();
00210     }
00211 
00212 private:
00216     int32_t hash(const T& element)
00217     {
00218         return CHash::MurmurHash3((uint8_t*)(&element), sizeof(element), 0xDEADBEEF) % hash_size;
00219     }
00220 
00222     bool is_free(CSetNode<T>* node)
00223     {
00224         if (node->free==true)
00225             return true;
00226 
00227         return false;
00228     }
00229 
00231     CSetNode<T>* chain_search(int32_t index, const T& element)
00232     {
00233         if (hash_array[index]==NULL)
00234         {
00235             return NULL;
00236         }
00237         else
00238         {
00239             CSetNode<T>* current=hash_array[index];
00240 
00241             do // iterating all items in the list
00242             {
00243                 if (current->element==element)
00244                     return current; // it's a search key
00245 
00246                 current=current->right;
00247 
00248             } while (current!=NULL);
00249 
00250             return NULL;
00251         }
00252     }
00253     
00255     void insert_key(int32_t index, const T& element)
00256     {
00257         int32_t new_index;
00258         CSetNode<T>* new_node;
00259 
00260         if ((free_index>=array->get_num_elements()) || (array->get_element(free_index)==NULL))
00261         {
00262             // init new node
00263             if (use_sg_mallocs)
00264                 new_node=SG_CALLOC(CSetNode<T>, 1);
00265             else
00266                 new_node=(CSetNode<T>*) calloc(1, sizeof(CSetNode<T>));
00267 
00268             array->append_element(new_node);
00269 
00270             new_index=free_index;
00271             free_index++;
00272         }
00273         else
00274         {
00275             new_node=array->get_element(free_index);
00276             ASSERT(is_free(new_node));
00277 
00278             new_index=free_index;
00279             free_index=new_node->index;
00280         }
00281 
00282         new_node->index=new_index;
00283         new_node->free=false;
00284         new_node->element=element;
00285         new_node->left=NULL;
00286         new_node->right=NULL;
00287 
00288         // add new node in start of list
00289         if (hash_array[index]==NULL)
00290         {
00291             hash_array[index]=new_node;
00292         }
00293         else
00294         {
00295             hash_array[index]->left=new_node;
00296             new_node->right=hash_array[index];
00297 
00298             hash_array[index]=new_node;
00299         }
00300     }
00301 
00303     void delete_key(int32_t index, CSetNode<T>* node)
00304     {       
00305         int32_t temp=0;
00306 
00307         if (node==NULL)
00308             return;
00309 
00310         if (node->right!=NULL)
00311             node->right->left = node->left;
00312 
00313         if (node->left!=NULL)
00314             node->left->right = node->right;        
00315         else
00316             hash_array[index] = node->right;
00317 
00318         temp=node->index;
00319 
00320         node->index=free_index;
00321         node->free=true;
00322         node->left=NULL;
00323         node->right=NULL;
00324 
00325         free_index=temp;        
00326     }
00327 
00328 
00329 protected:
00331     bool use_sg_mallocs;
00332 
00334     int32_t hash_size;
00335 
00337     int32_t free_index;
00338 
00340     int32_t num_elements;
00341 
00343     CSetNode<T>** hash_array;
00344 
00346     DynArray<CSetNode<T>*>* array;
00347 };
00348 
00349 }
00350 
00351 #endif /* _MAP_H_ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation