SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ClusteringMutualInformation.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Chiyuan Zhang
8  * Copyright (C) 2012 Chiyuan Zhang
9  */
10 
11 #include <shogun/lib/SGVector.h>
14 
15 using namespace shogun;
16 
18 {
19  ASSERT(predicted && ground_truth)
20  ASSERT(predicted->get_label_type() == LT_MULTICLASS)
21  ASSERT(ground_truth->get_label_type() == LT_MULTICLASS)
22  SGVector<float64_t> label_p=((CMulticlassLabels*) predicted)->get_unique_labels();
23  SGVector<float64_t> label_g=((CMulticlassLabels*) ground_truth)->get_unique_labels();
24 
25  if (label_p.vlen != label_g.vlen)
26  SG_ERROR("Number of classes are different\n")
27  index_t n_class=label_p.vlen;
28  float64_t n_label=predicted->get_num_labels();
29 
30  SGVector<int32_t> ilabels_p=((CMulticlassLabels*) predicted)->get_int_labels();
31  SGVector<int32_t> ilabels_g=((CMulticlassLabels*) ground_truth)->get_int_labels();
32 
33  SGMatrix<float64_t> G(n_class, n_class);
34  for (index_t i=0; i < n_class; ++i)
35  {
36  for (index_t j=0; j < n_class; ++j)
37  G(i, j)=find_match_count(ilabels_g, label_g[i],
38  ilabels_p, label_p[j])/n_label;
39  }
40 
41  SGVector<float64_t> G_rowsum(n_class);
42  G_rowsum.zero();
43  SGVector<float64_t> G_colsum(n_class);
44  G_colsum.zero();
45  for (index_t i=0; i < n_class; ++i)
46  {
47  for (index_t j=0; j < n_class; ++j)
48  {
49  G_rowsum[i] += G(i, j);
50  G_colsum[i] += G(j, i);
51  }
52  }
53 
54  float64_t mutual_info = 0;
55  for (index_t i=0; i < n_class; ++i)
56  {
57  for (index_t j=0; j < n_class; ++j)
58  {
59  if (G(i, j) != 0)
60  mutual_info += G(i, j) * log(G(i,j) /
61  (G_rowsum[i]*G_colsum[j]))/log(2.);
62  }
63  }
64 
65  float64_t entropy_p = 0;
66  float64_t entropy_g = 0;
67  for (index_t i=0; i < n_class; ++i)
68  {
69  entropy_g += -G_rowsum[i] * log(G_rowsum[i])/log(2.);
70  entropy_p += -G_colsum[i] * log(G_colsum[i])/log(2.);
71  }
72 
73  return mutual_info / CMath::max(entropy_g, entropy_p);
74 }
virtual float64_t evaluate(CLabels *predicted, CLabels *ground_truth)
virtual ELabelType get_label_type() const =0
int32_t index_t
Definition: common.h:62
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual int32_t get_num_labels() const =0
multi-class labels 0,1,...
Definition: LabelTypes.h:20
#define SG_ERROR(...)
Definition: SGIO.h:129
Multiclass Labels for multi-class classification.
index_t vlen
Definition: SGVector.h:494
#define ASSERT(x)
Definition: SGIO.h:201
double float64_t
Definition: common.h:50
static T max(T a, T b)
Definition: Math.h:168
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
int32_t find_match_count(SGVector< int32_t > l1, int32_t m1, SGVector< int32_t > l2, int32_t m2)

SHOGUN Machine Learning Toolbox - Documentation