Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/lib/SGVector.h>
00012 #include <shogun/labels/MulticlassLabels.h>
00013 #include <shogun/evaluation/ClusteringMutualInformation.h>
00014
00015 using namespace shogun;
00016
00017 float64_t CClusteringMutualInformation::evaluate(CLabels* predicted, CLabels* ground_truth)
00018 {
00019 ASSERT(predicted && ground_truth);
00020 ASSERT(predicted->get_label_type() == LT_MULTICLASS);
00021 ASSERT(ground_truth->get_label_type() == LT_MULTICLASS);
00022 SGVector<float64_t> label_p=((CMulticlassLabels*) predicted)->get_unique_labels();
00023 SGVector<float64_t> label_g=((CMulticlassLabels*) ground_truth)->get_unique_labels();
00024
00025 if (label_p.vlen != label_g.vlen)
00026 SG_ERROR("Number of classes are different\n");
00027 uint32_t n_class=label_p.vlen;
00028 float64_t n_label=predicted->get_num_labels();
00029
00030 SGVector<int32_t> ilabels_p=((CMulticlassLabels*) predicted)->get_int_labels();
00031 SGVector<int32_t> ilabels_g=((CMulticlassLabels*) ground_truth)->get_int_labels();
00032
00033 SGMatrix<float64_t> G(n_class, n_class);
00034 for (size_t i=0; i < n_class; ++i)
00035 {
00036 for (size_t j=0; j < n_class; ++j)
00037 G(i, j)=find_match_count(ilabels_g, label_g[i],
00038 ilabels_p, label_p[j])/n_label;
00039 }
00040
00041 SGVector<float64_t> G_rowsum(n_class);
00042 SGVector<float64_t> G_colsum(n_class);
00043 for (size_t i=0; i < n_class; ++i)
00044 {
00045 for (size_t j=0; j < n_class; ++j)
00046 {
00047 G_rowsum[i] += G(i, j);
00048 G_colsum[i] += G(j, i);
00049 }
00050 }
00051
00052 float64_t mutual_info = 0;
00053 for (size_t i=0; i < n_class; ++i)
00054 {
00055 for (size_t j=0; j < n_class; ++j)
00056 {
00057 if (G(i, j) != 0)
00058 mutual_info += G(i, j) * log(G(i,j) /
00059 (G_rowsum[i]*G_colsum[j]))/log(2.);
00060 }
00061 }
00062
00063 float64_t entropy_p = 0;
00064 float64_t entropy_g = 0;
00065 for (size_t i=0; i < n_class; ++i)
00066 {
00067 entropy_g += -G_rowsum[i] * log(G_rowsum[i])/log(2.);
00068 entropy_p += -G_colsum[i] * log(G_colsum[i])/log(2.);
00069 }
00070
00071 return mutual_info / CMath::max(entropy_g, entropy_p);
00072 }