#include <shogun/lib/slep/slep_mc_plain_lr.h>
#ifdef HAVE_EIGEN3
#include <shogun/lib/slep/q1/eppMatrix.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <iostream>

using namespace shogun;
using namespace Eigen;
using namespace std;

namespace shogun
{

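/* Accelerated proximal gradient solver for multiclass logistic regression with
 * the sparsity-inducing L1/Lq regularizer of SLEP. Each class is handled as a
 * one-vs-rest logistic problem; the weight matrix w (n_feats x n_classes) and
 * the intercept vector c are updated with FISTA-style momentum and an adaptive
 * line search over the step size 1/L. The parameter z is the regularization
 * strength; options.q is passed to SLEP's eppMatrix projection as the q-norm
 * of the regularizer.
 */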
slep_result_t slep_mc_plain_lr(
    CDotFeatures* features,
    CMulticlassLabels* labels,
    float64_t z,
    const slep_options& options)
{
    int i,j;

    // obtain problem dimensions
    int n_feats = features->get_dim_feature_space();
    int n_vecs = features->get_num_vectors();
    int n_classes = labels->get_num_classes();

    // labels vector containing class indices in [0 .. n_classes-1]
    SGVector<float64_t> labels_vector = labels->get_labels();

    // weight matrix (one column per class) and intercepts, initialized to zero
    MatrixXd w = MatrixXd::Zero(n_feats, n_classes);
    VectorXd c = VectorXd::Zero(n_classes);

    // warm-start from the previous result if one is available
    if (options.last_result)
    {
        SGMatrix<float64_t> last_w = options.last_result->w;
        SGVector<float64_t> last_c = options.last_result->c;
        for (i=0; i<n_classes; i++)
        {
            c[i] = last_c[i];
            for (j=0; j<n_feats; j++)
                w(j,i) = last_w(j,i);
        }
    }

    // previous iterates and differences between consecutive iterates
    MatrixXd wp = w, wwp = MatrixXd::Zero(n_feats, n_classes);
    VectorXd cp = c, ccp = VectorXd::Zero(n_classes);

    // search points for the accelerated gradient step
    MatrixXd search_w = MatrixXd::Zero(n_feats, n_classes);
    VectorXd search_c = VectorXd::Zero(n_classes);

    // dot products of the feature vectors with current, search and previous weights
    MatrixXd Aw = MatrixXd::Zero(n_vecs, n_classes);
    for (j=0; j<n_classes; j++)
        features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);
    MatrixXd As = MatrixXd::Zero(n_vecs, n_classes);
    MatrixXd Awp = MatrixXd::Zero(n_vecs, n_classes);

    // gradients w.r.t. the weights and the intercepts
    MatrixXd g = MatrixXd::Zero(n_feats, n_classes);
    VectorXd gc = VectorXd::Zero(n_classes);

    // buffer for the gradient step before projection
    MatrixXd v = MatrixXd::Zero(n_feats, n_classes);

    // initial estimate of the Lipschitz constant, refined by the line search
    double L = 1.0/(n_vecs*n_classes);

    // coefficients of the momentum (search point) computation
    double alphap = 0, alpha = 1;

    // regularization parameter
    double lambda = z;

    // current and previous objective values
    double objective = 0.0;
    double objective_p = 0.0;

    int iter = 0;
    bool done = false;
    CTime time;
    // Eigen's internal::set_is_malloc_allowed() only exists when
    // EIGEN_RUNTIME_NO_MALLOC is defined, so the allocation guard is disabled
    //internal::set_is_malloc_allowed(false);
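    // Main optimization loop: each iteration forms an extrapolated search point,
    // takes a proximal gradient step with line search and checks the stopping
    // criteria (max_iter, cancellation, vanishing step or small relative change
    // of the objective).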
    while ((!done) && (iter<options.max_iter) && (!CSignal::cancel_computations()))
    {
        double beta = (alphap-1)/alpha;

        // compute search points as extrapolations of the last two iterates
        search_w = w + beta*wwp;
        search_c = c + beta*ccp;

        // update dot products at the search point without re-reading the features
        As = Aw + beta*(Aw-Awp);

        // compute the objective value and the gradients at the search point
        double fun_s = 0;
        g.setZero();
        gc.setZero();
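        // For every vector i and class j the one-vs-rest logistic loss is
        //   log(1 + exp(s_ij)),  s_ij = sign_ij * (x_i . w_j + c_j),
        // where sign_ij = -1 if j is the true class of x_i and +1 otherwise;
        // its derivative w.r.t. c_j (and similarly w_j) is
        // sign_ij * (1 - 1/(1 + exp(s_ij))), the quantity b accumulated into
        // the gradients gc and g below.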

        for (i=0; i<n_vecs; i++)
        {
            // class of the current vector
            int vec_class = labels_vector[i];

            for (j=0; j<n_classes; j++)
            {
                // signed margin of vector i for the one-vs-rest problem of class j
                double aa = ((vec_class == j) ? -1.0 : 1.0)*(As(i,j) + search_c(j));
                double bb = aa > 0.0 ? aa : 0.0;

                // log(1+exp(aa)) computed via the log-sum-exp trick to avoid overflow
                fun_s += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
                double prob = 1.0/(1+CMath::exp(aa));
                double b = ((vec_class == j) ? -1.0 : 1.0)*(1-prob);

                // accumulate the gradients of the intercepts and the weight vectors
                gc[j] += b;
                features->add_to_dense_vec(b, i, g.col(j).data(), n_feats);
            }
        }

        // keep the previous iterate before the proximal gradient step
        wp = w;
        Awp = Aw;
        cp = c;

        int inner_iter = 0;
        double fun_x = 0;

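        // Line search: for the current estimate L, step from the search point along
        // the negative gradient with step 1/L and apply the proximal operator of the
        // regularizer with threshold lambda/L; L is increased until the loss is
        // majorized by its quadratic model around the search point, i.e.
        //   f(x) <= f(s) + <grad f(s), x - s> + L/2 * ||x - s||^2.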
        while (inner_iter<5000)
        {
            // gradient step from the search point
            v = search_w - g/L;
            c = search_c - gc/L;

            // proximal/projection step of the SLEP solver (eppMatrix applies the
            // L1/Lq proximal operator to v with threshold lambda/L and norm options.q)
            eppMatrix(w.data(),v.data(),n_feats,n_classes,lambda/L,options.q);

            // difference between the new point and the search point
            v = w - search_w;

            // recompute the dot products with the new weights
            for (j=0; j<n_classes; j++)
                features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);

            // objective value (smooth logistic loss only) at the new point
            fun_x = 0;
            for (i=0; i<n_vecs; i++)
            {
                int vec_class = labels_vector[i];
                for (j=0; j<n_classes; j++)
                {
                    double aa = ((vec_class == j) ? -1.0 : 1.0)*(Aw(i,j) + c(j));
                    double bb = aa > 0.0 ? aa : 0.0;
                    fun_x += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
                }
            }

            // check the line search termination condition
            double r_sum = (v.squaredNorm() + (c-search_c).squaredNorm())/2;
            double l_sum = fun_x - fun_s - v.cwiseProduct(g).sum() - (c-search_c).dot(gc);

            // stop the whole process if the projected gradient step barely moves the iterate
            if (r_sum <= 1e-20)
            {
                SG_SINFO("Gradient step makes little improvement (%f)\n",r_sum);
                done = true;
                break;
            }

            // accept the step if the majorization holds, otherwise increase L
            if (l_sum <= r_sum*L)
                break;
            else
                L = CMath::max(2*L, l_sum/r_sum);

            inner_iter++;
        }
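
        // FISTA momentum schedule: alpha_{k+1} = (1 + sqrt(1 + 4*alpha_k^2))/2; the
        // next search point uses beta = (alpha_k - 1)/alpha_{k+1} to extrapolate
        // along the difference between the last two iterates.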
        alphap = alpha;
        alpha = (1+CMath::sqrt(4*alpha*alpha+1))/2;

        // store the differences between consecutive iterates for the next extrapolation
        wwp = w - wp;
        ccp = c - cp;

        // update the objective: smooth loss plus the regularization term
        // (note: the reported value uses the Euclidean norm of each column of w
        // regardless of options.q)
        objective_p = objective;
        objective = fun_x;

        double L1q_norm = 0.0;
        for (int m=0; m<n_classes; m++)
            L1q_norm += w.col(m).norm();
        objective += lambda*L1q_norm;

        // terminate once the relative change of the objective drops below the tolerance
        if ((CMath::abs(objective - objective_p) < options.tolerance*CMath::abs(objective_p)) && (iter>2))
        {
            SG_SINFO("Objective changes less than tolerance\n");
            done = true;
        }

        iter++;
    }
    SG_SINFO("%d iterations passed, objective = %f\n",iter,objective);
    // see the note above: the Eigen allocation guard is disabled
    //internal::set_is_malloc_allowed(true);

    // copy the resulting weights and intercepts into Shogun containers
    SGMatrix<float64_t> r_w(n_feats,n_classes);
    for (j=0; j<n_classes; j++)
    {
        for (i=0; i<n_feats; i++)
            r_w(i,j) = w(i,j);
    }

    SGVector<float64_t> r_c(n_classes);
    for (j=0; j<n_classes; j++)
        r_c[j] = c[j];
    return slep_result_t(r_w, r_c);
}
} // namespace shogun
#endif