/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (c) 2011 Berlin Institute of Technology and Max-Planck-Society.
 */

#include <shogun/loss/LogLoss.h>
#include <math.h>

using namespace shogun;

float64_t CLogLoss::loss(float64_t prediction, float64_t label)
{
	/* Logistic loss log(1+exp(-z)) on the margin z = prediction*label.
	 * The two branches keep the argument of exp() non-positive, so the
	 * computation cannot overflow for large |z|. */
	float64_t z = prediction * label;
	if (z >= 0)
		return log(1+exp(-z));
	return -z + log(1+exp(z));
}

float64_t CLogLoss::first_derivative(float64_t prediction, float64_t label)
{
	/* d/dz log(1+exp(-z)) = -1/(1+exp(z)), again branched so that exp()
	 * is only evaluated at a non-positive argument. The result lies in
	 * (-1, 0). */
	float64_t z = prediction * label;
	if (z < 0)
		return -1 / (exp(z) + 1);
	float64_t ez = exp(-z);
	return -ez / (ez + 1);
}

float64_t CLogLoss::second_derivative(float64_t prediction, float64_t label)
{
	/* d^2/dz^2 log(1+exp(-z)) = exp(z)/(1+exp(z))^2, which is symmetric
	 * in z. Branching on the sign of z keeps exp() from overflowing for
	 * large positive margins, which would otherwise yield NaN instead of
	 * a value near 0. */
	float64_t z = prediction * label;
	float64_t ez = z < 0 ? exp(z) : exp(-z);

	return ez / (ez*(ez + 2) + 1);
}

float64_t CLogLoss::get_update(float64_t prediction, float64_t label, float64_t eta_t, float64_t norm)
{
	float64_t w, x;
	float64_t d = exp(label * prediction);
	if (eta_t < 1e-6)
	{
		/* As with squared loss, for small eta_t we replace the update
		 * with its first-order Taylor expansion to avoid numerical
		 * problems. */
		return label*eta_t/((1+d)*norm);
	}
	x = eta_t + label*prediction + d;

	/* This piece of code approximates W(exp(x))-x, where W is the
	 * Lambert W function: W(z)*exp(W(z))=z. The absolute error of this
	 * approximation is less than 9e-5. Faster/better approximations can
	 * be substituted here. */
	float64_t W = x >= 1. ? 0.86*x + 0.01 : exp(0.8*x - 0.65); // initial guess
	float64_t r = x >= 1. ? x - log(W) - W : 0.2*x + 0.65 - W; // residual
	float64_t t = 1. + W;
	float64_t u = 2.*t*(t + 2.*r/3.);        // one Fritsch-style
	w = W*(1. + r/t*(u - r)/(u - 2.*r)) - x; // correction step

	return -(label*w + prediction)/norm;
}

float64_t CLogLoss::get_square_grad(float64_t prediction, float64_t label)
{
	/* Squared first derivative of the loss. */
	float64_t d = first_derivative(prediction, label);
	return d*d;
}
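
/* Illustrative usage sketch, not part of the library build: a minimal,
 * self-contained check of the functions above, assuming only the Shogun
 * definitions of CLogLoss and float64_t. The LOGLOSS_DEMO guard is a
 * hypothetical flag introduced here; compile with -DLOGLOSS_DEMO and link
 * against Shogun to run it. The expected values in the comments follow
 * directly from the formulas implemented above. */
#ifdef LOGLOSS_DEMO
#include <cstdio>

int main()
{
	CLogLoss log_loss;

	/* A large positive margin z = prediction*label means a confident,
	 * correct prediction, so the loss approaches 0; a large negative
	 * margin grows roughly linearly in |z|. */
	printf("loss(+3, +1) = %f\n", log_loss.loss(3.0, 1.0));   /* ~0.0486 */
	printf("loss(-3, +1) = %f\n", log_loss.loss(-3.0, 1.0));  /* ~3.0486 */

	/* The first derivative lies in (-1, 0): near -1 for badly
	 * misclassified points, near 0 for confident correct ones. */
	printf("loss'(0, +1) = %f\n", log_loss.first_derivative(0.0, 1.0));   /* -0.5 */

	/* The second derivative peaks at z = 0 with value 1/4. */
	printf("loss''(0, +1) = %f\n", log_loss.second_derivative(0.0, 1.0)); /* 0.25 */

	/* Importance-aware update for learning rate eta_t and scaling norm. */
	printf("update = %f\n", log_loss.get_update(0.0, 1.0, 0.1, 1.0));

	return 0;
}
#endif /* LOGLOSS_DEMO */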