/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Written (W) 2012 Fernando José Iglesias García
 * Copyright (c) 2011 Berlin Institute of Technology and Max-Planck-Society.
 */

#include <shogun/loss/LogLoss.h>

using namespace shogun;

float64_t CLogLoss::loss(float64_t z)
{
	/* log(1 + exp(-z)), with the branch chosen so that exp() is only
	 * ever evaluated at a non-positive argument (avoids overflow) */
	return (z >= 0) ? log(1 + exp(-z)) : -z + log(1 + exp(z));
}

float64_t CLogLoss::first_derivative(float64_t z)
{
	/* d/dz log(1 + exp(-z)) = -1/(exp(z) + 1), again evaluated so
	 * that exp() never overflows */
	if (z < 0)
		return -1 / (exp(z) + 1);

	float64_t ez = exp(-z);
	return -ez / (ez + 1);
}

float64_t CLogLoss::second_derivative(float64_t z)
{
	/* exp(z)/(1 + exp(z))^2 is symmetric in z, so evaluate it with
	 * exp(-|z|) to keep exp() from overflowing for large positive z */
	float64_t ez = (z > 0) ? exp(-z) : exp(z);
	return ez / (ez*(ez + 2) + 1);
}

float64_t CLogLoss::get_update(float64_t prediction, float64_t label, float64_t eta_t, float64_t norm)
{
	float64_t w, x;
	float64_t d = exp(label * prediction);
	if (eta_t < 1e-6)
	{
		/* As with squared loss, for small eta_t we replace the update
		 * with its first-order Taylor expansion to avoid numerical
		 * problems */
		return label*eta_t/((1 + d)*norm);
	}
	x = eta_t + label*prediction + d;

	/* This block approximates W(exp(x)) - x, where W is the Lambert W
	 * function: W(z)*exp(W(z)) = z. The absolute error of this
	 * approximation is less than 9e-5. Faster/better approximations
	 * can be substituted here.
	 */
	float64_t W = x >= 1. ? 0.86*x + 0.01 : exp(0.8*x - 0.65); // initial guess for W(exp(x))
	float64_t r = x >= 1. ? x - log(W) - W : 0.2*x + 0.65 - W; // residual of the guess
	float64_t t = 1. + W;
	float64_t u = 2.*t*(t + 2.*r/3.);        // single Fritsch-style correction step ...
	w = W*(1. + r/t*(u - r)/(u - 2.*r)) - x; // ... yielding the refined W(exp(x)) - x

	return -(label*w + prediction)/norm;
}

float64_t CLogLoss::get_square_grad(float64_t prediction, float64_t label)
{
	float64_t d = CLossFunction::first_derivative(prediction, label);
	return d*d;
}
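
/* --------------------------------------------------------------------
 * The standalone snippet below is NOT part of Shogun; it is a minimal,
 * hypothetical sketch (its own translation unit, C++11) checking the
 * comment in get_update() that the fast approximation of W(exp(x)) - x
 * has absolute error below 9e-5. The reference value exploits the fact
 * that v = W(exp(x)) is the unique positive root of v + log(v) = x,
 * solved here by Newton's method. The helper names approx_w_minus_x()
 * and reference_w_minus_x() are invented for this illustration.
 * -------------------------------------------------------------------- */
#include <cmath>
#include <cstdio>

// Same closed-form approximation as in CLogLoss::get_update() above
static double approx_w_minus_x(double x)
{
	double W = x >= 1. ? 0.86*x + 0.01 : std::exp(0.8*x - 0.65); // initial guess
	double r = x >= 1. ? x - std::log(W) - W : 0.2*x + 0.65 - W; // residual
	double t = 1. + W;
	double u = 2.*t*(t + 2.*r/3.);
	return W*(1. + r/t*(u - r)/(u - 2.*r)) - x;
}

// Reference: Newton iteration on f(v) = v + log(v) - x, whose positive
// root is v = W(exp(x)); returns v - x
static double reference_w_minus_x(double x)
{
	double v = x >= 1. ? x : std::exp(x); // W(exp(x)) ~ x for large x, ~ exp(x) for small x
	for (int i = 0; i < 50; i++)
		v -= (v + std::log(v) - x) / (1. + 1./v);
	return v - x;
}

int main()
{
	double max_err = 0;
	for (double x = -20; x <= 20; x += 0.01)
		max_err = std::fmax(max_err,
		                    std::fabs(approx_w_minus_x(x) - reference_w_minus_x(x)));
	std::printf("max abs error on [-20, 20]: %g\n", max_err); // should stay below 9e-5
	return 0;
}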