/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (c) 2011 Berlin Institute of Technology and Max-Planck-Society.
 */

#include <shogun/loss/LogLoss.h>
#include <math.h>

using namespace shogun;

float64_t CLogLoss::loss(float64_t prediction, float64_t label)
{
	/* Logistic loss log(1+exp(-z)) on the margin z = prediction*label.
	 * The two branches keep the argument of exp() non-positive, so the
	 * computation cannot overflow for large |z|. */
	float64_t z = prediction * label;
	if (z >= 0)
		return log(1+exp(-z));
	return -z + log(1+exp(z));
}

float64_t CLogLoss::first_derivative(float64_t prediction, float64_t label)
{
	/* d/dz log(1+exp(-z)) = -1/(1+exp(z)), again branched so that exp()
	 * is only evaluated at a non-positive argument. The result lies in
	 * (-1, 0). */
	float64_t z = prediction * label;
	if (z < 0)
		return -1 / (exp(z) + 1);
	float64_t ez = exp(-z);
	return -ez / (ez + 1);
}

float64_t CLogLoss::second_derivative(float64_t prediction, float64_t label)
{
	/* d^2/dz^2 log(1+exp(-z)) = exp(z)/(1+exp(z))^2, which is symmetric
	 * in z. Branching on the sign of z keeps exp() from overflowing for
	 * large positive margins, which would otherwise yield NaN instead of
	 * a value near 0. */
	float64_t z = prediction * label;
	float64_t ez = z < 0 ? exp(z) : exp(-z);

	return ez / (ez*(ez + 2) + 1);
}

float64_t CLogLoss::get_update(float64_t prediction, float64_t label, float64_t eta_t, float64_t norm)
{
	float64_t w, x;
	float64_t d = exp(label * prediction);
	if (eta_t < 1e-6)
	{
		/* As with squared loss, for small eta_t we replace the update
		 * with its first-order Taylor expansion to avoid numerical
		 * problems. */
		return label*eta_t/((1+d)*norm);
	}
	x = eta_t + label*prediction + d;

	/* This piece of code approximates W(exp(x))-x, where W is the
	 * Lambert W function: W(z)*exp(W(z))=z. The absolute error of this
	 * approximation is less than 9e-5. Faster/better approximations can
	 * be substituted here. */
	float64_t W = x >= 1. ? 0.86*x + 0.01 : exp(0.8*x - 0.65); // initial guess
	float64_t r = x >= 1. ? x - log(W) - W : 0.2*x + 0.65 - W; // residual
	float64_t t = 1. + W;
	float64_t u = 2.*t*(t + 2.*r/3.);        // one Fritsch-style
	w = W*(1. + r/t*(u - r)/(u - 2.*r)) - x; // correction step

	return -(label*w + prediction)/norm;
}

float64_t CLogLoss::get_square_grad(float64_t prediction, float64_t label)
{
	/* Squared first derivative of the loss. */
	float64_t d = first_derivative(prediction, label);
	return d*d;
}
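
/* Illustrative usage sketch, not part of the library build: a minimal,
 * self-contained check of the functions above, assuming only the Shogun
 * definitions of CLogLoss and float64_t. The LOGLOSS_DEMO guard is a
 * hypothetical flag introduced here; compile with -DLOGLOSS_DEMO and link
 * against Shogun to run it. The expected values in the comments follow
 * directly from the formulas implemented above. */
#ifdef LOGLOSS_DEMO
#include <cstdio>

int main()
{
	CLogLoss log_loss;

	/* A large positive margin z = prediction*label means a confident,
	 * correct prediction, so the loss approaches 0; a large negative
	 * margin grows roughly linearly in |z|. */
	printf("loss(+3, +1) = %f\n", log_loss.loss(3.0, 1.0));   /* ~0.0486 */
	printf("loss(-3, +1) = %f\n", log_loss.loss(-3.0, 1.0));  /* ~3.0486 */

	/* The first derivative lies in (-1, 0): near -1 for badly
	 * misclassified points, near 0 for confident correct ones. */
	printf("loss'(0, +1) = %f\n", log_loss.first_derivative(0.0, 1.0));   /* -0.5 */

	/* The second derivative peaks at z = 0 with value 1/4. */
	printf("loss''(0, +1) = %f\n", log_loss.second_derivative(0.0, 1.0)); /* 0.25 */

	/* Importance-aware update for learning rate eta_t and scaling norm. */
	printf("update = %f\n", log_loss.get_update(0.0, 1.0, 0.1, 1.0));

	return 0;
}
#endif /* LOGLOSS_DEMO */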