forked from ymalitsky/adaptive_GD
loss_functions.py
import scipy.sparse
import scipy.special
import numpy as np
import numpy.linalg as la
from sklearn.utils.extmath import safe_sparse_dot

def safe_sparse_add(a, b):
    if scipy.sparse.issparse(a) and scipy.sparse.issparse(b):
        # both are sparse, keep the result sparse
        return a + b
    else:
        # one of them is non-sparse, convert
        # everything to dense
        if scipy.sparse.issparse(a):
            a = a.toarray()
            if a.ndim == 2 and b.ndim == 1:
                a = a.ravel()
        elif scipy.sparse.issparse(b):
            b = b.toarray()
            if b.ndim == 2 and a.ndim == 1:
                b = b.ravel()
        return a + b
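
# Minimal usage sketch for safe_sparse_add (the helper below is illustrative and
# not part of the original module): a sparse (n, 1) column added to a dense 1-D
# vector falls back to a dense 1-D result, while two sparse inputs stay sparse.
def _demo_safe_sparse_add():
    a = scipy.sparse.csr_matrix(np.array([[1.0], [2.0]]))  # sparse column, shape (2, 1)
    b = np.array([10.0, 20.0])                             # dense, shape (2,)
    print(safe_sparse_add(a, b))   # dense array [11. 22.]
    print(safe_sparse_add(a, a))   # stays sparse, shape (2, 1)
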
def logsig(x):
    """
    Compute the log-sigmoid function component-wise.
    See http://fa.bianp.net/blog/2019/evaluate_logistic/ for more details.
    """
    out = np.zeros_like(x)
    idx0 = x < -33
    out[idx0] = x[idx0]
    idx1 = (x >= -33) & (x < -18)
    out[idx1] = x[idx1] - np.exp(x[idx1])
    idx2 = (x >= -18) & (x < 37)
    out[idx2] = -np.log1p(np.exp(-x[idx2]))
    idx3 = x >= 37
    out[idx3] = -np.exp(-x[idx3])
    return out
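
# Illustration of why logsig is preferred over the naive formula (example
# snippet, not from the original file): -np.log1p(np.exp(-x)) overflows for
# large negative x, while the piecewise evaluation stays finite.
def _demo_logsig():
    x = np.array([-1000.0, -20.0, 0.0, 40.0])
    print(logsig(x))                      # ~ [-1000, -20, -log(2), -4.2e-18]
    with np.errstate(over="ignore"):
        print(-np.log1p(np.exp(-x)))      # first entry overflows to -inf
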
def logistic_loss(w, X, y, l2):
    """Logistic loss, numerically stable implementation.

    Parameters
    ----------
    w : array-like, shape (n_features,)
        Coefficients
    X : array-like, shape (n_samples, n_features)
        Data matrix
    y : array-like, shape (n_samples,)
        Labels, assumed to be in {0, 1}
    l2 : float
        L2 regularization coefficient

    Returns
    -------
    loss : float
    """
    z = np.dot(X, w)
    y = np.asarray(y)
    return np.mean((1 - y) * z - logsig(z)) + l2 / 2 * la.norm(w) ** 2
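
# Small usage sketch for logistic_loss (synthetic example data, not from the
# original repository). At w = 0 the data-fit term equals log(2) for any
# labels in {0, 1}, and the penalty vanishes.
def _demo_logistic_loss():
    rng = np.random.default_rng(0)
    X = rng.standard_normal((5, 3))
    y = rng.integers(0, 2, size=5).astype(float)
    w = np.zeros(3)
    print(logistic_loss(w, X, y, l2=0.1))  # ~ 0.6931 (= log 2)
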
def logistic_gradient(w, X, y_, l2, normalize=True):
    """
    Gradient of the logistic loss at point w with features X, labels y and l2 regularization.
    If labels are from {-1, 1}, they will be changed to {0, 1} internally.
    """
    y = (y_ + 1) / 2 if -1 in y_ else y_
    activation = scipy.special.expit(safe_sparse_dot(X, w, dense_output=True).ravel())
    grad = safe_sparse_add(X.T.dot(activation - y) / X.shape[0], l2 * w)
    grad = np.asarray(grad).ravel()
    if normalize:
        return grad
    return grad * len(y)
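
# Finite-difference sanity check that logistic_gradient matches the gradient of
# logistic_loss (example snippet; dense data, labels already in {0, 1}).
def _demo_logistic_gradient():
    rng = np.random.default_rng(1)
    X = rng.standard_normal((20, 4))
    y = (rng.standard_normal(20) > 0).astype(float)
    w = rng.standard_normal(4)
    l2 = 0.05
    grad = logistic_gradient(w, X, y, l2)
    eps = 1e-6
    fd = np.array([(logistic_loss(w + eps * e, X, y, l2)
                    - logistic_loss(w - eps * e, X, y, l2)) / (2 * eps)
                   for e in np.eye(4)])
    print(np.allclose(grad, fd, atol=1e-5))  # expected: True
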
def cubic_loss(w, H, g, M, scale=1):
    """
    Loss value of the quadratic model with cubic regularization.
    To make M independent of the gradient/Hessian rescaling, we scale it with the last argument.
    """
    return w @ g + 0.5 * H @ w @ w + scale * M / 6 * la.norm(w) ** 3
def cubic_gradient(w, H, g, M, scale=1):
    """
    Gradient of the quadratic model with cubic regularization.
    To make M independent of the gradient/Hessian rescaling, we scale them with the last argument.
    """
    return (g + H @ w) / scale + M / 2 * w * la.norm(w)
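
# Sanity check for the cubic model (example snippet, symmetric Hessian assumed):
# with the default scale=1, cubic_gradient should equal the finite-difference
# gradient of cubic_loss; for scale != 1 it returns that gradient divided by scale.
def _demo_cubic():
    rng = np.random.default_rng(2)
    d = 3
    A = rng.standard_normal((d, d))
    H = A + A.T                      # symmetric Hessian
    g = rng.standard_normal(d)
    w = rng.standard_normal(d)
    M, eps = 2.0, 1e-6
    fd = np.array([(cubic_loss(w + eps * e, H, g, M)
                    - cubic_loss(w - eps * e, H, g, M)) / (2 * eps)
                   for e in np.eye(d)])
    print(np.allclose(cubic_gradient(w, H, g, M), fd, atol=1e-5))  # expected: True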