diff --git a/newton_admm/newton_admm.py b/newton_admm/newton_admm.py
index 5edb6d5..6deebc7 100644
--- a/newton_admm/newton_admm.py
+++ b/newton_admm/newton_admm.py
@@ -3,8 +3,11 @@
 import numpy as np
 from . import cones as C
 from scipy.sparse.linalg import aslinearoperator as aslo
+import scipy
 from block import block, block_diag
+import cvxpy as cp
+from time import time
 
 def _unpack(data):
     return data['A'], data['b'], data['c']
@@ -100,13 +103,15 @@ def _J_onto_Kstar(x, cone):
     J_cones = []
     i = 0
     existing_cones = [c for c in _cones if _cone_exists(c, cone)]
+    cone_lengths = []
     for c in existing_cones:
         if _cone_exists(c, cone):
             c_len = _cone_len(c, cone)
             J_cones.append(_dual_cone_J(x[i:i + c_len], c, cone))
             i += c_len
+            cone_lengths.append(c_len)
     assert i == len(x)
-    return block_diag(J_cones, arrtype=sla.LinearOperator)
+    return J_cones, cone_lengths
 
 
 def _dual_cone_J(x, c, cone):
@@ -150,30 +155,96 @@ def _extract_solution(u, v, n, zerotol):
     y = u[n:-1] / scale
     return (x, s, y)
 
+def _setup_benchmark():
+    b = {
+        'time' : [],
+        'fval' : [],
+        'error' : []
+    }
+    return b
+
+def error_from_x(x, benchmark, m):
+    """ From x, return beta """
+    # prob, beta, betastar = benchmark
+    # scs_output = {
+    #     "info" : {
+    #         'status': 'Solved',
+    #         'statusVal': 1,
+    #         'resPri': 1,
+    #         'resInfeas': 1,
+    #         'solveTime': 1,
+    #         'relGap': 1,
+    #         'iter': 1,
+    #         'dobj': 1,
+    #         'pobj': 1,
+    #         'setupTime': 1,
+    #         'resUnbdd': 1,
+    #         'resDual': 1},
+    #     "y" : np.zeros(m),
+    #     "x" : x,
+    #     "s" : np.zeros(m)
+    # }
+    # prob.unpack_results(cp.SCS, scs_output)
+    beta_from_x, betastar = benchmark
+    return np.linalg.norm(beta_from_x(x) - betastar)
+    # if isinstance(beta, cp.Variable):
+    #     return np.linalg.norm(beta.value - betastar)
+    # else:
+    #     return np.sum(np.linalg.norm(b1.value-b2)
+    #                   for b1, b2 in zip(beta, betastar))
+
+def update_benchmark(b, u, v, c, t, zerotol, benchmark, m):
+    n = len(c)
+    b['time'].append(t)
+    x, _, _ = _extract_solution(u, v, n, zerotol)
+    b['fval'].append(c.T.dot(x))
+    b['error'].append(error_from_x(x, benchmark, m))
+
+def M_matvec(v, solve_IplusQ, k):
+    out = np.zeros(3 * k)
+    out[:k] = solve_IplusQ(v[:k])
+    out[k:] = v[k:]
+    return out
+
+def J_matvec(x, n, m, k, d, ridge, IplusQ, J_cones, cone_lengths):
+    out = np.zeros(3*k)
+    out[:k] = (1+ridge)*IplusQ.dot(x[:k]) - x[k:-k] - x[-k:]
+    out[k:k+n] = -x[:n] + (1+ridge)*x[k:k+n] + x[-k:-k+n]
+    # cone block
+    i = 0
+    for K_op, K_len in zip(J_cones, cone_lengths):
+        out[k+n+i:k+n+i+K_len] = -K_op(x[n+i:n+i+K_len]) + (1+ridge)*x[k+n+i:k+n+i+K_len] + K_op(x[2*k+n+i:2*k+n+i+K_len])
+        i += K_len
+
+    out[k+n+m] = -d*x[n+m] + (1+ridge)*x[k+n+m] + d*x[2*k+n+m]
+    out[2*k:] = x[:k] - x[k:-k]
+    return out
 
 def newton_admm(data, cone, maxiters=100, admm_maxiters=1,
                 gmres_maxiters=lambda i: i // 10 + 1, zerotol=1e-14,
                 res_tol=1e-10, ridge=0, alpha=0.001, beta=0.5,
-                verbose=0):
+                verbose=0, benchmark=None):
     A, b, c = _unpack(data)
     assert(A.ndim == 2 and b.ndim == 1 and c.ndim == 1)
     m, n = A.shape
     k = n + m + 1
 
     # preconstruct IplusQ and do an LU decomposition
-    Q = sp.bmat([[None, A.T, c[:, None]],
-                 [-A, None, b[:, None]],
+    Q = sp.bmat([[ None,        A.T, c[:, None]],
+                 [  -A,        None, b[:, None]],
                  [-c[None, :], -b[None, :], None]])
     IplusQ = (sp.eye(Q.shape[0]) + Q).tocsc()
-    IplusQ_LU = sla.splu(IplusQ)
+    if IplusQ.nnz/(Q.shape[0]**2) < 0.1:
+        IplusQ_LU = sla.splu(IplusQ)
+        solve_IplusQ = lambda v: IplusQ_LU.solve(v)
+    else:
+        IplusQ = IplusQ.todense()
+        IplusQ_LU = scipy.linalg.lu_factor(IplusQ)
+        solve_IplusQ = lambda v: scipy.linalg.lu_solve(IplusQ_LU, v)
+
     # create preconditioner linear op
-    def M_matvec(v):
-        out = np.zeros(3 * k)
-        out[:k] = IplusQ_LU.solve(v[:k])
-        out[k:] = v[k:]
-        return out
-    M_lo = sla.LinearOperator((3 * k, 3 * k), matvec=M_matvec)
+    M_lo = sla.LinearOperator((3 * k, 3 * k), matvec=lambda v: M_matvec(v, solve_IplusQ, k))
 
     # Initialize ADMM variables
     utilde, u, v = np.zeros(k), np.zeros(k), np.zeros(k)
@@ -183,36 +254,76 @@ def M_matvec(v):
     Ik = sp.eye(k)
     In = sp.eye(n)
+
+    # save benchmarks in a dictionary
+    b = _setup_benchmark()
+    extra_iters = 1
 
     for iters in range(admm_maxiters):
+        if benchmark is not None:
+            start_time = time()
+
         # do admm step
-        utilde = IplusQ_LU.solve(u + v)
+        utilde = solve_IplusQ(u + v)
         u = _proj_onto_C(utilde - v, cone, n)
         v = v - utilde + u
+        # print("#0", utilde, u)
+        # assert False
+
+        if benchmark is not None:
+            update_benchmark(b, u, v, c, time() - start_time, zerotol, benchmark, m)
+
+        if verbose and np.mod(iters, verbose) == 0:
+            # If still running ADMM iterations, compute residuals
+            r_utilde, r_u, r_v = _compute_r(utilde, u, v, cone, n, IplusQ)
+            _r_utilde, _r_u, _r_v = np.linalg.norm(
+                r_utilde), np.linalg.norm(r_u), np.linalg.norm(r_v)
+            x, _, _ = _extract_solution(u, v, n, zerotol)
+            objval = c.T.dot(x)
+            print("%d/%d: r_utilde = %E, r_u = %E, r_v = %E, obj val c^T x = %E." %
+                  (iters, admm_maxiters, _r_utilde, _r_u, _r_v, objval))
+
+            if all(r < res_tol for r in (_r_utilde, _r_u, _r_v)):
+                print("Stopping early, residuals < {}".format(res_tol))
+                break
 
     for iters in range(maxiters):
+        if benchmark is not None:
+            start_time = time()
+
         # do newton step
         r_utilde, r_u, r_v = _compute_r(utilde, u, v, cone, n, IplusQ)
 
         # create linear operator for Jacobian
-        d = np.array(utilde[-1] - v[-1] >=
-                     0.0).astype(np.float64).reshape(1, 1)
-
-        D = _J_onto_Kstar((utilde - v)[n:-1], cone)
-        D_lo = block_diag([In, D, d], arrtype=sla.LinearOperator)
+        # This code is more readable but slower, and is left here
+        # just for the understanding of the reader.
+        # d = np.array(utilde[-1] - v[-1] >=
+        #              0.0).astype(np.float64).reshape(1, 1)
+        # D = block_diag(_J_onto_Kstar((utilde - v)[n:-1], cone)[0],
+        #                arrtype=sla.LinearOperator)
+        # D_lo = block_diag([In, D, d], arrtype=sla.LinearOperator)
 
-        J_matvec = block([[IplusQ * (1 + ridge), '-I', '-I'],
-                          [-D_lo, Ik * (1 + ridge), D_lo],
-                          ['I', '-I', -Ik * ridge]], arrtype=sla.LinearOperator)
+        # J_lo = block([[IplusQ * (1 + ridge), '-I', '-I'],
+        #               [-D_lo, Ik * (1 + ridge), D_lo],
+        #               ['I', '-I', -Ik * ridge]], arrtype=sla.LinearOperator)
 
-        J_lo = sla.LinearOperator((3 * k, 3 * k), matvec=J_matvec)
+        # Instead of building it block-wise, it is more efficient to just
+        # create the linear operator directly as follows.
+        d = np.array(utilde[-1] - v[-1] >=
+                     0.0).astype(np.float64).reshape(1, 1)
+        J_cones, cone_lengths = _J_onto_Kstar((utilde - v)[n:-1], cone)
+        # print("#1", u)
+        J_lo = sla.LinearOperator((3 * k, 3 * k), matvec=
+            lambda x: J_matvec(x, n, m, k, d, ridge, IplusQ, J_cones, cone_lengths))
 
         # approximately solve then newton step
         dall, info = sla.gmres(J_lo, np.concatenate([r_utilde, r_u, r_v]),
                                M=M_lo, tol=1e-12,
-                               maxiter=gmres_maxiters(iters))
+                               maxiter=gmres_maxiters(iters) + extra_iters)
+        # print("#2")
+        # assert False
         dutilde, du, dv = dall[0:k], dall[k:2 * k], dall[2 * k:]
 
         # backtracking line search
@@ -228,13 +339,20 @@
             if not ((np.linalg.norm(np.concatenate([r_utilde0, r_u0, r_v0])) >
                      (1 - alpha * t) * r_all_norm) and
-                    (t >= 1e-1)):
+                    (t >= 1e-4)):
                 break
             t *= beta
 
+        if t < 1e-4:
+            extra_iters += 1
+
         # update iterates
         utilde, u, v = utilde0, u0, v0
+        ridge *= 0.9
+
+        if benchmark is not None:
+            update_benchmark(b, u, v, c, time() - start_time, zerotol, benchmark, m)
 
         if verbose and np.mod(iters, verbose) == 0:
             # If still running ADMM iterations, compute residuals
@@ -242,8 +360,8 @@
                 r_utilde), np.linalg.norm(r_u), np.linalg.norm(r_v)
             x, _, _ = _extract_solution(u, v, n, zerotol)
             objval = c.T.dot(x)
-            print("At beg. of iter %d/%d: r_utilde = %E, r_u = %E, r_v = %E, obj val c^T x = %E." %
-                  (iters, maxiters, _r_utilde, _r_u, _r_v, objval))
+            print("%d/%d: r_utilde = %E, r_u = %E, r_v = %E, obj val c^T x = %E. (%d)" %
+                  (iters, maxiters, _r_utilde, _r_u, _r_v, objval, extra_iters))
 
             if all(r < res_tol for r in (_r_utilde, _r_u, _r_v)):
                 print("Stopping early, residuals < {}".format(res_tol))
@@ -255,6 +373,7 @@
         's': s,
         'y': y,
         'info': {
-            'fstar': c.dot(x)
+            'fstar': c.dot(x),
+            'benchmark' : b
         }
     }
diff --git a/newton_admm/problems.py b/newton_admm/problems.py
index baeafbc..4f4762e 100644
--- a/newton_admm/problems.py
+++ b/newton_admm/problems.py
@@ -10,7 +10,7 @@ def least_squares(m, n):
     b = A.dot(_x)
 
     x = cp.Variable(n)
-    return cp.Problem(cp.Minimize(cp.sum_squares(A * x - b) + cp.norm(x, 2)))
+    return (x, cp.Problem(cp.Minimize(cp.sum_squares(A * x - b) + cp.norm(x, 2))))
 
 
 def lp(m, n):
@@ -23,7 +23,7 @@ def lp(m, n):
     b1 = np.zeros(n)
 
     # equality constraints
-    A2 = np.random.randn(m, n) 
+    A2 = np.random.randn(m, n)
     b2 = A2.dot(_x)
 
     # objective
@@ -33,9 +33,21 @@ def lp(m, n):
     lam[idxes] = 0
     c = np.ravel(-A2.T.dot(nu) + lam)
 
+    # create standard form cone parameters
+    A = np.vstack([A1,A2,-A2])
+    b = np.vstack([b1[:,None], b2[:,None], -b2[:,None]]).ravel()
+
     x = cp.Variable(n)
-    return cp.Problem(cp.Minimize(c.T * x + cp.sum_squares(x)),
-                      [b1 - A1 * x >= 0, A2 * x == b2])
+    return (x, cp.Problem(cp.Minimize(c.T * x),
+                          [b1 - A1 * x >= 0, A2 * x == b2]), {
+        'A' : A,
+        'b' : b,
+        'c' : c,
+        'dims' : {
+            'l' : 2*m+n
+        },
+        'beta_from_x' : lambda x: x
+    })
 
 
 def portfolio_opt(p):
@@ -43,13 +55,39 @@
     temp = np.random.randn(p, p)
     Sigma = temp.T.dot(temp)
+    Sigma_sqrt = sp.linalg.sqrtm(Sigma)
     o = np.ones((p, 1))
 
-    # Solve by ECOS.
+    # Create standard form cone problem
+    Zp1 = np.zeros((p,1))
+
+    # setup for cone problem
+    c = np.vstack([Zp1, np.array([[1.0]])]).ravel()
+
+    G1 = sp.linalg.block_diag(2.0*Sigma_sqrt, -1.0)
+    q = np.vstack([Zp1, np.array([[1.0]])])
+    G2 = np.hstack([o.T, np.array([[0.0]])])
+    G3 = np.hstack([-o.T, np.array([[0.0]])])
+
+    h = np.vstack([Zp1, np.array([[1.0]])])
+    z = 1.0
+
+    A = np.vstack([G2, G3, -q.T, -G1 ])
+    b = np.vstack([1.0, -1.0, np.array([[z]]), h]).ravel()
+
     betahat = cp.Variable(p)
-    return cp.Problem(cp.Minimize(cp.quad_form(betahat, Sigma)),
-                      [o.T * betahat == 1])
+    return (betahat, cp.Problem(cp.Minimize(cp.quad_form(betahat, Sigma)),
+                                [o.T * betahat == 1]), {
+        'A' : A,
+        'b' : b,
+        'c' : c,
+        'dims' : {
+            'l' : 2,
+            'q' : [p+2]
+        },
+        'beta_from_x' : lambda x: x[:p]
+    })
 
 
 def logistic_regression(N, p, suppfrac):
@@ -91,7 +129,8 @@ def logistic_regression(N, p, suppfrac):
     betahat = cp.Variable(p)
     logloss = sum(cp.log_sum_exp(
         cp.hstack(0, y[i] * X[i, :] * betahat)) for i in range(N))
-    return cp.Problem(cp.Minimize(logloss + lam * cp.norm(betahat, 1)))
+    return (betahat,
+            cp.Problem(cp.Minimize(logloss + lam * cp.norm(betahat, 1))))
 
 
 def robust_pca(p, suppfrac):
diff --git a/test.py b/test.py
index 727b31e..7a70a15 100644
--- a/test.py
+++ b/test.py
@@ -11,7 +11,7 @@ def _s(o1, o2):
 
 def test_least_squares():
     """ This test will construct second order cone constraints """
-    prob = problems.least_squares(10, 5)
+    prob = problems.least_squares(10, 5)[1]
     data = prob.get_problem_data(cp.SCS)
     out = newton_admm(data, data['dims'])
     cvx_out = prob.solve()
@@ -22,7 +22,7 @@
 
 def test_lp():
     """ This test will construct equality, inequality, and second order cone constraints """
-    prob = problems.lp(30, 60)
+    prob = problems.lp(30, 60)[1]
     data = prob.get_problem_data(cp.SCS)
     out = newton_admm(data, data['dims'])
     cvx_out = prob.solve()
@@ -33,7 +33,7 @@
 
 def test_portfolio_opt():
     """ This test will construct equality, inequality, and second order cone constraints """
-    prob = problems.portfolio_opt(10)
+    prob = problems.portfolio_opt(10)[1]
     data = prob.get_problem_data(cp.SCS)
     out = newton_admm(data, data['dims'])
     cvx_out = prob.solve()
@@ -44,7 +44,7 @@
 
 def test_logistic_regression():
     """ This test will construct inequality, and exponential cone constraints """
-    prob = problems.logistic_regression(5, 2, 1.0)
+    prob = problems.logistic_regression(5, 2, 1.0)[1]
    data = prob.get_problem_data(cp.SCS)
     out = newton_admm(data, data['dims'])
     cvx_out = prob.solve()
@@ -63,7 +63,7 @@ def test_PSD():
 
 def test_robust_pca():
     """ This test will construct positive semi-definite cone constraints """
-    prob = problems.robust_pca(5, 0.5)
+    prob = problems.robust_pca(5, 0.5)[1]
     data = prob.get_problem_data(cp.SCS)
     # solve the problem with ADMM instead for better accuracy
     out0 = newton_admm(data, data['dims'], admm_maxiters=4000, maxiters=4000)
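A rough usage sketch of the benchmark= hook added above. The cone data returned by problems.lp() and the per-iteration lists in out['info']['benchmark'] come from the diff itself; the import paths and the use of prob.solve() to obtain a reference solution are assumptions for illustration, not part of the commit.

import numpy as np
# Assumed import paths; the package layout is newton_admm/{newton_admm.py, problems.py}.
from newton_admm import newton_admm, problems

# lp() now returns (cvxpy variable, cvxpy Problem, hand-built cone data).
beta, prob, cone_data = problems.lp(30, 60)

# Reference optimizer for the error curve (assumption: any accurate solver works here).
prob.solve()
betastar = np.asarray(beta.value).ravel()

data = {'A': cone_data['A'], 'b': cone_data['b'], 'c': cone_data['c']}
out = newton_admm(data, cone_data['dims'], maxiters=50,
                  benchmark=(cone_data['beta_from_x'], betastar))

# Per-iteration wall-clock time, objective value c^T x, and ||beta - betastar||_2.
bench = out['info']['benchmark']
print(bench['time'][-1], bench['fval'][-1], bench['error'][-1])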