function [alpha, c, wresid, wresid_norm, y_est, Regression] = ...
varpro(y, w, alpha, n, ada, lb, ub, options)
%VARPRO Solve a separable nonlinear least squares problem.
% [alpha, c, wresid, wresid_norm, y_est, Regression] =
% VARPRO(y, w, alpha, n, ada, lb, ub, options)
%
% Given a set of m observations y(1),...,y(m)
% this program computes a weighted least squares fit using the model
%
% eta(alpha,c,t) =
% c_1 * phi_1 (alpha,t) + ... + c_n * phi_n (alpha,t)
% (possibly with an extra term + phi_{n+1} (alpha,t) ).
%
% This program determines optimal values of the q nonlinear parameters
% alpha and the n linear parameters c, given observations y at m
% different values of the "time" t and given evaluation of phi and
% (optionally) derivatives of phi.
%
% On Input:
%
% y m x 1 vector containing the m observations.
% w m x 1 vector of weights used in the least squares
% fit. We minimize the norm of the weighted residual
% vector r, where, for i=1:m,
%
% r(i) = w(i) * (y(i) - eta(alpha, c, t(i,:))).
%
% Therefore, w(i) should be set to 1 divided by
% the standard deviation in the measurement y(i).
% If this number is unknown, set w(i) = 1.
% alpha q x 1 initial estimates of the parameters alpha.
% If alpha = [], Varpro assumes that the problem
% is linear and returns estimates of the c parameters.
% n number of linear parameters c
% ada a function handle, described below.
% lb q x 1 lower bounds on the parameters alpha.
% (Optional) (Omit this argument or use [] if there are
% no lower bounds.)
% ub q x 1 upper bounds on the parameters alpha.
% (Optional) (Omit this argument or use [] if there are
% no upper bounds.)
% options The Matlab optimization parameter structure,
% (Optional) set by "optimset", to control convergence
% tolerances, maximum number of function evaluations,
% information displayed in the command window, etc.
% To use default options, omit this parameter.
% To determine the default options, type
% options = optimset('lsqnonlin')
% After doing this, the defaults can be modified;
% to modify the display option, for example, type
% options = optimset('lsqnonlin');
% optimset(options,'Display','iter');
%
% On Output:
%
% alpha q x 1 estimates of the nonlinear parameters.
% c n x 1 estimates of the linear parameters.
% wresid m x 1 weighted residual vector, with i-th component
% w(i) * (y(i) - eta(alpha, c, t(i,:))).
% wresid_norm norm of wresid.
% y_est m x 1 the model estimates = eta(alpha, c, t(i,:)))
% Regression a structure containing diagnostics about the model fit.
% **************************************************
% * C a u t i o n: *
% * The theory that makes statistical *
% * diagnostics useful is derived for *
% * linear regression, with no upper- or *
% * lower-bounds on variables. *
% * The relevance of these quantities to our *
% * nonlinear model is determined by how well *
% * the linearized model (Taylor series model) *
% * eta(alpha_true, c_true) *
% * + Phi * (c - c_true) *
% * + dPhi * (alpha - alpha_true) *
% * fits the data in the neighborhood of the *
% * true values for alpha and c, where Phi *
% * and dPhi contain the partial derivatives *
% * of the model with respect to the c and *
% * alpha parameters, respectively, and are *
% * defined in ada. *
% **************************************************
% Regression.report:
% This structure includes information on the solution
% process, including the number of iterations,
% termination criterion, and exitflag from lsqnonlin.
% (Type 'help lsqnonlin' to see the exit conditions.)
% Regression.report.rank is the computed rank of the
% matrix for the linear subproblem. If this equals
% n, then the linear coefficients are well-determined.
% If it is less than n, then although the model might
% fit the data well, other linear coefficients might
% give just as good a fit.
% Regression.sigma:
% The estimate of the standard deviation is the
% weighted residual norm divided by the square root
% of the number of degrees of freedom.
% This is also called the "regression standard error"
% or the square-root of the weighted SSR (sum squared
% residual) divided by the square root of the
% number of degrees of freedom.
% Regression.RMS:
% The "residual mean square" is equal to sigma^2:
% RMS = wresid_norm^2 / (m-n+q)
% Regression.coef_determ:
% The "coefficient of determination" for the fit,
% also called the square of the multiple correlation
% coefficient, is sometimes called R^2.
% It is computed as 1 - wresid_norm^2/CTSS,
% where the "corrected total sum of squares"
% CTSS is the norm-squared of W*(y-y_bar),
% and the entries of y_bar are all equal to
% (the sum of W_i^2 y_i) divided by (the sum of W_i^2).
% A value of .95, for example, indicates that 95 per
% cent of the CTTS is accounted for by the fit.
%
% Regression.CovMx: (n+q) x (n+q)
% This is the estimated variance/covariance matrix for
% the parameters. The linear parameters c are ordered
% first, followed by the nonlinear parameters alpha.
% This is empty if dPhi is not computed by ada.
% Regression.CorMx: (n+q) x (n+q)
% This is the estimated correlation matrix for the
% parameters. The linear parameters c are ordered
% first, followed by the nonlinear parameters alpha.
% This is empty if dPhi is not computed by ada.
% Regression.std_param: (n+q) x 1
% This vector contains the estimate of the standard
% deviation for each parameter.
% The k-th element is the square root of the k-th main
% diagonal element of Regression.CovMx
% This is empty if dPhi is not computed by ada.
% Regression.t_ratio: (n+q) x 1
% The t-ratio for each parameter is equal to the
% parameter estimate divided by its standard deviation.
% (linear parameters c first, followed by alpha)
% This is empty if dPhi is not computed by ada.
% Regression.standardized_wresid:
% The k-th component of the "standardized weighted
% residual" is the k-th component of the weighted
% residual divided by its standard deviation.
% This is empty if dPhi is not computed by ada.
%
%---------------------------------------------------------------
% Specification of the function ada, which computes information
% related to Phi:
%
% function [Phi,dPhi,Ind] = ada(alpha)
%
% This function computes Phi and, if possible, dPhi.
%
% On Input:
%
% alpha q x 1 contains the current value of the alpha parameters.
%
% Note: If more input arguments are needed, use the standard
% Matlab syntax to accomplish this. For example, if
% the input arguments to ada are t, z, and alpha, then
% before calling varpro, initialize t and z, and in calling
% varpro, replace "@ada" by "@(alpha)ada(t,z,alpha)".
%
% On Output:
%
% Phi m x n1 where Phi(i,j) = phi_j(alpha,t_i).
% (n1 = n if there is no extra term;
% n1 = n+1 if an extra term is used)
% dPhi m x p where the columns contain partial derivative
% information for Phi and p is the number of
% columns in Ind
% (or dPhi = [] if derivatives are not available).
% Ind 2 x p Column k of dPhi contains the partial
% derivative of Phi_j with respect to alpha_i,
% evaluated at the current value of alpha,
% where j = Ind(1,k) and i = Ind(2,k).
% Columns of dPhi that are always zero, independent
% of alpha, need not be stored.
% Example: if phi_1 is a function of alpha_2 and alpha_3,
% and phi_2 is a function of alpha_1 and alpha_2, then
% we can set
% Ind = [ 1 1 2 2
% 2 3 1 2 ]
% In this case, the p=4 columns of dPhi contain
% d phi_1 / d alpha_2,
% d phi_1 / d alpha_3,
% d phi_2 / d alpha_1,
% d phi_2 / d alpha_2,
% evaluated at each t_i.
% There are no restrictions on how the columns of
% dPhi are ordered, as long as Ind correctly specifies
% the ordering.
%
% If derivatives dPhi are not available, then set dPhi = Ind = [].
%
%---------------------------------------------------------------
%
% Varpro calls lsqnonlin, which solves a constrained least squares
% problem with upper and lower bounds on the constraints. What
% distinguishes varpro from lsqnonlin is that, for efficiency and
% reliability, varpro causes lsqnonlin to only iterate on the
% nonlinear parameters. Given the information in Phi and dPhi, this
% requires an intricate but inexpensive computation of partial
% derivatives, and this is handled by the varpro function formJacobian.
%
% lsqnonlin is in Matlab's Optimization Toolbox. Another solver
% can be substituted if the toolbox is not available.
%
% Any parameters that require upper or lower bounds should be put in
% alpha, not c, even if they appear linearly in the model.
%
% The original Fortran implementation of the variable projection
% algorithm (ref. 2) was modified in 1977 by John Bolstad
% Computer Science Department, Serra House, Stanford University,
% using ideas of Linda Kaufman (ref. 5) to speed up the
% computation of derivatives. He also allowed weights on
% the observations, and computed the covariance matrix.
% Our Matlab version takes advantage of 30 years of improvements
% in programming languages and minimization algorithms.
% In this version, we also allow upper and lower bounds on the
% nonlinear parameters.
%
% It is hoped that this implementation will be of use to Matlab
% users, but also that its simplicity will make it easier for the
% algorithm to be implemented in other languages.
%
% This program is documented in
% Dianne P. O'Leary and Bert W. Rust,
% Variable Projection for Nonlinear Least Squares Problems,
% Computational Optimization and Applications (2012)
% doi 10.1007/s10589-012-9492-9.
%
% US National Inst. of Standards and Technology, 2010.
%
% Main reference:
%
% 0. Gene H. Golub and V. Pereyra, 'Separable nonlinear least
% squares: the variable projection method and its applications,'
% Inverse Problems 19, R1-R26 (2003).
%
% See also these papers, cited in John Bolstad's Fortran code:
%
% 1. Gene H. Golub and V. Pereyra, 'The differentiation of
% pseudo-inverses and nonlinear least squares problems whose
% variables separate,' SIAM J. Numer. Anal. 10, 413-432
% (1973).
% 2. ------, same title, Stanford C.S. Report 72-261, Feb. 1972.
% 3. Michael R. Osborne, 'Some aspects of non-linear least
% squares calculations,' in Lootsma, Ed., 'Numerical Methods
% for Non-Linear Optimization,' Academic Press, London, 1972.
% 4. Fred Krogh, 'Efficient implementation of a variable projection
% algorithm for nonlinear least squares problems,'
% Comm. ACM 17:3, 167-169 (March, 1974).
% 5. Linda Kaufman, 'A variable projection method for solving
% separable nonlinear least squares problems', B.I.T. 15,
% 49-57 (1975).
% 6. C. Lawson and R. Hanson, Solving Least Squares Problems,
% Prentice-Hall, Englewood Cliffs, N. J., 1974.
%
% These books discuss the statistical background:
%
% 7. David A. Belsley, Edwin Kuh, and Roy E. Welsch, Regression
% Diagnostics, John Wiley & Sons, New York, 1980, Chap. 2.
% 8. G.A.F. Seber and C.J. Wild, Nonlinear Regression,
% John Wiley & Sons, New York, 1989, Sec. 2.1, 5.1, and 5.2.
%
% Dianne P. O'Leary, NIST and University of Maryland, February 2011.
% Bert W. Rust, NIST February 2011.
% Comments updated 07-2012.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
global mydebug myneglect % test neglect of extra term in Jacobian
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Initialization: Check input, set default parameters and options.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[m,ell] = size(y); % m = number of observations
[m1,ell1] = size(w);
if (m1 ~= m) | (ell > 1) | (ell1 > 1)
error('y and w must be column vectors of the same length')
end
[q,ell] = size(alpha); % q = number of nonlinear parameters
if (ell > 1)
error('alpha must be a column vector containing initial guesses for nonlinear parameters')
end
if (nargin < 6)
lb = [];
else
[q1,ell] = size(lb);
if (q1 > 0) & (ell > 0)
if (q1 ~= q) | (ell > 1)
error('lb must be empty or a column vector of the same length as alpha')
end
end
end
if (nargin < 7)
ub = [];
else
[q1,ell] = size(ub);
if (q1 > 0) & (ell > 0)
if (q1 ~= q) | (ell > 1)
error('ub must be empty or a column vector of the same length as alpha')
end
end
end
if (nargin < 8)
options = optimset('lsqnonlin');
end
if (~strcmp(options.Display,'off'))
disp(sprintf('\n-------------------'))
disp(sprintf('VARPRO is beginning.'))
end
W = spdiags(w,0,m,m); % Create an m x m diagonal matrix from the vector w
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Make the first call to ada and do some error checking.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[Phi, dPhi, Ind] = feval(ada, alpha);
[m1,n1] = size(Phi); % n1 = number of basis functions Phi.
[m2,n2] = size(dPhi);
[ell,n3] = size(Ind);
if (m1 ~= m2) & (m2 > 0)
error('In user function ada: Phi and dPhi must have the same number of rows.')
end
if (n1 < n) | (n1 > n+1)
error('In user function ada: The number of columns in Phi must be n or n+1.')
end
if (n2 > 0) & (ell ~= 2)
error('In user function ada: Ind must have two rows.')
end
if (n2 > 0) & (n2 ~= n3)
error('In user function ada: dPhi and Ind must have the same number of columns.')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Solve the least squares problem using lsqnonlin or, if there
% are no nonlinear parameters, using the SVD procedure in formJacobian.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (q > 0) % The problem is nonlinear.
if ~isempty(dPhi)
options = optimset(options,'Jacobian','on');
end
[alpha, wresid_norm2, wresid, exitflag,output] = ...
lsqnonlin(@f_lsq, alpha, lb, ub, options);
[r, Jacobian, Phi, dPhi, y_est, rank] = f_lsq(alpha);
wresid_norm = sqrt(wresid_norm2);
Regression.report = output;
Regression.report.rank = rank;
Regression.report.exitflag = exitflag;
else % The problem is linear.
if (~strcmp(options.Display,'off'))
disp(sprintf('VARPRO problem is linear, since length(alpha)=0.'))
end
[Jacobian, c, wresid, y_est, Regression.report.rank] = ...
formJacobian(alpha, Phi, dPhi);
wresid_norm = norm(wresid);
wresid_norm2 = wresid_norm * wresid_norm;
end % if q > 0
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Compute some statistical diagnostics for the solution.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Calculate sample variance, the norm-squared of the residual
% divided by the number of degrees of freedom.
sigma2 = wresid_norm2 / (m-n-q);
% Compute Regression.sigma:
% square-root of weighted residual norm squared divided
% by number of degrees of freedom.
Regression.sigma = sqrt(sigma2);
% Compute Regression.coef_determ:
% The coeficient of determination for the fit,
% also called the square of the multiple correlation
% coefficient, or R^2.
% It is computed as 1 - wresid_norm^2/CTSS,
% where the "corrected total sum of squares"
% CTSS is the norm-squared of W*(y-y_bar),
% and the entries of y_bar are all equal to
% (the sum of W_i y_i) divided by (the sum of W_i).
y_bar = sum(w.*y) / sum(w);
CTTS = norm(W * (y - y_bar)) ^2;
Regression.coef_determ = 1 - wresid_norm^2 / CTTS;
% Compute Regression.RMS = sigma^2:
% the weighted residual norm divided by the number of
% degrees of freedom.
% RMS = wresid_norm / sqrt(m-n+q)
Regression.RMS = sigma2;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Compute some additional statistical diagnostics for the
% solution, if the user requested it.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (nargout==6)
if (isempty(dPhi))
Regression.CovMx = [];
Regression.CorMx = [];
Regression.std_param = [];
Regression.t_ratio = [];
Regression.standardized_wresid = [];
else
% Calculate the covariance matrix CovMx, which is sigma^2 times the
% inverse of H'*H, where
% H = W*[Phi,J]
% contains the partial derivatives of wresid with
% respect to the parameters in alpha and c.
[xx,pp] = size(dPhi);
J = zeros(m,q);
for kk = 1:pp,
j = Ind(1,kk);
i = Ind(2,kk);
if (j > n)
J(:,i) = J(:,i) + dPhi(:,kk);
else
J(:,i) = J(:,i) + c(j) * dPhi(:,kk);
end
end
[Qj,Rj,Pj] = qr(W*[Phi(:,1:n),J], 0); % Uses compact pivoted QR.
T2 = Rj \ (eye(size(Rj,1)));
CovMx = sigma2 * T2 * T2';
Regression.CovMx(Pj,Pj) = CovMx; % Undo the pivoting permutation.
% Compute Regression.CorMx:
% estimated correlation matrix (n+q) x (n+q) for the
% parameters. The linear parameters are ordered
% first, followed by the nonlinear parameters.
d = 1 ./ sqrt(diag(Regression.CovMx));
D = spdiags(d,0,n+q,n+q);
Regression.CorMx = D * Regression.CovMx * D;
% Compute Regression.std_param:
% The k-th element is the square root of the k-th main
% diagonal element of CovMx.
Regression.std_param = sqrt(diag(Regression.CovMx));
% Compute Regression.t_ratio:
% parameter estimates divided by their standard deviations.
alpha = reshape(alpha, q, 1);
Regression.t_ratio = [c; alpha] .* d;
% Compute Regression.standardized_wresid:
% The k-th component is the k-th component of the
% weighted residual, divided by its standard deviation.
% Let X = W*[Phi,J],
% h(k) = k-th main diagonal element of covariance
% matrix for wresid
% = k-th main diagonal element of X*inv(X'*X)*X'
% = k-th main diagonal element of Qj*Qj'.
% Then the standard deviation is estimated by
% sigma*sqrt(1-h(k)).
for k=1:m
temp(k,1) = Qj(k,:) * Qj(k,:)';
end
Regression.standardized_wresid = wresid ./(Regression.sigma*sqrt(1-temp));
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% End of statistical diagnostics computations.
% Print some final information if desired.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (~strcmp(options.Display,'off'))
disp(sprintf(' '))
disp(sprintf('VARPRO Results:'))
disp(sprintf(' Linear Parameters:'))
disp(sprintf(' %15.7e ',c))
disp(sprintf(' Nonlinear Parameters:'))
disp(sprintf(' %15.7e ',alpha))
disp(sprintf(' '))
disp(sprintf(' Norm-squared of weighted residual = %15.7e',wresid_norm2))
disp(sprintf(' Norm-squared of data vector = %15.7e',norm(w.*y)^2))
disp(sprintf(' Norm of weighted residual = %15.7e',wresid_norm))
disp(sprintf(' Norm of data vector = %15.7e',norm(w.*y)))
disp(sprintf(' Expected error of observations = %15.7e',Regression.sigma))
disp(sprintf(' Coefficient of determination = %15.7e',Regression.coef_determ))
disp(sprintf('VARPRO is finished.'))
disp(sprintf('-------------------\n'))
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% The computation is now completed.
%
% varpro uses the following two functions, f_lsq and formJacobian.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%--------------------------- Beginning of f_lsq --------------------------
function [wr_trial, Jacobian, Phi_trial, dPhi_trial, y_est,myrank] = ...
f_lsq(alpha_trial)
% function [wr_trial,Jacobian,Phi_trial,dPhi_trial,y_est,myrank] = ...
% f_lsq(alpha_trial)
%
% This function is used by lsqnonlin to compute
% wr_trial the current weighted residual
% Jacobian the Jacobian matrix for the nonlinear parameters
% It also computes
% Phi_trial the current Phi matrix
% dPhi_trial the partial derivatives of Phi_trial (if available).
% y_est the model estimates of y
% myrank the rank of the matrix W*Phi in the linear problem.
%
% It uses the user-supplied function ada and the Varpro function formJacobian.
[Phi_trial, dPhi_trial, Ind] = feval(ada, alpha_trial);
[Jacobian, c, wr_trial, y_est,myrank] = ...
formJacobian(alpha_trial, Phi_trial, dPhi_trial);
end %--------------------------- End of f_lsq ---------------------------
%----------------------- Beginning of formJacobian ----------------------
function [Jacobian, c, wresid, y_est,myrank] = formJacobian(alpha, Phi, dPhi)
% function [Jacobian, c, wresid, y_est,myrank] = formJacobian(alpha, Phi, dPhi)
%
% This function computes the Jacobian, the linear parameters c,
% and the residual for the model with nonlinear parameters alpha.
% It is used by the functions Varpro and f_lsq.
%
% Notation: there are m data observations
% n1 basis functions (columns of Phi)
% n linear parameters c
% (n = n1, or n = n1 - 1)
% q nonlinear parameters alpha
% p nonzero columns of partial derivatives of Phi
%
% Input:
%
% alpha q x 1 the nonlinear parameters,
% Phi m x n1 the basis functions Phi(alpha),
% dPhi m x p the partial derivatives of Phi
%
% The variables W, y, q, m, n1, and n are also used.
%
% Output:
%
% Jacobian m x p the Jacobian matrix, with J(i,k) = partial
% derivative of W resid(i) with respect to alpha(k).
% c n x 1 the optimal linear parameters for this choice of alpha.
% wresid m x 1 the weighted residual = W(y - Phi * c)
% y_est m x 1 the model estimates = Phi * c)
% myrank 1 x 1 the rank of the matrix W*Phi.
% First we determine the optimal linear parameters c for
% the given values of alpha, and the resulting residual.
%
% We use the singular value decomposition to solve the linear least
% squares problem
%
% min_{c} || W resid ||.
% resid = y - Phi * c.
%
% If W*Phi has any singular value less than m * its largest singular value,
% these singular values are set to zero.
[U,S,V] = svd(W*Phi(:,1:n));
% Three cases: Usually n > 1, but n = 1 and n = 0 require
% special handling (one or no linear parameters).
if (n > 1)
s = diag(S);
elseif (n==1)
s = S;
else % n = 0
if isempty(Ind)
Jacobian = [];
else
Jacobian = zeros(length(y),length(alpha));
Jacobian(:,Ind(2,:)) = -W*dPhi;
end
c = [];
y_est = Phi;
wresid = W * (y - y_est);
myrank = 1;
return
end
tol = m * eps;
myrank = sum(s > tol*s(1) ); % number of singular values > tol*norm(W*Phi)
s = s(1:myrank);
if (myrank < n) & (~strcmp(options.Display,'off'))
disp(sprintf('Warning from VARPRO:'))
disp(sprintf(' The linear parameters are currently not well-determined.'))
disp(sprintf(' The rank of the matrix in the subproblem is %d',myrank))
disp(sprintf(' which is less than the n=%d linear parameters.',n))
end
yuse = y;
if (n < n1)
yuse = y - Phi(:,n1); % extra function Phi(:,n+1)
end
temp = U(:,1:myrank)' * (W*yuse);
c = V(:,1:myrank) * (temp./s);
y_est = Phi(:,1:n) * c;
wresid = W * (yuse - y_est);
if (n < n1)
y_est = y_est + Phi(:,n1);
end
if (q == 0) | (isempty(dPhi))
Jacobian = [];
return
end
% Second, we compute the Jacobian.
% There are two pieces, which we call Jac1 and Jac2,
% with Jacobian = - (Jac1 + Jac2).
%
% The formula for Jac1 is (P D(W*Phi) pinv(W*Phi) y,
% and Jac2 is ((W*Phi)^+})^T (P D(W*Phi))^T y.
% where P is the projection onto the orthogonal complement
% of the range of W*Phi,
% D(W*Phi) is the m x n x q tensor of derivatives of W*Phi,
% pinv(W*Phi) is the pseudo-inverse of W*Phi.
% (See Golub&Pereyra (1973) equation (5.4). We use their notational
% conventions for multiplications by tensors.)
%
% Golub&Pereyra (2003), p. R5 break these formulas down by columns:
% The j-th column of Jac1 is P D_j pinv(W*Phi) y
% = P D_j c
% and the j-th column of Jac2 is (P D_j pinv(W*Phi))^T y,
% = (pinv(W*Phi))^T D_j^T P^T y
% = (pinv(W*Phi))^T D_j^T wresid.
% where D_j is the m x n matrix of partial derivatives of W*Phi
% with respect to alpha(j).
% We begin the computation by precomputing
% WdPhi, which contains the derivatives of W*Phi, and
% WdPhi_r, which contains WdPhi' * wresid.
WdPhi = W * dPhi;
WdPhi_r = WdPhi' * wresid;
T2 = zeros(n1, q);
ctemp = c;
if (n1 > n)
ctemp = [ctemp; 1];
end
% Now we work column-by-column, for j=1:q.
%
% We form Jac1 = D(W*Phi) ctemp.
% After the loop, this matrix is multiplied by
% P = U(:,myrank+1:m)*(U(:,myrank+1:m)'
% to complete the computation.
%
% We also form T2 = (D_j(W*Phi))^T wresid by unpacking
% the information in WdPhi_r, using Ind.
% After the loop, T2 is multiplied by the pseudoinverse
% (pinv(W*Phi))^T = U(:,1:myrank) * diag(1./s) * (V(:,1:myrank)'
% to complete the computation of Jac2.
% Note: if n1 > n, last row of T2 is not needed.
for j=1:q, % for each nonlinear parameter alpha(j)
range = find(Ind(2,:)==j); % columns of WdPhi relevant to alpha(j)
indrows = Ind(1,range); % relevant rows of ctemp
Jac1(:,j) = WdPhi(:,range) * ctemp(indrows);
T2(indrows,j) = WdPhi_r(range);
end
Jac1 = U(:,myrank+1:m) * (U(:,myrank+1:m)' * Jac1);
T2 = diag(1 ./ s(1:myrank)) * (V(:,1:myrank)' * T2(1:n,:));
Jac2 = U(:,1:myrank) * T2;
Jacobian = -(Jac1 + Jac2);
if (mydebug)
disp(sprintf('VARPRO norm(neglected Jacobian)/norm(Jacobian) = %e',...
norm(Jac2,'fro')/norm(Jacobian,'fro') ))
if (myneglect)
disp('neglecting')
Jacobian = -Jac1;
end
end
end %-------------------------- End of formJacobian ----------------------
end %------------------------------ End of varpro ------------------------