00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LASV2_HEADER 00036 #define TEMPLATE_LAPACK_LASV2_HEADER 00037 00038 00039 template<class Treal> 00040 int template_lapack_lasv2(const Treal *f, const Treal *g, const Treal *h__, 00041 Treal *ssmin, Treal *ssmax, Treal *snr, Treal * 00042 csr, Treal *snl, Treal *csl) 00043 { 00044 /* -- LAPACK auxiliary routine (version 3.0) -- 00045 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00046 Courant Institute, Argonne National Lab, and Rice University 00047 October 31, 1992 00048 00049 00050 Purpose 00051 ======= 00052 00053 DLASV2 computes the singular value decomposition of a 2-by-2 00054 triangular matrix 00055 [ F G ] 00056 [ 0 H ]. 00057 On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the 00058 smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and 00059 right singular vectors for abs(SSMAX), giving the decomposition 00060 00061 [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] 00062 [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. 00063 00064 Arguments 00065 ========= 00066 00067 F (input) DOUBLE PRECISION 00068 The (1,1) element of the 2-by-2 matrix. 00069 00070 G (input) DOUBLE PRECISION 00071 The (1,2) element of the 2-by-2 matrix. 00072 00073 H (input) DOUBLE PRECISION 00074 The (2,2) element of the 2-by-2 matrix. 00075 00076 SSMIN (output) DOUBLE PRECISION 00077 abs(SSMIN) is the smaller singular value. 00078 00079 SSMAX (output) DOUBLE PRECISION 00080 abs(SSMAX) is the larger singular value. 00081 00082 SNL (output) DOUBLE PRECISION 00083 CSL (output) DOUBLE PRECISION 00084 The vector (CSL, SNL) is a unit left singular vector for the 00085 singular value abs(SSMAX). 00086 00087 SNR (output) DOUBLE PRECISION 00088 CSR (output) DOUBLE PRECISION 00089 The vector (CSR, SNR) is a unit right singular vector for the 00090 singular value abs(SSMAX). 00091 00092 Further Details 00093 =============== 00094 00095 Any input parameter may be aliased with any output parameter. 00096 00097 Barring over/underflow and assuming a guard digit in subtraction, all 00098 output quantities are correct to within a few units in the last 00099 place (ulps). 00100 00101 In IEEE arithmetic, the code works correctly if one matrix element is 00102 infinite. 00103 00104 Overflow will not occur unless the largest singular value itself 00105 overflows or is within a few ulps of overflow. (On machines with 00106 partial overflow, like the Cray, overflow may occur if the largest 00107 singular value is within a factor of 2 of overflow.) 00108 00109 Underflow is harmless if underflow is gradual. Otherwise, results 00110 may correspond to a matrix modified by perturbations of size near 00111 the underflow threshold. 00112 00113 ===================================================================== */ 00114 /* Table of constant values */ 00115 Treal c_b3 = 2.; 00116 Treal c_b4 = 1.; 00117 00118 /* System generated locals */ 00119 Treal d__1; 00120 /* Local variables */ 00121 integer pmax; 00122 Treal temp; 00123 logical swap; 00124 Treal a, d__, l, m, r__, s, t, tsign, fa, ga, ha; 00125 Treal ft, gt, ht, mm; 00126 logical gasmal; 00127 Treal tt, clt, crt, slt, srt; 00128 00129 /* Initialization added by Elias to get rid of compiler warnings. */ 00130 tsign = 0; 00131 00132 00133 ft = *f; 00134 fa = absMACRO(ft); 00135 ht = *h__; 00136 ha = absMACRO(*h__); 00137 00138 /* PMAX points to the maximum absolute element of matrix 00139 PMAX = 1 if F largest in absolute values 00140 PMAX = 2 if G largest in absolute values 00141 PMAX = 3 if H largest in absolute values */ 00142 00143 pmax = 1; 00144 swap = ha > fa; 00145 if (swap) { 00146 pmax = 3; 00147 temp = ft; 00148 ft = ht; 00149 ht = temp; 00150 temp = fa; 00151 fa = ha; 00152 ha = temp; 00153 00154 /* Now FA .ge. HA */ 00155 00156 } 00157 gt = *g; 00158 ga = absMACRO(gt); 00159 if (ga == 0.) { 00160 00161 /* Diagonal matrix */ 00162 00163 *ssmin = ha; 00164 *ssmax = fa; 00165 clt = 1.; 00166 crt = 1.; 00167 slt = 0.; 00168 srt = 0.; 00169 } else { 00170 gasmal = TRUE_; 00171 if (ga > fa) { 00172 pmax = 2; 00173 if (fa / ga < template_lapack_lamch("EPS", (Treal)0)) { 00174 00175 /* Case of very large GA */ 00176 00177 gasmal = FALSE_; 00178 *ssmax = ga; 00179 if (ha > 1.) { 00180 *ssmin = fa / (ga / ha); 00181 } else { 00182 *ssmin = fa / ga * ha; 00183 } 00184 clt = 1.; 00185 slt = ht / gt; 00186 srt = 1.; 00187 crt = ft / gt; 00188 } 00189 } 00190 if (gasmal) { 00191 00192 /* Normal case */ 00193 00194 d__ = fa - ha; 00195 if (d__ == fa) { 00196 00197 /* Copes with infinite F or H */ 00198 00199 l = 1.; 00200 } else { 00201 l = d__ / fa; 00202 } 00203 00204 /* Note that 0 .le. L .le. 1 */ 00205 00206 m = gt / ft; 00207 00208 /* Note that abs(M) .le. 1/macheps */ 00209 00210 t = 2. - l; 00211 00212 /* Note that T .ge. 1 */ 00213 00214 mm = m * m; 00215 tt = t * t; 00216 s = template_blas_sqrt(tt + mm); 00217 00218 /* Note that 1 .le. S .le. 1 + 1/macheps */ 00219 00220 if (l == 0.) { 00221 r__ = absMACRO(m); 00222 } else { 00223 r__ = template_blas_sqrt(l * l + mm); 00224 } 00225 00226 /* Note that 0 .le. R .le. 1 + 1/macheps */ 00227 00228 a = (s + r__) * .5; 00229 00230 /* Note that 1 .le. A .le. 1 + abs(M) */ 00231 00232 *ssmin = ha / a; 00233 *ssmax = fa * a; 00234 if (mm == 0.) { 00235 00236 /* Note that M is very tiny */ 00237 00238 if (l == 0.) { 00239 t = template_lapack_d_sign(&c_b3, &ft) * template_lapack_d_sign(&c_b4, >); 00240 } else { 00241 t = gt / template_lapack_d_sign(&d__, &ft) + m / t; 00242 } 00243 } else { 00244 t = (m / (s + t) + m / (r__ + l)) * (a + 1.); 00245 } 00246 l = template_blas_sqrt(t * t + 4.); 00247 crt = 2. / l; 00248 srt = t / l; 00249 clt = (crt + srt * m) / a; 00250 slt = ht / ft * srt / a; 00251 } 00252 } 00253 if (swap) { 00254 *csl = srt; 00255 *snl = crt; 00256 *csr = slt; 00257 *snr = clt; 00258 } else { 00259 *csl = clt; 00260 *snl = slt; 00261 *csr = crt; 00262 *snr = srt; 00263 } 00264 00265 /* Correct signs of SSMAX and SSMIN */ 00266 00267 if (pmax == 1) { 00268 tsign = template_lapack_d_sign(&c_b4, csr) * template_lapack_d_sign(&c_b4, csl) * template_lapack_d_sign(&c_b4, f); 00269 } 00270 if (pmax == 2) { 00271 tsign = template_lapack_d_sign(&c_b4, snr) * template_lapack_d_sign(&c_b4, csl) * template_lapack_d_sign(&c_b4, g); 00272 } 00273 if (pmax == 3) { 00274 tsign = template_lapack_d_sign(&c_b4, snr) * template_lapack_d_sign(&c_b4, snl) * template_lapack_d_sign(&c_b4, h__); 00275 } 00276 *ssmax = template_lapack_d_sign(ssmax, &tsign); 00277 d__1 = tsign * template_lapack_d_sign(&c_b4, f) * template_lapack_d_sign(&c_b4, h__); 00278 *ssmin = template_lapack_d_sign(ssmin, &d__1); 00279 return 0; 00280 00281 /* End of DLASV2 */ 00282 00283 } /* dlasv2_ */ 00284 00285 #endif