00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LANEG_HEADER 00036 #define TEMPLATE_LAPACK_LANEG_HEADER 00037 00038 template<class Treal> 00039 int template_lapack_laneg(integer *n, Treal *d__, Treal *lld, Treal * 00040 sigma, Treal *pivmin, integer *r__) 00041 { 00042 /* System generated locals */ 00043 integer ret_val, i__1, i__2, i__3, i__4; 00044 00045 /* Local variables */ 00046 integer j; 00047 Treal p, t; 00048 integer bj; 00049 Treal tmp; 00050 integer neg1, neg2; 00051 Treal bsav, gamma, dplus; 00052 integer negcnt; 00053 logical sawnan; 00054 Treal dminus; 00055 00056 00057 /* -- LAPACK auxiliary routine (version 3.2) -- */ 00058 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00059 /* November 2006 */ 00060 00061 /* .. Scalar Arguments .. */ 00062 /* .. */ 00063 /* .. Array Arguments .. */ 00064 /* .. */ 00065 00066 /* Purpose */ 00067 /* ======= */ 00068 00069 /* DLANEG computes the Sturm count, the number of negative pivots */ 00070 /* encountered while factoring tridiagonal T - sigma I = L D L^T. */ 00071 /* This implementation works directly on the factors without forming */ 00072 /* the tridiagonal matrix T. The Sturm count is also the number of */ 00073 /* eigenvalues of T less than sigma. */ 00074 00075 /* This routine is called from DLARRB. */ 00076 00077 /* The current routine does not use the PIVMIN parameter but rather */ 00078 /* requires IEEE-754 propagation of Infinities and NaNs. This */ 00079 /* routine also has no input range restrictions but does require */ 00080 /* default exception handling such that x/0 produces Inf when x is */ 00081 /* non-zero, and Inf/Inf produces NaN. For more information, see: */ 00082 00083 /* Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */ 00084 /* Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */ 00085 /* Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 */ 00086 /* (Tech report version in LAWN 172 with the same title.) */ 00087 00088 /* Arguments */ 00089 /* ========= */ 00090 00091 /* N (input) INTEGER */ 00092 /* The order of the matrix. */ 00093 00094 /* D (input) DOUBLE PRECISION array, dimension (N) */ 00095 /* The N diagonal elements of the diagonal matrix D. */ 00096 00097 /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ 00098 /* The (N-1) elements L(i)*L(i)*D(i). */ 00099 00100 /* SIGMA (input) DOUBLE PRECISION */ 00101 /* Shift amount in T - sigma I = L D L^T. */ 00102 00103 /* PIVMIN (input) DOUBLE PRECISION */ 00104 /* The minimum pivot in the Sturm sequence. May be used */ 00105 /* when zero pivots are encountered on non-IEEE-754 */ 00106 /* architectures. */ 00107 00108 /* R (input) INTEGER */ 00109 /* The twist index for the twisted factorization that is used */ 00110 /* for the negcount. */ 00111 00112 /* Further Details */ 00113 /* =============== */ 00114 00115 /* Based on contributions by */ 00116 /* Osni Marques, LBNL/NERSC, USA */ 00117 /* Christof Voemel, University of California, Berkeley, USA */ 00118 /* Jason Riedy, University of California, Berkeley, USA */ 00119 00120 /* ===================================================================== */ 00121 00122 /* .. Parameters .. */ 00123 /* Some architectures propagate Infinities and NaNs very slowly, so */ 00124 /* the code computes counts in BLKLEN chunks. Then a NaN can */ 00125 /* propagate at most BLKLEN columns before being detected. This is */ 00126 /* not a general tuning parameter; it needs only to be just large */ 00127 /* enough that the overhead is tiny in common cases. */ 00128 /* .. */ 00129 /* .. Local Scalars .. */ 00130 /* .. */ 00131 /* .. Intrinsic Functions .. */ 00132 /* .. */ 00133 /* .. External Functions .. */ 00134 /* .. */ 00135 /* .. Executable Statements .. */ 00136 /* Parameter adjustments */ 00137 --lld; 00138 --d__; 00139 00140 /* Function Body */ 00141 negcnt = 0; 00142 /* I) upper part: L D L^T - SIGMA I = L+ D+ L+^T */ 00143 t = -(*sigma); 00144 i__1 = *r__ - 1; 00145 for (bj = 1; bj <= i__1; bj += 128) { 00146 neg1 = 0; 00147 bsav = t; 00148 /* Computing MIN */ 00149 i__3 = bj + 127, i__4 = *r__ - 1; 00150 i__2 = minMACRO(i__3,i__4); 00151 for (j = bj; j <= i__2; ++j) { 00152 dplus = d__[j] + t; 00153 if (dplus < 0.) { 00154 ++neg1; 00155 } 00156 tmp = t / dplus; 00157 t = tmp * lld[j] - *sigma; 00158 /* L21: */ 00159 } 00160 sawnan = template_lapack_isnan(&t); 00161 /* Run a slower version of the above loop if a NaN is detected. */ 00162 /* A NaN should occur only with a zero pivot after an infinite */ 00163 /* pivot. In that case, substituting 1 for T/DPLUS is the */ 00164 /* correct limit. */ 00165 if (sawnan) { 00166 neg1 = 0; 00167 t = bsav; 00168 /* Computing MIN */ 00169 i__3 = bj + 127, i__4 = *r__ - 1; 00170 i__2 = minMACRO(i__3,i__4); 00171 for (j = bj; j <= i__2; ++j) { 00172 dplus = d__[j] + t; 00173 if (dplus < 0.) { 00174 ++neg1; 00175 } 00176 tmp = t / dplus; 00177 if (template_lapack_isnan(&tmp)) { 00178 tmp = 1.; 00179 } 00180 t = tmp * lld[j] - *sigma; 00181 /* L22: */ 00182 } 00183 } 00184 negcnt += neg1; 00185 /* L210: */ 00186 } 00187 00188 /* II) lower part: L D L^T - SIGMA I = U- D- U-^T */ 00189 p = d__[*n] - *sigma; 00190 i__1 = *r__; 00191 for (bj = *n - 1; bj >= i__1; bj += -128) { 00192 neg2 = 0; 00193 bsav = p; 00194 /* Computing MAX */ 00195 i__3 = bj - 127; 00196 i__2 = maxMACRO(i__3,*r__); 00197 for (j = bj; j >= i__2; --j) { 00198 dminus = lld[j] + p; 00199 if (dminus < 0.) { 00200 ++neg2; 00201 } 00202 tmp = p / dminus; 00203 p = tmp * d__[j] - *sigma; 00204 /* L23: */ 00205 } 00206 sawnan = template_lapack_isnan(&p); 00207 /* As above, run a slower version that substitutes 1 for Inf/Inf. */ 00208 00209 if (sawnan) { 00210 neg2 = 0; 00211 p = bsav; 00212 /* Computing MAX */ 00213 i__3 = bj - 127; 00214 i__2 = maxMACRO(i__3,*r__); 00215 for (j = bj; j >= i__2; --j) { 00216 dminus = lld[j] + p; 00217 if (dminus < 0.) { 00218 ++neg2; 00219 } 00220 tmp = p / dminus; 00221 if (template_lapack_isnan(&tmp)) { 00222 tmp = 1.; 00223 } 00224 p = tmp * d__[j] - *sigma; 00225 /* L24: */ 00226 } 00227 } 00228 negcnt += neg2; 00229 /* L230: */ 00230 } 00231 00232 /* III) Twist index */ 00233 /* T was shifted by SIGMA initially. */ 00234 gamma = t + *sigma + p; 00235 if (gamma < 0.) { 00236 ++negcnt; 00237 } 00238 ret_val = negcnt; 00239 return ret_val; 00240 } /* dlaneg_ */ 00241 00242 #endif