// ===========================================================================
//
//       Filename:  bicgstab.h
//
//    Description:
//
//        Version:  0.0
//        Created:  10/27/2009 03:15:06 PM
//       Revision:  none
//       Compiler:  g++ (c++)
//
//         Author:  Trevor Irons (ti)
//
//   Organisation:  Colorado School of Mines (CSM)
//                  United States Geological Survey (USGS)
//
//          Email:  tirons@mines.edu, tirons@usgs.gov
//
//  This program is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// ===========================================================================

#include <Eigen/Core>
#include <Eigen/Sparse>

#ifdef CHOLMODPRECONDITION
#include <Eigen/CholmodSupport>
#endif // CHOLMODPRECONDITION

//#include "unsupported/Eigen/IterativeSolvers"
//#include

#include <iostream>
#include <iomanip>
#include <fstream>
#include <algorithm>
#include <cmath>

#include "lemma.h"
#include "timer.h"

using namespace Eigen;
using namespace Lemma;

//typedef Eigen::VectorXcd VectorXcr;
typedef Eigen::SparseMatrix<Complex> SparseMat;

// On Input
// A       =  Matrix
// B       =  Right hand side
// X       =  initial guess, and solution
// maxit   =  maximum Number of iterations
// tol     =  error tolerance
// On Output
// X       =  real solution vector
// errorn  =  Real error norm
int bicgstab(const SparseMat &A, const SparseMat &M, const VectorXcr &b, VectorXcr &x,
        int &max_it, Real &tol, Real &errorn, int &iter_done,
        const bool& banner = true) {

    Complex omega, rho, rho_1, alpha, beta;
    Real    bnrm2, eps, errmin;
    int     n, iter; //, istat;

    // Determine size of system and init vectors
    n = x.size();
    VectorXcr r(n);
    VectorXcr r_tld(n);
    VectorXcr p(n);
    VectorXcr v(n);
    VectorXcr p_hat(n);
    VectorXcr s(n);
    VectorXcr s_hat(n);
    VectorXcr t(n);
    VectorXcr xmin(n);

    if (banner) {
        std::cout << "Start BiCGStab, memory needed: "
                  << (sizeof(Complex)*(9+2)*n/(1024.*1024*1024)) << " [Gb]\n";
    }

    // Initialise
    iter_done = 0;
    v.setConstant(0.); // not necessary I don't think
    t.setConstant(0.);
    eps = 1e-100;

    bnrm2 = b.norm();
    if (bnrm2 == 0) {
        x.setConstant(0.0);
        errorn = 0;
        std::cerr << "Trivial case of Ax = b, where b is 0\n";
        return (0);
    }

    // If there is an initial guess
    if ( x.norm() ) {
        r = b - A.selfadjointView<Eigen::Upper>()*x;
        //r = b - A*x;
    } else {
        r = b;
    }

    errorn = r.norm() / bnrm2;
    omega  = 1.;
    r_tld  = r;
    errmin = 1e30;

    // Get down to business
    for (iter=0; iter<max_it; iter++) {

        rho = r_tld.dot(r);
        if (iter > 0) {
            beta = (rho/rho_1) * (alpha/omega);
            p = r.array() + beta*(p.array()-omega*v.array()).array();
        } else {
            p = r;
        }

        // Use pseudo inverse to get approximate answer
        //#pragma omp sections
        p_hat = M*p;

        //v = A*p_hat; // TODO double check
        v = A.selfadjointView<Eigen::Upper>()*p_hat; // TODO double check

        alpha = rho / r_tld.dot(v);
        s = r.array() - alpha*v.array();
        errorn = s.norm()/bnrm2;

        if (errorn < tol && iter > 1) {
            x.array() += alpha*p_hat.array();
            return (0);
        }

        s_hat = M*s;
        t = A.selfadjointView<Eigen::Upper>()*s_hat;
        //t = A*s_hat;

        omega = t.dot(s) / t.dot(t);
        x.array() += alpha*p_hat.array() + omega*s_hat.array();
        r = s.array() - omega*t.array();
        errorn = r.norm() / bnrm2;
        iter_done = iter;

        if (errorn < errmin) {
            // remember the model with the smallest norm
            errmin = errorn;
            xmin = x;
        }

        if ( errorn <= tol )     return (0);
        if ( abs(omega) < eps )  return (0);
        rho_1 = rho;
    }
    return (0);
}

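// Example usage (an illustrative sketch only; the assembly of A, M and b is
// assumed to happen elsewhere).  M is an explicit sparse approximation to
// A^-1, e.g. the inverted diagonal of A for a Jacobi-type preconditioner:
//
//     SparseMat A;                              // self-adjoint system matrix
//     SparseMat M;                              // sparse approximation to inv(A)
//     VectorXcr b;                              // right-hand side
//     VectorXcr x = VectorXcr::Zero(b.size());  // zero initial guess
//     int  max_it = 1000, iter_done = 0;
//     Real tol = 1e-9, errorn = 0;
//     bicgstab(A, M, b, x, max_it, tol, errorn, iter_done);
//     std::cout << iter_done << " iterations, |r|/|b| = " << errorn << "\n";
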
template <typename Preconditioner>
bool preconditionedBiCGStab(const SparseMat &A, const Preconditioner &M,
        const Ref< VectorXcr const > b,
        Ref< VectorXcr > x,
        const int &max_it, const Real &tol, Real &errorn, int &iter_done) {

    Complex omega, rho, rho_1, alpha, beta;
    Real    bnrm2, eps;
    int     n, iter;
    Real    tol2 = tol*tol;

    // Determine size of system and init vectors
    n = x.size();

    VectorXcd r(n);
    VectorXcd r_tld(n);
    VectorXcd p(n);
    VectorXcd s(n);
    VectorXcd s_hat(n);
    VectorXcd p_hat(n);
    VectorXcd v = VectorXcr::Zero(n);
    VectorXcd t = VectorXcr::Zero(n);

    //std::cout << "Start BiCGStab, memory needed: "
    //          << (sizeof(Complex)*(8+2)*n/(1024.*1024)) / (1024.) << " [Gb]\n";

    // Initialise
    iter_done = 0;
    eps = 1e-100;

    bnrm2 = b.squaredNorm();
    if (bnrm2 == 0) {
        x.setConstant(0.0);
        errorn = 0;
        std::cerr << "Trivial case of Ax = b, where b is 0\n";
        return (false);
    }

    // If there is an initial guess
    if ( x.squaredNorm() ) {
        r = b - A.selfadjointView<Eigen::Upper>()*x;
    } else {
        r = b;
    }

    errorn = r.squaredNorm() / bnrm2;
    omega = 1.;
    r_tld = r;

    // Get down to business
    for (iter=0; iter<max_it; iter++) {

        rho = r_tld.dot(r);
        if (iter > 0) {
            beta = (rho/rho_1) * (alpha/omega);
            p = r + beta*(p-omega*v);
        } else {
            p = r;
        }

        p_hat = M.solve(p);
        v.noalias() = A.selfadjointView<Eigen::Upper>()*p_hat;

        alpha = rho / r_tld.dot(v);
        s = r - alpha*v;
        errorn = s.squaredNorm()/bnrm2;

        if (errorn < tol2 && iter > 1) {
            x = x + alpha*p_hat;
            errorn = std::sqrt(errorn);
            return (true);
        }

        s_hat = M.solve(s);
        t.noalias() = A.selfadjointView<Eigen::Upper>()*s_hat;

        omega = t.dot(s) / t.dot(t);
        x += alpha*p_hat + omega*s_hat;
        r = s - omega*t;
        errorn = r.squaredNorm() / bnrm2;
        iter_done = iter;

        if ( errorn <= tol2 || abs(omega) < eps) {
            errorn = std::sqrt(errorn);
            return (true);
        }

        rho_1 = rho;
    }
    return (false);
}

template <typename Preconditioner>
bool preconditionedSCBiCG(const SparseMat &A, const Preconditioner &M,
        const Ref< VectorXcr const > b,
        Ref< VectorXcr > x,
        const int &max_iter, const Real &tol, Real &errorn, int &iter_done) {

    Real resid;
    VectorXcr p, z, q;
    Complex alpha, beta, rho, rho_1;

    Real normb = b.norm( );
    VectorXcr r = b - A*x;

    if (normb == 0.0) normb = 1;

    if ((resid = r.norm( ) / normb) <= tol) {
        errorn = resid;
        iter_done = 0;
        return (true);   // converged on entry
    }

    for (int i = 1; i <= max_iter; i++) {
        z = M.solve(r);
        rho = r.dot(z);

        if (i == 1) {
            p = z;
        } else {
            beta = rho / rho_1;
            p = z + beta * p;
        }

        q = A*p;
        alpha = rho / p.dot(q);

        x += alpha * p;
        r -= alpha * q;
        std::cout << "resid\t" << resid << std::endl;
        if ((resid = r.norm( ) / normb) <= tol) {
            errorn = resid;
            iter_done = i;
            return (true);   // converged
        }

        rho_1 = rho;
    }
    errorn = resid;
    return (false);
}

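// Example usage (an illustrative sketch only; A, b and x as in the sketch
// above).  Any object exposing a solve() method may be used as the
// Preconditioner, for instance Eigen's DiagonalPreconditioner;
// preconditionedSCBiCG accepts the same argument list:
//
//     Eigen::DiagonalPreconditioner<Complex> M(A);   // Jacobi-type preconditioner
//     int  iter_done = 0;
//     Real errorn = 0;
//     bool converged = preconditionedBiCGStab(A, M, b, x, 1000, 1e-9,
//                                             errorn, iter_done);
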
/** \internal Low-level conjugate gradient algorithm
  * \param mat The matrix A
  * \param rhs The right hand side vector b
  * \param x On input an initial solution, on output the computed solution.
  * \param precond A preconditioner being able to efficiently solve for an
  *                approximation of Ax=b (regardless of b)
  * \param iters On input the max number of iterations, on output the number of performed iterations.
  * \param tol_error On input the tolerance error, on output an estimation of the relative error.
  */
template <typename Rhs, typename Dest, typename Preconditioner>
EIGEN_DONT_INLINE
void conjugateGradient(const SparseMat& mat, const Rhs& rhs, Dest& x,
        const Preconditioner& precond, int& iters,
        typename Dest::RealScalar& tol_error) {

    using std::sqrt;
    using std::abs;
    typedef typename Dest::RealScalar RealScalar;
    typedef typename Dest::Scalar Scalar;
    typedef Matrix<Scalar,Dynamic,1> VectorType;

    RealScalar tol = tol_error;
    int maxIters = iters;

    int n = mat.cols();

    VectorType residual = rhs - mat.selfadjointView<Eigen::Upper>() * x; //initial residual

    RealScalar rhsNorm2 = rhs.squaredNorm();
    if (rhsNorm2 == 0) {
        x.setZero();
        iters = 0;
        tol_error = 0;
        return;
    }
    RealScalar threshold = tol*tol*rhsNorm2;
    RealScalar residualNorm2 = residual.squaredNorm();
    if (residualNorm2 < threshold) {
        iters = 0;
        tol_error = sqrt(residualNorm2 / rhsNorm2);
        return;
    }

    VectorType p(n);
    p = precond.solve(residual);      //initial search direction

    VectorType z(n), tmp(n);
    RealScalar absNew = numext::real(residual.dot(p));  // the square of the absolute value of r scaled by invM
    int i = 0;
    while (i < maxIters) {
        tmp.noalias() = mat.selfadjointView<Eigen::Upper>() * p;   // the bottleneck of the algorithm

        Scalar alpha = absNew / p.dot(tmp);   // the amount we travel on dir
        x += alpha * p;                       // update solution
        residual -= alpha * tmp;              // update residue

        residualNorm2 = residual.squaredNorm();
        if (residualNorm2 < threshold) break;

        z = precond.solve(residual);          // approximately solve for "A z = residual"

        RealScalar absOld = absNew;
        absNew = numext::real(residual.dot(z));  // update the absolute value of r
        RealScalar beta = absNew / absOld;       // calculate the Gram-Schmidt value used to create the new search direction
        p = z + beta * p;                        // update search direction
        i++;
    }
    tol_error = sqrt(residualNorm2 / rhsNorm2);
    iters = i;
}

// //  Computes implicit
// VectorXcr implicitDCInvBPhi (const SparseMat& D, const SparseMat& C,
//                 const SparseMat& B, const SparseMat& MC,
//                 const VectorXcr& Phi, Real& tol,
//                 int& max_it) {
//     int iter_done(0);
//     Real errorn(0);
//     VectorXcr b = B*Phi;
//     VectorXcr y = VectorXcr::Zero(C.rows()) ; // = C^1*b;
//     bicgstab(C, MC, b, y, max_it, tol, errorn, iter_done, false);
//     //std::cout << "Temp " << errorn << std::endl;
//     return D*y;
// }

// Computes implicit
VectorXcr implicitDCInvBPhi (const SparseMat& D, const SparseMat& C,
        const VectorXcr& ioms, const SparseMat& MC,
        const VectorXcr& Phi, Real& tol,
        int& max_it) {
    int iter_done(0);
    Real errorn(0);
    VectorXcr b = (ioms).asDiagonal() * (D.transpose()*Phi);
    VectorXcr y = VectorXcr::Zero(C.rows()) ; // = C^1*b;
    bicgstab(C, MC, b, y, max_it, tol, errorn, iter_done, false);
    //std::cout << "Temp " << errorn << std::endl;
    max_it = iter_done;
    return D*y;
}

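// The routine above applies the implicitly formed operator
//
//     y = D * C^{-1} * diag(ioms) * D^T * Phi,
//
// with the inner solve against C delegated to the explicitly preconditioned
// bicgstab() at the top of this file.  Illustrative call (operands assumed
// to be assembled elsewhere; the inner_* names are placeholders):
//
//     Real inner_tol = 1e-6;
//     int  inner_it  = 5000;
//     VectorXcr Ax = implicitDCInvBPhi(D, C, ioms, MC, Phi, inner_tol, inner_it);
//     // on return, inner_it holds the number of inner iterations performed
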
// Computes implicit
template <typename Preconditioner>
VectorXcr implicitDCInvBPhi2 (const SparseMat& D, const SparseMat& C,
        const Ref<VectorXcr const> ioms, const Preconditioner& solver,
        const Ref<VectorXcr const> Phi, Real& tol,
        int& max_it) {

    VectorXcr b = (ioms).asDiagonal() * (D.transpose()*Phi);
    VectorXcr y = VectorXcr::Zero(C.rows()) ; // = C^1*b;

    // Home Made
    //int iter_done(0);
    //Real errorn(0);
    //preconditionedBiCGStab(C, solver, b, y, max_it, tol, errorn, iter_done); //, false); // Jacobi M
    //max_it = iter_done;

    // Eigen BiCGStab
    Eigen::BiCGSTAB< Eigen::SparseMatrix<Complex> > BiCG;
    BiCG.compute( C ); // TODO move this out of this loop!
    y = BiCG.solve(b);
    max_it = BiCG.iterations();
    tol = BiCG.error();

    // Direct
    /*
    std::cout << "Computing LLT" << std::endl;
    Eigen::SimplicialLLT< Eigen::SparseMatrix<Complex>, Eigen::Upper,
                          Eigen::AMDOrdering<int> > LLT;
    LLT.compute(C);
    max_it = 1;
    std::cout << "Computed LLT" << std::endl;
    y = LLT.solve(b);
    */

    return D*y;
}

// Computes implicit
//template
template < typename Solver >
inline VectorXcr implicitDCInvBPhi3 (const SparseMat& D, const Solver& solver,
        const Ref<VectorXcr const> ioms,
        const Ref<VectorXcr const> Phi, Real& tol,
        int& max_it) {
    VectorXcr b = (ioms).asDiagonal() * (D.transpose()*Phi);
    VectorXcr y = solver.solve(b);
    max_it = 0;
    //max_it = solver.iterations();
    //errorn = solver.error();
    return D*y;
}

// // Simple extraction of indices in idx into reduceed array x1
// void vmap( const Ref<VectorXcr const> x0, Ref<VectorXcr> x1, const std::vector<int>& idx ) {
//     for (unsigned int ii=0; ii<idx.size(); ii++) {
//         x1(ii) = x0(idx[ii]);
//     }
// }

// Gather the entries of x0 listed in idx into a new, reduced-length vector
VectorXcr vmap( const Ref<VectorXcr const> x0, const std::vector<int>& idx ) {
    VectorXcr x1 = VectorXcr::Zero( idx.size() );
    for (unsigned int ii=0; ii<idx.size(); ii++) {
        x1(ii) = x0(idx[ii]);
    }
    return x1;
}

// Scatter the entries of the reduced vector x1 back into x0 at the indices idx
void ivmap( Ref<VectorXcr> x0, const Ref<VectorXcr const> x1, const std::vector<int>& idx ) {
    for (unsigned int ii=0; ii<idx.size(); ii++) {
        x0(idx[ii]) = x1(ii);
    }
}

template <typename CSolver>
int implicitbicgstab(//const SparseMat& D,
        //const SparseMat& C,
        const Ref< Eigen::SparseMatrix<Complex> const > D,
        const std::vector<int>& idx,
        const Ref< VectorXcr const > ioms,
        const Ref< VectorXcr const > rhs,
        Ref<VectorXcr> phi,
        CSolver& solver,
        int &max_it, const Real &tol, Real &errorn, int &iter_done, ofstream& logio) {

    logio << "using the preconditioned implicit solver" << std::endl;

    Complex omega, rho, rho_1, alpha, beta;
    Real    tol2;
    int     iter, max_it2, max_it1;

    // Look at reduced problem
    VectorXcr rhs2 = vmap(rhs, idx);
    VectorXcr phi2 = vmap(phi, idx);

    // Determine size of system and init vectors
    int n = idx.size();      // was phi.size();
    VectorXcr r(n);
    VectorXcr r_tld(n);
    VectorXcr p(n);
    VectorXcr s(n);
    VectorXcr v = VectorXcr::Zero(n);
    VectorXcr t = VectorXcr::Zero(n);

    // TODO, refigure for implicit large system
//     std::cout << "Start BiCGStab, memory needed: "
//               << (sizeof(Complex)*(9+2)*n/(1024.*1024*1024)) << " [Gb]\n";

    // Initialise
    iter_done = 0;
    Real eps = 1e-100;

    Real bnrm2 = rhs.norm();
    if (bnrm2 == 0) {
        phi.setConstant(0.0);
        errorn = 0;
        std::cerr << "Trivial case of Ax = b, where b is 0\n";
        return (0);
    }

    // If there is an initial guess
    if ( phi.norm() ) {
        tol2 = tol;
        max_it2 = 50000;
        //r = rhs - implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2);
        //r = rhs - implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2);
        r = rhs2 - vmap( implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2), idx );
    } else {
        r = vmap(rhs, idx);
    }

    jsw_timer timer;

    errorn = r.norm() / bnrm2;
    omega = 1.;
    r_tld = r;
    Real errornold = 1e14;

    // Get down to business
    for (iter=0; iter<max_it; iter++) {

        timer.begin();

        rho = r_tld.dot(r);
        if (iter > 0) {
            beta = (rho/rho_1) * (alpha/omega);
            p = r.array() + beta*(p.array()-omega*v.array()).array();
        } else {
            p = r;
        }

        tol2 = tol;
        max_it2 = 500000;
        //v = implicitDCInvBPhi2(D, C, ioms, solver, p, tol2, max_it2);
        ivmap(phi, p, idx);
        v = vmap(implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2), idx);

        alpha = rho / r_tld.dot(v);
        s = r.array() - alpha*v.array();
        errorn = s.norm()/bnrm2;

        if (errorn < tol && iter > 1) {
            phi2.array() += alpha*p.array();
            ivmap( phi, phi2, idx );
            return (0);
        }

        tol2 = tol;
        max_it1 = 500000;
        //t = implicitDCInvBPhi2(D, C, ioms, solver, s, tol2, max_it1);
        //t = implicitDCInvBPhi3(D, solver, ioms, s, tol2, max_it1);
        ivmap(phi, s, idx);
        t = vmap(implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it1), idx);

        omega = t.dot(s) / t.dot(t);
        r = s.array() - omega*t.array();
        errorn = r.norm() / bnrm2;
        iter_done = iter;

        if (errorn <= tol) {
            ivmap( phi, phi2, idx );
            return (0);
        }
        if (abs(omega) < eps) {
            ivmap( phi, phi2, idx );
            return (0);
        }
        rho_1 = rho;

        logio << "iteration " << std::setw(3) << iter
              << " errorn "   << std::setw(6) << std::setprecision(4)
              << std::scientific << errorn
              //<< "\timplicit iterations " << std::setw(5) << max_it1+max_it2
              << " time " << std::setw(6) << std::fixed << std::setprecision(2)
              << timer.end() << std::endl;

        // Check to see how progress is going
        if (errornold - errorn < 0) {
            logio << "Irregular non-monotonic (negative) convergence. Recommend restart. \n";
            ivmap( phi, phi2, idx );
            return (2);
        }

        /*
        if (errornold - errorn < 1e-14) {
            logio << "not making any progress. Giving up\n";
            return (1);
        }
        */

        //std::cout << "|| p-s ||" << (alpha*p - omega*s).norm() << std::endl;

        // only update phi if good things are happening
        phi2.array() += alpha*p.array() + omega*s.array();
        errornold = errorn;
    }
    ivmap( phi, phi2, idx );
    return (0);
}

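// Example usage (an illustrative sketch only; D, C, ioms, rhs and the index
// set idx are assumed to be assembled elsewhere).  Any object exposing
// solve() can be passed as CSolver; here a sparse Cholesky factorisation of
// C is reused for every inner solve, mirroring the commented-out block in
// implicitDCInvBPhi2 above:
//
//     Eigen::SimplicialLLT< Eigen::SparseMatrix<Complex> > solver;
//     solver.compute(C);                        // factor C once, reuse per call
//     std::ofstream logio("bicgstab.log");
//     VectorXcr phi = VectorXcr::Zero(rhs.size());
//     int  max_it = 200, iter_done = 0;
//     Real errorn = 0;
//     implicitbicgstab(D, idx, ioms, rhs, phi, solver, max_it, 1e-8,
//                      errorn, iter_done, logio);
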
// On Input
// A       =  Matrix
// B       =  Right hand side
// X       =  initial guess, and solution
// maxit   =  maximum Number of iterations
// tol     =  error tolerance
// On Output
// X       =  real solution vector
// errorn  =  Real error norm
template < typename Solver >
int implicitbicgstab_ei(const SparseMat& D,
        const Ref< VectorXcr const > ioms,
        const Ref< VectorXcr const > rhs,
        Ref<VectorXcr> phi,
        Solver& solver,
        int &max_it, const Real &tol, Real &errorn, int &iter_done, ofstream& logio) {

    logio << "using the preconditioned Eigen implicit solver" << std::endl;

    Complex omega, rho, rho_1, alpha, beta;
    Real    tol2;
    int     iter, max_it2, max_it1;

    // Determine size of system and init vectors
    int n = phi.size();
    VectorXcr r(n);
    VectorXcr r_tld(n);
    VectorXcr p(n);
    VectorXcr v(n);
    VectorXcr s(n);
    VectorXcr t(n);

    // Initialise
    iter_done = 0;
    Real eps = 1e-100;

    Real bnrm2 = rhs.norm();
    if (bnrm2 == 0) {
        phi.setConstant(0.0);
        errorn = 0;
        std::cerr << "Trivial case of Ax = b, where b is 0\n";
        return (0);
    }

    // If there is an initial guess
    if ( phi.norm() ) {
        tol2 = tol;
        max_it2 = 50000;
        r = rhs - implicitDCInvBPhi3(D, solver, ioms, phi, tol2, max_it2);
    } else {
        r = rhs;
    }

    jsw_timer timer;

    errorn = r.norm() / bnrm2;
    omega = 1.;
    r_tld = r;
    Real errornold = 1e14;

    // Get down to business
    for (iter=0; iter<max_it; iter++) {

        timer.begin();

        rho = r_tld.dot(r);
        if (iter > 0) {
            beta = (rho/rho_1) * (alpha/omega);
            p = r.array() + beta*(p.array()-omega*v.array()).array();
        } else {
            p = r;
        }

        tol2 = tol;
        max_it2 = 500000;
        v = implicitDCInvBPhi3(D, solver, ioms, p, tol2, max_it2);
        max_it2 = 0; // solver.iterations();

        alpha = rho / r_tld.dot(v);
        s = r.array() - alpha*v.array();
        errorn = s.norm()/bnrm2;

        if (errorn < tol && iter > 1) {
            phi.array() += alpha*p.array();
            return (0);
        }

        tol2 = tol;
        max_it1 = 500000;
        t = implicitDCInvBPhi3(D, solver, ioms, s, tol2, max_it1);
        max_it1 = 0; //solver.iterations();

        omega = t.dot(s) / t.dot(t);
        r = s.array() - omega*t.array();
        errorn = r.norm() / bnrm2;
        iter_done = iter;

        if (errorn <= tol )    return (0);
        if (abs(omega) < eps)  return (0);
        rho_1 = rho;

        logio << "iteration " << std::setw(4) << iter
              << "\terrorn "  << std::setw(6) << std::setprecision(4)
              << std::scientific << errorn
              << "\timplicit iterations " << std::setw(5) << max_it1+max_it2
              << "\ttime " << std::setw(10) << std::fixed << std::setprecision(2)
              << timer.end() << std::endl;

        // Check to see how progress is going
        if (errornold - errorn < 0) {
            logio << "irregular (negative) convergence. Try again? \n";
            return (2);
        }

        // only update phi if good things are happening
        phi.array() += alpha*p.array() + omega*s.array();
        errornold = errorn;
    }
    return (0);
}

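// Example usage (an illustrative sketch only; operands assumed to be
// assembled elsewhere).  Here the inner solve against C is delegated to
// Eigen's own BiCGSTAB, as in implicitDCInvBPhi2 above; any Eigen solver
// exposing solve() may be substituted:
//
//     Eigen::BiCGSTAB< Eigen::SparseMatrix<Complex> > solver;
//     solver.compute(C);
//     std::ofstream logio("bicgstab_ei.log");
//     VectorXcr phi = VectorXcr::Zero(rhs.size());
//     int  max_it = 200, iter_done = 0;
//     Real errorn = 0;
//     implicitbicgstab_ei(D, ioms, rhs, phi, solver, max_it, 1e-8,
//                         errorn, iter_done, logio);
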
// On Input
// A       =  Matrix
// B       =  Right hand side
// X       =  initial guess, and solution
// maxit   =  maximum Number of iterations
// tol     =  error tolerance
// On Output
// X       =  real solution vector
// errorn  =  Real error norm
int implicitbicgstab(const SparseMat& D,
        const SparseMat& C,
        const VectorXcr& ioms,
        const SparseMat& MC,
        Eigen::Ref< VectorXcr > rhs,
        VectorXcr& phi,
        int &max_it, Real &tol, Real &errorn, int &iter_done) {

    Complex omega, rho, rho_1, alpha, beta;
    Real    errmin, tol2;
    int     iter, max_it2;

//     // Cholesky decomp
//     SparseLLT<SparseMatrix<double>, Cholmod>
//         CholC(SparseMatrix<double> (C.real()) );
//     if(!CholC.succeeded()) {
//         std::cerr << "decomposition failed\n";
//         return EXIT_FAILURE;
//     }

    // Determine size of system and init vectors
    int n = phi.size();
    VectorXcr r(n);
    VectorXcr r_tld(n);
    VectorXcr p(n);
    VectorXcr v(n);
    //VectorXcr p_hat(n);
    VectorXcr s(n);
    //VectorXcr s_hat(n);
    VectorXcr t(n);
    VectorXcr xmin(n);

    // TODO, refigure for implicit large system
//     std::cout << "Start BiCGStab, memory needed: "
//               << (sizeof(Complex)*(9+2)*n/(1024.*1024*1024)) << " [Gb]\n";

    // Initialise
    iter_done = 0;
    v.setConstant(0.); // not necessary I don't think
    t.setConstant(0.);
    Real eps = 1e-100;

    Real bnrm2 = rhs.norm();
    if (bnrm2 == 0) {
        phi.setConstant(0.0);
        errorn = 0;
        std::cerr << "Trivial case of Ax = b, where b is 0\n";
        return (0);
    }

    // If there is an initial guess
    if ( phi.norm() ) {
        //r = rhs - A*phi;
        tol2 = tol;
        max_it2 = 50000;
        std::cout << "Initial guess " << std::endl;
        r = rhs - implicitDCInvBPhi(D, C, ioms, MC, phi, tol2, max_it2);
        //r = rhs - implicitDCInvBPhi (D, C, B, CholC, phi, tol2, max_it2);
    } else {
        r = rhs;
    }

    errorn = r.norm() / bnrm2;
    //std::cout << "Initial |r| " << r.norm() << "\t" << errorn << std::endl;
    omega = 1.;
    r_tld = r;
    errmin = 1e30;
    Real errornold = 1e6;

    // Get down to business
    for (iter=0; iter<max_it; iter++) {

        rho = r_tld.dot(r);
        if (iter > 0) {
            beta = (rho/rho_1) * (alpha/omega);
            p = r.array() + beta*(p.array()-omega*v.array()).array();
        } else {
            p = r;
        }

        // Use pseudo inverse to get approximate answer
        //p_hat = p;
        tol2 = std::max(1e-4*errorn, tol);
        tol2 = tol;
        max_it2 = 500000;
        //v = A*p_hat;
        v = implicitDCInvBPhi(D, C, ioms, MC, p, tol2, max_it2);
        //v = implicitDCInvBPhi(D, C, B, CholC, p, tol2, max_it2);

        alpha = rho / r_tld.dot(v);
        s = r.array() - alpha*v.array();
        errorn = s.norm()/bnrm2;

        if (errorn < tol && iter > 1) {
            phi.array() += alpha*p.array();
            return (0);
        }

        // s_hat = M*s;
        //tol2 = tol;
        tol2 = std::max(1e-4*errorn, tol);
        tol2 = tol;
        max_it2 = 50000;
        // t = A*s_hat;
        t = implicitDCInvBPhi(D, C, ioms, MC, s, tol2, max_it2);
        //t = implicitDCInvBPhi(D, C, B, CholC, s, tol2, max_it2);

        omega = t.dot(s) / t.dot(t);
        phi.array() += alpha*p.array() + omega*s.array();
        r = s.array() - omega*t.array();
        errorn = r.norm() / bnrm2;
        iter_done = iter;

        if (errorn < errmin) {
            // remember the model with the smallest norm
            errmin = errorn;
            xmin = phi;
        }

        if (errorn <= tol )    return (0);
        if (abs(omega) < eps)  return (0);
        rho_1 = rho;

        std::cout << "iteration " << std::setw(4) << iter
                  << "\terrorn "  << std::setw(6) << std::scientific << errorn
                  << "\timplicit iterations " << std::setw(5) << max_it2 << std::endl;

        if (errornold - errorn < 1e-14) {
            std::cout << "not making any progress. Giving up\n";
            return (2);
        }
        errornold = errorn;
    }
    return (0);
}

//int bicgstab(const SparseMat &A, Eigen::SparseLU< Eigen::SparseMatrix<Complex> ,