Lemma is an Electromagnetics API
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

cg.h 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. /* This file is part of Lemma, a geophysical modelling and inversion API */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. /**
  6. @file
  7. @author Trevor Irons
  8. @version $Id: cg.h 87 2013-09-05 22:44:05Z tirons $
  9. **/
  10. #ifndef CG_INC
  11. #define CG_INC
  12. #include <iostream>
  13. #include <fstream>
  14. #include "lemma.h"
  15. namespace Lemma {
  16. /** Port of netlib.org http://www.netlib.org/templates/cpp//cg.h
  17. * Solves the symmetric postive definite system Ax=b.
  18. * No preconditioner is used.
  19. * Iterative template routine -- CG
  20. *
  21. * CG solves the symmetric positive definite linear
  22. * system Ax=b using the Conjugate Gradient method.
  23. *
  24. * CG follows the algorithm described on p. 15 in the
  25. * SIAM Templates book.
  26. *
  27. * The return value indicates convergence within max_iter (input)
  28. * iterations (0), or no convergence within max_iter iterations (1).
  29. *
  30. * Upon successful return, output arguments have the following values:
  31. *
  32. * x -- approximate solution to Ax = b
  33. * max_iter -- the number of iterations performed before the
  34. * tolerance was reached
  35. * tol -- the residual after the final iteration
  36. * @param[in] A is a Real matrix A to be solved.
  37. * @param[in] x0 is the starting model.
  38. * @param[in] b is the right hand side.
  39. */
  40. template < typename Scalar >
  41. VectorXr CG(const MatrixXr &A, const VectorXr &x0, const VectorXr &b,
  42. int &max_iter, Scalar &tol) {
  43. VectorXr p, q;
  44. Scalar alpha, beta, rho, rho_1(0);
  45. VectorXr x = x0;
  46. Scalar normb = b.norm( );
  47. VectorXr r = b - A*x;
  48. if (normb == 0.0) {
  49. normb = 1;
  50. }
  51. Scalar resid = r.norm() / normb;
  52. if (resid <= tol) {
  53. tol = resid;
  54. max_iter = 0;
  55. return x;
  56. }
  57. for (int i = 1; i <= max_iter; i++) {
  58. rho = r.transpose()*r;
  59. if (i == 1) {
  60. p = r;
  61. }
  62. else {
  63. beta = rho / rho_1;
  64. p = r + beta * p;
  65. }
  66. q = A*p;
  67. alpha = rho / p.dot(q);
  68. x += alpha * p;
  69. r -= alpha * q;
  70. if ((resid = r.norm() / normb) <= tol) {
  71. tol = resid;
  72. max_iter = i;
  73. return x;
  74. }
  75. rho_1 = rho;
  76. }
  77. tol = resid;
  78. std::cerr << "CG FAILED TO REACH CONVERGENCE\n";
  79. return x;
  80. }
  81. // Specialised routine that appliex mod(Ax) so that b and x are in real, but
  82. // A is complex.
  83. template < typename Scalar >
  84. VectorXr CG_ModulusAx(const MatrixXcr &A, const VectorXr &x0, const VectorXr &b,
  85. int &max_iter, Scalar &tol) {
  86. VectorXr p, q;
  87. Scalar beta, rho, rho_1(0);
  88. Scalar alpha;
  89. VectorXr x = x0;
  90. Scalar normb = b.norm( );
  91. VectorXr r = b.array() - (A*x).array().abs();
  92. if (normb == 0.0) {
  93. normb = 1;
  94. }
  95. Scalar resid = r.norm() / normb;
  96. if (resid <= tol) {
  97. tol = resid;
  98. max_iter = 0;
  99. return x;
  100. }
  101. for (int i = 1; i <= max_iter; i++) {
  102. rho = r.dot(r); //conjugate().transpose()*r;
  103. if (i == 1) {
  104. p = r;
  105. }
  106. else {
  107. beta = rho / rho_1;
  108. p = r + beta * p;
  109. }
  110. q = (A*p).array().abs();
  111. alpha = rho / p.dot(q);
  112. x += alpha * p;
  113. r -= alpha * q;
  114. if ((resid = r.norm() / normb) <= tol) {
  115. tol = resid;
  116. max_iter = i;
  117. return x;
  118. }
  119. rho_1 = rho;
  120. }
  121. tol = resid;
  122. std::cerr << "CG FAILED TO REACH CONVERGENCE\n";
  123. return x;
  124. }
  125. // Preconditioned version of above
  126. //*****************************************************************
  127. // Iterative template routine -- CG
  128. //
  129. // CG solves the symmetric positive definite linear
  130. // system Ax=b using the Conjugate Gradient method.
  131. //
  132. // CG follows the algorithm described on p. 15 in the
  133. // SIAM Templates book.
  134. //
  135. // The return value indicates convergence within max_iter (input)
  136. // iterations (0), or no convergence within max_iter iterations (1).
  137. //
  138. // Upon successful return, output arguments have the following values:
  139. //
  140. // x -- approximate solution to Ax = b
  141. // max_iter -- the number of iterations performed before the
  142. // tolerance was reached
  143. // tol -- the residual after the final iteration
  144. //
  145. //*****************************************************************
  146. //template < class Matrix, class Vector, class Preconditioner, class Real >
  147. //int
  148. //CG(const Matrix &A, Vector &x, const Vector &b,
  149. // const Preconditioner &M, int &max_iter, Real &tol)
  150. //{
  151. #include <limits>
  152. template <typename Preconditioner>
  153. VectorXr CG(const MatrixXr &A, const VectorXr &x0, const VectorXr &b,
  154. const Preconditioner &M, int &max_iter, Real &tol) {
  155. //const Eigen::SparseMatrix<Real> &M, int &max_iter, Real &tol) {
  156. VectorXr p, z, q;
  157. VectorXr x = x0;
  158. Real alpha(0), beta(0), rho(0), rho_1(0);
  159. Real normb;
  160. VectorXr r;
  161. #ifdef LEMMAUSEOMP
  162. #pragma omp parallel sections
  163. {
  164. #endif
  165. #ifdef LEMMAUSEOMP
  166. #pragma omp section
  167. #endif
  168. {
  169. normb = b.norm();
  170. }
  171. #ifdef LEMMAUSEOMP
  172. #pragma omp section
  173. #endif
  174. {
  175. r = b - A*x;
  176. }
  177. #ifdef LEMMAUSEOMP
  178. }
  179. #endif
  180. if (normb <= std::numeric_limits<double>::epsilon() ) {
  181. normb = 1;
  182. }
  183. Real resid = r.norm() / normb;
  184. if (resid <= tol) {
  185. tol = resid;
  186. max_iter = 0;
  187. return x;
  188. }
  189. // todo do 0th loop manually, gets rid of if statement
  190. for (int i = 1; i <= max_iter; i++) {
  191. z = M.solve(r);
  192. //z = M*r;
  193. rho = r.transpose()*z;
  194. if (i == 1) {
  195. p = z;
  196. } else {
  197. beta = rho / rho_1;
  198. p = z + beta * p;
  199. }
  200. q = A*p;
  201. alpha = rho / p.dot(q);
  202. #ifdef LEMMAUSEOMP
  203. #pragma omp parallel sections
  204. {
  205. #endif
  206. #ifdef LEMMAUSEOMP
  207. #pragma omp section
  208. #endif
  209. {
  210. x += alpha * p;
  211. }
  212. #ifdef LEMMAUSEOMP
  213. #pragma omp section
  214. #endif
  215. {
  216. r -= alpha * q;
  217. }
  218. #ifdef LEMMAUSEOMP
  219. }
  220. #endif
  221. if ((resid = r.norm() / normb) <= tol) {
  222. tol = resid;
  223. max_iter = i;
  224. return x;
  225. }
  226. rho_1 = rho;
  227. }
  228. tol = resid;
  229. std::cerr << "Preconditioned CG failed to converge\n";
  230. return x;
  231. }
  232. template < typename Scalar >
  233. VectorXr CGJ(const MatrixXr &A, const VectorXr &x0, const VectorXr &b,
  234. const Eigen::SparseMatrix<Scalar> &M, int &max_iter, Scalar &tol) {
  235. VectorXr p, z, q;
  236. VectorXr x = x0;
  237. Scalar alpha(0), beta(0), rho(0), rho_1(0);
  238. Scalar normb;
  239. VectorXr r;
  240. #ifdef LEMMAUSEOMP
  241. #pragma omp parallel sections
  242. {
  243. #endif
  244. #ifdef LEMMAUSEOMP
  245. #pragma omp section
  246. #endif
  247. {
  248. normb = b.norm();
  249. }
  250. #ifdef LEMMAUSEOMP
  251. #pragma omp section
  252. #endif
  253. {
  254. r = b - A*x;
  255. }
  256. #ifdef LEMMAUSEOMP
  257. }
  258. #endif
  259. if (normb <= std::numeric_limits<double>::epsilon() ) {
  260. normb = 1;
  261. }
  262. Scalar resid = r.norm() / normb;
  263. if (resid <= tol) {
  264. tol = resid;
  265. max_iter = 0;
  266. return x;
  267. }
  268. // todo do 0th loop manually, gets rid of if statement
  269. for (int i = 1; i <= max_iter; i++) {
  270. //z = M.solve(r);
  271. z = M*r;
  272. rho = r.transpose()*z;
  273. if (i == 1) {
  274. p = z;
  275. } else {
  276. beta = rho / rho_1;
  277. p = z + beta * p;
  278. }
  279. q = A*p;
  280. alpha = rho / p.dot(q);
  281. #ifdef LEMMAUSEOMP
  282. #pragma omp parallel sections
  283. {
  284. #endif
  285. #ifdef LEMMAUSEOMP
  286. #pragma omp section
  287. #endif
  288. {
  289. x += alpha * p;
  290. }
  291. #ifdef LEMMAUSEOMP
  292. #pragma omp section
  293. #endif
  294. {
  295. r -= alpha * q;
  296. }
  297. #ifdef LEMMAUSEOMP
  298. }
  299. #endif
  300. if ((resid = r.norm() / normb) <= tol) {
  301. tol = resid;
  302. max_iter = i;
  303. return x;
  304. }
  305. rho_1 = rho;
  306. }
  307. tol = resid;
  308. std::cerr << "Preconditioned CG failed to converge\n";
  309. return x;
  310. }
  311. ///////////////////////////////////////////////////////
  312. // Log Barrier
  313. /** Solves one iteration, using implicit multiplication of At*b, and ATA.
  314. * @param[in] G is the sensitivity matrix to be inverted.
  315. * @param[in] WdTWd is the data weighting matrix, usually sparse.
  316. * @param[in] WmTWm is the model weighting matrix, usually sparse.
  317. * @param[in] X1 is a vector of the inverse of the current model X. Used in
  318. * log barrier. Should be computed as X1 = VectorXr( 1. / (x.array() - minVal ))
  319. * @param[in] Y1 is a vector of the inverse of the current model X. Used in
  320. * log barrier. Should be computed as X1 = VectorXr( 1. / (maxVal - x.array() ))
  321. * @param[in] X2 is the analaogous inverse of X^2 diagonal matrix, stored
  322. * as a Vector.
  323. * @param[in] Y2 is the analaogous inverse of (maxVal-X)^2 diagonal matrix, stored
  324. * as a Vector.
  325. * @param[in] D2 is the datamisfit vector, formally (d_predicted - d_observed).
  326. * @param[in,out] Mk is the input / output solution vector. The input value is
  327. * an itital guess, and output is the solution.
  328. * @param[in] Mr is the reference model.
  329. * @param[in] BETA is the regularisation (Tikhonov parameter) to apply
  330. * @param[in] P is a preconditioner of G. The product Pb ~ x
  331. * @param[in,out] max_iter is the number of iterations.
  332. * @param[in,out] tol is the desired tolerance on input, and achieved on
  333. * output.
  334. */
  335. template < typename Scalar >
  336. int implicit_log_CG(const MatrixXr& G, const MatrixXr& WdTWd, const VectorXr& WmTWm,
  337. const VectorXr& X1, const VectorXr& X2,
  338. const VectorXr& Y1, const VectorXr& Y2,
  339. const VectorXr& D2, VectorXr& Mk, const VectorXr& Mr,
  340. const Scalar& BETA, const MatrixXr& P,
  341. int& max_iter, Scalar& tol) {
  342. // TODO add WdTWD to this!
  343. Scalar resid;
  344. VectorXr p, z, q;
  345. Scalar alpha(0), beta(0), rho(0), rho_1(0);
  346. // Calculate 'B'
  347. VectorXr delM = Mk - Mr;
  348. VectorXr B = (-G.transpose()*D2).array() - BETA*((WmTWm.asDiagonal()*delM).array())
  349. + X1.array() + Y1.array();
  350. Scalar normb = B.norm();
  351. // Implicit calc of A*x
  352. VectorXr Y = BETA*(WmTWm.asDiagonal()*Mk).array() +
  353. (X2.asDiagonal()*Mk).array() + (Y2.asDiagonal()*Mk).array();
  354. VectorXr Z = G*Mk;
  355. VectorXr U = G.transpose()*Z;
  356. VectorXr r = B - (Y + U);
  357. if (normb == 0.0) normb = 1;
  358. if ((resid = r.norm() / normb) <= tol) {
  359. tol = resid;
  360. max_iter = 0;
  361. return 0;
  362. }
  363. for (int i = 1; i <= max_iter; i++) {
  364. //z = M.solve(r); // we can solve directly z = P*r
  365. z = P*r;
  366. rho = r.dot(z);
  367. if (i == 1) p = z;
  368. else {
  369. beta = rho / rho_1;
  370. p = beta * p;
  371. p = p+z;
  372. }
  373. Y = BETA*(WmTWm.array()*p.array()) + X2.array()*p.array() + Y2.array()*p.array();
  374. Z = G*p;
  375. U = G.transpose()*Z;
  376. q = Y+U;
  377. alpha = rho / p.dot(q);
  378. Mk = Mk + alpha * p;
  379. r = r - (alpha * q);
  380. if ((resid = r.norm() / normb) <= tol) {
  381. tol = resid;
  382. max_iter = i;
  383. return 0;
  384. }
  385. rho_1 = rho;
  386. }
  387. tol = resid;
  388. return 1;
  389. }
  390. } // end of namespace Lemma
  391. #endif // ----- #ifndef CG_INC -----