IR ILU Preconditioned Solver#
The IR-ILU preconditioned solver example.
Kind: preconditioners
Builds on: ilu-preconditioned-solver, iterative-refinement
Upstream source: examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp in the Ginkgo repository.
Introduction#
This example shows how to combine iterative refinement with the adaptive precision block-Jacobi preconditioner in order to approximately solve the triangular systems occurring in ILU preconditioning. Using an adaptive precision block-Jacobi preconditioner matrix as inner solver for the iterative refinement method is equivalent to doing adaptive precision block-Jacobi relaxation in the triangular solves. This example roughly approximates the triangular solves with five adaptive precision block-Jacobi sweeps with a maximum block size of 16.
This example is motivated by “Multiprecision block-Jacobi for Iterative Triangular Solves” (Göbel, Anzt, Cojean, Flegar, Quintana-Ortí, Euro-Par 2020). The theory and a detailed analysis can be found there.
The commented program#
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <ginkgo/ginkgo.hpp>
int main(int argc, char* argv[])
{
Some shortcuts
using ValueType = double;
using RealValueType = gko::remove_complex<ValueType>;
using IndexType = int;
using vec = gko::matrix::Dense<ValueType>;
using real_vec = gko::matrix::Dense<RealValueType>;
using mtx = gko::matrix::Csr<ValueType, IndexType>;
using gmres = gko::solver::Gmres<ValueType>;
using ir = gko::solver::Ir<ValueType>;
using bj = gko::preconditioner::Jacobi<ValueType, IndexType>;
Print version information
std::cout << gko::version_info::get() << std::endl;
Figure out where to run the code
if (argc == 2 && (std::string(argv[1]) == "--help")) {
std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
std::exit(-1);
}
const auto executor_string = argc >= 2 ? argv[1] : "reference";
const unsigned int sweeps = argc == 3 ? std::atoi(argv[2]) : 5u;
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
return gko::CudaExecutor::create(0,
gko::OmpExecutor::create());
}},
{"hip",
[] {
return gko::HipExecutor::create(0, gko::OmpExecutor::create());
}},
{"dpcpp",
[] {
return gko::DpcppExecutor::create(0,
gko::OmpExecutor::create());
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
executor where Ginkgo will perform the computation
const auto exec = exec_map.at(executor_string)(); // throws if not valid
Read data
auto A = gko::share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
Create RHS and initial guess as 1
gko::size_type num_rows = A->get_size()[0];
auto host_x = vec::create(exec->get_master(), gko::dim<2>(num_rows, 1));
for (gko::size_type i = 0; i < num_rows; i++) {
host_x->at(i, 0) = 1.;
}
auto x = gko::clone(exec, host_x);
auto b = gko::clone(exec, host_x);
auto clone_x = gko::clone(exec, x);
Generate incomplete factors using ParILU
auto par_ilu_fact =
gko::factorization::ParIlu<ValueType, IndexType>::build().on(exec);
Generate concrete factorization for input matrix
auto par_ilu = gko::share(par_ilu_fact->generate(A));
Generate an iterative refinement factory to be used as a triangular solver in the preconditioner application. The generated method is equivalent to doing five block-Jacobi sweeps with a maximum block size of 16.
auto bj_factory = gko::share(
bj::build()
.with_max_block_size(16u)
.with_storage_optimization(gko::precision_reduction::autodetect())
.on(exec));
auto trisolve_factory =
ir::build()
.with_solver(bj_factory)
.with_criteria(gko::stop::Iteration::build().with_max_iters(sweeps))
.on(exec);
Generate an ILU preconditioner factory by setting lower and upper triangular solver - in this case the previously defined iterative refinement method.
auto ilu_pre_factory = gko::preconditioner::Ilu<ValueType>::build()
.with_l_solver(gko::clone(trisolve_factory))
.with_u_solver(gko::clone(trisolve_factory))
.on(exec);
Use incomplete factors to generate ILU preconditioner
auto ilu_preconditioner = gko::share(ilu_pre_factory->generate(par_ilu));
Create stopping criteria for Gmres
const RealValueType reduction_factor{1e-12};
auto iter_stop = gko::share(
gko::stop::Iteration::build().with_max_iters(1000u).on(exec));
auto tol_stop = gko::share(gko::stop::ResidualNorm<ValueType>::build()
.with_reduction_factor(reduction_factor)
.on(exec));
Use preconditioner inside GMRES solver factory Generating a solver factory tied to a specific preconditioner makes sense if there are several very similar systems to solve, and the same solver+preconditioner combination is expected to be effective.
auto ilu_gmres_factory =
gmres::build()
.with_criteria(iter_stop, tol_stop)
.with_generated_preconditioner(ilu_preconditioner)
.on(exec);
Generate preconditioned solver for a specific target system
auto ilu_gmres = ilu_gmres_factory->generate(A);
Add logger
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
gko::log::Convergence<ValueType>::create();
ilu_gmres->add_logger(logger);
Warmup run
ilu_gmres->apply(b, x);
Solve system 100 times and take the average time.
std::chrono::nanoseconds time(0);
for (int i = 0; i < 100; i++) {
x->copy_from(clone_x);
auto tic = std::chrono::high_resolution_clock::now();
ilu_gmres->apply(b, x);
auto toc = std::chrono::high_resolution_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
}
std::cout << "Using " << sweeps << " block-Jacobi sweeps.\n";
Print solution
std::cout << "Solution (x):\n";
write(std::cout, x);
Get residual
auto res = gko::as<vec>(logger->get_residual_norm());
std::cout << "GMRES iteration count: " << logger->get_num_iterations()
<< "\n";
std::cout << "GMRES execution time [ms]: "
<< static_cast<double>(time.count()) / 100000000.0 << "\n";
std::cout << "Residual norm sqrt(r^T r):\n";
write(std::cout, res);
}
Results#
This is the expected output:
Using 5 block-Jacobi sweeps.
Solution (x):
%%MatrixMarket matrix array real general
19 1
0.252218
0.108645
0.0662811
0.0630433
0.0384088
0.0396536
0.0402648
0.0338935
0.0193098
0.0234653
0.0211499
0.0196413
0.0199151
0.0181674
0.0162722
0.0150714
0.0107016
0.0121141
0.0123025
GMRES iteration count: 8
GMRES execution time [ms]: 0.377673
Residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
1.65303e-12