From 854b27254cfd24e5deaba109c90cd02ac09c5f61 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <ohearnku@msu.edu> Date: Fri, 8 Jul 2022 18:23:03 -0400 Subject: [PATCH] PG-PuReMD: add OpenMP support for SAI preconditioner computation (--enable-openmp=yes flag in configure script). Other code clean-up. --- Makefile.am | 6 +- PG-PuReMD/src/control.c | 169 +++++++------- PG-PuReMD/src/cuda/cuda_charges.cu | 9 - PG-PuReMD/src/cuda/cuda_init_md.cu | 19 ++ PG-PuReMD/src/init_md.c | 34 +-- PG-PuReMD/src/init_md.h | 3 - PG-PuReMD/src/lin_alg.c | 347 +++++++++++++++-------------- PG-PuReMD/src/reax_types.h | 8 + configure.ac | 33 ++- 9 files changed, 353 insertions(+), 275 deletions(-) diff --git a/Makefile.am b/Makefile.am index fe042f2b..f20ca082 100644 --- a/Makefile.am +++ b/Makefile.am @@ -331,7 +331,7 @@ include cuda.am endif if USE_CUDA -AM_NVCCFLAGS = --compiler-options="$(DEFS) $(AM_CFLAGS) -std=c++14 @CFLAGS_EXTRA@ $(CFLAGS) @M_CFLAGS@" -std=c++14 @NFLAGS@ @NFLAGS_EXTRA@ +AM_NVCCFLAGS = --compiler-options="$(DEFS) $(AM_CFLAGS) -std=c++14 @CFLAGS_EXTRA@ $(CFLAGS) @OMP_CFLAGS@ @M_CFLAGS@" -std=c++14 @NFLAGS@ @NFLAGS_EXTRA@ endif bin_PROGRAMS = PG-PuReMD/bin/pg-puremd @@ -464,9 +464,9 @@ PG_PuReMD_bin_pg_puremd_SOURCES += PG-PuReMD/src/cuda/cuda_allocate.cu \ nodist_EXTRA_PG_PuReMD_bin_pg_puremd_SOURCES = src/dummy.c endif -PG_PuReMD_bin_pg_puremd_CFLAGS = -std=c11 @CFLAGS_EXTRA@ @M_CFLAGS@ +PG_PuReMD_bin_pg_puremd_CFLAGS = -std=c11 @CFLAGS_EXTRA@ @OMP_CFLAGS@ @M_CFLAGS@ PG_PuReMD_bin_pg_puremd_CPPFLAGS = -I PG-PuReMD/src @CPPFLAGS_EXTRA@ -PG_PuReMD_bin_pg_puremd_LDADD = @M_LIBS@ @L_LIBS@ -lstdc++ +PG_PuReMD_bin_pg_puremd_LDADD = @OMP_LIBS@ @M_LIBS@ @L_LIBS@ -lstdc++ if USE_CUDA PG_PuReMD_bin_pg_puremd_CFLAGS += @CUDA_CFLAGS@ PG_PuReMD_bin_pg_puremd_LDADD += @CUDA_LIBS@ diff --git a/PG-PuReMD/src/control.c b/PG-PuReMD/src/control.c index dcac33b0..dd4bf787 100644 --- a/PG-PuReMD/src/control.c +++ b/PG-PuReMD/src/control.c @@ -52,6 +52,7 @@ void Read_Control_File( const char * const control_file, control_params * const control->ensemble = NVE; control->nsteps = 0; control->dt = 0.25; + control->num_threads_set = FALSE; control->nprocs = 1; control->procs_by_dim[0] = 1; control->procs_by_dim[1] = 1; @@ -155,27 +156,32 @@ void Read_Control_File( const char * const control_file, control_params * const } else if ( strncmp(tmp[0], "ensemble_type", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->ensemble = ival; } else if ( strncmp(tmp[0], "nsteps", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->nsteps = ival; } - else if ( strncmp(tmp[0], "dt", MAX_LINE) == 0) + else if ( strncmp(tmp[0], "dt", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->dt = val * 1.e-3; // convert dt from fs to ps! } + else if ( strncmp(tmp[0], "num_threads", MAX_LINE) == 0 ) + { + control->num_threads = sstrtol( tmp[1], __FILE__, __LINE__ ); + control->num_threads_set = TRUE; + } else if ( strncmp(tmp[0], "gpus_per_node", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->gpus_per_node = ival; } else if ( strncmp(tmp[0], "gpu_streams", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->gpu_streams = ival; if ( control->gpu_streams <= 0 || control->gpu_streams > MAX_CUDA_STREAMS ) @@ -193,11 +199,11 @@ void Read_Control_File( const char * const control_file, control_params * const MPI_Abort( MPI_COMM_WORLD, INVALID_INPUT ); } - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->procs_by_dim[0] = ival; - ival = atoi(tmp[2]); + ival = sstrtol( tmp[2], __FILE__, __LINE__ ); control->procs_by_dim[1] = ival; - ival = atoi(tmp[3]); + ival = sstrtol( tmp[3], __FILE__, __LINE__ ); control->procs_by_dim[2] = ival; control->nprocs = control->procs_by_dim[0] * control->procs_by_dim[1] * @@ -210,7 +216,7 @@ void Read_Control_File( const char * const control_file, control_params * const } // else if( strncmp(tmp[0], "restart", MAX_LINE) == 0 ) // { -// ival = atoi(tmp[1]); +// ival = sstrtol( tmp[1], __FILE__, __LINE__ ); // control->restart = ival; // } // else if( strncmp(tmp[0], "restart_from", MAX_LINE) == 0 ) @@ -220,122 +226,122 @@ void Read_Control_File( const char * const control_file, control_params * const // } else if ( strncmp(tmp[0], "random_vel", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->random_vel = ival; } else if ( strncmp(tmp[0], "restart_format", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->restart_format = ival; } else if ( strncmp(tmp[0], "restart_freq", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->restart_freq = ival; } else if ( strncmp(tmp[0], "reposition_atoms", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->reposition_atoms = ival; } else if ( strncmp(tmp[0], "restrict_bonds", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->restrict_bonds = ival; } else if ( strncmp(tmp[0], "remove_CoM_vel", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->remove_CoM_vel = ival; } else if ( strncmp(tmp[0], "debug_level", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->debug_level = ival; } else if ( strncmp(tmp[0], "energy_update_freq", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->energy_update_freq = ival; } else if ( strncmp(tmp[0], "reneighbor", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->reneighbor = ival; } else if ( strncmp(tmp[0], "vlist_buffer", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->vlist_cut = val + control->nonb_cut; } else if ( strncmp(tmp[0], "nbrhood_cutoff", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->bond_cut = val; } else if ( strncmp(tmp[0], "bond_graph_cutoff", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->bg_cut = val; } else if ( strncmp(tmp[0], "thb_cutoff", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->thb_cut = val; } else if ( strncmp(tmp[0], "hbond_cutoff", MAX_LINE) == 0 ) { - val = atof( tmp[1] ); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->hbond_cut = val; } else if ( strncmp(tmp[0], "ghost_cutoff", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->user_ghost_cut = val; } else if ( strncmp(tmp[0], "tabulate_long_range", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->tabulate = ival; } else if ( strncmp(tmp[0], "charge_method", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->charge_method = ival; } else if ( strncmp(tmp[0], "charge_freq", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->charge_freq = ival; } else if ( strncmp(tmp[0], "cm_q_net", MAX_LINE) == 0 ) { - val = atof( tmp[1] ); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->cm_q_net = val; } else if ( strncmp(tmp[0], "cm_solver_type", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->cm_solver_type = ival; } else if ( strncmp(tmp[0], "cm_solver_max_iters", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->cm_solver_max_iters = ival; } else if ( strncmp(tmp[0], "cm_solver_restart", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->cm_solver_restart = ival; } else if ( strncmp(tmp[0], "cm_solver_q_err", MAX_LINE) == 0 ) { - val = atof( tmp[1] ); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->cm_solver_q_err = val; } else if ( strncmp(tmp[0], "cm_domain_sparsity", MAX_LINE) == 0 ) { - val = atof( tmp[1] ); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->cm_domain_sparsity = val; if ( val < 1.0 ) { @@ -344,57 +350,57 @@ void Read_Control_File( const char * const control_file, control_params * const } else if ( strncmp(tmp[0], "cm_init_guess_extrap1", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->cm_init_guess_extrap1 = ival; } else if ( strncmp(tmp[0], "cm_init_guess_extrap2", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->cm_init_guess_extrap2 = ival; } else if ( strncmp(tmp[0], "cm_solver_pre_comp_type", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->cm_solver_pre_comp_type = ival; } else if ( strncmp(tmp[0], "cm_solver_pre_comp_refactor", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->cm_solver_pre_comp_refactor = ival; } else if ( strncmp(tmp[0], "cm_solver_pre_comp_droptol", MAX_LINE) == 0 ) { - val = atof( tmp[1] ); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->cm_solver_pre_comp_droptol = val; } else if ( strncmp(tmp[0], "cm_solver_pre_comp_sweeps", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->cm_solver_pre_comp_sweeps = ival; } else if ( strncmp(tmp[0], "cm_solver_pre_comp_sai_thres", MAX_LINE) == 0 ) { - val = atof( tmp[1] ); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->cm_solver_pre_comp_sai_thres = val; } else if ( strncmp(tmp[0], "cm_solver_pre_app_type", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->cm_solver_pre_app_type = ival; } else if ( strncmp(tmp[0], "cm_solver_pre_app_jacobi_iters", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->cm_solver_pre_app_jacobi_iters = ival; } else if ( strncmp(tmp[0], "include_polarization_energy", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->polarization_energy_enabled = ival; } else if ( strncmp(tmp[0], "temp_init", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->T_init = val; if ( control->T_init < 0.001 ) @@ -404,7 +410,7 @@ void Read_Control_File( const char * const control_file, control_params * const } else if ( strncmp(tmp[0], "temp_final", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->T_final = val; if ( control->T_final < 0.1 ) @@ -414,36 +420,39 @@ void Read_Control_File( const char * const control_file, control_params * const } else if ( strncmp(tmp[0], "t_mass", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); /* convert from fs to s */ control->Tau_T = val * 1.0e-15; } else if ( strncmp(tmp[0], "t_mode", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->T_mode = ival; } else if ( strncmp(tmp[0], "t_rate", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->T_rate = val; } else if ( strncmp(tmp[0], "t_freq", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->T_freq = val; } else if ( strncmp(tmp[0], "pressure", MAX_LINE) == 0 ) { if ( control->ensemble == iNPT ) { - control->P[0] = control->P[1] = control->P[2] = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); + control->P[0] = val; + control->P[1] = val; + control->P[2] = val; } else if ( control->ensemble == sNPT ) { - control->P[0] = atof(tmp[1]); - control->P[1] = atof(tmp[2]); - control->P[2] = atof(tmp[3]); + control->P[0] = sstrtod( tmp[1], __FILE__, __LINE__ ); + control->P[1] = sstrtod( tmp[2], __FILE__, __LINE__ ); + control->P[2] = sstrtod( tmp[3], __FILE__, __LINE__ ); } } else if ( strncmp(tmp[0], "p_mass", MAX_LINE) == 0 ) @@ -451,45 +460,47 @@ void Read_Control_File( const char * const control_file, control_params * const // convert p_mass from fs to ps if ( control->ensemble == iNPT ) { - control->Tau_P[0] = control->Tau_P[1] = control->Tau_P[2] = - atof(tmp[1]) * 1.e-3; + val = sstrtod( tmp[1], __FILE__, __LINE__ ) * 1.e-3; + control->Tau_P[0] = val; + control->Tau_P[1] = val; + control->Tau_P[2] = val; } else if ( control->ensemble == sNPT ) { - control->Tau_P[0] = atof(tmp[1]) * 1.e-3; - control->Tau_P[1] = atof(tmp[2]) * 1.e-3; - control->Tau_P[2] = atof(tmp[3]) * 1.e-3; + control->Tau_P[0] = sstrtod( tmp[1], __FILE__, __LINE__ ) * 1.e-3; + control->Tau_P[1] = sstrtod( tmp[2], __FILE__, __LINE__ ) * 1.e-3; + control->Tau_P[2] = sstrtod( tmp[3], __FILE__, __LINE__ ) * 1.e-3; } } else if ( strncmp(tmp[0], "pt_mass", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->Tau_PT[0] = control->Tau_PT[1] = control->Tau_PT[2] = val * 1.e-3; // convert pt_mass from fs to ps } else if ( strncmp(tmp[0], "compress", MAX_LINE) == 0 ) { - val = atof(tmp[1]); + val = sstrtod( tmp[1], __FILE__, __LINE__ ); control->compressibility = val; } else if ( strncmp(tmp[0], "press_mode", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->press_mode = ival; } else if ( strncmp(tmp[0], "geo_format", MAX_LINE) == 0 ) { - ival = atoi( tmp[1] ); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->geo_format = ival; } else if ( strncmp(tmp[0], "write_freq", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->write_steps = ival; } else if ( strncmp(tmp[0], "traj_compress", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->traj_compress = ival; } else if ( strncmp(tmp[0], "traj_format", MAX_LINE) == 0 ) @@ -499,7 +510,7 @@ void Read_Control_File( const char * const control_file, control_params * const } else if ( strncmp(tmp[0], "traj_method", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->traj_method = ival; } else if ( strncmp(tmp[0], "traj_title", MAX_LINE) == 0 ) @@ -509,27 +520,27 @@ void Read_Control_File( const char * const control_file, control_params * const } else if ( strncmp(tmp[0], "atom_info", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->atom_info += ival * 4; } else if ( strncmp(tmp[0], "atom_velocities", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->atom_info += ival * 2; } else if ( strncmp(tmp[0], "atom_forces", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->atom_info += ival * 1; } else if ( strncmp(tmp[0], "bond_info", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->bond_info = ival; } else if ( strncmp(tmp[0], "angle_info", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); out_control->angle_info = ival; } else if ( strncmp(tmp[0], "test_forces", MAX_LINE) == 0 ) @@ -540,15 +551,15 @@ void Read_Control_File( const char * const control_file, control_params * const else if ( strncmp(tmp[0], "molecular_analysis", MAX_LINE) == 0 || strncmp(tmp[0], "molec_anal", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->molecular_analysis = ival; } else if ( strncmp(tmp[0], "ignore", MAX_LINE) == 0 ) { - control->num_ignored = atoi(tmp[1]); + control->num_ignored = sstrtol( tmp[1], __FILE__, __LINE__ ); for ( i = 0; i < control->num_ignored; ++i ) { - control->ignore[atoi(tmp[i + 2])] = 1; + control->ignore[sstrtol( tmp[i + 2], __FILE__, __LINE__ )] = 1; } } else if ( strncmp(tmp[0], "freq_molec_anal", MAX_LINE) == 0 ) @@ -558,27 +569,27 @@ void Read_Control_File( const char * const control_file, control_params * const } else if ( strncmp(tmp[0], "dipole_anal", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->dipole_anal = ival; } else if ( strncmp(tmp[0], "freq_dipole_anal", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->freq_dipole_anal = ival; } else if ( strncmp(tmp[0], "diffusion_coef", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->diffusion_coef = ival; } else if ( strncmp(tmp[0], "freq_diffusion_coef", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->freq_diffusion_coef = ival; } else if ( strncmp(tmp[0], "restrict_type", MAX_LINE) == 0 ) { - ival = atoi(tmp[1]); + ival = sstrtol( tmp[1], __FILE__, __LINE__ ); control->restrict_type = ival; } else diff --git a/PG-PuReMD/src/cuda/cuda_charges.cu b/PG-PuReMD/src/cuda/cuda_charges.cu index 0f4d4280..6eeda69b 100644 --- a/PG-PuReMD/src/cuda/cuda_charges.cu +++ b/PG-PuReMD/src/cuda/cuda_charges.cu @@ -280,13 +280,6 @@ static void Setup_Preconditioner_QEq( reax_system const * const system, time = Get_Time( ); #endif - /* sort H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */ -// Sort_Matrix_Rows( &workspace->d_workspace->H, workspace, s ); - -#if defined(LOG_PERFORMANCE) - Update_Timing_Info( &time, &data->timing.cm_sort ); -#endif - switch ( control->cm_solver_pre_comp_type ) { case NONE_PC: @@ -323,8 +316,6 @@ static void Setup_Preconditioner_QEq( reax_system const * const system, Cuda_Copy_Matrix_Device_to_Host( &workspace->H, &workspace->d_workspace->H, s ); - Sort_Matrix_Rows( &workspace->H ); - setup_sparse_approx_inverse( system, data, &workspace->H, &workspace->H_spar_patt, control->nprocs, control->cm_solver_pre_comp_sai_thres ); diff --git a/PG-PuReMD/src/cuda/cuda_init_md.cu b/PG-PuReMD/src/cuda/cuda_init_md.cu index c46a1ffc..b37430ef 100644 --- a/PG-PuReMD/src/cuda/cuda_init_md.cu +++ b/PG-PuReMD/src/cuda/cuda_init_md.cu @@ -250,6 +250,25 @@ extern "C" void Cuda_Initialize( reax_system *system, control_params *control, { int i; +#if defined(_OPENMP) + #pragma omp parallel default(none) shared(control) + { + #pragma omp single + { + if ( control->num_threads_set == FALSE ) + { + /* set using OMP_NUM_THREADS environment variable */ + control->num_threads = omp_get_num_threads( ); + control->num_threads_set = TRUE; + } + } + } + + omp_set_num_threads( control->num_threads ); +#else + control->num_threads = 1; +#endif + Init_MPI_Datatypes( system, workspace, mpi_data ); #if defined(CUDA_DEVICE_PACK) diff --git a/PG-PuReMD/src/init_md.c b/PG-PuReMD/src/init_md.c index 5b3e0727..1934388d 100644 --- a/PG-PuReMD/src/init_md.c +++ b/PG-PuReMD/src/init_md.c @@ -710,6 +710,25 @@ void Initialize( reax_system * const system, control_params * const control, reax_list ** const lists, output_controls * const out_control, mpi_datatypes * const mpi_data ) { +#if defined(_OPENMP) + #pragma omp parallel default(none) shared(control) + { + #pragma omp single + { + if ( control->num_threads_set == FALSE ) + { + /* set using OMP_NUM_THREADS environment variable */ + control->num_threads = omp_get_num_threads( ); + control->num_threads_set = TRUE; + } + } + } + + omp_set_num_threads( control->num_threads ); +#else + control->num_threads = 1; +#endif + Init_MPI_Datatypes( system, workspace, mpi_data ); Init_Simulation_Data( system, control, data, mpi_data ); @@ -733,21 +752,6 @@ void Initialize( reax_system * const system, control_params * const control, } -void Pure_Initialize( reax_system * const system, control_params * const control, - simulation_data * const data, storage * const workspace, - reax_list ** const lists, output_controls * const out_control, - mpi_datatypes * const mpi_data ) -{ - Init_Simulation_Data( system, control, data, mpi_data ); - - Init_Workspace( system, control, workspace, mpi_data ); - - Init_Lists( system, control, data, workspace, lists, mpi_data ); - - Init_Force_Functions( control ); -} - - #elif defined(LAMMPS_REAX) void Initialize( reax_system * const system, control_params * const control, simulation_data * const data, storage * const workspace, diff --git a/PG-PuReMD/src/init_md.h b/PG-PuReMD/src/init_md.h index f4fa2a32..44ccb6b5 100644 --- a/PG-PuReMD/src/init_md.h +++ b/PG-PuReMD/src/init_md.h @@ -36,9 +36,6 @@ void Init_MPI_Datatypes( reax_system * const, storage * const, mpi_datatypes * c void Initialize( reax_system * const, control_params * const, simulation_data * const, storage * const, reax_list** const, output_controls * const, mpi_datatypes * const ); -void Pure_Initialize( reax_system * const, control_params * const, simulation_data * const, - storage * const, reax_list** const, output_controls * const, mpi_datatypes * const ); - void Init_Taper( control_params * const, storage * const, mpi_datatypes * const ); diff --git a/PG-PuReMD/src/lin_alg.c b/PG-PuReMD/src/lin_alg.c index b341a045..30cc2dd6 100644 --- a/PG-PuReMD/src/lin_alg.c +++ b/PG-PuReMD/src/lin_alg.c @@ -1494,7 +1494,7 @@ void sparse_approx_inverse( reax_system const * const system, int local_pos, identity_pos; int *X, *q; int q_size, q_top; - int e_size, D_size; + int e_i_size, D_size; int cnt; int *row_nnz, *row_start, total_entries; int *col_inds; @@ -1544,10 +1544,6 @@ void sparse_approx_inverse( reax_system const * const system, nz_vals_send_buf1_size = 0; nz_vals_send_buf2 = NULL; nz_vals_send_buf2_size = 0; - e_i = NULL; - e_size = 0; - D = NULL; - D_size = 0; row_nnz = smalloc( sizeof(int) * system->total_cap, __FILE__, __LINE__ ); row_start = smalloc( sizeof(int) * (system->total_cap + 1 - A->n), __FILE__, __LINE__ ); @@ -1778,222 +1774,249 @@ void sparse_approx_inverse( reax_system const * const system, sfree( nz_vals_send_buf1, __FILE__, __LINE__ ); sfree( nz_vals_send_buf2, __FILE__, __LINE__ ); - X = smalloc( sizeof(int) * system->bigN, __FILE__, __LINE__ ); - q_size = MIN_QUEUE_SIZE; - q = smalloc( sizeof(int) * q_size, __FILE__, __LINE__ ); - - for ( i = 0; i < A_spar_patt->n; ++i ) +#if defined(_OPENMP) + #pragma omp parallel default(none) \ + private(i, pj, pk, ret, N, M, d_i, d_j, mark, local_pos, identity_pos, \ + X, q, q_size, q_top, e_i, e_i_size, D, D_size, \ + m, n, lda, info) \ + firstprivate(system, A, A_spar_patt, A_app_inv, row_nnz, row_start, col_inds, nz_vals) \ + shared(stderr) +#endif { - M = 0; - N = 0; - q_top = 0; - mark = -999; - - /* column indices for the non-zeros in the i-th row in the sparsity pattern - * for H_app_inv delineate which columns of H must be searched for non-zeros - * (when constructing the dense matrix D) */ - for ( pj = A_spar_patt->start[i]; pj < A_spar_patt->end[i]; ++pj ) + e_i = NULL; + e_i_size = 0; + D = NULL; + D_size = 0; + X = smalloc( sizeof(int) * system->bigN, __FILE__, __LINE__ ); + q_size = MIN_QUEUE_SIZE; + q = smalloc( sizeof(int) * q_size, __FILE__, __LINE__ ); + +#if defined(_OPENMP) + #pragma omp for schedule(dynamic,32) +#endif + for ( i = 0; i < A_spar_patt->n; ++i ) { - ++N; + M = 0; + N = 0; + q_top = 0; + mark = -999; + + /* column indices for the non-zeros in the i-th row in the sparsity pattern + * for H_app_inv delineate which columns of H must be searched for non-zeros + * (when constructing the dense matrix D) */ + for ( pj = A_spar_patt->start[i]; pj < A_spar_patt->end[i]; ++pj ) + { + ++N; - assert( 0 <= A_spar_patt->j[pj] ); - assert( A_spar_patt->j[pj] < system->N ); + assert( 0 <= A_spar_patt->j[pj] ); + assert( A_spar_patt->j[pj] < system->N ); - /* due to symmetry, search rows of H instead of columns for non-zeros - * - * case: index's row in local matrix */ - if ( A_spar_patt->j[pj] < A->n ) - { - for ( pk = A->start[A_spar_patt->j[pj]]; pk < A->end[A_spar_patt->j[pj]]; ++pk ) + /* due to symmetry, search rows of H instead of columns for non-zeros + * + * case: index's row in local matrix */ + if ( A_spar_patt->j[pj] < A->n ) { - assert( 1 <= system->my_atoms[A->j[pk]].orig_id - && system->my_atoms[A->j[pk]].orig_id <= system->bigN ); + for ( pk = A->start[A_spar_patt->j[pj]]; pk < A->end[A_spar_patt->j[pj]]; ++pk ) + { + assert( 1 <= system->my_atoms[A->j[pk]].orig_id + && system->my_atoms[A->j[pk]].orig_id <= system->bigN ); - /* accumulate the nonzero column indices to serve as the row indices of the dense matrix */ - X[system->my_atoms[A->j[pk]].orig_id - 1] = mark; + /* accumulate the nonzero column indices to serve as the row indices of the dense matrix */ + X[system->my_atoms[A->j[pk]].orig_id - 1] = mark; - if ( q_top >= q_size ) - { - q_size = (int) CEIL( q_size * SAFE_ZONE ); - q = srealloc( q, sizeof(int) * q_size, __FILE__, __LINE__ ); - } + if ( q_top >= q_size ) + { + q_size = (int) CEIL( q_size * SAFE_ZONE ); + q = srealloc( q, sizeof(int) * q_size, __FILE__, __LINE__ ); + } - q[q_top] = system->my_atoms[A->j[pk]].orig_id - 1; - ++q_top; + q[q_top] = system->my_atoms[A->j[pk]].orig_id - 1; + ++q_top; + } } - } - /* case: index's row communicated */ - else - { - assert( 0 <= row_start[A_spar_patt->j[pj] - A->n] ); - assert( row_start[A_spar_patt->j[pj] - A->n] < total_entries ); - assert( 0 <= row_start[A_spar_patt->j[pj] - A->n + 1] ); - assert( row_start[A_spar_patt->j[pj] - A->n + 1] < total_entries ); - - for ( pk = row_start[A_spar_patt->j[pj] - A->n]; - pk < row_start[A_spar_patt->j[pj] - A->n + 1]; ++pk ) + /* case: index's row communicated */ + else { - assert( 0 <= col_inds[pk] ); - assert( col_inds[pk] < system->bigN ); - - /* accumulate the nonzero column indices to serve as the row indices of the dense matrix */ - X[col_inds[pk]] = mark; + assert( 0 <= row_start[A_spar_patt->j[pj] - A->n] ); + assert( row_start[A_spar_patt->j[pj] - A->n] < total_entries ); + assert( 0 <= row_start[A_spar_patt->j[pj] - A->n + 1] ); + assert( row_start[A_spar_patt->j[pj] - A->n + 1] < total_entries ); - if ( q_top >= q_size ) + for ( pk = row_start[A_spar_patt->j[pj] - A->n]; + pk < row_start[A_spar_patt->j[pj] - A->n + 1]; ++pk ) { - q_size = (int) CEIL( q_size * SAFE_ZONE ); - q = srealloc( q, sizeof(int) * q_size, __FILE__, __LINE__ ); - } + assert( 0 <= col_inds[pk] ); + assert( col_inds[pk] < system->bigN ); + + /* accumulate the nonzero column indices to serve as the row indices of the dense matrix */ + X[col_inds[pk]] = mark; + + if ( q_top >= q_size ) + { + q_size = (int) CEIL( q_size * SAFE_ZONE ); + q = srealloc( q, sizeof(int) * q_size, __FILE__, __LINE__ ); + } - q[q_top] = col_inds[pk]; - ++q_top; + q[q_top] = col_inds[pk]; + ++q_top; + } } } - } - /* enumerate the row indices from 0 to (# of nonzero rows - 1) for the dense matrix */ - for ( pj = 0; pj < q_top; ++pj ) - { - if ( X[q[pj]] == mark ) + /* enumerate the row indices from 0 to (# of nonzero rows - 1) for the dense matrix */ + for ( pj = 0; pj < q_top; ++pj ) { - X[q[pj]] = M; - ++M; + if ( X[q[pj]] == mark ) + { + X[q[pj]] = M; + ++M; + } } - } - identity_pos = X[system->my_atoms[i].orig_id - 1]; + identity_pos = X[system->my_atoms[i].orig_id - 1]; - assert( 0 <= identity_pos ); - assert( identity_pos < M ); + assert( 0 <= identity_pos ); + assert( identity_pos < M ); - if ( M * N > D_size ) - { - if ( D_size > 0 ) + if ( M * N > D_size ) { - sfree( D, __FILE__, __LINE__ ); + if ( D_size > 0 ) + { + sfree( D, __FILE__, __LINE__ ); + } + + D_size = (int) CEIL( M * N * SAFE_ZONE ); + + D = smalloc( sizeof(real) * D_size, __FILE__, __LINE__ ); } - - D_size = (int) CEIL( M * N * SAFE_ZONE ); - D = smalloc( sizeof(real) * D_size, __FILE__, __LINE__ ); - } + for ( d_i = 0; d_i < M; ++d_i ) + { + for ( d_j = 0; d_j < N; ++d_j ) + { + D[d_i * N + d_j] = 0.0; + } + } - for ( d_i = 0; d_i < M; ++d_i ) - { + /* fill in entries of D column-wise */ for ( d_j = 0; d_j < N; ++d_j ) { - D[d_i * N + d_j] = 0.0; - } - } + local_pos = A_spar_patt->j[A_spar_patt->start[i] + d_j]; - /* fill in entries of D column-wise */ - for ( d_j = 0; d_j < N; ++d_j ) - { - local_pos = A_spar_patt->j[A_spar_patt->start[i] + d_j]; + assert( 0 <= local_pos ); + assert( local_pos < system->N ); - assert( 0 <= local_pos ); - assert( local_pos < system->N ); + if ( local_pos < A->n ) + { + for ( d_i = A->start[local_pos]; d_i < A->end[local_pos]; ++d_i ) + { + assert( 0 <= X[system->my_atoms[A->j[d_i]].orig_id - 1] ); + assert( X[system->my_atoms[A->j[d_i]].orig_id - 1] < M ); - if ( local_pos < A->n ) - { - for ( d_i = A->start[local_pos]; d_i < A->end[local_pos]; ++d_i ) + D[X[system->my_atoms[A->j[d_i]].orig_id - 1] * N + d_j] = A->val[d_i]; + } + } + else { - assert( 0 <= X[system->my_atoms[A->j[d_i]].orig_id - 1] ); - assert( X[system->my_atoms[A->j[d_i]].orig_id - 1] < M ); + assert( 0 <= row_start[local_pos - A->n] ); + assert( row_start[local_pos - A->n] < total_entries ); + assert( 0 <= row_start[local_pos - A->n + 1] ); + assert( row_start[local_pos - A->n + 1] < total_entries ); + + for ( d_i = row_start[local_pos - A->n]; + d_i < row_start[local_pos - A->n + 1]; ++d_i ) + { + assert( 0 <= col_inds[d_i] ); + assert( col_inds[d_i] < system->bigN ); + assert( 0 <= X[col_inds[d_i]] ); + assert( X[col_inds[d_i]] < M ); - D[X[system->my_atoms[A->j[d_i]].orig_id - 1] * N + d_j] = A->val[d_i]; + D[X[col_inds[d_i]] * N + d_j] = nz_vals[d_i]; + } } } - else - { - assert( 0 <= row_start[local_pos - A->n] ); - assert( row_start[local_pos - A->n] < total_entries ); - assert( 0 <= row_start[local_pos - A->n + 1] ); - assert( row_start[local_pos - A->n + 1] < total_entries ); - for ( d_i = row_start[local_pos - A->n]; - d_i < row_start[local_pos - A->n + 1]; ++d_i ) + /* create the right hand side of the linear equation + * that is the full column of the identity matrix */ + if ( e_i_size < M ) + { + if ( e_i_size > 0 ) { - assert( 0 <= col_inds[d_i] ); - assert( col_inds[d_i] < system->bigN ); - assert( 0 <= X[col_inds[d_i]] ); - assert( X[col_inds[d_i]] < M ); - - D[X[col_inds[d_i]] * N + d_j] = nz_vals[d_i]; + sfree( e_i, __FILE__, __LINE__ ); } + + e_i_size = (int) CEIL( M * SAFE_ZONE ); + + e_i = smalloc( sizeof(real) * e_i_size, __FILE__, __LINE__ ); } - } - /* create the right hand side of the linear equation - * that is the full column of the identity matrix */ - if ( e_size < M ) - { - if ( e_size > 0 ) + for ( pk = 0; pk < M; ++pk ) { - sfree( e_i, __FILE__, __LINE__ ); + e_i[pk] = 0.0; } + e_i[identity_pos] = 1.0; - e_size = (int) CEIL( M * SAFE_ZONE ); - - e_i = smalloc( sizeof(real) * e_size, __FILE__, __LINE__ ); - } + /* solve the overdetermined system from the i-th columns of min_{M} ||I - HM||_F + * through the least-squares problem min_{m_i} ||e_i - Dm_i||_{2}^{2} + * where D represents the dense matrix composed of relevant non-zeros from H, + * e_i is the i-th column of the identify matrix, and m_i is the i-th column of M + * + * after solve, D contains the QR factorization and e_i contains + * the solution (if info == 0) */ + m = (lapack_int) M; + n = (lapack_int) N; + lda = n; - for ( pk = 0; pk < M; ++pk ) - { - e_i[pk] = 0.0; - } - e_i[identity_pos] = 1.0; + assert( M >= N ); - /* solve the overdetermined system from the i-th columns of min_{M} ||I - HM||_F - * through the least-squares problem min_{m_i} ||e_i - Dm_i||_{2}^{2} - * where D represents the dense matrix composed of relevant non-zeros from H, - * e_i is the i-th column of the identify matrix, and m_i is the i-th column of M - * - * after solve, D contains the QR factorization and e_i contains - * the solution (if info == 0) */ - m = (lapack_int) M; - n = (lapack_int) N; - lda = n; + info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', m, n, 1, D, lda, e_i, 1 ); - assert( M >= N ); + /* check for full rank */ + if ( info > 0 ) + { + fprintf( stderr, "[ERROR] SAI preconditioner computation failure\n" ); + fprintf( stderr, " [INFO] The diagonal element %i of the triangular factor\n", info ); + fprintf( stderr, " of A is zero, so A does not have full rank.\n" ); +// MPI_Abort( MPI_COMM_WORLD, RUNTIME_ERROR ); + exit( RUNTIME_ERROR ); + } - info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', m, n, 1, D, lda, e_i, 1 ); + if ( A_spar_patt->end[i] - A_spar_patt->start[i] < N ) + { + fprintf( stderr, "[ERROR] SAI preconditioner computation failure\n" ); + fprintf( stderr, " [INFO] out of memory for row i = %d\n", i ); +// MPI_Abort( MPI_COMM_WORLD, RUNTIME_ERROR ); + exit( RUNTIME_ERROR ); + } - /* check for full rank */ - if ( info > 0 ) - { - fprintf( stderr, "[ERROR] SAI preconditioner computation failure\n" ); - fprintf( stderr, " [INFO] The diagonal element %i of the triangular factor\n", info ); - fprintf( stderr, " of A is zero, so A does not have full rank.\n" ); - MPI_Abort( MPI_COMM_WORLD, RUNTIME_ERROR ); + /* least-squares solution in e_i comprises non-zero values for i-th row in A_app_inv + * (computed as right preconditioner, store transpose as left preconditioner) */ + A_app_inv->start[i] = A_spar_patt->start[i]; + A_app_inv->end[i] = A_spar_patt->end[i]; + for ( pj = A_app_inv->start[i]; pj < A_app_inv->end[i]; ++pj ) + { + A_app_inv->j[pj] = A_spar_patt->j[pj]; + A_app_inv->val[pj] = e_i[pj - A_spar_patt->start[i]]; + } } - if ( A_spar_patt->end[i] - A_spar_patt->start[i] < N ) + if ( D != NULL ) { - fprintf( stderr, "[ERROR] SAI preconditioner computation failure\n" ); - fprintf( stderr, " [INFO] out of memory for row i = %d\n", i ); - MPI_Abort( MPI_COMM_WORLD, RUNTIME_ERROR ); + sfree( D, __FILE__, __LINE__ ); } - - /* least-squares solution in e_i comprises non-zero values for i-th row in A_app_inv - * (computed as right preconditioner, store transpose as left preconditioner) */ - A_app_inv->start[i] = A_spar_patt->start[i]; - A_app_inv->end[i] = A_spar_patt->end[i]; - for ( pj = A_app_inv->start[i]; pj < A_app_inv->end[i]; ++pj ) + if ( e_i != NULL ) { - A_app_inv->j[pj] = A_spar_patt->j[pj]; - A_app_inv->val[pj] = e_i[pj - A_spar_patt->start[i]]; + sfree( e_i, __FILE__, __LINE__ ); } + sfree( X, __FILE__, __LINE__ ); + sfree( q, __FILE__, __LINE__ ); } + for ( i = A_app_inv->n; i < A_app_inv->n_max; ++i ) { A_app_inv->start[i] = 0; A_app_inv->end[i] = 0; } - sfree( q, __FILE__, __LINE__ ); - sfree( D, __FILE__, __LINE__ ); - sfree( e_i, __FILE__, __LINE__ ); - sfree( X, __FILE__, __LINE__ ); sfree( col_inds, __FILE__, __LINE__ ); sfree( nz_vals, __FILE__, __LINE__ ); sfree( row_nnz, __FILE__, __LINE__ ); diff --git a/PG-PuReMD/src/reax_types.h b/PG-PuReMD/src/reax_types.h index 8865fd7e..ae75d7ee 100644 --- a/PG-PuReMD/src/reax_types.h +++ b/PG-PuReMD/src/reax_types.h @@ -63,6 +63,10 @@ #endif #endif +#if defined(_OPENMP) + #include <omp.h> +#endif + /* IBMC */ #if defined(__IBMC__) #define inline __inline__ @@ -1702,6 +1706,10 @@ struct control_params int freq_diffusion_coef; /**/ int restrict_type; + /* number of OpenMP threads to use during the simulation */ + int num_threads; + /* TRUE if the num. OpenMP has bet set, FALSE otherwise */ + int num_threads_set; /* function pointer for ensemble used to evolve atomic system */ evolve_function Evolve; /* function pointers for bonded interactions */ diff --git a/configure.ac b/configure.ac index dd8a0295..35356b34 100644 --- a/configure.ac +++ b/configure.ac @@ -143,7 +143,7 @@ AC_PROG_CXXCPP AC_CONFIG_HEADERS([common/include/config.h]) # sPuReMD -if test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" = "xyes"; then +if test "x${pack_mpi_enabled}" != "xyes" && test "x${pack_mpi_gpu_enabled}" != "xyes" && { test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" = "xyes"; }; then if test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" != "xyes"; then BUILD_OPENMP="no" else @@ -217,8 +217,6 @@ if test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" = " OPENMP_LIBS="-lgomp" fi - # AC_MSG_NOTICE([OPENMP_CFLAGS=${OPENMP_CFLAGS}]) - # AC_MSG_NOTICE([OPENMP_LIBS=${OPENMP_LIBS}]) AC_SUBST([OMP_CFLAGS], ["${OPENMP_CFLAGS}"]) AC_SUBST([OMP_LIBS], ["${OPENMP_LIBS}"]) fi @@ -351,7 +349,7 @@ if test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" = " AC_LANG_POP([C]) fi -AM_CONDITIONAL([BUILD_S_OMP], [test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" = "xyes"]) +AM_CONDITIONAL([BUILD_S_OMP], [test "x${pack_mpi_enabled}" != "xyes" && test "x${pack_mpi_gpu_enabled}" != "xyes" && { test "x${pack_serial_enabled}" = "xyes" || test "x${pack_openmp_enabled}" = "xyes"; }]) # PuReMD-GPU @@ -633,6 +631,11 @@ AM_CONDITIONAL([BUILD_MPI], [test "x${pack_mpi_old_enabled}" = "xyes"]) # PG-PuReMD if test "x${pack_mpi_enabled}" = "xyes" || test "x${pack_mpi_gpu_enabled}" = "xyes"; then + if test "x${pack_openmp_enabled}" = "xyes"; then + BUILD_OPENMP_MPI="yes" + else + BUILD_OPENMP_MPI="no" + fi if test "x${pack_mpi_enabled}" = "xyes" || test "x${pack_mpi_gpu_enabled}" != "xyes"; then export BUILD_GPU="no" else @@ -691,6 +694,28 @@ if test "x${pack_mpi_enabled}" = "xyes" || test "x${pack_mpi_gpu_enabled}" = "xy AC_SUBST(CPPFLAGS_EXTRA) AC_SUBST(CFLAGS_EXTRA) + # Check for OpenMP support. + if test "x${BUILD_OPENMP_MPI}" = "xyes"; then + AX_OPENMP + + if test "x${OPENMP_CFLAGS}" = "x"; then + AC_MSG_WARN([ +----------------------------------------------- + Unable to find OpenMP support on this system. + Building a single-threaded version. +-----------------------------------------------]) + else + # bug due to recent Intel compiler change (?) + if test "x${ax_cv_c_compiler_vendor}" = "xintel"; then + OPENMP_CFLAGS="-qopenmp" + fi + OPENMP_LIBS="-lgomp" + fi + + AC_SUBST([OMP_CFLAGS], ["${OPENMP_CFLAGS}"]) + AC_SUBST([OMP_LIBS], ["${OPENMP_LIBS}"]) + fi + # Check for MPI support. CONFIGURE_HEADLINE([ MPI compiler ]) ACX_MPI([], [AC_MSG_ERROR([could not find MPI library])]) -- GitLab