diff --git a/PG-PuReMD/src/cuda_environment.cu b/PG-PuReMD/src/cuda_environment.cu index dbbaba9bfad47c8d794f6f593a98cbebdbaf72bd..24898ec57ea754e0658d63193b264ea78c406c14 100644 --- a/PG-PuReMD/src/cuda_environment.cu +++ b/PG-PuReMD/src/cuda_environment.cu @@ -1,25 +1,28 @@ - #include "cuda_environment.h" - #include "cuda_utils.h" extern "C" void Setup_Cuda_Environment (int rank, int nprocs, int gpus_per_node) { - int deviceCount = 0; - cudaGetDeviceCount (&deviceCount); + int deviceCount; + cudaError_t flag; + + flag = cudaGetDeviceCount (&deviceCount); + + if ( flag != cudaSuccess ) + { + fprintf( stderr, "ERROR: no CUDA capable device(s) found. Terminating...\n" ); + exit( CANNOT_INITIALIZE ); + } //Calculate the # of GPUs per processor //and assign the GPU for each process - - //hpcc changes - //if (gpus_per_node == 2) { + //TODO: handle condition where # CPU procs > # GPUs cudaSetDevice ( (rank % (deviceCount)) ); - //cudaSetDevice( 1 ); + +#if defined(__CUDA_DEBUG__) fprintf( stderr, "p:%d is using GPU: %d \n", rank, (rank % deviceCount)); - //} else { - // cudaSetDevice ( 0 ); - //} +#endif /////////////////////////////////////////////// /////////////////////////////////////////////// @@ -36,9 +39,9 @@ extern "C" void Setup_Cuda_Environment (int rank, int nprocs, int gpus_per_node) /////////////////////////////////////////////// /////////////////////////////////////////////// /////////////////////////////////////////////// - } + extern "C" void Cleanup_Cuda_Environment () { cudaDeviceReset (); diff --git a/PG-PuReMD/src/parallelreax.c b/PG-PuReMD/src/parallelreax.c index 2f4b19dff34ca4e52f8d5b6e1368247ffdbbdadf..40cc9c3b96eea09bbf1074485c368beb7af7f9c5 100644 --- a/PG-PuReMD/src/parallelreax.c +++ b/PG-PuReMD/src/parallelreax.c @@ -92,7 +92,7 @@ void Read_System( char *geo_file, char *ffield_file, char *control_file, else { fprintf( stderr, "unknown geo file format. terminating!\n" ); - MPI_Abort( MPI_COMM_WORLD, UNKNOWN_OPTION ); + MPI_Abort( MPI_COMM_WORLD, INVALID_GEO ); } } @@ -170,6 +170,12 @@ void init_blocks (reax_system *system) #endif +static void usage(char* argv[]) +{ + fprintf(stderr, "usage: ./%s geometry ffield control\n", argv[0]); +} + + int main( int argc, char* argv[] ) { reax_system *system; @@ -183,6 +189,12 @@ int main( int argc, char* argv[] ) real t_start = 0, t_elapsed; real t_begin, t_end; + if ( argc != 4 ) + { + usage(argv); + exit( INVALID_INPUT ); + } + #ifdef HAVE_CUDA /* Remove this debug information later */ diff --git a/PuReMD/src/geo_tools.c b/PuReMD/src/geo_tools.c index 30139912468b045f54110c84a92523ab2d39703f..c037840bb15f962717c17363a50c38603f99b31d 100644 --- a/PuReMD/src/geo_tools.c +++ b/PuReMD/src/geo_tools.c @@ -164,19 +164,17 @@ int Read_Box_Info( reax_system *system, FILE *geo, int geo_format ) switch ( geo_format ) { - case PDB: - cryst = "CRYST1"; - break; - default: - cryst = "BOX"; + case PDB: + cryst = "CRYST1"; + break; + default: + cryst = "BOX"; } /* locate the cryst line in the geo file, read it and initialize the big box */ - while ( !feof( geo ) ) + while ( fgets( line, MAX_LINE, geo ) ) { - fgets( line, MAX_LINE, geo ); - if ( strncmp( line, cryst, 6 ) == 0 ) { if ( geo_format == PDB ) @@ -193,6 +191,10 @@ int Read_Box_Info( reax_system *system, FILE *geo, int geo_format ) return SUCCESS; } } + if ( ferror( geo ) ) + { + return FAILURE; + } return FAILURE; } @@ -310,15 +312,11 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control, c = 0; c1 = 0; top = 0; - while ( !feof( pdb ) ) - { - /* clear previous input line */ - s[0] = 0; - for ( i = 0; i < c1; ++i ) - tmp[i][0] = 0; + s[0] = 0; + while ( fgets( s, MAX_LINE, pdb ) ) + { /* read new line and tokenize it */ - fgets( s, MAX_LINE, pdb ); strncpy( s1, s, MAX_LINE - 1 ); c1 = Tokenize( s, &tmp ); @@ -474,6 +472,15 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control, // fprintf( stderr, "\n" ); } } + + /* clear previous input line */ + s[0] = 0; + for ( i = 0; i < c1; ++i ) + tmp[i][0] = 0; + } + if ( ferror( pdb ) ) + { + return FAILURE; } fclose( pdb ); diff --git a/PuReMD/src/parallelreax.c b/PuReMD/src/parallelreax.c index 4a0420744a6c2aa8a79dbbb22b114890ac6fcb2c..de9eade3f86f23f5ce49f29f4d7641e6fc4f28ae 100644 --- a/PuReMD/src/parallelreax.c +++ b/PuReMD/src/parallelreax.c @@ -51,9 +51,13 @@ void Read_System( char *geo_file, char *ffield_file, char *control_file, /* geo file */ if ( control->geo_format == CUSTOM ) + { Read_Geo( geo_file, system, control, data, workspace, mpi_data ); + } else if ( control->geo_format == PDB ) + { Read_PDB( geo_file, system, control, data, workspace, mpi_data ); + } else if ( control->geo_format == ASCII_RESTART ) { Read_Restart( geo_file, system, control, data, workspace, mpi_data ); @@ -103,6 +107,11 @@ void Post_Evolve( reax_system* system, control_params* control, Compute_Kinetic_Energy( system, data, mpi_data->comm_mesh3D ); } +static void usage(char* argv[]) +{ + fprintf(stderr, "usage: ./%s geometry ffield control\n", argv[0]); +} + int main( int argc, char* argv[] ) { @@ -116,6 +125,12 @@ int main( int argc, char* argv[] ) int i; real t_start = 0, t_elapsed; + if ( argc != 4 ) + { + usage(argv); + exit( INVALID_INPUT ); + } + /* allocated main datastructures */ system = (reax_system *) smalloc( sizeof(reax_system), "system", MPI_COMM_WORLD ); diff --git a/PuReMD/src/reax_defs.h b/PuReMD/src/reax_defs.h index 3cc4b61d7aabb4ce5222ef60582c07a24408532e..a61ce245a5fc29c580158f4f425805dd16506c5a 100644 --- a/PuReMD/src/reax_defs.h +++ b/PuReMD/src/reax_defs.h @@ -102,24 +102,24 @@ /******************* ENUMERATIONS *************************/ -enum geo_formats { CUSTOM, PDB, ASCII_RESTART, BINARY_RESTART, GF_N }; +enum geo_formats { CUSTOM = 0, PDB = 1, ASCII_RESTART = 2, BINARY_RESTART = 3, GF_N = 4 }; -enum restart_formats { WRITE_ASCII, WRITE_BINARY, RF_N }; +enum restart_formats { WRITE_ASCII = 0, WRITE_BINARY = 1, RF_N = 2 }; -enum ensembles { NVE, bNVT, nhNVT, sNPT, iNPT, NPT, ens_N }; +enum ensembles { NVE = 0, bNVT = 1, nhNVT = 2, sNPT = 3, iNPT = 4, NPT = 5, ens_N = 6 }; -enum lists { BONDS, OLD_BONDS, THREE_BODIES, - HBONDS, FAR_NBRS, DBOS, DDELTAS, LIST_N +enum lists { BONDS = 0, OLD_BONDS = 1, THREE_BODIES = 2, + HBONDS = 3, FAR_NBRS = 4, DBOS = 5, DDELTAS = 6, LIST_N = 7 }; -enum interactions { TYP_VOID, TYP_BOND, TYP_THREE_BODY, - TYP_HBOND, TYP_FAR_NEIGHBOR, TYP_DBO, TYP_DDELTA, TYP_N +enum interactions { TYP_VOID = 0, TYP_BOND = 1, TYP_THREE_BODY = 2, + TYP_HBOND = 3, TYP_FAR_NEIGHBOR = 4, TYP_DBO = 5, TYP_DDELTA = 6, TYP_N = 7 }; -enum message_tags { INIT, UPDATE, BNDRY, UPDATE_BNDRY, - EXC_VEC1, EXC_VEC2, DIST_RVEC2, COLL_RVEC2, - DIST_RVECS, COLL_RVECS, INIT_DESCS, ATOM_LINES, - BOND_LINES, ANGLE_LINES, RESTART_ATOMS, TAGS_N +enum message_tags { INIT = 0, UPDATE = 1, BNDRY = 2, UPDATE_BNDRY = 3, + EXC_VEC1 = 4, EXC_VEC2 = 5, DIST_RVEC2 = 6, COLL_RVEC2 = 7, + DIST_RVECS = 8, COLL_RVECS = 9, INIT_DESCS = 10, ATOM_LINES = 11, + BOND_LINES = 12, ANGLE_LINES = 13, RESTART_ATOMS = 14, TAGS_N = 15 }; enum errors { FILE_NOT_FOUND = -10, UNKNOWN_ATOM_TYPE = -11, @@ -128,7 +128,7 @@ enum errors { FILE_NOT_FOUND = -10, UNKNOWN_ATOM_TYPE = -11, INVALID_INPUT = -16, INVALID_GEO = -17 }; -enum exchanges { NONE, NEAR_EXCH, FULL_EXCH }; +enum exchanges { NONE = 0, NEAR_EXCH = 1, FULL_EXCH = 2 }; enum gcell_types { NO_NBRS = 0, NEAR_ONLY = 1, HBOND_ONLY = 2, FAR_ONLY = 4, NEAR_HBOND = 3, NEAR_FAR = 5, HBOND_FAR = 6, FULL_NBRS = 7, @@ -139,9 +139,9 @@ enum atoms { C_ATOM = 0, H_ATOM = 1, O_ATOM = 2, N_ATOM = 3, S_ATOM = 4, SI_ATOM = 5, GE_ATOM = 6, X_ATOM = 7 }; -enum traj_methods { REG_TRAJ, MPI_TRAJ, TF_N }; +enum traj_methods { REG_TRAJ = 0, MPI_TRAJ = 1, TF_N = 2 }; -enum molecules { UNKNOWN, WATER }; +enum molecules { UNKNOWN = 0, WATER = 1 }; #endif diff --git a/PuReMD/src/tool_box.c b/PuReMD/src/tool_box.c index 24c8c799bdd59f7acc7869264724b0e6e4e99ab4..02d9e3d9d6d083f3873b61d932f31b78964eb66e 100644 --- a/PuReMD/src/tool_box.c +++ b/PuReMD/src/tool_box.c @@ -165,9 +165,9 @@ inline int is_Inside_Box( simulation_box *box, rvec p ) if ( p[0] < box->min[0] || p[0] >= box->max[0] || p[1] < box->min[1] || p[1] >= box->max[1] || p[2] < box->min[2] || p[2] >= box->max[2] ) - return 0; + return FALSE; - return 1; + return TRUE; } @@ -182,9 +182,9 @@ inline int iown_midpoint( simulation_box *box, rvec p1, rvec p2 ) if ( midp[0] < box->min[0] || midp[0] >= box->max[0] || midp[1] < box->min[1] || midp[1] >= box->max[1] || midp[2] < box->min[2] || midp[2] >= box->max[2] ) - return 0; + return FALSE; - return 1; + return TRUE; } @@ -284,7 +284,7 @@ int is_Valid_Serial( storage *workspace, int serial ) // MPI_Abort( MPI_COMM_WORLD, INVALID_INPUT ); // } - return SUCCESS; + return TRUE; } diff --git a/environ/param.gpu.water b/environ/param.gpu.water index edd2784aa4f0a7a25df38e415d070f9cc392083c..7b3d00e95f1eba256b006f40283943d2bcbcbd57 100644 --- a/environ/param.gpu.water +++ b/environ/param.gpu.water @@ -14,12 +14,13 @@ bond_graph_cutoff 0.3 ! bond strength cutoff for bond thb_cutoff 0.001 ! cutoff value for three body interactions hbond_cutoff 7.50 ! cutoff distance for hydrogen bond interactions -qeq_solver_type 2 ! method used to solve charge equilibration phase (QEq) +qeq_solver_type 0 ! method used to solve charge equilibration phase (QEq) qeq_solver_q_err 1e-6 ! relative residual norm threshold used in solver -pre_comp_type 2 ! method used to compute QEq preconditioner, if applicable +pre_comp_type 0 ! method used to compute QEq preconditioner, if applicable pre_comp_refactor 100 ! nsteps to recompute preconditioner pre_comp_droptol 0.0 ! threshold tolerance for dropping values in preconditioner computation, if applicable -pre_comp_sweeps 3 ! sweeps to compute preconditioner (ILU_PAR/ICHOL_PAR) +pre_comp_sweeps 3 ! sweeps to compute preconditioner (ILU_PAR) +pre_app_type 1 ! method used to apply QEq preconditioner pre_app_jacobi_iters 50 ! number of Jacobi iterations used for applying QEq precondition, if applicable temp_init 0.0 ! desired initial temperature of the simulated system @@ -34,7 +35,7 @@ p_mass 5000.00 ! in fs, pressure inertia parame compress 0.008134 ! in ps^2 * A / amu ( 4.5X10^(-5) bar^(-1) ) press_mode 0 ! 0: internal + external pressure, 1: ext only, 2: int only -geo_format 1 ! 0: xyz, 1: pdb, 2: bgf +geo_format 1 ! 0: custom, 1: pdb, 2: bgf write_freq 0 ! write trajectory after so many steps traj_compress 0 ! 0: no compression 1: uses zlib to compress trajectory output traj_format 0 ! 0: our own format (below options apply to this only), 1: xyz, 2: bgf, 3: pdb diff --git a/sPuReMD/Makefile.am b/sPuReMD/Makefile.am index 5315c55e00e655b3be1629a9037e9ac1d6c604db..22bfb4986956b78797bc6ceccbb4bbd64aabed72 100644 --- a/sPuReMD/Makefile.am +++ b/sPuReMD/Makefile.am @@ -1,10 +1,10 @@ AM_CFLAGS += -Wall -O3 -funroll-loops -fstrict-aliasing bin_PROGRAMS = bin/spuremd -bin_spuremd_SOURCES = src/grid.c src/list.c src/lookup.c src/print_utils.c \ - src/reset_utils.c src/restart.c src/random.c src/traj.c \ - src/vector.c src/allocate.c src/analyze.c src/box.c src/system_props.c \ - src/param.c src/pdb_tools.c src/neighbors.c src/GMRES.c src/QEq.c src/bond_orders.c \ +bin_spuremd_SOURCES = src/ffield.c src/grid.c src/list.c src/lookup.c src/print_utils.c \ + src/reset_utils.c src/restart.c src/random.c src/tool_box.c src/traj.c \ + src/vector.c src/allocate.c src/analyze.c src/box.c src/system_props.c src/control.c \ + src/geo_tools.c src/neighbors.c src/GMRES.c src/QEq.c src/bond_orders.c \ src/single_body_interactions.c src/two_body_interactions.c \ src/three_body_interactions.c src/four_body_interactions.c src/forces.c \ src/integrate.c src/init_md.c src/testmd.c diff --git a/sPuReMD/src/GMRES.c b/sPuReMD/src/GMRES.c index fae32bb8f66fe1d110503e1d71d0bfe3f1cb3365..c686ce887919d686c09665b140d9e5bb8ce174fe 100644 --- a/sPuReMD/src/GMRES.c +++ b/sPuReMD/src/GMRES.c @@ -19,13 +19,12 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#include "allocate.h" #include "GMRES.h" +#include "allocate.h" #include "list.h" +#include "tool_box.h" #include "vector.h" -#include <omp.h> - typedef enum { @@ -40,7 +39,7 @@ typedef enum * x: vector * b: vector (result) */ static void Sparse_MatVec( const sparse_matrix * const A, - const real * const x, real * const b ) + const real * const x, real * const b ) { int i, j, k, n, si, ei; real H; @@ -66,7 +65,7 @@ static void Sparse_MatVec( const sparse_matrix * const A, { if ( (b_local = (real*) malloc( omp_get_num_threads() * n * sizeof(real))) == NULL ) { - exit( INSUFFICIENT_SPACE ); + exit( INSUFFICIENT_MEMORY ); } } @@ -83,8 +82,8 @@ static void Sparse_MatVec( const sparse_matrix * const A, for ( k = si; k < ei; ++k ) { - j = A->entries[k].j; - H = A->entries[k].val; + j = A->j[k]; + H = A->val[k]; #ifdef _OPENMP b_local[tid * n + j] += H * x[i]; b_local[tid * n + i] += H * x[j]; @@ -96,9 +95,9 @@ static void Sparse_MatVec( const sparse_matrix * const A, // the diagonal entry is the last one in #ifdef _OPENMP - b_local[tid * n + i] += A->entries[k].val * x[i]; + b_local[tid * n + i] += A->val[k] * x[i]; #else - b[i] += A->entries[k].val * x[i]; + b[i] += A->val[k] * x[i]; #endif } #ifdef _OPENMP @@ -126,8 +125,8 @@ static void Sparse_MatVec( const sparse_matrix * const A, * b: vector (result) * D^{-1}b (Dinv_b): precomputed vector-vector product */ static void Sparse_MatVec_Vector_Add( const sparse_matrix * const R, - const TRIANGULARITY tri, const real * const Dinv, - const real * const x, real * const b, const real * const Dinv_b) + const TRIANGULARITY tri, const real * const Dinv, + const real * const x, real * const b, const real * const Dinv_b) { int i, k, si = 0, ei = 0; #ifdef _OPENMP @@ -151,7 +150,7 @@ static void Sparse_MatVec_Vector_Add( const sparse_matrix * const R, { if ( (b_local = (real*) malloc( omp_get_num_threads() * R->n * sizeof(real))) == NULL ) { - exit( INSUFFICIENT_SPACE ); + exit( INSUFFICIENT_MEMORY ); } } @@ -178,9 +177,9 @@ static void Sparse_MatVec_Vector_Add( const sparse_matrix * const R, for ( k = si; k < ei; ++k ) { #ifdef _OPENMP - b_local[tid * R->n + i] += R->entries[k].val * x[R->entries[k].j]; + b_local[tid * R->n + i] += R->val[k] * x[R->j[k]]; #else - b[i] += R->entries[k].val * x[R->entries[k].j]; + b[i] += R->val[k] * x[R->j[k]]; #endif } #ifdef _OPENMP @@ -205,48 +204,211 @@ static void Sparse_MatVec_Vector_Add( const sparse_matrix * const R, } -/* solve sparse lower triangular linear system using forward substitution */ -static void Forward_Subs( const sparse_matrix * const L, const real * const b, real * const y ) +static void diag_pre_app( const real * const Hdia_inv, const real * const y, + real * const x, const int N ) +{ + unsigned int i; + + #pragma omp parallel for schedule(guided) \ + default(none) private(i) shared(stderr) + for ( i = 0; i < N; ++i ) + { + x[i] = y[i] * Hdia_inv[i]; + } +} + + +/* Solve triangular system LU*x = y using level scheduling + * + * LU: lower/upper triangular, stored in CSR + * y: constants in linear system (RHS) + * x: solution + * tri: triangularity of LU (lower/upper) + * + * Assumptions: + * LU has non-zero diagonals + * Each row of LU has at least one non-zero (i.e., no rows with all zeros) */ +static void tri_solve( const sparse_matrix * const LU, const real * const y, + real * const x, const TRIANGULARITY tri ) { int i, pj, j, si, ei; real val; - for ( i = 0; i < L->n; ++i ) + if ( tri == LOWER ) + { + for ( i = 0; i < LU->n; ++i ) + { + x[i] = y[i]; + si = LU->start[i]; + ei = LU->start[i + 1]; + for ( pj = si; pj < ei - 1; ++pj ) + { + j = LU->j[pj]; + val = LU->val[pj]; + x[i] -= val * x[j]; + } + x[i] /= LU->val[pj]; + } + } + else { - y[i] = b[i]; - si = L->start[i]; - ei = L->start[i + 1]; - for ( pj = si; pj < ei - 1; ++pj ) + for ( i = LU->n - 1; i >= 0; --i ) { - // TODO: remove assignments? compiler optimizes away? - j = L->entries[pj].j; - val = L->entries[pj].val; - y[i] -= val * y[j]; + x[i] = y[i]; + si = LU->start[i]; + ei = LU->start[i + 1]; + for ( pj = si + 1; pj < ei; ++pj ) + { + j = LU->j[pj]; + val = LU->val[pj]; + x[i] -= val * x[j]; + } + x[i] /= LU->val[si]; } - y[i] /= L->entries[pj].val; } } -/* solve sparse upper triangular linear system using backward substitution */ -static void Backward_Subs( const sparse_matrix * const U, const real * const y, real * const x ) +/* Solve triangular system LU*x = y using level scheduling + * + * LU: lower/upper triangular, stored in CSR + * y: constants in linear system (RHS) + * x: solution + * tri: triangularity of LU (lower/upper) + * find_levels: perform level search if positive, otherwise reuse existing levels + * + * Assumptions: + * LU has non-zero diagonals + * Each row of LU has at least one non-zero (i.e., no rows with all zeros) */ +static void tri_solve_level_sched( const sparse_matrix * const LU, const real * const y, + real * const x, const TRIANGULARITY tri, int find_levels ) { - int i, pj, j, si, ei; - real val; + int i, j, pj, local_row, local_level, levels; + static int levels_L = 1, levels_U = 1; + static unsigned int *row_levels_L = NULL, *level_rows_L = NULL, *level_rows_cnt_L = NULL; + static unsigned int *row_levels_U = NULL, *level_rows_U = NULL, *level_rows_cnt_U = NULL; + unsigned int *row_levels, *level_rows, *level_rows_cnt; - for ( i = U->n - 1; i >= 0; --i ) + if ( tri == LOWER ) + { + row_levels = row_levels_L; + level_rows = level_rows_L; + level_rows_cnt = level_rows_cnt_L; + levels = levels_L; + } + else + { + row_levels = row_levels_U; + level_rows = level_rows_U; + level_rows_cnt = level_rows_cnt_U; + levels = levels_U; + } + + if ( row_levels == NULL || level_rows == NULL || level_rows_cnt == NULL ) + { + if ( (row_levels = (unsigned int*) malloc(LU->n * sizeof(unsigned int))) == NULL + || (level_rows = (unsigned int*) malloc(LU->n * LU->n * sizeof(unsigned int))) == NULL + || (level_rows_cnt = (unsigned int*) malloc(LU->n * sizeof(unsigned int))) == NULL ) + { + fprintf( stderr, "Not enough space for triangular solve via level scheduling. Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + + /* find levels (row dependencies in substitutions) */ + if ( find_levels ) + { + memset( row_levels, 0, LU->n * sizeof( unsigned int) ); + memset( level_rows_cnt, 0, LU->n * sizeof( unsigned int) ); + + if ( tri == LOWER ) + { + for ( i = 0; i < LU->n; ++i ) + { + local_level = 0; + for ( pj = LU->start[i]; pj < LU->start[i + 1] - 1; ++pj ) + { + local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 ); + } + + levels = MAX( levels, local_level + 1 ); + row_levels[i] = local_level; + level_rows[local_level * LU->n + level_rows_cnt[local_level]] = i; + ++level_rows_cnt[local_level]; + } + } + else + { + for ( i = LU->n - 1; i >= 0; --i ) + { + local_level = 0; + for ( pj = LU->start[i] + 1; pj < LU->start[i + 1]; ++pj ) + { + local_level = MAX( local_level, row_levels[LU->j[pj]] + 1 ); + } + + levels = MAX( levels, local_level + 1 ); + row_levels[i] = local_level; + level_rows[local_level * LU->n + level_rows_cnt[local_level]] = i; + ++level_rows_cnt[local_level]; + } + } + } + + /* perform substitutions by level */ + if ( tri == LOWER ) + { + for ( i = 0; i < levels; ++i ) + { + #pragma omp parallel for schedule(guided) \ + default(none) private(j, pj, local_row) shared(stderr, i, level_rows_cnt, level_rows) + for ( j = 0; j < level_rows_cnt[i]; ++j ) + { + local_row = level_rows[i * LU->n + j]; + x[local_row] = y[local_row]; + for ( pj = LU->start[local_row]; pj < LU->start[local_row + 1] - 1; ++pj ) + { + x[local_row] -= LU->val[pj] * x[LU->j[pj]]; + + } + x[local_row] /= LU->val[pj]; + } + } + } + else { - x[i] = y[i]; - si = U->start[i]; - ei = U->start[i + 1]; - for ( pj = si + 1; pj < ei; ++pj ) + for ( i = 0; i < levels; ++i ) { - // TODO: remove assignments? compiler optimizes away? - j = U->entries[pj].j; - val = U->entries[pj].val; - x[i] -= val * x[j]; + #pragma omp parallel for schedule(guided) \ + default(none) private(j, pj, local_row) shared(i, level_rows_cnt, level_rows) + for ( j = 0; j < level_rows_cnt[i]; ++j ) + { + local_row = level_rows[i * LU->n + j]; + x[local_row] = y[local_row]; + for ( pj = LU->start[local_row] + 1; pj < LU->start[local_row + 1]; ++pj ) + { + x[local_row] -= LU->val[pj] * x[LU->j[pj]]; + + } + x[local_row] /= LU->val[LU->start[local_row]]; + } } - x[i] /= U->entries[si].val; + } + + /* save level info for re-use if performing repeated triangular solves via preconditioning */ + if ( tri == LOWER ) + { + row_levels_L = row_levels; + level_rows_L = level_rows; + level_rows_cnt_L = level_rows_cnt; + levels_L = levels; + } + else + { + row_levels_U = row_levels; + level_rows_U = level_rows; + level_rows_cnt_U = level_rows_cnt; + levels_U = levels; } } @@ -262,9 +424,9 @@ static void Backward_Subs( const sparse_matrix * const U, const real * const y, * * Note: Newmann series arises from series expansion of the inverse of * the coefficient matrix in the triangular system */ -static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, - const real * const Dinv, const unsigned int n, - const real * const b, real * const x, const unsigned int maxiter ) +static void jacobi_iter( const sparse_matrix * const R, const real * const Dinv, + const real * const b, real * const x, const TRIANGULARITY tri, + const unsigned int maxiter ) { unsigned int i, k, si = 0, ei = 0, iter = 0; #ifdef _OPENMP @@ -286,26 +448,26 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, * overhead per Sparse_MatVec call*/ if ( Dinv_b == NULL ) { - if ( (Dinv_b = (real*) malloc(sizeof(real) * n)) == NULL ) + if ( (Dinv_b = (real*) malloc(sizeof(real) * R->n)) == NULL ) { fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } } if ( rp == NULL ) { - if ( (rp = (real*) malloc(sizeof(real) * n)) == NULL ) + if ( (rp = (real*) malloc(sizeof(real) * R->n)) == NULL ) { fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } } if ( rp2 == NULL ) { - if ( (rp2 = (real*) malloc(sizeof(real) * n)) == NULL ) + if ( (rp2 = (real*) malloc(sizeof(real) * R->n)) == NULL ) { fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } } #ifdef _OPENMP @@ -314,19 +476,19 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, if ( (b_local = (real*) malloc( omp_get_num_threads() * R->n * sizeof(real))) == NULL ) { fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit( INSUFFICIENT_SPACE ); + exit( INSUFFICIENT_MEMORY ); } } #endif - Vector_MakeZero( rp, n ); + Vector_MakeZero( rp, R->n ); } #pragma omp barrier /* precompute and cache, as invariant in loop below */ #pragma omp for schedule(guided) - for ( i = 0; i < n; ++i ) + for ( i = 0; i < R->n; ++i ) { Dinv_b[i] = Dinv[i] * b[i]; } @@ -355,7 +517,7 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, si = R->start[i]; ei = R->start[i + 1] - 1; } - else if (tri == UPPER) + else { si = R->start[i] + 1; @@ -365,9 +527,9 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, for ( k = si; k < ei; ++k ) { #ifdef _OPENMP - b_local[tid * R->n + i] += R->entries[k].val * rp[R->entries[k].j]; + b_local[tid * R->n + i] += R->val[k] * rp[R->j[k]]; #else - rp2[i] += R->entries[k].val * rp[R->entries[k].j]; + rp2[i] += R->val[k] * rp[R->j[k]]; #endif } #ifdef _OPENMP @@ -405,42 +567,198 @@ static void Jacobi_Iter( const sparse_matrix * const R, const TRIANGULARITY tri, while ( iter < maxiter ); } - Vector_Copy( x, rp, n ); + Vector_Copy( x, rp, R->n ); } -/* generalized minimual residual iterative solver for sparse linear systems, - * diagonal preconditioner */ -int GMRES( static_storage *workspace, sparse_matrix *H, - real *b, real tol, real *x, FILE *fout, real *time, real *spmv_time ) +/* Solve triangular system LU*x = y using level scheduling + * + * workspace: lower/upper triangular, stored in CSR + * y: constants in linear system (RHS) + * x: solution + * tri: triangularity of preconditoning factor LU (lower/upper), if using ILU-based + * pc_type: preconditioner computation method used + * pa_type: preconditioner application method to use + * extra: parameter for some application methods, if applicable + * + * Assumptions: + * LU has non-zero diagonals + * Each row of LU has at least one non-zero (i.e., no rows with all zeros) */ +void apply_preconditioner( const static_storage * const workspace, real * const y, + real * const x, const TRIANGULARITY tri, int pc_type, int pa_type, int extra ) { - int i, j, k, itr, N; - real cc, tmp1, tmp2, temp, bnorm; - struct timeval start, stop; + int i, si; + sparse_matrix *LU; + static real *Dinv_L = NULL, *Dinv_U = NULL; + + if ( tri == LOWER ) + { + LU = workspace->L; + } + else + { + LU = workspace->U; + } + + if ( ( pc_type != DIAG_PC || tri != LOWER ) && LU == NULL ) + { + return; + } + + switch ( pa_type ) + { + case NONE_PA: + break; + case TRI_SOLVE_PA: + switch ( pc_type ) + { + case DIAG_PC: + diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + tri_solve( LU, y, x, tri ); + break; + default: + break; + } + break; + case TRI_SOLVE_LEVEL_SCHED_PA: + switch ( pc_type ) + { + case DIAG_PC: + diag_pre_app( workspace->Hdia_inv, y, x, workspace->H->n ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + tri_solve_level_sched( LU, y, x, tri, extra ); + break; + default: + break; + } + break; + case JACOBI_ITER_PA: + switch ( pc_type ) + { + case DIAG_PC: + fprintf( stderr, "Unsupported preconditioner computation/application method combination. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + case ICHOLT_PC: + case ILU_PAR_PC: + case ILUT_PAR_PC: + if ( tri == LOWER ) + { + if ( Dinv_L == NULL ) + { + if ( (Dinv_L = (real*) malloc(sizeof(real) * LU->n)) == NULL ) + { + fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + + /* construct D^{-1} */ + for ( i = 0; i < LU->n; ++i ) + { + si = LU->start[i + 1] - 1; + Dinv_L[i] = 1. / LU->val[si]; + } + + jacobi_iter( LU, Dinv_L, y, x, tri, extra ); + } + else + { + if ( Dinv_U == NULL ) + { + if ( (Dinv_U = (real*) malloc(sizeof(real) * LU->n)) == NULL ) + { + fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + + /* construct D^{-1}_U */ + for ( i = 0; i < LU->n; ++i ) + { + si = LU->start[i]; + Dinv_U[i] = 1. / LU->val[si]; + } + + jacobi_iter( LU, Dinv_U, y, x, tri, extra ); + } + break; + default: + break; + } + break; + default: + fprintf( stderr, "Unrecognized preconditioner application method. Terminating...\n" ); + exit( INVALID_INPUT ); + break; + + } + + return; +} + + +/* generalized minimual residual iterative solver for sparse linear systems */ +int GMRES( const static_storage * const workspace, const control_params * const control, + const sparse_matrix * const H, const real * const b, real tol, real * const x, + const FILE * const fout, real * const time, real * const spmv_time ) +{ + int i, j, k, itr, N, si; + real cc, tmp1, tmp2, temp, bnorm, time_start; N = H->n; bnorm = Norm( b, N ); - /* apply the diagonal pre-conditioner to rhs */ - gettimeofday( &start, NULL ); - for ( i = 0; i < N; ++i ) - workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i]; - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); + + if ( control->pre_comp_type == DIAG_PC ) + { + /* apply precondioner to RHS */ + apply_preconditioner( workspace, workspace->b, workspace->b_prc, LOWER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + } /* GMRES outer-loop */ for ( itr = 0; itr < MAX_ITR; ++itr ) { /* calculate r0 */ - gettimeofday( &start, NULL ); + time_start = Get_Time( ); Sparse_MatVec( H, x, workspace->b_prm ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - for ( i = 0; i < N; ++i ) - workspace->b_prm[i] *= workspace->Hdia_inv[i]; /* pre-conditioner */ + *spmv_time += Get_Timing_Info( time_start ); + + if ( control->pre_comp_type == DIAG_PC ) + { + time_start = Get_Time( ); + apply_preconditioner( workspace, workspace->b_prm, workspace->b_prm, LOWER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + *time += Get_Timing_Info( time_start ); + } + + + if ( control->pre_comp_type == DIAG_PC ) + { + Vector_Sum( workspace->v[0], 1., workspace->b_prc, -1., workspace->b_prm, N ); + } + else + { + Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N ); + } + + if ( control->pre_comp_type != DIAG_PC ) + { + time_start = Get_Time( ); + apply_preconditioner( workspace, workspace->v[0], workspace->v[0], LOWER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + apply_preconditioner( workspace, workspace->v[0], workspace->v[0], UPPER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + *time += Get_Timing_Info( time_start ); + } - Vector_Sum(workspace->v[0], 1., workspace->b_prc, -1., workspace->b_prm, N); workspace->g[0] = Norm( workspace->v[0], N ); Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N ); //fprintf( stderr, "res: %.15e\n", workspace->g[0] ); @@ -449,26 +767,28 @@ int GMRES( static_storage *workspace, sparse_matrix *H, for ( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ ) { /* matvec */ - gettimeofday( &start, NULL ); + time_start = Get_Time( ); Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - /*pre-conditioner*/ - gettimeofday( &start, NULL ); - for ( k = 0; k < N; ++k ) - workspace->v[j + 1][k] *= workspace->Hdia_inv[k]; - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - //fprintf( stderr, "%d-%d: matvec done.\n", itr, j ); + *spmv_time += Get_Timing_Info( time_start ); + + time_start = Get_Time( ); + apply_preconditioner( workspace, workspace->v[j + 1], workspace->v[j + 1], LOWER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + apply_preconditioner( workspace, workspace->v[j + 1], workspace->v[j + 1], UPPER, + control->pre_comp_type, control->pre_app_type, control->pre_app_jacobi_iters ); + *time += Get_Timing_Info( time_start ); /* apply modified Gram-Schmidt to orthogonalize the new residual */ - for ( i = 0; i <= j; i++ ) + for ( i = 0; i < j - 1; i++ ) + { + workspace->h[i][j] = 0; + } + + //for( i = 0; i <= j; i++ ) { + for ( i = MAX(j - 1, 0); i <= j; i++ ) { workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j + 1], N ); - Vector_Add( workspace->v[j + 1], - -workspace->h[i][j], workspace->v[i], N ); + Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N ); } workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N ); @@ -476,9 +796,8 @@ int GMRES( static_storage *workspace, sparse_matrix *H, 1. / workspace->h[j + 1][j], workspace->v[j + 1], N ); // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); - /* Givens rotations on the upper-Hessenberg matrix to make it U */ - for ( i = 0; i <= j; i++ ) + for ( i = MAX(j - 1, 0); i <= j; i++ ) { if ( i == j ) { @@ -502,32 +821,41 @@ int GMRES( static_storage *workspace, sparse_matrix *H, workspace->g[j] = tmp1; workspace->g[j + 1] = tmp2; - // fprintf( stderr, "h: " ); - // for( i = 0; i <= j+1; ++i ) - // fprintf( stderr, "%.6f ", workspace->h[i][j] ); - // fprintf( stderr, "\n" ); + //fprintf( stderr, "h: " ); + //for( i = 0; i <= j+1; ++i ) + //fprintf( stderr, "%.6f ", workspace->h[i][j] ); + //fprintf( stderr, "\n" ); //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] ); } - /* solve Hy = g. - H is now upper-triangular, do back-substitution */ + /* TODO: solve using Jacobi iteration? */ + /* solve Hy = g: H is now upper-triangular, do back-substitution */ for ( i = j - 1; i >= 0; i-- ) { temp = workspace->g[i]; for ( k = j - 1; k > i; k-- ) + { temp -= workspace->h[i][k] * workspace->y[k]; + } workspace->y[i] = temp / workspace->h[i][i]; } /* update x = x_0 + Vy */ + Vector_MakeZero( workspace->p, N ); for ( i = 0; i < j; i++ ) - Vector_Add( x, workspace->y[i], workspace->v[i], N ); + { + Vector_Add( workspace->p, workspace->y[i], workspace->v[i], N ); + } + + Vector_Add( x, 1., workspace->p, N ); /* stopping condition */ if ( fabs(workspace->g[j]) / bnorm <= tol ) + { break; + } } // Sparse_MatVec( H, x, workspace->b_prm ); @@ -553,9 +881,9 @@ int GMRES( static_storage *workspace, sparse_matrix *H, } - -int GMRES_HouseHolder( static_storage *workspace, sparse_matrix *H, - real *b, real tol, real *x, FILE *fout) +int GMRES_HouseHolder( const static_storage * const workspace, const control_params * const control, + const sparse_matrix * const H, const real * const b, real tol, real * const x, + const FILE * const fout, real * const time, real * const spmv_time ) { int i, j, k, itr, N; real cc, tmp1, tmp2, temp, bnorm; @@ -743,330 +1071,7 @@ int GMRES_HouseHolder( static_storage *workspace, sparse_matrix *H, } -/* generalized minimual residual iterative solver for sparse linear systems, - * with preconditioner using factors LU \approx H - * and forward / backward substitution */ -int PGMRES( static_storage *workspace, sparse_matrix *H, real *b, real tol, - sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout, real *time, real *spmv_time ) -{ - int i, j, k, itr, N; - real cc, tmp1, tmp2, temp, bnorm; - struct timeval start, stop; - - N = H->n; - bnorm = Norm( b, N ); - - /* GMRES outer-loop */ - for ( itr = 0; itr < MAX_ITR; ++itr ) - { - /* calculate r0 */ - gettimeofday( &start, NULL ); - Sparse_MatVec( H, x, workspace->b_prm ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N ); - gettimeofday( &start, NULL ); - Forward_Subs( L, workspace->v[0], workspace->v[0] ); - Backward_Subs( U, workspace->v[0], workspace->v[0] ); - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - workspace->g[0] = Norm( workspace->v[0], N ); - Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N ); - //fprintf( stderr, "res: %.15e\n", workspace->g[0] ); - - /* GMRES inner-loop */ - for ( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ ) - { - /* matvec */ - gettimeofday( &start, NULL ); - Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - gettimeofday( &start, NULL ); - Forward_Subs( L, workspace->v[j + 1], workspace->v[j + 1] ); - Backward_Subs( U, workspace->v[j + 1], workspace->v[j + 1] ); - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - - /* apply modified Gram-Schmidt to orthogonalize the new residual */ - for ( i = 0; i < j - 1; i++ ) workspace->h[i][j] = 0; - - //for( i = 0; i <= j; i++ ) { - for ( i = MAX(j - 1, 0); i <= j; i++ ) - { - workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j + 1], N ); - Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N ); - } - - workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N ); - Vector_Scale( workspace->v[j + 1], - 1. / workspace->h[j + 1][j], workspace->v[j + 1], N ); - // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); - - /* Givens rotations on the upper-Hessenberg matrix to make it U */ - for ( i = MAX(j - 1, 0); i <= j; i++ ) - { - if ( i == j ) - { - cc = SQRT( SQR(workspace->h[j][j]) + SQR(workspace->h[j + 1][j]) ); - workspace->hc[j] = workspace->h[j][j] / cc; - workspace->hs[j] = workspace->h[j + 1][j] / cc; - } - - tmp1 = workspace->hc[i] * workspace->h[i][j] + - workspace->hs[i] * workspace->h[i + 1][j]; - tmp2 = -workspace->hs[i] * workspace->h[i][j] + - workspace->hc[i] * workspace->h[i + 1][j]; - - workspace->h[i][j] = tmp1; - workspace->h[i + 1][j] = tmp2; - } - - /* apply Givens rotations to the rhs as well */ - tmp1 = workspace->hc[j] * workspace->g[j]; - tmp2 = -workspace->hs[j] * workspace->g[j]; - workspace->g[j] = tmp1; - workspace->g[j + 1] = tmp2; - - //fprintf( stderr, "h: " ); - //for( i = 0; i <= j+1; ++i ) - //fprintf( stderr, "%.6f ", workspace->h[i][j] ); - //fprintf( stderr, "\n" ); - //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] ); - } - - - /* solve Hy = g: H is now upper-triangular, do back-substitution */ - for ( i = j - 1; i >= 0; i-- ) - { - temp = workspace->g[i]; - for ( k = j - 1; k > i; k-- ) - temp -= workspace->h[i][k] * workspace->y[k]; - - workspace->y[i] = temp / workspace->h[i][i]; - } - - /* update x = x_0 + Vy */ - Vector_MakeZero( workspace->p, N ); - for ( i = 0; i < j; i++ ) - Vector_Add( workspace->p, workspace->y[i], workspace->v[i], N ); - //Backward_Subs( U, workspace->p, workspace->p ); - //Forward_Subs( L, workspace->p, workspace->p ); - Vector_Add( x, 1., workspace->p, N ); - - /* stopping condition */ - if ( fabs(workspace->g[j]) / bnorm <= tol ) - break; - } - - // Sparse_MatVec( H, x, workspace->b_prm ); - // for( i = 0; i < N; ++i ) - // workspace->b_prm[i] *= workspace->Hdia_inv[i]; - // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); - // for( i = 0; i < N; ++i ) - // fprintf( fout, "%10.5f%15.12f%15.12f\n", - // workspace->b_prc[i], workspace->b_prm[i], x[i] );*/ - - // fprintf(fout,"GMRES outer:%d, inner:%d iters - residual norm: %25.20f\n", - // itr, j, fabs( workspace->g[j] ) / bnorm ); - // data->timing.matvec += itr * RESTART + j; - - if ( itr >= MAX_ITR ) - { - fprintf( stderr, "GMRES convergence failed\n" ); - // return -1; - return itr * (RESTART + 1) + j + 1; - } - - return itr * (RESTART + 1) + j + 1; -} - - -/* generalized minimual residual iterative solver for sparse linear systems, - * with preconditioner using factors LU \approx H - * and Jacobi iteration for approximate factor application */ -int PGMRES_Jacobi( static_storage *workspace, sparse_matrix *H, real *b, real tol, - sparse_matrix *L, sparse_matrix *U, real *x, unsigned int iters, - FILE *fout, real *time, real *spmv_time ) -{ - int i, j, k, itr, N, si; - real cc, tmp1, tmp2, temp, bnorm; - real *Dinv_L, *Dinv_U; - struct timeval start, stop; - - N = H->n; - bnorm = Norm( b, N ); - - /* Compute Jacobi iteration matrices from - * truncated Newmann series: x_{k+1} = Gx_k + D^{-1}b - * where: - * G = I - D^{-1}R - * R = triangular matrix - * D = diagonal matrix, diagonals from R */ - if ( (Dinv_L = (real*) malloc(sizeof(real) * N)) == NULL - || (Dinv_U = (real*) malloc(sizeof(real) * N)) == NULL ) - { - fprintf( stderr, "not enough memory for Jacobi iteration matrices. terminating.\n" ); - exit(INSUFFICIENT_SPACE); - } - - /* construct D^{-1}_L and D^{-1}_U */ - for ( i = 0; i < N; ++i ) - { - si = L->start[i + 1] - 1; - Dinv_L[i] = 1. / L->entries[si].val; - - si = U->start[i]; - Dinv_U[i] = 1. / U->entries[si].val; - } - - /* GMRES outer-loop */ - for ( itr = 0; itr < MAX_ITR; ++itr ) - { - /* calculate r0 */ - gettimeofday( &start, NULL ); - Sparse_MatVec( H, x, workspace->b_prm ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - Vector_Sum( workspace->v[0], 1., b, -1., workspace->b_prm, N ); - gettimeofday( &start, NULL ); - Jacobi_Iter( L, LOWER, Dinv_L, N, workspace->v[0], workspace->v[0], iters ); - Jacobi_Iter( U, UPPER, Dinv_U, N, workspace->v[0], workspace->v[0], iters ); - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - workspace->g[0] = Norm( workspace->v[0], N ); - Vector_Scale( workspace->v[0], 1. / workspace->g[0], workspace->v[0], N ); - //fprintf( stderr, "res: %.15e\n", workspace->g[0] ); - - /* GMRES inner-loop */ - for ( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ ) - { - /* matvec */ - gettimeofday( &start, NULL ); - Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1] ); - gettimeofday( &stop, NULL ); - *spmv_time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - gettimeofday( &start, NULL ); - Jacobi_Iter( L, LOWER, Dinv_L, N, workspace->v[j + 1], workspace->v[j + 1], iters ); - Jacobi_Iter( U, UPPER, Dinv_U, N, workspace->v[j + 1], workspace->v[j + 1], iters ); - gettimeofday( &stop, NULL ); - *time += (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); - - /* apply modified Gram-Schmidt to orthogonalize the new residual */ - for ( i = 0; i < j - 1; i++ ) - { - workspace->h[i][j] = 0; - } - - //for( i = 0; i <= j; i++ ) { - for ( i = MAX(j - 1, 0); i <= j; i++ ) - { - workspace->h[i][j] = Dot( workspace->v[i], workspace->v[j + 1], N ); - Vector_Add( workspace->v[j + 1], -workspace->h[i][j], workspace->v[i], N ); - } - - workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N ); - Vector_Scale( workspace->v[j + 1], - 1. / workspace->h[j + 1][j], workspace->v[j + 1], N ); - // fprintf( stderr, "%d-%d: orthogonalization completed.\n", itr, j ); - - /* Givens rotations on the upper-Hessenberg matrix to make it U */ - for ( i = MAX(j - 1, 0); i <= j; i++ ) - { - if ( i == j ) - { - cc = SQRT( SQR(workspace->h[j][j]) + SQR(workspace->h[j + 1][j]) ); - workspace->hc[j] = workspace->h[j][j] / cc; - workspace->hs[j] = workspace->h[j + 1][j] / cc; - } - - tmp1 = workspace->hc[i] * workspace->h[i][j] + - workspace->hs[i] * workspace->h[i + 1][j]; - tmp2 = -workspace->hs[i] * workspace->h[i][j] + - workspace->hc[i] * workspace->h[i + 1][j]; - - workspace->h[i][j] = tmp1; - workspace->h[i + 1][j] = tmp2; - } - - /* apply Givens rotations to the rhs as well */ - tmp1 = workspace->hc[j] * workspace->g[j]; - tmp2 = -workspace->hs[j] * workspace->g[j]; - workspace->g[j] = tmp1; - workspace->g[j + 1] = tmp2; - - //fprintf( stderr, "h: " ); - //for( i = 0; i <= j+1; ++i ) - //fprintf( stderr, "%.6f ", workspace->h[i][j] ); - //fprintf( stderr, "\n" ); - //fprintf( stderr, "res: %.15e\n", workspace->g[j+1] ); - } - - - /* TODO: solve using Jacobi iteration? */ - /* solve Hy = g: H is now upper-triangular, do back-substitution */ - for ( i = j - 1; i >= 0; i-- ) - { - temp = workspace->g[i]; - for ( k = j - 1; k > i; k-- ) - { - temp -= workspace->h[i][k] * workspace->y[k]; - } - - workspace->y[i] = temp / workspace->h[i][i]; - } - - /* update x = x_0 + Vy */ - Vector_MakeZero( workspace->p, N ); - for ( i = 0; i < j; i++ ) - { - Vector_Add( workspace->p, workspace->y[i], workspace->v[i], N ); - } - //Backward_Subs( U, workspace->p, workspace->p ); - //Forward_Subs( L, workspace->p, workspace->p ); - Vector_Add( x, 1., workspace->p, N ); - - /* stopping condition */ - if ( fabs(workspace->g[j]) / bnorm <= tol ) - { - break; - } - } - - // Sparse_MatVec( H, x, workspace->b_prm ); - // for( i = 0; i < N; ++i ) - // workspace->b_prm[i] *= workspace->Hdia_inv[i]; - // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); - // for( i = 0; i < N; ++i ) - // fprintf( fout, "%10.5f%15.12f%15.12f\n", - // workspace->b_prc[i], workspace->b_prm[i], x[i] );*/ - - // fprintf(fout,"GMRES outer:%d, inner:%d iters - residual norm: %25.20f\n", - // itr, j, fabs( workspace->g[j] ) / bnorm ); - // data->timing.matvec += itr * RESTART + j; - - free( Dinv_U ); - free( Dinv_L ); - - if ( itr >= MAX_ITR ) - { - fprintf( stderr, "GMRES convergence failed\n" ); - // return -1; - return itr * (RESTART + 1) + j + 1; - } - - return itr * (RESTART + 1) + j + 1; -} - - +/* Preconditioned Conjugate Gradient */ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, sparse_matrix *L, sparse_matrix *U, real *x, FILE *fout ) { @@ -1084,8 +1089,8 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, //Print_Soln( workspace, x, q, b, N ); //fprintf( stderr, "res: %.15e\n", r_norm ); - Forward_Subs( L, workspace->r, workspace->d ); - Backward_Subs( U, workspace->d, workspace->p ); + tri_solve( L, workspace->r, workspace->d, LOWER ); + tri_solve( U, workspace->d, workspace->p, UPPER ); sig_new = Dot( workspace->r, workspace->p, N ); sig0 = sig_new; @@ -1103,8 +1108,8 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, r_norm = Norm(workspace->r, N); //fprintf( stderr, "res: %.15e\n", r_norm ); - Forward_Subs( L, workspace->r, workspace->d ); - Backward_Subs( U, workspace->d, workspace->d ); + tri_solve( L, workspace->r, workspace->d, LOWER ); + tri_solve( U, workspace->d, workspace->d, UPPER ); sig_old = sig_new; sig_new = Dot( workspace->r, workspace->d, N ); beta = sig_new / sig_old; @@ -1122,6 +1127,7 @@ int PCG( static_storage *workspace, sparse_matrix *A, real *b, real tol, } +/* Conjugate Gradient */ int CG( static_storage *workspace, sparse_matrix *H, real *b, real tol, real *x, FILE *fout ) { @@ -1136,7 +1142,9 @@ int CG( static_storage *workspace, sparse_matrix *H, Sparse_MatVec( H, x, workspace->q ); Vector_Sum( workspace->r , 1., b, -1., workspace->q, N ); for ( j = 0; j < N; ++j ) + { workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } sig_new = Dot( workspace->r, workspace->d, N ); sig0 = sig_new; @@ -1179,7 +1187,6 @@ int CG( static_storage *workspace, sparse_matrix *H, } - /* Steepest Descent */ int SDM( static_storage *workspace, sparse_matrix *H, real *b, real tol, real *x, FILE *fout ) @@ -1195,7 +1202,9 @@ int SDM( static_storage *workspace, sparse_matrix *H, Sparse_MatVec( H, x, workspace->q ); Vector_Sum( workspace->r , 1., b, -1., workspace->q, N ); for ( j = 0; j < N; ++j ) + { workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } sig = Dot( workspace->r, workspace->d, N ); sig0 = sig; @@ -1211,7 +1220,9 @@ int SDM( static_storage *workspace, sparse_matrix *H, Vector_Add( x, alpha, workspace->d, N ); Vector_Add( workspace->r, -alpha, workspace->q, N ); for ( j = 0; j < N; ++j ) + { workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } //fprintf( stderr, "d_norm:%24.15e, q_norm:%24.15e, tmp:%24.15e\n", // Norm(workspace->d,N), Norm(workspace->q,N), tmp ); @@ -1229,6 +1240,15 @@ int SDM( static_storage *workspace, sparse_matrix *H, } +/* Estimate the stability of a 2-side preconditioning scheme + * using the factorization A \approx LU. Specifically, estimate the 1-norm of A^{-1} + * using the 1-norm of (LU)^{-1}e, with e = [1 1 ... 1]^T through 2 triangular solves: + * 1) Ly = e + * 2) Ux = y where y = Ux + * That is, we seek to solve e = LUx for unknown x + * + * Reference: Incomplete LU Preconditioning with the Multilevel Fast Multipole Algorithm + * for Electromagnetic Scattering, SIAM J. Sci. Computing, 2007 */ real condest( const sparse_matrix * const L, const sparse_matrix * const U ) { unsigned int i, N; @@ -1239,7 +1259,7 @@ real condest( const sparse_matrix * const L, const sparse_matrix * const U ) if ( (e = (real*) malloc(sizeof(real) * N)) == NULL ) { fprintf( stderr, "Not enough memory for condest. Terminating.\n" ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } for ( i = 0; i < N; ++i ) @@ -1247,9 +1267,10 @@ real condest( const sparse_matrix * const L, const sparse_matrix * const U ) e[i] = 1.; } - Forward_Subs( L, e, e ); - Backward_Subs( U, e, e ); + tri_solve( L, e, e, LOWER ); + tri_solve( U, e, e, UPPER ); + /* compute 1-norm of vector e */ c = fabs(e[0]); for ( i = 1; i < N; ++i) { diff --git a/sPuReMD/src/GMRES.h b/sPuReMD/src/GMRES.h index 2acf61f2c2df46a175b4bccf31d229c147611755..d6d1c4844a9ea2870edffbdf1d65a5da358b362f 100644 --- a/sPuReMD/src/GMRES.h +++ b/sPuReMD/src/GMRES.h @@ -24,20 +24,13 @@ #include "mytypes.h" -int GMRES( static_storage*, sparse_matrix*, - real*, real, real*, FILE*, real*, real* ); +int GMRES( const static_storage * const, const control_params * const, + const sparse_matrix * const, const real * const, real, real * const, + const FILE * const, real * const, real * const ); -int GMRES_HouseHolder( static_storage*, sparse_matrix*, - real*, real, real*, FILE* ); - -int PGMRES( static_storage*, sparse_matrix*, real*, real, - sparse_matrix*, sparse_matrix*, real*, FILE*, real*, real* ); - -int PGMRES_Jacobi( static_storage*, sparse_matrix*, real*, real, - sparse_matrix*, sparse_matrix*, real*, unsigned int, FILE*, real*, real* ); - -int PCG( static_storage*, sparse_matrix*, real*, real, - sparse_matrix*, sparse_matrix*, real*, FILE* ); +int GMRES_HouseHolder( const static_storage * const, const control_params * const, + const sparse_matrix * const, const real * const, real, real * const, + const FILE * const, real * const, real * const ); int CG( static_storage*, sparse_matrix*, real*, real, real*, FILE* ); diff --git a/sPuReMD/src/QEq.c b/sPuReMD/src/QEq.c index 8306ac1db912641a66dfedfe40ae691e6748812a..6e6cc539531ac4243a2c50a0affe9e556aeef225 100644 --- a/sPuReMD/src/QEq.c +++ b/sPuReMD/src/QEq.c @@ -24,30 +24,63 @@ #include "GMRES.h" #include "list.h" #include "print_utils.h" +#include "tool_box.h" #if defined(HAVE_SUPERLU_MT) #include "slu_mt_ddefs.h" #endif + static int compare_matrix_entry(const void *v1, const void *v2) { /* larger element has larger column index */ - return ((sparse_matrix_entry *)v1)->j - ((sparse_matrix_entry *)v2)->j; + return *(int *)v1 - *(int *)v2; } static void Sort_Matrix_Rows( sparse_matrix * const A ) { - int i, si, ei; + int i, j, k, si, ei, *temp_j; + real *temp_val; + + if ( ( temp_j = (int*) malloc( A->n * sizeof(int)) ) == NULL + || ( temp_val = (real*) malloc( A->n * sizeof(real)) ) == NULL ) + { + fprintf( stderr, "Not enough space for matrix row sort. Terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } /* sort each row of A using column indices */ for ( i = 0; i < A->n; ++i ) { si = A->start[i]; ei = A->start[i + 1]; - /* polymorphic sort in standard C library */ - qsort( &(A->entries[si]), ei - si, - sizeof(sparse_matrix_entry), compare_matrix_entry ); + memcpy( temp_j, A->j + si, sizeof(int) * (ei - si) ); + memcpy( temp_val, A->val + si, sizeof(real) * (ei - si) ); + + //TODO: consider implementing single custom one-pass sort instead of using qsort + manual sort + /* polymorphic sort in standard C library using column indices */ + qsort( temp_j, ei - si, sizeof(int), compare_matrix_entry ); + + /* manually sort vals */ + for ( j = 0; j < (ei - si); ++j ) + { + for ( k = 0; k < (ei - si); ++k ) + { + if ( A->j[si + j] == temp_j[k] ) + { + A->val[si + k] = temp_val[j]; + break; + } + + } + } + + /* copy sorted column indices */ + memcpy( A->j + si, temp_j, sizeof(int) * (ei - si) ); } + + free( temp_val ); + free( temp_j ); } @@ -59,7 +92,7 @@ static void Transpose( const sparse_matrix const *A, sparse_matrix const *A_t ) if ( (A_t_top = (unsigned int*) calloc( A->n + 1, sizeof(unsigned int))) == NULL ) { fprintf( stderr, "Not enough space for matrix tranpose. Terminating...\n" ); - exit( INSUFFICIENT_SPACE ); + exit( INSUFFICIENT_MEMORY ); } memset( A_t->start, 0, (A->n + 1) * sizeof(unsigned int) ); @@ -69,7 +102,7 @@ static void Transpose( const sparse_matrix const *A, sparse_matrix const *A_t ) { for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) { - ++A_t->start[A->entries[pj].j + 1]; + ++A_t->start[A->j[pj] + 1]; } } @@ -84,9 +117,9 @@ static void Transpose( const sparse_matrix const *A, sparse_matrix const *A_t ) { for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) { - j = A->entries[pj].j; - A_t->entries[A_t_top[j]].j = i; - A_t->entries[A_t_top[j]].val = A->entries[pj].val; + j = A->j[pj]; + A_t->j[A_t_top[j]] = i; + A_t->val[A_t_top[j]] = A->val[pj]; ++A_t_top[j]; } } @@ -103,52 +136,116 @@ static void Transpose_I( sparse_matrix * const A ) if ( Allocate_Matrix( &A_t, A->n, A->m ) == 0 ) { fprintf( stderr, "not enough memory for transposing matrices. terminating.\n" ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } Transpose( A, A_t ); memcpy( A->start, A_t->start, sizeof(int) * (A_t->n + 1) ); - memcpy( A->entries, A_t->entries, sizeof(sparse_matrix_entry) * (A_t->start[A_t->n]) ); + memcpy( A->j, A_t->j, sizeof(int) * (A_t->start[A_t->n]) ); + memcpy( A->val, A_t->val, sizeof(real) * (A_t->start[A_t->n]) ); Deallocate_Matrix( A_t ); } -static void Calculate_Droptol( const sparse_matrix * const A, real * const droptol, real dtol ) +static void Calculate_Droptol( const sparse_matrix * const A, real * const droptol, + const real dtol ) { int i, j, k; real val; +#ifdef _OPENMP + static real *droptol_local; + unsigned int tid; +#endif - /* init droptol to 0 */ - for ( i = 0; i < A->n; ++i ) - droptol[i] = 0; - - /* calculate sqaure of the norm of each row */ - for ( i = 0; i < A->n; ++i ) + #pragma omp parallel default(none) private(i, j, k, val, tid), shared(droptol_local) { - for ( k = A->start[i]; k < A->start[i + 1] - 1; ++k ) +#ifdef _OPENMP + tid = omp_get_thread_num(); + + #pragma omp master + { + /* keep b_local for program duration to avoid allocate/free + * overhead per Sparse_MatVec call*/ + if ( droptol_local == NULL ) + { + if ( (droptol_local = (real*) malloc( omp_get_num_threads() * A->n * sizeof(real))) == NULL ) + { + exit( INSUFFICIENT_MEMORY ); + } + } + } + + #pragma omp barrier +#endif + + /* init droptol to 0 */ + for ( i = 0; i < A->n; ++i ) + { +#ifdef _OPENMP + droptol_local[tid * A->n + i] = 0.0; +#else + droptol[i] = 0.0; +#endif + } + + #pragma omp barrier + + /* calculate sqaure of the norm of each row */ + #pragma omp parallel for schedule(guided) \ + default(none) private(i, j, k, val, tid) shared(droptol_local) + for ( i = 0; i < A->n; ++i ) { - j = A->entries[k].j; - val = A->entries[k].val; + for ( k = A->start[i]; k < A->start[i + 1] - 1; ++k ) + { + j = A->j[k]; + val = A->val[k]; + +#ifdef _OPENMP + droptol_local[tid * A->n + i] += val * val; + droptol_local[tid * A->n + j] += val * val; +#else + droptol[i] += val * val; + droptol[j] += val * val; +#endif + } + val = A->val[k]; // diagonal entry +#ifdef _OPENMP + droptol_local[tid * A->n + i] += val * val; +#else droptol[i] += val * val; - droptol[j] += val * val; +#endif } - val = A->entries[k].val; // diagonal entry - droptol[i] += val * val; - } + #pragma omp barrier - /* calculate local droptol for each row */ - //fprintf( stderr, "droptol: " ); - for ( i = 0; i < A->n; ++i ) - { - //fprintf( stderr, "%f-->", droptol[i] ); - droptol[i] = SQRT( droptol[i] ) * dtol; - //fprintf( stderr, "%f ", droptol[i] ); +#ifdef _OPENMP + #pragma omp parallel for schedule(guided) default(none) private(i, k) shared(droptol_local) + for ( i = 0; i < A->n; ++i ) + { + droptol[i] = 0.0; + for ( k = 0; k < omp_get_num_threads(); ++k ) + { + droptol[i] += droptol_local[k * A->n + i]; + } + } +#endif + + #pragma omp barrier + + /* calculate local droptol for each row */ + //fprintf( stderr, "droptol: " ); + #pragma omp parallel for schedule(guided) default(none) private(i) + for ( i = 0; i < A->n; ++i ) + { + //fprintf( stderr, "%f-->", droptol[i] ); + droptol[i] = SQRT( droptol[i] ) * dtol; + //fprintf( stderr, "%f ", droptol[i] ); + } + //fprintf( stderr, "\n" ); } - //fprintf( stderr, "\n" ); } @@ -160,17 +257,21 @@ static int Estimate_LU_Fill( const sparse_matrix * const A, const real * const d fillin = 0; - //fprintf( stderr, "n: %d\n", A->n ); + #pragma omp parallel for schedule(guided) \ + default(none) private(i, j, pj, val) reduction(+: fillin) for ( i = 0; i < A->n; ++i ) + { for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) { - j = A->entries[pj].j; - val = A->entries[pj].val; - //fprintf( stderr, "i: %d, j: %d", i, j ); + j = A->j[pj]; + val = A->val[pj]; - if ( fabs(val) > droptol[i] ) + if ( FABS(val) > droptol[i] ) + { ++fillin; + } } + } return fillin + A->n; } @@ -266,12 +367,12 @@ static real SuperLU_Factorize( const sparse_matrix * const A, || ( (Utop = (unsigned int*) malloc( (A->n + 1) * sizeof(unsigned int))) == NULL ) ) { fprintf( stderr, "Not enough space for SuperLU factorization. Terminating...\n" ); - exit( INSUFFICIENT_SPACE ); + exit( INSUFFICIENT_MEMORY ); } if ( Allocate_Matrix( &A_t, A->n, A->m ) == 0 ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } /* Set up the sparse matrix data structure for A. */ @@ -474,13 +575,13 @@ static real SuperLU_Factorize( const sparse_matrix * const A, #endif -/* Diagonal preconditioner */ -static real diagonal_pre( const reax_system * const system, real * const Hdia_inv ) +/* Diagonal (Jacobi) preconditioner computation */ +static real diag_pre_comp( const reax_system * const system, real * const Hdia_inv ) { unsigned int i; - struct timeval start, stop; + real start; - gettimeofday( &start, NULL ); + start = Get_Time( ); #pragma omp parallel for schedule(guided) \ default(none) private(i) @@ -489,23 +590,22 @@ static real diagonal_pre( const reax_system * const system, real * const Hdia_in Hdia_inv[i] = 1.0 / system->reaxprm.sbp[system->atoms[i].type].eta; } - gettimeofday( &stop, NULL ); - return (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); + return Get_Timing_Info( start ); } -/* Incomplete Cholesky factorization with thresholding */ +/* Incomplete Cholesky factorization with dual thresholding */ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, sparse_matrix * const L, sparse_matrix * const U ) { - sparse_matrix_entry tmp[1000]; + //TODO: either add compilation parameter or dynamically allocate + int tmp_j[1000]; + real tmp_val[1000]; int i, j, pj, k1, k2, tmptop, Ltop; - real val; + real val, start; int *Utop; - struct timeval start, stop; - gettimeofday( &start, NULL ); + start = Get_Time( ); Utop = (int*) malloc((A->n + 1) * sizeof(int)); @@ -513,10 +613,14 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, Ltop = 0; tmptop = 0; for ( i = 0; i <= A->n; ++i ) + { L->start[i] = U->start[i] = 0; + } for ( i = 0; i < A->n; ++i ) + { Utop[i] = 0; + } //fprintf( stderr, "n: %d\n", A->n ); for ( i = 0; i < A->n; ++i ) @@ -526,30 +630,36 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) { - j = A->entries[pj].j; - val = A->entries[pj].val; + j = A->j[pj]; + val = A->val[pj]; //fprintf( stderr, "i: %d, j: %d", i, j ); - if ( fabs(val) > droptol[i] ) + if ( FABS(val) > droptol[i] ) { k1 = 0; k2 = L->start[j]; while ( k1 < tmptop && k2 < L->start[j + 1] ) { - if ( tmp[k1].j < L->entries[k2].j ) + if ( tmp_j[k1] < L->j[k2] ) + { ++k1; - else if ( tmp[k1].j > L->entries[k2].j ) + } + else if ( tmp_j[k1] > L->j[k2] ) + { ++k2; + } else - val -= (tmp[k1++].val * L->entries[k2++].val); + { + val -= (tmp_val[k1++] * L->val[k2++]); + } } // L matrix is lower triangular, // so right before the start of next row comes jth diagonal - val /= L->entries[L->start[j + 1] - 1].val; + val /= L->val[L->start[j + 1] - 1]; - tmp[tmptop].j = j; - tmp[tmptop].val = val; + tmp_j[tmptop] = j; + tmp_val[tmptop] = val; ++tmptop; } //fprintf( stderr, " -- done\n" ); @@ -557,18 +667,20 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, // compute the ith diagonal in L // sanity check - if ( A->entries[pj].j != i ) + if ( A->j[pj] != i ) { fprintf( stderr, "i=%d, badly built A matrix!\n", i ); - exit(999); + exit( NUMERIC_BREAKDOWN ); } - val = A->entries[pj].val; + val = A->val[pj]; for ( k1 = 0; k1 < tmptop; ++k1 ) - val -= (tmp[k1].val * tmp[k1].val); + { + val -= (tmp_val[k1] * tmp_val[k1]); + } - tmp[tmptop].j = i; - tmp[tmptop].val = SQRT(val); + tmp_j[tmptop] = i; + tmp_val[tmptop] = SQRT(val); // apply the dropping rule once again //fprintf( stderr, "row%d: tmptop: %d\n", i, tmptop ); @@ -577,17 +689,19 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, //fprintf( stderr, "\n" ); //fprintf( stderr, "row(%d): droptol=%.4f\n", i+1, droptol[i] ); for ( k1 = 0; k1 < tmptop; ++k1 ) - if ( fabs(tmp[k1].val) > droptol[i] / tmp[tmptop].val ) + { + if ( FABS(tmp_val[k1]) > droptol[i] / tmp_val[tmptop] ) { - L->entries[Ltop].j = tmp[k1].j; - L->entries[Ltop].val = tmp[k1].val; - U->start[tmp[k1].j + 1]++; + L->j[Ltop] = tmp_j[k1]; + L->val[Ltop] = tmp_val[k1]; + U->start[tmp_j[k1] + 1]++; ++Ltop; //fprintf( stderr, "%d(%.4f) ", tmp[k1].j+1, tmp[k1].val ); } + } // keep the diagonal in any case - L->entries[Ltop].j = tmp[k1].j; - L->entries[Ltop].val = tmp[k1].val; + L->j[Ltop] = tmp_j[k1]; + L->val[Ltop] = tmp_val[k1]; ++Ltop; //fprintf( stderr, "%d(%.4f)\n", tmp[k1].j+1, tmp[k1].val ); } @@ -604,9 +718,9 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, { for ( pj = L->start[i]; pj < L->start[i + 1]; ++pj ) { - j = L->entries[pj].j; - U->entries[Utop[j]].j = i; - U->entries[Utop[j]].val = L->entries[pj].val; + j = L->j[pj]; + U->j[Utop[j]] = i; + U->val[Utop[j]] = L->val[pj]; Utop[j]++; } } @@ -615,9 +729,7 @@ static real ICHOLT( const sparse_matrix * const A, const real * const droptol, free(Utop); - gettimeofday( &stop, NULL ); - return (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); + return Get_Timing_Info( start ); } @@ -631,26 +743,29 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, sparse_matrix * const U_t, sparse_matrix * const U ) { unsigned int i, j, k, pj, x = 0, y = 0, ei_x, ei_y; - real *D, *D_inv, sum; + real *D, *D_inv, sum, start; sparse_matrix *DAD; int *Utop; - struct timeval start, stop; - gettimeofday( &start, NULL ); + start = Get_Time( ); if ( Allocate_Matrix( &DAD, A->n, A->m ) == 0 ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } - D = (real*) malloc(A->n * sizeof(real)); - D_inv = (real*) malloc(A->n * sizeof(real)); - Utop = (int*) malloc((A->n + 1) * sizeof(int)); + if( ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL || + ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL || + ( Utop = (int*) malloc((A->n + 1) * sizeof(int)) ) == NULL ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } for ( i = 0; i < A->n; ++i ) { - D_inv[i] = SQRT( A->entries[A->start[i + 1] - 1].val ); + D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] ); D[i] = 1. / D_inv[i]; } @@ -668,19 +783,19 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, /* non-diagonals */ for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) { - DAD->entries[pj].j = A->entries[pj].j; - DAD->entries[pj].val = - A->entries[pj].val * D[i] * D[A->entries[pj].j]; + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = A->val[pj] * D[i] * D[A->j[pj]]; } /* diagonal */ - DAD->entries[pj].j = A->entries[pj].j; - DAD->entries[pj].val = 1.; + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = 1.; } /* initial guesses for U^T, * assume: A and DAD symmetric and stored lower triangular */ memcpy( U_t->start, DAD->start, sizeof(int) * (DAD->n + 1) ); - memcpy( U_t->entries, DAD->entries, sizeof(sparse_matrix_entry) * (DAD->m) ); + memcpy( U_t->j, DAD->j, sizeof(int) * (DAD->m) ); + memcpy( U_t->val, DAD->val, sizeof(real) * (DAD->m) ); for ( i = 0; i < sweeps; ++i ) { @@ -704,21 +819,21 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, } } /* column bounds of current nonzero */ - y = U_t->start[U_t->entries[j].j]; - ei_y = U_t->start[U_t->entries[j].j + 1]; + y = U_t->start[U_t->j[j]]; + ei_y = U_t->start[U_t->j[j] + 1]; /* sparse dot product: dot( U^T(i,1:j-1), U^T(j,1:j-1) ) */ - while ( U_t->entries[x].j < U_t->entries[j].j && - U_t->entries[y].j < U_t->entries[j].j && + while ( U_t->j[x] < U_t->j[j] && + U_t->j[y] < U_t->j[j] && x < ei_x && y < ei_y ) { - if ( U_t->entries[x].j == U_t->entries[y].j ) + if ( U_t->j[x] == U_t->j[y] ) { - sum += (U_t->entries[x].val * U_t->entries[y].val); + sum += (U_t->val[x] * U_t->val[y]); ++x; ++y; } - else if ( U_t->entries[x].j < U_t->entries[y].j ) + else if ( U_t->j[x] < U_t->j[y] ) { ++x; } @@ -728,10 +843,10 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, } } - sum = DAD->entries[j].val - sum; + sum = DAD->val[j] - sum; /* diagonal entries */ - if ( (k - 1) == U_t->entries[j].j ) + if ( (k - 1) == U_t->j[j] ) { /* sanity check */ if ( sum < ZERO ) @@ -745,12 +860,12 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, exit(NUMERIC_BREAKDOWN); } - U_t->entries[j].val = SQRT( sum ); + U_t->val[j] = SQRT( sum ); } /* non-diagonal entries */ else { - U_t->entries[j].val = sum / U_t->entries[ei_y - 1].val; + U_t->val[j] = sum / U_t->val[ei_y - 1]; } } } @@ -762,7 +877,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, { for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) { - U_t->entries[pj].val *= D_inv[i]; + U_t->val[pj] *= D_inv[i]; } } @@ -777,7 +892,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, * store in U->start */ for ( pj = U_t->start[i]; pj < U_t->start[i + 1]; ++pj ) { - U->start[U_t->entries[pj].j + 1]++; + U->start[U_t->j[pj] + 1]++; } } /* set correct row pointer in U */ @@ -791,9 +906,9 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, /* for each nonzero (column-wise) in U^T */ for ( pj = U_t->start[i]; pj < U_t->start[i + 1]; ++pj ) { - j = U_t->entries[pj].j; - U->entries[Utop[j]].j = i; - U->entries[Utop[j]].val = U_t->entries[pj].val; + j = U_t->j[pj]; + U->j[Utop[j]] = i; + U->val[Utop[j]] = U_t->val[pj]; /* Utop contains pointer within rows of U * (columns of U^T) to add next nonzero, so increment */ Utop[j]++; @@ -809,9 +924,7 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, free(D); free(Utop); - gettimeofday( &stop, NULL ); - return (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); + return Get_Timing_Info( start ); } @@ -822,34 +935,36 @@ static real ICHOL_PAR( const sparse_matrix * const A, const unsigned int sweeps, * Fine-Grained Parallel Incomplete LU Factorization * SIAM J. Sci. Comp. * - * A: symmetric, half-format (lower triangular) - * sweeps: number of ILUL_PAR loops over non-zeros - * L / U: factorized matrices (A \approx LU) */ + * A: symmetric, half-stored (lower triangular), CSR format + * sweeps: number of loops over non-zeros for computation + * L / U: factorized triangular matrices (A \approx LU), CSR format */ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, - sparse_matrix * const L, sparse_matrix * const U ) + sparse_matrix * const L, sparse_matrix * const U ) { unsigned int i, j, k, pj, x, y, ei_x, ei_y; - real *D, *D_inv, sum; + real *D, *D_inv, sum, start; sparse_matrix *DAD; - struct timeval start, stop; - gettimeofday( &start, NULL ); + start = Get_Time( ); if ( Allocate_Matrix( &DAD, A->n, A->m ) == 0 ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } - /*TODO: check malloc return status*/ - D = (real*) malloc(A->n * sizeof(real)); - D_inv = (real*) malloc(A->n * sizeof(real)); + if( ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL || + ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } #pragma omp parallel for schedule(guided) \ default(none) shared(D, D_inv) private(i) for ( i = 0; i < A->n; ++i ) { - D_inv[i] = SQRT( A->entries[A->start[i + 1] - 1].val ); + D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] ); D[i] = 1.0 / D_inv[i]; } @@ -863,29 +978,30 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, /* non-diagonals */ for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) { - DAD->entries[pj].j = A->entries[pj].j; - DAD->entries[pj].val = - D[i] * A->entries[pj].val * D[A->entries[pj].j]; + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = D[i] * A->val[pj] * D[A->j[pj]]; } /* diagonal */ - DAD->entries[pj].j = A->entries[pj].j; - DAD->entries[pj].val = 1.0; + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = 1.0; } /* initial guesses for L and U, * assume: A and DAD symmetric and stored lower triangular */ memcpy( L->start, DAD->start, sizeof(int) * (DAD->n + 1) ); - memcpy( L->entries, DAD->entries, sizeof(sparse_matrix_entry) * (DAD->start[DAD->n]) ); + memcpy( L->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) ); + memcpy( L->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) ); /* store U^T in CSR for row-wise access and tranpose later */ memcpy( U->start, DAD->start, sizeof(int) * (DAD->n + 1) ); - memcpy( U->entries, DAD->entries, sizeof(sparse_matrix_entry) * (DAD->start[DAD->n]) ); + memcpy( U->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) ); + memcpy( U->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) ); /* L has unit diagonal, by convention */ #pragma omp parallel for schedule(guided) \ default(none) private(i) for ( i = 0; i < A->n; ++i ) { - L->entries[L->start[i + 1] - 1].val = 1.0; + L->val[L->start[i + 1] - 1] = 1.0; } for ( i = 0; i < sweeps; ++i ) @@ -910,22 +1026,22 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, } } /* determine column bounds of current nonzero */ - y = DAD->start[DAD->entries[j].j]; - ei_y = DAD->start[DAD->entries[j].j + 1]; + y = DAD->start[DAD->j[j]]; + ei_y = DAD->start[DAD->j[j] + 1]; /* sparse dot product: * dot( L(i,1:j-1), U(1:j-1,j) ) */ - while ( L->entries[x].j < L->entries[j].j && - L->entries[y].j < L->entries[j].j && + while ( L->j[x] < L->j[j] && + L->j[y] < L->j[j] && x < ei_x && y < ei_y ) { - if ( L->entries[x].j == L->entries[y].j ) + if ( L->j[x] == L->j[y] ) { - sum += (L->entries[x].val * U->entries[y].val); + sum += (L->val[x] * U->val[y]); ++x; ++y; } - else if ( L->entries[x].j < L->entries[y].j ) + else if ( L->j[x] < L->j[y] ) { ++x; } @@ -937,7 +1053,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, if( j != ei_x - 1 ) { - L->entries[j].val = ( DAD->entries[j].val - sum ) / U->entries[ei_y - 1].val; + L->val[j] = ( DAD->val[j] - sum ) / U->val[ei_y - 1]; } } @@ -960,22 +1076,22 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, } } /* determine column bounds of current nonzero */ - y = DAD->start[DAD->entries[j].j]; - ei_y = DAD->start[DAD->entries[j].j + 1]; + y = DAD->start[DAD->j[j]]; + ei_y = DAD->start[DAD->j[j] + 1]; /* sparse dot product: * dot( L(i,1:i-1), U(1:i-1,j) ) */ - while ( U->entries[x].j < U->entries[j].j && - U->entries[y].j < U->entries[j].j && + while ( U->j[x] < U->j[j] && + U->j[y] < U->j[j] && x < ei_x && y < ei_y ) { - if ( U->entries[x].j == U->entries[y].j ) + if ( U->j[x] == U->j[y] ) { - sum += (L->entries[y].val * U->entries[x].val); + sum += (L->val[y] * U->val[x]); ++x; ++y; } - else if ( U->entries[x].j < U->entries[y].j ) + else if ( U->j[x] < U->j[y] ) { ++x; } @@ -985,7 +1101,7 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, } } - U->entries[j].val = DAD->entries[j].val - sum; + U->val[j] = DAD->val[j] - sum; } } @@ -998,9 +1114,9 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, { for ( pj = DAD->start[i]; pj < DAD->start[i + 1]; ++pj ) { - L->entries[pj].val = D_inv[i] * L->entries[pj].val; + L->val[pj] = D_inv[i] * L->val[pj]; /* currently storing U^T, so use row index instead of column index */ - U->entries[pj].val = U->entries[pj].val * D_inv[i]; + U->val[pj] = U->val[pj] * D_inv[i]; } } @@ -1015,9 +1131,261 @@ static real ILU_PAR( const sparse_matrix * const A, const unsigned int sweeps, free( D_inv ); free( D ); - gettimeofday( &stop, NULL ); - return (stop.tv_sec + stop.tv_usec / 1000000.0) - - (start.tv_sec + start.tv_usec / 1000000.0); + return Get_Timing_Info( start ); +} + + +/* Fine-grained (parallel) incomplete LU factorization with thresholding + * + * Reference: + * Edmond Chow and Aftab Patel + * Fine-Grained Parallel Incomplete LU Factorization + * SIAM J. Sci. Comp. + * + * A: symmetric, half-stored (lower triangular), CSR format + * droptol: row-wise tolerances used for dropping + * sweeps: number of loops over non-zeros for computation + * L / U: factorized triangular matrices (A \approx LU), CSR format */ +static real ILUT_PAR( const sparse_matrix * const A, const real * droptol, + const unsigned int sweeps, sparse_matrix * const L, sparse_matrix * const U ) +{ + unsigned int i, j, k, pj, x, y, ei_x, ei_y, Ltop, Utop; + real *D, *D_inv, sum, start; + sparse_matrix *DAD, *L_temp, *U_temp; + + start = Get_Time( ); + + if ( Allocate_Matrix( &DAD, A->n, A->m ) == 0 || + Allocate_Matrix( &L_temp, A->n, A->m ) == 0 || + Allocate_Matrix( &U_temp, A->n, A->m ) == 0 ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + if( ( D = (real*) malloc(A->n * sizeof(real)) ) == NULL || + ( D_inv = (real*) malloc(A->n * sizeof(real)) ) == NULL ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + #pragma omp parallel for schedule(guided) \ + default(none) shared(D, D_inv) private(i) + for ( i = 0; i < A->n; ++i ) + { + D_inv[i] = SQRT( A->val[A->start[i + 1] - 1] ); + D[i] = 1.0 / D_inv[i]; + } + + /* to get convergence, A must have unit diagonal, so apply + * transformation DAD, where D = D(1./sqrt(D(A))) */ + memcpy( DAD->start, A->start, sizeof(int) * (A->n + 1) ); + #pragma omp parallel for schedule(guided) \ + default(none) shared(DAD, D) private(i, pj) + for ( i = 0; i < A->n; ++i ) + { + /* non-diagonals */ + for ( pj = A->start[i]; pj < A->start[i + 1] - 1; ++pj ) + { + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = D[i] * A->val[pj] * D[A->j[pj]]; + } + /* diagonal */ + DAD->j[pj] = A->j[pj]; + DAD->val[pj] = 1.0; + } + + /* initial guesses for L and U, + * assume: A and DAD symmetric and stored lower triangular */ + memcpy( L_temp->start, DAD->start, sizeof(int) * (DAD->n + 1) ); + memcpy( L_temp->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) ); + memcpy( L_temp->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) ); + /* store U^T in CSR for row-wise access and tranpose later */ + memcpy( U_temp->start, DAD->start, sizeof(int) * (DAD->n + 1) ); + memcpy( U_temp->j, DAD->j, sizeof(int) * (DAD->start[DAD->n]) ); + memcpy( U_temp->val, DAD->val, sizeof(real) * (DAD->start[DAD->n]) ); + + /* L has unit diagonal, by convention */ + #pragma omp parallel for schedule(guided) \ + default(none) private(i) shared(L_temp) + for ( i = 0; i < A->n; ++i ) + { + L_temp->val[L_temp->start[i + 1] - 1] = 1.0; + } + + for ( i = 0; i < sweeps; ++i ) + { + /* for each nonzero in L */ + #pragma omp parallel for schedule(guided) \ + default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum) + for ( j = 0; j < DAD->start[DAD->n]; ++j ) + { + sum = ZERO; + + /* determine row bounds of current nonzero */ + x = 0; + ei_x = 0; + for ( k = 1; k <= DAD->n; ++k ) + { + if ( DAD->start[k] > j ) + { + x = DAD->start[k - 1]; + ei_x = DAD->start[k]; + break; + } + } + /* determine column bounds of current nonzero */ + y = DAD->start[DAD->j[j]]; + ei_y = DAD->start[DAD->j[j] + 1]; + + /* sparse dot product: + * dot( L(i,1:j-1), U(1:j-1,j) ) */ + while ( L_temp->j[x] < L_temp->j[j] && + L_temp->j[y] < L_temp->j[j] && + x < ei_x && y < ei_y ) + { + if ( L_temp->j[x] == L_temp->j[y] ) + { + sum += (L_temp->val[x] * U_temp->val[y]); + ++x; + ++y; + } + else if ( L_temp->j[x] < L_temp->j[y] ) + { + ++x; + } + else + { + ++y; + } + } + + if( j != ei_x - 1 ) + { + L_temp->val[j] = ( DAD->val[j] - sum ) / U_temp->val[ei_y - 1]; + } + } + + #pragma omp parallel for schedule(guided) \ + default(none) shared(DAD, L_temp, U_temp) private(j, k, x, y, ei_x, ei_y, sum) + for ( j = 0; j < DAD->start[DAD->n]; ++j ) + { + sum = ZERO; + + /* determine row bounds of current nonzero */ + x = 0; + ei_x = 0; + for ( k = 1; k <= DAD->n; ++k ) + { + if ( DAD->start[k] > j ) + { + x = DAD->start[k - 1]; + ei_x = DAD->start[k]; + break; + } + } + /* determine column bounds of current nonzero */ + y = DAD->start[DAD->j[j]]; + ei_y = DAD->start[DAD->j[j] + 1]; + + /* sparse dot product: + * dot( L(i,1:i-1), U(1:i-1,j) ) */ + while ( U_temp->j[x] < U_temp->j[j] && + U_temp->j[y] < U_temp->j[j] && + x < ei_x && y < ei_y ) + { + if ( U_temp->j[x] == U_temp->j[y] ) + { + sum += (L_temp->val[y] * U_temp->val[x]); + ++x; + ++y; + } + else if ( U_temp->j[x] < U_temp->j[y] ) + { + ++x; + } + else + { + ++y; + } + } + + U_temp->val[j] = DAD->val[j] - sum; + } + } + + /* apply inverse transformation: + * since DAD \approx LU, then + * D^{-1}DADD^{-1} = A \approx D^{-1}LUD^{-1} */ + #pragma omp parallel for schedule(guided) \ + default(none) shared(DAD, L_temp, U_temp, D_inv) private(i, pj) + for ( i = 0; i < DAD->n; ++i ) + { + for ( pj = DAD->start[i]; pj < DAD->start[i + 1]; ++pj ) + { + L_temp->val[pj] = D_inv[i] * L_temp->val[pj]; + /* currently storing U^T, so use row index instead of column index */ + U_temp->val[pj] = U_temp->val[pj] * D_inv[i]; + } + } + + /* apply the dropping rule */ + Ltop = 0; + Utop = 0; + for ( i = 0; i < DAD->n; ++i ) + { + L->start[i] = Ltop; + U->start[i] = Utop; + + for ( pj = L_temp->start[i]; pj < L_temp->start[i + 1] - 1; ++pj ) + { + if ( FABS( L_temp->val[pj] ) > FABS( droptol[i] / L_temp->val[L_temp->start[i + 1] - 1] ) ) + { + L->j[Ltop] = L_temp->j[pj]; + L->val[Ltop] = L_temp->val[pj]; + ++Ltop; + } + } + + /* diagonal */ + L->j[Ltop] = L_temp->j[pj]; + L->val[Ltop] = L_temp->val[pj]; + ++Ltop; + + for ( pj = U_temp->start[i]; pj < U_temp->start[i + 1] - 1; ++pj ) + { + if ( FABS( U_temp->val[pj] ) > FABS( droptol[i] / U_temp->val[U_temp->start[i + 1] - 1] ) ) + { + U->j[Utop] = U_temp->j[pj]; + U->val[Utop] = U_temp->val[pj]; + ++Utop; + } + } + + /* diagonal */ + U->j[Utop] = U_temp->j[pj]; + U->val[Utop] = U_temp->val[pj]; + ++Utop; + } + + L->start[i] = Ltop; + U->start[i] = Utop; + + Transpose_I( U ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "nnz(L): %d\n", L->start[L->n] ); + fprintf( stderr, "nnz(U): %d\n", U->start[U->n] ); +#endif + + Deallocate_Matrix( U_temp ); + Deallocate_Matrix( L_temp ); + Deallocate_Matrix( DAD ); + free( D_inv ); + free( D ); + + return Get_Timing_Info( start ); } @@ -1045,9 +1413,13 @@ static void Init_MatVec( const reax_system * const system, const control_params case DIAG_PC: if ( workspace->Hdia_inv == NULL ) { - workspace->Hdia_inv = (real *) calloc( system->N, sizeof( real ) ); + if ( ( workspace->Hdia_inv = (real *) calloc( system->N, sizeof( real ) ) ) == NULL ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } } - data->timing.pre_comp += diagonal_pre( system, workspace->Hdia_inv ); + data->timing.pre_comp += diag_pre_comp( system, workspace->Hdia_inv ); break; case ICHOLT_PC: Calculate_Droptol( workspace->H, workspace->droptol, control->pre_comp_droptol ); @@ -1061,7 +1433,7 @@ static void Init_MatVec( const reax_system * const system, const control_params Allocate_Matrix( &(workspace->U), far_nbrs->n, fillin ) == 0 ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } #if defined(DEBUG) fprintf( stderr, "fillin = %d\n", fillin ); @@ -1073,7 +1445,7 @@ static void Init_MatVec( const reax_system * const system, const control_params data->timing.pre_comp += ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U ); // data->timing.pre_comp += ICHOLT( workspace->H_sp, workspace->droptol, workspace->L, workspace->U ); break; - case ICHOL_PAR_PC: + case ILU_PAR_PC: if ( workspace->L == NULL ) { /* factors have sparsity pattern as H */ @@ -1081,7 +1453,7 @@ static void Init_MatVec( const reax_system * const system, const control_params Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } } @@ -1097,7 +1469,7 @@ static void Init_MatVec( const reax_system * const system, const control_params // Allocate_Matrix( &U_test, 3, 6 ) == 0 ) // { // fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); -// exit(INSUFFICIENT_SPACE); +// exit( INSUFFICIENT_MEMORY ); // } // // //3x3, SPD, store lower half @@ -1127,6 +1499,26 @@ static void Init_MatVec( const reax_system * const system, const control_params // Print_Sparse_Matrix2( U_test, "U_SLU.out" ); // exit( 0 ); break; + case ILUT_PAR_PC: + Calculate_Droptol( workspace->H, workspace->droptol, control->pre_comp_droptol ); +#if defined(DEBUG_FOCUS) + fprintf( stderr, "drop tolerances calculated\n" ); +#endif + + if ( workspace->L == NULL ) + { + /* TODO: safest storage estimate is ILU(0) (same as lower triangular portion of H), could improve later */ + if ( Allocate_Matrix( &(workspace->L), workspace->H->n, workspace->H->m ) == 0 || + Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 ) + { + fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); + exit( INSUFFICIENT_MEMORY ); + } + } + + data->timing.pre_comp += ILUT_PAR( workspace->H, workspace->droptol, control->pre_comp_sweeps, + workspace->L, workspace->U ); + break; case ILU_SUPERLU_MT_PC: if ( workspace->L == NULL ) { @@ -1135,11 +1527,15 @@ static void Init_MatVec( const reax_system * const system, const control_params Allocate_Matrix( &(workspace->U), workspace->H->n, workspace->H->m ) == 0 ) { fprintf( stderr, "not enough memory for preconditioning matrices. terminating.\n" ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } } - +#if defined(HAVE_SUPERLU_MT) data->timing.pre_comp += SuperLU_Factorize( workspace->H, workspace->L, workspace->U ); +#else + fprintf( stderr, "SuperLU MT support disabled. Re-compile before enabling. Terminating...\n" ); + exit( INVALID_INPUT ); +#endif break; default: fprintf( stderr, "Unrecognized preconditioner computation method. Terminating...\n" ); @@ -1238,52 +1634,40 @@ void QEq( reax_system * const system, control_params * const control, simulation switch ( control->qeq_solver_type ) { case GMRES_S: - matvecs = GMRES( workspace, workspace->H, - workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log, - &(data->timing.pre_app), &(data->timing.spmv) ); - matvecs += GMRES( workspace, workspace->H, - workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log, - &(data->timing.pre_app), &(data->timing.spmv) ); + matvecs = GMRES( workspace, control, workspace->H, workspace->b_s, control->qeq_solver_q_err, + workspace->s[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); + matvecs += GMRES( workspace, control, workspace->H, workspace->b_t, control->qeq_solver_q_err, + workspace->t[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); break; case GMRES_H_S: - matvecs = GMRES_HouseHolder( workspace, workspace->H, - workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log ); - matvecs += GMRES_HouseHolder( workspace, workspace->H, - workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log ); - break; - case PGMRES_S: - matvecs = PGMRES( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->s[0], out_control->log, - &(data->timing.pre_app), &(data->timing.spmv) ); - matvecs += PGMRES( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->t[0], out_control->log, - &(data->timing.pre_app), &(data->timing.spmv) ); - break; - case PGMRES_J_S: - matvecs = PGMRES_Jacobi( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->s[0], control->pre_app_jacobi_iters, - out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); - matvecs += PGMRES_Jacobi( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->t[0], control->pre_app_jacobi_iters, - out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); + matvecs = GMRES_HouseHolder( workspace, control, workspace->H, workspace->b_s, control->qeq_solver_q_err, + workspace->s[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); + matvecs += GMRES_HouseHolder( workspace, control, workspace->H, workspace->b_t, control->qeq_solver_q_err, + workspace->t[0], out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ); break; case CG_S: - matvecs = CG( workspace, workspace->H, - workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log ) + 1; - matvecs += CG( workspace, workspace->H, - workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log ) + 1; - break; - case PCG_S: - matvecs = PCG( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->s[0], out_control->log ) + 1; - matvecs += PCG( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, - workspace->L, workspace->U, workspace->t[0], out_control->log ) + 1; + matvecs = CG( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, + workspace->s[0], out_control->log ) + 1; + matvecs += CG( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, + workspace->t[0], out_control->log ) + 1; +// matvecs = CG( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, +// workspace->L, workspace->U, workspace->s[0], control->pre_app_type, +// control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1; +// matvecs += CG( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, +// workspace->L, workspace->U, workspace->t[0], control->pre_app_type, +// control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1; break; case SDM_S: - matvecs = SDM( workspace, workspace->H, - workspace->b_s, control->qeq_solver_q_err, workspace->s[0], out_control->log ) + 1; - matvecs += SDM( workspace, workspace->H, - workspace->b_t, control->qeq_solver_q_err, workspace->t[0], out_control->log ) + 1; + matvecs = SDM( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, + workspace->s[0], out_control->log ) + 1; + matvecs += SDM( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, + workspace->t[0], out_control->log ) + 1; +// matvecs = SDM( workspace, workspace->H, workspace->b_s, control->qeq_solver_q_err, +// workspace->L, workspace->U, workspace->s[0], control->pre_app_type, +// control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1; +// matvecs += SDM( workspace, workspace->H, workspace->b_t, control->qeq_solver_q_err, +// workspace->L, workspace->U, workspace->t[0], control->pre_app_type, +// control->pre_app_jacobi_iters, out_control->log, &(data->timing.pre_app), &(data->timing.spmv) ) + 1; break; default: fprintf( stderr, "Unrecognized QEq solver selection. Terminating...\n" ); diff --git a/sPuReMD/src/allocate.c b/sPuReMD/src/allocate.c index 56bacd6d70db16bdf0d5701c589aa11a782eb5d3..b4b6f90fe290669eb32ddf860e47fa1289976ebe 100644 --- a/sPuReMD/src/allocate.c +++ b/sPuReMD/src/allocate.c @@ -21,6 +21,38 @@ #include "allocate.h" #include "list.h" +#include "tool_box.h" + +/* allocate space for atoms */ +int PreAllocate_Space( reax_system *system, control_params *control, + static_storage *workspace ) +{ + int i; + + system->atoms = (reax_atom*) scalloc( system->N, + sizeof(reax_atom), "atoms" ); + workspace->orig_id = (int*) scalloc( system->N, + sizeof(int), "orid_id" ); + + /* space for keeping restriction info, if any */ + if ( control->restrict_bonds ) + { + workspace->restricted = (int*) scalloc( system->N, + sizeof(int), "restricted_atoms" ); + + workspace->restricted_list = (int**) scalloc( system->N, + sizeof(int*), "restricted_list" ); + + for ( i = 0; i < system->N; ++i ) + { + workspace->restricted_list[i] = (int*) scalloc( MAX_RESTRICT, + sizeof(int), "restricted_list[i]" ); + } + } + + return SUCCESS; +} + void Reallocate_Neighbor_List( list *far_nbrs, int n, int num_intrs ) { @@ -28,7 +60,7 @@ void Reallocate_Neighbor_List( list *far_nbrs, int n, int num_intrs ) if (!Make_List( n, num_intrs, TYP_FAR_NEIGHBOR, far_nbrs )) { fprintf(stderr, "Problem in initializing far nbrs list. Terminating!\n"); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } #if defined(DEBUG_FOCUS) @@ -55,16 +87,11 @@ int Allocate_Matrix( sparse_matrix **pH, int n, int m ) { return 0; } - if ( (H->entries = - (sparse_matrix_entry*) malloc(sizeof(sparse_matrix_entry) * m)) == NULL ) + if ( (H->j = (int*) malloc(sizeof(int) * m)) == NULL + || (H->val = (real*) malloc(sizeof(real) * m)) == NULL ) { return 0; } -// if( ((H->j = (int*) malloc(sizeof(int)*m)) == NULL) || -// ((H->val = (real*) malloc(sizeof(real)*m)) == NULL) ) -// { -// return 0; -// } return 1; } @@ -73,7 +100,8 @@ int Allocate_Matrix( sparse_matrix **pH, int n, int m ) void Deallocate_Matrix( sparse_matrix *H ) { free(H->start); - free(H->entries); + free(H->j); + free(H->val); free(H); } @@ -111,7 +139,7 @@ int Allocate_HBond_List( int n, int num_h, int *h_index, int *hb_top, if ( !Make_List(num_h, num_hbonds, TYP_HBOND, hbonds ) ) { fprintf( stderr, "not enough space for hbonds list. terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } for ( i = 0; i < n; ++i ) @@ -171,7 +199,7 @@ int Allocate_Bond_List( int n, int *bond_top, list *bonds ) if ( !Make_List(n, num_bonds, TYP_BOND, bonds ) ) { fprintf( stderr, "not enough space for bonds list. terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } Set_Start_Index( 0, 0, bonds ); @@ -274,7 +302,7 @@ void Reallocate( reax_system *system, static_storage *workspace, list **lists, TYP_THREE_BODY, (*lists) + THREE_BODIES ) ) { fprintf( stderr, "Problem in initializing angles list. Terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } realloc->num_3body = -1; #if defined(DEBUG_FOCUS) diff --git a/sPuReMD/src/allocate.h b/sPuReMD/src/allocate.h index 2581b3bab0976fe65e2ea14b80cee1a8267ee2a9..e8fdc4d70b96a5c39de5294187121644935e9630 100644 --- a/sPuReMD/src/allocate.h +++ b/sPuReMD/src/allocate.h @@ -24,6 +24,8 @@ #include "mytypes.h" +int PreAllocate_Space( reax_system*, control_params*, static_storage* ); + void Reallocate( reax_system*, static_storage*, list**, int ); int Allocate_Matrix( sparse_matrix**, int, int ); diff --git a/sPuReMD/src/box.c b/sPuReMD/src/box.c index c9bd50c4a14392759f0eb8935748b0b64a46ee63..6c2fda0ba797a4472333aceaa7ebc9e19ecad8df 100644 --- a/sPuReMD/src/box.c +++ b/sPuReMD/src/box.c @@ -20,89 +20,11 @@ ----------------------------------------------------------------------*/ #include "box.h" +#include "tool_box.h" #include "vector.h" -void Init_Box_From_CRYST(real a, real b, real c, - real alpha, real beta, real gamma, - simulation_box* box ) -{ - double c_alpha, c_beta, c_gamma, s_gamma, zi; - - c_alpha = cos(DEG2RAD(alpha)); - c_beta = cos(DEG2RAD(beta)); - c_gamma = cos(DEG2RAD(gamma)); - s_gamma = sin(DEG2RAD(gamma)); - - zi = (c_alpha - c_beta * c_gamma) / s_gamma; - - box->box[0][0] = a; - box->box[0][1] = 0.0; - box->box[0][2] = 0.0; - - box->box[1][0] = b * c_gamma; - box->box[1][1] = b * s_gamma; - box->box[1][2] = 0.0; - - box->box[2][0] = c * c_beta; - box->box[2][1] = c * zi; - box->box[2][2] = c * SQRT(1.0 - SQR(c_beta) - SQR(zi)); - - Make_Consistent( box ); - -#if defined(DEBUG_FOCUS) - fprintf( stderr, "box is %8.2f x %8.2f x %8.2f\n", - box->box[0][0], box->box[1][1], box->box[2][2] ); -#endif -} - - -void Update_Box( rtensor box_tensor, simulation_box* box ) -{ - int i, j; - - for (i = 0; i < 3; i++) - for (j = 0; j < 3; j++) - box->box[i][j] = box_tensor[i][j]; - - Make_Consistent( box ); -} - - -void Update_Box_Isotropic( simulation_box *box, real mu ) -{ - /*box->box[0][0] = - POW( V_new / ( box->side_prop[1] * box->side_prop[2] ), 1.0/3.0 ); - box->box[1][1] = box->box[0][0] * box->side_prop[1]; - box->box[2][2] = box->box[0][0] * box->side_prop[2]; - */ - rtensor_Copy( box->old_box, box->box ); - box->box[0][0] *= mu; - box->box[1][1] *= mu; - box->box[2][2] *= mu; - - box->volume = box->box[0][0] * box->box[1][1] * box->box[2][2]; - Make_Consistent(box/*, periodic*/); -} - - -void Update_Box_SemiIsotropic( simulation_box *box, rvec mu ) -{ - /*box->box[0][0] = - POW( V_new / ( box->side_prop[1] * box->side_prop[2] ), 1.0/3.0 ); - box->box[1][1] = box->box[0][0] * box->side_prop[1]; - box->box[2][2] = box->box[0][0] * box->side_prop[2]; */ - rtensor_Copy( box->old_box, box->box ); - box->box[0][0] *= mu[0]; - box->box[1][1] *= mu[1]; - box->box[2][2] *= mu[2]; - - box->volume = box->box[0][0] * box->box[1][1] * box->box[2][2]; - Make_Consistent(box); -} - - -void Make_Consistent(simulation_box* box) +void Make_Consistent( simulation_box* box ) { real one_vol; @@ -196,7 +118,6 @@ void Make_Consistent(simulation_box* box) // fprintf(stderr,"\n"); // } - box->g[0][0] = box->box[0][0] * box->box[0][0] + box->box[0][1] * box->box[0][1] + box->box[0][2] * box->box[0][2]; @@ -228,44 +149,85 @@ void Make_Consistent(simulation_box* box) } -void Transform( rvec x1, simulation_box *box, char flag, rvec x2 ) +/* setup the simulation box */ +void Setup_Box( real a, real b, real c, real alpha, real beta, real gamma, + simulation_box* box ) { - int i, j; - real tmp; - - // printf(">x1: (%lf, %lf, %lf)\n",x1[0],x1[1],x1[2]); + double c_alpha, c_beta, c_gamma, s_gamma, zi; - if (flag > 0) - { - for (i = 0; i < 3; i++) - { - tmp = 0.0; - for (j = 0; j < 3; j++) - tmp += box->trans[i][j] * x1[j]; - x2[i] = tmp; - } - } - else + if ( IS_NAN_REAL(a) || IS_NAN_REAL(b) || IS_NAN_REAL(c) + || IS_NAN_REAL(alpha) || IS_NAN_REAL(beta) || IS_NAN_REAL(gamma) ) { - for (i = 0; i < 3; i++) - { - tmp = 0.0; - for (j = 0; j < 3; j++) - tmp += box->trans_inv[i][j] * x1[j]; - x2[i] = tmp; - } + fprintf( stderr, "Invalid simulation box boundaries for big box (NaN). Terminating...\n" ); + exit( INVALID_INPUT ); } - // printf(">x2: (%lf, %lf, %lf)\n", x2[0], x2[1], x2[2]); + + c_alpha = COS(DEG2RAD(alpha)); + c_beta = COS(DEG2RAD(beta)); + c_gamma = COS(DEG2RAD(gamma)); + s_gamma = SIN(DEG2RAD(gamma)); + zi = (c_alpha - c_beta * c_gamma) / s_gamma; + + box->box[0][0] = a; + box->box[0][1] = 0.0; + box->box[0][2] = 0.0; + box->box[1][0] = b * c_gamma; + box->box[1][1] = b * s_gamma; + box->box[1][2] = 0.0; + box->box[2][0] = c * c_beta; + box->box[2][1] = c * zi; + box->box[2][2] = c * SQRT(1.0 - SQR(c_beta) - SQR(zi)); +#if defined(DEBUG) + fprintf( stderr, "box is %8.2f x %8.2f x %8.2f\n", + box->box[0][0], box->box[1][1], box->box[2][2] ); +#endif + + Make_Consistent( box ); +} + + +void Update_Box( rtensor box_tensor, simulation_box* box ) +{ + int i, j; + + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + box->box[i][j] = box_tensor[i][j]; + + Make_Consistent( box ); } -void Transform_to_UnitBox( rvec x1, simulation_box *box, char flag, rvec x2 ) +void Update_Box_Isotropic( simulation_box *box, real mu ) { - Transform( x1, box, flag, x2 ); + /*box->box[0][0] = + POW( V_new / ( box->side_prop[1] * box->side_prop[2] ), 1.0/3.0 ); + box->box[1][1] = box->box[0][0] * box->side_prop[1]; + box->box[2][2] = box->box[0][0] * box->side_prop[2]; + */ + rtensor_Copy( box->old_box, box->box ); + box->box[0][0] *= mu; + box->box[1][1] *= mu; + box->box[2][2] *= mu; - x2[0] /= box->box_norms[0]; - x2[1] /= box->box_norms[1]; - x2[2] /= box->box_norms[2]; + box->volume = box->box[0][0] * box->box[1][1] * box->box[2][2]; + Make_Consistent(box/*, periodic*/); +} + + +void Update_Box_SemiIsotropic( simulation_box *box, rvec mu ) +{ + /*box->box[0][0] = + POW( V_new / ( box->side_prop[1] * box->side_prop[2] ), 1.0/3.0 ); + box->box[1][1] = box->box[0][0] * box->side_prop[1]; + box->box[2][2] = box->box[0][0] * box->side_prop[2]; */ + rtensor_Copy( box->old_box, box->box ); + box->box[0][0] *= mu[0]; + box->box[1][1] *= mu[1]; + box->box[2][2] *= mu[2]; + + box->volume = box->box[0][0] * box->box[1][1] * box->box[2][2]; + Make_Consistent(box); } @@ -278,7 +240,7 @@ void Inc_on_T3( rvec x, rvec dx, simulation_box *box ) { tmp = x[i] + dx[i]; if ( tmp <= -box->box_norms[i] || tmp >= box->box_norms[i] ) - tmp = fmod( tmp, box->box_norms[i] ); + tmp = FMOD( tmp, box->box_norms[i] ); if ( tmp < 0 ) tmp += box->box_norms[i]; x[i] = tmp; @@ -369,7 +331,6 @@ real Metric_Product( rvec x1, rvec x2, simulation_box* box ) } - int Are_Far_Neighbors( rvec x1, rvec x2, simulation_box *box, real cutoff, far_neighbor_data *data ) { @@ -417,7 +378,6 @@ int Are_Far_Neighbors( rvec x1, rvec x2, simulation_box *box, } - /* Determines if the distance between x1 and x2 is < vlist_cut. If so, this neighborhood is added to the list of far neighbors. Periodic boundary conditions do not apply. */ @@ -617,7 +577,7 @@ void Get_Periodic_Far_Neighbors_Small_Box( rvec x1, rvec x2, simulation_box *box }*/ -void Print_Box_Information( simulation_box* box, FILE *out ) +void Print_Box( simulation_box* box, FILE *out ) { int i, j; diff --git a/sPuReMD/src/box.h b/sPuReMD/src/box.h index de3e33debea62d57bd15731e97eadfdf623d96de..4902a3404bc60be6dc18a4f4185cf7da9f25d5a9 100644 --- a/sPuReMD/src/box.h +++ b/sPuReMD/src/box.h @@ -24,9 +24,7 @@ #include "mytypes.h" -/* Initializes box from CRYST1 line of PDB */ -void Init_Box_From_CRYST(real, real, real, real, real, real, - simulation_box*/*, int*/); +void Setup_Box( real, real, real, real, real, real, simulation_box* ); /* Initializes box from box rtensor */ void Update_Box(rtensor, simulation_box* /*, int*/); @@ -37,12 +35,6 @@ void Update_Box_SemiIsotropic( simulation_box*, rvec /*, int*/ ); metric and other quantities from box rtensor */ void Make_Consistent(simulation_box*/*, int*/ ); -/* Applies transformation to and from - Cartesian to Triclinic coordinates based on flag */ -/* Use -1 flag for Cartesian -> Triclinic and +1 for otherway */ -void Transform( rvec, simulation_box*, char, rvec ); -void Transform_to_UnitBox( rvec, simulation_box*, char, rvec ); - int Are_Far_Neighbors( rvec, rvec, simulation_box*, real, far_neighbor_data* ); void Get_NonPeriodic_Far_Neighbors( rvec, rvec, simulation_box*, control_params*, far_neighbor_data*, int* ); @@ -66,6 +58,6 @@ void Inc_on_T3( rvec, rvec, simulation_box* ); real Metric_Product( rvec, rvec, simulation_box* ); -void Print_Box_Information( simulation_box*, FILE* ); +void Print_Box( simulation_box*, FILE* ); #endif diff --git a/sPuReMD/src/control.c b/sPuReMD/src/control.c new file mode 100644 index 0000000000000000000000000000000000000000..62311c5934d5da95ee7e5473f799c5ec506e0c20 --- /dev/null +++ b/sPuReMD/src/control.c @@ -0,0 +1,550 @@ +/*---------------------------------------------------------------------- + SerialReax - Reax Force Field Simulator + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#include <ctype.h> + +#include "control.h" +#include "traj.h" +#include "tool_box.h" + + +char Read_Control_File( FILE* fp, reax_system *system, control_params* control, + output_controls *out_control ) +{ + char *s, **tmp; + int c, i; + real val; + int ival; + + /* assign default values */ + strcpy( control->sim_name, "default.sim" ); + + control->restart = 0; + out_control->restart_format = WRITE_BINARY; + out_control->restart_freq = 0; + strcpy( control->restart_from, "default.res" ); + out_control->restart_freq = 0; + control->random_vel = 0; + + control->reposition_atoms = 0; + + control->ensemble = NVE; + control->nsteps = 0; + control->dt = 0.25; + + control->geo_format = PDB; + control->restrict_bonds = 0; + + control->periodic_boundaries = 1; + control->periodic_images[0] = 0; + control->periodic_images[1] = 0; + control->periodic_images[2] = 0; + + control->reneighbor = 1; + control->vlist_cut = 0; + control->nbr_cut = 4.; + control->r_cut = 10; + control->max_far_nbrs = 1000; + control->bo_cut = 0.01; + control->thb_cut = 0.001; + control->hb_cut = 7.50; + + control->tabulate = 0; + + control->qeq_solver_type = GMRES_S; + control->qeq_solver_q_err = 0.000001; + control->pre_comp_type = ICHOLT_PC; + control->pre_comp_sweeps = 3; + control->pre_comp_refactor = 100; + control->pre_comp_droptol = 0.01; + control->pre_app_type = TRI_SOLVE_PA; + control->pre_app_jacobi_iters = 50; + + control->T_init = 0.; + control->T_final = 300.; + control->Tau_T = 1.0; + control->T_mode = 0.; + control->T_rate = 1.; + control->T_freq = 1.; + + control->P[0] = 0.000101325; + control->P[1] = 0.000101325; + control->P[2] = 0.000101325; + control->Tau_P[0] = 500.0; + control->Tau_P[1] = 500.0; + control->Tau_P[2] = 500.0; + control->Tau_PT = 500.0; + control->compressibility = 1.0; + control->press_mode = 0; + + control->remove_CoM_vel = 25; + + out_control->debug_level = 0; + out_control->energy_update_freq = 10; + + out_control->write_steps = 100; + out_control->traj_compress = 0; + out_control->write = fprintf; + out_control->traj_format = 0; + out_control->write_header = + (int (*)( reax_system*, control_params*, + static_storage*, void* )) Write_Custom_Header; + out_control->append_traj_frame = + (int (*)( reax_system*, control_params*, simulation_data*, + static_storage*, list **, void* )) Append_Custom_Frame; + + strcpy( out_control->traj_title, "default_title" ); + out_control->atom_format = 0; + out_control->bond_info = 0; + out_control->angle_info = 0; + + control->molec_anal = NO_ANALYSIS; + control->freq_molec_anal = 0; + control->bg_cut = 0.3; + control->num_ignored = 0; + memset( control->ignore, 0, sizeof(int)*MAX_ATOM_TYPES ); + + control->dipole_anal = 0; + control->freq_dipole_anal = 0; + + control->diffusion_coef = 0; + control->freq_diffusion_coef = 0; + control->restrict_type = 0; + + /* memory allocations */ + s = (char*) malloc(sizeof(char) * MAX_LINE); + tmp = (char**) malloc(sizeof(char*)*MAX_TOKENS); + for (i = 0; i < MAX_TOKENS; i++) + tmp[i] = (char*) malloc(sizeof(char) * MAX_LINE); + + /* read control parameters file */ + while (fgets(s, MAX_LINE, fp)) + { + c = Tokenize(s, &tmp); + + if ( strcmp(tmp[0], "simulation_name") == 0 ) + { + strcpy( control->sim_name, tmp[1] ); + } + //else if( strcmp(tmp[0], "restart") == 0 ) { + // ival = atoi(tmp[1]); + // control->restart = ival; + //} + else if ( strcmp(tmp[0], "restart_format") == 0 ) + { + ival = atoi(tmp[1]); + out_control->restart_format = ival; + } + else if ( strcmp(tmp[0], "restart_freq") == 0 ) + { + ival = atoi(tmp[1]); + out_control->restart_freq = ival; + } + else if ( strcmp(tmp[0], "random_vel") == 0 ) + { + ival = atoi(tmp[1]); + control->random_vel = ival; + } + else if ( strcmp(tmp[0], "reposition_atoms") == 0 ) + { + ival = atoi(tmp[1]); + control->reposition_atoms = ival; + } + else if ( strcmp(tmp[0], "ensemble_type") == 0 ) + { + ival = atoi(tmp[1]); + control->ensemble = ival; + } + else if ( strcmp(tmp[0], "nsteps") == 0 ) + { + ival = atoi(tmp[1]); + control->nsteps = ival; + } + else if ( strcmp(tmp[0], "dt") == 0 ) + { + val = atof(tmp[1]); + control->dt = val * 1.e-3; // convert dt from fs to ps! + } + else if ( strcmp(tmp[0], "periodic_boundaries") == 0 ) + { + ival = atoi( tmp[1] ); + control->periodic_boundaries = ival; + } + else if ( strcmp(tmp[0], "periodic_images") == 0 ) + { + ival = atoi(tmp[1]); + control->periodic_images[0] = ival; + ival = atoi(tmp[2]); + control->periodic_images[1] = ival; + ival = atoi(tmp[3]); + control->periodic_images[2] = ival; + } + else if ( strcmp(tmp[0], "geo_format") == 0 ) + { + ival = atoi( tmp[1] ); + control->geo_format = ival; + } + else if ( strcmp(tmp[0], "restrict_bonds") == 0 ) + { + ival = atoi( tmp[1] ); + control->restrict_bonds = ival; + } + else if ( strcmp(tmp[0], "tabulate_long_range") == 0 ) + { + ival = atoi( tmp[1] ); + control->tabulate = ival; + } + else if ( strcmp(tmp[0], "reneighbor") == 0 ) + { + ival = atoi( tmp[1] ); + control->reneighbor = ival; + } + else if ( strcmp(tmp[0], "vlist_buffer") == 0 ) + { + val = atof(tmp[1]); + control->vlist_cut = val; + } + else if ( strcmp(tmp[0], "nbrhood_cutoff") == 0 ) + { + val = atof(tmp[1]); + control->nbr_cut = val; + } + else if ( strcmp(tmp[0], "thb_cutoff") == 0 ) + { + val = atof(tmp[1]); + control->thb_cut = val; + } + else if ( strcmp(tmp[0], "hbond_cutoff") == 0 ) + { + val = atof( tmp[1] ); + control->hb_cut = val; + } + else if ( strcmp(tmp[0], "qeq_solver_type") == 0 ) + { + ival = atoi( tmp[1] ); + control->qeq_solver_type = ival; + } + else if ( strcmp(tmp[0], "qeq_solver_q_err") == 0 ) + { + val = atof( tmp[1] ); + control->qeq_solver_q_err = val; + } + else if ( strcmp(tmp[0], "pre_comp_type") == 0 ) + { + ival = atoi( tmp[1] ); + control->pre_comp_type = ival; + } + else if ( strcmp(tmp[0], "pre_comp_refactor") == 0 ) + { + ival = atoi( tmp[1] ); + control->pre_comp_refactor = ival; + } + else if ( strcmp(tmp[0], "pre_comp_droptol") == 0 ) + { + val = atof( tmp[1] ); + control->pre_comp_droptol = val; + } + else if ( strcmp(tmp[0], "pre_comp_sweeps") == 0 ) + { + ival = atoi( tmp[1] ); + control->pre_comp_sweeps = ival; + } + else if ( strcmp(tmp[0], "pre_app_type") == 0 ) + { + ival = atoi( tmp[1] ); + control->pre_app_type = ival; + } + else if ( strcmp(tmp[0], "pre_app_jacobi_iters") == 0 ) + { + ival = atoi( tmp[1] ); + control->pre_app_jacobi_iters = ival; + } + else if ( strcmp(tmp[0], "temp_init") == 0 ) + { + val = atof(tmp[1]); + control->T_init = val; + + if ( control->T_init < 0.001 ) + control->T_init = 0.001; + } + else if ( strcmp(tmp[0], "temp_final") == 0 ) + { + val = atof(tmp[1]); + control->T_final = val; + + if ( control->T_final < 0.1 ) + control->T_final = 0.1; + } + else if ( strcmp(tmp[0], "t_mass") == 0 ) + { + val = atof(tmp[1]); + control->Tau_T = val * 1.e-3; // convert t_mass from fs to ps + } + else if ( strcmp(tmp[0], "t_mode") == 0 ) + { + ival = atoi(tmp[1]); + control->T_mode = ival; + } + else if ( strcmp(tmp[0], "t_rate") == 0 ) + { + val = atof(tmp[1]); + control->T_rate = val; + } + else if ( strcmp(tmp[0], "t_freq") == 0 ) + { + val = atof(tmp[1]); + control->T_freq = val; + } + else if ( strcmp(tmp[0], "pressure") == 0 ) + { + if ( control->ensemble == iNPT ) + { + val = atof(tmp[1]); + control->P[0] = control->P[1] = control->P[2] = val; + } + else if ( control->ensemble == sNPT ) + { + val = atof(tmp[1]); + control->P[0] = val; + + val = atof(tmp[2]); + control->P[1] = val; + + val = atof(tmp[3]); + control->P[2] = val; + } + } + else if ( strcmp(tmp[0], "p_mass") == 0 ) + { + if ( control->ensemble == iNPT ) + { + val = atof(tmp[1]); + control->Tau_P[0] = val * 1.e-3; // convert p_mass from fs to ps + } + else if ( control->ensemble == sNPT ) + { + val = atof(tmp[1]); + control->Tau_P[0] = val * 1.e-3; // convert p_mass from fs to ps + + val = atof(tmp[2]); + control->Tau_P[1] = val * 1.e-3; // convert p_mass from fs to ps + + val = atof(tmp[3]); + control->Tau_P[2] = val * 1.e-3; // convert p_mass from fs to ps + } + } + else if ( strcmp(tmp[0], "pt_mass") == 0 ) + { + val = atof(tmp[1]); + control->Tau_PT = val * 1.e-3; // convert pt_mass from fs to ps + } + else if ( strcmp(tmp[0], "compress") == 0 ) + { + val = atof(tmp[1]); + control->compressibility = val; + } + else if ( strcmp(tmp[0], "press_mode") == 0 ) + { + val = atoi(tmp[1]); + control->press_mode = val; + } + else if ( strcmp(tmp[0], "remove_CoM_vel") == 0 ) + { + val = atoi(tmp[1]); + control->remove_CoM_vel = val; + } + else if ( strcmp(tmp[0], "debug_level") == 0 ) + { + ival = atoi(tmp[1]); + out_control->debug_level = ival; + } + else if ( strcmp(tmp[0], "energy_update_freq") == 0 ) + { + ival = atoi(tmp[1]); + out_control->energy_update_freq = ival; + } + else if ( strcmp(tmp[0], "write_freq") == 0 ) + { + ival = atoi(tmp[1]); + out_control->write_steps = ival; + } + else if ( strcmp(tmp[0], "traj_compress") == 0 ) + { + ival = atoi(tmp[1]); + out_control->traj_compress = ival; + + if ( out_control->traj_compress ) + out_control->write = (int (*)(FILE *, const char *, ...)) gzprintf; + else out_control->write = fprintf; + } + else if ( strcmp(tmp[0], "traj_format") == 0 ) + { + ival = atoi(tmp[1]); + out_control->traj_format = ival; + + if ( out_control->traj_format == 0 ) + { + out_control->write_header = + (int (*)( reax_system*, control_params*, + static_storage*, void* )) Write_Custom_Header; + out_control->append_traj_frame = + (int (*)(reax_system*, control_params*, simulation_data*, + static_storage*, list **, void*)) Append_Custom_Frame; + } + else if ( out_control->traj_format == 1 ) + { + out_control->write_header = + (int (*)( reax_system*, control_params*, + static_storage*, void* )) Write_xyz_Header; + out_control->append_traj_frame = + (int (*)( reax_system*, control_params*, simulation_data*, + static_storage*, list **, void* )) Append_xyz_Frame; + } + } + else if ( strcmp(tmp[0], "traj_title") == 0 ) + { + strcpy( out_control->traj_title, tmp[1] ); + } + else if ( strcmp(tmp[0], "atom_info") == 0 ) + { + ival = atoi(tmp[1]); + out_control->atom_format += ival * 4; + } + else if ( strcmp(tmp[0], "atom_velocities") == 0 ) + { + ival = atoi(tmp[1]); + out_control->atom_format += ival * 2; + } + else if ( strcmp(tmp[0], "atom_forces") == 0 ) + { + ival = atoi(tmp[1]); + out_control->atom_format += ival * 1; + } + else if ( strcmp(tmp[0], "bond_info") == 0 ) + { + ival = atoi(tmp[1]); + out_control->bond_info = ival; + } + else if ( strcmp(tmp[0], "angle_info") == 0 ) + { + ival = atoi(tmp[1]); + out_control->angle_info = ival; + } + else if ( strcmp(tmp[0], "test_forces") == 0 ) + { + ival = atoi(tmp[1]); + } + else if ( strcmp(tmp[0], "molec_anal") == 0 ) + { + ival = atoi(tmp[1]); + control->molec_anal = ival; + } + else if ( strcmp(tmp[0], "freq_molec_anal") == 0 ) + { + ival = atoi(tmp[1]); + control->freq_molec_anal = ival; + } + else if ( strcmp(tmp[0], "bond_graph_cutoff") == 0 ) + { + val = atof(tmp[1]); + control->bg_cut = val; + } + else if ( strcmp(tmp[0], "ignore") == 0 ) + { + control->num_ignored = atoi(tmp[1]); + for ( i = 0; i < control->num_ignored; ++i ) + control->ignore[atoi(tmp[i + 2])] = 1; + } + else if ( strcmp(tmp[0], "dipole_anal") == 0 ) + { + ival = atoi(tmp[1]); + control->dipole_anal = ival; + } + else if ( strcmp(tmp[0], "freq_dipole_anal") == 0 ) + { + ival = atoi(tmp[1]); + control->freq_dipole_anal = ival; + } + else if ( strcmp(tmp[0], "diffusion_coef") == 0 ) + { + ival = atoi(tmp[1]); + control->diffusion_coef = ival; + } + else if ( strcmp(tmp[0], "freq_diffusion_coef") == 0 ) + { + ival = atoi(tmp[1]); + control->freq_diffusion_coef = ival; + } + else if ( strcmp(tmp[0], "restrict_type") == 0 ) + { + ival = atoi(tmp[1]); + control->restrict_type = ival; + } + else + { + fprintf( stderr, "WARNING: unknown parameter %s\n", tmp[0] ); + exit( 15 ); + } + } + + if (ferror(fp)) + { + fprintf(stderr, "Error reading control file. Terminating.\n"); + exit( INVALID_INPUT ); + } + + /* determine target T */ + if ( control->T_mode == 0 ) + control->T = control->T_final; + else control->T = control->T_init; + + + /* near neighbor and far neighbor cutoffs */ + control->bo_cut = 0.01 * system->reaxprm.gp.l[29]; + control->r_low = system->reaxprm.gp.l[11]; + control->r_cut = system->reaxprm.gp.l[12]; + control->vlist_cut += control->r_cut; + + system->g.cell_size = control->vlist_cut / 2.; + for ( i = 0; i < 3; ++i ) + { + system->g.spread[i] = 2; + } + + /* free memory allocations at the top */ + for ( i = 0; i < MAX_TOKENS; i++ ) + { + free( tmp[i] ); + } + free( tmp ); + free( s ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, + "en=%d steps=%d dt=%.5f opt=%d T=%.5f P=%.5f %.5f %.5f\n", + control->ensemble, control->nsteps, control->dt, control->tabulate, + control->T, control->P[0], control->P[1], control->P[2] ); + + fprintf(stderr, "control file read\n" ); +#endif + + return SUCCESS; +} diff --git a/sPuReMD/src/control.h b/sPuReMD/src/control.h new file mode 100644 index 0000000000000000000000000000000000000000..66d0dde7b4901d7a7b42512414328a8e6b256d83 --- /dev/null +++ b/sPuReMD/src/control.h @@ -0,0 +1,29 @@ +/*---------------------------------------------------------------------- + SerialReax - Reax Force Field Simulator + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#ifndef __CONTROL_H_ +#define __CONTROL_H_ + +#include "mytypes.h" + +char Read_Control_File( FILE*, reax_system*, control_params*, output_controls* ); + +#endif diff --git a/sPuReMD/src/param.c b/sPuReMD/src/ffield.c similarity index 58% rename from sPuReMD/src/param.c rename to sPuReMD/src/ffield.c index 4cae89e79cfc93db2e32c9cec68563c76d7f4208..088e1acbe2aa7588cb6e40b19626038d4257e56e 100644 --- a/sPuReMD/src/param.c +++ b/sPuReMD/src/ffield.c @@ -19,85 +19,10 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#include "param.h" -#include "traj.h" -#include "ctype.h" - - -int Get_Atom_Type( reax_interaction *reaxprm, char *s ) -{ - int i; - - for ( i = 0; i < reaxprm->num_atom_types; ++i ) - if ( !strcmp( reaxprm->sbp[i].name, s ) ) - return i; - - fprintf( stderr, "Unknown atom type %s. Terminating...\n", s ); - exit( UNKNOWN_ATOM_TYPE_ERR ); -} - - -int Tokenize(char* s, char*** tok) -{ - char test[MAX_LINE]; - char *sep = "\t \n!="; - char *word; - int count = 0; - - strncpy( test, s, MAX_LINE ); - - // fprintf( stderr, "|%s|\n", test ); - - for ( word = strtok(test, sep); word; word = strtok(NULL, sep) ) - { - strncpy( (*tok)[count], word, MAX_LINE ); - count++; - } - - return count; -} - - -/* Initialize Taper params */ -void Init_Taper( control_params *control ) -{ - real d1, d7; - real swa, swa2, swa3; - real swb, swb2, swb3; - - swa = control->r_low; - swb = control->r_cut; - - if ( fabs( swa ) > 0.01 ) - fprintf( stderr, "Warning: non-zero value for lower Taper-radius cutoff\n" ); - - if ( swb < 0 ) - { - fprintf( stderr, "Negative value for upper Taper-radius cutoff\n" ); - exit( INVALID_INPUT ); - } - else if ( swb < 5 ) - fprintf( stderr, "Warning: low value for upper Taper-radius cutoff:%f\n", - swb ); - - d1 = swb - swa; - d7 = POW( d1, 7.0 ); - swa2 = SQR( swa ); - swa3 = CUBE( swa ); - swb2 = SQR( swb ); - swb3 = CUBE( swb ); - - control->Tap7 = 20.0 / d7; - control->Tap6 = -70.0 * (swa + swb) / d7; - control->Tap5 = 84.0 * (swa2 + 3.0 * swa * swb + swb2) / d7; - control->Tap4 = -35.0 * (swa3 + 9.0 * swa2 * swb + 9.0 * swa * swb2 + swb3 ) / d7; - control->Tap3 = 140.0 * (swa3 * swb + 3.0 * swa2 * swb2 + swa * swb3 ) / d7; - control->Tap2 = -210.0 * (swa3 * swb2 + swa2 * swb3) / d7; - control->Tap1 = 140.0 * swa3 * swb3 / d7; - control->Tap0 = (-35.0 * swa3 * swb2 * swb2 + 21.0 * swa2 * swb3 * swb2 + - 7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7; -} +#include <ctype.h> +#include "ffield.h" +#include "tool_box.h" char Read_Force_Field( FILE* fp, reax_interaction* reax ) @@ -452,7 +377,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) } /* calculating combination rules and filling up remaining fields. */ - for (i = 0; i < reax->num_atom_types; i++) for (j = i; j < reax->num_atom_types; j++) { @@ -471,10 +395,10 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) reax->tbp[j][i].r_pp = 0.5 * (reax->sbp[j].r_pi_pi + reax->sbp[i].r_pi_pi); - reax->tbp[i][j].p_boc3 = sqrt(reax->sbp[i].b_o_132 * reax->sbp[j].b_o_132); + reax->tbp[j][i].p_boc3 = sqrt(reax->sbp[j].b_o_132 * reax->sbp[i].b_o_132); @@ -493,7 +417,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) sqrt(reax->sbp[j].b_o_133 * reax->sbp[i].b_o_133); - reax->tbp[i][j].D = sqrt(reax->sbp[i].epsilon * reax->sbp[j].epsilon); @@ -534,7 +457,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) } - /* next line is number of 2-body offdiagonal combinations and some comments */ /* these are two body offdiagonal terms that are different from the combination rules defined above */ @@ -596,7 +518,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) } } - /* 3-body parameters - supports multi-well potentials (upto MAX_3BODY_PARAM in mytypes.h) */ /* clear entries first */ @@ -657,7 +578,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) } } - /* 4-body parameters are entered in compact form. i.e. 0-X-Y-0 correspond to any type of pair of atoms in 1 and 4 position. However, explicit X-Y-Z-W takes precedence over the @@ -763,8 +683,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) } } - - /* next line is number of hydrogen bond params and some comments */ fgets( s, MAX_LINE, fp ); c = Tokenize( s, &tmp ); @@ -779,7 +697,6 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) k = atoi(tmp[1]) - 1; m = atoi(tmp[2]) - 1; - if (j < reax->num_atom_types && m < reax->num_atom_types) { val = atof(tmp[3]); @@ -797,14 +714,12 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) } } - /* deallocate helper storage */ for ( i = 0; i < MAX_TOKENS; i++ ) free( tmp[i] ); free( tmp ); free( s ); - /* deallocate tor_flag */ for ( i = 0; i < reax->num_atom_types; i++ ) { @@ -823,523 +738,5 @@ char Read_Force_Field( FILE* fp, reax_interaction* reax ) fprintf( stderr, "force field read\n" ); #endif - return 0; -} - - -char Read_Control_File( FILE* fp, reax_system *system, control_params* control, - output_controls *out_control ) -{ - char *s, **tmp; - int c, i; - real val; - int ival; - - /* assign default values */ - strcpy( control->sim_name, "default.sim" ); - - control->restart = 0; - out_control->restart_format = WRITE_BINARY; - out_control->restart_freq = 0; - strcpy( control->restart_from, "default.res" ); - out_control->restart_freq = 0; - control->random_vel = 0; - - control->reposition_atoms = 0; - - control->ensemble = NVE; - control->nsteps = 0; - control->dt = 0.25; - - control->geo_format = PDB; - control->restrict_bonds = 0; - - control->periodic_boundaries = 1; - control->periodic_images[0] = 0; - control->periodic_images[1] = 0; - control->periodic_images[2] = 0; - - control->reneighbor = 1; - control->vlist_cut = 0; - control->nbr_cut = 4.; - control->r_cut = 10; - control->max_far_nbrs = 1000; - control->bo_cut = 0.01; - control->thb_cut = 0.001; - control->hb_cut = 7.50; - - control->tabulate = 0; - - control->qeq_solver_type = PGMRES_S; - control->qeq_solver_q_err = 0.000001; - control->pre_comp_type = ICHOLT_PC; - control->pre_comp_sweeps = ICHOLT_PC; - control->pre_comp_refactor = 100; - control->pre_comp_droptol = 0.01; - control->pre_app_jacobi_iters = 10; - - control->T_init = 0.; - control->T_final = 300.; - control->Tau_T = 1.0; - control->T_mode = 0.; - control->T_rate = 1.; - control->T_freq = 1.; - - control->P[0] = 0.000101325; - control->P[1] = 0.000101325; - control->P[2] = 0.000101325; - control->Tau_P[0] = 500.0; - control->Tau_P[1] = 500.0; - control->Tau_P[2] = 500.0; - control->Tau_PT = 500.0; - control->compressibility = 1.0; - control->press_mode = 0; - - control->remove_CoM_vel = 25; - - out_control->debug_level = 0; - out_control->energy_update_freq = 10; - - out_control->write_steps = 100; - out_control->traj_compress = 0; - out_control->write = fprintf; - out_control->traj_format = 0; - out_control->write_header = - (int (*)( reax_system*, control_params*, - static_storage*, void* )) Write_Custom_Header; - out_control->append_traj_frame = - (int (*)( reax_system*, control_params*, simulation_data*, - static_storage*, list **, void* )) Append_Custom_Frame; - - strcpy( out_control->traj_title, "default_title" ); - out_control->atom_format = 0; - out_control->bond_info = 0; - out_control->angle_info = 0; - - control->molec_anal = NO_ANALYSIS; - control->freq_molec_anal = 0; - control->bg_cut = 0.3; - control->num_ignored = 0; - memset( control->ignore, 0, sizeof(int)*MAX_ATOM_TYPES ); - - control->dipole_anal = 0; - control->freq_dipole_anal = 0; - - control->diffusion_coef = 0; - control->freq_diffusion_coef = 0; - control->restrict_type = 0; - - /* memory allocations */ - s = (char*) malloc(sizeof(char) * MAX_LINE); - tmp = (char**) malloc(sizeof(char*)*MAX_TOKENS); - for (i = 0; i < MAX_TOKENS; i++) - tmp[i] = (char*) malloc(sizeof(char) * MAX_LINE); - - /* read control parameters file */ - while (fgets(s, MAX_LINE, fp)) - { - c = Tokenize(s, &tmp); - - if ( strcmp(tmp[0], "simulation_name") == 0 ) - { - strcpy( control->sim_name, tmp[1] ); - } - //else if( strcmp(tmp[0], "restart") == 0 ) { - // ival = atoi(tmp[1]); - // control->restart = ival; - //} - else if ( strcmp(tmp[0], "restart_format") == 0 ) - { - ival = atoi(tmp[1]); - out_control->restart_format = ival; - } - else if ( strcmp(tmp[0], "restart_freq") == 0 ) - { - ival = atoi(tmp[1]); - out_control->restart_freq = ival; - } - else if ( strcmp(tmp[0], "random_vel") == 0 ) - { - ival = atoi(tmp[1]); - control->random_vel = ival; - } - else if ( strcmp(tmp[0], "reposition_atoms") == 0 ) - { - ival = atoi(tmp[1]); - control->reposition_atoms = ival; - } - else if ( strcmp(tmp[0], "ensemble_type") == 0 ) - { - ival = atoi(tmp[1]); - control->ensemble = ival; - } - else if ( strcmp(tmp[0], "nsteps") == 0 ) - { - ival = atoi(tmp[1]); - control->nsteps = ival; - } - else if ( strcmp(tmp[0], "dt") == 0 ) - { - val = atof(tmp[1]); - control->dt = val * 1.e-3; // convert dt from fs to ps! - } - else if ( strcmp(tmp[0], "periodic_boundaries") == 0 ) - { - ival = atoi( tmp[1] ); - control->periodic_boundaries = ival; - } - else if ( strcmp(tmp[0], "periodic_images") == 0 ) - { - ival = atoi(tmp[1]); - control->periodic_images[0] = ival; - ival = atoi(tmp[2]); - control->periodic_images[1] = ival; - ival = atoi(tmp[3]); - control->periodic_images[2] = ival; - } - else if ( strcmp(tmp[0], "geo_format") == 0 ) - { - ival = atoi( tmp[1] ); - control->geo_format = ival; - } - else if ( strcmp(tmp[0], "restrict_bonds") == 0 ) - { - ival = atoi( tmp[1] ); - control->restrict_bonds = ival; - } - else if ( strcmp(tmp[0], "tabulate_long_range") == 0 ) - { - ival = atoi( tmp[1] ); - control->tabulate = ival; - } - else if ( strcmp(tmp[0], "reneighbor") == 0 ) - { - ival = atoi( tmp[1] ); - control->reneighbor = ival; - } - else if ( strcmp(tmp[0], "vlist_buffer") == 0 ) - { - val = atof(tmp[1]); - control->vlist_cut = val; - } - else if ( strcmp(tmp[0], "nbrhood_cutoff") == 0 ) - { - val = atof(tmp[1]); - control->nbr_cut = val; - } - else if ( strcmp(tmp[0], "thb_cutoff") == 0 ) - { - val = atof(tmp[1]); - control->thb_cut = val; - } - else if ( strcmp(tmp[0], "hbond_cutoff") == 0 ) - { - val = atof( tmp[1] ); - control->hb_cut = val; - } - else if ( strcmp(tmp[0], "qeq_solver_type") == 0 ) - { - ival = atoi( tmp[1] ); - control->qeq_solver_type = ival; - } - else if ( strcmp(tmp[0], "qeq_solver_q_err") == 0 ) - { - val = atof( tmp[1] ); - control->qeq_solver_q_err = val; - } - else if ( strcmp(tmp[0], "pre_comp_type") == 0 ) - { - ival = atoi( tmp[1] ); - control->pre_comp_type = ival; - } - else if ( strcmp(tmp[0], "pre_comp_refactor") == 0 ) - { - ival = atoi( tmp[1] ); - control->pre_comp_refactor = ival; - } - else if ( strcmp(tmp[0], "pre_comp_droptol") == 0 ) - { - val = atof( tmp[1] ); - control->pre_comp_droptol = val; - } - else if ( strcmp(tmp[0], "pre_comp_sweeps") == 0 ) - { - ival = atoi( tmp[1] ); - control->pre_comp_sweeps = ival; - } - else if ( strcmp(tmp[0], "pre_app_jacobi_iters") == 0 ) - { - val = atof( tmp[1] ); - control->pre_app_jacobi_iters = val; - } - else if ( strcmp(tmp[0], "temp_init") == 0 ) - { - val = atof(tmp[1]); - control->T_init = val; - - if ( control->T_init < 0.001 ) - control->T_init = 0.001; - } - else if ( strcmp(tmp[0], "temp_final") == 0 ) - { - val = atof(tmp[1]); - control->T_final = val; - - if ( control->T_final < 0.1 ) - control->T_final = 0.1; - } - else if ( strcmp(tmp[0], "t_mass") == 0 ) - { - val = atof(tmp[1]); - control->Tau_T = val * 1.e-3; // convert t_mass from fs to ps - } - else if ( strcmp(tmp[0], "t_mode") == 0 ) - { - ival = atoi(tmp[1]); - control->T_mode = ival; - } - else if ( strcmp(tmp[0], "t_rate") == 0 ) - { - val = atof(tmp[1]); - control->T_rate = val; - } - else if ( strcmp(tmp[0], "t_freq") == 0 ) - { - val = atof(tmp[1]); - control->T_freq = val; - } - else if ( strcmp(tmp[0], "pressure") == 0 ) - { - if ( control->ensemble == iNPT ) - { - val = atof(tmp[1]); - control->P[0] = control->P[1] = control->P[2] = val; - } - else if ( control->ensemble == sNPT ) - { - val = atof(tmp[1]); - control->P[0] = val; - - val = atof(tmp[2]); - control->P[1] = val; - - val = atof(tmp[3]); - control->P[2] = val; - } - } - else if ( strcmp(tmp[0], "p_mass") == 0 ) - { - if ( control->ensemble == iNPT ) - { - val = atof(tmp[1]); - control->Tau_P[0] = val * 1.e-3; // convert p_mass from fs to ps - } - else if ( control->ensemble == sNPT ) - { - val = atof(tmp[1]); - control->Tau_P[0] = val * 1.e-3; // convert p_mass from fs to ps - - val = atof(tmp[2]); - control->Tau_P[1] = val * 1.e-3; // convert p_mass from fs to ps - - val = atof(tmp[3]); - control->Tau_P[2] = val * 1.e-3; // convert p_mass from fs to ps - } - } - else if ( strcmp(tmp[0], "pt_mass") == 0 ) - { - val = atof(tmp[1]); - control->Tau_PT = val * 1.e-3; // convert pt_mass from fs to ps - } - else if ( strcmp(tmp[0], "compress") == 0 ) - { - val = atof(tmp[1]); - control->compressibility = val; - } - else if ( strcmp(tmp[0], "press_mode") == 0 ) - { - val = atoi(tmp[1]); - control->press_mode = val; - } - else if ( strcmp(tmp[0], "remove_CoM_vel") == 0 ) - { - val = atoi(tmp[1]); - control->remove_CoM_vel = val; - } - else if ( strcmp(tmp[0], "debug_level") == 0 ) - { - ival = atoi(tmp[1]); - out_control->debug_level = ival; - } - else if ( strcmp(tmp[0], "energy_update_freq") == 0 ) - { - ival = atoi(tmp[1]); - out_control->energy_update_freq = ival; - } - else if ( strcmp(tmp[0], "write_freq") == 0 ) - { - ival = atoi(tmp[1]); - out_control->write_steps = ival; - } - else if ( strcmp(tmp[0], "traj_compress") == 0 ) - { - ival = atoi(tmp[1]); - out_control->traj_compress = ival; - - if ( out_control->traj_compress ) - out_control->write = (int (*)(FILE *, const char *, ...)) gzprintf; - else out_control->write = fprintf; - } - else if ( strcmp(tmp[0], "traj_format") == 0 ) - { - ival = atoi(tmp[1]); - out_control->traj_format = ival; - - if ( out_control->traj_format == 0 ) - { - out_control->write_header = - (int (*)( reax_system*, control_params*, - static_storage*, void* )) Write_Custom_Header; - out_control->append_traj_frame = - (int (*)(reax_system*, control_params*, simulation_data*, - static_storage*, list **, void*)) Append_Custom_Frame; - } - else if ( out_control->traj_format == 1 ) - { - out_control->write_header = - (int (*)( reax_system*, control_params*, - static_storage*, void* )) Write_xyz_Header; - out_control->append_traj_frame = - (int (*)( reax_system*, control_params*, simulation_data*, - static_storage*, list **, void* )) Append_xyz_Frame; - } - } - else if ( strcmp(tmp[0], "traj_title") == 0 ) - { - strcpy( out_control->traj_title, tmp[1] ); - } - else if ( strcmp(tmp[0], "atom_info") == 0 ) - { - ival = atoi(tmp[1]); - out_control->atom_format += ival * 4; - } - else if ( strcmp(tmp[0], "atom_velocities") == 0 ) - { - ival = atoi(tmp[1]); - out_control->atom_format += ival * 2; - } - else if ( strcmp(tmp[0], "atom_forces") == 0 ) - { - ival = atoi(tmp[1]); - out_control->atom_format += ival * 1; - } - else if ( strcmp(tmp[0], "bond_info") == 0 ) - { - ival = atoi(tmp[1]); - out_control->bond_info = ival; - } - else if ( strcmp(tmp[0], "angle_info") == 0 ) - { - ival = atoi(tmp[1]); - out_control->angle_info = ival; - } - else if ( strcmp(tmp[0], "test_forces") == 0 ) - { - ival = atoi(tmp[1]); - } - else if ( strcmp(tmp[0], "molec_anal") == 0 ) - { - ival = atoi(tmp[1]); - control->molec_anal = ival; - } - else if ( strcmp(tmp[0], "freq_molec_anal") == 0 ) - { - ival = atoi(tmp[1]); - control->freq_molec_anal = ival; - } - else if ( strcmp(tmp[0], "bond_graph_cutoff") == 0 ) - { - val = atof(tmp[1]); - control->bg_cut = val; - } - else if ( strcmp(tmp[0], "ignore") == 0 ) - { - control->num_ignored = atoi(tmp[1]); - for ( i = 0; i < control->num_ignored; ++i ) - control->ignore[atoi(tmp[i + 2])] = 1; - } - else if ( strcmp(tmp[0], "dipole_anal") == 0 ) - { - ival = atoi(tmp[1]); - control->dipole_anal = ival; - } - else if ( strcmp(tmp[0], "freq_dipole_anal") == 0 ) - { - ival = atoi(tmp[1]); - control->freq_dipole_anal = ival; - } - else if ( strcmp(tmp[0], "diffusion_coef") == 0 ) - { - ival = atoi(tmp[1]); - control->diffusion_coef = ival; - } - else if ( strcmp(tmp[0], "freq_diffusion_coef") == 0 ) - { - ival = atoi(tmp[1]); - control->freq_diffusion_coef = ival; - } - else if ( strcmp(tmp[0], "restrict_type") == 0 ) - { - ival = atoi(tmp[1]); - control->restrict_type = ival; - } - else - { - fprintf( stderr, "WARNING: unknown parameter %s\n", tmp[0] ); - exit( 15 ); - } - } - - if (ferror(fp)) - { - fprintf(stderr, "Error reading control file. Terminating.\n"); - exit( INVALID_INPUT ); - } - - /* determine target T */ - if ( control->T_mode == 0 ) - control->T = control->T_final; - else control->T = control->T_init; - - - /* near neighbor and far neighbor cutoffs */ - control->bo_cut = 0.01 * system->reaxprm.gp.l[29]; - control->r_low = system->reaxprm.gp.l[11]; - control->r_cut = system->reaxprm.gp.l[12]; - control->vlist_cut += control->r_cut; - - system->g.cell_size = control->vlist_cut / 2.; - for ( i = 0; i < 3; ++i ) - system->g.spread[i] = 2; - - - /* Initialize Taper function */ - Init_Taper( control ); - - - /* free memory allocations at the top */ - for ( i = 0; i < MAX_TOKENS; i++ ) - free( tmp[i] ); - free( tmp ); - free( s ); - -#if defined(DEBUG_FOCUS) - fprintf( stderr, - "en=%d steps=%d dt=%.5f opt=%d T=%.5f P=%.5f %.5f %.5f\n", - control->ensemble, control->nsteps, control->dt, control->tabulate, - control->T, control->P[0], control->P[1], control->P[2] ); - - fprintf(stderr, "control file read\n" ); -#endif - return 0; + return SUCCESS; } diff --git a/sPuReMD/src/param.h b/sPuReMD/src/ffield.h similarity index 76% rename from sPuReMD/src/param.h rename to sPuReMD/src/ffield.h index 838bb1a0d004672978a35f94547d511104aba9ea..4aaf32a644861b069e8cf87e2eec68aadf4d3c84 100644 --- a/sPuReMD/src/param.h +++ b/sPuReMD/src/ffield.h @@ -19,22 +19,10 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#ifndef __PARAM_H_ -#define __PARAM_H_ +#ifndef __FFIELD_H_ +#define __FFIELD_H_ #include "mytypes.h" - -#define MAX_LINE 1024 -#define MAX_TOKENS 20 -#define MAX_TOKEN_LEN 1024 - -int Get_Atom_Type( reax_interaction*, char* ); - -int Tokenize( char*, char*** ); - char Read_Force_Field( FILE*, reax_interaction* ); -char Read_Control_File( FILE*, reax_system*, control_params*, - output_controls* ); - #endif diff --git a/sPuReMD/src/forces.c b/sPuReMD/src/forces.c index 13c57e63aad64529ca69aa44e05215ace7c4a76f..2746ede2bcc068e90dfd3bee3692999eaec303c9 100644 --- a/sPuReMD/src/forces.c +++ b/sPuReMD/src/forces.c @@ -187,7 +187,7 @@ void Validate_Lists( static_storage *workspace, list **lists, int step, int n, fprintf( stderr, "step%d - ran out of space on H matrix: Htop=%d, max = %d", step, Htop, Hmax ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } } @@ -206,7 +206,7 @@ void Validate_Lists( static_storage *workspace, list **lists, int step, int n, { fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d str(i+1)=%d\n", step, flag, End_Index(flag, bonds), Start_Index(flag + 1, bonds) ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } if ( End_Index(i, bonds) >= bonds->num_intrs - 2 ) @@ -217,7 +217,7 @@ void Validate_Lists( static_storage *workspace, list **lists, int step, int n, { fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d bond_end=%d\n", step, flag, End_Index(i, bonds), bonds->num_intrs ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } } @@ -240,7 +240,7 @@ void Validate_Lists( static_storage *workspace, list **lists, int step, int n, { fprintf( stderr, "step%d-hbondchk failed: i=%d end(i)=%d str(i+1)=%d\n", step, flag, End_Index(flag, hbonds), Start_Index(flag + 1, hbonds) ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } if ( Num_Entries(i, hbonds) >= @@ -252,7 +252,7 @@ void Validate_Lists( static_storage *workspace, list **lists, int step, int n, { fprintf( stderr, "step%d-hbondchk failed: i=%d end(i)=%d hbondend=%d\n", step, flag, End_Index(i, hbonds), hbonds->num_intrs ); - exit(INSUFFICIENT_SPACE); + exit( INSUFFICIENT_MEMORY ); } } } @@ -372,15 +372,15 @@ void Init_Forces( reax_system *system, control_params *control, dr3gamij_1 = ( r_ij * r_ij * r_ij + twbp->gamma ); dr3gamij_3 = POW( dr3gamij_1 , 0.33333333333333 ); - H->entries[Htop].j = j; - H->entries[Htop].val = self_coef * Tap * EV_to_KCALpMOL / dr3gamij_3; + H->j[Htop] = j; + H->val[Htop] = self_coef * Tap * EV_to_KCALpMOL / dr3gamij_3; ++Htop; /* H_sp matrix entry */ if ( flag_sp ) { - H_sp->entries[H_sp_top].j = j; - H_sp->entries[H_sp_top].val = self_coef * Tap * EV_to_KCALpMOL / dr3gamij_3; + H_sp->j[H_sp_top] = j; + H_sp->val[H_sp_top] = self_coef * Tap * EV_to_KCALpMOL / dr3gamij_3; ++H_sp_top; } @@ -538,12 +538,12 @@ void Init_Forces( reax_system *system, control_params *control, } } - H->entries[Htop].j = i; - H->entries[Htop].val = system->reaxprm.sbp[type_i].eta; + H->j[Htop] = i; + H->val[Htop] = system->reaxprm.sbp[type_i].eta; ++Htop; - H_sp->entries[H_sp_top].j = i; - H_sp->entries[H_sp_top].val = system->reaxprm.sbp[type_i].eta; + H_sp->j[H_sp_top] = i; + H_sp->val[H_sp_top] = system->reaxprm.sbp[type_i].eta; ++H_sp_top; Set_End_Index( i, btop_i, bonds ); @@ -663,8 +663,8 @@ void Init_Forces_Tab( reax_system *system, control_params *control, t->ele[r].a; val *= EV_to_KCALpMOL / C_ele; - H->entries[Htop].j = j; - H->entries[Htop].val = self_coef * val; + H->j[Htop] = j; + H->val[Htop] = self_coef * val; ++Htop; /* hydrogen bond lists */ @@ -793,8 +793,8 @@ void Init_Forces_Tab( reax_system *system, control_params *control, } } - H->entries[Htop].j = i; - H->entries[Htop].val = system->reaxprm.sbp[type_i].eta; + H->j[Htop] = i; + H->val[Htop] = system->reaxprm.sbp[type_i].eta; ++Htop; Set_End_Index( i, btop_i, bonds ); diff --git a/sPuReMD/src/pdb_tools.c b/sPuReMD/src/geo_tools.c similarity index 52% rename from sPuReMD/src/pdb_tools.c rename to sPuReMD/src/geo_tools.c index e7b1b57d07b91d5bf1c5f96eaf2b314ee55c6f02..01f404b4f6d51e73722b9f450f9ca8873dc3c9c9 100644 --- a/sPuReMD/src/pdb_tools.c +++ b/sPuReMD/src/geo_tools.c @@ -19,143 +19,262 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#include "pdb_tools.h" +#include <ctype.h> + +#include "geo_tools.h" +#include "allocate.h" #include "box.h" #include "list.h" -#include "param.h" #include "restart.h" -#include "ctype.h" +#include "tool_box.h" +#include "vector.h" -int is_Valid_Serial( static_storage *workspace, int serial ) +/********************* geo format routines ******************/ +void Count_Geo_Atoms( FILE *geo, reax_system *system ) { - if ( workspace->map_serials[ serial ] < 0 ) + int i, serial; + rvec x; + char element[3], name[9], line[MAX_LINE + 1]; + + /* total number of atoms */ + fscanf( geo, " %d", &(system->N) ); + + /* count atoms */ + for ( i = 0; i < system->N; ++i ) { - fprintf( stderr, "CONECT line includes invalid pdb serial number %d.\n", - serial ); - fprintf( stderr, "Please correct the input file.Terminating...\n" ); - exit( INVALID_INPUT ); + fscanf( geo, CUSTOM_ATOM_FORMAT, + &serial, element, name, &x[0], &x[1], &x[2] ); + Fit_to_Periodic_Box( &(system->box), &x ); } - return 1; + fseek( geo, 0, SEEK_SET ); // set the pointer to the beginning of the file + fgets( line, MAX_LINE, geo ); + fgets( line, MAX_LINE, geo ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "N = %d\n\n", system->N ); +#endif } -int Check_Input_Range( int val, int lo, int hi, char *message ) +char Read_Geo( char* geo_file, reax_system* system, control_params *control, + simulation_data *data, static_storage *workspace ) { - if ( val < lo || val > hi ) + + FILE *geo; + char descriptor[9]; + int i, serial, top; + real box_x, box_y, box_z, alpha, beta, gamma; + rvec x; + char element[3], name[9]; + reax_atom *atom; + + /* open the geometry file */ + if ( (geo = fopen(geo_file, "r")) == NULL ) { - fprintf( stderr, "%s\nInput %d - Out of range %d-%d. Terminating...\n", - message, val, lo, hi ); - exit( INVALID_INPUT ); + fprintf( stderr, "Error opening the geo file! terminating...\n" ); + exit( FILE_NOT_FOUND ); } - return 1; -} + /* read box information */ + fscanf( geo, CUSTOM_BOXGEO_FORMAT, + descriptor, &box_x, &box_y, &box_z, &alpha, &beta, &gamma ); + /* initialize the box */ + Setup_Box( box_x, box_y, box_z, alpha, beta, gamma, &(system->box) ); + /* count my atoms & allocate storage */ + Count_Geo_Atoms( geo, system ); + if ( PreAllocate_Space( system, control, workspace ) == FAILURE ) + { + fprintf( stderr, "PreAllocate_Space: not enough memory!" ); + fprintf( stderr, "terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } -void Trim_Spaces( char *element ) -{ - int i, j; + /* read in my atom info */ + top = 0; + for ( i = 0; i < system->N; ++i ) + { + fscanf( geo, CUSTOM_ATOM_FORMAT, + &serial, element, name, &x[0], &x[1], &x[2] ); + Fit_to_Periodic_Box( &(system->box), &x ); +#if defined(DEBUG) + fprintf( stderr, "atom%d: %s %s %f %f %f\n", + serial, element, name, x[0], x[1], x[2] ); +#endif + + atom = &(system->atoms[top]); + workspace->orig_id[i] = serial; + atom->type = Get_Atom_Type( &(system->reaxprm), element ); + strcpy( atom->name, name ); + rvec_Copy( atom->x, x ); + rvec_MakeZero( atom->v ); + rvec_MakeZero( atom->f ); + atom->q = 0.; - for ( i = 0; element[i] == ' '; ++i ); // skip initial space chars + top++; + } + + fclose( geo ); + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "finished reading the geo file\n" ); +#endif - for ( j = i; j < strlen(element) && element[j] != ' '; ++j ) - element[j - i] = toupper( element[j] ); // make uppercase, move to beginning - element[j - i] = 0; // finalize the string + return SUCCESS; } -char Read_PDB( char* pdb_file, reax_system* system, control_params *control, - simulation_data *data, static_storage *workspace ) +int Read_Box_Info( reax_system *system, FILE *geo, int geo_format ) { + char *cryst; + char line[MAX_LINE + 1]; + char descriptor[9]; + char s_a[12], s_b[12], s_c[12], s_alpha[12], s_beta[12], s_gamma[12]; + char s_group[12], s_zValue[12]; - FILE *pdb; - char **tmp; - char *s, *s1; - char descriptor[9], serial[9]; - char atom_name[9], res_name[9], res_seq[9]; - char s_x[9], s_y[9], s_z[9]; - char occupancy[9], temp_factor[9]; - char seg_id[9], element[9], charge[9]; - char alt_loc, chain_id, icode; - char s_a[10], s_b[10], s_c[10], s_alpha[9], s_beta[9], s_gamma[9]; - char s_group[12], s_zValue[9]; - char *endptr = NULL; - int i, c, c1, pdb_serial, ratom = 0; - /* open pdb file */ - if ( (pdb = fopen(pdb_file, "r")) == NULL ) + /* initialize variables */ + fseek( geo, 0, SEEK_SET ); // set the pointer to the beginning of the file + + switch ( geo_format ) { - fprintf( stderr, "Error opening the pdb file!\n" ); - exit( FILE_NOT_FOUND_ERR ); + case PDB: + cryst = "CRYST1"; + break; + default: + cryst = "BOX"; } + /* locate the cryst line in the geo file, read it and + initialize the big box */ + while ( fgets( line, MAX_LINE, geo ) ) + { + if ( strncmp( line, cryst, 6 ) == 0 ) + { + if ( geo_format == PDB ) + sscanf( line, PDB_CRYST1_FORMAT, + &descriptor[0], + &s_a[0], &s_b[0], &s_c[0], + &s_alpha[0], &s_beta[0], &s_gamma[0], + &s_group[0], &s_zValue[0] ); + + /* compute full volume tensor from the angles */ + Setup_Box( atof(s_a), atof(s_b), atof(s_c), + atof(s_alpha), atof(s_beta), atof(s_gamma), + &(system->box) ); + return SUCCESS; + } + } + if ( ferror( geo ) ) + { + return FAILURE; + } - /* allocate memory for tokenizing pdb lines */ - s = (char*) malloc( sizeof(char) * MAX_LINE ); - s1 = (char*) malloc( sizeof(char) * MAX_LINE ); - tmp = (char**) malloc( sizeof(char*) * MAX_TOKENS ); - for ( i = 0; i < MAX_TOKENS; i++ ) - tmp[i] = (char*) malloc( sizeof(char) * MAX_TOKEN_LEN ); + return FAILURE; +} - /* count number of atoms in the pdb file */ - system->N = 0; - s[0] = 0; - while (fgets( s, MAX_LINE, pdb )) - { +void Count_PDB_Atoms( FILE *geo, reax_system *system ) +{ + char *endptr = NULL; + char line[MAX_LINE + 1]; + char s_x[9], s_y[9], s_z[9]; + rvec x; - tmp[0][0] = 0; - c = Tokenize( s, &tmp ); + /* initialize variables */ + fseek( geo, 0, SEEK_SET ); /* set the pointer to the beginning of the file */ + system->N = 0; - if ( strncmp( tmp[0], "ATOM", 4 ) == 0 || - strncmp( tmp[0], "HETATM", 6 ) == 0 ) - (system->N)++; - s[0] = 0; - } - if (ferror(pdb)) + /* increment number of atoms for each line denoting an atom desc */ + while ( fgets( line, MAX_LINE, geo ) ) { - fprintf(stderr, "Error reading PDB file. Terminating.\n"); - exit( INVALID_INPUT ); + if ( strncmp( line, "ATOM", 4 ) == 0 || + strncmp( line, "HETATM", 6 ) == 0 ) + { + system->N++; + + strncpy( s_x, line + 30, 8 ); + s_x[8] = 0; + strncpy( s_y, line + 38, 8 ); + s_y[8] = 0; + strncpy( s_z, line + 46, 8 ); + s_z[8] = 0; + Make_Point( strtod( s_x, &endptr ), strtod( s_y, &endptr ), + strtod( s_z, &endptr ), &x ); + Fit_to_Periodic_Box( &(system->box), &x ); + } } - fclose(pdb); -#if defined(DEBUG_FOCUS) - fprintf( stderr, "system->N: %d\n", system->N ); +#if defined(DEBUG) + fprintf( stderr, "count atoms:\n" ); + fprintf( stderr, "N = %d\n\n", system->N ); #endif +} - /* memory allocations for atoms, atom maps, bond restrictions */ - system->atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); - - workspace->map_serials = (int*) calloc( MAX_ATOM_ID, sizeof(int) ); - for ( i = 0; i < MAX_ATOM_ID; ++i ) - workspace->map_serials[i] = -1; - workspace->orig_id = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted = (int*) calloc( system->N, sizeof(int) ); - workspace->restricted_list = (int**) calloc( system->N, sizeof(int*) ); - for ( i = 0; i < system->N; ++i ) - workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) ); +char Read_PDB( char* pdb_file, reax_system* system, control_params *control, + simulation_data *data, static_storage *workspace ) +{ + FILE *pdb; + char **tmp; + char *s, *s1; + char descriptor[9], serial[9]; + char atom_name[9], res_name[9], res_seq[9]; + char s_x[9], s_y[9], s_z[9]; + char occupancy[9], temp_factor[9]; + char seg_id[9], element[9], charge[9]; + char alt_loc, chain_id, icode; + char *endptr = NULL; + int i, c, c1, pdb_serial, top; + rvec x; + reax_atom *atom; - /* start reading and processing pdb file */ + /* open pdb file */ if ( (pdb = fopen(pdb_file, "r")) == NULL ) { - fprintf( stderr, "Error opening the pdb file!\n" ); - exit( FILE_NOT_FOUND_ERR ); + fprintf( stderr, "fopen: error opening the pdb file! terminating...\n" ); + exit( FILE_NOT_FOUND ); } - c = 0; - c1 = 0; - while (!feof(pdb)) + /* allocate memory for tokenizing pdb lines */ + if ( Allocate_Tokenizer_Space( &s, &s1, &tmp ) == FAILURE ) { - /* clear previous input line */ - s[0] = 0; - for ( i = 0; i < c1; ++i ) - tmp[i][0] = 0; + fprintf( stderr, "Allocate_Tokenizer_Space: not enough memory!" ); + fprintf( stderr, "terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + /* read box information */ + if ( Read_Box_Info( system, pdb, PDB ) == FAILURE ) + { + fprintf( stderr, "Read_Box_Info: no CRYST line in the pdb file!" ); + fprintf( stderr, "terminating...\n" ); + exit( INVALID_GEO ); + } + + Count_PDB_Atoms( pdb, system ); + if ( PreAllocate_Space( system, control, workspace ) == FAILURE ) + { + fprintf( stderr, "PreAllocate_Space: not enough memory!" ); + fprintf( stderr, "terminating...\n" ); + exit( INSUFFICIENT_MEMORY ); + } + + /* start reading and processing the pdb file */ +#if defined(DEBUG_FOCUS) + fprintf( stderr, "starting to read the pdb file\n" ); +#endif + fseek( pdb, 0, SEEK_SET ); + c = 0; + c1 = 0; + top = 0; + s[0] = 0; + while ( fgets( s, MAX_LINE, pdb ) ) + { /* read new line and tokenize it */ - fgets( s, MAX_LINE, pdb ); strncpy( s1, s, MAX_LINE - 1 ); c1 = Tokenize( s, &tmp ); @@ -170,6 +289,8 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control, serial[5] = 0; strncpy( &atom_name[0], s1 + 12, 4 ); atom_name[4] = 0; + //strncpy( &serial[0], s1+6, 7 ); serial[7] = 0; + //strncpy( &atom_name[0], s1+13, 3 ); atom_name[3] = 0; alt_loc = s1[16]; strncpy( &res_name[0], s1 + 17, 3 ); res_name[3] = 0; @@ -202,6 +323,8 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control, serial[5] = 0; strncpy( &atom_name[0], s1 + 12, 4 ); atom_name[4] = 0; + //strncpy( &serial[0], s1+6, 7 ); serial[7] = 0; + //strncpy( &atom_name[0], s1+13, 3 ); atom_name[3] = 0; alt_loc = s1[16]; strncpy( &res_name[0], s1 + 17, 3 ); res_name[3] = 0; @@ -226,193 +349,194 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control, charge[2] = 0; } + /* if the point is inside my_box, add it to my lists */ + Make_Point( strtod( &s_x[0], &endptr ), + strtod( &s_y[0], &endptr ), + strtod( &s_z[0], &endptr ), &x ); - /* add to mapping */ - pdb_serial = strtod( &serial[0], &endptr ); - Check_Input_Range( pdb_serial, 0, MAX_ATOM_ID, "Invalid pdb_serial" ); - workspace->map_serials[ pdb_serial ] = c; - workspace->orig_id[ c ] = pdb_serial; - // fprintf( stderr, "map %d --> %d\n", pdb_serial, c ); - + Fit_to_Periodic_Box( &(system->box), &x ); - /* copy atomic positions */ - system->atoms[c].x[0] = strtod( &s_x[0], &endptr ); - system->atoms[c].x[1] = strtod( &s_y[0], &endptr ); - system->atoms[c].x[2] = strtod( &s_z[0], &endptr ); + /* store orig_id, type, name and coord info of the new atom */ + atom = &(system->atoms[top]); + pdb_serial = (int) strtod( &serial[0], &endptr ); + workspace->orig_id[top] = pdb_serial; - /* atom name and type */ - strcpy( system->atoms[c].name, atom_name ); Trim_Spaces( element ); - system->atoms[c].type = Get_Atom_Type( &(system->reaxprm), element ); + atom->type = Get_Atom_Type( &(system->reaxprm), element ); + strcpy( atom->name, atom_name ); + + rvec_Copy( atom->x, x ); + rvec_MakeZero( atom->v ); + rvec_MakeZero( atom->f ); + atom->q = 0; + + top++; + // fprintf( stderr, "p%d: %6d%2d x:%8.3f%8.3f%8.3f" + // "q:%8.3f occ:%s temp:%s seg:%s elmnt:%s\n", + // system->my_rank, + // c, system->my_atoms[top].type, + // system->my_atoms[top].x[0], + // system->my_atoms[top].x[1], + // system->my_atoms[top].x[2], + // system->my_atoms[top].q, occupancy, temp_factor, + // seg_id, element ); + + //fprintf( stderr, "atom( %8.3f %8.3f %8.3f ) --> p%d\n", + // system->my_atoms[top].x[0], system->my_atoms[top].x[1], + // system->my_atoms[top].x[2], system->my_rank ); - /* fprintf( stderr, - "%d%8.3f%8.3f%8.3fq:%8.3f occ:%s temp:%s seg_id:%s element:%s\n", - system->atoms[c].type, - system->atoms[c].x[0], system->atoms[c].x[1], system->atoms[c].x[2], - system->atoms[c].q, occupancy, temp_factor, seg_id, element ); */ c++; } - else if (!strncmp( tmp[0], "CRYST1", 6 )) - { - sscanf( s1, PDB_CRYST1_FORMAT, - &descriptor[0], - &s_a[0], - &s_b[0], - &s_c[0], - &s_alpha[0], - &s_beta[0], - &s_gamma[0], - &s_group[0], - &s_zValue[0] ); - - /* Compute full volume tensor from the angles */ - Init_Box_From_CRYST( atof(s_a), atof(s_b), atof(s_c), - atof(s_alpha), atof(s_beta), atof(s_gamma), - &(system->box) ); - } /* IMPORTANT: We do not check for the soundness of restrictions here. - When atom2 is on atom1's restricted list, and there is a restriction on - atom2, then atom1 has to be on atom2's restricted list, too. However, - we do not check if this is the case in the input file, + When atom2 is on atom1's restricted list, and there is a restriction + on atom2, then atom1 has to be on atom2's restricted list, too. + However, we do not check if this is the case in the input file, this is upto the user. */ else if (!strncmp( tmp[0], "CONECT", 6 )) { - /* error check */ - //fprintf(stderr, "CONECT: %d\n", c1 ); - Check_Input_Range( c1 - 2, 0, MAX_RESTRICT, - "CONECT line exceeds max restrictions allowed.\n" ); - - /* read bond restrictions */ - if ( is_Valid_Serial( workspace, pdb_serial = atoi(tmp[1]) ) ) - ratom = workspace->map_serials[ pdb_serial ]; - - workspace->restricted[ ratom ] = c1 - 2; - for ( i = 2; i < c1; ++i ) + if ( control->restrict_bonds ) { - if ( is_Valid_Serial( workspace, pdb_serial = atoi(tmp[i]) ) ) - workspace->restricted_list[ ratom ][ i - 2 ] = - workspace->map_serials[ pdb_serial ]; + /* error check */ + // Check_Input_Range( c1 - 2, 0, MAX_RESTRICT, + // "CONECT line exceeds max num restrictions allowed.\n" ); + + /* read bond restrictions */ + // if( is_Valid_Serial( workspace, pdb_serial = atoi(tmp[1]) ) ) + // ratom = workspace->map_serials[ pdb_serial ]; + + // workspace->restricted[ ratom ] = c1 - 2; + // for( i = 2; i < c1; ++i ) + // { + // if( is_Valid_Serial(workspace, pdb_serial = atoi(tmp[i])) ) + // workspace->restricted_list[ ratom ][ i-2 ] = + // workspace->map_serials[ pdb_serial ]; + // } + + // fprintf( stderr, "restriction on %d:", ratom ); + // for( i = 0; i < workspace->restricted[ ratom ]; ++i ) + // fprintf( stderr, " %d", + // workspace->restricted_list[ratom][i] ); + // fprintf( stderr, "\n" ); } - - /* fprintf( stderr, "restriction on %d:", ratom ); - for( i = 0; i < workspace->restricted[ ratom ]; ++i ) - fprintf( stderr, " %d", workspace->restricted_list[ratom][i] ); - fprintf( stderr, "\n" ); */ } + + /* clear previous input line */ + s[0] = 0; + for ( i = 0; i < c1; ++i ) + tmp[i][0] = 0; + } + if ( ferror( pdb ) ) + { + return FAILURE; } - fclose(pdb); + fclose( pdb ); #if defined(DEBUG_FOCUS) - fprintf( stderr, "pdb file read\n" ); + fprintf( stderr, "finished reading the pdb file\n" ); #endif - return 1; -} + return SUCCESS; +} -char Write_PDB( reax_system* system, control_params *control, - simulation_data *data, static_storage *workspace, - list* bonds, output_controls *out_control ) +/* PDB serials are written without regard to the order, we'll see if this + cause trouble, if so we'll have to rethink this approach + Also, we do not write connect lines yet. +*/ +char Write_PDB( reax_system* system, list* bonds, simulation_data *data, + control_params *control, static_storage *workspace, output_controls *out_control ) { - int i, j, k, count; - int connect[4]; - char temp[MAX_STR], name[10]; - real bo; + int i, buffer_req, buffer_len; + //int j, connect[4]; + char name[8]; + //real bo; real alpha, beta, gamma; + reax_atom *p_atom; + char fname[MAX_STR]; + char *line; + char *buffer; + FILE *pdb; + /* Allocation */ + line = (char*) smalloc( sizeof(char) * PDB_ATOM_FORMAT_O_LENGTH, "geo:line" ); + buffer_req = system->N * PDB_ATOM_FORMAT_O_LENGTH; - /* open output pdb file */ - sprintf( temp, "%s%d.pdb", control->sim_name, data->step ); - if ( (out_control->pdb = fopen( temp, "w" )) == NULL ) - { - fprintf( stderr, "Error opening the pdb output file!\n" ); - exit( FILE_NOT_FOUND_ERR ); - } - + buffer = (char*) smalloc( sizeof(char) * buffer_req, "geo:buffer" ); + pdb = NULL; + line[0] = 0; + buffer[0] = 0; /* Writing Box information */ - /* Write full volume tensor from the angles (as soon as possible) TODO_SOON */ - gamma = acos( (system->box.box[0][0] * system->box.box[1][0] + + gamma = ACOS( (system->box.box[0][0] * system->box.box[1][0] + system->box.box[0][1] * system->box.box[1][1] + system->box.box[0][2] * system->box.box[1][2]) / - (system->box.box_norms[0] * system->box.box_norms[1])); - beta = acos( (system->box.box[0][0] * system->box.box[2][0] + + (system->box.box_norms[0] * system->box.box_norms[1]) ); + beta = ACOS( (system->box.box[0][0] * system->box.box[2][0] + system->box.box[0][1] * system->box.box[2][1] + system->box.box[0][2] * system->box.box[2][2]) / - (system->box.box_norms[0] * system->box.box_norms[2])); - alpha = acos( (system->box.box[2][0] * system->box.box[1][0] + + (system->box.box_norms[0] * system->box.box_norms[2]) ); + alpha = ACOS( (system->box.box[2][0] * system->box.box[1][0] + system->box.box[2][1] * system->box.box[1][1] + system->box.box[2][2] * system->box.box[1][2]) / - (system->box.box_norms[2] * system->box.box_norms[1])); - - fprintf(out_control->pdb, PDB_CRYST1_FORMAT_O, - "CRYST1", - system->box.box_norms[0], - system->box.box_norms[1], - system->box.box_norms[2], - RAD2DEG(alpha), - RAD2DEG(beta), - RAD2DEG(gamma), - " ", - 0); + (system->box.box_norms[2] * system->box.box_norms[1]) ); + + /*open pdb and write header*/ + sprintf(fname, "%s-%d.pdb", control->sim_name, data->step); + pdb = fopen(fname, "w"); + fprintf( pdb, PDB_CRYST1_FORMAT_O, + "CRYST1", + system->box.box_norms[0], system->box.box_norms[1], + system->box.box_norms[2], + RAD2DEG(alpha), RAD2DEG(beta), RAD2DEG(gamma), " ", 0 ); fprintf( out_control->log, "Box written\n" ); fflush( out_control->log ); - /* Writing atom information */ - for (i = 0; i < system->N; i++) + /*write atom lines to buffer*/ + for ( i = 0; i < system->N; i++) { - strncpy( name, system->reaxprm.sbp[system->atoms[i].type].name, 2 ); - name[2] = '\0'; - fprintf( out_control->pdb, PDB_ATOM_FORMAT_O, - "ATOM ", - workspace->orig_id[i], - name, - ' ', - "REX", - ' ', - 1, - ' ', - system->atoms[i].x[0], - system->atoms[i].x[1], - system->atoms[i].x[2], - 1.0, - 0.0, - "0", - name, - " " ); + p_atom = &(system->atoms[i]); + strncpy(name, p_atom->name, 8); + Trim_Spaces(name); + sprintf( line, PDB_ATOM_FORMAT_O, + "ATOM ", workspace->orig_id[i], p_atom->name, ' ', "REX", ' ', 1, ' ', + p_atom->x[0], p_atom->x[1], p_atom->x[2], + 1.0, 0.0, "0", name, " " ); + fprintf( stderr, "PDB NAME <%s>\n", p_atom->name ); + strncpy( buffer + i * PDB_ATOM_FORMAT_O_LENGTH, line, + PDB_ATOM_FORMAT_O_LENGTH ); } - fprintf( out_control->log, "ATOM written\n" ); - fflush( out_control->log ); + buffer_len = system->N * PDB_ATOM_FORMAT_O_LENGTH; + buffer[buffer_len] = 0; - /* Writing connect information */ - for (i = 0; i < system->N; i++) - { - count = 0; + fprintf( pdb, "%s", buffer ); + fclose( pdb ); - for (j = Start_Index(i, bonds); j < End_Index(i, bonds); ++j) - { - bo = bonds->select.bond_list[j].bo_data.BO; - if (bo > 0.3) - { - connect[count] = workspace->orig_id[bonds->select.bond_list[j].nbr]; - count++; - } + /* Writing connect information */ + /* + for(i=0; i < system->N; i++) { + count = 0; + for(j = Start_Index(i, bonds); j < End_Index(i, bonds); ++j) { + bo = bonds->bond_list[j].bo_data.BO; + if (bo > 0.3) { + connect[count] = bonds->bond_list[j].nbr+1; + count++; } + } - fprintf( out_control->pdb, "%6s%6d", "CONECT", workspace->orig_id[i] ); - for ( k = 0; k < count; k++ ) - fprintf( out_control->pdb, "%6d", connect[k] ); - fprintf( out_control->pdb, "\n" ); + fprintf( out_control->pdb, "%6s%5d", "CONECT", i+1 ); + for( k=0; k < count; k++ ) + fprintf( out_control->pdb, "%5d", connect[k] ); + fprintf( out_control->pdb, "\n" ); } + */ - fprintf( out_control->pdb, "END\n" ); - - fclose( out_control->pdb ); + free(buffer); + free(line); - return 1; + return SUCCESS; } @@ -436,67 +560,70 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control, if ( (bgf = fopen( bgf_file, "r" )) == NULL ) { fprintf( stderr, "Error opening the bgf file!\n" ); - exit( FILE_NOT_FOUND_ERR ); + exit( FILE_NOT_FOUND ); } - /* allocate memory for tokenizing biograf file lines */ line = (char*) malloc( sizeof(char) * MAX_LINE ); backup = (char*) malloc( sizeof(char) * MAX_LINE ); tokens = (char**) malloc( sizeof(char*) * MAX_TOKENS ); for ( i = 0; i < MAX_TOKENS; i++ ) + { tokens[i] = (char*) malloc( sizeof(char) * MAX_TOKEN_LEN ); - + } /* count number of atoms in the pdb file */ system->N = 0; - while ( !feof( bgf ) ) - { - line[0] = 0; - fgets( line, MAX_LINE, bgf ); + line[0] = 0; + while ( fgets( line, MAX_LINE, bgf ) ) + { tokens[0][0] = 0; token_cnt = Tokenize( line, &tokens ); if ( !strcmp( tokens[0], "ATOM" ) || !strcmp( tokens[0], "HETATM" ) ) (system->N)++; + + line[0] = 0; } - //fprintf( stderr, "system->N: %d\n", system->N ); + if ( ferror ( bgf ) ) + { + return FAILURE; + } +#if defined(DEBUG_FOCUS) + fprintf( stderr, "system->N: %d\n", system->N ); +#endif fclose( bgf ); - /* memory allocations for atoms, atom maps, bond restrictions */ system->atoms = (reax_atom*) calloc( system->N, sizeof(reax_atom) ); workspace->map_serials = (int*) calloc( MAX_ATOM_ID, sizeof(int) ); for ( i = 0; i < MAX_ATOM_ID; ++i ) + { workspace->map_serials[i] = -1; + } workspace->orig_id = (int*) calloc( system->N, sizeof(int) ); workspace->restricted = (int*) calloc( system->N, sizeof(int) ); workspace->restricted_list = (int**) calloc( system->N, sizeof(int*) ); for ( i = 0; i < system->N; ++i ) + { workspace->restricted_list[i] = (int*) calloc( MAX_RESTRICT, sizeof(int) ); - + } /* start reading and processing bgf file */ if ( (bgf = fopen( bgf_file, "r" )) == NULL ) { fprintf( stderr, "Error opening the bgf file!\n" ); - exit( FILE_NOT_FOUND_ERR ); + exit( FILE_NOT_FOUND ); } atom_cnt = 0; token_cnt = 0; - while ( !feof( bgf ) ) + while ( fgets( line, MAX_LINE, bgf ) ) { - /* clear previous input line */ - line[0] = 0; - for ( i = 0; i < token_cnt; ++i ) - tokens[i][0] = 0; - /* read new line and tokenize it */ - fgets( line, MAX_LINE, bgf ); strncpy( backup, line, MAX_LINE - 1 ); token_cnt = Tokenize( line, &tokens ); @@ -562,7 +689,6 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control, charge[8] = 0; } - /* add to mapping */ bgf_serial = strtod( &serial[0], &endptr ); Check_Input_Range( bgf_serial, 0, MAX_ATOM_ID, "Invalid bgf serial" ); @@ -570,13 +696,11 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control, workspace->orig_id[ atom_cnt ] = bgf_serial; // fprintf( stderr, "map %d --> %d\n", bgf_serial, atom_cnt ); - /* copy atomic positions */ system->atoms[atom_cnt].x[0] = strtod( &s_x[0], &endptr ); system->atoms[atom_cnt].x[1] = strtod( &s_y[0], &endptr ); system->atoms[atom_cnt].x[2] = strtod( &s_z[0], &endptr ); - /* atom name and type */ strcpy( system->atoms[atom_cnt].name, atom_name ); Trim_Spaces( element ); @@ -605,7 +729,7 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control, &s_gamma[0] ); /* Compute full volume tensor from the angles */ - Init_Box_From_CRYST( atof(s_a), atof(s_b), atof(s_c), + Setup_Box( atof(s_a), atof(s_b), atof(s_c), atof(s_alpha), atof(s_beta), atof(s_gamma), &(system->box) ); } @@ -617,19 +741,37 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control, /* read bond restrictions */ if ( is_Valid_Serial( workspace, bgf_serial = atoi(tokens[1]) ) ) + { ratom = workspace->map_serials[ bgf_serial ]; + } workspace->restricted[ ratom ] = token_cnt - 2; for ( i = 2; i < token_cnt; ++i ) + { if ( is_Valid_Serial( workspace, bgf_serial = atoi(tokens[i]) ) ) + { workspace->restricted_list[ ratom ][ i - 2 ] = workspace->map_serials[ bgf_serial ]; + } + } /* fprintf( stderr, "restriction on %d:", ratom ); for( i = 0; i < workspace->restricted[ ratom ]; ++i ) fprintf( stderr, " %d", workspace->restricted_list[ratom][i] ); fprintf( stderr, "\n" ); */ } + + /* clear previous input line */ + line[0] = 0; + + for ( i = 0; i < token_cnt; ++i ) + { + tokens[i][0] = 0; + } + } + if ( ferror ( bgf ) ) + { + return FAILURE; } fclose( bgf ); @@ -638,5 +780,5 @@ char Read_BGF( char* bgf_file, reax_system* system, control_params *control, fprintf( stderr, "bgf file read\n" ); #endif - return 1; + return SUCCESS; } diff --git a/sPuReMD/src/pdb_tools.h b/sPuReMD/src/geo_tools.h similarity index 87% rename from sPuReMD/src/pdb_tools.h rename to sPuReMD/src/geo_tools.h index c3afab9f601a666f54b00c99c80502bbecb64c41..4c44e3081e6105947b610916e95210e123b4a7d9 100644 --- a/sPuReMD/src/pdb_tools.h +++ b/sPuReMD/src/geo_tools.h @@ -19,13 +19,20 @@ <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------*/ -#ifndef __PDB_TOOLS_H_ -#define __PDB_TOOLS_H_ +#ifndef __GEO_TOOLS_H_ +#define __GEO_TOOLS_H_ #include "mytypes.h" -/* -PDB format : +// CUSTOM_BOXGEO: BOXGEO box_x box_y box_z angle1 angle2 angle3 +#define CUSTOM_BOXGEO_FORMAT " %s %lf %lf %lf %lf %lf %lf" +// CUSTOM ATOM: serial element name x y z +#define CUSTOM_ATOM_FORMAT " %d %s %s %lf %lf %lf" + +char Read_Geo( char*, reax_system*, control_params*, + simulation_data*, static_storage* ); + +/* PDB format : http://www.rcsb.org/pdb/file_formats/pdb/pdbguide2.2/guide2.2_frame.html #define PDB_ATOM_FORMAT "%6s%5d%4s%c%4s%c%4d%c%8s%8s%8s%6s%6s%4s%2s%2s\n" @@ -95,24 +102,28 @@ COLUMNS DATA TYPE FIELD DEFINITION 67 - 70 Integer z Z value */ -//#define PDB_ATOM_FORMAT "ATOM %4d%4s%c%3s%c%4d%c%8.3f%8.3f%8.3f%6.2f%6.2f%-4s%2s%2s\n" +//#define PDB_ATOM_FORMAT +//"ATOM %4d%4s%c%3s%c%4d%c%8.3f%8.3f%8.3f%6.2f%6.2f%-4s%2s%2s\n" #define PDB_ATOM_FORMAT "%6s%5d%4s%c%4s%c%4d%c%8s%8s%8s%6s%6s%4s%2s%2s\n" +#define PDB_ATOM_FORMAT_LENGTH 71 #define PDB_HETATM_FORMAT "%6s%5d%4s%c%4s%c%4d%c%8s%8s%8s%6s%6s%2s%2s\n" #define PDB_CONECT_FORMAT "%6s%5d%5d%5d%5d%5d\n" #define PDB_CRYST1_FORMAT "%6s%9s%9s%9s%7s%7s%7s%11s%4s\n" #define PDB_ATOM_FORMAT_O "%6s%5d %4s%c%3s %c%4d%c %8.3f%8.3f%8.3f%6.2f%6.2f %-4s%2s%2s\n" +#define PDB_ATOM_FORMAT_O_LENGTH 81 #define PDB_CRYST1_FORMAT_O "%6s%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f%11s%4d\n" #define BGF_CRYSTX_FORMAT "%8s%11s%11s%11s%11s%11s%11s" char Read_PDB( char*, reax_system*, control_params*, - simulation_data*, static_storage* ); + simulation_data*, static_storage* ); + char Read_BGF( char*, reax_system*, control_params*, - simulation_data*, static_storage* ); + simulation_data*, static_storage* ); -char Write_PDB( reax_system*, control_params*, simulation_data*, - static_storage*, list*, output_controls* ); +char Write_PDB( reax_system*, list*, simulation_data*, + control_params*, static_storage*, output_controls* ); #endif diff --git a/sPuReMD/src/init_md.c b/sPuReMD/src/init_md.c index b6b5997db4000d3d67155189090b4be74e9318f2..92a625e6c7882dd2e032524edbedfaa3ce201d05 100644 --- a/sPuReMD/src/init_md.c +++ b/sPuReMD/src/init_md.c @@ -217,6 +217,47 @@ void Init_Simulation_Data( reax_system *system, control_params *control, } +/* Initialize Taper params */ +void Init_Taper( control_params *control ) +{ + real d1, d7; + real swa, swa2, swa3; + real swb, swb2, swb3; + + swa = control->r_low; + swb = control->r_cut; + + if ( fabs( swa ) > 0.01 ) + fprintf( stderr, "Warning: non-zero value for lower Taper-radius cutoff\n" ); + + if ( swb < 0 ) + { + fprintf( stderr, "Negative value for upper Taper-radius cutoff\n" ); + exit( INVALID_INPUT ); + } + else if ( swb < 5 ) + fprintf( stderr, "Warning: low value for upper Taper-radius cutoff:%f\n", + swb ); + + d1 = swb - swa; + d7 = POW( d1, 7.0 ); + swa2 = SQR( swa ); + swa3 = CUBE( swa ); + swb2 = SQR( swb ); + swb3 = CUBE( swb ); + + control->Tap7 = 20.0 / d7; + control->Tap6 = -70.0 * (swa + swb) / d7; + control->Tap5 = 84.0 * (swa2 + 3.0 * swa * swb + swb2) / d7; + control->Tap4 = -35.0 * (swa3 + 9.0 * swa2 * swb + 9.0 * swa * swb2 + swb3 ) / d7; + control->Tap3 = 140.0 * (swa3 * swb + 3.0 * swa2 * swb2 + swa * swb3 ) / d7; + control->Tap2 = -210.0 * (swa3 * swb2 + swa2 * swb3) / d7; + control->Tap1 = 140.0 * swa3 * swb3 / d7; + control->Tap0 = (-35.0 * swa3 * swb2 * swb2 + 21.0 * swa2 * swb3 * swb2 + + 7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7; +} + + void Init_Workspace( reax_system *system, control_params *control, static_storage *workspace ) { @@ -347,6 +388,9 @@ void Init_Workspace( reax_system *system, control_params *control, workspace->realloc.gcell_atoms = -1; Reset_Workspace( system, workspace ); + + /* Initialize Taper function */ + Init_Taper( control ); } @@ -361,7 +405,7 @@ void Init_Lists( reax_system *system, control_params *control, if ( !Make_List(system->N, num_nbrs, TYP_FAR_NEIGHBOR, (*lists) + FAR_NBRS) ) { fprintf(stderr, "Problem in initializing far nbrs list. Terminating!\n"); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "memory allocated: far_nbrs = %ldMB\n", @@ -424,7 +468,7 @@ void Init_Lists( reax_system *system, control_params *control, if (!Make_List(num_bonds, num_3body, TYP_THREE_BODY, (*lists) + THREE_BODIES)) { fprintf( stderr, "Problem in initializing angles list. Terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "estimated storage - num_3body: %d\n", num_3body ); @@ -435,13 +479,13 @@ void Init_Lists( reax_system *system, control_params *control, if (!Make_List( system->N, num_bonds * 8, TYP_DDELTA, (*lists) + DDELTA )) { fprintf( stderr, "Problem in initializing dDelta list. Terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } if ( !Make_List( num_bonds, num_bonds * MAX_BONDS * 3, TYP_DBO, (*lists) + DBO ) ) { fprintf( stderr, "Problem in initializing dBO list. Terminating!\n" ); - exit( INIT_ERR ); + exit( CANNOT_INITIALIZE ); } #endif @@ -670,7 +714,7 @@ void Init_Out_Controls(reax_system *system, control_params *control, out_control->pdb == NULL ) { fprintf( stderr, "FILE OPEN ERROR. TERMINATING..." ); - exit( CANNOT_OPEN_OUTFILE ); + exit( CANNOT_OPEN_FILE ); }*/ } diff --git a/sPuReMD/src/mytypes.h b/sPuReMD/src/mytypes.h index 9d5136efec2dfee42cfc8a0ecf9c6f02b869ccd8..3202c622bb842356ca453284573da12887234391 100644 --- a/sPuReMD/src/mytypes.h +++ b/sPuReMD/src/mytypes.h @@ -46,6 +46,11 @@ //#define REORDER_ATOMS // turns on nbrgen opt by re-ordering atoms //#define LGJ +#define SUCCESS 1 +#define FAILURE 0 +#define TRUE 1 +#define FALSE 0 + #define EXP exp #define SQRT sqrt #define POW pow @@ -53,6 +58,8 @@ #define COS cos #define SIN sin #define TAN tan +#define FABS fabs +#define FMOD fmod #define SQR(x) ((x)*(x)) #define CUBE(x) ((x)*(x)*(x)) @@ -61,6 +68,15 @@ #define MAX( x, y ) (((x) > (y)) ? (x) : (y)) #define MIN( x, y ) (((x) < (y)) ? (x) : (y)) +/* NaN IEEE 754 representation for C99 in math.h + * Note: function choice must match REAL typedef below */ +#ifdef NAN +#define IS_NAN_REAL(a) (isnan(a)) +#else +#warn "No support for NaN" +#define NAN_REAL(a) (0) +#endif + #define PI 3.14159265 #define C_ele 332.06371 //#define K_B 503.398008 // kcal/mol/K @@ -77,11 +93,16 @@ #define AVOGNR 6.0221367e23 #define P_CONV 1.0e-24 * AVOGNR * JOULES_to_CAL -#define MAX_STR 100 // MAX STRing length (used for naming) +#define MAX_STR 1024 +#define MAX_LINE 1024 +#define MAX_TOKENS 1024 +#define MAX_TOKEN_LEN 1024 + #define MAX_ATOM_ID 100000 #define MAX_RESTRICT 15 #define MAX_MOLECULE_SIZE 20 #define MAX_ATOM_TYPES 25 + #define MAX_GRID 50 #define MAX_3BODY_PARAM 5 #define MAX_4BODY_PARAM 5 @@ -95,24 +116,6 @@ #define MAX_ITR 10 #define RESTART 50 -#define FILE_NOT_FOUND_ERR 10 -#define UNKNOWN_ATOM_TYPE_ERR 11 -#define CANNOT_OPEN_OUTFILE 12 -#define INIT_ERR 13 -#define INSUFFICIENT_SPACE 14 -#define UNKNOWN_OPTION 15 -#define INVALID_INPUT 16 -#define NUMERIC_BREAKDOWN 17 - -#define C_ATOM 0 -#define H_ATOM 1 -#define O_ATOM 2 -#define N_ATOM 3 -#define S_ATOM 4 -#define SI_ATOM 5 -#define GE_ATOM 6 -#define X_ATOM 7 - #define ZERO 0.000000000000000e+00 #define ALMOST_ZERO 1e-10 #define NEG_INF -1e10 @@ -134,35 +137,71 @@ typedef int ivec[3]; typedef real rtensor[3][3]; /* config params */ -enum ensemble { - NVE = 0, NVT = 1, NPT = 2, sNPT = 3, iNPT = 4, ensNR = 5, bNVT = 6 +enum ensemble +{ + NVE = 0, NVT = 1, NPT = 2, sNPT = 3, iNPT = 4, ensNR = 5, bNVT = 6, }; -enum interaction_list_offets { - FAR_NBRS = 0, NEAR_NBRS = 1, THREE_BODIES = 2, BONDS = 3, OLD_BONDS = 4, - HBONDS = 5, DBO = 6, DDELTA = 7, LIST_N = 8 + +enum interaction_list_offets +{ + FAR_NBRS = 0, NEAR_NBRS = 1, THREE_BODIES = 2, BONDS = 3, OLD_BONDS = 4, + HBONDS = 5, DBO = 6, DDELTA = 7, LIST_N = 8, }; -enum interaction_type { - TYP_VOID = 0, TYP_THREE_BODY = 1, TYP_BOND = 2, TYP_HBOND = 3, TYP_DBO = 4, - TYP_DDELTA = 5, TYP_FAR_NEIGHBOR = 6, TYP_NEAR_NEIGHBOR = 7, TYP_N = 8 + +enum interaction_type +{ + TYP_VOID = 0, TYP_THREE_BODY = 1, TYP_BOND = 2, TYP_HBOND = 3, TYP_DBO = 4, + TYP_DDELTA = 5, TYP_FAR_NEIGHBOR = 6, TYP_NEAR_NEIGHBOR = 7, TYP_N = 8, }; -enum molecule_type { - UNKNOWN = 0, WATER = 1 + +enum errors +{ + FILE_NOT_FOUND = -10, UNKNOWN_ATOM_TYPE = -11, + CANNOT_OPEN_FILE = -12, CANNOT_INITIALIZE = -13, + INSUFFICIENT_MEMORY = -14, UNKNOWN_OPTION = -15, + INVALID_INPUT = -16, INVALID_GEO = -17, + NUMERIC_BREAKDOWN = -18, }; -enum molecular_analysis_type { - NO_ANALYSIS = 0, FRAGMENTS = 1, REACTIONS = 2, NUM_ANALYSIS = 3 + +enum atoms +{ + C_ATOM = 0, H_ATOM = 1, O_ATOM = 2, N_ATOM = 3, + S_ATOM = 4, SI_ATOM = 5, GE_ATOM = 6, X_ATOM = 7, }; -enum restart_format { - WRITE_ASCII = 0, WRITE_BINARY = 1, RF_N = 2 + +enum molecule_type +{ + UNKNOWN = 0, WATER = 1, }; -enum geometry { - XYZ = 0, PDB = 1, BGF = 2, ASCII_RESTART = 3, BINARY_RESTART = 4, GF_N = 5 + +enum molecular_analysis_type +{ + NO_ANALYSIS = 0, FRAGMENTS = 1, REACTIONS = 2, NUM_ANALYSIS = 3, }; -enum solver { - GMRES_S = 0, GMRES_H_S = 1, PGMRES_S = 2, - PGMRES_J_S = 3, CG_S = 4, PCG_S = 5, SDM_S = 6, + +enum restart_format +{ + WRITE_ASCII = 0, WRITE_BINARY = 1, RF_N = 2, }; -enum pre_comp { - DIAG_PC = 0, ICHOLT_PC = 1, ICHOL_PAR_PC = 2, ILU_SUPERLU_MT_PC = 3, + +enum geo_formats +{ + CUSTOM = 0, PDB = 1, BGF = 2, ASCII_RESTART = 3, BINARY_RESTART = 4, GF_N = 5, +}; + +enum solver +{ + GMRES_S = 0, GMRES_H_S = 1, CG_S = 2, SDM_S = 3, +}; + +enum pre_comp +{ + DIAG_PC = 0, ICHOLT_PC = 1, ILU_PAR_PC = 2, ILUT_PAR_PC = 3, ILU_SUPERLU_MT_PC = 4, +}; + +enum pre_app +{ + NONE_PA = 0, TRI_SOLVE_PA = 1, TRI_SOLVE_LEVEL_SCHED_PA = 2, JACOBI_ITER_PA = 3, }; @@ -357,10 +396,14 @@ typedef struct typedef struct { - rvec x, v, f; /* Position, velocity, force on atom */ - real q; /* Charge on the atom */ int type; /* Type of this atom */ - char name[5]; + char name[8]; + + rvec x; // position + rvec v; // velocity + rvec f; // force + + real q; /* Charge on the atom */ } reax_atom; @@ -472,6 +515,7 @@ typedef struct unsigned int pre_comp_refactor; real pre_comp_droptol; unsigned int pre_comp_sweeps; + unsigned int pre_app_type; unsigned int pre_app_jacobi_iters; int molec_anal; @@ -669,21 +713,6 @@ typedef struct } bond_data; -/* compressed row storage (crs) format - * j: column index for corresponding matrix entry - * val: matrix entry - * */ -// TODO: move pointers into below struct (question about addressing performance) -// Consider: __attribute__((packed)) or similar in structure definition -// to avoid having the compiler add padding bytes (manual alignment along word boundary) -// Warning: padding is implementation (compiler) dependent, so not portable, -// and also potentially dangerous -typedef struct -{ - int j; - real val; -} sparse_matrix_entry; - /* compressed row storage (crs) format * See, e.g., * http://netlib.org/linalg/html_templates/node91.html#SECTION00931100000000000000 @@ -691,15 +720,15 @@ typedef struct * m: number of nonzeros (NNZ) ALLOCATED * n: number of rows * start: row pointer (last element contains ACTUAL NNZ) - * entries: (column index, val) pairs + * j: column index for corresponding matrix entry + * val: matrix entry * */ typedef struct { int n, m; int *start; - sparse_matrix_entry *entries; -// int *j; -// real *val; + int *j; + real *val; } sparse_matrix; diff --git a/sPuReMD/src/neighbors.c b/sPuReMD/src/neighbors.c index 9b04cfa8c4e6f1d1886adc09a4fe68d73a060a45..8d225de100c9f777c058c9f68cc87a02aff6caf2 100644 --- a/sPuReMD/src/neighbors.c +++ b/sPuReMD/src/neighbors.c @@ -134,7 +134,7 @@ void Generate_Neighbor_Lists( reax_system *system, control_params *control, { fprintf( stderr, "step%d-ran out of space on far_nbrs: top=%d, max=%d", data->step, num_far, far_nbrs->num_intrs ); - exit( INSUFFICIENT_SPACE ); + exit( INSUFFICIENT_MEMORY ); } } diff --git a/sPuReMD/src/print_utils.c b/sPuReMD/src/print_utils.c index 710ee56836a7f781deccd15dcdd8e92602c0c6fb..7ba057f8937f957b01998b11a1e0cb7072dab964 100644 --- a/sPuReMD/src/print_utils.c +++ b/sPuReMD/src/print_utils.c @@ -21,7 +21,7 @@ #include "print_utils.h" #include "list.h" -#include "pdb_tools.h" +#include "geo_tools.h" #include "system_props.h" #include "vector.h" @@ -375,18 +375,6 @@ void Init_Force_Test_Functions( ) #endif -char *Get_Element( reax_system *system, int i ) -{ - return &( system->reaxprm.sbp[system->atoms[i].type].name[0] ); -} - - -char *Get_Atom_Name( reax_system *system, int i ) -{ - return &(system->atoms[i].name[0]); -} - - /* near nbrs contain both i-j, j-i nbrhood info */ void Print_Near_Neighbors( reax_system *system, control_params *control, static_storage *workspace, list **lists ) @@ -690,7 +678,7 @@ void Output_Results( reax_system *system, control_params *control, out_control->append_traj_frame( system, control, data, workspace, lists, out_control ); - //Write_PDB( system, control, data, workspace, *lists+BONDS, out_control ); + //Write_PDB( system, *lists+BONDS, data, control, workspace, out_control ); // t_elapsed = Get_Timing_Info( t_start ); // fprintf(stdout, "append_frame took %.6f seconds\n", t_elapsed ); } @@ -736,16 +724,16 @@ void Print_Linear_System( reax_system *system, control_params *control, for ( j = H->start[i]; j < H->start[i + 1] - 1; ++j ) { fprintf( out, "%6d%6d %24.15e\n", - workspace->orig_id[i], workspace->orig_id[H->entries[j].j], - H->entries[j].val ); + workspace->orig_id[i], workspace->orig_id[H->j[j]], + H->val[j] ); fprintf( out, "%6d%6d %24.15e\n", - workspace->orig_id[H->entries[j].j], workspace->orig_id[i], - H->entries[j].val ); + workspace->orig_id[H->j[j]], workspace->orig_id[i], + H->val[j] ); } // the diagonal entry fprintf( out, "%6d%6d %24.15e\n", - workspace->orig_id[i], workspace->orig_id[i], H->entries[j].val ); + workspace->orig_id[i], workspace->orig_id[i], H->val[j] ); } fclose( out ); @@ -759,16 +747,16 @@ void Print_Linear_System( reax_system *system, control_params *control, for ( j = H->start[i]; j < H->start[i + 1] - 1; ++j ) { fprintf( out, "%6d%6d %24.15e\n", - workspace->orig_id[i], workspace->orig_id[H->entries[j].j], - H->entries[j].val ); + workspace->orig_id[i], workspace->orig_id[H->j[j]], + H->val[j] ); fprintf( out, "%6d%6d %24.15e\n", - workspace->orig_id[H->entries[j].j], workspace->orig_id[i], - H->entries[j].val ); + workspace->orig_id[H->j[j]], workspace->orig_id[i], + H->val[j] ); } // the diagonal entry fprintf( out, "%6d%6d %24.15e\n", - workspace->orig_id[i], workspace->orig_id[i], H->entries[j].val ); + workspace->orig_id[i], workspace->orig_id[i], H->val[j] ); } fclose( out ); @@ -831,7 +819,7 @@ void Print_Sparse_Matrix( sparse_matrix *A ) { fprintf( stderr, "i:%d j(val):", i ); for ( j = A->start[i]; j < A->start[i + 1]; ++j ) - fprintf( stderr, "%d(%.4f) ", A->entries[j].j, A->entries[j].val ); + fprintf( stderr, "%d(%.4f) ", A->j[j], A->val[j] ); fprintf( stderr, "\n" ); } } @@ -848,7 +836,7 @@ void Print_Sparse_Matrix2( sparse_matrix *A, char *fname ) { //fprintf( f, "%d%d %.15e\n", A->entries[j].j, i, A->entries[j].val ); //Convert 0-based to 1-based (for Matlab) - fprintf( f, "%6d%6d %24.15e\n", i+1, A->entries[j].j+1, A->entries[j].val ); + fprintf( f, "%6d%6d %24.15e\n", i+1, A->j[j]+1, A->val[j] ); } } diff --git a/sPuReMD/src/system_props.c b/sPuReMD/src/system_props.c index 52c1d2713ea0b84f8ea729306ba856645d191d2f..fc93a474cf378f1a382d0ae017cf15a9b23eb17a 100644 --- a/sPuReMD/src/system_props.c +++ b/sPuReMD/src/system_props.c @@ -20,30 +20,10 @@ ----------------------------------------------------------------------*/ #include "system_props.h" -#include "box.h" +#include "tool_box.h" #include "vector.h" -real Get_Time( ) -{ - struct timeval tim; - - gettimeofday(&tim, NULL ); - return ( tim.tv_sec + (tim.tv_usec / 1000000.0) ); -} - - -real Get_Timing_Info( real t_start ) -{ - struct timeval tim; - real t_end; - - gettimeofday(&tim, NULL ); - t_end = tim.tv_sec + (tim.tv_usec / 1000000.0); - return (t_end - t_start); -} - - void Temperature_Control( control_params *control, simulation_data *data, output_controls *out_control ) { diff --git a/sPuReMD/src/testmd.c b/sPuReMD/src/testmd.c index f6e9003c3e81cfec46f2292d35a08b398d549bb4..5328a5bc2f65a4d4d398b08e8864288b3706c705 100644 --- a/sPuReMD/src/testmd.c +++ b/sPuReMD/src/testmd.c @@ -21,24 +21,25 @@ #include "mytypes.h" #include "analyze.h" -#include "box.h" +#include "control.h" +#include "ffield.h" #include "forces.h" #include "init_md.h" -#include "integrate.h" #include "neighbors.h" -#include "param.h" -#include "pdb_tools.h" +#include "geo_tools.h" #include "print_utils.h" #include "reset_utils.h" #include "restart.h" #include "system_props.h" -#include "traj.h" #include "vector.h" -void Post_Evolve( reax_system* system, control_params* control, - simulation_data* data, static_storage* workspace, - list** lists, output_controls *out_control ) +static void Post_Evolve( reax_system * const system, + control_params * const control, + simulation_data * const data, + static_storage * const workspace, + list ** const lists, + output_controls * const out_control ) { int i; rvec diff, cross; @@ -77,22 +78,26 @@ void Post_Evolve( reax_system* system, control_params* control, } -void Read_System( char *geof, char *ff, char *ctrlf, - reax_system *system, control_params *control, - simulation_data *data, static_storage *workspace, - output_controls *out_control ) +void static Read_System( char * const geo_file, + char * const ffield_file, + char * const control_file, + reax_system * const system, + control_params * const control, + simulation_data * const data, + static_storage * const workspace, + output_controls * const out_control ) { FILE *ffield, *ctrl; - if ( (ffield = fopen( ff, "r" )) == NULL ) + if ( (ffield = fopen( ffield_file, "r" )) == NULL ) { fprintf( stderr, "Error opening the ffield file!\n" ); - exit( FILE_NOT_FOUND_ERR ); + exit( FILE_NOT_FOUND ); } - if ( (ctrl = fopen( ctrlf, "r" )) == NULL ) + if ( (ctrl = fopen( control_file, "r" )) == NULL ) { fprintf( stderr, "Error opening the ffield file!\n" ); - exit( FILE_NOT_FOUND_ERR ); + exit( FILE_NOT_FOUND ); } /* ffield file */ @@ -102,34 +107,37 @@ void Read_System( char *geof, char *ff, char *ctrlf, Read_Control_File( ctrl, system, control, out_control ); /* geo file */ - if ( control->geo_format == XYZ ) + if ( control->geo_format == CUSTOM ) { - fprintf( stderr, "xyz input is not implemented yet\n" ); - exit(1); + Read_Geo( geo_file, system, control, data, workspace ); } else if ( control->geo_format == PDB ) - Read_PDB( geof, system, control, data, workspace ); + { + Read_PDB( geo_file, system, control, data, workspace ); + } else if ( control->geo_format == BGF ) - Read_BGF( geof, system, control, data, workspace ); + { + Read_BGF( geo_file, system, control, data, workspace ); + } else if ( control->geo_format == ASCII_RESTART ) { - Read_ASCII_Restart( geof, system, control, data, workspace ); + Read_ASCII_Restart( geo_file, system, control, data, workspace ); control->restart = 1; } else if ( control->geo_format == BINARY_RESTART ) { - Read_Binary_Restart( geof, system, control, data, workspace ); + Read_Binary_Restart( geo_file, system, control, data, workspace ); control->restart = 1; } else { fprintf( stderr, "unknown geo file format. terminating!\n" ); - exit(1); + exit( INVALID_GEO ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "input files have been read...\n" ); - Print_Box_Information( &(system->box), stderr ); + Print_Box( &(system->box), stderr ); #endif } @@ -198,13 +206,12 @@ int main(int argc, char* argv[]) if ( out_control.write_steps > 0 ) { fclose( out_control.trj ); - Write_PDB( &system, &control, &data, &workspace, - &(lists[BONDS]), &out_control ); + Write_PDB( &system, &(lists[BONDS]), &data, &control, &workspace, &out_control ); } data.timing.end = Get_Time( ); data.timing.elapsed = Get_Timing_Info( data.timing.start ); fprintf( out_control.log, "total: %.2f secs\n", data.timing.elapsed ); - return 0; + return SUCCESS; } diff --git a/sPuReMD/src/three_body_interactions.c b/sPuReMD/src/three_body_interactions.c index 9aef73d4692e50b6a75de4897108b43bcd0e9a5b..d12ab80f30a38468e1819c4963e933e21c8adf48 100644 --- a/sPuReMD/src/three_body_interactions.c +++ b/sPuReMD/src/three_body_interactions.c @@ -571,7 +571,7 @@ void Three_Body_Interactions( reax_system *system, control_params *control, { fprintf( stderr, "step%d-ran out of space on angle_list: top=%d, max=%d", data->step, num_thb_intrs, thb_intrs->num_intrs ); - exit( INSUFFICIENT_SPACE ); + exit( INSUFFICIENT_MEMORY ); } } diff --git a/sPuReMD/src/tool_box.c b/sPuReMD/src/tool_box.c new file mode 100644 index 0000000000000000000000000000000000000000..776bb565f9cb21d7f9ef38259f42bc13fca4fb91 --- /dev/null +++ b/sPuReMD/src/tool_box.c @@ -0,0 +1,468 @@ +/*---------------------------------------------------------------------- + SerialReax - Reax Force Field Simulator + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#include "mytypes.h" +#include "tool_box.h" + +#include <ctype.h> + + +/************** taken from box.c **************/ +void Transform( rvec x1, simulation_box *box, char flag, rvec x2 ) +{ + int i, j; + real tmp; + + // printf(">x1: (%lf, %lf, %lf)\n",x1[0],x1[1],x1[2]); + + if (flag > 0) + { + for (i = 0; i < 3; i++) + { + tmp = 0.0; + for (j = 0; j < 3; j++) + tmp += box->trans[i][j] * x1[j]; + x2[i] = tmp; + } + } + else + { + for (i = 0; i < 3; i++) + { + tmp = 0.0; + for (j = 0; j < 3; j++) + tmp += box->trans_inv[i][j] * x1[j]; + x2[i] = tmp; + } + } + // printf(">x2: (%lf, %lf, %lf)\n", x2[0], x2[1], x2[2]); +} + + +void Transform_to_UnitBox( rvec x1, simulation_box *box, char flag, rvec x2 ) +{ + Transform( x1, box, flag, x2 ); + + x2[0] /= box->box_norms[0]; + x2[1] /= box->box_norms[1]; + x2[2] /= box->box_norms[2]; +} + + +/* determine whether point p is inside the box */ +void Fit_to_Periodic_Box( simulation_box *box, rvec *p ) +{ + int i; + + for ( i = 0; i < 3; ++i ) + { + //TODO: verify box boundary coordinates -- assuming orthogonal box pinned at origin + if ( (*p)[i] < 0. ) + { + /* handle lower coords */ + while ( (*p)[i] < 0. ) + (*p)[i] += box->box_norms[i]; + } + else if ( (*p)[i] >= box->box_norms[i] ) + { + /* handle higher coords */ + while ( (*p)[i] >= box->box_norms[i] ) + (*p)[i] -= box->box_norms[i]; + } +// if ( (*p)[i] < box->min[i] ) +// { +// /* handle lower coords */ +// while ( (*p)[i] < box->min[i] ) +// (*p)[i] += box->box_norms[i]; +// } +// else if ( (*p)[i] >= box->max[i] ) +// { +// /* handle higher coords */ +// while ( (*p)[i] >= box->max[i] ) +// (*p)[i] -= box->box_norms[i]; +// } + } +} + + +/* determine the touch point, tp, of a box to + its neighbor denoted by the relative coordinate rl */ +/* +inline void Box_Touch_Point( simulation_box *box, ivec rl, rvec tp ) +{ + int d; + + for ( d = 0; d < 3; ++d ) + if ( rl[d] == -1 ) + tp[d] = box->min[d]; + else if ( rl[d] == 0 ) + tp[d] = NEG_INF - 1.; + else + tp[d] = box->max[d]; +} +*/ + + +/* determine whether point p is inside the box */ +/* assumes orthogonal box */ +/* +inline int is_Inside_Box( simulation_box *box, rvec p ) +{ + if ( p[0] < box->min[0] || p[0] >= box->max[0] || + p[1] < box->min[1] || p[1] >= box->max[1] || + p[2] < box->min[2] || p[2] >= box->max[2] ) + return FALSE; + + return TRUE; +} +*/ + + +/* +inline int iown_midpoint( simulation_box *box, rvec p1, rvec p2 ) +{ + rvec midp; + + midp[0] = (p1[0] + p2[0]) / 2; + midp[1] = (p1[1] + p2[1]) / 2; + midp[2] = (p1[2] + p2[2]) / 2; + + if ( midp[0] < box->min[0] || midp[0] >= box->max[0] || + midp[1] < box->min[1] || midp[1] >= box->max[1] || + midp[2] < box->min[2] || midp[2] >= box->max[2] ) + return FALSE; + + return TRUE; +} +*/ + + +/**************** from grid.c ****************/ +/* finds the closest point of grid cell cj to ci. + no need to consider periodic boundary conditions as in the serial case + because the box of a process is not periodic in itself */ +/* +inline void GridCell_Closest_Point( grid_cell *gci, grid_cell *gcj, + ivec ci, ivec cj, rvec cp ) +{ + int d; + + for ( d = 0; d < 3; d++ ) + if ( cj[d] > ci[d] ) + cp[d] = gcj->min[d]; + else if ( cj[d] == ci[d] ) + cp[d] = NEG_INF - 1.; + else + cp[d] = gcj->max[d]; +} + + +inline void GridCell_to_Box_Points( grid_cell *gc, ivec rl, rvec cp, rvec fp ) +{ + int d; + + for ( d = 0; d < 3; ++d ) + if ( rl[d] == -1 ) + { + cp[d] = gc->min[d]; + fp[d] = gc->max[d]; + } + else if ( rl[d] == 0 ) + { + cp[d] = fp[d] = NEG_INF - 1.; + } + else + { + cp[d] = gc->max[d]; + fp[d] = gc->min[d]; + } +} + + +inline real DistSqr_between_Special_Points( rvec sp1, rvec sp2 ) +{ + int i; + real d_sqr = 0; + + for ( i = 0; i < 3; ++i ) + { + if ( sp1[i] > NEG_INF && sp2[i] > NEG_INF ) + { + d_sqr += SQR( sp1[i] - sp2[i] ); + } + } + + return d_sqr; +} + + +inline real DistSqr_to_Special_Point( rvec cp, rvec x ) +{ + int i; + real d_sqr = 0; + + for ( i = 0; i < 3; ++i ) + { + if ( cp[i] > NEG_INF ) + { + d_sqr += SQR( cp[i] - x[i] ); + } + } + + return d_sqr; +} + + +inline int Relative_Coord_Encoding( ivec c ) +{ + return 9 * (c[0] + 1) + 3 * (c[1] + 1) + (c[2] + 1); +} +*/ + + +/************** from geo_tools.c *****************/ +void Make_Point( real x, real y, real z, rvec* p ) +{ + (*p)[0] = x; + (*p)[1] = y; + (*p)[2] = z; +} + + +int is_Valid_Serial( static_storage *workspace, int serial ) +{ + if( workspace->map_serials[ serial ] < 0 ) + { + fprintf( stderr, "CONECT line includes invalid pdb serial number %d.\n", serial ); + fprintf( stderr, "Please correct the input file.Terminating...\n" ); + exit( INVALID_INPUT ); + } + + return TRUE; +} + + +int Check_Input_Range( int val, int lo, int hi, char *message ) +{ + if ( val < lo || val > hi ) + { + fprintf( stderr, "%s\nInput %d - Out of range %d-%d. Terminating...\n", + message, val, lo, hi ); + exit( INVALID_INPUT ); + } + + return SUCCESS; +} + + +void Trim_Spaces( char *element ) +{ + int i, j; + + for ( i = 0; element[i] == ' '; ++i ); // skip initial space chars + + for ( j = i; j < (int)(strlen(element)) && element[j] != ' '; ++j ) + { + element[j - i] = toupper( element[j] ); // make uppercase, offset to 0 + } + element[j - i] = 0; // finalize the string +} + + +/************ from system_props.c *************/ +real Get_Time( ) +{ + gettimeofday(&tim, NULL ); + return ( tim.tv_sec + (tim.tv_usec / 1000000.0) ); +} + + +real Get_Timing_Info( real t_start ) +{ + gettimeofday(&tim, NULL ); + t_end = tim.tv_sec + (tim.tv_usec / 1000000.0); + return (t_end - t_start); +} + + +void Update_Timing_Info( real *t_start, real *timing ) +{ + gettimeofday(&tim, NULL ); + t_end = tim.tv_sec + (tim.tv_usec / 1000000.0); + *timing += (t_end - *t_start); + *t_start = t_end; +} + + +/*********** from io_tools.c **************/ +int Get_Atom_Type( reax_interaction *reax_param, char *s ) +{ + int i; + + for ( i = 0; i < reax_param->num_atom_types; ++i ) + { + if ( !strcmp( reax_param->sbp[i].name, s ) ) + { + return i; + } + } + + fprintf( stderr, "Unknown atom type %s. Terminating...\n", s ); + exit( UNKNOWN_ATOM_TYPE ); + + return FAILURE; +} + + +char *Get_Element( reax_system *system, int i ) +{ + return &( system->reaxprm.sbp[system->atoms[i].type].name[0] ); +} + + +char *Get_Atom_Name( reax_system *system, int i ) +{ + return &(system->atoms[i].name[0]); +} + + +int Allocate_Tokenizer_Space( char **line, char **backup, char ***tokens ) +{ + int i; + + if ( (*line = (char*) malloc( sizeof(char) * MAX_LINE )) == NULL ) + { + return FAILURE; + } + + if ( (*backup = (char*) malloc( sizeof(char) * MAX_LINE )) == NULL ) + { + return FAILURE; + } + + if ( (*tokens = (char**) malloc( sizeof(char*) * MAX_TOKENS )) == NULL ) + { + return FAILURE; + } + + for ( i = 0; i < MAX_TOKENS; i++ ) + { + if ( ((*tokens)[i] = (char*) malloc(sizeof(char) * MAX_TOKEN_LEN)) == NULL ) + { + return FAILURE; + } + } + + return SUCCESS; +} + + +int Tokenize( char* s, char*** tok ) +{ + char test[MAX_LINE]; + char *sep = "\t \n!="; + char *word; + int count = 0; + + strncpy( test, s, MAX_LINE ); + + for ( word = strtok(test, sep); word; word = strtok(NULL, sep) ) + { + strncpy( (*tok)[count], word, MAX_LINE ); + count++; + } + + return count; +} + + +/***************** taken from lammps ************************/ +/* safe malloc */ +void *smalloc( long n, char *name ) +{ + void *ptr; + + if ( n <= 0 ) + { + fprintf( stderr, "WARNING: trying to allocate %ld bytes for array %s. ", + n, name ); + fprintf( stderr, "returning NULL.\n" ); + return NULL; + } + + ptr = malloc( n ); + if ( ptr == NULL ) + { + fprintf( stderr, "ERROR: failed to allocate %ld bytes for array %s", + n, name ); + exit( INSUFFICIENT_MEMORY ); + } + + return ptr; +} + + +/* safe calloc */ +void *scalloc( int n, int size, char *name ) +{ + void *ptr; + + if ( n <= 0 ) + { + fprintf( stderr, "WARNING: trying to allocate %d elements for array %s. ", + n, name ); + fprintf( stderr, "returning NULL.\n" ); + return NULL; + } + + if ( size <= 0 ) + { + fprintf( stderr, "WARNING: elements size for array %s is %d. ", + name, size ); + fprintf( stderr, "returning NULL.\n" ); + return NULL; + } + + ptr = calloc( n, size ); + if ( ptr == NULL ) + { + fprintf( stderr, "ERROR: failed to allocate %d bytes for array %s", + n * size, name ); + exit( INSUFFICIENT_MEMORY ); + } + + return ptr; +} + + +/* safe free */ +void sfree( void *ptr, char *name ) +{ + if ( ptr == NULL ) + { + fprintf( stderr, "WARNING: trying to free the already NULL pointer %s!\n", + name ); + return; + } + + free( ptr ); + ptr = NULL; +} diff --git a/sPuReMD/src/tool_box.h b/sPuReMD/src/tool_box.h new file mode 100644 index 0000000000000000000000000000000000000000..5712152d9f399fd2c92152a64a2e82d2aacc70dc --- /dev/null +++ b/sPuReMD/src/tool_box.h @@ -0,0 +1,70 @@ +/*---------------------------------------------------------------------- + SerialReax - Reax Force Field Simulator + + Copyright (2010) Purdue University + Hasan Metin Aktulga, haktulga@cs.purdue.edu + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#ifndef __TOOL_BOX_H_ +#define __TOOL_BOX_H_ + +#include "mytypes.h" + +struct timeval tim; +real t_end; + +/* from box.h */ +void Transform( rvec, simulation_box*, char, rvec ); +void Transform_to_UnitBox( rvec, simulation_box*, char, rvec ); +void Fit_to_Periodic_Box( simulation_box*, rvec* ); +//void Box_Touch_Point( simulation_box*, ivec, rvec ); +//int is_Inside_Box( simulation_box*, rvec ); +//int iown_midpoint( simulation_box*, rvec, rvec ); + +/* from grid.h */ +/* +void GridCell_Closest_Point( grid_cell*, grid_cell*, ivec, ivec, rvec ); +void GridCell_to_Box_Points( grid_cell*, ivec, rvec, rvec ); +real DistSqr_between_Special_Points( rvec, rvec ); +real DistSqr_to_Special_Point( rvec, rvec ); +int Relative_Coord_Encoding( ivec ); +*/ + +/* from geo_tools.h */ +void Make_Point( real, real, real, rvec* ); +int is_Valid_Serial( static_storage*, int ); +int Check_Input_Range( int, int, int, char* ); +void Trim_Spaces( char* ); + +/* from system_props.h */ +real Get_Time( ); +real Get_Timing_Info( real ); +void Update_Timing_Info( real*, real* ); + +/* from io_tools.h */ +int Get_Atom_Type( reax_interaction*, char* ); +char *Get_Element( reax_system*, int ); +char *Get_Atom_Name( reax_system*, int ); +int Allocate_Tokenizer_Space( char**, char**, char*** ); +int Tokenize( char*, char*** ); + +/* from lammps */ +void *smalloc( long, char* ); +void *scalloc( int, int, char* ); +void sfree( void*, char* ); + +#endif diff --git a/tools/run_sim.py b/tools/run_sim.py index 39342fdb03a3f71591453199b561fbc9ca9ba7c8..bcea5dbadefcf0484b189109b438c6cef4df679b 100644 --- a/tools/run_sim.py +++ b/tools/run_sim.py @@ -1,5 +1,6 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 +import argparse from fileinput import input from itertools import product from re import sub @@ -12,7 +13,7 @@ from time import time class TestCase(): def __init__(self, geo_file, ffield_file, control_file, params={}, result_header_fmt='', - result_header='', result_body_fmt='', result_file='results.txt'): + result_header='', result_body_fmt='', result_file='results.txt', geo_format='1'): self.__geo_file = geo_file self.__ffield_file = ffield_file self.__control_file = control_file @@ -47,33 +48,35 @@ class TestCase(): r'(?P<key>geo_format\s+)\S+(?P<comment>.*)', r'\g<key>%s\g<comment>' % x, l), \ } + self.__params['geo_format'] = geo_format + def _setup(self, param, temp_file): fp = open(self.__control_file, 'r') - lines = fp.read() - fp.close() + lines = fp.read() + fp.close() for k in self.__control_res.keys(): try: lines = self.__control_res[k](lines, param[k]) except KeyError: pass - fp_temp = open(temp_file, 'w', 0) - fp_temp.write(lines) - fp_temp.close() + fp_temp = open(temp_file, 'w') + fp_temp.write(lines) + fp_temp.close() def run(self, bin_file='sPuReMD/bin/spuremd'): base_dir = getcwd() - bin_path = path.join(base_dir, bin_file) + bin_path = path.join(base_dir, bin_file) env = dict(environ) write_header = True if path.exists(self.__result_file): write_header = False - fout = open(self.__result_file, 'a', 0) + fout = open(self.__result_file, 'a') if write_header: fout.write(self.__result_header_fmt.format(*self.__result_header)) temp_dir = mkdtemp() - temp_file = path.join(temp_dir, path.basename(self.__control_file)) + temp_file = path.join(temp_dir, path.basename(self.__control_file)) for p in product(*[self.__params[k] for k in self.__param_names]): param_dict = dict((k, v) for (k, v) in zip(self.__param_names, p)) @@ -95,7 +98,7 @@ class TestCase(): fout.close() remove(temp_file) - rmdir(temp_dir) + rmdir(temp_dir) def _process_result(self, fout, time, param): qeq = 0. @@ -129,69 +132,122 @@ class TestCase(): if __name__ == '__main__': + DATA = [ \ + 'bilayer_56800', 'bilayer_340800', \ + 'dna_19733', \ + 'petn_48256', \ + 'silica_6000', 'silica_72000', 'silica_300000', \ + 'water_6540', 'water_78480', 'water_327000', \ + ] + + parser = argparse.ArgumentParser(description='Run molecular dynamics simulations on specified data sets.') + parser.add_argument('data', nargs='+', + choices=DATA, help='Data sets for which to run simulations.') + + # parse args and take action + args = parser.parse_args() + base_dir = getcwd() control_dir = path.join(base_dir, 'environ') data_dir = path.join(base_dir, 'data/benchmarks') header_fmt_str = '{:20}|{:5}|{:5}|{:5}|{:10}|{:10}|{:10}|{:10}|{:10}|{:10}|{:10}\n' header_str = ['Data Set', 'Steps', 'Q Tol', 'Pre T', 'Pre Comp', - 'Pre App', 'Iters', 'SpMV', 'QEq', 'Threads', 'Time (s)'] + 'Pre App', 'Iters', 'SpMV', 'QEq', 'Threads', 'Time (s)'] body_fmt_str = '{:20} {:5} {:5} {:5} {:10.6f} {:10.6f} {:10.6f} {:10.6f} {:10.6f} {:10} {:10.6f}\n' params = { 'ensemble_type': ['0'], - 'nsteps': ['20', '100', '500', '1000'], + 'nsteps': ['20'], +# 'nsteps': ['20', '100', '500', '1000'], 'qeq_solver_type': ['0'], - 'qeq_solver_q_err': ['1e-6', '1e-10'], + 'qeq_solver_q_err': ['1e-6'], +# 'qeq_solver_q_err': ['1e-6', '1e-10'], # 'qeq_solver_q_err': ['1e-6', '1e-8', '1e-10', '1e-14'], - 'pre_comp_type': ['0'], + 'pre_comp_type': ['2'], # 'pre_comp_type': ['0', '1', '2'], - 'pre_comp_refactor': ['1'], -# 'pre_comp_sweeps': ['3'], -# 'pre_app_type': ['0'], -# 'pre_app_jacobi_iters': ['50'], - 'threads': ['12'], + 'pre_comp_refactor': ['100'], + 'pre_comp_sweeps': ['3'], + 'pre_app_type': ['2'], + 'pre_app_jacobi_iters': ['50'], + 'threads': ['2'], # 'threads': ['1', '2', '4', '12', '24'], - 'geo_format': ['1'], + 'geo_format': [], } - test_cases = [ + test_cases = [] + if 'water_6540' in args.data: + test_cases.append( TestCase(path.join(data_dir, 'water/water_6540.pdb'), path.join(data_dir, 'water/ffield.water'), path.join(control_dir, 'param.gpu.water'), - params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), -# TestCase(path.join(data_dir, 'water/water_78480.pdb'), -# path.join(data_dir, 'water/ffield.water'), -# path.join(control_dir, 'param.gpu.water'), -# params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), -# TestCase(path.join(data_dir, 'water/water_327000.geo'), -# path.join(data_dir, 'water/ffield.water'), -# path.join(control_dir, 'param.gpu.water'), -# params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['1'])) + if 'water_78480' in args.data: + test_cases.append( + TestCase(path.join(data_dir, 'water/water_78480.pdb'), + path.join(data_dir, 'water/ffield.water'), + path.join(control_dir, 'param.gpu.water'), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['0'])) + if 'water_327000' in args.data: + test_cases.append( + TestCase(path.join(data_dir, 'water/water_327000.geo'), + path.join(data_dir, 'water/ffield.water'), + path.join(control_dir, 'param.gpu.water'), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['0'])) + if 'bilayer_56800' in args.data: + test_cases.append( TestCase(path.join(data_dir, 'bilayer/bilayer_56800.pdb'), path.join(data_dir, 'bilayer/ffield-bio'), path.join(control_dir, 'param.gpu.water'), - params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['1'])) + if 'dna_19733' in args.data: + test_cases.append( TestCase(path.join(data_dir, 'dna/dna_19733.pdb'), path.join(data_dir, 'dna/ffield-dna'), path.join(control_dir, 'param.gpu.water'), - params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['1'])) + if 'silica_6000' in args.data: + test_cases.append( TestCase(path.join(data_dir, 'silica/silica_6000.pdb'), path.join(data_dir, 'silica/ffield-bio'), path.join(control_dir, 'param.gpu.water'), - params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), -# TestCase(path.join(data_dir, 'silica/silica_72000.geo'), -# path.join(data_dir, 'silica/ffield-bio'), -# path.join(control_dir, 'param.gpu.water'), -# params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), -# TestCase(path.join(data_dir, 'silica/silica_300000.geo'), -# path.join(data_dir, 'silica/ffield-bio'), -# path.join(control_dir, 'param.gpu.water'), -# params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['1'])) + if 'silica_72000' in args.data: + test_cases.append( + TestCase(path.join(data_dir, 'silica/silica_72000.geo'), + path.join(data_dir, 'silica/ffield-bio'), + path.join(control_dir, 'param.gpu.water'), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['0'])) + if 'silica_300000' in args.data: + test_cases.append( + TestCase(path.join(data_dir, 'silica/silica_300000.geo'), + path.join(data_dir, 'silica/ffield-bio'), + path.join(control_dir, 'param.gpu.water'), + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['0'])) + if 'petn_48256' in args.data: + test_cases.append( TestCase(path.join(data_dir, 'petn/petn_48256.pdb'), path.join(data_dir, 'petn/ffield.petn'), path.join(control_dir, 'param.gpu.water'), - params=params, result_header_fmt=header_fmt_str, result_header = header_str, result_body_fmt=body_fmt_str), - ] + params=params, result_header_fmt=header_fmt_str, + result_header = header_str, result_body_fmt=body_fmt_str, + geo_format=['1'])) + for test in test_cases: test.run(bin_file='sPuReMD/bin/spuremd')