diff --git a/PG-PuReMD/src/allocate.c b/PG-PuReMD/src/allocate.c index 88998a8439a76bc3143bb4d1fc4f55f4f7a133b2..1baa4488e2c305d34f18afb6fba550a243c86bff 100644 --- a/PG-PuReMD/src/allocate.c +++ b/PG-PuReMD/src/allocate.c @@ -431,15 +431,19 @@ int Allocate_Matrix( sparse_matrix **pH, int cap, int m ) int Allocate_Matrix( sparse_matrix *H, int cap, int m ) { - //H = (sparse_matrix*) smalloc(sizeof(sparse_matrix), "sparse_matrix"); + + // printf("cap: %d, m: %d \n", H->cap, H->m); + // H = (sparse_matrix*) smalloc(sizeof(sparse_matrix), "sparse_matrix"); H->cap = cap; H->m = m; + H->start = (int*) smalloc(sizeof(int) * cap, "matrix_start"); H->end = (int*) smalloc(sizeof(int) * cap, "matrix_end"); H->entries = (sparse_matrix_entry*) smalloc(sizeof(sparse_matrix_entry) * m, "matrix_entries"); return SUCCESS; + } @@ -961,7 +965,9 @@ void ReAllocate( reax_system *system, control_params *control, (0 && system->numH <= LOOSE_ZONE * system->Hcap) ) { Hflag = 1; - system->Hcap = (int)(system->numH * SAFE_ZONE); + //system->Hcap = (int)(system->numH * SAFE_ZONE); + // Tried fix + system->Hcap = (int)(system->n * SAFE_ZONE); } if ( Hflag || realloc->hbonds ) diff --git a/PG-PuReMD/src/bond_orders.c b/PG-PuReMD/src/bond_orders.c index a8cae10af540348688785e4349d882968a9e38c2..b36ebc4251c5c9397fb087161fa1cb7c5389d888 100644 --- a/PG-PuReMD/src/bond_orders.c +++ b/PG-PuReMD/src/bond_orders.c @@ -843,6 +843,8 @@ void BO( reax_system *system, control_params *control, simulation_data *data, num_bonds = 0; p_boc1 = system->reax_param.gp.l[0]; p_boc2 = system->reax_param.gp.l[1]; + + // printf("Running Bonds \n"); /* Calculate Deltaprime, Deltaprime_boc values */ for ( i = 0; i < system->N; ++i ) @@ -1215,10 +1217,10 @@ void BO( reax_system *system, control_params *control, simulation_data *data, //Print_Bonds( system, bonds, "pbonds.out" ); -#if defined(TEST_ENERGIES) || defined(TEST_FORCES) - fprintf( stderr, "Number of bonds: %d\n", num_bonds ); - Print_Bond_List( system, control, data, lists, out_control); -#endif +//#if defined(TEST_ENERGIES) || defined(TEST_FORCES) + // fprintf( stderr, "Number of bonds: %d\n", num_bonds ); +// Print_Bond_List( system, control, data, lists, out_control); +//#endif } diff --git a/PG-PuReMD/src/bonds.c b/PG-PuReMD/src/bonds.c index cf889b33581ebd829c374af79501171907c0b83c..9c2839eb63e2d722d531914bf6d02136f505d29e 100644 --- a/PG-PuReMD/src/bonds.c +++ b/PG-PuReMD/src/bonds.c @@ -118,11 +118,7 @@ void Bonds( reax_system *system, control_params *control, (sbp_i->mass == 12.0000 && sbp_j->mass == 15.9990) || (sbp_j->mass == 12.0000 && sbp_i->mass == 15.9990) ) { - - //SUDHIR - //TO BE REMOVED - fprintf (stderr, "hit the condition ... \n"); - exit (0); + exphu = EXP( -gp7 * SQR(bo_ij->BO - 2.50) ); exphua1 = EXP(-gp3 * (workspace->total_bond_order[i] - bo_ij->BO)); diff --git a/PG-PuReMD/src/cuda_copy.cu b/PG-PuReMD/src/cuda_copy.cu index a40a5b90b943a55fba76fb7e4910a49ff890a9bb..9a8fed48e249d9c202cd15b4fb1e15278faf44ec 100644 --- a/PG-PuReMD/src/cuda_copy.cu +++ b/PG-PuReMD/src/cuda_copy.cu @@ -105,6 +105,8 @@ void Sync_Atoms (reax_system *sys) fprintf (stderr, "p:%d - Synching atoms: n: %d N: %d, total_cap: %d \n", sys->my_rank, sys->n, sys->N, sys->total_cap); #endif + + //printf("Doing somethign unnecessayr\n"); copy_host_device (sys->my_atoms, sys->d_my_atoms, sizeof (reax_atom) * sys->N, cudaMemcpyHostToDevice, "system:my_atoms"); //TODO //TODO METIN FIX, coredump on his machine diff --git a/PG-PuReMD/src/forces.c b/PG-PuReMD/src/forces.c index 2c0ad9097586a8670bbf7e9b2157f1d938b5f62e..c0fa32826a16437bbd9fb76466123c3f424f914b 100644 --- a/PG-PuReMD/src/forces.c +++ b/PG-PuReMD/src/forces.c @@ -236,6 +236,273 @@ void Cuda_Compute_Total_Force( reax_system *system, control_params *control, #endif + +// Essentially no-cuda copies of cuda kernels, to be used only in the mpi-not-gpu version +//////////////////////// +// HBOND ISSUE +void mpi_not_gpu_update_bonds (reax_atom *my_atoms, + reax_list bonds, + int n) +{ +// int i = blockIdx.x * blockDim.x + threadIdx.x; + // if (i >= n) return; + int i; + for (i=0;i<n;i++){ + my_atoms [i].num_bonds = + MAX(Num_Entries(i, &bonds) * 2, MIN_BONDS); + } +} + + +void mpi_not_gpu_update_hbonds (reax_atom *my_atoms, + reax_list hbonds, + int n) +{ + int Hindex; + int i; + //int i = blockIdx.x * blockDim.x + threadIdx.x; + //if (i >= n) return; + for (i=0;i<n;i++){ + Hindex = my_atoms[i].Hindex; + my_atoms [i].num_hbonds = + MAX(Num_Entries(Hindex, &hbonds) * SAFER_ZONE, MIN_HBONDS); + } +} + +// Essentially a copy of cuda_validate_lists, but with all cuda-dependent kernels turned into serial versions +int MPI_Not_GPU_Validate_Lists (reax_system *system, storage *workspace, reax_list **lists, control_params *control, + int step, int n, int N, int numH ) +{ + int blocks; + int i, comp, Hindex; + int *index, *end_index; + reax_list *bonds, *hbonds; + reax_atom *my_atoms; + reallocate_data *realloc; + realloc = &( workspace->realloc); + + int max_sp_entries, num_hbonds, num_bonds; + int total_sp_entries; + + + + + + + + //blocks = system->n / DEF_BLOCK_SIZE + + // ((system->n % DEF_BLOCK_SIZE == 0) ? 0 : 1); + + //ker_update_bonds <<< blocks, DEF_BLOCK_SIZE >>> + // (system->d_my_atoms, *(*lists + BONDS), + // system->n); + //cudaThreadSynchronize (); + //cudaCheckError (); + mpi_not_gpu_update_bonds(system->my_atoms, *(*lists + BONDS),system->n); + + //////////////////////// + // HBOND ISSUE + //FIX - 4 - Added this check for hydrogen bond issue + if ((control->hbond_cut > 0) && (system->numH > 0)){ + //ker_update_hbonds <<< blocks, DEF_BLOCK_SIZE >>> + // (system->d_my_atoms, *(*lists + HBONDS), + // system->n); + //cudaThreadSynchronize (); + //cudaCheckError (); + mpi_not_gpu_update_hbonds(system->my_atoms, *(*lists + HBONDS),system->n); + + } + + //validate sparse matrix entries. + //memset (host_scratch, 0, 2 * system->N * sizeof (int)); + //index = (int *) host_scratch; + //end_index = index + system->N; + + index = workspace->H.start; + end_index = workspace->H.end; + + // immediately set these to host version since there is no device version. + //memcpy(index, workspace->H.start, system->N * sizeof (int)); + //memcpy(end_index, workspace->H.end, system->N * sizeof (int)); + + + + // don't need these, everything is already at host + //copy_host_device (index, dev_workspace->H.start, system->N * sizeof (int), + // cudaMemcpyDeviceToHost, "sparse_matrix:start" ); + //copy_host_device (end_index, dev_workspace->H.end, system->N * sizeof (int), + // cudaMemcpyDeviceToHost, "sparse_matrix:end" ); + max_sp_entries = total_sp_entries = 0; + //printf("max sparsematrix entries: %d \n", system->max_sparse_entries); + for (i = 0; i < n; i++ ){ + //if (i < N-1) + // comp = index [i+1]; + //else + // comp = dev_workspace->H.m; + + total_sp_entries += end_index [i] - index[i]; + if (end_index [i] - index[i] > system->max_sparse_entries) { + fprintf( stderr, "step%d-sparsemat-chk failed: i=%d start(i)=%d end(i)=%d \n", + step, i, index[i], end_index[i] ); + return FAILURE; + } else if (end_index[i] >= workspace->H.m) { + //SUDHIR_FIX_SPARSE_MATRIX + //TODO move this carver + //TODO move this carver + //TODO move this carver + fprintf (stderr, "p:%d - step%d-sparsemat-chk failed (exceed limits): i=%d start(i)=%d end(i)=%d \n", + system->my_rank, step, i, index[i], end_index[i]); + //TODO move this carver + //TODO move this carver + //TODO move this carver + return FAILURE; + } else { + if (max_sp_entries <= end_index[i] - index [i]) + max_sp_entries = end_index[i] - index [i]; + } + } + //if (max_sp_entries <= end_index[i] - index [i]) + // max_sp_entries = end_index[i] - index [i]; + + //update the current step max_sp_entries; + realloc->Htop = max_sp_entries; + fprintf (stderr, "p:%d - MPI-Not-GPU Reallocate: Total H matrix entries: %d, cap: %d, used: %d \n", + system->my_rank, workspace->H.n, workspace->H.m, total_sp_entries); + + if (total_sp_entries >= workspace->H.m) { + fprintf (stderr, "p:%d - **ran out of space for sparse matrix: step: %d, allocated: %d, used: %d \n", + system->my_rank, step, workspace->H.m, total_sp_entries); + + return FAILURE; + } + + + //validate Bond list + if (N > 0) { + num_bonds = 0; + + bonds = *lists + BONDS; + // memset (host_scratch, 0, 2 * bonds->n * sizeof (int)); + + // index = (int *) host_scratch; + // end_index = index + bonds->n; + index = bonds->index; + end_index = bonds->end_index; + + + // memcpy(index, bonds->index, bonds->n * sizeof (int)); + // memcpy(end_index, bonds->end_index, bonds->n * sizeof (int)); +/* + copy_host_device (index, bonds->index, bonds->n * sizeof (int), + cudaMemcpyDeviceToHost, "bonds:index"); + copy_host_device (end_index, bonds->end_index, bonds->n * sizeof (int), + cudaMemcpyDeviceToHost, "bonds:end_index"); +*/ + /* + for (i = 0; i < N; i++) { + if (i < N-1) + comp = index [i+1]; + else + comp = bonds->num_intrs; + + if (end_index [i] > comp) { + fprintf( stderr, "step%d-bondchk failed: i=%d start(i)=%d end(i)=%d str(i+1)=%d\n", + step, i, index[i], end_index[i], comp ); + return FAILURE; + } + + num_bonds += MAX( (end_index[i] - index[i]) * 4, MIN_BONDS); + } + + if (end_index[N-1] >= bonds->num_intrs) { + fprintf( stderr, "step%d-bondchk failed(end): i=N-1 start(i)=%d end(i)=%d num_intrs=%d\n", + step, index[N-1], end_index[N-1], bonds->num_intrs); + return FAILURE; + } + num_bonds = MAX( num_bonds, MIN_CAP*MIN_BONDS ); + //check the condition for reallocation + realloc->num_bonds = num_bonds; + */ + + int max_bonds = 0; + for (i = 0; i < N; i++) { + //printf("i: %d, index[i]: %d, end_index[i]: %d \n", i, index[i], end_index[i]); + if (end_index[i] - index[i] >= system->max_bonds) { + fprintf( stderr, "MPI-Not-GPU step%d-bondchk failed: i=%d start(i)=%d end(i)=%d max_bonds=%d\n", + step, i, index[i], end_index[i], system->max_bonds); + return FAILURE; + } + if (end_index[i] - index[i] >= max_bonds) + max_bonds = end_index[i] - index[i]; + } + realloc->num_bonds = max_bonds; + + } + //printf("hbonds->n : %d \n", hbonds->n); + //validate Hbonds list + num_hbonds = 0; + // FIX - 4 - added additional check here + if ((numH > 0) && (control->hbond_cut > 0)) { + hbonds = *lists + HBONDS; + memset (host_scratch, 0, 2 * hbonds->n * sizeof (int) + sizeof (reax_atom) * system->N); + index = (int *) host_scratch; + end_index = index + hbonds->n; + my_atoms = (reax_atom *)(end_index + hbonds->n); +/* + copy_host_device (index, hbonds->index, hbonds->n * sizeof (int), + cudaMemcpyDeviceToHost, "hbonds:index"); + copy_host_device (end_index, hbonds->end_index, hbonds->n * sizeof (int), + cudaMemcpyDeviceToHost, "hbonds:end_index"); + copy_host_device (my_atoms, system->d_my_atoms, system->N * sizeof (reax_atom), + cudaMemcpyDeviceToHost, "system:d_my_atoms"); +*/ + //fprintf (stderr, " Total local atoms: %d \n", n); + + /* + for (i = 0; i < N-1; i++) { + Hindex = my_atoms [i].Hindex; + if (Hindex > -1) + comp = index [Hindex + 1]; + else + comp = hbonds->num_intrs; + + if (end_index [Hindex] > comp) { + fprintf(stderr,"step%d-atom:%d hbondchk failed: H=%d start(H)=%d end(H)=%d str(H+1)=%d\n", + step, i, Hindex, index[Hindex], end_index[Hindex], comp ); + return FAILURE; + } + + num_hbonds += MAX( (end_index [Hindex] - index [Hindex]) * 2, MIN_HBONDS * 2); + } + if (end_index [my_atoms[i].Hindex] > hbonds->num_intrs) { + fprintf(stderr,"step%d-atom:%d hbondchk failed: H=%d start(H)=%d end(H)=%d num_intrs=%d\n", + step, i, Hindex, index[Hindex], end_index[Hindex], hbonds->num_intrs); + return FAILURE; + } + + num_hbonds += MIN( (end_index [my_atoms[i].Hindex] - index [my_atoms[i].Hindex]) * 2, + 2 * MIN_HBONDS); + num_hbonds = MAX( num_hbonds, MIN_CAP*MIN_HBONDS ); + realloc->num_hbonds = num_hbonds; + */ + + int max_hbonds = 0; + for (i = 0; i < N; i++) { + if (end_index[i] - index[i] >= system->max_hbonds) { + fprintf( stderr, "step%d-hbondchk failed: i=%d start(i)=%d end(i)=%d max_hbonds=%d\n", + step, i, index[i], end_index[i], system->max_hbonds); + return FAILURE; + } + if (end_index[i] - index[i] >= max_hbonds) + max_hbonds = end_index[i] - index[i]; + } + realloc->num_hbonds = max_hbonds; + } + + return SUCCESS; +} + +/* void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, int step, int n, int N, int numH ) { @@ -244,7 +511,7 @@ void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, reallocate_data *realloc; realloc = &(workspace->realloc); - /* bond list */ + // bond list if ( N > 0 ) { bonds = *lists + BONDS; @@ -271,7 +538,7 @@ void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, } - /* hbonds list */ + // hbonds list if ( numH > 0 ) { hbonds = *lists + HBONDS; @@ -283,11 +550,101 @@ void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, { system->my_atoms[i].num_hbonds = MAX( Num_Entries(Hindex, hbonds) * SAFER_ZONE, MIN_HBONDS ); +*/ + //if( Num_Entries(i, hbonds) >= + //(Start_Index(i+1,hbonds)-Start_Index(i,hbonds))*0.90/*DANGER_ZONE*/){ + // workspace->realloc.hbonds = 1; +/* + //TODO + if ( Hindex < system->n - 1 ) + comp = Start_Index(Hindex + 1, hbonds); + else comp = hbonds->num_intrs; + + if ( End_Index(Hindex, hbonds) > comp ) + { + fprintf(stderr, "step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n", + step, Hindex, End_Index(Hindex, hbonds), comp ); + MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY ); + } + } + } + } +}*/ + + +void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, + int step, int n, int N, int numH, MPI_Comm comm ) +{ + int i, comp, Hindex; + reax_list *bonds, *hbonds; + reallocate_data *realloc; + realloc = &(workspace->realloc); + + /* bond list */ + if ( N > 0 ) + { + bonds = *lists + BONDS; + + for ( i = 0; i < N; ++i ) + { + // if( i < n ) - we need to update ghost estimates for delayed nbrings + system->my_atoms[i].num_bonds = MAX(Num_Entries(i, bonds) * 2, MIN_BONDS); + + //if( End_Index(i, bonds) >= Start_Index(i+1, bonds)-2 ) + //workspace->realloc.bonds = 1; + + if ( i < N - 1 ) + comp = Start_Index(i + 1, bonds); + else comp = bonds->num_intrs; + + if ( End_Index(i, bonds) > comp ) + { + fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d str(i+1)=%d\n", + step, i, End_Index(i, bonds), comp ); + MPI_Abort( comm, INSUFFICIENT_MEMORY ); + } + } + } + + + /* hbonds list */ + if ( numH > 0 ) + { + hbonds = *lists + HBONDS; + + for ( i = 0; i < n; ++i ) + { + Hindex = system->my_atoms[i].Hindex; + if ( Hindex > -1 ) + { + system->my_atoms[i].num_hbonds = + (int)(MAX( Num_Entries(Hindex, hbonds) * SAFER_ZONE, MIN_HBONDS )); //if( Num_Entries(i, hbonds) >= //(Start_Index(i+1,hbonds)-Start_Index(i,hbonds))*0.90/*DANGER_ZONE*/){ // workspace->realloc.hbonds = 1; + if ( Hindex < numH - 1 ) + comp = Start_Index(Hindex + 1, hbonds); + else comp = hbonds->num_intrs; + + if ( End_Index(Hindex, hbonds) > comp ) + { + fprintf(stderr, "step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n", + step, Hindex, End_Index(Hindex, hbonds), comp ); + MPI_Abort( comm, INSUFFICIENT_MEMORY ); + } + } +/* + if ( Hindex > -1 ) + { + system->my_atoms[i].num_hbonds = + MAX( Num_Entries(Hindex, hbonds) * SAFER_ZONE, MIN_HBONDS ); +*/ + //if( Num_Entries(i, hbonds) >= + //(Start_Index(i+1,hbonds)-Start_Index(i,hbonds))*0.90/*DANGER_ZONE*/){ + // workspace->realloc.hbonds = 1; +/* //TODO if ( Hindex < system->n - 1 ) comp = Start_Index(Hindex + 1, hbonds); @@ -300,11 +657,18 @@ void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY ); } } + +*/ + + + } } } + + #if defined(OLD_VALIDATE) void Validate_Lists( storage *workspace, reax_list **lists, int step, int n, int N, int numH ) @@ -449,6 +813,9 @@ void Init_Forces( reax_system *system, control_params *control, bonds = *lists + BONDS; hbonds = *lists + HBONDS; + //Print_List(*lists + BONDS); + + for ( i = 0; i < system->n; ++i ) workspace->bond_mark[i] = 0; for ( i = system->n; i < system->N; ++i ) @@ -457,7 +824,7 @@ void Init_Forces( reax_system *system, control_params *control, //workspace->done_after[i] = Start_Index( i, far_nbrs ); } - H = &workspace->H; //MATRIX CHANGES + H = &(workspace->H); //MATRIX CHANGES H->n = system->n; Htop = 0; num_bonds = 0; @@ -526,7 +893,6 @@ void Init_Forces( reax_system *system, control_params *control, nbr_pj->dvec[1] = atom_j->x[1] - atom_i->x[1]; nbr_pj->dvec[2] = atom_j->x[2] - atom_i->x[2]; nbr_pj->d = rvec_Norm_Sqr( nbr_pj->dvec ); - if (i == 6540) fprintf (stderr, " atom: %d, nbr_pj->d: %f, cutoff: %f \n", i, nbr_pj->d, SQR(cutoff) ); if ( nbr_pj->d <= SQR(cutoff) ) { nbr_pj->d = sqrt(nbr_pj->d); @@ -614,9 +980,12 @@ void Init_Forces( reax_system *system, control_params *control, } } +// H->end[i] = Htop; + Set_End_Index( i, btop_i, bonds ); if ( local ) { + //printf("Htop: %d \n", Htop); H->end[i] = Htop; if ( ihb == 1 ) Set_End_Index( atom_i->Hindex, ihb_top, hbonds ); @@ -681,21 +1050,32 @@ void Init_Forces( reax_system *system, control_params *control, MPI_Barrier( MPI_COMM_WORLD ); #endif #if defined( DEBUG ) - Print_Bonds( system, bonds, "debugbonds.out" ); - Print_Bond_List2( system, bonds, "pbonds.out" ); - Print_Sparse_Matrix( system, H ); - for ( i = 0; i < H->n; ++i ) + // Print_Bonds( system, bonds, "debugbonds.out" ); + // Print_Bond_List2( system, bonds, "pbonds.out" ); + // Print_Sparse_Matrix( system, H ); +/* for ( i = 0; i < H->n; ++i ) for ( j = H->start[i]; j < H->end[i]; ++j ) fprintf( stderr, "%d %d %.15e\n", MIN(system->my_atoms[i].orig_id, system->my_atoms[H->entries[j].j].orig_id), MAX(system->my_atoms[i].orig_id, system->my_atoms[H->entries[j].j].orig_id), - H->entries[j].val ); + H->entries[j].val );*/ #endif + //Print_List(*lists + BONDS); - Validate_Lists( system, workspace, lists, + +//reax_system *system, storage *workspace, reax_list **lists, + // int step, int n, int N, int numH ) + +/* + Validate_Lists( system, workspace, lists, control, + data->step, system->n, system->N, system->numH );*/ + + MPI_Not_GPU_Validate_Lists( system, workspace, lists, control, data->step, system->n, system->N, system->numH ); + + } @@ -881,11 +1261,108 @@ void Init_Forces_noQEq( reax_system *system, control_params *control, Print_Bond_List2( system, bonds, "pbonds.out" ); #endif - Validate_Lists( system, workspace, lists, + MPI_Not_GPU_Validate_Lists( system, workspace, lists, control, data->step, system->n, system->N, system->numH ); } +void Host_Estimate_Sparse_Matrix (reax_atom *my_atoms, control_params *control, + reax_list p_far_nbrs, int n, int N, int renbr, int *indices) +{ + int i, j, pj; + int start_i, end_i; + int flag; + real cutoff; + far_neighbor_data *nbr_pj; + reax_atom *atom_i, *atom_j; + reax_list *far_nbrs = &( p_far_nbrs ); + + //i = blockIdx.x * blockDim.x + threadIdx.x; + //if (i >= N) return; + for (i=0;i<N;i++){ + atom_i = &(my_atoms[i]); + start_i = Start_Index(i, far_nbrs); + end_i = End_Index(i, far_nbrs); + + cutoff = control->nonb_cut; + + //++Htop; + if ( i < n) + indices [i] ++; + + /* update i-j distance - check if j is within cutoff */ + for( pj = start_i; pj < end_i; ++pj ) { + nbr_pj = &( far_nbrs->select.far_nbr_list[pj] ); + j = nbr_pj->nbr; + atom_j = &(my_atoms[j]); + if( renbr ) { + if(nbr_pj->d <= cutoff) + flag = 1; + else flag = 0; + } + else { + if (i < j) { + nbr_pj->dvec[0] = atom_j->x[0] - atom_i->x[0]; + nbr_pj->dvec[1] = atom_j->x[1] - atom_i->x[1]; + nbr_pj->dvec[2] = atom_j->x[2] - atom_i->x[2]; + } else { + nbr_pj->dvec[0] = atom_i->x[0] - atom_j->x[0]; + nbr_pj->dvec[1] = atom_i->x[1] - atom_j->x[1]; + nbr_pj->dvec[2] = atom_i->x[2] - atom_j->x[2]; + } + nbr_pj->d = rvec_Norm_Sqr( nbr_pj->dvec ); + //TODO + //TODO + //TODO + //if( nbr_pj->d <= (cutoff) ) { + if( nbr_pj->d <= SQR(cutoff) ) + { + nbr_pj->d = sqrt(nbr_pj->d); + flag = 1; + } + else + { + flag = 0; + } + } + if( flag ) + { + /* H matrix entry */ + //if( j < n || atom_i->orig_id < atom_j->orig_id ) + //++Htop; + // indices [i] ++; + //else if (j < n || atom_i->orig_id > atom_j->orig_id ) + // indices [i] ++; + + //if ((i < n) || (j < n)) + // indices [i] ++; + //if ((i < n) && (i < j) && ((j < n) || atom_i->orig_id < atom_j->orig_id)) + // indices [i] ++; + //if ( i >= n && j < n && atom_i->orig_id > atom_j->orig_id) + // indices [i] ++; + //else if ((i >=n) && (i > j) && ((j < n) || (atom_i->orig_id > atom_j->orig_id))) + // indices [i] ++; + //THIS IS THE HOST CONDITION + //if (i < n && i < j && ( j < n || atom_i->orig_id < atom_j->orig_id )) + //if (i < n && i < j && atom_i->orig_id < atom_j->orig_id && j >=n) + // indices [i] ++; + //THIS IS THE DEVICE CONDITION + //if ( i > j && i >= n && j < n && atom_j->orig_id < atom_i->orig_id) + // indices [i] ++; + + //this is the working condition + if (i < j && i < n && ( j < n || atom_i->orig_id < atom_j->orig_id)) + indices [i]++; + else if (i > j && i >= n && j < n && atom_j->orig_id < atom_i->orig_id) + indices [i] ++; + else if (i > j && i < n && ( j < n || atom_j->orig_id < atom_i->orig_id )) + indices [i] ++; + } + } +} +} + +#ifdef HAVE_CUDA void Estimate_Storages( reax_system *system, control_params *control, reax_list **lists, int *Htop, int *hb_top, int *bond_top, int *num_3body ) @@ -907,6 +1384,7 @@ void Estimate_Storages( reax_system *system, control_params *control, far_nbrs = *lists + FAR_NBRS; *Htop = 0; + //printf("Hcap: %d \n", system->Hcap); memset( hb_top, 0, sizeof(int) * system->Hcap ); memset( bond_top, 0, sizeof(int) * system->total_cap ); *num_3body = 0; @@ -953,19 +1431,21 @@ void Estimate_Storages( reax_system *system, control_params *control, if ( j < system->n || atom_i->orig_id < atom_j->orig_id ) //tryQEq ||1 ++(*Htop); - /* hydrogen bond lists */ + if ( control->hbond_cut > 0.1 && (ihb == 1 || ihb == 2) && nbr_pj->d <= control->hbond_cut ) { jhb = sbp_j->p_hbond; if ( ihb == 1 && jhb == 2 ) ++hb_top[i]; - else if ( j < system->n && ihb == 2 && jhb == 1 ) + else if ( j < system->n && ihb == 2 && jhb == 1 ){ ++hb_top[j]; + // printf("j: %d \n", j); + } } } - /* uncorrected bond orders */ + // uncorrected bond orders if ( nbr_pj->d <= control->bond_cut ) { r2 = SQR(r_ij); @@ -991,7 +1471,7 @@ void Estimate_Storages( reax_system *system, control_params *control, } else BO_pi2 = C56 = 0.0; - /* Initially BO values are the uncorrected ones, page 1 */ + // Initially BO values are the uncorrected ones, page 1 BO = BO_s + BO_pi + BO_pi2; if ( BO >= control->bo_cut ) @@ -1007,6 +1487,7 @@ void Estimate_Storages( reax_system *system, control_params *control, fprintf (stderr, "HOST SPARSE MATRIX ENTRIES: %d \n", *Htop ); *Htop = MAX( *Htop * SAFE_ZONE, MIN_CAP * MIN_HENTRIES ); + int hbond_count = 0; for ( i = 0; i < system->n; ++i ) { @@ -1033,6 +1514,150 @@ void Estimate_Storages( reax_system *system, control_params *control, #endif } +#else + +void Estimate_Storages( reax_system *system, control_params *control, + reax_list **lists, int *Htop, int *hb_top, + int *bond_top, int *num_3body) +{ + + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + int ihb, jhb; + int local; + real cutoff; + real r_ij, r2; + real C12, C34, C56; + real BO, BO_s, BO_pi, BO_pi2; + reax_list *far_nbrs; + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + far_neighbor_data *nbr_pj; + reax_atom *atom_i, *atom_j; + + far_nbrs = *lists + FAR_NBRS; + *Htop = 0; + memset( hb_top, 0, sizeof(int) * system->local_cap ); + memset( bond_top, 0, sizeof(int) * system->total_cap ); + *num_3body = 0; + + for ( i = 0; i < system->N; ++i ) + { + atom_i = &(system->my_atoms[i]); + type_i = atom_i->type; + start_i = Start_Index(i, far_nbrs); + end_i = End_Index(i, far_nbrs); + sbp_i = &(system->reax_param.sbp[type_i]); + + if ( i < system->n ) + { + local = 1; + cutoff = control->nonb_cut; + ++(*Htop); + ihb = sbp_i->p_hbond; + } + else + { + local = 0; + cutoff = control->bond_cut; + ihb = -1; + } + + for ( pj = start_i; pj < end_i; ++pj ) + { + //nbr_pj = &( far_nbrs->far_nbr_list[pj] ); + nbr_pj = &( far_nbrs->select.far_nbr_list[pj]); + j = nbr_pj->nbr; + atom_j = &(system->my_atoms[j]); + + if (nbr_pj->d <= cutoff) + { + type_j = system->my_atoms[j].type; + r_ij = nbr_pj->d; + sbp_j = &(system->reax_param.sbp[type_j]); + //twbp = &(system->reax_param.tbp[type_i][type_j]); + twbp = &(system->reax_param.tbp[index_tbp (type_i, type_j, system->reax_param.num_atom_types)]); + + if ( local ) + { + if ( j < system->n || atom_i->orig_id < atom_j->orig_id ) //tryQEq ||1 + ++(*Htop); + + /* hydrogen bond lists */ + if ( control->hbond_cut > 0.1 && (ihb == 1 || ihb == 2) && + nbr_pj->d <= control->hbond_cut ) + { + jhb = sbp_j->p_hbond; + if ( ihb == 1 && jhb == 2 ) + ++hb_top[i]; + else if ( j < system->n && ihb == 2 && jhb == 1 ) + ++hb_top[j]; + } + } + + /* uncorrected bond orders */ + if ( nbr_pj->d <= control->bond_cut ) + { + r2 = SQR(r_ij); + + if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0) + { + C12 = twbp->p_bo1 * pow( r_ij / twbp->r_s, twbp->p_bo2 ); + BO_s = (1.0 + control->bo_cut) * exp( C12 ); + } + else BO_s = C12 = 0.0; + + if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0) + { + C34 = twbp->p_bo3 * pow( r_ij / twbp->r_p, twbp->p_bo4 ); + BO_pi = exp( C34 ); + } + else BO_pi = C34 = 0.0; + + if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0) + { + C56 = twbp->p_bo5 * pow( r_ij / twbp->r_pp, twbp->p_bo6 ); + BO_pi2 = exp( C56 ); + } + else BO_pi2 = C56 = 0.0; + + /* Initially BO values are the uncorrected ones, page 1 */ + BO = BO_s + BO_pi + BO_pi2; + + if ( BO >= control->bo_cut ) + { + ++bond_top[i]; + ++bond_top[j]; + } + } + } + } + } + + *Htop = (int)(MAX( *Htop * SAFE_ZONE, MIN_CAP * MIN_HENTRIES )); + + // Set max sparse entries, needed for first iteration of validate_list + system->max_sparse_entries = *Htop * SAFE_ZONE; + + for ( i = 0; i < system->n; ++i ) + hb_top[i] = (int)(MAX( hb_top[i] * SAFER_ZONE, MIN_HBONDS )); + + for ( i = 0; i < system->N; ++i ) + { + *num_3body += SQR(bond_top[i]); + //if( i < system->n ) + bond_top[i] = MAX( bond_top[i] * 2, MIN_BONDS ); + //else bond_top[i] = MAX_BONDS; + } + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ estimate storages: Htop = %d, num_3body = %d\n", + system->my_rank, *Htop, *num_3body ); + MPI_Barrier( MPI_COMM_WORLD ); +#endif +} +#endif void Compute_Forces( reax_system *system, control_params *control, simulation_data *data, storage *workspace, diff --git a/PG-PuReMD/src/hydrogen_bonds.c b/PG-PuReMD/src/hydrogen_bonds.c index 924cd38755b19118bb3b58ac48c306cc354d7375..493379ce7d4c2f903ef1bb51c1914765bf1c9a67 100644 --- a/PG-PuReMD/src/hydrogen_bonds.c +++ b/PG-PuReMD/src/hydrogen_bonds.c @@ -35,6 +35,9 @@ #include "reax_vector.h" #endif +// DANIEL +// This function is taken straight from PuReMD, with minimal changes to accomodate the new datastructures +// Attempting to fix ehb being way off in MPI_Not_GPU void Hydrogen_Bonds( reax_system *system, control_params *control, simulation_data *data, storage *workspace, @@ -59,6 +62,202 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, reax_list *bonds, *hbonds; bond_data *bond_list; hbond_data *hbond_list; + + bonds = (*lists) + BONDS; + bond_list = bonds->select.bond_list; + hbonds = (*lists) + HBONDS; + hbond_list = hbonds->select.hbond_list; + + /* loops below discover the Hydrogen bonds between i-j-k triplets. + here j is H atom and there has to be some bond between i and j. + Hydrogen bond is between j and k. + so in this function i->X, j->H, k->Z when we map + variables onto the ones in the handout.*/ + for ( j = 0; j < system->n; ++j ) + /* j has to be of type H */ + if ( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == 1 ) + { + /*set j's variables */ + type_j = system->my_atoms[j].type; + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); + hb_start_j = Start_Index( system->my_atoms[j].Hindex, hbonds ); + hb_end_j = End_Index( system->my_atoms[j].Hindex, hbonds ); + + top = 0; + for ( pi = start_j; pi < end_j; ++pi ) + { + pbond_ij = &( bond_list[pi] ); + i = pbond_ij->nbr; + bo_ij = &(pbond_ij->bo_data); + type_i = system->my_atoms[i].type; + + if ( system->reax_param.sbp[type_i].p_hbond == 2 && + bo_ij->BO >= HB_THRESHOLD ) + hblist[top++] = pi; + } + + // fprintf( stderr, "j: %d, top: %d, hb_start_j: %d, hb_end_j:%d\n", + // j, top, hb_start_j, hb_end_j ); + + for ( pk = hb_start_j; pk < hb_end_j; ++pk ) + { + /* set k's varibles */ + k = hbond_list[pk].nbr; + type_k = system->my_atoms[k].type; + nbr_jk = hbond_list[pk].ptr; + r_jk = nbr_jk->d; + rvec_Scale( dvec_jk, hbond_list[pk].scl, nbr_jk->dvec ); + + for ( itr = 0; itr < top; ++itr ) + { + pi = hblist[itr]; + // DANIEL + // pbond_ij = &( bonds->bond_list[pi] ); + pbond_ij = &( bonds->select.bond_list[pi] ); + i = pbond_ij->nbr; + + if ( system->my_atoms[i].orig_id != system->my_atoms[k].orig_id ) + { + bo_ij = &(pbond_ij->bo_data); + type_i = system->my_atoms[i].type; + r_ij = pbond_ij->d; + //hbp = &(system->reax_param.hbp[ type_i ][ type_j ][ type_k ]); + // SUDHIR + hbp = &(system->reax_param.hbp[ index_hbp (type_i, type_j, type_k, system->reax_param.num_atom_types) ]); + + ++num_hb_intrs; + + Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, + &theta, &cos_theta ); + /* the derivative of cos(theta) */ + Calculate_dCos_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, + &dcos_theta_di, &dcos_theta_dj, + &dcos_theta_dk ); + + /* hyrogen bond energy*/ + sin_theta2 = sin( theta / 2.0 ); + sin_xhz4 = SQR(sin_theta2); + sin_xhz4 *= sin_xhz4; + cos_xhz1 = ( 1.0 - cos_theta ); + exp_hb2 = exp( -hbp->p_hb2 * bo_ij->BO ); + exp_hb3 = exp( -hbp->p_hb3 * ( hbp->r0_hb / r_jk + + r_jk / hbp->r0_hb - 2.0 ) ); + + data->my_en.e_hb += e_hb = + hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4; + + CEhb1 = hbp->p_hb1 * hbp->p_hb2 * exp_hb2 * exp_hb3 * sin_xhz4; + CEhb2 = -hbp->p_hb1 / 2.0 * (1.0 - exp_hb2) * exp_hb3 * cos_xhz1; + CEhb3 = -hbp->p_hb3 * + (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb; + + /*fprintf( stdout, + "%6d%6d%6d%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f\n", + system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, + system->my_atoms[k].orig_id, + r_jk, theta, hbp->p_hb1, exp_hb2, hbp->p_hb3, hbp->r0_hb, + exp_hb3, sin_xhz4, e_hb ); */ + + /* hydrogen bond forces */ + bo_ij->Cdbo += CEhb1; // dbo term + + if ( control->virial == 0 ) + { + // dcos terms + rvec_ScaledAdd( workspace->f[i], +CEhb2, dcos_theta_di ); + rvec_ScaledAdd( workspace->f[j], +CEhb2, dcos_theta_dj ); + rvec_ScaledAdd( workspace->f[k], +CEhb2, dcos_theta_dk ); + // dr terms + rvec_ScaledAdd( workspace->f[j], -CEhb3 / r_jk, dvec_jk ); + rvec_ScaledAdd( workspace->f[k], +CEhb3 / r_jk, dvec_jk ); + } + else + { + /* for pressure coupling, terms that are not related to bond order + derivatives are added directly into pressure vector/tensor */ + rvec_Scale( force, +CEhb2, dcos_theta_di ); // dcos terms + rvec_Add( workspace->f[i], force ); + rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); + rvec_ScaledAdd( data->my_ext_press, 1.0, ext_press ); + + rvec_ScaledAdd( workspace->f[j], +CEhb2, dcos_theta_dj ); + + ivec_Scale( rel_jk, hbond_list[pk].scl, nbr_jk->rel_box ); + rvec_Scale( force, +CEhb2, dcos_theta_dk ); + rvec_Add( workspace->f[k], force ); + rvec_iMultiply( ext_press, rel_jk, force ); + rvec_ScaledAdd( data->my_ext_press, 1.0, ext_press ); + // dr terms + rvec_ScaledAdd( workspace->f[j], -CEhb3 / r_jk, dvec_jk ); + + rvec_Scale( force, CEhb3 / r_jk, dvec_jk ); + rvec_Add( workspace->f[k], force ); + rvec_iMultiply( ext_press, rel_jk, force ); + rvec_ScaledAdd( data->my_ext_press, 1.0, ext_press ); + } + +#ifdef TEST_ENERGY + /* fprintf( out_control->ehb, + "%24.15e%24.15e%24.15e\n%24.15e%24.15e%24.15e\n%24.15e%24.15e%24.15e\n", + dcos_theta_di[0], dcos_theta_di[1], dcos_theta_di[2], + dcos_theta_dj[0], dcos_theta_dj[1], dcos_theta_dj[2], + dcos_theta_dk[0], dcos_theta_dk[1], dcos_theta_dk[2]); + fprintf( out_control->ehb, "%24.15e%24.15e%24.15e\n", + CEhb1, CEhb2, CEhb3 ); */ + fprintf( out_control->ehb, + //"%6d%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n", + "%6d%6d%6d%12.4f%12.4f%12.4f%12.4f%12.4f\n", + system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, + system->my_atoms[k].orig_id, + r_jk, theta, bo_ij->BO, e_hb, data->my_en.e_hb ); +#endif +#ifdef TEST_FORCES + Add_dBO( system, lists, j, pi, +CEhb1, workspace->f_hb ); //dbo term + // dcos terms + rvec_ScaledAdd( workspace->f_hb[i], +CEhb2, dcos_theta_di ); + rvec_ScaledAdd( workspace->f_hb[j], +CEhb2, dcos_theta_dj ); + rvec_ScaledAdd( workspace->f_hb[k], +CEhb2, dcos_theta_dk ); + // dr terms + rvec_ScaledAdd( workspace->f_hb[j], -CEhb3 / r_jk, dvec_jk ); + rvec_ScaledAdd( workspace->f_hb[k], +CEhb3 / r_jk, dvec_jk ); +#endif + } + } + } + } + +#if defined(DEBUG) + fprintf( stderr, "Number of hydrogen bonds: %d\n", num_hb_intrs ); + fprintf( stderr, "Hydrogen Bond Energy: %g\n", data->my_en.e_hb ); + fprintf( stderr, "hydbonds: ext_press (%24.15e %24.15e %24.15e)\n", + data->ext_press[0], data->ext_press[1], data->ext_press[2] ); +#endif +} + +void Old_Hydrogen_Bonds( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) +{ + int i, j, k, pi, pk; + int type_i, type_j, type_k; + int start_j, end_j, hb_start_j, hb_end_j; + int hblist[MAX_BONDS]; + int itr, top; + int num_hb_intrs = 0; + ivec rel_jk; + real r_ij, r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2; + real e_hb, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3; + rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk; + rvec dvec_jk, force, ext_press; + // rtensor temp_rtensor, total_rtensor; + hbond_parameters *hbp; + bond_order_data *bo_ij; + bond_data *pbond_ij; + far_neighbor_data *nbr_jk; + reax_list *bonds, *hbonds; + bond_data *bond_list; + hbond_data *hbond_list; bonds = (*lists) + BONDS; bond_list = bonds->select.bond_list; diff --git a/PG-PuReMD/src/init_md.c b/PG-PuReMD/src/init_md.c index 002c0a44a265de7972ab032563c35189de2a7574..eb2acd7685deefe46e734d1889fe591f4a7286d3 100644 --- a/PG-PuReMD/src/init_md.c +++ b/PG-PuReMD/src/init_md.c @@ -162,6 +162,22 @@ int Init_System( reax_system *system, control_params *control, Bin_Boundary_Atoms( system ); /* estimate numH and Hcap */ + + system->numH = 0; + if ( control->hbond_cut > 0 ) + for ( i = 0; i < system->n; ++i ) + { + atom = &(system->my_atoms[i]); + if ( system->reax_param.sbp[ atom->type ].p_hbond == 1 ) + atom->Hindex = system->numH++; + else atom->Hindex = -1; + } + //Tried fix + //system->Hcap = MAX( system->numH * SAFER_ZONE, MIN_CAP ); + system->Hcap = MAX( system->n * SAFER_ZONE, MIN_CAP ); + //printf("numH: %d, Hcap: %d \n", system->numH, system->Hcap); +// Sudhir-style below +/* system->numH = 0; if ( control->hbond_cut > 0 ) for ( i = 0; i < system->n; ++i ) @@ -172,6 +188,9 @@ int Init_System( reax_system *system, control_params *control, else atom->Hindex = -1; } system->Hcap = MAX( system->numH * SAFER_ZONE, MIN_CAP ); +*/ + +//Sync_System (system); //Allocate_System( system, system->local_cap, system->total_cap, msg ); #if defined(DEBUG_FOCUS) @@ -247,7 +266,7 @@ int Cuda_Init_System( reax_system *system, control_params *control, //else atom->Hindex = -1; } system->Hcap = MAX( system->numH * SAFER_ZONE, MIN_CAP ); - + //Allocate_System( system, system->local_cap, system->total_cap, msg ); //Sync atoms here to continue the computation @@ -722,6 +741,7 @@ int Init_Lists( reax_system *system, control_params *control, //for( i = 0; i < MAX_NBRS; ++i ) nrecv[i] = system->my_nbrs[i].est_recv; //system->N = SendRecv( system, mpi_data, mpi_data->boundary_atom_type, nrecv, // Sort_Boundary_Atoms, Unpack_Exchange_Message, 1 ); + num_nbrs = Estimate_NumNeighbors( system, lists ); if (!Make_List(system->total_cap, num_nbrs, TYP_FAR_NEIGHBOR, *lists + FAR_NBRS)) { @@ -736,13 +756,19 @@ int Init_Lists( reax_system *system, control_params *control, Generate_Neighbor_Lists( system, data, workspace, lists ); bond_top = (int*) calloc( system->total_cap, sizeof(int) ); - //hb_top = (int*) calloc( system->local_cap, sizeof(int) ); - hb_top = (int*) calloc( system->Hcap, sizeof(int) ); - + hb_top = (int*) calloc( system->local_cap, sizeof(int) ); + //hb_top = (int*) calloc( system->Hcap, sizeof(int) ); + //printf("HCap: %d \n", system->Hcap);i + Estimate_Storages( system, control, lists, &Htop, hb_top, bond_top, &num_3body ); - + //Host_Estimate_Sparse_Matrix( system, control, lists, system->local_cap, system->total_cap, + // &Htop, hb_top, bond_top, &num_3body ); + + + //printf("%p , %d , %d \n", (void*)&(workspace->H), system->local_cap,Htop); Allocate_Matrix( &(workspace->H), system->local_cap, Htop ); + //MATRIX CHANGES //workspace->L = NULL; //workspace->U = NULL; @@ -754,7 +780,7 @@ int Init_Lists( reax_system *system, control_params *control, if ( control->hbond_cut > 0 ) { - /* init H indexes */ + // init H indexes total_hbonds = 0; for ( i = 0; i < system->n; ++i ) { @@ -763,6 +789,10 @@ int Init_Lists( reax_system *system, control_params *control, } total_hbonds = MAX( total_hbonds * SAFER_ZONE, MIN_CAP * MIN_HBONDS ); + // DANIEL, to make Mpi_Not_Gpu_Validate_Lists() not complain that system->max_bonds is 0 + system->max_hbonds = total_hbonds * SAFER_ZONE; + + if ( !Make_List( system->Hcap, total_hbonds, TYP_HBOND, *lists + HBONDS) ) { fprintf( stderr, "not enough space for hbonds list. terminating!\n" ); @@ -785,6 +815,10 @@ int Init_Lists( reax_system *system, control_params *control, total_bonds += bond_top[i]; } bond_cap = MAX( total_bonds * SAFE_ZONE, MIN_CAP * MIN_BONDS ); + + + // DANIEL, to make Mpi_Not_Gpu_Validate_Lists() not complain that system->max_bonds is 0 + system->max_bonds = total_bonds * SAFER_ZONE; if ( !Make_List( system->total_cap, bond_cap, TYP_BOND, *lists + BONDS) ) { @@ -1053,6 +1087,9 @@ void Initialize( reax_system *system, control_params *control, reax_list **lists, output_controls *out_control, mpi_datatypes *mpi_data ) { + + host_scratch = (void *)malloc (HOST_SCRATCH_SIZE ); + char msg[MAX_STR]; if ( Init_MPI_Datatypes( system, workspace, mpi_data, msg ) == FAILURE ) @@ -1317,7 +1354,7 @@ void Initialize( reax_system *system, control_params *control, mpi_datatypes *mpi_data ) { char msg[MAX_STR]; - + host_scratch = (void *)malloc (HOST_SCRATCH_SIZE ); if ( Init_System(system, msg) == FAILURE ) { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); diff --git a/PG-PuReMD/src/list.c b/PG-PuReMD/src/list.c index 3994d208ea402ecdc63433127c3e2b555634c945..09a38e59776a4e014d07e58e3cbc7559b902a41d 100644 --- a/PG-PuReMD/src/list.c +++ b/PG-PuReMD/src/list.c @@ -29,7 +29,19 @@ #include "reax_tool_box.h" #endif +void Print_List(reax_list* list){ + //printf("List_Print\n"); + int i; + printf("START INDICES \n"); + for (i=0;i<list->n;i++){ + printf("%d \n",list->index[i]); + } + printf("END INDICES \n"); + for (i=0;i<list->n;i++){ + printf("%d \n", list->end_index[i]); + } +} /************* allocate list space ******************/ int Make_List(int n, int num_intrs, int type, reax_list *l) { @@ -40,7 +52,7 @@ int Make_List(int n, int num_intrs, int type, reax_list *l) l->index = (int*) smalloc( n * sizeof(int), "list:index" ); l->end_index = (int*) smalloc( n * sizeof(int), "list:end_index" ); - +//printf("SUCCESS\n"); l->type = type; #if defined(DEBUG_FOCUS) fprintf( stderr, "list: n=%d num_intrs=%d type=%d\n", n, num_intrs, type ); diff --git a/PG-PuReMD/src/list.h b/PG-PuReMD/src/list.h index a78dcc81a714e2f432c927f599ac1afe0112b37e..630958d0736490eef90948ddd12a3c2e6d0de81a 100644 --- a/PG-PuReMD/src/list.h +++ b/PG-PuReMD/src/list.h @@ -27,7 +27,7 @@ #ifdef _cplusplus extern "C" { #endif - + void Print_List(reax_list*); int Make_List( int, int, int, reax_list*); void Delete_List( reax_list*); diff --git a/PG-PuReMD/src/nonbonded.c b/PG-PuReMD/src/nonbonded.c index 8ad9ebf33115e2383870b8d7cb9aeae0fcb1c5b8..c81c5f3e11d70383323e0dda3b63788fa2d48b2a 100644 --- a/PG-PuReMD/src/nonbonded.c +++ b/PG-PuReMD/src/nonbonded.c @@ -340,9 +340,24 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, void Compute_Polarization_Energy( reax_system *system, simulation_data *data ) { +/* int i, type_i; real q; + data->my_en.e_pol = 0.0; + for ( i = 0; i < system->n; i++ ) + { + q = system->my_atoms[i].q; + type_i = system->my_atoms[i].type; + + data->my_en.e_pol += + KCALpMOL_to_EV * (system->reax_param.sbp[type_i].chi * q + + (system->reax_param.sbp[type_i].eta / 2.) * SQR(q)); + }*/ + + int i, type_i; + real q; + data->my_en.e_pol = 0.0; for ( i = 0; i < system->n; i++ ) { @@ -353,6 +368,10 @@ void Compute_Polarization_Energy( reax_system *system, simulation_data *data ) KCALpMOL_to_EV * (system->reax_param.sbp[type_i].chi * q + (system->reax_param.sbp[type_i].eta / 2.) * SQR(q)); } + + + + } diff --git a/PG-PuReMD/src/parallelreax.c b/PG-PuReMD/src/parallelreax.c index 0cca8357bb2eeb6ceef0a9aa40bfe0587643bb90..40cc9c3b96eea09bbf1074485c368beb7af7f9c5 100644 --- a/PG-PuReMD/src/parallelreax.c +++ b/PG-PuReMD/src/parallelreax.c @@ -415,7 +415,7 @@ int main( int argc, char* argv[] ) validate_device (system, data, workspace, lists); #endif -#else +#else /* allocated main datastructures */ system = (reax_system *) @@ -426,19 +426,44 @@ int main( int argc, char* argv[] ) smalloc( sizeof(simulation_data), "data" ); workspace = (storage *) smalloc( sizeof(storage), "workspace" ); + lists = (reax_list **) smalloc( LIST_N * sizeof(reax_list*), "lists" ); for ( i = 0; i < LIST_N; ++i ) { +/* lists[i] = (reax_list *) smalloc( sizeof(reax_list), "lists[i]" ); + lists[i]->allocated = 0;*/ + + lists[i] = (reax_list *) smalloc( sizeof(reax_list), "lists[i]" ); lists[i]->allocated = 0; + + //initialize here TODO + lists[i]->n = 0; + lists[i]->num_intrs = 0; + lists[i]->index = NULL; + lists[i]->end_index = NULL; + lists[i]->select.v = NULL; } out_control = (output_controls *) smalloc( sizeof(output_controls), "out_control" ); mpi_data = (mpi_datatypes *) smalloc( sizeof(mpi_datatypes), "mpi_data" ); + /* allocate the cuda auxiliary data structures */ + dev_workspace = (storage *) smalloc( sizeof(storage), "dev_workspace" ); + + dev_lists = (reax_list **) smalloc ( LIST_N * sizeof (reax_list *), "dev_lists" ); + for ( i = 0; i < LIST_N; ++i ) + { + dev_lists[i] = (reax_list *) smalloc( sizeof(reax_list), "lists[i]" ); + dev_lists[i]->allocated = 0; + } + + /* Initialize member variables */ + system->init_thblist = FALSE; + /* setup the parallel environment */ MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &(control->nprocs) ); @@ -458,9 +483,9 @@ int main( int argc, char* argv[] ) /* measure total simulation time after input is read */ if ( system->my_rank == MASTER_NODE ) t_start = Get_Time( ); - /* initialize datastructures */ Initialize( system, control, data, workspace, lists, out_control, mpi_data ); + #if defined(DEBUG) fprintf( stderr, "p%d: initializated data structures\n", system->my_rank ); MPI_Barrier( mpi_data->world ); @@ -469,6 +494,7 @@ int main( int argc, char* argv[] ) /* compute f_0 */ Comm_Atoms( system, control, data, workspace, lists, mpi_data, 1 ); Reset( system, control, data, workspace, lists ); + //Print_List(*lists + BONDS); Generate_Neighbor_Lists( system, data, workspace, lists ); Compute_Forces( system, control, data, workspace, lists, out_control, mpi_data ); @@ -504,7 +530,7 @@ int main( int argc, char* argv[] ) MPI_Barrier( mpi_data->world ); #endif } - + #endif /* end of the simulation, write total simulation time */ diff --git a/PG-PuReMD/src/reax_types.h b/PG-PuReMD/src/reax_types.h index b75d9139adab44be34fd14eb0a874fd0f46372d1..debe20aac39b241c5064a55b80603a3a3886952f 100644 --- a/PG-PuReMD/src/reax_types.h +++ b/PG-PuReMD/src/reax_types.h @@ -57,7 +57,7 @@ #include <sys/time.h> #include <time.h> #include <zlib.h> - +#define HOST_SCRATCH_SIZE (1024 * 1024 * 20) #ifdef HAVE_CUDA #include <cuda.h> #endif @@ -577,12 +577,21 @@ typedef struct int num_atom_types; #ifndef HAVE_CUDA +/* global_parameters gp; single_body_parameters *sbp; two_body_parameters **tbp; three_body_header ***thbp; hbond_parameters ***hbp; - four_body_header ****fbp; + four_body_header ****fbp;*/ + + global_parameters gp; + single_body_parameters *sbp; + two_body_parameters *tbp; + three_body_header *thbp; + hbond_parameters *hbp; + four_body_header *fbp; + #else global_parameters gp; global_parameters d_gp; @@ -645,6 +654,7 @@ typedef struct struct grid_cell { #ifndef HAVE_CUDA +/* real cutoff; rvec min, max; ivec rel_box; @@ -658,6 +668,23 @@ struct grid_cell struct grid_cell** nbrs; ivec* nbrs_x; rvec* nbrs_cp; +*/ + + //real cutoff; + rvec min, max; + //ivec rel_box; + + int mark; + int type; + //int str; + //int end; + int top; + int* atoms; + //struct grid_cell** nbrs; //changed + //ivec* nbrs_x; + //rvec* nbrs_cp; + + #else //real cutoff; rvec min, max; @@ -700,8 +727,22 @@ typedef struct ivec ghost_bond_span; #ifndef HAVE_CUDA - grid_cell*** cells; - ivec *order; +// grid_cell*** cells; +// ivec *order; + + grid_cell* cells; + ivec *order; + + int *str; + int *end; + real *cutoff; + ivec *nbrs_x; + rvec *nbrs_cp; + + ivec *rel_box; + + + #else grid_cell* cells; //changed ivec *order; @@ -1106,7 +1147,8 @@ typedef struct /* QEq storage */ #ifndef HAVE_CUDA - sparse_matrix *H, *L, *U; +// sparse_matrix *H, *L, *U; + sparse_matrix H, L, U; #else sparse_matrix H, L, U; //CHANGED #endif @@ -1118,7 +1160,8 @@ typedef struct real *y, *z, *g; real *hc, *hs; #ifndef HAVE_CUDA - real **h, **v; + //real **h, **v; + real *h, *v; #else real *h, *v; //changed #endif @@ -1136,7 +1179,8 @@ typedef struct /* storage space for bond restrictions */ int *restricted; #ifndef HAVE_CUDA - int **restricted_list; + //int **restricted_list; + int * restricted_list; #else int *restricted_list; //changed #endif @@ -1183,7 +1227,8 @@ typedef struct typedef union { -#ifdef HAVE_CUDA + +//#ifdef HAVE_CUDA void *v; three_body_interaction_data *three_body_list; bond_data *bond_list; @@ -1191,7 +1236,8 @@ typedef union dDelta_data *dDelta_list; far_neighbor_data *far_nbr_list; hbond_data *hbond_list; -#endif +//#endif + } list_type; @@ -1207,7 +1253,7 @@ typedef struct int type; list_type select; - +/* #ifndef HAVE_CUDA void *v; three_body_interaction_data *three_body_list; @@ -1217,6 +1263,8 @@ typedef struct far_neighbor_data *far_nbr_list; hbond_data *hbond_list; #endif +*/ + } reax_list; @@ -1326,7 +1374,8 @@ typedef struct } LR_lookup_table; #ifndef HAVE_CUDA -extern LR_lookup_table **LR; +//extern LR_lookup_table **LR; +extern LR_lookup_table *LR; #else extern LR_lookup_table *LR; //changed #endif diff --git a/PG-PuReMD/src/reset_tools.c b/PG-PuReMD/src/reset_tools.c index 9d339c9a780d8e2eb44a52f0937c45df48a4ab85..e49ca306c289ee57eb20fea6b7910d403072743f 100644 --- a/PG-PuReMD/src/reset_tools.c +++ b/PG-PuReMD/src/reset_tools.c @@ -201,7 +201,7 @@ void Reset_Neighbor_Lists( reax_system *system, control_params *control, Set_End_Index( i, total_bonds, bonds ); total_bonds += system->my_atoms[i].num_bonds; } - +// Print_List(*lists + BONDS); /* is reallocation needed? */ if ( total_bonds >= bonds->num_intrs * DANGER_ZONE ) { @@ -222,13 +222,19 @@ void Reset_Neighbor_Lists( reax_system *system, control_params *control, /* hbonds list */ if ( control->hbond_cut > 0 && system->numH > 0 ) { +// Print_List(*lists + BONDS); + hbonds = (*lists) + HBONDS; +// printf("bonds enum: %d, bonds pointer: %ld ::: hbonds enum: %d, hbonds pointer: %ld \n", BONDS, bonds, HBONDS, hbonds); + //hbonds = &(lists[HBONDS]); total_hbonds = 0; /* reset start-end indexes */ for ( i = 0; i < system->n; ++i ) { + Hindex = system->my_atoms[i].Hindex; +// printf("i: %d, Hindex: %d \n", i, Hindex); if ( Hindex > -1 ) { Set_Start_Index( Hindex, total_hbonds, hbonds ); @@ -236,7 +242,8 @@ void Reset_Neighbor_Lists( reax_system *system, control_params *control, total_hbonds += system->my_atoms[i].num_hbonds; } } - + +// Print_List(*lists + BONDS); /* is reallocation needed? */ if ( total_hbonds >= hbonds->num_intrs * 0.90/*DANGER_ZONE*/ ) { diff --git a/PG-PuReMD/src/tool_box.c b/PG-PuReMD/src/tool_box.c index 224e2e73fb5dc4eaef9a216885c16c640cb9912d..09e44ea319f6d207842b49ba810a5211da2888f1 100644 --- a/PG-PuReMD/src/tool_box.c +++ b/PG-PuReMD/src/tool_box.c @@ -425,8 +425,11 @@ void* smalloc( long n, char *name ) fprintf( stderr, "returning NULL.\n" ); return NULL; } - + //printf("requesting memory for %s \n", name); + //malloc( n ); + //printf("successfuly requested memory for %s \n", name); ptr = malloc( n ); + // printf("successfuly assigned pointer for %s \n", name); if ( ptr == NULL ) { fprintf( stderr, "ERROR: failed to allocate %ld bytes for array %s", diff --git a/PG-PuReMD/src/torsion_angles.c b/PG-PuReMD/src/torsion_angles.c index 6d6df18fb1691031efe80b3d624878fa2927ebc5..132e76a92d6ee27b953c039b611f756a87d4b452 100644 --- a/PG-PuReMD/src/torsion_angles.c +++ b/PG-PuReMD/src/torsion_angles.c @@ -53,6 +53,122 @@ real Calculate_Omega( rvec dvec_ij, real r_ij, real arg, poem, tel; rvec cross_jk_kl; + sin_ijk = sin( p_ijk->theta ); + cos_ijk = cos( p_ijk->theta ); + sin_jkl = sin( p_jkl->theta ); + cos_jkl = cos( p_jkl->theta ); + + /* omega */ + unnorm_cos_omega = -rvec_Dot(dvec_ij, dvec_jk) * rvec_Dot(dvec_jk, dvec_kl) + + SQR( r_jk ) * rvec_Dot( dvec_ij, dvec_kl ); + + rvec_Cross( cross_jk_kl, dvec_jk, dvec_kl ); + unnorm_sin_omega = -r_jk * rvec_Dot( dvec_ij, cross_jk_kl ); + + omega = atan2( unnorm_sin_omega, unnorm_cos_omega ); + + + /* derivatives */ + /* coef for adjusments to cos_theta's */ + /* rla = r_ij, rlb = r_jk, rlc = r_kl, r4 = r_li; + coshd = cos_ijk, coshe = cos_jkl; + sinhd = sin_ijk, sinhe = sin_jkl; */ + htra = r_ij + cos_ijk * ( r_kl * cos_jkl - r_jk ); + htrb = r_jk - r_ij * cos_ijk - r_kl * cos_jkl; + htrc = r_kl + cos_jkl * ( r_ij * cos_ijk - r_jk ); + hthd = r_ij * sin_ijk * ( r_jk - r_kl * cos_jkl ); + hthe = r_kl * sin_jkl * ( r_jk - r_ij * cos_ijk ); + hnra = r_kl * sin_ijk * sin_jkl; + hnrc = r_ij * sin_ijk * sin_jkl; + hnhd = r_ij * r_kl * cos_ijk * sin_jkl; + hnhe = r_ij * r_kl * sin_ijk * cos_jkl; + + + poem = 2.0 * r_ij * r_kl * sin_ijk * sin_jkl; + if ( poem < 1e-20 ) poem = 1e-20; + + tel = SQR( r_ij ) + SQR( r_jk ) + SQR( r_kl ) - SQR( r_li ) - + 2.0 * ( r_ij * r_jk * cos_ijk - r_ij * r_kl * cos_ijk * cos_jkl + + r_jk * r_kl * cos_jkl ); + + arg = tel / poem; + if ( arg > 1.0 ) arg = 1.0; + if ( arg < -1.0 ) arg = -1.0; + + + /* fprintf( out_control->etor, + "%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f%12.6f\n", + htra, htrb, htrc, hthd, hthe, hnra, hnrc, hnhd, hnhe ); + fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n", + dvec_ij[0]/r_ij, dvec_ij[1]/r_ij, dvec_ij[2]/r_ij ); + fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n", + -dvec_jk[0]/r_jk, -dvec_jk[1]/r_jk, -dvec_jk[2]/r_jk ); + fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n", + -dvec_kl[0]/r_kl, -dvec_kl[1]/r_kl, -dvec_kl[2]/r_kl ); + fprintf( out_control->etor, "%12.6f%12.6f%12.6f%12.6f\n", + r_li, dvec_li[0], dvec_li[1], dvec_li[2] ); + fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n", + poem, tel, arg ); */ + /* fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n", + -p_ijk->dcos_dk[0]/sin_ijk, -p_ijk->dcos_dk[1]/sin_ijk, + -p_ijk->dcos_dk[2]/sin_ijk ); + fprintf( out_control->etor, "%12.6f%12.6f%12.6f\n", + -p_jkl->dcos_dk[0]/sin_jkl, -p_jkl->dcos_dk[1]/sin_jkl, + -p_jkl->dcos_dk[2]/sin_jkl );*/ + + if ( sin_ijk >= 0 && sin_ijk <= MIN_SINE ) sin_ijk = MIN_SINE; + else if ( sin_ijk <= 0 && sin_ijk >= -MIN_SINE ) sin_ijk = -MIN_SINE; + if ( sin_jkl >= 0 && sin_jkl <= MIN_SINE ) sin_jkl = MIN_SINE; + else if ( sin_jkl <= 0 && sin_jkl >= -MIN_SINE ) sin_jkl = -MIN_SINE; + + // dcos_omega_di + rvec_ScaledSum( dcos_omega_di, (htra - arg * hnra) / r_ij, dvec_ij, -1., dvec_li ); + rvec_ScaledAdd( dcos_omega_di, -(hthd - arg * hnhd) / sin_ijk, p_ijk->dcos_dk ); + rvec_Scale( dcos_omega_di, 2.0 / poem, dcos_omega_di ); + + // dcos_omega_dj + rvec_ScaledSum( dcos_omega_dj, -(htra - arg * hnra) / r_ij, dvec_ij, + -htrb / r_jk, dvec_jk ); + rvec_ScaledAdd( dcos_omega_dj, -(hthd - arg * hnhd) / sin_ijk, p_ijk->dcos_dj ); + rvec_ScaledAdd( dcos_omega_dj, -(hthe - arg * hnhe) / sin_jkl, p_jkl->dcos_di ); + rvec_Scale( dcos_omega_dj, 2.0 / poem, dcos_omega_dj ); + + // dcos_omega_dk + rvec_ScaledSum( dcos_omega_dk, -(htrc - arg * hnrc) / r_kl, dvec_kl, + htrb / r_jk, dvec_jk ); + rvec_ScaledAdd( dcos_omega_dk, -(hthd - arg * hnhd) / sin_ijk, p_ijk->dcos_di ); + rvec_ScaledAdd( dcos_omega_dk, -(hthe - arg * hnhe) / sin_jkl, p_jkl->dcos_dj ); + rvec_Scale( dcos_omega_dk, 2.0 / poem, dcos_omega_dk ); + + // dcos_omega_dl + rvec_ScaledSum( dcos_omega_dl, (htrc - arg * hnrc) / r_kl, dvec_kl, 1., dvec_li ); + rvec_ScaledAdd( dcos_omega_dl, -(hthe - arg * hnhe) / sin_jkl, p_jkl->dcos_dk ); + rvec_Scale( dcos_omega_dl, 2.0 / poem, dcos_omega_dl ); + + return omega; +} + + + + + + +real Old_Calculate_Omega( rvec dvec_ij, real r_ij, + rvec dvec_jk, real r_jk, + rvec dvec_kl, real r_kl, + rvec dvec_li, real r_li, + three_body_interaction_data *p_ijk, + three_body_interaction_data *p_jkl, + rvec dcos_omega_di, rvec dcos_omega_dj, + rvec dcos_omega_dk, rvec dcos_omega_dl, + output_controls *out_control ) +{ + real unnorm_cos_omega, unnorm_sin_omega, omega; + real sin_ijk, cos_ijk, sin_jkl, cos_jkl; + real htra, htrb, htrc, hthd, hthe, hnra, hnrc, hnhd, hnhe; + real arg, poem, tel; + rvec cross_jk_kl; + sin_ijk = SIN( p_ijk->theta ); cos_ijk = COS( p_ijk->theta ); sin_jkl = SIN( p_jkl->theta ); @@ -148,9 +264,481 @@ real Calculate_Omega( rvec dvec_ij, real r_ij, return omega; } +// Basically a copy from PuReMD, because Old_Torsion_Angles had some issue +void Torsion_Angles( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) +{ + int i, j, k, l, pi, pj, pk, pl, pij, plk, natoms; + int type_i, type_j, type_k, type_l; + int start_j, end_j, start_k, end_k; + int start_pj, end_pj, start_pk, end_pk; + int num_frb_intrs = 0; + real Delta_j, Delta_k; + real r_ij, r_jk, r_kl, r_li; + real BOA_ij, BOA_jk, BOA_kl; -void Torsion_Angles( reax_system *system, control_params *control, + real exp_tor2_ij, exp_tor2_jk, exp_tor2_kl; + real exp_tor1, exp_tor3_DjDk, exp_tor4_DjDk, exp_tor34_inv; + real exp_cot2_jk, exp_cot2_ij, exp_cot2_kl; + real fn10, f11_DjDk, dfn11, fn12; + real theta_ijk, theta_jkl; + real sin_ijk, sin_jkl; + real cos_ijk, cos_jkl; + real tan_ijk_i, tan_jkl_i; + real omega, cos_omega, cos2omega, cos3omega; + rvec dcos_omega_di, dcos_omega_dj, dcos_omega_dk, dcos_omega_dl; + real CV, cmn, CEtors1, CEtors2, CEtors3, CEtors4; + real CEtors5, CEtors6, CEtors7, CEtors8, CEtors9; + real Cconj, CEconj1, CEconj2, CEconj3; + real CEconj4, CEconj5, CEconj6; + real e_tor, e_con; + rvec dvec_li; + rvec force, ext_press; + ivec rel_box_jl; + // rtensor total_rtensor, temp_rtensor; + four_body_header *fbh; + four_body_parameters *fbp; + bond_data *pbond_ij, *pbond_jk, *pbond_kl; + bond_order_data *bo_ij, *bo_jk, *bo_kl; + three_body_interaction_data *p_ijk, *p_jkl; + real p_tor2 = system->reax_param.gp.l[23]; + real p_tor3 = system->reax_param.gp.l[24]; + real p_tor4 = system->reax_param.gp.l[25]; + real p_cot2 = system->reax_param.gp.l[27]; + reax_list *bonds = (*lists) + BONDS; + reax_list *thb_intrs = (*lists) + THREE_BODIES; + // char fname[100]; + // FILE *ftor; + + // sprintf( fname, "tor%d.out", system->my_rank ); + // ftor = fopen( fname, "w" ); + + natoms = system->n; + + for ( j = 0; j < natoms; ++j ) + { + type_j = system->my_atoms[j].type; + Delta_j = workspace->Delta_boc[j]; + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); + + for ( pk = start_j; pk < end_j; ++pk ) + { + // DANIEL + //pbond_jk = &( bonds->bond_list[pk] ); + pbond_jk = &( bonds->select.bond_list[pk] ); + + k = pbond_jk->nbr; + bo_jk = &( pbond_jk->bo_data ); + BOA_jk = bo_jk->BO - control->thb_cut; + + /* see if there are any 3-body interactions involving j&k + where j is the central atom. Otherwise there is no point in + trying to form a 4-body interaction out of this neighborhood */ + if ( system->my_atoms[j].orig_id < system->my_atoms[k].orig_id && + bo_jk->BO > control->thb_cut/*0*/ && Num_Entries(pk, thb_intrs) ) + { + start_k = Start_Index(k, bonds); + end_k = End_Index(k, bonds); + pj = pbond_jk->sym_index; // pj points to j on k's list + + /* do the same check as above: + are there any 3-body interactions involving k&j + where k is the central atom */ + if ( Num_Entries(pj, thb_intrs) ) + { + type_k = system->my_atoms[k].type; + Delta_k = workspace->Delta_boc[k]; + r_jk = pbond_jk->d; + + start_pk = Start_Index(pk, thb_intrs ); + end_pk = End_Index(pk, thb_intrs ); + start_pj = Start_Index(pj, thb_intrs ); + end_pj = End_Index(pj, thb_intrs ); + + exp_tor2_jk = exp( -p_tor2 * BOA_jk ); + exp_cot2_jk = exp( -p_cot2 * SQR(BOA_jk - 1.5) ); + exp_tor3_DjDk = exp( -p_tor3 * (Delta_j + Delta_k) ); + exp_tor4_DjDk = exp( p_tor4 * (Delta_j + Delta_k) ); + exp_tor34_inv = 1.0 / (1.0 + exp_tor3_DjDk + exp_tor4_DjDk); + f11_DjDk = (2.0 + exp_tor3_DjDk) * exp_tor34_inv; + + + /* pick i up from j-k interaction where j is the central atom */ + for ( pi = start_pk; pi < end_pk; ++pi ) + { + // DANIEL + // p_ijk = &( thb_intrs->three_body_list[pi] ); + p_ijk = &( thb_intrs->select.three_body_list[pi] ); + + pij = p_ijk->pthb; // pij is pointer to i on j's bond_list + + // DANIEL + pbond_ij = &( bonds->select.bond_list[pij] ); + + bo_ij = &( pbond_ij->bo_data ); + + + if ( bo_ij->BO > control->thb_cut/*0*/ ) + { + i = p_ijk->thb; + type_i = system->my_atoms[i].type; + r_ij = pbond_ij->d; + BOA_ij = bo_ij->BO - control->thb_cut; + + theta_ijk = p_ijk->theta; + sin_ijk = sin( theta_ijk ); + cos_ijk = cos( theta_ijk ); + //tan_ijk_i = 1. / tan( theta_ijk ); + if ( sin_ijk >= 0 && sin_ijk <= MIN_SINE ) + tan_ijk_i = cos_ijk / MIN_SINE; + else if ( sin_ijk <= 0 && sin_ijk >= -MIN_SINE ) + tan_ijk_i = cos_ijk / -MIN_SINE; + else tan_ijk_i = cos_ijk / sin_ijk; + + exp_tor2_ij = exp( -p_tor2 * BOA_ij ); + exp_cot2_ij = exp( -p_cot2 * SQR(BOA_ij - 1.5) ); + + + /* pick l up from j-k interaction where k is the central atom */ + for ( pl = start_pj; pl < end_pj; ++pl ) + { + //DANIEL + // More mechanical datastructure changes + p_jkl = &( thb_intrs->select.three_body_list[pl] ); + l = p_jkl->thb; + plk = p_jkl->pthb; //pointer to l on k's bond_list! + pbond_kl = &( bonds->select.bond_list[plk] ); + bo_kl = &( pbond_kl->bo_data ); + type_l = system->my_atoms[l].type; + // DANIEL + //fbh = &(system->reax_param.fbp[type_i][type_j] + // [type_k][type_l]); + //fbp = &(system->reax_param.fbp[type_i][type_j] + // [type_k][type_l].prm[0]); + fbh = &(system->reax_param.fbp[index_fbp (type_i, type_j, type_k, type_l, system->reax_param.num_atom_types)]); + fbp = &(system->reax_param.fbp[index_fbp (type_i, type_j, type_k, type_l, system->reax_param.num_atom_types)].prm[0]); + + + if ( i != l && fbh->cnt && + bo_kl->BO > control->thb_cut/*0*/ && + bo_ij->BO * bo_jk->BO * bo_kl->BO > control->thb_cut/*0*/ ) + { + ++num_frb_intrs; + r_kl = pbond_kl->d; + BOA_kl = bo_kl->BO - control->thb_cut; + + theta_jkl = p_jkl->theta; + sin_jkl = sin( theta_jkl ); + cos_jkl = cos( theta_jkl ); + //tan_jkl_i = 1. / tan( theta_jkl ); + if ( sin_jkl >= 0 && sin_jkl <= MIN_SINE ) + tan_jkl_i = cos_jkl / MIN_SINE; + else if ( sin_jkl <= 0 && sin_jkl >= -MIN_SINE ) + tan_jkl_i = cos_jkl / -MIN_SINE; + else tan_jkl_i = cos_jkl / sin_jkl; + + rvec_ScaledSum( dvec_li, 1., system->my_atoms[i].x, + -1., system->my_atoms[l].x ); + r_li = rvec_Norm( dvec_li ); + + + /* omega and its derivative */ + omega = Calculate_Omega( pbond_ij->dvec, r_ij, + pbond_jk->dvec, r_jk, + pbond_kl->dvec, r_kl, + dvec_li, r_li, + p_ijk, p_jkl, + dcos_omega_di, dcos_omega_dj, + dcos_omega_dk, dcos_omega_dl, + out_control ); + + cos_omega = cos( omega ); + cos2omega = cos( 2. * omega ); + cos3omega = cos( 3. * omega ); + /* end omega calculations */ + + /* torsion energy */ + exp_tor1 = exp( fbp->p_tor1 * + SQR(2.0 - bo_jk->BO_pi - f11_DjDk) ); + exp_tor2_kl = exp( -p_tor2 * BOA_kl ); + exp_cot2_kl = exp( -p_cot2 * SQR(BOA_kl - 1.5) ); + fn10 = (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk) * + (1.0 - exp_tor2_kl); + + CV = 0.5 * ( fbp->V1 * (1.0 + cos_omega) + + fbp->V2 * exp_tor1 * (1.0 - cos2omega) + + fbp->V3 * (1.0 + cos3omega) ); + + data->my_en.e_tor += e_tor = fn10 * sin_ijk * sin_jkl * CV; + + + dfn11 = (-p_tor3 * exp_tor3_DjDk + + (p_tor3 * exp_tor3_DjDk - p_tor4 * exp_tor4_DjDk) * + (2.0 + exp_tor3_DjDk) * exp_tor34_inv) * + exp_tor34_inv; + + CEtors1 = sin_ijk * sin_jkl * CV; + + CEtors2 = -fn10 * 2.0 * fbp->p_tor1 * fbp->V2 * exp_tor1 * + (2.0 - bo_jk->BO_pi - f11_DjDk) * (1.0 - SQR(cos_omega)) * + sin_ijk * sin_jkl; + CEtors3 = CEtors2 * dfn11; + + CEtors4 = CEtors1 * p_tor2 * exp_tor2_ij * + (1.0 - exp_tor2_jk) * (1.0 - exp_tor2_kl); + CEtors5 = CEtors1 * p_tor2 * + (1.0 - exp_tor2_ij) * exp_tor2_jk * (1.0 - exp_tor2_kl); + CEtors6 = CEtors1 * p_tor2 * + (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk) * exp_tor2_kl; + + cmn = -fn10 * CV; + CEtors7 = cmn * sin_jkl * tan_ijk_i; + CEtors8 = cmn * sin_ijk * tan_jkl_i; + + CEtors9 = fn10 * sin_ijk * sin_jkl * + (0.5 * fbp->V1 - 2.0 * fbp->V2 * exp_tor1 * cos_omega + + 1.5 * fbp->V3 * (cos2omega + 2.0 * SQR(cos_omega))); + /* end of torsion energy */ + + + /* 4-body conjugation energy */ + fn12 = exp_cot2_ij * exp_cot2_jk * exp_cot2_kl; + data->my_en.e_con += e_con = + fbp->p_cot1 * fn12 * + (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jkl); + + Cconj = -2.0 * fn12 * fbp->p_cot1 * p_cot2 * + (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jkl); + + CEconj1 = Cconj * (BOA_ij - 1.5e0); + CEconj2 = Cconj * (BOA_jk - 1.5e0); + CEconj3 = Cconj * (BOA_kl - 1.5e0); + + CEconj4 = -fbp->p_cot1 * fn12 * + (SQR(cos_omega) - 1.0) * sin_jkl * tan_ijk_i; + CEconj5 = -fbp->p_cot1 * fn12 * + (SQR(cos_omega) - 1.0) * sin_ijk * tan_jkl_i; + CEconj6 = 2.0 * fbp->p_cot1 * fn12 * + cos_omega * sin_ijk * sin_jkl; + /* end 4-body conjugation energy */ + + /* forces */ + bo_jk->Cdbopi += CEtors2; + workspace->CdDelta[j] += CEtors3; + workspace->CdDelta[k] += CEtors3; + bo_ij->Cdbo += (CEtors4 + CEconj1); + bo_jk->Cdbo += (CEtors5 + CEconj2); + bo_kl->Cdbo += (CEtors6 + CEconj3); + + if ( control->virial == 0 ) + { + /* dcos_theta_ijk */ + rvec_ScaledAdd( workspace->f[i], + CEtors7 + CEconj4, p_ijk->dcos_dk ); + rvec_ScaledAdd( workspace->f[j], + CEtors7 + CEconj4, p_ijk->dcos_dj ); + rvec_ScaledAdd( workspace->f[k], + CEtors7 + CEconj4, p_ijk->dcos_di ); + + /* dcos_theta_jkl */ + rvec_ScaledAdd( workspace->f[j], + CEtors8 + CEconj5, p_jkl->dcos_di ); + rvec_ScaledAdd( workspace->f[k], + CEtors8 + CEconj5, p_jkl->dcos_dj ); + rvec_ScaledAdd( workspace->f[l], + CEtors8 + CEconj5, p_jkl->dcos_dk ); + + /* dcos_omega */ + rvec_ScaledAdd( workspace->f[i], + CEtors9 + CEconj6, dcos_omega_di ); + rvec_ScaledAdd( workspace->f[j], + CEtors9 + CEconj6, dcos_omega_dj ); + rvec_ScaledAdd( workspace->f[k], + CEtors9 + CEconj6, dcos_omega_dk ); + rvec_ScaledAdd( workspace->f[l], + CEtors9 + CEconj6, dcos_omega_dl ); + } + else + { + ivec_Sum(rel_box_jl, pbond_jk->rel_box, pbond_kl->rel_box); + + /* dcos_theta_ijk */ + rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_dk ); + rvec_Add( workspace->f[i], force ); + rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); + rvec_Add( data->my_ext_press, ext_press ); + + rvec_ScaledAdd( workspace->f[j], + CEtors7 + CEconj4, p_ijk->dcos_dj ); + + rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_di ); + rvec_Add( workspace->f[k], force ); + rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); + rvec_Add( data->my_ext_press, ext_press ); + + + /* dcos_theta_jkl */ + rvec_ScaledAdd( workspace->f[j], + CEtors8 + CEconj5, p_jkl->dcos_di ); + + rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dj ); + rvec_Add( workspace->f[k], force ); + rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); + rvec_Add( data->my_ext_press, ext_press ); + + rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dk ); + rvec_Add( workspace->f[l], force ); + rvec_iMultiply( ext_press, rel_box_jl, force ); + rvec_Add( data->my_ext_press, ext_press ); + + + /* dcos_omega */ + rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_di ); + rvec_Add( workspace->f[i], force ); + rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); + rvec_Add( data->my_ext_press, ext_press ); + + rvec_ScaledAdd( workspace->f[j], + CEtors9 + CEconj6, dcos_omega_dj ); + + rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_dk ); + rvec_Add( workspace->f[k], force ); + rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); + rvec_Add( data->my_ext_press, ext_press ); + + rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_dl ); + rvec_Add( workspace->f[l], force ); + rvec_iMultiply( ext_press, rel_box_jl, force ); + rvec_Add( data->my_ext_press, ext_press ); + } + +#ifdef TEST_ENERGY + /* fprintf( out_control->etor, + "%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", + r_ij, r_jk, r_kl, cos_ijk, cos_jkl, sin_ijk, sin_jkl ); + fprintf( out_control->etor, "%12.8f\n", dfn11 ); */ + /* fprintf( out_control->etor, + "%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", + CEtors2, CEtors3, CEtors4, CEtors5, CEtors6, + CEtors7, CEtors8, CEtors9 ); */ + /* fprintf( out_control->etor, + "%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", + htra, htrb, htrc, hthd, hthe, hnra, hnrc, hnhd, hnhe ); */ + /* fprintf( out_control->etor, + "%12.8f%12.8f%12.8f%12.8f%12.8f%12.8f\n", + CEconj1, CEconj2, CEconj3, CEconj4, CEconj5, CEconj6 ); */ + + /* fprintf( out_control->etor, "%12.6f%12.6f%12.6f%12.6f\n", + fbp->V1, fbp->V2, fbp->V3, fbp->p_tor1 );*/ + + fprintf(out_control->etor, + //"%6d%6d%6d%6d%24.15e%24.15e%24.15e%24.15e\n", + "%6d%6d%6d%6d%12.4f%12.4f%12.4f%12.4f\n", + system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, + system->my_atoms[k].orig_id, system->my_atoms[l].orig_id, + RAD2DEG(omega), BOA_jk, e_tor, data->my_en.e_tor ); + + fprintf(out_control->econ, + //"%6d%6d%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", + "%6d%6d%6d%6d%12.4f%12.4f%12.4f%12.4f%12.4f%12.4f\n", + system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, + system->my_atoms[k].orig_id, system->my_atoms[l].orig_id, + RAD2DEG(omega), BOA_ij, BOA_jk, BOA_kl, + e_con, data->my_en.e_con ); +#endif + +#ifdef TEST_FORCES + /* Torsion Forces */ + Add_dBOpinpi2( system, lists, j, pk, CEtors2, 0.0, + workspace->f_tor, workspace->f_tor ); + Add_dDelta( system, lists, j, CEtors3, workspace->f_tor ); + Add_dDelta( system, lists, k, CEtors3, workspace->f_tor ); + Add_dBO( system, lists, j, pij, CEtors4, workspace->f_tor ); + Add_dBO( system, lists, j, pk, CEtors5, workspace->f_tor ); + Add_dBO( system, lists, k, plk, CEtors6, workspace->f_tor ); + + rvec_ScaledAdd( workspace->f_tor[i], + CEtors7, p_ijk->dcos_dk ); + rvec_ScaledAdd( workspace->f_tor[j], + CEtors7, p_ijk->dcos_dj ); + + rvec_ScaledAdd( workspace->f_tor[k], + CEtors7, p_ijk->dcos_di ); + + rvec_ScaledAdd( workspace->f_tor[j], + CEtors8, p_jkl->dcos_di ); + rvec_ScaledAdd( workspace->f_tor[k], + CEtors8, p_jkl->dcos_dj ); + rvec_ScaledAdd( workspace->f_tor[l], + CEtors8, p_jkl->dcos_dk ); + + rvec_ScaledAdd( workspace->f_tor[i], + CEtors9, dcos_omega_di ); + rvec_ScaledAdd( workspace->f_tor[j], + CEtors9, dcos_omega_dj ); + rvec_ScaledAdd( workspace->f_tor[k], + CEtors9, dcos_omega_dk ); + rvec_ScaledAdd( workspace->f_tor[l], + CEtors9, dcos_omega_dl ); + + /* Conjugation Forces */ + Add_dBO( system, lists, j, pij, CEconj1, workspace->f_con ); + Add_dBO( system, lists, j, pk, CEconj2, workspace->f_con ); + Add_dBO( system, lists, k, plk, CEconj3, workspace->f_con ); + + rvec_ScaledAdd( workspace->f_con[i], + CEconj4, p_ijk->dcos_dk ); + rvec_ScaledAdd( workspace->f_con[j], + CEconj4, p_ijk->dcos_dj ); + rvec_ScaledAdd( workspace->f_con[k], + CEconj4, p_ijk->dcos_di ); + + rvec_ScaledAdd( workspace->f_con[j], + CEconj5, p_jkl->dcos_di ); + rvec_ScaledAdd( workspace->f_con[k], + CEconj5, p_jkl->dcos_dj ); + rvec_ScaledAdd( workspace->f_con[l], + CEconj5, p_jkl->dcos_dk ); + + rvec_ScaledAdd( workspace->f_con[i], + CEconj6, dcos_omega_di ); + rvec_ScaledAdd( workspace->f_con[j], + CEconj6, dcos_omega_dj ); + rvec_ScaledAdd( workspace->f_con[k], + CEconj6, dcos_omega_dk ); + rvec_ScaledAdd( workspace->f_con[l], + CEconj6, dcos_omega_dl ); +#endif + } // pl check ends + } // pl loop ends + } // pi check ends + } // pi loop ends + } // k-j neighbor check ends + } // j<k && j-k neighbor check ends + } // pk loop ends + } // j loop + +#if defined(DEBUG) + fprintf( stderr, "Number of torsion angles: %d\n", num_frb_intrs ); + fprintf( stderr, "Torsion Energy: %g\t Conjugation Energy: %g\n", + data->my_en.e_tor, data->my_en.e_con ); + + fprintf( stderr, "4body: ext_press (%12.6f %12.6f %12.6f)\n", + data->ext_press[0], data->ext_press[1], data->ext_press[2] ); +#endif +} + + + + + + + + +void Old_Torsion_Angles( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, output_controls *out_control ) { diff --git a/PG-PuReMD/src/valence_angles.c b/PG-PuReMD/src/valence_angles.c index 0fe67007eda9fd078dcd4187b7cc11d127be60e5..905fbbf82733dab2d9d06b5cc1fab2dbcbde584f 100644 --- a/PG-PuReMD/src/valence_angles.c +++ b/PG-PuReMD/src/valence_angles.c @@ -20,7 +20,6 @@ ----------------------------------------------------------------------*/ #include "reax_types.h" -#include "index_utils.h" #if defined(PURE_REAX) #include "valence_angles.h" #include "bond_orders.h" @@ -42,7 +41,7 @@ void Calculate_Theta( rvec dvec_ji, real d_ji, rvec dvec_jk, real d_jk, if ( *cos_theta > 1. ) *cos_theta = 1.0; if ( *cos_theta < -1. ) *cos_theta = -1.0; - (*theta) = ACOS( *cos_theta ); + (*theta) = acos( *cos_theta ); } @@ -56,7 +55,7 @@ void Calculate_dCos_Theta( rvec dvec_ji, real d_ji, rvec dvec_jk, real d_jk, real sqr_d_ji = SQR(d_ji); real sqr_d_jk = SQR(d_jk); real inv_dists = 1.0 / (d_ji * d_jk); - real inv_dists3 = POW( inv_dists, 3 ); + real inv_dists3 = pow( inv_dists, 3 ); real dot_dvecs = Dot( dvec_ji, dvec_jk, 3 ); real Cdot_inv3 = dot_dvecs * inv_dists3; @@ -138,7 +137,7 @@ void Valence_Angles( reax_system *system, control_params *control, temp = SQR( bo_jt->BO ); temp *= temp; temp *= temp; - prod_SBO *= EXP( -temp ); + prod_SBO *= exp( -temp ); } /* modifications to match Adri's code - 09/01/09 */ @@ -160,18 +159,18 @@ void Valence_Angles( reax_system *system, control_params *control, SBO2 = 0, CSBO2 = 0; else if ( SBO > 0 && SBO <= 1 ) { - SBO2 = POW( SBO, p_val9 ); - CSBO2 = p_val9 * POW( SBO, p_val9 - 1 ); + SBO2 = pow( SBO, p_val9 ); + CSBO2 = p_val9 * pow( SBO, p_val9 - 1 ); } else if ( SBO > 1 && SBO < 2 ) { - SBO2 = 2 - POW( 2 - SBO, p_val9 ); - CSBO2 = p_val9 * POW( 2 - SBO, p_val9 - 1 ); + SBO2 = 2 - pow( 2 - SBO, p_val9 ); + CSBO2 = p_val9 * pow( 2 - SBO, p_val9 - 1 ); } else SBO2 = 2, CSBO2 = 0; - expval6 = EXP( p_val6 * workspace->Delta_boc[j] ); + expval6 = exp( p_val6 * workspace->Delta_boc[j] ); for ( pi = start_j; pi < end_j; ++pi ) { @@ -181,16 +180,9 @@ void Valence_Angles( reax_system *system, control_params *control, BOA_ij = bo_ij->BO - control->thb_cut; - //TODO REMOVE THIS - //TODO REMOVE THIS - //TODO REMOVE THIS - //TODO REMOVE THIS - //TODO REMOVE THIS - //TODO REMOVE THIS if ( BOA_ij/*bo_ij->BO*/ > 0.0 && ( j < system->n || pbond_ij->nbr < system->n ) ) { - //if( BOA_ij/*bo_ij->BO*/ > 0.0 ) { i = pbond_ij->nbr; r_ij = pbond_ij->d; type_i = system->my_atoms[i].type; @@ -235,10 +227,6 @@ void Valence_Angles( reax_system *system, control_params *control, type_k = system->my_atoms[k].type; p_ijk = &( thb_intrs->select.three_body_list[num_thb_intrs] ); - //CHANGE ORIGINAL - if ((BOA_jk <= 0) || ((j >= system->n) && (k >= system->n))) continue; - //CHANGE ORIGINAL - Calculate_Theta( pbond_ij->dvec, pbond_ij->d, pbond_jk->dvec, pbond_jk->d, &theta, &cos_theta ); @@ -251,7 +239,7 @@ void Valence_Angles( reax_system *system, control_params *control, p_ijk->pthb = pk; p_ijk->theta = theta; - sin_theta = SIN( theta ); + sin_theta = sin( theta ); if ( sin_theta < 1.0e-5 ) sin_theta = 1.0e-5; @@ -262,9 +250,17 @@ void Valence_Angles( reax_system *system, control_params *control, (bo_ij->BO * bo_jk->BO > SQR(control->thb_cut)/*0*/) ) { r_jk = pbond_jk->d; - //SUDHIR + + +// (type_i * system->reax_param.num_atom_types * system->reax_param.num_atom_types ) + (type_j * system->reax_param.num_atom_types ) + type_k; + + + + //thbh = &( system->reax_param.thbp[ type_i ][ type_j ][ type_k ] ); - thbh = &( system->reax_param.thbp[ index_thbp (type_i, type_j, type_k, system->reax_param.num_atom_types) ] ); + thbh = &( system->reax_param.thbp[ (type_i * system->reax_param.num_atom_types * system->reax_param.num_atom_types ) + (type_j * system->reax_param.num_atom_types ) + type_k + ] ); + /* if( system->my_atoms[i].orig_id < system->my_atoms[k].orig_id ) fprintf( fval, "%6d %6d %6d %7.3f %7.3f %7.3f\n", @@ -295,15 +291,15 @@ void Valence_Angles( reax_system *system, control_params *control, p_val7 = thbp->p_val7; theta_00 = thbp->theta_00; - exp3ij = EXP( -p_val3 * POW( BOA_ij, p_val4 ) ); + exp3ij = exp( -p_val3 * pow( BOA_ij, p_val4 ) ); f7_ij = 1.0 - exp3ij; - Cf7ij = p_val3 * p_val4 * POW( BOA_ij, p_val4 - 1.0 ) * exp3ij; + Cf7ij = p_val3 * p_val4 * pow( BOA_ij, p_val4 - 1.0 ) * exp3ij; - exp3jk = EXP( -p_val3 * POW( BOA_jk, p_val4 ) ); + exp3jk = exp( -p_val3 * pow( BOA_jk, p_val4 ) ); f7_jk = 1.0 - exp3jk; - Cf7jk = p_val3 * p_val4 * POW( BOA_jk, p_val4 - 1.0 ) * exp3jk; + Cf7jk = p_val3 * p_val4 * pow( BOA_jk, p_val4 - 1.0 ) * exp3jk; - expval7 = EXP( -p_val7 * workspace->Delta_boc[j] ); + expval7 = exp( -p_val7 * workspace->Delta_boc[j] ); trm8 = 1.0 + expval6 + expval7; f8_Dj = p_val5 - ( (p_val5 - 1.0) * (2.0 + expval6) / trm8 ); Cf8j = ( (1.0 - p_val5) / SQR(trm8) ) * @@ -311,10 +307,10 @@ void Valence_Angles( reax_system *system, control_params *control, (2.0 + expval6) * ( p_val6 * expval6 - p_val7 * expval7 ) ); theta_0 = 180.0 - theta_00 * (1.0 - - EXP(-p_val10 * (2.0 - SBO2))); + exp(-p_val10 * (2.0 - SBO2))); theta_0 = DEG2RAD( theta_0 ); - expval2theta = EXP( -p_val2 * SQR(theta_0 - theta) ); + expval2theta = exp( -p_val2 * SQR(theta_0 - theta) ); if ( p_val1 >= 0 ) expval12theta = p_val1 * (1.0 - expval2theta); else // To avoid linear Me-H-Me angles (6/6/06) @@ -345,10 +341,10 @@ void Valence_Angles( reax_system *system, control_params *control, p_pen3 = system->reax_param.gp.l[20]; p_pen4 = system->reax_param.gp.l[21]; - exp_pen2ij = EXP( -p_pen2 * SQR( BOA_ij - 2.0 ) ); - exp_pen2jk = EXP( -p_pen2 * SQR( BOA_jk - 2.0 ) ); - exp_pen3 = EXP( -p_pen3 * workspace->Delta[j] ); - exp_pen4 = EXP( p_pen4 * workspace->Delta[j] ); + exp_pen2ij = exp( -p_pen2 * SQR( BOA_ij - 2.0 ) ); + exp_pen2jk = exp( -p_pen2 * SQR( BOA_jk - 2.0 ) ); + exp_pen3 = exp( -p_pen3 * workspace->Delta[j] ); + exp_pen4 = exp( p_pen4 * workspace->Delta[j] ); trm_pen34 = 1.0 + exp_pen3 + exp_pen4; f9_Dj = ( 2.0 + exp_pen3 ) / trm_pen34; Cf9j = ( -p_pen3 * exp_pen3 * trm_pen34 - @@ -372,13 +368,13 @@ void Valence_Angles( reax_system *system, control_params *control, p_coa3 = system->reax_param.gp.l[38]; p_coa4 = system->reax_param.gp.l[30]; - exp_coa2 = EXP( p_coa2 * workspace->Delta_boc[j] ); + exp_coa2 = exp( p_coa2 * workspace->Delta_boc[j] ); data->my_en.e_coa += e_coa = p_coa1 / (1. + exp_coa2) * - EXP( -p_coa3 * SQR(workspace->total_bond_order[i] - BOA_ij) ) * - EXP( -p_coa3 * SQR(workspace->total_bond_order[k] - BOA_jk) ) * - EXP( -p_coa4 * SQR(BOA_ij - 1.5) ) * - EXP( -p_coa4 * SQR(BOA_jk - 1.5) ); + exp( -p_coa3 * SQR(workspace->total_bond_order[i] - BOA_ij) ) * + exp( -p_coa3 * SQR(workspace->total_bond_order[k] - BOA_jk) ) * + exp( -p_coa4 * SQR(BOA_ij - 1.5) ) * + exp( -p_coa4 * SQR(BOA_jk - 1.5) ); CEcoa1 = -2 * p_coa4 * (BOA_ij - 1.5) * e_coa; CEcoa2 = -2 * p_coa4 * (BOA_jk - 1.5) * e_coa; @@ -405,7 +401,7 @@ void Valence_Angles( reax_system *system, control_params *control, pBOjt7 = temp * temp * temp_bo_jt; // fprintf( out_control->eval, "%6d%12.8f\n", - // workspace->reverse_map[bonds->select.bond_list[t].nbr], + // workspace->reverse_map[bonds->bond_list[t].nbr], // (CEval6 * pBOjt7) ); bo_jt->Cdbo += (CEval6 * pBOjt7); @@ -502,7 +498,7 @@ void Valence_Angles( reax_system *system, control_params *control, for ( t = start_j; t < end_j; ++t ) { - pbond_jt = &( bonds->select.bond_list[t] ); + pbond_jt = &( bonds->bond_list[t] ); bo_jt = &(pbond_jt->bo_data); temp_bo_jt = bo_jt->BO; temp = CUBE( temp_bo_jt ); diff --git a/PuReMD/src/forces.c b/PuReMD/src/forces.c index 923bf93dfedda686e31be159fa0271fd9972a1e3..888c6acae9db372721326582558605acc73c464e 100644 --- a/PuReMD/src/forces.c +++ b/PuReMD/src/forces.c @@ -246,6 +246,33 @@ void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, MPI_Abort( comm, INSUFFICIENT_MEMORY ); } } +/* + if ( Hindex > -1 ) + { + system->my_atoms[i].num_hbonds = + MAX( Num_Entries(Hindex, hbonds) * SAFER_ZONE, MIN_HBONDS ); +*/ + //if( Num_Entries(i, hbonds) >= + //(Start_Index(i+1,hbonds)-Start_Index(i,hbonds))*0.90/*DANGER_ZONE*/){ + // workspace->realloc.hbonds = 1; +/* + //TODO + if ( Hindex < system->n - 1 ) + comp = Start_Index(Hindex + 1, hbonds); + else comp = hbonds->num_intrs; + + if ( End_Index(Hindex, hbonds) > comp ) + { + fprintf(stderr, "step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n", + step, Hindex, End_Index(Hindex, hbonds), comp ); + MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY ); + } + } + +*/ + + + } } } diff --git a/PuReMD/src/parallelreax.c b/PuReMD/src/parallelreax.c index 7c42f79b9d521ebe7075339b9a81da8d20e7955f..de9eade3f86f23f5ce49f29f4d7641e6fc4f28ae 100644 --- a/PuReMD/src/parallelreax.c +++ b/PuReMD/src/parallelreax.c @@ -138,6 +138,8 @@ int main( int argc, char* argv[] ) smalloc( sizeof(control_params), "control", MPI_COMM_WORLD ); data = (simulation_data *) smalloc( sizeof(simulation_data), "data", MPI_COMM_WORLD ); + + printf("sizeof(storage): %d \n", sizeof(storage)); workspace = (storage *) smalloc( sizeof(storage), "workspace", MPI_COMM_WORLD ); lists = (reax_list **) diff --git a/test_harness/README b/test_harness/README index f0198a0f1675dfc82b24c2233b980bd46edc40de..841e39b437a7a50122615cec10e94c94b8b04fdc 100755 --- a/test_harness/README +++ b/test_harness/README @@ -1,17 +1,29 @@ ###################################################################################################################################################### # -# To run test, open up sim_test_master.sh. This script compiles all executables and qsubs all parts of the test. -# Decide which tests you want to run, comment out the rest. Unless you don't want to recompile, make sure not to comment out ./compile_all_executables. -# Be aware that running sim_test_master in its entirety requests 32 hours of compute time on Laconia. +# This folder contains a test harness to be used for verifying if later versions of the code are correct. It is designed to run on +# Michigan State University's Laconia cluster. Running it on other systems will require modification. # -# To begin test: +# The test checks all three existing versions of the code (the original MPI-only PuReMD, PG-PuReMD, and PG-PuReMD with GPU functionality removed) +# against each other. Each of these versions is put through 11 different benchmark systems, which are run for 100 cycles. # -# ./sim_test_master +# To easily start the test, one should use the following syntax while connected to dev-intel16-k80: # -# When all jobs have completed, run sim_test_disp_output.sh: +# ./sim_test_master.sh +# +# This script compiles all versions of the test and then submits several jobs to the queue. +# Be aware that this runs all benchmarks. It may be awhile before all these finish. For shorter tests, it might be better to comment out +# portions of sim_test_master like the large 300000-atom systems. +# +# Once all jobs have finished, if one desires to view all output in an ordered fashion to compare, the following script is available: # # ./sim_test_disp_output.sh # -# This will output the tails of all .pot files generated by the simulation, for easy comparison. +# If one wants to dump this output to a file: +# +# ./sim_test_disp_output > someoutputfile.txt +# +# If one wants to compare output to an older version, the file sim_output_082516.txt is available. +# +# For any questions please email Daniel Kortemeyer at korteme1@msu.edu # ####################################################################################################################################################### diff --git a/test_harness/bilayer_340800_mpi_not_gpu_control b/test_harness/bilayer_340800_mpi_not_gpu_control new file mode 100644 index 0000000000000000000000000000000000000000..d1646173b69744d7d8076474a39a23844d60b2e7 --- /dev/null +++ b/test_harness/bilayer_340800_mpi_not_gpu_control @@ -0,0 +1,44 @@ +simulation_name bilayer.340800_mpi_not_gpu ! output files will carry this name + their specific ext. +ensemble_type 1 ! 0: NVE 1: Berendsen NVT 2: Nose-Hoover NVT(under testing) 3: semi-isotropic NPT 4: isotropic NPT 5: anisotropic NPT (under development) +nsteps 100 ! number of simulation steps +dt 0.10 ! time step in fs +proc_by_dim 2 2 2 ! distribution of processors by dimensions +geo_format 0 ! 0: custom 1: pdb (only if natoms < 100000) 2: ASCII restart 3: binary restart + +tabulate_long_range 0 ! number of sampling points for cubic spline interpolation, 0 no interpolation +energy_update_freq 10 +remove_CoM_vel 500 ! remove the transrot vel of CoM every 'this many' steps +reposition_atoms 1 ! 1:center of mass to center of box + +reneighbor 1 +vlist_buffer 0 +nbrhood_cutoff 4.5 ! bond cutoff in A +hbond_cutoff 7.5 ! hbond cutoff in A +thb_cutoff 0.001 ! cutoff value for three body interactions + +qeq_freq 1 ! frequency to update charges with QEq +q_err 1e-6 ! norm of the relative residual in QEq solve + +temp_init 0.01 ! initial temperature of the system +temp_final 300.0 ! final temperature of the system +t_mass 500.0 ! 0.16666 for nhNVT ! 500.0 for bNVT, iNPT, sNPT ! in fs, thermal inertia +t_rate 5.0 ! in K +t_freq 1.0 ! in ps +t_mode 2 ! 2: constant slope + +pressure 0.000101325 0.000101325 0.000101325 ! desired pressure of the simulated system in GPa, 1atm = 0.000101325 GPa +p_mass 10000.00 10000.00 10000.00 ! in fs, pressure inertia parameter + +write_freq 0 ! write trajectory after so many steps +traj_method 1 ! 0: simple parallel I/O, 1: MPI I/O +traj_title micelle ! (no white spaces) +atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file +atom_forces 0 ! 0: basic atom format, 1: print force on each atom in the trajectory file +atom_velocities 1 ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file +bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file +angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file + +restart_format 1 ! 0: restarts in ASCII 1: restarts in binary +restart_freq 10000 ! 0: do not output any restart files. >0: output a restart file at every 'this many' steps + +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs diff --git a/test_harness/bilayer_long_mpi_not_gpu.sh b/test_harness/bilayer_long_mpi_not_gpu.sh new file mode 100755 index 0000000000000000000000000000000000000000..c88841ca558c45dd98d967b9e714fe75d00192d0 --- /dev/null +++ b/test_harness/bilayer_long_mpi_not_gpu.sh @@ -0,0 +1,20 @@ +#!/bin/bash -login + +#PBS -l nodes=1:ppn=8:gpus=8 +#PBS -l mem=120gb +#PBS -l walltime=4:00:00 +#PBS -l feature=gpgpu:intel16 + +cd "${PBS_O_WORKDIR}" +#cd /mnt/home/korteme1/PuReMD_new/test_harness + +################ +# MPI-GPU Runs # +################ + +module purge +module load GNU/4.8.2 OpenMPI/1.6.5 CUDA/6.0 + +mpirun -np 8 ../PG-PuReMD/bin/pg-puremd-not-gpu ../data/benchmarks/bilayer/bilayer_340800.geo ../data/benchmarks/bilayer/ffield-bio bilayer_340800_mpi_not_gpu_control + + diff --git a/test_harness/compile_all_executables.sh b/test_harness/compile_all_executables.sh index e9b85bca37c7b51913423d32e5354c7d018204e4..f443665ecb51f32b6765a79c085b90aa2d9b6f3a 100755 --- a/test_harness/compile_all_executables.sh +++ b/test_harness/compile_all_executables.sh @@ -15,10 +15,33 @@ module load autoconf/2.69 automake/1.15 cd .. ./configure --enable-openmp=no --enable-mpi=yes make clean && make -#make cd test_harness -# Run, after each run we must rename the output files, or another run will overwrite it +module purge +module load GNU/4.8.2 OpenMPI/1.6.5 CUDA/6.0 +module load autoconf/2.69 automake/1.15 + +####################### +# MPI_Not_GPU Compile # +####################### + +# Requires an added layer of complexity when dealing with both the mpi_gpu and mpi_not_gpu versions, because they compile to the same name. +# Need to rename the mpi_not_gpu executable before compiling for mpi_gpu + +# Make clean both versions +cd .. +./configure --enable-openmp=no --enable-mpi-gpu=yes +make clean +./configure --enable-openmp=no --enable-mpi-not-gpu=yes +mv PG-PuReMD/bin/pg-puremd-not-gpu PG-PuReMD/bin/pg-puremd +make clean +cd test_harness + +# Compile mpi_not_gpu then rename executable +cd .. +make +mv PG-PuReMD/bin/pg-puremd PG-PuReMD/bin/pg-puremd-not-gpu +cd test_harness ################### # MPI-GPU Compile # @@ -31,11 +54,10 @@ module load autoconf/2.69 automake/1.15 # Compile for MPI-GPU cd .. ./configure --enable-openmp=no --enable-mpi-gpu=yes -make clean && make -#make +make cd test_harness - +echo "Attempted to compile all executables, check if any errors before this point" diff --git a/test_harness/med_jobs.sh b/test_harness/med_jobs.sh index 760c1e4dc47306c7e9d1688db7fc543cebeb2f02..dd1485cd078a4df276995fa253b700ca31bb0c4f 100755 --- a/test_harness/med_jobs.sh +++ b/test_harness/med_jobs.sh @@ -94,37 +94,44 @@ mv petn.out petn_mpi_gpu.out mv petn.pot petn_mpi_gpu.pot mv petn.trj petn_mpi_gpu.trj -# Skipping MPI-NOT-GPU for now, not working yet -if false -then - #################### -# MPI-NOT-GPU Runs # +# MPI_Not_GPU Runs # #################### -# Compile for MPI-NOT-GPU -cd .. -./configure --enable-openmp=no --enable-mpi-not-gpu=yes -make clean && make -cd tools +mpirun -np 2 ../PG-PuReMD/bin/pg-puremd-not-gpu ../data/benchmarks/water/water_78480.geo ../data/benchmarks/water/ffield.water water_78480_control + +mv water.78480.log water.78480_mpi_not_gpu.log +mv water.78480.out water.78480_mpi_not_gpu.out +mv water.78480.pot water.78480_mpi_not_gpu.pot +mv water.78480.trj water.78480_mpi_not_gpu.trj + +mpirun -np 2 ../PG-PuReMD/bin/pg-puremd-not-gpu ../data/benchmarks/silica/silica_72000.geo ../data/benchmarks/silica/ffield-bio silica_72000_control -mpirun -np 2 ../PG-PuReMD/src/pg-puremd water_6540.pdb ffield.water water_6540_control +mv silica.72000.log silica.72000_mpi_not_gpu.log +mv silica.72000.out silica.72000_mpi_not_gpu.out +mv silica.72000.pot silica.72000_mpi_not_gpu.pot +mv silica.72000.trj silica.72000_mpi_not_gpu.trj -mv water.6540.log water.6540_mpi_not_gpu.log -mv water.6540.out water.6540_mpi_not_gpu.out -mv water.6540.pot water.6540_mpi_not_gpu.pot -mv water.6540.trj water.6540_mpi_not_gpu.trj +mpirun -np 2 ../PG-PuReMD/bin/pg-puremd-not-gpu ../data/benchmarks/bilayer/bilayer_56800.pdb ../data/benchmarks/bilayer/ffield-bio bilayer_56800_control -mpirun -np 2 ../PG-PuReMD/src/pg-puremd silica_6000.pdb ffield.water silica_6000_control +mv bilayer.56800.log bilayer.56800_mpi_not_gpu.log +mv bilayer.56800.out bilayer.56800_mpi_not_gpu.out +mv bilayer.56800.pot bilayer.56800_mpi_not_gpu.pot +mv bilayer.56800.trj bilayer.56800_mpi_not_gpu.trj -mv silica.6000.log silica.6000_mpi_not_gpu.log -mv silica.6000.out silica.6000_mpi_not_gpu.out -mv silica.6000.pot silica.6000_mpi_not_gpu.pot -mv silica.6000.trj silica.6000_mpi_not_gpu.trj +mpirun -np 2 ../PG-PuReMD/bin/pg-puremd-not-gpu ../data/benchmarks/dna/dna_19733.pdb ../data/benchmarks/dna/ffield-dna dna_control +mv dna.log dna_mpi_not_gpu.log +mv dna.out dna_mpi_not_gpu.out +mv dna.pot dna_mpi_not_gpu.pot +mv dna.trj dna_mpi_not_gpu.trj -fi +mpirun -np 2 ../PG-PuReMD/bin/pg-puremd-not-gpu ../data/benchmarks/petn/petn_48256.pdb ../data/benchmarks/petn/ffield.petn petn_control +mv petn.log petn_mpi_not_gpu.log +mv petn.out petn_mpi_not_gpu.out +mv petn.pot petn_mpi_not_gpu.pot +mv petn.trj petn_mpi_not_gpu.trj diff --git a/test_harness/short_jobs.sh b/test_harness/short_jobs.sh index 152666d7e174a4411156bdbef33b7af8680f3068..41789ca7fb350739e66a8bfd7a6c99ebf4152f2f 100755 --- a/test_harness/short_jobs.sh +++ b/test_harness/short_jobs.sh @@ -67,36 +67,31 @@ mv zno.6912.out zno.6912_mpi_gpu.out mv zno.6912.pot zno.6912_mpi_gpu.pot mv zno.6912.trj zno.6912_mpi_gpu.trj -# Skipping MPI-NOT-GPU for now, not working yet -if false -then - #################### # MPI-NOT-GPU Runs # #################### -# Compile for MPI-NOT-GPU -cd .. -./configure --enable-openmp=no --enable-mpi-not-gpu=yes -make clean && make -cd tools - -mpirun -np 2 ../PG-PuReMD/src/pg-puremd water_6540.pdb ffield.water water_6540_control +mpirun -np 2 ../PG-PuReMD/bin/pg-puremd-not-gpu ../data/benchmarks/water/water_6540.pdb ../data/benchmarks/water/ffield.water water_6540_control mv water.6540.log water.6540_mpi_not_gpu.log mv water.6540.out water.6540_mpi_not_gpu.out mv water.6540.pot water.6540_mpi_not_gpu.pot mv water.6540.trj water.6540_mpi_not_gpu.trj -mpirun -np 2 ../PG-PuReMD/src/pg-puremd silica_6000.pdb ffield.water silica_6000_control +mpirun -np 2 ../PG-PuReMD/bin/pg-puremd-not-gpu ../data/benchmarks/silica/silica_6000.pdb ../data/benchmarks/silica/ffield-bio silica_6000_control mv silica.6000.log silica.6000_mpi_not_gpu.log mv silica.6000.out silica.6000_mpi_not_gpu.out mv silica.6000.pot silica.6000_mpi_not_gpu.pot mv silica.6000.trj silica.6000_mpi_not_gpu.trj +mpirun -np 2 ../PG-PuReMD/bin/pg-puremd-not-gpu ../data/benchmarks/metal/zno_6912.pdb ../data/benchmarks/metal/ffield.zno zno_6912_control + +mv zno.6912.log zno.6912_mpi_not_gpu.log +mv zno.6912.out zno.6912_mpi_not_gpu.out +mv zno.6912.pot zno.6912_mpi_not_gpu.pot +mv zno.6912.trj zno.6912_mpi_not_gpu.trj -fi diff --git a/test_harness/silica_300000_mpi_not_gpu_control b/test_harness/silica_300000_mpi_not_gpu_control new file mode 100644 index 0000000000000000000000000000000000000000..c03ae7cde56f8d0901f89a09accf634fae4add7a --- /dev/null +++ b/test_harness/silica_300000_mpi_not_gpu_control @@ -0,0 +1,44 @@ +simulation_name silica.300000_mpi_not_gpu ! output files will carry this name + their specific ext. +ensemble_type 1 ! 0: NVE 1: Berendsen NVT 2: Nose-Hoover NVT(under testing) 3: semi-isotropic NPT 4: isotropic NPT 5: anisotropic NPT (under development) +nsteps 100 ! number of simulation steps +dt 0.10 ! time step in fs +proc_by_dim 2 2 2 ! distribution of processors by dimensions +geo_format 0 ! 0: custom 1: pdb (only if natoms < 100000) 2: ASCII restart 3: binary restart + +tabulate_long_range 0 ! number of sampling points for cubic spline interpolation, 0 no interpolation +energy_update_freq 10 +remove_CoM_vel 500 ! remove the transrot vel of CoM every 'this many' steps +reposition_atoms 1 ! 1:center of mass to center of box + +reneighbor 1 +vlist_buffer 0 +nbrhood_cutoff 4.5 ! bond cutoff in A +hbond_cutoff 0 ! hbond cutoff in A +thb_cutoff 0.001 ! cutoff value for three body interactions + +qeq_freq 1 ! frequency to update charges with QEq +q_err 1e-6 ! norm of the relative residual in QEq solve + +temp_init 0.01 ! initial temperature of the system +temp_final 300.0 ! final temperature of the system +t_mass 500.0 ! 0.16666 for nhNVT ! 500.0 for bNVT, iNPT, sNPT ! in fs, thermal inertia +t_rate 5.0 ! in K +t_freq 1.0 ! in ps +t_mode 2 ! 2: constant slope + +pressure 0.000101325 0.000101325 0.000101325 ! desired pressure of the simulated system in GPa, 1atm = 0.000101325 GPa +p_mass 10000.00 10000.00 10000.00 ! in fs, pressure inertia parameter + +write_freq 0 ! write trajectory after so many steps +traj_method 1 ! 0: simple parallel I/O, 1: MPI I/O +traj_title micelle ! (no white spaces) +atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file +atom_forces 0 ! 0: basic atom format, 1: print force on each atom in the trajectory file +atom_velocities 1 ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file +bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file +angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file + +restart_format 1 ! 0: restarts in ASCII 1: restarts in binary +restart_freq 10000 ! 0: do not output any restart files. >0: output a restart file at every 'this many' steps + +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs diff --git a/test_harness/silica_long_mpi_not_gpu.sh b/test_harness/silica_long_mpi_not_gpu.sh new file mode 100755 index 0000000000000000000000000000000000000000..4840ea23b43ecd4d2f2a5d95c532c90417be5118 --- /dev/null +++ b/test_harness/silica_long_mpi_not_gpu.sh @@ -0,0 +1,39 @@ +#!/bin/bash -login + +#PBS -l nodes=1:ppn=8:gpus=8 +#PBS -l mem=120gb +#PBS -l walltime=4:00:00 +#PBS -l feature=gpgpu:intel16 + +cd "${PBS_O_WORKDIR}" +#cd /mnt/home/korteme1/PuReMD_new/test_harness + +################ +# MPI-GPU Runs # +################ + +module purge +module load GNU/4.8.2 OpenMPI/1.6.5 CUDA/6.0 + +mpirun -np 8 ../PG-PuReMD/bin/pg-puremd-not-gpu ../data/benchmarks/silica/silica_300000.geo ../data/benchmarks/silica/ffield-bio silica_300000_mpi_not_gpu_control + + + + + + + + + + + + + + + + + + + + + diff --git a/test_harness/sim_output_082516.txt b/test_harness/sim_output_082516.txt new file mode 100644 index 0000000000000000000000000000000000000000..3e2952beab5f81a1b92a75b63c8d6a3de085f350 --- /dev/null +++ b/test_harness/sim_output_082516.txt @@ -0,0 +1,379 @@ +Small Systems: +------------------------------------------- +tail water.6540_mpi.pot: +10 -563559.95 -14321.43 0.09 11670.67 0.00 -7960.90 37.67 -0.14 108831.26 -263095.78 175457.40 +20 -570250.29 -15031.60 0.02 11064.48 0.00 -8073.57 35.63 -0.15 114759.09 -268511.64 180063.70 +30 -578558.57 -15053.86 0.00 10666.77 0.00 -8196.01 33.30 -0.15 123196.52 -275532.33 186056.05 +40 -584717.60 -15160.69 0.00 10392.89 0.00 -8282.46 30.71 -0.15 131214.11 -281359.67 191054.69 +50 -586735.09 -14885.85 0.00 10270.96 0.00 -8320.01 28.23 -0.14 134826.75 -282847.92 192344.66 +60 -582847.13 -15797.42 -0.00 9710.66 0.00 -8313.50 26.01 -0.12 130578.53 -278558.43 188668.33 +70 -574780.61 -16593.71 -0.00 8962.50 0.00 -8252.29 24.42 -0.11 121530.40 -270696.87 181939.28 +80 -565419.05 -17011.84 -0.00 8347.44 0.00 -8138.77 23.34 -0.09 112397.50 -262730.22 175141.08 +90 -557974.27 -17233.65 -0.00 7978.84 0.00 -8017.92 22.50 -0.08 105848.38 -257230.68 170452.13 +100 -555008.28 -17391.74 -0.00 7854.52 0.00 -7955.88 21.33 -0.08 103083.76 -255531.34 168990.92 +tail water.6540_mpi_gpu.pot: +10 -563559.95 -14321.43 0.09 11670.67 0.00 -7960.90 37.67 -0.14 108831.26 -263095.01 175765.69 +20 -570250.29 -15031.60 0.02 11064.48 0.00 -8073.57 35.63 -0.15 114759.09 -268512.70 180381.93 +30 -578558.57 -15053.86 0.00 10666.77 0.00 -8196.01 33.30 -0.15 123196.52 -275531.60 186383.04 +40 -584717.60 -15160.69 0.00 10392.89 0.00 -8282.46 30.71 -0.15 131214.11 -281358.27 191389.83 +50 -586735.09 -14885.85 0.00 10270.96 0.00 -8320.01 28.23 -0.14 134826.74 -282849.01 192684.56 +60 -582847.12 -15797.42 -0.00 9710.66 0.00 -8313.50 26.01 -0.12 130578.53 -278557.38 188999.62 +70 -574780.61 -16593.71 -0.00 8962.50 0.00 -8252.29 24.42 -0.11 121530.39 -270699.32 182262.20 +80 -565419.05 -17011.84 -0.00 8347.44 0.00 -8138.77 23.34 -0.09 112397.49 -262728.62 175447.99 +90 -557974.27 -17233.65 -0.00 7978.84 0.00 -8017.92 22.50 -0.08 105848.38 -257232.04 170753.74 +100 -555008.27 -17391.74 -0.00 7854.52 0.00 -7955.88 21.33 -0.08 103083.75 -255531.17 169288.42 +tail water.6540_mpi_not_gpu.pot: +10 -563559.95 -14321.43 0.09 11670.67 0.00 -7960.90 37.67 -0.14 108831.26 -263095.02 175765.70 +20 -570250.29 -15031.60 0.02 11064.48 0.00 -8073.57 35.63 -0.15 114759.09 -268512.72 180381.96 +30 -578558.57 -15053.86 0.00 10666.77 0.00 -8196.01 33.30 -0.15 123196.52 -275531.21 186382.66 +40 -584717.61 -15160.69 0.00 10392.89 0.00 -8282.46 30.71 -0.15 131214.11 -281361.00 191392.55 +50 -586735.09 -14885.85 0.00 10270.96 0.00 -8320.01 28.23 -0.14 134826.75 -282847.58 192683.14 +60 -582847.13 -15797.42 -0.00 9710.66 0.00 -8313.50 26.01 -0.12 130578.54 -278559.28 189001.52 +70 -574780.61 -16593.71 -0.00 8962.50 0.00 -8252.29 24.42 -0.11 121530.40 -270697.16 182260.05 +80 -565419.05 -17011.84 -0.00 8347.44 0.00 -8138.77 23.34 -0.09 112397.50 -262728.79 175448.15 +90 -557974.26 -17233.65 -0.00 7978.84 0.00 -8017.92 22.50 -0.08 105848.37 -257232.94 170754.64 +100 -555008.26 -17391.74 -0.00 7854.52 0.00 -7955.88 21.33 -0.08 103083.75 -255532.65 169289.91 +------------------------------------------- +tail silica.6000_mpi.pot: +10 -1460099.66 -70595.13 0.05 65152.78 -0.00 0.00 139.32 -21.56 721214.85 -958586.61 823996.76 +20 -1460050.61 -70706.86 0.05 65099.44 -0.00 0.00 138.15 -21.52 720897.51 -957897.40 823348.15 +30 -1459999.47 -70846.82 0.04 65040.03 -0.00 0.00 136.26 -21.56 720408.37 -956833.64 822346.04 +40 -1459963.76 -70996.90 0.04 64988.55 -0.00 0.00 133.92 -21.49 719795.13 -955538.52 821126.36 +50 -1459946.42 -71148.90 0.03 64951.30 -0.00 0.00 131.23 -21.46 719111.19 -954101.91 819770.90 +60 -1459943.52 -71302.03 0.02 64928.37 -0.00 0.00 128.33 -21.41 718409.80 -952701.74 818449.27 +70 -1459966.29 -71449.81 0.01 64919.04 -0.00 0.00 125.40 -21.27 717742.45 -951445.84 817261.79 +80 -1460020.69 -71576.43 -0.00 64924.52 -0.00 0.00 122.58 -21.18 717156.15 -950402.10 816270.69 +90 -1460121.57 -71672.96 -0.00 64943.88 -0.00 0.00 119.93 -21.05 716683.85 -949649.78 815551.48 +100 -1460263.51 -71744.03 -0.00 64970.52 -0.00 0.00 117.74 -21.01 716341.10 -949212.58 815126.44 +tail silica.6000_mpi_gpu.pot: +10 -1460099.66 -70595.13 0.05 65152.78 -0.00 0.00 139.32 -21.56 721214.85 -958587.42 825449.02 +20 -1460050.61 -70706.86 0.05 65099.44 -0.00 0.00 138.15 -21.52 720897.51 -957890.66 824791.71 +30 -1459999.47 -70846.82 0.04 65040.03 -0.00 0.00 136.26 -21.56 720408.37 -956830.18 823791.12 +40 -1459963.76 -70996.90 0.04 64988.55 -0.00 0.00 133.92 -21.49 719795.13 -955525.81 822560.03 +50 -1459946.41 -71148.90 0.03 64951.30 -0.00 0.00 131.23 -21.46 719111.19 -954099.19 821212.18 +60 -1459943.52 -71302.03 0.02 64928.37 -0.00 0.00 128.33 -21.41 718409.79 -952704.77 819893.97 +70 -1459966.29 -71449.81 0.01 64919.04 -0.00 0.00 125.40 -21.27 717742.45 -951446.06 818701.60 +80 -1460020.68 -71576.43 -0.00 64924.51 -0.00 0.00 122.58 -21.18 717156.15 -950403.38 817709.81 +90 -1460121.56 -71672.96 -0.00 64943.88 -0.00 0.00 119.93 -21.05 716683.84 -949651.67 816989.94 +100 -1460263.50 -71744.03 -0.00 64970.52 -0.00 0.00 117.74 -21.01 716341.09 -949208.95 816558.64 +tail silica.6000_mpi_not_gpu.pot: +10 -1460099.66 -70595.13 0.05 65152.78 -0.00 0.00 139.32 -21.56 721214.85 -958583.49 825445.10 +20 -1460050.61 -70706.86 0.05 65099.44 -0.00 0.00 138.15 -21.52 720897.51 -957893.65 824794.71 +30 -1459999.47 -70846.82 0.04 65040.03 -0.00 0.00 136.26 -21.56 720408.37 -956827.62 823788.57 +40 -1459963.76 -70996.90 0.04 64988.55 -0.00 0.00 133.92 -21.49 719795.12 -955530.85 822565.07 +50 -1459946.41 -71148.90 0.03 64951.30 -0.00 0.00 131.23 -21.46 719111.18 -954103.86 821216.85 +60 -1459943.52 -71302.03 0.02 64928.37 -0.00 0.00 128.33 -21.41 718409.79 -952713.92 819903.11 +70 -1459966.29 -71449.81 0.01 64919.04 -0.00 0.00 125.40 -21.27 717742.45 -951432.44 818687.98 +80 -1460020.68 -71576.43 -0.00 64924.51 -0.00 0.00 122.58 -21.18 717156.15 -950410.53 817716.96 +90 -1460121.56 -71672.96 -0.00 64943.88 -0.00 0.00 119.93 -21.05 716683.84 -949661.33 816999.60 +100 -1460263.50 -71744.03 -0.00 64970.52 -0.00 0.00 117.74 -21.01 716341.09 -949210.17 816559.86 +------------------------------------------- +tail zno.6912_mpi.pot: +10 -1121195.66 46078.33 909.31 145735.92 0.00 0.00 0.00 0.00 569756.24 -866964.80 607629.39 +20 -1121196.71 46078.33 909.31 145735.08 0.00 0.00 0.00 0.00 569756.80 -866963.83 607628.54 +30 -1121198.42 46078.32 909.31 145733.70 0.00 0.00 0.00 0.00 569757.75 -866962.41 607627.30 +40 -1121200.77 46078.31 909.31 145731.74 0.00 0.00 0.00 0.00 569759.12 -866960.59 607625.73 +50 -1121203.71 46078.29 909.31 145729.20 0.00 0.00 0.00 0.00 569760.95 -866958.12 607623.58 +60 -1121207.18 46078.26 909.31 145726.08 0.00 0.00 0.00 0.00 569763.28 -866955.37 607621.20 +70 -1121211.12 46078.20 909.30 145722.33 0.00 0.00 0.00 0.00 569766.19 -866951.98 607618.24 +80 -1121215.46 46078.12 909.30 145717.96 0.00 0.00 0.00 0.00 569769.73 -866948.54 607615.25 +90 -1121220.10 46078.00 909.29 145712.95 0.00 0.00 0.00 0.00 569773.97 -866944.81 607612.02 +100 -1121224.98 46077.84 909.28 145707.27 0.00 0.00 0.00 0.00 569778.98 -866940.86 607608.59 +tail zno.6912_mpi_gpu.pot: +10 -1121195.66 46078.33 909.31 145735.92 0.00 0.00 0.00 0.00 569756.24 -866964.64 608699.55 +20 -1121196.71 46078.33 909.31 145735.08 0.00 0.00 0.00 0.00 569756.80 -866963.99 608699.01 +30 -1121198.42 46078.32 909.31 145733.70 0.00 0.00 0.00 0.00 569757.75 -866962.45 608697.65 +40 -1121200.77 46078.31 909.31 145731.74 0.00 0.00 0.00 0.00 569759.12 -866960.65 608696.10 +50 -1121203.71 46078.29 909.31 145729.20 0.00 0.00 0.00 0.00 569760.95 -866958.00 608693.77 +60 -1121207.18 46078.26 909.31 145726.08 0.00 0.00 0.00 0.00 569763.28 -866955.58 608691.72 +70 -1121211.12 46078.20 909.30 145722.33 0.00 0.00 0.00 0.00 569766.19 -866951.87 608688.43 +80 -1121215.46 46078.12 909.30 145717.96 0.00 0.00 0.00 0.00 569769.73 -866948.59 608685.60 +90 -1121220.10 46078.00 909.29 145712.95 0.00 0.00 0.00 0.00 569773.97 -866944.78 608682.28 +100 -1121224.98 46077.84 909.28 145707.27 0.00 0.00 0.00 0.00 569778.98 -866940.78 608678.79 +tail zno.6912_mpi_not_gpu.pot: +10 -1121195.66 46078.33 909.31 145735.92 0.00 0.00 0.00 0.00 569756.24 -866964.64 608699.55 +20 -1121196.71 46078.33 909.31 145735.08 0.00 0.00 0.00 0.00 569756.80 -866963.99 608699.02 +30 -1121198.42 46078.32 909.31 145733.70 0.00 0.00 0.00 0.00 569757.75 -866962.46 608697.66 +40 -1121200.77 46078.31 909.31 145731.74 0.00 0.00 0.00 0.00 569759.12 -866960.58 608696.03 +50 -1121203.71 46078.29 909.31 145729.20 0.00 0.00 0.00 0.00 569760.95 -866958.02 608693.79 +60 -1121207.18 46078.26 909.31 145726.08 0.00 0.00 0.00 0.00 569763.28 -866955.32 608691.46 +70 -1121211.12 46078.20 909.30 145722.33 0.00 0.00 0.00 0.00 569766.19 -866951.94 608688.49 +80 -1121215.46 46078.12 909.30 145717.96 0.00 0.00 0.00 0.00 569769.73 -866948.59 608685.60 +90 -1121220.10 46078.00 909.29 145712.95 0.00 0.00 0.00 0.00 569773.97 -866944.81 608682.31 +100 -1121224.98 46077.84 909.28 145707.27 0.00 0.00 0.00 0.00 569778.98 -866940.94 608678.96 +___________________________________________ +Medium Systems +------------------------------------------- +tail water.78480_mpi.pot: +10 -6762719.37 -171857.11 1.10 140048.08 0.00 -95530.77 451.99 -1.71 1305975.11 -3157149.64 2105489.05 +20 -6843003.43 -180379.26 0.28 132773.80 0.00 -96882.84 427.55 -1.84 1377109.10 -3222129.67 2160754.42 +30 -6942702.79 -180646.33 0.01 128001.25 0.00 -98352.12 399.58 -1.76 1478358.19 -3306393.64 2232678.23 +40 -7016611.23 -181928.26 0.04 124714.63 0.00 -99389.56 368.48 -1.76 1574569.31 -3376311.54 2292651.76 +50 -7040821.08 -178630.16 0.03 123251.52 0.00 -99840.18 338.76 -1.69 1617920.98 -3394180.95 2308141.87 +60 -6994165.52 -189568.99 -0.00 116527.90 0.00 -99762.04 312.14 -1.40 1566942.41 -3342697.09 2264015.92 +70 -6897367.36 -199124.57 -0.00 107549.96 0.00 -99027.52 292.99 -1.28 1458364.80 -3248370.10 2183278.96 +80 -6785028.62 -204142.07 -0.00 100169.33 0.00 -97665.21 280.04 -1.05 1348770.03 -3152747.22 2101677.52 +90 -6695691.26 -206803.77 -0.00 95746.05 0.00 -96215.06 270.03 -0.95 1270180.57 -3086776.10 2045433.50 +100 -6660099.25 -208700.91 -0.00 94254.19 0.00 -95470.51 255.95 -0.92 1237005.06 -3066413.12 2027928.08 +tail water.78480_mpi_gpu.pot: +10 -6762719.37 -171857.11 1.10 140048.08 0.00 -95530.77 451.99 -1.71 1305975.11 -3157139.96 2109188.11 +20 -6843003.43 -180379.26 0.28 132773.80 0.00 -96882.84 427.55 -1.84 1377109.10 -3222144.88 2164575.74 +30 -6942702.79 -180646.33 0.01 128001.25 0.00 -98352.12 399.58 -1.76 1478358.18 -3306391.61 2236608.99 +40 -7016611.22 -181928.27 0.04 124714.63 0.00 -99389.56 368.48 -1.76 1574569.29 -3376313.56 2296692.22 +50 -7040821.08 -178630.16 0.03 123251.52 0.00 -99840.18 338.76 -1.69 1617920.98 -3394182.36 2312209.01 +60 -6994165.53 -189568.99 -0.00 116527.90 0.00 -99762.04 312.14 -1.40 1566942.42 -3342680.35 2267987.17 +70 -6897367.37 -199124.57 -0.00 107549.96 0.00 -99027.52 292.99 -1.28 1458364.80 -3248368.46 2187123.10 +80 -6785028.60 -204142.07 -0.00 100169.33 0.00 -97665.21 280.04 -1.05 1348769.98 -3152742.36 2105374.70 +90 -6695691.23 -206803.78 -0.00 95746.05 0.00 -96215.06 270.03 -0.95 1270180.51 -3086795.67 2049056.04 +100 -6660099.27 -208700.91 -0.00 94254.20 0.00 -95470.51 255.95 -0.92 1237005.04 -3066403.19 2031490.27 +tail water.78480_mpi_not_gpu.pot: +10 -6762719.37 -171857.11 1.10 140048.08 0.00 -95530.77 451.99 -1.71 1305975.11 -3157140.15 2109188.30 +20 -6843003.41 -180379.26 0.28 132773.79 0.00 -96882.84 427.55 -1.84 1377109.07 -3222137.08 2164567.93 +30 -6942702.75 -180646.33 0.01 128001.25 0.00 -98352.12 399.58 -1.76 1478358.15 -3306396.17 2236613.55 +40 -7016611.20 -181928.26 0.04 124714.62 0.00 -99389.56 368.48 -1.76 1574569.27 -3376309.78 2296688.45 +50 -7040821.06 -178630.16 0.03 123251.52 0.00 -99840.18 338.76 -1.69 1617920.96 -3394178.22 2312204.87 +60 -6994165.51 -189568.99 -0.00 116527.90 0.00 -99762.04 312.14 -1.40 1566942.40 -3342713.09 2268019.92 +70 -6897367.38 -199124.57 -0.00 107549.96 0.00 -99027.52 292.99 -1.28 1458364.83 -3248347.39 2187102.03 +80 -6785028.62 -204142.07 -0.00 100169.34 0.00 -97665.21 280.04 -1.05 1348770.02 -3152771.75 2105404.08 +90 -6695691.21 -206803.78 -0.00 95746.05 0.00 -96215.06 270.03 -0.95 1270180.52 -3086764.49 2049024.86 +100 -6660099.16 -208700.92 -0.00 94254.19 0.00 -95470.51 255.95 -0.92 1237004.99 -3066404.39 2031491.49 +------------------------------------------- +tail silica.72000_mpi.pot: +10 -17519676.11 -847661.69 -0.03 781301.42 0.00 0.00 1671.83 -258.69 8654247.84 -11503077.33 9888011.09 +20 -17519078.04 -848992.62 -0.03 780664.45 0.00 0.00 1657.85 -258.36 8650446.44 -11494717.35 9880138.29 +30 -17518448.70 -850655.87 -0.03 779955.87 0.00 0.00 1635.10 -258.75 8644585.44 -11482084.82 9868245.44 +40 -17517998.74 -852433.74 -0.03 779343.64 0.00 0.00 1606.93 -257.98 8637234.13 -11466347.12 9853413.59 +50 -17517762.69 -854228.26 -0.03 778902.75 0.00 0.00 1574.57 -257.58 8629029.86 -11449268.37 9837309.64 +60 -17517691.36 -856031.00 -0.03 778634.12 0.00 0.00 1539.70 -257.00 8620607.67 -11432467.77 9821453.72 +70 -17517910.62 -857768.73 -0.03 778527.46 0.00 0.00 1504.29 -255.34 8612581.62 -11417158.08 9806969.32 +80 -17518478.97 -859260.97 -0.03 778589.30 0.00 0.00 1470.25 -254.25 8605511.97 -11404652.86 9795101.10 +90 -17519600.70 -860391.38 -0.03 778802.56 0.00 0.00 1438.26 -252.68 8599792.75 -11395545.93 9786398.69 +100 -17521291.41 -861168.31 -0.03 779097.61 0.00 0.00 1411.71 -252.19 8595612.92 -11390069.21 9781076.55 +tail silica.72000_mpi_gpu.pot: +10 -17519676.11 -847661.69 -0.03 781301.42 0.00 0.00 1671.83 -258.69 8654247.84 -11503065.70 9905416.88 +20 -17519078.04 -848992.62 -0.03 780664.45 0.00 0.00 1657.85 -258.36 8650446.45 -11494733.17 9897557.67 +30 -17518448.70 -850655.87 -0.03 779955.87 0.00 0.00 1635.10 -258.75 8644585.44 -11482074.91 9885618.13 +40 -17517998.75 -852433.74 -0.03 779343.64 0.00 0.00 1606.93 -257.98 8637234.14 -11466425.24 9870848.19 +50 -17517762.69 -854228.26 -0.03 778902.75 0.00 0.00 1574.57 -257.58 8629029.86 -11449304.76 9854674.14 +60 -17517691.34 -856031.00 -0.03 778634.12 0.00 0.00 1539.70 -257.00 8620607.65 -11432368.78 9838654.91 +70 -17517910.58 -857768.73 -0.03 778527.46 0.00 0.00 1504.29 -255.34 8612581.58 -11417277.73 9824363.65 +80 -17518478.90 -859260.98 -0.03 778589.30 0.00 0.00 1470.25 -254.25 8605511.89 -11404589.45 9812291.47 +90 -17519600.57 -860391.39 -0.03 778802.56 0.00 0.00 1438.26 -252.68 8599792.60 -11395620.41 9803711.63 +100 -17521291.24 -861168.33 -0.03 779097.60 0.00 0.00 1411.71 -252.19 8595612.72 -11390129.26 9798365.71 +tail silica.72000_mpi_not_gpu.pot: +10 -17519676.11 -847661.69 -0.03 781301.42 0.00 0.00 1671.83 -258.69 8654247.84 -11503051.76 9905402.94 +20 -17519078.04 -848992.62 -0.03 780664.45 0.00 0.00 1657.85 -258.36 8650446.44 -11494736.34 9897560.83 +30 -17518448.70 -850655.87 -0.03 779955.87 0.00 0.00 1635.10 -258.75 8644585.44 -11482052.46 9885595.68 +40 -17517998.74 -852433.74 -0.03 779343.64 0.00 0.00 1606.93 -257.98 8637234.12 -11466394.89 9870817.84 +50 -17517762.68 -854228.26 -0.03 778902.75 0.00 0.00 1574.57 -257.58 8629029.85 -11449197.41 9854566.79 +60 -17517691.33 -856031.01 -0.03 778634.12 0.00 0.00 1539.70 -257.00 8620607.64 -11432432.37 9838718.51 +70 -17517910.58 -857768.73 -0.03 778527.45 0.00 0.00 1504.29 -255.34 8612581.59 -11417218.46 9824304.37 +80 -17518478.93 -859260.98 -0.03 778589.30 0.00 0.00 1470.25 -254.25 8605511.93 -11404651.90 9812353.91 +90 -17519600.65 -860391.38 -0.03 778802.56 0.00 0.00 1438.26 -252.68 8599792.70 -11395596.77 9803687.97 +100 -17521291.36 -861168.32 -0.03 779097.60 0.00 0.00 1411.71 -252.19 8595612.87 -11390098.52 9798334.93 +------------------------------------------- +tail bilayer.56800_mpi.pot: +10 -6518329.26 -117406.81 2206.68 118394.98 -49.81 -40501.58 27673.35 -13930.54 1530766.95 -1457070.57 946921.53 +20 -6523788.35 -118003.38 2206.84 115862.68 -50.12 -40588.21 27308.02 -13943.02 1530500.46 -1459220.14 948713.26 +30 -6528807.01 -118041.23 2207.27 113148.77 -50.26 -40670.75 27018.18 -13942.40 1534718.90 -1463530.45 952378.05 +40 -6531794.70 -117331.80 2207.64 111219.99 -50.05 -40721.42 26960.67 -13942.10 1541675.37 -1468771.77 956882.11 +50 -6532793.00 -116516.43 2207.82 110405.78 -49.47 -40754.55 27158.38 -13950.43 1544746.21 -1472313.62 959943.81 +60 -6531704.05 -116302.92 2207.79 110555.63 -48.70 -40789.75 27433.91 -13958.57 1540173.16 -1472561.41 960165.48 +70 -6528150.84 -116545.66 2207.57 111445.31 -48.02 -40813.78 27568.39 -13953.16 1531754.38 -1470602.68 958482.91 +80 -6522816.66 -116744.70 2207.13 112718.58 -47.68 -40804.78 27449.75 -13935.71 1525726.42 -1468999.30 957113.15 +90 -6517928.35 -116799.00 2206.55 113744.49 -47.76 -40774.53 27154.21 -13908.81 1523809.01 -1469333.89 957412.39 +100 -6515767.16 -117030.38 2206.10 113896.99 -48.17 -40770.31 26868.89 -13888.05 1523408.87 -1471311.22 959101.79 +tail bilayer.56800_mpi_gpu.pot: +10 -6518329.23 -117406.81 2206.68 118394.97 -49.81 -40501.58 27673.35 -13930.54 1530766.92 -1457068.52 948587.46 +20 -6523788.26 -118003.38 2206.84 115862.67 -50.12 -40588.21 27308.02 -13943.02 1530500.36 -1459222.34 950386.61 +30 -6528806.85 -118041.22 2207.27 113148.75 -50.26 -40670.75 27018.18 -13942.40 1534718.72 -1463531.86 954057.06 +40 -6531794.48 -117331.78 2207.64 111219.97 -50.05 -40721.42 26960.68 -13942.10 1541675.15 -1468772.93 958568.82 +50 -6532792.75 -116516.40 2207.82 110405.75 -49.47 -40754.54 27158.38 -13950.43 1544745.95 -1472312.12 961633.25 +60 -6531703.83 -116302.87 2207.79 110555.60 -48.70 -40789.75 27433.92 -13958.57 1540172.91 -1472560.53 961855.93 +70 -6528150.70 -116545.59 2207.57 111445.29 -48.02 -40813.77 27568.39 -13953.16 1531754.17 -1470599.77 960168.37 +80 -6522816.63 -116744.62 2207.13 112718.57 -47.68 -40804.77 27449.75 -13935.71 1525726.29 -1469000.19 958799.99 +90 -6517928.45 -116798.91 2206.55 113744.50 -47.76 -40774.53 27154.20 -13908.80 1523808.99 -1469334.20 959099.15 +100 -6515767.33 -117030.29 2206.10 113897.00 -48.16 -40770.31 26868.88 -13888.05 1523408.92 -1471310.48 960790.46 +tail bilayer.56800_mpi_not_gpu.pot: +10 -6518329.25 -117406.81 2206.68 118394.98 -49.81 -40501.58 27673.35 -13930.54 1530766.95 -1457068.68 948587.61 +20 -6523788.34 -118003.38 2206.84 115862.68 -50.12 -40588.21 27308.02 -13943.02 1530500.45 -1459217.28 950381.54 +30 -6528807.00 -118041.23 2207.27 113148.77 -50.26 -40670.75 27018.18 -13942.40 1534718.89 -1463533.00 954058.18 +40 -6531794.70 -117331.80 2207.64 111219.99 -50.05 -40721.42 26960.67 -13942.10 1541675.38 -1468768.72 958564.57 +50 -6532793.01 -116516.43 2207.82 110405.78 -49.47 -40754.55 27158.38 -13950.43 1544746.22 -1472317.03 961638.12 +60 -6531704.07 -116302.92 2207.79 110555.63 -48.70 -40789.75 27433.91 -13958.57 1540173.18 -1472559.64 961855.00 +70 -6528150.86 -116545.66 2207.57 111445.32 -48.02 -40813.78 27568.39 -13953.16 1531754.40 -1470601.15 960169.71 +80 -6522816.66 -116744.70 2207.13 112718.58 -47.68 -40804.78 27449.75 -13935.71 1525726.43 -1469001.77 958801.54 +90 -6517928.35 -116799.00 2206.55 113744.49 -47.76 -40774.53 27154.21 -13908.81 1523809.02 -1469331.48 959096.43 +100 -6515767.17 -117030.38 2206.10 113896.99 -48.17 -40770.31 26868.89 -13888.05 1523408.88 -1471312.25 960792.24 +------------------------------------------- +tail dna_mpi.pot: +10 -1878686.35 -39024.39 68.94 30653.29 -1227.93 -24907.65 1868.48 -1829.40 404014.75 -798089.18 530877.35 +20 -1880597.67 -39137.38 69.59 30046.82 -1228.93 -24950.70 1824.84 -1841.06 404029.39 -799378.22 531962.25 +30 -1882633.89 -39057.79 70.08 29519.73 -1230.45 -24988.69 1782.86 -1852.24 405537.23 -801813.16 534042.90 +40 -1883985.27 -38773.74 70.34 29303.17 -1232.33 -25006.61 1750.62 -1860.49 408083.01 -804742.12 536564.89 +50 -1884292.39 -38554.43 70.48 29345.62 -1234.52 -25009.02 1721.83 -1866.72 409508.68 -806806.32 538343.47 +60 -1883483.39 -38645.70 70.35 29459.05 -1236.96 -25003.15 1692.74 -1868.27 408158.81 -807123.41 538605.22 +70 -1881817.92 -38875.84 70.07 29582.25 -1239.25 -24981.22 1665.69 -1867.05 404774.39 -806171.57 537768.99 +80 -1879781.00 -38927.47 69.78 29716.91 -1240.81 -24935.14 1638.62 -1865.01 401580.52 -805285.69 536999.31 +90 -1877940.01 -38759.26 69.72 29733.29 -1241.26 -24874.71 1610.22 -1864.18 399961.60 -805426.43 537125.44 +100 -1876870.10 -38588.32 69.77 29493.23 -1240.61 -24830.46 1584.42 -1865.47 399559.40 -806552.10 538096.62 +tail dna_mpi_gpu.pot: +10 -1878686.35 -39024.39 68.94 30653.29 -1227.93 -24907.65 1868.48 -1829.40 404014.75 -798089.12 531812.42 +20 -1880597.66 -39137.38 69.59 30046.82 -1228.93 -24950.70 1824.84 -1841.06 404029.37 -799378.01 532899.07 +30 -1882633.87 -39057.79 70.08 29519.73 -1230.45 -24988.69 1782.86 -1852.24 405537.21 -801811.56 534982.00 +40 -1883985.25 -38773.73 70.34 29303.17 -1232.33 -25006.61 1750.62 -1860.49 408082.99 -804744.15 537512.06 +50 -1884292.37 -38554.42 70.48 29345.61 -1234.53 -25009.02 1721.83 -1866.72 409508.66 -806806.42 539291.84 +60 -1883483.39 -38645.67 70.35 29459.04 -1236.97 -25003.15 1692.75 -1868.27 408158.80 -807123.91 539554.46 +70 -1881817.95 -38875.80 70.07 29582.23 -1239.26 -24981.22 1665.69 -1867.05 404774.39 -806170.23 538714.92 +80 -1879781.04 -38927.42 69.78 29716.90 -1240.81 -24935.14 1638.62 -1865.01 401580.53 -805285.98 537945.51 +90 -1877940.07 -38759.20 69.72 29733.28 -1241.27 -24874.71 1610.23 -1864.18 399961.62 -805426.10 538071.24 +100 -1876870.16 -38588.25 69.77 29493.20 -1240.62 -24830.46 1584.42 -1865.47 399559.41 -806551.52 539043.89 +tail dna_mpi_not_gpu.pot: +10 -1878686.35 -39024.39 68.94 30653.29 -1227.93 -24907.65 1868.48 -1829.40 404014.75 -798088.64 531811.95 +20 -1880597.67 -39137.38 69.59 30046.82 -1228.93 -24950.70 1824.84 -1841.06 404029.39 -799377.35 532898.41 +30 -1882633.89 -39057.79 70.08 29519.73 -1230.45 -24988.69 1782.86 -1852.24 405537.23 -801812.56 534983.00 +40 -1883985.27 -38773.74 70.34 29303.17 -1232.33 -25006.61 1750.62 -1860.49 408083.01 -804742.31 537510.22 +50 -1884292.39 -38554.43 70.48 29345.62 -1234.52 -25009.02 1721.83 -1866.72 409508.68 -806806.85 539292.27 +60 -1883483.39 -38645.70 70.35 29459.05 -1236.96 -25003.15 1692.74 -1868.27 408158.82 -807123.27 539553.82 +70 -1881817.92 -38875.84 70.07 29582.25 -1239.25 -24981.22 1665.69 -1867.05 404774.39 -806169.09 538713.78 +80 -1879781.00 -38927.47 69.78 29716.91 -1240.81 -24935.14 1638.62 -1865.01 401580.52 -805285.26 537944.79 +90 -1877940.01 -38759.26 69.72 29733.29 -1241.26 -24874.71 1610.22 -1864.18 399961.60 -805426.75 538071.90 +100 -1876870.10 -38588.32 69.77 29493.23 -1240.61 -24830.46 1584.42 -1865.47 399559.39 -806551.56 539043.92 +------------------------------------------- +tail petn_mpi.pot: +10 -7382745.14 -100016.55 85183.94 128872.25 -116694.80 -58.45 69339.46 -20733.04 2823262.78 -974024.22 706252.73 +20 -7386431.52 -101420.13 85086.00 117749.36 -116967.90 -61.36 68174.67 -21002.82 2824036.19 -966391.29 699047.08 +30 -7386642.14 -103081.70 84768.39 102968.67 -117274.17 -44.88 66934.92 -21393.93 2823809.67 -955978.34 689124.99 +40 -7379236.50 -104606.88 84276.52 88785.66 -117531.93 -47.90 65978.81 -21837.42 2819830.36 -945699.50 679144.78 +50 -7364262.20 -106007.66 83924.67 78632.68 -117800.59 -43.86 65296.59 -22270.18 2809191.48 -938276.56 671620.75 +60 -7345175.56 -107332.75 84090.83 74089.92 -118238.77 -32.78 64785.01 -22658.28 2791543.80 -935358.97 668140.67 +70 -7324479.10 -109211.92 84878.16 75549.42 -118947.39 0.00 64663.68 -22866.93 2770709.49 -936951.57 668824.55 +80 -7303809.26 -112288.64 86017.34 82741.81 -119835.64 0.00 65561.37 -22698.18 2753218.48 -941541.61 672419.32 +90 -7292413.55 -113457.92 87142.59 93837.69 -120678.66 0.00 66506.75 -22538.82 2744711.48 -946747.84 676853.29 +100 -7299205.69 -110531.84 88067.64 106082.00 -121279.73 0.00 65799.89 -22764.51 2747612.04 -950491.40 680284.44 +tail petn_mpi_gpu.pot: +10 -7382745.13 -100016.55 85183.94 128872.25 -116694.80 -58.45 69339.46 -20733.04 2823262.78 -974012.79 707485.34 +20 -7386431.51 -101420.13 85086.00 117749.36 -116967.90 -61.36 68174.67 -21002.82 2824036.18 -966396.53 700283.67 +30 -7386642.13 -103081.70 84768.39 102968.67 -117274.17 -44.88 66934.93 -21393.93 2823809.66 -955975.61 690336.13 +40 -7379236.48 -104606.89 84276.52 88785.66 -117531.93 -47.90 65978.81 -21837.42 2819830.35 -945699.19 680340.76 +50 -7364262.18 -106007.66 83924.66 78632.68 -117800.59 -43.86 65296.59 -22270.18 2809191.46 -938271.76 672798.99 +60 -7345175.54 -107332.76 84090.83 74089.92 -118238.77 -32.78 64785.01 -22658.28 2791543.79 -935360.55 669319.16 +70 -7324479.09 -109211.92 84878.16 75549.42 -118947.39 0.00 64663.68 -22866.93 2770709.48 -936952.32 670003.41 +80 -7303809.24 -112288.64 86017.33 82741.80 -119835.64 0.00 65561.37 -22698.18 2753218.47 -941541.25 673603.41 +90 -7292413.54 -113457.92 87142.59 93837.69 -120678.66 0.00 66506.75 -22538.82 2744711.47 -946740.23 678037.94 +100 -7299205.68 -110531.84 88067.63 106082.00 -121279.73 0.00 65799.89 -22764.51 2747612.03 -950495.69 681487.03 +tail petn_mpi_not_gpu.pot: +10 -7382745.13 -100016.55 85183.94 128872.25 -116694.80 -58.45 69339.46 -20733.04 2823262.78 -974012.79 707485.34 +20 -7386431.51 -101420.13 85086.00 117749.36 -116967.90 -61.36 68174.67 -21002.82 2824036.18 -966396.53 700283.67 +30 -7386642.13 -103081.70 84768.39 102968.67 -117274.17 -44.88 66934.93 -21393.93 2823809.66 -955975.61 690336.13 +40 -7379236.48 -104606.89 84276.52 88785.66 -117531.93 -47.90 65978.81 -21837.42 2819830.35 -945699.19 680340.76 +50 -7364262.18 -106007.66 83924.66 78632.68 -117800.59 -43.86 65296.59 -22270.18 2809191.46 -938271.76 672798.99 +60 -7345175.54 -107332.76 84090.83 74089.92 -118238.77 -32.78 64785.01 -22658.28 2791543.79 -935360.55 669319.16 +70 -7324479.09 -109211.92 84878.16 75549.42 -118947.39 0.00 64663.68 -22866.93 2770709.48 -936952.32 670003.41 +80 -7303809.24 -112288.64 86017.33 82741.80 -119835.64 0.00 65561.37 -22698.18 2753218.47 -941541.25 673603.41 +90 -7292413.54 -113457.92 87142.59 93837.69 -120678.66 0.00 66506.75 -22538.82 2744711.47 -946741.30 678039.01 +100 -7299205.68 -110531.84 88067.63 106081.99 -121279.73 0.00 65799.89 -22764.51 2747612.03 -950498.79 681490.13 +___________________________________________ +Large Systems +------------------------------------------- +tail water.327000_mpi.pot: +10 -28177997.37 -716071.28 4.60 583533.65 0.00 -398044.86 1883.30 -7.12 5441562.97 -13154790.38 8772871.23 +20 -28512514.30 -751580.24 1.17 553224.15 0.00 -403678.49 1781.45 -7.68 5737954.57 -13425542.27 9003145.40 +30 -28927928.36 -752693.05 0.06 533338.54 0.00 -409800.52 1664.92 -7.33 6159825.86 -13776656.00 9302841.75 +40 -29235880.37 -758034.42 0.18 519644.31 0.00 -414123.19 1535.34 -7.34 6560705.75 -14068001.76 9552752.59 +50 -29336754.94 -744292.28 0.11 513548.03 0.00 -416000.76 1411.49 -7.03 6741337.97 -14142373.30 9617210.51 +60 -29142356.80 -789870.75 -0.00 485532.96 0.00 -415675.16 1300.59 -5.84 6528927.28 -13927970.07 9433465.04 +70 -28739031.18 -829685.67 -0.00 448124.85 0.00 -412614.68 1220.79 -5.33 6076520.52 -13534870.86 9096991.05 +80 -28270953.02 -850591.92 -0.00 417372.24 0.00 -406938.40 1166.84 -4.36 5619875.43 -13136504.26 8757046.99 +90 -27898713.94 -861682.37 -0.00 398941.91 0.00 -400896.10 1125.12 -3.95 5292419.23 -12861633.41 8522705.74 +100 -27750413.87 -869587.10 -0.00 392725.84 0.00 -397793.79 1066.46 -3.82 5154187.87 -12776623.70 8449602.79 +tail water.327000_mpi_gpu.pot: +10 -28177997.38 -716071.28 4.60 583533.65 0.00 -398044.86 1883.30 -7.12 5441562.98 -13154749.89 8788283.88 +20 -28512514.39 -751580.24 1.17 553224.16 0.00 -403678.49 1781.45 -7.68 5737954.66 -13425574.95 9019036.83 +30 -28927928.37 -752693.04 0.06 533338.55 0.00 -409800.51 1664.92 -7.33 6159825.85 -13776621.11 9319193.52 +40 -29235880.19 -758034.43 0.18 519644.30 0.00 -414123.18 1535.34 -7.34 6560705.51 -14068022.72 9569600.45 +50 -29336754.67 -744292.33 0.11 513548.01 0.00 -416000.75 1411.49 -7.03 6741337.63 -14142419.80 9634197.46 +60 -29142356.53 -789870.78 -0.00 485532.94 0.00 -415675.15 1300.59 -5.84 6528926.96 -13927917.41 9450029.16 +70 -28739030.95 -829685.69 -0.00 448124.84 0.00 -412614.67 1220.79 -5.33 6076520.28 -13534883.66 9113027.95 +80 -28270952.83 -850591.94 -0.00 417372.23 0.00 -406938.39 1166.84 -4.36 5619875.26 -13136415.33 8772383.36 +90 -27898713.76 -861682.39 -0.00 398941.89 0.00 -400896.09 1125.12 -3.95 5292419.07 -12861627.44 8537712.28 +100 -27750413.73 -869587.12 -0.00 392725.82 0.00 -397793.78 1066.46 -3.82 5154187.76 -12776665.30 8464528.13 +tail water.327000_mpi_not_gpu.pot: +10 -28177997.38 -716071.28 4.60 583533.65 0.00 -398044.86 1883.30 -7.12 5441562.98 -13154749.84 8788283.83 +20 -28512514.26 -751580.24 1.17 553224.15 0.00 -403678.49 1781.45 -7.68 5737954.53 -13425505.20 9018967.09 +30 -28927928.24 -752693.05 0.06 533338.54 0.00 -409800.51 1664.92 -7.33 6159825.71 -13776720.33 9319292.76 +40 -29235880.30 -758034.42 0.18 519644.30 0.00 -414123.18 1535.34 -7.34 6560705.65 -14067927.15 9569504.86 +50 -29336754.98 -744292.28 0.11 513548.04 0.00 -416000.76 1411.49 -7.03 6741338.04 -14142407.63 9634185.24 +60 -29142356.94 -789870.74 -0.00 485532.97 0.00 -415675.17 1300.59 -5.84 6528927.49 -13927855.87 9449967.56 +70 -28739031.18 -829685.68 -0.00 448124.84 0.00 -412614.68 1220.79 -5.33 6076520.57 -13534913.46 9113057.71 +80 -28270952.77 -850591.93 -0.00 417372.21 0.00 -406938.39 1166.84 -4.36 5619875.25 -13136392.64 8772360.67 +90 -27898713.24 -861682.38 -0.00 398941.87 0.00 -400896.09 1125.12 -3.95 5292418.68 -12861573.42 8537658.31 +100 -27750412.69 -869587.12 -0.00 392725.78 0.00 -397793.76 1066.46 -3.82 5154186.95 -12776705.16 8464568.10 +------------------------------------------- +tail silica.300000_mpi.pot: +10 -72998650.45 -3531923.69 -0.13 3255422.57 0.00 0.00 6965.95 -1077.88 36059366.01 -47929488.15 41200045.51 +20 -72996158.51 -3537469.26 -0.13 3252768.52 0.00 0.00 6907.70 -1076.51 36043526.86 -47894783.06 41167370.09 +30 -72993536.28 -3544399.48 -0.13 3249816.11 0.00 0.00 6812.92 -1078.11 36019106.02 -47841735.13 41117404.87 +40 -72991661.41 -3551807.25 -0.13 3247265.18 0.00 0.00 6695.53 -1074.92 35988475.52 -47776667.27 41056110.54 +50 -72990677.81 -3559284.41 -0.13 3245428.13 0.00 0.00 6560.72 -1073.26 35954290.99 -47705497.30 40989002.24 +60 -72990380.49 -3566795.86 -0.13 3244308.85 0.00 0.00 6415.42 -1070.83 35919198.42 -47635364.28 40922805.65 +70 -72991293.98 -3574036.41 -0.13 3243864.40 0.00 0.00 6267.86 -1063.93 35885756.45 -47571512.02 40862392.23 +80 -72993662.01 -3580254.10 -0.13 3244122.08 0.00 0.00 6126.04 -1059.36 35856299.41 -47519565.68 40813099.84 +90 -72998335.72 -3584964.13 -0.13 3245010.68 0.00 0.00 5992.74 -1052.82 35832469.15 -47481318.20 40776538.38 +100 -73005380.22 -3588201.37 -0.13 3246240.02 0.00 0.00 5882.11 -1050.81 35815053.03 -47459060.51 40754923.89 +tail silica.300000_mpi_gpu.pot: +10 -72998650.45 -3531923.69 -0.13 3255422.57 0.00 0.00 6965.95 -1077.88 36059366.01 -47929437.55 41272567.47 +20 -72996158.50 -3537469.26 -0.13 3252768.52 0.00 0.00 6907.70 -1076.51 36043526.85 -47894781.58 41239883.63 +30 -72993536.24 -3544399.48 -0.13 3249816.11 0.00 0.00 6812.92 -1078.11 36019105.97 -47841659.89 41189756.64 +40 -72991661.32 -3551807.26 -0.13 3247265.17 0.00 0.00 6695.53 -1074.92 35988475.42 -47776204.37 41127966.69 +50 -72990677.63 -3559284.43 -0.13 3245428.12 0.00 0.00 6560.72 -1073.26 35954290.79 -47705238.26 41060944.06 +60 -72990380.21 -3566795.89 -0.13 3244308.83 0.00 0.00 6415.42 -1070.83 35919198.12 -47635476.45 40995002.12 +70 -72991293.61 -3574036.44 -0.13 3243864.38 0.00 0.00 6267.86 -1063.93 35885756.04 -47571428.25 40934286.37 +80 -72993661.52 -3580254.15 -0.13 3244122.06 0.00 0.00 6126.04 -1059.36 35856298.87 -47519515.61 40884940.88 +90 -72998335.13 -3584964.19 -0.13 3245010.65 0.00 0.00 5992.74 -1052.82 35832468.50 -47481332.62 40848379.53 +100 -73005379.53 -3588201.44 -0.13 3246239.99 0.00 0.00 5882.11 -1050.81 35815052.28 -47459279.41 40826931.47 +tail silica.300000_mpi_not_gpu.pot: +10 -72998650.45 -3531923.69 -0.13 3255422.57 0.00 0.00 6965.95 -1077.88 36059366.01 -47929383.14 41272513.07 +20 -72996158.50 -3537469.26 -0.13 3252768.52 0.00 0.00 6907.70 -1076.51 36043526.85 -47894792.71 41239894.76 +30 -72993536.21 -3544399.48 -0.13 3249816.11 0.00 0.00 6812.92 -1078.11 36019105.94 -47841631.77 41189728.53 +40 -72991661.27 -3551807.27 -0.13 3247265.17 0.00 0.00 6695.53 -1074.92 35988475.36 -47776479.60 41128241.94 +50 -72990677.60 -3559284.43 -0.13 3245428.12 0.00 0.00 6560.72 -1073.26 35954290.74 -47705299.06 41061004.87 +60 -72990380.19 -3566795.89 -0.13 3244308.84 0.00 0.00 6415.42 -1070.83 35919198.09 -47635375.53 40994901.20 +70 -72991293.62 -3574036.44 -0.13 3243864.39 0.00 0.00 6267.86 -1063.93 35885756.04 -47571355.87 40934213.99 +80 -72993661.59 -3580254.14 -0.13 3244122.07 0.00 0.00 6126.04 -1059.36 35856298.94 -47519720.75 40885146.01 +90 -72998335.28 -3584964.17 -0.13 3245010.67 0.00 0.00 5992.74 -1052.82 35832468.65 -47481641.23 40848688.12 +100 -73005379.78 -3588201.42 -0.13 3246240.02 0.00 0.00 5882.11 -1050.81 35815052.55 -47458642.69 40826294.67 +------------------------------------------- +tail bilayer.340800_mpi.pot: +10 -39109975.54 -704440.86 13240.09 710369.87 -298.83 -243009.45 166040.12 -83583.25 9184601.71 -8742420.83 5681526.61 +20 -39142730.09 -708020.30 13241.02 695176.06 -300.73 -243529.27 163848.12 -83658.13 9183002.73 -8755326.81 5692285.58 +30 -39172842.02 -708247.36 13243.61 678892.58 -301.57 -244024.51 162109.06 -83654.40 9208313.33 -8781211.63 5714297.18 +40 -39190768.23 -703990.81 13245.86 667319.94 -300.28 -244328.53 161764.04 -83652.62 9250052.25 -8812609.98 5741272.03 +50 -39196758.02 -699098.58 13246.94 662434.67 -296.82 -244527.27 162950.25 -83702.58 9268477.26 -8833881.23 5759662.33 +60 -39190224.39 -697817.50 13246.73 663333.78 -292.19 -244738.51 164603.48 -83751.43 9241039.01 -8835359.89 5760984.29 +70 -39168905.14 -699273.93 13245.41 668671.89 -288.12 -244882.67 165410.32 -83718.97 9190526.36 -8823612.74 5750894.13 +80 -39136900.03 -700468.15 13242.75 676311.50 -286.07 -244828.66 164698.48 -83614.26 9154358.59 -8814009.67 5742692.74 +90 -39107570.16 -700793.97 13239.27 682466.98 -286.56 -244647.19 162925.24 -83452.84 9142854.12 -8815998.11 5744469.11 +100 -39094603.02 -702182.22 13236.58 683381.97 -288.99 -244621.86 161213.36 -83328.30 9140453.23 -8827869.85 5754613.22 +tail bilayer.340800_mpi_gpu.pot: +10 -39109975.40 -704440.86 13240.09 710369.85 -298.83 -243009.45 166040.12 -83583.25 9184601.55 -8742413.12 5691526.75 +20 -39142729.63 -708020.28 13241.02 695176.00 -300.73 -243529.27 163848.13 -83658.13 9183002.20 -8755311.88 5702297.50 +30 -39172841.13 -708247.29 13243.61 678892.48 -301.57 -244024.50 162109.07 -83654.39 9208312.36 -8781206.05 5724357.30 +40 -39190766.97 -703990.65 13245.86 667319.80 -300.28 -244328.52 161764.05 -83652.62 9250050.94 -8812635.61 5751410.94 +50 -39196756.59 -699098.32 13246.94 662434.51 -296.81 -244527.24 162950.27 -83702.57 9268475.75 -8833886.32 5769813.12 +60 -39190223.11 -697817.15 13246.73 663333.62 -292.19 -244738.47 164603.49 -83751.42 9241037.53 -8835359.62 5771132.04 +70 -39168904.30 -699273.50 13245.41 668671.77 -288.12 -244882.63 165410.32 -83718.95 9190525.09 -8823606.17 5761017.76 +80 -39136899.87 -700467.64 13242.75 676311.46 -286.06 -244828.62 164698.46 -83614.24 9154357.75 -8814002.03 5752800.78 +90 -39107570.76 -700793.38 13239.27 682467.05 -286.56 -244647.16 162925.20 -83452.81 9142853.92 -8815994.20 5754583.91 +100 -39094604.13 -702181.62 13236.58 683382.08 -288.99 -244621.85 161213.30 -83328.27 9140453.58 -8827862.44 5764742.30 +tail bilayer.340800_mpi_not_gpu.pot: +10 -39109975.51 -704440.87 13240.09 710369.87 -298.83 -243009.45 166040.12 -83583.25 9184601.69 -8742412.89 5691526.50 +20 -39142730.04 -708020.30 13241.02 695176.06 -300.73 -243529.27 163848.12 -83658.13 9183002.68 -8755341.71 5702327.26 +30 -39172841.97 -708247.37 13243.61 678892.59 -301.57 -244024.51 162109.06 -83654.40 9208313.29 -8781195.99 5724347.09 +40 -39190768.16 -703990.83 13245.86 667319.94 -300.28 -244328.53 161764.04 -83652.62 9250052.18 -8812621.24 5751396.37 +50 -39196757.96 -699098.60 13246.94 662434.66 -296.82 -244527.27 162950.25 -83702.58 9268477.19 -8833881.18 5769807.75 +60 -39190224.36 -697817.52 13246.73 663333.78 -292.19 -244738.50 164603.48 -83751.43 9241038.98 -8835364.73 5771136.92 +70 -39168905.14 -699273.95 13245.41 668671.89 -288.12 -244882.67 165410.32 -83718.97 9190526.38 -8823615.62 5761027.03 +80 -39136900.04 -700468.17 13242.75 676311.49 -286.07 -244828.66 164698.48 -83614.26 9154358.62 -8813992.60 5752791.23 +90 -39107570.14 -700794.01 13239.27 682466.96 -286.56 -244647.19 162925.24 -83452.84 9142854.14 -8815986.64 5754576.34 +100 -39094602.97 -702182.26 13236.58 683381.95 -288.99 -244621.87 161213.36 -83328.30 9140453.21 -8827864.25 5764744.19 diff --git a/test_harness/sim_test_disp_output.sh b/test_harness/sim_test_disp_output.sh index 2c78aea5cfab42a5193b7ae26ec72ac49912d27b..729390eac431728f98edbe3482586efd4e63a16f 100755 --- a/test_harness/sim_test_disp_output.sh +++ b/test_harness/sim_test_disp_output.sh @@ -9,21 +9,29 @@ echo "Small Systems:" echo "-------------------------------------------" echo "tail water.6540_mpi.pot:" -tail water.6540_mpi.pot; +tail water.6540_mpi.pot echo "tail water.6540_mpi_gpu.pot:" -tail water.6540_mpi_gpu.pot; +tail water.6540_mpi_gpu.pot +echo "tail water.6540_mpi_not_gpu.pot:" +tail water.6540_mpi_not_gpu.pot; + echo "-------------------------------------------" echo "tail silica.6000_mpi.pot:" tail silica.6000_mpi.pot echo "tail silica.6000_mpi_gpu.pot:" tail silica.6000_mpi_gpu.pot +echo "tail silica.6000_mpi_not_gpu.pot:" +tail silica.6000_mpi_not_gpu.pot echo "-------------------------------------------" echo "tail zno.6912_mpi.pot:" tail zno.6912_mpi.pot; echo "tail zno.6912_mpi_gpu.pot:" -tail zno.6912_mpi_gpu.pot; +tail zno.6912_mpi_gpu.pot +echo "tail zno.6912_mpi_not_gpu.pot:" +tail zno.6912_mpi_not_gpu.pot + echo "___________________________________________" echo "Medium Systems" @@ -32,31 +40,45 @@ echo "-------------------------------------------" echo "tail water.78480_mpi.pot:" tail water.78480_mpi.pot; echo "tail water.78480_mpi_gpu.pot:" -tail water.78480_mpi_gpu.pot; +tail water.78480_mpi_gpu.pot +echo "tail water.78480_mpi_not_gpu.pot:" +tail water.78480_mpi_not_gpu.pot + echo "-------------------------------------------" echo "tail silica.72000_mpi.pot:" tail silica.72000_mpi.pot; echo "tail silica.72000_mpi_gpu.pot:" tail silica.72000_mpi_gpu.pot; +echo "tail silica.72000_mpi_not_gpu.pot:" +tail silica.72000_mpi_not_gpu.pot; + echo "-------------------------------------------" echo "tail bilayer.56800_mpi.pot:" tail bilayer.56800_mpi.pot; echo "tail bilayer.56800_mpi_gpu.pot:" tail bilayer.56800_mpi_gpu.pot; +echo "tail bilayer.56800_mpi_not_gpu.pot:" +tail bilayer.56800_mpi_not_gpu.pot; echo "-------------------------------------------" echo "tail dna_mpi.pot:" tail dna_mpi.pot; echo "tail dna_mpi_gpu.pot:" tail dna_mpi_gpu.pot; +echo "tail dna_mpi_not_gpu.pot:" +tail dna_mpi_not_gpu.pot; + echo "-------------------------------------------" echo "tail petn_mpi.pot:" tail petn_mpi.pot; echo "tail petn_mpi_gpu.pot:" tail petn_mpi_gpu.pot; +echo "tail petn_mpi_not_gpu.pot:" +tail petn_mpi_not_gpu.pot; + echo "___________________________________________" echo "Large Systems" @@ -66,18 +88,26 @@ echo "tail water.327000_mpi.pot:" tail water.327000_mpi.pot; echo "tail water.327000_mpi_gpu.pot:" tail water.327000_mpi_gpu.pot; +echo "tail water.327000_mpi_not_gpu.pot:" +tail water.327000_mpi_not_gpu.pot; + echo "-------------------------------------------" echo "tail silica.300000_mpi.pot:" tail silica.300000_mpi.pot; echo "tail silica.300000_mpi_gpu.pot:" tail silica.300000_mpi_gpu.pot; +echo "tail silica.300000_mpi_not_gpu.pot:" +tail silica.300000_mpi_not_gpu.pot; + echo "-------------------------------------------" echo "tail bilayer.340800_mpi.pot:" tail bilayer.340800_mpi.pot; echo "tail bilayer.340800_mpi_gpu.pot:" tail bilayer.340800_mpi_gpu.pot; +echo "tail bilayer.340800_mpi_not_gpu.pot:" +tail bilayer.340800_mpi_not_gpu.pot; diff --git a/test_harness/sim_test_master.sh b/test_harness/sim_test_master.sh index d5d14a459b05422556e786c47d0475b1e241d042..203c5ac61a72f10d5ccc7240417f8da7127e976a 100755 --- a/test_harness/sim_test_master.sh +++ b/test_harness/sim_test_master.sh @@ -13,20 +13,20 @@ qsub short_jobs.sh # Then, one for all the medium jobs -#qsub med_jobs.sh +qsub med_jobs.sh # Finally, one for each of the long jobs qsub water_long_mpi.sh qsub water_long_mpi_gpu.sh -#qsub water_long_mpi_not_gpu.sh +qsub water_long_mpi_not_gpu.sh qsub silica_long_mpi.sh qsub silica_long_mpi_gpu.sh -#qsub silica_long_mpi_not_gpu.sh +qsub silica_long_mpi_not_gpu.sh qsub bilayer_long_mpi.sh qsub bilayer_long_mpi_gpu.sh -#qsub bilayer_long_mpi_not_gpu.sh +qsub bilayer_long_mpi_not_gpu.sh echo "All jobs submitted" diff --git a/test_harness/water_327000_mpi_not_gpu_control b/test_harness/water_327000_mpi_not_gpu_control new file mode 100644 index 0000000000000000000000000000000000000000..88a9523876e0ae119b26b011a83b2e72ecba4acb --- /dev/null +++ b/test_harness/water_327000_mpi_not_gpu_control @@ -0,0 +1,44 @@ +simulation_name water.327000_mpi_not_gpu ! output files will carry this name + their specific ext. +ensemble_type 1 ! 0: NVE 1: Berendsen NVT 2: Nose-Hoover NVT(under testing) 3: semi-isotropic NPT 4: isotropic NPT 5: anisotropic NPT (under development) +nsteps 100 ! number of simulation steps +dt 0.10 ! time step in fs +proc_by_dim 2 2 2 ! distribution of processors by dimensions +geo_format 0 ! 0: custom 1: pdb (only if natoms < 100000) 2: ASCII restart 3: binary restart + +tabulate_long_range 0 ! number of sampling points for cubic spline interpolation, 0 no interpolation +energy_update_freq 10 +remove_CoM_vel 500 ! remove the transrot vel of CoM every 'this many' steps +reposition_atoms 1 ! 1:center of mass to center of box + +reneighbor 1 +vlist_buffer 0 +nbrhood_cutoff 4.5 ! bond cutoff in A +hbond_cutoff 7.5 ! hbond cutoff in A +thb_cutoff 0.001 ! cutoff value for three body interactions + +qeq_freq 1 ! frequency to update charges with QEq +q_err 1e-6 ! norm of the relative residual in QEq solve + +temp_init 0.01 ! initial temperature of the system +temp_final 300.0 ! final temperature of the system +t_mass 500.0 ! 0.16666 for nhNVT ! 500.0 for bNVT, iNPT, sNPT ! in fs, thermal inertia +t_rate 5.0 ! in K +t_freq 1.0 ! in ps +t_mode 2 ! 2: constant slope + +pressure 0.000101325 0.000101325 0.000101325 ! desired pressure of the simulated system in GPa, 1atm = 0.000101325 GPa +p_mass 10000.00 10000.00 10000.00 ! in fs, pressure inertia parameter + +write_freq 0 ! write trajectory after so many steps +traj_method 1 ! 0: simple parallel I/O, 1: MPI I/O +traj_title micelle ! (no white spaces) +atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file +atom_forces 0 ! 0: basic atom format, 1: print force on each atom in the trajectory file +atom_velocities 1 ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file +bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file +angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file + +restart_format 1 ! 0: restarts in ASCII 1: restarts in binary +restart_freq 10000 ! 0: do not output any restart files. >0: output a restart file at every 'this many' steps + +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs diff --git a/test_harness/water_long_mpi_not_gpu.sh b/test_harness/water_long_mpi_not_gpu.sh new file mode 100755 index 0000000000000000000000000000000000000000..cb2ecbe22b7cf7096dc4f28d18192599f6dfd9c1 --- /dev/null +++ b/test_harness/water_long_mpi_not_gpu.sh @@ -0,0 +1,20 @@ +#!/bin/bash -login + +#PBS -l nodes=1:ppn=8:gpus=8 +#PBS -l mem=120gb +#PBS -l walltime=4:00:00 +#PBS -l feature=gpgpu:intel16 + +cd "${PBS_O_WORKDIR}" +#cd /mnt/home/korteme1/PuReMD_new/test_harness + +#################### +# MPI-Not-GPU Runs # +#################### + +module purge +module load GNU/4.8.2 OpenMPI/1.6.5 CUDA/6.0 + +mpirun -np 8 ../PG-PuReMD/bin/pg-puremd-not-gpu ../data/benchmarks/water/water_327000.geo ../data/benchmarks/water/ffield.water water_327000_mpi_not_gpu_control + +