diff --git a/PuReMD/src/allocate.c b/PuReMD/src/allocate.c index 0753ea1090ec9b2f4c60007047c86f7c4a91728e..b2c9aa5b14e096cca696033083de3d1f804d57ab 100644 --- a/PuReMD/src/allocate.c +++ b/PuReMD/src/allocate.c @@ -145,9 +145,8 @@ void DeAllocate_Workspace( control_params *control, storage *workspace ) sfree( workspace->Clp, "Clp" ); sfree( workspace->vlpex, "vlpex" ); sfree( workspace->bond_mark, "bond_mark" ); - sfree( workspace->done_after, "done_after" ); - /* QEq storage */ + /* CM storage */ sfree( workspace->Hdia_inv, "Hdia_inv" ); sfree( workspace->b_s, "b_s" ); sfree( workspace->b_t, "b_t" ); @@ -159,28 +158,67 @@ void DeAllocate_Workspace( control_params *control, storage *workspace ) sfree( workspace->b, "b" ); sfree( workspace->x, "x" ); - /* GMRES storage */ - for ( i = 0; i < RESTART + 1; ++i ) - { - sfree( workspace->h[i], "h[i]" ); - sfree( workspace->v[i], "v[i]" ); - } - sfree( workspace->h, "h" ); - sfree( workspace->v, "v" ); - sfree( workspace->y, "y" ); - sfree( workspace->z, "z" ); - sfree( workspace->g, "g" ); - sfree( workspace->hs, "hs" ); - sfree( workspace->hc, "hc" ); - /* CG storage */ - sfree( workspace->r, "r" ); - sfree( workspace->d, "d" ); - sfree( workspace->q, "q" ); - sfree( workspace->p, "p" ); - sfree( workspace->r2, "r2" ); - sfree( workspace->d2, "d2" ); - sfree( workspace->q2, "q2" ); - sfree( workspace->p2, "p2" ); + if ( control->cm_solver_type == GMRES_S + || control->cm_solver_type == GMRES_H_S ) + { + for ( i = 0; i < RESTART + 1; ++i ) + { + sfree( workspace->h[i], "h[i]" ); + sfree( workspace->v[i], "v[i]" ); + } + + sfree( workspace->y, "y" ); + sfree( workspace->g, "g" ); + sfree( workspace->hc, "hc" ); + sfree( workspace->hs, "hs" ); + sfree( workspace->h, "h" ); + sfree( workspace->v, "v" ); + } + + if ( control->cm_solver_type == GMRES_S + || control->cm_solver_type == GMRES_H_S + || control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) + { + sfree( workspace->z, "z" ); + } + + if ( 
control->cm_solver_type == CG_S + || control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) + { + sfree( workspace->d, "d" ); + sfree( workspace->p, "p" ); + sfree( workspace->q, "q" ); + sfree( workspace->r, "r" ); + } + + if ( control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) + { + sfree( workspace->m, "m" ); + sfree( workspace->n, "n" ); + sfree( workspace->u, "u" ); + sfree( workspace->w, "w" ); + } + + if ( control->cm_solver_type == CG_S + || control->cm_solver_type == PIPECG_S ) + { + sfree( workspace->r2, "r2" ); + sfree( workspace->d2, "d2" ); + sfree( workspace->q2, "q2" ); + sfree( workspace->p2, "p2" ); + } + + if ( control->cm_solver_type == PIPECG_S ) + { + sfree( workspace->m2, "m2" ); + sfree( workspace->n2, "n2" ); + sfree( workspace->u2, "u2" ); + sfree( workspace->w2, "w2" ); + sfree( workspace->z2, "z2" ); + } /* integrator */ // sfree( workspace->f_old ); @@ -239,144 +277,171 @@ void DeAllocate_Workspace( control_params *control, storage *workspace ) int Allocate_Workspace( reax_system *system, control_params *control, - storage *workspace, int local_cap, int total_cap, - MPI_Comm comm, char *msg ) + storage *workspace, int local_cap, int total_cap, + MPI_Comm comm, char *msg ) { - int i, total_real, total_rvec, local_int, local_real, local_rvec; + int i, total_real, total_rvec, local_rvec; workspace->allocated = 1; total_real = total_cap * sizeof(real); total_rvec = total_cap * sizeof(rvec); - local_int = local_cap * sizeof(int); - local_real = local_cap * sizeof(real); local_rvec = local_cap * sizeof(rvec); /* communication storage */ for ( i = 0; i < MAX_NBRS; ++i ) { - workspace->tmp_dbl[i] = (real*) - scalloc( total_cap, sizeof(real), "tmp_dbl", comm ); - workspace->tmp_rvec[i] = (rvec*) - scalloc( total_cap, sizeof(rvec), "tmp_rvec", comm ); - workspace->tmp_rvec2[i] = (rvec2*) - scalloc( total_cap, sizeof(rvec2), "tmp_rvec2", comm ); + workspace->tmp_dbl[i] = scalloc( total_cap, 
sizeof(real), "tmp_dbl", comm ); + workspace->tmp_rvec[i] = scalloc( total_cap, sizeof(rvec), "tmp_rvec", comm ); + workspace->tmp_rvec2[i] = scalloc( total_cap, sizeof(rvec2), "tmp_rvec2", comm ); } /* bond order related storage */ - workspace->within_bond_box = (int*) - scalloc( total_cap, sizeof(int), "skin", comm ); - workspace->total_bond_order = (real*) smalloc( total_real, "total_bo", comm ); - workspace->Deltap = (real*) smalloc( total_real, "Deltap", comm ); - workspace->Deltap_boc = (real*) smalloc( total_real, "Deltap_boc", comm ); - workspace->dDeltap_self = (rvec*) smalloc( total_rvec, "dDeltap_self", comm ); - workspace->Delta = (real*) smalloc( total_real, "Delta", comm ); - workspace->Delta_lp = (real*) smalloc( total_real, "Delta_lp", comm ); - workspace->Delta_lp_temp = (real*) - smalloc( total_real, "Delta_lp_temp", comm ); - workspace->dDelta_lp = (real*) smalloc( total_real, "dDelta_lp", comm ); - workspace->dDelta_lp_temp = (real*) - smalloc( total_real, "dDelta_lp_temp", comm ); - workspace->Delta_e = (real*) smalloc( total_real, "Delta_e", comm ); - workspace->Delta_boc = (real*) smalloc( total_real, "Delta_boc", comm ); - workspace->nlp = (real*) smalloc( total_real, "nlp", comm ); - workspace->nlp_temp = (real*) smalloc( total_real, "nlp_temp", comm ); - workspace->Clp = (real*) smalloc( total_real, "Clp", comm ); - workspace->vlpex = (real*) smalloc( total_real, "vlpex", comm ); - workspace->bond_mark = (int*) - scalloc( total_cap, sizeof(int), "bond_mark", comm ); - workspace->done_after = (int*) - scalloc( total_cap, sizeof(int), "done_after", comm ); - // fprintf( stderr, "p%d: bond order storage\n", system->my_rank ); - - /* QEq storage */ - workspace->Hdia_inv = (real*) - scalloc( total_cap, sizeof(real), "Hdia_inv", comm ); - workspace->b_s = (real*) scalloc( total_cap, sizeof(real), "b_s", comm ); - workspace->b_t = (real*) scalloc( total_cap, sizeof(real), "b_t", comm ); - workspace->b_prc = (real*) scalloc( total_cap, 
sizeof(real), "b_prc", comm ); - workspace->b_prm = (real*) scalloc( total_cap, sizeof(real), "b_prm", comm ); - workspace->s = (real*) scalloc( total_cap, sizeof(real), "s", comm ); - workspace->t = (real*) scalloc( total_cap, sizeof(real), "t", comm ); - workspace->droptol = (real*) - scalloc( total_cap, sizeof(real), "droptol", comm ); - workspace->b = (rvec2*) scalloc( total_cap, sizeof(rvec2), "b", comm ); - workspace->x = (rvec2*) scalloc( total_cap, sizeof(rvec2), "x", comm ); - - /* GMRES storage */ - workspace->y = (real*) scalloc( RESTART + 1, sizeof(real), "y", comm ); - workspace->z = (real*) scalloc( RESTART + 1, sizeof(real), "z", comm ); - workspace->g = (real*) scalloc( RESTART + 1, sizeof(real), "g", comm ); - workspace->h = (real**) scalloc( RESTART + 1, sizeof(real*), "h", comm ); - workspace->hs = (real*) scalloc( RESTART + 1, sizeof(real), "hs", comm ); - workspace->hc = (real*) scalloc( RESTART + 1, sizeof(real), "hc", comm ); - workspace->v = (real**) scalloc( RESTART + 1, sizeof(real*), "v", comm ); - - for ( i = 0; i < RESTART + 1; ++i ) - { - workspace->h[i] = (real*) scalloc( RESTART + 1, sizeof(real), "h[i]", comm ); - workspace->v[i] = (real*) scalloc( total_cap, sizeof(real), "v[i]", comm ); - } - - /* CG storage */ - workspace->r = (real*) scalloc( total_cap, sizeof(real), "r", comm ); - workspace->d = (real*) scalloc( total_cap, sizeof(real), "d", comm ); - workspace->q = (real*) scalloc( total_cap, sizeof(real), "q", comm ); - workspace->p = (real*) scalloc( total_cap, sizeof(real), "p", comm ); - workspace->r2 = (rvec2*) scalloc( total_cap, sizeof(rvec2), "r2", comm ); - workspace->d2 = (rvec2*) scalloc( total_cap, sizeof(rvec2), "d2", comm ); - workspace->q2 = (rvec2*) scalloc( total_cap, sizeof(rvec2), "q2", comm ); - workspace->p2 = (rvec2*) scalloc( total_cap, sizeof(rvec2), "p2", comm ); + workspace->within_bond_box = scalloc( total_cap, sizeof(int), "skin", comm ); + workspace->total_bond_order = smalloc( total_real, 
"total_bo", comm ); + workspace->Deltap = smalloc( total_real, "Deltap", comm ); + workspace->Deltap_boc = smalloc( total_real, "Deltap_boc", comm ); + workspace->dDeltap_self = smalloc( total_rvec, "dDeltap_self", comm ); + workspace->Delta = smalloc( total_real, "Delta", comm ); + workspace->Delta_lp = smalloc( total_real, "Delta_lp", comm ); + workspace->Delta_lp_temp = smalloc( total_real, "Delta_lp_temp", comm ); + workspace->dDelta_lp = smalloc( total_real, "dDelta_lp", comm ); + workspace->dDelta_lp_temp = smalloc( total_real, "dDelta_lp_temp", comm ); + workspace->Delta_e = smalloc( total_real, "Delta_e", comm ); + workspace->Delta_boc = smalloc( total_real, "Delta_boc", comm ); + workspace->nlp = smalloc( total_real, "nlp", comm ); + workspace->nlp_temp = smalloc( total_real, "nlp_temp", comm ); + workspace->Clp = smalloc( total_real, "Clp", comm ); + workspace->vlpex = smalloc( total_real, "vlpex", comm ); + workspace->bond_mark = scalloc( total_cap, sizeof(int), "bond_mark", comm ); + + /* CM storage */ + workspace->Hdia_inv = scalloc( total_cap, sizeof(real), "Hdia_inv", comm ); + workspace->b_s = scalloc( total_cap, sizeof(real), "b_s", comm ); + workspace->b_t = scalloc( total_cap, sizeof(real), "b_t", comm ); + workspace->b_prc = scalloc( total_cap, sizeof(real), "b_prc", comm ); + workspace->b_prm = scalloc( total_cap, sizeof(real), "b_prm", comm ); + workspace->s = scalloc( total_cap, sizeof(real), "s", comm ); + workspace->t = scalloc( total_cap, sizeof(real), "t", comm ); + workspace->droptol = scalloc( total_cap, sizeof(real), "droptol", comm ); + workspace->b = scalloc( total_cap, sizeof(rvec2), "b", comm ); + workspace->x = scalloc( total_cap, sizeof(rvec2), "x", comm ); + + if ( control->cm_solver_type == GMRES_S + || control->cm_solver_type == GMRES_H_S ) + { + workspace->y = scalloc( RESTART + 1, sizeof(real), "y", comm ); + workspace->g = scalloc( RESTART + 1, sizeof(real), "g", comm ); + workspace->hc = scalloc( RESTART + 1, sizeof(real), 
"hc", comm ); + workspace->hs = scalloc( RESTART + 1, sizeof(real), "hs", comm ); + workspace->h = scalloc( RESTART + 1, sizeof(real*), "h", comm ); + workspace->v = scalloc( RESTART + 1, sizeof(real*), "v", comm ); + + for ( i = 0; i < RESTART + 1; ++i ) + { + workspace->h[i] = scalloc( RESTART + 1, sizeof(real), "h[i]", comm ); + workspace->v[i] = scalloc( total_cap, sizeof(real), "v[i]", comm ); + } + } + + if ( control->cm_solver_type == GMRES_S + || control->cm_solver_type == GMRES_H_S ) + { + workspace->z = scalloc( RESTART + 1, sizeof(real), "z", comm ); + } + else if ( control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) + { + workspace->z = scalloc( total_cap, sizeof(real), "z", comm ); + } + + if ( control->cm_solver_type == CG_S + || control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) + { + workspace->d = scalloc( total_cap, sizeof(real), "d", comm ); + workspace->p = scalloc( total_cap, sizeof(real), "p", comm ); + workspace->q = scalloc( total_cap, sizeof(real), "q", comm ); + workspace->r = scalloc( total_cap, sizeof(real), "r", comm ); + } + + if ( control->cm_solver_type == PIPECG_S + || control->cm_solver_type == PIPECR_S ) + { + workspace->m = scalloc( total_cap, sizeof(real), "m", comm ); + workspace->n = scalloc( total_cap, sizeof(real), "n", comm ); + workspace->u = scalloc( total_cap, sizeof(real), "u", comm ); + workspace->w = scalloc( total_cap, sizeof(real), "w", comm ); + } + + if ( control->cm_solver_type == CG_S + || control->cm_solver_type == PIPECG_S ) + { + workspace->d2 = scalloc( total_cap, sizeof(rvec2), "d2", comm ); + workspace->r2 = scalloc( total_cap, sizeof(rvec2), "r2", comm ); + workspace->p2 = scalloc( total_cap, sizeof(rvec2), "p2", comm ); + workspace->q2 = scalloc( total_cap, sizeof(rvec2), "q2", comm ); + } + + if ( control->cm_solver_type == PIPECG_S ) + { + workspace->m2 = scalloc( total_cap, sizeof(rvec2), "m2", comm ); + workspace->n2 = scalloc( total_cap, 
sizeof(rvec2), "n2", comm ); + workspace->u2 = scalloc( total_cap, sizeof(rvec2), "u2", comm ); + workspace->w2 = scalloc( total_cap, sizeof(rvec2), "w2", comm ); + workspace->z2 = scalloc( total_cap, sizeof(rvec2), "z2", comm ); + } /* integrator storage */ - workspace->v_const = (rvec*) smalloc( local_rvec, "v_const", comm ); + workspace->v_const = smalloc( local_rvec, "v_const", comm ); /* storage for analysis */ if ( control->molecular_analysis || control->diffusion_coef ) { - workspace->mark = (int*) scalloc( local_cap, sizeof(int), "mark", comm ); - workspace->old_mark = (int*) - scalloc( local_cap, sizeof(int), "old_mark", comm ); + workspace->mark = scalloc( local_cap, sizeof(int), "mark", comm ); + workspace->old_mark = scalloc( local_cap, sizeof(int), "old_mark", comm ); } else - workspace->mark = workspace->old_mark = NULL; + { + workspace->mark = NULL; + workspace->old_mark = NULL; + } if ( control->diffusion_coef ) - workspace->x_old = (rvec*) - scalloc( local_cap, sizeof(rvec), "x_old", comm ); - else workspace->x_old = NULL; + { + workspace->x_old = scalloc( local_cap, sizeof(rvec), "x_old", comm ); + } + else + { + workspace->x_old = NULL; + } /* force related storage */ - workspace->f = (rvec*) scalloc( total_cap, sizeof(rvec), "f", comm ); - workspace->CdDelta = (real*) - scalloc( total_cap, sizeof(real), "CdDelta", comm ); + workspace->f = scalloc( total_cap, sizeof(rvec), "f", comm ); + workspace->CdDelta = scalloc( total_cap, sizeof(real), "CdDelta", comm ); #ifdef TEST_FORCES - workspace->dDelta = (rvec*) smalloc( total_rvec, "dDelta", comm ); - workspace->f_ele = (rvec*) smalloc( total_rvec, "f_ele", comm ); - workspace->f_vdw = (rvec*) smalloc( total_rvec, "f_vdw", comm ); - workspace->f_bo = (rvec*) smalloc( total_rvec, "f_bo", comm ); - workspace->f_be = (rvec*) smalloc( total_rvec, "f_be", comm ); - workspace->f_lp = (rvec*) smalloc( total_rvec, "f_lp", comm ); - workspace->f_ov = (rvec*) smalloc( total_rvec, "f_ov", comm ); - 
workspace->f_un = (rvec*) smalloc( total_rvec, "f_un", comm ); - workspace->f_ang = (rvec*) smalloc( total_rvec, "f_ang", comm ); - workspace->f_coa = (rvec*) smalloc( total_rvec, "f_coa", comm ); - workspace->f_pen = (rvec*) smalloc( total_rvec, "f_pen", comm ); - workspace->f_hb = (rvec*) smalloc( total_rvec, "f_hb", comm ); - workspace->f_tor = (rvec*) smalloc( total_rvec, "f_tor", comm ); - workspace->f_con = (rvec*) smalloc( total_rvec, "f_con", comm ); - workspace->f_tot = (rvec*) smalloc( total_rvec, "f_tot", comm ); + workspace->dDelta = smalloc( total_rvec, "dDelta", comm ); + workspace->f_ele = smalloc( total_rvec, "f_ele", comm ); + workspace->f_vdw = smalloc( total_rvec, "f_vdw", comm ); + workspace->f_bo = smalloc( total_rvec, "f_bo", comm ); + workspace->f_be = smalloc( total_rvec, "f_be", comm ); + workspace->f_lp = smalloc( total_rvec, "f_lp", comm ); + workspace->f_ov = smalloc( total_rvec, "f_ov", comm ); + workspace->f_un = smalloc( total_rvec, "f_un", comm ); + workspace->f_ang = smalloc( total_rvec, "f_ang", comm ); + workspace->f_coa = smalloc( total_rvec, "f_coa", comm ); + workspace->f_pen = smalloc( total_rvec, "f_pen", comm ); + workspace->f_hb = smalloc( total_rvec, "f_hb", comm ); + workspace->f_tor = smalloc( total_rvec, "f_tor", comm ); + workspace->f_con = smalloc( total_rvec, "f_con", comm ); + workspace->f_tot = smalloc( total_rvec, "f_tot", comm ); if ( system->my_rank == MASTER_NODE ) { - workspace->rcounts = (int*) - smalloc( system->wsize * sizeof(int), "rcount", comm ); - workspace->displs = (int*) - smalloc( system->wsize * sizeof(int), "displs", comm ); - workspace->id_all = (int*) - smalloc( system->bigN * sizeof(int), "id_all", comm ); - workspace->f_all = (rvec*) - smalloc( system->bigN * sizeof(rvec), "f_all", comm ); + workspace->rcounts = smalloc( system->wsize * sizeof(int), "rcount", comm ); + workspace->displs = smalloc( system->wsize * sizeof(int), "displs", comm ); + workspace->id_all = smalloc( system->bigN * 
sizeof(int), "id_all", comm ); + workspace->f_all = smalloc( system->bigN * sizeof(rvec), "f_all", comm ); } else { @@ -394,8 +459,12 @@ int Allocate_Workspace( reax_system *system, control_params *control, void Reallocate_Neighbor_List( reax_list *far_nbrs, int n, int num_intrs, MPI_Comm comm ) { + int format; + + format = far_nbrs->format; + Delete_List( far_nbrs, comm ); - if (!Make_List( n, num_intrs, TYP_FAR_NEIGHBOR, far_nbrs, comm )) + if (!Make_List( n, num_intrs, TYP_FAR_NEIGHBOR, format, far_nbrs, comm )) { fprintf(stderr, "Problem in initializing far nbrs list. Terminating!\n"); MPI_Abort( comm, INSUFFICIENT_MEMORY ); @@ -403,7 +472,8 @@ void Reallocate_Neighbor_List( reax_list *far_nbrs, int n, int num_intrs, } -int Allocate_Matrix( sparse_matrix **pH, int cap, int m, MPI_Comm comm ) +int Allocate_Matrix( sparse_matrix **pH, int cap, int m, + int format, MPI_Comm comm ) { sparse_matrix *H; @@ -412,6 +482,26 @@ int Allocate_Matrix( sparse_matrix **pH, int cap, int m, MPI_Comm comm ) H = *pH; H->cap = cap; H->m = m; + H->format = format; + H->start = (int*) smalloc( sizeof(int) * cap, "matrix_start", comm ); + H->end = (int*) smalloc( sizeof(int) * cap, "matrix_end", comm ); + H->entries = (sparse_matrix_entry*) + smalloc( sizeof(sparse_matrix_entry) * m, "matrix_entries", comm ); + + return SUCCESS; +} +int Allocate_Matrix2( sparse_matrix **pH, int n, int cap, int m, + int format, MPI_Comm comm ) +{ + sparse_matrix *H; + + *pH = (sparse_matrix*) + smalloc( sizeof(sparse_matrix), "sparse_matrix", comm ); + H = *pH; + H->n = n; + H->cap = cap; + H->m = m; + H->format = format; H->start = (int*) smalloc( sizeof(int) * cap, "matrix_start", comm ); H->end = (int*) smalloc( sizeof(int) * cap, "matrix_end", comm ); H->entries = (sparse_matrix_entry*) @@ -433,8 +523,12 @@ void Deallocate_Matrix( sparse_matrix *H ) int Reallocate_Matrix( sparse_matrix **H, int n, int m, char *name, MPI_Comm comm ) { + int format; + + format = (*H)->format; + Deallocate_Matrix( *H 
); - if ( !Allocate_Matrix( H, n, m, comm ) ) + if ( !Allocate_Matrix( H, n, m, format, comm ) ) { fprintf(stderr, "not enough space for %s matrix. terminating!\n", name); MPI_Abort( comm, INSUFFICIENT_MEMORY ); @@ -452,7 +546,9 @@ int Reallocate_Matrix( sparse_matrix **H, int n, int m, char *name, int Reallocate_HBonds_List( reax_system *system, reax_list *hbonds, MPI_Comm comm ) { - int i, id, total_hbonds; + int i, id, total_hbonds, format; + + format = hbonds->format; total_hbonds = 0; for ( i = 0; i < system->n; ++i ) @@ -466,7 +562,7 @@ int Reallocate_HBonds_List( reax_system *system, reax_list *hbonds, total_hbonds = (int)(MAX( total_hbonds * SAFER_ZONE, MIN_CAP * MIN_HBONDS )); Delete_List( hbonds, comm ); - if ( !Make_List( system->Hcap, total_hbonds, TYP_HBOND, hbonds, comm ) ) + if ( !Make_List( system->Hcap, total_hbonds, TYP_HBOND, format, hbonds, comm ) ) { fprintf( stderr, "not enough space for hbonds list. terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); @@ -479,7 +575,9 @@ int Reallocate_HBonds_List( reax_system *system, reax_list *hbonds, int Reallocate_Bonds_List( reax_system *system, reax_list *bonds, int *total_bonds, int *est_3body, MPI_Comm comm ) { - int i; + int i, format; + + format = bonds->format; *total_bonds = 0; *est_3body = 0; @@ -493,7 +591,7 @@ int Reallocate_Bonds_List( reax_system *system, reax_list *bonds, *total_bonds = (int)(MAX( *total_bonds * SAFE_ZONE, MIN_CAP * MIN_BONDS )); Delete_List( bonds, comm ); - if (!Make_List(system->total_cap, *total_bonds, TYP_BOND, bonds, comm)) + if (!Make_List(system->total_cap, *total_bonds, TYP_BOND, format, bonds, comm)) { fprintf( stderr, "not enough space for bonds list. 
terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); @@ -510,10 +608,11 @@ int Estimate_GCell_Population( reax_system* system, MPI_Comm comm ) ivec c; grid *g; grid_cell *gc; - simulation_box *big_box, *my_ext_box; + simulation_box *my_ext_box; + //simulation_box *big_box; reax_atom *atoms; - big_box = &(system->big_box); + //big_box = &(system->big_box); my_ext_box = &(system->my_ext_box); g = &(system->my_grid); atoms = system->my_atoms; @@ -676,7 +775,8 @@ void Deallocate_Grid( grid *g ) buffers are void*, type cast to the correct pointer type to access the allocated buffers */ int Allocate_MPI_Buffers( mpi_datatypes *mpi_data, int est_recv, - neighbor_proc *my_nbrs, char *msg ) + neighbor_proc *my_nbrs, neighbor_proc *my_nt_nbrs, + char *msg ) { int i; mpi_out_data *mpi_buf; @@ -684,24 +784,47 @@ int Allocate_MPI_Buffers( mpi_datatypes *mpi_data, int est_recv, comm = mpi_data->world; - /* in buffers */ - mpi_data->in1_buffer = (void*) - scalloc( est_recv, sizeof(boundary_atom), "in1_buffer", comm ); - mpi_data->in2_buffer = (void*) - scalloc( est_recv, sizeof(boundary_atom), "in2_buffer", comm ); - - /* out buffers */ + /* buffers for incoming messages, + * see SendRecv for MPI datatypes sent */ + mpi_data->in1_buffer = scalloc( est_recv, + MAX3( sizeof(mpi_atom), sizeof(boundary_atom), sizeof(rvec) ), + "Allocate_MPI_Buffers::in1_buffer", comm ); + mpi_data->in2_buffer = scalloc( est_recv, + MAX3( sizeof(mpi_atom), sizeof(boundary_atom), sizeof(rvec) ), + "Allocate_MPI_Buffers::in2_buffer", comm ); + + /* buffers for outgoing messages, + * see SendRecv for MPI datatypes sent */ for ( i = 0; i < MAX_NBRS; ++i ) { - mpi_buf = &( mpi_data->out_buffers[i] ); + mpi_buf = &mpi_data->out_buffers[i]; + /* allocate storage for the neighbor processor i */ - mpi_buf->index = (int*) - scalloc( my_nbrs[i].est_send, sizeof(int), "mpibuf:index", comm ); - mpi_buf->out_atoms = (void*) - scalloc( my_nbrs[i].est_send, sizeof(boundary_atom), "mpibuf:out_atoms", - comm ); + 
mpi_buf->index = scalloc( my_nbrs[i].est_send, sizeof(int), + "Allocate_MPI_Buffers::mpi_buf->index", comm ); + mpi_buf->out_atoms = scalloc( my_nbrs[i].est_send, + MAX3( sizeof(mpi_atom), sizeof(boundary_atom), sizeof(rvec) ), + "Allocate_MPI_Buffers::mpi_buf->out_atoms", comm ); } +#if defined(NEUTRAL_TERRITORY) + /* Neutral Territory out buffers */ + for ( i = 0; i < REAX_MAX_NT_NBRS; ++i ) + { + /* in buffers */ + mpi_data->in_nt_buffer[i] = scalloc( my_nt_nbrs[i].est_recv, sizeof(real), + "mpibuf:in_nt_buffer", comm ); + /* out buffer */ + mpi_buf = &mpi_data->out_nt_buffers[i]; + + /* allocate storage for the neighbor processor i */ + mpi_buf->index = scalloc( my_nt_nbrs[i].est_send, sizeof(int), + "mpibuf:nt_index", comm ); + mpi_buf->out_atoms = scalloc( my_nt_nbrs[i].est_send, sizeof(real), + "mpibuf:nt_out_atoms", comm ); + } +#endif + return SUCCESS; } @@ -711,15 +834,26 @@ void Deallocate_MPI_Buffers( mpi_datatypes *mpi_data ) int i; mpi_out_data *mpi_buf; - sfree( mpi_data->in1_buffer, "in1_buffer" ); - sfree( mpi_data->in2_buffer, "in2_buffer" ); + sfree( mpi_data->in1_buffer, "Deallocate_MPI_Buffers::in1_buffer" ); + sfree( mpi_data->in2_buffer, "Deallocate_MPI_Buffers::in2_buffer" ); for ( i = 0; i < MAX_NBRS; ++i ) { - mpi_buf = &( mpi_data->out_buffers[i] ); - sfree( mpi_buf->index, "mpibuf:index" ); - sfree( mpi_buf->out_atoms, "mpibuf:out_atoms" ); + mpi_buf = &mpi_data->out_buffers[i]; + sfree( mpi_buf->index, "Deallocate_MPI_Buffers::mpi_buf->index" ); + sfree( mpi_buf->out_atoms, "Deallocate_MPI_Buffers::mpi_buf->out_atoms" ); + } + +#if defined(NEUTRAL_TERRITORY) + for ( i = 0; i < REAX_MAX_NT_NBRS; ++i ) + { + sfree( mpi_data->in_nt_buffer[i], "in_nt_buffer" ); + + mpi_buf = &mpi_data->out_nt_buffers[i]; + sfree( mpi_buf->index, "mpibuf:nt_index" ); + sfree( mpi_buf->out_atoms, "mpibuf:nt_out_atoms" ); } +#endif } @@ -729,7 +863,7 @@ void ReAllocate( reax_system *system, control_params *control, { int i, j, k, p; int num_bonds, est_3body, 
nflag, Nflag, Hflag, mpi_flag, ret, total_send; - int renbr; + int renbr, format; reallocate_data *realloc; reax_list *far_nbrs; sparse_matrix *H; @@ -766,10 +900,20 @@ void ReAllocate( reax_system *system, control_params *control, if ( system->n >= DANGER_ZONE * system->local_cap || (0 && system->n <= LOOSE_ZONE * system->local_cap) ) { +#if !defined(NEUTRAL_TERRITORY) nflag = 1; +#endif system->local_cap = (int)(system->n * SAFE_ZONE); } +#if defined(NEUTRAL_TERRITORY) + if ( workspace->H->NT >= DANGER_ZONE * workspace->H->cap ) + { + nflag = 1; + workspace->H->cap = (int)(workspace->H->NT * SAFE_ZONE_NT); + } +#endif + Nflag = 0; if ( system->N >= DANGER_ZONE * system->total_cap || (0 && system->N <= LOOSE_ZONE * system->total_cap) ) @@ -855,8 +999,13 @@ void ReAllocate( reax_system *system, control_params *control, (int)(realloc->Htop * SAFE_ZONE * sizeof(sparse_matrix_entry) / (1024 * 1024)) ); #endif +#if defined(NEUTRAL_TERRITORY) + Reallocate_Matrix( &(workspace->H), H->cap, + realloc->Htop * SAFE_ZONE_NT, "H", comm ); +#else Reallocate_Matrix( &(workspace->H), system->local_cap, realloc->Htop * SAFE_ZONE, "H", comm ); +#endif //Deallocate_Matrix( workspace->L ); //Deallocate_Matrix( workspace->U ); workspace->L = NULL; @@ -911,6 +1060,9 @@ void ReAllocate( reax_system *system, control_params *control, (int)(realloc->num_3body * sizeof(three_body_interaction_data) / (1024 * 1024)) ); #endif + + format = lists[THREE_BODIES]->format; + Delete_List( lists[THREE_BODIES], comm ); if ( num_bonds == -1 ) @@ -919,7 +1071,7 @@ void ReAllocate( reax_system *system, control_params *control, realloc->num_3body = (int)(MAX(realloc->num_3body * SAFE_ZONE, MIN_3BODIES)); if ( !Make_List( num_bonds, realloc->num_3body, TYP_THREE_BODY, - lists[THREE_BODIES], comm ) ) + format, lists[THREE_BODIES], comm ) ) { fprintf( stderr, "Problem in initializing angles list. 
Terminating!\n" ); MPI_Abort( comm, CANNOT_INITIALIZE ); @@ -968,6 +1120,21 @@ void ReAllocate( reax_system *system, control_params *control, break; } } + +#if defined(NEUTRAL_TERRITORY) + /* also check individual outgoing Neutral Territory buffers */ + for ( p = 0; p < REAX_MAX_NT_NBRS; ++p ) + { + nbr_pr = &system->my_nt_nbrs[p]; + nbr_data = &mpi_data->out_nt_buffers[p]; + + if ( nbr_data->cnt >= nbr_pr->est_send * 0.90 ) + { + mpi_flag = 1; + break; + } + } +#endif } if ( mpi_flag ) @@ -984,6 +1151,7 @@ void ReAllocate( reax_system *system, control_params *control, system->est_trans = (system->est_recv * sizeof(boundary_atom)) / sizeof(mpi_atom); total_send = 0; + for ( p = 0; p < MAX_NBRS; ++p ) { nbr_pr = &( system->my_nbrs[p] ); @@ -991,6 +1159,16 @@ void ReAllocate( reax_system *system, control_params *control, nbr_pr->est_send = MAX( nbr_data->cnt * SAFER_ZONE, MIN_SEND ); total_send += nbr_pr->est_send; } + +#if defined(NEUTRAL_TERRITORY) + for ( p = 0; p < REAX_MAX_NT_NBRS; ++p ) + { + nbr_pr = &system->my_nt_nbrs[p]; + nbr_data = &mpi_data->out_nt_buffers[p]; + nbr_pr->est_send = MAX( nbr_data->cnt * SAFER_ZONE_NT, MIN_SEND ); + } +#endif + #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: reallocating mpi_buf: recv=%d send=%d total=%dMB\n", system->my_rank, system->est_recv, total_send, @@ -1004,7 +1182,8 @@ void ReAllocate( reax_system *system, control_params *control, /* reallocate mpi buffers */ Deallocate_MPI_Buffers( mpi_data ); ret = Allocate_MPI_Buffers( mpi_data, system->est_recv, - system->my_nbrs, msg ); + system->my_nbrs, system->my_nt_nbrs, + msg ); if ( ret != SUCCESS ) { fprintf( stderr, "%s", msg ); diff --git a/PuReMD/src/allocate.h b/PuReMD/src/allocate.h index 271cb054d636f19d1d3beeea09546f0a22b1c380..669861a2498afb97d24a8f04baf8a5e87c045788 100644 --- a/PuReMD/src/allocate.h +++ b/PuReMD/src/allocate.h @@ -26,17 +26,23 @@ int PreAllocate_Space( reax_system*, control_params*, storage*, MPI_Comm ); void reax_atom_Copy( reax_atom*, 
reax_atom* ); + int Allocate_System( reax_system*, int, int, char* ); int Allocate_Workspace( reax_system*, control_params*, storage*, int, int, MPI_Comm, char* ); void Allocate_Grid( reax_system*, MPI_Comm ); + void Deallocate_Grid( grid* ); -int Allocate_MPI_Buffers( mpi_datatypes*, int, neighbor_proc*, char* ); +int Allocate_MPI_Buffers( mpi_datatypes*, int, neighbor_proc*, neighbor_proc*, char* ); + +int Allocate_Matrix( sparse_matrix**, int, int, int, MPI_Comm ); + +int Allocate_Matrix2( sparse_matrix**, int, int, int, int, MPI_Comm ); -int Allocate_Matrix( sparse_matrix**, int, int, MPI_Comm ); +void Deallocate_Matrix( sparse_matrix * ); int Allocate_HBond_List( int, int, int*, int*, reax_list* ); diff --git a/PuReMD/src/basic_comm.c b/PuReMD/src/basic_comm.c index 96c8397653e440d95feb25c9b074dc5b84de24d1..a50dbbf7ed150bb44a20fc2348d052192ccbb545 100644 --- a/PuReMD/src/basic_comm.c +++ b/PuReMD/src/basic_comm.c @@ -20,202 +20,630 @@ ----------------------------------------------------------------------*/ #include "reax_types.h" + #if defined(PURE_REAX) -#include "basic_comm.h" -#include "vector.h" + #include "basic_comm.h" + #include "vector.h" #elif defined(LAMMPS_REAX) -#include "reax_basic_comm.h" -#include "reax_vector.h" + #include "reax_basic_comm.h" + #include "reax_vector.h" #endif -#if defined(PURE_REAX) -void real_packer( void *dummy, mpi_out_data *out_buf ) + +typedef void (*dist_packer)( void*, mpi_out_data* ); +typedef void (*coll_unpacker)( void*, void*, mpi_out_data* ); + + +static void int_packer( void *dummy, mpi_out_data *out_buf ) +{ + int i; + int *buf = (int*) dummy; + int *out = (int*) out_buf->out_atoms; + + for ( i = 0; i < out_buf->cnt; ++i ) + { + //if( buf[ out_buf->index[i] ] !=-1 ) + out[i] = buf[ out_buf->index[i] ]; + } +} + + +static void real_packer( void *dummy, mpi_out_data *out_buf ) { int i; real *buf = (real*) dummy; real *out = (real*) out_buf->out_atoms; for ( i = 0; i < out_buf->cnt; ++i ) + { out[i] = buf[ 
out_buf->index[i] ]; + } } -void rvec_packer( void *dummy, mpi_out_data *out_buf ) +static void rvec_packer( void *dummy, mpi_out_data *out_buf ) { int i; - rvec *buf = (rvec*) dummy; - rvec *out = (rvec*)out_buf->out_atoms; + rvec *buf, *out; + + buf = (rvec*) dummy; + out = (rvec*) out_buf->out_atoms; for ( i = 0; i < out_buf->cnt; ++i ) - memcpy( out[i], buf[ out_buf->index[i] ], sizeof(rvec) ); + { + memcpy( out + i, buf + out_buf->index[i], sizeof(rvec) ); + } } -void rvec2_packer( void *dummy, mpi_out_data *out_buf ) +static void rvec2_packer( void *dummy, mpi_out_data *out_buf ) { int i; - rvec2 *buf = (rvec2*) dummy; - rvec2 *out = (rvec2*) out_buf->out_atoms; + rvec2 *buf, *out; + + buf = (rvec2*) dummy; + out = (rvec2*) out_buf->out_atoms; for ( i = 0; i < out_buf->cnt; ++i ) - memcpy( out[i], buf[ out_buf->index[i] ], sizeof(rvec2) ); + { + memcpy( out + i, buf + out_buf->index[i], sizeof(rvec2) ); + } +} + + +static void int_unpacker( void *dummy_in, void *dummy_buf, mpi_out_data *out_buf ) +{ + int i; + int *in, *buf; + + in = (int*) dummy_in; + buf = (int*) dummy_buf; + + for ( i = 0; i < out_buf->cnt; ++i ) + { + if( buf[ out_buf->index[i] ] == -1 && in[i] != -1 ) + { + buf[ out_buf->index[i] ] = in[i]; + } + } +} + + +static void real_unpacker( void *dummy_in, void *dummy_buf, mpi_out_data *out_buf ) +{ + int i; + real *in, *buf; + + in = (real*) dummy_in; + buf = (real*) dummy_buf; + + for ( i = 0; i < out_buf->cnt; ++i ) + { + buf[ out_buf->index[i] ] += in[i]; + } +} + + +static void rvec_unpacker( void *dummy_in, void *dummy_buf, mpi_out_data *out_buf ) +{ + int i; + rvec *in, *buf; + + in = (rvec*) dummy_in; + buf = (rvec*) dummy_buf; + + for ( i = 0; i < out_buf->cnt; ++i ) + { + rvec_Add( buf[ out_buf->index[i] ], in[i] ); + +#if defined(DEBUG) + fprintf( stderr, "rvec_unpacker: cnt=%d i =%d index[i]=%d\n", + out_buf->cnt, i, out_buf->index[i] ); +#endif + } +} + + +static void rvec2_unpacker( void *dummy_in, void *dummy_buf, mpi_out_data 
*out_buf ) +{ + int i; + rvec2 *in, *buf; + + in = (rvec2*) dummy_in; + buf = (rvec2*) dummy_buf; + + for ( i = 0; i < out_buf->cnt; ++i ) + { + buf[ out_buf->index[i] ][0] += in[i][0]; + buf[ out_buf->index[i] ][1] += in[i][1]; + } +} + + +static void * Get_Buffer_Offset( const void * const buffer, + const int offset, const int type ) +{ + void * ptr; + + switch ( type ) + { + case INT_PTR_TYPE: + ptr = (int *) buffer + offset; + break; + + case REAL_PTR_TYPE: + ptr = (real *) buffer + offset; + break; + + case RVEC_PTR_TYPE: + ptr = (rvec *) buffer + offset; + break; + + case RVEC2_PTR_TYPE: + ptr = (rvec2 *) buffer + offset; + break; + + default: + fprintf( stderr, "[ERROR] unknown pointer type. Terminating...\n" ); + exit( UNKNOWN_OPTION ); + break; + } + + return ptr; +} + + +static dist_packer Get_Packer( const int type ) +{ + dist_packer ptr; + + switch ( type ) + { + case INT_PTR_TYPE: + ptr = &int_packer; + break; + + case REAL_PTR_TYPE: + ptr = &real_packer; + break; + + case RVEC_PTR_TYPE: + ptr = &rvec_packer; + break; + + case RVEC2_PTR_TYPE: + ptr = &rvec2_packer; + break; + + default: + fprintf( stderr, "[ERROR] unknown pointer type. Terminating...\n" ); + exit( UNKNOWN_OPTION ); + break; + } + + return ptr; +} + + +static coll_unpacker Get_Unpacker( const int type ) +{ + coll_unpacker ptr; + + switch ( type ) + { + case INT_PTR_TYPE: + ptr = &int_unpacker; + break; + + case REAL_PTR_TYPE: + ptr = &real_unpacker; + break; + + case RVEC_PTR_TYPE: + ptr = &rvec_unpacker; + break; + + case RVEC2_PTR_TYPE: + ptr = &rvec2_unpacker; + break; + + default: + fprintf( stderr, "[ERROR] unknown pointer type. 
Terminating...\n" ); + exit( UNKNOWN_OPTION ); + break; + } + + return ptr; } -void Dist( reax_system* system, mpi_datatypes *mpi_data, - void *buf, MPI_Datatype type, int scale, dist_packer pack ) +void Dist( const reax_system * const system, mpi_datatypes * const mpi_data, + void *buf, int buf_type, MPI_Datatype type ) { +#if defined(NEUTRAL_TERRITORY) + int d, count, index; + mpi_out_data *out_bufs; + MPI_Comm comm; + MPI_Request req[6]; + MPI_Status stat[6]; + dist_packer pack; + + comm = mpi_data->comm_mesh3D; + out_bufs = mpi_data->out_nt_buffers; + pack = Get_Packer( buf_type ); + count = 0; + + /* initiate recvs */ + for ( d = 0; d < 6; ++d ) + { + if ( system->my_nt_nbrs[d].atoms_cnt ) + { + count++; + MPI_Irecv( Get_Buffer_Offset( buf, system->my_nt_nbrs[d].atoms_str, buf_type ), + system->my_nt_nbrs[d].atoms_cnt, type, + system->my_nt_nbrs[d].receive_rank, d, comm, &req[d] ); + } + } + + for ( d = 0; d < 6; ++d) + { + /* send both messages in dimension d */ + if ( out_bufs[d].cnt ) + { + pack( buf, &out_bufs[d] ); + MPI_Send( out_bufs[d].out_atoms, out_bufs[d].cnt, type, + system->my_nt_nbrs[d].rank, d, comm ); + } + } + + for ( d = 0; d < count; ++d ) + { + MPI_Waitany( REAX_MAX_NT_NBRS, req, &index, stat); + } + +#if defined(DEBUG) + fprintf( stderr, "p%d dist: done\n", system->my_rank ); +#endif + +#else int d; mpi_out_data *out_bufs; MPI_Comm comm; MPI_Request req1, req2; MPI_Status stat1, stat2; - neighbor_proc *nbr1, *nbr2; + const neighbor_proc *nbr1, *nbr2; + dist_packer pack; #if defined(DEBUG) fprintf( stderr, "p%d dist: entered\n", system->my_rank ); #endif + comm = mpi_data->comm_mesh3D; out_bufs = mpi_data->out_buffers; + pack = Get_Packer( buf_type ); for ( d = 0; d < 3; ++d ) { /* initiate recvs */ - nbr1 = &(system->my_nbrs[2 * d]); + nbr1 = &system->my_nbrs[2 * d]; if ( nbr1->atoms_cnt ) - MPI_Irecv( buf + nbr1->atoms_str * scale, nbr1->atoms_cnt, type, - nbr1->rank, 2 * d + 1, comm, &req1 ); + { + MPI_Irecv( Get_Buffer_Offset( buf, 
nbr1->atoms_str, buf_type ), + nbr1->atoms_cnt, type, nbr1->rank, 2 * d + 1, comm, &req1 ); + } - nbr2 = &(system->my_nbrs[2 * d + 1]); + nbr2 = &system->my_nbrs[2 * d + 1]; if ( nbr2->atoms_cnt ) - MPI_Irecv( buf + nbr2->atoms_str * scale, nbr2->atoms_cnt, type, - nbr2->rank, 2 * d, comm, &req2 ); + { + MPI_Irecv( Get_Buffer_Offset( buf, nbr2->atoms_str, buf_type ), + nbr2->atoms_cnt, type, nbr2->rank, 2 * d, comm, &req2 ); + } /* send both messages in dimension d */ if ( out_bufs[2 * d].cnt ) { - pack( buf, out_bufs + (2 * d) ); - MPI_Send( out_bufs[2 * d].out_atoms, out_bufs[2 * d].cnt, type, - nbr1->rank, 2 * d, comm ); + pack( buf, &out_bufs[2 * d] ); + MPI_Send( out_bufs[2 * d].out_atoms, out_bufs[2 * d].cnt, + type, nbr1->rank, 2 * d, comm ); } if ( out_bufs[2 * d + 1].cnt ) { - pack( buf, out_bufs + (2 * d + 1) ); - MPI_Send( out_bufs[2 * d + 1].out_atoms, out_bufs[2 * d + 1].cnt, type, - nbr2->rank, 2 * d + 1, comm ); + pack( buf, &out_bufs[2 * d + 1] ); + MPI_Send( out_bufs[2 * d + 1].out_atoms, out_bufs[2 * d + 1].cnt, + type, nbr2->rank, 2 * d + 1, comm ); } - if ( nbr1->atoms_cnt ) MPI_Wait( &req1, &stat1 ); - if ( nbr2->atoms_cnt ) MPI_Wait( &req2, &stat2 ); + if( nbr1->atoms_cnt ) + { + MPI_Wait( &req1, &stat1 ); + } + if( nbr2->atoms_cnt ) + { + MPI_Wait( &req2, &stat2 ); + } } + #if defined(DEBUG) fprintf( stderr, "p%d dist: done\n", system->my_rank ); #endif +#endif } -void real_unpacker( void *dummy_in, void *dummy_buf, mpi_out_data *out_buf ) +void Dist_FS( const reax_system * const system, mpi_datatypes * const mpi_data, + void *buf, int buf_type, MPI_Datatype type ) { - int i; - real *in = (real*) dummy_in; - real *buf = (real*) dummy_buf; + int d; + mpi_out_data *out_bufs; + MPI_Comm comm; + MPI_Request req1, req2; + MPI_Status stat1, stat2; + const neighbor_proc *nbr1, *nbr2; + dist_packer pack; - for ( i = 0; i < out_buf->cnt; ++i ) - buf[ out_buf->index[i] ] += in[i]; +#if defined(DEBUG) + fprintf( stderr, "p%d dist: entered\n", 
system->my_rank ); +#endif + + comm = mpi_data->comm_mesh3D; + out_bufs = mpi_data->out_buffers; + pack = Get_Packer( buf_type ); + + for ( d = 0; d < 3; ++d ) + { + /* initiate recvs */ + nbr1 = &system->my_nbrs[2 * d]; + if ( nbr1->atoms_cnt ) + { + MPI_Irecv( Get_Buffer_Offset( buf, nbr1->atoms_str, buf_type ), + nbr1->atoms_cnt, type, nbr1->rank, 2 * d + 1, comm, &req1 ); + } + + nbr2 = &system->my_nbrs[2 * d + 1]; + if ( nbr2->atoms_cnt ) + { + MPI_Irecv( Get_Buffer_Offset( buf, nbr2->atoms_str, buf_type ), + nbr2->atoms_cnt, type, nbr2->rank, 2 * d, comm, &req2 ); + } + + /* send both messages in dimension d */ + if ( out_bufs[2 * d].cnt ) + { + pack( buf, &out_bufs[2 * d] ); + MPI_Send( out_bufs[2 * d].out_atoms, out_bufs[2 * d].cnt, + type, nbr1->rank, 2 * d, comm ); + } + + if ( out_bufs[2 * d + 1].cnt ) + { + pack( buf, &out_bufs[2 * d + 1] ); + MPI_Send( out_bufs[2 * d + 1].out_atoms, out_bufs[2 * d + 1].cnt, + type, nbr2->rank, 2 * d + 1, comm ); + } + + if( nbr1->atoms_cnt ) + { + MPI_Wait( &req1, &stat1 ); + } + if( nbr2->atoms_cnt ) + { + MPI_Wait( &req2, &stat2 ); + } + } + + +#if defined(DEBUG) + fprintf( stderr, "p%d dist: done\n", system->my_rank ); +#endif } -void rvec_unpacker( void *dummy_in, void *dummy_buf, mpi_out_data *out_buf ) -{ - int i; - rvec *in = (rvec*) dummy_in; - rvec *buf = (rvec*) dummy_buf; +void Coll( const reax_system * const system, mpi_datatypes * const mpi_data, + void *buf, int buf_type, MPI_Datatype type ) +{ +#if defined(NEUTRAL_TERRITORY) + int d, count, index; + void *in[6]; + mpi_out_data *out_bufs; + MPI_Comm comm; + MPI_Request req[6]; + MPI_Status stat[6]; + coll_unpacker unpack; - for ( i = 0; i < out_buf->cnt; ++i ) - { - rvec_Add( buf[ out_buf->index[i] ], in[i] ); #if defined(DEBUG) - fprintf( stderr, "rvec_unpacker: cnt=%d i =%d index[i]=%d\n", - out_buf->cnt, i, out_buf->index[i] ); + fprintf( stderr, "p%d coll: entered\n", system->my_rank ); #endif + + comm = mpi_data->comm_mesh3D; + out_bufs = 
mpi_data->out_nt_buffers; + unpack = Get_Unpacker( buf_type ); + count = 0; + + for ( d = 0; d < 6; ++d ) + { + in[d] = mpi_data->in_nt_buffer[d]; + + if ( out_bufs[d].cnt ) + { + count++; + MPI_Irecv( in[d], out_bufs[d].cnt, type, + system->my_nt_nbrs[d].rank, d, comm, &req[d] ); + } } -} + for ( d = 0; d < 6; ++d ) + { + /* send both messages in direction d */ + if ( system->my_nt_nbrs[d].atoms_cnt ) + { + MPI_Send( Get_Buffer_Offset( buf, system->my_nt_nbrs[d].atoms_str, buf_type ), + system->my_nt_nbrs[d].atoms_cnt, type, + system->my_nt_nbrs[d].receive_rank, d, comm ); + } + } + + for ( d = 0; d < count; ++d ) + { + MPI_Waitany( REAX_MAX_NT_NBRS, req, &index, stat); + unpack( in[index], buf, &out_bufs[index] ); + } -void rvec2_unpacker( void *dummy_in, void *dummy_buf, mpi_out_data *out_buf ) -{ - int i; - rvec2 *in = (rvec2*) dummy_in; - rvec2 *buf = (rvec2*) dummy_buf; +#if defined(DEBUG) + fprintf( stderr, "p%d coll: done\n", system->my_rank ); +#endif - for ( i = 0; i < out_buf->cnt; ++i ) +#else + int d; + mpi_out_data *out_bufs; + MPI_Comm comm; + MPI_Request req1, req2; + MPI_Status stat1, stat2; + const neighbor_proc *nbr1, *nbr2; + coll_unpacker unpack; + +#if defined(DEBUG) + fprintf( stderr, "p%d coll: entered\n", system->my_rank ); +#endif + + comm = mpi_data->comm_mesh3D; + out_bufs = mpi_data->out_buffers; + unpack = Get_Unpacker( buf_type ); + + for ( d = 2; d >= 0; --d ) { - buf[ out_buf->index[i] ][0] += in[i][0]; - buf[ out_buf->index[i] ][1] += in[i][1]; + /* initiate recvs */ + nbr1 = &system->my_nbrs[2 * d]; + + if ( out_bufs[2 * d].cnt ) + { + MPI_Irecv( mpi_data->in1_buffer, out_bufs[2 * d].cnt, + type, nbr1->rank, 2 * d + 1, comm, &req1 ); + } + + nbr2 = &system->my_nbrs[2 * d + 1]; + + if ( out_bufs[2 * d + 1].cnt ) + { + + MPI_Irecv( mpi_data->in2_buffer, out_bufs[2 * d + 1].cnt, + type, nbr2->rank, 2 * d, comm, &req2 ); + } + + /* send both messages in dimension d */ + if ( nbr1->atoms_cnt ) + { + MPI_Send( Get_Buffer_Offset( buf, 
nbr1->atoms_str, buf_type ), + nbr1->atoms_cnt, type, nbr1->rank, 2 * d, comm ); + } + + if ( nbr2->atoms_cnt ) + { + MPI_Send( Get_Buffer_Offset( buf, nbr2->atoms_str, buf_type ), + nbr2->atoms_cnt, type, nbr2->rank, 2 * d + 1, comm ); + } + +#if defined(DEBUG) + fprintf( stderr, "p%d coll[%d] nbr1: str=%d cnt=%d recv=%d\n", + system->my_rank, d, nbr1->atoms_str, nbr1->atoms_cnt, + out_bufs[2 * d].cnt ); + fprintf( stderr, "p%d coll[%d] nbr2: str=%d cnt=%d recv=%d\n", + system->my_rank, d, nbr2->atoms_str, nbr2->atoms_cnt, + out_bufs[2 * d + 1].cnt ); +#endif + + if ( out_bufs[2 * d].cnt ) + { + MPI_Wait( &req1, &stat1 ); + unpack( mpi_data->in1_buffer, buf, &out_bufs[2 * d] ); + } + + if ( out_bufs[2 * d + 1].cnt ) + { + MPI_Wait( &req2, &stat2 ); + unpack( mpi_data->in2_buffer, buf, &out_bufs[2 * d + 1] ); + } } + +#if defined(DEBUG) + fprintf( stderr, "p%d coll: done\n", system->my_rank ); +#endif +#endif } -void Coll( reax_system* system, mpi_datatypes *mpi_data, - void *buf, MPI_Datatype type, int scale, coll_unpacker unpack ) -{ +void Coll_FS( const reax_system * const system, mpi_datatypes * const mpi_data, + void *buf, int buf_type, MPI_Datatype type ) +{ int d; - void *in1, *in2; mpi_out_data *out_bufs; MPI_Comm comm; MPI_Request req1, req2; MPI_Status stat1, stat2; - neighbor_proc *nbr1, *nbr2; + const neighbor_proc *nbr1, *nbr2; + coll_unpacker unpack; #if defined(DEBUG) fprintf( stderr, "p%d coll: entered\n", system->my_rank ); #endif + comm = mpi_data->comm_mesh3D; - in1 = mpi_data->in1_buffer; - in2 = mpi_data->in2_buffer; out_bufs = mpi_data->out_buffers; + unpack = Get_Unpacker( buf_type ); for ( d = 2; d >= 0; --d ) { /* initiate recvs */ - nbr1 = &(system->my_nbrs[2 * d]); + nbr1 = &system->my_nbrs[2 * d]; + if ( out_bufs[2 * d].cnt ) - MPI_Irecv(in1, out_bufs[2 * d].cnt, type, nbr1->rank, 2 * d + 1, comm, &req1); + { + MPI_Irecv( mpi_data->in1_buffer, out_bufs[2 * d].cnt, + type, nbr1->rank, 2 * d + 1, comm, &req1 ); + } + + nbr2 = 
&system->my_nbrs[2 * d + 1]; - nbr2 = &(system->my_nbrs[2 * d + 1]); if ( out_bufs[2 * d + 1].cnt ) - MPI_Irecv(in2, out_bufs[2 * d + 1].cnt, type, nbr2->rank, 2 * d, comm, &req2); + { + MPI_Irecv( mpi_data->in2_buffer, out_bufs[2 * d + 1].cnt, + type, nbr2->rank, 2 * d, comm, &req2 ); + } + /* send both messages in dimension d */ if ( nbr1->atoms_cnt ) - MPI_Send( buf + nbr1->atoms_str * scale, nbr1->atoms_cnt, type, - nbr1->rank, 2 * d, comm ); - + { + MPI_Send( Get_Buffer_Offset( buf, nbr1->atoms_str, buf_type ), + nbr1->atoms_cnt, type, nbr1->rank, 2 * d, comm ); + } + if ( nbr2->atoms_cnt ) - MPI_Send( buf + nbr2->atoms_str * scale, nbr2->atoms_cnt, type, - nbr2->rank, 2 * d + 1, comm ); + { + MPI_Send( Get_Buffer_Offset( buf, nbr2->atoms_str, buf_type ), + nbr2->atoms_cnt, type, nbr2->rank, 2 * d + 1, comm ); + } #if defined(DEBUG) fprintf( stderr, "p%d coll[%d] nbr1: str=%d cnt=%d recv=%d\n", - system->my_rank, d, nbr1->atoms_str, nbr1->atoms_cnt, - out_bufs[2 * d].cnt ); + system->my_rank, d, nbr1->atoms_str, nbr1->atoms_cnt, + out_bufs[2 * d].cnt ); fprintf( stderr, "p%d coll[%d] nbr2: str=%d cnt=%d recv=%d\n", - system->my_rank, d, nbr2->atoms_str, nbr2->atoms_cnt, - out_bufs[2 * d + 1].cnt ); + system->my_rank, d, nbr2->atoms_str, nbr2->atoms_cnt, + out_bufs[2 * d + 1].cnt ); #endif if ( out_bufs[2 * d].cnt ) { MPI_Wait( &req1, &stat1 ); - unpack( in1, buf, out_bufs + (2 * d) ); + unpack( mpi_data->in1_buffer, buf, &out_bufs[2 * d] ); } if ( out_bufs[2 * d + 1].cnt ) { MPI_Wait( &req2, &stat2 ); - unpack( in2, buf, out_bufs + (2 * d + 1) ); + unpack( mpi_data->in2_buffer, buf, &out_bufs[2 * d + 1] ); } } @@ -223,17 +651,20 @@ void Coll( reax_system* system, mpi_datatypes *mpi_data, fprintf( stderr, "p%d coll: done\n", system->my_rank ); #endif } -#endif /*PURE_REAX*/ + /*****************************************************************************/ real Parallel_Norm( real *v, int n, MPI_Comm comm ) { - int i; + int i; real my_sum, norm_sqr; - my_sum = 0; 
+ my_sum = 0.0; + for ( i = 0; i < n; ++i ) + { my_sum += SQR( v[i] ); + } MPI_Allreduce( &my_sum, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, comm ); @@ -241,15 +672,17 @@ real Parallel_Norm( real *v, int n, MPI_Comm comm ) } - real Parallel_Dot( real *v1, real *v2, int n, MPI_Comm comm ) { int i; real my_dot, res; - my_dot = 0; + my_dot = 0.0; + for ( i = 0; i < n; ++i ) + { my_dot += v1[i] * v2[i]; + } MPI_Allreduce( &my_dot, &res, 1, MPI_DOUBLE, MPI_SUM, comm ); @@ -257,7 +690,6 @@ real Parallel_Dot( real *v1, real *v2, int n, MPI_Comm comm ) } - real Parallel_Vector_Acc( real *v, int n, MPI_Comm comm ) { int i; @@ -276,13 +708,13 @@ real Parallel_Vector_Acc( real *v, int n, MPI_Comm comm ) /*****************************************************************************/ #if defined(TEST_FORCES) void Coll_ids_at_Master( reax_system *system, storage *workspace, - mpi_datatypes *mpi_data ) + mpi_datatypes *mpi_data ) { int i; int *id_list; MPI_Gather( &system->n, 1, MPI_INT, workspace->rcounts, 1, MPI_INT, - MASTER_NODE, mpi_data->world ); + MASTER_NODE, mpi_data->world ); if ( system->my_rank == MASTER_NODE ) { @@ -296,8 +728,8 @@ void Coll_ids_at_Master( reax_system *system, storage *workspace, id_list[i] = system->my_atoms[i].orig_id; MPI_Gatherv( id_list, system->n, MPI_INT, - workspace->id_all, workspace->rcounts, workspace->displs, - MPI_INT, MASTER_NODE, mpi_data->world ); + workspace->id_all, workspace->rcounts, workspace->displs, + MPI_INT, MASTER_NODE, mpi_data->world ); sfree( id_list, "id_list" ); @@ -312,11 +744,10 @@ void Coll_ids_at_Master( reax_system *system, storage *workspace, void Coll_rvecs_at_Master( reax_system *system, storage *workspace, - mpi_datatypes *mpi_data, rvec* v ) + mpi_datatypes *mpi_data, rvec* v ) { MPI_Gatherv( v, system->n, mpi_data->mpi_rvec, - workspace->f_all, workspace->rcounts, workspace->displs, - mpi_data->mpi_rvec, MASTER_NODE, mpi_data->world ); + workspace->f_all, workspace->rcounts, workspace->displs, + mpi_data->mpi_rvec, 
MASTER_NODE, mpi_data->world ); } - #endif diff --git a/PuReMD/src/basic_comm.h b/PuReMD/src/basic_comm.h index b3d7a5222c786f8c71662547e3a36a77abf8fb92..e2fe70903a15f4dd61423cf14fe4b557d6802f6d 100644 --- a/PuReMD/src/basic_comm.h +++ b/PuReMD/src/basic_comm.h @@ -24,24 +24,39 @@ #include "reax_types.h" -void real_packer( void*, mpi_out_data* ); -void rvec_packer( void*, mpi_out_data* ); -void rvec2_packer( void*, mpi_out_data* ); -void Dist(reax_system*, mpi_datatypes*, void*, MPI_Datatype, int, dist_packer); -void real_unpacker( void*, void*, mpi_out_data* ); -void rvec_unpacker( void*, void*, mpi_out_data* ); -void rvec2_unpacker( void*, void*, mpi_out_data* ); -void Coll( reax_system*, mpi_datatypes*, void*, MPI_Datatype, - int, coll_unpacker ); +enum pointer_type +{ + INT_PTR_TYPE = 0, + REAL_PTR_TYPE = 1, + RVEC_PTR_TYPE = 2, + RVEC2_PTR_TYPE = 3, +}; + + +void Dist( const reax_system * const, mpi_datatypes * const, + void*, int, MPI_Datatype ); + +void Dist_FS( const reax_system * const, mpi_datatypes * const, + void*, int, MPI_Datatype ); + +void Coll( const reax_system * const, mpi_datatypes * const, + void*, int, MPI_Datatype ); + +void Coll_FS( const reax_system * const, mpi_datatypes * const, + void*, int, MPI_Datatype ); real Parallel_Norm( real*, int, MPI_Comm ); + real Parallel_Dot( real*, real*, int, MPI_Comm ); + real Parallel_Vector_Acc( real*, int, MPI_Comm ); #if defined(TEST_FORCES) void Coll_ids_at_Master( reax_system*, storage*, mpi_datatypes* ); + void Coll_rvecs_at_Master( reax_system*, storage*, mpi_datatypes*, rvec* ); #endif + #endif diff --git a/PuReMD/src/bond_orders.c b/PuReMD/src/bond_orders.c index cf6c69911f889bb528ef32c39e6542960ba0d7e0..0c5950e5b28c28a82591288b2ab0a3be82fbd0ec 100644 --- a/PuReMD/src/bond_orders.c +++ b/PuReMD/src/bond_orders.c @@ -662,41 +662,58 @@ void Add_dBond_to_Forces( int i, int pj, } +/* Compute the bond order term between atoms i and j, + * and if this term exceeds the cutoff bo_cut, then adds + * BOTH 
atoms the bonds list (i.e., compute term once + * and copy to avoid redundant computation) */ int BOp( storage *workspace, reax_list *bonds, real bo_cut, - int i, int btop_i, far_neighbor_data *nbr_pj, - single_body_parameters *sbp_i, single_body_parameters *sbp_j, - two_body_parameters *twbp ) + int i, int btop_i, int j, ivec *rel_box, real d, rvec *dvec, + int far_nbr_list_format, single_body_parameters *sbp_i, + single_body_parameters *sbp_j, two_body_parameters *twbp ) { - int j, btop_j; real r2, C12, C34, C56; real Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2; real BO, BO_s, BO_pi, BO_pi2; - bond_data *ibond, *jbond; - bond_order_data *bo_ij, *bo_ji; + bond_data *ibond; + bond_order_data *bo_ij; + int btop_j; + bond_data *jbond; + bond_order_data *bo_ji; - j = nbr_pj->nbr; - r2 = SQR(nbr_pj->d); + r2 = SQR(d); if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) { - C12 = twbp->p_bo1 * pow( nbr_pj->d / twbp->r_s, twbp->p_bo2 ); + C12 = twbp->p_bo1 * pow( d / twbp->r_s, twbp->p_bo2 ); BO_s = (1.0 + bo_cut) * exp( C12 ); } - else BO_s = C12 = 0.0; + else + { + C12 = 0.0; + BO_s = 0.0; + } if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) { - C34 = twbp->p_bo3 * pow( nbr_pj->d / twbp->r_p, twbp->p_bo4 ); + C34 = twbp->p_bo3 * pow( d / twbp->r_p, twbp->p_bo4 ); BO_pi = exp( C34 ); } - else BO_pi = C34 = 0.0; + else + { + C34 = 0.0; + BO_pi = 0.0; + } if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) { - C56 = twbp->p_bo5 * pow( nbr_pj->d / twbp->r_pp, twbp->p_bo6 ); + C56 = twbp->p_bo5 * pow( d / twbp->r_pp, twbp->p_bo6 ); BO_pi2 = exp( C56 ); } - else BO_pi2 = C56 = 0.0; + else + { + C56 = 0.0; + BO_pi2 = 0.0; + } /* Initially BO values are the uncorrected ones, page 1 */ BO = BO_s + BO_pi + BO_pi2; @@ -704,30 +721,35 @@ int BOp( storage *workspace, reax_list *bonds, real bo_cut, if ( BO >= bo_cut ) { /****** bonds i-j and j-i ******/ - ibond = &( bonds->bond_list[btop_i] ); + ibond = &bonds->bond_list[btop_i]; btop_j = End_Index( j, bonds ); - jbond = &(bonds->bond_list[btop_j]); + 
jbond = &bonds->bond_list[btop_j]; ibond->nbr = j; - jbond->nbr = i; - ibond->d = nbr_pj->d; - jbond->d = nbr_pj->d; - rvec_Copy( ibond->dvec, nbr_pj->dvec ); - rvec_Scale( jbond->dvec, -1, nbr_pj->dvec ); - ivec_Copy( ibond->rel_box, nbr_pj->rel_box ); - ivec_Scale( jbond->rel_box, -1, nbr_pj->rel_box ); + ibond->d = d; + rvec_Copy( ibond->dvec, *dvec ); + ivec_Copy( ibond->rel_box, *rel_box ); ibond->dbond_index = btop_i; - jbond->dbond_index = btop_i; ibond->sym_index = btop_j; + jbond->nbr = i; + jbond->d = d; + rvec_Scale( jbond->dvec, -1.0, *dvec ); + ivec_Scale( jbond->rel_box, -1.0, *rel_box ); + jbond->dbond_index = btop_i; jbond->sym_index = btop_i; - Set_End_Index( j, btop_j + 1, bonds ); - bo_ij = &( ibond->bo_data ); - bo_ji = &( jbond->bo_data ); - bo_ji->BO = bo_ij->BO = BO; - bo_ji->BO_s = bo_ij->BO_s = BO_s; - bo_ji->BO_pi = bo_ij->BO_pi = BO_pi; - bo_ji->BO_pi2 = bo_ij->BO_pi2 = BO_pi2; + Set_End_Index( j, btop_j + 1, bonds ); + + bo_ij = &ibond->bo_data; + bo_ij->BO = BO; + bo_ij->BO_s = BO_s; + bo_ij->BO_pi = BO_pi; + bo_ij->BO_pi2 = BO_pi2; + bo_ji = &jbond->bo_data; + bo_ji->BO = BO; + bo_ji->BO_s = BO_s; + bo_ji->BO_pi = BO_pi; + bo_ji->BO_pi2 = BO_pi2; /* Bond Order page2-3, derivative of total bond order prime */ Cln_BOp_s = twbp->p_bo2 * C12 / r2; @@ -735,63 +757,193 @@ int BOp( storage *workspace, reax_list *bonds, real bo_cut, Cln_BOp_pi2 = twbp->p_bo6 * C56 / r2; /* Only dln_BOp_xx wrt. 
dr_i is stored here, note that - dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */ - rvec_Scale(bo_ij->dln_BOp_s, -bo_ij->BO_s * Cln_BOp_s, ibond->dvec); - rvec_Scale(bo_ij->dln_BOp_pi, -bo_ij->BO_pi * Cln_BOp_pi, ibond->dvec); - rvec_Scale(bo_ij->dln_BOp_pi2, - -bo_ij->BO_pi2 * Cln_BOp_pi2, ibond->dvec); - rvec_Scale(bo_ji->dln_BOp_s, -1., bo_ij->dln_BOp_s); - rvec_Scale(bo_ji->dln_BOp_pi, -1., bo_ij->dln_BOp_pi ); - rvec_Scale(bo_ji->dln_BOp_pi2, -1., bo_ij->dln_BOp_pi2 ); + * dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */ + rvec_Scale( bo_ij->dln_BOp_s, -1.0 * bo_ij->BO_s * Cln_BOp_s, ibond->dvec ); + rvec_Scale( bo_ij->dln_BOp_pi, -1.0 * bo_ij->BO_pi * Cln_BOp_pi, ibond->dvec ); + rvec_Scale( bo_ij->dln_BOp_pi2, -1.0 * bo_ij->BO_pi2 * Cln_BOp_pi2, ibond->dvec ); + rvec_Scale( bo_ji->dln_BOp_s, -1.0, bo_ij->dln_BOp_s ); + rvec_Scale( bo_ji->dln_BOp_pi, -1.0, bo_ij->dln_BOp_pi ); + rvec_Scale( bo_ji->dln_BOp_pi2, -1.0, bo_ij->dln_BOp_pi2 ); /* Only dBOp wrt. dr_i is stored here, note that - dBOp/dr_i = -dBOp/dr_j and all others are 0 */ - rvec_Scale( bo_ij->dBOp, - -(bo_ij->BO_s * Cln_BOp_s + - bo_ij->BO_pi * Cln_BOp_pi + - bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); - rvec_Scale( bo_ji->dBOp, -1., bo_ij->dBOp ); + * dBOp/dr_i = -dBOp/dr_j and all others are 0 */ + rvec_Scale( bo_ij->dBOp, -1.0 * (bo_ij->BO_s * Cln_BOp_s + + bo_ij->BO_pi * Cln_BOp_pi + + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); + rvec_Scale( bo_ji->dBOp, -1.0, bo_ij->dBOp ); rvec_Add( workspace->dDeltap_self[i], bo_ij->dBOp ); rvec_Add( workspace->dDeltap_self[j], bo_ji->dBOp ); bo_ij->BO_s -= bo_cut; bo_ij->BO -= bo_cut; + workspace->total_bond_order[i] += bo_ij->BO; //currently total_BOp + bo_ij->Cdbo = 0.0; + bo_ij->Cdbopi = 0.0; + bo_ij->Cdbopi2 = 0.0; bo_ji->BO_s -= bo_cut; bo_ji->BO -= bo_cut; - workspace->total_bond_order[i] += bo_ij->BO; //currently total_BOp workspace->total_bond_order[j] += bo_ji->BO; //currently total_BOp - bo_ij->Cdbo = bo_ij->Cdbopi = 
bo_ij->Cdbopi2 = 0.0; - bo_ji->Cdbo = bo_ji->Cdbopi = bo_ji->Cdbopi2 = 0.0; - - /*fprintf( stderr, "%d %d %g %g %g\n", - i+1, j+1, bo_ij->BO, bo_ij->BO_pi, bo_ij->BO_pi2 );*/ - - /*fprintf( stderr, "Cln_BOp_s: %f, pbo2: %f, C12:%f\n", - Cln_BOp_s, twbp->p_bo2, C12 ); - fprintf( stderr, "Cln_BOp_pi: %f, pbo4: %f, C34:%f\n", - Cln_BOp_pi, twbp->p_bo4, C34 ); - fprintf( stderr, "Cln_BOp_pi2: %f, pbo6: %f, C56:%f\n", - Cln_BOp_pi2, twbp->p_bo6, C56 );*/ - /*fprintf(stderr, "pbo1: %f, pbo2:%f\n", twbp->p_bo1, twbp->p_bo2); - fprintf(stderr, "pbo3: %f, pbo4:%f\n", twbp->p_bo3, twbp->p_bo4); - fprintf(stderr, "pbo5: %f, pbo6:%f\n", twbp->p_bo5, twbp->p_bo6); - fprintf( stderr, "r_s: %f, r_p: %f, r_pp: %f\n", - twbp->r_s, twbp->r_p, twbp->r_pp ); - fprintf( stderr, "C12: %g, C34:%g, C56:%g\n", C12, C34, C56 );*/ - - /*fprintf( stderr, "\tfactors: %g %g %g\n", - -(bo_ij->BO_s * Cln_BOp_s + bo_ij->BO_pi * Cln_BOp_pi + - bo_ij->BO_pi2 * Cln_BOp_pp), - -bo_ij->BO_pi * Cln_BOp_pi, -bo_ij->BO_pi2 * Cln_BOp_pi2 );*/ - /*fprintf( stderr, "dBOpi:\t[%g, %g, %g]\n", - bo_ij->dBOp[0], bo_ij->dBOp[1], bo_ij->dBOp[2] ); - fprintf( stderr, "dBOpi:\t[%g, %g, %g]\n", - bo_ij->dln_BOp_pi[0], bo_ij->dln_BOp_pi[1], - bo_ij->dln_BOp_pi[2] ); - fprintf( stderr, "dBOpi2:\t[%g, %g, %g]\n\n", - bo_ij->dln_BOp_pi2[0], bo_ij->dln_BOp_pi2[1], - bo_ij->dln_BOp_pi2[2] );*/ + bo_ji->Cdbo = 0.0; + bo_ji->Cdbopi = 0.0; + bo_ji->Cdbopi2 = 0.0; + + return 1; + } + + return 0; +} + + +/* Compute the bond order term between atoms i and j, + * and if this term exceeds the cutoff bo_cut, then adds + * to the bond list according to the following convention: + * * if the far neighbor list is store in half format, + * add BOTH atoms to each other's portion of the bond list + * * if the far neighbor list is store in full format, + * add atom i to atom j's bonds list ONLY */ +int BOp_redundant( storage *workspace, reax_list *bonds, real bo_cut, + int i, int btop_i, int j, ivec *rel_box, real d, rvec *dvec, + int 
far_nbr_list_format, single_body_parameters *sbp_i, + single_body_parameters *sbp_j, two_body_parameters *twbp ) +{ + real r2, C12, C34, C56; + real Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2; + real BO, BO_s, BO_pi, BO_pi2; + bond_data *ibond; + bond_order_data *bo_ij; + int btop_j; + bond_data *jbond; + bond_order_data *bo_ji; + + r2 = SQR(d); + + if ( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) + { + C12 = twbp->p_bo1 * pow( d / twbp->r_s, twbp->p_bo2 ); + BO_s = (1.0 + bo_cut) * exp( C12 ); + } + else + { + C12 = 0.0; + BO_s = 0.0; + } + + if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) + { + C34 = twbp->p_bo3 * pow( d / twbp->r_p, twbp->p_bo4 ); + BO_pi = exp( C34 ); + } + else + { + C34 = 0.0; + BO_pi = 0.0; + } + + if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) + { + C56 = twbp->p_bo5 * pow( d / twbp->r_pp, twbp->p_bo6 ); + BO_pi2 = exp( C56 ); + } + else + { + C56 = 0.0; + BO_pi2 = 0.0; + } + + /* Initially BO values are the uncorrected ones, page 1 */ + BO = BO_s + BO_pi + BO_pi2; + + if ( BO >= bo_cut ) + { + /****** bonds i-j and j-i ******/ + ibond = &bonds->bond_list[btop_i]; + if ( far_nbr_list_format == HALF_LIST ) + { + btop_j = End_Index( j, bonds ); + jbond = &bonds->bond_list[btop_j]; + } + + ibond->nbr = j; + ibond->d = d; + rvec_Copy( ibond->dvec, *dvec ); + ivec_Copy( ibond->rel_box, *rel_box ); + ibond->dbond_index = btop_i; + if ( far_nbr_list_format == HALF_LIST ) + { + ibond->sym_index = btop_j; + jbond->nbr = i; + jbond->d = d; + rvec_Scale( jbond->dvec, -1.0, *dvec ); + ivec_Scale( jbond->rel_box, -1.0, *rel_box ); + jbond->dbond_index = btop_i; + jbond->sym_index = btop_i; + + Set_End_Index( j, btop_j + 1, bonds ); + } + + bo_ij = &ibond->bo_data; + bo_ij->BO = BO; + bo_ij->BO_s = BO_s; + bo_ij->BO_pi = BO_pi; + bo_ij->BO_pi2 = BO_pi2; + if ( far_nbr_list_format == HALF_LIST ) + { + bo_ji = &jbond->bo_data; + bo_ji->BO = BO; + bo_ji->BO_s = BO_s; + bo_ji->BO_pi = BO_pi; + bo_ji->BO_pi2 = BO_pi2; + } + + /* Bond Order page2-3, derivative of total 
bond order prime */ + Cln_BOp_s = twbp->p_bo2 * C12 / r2; + Cln_BOp_pi = twbp->p_bo4 * C34 / r2; + Cln_BOp_pi2 = twbp->p_bo6 * C56 / r2; + + /* Only dln_BOp_xx wrt. dr_i is stored here, note that + * dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */ + rvec_Scale( bo_ij->dln_BOp_s, -1.0 * bo_ij->BO_s * Cln_BOp_s, ibond->dvec ); + rvec_Scale( bo_ij->dln_BOp_pi, -1.0 * bo_ij->BO_pi * Cln_BOp_pi, ibond->dvec ); + rvec_Scale( bo_ij->dln_BOp_pi2, -1.0 * bo_ij->BO_pi2 * Cln_BOp_pi2, ibond->dvec ); + if ( far_nbr_list_format == HALF_LIST ) + { + rvec_Scale( bo_ji->dln_BOp_s, -1.0, bo_ij->dln_BOp_s ); + rvec_Scale( bo_ji->dln_BOp_pi, -1.0, bo_ij->dln_BOp_pi ); + rvec_Scale( bo_ji->dln_BOp_pi2, -1.0, bo_ij->dln_BOp_pi2 ); + } + + /* Only dBOp wrt. dr_i is stored here, note that + * dBOp/dr_i = -dBOp/dr_j and all others are 0 */ + rvec_Scale( bo_ij->dBOp, -1.0 * (bo_ij->BO_s * Cln_BOp_s + + bo_ij->BO_pi * Cln_BOp_pi + + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); + if ( far_nbr_list_format == HALF_LIST ) + { + rvec_Scale( bo_ji->dBOp, -1.0, bo_ij->dBOp ); + } + + rvec_Add( workspace->dDeltap_self[i], bo_ij->dBOp ); + if ( far_nbr_list_format == HALF_LIST ) + { + rvec_Add( workspace->dDeltap_self[j], bo_ji->dBOp ); + } + + bo_ij->BO_s -= bo_cut; + bo_ij->BO -= bo_cut; + workspace->total_bond_order[i] += bo_ij->BO; //currently total_BOp + bo_ij->Cdbo = 0.0; + bo_ij->Cdbopi = 0.0; + bo_ij->Cdbopi2 = 0.0; + if ( far_nbr_list_format == HALF_LIST ) + { + bo_ji->BO_s -= bo_cut; + bo_ji->BO -= bo_cut; + workspace->total_bond_order[j] += bo_ji->BO; //currently total_BOp + bo_ji->Cdbo = 0.0; + bo_ji->Cdbopi = 0.0; + bo_ji->Cdbopi2 = 0.0; + } return 1; } @@ -800,7 +952,7 @@ int BOp( storage *workspace, reax_list *bonds, real bo_cut, } -int compare_bonds( const void *p1, const void *p2 ) +static int compare_bonds( const void *p1, const void *p2 ) { return ((bond_data *)p1)->nbr - ((bond_data *)p2)->nbr; } diff --git a/PuReMD/src/bond_orders.h b/PuReMD/src/bond_orders.h index 
1975e20b6320a003b08527fae665dbd0bbc3c2e4..fcf4d71a24a6acbe90e49d5ad087cb4da973f51a 100644 --- a/PuReMD/src/bond_orders.h +++ b/PuReMD/src/bond_orders.h @@ -24,6 +24,7 @@ #include "reax_types.h" + typedef struct { real C1dbo, C2dbo, C3dbo; @@ -32,28 +33,42 @@ typedef struct real C1dDelta, C2dDelta, C3dDelta; } dbond_coefficients; + #ifdef TEST_FORCES void Get_dBO( reax_system*, reax_list**, int, int, real, rvec* ); + void Get_dBOpinpi2( reax_system*, reax_list**, - int, int, real, real, rvec*, rvec* ); + int, int, real, real, rvec*, rvec* ); void Add_dBO( reax_system*, reax_list**, int, int, real, rvec* ); + void Add_dBOpinpi2( reax_system*, reax_list**, - int, int, real, real, rvec*, rvec* ); + int, int, real, real, rvec*, rvec* ); void Add_dBO_to_Forces( reax_system*, reax_list**, int, int, real ); + void Add_dBOpinpi2_to_Forces( reax_system*, reax_list**, - int, int, real, real ); + int, int, real, real ); void Add_dDelta( reax_system*, reax_list**, int, real, rvec* ); + void Add_dDelta_to_Forces( reax_system *, reax_list**, int, real ); #endif void Add_dBond_to_Forces( int, int, storage*, reax_list** ); + void Add_dBond_to_Forces_NPT( int, int, simulation_data*, - storage*, reax_list** ); -int BOp(storage*, reax_list*, real, int, int, far_neighbor_data*, - single_body_parameters*, single_body_parameters*, two_body_parameters*); + storage*, reax_list** ); + +int BOp( storage*, reax_list*, real, int, int, int, ivec*, real, rvec*, + int, single_body_parameters*, single_body_parameters*, + two_body_parameters* ); + +int BOp_redundant( storage*, reax_list*, real, int, int, int, ivec*, real, rvec*, + int, single_body_parameters*, single_body_parameters*, + two_body_parameters* ); + void BO( reax_system*, control_params*, simulation_data*, storage*, reax_list**, output_controls* ); + #endif diff --git a/PuReMD/src/box.c b/PuReMD/src/box.c index c5f18cb35c6305da91536e9b7a26e1788aa83cde..d81a96db80b21fceabe13fc3b518f932c13320af 100644 --- a/PuReMD/src/box.c +++ 
b/PuReMD/src/box.c @@ -285,6 +285,10 @@ void Setup_Environment( reax_system *system, control_params *control, Setup_My_Box( system, control ); Setup_My_Ext_Box( system, control ); Setup_Comm( system, control, mpi_data ); +#if defined(NEUTRAL_TERRITORY) + Setup_NT_Comm( system, control, mpi_data ); +#endif + #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d coord: %d %d %d\n", system->my_rank, diff --git a/PuReMD/src/comm_tools.c b/PuReMD/src/comm_tools.c index 8419e3efe44377e7d1680fe09b2bc06ef5756ca6..52b8c98618b19e626b71757febd6e795fddfaea0 100644 --- a/PuReMD/src/comm_tools.c +++ b/PuReMD/src/comm_tools.c @@ -25,6 +25,195 @@ #include "tool_box.h" #include "vector.h" +#if defined(NEUTRAL_TERRITORY) +void Setup_NT_Comm( reax_system* system, control_params* control, + mpi_datatypes *mpi_data ) +{ + int i, d; + real bndry_cut; + neighbor_proc *nbr_pr; + simulation_box *my_box; + ivec nbr_coords, nbr_recv_coords; + ivec r[12] = { + {0, 0, -1}, // -z + {0, 0, +1}, // +z + {0, -1, 0}, // -y + {-1, -1, 0}, // -x-y + {-1, 0, 0}, // -x + {-1, +1, 0}, // -x+y + + {0, 0, +1}, // +z + {0, 0, -1}, // -z + {0, +1, 0}, // +y + {+1, +1, 0}, // +x+y + {+1, 0, 0}, // +x + {+1, -1, 0} // +x-y + }; + my_box = &system->my_box; + bndry_cut = system->bndry_cuts.ghost_cutoff; + system->num_nt_nbrs = REAX_MAX_NT_NBRS; + + /* identify my neighbors */ + for ( i = 0; i < system->num_nt_nbrs; ++i ) + { + nbr_pr = &system->my_nt_nbrs[i]; + ivec_Sum( nbr_coords, system->my_coords, r[i] ); /* actual nbr coords */ + MPI_Cart_rank( mpi_data->comm_mesh3D, nbr_coords, &nbr_pr->rank ); + + /* set the rank of the neighbor processor in the receiving direction */ + ivec_Sum( nbr_recv_coords, system->my_coords, r[i + 6] ); /* actual nbr coords */ + MPI_Cart_rank( mpi_data->comm_mesh3D, nbr_recv_coords, &nbr_pr->receive_rank ); + + for ( d = 0; d < 3; ++d ) + { + /* determine the boundary area with this nbr */ + if ( r[i][d] < 0 ) + { + nbr_pr->bndry_min[d] = my_box->min[d]; + nbr_pr->bndry_max[d] = 
my_box->min[d] + bndry_cut; + } + else if ( r[i][d] > 0 ) + { + nbr_pr->bndry_min[d] = my_box->max[d] - bndry_cut; + nbr_pr->bndry_max[d] = my_box->max[d]; + } + else + { + nbr_pr->bndry_min[d] = my_box->min[d]; + nbr_pr->bndry_max[d] = my_box->max[d]; + } + + /* determine if it is a periodic neighbor */ + if ( nbr_coords[d] < 0 ) + { + nbr_pr->prdc[d] = -1; + } + else if ( nbr_coords[d] >= control->procs_by_dim[d] ) + { + nbr_pr->prdc[d] = 1; + } + else + { + nbr_pr->prdc[d] = 0; + } + } + + } +} +#endif + + +#if defined(NEUTRAL_TERRITORY) +int Sort_Neutral_Territory( reax_system *system, int dir, mpi_out_data *out_bufs, int write ) +{ + int i, cnt; + reax_atom *atoms; + neighbor_proc *nbr_pr; + + cnt = 0; + atoms = system->my_atoms; + /* place each atom into the appropriate outgoing list */ + nbr_pr = &( system->my_nt_nbrs[dir] ); + + for ( i = 0; i < system->n; ++i ) + { + if ( nbr_pr->bndry_min[0] <= atoms[i].x[0] + && atoms[i].x[0] < nbr_pr->bndry_max[0] + && nbr_pr->bndry_min[1] <= atoms[i].x[1] + && atoms[i].x[1] < nbr_pr->bndry_max[1] + && nbr_pr->bndry_min[2] <= atoms[i].x[2] + && atoms[i].x[2] < nbr_pr->bndry_max[2] ) + { + if ( write ) + { + out_bufs[dir].index[out_bufs[dir].cnt] = i; + out_bufs[dir].cnt++; + } + else + { + cnt++; + } + } + } + + return cnt; +} +#endif + + +#if defined(NEUTRAL_TERRITORY) +void Init_Neutral_Territory( reax_system* system, mpi_datatypes *mpi_data ) +{ + int d, end, cnt; + mpi_out_data *out_bufs; + MPI_Comm comm; + MPI_Request req; + MPI_Status stat; + neighbor_proc *nbr; + + Reset_Out_Buffers( mpi_data->out_nt_buffers, system->num_nt_nbrs ); + comm = mpi_data->comm_mesh3D; + out_bufs = mpi_data->out_nt_buffers; + cnt = 0; + end = system->n; + + for ( d = 0; d < 6; ++d ) + { + nbr = &system->my_nt_nbrs[d]; + + Sort_Neutral_Territory( system, d, out_bufs, 1 ); + + MPI_Irecv( &cnt, 1, MPI_INT, nbr->receive_rank, d, comm, &req ); + MPI_Send( &out_bufs[d].cnt, 1, MPI_INT, nbr->rank, d, comm ); + MPI_Wait( &req, &stat ); + + if 
( mpi_data->in_nt_buffer[d] == NULL ) + { + nbr->est_recv = MAX( SAFER_ZONE_NT * cnt, MIN_SEND ); + mpi_data->in_nt_buffer[d] = smalloc( nbr->est_recv * sizeof(real), + "Init_Neural_Territory::mpi_data->in_nt_buffer[d]", comm ); + } + + nbr = &system->my_nt_nbrs[d]; + nbr->atoms_str = end; + nbr->atoms_cnt = cnt; + end += cnt; + } +} +#endif + + +#if defined(NEUTRAL_TERRITORY) +void Estimate_NT_Atoms( reax_system *system, mpi_datatypes *mpi_data ) +{ + int d; + mpi_out_data *out_bufs; + neighbor_proc *nbr; + + out_bufs = mpi_data->out_nt_buffers; + + for ( d = 0; d < 6; ++d ) + { + /* count the number of atoms in each processor's outgoing list */ + nbr = &system->my_nt_nbrs[d]; + nbr->est_send = Sort_Neutral_Territory( system, d, out_bufs, 0 ); + + /* estimate the space needed based on the count above */ + nbr->est_send = MAX( MIN_SEND, nbr->est_send * SAFER_ZONE_NT ); + + /* allocate the estimated space */ + out_bufs[d].index = scalloc( nbr->est_send, sizeof(int), + "Estimate_NT_Atoms::out_bufs[d].index", MPI_COMM_WORLD ); + out_bufs[d].out_atoms = scalloc( nbr->est_send, sizeof(real), + "Estimate_NT_Atoms::out_bufs[d].out_atoms", MPI_COMM_WORLD ); + + /* sort the atoms to their outgoing buffers */ + // TODO: to call or not to call? 
+ //Sort_Neutral_Territory( system, d, out_bufs, 1 ); + } +} +#endif + void Setup_Comm( reax_system* system, control_params* control, mpi_datatypes *mpi_data ) @@ -270,7 +459,6 @@ void Sort_Boundary_Atoms( reax_system *system, int start, int end, { int i, d, p, out_cnt; reax_atom *atoms; - simulation_box *my_box; boundary_atom *out_buf; neighbor_proc *nbr_pr; @@ -280,7 +468,6 @@ void Sort_Boundary_Atoms( reax_system *system, int start, int end, #endif atoms = system->my_atoms; - my_box = &( system->my_box ); /* place each atom into the appropriate outgoing list */ for ( i = start; i < end; ++i ) @@ -320,7 +507,6 @@ void Estimate_Boundary_Atoms( reax_system *system, int start, int end, { int i, p, out_cnt; reax_atom *atoms; - simulation_box *my_box; boundary_atom *out_buf; neighbor_proc *nbr1, *nbr2, *nbr_pr; @@ -329,7 +515,6 @@ void Estimate_Boundary_Atoms( reax_system *system, int start, int end, system->my_rank, start, end, d ); #endif atoms = system->my_atoms; - my_box = &( system->my_box ); nbr1 = &(system->my_nbrs[2 * d]); nbr2 = &(system->my_nbrs[2 * d + 1]); nbr1->est_send = 0; @@ -609,7 +794,7 @@ void Comm_Atoms( reax_system *system, control_params *control, if ( system->my_rank == MASTER_NODE ) { - t_start = Get_Time( ); + t_start = MPI_Wtime(); } #endif @@ -653,6 +838,10 @@ void Comm_Atoms( reax_system *system, control_params *control, #endif Bin_Boundary_Atoms( system ); + +#if defined(NEUTRAL_TERRITORY) + Init_Neutral_Territory( system, mpi_data ); +#endif } else { @@ -673,7 +862,7 @@ void Comm_Atoms( reax_system *system, control_params *control, #if defined(LOG_PERFORMANCE) if ( system->my_rank == MASTER_NODE ) { - t_elapsed = Get_Timing_Info( t_start ); + t_elapsed = MPI_Wtime() - t_start; data->timing.comm += t_elapsed; } #endif diff --git a/PuReMD/src/comm_tools.h b/PuReMD/src/comm_tools.h index 48b676ebbe67cbd8fb17af717fd7da5eac96ffc8..c333fa0cd1bce2bc3ae1b71029a9b9522e75573c 100644 --- a/PuReMD/src/comm_tools.h +++ b/PuReMD/src/comm_tools.h @@ 
-25,10 +25,16 @@ #include "reax_types.h" void Setup_Comm( reax_system*, control_params*, mpi_datatypes* ); +#if defined(NEUTRAL_TERRITORY) +void Setup_NT_Comm( reax_system*, control_params*, mpi_datatypes* ); +#endif void Update_Comm( reax_system* ); void Sort_Boundary_Atoms( reax_system*, int, int, int, mpi_out_data* ); void Estimate_Boundary_Atoms( reax_system*, int, int, int, mpi_out_data* ); +#if defined(NEUTRAL_TERRITORY) +void Estimate_NT_Atoms( reax_system*, mpi_datatypes* ); +#endif void Unpack_Exchange_Message( reax_system*, int, void*, int, neighbor_proc*, int ); void Unpack_Estimate_Message( reax_system*, int, void*, int, diff --git a/PuReMD/src/ffield.c b/PuReMD/src/ffield.c index b05216bdcb19f8002bfe02293a3cf6e28dd4faa7..b25b8db44ab7d20d1380069533e9a65bc82db55b 100644 --- a/PuReMD/src/ffield.c +++ b/PuReMD/src/ffield.c @@ -43,11 +43,7 @@ char Read_Force_Field( char *ffield_file, reax_interaction *reax, comm = MPI_COMM_WORLD; /* open force field file */ - if ( (fp = fopen( ffield_file, "r" ) ) == NULL ) - { - fprintf( stderr, "error opening the force filed file! 
terminating...\n" ); - MPI_Abort( comm, FILE_NOT_FOUND ); - } + fp = sfopen( ffield_file, "r", "Read_Force_Field::fp" ); s = (char*) malloc(sizeof(char) * MAX_LINE); tmp = (char**) malloc(sizeof(char*)*MAX_TOKENS); diff --git a/PuReMD/src/forces.c b/PuReMD/src/forces.c index c406417b315dc4181fe286fd0e143a7a2e2f5fc7..fbd7acf952b5b4c053aa3e19d868b6bbd78d6077 100644 --- a/PuReMD/src/forces.c +++ b/PuReMD/src/forces.c @@ -20,176 +20,65 @@ ----------------------------------------------------------------------*/ #include "reax_types.h" + #if defined(PURE_REAX) -#include "forces.h" -#include "bond_orders.h" -#include "bonds.h" -#include "basic_comm.h" -#include "hydrogen_bonds.h" -#include "io_tools.h" -#include "list.h" -#include "lookup.h" -#include "multi_body.h" -#include "nonbonded.h" -#include "qEq.h" -#include "tool_box.h" -#include "torsion_angles.h" -#include "valence_angles.h" -#include "vector.h" + #include "forces.h" + #include "bond_orders.h" + #include "bonds.h" + #include "basic_comm.h" + #include "hydrogen_bonds.h" + #include "io_tools.h" + #include "list.h" + #include "lookup.h" + #include "multi_body.h" + #include "nonbonded.h" + #include "qEq.h" + #include "tool_box.h" + #include "torsion_angles.h" + #include "valence_angles.h" + #include "vector.h" #elif defined(LAMMPS_REAX) -#include "reax_forces.h" -#include "reax_bond_orders.h" -#include "reax_bonds.h" -#include "reax_basic_comm.h" -#include "reax_hydrogen_bonds.h" -#include "reax_io_tools.h" -#include "reax_list.h" -#include "reax_lookup.h" -#include "reax_multi_body.h" -#include "reax_nonbonded.h" -#include "reax_tool_box.h" -#include "reax_torsion_angles.h" -#include "reax_valence_angles.h" -#include "reax_vector.h" + #include "reax_forces.h" + #include "reax_bond_orders.h" + #include "reax_bonds.h" + #include "reax_basic_comm.h" + #include "reax_hydrogen_bonds.h" + #include "reax_io_tools.h" + #include "reax_list.h" + #include "reax_lookup.h" + #include "reax_multi_body.h" + #include 
"reax_nonbonded.h" + #include "reax_tool_box.h" + #include "reax_torsion_angles.h" + #include "reax_valence_angles.h" + #include "reax_vector.h" #endif -interaction_function Interaction_Functions[NUM_INTRS]; - -void Dummy_Interaction( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) -{ -} - -void Init_Force_Functions( control_params *control ) -{ - Interaction_Functions[0] = BO; - Interaction_Functions[1] = Bonds; //Dummy_Interaction; - Interaction_Functions[2] = Atom_Energy; //Dummy_Interaction; - Interaction_Functions[3] = Valence_Angles; //Dummy_Interaction; - Interaction_Functions[4] = Torsion_Angles; //Dummy_Interaction; - if ( control->hbond_cut > 0 ) - Interaction_Functions[5] = Hydrogen_Bonds; - else Interaction_Functions[5] = Dummy_Interaction; - Interaction_Functions[6] = Dummy_Interaction; //empty - Interaction_Functions[7] = Dummy_Interaction; //empty - Interaction_Functions[8] = Dummy_Interaction; //empty - Interaction_Functions[9] = Dummy_Interaction; //empty -} +interaction_function Interaction_Functions[NUM_INTRS]; -void Compute_Bonded_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control, - MPI_Comm comm ) +static int compare_bonds( const void *p1, const void *p2 ) { - int i; - - /* Mark beginning of a new timestep in bonded energy files */ -#if defined(TEST_ENERGY) - Debug_Marker_Bonded( out_control, data->step ); -#endif - - /* Implement all force calls as function pointers */ - for ( i = 0; i < NUM_INTRS; i++ ) - { -#if defined(DEBUG) - fprintf( stderr, "p%d: starting f%d\n", system->my_rank, i ); - MPI_Barrier( comm ); -#endif - (Interaction_Functions[i])( system, control, data, workspace, - lists, out_control ); -#if defined(DEBUG) - fprintf( stderr, "p%d: f%d done\n", system->my_rank, i ); - MPI_Barrier( comm ); -#endif - } + return ((bond_data *)p1)->nbr - 
((bond_data *)p2)->nbr; } -void Compute_NonBonded_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control, - MPI_Comm comm ) +static void Dummy_Interaction( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - /* Mark beginning of a new timestep in nonbonded energy files */ -#if defined(TEST_ENERGY) - Debug_Marker_Nonbonded( out_control, data->step ); -#endif - - /* van der Waals and Coulomb interactions */ - if ( control->tabulate == 0 ) - vdW_Coulomb_Energy( system, control, data, workspace, - lists, out_control ); - else - Tabulated_vdW_Coulomb_Energy( system, control, data, workspace, - lists, out_control ); - -#if defined(DEBUG) - fprintf( stderr, "p%d: nonbonded forces done\n", system->my_rank ); - MPI_Barrier( comm ); -#endif + ; } - -/* this version of Compute_Total_Force computes forces from - coefficients accumulated by all interaction functions. - Saves enormous time & space! */ -void Compute_Total_Force( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, mpi_datatypes *mpi_data ) -{ - int i, pj; - reax_list *bonds = lists[BONDS]; - - for ( i = 0; i < system->N; ++i ) - for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) - if ( i < bonds->bond_list[pj].nbr ) - { - if ( control->virial == 0 ) - Add_dBond_to_Forces( i, pj, workspace, lists ); - else - Add_dBond_to_Forces_NPT( i, pj, data, workspace, lists ); - } - - //Print_Total_Force( system, data, workspace ); -#if defined(PURE_REAX) - /* now all forces are computed to their partially-final values - based on the neighbors information each processor has had. 
- final values of force on each atom needs to be computed by adding up - all partially-final pieces */ - Coll( system, mpi_data, workspace->f, mpi_data->mpi_rvec, - sizeof(rvec) / sizeof(void), rvec_unpacker ); - for ( i = 0; i < system->n; ++i ) - rvec_Copy( system->my_atoms[i].f, workspace->f[i] ); - -#if defined(TEST_FORCES) - Coll( system, mpi_data, workspace->f_ele, mpi_data->mpi_rvec, rvec_unpacker); - Coll( system, mpi_data, workspace->f_vdw, mpi_data->mpi_rvec, rvec_unpacker); - Coll( system, mpi_data, workspace->f_be, mpi_data->mpi_rvec, rvec_unpacker ); - Coll( system, mpi_data, workspace->f_lp, mpi_data->mpi_rvec, rvec_unpacker ); - Coll( system, mpi_data, workspace->f_ov, mpi_data->mpi_rvec, rvec_unpacker ); - Coll( system, mpi_data, workspace->f_un, mpi_data->mpi_rvec, rvec_unpacker ); - Coll( system, mpi_data, workspace->f_ang, mpi_data->mpi_rvec, rvec_unpacker); - Coll( system, mpi_data, workspace->f_coa, mpi_data->mpi_rvec, rvec_unpacker); - Coll( system, mpi_data, workspace->f_pen, mpi_data->mpi_rvec, rvec_unpacker); - Coll( system, mpi_data, workspace->f_hb, mpi_data->mpi_rvec, rvec_unpacker ); - Coll( system, mpi_data, workspace->f_tor, mpi_data->mpi_rvec, rvec_unpacker); - Coll( system, mpi_data, workspace->f_con, mpi_data->mpi_rvec, rvec_unpacker); -#endif - -#endif -} - -void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, - int step, int n, int N, int numH, MPI_Comm comm ) +static void Validate_Lists( reax_system *system, storage *workspace, + reax_list **lists, int step, int n, int N, int numH, MPI_Comm comm ) { int i, comp, Hindex; reax_list *bonds, *hbonds; reallocate_data *realloc; - realloc = &(workspace->realloc); + realloc = &workspace->realloc; /* bond list */ if ( N > 0 ) @@ -205,19 +94,23 @@ void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, //workspace->realloc.bonds = 1; if ( i < N - 1 ) + { comp = Start_Index(i + 1, bonds); - else comp = bonds->num_intrs; + } + else + { + 
comp = bonds->num_intrs; + } if ( End_Index(i, bonds) > comp ) { - fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d str(i+1)=%d\n", + fprintf( stderr, "[ERROR] step%d-bondchk failed: i=%d end(i)=%d str(i+1)=%d\n", step, i, End_Index(i, bonds), comp ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } } } - /* hbonds list */ if ( numH > 0 ) { @@ -226,6 +119,7 @@ void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, for ( i = 0; i < n; ++i ) { Hindex = system->my_atoms[i].Hindex; + if ( Hindex > -1 ) { system->my_atoms[i].num_hbonds = @@ -236,49 +130,55 @@ void Validate_Lists( reax_system *system, storage *workspace, reax_list **lists, // workspace->realloc.hbonds = 1; if ( Hindex < numH - 1 ) - comp = Start_Index(Hindex + 1, hbonds); - else comp = hbonds->num_intrs; + { + comp = Start_Index( Hindex + 1, hbonds ); + } + else + { + comp = hbonds->num_intrs; + } if ( End_Index(Hindex, hbonds) > comp ) { - fprintf(stderr, "step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n", + fprintf(stderr, "[ERROR] step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n", step, Hindex, End_Index(Hindex, hbonds), comp ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } } -/* - if ( Hindex > -1 ) - { - system->my_atoms[i].num_hbonds = - MAX( Num_Entries(Hindex, hbonds) * SAFER_ZONE, MIN_HBONDS ); -*/ + +// if ( Hindex > -1 ) +// { +// system->my_atoms[i].num_hbonds = +// MAX( Num_Entries(Hindex, hbonds) * SAFER_ZONE, MIN_HBONDS ); + //if( Num_Entries(i, hbonds) >= //(Start_Index(i+1,hbonds)-Start_Index(i,hbonds))*0.90/*DANGER_ZONE*/){ // workspace->realloc.hbonds = 1; -/* + //TODO - if ( Hindex < system->n - 1 ) - comp = Start_Index(Hindex + 1, hbonds); - else comp = hbonds->num_intrs; - - if ( End_Index(Hindex, hbonds) > comp ) - { - fprintf(stderr, "step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n", - step, Hindex, End_Index(Hindex, hbonds), comp ); - MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY ); - } - } - -*/ - - - +// if ( Hindex < system->n - 1 ) +// { 
+// comp = Start_Index(Hindex + 1, hbonds); +// } +// else +// { +// comp = hbonds->num_intrs; +// } +// +// if ( End_Index(Hindex, hbonds) > comp ) +// { +// fprintf(stderr, "[ERROR] step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n", +// step, Hindex, End_Index(Hindex, hbonds), comp ); +// MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY ); +// } +// } } } } -real Compute_H( real r, real gamma, real *ctap ) +/* Computes a charge matrix entry using the Taper function */ +static real Compute_H( real r, real gamma, real *ctap ) { real taper, dr3gamij_1, dr3gamij_3; @@ -291,24 +191,32 @@ real Compute_H( real r, real gamma, real *ctap ) taper = taper * r + ctap[0]; dr3gamij_1 = ( r * r * r + gamma ); - dr3gamij_3 = pow( dr3gamij_1 , 0.33333333333333 ); + dr3gamij_3 = pow( dr3gamij_1, 1.0 / 3.0 ); + return taper * EV_to_KCALpMOL / dr3gamij_3; } -real Compute_tabH( real r_ij, int ti, int tj ) +/* Computes a charge matrix entry using the force tabulation + * (i.e., an arithmetic-reducing optimization) */ +static real Compute_tabH( real r_ij, int ti, int tj ) { int r, tmin, tmax; real val, dif, base; LR_lookup_table *t; - tmin = MIN( ti, tj ); - tmax = MAX( ti, tj ); - t = &( LR[tmin][tmax] ); + tmin = MIN( ti, tj ); + tmax = MAX( ti, tj ); + t = &LR[tmin][tmax]; /* cubic spline interpolation */ r = (int)(r_ij * t->inv_dx); - if ( r == 0 ) ++r; + + if ( r == 0 ) + { + ++r; + } + base = (real)(r + 1) * t->dx; dif = r_ij - base; val = ((t->ele[r].d * dif + t->ele[r].c) * dif + t->ele[r].b) * dif + @@ -319,307 +227,1680 @@ real Compute_tabH( real r_ij, int ti, int tj ) } -void Init_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, reax_list **lists, - output_controls *out_control, MPI_Comm comm ) +/* Compute the distances and displacement vectors for entries + * in the far neighbors list if it's a NOT re-neighboring step */ +static void Init_Distance( reax_system *system, control_params *control, + simulation_data *data, 
storage *workspace, reax_list **lists, + output_controls *out_control, MPI_Comm comm, mpi_datatypes *mpi_data ) { int i, j, pj; int start_i, end_i; - int type_i, type_j; - int Htop, btop_i, btop_j, num_bonds, num_hbonds; - int ihb, jhb, ihb_top, jhb_top; - int local, flag, renbr; - real r_ij, cutoff; - sparse_matrix *H; - reax_list *far_nbrs, *bonds, *hbonds; - single_body_parameters *sbp_i, *sbp_j; - two_body_parameters *twbp; - far_neighbor_data *nbr_pj; + int renbr; + reax_list *far_nbrs; reax_atom *atom_i, *atom_j; far_nbrs = lists[FAR_NBRS]; - bonds = lists[BONDS]; - hbonds = lists[HBONDS]; + renbr = (data->step - data->prev_steps) % control->reneighbor == 0; - for ( i = 0; i < system->n; ++i ) - workspace->bond_mark[i] = 0; - for ( i = system->n; i < system->N; ++i ) + if ( !renbr ) { - workspace->bond_mark[i] = 1000; // put ghost atoms to an infinite distance - //workspace->done_after[i] = Start_Index( i, far_nbrs ); + for ( i = 0; i < system->N; ++i ) + { + atom_i = &system->my_atoms[i]; + start_i = Start_Index( i, far_nbrs ); + end_i = End_Index( i, far_nbrs ); + + /* update distance and displacement vector between atoms i and j (i-j) */ + for ( pj = start_i; pj < end_i; ++pj ) + { + j = far_nbrs->far_nbr_list.nbr[pj]; + atom_j = &system->my_atoms[j]; + + far_nbrs->far_nbr_list.dvec[pj][0] = atom_j->x[0] - atom_i->x[0]; + far_nbrs->far_nbr_list.dvec[pj][1] = atom_j->x[1] - atom_i->x[1]; + far_nbrs->far_nbr_list.dvec[pj][2] = atom_j->x[2] - atom_i->x[2]; + far_nbrs->far_nbr_list.d[pj] = rvec_Norm_Sqr( far_nbrs->far_nbr_list.dvec[pj] ); + far_nbrs->far_nbr_list.d[pj] = sqrt( far_nbrs->far_nbr_list.d[pj] ); + } + } } +} + + +#if defined(NEUTRAL_TERRITORY) +/* Compute the charge matrix entries and store the matrix in half format + * using the far neighbors list (stored in full format) and according to + * the neutral territory communication method */ +static void Init_CM_Half_NT( reax_system *system, control_params *control, + simulation_data *data, storage 
*workspace, reax_list **lists, + output_controls *out_control, MPI_Comm comm, mpi_datatypes *mpi_data ) +{ + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + int Htop; + int local, renbr; + real r_ij; + sparse_matrix *H; + reax_list *far_nbrs; + single_body_parameters *sbp_i; + two_body_parameters *twbp; + reax_atom *atom_i, *atom_j; + int mark[6]; + int total_cnt[6]; + int bin[6]; + int total_sum[6]; + int nt_flag; + + far_nbrs = lists[FAR_NBRS]; H = workspace->H; H->n = system->n; Htop = 0; - num_bonds = 0; - num_hbonds = 0; - btop_i = btop_j = 0; renbr = (data->step - data->prev_steps) % control->reneighbor == 0; + nt_flag = 1; + if( renbr ) + { + for ( i = 0; i < 6; ++i ) + { + total_cnt[i] = 0; + bin[i] = 0; + total_sum[i] = 0; + } + + for ( i = system->n; i < system->N; ++i ) + { + atom_i = &system->my_atoms[i]; + + if( atom_i->nt_dir != -1 ) + { + total_cnt[ atom_i->nt_dir ]++; + } + } + + total_sum[0] = system->n; + for ( i = 1; i < 6; ++i ) + { + total_sum[i] = total_sum[i-1] + total_cnt[i-1]; + } + + for ( i = system->n; i < system->N; ++i ) + { + atom_i = &system->my_atoms[i]; + + if( atom_i->nt_dir != -1 ) + { + atom_i->pos = total_sum[ atom_i->nt_dir ] + bin[ atom_i->nt_dir ]; + bin[ atom_i->nt_dir ]++; + } + } + H->NT = total_sum[5] + total_cnt[5]; + } + + mark[0] = mark[1] = 1; + mark[2] = mark[3] = mark[4] = mark[5] = 2; + for ( i = 0; i < system->N; ++i ) { - atom_i = &(system->my_atoms[i]); - type_i = atom_i->type; - start_i = Start_Index(i, far_nbrs); - end_i = End_Index(i, far_nbrs); - btop_i = End_Index( i, bonds ); - sbp_i = &(system->reax_param.sbp[type_i]); + atom_i = &system->my_atoms[i]; + type_i = atom_i->type; + start_i = Start_Index( i, far_nbrs ); + end_i = End_Index( i, far_nbrs ); + + sbp_i = &system->reax_param.sbp[type_i]; if ( i < system->n ) { local = 1; - cutoff = control->nonb_cut; + } + else if ( atom_i->nt_dir != -1 ) + { + local = 2; + nt_flag = 0; } else { - local = 0; - cutoff = control->bond_cut; + continue; } 
- ihb = -1; - ihb_top = -1; - if ( local ) + if ( local == 1 ) { H->start[i] = Htop; H->entries[Htop].j = i; H->entries[Htop].val = sbp_i->eta; ++Htop; + } - if ( control->hbond_cut > 0 ) + for ( pj = start_i; pj < end_i; ++pj ) + { + j = far_nbrs->far_nbr_list.nbr[pj]; + atom_j = &system->my_atoms[j]; + + if ( far_nbrs->far_nbr_list.d[pj] <= control->nonb_cut ) { - ihb = sbp_i->p_hbond; - if ( ihb == 1 ) - ihb_top = End_Index( atom_i->Hindex, hbonds ); - else ihb_top = -1; + type_j = atom_j->type; + r_ij = far_nbrs->far_nbr_list.d[pj]; + twbp = &system->reax_param.tbp[type_i][type_j]; + + if ( local == 1 ) + { + /* H matrix entry */ + if ( atom_j->nt_dir > 0 || (j < system->n && i < j) ) + { + if ( j < system->n ) + { + H->entries[Htop].j = j; + } + else + { + H->entries[Htop].j = atom_j->pos; + } + + if ( control->tabulate == 0 ) + { + H->entries[Htop].val = Compute_H( r_ij, twbp->gamma, workspace->Tap ); + } + else + { + H->entries[Htop].val = Compute_tabH( r_ij, type_i, type_j ); + } + + ++Htop; + } + + } + else if ( local == 2 ) + { + /* H matrix entry */ + if ( atom_j->nt_dir != -1 + && mark[atom_i->nt_dir] != mark[atom_j->nt_dir] + && atom_i->pos < atom_j->pos ) + { + if ( !nt_flag ) + { + nt_flag = 1; + H->start[atom_i->pos] = Htop; + } + + //TODO: necessary? 
+ if ( j < system->n ) + { + H->entries[Htop].j = j; + } + else + { + H->entries[Htop].j = atom_j->pos; + } + + if ( control->tabulate == 0 ) + { + H->entries[Htop].val = Compute_H( r_ij, twbp->gamma, workspace->Tap ); + } + else + { + H->entries[Htop].val = Compute_tabH( r_ij, type_i, type_j ); + } + + ++Htop; + } + } + + } + } + + if ( local == 1 ) + { + H->end[i] = Htop; + } + else if ( local == 2 ) + { + if ( nt_flag ) + { + H->end[atom_i->pos] = Htop; + } + else + { + H->start[atom_i->pos] = 0; + H->end[atom_i->pos] = 0; } } + } + + workspace->realloc.Htop = Htop; + +#if defined( DEBUG ) + Print_Sparse_Matrix( system, H ); + for ( i = 0; i < H->n; ++i ) + for ( j = H->start[i]; j < H->end[i]; ++j ) + fprintf( stderr, "%d %d %.15e\n", + MIN(system->my_atoms[i].orig_id, + system->my_atoms[H->entries[j].j].orig_id), + MAX(system->my_atoms[i].orig_id, + system->my_atoms[H->entries[j].j].orig_id), + H->entries[j].val ); +#endif + +} + + +/* Compute the charge matrix entries and store the matrix in full format + * using the far neighbors list (stored in full format) and according to + * the neutral territory communication method */ +static void Init_CM_Full_NT( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control, MPI_Comm comm, mpi_datatypes *mpi_data ) +{ + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + int Htop; + int local, renbr; + real r_ij; + sparse_matrix *H; + reax_list *far_nbrs; + single_body_parameters *sbp_i; + two_body_parameters *twbp; + reax_atom *atom_i, *atom_j; + int mark[6]; + int total_cnt[6]; + int bin[6]; + int total_sum[6]; + int nt_flag; + + far_nbrs = lists[FAR_NBRS]; + + H = workspace->H; + H->n = system->n; + Htop = 0; + renbr = (data->step - data->prev_steps) % control->reneighbor == 0; + + nt_flag = 1; + if ( renbr ) + { + for ( i = 0; i < 6; ++i ) + { + total_cnt[i] = 0; + bin[i] = 0; + total_sum[i] = 0; + } + + for ( i = system->n; i < 
system->N; ++i ) + { + atom_i = &system->my_atoms[i]; + + if ( atom_i->nt_dir != -1 ) + { + total_cnt[ atom_i->nt_dir ]++; + } + } + + total_sum[0] = system->n; + for ( i = 1; i < 6; ++i ) + { + total_sum[i] = total_sum[i-1] + total_cnt[i-1]; + } + + for ( i = system->n; i < system->N; ++i ) + { + atom_i = &system->my_atoms[i]; + + if ( atom_i->nt_dir != -1 ) + { + atom_i->pos = total_sum[ atom_i->nt_dir ] + bin[ atom_i->nt_dir ]; + bin[ atom_i->nt_dir ]++; + } + } + H->NT = total_sum[5] + total_cnt[5]; + } + + mark[0] = mark[1] = 1; + mark[2] = mark[3] = mark[4] = mark[5] = 2; + + for ( i = 0; i < system->N; ++i ) + { + atom_i = &system->my_atoms[i]; + type_i = atom_i->type; + start_i = Start_Index( i, far_nbrs ); + end_i = End_Index( i, far_nbrs ); + + sbp_i = &system->reax_param.sbp[type_i]; + + if ( i < system->n ) + { + local = 1; + } + else if ( atom_i->nt_dir != -1 ) + { + local = 2; + nt_flag = 0; + } + else + { + continue; + } + + if ( local == 1 ) + { + H->start[i] = Htop; + H->entries[Htop].j = i; + H->entries[Htop].val = sbp_i->eta; + ++Htop; + } + + for ( pj = start_i; pj < end_i; ++pj ) + { + if ( far_nbrs->far_nbr_list.d[pj] <= control->nonb_cut ) + { + j = far_nbrs->far_nbr_list.nbr[pj]; + atom_j = &system->my_atoms[j]; + + type_j = atom_j->type; + r_ij = far_nbrs->far_nbr_list.d[pj]; + twbp = &system->reax_param.tbp[type_i][type_j]; + + if ( local == 1 ) + { + /* H matrix entry */ + if ( atom_j->nt_dir > 0 || (j < system->n) ) + { + if ( j < system->n ) + { + H->entries[Htop].j = j; + } + else + { + H->entries[Htop].j = atom_j->pos; + } + + if ( control->tabulate == 0 ) + { + H->entries[Htop].val = Compute_H(r_ij, twbp->gamma, workspace->Tap); + } + else + { + H->entries[Htop].val = Compute_tabH(r_ij, type_i, type_j); + } + + ++Htop; + } + + } + else if ( local == 2 ) + { + /* H matrix entry */ + if ( ( atom_j->nt_dir != -1 + && mark[atom_i->nt_dir] != mark[atom_j->nt_dir] ) + || ( j < system->n && atom_i->nt_dir != 0 ) ) + { + if ( !nt_flag ) + { 
+ nt_flag = 1; + H->start[atom_i->pos] = Htop; + } + + if ( j < system->n ) + { + H->entries[Htop].j = j; + } + else + { + H->entries[Htop].j = atom_j->pos; + } + + if ( control->tabulate == 0 ) + { + H->entries[Htop].val = Compute_H( r_ij, twbp->gamma, workspace->Tap ); + } + else + { + H->entries[Htop].val = Compute_tabH( r_ij, type_i, type_j ); + } + + ++Htop; + } + } + + } + } + + if ( local == 1 ) + { + H->end[i] = Htop; + } + else if ( local == 2 ) + { + if ( nt_flag ) + { + H->end[atom_i->pos] = Htop; + } + else + { + H->start[atom_i->pos] = 0; + H->end[atom_i->pos] = 0; + } + } + } + + workspace->realloc.Htop = Htop; + +#if defined( DEBUG ) + Print_Sparse_Matrix( system, H ); + for ( i = 0; i < H->n; ++i ) + for ( j = H->start[i]; j < H->end[i]; ++j ) + fprintf( stderr, "%d %d %.15e\n", + MIN(system->my_atoms[i].orig_id, + system->my_atoms[H->entries[j].j].orig_id), + MAX(system->my_atoms[i].orig_id, + system->my_atoms[H->entries[j].j].orig_id), + H->entries[j].val ); +#endif + +} + + +#else +/* Compute the charge matrix entries and store the matrix in half format + * using the far neighbors list (stored in half format) and according to + * the full shell communication method */ +static void Init_CM_Half_FS( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control, MPI_Comm comm, mpi_datatypes *mpi_data ) +{ + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + int Htop; + real r_ij; + sparse_matrix *H; + reax_list *far_nbrs; + single_body_parameters *sbp_i; + two_body_parameters *twbp; + reax_atom *atom_i, *atom_j; + + far_nbrs = lists[FAR_NBRS]; + + H = workspace->H; + H->n = system->n; + Htop = 0; + + for ( i = 0; i < system->n; ++i ) + { + atom_i = &system->my_atoms[i]; + type_i = atom_i->type; + start_i = Start_Index( i, far_nbrs ); + end_i = End_Index( i, far_nbrs ); + + sbp_i = &system->reax_param.sbp[type_i]; + + H->start[i] = Htop; + H->entries[Htop].j = i; 
+ H->entries[Htop].val = sbp_i->eta; + ++Htop; + + for ( pj = start_i; pj < end_i; ++pj ) + { + // H matrix entry + if ( far_nbrs->far_nbr_list.d[pj] <= control->nonb_cut ) + { + j = far_nbrs->far_nbr_list.nbr[pj]; + atom_j = &system->my_atoms[j]; + + if ( j < system->n || atom_i->orig_id < atom_j->orig_id ) + { + type_j = atom_j->type; + r_ij = far_nbrs->far_nbr_list.d[pj]; + twbp = &system->reax_param.tbp[type_i][type_j]; + + H->entries[Htop].j = j; + + if ( control->tabulate == 0 ) + { + H->entries[Htop].val = Compute_H( r_ij, twbp->gamma, workspace->Tap ); + } + else + { + H->entries[Htop].val = Compute_tabH( r_ij, type_i, type_j ); + } + + ++Htop; + } + } + } + + H->end[i] = Htop; + } + + workspace->realloc.Htop = Htop; + +#if defined( DEBUG ) + Print_Sparse_Matrix( system, H ); + for ( i = 0; i < H->n; ++i ) + for ( j = H->start[i]; j < H->end[i]; ++j ) + fprintf( stderr, "%d %d %.15e\n", + MIN(system->my_atoms[i].orig_id, + system->my_atoms[H->entries[j].j].orig_id), + MAX(system->my_atoms[i].orig_id, + system->my_atoms[H->entries[j].j].orig_id), + H->entries[j].val ); +#endif +} + + +/* Compute the charge matrix entries and store the matrix in full format + * using the far neighbors list (stored in full format) and according to + * the full shell communication method */ +static void Init_CM_Full_FS( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control, MPI_Comm comm, mpi_datatypes *mpi_data ) +{ + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + int Htop; + real r_ij; + sparse_matrix *H; + reax_list *far_nbrs; + single_body_parameters *sbp_i; + two_body_parameters *twbp; + reax_atom *atom_i, *atom_j; + + far_nbrs = lists[FAR_NBRS]; + + H = workspace->H; + H->n = system->n; + Htop = 0; + + for ( i = 0; i < system->n; ++i ) + { + atom_i = &system->my_atoms[i]; + type_i = atom_i->type; + start_i = Start_Index( i, far_nbrs ); + end_i = End_Index( i, far_nbrs ); + 
+ sbp_i = &system->reax_param.sbp[type_i]; + + H->start[i] = Htop; + H->entries[Htop].j = i; + H->entries[Htop].val = sbp_i->eta; + ++Htop; + + for ( pj = start_i; pj < end_i; ++pj ) + { + if ( far_nbrs->far_nbr_list.d[pj] <= control->nonb_cut ) + { + j = far_nbrs->far_nbr_list.nbr[pj]; + atom_j = &system->my_atoms[j]; + type_j = atom_j->type; + r_ij = far_nbrs->far_nbr_list.d[pj]; + twbp = &system->reax_param.tbp[type_i][type_j]; + + // H matrix entry + H->entries[Htop].j = j; + + if ( control->tabulate == 0 ) + { + H->entries[Htop].val = Compute_H(r_ij, twbp->gamma, workspace->Tap); + } + else + { + H->entries[Htop].val = Compute_tabH(r_ij, type_i, type_j); + } + + ++Htop; + } + } + + H->end[i] = Htop; + } + + workspace->realloc.Htop = Htop; + +#if defined( DEBUG ) + Print_Sparse_Matrix( system, H ); + for ( i = 0; i < H->n; ++i ) + for ( j = H->start[i]; j < H->end[i]; ++j ) + fprintf( stderr, "%d %d %.15e\n", + MIN(system->my_atoms[i].orig_id, + system->my_atoms[H->entries[j].j].orig_id), + MAX(system->my_atoms[i].orig_id, + system->my_atoms[H->entries[j].j].orig_id), + H->entries[j].val ); +#endif +} +#endif + + +/* Compute entries of the bonds/hbonds lists and store the lists in full format + * using the far neighbors list (stored in half format) + * + * Note: this version does NOT contain an optimization to restrict the bond_mark + * array to at most the 3-hop neighborhood */ +static void Init_Bond_Half( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control, MPI_Comm comm, mpi_datatypes *mpi_data ) +{ + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + int btop_i, num_bonds, num_hbonds; + int ihb, jhb, ihb_top; + int local; + real cutoff; + reax_list *far_nbrs, *bonds, *hbonds; + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + reax_atom *atom_i, *atom_j; + int jhb_top; + + far_nbrs = lists[FAR_NBRS]; + bonds = lists[BONDS]; + hbonds = 
lists[HBONDS]; + + for ( i = 0; i < system->n; ++i ) + { + workspace->bond_mark[i] = 0; + } + for ( i = system->n; i < system->N; ++i ) + { + /* put ghost atoms to an infinite distance (i.e., 1000) */ + workspace->bond_mark[i] = 1000; + } + + num_bonds = 0; + num_hbonds = 0; + btop_i = 0; + + for ( i = 0; i < system->N; ++i ) + { + atom_i = &system->my_atoms[i]; + type_i = atom_i->type; + start_i = Start_Index( i, far_nbrs ); + end_i = End_Index( i, far_nbrs ); + + /* start at end because other atoms + * can add to this atom's list (half-list) */ + btop_i = End_Index( i, bonds ); + sbp_i = &system->reax_param.sbp[type_i]; + + if ( i < system->n ) + { + local = 1; + cutoff = control->nonb_cut; + } + else + { + local = 0; + cutoff = control->bond_cut; + } + + ihb = -1; + ihb_top = -1; + if ( local == 1 ) + { + if ( control->hbond_cut > 0 ) + { + ihb = sbp_i->p_hbond; + + if ( ihb == 1 ) + { + /* start at end because other atoms + * can add to this atom's list (half-list) */ + ihb_top = End_Index( atom_i->Hindex, hbonds ); + } + else + { + ihb_top = -1; + } + } + } + + /* update i-j distance - check if j is within cutoff */ + for ( pj = start_i; pj < end_i; ++pj ) + { + j = far_nbrs->far_nbr_list.nbr[pj]; + atom_j = &system->my_atoms[j]; + + if ( far_nbrs->far_nbr_list.d[pj] <= cutoff ) + { + type_j = atom_j->type; + sbp_j = &system->reax_param.sbp[type_j]; + twbp = &system->reax_param.tbp[type_i][type_j]; + + if ( local == 1 ) + { + /* hydrogen bond lists */ + if ( control->hbond_cut > 0 + && (ihb == 1 || ihb == 2) + && far_nbrs->far_nbr_list.d[pj] <= control->hbond_cut ) + { + // fprintf( stderr, "%d %d\n", atom1, atom2 ); + jhb = sbp_j->p_hbond; + + if ( ihb == 1 && jhb == 2 ) + { + hbonds->hbond_list[ihb_top].nbr = j; + hbonds->hbond_list[ihb_top].scl = 1; + hbonds->hbond_list[ihb_top].ptr = pj; + ++ihb_top; + ++num_hbonds; + } + /* only add to list for local j (far nbrs is half-list) */ + else if ( j < system->n && ihb == 2 && jhb == 1 ) + { + jhb_top = 
End_Index( atom_j->Hindex, hbonds ); + hbonds->hbond_list[jhb_top].nbr = i; + hbonds->hbond_list[jhb_top].scl = -1; + hbonds->hbond_list[jhb_top].ptr = pj; + Set_End_Index( atom_j->Hindex, jhb_top + 1, hbonds ); + ++num_hbonds; + } + } + } + + /* uncorrected bond orders */ + if ( far_nbrs->far_nbr_list.d[pj] <= control->bond_cut + && BOp( workspace, bonds, control->bo_cut, + i, btop_i, far_nbrs->far_nbr_list.nbr[pj], + &far_nbrs->far_nbr_list.rel_box[pj], far_nbrs->far_nbr_list.d[pj], + &far_nbrs->far_nbr_list.dvec[pj], far_nbrs->format, + sbp_i, sbp_j, twbp ) ) + { + num_bonds += 2; + ++btop_i; + + if ( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 ) + { + workspace->bond_mark[j] = workspace->bond_mark[i] + 1; + } + else if ( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) + { + workspace->bond_mark[i] = workspace->bond_mark[j] + 1; + } + } + + } + } + + Set_End_Index( i, btop_i, bonds ); + + if ( local == 1 && ihb == 1 ) + { + Set_End_Index( atom_i->Hindex, ihb_top, hbonds ); + } + } + + workspace->realloc.num_bonds = num_bonds; + workspace->realloc.num_hbonds = num_hbonds; + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ step%d: Htop = %d num_bonds = %d num_hbonds = %d\n", + system->my_rank, data->step, workspace->realloc.Htop, num_bonds, num_hbonds ); + MPI_Barrier( comm ); +#endif + +#if defined( DEBUG ) + Print_Bonds( system, bonds, "debugbonds.out" ); + Print_Bond_List2( system, bonds, "pbonds.out" ); +#endif + + Validate_Lists( system, workspace, lists, data->step, + system->n, system->N, system->numH, comm ); + +} + + +/* Compute entries of the bonds/hbonds lists and store the lists in full format + * using the far neighbors list (stored in full format) */ +static void Init_Bond_Full( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control, MPI_Comm comm, mpi_datatypes *mpi_data ) +{ + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + int 
num_bonds, num_hbonds; + int ihb, jhb, ihb_top; + real cutoff; + reax_list *far_nbrs, *bonds, *hbonds; + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + reax_atom *atom_i, *atom_j; + int start_j, end_j; + int btop_i, btop_j; + int k, push; + int *q; + + far_nbrs = lists[FAR_NBRS]; + bonds = lists[BONDS]; + hbonds = lists[HBONDS]; + num_hbonds = 0; + push = 0; + num_bonds = 0; + btop_i = 0; + bonds = lists[BONDS]; + + q = smalloc( sizeof(int) * (system->N - system->n), + "Init_Distance::q", MPI_COMM_WORLD ); + + for ( i = 0; i < system->n; ++i ) + { + workspace->bond_mark[i] = 0; + } + for ( i = system->n; i < system->N; ++i ) + { + /* put ghost atoms to an infinite distance (i.e., 1000) */ + workspace->bond_mark[i] = 1000; + } + + /* bonds that are directly connected to local atoms */ + for ( i = 0; i < system->n; ++i ) + { + atom_i = &system->my_atoms[i]; + type_i = atom_i->type; + btop_i = End_Index( i, bonds ); + sbp_i = &system->reax_param.sbp[type_i]; + start_i = Start_Index( i, far_nbrs ); + end_i = End_Index( i, far_nbrs ); + ihb = sbp_i->p_hbond; + ihb_top = Start_Index( atom_i->Hindex, hbonds ); + + for ( pj = start_i; pj < end_i; ++pj ) + { + j = far_nbrs->far_nbr_list.nbr[pj]; + atom_j = &system->my_atoms[j]; + + if ( control->hbond_cut > 0.0 && ihb == 1 ) + { + /* check if j is within cutoff */ + if ( far_nbrs->far_nbr_list.d[pj] <= control->hbond_cut + && system->reax_param.sbp[atom_j->type].p_hbond == 2 ) + { + hbonds->hbond_list[ihb_top].nbr = j; + hbonds->hbond_list[ihb_top].scl = 1; + hbonds->hbond_list[ihb_top].ptr = pj; + ++ihb_top; + ++num_hbonds; + } + } + + if ( i <= j && far_nbrs->far_nbr_list.d[pj] <= control->bond_cut ) + { + type_j = atom_j->type; + sbp_j = &system->reax_param.sbp[type_j]; + twbp = &system->reax_param.tbp[type_i][type_j]; + + if ( BOp( workspace, bonds, control->bo_cut, + i, btop_i, far_nbrs->far_nbr_list.nbr[pj], + &far_nbrs->far_nbr_list.rel_box[pj], far_nbrs->far_nbr_list.d[pj], + 
&far_nbrs->far_nbr_list.dvec[pj], far_nbrs->format, + sbp_i, sbp_j, twbp ) ) + { + num_bonds += 2; + ++btop_i; + + /* if j is a non-local atom, push it on the queue + * to search for it's bonded neighbors later */ + if ( workspace->bond_mark[j] == 1000 ) + { + workspace->bond_mark[j] = 101; + q[ push++ ] = j; + } + } + } + } + + if ( control->hbond_cut > 0.0 && ihb == 1 ) + { + Set_End_Index( atom_i->Hindex, ihb_top, hbonds ); + } + + Set_End_Index( i, btop_i, bonds ); + } + + /* bonds that are indirectly connected to local atoms */ + for ( k = 0; k < push; ++k ) + { + i = q[k]; + workspace->bond_mark[i] -= 100; + atom_i = &system->my_atoms[i]; + type_i = atom_i->type; + btop_i = End_Index( i, bonds ); + sbp_i = &system->reax_param.sbp[type_i]; + start_i = Start_Index( i, far_nbrs ); + end_i = End_Index( i, far_nbrs ); + + for ( pj = start_i; pj < end_i; ++pj ) + { + j = far_nbrs->far_nbr_list.nbr[pj]; + + if ( workspace->bond_mark[i] == 3 + && workspace->bond_mark[j] == 1000 ) + { + continue; + } + + atom_j = &system->my_atoms[j]; + + if ( workspace->bond_mark[j] > 100 + && far_nbrs->far_nbr_list.d[pj] <= control->bond_cut ) + { + type_j = atom_j->type; + sbp_j = &system->reax_param.sbp[type_j]; + twbp = &system->reax_param.tbp[type_i][type_j]; + + if ( BOp( workspace, bonds, control->bo_cut, + i, btop_i, far_nbrs->far_nbr_list.nbr[pj], + &far_nbrs->far_nbr_list.rel_box[pj], far_nbrs->far_nbr_list.d[pj], + &far_nbrs->far_nbr_list.dvec[pj], far_nbrs->format, + sbp_i, sbp_j, twbp ) ) + { + num_bonds += 2; + ++btop_i; + + if ( workspace->bond_mark[j] == 1000 ) + { + workspace->bond_mark[j] = workspace->bond_mark[i] + 100; + + if ( workspace->bond_mark[i] < 3 ) + { + q[ push++ ] = j; + } + } + } + } + } + + Set_End_Index( i, btop_i, bonds ); + } + + workspace->realloc.num_bonds = num_bonds; + sfree( q, "Init_Bond_Full::q" ); + + workspace->realloc.num_hbonds = num_hbonds; + +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d @ step%d: Htop = %d num_bonds = %d num_hbonds 
= %d\n", + system->my_rank, data->step, workspace->realloc.Htop, workspace->realloc.num_bonds, num_hbonds ); + MPI_Barrier( comm ); +#endif + +#if defined( DEBUG ) + Print_Bonds( system, bonds, "debugbonds.out" ); + Print_Bond_List2( system, bonds, "pbonds.out" ); +#endif + + Validate_Lists( system, workspace, lists, data->step, + system->n, system->N, system->numH, comm ); + +} + + +void Init_Force_Functions( control_params *control ) +{ + Interaction_Functions[0] = &BO; + Interaction_Functions[1] = &Bonds; //Dummy_Interaction; + Interaction_Functions[2] = &Atom_Energy; //Dummy_Interaction; + Interaction_Functions[3] = &Valence_Angles; //Dummy_Interaction; + Interaction_Functions[4] = &Torsion_Angles; //Dummy_Interaction; + if ( control->hbond_cut > 0.0 ) + { + Interaction_Functions[5] = &Hydrogen_Bonds; + } + else + { + Interaction_Functions[5] = &Dummy_Interaction; + } + Interaction_Functions[6] = &Dummy_Interaction; //empty + Interaction_Functions[7] = &Dummy_Interaction; //empty + Interaction_Functions[8] = &Dummy_Interaction; //empty + Interaction_Functions[9] = &Dummy_Interaction; //empty +} + + +void Compute_Bonded_Forces( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control, MPI_Comm comm ) +{ + int i; + + /* Mark beginning of a new timestep in bonded energy files */ +#if defined(TEST_ENERGY) + Debug_Marker_Bonded( out_control, data->step ); +#endif + + /* Implement all force calls as function pointers */ + for ( i = 0; i < NUM_INTRS; i++ ) + { +#if defined(DEBUG) + fprintf( stderr, "p%d: starting f%d\n", system->my_rank, i ); + MPI_Barrier( comm ); +#endif + + (Interaction_Functions[i])( system, control, data, workspace, + lists, out_control ); + +#if defined(DEBUG) + fprintf( stderr, "p%d: f%d done\n", system->my_rank, i ); + MPI_Barrier( comm ); +#endif + } +} + + +void Compute_NonBonded_Forces( reax_system *system, control_params *control, + simulation_data *data, 
storage *workspace, reax_list **lists, + output_controls *out_control, MPI_Comm comm ) +{ + /* Mark beginning of a new timestep in nonbonded energy files */ +#if defined(TEST_ENERGY) + Debug_Marker_Nonbonded( out_control, data->step ); +#endif + + /* van der Waals and Coulomb interactions */ + if ( control->tabulate == 0 ) + { + vdW_Coulomb_Energy( system, control, data, workspace, + lists, out_control ); + } + else + { + Tabulated_vdW_Coulomb_Energy( system, control, data, workspace, + lists, out_control ); + } + +#if defined(DEBUG) + fprintf( stderr, "p%d: nonbonded forces done\n", system->my_rank ); + MPI_Barrier( comm ); +#endif +} + + +/* this version of Compute_Total_Force computes forces from + * coefficients accumulated by all interaction functions. + * Saves enormous time & space! */ +void Compute_Total_Force( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, mpi_datatypes *mpi_data ) +{ + int i, pj; + reax_list *bonds; + + bonds = lists[BONDS]; - /* update i-j distance - check if j is within cutoff */ - for ( pj = start_i; pj < end_i; ++pj ) + for ( i = 0; i < system->N; ++i ) + { + for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) { - nbr_pj = &( far_nbrs->far_nbr_list[pj] ); - j = nbr_pj->nbr; - atom_j = &(system->my_atoms[j]); - //fprintf( stderr, "%d%d i=%d x_i: %f %f %f,j=%d x_j: %f %f %f, d=%f\n", - // MIN(atom_i->orig_id, atom_j->orig_id), - // MAX(atom_i->orig_id, atom_j->orig_id), - // i, atom_i->x[0], atom_i->x[1], atom_i->x[2], - // j, atom_j->x[0], atom_j->x[1], atom_j->x[2], nbr_pj->d ); - if ( renbr ) - { - if (nbr_pj->d <= cutoff) - flag = 1; - else flag = 0; - } - else + if ( i < bonds->bond_list[pj].nbr ) { - nbr_pj->dvec[0] = atom_j->x[0] - atom_i->x[0]; - nbr_pj->dvec[1] = atom_j->x[1] - atom_i->x[1]; - nbr_pj->dvec[2] = atom_j->x[2] - atom_i->x[2]; - nbr_pj->d = rvec_Norm_Sqr( nbr_pj->dvec ); - if ( nbr_pj->d <= SQR(cutoff) ) + if ( control->virial == 0 ) { 
- nbr_pj->d = sqrt(nbr_pj->d); - flag = 1; + Add_dBond_to_Forces( i, pj, workspace, lists ); } else { - flag = 0; + Add_dBond_to_Forces_NPT( i, pj, data, workspace, lists ); } } + } + } - if ( flag ) - { - type_j = atom_j->type; - r_ij = nbr_pj->d; - sbp_j = &(system->reax_param.sbp[type_j]); - twbp = &(system->reax_param.tbp[type_i][type_j]); + //Print_Total_Force( system, data, workspace ); - if ( local ) - { - /* H matrix entry */ - if ( j < system->n || atom_i->orig_id < atom_j->orig_id ) //tryQEq||1 - { - H->entries[Htop].j = j; - //fprintf( stdout, "%d%d %d %d\n", - // MIN(atom_i->orig_id, atom_j->orig_id), - // MAX(atom_i->orig_id, atom_j->orig_id), - // MIN(atom_i->orig_id, atom_j->orig_id), - // MAX(atom_i->orig_id, atom_j->orig_id) ); - if ( control->tabulate == 0 ) - H->entries[Htop].val = Compute_H(r_ij, twbp->gamma, workspace->Tap); - else H->entries[Htop].val = Compute_tabH(r_ij, type_i, type_j); - ++Htop; - } +#if defined(PURE_REAX) + /* now all forces are computed to their partially-final values + * based on the neighbors information each processor has had. 
+ * final values of force on each atom needs to be computed by adding up + * all partially-final pieces */ + Coll_FS( system, mpi_data, workspace->f, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); - /* hydrogen bond lists */ - if ( control->hbond_cut > 0 && (ihb == 1 || ihb == 2) && - nbr_pj->d <= control->hbond_cut ) - { - // fprintf( stderr, "%d %d\n", atom1, atom2 ); - jhb = sbp_j->p_hbond; - if ( ihb == 1 && jhb == 2 ) - { - hbonds->hbond_list[ihb_top].nbr = j; - hbonds->hbond_list[ihb_top].scl = 1; - hbonds->hbond_list[ihb_top].ptr = nbr_pj; - ++ihb_top; - ++num_hbonds; - } - else if ( j < system->n && ihb == 2 && jhb == 1 ) - { - jhb_top = End_Index( atom_j->Hindex, hbonds ); - hbonds->hbond_list[jhb_top].nbr = i; - hbonds->hbond_list[jhb_top].scl = -1; - hbonds->hbond_list[jhb_top].ptr = nbr_pj; - Set_End_Index( atom_j->Hindex, jhb_top + 1, hbonds ); - ++num_hbonds; - } - } - } + for ( i = 0; i < system->n; ++i ) + { + rvec_Copy( system->my_atoms[i].f, workspace->f[i] ); + } - /* uncorrected bond orders */ - if ( //(workspace->bond_mark[i] < 3 || workspace->bond_mark[j] < 3) && - nbr_pj->d <= control->bond_cut && - BOp( workspace, bonds, control->bo_cut, - i , btop_i, nbr_pj, sbp_i, sbp_j, twbp ) ) - { - num_bonds += 2; - ++btop_i; +#if defined(TEST_FORCES) + Coll_FS( system, mpi_data, workspace->f_ele, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_vdw, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_be, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_lp, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_ov, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_un, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_ang, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_coa, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_pen, 
RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_hb, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_tor, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); + Coll_FS( system, mpi_data, workspace->f_con, RVEC_PTR_TYPE, mpi_data->mpi_rvec ); +#endif - if ( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 ) - workspace->bond_mark[j] = workspace->bond_mark[i] + 1; - else if ( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) - { - workspace->bond_mark[i] = workspace->bond_mark[j] + 1; - //if( workspace->bond_mark[i] == 1000 ) - // workspace->done_after[i] = pj; - } - //fprintf( stdout, "%d%d - %d(%d) %d(%d)\n", - // i , j, i, workspace->bond_mark[i], j, workspace->bond_mark[j] ); - } - } - } +#endif +} - Set_End_Index( i, btop_i, bonds ); - if ( local ) - { - H->end[i] = Htop; - if ( ihb == 1 ) - Set_End_Index( atom_i->Hindex, ihb_top, hbonds ); - } - } - //fprintf( stderr, "after the first init loop\n" ); - /*for( i = system->n; i < system->N; ++i ) - if( workspace->bond_mark[i] > 3 ) { - start_i = Start_Index(i, bonds); - end_i = End_Index(i, bonds); - num_bonds -= (end_i - start_i); - Set_End_Index(i, start_i, bonds ); - }*/ +void Init_Forces( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control, MPI_Comm comm, mpi_datatypes *mpi_data ) +{ + double t_start, t_dist, t_cm, t_bond; + double timings[3], t_total[3]; + + t_start = MPI_Wtime( ); - /*for( i = system->n; i < system->N; ++i ) { - start_i = Start_Index(i, far_nbrs); - end_i = workspace->done_after[i]; + Init_Distance( system, control, data, workspace, lists, out_control, comm, mpi_data ); - if( workspace->bond_mark[i] >= 2 && start_i < end_i ) { - atom_i = &(system->my_atoms[i]); - type_i = atom_i->type; - btop_i = End_Index( i, bonds ); - sbp_i = &(system->reax_param.sbp[type_i]); + t_dist = MPI_Wtime( ); - for( pj = start_i; pj < end_i; ++pj ) { - nbr_pj = &( 
far_nbrs->far_nbr_list[pj] ); - j = nbr_pj->nbr; +#if defined(NEUTRAL_TERRITORY) + if ( workspace->H->format == SYM_HALF_MATRIX ) + { + Init_CM_Half_NT( system, control, data, workspace, lists, out_control, comm, mpi_data ); + } + else + { + Init_CM_Full_NT( system, control, data, workspace, lists, out_control, comm, mpi_data ); + } +#else + if ( workspace->H->format == SYM_HALF_MATRIX ) + { + Init_CM_Half_FS( system, control, data, workspace, lists, out_control, comm, mpi_data ); + } + else + { + Init_CM_Full_FS( system, control, data, workspace, lists, out_control, comm, mpi_data ); + } +#endif - if( workspace->bond_mark[j] >= 2 && nbr_pj->d <= control->bond_cut ) { - atom_j = &(system->my_atoms[j]); - type_j = atom_j->type; - sbp_j = &(system->reax_param.sbp[type_j]); - twbp = &(system->reax_param.tbp[type_i][type_j]); + t_cm = MPI_Wtime(); - if( BOp( workspace, bonds, control->bo_cut, - i , btop_i, nbr_pj, sbp_i, sbp_j, twbp ) ) { - num_bonds += 2; - ++btop_i; + if ( lists[FAR_NBRS]->format == HALF_LIST ) + { + Init_Bond_Half( system, control, data, workspace, lists, out_control, comm, mpi_data ); + } + else + { + Init_Bond_Full( system, control, data, workspace, lists, out_control, comm, mpi_data ); + } - if( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 ) - workspace->bond_mark[j] = workspace->bond_mark[i] + 1; - else if( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) - workspace->bond_mark[i] = workspace->bond_mark[j] + 1; + t_bond = MPI_Wtime(); - //fprintf( stdout, "%d%d - %d(%d) %d(%d) new\n", - // i , j, i, workspace->bond_mark[i], j, workspace->bond_mark[j] ); - } - } - } - Set_End_Index( i, btop_i, bonds ); - } - }*/ + timings[0] = t_dist - t_start; + timings[1] = t_cm - t_dist; + timings[2] = t_bond - t_cm; - workspace->realloc.Htop = Htop; - workspace->realloc.num_bonds = num_bonds; - workspace->realloc.num_hbonds = num_hbonds; + MPI_Reduce( timings, t_total, 3, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); -#if 
defined(DEBUG_FOCUS) - fprintf( stderr, "p%d @ step%d: Htop = %d num_bonds = %d num_hbonds = %d\n", - system->my_rank, data->step, Htop, num_bonds, num_hbonds ); - MPI_Barrier( comm ); -#endif -#if defined( DEBUG ) - Print_Bonds( system, bonds, "debugbonds.out" ); - Print_Bond_List2( system, bonds, "pbonds.out" ); - Print_Sparse_Matrix( system, H ); - for ( i = 0; i < H->n; ++i ) - for ( j = H->start[i]; j < H->end[i]; ++j ) - fprintf( stderr, "%d %d %.15e\n", - MIN(system->my_atoms[i].orig_id, - system->my_atoms[H->entries[j].j].orig_id), - MAX(system->my_atoms[i].orig_id, - system->my_atoms[H->entries[j].j].orig_id), - H->entries[j].val ); -#endif + if ( system->my_rank == MASTER_NODE ) + { + data->timing.init_dist += t_total[0] / control->nprocs; + data->timing.init_cm += t_total[1] / control->nprocs; + data->timing.init_bond += t_total[2] / control->nprocs; + } - Validate_Lists( system, workspace, lists, data->step, - system->n, system->N, system->numH, comm ); } +//void Init_Forces( reax_system *system, control_params *control, +// simulation_data *data, storage *workspace, reax_list **lists, +// output_controls *out_control, MPI_Comm comm, mpi_datatypes *mpi_data ) +//{ +// int i, j, pj; +// int start_i, end_i; +// int type_i, type_j; +// int Htop, btop_i, num_bonds, num_hbonds; +// int ihb, jhb, ihb_top; +// int local, flag, renbr; +// real r_ij, cutoff; +// sparse_matrix *H; +// reax_list *far_nbrs, *bonds, *hbonds; +// single_body_parameters *sbp_i, *sbp_j; +// two_body_parameters *twbp; +// reax_atom *atom_i, *atom_j; +// int jhb_top; +// int start_j, end_j; +// int btop_j; +//#if defined(NEUTRAL_TERRITORY) +// int mark[6]; +// int total_cnt[6]; +// int bin[6]; +// int total_sum[6]; +// int nt_flag; +//#endif +// +// far_nbrs = lists[FAR_NBRS]; +// bonds = lists[BONDS]; +// hbonds = lists[HBONDS]; +// +// +// for ( i = 0; i < system->n; ++i ) +// workspace->bond_mark[i] = 0; +// for ( i = system->n; i < system->N; ++i ) +// { +// /* put ghost atoms to an 
infinite distance (i.e., 1000) */ +// workspace->bond_mark[i] = 1000; +// } +// +// H = workspace->H; +// H->n = system->n; +// Htop = 0; +// num_bonds = 0; +// num_hbonds = 0; +// btop_i = 0; +// renbr = (data->step - data->prev_steps) % control->reneighbor == 0; +// +//#if defined(NEUTRAL_TERRITORY) +// nt_flag = 1; +// if( renbr ) +// { +// for ( i = 0; i < 6; ++i ) +// { +// total_cnt[i] = 0; +// bin[i] = 0; +// total_sum[i] = 0; +// } +// +// for ( i = system->n; i < system->N; ++i ) +// { +// atom_i = &system->my_atoms[i]; +// +// if( atom_i->nt_dir != -1 ) +// { +// total_cnt[ atom_i->nt_dir ]++; +// } +// } +// +// total_sum[0] = system->n; +// for ( i = 1; i < 6; ++i ) +// { +// total_sum[i] = total_sum[i-1] + total_cnt[i-1]; +// } +// +// for ( i = system->n; i < system->N; ++i ) +// { +// atom_i = &system->my_atoms[i]; +// +// if( atom_i->nt_dir != -1 ) +// { +// atom_i->pos = total_sum[ atom_i->nt_dir ] + bin[ atom_i->nt_dir ]; +// bin[ atom_i->nt_dir ]++; +// } +// } +// H->NT = total_sum[5] + total_cnt[5]; +// } +// +// mark[0] = mark[1] = 1; +// mark[2] = mark[3] = mark[4] = mark[5] = 2; +//#endif +// +// for ( i = 0; i < system->N; ++i ) +// { +// atom_i = &system->my_atoms[i]; +// type_i = atom_i->type; +// start_i = Start_Index(i, far_nbrs); +// end_i = End_Index(i, far_nbrs); +// +// if ( far_nbrs->format == HALF_LIST ) +// { +// // start at end because other atoms +// // can add to this atom's list (half-list) +// btop_i = End_Index( i, bonds ); +// } +// else if ( far_nbrs->format == FULL_LIST ) +// { +// btop_i = Start_Index( i, bonds ); +// } +// sbp_i = &system->reax_param.sbp[type_i]; +// +// if ( i < system->n ) +// { +// local = 1; +// cutoff = control->nonb_cut; +// } +//#if defined(NEUTRAL_TERRITORY) +// else if ( atom_i->nt_dir != -1 ) +// { +// local = 2; +// cutoff = control->nonb_cut; +// nt_flag = 0; +// } +//#endif +// else +// { +// local = 0; +// cutoff = control->bond_cut; +// } +// +// ihb = -1; +// ihb_top = -1; +// if ( 
local == 1 ) +// { +// H->start[i] = Htop; +// H->entries[Htop].j = i; +// H->entries[Htop].val = sbp_i->eta; +// ++Htop; +// +// if ( control->hbond_cut > 0 ) +// { +// ihb = sbp_i->p_hbond; +// if ( ihb == 1 ) +// { +// if ( far_nbrs->format == HALF_LIST ) +// { +// // start at end because other atoms +// // can add to this atom's list (half-list) +// ihb_top = End_Index( atom_i->Hindex, hbonds ); +// } +// else if ( far_nbrs->format == FULL_LIST ) +// { +// ihb_top = Start_Index( atom_i->Hindex, hbonds ); +// } +// } +// else +// { +// ihb_top = -1; +// } +// } +// } +// +// // update i-j distance - check if j is within cutoff +// for ( pj = start_i; pj < end_i; ++pj ) +// { +// j = far_nbrs->far_nbr_list.nbr[pj]; +// atom_j = &system->my_atoms[j]; +// +// if ( renbr ) +// { +// if ( far_nbrs->far_nbr_list.d[pj] <= cutoff ) +// flag = 1; +// else +// flag = 0; +// } +// else +// { +// far_nbrs->far_nbr_list.dvec[pj][0] = atom_j->x[0] - atom_i->x[0]; +// far_nbrs->far_nbr_list.dvec[pj][1] = atom_j->x[1] - atom_i->x[1]; +// far_nbrs->far_nbr_list.dvec[pj][2] = atom_j->x[2] - atom_i->x[2]; +// far_nbrs->far_nbr_list.d[pj] = rvec_Norm_Sqr( far_nbrs->far_nbr_list.dvec[pj] ); +// +// if ( far_nbrs->far_nbr_list.d[pj] <= SQR(cutoff) ) +// { +// far_nbrs->far_nbr_list.d[pj] = sqrt( far_nbrs->far_nbr_list.d[pj] ); +// flag = 1; +// } +// else +// { +// flag = 0; +// } +// } +// +// if ( flag ) +// { +// type_j = atom_j->type; +// r_ij = far_nbrs->far_nbr_list.d[pj]; +// sbp_j = &system->reax_param.sbp[type_j]; +// twbp = &system->reax_param.tbp[type_i][type_j]; +// +// if ( local == 1 ) +// { +// // H matrix entry +//#if defined(NEUTRAL_TERRITORY) +// if ( atom_j->nt_dir > 0 || (j < system->n +// && (H->format == SYM_FULL_MATRIX +// || (H->format == SYM_HALF_MATRIX && i < j))) ) +// { +// if( j < system->n ) +// { +// H->entries[Htop].j = j; +// } +// else +// { +// H->entries[Htop].j = atom_j->pos; +// } +// +// if ( control->tabulate == 0 ) +// { +// 
H->entries[Htop].val = Compute_H(r_ij, twbp->gamma, workspace->Tap); +// } +// else +// { +// H->entries[Htop].val = Compute_tabH(r_ij, type_i, type_j); +// } +// +// ++Htop; +// } +//#else +// if ( (far_nbrs->format == HALF_LIST +// && (j < system->n || atom_i->orig_id < atom_j->orig_id)) +// || far_nbrs->format == FULL_LIST ) +// { +// H->entries[Htop].j = j; +// +// if ( control->tabulate == 0 ) +// { +// H->entries[Htop].val = Compute_H(r_ij, twbp->gamma, workspace->Tap); +// } +// else +// { +// H->entries[Htop].val = Compute_tabH(r_ij, type_i, type_j); +// } +// +// ++Htop; +// } +//#endif +// +// // hydrogen bond lists +// if ( control->hbond_cut > 0.0 +// && (ihb == 1 || ihb == 2) +// && far_nbrs->far_nbr_list.d[pj] <= control->hbond_cut ) +// { +// // fprintf( stderr, "%d %d\n", atom1, atom2 ); +// jhb = sbp_j->p_hbond; +// if ( ihb == 1 && jhb == 2 ) +// { +// hbonds->hbond_list[ihb_top].nbr = j; +// hbonds->hbond_list[ihb_top].scl = 1; +// hbonds->hbond_list[ihb_top].ptr = pj; +// ++ihb_top; +// ++num_hbonds; +// } +// // only add to list for local j (far nbrs is half-list) +// else if ( far_nbrs->format == HALF_LIST +// && (j < system->n && ihb == 2 && jhb == 1) ) +// { +// jhb_top = End_Index( atom_j->Hindex, hbonds ); +// hbonds->hbond_list[jhb_top].nbr = i; +// hbonds->hbond_list[jhb_top].scl = -1; +// hbonds->hbond_list[jhb_top].ptr = pj; +// Set_End_Index( atom_j->Hindex, jhb_top + 1, hbonds ); +// ++num_hbonds; +// } +// } +// } +//#if defined(NEUTRAL_TERRITORY) +// else if ( local == 2 ) +// { +// // H matrix entry +// if( ( atom_j->nt_dir != -1 && mark[atom_i->nt_dir] != mark[atom_j->nt_dir] +// && ( H->format == SYM_FULL_MATRIX +// || (H->format == SYM_HALF_MATRIX && atom_i->pos < atom_j->pos))) +// || ( j < system->n && atom_i->nt_dir != 0 && H->format == SYM_FULL_MATRIX )) +// { +// if( !nt_flag ) +// { +// nt_flag = 1; +// H->start[atom_i->pos] = Htop; +// } +// +// if( j < system->n ) +// { +// H->entries[Htop].j = j; +// } +// else +// { 
+// H->entries[Htop].j = atom_j->pos; +// } +// +// if ( control->tabulate == 0 ) +// { +// H->entries[Htop].val = Compute_H(r_ij, twbp->gamma, workspace->Tap); +// } +// else +// { +// H->entries[Htop].val = Compute_tabH(r_ij, type_i, type_j); +// } +// +// ++Htop; +// } +// } +//#endif +// +// // uncorrected bond orders +// if ( //(workspace->bond_mark[i] < 3 || workspace->bond_mark[j] < 3) && +// far_nbrs->far_nbr_list.d[pj] <= control->bond_cut +// && BOp( workspace, bonds, control->bo_cut, +// i, btop_i, far_nbrs->far_nbr_list.nbr[pj], +// &far_nbrs->far_nbr_list.rel_box[pj], far_nbrs->far_nbr_list.d[pj], +// &far_nbrs->far_nbr_list.dvec[pj], far_nbrs->format, +// sbp_i, sbp_j, twbp ) ) +// { +// num_bonds += 2; +// ++btop_i; +// +// if ( workspace->bond_mark[j] > workspace->bond_mark[i] + 1 ) +// workspace->bond_mark[j] = workspace->bond_mark[i] + 1; +// else if ( workspace->bond_mark[i] > workspace->bond_mark[j] + 1 ) +// { +// workspace->bond_mark[i] = workspace->bond_mark[j] + 1; +// } +// } +// } +// } +// +// Set_End_Index( i, btop_i, bonds ); +// if ( local == 1 ) +// { +// H->end[i] = Htop; +// if ( ihb == 1 ) +// Set_End_Index( atom_i->Hindex, ihb_top, hbonds ); +// } +//#if defined(NEUTRAL_TERRITORY) +// else if ( local == 2 ) +// { +// if( nt_flag ) +// { +// H->end[atom_i->pos] = Htop; +// } +// else +// { +// H->start[atom_i->pos] = 0; +// H->end[atom_i->pos] = 0; +// } +// } +//#endif +// } +// +// if ( far_nbrs->format == FULL_LIST ) +// { +// +// for( i = 0; i < system->N; ++i ) +// qsort( &bonds->bond_list[Start_Index(i, bonds)], +// Num_Entries(i, bonds), sizeof(bond_data), compare_bonds ); +// +// // set sym_index for bonds list (far_nbrs full list) +// for ( i = 0; i < system->N; ++i ) +// { +// start_i = Start_Index( i, bonds ); +// end_i = End_Index( i, bonds ); +// +// for ( btop_i = start_i; btop_i < end_i; ++btop_i ) +// { +// j = bonds->bond_list[btop_i].nbr; +// start_j = Start_Index( j, bonds ); +// end_j = End_Index( j, bonds ); 
+// +// for ( btop_j = start_j; btop_j < end_j; ++btop_j ) +// { +// if ( bonds->bond_list[btop_j].nbr == i ) +// { +// bonds->bond_list[btop_i].sym_index = btop_j; +// break; +// } +// } +// } +// } +// } +// +//#if defined(DEBUG) +// Print_Sparse_Matrix2( system, H, NULL ); +//#endif +// +// workspace->realloc.Htop = Htop; +// workspace->realloc.num_bonds = num_bonds; +// workspace->realloc.num_hbonds = num_hbonds; +// +//#if defined(DEBUG_FOCUS) +// fprintf( stderr, "p%d @ step%d: Htop = %d num_bonds = %d num_hbonds = %d\n", +// system->my_rank, data->step, Htop, num_bonds, num_hbonds ); +// MPI_Barrier( comm ); +//#endif +// +//#if defined( DEBUG ) +// Print_Bonds( system, bonds, "debugbonds.out" ); +// Print_Bond_List2( system, bonds, "pbonds.out" ); +// Print_Sparse_Matrix( system, H ); +// for ( i = 0; i < H->n; ++i ) +// for ( j = H->start[i]; j < H->end[i]; ++j ) +// fprintf( stderr, "%d %d %.15e\n", +// MIN(system->my_atoms[i].orig_id, +// system->my_atoms[H->entries[j].j].orig_id), +// MAX(system->my_atoms[i].orig_id, +// system->my_atoms[H->entries[j].j].orig_id), +// H->entries[j].val ); +//#endif +// +// Validate_Lists( system, workspace, lists, data->step, +// system->n, system->N, system->numH, comm ); +// +//} + + void Init_Forces_noQEq( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control, - MPI_Comm comm ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, MPI_Comm comm ) { int i, j, pj; int start_i, end_i; int type_i, type_j; - int btop_i, btop_j, num_bonds, num_hbonds; - int ihb, jhb, ihb_top, jhb_top; + int btop_i, num_bonds, num_hbonds; + int ihb, jhb, ihb_top; int local, flag, renbr; real r_ij, cutoff; reax_list *far_nbrs, *bonds, *hbonds; single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; - far_neighbor_data *nbr_pj; reax_atom *atom_i, *atom_j; + int jhb_top; + int start_j, end_j; + int btop_j; 
far_nbrs = lists[FAR_NBRS]; bonds = lists[BONDS]; hbonds = lists[HBONDS]; for ( i = 0; i < system->n; ++i ) + { workspace->bond_mark[i] = 0; + } for ( i = system->n; i < system->N; ++i ) { - workspace->bond_mark[i] = 1000; // put ghost atoms to an infinite distance - //workspace->done_after[i] = Start_Index( i, far_nbrs ); + /* put ghost atoms to an infinite distance (i.e., 1000) */ + workspace->bond_mark[i] = 1000; } num_bonds = 0; num_hbonds = 0; - btop_i = btop_j = 0; + btop_i = 0; renbr = (data->step - data->prev_steps) % control->reneighbor == 0; for ( i = 0; i < system->N; ++i ) @@ -628,7 +1909,16 @@ void Init_Forces_noQEq( reax_system *system, control_params *control, type_i = atom_i->type; start_i = Start_Index(i, far_nbrs); end_i = End_Index(i, far_nbrs); - btop_i = End_Index( i, bonds ); + if ( far_nbrs->format == HALF_LIST ) + { + /* start at end because other atoms + * can add to this atom's list (half-list) */ + btop_i = End_Index( i, bonds ); + } + else if ( far_nbrs->format == FULL_LIST ) + { + btop_i = Start_Index( i, bonds ); + } sbp_i = &(system->reax_param.sbp[type_i]); if ( i < system->n ) @@ -648,32 +1938,51 @@ void Init_Forces_noQEq( reax_system *system, control_params *control, { ihb = sbp_i->p_hbond; if ( ihb == 1 ) - ihb_top = End_Index( atom_i->Hindex, hbonds ); - else ihb_top = -1; + { + if ( far_nbrs->format == HALF_LIST ) + { + /* start at end because other atoms + * can add to this atom's list (half-list) */ + ihb_top = End_Index( atom_i->Hindex, hbonds ); + } + else if ( far_nbrs->format == FULL_LIST ) + { + ihb_top = Start_Index( atom_i->Hindex, hbonds ); + } + } + else + { + ihb_top = -1; + } } /* update i-j distance - check if j is within cutoff */ for ( pj = start_i; pj < end_i; ++pj ) { - nbr_pj = &( far_nbrs->far_nbr_list[pj] ); - j = nbr_pj->nbr; - atom_j = &(system->my_atoms[j]); + j = far_nbrs->far_nbr_list.nbr[pj]; + atom_j = &system->my_atoms[j]; if ( renbr ) { - if ( nbr_pj->d <= cutoff ) + if ( 
far_nbrs->far_nbr_list.d[pj] <= cutoff ) + { flag = 1; - else flag = 0; + } + else + { + flag = 0; + } } else { - nbr_pj->dvec[0] = atom_j->x[0] - atom_i->x[0]; - nbr_pj->dvec[1] = atom_j->x[1] - atom_i->x[1]; - nbr_pj->dvec[2] = atom_j->x[2] - atom_i->x[2]; - nbr_pj->d = rvec_Norm_Sqr( nbr_pj->dvec ); - if ( nbr_pj->d <= SQR(cutoff) ) + far_nbrs->far_nbr_list.dvec[pj][0] = atom_j->x[0] - atom_i->x[0]; + far_nbrs->far_nbr_list.dvec[pj][1] = atom_j->x[1] - atom_i->x[1]; + far_nbrs->far_nbr_list.dvec[pj][2] = atom_j->x[2] - atom_i->x[2]; + far_nbrs->far_nbr_list.d[pj] = rvec_Norm_Sqr( far_nbrs->far_nbr_list.dvec[pj] ); + + if ( far_nbrs->far_nbr_list.d[pj] <= SQR(cutoff) ) { - nbr_pj->d = sqrt(nbr_pj->d); + far_nbrs->far_nbr_list.d[pj] = sqrt( far_nbrs->far_nbr_list.d[pj] ); flag = 1; } else @@ -685,15 +1994,16 @@ void Init_Forces_noQEq( reax_system *system, control_params *control, if ( flag ) { type_j = atom_j->type; - r_ij = nbr_pj->d; - sbp_j = &(system->reax_param.sbp[type_j]); - twbp = &(system->reax_param.tbp[type_i][type_j]); + r_ij = far_nbrs->far_nbr_list.d[pj]; + sbp_j = &system->reax_param.sbp[type_j]; + twbp = &system->reax_param.tbp[type_i][type_j]; if ( local ) { /* hydrogen bond lists */ - if ( control->hbond_cut > 0 && (ihb == 1 || ihb == 2) && - nbr_pj->d <= control->hbond_cut ) + if ( control->hbond_cut > 0.0 + && (ihb == 1 || ihb == 2) + && far_nbrs->far_nbr_list.d[pj] <= control->hbond_cut ) { // fprintf( stderr, "%d %d\n", atom1, atom2 ); jhb = sbp_j->p_hbond; @@ -701,16 +2011,18 @@ void Init_Forces_noQEq( reax_system *system, control_params *control, { hbonds->hbond_list[ihb_top].nbr = j; hbonds->hbond_list[ihb_top].scl = 1; - hbonds->hbond_list[ihb_top].ptr = nbr_pj; + hbonds->hbond_list[ihb_top].ptr = pj; ++ihb_top; ++num_hbonds; } - else if ( j < system->n && ihb == 2 && jhb == 1 ) + /* only add to list for local j (far nbrs is half-list) */ + else if ( far_nbrs->format == HALF_LIST + && (j < system->n && ihb == 2 && jhb == 1) ) { jhb_top = 
End_Index( atom_j->Hindex, hbonds ); hbonds->hbond_list[jhb_top].nbr = i; hbonds->hbond_list[jhb_top].scl = -1; - hbonds->hbond_list[jhb_top].ptr = nbr_pj; + hbonds->hbond_list[jhb_top].ptr = pj; Set_End_Index( atom_j->Hindex, jhb_top + 1, hbonds ); ++num_hbonds; } @@ -720,9 +2032,12 @@ void Init_Forces_noQEq( reax_system *system, control_params *control, /* uncorrected bond orders */ if ( //(workspace->bond_mark[i] < 3 || workspace->bond_mark[j] < 3) && - nbr_pj->d <= control->bond_cut && - BOp( workspace, bonds, control->bo_cut, - i , btop_i, nbr_pj, sbp_i, sbp_j, twbp ) ) + far_nbrs->far_nbr_list.d[pj] <= control->bond_cut + && BOp( workspace, bonds, control->bo_cut, + i, btop_i, far_nbrs->far_nbr_list.nbr[pj], + &far_nbrs->far_nbr_list.rel_box[pj], far_nbrs->far_nbr_list.d[pj], + &far_nbrs->far_nbr_list.dvec[pj], far_nbrs->format, + sbp_i, sbp_j, twbp ) ) { num_bonds += 2; ++btop_i; @@ -746,13 +2061,31 @@ void Init_Forces_noQEq( reax_system *system, control_params *control, Set_End_Index( atom_i->Hindex, ihb_top, hbonds ); } - /*for( i = system->n; i < system->N; ++i ) - if( workspace->bond_mark[i] > 3 ) { - start_i = Start_Index(i, bonds); - end_i = End_Index(i, bonds); - num_bonds -= (end_i - start_i); - Set_End_Index(i, start_i, bonds ); - }*/ + if ( far_nbrs->format == FULL_LIST ) + { + /* set sym_index for bonds list (far_nbrs full list) */ + for ( i = 0; i < system->N; ++i ) + { + start_i = Start_Index( i, bonds ); + end_i = End_Index( i, bonds ); + + for ( btop_i = start_i; btop_i < end_i; ++btop_i ) + { + j = bonds->bond_list[btop_i].nbr; + start_j = Start_Index( j, bonds ); + end_j = End_Index( j, bonds ); + + for ( btop_j = start_j; btop_j < end_j; ++btop_j ) + { + if ( bonds->bond_list[btop_j].nbr == i ) + { + bonds->bond_list[btop_i].sym_index = btop_j; + break; + } + } + } + } + } workspace->realloc.num_bonds = num_bonds; workspace->realloc.num_hbonds = num_hbonds; @@ -768,13 +2101,14 @@ void Init_Forces_noQEq( reax_system *system, control_params 
*control, #endif Validate_Lists( system, workspace, lists, data->step, - system->n, system->N, system->numH, comm ); + system->n, system->N, system->numH, comm ); } void Estimate_Storages( reax_system *system, control_params *control, - reax_list **lists, int *Htop, int *hb_top, - int *bond_top, int *num_3body, MPI_Comm comm ) + reax_list **lists, int *Htop, int *hb_top, + int *bond_top, int *num_3body, MPI_Comm comm, + int *matrix_dim, int cm_format ) { int i, j, pj; int start_i, end_i; @@ -788,30 +2122,44 @@ void Estimate_Storages( reax_system *system, control_params *control, reax_list *far_nbrs; single_body_parameters *sbp_i, *sbp_j; two_body_parameters *twbp; - far_neighbor_data *nbr_pj; reax_atom *atom_i, *atom_j; far_nbrs = lists[FAR_NBRS]; *Htop = 0; + *matrix_dim = 0; memset( hb_top, 0, sizeof(int) * system->local_cap ); memset( bond_top, 0, sizeof(int) * system->total_cap ); *num_3body = 0; +#if defined(NEUTRAL_TERRITORY) + int mark[6] = {1, 1, 2, 2, 2, 2}; +#endif + for ( i = 0; i < system->N; ++i ) { - atom_i = &(system->my_atoms[i]); + atom_i = &system->my_atoms[i]; type_i = atom_i->type; start_i = Start_Index(i, far_nbrs); - end_i = End_Index(i, far_nbrs); - sbp_i = &(system->reax_param.sbp[type_i]); + end_i = End_Index(i, far_nbrs); + sbp_i = &system->reax_param.sbp[type_i]; if ( i < system->n ) { local = 1; cutoff = control->nonb_cut; ++(*Htop); + ++(*matrix_dim); ihb = sbp_i->p_hbond; } +#if defined(NEUTRAL_TERRITORY) + else if ( atom_i->nt_dir != -1 ) + { + local = 2; + cutoff = control->nonb_cut; + ++(*matrix_dim); + ihb = -1; + } +#endif else { local = 0; @@ -821,36 +2169,71 @@ void Estimate_Storages( reax_system *system, control_params *control, for ( pj = start_i; pj < end_i; ++pj ) { - nbr_pj = &( far_nbrs->far_nbr_list[pj] ); - j = nbr_pj->nbr; - atom_j = &(system->my_atoms[j]); + j = far_nbrs->far_nbr_list.nbr[pj]; + +#if !defined(NEUTRAL_TERRITORY) + if ( far_nbrs->format == HALF_LIST ) +#endif + { + atom_j = &system->my_atoms[j]; + } - if 
(nbr_pj->d <= cutoff) + if ( far_nbrs->far_nbr_list.d[pj] <= cutoff ) { type_j = system->my_atoms[j].type; - r_ij = nbr_pj->d; - sbp_j = &(system->reax_param.sbp[type_j]); - twbp = &(system->reax_param.tbp[type_i][type_j]); + r_ij = far_nbrs->far_nbr_list.d[pj]; + sbp_j = &system->reax_param.sbp[type_j]; + twbp = &system->reax_param.tbp[type_i][type_j]; - if ( local ) + if ( local == 1 ) { - if ( j < system->n || atom_i->orig_id < atom_j->orig_id ) //tryQEq ||1 +#if defined(NEUTRAL_TERRITORY) + if( atom_j->nt_dir > 0 || j < system->n ) + { + ++(*Htop); + } +#else + if ( (far_nbrs->format == HALF_LIST + && (j < system->n || atom_i->orig_id < atom_j->orig_id)) + || far_nbrs->format == FULL_LIST ) + { ++(*Htop); + } +#endif /* hydrogen bond lists */ - if ( control->hbond_cut > 0.1 && (ihb == 1 || ihb == 2) && - nbr_pj->d <= control->hbond_cut ) + if ( control->hbond_cut > 0.1 + && (ihb == 1 || ihb == 2) + && far_nbrs->far_nbr_list.d[pj] <= control->hbond_cut ) { jhb = sbp_j->p_hbond; + if ( ihb == 1 && jhb == 2 ) + { ++hb_top[i]; - else if ( j < system->n && ihb == 2 && jhb == 1 ) + } + /* only add to list for local j (far nbrs is half-list) */ + else if ( far_nbrs->format == HALF_LIST + && (j < system->n && ihb == 2 && jhb == 1) ) + { ++hb_top[j]; + } + } + } + +#if defined(NEUTRAL_TERRITORY) + else if ( local == 2 ) + { + if( ( atom_j->nt_dir != -1 && mark[atom_i->nt_dir] != mark[atom_j->nt_dir] ) + || ( j < system->n && atom_i->nt_dir != 0 )) + { + ++(*Htop); } } +#endif /* uncorrected bond orders */ - if ( nbr_pj->d <= control->bond_cut ) + if ( far_nbrs->far_nbr_list.d[pj] <= control->bond_cut ) { r2 = SQR(r_ij); @@ -859,21 +2242,33 @@ void Estimate_Storages( reax_system *system, control_params *control, C12 = twbp->p_bo1 * pow( r_ij / twbp->r_s, twbp->p_bo2 ); BO_s = (1.0 + control->bo_cut) * exp( C12 ); } - else BO_s = C12 = 0.0; + else + { + C12 = 0.0; + BO_s = 0.0; + } if ( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0) { C34 = twbp->p_bo3 * pow( r_ij / twbp->r_p, 
twbp->p_bo4 ); BO_pi = exp( C34 ); } - else BO_pi = C34 = 0.0; + else + { + C34 = 0.0; + BO_pi = 0.0; + } if ( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0) { C56 = twbp->p_bo5 * pow( r_ij / twbp->r_pp, twbp->p_bo6 ); BO_pi2 = exp( C56 ); } - else BO_pi2 = C56 = 0.0; + else + { + C56 = 0.0; + BO_pi2 = 0.0; + } /* Initially BO values are the uncorrected ones, page 1 */ BO = BO_s + BO_pi + BO_pi2; @@ -881,137 +2276,187 @@ void Estimate_Storages( reax_system *system, control_params *control, if ( BO >= control->bo_cut ) { ++bond_top[i]; - ++bond_top[j]; + if ( far_nbrs->format == HALF_LIST ) + { + ++bond_top[j]; + } } } } } } - *Htop = (int)(MAX( *Htop * SAFE_ZONE, MIN_CAP * MIN_HENTRIES )); +#if defined(NEUTRAL_TERRITORY) + /* Since we don't know the NT atoms' position yet, Htop cannot be calculated accurately. + * Therefore, we assume it is full and divide 2 if necessary. */ + if ( cm_format == SYM_HALF_MATRIX ) + { + *Htop = (*Htop + system->n + 1) / 2; + } +#endif + +#if defined(NEUTRAL_TERRITORY) + *matrix_dim = (int) MAX( *matrix_dim * SAFE_ZONE_NT, MIN_CAP ); + *Htop = (int) MAX( *Htop * SAFE_ZONE_NT, MIN_CAP * MIN_HENTRIES ); +#else + *matrix_dim = (int) MAX( *matrix_dim * SAFE_ZONE, MIN_CAP ); + *Htop = (int) MAX( *Htop * SAFE_ZONE, MIN_CAP * MIN_HENTRIES ); +#endif + for ( i = 0; i < system->n; ++i ) - hb_top[i] = (int)(MAX( hb_top[i] * SAFER_ZONE, MIN_HBONDS )); + { + hb_top[i] = (int) MAX( hb_top[i] * SAFER_ZONE, MIN_HBONDS ); + } for ( i = 0; i < system->N; ++i ) { - *num_3body += SQR(bond_top[i]); - //if( i < system->n ) + *num_3body += SQR( bond_top[i] ); + //TODO: why x2? 
bond_top[i] = MAX( bond_top[i] * 2, MIN_BONDS ); - //else bond_top[i] = MAX_BONDS; } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d @ estimate storages: Htop = %d, num_3body = %d\n", - system->my_rank, *Htop, *num_3body ); + system->my_rank, *Htop, *num_3body ); MPI_Barrier( comm ); #endif } void Compute_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control, - mpi_datatypes *mpi_data ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data ) { MPI_Comm comm; int qeq_flag; #if defined(LOG_PERFORMANCE) - real t_start = 0; + real t_start = 0.0, t_end; - //MPI_Barrier( mpi_data->world ); if ( system->my_rank == MASTER_NODE ) - t_start = Get_Time( ); + { + t_start = MPI_Wtime(); + } #endif comm = mpi_data->world; + /********* init forces ************/ #if defined(PURE_REAX) if ( control->charge_freq && (data->step - data->prev_steps) % control->charge_freq == 0 ) + { qeq_flag = 1; - else qeq_flag = 0; + } + else + { + qeq_flag = 0; + } #elif defined(LAMMPS_REAX) qeq_flag = 0; #endif if ( qeq_flag ) - Init_Forces( system, control, data, workspace, lists, out_control, comm ); + { + Init_Forces( system, control, data, workspace, lists, out_control, comm, mpi_data ); + } else + { Init_Forces_noQEq( system, control, data, workspace, - lists, out_control, comm ); + lists, out_control, comm ); + } #if defined(LOG_PERFORMANCE) //MPI_Barrier( mpi_data->world ); if ( system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &(data->timing.init_forces) ); + { + t_end = MPI_Wtime( ); + data->timing.init_forces += t_end - t_start; + t_start = t_end; + } #endif - /********* bonded interactions ************/ Compute_Bonded_Forces( system, control, data, workspace, - lists, out_control, mpi_data->world ); + lists, out_control, mpi_data->world ); #if defined(LOG_PERFORMANCE) //MPI_Barrier( mpi_data->world ); if ( 
system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &(data->timing.bonded) ); + { + t_end = MPI_Wtime( ); + data->timing.bonded += t_end - t_start; + t_start = t_end; + } #endif + #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d @ step%d: completed bonded\n", system->my_rank, data->step ); MPI_Barrier( mpi_data->world ); #endif - /**************** qeq ************************/ #if defined(PURE_REAX) if ( qeq_flag ) + { QEq( system, control, data, workspace, out_control, mpi_data ); + } #if defined(LOG_PERFORMANCE) - //MPI_Barrier( mpi_data->world ); if ( system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &data->timing.cm ); + { + t_end = MPI_Wtime( ); + data->timing.cm += t_end - t_start; + t_start = t_end; + } #endif + #if defined(DEBUG_FOCUS) fprintf(stderr, "p%d @ step%d: qeq completed\n", system->my_rank, data->step); MPI_Barrier( mpi_data->world ); #endif #endif //PURE_REAX - /********* nonbonded interactions ************/ Compute_NonBonded_Forces( system, control, data, workspace, - lists, out_control, mpi_data->world ); + lists, out_control, mpi_data->world ); #if defined(LOG_PERFORMANCE) - //MPI_Barrier( mpi_data->world ); if ( system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &(data->timing.nonb) ); + { + t_end = MPI_Wtime( ); + data->timing.nonb += t_end - t_start; + t_start = t_end; + } #endif + #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d @ step%d: nonbonded forces completed\n", system->my_rank, data->step ); MPI_Barrier( mpi_data->world ); #endif - /*********** total force ***************/ Compute_Total_Force( system, control, data, workspace, lists, mpi_data ); #if defined(LOG_PERFORMANCE) - //MPI_Barrier( mpi_data->world ); if ( system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &(data->timing.bonded) ); + { + t_end = MPI_Wtime( ); + data->timing.bonded += t_end - t_start; + } #endif + #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d @ step%d: total forces computed\n", system->my_rank, data->step 
); + //Print_Total_Force( system, data, workspace ); MPI_Barrier( mpi_data->world ); #endif #if defined(TEST_FORCES) Print_Force_Files( system, control, data, workspace, - lists, out_control, mpi_data ); + lists, out_control, mpi_data ); #endif } diff --git a/PuReMD/src/forces.h b/PuReMD/src/forces.h index 43d47cb46f66f45dc2b54d9415f8c92405a1d557..105f35941c8aa0d791f12b55a59649ba2d4ead8b 100644 --- a/PuReMD/src/forces.h +++ b/PuReMD/src/forces.h @@ -31,5 +31,5 @@ void Init_Force_Functions( control_params* ); void Compute_Forces( reax_system*, control_params*, simulation_data*, storage*, reax_list**, output_controls*, mpi_datatypes* ); void Estimate_Storages( reax_system*, control_params*, reax_list**, - int*, int*, int*, int*, MPI_Comm ); + int*, int*, int*, int*, MPI_Comm, int*, int ); #endif diff --git a/PuReMD/src/geo_tools.c b/PuReMD/src/geo_tools.c index c1e3549fedf2039f96af84aa263cd114c7dee3cb..77e1f95b32b7bf8e12b5de46578af8364fe505e3 100644 --- a/PuReMD/src/geo_tools.c +++ b/PuReMD/src/geo_tools.c @@ -81,11 +81,7 @@ char Read_Geo( char* geo_file, reax_system* system, control_params *control, comm = MPI_COMM_WORLD; /* open the geometry file */ - if ( (geo = fopen(geo_file, "r")) == NULL ) - { - fprintf( stderr, "fopen: error opening the geo file! 
terminating...\n" ); - MPI_Abort( comm, FILE_NOT_FOUND ); - } + geo = sfopen( geo_file, "r", "Read_Geo::geo" ); /* read box information */ fscanf( geo, CUSTOM_BOXGEO_FORMAT, @@ -140,7 +136,7 @@ char Read_Geo( char* geo_file, reax_system* system, control_params *control, } } - fclose( geo ); + sfclose( geo, "Read_Geo::geo" ); #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: finished reading the geo file\n", system->my_rank ); @@ -239,12 +235,12 @@ void Count_PDB_Atoms( FILE *geo, reax_system *system ) system->N = system->n; - //#if defined(DEBUG) +#if defined(DEBUG) fprintf( stderr, "p%d@count atoms:\n", system->my_rank ); fprintf( stderr, "p%d: bigN = %d\n", system->my_rank, system->bigN ); fprintf( stderr, "p%d: n = %d\n", system->my_rank, system->n ); fprintf( stderr, "p%d: N = %d\n\n", system->my_rank, system->N ); - //#endif +#endif } @@ -271,11 +267,7 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control, comm = MPI_COMM_WORLD; /* open pdb file */ - if ( (pdb = fopen(pdb_file, "r")) == NULL ) - { - fprintf( stderr, "fopen: error opening the pdb file! terminating...\n" ); - MPI_Abort( comm, FILE_NOT_FOUND ); - } + pdb = sfopen( pdb_file, "r", "Read_PDB::pdb" ); /* allocate memory for tokenizing pdb lines */ if ( Allocate_Tokenizer_Space( &s, &s1, &tmp ) == FAILURE ) @@ -481,7 +473,7 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control, return FAILURE; } - fclose( pdb ); + sfclose( pdb, "Read_PDB::pdb" ); #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: finished reading the pdb file\n", system->my_rank ); @@ -497,7 +489,7 @@ char Read_PDB( char* pdb_file, reax_system* system, control_params *control, Also, we do not write connect lines yet. 
*/ -char Write_PDB(reax_system* system, reax_list* bonds, simulation_data *data, +char Write_PDB(reax_system* system, reax_list** bonds, simulation_data *data, control_params *control, mpi_datatypes *mpi_data, output_controls *out_control) { @@ -550,14 +542,14 @@ char Write_PDB(reax_system* system, reax_list* bonds, simulation_data *data, sprintf(fname, "%s-%d.pdb", control->sim_name, data->step); - pdb = fopen(fname, "w"); - fprintf( pdb, PDB_CRYST1_FORMAT_O, + pdb = sfopen( fname, "w", "Write_PDB::pdb" ); + /*fprintf( pdb, PDB_CRYST1_FORMAT_O, "CRYST1", system->big_box.box_norms[0], system->big_box.box_norms[1], system->big_box.box_norms[2], RAD2DEG(alpha), RAD2DEG(beta), RAD2DEG(gamma), " ", 0 ); fprintf( out_control->log, "Box written\n" ); - fflush( out_control->log ); + fflush( out_control->log );*/ } /*write atom lines to buffer*/ @@ -566,11 +558,13 @@ char Write_PDB(reax_system* system, reax_list* bonds, simulation_data *data, p_atom = &(system->my_atoms[i]); strncpy(name, p_atom->name, 8); Trim_Spaces(name); - sprintf( line, PDB_ATOM_FORMAT_O, + /*sprintf( line, PDB_ATOM_FORMAT_O, "ATOM ", p_atom->orig_id, p_atom->name, ' ', "REX", ' ', 1, ' ', p_atom->x[0], p_atom->x[1], p_atom->x[2], - 1.0, 0.0, "0", name, " " ); - fprintf(stderr, "PDB NAME <%s>\n", p_atom->name); + 1.0, 0.0, "0", name, " " );*/ + sprintf( line, PDB_ATOM_FORMAT_O, + p_atom->orig_id, p_atom->x[0], p_atom->x[1], p_atom->x[2] ); + //fprintf( stderr, "PDB NAME <%s>\n", p_atom->name); strncpy( buffer + i * PDB_ATOM_FORMAT_O_LENGTH, line, PDB_ATOM_FORMAT_O_LENGTH ); } @@ -599,7 +593,7 @@ char Write_PDB(reax_system* system, reax_list* bonds, simulation_data *data, if ( me == MASTER_NODE) { fprintf( pdb, "%s", buffer ); - fclose( pdb ); + sfclose( pdb, "Write_PDB::pdb" ); } /* Writing connect information */ diff --git a/PuReMD/src/geo_tools.h b/PuReMD/src/geo_tools.h index 8078685689afa1d6edbe7b4534dd3bc65c45d1c5..d34caaffade4fa4c8b8dee0efffa58afe977de63 100644 --- a/PuReMD/src/geo_tools.h +++ 
b/PuReMD/src/geo_tools.h @@ -110,14 +110,16 @@ COLUMNS DATA TYPE FIELD DEFINITION #define PDB_CONECT_FORMAT "%6s%5d%5d%5d%5d%5d\n" #define PDB_CRYST1_FORMAT "%6s%9s%9s%9s%7s%7s%7s%11s%4s\n" -#define PDB_ATOM_FORMAT_O "%6s%5d %4s%c%3s %c%4d%c %8.3f%8.3f%8.3f%6.2f%6.2f %-4s%2s%2s\n" -#define PDB_ATOM_FORMAT_O_LENGTH 81 +//#define PDB_ATOM_FORMAT_O "%6s%5d %4s%c%3s %c%4d%c %8.3f%8.3f%8.3f%6.2f%6.2f %-4s%2s%2s\n" +#define PDB_ATOM_FORMAT_O "%5d%8.3f%8.3f%8.3f\n" +//#define PDB_ATOM_FORMAT_O_LENGTH 81 +#define PDB_ATOM_FORMAT_O_LENGTH 30 #define PDB_CRYST1_FORMAT_O "%6s%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f%11s%4d\n" char Read_PDB( char*, reax_system*, control_params*, simulation_data*, storage*, mpi_datatypes* ); -char Write_PDB( reax_system*, reax_list*, simulation_data*, +char Write_PDB( reax_system*, reax_list**, simulation_data*, control_params*, mpi_datatypes*, output_controls* ); #endif diff --git a/PuReMD/src/grid.c b/PuReMD/src/grid.c index 0064f2201695b85625dcd15db75861fe7fcb80b9..804999b437d60928ad9ac6dce010408c5ab95a74 100644 --- a/PuReMD/src/grid.c +++ b/PuReMD/src/grid.c @@ -30,11 +30,22 @@ /* determines the exchange boundaries with nbrs in terms of gcells */ void Mark_GCells( reax_system* system, grid *g, ivec procs, MPI_Comm comm ) { - int x, y, z, d; + int i, x, y, z, d, len; ivec r, nbr_coord, prdc; ivec send_span, recv_span; ivec str_send, end_send; ivec str_recv, end_recv; +#if defined(NEUTRAL_TERRITORY) + ivec nt_str, nt_end; + ivec dir[6] = { + {0, 0, +1}, // +z + {0, 0, -1}, // -z + {0, +1, 0}, // +y + {+1, +1, 0}, // +x+y + {+1, 0, 0}, // +x + {+1, -1, 0} // +x-y + }; +#endif /* clear all gcell type info */ for ( x = 0; x < g->ncells[0]; x++ ) @@ -50,6 +61,42 @@ void Mark_GCells( reax_system* system, grid *g, ivec procs, MPI_Comm comm ) g->cells[x][y][z].type = NATIVE; ivec_MakeZero( g->cells[x][y][z].rel_box ); } + +#if defined(NEUTRAL_TERRITORY) + /* mark NT cells */ + for ( i = 0; i < 6; ++i ) + { + for ( d = 0; d < 3; ++d ) + { + if ( dir[i][d] > 0 ) 
+ { + nt_str[d] = MIN( g->native_end[d], g->ncells[d] ); + nt_end[d] = MIN( g->native_end[d] + g->vlist_span[d], + g->ncells[d] ); + } + else if ( dir[i][d] < 0 ) + { + nt_str[d] = MAX( 0, g->native_str[d] - g->vlist_span[d] ); + nt_end[d] = g->native_str[d]; + } + else + { + nt_str[d] = g->native_str[d]; + nt_end[d] = g->native_end[d]; + } + } + for ( x = nt_str[0]; x < nt_end[0]; x++ ) + { + for ( y = nt_str[1]; y < nt_end[1]; y++ ) + { + for ( z = nt_str[2]; z < nt_end[2]; z++ ) + { + g->cells[x][y][z].type = NT_NBRS + i; + } + } + } + } +#endif /* loop over neighbors */ for ( r[0] = -1; r[0] <= 1; ++r[0]) @@ -136,8 +183,11 @@ void Find_Neighbor_GridCells( grid *g, control_params *control ) gc = &(g->cells[ci[0]][ci[1]][ci[2]]); top = 0; //fprintf( stderr, "grid1: %d %d %d:\n", ci[0], ci[1], ci[2] ); - +#if defined(NEUTRAL_TERRITORY) + if ( gc->type == NATIVE || ( gc->type >= NT_NBRS && gc->type < NT_NBRS + 6 ) ) +#else if ( gc->type == NATIVE ) +#endif gc->cutoff = control->vlist_cut; else gc->cutoff = control->bond_cut; @@ -201,13 +251,13 @@ void Reorder_GridCells( grid *g ) fprintf( stderr, "reordered gcells:\n" ); for ( i = 0; i < top; ++i ) fprintf( stderr, "order%d: %d %d %d\n", - i, g->order[i][0], g->order[i][1], g->order[i][2] ); + i, g->order[i][0], g->order[i][1], g->order[i][2] ); #endif } void Setup_New_Grid( reax_system* system, control_params* control, - MPI_Comm comm ) + MPI_Comm comm ) { int d, i, j, k; grid *g; @@ -242,13 +292,13 @@ void Setup_New_Grid( reax_system* system, control_params* control, g->bond_span[d] = (int)ceil( control->bond_cut / g->cell_len[d] ); /* span of the ghost region in terms of gcells */ g->ghost_span[d] = (int)ceil(system->bndry_cuts.ghost_cutoff / - g->cell_len[d]); + g->cell_len[d]); g->ghost_nonb_span[d] = (int)ceil(system->bndry_cuts.ghost_nonb / - g->cell_len[d]); + g->cell_len[d]); g->ghost_hbond_span[d] = (int)ceil( system->bndry_cuts.ghost_hbond / - g->cell_len[d] ); + g->cell_len[d] ); g->ghost_bond_span[d] = 
(int)ceil( system->bndry_cuts.ghost_bond / - g->cell_len[d] ); + g->cell_len[d] ); } /* total number of grid cells */ @@ -262,8 +312,8 @@ void Setup_New_Grid( reax_system* system, control_params* control, /* upper bound on the number of gcells to be exchanged with a single nbr */ system->gcell_cap = MAX3( g->native_cells[0] * g->native_cells[1] * g->ghost_span[2], - g->native_cells[0] * g->native_cells[2] * g->ghost_span[1], - g->native_cells[1] * g->native_cells[2] * g->ghost_span[0] ) + 1; + g->native_cells[0] * g->native_cells[2] * g->ghost_span[1], + g->native_cells[1] * g->native_cells[2] * g->ghost_span[0] ) + 1; /* allocate grid space */ Allocate_Grid( system, comm ); @@ -331,9 +381,9 @@ void Update_Grid( reax_system* system, control_params* control, MPI_Comm comm ) ghost_span[d] = (int)ceil(system->bndry_cuts.ghost_cutoff / cell_len[d]); ghost_nonb_span[d] = (int)ceil(system->bndry_cuts.ghost_nonb / cell_len[d]); ghost_hbond_span[d] = (int)ceil( system->bndry_cuts.ghost_hbond / - cell_len[d] ); + cell_len[d] ); ghost_bond_span[d] = (int)ceil( system->bndry_cuts.ghost_bond / - cell_len[d] ); + cell_len[d] ); } @@ -418,14 +468,14 @@ void Bin_My_Atoms( reax_system *system, reallocate_data *realloc ) if ( atoms[l].x[d] < my_box->min[d] || atoms[l].x[d] > my_box->max[d] ) { fprintf( stderr, "p%d: local atom%d [%f %f %f] is out of my box!\n", - system->my_rank, l, - atoms[l].x[0], atoms[l].x[1], atoms[l].x[2] ); + system->my_rank, l, + atoms[l].x[0], atoms[l].x[1], atoms[l].x[2] ); fprintf( stderr, "p%d: orig atom id is %d!\n", - system->my_rank, atoms[l].orig_id); + system->my_rank, atoms[l].orig_id); fprintf( stderr, "p%d: my_box=[%f-%f, %f-%f, %f-%f]\n", - system->my_rank, my_box->min[0], my_box->max[0], - my_box->min[1], my_box->max[1], - my_box->min[2], my_box->max[2] ); + system->my_rank, my_box->min[0], my_box->max[0], + my_box->min[1], my_box->max[1], + my_box->min[2], my_box->max[2] ); MPI_Abort( MPI_COMM_WORLD, -1 ); } @@ -437,10 +487,10 @@ void 
Bin_My_Atoms( reax_system *system, reallocate_data *realloc ) } #if defined(DEBUG) fprintf( stderr, "p%d bin_my_atoms: l:%d - atom%d @ %.5f %.5f %.5f"\ - "--> cell: %d %d %d\n", - system->my_rank, l, atoms[l].orig_id, - atoms[l].x[0], atoms[l].x[1], atoms[l].x[2], - c[0], c[1], c[2] ); + "--> cell: %d %d %d\n", + system->my_rank, l, atoms[l].orig_id, + atoms[l].x[0], atoms[l].x[1], atoms[l].x[2], + c[0], c[1], c[2] ); #endif gc = &( g->cells[c[0]][c[1]][c[2]] ); gc->atoms[ gc->top++ ] = l; @@ -460,13 +510,13 @@ void Bin_My_Atoms( reax_system *system, reallocate_data *realloc ) max_atoms = gc->top; #if defined(DEBUG) fprintf( stderr, "p%d gc[%d,%d,%d]->top=%d\n", - system->my_rank, i, j, k, gc->top ); + system->my_rank, i, j, k, gc->top ); #endif } #if defined(DEBUG) fprintf( stderr, "p%d max_atoms=%d, g->max_atoms=%d\n", - system->my_rank, max_atoms, g->max_atoms ); + system->my_rank, max_atoms, g->max_atoms ); #endif /* check if current gcell->max_atoms is safe */ if ( max_atoms >= g->max_atoms * DANGER_ZONE ) @@ -524,7 +574,7 @@ void Reorder_My_Atoms( reax_system *system, storage *workspace ) void Get_Boundary_GCell( grid *g, rvec base, rvec x, grid_cell **gc, - rvec *cur_min, rvec *cur_max ) + rvec *cur_min, rvec *cur_max ) { int d; ivec c; @@ -540,7 +590,7 @@ void Get_Boundary_GCell( grid *g, rvec base, rvec x, grid_cell **gc, } #if defined(DEBUG) fprintf( stderr, "get_bndry_gc: base=[%f %f %f] x=[%f %f %f] c=[%d %d %d]\n", - base[0], base[1], base[2], x[0], x[1], x[2], c[0], c[1], c[2] ); + base[0], base[1], base[2], x[0], x[1], x[2], c[0], c[1], c[2] ); #endif *gc = &( g->cells[c[0]][c[1]][c[2]] ); @@ -548,11 +598,11 @@ void Get_Boundary_GCell( grid *g, rvec base, rvec x, grid_cell **gc, rvec_Sum( *cur_max, (*gc)->max, loosen ); #if defined(DEBUG) fprintf( stderr, "get_bndry_gc: gcmin=[%f %f %f] gcmax=[%f %f %f]\n", - (*gc)->min[0], (*gc)->min[1], (*gc)->min[2], - (*gc)->max[0], (*gc)->max[1], (*gc)->max[2] ); + (*gc)->min[0], (*gc)->min[1], (*gc)->min[2], + 
(*gc)->max[0], (*gc)->max[1], (*gc)->max[2] ); fprintf( stderr, "get_bndry_gc: curmin=[%f %f %f] curmax=[%f %f %f]\n", - (*cur_min)[0], (*cur_min)[1], (*cur_min)[2], - (*cur_max)[0], (*cur_max)[1], (*cur_max)[2] ); + (*cur_min)[0], (*cur_min)[1], (*cur_min)[2], + (*cur_max)[0], (*cur_max)[1], (*cur_max)[2] ); #endif } @@ -599,8 +649,8 @@ void Bin_Boundary_Atoms( reax_system *system ) if ( !is_Within_GCell( atoms[start].x, ext_box->min, ext_box->max ) ) { fprintf( stderr, "p%d: ghost atom%d [%f %f %f] is out of my box!\n", - system->my_rank, start, - atoms[start].x[0], atoms[start].x[1], atoms[start].x[2] ); + system->my_rank, start, + atoms[start].x[0], atoms[start].x[1], atoms[start].x[2] ); MPI_Abort( MPI_COMM_WORLD, -1 ); } @@ -613,8 +663,8 @@ void Bin_Boundary_Atoms( reax_system *system ) if ( !is_Within_GCell( atoms[i].x, ext_box->min, ext_box->max ) ) { fprintf( stderr, "p%d: ghost atom%d [%f %f %f] is out of my box!\n", - system->my_rank, i, - atoms[i].x[0], atoms[i].x[1], atoms[i].x[2] ); + system->my_rank, i, + atoms[i].x[0], atoms[i].x[1], atoms[i].x[2] ); MPI_Abort( MPI_COMM_WORLD, -1 ); } @@ -628,11 +678,11 @@ void Bin_Boundary_Atoms( reax_system *system ) if ( gc->top != 0 ) { fprintf( stderr, "p%d bin_boundary_atoms: atom%d map was unexpected! 
", - system->my_rank, i ); + system->my_rank, i ); fprintf( stderr, "[%f %f %f] --> [%f %f %f] to [%f %f %f]\n", - atoms[i].x[0], atoms[i].x[1], atoms[i].x[2], - gc->min[0], gc->min[1], gc->min[2], - gc->max[0], gc->max[1], gc->max[2] ); + atoms[i].x[0], atoms[i].x[1], atoms[i].x[2], + gc->min[0], gc->min[1], gc->min[2], + gc->max[0], gc->max[1], gc->max[2] ); MPI_Abort( MPI_COMM_WORLD, INVALID_INPUT ); } gc->str = i; @@ -646,4 +696,4 @@ void Bin_Boundary_Atoms( reax_system *system ) #if defined(DEBUG) fprintf( stderr, "p%d bin_boundary_atoms: done\n", system->my_rank ); #endif -} + } diff --git a/PuReMD/src/hydrogen_bonds.c b/PuReMD/src/hydrogen_bonds.c index cba267fffa71ef7a11ac23def3c72cae92838653..699e7bc104f7a98f6bb533b90b5ae94026ba0795 100644 --- a/PuReMD/src/hydrogen_bonds.c +++ b/PuReMD/src/hydrogen_bonds.c @@ -39,12 +39,13 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, simulation_data *data, storage *workspace, reax_list **lists, output_controls *out_control ) { - int i, j, k, pi, pk; - int type_i, type_j, type_k; - int start_j, end_j, hb_start_j, hb_end_j; - int hblist[MAX_BONDS]; - int itr, top; - int num_hb_intrs = 0; + int i, j, k, pi, pk; + int type_i, type_j, type_k; + int start_j, end_j, hb_start_j, hb_end_j; + int hblist[MAX_BONDS]; + int itr, top; + int num_hb_intrs = 0; + int nbr_jk; ivec rel_jk; real r_ij, r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2; real e_hb, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3; @@ -54,11 +55,11 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, hbond_parameters *hbp; bond_order_data *bo_ij; bond_data *pbond_ij; - far_neighbor_data *nbr_jk; - reax_list *bonds, *hbonds; + reax_list *far_nbrs, *bonds, *hbonds; bond_data *bond_list; hbond_data *hbond_list; + far_nbrs = lists[FAR_NBRS]; bonds = lists[BONDS]; bond_list = bonds->bond_list; hbonds = lists[HBONDS]; @@ -102,8 +103,8 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, k = hbond_list[pk].nbr; type_k 
= system->my_atoms[k].type; nbr_jk = hbond_list[pk].ptr; - r_jk = nbr_jk->d; - rvec_Scale( dvec_jk, hbond_list[pk].scl, nbr_jk->dvec ); + r_jk = far_nbrs->far_nbr_list.d[nbr_jk]; + rvec_Scale( dvec_jk, hbond_list[pk].scl, far_nbrs->far_nbr_list.dvec[nbr_jk] ); for ( itr = 0; itr < top; ++itr ) { @@ -174,7 +175,8 @@ void Hydrogen_Bonds( reax_system *system, control_params *control, rvec_ScaledAdd( workspace->f[j], +CEhb2, dcos_theta_dj ); - ivec_Scale( rel_jk, hbond_list[pk].scl, nbr_jk->rel_box ); + ivec_Scale( rel_jk, hbond_list[pk].scl, + far_nbrs->far_nbr_list.rel_box[nbr_jk] ); rvec_Scale( force, +CEhb2, dcos_theta_dk ); rvec_Add( workspace->f[k], force ); rvec_iMultiply( ext_press, rel_jk, force ); diff --git a/PuReMD/src/init_md.c b/PuReMD/src/init_md.c index da661aa4f42f2e3ccd893975cfd684876339edc8..b202227f2ead91caab11badd8f900b6f9c7d8be8 100644 --- a/PuReMD/src/init_md.c +++ b/PuReMD/src/init_md.c @@ -54,8 +54,8 @@ #if defined(PURE_REAX) /************************ initialize system ************************/ int Reposition_Atoms( reax_system *system, control_params *control, - simulation_data *data, mpi_datatypes *mpi_data, - char *msg ) + simulation_data *data, mpi_datatypes *mpi_data, + char *msg ) { int i; rvec dx; @@ -130,8 +130,8 @@ void Generate_Initial_Velocities( reax_system *system, real T ) int Init_System( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - mpi_datatypes *mpi_data, char *msg ) + simulation_data *data, storage *workspace, + mpi_datatypes *mpi_data, char *msg ) { int i; reax_atom *atom; @@ -152,9 +152,12 @@ int Init_System( reax_system *system, control_params *control, for ( i = 0; i < MAX_NBRS; ++i ) nrecv[i] = 0; system->max_recved = 0; system->N = SendRecv( system, mpi_data, mpi_data->boundary_atom_type, nrecv, - Estimate_Boundary_Atoms, Unpack_Estimate_Message, 1 ); + Estimate_Boundary_Atoms, Unpack_Estimate_Message, 1 ); system->total_cap = MAX( (int)(system->N * SAFE_ZONE), MIN_CAP ); 
Bin_Boundary_Atoms( system ); +#if defined(NEUTRAL_TERRITORY) + Estimate_NT_Atoms( system, mpi_data ); +#endif //fprintf( stderr, "p%d SEND RECV SEND!\n", system->my_rank ); //MPI_Barrier( mpi_data->world ); @@ -177,11 +180,11 @@ int Init_System( reax_system *system, control_params *control, //Allocate_System( system, system->local_cap, system->total_cap, msg ); #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: n=%d local_cap=%d\n", - system->my_rank, system->n, system->local_cap ); + system->my_rank, system->n, system->local_cap ); fprintf( stderr, "p%d: N=%d total_cap=%d\n", - system->my_rank, system->N, system->total_cap ); + system->my_rank, system->N, system->total_cap ); fprintf( stderr, "p%d: numH=%d H_cap=%d\n", - system->my_rank, system->numH, system->Hcap ); + system->my_rank, system->numH, system->Hcap ); MPI_Barrier( mpi_data->world ); #endif @@ -198,8 +201,8 @@ int Init_System( reax_system *system, control_params *control, /************************ initialize simulation data ************************/ int Init_Simulation_Data( reax_system *system, control_params *control, - simulation_data *data, mpi_datatypes *mpi_data, - char *msg ) + simulation_data *data, mpi_datatypes *mpi_data, + char *msg ) { Reset_Simulation_Data( data, control->virial ); @@ -212,74 +215,95 @@ int Init_Simulation_Data( reax_system *system, control_params *control, switch ( control->ensemble ) { - case NVE: - data->N_f = 3 * system->bigN; - Evolve = Velocity_Verlet_NVE; - break; - - case bNVT: - data->N_f = 3 * system->bigN + 1; - Evolve = Velocity_Verlet_Berendsen_NVT; - break; - - case nhNVT: - fprintf( stderr, "WARNING: Nose-Hoover NVT is still under testing.\n" ); - //return FAILURE; - data->N_f = 3 * system->bigN + 1; - Evolve = Velocity_Verlet_Nose_Hoover_NVT_Klein; - if ( !control->restart || (control->restart && control->random_vel) ) - { - data->therm.G_xi = control->Tau_T * - (2.0 * data->sys_en.e_kin - data->N_f * K_B * control->T ); - data->therm.v_xi = data->therm.G_xi 
* control->dt; - data->therm.v_xi_old = 0; - data->therm.xi = 0; - } - break; - - case sNPT: /* Semi-Isotropic NPT */ - data->N_f = 3 * system->bigN + 4; - Evolve = Velocity_Verlet_Berendsen_NPT; - if ( !control->restart ) - Reset_Pressures( data ); - break; - - case iNPT: /* Isotropic NPT */ - data->N_f = 3 * system->bigN + 2; - Evolve = Velocity_Verlet_Berendsen_NPT; - if ( !control->restart ) - Reset_Pressures( data ); - break; - - case NPT: /* Anisotropic NPT */ - strcpy( msg, "init_simulation_data: option not yet implemented" ); - return FAILURE; - - data->N_f = 3 * system->bigN + 9; - Evolve = Velocity_Verlet_Berendsen_NPT; - /*if( !control->restart ) { - data->therm.G_xi = control->Tau_T * - (2.0 * data->my_en.e_Kin - data->N_f * K_B * control->T ); - data->therm.v_xi = data->therm.G_xi * control->dt; - data->iso_bar.eps = 0.33333 * log(system->box.volume); - data->inv_W = 1.0 / - ( data->N_f * K_B * control->T * SQR(control->Tau_P) ); - Compute_Pressure( system, control, data, out_control ); - }*/ - break; - - default: - strcpy( msg, "init_simulation_data: ensemble not recognized" ); - return FAILURE; + case NVE: + data->N_f = 3 * system->bigN; + Evolve = Velocity_Verlet_NVE; + break; + + case bNVT: + data->N_f = 3 * system->bigN + 1; + Evolve = Velocity_Verlet_Berendsen_NVT; + break; + + case nhNVT: + fprintf( stderr, "WARNING: Nose-Hoover NVT is still under testing.\n" ); + //return FAILURE; + data->N_f = 3 * system->bigN + 1; + Evolve = Velocity_Verlet_Nose_Hoover_NVT_Klein; + if ( !control->restart || (control->restart && control->random_vel) ) + { + data->therm.G_xi = control->Tau_T * + (2.0 * data->sys_en.e_kin - data->N_f * K_B * control->T ); + data->therm.v_xi = data->therm.G_xi * control->dt; + data->therm.v_xi_old = 0; + data->therm.xi = 0; + } + break; + + case sNPT: /* Semi-Isotropic NPT */ + data->N_f = 3 * system->bigN + 4; + Evolve = Velocity_Verlet_Berendsen_NPT; + if ( !control->restart ) + Reset_Pressures( data ); + break; + + case iNPT: 
/* Isotropic NPT */ + data->N_f = 3 * system->bigN + 2; + Evolve = Velocity_Verlet_Berendsen_NPT; + if ( !control->restart ) + Reset_Pressures( data ); + break; + + case NPT: /* Anisotropic NPT */ + strcpy( msg, "init_simulation_data: option not yet implemented" ); + return FAILURE; + + data->N_f = 3 * system->bigN + 9; + Evolve = Velocity_Verlet_Berendsen_NPT; + /*if( !control->restart ) { + data->therm.G_xi = control->Tau_T * + (2.0 * data->my_en.e_Kin - data->N_f * K_B * control->T ); + data->therm.v_xi = data->therm.G_xi * control->dt; + data->iso_bar.eps = 0.33333 * log(system->box.volume); + data->inv_W = 1.0 / + ( data->N_f * K_B * control->T * SQR(control->Tau_P) ); + Compute_Pressure( system, control, data, out_control ); + }*/ + break; + + default: + strcpy( msg, "init_simulation_data: ensemble not recognized" ); + return FAILURE; } /* initialize the timer(s) */ MPI_Barrier( mpi_data->world ); // wait for everyone to come here if ( system->my_rank == MASTER_NODE ) { - data->timing.start = Get_Time( ); + data->timing.start = MPI_Wtime(); #if defined(LOG_PERFORMANCE) - Reset_Timing( &data->timing ); + //Reset_Timing( &data->timing ); + /* init timing info */ + data->timing.total = data->timing.start; + data->timing.comm = ZERO; + data->timing.nbrs = 0; + data->timing.init_forces = 0; + data->timing.bonded = 0; + data->timing.nonb = 0; + data->timing.init_dist = ZERO; + data->timing.init_cm = ZERO; + data->timing.init_bond = ZERO; + data->timing.cm = ZERO; + data->timing.cm_sort = ZERO; + data->timing.cm_solver_comm = ZERO; + data->timing.cm_solver_allreduce = ZERO; + data->timing.cm_solver_pre_comp = ZERO; + data->timing.cm_solver_pre_app = ZERO; + data->timing.cm_solver_iters = 0; + data->timing.cm_solver_spmv = ZERO; + data->timing.cm_solver_vector_ops = ZERO; + data->timing.cm_solver_orthog = ZERO; + data->timing.cm_solver_tri_solve = ZERO; #endif } @@ -314,11 +338,11 @@ int Init_System( reax_system *system, control_params *control, char *msg ) #if 
defined(DEBUG_FOCUS) fprintf( stderr, "p%d: n=%d local_cap=%d\n", - system->my_rank, system->n, system->local_cap ); + system->my_rank, system->n, system->local_cap ); fprintf( stderr, "p%d: N=%d total_cap=%d\n", - system->my_rank, system->N, system->total_cap ); + system->my_rank, system->N, system->total_cap ); fprintf( stderr, "p%d: numH=%d H_cap=%d\n", - system->my_rank, system->numH, system->Hcap ); + system->my_rank, system->numH, system->Hcap ); #endif return SUCCESS; @@ -326,16 +350,37 @@ int Init_System( reax_system *system, control_params *control, char *msg ) int Init_Simulation_Data( reax_system *system, control_params *control, - simulation_data *data, char *msg ) + simulation_data *data, char *msg ) { Reset_Simulation_Data( data, control->virial ); /* initialize the timer(s) */ if ( system->my_rank == MASTER_NODE ) { - data->timing.start = Get_Time( ); + data->timing.start = MPI_Wtime(); #if defined(LOG_PERFORMANCE) - Reset_Timing( &data->timing ); + //Reset_Timing( &data->timing ); + /* init timing info */ + data->timing.total = data->timing.start; + data->timing.comm = ZERO; + data->timing.nbrs = 0; + data->timing.init_forces = 0; + data->timing.bonded = 0; + data->timing.nonb = 0; + data->timing.init_dist = ZERO; + data->timing.init_cm = ZERO; + data->timing.init_bond = ZERO; + data->timing.cm = ZERO; + data->timing.cm_sort = ZERO; + data->timing.cm_solver_comm = ZERO; + data->timing.cm_solver_allreduce = ZERO; + data->timing.cm_solver_pre_comp = ZERO; + data->timing.cm_solver_pre_app = ZERO; + data->timing.cm_solver_iters = 0; + data->timing.cm_solver_spmv = ZERO; + data->timing.cm_solver_vector_ops = ZERO; + data->timing.cm_solver_orthog = ZERO; + data->timing.cm_solver_tri_solve = ZERO; #endif } @@ -385,17 +430,17 @@ void Init_Taper( control_params *control, storage *workspace, MPI_Comm comm ) workspace->Tap[2] = -210.0 * (swa3 * swb2 + swa2 * swb3) / d7; workspace->Tap[1] = 140.0 * swa3 * swb3 / d7; workspace->Tap[0] = (-35.0 * swa3 * swb2 * 
swb2 + 21.0 * swa2 * swb3 * swb2 + - 7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7; + 7.0 * swa * swb3 * swb3 + swb3 * swb3 * swb ) / d7; } int Init_Workspace( reax_system *system, control_params *control, - storage *workspace, MPI_Comm comm, char *msg ) + storage *workspace, MPI_Comm comm, char *msg ) { int ret; ret = Allocate_Workspace( system, control, workspace, - system->local_cap, system->total_cap, comm, msg ); + system->local_cap, system->total_cap, comm, msg ); if ( ret != SUCCESS ) return ret; @@ -411,7 +456,7 @@ int Init_Workspace( reax_system *system, control_params *control, /************** setup communication data structures **************/ int Init_MPI_Datatypes( reax_system *system, storage *workspace, - mpi_datatypes *mpi_data, MPI_Comm comm, char *msg ) + mpi_datatypes *mpi_data, MPI_Comm comm, char *msg ) { #if defined(PURE_REAX) int i, block[11]; @@ -432,9 +477,15 @@ int Init_MPI_Datatypes( reax_system *system, storage *workspace, /* init mpi buffers */ mpi_data->in1_buffer = NULL; mpi_data->in2_buffer = NULL; +#if defined(NEUTRAL_TERRITORY) + for ( i = 0; i < REAX_MAX_NT_NBRS; ++i ) + { + mpi_data->in_nt_buffer[i] = NULL; + } +#endif /* mpi_atom - [orig_id, imprt_id, type, num_bonds, num_hbonds, name, - x, v, f_old, s, t] */ + x, v, f_old, s, t] */ block[0] = block[1] = block[2] = block[3] = block[4] = 1; block[5] = 8; block[6] = block[7] = block[8] = 3; @@ -529,74 +580,95 @@ int Init_MPI_Datatypes( reax_system *system, storage *workspace, /********************** allocate lists *************************/ #if defined(PURE_REAX) int Init_Lists( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, reax_list **lists, - mpi_datatypes *mpi_data, char *msg ) + simulation_data *data, storage *workspace, reax_list **lists, + mpi_datatypes *mpi_data, char *msg ) { - int i, num_nbrs; + int i, num_nbrs, far_nbr_list_format, cm_format, matrix_dim; int total_hbonds, total_bonds, bond_cap, num_3body, cap_3body, Htop; 
int *hb_top, *bond_top; MPI_Comm comm; #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: before est_nbrs - local_cap=%d, total_cap=%d\n", - system->my_rank, system->local_cap, system->total_cap ); + system->my_rank, system->local_cap, system->total_cap ); #endif comm = mpi_data->world; + + if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + far_nbr_list_format = FULL_LIST; + cm_format = SYM_FULL_MATRIX; + } + else + { +#if defined(NEUTRAL_TERRITORY) + far_nbr_list_format = FULL_LIST; + cm_format = SYM_HALF_MATRIX; +#else + far_nbr_list_format = HALF_LIST; + cm_format = SYM_HALF_MATRIX; +#endif + } + //for( i = 0; i < MAX_NBRS; ++i ) nrecv[i] = system->my_nbrs[i].est_recv; //system->N = SendRecv( system, mpi_data, mpi_data->boundary_atom_type, nrecv, // Sort_Boundary_Atoms, Unpack_Exchange_Message, 1 ); - num_nbrs = Estimate_NumNeighbors( system, lists ); + + num_nbrs = Estimate_NumNeighbors( system, lists, far_nbr_list_format ); #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: after est_nbrs - local_cap=%d, total_cap=%d\n", - system->my_rank, system->local_cap, system->total_cap ); + system->my_rank, system->local_cap, system->total_cap ); #endif if ( !Make_List( system->total_cap, num_nbrs, TYP_FAR_NEIGHBOR, - lists[FAR_NBRS], comm ) ) + far_nbr_list_format, lists[FAR_NBRS], comm ) ) { fprintf(stderr, "Problem in initializing far nbrs list. 
Terminating!\n"); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } + #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated far_nbrs: num_far=%d, space=%dMB\n", - system->my_rank, num_nbrs, - (int)(num_nbrs * sizeof(far_neighbor_data) / (1024 * 1024)) ); + system->my_rank, num_nbrs, + (int)(num_nbrs * sizeof(far_neighbor_data) / (1024 * 1024)) ); #endif #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: before gen_nbrs - local_cap=%d, total_cap=%d\n", - system->my_rank, system->local_cap, system->total_cap ); + system->my_rank, system->local_cap, system->total_cap ); #endif Generate_Neighbor_Lists( system, data, workspace, lists ); #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: after gen_nbrs - local_cap=%d, total_cap=%d\n", - system->my_rank, system->local_cap, system->total_cap ); + system->my_rank, system->local_cap, system->total_cap ); #endif bond_top = (int*) calloc( system->total_cap, sizeof(int) ); hb_top = (int*) calloc( system->local_cap, sizeof(int) ); //bond_top = (int*) malloc( system->total_cap * sizeof(int) ); //hb_top = (int*) malloc( system->local_cap * sizeof(int) ); - Estimate_Storages( system, control, lists, - &Htop, hb_top, bond_top, &num_3body, comm ); + + Estimate_Storages( system, control, lists, &Htop, hb_top, + bond_top, &num_3body, comm, &matrix_dim, cm_format ); - Allocate_Matrix( &(workspace->H), system->local_cap, Htop, comm ); +#if defined(NEUTRAL_TERRITORY) + Allocate_Matrix( &workspace->H, matrix_dim, Htop, cm_format, comm ); +#else + Allocate_Matrix( &workspace->H, system->local_cap, Htop, cm_format, comm ); +#endif workspace->L = NULL; workspace->U = NULL; - - //TODO: uncomment for SAI -// Allocate_Matrix( &(workspace->H_spar_patt), workspace->H->n, workspace->H->m ); -// Allocate_Matrix( &(workspace->H_spar_patt_full), workspace->H->n, 2 * workspace->H->m - workspace->H->n ); -// Allocate_Matrix( &(workspace->H_app_inv), workspace->H->n, 2 * workspace->H->m - workspace->H->n ); + workspace->H_spar_patt = NULL; + workspace->H_app_inv = 
NULL; #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated H matrix: Htop=%d, space=%dMB\n", - system->my_rank, Htop, - (int)(Htop * sizeof(sparse_matrix_entry) / (1024 * 1024)) ); + system->my_rank, Htop, + (int)(Htop * sizeof(sparse_matrix_entry) / (1024 * 1024)) ); #endif if ( control->hbond_cut > 0 ) @@ -611,15 +683,15 @@ int Init_Lists( reax_system *system, control_params *control, total_hbonds = MAX( total_hbonds * SAFER_ZONE, MIN_CAP * MIN_HBONDS ); if ( !Make_List( system->Hcap, total_hbonds, TYP_HBOND, - lists[HBONDS], comm ) ) + HALF_LIST, lists[HBONDS], comm ) ) { fprintf( stderr, "not enough space for hbonds list. terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated hbonds: total_hbonds=%d, space=%dMB\n", - system->my_rank, total_hbonds, - (int)(total_hbonds * sizeof(hbond_data) / (1024 * 1024)) ); + system->my_rank, total_hbonds, + (int)(total_hbonds * sizeof(hbond_data) / (1024 * 1024)) ); #endif } @@ -635,50 +707,50 @@ int Init_Lists( reax_system *system, control_params *control, bond_cap = MAX( total_bonds * SAFE_ZONE, MIN_CAP * MIN_BONDS ); if ( !Make_List( system->total_cap, bond_cap, TYP_BOND, - lists[BONDS], comm ) ) + HALF_LIST, lists[BONDS], comm ) ) { fprintf( stderr, "not enough space for bonds list. terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated bonds: total_bonds=%d, space=%dMB\n", - system->my_rank, bond_cap, - (int)(bond_cap * sizeof(bond_data) / (1024 * 1024)) ); + system->my_rank, bond_cap, + (int)(bond_cap * sizeof(bond_data) / (1024 * 1024)) ); #endif /* 3bodies list */ cap_3body = MAX( num_3body * SAFE_ZONE, MIN_3BODIES ); if ( !Make_List( bond_cap, cap_3body, TYP_THREE_BODY, - lists[THREE_BODIES], comm ) ) + HALF_LIST, lists[THREE_BODIES], comm ) ) { fprintf( stderr, "Problem in initializing angles list. 
Terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated 3-body list: num_3body=%d, space=%dMB\n", - system->my_rank, cap_3body, - (int)(cap_3body * sizeof(three_body_interaction_data) / (1024 * 1024)) ); + system->my_rank, cap_3body, + (int)(cap_3body * sizeof(three_body_interaction_data) / (1024 * 1024)) ); #endif #if defined(TEST_FORCES) if ( !Make_List( system->total_cap, bond_cap * 8, TYP_DDELTA, - lists[DDELTAS], comm ) ) + HALF_LIST, lists[DDELTAS], comm ) ) { fprintf( stderr, "Problem in initializing dDelta list. Terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } fprintf( stderr, "p%d: allocated dDelta list: num_ddelta=%d space=%ldMB\n", - system->my_rank, bond_cap * 30, - bond_cap * 8 * sizeof(dDelta_data) / (1024 * 1024) ); + system->my_rank, bond_cap * 30, + bond_cap * 8 * sizeof(dDelta_data) / (1024 * 1024) ); - if ( !Make_List( bond_cap, bond_cap * 50, TYP_DBO, lists[DBOS], comm ) ) + if ( !Make_List( bond_cap, bond_cap * 50, TYP_DBO, HALF_LIST, lists[DBOS], comm ) ) { fprintf( stderr, "Problem in initializing dBO list. 
Terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } fprintf( stderr, "p%d: allocated dbond list: num_dbonds=%d space=%ldMB\n", - system->my_rank, bond_cap * MAX_BONDS * 3, - bond_cap * MAX_BONDS * 3 * sizeof(dbond_data) / (1024 * 1024) ); + system->my_rank, bond_cap * MAX_BONDS * 3, + bond_cap * MAX_BONDS * 3 * sizeof(dbond_data) / (1024 * 1024) ); #endif sfree( hb_top, "hb_top" ); @@ -686,22 +758,26 @@ int Init_Lists( reax_system *system, control_params *control, return SUCCESS; } + + #elif defined(LAMMPS_REAX) int Init_Lists( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, reax_list **lists, - mpi_datatypes *mpi_data, char *msg ) + simulation_data *data, storage *workspace, reax_list **lists, + mpi_datatypes *mpi_data, char *msg ) { - int i, num_nbrs; + int i, num_nbrs, matrix_dim; int total_hbonds, total_bonds, bond_cap, num_3body, cap_3body, Htop; int *hb_top, *bond_top; int nrecv[MAX_NBRS]; MPI_Comm comm; comm = mpi_data->world; + bond_top = (int*) calloc( system->total_cap, sizeof(int) ); hb_top = (int*) calloc( system->local_cap, sizeof(int) ); - Estimate_Storages( system, control, lists, - &Htop, hb_top, bond_top, &num_3body, comm ); + //TODO: add one paramater at the end for charge matrix format - half or full + Estimate_Storages( system, control, lists, &Htop, hb_top, + bond_top, &num_3body, comm, &matrix_dim ); if ( control->hbond_cut > 0 ) { @@ -715,15 +791,15 @@ int Init_Lists( reax_system *system, control_params *control, total_hbonds = (int)(MAX( total_hbonds * SAFER_ZONE, MIN_CAP * MIN_HBONDS )); if ( !Make_List( system->Hcap, total_hbonds, TYP_HBOND, - lists[HBONDS], comm ) ) + HALF_LIST, lists[HBONDS], comm ) ) { fprintf( stderr, "not enough space for hbonds list. 
terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated hbonds: total_hbonds=%d, space=%dMB\n", - system->my_rank, total_hbonds, - (int)(total_hbonds * sizeof(hbond_data) / (1024 * 1024)) ); + system->my_rank, total_hbonds, + (int)(total_hbonds * sizeof(hbond_data) / (1024 * 1024)) ); #endif } @@ -739,50 +815,50 @@ int Init_Lists( reax_system *system, control_params *control, bond_cap = (int)(MAX( total_bonds * SAFE_ZONE, MIN_CAP * MIN_BONDS )); if ( !Make_List( system->total_cap, bond_cap, TYP_BOND, - lists[BONDS], comm ) ) + HALF_LIST, lists[BONDS], comm ) ) { fprintf( stderr, "not enough space for bonds list. terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated bonds: total_bonds=%d, space=%dMB\n", - system->my_rank, bond_cap, - (int)(bond_cap * sizeof(bond_data) / (1024 * 1024)) ); + system->my_rank, bond_cap, + (int)(bond_cap * sizeof(bond_data) / (1024 * 1024)) ); #endif /* 3bodies list */ cap_3body = (int)(MAX( num_3body * SAFE_ZONE, MIN_3BODIES )); if ( !Make_List( bond_cap, cap_3body, TYP_THREE_BODY, - lists[THREE_BODIES], comm ) ) + HALF_LIST, lists[THREE_BODIES], comm ) ) { fprintf( stderr, "Problem in initializing angles list. Terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: allocated 3-body list: num_3body=%d, space=%dMB\n", - system->my_rank, cap_3body, - (int)(cap_3body * sizeof(three_body_interaction_data) / (1024 * 1024)) ); + system->my_rank, cap_3body, + (int)(cap_3body * sizeof(three_body_interaction_data) / (1024 * 1024)) ); #endif #if defined(TEST_FORCES) if ( !Make_List( system->total_cap, bond_cap * 8, TYP_DDELTA, - lists[DDELTAS], comm ) ) + HALF_LIST, lists[DDELTAS], comm ) ) { fprintf( stderr, "Problem in initializing dDelta list. 
Terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } fprintf( stderr, "p%d: allocated dDelta list: num_ddelta=%d space=%ldMB\n", - system->my_rank, bond_cap * 30, - bond_cap * 8 * sizeof(dDelta_data) / (1024 * 1024) ); + system->my_rank, bond_cap * 30, + bond_cap * 8 * sizeof(dDelta_data) / (1024 * 1024) ); - if ( !Make_List( bond_cap, bond_cap * 50, TYP_DBO, lists[DBOS], comm ) ) + if ( !Make_List( bond_cap, bond_cap * 50, TYP_DBO, HALF_LIST, lists[DBOS], comm ) ) { fprintf( stderr, "Problem in initializing dBO list. Terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } fprintf( stderr, "p%d: allocated dbond list: num_dbonds=%d space=%ldMB\n", - system->my_rank, bond_cap * MAX_BONDS * 3, - bond_cap * MAX_BONDS * 3 * sizeof(dbond_data) / (1024 * 1024) ); + system->my_rank, bond_cap * MAX_BONDS * 3, + bond_cap * MAX_BONDS * 3 * sizeof(dbond_data) / (1024 * 1024) ); #endif sfree( hb_top, "hb_top" ); @@ -796,9 +872,9 @@ int Init_Lists( reax_system *system, control_params *control, #if defined(PURE_REAX) void Initialize( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control, - mpi_datatypes *mpi_data ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data ) { char msg[MAX_STR]; @@ -806,9 +882,9 @@ void Initialize( reax_system *system, control_params *control, FAILURE ) { fprintf( stderr, "p%d: init_mpi_datatypes: could not create datatypes\n", - system->my_rank ); + system->my_rank ); fprintf( stderr, "p%d: mpi_data couldn't be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -819,7 +895,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: system could not be initialized! 
terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -830,7 +906,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: sim_data couldn't be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -841,9 +917,9 @@ void Initialize( reax_system *system, control_params *control, FAILURE ) { fprintf( stderr, "p%d:init_workspace: not enough memory\n", - system->my_rank ); + system->my_rank ); fprintf( stderr, "p%d:workspace couldn't be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -855,7 +931,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: system could not be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -866,7 +942,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: could not open output files! terminating...\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -879,7 +955,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: couldn't create lookup table! 
terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -894,14 +970,14 @@ void Initialize( reax_system *system, control_params *control, /*#ifdef TEST_FORCES Init_Force_Test_Functions(); fprintf(stderr,"p%d: initialized force test functions\n",system->my_rank); - #endif */ +#endif */ } #elif defined(LAMMPS_REAX) void Initialize( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control, - mpi_datatypes *mpi_data, MPI_Comm comm ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data, MPI_Comm comm ) { char msg[MAX_STR]; @@ -909,9 +985,9 @@ void Initialize( reax_system *system, control_params *control, if ( Init_MPI_Datatypes(system, workspace, mpi_data, comm, msg) == FAILURE ) { fprintf( stderr, "p%d: init_mpi_datatypes: could not create datatypes\n", - system->my_rank ); + system->my_rank ); fprintf( stderr, "p%d: mpi_data couldn't be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -922,7 +998,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: system could not be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -933,7 +1009,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: sim_data couldn't be initialized! 
terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -944,9 +1020,9 @@ void Initialize( reax_system *system, control_params *control, FAILURE ) { fprintf( stderr, "p%d:init_workspace: not enough memory\n", - system->my_rank ); + system->my_rank ); fprintf( stderr, "p%d:workspace couldn't be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -958,7 +1034,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: system could not be initialized! terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -969,7 +1045,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: could not open output files! terminating...\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -982,7 +1058,7 @@ void Initialize( reax_system *system, control_params *control, { fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); fprintf( stderr, "p%d: couldn't create lookup table! 
terminating.\n", - system->my_rank ); + system->my_rank ); MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); } #if defined(DEBUG) @@ -999,5 +1075,5 @@ void Initialize( reax_system *system, control_params *control, Init_Force_Test_Functions(); fprintf(stderr,"p%d: initialized force test functions\n",system->my_rank); #endif*/ - } +} #endif diff --git a/PuReMD/src/integrate.c b/PuReMD/src/integrate.c index 3672651c01b27cb6b96a77bf94624278d899901a..6b9d6b5ae11cda6c847f604d3a1f202927afbd6c 100644 --- a/PuReMD/src/integrate.c +++ b/PuReMD/src/integrate.c @@ -51,6 +51,22 @@ void Velocity_Verlet_NVE( reax_system* system, control_params* control, dt_sqr = SQR(dt); steps = data->step - data->prev_steps; renbr = (steps % control->reneighbor == 0); + if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + /* HACK: currently required that preconditioner (re)computation step + * and reneighbor step (i.e., (re)construct far nbr list) + * are the same value, so use reneighbor for now */ +// if ( renbr ) +// { +// lists[FAR_NBRS]->format = FULL_LIST; +// workspace->H->format = SYM_FULL_MATRIX; +// } +// else +// { +// lists[FAR_NBRS]->format = HALF_LIST; +// workspace->H->format = SYM_HALF_MATRIX; +// } + } for ( i = 0; i < system->n; i++ ) { @@ -114,6 +130,22 @@ void Velocity_Verlet_Nose_Hoover_NVT_Klein( reax_system* system, therm = &( data->therm ); steps = data->step - data->prev_steps; renbr = (steps % control->reneighbor == 0); + if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + /* HACK: currently required that preconditioner (re)computation step + * and reneighbor step (i.e., (re)construct far nbr list) + * are the same value, so use reneighbor for now */ +// if ( renbr ) +// { +// lists[FAR_NBRS]->format = FULL_LIST; +// workspace->H->format = SYM_FULL_MATRIX; +// } +// else +// { +// lists[FAR_NBRS]->format = HALF_LIST; +// workspace->H->format = SYM_HALF_MATRIX; +// } + } for ( i = 0; i < system->n; i++ ) { @@ -209,6 +241,22 @@ void Velocity_Verlet_Berendsen_NVT( 
reax_system* system, dt = control->dt; steps = data->step - data->prev_steps; renbr = (steps % control->reneighbor == 0); + if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + /* HACK: currently required that preconditioner (re)computation step + * and reneighbor step (i.e., (re)construct far nbr list) + * are the same value, so use reneighbor for now */ +// if ( renbr ) +// { +// lists[FAR_NBRS]->format = FULL_LIST; +// workspace->H->format = SYM_FULL_MATRIX; +// } +// else +// { +// lists[FAR_NBRS]->format = HALF_LIST; +// workspace->H->format = SYM_HALF_MATRIX; +// } + } /* velocity verlet, 1st part */ for ( i = 0; i < system->n; i++ ) @@ -300,6 +348,22 @@ void Velocity_Verlet_Berendsen_NPT( reax_system* system, dt = control->dt; steps = data->step - data->prev_steps; renbr = (steps % control->reneighbor == 0); + if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + /* HACK: currently required that preconditioner (re)computation step + * and reneighbor step (i.e., (re)construct far nbr list) + * are the same value, so use reneighbor for now */ +// if ( renbr ) +// { +// lists[FAR_NBRS]->format = FULL_LIST; +// workspace->H->format = SYM_FULL_MATRIX; +// } +// else +// { +// lists[FAR_NBRS]->format = HALF_LIST; +// workspace->H->format = SYM_HALF_MATRIX; +// } + } /* velocity verlet, 1st part */ for ( i = 0; i < system->n; i++ ) diff --git a/PuReMD/src/io_tools.c b/PuReMD/src/io_tools.c index 7f3c4c02bf04f0836169bd6048b6bd8f2b0a1846..1c878f6efa665cd7d79fbde4c87424dbf66645d7 100644 --- a/PuReMD/src/io_tools.c +++ b/PuReMD/src/io_tools.c @@ -44,8 +44,8 @@ print_interaction Print_Interactions[NUM_INTRS]; /************************ initialize output controls ************************/ int Init_Output_Files( reax_system *system, control_params *control, - output_controls *out_control, mpi_datatypes *mpi_data, - char *msg ) + output_controls *out_control, mpi_datatypes *mpi_data, + char *msg ) { char temp[MAX_STR]; int ret; @@ -64,65 +64,46 @@ int 
Init_Output_Files( reax_system *system, control_params *control, { /* init out file */ sprintf( temp, "%s.out", control->sim_name ); - if ( (out_control->out = fopen( temp, "w" )) != NULL ) - { + out_control->out = sfopen( temp, "w", "Init_Output_Files" ); #if !defined(DEBUG) && !defined(DEBUG_FOCUS) - fprintf( out_control->out, "%-6s%14s%14s%14s%11s%13s%13s\n", - "step", "total energy", "potential", "kinetic", - "T(K)", "V(A^3)", "P(Gpa)" ); + fprintf( out_control->out, "%-6s%14s%14s%14s%11s%13s%13s\n", + "step", "total energy", "potential", "kinetic", + "T(K)", "V(A^3)", "P(Gpa)" ); #else - fprintf( out_control->out, "%-6s%24s%24s%24s%13s%16s%13s\n", - "step", "total energy", "potential", "kinetic", - "T(K)", "V(A^3)", "P(GPa)" ); + fprintf( out_control->out, "%-6s%24s%24s%24s%13s%16s%13s\n", + "step", "total energy", "potential", "kinetic", + "T(K)", "V(A^3)", "P(GPa)" ); #endif - fflush( out_control->out ); - } - else - { - strcpy( msg, "init_out_controls: .out file could not be opened\n" ); - return FAILURE; - } + fflush( out_control->out ); /* init potentials file */ sprintf( temp, "%s.pot", control->sim_name ); - if ( (out_control->pot = fopen( temp, "w" )) != NULL ) - { + out_control->pot = sfopen( temp, "w", "Init_Output_Files" ); #if !defined(DEBUG) && !defined(DEBUG_FOCUS) - fprintf( out_control->pot, - "%-6s%14s%14s%14s%14s%14s%14s%14s%14s%14s%14s%14s\n", - "step", "ebond", "eatom", "elp", - "eang", "ecoa", "ehb", "etor", "econj", - "evdw", "ecoul", "epol" ); + fprintf( out_control->pot, + "%-6s%14s%14s%14s%14s%14s%14s%14s%14s%14s%14s%14s\n", + "step", "ebond", "eatom", "elp", + "eang", "ecoa", "ehb", "etor", "econj", + "evdw", "ecoul", "epol" ); #else - fprintf( out_control->pot, - "%-6s%24s%24s%24s%24s%24s%24s%24s%24s%24s%24s%24s\n", - "step", "ebond", "eatom", "elp", - "eang", "ecoa", "ehb", "etor", "econj", - "evdw", "ecoul", "epol" ); + fprintf( out_control->pot, + "%-6s%24s%24s%24s%24s%24s%24s%24s%24s%24s%24s%24s\n", + "step", "ebond", "eatom", 
"elp", + "eang", "ecoa", "ehb", "etor", "econj", + "evdw", "ecoul", "epol" ); #endif - fflush( out_control->pot ); - } - else - { - strcpy( msg, "init_out_controls: .pot file could not be opened\n" ); - return FAILURE; - } + fflush( out_control->pot ); /* init log file */ #if defined(LOG_PERFORMANCE) sprintf( temp, "%s.log", control->sim_name ); - if ( (out_control->log = fopen( temp, "w" )) != NULL ) - { - fprintf( out_control->log, "%6s%8s%8s%8s%8s%8s%8s%8s%8s\n", - "step", "total", "comm", "nbrs", "init", "bonded", "nonb", - "qeq", "matvecs" ); - fflush( out_control->log ); - } - else - { - strcpy( msg, "init_out_controls: .log file could not be opened\n" ); - return FAILURE; - } + out_control->log = sfopen( temp, "w", "Init_Output_Files" ); + fprintf( out_control->log, "%-6s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n", + "step", "total", "comm", "neighbors", "init", + "init_dist", "init_cm", "init_bond", "bonded", "nonbonded", + "cm", "cm_sort", "s_iters", "pre_comp", "pre_app", "s_comm", "s_allr", + "s_spmv", "s_vec_ops", "s_orthog", "s_tsolve" ); + fflush( out_control->log ); #endif } @@ -132,52 +113,31 @@ int Init_Output_Files( reax_system *system, control_params *control, control->ensemble == sNPT ) { sprintf( temp, "%s.prs", control->sim_name ); - if ( (out_control->prs = fopen( temp, "w" )) != NULL ) - { - fprintf(out_control->prs, "%8s%13s%13s%13s%13s%13s%13s%13s\n", - "step", "Pint/norm[x]", "Pint/norm[y]", "Pint/norm[z]", - "Pext/Ptot[x]", "Pext/Ptot[y]", "Pext/Ptot[z]", "Pkin/V" ); - fflush( out_control->prs ); - } - else - { - strcpy(msg, "init_out_controls: .prs file couldn't be opened\n"); - return FAILURE; - } + out_control->prs = sfopen( temp, "w", "Init_Output_Files" ); + fprintf(out_control->prs, "%8s%13s%13s%13s%13s%13s%13s%13s\n", + "step", "Pint/norm[x]", "Pint/norm[y]", "Pint/norm[z]", + "Pext/Ptot[x]", "Pext/Ptot[y]", "Pext/Ptot[z]", "Pkin/V" ); + fflush( out_control->prs ); } /* 
init electric dipole moment analysis file */ if ( control->dipole_anal ) { sprintf( temp, "%s.dpl", control->sim_name ); - if ( (out_control->dpl = fopen( temp, "w" )) != NULL ) - { - fprintf( out_control->dpl, "%6s%20s%30s", - "step", "molecule count", "avg dipole moment norm" ); - fflush( out_control->dpl ); - } - else - { - strcpy(msg, "init_out_controls: .dpl file couldn't be opened\n"); - return FAILURE; - } + out_control->dpl = sfopen( temp, "w", "Init_Output_Files" ); + fprintf( out_control->dpl, "%6s%20s%30s", + "step", "molecule count", "avg dipole moment norm" ); + fflush( out_control->dpl ); } /* init diffusion coef analysis file */ if ( control->diffusion_coef ) { sprintf( temp, "%s.drft", control->sim_name ); - if ( (out_control->drft = fopen( temp, "w" )) != NULL ) - { - fprintf( out_control->drft, "%7s%20s%20s\n", - "step", "type count", "avg disp^2" ); - fflush( out_control->drft ); - } - else - { - strcpy(msg, "init_out_controls: .drft file couldn't be opened\n"); - return FAILURE; - } + out_control->drft = sfopen( temp, "w", "Init_Output_Files" ); + fprintf( out_control->drft, "%7s%20s%20s\n", + "step", "type count", "avg disp^2" ); + fflush( out_control->drft ); } } @@ -188,11 +148,9 @@ int Init_Output_Files( reax_system *system, control_params *control, fashion controlled by their rank */ /*if( control->molecular_analysis ) { if( system->my_rank == MASTER_NODE ) { - sprintf( temp, "%s.mol", control->sim_name ); - if( (out_control->mol = fopen( temp, "w" )) == NULL ) { - strcpy(msg,"init_out_controls: .mol file could not be opened\n"); - return FAILURE; - } + sprintf( temp, "%s.mol", control->sim_name ); + out_control->mol = sfopen( temp, "w", "Init_Output_Files" ); + } } MPI_Bcast( &(out_control->mol), 1, MPI_LONG, 0, MPI_COMM_WORLD ); @@ -202,233 +160,121 @@ int Init_Output_Files( reax_system *system, control_params *control, #ifdef TEST_ENERGY /* open bond energy file */ sprintf( temp, "%s.ebond.%d", control->sim_name, system->my_rank ); - if 
( (out_control->ebond = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .ebond file couldn't be opened\n"); - return FAILURE; - } + out_control->ebond = sfopen( temp, "w", "Init_Output_Files" ); /* open lone-pair energy file */ sprintf( temp, "%s.elp.%d", control->sim_name, system->my_rank ); - if ( (out_control->elp = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .elp file couldn't be opened\n"); - return FAILURE; - } + out_control->elp = sfopen( temp, "w", "Init_Output_Files" ); /* open overcoordination energy file */ sprintf( temp, "%s.eov.%d", control->sim_name, system->my_rank ); - if ( (out_control->eov = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .eov file couldn't be opened\n"); - return FAILURE; - } + out_control->eov = sfopen( temp, "w", "Init_Output_Files" ); /* open undercoordination energy file */ sprintf( temp, "%s.eun.%d", control->sim_name, system->my_rank ); - if ( (out_control->eun = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .eun file couldn't be opened\n"); - return FAILURE; - } + out_control->eun = sfopen( temp, "w", "Init_Output_Files" ); /* open angle energy file */ sprintf( temp, "%s.eval.%d", control->sim_name, system->my_rank ); - if ( (out_control->eval = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .eval file couldn't be opened\n"); - return FAILURE; - } + out_control->eval = sfopen( temp, "w", "Init_Output_Files" ); /* open coalition energy file */ sprintf( temp, "%s.ecoa.%d", control->sim_name, system->my_rank ); - if ( (out_control->ecoa = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .ecoa file couldn't be opened\n"); - return FAILURE; - } + out_control->ecoa = sfopen( temp, "w", "Init_Output_Files" ); /* open penalty energy file */ sprintf( temp, "%s.epen.%d", control->sim_name, system->my_rank ); - if ( (out_control->epen = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .epen file couldn't be 
opened\n"); - return FAILURE; - } + out_control->epen = sfopen( temp, "w", "Init_Output_Files" ); /* open torsion energy file */ sprintf( temp, "%s.etor.%d", control->sim_name, system->my_rank ); - if ( (out_control->etor = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .etor file couldn't be opened\n"); - return FAILURE; - } + out_control->etor = sfopen( temp, "w", "Init_Output_Files" ); /* open conjugation energy file */ sprintf( temp, "%s.econ.%d", control->sim_name, system->my_rank ); - if ( (out_control->econ = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .econ file couldn't be opened\n"); - return FAILURE; - } + out_control->econ = sfopen( temp, "w", "Init_Output_Files" ); /* open hydrogen bond energy file */ sprintf( temp, "%s.ehb.%d", control->sim_name, system->my_rank ); - if ( (out_control->ehb = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .ehb file couldn't be opened\n"); - return FAILURE; - } + out_control->ehb = sfopen( temp, "w", "Init_Output_Files" ); /* open vdWaals energy file */ sprintf( temp, "%s.evdw.%d", control->sim_name, system->my_rank ); - if ( (out_control->evdw = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .evdw file couldn't be opened\n"); - return FAILURE; - } + out_control->evdw = sfopen( temp, "w", "Init_Output_Files" ); /* open coulomb energy file */ sprintf( temp, "%s.ecou.%d", control->sim_name, system->my_rank ); - if ( (out_control->ecou = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .ecou file couldn't be opened\n"); - return FAILURE; - } + out_control->ecou = sfopen( temp, "w", "Init_Output_Files" ); #endif #ifdef TEST_FORCES /* open bond orders file */ sprintf( temp, "%s.fbo.%d", control->sim_name, system->my_rank ); - if ( (out_control->fbo = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fbo file couldn't be opened\n"); - return FAILURE; - } + out_control->fbo = sfopen( temp, "w", "Init_Output_Files" ); /* 
open bond orders derivatives file */ sprintf( temp, "%s.fdbo.%d", control->sim_name, system->my_rank ); - if ( (out_control->fdbo = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fdbo file couldn't be opened\n"); - return FAILURE; - } + out_control->fdbo = sfopen( temp, "w", "Init_Output_Files" ); /* produce a single force file - to be written by p0 */ if ( system->my_rank == MASTER_NODE ) { /* open bond forces file */ sprintf( temp, "%s.fbond", control->sim_name ); - if ( (out_control->fbond = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fbond file couldn't be opened\n"); - return FAILURE; - } + out_control->fbond = sfopen( temp, "w", "Init_Output_Files" ); /* open lone-pair forces file */ sprintf( temp, "%s.flp", control->sim_name ); - if ( (out_control->flp = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .flp file couldn't be opened\n"); - return FAILURE; - } + out_control->flp = sfopen( temp, "w", "Init_Output_Files" ); /* open overcoordination forces file */ sprintf( temp, "%s.fov", control->sim_name ); - if ( (out_control->fov = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fov file couldn't be opened\n"); - return FAILURE; - } + out_control->fov = sfopen( temp, "w", "Init_Output_Files" ); /* open undercoordination forces file */ sprintf( temp, "%s.fun", control->sim_name ); - if ( (out_control->fun = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fun file couldn't be opened\n"); - return FAILURE; - } + out_control->fun = sfopen( temp, "w", "Init_Output_Files" ); /* open angle forces file */ sprintf( temp, "%s.fang", control->sim_name ); - if ( (out_control->fang = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fang file couldn't be opened\n"); - return FAILURE; - } + out_control->fang = sfopen( temp, "w", "Init_Output_Files" ); /* open coalition forces file */ sprintf( temp, "%s.fcoa", control->sim_name ); - if ( (out_control->fcoa = fopen( 
temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fcoa file couldn't be opened\n"); - return FAILURE; - } + out_control->fcoa = sfopen( temp, "w", "Init_Output_Files" ); /* open penalty forces file */ sprintf( temp, "%s.fpen", control->sim_name ); - if ( (out_control->fpen = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fpen file couldn't be opened\n"); - return FAILURE; - } + out_control->fpen = sfopen( temp, "w", "Init_Output_Files" ); /* open torsion forces file */ sprintf( temp, "%s.ftor", control->sim_name ); - if ( (out_control->ftor = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .ftor file couldn't be opened\n"); - return FAILURE; - } + out_control->ftor = sfopen( temp, "w", "Init_Output_Files" ); /* open conjugation forces file */ sprintf( temp, "%s.fcon", control->sim_name ); - if ( (out_control->fcon = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fcon file couldn't be opened\n"); - return FAILURE; - } + out_control->fcon = sfopen( temp, "w", "Init_Output_Files" ); /* open hydrogen bond forces file */ sprintf( temp, "%s.fhb", control->sim_name ); - if ( (out_control->fhb = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fhb file couldn't be opened\n"); - return FAILURE; - } + out_control->fhb = sfopen( temp, "w", "Init_Output_Files" ); /* open vdw forces file */ sprintf( temp, "%s.fvdw", control->sim_name ); - if ( (out_control->fvdw = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fvdw file couldn't be opened\n"); - return FAILURE; - } + out_control->fvdw = sfopen( temp, "w", "Init_Output_Files" ); /* open nonbonded forces file */ sprintf( temp, "%s.fele", control->sim_name ); - if ( (out_control->fele = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fele file couldn't be opened\n"); - return FAILURE; - } + out_control->fele = sfopen( temp, "w", "Init_Output_Files" ); /* open total force file */ sprintf( temp, "%s.ftot", 
control->sim_name ); - if ( (out_control->ftot = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .ftot file couldn't be opened\n"); - return FAILURE; - } + out_control->ftot = sfopen( temp, "w", "Init_Output_Files" ); /* open force comprison file */ sprintf( temp, "%s.fcomp", control->sim_name ); - if ( (out_control->fcomp = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .fcomp file couldn't be opened\n"); - return FAILURE; - } + out_control->fcomp = sfopen( temp, "w", "Init_Output_Files" ); } #endif @@ -436,27 +282,15 @@ int Init_Output_Files( reax_system *system, control_params *control, #if defined(TEST_FORCES) || defined(TEST_ENERGY) /* open far neighbor list file */ sprintf( temp, "%s.far_nbrs_list.%d", control->sim_name, system->my_rank ); - if ( (out_control->flist = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .far_nbrs_list file couldn't be opened\n"); - return FAILURE; - } + out_control->flist = sfopen( temp, "w", "Init_Output_Files" ); /* open bond list file */ sprintf( temp, "%s.bond_list.%d", control->sim_name, system->my_rank ); - if ( (out_control->blist = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .bond_list file couldn't be opened\n"); - return FAILURE; - } + out_control->blist = sfopen( temp, "w", "Init_Output_Files" ); /* open near neighbor list file */ sprintf( temp, "%s.near_nbrs_list.%d", control->sim_name, system->my_rank ); - if ( (out_control->nlist = fopen( temp, "w" )) == NULL ) - { - strcpy(msg, "Init_Out_Files: .near_nbrs_list file couldn't be opened\n"); - return FAILURE; - } + out_control->nlist = sfopen( temp, "w", "Init_Output_Files" ); #endif #endif @@ -466,7 +300,7 @@ int Init_Output_Files( reax_system *system, control_params *control, /************************ close output files ************************/ int Close_Output_Files( reax_system *system, control_params *control, - output_controls *out_control, mpi_datatypes *mpi_data ) + output_controls 
*out_control, mpi_datatypes *mpi_data ) { if ( out_control->write_steps > 0 ) End_Traj( system->my_rank, out_control ); @@ -475,65 +309,68 @@ int Close_Output_Files( reax_system *system, control_params *control, { if ( out_control->energy_update_freq > 0 ) { - fclose( out_control->out ); - fclose( out_control->pot ); + sfclose( out_control->out, "Close_Output_Files" ); + sfclose( out_control->pot, "Close_Output_Files" ); #if defined(LOG_PERFORMANCE) - fclose( out_control->log ); + sfclose( out_control->log, "Close_Output_Files" ); #endif } if ( control->ensemble == NPT || control->ensemble == iNPT || control->ensemble == sNPT ) - fclose( out_control->prs ); + sfclose( out_control->prs, "Close_Output_Files" ); - if ( control->dipole_anal ) fclose( out_control->dpl ); - if ( control->diffusion_coef ) fclose( out_control->drft ); - if ( control->molecular_analysis ) fclose( out_control->mol ); + if ( control->dipole_anal ) + sfclose( out_control->dpl, "Close_Output_Files" ); + if ( control->diffusion_coef ) + sfclose( out_control->drft, "Close_Output_Files" ); + if ( control->molecular_analysis ) + sfclose( out_control->mol, "Close_Output_Files" ); } #ifdef TEST_ENERGY - fclose( out_control->ebond ); - fclose( out_control->elp ); - fclose( out_control->eov ); - fclose( out_control->eun ); - fclose( out_control->eval ); - fclose( out_control->epen ); - fclose( out_control->ecoa ); - fclose( out_control->ehb ); - fclose( out_control->etor ); - fclose( out_control->econ ); - fclose( out_control->evdw ); - fclose( out_control->ecou ); + sfclose( out_control->ebond, "Close_Output_Files" ); + sfclose( out_control->elp, "Close_Output_Files" ); + sfclose( out_control->eov, "Close_Output_Files" ); + sfclose( out_control->eun, "Close_Output_Files" ); + sfclose( out_control->eval, "Close_Output_Files" ); + sfclose( out_control->epen, "Close_Output_Files" ); + sfclose( out_control->ecoa, "Close_Output_Files" ); + sfclose( out_control->ehb, "Close_Output_Files" ); + sfclose( 
out_control->etor, "Close_Output_Files" ); + sfclose( out_control->econ, "Close_Output_Files" ); + sfclose( out_control->evdw, "Close_Output_Files" ); + sfclose( out_control->ecou, "Close_Output_Files" ); #endif #ifdef TEST_FORCES - fclose( out_control->fbo ); - fclose( out_control->fdbo ); + sfclose( out_control->fbo, "Close_Output_Files" ); + sfclose( out_control->fdbo, "Close_Output_Files" ); if ( system->my_rank == MASTER_NODE ) { - fclose( out_control->fbond ); - fclose( out_control->flp ); - fclose( out_control->fov ); - fclose( out_control->fun ); - fclose( out_control->fang ); - fclose( out_control->fcoa ); - fclose( out_control->fpen ); - fclose( out_control->ftor ); - fclose( out_control->fcon ); - fclose( out_control->fhb ); - fclose( out_control->fvdw ); - fclose( out_control->fele ); - fclose( out_control->ftot ); - fclose( out_control->fcomp ); + sfclose( out_control->fbond, "Close_Output_Files" ); + sfclose( out_control->flp, "Close_Output_Files" ); + sfclose( out_control->fov, "Close_Output_Files" ); + sfclose( out_control->fun, "Close_Output_Files" ); + sfclose( out_control->fang, "Close_Output_Files" ); + sfclose( out_control->fcoa, "Close_Output_Files" ); + sfclose( out_control->fpen, "Close_Output_Files" ); + sfclose( out_control->ftor, "Close_Output_Files" ); + sfclose( out_control->fcon, "Close_Output_Files" ); + sfclose( out_control->fhb, "Close_Output_Files" ); + sfclose( out_control->fvdw, "Close_Output_Files" ); + sfclose( out_control->fele, "Close_Output_Files" ); + sfclose( out_control->ftot, "Close_Output_Files" ); + sfclose( out_control->fcomp, "Close_Output_Files" ); } #endif #if defined(PURE_REAX) #if defined(TEST_FORCES) || defined(TEST_ENERGY) - fclose( out_control->flist ); - fclose( out_control->blist ); - fclose( out_control->nlist ); + sfclose( out_control->flist, "Close_Output_Files" ); + sfclose( out_control->blist, "Close_Output_Files" ); + sfclose( out_control->nlist, "Close_Output_Files" ); #endif #endif @@ -548,11 +385,11 
@@ void Print_Box( simulation_box* box, char *name, FILE *out ) fprintf( out, "%s:\n", name ); fprintf( out, "\tmin[%8.3f %8.3f %8.3f]\n", - box->min[0], box->min[1], box->min[2] ); + box->min[0], box->min[1], box->min[2] ); fprintf( out, "\tmax[%8.3f %8.3f %8.3f]\n", - box->max[0], box->max[1], box->max[2] ); + box->max[0], box->max[1], box->max[2] ); fprintf( out, "\tdims[%8.3f%8.3f%8.3f]\n", - box->box_norms[0], box->box_norms[1], box->box_norms[2] ); + box->box_norms[0], box->box_norms[1], box->box_norms[2] ); // fprintf( out, "box: {" ); // for( i = 0; i < 3; ++i ) @@ -598,34 +435,34 @@ void Print_Grid( grid* g, FILE *out ) }; fprintf( out, "\tnumber of grid cells: %d %d %d\n", - g->ncells[0], g->ncells[1], g->ncells[2] ); + g->ncells[0], g->ncells[1], g->ncells[2] ); fprintf( out, "\tgcell lengths: %8.3f %8.3f %8.3f\n", - g->cell_len[0], g->cell_len[1], g->cell_len[2] ); + g->cell_len[0], g->cell_len[1], g->cell_len[2] ); fprintf( out, "\tinverses of gcell lengths: %8.3f %8.3f %8.3f\n", - g->inv_len[0], g->inv_len[1], g->inv_len[2] ); + g->inv_len[0], g->inv_len[1], g->inv_len[2] ); fprintf( out, "\t---------------------------------\n" ); fprintf( out, "\tnumber of native gcells: %d %d %d\n", - g->native_cells[0], g->native_cells[1], g->native_cells[2] ); + g->native_cells[0], g->native_cells[1], g->native_cells[2] ); fprintf( out, "\tnative gcell span: %d-%d %d-%d %d-%d\n", - g->native_str[0], g->native_end[0], - g->native_str[1], g->native_end[1], - g->native_str[2], g->native_end[2] ); + g->native_str[0], g->native_end[0], + g->native_str[1], g->native_end[1], + g->native_str[2], g->native_end[2] ); fprintf( out, "\t---------------------------------\n" ); fprintf( out, "\tvlist gcell stretch: %d %d %d\n", - g->vlist_span[0], g->vlist_span[1], g->vlist_span[2] ); + g->vlist_span[0], g->vlist_span[1], g->vlist_span[2] ); fprintf( out, "\tnonbonded nbrs gcell stretch: %d %d %d\n", - g->nonb_span[0], g->nonb_span[1], g->nonb_span[2] ); + g->nonb_span[0], 
g->nonb_span[1], g->nonb_span[2] ); fprintf( out, "\tbonded nbrs gcell stretch: %d %d %d\n", - g->bond_span[0], g->bond_span[1], g->bond_span[2] ); + g->bond_span[0], g->bond_span[1], g->bond_span[2] ); fprintf( out, "\t---------------------------------\n" ); fprintf( out, "\tghost gcell span: %d %d %d\n", - g->ghost_span[0], g->ghost_span[1], g->ghost_span[2] ); + g->ghost_span[0], g->ghost_span[1], g->ghost_span[2] ); fprintf( out, "\tnonbonded ghost gcell span: %d %d %d\n", - g->ghost_nonb_span[0], g->ghost_nonb_span[1], g->ghost_nonb_span[2]); + g->ghost_nonb_span[0], g->ghost_nonb_span[1], g->ghost_nonb_span[2]); fprintf(out, "\thbonded ghost gcell span: %d %d %d\n", g->ghost_hbond_span[0], g->ghost_hbond_span[1], g->ghost_hbond_span[2]); fprintf( out, "\tbonded ghost gcell span: %d %d %d\n", - g->ghost_bond_span[0], g->ghost_bond_span[1], g->ghost_bond_span[2]); + g->ghost_bond_span[0], g->ghost_bond_span[1], g->ghost_bond_span[2]); //fprintf(out, "\t---------------------------------\n" ); //fprintf(out, "\tmax number of gcells at the boundary: %d\n", g->gcell_cap); fprintf( out, "\t---------------------------------\n" ); @@ -641,17 +478,17 @@ void Print_Grid( grid* g, FILE *out ) if ( g->cells[x][y][z].type != gc_type ) { fprintf( stderr, - "\tgcells from(%2d %2d %2d) to (%2d %2d %2d): %d - %s\n", - gc_str[0], gc_str[1], gc_str[2], x, y, z, - gc_type, gcell_type_text[gc_type] ); + "\tgcells from(%2d %2d %2d) to (%2d %2d %2d): %d - %s\n", + gc_str[0], gc_str[1], gc_str[2], x, y, z, + gc_type, gcell_type_text[gc_type] ); gc_type = g->cells[x][y][z].type; gc_str[0] = x; gc_str[1] = y; gc_str[2] = z; } fprintf( stderr, "\tgcells from(%2d %2d %2d) to (%2d %2d %2d): %d - %s\n", - gc_str[0], gc_str[1], gc_str[2], x, y, z, - gc_type, gcell_type_text[gc_type] ); + gc_str[0], gc_str[1], gc_str[2], x, y, z, + gc_type, gcell_type_text[gc_type] ); fprintf( out, "-------------------------------------\n" ); } @@ -667,7 +504,7 @@ void Print_GCell_Exchange_Bounds( int 
my_rank, neighbor_proc *my_nbrs ) char exch[3][10] = { "NONE", "NEAR_EXCH", "FULL_EXCH" }; sprintf( fname, "gcell_exchange_bounds%d", my_rank ); - f = fopen( fname, "w" ); + f = sfopen( fname, "w", "Print_GCell_Exchange_Bounds" ); /* loop over neighbor processes */ for ( r[0] = -1; r[0] <= 1; ++r[0]) @@ -678,24 +515,24 @@ void Print_GCell_Exchange_Bounds( int my_rank, neighbor_proc *my_nbrs ) nbr_pr = &(my_nbrs[nbr]); fprintf( f, "p%-2d GCELL BOUNDARIES with r(%2d %2d %2d):\n", - my_rank, r[0], r[1], r[2] ); + my_rank, r[0], r[1], r[2] ); fprintf( f, "\tsend_type %s: send(%d %d %d) to (%d %d %d)\n", - exch[nbr_pr->send_type], - nbr_pr->str_send[0], nbr_pr->str_send[1], - nbr_pr->str_send[2], - nbr_pr->end_send[0], nbr_pr->end_send[1], - nbr_pr->end_send[2] ); + exch[nbr_pr->send_type], + nbr_pr->str_send[0], nbr_pr->str_send[1], + nbr_pr->str_send[2], + nbr_pr->end_send[0], nbr_pr->end_send[1], + nbr_pr->end_send[2] ); fprintf( f, "\trecv_type %s: recv(%d %d %d) to (%d %d %d)\n", - exch[nbr_pr->recv_type], - nbr_pr->str_recv[0], nbr_pr->str_recv[1], - nbr_pr->str_recv[2], - nbr_pr->end_recv[0], nbr_pr->end_recv[1], - nbr_pr->end_recv[2] ); + exch[nbr_pr->recv_type], + nbr_pr->str_recv[0], nbr_pr->str_recv[1], + nbr_pr->str_recv[2], + nbr_pr->end_recv[0], nbr_pr->end_recv[1], + nbr_pr->end_recv[2] ); } - fclose(f); + sfclose( f, "Print_GCell_Exchange_Bounds" ); } @@ -714,7 +551,7 @@ void Print_Native_GCells( reax_system *system ) }; sprintf( fname, "native_gcells.%d", system->my_rank ); - f = fopen( fname, "w" ); + f = sfopen( fname, "w", "Print_Native_GCells" ); g = &(system->my_grid); for ( i = g->native_str[0]; i < g->native_end[0]; i++ ) @@ -724,8 +561,8 @@ void Print_Native_GCells( reax_system *system ) gc = &( g->cells[i][j][k] ); fprintf( f, "p%d gcell(%2d %2d %2d) of type %d(%s)\n", - system->my_rank, i, j, k, - gc->type, gcell_type_text[gc->type] ); + system->my_rank, i, j, k, + gc->type, gcell_type_text[gc->type] ); fprintf( f, "\tatom list start: %d, end: 
%d\n\t", gc->str, gc->end ); @@ -734,7 +571,7 @@ void Print_Native_GCells( reax_system *system ) fprintf( f, "\n" ); } - fclose(f); + sfclose( f, "Print_Native_GCells" ); } @@ -753,7 +590,7 @@ void Print_All_GCells( reax_system *system ) }; sprintf( fname, "all_gcells.%d", system->my_rank ); - f = fopen( fname, "w" ); + f = sfopen( fname, "w", "Print_All_GCells" ); g = &(system->my_grid); for ( i = 0; i < g->ncells[0]; i++ ) @@ -763,8 +600,8 @@ void Print_All_GCells( reax_system *system ) gc = &( g->cells[i][j][k] ); fprintf( f, "p%d gcell(%2d %2d %2d) of type %d(%s)\n", - system->my_rank, i, j, k, - gc->type, gcell_type_text[gc->type] ); + system->my_rank, i, j, k, + gc->type, gcell_type_text[gc->type] ); fprintf( f, "\tatom list start: %d, end: %d\n\t", gc->str, gc->end ); @@ -773,7 +610,7 @@ void Print_All_GCells( reax_system *system ) fprintf( f, "\n" ); } - fclose(f); + sfclose( f, "Print_All_GCells" ); } @@ -785,24 +622,20 @@ void Print_My_Atoms( reax_system *system, control_params *control, int step ) FILE *fh; sprintf( fname, "%s.my_atoms.%d.%d", control->sim_name, step, system->my_rank ); - if ( (fh = fopen( fname, "w" )) == NULL ) - { - fprintf( stderr, "error in opening my_atoms file" ); - MPI_Abort( MPI_COMM_WORLD, FILE_NOT_FOUND ); - } + fh = sfopen( fname, "w", "Print_My_Atoms" ); // fprintf( stderr, "p%d had %d atoms\n", // system->my_rank, system->n ); for ( i = 0; i < system->n; ++i ) fprintf( fh, "p%-2d %-5d %2d %24.15e%24.15e%24.15e\n", - system->my_rank, - system->my_atoms[i].orig_id, system->my_atoms[i].type, - system->my_atoms[i].x[0], - system->my_atoms[i].x[1], - system->my_atoms[i].x[2] ); + system->my_rank, + system->my_atoms[i].orig_id, system->my_atoms[i].type, + system->my_atoms[i].x[0], + system->my_atoms[i].x[1], + system->my_atoms[i].x[2] ); - fclose( fh ); + sfclose( fh, "Print_My_Atoms" ); } @@ -813,29 +646,25 @@ void Print_My_Ext_Atoms( reax_system *system ) FILE *fh; sprintf( fname, "my_ext_atoms.%d", system->my_rank ); - if ( 
(fh = fopen( fname, "w" )) == NULL ) - { - fprintf( stderr, "error in opening my_ext_atoms file" ); - MPI_Abort( MPI_COMM_WORLD, FILE_NOT_FOUND ); - } + fh = sfopen( fname, "w", "Print_My_Ext_Atoms" ); // fprintf( stderr, "p%d had %d atoms\n", // system->my_rank, system->n ); for ( i = 0; i < system->N; ++i ) fprintf( fh, "p%-2d %-5d imprt%-5d %2d %24.15e%24.15e%24.15e\n", - system->my_rank, system->my_atoms[i].orig_id, - system->my_atoms[i].imprt_id, system->my_atoms[i].type, - system->my_atoms[i].x[0], - system->my_atoms[i].x[1], - system->my_atoms[i].x[2] ); + system->my_rank, system->my_atoms[i].orig_id, + system->my_atoms[i].imprt_id, system->my_atoms[i].type, + system->my_atoms[i].x[0], + system->my_atoms[i].x[1], + system->my_atoms[i].x[2] ); - fclose( fh ); + sfclose( fh, "Print_My_Ext_Atoms" ); } void Print_Far_Neighbors( reax_system *system, reax_list **lists, - control_params *control ) + control_params *control ) { char fname[100]; int i, j, id_i, id_j, nbr, natoms; @@ -843,7 +672,7 @@ void Print_Far_Neighbors( reax_system *system, reax_list **lists, reax_list *far_nbrs; sprintf( fname, "%s.far_nbrs.%d", control->sim_name, system->my_rank ); - fout = fopen( fname, "w" ); + fout = sfopen( fname, "w", "Print_Far_Neighbors" ); far_nbrs = lists[FAR_NBRS]; natoms = system->N; @@ -853,24 +682,24 @@ void Print_Far_Neighbors( reax_system *system, reax_list **lists, for ( j = Start_Index(i, far_nbrs); j < End_Index(i, far_nbrs); ++j ) { - nbr = far_nbrs->far_nbr_list[j].nbr; + nbr = far_nbrs->far_nbr_list.nbr[j]; id_j = system->my_atoms[nbr].orig_id; fprintf( fout, "%6d%6d%24.15e%24.15e%24.15e%24.15e\n", - id_i, id_j, far_nbrs->far_nbr_list[j].d, - far_nbrs->far_nbr_list[j].dvec[0], - far_nbrs->far_nbr_list[j].dvec[1], - far_nbrs->far_nbr_list[j].dvec[2] ); + id_i, id_j, far_nbrs->far_nbr_list.d[j], + far_nbrs->far_nbr_list.dvec[j][0], + far_nbrs->far_nbr_list.dvec[j][1], + far_nbrs->far_nbr_list.dvec[j][2] ); fprintf( fout, 
"%6d%6d%24.15e%24.15e%24.15e%24.15e\n", - id_j, id_i, far_nbrs->far_nbr_list[j].d, - -far_nbrs->far_nbr_list[j].dvec[0], - -far_nbrs->far_nbr_list[j].dvec[1], - -far_nbrs->far_nbr_list[j].dvec[2] ); + id_j, id_i, far_nbrs->far_nbr_list.d[j], + -far_nbrs->far_nbr_list.dvec[j][0], + -far_nbrs->far_nbr_list.dvec[j][1], + -far_nbrs->far_nbr_list.dvec[j][2] ); } } - fclose( fout ); + sfclose( fout, "Print_Far_Neighbors" ); } @@ -881,25 +710,29 @@ void Print_Sparse_Matrix( reax_system *system, sparse_matrix *A ) for ( i = 0; i < A->n; ++i ) for ( j = A->start[i]; j < A->end[i]; ++j ) fprintf( stderr, "%d %d %.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[A->entries[j].j].orig_id, - A->entries[j].val ); + system->my_atoms[i].orig_id, + system->my_atoms[A->entries[j].j].orig_id, + A->entries[j].val ); } void Print_Sparse_Matrix2( reax_system *system, sparse_matrix *A, char *fname ) { int i, j; - FILE *f = fopen( fname, "w" ); + FILE *f = sfopen( fname, "w", "Print_Sparse_Matrix2" ); - for ( i = 0; i < A->n; ++i ) - for ( j = A->start[i]; j < A->end[i]; ++j ) - fprintf( f, "%d %d %.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[A->entries[j].j].orig_id, - A->entries[j].val ); + if( system->my_rank == 0 ) + { + for ( i = 0; i < A->n; ++i ) + for ( j = A->start[i]; j < A->end[i]; ++j ) + fprintf( f, "%d %d %.15e\n", + system->my_atoms[i].orig_id, + system->my_atoms[A->entries[j].j].orig_id, + A->entries[j].val ); + + } - fclose(f); + sfclose( f, "Print_Sparse_Matrix2" ); } @@ -907,7 +740,7 @@ void Print_Symmetric_Sparse(reax_system *system, sparse_matrix *A, char *fname) { int i, j; reax_atom *ai, *aj; - FILE *f = fopen( fname, "w" ); + FILE *f = sfopen( fname, "w", "Print_Symmetric_Sparse" ); for ( i = 0; i < A->n; ++i ) { @@ -916,64 +749,75 @@ void Print_Symmetric_Sparse(reax_system *system, sparse_matrix *A, char *fname) { aj = &(system->my_atoms[A->entries[j].j]); fprintf( f, "%d %d %.15e\n", - ai->renumber, aj->renumber, A->entries[j].val ); + 
ai->renumber, aj->renumber, A->entries[j].val ); if ( A->entries[j].j < system->n && ai->renumber != aj->renumber ) fprintf( f, "%d %d %.15e\n", - aj->renumber, ai->renumber, A->entries[j].val ); + aj->renumber, ai->renumber, A->entries[j].val ); } } - fclose(f); + sfclose( f, "Print_Symmetric_Sparse" ); } void Print_Linear_System( reax_system *system, control_params *control, - storage *workspace, int step ) + storage *workspace, int step ) { - int i, j; - char fname[100]; - reax_atom *ai, *aj; - sparse_matrix *H; + int i; +// int j; + char fname[100]; + reax_atom *ai; +// reax_atom *aj; +// sparse_matrix *H; FILE *out; // print rhs and init guesses for QEq sprintf( fname, "%s.p%dstate%d", control->sim_name, system->my_rank, step ); - out = fopen( fname, "w" ); + out = sfopen( fname, "w", "Print_Linear_System" ); for ( i = 0; i < system->n; i++ ) { ai = &(system->my_atoms[i]); fprintf( out, "%6d%2d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", - ai->renumber, ai->type, ai->x[0], ai->x[1], ai->x[2], - workspace->s[i], workspace->b_s[i], - workspace->t[i], workspace->b_t[i] ); + ai->renumber, ai->type, ai->x[0], ai->x[1], ai->x[2], + workspace->s[i], workspace->b_s[i], + workspace->t[i], workspace->b_t[i] ); } - fclose( out ); + sfclose( out, "Print_Linear_System" ); // print QEq coef matrix sprintf( fname, "%s.p%dH%d", control->sim_name, system->my_rank, step ); Print_Symmetric_Sparse( system, workspace->H, fname ); // print the incomplete H matrix - /*sprintf( fname, "%s.p%dHinc%d", control->sim_name, system->my_rank, step ); - out = fopen( fname, "w" ); - H = workspace->H; - for( i = 0; i < H->n; ++i ) { - ai = &(system->my_atoms[i]); - for( j = H->start[i]; j < H->end[i]; ++j ) - if( H->entries[j].j < system->n ) { - aj = &(system->my_atoms[H->entries[j].j]); - fprintf( out, "%d %d %.15e\n", - ai->orig_id, aj->orig_id, H->entries[j].val ); - if( ai->orig_id != aj->orig_id ) - fprintf( out, "%d %d %.15e\n", - aj->orig_id, ai->orig_id, H->entries[j].val ); 
- } - } - fclose( out );*/ +// sprintf( fname, "%s.p%dHinc%d", control->sim_name, system->my_rank, step ); +// out = sfopen( fname, "w", "Print_Linear_System" ); +// H = workspace->H; +// +// for( i = 0; i < H->n; ++i ) +// { +// ai = &(system->my_atoms[i]); +// +// for( j = H->start[i]; j < H->end[i]; ++j ) +// { +// if( H->entries[j].j < system->n ) { +// aj = &(system->my_atoms[H->entries[j].j]); +// +// fprintf( out, "%d %d %.15e\n", +// ai->orig_id, aj->orig_id, H->entries[j].val ); +// +// if( ai->orig_id != aj->orig_id ) +// { +// fprintf( out, "%d %d %.15e\n", +// aj->orig_id, ai->orig_id, H->entries[j].val ); +// } +// } +// } +// } +// sfclose( out, "Print_Linear_System" ); // print the L from incomplete cholesky decomposition - /*sprintf( fname, "%s.p%dL%d", control->sim_name, system->my_rank, step ); - Print_Sparse_Matrix2( system, workspace->L, fname );*/ +// sprintf( fname, "%s.p%dL%d", control->sim_name, system->my_rank, step ); +// Print_Sparse_Matrix2( system, workspace->L, fname ); } @@ -984,13 +828,13 @@ void Print_LinSys_Soln( reax_system *system, real *x, real *b_prm, real *b ) FILE *fout; sprintf( fname, "qeq.%d.out", system->my_rank ); - fout = fopen( fname, "w" ); + fout = sfopen( fname, "w", "Print_LinSys_Soln" ); for ( i = 0; i < system->n; ++i ) fprintf( fout, "%6d%10.4f%10.4f%10.4f\n", - system->my_atoms[i].orig_id, x[i], b_prm[i], b[i] ); + system->my_atoms[i].orig_id, x[i], b_prm[i], b[i] ); - fclose( fout ); + sfclose( fout, "Print_LinSys_Soln" ); } @@ -1001,16 +845,16 @@ void Print_Charges( reax_system *system ) FILE *fout; sprintf( fname, "q.%d.out", system->my_rank ); - fout = fopen( fname, "w" ); + fout = sfopen( fname, "w", "Print_Charges" ); for ( i = 0; i < system->n; ++i ) fprintf( fout, "%6d %10.7f %10.7f %10.7f\n", - system->my_atoms[i].orig_id, - system->my_atoms[i].s[0], - system->my_atoms[i].t[0], - system->my_atoms[i].q ); + system->my_atoms[i].orig_id, + system->my_atoms[i].s[0], + system->my_atoms[i].t[0], + 
system->my_atoms[i].q ); - fclose( fout ); + sfclose( fout, "Print_Charges" ); } @@ -1021,10 +865,11 @@ void Print_HBonds( reax_system *system, reax_list **lists, char fname[MAX_STR]; hbond_data *phbond; FILE *fout; + reax_list *far_nbrs = lists[FAR_NBRS]; reax_list *hbonds = lists[HBONDS]; sprintf( fname, "%s.hbonds.%d.%d", control->sim_name, step, system->my_rank ); - fout = fopen( fname, "w" ); + fout = sfopen( fname, "w", "Print_HBonds" ); for ( i = 0; i < system->numH; ++i ) { @@ -1033,16 +878,18 @@ void Print_HBonds( reax_system *system, reax_list **lists, phbond = &hbonds->hbond_list[pj]; fprintf( fout, "%8d%8d %24.15e %24.15e %24.15e\n", i, phbond->nbr, - phbond->ptr->dvec[0], phbond->ptr->dvec[1], phbond->ptr->dvec[2] ); -// fprintf( fout, "%8d%8d %8d %8d\n", i, phbond->nbr, -// phbond->scl, phbond->sym_index ); + far_nbrs->far_nbr_list.dvec[phbond->ptr][0], + far_nbrs->far_nbr_list.dvec[phbond->ptr][1], + far_nbrs->far_nbr_list.dvec[phbond->ptr][2] ); + // fprintf( fout, "%8d%8d %8d %8d\n", i, phbond->nbr, + // phbond->scl, phbond->sym_index ); } } - fclose( fout ); + sfclose( fout, "Print_HBonds" ); } - + void Print_HBond_Indices( reax_system *system, reax_list **lists, control_params *control, int step ) { @@ -1052,7 +899,7 @@ void Print_HBond_Indices( reax_system *system, reax_list **lists, reax_list *hbonds = lists[HBONDS]; sprintf( fname, "%s.hbonds_indices.%d.%d", control->sim_name, step, system->my_rank ); - fout = fopen( fname, "w" ); + fout = sfopen( fname, "w", "Print_HBond_Indices" ); for ( i = 0; i < system->N; ++i ) { @@ -1060,7 +907,7 @@ void Print_HBond_Indices( reax_system *system, reax_list **lists, i, Start_Index(i, hbonds), End_Index(i, hbonds) ); } - fclose( fout ); + sfclose( fout, "Print_HBond_Indices" ); } @@ -1075,7 +922,7 @@ void Print_Bonds( reax_system *system, reax_list **lists, reax_list *bonds = lists[BONDS]; sprintf( fname, "%s.bonds.%d.%d", control->sim_name, step, system->my_rank ); - fout = fopen( fname, "w" ); + fout = 
sfopen( fname, "w", "Print_Bonds" ); for ( i = 0; i < system->N; ++i ) { @@ -1083,16 +930,16 @@ void Print_Bonds( reax_system *system, reax_list **lists, { pbond = &bonds->bond_list[pj]; bo_ij = &pbond->bo_data; -// fprintf( fout, "%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", -// system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, -// pbond->d, bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 ); + // fprintf( fout, "%6d%6d%23.15e%23.15e%23.15e%23.15e%23.15e\n", + // system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, + // pbond->d, bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 ); fprintf( fout, "%8d%8d %24.15f %24.15f\n", i, pbond->nbr, //system->my_atoms[i].orig_id, system->my_atoms[j].orig_id, pbond->d, bo_ij->BO ); } } - fclose( fout ); + sfclose( fout, "Print_Bonds" ); } @@ -1104,7 +951,7 @@ int fn_qsort_intcmp( const void *a, const void *b ) void Print_Bond_List2( reax_system *system, reax_list *bonds, char *fname ) { int i, j, id_i, id_j, nbr, pj; - FILE *f = fopen( fname, "w" ); + FILE *f = sfopen( fname, "w", "Print_Bond_List2" ); int temp[500]; int num = 0; @@ -1130,7 +977,7 @@ void Print_Bond_List2( reax_system *system, reax_list *bonds, char *fname ) void Print_Total_Force( reax_system *system, simulation_data *data, - storage *workspace ) + storage *workspace ) { int i; @@ -1139,9 +986,9 @@ void Print_Total_Force( reax_system *system, simulation_data *data, for ( i = 0; i < system->N; ++i ) fprintf( stderr, "%6d %f %f %f\n", - //"%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - workspace->f[i][0], workspace->f[i][1], workspace->f[i][2] ); + //"%6d%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + workspace->f[i][0], workspace->f[i][1], workspace->f[i][2] ); } @@ -1155,7 +1002,7 @@ void Print_Far_Neighbors_List_Adj_Format( reax_system *system, FILE *fout; sprintf( fname, "%s.far.%d.%d", control->sim_name, step, system->my_rank ); - fout = fopen( fname, "w" ); + fout = sfopen( fname, "w", 
"Print_Far_Neighbors_Adj_Format" ); num_intrs = 0; intrs = NULL; @@ -1180,7 +1027,7 @@ void Print_Far_Neighbors_List_Adj_Format( reax_system *system, for ( pj = Start_Index(i, list); pj < End_Index(i, list); ++pj ) { - nbr = list->far_nbr_list[pj].nbr; + nbr = list->far_nbr_list.nbr[pj]; id_j = system->my_atoms[nbr].orig_id; intrs[cnt++] = id_j; } @@ -1202,19 +1049,19 @@ void Print_Far_Neighbors_List_Adj_Format( reax_system *system, free( intrs ); } - fclose( fout ); + sfclose( fout, "Print_Far_Neighbors_List_Adj_Format" ); } void Output_Results( reax_system *system, control_params *control, - simulation_data *data, reax_list **lists, - output_controls *out_control, mpi_datatypes *mpi_data ) + simulation_data *data, reax_list **lists, + output_controls *out_control, mpi_datatypes *mpi_data ) { #if defined(LOG_PERFORMANCE) real t_elapsed, denom; #endif if ((out_control->energy_update_freq > 0 && - data->step % out_control->energy_update_freq == 0) || + data->step % out_control->energy_update_freq == 0) || (out_control->write_steps > 0 && data->step % out_control->write_steps == 0)) { @@ -1228,74 +1075,111 @@ void Output_Results( reax_system *system, control_params *control, { #if !defined(DEBUG) && !defined(DEBUG_FOCUS) fprintf( out_control->out, - "%-6d%14.2f%14.2f%14.2f%11.2f%13.2f%13.5f\n", - data->step, data->sys_en.e_tot, data->sys_en.e_pot, - E_CONV * data->sys_en.e_kin, data->therm.T, - system->big_box.V, data->iso_bar.P ); + "%-6d%14.2f%14.2f%14.2f%11.2f%13.2f%13.5f\n", + data->step, data->sys_en.e_tot, data->sys_en.e_pot, + E_CONV * data->sys_en.e_kin, data->therm.T, + system->big_box.V, data->iso_bar.P ); fprintf( out_control->pot, - "%-6d%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f\n", - data->step, - data->sys_en.e_bond, - data->sys_en.e_ov + data->sys_en.e_un, data->sys_en.e_lp, - data->sys_en.e_ang + data->sys_en.e_pen, data->sys_en.e_coa, - data->sys_en.e_hb, - data->sys_en.e_tor, data->sys_en.e_con, - data->sys_en.e_vdW, 
data->sys_en.e_ele, data->sys_en.e_pol); + "%-6d%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f%14.2f\n", + data->step, + data->sys_en.e_bond, + data->sys_en.e_ov + data->sys_en.e_un, data->sys_en.e_lp, + data->sys_en.e_ang + data->sys_en.e_pen, data->sys_en.e_coa, + data->sys_en.e_hb, + data->sys_en.e_tor, data->sys_en.e_con, + data->sys_en.e_vdW, data->sys_en.e_ele, data->sys_en.e_pol); #else fprintf( out_control->out, - "%-6d%24.15e%24.15e%24.15e%13.5f%16.5f%13.5f\n", - data->step, data->sys_en.e_tot, data->sys_en.e_pot, - E_CONV * data->sys_en.e_kin, data->therm.T, - system->big_box.V, data->iso_bar.P ); + "%-6d%24.15e%24.15e%24.15e%13.5f%16.5f%13.5f\n", + data->step, data->sys_en.e_tot, data->sys_en.e_pot, + E_CONV * data->sys_en.e_kin, data->therm.T, + system->big_box.V, data->iso_bar.P ); fprintf( out_control->pot, - "%-6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", - data->step, - data->sys_en.e_bond, - data->sys_en.e_ov + data->sys_en.e_un, data->sys_en.e_lp, - data->sys_en.e_ang + data->sys_en.e_pen, data->sys_en.e_coa, - data->sys_en.e_hb, - data->sys_en.e_tor, data->sys_en.e_con, - data->sys_en.e_vdW, data->sys_en.e_ele, data->sys_en.e_pol); + "%-6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", + data->step, + data->sys_en.e_bond, + data->sys_en.e_ov + data->sys_en.e_un, data->sys_en.e_lp, + data->sys_en.e_ang + data->sys_en.e_pen, data->sys_en.e_coa, + data->sys_en.e_hb, + data->sys_en.e_tor, data->sys_en.e_con, + data->sys_en.e_vdW, data->sys_en.e_ele, data->sys_en.e_pol); #endif //DEBUG #if defined(LOG_PERFORMANCE) - t_elapsed = Get_Timing_Info( data->timing.total ); + t_elapsed = MPI_Wtime() - data->timing.total; if ( data->step - data->prev_steps > 0 ) + { denom = 1.0 / out_control->energy_update_freq; - else denom = 1; - - fprintf( out_control->log, "%6d%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%8.3f%6d\n", - data->step, - t_elapsed * denom, - data->timing.comm * denom, - 
data->timing.nbrs * denom, - data->timing.init_forces * denom, - data->timing.bonded * denom, - data->timing.nonb * denom, - data->timing.cm * denom, - (int)(data->timing.cm_solver_iters * denom) ); - - Reset_Timing( &(data->timing) ); + } + else + { + denom = 1.0; + } + + fprintf( out_control->log, "%6d %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.2f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f %10.4f\n", + data->step, + t_elapsed * denom, + data->timing.comm * denom, + data->timing.nbrs * denom, + data->timing.init_forces * denom, + data->timing.init_dist * denom, + data->timing.init_cm * denom, + data->timing.init_bond * denom, + data->timing.bonded * denom, + (data->timing.nonb + data->timing.cm) * denom, + data->timing.cm * denom, + data->timing.cm_sort * denom, + (double)(data->timing.cm_solver_iters * denom), + data->timing.cm_solver_pre_comp * denom, + data->timing.cm_solver_pre_app * denom, + data->timing.cm_solver_comm * denom, + data->timing.cm_solver_allreduce * denom, + data->timing.cm_solver_spmv * denom, + data->timing.cm_solver_vector_ops * denom, + data->timing.cm_solver_orthog * denom, + data->timing.cm_solver_tri_solve * denom ); + + //Reset_Timing( &(data->timing) ); + data->timing.total = MPI_Wtime( ); + data->timing.comm = ZERO; + data->timing.nbrs = ZERO; + data->timing.init_forces = ZERO; + data->timing.init_dist = ZERO; + data->timing.init_cm = ZERO; + data->timing.init_bond = ZERO; + data->timing.bonded = ZERO; + data->timing.nonb = ZERO; + data->timing.cm = ZERO; + data->timing.cm_sort = ZERO; + data->timing.cm_solver_pre_comp = ZERO; + data->timing.cm_solver_pre_app = ZERO; + data->timing.cm_solver_comm = ZERO; + data->timing.cm_solver_allreduce = ZERO; + data->timing.cm_solver_iters = 0; + data->timing.cm_solver_spmv = ZERO; + data->timing.cm_solver_vector_ops = ZERO; + data->timing.cm_solver_orthog = ZERO; + data->timing.cm_solver_tri_solve = ZERO; fflush( out_control->log ); #endif 
//LOG_PERFORMANCE if ( control->virial ) { fprintf( out_control->prs, - "%8d%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f\n", - data->step, - data->int_press[0], data->int_press[1], data->int_press[2], - data->ext_press[0], data->ext_press[1], data->ext_press[2], - data->kin_press ); + "%8d%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f\n", + data->step, + data->int_press[0], data->int_press[1], data->int_press[2], + data->ext_press[0], data->ext_press[1], data->ext_press[2], + data->kin_press ); fprintf( out_control->prs, - "%8s%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f\n", - "", system->big_box.box_norms[0], system->big_box.box_norms[1], - system->big_box.box_norms[2], - data->tot_press[0], data->tot_press[1], data->tot_press[2], - system->big_box.V ); + "%8s%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f%13.6f\n", + "", system->big_box.box_norms[0], system->big_box.box_norms[1], + system->big_box.box_norms[2], + data->tot_press[0], data->tot_press[1], data->tot_press[2], + system->big_box.V ); fflush( out_control->prs); } @@ -1322,39 +1206,39 @@ void Output_Results( reax_system *system, control_params *control, void Debug_Marker_Bonded( output_controls *out_control, int step ) { fprintf( out_control->ebond, "step: %d\n%6s%6s%12s%12s%12s\n", - step, "atom1", "atom2", "bo", "ebond", "total" ); + step, "atom1", "atom2", "bo", "ebond", "total" ); fprintf( out_control->elp, "step: %d\n%6s%12s%12s%12s\n", - step, "atom", "nlp", "elp", "total" ); + step, "atom", "nlp", "elp", "total" ); fprintf( out_control->eov, "step: %d\n%6s%12s%12s\n", - step, "atom", "eov", "total" ); + step, "atom", "eov", "total" ); fprintf( out_control->eun, "step: %d\n%6s%12s%12s\n", - step, "atom", "eun", "total" ); + step, "atom", "eun", "total" ); fprintf( out_control->eval, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "angle", "theta0", - "bo(12)", "bo(23)", "eval", "total" ); + step, "atom1", "atom2", "atom3", "angle", "theta0", + "bo(12)", "bo(23)", "eval", "total" ); 
fprintf( out_control->epen, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "angle", "bo(12)", "bo(23)", - "epen", "total" ); + step, "atom1", "atom2", "atom3", "angle", "bo(12)", "bo(23)", + "epen", "total" ); fprintf( out_control->ecoa, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "angle", "bo(12)", "bo(23)", - "ecoa", "total" ); + step, "atom1", "atom2", "atom3", "angle", "bo(12)", "bo(23)", + "ecoa", "total" ); fprintf( out_control->ehb, "step: %d\n%6s%6s%6s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "r(23)", "angle", "bo(12)", - "ehb", "total" ); + step, "atom1", "atom2", "atom3", "r(23)", "angle", "bo(12)", + "ehb", "total" ); fprintf( out_control->etor, "step: %d\n%6s%6s%6s%6s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "atom4", "phi", "bo(23)", - "etor", "total" ); + step, "atom1", "atom2", "atom3", "atom4", "phi", "bo(23)", + "etor", "total" ); fprintf( out_control->econ, "step:%d\n%6s%6s%6s%6s%12s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "atom3", "atom4", - "phi", "bo(12)", "bo(23)", "bo(34)", "econ", "total" ); + step, "atom1", "atom2", "atom3", "atom4", + "phi", "bo(12)", "bo(23)", "bo(34)", "econ", "total" ); } void Debug_Marker_Nonbonded( output_controls *out_control, int step ) { fprintf( out_control->evdw, "step: %d\n%6s%6s%12s%12s%12s\n", - step, "atom1", "atom2", "r12", "evdw", "total" ); + step, "atom1", "atom2", "r12", "evdw", "total" ); fprintf( out_control->ecou, "step: %d\n%6s%6s%12s%12s%12s%12s%12s\n", - step, "atom1", "atom2", "r12", "q1", "q2", "ecou", "total" ); + step, "atom1", "atom2", "r12", "q1", "q2", "ecou", "total" ); } #endif @@ -1362,16 +1246,16 @@ void Debug_Marker_Nonbonded( output_controls *out_control, int step ) #ifdef TEST_FORCES void Dummy_Printer( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage 
*workspace, + reax_list **lists, output_controls *out_control ) { } void Print_Bond_Orders( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { int i, pj, pk; bond_order_data *bo_ij; @@ -1382,52 +1266,52 @@ void Print_Bond_Orders( reax_system *system, control_params *control, /* bond orders */ fprintf( out_control->fbo, "step: %d\n", data->step ); fprintf( out_control->fbo, "%6s%6s%12s%12s%12s%12s%12s\n", - "atom1", "atom2", "r_ij", "total_bo", "bo_s", "bo_p", "bo_pp" ); + "atom1", "atom2", "r_ij", "total_bo", "bo_s", "bo_p", "bo_pp" ); for ( i = 0; i < system->N; ++i ) for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) { bo_ij = &(bonds->bond_list[pj].bo_data); fprintf( out_control->fbo, - "%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[bonds->bond_list[pj].nbr].orig_id, - bonds->bond_list[pj].d, - bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 ); + "%6d%6d%24.15e%24.15e%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + system->my_atoms[bonds->bond_list[pj].nbr].orig_id, + bonds->bond_list[pj].d, + bo_ij->BO, bo_ij->BO_s, bo_ij->BO_pi, bo_ij->BO_pi2 ); } /* derivatives of bond orders */ fprintf( out_control->fdbo, "step: %d\n", data->step ); fprintf( out_control->fdbo, "%6s%6s%6s%24s%24s%24s\n", - "atom1", "atom2", "atom2", "dBO", "dBOpi", "dBOpi2" ); + "atom1", "atom2", "atom2", "dBO", "dBOpi", "dBOpi2" ); for ( i = 0; i < system->N; ++i ) for ( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) { /* fprintf( out_control->fdbo, "%6d %6d\tstart: %6d\tend: %6d\n", - system->my_atoms[i].orig_id, - system->my_atoms[bonds->bond_list[pj].nbr].orig_id, - Start_Index( pj, dBOs ), End_Index( pj, dBOs ) ); */ + system->my_atoms[i].orig_id, + system->my_atoms[bonds->bond_list[pj].nbr].orig_id, + 
Start_Index( pj, dBOs ), End_Index( pj, dBOs ) ); */ for ( pk = Start_Index(pj, dBOs); pk < End_Index(pj, dBOs); ++pk ) { dbo_k = &(dBOs->dbo_list[pk]); fprintf( out_control->fdbo, "%6d%6d%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[bonds->bond_list[pj].nbr].orig_id, - system->my_atoms[dbo_k->wrt].orig_id, - dbo_k->dBO[0], dbo_k->dBO[1], dbo_k->dBO[2] ); + system->my_atoms[i].orig_id, + system->my_atoms[bonds->bond_list[pj].nbr].orig_id, + system->my_atoms[dbo_k->wrt].orig_id, + dbo_k->dBO[0], dbo_k->dBO[1], dbo_k->dBO[2] ); fprintf( out_control->fdbo, "%6d%6d%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[bonds->bond_list[pj].nbr].orig_id, - system->my_atoms[dbo_k->wrt].orig_id, - dbo_k->dBOpi[0], dbo_k->dBOpi[1], dbo_k->dBOpi[2] ); + system->my_atoms[i].orig_id, + system->my_atoms[bonds->bond_list[pj].nbr].orig_id, + system->my_atoms[dbo_k->wrt].orig_id, + dbo_k->dBOpi[0], dbo_k->dBOpi[1], dbo_k->dBOpi[2] ); fprintf( out_control->fdbo, "%6d%6d%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[bonds->bond_list[pj].nbr].orig_id, - system->my_atoms[dbo_k->wrt].orig_id, - dbo_k->dBOpi2[0], dbo_k->dBOpi2[1], dbo_k->dBOpi2[2] ); + system->my_atoms[i].orig_id, + system->my_atoms[bonds->bond_list[pj].nbr].orig_id, + system->my_atoms[dbo_k->wrt].orig_id, + dbo_k->dBOpi2[0], dbo_k->dBOpi2[1], dbo_k->dBOpi2[2] ); } } } @@ -1442,15 +1326,15 @@ void Print_Forces( FILE *f, storage *workspace, int N, int step ) //fprintf( f, "%6d %23.15e %23.15e %23.15e\n", //fprintf( f, "%6d%12.6f%12.6f%12.6f\n", fprintf( f, "%6d %19.9e %19.9e %19.9e\n", - workspace->id_all[i], workspace->f_all[i][0], - workspace->f_all[i][1], workspace->f_all[i][2] ); + workspace->id_all[i], workspace->f_all[i][0], + workspace->f_all[i][1], workspace->f_all[i][2] ); } void Print_Force_Files( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls 
*out_control, - mpi_datatypes *mpi_data ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data ) { int i, d; @@ -1514,11 +1398,11 @@ void Print_Force_Files( reax_system *system, control_params *control, { for ( d = 0; d < 3; ++d ) workspace->f_tot[i][d] = workspace->f_be[i][d] + - workspace->f_lp[i][d] + workspace->f_ov[i][d] + workspace->f_un[i][d] + - workspace->f_ang[i][d] + workspace->f_pen[i][d] + workspace->f_coa[i][d] + - workspace->f_tor[i][d] + workspace->f_con[i][d] + - workspace->f_vdw[i][d] + workspace->f_ele[i][d] + - workspace->f_hb[i][d]; + workspace->f_lp[i][d] + workspace->f_ov[i][d] + workspace->f_un[i][d] + + workspace->f_ang[i][d] + workspace->f_pen[i][d] + workspace->f_coa[i][d] + + workspace->f_tor[i][d] + workspace->f_con[i][d] + + workspace->f_vdw[i][d] + workspace->f_ele[i][d] + + workspace->f_hb[i][d]; } Coll_rvecs_at_Master( system, workspace, mpi_data, workspace->f_tot ); @@ -1531,8 +1415,8 @@ void Print_Force_Files( reax_system *system, control_params *control, #if defined(TEST_FORCES) || defined(TEST_ENERGY) void Print_Far_Neighbors_List( reax_system *system, reax_list **lists, - control_params *control, simulation_data *data, - output_controls *out_control ) + control_params *control, simulation_data *data, + output_controls *out_control ) { int i, j, id_i, id_j, nbr, natoms; int num = 0; @@ -1566,8 +1450,8 @@ void Print_Far_Neighbors_List( reax_system *system, reax_list **lists, } void Print_Bond_List( reax_system *system, control_params *control, - simulation_data *data, reax_list **lists, - output_controls *out_control) + simulation_data *data, reax_list **lists, + output_controls *out_control) { int i, j, id_i, id_j, nbr, pj; reax_list *bonds = lists[BONDS]; @@ -1609,283 +1493,283 @@ void Print_Init_Atoms( reax_system *system, storage *workspace ) int i; fprintf( stderr, "p%d had %d atoms\n", - system->my_rank, workspace->init_cnt ); + system->my_rank, 
workspace->init_cnt ); for ( i = 0; i < workspace->init_cnt; ++i ) fprintf( stderr, "p%d, atom%d: %d %s %8.3f %8.3f %8.3f\n", - system->my_rank, i, - workspace->init_atoms[i].type, workspace->init_atoms[i].name, - workspace->init_atoms[i].x[0], - workspace->init_atoms[i].x[1], - workspace->init_atoms[i].x[2] ); + system->my_rank, i, + workspace->init_atoms[i].type, workspace->init_atoms[i].name, + workspace->init_atoms[i].x[0], + workspace->init_atoms[i].x[1], + workspace->init_atoms[i].x[2] ); } #endif //OLD_VERSION /*void Print_Bond_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) -{ + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) + { int i; fprintf( out_control->fbond, "step: %d\n", data->step ); fprintf( out_control->fbond, "%6s%24s%24s%24s\n", - "atom", "f_be[0]", "f_be[1]", "f_be[2]" ); + "atom", "f_be[0]", "f_be[1]", "f_be[2]" ); for( i = 0; i < system->bigN; ++i ) - fprintf(out_control->fbond, "%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - workspace->f_all[i][0], workspace->f_all[i][1], - workspace->f_all[i][2]); -} + fprintf(out_control->fbond, "%6d%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + workspace->f_all[i][0], workspace->f_all[i][1], + workspace->f_all[i][2]); + } -void Print_LonePair_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) -{ + void Print_LonePair_Forces( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) + { int i; fprintf( out_control->flp, "step: %d\n", data->step ); fprintf( out_control->flp, "%6s%24s\n", "atom", "f_lonepair" ); for( i = 0; i < system->bigN; ++i ) - fprintf(out_control->flp, "%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - 
workspace->f_all[i][0], workspace->f_all[i][1], - workspace->f_all[i][2]); -} + fprintf(out_control->flp, "%6d%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + workspace->f_all[i][0], workspace->f_all[i][1], + workspace->f_all[i][2]); + } -void Print_OverCoor_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) -{ + void Print_OverCoor_Forces( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) + { int i; fprintf( out_control->fov, "step: %d\n", data->step ); fprintf( out_control->fov, "%6s%-38s%-38s%-38s\n", - "atom","f_over[0]", "f_over[1]", "f_over[2]" ); + "atom","f_over[0]", "f_over[1]", "f_over[2]" ); for( i = 0; i < system->bigN; ++i ) - fprintf( out_control->fov, - "%6d %24.15e%24.15e%24.15e 0 0 0\n", - system->my_atoms[i].orig_id, - workspace->f_all[i][0], workspace->f_all[i][1], - workspace->f_all[i][2] ); -} + fprintf( out_control->fov, + "%6d %24.15e%24.15e%24.15e 0 0 0\n", + system->my_atoms[i].orig_id, + workspace->f_all[i][0], workspace->f_all[i][1], + workspace->f_all[i][2] ); + } -void Print_UnderCoor_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) -{ + void Print_UnderCoor_Forces( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) + { int i; fprintf( out_control->fun, "step: %d\n", data->step ); fprintf( out_control->fun, "%6s%-38s%-38s%-38s\n", - "atom","f_under[0]", "f_under[1]", "f_under[2]" ); + "atom","f_under[0]", "f_under[1]", "f_under[2]" ); for( i = 0; i < system->bigN; ++i ) - fprintf( out_control->fun, - "%6d %24.15e%24.15e%24.15e 0 0 0\n", - system->my_atoms[i].orig_id, - workspace->f_all[i][0], workspace->f_all[i][1], - workspace->f_all[i][2] ); -} 
+ fprintf( out_control->fun, + "%6d %24.15e%24.15e%24.15e 0 0 0\n", + system->my_atoms[i].orig_id, + workspace->f_all[i][0], workspace->f_all[i][1], + workspace->f_all[i][2] ); + } void Print_ValAngle_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - int j; - - fprintf( out_control->f3body, "step: %d\n", data->step ); - fprintf( out_control->f3body, "%6s%-37s%-37s%-37s%-38s\n", - "atom", "3-body total", "f_ang", "f_pen", "f_coa" ); - - for( j = 0; j < system->N; ++j ){ - if( rvec_isZero(workspace->f_pen[j]) && rvec_isZero(workspace->f_coa[j]) ) - fprintf( out_control->f3body, - "%6d %24.15e%24.15e%24.15e 0 0 0 0 0 0\n", - system->my_atoms[j].orig_id, - workspace->f_ang[j][0], workspace->f_ang[j][1], - workspace->f_ang[j][2] ); - else if( rvec_isZero(workspace->f_coa[j]) ) - fprintf( out_control->f3body, - "%6d %24.15e%24.15e%24.15e %24.15e%24.15e%24.15e " \ - "%24.15e%24.15e%24.15e\n", - system->my_atoms[j].orig_id, - workspace->f_ang[j][0] + workspace->f_pen[j][0], - workspace->f_ang[j][1] + workspace->f_pen[j][1], - workspace->f_ang[j][2] + workspace->f_pen[j][2], - workspace->f_ang[j][0], workspace->f_ang[j][1], - workspace->f_ang[j][2], - workspace->f_pen[j][0], workspace->f_pen[j][1], - workspace->f_pen[j][2] ); - else{ - fprintf( out_control->f3body, "%6d %24.15e%24.15e%24.15e ", - system->my_atoms[j].orig_id, - workspace->f_ang[j][0] + workspace->f_pen[j][0] + - workspace->f_coa[j][0], - workspace->f_ang[j][1] + workspace->f_pen[j][1] + - workspace->f_coa[j][1], - workspace->f_ang[j][2] + workspace->f_pen[j][2] + - workspace->f_coa[j][2] ); - - fprintf( out_control->f3body, - "%24.15e%24.15e%24.15e %24.15e%24.15e%24.15e "\ - "%24.15e%24.15e%24.15e\n", - workspace->f_ang[j][0], workspace->f_ang[j][1], - workspace->f_ang[j][2], - workspace->f_pen[j][0], 
workspace->f_pen[j][1], - workspace->f_pen[j][2], - workspace->f_coa[j][0], workspace->f_coa[j][1], - workspace->f_coa[j][2] ); + int j; + + fprintf( out_control->f3body, "step: %d\n", data->step ); + fprintf( out_control->f3body, "%6s%-37s%-37s%-37s%-38s\n", + "atom", "3-body total", "f_ang", "f_pen", "f_coa" ); + + for( j = 0; j < system->N; ++j ){ + if( rvec_isZero(workspace->f_pen[j]) && rvec_isZero(workspace->f_coa[j]) ) + fprintf( out_control->f3body, + "%6d %24.15e%24.15e%24.15e 0 0 0 0 0 0\n", + system->my_atoms[j].orig_id, + workspace->f_ang[j][0], workspace->f_ang[j][1], + workspace->f_ang[j][2] ); + else if( rvec_isZero(workspace->f_coa[j]) ) + fprintf( out_control->f3body, + "%6d %24.15e%24.15e%24.15e %24.15e%24.15e%24.15e " \ + "%24.15e%24.15e%24.15e\n", + system->my_atoms[j].orig_id, + workspace->f_ang[j][0] + workspace->f_pen[j][0], + workspace->f_ang[j][1] + workspace->f_pen[j][1], + workspace->f_ang[j][2] + workspace->f_pen[j][2], + workspace->f_ang[j][0], workspace->f_ang[j][1], + workspace->f_ang[j][2], + workspace->f_pen[j][0], workspace->f_pen[j][1], + workspace->f_pen[j][2] ); + else{ + fprintf( out_control->f3body, "%6d %24.15e%24.15e%24.15e ", + system->my_atoms[j].orig_id, + workspace->f_ang[j][0] + workspace->f_pen[j][0] + + workspace->f_coa[j][0], + workspace->f_ang[j][1] + workspace->f_pen[j][1] + + workspace->f_coa[j][1], + workspace->f_ang[j][2] + workspace->f_pen[j][2] + + workspace->f_coa[j][2] ); + + fprintf( out_control->f3body, + "%24.15e%24.15e%24.15e %24.15e%24.15e%24.15e "\ + "%24.15e%24.15e%24.15e\n", + workspace->f_ang[j][0], workspace->f_ang[j][1], + workspace->f_ang[j][2], + workspace->f_pen[j][0], workspace->f_pen[j][1], + workspace->f_pen[j][2], + workspace->f_coa[j][0], workspace->f_coa[j][1], + workspace->f_coa[j][2] ); + } } - } } void Print_Hydrogen_Bond_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control) + 
simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control) { - int j; + int j; - fprintf( out_control->fhb, "step: %d\n", data->step ); - fprintf( out_control->fhb, "%6s\t%-38s\n", "atom", "f_hb[0,1,2]" ); + fprintf( out_control->fhb, "step: %d\n", data->step ); + fprintf( out_control->fhb, "%6s\t%-38s\n", "atom", "f_hb[0,1,2]" ); - for( j = 0; j < system->N; ++j ) - fprintf(out_control->fhb, "%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[j].orig_id, - workspace->f_hb[j][0], - workspace->f_hb[j][1], - workspace->f_hb[j][2] ); + for( j = 0; j < system->N; ++j ) + fprintf(out_control->fhb, "%6d%24.15e%24.15e%24.15e\n", + system->my_atoms[j].orig_id, + workspace->f_hb[j][0], + workspace->f_hb[j][1], + workspace->f_hb[j][2] ); } void Print_Four_Body_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - int j; - - fprintf( out_control->f4body, "step: %d\n", data->step ); - fprintf( out_control->f4body, "%6s\t%-38s%-38s%-38s\n", - "atom", "4-body total", "f_tor", "f_con" ); - - for( j = 0; j < system->N; ++j ){ - if( !rvec_isZero( workspace->f_con[j] ) ) - fprintf( out_control->f4body, - "%6d %24.15e%24.15e%24.15e %24.15e%24.15e%24.15e "\ - "%24.15e%24.15e%24.15e\n", - system->my_atoms[j].orig_id, - workspace->f_tor[j][0] + workspace->f_con[j][0], - workspace->f_tor[j][1] + workspace->f_con[j][1], - workspace->f_tor[j][2] + workspace->f_con[j][2], - workspace->f_tor[j][0], workspace->f_tor[j][1], - workspace->f_tor[j][2], - workspace->f_con[j][0], workspace->f_con[j][1], - workspace->f_con[j][2] ); - else - fprintf( out_control->f4body, - "%6d %24.15e%24.15e%24.15e 0 0 0\n", - system->my_atoms[j].orig_id, workspace->f_tor[j][0], - workspace->f_tor[j][1], workspace->f_tor[j][2] ); - } + int j; + + fprintf( out_control->f4body, "step: %d\n", 
data->step ); + fprintf( out_control->f4body, "%6s\t%-38s%-38s%-38s\n", + "atom", "4-body total", "f_tor", "f_con" ); + + for( j = 0; j < system->N; ++j ){ + if( !rvec_isZero( workspace->f_con[j] ) ) + fprintf( out_control->f4body, + "%6d %24.15e%24.15e%24.15e %24.15e%24.15e%24.15e "\ + "%24.15e%24.15e%24.15e\n", + system->my_atoms[j].orig_id, + workspace->f_tor[j][0] + workspace->f_con[j][0], + workspace->f_tor[j][1] + workspace->f_con[j][1], + workspace->f_tor[j][2] + workspace->f_con[j][2], + workspace->f_tor[j][0], workspace->f_tor[j][1], + workspace->f_tor[j][2], + workspace->f_con[j][0], workspace->f_con[j][1], + workspace->f_con[j][2] ); + else + fprintf( out_control->f4body, + "%6d %24.15e%24.15e%24.15e 0 0 0\n", + system->my_atoms[j].orig_id, workspace->f_tor[j][0], + workspace->f_tor[j][1], workspace->f_tor[j][2] ); + } } void Print_vdW_Coulomb_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - int i; - - return; - - fprintf( out_control->fnonb, "step: %d\n", data->step ); - fprintf( out_control->fnonb, "%6s\t%-38s%-38s%-38s\n", - "atom", "nonbonded_total[0,1,2]", "f_vdw[0,1,2]", "f_ele[0,1,2]" ); - - for( i = 0; i < system->N; ++i ) - fprintf( out_control->fnonb, - "%6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - workspace->f_vdw[i][0] + workspace->f_ele[i][0], - workspace->f_vdw[i][1] + workspace->f_ele[i][1], - workspace->f_vdw[i][2] + workspace->f_ele[i][2], - workspace->f_vdw[i][0], - workspace->f_vdw[i][1], - workspace->f_vdw[i][2], - workspace->f_ele[i][0], - workspace->f_ele[i][1], - workspace->f_ele[i][2] ); + int i; + + return; + + fprintf( out_control->fnonb, "step: %d\n", data->step ); + fprintf( out_control->fnonb, "%6s\t%-38s%-38s%-38s\n", + "atom", "nonbonded_total[0,1,2]", 
"f_vdw[0,1,2]", "f_ele[0,1,2]" ); + + for( i = 0; i < system->N; ++i ) + fprintf( out_control->fnonb, + "%6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + workspace->f_vdw[i][0] + workspace->f_ele[i][0], + workspace->f_vdw[i][1] + workspace->f_ele[i][1], + workspace->f_vdw[i][2] + workspace->f_ele[i][2], + workspace->f_vdw[i][0], + workspace->f_vdw[i][1], + workspace->f_vdw[i][2], + workspace->f_ele[i][0], + workspace->f_ele[i][1], + workspace->f_ele[i][2] ); } void Print_Total_Force( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - int i; + int i; - return; + return; - fprintf( out_control->ftot, "step: %d\n", data->step ); - fprintf( out_control->ftot, "%6s\t%-38s\n", "atom", "atom.f[0,1,2]"); + fprintf( out_control->ftot, "step: %d\n", data->step ); + fprintf( out_control->ftot, "%6s\t%-38s\n", "atom", "atom.f[0,1,2]"); - for( i = 0; i < system->n; ++i ) - fprintf( out_control->ftot, "%6d%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[i].f[0], - system->my_atoms[i].f[1], - system->my_atoms[i].f[2] ); + for( i = 0; i < system->n; ++i ) + fprintf( out_control->ftot, "%6d%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + system->my_atoms[i].f[0], + system->my_atoms[i].f[1], + system->my_atoms[i].f[2] ); } void Compare_Total_Forces( reax_system *system, control_params *control, - simulation_data *data, storage *workspace, - reax_list **lists, output_controls *out_control ) + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { - int i; + int i; - return; - - fprintf( out_control->ftot2, "step: %d\n", data->step ); - fprintf( out_control->ftot2, "%6s\t%-38s%-38s\n", - "atom", "f_total[0,1,2]", "test_force_total[0,1,2]" ); - - for( i = 0; i < 
system->N; ++i ) - fprintf( out_control->ftot2, "%6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", - system->my_atoms[i].orig_id, - system->my_atoms[i].f[0], - system->my_atoms[i].f[1], - system->my_atoms[i].f[2], - workspace->f_be[i][0] + workspace->f_lp[i][0] + - workspace->f_ov[i][0] + workspace->f_un[i][0] + - workspace->f_ang[i][0]+ workspace->f_pen[i][0]+ - workspace->f_coa[i][0]+ + workspace->f_hb[i][0] + - workspace->f_tor[i][0] + workspace->f_con[i][0] + - workspace->f_vdw[i][0] + workspace->f_ele[i][0], - workspace->f_be[i][1] + workspace->f_lp[i][1] + - workspace->f_ov[i][1] + workspace->f_un[i][1] + - workspace->f_ang[i][1]+ workspace->f_pen[i][1]+ - workspace->f_coa[i][1]+ + workspace->f_hb[i][1] + - workspace->f_tor[i][1] + workspace->f_con[i][1] + - workspace->f_vdw[i][1] + workspace->f_ele[i][1], - workspace->f_be[i][2] + workspace->f_lp[i][2] + - workspace->f_ov[i][2] + workspace->f_un[i][2] + - workspace->f_ang[i][2]+ workspace->f_pen[i][2] + - workspace->f_coa[i][2]+ + workspace->f_hb[i][2] + - workspace->f_tor[i][2] + workspace->f_con[i][2] + - workspace->f_vdw[i][2] + workspace->f_ele[i][2] ); + return; + + fprintf( out_control->ftot2, "step: %d\n", data->step ); + fprintf( out_control->ftot2, "%6s\t%-38s%-38s\n", + "atom", "f_total[0,1,2]", "test_force_total[0,1,2]" ); + + for( i = 0; i < system->N; ++i ) + fprintf( out_control->ftot2, "%6d%24.15e%24.15e%24.15e%24.15e%24.15e%24.15e\n", + system->my_atoms[i].orig_id, + system->my_atoms[i].f[0], + system->my_atoms[i].f[1], + system->my_atoms[i].f[2], + workspace->f_be[i][0] + workspace->f_lp[i][0] + + workspace->f_ov[i][0] + workspace->f_un[i][0] + + workspace->f_ang[i][0]+ workspace->f_pen[i][0]+ + workspace->f_coa[i][0]+ + workspace->f_hb[i][0] + + workspace->f_tor[i][0] + workspace->f_con[i][0] + + workspace->f_vdw[i][0] + workspace->f_ele[i][0], + workspace->f_be[i][1] + workspace->f_lp[i][1] + + workspace->f_ov[i][1] + workspace->f_un[i][1] + + workspace->f_ang[i][1]+ 
workspace->f_pen[i][1]+ + workspace->f_coa[i][1]+ + workspace->f_hb[i][1] + + workspace->f_tor[i][1] + workspace->f_con[i][1] + + workspace->f_vdw[i][1] + workspace->f_ele[i][1], + workspace->f_be[i][2] + workspace->f_lp[i][2] + + workspace->f_ov[i][2] + workspace->f_un[i][2] + + workspace->f_ang[i][2]+ workspace->f_pen[i][2] + + workspace->f_coa[i][2]+ + workspace->f_hb[i][2] + + workspace->f_tor[i][2] + workspace->f_con[i][2] + + workspace->f_vdw[i][2] + workspace->f_ele[i][2] ); }*/ /*void Init_Force_Test_Functions( ) -{ + { Print_Interactions[0] = Print_Bond_Orders; Print_Interactions[1] = Print_Bond_Forces; Print_Interactions[2] = Print_LonePair_Forces; diff --git a/PuReMD/src/linear_solvers.c b/PuReMD/src/linear_solvers.c index 541a132be7bc18354069022785af438030c2e286..939b702bf4ff6f716de299fe5f0543cd9a911a72 100644 --- a/PuReMD/src/linear_solvers.c +++ b/PuReMD/src/linear_solvers.c @@ -24,511 +24,2555 @@ #include "io_tools.h" #include "tool_box.h" #include "vector.h" +#include "allocate.h" + +/* Intel MKL */ +#if defined(HAVE_LAPACKE_MKL) +#include "mkl.h" +/* reference LAPACK */ +#elif defined(HAVE_LAPACKE) +#include "lapacke.h" +#endif + +/*#if defined(CG_PERFORMANCE) +real t_start, t_elapsed, matvec_time, dot_time; +#endif*/ + + +static int compare_dbls( const void* arg1, const void* arg2 ) +{ + int ret; + double a1, a2; + + a1 = *(double *) arg1; + a2 = *(double *) arg2; + + if ( a1 < a2 ) + { + ret = -1; + } + else if (a1 == a2) + { + ret = 0; + } + else + { + ret = 1; + } + + return ret; +} + + +static void qsort_dbls( double *array, int array_len ) +{ + qsort( array, (size_t) array_len, sizeof(double), + compare_dbls ); +} + + +static int find_bucket( double *list, int len, double a ) +{ + int s, e, m; + + if ( len == 0 ) + { + return 0; + } + + if ( a > list[len - 1] ) + { + return len; + } + + s = 0; + e = len - 1; + + while ( s < e ) + { + m = (s + e) / 2; + + if ( list[m] < a ) + { + s = m + 1; + } + else + { + e = m; + } + } + + return s; +} + + 
+static void dual_Sparse_MatVec( sparse_matrix *A, rvec2 *x, rvec2 *b, int N ) +{ + int i, j, k, si, num_rows; + real val; + + for ( i = 0; i < N; ++i ) + { + b[i][0] = 0.0; + b[i][1] = 0.0; + } + +#if defined(NEUTRAL_TERRITORY) + num_rows = A->NT; + + if ( A->format == SYM_HALF_MATRIX ) + { + for ( i = 0; i < num_rows; ++i ) + { + si = A->start[i]; + + /* diagonal only contributes once */ + if( i < A->n ) + { + b[i][0] += A->entries[si].val * x[i][0]; + b[i][1] += A->entries[si].val * x[i][1]; + k = si + 1; + } + /* zeros on the diagonal for i >= A->n, + * so skip the diagonal multplication step as zeros + * are not stored (idea: keep the NNZ's the same + * for full shell and neutral territory half-stored + * charge matrices to make debugging easier) */ + else + { + k = si; + } + + for ( ; k < A->end[i]; ++k ) + { + j = A->entries[k].j; + val = A->entries[k].val; + + b[i][0] += val * x[j][0]; + b[i][1] += val * x[j][1]; + + b[j][0] += val * x[i][0]; + b[j][1] += val * x[i][1]; + } + } + } + else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX ) + { + for ( i = 0; i < num_rows; ++i ) + { + si = A->start[i]; + + for ( k = si; k < A->end[i]; ++k ) + { + j = A->entries[k].j; + val = A->entries[k].val; + + b[i][0] += val * x[j][0]; + b[i][1] += val * x[j][1]; + } + } + } +#else + num_rows = A->n; + + if ( A->format == SYM_HALF_MATRIX ) + { + for ( i = 0; i < num_rows; ++i ) + { + si = A->start[i]; + + /* diagonal only contributes once */ + b[i][0] += A->entries[si].val * x[i][0]; + b[i][1] += A->entries[si].val * x[i][1]; + + for ( k = si + 1; k < A->end[i]; ++k ) + { + j = A->entries[k].j; + val = A->entries[k].val; + + b[i][0] += val * x[j][0]; + b[i][1] += val * x[j][1]; + + b[j][0] += val * x[i][0]; + b[j][1] += val * x[i][1]; + } + } + } + else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX ) + { + for ( i = 0; i < num_rows; ++i ) + { + si = A->start[i]; + + for ( k = si; k < A->end[i]; ++k ) + { + j = A->entries[k].j; + val = 
A->entries[k].val; + + b[i][0] += val * x[j][0]; + b[i][1] += val * x[j][1]; + } + } + } +#endif +} + + +static void Sparse_MatVec( sparse_matrix *A, real *x, real *b, int N ) +{ + int i, j, k, si, num_rows; + real val; + + for ( i = 0; i < N; ++i ) + { + b[i] = 0.0; + } + +#if defined(NEUTRAL_TERRITORY) + num_rows = A->NT; + + if ( A->format == SYM_HALF_MATRIX ) + { + for ( i = 0; i < num_rows; ++i ) + { + si = A->start[i]; + + /* diagonal only contributes once */ + if( i < A->n ) + { + b[i] += A->entries[si].val * x[i]; + k = si + 1; + } + /* zeros on the diagonal for i >= A->n, + * so skip the diagonal multplication step as zeros + * are not stored (idea: keep the NNZ's the same + * for full shell and neutral territory half-stored + * charge matrices to make debugging easier) */ + else + { + k = si; + } + + for ( ; k < A->end[i]; ++k ) + { + j = A->entries[k].j; + val = A->entries[k].val; + + b[i] += val * x[j]; + b[j] += val * x[i]; + } + } + } + else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX ) + { + for ( i = 0; i < num_rows; ++i ) + { + si = A->start[i]; + + for ( k = si; k < A->end[i]; ++k ) + { + j = A->entries[k].j; + val = A->entries[k].val; + + b[i] += val * x[j]; + } + } + } +#else + num_rows = A->n; + + if ( A->format == SYM_HALF_MATRIX ) + { + for ( i = 0; i < num_rows; ++i ) + { + si = A->start[i]; + + /* diagonal only contributes once */ + b[i] += A->entries[si].val * x[i]; + + for ( k = si + 1; k < A->end[i]; ++k ) + { + j = A->entries[k].j; + val = A->entries[k].val; + + b[i] += val * x[j]; + b[j] += val * x[i]; + } + } + } + else if ( A->format == SYM_FULL_MATRIX || A->format == FULL_MATRIX ) + { + for ( i = 0; i < num_rows; ++i ) + { + si = A->start[i]; + + for ( k = si; k < A->end[i]; ++k ) + { + j = A->entries[k].j; + val = A->entries[k].val; + + b[i] += val * x[j]; + } + } + } +#endif +} + + +real setup_sparse_approx_inverse( reax_system *system, simulation_data *data, storage *workspace, + mpi_datatypes *mpi_data, 
sparse_matrix *A, sparse_matrix **A_spar_patt, + int nprocs, real filter ) +{ + int i, bin, total, pos; + int n, n_gather, s_local, s, n_local; + int target_proc; + int k; + int pj, size; + int left, right, p, turn; + int num_rows; + + real threshold, pivot, tmp; + real *input_array; + real *samplelist_local, *samplelist; + real *pivotlist; + real *bucketlist_local, *bucketlist; + + int *srecv, *sdispls; + int *scounts_local, *scounts; + int *dspls_local, *dspls; + int *bin_elements; + + MPI_Comm comm; + + real start, t_start, t_comm; + real total_comm; + + start = MPI_Wtime(); + t_comm = 0.0; + + srecv = NULL; + sdispls = NULL; + samplelist_local = NULL; + samplelist = NULL; + pivotlist = NULL; + input_array = NULL; + bucketlist_local = NULL; + bucketlist = NULL; + scounts_local = NULL; + scounts = NULL; + dspls_local = NULL; + dspls = NULL; + bin_elements = NULL; + + comm = mpi_data->world; +#if defined(NEUTRAL_TERRITORY) + num_rows = A->NT; + fprintf( stdout,"%d %d %d\n", A->n, A->NT, A->m ); + fflush( stdout ); +#else + num_rows = A->n; +#endif + + if ( *A_spar_patt == NULL ) + { +#if defined(NEUTRAL_TERRITORY) + Allocate_Matrix2( A_spar_patt, A->n, A->NT, A->m, + A->format, comm ); +#else + Allocate_Matrix2( A_spar_patt, A->n, system->local_cap, A->m, + A->format, comm ); +#endif + } + + else /*if ( (*A_spar_patt)->m < A->m )*/ + { + Deallocate_Matrix( *A_spar_patt ); +#if defined(NEUTRAL_TERRITORY) + Allocate_Matrix2( A_spar_patt, A->n, A->NT, A->m, + A->format, comm ); +#else + Allocate_Matrix2( A_spar_patt, A->n, system->local_cap, A->m, + A->format, comm ); +#endif + } + + n_local = 0; + for( i = 0; i < num_rows; ++i ) + { + n_local += (A->end[i] - A->start[i] + 9)/10; + } + s_local = (int) (12.0 * (log2(n_local) + log2(nprocs))); + + t_start = MPI_Wtime(); + MPI_Allreduce( &n_local, &n, 1, MPI_INT, MPI_SUM, comm ); + MPI_Reduce( &s_local, &s, 1, MPI_INT, MPI_SUM, MASTER_NODE, comm ); + t_comm += MPI_Wtime() - t_start; + + /* count num. 
bin elements for each processor, uniform bin sizes */ + input_array = smalloc( sizeof(real) * n_local, + "setup_sparse_approx_inverse::input_array", MPI_COMM_WORLD ); + scounts_local = smalloc( sizeof(int) * nprocs, + "setup_sparse_approx_inverse::scounts_local", MPI_COMM_WORLD ); + scounts = smalloc( sizeof(int) * nprocs, + "setup_sparse_approx_inverse::scounts", MPI_COMM_WORLD ); + bin_elements = smalloc( sizeof(int) * nprocs, + "setup_sparse_approx_inverse::bin_elements", MPI_COMM_WORLD ); + dspls_local = smalloc( sizeof(int) * nprocs, + "setup_sparse_approx_inverse::displs_local", MPI_COMM_WORLD ); + bucketlist_local = smalloc( sizeof(real) * n_local, + "setup_sparse_approx_inverse::bucketlist_local", MPI_COMM_WORLD ); + dspls = smalloc( sizeof(int) * nprocs, + "setup_sparse_approx_inverse::dspls", MPI_COMM_WORLD ); + if ( nprocs > 1 ) + { + pivotlist = smalloc( sizeof(real) * (nprocs - 1), + "setup_sparse_approx_inverse::pivotlist", MPI_COMM_WORLD ); + } + samplelist_local = smalloc( sizeof(real) * s_local, + "setup_sparse_approx_inverse::samplelist_local", MPI_COMM_WORLD ); + if ( system->my_rank == MASTER_NODE ) + { + samplelist = smalloc( sizeof(real) * s, + "setup_sparse_approx_inverse::samplelist", MPI_COMM_WORLD ); + srecv = smalloc( sizeof(int) * nprocs, + "setup_sparse_approx_inverse::srecv", MPI_COMM_WORLD ); + sdispls = smalloc( sizeof(int) * nprocs, + "setup_sparse_approx_inverse::sdispls", MPI_COMM_WORLD ); + } + + n_local = 0; + for ( i = 0; i < num_rows; ++i ) + { + for ( pj = A->start[i]; pj < A->end[i]; pj += 10 ) + { + input_array[n_local++] = A->entries[pj].val; + } + } + + for ( i = 0; i < s_local; i++) + { + /* samplelist_local[i] = input_array[rand( ) % n_local]; */ + samplelist_local[i] = input_array[ i ]; + } + + /* gather samples at the root process */ + t_start = MPI_Wtime(); + MPI_Gather( &s_local, 1, MPI_INT, srecv, 1, MPI_INT, MASTER_NODE, comm ); + t_comm += MPI_Wtime() - t_start; + + if( system->my_rank == MASTER_NODE ) + { + 
sdispls[0] = 0; + for ( i = 0; i < nprocs - 1; ++i ) + { + sdispls[i + 1] = sdispls[i] + srecv[i]; + } + } + + t_start = MPI_Wtime(); + MPI_Gatherv( samplelist_local, s_local, MPI_DOUBLE, + samplelist, srecv, sdispls, MPI_DOUBLE, MASTER_NODE, comm); + t_comm += MPI_Wtime() - t_start; + + /* sort samples at the root process and select pivots */ + if ( system->my_rank == MASTER_NODE ) + { + qsort_dbls( samplelist, s ); + + for ( i = 1; i < nprocs; ++i ) + { + pivotlist[i - 1] = samplelist[(i * s) / nprocs]; + } + } + + /* broadcast pivots */ + t_start = MPI_Wtime(); + MPI_Bcast( pivotlist, nprocs - 1, MPI_DOUBLE, MASTER_NODE, comm ); + t_comm += MPI_Wtime() - t_start; + + for ( i = 0; i < nprocs; ++i ) + { + scounts_local[i] = 0; + } + + for ( i = 0; i < n_local; ++i ) + { + pos = find_bucket( pivotlist, nprocs - 1, input_array[i] ); + scounts_local[pos]++; + } + + for ( i = 0; i < nprocs; ++i ) + { + bin_elements[i] = scounts_local[i]; + scounts[i] = scounts_local[i]; + } + + /* compute displacements for MPI comm */ + dspls_local[0] = 0; + for ( i = 0; i < nprocs - 1; ++i ) + { + dspls_local[i + 1] = dspls_local[i] + scounts_local[i]; + } + + /* bin elements */ + for ( i = 0; i < n_local; ++i ) + { + bin = find_bucket( pivotlist, nprocs - 1, input_array[i] ); + pos = dspls_local[bin] + scounts_local[bin] - bin_elements[bin]; + bucketlist_local[pos] = input_array[i]; + bin_elements[bin]--; + } + + /* determine counts for elements per process */ + t_start = MPI_Wtime(); + MPI_Allreduce( MPI_IN_PLACE, scounts, nprocs, MPI_INT, MPI_SUM, comm ); + t_comm += MPI_Wtime() - t_start; + + /* find the target process */ + target_proc = 0; + total = 0; + k = n * filter; + for (i = nprocs - 1; i >= 0; --i ) + { + if ( total + scounts[i] >= k ) + { + /* global k becomes local k*/ + k -= total; + target_proc = i; + break; + } + total += scounts[i]; + } + + n_gather = scounts[target_proc]; + if ( system->my_rank == target_proc ) + { + bucketlist = smalloc( sizeof( real ) * n_gather, 
+ "setup_sparse_approx_inverse::bucketlist", MPI_COMM_WORLD ); + } + + /* send local buckets to target processor for quickselect */ + t_start = MPI_Wtime(); + MPI_Gather( scounts_local + target_proc, 1, MPI_INT, scounts, + 1, MPI_INT, target_proc, comm ); + t_comm += MPI_Wtime() - t_start; + + if ( system->my_rank == target_proc ) + { + dspls[0] = 0; + for ( i = 0; i < nprocs - 1; ++i ) + { + dspls[i + 1] = dspls[i] + scounts[i]; + } + } + + t_start = MPI_Wtime(); + MPI_Gatherv( bucketlist_local + dspls_local[target_proc], scounts_local[target_proc], MPI_DOUBLE, + bucketlist, scounts, dspls, MPI_DOUBLE, target_proc, comm); + t_comm += MPI_Wtime() - t_start; + + /* apply quick select algorithm at the target process */ + if ( system->my_rank == target_proc ) + { + left = 0; + right = n_gather-1; + + turn = 0; + while( k ) + { + p = left; + turn = 1 - turn; + + /* alternating pivots in order to handle corner cases */ + if ( turn == 1 ) + { + pivot = bucketlist[right]; + } + else + { + pivot = bucketlist[left]; + } + for ( i = left + 1 - turn; i <= right-turn; ++i ) + { + if ( bucketlist[i] > pivot ) + { + tmp = bucketlist[i]; + bucketlist[i] = bucketlist[p]; + bucketlist[p] = tmp; + p++; + } + } + if ( turn == 1 ) + { + tmp = bucketlist[p]; + bucketlist[p] = bucketlist[right]; + bucketlist[right] = tmp; + } + else + { + tmp = bucketlist[p]; + bucketlist[p] = bucketlist[left]; + bucketlist[left] = tmp; + } + + if( p == k - 1) + { + threshold = bucketlist[p]; + break; + } + else if( p > k - 1 ) + { + right = p - 1; + } + else + { + left = p + 1; + } + } + /* comment out if ACKS2 and/or EE is not an option + if(threshold < 1.000000) + { + threshold = 1.000001; + } */ + } + + /* broadcast the filtering value */ + t_start = MPI_Wtime(); + MPI_Bcast( &threshold, 1, MPI_DOUBLE, target_proc, comm ); + t_comm += MPI_Wtime() - t_start; + +#if defined(DEBUG) + int nnz = 0; +#endif + + /* build entries of that pattern*/ + for ( i = 0; i < num_rows; ++i ) + { + 
(*A_spar_patt)->start[i] = A->start[i]; + size = A->start[i]; + + for ( pj = A->start[i]; pj < A->end[i]; ++pj ) + { + if ( ( A->entries[pj].val >= threshold ) || ( A->entries[pj].j == i ) ) + { + (*A_spar_patt)->entries[size].val = A->entries[pj].val; + (*A_spar_patt)->entries[size].j = A->entries[pj].j; + size++; + +#if defined(DEBUG) + nnz++; +#endif + } + } + (*A_spar_patt)->end[i] = size; + } + +#if defined(DEBUG) + MPI_Allreduce( MPI_IN_PLACE, &nnz, 1, MPI_INT, MPI_SUM, comm ); + if ( system->my_rank == MASTER_NODE ) + { + fprintf( stdout, " [INFO] \ntotal nnz in all charge matrices = %d\ntotal nnz in all sparsity patterns = %d\nthreshold = %.15lf\n", + n, nnz, threshold ); + fprintf( stdout, "SAI SETUP takes %.2f seconds\n", MPI_Wtime() - start ); + fflush( stdout ); + } +#endif + + MPI_Reduce( &t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, + mpi_data->world ); + + if( system->my_rank == MASTER_NODE ) + { + data->timing.cm_solver_comm += total_comm / nprocs; + } + + sfree( input_array, "setup_sparse_approx_inverse::input_array" ); + sfree( scounts_local, "setup_sparse_approx_inverse::scounts_local" ); + sfree( scounts, "setup_sparse_approx_inverse::scounts" ); + sfree( bin_elements, "setup_sparse_approx_inverse::bin_elements" ); + sfree( dspls_local, "setup_sparse_approx_inverse::displs_local" ); + sfree( bucketlist_local, "setup_sparse_approx_inverse::bucketlist_local" ); + sfree( dspls, "setup_sparse_approx_inverse::dspls" ); + if ( nprocs > 1) + { + sfree( pivotlist, "setup_sparse_approx_inverse::pivotlist" ); + } + sfree( samplelist_local, "setup_sparse_approx_inverse::samplelist_local" ); + if ( system->my_rank == MASTER_NODE ) + { + sfree( samplelist, "setup_sparse_approx_inverse::samplelist" ); + sfree( srecv, "setup_sparse_approx_inverse::srecv" ); + sfree( sdispls, "setup_sparse_approx_inverse::sdispls" ); + } + if ( system->my_rank == target_proc ) + { + sfree( bucketlist, "setup_sparse_approx_inverse::bucketlist" ); + } + + return 
MPI_Wtime() - start; +} + + +#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) +#if defined(NEUTRAL_TERRITORY) +real sparse_approx_inverse( reax_system *system, simulation_data *data, + storage *workspace, mpi_datatypes *mpi_data, + sparse_matrix *A, sparse_matrix *A_spar_patt, + sparse_matrix **A_app_inv, int nprocs ) +{ + /////////////// + int N, M, d_i, d_j; + int i, k, pj, j_temp; + int local_pos, atom_pos, identity_pos; + lapack_int m, n, nrhs, lda, ldb, info; + int *pos_x, *X; + real *e_j, *dense_matrix; + int cnt; + + reax_atom *atom; + int *row_nnz; + int **j_list; + real **val_list; + + int d, count, index; + mpi_out_data *out_bufs; + MPI_Comm comm; + MPI_Request req[12]; + MPI_Status stat[12]; + neighbor_proc *nbr; + int *j_send, *j_recv[6]; + real *val_send, *val_recv[6]; + + real start, t_start, t_comm; + real total_comm; + /////////////////// + start = MPI_Wtime(); + t_comm = 0.0; + + comm = mpi_data->world; + + if ( *A_app_inv == NULL) + { + //TODO: FULL_MATRIX? + Allocate_Matrix2( A_app_inv, A_spar_patt->n, A->NT, A_spar_patt->m, + SYM_FULL_MATRIX, comm ); + } + + else /* if ( (*A_app_inv)->m < A_spar_patt->m ) */ + { + Deallocate_Matrix( *A_app_inv ); + Allocate_Matrix2( A_app_inv, A_spar_patt->n, A->NT, A_spar_patt->m, + SYM_FULL_MATRIX, comm ); + } + + pos_x = NULL; + X = NULL; + + row_nnz = NULL; + j_list = NULL; + val_list = NULL; + + j_send = NULL; + val_send = NULL; + for( d = 0; d < 6; ++d ) + { + j_recv[d] = NULL; + val_recv[d] = NULL; + } + //////////////////// + row_nnz = (int *) malloc( sizeof(int) * A->NT ); + + //TODO: allocation size + j_list = (int **) malloc( sizeof(int *) * system->N ); + val_list = (real **) malloc( sizeof(real *) * system->N ); + + for ( i = 0; i < A->NT; ++i ) + { + row_nnz[i] = 0; + } + + /* mark the atoms that already have their row stored in the local matrix */ + for ( i = 0; i < A->n; ++i ) + { + row_nnz[i] = A->end[i] - A->start[i]; + } + + /* Announce the nnz's in each row that will be communicated 
later */ + t_start = MPI_Wtime(); + Dist( system, mpi_data, row_nnz, REAL_PTR_TYPE, MPI_INT ); + t_comm += MPI_Wtime() - t_start; + fprintf( stdout,"SAI after Dist call\n"); + fflush( stdout ); + + comm = mpi_data->comm_mesh3D; + out_bufs = mpi_data->out_nt_buffers; + count = 0; + + /* use a Dist-like approach to send the row information */ + for ( d = 0; d < 6; ++d) + { + /* initiate recvs */ + nbr = &(system->my_nt_nbrs[d]); + if ( nbr->atoms_cnt ) + { + /* calculate the total data that will be received */ + cnt = 0; + for( i = nbr->atoms_str; i < (nbr->atoms_str + nbr->atoms_cnt); ++i ) + { + cnt += row_nnz[i]; + } + + /* initiate Irecv */ + if( cnt ) + { + count += 2; + + j_recv[d] = (int *) malloc( sizeof(int) * cnt ); + val_recv[d] = (real *) malloc( sizeof(real) * cnt ); + + fprintf( stdout,"Dist communication receive phase direction %d will receive %d\n", d, cnt); + fflush( stdout ); + t_start = MPI_Wtime(); + MPI_Irecv( j_recv + d, cnt, MPI_INT, nbr->receive_rank, d, comm, &req[2 * d] ); + MPI_Irecv( val_recv + d, cnt, MPI_DOUBLE, nbr->receive_rank, d, comm, &req[2 * d + 1] ); + t_comm += MPI_Wtime() - t_start; + } + } + } + ///////////////////// + for( d = 0; d < 6; ++d) + { + nbr = &(system->my_nt_nbrs[d]); + /* send both messages in dimension d */ + if( out_bufs[d].cnt ) + { + cnt = 0; + for( i = 0; i < out_bufs[d].cnt; ++i ) + { + cnt += A->end[ out_bufs[d].index[i] ] - A->start[ out_bufs[d].index[i] ]; + if(out_bufs[d].index[i] < 0 || out_bufs[d].index[i] >= A->n) + { + fprintf( stdout, "INDEXING ERROR %d > %d\n", out_bufs[d].index[i], A->n ); + fflush( stdout ); + } + // row_nnz[ out_bufs[d].index[i] ]; + } + fprintf( stdout,"Dist communication send phase direction %d should send %d\n", d, cnt); + fflush( stdout ); + + if( cnt ) + { + j_send = (int *) malloc( sizeof(int) * cnt ); + val_send = (real *) malloc( sizeof(real) * cnt ); + + cnt = 0; + for( i = 0; i < out_bufs[d].cnt; ++i ) + { + for( pj = A->start[ out_bufs[d].index[i] ]; pj < A->end[ 
out_bufs[d].index[i] ]; ++pj ) + { + atom = &system->my_atoms[ A->entries[pj].j ]; + j_send[cnt] = atom->orig_id; + val_send[cnt] = A->entries[pj].val; + cnt++; + } + } + + fprintf( stdout,"Dist communication send phase direction %d will send %d\n", d, cnt ); + fflush( stdout ); + + t_start = MPI_Wtime(); + MPI_Send( j_send, cnt, MPI_INT, nbr->rank, d, comm ); + fprintf( stdout,"Dist communication send phase direction %d cnt = %d\n", d, cnt); + fflush( stdout ); + MPI_Send( val_send, cnt, MPI_DOUBLE, nbr->rank, d, comm ); + fprintf( stdout,"Dist communication send phase direction %d cnt = %d\n", d, cnt); + fflush( stdout ); + t_comm += MPI_Wtime() - t_start; + } + } + } + fprintf( stdout," Dist communication for sending row info before waitany\n"); + fflush( stdout ); + /////////////////////// + for ( d = 0; d < count; ++d ) + { + t_start = MPI_Wtime(); + MPI_Waitany( REAX_MAX_NT_NBRS, req, &index, stat); + t_comm += MPI_Wtime() - t_start; + + nbr = &(system->my_nt_nbrs[index/2]); + cnt = 0; + for( i = nbr->atoms_str; i < (nbr->atoms_str + nbr->atoms_cnt); ++i ) + { + if( (index%2) == 0 ) + { + j_list[i] = (int *) malloc( sizeof(int) * row_nnz[i] ); + for( pj = 0; pj < row_nnz[i]; ++pj ) + { + j_list[i][pj] = j_recv[index/2][cnt]; + cnt++; + } + } + else + { + val_list[i] = (real *) malloc( sizeof(real) * row_nnz[i] ); + for( pj = 0; pj < row_nnz[i]; ++pj ) + { + val_list[i][pj] = val_recv[index/2][cnt]; + cnt++; + } + } + + } + } + ////////////////////// + fprintf( stdout," wow wow wow, Dist communication for sending row info worked\n"); + fflush( stdout ); + //TODO: size? 
+ X = (int *) malloc( sizeof(int) * (system->bigN + 1) ); + pos_x = (int *) malloc( sizeof(int) * (system->bigN + 1) ); + + for ( i = 0; i < A_spar_patt->NT; ++i ) + { + N = 0; + M = 0; + for ( k = 0; k <= system->bigN; ++k ) + { + X[k] = 0; + pos_x[k] = 0; + } + + /* find column indices of nonzeros (which will be the columns indices of the dense matrix) */ + for ( pj = A_spar_patt->start[i]; pj < A_spar_patt->end[i]; ++pj ) + { + j_temp = A_spar_patt->entries[pj].j; + atom = &system->my_atoms[j_temp]; + ++N; + + /* for each of those indices + * search through the row of full A of that index */ + + /* the case where the local matrix has that index's row */ + if( j_temp < A->NT ) + { + for ( k = A->start[ j_temp ]; k < A->end[ j_temp ]; ++k ) + { + /* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */ + atom = &system->my_atoms[ A->entries[k].j ]; + X[atom->orig_id] = 1; + } + } + + /* the case where we communicated that index's row */ + else + { + for ( k = 0; k < row_nnz[j_temp]; ++k ) + { + /* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */ + X[ j_list[j_temp][k] ] = 1; + } + } + } + + /* enumerate the row indices from 0 to (# of nonzero rows - 1) for the dense matrix */ + identity_pos = M; + atom = &system->my_atoms[ i ]; + atom_pos = atom->orig_id; + + for ( k = 0; k <= system->bigN; k++) + { + if ( X[k] != 0 ) + { + pos_x[k] = M; + if ( k == atom_pos ) + { + identity_pos = M; + } + ++M; + } + } + + /* allocate memory for NxM dense matrix */ + dense_matrix = (real *) malloc( sizeof(real) * N * M ); + + /* fill in the entries of dense matrix */ + for ( d_j = 0; d_j < N; ++d_j) + { + /* all rows are initialized to zero */ + for ( d_i = 0; d_i < M; ++d_i ) + { + dense_matrix[d_i * N + d_j] = 0.0; + } + /* change the value if any of the column indices is seen */ + + /* it is in the original list */ + local_pos = A_spar_patt->entries[ A_spar_patt->start[i] + d_j ].j; + if( 
local_pos < 0 || local_pos >= system->N ) + { + fprintf( stderr, "THE LOCAL POSITION OF THE ATOM IS NOT VALID, STOP THE EXECUTION\n"); + fflush( stderr ); + + } + ///////////////////////////// + if( local_pos < A->NT ) + { + for ( d_i = A->start[local_pos]; d_i < A->end[local_pos]; ++d_i ) + { + atom = &system->my_atoms[ A->entries[d_i].j ]; + if (pos_x[ atom->orig_id ] >= M || d_j >= N ) + { + fprintf( stderr, "CANNOT MAP IT TO THE DENSE MATRIX, STOP THE EXECUTION, orig_id = %d, i = %d, j = %d, M = %d N = %d\n", atom->orig_id, pos_x[ atom->orig_id ], d_j, M, N ); + fflush( stderr ); + } + if ( X[ atom->orig_id ] == 1 ) + { + dense_matrix[ pos_x[ atom->orig_id ] * N + d_j ] = A->entries[d_i].val; + } + } + } + else + { + for ( d_i = 0; d_i < row_nnz[ local_pos ]; ++d_i ) + { + if (pos_x[ j_list[local_pos][d_i] ] >= M || d_j >= N ) + { + fprintf( stderr, "CANNOT MAP IT TO THE DENSE MATRIX, STOP THE EXECUTION, %d %d\n", pos_x[ j_list[local_pos][d_i] ], d_j); + fflush( stderr ); + } + if ( X[ j_list[local_pos][d_i] ] == 1 ) + { + dense_matrix[ pos_x[ j_list[local_pos][d_i] ] * N + d_j ] = val_list[local_pos][d_i]; + } + } + } + } + + /* create the right hand side of the linear equation + * that is the full column of the identity matrix */ + e_j = (real *) malloc( sizeof(real) * M ); + ////////////////////// + for ( k = 0; k < M; ++k ) + { + e_j[k] = 0.0; + } + e_j[identity_pos] = 1.0; + + /* Solve the overdetermined system AX = B through the least-squares problem: + * min ||B - AX||_2 */ + m = M; + n = N; + nrhs = 1; + lda = N; + ldb = nrhs; + + info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', m, n, nrhs, dense_matrix, lda, + e_j, ldb ); + + /* Check for the full rank */ + if ( info > 0 ) + { + fprintf( stderr, "The diagonal element %i of the triangular factor ", info ); + fprintf( stderr, "of A is zero, so that A does not have full rank;\n" ); + fprintf( stderr, "the least squares solution could not be computed.\n" ); + exit( INVALID_INPUT ); + } + + /* accumulate the 
resulting vector to build A_app_inv */ + (*A_app_inv)->start[i] = A_spar_patt->start[i]; + (*A_app_inv)->end[i] = A_spar_patt->end[i]; + for ( k = (*A_app_inv)->start[i]; k < (*A_app_inv)->end[i]; ++k) + { + (*A_app_inv)->entries[k].j = A_spar_patt->entries[k].j; + (*A_app_inv)->entries[k].val = e_j[k - A_spar_patt->start[i]]; + } + free( dense_matrix ); + free( e_j ); + } + + free( pos_x); + free( X ); + ///////////////////// + MPI_Reduce(&t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + + if( system->my_rank == MASTER_NODE ) + { + data->timing.cm_solver_comm += total_comm / nprocs; + } + + return MPI_Wtime() - start; +} + + +#else +real sparse_approx_inverse( reax_system *system, simulation_data *data, + storage *workspace, mpi_datatypes *mpi_data, + sparse_matrix *A, sparse_matrix *A_spar_patt, + sparse_matrix **A_app_inv, int nprocs ) +{ + int N, M, d_i, d_j, mark; + int i, k, pj, j_temp, push; + int local_pos, atom_pos, identity_pos; + lapack_int m, n, nrhs, lda, ldb, info; + int *X, *q; + real *e_j, *dense_matrix; + int size_e, size_dense; + int cnt; + reax_atom *atom; + int *row_nnz; + int **j_list; + real **val_list; + int d; + mpi_out_data *out_bufs; + MPI_Comm comm; + MPI_Request req1, req2, req3, req4; + int flag1, flag2; + MPI_Status stat1, stat2, stat3, stat4; + const neighbor_proc *nbr1, *nbr2; + int *j_send, *j_recv1, *j_recv2; + int size_send, size_recv1, size_recv2; + real *val_send, *val_recv1, *val_recv2; + real start, t_start, t_comm; + real total_comm; + + start = MPI_Wtime(); + t_comm = 0.0; + + comm = mpi_data->world; + + if ( *A_app_inv == NULL) + { + Allocate_Matrix2( A_app_inv, A_spar_patt->n, system->local_cap, A_spar_patt->m, + SYM_FULL_MATRIX, comm ); + } + else /* if ( (*A_app_inv)->m < A_spar_patt->m ) */ + { + Deallocate_Matrix( *A_app_inv ); + Allocate_Matrix2( A_app_inv, A_spar_patt->n, system->local_cap, A_spar_patt->m, + SYM_FULL_MATRIX, comm ); + } + + X = NULL; + j_send = NULL; + val_send = NULL; + 
j_recv1 = NULL; + j_recv2 = NULL; + val_recv1 = NULL; + val_recv2 = NULL; + size_send = 0; + size_recv1 = 0; + size_recv2 = 0; + + e_j = NULL; + dense_matrix = NULL; + size_e = 0; + size_dense = 0; + + + row_nnz = smalloc( sizeof(int) * system->total_cap, + "sparse_approx_inverse::row_nnz", MPI_COMM_WORLD ); + j_list = smalloc( sizeof(int *) * system->N, + "sparse_approx_inverse::j_list", MPI_COMM_WORLD ); + val_list = smalloc( sizeof(real *) * system->N, + "sparse_approx_inverse::val_list", MPI_COMM_WORLD ); + + for ( i = 0; i < system->total_cap; ++i ) + { + row_nnz[i] = 0; + } + + /* mark the atoms that already have their row stored in the local matrix */ + for ( i = 0; i < system->n; ++i ) + { + row_nnz[i] = A->end[i] - A->start[i]; + } + + /* Announce the nnz's in each row that will be communicated later */ + t_start = MPI_Wtime(); + Dist( system, mpi_data, row_nnz, INT_PTR_TYPE, MPI_INT ); + t_comm += MPI_Wtime() - t_start; + + comm = mpi_data->comm_mesh3D; + out_bufs = mpi_data->out_buffers; + + /* use a Dist-like approach to send the row information */ + for ( d = 0; d < 3; ++d) + { + flag1 = 0; + flag2 = 0; + cnt = 0; + + /* initiate recvs */ + nbr1 = &system->my_nbrs[2 * d]; + if ( nbr1->atoms_cnt ) + { + cnt = 0; + + /* calculate the total data that will be received */ + for( i = nbr1->atoms_str; i < (nbr1->atoms_str + nbr1->atoms_cnt); ++i ) + { + cnt += row_nnz[i]; + } + + /* initiate Irecv */ + if( cnt ) + { + flag1 = 1; + + if ( size_recv1 < cnt ) + { + if ( size_recv1 ) + { + sfree( j_recv1, "sparse_approx_inverse::j_recv1" ); + sfree( val_recv1, "sparse_approx_inverse::val_recv1" ); + } + + size_recv1 = cnt * SAFE_ZONE; + + j_recv1 = smalloc( sizeof(int) * size_recv1, + "sparse_approx_inverse::j_recv1", MPI_COMM_WORLD ); + val_recv1 = smalloc( sizeof(real) * size_recv1, + "sparse_approx_inverse::val_recv1", MPI_COMM_WORLD ); + } + + t_start = MPI_Wtime(); + MPI_Irecv( j_recv1, cnt, MPI_INT, nbr1->rank, 2 * d + 1, comm, &req1 ); + MPI_Irecv( 
val_recv1, cnt, MPI_DOUBLE, nbr1->rank, 2 * d + 1, comm, &req2 ); + t_comm += MPI_Wtime() - t_start; + } + } + + nbr2 = &system->my_nbrs[2 * d + 1]; + if ( nbr2->atoms_cnt ) + { + /* calculate the total data that will be received */ + cnt = 0; + for( i = nbr2->atoms_str; i < (nbr2->atoms_str + nbr2->atoms_cnt); ++i ) + { + cnt += row_nnz[i]; + } + + /* initiate Irecv */ + if( cnt ) + { + flag2 = 1; + + if ( size_recv2 < cnt ) + { + if ( size_recv2 ) + { + sfree( j_recv2, "sparse_approx_inverse::j_recv2" ); + sfree( val_recv2, "sparse_approx_inverse::val_recv2" ); + } + + size_recv2 = cnt * SAFE_ZONE; + + j_recv2 = smalloc( sizeof(int) * size_recv2, + "sparse_approx_inverse::j_recv2", MPI_COMM_WORLD ); + val_recv2 = smalloc( sizeof(real) * size_recv2, + "sparse_approx_inverse::val_recv2", MPI_COMM_WORLD ); + } + + t_start = MPI_Wtime(); + MPI_Irecv( j_recv2, cnt, MPI_INT, nbr2->rank, 2 * d, comm, &req3 ); + MPI_Irecv( val_recv2, cnt, MPI_DOUBLE, nbr2->rank, 2 * d, comm, &req4 ); + t_comm += MPI_Wtime() - t_start; + } + } + + /* send both messages in dimension d */ + if ( out_bufs[2 * d].cnt ) + { + cnt = 0; + for ( i = 0; i < out_bufs[2 * d].cnt; ++i ) + { + cnt += row_nnz[ out_bufs[2 * d].index[i] ]; + } + + if ( cnt > 0 ) + { + if ( size_send < cnt ) + { + if ( size_send ) + { + sfree( j_send, "sparse_approx_inverse::j_send" ); + sfree( val_send, "sparse_approx_inverse::val_send" ); + } + + size_send = cnt * SAFE_ZONE; + + j_send = smalloc( sizeof(int) * size_send, + "sparse_approx_inverse::j_send", MPI_COMM_WORLD ); + val_send = smalloc( sizeof(real) * size_send, + "sparse_approx_inverse::j_send", MPI_COMM_WORLD ); + } + + cnt = 0; + for ( i = 0; i < out_bufs[2 * d].cnt; ++i ) + { + if ( out_bufs[2 * d].index[i] < A->n ) + { + for ( pj = A->start[ out_bufs[2 * d].index[i] ]; pj < A->end[ out_bufs[2 * d].index[i] ]; ++pj ) + { + atom = &system->my_atoms[ A->entries[pj].j ]; + j_send[cnt] = atom->orig_id; + val_send[cnt] = A->entries[pj].val; + cnt++; + } + } + 
else + { + for ( pj = 0; pj < row_nnz[ out_bufs[2 * d].index[i] ]; ++pj ) + { + j_send[cnt] = j_list[ out_bufs[2 * d].index[i] ][pj]; + val_send[cnt] = val_list[ out_bufs[2 * d].index[i] ][pj]; + cnt++; + } + } + } + + t_start = MPI_Wtime(); + MPI_Send( j_send, cnt, MPI_INT, nbr1->rank, 2 * d, comm ); + MPI_Send( val_send, cnt, MPI_DOUBLE, nbr1->rank, 2 * d, comm ); + t_comm += MPI_Wtime() - t_start; + } + } + + if ( out_bufs[2 * d + 1].cnt ) + { + cnt = 0; + for ( i = 0; i < out_bufs[2 * d + 1].cnt; ++i ) + { + cnt += row_nnz[ out_bufs[2 * d + 1].index[i] ]; + } + + if ( cnt > 0 ) + { + + if ( size_send < cnt ) + { + if ( size_send ) + { + sfree( j_send, "sparse_approx_inverse::j_send" ); + sfree( val_send, "sparse_approx_inverse::j_send" ); + } + + size_send = cnt * SAFE_ZONE; + + j_send = smalloc( sizeof(int) * size_send, + "sparse_approx_inverse::j_send", MPI_COMM_WORLD ); + val_send = smalloc( sizeof(real) * size_send, + "sparse_approx_inverse::val_send", MPI_COMM_WORLD ); + } + + cnt = 0; + for ( i = 0; i < out_bufs[2 * d + 1].cnt; ++i ) + { + if ( out_bufs[2 * d + 1].index[i] < A->n ) + { + for ( pj = A->start[ out_bufs[2 * d + 1].index[i] ]; pj < A->end[ out_bufs[2 * d + 1].index[i] ]; ++pj ) + { + atom = &system->my_atoms[ A->entries[pj].j ]; + j_send[cnt] = atom->orig_id; + val_send[cnt] = A->entries[pj].val; + cnt++; + } + } + else + { + for ( pj = 0; pj < row_nnz[ out_bufs[2 * d + 1].index[i] ]; ++pj ) + { + j_send[cnt] = j_list[ out_bufs[2 * d + 1].index[i] ][pj]; + val_send[cnt] = val_list[ out_bufs[2 * d + 1].index[i] ][pj]; + cnt++; + } + } + } + + t_start = MPI_Wtime(); + MPI_Send( j_send, cnt, MPI_INT, nbr2->rank, 2 * d + 1, comm ); + MPI_Send( val_send, cnt, MPI_DOUBLE, nbr2->rank, 2 * d + 1, comm ); + t_comm += MPI_Wtime() - t_start; + } + + } + + if ( flag1 ) + { + t_start = MPI_Wtime(); + MPI_Wait( &req1, &stat1 ); + MPI_Wait( &req2, &stat2 ); + t_comm += MPI_Wtime() - t_start; + + cnt = 0; + for ( i = nbr1->atoms_str; i < (nbr1->atoms_str + 
nbr1->atoms_cnt); ++i ) + { + j_list[i] = smalloc( sizeof(int) * row_nnz[i], + "sparse_approx_inverse::j_list[i]", MPI_COMM_WORLD ); + val_list[i] = smalloc( sizeof(real) * row_nnz[i], + "sparse_approx_inverse::val_list[i]", MPI_COMM_WORLD ); + + for ( pj = 0; pj < row_nnz[i]; ++pj ) + { + j_list[i][pj] = j_recv1[cnt]; + val_list[i][pj] = val_recv1[cnt]; + cnt++; + } + } + } + + if ( flag2 ) + { + t_start = MPI_Wtime(); + MPI_Wait( &req3, &stat3 ); + MPI_Wait( &req4, &stat4 ); + t_comm += MPI_Wtime() - t_start; + + cnt = 0; + for ( i = nbr2->atoms_str; i < (nbr2->atoms_str + nbr2->atoms_cnt); ++i ) + { + j_list[i] = smalloc( sizeof(int) * row_nnz[i], + "sparse_approx_inverse::j_list[i]", MPI_COMM_WORLD ); + val_list[i] = smalloc( sizeof(real) * row_nnz[i], + "sparse_approx_inverse::val_list[i]", MPI_COMM_WORLD ); + + for ( pj = 0; pj < row_nnz[i]; ++pj ) + { + j_list[i][pj] = j_recv2[cnt]; + val_list[i][pj] = val_recv2[cnt]; + cnt++; + } + } + } + } + + sfree( j_send, "sparse_approx_inverse::j_send" ); + sfree( val_send, "sparse_approx_inverse::val_send" ); + sfree( j_recv1, "sparse_approx_inverse::j_recv1" ); + sfree( j_recv2, "sparse_approx_inverse::j_recv2" ); + sfree( val_recv1, "sparse_approx_inverse::val_recv1" ); + sfree( val_recv2, "sparse_approx_inverse::val_recv2" ); + + X = smalloc( sizeof(int) * (system->bigN + 1), + "sparse_approx_inverse::X", MPI_COMM_WORLD ); + //size of q should be equal to the maximum possible cardinalty + //of the set formed by neighbors of neighbors of an atom + //i.e, maximum number of rows of dense matrix + //for water systems, this number is 34000 + //for silica systems, it is 12000 + q = smalloc( sizeof(int) * 50000, + "sparse_approx_inverse::q", MPI_COMM_WORLD ); + + for ( i = 0; i <= system->bigN; ++i ) + { + X[i] = -1; + } + + for ( i = 0; i < A_spar_patt->n; ++i ) + { + N = 0; + M = 0; + push = 0; + mark = i + system->bigN; + + /* find column indices of nonzeros (which will be the columns indices of the dense matrix) */ + 
for ( pj = A_spar_patt->start[i]; pj < A_spar_patt->end[i]; ++pj ) + { + j_temp = A_spar_patt->entries[pj].j; + atom = &system->my_atoms[j_temp]; + ++N; + + /* for each of those indices + * search through the row of full A of that index */ + + /* the case where the local matrix has that index's row */ + if( j_temp < A->n ) + { + for ( k = A->start[ j_temp ]; k < A->end[ j_temp ]; ++k ) + { + /* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */ + atom = &system->my_atoms[ A->entries[k].j ]; + X[atom->orig_id] = mark; + q[push++] = atom->orig_id; + } + } + + /* the case where we communicated that index's row */ + else + { + for ( k = 0; k < row_nnz[j_temp]; ++k ) + { + /* and accumulate the nonzero column indices to serve as the row indices of the dense matrix */ + X[ j_list[j_temp][k] ] = mark; + q[push++] = j_list[j_temp][k]; + } + } + } + + /* enumerate the row indices from 0 to (# of nonzero rows - 1) for the dense matrix */ + identity_pos = M; + atom = &system->my_atoms[ i ]; + atom_pos = atom->orig_id; + + for ( k = 0; k < push; k++) + { + if ( X[ q[k] ] == mark ) + { + X[ q[k] ] = M; + ++M; + } + } + identity_pos = X[atom_pos]; + + /* allocate memory for NxM dense matrix */ + if ( size_dense < N * M ) + { + if ( size_dense ) + { + sfree( dense_matrix, "sparse_approx_inverse::dense_matrix" ); + } + + size_dense = N * M * SAFE_ZONE; + + dense_matrix = smalloc( sizeof(real) * size_dense, + "sparse_approx_inverse::dense_matrix", MPI_COMM_WORLD ); + } -#if defined(CG_PERFORMANCE) -real t_start, t_elapsed, matvec_time, dot_time; -#endif + /* fill in the entries of dense matrix */ + for ( d_j = 0; d_j < N; ++d_j) + { + /* all rows are initialized to zero */ + for ( d_i = 0; d_i < M; ++d_i ) + { + dense_matrix[d_i * N + d_j] = 0.0; + } + /* change the value if any of the column indices is seen */ + /* it is in the original list */ + local_pos = A_spar_patt->entries[ A_spar_patt->start[i] + d_j ].j; -void dual_Sparse_MatVec( 
sparse_matrix *A, rvec2 *x, rvec2 *b, int N ) -{ - int i, j, k, si; - real H; + if ( local_pos < A->n ) + { + for ( d_i = A->start[local_pos]; d_i < A->end[local_pos]; ++d_i ) + { + atom = &system->my_atoms[ A->entries[d_i].j ]; + dense_matrix[ X[ atom->orig_id ] * N + d_j ] = A->entries[d_i].val; + } + } + else + { + for ( d_i = 0; d_i < row_nnz[ local_pos ]; ++d_i ) + { + dense_matrix[ X[ j_list[local_pos][d_i] ] * N + d_j ] = val_list[local_pos][d_i]; + } + } + } - for ( i = 0; i < N; ++i ) - { - b[i][0] = b[i][1] = 0; - } + /* create the right hand side of the linear equation + * that is the full column of the identity matrix */ + if ( size_e < M ) + { + if ( size_e ) + { + sfree( e_j, "sparse_approx_inverse::e_j" ); + } - /* perform multiplication */ - for ( i = 0; i < A->n; ++i ) - { - si = A->start[i]; - b[i][0] += A->entries[si].val * x[i][0]; - b[i][1] += A->entries[si].val * x[i][1]; + size_e = M * SAFE_ZONE; - for ( k = si + 1; k < A->end[i]; ++k ) + e_j = smalloc( sizeof(real) * size_e, "sparse_approx_inverse::e_j", MPI_COMM_WORLD ); + } + + for ( k = 0; k < M; ++k ) { - j = A->entries[k].j; - H = A->entries[k].val; + e_j[k] = 0.0; + } + e_j[identity_pos] = 1.0; + + /* Solve the overdetermined system AX = B through the least-squares problem: + * min ||B - AX||_2 */ + m = M; + n = N; + nrhs = 1; + lda = N; + ldb = nrhs; - b[i][0] += H * x[j][0]; - b[i][1] += H * x[j][1]; + info = LAPACKE_dgels( LAPACK_ROW_MAJOR, 'N', m, n, nrhs, dense_matrix, lda, + e_j, ldb ); - // comment out for tryQEq - //if( j < A->n ) { - b[j][0] += H * x[i][0]; - b[j][1] += H * x[i][1]; - //} + /* Check for the full rank */ + if ( info > 0 ) + { + fprintf( stderr, "[ERROR] The diagonal element %i of the triangular factor ", info ); + fprintf( stderr, "of A is zero, so that A does not have full rank;\n" ); + fprintf( stderr, "the least squares solution could not be computed.\n" ); + MPI_Abort( MPI_COMM_WORLD, RUNTIME_ERROR ); + } + + /* accumulate the resulting vector to build 
A_app_inv */ + (*A_app_inv)->start[i] = A_spar_patt->start[i]; + (*A_app_inv)->end[i] = A_spar_patt->end[i]; + for ( k = (*A_app_inv)->start[i]; k < (*A_app_inv)->end[i]; ++k) + { + (*A_app_inv)->entries[k].j = A_spar_patt->entries[k].j; + (*A_app_inv)->entries[k].val = e_j[k - A_spar_patt->start[i]]; } } + + sfree( dense_matrix, "sparse_approx_inverse::dense_matrix" ); + sfree( e_j, "sparse_approx_inverse::e_j" ); + sfree( X, "sparse_approx_inverse::X" ); + /*for ( i = 0; i < system->N; ++i ) + { + sfree( j_list[i], "sparse_approx_inverse::j_list" ); + sfree( val_list[i], "sparse_approx_inverse::val_list" ); + } + sfree( j_list, "sparse_approx_inverse::j_list" ); + sfree( val_list, "sparse_approx_inverse::val_list" );*/ + sfree( row_nnz, "sparse_approx_inverse::row_nnz" ); + + MPI_Reduce( &t_comm, &total_comm, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, + mpi_data->world ); + + if ( system->my_rank == MASTER_NODE ) + { + data->timing.cm_solver_comm += total_comm / nprocs; + } + + return MPI_Wtime() - start; } +#endif +#endif -int dual_CG( reax_system *system, storage *workspace, sparse_matrix *H, - rvec2 *b, real tol, rvec2 *x, mpi_datatypes* mpi_data, FILE *fout ) +int dual_CG( reax_system *system, control_params *control, simulation_data *data, + storage *workspace, sparse_matrix *H, rvec2 *b, + real tol, rvec2 *x, mpi_datatypes* mpi_data ) { - int i, j, n, N, matvecs, scale; + int i, j; rvec2 tmp, alpha, beta; - rvec2 my_sum, norm_sqr, b_norm, my_dot; + rvec2 norm, b_norm; rvec2 sig_old, sig_new; - MPI_Comm comm; + real t_start, t_pa, t_spmv, t_vops, t_comm, t_allreduce; + real timings[5], redux[6]; + + t_pa = 0.0; + t_spmv = 0.0; + t_vops = 0.0; + t_comm = 0.0; + t_allreduce = 0.0; + + t_start = MPI_Wtime( ); + Dist( system, mpi_data, x, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + dual_Sparse_MatVec( H, x, workspace->q2, H->NT ); +#else + dual_Sparse_MatVec( H, x, 
workspace->q2, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; - n = system->n; - N = system->N; - comm = mpi_data->world; - matvecs = 0; - scale = sizeof(rvec2) / sizeof(void); -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) + if ( H->format == SYM_HALF_MATRIX ) { - matvecs = 0; - t_start = matvec_time = dot_time = 0; - t_start = Get_Time( ); + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->q2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; } -#endif - - Dist( system, mpi_data, x, mpi_data->mpi_rvec2, scale, rvec2_packer ); - dual_Sparse_MatVec( H, x, workspace->q2, N ); - // tryQEq - Coll(system, mpi_data, workspace->q2, mpi_data->mpi_rvec2, scale, rvec2_unpacker); - -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) +#if defined(NEUTRAL_TERRITORY) + else { - Update_Timing_Info( &t_start, &matvec_time ); + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->q2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; } #endif + t_start = MPI_Wtime( ); for ( j = 0; j < system->n; ++j ) { - /* residual */ + // residual workspace->r2[j][0] = b[j][0] - workspace->q2[j][0]; workspace->r2[j][1] = b[j][1] - workspace->q2[j][1]; - /* apply diagonal pre-conditioner */ - workspace->d2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j]; - workspace->d2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j]; } + t_vops += MPI_Wtime( ) - t_start; - /* norm of b */ - my_sum[0] = my_sum[1] = 0; - for ( j = 0; j < n; ++j ) + if ( control->cm_solver_pre_comp_type == SAI_PC ) { - my_sum[0] += SQR( b[j][0] ); - my_sum[1] += SQR( b[j][1] ); + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->r2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + dual_Sparse_MatVec( workspace->H_app_inv, workspace->r2, workspace->d2, H->NT ); +#else + dual_Sparse_MatVec( 
workspace->H_app_inv, workspace->r2, workspace->d2, system->n ); +#endif + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->d2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j]; + workspace->d2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; } - MPI_Allreduce( &my_sum, &norm_sqr, 2, MPI_DOUBLE, MPI_SUM, comm ); - b_norm[0] = sqrt( norm_sqr[0] ); - b_norm[1] = sqrt( norm_sqr[1] ); - //fprintf( stderr, "bnorm = %f %f\n", b_norm[0], b_norm[1] ); - /* dot product: r.d */ - my_dot[0] = my_dot[1] = 0; - for ( j = 0; j < n; ++j ) + t_start = MPI_Wtime( ); + for ( j = 0; j < 6; ++j ) + { + redux[j] = 0; + } + for ( j = 0; j < system->n; ++j ) { - my_dot[0] += workspace->r2[j][0] * workspace->d2[j][0]; - my_dot[1] += workspace->r2[j][1] * workspace->d2[j][1]; + redux[0] += workspace->r2[j][0] * workspace->d2[j][0]; + redux[1] += workspace->r2[j][1] * workspace->d2[j][1]; + + redux[2] += workspace->d2[j][0] * workspace->d2[j][0]; + redux[3] += workspace->d2[j][1] * workspace->d2[j][1]; + + redux[4] += b[j][0] * b[j][0]; + redux[5] += b[j][1] * b[j][1]; } - MPI_Allreduce( &my_dot, &sig_new, 2, MPI_DOUBLE, MPI_SUM, comm ); - //fprintf( stderr, "sig_new: %f %f\n", sig_new[0], sig_new[1] ); + t_vops += MPI_Wtime( ) - t_start; -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &dot_time ); -#endif + t_start = MPI_Wtime( ); + MPI_Allreduce( MPI_IN_PLACE, redux, 6, MPI_DOUBLE, MPI_SUM, mpi_data->world ); + t_allreduce += MPI_Wtime( ) - t_start; + + sig_new[0] = redux[0]; + sig_new[1] = redux[1]; + norm[0] = sqrt( redux[2] ); + norm[1] = sqrt( redux[3] ); + b_norm[0] = sqrt( redux[4] ); + b_norm[1] = sqrt( redux[5] ); - for ( i = 1; i < 300; ++i ) + for ( i = 0; i < control->cm_solver_max_iters; ++i ) { - Dist(system, mpi_data, workspace->d2, 
mpi_data->mpi_rvec2, scale, rvec2_packer); - dual_Sparse_MatVec( H, workspace->d2, workspace->q2, N ); - // tryQEq - Coll(system, mpi_data, workspace->q2, mpi_data->mpi_rvec2, scale, rvec2_unpacker); + if ( norm[0] / b_norm[0] <= tol || norm[1] / b_norm[1] <= tol ) + { + break; + } + + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->d2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + dual_Sparse_MatVec( H, workspace->d2, workspace->q2, H->NT ); +#else + dual_Sparse_MatVec( H, workspace->d2, workspace->q2, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; + + if ( H->format == SYM_HALF_MATRIX ) + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->q2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + } +#if defined(NEUTRAL_TERRITORY) + else { - Update_Timing_Info( &t_start, &matvec_time ); + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->q2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; } #endif - /* dot product: d.q */ - my_dot[0] = my_dot[1] = 0; - for ( j = 0; j < n; ++j ) + // dot product: d.q + t_start = MPI_Wtime( ); + redux[0] = redux[1] = 0; + for ( j = 0; j < system->n; ++j ) { - my_dot[0] += workspace->d2[j][0] * workspace->q2[j][0]; - my_dot[1] += workspace->d2[j][1] * workspace->q2[j][1]; + redux[0] += workspace->d2[j][0] * workspace->q2[j][0]; + redux[1] += workspace->d2[j][1] * workspace->q2[j][1]; } - MPI_Allreduce( &my_dot, &tmp, 2, MPI_DOUBLE, MPI_SUM, comm ); - //fprintf( stderr, "tmp: %f %f\n", tmp[0], tmp[1] ); + t_vops += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); + MPI_Allreduce( &redux, &tmp, 2, MPI_DOUBLE, MPI_SUM, mpi_data->world ); + t_allreduce += MPI_Wtime( ) - t_start; + t_start = MPI_Wtime( ); alpha[0] = sig_new[0] / tmp[0]; alpha[1] = sig_new[1] / tmp[1]; - my_dot[0] = 
my_dot[1] = 0; for ( j = 0; j < system->n; ++j ) { - /* update x */ + // update x x[j][0] += alpha[0] * workspace->d2[j][0]; x[j][1] += alpha[1] * workspace->d2[j][1]; - /* update residual */ + // update residual workspace->r2[j][0] -= alpha[0] * workspace->q2[j][0]; workspace->r2[j][1] -= alpha[1] * workspace->q2[j][1]; - /* apply diagonal pre-conditioner */ - workspace->p2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j]; - workspace->p2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j]; - /* dot product: r.p */ - my_dot[0] += workspace->r2[j][0] * workspace->p2[j][0]; - my_dot[1] += workspace->r2[j][1] * workspace->p2[j][1]; } - sig_old[0] = sig_new[0]; - sig_old[1] = sig_new[1]; - MPI_Allreduce( &my_dot, &sig_new, 2, MPI_DOUBLE, MPI_SUM, comm ); - //fprintf( stderr, "sig_new: %f %f\n", sig_new[0], sig_new[1] ); + t_vops += MPI_Wtime( ) - t_start; -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) + if ( control->cm_solver_pre_comp_type == SAI_PC ) { - Update_Timing_Info( &t_start, &dot_time ); - } + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->r2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + dual_Sparse_MatVec( workspace->H_app_inv, workspace->r2, workspace->p2, H->NT ); +#else + dual_Sparse_MatVec( workspace->H_app_inv, workspace->r2, workspace->p2, system->n ); #endif + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->p2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j]; + workspace->p2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; + } - if ( sqrt(sig_new[0]) / b_norm[0] <= tol || sqrt(sig_new[1]) / b_norm[1] <= tol ) + t_start = MPI_Wtime( ); + redux[0] = 0.0; + redux[1] = 0.0; + redux[2] = 0.0; + redux[3] = 0.0; + for ( j = 0; j < system->n; ++j ) { - 
break; + // dot product: r.p + redux[0] += workspace->r2[j][0] * workspace->p2[j][0]; + redux[1] += workspace->r2[j][1] * workspace->p2[j][1]; + + // dot product: p.p + redux[2] += workspace->p2[j][0] * workspace->p2[j][0]; + redux[3] += workspace->p2[j][1] * workspace->p2[j][1]; } + t_vops += MPI_Wtime( ) - t_start; + t_start = MPI_Wtime( ); + MPI_Allreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE, MPI_SUM, mpi_data->world ); + t_allreduce += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); + sig_old[0] = sig_new[0]; + sig_old[1] = sig_new[1]; + sig_new[0] = redux[0]; + sig_new[1] = redux[1]; + norm[0] = sqrt( redux[2] ); + norm[1] = sqrt( redux[3] ); beta[0] = sig_new[0] / sig_old[0]; beta[1] = sig_new[1] / sig_old[1]; for ( j = 0; j < system->n; ++j ) { - /* d = p + beta * d */ + // d = p + beta * d workspace->d2[j][0] = workspace->p2[j][0] + beta[0] * workspace->d2[j][0]; workspace->d2[j][1] = workspace->p2[j][1] + beta[1] * workspace->d2[j][1]; } + t_vops += MPI_Wtime( ) - t_start; + } + + timings[0] = t_pa; + timings[1] = t_spmv; + timings[2] = t_vops; + timings[3] = t_comm; + timings[4] = t_allreduce; + + if ( system->my_rank == MASTER_NODE ) + { + MPI_Reduce( MPI_IN_PLACE, timings, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); + + data->timing.cm_solver_pre_app += timings[0] / control->nprocs; + data->timing.cm_solver_spmv += timings[1] / control->nprocs; + data->timing.cm_solver_vector_ops += timings[2] / control->nprocs; + data->timing.cm_solver_comm += timings[3] / control->nprocs; + data->timing.cm_solver_allreduce += timings[4] / control->nprocs; + } + else + { + MPI_Reduce( timings, NULL, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); } - if ( sqrt(sig_new[0]) / b_norm[0] <= tol ) + // continue to solve the system that has not converged yet + if ( norm[0] / b_norm[0] > tol ) { - for ( j = 0; j < n; ++j ) + for ( j = 0; j < system->n; ++j ) { - workspace->t[j] = workspace->x[j][1]; + workspace->s[j] = workspace->x[j][0]; } - matvecs = 
CG( system, workspace, H, workspace->b_t, tol, - workspace->t,mpi_data, fout ); - for ( j = 0; j < n; ++j ) + + i += CG( system, control, data, workspace, + H, workspace->b_s, tol, workspace->s, mpi_data ); + + for ( j = 0; j < system->n; ++j ) { - workspace->x[j][1] = workspace->t[j]; + workspace->x[j][0] = workspace->s[j]; } } - else if ( sqrt(sig_new[1]) / b_norm[1] <= tol ) + else if ( norm[1] / b_norm[1] > tol ) { - for ( j = 0; j < n; ++j ) + for ( j = 0; j < system->n; ++j ) { - workspace->s[j] = workspace->x[j][0]; + workspace->t[j] = workspace->x[j][1]; } - matvecs = CG( system, workspace, H, workspace->b_s, tol, workspace->s, - mpi_data, fout ); + + i += CG( system, control, data, workspace, + H, workspace->b_t, tol, workspace->t, mpi_data ); + for ( j = 0; j < system->n; ++j ) { - workspace->x[j][0] = workspace->s[j]; + workspace->x[j][1] = workspace->t[j]; } } - if ( i >= 300 ) + if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE ) { - fprintf( stderr, "CG convergence failed!\n" ); + fprintf( stderr, "[WARNING] CG convergence failed!\n" ); + return i; } -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) - { - fprintf( fout, "QEq %d + %d iters. 
matvecs: %f dot: %f\n", i + 1, - matvecs, matvec_time, dot_time ); - } -#endif + return i; - return (i + 1) + matvecs; } -void Sparse_MatVec( sparse_matrix *A, real *x, real *b, int N ) +/* Preconditioned Conjugate Gradient Method */ +int CG( reax_system *system, control_params *control, simulation_data *data, + storage *workspace, sparse_matrix *H, real *b, + real tol, real *x, mpi_datatypes* mpi_data ) { - int i, j, k, si; - real H; + int i, j; + real tmp, alpha, beta, norm, b_norm; + real sig_old, sig_new; + real t_start, t_pa, t_spmv, t_vops, t_comm, t_allreduce; + real timings[5], redux[3]; + + t_pa = 0.0; + t_spmv = 0.0; + t_vops = 0.0; + t_comm = 0.0; + t_allreduce = 0.0; + + t_start = MPI_Wtime( ); + Dist( system, mpi_data, x, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( H, x, workspace->q, H->NT ); +#else + Sparse_MatVec( H, x, workspace->q, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; - for ( i = 0; i < N; ++i ) + if ( H->format == SYM_HALF_MATRIX ) + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + } +#if defined(NEUTRAL_TERRITORY) + else { - b[i] = 0; + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; } +#endif - /* perform multiplication */ - for ( i = 0; i < A->n; ++i ) + t_start = MPI_Wtime( ); + Vector_Sum( workspace->r , 1., b, -1., workspace->q, system->n ); + t_vops += MPI_Wtime( ) - t_start; + + /* pre-conditioning */ + if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->r, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->d, H->NT ); +#else + Sparse_MatVec( 
workspace->H_app_inv, workspace->r, workspace->d, system->n ); +#endif + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC) { - si = A->start[i]; - b[i] += A->entries[si].val * x[i]; - for ( k = si + 1; k < A->end[i]; ++k ) + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) { - j = A->entries[k].j; - H = A->entries[k].val; - b[i] += H * x[j]; - //if( j < A->n ) // comment out for tryQEq - b[j] += H * x[i]; + workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; } + t_pa += MPI_Wtime( ) - t_start; } -} + t_start = MPI_Wtime( ); + redux[0] = Dot_local( workspace->r, workspace->d, system->n ); + redux[1] = Dot_local( workspace->d, workspace->d, system->n ); + redux[2] = Dot_local( b, b, system->n ); + t_vops += MPI_Wtime( ) - t_start; -/* sparse matrix-vector product Ax = b - * where: - * A: matrix, stored in CSR format - * x: vector - * b: vector (result) */ -static void Sparse_MatVec_full( const sparse_matrix * const A, - const real * const x, real * const b ) -{ - //TODO: implement full SpMV in MPI -// int i, pj; -// -// Vector_MakeZero( b, A->n ); -// -//#ifdef _OPENMP -// #pragma omp for schedule(static) -//#endif -// for ( i = 0; i < A->n; ++i ) -// { -// for ( pj = A->start[i]; pj < A->start[i + 1]; ++pj ) -// { -// b[i] += A->val[pj] * x[A->j[pj]]; -// } -// } + t_start = MPI_Wtime( ); + MPI_Allreduce( MPI_IN_PLACE, redux, 3, MPI_DOUBLE, MPI_SUM, mpi_data->world ); + t_allreduce += MPI_Wtime( ) - t_start; + sig_new = redux[0]; + norm = sqrt( redux[1] ); + b_norm = sqrt( redux[2] ); + + for ( i = 0; i < control->cm_solver_max_iters && norm / b_norm > tol; ++i ) + { + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->d, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( H, workspace->d, workspace->q, H->NT ); +#else + Sparse_MatVec( H, workspace->d, workspace->q, system->N ); +#endif + t_spmv += 
MPI_Wtime( ) - t_start; + + if ( H->format == SYM_HALF_MATRIX ) + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + } +#if defined(NEUTRAL_TERRITORY) + else + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->q, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + } +#endif + + t_start = MPI_Wtime( ); + tmp = Parallel_Dot( workspace->d, workspace->q, system->n, mpi_data->world ); + t_allreduce += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); + alpha = sig_new / tmp; + Vector_Add( x, alpha, workspace->d, system->n ); + Vector_Add( workspace->r, -alpha, workspace->q, system->n ); + t_vops += MPI_Wtime( ) - t_start; + + /* pre-conditioning */ + if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->r, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->p, H->NT ); +#else + Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->p, system->n ); +#endif + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->p[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; + } + + t_start = MPI_Wtime( ); + redux[0] = Dot_local( workspace->r, workspace->p, system->n ); + redux[1] = Dot_local( workspace->p, workspace->p, system->n ); + t_vops += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); + MPI_Allreduce( MPI_IN_PLACE, redux, 2, MPI_DOUBLE, MPI_SUM, mpi_data->world ); + t_allreduce += MPI_Wtime( ) - t_start; + sig_old = sig_new; + sig_new = redux[0]; + norm = sqrt( redux[1] ); + + t_start = MPI_Wtime( ); + beta = sig_new / sig_old; + Vector_Sum( workspace->d, 1., workspace->p, beta, workspace->d, 
system->n ); + t_vops += MPI_Wtime( ) - t_start; + } + + timings[0] = t_pa; + timings[1] = t_spmv; + timings[2] = t_vops; + timings[3] = t_comm; + timings[4] = t_allreduce; + + if ( system->my_rank == MASTER_NODE ) + { + MPI_Reduce( MPI_IN_PLACE, timings, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); + + data->timing.cm_solver_pre_app += timings[0] / control->nprocs; + data->timing.cm_solver_spmv += timings[1] / control->nprocs; + data->timing.cm_solver_vector_ops += timings[2] / control->nprocs; + data->timing.cm_solver_comm += timings[3] / control->nprocs; + data->timing.cm_solver_allreduce += timings[4] / control->nprocs; + } + else + { + MPI_Reduce( timings, NULL, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); + } + + if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE ) + { + fprintf( stderr, "[WARNING] CG convergence failed!\n" ); + return i; + } + + return i; } -int CG( reax_system *system, storage *workspace, sparse_matrix *H, real *b, - real tol, real *x, mpi_datatypes* mpi_data, FILE *fout ) +/* Pipelined Preconditioned Conjugate Gradient Method + * + * References: + * 1) Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm, + * P. Ghysels and W. Vanroose, Parallel Computing, 2014. + * 2) Scalable Non-blocking Preconditioned Conjugate Gradient Methods, + * Paul R. Eller and William Gropp, SC '16 Proceedings of the International Conference + * for High Performance Computing, Networking, Storage and Analysis, 2016. 
+ * */ +int dual_PIPECG( reax_system *system, control_params *control, simulation_data *data, + storage *workspace, sparse_matrix *H, rvec2 *b, + real tol, rvec2 *x, mpi_datatypes* mpi_data ) { - int i, j, scale; - real tmp, alpha, beta, b_norm; - real sig_old, sig_new, sig0; + int i, j; + rvec2 alpha, beta, delta, gamma_old, gamma_new, norm, b_norm; + real t_start, t_pa, t_spmv, t_vops, t_comm, t_allreduce; + real timings[5], redux[8]; + MPI_Request req; + + t_pa = 0.0; + t_spmv = 0.0; + t_vops = 0.0; + t_comm = 0.0; + t_allreduce = 0.0; + + t_start = MPI_Wtime( ); + Dist( system, mpi_data, x, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + dual_Sparse_MatVec( H, x, workspace->u2, H->NT ); +#else + dual_Sparse_MatVec( H, x, workspace->u2, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) + if ( H->format == SYM_HALF_MATRIX ) { - t_start = matvec_time = dot_time = 0; - t_start = Get_Time( ); + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->u2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + } +#if defined(NEUTRAL_TERRITORY) + else + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->u2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; } #endif - scale = sizeof(real) / sizeof(void); - Dist( system, mpi_data, x, MPI_DOUBLE, scale, real_packer ); - Sparse_MatVec( H, x, workspace->q, system->N ); - // tryQEq - Coll( system, mpi_data, workspace->q, MPI_DOUBLE, scale, real_unpacker ); + t_start = MPI_Wtime( ); + //Vector_Sum( workspace->r , 1.0, b, -1.0, workspace->u, system->n ); + for ( j = 0; j < system->n; ++j ) + { + workspace->r2[j][0] = b[j][0] - workspace->u2[j][0]; + workspace->r2[j][1] = b[j][1] - workspace->u2[j][1]; + } + t_vops += MPI_Wtime( ) - t_start; -#if defined(CG_PERFORMANCE) - if ( system->my_rank == 
MASTER_NODE ) + /* pre-conditioning */ + if ( control->cm_solver_pre_comp_type == NONE_PC ) + { + //Vector_Copy( workspace->u, workspace->r, system->n ); + for ( j = 0; j < system->n ; ++j ) + { + workspace->u2[j][0] = workspace->r2[j][0]; + workspace->u2[j][1] = workspace->r2[j][1]; + } + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->u2[j][0] = workspace->r2[j][0] * workspace->Hdia_inv[j]; + workspace->u2[j][1] = workspace->r2[j][1] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == SAI_PC ) { - Update_Timing_Info( &t_start, &matvec_time ); + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->r2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + dual_Sparse_MatVec( workspace->H_app_inv, workspace->r2, workspace->u2, H->NT ); +#else + dual_Sparse_MatVec( workspace->H_app_inv, workspace->r2, workspace->u2, system->n ); +#endif + t_pa += MPI_Wtime( ) - t_start; } + + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->u2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + dual_Sparse_MatVec( H, workspace->u2, workspace->w2, H->NT ); +#else + dual_Sparse_MatVec( H, workspace->u2, workspace->w2, system->N ); #endif + t_spmv += MPI_Wtime( ) - t_start; - Vector_Sum( workspace->r , 1., b, -1., workspace->q, system->n ); + if ( H->format == SYM_HALF_MATRIX ) + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->w2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + } +#if defined(NEUTRAL_TERRITORY) + else + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->w2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + } +#endif - for ( j = 0; j < 
system->n; ++j ) + t_start = MPI_Wtime( ); + //redux[0] = Dot_local( workspace->w, workspace->u, system->n ); + //redux[1] = Dot_local( workspace->r, workspace->u, system->n ); + //redux[2] = Dot_local( workspace->u, workspace->u, system->n ); + //redux[3] = Dot_local( b, b, system->n ); + for ( j = 0; j < 8; ++j ) { - workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; //pre-condition + redux[j] = 0.0; } - //TODO: apply SAI preconditioner here, comment out diagonal preconditioning above -// Sparse_MatVec_full( workspace->H_app_inv, workspace->r, workspace->d ); + for( j = 0; j < system->n; ++j ) + { + redux[0] += workspace->w2[j][0] * workspace->u2[j][0]; + redux[1] += workspace->w2[j][1] * workspace->u2[j][1]; - b_norm = Parallel_Norm( b, system->n, mpi_data->world ); - sig_new = Parallel_Dot(workspace->r, workspace->d, system->n, mpi_data->world); - sig0 = sig_new; + redux[2] += workspace->r2[j][0] * workspace->u2[j][0]; + redux[3] += workspace->r2[j][1] * workspace->u2[j][1]; -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) + redux[4] += workspace->u2[j][0] * workspace->u2[j][0]; + redux[5] += workspace->u2[j][1] * workspace->u2[j][1]; + + redux[6] += b[j][0] * b[j][0]; + redux[7] += b[j][1] * b[j][1]; + } + t_vops += MPI_Wtime( ) - t_start; + + MPI_Iallreduce( MPI_IN_PLACE, redux, 8, MPI_DOUBLE, MPI_SUM, mpi_data->world, &req ); + + /* pre-conditioning */ + if ( control->cm_solver_pre_comp_type == NONE_PC ) { - Update_Timing_Info( &t_start, &dot_time ); + //Vector_Copy( workspace->m, workspace->w, system->n ); + for ( j = 0; j < system->n; ++j ) + { + workspace->m2[j][0] = workspace->w2[j][0]; + workspace->m2[j][1] = workspace->w2[j][1]; + } + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->m2[j][0] = workspace->w2[j][0] * workspace->Hdia_inv[j]; + workspace->m2[j][1] = workspace->w2[j][1] * workspace->Hdia_inv[j]; + } + t_pa += 
MPI_Wtime( ) - t_start; } + else if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->w2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + dual_Sparse_MatVec( workspace->H_app_inv, workspace->w2, workspace->m2, H->NT ); +#else + dual_Sparse_MatVec( workspace->H_app_inv, workspace->w2, workspace->m2, system->n ); +#endif + t_pa += MPI_Wtime( ) - t_start; + } + + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->m2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + dual_Sparse_MatVec( H, workspace->m2, workspace->n2, H->NT ); +#else + dual_Sparse_MatVec( H, workspace->m2, workspace->n2, system->N ); #endif + t_spmv += MPI_Wtime( ) - t_start; - for ( i = 1; i < 300 && sqrt(sig_new) / b_norm > tol; ++i ) + if ( H->format == SYM_HALF_MATRIX ) { - Dist( system, mpi_data, workspace->d, MPI_DOUBLE, scale, real_packer ); - Sparse_MatVec( H, workspace->d, workspace->q, system->N ); - //tryQEq - Coll(system, mpi_data, workspace->q, MPI_DOUBLE, scale, real_unpacker); + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->n2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + } +#if defined(NEUTRAL_TERRITORY) + else + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->n2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + } +#endif + + t_start = MPI_Wtime( ); + MPI_Wait( &req, MPI_STATUS_IGNORE ); + t_allreduce += MPI_Wtime( ) - t_start; + delta[0] = redux[0]; + delta[1] = redux[1]; + gamma_new[0] = redux[2]; + gamma_new[1] = redux[3]; + norm[0] = sqrt( redux[4] ); + norm[1] = sqrt( redux[5] ); + b_norm[0] = sqrt( redux[6] ); + b_norm[1] = sqrt( redux[7] ); + + for ( i = 0; i < control->cm_solver_max_iters; ++i ) + { + if ( norm[0] / b_norm[0] <= tol || 
norm[1] / b_norm[1] <= tol ) + { + break; + } + if ( i > 0 ) + { + beta[0] = gamma_new[0] / gamma_old[0]; + beta[1] = gamma_new[1] / gamma_old[1]; + alpha[0] = gamma_new[0] / (delta[0] - beta[0] / alpha[0] * gamma_new[0]); + alpha[1] = gamma_new[1] / (delta[1] - beta[1] / alpha[1] * gamma_new[1]); + } + else + { + beta[0] = 0.0; + beta[1] = 0.0; + alpha[0] = gamma_new[0] / delta[0]; + alpha[1] = gamma_new[1] / delta[1]; + } + + t_start = MPI_Wtime( ); + //Vector_Sum( workspace->z, 1.0, workspace->n, beta, workspace->z, system->n ); + //Vector_Sum( workspace->q, 1.0, workspace->m, beta, workspace->q, system->n ); + //Vector_Sum( workspace->p, 1.0, workspace->u, beta, workspace->p, system->n ); + //Vector_Sum( workspace->d, 1.0, workspace->w, beta, workspace->d, system->n ); + //Vector_Sum( x, 1.0, x, alpha, workspace->p, system->n ); + //Vector_Sum( workspace->u, 1.0, workspace->u, -alpha, workspace->q, system->n ); + //Vector_Sum( workspace->w, 1.0, workspace->w, -alpha, workspace->z, system->n ); + //Vector_Sum( workspace->r, 1.0, workspace->r, -alpha, workspace->d, system->n ); + //redux[0] = Dot_local( workspace->w, workspace->u, system->n ); + //redux[1] = Dot_local( workspace->r, workspace->u, system->n ); + //redux[2] = Dot_local( workspace->u, workspace->u, system->n ); + for ( j = 0; j < 6; ++j ) + { + redux[j] = 0.0; + } + for ( j = 0; j < system->n; ++j ) + { + workspace->z2[j][0] = workspace->n2[j][0] + beta[0] * workspace->z2[j][0]; + workspace->z2[j][1] = workspace->n2[j][1] + beta[1] * workspace->z2[j][1]; -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) + workspace->q2[j][0] = workspace->m2[j][0] + beta[0] * workspace->q2[j][0]; + workspace->q2[j][1] = workspace->m2[j][1] + beta[1] * workspace->q2[j][1]; + + workspace->p2[j][0] = workspace->u2[j][0] + beta[0] * workspace->p2[j][0]; + workspace->p2[j][1] = workspace->u2[j][1] + beta[1] * workspace->p2[j][1]; + + workspace->d2[j][0] = workspace->w2[j][0] + beta[0] * 
workspace->d2[j][0]; + workspace->d2[j][1] = workspace->w2[j][1] + beta[1] * workspace->d2[j][1]; + + x[j][0] += alpha[0] * workspace->p2[j][0]; + x[j][1] += alpha[1] * workspace->p2[j][1]; + + workspace->u2[j][0] -= alpha[0] * workspace->q2[j][0]; + workspace->u2[j][1] -= alpha[1] * workspace->q2[j][1]; + + workspace->w2[j][0] -= alpha[0] * workspace->z2[j][0]; + workspace->w2[j][1] -= alpha[1] * workspace->z2[j][1]; + + workspace->r2[j][0] -= alpha[0] * workspace->d2[j][0]; + workspace->r2[j][1] -= alpha[1] * workspace->d2[j][1]; + + redux[0] += workspace->w2[j][0] * workspace->u2[j][0]; + redux[1] += workspace->w2[j][1] * workspace->u2[j][1]; + + redux[2] += workspace->r2[j][0] * workspace->u2[j][0]; + redux[3] += workspace->r2[j][1] * workspace->u2[j][1]; + + redux[4] += workspace->u2[j][0] * workspace->u2[j][0]; + redux[5] += workspace->u2[j][1] * workspace->u2[j][1]; + + } + t_vops += MPI_Wtime( ) - t_start; + + MPI_Iallreduce( MPI_IN_PLACE, redux, 6, MPI_DOUBLE, MPI_SUM, mpi_data->world, &req ); + + /* pre-conditioning */ + if ( control->cm_solver_pre_comp_type == NONE_PC ) + { + //Vector_Copy( workspace->m, workspace->w, system->n ); + for ( j = 0; j < system->n; ++j ) + { + workspace->m2[j][0] = workspace->w2[j][0]; + workspace->m2[j][1] = workspace->w2[j][1]; + } + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) { - Update_Timing_Info( &t_start, &matvec_time ); + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->m2[j][0] = workspace->w2[j][0] * workspace->Hdia_inv[j]; + workspace->m2[j][1] = workspace->w2[j][1] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; } + else if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->w2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + dual_Sparse_MatVec( workspace->H_app_inv, workspace->w2, workspace->m2, H->NT ); 
+#else + dual_Sparse_MatVec( workspace->H_app_inv, workspace->w2, workspace->m2, system->n ); #endif + t_pa += MPI_Wtime( ) - t_start; + } - tmp = Parallel_Dot(workspace->d, workspace->q, system->n, mpi_data->world); - alpha = sig_new / tmp; - Vector_Add( x, alpha, workspace->d, system->n ); - Vector_Add( workspace->r, -alpha, workspace->q, system->n ); + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->m2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2); + t_comm += MPI_Wtime( ) - t_start; - /* pre-conditioning */ - for ( j = 0; j < system->n; ++j ) + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + dual_Sparse_MatVec( H, workspace->m2, workspace->n2, H->NT ); +#else + dual_Sparse_MatVec( H, workspace->m2, workspace->n2, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; + + if ( H->format == SYM_HALF_MATRIX ) { - workspace->p[j] = workspace->r[j] * workspace->Hdia_inv[j]; + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->n2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2 ); + t_comm += MPI_Wtime( ) - t_start; } - //TODO: apply SAI preconditioner here, comment out diagonal preconditioning above -// Sparse_MatVec_full( workspace->H_app_inv, workspace->r, workspace->d ); - - sig_old = sig_new; - sig_new = Parallel_Dot(workspace->r, workspace->p, system->n, mpi_data->world); - beta = sig_new / sig_old; - Vector_Sum( workspace->d, 1., workspace->p, beta, workspace->d, system->n ); - -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) +#if defined(NEUTRAL_TERRITORY) + else { - Update_Timing_Info( &t_start, &dot_time ); + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->n2, RVEC2_PTR_TYPE, mpi_data->mpi_rvec2); + t_comm += MPI_Wtime( ) - t_start; } #endif - } - if ( i >= 300 ) - { - fprintf( stderr, "CG convergence failed!\n" ); - return i; + gamma_old[0] = gamma_new[0]; + gamma_old[1] = gamma_new[1]; + + t_start = MPI_Wtime( ); + MPI_Wait( &req, MPI_STATUS_IGNORE ); + t_allreduce += MPI_Wtime( ) - t_start; + delta[0] = 
redux[0]; + delta[1] = redux[1]; + gamma_new[0] = redux[2]; + gamma_new[1] = redux[3]; + norm[0] = sqrt( redux[4] ); + norm[1] = sqrt( redux[5] ); } -#if defined(CG_PERFORMANCE) + timings[0] = t_pa; + timings[1] = t_spmv; + timings[2] = t_vops; + timings[3] = t_comm; + timings[4] = t_allreduce; + if ( system->my_rank == MASTER_NODE ) { - fprintf( fout, "QEq %d iters. matvecs: %f dot: %f\n", i, matvec_time, - dot_time ); - } -#endif - - return i; -} - - -int CG_test( reax_system *system, storage *workspace, sparse_matrix *H, - real *b, real tol, real *x, mpi_datatypes* mpi_data, FILE *fout ) -{ - int i, j, scale; - real tmp, alpha, beta, b_norm; - real sig_old, sig_new, sig0; + MPI_Reduce( MPI_IN_PLACE, timings, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); - scale = sizeof(real) / sizeof(void); - b_norm = Parallel_Norm( b, system->n, mpi_data->world ); -#if defined(DEBUG) - if ( system->my_rank == MASTER_NODE ) + data->timing.cm_solver_pre_app += timings[0] / control->nprocs; + data->timing.cm_solver_spmv += timings[1] / control->nprocs; + data->timing.cm_solver_vector_ops += timings[2] / control->nprocs; + data->timing.cm_solver_comm += timings[3] / control->nprocs; + data->timing.cm_solver_allreduce += timings[4] / control->nprocs; + } + else { - fprintf( stderr, "n=%d, N=%d\n", system->n, system->N ); - fprintf( stderr, "p%d CGinit: b_norm=%24.15e\n", system->my_rank, b_norm ); - //Vector_Print( stderr, "d", workspace->d, system->N ); - //Vector_Print( stderr, "q", workspace->q, system->N ); + MPI_Reduce( timings, NULL, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); } - MPI_Barrier( mpi_data->world ); -#endif - Sparse_MatVec( H, x, workspace->q, system->N ); - //Coll( system, mpi_data, workspace->q, MPI_DOUBLE, real_unpacker ); - - Vector_Sum( workspace->r , 1., b, -1., workspace->q, system->n ); - for ( j = 0; j < system->n; ++j ) - workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; //pre-condition + // continue to solve the system 
that has not converged yet + if ( norm[0] / b_norm[0] > tol ) + { + for ( j = 0; j < system->n; ++j ) + { + workspace->s[j] = workspace->x[j][0]; + } - sig_new = Parallel_Dot( workspace->r, workspace->d, system->n, - mpi_data->world ); - sig0 = sig_new; -#if defined(DEBUG) - //if( system->my_rank == MASTER_NODE ) { - fprintf( stderr, "p%d CG:sig_new=%24.15e,d_norm=%24.15e,q_norm=%24.15e\n", - system->my_rank, sqrt(sig_new), - Parallel_Norm(workspace->d, system->n, mpi_data->world), - Parallel_Norm(workspace->q, system->n, mpi_data->world) ); - //Vector_Print( stderr, "d", workspace->d, system->N ); - //Vector_Print( stderr, "q", workspace->q, system->N ); - //} - MPI_Barrier( mpi_data->world ); -#endif + i += PIPECG( system, control, data, workspace, + H, workspace->b_s, tol, workspace->s, mpi_data ); - for ( i = 1; i < 300 && sqrt(sig_new) / b_norm > tol; ++i ) + for ( j = 0; j < system->n; ++j ) + { + workspace->x[j][0] = workspace->s[j]; + } + } + else if ( norm[1] / b_norm[1] > tol ) { -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) - t_start = Get_Time( ); -#endif - Dist( system, mpi_data, workspace->d, MPI_DOUBLE, scale, real_packer ); - Sparse_MatVec( H, workspace->d, workspace->q, system->N ); - //tryQEq - //Coll(system, mpi_data, workspace->q, MPI_DOUBLE, real_unpacker); -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) + for ( j = 0; j < system->n; ++j ) { - t_elapsed = Get_Timing_Info( t_start ); - matvec_time += t_elapsed; + workspace->t[j] = workspace->x[j][1]; } -#endif -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) - t_start = Get_Time( ); -#endif - tmp = Parallel_Dot(workspace->d, workspace->q, system->n, mpi_data->world); - alpha = sig_new / tmp; -#if defined(DEBUG) - //if( system->my_rank == MASTER_NODE ){ - fprintf(stderr, - "p%d CG iter%d:d_norm=%24.15e,q_norm=%24.15e,tmp = %24.15e\n", - system->my_rank, i, - //Parallel_Norm(workspace->d, system->n, mpi_data->world), - 
//Parallel_Norm(workspace->q, system->n, mpi_data->world), - Norm(workspace->d, system->n), Norm(workspace->q, system->n), tmp); - //Vector_Print( stderr, "d", workspace->d, system->N ); - //for( j = 0; j < system->N; ++j ) - // fprintf( stdout, "%d %24.15e\n", - // system->my_atoms[j].orig_id, workspace->q[j] ); - //fprintf( stdout, "\n" ); - //} - MPI_Barrier( mpi_data->world ); -#endif + i += PIPECG( system, control, data, workspace, + H, workspace->b_t, tol, workspace->t, mpi_data ); - Vector_Add( x, alpha, workspace->d, system->n ); - Vector_Add( workspace->r, -alpha, workspace->q, system->n ); - /* pre-conditioning */ for ( j = 0; j < system->n; ++j ) - workspace->p[j] = workspace->r[j] * workspace->Hdia_inv[j]; - - sig_old = sig_new; - sig_new = Parallel_Dot(workspace->r, workspace->p, system->n, mpi_data->world); - beta = sig_new / sig_old; - Vector_Sum( workspace->d, 1., workspace->p, beta, workspace->d, system->n ); -#if defined(DEBUG) - if ( system->my_rank == MASTER_NODE ) - fprintf(stderr, "p%d CG iter%d: sig_new = %24.15e\n", - system->my_rank, i, sqrt(sig_new) ); - MPI_Barrier( mpi_data->world ); -#endif -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) { - t_elapsed = Get_Timing_Info( t_start ); - dot_time += t_elapsed; + workspace->x[j][1] = workspace->t[j]; } -#endif } -#if defined(DEBUG) - if ( system->my_rank == MASTER_NODE ) - fprintf( stderr, "CG took %d iterations\n", i ); -#endif -#if defined(CG_PERFORMANCE) - if ( system->my_rank == MASTER_NODE ) - fprintf( stderr, "%f %f\n", matvec_time, dot_time ); -#endif - if ( i >= 300 ) + if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE ) { - fprintf( stderr, "CG convergence failed!\n" ); + fprintf( stderr, "[WARNING] PIPECG convergence failed!\n" ); return i; } @@ -536,506 +2580,555 @@ int CG_test( reax_system *system, storage *workspace, sparse_matrix *H, } -void Forward_Subs( sparse_matrix *L, real *b, real *y ) +/* Pipelined Preconditioned Conjugate 
Gradient Method + * + * References: + * 1) Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm, + * P. Ghysels and W. Vanroose, Parallel Computing, 2014. + * 2) Scalable Non-blocking Preconditioned Conjugate Gradient Methods, + * Paul R. Eller and William Gropp, SC '16 Proceedings of the International Conference + * for High Performance Computing, Networking, Storage and Analysis, 2016. + * */ +int PIPECG( reax_system *system, control_params *control, simulation_data *data, + storage *workspace, sparse_matrix *H, real *b, + real tol, real *x, mpi_datatypes* mpi_data ) { - int i, pj, j, si, ei; - real val; + int i, j; + real alpha, beta, delta, gamma_old, gamma_new, norm, b_norm; + real t_start, t_pa, t_spmv, t_vops, t_comm, t_allreduce; + real timings[5], redux[4]; + MPI_Request req; + + t_pa = 0.0; + t_spmv = 0.0; + t_vops = 0.0; + t_comm = 0.0; + t_allreduce = 0.0; + + t_start = MPI_Wtime( ); + Dist( system, mpi_data, x, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( H, x, workspace->u, H->NT ); +#else + Sparse_MatVec( H, x, workspace->u, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; - for ( i = 0; i < L->n; ++i ) + if ( H->format == SYM_HALF_MATRIX ) { - y[i] = b[i]; - si = L->start[i]; - ei = L->end[i]; - for ( pj = si; pj < ei - 1; ++pj ) - { - j = L->entries[pj].j; - val = L->entries[pj].val; - y[i] -= val * y[j]; - } - y[i] /= L->entries[pj].val; + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; } -} - - -void Backward_Subs( sparse_matrix *U, real *y, real *x ) -{ - int i, pj, j, si, ei; - real val; - - for ( i = U->n - 1; i >= 0; --i ) +#if defined(NEUTRAL_TERRITORY) + else { - x[i] = y[i]; - si = U->start[i]; - ei = U->end[i]; - for ( pj = si + 1; pj < ei; ++pj ) - { - j = U->entries[pj].j; - val = U->entries[pj].val; - x[i] -= val * 
x[j]; - } - x[i] /= U->entries[si].val; + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; } -} - - -int PCG( reax_system *system, storage *workspace, - sparse_matrix *H, real *b, real tol, - sparse_matrix *L, sparse_matrix *U, real *x, - mpi_datatypes* mpi_data, FILE *fout ) -{ - int i, me, n, N, scale; - real tmp, alpha, beta, b_norm, r_norm, sig_old, sig_new; - MPI_Comm world; - - me = system->my_rank; - n = system->n; - N = system->N; - world = mpi_data->world; - scale = sizeof(real) / sizeof(void); - b_norm = Parallel_Norm( b, n, world ); -#if defined(DEBUG_FOCUS) - if ( me == MASTER_NODE ) - { - fprintf( stderr, "init_PCG: n=%d, N=%d\n", n, N ); - fprintf( stderr, "init_PCG: |b|=%24.15e\n", b_norm ); - } - MPI_Barrier( world ); #endif - Sparse_MatVec( H, x, workspace->q, N ); - //Coll( system, workspace, mpi_data, workspace->q ); - Vector_Sum( workspace->r , 1., b, -1., workspace->q, n ); - r_norm = Parallel_Norm( workspace->r, n, world ); + t_start = MPI_Wtime( ); + Vector_Sum( workspace->r , 1.0, b, -1.0, workspace->u, system->n ); + t_vops += MPI_Wtime( ) - t_start; - Forward_Subs( L, workspace->r, workspace->d ); - Backward_Subs( U, workspace->d, workspace->p ); - sig_new = Parallel_Dot( workspace->r, workspace->p, n, world ); -#if defined(DEBUG_FOCUS) - if ( me == MASTER_NODE ) + /* pre-conditioning */ + if ( control->cm_solver_pre_comp_type == NONE_PC ) { - fprintf( stderr, "init_PCG: sig_new=%.15e\n", r_norm ); - fprintf( stderr, "init_PCG: |d|=%.15e |q|=%.15e\n", - Parallel_Norm(workspace->d, n, world), - Parallel_Norm(workspace->q, n, world) ); + Vector_Copy( workspace->u, workspace->r, system->n ); } - MPI_Barrier( world ); -#endif - - for ( i = 1; i < 100 && r_norm / b_norm > tol; ++i ) + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) { - Dist( system, mpi_data, workspace->p, MPI_DOUBLE, scale, real_packer ); - Sparse_MatVec( H, workspace->p, workspace->q, N 
); - // tryQEq - //Coll(system,mpi_data,workspace->q, MPI_DOUBLE, real_unpacker); - tmp = Parallel_Dot( workspace->q, workspace->p, n, world ); - alpha = sig_new / tmp; - Vector_Add( x, alpha, workspace->p, n ); -#if defined(DEBUG_FOCUS) - if ( me == MASTER_NODE ) - fprintf(stderr, "iter%d: |p|=%.15e |q|=%.15e tmp=%.15e\n", - i, Parallel_Norm(workspace->p, n, world), - Parallel_Norm(workspace->q, n, world), tmp ); - MPI_Barrier( world ); -#endif - - Vector_Add( workspace->r, -alpha, workspace->q, n ); - r_norm = Parallel_Norm( workspace->r, n, world ); -#if defined(DEBUG_FOCUS) - if ( me == MASTER_NODE ) - fprintf( stderr, "iter%d: res=%.15e\n", i, r_norm ); - MPI_Barrier( world ); -#endif - - Forward_Subs( L, workspace->r, workspace->d ); - Backward_Subs( U, workspace->d, workspace->d ); - sig_old = sig_new; - sig_new = Parallel_Dot( workspace->r, workspace->d, n, world ); - beta = sig_new / sig_old; - Vector_Sum( workspace->p, 1., workspace->d, beta, workspace->p, n ); + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) + { + workspace->u[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; } - -#if defined(DEBUG_FOCUS) - if ( me == MASTER_NODE ) - fprintf( stderr, "PCG took %d iterations\n", i ); + else if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->r, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->u, H->NT ); +#else + Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->u, system->n ); #endif - if ( i >= 100 ) - fprintf( stderr, "PCG convergence failed!\n" ); - - return i; -} + t_pa += MPI_Wtime( ) - t_start; + } + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; -#if defined(OLD_STUFF) -int sCG( reax_system *system, storage 
*workspace, sparse_matrix *H, - real *b, real tol, real *x, mpi_datatypes* mpi_data, FILE *fout ) -{ - int i, j; - real tmp, alpha, beta, b_norm; - real sig_old, sig_new, sig0; + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( H, workspace->u, workspace->w, H->NT ); +#else + Sparse_MatVec( H, workspace->u, workspace->w, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; - b_norm = Norm( b, system->n ); -#if defined(DEBUG) - if ( system->my_rank == MASTER_NODE ) + if ( H->format == SYM_HALF_MATRIX ) { - fprintf( stderr, "n=%d, N=%d\n", system->n, system->N ); - fprintf( stderr, "p%d CGinit: b_norm=%24.15e\n", system->my_rank, b_norm ); - //Vector_Print( stderr, "d", workspace->d, system->N ); - //Vector_Print( stderr, "q", workspace->q, system->N ); + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + } +#if defined(NEUTRAL_TERRITORY) + else + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; } - MPI_Barrier( mpi_data->world ); #endif - Sparse_MatVec( H, x, workspace->q, system->N ); - //Coll_Vector( system, workspace, mpi_data, workspace->q ); + t_start = MPI_Wtime( ); + redux[0] = Dot_local( workspace->w, workspace->u, system->n ); + redux[1] = Dot_local( workspace->r, workspace->u, system->n ); + redux[2] = Dot_local( workspace->u, workspace->u, system->n ); + redux[3] = Dot_local( b, b, system->n ); + t_vops += MPI_Wtime( ) - t_start; - Vector_Sum( workspace->r , 1., b, -1., workspace->q, system->n ); - for ( j = 0; j < system->n; ++j ) - workspace->d[j] = workspace->r[j] * workspace->Hdia_inv[j]; //pre-condition + MPI_Iallreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE, MPI_SUM, mpi_data->world, &req ); - sig_new = Dot( workspace->r, workspace->d, system->n ); - sig0 = sig_new; -#if defined(DEBUG) - if ( system->my_rank == MASTER_NODE ) + /* pre-conditioning */ + if ( 
control->cm_solver_pre_comp_type == NONE_PC ) { - fprintf( stderr, "p%d CGinit:sig_new=%24.15e\n", system->my_rank, sig_new ); - //Vector_Print( stderr, "d", workspace->d, system->N ); - //Vector_Print( stderr, "q", workspace->q, system->N ); + Vector_Copy( workspace->m, workspace->w, system->n ); } - MPI_Barrier( mpi_data->world ); -#endif - - for ( i = 1; i < 100 && sqrt(sig_new) / b_norm > tol; ++i ) + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) { - //Dist_Vector( system, mpi_data, workspace->d ); - Sparse_MatVec( H, workspace->d, workspace->q, system->N ); - //Coll_Vector( system, workspace, mpi_data, workspace->q ); - - tmp = Dot( workspace->d, workspace->q, system->n ); - alpha = sig_new / tmp; -#if defined(DEBUG) - if ( system->my_rank == MASTER_NODE ) + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) { - fprintf(stderr, - "p%d CG iter%d:d_norm=%24.15e,q_norm=%24.15e,tmp = %24.15e\n", - system->my_rank, i, - Parallel_Norm(workspace->d, system->n, mpi_data->world), - Parallel_Norm(workspace->q, system->n, mpi_data->world), tmp ); - //Vector_Print( stderr, "d", workspace->d, system->N ); - //Vector_Print( stderr, "q", workspace->q, system->N ); + workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j]; } - MPI_Barrier( mpi_data->world ); + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( workspace->H_app_inv, workspace->w, workspace->m, H->NT ); +#else + Sparse_MatVec( workspace->H_app_inv, workspace->w, workspace->m, system->n ); #endif + t_pa += MPI_Wtime( ) - t_start; + } - Vector_Add( x, alpha, workspace->d, system->n ); - Vector_Add( workspace->r, -alpha, workspace->q, system->n ); - /* pre-conditioning */ - for ( j = 0; j < system->n; ++j ) - workspace->p[j] = 
workspace->r[j] * workspace->Hdia_inv[j]; - - sig_old = sig_new; - sig_new = Dot( workspace->r, workspace->p, system->n ); + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->m, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; - beta = sig_new / sig_old; - Vector_Sum( workspace->d, 1., workspace->p, beta, workspace->d, system->n ); -#if defined(DEBUG) - if ( system->my_rank == MASTER_NODE ) - fprintf(stderr, "p%d CG iter%d: sig_new = %24.15e\n", - system->my_rank, i, sig_new ); - MPI_Barrier( mpi_data->world ); + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( H, workspace->m, workspace->n, H->NT ); +#else + Sparse_MatVec( H, workspace->m, workspace->n, system->N ); #endif - } + t_spmv += MPI_Wtime( ) - t_start; -#if defined(DEBUG) - if ( system->my_rank == MASTER_NODE ) - fprintf( stderr, "CG took %d iterations\n", i ); -#endif - if ( i >= 100 ) + if ( H->format == SYM_HALF_MATRIX ) { - fprintf( stderr, "CG convergence failed!\n" ); - return i; + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; } - - return i; -} - - -int GMRES( reax_system *system, storage *workspace, sparse_matrix *H, - real *b, real tol, real *x, mpi_datatypes* mpi_data, FILE *fout ) -{ - int i, j, k, itr, N; - real cc, tmp1, tmp2, temp, bnorm; - - N = system->N; - bnorm = Norm( b, N ); - - /* apply the diagonal pre-conditioner to rhs */ - for ( i = 0; i < N; ++i ) - workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i]; - - /* GMRES outer-loop */ - for ( itr = 0; itr < MAX_ITR; ++itr ) +#if defined(NEUTRAL_TERRITORY) + else { - /* calculate r0 */ - Sparse_MatVec( H, x, workspace->b_prm, N ); - for ( i = 0; i < N; ++i ) - workspace->b_prm[i] *= workspace->Hdia_inv[i]; // pre-conditioner - - Vector_Sum( workspace->v[0], - 1., workspace->b_prc, -1., workspace->b_prm, N ); - workspace->g[0] = Norm( workspace->v[0], N ); - Vector_Scale( workspace->v[0], - 1. 
/ workspace->g[0], workspace->v[0], N ); + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + } +#endif - // fprintf( stderr, "%10.6f\n", workspace->g[0] ); + t_start = MPI_Wtime( ); + MPI_Wait( &req, MPI_STATUS_IGNORE ); + t_allreduce += MPI_Wtime( ) - t_start; + delta = redux[0]; + gamma_new = redux[1]; + norm = sqrt( redux[2] ); + b_norm = sqrt( redux[3] ); - /* GMRES inner-loop */ - for ( j = 0; j < RESTART && fabs(workspace->g[j]) / bnorm > tol; j++ ) + for ( i = 0; i < control->cm_solver_max_iters && norm / b_norm > tol; ++i ) + { + if ( i > 0 ) + { + beta = gamma_new / gamma_old; + alpha = gamma_new / (delta - beta / alpha * gamma_new); + } + else { - /* matvec */ - Sparse_MatVec( H, workspace->v[j], workspace->v[j + 1], N ); + beta = 0.0; + alpha = gamma_new / delta; + } - for ( k = 0; k < N; ++k ) - workspace->v[j + 1][k] *= workspace->Hdia_inv[k]; // pre-conditioner - // fprintf( stderr, "%d-%d: matvec done.\n", itr, j ); + t_start = MPI_Wtime( ); + Vector_Sum( workspace->z, 1.0, workspace->n, beta, workspace->z, system->n ); + Vector_Sum( workspace->q, 1.0, workspace->m, beta, workspace->q, system->n ); + Vector_Sum( workspace->p, 1.0, workspace->u, beta, workspace->p, system->n ); + Vector_Sum( workspace->d, 1.0, workspace->w, beta, workspace->d, system->n ); + Vector_Sum( x, 1.0, x, alpha, workspace->p, system->n ); + Vector_Sum( workspace->u, 1.0, workspace->u, -alpha, workspace->q, system->n ); + Vector_Sum( workspace->w, 1.0, workspace->w, -alpha, workspace->z, system->n ); + Vector_Sum( workspace->r, 1.0, workspace->r, -alpha, workspace->d, system->n ); + redux[0] = Dot_local( workspace->w, workspace->u, system->n ); + redux[1] = Dot_local( workspace->r, workspace->u, system->n ); + redux[2] = Dot_local( workspace->u, workspace->u, system->n ); + t_vops += MPI_Wtime( ) - t_start; + + MPI_Iallreduce( MPI_IN_PLACE, redux, 3, MPI_DOUBLE, MPI_SUM, mpi_data->world, &req ); - 
/* apply modified Gram-Schmidt to orthogonalize the new residual */ - for ( i = 0; i <= j; i++ ) + /* pre-conditioning */ + if ( control->cm_solver_pre_comp_type == NONE_PC ) + { + Vector_Copy( workspace->m, workspace->w, system->n ); + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) { - workspace->h[i][j] = Dot(workspace->v[i], workspace->v[j + 1], N); - Vector_Add( workspace->v[j + 1], - -workspace->h[i][j], workspace->v[i], N ); + workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j]; } + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( workspace->H_app_inv, workspace->w, workspace->m, H->NT ); +#else + Sparse_MatVec( workspace->H_app_inv, workspace->w, workspace->m, system->n ); +#endif + t_pa += MPI_Wtime( ) - t_start; + } - workspace->h[j + 1][j] = Norm( workspace->v[j + 1], N ); - Vector_Scale( workspace->v[j + 1], - 1. 
/ workspace->h[j + 1][j], workspace->v[j + 1], N ); - // fprintf(stderr, "%d-%d: orthogonalization completed.\n", itr, j); - - /* Givens rotations on the H matrix to make it U */ - for ( i = 0; i <= j; i++ ) - { - if ( i == j ) - { - cc = sqrt(SQR(workspace->h[j][j]) + SQR(workspace->h[j + 1][j])); - workspace->hc[j] = workspace->h[j][j] / cc; - workspace->hs[j] = workspace->h[j + 1][j] / cc; - } - - tmp1 = workspace->hc[i] * workspace->h[i][j] + - workspace->hs[i] * workspace->h[i + 1][j]; - tmp2 = -workspace->hs[i] * workspace->h[i][j] + - workspace->hc[i] * workspace->h[i + 1][j]; - - workspace->h[i][j] = tmp1; - workspace->h[i + 1][j] = tmp2; - } + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->m, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; - /* apply Givens rotations to the rhs as well */ - tmp1 = workspace->hc[j] * workspace->g[j]; - tmp2 = -workspace->hs[j] * workspace->g[j]; - workspace->g[j] = tmp1; - workspace->g[j + 1] = tmp2; + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( H, workspace->m, workspace->n, H->NT ); +#else + Sparse_MatVec( H, workspace->m, workspace->n, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; - // fprintf( stderr, "%10.6f\n", fabs(workspace->g[j+1]) ); + if ( H->format == SYM_HALF_MATRIX ) + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; } - - /* solve Hy = g. 
- H is now upper-triangular, do back-substitution */ - for ( i = j - 1; i >= 0; i-- ) +#if defined(NEUTRAL_TERRITORY) + else { - temp = workspace->g[i]; - for ( k = j - 1; k > i; k-- ) - temp -= workspace->h[i][k] * workspace->y[k]; - workspace->y[i] = temp / workspace->h[i][i]; + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; } +#endif - /* update x = x_0 + Vy */ - for ( i = 0; i < j; i++ ) - Vector_Add( x, workspace->y[i], workspace->v[i], N ); + gamma_old = gamma_new; - /* stopping condition */ - if ( fabs(workspace->g[j]) / bnorm <= tol ) - break; + t_start = MPI_Wtime( ); + MPI_Wait( &req, MPI_STATUS_IGNORE ); + t_allreduce += MPI_Wtime( ) - t_start; + delta = redux[0]; + gamma_new = redux[1]; + norm = sqrt( redux[2] ); } - /*Sparse_MatVec( system, H, x, workspace->b_prm, mpi_data ); - for( i = 0; i < N; ++i ) - workspace->b_prm[i] *= workspace->Hdia_inv[i]; + timings[0] = t_pa; + timings[1] = t_spmv; + timings[2] = t_vops; + timings[3] = t_comm; + timings[4] = t_allreduce; - fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); - for( i = 0; i < N; ++i ) - fprintf( fout, "%10.5f%15.12f%15.12f\n", - workspace->b_prc[i], workspace->b_prm[i], x[i] );*/ + if ( system->my_rank == MASTER_NODE ) + { + MPI_Reduce( MPI_IN_PLACE, timings, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); - fprintf( fout, "GMRES outer: %d, inner: %d - |rel residual| = %15.10f\n", - itr, j, fabs( workspace->g[j] ) / bnorm ); + data->timing.cm_solver_pre_app += timings[0] / control->nprocs; + data->timing.cm_solver_spmv += timings[1] / control->nprocs; + data->timing.cm_solver_vector_ops += timings[2] / control->nprocs; + data->timing.cm_solver_comm += timings[3] / control->nprocs; + data->timing.cm_solver_allreduce += timings[4] / control->nprocs; + } + else + { + MPI_Reduce( timings, NULL, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); + } - if ( itr >= MAX_ITR ) + if ( i >= 
control->cm_solver_max_iters && system->my_rank == MASTER_NODE ) { - fprintf( stderr, "GMRES convergence failed\n" ); - return FAILURE; + fprintf( stderr, "[WARNING] PIPECG convergence failed!\n" ); + return i; } - return SUCCESS; + return i; } -int GMRES_HouseHolder( reax_system *system, storage *workspace, - sparse_matrix *H, real *b, real tol, real *x, - mpi_datatypes* mpi_data, FILE *fout ) +/* Pipelined Preconditioned Conjugate Residual Method + * + * References: + * 1) Hiding global synchronization latency in the preconditioned Conjugate Gradient algorithm, + * P. Ghysels and W. Vanroose, Parallel Computing, 2014. + * */ +int PIPECR( reax_system *system, control_params *control, simulation_data *data, + storage *workspace, sparse_matrix *H, real *b, + real tol, real *x, mpi_datatypes* mpi_data ) { - int i, j, k, itr, N; - real cc, tmp1, tmp2, temp, bnorm; - real v[10000], z[RESTART + 2][10000], w[RESTART + 2]; - real u[RESTART + 2][10000]; - - N = system->N; - bnorm = Norm( b, N ); - - /* apply the diagonal pre-conditioner to rhs */ - for ( i = 0; i < N; ++i ) - workspace->b_prc[i] = b[i] * workspace->Hdia_inv[i]; + int i, j; + real alpha, beta, delta, gamma_old, gamma_new, norm, b_norm; + real t_start, t_pa, t_spmv, t_vops, t_comm, t_allreduce; + real timings[5], redux[4]; + MPI_Request req; + + t_pa = 0.0; + t_spmv = 0.0; + t_vops = 0.0; + t_comm = 0.0; + t_allreduce = 0.0; + + t_start = MPI_Wtime( ); + Dist( system, mpi_data, x, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( H, x, workspace->u, H->NT ); +#else + Sparse_MatVec( H, x, workspace->u, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; - /* GMRES outer-loop */ - for ( itr = 0; itr < MAX_ITR; ++itr ) + if ( H->format == SYM_HALF_MATRIX ) { - /* compute z = r0 */ - Sparse_MatVec( H, x, workspace->b_prm, N ); - - for ( i = 0; i < N; ++i ) - workspace->b_prm[i] *= workspace->Hdia_inv[i]; /* 
pre-conditioner */ - - Vector_Sum( z[0], 1., workspace->b_prc, -1., workspace->b_prm, N ); - - Vector_MakeZero( w, RESTART + 1 ); - w[0] = Norm( z[0], N ); - - Vector_Copy( u[0], z[0], N ); - u[0][0] += ( u[0][0] < 0.0 ? -1 : 1 ) * w[0]; - Vector_Scale( u[0], 1 / Norm( u[0], N ), u[0], N ); + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + } +#if defined(NEUTRAL_TERRITORY) + else + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + } +#endif - w[0] *= ( u[0][0] < 0.0 ? 1 : -1 ); - // fprintf( stderr, "\n\n%12.6f\n", w[0] ); + t_start = MPI_Wtime( ); + Vector_Sum( workspace->r , 1.0, b, -1.0, workspace->u, system->n ); + t_vops += MPI_Wtime( ) - t_start; - /* GMRES inner-loop */ - for ( j = 0; j < RESTART && fabs( w[j] ) / bnorm > tol; j++ ) + /* pre-conditioning */ + if ( control->cm_solver_pre_comp_type == NONE_PC ) + { + Vector_Copy( workspace->u, workspace->r, system->n ); + } + else if ( control->cm_solver_pre_comp_type == JACOBI_PC ) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) { - /* compute v_j */ - Vector_Scale( z[j], -2 * u[j][j], u[j], N ); - z[j][j] += 1.; /* due to e_j */ - - for ( i = j - 1; i >= 0; --i ) - Vector_Add( z[j] + i, -2 * Dot( u[i] + i, z[j] + i, N - i ), u[i] + i, N - i ); - - /* matvec */ - Sparse_MatVec( H, z[j], v, N ); - - for ( k = 0; k < N; ++k ) - v[k] *= workspace->Hdia_inv[k]; /* pre-conditioner */ - - for ( i = 0; i <= j; ++i ) - Vector_Add( v + i, -2 * Dot( u[i] + i, v + i, N - i ), u[i] + i, N - i ); - - if ( !Vector_isZero( v + (j + 1), N - (j + 1) ) ) - { - /* compute the HouseHolder unit vector u_j+1 */ - for ( i = 0; i <= j; ++i ) - u[j + 1][i] = 0; - - Vector_Copy( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ); + workspace->u[j] = workspace->r[j] * workspace->Hdia_inv[j]; + } + t_pa += MPI_Wtime( ) - t_start; + } + else if ( 
control->cm_solver_pre_comp_type == SAI_PC ) + { + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->r, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->u, H->NT ); +#else + Sparse_MatVec( workspace->H_app_inv, workspace->r, workspace->u, system->n ); +#endif + t_pa += MPI_Wtime( ) - t_start; + } - u[j + 1][j + 1] += - ( v[j + 1] < 0.0 ? -1 : 1 ) * Norm( v + (j + 1), N - (j + 1) ); + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->u, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; - Vector_Scale( u[j + 1], 1 / Norm( u[j + 1], N ), u[j + 1], N ); + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( H, workspace->u, workspace->w, H->NT ); +#else + Sparse_MatVec( H, workspace->u, workspace->w, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; - /* overwrite v with P_m+1 * v */ - v[j + 1] -= - 2 * Dot( u[j + 1] + (j + 1), v + (j + 1), N - (j + 1) ) * u[j + 1][j + 1]; - Vector_MakeZero( v + (j + 2), N - (j + 2) ); - } + if ( H->format == SYM_HALF_MATRIX ) + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + } +#if defined(NEUTRAL_TERRITORY) + else + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + } +#endif + //TODO: better loop unrolling and termination condition check + norm = tol + 1.0; - /* previous Givens rotations on H matrix to make it U */ - for ( i = 0; i < j; i++ ) + // TODO: warning: b_norm might be uninitialized + for ( i = 0; i < control->cm_solver_max_iters && norm / b_norm > tol; ++i ) + { + /* pre-conditioning */ + if ( control->cm_solver_pre_comp_type == NONE_PC ) + { + Vector_Copy( workspace->m, workspace->w, system->n ); + } + else if ( control->cm_solver_pre_comp_type == 
JACOBI_PC ) + { + t_start = MPI_Wtime( ); + for ( j = 0; j < system->n; ++j ) { - tmp1 = workspace->hc[i] * v[i] + workspace->hs[i] * v[i + 1]; - tmp2 = -workspace->hs[i] * v[i] + workspace->hc[i] * v[i + 1]; - - v[i] = tmp1; - v[i + 1] = tmp2; + workspace->m[j] = workspace->w[j] * workspace->Hdia_inv[j]; } + t_pa += MPI_Wtime( ) - t_start; + } + else if ( control->cm_solver_pre_comp_type == SAI_PC ) + { + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->w, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + Sparse_MatVec( workspace->H_app_inv, workspace->w, workspace->m, H->NT ); +#else + Sparse_MatVec( workspace->H_app_inv, workspace->w, workspace->m, system->n ); +#endif + t_pa += MPI_Wtime( ) - t_start; + } - /* apply the new Givens rotation to H and right-hand side */ - if ( fabs(v[j + 1]) >= ALMOST_ZERO ) - { - cc = sqrt( SQR( v[j] ) + SQR( v[j + 1] ) ); - workspace->hc[j] = v[j] / cc; - workspace->hs[j] = v[j + 1] / cc; - - tmp1 = workspace->hc[j] * v[j] + workspace->hs[j] * v[j + 1]; - tmp2 = -workspace->hs[j] * v[j] + workspace->hc[j] * v[j + 1]; - - v[j] = tmp1; - v[j + 1] = tmp2; + t_start = MPI_Wtime( ); + redux[0] = Dot_local( workspace->w, workspace->u, system->n ); + redux[1] = Dot_local( workspace->m, workspace->w, system->n ); + redux[2] = Dot_local( workspace->u, workspace->u, system->n ); + redux[3] = Dot_local( b, b, system->n ); + t_vops += MPI_Wtime( ) - t_start; - /* Givens rotations to rhs */ - tmp1 = workspace->hc[j] * w[j]; - tmp2 = -workspace->hs[j] * w[j]; - w[j] = tmp1; - w[j + 1] = tmp2; - } + MPI_Iallreduce( MPI_IN_PLACE, redux, 4, MPI_DOUBLE, MPI_SUM, mpi_data->world, &req ); - /* extend R */ - for ( i = 0; i <= j; ++i ) - workspace->h[i][j] = v[i]; + t_start = MPI_Wtime( ); + Dist( system, mpi_data, workspace->m, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + t_start = MPI_Wtime( ); +#if defined(NEUTRAL_TERRITORY) + 
Sparse_MatVec( H, workspace->m, workspace->n, H->NT ); +#else + Sparse_MatVec( H, workspace->m, workspace->n, system->N ); +#endif + t_spmv += MPI_Wtime( ) - t_start; - // fprintf( stderr, "h:" ); - // for( i = 0; i <= j+1 ; ++i ) - // fprintf( stderr, "%.6f ", h[i][j] ); - // fprintf( stderr, "\n" ); - // fprintf( stderr, "%12.6f\n", w[j+1] ); + if ( H->format == SYM_HALF_MATRIX ) + { + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; } - - - /* solve Hy = w. - H is now upper-triangular, do back-substitution */ - for ( i = j - 1; i >= 0; i-- ) +#if defined(NEUTRAL_TERRITORY) + else { - temp = w[i]; - for ( k = j - 1; k > i; k-- ) - temp -= workspace->h[i][k] * workspace->y[k]; - - workspace->y[i] = temp / workspace->h[i][i]; + t_start = MPI_Wtime( ); + Coll( system, mpi_data, workspace->n, REAL_PTR_TYPE, MPI_DOUBLE ); + t_comm += MPI_Wtime( ) - t_start; + } +#endif + t_start = MPI_Wtime( ); + MPI_Wait( &req, MPI_STATUS_IGNORE ); + t_allreduce += MPI_Wtime( ) - t_start; + gamma_new = redux[0]; + delta = redux[1]; + norm = sqrt( redux[2] ); + b_norm = sqrt( redux[3] ); + + if ( i > 0 ) + { + beta = gamma_new / gamma_old; + alpha = gamma_new / (delta - beta / alpha * gamma_new); + } + else + { + beta = 0.0; + alpha = gamma_new / delta; } - for ( i = j - 1; i >= 0; i-- ) - Vector_Add( x, workspace->y[i], z[i], N ); - - /* stopping condition */ - if ( fabs( w[j] ) / bnorm <= tol ) - break; + t_start = MPI_Wtime( ); + Vector_Sum( workspace->z, 1.0, workspace->n, beta, workspace->z, system->n ); + Vector_Sum( workspace->q, 1.0, workspace->m, beta, workspace->q, system->n ); + Vector_Sum( workspace->p, 1.0, workspace->u, beta, workspace->p, system->n ); + Vector_Sum( workspace->d, 1.0, workspace->w, beta, workspace->d, system->n ); + Vector_Sum( x, 1.0, x, alpha, workspace->p, system->n ); + Vector_Sum( workspace->u, 1.0, workspace->u, -alpha, workspace->q, system->n ); + Vector_Sum( 
workspace->w, 1.0, workspace->w, -alpha, workspace->z, system->n ); + Vector_Sum( workspace->r, 1.0, workspace->r, -alpha, workspace->d, system->n ); + t_vops += MPI_Wtime( ) - t_start; + + gamma_old = gamma_new; } - // Sparse_MatVec( system, H, x, workspace->b_prm ); - // for( i = 0; i < N; ++i ) - // workspace->b_prm[i] *= workspace->Hdia_inv[i]; + timings[0] = t_pa; + timings[1] = t_spmv; + timings[2] = t_vops; + timings[3] = t_comm; + timings[4] = t_allreduce; - // fprintf( fout, "\n%10s%15s%15s\n", "b_prc", "b_prm", "x" ); - // for( i = 0; i < N; ++i ) - // fprintf( fout, "%10.5f%15.12f%15.12f\n", - // workspace->b_prc[i], workspace->b_prm[i], x[i] ); + if ( system->my_rank == MASTER_NODE ) + { + MPI_Reduce( MPI_IN_PLACE, timings, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); - fprintf( fout, "GMRES outer:%d inner:%d iters, |rel residual| = %15.10f\n", - itr, j, fabs( workspace->g[j] ) / bnorm ); + data->timing.cm_solver_pre_app += timings[0] / control->nprocs; + data->timing.cm_solver_spmv += timings[1] / control->nprocs; + data->timing.cm_solver_vector_ops += timings[2] / control->nprocs; + data->timing.cm_solver_comm += timings[3] / control->nprocs; + data->timing.cm_solver_allreduce += timings[4] / control->nprocs; + } + else + { + MPI_Reduce( timings, NULL, 5, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); + } - if ( itr >= MAX_ITR ) + if ( i >= control->cm_solver_max_iters && system->my_rank == MASTER_NODE ) { - fprintf( stderr, "GMRES convergence failed\n" ); - return FAILURE; + fprintf( stderr, "[WARNING] PIPECR convergence failed!\n" ); + return i; } - return SUCCESS; + return i; } -#endif diff --git a/PuReMD/src/linear_solvers.h b/PuReMD/src/linear_solvers.h index 87c2f0ade19586169029b566aa8871d5a0794a77..701ee82d695150b7f4a8660ee843cf46cc6fa873 100644 --- a/PuReMD/src/linear_solvers.h +++ b/PuReMD/src/linear_solvers.h @@ -24,17 +24,27 @@ #include "reax_types.h" -int GMRES( reax_system*, storage*, sparse_matrix*, - real*, real, real*, 
mpi_datatypes*, FILE* ); -int GMRES_HouseHolder( reax_system*, storage*, sparse_matrix*, - real*, real, real*, mpi_datatypes*, FILE* ); -int dual_CG( reax_system*, storage*, sparse_matrix*, - rvec2*, real, rvec2*, mpi_datatypes*, FILE* ); -int CG( reax_system*, storage*, sparse_matrix*, - real*, real, real*, mpi_datatypes*, FILE* ); -int PCG( reax_system*, storage*, sparse_matrix*, real*, real, - sparse_matrix*, sparse_matrix*, real*, mpi_datatypes*, FILE* ); -int sCG( reax_system*, storage*, sparse_matrix*, - real*, real, real*, mpi_datatypes*, FILE* ); + +real setup_sparse_approx_inverse( reax_system*, simulation_data*, storage*, mpi_datatypes*, + sparse_matrix *, sparse_matrix **, int, double ); + +real sparse_approx_inverse( reax_system*, simulation_data*, storage*, mpi_datatypes*, + sparse_matrix*, sparse_matrix*, sparse_matrix**, int ); + +int dual_CG( reax_system*, control_params*, simulation_data*, storage*, sparse_matrix*, + rvec2*, real, rvec2*, mpi_datatypes* ); + +int CG( reax_system*, control_params*, simulation_data*, storage*, sparse_matrix*, + real*, real, real*, mpi_datatypes* ); + +int dual_PIPECG( reax_system*, control_params*, simulation_data*, storage*, sparse_matrix*, + rvec2*, real, rvec2*, mpi_datatypes* ); + +int PIPECG( reax_system*, control_params*, simulation_data*, storage*, sparse_matrix*, + real*, real, real*, mpi_datatypes* ); + +int PIPECR( reax_system*, control_params*, simulation_data*, storage*, sparse_matrix*, + real*, real, real*, mpi_datatypes* ); + #endif diff --git a/PuReMD/src/list.c b/PuReMD/src/list.c index 4ccb03ed94174c6a5bb184ae48687a6e40c73515..922f42cc62ad5b4ccf1494970c5e576bc72f766e 100644 --- a/PuReMD/src/list.c +++ b/PuReMD/src/list.c @@ -30,17 +30,19 @@ /************* allocate list space ******************/ -int Make_List(int n, int num_intrs, int type, reax_list *l, MPI_Comm comm) +int Make_List( int n, int num_intrs, int type, int format, + reax_list *l, MPI_Comm comm ) { l->allocated = 1; l->n = n; 
l->num_intrs = num_intrs; - l->index = (int*) smalloc( n * sizeof(int), "list:index", comm ); - l->end_index = (int*) smalloc( n * sizeof(int), "list:end_index", comm ); + l->index = smalloc( n * sizeof(int), "Make_List:index", comm ); + l->end_index = smalloc( n * sizeof(int), "Make_List:end_index", comm ); l->type = type; + l->format = format; #if defined(DEBUG_FOCUS) fprintf( stderr, "list: n=%d num_intrs=%d type=%d\n", l->n, l->num_intrs, l->type ); @@ -49,42 +51,48 @@ int Make_List(int n, int num_intrs, int type, reax_list *l, MPI_Comm comm) switch ( l->type ) { case TYP_VOID: - l->v = (void*) smalloc(l->num_intrs * sizeof(void*), "list:v", comm); + l->v = smalloc( l->num_intrs * sizeof(void*), + "Make_List:v", comm ); break; case TYP_THREE_BODY: - l->three_body_list = (three_body_interaction_data*) - smalloc( l->num_intrs * sizeof(three_body_interaction_data), - "list:three_bodies", comm ); + l->three_body_list = smalloc( l->num_intrs * sizeof(three_body_interaction_data), + "Make_List:three_bodies", comm ); break; case TYP_BOND: - l->bond_list = (bond_data*) - smalloc( l->num_intrs * sizeof(bond_data), "list:bonds", comm ); + l->bond_list = smalloc( l->num_intrs * sizeof(bond_data), + "Make_List:bonds", comm ); break; case TYP_DBO: - l->dbo_list = (dbond_data*) - smalloc( l->num_intrs * sizeof(dbond_data), "list:dbonds", comm ); + l->dbo_list = smalloc( l->num_intrs * sizeof(dbond_data), + "Make_List:dbonds", comm ); break; case TYP_DDELTA: - l->dDelta_list = (dDelta_data*) - smalloc( l->num_intrs * sizeof(dDelta_data), "list:dDeltas", comm ); + l->dDelta_list = smalloc( l->num_intrs * sizeof(dDelta_data), + "Make_List:dDeltas", comm ); break; case TYP_FAR_NEIGHBOR: - l->far_nbr_list = (far_neighbor_data*) - smalloc(l->num_intrs * sizeof(far_neighbor_data), "list:far_nbrs", comm); + l->far_nbr_list.nbr = smalloc( l->num_intrs * sizeof(int), + "Make_List:far_nbr_list.nbr", comm ); + l->far_nbr_list.rel_box = smalloc( l->num_intrs * sizeof(ivec), + 
"Make_List:far_nbr_list.rel_box", comm ); + l->far_nbr_list.d = smalloc( l->num_intrs * sizeof(real), + "Make_List:far_nbr_list.d", comm ); + l->far_nbr_list.dvec = smalloc( l->num_intrs * sizeof(rvec), + "Make_List:far_nbr_list.dvec", comm ); break; case TYP_HBOND: - l->hbond_list = (hbond_data*) - smalloc( l->num_intrs * sizeof(hbond_data), "list:hbonds", comm ); + l->hbond_list = smalloc( l->num_intrs * sizeof(hbond_data), + "Make_List:hbonds", comm ); break; default: - fprintf( stderr, "ERROR: no %d list type defined!\n", l->type ); + fprintf( stderr, "[ERROR]: no %d list type defined!\n", l->type ); MPI_Abort( comm, INVALID_INPUT ); } @@ -98,31 +106,34 @@ void Delete_List( reax_list *l, MPI_Comm comm ) return; l->allocated = 0; - sfree( l->index, "list:index" ); - sfree( l->end_index, "list:end_index" ); + sfree( l->index, "Delete_List:index" ); + sfree( l->end_index, "Delete_List:end_index" ); switch (l->type) { case TYP_VOID: - sfree( l->v, "list:v" ); + sfree( l->v, "Delete_List:v" ); break; case TYP_HBOND: - sfree( l->hbond_list, "list:hbonds" ); + sfree( l->hbond_list, "Delete_List:hbonds" ); break; case TYP_FAR_NEIGHBOR: - sfree( l->far_nbr_list, "list:far_nbrs" ); + sfree( l->far_nbr_list.nbr, "Delete_List:far_nbr_list.nbr" ); + sfree( l->far_nbr_list.rel_box, "Delete_List:far_nbr_list.rel_box" ); + sfree( l->far_nbr_list.d, "Delete_List:far_nbr_list.d" ); + sfree( l->far_nbr_list.dvec, "Delete_List:far_nbr_list.dvec" ); break; case TYP_BOND: - sfree( l->bond_list, "list:bonds" ); + sfree( l->bond_list, "Delete_List:bonds" ); break; case TYP_DBO: - sfree( l->dbo_list, "list:dbos" ); + sfree( l->dbo_list, "Delete_List:dbos" ); break; case TYP_DDELTA: - sfree( l->dDelta_list, "list:dDeltas" ); + sfree( l->dDelta_list, "Delete_List:dDeltas" ); break; case TYP_THREE_BODY: - sfree( l->three_body_list, "list:three_bodies" ); + sfree( l->three_body_list, "Delete_List:three_bodies" ); break; default: diff --git a/PuReMD/src/list.h b/PuReMD/src/list.h index 
da400b76e1303bf5e587e949f664bc93225de3dc..918256f595cc6994626693b5b74db1d14b4af0fc 100644 --- a/PuReMD/src/list.h +++ b/PuReMD/src/list.h @@ -25,7 +25,7 @@ #include "reax_types.h" -int Make_List( int, int, int, reax_list*, MPI_Comm ); +int Make_List( int, int, int, int, reax_list*, MPI_Comm ); void Delete_List( reax_list*, MPI_Comm ); diff --git a/PuReMD/src/neighbors.c b/PuReMD/src/neighbors.c index 5be0016eda64a6e4ec56f02d77695cd212fcdd55..a0701698b2f3ad1e5cfec0b7c5c903c5c7470eb1 100644 --- a/PuReMD/src/neighbors.c +++ b/PuReMD/src/neighbors.c @@ -74,14 +74,13 @@ void Generate_Neighbor_Lists( reax_system *system, simulation_data *data, grid *g; grid_cell *gci, *gcj; reax_list *far_nbrs; - far_neighbor_data *nbr_data; reax_atom *atom1, *atom2; #if defined(LOG_PERFORMANCE) real t_start = 0, t_elapsed = 0; if ( system->my_rank == MASTER_NODE ) - t_start = Get_Time( ); + t_start = MPI_Wtime(); #endif // fprintf( stderr, "\n\tentered nbrs - " ); @@ -91,7 +90,9 @@ void Generate_Neighbor_Lists( reax_system *system, simulation_data *data, /* first pick up a cell in the grid */ for ( i = 0; i < g->ncells[0]; i++ ) + { for ( j = 0; j < g->ncells[1]; j++ ) + { for ( k = 0; k < g->ncells[2]; k++ ) { gci = &(g->cells[i][j][k]); @@ -99,20 +100,37 @@ void Generate_Neighbor_Lists( reax_system *system, simulation_data *data, //fprintf( stderr, "gridcell %d %d %d\n", i, j, k ); /* pick up an atom from the current cell */ - for (l = gci->str; l < gci->end; ++l ) + for ( l = gci->str; l < gci->end; ++l ) { - atom1 = &(system->my_atoms[l]); + atom1 = &system->my_atoms[l]; +#if defined(NEUTRAL_TERRITORY) + if( gci->type >= NT_NBRS && gci->type < NT_NBRS + 6 ) + { + atom1->nt_dir = gci->type - NT_NBRS; + } + else + { + atom1->nt_dir = -1; + } +#endif Set_Start_Index( l, num_far, far_nbrs ); //fprintf( stderr, "\tatom %d\n", atom1 ); itr = 0; while ( (gcj = gci->nbrs[itr]) != NULL ) { - if ( gci->str <= gcj->str && - (DistSqr_to_Special_Point(gci->nbrs_cp[itr], atom1->x) <= cutoff) ) + 
if ( ((far_nbrs->format == HALF_LIST && gci->str <= gcj->str) + || far_nbrs->format == FULL_LIST) + && (DistSqr_to_Special_Point(gci->nbrs_cp[itr], atom1->x) <= cutoff) ) + { /* pick up another atom from the neighbor cell */ for ( m = gcj->str; m < gcj->end; ++m ) - if ( l < m ) // prevent recounting same pairs within a gcell + { + /* HALF_LIST: prevent recounting same pairs within a gcell and + * make half-list + * FULL_LIST: prevent recounting same pairs within a gcell */ + if ( (far_nbrs->format == HALF_LIST && l < m) + || (far_nbrs->format == FULL_LIST && l != m) ) { atom2 = &(system->my_atoms[m]); dvec[0] = atom2->x[0] - atom1->x[0]; @@ -121,31 +139,32 @@ void Generate_Neighbor_Lists( reax_system *system, simulation_data *data, d = rvec_Norm_Sqr( dvec ); if ( d <= cutoff ) { - nbr_data = &(far_nbrs->far_nbr_list[num_far]); - nbr_data->nbr = m; - nbr_data->d = sqrt(d); - rvec_Copy( nbr_data->dvec, dvec ); - //ivec_Copy( nbr_data->rel_box, gcj->rel_box ); - ivec_ScaledSum( nbr_data->rel_box, - 1, gcj->rel_box, -1, gci->rel_box ); + far_nbrs->far_nbr_list.nbr[num_far] = m; + far_nbrs->far_nbr_list.d[num_far] = sqrt(d); + rvec_Copy( far_nbrs->far_nbr_list.dvec[num_far], dvec ); + ivec_ScaledSum( far_nbrs->far_nbr_list.rel_box[num_far], + 1, gcj->rel_box, -1, gci->rel_box ); ++num_far; } } + } + } + ++itr; } + Set_End_Index( l, num_far, far_nbrs ); - //fprintf(stderr, "i:%d, start: %d, end: %d - itr: %d\n", - // atom1,Start_Index(atom1,far_nbrs),End_Index(atom1,far_nbrs), - // itr); } } + } + } workspace->realloc.num_far = num_far; #if defined(LOG_PERFORMANCE) if ( system->my_rank == MASTER_NODE ) { - t_elapsed = Get_Timing_Info( t_start ); + t_elapsed = MPI_Wtime() - t_start; data->timing.nbrs += t_elapsed; } #endif @@ -165,7 +184,8 @@ void Generate_Neighbor_Lists( reax_system *system, simulation_data *data, } -int Estimate_NumNeighbors( reax_system *system, reax_list **lists ) +int Estimate_NumNeighbors( reax_system *system, reax_list **lists, + int 
far_nbr_list_format ) { int i, j, k, l, m, itr, num_far; //, tmp, tested; real d, cutoff; @@ -180,7 +200,9 @@ int Estimate_NumNeighbors( reax_system *system, reax_list **lists ) /* first pick up a cell in the grid */ for ( i = 0; i < g->ncells[0]; i++ ) + { for ( j = 0; j < g->ncells[1]; j++ ) + { for ( k = 0; k < g->ncells[2]; k++ ) { gci = &(g->cells[i][j][k]); @@ -190,18 +212,34 @@ int Estimate_NumNeighbors( reax_system *system, reax_list **lists ) /* pick up an atom from the current cell */ for ( l = gci->str; l < gci->end; ++l ) { - atom1 = &(system->my_atoms[l]); + atom1 = &system->my_atoms[l]; +#if defined(NEUTRAL_TERRITORY) + if( gci->type >= NT_NBRS && gci->type < NT_NBRS + 6 ) + { + atom1->nt_dir = gci->type - NT_NBRS; + } + else + { + atom1->nt_dir = -1; + } +#endif //fprintf( stderr, "\tatom %d: ", l ); //tmp = num_far; tested = 0; itr = 0; while ( (gcj = gci->nbrs[itr]) != NULL ) { - if (gci->str <= gcj->str && - (DistSqr_to_Special_Point(gci->nbrs_cp[itr], atom1->x) <= cutoff)) - //fprintf( stderr, "\t\tgcell2: %d\n", itr ); + if ( ((far_nbr_list_format == HALF_LIST && gci->str <= gcj->str) + || far_nbr_list_format == FULL_LIST) + && (DistSqr_to_Special_Point(gci->nbrs_cp[itr], atom1->x) <= cutoff)) + { /* pick up another atom from the neighbor cell */ for ( m = gcj->str; m < gcj->end; ++m ) - if ( l < m ) + { + /* HALF_LIST: prevent recounting same pairs within a gcell and + * make half-list + * FULL_LIST: prevent recounting same pairs within a gcell */ + if ( (far_nbr_list_format == HALF_LIST && l < m) + || (far_nbr_list_format == FULL_LIST && l != m) ) { //fprintf( stderr, "\t\t\tatom2=%d\n", m ); atom2 = &(system->my_atoms[m]); @@ -212,13 +250,15 @@ int Estimate_NumNeighbors( reax_system *system, reax_list **lists ) if ( d <= cutoff ) ++num_far; } + } + } ++itr; } - //fprintf( stderr, "itr: %d, tested: %d, num_nbrs: %d\n", - // itr, tested, num_far-tmp ); } } + } + } #if defined(DEBUG_FOCUS) fprintf( stderr, "p%d: estimate nbrs done - 
num_far=%d\n", diff --git a/PuReMD/src/neighbors.h b/PuReMD/src/neighbors.h index 0a1e3daf289883268e77fbefd7f6a24deaa582dd..818fc7245cf39e132a7198907372f002784d8273 100644 --- a/PuReMD/src/neighbors.h +++ b/PuReMD/src/neighbors.h @@ -33,6 +33,6 @@ void Generate_Neighbor_Lists( reax_system*, simulation_data*, storage*, reax_list** ); -int Estimate_NumNeighbors( reax_system*, reax_list** ); +int Estimate_NumNeighbors( reax_system*, reax_list**, int ); #endif diff --git a/PuReMD/src/nonbonded.c b/PuReMD/src/nonbonded.c index ab25b807d5e0a0263a4d8f89ecc475c8615b8bdd..174bfada844ca271ac2e68c48aacd7014091540a 100644 --- a/PuReMD/src/nonbonded.c +++ b/PuReMD/src/nonbonded.c @@ -47,7 +47,6 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, real e_ele, e_vdW, e_core; rvec temp, ext_press; two_body_parameters *twbp; - far_neighbor_data *nbr_pj; reax_list *far_nbrs; // rtensor temp_rtensor, total_rtensor; @@ -60,25 +59,25 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, for ( i = 0; i < natoms; ++i ) { - start_i = Start_Index(i, far_nbrs); - end_i = End_Index(i, far_nbrs); + start_i = Start_Index( i, far_nbrs ); + end_i = End_Index( i, far_nbrs ); orig_i = system->my_atoms[i].orig_id; //fprintf( stderr, "i:%d, start_i: %d, end_i: %d\n", i, start_i, end_i ); for ( pj = start_i; pj < end_i; ++pj ) { - nbr_pj = &(far_nbrs->far_nbr_list[pj]); - j = nbr_pj->nbr; - orig_j = system->my_atoms[j].orig_id; + j = far_nbrs->far_nbr_list.nbr[pj]; + orig_j = system->my_atoms[j].orig_id; - if ( nbr_pj->d <= control->nonb_cut && (j < natoms || orig_i < orig_j) ) + if ( far_nbrs->far_nbr_list.d[pj] <= control->nonb_cut + && ((far_nbrs->format == HALF_LIST && (j < natoms || orig_i < orig_j)) + || (far_nbrs->format == FULL_LIST && orig_i < orig_j)) ) { - r_ij = nbr_pj->d; - twbp = &(system->reax_param.tbp[ system->my_atoms[i].type ] - [ system->my_atoms[j].type ]); + r_ij = far_nbrs->far_nbr_list.d[pj]; + twbp = &system->reax_param.tbp[ + 
system->my_atoms[i].type ][ system->my_atoms[j].type ]; /* Calculate Taper and its derivative */ - // Tap = nbr_pj->Tap; -- precomputed during compte_H Tap = workspace->Tap[7] * r_ij + workspace->Tap[6]; Tap = Tap * r_ij + workspace->Tap[5]; Tap = Tap * r_ij + workspace->Tap[4]; @@ -94,12 +93,13 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, dTap = dTap * r_ij + 2 * workspace->Tap[2]; dTap += workspace->Tap[1] / r_ij; - /*vdWaals Calculations*/ - if (system->reax_param.gp.vdw_type == 1 || system->reax_param.gp.vdw_type == 3) + /* vdWaals Calculations */ + if ( system->reax_param.gp.vdw_type == 1 + || system->reax_param.gp.vdw_type == 3 ) { // shielding - powr_vdW1 = pow(r_ij, p_vdW1); - powgi_vdW1 = pow( 1.0 / twbp->gamma_w, p_vdW1); + powr_vdW1 = pow( r_ij, p_vdW1 ); + powgi_vdW1 = pow( 1.0 / twbp->gamma_w, p_vdW1 ); fn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i ); exp1 = exp( twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); @@ -108,11 +108,11 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, e_vdW = twbp->D * (exp1 - 2.0 * exp2); data->my_en.e_vdW += Tap * e_vdW; - dfn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0) * - pow(r_ij, p_vdW1 - 2.0); + dfn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0 ) + * pow( r_ij, p_vdW1 - 2.0 ); - CEvd = dTap * e_vdW - - Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) * dfn13; + CEvd = dTap * e_vdW - Tap * twbp->D + * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) * dfn13; } else // no shielding { @@ -154,24 +154,30 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, if ( control->virial == 0 ) { - rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), nbr_pj->dvec ); - rvec_ScaledAdd( workspace->f[j], +(CEvd + CEclmb), nbr_pj->dvec ); + rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), + far_nbrs->far_nbr_list.dvec[pj] ); + rvec_ScaledAdd( workspace->f[j], +(CEvd + CEclmb), + far_nbrs->far_nbr_list.dvec[pj] ); } else /* NPT, iNPT or sNPT */ { /* for pressure 
coupling, terms not related to bond order derivatives are added directly into pressure vector/tensor */ - rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec ); + rvec_Scale( temp, CEvd + CEclmb, + far_nbrs->far_nbr_list.dvec[pj] ); rvec_ScaledAdd( workspace->f[i], -1., temp ); rvec_Add( workspace->f[j], temp ); - rvec_iMultiply( ext_press, nbr_pj->rel_box, temp ); + rvec_iMultiply( ext_press, + far_nbrs->far_nbr_list.rel_box[pj], temp ); rvec_Add( data->my_ext_press, ext_press ); // fprintf( stderr, "nonbonded(%d,%d): rel_box (%f %f %f) // force(%f %f %f) ext_press (%12.6f %12.6f %12.6f)\n", - // i, j, nbr_pj->rel_box[0], nbr_pj->rel_box[1], nbr_pj->rel_box[2], + // i, j, far_nbrs->far_nbr_list.rel_box[pj][0], + // far_nbrs->far_nbr_list.rel_box[pj][1], + // far_nbrs->far_nbr_list.rel_box[pj][2], // temp[0], temp[1], temp[2], // data->ext_press[0], data->ext_press[1], data->ext_press[2] ); } @@ -193,10 +199,14 @@ void vdW_Coulomb_Energy( reax_system *system, control_params *control, e_ele, data->my_en.e_ele ); #endif #ifdef TEST_FORCES - rvec_ScaledAdd( workspace->f_vdw[i], -CEvd, nbr_pj->dvec ); - rvec_ScaledAdd( workspace->f_vdw[j], +CEvd, nbr_pj->dvec ); - rvec_ScaledAdd( workspace->f_ele[i], -CEclmb, nbr_pj->dvec ); - rvec_ScaledAdd( workspace->f_ele[j], +CEclmb, nbr_pj->dvec ); + rvec_ScaledAdd( workspace->f_vdw[i], -CEvd, + far_nbrs->far_nbr_list.dvec[pj] ); + rvec_ScaledAdd( workspace->f_vdw[j], +CEvd, + far_nbrs->far_nbr_list.dvec[pj] ); + rvec_ScaledAdd( workspace->f_ele[i], -CEclmb, + far_nbrs->far_nbr_list.dvec[pj] ); + rvec_ScaledAdd( workspace->f_ele[j], +CEclmb, + far_nbrs->far_nbr_list.dvec[pj] ); #endif } } @@ -225,7 +235,6 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, real e_vdW, e_ele; real CEvd, CEclmb; rvec temp, ext_press; - far_neighbor_data *nbr_pj; reax_list *far_nbrs; LR_lookup_table *t; @@ -245,19 +254,19 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, for ( pj = start_i; 
pj < end_i; ++pj ) { - nbr_pj = &(far_nbrs->far_nbr_list[pj]); - j = nbr_pj->nbr; - orig_j = system->my_atoms[j].orig_id; + j = far_nbrs->far_nbr_list.nbr[pj]; + orig_j = system->my_atoms[j].orig_id; - if ( nbr_pj->d <= control->nonb_cut && (j < natoms || orig_i < orig_j) ) + if ( far_nbrs->far_nbr_list.d[pj] <= control->nonb_cut + && ((far_nbrs->format == HALF_LIST && (j < natoms || orig_i < orig_j)) + || (far_nbrs->format == FULL_LIST && orig_i < orig_j)) ) { - j = nbr_pj->nbr; type_j = system->my_atoms[j].type; - r_ij = nbr_pj->d; - tmin = MIN( type_i, type_j ); - tmax = MAX( type_i, type_j ); - t = &( LR[tmin][tmax] ); - // table = &( LR[type_i][type_j] ); + r_ij = far_nbrs->far_nbr_list.d[pj]; + tmin = MIN( type_i, type_j ); + tmax = MAX( type_i, type_j ); + t = &LR[tmin][tmax]; + // table = &LR[type_i][type_j]; /* Cubic Spline Interpolation */ r = (int)(r_ij * t->inv_dx); @@ -288,19 +297,21 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, if ( control->virial == 0 ) { - rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), nbr_pj->dvec ); - rvec_ScaledAdd( workspace->f[j], +(CEvd + CEclmb), nbr_pj->dvec ); + rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), + far_nbrs->far_nbr_list.dvec[pj] ); + rvec_ScaledAdd( workspace->f[j], +(CEvd + CEclmb), + far_nbrs->far_nbr_list.dvec[pj] ); } else // NPT, iNPT or sNPT { /* for pressure coupling, terms not related to bond order derivatives are added directly into pressure vector/tensor */ - rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec ); + rvec_Scale( temp, CEvd + CEclmb, far_nbrs->far_nbr_list.dvec[pj] ); rvec_ScaledAdd( workspace->f[i], -1., temp ); rvec_Add( workspace->f[j], temp ); - rvec_iMultiply( ext_press, nbr_pj->rel_box, temp ); + rvec_iMultiply( ext_press, far_nbrs->far_nbr_list.rel_box[pj], temp ); rvec_Add( data->my_ext_press, ext_press ); } @@ -316,10 +327,14 @@ void Tabulated_vdW_Coulomb_Energy( reax_system *system, control_params *control, e_ele, data->my_en.e_ele ); 
#endif #ifdef TEST_FORCES - rvec_ScaledAdd( workspace->f_vdw[i], -CEvd, nbr_pj->dvec ); - rvec_ScaledAdd( workspace->f_vdw[j], +CEvd, nbr_pj->dvec ); - rvec_ScaledAdd( workspace->f_ele[i], -CEclmb, nbr_pj->dvec ); - rvec_ScaledAdd( workspace->f_ele[j], +CEclmb, nbr_pj->dvec ); + rvec_ScaledAdd( workspace->f_vdw[i], -CEvd, + far_nbrs->far_nbr_list.dvec[pj] ); + rvec_ScaledAdd( workspace->f_vdw[j], +CEvd, + far_nbrs->far_nbr_list.dvec[pj] ); + rvec_ScaledAdd( workspace->f_ele[i], -CEclmb, + far_nbrs->far_nbr_list.dvec[pj] ); + rvec_ScaledAdd( workspace->f_ele[j], +CEclmb, + far_nbrs->far_nbr_list.dvec[pj] ); #endif } } diff --git a/PuReMD/src/parallelreax.c b/PuReMD/src/parallelreax.c index 4b401ad603db53f5b982a2c7fc24861a9b9a6495..359aa1683f9489e0e11f0da53ed52804f2af338f 100644 --- a/PuReMD/src/parallelreax.c +++ b/PuReMD/src/parallelreax.c @@ -134,21 +134,15 @@ int main( int argc, char* argv[] ) } /* allocated main datastructures */ - system = (reax_system *) - smalloc( sizeof(reax_system), "system", MPI_COMM_WORLD ); - control = (control_params *) - smalloc( sizeof(control_params), "control", MPI_COMM_WORLD ); - data = (simulation_data *) - smalloc( sizeof(simulation_data), "data", MPI_COMM_WORLD ); - - workspace = (storage *) - smalloc( sizeof(storage), "workspace", MPI_COMM_WORLD ); - lists = (reax_list **) - smalloc( LIST_N * sizeof(reax_list*), "lists", MPI_COMM_WORLD ); + system = smalloc( sizeof(reax_system), "system", MPI_COMM_WORLD ); + control = smalloc( sizeof(control_params), "control", MPI_COMM_WORLD ); + data = smalloc( sizeof(simulation_data), "data", MPI_COMM_WORLD ); + + workspace = smalloc( sizeof(storage), "workspace", MPI_COMM_WORLD ); + lists = smalloc( LIST_N * sizeof(reax_list*), "lists", MPI_COMM_WORLD ); for ( i = 0; i < LIST_N; ++i ) { - lists[i] = (reax_list *) - smalloc( sizeof(reax_list), "lists[i]", MPI_COMM_WORLD ); + lists[i] = smalloc( sizeof(reax_list), "lists[i]", MPI_COMM_WORLD ); lists[i]->allocated = 0; lists[i]->n = 0; 
lists[i]->num_intrs = 0; @@ -160,25 +154,22 @@ int main( int argc, char* argv[] ) lists[i]->bond_list = NULL; lists[i]->dbo_list = NULL; lists[i]->dDelta_list = NULL; - lists[i]->far_nbr_list = NULL; lists[i]->hbond_list = NULL; } - out_control = (output_controls *) - smalloc( sizeof(output_controls), "out_control", MPI_COMM_WORLD ); - mpi_data = (mpi_datatypes *) - smalloc( sizeof(mpi_datatypes), "mpi_data", MPI_COMM_WORLD ); + out_control = smalloc( sizeof(output_controls), "out_control", MPI_COMM_WORLD ); + mpi_data = smalloc( sizeof(mpi_datatypes), "mpi_data", MPI_COMM_WORLD ); /* setup the parallel environment */ - MPI_Comm_size( MPI_COMM_WORLD, &(control->nprocs) ); - MPI_Comm_rank( MPI_COMM_WORLD, &(system->my_rank) ); + MPI_Comm_size( MPI_COMM_WORLD, &control->nprocs ); + MPI_Comm_rank( MPI_COMM_WORLD, &system->my_rank ); system->wsize = control->nprocs; - system->global_offset = (int*) - scalloc( system->wsize + 1, sizeof(int), "global_offset", MPI_COMM_WORLD ); + system->global_offset = scalloc( system->wsize + 1, sizeof(int), + "global_offset", MPI_COMM_WORLD ); /* read system description files */ Read_System( argv[1], argv[2], argv[3], system, control, data, workspace, out_control, mpi_data ); - + #if defined(DEBUG) fprintf( stderr, "p%d: read simulation info\n", system->my_rank ); MPI_Barrier( MPI_COMM_WORLD ); @@ -186,7 +177,9 @@ int main( int argc, char* argv[] ) /* measure total simulation time after input is read */ if ( system->my_rank == MASTER_NODE ) - t_start = Get_Time( ); + { + t_start = MPI_Wtime( ); + } /* initialize datastructures */ Initialize( system, control, data, workspace, lists, out_control, mpi_data ); @@ -211,26 +204,46 @@ int main( int argc, char* argv[] ) #endif /* start the simulation */ + int total_itr = data->timing.cm_solver_iters; for ( ++data->step; data->step <= control->nsteps; data->step++ ) { if ( control->T_mode ) + { Temperature_Control( control, data ); + } Evolve( system, control, data, workspace, lists, 
out_control, mpi_data ); Post_Evolve(system, control, data, workspace, lists, out_control, mpi_data); + + if ( system->my_rank == MASTER_NODE + && out_control->energy_update_freq > 0 + && data->step % out_control->energy_update_freq == 0 ) + { + total_itr += data->timing.cm_solver_iters; + } + Output_Results( system, control, data, lists, out_control, mpi_data ); //Analysis(system, control, data, workspace, lists, out_control, mpi_data); /* dump restart info */ - if ( out_control->restart_freq && - (data->step - data->prev_steps) % out_control->restart_freq == 0 ) + if ( out_control->restart_freq + && (data->step - data->prev_steps) % out_control->restart_freq == 0 ) { if ( out_control->restart_format == WRITE_ASCII ) + { Write_Restart( system, control, data, out_control, mpi_data ); + } else if ( out_control->restart_format == WRITE_BINARY ) + { Write_Binary_Restart( system, control, data, out_control, mpi_data ); + } } +// if ( data->step == 1 || data->step == control->nsteps ) +// { +// Write_PDB( system, lists, data, control, mpi_data, out_control ); +// } + #if defined(DEBUG) fprintf( stderr, "p%d: step%d completed\n", system->my_rank, data->step ); MPI_Barrier( mpi_data->world ); @@ -240,14 +253,15 @@ int main( int argc, char* argv[] ) /* end of the simulation, write total simulation time */ if ( system->my_rank == MASTER_NODE ) { - t_elapsed = Get_Timing_Info( t_start ); + t_elapsed = MPI_Wtime() - t_start; fprintf( out_control->out, "Total Simulation Time: %.2f secs\n", t_elapsed ); + fprintf( out_control->log, "Avg. 
# of Solver Itrs: %.2f\n", total_itr/((double)control->nsteps) ); } - // Write_PDB( &system, &(lists[BOND]), &out_control ); + //Write_PDB( system, lists, data, control, mpi_data, out_control ); Close_Output_Files( system, control, out_control, mpi_data ); - MPI_Finalize(); + MPI_Finalize( ); /* de-allocate data structures */ sfree( system, "system" ); diff --git a/PuReMD/src/qEq.c b/PuReMD/src/qEq.c index 15cc0249cab467b4813e1d6edeacc17de460472f..3ba18eeb8bb78533f4b894d7b28ff8b29b86f0a1 100644 --- a/PuReMD/src/qEq.c +++ b/PuReMD/src/qEq.c @@ -41,7 +41,7 @@ void Sort_Matrix_Rows( sparse_matrix *A ) si = A->start[i]; ei = A->end[i]; qsort( &(A->entries[si]), ei - si, - sizeof(sparse_matrix_entry), compare_matrix_entry ); + sizeof(sparse_matrix_entry), compare_matrix_entry ); } } @@ -103,7 +103,7 @@ int Estimate_LU_Fill( sparse_matrix *A, real *droptol ) void ICHOLT( sparse_matrix *A, real *droptol, - sparse_matrix *L, sparse_matrix *U ) + sparse_matrix *L, sparse_matrix *U ) { sparse_matrix_entry tmp[1000]; int i, j, pj, k1, k2, tmptop, Utop; @@ -231,52 +231,55 @@ void ICHOLT( sparse_matrix *A, real *droptol, void Init_MatVec( reax_system *system, simulation_data *data, - control_params *control, storage *workspace, - mpi_datatypes *mpi_data ) + control_params *control, storage *workspace, + mpi_datatypes *mpi_data ) { int i; //, fillin; reax_atom *atom; - /*if( (data->step - data->prev_steps) % control->refactor == 0 || - workspace->L == NULL ) { - //Print_Linear_System( system, control, workspace, data->step ); - Sort_Matrix_Rows( workspace->H ); - fprintf( stderr, "H matrix sorted\n" ); - Calculate_Droptol( workspace->H, workspace->droptol, control->droptol ); - fprintf( stderr, "drop tolerances calculated\n" ); - if( workspace->L == NULL ) { - fillin = Estimate_LU_Fill( workspace->H, workspace->droptol ); - - if( Allocate_Matrix( &(workspace->L), workspace->H->cap, fillin ) == 0 || - Allocate_Matrix( &(workspace->U), workspace->H->cap, fillin ) == 0 ) { - 
fprintf( stderr, "not enough memory for LU matrices. terminating.\n" ); - MPI_Abort( mpi_data->world, INSUFFICIENT_MEMORY ); + /*if( (data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0 || + workspace->L == NULL ) + { + //Print_Linear_System( system, control, workspace, data->step ); + Sort_Matrix_Rows( workspace->H ); + fprintf( stderr, "H matrix sorted\n" ); + Calculate_Droptol( workspace->H, workspace->droptol, control->cm_solver_pre_comp_droptol ); + fprintf( stderr, "drop tolerances calculated\n" ); + if( workspace->L == NULL ) + { + fillin = Estimate_LU_Fill( workspace->H, workspace->droptol ); + + if( Allocate_Matrix( &(workspace->L), workspace->H->cap, fillin, FULL_MATRIX, comm ) == 0 || + Allocate_Matrix( &(workspace->U), workspace->H->cap, fillin, FULL_MATRIX, comm ) == 0 ) + { + fprintf( stderr, "not enough memory for LU matrices. terminating.\n" ); + MPI_Abort( mpi_data->world, INSUFFICIENT_MEMORY ); + } + + workspace->L->n = workspace->H->n; + workspace->U->n = workspace->H->n; +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d: n=%d, fillin = %d\n", + system->my_rank, workspace->L->n, fillin ); + fprintf( stderr, "p%d: allocated memory: L = U = %ldMB\n", + system->my_rank,fillin*sizeof(sparse_matrix_entry)/(1024*1024) ); +#endif } - workspace->L->n = workspace->H->n; - workspace->U->n = workspace->H->n; - #if defined(DEBUG_FOCUS) - fprintf( stderr, "p%d: n=%d, fillin = %d\n", - system->my_rank, workspace->L->n, fillin ); - fprintf( stderr, "p%d: allocated memory: L = U = %ldMB\n", - system->my_rank,fillin*sizeof(sparse_matrix_entry)/(1024*1024) ); - #endif - } - - ICHOLT( workspace->H, workspace->droptol, workspace->L, workspace->U ); - #if defined(DEBUG_FOCUS) - fprintf( stderr, "p%d: icholt finished\n", system->my_rank ); - //sprintf( fname, "%s.L%d.out", control->sim_name, data->step ); - //Print_Sparse_Matrix2( workspace->L, fname ); - //Print_Sparse_Matrix( U ); - #endif + ICHOLT( workspace->H, workspace->droptol, 
workspace->L, workspace->U ); +#if defined(DEBUG_FOCUS) + fprintf( stderr, "p%d: icholt finished\n", system->my_rank ); + //sprintf( fname, "%s.L%d.out", control->sim_name, data->step ); + //Print_Sparse_Matrix2( workspace->L, fname ); + //Print_Sparse_Matrix( U ); +#endif }*/ //TODO: fill in code for setting up and computing SAI, see sPuReMD code, // and remove diagonal preconditioner computation below (workspace->Hdia_inv) -// setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt, -// &workspace->H_spar_patt_full, &workspace->H_app_inv, -// control->cm_solver_pre_comp_sai_thres ); + // setup_sparse_approx_inverse( Hptr, &workspace->H_full, &workspace->H_spar_patt, + // &workspace->H_spar_patt_full, &workspace->H_app_inv, + // control->cm_solver_pre_comp_sai_thres ); for ( i = 0; i < system->n; ++i ) { @@ -313,16 +316,15 @@ void Init_MatVec( reax_system *system, simulation_data *data, void Calculate_Charges( reax_system *system, storage *workspace, - mpi_datatypes *mpi_data ) + mpi_datatypes *mpi_data ) { - int i, scale; - real u;//, s_sum, t_sum; - rvec2 my_sum, all_sum; + int i; + real u;//, s_sum, t_sum; + rvec2 my_sum, all_sum; reax_atom *atom; real *q; - scale = sizeof(real) / sizeof(void); - q = (real*) malloc(system->N * sizeof(real)); + q = malloc( system->N * sizeof(real) ); //s_sum = Parallel_Vector_Acc(workspace->s, system->n, mpi_data->world); //t_sum = Parallel_Vector_Acc(workspace->t, system->n, mpi_data->world); @@ -347,73 +349,210 @@ void Calculate_Charges( reax_system *system, storage *workspace, atom->s[3] = atom->s[2]; atom->s[2] = atom->s[1]; atom->s[1] = atom->s[0]; - //atom->s[0] = workspace->s[i]; atom->s[0] = workspace->x[i][0]; atom->t[3] = atom->t[2]; atom->t[2] = atom->t[1]; atom->t[1] = atom->t[0]; - //atom->t[0] = workspace->t[i]; atom->t[0] = workspace->x[i][1]; } - Dist( system, mpi_data, q, MPI_DOUBLE, scale, real_packer ); + Dist_FS( system, mpi_data, q, REAL_PTR_TYPE, MPI_DOUBLE ); + for ( i = system->n; i 
< system->N; ++i ) + { system->my_atoms[i].q = q[i]; + } - sfree(q, "q"); + sfree( q, "q" ); +} + + +static void Setup_Preconditioner_QEq( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, mpi_datatypes *mpi_data ) +{ + real time, t_sort, t_pc, total_sort, total_pc; + + /* sort H needed for SpMV's in linear solver, H or H_sp needed for preconditioning */ + time = MPI_Wtime(); + Sort_Matrix_Rows( workspace->H ); + t_sort = MPI_Wtime() - time; + + t_pc = setup_sparse_approx_inverse( system, data, workspace, mpi_data, workspace->H, &workspace->H_spar_patt, + control->nprocs, control->cm_solver_pre_comp_sai_thres ); + + + MPI_Reduce(&t_sort, &total_sort, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + MPI_Reduce(&t_pc, &total_pc, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world); + + if( system->my_rank == MASTER_NODE ) + { + data->timing.cm_sort += total_sort / control->nprocs; + data->timing.cm_solver_pre_comp += total_pc / control->nprocs; + } +} + +static void Compute_Preconditioner_QEq( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, mpi_datatypes *mpi_data ) +{ + real t_pc, total_pc; +#if defined(HAVE_LAPACKE) || defined(HAVE_LAPACKE_MKL) + t_pc = sparse_approx_inverse( system, data, workspace, mpi_data, + workspace->H, workspace->H_spar_patt, &workspace->H_app_inv, control->nprocs ); + + MPI_Reduce( &t_pc, &total_pc, 1, MPI_DOUBLE, MPI_SUM, MASTER_NODE, mpi_data->world ); + + if( system->my_rank == MASTER_NODE ) + { + data->timing.cm_solver_pre_comp += total_pc / control->nprocs; + } +#else + fprintf( stderr, "[ERROR] LAPACKE support disabled. Re-compile before enabling. 
Terminating...\n" ); + exit( INVALID_INPUT ); +#endif } void QEq( reax_system *system, control_params *control, simulation_data *data, - storage *workspace, output_controls *out_control, - mpi_datatypes *mpi_data ) + storage *workspace, output_controls *out_control, + mpi_datatypes *mpi_data ) { - int j, s_matvecs, t_matvecs; + int j, iters; + + iters = 0; Init_MatVec( system, data, control, workspace, mpi_data ); - //if( data->step == 50010 ) { - // Print_Linear_System( system, control, workspace, data->step ); - // } #if defined(DEBUG) fprintf( stderr, "p%d: initialized qEq\n", system->my_rank ); //Print_Linear_System( system, control, workspace, data->step ); #endif - //s_matvecs = dual_CG(system, workspace, workspace->H, workspace->b, - // control->cm_solver_q_err, workspace->x, mpi_data, out_control->log); - //t_matvecs = 0; + if( control->cm_solver_pre_comp_type == SAI_PC ) + { + if( control->cm_solver_pre_comp_refactor > 0 + && ((data->step - data->prev_steps) % control->cm_solver_pre_comp_refactor == 0)) + { + Setup_Preconditioner_QEq( system, control, data, workspace, mpi_data ); - for ( j = 0; j < system->n; ++j ) - workspace->s[j] = workspace->x[j][0]; - s_matvecs = CG(system, workspace, workspace->H, workspace->b_s,//newQEq sCG - control->cm_solver_q_err, workspace->s, mpi_data, out_control->log ); - for ( j = 0; j < system->n; ++j ) - workspace->x[j][0] = workspace->s[j]; + Compute_Preconditioner_QEq( system, control, data, workspace, mpi_data ); + } + } + + //TODO: used for timing to sync processors going into the linear solve, but remove for production code + MPI_Barrier( mpi_data->world ); - //s_matvecs = PCG( system, workspace, workspace->H, workspace->b_s, - // control->cm_solver_q_err, workspace->L, workspace->U, workspace->s, - // mpi_data, out_control->log ); -#if defined(DEBUG) - fprintf( stderr, "p%d: first CG completed\n", system->my_rank ); + switch ( control->cm_solver_type ) + { + case CG_S: +#if defined(DUAL_SOLVER) + iters = dual_CG( 
system, control, data, workspace, workspace->H, workspace->b, + control->cm_solver_q_err, workspace->x, mpi_data ); +#else + for ( j = 0; j < system->n; ++j ) + { + workspace->s[j] = workspace->x[j][0]; + } + + iters = CG( system, control, data, workspace, workspace->H, workspace->b_s, + control->cm_solver_q_err, workspace->s, mpi_data ); + + for ( j = 0; j < system->n; ++j ) + { + workspace->x[j][0] = workspace->s[j]; + } + + for ( j = 0; j < system->n; ++j ) + { + workspace->t[j] = workspace->x[j][1]; + } + + iters += CG( system, control, data, workspace, workspace->H, workspace->b_t, + control->cm_solver_q_err, workspace->t, mpi_data ); + + for ( j = 0; j < system->n; ++j ) + { + workspace->x[j][1] = workspace->t[j]; + } #endif + break; + + case PIPECG_S: +#if defined(DUAL_SOLVER) + iters = dual_PIPECG( system, control, data, workspace, workspace->H, workspace->b, + control->cm_solver_q_err, workspace->x, mpi_data ); +#else + for ( j = 0; j < system->n; ++j ) + { + workspace->s[j] = workspace->x[j][0]; + } - for ( j = 0; j < system->n; ++j ) - workspace->t[j] = workspace->x[j][1]; - t_matvecs = CG(system, workspace, workspace->H, workspace->b_t,//newQEq sCG - control->cm_solver_q_err, workspace->t, mpi_data, out_control->log ); - for ( j = 0; j < system->n; ++j ) - workspace->x[j][1] = workspace->t[j]; + iters = PIPECG( system, control, data, workspace, workspace->H, workspace->b_s, + control->cm_solver_q_err, workspace->s, mpi_data ); - //t_matvecs = PCG( system, workspace, workspace->H, workspace->b_t, - // control->cm_solver_q_err, workspace->L, workspace->U, workspace->t, - // mpi_data, out_control->log ); -#if defined(DEBUG) - fprintf( stderr, "p%d: second CG completed\n", system->my_rank ); + for ( j = 0; j < system->n; ++j ) + { + workspace->x[j][0] = workspace->s[j]; + } + + for ( j = 0; j < system->n; ++j ) + { + workspace->t[j] = workspace->x[j][1]; + } + + iters += PIPECG( system, control, data, workspace, workspace->H, workspace->b_t, + 
control->cm_solver_q_err, workspace->t, mpi_data ); + + for ( j = 0; j < system->n; ++j ) + { + workspace->x[j][1] = workspace->t[j]; + } #endif + break; + + case PIPECR_S: + for ( j = 0; j < system->n; ++j ) + { + workspace->s[j] = workspace->x[j][0]; + } + + iters = PIPECR( system, control, data, workspace, workspace->H, workspace->b_s, + control->cm_solver_q_err, workspace->s, mpi_data ); + + for ( j = 0; j < system->n; ++j ) + { + workspace->x[j][0] = workspace->s[j]; + } + + for ( j = 0; j < system->n; ++j ) + { + workspace->t[j] = workspace->x[j][1]; + } + + iters += PIPECR( system, control, data, workspace, workspace->H, workspace->b_t, + control->cm_solver_q_err, workspace->t, mpi_data ); + + for ( j = 0; j < system->n; ++j ) + { + workspace->x[j][1] = workspace->t[j]; + } + break; + + case GMRES_S: + case GMRES_H_S: + case SDM_S: + case BiCGStab_S: + fprintf( stderr, "[ERROR] Unsupported solver selection. Terminating...\n" ); + break; + + default: + fprintf( stderr, "[ERROR] Unrecognized solver selection. 
Terminating...\n" ); + exit( INVALID_INPUT ); + break; + } Calculate_Charges( system, workspace, mpi_data ); + #if defined(DEBUG) fprintf( stderr, "p%d: computed charges\n", system->my_rank ); //Print_Charges( system ); @@ -422,7 +561,7 @@ void QEq( reax_system *system, control_params *control, simulation_data *data, #if defined(LOG_PERFORMANCE) if ( system->my_rank == MASTER_NODE ) { - data->timing.cm_solver_iters += s_matvecs + t_matvecs; + data->timing.cm_solver_iters += iters; } #endif } diff --git a/PuReMD/src/reax_defs.h b/PuReMD/src/reax_defs.h index a61ce245a5fc29c580158f4f425805dd16506c5a..4bd9d740b23d0f5d90d118e0783808ebcb901e7e 100644 --- a/PuReMD/src/reax_defs.h +++ b/PuReMD/src/reax_defs.h @@ -26,10 +26,10 @@ #define inline __inline__ #endif /*IBMC*/ -#define SUCCESS 1 -#define FAILURE 0 -#define TRUE 1 -#define FALSE 0 +#define SUCCESS (1) +#define FAILURE (0) +#define TRUE (1) +#define FALSE (0) #define SQR(x) ((x)*(x)) #define CUBE(x) ((x)*(x)*(x)) @@ -39,66 +39,67 @@ #define MIN(x,y) (((x) < (y)) ? (x) : (y)) #define MAX3(x,y,z) MAX( MAX(x,y), z) -#define constPI 3.14159265 -#define C_ele 332.06371 -//#define K_B 503.398008 // kcal/mol/K -#define K_B 0.831687 // amu A^2 / ps^2 / K -#define F_CONV 1e6 / 48.88821291 / 48.88821291 // --> amu A / ps^2 -#define E_CONV 0.002391 // amu A^2 / ps^2 --> kcal/mol -#define EV_to_KCALpMOL 14.400000 // ElectronVolt --> KCAL per MOLe -#define KCALpMOL_to_EV 23.02 // 23.060549 //KCAL per MOLe --> ElectronVolt -#define ECxA_to_DEBYE 4.803204 // elem. 
charge * Ang -> debye -#define CAL_to_JOULES 4.184000 // CALories --> JOULES -#define JOULES_to_CAL 1/4.184000 // JOULES --> CALories -#define AMU_to_GRAM 1.6605e-24 -#define ANG_to_CM 1e-8 -#define AVOGNR 6.0221367e23 -#define P_CONV 1e-24 * AVOGNR * JOULES_to_CAL - -#define MAX_STR 1024 -#define MAX_LINE 1024 -#define MAX_TOKENS 1024 -#define MAX_TOKEN_LEN 1024 - -#define MAX_ATOM_ID 100000 -#define MAX_RESTRICT 15 -#define MAX_MOLECULE_SIZE 20 -#define MAX_ATOM_TYPES 25 - -#define NUM_INTRS 10 -#define ALMOST_ZERO 1e-10 -#define NEG_INF -1e10 -#define NO_BOND 1e-3 // 0.001 -#define HB_THRESHOLD 1e-2 // 0.01 - -#define MIN_CAP 50 -#define MIN_NBRS 100 -#define MIN_HENTRIES 100 -#define MAX_BONDS 30 -#define MIN_BONDS 15 -#define MIN_HBONDS 25 -#define MIN_3BODIES 1000 -#define MIN_GCELL_POPL 50 -#define MIN_SEND 100 -#define SAFE_ZONE 1.2 -#define SAFER_ZONE 1.4 -#define DANGER_ZONE 0.90 -#define LOOSE_ZONE 0.75 -#define MAX_3BODY_PARAM 5 -#define MAX_4BODY_PARAM 5 - -#define MAX_dV 1.01 -#define MIN_dV 0.99 -#define MAX_dT 4.00 -#define MIN_dT 0.00 - -#define MASTER_NODE 0 -#define MAX_NBRS 6 //27 -#define MYSELF 13 // encoding of relative coordinate (0,0,0) - -#define MAX_ITR 10 -#define RESTART 30 - +#define constPI (3.14159265) +#define C_ele (332.06371) +//#define K_B (503.398008) // kcal/mol/K +#define K_B (0.831687) // amu A^2 / ps^2 / K +#define F_CONV (1e6 / 48.88821291 / 48.88821291) // --> amu A / ps^2 +#define E_CONV (0.002391) // amu A^2 / ps^2 --> kcal/mol +#define EV_to_KCALpMOL (14.400000) // ElectronVolt --> KCAL per MOLe +#define KCALpMOL_to_EV (23.02) // 23.060549 //KCAL per MOLe --> ElectronVolt +#define ECxA_to_DEBYE (4.803204) // elem. 
charge * Ang -> debye +#define CAL_to_JOULES (4.184000) // CALories --> JOULES +#define JOULES_to_CAL (1/4.184000) // JOULES --> CALories +#define AMU_to_GRAM (1.6605e-24) +#define ANG_to_CM (1e-8) +#define AVOGNR (6.0221367e23) +#define P_CONV (1e-24 * AVOGNR * JOULES_to_CAL) + +#define MAX_STR (1024) +#define MAX_LINE (1024) +#define MAX_TOKENS (1024) +#define MAX_TOKEN_LEN (1024) + +#define MAX_ATOM_ID (100000) +#define MAX_RESTRICT (15) +#define MAX_MOLECULE_SIZE (20) +#define MAX_ATOM_TYPES (25) + +#define NUM_INTRS (10) +#define ALMOST_ZERO (1e-10) +#define NEG_INF (-1e10) +#define NO_BOND (1e-3) // 0.001 +#define HB_THRESHOLD (1e-2) // 0.01 + +#define MIN_CAP (50) +#define MIN_NBRS (100) +#define MIN_HENTRIES (100) +#define MAX_BONDS (30) +#define MIN_BONDS (15) +#define MIN_HBONDS (25) +#define MIN_3BODIES (1000) +#define MIN_GCELL_POPL (50) +#define MIN_SEND (100) +#define SAFE_ZONE (1.2) +#define SAFER_ZONE (1.4) +#define SAFE_ZONE_NT (2.0) +#define SAFER_ZONE_NT (2.5) +#define DANGER_ZONE (0.90) +#define LOOSE_ZONE (0.75) +#define MAX_3BODY_PARAM (5) +#define MAX_4BODY_PARAM (5) + +#define MAX_dV (1.01) +#define MIN_dV (0.99) +#define MAX_dT (4.00) +#define MIN_dT (0.00) + +#define MASTER_NODE (0) +#define MAX_NBRS (6) //27 +#define MYSELF (13) // encoding of relative coordinate (0,0,0) + +#define MAX_ITR (10) +#define RESTART (30) /******************* ENUMERATIONS *************************/ @@ -125,15 +126,17 @@ enum message_tags { INIT = 0, UPDATE = 1, BNDRY = 2, UPDATE_BNDRY = 3, enum errors { FILE_NOT_FOUND = -10, UNKNOWN_ATOM_TYPE = -11, CANNOT_OPEN_FILE = -12, CANNOT_INITIALIZE = -13, INSUFFICIENT_MEMORY = -14, UNKNOWN_OPTION = -15, - INVALID_INPUT = -16, INVALID_GEO = -17 + INVALID_INPUT = -16, INVALID_GEO = -17, + RUNTIME_ERROR = -18, }; enum exchanges { NONE = 0, NEAR_EXCH = 1, FULL_EXCH = 2 }; enum gcell_types { NO_NBRS = 0, NEAR_ONLY = 1, HBOND_ONLY = 2, FAR_ONLY = 4, NEAR_HBOND = 3, NEAR_FAR = 5, HBOND_FAR = 6, FULL_NBRS = 7, - NATIVE = 8 + 
NATIVE = 8, NT_NBRS = 9 // 9 through 14 }; +enum nt_atom_type { TOWER = 1, PLATE = 2 }; enum atoms { C_ATOM = 0, H_ATOM = 1, O_ATOM = 2, N_ATOM = 3, S_ATOM = 4, SI_ATOM = 5, GE_ATOM = 6, X_ATOM = 7 diff --git a/PuReMD/src/reax_types.h b/PuReMD/src/reax_types.h index dfaa9c37af08cb5ddbc695d44555c2accd673ac7..136a5ff5728b9ee859355cfdea4a0bbe58614e65 100644 --- a/PuReMD/src/reax_types.h +++ b/PuReMD/src/reax_types.h @@ -40,6 +40,8 @@ /************* SOME DEFS - crucial for reax_types.h *********/ #define PURE_REAX +#define DUAL_SOLVER +//#define NEUTRAL_TERRITORY //#define LAMMPS_REAX //#define DEBUG //#define DEBUG_FOCUS @@ -85,6 +87,8 @@ enum solver CG_S = 2, SDM_S = 3, BiCGStab_S = 4, + PIPECG_S = 5, + PIPECR_S = 6, }; /* preconditioner computation type for charge method linear solver */ @@ -196,8 +200,8 @@ typedef struct typedef struct { - MPI_Comm world; - MPI_Comm comm_mesh3D; + MPI_Comm world; + MPI_Comm comm_mesh3D; MPI_Datatype sys_info; MPI_Datatype mpi_atom_type; @@ -219,6 +223,11 @@ typedef struct void *in1_buffer; void *in2_buffer; + +#if defined(NEUTRAL_TERRITORY) + mpi_out_data out_nt_buffers[REAX_MAX_NT_NBRS]; + void *in_nt_buffer[REAX_MAX_NT_NBRS]; +#endif } mpi_datatypes; @@ -431,6 +440,10 @@ typedef struct int num_bonds; int num_hbonds; int renumber; +#if defined(NEUTRAL_TERRITORY) + int nt_dir; + int pos; +#endif } reax_atom; @@ -496,6 +509,9 @@ typedef struct typedef struct { int rank; +#if defined(NEUTRAL_TERRITORY) + int receive_rank; +#endif int est_send, est_recv; int atoms_str, atoms_cnt; ivec rltv, prdc; @@ -540,12 +556,17 @@ typedef struct int wsize, my_rank, num_nbrs; ivec my_coords; neighbor_proc my_nbrs[REAX_MAX_NBRS]; + neighbor_proc my_nt_nbrs[REAX_MAX_NT_NBRS]; int *global_offset; simulation_box big_box, my_box, my_ext_box; grid my_grid; boundary_cutoff bndry_cuts; reax_atom *my_atoms; + +#if defined(NEUTRAL_TERRITORY) + int num_nt_nbrs; +#endif } reax_system; @@ -771,10 +792,16 @@ typedef struct real bonded; /* non-bonded force 
calculation time */ real nonb; + /* distance between pairs calculation time */ + real init_dist; + /* charge matrix calculation time */ + real init_cm; + /* bonded interactions calculation time */ + real init_bond; /* atomic charge distribution calculation time */ real cm; /**/ - real cm_sort_mat_rows; + real cm_sort; /**/ real cm_solver_comm; /**/ @@ -782,13 +809,13 @@ typedef struct /**/ real cm_solver_pre_comp; /**/ - real cm_solver_pre_app; // update CG() + real cm_solver_pre_app; /* num. of steps in iterative linear solver for charge distribution */ int cm_solver_iters; /**/ - real cm_solver_spmv; // update CG() + real cm_solver_spmv; /**/ - real cm_solver_vector_ops; // update CG() + real cm_solver_vector_ops; /**/ real cm_solver_orthog; /**/ @@ -870,20 +897,40 @@ typedef struct } three_body_interaction_data; +typedef struct +{ + /* neighbor atom IDs */ + int *nbr; + /* set of three integers which deterimine if the neighbor + * atom is a non-periodic neighbor (all zeros) or a periodic + * neighbor and which perioidic image this neighbor comes from */ + ivec *rel_box; + /* distance to the neighboring atom */ + real *d; + /* difference between positions of this atom and its neighboring atom */ + rvec *dvec; +} far_neighbor_data; + + +#if defined(NEUTRAL_TERRITORY) typedef struct { int nbr; ivec rel_box; real d; rvec dvec; -} far_neighbor_data; +} nt_neighbor_data; +#endif typedef struct { + /* neighbor atom ID */ int nbr; + /* ??? 
*/ int scl; - far_neighbor_data *ptr; + /* position of neighbor in far neighbor list */ + int ptr; } hbond_data; @@ -950,6 +997,9 @@ typedef struct /* matrix storage format */ int format; int cap, n, m; +#if defined(NEUTRAL_TERRITORY) + int NT; +#endif int *start, *end; sparse_matrix_entry *entries; } sparse_matrix; @@ -983,9 +1033,9 @@ typedef struct real *dDelta_lp, *dDelta_lp_temp; real *nlp, *nlp_temp, *Clp, *vlpex; rvec *dDeltap_self; - int *bond_mark, *done_after; + int *bond_mark; - /* QEq storage */ + /* charge matrix storage */ sparse_matrix *H; sparse_matrix *L; sparse_matrix *U; @@ -998,21 +1048,28 @@ typedef struct rvec2 *b, *x; /* GMRES storage */ - real *y, *z, *g; + real *y, *g; real *hc, *hs; real **h, **v; - /* CG storage */ - real *r, *d, *q, *p; - rvec2 *r2, *d2, *q2, *p2; + /* GMRES, PIPECG, PIPECR storage */ + real *z; + /* CG, PIPECG, PIPECR storage */ + real *d, *p, *q, *r; + /* PIPECG, PIPECR storage */ + real *m, *n, *u, *w; + /* dual-CG storage */ + rvec2 *d2, *p2, *q2, *r2; + /* dual-PIPECG storage */ + rvec2 *m2, *n2, *u2, *w2, *z2; /* Taper */ - real Tap[8]; //Tap7, Tap6, Tap5, Tap4, Tap3, Tap2, Tap1, Tap0; + real Tap[8]; /* storage for analysis */ - int *mark, *old_mark; + int *mark, *old_mark; rvec *x_old; /* storage space for bond restrictions */ - int *restricted; + int *restricted; int **restricted_list; /* integrator */ @@ -1088,7 +1145,10 @@ typedef struct bond_data *bond_list; dbond_data *dbo_list; dDelta_data *dDelta_list; - far_neighbor_data *far_nbr_list; + far_neighbor_data far_nbr_list; +#if defined(NEUTRAL_TERRITORY) + nt_neighbor_data *nt_nbr_list; +#endif hbond_data *hbond_list; } reax_list; @@ -1122,7 +1182,7 @@ typedef struct int write_steps; int traj_compress; int traj_method; - char traj_title[81]; + char traj_title[REAX_MAX_STR]; int atom_info; int bond_info; int angle_info; diff --git a/PuReMD/src/restart.c b/PuReMD/src/restart.c index 
b687d82abc0a2618b0cc7d496f39b8630d51d450..bdd2476f74fdbe797c009643b021ef69f1fd78a6 100644 --- a/PuReMD/src/restart.c +++ b/PuReMD/src/restart.c @@ -49,11 +49,7 @@ void Write_Binary_Restart( reax_system *system, control_params *control, { /* master handles the restart file */ sprintf( fname, "%s.res%d", control->sim_name, data->step ); - if ( (fres = fopen( fname, "wb" )) == NULL ) - { - fprintf( stderr, "ERROR: can't open the restart file! terminating...\n" ); - MPI_Abort( MPI_COMM_WORLD, FILE_NOT_FOUND ); - } + fres = sfopen( fname, "wb", "Write_Binary_Restart" ); /* master can write the header by itself */ res_header.step = data->step; @@ -108,7 +104,7 @@ void Write_Binary_Restart( reax_system *system, control_params *control, if ( me == MASTER_NODE ) { fwrite( buffer, system->bigN, sizeof(restart_atom), fres ); - fclose( fres ); + sfclose( fres, "Write_Binary_Restart" ); } sfree(buffer, "buffer"); @@ -139,11 +135,7 @@ void Write_Restart( reax_system *system, control_params *control, if ( me == MASTER_NODE ) { sprintf( fname, "%s.res%d", control->sim_name, data->step ); - if ( (fres = fopen( fname, "w" )) == NULL ) - { - fprintf( stderr, "ERROR: can't open the restart file! terminating...\n" ); - MPI_Abort( MPI_COMM_WORLD, FILE_NOT_FOUND ); - } + fres = sfopen( fname, "w", "Write_Restart" ); /* write the header - only master writes it */ fprintf( fres, RESTART_HEADER, @@ -204,7 +196,7 @@ void Write_Restart( reax_system *system, control_params *control, if ( me == MASTER_NODE ) { fprintf( fres, "%s", buffer ); - fclose( fres ); + sfclose( fres, "Write_Restart" ); } sfree(buffer, "buffer"); sfree(line, "line"); @@ -250,11 +242,7 @@ void Read_Binary_Restart( char *res_file, reax_system *system, comm = MPI_COMM_WORLD; - if ( (fres = fopen(res_file, "rb")) == NULL ) - { - fprintf( stderr, "ERROR: cannot open the restart file! 
terminating...\n" ); - MPI_Abort( comm, FILE_NOT_FOUND ); - } + fres = sfopen( res_file, "rb", "Read_Binary_Restart" ); /* first read the header lines */ fread(&res_header, sizeof(restart_header), 1, fres); @@ -313,7 +301,7 @@ void Read_Binary_Restart( char *res_file, reax_system *system, } } - fclose( fres ); + sfclose( fres, "Read_Binary_Restart" ); data->step = data->prev_steps; // nsteps is updated based on the number of steps in the previous run @@ -368,11 +356,7 @@ void Read_Restart( char *res_file, reax_system *system, comm = MPI_COMM_WORLD; - if ( (fres = fopen(res_file, "r")) == NULL ) - { - fprintf( stderr, "ERROR: cannot open the restart file! terminating...\n" ); - MPI_Abort( comm, FILE_NOT_FOUND ); - } + fres = sfopen( res_file, "r", "Read_Binary_Restart" ); s = (char*) malloc(sizeof(char) * MAX_LINE); tmp = (char**) malloc(sizeof(char*)*MAX_TOKENS); @@ -464,7 +448,7 @@ void Read_Restart( char *res_file, reax_system *system, top++; } } - fclose( fres ); + sfclose( fres, "Read_Restart" ); /* free memory allocations at the top */ for ( i = 0; i < MAX_TOKENS; i++ ) sfree( tmp[i], "tmp[i]" ); diff --git a/PuReMD/src/tool_box.c b/PuReMD/src/tool_box.c index d898f2190d72de1424eea2730ed506e9e04ec610..9374fc3c5fe04aeedb1838fde0288258ff8f30c4 100644 --- a/PuReMD/src/tool_box.c +++ b/PuReMD/src/tool_box.c @@ -318,6 +318,8 @@ void Trim_Spaces( char *element ) struct timeval tim; real t_end; +// NOTE: these timing functions are not being used +// replaced by MPI_Wtime() real Get_Time( ) { gettimeofday(&tim, NULL ); diff --git a/PuReMD/src/torsion_angles.c b/PuReMD/src/torsion_angles.c index f915fdb8325de91f5c5ea72f47e36ae0ab306992..4719aa33c2d79b6c07636afb69cd8dfa5857c63f 100644 --- a/PuReMD/src/torsion_angles.c +++ b/PuReMD/src/torsion_angles.c @@ -197,7 +197,7 @@ void Torsion_Angles( reax_system *system, control_params *control, // FILE *ftor; // sprintf( fname, "tor%d.out", system->my_rank ); - // ftor = fopen( fname, "w" ); + // ftor = sfopen( fname, "w", 
"Torsion_Angles" ); natoms = system->n; @@ -342,7 +342,8 @@ void Torsion_Angles( reax_system *system, control_params *control, fbp->V2 * exp_tor1 * (1.0 - cos2omega) + fbp->V3 * (1.0 + cos3omega) ); - data->my_en.e_tor += e_tor = fn10 * sin_ijk * sin_jkl * CV; + e_tor = fn10 * sin_ijk * sin_jkl * CV; + data->my_en.e_tor += e_tor; dfn11 = (-p_tor3 * exp_tor3_DjDk + (p_tor3 * exp_tor3_DjDk - p_tor4 * exp_tor4_DjDk) * @@ -375,9 +376,8 @@ void Torsion_Angles( reax_system *system, control_params *control, /* 4-body conjugation energy */ fn12 = exp_cot2_ij * exp_cot2_jk * exp_cot2_kl; - data->my_en.e_con += e_con = - fbp->p_cot1 * fn12 * - (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jkl); + e_con = fbp->p_cot1 * fn12 * (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jkl); + data->my_en.e_con += e_con; Cconj = -2.0 * fn12 * fbp->p_cot1 * p_cot2 * (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jkl); diff --git a/PuReMD/src/traj.c b/PuReMD/src/traj.c index d03a8289f22639477db5c9c7b3af919d37adba9e..e189c4fc2c479c6bb0a5b7a7180e267c820d29d2 100644 --- a/PuReMD/src/traj.c +++ b/PuReMD/src/traj.c @@ -527,7 +527,7 @@ int Init_Traj( reax_system *system, control_params *control, else if ( out_control->traj_method == REG_TRAJ) { if ( system->my_rank == MASTER_NODE ) - out_control->strj = fopen( fname, "w" ); + out_control->strj = sfopen( fname, "w", "Init_Traj" ); } else { @@ -538,7 +538,7 @@ int Init_Traj( reax_system *system, control_params *control, if ( out_control->traj_method == REG_TRAJ) { if ( system->my_rank == MASTER_NODE ) - out_control->strj = fopen( fname, "w" ); + out_control->strj = sfopen( fname, "w", "Init_Traj" ); } else { @@ -1116,10 +1116,10 @@ int End_Traj( int my_rank, output_controls *out_control ) if ( out_control->traj_method == MPI_TRAJ ) MPI_File_close( &(out_control->trj) ); else if ( my_rank == MASTER_NODE ) - fclose( out_control->strj ); + sfclose( out_control->strj, "End_Traj" ); #elif defined(LAMMPS_REAX) if ( my_rank == MASTER_NODE ) - fclose( 
out_control->strj ); + sfclose( out_control->strj, "End_Traj" ); #endif sfree( out_control->buffer, "out_control->buffer" ); diff --git a/PuReMD/src/vector.c b/PuReMD/src/vector.c index 27c33db810ca7e1458f4226b35a1f885072baf17..ee4cf8ee96595150683526f34c26f1b1ed01251f 100644 --- a/PuReMD/src/vector.c +++ b/PuReMD/src/vector.c @@ -27,68 +27,102 @@ #include "reax_vector.h" #endif + int Vector_isZero( real* v, int k ) { - for ( --k; k >= 0; --k ) - if ( fabs( v[k] ) > ALMOST_ZERO ) - return 0; + int i, ret; - return 1; + ret = 1; + + for ( i = 0; i < k; ++i ) + { + if ( fabs( v[i] ) > ALMOST_ZERO ) + { + ret = 0; + break; + } + } + + return ret; } void Vector_MakeZero( real *v, int k ) { - for ( --k; k >= 0; --k ) - v[k] = 0; + int i; + + for ( i = 0; i < k; ++i ) + { + v[i] = 0; + } } void Vector_Copy( real* dest, real* v, int k ) { - for ( --k; k >= 0; --k ) - dest[k] = v[k]; + int i; + + for ( i = 0; i < k; ++i ) + { + dest[i] = v[i]; + } } void Vector_Scale( real* dest, real c, real* v, int k ) { - for ( --k; k >= 0; --k ) - dest[k] = c * v[k]; + int i; + + for ( i = 0; i < k; ++i ) + { + dest[i] = c * v[i]; + } } -void Vector_Sum( real* dest, real c, real* v, real d, real* y, int k ) +real Dot( real* v1, real* v2, int k ) { - for ( --k; k >= 0; --k ) - dest[k] = c * v[k] + d * y[k]; -} + int i; + real ret; + ret = 0.0; -void Vector_Add( real* dest, real c, real* v, int k ) -{ - for ( --k; k >= 0; --k ) - dest[k] += c * v[k]; + for ( i = 0; i < k; ++i ) + { + ret += v1[i] * v2[i]; + } + + return ret; } -real Dot( real* v1, real* v2, int k ) +real Dot_local( real *v1, real *v2, int k ) { - real ret = 0; + int i; + real sum; - for ( --k; k >= 0; --k ) - ret += v1[k] * v2[k]; + sum = 0.0; - return ret; + for ( i = 0; i < k; ++i ) + { + sum += v1[i] * v2[i]; + } + + return sum; } real Norm( real* v1, int k ) { - real ret = 0; + int i; + real ret; + + ret = 0.0; - for ( --k; k >= 0; --k ) - ret += SQR( v1[k] ); + for ( i = 0; i < k; ++i ) + { + ret += SQR( v1[i] ); + } 
return sqrt( ret ); } diff --git a/PuReMD/src/vector.h b/PuReMD/src/vector.h index 199810eb9203a983870b12b4686702256bd81288..1b12342307e180eac1ecdfb96d894ce9f6be41bb 100644 --- a/PuReMD/src/vector.h +++ b/PuReMD/src/vector.h @@ -25,65 +25,139 @@ #include "reax_types.h" #include "reax_defs.h" + int Vector_isZero( real*, int ); + void Vector_MakeZero( real*, int ); + void Vector_Copy( real*, real*, int ); + void Vector_Scale( real*, real, real*, int ); -void Vector_Sum( real*, real, real*, real, real*, int ); -void Vector_Add( real*, real, real*, int ); + + +static inline void Vector_Sum( real* dest, real c, real* v, real d, real* y, int k ) +{ + int i; + + for ( i = 0; i < k; ++i ) + { + dest[i] = c * v[i] + d * y[i]; + } +} + + +static inline void Vector_Add( real* dest, real c, real* v, int k ) +{ + int i; + + for ( i = 0; i < k; ++i ) + { + dest[i] += c * v[i]; + } +} + + real Dot( real*, real*, int ); + +real Dot_local( real*, real*, int ); + real Norm( real*, int ); + void Vector_Print( FILE*, char*, real*, int ); void rvec_Copy( rvec, rvec ); + void rvec_Scale( rvec, real, rvec ); + void rvec_Add( rvec, rvec ); + void rvec_ScaledAdd( rvec, real, rvec ); + void rvec_Sum( rvec, rvec, rvec ); + void rvec_ScaledSum( rvec, real, rvec, real, rvec ); + real rvec_Dot( rvec, rvec ); + real rvec_ScaledDot( real, rvec, real, rvec ); + void rvec_Multiply( rvec, rvec, rvec ); + void rvec_iMultiply( rvec, ivec, rvec ); + void rvec_Divide( rvec, rvec, rvec ); + void rvec_iDivide( rvec, rvec, ivec ); + void rvec_Invert( rvec, rvec ); + void rvec_Cross( rvec, rvec, rvec ); + void rvec_OuterProduct( rtensor, rvec, rvec ); + real rvec_Norm_Sqr( rvec ); + real rvec_Norm( rvec ); + int rvec_isZero( rvec ); + void rvec_MakeZero( rvec ); + void rvec_Random( rvec ); void rtensor_MakeZero( rtensor ); + void rtensor_Multiply( rtensor, rtensor, rtensor ); + void rtensor_MatVec( rvec, rtensor, rvec ); + void rtensor_Scale( rtensor, real, rtensor ); + void rtensor_Add( rtensor, rtensor ); 
+ void rtensor_ScaledAdd( rtensor, real, rtensor ); + void rtensor_Sum( rtensor, rtensor, rtensor ); + void rtensor_ScaledSum( rtensor, real, rtensor, real, rtensor ); + void rtensor_Scale( rtensor, real, rtensor ); + void rtensor_Copy( rtensor, rtensor ); + void rtensor_Identity( rtensor ); + void rtensor_Transpose( rtensor, rtensor ); + real rtensor_Det( rtensor ); + real rtensor_Trace( rtensor ); void Print_rTensor(FILE*, rtensor); -int ivec_isZero( ivec ); -int ivec_isEqual( ivec, ivec ); +int ivec_isZero( ivec ); + +int ivec_isEqual( ivec, ivec ); + void ivec_MakeZero( ivec ); + void ivec_Copy( ivec, ivec ); + void ivec_Scale( ivec, real, ivec ); + void ivec_rScale( ivec, real, rvec ); + void ivec_Sum( ivec, ivec, ivec ); + void ivec_ScaledSum( ivec, int, ivec, int, ivec ); + void ivec_Add( ivec, ivec ); + void ivec_ScaledAdd( ivec, int, ivec ); + void ivec_Max( ivec, ivec, ivec ); + void ivec_Max3( ivec, ivec, ivec, ivec ); + #endif