From 8d83aab890f5cade562b7a40f6188dbc4d55eedf Mon Sep 17 00:00:00 2001
From: "Kurt A. O'Hearn" <ohearnku@msu.edu>
Date: Wed, 23 Jan 2019 20:04:46 -0800
Subject: [PATCH] PuReMD-old: fix communications to only use full shell pattern
 for non-charge methods (bug only present using neutral territory
 communication method). TODO: revisit code structure at a later date.

---
 PuReMD/src/basic_comm.c | 148 ++++++++++++++++++++++++++++++++++++++++
 PuReMD/src/basic_comm.h |   6 ++
 PuReMD/src/forces.c     |  26 +++----
 PuReMD/src/qEq.c        |   2 +-
 4 files changed, 168 insertions(+), 14 deletions(-)

diff --git a/PuReMD/src/basic_comm.c b/PuReMD/src/basic_comm.c
index d3bb9014..a50dbbf7 100644
--- a/PuReMD/src/basic_comm.c
+++ b/PuReMD/src/basic_comm.c
@@ -372,6 +372,74 @@ void Dist( const reax_system * const system, mpi_datatypes * const mpi_data,
 }
 
 
+void Dist_FS( const reax_system * const system, mpi_datatypes * const mpi_data,
+        void *buf, int buf_type, MPI_Datatype type )
+{
+    int d;
+    mpi_out_data *out_bufs;
+    MPI_Comm comm;
+    MPI_Request req1, req2;
+    MPI_Status stat1, stat2;
+    const neighbor_proc *nbr1, *nbr2;
+    dist_packer pack;
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d dist: entered\n", system->my_rank );
+#endif
+
+    comm = mpi_data->comm_mesh3D;
+    out_bufs = mpi_data->out_buffers;
+    pack = Get_Packer( buf_type );
+
+    for ( d = 0; d < 3; ++d )
+    {
+        /* initiate recvs */
+        nbr1 = &system->my_nbrs[2 * d];
+        if ( nbr1->atoms_cnt )
+        {
+            MPI_Irecv( Get_Buffer_Offset( buf, nbr1->atoms_str, buf_type ),
+                    nbr1->atoms_cnt, type, nbr1->rank, 2 * d + 1, comm, &req1 );
+        }
+
+        nbr2 = &system->my_nbrs[2 * d + 1];
+        if ( nbr2->atoms_cnt )
+        {
+            MPI_Irecv( Get_Buffer_Offset( buf, nbr2->atoms_str, buf_type ),
+                    nbr2->atoms_cnt, type, nbr2->rank, 2 * d, comm, &req2 );
+        }
+
+        /* send both messages in dimension d */
+        if ( out_bufs[2 * d].cnt )
+        {
+            pack( buf, &out_bufs[2 * d] );
+            MPI_Send( out_bufs[2 * d].out_atoms, out_bufs[2 * d].cnt,
+                    type, nbr1->rank, 2 * d, comm );
+        }
+
+        if ( out_bufs[2 * d + 1].cnt )
+        {
+            pack( buf, &out_bufs[2 * d + 1] );
+            MPI_Send( out_bufs[2 * d + 1].out_atoms, out_bufs[2 * d + 1].cnt,
+                    type, nbr2->rank, 2 * d + 1, comm );
+        }
+
+        if( nbr1->atoms_cnt )
+        {
+            MPI_Wait( &req1, &stat1 );
+        }
+        if( nbr2->atoms_cnt )
+        {
+            MPI_Wait( &req2, &stat2 );
+        }
+    }
+
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d dist: done\n", system->my_rank );
+#endif
+}
+
+
 void Coll( const reax_system * const system, mpi_datatypes * const mpi_data,
         void *buf, int buf_type, MPI_Datatype type )
 {   
@@ -505,6 +573,86 @@ void Coll( const reax_system * const system, mpi_datatypes * const mpi_data,
 }
 
 
+void Coll_FS( const reax_system * const system, mpi_datatypes * const mpi_data,
+        void *buf, int buf_type, MPI_Datatype type )
+{   
+    int d;
+    mpi_out_data *out_bufs;
+    MPI_Comm comm;
+    MPI_Request req1, req2;
+    MPI_Status stat1, stat2;
+    const neighbor_proc *nbr1, *nbr2;
+    coll_unpacker unpack;
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d coll: entered\n", system->my_rank );
+#endif
+
+    comm = mpi_data->comm_mesh3D;
+    out_bufs = mpi_data->out_buffers;
+    unpack = Get_Unpacker( buf_type );
+
+    for ( d = 2; d >= 0; --d )
+    {
+        /* initiate recvs */
+        nbr1 = &system->my_nbrs[2 * d];
+
+        if ( out_bufs[2 * d].cnt )
+        {
+            MPI_Irecv( mpi_data->in1_buffer, out_bufs[2 * d].cnt,
+                    type, nbr1->rank, 2 * d + 1, comm, &req1 );
+        }
+
+        nbr2 = &system->my_nbrs[2 * d + 1];
+
+        if ( out_bufs[2 * d + 1].cnt )
+        {
+
+            MPI_Irecv( mpi_data->in2_buffer, out_bufs[2 * d + 1].cnt,
+                    type, nbr2->rank, 2 * d, comm, &req2 );
+        }
+        
+        /* send both messages in dimension d */
+        if ( nbr1->atoms_cnt )
+        {
+            MPI_Send( Get_Buffer_Offset( buf, nbr1->atoms_str, buf_type ),
+                    nbr1->atoms_cnt, type, nbr1->rank, 2 * d, comm );
+        }
+        
+        if ( nbr2->atoms_cnt )
+        {
+            MPI_Send( Get_Buffer_Offset( buf, nbr2->atoms_str, buf_type ),
+                    nbr2->atoms_cnt, type, nbr2->rank, 2 * d + 1, comm );
+        }
+
+#if defined(DEBUG)
+        fprintf( stderr, "p%d coll[%d] nbr1: str=%d cnt=%d recv=%d\n",
+                system->my_rank, d, nbr1->atoms_str, nbr1->atoms_cnt,
+                out_bufs[2 * d].cnt );
+        fprintf( stderr, "p%d coll[%d] nbr2: str=%d cnt=%d recv=%d\n",
+                system->my_rank, d, nbr2->atoms_str, nbr2->atoms_cnt,
+                out_bufs[2 * d + 1].cnt );
+#endif
+
+        if ( out_bufs[2 * d].cnt )
+        {
+            MPI_Wait( &req1, &stat1 );
+            unpack( mpi_data->in1_buffer, buf, &out_bufs[2 * d] );
+        }
+
+        if ( out_bufs[2 * d + 1].cnt )
+        {
+            MPI_Wait( &req2, &stat2 );
+            unpack( mpi_data->in2_buffer, buf, &out_bufs[2 * d + 1] );
+        }
+    }
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d coll: done\n", system->my_rank );
+#endif
+}
+
+
 /*****************************************************************************/
 real Parallel_Norm( real *v, int n, MPI_Comm comm )
 {
diff --git a/PuReMD/src/basic_comm.h b/PuReMD/src/basic_comm.h
index bf461c05..e2fe7090 100644
--- a/PuReMD/src/basic_comm.h
+++ b/PuReMD/src/basic_comm.h
@@ -37,9 +37,15 @@ enum pointer_type
 void Dist( const reax_system * const, mpi_datatypes * const,
         void*, int, MPI_Datatype );
 
+void Dist_FS( const reax_system * const, mpi_datatypes * const,
+        void*, int, MPI_Datatype );
+
 void Coll( const reax_system * const, mpi_datatypes * const,
         void*, int, MPI_Datatype );
 
+void Coll_FS( const reax_system * const, mpi_datatypes * const,
+        void*, int, MPI_Datatype );
+
 real Parallel_Norm( real*, int, MPI_Comm );
 
 real Parallel_Dot( real*, real*, int, MPI_Comm );
diff --git a/PuReMD/src/forces.c b/PuReMD/src/forces.c
index 833fb261..c9d71e82 100644
--- a/PuReMD/src/forces.c
+++ b/PuReMD/src/forces.c
@@ -1353,7 +1353,7 @@ void Compute_Total_Force( reax_system *system, control_params *control,
      * based on the neighbors information each processor has had.
      * final values of force on each atom needs to be computed by adding up
      * all partially-final pieces */
-    Coll( system, mpi_data, workspace->f, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
 
     for ( i = 0; i < system->n; ++i )
     {
@@ -1361,18 +1361,18 @@ void Compute_Total_Force( reax_system *system, control_params *control,
     }
 
 #if defined(TEST_FORCES)
-    Coll( system, mpi_data, workspace->f_ele, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
-    Coll( system, mpi_data, workspace->f_vdw, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
-    Coll( system, mpi_data, workspace->f_be, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
-    Coll( system, mpi_data, workspace->f_lp, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
-    Coll( system, mpi_data, workspace->f_ov, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
-    Coll( system, mpi_data, workspace->f_un, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
-    Coll( system, mpi_data, workspace->f_ang, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
-    Coll( system, mpi_data, workspace->f_coa, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
-    Coll( system, mpi_data, workspace->f_pen, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
-    Coll( system, mpi_data, workspace->f_hb, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
-    Coll( system, mpi_data, workspace->f_tor, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
-    Coll( system, mpi_data, workspace->f_con, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_ele, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_vdw, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_be, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_lp, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_ov, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_un, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_ang, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_coa, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_pen, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_hb, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_tor, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
+    Coll_FS( system, mpi_data, workspace->f_con, RVEC_PTR_TYPE, mpi_data->mpi_rvec );
 #endif
 
 #endif
diff --git a/PuReMD/src/qEq.c b/PuReMD/src/qEq.c
index 4599b986..e608090d 100644
--- a/PuReMD/src/qEq.c
+++ b/PuReMD/src/qEq.c
@@ -359,7 +359,7 @@ void Calculate_Charges( reax_system *system, storage *workspace,
         atom->t[0] = workspace->x[i][1];
     }
 
-    Dist( system, mpi_data, q, REAL_PTR_TYPE, MPI_DOUBLE );
+    Dist_FS( system, mpi_data, q, REAL_PTR_TYPE, MPI_DOUBLE );
 
     for ( i = system->n; i < system->N; ++i )
     {
-- 
GitLab