diff --git a/PuReMD/src/allocate.c b/PuReMD/src/allocate.c
index ca67bea5e5fca41c44b2248600a5ba454b4b89a9..74a9e161ece3c041527e83f01212b0d4921cb2f3 100644
--- a/PuReMD/src/allocate.c
+++ b/PuReMD/src/allocate.c
@@ -693,7 +693,8 @@ void Deallocate_Grid( grid *g )
    buffers are void*, type cast to the correct pointer type to access
    the allocated buffers */
 int  Allocate_MPI_Buffers( mpi_datatypes *mpi_data, int est_recv,
-                           neighbor_proc *my_nbrs, char *msg )
+                           neighbor_proc *my_nbrs,
+                           char *msg )
 {
     int i;
     mpi_out_data  *mpi_buf;
@@ -718,6 +719,19 @@ int  Allocate_MPI_Buffers( mpi_datatypes *mpi_data, int est_recv,
                              scalloc( my_nbrs[i].est_send, sizeof(boundary_atom), "mpibuf:out_atoms",
                                       comm );
     }
+#if defined(NEUTRAL_TERRITORY)
+    /* Neutral Territory out buffers */
+    for ( i = 0; i < MAX_NT_NBRS; ++i )
+    {
+        mpi_buf = &( mpi_data->out_nt_buffers[i] );
+        /* allocate storage for the neighbor processor i */
+        mpi_buf->index = (int*)
+                         scalloc( my_nbrs[i].est_send, sizeof(int), "mpibuf:nt_index", comm );
+        mpi_buf->out_atoms = (void*)
+                             scalloc( my_nbrs[i].est_send, sizeof(boundary_atom), "mpibuf:nt_out_atoms",
+                                      comm );
+    }
+#endif
 
     return SUCCESS;
 }
@@ -737,6 +751,14 @@ void Deallocate_MPI_Buffers( mpi_datatypes *mpi_data )
         sfree( mpi_buf->index, "mpibuf:index" );
         sfree( mpi_buf->out_atoms, "mpibuf:out_atoms" );
     }
+#if defined(NEUTRAL_TERRITORY)
+    for ( i = 0; i < MAX_NT_NBRS; ++i )
+    {
+        mpi_buf = &( mpi_data->out_nt_buffers[i] );
+        sfree( mpi_buf->index, "mpibuf:nt_index" );
+        sfree( mpi_buf->out_atoms, "mpibuf:nt_out_atoms" );
+    }
+#endif
 }
 
 
@@ -985,6 +1007,20 @@ void ReAllocate( reax_system *system, control_params *control,
                 break;
             }
         }
+#if defined(NEUTRAL_TERRITORY)
+        /* otherwise, also check the outgoing Neutral Territory buffers */
+        if ( mpi_flag == 0 )
+        {
+            for ( p = 0; p < MAX_NT_NBRS; ++p )
+            {
+                nbr_pr   = &( system->my_nt_nbrs[p] );
+                nbr_data = &( mpi_data->out_nt_buffers[p] );
+                if ( nbr_data->cnt >= nbr_pr->est_send * 0.90 )
+                {
+                    mpi_flag = 1;
+                    break;
+                }
+            }
+        }
+#endif
     }
 
     if ( mpi_flag )
@@ -1001,6 +1037,7 @@ void ReAllocate( reax_system *system, control_params *control,
         system->est_trans =
             (system->est_recv * sizeof(boundary_atom)) / sizeof(mpi_atom);
         total_send = 0;
+
         for ( p = 0; p < MAX_NBRS; ++p )
         {
             nbr_pr   = &( system->my_nbrs[p] );
@@ -1008,6 +1045,15 @@ void ReAllocate( reax_system *system, control_params *control,
             nbr_pr->est_send = MAX( nbr_data->cnt * SAFER_ZONE, MIN_SEND );
             total_send += nbr_pr->est_send;
         }
+#if defined(NEUTRAL_TERRITORY)
+        for ( p = 0; p < MAX_NT_NBRS; ++p )
+        {
+            nbr_pr   = &( system->my_nt_nbrs[p] );
+            nbr_data = &( mpi_data->out_nt_buffers[p] );
+            nbr_pr->est_send = MAX( nbr_data->cnt * SAFER_ZONE, MIN_SEND );
+        }
+#endif
+
 #if defined(DEBUG_FOCUS)
         fprintf( stderr, "p%d: reallocating mpi_buf: recv=%d send=%d total=%dMB\n",
                  system->my_rank, system->est_recv, total_send,
diff --git a/PuReMD/src/basic_comm.c b/PuReMD/src/basic_comm.c
index 3014299478da41545f9bdd0da2efc5f4e379d7db..135abb6e49404c8305938ea510e5c0bb2aafd9c6 100644
--- a/PuReMD/src/basic_comm.c
+++ b/PuReMD/src/basic_comm.c
@@ -129,6 +129,60 @@ void Dist( reax_system* system, mpi_datatypes *mpi_data,
 }
 
 
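+/* Distribute atom data to the Neutral Territory neighbor processors:
+   for each of the 6 NT directions, pack this processor's outgoing buffer
+   and send it to the neighbor in that direction (rank), while receiving
+   the data coming from the opposite neighbor (receive_rank) into buf at
+   that neighbor's atoms_str offset */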
+void Dist_NT( reax_system* system, mpi_datatypes *mpi_data,
+        void *buf, MPI_Datatype type, int scale, dist_packer pack )
+{
+    int d;
+    mpi_out_data *out_bufs;
+    MPI_Comm comm;
+    MPI_Request req[6];
+    MPI_Status stat[6];
+    neighbor_proc *nbr;
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d dist_nt: entered\n", system->my_rank );
+#endif
+    comm = mpi_data->comm_mesh3D;
+    out_bufs = mpi_data->out_nt_buffers;
+
+    /* initiate recvs */
+    for( d = 0; d < 6; ++d )
+    {
+        nbr = &(system->my_nt_nbrs[d]);
+        if ( nbr->atoms_cnt )
+        {
+            MPI_Irecv( buf + nbr->atoms_str * scale, nbr->atoms_cnt, type,
+                    nbr->receive_rank, d, comm, &(req[d]) );
+        }
+    }
+
+    for( d = 0; d < 6; ++d )
+    {
+        /* send the message for direction d */
+        nbr = &(system->my_nt_nbrs[d]);
+        if ( out_bufs[d].cnt )
+        {
+            pack( buf, out_bufs + d );
+            MPI_Send( out_bufs[d].out_atoms, out_bufs[d].cnt, type,
+                    nbr->rank, d, comm );
+        }
+    }
+
+    for( d = 0; d < 6; ++d )
+    {
+        nbr = &(system->my_nt_nbrs[d]);
+        if ( nbr->atoms_cnt )
+        {
+            // TODO: Wait(MPI_WAITANY)
+            MPI_Wait( &(req[d]), &(stat[d]) );
+        }
+    }
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d dist_nt: done\n", system->my_rank );
+#endif
+}
+
+
 void real_unpacker( void *dummy_in, void *dummy_buf, mpi_out_data *out_buf )
 {
     int i;
@@ -236,6 +290,63 @@ void Coll( reax_system* system, mpi_datatypes *mpi_data,
     fprintf( stderr, "p%d coll: done\n", system->my_rank );
 #endif
 }
+
+
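+/* Collect data from the Neutral Territory neighbor processors (reverse of
+   Dist_NT): send back the values computed here for the atoms received from
+   each NT neighbor, receive the values computed remotely for the atoms this
+   processor sent out, and accumulate them into buf via the outgoing index
+   lists */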
+void Coll_NT( reax_system* system, mpi_datatypes *mpi_data,
+        void *buf, MPI_Datatype type, int scale, coll_unpacker unpack )
+{
+    int d;
+    void *in[6];
+    mpi_out_data *out_bufs;
+    MPI_Comm comm;
+    MPI_Request req[6];
+    MPI_Status stat[6];
+    neighbor_proc *nbr;
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d coll_nt: entered\n", system->my_rank );
+#endif
+    comm = mpi_data->comm_mesh3D;
+    out_bufs = mpi_data->out_nt_buffers;
+
+    for ( d = 0; d < 6; ++d )
+    {
+        /* initiate recvs */
+        nbr = &(system->my_nt_nbrs[d]);
+        in[d] = mpi_data->in_nt_buffer[d];
+        if ( out_bufs[d].cnt )
+        {
+            MPI_Irecv( in[d], out_bufs[d].cnt, type, nbr->rank, d, comm, &(req[d]) );
+        }
+    }
+
+    for( d = 0; d < 6; ++d )
+    {
+        /* send the message for direction d */
+        nbr = &(system->my_nt_nbrs[d]);
+        if ( nbr->atoms_cnt )
+        {
+            MPI_Send( buf + nbr->atoms_str * scale, nbr->atoms_cnt, type,
+                    nbr->receive_rank, d, comm );
+        }
+    }
+
+    for( d = 0; d < 6; ++d )
+    {
+        if ( out_bufs[d].cnt )
+        {
+            //TODO: WAITANY
+            MPI_Wait( &(req[d]), &(stat[d]));
+            unpack( in[d], buf, out_bufs + d );
+        }
+    }
+
+#if defined(DEBUG)
+    fprintf( stderr, "p%d coll_nt: done\n", system->my_rank );
+#endif
+}
+
+
 #endif /*PURE_REAX*/
 
 /*****************************************************************************/
diff --git a/PuReMD/src/comm_tools.c b/PuReMD/src/comm_tools.c
index 8419e3efe44377e7d1680fe09b2bc06ef5756ca6..0b399825a58054fdfa6aa07f5351d4f221f45c55 100644
--- a/PuReMD/src/comm_tools.c
+++ b/PuReMD/src/comm_tools.c
@@ -26,6 +26,143 @@
 #include "vector.h"
 
 
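+/* Set up the Neutral Territory communication scheme: identify the 6 NT
+   neighbor processors, record the ranks for both the send and the receive
+   direction of each exchange, and compute the boundary region shared with
+   each neighbor */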
+void Setup_NT_Comm( reax_system* system, control_params* control,
+                 mpi_datatypes *mpi_data )
+{
+    int i, d;
+    real bndry_cut;
+    neighbor_proc *nbr_pr;
+    simulation_box *my_box;
+    ivec nbr_coords, nbr_recv_coords;
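+    /* entries 0..5 are the send directions of the NT scheme;
+       entries 6..11 are the matching opposite (receive) directions */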
+    ivec r[12] = {
+        {0, 0, -1}, // -z
+        {0, 0, +1}, // +z
+        {0, -1, 0}, // -y
+        {-1, -1, 0}, // -x-y
+        {-1, 0, 0}, // -x
+        {-1, +1, 0},  // -x+y
+
+        {0, 0, +1}, // +z
+        {0, 0, -1}, // -z
+        {0, +1, 0}, // +y
+        {+1, +1, 0}, // +x+y
+        {+1, 0, 0}, // +x
+        {+1, -1, 0}  // +x-y
+    };
+    my_box = &(system->my_box);
+    bndry_cut = system->bndry_cuts.ghost_cutoff;
+
+    /* identify my neighbors */
+    system->num_nt_nbrs = MAX_NT_NBRS;
+    for ( i = 0; i < system->num_nt_nbrs; ++i )
+    {
+        ivec_Sum( nbr_coords, system->my_coords, r[i] ); /* actual nbr coords */
+        nbr_pr = &(system->my_nt_nbrs[i]);
+        MPI_Cart_rank( mpi_data->comm_mesh3D, nbr_coords, &(nbr_pr->rank) );
+
+        /* rank of the neighbor processor in the receiving direction;
+           a separate ivec is used so that the periodicity check below
+           still refers to the send-direction neighbor coordinates */
+        ivec_Sum( nbr_recv_coords, system->my_coords, r[i + 6] );
+        MPI_Cart_rank( mpi_data->comm_mesh3D, nbr_recv_coords, &(nbr_pr->receive_rank) );
+
+        for ( d = 0; d < 3; ++d )
+        {
+            /* determine the boundary area with this nbr */
+            if ( r[i][d] < 0 )
+            {
+                nbr_pr->bndry_min[d] = my_box->min[d];
+                nbr_pr->bndry_max[d] = my_box->min[d] + bndry_cut;
+            }
+            else if ( r[i][d] > 0 )
+            {
+                nbr_pr->bndry_min[d] = my_box->max[d] - bndry_cut;
+                nbr_pr->bndry_max[d] = my_box->max[d];
+            }
+            else
+            {
+                nbr_pr->bndry_min[d] = my_box->min[d];
+                nbr_pr->bndry_max[d] = my_box->max[d];
+            }
+
+            /* determine if it is a periodic neighbor */
+            if ( nbr_coords[d] < 0 )
+                nbr_pr->prdc[d] = -1;
+            else if ( nbr_coords[d] >= control->procs_by_dim[d] )
+                nbr_pr->prdc[d] = +1;
+            else
+                nbr_pr->prdc[d] = 0;
+        }
+
+    }
+}
+
+
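+/* Append the local atoms that fall inside the boundary region of NT
+   neighbor 'dir' to that neighbor's outgoing index list */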
+void Sort_Neutral_Territory( reax_system *system, int dir, mpi_out_data *out_bufs )
+{
+    int i;
+    reax_atom *atoms;
+    neighbor_proc *nbr_pr;
+
+    atoms = system->my_atoms;
+
+    /* place each atom into the appropriate outgoing list */
+    nbr_pr = &( system->my_nt_nbrs[dir] );
+    for ( i = 0; i < system->n; ++i )
+    {
+        if ( nbr_pr->bndry_min[0] <= atoms[i].x[0] &&
+            atoms[i].x[0] < nbr_pr->bndry_max[0] &&
+            nbr_pr->bndry_min[1] <= atoms[i].x[1] &&
+            atoms[i].x[1] < nbr_pr->bndry_max[1] &&
+            nbr_pr->bndry_min[2] <= atoms[i].x[2] &&
+            atoms[i].x[2] < nbr_pr->bndry_max[2] )
+        {
+            out_bufs[dir].index[ out_bufs[dir].cnt ] = i;
+            out_bufs[dir].cnt++;
+        }
+    }
+}
+
+
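+/* Initialize the Neutral Territory exchange: reset the NT outgoing buffers,
+   sort local atoms into them for each direction, exchange send/receive
+   counts with the paired neighbors, allocate the incoming buffers, and
+   record where the received NT atoms will be stored (atoms_str, atoms_cnt) */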
+void Init_Neutral_Territory( reax_system* system, mpi_datatypes *mpi_data )
+{
+    int d, end, cnt;
+    mpi_out_data *out_bufs;
+    MPI_Comm comm;
+    MPI_Request req;
+    MPI_Status stat;
+    neighbor_proc *nbr;
+
+    Reset_Out_Buffers( mpi_data->out_nt_buffers, system->num_nt_nbrs );
+    comm = mpi_data->comm_mesh3D;
+    out_bufs = mpi_data->out_nt_buffers;
+    cnt = 0;
+    end = system->n;
+
+    for ( d = 0; d < 6; ++d )
+    {
+        nbr = &(system->my_nt_nbrs[d]);
+        Sort_Neutral_Territory( system, d, out_bufs );
+
+        /* exchange the outgoing/incoming atom counts for direction d */
+        MPI_Irecv( &cnt, 1, MPI_INT, nbr->receive_rank, d, comm, &req );
+        MPI_Send( &(out_bufs[d].cnt), 1, MPI_INT, nbr->rank, d, comm );
+        MPI_Wait( &req, &stat );
+
+        if ( mpi_data->in_nt_buffer[d] == NULL )
+        {
+            mpi_data->in_nt_buffer[d] = (void *)
+                smalloc( SAFER_ZONE * out_bufs[d].cnt * sizeof(real), "in", comm );
+        }
+
+        nbr->atoms_str = end;
+        nbr->atoms_cnt = cnt;
+        end += cnt;
+    }
+}
+
+
 void Setup_Comm( reax_system* system, control_params* control,
                  mpi_datatypes *mpi_data )
 {
@@ -653,6 +790,10 @@ void Comm_Atoms( reax_system *system, control_params *control,
 #endif
 
         Bin_Boundary_Atoms( system );
+
+#if defined(NEUTRAL_TERRITORY)
+        Init_Neutral_Territory( system, mpi_data );
+#endif
     }
     else
     {
diff --git a/PuReMD/src/forces.c b/PuReMD/src/forces.c
index 49e747a4c92042a1b2eba168a9c05417450b9332..d05110c13cfbf0e2d6d446df5ad8c484c3ece9bf 100644
--- a/PuReMD/src/forces.c
+++ b/PuReMD/src/forces.c
@@ -378,6 +378,7 @@ void Init_Forces( reax_system *system, control_params *control,
 #endif
         sbp_i = &(system->reax_param.sbp[type_i]);
 
+        //TODO: edit this part to include NT atoms
         if ( i < system->n )
         {
             local = 1;
diff --git a/PuReMD/src/linear_solvers.c b/PuReMD/src/linear_solvers.c
index 2d09f7daf4f570b4d52e7641cabc558dc3361d92..7c881c1b4497531dd7e96c39a82904f0309a552d 100644
--- a/PuReMD/src/linear_solvers.c
+++ b/PuReMD/src/linear_solvers.c
@@ -1266,7 +1266,6 @@ int CG( reax_system *system, control_params *control, simulation_data *data,
 
         t_start = Get_Time( );
         tmp = Parallel_Dot(workspace->d, workspace->q, system->n, mpi_data->world);
-        //TODO: all_Reduce time
         t_allreduce += Get_Timing_Info ( t_start );
 
         t_start = Get_Time( );
@@ -1300,7 +1299,6 @@ int CG( reax_system *system, control_params *control, simulation_data *data,
         t_start = Get_Time( );
         sig_old = sig_new;
         sig_new = Parallel_Dot(workspace->r, workspace->p, system->n, mpi_data->world);
-        //TODO all_reduce time
         t_allreduce += Get_Timing_Info( t_start );
 
         t_start = Get_Time( );
diff --git a/PuReMD/src/reax_defs.h b/PuReMD/src/reax_defs.h
index a61ce245a5fc29c580158f4f425805dd16506c5a..ecee152c938db9b5e33969a07906b7906b781058 100644
--- a/PuReMD/src/reax_defs.h
+++ b/PuReMD/src/reax_defs.h
@@ -94,6 +94,7 @@
 
 #define MASTER_NODE 0
 #define MAX_NBRS 6 //27
+#define MAX_NT_NBRS 6
 #define MYSELF   13  // encoding of relative coordinate (0,0,0)
 
 #define MAX_ITR 10
diff --git a/PuReMD/src/reax_types.h b/PuReMD/src/reax_types.h
index 56a8c1c9841d0f6ea67f74d364277ff9a768f5d3..7e3de437a470f54221824bb27a948401ed546fef 100644
--- a/PuReMD/src/reax_types.h
+++ b/PuReMD/src/reax_types.h
@@ -55,6 +55,7 @@
 #define ZERO                    (0.000000000000000e+00)
 #define REAX_MAX_STR            1024
 #define REAX_MAX_NBRS           6
+#define REAX_MAX_NT_NBRS        6
 #define REAX_MAX_3BODY_PARAM    5
 #define REAX_MAX_4BODY_PARAM    5
 #define REAX_MAX_ATOM_TYPES     25
@@ -205,8 +206,10 @@ typedef struct
     //MPI_Status   recv_stat2[REAX_MAX_NBRS];
 
     mpi_out_data out_buffers[REAX_MAX_NBRS];
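+    /* outgoing buffers for the Neutral Territory communication scheme */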
+    mpi_out_data out_nt_buffers[REAX_MAX_NT_NBRS];
     void *in1_buffer;
     void *in2_buffer;
+    void *in_nt_buffer[REAX_MAX_NT_NBRS]; /* per-direction incoming NT buffers */
 } mpi_datatypes;
 
 
@@ -484,6 +487,7 @@ typedef struct
 typedef struct
 {
     int  rank;
+    int  receive_rank; /* NT neighbor in the opposite (receive) direction */
     int  est_send, est_recv;
     int  atoms_str, atoms_cnt;
     ivec rltv, prdc;
@@ -525,9 +529,10 @@ typedef struct
     int              n, N, bigN, numH;
     int              local_cap, total_cap, gcell_cap, Hcap;
     int              est_recv, est_trans, max_recved;
-    int              wsize, my_rank, num_nbrs;
+    int              wsize, my_rank, num_nbrs, num_nt_nbrs;
     ivec             my_coords;
     neighbor_proc    my_nbrs[REAX_MAX_NBRS];
+    neighbor_proc    my_nt_nbrs[REAX_MAX_NT_NBRS];
     int             *global_offset;
     simulation_box   big_box, my_box, my_ext_box;
     grid             my_grid;