/*----------------------------------------------------------------------
  PuReMD - Purdue ReaxFF Molecular Dynamics Program
  
  Copyright (2010) Purdue University
  Hasan Metin Aktulga, haktulga@cs.purdue.edu
  Joseph Fogarty, jcfogart@mail.usf.edu
  Sagar Pandit, pandit@usf.edu
  Ananth Y Grama, ayg@cs.purdue.edu

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License as
  published by the Free Software Foundation; either version 2 of 
  the License, or (at your option) any later version.
  
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
  See the GNU General Public License for more details:
  <http://www.gnu.org/licenses/>.
  ----------------------------------------------------------------------*/

#include "comm_tools.h"
#include "grid.h"
#include "reset_tools.h"
#include "tool_box.h"
#include "vector.h"


void Setup_Comm( reax_system* system, control_params* control, 
		 mpi_datatypes *mpi_data )
{
  int i, d;
  real bndry_cut;
  neighbor_proc *nbr_pr;
  simulation_box *my_box;
  ivec nbr_coords;
  ivec r[6] = {{-1, 0, 0}, {+1, 0, 0}, // -x, +x 
	       {0, -1, 0}, {0, +1, 0}, // -y, +y
	       {0, 0, -1}, {0, 0, +1}};// -z, +z
  my_box = &(system->my_box);
  bndry_cut = system->bndry_cuts.ghost_cutoff;

  /* identify my neighbors */
  system->num_nbrs = MAX_NBRS;
  for( i = 0; i < system->num_nbrs; ++i ) {
    ivec_Sum( nbr_coords, system->my_coords, r[i] ); /* actual nbr coords */
    nbr_pr = &(system->my_nbrs[i]);
    ivec_Copy( nbr_pr->rltv, r[i] );
    MPI_Cart_rank( mpi_data->comm_mesh3D, nbr_coords, &(nbr_pr->rank) );

    for( d = 0; d < 3; ++d ) {
      /* determine the boundary area with this nbr */
      if( r[i][d] < 0 ) {
	nbr_pr->bndry_min[d] = my_box->min[d];
	nbr_pr->bndry_max[d] = my_box->min[d] + bndry_cut;
      }
      else if( r[i][d] > 0 ) {
	nbr_pr->bndry_min[d] = my_box->max[d] - bndry_cut;
	nbr_pr->bndry_max[d] = my_box->max[d];
      }
      else{
	nbr_pr->bndry_min[d] = nbr_pr->bndry_max[d] = NEG_INF;
      }
      
      /* determine if it is a periodic neighbor */
      if( nbr_coords[d] < 0 )
	nbr_pr->prdc[d] = -1;
      else if( nbr_coords[d] >= control->procs_by_dim[d] )
	nbr_pr->prdc[d] = +1;
      else
	nbr_pr->prdc[d] = 0;
    }

#if defined(DEBUG_FOCUS)
    fprintf( stderr, "p%d-nbr%d: r[%2d %2d %2d] -> c[%2d %2d %2d] -> rank=%d\n",
	     system->my_rank, i, r[i][0], r[i][1], r[i][2], 
	     nbr_coords[0], nbr_coords[1], nbr_coords[2], nbr_pr->rank );
#endif
  }
}


void Update_Comm( reax_system* system )
{
  int i, d;
  real bndry_cut;
  neighbor_proc *nbr_pr;
  simulation_box *my_box;
  ivec r[6] = {{-1, 0, 0}, {+1, 0, 0}, // -x, +x 
	       {0, -1, 0}, {0, +1, 0}, // -y, +y
	       {0, 0, -1}, {0, 0, +1}};// -z, +z
  my_box = &(system->my_box);
  bndry_cut = system->bndry_cuts.ghost_cutoff;
  
  /* identify my neighbors */
  for( i = 0; i < system->num_nbrs; ++i ) {
    nbr_pr = &(system->my_nbrs[i]);
    
    for( d = 0; d < 3; ++d )
      /* determine the boundary area with this nbr */
      if( r[i][d] < 0 ) {
	nbr_pr->bndry_min[d] = my_box->min[d];
	nbr_pr->bndry_max[d] = my_box->min[d] + bndry_cut;
      }
      else if( r[i][d] > 0 ) {
	nbr_pr->bndry_min[d] = my_box->max[d] - bndry_cut;
	nbr_pr->bndry_max[d] = my_box->max[d];
      }
      else{
	nbr_pr->bndry_min[d] = nbr_pr->bndry_max[d] = NEG_INF;
      }
  }
}


/********************* ATOM TRANSFER ***********************/

/***************** PACK & UNPACK ATOMS *********************/
void Pack_MPI_Atom( mpi_atom *matm, reax_atom *ratm, int i )
{
  matm->orig_id  = ratm->orig_id;
  matm->imprt_id = i;
  matm->type     = ratm->type;
  matm->num_bonds = ratm->num_bonds;
  matm->num_hbonds = ratm->num_hbonds;
  strcpy( matm->name, ratm->name );
  rvec_Copy( matm->x, ratm->x );
  rvec_Copy( matm->v, ratm->v );
  rvec_Copy( matm->f_old, ratm->f_old );
  memcpy( matm->s, ratm->s, sizeof(rvec4) ); //rvec_Copy( matm->s, ratm->s );
  memcpy( matm->t, ratm->t, sizeof(rvec4) ); //rvec_Copy( matm->t, ratm->t );
}


void Unpack_MPI_Atom( reax_atom *ratm, mpi_atom *matm )
{
  ratm->orig_id  = matm->orig_id;
  ratm->imprt_id = matm->imprt_id;
  ratm->type     = matm->type;
  ratm->num_bonds = matm->num_bonds;
  ratm->num_hbonds = matm->num_hbonds;
  strcpy( ratm->name, matm->name );
  rvec_Copy( ratm->x, matm->x );
  rvec_Copy( ratm->v, matm->v );
  rvec_Copy( ratm->f_old, matm->f_old );
  memcpy( ratm->s, matm->s, sizeof(rvec4) ); //rvec_Copy( ratm->s, matm->s );
  memcpy( ratm->t, matm->t, sizeof(rvec4) ); //rvec_Copy( ratm->t, matm->t );
}


/*********************** SORTER **************************/
void Sort_Transfer_Atoms( reax_system *system, int start, int end,
			  int dim, mpi_out_data *out_bufs ) 
{
  int i, d, out_cnt;
  reax_atom *atoms;
  simulation_box *my_box;
  mpi_atom *out_buf;

#if defined(DEBUG)
  fprintf( stderr, "p%d sort_transfers: start=%d end=%d dim=%d starting...\n", 
	   system->my_rank, start, end, dim );
#endif  
  atoms = system->my_atoms;
  my_box = &( system->my_box );

  /* place each atom into the appropriate outgoing list */
  for( i = start; i < end; ++i ) {
    for( d = dim; d < 3; ++d )
      if( atoms[i].x[d] < my_box->min[d] ) {
	out_cnt = out_bufs[2*d].cnt++;
	out_buf = (mpi_atom *)out_bufs[2*d].out_atoms;
	Pack_MPI_Atom( out_buf + out_cnt, atoms + i, i );
	atoms[i].orig_id = -1;
	break;
      }
      else if( atoms[i].x[d] >= my_box->max[d] ) {
	out_cnt = out_bufs[2*d+1].cnt++;
	out_buf = (mpi_atom *)out_bufs[2*d+1].out_atoms;
	Pack_MPI_Atom( out_buf + out_cnt, atoms + i, i );
	atoms[i].orig_id = -1;
	break;
      }
  }

#if defined(DEBUG_FOCUS)
  for( d = 2*dim; d < 2*dim+2; ++d )
    if( out_bufs[d].cnt ) {
      fprintf( stderr, "p%d to p%d(nbr%d) # of transfers = %d\n", 
	       system->my_rank, system->my_nbrs[d].rank, d, out_bufs[d].cnt );
      out_buf = out_bufs[d].out_atoms;
      for( i = 0; i < out_bufs[d].cnt; ++i )
	fprintf( stderr, "p%d to p%d: transfer atom%d [%.3f %.3f %.3f]\n",
		 system->my_rank, system->my_nbrs[d].rank, out_buf[i].imprt_id, 
		 out_buf[i].x[0],  out_buf[i].x[1],  out_buf[i].x[2] );
    }
  //fprintf( stderr, "p%d sort_transfers: start=%d end=%d dim=%d done!\n", 
  //   system->my_rank, start, end, dim );
#endif    
}


/*********************** UNPACKER **************************/
void Unpack_Transfer_Message( reax_system *system, int end, void *dummy, 
			      int cnt, neighbor_proc *nbr, int dim )
{
  int i;
  real dx;
  reax_atom *dest;
  mpi_atom* src = (mpi_atom*) dummy;

  dest = system->my_atoms + end;
  for( i = 0; i < cnt; ++i )
    Unpack_MPI_Atom( dest + i, src + i );

  /* adjust coordinates of recved atoms if nbr is a periodic one */
  if( nbr->prdc[dim] ) {
    dx = nbr->prdc[dim] * system->big_box.box_norms[dim];
    for( i = 0; i < cnt; ++i )
      dest[i].x[dim] += dx;
  }
}


/************** EXCHANGE BOUNDARY ATOMS *****************/

/************ PACK & UNPACK BOUNDARY ATOMS **************/
void Pack_Boundary_Atom( boundary_atom *matm, reax_atom *ratm, int i )
{
  matm->orig_id  = ratm->orig_id;
  matm->imprt_id = i;
  matm->type     = ratm->type;
  matm->num_bonds = ratm->num_bonds;
  matm->num_hbonds = ratm->num_hbonds;
  rvec_Copy( matm->x, ratm->x );
}


void Unpack_Boundary_Atom( reax_atom *ratm, boundary_atom *matm )
{
  ratm->orig_id = matm->orig_id;
  ratm->imprt_id = matm->imprt_id;
  ratm->type = matm->type;
  ratm->num_bonds = matm->num_bonds;
  ratm->num_hbonds = matm->num_hbonds;
  //ratm->renumber = offset + matm->imprt_id;
  rvec_Copy( ratm->x, matm->x );
}


/*********************** SORTER **************************/
void Sort_Boundary_Atoms( reax_system *system, int start, int end,
			  int dim, mpi_out_data *out_bufs ) 
{
  int i, d, p, out_cnt;
  reax_atom *atoms;
  simulation_box *my_box;
  boundary_atom *out_buf;
  neighbor_proc *nbr_pr;

#if defined(DEBUG_FOCUS)
  fprintf( stderr, "p%d sort_exchange: start=%d end=%d dim=%d starting...\n", 
	   system->my_rank, start, end, dim );
#endif 
  atoms = system->my_atoms;
  my_box = &( system->my_box ); 

  /* place each atom into the appropriate outgoing list */
  for( i = start; i < end; ++i ) 
    for( d = dim; d < 3; ++d )
      for( p = 2*d; p < 2*d+2; ++p ) {
	nbr_pr = &( system->my_nbrs[p] );
	if( nbr_pr->bndry_min[d] <= atoms[i].x[d] && 
	    atoms[i].x[d] < nbr_pr->bndry_max[d] ) {
	  out_cnt = out_bufs[p].cnt++;
	  out_bufs[p].index[out_cnt] = i;
	  out_buf = (boundary_atom *)out_bufs[p].out_atoms;
	  Pack_Boundary_Atom( out_buf + out_cnt, atoms + i, i );
	}
      }

#if defined(DEBUG_FOCUS)
  for( i = 2*dim; i < 2*dim+2; ++i )
    fprintf( stderr, "p%d to p%d(nbr%d) # of exchanges to send = %d\n", 
	     system->my_rank, system->my_nbrs[i].rank, i,
	     out_bufs[i].cnt );
  fprintf( stderr, "p%d sort_exchange: start=%d end=%d dim=%d done!\n", 
	   system->my_rank, start, end, dim );
#endif    
}


void Estimate_Boundary_Atoms( reax_system *system, int start, int end,
			      int d, mpi_out_data *out_bufs ) 
{
  int i, p, out_cnt;
  reax_atom *atoms;
  simulation_box *my_box;
  boundary_atom *out_buf;
  neighbor_proc *nbr1, *nbr2, *nbr_pr;
  
#if defined(DEBUG_FOCUS)
  fprintf( stderr, "p%d estimate_exchange: start=%d end=%d dim=%d starting.\n", 
	   system->my_rank, start, end, d );
#endif
  atoms = system->my_atoms;
  my_box = &( system->my_box );
  nbr1 = &(system->my_nbrs[2*d]);
  nbr2 = &(system->my_nbrs[2*d+1]);
  nbr1->est_send = 0;
  nbr2->est_send = 0; 

  /* count the number of atoms in each processor's outgoing list */
  for( i = 0; i < end; ++i ) {
    if( nbr1->bndry_min[d]<=atoms[i].x[d] && atoms[i].x[d]<nbr1->bndry_max[d] )
      nbr1->est_send++;
    if( nbr2->bndry_min[d]<=atoms[i].x[d] && atoms[i].x[d]<nbr2->bndry_max[d] )
      nbr2->est_send++;
  }
  
  /* estimate the space based on the count above */
  nbr1->est_send = MAX( MIN_SEND, nbr1->est_send * SAFER_ZONE );
  nbr2->est_send = MAX( MIN_SEND, nbr2->est_send * SAFER_ZONE );
#if defined(DEBUG_FOCUS)
  fprintf( stderr, "p%d estimate_exchange: end=%d dim=%d est1=%d est2=%d\n", 
	   system->my_rank, end, d, nbr1->est_send, nbr2->est_send );
#endif

  /* allocate the estimated space */
  for( p = 2*d; p < 2*d + 2; ++p ) {
    nbr_pr = &( system->my_nbrs[p] );
    out_bufs[p].index = (int*) 
      scalloc( nbr_pr->est_send, sizeof(int), "mpibuf:index" );
    out_bufs[p].out_atoms = (void*) 
      scalloc( nbr_pr->est_send, sizeof(boundary_atom), "mpibuf:out_atoms" );
  }

  /* sort the atoms to their outgoing buffers */
  for( i = 0; i < end; ++i ) 
    for( p = 2*d; p < 2*d+2; ++p ) {
      nbr_pr = &( system->my_nbrs[p] );
      if( nbr_pr->bndry_min[d] <= atoms[i].x[d] && 
	  atoms[i].x[d] < nbr_pr->bndry_max[d] ) {
	out_cnt = out_bufs[p].cnt++;
	out_bufs[p].index[out_cnt] = i;
	out_buf = (boundary_atom *)out_bufs[p].out_atoms;
	Pack_Boundary_Atom( out_buf + out_cnt, atoms + i, i );
      }
    }

#if defined(DEBUG_FOCUS)
  fprintf( stderr, "p%d estimate_exchange: end=%d dim=%d done!\n", 
	   system->my_rank, end, d );
#endif
}


void Estimate_Init_Storage( int me, neighbor_proc *nbr1, neighbor_proc *nbr2, 
			    int d, int *max, int *nrecv, 
			    void **in1, void **in2, MPI_Comm comm )
{
  MPI_Request req1, req2;
  MPI_Status stat1, stat2;
  int new_max;

  /* first exchange the estimates, then allocate buffers */
  MPI_Irecv( &nbr1->est_recv, 1, MPI_INT, nbr1->rank, 2*d+1, comm, &req1 );
  MPI_Irecv( &nbr2->est_recv, 1, MPI_INT, nbr2->rank, 2*d, comm, &req2 );
  MPI_Send( &nbr1->est_send, 1, MPI_INT, nbr1->rank, 2*d, comm );
  MPI_Send( &nbr2->est_send, 1, MPI_INT, nbr2->rank, 2*d+1, comm );
  MPI_Wait( &req1, &stat1 );
  MPI_Wait( &req2, &stat2 );
  nrecv[2*d] = nbr1->est_recv;
  nrecv[2*d+1] = nbr2->est_recv;
  new_max = MAX( nbr1->est_recv, nbr2->est_recv );
#if defined(DEBUG_FOCUS)
  fprintf( stderr, "p%d-p%d(nbr%d) est_send=%d est_recv=%d\n",
	   me, nbr1->rank, 2*d, nbr1->est_send, nbr1->est_recv );
  fprintf( stderr, "p%d-p%d(nbr%d) est_send=%d est_recv=%d\n",
	   me, nbr2->rank, 2*d+1, nbr2->est_send, nbr2->est_recv );
  fprintf( stderr, "max=%d  new_max=%d\n", *max, new_max );
#endif
  
  if( new_max > *max ) {
    *max = new_max;
    if(*in1) sfree( *in1, "in1" );
    if(*in2) sfree( *in2, "in2" );
    *in1 = (void *) smalloc( new_max * sizeof(boundary_atom), "in1" );
    *in2 = (void *) smalloc( new_max * sizeof(boundary_atom), "in2" ); 
  }
}


/*********************** UNPACKER **************************/
void Unpack_Exchange_Message( reax_system *system, int end, void *dummy, 
			      int cnt, neighbor_proc *nbr, int dim )
{
  int i;
  real dx;
  reax_atom *dest;
  boundary_atom* src = (boundary_atom*) dummy;

  dest = system->my_atoms + end;
  for( i = 0; i < cnt; ++i )
    Unpack_Boundary_Atom( dest + i, src + i );

  /* record the atoms recv'd from this nbr */
  nbr->atoms_str = end;
  nbr->atoms_cnt = cnt;
  /* update est_recv */
  nbr->est_recv = MAX( cnt*SAFER_ZONE, MIN_SEND );

  /* update max_recv to make sure that we reallocate at the right time */
  if( cnt > system->max_recved )
    system->max_recved = cnt;

  /* adjust coordinates of recved atoms if nbr is a periodic one */
  if( nbr->prdc[dim] ) {
    dx = nbr->prdc[dim] * system->big_box.box_norms[dim];
    for( i = 0; i < cnt; ++i )
      dest[i].x[dim] += dx;
  }
}


void Unpack_Estimate_Message( reax_system *system, int end, void *dummy, 
			      int cnt, neighbor_proc *nbr, int dim )
{
#if defined(DEBUG_FOCUS)
  fprintf( stderr, "p%d-p%d unpack_estimate: end=%d cnt=%d - unpacking\n", 
	   system->my_rank, nbr->rank, end, cnt );
#endif

  system->my_atoms = (reax_atom*) 
    realloc( system->my_atoms, (end+cnt) * sizeof(reax_atom) );
  
  Unpack_Exchange_Message( system, end, dummy, cnt, nbr, dim );

#if defined(DEBUG_FOCUS)
  fprintf( stderr, "p%d-p%d unpack_estimate: end=%d cnt=%d - done\n", 
	   system->my_rank, nbr->rank, end, cnt );
#endif
}


/**************** UPDATE ATOM POSITIONS *******************/

/**************** PACK POSITION UPDATES *******************/
void Sort_Position_Updates( reax_system *system, int start, int end,
			    int dim, mpi_out_data *out_bufs ) 
{
  int i, p;
  reax_atom *atoms;
  rvec *out;

  atoms = system->my_atoms;

  for( p = 2*dim; p < 2*dim + 2; ++p ) {
    out = (rvec*) out_bufs[p].out_atoms;    
    for( i = 0; i < out_bufs[p].cnt; ++i ) 
      memcpy( out[i], atoms[ out_bufs[p].index[i] ].x, sizeof(rvec) );
  }
}

/*************** UNPACK POSITION UPDATES ******************/
void Unpack_Position_Updates( reax_system *system, int end, void *dummy, 
			      int cnt, neighbor_proc *nbr, int dim )
{
  int i, start;
  reax_atom *atoms;
  real dx;
  rvec* src = (rvec*) dummy;

  atoms = system->my_atoms;
  start = nbr->atoms_str;

  for( i = 0; i < cnt; ++i )
    memcpy( atoms[start+i].x, src[i], sizeof(rvec) );

  /* adjust coordinates of recved atoms if nbr is a periodic one */
  if( nbr->prdc[dim] ) {
    dx = nbr->prdc[dim] * system->big_box.box_norms[dim];
    for( i = 0; i < cnt; ++i )
      atoms[start+i].x[dim] += dx;
  }
}


int SendRecv( reax_system* system, mpi_datatypes *mpi_data, MPI_Datatype type, 
	      int* nrecv, message_sorter sort_func, unpacker unpack, int clr )
{
  int d, cnt, start, end, max, est_flag;
  mpi_out_data *out_bufs;
  void *in1, *in2; 
  MPI_Comm comm;
  MPI_Request req1, req2;
  MPI_Status stat1, stat2;
  neighbor_proc *nbr1, *nbr2;

#if defined(DEBUG_FOCUS)
  fprintf( stderr, "p%d sendrecv: entered\n", system->my_rank );
#endif
  if( clr ) Reset_Out_Buffers( mpi_data->out_buffers, system->num_nbrs );
  comm = mpi_data->comm_mesh3D;
  in1 = mpi_data->in1_buffer;
  in2 = mpi_data->in2_buffer;
  out_bufs = mpi_data->out_buffers;
  start = 0;
  end = system->n;
  max = 0;
  est_flag = (mpi_data->in1_buffer == NULL) || (mpi_data->in2_buffer == NULL);

  for( d = 0; d < 3; ++d ) {
    sort_func( system, start, end, d, out_bufs );
    start = end;
    nbr1 = &(system->my_nbrs[2*d]);
    nbr2 = &(system->my_nbrs[2*d+1]);

    /* for estimates in1_buffer & in2_buffer will be NULL */
    if( est_flag )
      Estimate_Init_Storage( system->my_rank, nbr1, nbr2, d, 
			     &max, nrecv, &in1, &in2, comm );

    /* initiate recvs */
    MPI_Irecv( in1, nrecv[2*d], type, nbr1->rank, 2*d+1, comm, &req1 );
    MPI_Irecv( in2, nrecv[2*d+1], type, nbr2->rank, 2*d, comm, &req2 );

    /* send both messages in dimension d */
    MPI_Send( out_bufs[2*d].out_atoms, out_bufs[2*d].cnt, type, 
	      nbr1->rank, 2*d, comm );
    MPI_Send( out_bufs[2*d+1].out_atoms, out_bufs[2*d+1].cnt, type, 
	      nbr2->rank, 2*d+1, comm );
    
    /* recv and unpack atoms from nbr1 in dimension d */
    MPI_Wait( &req1, &stat1 );
    MPI_Get_count( &stat1, type, &cnt );
    unpack( system, end, in1, cnt, nbr1, d );
    end += cnt;
        
    /* recv and unpack atoms from nbr2 in dimension d */
    MPI_Wait( &req2, &stat2 );
    MPI_Get_count( &stat2, type, &cnt );
    unpack( system, end, in2, cnt, nbr2, d );
    end += cnt;
  }

  if( est_flag ) {
    system->est_recv = max;
    system->est_trans = (max * sizeof(boundary_atom)) / sizeof(mpi_atom);
    mpi_data->in1_buffer = in1;
    mpi_data->in2_buffer = in2;
  }

  return end;
}


void Comm_Atoms( reax_system *system, control_params *control, 
		 simulation_data *data, storage *workspace, reax_list **lists, 
		 mpi_datatypes *mpi_data, int renbr )
{
  int i;
  int nrecv[MAX_NBRS];
#if defined(LOG_PERFORMANCE)
  real t_start=0, t_elapsed=0;
  
  if( system->my_rank == MASTER_NODE )
    t_start = Get_Time( );
#endif

  if( renbr ) {
    for( i = 0; i < MAX_NBRS; ++i ) nrecv[i] = system->est_trans;
    system->n = SendRecv( system, mpi_data, mpi_data->mpi_atom_type, nrecv,
			  Sort_Transfer_Atoms, Unpack_Transfer_Message, 1 );
    Bin_My_Atoms( system, &(workspace->realloc) );
    Reorder_My_Atoms( system, workspace );
#if defined(DEBUG_FOCUS)
    fprintf( stderr, "p%d updated local atoms, n=%d\n", 
	     system->my_rank, system->n );
    MPI_Barrier( MPI_COMM_WORLD );
#endif
    
    for( i = 0; i < MAX_NBRS; ++i ) nrecv[i] = system->my_nbrs[i].est_recv;
    system->N = SendRecv(system, mpi_data, mpi_data->boundary_atom_type, nrecv,
			 Sort_Boundary_Atoms, Unpack_Exchange_Message, 1);
#if defined(DEBUG_FOCUS)  
    fprintf( stderr, "p%d: exchanged boundary atoms, N=%d\n", 
	     system->my_rank, system->N );
    for( i = 0; i < MAX_NBRS; ++i )
      fprintf( stderr, "p%d: nbr%d(p%d) str=%d cnt=%d end=%d\n",
	       system->my_rank, i, system->my_nbrs[i].rank, 
	       system->my_nbrs[i].atoms_str,  system->my_nbrs[i].atoms_cnt, 
	       system->my_nbrs[i].atoms_str + system->my_nbrs[i].atoms_cnt );
    MPI_Barrier( MPI_COMM_WORLD );
#endif
    Bin_Boundary_Atoms( system );
  }
  else{
    for( i = 0; i < MAX_NBRS; ++i ) nrecv[i] = system->my_nbrs[i].atoms_cnt;
    SendRecv( system, mpi_data, mpi_data->mpi_rvec, nrecv, 
	      Sort_Position_Updates, Unpack_Position_Updates, 0 );
#if defined(DEBUG_FOCUS)  
    fprintf( stderr, "p%d: updated positions\n", system->my_rank );
    MPI_Barrier( MPI_COMM_WORLD );
#endif
  }
  
#if defined(LOG_PERFORMANCE)
  if( system->my_rank == MASTER_NODE ) {
    t_elapsed = Get_Timing_Info( t_start );
    data->timing.comm += t_elapsed;
  }
#endif
#if defined(DEBUG_FOCUS) 
  fprintf(stderr, "p%d @ renbr=%d: comm_atoms done\n", system->my_rank, renbr);
  fprintf( stderr, "p%d: system->n = %d, system->N = %d\n",
	   system->my_rank, system->n, system->N );
  //Print_My_Ext_Atoms( system );
  //Print_All_GCells( system );
  //MPI_Barrier( MPI_COMM_WORLD );
#endif
}