/*----------------------------------------------------------------------
 *   PuReMD - Purdue ReaxFF Molecular Dynamics Program
 *
 *   Copyright (2010) Purdue University
 *   Hasan Metin Aktulga, haktulga@cs.purdue.edu
 *   Joseph Fogarty, jcfogart@mail.usf.edu
 *   Sagar Pandit, pandit@usf.edu
 *   Ananth Y Grama, ayg@cs.purdue.edu
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License as
 *   published by the Free Software Foundation; either version 2 of
 *   the License, or (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *   See the GNU General Public License for more details:
 *   <http://www.gnu.org/licenses/>.
 *----------------------------------------------------------------------*/

#ifndef __CUDA_SHUFFLE_H_
#define __CUDA_SHUFFLE_H_

#include "reax_types.h"
#include "reax_types.h"


#ifdef __cplusplus
extern "C"  {
#endif

#if defined(__SM_35__)

/* Part of this code is adapted from the web page linked below; the rest is
 * taken from the download in the PGPuReMD folder on CUPID.
 * http://wenda.baba.io/questions/4481817/overloading-the-cuda-shuffle-function-makes-the-original-ones-invisible.html
 */
/* Butterfly (XOR) warp shuffle for a 64-bit floating-point value.
 *
 * The warp shuffle intrinsics used here operate on 32-bit integers, so the
 * value is split into its low and high 32-bit words, each word is shuffled
 * separately, and the two words are reassembled afterwards.
 *
 * x:    value contributed by the calling lane
 *       (handled as a double; assumes real is a typedef of double --
 *       TODO confirm against reax_types.h)
 * lane: XOR mask applied to the caller's lane ID to select the source
 *       lane; despite its name this is NOT an absolute lane index
 *
 * returns: the value of x held by the lane whose ID is
 *          (caller's lane ID XOR lane)
 *
 * NOTE: when compiled with CUDA 9.0 or newer the *_sync intrinsics are used
 * with a full-warp participation mask, so all 32 lanes of the warp are
 * expected to call this function together. The legacy mask-less intrinsics
 * are deprecated since CUDA 9.0 and unavailable on compute capability 7.x+,
 * so they are kept only as a fallback for older toolkits. */
CUDA_DEVICE inline real shfl(real x, int lane)
{
    /* split the 64-bit value into two 32-bit words */
    int lo = __double2loint( x );
    int hi = __double2hiint( x );

    /* exchange each 32-bit word with the partner lane */
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
    lo = __shfl_xor_sync( 0xffffffff, lo, lane );
    hi = __shfl_xor_sync( 0xffffffff, hi, lane );
#else
    lo = __shfl_xor( lo, lane );
    hi = __shfl_xor( hi, lane );
#endif

    /* reassemble the 64-bit value from the received words */
    return __hiloint2double( hi, lo );
}

#endif

#ifdef __cplusplus
}
#endif


#endif