#ifndef ATOMIC_HPP_INCLUDED
#define ATOMIC_HPP_INCLUDED

// Copyright (c) 2007 Peter Dimov
//
// Distributed under the Boost Software License, Version 1.0.
// http://www.boost.org/LICENSE_1_0.txt

#include <stddef.h>

// Tag types that select a memory-ordering constraint through overload
// resolution. Each enum has exactly one enumerator, which is the tag value
// passed as the first argument of the atomic_* functions in this file.
enum _Relaxed
{
    __relaxed
};

enum _Acquire
{
    __acquire
};

enum _Release
{
    __release
};

enum _Acq_Rel
{
    __acq_rel
};

enum _Ordered
{
    __ordered
};

// Declares _ReadWriteBarrier with C linkage and asks the compiler to treat it
// as an intrinsic. The load, store and fence helpers in this file call it
// wherever an ordering constraint has to be expressed.
// NOTE(review): per the MSVC documentation this constrains compiler
// reordering only and emits no hardware fence -- confirm for the target
// toolchain.
extern "C" void _ReadWriteBarrier();
#pragma intrinsic( _ReadWriteBarrier )

// Prototypes for the interlocked primitives used by the swap, compare-swap
// and fetch-add helpers in this file. All of them operate on a
// 'long volatile *' destination. They are declared here directly, so these
// signatures have to agree with the prototypes the compiler expects for its
// intrinsics.
extern "C"
{

long  __cdecl _InterlockedIncrement( long volatile * Addend );
long  __cdecl _InterlockedDecrement( long volatile * Addend );
long  __cdecl _InterlockedCompareExchange( long volatile * Dest, long Exchange, long Comp );
long  __cdecl _InterlockedExchange( long volatile * Target, long Value );
long  __cdecl _InterlockedExchangeAdd( long volatile * Addend, long Value );

} // extern "C"

// Request the intrinsic form of each function declared above.
// _InterlockedIncrement and _InterlockedDecrement are declared and enabled
// here but are not called by the code visible in this file.
#pragma intrinsic( _InterlockedCompareExchange )
#pragma intrinsic( _InterlockedExchange )
#pragma intrinsic( _InterlockedExchangeAdd )
#pragma intrinsic( _InterlockedIncrement )
#pragma intrinsic( _InterlockedDecrement )


#include <boost/cstdint.hpp>

// _Size< N > serves two purposes: it is a tag type used to dispatch on the
// size of an operand, and its nested typedef 'type' names the boost
// fixed-width signed integer used to carry a value of N bytes.
//
// The primary template is declared but never defined, so only the sizes
// specialized below (1, 2, 4 and 8) can be instantiated.
template< int N > struct _Size;

template<> struct _Size< 1 > { typedef boost::int8_t  type; };
template<> struct _Size< 2 > { typedef boost::int16_t type; };
template<> struct _Size< 4 > { typedef boost::int32_t type; };
template<> struct _Size< 8 > { typedef boost::int64_t type; };

// Relaxed 4-byte load: reads the object at p through a volatile-qualified
// pointer and returns the value. No barrier is issued.
inline _Size< 4 >::type __atomic_load( _Relaxed, _Size< 4 >, void const volatile * p )
{
    typedef _Size< 4 >::type word_t;

    word_t const volatile * const source = static_cast< word_t const volatile * >( p );

    return *source;
}

// Acquire 4-byte load: reads the object at p through a volatile-qualified
// pointer, then calls _ReadWriteBarrier() before handing the value back, so
// the barrier sits between the read and whatever the caller does next.
inline _Size< 4 >::type __atomic_load( _Acquire, _Size< 4 >, void const volatile * p )
{
    typedef _Size< 4 >::type word_t;

    word_t const value = *static_cast< word_t const volatile * >( p );

    _ReadWriteBarrier();

    return value;
}

template< class Cn, class T > inline T atomic_load( Cn, T const * p ) // relaxed, acquire, ordered
{
    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    type r = __atomic_load( Cn(), __size(), p );

    return (T&)r;
}

template< class Cn, class T > inline T atomic_load( Cn, T const volatile * p )
{
    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    type r = __atomic_load( Cn(), __size(), p );

    return (T&)r;
}

// Ordered load, implemented as a compare-and-swap rather than a plain read.
//
// A value-initialized T is used both as the expected and as the replacement
// value, and atomic_compare_swap writes the value it found at p back into the
// expected slot, which is what gets returned. Constness of p is cast away
// because atomic_compare_swap takes a non-const target.
template< class T > inline T atomic_load( _Ordered, T const * p )
{
    T observed = T();
    T * const target = const_cast< T * >( p );

    atomic_compare_swap( __ordered, target, &observed, observed );

    return observed;
}

// Ordered load through a volatile pointer, implemented as a compare-and-swap
// rather than a plain read.
//
// A value-initialized T is used both as the expected and as the replacement
// value, and atomic_compare_swap writes the value it found at p back into the
// expected slot, which is what gets returned. Only constness is cast away;
// the target stays volatile.
template< class T > inline T atomic_load( _Ordered, T volatile const * p )
{
    T observed = T();
    T volatile * const target = const_cast< T volatile * >( p );

    atomic_compare_swap( __ordered, target, &observed, observed );

    return observed;
}

// Loads the pointer stored at *p using a relaxed atomic_load.
template< class T > inline T * atomic_load_address( T * const * p )
{
    T * const address = atomic_load( __relaxed, p ); // __acquire on Alpha

    return address;
}

// Loads the pointer stored at the volatile location *p using a relaxed
// atomic_load.
template< class T > inline T * atomic_load_address( T * const volatile * p )
{
    T * const address = atomic_load( __relaxed, p ); // __acquire on Alpha

    return address;
}

// Relaxed 4-byte store: copies the 4-byte value found at pv into the object
// at p, writing through a volatile-qualified pointer. No barrier is issued.
inline void __atomic_store( _Relaxed, _Size< 4 >, void volatile * p, void const * pv )
{
    typedef _Size< 4 >::type word_t;

    word_t const value = *static_cast< word_t const * >( pv );

    *static_cast< word_t volatile * >( p ) = value;
}

// Release 4-byte store: calls _ReadWriteBarrier() first, then copies the
// 4-byte value found at pv into the object at p, writing through a
// volatile-qualified pointer.
inline void __atomic_store( _Release, _Size< 4 >, void volatile * p, void const * pv )
{
    typedef _Size< 4 >::type word_t;

    _ReadWriteBarrier();

    word_t const value = *static_cast< word_t const * >( pv );

    *static_cast< word_t volatile * >( p ) = value;
}

template< class Cn, class T > inline void atomic_store( Cn, T * p, T v ) // relaxed, release, ordered
{
    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    __atomic_store( Cn(), __size(), p, &v );
}

template< class Cn, class T > inline void atomic_store( Cn, T volatile * p, T v )
{
    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    __atomic_store( Cn(), __size(), p, &v );
}

// Ordered store: performed as an ordered atomic_swap whose result (the value
// previously held at p) is discarded.
template< class T > inline void atomic_store( _Ordered, T * p, T v )
{
    (void) atomic_swap( __ordered, p, v );
}

// Ordered store through a volatile pointer: performed as an ordered
// atomic_swap whose result (the value previously held at p) is discarded.
template< class T > inline void atomic_store( _Ordered, T volatile * p, T v )
{
    (void) atomic_swap( __ordered, p, v );
}

// __prefix_fence( tag ) is invoked immediately before the interlocked
// operation in __atomic_swap, __atomic_compare_swap and __atomic_fetch_add.
// The overload selected by the ordering tag decides whether
// _ReadWriteBarrier() is issued at that point.

// Relaxed and acquire: nothing is issued before the operation.
inline void __prefix_fence( _Relaxed ) {}
inline void __prefix_fence( _Acquire ) {}

// Release, acquire-release and ordered: barrier before the operation.
inline void __prefix_fence( _Release ) { _ReadWriteBarrier(); }
inline void __prefix_fence( _Acq_Rel ) { _ReadWriteBarrier(); }
inline void __prefix_fence( _Ordered ) { _ReadWriteBarrier(); }

// __suffix_fence( tag ) is invoked immediately after the interlocked
// operation in __atomic_swap, __atomic_compare_swap and __atomic_fetch_add.
// The overload selected by the ordering tag decides whether
// _ReadWriteBarrier() is issued at that point.

// Relaxed: nothing is issued after the operation.
inline void __suffix_fence( _Relaxed ) {}

// Acquire: barrier after the operation.
inline void __suffix_fence( _Acquire ) { _ReadWriteBarrier(); }

// Release: nothing is issued after the operation.
inline void __suffix_fence( _Release ) {}

// Acquire-release and ordered: barrier after the operation.
inline void __suffix_fence( _Acq_Rel ) { _ReadWriteBarrier(); }
inline void __suffix_fence( _Ordered ) { _ReadWriteBarrier(); }

// 4-byte exchange: passes the 4-byte value found at pv to
// _InterlockedExchange on the object at p and returns that call's result.
// The call is bracketed by the prefix/suffix fences selected by the ordering
// tag Cn.
template< class Cn > inline _Size< 4 >::type __atomic_swap( Cn, _Size< 4 >, void volatile * p, void const * pv )
{
    // static_assert( sizeof( long ) == 4 );

    long volatile * const target = static_cast< long volatile * >( p );

    __prefix_fence( Cn() );

    long const desired = *static_cast< long const * >( pv );
    long const previous = _InterlockedExchange( target, desired );

    __suffix_fence( Cn() );

    return previous;
}

template< class Cn, class T > inline T atomic_swap( Cn, T * p, T v )
{
    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    type r = __atomic_swap( Cn(), __size(), p, &v );
    return (T&)r;
}

template< class Cn, class T > inline T atomic_swap( Cn, T volatile * p, T v )
{
    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    type r = __atomic_swap( Cn(), __size(), p, &v );
    return (T&)r;
}

template< class Cn > inline bool __atomic_compare_swap( Cn, _Size< 4 >, void volatile * p, void * pv, void const * pw )
{
    // static_assert( sizeof( long ) == 4 );

    long v = *static_cast< long * >( pv );
    long w = *static_cast< long const * >( pw );

    __prefix_fence( Cn() );

    long r = _InterlockedCompareExchange( static_cast< long volatile * >( p ), w, v );

    __suffix_fence( Cn() );

    *static_cast< long * >( pv ) = r;

    return r == v;
}

template< class Cn, class T > inline bool atomic_compare_swap( Cn, T * p, T * v, T w )
{
    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    return __atomic_compare_swap( Cn(), __size(), p, v, &w );
}

template< class Cn, class T > inline bool atomic_compare_swap( Cn, T volatile * p, T * v, T w )
{
    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    return __atomic_compare_swap( Cn(), __size(), p, v, &w );
}

// 4-byte fetch-and-add: passes the 4-byte addend found at pv to
// _InterlockedExchangeAdd on the object at p and returns that call's result.
// The call is bracketed by the prefix/suffix fences selected by the ordering
// tag Cn.
template< class Cn > inline _Size< 4 >::type __atomic_fetch_add( Cn, _Size< 4 >, void volatile * p, void const * pv )
{
    // static_assert( sizeof( long ) == 4 );

    long volatile * const target = static_cast< long volatile * >( p );

    __prefix_fence( Cn() );

    long const addend = *static_cast< long const * >( pv );
    long const previous = _InterlockedExchangeAdd( target, addend );

    __suffix_fence( Cn() );

    return previous;
}

template< class Cn, class T > inline T atomic_fetch_add( Cn, T * p, T v )
{
    // static_assert( __is_integral(T) );

    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    type r = __atomic_fetch_add( Cn(), __size(), p, &v );
    return (T&)r;
}

template< class Cn, class T > inline T atomic_fetch_add( Cn, T volatile * p, T v )
{
    // static_assert( __is_integral(T) );

    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    type r = __atomic_fetch_add( Cn(), __size(), p, &v );
    return (T&)r;
}

template< class Cn, class T > inline T* atomic_fetch_add( Cn, T* * p, ptrdiff_t v )
{
    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    type v2 = v * sizeof(T);

    type r = __atomic_fetch_add( Cn(), __size(), p, &v2 );
    return (T*&)r;
}

template< class Cn, class T > inline T* atomic_fetch_add( Cn, T* volatile * p, ptrdiff_t v )
{
    typedef _Size< sizeof(T) > __size;
    typedef typename __size::type type;

    type v2 = v * sizeof(T);

    type r = __atomic_fetch_add( Cn(), __size(), p, &v2 );
    return (T*&)r;
}

// Replaces *p with (*p & v) using a compare-and-swap retry loop and returns
// the value the successful swap replaced.
//
// The loop starts from a plain read of *p. Each failed atomic_compare_swap
// refreshes 'observed' with the value it found, so the next attempt
// recomputes the AND from up-to-date data.
template< class Cn, class T > inline T atomic_fetch_and( Cn, T * p, T v )
{
    // static_assert( __is_integral(T) );

    T observed = *p;

    for( ;; )
    {
        if( atomic_compare_swap( Cn(), p, &observed, observed & v ) )
        {
            break;
        }
    }

    return observed;
}

// Volatile-pointer counterpart of atomic_fetch_and( Cn, T *, T ): replaces *p
// with (*p & v) using a compare-and-swap retry loop and returns the value the
// successful swap replaced.
//
// The loop starts from a plain read of *p. Each failed atomic_compare_swap
// refreshes 'observed' with the value it found, so the next attempt
// recomputes the AND from up-to-date data.
template< class Cn, class T > inline T atomic_fetch_and( Cn, T volatile * p, T v )
{
    // static_assert( __is_integral(T) );

    T observed = *p;

    for( ;; )
    {
        if( atomic_compare_swap( Cn(), p, &observed, observed & v ) )
        {
            break;
        }
    }

    return observed;
}

// Replaces *p with (*p | v) using a compare-and-swap retry loop and returns
// the value the successful swap replaced.
//
// The loop starts from a plain read of *p. Each failed atomic_compare_swap
// refreshes 'observed' with the value it found, so the next attempt
// recomputes the OR from up-to-date data.
template< class Cn, class T > inline T atomic_fetch_or( Cn, T * p, T v )
{
    // static_assert( __is_integral(T) );

    T observed = *p;

    for( ;; )
    {
        if( atomic_compare_swap( Cn(), p, &observed, observed | v ) )
        {
            break;
        }
    }

    return observed;
}

// Volatile-pointer counterpart of atomic_fetch_or( Cn, T *, T ): replaces *p
// with (*p | v) using a compare-and-swap retry loop and returns the value the
// successful swap replaced.
//
// The loop starts from a plain read of *p. Each failed atomic_compare_swap
// refreshes 'observed' with the value it found, so the next attempt
// recomputes the OR from up-to-date data.
template< class Cn, class T > inline T atomic_fetch_or( Cn, T volatile * p, T v )
{
    // static_assert( __is_integral(T) );

    T observed = *p;

    for( ;; )
    {
        if( atomic_compare_swap( Cn(), p, &observed, observed | v ) )
        {
            break;
        }
    }

    return observed;
}


// Replaces *p with (*p ^ v) using a compare-and-swap retry loop and returns
// the value the successful swap replaced.
//
// The loop starts from a plain read of *p. Each failed atomic_compare_swap
// refreshes 'observed' with the value it found, so the next attempt
// recomputes the XOR from up-to-date data.
template< class Cn, class T > inline T atomic_fetch_xor( Cn, T * p, T v )
{
    // static_assert( __is_integral(T) );

    T observed = *p;

    for( ;; )
    {
        if( atomic_compare_swap( Cn(), p, &observed, observed ^ v ) )
        {
            break;
        }
    }

    return observed;
}

// Volatile-pointer counterpart of atomic_fetch_xor( Cn, T *, T ): replaces *p
// with (*p ^ v) using a compare-and-swap retry loop and returns the value the
// successful swap replaced.
//
// The loop starts from a plain read of *p. Each failed atomic_compare_swap
// refreshes 'observed' with the value it found, so the next attempt
// recomputes the XOR from up-to-date data.
template< class Cn, class T > inline T atomic_fetch_xor( Cn, T volatile * p, T v )
{
    // static_assert( __is_integral(T) );

    T observed = *p;

    for( ;; )
    {
        if( atomic_compare_swap( Cn(), p, &observed, observed ^ v ) )
        {
            break;
        }
    }

    return observed;
}

// Adds one to *p with a relaxed atomic_fetch_add; the fetched value is
// discarded.
template< class T > inline void atomic_increment( T * p )
{
    T const one = static_cast<T>( 1 );

    atomic_fetch_add( __relaxed, p, one );
}

// Adds one to the volatile object *p with a relaxed atomic_fetch_add; the
// fetched value is discarded.
template< class T > inline void atomic_increment( T volatile * p )
{
    T const one = static_cast<T>( 1 );

    atomic_fetch_add( __relaxed, p, one );
}

// Adds static_cast<T>( -1 ) to *p with an acquire-release atomic_fetch_add.
// Returns true when the value fetched by that operation equals 1.
template< class T > inline bool atomic_decrement( T * p )
{
    T const fetched = atomic_fetch_add( __acq_rel, p, static_cast<T>( -1 ) );

    return fetched == 1;
}

// Adds static_cast<T>( -1 ) to the volatile object *p with an acquire-release
// atomic_fetch_add. Returns true when the value fetched by that operation
// equals 1.
template< class T > inline bool atomic_decrement( T volatile * p )
{
    T const fetched = atomic_fetch_add( __acq_rel, p, static_cast<T>( -1 ) );

    return fetched == 1;
}

//

// template< class Cn > inline void atomic_memory_fence( Cn );

// Stand-alone memory fences, one overload per ordering tag.

// Relaxed: no operation.
inline void atomic_memory_fence( _Relaxed ) {}

// Acquire, release and acquire-release: a single _ReadWriteBarrier().
inline void atomic_memory_fence( _Acquire ) { _ReadWriteBarrier(); }
inline void atomic_memory_fence( _Release ) { _ReadWriteBarrier(); }
inline void atomic_memory_fence( _Acq_Rel ) { _ReadWriteBarrier(); }

// Ordered: performs an ordered atomic_store of 0 into a local scratch int.
// The stored value is irrelevant; the operation is done for its ordering
// effect only.
inline void atomic_memory_fence( _Ordered )
{
    int scratch;

    atomic_store( _Ordered(), &scratch, 0 );
}

// Compiler fence for any ordering tag other than _Relaxed: a single
// _ReadWriteBarrier().
template< class Cn > inline void atomic_compiler_fence( Cn ) { _ReadWriteBarrier(); }

// Relaxed compiler fence: no operation. As a non-template overload it is
// preferred over the template above when the argument is a _Relaxed.
inline void atomic_compiler_fence( _Relaxed ) {}

//

// Spinlock state word. atomic_spin_trylock swaps in 1 and succeeds when the
// previous value was 0; atomic_spin_unlock stores 0. The initializer below is
// therefore the unlocked state.
typedef int atomic_spinlock_t;
#define ATOMIC_SPINLOCK_INITIALIZER 0

// Makes one attempt to take the spinlock: swaps 1 into *lock with acquire
// ordering and returns true when the value swapped out was 0.
inline bool atomic_spin_trylock( atomic_spinlock_t * lock )
{
    atomic_spinlock_t const previous = atomic_swap( __acquire, lock, 1 );

    return previous == 0;
}

// Releases the spinlock by storing 0 into *lock with release ordering.
inline void atomic_spin_unlock( atomic_spinlock_t * lock )
{
    atomic_spinlock_t const unlocked = 0;

    atomic_store( __release, lock, unlocked );
}

#endif // #ifndef ATOMIC_HPP_INCLUDED

