/*
 * $Id: libcpu.h,v 1.1 2013-05-13 18:03:43 vrsieh Exp $
 *
 * Derived from QEMU sources.
 * Modified for FAUmachine by Volkmar Sieh.
 *  
 *  Copyright (c) 2005-2009 FAUmachine Team.
 *  Copyright (c) 2003 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
 * USA
 */
 
#ifndef __LIBCPU_H_INCLUDED
#define __LIBCPU_H_INCLUDED

#include <inttypes.h>

typedef float float32;
typedef double float64;

#if defined(__arm__) || defined(__sparc__)
#define WORDS_ALIGNED
#endif

/* some important defines: 
 * 
 * WORDS_ALIGNED : if defined, the host cpu can only make word aligned
 * memory accesses.
 * 
 * WORDS_BIGENDIAN : if defined, the host cpu is big endian and
 * otherwise little endian.
 * 
 * (TARGET_WORDS_ALIGNED : same for target cpu (not supported yet))
 * 
 * TARGET_WORDS_BIGENDIAN : same for target cpu
 */

#include "bswap.h"

/*
 * Raw view of a host double: 'd' is the value, 'll' the whole 64 bit
 * pattern and 'l.upper' / 'l.lower' its most / least significant 32 bit
 * halves.
 *
 * The order of the two halves in memory follows the host:
 *  - big endian hosts store the upper word first;
 *  - ARM hosts using the legacy FPA format (neither __VFP_FP__ nor
 *    __ARM_EABI__ defined) also store the upper word first, even when the
 *    CPU is little endian;
 *  - every other host, including ARM EABI/VFP, stores the lower word first.
 */
typedef union {
    double d;
#if defined(WORDS_BIGENDIAN) \
    || (defined(__arm__) && !defined(__VFP_FP__) && !defined(__ARM_EABI__))
    struct {
        uint32_t upper;
        uint32_t lower;
    } l;
#else
    struct {
        uint32_t lower;
        uint32_t upper;
    } l;
#endif
    uint64_t ll;
} CPU_DoubleU;

/*
 * HOST_LONG_BITS / HOST_LONG_SIZE: width of a host pointer sized word in
 * bits / bytes.
 *
 * Derived from UINTPTR_MAX (<stdint.h>, pulled in through <inttypes.h>) so
 * that every 64 bit host is recognised, not only the ones named explicitly.
 * The original architecture list is kept as a fallback for environments
 * that do not expose UINTPTR_MAX to the preprocessor.
 */
#if defined(UINTPTR_MAX)
#if UINTPTR_MAX > 0xffffffffU
#define HOST_LONG_BITS 64
#else
#define HOST_LONG_BITS 32
#endif
#elif defined(__alpha__) || defined (__ia64__) || defined(__x86_64__)
#define HOST_LONG_BITS 64
#else
#define HOST_LONG_BITS 32
#endif
#define HOST_LONG_SIZE		(HOST_LONG_BITS / 8)

/*
 * Width of a target word: 64 bits when CONFIG_CPU is at least 80486 and
 * CONFIG_CPU_LM_SUPPORT is non-zero, 32 bits otherwise.
 */
#if !(CONFIG_CPU >= 80486 && CONFIG_CPU_LM_SUPPORT)
#define TARGET_LONG_BITS	32
#else
#define TARGET_LONG_BITS	64
#endif
#define TARGET_LONG_SIZE	(TARGET_LONG_BITS / 8)

/* target_ulong is the type of a virtual address; target_long is its
   signed counterpart. */
#if TARGET_LONG_BITS == 32
typedef int32_t target_long;
typedef uint32_t target_ulong;
#elif TARGET_LONG_BITS == 64
typedef int64_t target_long;
typedef uint64_t target_ulong;
#else
#error TARGET_LONG_SIZE undefined
#endif

/* CPU memory access without any memory or io remapping */

/*
 * the generic syntax for the memory accesses is:
 *
 * load: ld{type}{sign}{size}{endian}_{access_type}(ptr)
 *
 * store: st{type}{size}{endian}_{access_type}(ptr, val)
 *
 * type is:
 * (empty): integer access
 *   f    : float access
 *
 * sign is:
 * (empty): for floats or 32 bit size
 *   u    : unsigned
 *   s    : signed
 *
 * size is:
 *   b: 8 bits
 *   w: 16 bits
 *   l: 32 bits
 *   q: 64 bits
 * 
 * endian is:
 * (empty): target cpu endianness or 8 bit access
 *   r    : reversed target cpu endianness (not implemented yet)
 *   be   : big endian (in this header only for the _p host pointer helpers)
 *   le   : little endian (in this header only for the _p host pointer helpers)
 *
 * access_type is:
 *   raw    : host memory access
 *   user   : user mode access using soft MMU
 *   kernel : kernel mode access using soft MMU
 */
/* Load one unsigned byte from host memory at ptr; the result is 0..255. */
static inline __attribute__((__always_inline__)) int ldub_p(void *ptr)
{
    const uint8_t *src = ptr;

    return src[0];
}

/* Load one signed byte from host memory at ptr; the result is sign
   extended to int. */
static inline __attribute__((__always_inline__)) int ldsb_p(void *ptr)
{
    const int8_t *src = ptr;

    return src[0];
}

/* Store the low 8 bits of v into the byte at ptr. */
static inline __attribute__((__always_inline__)) void stb_p(void *ptr, int v)
{
    uint8_t *dst = ptr;

    dst[0] = (uint8_t)v;
}
/* NOTE: on arm, putting 2 in /proc/sys/debug/alignment so that the
   kernel handles unaligned load/stores may give better results, but
   it is a system wide setting : bad */
#if defined(WORDS_BIGENDIAN) || defined(WORDS_ALIGNED)

/* conservative code for little endian unaligned accesses */
/*
 * Load an unsigned 16 bit little endian value from ptr without requiring
 * any alignment. Returns 0..0xffff.
 */
static inline __attribute__((__always_inline__)) int lduw_le_p(void *ptr)
{
#ifdef __powerpc__
    int val;
    /* The "m" input tells the compiler that the asm reads *ptr, so pending
       stores to that location are not moved past the load. */
    __asm__ __volatile__ ("lhbrx %0,0,%1"
                          : "=r" (val)
                          : "r" (ptr), "m" (*(uint16_t *)ptr));
    return val;
#else
    uint8_t *p = ptr;
    return p[0] | (p[1] << 8);
#endif
}

/*
 * Load a signed 16 bit little endian value from ptr without requiring any
 * alignment. The result is sign extended to int.
 */
static inline __attribute__((__always_inline__)) int ldsw_le_p(void *ptr)
{
#ifdef __powerpc__
    int val;
    /* The "m" input tells the compiler that the asm reads *ptr, so pending
       stores to that location are not moved past the load. */
    __asm__ __volatile__ ("lhbrx %0,0,%1"
                          : "=r" (val)
                          : "r" (ptr), "m" (*(uint16_t *)ptr));
    return (int16_t)val;
#else
    uint8_t *p = ptr;
    return (int16_t)(p[0] | (p[1] << 8));
#endif
}

/*
 * Load a 32 bit little endian value from ptr without requiring any
 * alignment. The 32 bit pattern is returned as an int.
 */
static inline __attribute__((__always_inline__)) int ldl_le_p(void *ptr)
{
#ifdef __powerpc__
    int val;
    /* The "m" input tells the compiler that the asm reads *ptr, so pending
       stores to that location are not moved past the load. */
    __asm__ __volatile__ ("lwbrx %0,0,%1"
                          : "=r" (val)
                          : "r" (ptr), "m" (*(uint32_t *)ptr));
    return val;
#else
    uint8_t *p = ptr;
    /* Widen each byte to uint32_t before shifting: a plain p[3] promotes to
       int and shifting a byte >= 0x80 left by 24 overflows a signed int. */
    return (int)((uint32_t)p[0] | ((uint32_t)p[1] << 8)
                 | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24));
#endif
}

/* Load a 64 bit little endian value from ptr as two 32 bit little endian
   halves (low half first). */
static inline __attribute__((__always_inline__)) uint64_t ldq_le_p(void *ptr)
{
    uint8_t *bytes = ptr;
    uint32_t low = ldl_le_p(bytes);
    uint32_t high = ldl_le_p(bytes + 4);

    return ((uint64_t)high << 32) | low;
}

/* Store the low 16 bits of v at ptr in little endian byte order, without
   requiring any alignment. */
static inline __attribute__((__always_inline__)) void stw_le_p(void *ptr, int v)
{
#ifdef __powerpc__
    __asm__ __volatile__ ("sthbrx %1,0,%2"
                          : "=m" (*(uint16_t *)ptr)
                          : "r" (v), "r" (ptr));
#else
    uint8_t *dst = ptr;
    int i;

    /* least significant byte first */
    for (i = 0; i < 2; i++) {
        dst[i] = v >> (8 * i);
    }
#endif
}

/* Store the 32 bits of v at ptr in little endian byte order, without
   requiring any alignment. */
static inline __attribute__((__always_inline__)) void stl_le_p(void *ptr, int v)
{
#ifdef __powerpc__
    __asm__ __volatile__ ("stwbrx %1,0,%2"
                          : "=m" (*(uint32_t *)ptr)
                          : "r" (v), "r" (ptr));
#else
    uint8_t *dst = ptr;
    int i;

    /* least significant byte first */
    for (i = 0; i < 4; i++) {
        dst[i] = v >> (8 * i);
    }
#endif
}

/* Store a 64 bit value at ptr in little endian byte order as two 32 bit
   little endian halves (low half first). */
static inline __attribute__((__always_inline__)) void stq_le_p(void *ptr, uint64_t v)
{
    uint8_t *bytes = ptr;
    uint32_t low = (uint32_t)v;
    uint32_t high = (uint32_t)(v >> 32);

    stl_le_p(bytes, low);
    stl_le_p(bytes + 4, high);
}

/* float access */

/* Load a float32 whose 32 bit pattern is stored little endian at ptr. */
static inline __attribute__((__always_inline__)) float32 ldfl_le_p(void *ptr)
{
    /* reinterpret the loaded bits as a float through a union */
    union {
        uint32_t bits;
        float32 value;
    } conv;

    conv.bits = ldl_le_p(ptr);
    return conv.value;
}

/* Store the 32 bit pattern of float32 v at ptr in little endian order. */
static inline __attribute__((__always_inline__)) void stfl_le_p(void *ptr, float32 v)
{
    /* reinterpret the float as raw bits through a union */
    union {
        uint32_t bits;
        float32 value;
    } conv;

    conv.value = v;
    stl_le_p(ptr, conv.bits);
}
/*
 * Load a float64 whose 64 bit pattern is stored little endian at ptr:
 * the lower 32 bit word comes first, the upper word follows at offset 4.
 */
static inline __attribute__((__always_inline__)) float64 ldfq_le_p(void *ptr)
{
    CPU_DoubleU u;
    /* Use a byte pointer for the offset: arithmetic on void * is a GNU
       extension and not valid ISO C. */
    uint8_t *p = ptr;
    u.l.lower = ldl_le_p(p);
    u.l.upper = ldl_le_p(p + 4);
    return u.d;
}

/*
 * Store the 64 bit pattern of float64 v at ptr in little endian order:
 * the lower 32 bit word first, the upper word at offset 4.
 */
static inline __attribute__((__always_inline__)) void stfq_le_p(void *ptr, float64 v)
{
    CPU_DoubleU u;
    /* Use a byte pointer for the offset: arithmetic on void * is a GNU
       extension and not valid ISO C. */
    uint8_t *p = ptr;
    u.d = v;
    stl_le_p(p, u.l.lower);
    stl_le_p(p + 4, u.l.upper);
}

#else

/* Load an unsigned 16 bit value with a single direct host access. */
static inline __attribute__((__always_inline__)) int lduw_le_p(void *ptr)
{
    const uint16_t *src = ptr;

    return *src;
}

/* Load a signed 16 bit value with a single direct host access; the result
   is sign extended to int. */
static inline __attribute__((__always_inline__)) int ldsw_le_p(void *ptr)
{
    const int16_t *src = ptr;

    return *src;
}

/* Load a 32 bit value with a single direct host access; the pattern is
   returned as an int. */
static inline __attribute__((__always_inline__)) int ldl_le_p(void *ptr)
{
    const uint32_t *src = ptr;

    return *src;
}

/* Load a 64 bit value with a single direct host access. */
static inline __attribute__((__always_inline__)) uint64_t ldq_le_p(void *ptr)
{
    const uint64_t *src = ptr;

    return *src;
}

/* Store the low 16 bits of v with a single direct host access. */
static inline __attribute__((__always_inline__)) void stw_le_p(void *ptr, int v)
{
    uint16_t *dst = ptr;

    *dst = (uint16_t)v;
}

/* Store the 32 bits of v with a single direct host access. */
static inline __attribute__((__always_inline__)) void stl_le_p(void *ptr, int v)
{
    uint32_t *dst = ptr;

    *dst = (uint32_t)v;
}

/* Store a 64 bit value with a single direct host access. */
static inline __attribute__((__always_inline__)) void stq_le_p(void *ptr, uint64_t v)
{
    uint64_t *dst = ptr;

    *dst = v;
}

/* float access */

/* Load a float32 with a single direct host access. */
static inline __attribute__((__always_inline__)) float32 ldfl_le_p(void *ptr)
{
    const float32 *src = ptr;

    return *src;
}

/* Load a float64 with a single direct host access. */
static inline __attribute__((__always_inline__)) float64 ldfq_le_p(void *ptr)
{
    const float64 *src = ptr;

    return *src;
}

/* Store a float32 with a single direct host access. */
static inline __attribute__((__always_inline__)) void stfl_le_p(void *ptr, float32 v)
{
    float32 *dst = ptr;

    *dst = v;
}

/* Store a float64 with a single direct host access. */
static inline __attribute__((__always_inline__)) void stfq_le_p(void *ptr, float64 v)
{
    float64 *dst = ptr;

    *dst = v;
}
#endif

#if !defined(WORDS_BIGENDIAN) || defined(WORDS_ALIGNED)

/*
 * Load an unsigned 16 bit big endian value from ptr. Returns 0..0xffff.
 */
static inline __attribute__((__always_inline__)) int lduw_be_p(void *ptr)
{
#if defined(__i386__)
    int val;
    /* __asm__ / __volatile__ instead of asm / volatile: the plain 'asm'
       keyword is not available when compiling with -std=c99 and alike. */
    __asm__ __volatile__ ("movzwl %1, %0\n"
                          "xchgb %b0, %h0\n"
                          : "=q" (val)
                          : "m" (*(uint16_t *)ptr));
    return val;
#else
    uint8_t *b = (uint8_t *) ptr;
    return ((b[0] << 8) | b[1]);
#endif
}

/*
 * Load a signed 16 bit big endian value from ptr. The result is sign
 * extended to int.
 */
static inline __attribute__((__always_inline__)) int ldsw_be_p(void *ptr)
{
#if defined(__i386__)
    int val;
    /* __asm__ / __volatile__ instead of asm / volatile: the plain 'asm'
       keyword is not available when compiling with -std=c99 and alike. */
    __asm__ __volatile__ ("movzwl %1, %0\n"
                          "xchgb %b0, %h0\n"
                          : "=q" (val)
                          : "m" (*(uint16_t *)ptr));
    return (int16_t)val;
#else
    uint8_t *b = (uint8_t *) ptr;
    return (int16_t)((b[0] << 8) | b[1]);
#endif
}

/*
 * Load a 32 bit big endian value from ptr. The 32 bit pattern is returned
 * as an int.
 */
static inline __attribute__((__always_inline__)) int ldl_be_p(void *ptr)
{
#if defined(__i386__) || defined(__x86_64__)
    int val;
    /* __asm__ / __volatile__ instead of asm / volatile: the plain 'asm'
       keyword is not available when compiling with -std=c99 and alike. */
    __asm__ __volatile__ ("movl %1, %0\n"
                          "bswap %0\n"
                          : "=r" (val)
                          : "m" (*(uint32_t *)ptr));
    return val;
#else
    uint8_t *b = (uint8_t *) ptr;
    /* Widen each byte to uint32_t before shifting: a plain b[0] promotes to
       int and shifting a byte >= 0x80 left by 24 overflows a signed int. */
    return (int)(((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16)
                 | ((uint32_t)b[2] << 8) | (uint32_t)b[3]);
#endif
}

/*
 * Load a 64 bit big endian value from ptr as two 32 bit big endian halves
 * (high half first).
 *
 * Takes void * like every other accessor in this header (including the
 * direct-access variant of this function), so callers do not need a cast
 * that is only required on some hosts.
 */
static inline __attribute__((__always_inline__)) uint64_t ldq_be_p(void *ptr)
{
    uint8_t *p = ptr;
    uint32_t a,b;
    a = ldl_be_p(p);
    b = ldl_be_p(p + 4);
    return (((uint64_t)a<<32)|b);
}

/*
 * Store the low 16 bits of v at ptr in big endian byte order.
 */
static inline __attribute__((__always_inline__)) void stw_be_p(void *ptr, int v)
{
#if defined(__i386__)
    /* The destination is an "=m" output so the compiler knows the asm
       writes *ptr. v is an earlyclobber read-write register because it is
       byte swapped before the memory operand is used.
       __asm__ / __volatile__ are used because the plain 'asm' keyword is
       not available when compiling with -std=c99 and alike. */
    __asm__ __volatile__ ("xchgb %b0, %h0\n"
                          "movw %w0, %1\n"
                          : "+&q" (v), "=m" (*(uint16_t *)ptr));
#else
    uint8_t *d = (uint8_t *) ptr;
    d[0] = v >> 8;
    d[1] = v;
#endif
}

/*
 * Store the 32 bits of v at ptr in big endian byte order.
 */
static inline __attribute__((__always_inline__)) void stl_be_p(void *ptr, int v)
{
#if defined(__i386__) || defined(__x86_64__)
    /* The destination is an "=m" output so the compiler knows the asm
       writes *ptr. v is an earlyclobber read-write register because it is
       byte swapped before the memory operand is used.
       __asm__ / __volatile__ are used because the plain 'asm' keyword is
       not available when compiling with -std=c99 and alike. */
    __asm__ __volatile__ ("bswap %0\n"
                          "movl %0, %1\n"
                          : "+&r" (v), "=m" (*(uint32_t *)ptr));
#else
    uint8_t *d = (uint8_t *) ptr;
    d[0] = v >> 24;
    d[1] = v >> 16;
    d[2] = v >> 8;
    d[3] = v;
#endif
}

/*
 * Store a 64 bit value at ptr in big endian byte order as two 32 bit big
 * endian halves (high half first).
 *
 * Takes void * like every other accessor in this header (including the
 * direct-access variant of this function), so callers do not need a cast
 * that is only required on some hosts.
 */
static inline __attribute__((__always_inline__)) void stq_be_p(void *ptr, uint64_t v)
{
    uint8_t *p = ptr;
    stl_be_p(p, v >> 32);
    stl_be_p(p + 4, v);
}

/* float access */

/* Load a float32 whose 32 bit pattern is stored big endian at ptr. */
static inline __attribute__((__always_inline__)) float32 ldfl_be_p(void *ptr)
{
    /* reinterpret the loaded bits as a float through a union */
    union {
        uint32_t bits;
        float32 value;
    } conv;

    conv.bits = ldl_be_p(ptr);
    return conv.value;
}

/* Store the 32 bit pattern of float32 v at ptr in big endian order. */
static inline __attribute__((__always_inline__)) void stfl_be_p(void *ptr, float32 v)
{
    /* reinterpret the float as raw bits through a union */
    union {
        uint32_t bits;
        float32 value;
    } conv;

    conv.value = v;
    stl_be_p(ptr, conv.bits);
}

/*
 * Load a float64 whose 64 bit pattern is stored big endian at ptr: the
 * upper 32 bit word comes first, the lower word follows at offset 4.
 *
 * Takes void * like every other accessor in this header (including the
 * direct-access variant of this function).
 */
static inline __attribute__((__always_inline__)) float64 ldfq_be_p(void *ptr)
{
    CPU_DoubleU u;
    uint8_t *p = ptr;
    u.l.upper = ldl_be_p(p);
    u.l.lower = ldl_be_p(p + 4);
    return u.d;
}

/*
 * Store the 64 bit pattern of float64 v at ptr in big endian order: the
 * upper 32 bit word first, the lower word at offset 4.
 *
 * Takes void * like every other accessor in this header (including the
 * direct-access variant of this function).
 */
static inline __attribute__((__always_inline__)) void stfq_be_p(void *ptr, float64 v)
{
    CPU_DoubleU u;
    uint8_t *p = ptr;
    u.d = v;
    stl_be_p(p, u.l.upper);
    stl_be_p(p + 4, u.l.lower);
}

#else

/* Load an unsigned 16 bit value with a single direct host access. */
static inline __attribute__((__always_inline__)) int lduw_be_p(void *ptr)
{
    const uint16_t *src = ptr;

    return *src;
}

/* Load a signed 16 bit value with a single direct host access; the result
   is sign extended to int. */
static inline __attribute__((__always_inline__)) int ldsw_be_p(void *ptr)
{
    const int16_t *src = ptr;

    return *src;
}

/* Load a 32 bit value with a single direct host access; the pattern is
   returned as an int. */
static inline __attribute__((__always_inline__)) int ldl_be_p(void *ptr)
{
    const uint32_t *src = ptr;

    return *src;
}

/* Load a 64 bit value with a single direct host access. */
static inline __attribute__((__always_inline__)) uint64_t ldq_be_p(void *ptr)
{
    const uint64_t *src = ptr;

    return *src;
}

/* Store the low 16 bits of v with a single direct host access. */
static inline __attribute__((__always_inline__)) void stw_be_p(void *ptr, int v)
{
    uint16_t *dst = ptr;

    *dst = (uint16_t)v;
}

/* Store the 32 bits of v with a single direct host access. */
static inline __attribute__((__always_inline__)) void stl_be_p(void *ptr, int v)
{
    uint32_t *dst = ptr;

    *dst = (uint32_t)v;
}

/* Store a 64 bit value with a single direct host access. */
static inline __attribute__((__always_inline__)) void stq_be_p(void *ptr, uint64_t v)
{
    uint64_t *dst = ptr;

    *dst = v;
}

/* float access */

/* Load a float32 with a single direct host access. */
static inline __attribute__((__always_inline__)) float32 ldfl_be_p(void *ptr)
{
    const float32 *src = ptr;

    return *src;
}

/* Load a float64 with a single direct host access. */
static inline __attribute__((__always_inline__)) float64 ldfq_be_p(void *ptr)
{
    const float64 *src = ptr;

    return *src;
}

/* Store a float32 with a single direct host access. */
static inline __attribute__((__always_inline__)) void stfl_be_p(void *ptr, float32 v)
{
    float32 *dst = ptr;

    *dst = v;
}

/* Store a float64 with a single direct host access. */
static inline __attribute__((__always_inline__)) void stfq_be_p(void *ptr, float64 v)
{
    float64 *dst = ptr;

    *dst = v;
}

#endif

/* target CPU memory access functions */
/*
 * Accessors in target byte order: each name without an endian suffix
 * forwards to the _le_p helper unless TARGET_WORDS_BIGENDIAN is defined,
 * in which case it forwards to the _be_p helper.
 */
#if !defined(TARGET_WORDS_BIGENDIAN)
#define lduw_p(ptr)		lduw_le_p(ptr)
#define ldsw_p(ptr)		ldsw_le_p(ptr)
#define ldl_p(ptr)		ldl_le_p(ptr)
#define ldq_p(ptr)		ldq_le_p(ptr)
#define ldfl_p(ptr)		ldfl_le_p(ptr)
#define ldfq_p(ptr)		ldfq_le_p(ptr)
#define stw_p(ptr, val)		stw_le_p(ptr, val)
#define stl_p(ptr, val)		stl_le_p(ptr, val)
#define stq_p(ptr, val)		stq_le_p(ptr, val)
#define stfl_p(ptr, val)	stfl_le_p(ptr, val)
#define stfq_p(ptr, val)	stfq_le_p(ptr, val)
#else
#define lduw_p(ptr)		lduw_be_p(ptr)
#define ldsw_p(ptr)		ldsw_be_p(ptr)
#define ldl_p(ptr)		ldl_be_p(ptr)
#define ldq_p(ptr)		ldq_be_p(ptr)
#define ldfl_p(ptr)		ldfl_be_p(ptr)
#define ldfq_p(ptr)		ldfq_be_p(ptr)
#define stw_p(ptr, val)		stw_be_p(ptr, val)
#define stl_p(ptr, val)		stl_be_p(ptr, val)
#define stq_p(ptr, val)		stq_be_p(ptr, val)
#define stfl_p(ptr, val)	stfl_be_p(ptr, val)
#define stfq_p(ptr, val)	stfq_be_p(ptr, val)
#endif

/*
 * Raw (host memory) accessors. laddr() wraps the address of every load and
 * saddr() the address of every store; both currently expand to their
 * argument unchanged, so each _raw macro forwards to the matching _p
 * accessor.
 */
#define laddr(x) (x)
#define saddr(x) (x)

/* loads */
#define ldub_raw(addr)		ldub_p(laddr(addr))
#define ldsb_raw(addr)		ldsb_p(laddr(addr))
#define lduw_raw(addr)		lduw_p(laddr(addr))
#define ldsw_raw(addr)		ldsw_p(laddr(addr))
#define ldl_raw(addr)		ldl_p(laddr(addr))
#define ldq_raw(addr)		ldq_p(laddr(addr))
#define ldfl_raw(addr)		ldfl_p(laddr(addr))
#define ldfq_raw(addr)		ldfq_p(laddr(addr))

/* stores */
#define stb_raw(addr, val)	stb_p(saddr(addr), val)
#define stw_raw(addr, val)	stw_p(saddr(addr), val)
#define stl_raw(addr, val)	stl_p(saddr(addr), val)
#define stq_raw(addr, val)	stq_p(saddr(addr), val)
#define stfl_raw(addr, val)	stfl_p(saddr(addr), val)
#define stfq_raw(addr, val)	stfq_p(saddr(addr), val)

#endif /* __LIBCPU_H_INCLUDED */
