/*
 * $Id: arch_gen_cpu_x86_fpu_fast.c,v 1.12 2013-04-04 13:12:24 vrsieh Exp $
 *
 * Derived from QEMU sources.
 * Modified for FAUmachine by Volkmar Sieh.
 *
 *  Copyright (c) 2007-2009 FAUmachine Team.
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
 * USA
 */

#if defined(DARWIN) && defined(__x86_64__)

/* FIXME sand:
 * Helper functions that work around a problem on Mac OS X x86_64:
 * the address of lrintl could not be patched, because it was not
 * possible to write the correct offset of the trampoline into the
 * copied code blocks.
 */

#if defined(USE_X86LDOUBLE)
/*
 * Convert a float32 to a 32-bit integer using lrint().
 *
 * If the rounded value does not fit into an int32_t, the most negative
 * 32-bit value (bit pattern 0x80000000) is returned instead.
 */
int32_t
NAME_(helper_float32_to_int32)(float32 x)
{
	long a;

	a = lrint(x);
	if (a != (int32_t) a) {
		/*
		 * Written as a long constant: "1 << 31" shifts into the
		 * sign bit of a 32-bit int, which is undefined behaviour.
		 */
		a = -2147483647L - 1;
	}

	return a;
}

/*
 * Convert a float32 to a 64-bit integer; the rounding is delegated
 * to llrintl().
 */
int64_t
NAME_(helper_float32_to_int64)(float32 x)
{
	int64_t rounded;

	rounded = llrintl(x);
	return rounded;
}

/*
 * Convert a float64 to a 32-bit integer using lrint().
 *
 * If the rounded value does not fit into an int32_t, the most negative
 * 32-bit value (bit pattern 0x80000000) is returned instead.
 */
int32_t
NAME_(helper_float64_to_int32)(float64 x)
{
	long a;

	a = lrint(x);
	if (a != (int32_t) a) {
		/*
		 * Written as a long constant: "1 << 31" shifts into the
		 * sign bit of a 32-bit int, which is undefined behaviour.
		 */
		a = -2147483647L - 1;
	}

	return a;
}

/*
 * Convert a float64 to a 64-bit integer; the rounding is delegated
 * to llrintl().
 */
int64_t
NAME_(helper_float64_to_int64)(float64 x)
{
	int64_t rounded;

	rounded = llrintl(x);
	return rounded;
}

/*
 * Convert a floatx80 to a 32-bit integer using lrintl().
 *
 * If the rounded value does not fit into an int32_t, the most negative
 * 32-bit value (bit pattern 0x80000000) is returned instead.
 */
int32_t
NAME_(helper_floatx_to_int32)(floatx80 x)
{
	long a;

	a = lrintl(x);
	if (a != (int32_t) a) {
		/*
		 * Written as a long constant: "1 << 31" shifts into the
		 * sign bit of a 32-bit int, which is undefined behaviour.
		 */
		a = -2147483647L - 1;
	}
	return a;
}

/*
 * Convert a floatx80 to a 64-bit integer; the rounding is delegated
 * to llrintl().
 */
int64_t
NAME_(helper_floatx_to_int64)(floatx80 x)
{
	int64_t rounded;

	rounded = llrintl(x);
	return rounded;
}
#else
/*
 * Convert the extended-precision stand-in type (float64 when
 * USE_X86LDOUBLE is not defined) to a 32-bit integer using lrint().
 *
 * If the rounded value does not fit into an int32_t, the most negative
 * 32-bit value (bit pattern 0x80000000) is returned instead.
 */
int32_t
NAME_(helper_floatx_to_int32)(float64 x)
{
	long a;

	a = lrint(x);
	if (a != (int32_t) a) {
		/*
		 * Written as a long constant: "1 << 31" shifts into the
		 * sign bit of a 32-bit int, which is undefined behaviour.
		 */
		a = -2147483647L - 1;
	}

	return a;
}

/*
 * Convert the extended-precision stand-in type (float64 when
 * USE_X86LDOUBLE is not defined) to a 64-bit integer; the rounding is
 * delegated to llrintl().
 */
int64_t
NAME_(helper_floatx_to_int64)(float64 x)
{
	int64_t rounded;

	rounded = llrintl(x);
	return rounded;
}
#endif
#endif /* defined(DARWIN) && defined(__x86_64__) */

/*
 * OR the exception bits in `mask` into the FPU status word.
 *
 * If, afterwards, a status bit is set whose counterpart in the control
 * word (restricted to FPUC_EM) is clear, FPUS_SE and FPUS_B are set as
 * well; when CR0.NE is clear, fpu_check_ferr() is called in addition.
 */
static void
fpu_set_exception(int mask)
{
	env->fpus |= mask;

	/* No further action unless the status/control test hits. */
	if (! (env->fpus & (~env->fpuc & FPUC_EM))) {
		return;
	}

	env->fpus |= FPUS_SE | FPUS_B;
	if (unlikely(! (env->cr[0] & CPU_CR0_NE_MASK))) {
		NAME_(fpu_check_ferr)();
	}
}

/*
 * Load the value returned by helper_fldt(A0) into a newly pushed
 * FPU stack slot: the stack-top index is decremented modulo 8 and the
 * slot's tag is cleared.
 */
void
NAME_(helper_fldt_ST0_A0)(void)
{
	const int top = (env->fpstt - 1) & 7;

	/* The load is done before the stack top is moved. */
	env->fpregs[top].d = NAME_(helper_fldt)(A0);
	env->fpstt = top;
	env->fptags[top] = 0; /* mark the stack entry as valid */
}

void
NAME_(helper_fstt_ST0_A0)(void)
{
	NAME_(helper_fstt)(ST0, A0);
}

/*
 * Re-evaluate the FPU error summary and drive the sig_n_ferr signal.
 *
 * When a status bit is set whose counterpart in the control word
 * (restricted to FPUC_EM) is clear, FPUS_SE and FPUS_B are set and
 * the signal is set to 1 unless state_n_ignne is set.  Otherwise both
 * status bits are cleared and the signal is set to 0.
 */
void
NAME_(fpu_check_ferr)(void)
{
	const int pending = (env->fpus & (~env->fpuc & FPUC_EM)) != 0;

	if (! pending) {
		env->fpus &= ~(FPUS_SE | FPUS_B);
		sig_std_logic_or_set(env->sig_n_ferr, env, 0);
		return;
	}

	env->fpus |= FPUS_SE | FPUS_B;
	if (! env->state_n_ignne) {
		sig_std_logic_or_set(env->sig_n_ferr, env, 1);
	}
}

/*
 * Return a / b.  When b compares equal to zero, FPUS_ZE is raised
 * through fpu_set_exception() before the division is carried out.
 */
CPU86_LDouble
NAME_(helper_fdiv)(CPU86_LDouble a, CPU86_LDouble b)
{
	const int divisor_is_zero = (b == 0.0);

	if (divisor_is_zero) {
		fpu_set_exception(FPUS_ZE);
	}

	return a / b;
}

/*
 * Load a packed-BCD value from A0 and push it onto the FPU stack.
 *
 * Bytes 8..0 each hold two decimal digits (high nibble first); they are
 * accumulated most significant byte first.  Bit 7 of byte 9 selects a
 * negative result.
 */
void
NAME_(helper_fbld_ST0_A0)(void)
{
	CPU86_LDouble result;
	uint64_t digits;
	unsigned int byte;
	int idx;

	digits = 0;
	idx = 9;
	while (idx-- > 0) {
		byte = ldub(A0 + idx);
		digits = (digits * 100) + ((byte >> 4) * 10) + (byte & 0xf);
	}

	result = digits;
	if (ldub(A0 + 9) & 0x80) {
		result = -result;
	}

	fpush();
	ST0 = result;
}

/*
 * Store ST0, rounded with rint(), as packed BCD at A0.
 *
 * Byte 9 receives the sign (0x80 for negative values, 0x00 otherwise)
 * and is written first; bytes 0..8 then receive two decimal digits
 * each, least significant pair first.
 */
void
NAME_(helper_fbst_ST0_A0)(void)
{
	CPU86_LDouble rounded;
	int pair;
	target_ulong cur, last;
	int64_t magnitude;

	rounded = rint(ST0);
	magnitude = (int64_t)rounded;
	cur = A0;
	last = cur + 9;

	if (magnitude < 0) {
		stb(last, 0x80);
		magnitude = -magnitude;
	} else {
		stb(last, 0x00);
	}

	/*
	 * Once the remaining magnitude is zero every further digit pair
	 * is zero too, so a single pass also does the zero fill.
	 */
	for (; cur < last; cur++) {
		pair = magnitude % 100;
		magnitude = magnitude / 100;
		pair = ((pair / 10) << 4) | (pair % 10);
		stb(cur, pair);
	}
}

void
NAME_(helper_f2xm1)(void)
{
	ST0 = pow(2.0,ST0) - 1.0;
}

/*
 * Compute ST1 * log2(ST0), leave the result in ST1 and pop the stack.
 *
 * When ST0 is not greater than zero nothing is computed; status bits
 * 0x4700 are cleared and bit 0x400 is set instead.
 */
void
NAME_(helper_fyl2x)(void)
{
	CPU86_LDouble x;

	x = ST0;
	if (! (x > 0.0)) {
		env->fpus &= (~0x4700);
		env->fpus |= 0x400;
		return;
	}

	x = log(x) / log(2.0);	/* log2(ST0) */
	ST1 *= x;
	fpop();
}

/*
 * Replace ST0 with tan(ST0) and push 1.0 on top of it.
 *
 * Arguments outside [-MAXTAN, MAXTAN] are left untouched and status
 * bit 0x400 (C2) is set; otherwise C2 is cleared.
 */
void
NAME_(helper_fptan)(void)
{
	CPU86_LDouble arg;

	arg = ST0;
	if ((arg > MAXTAN) || (arg < -MAXTAN)) {
		env->fpus |= 0x400;
		return;
	}

	/* this path is meant for |arg| < 2**52 only */
	ST0 = tan(arg);
	fpush();
	ST0 = 1.0;
	env->fpus &= (~0x400);  /* C2 <-- 0 */
}

/*
 * Store atan2(ST1, ST0) in ST1 and pop the stack.
 */
void
NAME_(helper_fpatan)(void)
{
	CPU86_LDouble y, x;

	x = ST0;
	y = ST1;
	ST1 = atan2(y, x);
	fpop();
}

/*
 * Split ST0 into exponent and significand.
 *
 * The unbiased exponent replaces ST0; the original value, with its
 * exponent field rewritten by BIASEXPONENT(), is then pushed on top.
 */
void
NAME_(helper_fxtract)(void)
{
	CPU86_LDoubleU temp;
	int expdif;

	temp.d = ST0;
	/*
	 * The unbiased exponent is negative for |ST0| < 1, so it must be
	 * held in a signed type; an unsigned variable would wrap and a
	 * huge positive value would be stored in ST0.
	 */
	expdif = (int)EXPD(temp) - EXPBIAS;
	ST0 = expdif;
	fpush();
	BIASEXPONENT(temp);
	ST0 = temp.d;
}

/*
 * FPREM1: partial remainder of ST0 / ST1, result in ST0.
 *
 * When the exponent difference is below 53 the reduction is completed
 * in one step: C2 is cleared and the three low bits of the quotient
 * magnitude are reported as C0 = q2, C3 = q1, C1 = q0.  Otherwise a
 * partial reduction is done and C2 is set.
 *
 * NOTE(review): the complete-reduction path truncates the quotient
 * toward zero exactly like helper_fprem; FPREM1 is presumably meant to
 * round to nearest here -- confirm and handle separately.
 */
void
NAME_(helper_fprem1)(void)
{
	CPU86_LDouble dblq, absq, fpsrcop, fptemp;
	CPU86_LDoubleU fpsrcop1, fptemp1;
	int expdif;
	unsigned int q;

	fpsrcop = ST0;
	fptemp = ST1;
	fpsrcop1.d = fpsrcop;
	fptemp1.d = fptemp;
	expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
	if (expdif < 53) {
		dblq = fpsrcop / fptemp;
		dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
		ST0 = fpsrcop - fptemp*dblq;
		/*
		 * Take the low bits from the quotient's magnitude via a
		 * 64-bit conversion; a plain (int) cast overflows for
		 * quotients >= 2**31.  Values that do not fit (inf/NaN)
		 * report q = 0.
		 */
		absq = (dblq < 0.0)? -dblq: dblq;
		q = 0;
		if (absq < 18446744073709551616.0) {
			q = (unsigned int)((uint64_t)absq & 7);
		}
		env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
					/* (C0,C3,C1) <-- (q2,q1,q0) */
		env->fpus |= (q&0x4) << 6;  /* (C0, bit 8)  <-- q2 */
		env->fpus |= (q&0x2) << 13; /* (C3, bit 14) <-- q1 */
		env->fpus |= (q&0x1) << 9;  /* (C1, bit 9)  <-- q0 */
	} else {
		env->fpus |= 0x400;  /* C2 <-- 1 */
		fptemp = pow(2.0, expdif-50);
		fpsrcop = (ST0 / ST1) / fptemp;
		/* fpsrcop = integer obtained by rounding to the nearest */
		fpsrcop = (fpsrcop-floor(fpsrcop) < ceil(fpsrcop)-fpsrcop)?
		    floor(fpsrcop): ceil(fpsrcop);
		ST0 -= (ST1 * fpsrcop * fptemp);
	}
}

/*
 * FPREM: partial remainder of ST0 / ST1 with the quotient truncated
 * toward zero, result in ST0.
 *
 * When the exponent difference is below 53 the reduction is completed
 * in one step: C2 is cleared and the three low bits of the quotient
 * magnitude are reported as C0 = q2, C3 = q1, C1 = q0.  Otherwise a
 * partial reduction is done and C2 is set.
 */
void
NAME_(helper_fprem)(void)
{
	CPU86_LDouble dblq, absq, fpsrcop, fptemp;
	CPU86_LDoubleU fpsrcop1, fptemp1;
	int expdif;
	unsigned int q;

	fpsrcop = ST0;
	fptemp = ST1;
	fpsrcop1.d = fpsrcop;
	fptemp1.d = fptemp;
	expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
	if ( expdif < 53 ) {
		dblq = fpsrcop / fptemp;
		dblq = (dblq < 0.0)? ceil(dblq): floor(dblq);
		ST0 = fpsrcop - fptemp*dblq;
		/*
		 * Take the low bits from the quotient's magnitude via a
		 * 64-bit conversion; a plain (int) cast overflows for
		 * quotients >= 2**31.  Values that do not fit (inf/NaN)
		 * report q = 0.
		 */
		absq = (dblq < 0.0)? -dblq: dblq;
		q = 0;
		if (absq < 18446744073709551616.0) {
			q = (unsigned int)((uint64_t)absq & 7);
		}
		env->fpus &= (~0x4700); /* (C3,C2,C1,C0) <-- 0000 */
					/* (C0,C3,C1) <-- (q2,q1,q0) */
		env->fpus |= (q&0x4) << 6;  /* (C0, bit 8)  <-- q2 */
		env->fpus |= (q&0x2) << 13; /* (C3, bit 14) <-- q1 */
		env->fpus |= (q&0x1) << 9;  /* (C1, bit 9)  <-- q0 */
	} else {
		env->fpus |= 0x400;  /* C2 <-- 1 */
		fptemp = pow(2.0, expdif-50);
		fpsrcop = (ST0 / ST1) / fptemp;
		/* fpsrcop = integer obtained by chopping */
		fpsrcop = (fpsrcop < 0.0)?
		    -(floor(fabs(fpsrcop))): floor(fpsrcop);
		ST0 -= (ST1 * fpsrcop * fptemp);
	}
}

/*
 * Compute ST1 * log2(ST0 + 1.0), leave the result in ST1 and pop the
 * stack.
 *
 * When ST0 + 1.0 is not greater than zero nothing is computed; status
 * bits 0x4700 are cleared and bit 0x400 is set instead.
 */
void
NAME_(helper_fyl2xp1)(void)
{
	CPU86_LDouble x;

	x = ST0;
	if (! ((x+1.0) > 0.0)) {
		env->fpus &= (~0x4700);
		env->fpus |= 0x400;
		return;
	}

	x = log(x+1.0) / log(2.0);	/* log2(ST0 + 1.0) */
	ST1 *= x;
	fpop();
}

/*
 * Replace ST0 with sqrt(ST0).
 *
 * For a negative operand status bits 0x4700 are cleared and bit 0x400
 * is set before the square root is taken; the sqrt() result is stored
 * in either case.
 */
void
NAME_(helper_fsqrt)(void)
{
	CPU86_LDouble operand;
	int is_negative;

	operand = ST0;
	is_negative = (operand < 0.0);
	if (is_negative) {
		env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
		env->fpus |= 0x400;
	}

	ST0 = sqrt(operand);
}

/*
 * Replace ST0 with sin(ST0) and push cos of the same argument on top.
 *
 * Arguments outside [-MAXTAN, MAXTAN] are left untouched and status
 * bit 0x400 (C2) is set; otherwise C2 is cleared.
 */
void
NAME_(helper_fsincos)(void)
{
	CPU86_LDouble arg;

	arg = ST0;
	if ((arg > MAXTAN) || (arg < -MAXTAN)) {
		env->fpus |= 0x400;
		return;
	}

	/* this path is meant for |arg| < 2**63 only */
	ST0 = sin(arg);
	fpush();
	ST0 = cos(arg);
	env->fpus &= (~0x400);  /* C2 <-- 0 */
}

/*
 * Round ST0 to an integral value and store the result back in ST0.
 *
 * On ARM hosts the rounding direction is selected from the rounding
 * control field of the FPU control word (env->fpuc & RC_MASK) and an
 * explicit rounding instruction is issued; on all other hosts rint()
 * is used.
 * NOTE(review): the rint() path presumably relies on the host rounding
 * mode having been set up to match the guest beforehand -- confirm.
 */
void
NAME_(helper_frndint)(void)
{
	CPU86_LDouble a;

	a = ST0;
#ifdef __arm__
	/* one inline-asm rounding instruction per rounding-control value */
	switch(env->fpuc & RC_MASK) {
	default:
	case RC_NEAR:
		asm("rndd %0, %1" : "=f" (a) : "f"(a));
		break;
	case RC_DOWN:
		asm("rnddm %0, %1" : "=f" (a) : "f"(a));
		break;
	case RC_UP:
		asm("rnddp %0, %1" : "=f" (a) : "f"(a));
		break;
	case RC_CHOP:
		asm("rnddz %0, %1" : "=f" (a) : "f"(a));
		break;
	}
#else
	a = rint(a);
#endif
	ST0 = a;
}

/*
 * FSCALE: multiply ST0 by 2 raised to the integral part of ST1.
 *
 * The instruction is specified to truncate ST1 toward zero before
 * scaling, so the fractional part of ST1 must not influence the
 * result (e.g. ST1 = 1.5 scales by 2, not by 2**1.5).
 */
void
NAME_(helper_fscale)(void)
{
	CPU86_LDouble exponent, factor;

	exponent = ST1;
	/* chop toward zero: ceil() for negative values, floor() otherwise */
	exponent = (exponent < 0.0) ? ceil(exponent) : floor(exponent);
	factor = pow(2.0, exponent);
	ST0 *= factor;
}

/*
 * Replace ST0 with sin(ST0).
 *
 * Arguments outside [-MAXTAN, MAXTAN] are left untouched and status
 * bit 0x400 (C2) is set; otherwise C2 is cleared.
 */
void
NAME_(helper_fsin)(void)
{
	CPU86_LDouble arg;

	arg = ST0;
	if ((arg > MAXTAN) || (arg < -MAXTAN)) {
		env->fpus |= 0x400;
		return;
	}

	/* this path is meant for |arg| < 2**53 only */
	ST0 = sin(arg);
	env->fpus &= (~0x400);  /* C2 <-- 0 */
}

/*
 * Replace ST0 with cos(ST0).
 *
 * Arguments outside [-MAXTAN, MAXTAN] are left untouched and status
 * bit 0x400 (C2) is set; otherwise C2 is cleared.
 */
void
NAME_(helper_fcos)(void)
{
	CPU86_LDouble arg;

	arg = ST0;
	if ((arg > MAXTAN) || (arg < -MAXTAN)) {
		env->fpus |= 0x400;
		return;
	}

	/* this path is meant for |arg| < 2**63 only */
	ST0 = cos(arg);
	env->fpus &= (~0x400);  /* C2 <-- 0 */
}

/*
 * Classify ST0 and report the class in status bits C3..C0 (0x4700).
 *
 * C1 (0x200) mirrors the sign.  The class codes written are:
 * infinity 0x500, NaN 0x100, zero 0x4000, denormal 0x4400 and 0x400
 * for every other exponent value.
 */
void
NAME_(helper_fxam_ST0)(void)
{
	CPU86_LDoubleU raw;
	int biased_exp;
	int is_infinity;

	raw.d = ST0;

	env->fpus &= (~0x4700);  /* (C3,C2,C1,C0) <-- 0000 */
	if (SIGND(raw)) {
		env->fpus |= 0x200; /* C1 <-- 1 */
	}

	biased_exp = EXPD(raw);
	if (biased_exp == MAXEXPD) {
		/* the mantissa pattern of infinity depends on the format */
#ifdef USE_X86LDOUBLE
		is_infinity = (MANTD(raw) == 0x8000000000000000ULL);
#else
		is_infinity = (MANTD(raw) == 0);
#endif
		env->fpus |= is_infinity ? 0x500 /*Infinity*/ : 0x100 /*NaN*/;
	} else if (biased_exp == 0) {
		env->fpus |= (MANTD(raw) == 0)
		    ? 0x4000 /*Zero*/ : 0x4400 /*Denormal*/;
	} else {
		env->fpus |= 0x400;
	}
}

/*
 * Store the FPU environment at guest address `ptr`: control word,
 * status word (with the stack-top index merged into bits 11..13),
 * tag word, and four zeroed pointer fields.
 *
 * data32 selects the layout: non-zero writes seven 32-bit fields,
 * zero writes seven 16-bit fields.
 */
void
NAME_(helper_fstenv)(target_ulong ptr, int data32)
{
	int fpus, fptag, exp_, i;
	uint64_t mant;
	CPU86_LDoubleU tmp;

	fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;

	/*
	 * Build the tag word, two bits per register, register 7 first:
	 * 3 when the register's fptags flag is set, otherwise 1 for zero,
	 * 2 for the special values below, and 0 for everything else.
	 */
	fptag = 0;
	for (i=7; i>=0; i--) {
		fptag <<= 2;
		if (env->fptags[i]) {
			fptag |= 3;
		} else {
			tmp.d = env->fpregs[i].d;
			exp_ = EXPD(tmp);
			mant = MANTD(tmp);
			if (exp_ == 0 && mant == 0) {
				/* zero */
				fptag |= 1;
			} else if (exp_ == 0 || exp_ == MAXEXPD
#ifdef USE_X86LDOUBLE
				/*
				 * 1ULL, not 1LL: shifting a signed 1 into
				 * bit 63 is undefined behaviour.
				 */
				|| (mant & (1ULL << 63)) == 0
#endif
				) {
				/* NaNs, infinity, denormal */
				fptag |= 2;
			}
		}
	}
	if (data32) {
		/* 32 bit */
		stl(ptr, env->fpuc);
		stl(ptr + 4, fpus);
		stl(ptr + 8, fptag);
		stl(ptr + 12, 0); /* fpip */
		stl(ptr + 16, 0); /* fpcs */
		stl(ptr + 20, 0); /* fpoo */
		stl(ptr + 24, 0); /* fpos */
	} else {
		/* 16 bit */
		stw(ptr, env->fpuc);
		stw(ptr + 2, fpus);
		stw(ptr + 4, fptag);
		stw(ptr + 6, 0);
		stw(ptr + 8, 0);
		stw(ptr + 10, 0);
		stw(ptr + 12, 0);
	}
}

/*
 * Load the FPU environment from guest address `ptr`.
 *
 * Control, status and tag word are read as 16-bit values; they are
 * spaced 4 bytes apart when data32 is non-zero and 2 bytes apart
 * otherwise.  The stack-top index is taken from bits 11..13 of the
 * status word, and a register's fptags flag is set exactly when its
 * 2-bit tag equals 3.
 */
void
NAME_(helper_fldenv)(target_ulong ptr, int data32)
{
	const int step = data32 ? 4 : 2;
	int reg, status, tags;

	env->fpuc = lduw(ptr);
	status = lduw(ptr + step);
	tags = lduw(ptr + 2 * step);

	env->fpstt = (status >> 11) & 7;
	env->fpus = status & ~0x3800;

	for (reg = 0; reg < 8; reg++, tags >>= 2) {
		env->fptags[reg] = ((tags & 3) == 3);
	}
}

/*
 * Save the complete FPU state at `ptr` and reinitialise the FPU.
 *
 * The environment is written by helper_fstenv(); the eight stack
 * registers ST(0)..ST(7) follow at 10-byte intervals, starting
 * 14 << data32 bytes after `ptr`.  Afterwards the status word and the
 * stack top are zeroed, the control word is set to 0x37f and every
 * fptags flag is set to 1.
 */
void
NAME_(helper_fsave)(target_ulong ptr, int data32)
{
	CPU86_LDouble value;
	int n;

	NAME_(helper_fstenv)(ptr, data32);

	ptr += (14 << data32);
	for (n = 0; n < 8; n++, ptr += 10) {
		value = ST(n);
		NAME_(helper_fstt)(value, ptr);
	}

	/* fninit */
	env->fpus = 0;
	env->fpstt = 0;
	env->fpuc = 0x37f;
	for (n = 0; n < 8; n++) {
		env->fptags[n] = 1;
	}
}

/*
 * Restore the complete FPU state from `ptr`.
 *
 * The environment is read by helper_fldenv(); the eight stack
 * registers ST(0)..ST(7) are then loaded at 10-byte intervals,
 * starting 14 << data32 bytes after `ptr`.
 */
void
NAME_(helper_frstor)(target_ulong ptr, int data32)
{
	CPU86_LDouble value;
	int n;

	NAME_(helper_fldenv)(ptr, data32);

	ptr += (14 << data32);
	for (n = 0; n < 8; n++, ptr += 10) {
		value = NAME_(helper_fldt)(ptr);
		ST(n) = value;
	}
}

/*
 * Write the FXSAVE image at guest address `ptr`.
 *
 * Layout as written here: control word at +0, status word (with the
 * stack top in bits 11..13) at +2, the inverted one-bit-per-register
 * tag byte at +4, zeroed pointer fields at +8 (two 64-bit fields when
 * data64 is set, four 32-bit fields otherwise), ST(0)..ST(7) at +0x20
 * in 16-byte slots.  When CR4.OSFXSR is set, MXCSR, its mask and the
 * XMM registers (16 with HF_CS64_MASK set, else 8) follow at +0x18,
 * +0x1c and +0xa0.
 */
void
NAME_(helper_fxsave)(target_ulong ptr, int data64)
{
	int status, tag_bits, n, xmm_count;
	CPU86_LDouble value;
	target_ulong cursor;

	status = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
	tag_bits = 0;
	for (n = 0; n < 8; n++) {
		tag_bits |= (env->fptags[n] << n);
	}
	stw(ptr, env->fpuc);
	stw(ptr + 2, status);
	stw(ptr + 4, tag_bits ^ 0xff);

	if (! data64) {
		stl(ptr + 0x08, 0); /* eip */
		stl(ptr + 0x0c, 0); /* sel */
		stl(ptr + 0x10, 0); /* dp */
		stl(ptr + 0x14, 0); /* sel */
	} else {
		stq(ptr + 0x08, 0); /* rip */
		stq(ptr + 0x10, 0); /* rdp */
	}

	for (n = 0, cursor = ptr + 0x20; n < 8; n++, cursor += 16) {
		value = ST(n);
		NAME_(helper_fstt)(value, cursor);
	}

	if (! (env->cr[4] & CPU_CR4_OSFXSR_MASK)) {
		return;
	}

	/* XXX: finish it */
	stl(ptr + 0x18, env->mxcsr); /* mxcsr */
	stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
	xmm_count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
	for (n = 0, cursor = ptr + 0xa0; n < xmm_count; n++, cursor += 16) {
		stq(cursor, env->xmm_regs[n].XMM_Q(0));
		stq(cursor + 8, env->xmm_regs[n].XMM_Q(1));
	}
}

/*
 * Load the FXSAVE image at guest address `ptr`.
 *
 * Control word, status word and the inverted one-bit-per-register tag
 * byte are read from +0, +2 and +4; ST(0)..ST(7) are read from +0x20
 * in 16-byte slots.  When CR4.OSFXSR is set, MXCSR (+0x18) and the XMM
 * registers at +0xa0 (16 with HF_CS64_MASK set, else 8) are loaded too.
 * data64 is not consulted by this function.
 */
void
NAME_(helper_fxrstor)(target_ulong ptr, int data64)
{
	int n, status, tag_bits, xmm_count;
	CPU86_LDouble value;
	target_ulong cursor;

	env->fpuc = lduw(ptr);
	status = lduw(ptr + 2);
	tag_bits = lduw(ptr + 4);
	env->fpstt = (status >> 11) & 7;
	env->fpus = status & ~0x3800;

	/* the image stores 1 for a used register, fptags uses 1 for unused */
	tag_bits ^= 0xff;
	for (n = 0; n < 8; n++) {
		env->fptags[n] = ((tag_bits >> n) & 1);
	}

	for (n = 0, cursor = ptr + 0x20; n < 8; n++, cursor += 16) {
		value = NAME_(helper_fldt)(cursor);
		ST(n) = value;
	}

	if (! (env->cr[4] & CPU_CR4_OSFXSR_MASK)) {
		return;
	}

	/* XXX: finish it */
	env->mxcsr = ldl(ptr + 0x18);
	/* the mxcsr_mask field at ptr + 0x1c is not read */
	xmm_count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
	for (n = 0, cursor = ptr + 0xa0; n < xmm_count; n++, cursor += 16) {
		env->xmm_regs[n].XMM_Q(0) = ldq(cursor);
		env->xmm_regs[n].XMM_Q(1) = ldq(cursor + 8);
	}
}

/*
 * Split `f` into the two parts of an 80-bit extended value:
 * *pmant receives the 64-bit mantissa, *pexp the 16-bit sign and
 * exponent word.
 *
 * With USE_X86LDOUBLE the two halves are copied straight from the
 * union.  Otherwise the fraction is shifted left by 11 with bit 63
 * forced to 1, and the exponent is rebiased from EXPBIAS to 16383 and
 * merged with the sign.
 */
void
NAME_(get_fp80)(uint64_t *pmant, uint16_t *pexp, CPU86_LDouble f)
{
	CPU86_LDoubleU temp;
#ifndef USE_X86LDOUBLE
	int e;

	temp.d = f;
	/*
	 * mantissa; 1ULL, not 1LL: shifting a signed 1 into bit 63 is
	 * undefined behaviour
	 */
	*pmant = (MANTD(temp) << 11) | (1ULL << 63);
	/* exponent + sign */
	e = EXPD(temp) - EXPBIAS + 16383;
	e |= SIGND(temp) >> 16;
	*pexp = e;
#else
	temp.d = f;
	*pmant = temp.l.lower;
	*pexp = temp.l.upper;
#endif
}

/*
 * Build a CPU86_LDouble from the two parts of an 80-bit extended
 * value: `mant` is the 64-bit mantissa, `upper` the 16-bit sign and
 * exponent word.
 *
 * With USE_X86LDOUBLE both parts are copied straight into the union.
 * Otherwise the exponent is rebiased from 16383 to EXPBIAS, the sign is
 * moved to bit 11 above it, and the mantissa is shifted right by 11
 * and masked to 52 bits.
 */
CPU86_LDouble
NAME_(set_fp80)(uint64_t mant, uint16_t upper)
{
	CPU86_LDoubleU result;
#ifndef USE_X86LDOUBLE
	int sign_exp;
	uint64_t fraction;

	/* XXX: handle overflow ? */
	sign_exp = ((upper & 0x7fff) - 16383 + EXPBIAS) /* exponent */
	    | ((upper >> 4) & 0x800); /* sign */
	fraction = (mant >> 11) & ((1LL << 52) - 1);
#ifdef __arm__
	result.l.upper = (sign_exp << 20) | (fraction >> 32);
	result.l.lower = fraction;
#else
	result.ll = fraction | ((uint64_t)sign_exp << 52);
#endif
#else
	result.l.upper = upper;
	result.l.lower = mant;
#endif
	return result.d;
}


/*
 * Return 1 / sqrt(a); the computation is done in double precision and
 * the result converted to float.
 */
float
NAME_(approx_rsqrt)(float a)
{
	return (float)(1.0 / sqrt((double)a));
}

/*
 * Return 1 / a; the division is done in double precision and the
 * result converted to float.
 */
float
NAME_(approx_rcp)(float a)
{
	return (float)(1.0 / (double)a);
}

/*
 * Propagate the FPU control word into env->fp_status.
 *
 * The rounding-control field (env->fpuc & RC_MASK) is mapped to the
 * matching FE_* constant, with FE_TONEAREST as the fallback.  When
 * FLOATX80 is defined, bits 8..9 of the control word additionally
 * select the rounding precision: 0 -> 32, 2 -> 64, anything else -> 80.
 */
void
NAME_(update_fp_status)(void)
{
	const int rc = env->fpuc & RC_MASK;
	int host_mode;

	/* set rounding mode */
	if (rc == RC_DOWN) {
		host_mode = FE_DOWNWARD;
	} else if (rc == RC_UP) {
		host_mode = FE_UPWARD;
	} else if (rc == RC_CHOP) {
		host_mode = FE_TOWARDZERO;
	} else {
		/* RC_NEAR and any unexpected value */
		host_mode = FE_TONEAREST;
	}
	set_float_rounding_mode(host_mode, &env->fp_status);
#ifdef FLOATX80
	{
		const int pc = (env->fpuc >> 8) & 3;
		int precision;

		if (pc == 0) {
			precision = 32;
		} else if (pc == 2) {
			precision = 64;
		} else {
			precision = 80;
		}
		set_floatx80_rounding_precision(precision, &env->fp_status);
	}
#endif
}
