/*
 * $Id: cpuid.c,v 1.11 2013/07/05 07:45:55 vrsieh Exp $ 
 *
 * Copyright (C) 2003-2009 FAUmachine Team <info@faumachine.org>.
 * This program is free software. You can redistribute it and/or modify it
 * under the terms of the GNU General Public License, either version 2 of
 * the License, or (at your option) any later version. See COPYING.
 */

#include "config.h"

#include <assert.h>
#include <getopt.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/utsname.h>

/*
 * One entry of a feature-flag legend table.
 * The tables in this file are terminated by an entry whose bitnum is 255;
 * displayflaglegend() stops at the first entry with bitnum >= 64.
 */
struct featurebit {
	unsigned char bitnum;			/* bit position inside the register */
	const char * const description;		/* text printed for that bit */
};

/*
 * One entry of the cache/TLB descriptor table (cachecodes).
 * The table is terminated by an entry with an empty description string.
 */
struct cacheinfo {
	unsigned char code;			/* one-byte descriptor value */
	const char * const description;		/* text printed for that descriptor */
};

/*
 * Legend for the edx register of cpuid function 1 (main() passes it to
 * displayflaglegend() together with edx). Bits that are not listed are
 * simply not printed. The entry with bitnum 255 terminates the table.
 */
static struct featurebit procfeaturesdx[] = {
	{   0, "x87 on-chip" },
	{   1, "virtual-mode extensions" },
	{   2, "debugging extensions" },
	{   3, "page-size extensions (PSE)" },
	{   4, "timestamp counter (TSC)" },
	{   5, "model-specific-registers (MSR)" },
	{   6, "physical address extensions (PAE)" },
	{   7, "machine check exception" },
	{   8, "cmpxchg8b instruction" },
	{   9, "APIC" },
	{  11, "sysenter/sysexit instructions" },
	{  12, "memory-type range registers (MTRR)" },
	{  13, "page global extensions" },
	{  14, "machine check architecture" },
	{  15, "cmov* instructions" },
	{  16, "page attribute table (PAT)" },
	/* Bit 17 is PSE-36, not a second copy of the PSE flag in bit 3. */
	{  17, "36-bit page size extensions (PSE-36)" },
	{  18, "processor serial number (PSN)" },
	{  19, "clflush instruction" },
	{  21, "debug store" },
	{  22, "thermal monitor and soft contr'd clk" },
	{  23, "MMX" },
	{  24, "fxsave/fxrestore instructions" },
	{  25, "SSE" },
	{  26, "SSE2" },
	{  27, "self-snoop" },
	{  28, "hyper-threading-technology (HTT)" },
	{  29, "automatic thermal control circuit" },
	{  30, "IA-64" },
	{ 255, "I MUST NOT PRINT THIS" }
};
/*
 * Legend for the ecx register of cpuid function 1.
 * Terminated by the entry with bitnum 255.
 */
static struct featurebit procfeaturescx[] = {
	{ .bitnum =   0, .description = "SSE3" },
	{ .bitnum =   3, .description = "monitor/mwait instructions" },
	{ .bitnum =   4, .description = "cpl extensions to debug store" },
	{ .bitnum =   7, .description = "enhanced intel speedstep" },
	{ .bitnum =   8, .description = "thermal monitor 2" },
	{ .bitnum =  10, .description = "context id" },
	{ .bitnum =  13, .description = "cmpxchg16b instruction" },
	{ .bitnum =  14, .description = "send task priority messages" },
	{ .bitnum = 255, .description = "I MUST NOT PRINT THIS" }
};
/*
 * Legend for the edx register of cpuid function 0x80000001; main() uses
 * it when the vendor was identified as AMD.
 * Terminated by the entry with bitnum 255.
 */
static struct featurebit procfeaturesamddx[] = {
	{ .bitnum =  11, .description = "syscall/sysret instructions" },
	{ .bitnum =  20, .description = "no-execute page protection (NX bit)" },
	{ .bitnum =  22, .description = "amd-extensions to MMX" },
	{ .bitnum =  25, .description = "fast fxsave/fxrestore" },
	{ .bitnum =  27, .description = "rdtscp instruction" },
	{ .bitnum =  29, .description = "long mode" },
	{ .bitnum =  30, .description = "extensions to 3dnow" },
	{ .bitnum =  31, .description = "3dnow" },
	{ .bitnum = 255, .description = "I MUST NOT PRINT THIS" }
};
/*
 * Legend for the edx register of cpuid function 0x80000001; main() uses
 * it when the vendor was identified as Intel.
 * Terminated by the entry with bitnum 255.
 */
static struct featurebit procfeaturesinteldx[] = {
	{ .bitnum =  29, .description = "em64t" },
	{ .bitnum = 255, .description = "I MUST NOT PRINT THIS" }
};

/*
 * Legend for the ebx register of cpuid function 0x0a (performance
 * counters). main() passes the bitwise complement of ebx, so a "Yes" is
 * printed for a bit that is clear in the raw register.
 * Terminated by the entry with bitnum 255.
 */
static struct featurebit perfctrfeaturesintelbx[] = {
	{ .bitnum =   0, .description = "Core cycle" },
	{ .bitnum =   1, .description = "Instruction retired" },
	{ .bitnum =   2, .description = "Reference cycles" },
	{ .bitnum =   3, .description = "Last-level cache-reference" },
	{ .bitnum =   4, .description = "Last-level cache-miss" },
	{ .bitnum =   5, .description = "Branch Instruction retired" },
	{ .bitnum =   6, .description = "Branch mispredict retired" },
	{ .bitnum = 255, .description = "I MUST NOT PRINT THIS" }
};

/*
 * Descriptor bytes returned by cpuid function 2, with the text printed for
 * each of them by displaycacheexpl(). These codes are taken from Intel's
 * application note 485.
 * The list ends with an entry whose description is the empty string; the
 * lookup stops there and prints "???" for a descriptor it did not find.
 * A descriptor byte of 0x00 is skipped by displaycacheexpl() before the
 * lookup, so the "Null" entry is never printed.
 */
static struct cacheinfo cachecodes[] = {
	{ 0x00, "Null" },
	{ 0x01, "I-TLB: 4KB pages, 4-way set associative, 32 entries" },
	{ 0x02, "I-TLB: 4MB pages, fully associative, 2 entries" },
	{ 0x03, "D-TLB: 4KB pages, 4-way set associative, 64 entries" },
	{ 0x04, "D-TLB: 4MB pages, 4-way set associative, 8 entries" },
	{ 0x05, "D-TLB1: 4MB pages, 4-way set associative, 32 entries" },
	{ 0x06, "L1 IC: 8KB, 4-way set associative, 32B line size" },
	{ 0x08, "L1 IC: 16KB, 4-way set associative, 32B line size" },
	{ 0x0a, "L1 DC: 8KB, 2-way set associative, 32B line size" },
	{ 0x0b, "I-TLB: 4MB pages, 4-way set associative, 4 entries" },
	{ 0x0c, "L1 DC: 16KB, 4-way set associative, 32B line size" },
	{ 0x22, "L3  C: 512KB, 4-way set associative, sectored cache, 64B line size" },
	{ 0x23, "L3  C: 1MB, 8-way set associative, sectored cache, 64B line size" },
	{ 0x25, "L3  C: 2MB, 8-way set associative, sectored cache, 64B line size" },
	{ 0x29, "L3  C: 4MB, 8-way set associative, sectored cache, 64B line size" },
	{ 0x2c, "L1 DC: 32KB, 8-way set associative, 64B line size" },
	{ 0x30, "L1 IC: 32KB, 8-way set associative, 64B line size" },
	{ 0x39, "L2  C: 128KB, 4-way set associative, sectored cache, 64B line size" },
	{ 0x3b, "L2  C: 128KB, 2-way set associative, sectored cache, 64B line size" },
	{ 0x3c, "L2  C: 256KB, 4-way set associative, sectored cache, 64B line size" },
	{ 0x40, "either no L2 C or no L3 C" },
	{ 0x41, "L2  C: 128KB, 4-way set associative, 32B line size" },
	{ 0x42, "L2  C: 256KB, 4-way set associative, 32B line size" },
	{ 0x43, "L2  C: 512KB, 4-way set associative, 32B line size" },
	{ 0x44, "L2  C: 1MB, 4-way set associative, 32B line size" },
	{ 0x45, "L2  C: 2MB, 4-way set associative, 32B line size" },
	{ 0x46, "L3  C: 4MB, 4-way set associative, 64B line size" },
	{ 0x47, "L3  C: 8MB, 8-way set associative, 64B line size" },
	{ 0x49, "L2  C: 4MB, 16-way set associative, 64B line size" },
	{ 0x50, "I-TLB: 4K, 2M or 4M pages, fully associative, 64 entries" },
	{ 0x51, "I-TLB: 4K, 2M or 4M pages, fully associative, 128 entries" },
	{ 0x52, "I-TLB: 4K, 2M or 4M pages, fully associative, 256 entries" },
	{ 0x56, "D-TLB0: 4M pages, 4-way set associative, 16 entries" },
	{ 0x57, "D-TLB0: 4KB pages, 4-way set associative, 16 entries" },
	{ 0x5b, "D-TLB: 4K or 4M pages, fully associative, 64 entries" },
	{ 0x5c, "D-TLB: 4K or 4M pages, fully associative, 128 entries" },
	{ 0x5d, "D-TLB: 4K or 4M pages, fully associative, 256 entries" },
	{ 0x60, "L1 DC: 16KB, 8-way set associative, 64B line size" },
	{ 0x66, "L1 DC: 8KB, 4-way set associative, 64B line size" },
	{ 0x67, "L1 DC: 16KB, 4-way set associative, 64B line size" },
	{ 0x68, "L1 DC: 32KB, 4-way set associative, 64B line size" },
	{ 0x70, "Tr  C: 12K-uops, 8-way set associative" },
	{ 0x71, "Tr  C: 16K-uops, 8-way set associative" },
	{ 0x72, "Tr  C: 32K-uops, 8-way set associative" },
	{ 0x78, "L2  C: 1MB, 4-way set associative, 64B line size" },
	{ 0x79, "L2  C: 128KB, 8-way set associative, sectored cache, 64B line size" },
	{ 0x7a, "L2  C: 256KB, 8-way set associative, sectored cache, 64B line size" },
	{ 0x7b, "L2  C: 512KB, 8-way set associative, sectored cache, 64B line size" },
	{ 0x7c, "L2  C: 1MB, 8-way set associative, sectored cache, 64B line size" },
	{ 0x7d, "L2  C: 2MB, 8-way set associative, 64B line size" },
	{ 0x7f, "L2  C: 512KB, 2-way set associative, 64B line size" },
	{ 0x82, "L2  C: 256KB, 8-way set associative, 32B line size" },
	{ 0x83, "L2  C: 512KB, 8-way set associative, 32B line size" },
	{ 0x84, "L2  C: 1MB, 8-way set associative, 32B line size" },
	{ 0x85, "L2  C: 2MB, 8-way set associative, 32B line size" },
	{ 0x86, "L2  C: 512KB, 4-way set associative, 64B line size" },
	{ 0x87, "L2  C: 1MB, 8-way set associative, 64B line size" },
	{ 0xb0, "I-TLB: 4KB pages, 4-way set associative, 128 entries" },
	{ 0xb3, "D-TLB: 4KB pages, 4-way set associative, 128 entries" },
	{ 0xb4, "D-TLB1: 4KB pages, 4-way set associative, 256 entries" },
	{ 0xf0, "64B prefetching" },
	{ 0xf1, "128B prefetching" },
	{ 0x00, "" } /* Marker for end of list */
};

/* Name the program was started as (argv[0]); printed by usage(). */
char *progname;
/* Verbosity level: main() increments it once per -v option, and the
 * outputatlvl() macro compares it against the level of each message. */
int verblev = 0;

/* Vendor classification that main() derives from the register values
 * returned by cpuid function 0; it selects which legend is printed for
 * function 0x80000001. */
#define PROCTYPEUNKNOWN	0
#define PROCTYPEAMD	1
#define PROCTYPEINTEL	2

/*
 * Print a printf-style message to stdout if the global verbosity level
 * (verblev) is at least n. The first variadic argument is the format
 * string.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like a
 * single statement: "if (x) outputatlvl(...); else ..." now parses as
 * intended instead of producing a dangling-else syntax error. The level
 * argument is parenthesized, and the standard __VA_ARGS__ form replaces
 * the GNU-only named variadic parameter.
 */
#define outputatlvl(n, ...) \
  do {\
    if (verblev >= (n)) {\
      fprintf(stdout, __VA_ARGS__);\
    }\
  } while (0)

/*
 * Execute the cpuid instruction with both a function number and an index.
 *
 * Newer Intel processors have cpuid functions that expect an index value
 * in ecx - use this function to access them. The plain cpuid() wrapper
 * maps to this with index 0.
 *
 *   op   - function number, loaded into eax before the instruction
 *   idx  - index value, loaded into ecx before the instruction
 *   eax, ebx, ecx, edx - receive the four result registers; all four
 *          pointers must be valid
 *
 * The asm statement is marked volatile: it has outputs, so without the
 * qualifier the compiler may treat it as a pure function of its inputs and
 * merge or hoist repeated queries (main() deliberately re-issues function
 * 2 inside a loop).
 */
void
cpuid2(
	uint32_t op,
	uint32_t idx,
	uint32_t *eax,
	uint32_t *ebx,
	uint32_t *ecx,
	uint32_t *edx
)
{
	__asm__ __volatile__("cpuid\n"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "a" (op), "c" (idx)
		: "cc"
	);
}

/*
 * Query a cpuid function that takes no index: forwards to cpuid2() with an
 * index of 0 and stores the four result registers through the pointers.
 */
void
cpuid(uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
	const uint32_t noindex = 0;

	cpuid2(op, noindex, eax, ebx, ecx, edx);
}

/*
 * Print one "Yes"/"No" line per entry of a feature-bit legend table.
 *
 *   fbp - legend table; processing stops at the first entry whose bitnum
 *         is 64 or more (the tables in this file end with bitnum 255)
 *   reg - register value the legend is applied to
 *
 * Output is produced at verbosity level 1 and above.
 */
static void
displayflaglegend(struct featurebit * fbp, unsigned long reg)
{
	unsigned int j;

	for (j = 0; fbp[j].bitnum < 64; j++) {
		int isset;

		/*
		 * Shift the unsigned register down instead of shifting the
		 * signed constant 1 up: "1 << 31" overflows int, which is
		 * undefined behaviour. A bit number beyond the width of
		 * unsigned long can never be set, and shifting by it would
		 * be undefined as well, so it is reported as "No".
		 */
		isset = (fbp[j].bitnum < 8 * sizeof(reg))
		     && (((reg >> fbp[j].bitnum) & 1UL) != 0);
		outputatlvl(1, "    *  (%02d) %-40s: %s\n",
			fbp[j].bitnum,
			fbp[j].description,
			isset ? "Yes" : "No ");
	}
}

static void
displaycacheexpl(unsigned long reg, const char * const descr, unsigned char iseax)
{
	unsigned char j;
	unsigned char vb; /* valid bytes in register */
	unsigned char l;
	unsigned char cc; /* cache code */
	
	if (reg & 0x80000000) {
		outputatlvl(2, "    *  %s : can not be parsed\n", descr);
	} else {
		if (iseax) {
			vb = 3; /* eax only contains 3 data bytes */
		} else {
			vb = 4; /* the other registers contain 4. */
		}
		for (j = 0; j < vb; j++) {
			cc = (unsigned char)((reg >> 24) & 0xFF);
			reg <<= 8;
			if (cc == 0) {
				continue;
			}
			outputatlvl(2, "    *  %s%d: (%02x) ", descr, j, cc);
			l = 0;
			while (cachecodes[l].description[0] != 0) {
				if (cachecodes[l].code == cc) {
					break;
				}
				l++;
			}
			if (cachecodes[l].description[0] == 0) {
				outputatlvl(2, "%s", "???\n");
			} else {
				outputatlvl(2, "%s\n",
					cachecodes[l].description);
			}
		}
	}
	return;
}

/*
 * Explain one result of cpuid function 4 (the newer cache description).
 * Only eax is decoded; ebx, ecx and edx are accepted but not used.
 *
 * Fields of eax used here:
 *   bits  0..4   cache type (1 data, 2 instruction, 3 unified)
 *   bits  5..7   cache level
 *   bit   8      self initializing
 *   bit   9      fully associative
 *   bits 14..25  threads sharing this cache, minus one
 *   bits 26..31  cores in the package, minus one
 *
 * Output is produced at verbosity level 1 and above.
 */
void
explainnewintelcacheinfo(unsigned long eax, unsigned long ebx,
                         unsigned long ecx, unsigned long edx)
{
	/* The thread field spans bits 14..25, i.e. 12 bits: the mask must
	 * be 0xfff. A mask of 0x7ff would drop the topmost bit. */
	unsigned long threads = ((eax >> 14) & 0xfff) + 1;
	unsigned long cores = ((eax >> 26) & 0x3f) + 1;

	outputatlvl(1, "    * Level %lu ", ((eax >>  5) & 0x07));
	switch ((eax & 0x1f)) {
		case 1: outputatlvl(1, "%s", "Data Cache"); break;
		case 2: outputatlvl(1, "%s", "Instruction Cache"); break;
		case 3: outputatlvl(1, "%s", "Unified Cache"); break;
		default: outputatlvl(1, "[Unknown Cache Type %lu]", eax & 0x1f); break;
	}
	outputatlvl(1, ": %sself initializing, %sfully associative\n",
		    ((eax & 0x100) ? "" : "not "),
		    ((eax & 0x200) ? "" : "not "));
	outputatlvl(1, "    *  Shared by no more than %lu thread%s"
	               " of the %lu core%s in the package.\n",
		    threads,
		    ((threads == 1) ? "" : "s"),
		    cores,
		    ((cores == 1) ? "" : "s"));
}

/*
 * Translate the 4-bit associativity code of the extended cache
 * information (main() passes bits 12..15 of ecx from cpuid function
 * 0x80000006) into a human readable string.
 *
 * Returns a pointer to a string literal; the caller must not free or
 * modify it. Codes that are not listed yield "unknown".
 */
const char *
decodecacheassociativity(unsigned long x)
{
	switch (x) {
	case 0x00:	return "disabled";
	case 0x01:	return "direct mapped (1-way)";
	case 0x02:	return "2-way";
	case 0x04:	return "4-way";
	case 0x06:	return "8-way";
	case 0x08:	return "16-way";
	/* Encodings used by larger, more recent caches. */
	case 0x0a:	return "32-way";
	case 0x0b:	return "48-way";
	case 0x0c:	return "64-way";
	case 0x0d:	return "96-way";
	case 0x0e:	return "128-way";
	case 0x0f:	return "full";
	default:	break;
	}
	return "unknown";
}

/*
 * Print a short usage message to stderr and terminate the program.
 *
 *   retval - exit status handed to exit(); this function does not return
 *
 * Only -v is advertised: it is the only option main() passes to getopt().
 * Each occurrence of -v raises the verbosity by one level.
 */
static void
usage(int retval)
{
	fprintf(stderr, "Usage: %s [-v]\n", progname);
	fprintf(stderr, " -v adds explanations to the output"
			" (repeat for more detail)\n");
	exit(retval);
}

/*
 * Dump the cpuid information of the processor this program runs on.
 *
 * The raw register values of every basic function (0 .. maximum reported
 * by function 0) and every extended function (0x80000000 .. maximum
 * reported by function 0x80000000) are printed as C initializer lines.
 * With -v (verbosity 1) decoded explanations are added as comments; with
 * -v -v (verbosity 2) the old-style cache descriptors of function 2 are
 * explained as well.
 *
 * Returns 0; an unknown option ends the program through usage(1).
 */
int
main(int argc, char **argv)
{
	int c;
	unsigned long i, max;
	uint32_t eax, ebx, ecx, edx;
	struct utsname buf;
	unsigned char processortype;
	int ret;

	progname = argv[0];

	while ((c = getopt(argc, argv, "v")) != -1) {
		switch (c) {
		case 'v':
			verblev++;
			break;
		default:
			usage(1);
			/*NOTREACHED*/
		}
	}

	/* Get the system name */
	ret = uname(&buf);
	if (ret < 0) {
		fprintf(stderr, "Cannot uname, but continuing anyways...\n");
	} else {
		outputatlvl(0, "/* %s %s %s %s %s */\n",
			buf.sysname, buf.nodename,
			buf.release, buf.version, buf.machine);
	}

	/* Get vendor info and max eax */
	i = 0;
	cpuid(i, &eax, &ebx, &ecx, &edx);
	outputatlvl(0, "/* %8s      %-10s  %-10s  %-10s  %-10s */\n",
			"function", "eax", "ebx", "ecx", "edx");
	outputatlvl(-1, "/* %08lx */ { 0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx }, ",
			i, (unsigned long) eax, (unsigned long) ebx, (unsigned long) ecx, (unsigned long) edx);
	/* The vendor string is stored in the order ebx, edx, ecx. */
	outputatlvl(-1, "/* %c%c%c%c%c%c%c%c%c%c%c%c */\n",
			(unsigned char)((ebx >>  0) & 0xFF),
			(unsigned char)((ebx >>  8) & 0xFF),
			(unsigned char)((ebx >> 16) & 0xFF),
			(unsigned char)((ebx >> 24) & 0xFF),
			(unsigned char)((edx >>  0) & 0xFF),
			(unsigned char)((edx >>  8) & 0xFF),
			(unsigned char)((edx >> 16) & 0xFF),
			(unsigned char)((edx >> 24) & 0xFF),
			(unsigned char)((ecx >>  0) & 0xFF),
			(unsigned char)((ecx >>  8) & 0xFF),
			(unsigned char)((ecx >> 16) & 0xFF),
			(unsigned char)((ecx >> 24) & 0xFF));

	/* Classify the vendor by comparing the three vendor registers. */
	processortype = PROCTYPEUNKNOWN;
	if        ((ebx == 0x756e6547)
		&& (ecx == 0x6c65746e)
		&& (edx == 0x49656e69)) {
		processortype = PROCTYPEINTEL;
	} else if ((ebx == 0x68747541)
		&& (ecx == 0x444d4163)
		&& (edx == 0x69746e65)) {
		processortype = PROCTYPEAMD;
	}

	/* Get further information */
	max = eax;
	for (i = 1; i <= max; i++) {
		cpuid(i, &eax, &ebx, &ecx, &edx);
		outputatlvl(-1, "/* %08lx */ { 0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx },\n",
				i, (unsigned long) eax, (unsigned long) ebx, (unsigned long) ecx, (unsigned long) edx);
		if (i == 0x01) {
			unsigned int basefamily;
			unsigned int family;
			unsigned int model;

			/*
			 * The extended family (bits 20..27) is added only if
			 * the base family is 0xF. The sum can exceed 255,
			 * hence unsigned int instead of unsigned char.
			 */
			basefamily = (eax >> 8) & 0xF;
			family = basefamily;
			if (basefamily == 0xF) {
				family += (eax >> 20) & 0xFF;
			}
			/*
			 * The extended model (bits 16..19) forms the upper
			 * nibble of the model. Whether it is valid depends on
			 * the base FAMILY being 0x6 or 0xF - not on the base
			 * model being 0xF.
			 */
			model = (eax >> 4) & 0xF;
			if ((basefamily == 0x6) || (basefamily == 0xF)) {
				model |= ((eax >> 16) & 0xF) << 4;
			}
			outputatlvl(1, "   /* family=0x%02x, model=0x%02x, step=0x%1x\n",
					family,
					model,
					(unsigned int)((eax >> 0) & 0xF));
			outputatlvl(1, "%s", "    * The bits in the cx register "
					"have the following meaning:\n");
			displayflaglegend(procfeaturescx, ecx);
			outputatlvl(1, "%s", "    * The bits in the dx register "
					"have the following meaning:\n");
			displayflaglegend(procfeaturesdx, edx);
			outputatlvl(1, "    */\n");
		}
		if (i == 0x02) {
			/* Old-style cache information. The low byte of eax
			 * tells how often the function has to be called;
			 * each call returns descriptor bytes in the four
			 * registers. Function 4 is the newer replacement
			 * for this interface. */
			unsigned long j;
			unsigned long chmax;

			chmax = eax & 0xFF; /* Tells us how often we need to call */
			outputatlvl(2, "%s", "   /* cache information (old):\n");
			for (j = 0; j < chmax ; j++) {
				/* Never print more than 25 rounds. */
				if (j > 24) break;
				outputatlvl(2, "    * %08lx%c: { 0x%08lx, "
						"0x%08lx, 0x%08lx, 0x%08lx }\n",
					i, (unsigned char)('a'+j),
					(unsigned long) eax, (unsigned long) ebx, (unsigned long) ecx, (unsigned long) edx);
				displaycacheexpl(eax, "eax", 1);
				displaycacheexpl(ebx, "ebx", 0);
				displaycacheexpl(ecx, "ecx", 0);
				displaycacheexpl(edx, "edx", 0);
				cpuid(i, &eax, &ebx, &ecx, &edx);
			}
			outputatlvl(2, "%s", "    */\n");
		}
		if ((i == 0x04) /* ??? && (processortype == PROCTYPEINTEL) */) {
			/* Deterministic Cache Parameters - in other words,
			 * a version of 0x02 that can actually be used.
			 * Index 0 was already queried above; walk the
			 * following indices until the cache type field
			 * (low 5 bits of eax) is zero. */
			if ((eax & 0x1f)) {
				unsigned long j = 1;
				outputatlvl(1, "%s", "   /* cache information (new):\n");
				while ((eax & 0x1f)) {
					explainnewintelcacheinfo(eax, ebx, ecx, edx);
					cpuid2(i, j, &eax, &ebx, &ecx, &edx);
					j++;
				}
				outputatlvl(1, "%s", "    */\n");
			}
		}
		if ((i == 0x0a) /* ??? && (processortype == PROCTYPEINTEL) */) {
			/* Architectural (!) Performance Monitoring Counter
			 * Support. Seems they now want to standardize these
			 * instead of making them individual for each
			 * processor. */
			outputatlvl(1, "%s", "   /* Performance counters: ");
			if (eax & 0xffUL) {
				outputatlvl(1, "Version %lu; %lu counters "
				                "with %lu bits per core.\n",
						(unsigned long) ((eax >>  0) & 0xFF),
						(unsigned long) ((eax >>  8) & 0xFF),
						(unsigned long) ((eax >> 16) & 0xFF));
				outputatlvl(1, "%s", "    * The bx register says the "
				                "following event counters "
						"are supported:\n");
				/* ebx is passed inverted, so a cleared bit
				 * is shown as "Yes". */
				displayflaglegend(perfctrfeaturesintelbx, ~ebx);
				outputatlvl(1, "%s", "    */\n");
			} else {
				outputatlvl(1, "%s", "Not supported. */\n");
			}
		}
	}

	/* Test for extended information and get it if available*/
	i = 0x80000000;
	cpuid(i, &eax, &ebx, &ecx, &edx);

	if (eax < 0x80000004) {
		outputatlvl(-1, "%s", "/* No extended cpuid functions available */\n");
		return 0;
	}
	outputatlvl(-1, "/* %08lx */ { 0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx },\n",
			i, (unsigned long) eax, (unsigned long) ebx, (unsigned long) ecx, (unsigned long) edx);

	max = eax;
	for (i = 0x80000001; i <= max; i++) {
		cpuid(i, &eax, &ebx, &ecx, &edx);
		/* No newline here: explanations may follow on the same line;
		 * the newline is printed at the end of the loop body. */
		outputatlvl(-1, "/* %08lx */ { 0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx },",
				i, (unsigned long) eax, (unsigned long) ebx, (unsigned long) ecx, (unsigned long) edx);
		if ((i == 0x80000001) && (processortype == PROCTYPEAMD)) {
			outputatlvl(1, "%s", "\n   /* The bits in the dx register "
					"have the following meaning:\n");
			displayflaglegend(procfeaturesamddx, edx);
			outputatlvl(1, "%s", "    */");
		}
		if ((i == 0x80000001) && (processortype == PROCTYPEINTEL)) {
			outputatlvl(1, "%s", "\n   /* The bits in the dx register "
					"have the following meaning:\n");
			displayflaglegend(procfeaturesinteldx, edx);
			outputatlvl(1, "%s", "    */");
		}
		if ((i == 0x80000002) || (i == 0x80000003) || (i == 0x80000004)) {
			/* This supposedly is the processor name string. */
			outputatlvl(1, " /* %c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c */",
					(unsigned char)((eax >>  0) & 0xFF),
					(unsigned char)((eax >>  8) & 0xFF),
					(unsigned char)((eax >> 16) & 0xFF),
					(unsigned char)((eax >> 24) & 0xFF),
					(unsigned char)((ebx >>  0) & 0xFF),
					(unsigned char)((ebx >>  8) & 0xFF),
					(unsigned char)((ebx >> 16) & 0xFF),
					(unsigned char)((ebx >> 24) & 0xFF),
					(unsigned char)((ecx >>  0) & 0xFF),
					(unsigned char)((ecx >>  8) & 0xFF),
					(unsigned char)((ecx >> 16) & 0xFF),
					(unsigned char)((ecx >> 24) & 0xFF),
					(unsigned char)((edx >>  0) & 0xFF),
					(unsigned char)((edx >>  8) & 0xFF),
					(unsigned char)((edx >> 16) & 0xFF),
					(unsigned char)((edx >> 24) & 0xFF));
		}
		if (i == 0x80000006) {
			/* Cache size info */
			outputatlvl(1, "\n   /* %luK cache, associativity: %s,"
			               " cache line size: %lu */",
				       (unsigned long) ((ecx >> 16) & 0xffff),
				       decodecacheassociativity(((ecx >> 12) & 0x0f)),
				       (unsigned long) ((ecx >>  0) & 0xff));
		}
		if (i == 0x80000008) {
			/* long mode address size */
			outputatlvl(1, "\n   /* address size in long mode: "
					"virtual %d bit, physical %d bit */",
					(unsigned char)((eax >>  8) & 0xFF),
					(unsigned char)((eax >>  0) & 0xFF));
		}
		outputatlvl(-1, "%s", "\n");
	}

	return 0;
}
