/*
 * $Id: glue-storage.c,v 1.63 2012-02-13 09:53:16 vrsieh Exp $ 
 *
 * Copyright (C) 2007-2009 FAUmachine Team <info@faumachine.org>.
 * This program is free software. You can redistribute it and/or modify it
 * under the terms of the GNU General Public License, either version 2 of
 * the License, or (at your option) any later version. See COPYING.
 */

/* Set to 1 to compile in the fprintf(stderr, ...) tracing found in the
 * functions of this file. */
#define DEBUG_CONTROL_FLOW	0

/* Number of slots in the media_cow[]/media_map[] arrays of struct storage;
 * the option parser rejects COW levels that are not below this value. */
#define STORAGE_COW_COUNT	10
/* Size in bytes of one mmap() window of the block cache, and the amount of
 * media data tracked by a single bit of a *.map file.
 * NOTE(review): mmap() offsets are multiples of this value, so it presumably
 * has to be a multiple of the system page size -- confirm. */
#define STORAGE_BLOCKSIZE	4096

#include "config.h"

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>

#include "glue.h"
#include "simsetup.h"

#include "conv_zero.h"

/*
 * Run-time options. They are set by storage_handle_args() (environment,
 * then command line) and may be overridden by storage_handle_simsetup().
 */
/* Non-zero: storage_create_empty() creates files by seeking to the end and
 * writing a single byte instead of allocating/writing the whole file. */
static int storage_sparse;
/* Non-zero: storage_create() re-creates the files of the topmost COW level
 * even if they already exist. */
static int storage_creat;
/* Index of the topmost COW level. storage_create() opens levels
 * 0..storage_cow; only the topmost one is opened for writing. */
static int storage_cow;

/*
 * One slot of the block cache of a storage_simple file: a single
 * STORAGE_BLOCKSIZE-sized mmap() window plus its links in the LRU list.
 */
struct storage_simple_cache {
	/* Neighbour towards the most recently used end, NULL for the first. */
	struct storage_simple_cache *lru_prev;
	/* Neighbour towards the least recently used end, NULL for the last. */
	struct storage_simple_cache *lru_next;

	/* Address of the mapping; only meaningful while pos is not -1. */
	uint8_t *buf;
	/* File offset of the mapped block, or -1 while the slot is unused. */
	unsigned long long pos;
};

/*
 * A plain file accessed through a small cache of mmap()ed blocks.
 */
struct storage_simple {
	/* Descriptor of the underlying file. */
	int fd;
	/* Non-zero if the file was opened (and is mapped) for writing. */
	int wflag;

	/* File size in bytes as determined when the file was opened. */
	unsigned long long size;

	/* Cache slots; all of them are always linked into the LRU list. */
	struct storage_simple_cache cache[100];

	/* Most recently used slot. */
	struct storage_simple_cache *lru_first;
	/* Least recently used slot; recycled on a cache miss. */
	struct storage_simple_cache *lru_last;
};

/*
 * Return a pointer to the STORAGE_BLOCKSIZE bytes of file `s` that start at
 * file offset `pos`, mapping that block first if it is not cached yet.
 *
 * `pos` must be a multiple of STORAGE_BLOCKSIZE and below s->size (both are
 * asserted). The block is mapped MAP_SHARED, writable only if s->wflag is
 * set, so stores through the returned pointer modify the file.
 *
 * On a miss the least recently used slot is unmapped (if in use) and
 * recycled; in every case the slot used is moved to the front of the LRU
 * list. The returned pointer therefore stays valid only until the slot is
 * recycled by later calls for other blocks of the same file.
 */
static uint8_t *
storage_simple_cache(struct storage_simple *s, unsigned long long pos)
{
	const unsigned int nentries = sizeof(s->cache) / sizeof(s->cache[0]);
	struct storage_simple_cache *act;
	unsigned int nr;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p 0x%llx\n", __FUNCTION__, (void *) s, pos);
#endif

	assert(pos < s->size);
	assert(! (pos & (STORAGE_BLOCKSIZE - 1)));
	assert(s->lru_first);
	assert(s->lru_last);

	/* Look for a slot that already maps the requested block. */
	act = NULL;
	for (nr = 0; nr < nentries; nr++) {
		if (s->cache[nr].pos == pos) {
			/* Found in cache. */
			act = &s->cache[nr];
			break;
		}
	}

	if (! act) {
		/* Not found in cache: recycle the least recently used slot. */
		uint8_t *aret;
		int ret;

		act = s->lru_last;
		if (act->pos != (unsigned long long) -1) {
			ret = munmap(act->buf, STORAGE_BLOCKSIZE);
			assert(0 <= ret);
		}

		/*
		 * Only the return value tells whether mmap() failed; errno
		 * is unspecified after a successful call and must not be
		 * inspected then.
		 */
		aret = mmap(NULL, STORAGE_BLOCKSIZE,
				s->wflag
					? (PROT_READ | PROT_WRITE)
					: PROT_READ,
				MAP_SHARED,
				s->fd, pos);
		assert(aret != MAP_FAILED);
		act->buf = aret;
		act->pos = pos;

		{
			/*
			 * Touch the first byte of the new mapping (read it,
			 * and write it back if the mapping is writable).
			 * NOTE(review): presumably done to take the page
			 * fault here rather than at the caller's first
			 * access -- confirm.
			 */
			volatile uint8_t *t;
			uint8_t u;

			t = aret;
			u = *t;
			if (s->wflag) {
				*t = u;
			}
		}
	}

	/* Remove from old LRU list. */
	if (act->lru_prev) {
		act->lru_prev->lru_next = act->lru_next;
	} else {
		s->lru_first = act->lru_next;
	}
	if (act->lru_next) {
		act->lru_next->lru_prev = act->lru_prev;
	} else {
		s->lru_last = act->lru_prev;
	}

	/*
	 * Add to new LRU list, at the front. The list always holds all
	 * slots of the cache, so there is a former first element to link to.
	 */
	act->lru_prev = NULL;
	act->lru_next = s->lru_first;
	s->lru_first = act;
	act->lru_next->lru_prev = act;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p (%td) %p\n", __FUNCTION__,
			(void *) act, act - &s->cache[0], (void *) act->buf);
#endif

	return act->buf;
}

/*
 * Copy `buflen` bytes starting at file offset `pos` of the storage_simple
 * file `_s` into `_buf`.
 *
 * The range is processed block by block through storage_simple_cache(),
 * which asserts that every block touched lies below the file size.
 * Returns the number of bytes copied, i.e. `buflen`.
 */
long long
storage_simple_read(
	void *_s,
	void *_buf,
	unsigned long long buflen,
	unsigned long long pos
)
{
	struct storage_simple *s = _s;
	uint8_t *dst = (uint8_t *) _buf;
	unsigned long long done;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p 0x%llx 0x%llx\n", __FUNCTION__,
			s, buflen, pos);
#endif

	for (done = 0; done < buflen; ) {
		/* Offset of pos inside its block and bytes left in that block. */
		unsigned long long off = pos & (STORAGE_BLOCKSIZE - 1);
		unsigned long long chunk = STORAGE_BLOCKSIZE - off;
		unsigned long long left = buflen - done;
		const uint8_t *block;

		if (left < chunk) {
			chunk = left;
		}

		block = storage_simple_cache(s, pos - off);
		memcpy(dst + done, block + off, chunk);

		pos += chunk;
		done += chunk;
	}

	return done;
}

unsigned long long
storage_simple_size(void *_s)
{
	struct storage_simple *s = _s;

	assert(0 <= s->fd);

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: 0x%llx\n", __FUNCTION__, s->size);
#endif

	return s->size;
}

/*
 * Open the existing file `path` for block-cached access.
 *
 * The file is opened read/write if `wflag` is non-zero, read-only
 * otherwise. Its size is determined once, here, by seeking to the end.
 *
 * Returns a handle for the other storage_simple_*() functions, or NULL if
 * the file cannot be opened or its size cannot be determined. The handle
 * is allocated with shm_alloc() and released by storage_simple_close().
 */
void *
storage_simple_open(const char *path, int wflag)
{
	struct storage_simple *s;
	unsigned int nentries;
	unsigned int nr;
	off_t end;

	s = shm_alloc(sizeof(*s));
	assert(s);

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p %s %d\n", __FUNCTION__,
			(void *) s, path, wflag);
#endif

	/*
	 * Open file.
	 */
	s->fd = open(path, wflag ? O_RDWR : O_RDONLY);
	if (s->fd < 0) {
#if DEBUG_CONTROL_FLOW
		fprintf(stderr, "%s: bad open\n", __FUNCTION__);
#endif
		shm_free(s);
		return NULL;
	}
	s->wflag = wflag;

	/*
	 * Get size.
	 *
	 * The result is checked in its signed type before it is stored in
	 * the unsigned size field, so that detecting a failure does not
	 * depend on converting an out-of-range value back to signed.
	 */
	end = lseek(s->fd, (off_t) 0, SEEK_END);
	if (end < 0) {
		(void) close(s->fd);
#if DEBUG_CONTROL_FLOW
		fprintf(stderr, "%s: bad lseek\n", __FUNCTION__);
#endif
		shm_free(s);
		return NULL;
	}
	s->size = end;

	/*
	 * Initialize cache: all slots unused (pos == -1) and chained into
	 * the LRU list in array order.
	 */
	nentries = sizeof(s->cache) / sizeof(s->cache[0]);
	for (nr = 0; nr < nentries; nr++) {
		s->cache[nr].lru_prev = (nr == 0)
			? NULL
			: &s->cache[nr - 1];
		s->cache[nr].lru_next = (nr == nentries - 1)
			? NULL
			: &s->cache[nr + 1];
		s->cache[nr].buf = NULL;
		s->cache[nr].pos = -1;
	}

	s->lru_first = &s->cache[0];
	s->lru_last = &s->cache[nentries - 1];

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: done (size=%llu)\n", __FUNCTION__, s->size);
#endif

	return s;
}

int
storage_simple_close(void *_s)
{
	struct storage_simple *s = _s;
	unsigned int nr;
	int ret;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p\n", __FUNCTION__, s);
#endif

	for (nr = 0; nr < sizeof(s->cache) / sizeof(s->cache[0]); nr++) {
		if (s->cache[nr].pos != -1) {
#if DEBUG_CONTROL_FLOW
			fprintf(stderr, "%s: %p unmapping %d\n", __FUNCTION__,
					s, nr);
#endif
			ret = munmap(s->cache[nr].buf, STORAGE_BLOCKSIZE);
			assert(0 <= ret);
			s->cache[nr].pos = -1;
		}
	}

	ret = close(s->fd);
	assert(0 <= ret);

	shm_free(s);

	return ret;
}

/*
 * State of one media image: the converter providing the original image
 * data plus, per COW level, a data file (*.cow) and a bitmap file (*.map).
 */
struct storage {
	/* Converter handle returned by conv_open() in storage_create(). */
	void *conv;
	/* Closes `conv`; called by storage_destroy(). */
	int (*conv_close)(void *);
	/* Reads original image data from `conv`; called as
	 * (handle, buffer, length, position) when no COW level holds the
	 * requested block. */
	int64_t (*conv_read)(void *, void *, uint64_t, uint64_t);
	/* storage_simple handles of the *.cow files, indexed by COW level;
	 * they hold the modified blocks at the same offsets as the image. */
	void *media_cow[STORAGE_COW_COUNT];
	/* storage_simple handles of the *.map files, indexed by COW level;
	 * one bit per STORAGE_BLOCKSIZE block, set if the block's data is
	 * in the *.cow file of the same level. */
	void *media_map[STORAGE_COW_COUNT];
};

/*
 * Read `buflen` bytes at media position `pos` into `buf`; the range must
 * not cross a STORAGE_BLOCKSIZE boundary (asserted).
 *
 * The COW levels are searched from `cowlevel` downwards: the first level
 * whose map has the bit of this block set supplies the data from its
 * *.cow file. If no level has the bit set (or `cowlevel` is negative) the
 * data comes from the converter, which must deliver all bytes (asserted).
 *
 * Returns `buflen`.
 */
static int
storage_read_block(
	struct storage *s,
	int cowlevel,
	uint8_t *buf,
	unsigned int buflen,
	unsigned long long pos
)
{
	/* Position of this block's bit in the map: byte index and bit number. */
	const unsigned long map_byte = (pos / STORAGE_BLOCKSIZE) / 8;
	const unsigned int map_bit = (pos / STORAGE_BLOCKSIZE) % 8;
	/* Offset of pos inside its block. */
	const unsigned long long within = pos & (STORAGE_BLOCKSIZE - 1);
	int level;

	for (level = cowlevel; ; level--) {
		const uint8_t *map;

#if DEBUG_CONTROL_FLOW
		fprintf(stderr, "%s: %p %d %d 0x%llx\n", __FUNCTION__,
				s, level, buflen, pos);
#endif

		assert(0 < buflen);
		assert(within + buflen <= STORAGE_BLOCKSIZE);

		if (level < 0) {
			/* Use original data. */
			long long ret;

			ret = (s->conv_read)(s->conv, buf, buflen, pos);
			assert(ret == buflen);
			break;
		}

		/* Use data from file, if this level has the block. */
		map = storage_simple_cache(s->media_map[level],
				map_byte & ~(STORAGE_BLOCKSIZE - 1));

		if ((map[map_byte & (STORAGE_BLOCKSIZE - 1)] >> map_bit) & 1) {
			/* Use modified data from media.cow-file. */
			const uint8_t *data;

			data = storage_simple_cache(s->media_cow[level],
					pos & ~(STORAGE_BLOCKSIZE - 1));
			memcpy(buf, data + within, buflen);
			break;
		}

		/* Otherwise use data from lower levels. */
	}

	return buflen;
}

/*
 * Write `buflen` bytes from `buf` to media position `pos` at COW level
 * `cowlevel`; the range must not cross a STORAGE_BLOCKSIZE boundary
 * (asserted) and `cowlevel` must not be negative.
 *
 * If the block is not yet present at this level, its map bit is set; for
 * a partial-block write the whole block is first copied up from the lower
 * levels into this level's *.cow file so the untouched bytes are valid.
 *
 * Returns `buflen`.
 */
static int
storage_write_block(
	struct storage *s,
	int cowlevel,
	const uint8_t *buf,
	unsigned int buflen,
	unsigned long long pos
)
{
	/* Offset of pos inside its block, and start of that block. */
	const unsigned long long within = pos & (STORAGE_BLOCKSIZE - 1);
	const unsigned long long block_pos = pos & ~(STORAGE_BLOCKSIZE - 1);
	unsigned long map_byte;
	unsigned int map_bit;
	uint8_t *map;
	uint8_t *data;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p %d %d 0x%llx\n", __FUNCTION__,
			s, cowlevel, buflen, pos);
#endif

	assert(0 < buflen);
	assert(within + buflen <= STORAGE_BLOCKSIZE);

	if (cowlevel < 0) {
		assert(0); /* Mustn't happen. */
		return buflen;
	}

	/* Locate the map byte holding this block's bit. */
	map_byte = (pos / STORAGE_BLOCKSIZE) / 8;
	map_bit  = (pos / STORAGE_BLOCKSIZE) % 8;

	map = storage_simple_cache(s->media_map[cowlevel],
			map_byte & ~(STORAGE_BLOCKSIZE - 1));
	map += map_byte & (STORAGE_BLOCKSIZE - 1);

	if (((*map >> map_bit) & 1) == 0) {
		/* Block not present at this level yet. */
		if (buflen < STORAGE_BLOCKSIZE) {
			/* Copy original data to media.cow file. */
			long long ret;

			data = storage_simple_cache(s->media_cow[cowlevel],
					block_pos);
			ret = storage_read_block(s, cowlevel - 1,
					data, STORAGE_BLOCKSIZE, block_pos);
			assert(0 < ret && ret <= STORAGE_BLOCKSIZE);
		}
		/* The map and cow files have separate caches, so `map` is
		 * still mapped here. */
		*map |= 1 << map_bit;
	}

	data = storage_simple_cache(s->media_cow[cowlevel], block_pos);

	memcpy(data + within, buf, buflen);

	return buflen;
}

/*
 * Read `buflen` bytes at media position `pos` of storage `_s` into `_buf`,
 * as seen through the topmost COW level (storage_cow).
 *
 * The request is split at STORAGE_BLOCKSIZE boundaries and each piece is
 * handed to storage_read_block(). Returns the number of bytes read, i.e.
 * `buflen`.
 */
int
storage_read(
	void *_s,
	void *_buf,
	unsigned int buflen,
	unsigned long long pos
)
{
	struct storage *s = _s;
	uint8_t *dst = (uint8_t *) _buf;
	unsigned int done = 0;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p %d 0x%llx\n", __FUNCTION__,
			s, buflen, pos);
#endif

	while (done < buflen) {
		/* Bytes from pos up to the end of the block containing it. */
		unsigned long long room
			= STORAGE_BLOCKSIZE - (pos & (STORAGE_BLOCKSIZE - 1));
		unsigned int chunk = buflen - done;

		if (room < chunk) {
			chunk = room;
		}

		storage_read_block(s, storage_cow, dst + done, chunk, pos);

		pos += chunk;
		done += chunk;
	}

	return done;
}

/*
 * Write `buflen` bytes from `_buf` to media position `pos` of storage
 * `_s`; the data goes to the topmost COW level (storage_cow).
 *
 * The request is split at STORAGE_BLOCKSIZE boundaries and each piece is
 * handed to storage_write_block(). Returns the number of bytes written,
 * i.e. `buflen`.
 */
int
storage_write(
	void *_s,
	const void *_buf,
	unsigned int buflen,
	unsigned long long pos
)
{
	struct storage *s = _s;
	const uint8_t *src = (const uint8_t *) _buf;
	unsigned int remaining;
	unsigned int written;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p %d 0x%llx\n", __FUNCTION__,
			s, buflen, pos);
#endif

	written = 0;
	for (remaining = buflen; 0 < remaining; ) {
		/* Never go past the end of the block that contains pos. */
		unsigned int piece = remaining;

		if (STORAGE_BLOCKSIZE - (pos & (STORAGE_BLOCKSIZE - 1)) < piece) {
			piece = STORAGE_BLOCKSIZE - (pos & (STORAGE_BLOCKSIZE - 1));
		}

		storage_write_block(s, storage_cow, src, piece, pos);

		src += piece;
		pos += piece;
		written += piece;
		remaining -= piece;
	}

	return written;
}

/*
 * Write all `buflen` bytes of `buf` to `fd`, continuing after short
 * writes and retrying when write() is interrupted by a signal (EINTR).
 *
 * Returns the number of bytes written. This is less than `buflen` only if
 * write() returned 0 before everything was written. Returns a negative
 * value if write() failed with an error other than EINTR; bytes written
 * before the failure are not reported in that case.
 */
static int
xwrite(int fd, const unsigned char *buf, unsigned int buflen)
{
	unsigned int count;

	count = 0;
	while (count < buflen) {
		ssize_t ret;

		ret = write(fd, buf + count, buflen - count);
		if (ret < 0) {
			if (errno == EINTR) {
				/* Nothing was written; just try again. */
				continue;
			}
			return -1;
		}
		if (ret == 0) {
			break;
		}
		count += (unsigned int) ret;
	}
	return count;
}

/*
 * Create the file `name` (if it does not exist) and make it `size` bytes
 * long, filled with zeros.
 *
 * With storage_sparse set, only the last byte is written after seeking to
 * it, leaving it to the file system whether the rest is allocated.
 * Otherwise the space is allocated with posix_fallocate() where available;
 * if that is unavailable or fails, the file is filled by writing zeros.
 *
 * The file is not truncated here; the caller in this file unlinks an old
 * file before calling. Returns 0; failures are caught by assertions.
 */
static int
storage_create_empty(const char *name, unsigned long long size)
{
	int fd;
	int ret;

	fd = open(name, O_WRONLY | O_CREAT, 0666);
	assert(0 <= fd);

	if (storage_sparse) {
		if (0 < size) {
			/*
			 * The byte written must be zero: it becomes real
			 * file content (for a *.map file, the bits of the
			 * last blocks).
			 */
			const uint8_t byte = 0;
			off_t off;

			off = lseek(fd, size - sizeof(byte), SEEK_SET);
			assert(off == size - sizeof(byte));

			ret = xwrite(fd, &byte, sizeof(byte));
			assert(ret == sizeof(byte));
		}

	} else {
		int filled;

		filled = 0;
#if defined(HAVE_POSIX_FALLOCATE)
		/*
		 * posix_fallocate() returns 0 on success and an error
		 * number (not -1) on failure. It does not move the file
		 * offset, so the fallback below still starts at offset 0.
		 */
		if (size == 0
		 || posix_fallocate(fd, 0, size) == 0) {
			filled = 1;
		}
#endif
		if (! filled) {
			static const uint8_t buffer[1024*1024];

			while (0 < size) {
				unsigned int count;

				if (size < sizeof(buffer)) {
					count = size;
				} else {
					count = sizeof(buffer);
				}

				ret = xwrite(fd, buffer, count);
				assert(0 < ret);
				assert(ret <= count);

				size -= ret;
			}
		}
	}

	ret = fsync(fd);
	assert(0 <= ret);

	ret = close(fd);
	assert(0 <= ret);

	return 0;
}

/*
 * Set up the storage for the media called `name` with `size` bytes.
 *
 * `image` names the original image that is read through the converter
 * functions `conv_open`/`conv_close`/`conv_read`; if `image` is NULL or
 * empty, the conv_zero_*() functions are used instead.
 *
 * For every COW level 0..storage_cow the files "<basedir>/<name>.cow" and
 * "<basedir>/<name>.map" (with the level number appended for levels above
 * 0) are opened, after being (re-)created empty when they are not
 * accessible or, for the topmost level, when storage_creat is set. Only
 * the topmost level is opened for writing.
 *
 * Returns the new storage handle; all failures are caught by assertions.
 */
void *
storage_create(
	const char *name,
	uint64_t size,
	const char *image,
	void *(*conv_open)(const char *, uint64_t),
	int (*conv_close)(void *),
	int64_t (*conv_read)(void *, void *, uint64_t, uint64_t)
)
{
	struct storage *s;
	unsigned int cow;
	int ret;

	s = shm_alloc(sizeof(*s));
	assert(s);

	/*
	 * Open converter.
	 */
	if (! image
	 || ! *image) {
		/* Use empty file. */
		conv_open = conv_zero_open;
		conv_close = conv_zero_close;
		conv_read = conv_zero_read;
	}

	s->conv = (*conv_open)(image, size);
	assert(s->conv);

	s->conv_close = conv_close;
	s->conv_read = conv_read;

	/*
	 * Open COW/MAP files.
	 */
	for (cow = 0; cow <= storage_cow; cow++) {
		char path_cow[1024];
		char path_map[1024];
		int len_cow;
		int len_map;

		/*
		 * Build the file names. snprintf() never writes past the
		 * buffers; a name that does not fit is treated as fatal
		 * instead of being used truncated.
		 */
		if (cow == 0) {
			len_cow = snprintf(path_cow, sizeof(path_cow),
					"%s/%s.cow", basedir, name);
			len_map = snprintf(path_map, sizeof(path_map),
					"%s/%s.map", basedir, name);
		} else {
			len_cow = snprintf(path_cow, sizeof(path_cow),
					"%s/%s.cow%u", basedir, name, cow);
			len_map = snprintf(path_map, sizeof(path_map),
					"%s/%s.map%u", basedir, name, cow);
		}
		assert(0 <= len_cow && (size_t) len_cow < sizeof(path_cow));
		assert(0 <= len_map && (size_t) len_map < sizeof(path_map));

		/*
		 * Create files (if necessary).
		 */
		if ((storage_creat && cow == storage_cow)
		 || (access(path_cow, R_OK) < 0 && cow < storage_cow)
		 || (access(path_map, R_OK) < 0 && cow < storage_cow)
		 || (access(path_cow, R_OK | W_OK) < 0 && cow == storage_cow)
		 || (access(path_map, R_OK | W_OK) < 0 && cow == storage_cow)) {
			uint64_t map_size;

			time_stop();

			(void) unlink(path_cow);
			(void) unlink(path_map);

			/* Create *.cow file. */
			ret = storage_create_empty(path_cow, size);
			assert(0 <= ret);

			/* Create *.map file: one bit per block. */
			map_size = size;
			map_size += STORAGE_BLOCKSIZE - 1;
			map_size /= STORAGE_BLOCKSIZE;	/* # bits */
			map_size += 7;			/* round up */
			map_size /= 8;			/* # bytes */
			ret = storage_create_empty(path_map, map_size);
			assert(0 <= ret);

			time_cont();
		}

		/*
		 * Open files.
		 */
		s->media_cow[cow] = storage_simple_open(path_cow,
				cow == storage_cow);
		if (! s->media_cow[cow]) {
			assert(0); /* FIXME */
		}
		s->media_map[cow] = storage_simple_open(path_map,
				cow == storage_cow);
		if (! s->media_map[cow]) {
			assert(0); /* FIXME */
		}
	}

	return s;
}

/*
 * Release the storage `_s`: close the map and cow files of all COW levels
 * from the topmost one down to level 0, close the converter and free the
 * handle.
 *
 * Returns the bitwise OR of the results of all close operations.
 */
int
storage_destroy(void *_s)
{
	struct storage *s = _s;
	int status = 0;
	int level = storage_cow;

	/*
	 * Close COW/MAP files.
	 */
	while (0 <= level) {
		status |= storage_simple_close(s->media_map[level]);
		status |= storage_simple_close(s->media_cow[level]);
		level--;
	}

	/*
	 * Close converter.
	 */
	status |= (s->conv_close)(s->conv);

	shm_free(s);

	return status;
}

/*
 * Print the help lines for the command line options handled by
 * storage_handle_args() to stderr.
 */
void
storage_usage(void)
{
	static const char *const lines[] = {
		"\t--sparse: Use sparse files for media images.\n",
		"\t--create: Create new files even if they exist already.\n",
		"\t--cow: Use copy-on-write for media images.\n",
	};
	unsigned int i;

	for (i = 0; i < sizeof(lines) / sizeof(lines[0]); i++) {
		fputs(lines[i], stderr);
	}
}

/*
 * Determine the storage options.
 *
 * Defaults (all 0) are overridden first by the environment variables
 * FAUM_SPARSE, FAUM_CREATE and FAUM_COW, then by the command line options
 * "--sparse", "--create", "--cow" (level 1) and "--cow=<level>".
 *
 * Recognized options are removed from `argv` and `*argc` is decremented
 * accordingly; everything else, including arguments that merely start
 * with "--cow", is left in place. Values outside their valid range make
 * the program print a message and exit with status 1.
 */
void
storage_handle_args(int *argc, char **argv)
{
	const char *sp;
	unsigned int na;
	long val;

	/* Get defaults. */
	storage_sparse = 0;
	storage_creat = 0;
	storage_cow = 0;

	/*
	 * Get parameters from environment.
	 *
	 * Values are parsed with strtol() and range-checked as long before
	 * being narrowed to int, so huge numbers are rejected instead of
	 * overflowing.
	 */
	sp = getenv("FAUM_SPARSE");
	val = sp ? strtol(sp, NULL, 10) : 0;
	if (val < 0 || 1 < val) {
		fprintf(stderr, "%s: Bad FAUM_SPARSE setting.\n", progname);
		exit(1);
	}
	storage_sparse = (int) val;

	sp = getenv("FAUM_CREATE");
	val = sp ? strtol(sp, NULL, 10) : 0;
	if (val < 0 || 1 < val) {
		fprintf(stderr, "%s: Bad FAUM_CREATE setting.\n", progname);
		exit(1);
	}
	storage_creat = (int) val;

	sp = getenv("FAUM_COW");
	val = sp ? strtol(sp, NULL, 10) : 0;
	if (val < 0 || STORAGE_COW_COUNT <= val) {
		fprintf(stderr, "%s: Bad FAUM_COW setting.\n", progname);
		exit(1);
	}
	storage_cow = (int) val;

	/* Get parameters from command line. */
	na = *argc;
	while (0 < na) {
		int consumed;

		assert(argv != NULL);

		consumed = 1;
		if (strcmp(*argv, "--sparse") == 0) {
			storage_sparse = 1;

		} else if (strcmp(*argv, "--create") == 0) {
			storage_creat = 1;

		} else if (strcmp(*argv, "--cow") == 0) {
			storage_cow = 1;

		} else if (strncmp(*argv, "--cow=", 6) == 0) {
			val = strtol(*argv + 6, NULL, 10);
			if (val < 0 || STORAGE_COW_COUNT <= val) {
				fprintf(stderr, "%s: Bad --cow setting.\n",
						progname);
				exit(1);
			}
			storage_cow = (int) val;

		} else {
			/* Not one of our options. */
			consumed = 0;
		}

		if (consumed) {
			/*
			 * Remove this argument: shift the remaining ones
			 * and the terminating entry of argv down by one.
			 */
			(*argc)--; na--;
			memmove(argv, argv + 1, (na + 1) * sizeof(char *));
		} else {
			na--; argv++;
		}
	}
}

/*
 * Let the settings from `simsetup` take precedence over the options
 * obtained from the environment and the command line. A simsetup field
 * that is zero leaves the corresponding option untouched.
 */
void
storage_handle_simsetup(void)
{
	if (simsetup.create) {
		storage_creat = simsetup.create;
	}
	if (simsetup.cow) {
		storage_cow = simsetup.cow;
	}
	if (simsetup.sparse) {
		storage_sparse = simsetup.sparse;
	}
}
