/* See LICENSE file for copyright and license details. */
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "../gen/types.h"
#include "../grapheme.h"
#include "util.h"

/*
 * Prepare the reader r for reading from src, which is interpreted
 * according to type (an array of codepoints or a UTF-8 byte string)
 * and holds srclen elements of that kind. Reading starts at offset
 * zero and every soft-limit slot is set to SIZE_MAX.
 */
void
herodotus_reader_init(HERODOTUS_READER *r, enum herodotus_type type,
                      const void *src, size_t srclen)
{
	size_t slot = LEN(r->soft_limit);

	/* clear the whole soft-limit stack, back to front */
	while (slot-- > 0) {
		r->soft_limit[slot] = SIZE_MAX;
	}

	r->terminated_by_null = false;
	r->off = 0;
	r->srclen = srclen;
	r->src = src;
	r->type = type;
}

/*
 * Make dest a "fresh" reader over the part of src that has not been
 * read yet: dest starts at offset zero at the current position of
 * src, and the source length as well as all soft limits are rebased
 * by src->off. A value of SIZE_MAX (length or soft limit) is carried
 * over unchanged instead of being rebased.
 */
void
herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS_READER *dest)
{
	size_t k;

	dest->type = src->type;

	/* move the base pointer forward in units of the reader's type */
	if (src->src == NULL) {
		dest->src = NULL;
	} else if (src->type == HERODOTUS_TYPE_CODEPOINT) {
		dest->src = ((const uint_least32_t *)(src->src)) + src->off;
	} else { /* src->type == HERODOTUS_TYPE_UTF8 */
		dest->src = ((const char *)(src->src)) + src->off;
	}

	/* remaining length; never underflows */
	if (src->srclen == SIZE_MAX) {
		dest->srclen = SIZE_MAX;
	} else if (src->off < src->srclen) {
		dest->srclen = src->srclen - src->off;
	} else {
		dest->srclen = 0;
	}

	dest->off = 0;
	dest->terminated_by_null = src->terminated_by_null;

	for (k = 0; k < LEN(src->soft_limit); k++) {
		if (src->soft_limit[k] == SIZE_MAX) {
			dest->soft_limit[k] = SIZE_MAX;
		} else if (src->off < src->soft_limit[k]) {
			dest->soft_limit[k] = src->soft_limit[k] - src->off;
		} else {
			/*
			 * degenerate case: the offset is not below the
			 * soft limit. We cannot decide here to release
			 * the limit, so it is clamped to zero, which
			 * prevents any further reads
			 */
			dest->soft_limit[k] = 0;
		}
	}
}

/*
 * Push a new soft limit onto the reader's limit stack that allows
 * reading at most count more elements from the current offset.
 *
 * All existing limits move one slot back; the limit in the last slot
 * is dropped. If r->off + count does not fit into a size_t, the new
 * limit saturates at SIZE_MAX instead of wrapping around to a value
 * below the current offset (which would silently block every read).
 */
void
herodotus_reader_push_advance_limit(HERODOTUS_READER *r, size_t count)
{
	size_t i;

	for (i = LEN(r->soft_limit) - 1; i >= 1; i--) {
		r->soft_limit[i] = r->soft_limit[i - 1];
	}

	/* overflow-safe form of r->off + count */
	r->soft_limit[0] = (count > SIZE_MAX - r->off) ? SIZE_MAX :
	                                                 r->off + count;
}

/*
 * Remove the most recently pushed soft limit (slot 0) from the
 * reader's limit stack. The remaining limits move one slot forward
 * and the freed last slot is set to SIZE_MAX.
 */
void
herodotus_reader_pop_limit(HERODOTUS_READER *r)
{
	const size_t last = LEN(r->soft_limit) - 1;
	size_t slot = 0;

	while (slot < last) {
		r->soft_limit[slot] = r->soft_limit[slot + 1];
		slot++;
	}

	r->soft_limit[last] = SIZE_MAX;
}

/*
 * Return the offset of the next word break, relative to the reader's
 * current position, as determined by grapheme_next_word_break() or
 * grapheme_next_word_break_utf8() (depending on the reader's type).
 * The reader itself is not modified.
 *
 * The length handed to those functions is the number of elements left
 * up to the smaller of the source length and the active soft limit.
 */
size_t
herodotus_reader_next_word_break(const HERODOTUS_READER *r)
{
	size_t limit, remaining;

	limit = MIN(r->srclen, r->soft_limit[0]);

	if (limit == SIZE_MAX) {
		/*
		 * Neither a finite length nor a soft limit applies.
		 * SIZE_MAX is a sentinel here and must be passed on
		 * unchanged: subtracting the offset would turn it
		 * into a huge, but finite, length and the
		 * NUL-termination of the input would no longer be
		 * recognised.
		 */
		remaining = SIZE_MAX;
	} else {
		/*
		 * clamp to zero rather than underflowing in the
		 * degenerate case of an offset beyond the limit
		 */
		remaining = (r->off < limit) ? limit - r->off : 0;
	}

	if (r->type == HERODOTUS_TYPE_CODEPOINT) {
		return grapheme_next_word_break(
			(const uint_least32_t *)(r->src) + r->off,
			remaining);
	} else { /* r->type == HERODOTUS_TYPE_UTF8 */
		return grapheme_next_word_break_utf8(
			(const char *)(r->src) + r->off,
			remaining);
	}
}

/*
 * Return the number of elements the codepoint at the reader's current
 * position occupies, without modifying the reader. For a codepoint
 * reader this is 1 while the offset is below the effective limit and
 * 0 otherwise; for a UTF-8 reader it is whatever
 * grapheme_decode_utf8() reports for the bytes left up to that limit.
 */
size_t
herodotus_reader_next_codepoint_break(const HERODOTUS_READER *r)
{
	/* the smaller of the source length and the active soft limit */
	const size_t limit = MIN(r->srclen, r->soft_limit[0]);

	if (r->type != HERODOTUS_TYPE_CODEPOINT) {
		/* r->type == HERODOTUS_TYPE_UTF8 */
		return grapheme_decode_utf8((const char *)(r->src) + r->off,
		                            limit - r->off, NULL);
	}

	return (r->off < limit) ? 1 : 0;
}

/*
 * Return the reader's current offset, i.e. the number of elements
 * (codepoints or bytes, depending on the reader's type) it has
 * advanced past since it was initialised or copied.
 */
size_t
herodotus_reader_number_read(const HERODOTUS_READER *r)
{
	return r->off;
}

/*
 * Read the codepoint at the reader's current position into *cp and,
 * if advance is true, move the reader past it.
 *
 * Returns
 *  - HERODOTUS_STATUS_SUCCESS if a codepoint was read,
 *  - HERODOTUS_STATUS_SOFT_LIMIT_REACHED if the offset has reached
 *    the active soft limit (*cp is GRAPHEME_INVALID_CODEPOINT),
 *  - HERODOTUS_STATUS_END_OF_BUFFER if there is no source, the
 *    source length is exhausted or a previous read hit the
 *    terminating NUL (*cp is GRAPHEME_INVALID_CODEPOINT in these
 *    cases), if a NUL codepoint is read from a source of length
 *    SIZE_MAX, or if the codepoint would extend beyond the limits.
 */
enum herodotus_status
herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp)
{
	size_t limit, consumed;

	/* hard end: nothing (more) to read at all */
	if (r->src == NULL || r->terminated_by_null || r->off >= r->srclen) {
		*cp = GRAPHEME_INVALID_CODEPOINT;
		return HERODOTUS_STATUS_END_OF_BUFFER;
	}

	/* soft end: the data goes on but the active limit forbids it */
	if (r->off >= r->soft_limit[0]) {
		*cp = GRAPHEME_INVALID_CODEPOINT;
		return HERODOTUS_STATUS_SOFT_LIMIT_REACHED;
	}

	/* from here on r->off is strictly below both bounds */
	limit = MIN(r->srclen, r->soft_limit[0]);

	if (r->type == HERODOTUS_TYPE_CODEPOINT) {
		consumed = 1;
		*cp = ((const uint_least32_t *)(r->src))[r->off];
	} else { /* r->type == HERODOTUS_TYPE_UTF8 */
		consumed = grapheme_decode_utf8((const char *)(r->src) +
		                                r->off, limit - r->off, cp);
	}

	if (unlikely(r->srclen == SIZE_MAX && *cp == 0)) {
		/*
		 * A source length of SIZE_MAX means the end is marked
		 * by a null-codepoint, and this is it. Remember that,
		 * leave the offset alone and behave as if the buffer
		 * had ended here all along
		 */
		r->terminated_by_null = true;
		return HERODOTUS_STATUS_END_OF_BUFFER;
	}

	if (r->off + consumed > limit) {
		/*
		 * the codepoint needs more elements than are
		 * available; terminate instead of returning garbage
		 */
		return HERODOTUS_STATUS_END_OF_BUFFER;
	}

	/* the new offset is now known to stay within the limits */
	if (advance) {
		r->off += consumed;
	}

	return HERODOTUS_STATUS_SUCCESS;
}

/*
 * Prepare the writer w for writing to dest, which is interpreted
 * according to type (an array of codepoints or a UTF-8 byte string)
 * and has room for destlen elements of that kind. Writing starts at
 * offset zero and no unwritable offset has been recorded yet
 * (marked by SIZE_MAX).
 */
void
herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type,
                      void *dest, size_t destlen)
{
	/* bookkeeping */
	w->off = 0;
	w->first_unwritable_offset = SIZE_MAX;

	/* destination */
	w->dest = dest;
	w->destlen = destlen;
	w->type = type;
}

/*
 * Write a terminating zero element (codepoint 0 or '\0', depending
 * on the writer's type) into the destination buffer, truncating the
 * output if necessary. Nothing is written if there is no destination
 * buffer or if its length is zero. The writer's offset is never
 * changed.
 */
void
herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
{
	size_t pos;

	if (w->dest == NULL) {
		return;
	}

	/* first decide where the terminator goes ... */
	if (w->off < w->destlen) {
		/* there is still space in the buffer; simply use it */
		pos = w->off;
	} else if (w->first_unwritable_offset < w->destlen) {
		/*
		 * There is no more space in the buffer, but the first
		 * offset that could not be written to has been noted
		 * down and lies within the buffer. Writing thus bailed
		 * out when a multibyte-codepoint did not fit, so the
		 * output ends right before that offset.
		 */
		pos = w->first_unwritable_offset;
	} else if (w->destlen > 0) {
		/*
		 * There is no more space in the buffer and the first
		 * unwritable offset is not within it either, so the
		 * last element of the buffer has to be overwritten.
		 */
		pos = w->destlen - 1;
	} else {
		/* zero-length buffer, there is no place to write to */
		return;
	}

	/* ... then write it in the writer's element type */
	if (w->type == HERODOTUS_TYPE_CODEPOINT) {
		((uint_least32_t *)(w->dest))[pos] = 0;
	} else { /* w->type == HERODOTUS_TYPE_UTF8 */
		((char *)(w->dest))[pos] = '\0';
	}
}

/*
 * Return the writer's current offset. The offset is increased for
 * every codepoint handed to herodotus_write_codepoint(), even when
 * the codepoint did not fit into the destination buffer, so the
 * result can exceed the destination length.
 */
size_t
herodotus_writer_number_written(const HERODOTUS_WRITER *w)
{
	return w->off;
}

/*
 * Append the codepoint cp to the writer's output.
 *
 * The writer's offset is always increased by the number of elements
 * the codepoint takes up (1 for a codepoint writer, the length of
 * the UTF-8 encoding for a UTF-8 writer), even if there is no
 * destination buffer or the codepoint does not fit. This is what
 * enables truncation detection.
 */
void
herodotus_write_codepoint(HERODOTUS_WRITER *w, uint_least32_t cp)
{
	size_t need;
	bool fits;

	if (w->type == HERODOTUS_TYPE_CODEPOINT) {
		if (w->dest != NULL && w->off < w->destlen) {
			((uint_least32_t *)(w->dest))[w->off] = cp;
		}

		w->off++;
		return;
	}

	/* w->type == HERODOTUS_TYPE_UTF8 */

	/* a dry run yields the number of bytes the encoding needs */
	need = grapheme_encode_utf8(cp, NULL, 0);

	/*
	 * the encoded codepoint is only stored if it ends strictly
	 * before the end of the buffer
	 */
	fits = (w->dest != NULL) && (w->off + need < w->destlen);

	if (fits) {
		grapheme_encode_utf8(cp, (char *)(w->dest) + w->off,
		                     w->destlen - w->off);
	} else if (w->first_unwritable_offset == SIZE_MAX) {
		/*
		 * this is the first time an offset could not be
		 * (completely) written to; note it down
		 */
		w->first_unwritable_offset = w->off;
	}

	w->off += need;
}

/*
 * Initialise the property window p at the current position of the
 * reader r (which itself is only copied, never advanced).
 *
 * state is stored and later handed to skip_shift_callback; no_prop is
 * the property value that stands for "no property"; get_break_prop
 * maps a codepoint to its property and is_skippable_prop tells
 * whether a property is left out of the "skip" view.
 *
 * After the call the previous properties of both views are no_prop
 * and the next properties hold the properties of up to two upcoming
 * codepoints (in the skip view: up to two non-skippable ones); slots
 * that could not be filled are no_prop.
 */
void
proper_init(const HERODOTUS_READER *r, void *state, uint_least8_t no_prop,
            uint_least8_t (*get_break_prop)(uint_least32_t),
            bool (*is_skippable_prop)(uint_least8_t),
            void (*skip_shift_callback)(uint_least8_t, void *),
            struct proper *p)
{
	uint_least32_t cp;
	uint_least8_t candidate;
	size_t filled;

	/* store the parameters */
	p->state = state;
	p->no_prop = no_prop;
	p->get_break_prop = get_break_prop;
	p->is_skippable_prop = is_skippable_prop;
	p->skip_shift_callback = skip_shift_callback;

	/*
	 * the mid-reader is only there to reflect the current
	 * position of the viewing-line
	 */
	herodotus_reader_copy(r, &(p->mid_reader));

	/*
	 * we are at the start of the buffer, so there are no
	 * previous properties in either view
	 */
	p->raw.prev_prop[0] = p->no_prop;
	p->raw.prev_prop[1] = p->no_prop;
	p->skip.prev_prop[0] = p->no_prop;
	p->skip.prev_prop[1] = p->no_prop;

	/*
	 * raw view: take the properties of the next two codepoints,
	 * stopping early when no further codepoint can be read
	 */
	herodotus_reader_copy(r, &(p->raw_reader));
	p->raw.next_prop[0] = p->no_prop;
	p->raw.next_prop[1] = p->no_prop;

	filled = 0;
	while (filled < 2) {
		if (herodotus_read_codepoint(&(p->raw_reader), true, &cp) !=
		    HERODOTUS_STATUS_SUCCESS) {
			break;
		}

		p->raw.next_prop[filled] = p->get_break_prop(cp);
		filled++;
	}

	/*
	 * skip view: the same, except that codepoints with a
	 * skippable property are read past without taking a slot
	 */
	herodotus_reader_copy(r, &(p->skip_reader));
	p->skip.next_prop[0] = p->no_prop;
	p->skip.next_prop[1] = p->no_prop;

	filled = 0;
	while (filled < 2) {
		if (herodotus_read_codepoint(&(p->skip_reader), true, &cp) !=
		    HERODOTUS_STATUS_SUCCESS) {
			break;
		}

		candidate = p->get_break_prop(cp);
		if (!p->is_skippable_prop(candidate)) {
			p->skip.next_prop[filled] = candidate;
			filled++;
		}
	}
}

/*
 * Move the viewing-line of the property window p forward by one
 * codepoint.
 *
 * Returns 1, without shifting anything, if the property right after
 * the raw viewing-line is no_prop (nothing left to move past), and 0
 * after a successful shift. The skip view is only shifted (and
 * skip_shift_callback only called) when the property that moved past
 * the raw viewing-line is not skippable.
 */
int
proper_advance(struct proper *p)
{
	uint_least32_t cp;
	uint_least8_t incoming, candidate;

	/*
	 * read in the next "raw" property; it stays a no-prop if no
	 * further codepoint can be read
	 */
	incoming = p->no_prop;
	if (herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
	    HERODOTUS_STATUS_SUCCESS) {
		incoming = p->get_break_prop(cp);
	}

	/*
	 * the property that would be moved past the raw viewing-line
	 * is p->raw.next_prop[0]. If it is a no-prop we are at the
	 * end of the buffer and do not shift
	 */
	if (p->raw.next_prop[0] == p->no_prop) {
		return 1;
	}

	/* shift the raw window by one */
	p->raw.prev_prop[1] = p->raw.prev_prop[0];
	p->raw.prev_prop[0] = p->raw.next_prop[0];
	p->raw.next_prop[0] = p->raw.next_prop[1];
	p->raw.next_prop[1] = incoming;

	/* keep the middle reader in step with the viewing-line */
	(void)herodotus_read_codepoint(&(p->mid_reader), true, &cp);

	/*
	 * p->raw.prev_prop[0] now holds the property that just moved
	 * past the raw viewing-line (not a no-prop, as checked above).
	 * If it is skippable, the skip view stays as it is
	 */
	if (p->is_skippable_prop(p->raw.prev_prop[0])) {
		return 0;
	}

	/* otherwise the skip window is shifted by one as well ... */
	p->skip.prev_prop[1] = p->skip.prev_prop[0];
	p->skip.prev_prop[0] = p->skip.next_prop[0];
	p->skip.next_prop[0] = p->skip.next_prop[1];

	/*
	 * ... the callback is told about the property that passed the
	 * skip viewing-line (now stored in p->skip.prev_prop[0]) ...
	 */
	p->skip_shift_callback(p->skip.prev_prop[0], p->state);

	/*
	 * ... and the freed slot is refilled with the property of the
	 * next non-skippable codepoint, or a no-prop if there is none
	 */
	p->skip.next_prop[1] = p->no_prop;
	for (;;) {
		if (herodotus_read_codepoint(&(p->skip_reader), true, &cp) !=
		    HERODOTUS_STATUS_SUCCESS) {
			break;
		}

		candidate = p->get_break_prop(cp);
		if (!p->is_skippable_prop(candidate)) {
			p->skip.next_prop[1] = candidate;
			break;
		}
	}

	return 0;
}