From 01309c8bd0a9ac4476952ec5063499ec980a7b12 Mon Sep 17 00:00:00 2001
From: erihel <erihel@gmail.com>
Date: Thu, 11 Apr 2013 13:37:15 +0200
Subject: * Added clipboard support (issue #60) * Fixed keyboard shortcuts
 while code editing in game

---
 lib/clipboard/src/clipboardWin32.c | 160 +++++++++++
 lib/clipboard/src/clipboardX11.c   | 293 ++++++++++++++++++++
 lib/clipboard/src/utf.c            | 530 +++++++++++++++++++++++++++++++++++++
 3 files changed, 983 insertions(+)
 create mode 100644 lib/clipboard/src/clipboardWin32.c
 create mode 100644 lib/clipboard/src/clipboardX11.c
 create mode 100644 lib/clipboard/src/utf.c

(limited to 'lib/clipboard/src')

diff --git a/lib/clipboard/src/clipboardWin32.c b/lib/clipboard/src/clipboardWin32.c
new file mode 100644
index 0000000..be48906
--- /dev/null
+++ b/lib/clipboard/src/clipboardWin32.c
@@ -0,0 +1,160 @@
+/*
+	This file is part of Warzone 2100.
+	Copyright (C) 2008  Freddie Witherden
+	Copyright (C) 2008-2009  Warzone Resurrection Project
+
+	Warzone 2100 is free software; you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation; either version 2 of the License, or
+	(at your option) any later version.
+
+	Warzone 2100 is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with Warzone 2100; if not, write to the Free Software
+	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#include "utf.h"
+// Defines most macros and types from <stdbool.h> and <stdint.h>
+#include "types.h"
+
+// Returns the clipboard text as malloc-ed UTF-8 without '\r', or NULL if there
+// is no Unicode text on the clipboard or the UTF-8 conversion failed.
+char *widgetGetClipboardText()
+{
+	uint16_t *clipboardText;
+	char *ourText = NULL;
+
+	// If there is any text on the clipboard, open it
+	if (IsClipboardFormatAvailable(CF_UNICODETEXT) && OpenClipboard(NULL))
+	{
+		// Get any text on the clipboard
+		HANDLE hClipboardData = GetClipboardData(CF_UNICODETEXT);
+
+		// If the handle is valid, lock it to get at the UTF-16 text
+		if (hClipboardData)
+		{
+			clipboardText = GlobalLock(hClipboardData);
+			if (clipboardText)
+			{
+				// Convert it to UTF-8 (from UTF-16); this may yield NULL
+				ourText = UTF16toUTF8(clipboardText, NULL);
+				GlobalUnlock(hClipboardData);
+			}
+		}
+
+		// Close the clipboard
+		CloseClipboard();
+	}
+
+	// Strip any '\r' from the text, but only if the conversion succeeded
+	if (ourText)
+	{
+		size_t i, j;
+
+		for (i = j = 0; ourText[i]; i++)
+		{
+			if (ourText[i] != '\r')
+			{
+				ourText[j++] = ourText[i];
+			}
+		}
+
+		// NUL terminate
+		ourText[j] = '\0';
+	}
+
+	return ourText;
+}
+
+// Puts UTF-8 `text` on the Windows clipboard as CF_UNICODETEXT with each "\n"
+// turned into "\r\n". Returns true only if the clipboard accepted the data.
+bool widgetSetClipboardText(const char *text)
+{
+	bool ret = false;
+	char *newText;          // Copy of text with \n => \r\n
+	uint16_t *utf16NewText; // UTF-16 version of newText
+	size_t nbytes;          // Number of bytes utf16NewText is in size
+	size_t count, i, j;
+
+	// Get the number of '\n' characters in the text
+	for (i = count = 0; text[i]; i++)
+	{
+		if (text[i] == '\n')
+		{
+			count++;
+		}
+	}
+
+	// Allocate enough space for the \r\n string
+	newText = malloc(strlen(text) + count + 1);
+	if (newText == NULL)
+	{
+		return false;
+	}
+
+	// Copy the string, converting \n to \r\n
+	for (i = j = 0; text[i]; i++, j++)
+	{
+		// If the character is a newline prepend a \r
+		if (text[i] == '\n')
+		{
+			newText[j++] = '\r';
+		}
+		// Copy the character (\n or otherwise)
+		newText[j] = text[i];
+	}
+	// NUL terminate
+	newText[j] = '\0';
+
+	// Convert to UTF-16; the intermediate UTF-8 copy is then done with
+	utf16NewText = UTF8toUTF16(newText, &nbytes);
+	free(newText);
+
+	// Open the clipboard, unless the conversion failed (returned NULL)
+	if (utf16NewText && OpenClipboard(NULL))
+	{
+		HGLOBAL hGlobal;
+		uint16_t *clipboardText;
+
+		// Empty it (which also transfers ownership of it to ourself)
+		EmptyClipboard();
+
+		// Allocate global space for the text and lock it for writing
+		hGlobal = GlobalAlloc(GMEM_MOVEABLE, nbytes);
+		clipboardText = hGlobal ? GlobalLock(hGlobal) : NULL;
+
+		if (clipboardText)
+		{
+			// Copy the text
+			memcpy(clipboardText, utf16NewText, nbytes);
+
+			// Unlock the memory (must come before CloseClipboard())
+			GlobalUnlock(hGlobal);
+
+			// Place the handle on the clipboard, which then owns it
+			ret = (SetClipboardData(CF_UNICODETEXT, hGlobal) != NULL);
+		}
+
+		// The handle is still ours if it never reached the clipboard
+		if (hGlobal && !ret)
+		{
+			GlobalFree(hGlobal);
+		}
+
+		// Close the clipboard
+		CloseClipboard();
+	}
+
+	// Release the malloc-ed UTF-16 string
+	free(utf16NewText);
+
+	return ret;
+}
diff --git a/lib/clipboard/src/clipboardX11.c b/lib/clipboard/src/clipboardX11.c
new file mode 100644
index 0000000..0653250
--- /dev/null
+++ b/lib/clipboard/src/clipboardX11.c
@@ -0,0 +1,293 @@
+/*
+	This file is part of Warzone 2100.
+	Copyright (C) 2008  Freddie Witherden
+	Copyright (C) 2008-2009  Warzone Resurrection Project
+
+	Warzone 2100 is free software; you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation; either version 2 of the License, or
+	(at your option) any later version.
+
+	Warzone 2100 is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with Warzone 2100; if not, write to the Free Software
+	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+/*
+ * Something wicked this way comes...
+ * Documentation/reference:
+ *  http://svr-www.eng.cam.ac.uk/~er258/code/dist/x_clipboard/paste.cc
+ */
+
+#include <stdbool.h>
+#include <assert.h>
+
+#include <SDL_syswm.h>
+#include <SDL.h>
+
+static SDL_SysWMinfo info; // Filled in by SDL_GetWMInfo() on first use
+
+// Atoms, looked up with XInternAtom() in widgetInitialiseClipboardX11()
+static Atom XA_CLIPBOARD;
+static Atom XA_COMPOUND_TEXT;
+static Atom XA_UTF8_STRING;
+static Atom XA_TARGETS;
+
+/**
+ * Filters through SDL_Events searching for clipboard requests from the X
+ * server and answers them from the root window's cut buffer.
+ *
+ * @param evt   The event to filter.
+ * @return      Always 1.
+ */
+static int widgetClipboardFilterX11(const SDL_Event *evt)
+{
+	// We are only interested in window manager events
+	if (evt->type == SDL_SYSWMEVENT)
+	{
+		XEvent xevent = evt->syswm.msg->event.xevent;
+
+		// See if the event is a selection/clipboard request
+		if (xevent.type == SelectionRequest)
+		{
+			XSelectionRequestEvent *request = &xevent.xselectionrequest;
+
+			// Generate a reply that echoes the fields of the request
+			XSelectionEvent reply;
+
+			reply.type = SelectionNotify;
+			reply.serial = request->serial;
+			reply.send_event = True;
+			reply.display = info.info.x11.display;
+			reply.requestor = request->requestor;
+			reply.selection = request->selection;
+			reply.property = request->property;
+			reply.target = request->target;
+			reply.time = request->time;
+
+			// They want to know what we can provide/offer
+			if (request->target == XA_TARGETS)
+			{
+				Atom possibleTargets[] =
+					{ XA_STRING, XA_UTF8_STRING, XA_COMPOUND_TEXT };
+
+				XChangeProperty(info.info.x11.display, request->requestor,
+				                request->property, XA_ATOM, 32, PropModeReplace,
+				                (unsigned char *) possibleTargets, 3);
+			}
+			// They want a string (all we can provide)
+			else if (request->target == XA_STRING
+			      || request->target == XA_UTF8_STRING
+			      || request->target == XA_COMPOUND_TEXT)
+			{
+				int len;
+				char *xdata = XFetchBytes(info.info.x11.display, &len);
+
+				XChangeProperty(info.info.x11.display, request->requestor,
+				                request->property, request->target, 8,
+				                PropModeReplace, (unsigned char *) xdata,
+				                len);
+				// XFetchBytes gives NULL for an empty buffer; never XFree that
+				if (xdata)
+				{
+					XFree(xdata);
+				}
+			}
+			else
+			{
+				// Did not have what they wanted, so no property set
+				reply.property = None;
+			}
+
+			// Dispatch the event
+			XSendEvent(request->display, request->requestor, 0, NoEventMask,
+			           (XEvent *) &reply);
+			XSync(info.info.x11.display, False);
+		}
+	}
+
+	return 1;
+}
+
+// One-time X11 clipboard setup: fetches SDL's window manager info, installs
+// the clipboard event filter and interns the atoms used by this file.
+static void widgetInitialiseClipboardX11()
+{
+	static bool initialised = false;
+
+	if (!initialised)
+	{
+		// Get the window manager information; SDL wants the version we were
+		// compiled against filled in before the call
+		SDL_VERSION(&info.version);
+		SDL_GetWMInfo(&info);
+
+		// Ensure we're running under X11
+		assert(info.subsystem == SDL_SYSWM_X11);
+
+		// Register the event filter
+		SDL_EventState(SDL_SYSWMEVENT, SDL_ENABLE);
+		SDL_SetEventFilter(widgetClipboardFilterX11);
+
+		// Lock the connection to the X server
+		info.info.x11.lock_func();
+
+		// Intern the atoms we need. only_if_exists is False so that an atom
+		// no other client has created yet is created rather than coming back
+		// as None.
+		XA_CLIPBOARD = XInternAtom(info.info.x11.display, "CLIPBOARD", False);
+		XA_COMPOUND_TEXT = XInternAtom(info.info.x11.display, "COMPOUND_TEXT",
+		                               False);
+		XA_UTF8_STRING = XInternAtom(info.info.x11.display, "UTF8_STRING",
+		                             False);
+		XA_TARGETS = XInternAtom(info.info.x11.display, "TARGETS", False);
+
+		// Unlock the connection
+		info.info.x11.unlock_func();
+
+		// We are initialised
+		initialised = true;
+	}
+}
+
+// Returns a newly allocated copy of the text offered by the owner of the
+// CLIPBOARD selection, or NULL if there is no owner or no data was obtained.
+char *widgetGetClipboardText()
+{
+	char *text = NULL;
+	unsigned char *data = NULL;
+	Atom type;
+	int format, result;
+	unsigned long len, bytesLeft = 0, dummy;
+	Window selectionOwner;
+
+	// Make sure we are initialised
+	widgetInitialiseClipboardX11();
+
+	// Lock the connection
+	info.info.x11.lock_func();
+
+	// Get the owner of the clipboard selection
+	selectionOwner = XGetSelectionOwner(info.info.x11.display, XA_CLIPBOARD);
+
+	// If there is a selection (and therefore owner) fetch it
+	if (selectionOwner != None)
+	{
+		SDL_Event event;
+		bool response = false;
+
+		/*
+		 * Ask the window which currently owns the clipboard to convert it to
+		 * an XA_UTF8_STRING and place it into the XA_CLIPBOARD property of
+		 * our window.
+		 */
+		XConvertSelection(info.info.x11.display, XA_CLIPBOARD, XA_UTF8_STRING,
+		                  XA_CLIPBOARD, info.info.x11.window, CurrentTime);
+		XFlush(info.info.x11.display);
+
+		/*
+		 * We now need to wait for a response from the window that owns the
+		 * clipboard.
+		 */
+
+		// Unlock the connection so that the SDL event loop may function
+		info.info.x11.unlock_func();
+
+		// Wait for events, giving up if SDL reports an error while waiting
+		while (!response && SDL_WaitEvent(&event))
+		{
+			// If the event is a window manager event
+			if (event.type == SDL_SYSWMEVENT)
+			{
+				XEvent xevent = event.syswm.msg->event.xevent;
+
+				// See if it is a response to our request
+				if (xevent.type == SelectionNotify
+				 && xevent.xselection.requestor == info.info.x11.window)
+				{
+					response = true;
+				}
+			}
+		}
+
+		// Lock the connection once again
+		info.info.x11.lock_func();
+
+		// See how much data is there
+		XGetWindowProperty(info.info.x11.display, info.info.x11.window,
+		                   XA_CLIPBOARD, 0, 0, False, AnyPropertyType, &type,
+		                   &format, &len, &bytesLeft, &data);
+
+		// If any 0-length data was returned, free it
+		if (data)
+		{
+			XFree(data);
+			data = NULL;
+		}
+
+		// If there is any data
+		if (bytesLeft)
+		{
+			// Fetch the data
+			result = XGetWindowProperty(info.info.x11.display,
+			                            info.info.x11.window, XA_CLIPBOARD, 0,
+			                            bytesLeft, False, AnyPropertyType,
+			                            &type, &format, &len, &dummy, &data);
+
+			// If we really got some data, duplicate it
+			if (result == Success && data)
+			{
+				text = strdup((char *) data);
+				XFree(data);
+			}
+		}
+
+		// Delete the property now that we are finished with it
+		XDeleteProperty(info.info.x11.display, info.info.x11.window,
+		                XA_CLIPBOARD);
+	}
+
+	// Unlock the connection
+	info.info.x11.unlock_func();
+
+	return text;
+}
+
+// Publishes `text` as the clipboard contents: stores it in the root window's
+// cut buffer and claims the CLIPBOARD selection for our window.
+// Returns true if our window is the selection owner afterwards, false
+// otherwise.
+bool widgetSetClipboardText(const char *text)
+{
+	Window selectionOwner;
+	bool owned;
+
+	// Make sure we are initialised
+	widgetInitialiseClipboardX11();
+
+	// Everything below talks to the X server, so hold the connection lock
+	info.info.x11.lock_func();
+
+	// Copy the text into the root windows cut buffer (for Xterm compatibility)
+	// NOTE(review): the length includes the terminating NUL, which is therefore
+	// stored as well - confirm that this is intended
+	XStoreBytes(info.info.x11.display, text, strlen(text) + 1);
+
+	// Set ourself as the owner of the CLIPBOARD atom
+	XSetSelectionOwner(info.info.x11.display, XA_CLIPBOARD,
+	                   info.info.x11.window, CurrentTime);
+
+	// Ownership is not guaranteed, so ask the server who owns it now
+	selectionOwner = XGetSelectionOwner(info.info.x11.display, XA_CLIPBOARD);
+	owned = (selectionOwner == info.info.x11.window);
+
+	// Unlock the connection on the single exit path
+	info.info.x11.unlock_func();
+
+	return owned;
+}
diff --git a/lib/clipboard/src/utf.c b/lib/clipboard/src/utf.c
new file mode 100644
index 0000000..2313bb6
--- /dev/null
+++ b/lib/clipboard/src/utf.c
@@ -0,0 +1,530 @@
+/*
+	This file is part of Warzone 2100.
+	Copyright (C) 2007  Giel van Schijndel
+	Copyright (C) 2007-2009  Warzone Resurrection Project
+
+	Warzone 2100 is free software; you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation; either version 2 of the License, or
+	(at your option) any later version.
+
+	Warzone 2100 is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with Warzone 2100; if not, write to the Free Software
+	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+	$Revision: 9101 $
+	$Id: utf.c 9101 2010-01-10 13:11:55Z zarelsl $
+	$HeadURL: https://warzone2100.svn.sourceforge.net/svnroot/warzone2100/trunk/lib/framework/utf.c $
+*/
+
+/** \file
+ *  Functions to convert between different Unicode Transformation Formats (UTF for short)
+ */
+
+#include "utf.h"
+#include <assert.h>
+#include <stdlib.h>
+
+#if defined(LIB_COMPILE)
+# define ASSERT(expr, ...) (assert(expr))
+# define debug(part, ...) ((void)0)
+#else
+# include "debug.h"
+#endif
+
+// Assert that non-starting octets are of the form 10xxxxxx
+#define ASSERT_NON_START_OCTET(octet) \
+	assert(((octet) & 0xC0) == 0x80 && "invalid non-start UTF-8 octet")
+
+// Assert that starting octets are either of the form 0xxxxxxx (ASCII) or 11xxxxxx
+#define ASSERT_START_OCTECT(octet) \
+	assert(((octet) & 0x80) == 0x00 || ((octet) & 0xC0) == 0xC0 || !"invalid starting UTF-8 octet")
+
+// Assert that hexadect (16bit sequence) 1 of UTF-16 surrogate pair sequences are of the form 110110XXXXXXXXXX
+#define ASSERT_START_HEXADECT(hexadect) \
+	assert(((hexadect) & 0xFC00) == 0xD800 && "invalid first UTF-16 hexadect")
+
+// Assert that hexadect (16bit sequence) 2 of UTF-16 surrogate pair sequences are of the form 110111XXXXXXXXXX
+#define ASSERT_FINAL_HEXADECT(hexadect) \
+	assert(((hexadect) & 0xFC00) == 0xDC00 && "invalid second UTF-16 hexadect")
+
+// Decodes the UTF-8 sequence starting at utf8_char and stores the address of
+// the octet after it in *next_char. Malformed input still advances *next_char
+// by at least one octet, and an octet that is not a continuation octet (such
+// as the terminating NUL) is never consumed as part of a longer sequence.
+utf_32_char UTF8DecodeChar(const char *utf8_char, const char **next_char)
+{
+	utf_32_char decoded = '\0';
+	unsigned int continuation_octets = 0;
+
+	ASSERT_START_OCTECT(*utf8_char);
+
+	// first octect: 0xxxxxxx: 7 bit (ASCII), 1 byte long encoding
+	if      ((*utf8_char & 0x80) == 0x00)
+	{
+		decoded = *utf8_char;
+	}
+	// first octect: 110xxxxx: 11 bit, 2 byte long encoding
+	else if ((*utf8_char & 0xe0) == 0xc0)
+	{
+		decoded = *utf8_char & 0x1f;
+		continuation_octets = 1;
+	}
+	// first octect: 1110xxxx: 16 bit, 3 byte long encoding
+	else if ((*utf8_char & 0xf0) == 0xe0)
+	{
+		decoded = *utf8_char & 0x0f;
+		continuation_octets = 2;
+	}
+	// first octect: 11110xxx: 21 bit, 4 byte long encoding
+	else if ((*utf8_char & 0xf8) == 0xf0)
+	{
+		decoded = *utf8_char & 0x07;
+		continuation_octets = 3;
+	}
+	else
+	{
+		// apparently this character uses more than 21 bit
+		// this decoder is not developed to cope with those
+		// characters so error out
+		ASSERT(!"out-of-range UTF-8 character", "this UTF-8 character is too large (> 21bits) for this UTF-8 decoder and too large to be a valid Unicode codepoint");
+	}
+
+	// Always step over the starting octet, even an invalid one, so that
+	// callers looping over a string cannot get stuck on it
+	*next_char = utf8_char + 1;
+
+	// Fold in the continuation octets (10xxxxxx), stopping at the first octet
+	// that is not one so a truncated sequence cannot run past the terminator
+	for (; continuation_octets != 0 && (**next_char & 0xC0) == 0x80; --continuation_octets)
+	{
+		decoded = (decoded << 6) | (*((*next_char)++) & 0x3f);
+	}
+	assert(continuation_octets == 0 && "invalid non-start UTF-8 octet");
+
+	return decoded;
+}
+
+// Counts the characters (not octets) in a NUL-terminated UTF-8 string.
+size_t UTF8CharacterCount(const char *utf8_string)
+{
+	const char *cursor = utf8_string;
+	size_t count;
+
+	// Every decode call moves the cursor on by one whole character
+	for (count = 0; *cursor != '\0'; ++count)
+	{
+		UTF8DecodeChar(cursor, &cursor);
+	}
+	return count;
+}
+
+// Counts the characters (not hexadects) in a 0-terminated UTF-16 string.
+size_t UTF16CharacterCount(const uint16_t *utf16)
+{
+	const uint16_t *cursor = utf16;
+	size_t count;
+
+	// Every decode call moves the cursor on by one whole character
+	for (count = 0; *cursor; ++count)
+	{
+		UTF16DecodeChar(cursor, &cursor);
+	}
+	return count;
+}
+
+// Number of octets needed to store unicode_char in UTF-8; triggers ASSERT and
+// gives 0 when the value lies outside the Unicode range (above 0x10FFFF).
+static size_t unicode_utf8_char_length(const utf_32_char unicode_char)
+{
+	/* This encoder can deal with < 0x00200000, but Unicode only ranges
+	 * from 0x0 to 0x10FFFF. Thus we don't accept anything else.
+	 */
+	if (unicode_char >= 0x00110000)
+	{
+		ASSERT(!"out-of-range Unicode codepoint", "This Unicode codepoint is too large (%u > 0x10FFFF) to be a valid Unicode codepoint", (unsigned int)unicode_char);
+		// Dummy value to prevent warnings about missing return from function
+		return 0;
+	}
+
+	// Work downwards from the widest encoding to plain ASCII
+	if (unicode_char >= 0x00010000)
+		return 4; // stores 21 bits
+	if (unicode_char >= 0x00000800)
+		return 3; // stores 16 bits
+	if (unicode_char >= 0x00000080)
+		return 2; // stores 11 bits
+	return 1;     // stores 7 bits (ASCII)
+}
+
+// Returns a pointer to the character at position `index` (counted in
+// characters) of utf8_string, or NULL when the string ends at or before it.
+char *UTF8CharacterAtOffset(const char *utf8_string, size_t index)
+{
+	const char *cursor = utf8_string;
+	size_t remaining;
+
+	// Step over one whole character per iteration
+	for (remaining = index; remaining != 0 && *cursor != '\0'; --remaining)
+	{
+		UTF8DecodeChar(cursor, &cursor);
+	}
+
+	// Landing on the terminator means there is no character at that offset
+	return (*cursor != '\0') ? (char*)cursor : NULL;
+}
+
+/** Writes the UTF-8 encoding of a single Unicode character into a buffer.
+ *
+ *  \param unicode_char The UTF-32 codepoint to encode. This should be a valid
+ *                      Unicode codepoint (i.e. ranging from 0x0 to 0x10FFFF
+ *                      inclusive); anything larger triggers ASSERT and
+ *                      nothing is written.
+ *  \param out_char     Points to the position in a buffer where the UTF-8
+ *                      encoded character is to be stored.
+ *
+ *  \return A pointer to the first byte <em>after</em> the octets written
+ *          (equal to \c out_char if nothing was written). It can be passed
+ *          as the \c out_char parameter of the next encode_utf8_char() call.
+ */
+static char *encode_utf8_char(const utf_32_char unicode_char, char *out_char)
+{
+	// 7 bits: plain ASCII is stored as-is
+	if (unicode_char < 0x00000080)
+	{
+		out_char[0] = unicode_char;
+		return out_char + 1;
+	}
+
+	// 11 bits: counting bits 110 plus the 5 most significant bits, then the
+	// remaining 6 bits in a byte of their own
+	if (unicode_char < 0x00000800)
+	{
+		out_char[0] = 0xc0 | (unicode_char >> 6);
+		out_char[1] = 0x80 | (unicode_char & 0x3f);
+		return out_char + 2;
+	}
+
+	// 16 bits: counting bits 1110 plus the 4 most significant bits, then the
+	// remaining 12 bits spread over two bytes
+	if (unicode_char < 0x00010000)
+	{
+		out_char[0] = 0xe0 | (unicode_char >> 12);
+		out_char[1] = 0x80 | ((unicode_char >> 6) & 0x3f);
+		out_char[2] = 0x80 | (unicode_char & 0x3f);
+		return out_char + 3;
+	}
+
+	/* 21 bits: counting bits 11110 plus the 3 most significant bits, then
+	 * the remaining 18 bits spread over three bytes.
+	 * This encoder can deal with < 0x00200000, but Unicode only ranges
+	 * from 0x0 to 0x10FFFF. Thus we don't accept anything else.
+	 */
+	if (unicode_char < 0x00110000)
+	{
+		out_char[0] = 0xf0 | (unicode_char >> 18);
+		out_char[1] = 0x80 | ((unicode_char >> 12) & 0x3f);
+		out_char[2] = 0x80 | ((unicode_char >> 6) & 0x3f);
+		out_char[3] = 0x80 | (unicode_char & 0x3f);
+		return out_char + 4;
+	}
+
+	/* Apparently this character lies outside the 0x0 - 0x10FFFF
+	 * Unicode range, so don't accept it.
+	 */
+	ASSERT(!"out-of-range Unicode codepoint", "This Unicode codepoint is too large (%u > 0x10FFFF) to be a valid Unicode codepoint", (unsigned int)unicode_char);
+
+	// Nothing was written, so the output position is unchanged
+	return out_char;
+}
+
+// Decodes the UTF-16 character at utf16_char and stores the address of the
+// hexadect after it in *next_char. A surrogate that is not part of a valid
+// high/low pair is returned as-is and consumes only itself, so a stray
+// surrogate just before the terminator cannot push *next_char past it.
+utf_32_char UTF16DecodeChar(const utf_16_char *utf16_char, const utf_16_char **next_char)
+{
+	utf_32_char decoded = *utf16_char;
+	*next_char = utf16_char + 1;
+
+	// Are we dealing with a surrogate pair
+	if (decoded >= 0xD800
+	 && decoded <= 0xDFFF)
+	{
+		ASSERT_START_HEXADECT(utf16_char[0]);
+		ASSERT_FINAL_HEXADECT(utf16_char[1]);
+		// Only combine (and consume) a genuine high + low surrogate pair
+		if (decoded <= 0xDBFF && (utf16_char[1] & 0xFC00) == 0xDC00)
+		{
+			decoded = 0x10000 + (((decoded & 0x3ff) << 10) | (utf16_char[1] & 0x3ff));
+			*next_char = utf16_char + 2;
+		}
+	}
+
+	return decoded;
+}
+
+/** Encodes a single Unicode character to a UTF-16 encoded string.
+ *
+ *  \param unicode_char A UTF-32 encoded Unicode codepoint that will be encoded
+ *                      into UTF-16. This should be a valid Unicode codepoint
+ *                      (i.e. ranging from 0x0 to 0x10FFFF inclusive).
+ *  \param out_char     Points to the position in a buffer where the UTF-16
+ *                      encoded character can be stored.
+ *
+ *  \return A pointer pointing to the first hexadect <em>after</em> the
+ *          encoded UTF-16 sequence. This can be used as the \c out_char
+ *          parameter for a next invocation of encode_utf16_char().
+ */
+static utf_16_char *encode_utf16_char(const utf_32_char unicode_char, utf_16_char *out_char)
+{
+	utf_16_char *next_char = out_char;
+
+	// 16 bits: stored directly as a single hexadect
+	if      (unicode_char < 0x10000)
+	{
+		*(next_char++) = unicode_char;
+	}
+	else if (unicode_char < 0x110000)
+	{
+		// The offset is 20 bits wide, so it must not be narrowed to 16 bits
+		const utf_32_char v = unicode_char - 0x10000;
+		*(next_char++) = 0xD800 | (v >> 10);
+		*(next_char++) = 0xDC00 | (v & 0x3ff);
+
+		ASSERT_START_HEXADECT(out_char[0]);
+		ASSERT_FINAL_HEXADECT(out_char[1]);
+	}
+	else
+	{
+		/* Apparently this character lies outside the 0x0 - 0x10FFFF
+		 * Unicode range, and UTF-16 cannot cope with that, so error
+		 * out.
+		 */
+		ASSERT(!"out-of-range Unicode codepoint", "This Unicode codepoint is too large (%u > 0x10FFFF) to be a valid Unicode codepoint", (unsigned int)unicode_char);
+	}
+
+	return next_char;
+}
+
+// Sums the UTF-8 octet count of every character in a 0-terminated UTF-16
+// string; the terminator itself is not counted.
+static size_t utf16_utf8_buffer_length(const utf_16_char* unicode_string)
+{
+	const utf_16_char* cursor = unicode_string;
+	size_t octets = 0;
+
+	while (*cursor)
+	{
+		const utf_32_char codepoint = UTF16DecodeChar(cursor, &cursor);
+		octets += unicode_utf8_char_length(codepoint);
+	}
+	return octets;
+}
+
+// Converts a 0-terminated UTF-16 string into a newly malloc-ed UTF-8 string.
+// *nbytes (if given) receives the allocation size; NULL is returned on OOM.
+char *UTF16toUTF8(const utf_16_char *unicode_string, size_t *nbytes)
+{
+	// Room for the UTF-8 encoded string plus a terminating nul char
+	const size_t buffer_size = utf16_utf8_buffer_length(unicode_string) + 1;
+	const utf_16_char* in = unicode_string;
+	char* const utf8_string = malloc(buffer_size);
+	char* out = utf8_string;
+
+	if (utf8_string == NULL)
+	{
+		debug(LOG_ERROR, "Out of memory");
+		return NULL;
+	}
+
+	// Transcode one character at a time
+	while (*in)
+	{
+		out = encode_utf8_char(UTF16DecodeChar(in, &in), out);
+	}
+
+	// Terminate the string with a nul character
+	utf8_string[buffer_size - 1] = '\0';
+
+	// Report the number of bytes allocated, if the caller asked for it
+	if (nbytes != NULL)
+	{
+		*nbytes = buffer_size;
+	}
+
+	return utf8_string;
+}
+
+// Counts the UTF-16 hexadects, terminator excluded, needed to hold a
+// NUL-terminated UTF-8 string. Codepoints above 0x10FFFF trigger ASSERT and
+// contribute nothing to the count.
+static size_t utf8_as_utf16_buf_size(const char* utf8_string)
+{
+	const char* cursor = utf8_string;
+	size_t hexadects = 0;
+
+	while (*cursor != '\0')
+	{
+		const utf_32_char codepoint = UTF8DecodeChar(cursor, &cursor);
+
+		if (codepoint >= 0x110000)
+		{
+			/* Apparently this character lies outside the 0x0 - 0x10FFFF
+			 * Unicode range, and UTF-16 cannot cope with that, so error
+			 * out.
+			 */
+			ASSERT(!"out-of-range Unicode codepoint", "This Unicode codepoint too large (%u > 0x10FFFF) for the UTF-16 encoding", (unsigned int)codepoint);
+			continue;
+		}
+
+		// One hexadect is enough below 0x10000; anything above that is
+		// stored as a surrogate pair and so takes two
+		hexadects += (codepoint < 0x10000) ? 1 : 2;
+	}
+
+	return hexadects;
+}
+
+// Converts a NUL-terminated UTF-8 string into a newly malloc-ed UTF-16 string.
+// *nbytes (if given) receives the allocation size; NULL is returned on OOM.
+utf_16_char *UTF8toUTF16(const char* utf8_string, size_t *nbytes)
+{
+	const size_t hexadects = utf8_as_utf16_buf_size(utf8_string);
+	const size_t buffer_size = sizeof(utf_16_char) * (hexadects + 1);
+	const char* in = utf8_string;
+	utf_16_char* const unicode_string = malloc(buffer_size);
+	utf_16_char* out = unicode_string;
+
+	if (unicode_string == NULL)
+	{
+		debug(LOG_ERROR, "Out of memory");
+		return NULL;
+	}
+
+	while (*in != '\0')
+	{
+		out = encode_utf16_char(UTF8DecodeChar(in, &in), out);
+	}
+
+	// Terminate the string with a nul
+	unicode_string[hexadects] = '\0';
+	// Report the number of bytes allocated, if the caller asked for it
+	if (nbytes != NULL)
+	{
+		*nbytes = buffer_size;
+	}
+
+	return unicode_string;
+}
+
+// Returns a pointer to the character at position `index` (counted in
+// characters) of utf16_string, or NULL when the string ends at or before it.
+utf_16_char *UTF16CharacterAtOffset(const utf_16_char *utf16_string, size_t index)
+{
+	const utf_16_char *cursor = utf16_string;
+	size_t remaining;
+
+	// Step over one whole character per iteration
+	for (remaining = index; remaining != 0 && *cursor != '\0'; --remaining)
+	{
+		UTF16DecodeChar(cursor, &cursor);
+	}
+
+	// Landing on the terminator means there is no character at that offset
+	return (*cursor != '\0') ? (utf_16_char*)cursor : NULL;
+}
+
+
+// Sums the UTF-8 octet count of every codepoint in a 0-terminated UTF-32
+// string; the terminator itself is not counted.
+static size_t utf32_utf8_buffer_length(const utf_32_char* unicode_string)
+{
+	const utf_32_char* cursor = unicode_string;
+	size_t octets = 0;
+
+	while (*cursor != '\0')
+	{
+		octets += unicode_utf8_char_length(*cursor++);
+	}
+	return octets;
+}
+
+// Converts a 0-terminated UTF-32 string into a newly malloc-ed UTF-8 string.
+// *nbytes (if given) receives the allocation size; NULL is returned on OOM.
+char *UTF32toUTF8(const utf_32_char *unicode_string, size_t *nbytes)
+{
+	// Room for the UTF-8 encoded string plus a terminating nul char
+	const size_t buffer_size = utf32_utf8_buffer_length(unicode_string) + 1;
+	const utf_32_char* in = unicode_string;
+	char* const utf8_string = malloc(buffer_size);
+	char* out = utf8_string;
+
+	if (utf8_string == NULL)
+	{
+		debug(LOG_ERROR, "Out of memory");
+		return NULL;
+	}
+
+	while (*in != 0)
+	{
+		out = encode_utf8_char(*in++, out);
+	}
+
+	// Terminate the string with a nul character
+	utf8_string[buffer_size - 1] = '\0';
+
+	// Report the number of bytes allocated, if the caller asked for it
+	if (nbytes != NULL)
+	{
+		*nbytes = buffer_size;
+	}
+
+	return utf8_string;
+}
+
+// Converts a NUL-terminated UTF-8 string into a newly malloc-ed UTF-32 string.
+// *nbytes (if given) receives the allocation size; NULL is returned on OOM.
+utf_32_char *UTF8toUTF32(const char *utf8_string, size_t *nbytes)
+{
+	const size_t count = UTF8CharacterCount(utf8_string);
+	const size_t buffer_size = sizeof(utf_32_char) * (count + 1);
+	const char* in = utf8_string;
+	utf_32_char* const unicode_string = malloc(buffer_size);
+	utf_32_char* out = unicode_string;
+
+	if (unicode_string == NULL)
+	{
+		debug(LOG_ERROR, "Out of memory");
+		return NULL;
+	}
+
+	while (*in != '\0')
+	{
+		*out++ = UTF8DecodeChar(in, &in);
+	}
+
+	// Terminate the string with a nul
+	unicode_string[count] = '\0';
+	// Report the number of bytes allocated, if the caller asked for it
+	if (nbytes != NULL)
+	{
+		*nbytes = buffer_size;
+	}
+
+	return unicode_string;
+}
+
+// Number of characters before the terminating 0 of a UTF-32 string.
+size_t utf32len(const utf_32_char *unicode_string)
+{
+	const utf_32_char *end;
+	for (end = unicode_string; *end; ++end) {}
+	return (size_t)(end - unicode_string);
+}
-- 
cgit v1.2.3-1-g7c22