From 01309c8bd0a9ac4476952ec5063499ec980a7b12 Mon Sep 17 00:00:00 2001 From: erihel Date: Thu, 11 Apr 2013 13:37:15 +0200 Subject: * Added clipboard support (issue #60) * Fixed keyboard shortcuts while code editing in game --- lib/clipboard/src/clipboardWin32.c | 160 +++++++++++ lib/clipboard/src/clipboardX11.c | 293 ++++++++++++++++++++ lib/clipboard/src/utf.c | 530 +++++++++++++++++++++++++++++++++++++ 3 files changed, 983 insertions(+) create mode 100644 lib/clipboard/src/clipboardWin32.c create mode 100644 lib/clipboard/src/clipboardX11.c create mode 100644 lib/clipboard/src/utf.c (limited to 'lib/clipboard/src') diff --git a/lib/clipboard/src/clipboardWin32.c b/lib/clipboard/src/clipboardWin32.c new file mode 100644 index 0000000..be48906 --- /dev/null +++ b/lib/clipboard/src/clipboardWin32.c @@ -0,0 +1,160 @@ +/* + This file is part of Warzone 2100. + Copyright (C) 2008 Freddie Witherden + Copyright (C) 2008-2009 Warzone Resurrection Project + + Warzone 2100 is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + Warzone 2100 is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Warzone 2100; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#define WIN32_LEAN_AND_MEAN +#include + +#include "utf.h" +// Defines most macros and types from and +#include "types.h" + +char *widgetGetClipboardText() +{ + uint16_t *clipboardText; + char *ourText = NULL; + + // If there is any text on the clipboard, open it + if (IsClipboardFormatAvailable(CF_UNICODETEXT) && OpenClipboard(NULL)) + { + // Get any text on the clipboard + HANDLE hClipboardData = GetClipboardData(CF_UNICODETEXT); + + // If the handle is valid, fetch the text + if (hClipboardData) + { + // Get the text + clipboardText = GlobalLock(hClipboardData); + + // So long as we got something + if (clipboardText) + { + int i, j; + + // Convert it to UTF-8 (from UTF-16) + ourText = UTF16toUTF8(clipboardText, NULL); + + // Unlock the text + GlobalUnlock(hClipboardData); + + // Strip any '\r' from the text + for (i = j = 0; ourText[i]; i++) + { + if (ourText[i] != '\r') + { + ourText[j++] = ourText[i]; + } + } + + // NUL terminate + ourText[j] = '\0'; + } + } + + // Close the clipboard + CloseClipboard(); + } + + return ourText; +} + +bool widgetSetClipboardText(const char *text) +{ + bool ret = false; + + // Copy of text with \n => \r\n + char *newText; + + // UTF-16 version of newText + uint16_t *utf16NewText; + + // Number of bytes utf16NewText is in size + size_t nbytes; + + int count, i, j; + + // Get the number of '\n' characters in the text + for (i = count = 0; text[i]; i++) + { + if (text[i] == '\n') + { + count++; + } + } + + // Allocate enough space for the \r\n string + newText = malloc(strlen(text) + count + 1); + + // Copy the string, converting \n to \r\n + for (i = j = 0; text[i]; i++, j++) + { + // If the character is a newline prepend a \r + if (text[i] == '\n') + { + newText[j++] = '\r'; + } + + // Copy the character (\n or otherwise) + newText[j] = text[i]; + } + + // NUL terminate + newText[j] = '\0'; + + // Convert to UTF-16 + utf16NewText = UTF8toUTF16(newText, &nbytes); + + // Open the clipboard + if (OpenClipboard(NULL)) + { + HGLOBAL hGlobal; + uint16_t *clipboardText; + + // Empty it (which also transfers ownership of it to ourself) + EmptyClipboard(); + + // Allocate global space for the text + hGlobal = GlobalAlloc(GMEM_MOVEABLE, nbytes); + + // Lock the newly allocated memory + clipboardText = GlobalLock(hGlobal); + + // Copy the text + memcpy(clipboardText, utf16NewText, nbytes); + + // Unlock the memory (must come before CloseClipboard()) + GlobalUnlock(hGlobal); + + // Place the handle on the clipboard + if (SetClipboardData(CF_UNICODETEXT, hGlobal)) + { + // We were successful + ret = true; + } + + // Close the clipboard + CloseClipboard(); + } + + // Release the malloc-ed strings + free(newText); + free(utf16NewText); + + return ret; +} diff --git a/lib/clipboard/src/clipboardX11.c b/lib/clipboard/src/clipboardX11.c new file mode 100644 index 0000000..0653250 --- /dev/null +++ b/lib/clipboard/src/clipboardX11.c @@ -0,0 +1,293 @@ +/* + This file is part of Warzone 2100. + Copyright (C) 2008 Freddie Witherden + Copyright (C) 2008-2009 Warzone Resurrection Project + + Warzone 2100 is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + Warzone 2100 is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Warzone 2100; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +/* + * Something wicked this way comes... + * Documentation/reference: + * http://svr-www.eng.cam.ac.uk/~er258/code/dist/x_clipboard/paste.cc + */ + +#include +#include + +#include +#include + +static SDL_SysWMinfo info; + +// Atoms +static Atom XA_CLIPBOARD; +static Atom XA_COMPOUND_TEXT; +static Atom XA_UTF8_STRING; +static Atom XA_TARGETS; + +/** + * Filters through SDL_Events searching for clipboard requests from the X + * server. + * + * @param evt The event to filter. + */ +static int widgetClipboardFilterX11(const SDL_Event *evt) +{ + // We are only interested in window manager events + if (evt->type == SDL_SYSWMEVENT) + { + XEvent xevent = evt->syswm.msg->event.xevent; + + // See if the event is a selection/clipboard request + if (xevent.type == SelectionRequest) + { + // Get the request in question + XSelectionRequestEvent *request = &xevent.xselectionrequest; + + // Generate a reply to the selection request + XSelectionEvent reply; + + reply.type = SelectionNotify; + reply.serial = xevent.xany.send_event; + reply.send_event = True; + reply.display = info.info.x11.display; + reply.requestor = request->requestor; + reply.selection = request->selection; + reply.property = request->property; + reply.target = None; + reply.time = request->time; + + // They want to know what we can provide/offer + if (request->target == XA_TARGETS) + { + Atom possibleTargets[] = + { + XA_STRING, + XA_UTF8_STRING, + XA_COMPOUND_TEXT + }; + + XChangeProperty(info.info.x11.display, request->requestor, + request->property, XA_ATOM, 32, PropModeReplace, + (unsigned char *) possibleTargets, 3); + } + // They want a string (all we can provide) + else if (request->target == XA_STRING + || request->target == XA_UTF8_STRING + || request->target == XA_COMPOUND_TEXT) + { + int len; + char *xdata = XFetchBytes(info.info.x11.display, &len); + + XChangeProperty(info.info.x11.display, request->requestor, + request->property, request->target, 8, + PropModeReplace, (unsigned char *) xdata, + len); + XFree(xdata); + } + else + { + // Did not have what they wanted, so no property set + reply.property = None; + } + + // Dispatch the event + XSendEvent(request->display, request->requestor, 0, NoEventMask, + (XEvent *) &reply); + XSync(info.info.x11.display, False); + } + } + + return 1; +} + +static void widgetInitialiseClipboardX11() +{ + static bool initialised = false; + + if (!initialised) + { + // Get the window manager information + SDL_GetWMInfo(&info); + + // Ensure we're running under X11 + assert(info.subsystem == SDL_SYSWM_X11); + + // Register the event filter + SDL_EventState(SDL_SYSWMEVENT, SDL_ENABLE); + SDL_SetEventFilter(widgetClipboardFilterX11); + + // Lock the connection to the X server + info.info.x11.lock_func(); + + // Get the clipboard atom (it is not defined by default) + XA_CLIPBOARD = XInternAtom(info.info.x11.display, "CLIPBOARD", True); + + // Get the compound text type atom + XA_COMPOUND_TEXT = XInternAtom(info.info.x11.display, "COMPOUND_TEXT", + True); + + // UTF-8 string atom + XA_UTF8_STRING = XInternAtom(info.info.x11.display, "UTF8_STRING", + True); + + // TARGETS atom + XA_TARGETS = XInternAtom(info.info.x11.display, "TARGETS", True); + + // Unlock the connection + info.info.x11.unlock_func(); + + // We are initialised + initialised = true; + } +} + +char *widgetGetClipboardText() +{ + char *text = NULL; + unsigned char *data = NULL; + Atom type; + int format, result; + unsigned long len, bytesLeft, dummy; + Window selectionOwner; + + // Make sure we are initialised + widgetInitialiseClipboardX11(); + + // Lock the connection + info.info.x11.lock_func(); + + // Get the owner of the clipboard selection + selectionOwner = XGetSelectionOwner(info.info.x11.display, XA_CLIPBOARD); + + // If there is a selection (and therefore owner) fetch it + if (selectionOwner != None) + { + SDL_Event event; + bool response = false; + + /* + * Ask the window whom current owns the clipboard to convert it to an + * XA_UTF8_STRING and place it into the XA_CLIPBOARD property of our + * window. + */ + XConvertSelection(info.info.x11.display, XA_CLIPBOARD, XA_UTF8_STRING, + XA_CLIPBOARD, info.info.x11.window, CurrentTime); + XFlush(info.info.x11.display); + + /* + * We now need to wait for a response from the window that owns the + * clipboard. + */ + + // Unlock the connection so that the SDL event loop may function + info.info.x11.unlock_func(); + + while (!response) + { + // Wait for an event + SDL_WaitEvent(&event); + + // If the event is a window manager event + if (event.type == SDL_SYSWMEVENT) + { + XEvent xevent = event.syswm.msg->event.xevent; + + // See if it is a response to our request + if (xevent.type == SelectionNotify + && xevent.xselection.requestor == info.info.x11.window) + { + response = true; + } + } + } + + // Lock the connection once again + info.info.x11.lock_func(); + + // See how much data is there + XGetWindowProperty(info.info.x11.display, info.info.x11.window, + XA_CLIPBOARD, 0, 0, False, AnyPropertyType, &type, + &format, &len, &bytesLeft, &data); + + // If any 0-length data was returned, free it + if (data) + { + XFree(data); + data = NULL; + } + + // If there is any data + if (bytesLeft) + { + // Fetch the data + result = XGetWindowProperty(info.info.x11.display, + info.info.x11.window, XA_CLIPBOARD, 0, + bytesLeft, False, AnyPropertyType, + &type, &format, &len, &dummy, &data); + + // If we got some data, duplicate it + if (result == Success) + { + text = strdup((char *) data); + XFree(data); + } + } + + // Delete the property now that we are finished with it + XDeleteProperty(info.info.x11.display, info.info.x11.window, + XA_CLIPBOARD); + } + + // Unlock the connection + info.info.x11.unlock_func(); + + return text; +} + +bool widgetSetClipboardText(const char *text) +{ + Window selectionOwner; + + // Make sure we are initialised + widgetInitialiseClipboardX11(); + + // Lock the connection + info.info.x11.lock_func(); + + // Copy the text into the root windows cut buffer (for Xterm compatibility) + XStoreBytes(info.info.x11.display, text, strlen(text) + 1); + + // Set ourself as the owner of the CLIPBOARD atom + XSetSelectionOwner(info.info.x11.display, XA_CLIPBOARD, + info.info.x11.window, CurrentTime); + + // Check if we acquired ownership or not + selectionOwner = XGetSelectionOwner(info.info.x11.display, XA_CLIPBOARD); + + // We got ownership + if (selectionOwner == info.info.x11.window) + { + info.info.x11.unlock_func(); + return true; + } + // We did not get ownership + else + { + info.info.x11.unlock_func(); + return false; + } +} diff --git a/lib/clipboard/src/utf.c b/lib/clipboard/src/utf.c new file mode 100644 index 0000000..2313bb6 --- /dev/null +++ b/lib/clipboard/src/utf.c @@ -0,0 +1,530 @@ +/* + This file is part of Warzone 2100. + Copyright (C) 2007 Giel van Schijndel + Copyright (C) 2007-2009 Warzone Resurrection Project + + Warzone 2100 is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + Warzone 2100 is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Warzone 2100; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + $Revision: 9101 $ + $Id: utf.c 9101 2010-01-10 13:11:55Z zarelsl $ + $HeadURL: https://warzone2100.svn.sourceforge.net/svnroot/warzone2100/trunk/lib/framework/utf.c $ +*/ + +/** \file + * Functions to convert between different Unicode Transformation Formats (UTF for short) + */ + +#include "utf.h" +#include +#include + +#if defined(LIB_COMPILE) +# define ASSERT(expr, ...) (assert(expr)) +# define debug(part, ...) ((void)0) +#else +# include "debug.h" +#endif + +// Assert that non-starting octets are of the form 10xxxxxx +#define ASSERT_NON_START_OCTET(octet) \ + assert((octet & 0xC0) == 0x80 && "invalid non-start UTF-8 octet") + +// Assert that starting octets are either of the form 0xxxxxxx (ASCII) or 11xxxxxx +#define ASSERT_START_OCTECT(octet) \ + assert((octet & 0x80) == 0x00 || (octet & 0xC0) == 0xC0 || !"invalid starting UTF-8 octet") + +// Assert that hexadect (16bit sequence) 1 of UTF-16 surrogate pair sequences are of the form 110110XXXXXXXXXX +#define ASSERT_START_HEXADECT(hexadect) \ + assert(((hexadect) & 0xD800) == 0xD800 && "invalid first UTF-16 hexadect") + +// Assert that hexadect (16bit sequence) 2 of UTF-16 surrogate pair sequences are of the form 110111XXXXXXXXXX +#define ASSERT_FINAL_HEXADECT(hexadect) \ + assert(((hexadect) & 0xDC00) == 0xDC00 && "invalid first UTF-16 hexadect") + +utf_32_char UTF8DecodeChar(const char *utf8_char, const char **next_char) +{ + utf_32_char decoded = '\0'; + *next_char = utf8_char; + + ASSERT_START_OCTECT(*utf8_char); + + // first octect: 0xxxxxxx: 7 bit (ASCII) + if ((*utf8_char & 0x80) == 0x00) + { + // 1 byte long encoding + decoded = *((*next_char)++); + } + // first octect: 110xxxxx: 11 bit + else if ((*utf8_char & 0xe0) == 0xc0) + { + // 2 byte long encoding + ASSERT_NON_START_OCTET(utf8_char[1]); + + decoded = (*((*next_char)++) & 0x1f) << 6; + decoded |= (*((*next_char)++) & 0x3f) << 0; + } + // first octect: 1110xxxx: 16 bit + else if ((*utf8_char & 0xf0) == 0xe0) + { + // 3 byte long encoding + ASSERT_NON_START_OCTET(utf8_char[1]); + ASSERT_NON_START_OCTET(utf8_char[2]); + + decoded = (*((*next_char)++) & 0x0f) << 12; + decoded |= (*((*next_char)++) & 0x3f) << 6; + decoded |= (*((*next_char)++) & 0x3f) << 0; + } + // first octect: 11110xxx: 21 bit + else if ((*utf8_char & 0xf8) == 0xf0) + { + // 4 byte long encoding + ASSERT_NON_START_OCTET(utf8_char[1]); + ASSERT_NON_START_OCTET(utf8_char[2]); + ASSERT_NON_START_OCTET(utf8_char[3]); + + decoded = (*((*next_char)++) & 0x07) << 18; + decoded |= (*((*next_char)++) & 0x3f) << 12; + decoded |= (*((*next_char)++) & 0x3f) << 6; + decoded |= (*((*next_char)++) & 0x3f) << 0; + } + else + { + // apparently this character uses more than 21 bit + // this decoder is not developed to cope with those + // characters so error out + ASSERT(!"out-of-range UTF-8 character", "this UTF-8 character is too large (> 21bits) for this UTF-8 decoder and too large to be a valid Unicode codepoint"); + } + + return decoded; +} + +size_t UTF8CharacterCount(const char *utf8_string) +{ + size_t length = 0; + + while (*utf8_string != '\0') + { + UTF8DecodeChar(utf8_string, &utf8_string); + + ++length; + } + + return length; +} + +size_t UTF16CharacterCount(const uint16_t *utf16) +{ + size_t length = 0; + + while (*utf16) + { + UTF16DecodeChar(utf16, &utf16); + + ++length; + } + + return length; +} + +static size_t unicode_utf8_char_length(const utf_32_char unicode_char) +{ + // an ASCII character, which uses 7 bit at most, which is one byte in UTF-8 + if (unicode_char < 0x00000080) + return 1; // stores 7 bits + else if (unicode_char < 0x00000800) + return 2; // stores 11 bits + else if (unicode_char < 0x00010000) + return 3; // stores 16 bits + /* This encoder can deal with < 0x00200000, but Unicode only ranges + * from 0x0 to 0x10FFFF. Thus we don't accept anything else. + */ + else if (unicode_char < 0x00110000) + return 4; // stores 21 bits + + /* Apparently this character lies outside the 0x0 - 0x10FFFF + * Unicode range, so don't accept it. + */ + ASSERT(!"out-of-range Unicode codepoint", "This Unicode codepoint is too large (%u > 0x10FFFF) to be a valid Unicode codepoint", (unsigned int)unicode_char); + + // Dummy value to prevent warnings about missing return from function + return 0; +} + +char *UTF8CharacterAtOffset(const char *utf8_string, size_t index) +{ + while (*utf8_string != '\0' + && index != 0) + { + // Move to the next character + UTF8DecodeChar(utf8_string, &utf8_string); + + --index; + } + + if (*utf8_string == '\0') + return NULL; + + return (char*)utf8_string; +} + +/** Encodes a single Unicode character to a UTF-8 encoded string. + * + * \param unicode_char A UTF-32 encoded Unicode codepoint that will be encoded + * into UTF-8. This should be a valid Unicode codepoint + * (i.e. ranging from 0x0 to 0x10FFFF inclusive). + * \param out_char Points to the position in a buffer where the UTF-8 + * encoded character can be stored. + * + * \return A pointer pointing to the first byte after the encoded + * UTF-8 sequence. This can be used as the \c out_char parameter for a + * next invocation of encode_utf8_char(). + */ +static char *encode_utf8_char(const utf_32_char unicode_char, char *out_char) +{ + char *next_char = out_char; + + // 7 bits + if (unicode_char < 0x00000080) + { + *(next_char++) = unicode_char; + } + // 11 bits + else if (unicode_char < 0x00000800) + { + // 0xc0 provides the counting bits: 110 + // then append the 5 most significant bits + *(next_char++) = 0xc0 | (unicode_char >> 6); + // Put the next 6 bits in a byte of their own + *(next_char++) = 0x80 | (unicode_char & 0x3f); + } + // 16 bits + else if (unicode_char < 0x00010000) + { + // 0xe0 provides the counting bits: 1110 + // then append the 4 most significant bits + *(next_char++) = 0xe0 | (unicode_char >> 12); + // Put the next 12 bits in two bytes of their own + *(next_char++) = 0x80 | ((unicode_char >> 6) & 0x3f); + *(next_char++) = 0x80 | (unicode_char & 0x3f); + } + // 21 bits + /* This encoder can deal with < 0x00200000, but Unicode only ranges + * from 0x0 to 0x10FFFF. Thus we don't accept anything else. + */ + else if (unicode_char < 0x00110000) + { + // 0xf0 provides the counting bits: 11110 + // then append the 3 most significant bits + *(next_char++) = 0xf0 | (unicode_char >> 18); + // Put the next 18 bits in three bytes of their own + *(next_char++) = 0x80 | ((unicode_char >> 12) & 0x3f); + *(next_char++) = 0x80 | ((unicode_char >> 6) & 0x3f); + *(next_char++) = 0x80 | (unicode_char & 0x3f); + } + else + { + /* Apparently this character lies outside the 0x0 - 0x10FFFF + * Unicode range, so don't accept it. + */ + ASSERT(!"out-of-range Unicode codepoint", "This Unicode codepoint is too large (%u > 0x10FFFF) to be a valid Unicode codepoint", (unsigned int)unicode_char); + } + + return next_char; +} + +utf_32_char UTF16DecodeChar(const utf_16_char *utf16_char, const utf_16_char **next_char) +{ + utf_32_char decoded; + *next_char = utf16_char; + + // Are we dealing with a surrogate pair + if (*utf16_char >= 0xD800 + && *utf16_char <= 0xDFFF) + { + ASSERT_START_HEXADECT(utf16_char[0]); + ASSERT_FINAL_HEXADECT(utf16_char[1]); + + decoded = (*((*next_char)++) & 0x3ff) << 10; + decoded |= *((*next_char)++) & 0x3ff; + + decoded += 0x10000; + } + // Not a surrogate pair, so it's a valid Unicode codepoint right away + else + { + decoded = *((*next_char)++); + } + + return decoded; +} + +/** Encodes a single Unicode character to a UTF-16 encoded string. + * + * \param unicode_char A UTF-32 encoded Unicode codepoint that will be encoded + * into UTF-16. This should be a valid Unicode codepoint + * (i.e. ranging from 0x0 to 0x10FFFF inclusive). + * \param out_char Points to the position in a buffer where the UTF-16 + * encoded character can be stored. + * + * \return A pointer pointing to the first byte after the encoded + * UTF-16 sequence. This can be used as the \c out_char parameter for a + * next invocation of encode_utf16_char(). + */ +static utf_16_char *encode_utf16_char(const utf_32_char unicode_char, utf_16_char *out_char) +{ + utf_16_char *next_char = out_char; + + // 16 bits + if (unicode_char < 0x10000) + { + *(next_char++) = unicode_char; + } + else if (unicode_char < 0x110000) + { + const utf_16_char v = unicode_char - 0x10000; + + *(next_char++) = 0xD800 | (v >> 10); + *(next_char++) = 0xDC00 | (v & 0x3ff); + + ASSERT_START_HEXADECT(out_char[0]); + ASSERT_FINAL_HEXADECT(out_char[1]); + } + else + { + /* Apparently this character lies outside the 0x0 - 0x10FFFF + * Unicode range, and UTF-16 cannot cope with that, so error + * out. + */ + ASSERT(!"out-of-range Unicode codepoint", "This Unicode codepoint is too large (%u > 0x10FFFF) to be a valid Unicode codepoint", (unsigned int)unicode_char); + } + + return next_char; +} + +static size_t utf16_utf8_buffer_length(const utf_16_char* unicode_string) +{ + const utf_16_char* curChar = unicode_string; + + // Determine length of string (in octets) when encoded in UTF-8 + size_t length = 0; + + while (*curChar) + { + length += unicode_utf8_char_length(UTF16DecodeChar(curChar, &curChar)); + } + + return length; +} + +char *UTF16toUTF8(const utf_16_char *unicode_string, size_t *nbytes) +{ + const utf_16_char* curChar; + + const size_t utf8_length = utf16_utf8_buffer_length(unicode_string); + + // Allocate memory to hold the UTF-8 encoded string (plus a terminating nul char) + char* utf8_string = malloc(utf8_length + 1); + char* curOutPos = utf8_string; + + if (utf8_string == NULL) + { + debug(LOG_ERROR, "Out of memory"); + return NULL; + } + + curChar = unicode_string; + while (*curChar) + { + curOutPos = encode_utf8_char(UTF16DecodeChar(curChar, &curChar), curOutPos); + } + + // Terminate the string with a nul character + utf8_string[utf8_length] = '\0'; + + // Set the number of bytes allocated + if (nbytes) + { + *nbytes = utf8_length + 1; + } + + return utf8_string; +} + +static size_t utf8_as_utf16_buf_size(const char* utf8_string) +{ + const char* curChar = utf8_string; + + size_t length = 0; + while (*curChar != '\0') + { + const utf_32_char unicode_char = UTF8DecodeChar(curChar, &curChar); + + if (unicode_char < 0x10000) + { + length += 1; + } + else if (unicode_char < 0x110000) + { + length += 2; + } + else + { + /* Apparently this character lies outside the 0x0 - 0x10FFFF + * Unicode range, and UTF-16 cannot cope with that, so error + * out. + */ + ASSERT(!"out-of-range Unicode codepoint", "This Unicode codepoint too large (%u > 0x10FFFF) for the UTF-16 encoding", (unsigned int)unicode_char); + } + } + + return length; +} + +utf_16_char *UTF8toUTF16(const char* utf8_string, size_t *nbytes) +{ + const char* curChar = utf8_string; + const size_t unicode_length = utf8_as_utf16_buf_size(utf8_string); + + // Allocate memory to hold the UTF-16 encoded string (plus a terminating nul) + utf_16_char* unicode_string = malloc(sizeof(utf_16_char) * (unicode_length + 1)); + utf_16_char* curOutPos = unicode_string; + + if (unicode_string == NULL) + { + debug(LOG_ERROR, "Out of memory"); + return NULL; + } + + while (*curChar != '\0') + { + curOutPos = encode_utf16_char(UTF8DecodeChar(curChar, &curChar), curOutPos); + } + + // Terminate the string with a nul + unicode_string[unicode_length] = '\0'; + + // Set the number of bytes allocated + if (nbytes) + { + *nbytes = sizeof(utf_16_char) * (unicode_length + 1); + } + + return unicode_string; +} + +utf_16_char *UTF16CharacterAtOffset(const utf_16_char *utf16_string, size_t index) +{ + while (*utf16_string != '\0' + && index != 0) + { + // Move to the next character + UTF16DecodeChar(utf16_string, &utf16_string); + + --index; + } + + if (*utf16_string == '\0') + return NULL; + + return (utf_16_char*)utf16_string; +} + + +static size_t utf32_utf8_buffer_length(const utf_32_char* unicode_string) +{ + const utf_32_char* curChar; + + // Determine length of string (in octets) when encoded in UTF-8 + size_t length = 0; + for (curChar = unicode_string; *curChar != '\0'; ++curChar) + { + length += unicode_utf8_char_length(*curChar); + } + + return length; +} + +char *UTF32toUTF8(const utf_32_char *unicode_string, size_t *nbytes) +{ + const utf_32_char* curChar; + + const size_t utf8_length = utf32_utf8_buffer_length(unicode_string); + + // Allocate memory to hold the UTF-8 encoded string (plus a terminating nul char) + char* utf8_string = malloc(utf8_length + 1); + char* curOutPos = utf8_string; + + if (utf8_string == NULL) + { + debug(LOG_ERROR, "Out of memory"); + return NULL; + } + + for (curChar = unicode_string; *curChar != 0; ++curChar) + { + curOutPos = encode_utf8_char(*curChar, curOutPos); + } + + // Terminate the string with a nul character + utf8_string[utf8_length] = '\0'; + + // Set the number of bytes allocated + if (nbytes) + { + *nbytes = utf8_length + 1; + } + + return utf8_string; +} + +utf_32_char *UTF8toUTF32(const char *utf8_string, size_t *nbytes) +{ + const char* curChar = utf8_string; + const size_t unicode_length = UTF8CharacterCount(utf8_string); + + // Allocate memory to hold the UTF-32 encoded string (plus a terminating nul) + utf_32_char* unicode_string = malloc(sizeof(utf_32_char) * (unicode_length + 1)); + utf_32_char* curOutPos = unicode_string; + + if (unicode_string == NULL) + { + debug(LOG_ERROR, "Out of memory"); + return NULL; + } + + while (*curChar != '\0') + { + *(curOutPos++) = UTF8DecodeChar(curChar, &curChar); + } + + // Terminate the string with a nul + unicode_string[unicode_length] = '\0'; + + // Set the number of bytes allocated + if (nbytes) + { + *nbytes = sizeof(utf_32_char) * (unicode_length + 1); + } + + return unicode_string; +} + +size_t utf32len(const utf_32_char *unicode_string) +{ + size_t ret = 0; + while (*unicode_string++) + ++ret; + return ret; +} -- cgit v1.2.3-1-g7c22