From f44ba092651aa75055e109e04b4164ea92ae7fdc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Jun 2006 21:53:48 +0000 Subject: big s/polyp/pulse/g git-svn-id: file:///home/lennart/svn/public/pulseaudio/trunk@1033 fefdeb5f-60dc-0310-8127-8f9354f1896f --- src/pulse/utf8.c | 237 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 src/pulse/utf8.c (limited to 'src/pulse/utf8.c') diff --git a/src/pulse/utf8.c b/src/pulse/utf8.c new file mode 100644 index 00000000..33fa7214 --- /dev/null +++ b/src/pulse/utf8.c @@ -0,0 +1,237 @@ +/* $Id$ */ + +/* This file is based on the GLIB utf8 validation functions. The + * original license text follows. */ + +/* gutf8.c - Operations on UTF-8 strings. + * + * Copyright (C) 1999 Tom Tromey + * Copyright (C) 2000 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include + +#ifdef HAVE_ICONV +#include +#endif + +#include "utf8.h" +#include "xmalloc.h" + +#define FILTER_CHAR '_' + +static inline int is_unicode_valid(uint32_t ch) { + if (ch >= 0x110000) /* End of unicode space */ + return 0; + if ((ch & 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */ + return 0; + if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) /* Reserved */ + return 0; + if ((ch & 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */ + return 0; + return 1; +} + +static inline int is_continuation_char(uint8_t ch) { + if ((ch & 0xc0) != 0x80) /* 10xxxxxx */ + return 0; + return 1; +} + +static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) { + *u_ch <<= 6; + *u_ch |= ch & 0x3f; +} + +static char* utf8_validate(const char *str, char *output) { + uint32_t val = 0; + uint32_t min = 0; + const uint8_t *p, *last; + int size; + uint8_t *o; + + o = (uint8_t*) output; + for (p = (const uint8_t*) str; *p; p++) { + if (*p < 128) { + if (o) + *o = *p; + } else { + last = p; + + if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */ + size = 2; + min = 128; + val = *p & 0x1e; + goto ONE_REMAINING; + } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/ + size = 3; + min = (1 << 11); + val = *p & 0x0f; + goto TWO_REMAINING; + } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */ + size = 4; + min = (1 << 16); + val = *p & 0x07; + } else { + size = 1; + goto error; + } + + p++; + if (!is_continuation_char(*p)) + goto error; + merge_continuation_char(&val, *p); + +TWO_REMAINING: + p++; + if (!is_continuation_char(*p)) + goto error; + merge_continuation_char(&val, *p); + +ONE_REMAINING: + p++; + if (!is_continuation_char(*p)) + goto error; + merge_continuation_char(&val, *p); + + if (val < min) + goto error; + + if (!is_unicode_valid(val)) + goto error; + + if (o) { + memcpy(o, last, size); + o += size - 1; + } + + if (o) + o++; + + continue; + +error: + if (o) { + *o = FILTER_CHAR; + p = last; /* We retry at the next character */ + } else + goto failure; + } + + if (o) + o++; + } + + if (o) { + *o = '\0'; + return output; + } + + return (char*) str; + +failure: + return NULL; +} + +const char* pa_utf8_valid (const char *str) { + return utf8_validate(str, NULL); +} + +char* pa_utf8_filter (const char *str) { + char *new_str; + + new_str = pa_xnew(char, strlen(str) + 1); + + return utf8_validate(str, new_str); +} + +#ifdef HAVE_ICONV + +static char* iconv_simple(const char *str, const char *to, const char *from) { + char *new_str; + size_t len, inlen; + + iconv_t cd; + ICONV_CONST char *inbuf; + char *outbuf; + size_t res, inbytes, outbytes; + + cd = iconv_open(to, from); + if (cd == (iconv_t)-1) + return NULL; + + inlen = len = strlen(str) + 1; + new_str = pa_xmalloc(len); + assert(new_str); + + while (1) { + inbuf = (ICONV_CONST char*)str; /* Brain dead prototype for iconv() */ + inbytes = inlen; + outbuf = new_str; + outbytes = len; + + res = iconv(cd, &inbuf, &inbytes, &outbuf, &outbytes); + + if (res != (size_t)-1) + break; + + if (errno != E2BIG) { + pa_xfree(new_str); + new_str = NULL; + break; + } + + assert(inbytes != 0); + + len += inbytes; + new_str = pa_xrealloc(new_str, len); + assert(new_str); + } + + iconv_close(cd); + + return new_str; +} + +char* pa_utf8_to_locale (const char *str) { + return iconv_simple(str, "", "UTF-8"); +} + +char* pa_locale_to_utf8 (const char *str) { + return iconv_simple(str, "UTF-8", ""); +} + +#else + +char* pa_utf8_to_locale (const char *str) { + return NULL; +} + +char* pa_locale_to_utf8 (const char *str) { + return NULL; +} + +#endif -- cgit From 521daf6f0ac4fa6a2fbfb5d523c0c743342dca2b Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Thu, 4 Jan 2007 13:43:45 +0000 Subject: Huge trailing whitespace cleanup. Let's keep the tree pure from here on, mmmkay? git-svn-id: file:///home/lennart/svn/public/pulseaudio/trunk@1418 fefdeb5f-60dc-0310-8127-8f9354f1896f --- src/pulse/utf8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/pulse/utf8.c') diff --git a/src/pulse/utf8.c b/src/pulse/utf8.c index 33fa7214..2708c518 100644 --- a/src/pulse/utf8.c +++ b/src/pulse/utf8.c @@ -130,7 +130,7 @@ ONE_REMAINING: if (o) o++; - + continue; error: -- cgit From 06211b7c8fd329137ae9003818543912a87d9898 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Tue, 13 Feb 2007 15:35:19 +0000 Subject: Add copyright notices to all relevant files. (based on svn log) git-svn-id: file:///home/lennart/svn/public/pulseaudio/trunk@1426 fefdeb5f-60dc-0310-8127-8f9354f1896f --- src/pulse/utf8.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src/pulse/utf8.c') diff --git a/src/pulse/utf8.c b/src/pulse/utf8.c index 2708c518..2ac2d106 100644 --- a/src/pulse/utf8.c +++ b/src/pulse/utf8.c @@ -1,5 +1,27 @@ /* $Id$ */ +/*** + This file is part of PulseAudio. + + Copyright 2006 Lennart Poettering + Copyright 2006 Pierre Ossman for Cendio AB + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + /* This file is based on the GLIB utf8 validation functions. The * original license text follows. */ -- cgit From a67c21f093202f142438689d3f7cfbdf4ea82eea Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sun, 28 Oct 2007 19:13:50 +0000 Subject: merge 'lennart' branch back into trunk. git-svn-id: file:///home/lennart/svn/public/pulseaudio/trunk@1971 fefdeb5f-60dc-0310-8127-8f9354f1896f --- src/pulse/utf8.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'src/pulse/utf8.c') diff --git a/src/pulse/utf8.c b/src/pulse/utf8.c index 2ac2d106..b2f6c3bd 100644 --- a/src/pulse/utf8.c +++ b/src/pulse/utf8.c @@ -37,7 +37,7 @@ * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public @@ -50,7 +50,6 @@ #include #endif -#include #include #include #include @@ -60,12 +59,15 @@ #include #endif +#include +#include + #include "utf8.h" -#include "xmalloc.h" #define FILTER_CHAR '_' static inline int is_unicode_valid(uint32_t ch) { + if (ch >= 0x110000) /* End of unicode space */ return 0; if ((ch & 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */ @@ -74,6 +76,7 @@ static inline int is_unicode_valid(uint32_t ch) { return 0; if ((ch & 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */ return 0; + return 1; } @@ -95,6 +98,8 @@ static char* utf8_validate(const char *str, char *output) { int size; uint8_t *o; + pa_assert(str); + o = (uint8_t*) output; for (p = (const uint8_t*) str; *p; p++) { if (*p < 128) { @@ -178,15 +183,15 @@ failure: return NULL; } -const char* pa_utf8_valid (const char *str) { +char* pa_utf8_valid (const char *str) { return utf8_validate(str, NULL); } char* pa_utf8_filter (const char *str) { char *new_str; + pa_assert(str); new_str = pa_xnew(char, strlen(str) + 1); - return utf8_validate(str, new_str); } @@ -195,22 +200,24 @@ char* pa_utf8_filter (const char *str) { static char* iconv_simple(const char *str, const char *to, const char *from) { char *new_str; size_t len, inlen; - iconv_t cd; ICONV_CONST char *inbuf; char *outbuf; size_t res, inbytes, outbytes; + pa_assert(str); + pa_assert(to); + pa_assert(from); + cd = iconv_open(to, from); if (cd == (iconv_t)-1) return NULL; inlen = len = strlen(str) + 1; - new_str = pa_xmalloc(len); - assert(new_str); + new_str = pa_xnew(char, len); - while (1) { - inbuf = (ICONV_CONST char*)str; /* Brain dead prototype for iconv() */ + for (;;) { + inbuf = (ICONV_CONST char*) str; /* Brain dead prototype for iconv() */ inbytes = inlen; outbuf = new_str; outbytes = len; @@ -226,11 +233,10 @@ static char* iconv_simple(const char *str, const char *to, const char *from) { break; } - assert(inbytes != 0); + pa_assert(inbytes != 0); len += inbytes; new_str = pa_xrealloc(new_str, len); - assert(new_str); } iconv_close(cd); @@ -249,10 +255,12 @@ char* pa_locale_to_utf8 (const char *str) { #else char* pa_utf8_to_locale (const char *str) { + pa_assert(str); return NULL; } char* pa_locale_to_utf8 (const char *str) { + pa_assert(str); return NULL; } -- cgit