From 9541ceb22b5ecd525e16a318b95329a38240a7a4 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 24 Apr 2006 21:51:00 +0000 Subject: add utf8 validity check API (based on the glib's implementation and hence mostly identical to DBUS's implementation) git-svn-id: file:///home/lennart/svn/public/avahi/trunk@1201 941a03a8-eaeb-0310-b9a0-b1bbd8fe43fe --- avahi-common/Makefile.am | 18 ++++++-- avahi-common/utf8-test.c | 37 +++++++++++++++ avahi-common/utf8.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++ avahi-common/utf8.h | 35 +++++++++++++++ 4 files changed, 201 insertions(+), 4 deletions(-) create mode 100644 avahi-common/utf8-test.c create mode 100644 avahi-common/utf8.c create mode 100644 avahi-common/utf8.h diff --git a/avahi-common/Makefile.am b/avahi-common/Makefile.am index c786954..cd21c11 100644 --- a/avahi-common/Makefile.am +++ b/avahi-common/Makefile.am @@ -48,7 +48,8 @@ noinst_PROGRAMS = \ alternative-test \ timeval-test \ watch-test \ - watch-test-thread + watch-test-thread \ + utf8-test endif lib_LTLIBRARIES = \ @@ -65,7 +66,8 @@ libavahi_common_la_SOURCES = \ simple-watch.c simple-watch.h \ thread-watch.c thread-watch.h \ watch.h gccmacro.h \ - rlist.h rlist.c + rlist.h rlist.c \ + utf8.c utf8.h libavahi_common_la_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) libavahi_common_la_LIBADD = $(AM_LDADD) $(PTHREAD_CFLAGS) $(PTHREAD_LIBS) @@ -82,14 +84,16 @@ alternative_test_SOURCES = \ malloc.c malloc.h \ domain.c domain.h \ address.c address.h \ - alternative-test.c + alternative-test.c \ + utf8.c utf8.h alternative_test_CFLAGS = $(AM_CFLAGS) domain_test_SOURCES = \ domain.c domain.h \ malloc.c malloc.h \ address.c address.h \ - domain-test.c + domain-test.c \ + utf8.c utf8.h domain_test_CFLAGS = $(AM_CFLAGS) watch_test_SOURCES = \ @@ -111,6 +115,12 @@ timeval_test_SOURCES = \ timeval_test_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) timeval_test_LDADD = $(AM_LDADD) $(PTHREAD_LIBS) $(PTHREAD_CFLAGS) +utf8_test_SOURCES = \ + utf8-test.c \ + utf8.c 
utf8.h +utf8_test_CFLAGS = $(AM_CFLAGS) +utf8_test_LDADD = $(AM_LDADD) + if HAVE_DBUS noinst_HEADERS = \ diff --git a/avahi-common/utf8-test.c b/avahi-common/utf8-test.c new file mode 100644 index 0000000..ce7a984 --- /dev/null +++ b/avahi-common/utf8-test.c @@ -0,0 +1,37 @@ +/* $Id$ */ + +/*** + This file is part of avahi. + + avahi is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + avahi is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General + Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with avahi; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <assert.h> + +#include <avahi-common/gccmacro.h> + +#include "utf8.h" + /* Smoke test for avahi_utf8_valid(): plain ASCII and a UTF-8 encoded string must be accepted, the same text in ISO-8859-1 must be rejected. The non-ASCII inputs are written as byte escapes so that the test does not depend on the encoding of this source file. */ +int main(AVAHI_GCC_UNUSED int argc, AVAHI_GCC_UNUSED char *argv[]) { + + assert(avahi_utf8_valid("hallo")); + assert(!avahi_utf8_valid("\xfcxkn\xfcrz")); /* ISO-8859-1 bytes: 0xfc is not a valid UTF-8 lead byte */ + assert(avahi_utf8_valid("\xc3\xbcxkn\xc3\xbcrz")); /* the same text encoded as UTF-8 */ +} diff --git a/avahi-common/utf8.c b/avahi-common/utf8.c new file mode 100644 index 0000000..f52ce55 --- /dev/null +++ b/avahi-common/utf8.c @@ -0,0 +1,115 @@ +/* $Id$ */ + +/* This file is based on the GLIB utf8 validation functions. The + * original license text follows. */ + +/* gutf8.c - Operations on UTF-8 strings. + * + * Copyright (C) 1999 Tom Tromey + * Copyright (C) 2000 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifdef HAVE_CONFIG_H +#include /* NOTE(review): no header name follows this #include in this copy of the patch; presumably the build configuration header - confirm against the repository */ +#endif + +#include /* NOTE(review): header name missing here as well; the code below needs a definition of NULL - confirm which header was named */ + +#include "utf8.h" + /* UNICODE_VALID(Char): true when Char is below 0x110000, lies outside 0xD800..0xDFFF (the surrogate range), lies outside 0xFDD0..0xFDEF, and does not have 0xFFFE or 0xFFFF in its low 16 bits. Char is expanded several times, so pass an expression without side effects. */ +#define UNICODE_VALID(Char) \ + ((Char) < 0x110000 && \ + (((Char) & 0xFFFFF800) != 0xD800) && \ + ((Char) < 0xFDD0 || (Char) > 0xFDEF) && \ + ((Char) & 0xFFFE) != 0xFFFE) + + /* CONTINUATION_CHAR: requires the byte at p to have the form 10xxxxxx and otherwise jumps to the label error; on success appends its low six bits to val. Uses the names p, val and error from the place where it is expanded. */ +#define CONTINUATION_CHAR \ + do { \ + if ((*(const unsigned char *)p & 0xc0) != 0x80) /* 10xxxxxx */ \ + goto error; \ + val <<= 6; \ + val |= (*(const unsigned char *)p) & 0x3f; \ + } while(0) + + /* Checks whether the NUL-terminated string str is accepted as UTF-8. Returns str when every byte sequence is accepted and NULL at the first rejected one. Bytes below 128 are accepted as they are. A two-byte sequence needs a lead byte other than 0xC0/0xC1 followed by one 10xxxxxx byte. Three- and four-byte sequences are decoded into val and rejected when val is below min (overlong form) or fails UNICODE_VALID. Every other lead byte is rejected. A terminating NUL in the place of a continuation byte fails the 10xxxxxx test, so the scan does not move past the terminator. str is dereferenced without a NULL check. */ +const char * +avahi_utf8_valid (const char *str) + +{ + unsigned val = 0; + unsigned min = 0; + const char *p; + + for (p = str; *p; p++) + { + if (*(const unsigned char *)p < 128) + /* done */; + else + { + const char *last; + + last = p; /* NOTE(review): last is written here but not read anywhere else in this function */ + if ((*(const unsigned char *)p & 0xe0) == 0xc0) /* 110xxxxx */ + { + if ( ((*(const unsigned char *)p & 0x1e) == 0)) /* lead byte 0xC0 or 0xC1: overlong two-byte form */ + goto error; + p++; + if ( ((*(const unsigned char *)p & 0xc0) != 0x80)) /* 10xxxxxx */ + goto error; + } + else + { + if ((*(const unsigned char *)p & 0xf0) == 0xe0) /* 1110xxxx */ + { + min = (1 << 11); /* smallest value that needs three bytes */ + val = *(const unsigned char *)p & 0x0f; + goto TWO_REMAINING; /* skip the first of the three continuation steps below */ + } + else if ((*(const unsigned char *)p & 0xf8) == 0xf0) /* 11110xxx */ + { + min = (1 << 16); /* smallest value that needs four bytes */ + val = *(const unsigned char *)p & 0x07; + } + else + goto error; + + p++; + CONTINUATION_CHAR; + TWO_REMAINING: + p++; + CONTINUATION_CHAR; + p++; + CONTINUATION_CHAR; + + if ( (val < min)) /* overlong encoding */ + goto error; + + if ( (!UNICODE_VALID(val))) + goto error; + } + + continue; + + error: + return NULL; + } + } + + return str; +} 
diff --git a/avahi-common/utf8.h b/avahi-common/utf8.h new file mode 100644 index 0000000..93c9b59 --- /dev/null +++ b/avahi-common/utf8.h @@ -0,0 +1,35 @@ +#ifndef fooutf8hfoo +#define fooutf8hfoo + +/* $Id$ */ + +/*** + This file is part of avahi. + + avahi is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + avahi is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General + Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with avahi; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#include /* NOTE(review): no header name follows this #include in this copy of the patch - confirm against the repository */ + +#include /* NOTE(review): header name missing here as well; one of these two headers has to provide AVAHI_C_DECL_BEGIN and AVAHI_C_DECL_END - confirm */ + +AVAHI_C_DECL_BEGIN /* defined outside this file; presumably a C linkage guard for C++ consumers - confirm */ + /* Returns str when the NUL-terminated string str is accepted as valid UTF-8 by the implementation in utf8.c, and NULL otherwise. */ +const char *avahi_utf8_valid(const char *str); + +AVAHI_C_DECL_END + +#endif -- cgit