diff options
| author | Pierre Ossman <ossman@cendio.se> | 2006-05-15 12:44:44 +0000 | 
|---|---|---|
| committer | Pierre Ossman <ossman@cendio.se> | 2006-05-15 12:44:44 +0000 | 
| commit | e91740f68ce6334935d9440c12f08e8c136d0b45 (patch) | |
| tree | e241737b68f4fa06d5028e1278e926575893f8d3 /src | |
| parent | 147da3e36ff8c1ce07a14d44e9c8747069f90c18 (diff) | |
Clean up the UTF-8 validation code.
git-svn-id: file:///home/lennart/svn/public/pulseaudio/trunk@870 fefdeb5f-60dc-0310-8127-8f9354f1896f
Diffstat (limited to 'src')
| -rw-r--r-- | src/polypcore/utf8.c | 154 | 
1 file changed, 76 insertions, 78 deletions
diff --git a/src/polypcore/utf8.c b/src/polypcore/utf8.c index b7758439..a706b280 100644 --- a/src/polypcore/utf8.c +++ b/src/polypcore/utf8.c @@ -29,87 +29,85 @@  #endif  #include <stdlib.h> +#include <inttypes.h>  #include "utf8.h" -#define UNICODE_VALID(Char)                   \ -    ((Char) < 0x110000 &&                     \ -     (((Char) & 0xFFFFF800) != 0xD800) &&     \ -     ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&  \ -     ((Char) & 0xFFFE) != 0xFFFE) -    -      -#define CONTINUATION_CHAR                           \ - do {                                     \ -  if ((*(const unsigned char *)p & 0xc0) != 0x80) /* 10xxxxxx */ \ -    goto error;                                     \ -  val <<= 6;                                        \ -  val |= (*(const unsigned char *)p) & 0x3f;                     \ - } while(0) - - -const char * -pa_utf8_valid (const char *str) - -{ -  unsigned val = 0; -  unsigned min = 0; -  const char *p; - -  for (p = str; *p; p++) -    { -      if (*(const unsigned char *)p < 128) -	/* done */; -      else  -	{ -	  const char *last; -	   -	  last = p; -	  if ((*(const unsigned char *)p & 0xe0) == 0xc0) /* 110xxxxx */ -	    { -	      if ( ((*(const unsigned char *)p & 0x1e) == 0)) -		goto error; -	      p++; -	      if ( ((*(const unsigned char *)p & 0xc0) != 0x80)) /* 10xxxxxx */ -		goto error; -	    } -	  else -	    { -	      if ((*(const unsigned char *)p & 0xf0) == 0xe0) /* 1110xxxx */ -		{ -		  min = (1 << 11); -		  val = *(const unsigned char *)p & 0x0f; -		  goto TWO_REMAINING; -		} -	      else if ((*(const unsigned char *)p & 0xf8) == 0xf0) /* 11110xxx */ -		{ -		  min = (1 << 16); -		  val = *(const unsigned char *)p & 0x07; -		} -	      else -		goto error; -	       -	      p++; -	      CONTINUATION_CHAR; -	    TWO_REMAINING: -	      p++; -	      CONTINUATION_CHAR; -	      p++; -	      CONTINUATION_CHAR; -	       -	      if ( (val < min)) -		goto error; - -	      if ( (!UNICODE_VALID(val))) -		goto error; -	    }  -	   -	 
 continue; -	   -	error: -	  return NULL; -	} +static inline int is_unicode_valid(uint32_t ch) { +    if (ch >= 0x110000) /* End of unicode space */ +        return 0; +    if ((ch & 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */ +        return 0; +    if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) /* Reserved */ +        return 0; +    if ((ch & 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */ +        return 0; +    return 1; +} + +static inline int is_continuation_char(uint8_t ch) { +    if ((ch & 0xc0) != 0x80) /* 10xxxxxx */ +        return 0; +    return 1; +} + +static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) { +    *u_ch <<= 6; +    *u_ch |= ch & 0x3f; +} + +const char* pa_utf8_valid (const char *str) { +    uint32_t val = 0; +    uint32_t min = 0; +    const uint8_t *p, *last; + +    for (p = (uint8_t*)str; *p; p++) { +        if (*p < 128) +            /* done */; +        else { +            last = p; + +            if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. 
*/ +                min = 128; +                val = *p & 0x1e; +                goto ONE_REMAINING; +            } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/ +                min = (1 << 11); +                val = *p & 0x0f; +                goto TWO_REMAINING; +            } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */ +                min = (1 << 16); +                val = *p & 0x07; +            } else +                goto error; + +            p++; +            if (!is_continuation_char(*p)) +                goto error; +            merge_continuation_char(&val, *p); + +TWO_REMAINING: +            p++; +            if (!is_continuation_char(*p)) +                goto error; +            merge_continuation_char(&val, *p); + +ONE_REMAINING: +            p++; +            if (!is_continuation_char(*p)) +                goto error; +            merge_continuation_char(&val, *p); + +            if (val < min) +                goto error; + +            if (!is_unicode_valid(val)) +                goto error; +        }      } -  return str; +    return str; + +error: +    return NULL;  }  | 
