diff options
author | Jan Schmidt <thaytan@mad.scientist.com> | 2006-01-23 09:22:17 +0000 |
---|---|---|
committer | Jan Schmidt <thaytan@mad.scientist.com> | 2006-01-23 09:22:17 +0000 |
commit | 2ecee9a43d8e420eb21078711e766c5500bf8c04 (patch) | |
tree | 985bef3bae879dc4e8a7d614275d4a8919cd2fba /gst/id3demux/id3v2frames.c | |
parent | e3ba1c0fd5f99782b7481f79e410a7cbf56c167b (diff) |
gst/id3demux/: Rewrite parsing of text tags to handle multiple NUL-terminated strings. Parse numeric genre strings and ID3v2 "(3)(6)Alternative"-style genre strings.
Original commit message from CVS:
* gst/id3demux/id3tags.c: (id3demux_read_id3v2_tag):
* gst/id3demux/id3tags.h:
* gst/id3demux/id3v2frames.c: (id3demux_id3v2_parse_frame),
(parse_comment_frame), (parse_text_identification_frame),
(id3v2_tag_to_taglist), (id3v2_are_digits),
(id3v2_genre_string_to_taglist), (id3v2_genre_fields_to_taglist),
(parse_split_strings), (free_tag_strings):
Rewrite parsing of text tags to handle multiple NUL-terminated
strings. Parse numeric genre strings and ID3v2
"(3)(6)Alternative"-style genre strings.
Parse dates that are given only in YYYY or YYYY-MM format.
Diffstat (limited to 'gst/id3demux/id3v2frames.c')
-rw-r--r-- | gst/id3demux/id3v2frames.c | 312 |
1 files changed, 229 insertions, 83 deletions
diff --git a/gst/id3demux/id3v2frames.c b/gst/id3demux/id3v2frames.c index ed4c1042..fb99f10d 100644 --- a/gst/id3demux/id3v2frames.c +++ b/gst/id3demux/id3v2frames.c @@ -23,6 +23,7 @@ #endif #include <string.h> +#include <stdlib.h> #include <gst/tag/tag.h> #ifdef HAVE_ZLIB @@ -35,11 +36,16 @@ GST_DEBUG_CATEGORY_EXTERN (id3demux_debug); #define GST_CAT_DEFAULT (id3demux_debug) static gchar *parse_comment_frame (ID3TagsWorking * work); -static gchar *parse_text_identification_frame (ID3TagsWorking * work); +static GArray *parse_text_identification_frame (ID3TagsWorking * work); static gboolean id3v2_tag_to_taglist (ID3TagsWorking * work, - const gchar * tag_name, gchar * tag_str); -static void parse_split_strings (ID3TagsWorking * work, guint8 encoding, - gchar ** field1, gchar ** field2); + const gchar * tag_name, const gchar * tag_str); +/* Parse a single string into an array of gchar* */ +static void parse_split_strings (guint8 encoding, gchar * data, gint data_size, + GArray ** out_fields); +static void free_tag_strings (GArray * fields); +static gboolean +id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name, + GArray * tag_fields); #define ID3V2_ENCODING_ISO8859 0x00 #define ID3V2_ENCODING_UTF16 0x01 @@ -57,6 +63,7 @@ id3demux_id3v2_parse_frame (ID3TagsWorking * work) guint8 *frame_data = work->hdr.frame_data; guint frame_data_size = work->cur_frame_size; gchar *tag_str = NULL; + GArray *tag_fields = NULL; /* Check that the frame id is valid */ for (i = 0; i < 5 && work->frame_id[i] != '\0'; i++) { @@ -118,7 +125,7 @@ id3demux_id3v2_parse_frame (ID3TagsWorking * work) if (work->frame_id[0] == 'T') { if (strcmp (work->frame_id, "TXXX") != 0) { /* Text identification frame */ - tag_str = parse_text_identification_frame (work); + tag_fields = parse_text_identification_frame (work); } else { /* Handle user text frame */ } @@ -142,6 +149,16 @@ id3demux_id3v2_parse_frame (ID3TagsWorking * work) result = id3v2_tag_to_taglist (work, tag_name, 
tag_str); g_free (tag_str); } + if (tag_fields != NULL) { + if (strcmp (work->frame_id, "TCON") == 0) { + /* Genre strings need special treatment */ + result |= id3v2_genre_fields_to_taglist (work, tag_name, tag_fields); + } else { + tag_str = g_array_index (tag_fields, gchar *, 0); + result |= id3v2_tag_to_taglist (work, tag_name, tag_str); + } + free_tag_strings (tag_fields); + } return result; } @@ -151,9 +168,9 @@ parse_comment_frame (ID3TagsWorking * work) { guint8 encoding; gchar language[4]; - gchar *description = NULL; - gchar *text = NULL; + GArray *fields = NULL; gchar *out_str = NULL; + gchar *description, *text; if (work->parse_size < 6) return NULL; @@ -164,12 +181,15 @@ parse_comment_frame (ID3TagsWorking * work) language[2] = work->parse_data[3]; language[3] = 0; - parse_split_strings (work, encoding, &description, &text); + parse_split_strings (encoding, (gchar *) work->parse_data + 4, + work->parse_size - 4, &fields); - if (text == NULL || description == NULL) { + if (fields == NULL || fields->len < 2) { GST_WARNING ("Failed to decode comment frame"); goto fail; } + description = g_array_index (fields, gchar *, 0); + text = g_array_index (fields, gchar *, 1); if (!g_utf8_validate (text, -1, NULL)) { GST_WARNING ("Converted string is not valid utf-8"); @@ -184,53 +204,30 @@ parse_comment_frame (ID3TagsWorking * work) } fail: - g_free (description); - g_free (text); + free_tag_strings (fields); return out_str; } -static gchar * +static GArray * parse_text_identification_frame (ID3TagsWorking * work) { guchar encoding; - gchar *text = NULL; + GArray *fields = NULL; if (work->parse_size < 2) return NULL; encoding = work->parse_data[0]; + parse_split_strings (encoding, (gchar *) work->parse_data + 1, + work->parse_size - 1, &fields); - switch (encoding) { - case ID3V2_ENCODING_ISO8859: - text = g_convert ((gchar *) (work->parse_data + 1), - work->parse_size - 1, "UTF-8", "ISO-8859-1", NULL, NULL, NULL); - break; - case ID3V2_ENCODING_UTF8: - text = 
g_strndup ((gchar *) (work->parse_data + 1), work->parse_size - 1); - break; - case ID3V2_ENCODING_UTF16: - text = g_convert ((gchar *) (work->parse_data + 1), - work->parse_size - 1, "UTF-8", "UTF-16", NULL, NULL, NULL); - break; - case ID3V2_ENCODING_UTF16BE: - text = g_convert ((gchar *) (work->parse_data + 1), - work->parse_size - 1, "UTF-8", "UTF-16BE", NULL, NULL, NULL); - break; - } - - if (text != NULL && !g_utf8_validate (text, -1, NULL)) { - GST_WARNING ("Converted string is not valid utf-8"); - g_free (text); - text = NULL; - } - - return text; + return fields; } static gboolean id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name, - gchar * tag_str) + const gchar * tag_str) { GType tag_type = gst_tag_get_type (tag_name); GstTagList *tag_list = work->tags; @@ -243,17 +240,7 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name, tmp = strtoul ((char *) tag_str, &check, 10); - if (strcmp (tag_name, GST_TAG_DATE) == 0) { - GDate *d; - - if (*check != '\0') - break; - if (tmp == 0) - break; - d = g_date_new_dmy (1, 1, tmp); - tmp = g_date_get_julian (d); - g_date_free (d); - } else if (strcmp (tag_name, GST_TAG_TRACK_NUMBER) == 0) { + if (strcmp (tag_name, GST_TAG_TRACK_NUMBER) == 0) { if (*check == '/') { guint total; @@ -290,7 +277,7 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name, guint64 tmp; g_assert (strcmp (tag_name, GST_TAG_DURATION) == 0); - tmp = strtoul ((char *) tag_str, NULL, 10); + tmp = strtoul (tag_str, NULL, 10); if (tmp == 0) { break; } @@ -299,19 +286,41 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name, break; } case G_TYPE_STRING:{ + if (!strcmp (tag_name, GST_TAG_GENRE)) { + if (work->prev_genre && !strcmp (tag_str, work->prev_genre)) + break; /* Same as the last genre */ + g_free (work->prev_genre); + work->prev_genre = g_strdup (tag_str); + } gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND, - tag_name, (const gchar *) tag_str, NULL); + tag_name, tag_str, 
NULL); break; } - /* handles GST_TYPE_DATE and anything else */ + default:{ + gchar *tmp = NULL; + + if (tag_type == GST_TYPE_DATE) { + guint year = 1901, month = 1, day = 1; + + /* Dates can be yyyy-MM-dd, yyyy-MM or yyyy, but we need + * the first type */ + if (sscanf (tag_str, "%04u-%02u-%02u", &year, &month, &day) == 0) + break; + + tmp = g_strdup_printf ("%04u-%02u-%02u", year, month, day); + tag_str = tmp; + break; + } + + /* handles anything else */ GValue src = { 0, }; GValue dest = { 0, }; g_value_init (&src, G_TYPE_STRING); g_value_set_string (&src, (const gchar *) tag_str); - g_value_init (&dest, tag_type); + if (g_value_transform (&src, &dest)) { gst_tag_list_add_values (tag_list, GST_TAG_MERGE_APPEND, tag_name, &dest, NULL); @@ -319,8 +328,10 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name, GST_WARNING ("Failed to transform tag from string to type '%s'", g_type_name (tag_type)); } + g_value_unset (&src); g_value_unset (&dest); + g_free (tmp); break; } } @@ -328,61 +339,196 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name, return TRUE; } +/* Check that an array of characters contains only digits */ +static gboolean +id3v2_are_digits (const gchar * chars, gint size) +{ + gint i; + + for (i = 0; i < size; i++) { + if (!g_ascii_isdigit (chars[i])) + return FALSE; + } + return TRUE; +} + +static gboolean +id3v2_genre_string_to_taglist (ID3TagsWorking * work, const gchar * tag_name, + const gchar * tag_str, gint len) +{ + g_return_val_if_fail (tag_str != NULL, FALSE); + + /* If it's a number, it might be a defined genre */ + if (id3v2_are_digits (tag_str, len)) { + tag_str = gst_tag_id3_genre_get (strtol (tag_str, NULL, 10)); + if (tag_str != NULL) + return id3v2_tag_to_taglist (work, tag_name, tag_str); + } + /* Otherwise it might be "RX" or "CR" */ + if (len == 2) { + if (g_ascii_strncasecmp ("rx", tag_str, len) == 0) + return id3v2_tag_to_taglist (work, tag_name, "Remix"); + + if (g_ascii_strncasecmp ("cr", 
tag_str, len) == 0) + return id3v2_tag_to_taglist (work, tag_name, "Cover"); + } + + /* Otherwise it's a string */ + return id3v2_tag_to_taglist (work, tag_name, tag_str); +} + +static gboolean +id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name, + GArray * tag_fields) +{ + gchar *tag_str = NULL; + gboolean result = FALSE; + gint i; + + for (i = 0; i < tag_fields->len; i++) { + gint len; + + tag_str = g_array_index (tag_fields, gchar *, 0); + if (tag_str == NULL) + continue; + + len = strlen (tag_str); + if (work->hdr.version <= 0x300) { /* <= 2.3.0 */ + /* Check for genre numbers wrapped in parentheses, possibly + * followed by a string */ + while (len >= 2) { + gint pos; + gboolean found = FALSE; + + /* Double parenthesis ends the numeric genres */ + if (tag_str[0] == '(' && tag_str[1] == '(') + break; + + for (pos = 1; pos < len; pos++) { + if (tag_str[pos] == ')') { + gchar *tmp_str; + + tmp_str = g_strndup (tag_str + 1, pos - 1); + result |= + id3v2_genre_string_to_taglist (work, tag_name, tmp_str, + pos - 1); + g_free (tmp_str); + tag_str += pos + 1; + len -= pos + 1; + found = TRUE; + break; + } + } + if (!found) + break; /* There was no closing parenthesis */ + } + } + + if (len > 0) + result |= id3v2_genre_string_to_taglist (work, tag_name, tag_str, len); + } + return result; +} + static void -parse_split_strings (ID3TagsWorking * work, guint8 encoding, - gchar ** field1, gchar ** field2) +parse_split_strings (guint8 encoding, gchar * data, gint data_size, + GArray ** out_fields) { - guint text_pos; + GArray *fields = g_array_new (FALSE, TRUE, sizeof (gchar *)); + gchar *field; + gint text_pos; + gint prev = 0; - *field1 = *field2 = NULL; + g_return_if_fail (out_fields != NULL); switch (encoding) { case ID3V2_ENCODING_ISO8859: - for (text_pos = 4; text_pos < work->parse_size - 5; text_pos++) { - if (work->parse_data[text_pos] == 0) { - *field1 = g_convert ((gchar *) (work->parse_data + 4), - text_pos - 4, "UTF-8", "ISO-8859-1", 
NULL, NULL, NULL); - *field2 = g_convert ((gchar *) (work->parse_data + text_pos + 5), - work->parse_size - text_pos - 5, + for (text_pos = 0; text_pos < data_size; text_pos++) { + if (data[text_pos] == 0) { + field = g_convert (data + prev, text_pos - prev + 1, "UTF-8", "ISO-8859-1", NULL, NULL, NULL); - break; + if (field) + g_array_append_val (fields, field); + prev = text_pos + 1; } } + if (data_size - prev > 0 && data[prev] != 0x00) { + field = g_convert (data + prev, data_size - prev, + "UTF-8", "ISO-8859-1", NULL, NULL, NULL); + if (field) + g_array_append_val (fields, field); + } + break; case ID3V2_ENCODING_UTF8: - *field1 = g_strndup ((gchar *) (work->parse_data + 4), - work->parse_size - 4); - text_pos = 4 + strlen (*field1) + 1; /* Offset by one more for the null */ - if (text_pos < work->parse_size) { - *field2 = g_strndup ((gchar *) (work->parse_data + text_pos), - work->parse_size - text_pos); + for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) { + if (data[text_pos]) { + field = g_strndup (data + prev, text_pos - prev + 1); + if (field) + g_array_append_val (fields, field); + prev = text_pos + 1; + } + } + if (data_size - prev > 0 && data[prev] != 0x00) { + field = g_strndup (data + prev, data_size - prev); + if (field) + g_array_append_val (fields, field); } break; case ID3V2_ENCODING_UTF16: case ID3V2_ENCODING_UTF16BE: { /* Find '\0\0' terminator */ - for (text_pos = 4; text_pos < work->parse_size - 6; text_pos++) { - if (work->parse_data[text_pos] == 0 && - work->parse_data[text_pos + 1] == 0) { - /* found our delimiter */ + for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) { + if (data[text_pos] == 0 && data[text_pos + 1] == 0) { + /* found a delimiter */ if (encoding == ID3V2_ENCODING_UTF16) { - *field1 = g_convert ((gchar *) (work->parse_data + 4), - text_pos - 4, "UTF-8", "UTF-16", NULL, NULL, NULL); - *field2 = g_convert ((gchar *) (work->parse_data + text_pos + 6), - work->parse_size - text_pos - 6, + field = 
g_convert (data + prev, text_pos - prev + 2, "UTF-8", "UTF-16", NULL, NULL, NULL); } else { - *field1 = g_convert ((gchar *) (work->parse_data + 4), - text_pos - 4, "UTF-8", "UTF-16BE", NULL, NULL, NULL); - *field2 = g_convert ((gchar *) (work->parse_data + text_pos + 6), - work->parse_size - text_pos - 6, + field = g_convert (data + prev, text_pos - prev + 2, "UTF-8", "UTF-16BE", NULL, NULL, NULL); } + if (field) + g_array_append_val (fields, field); + text_pos++; /* Advance to the 2nd NULL terminator */ + prev = text_pos + 1; break; } } + if (data_size - prev > 1 && + (data[prev] != 0x00 || data[prev + 1] != 0x00)) { + /* There were 2 or more non-null chars left, convert those too */ + if (encoding == ID3V2_ENCODING_UTF16) { + field = g_convert (data + prev, data_size - prev, + "UTF-8", "UTF-16", NULL, NULL, NULL); + } else { + field = g_convert (data + prev, data_size - prev, + "UTF-8", "UTF-16BE", NULL, NULL, NULL); + } + if (field) + g_array_append_val (fields, field); + } break; } } + if (fields->len > 0) + *out_fields = fields; + else + g_array_free (fields, TRUE); +} + +static void +free_tag_strings (GArray * fields) +{ + if (fields) { + gint i; + gchar *c; + + for (i = 0; i < fields->len; i++) { + c = g_array_index (fields, gchar *, i); + g_free (c); + } + g_array_free (fields, TRUE); + } } |