summary | refs | log | tree | commit | diff | stats
path: root/gst/id3demux
diff options
context:
space:
mode:
author: Jan Schmidt <thaytan@mad.scientist.com> 2006-01-23 09:22:17 +0000
committer: Jan Schmidt <thaytan@mad.scientist.com> 2006-01-23 09:22:17 +0000
commit: 2ecee9a43d8e420eb21078711e766c5500bf8c04 (patch)
tree: 985bef3bae879dc4e8a7d614275d4a8919cd2fba /gst/id3demux
parent: e3ba1c0fd5f99782b7481f79e410a7cbf56c167b (diff)
gst/id3demux/: Rewrite parsing of text tags to handle multiple NULL terminated strings. Parse numeric genre strings a...
Original commit message from CVS: * gst/id3demux/id3tags.c: (id3demux_read_id3v2_tag): * gst/id3demux/id3tags.h: * gst/id3demux/id3v2frames.c: (id3demux_id3v2_parse_frame), (parse_comment_frame), (parse_text_identification_frame), (id3v2_tag_to_taglist), (id3v2_are_digits), (id3v2_genre_string_to_taglist), (id3v2_genre_fields_to_taglist), (parse_split_strings), (free_tag_strings): Rewrite parsing of text tags to handle multiple NULL terminated strings. Parse numeric genre strings and ID3v2 type "(3)(6)Alternative" style genre strings. Parse dates that are only YYYY or YYYY-mm format.
Diffstat (limited to 'gst/id3demux')
-rw-r--r-- gst/id3demux/id3tags.c 3
-rw-r--r-- gst/id3demux/id3tags.h 3
-rw-r--r-- gst/id3demux/id3v2frames.c 312
3 files changed, 235 insertions, 83 deletions
diff --git a/gst/id3demux/id3tags.c b/gst/id3demux/id3tags.c
index e25b4291..12765e61 100644
--- a/gst/id3demux/id3tags.c
+++ b/gst/id3demux/id3tags.c
@@ -198,6 +198,9 @@ id3demux_read_id3v2_tag (GstBuffer * buffer, guint * id3v2_size,
*tags = work.tags;
}
+ if (work.prev_genre)
+ g_free (work.prev_genre);
+
return result;
}
diff --git a/gst/id3demux/id3tags.h b/gst/id3demux/id3tags.h
index c81cd550..cdf165d8 100644
--- a/gst/id3demux/id3tags.h
+++ b/gst/id3demux/id3tags.h
@@ -72,6 +72,9 @@ typedef struct {
guint8 *parse_data;
guint parse_size;
+
+ /* Previous genre string, for simple duplicate removal */
+ gchar *prev_genre;
} ID3TagsWorking;
enum {
diff --git a/gst/id3demux/id3v2frames.c b/gst/id3demux/id3v2frames.c
index ed4c1042..fb99f10d 100644
--- a/gst/id3demux/id3v2frames.c
+++ b/gst/id3demux/id3v2frames.c
@@ -23,6 +23,7 @@
#endif
#include <string.h>
+#include <stdlib.h>
#include <gst/tag/tag.h>
#ifdef HAVE_ZLIB
@@ -35,11 +36,16 @@ GST_DEBUG_CATEGORY_EXTERN (id3demux_debug);
#define GST_CAT_DEFAULT (id3demux_debug)
static gchar *parse_comment_frame (ID3TagsWorking * work);
-static gchar *parse_text_identification_frame (ID3TagsWorking * work);
+static GArray *parse_text_identification_frame (ID3TagsWorking * work);
static gboolean id3v2_tag_to_taglist (ID3TagsWorking * work,
- const gchar * tag_name, gchar * tag_str);
-static void parse_split_strings (ID3TagsWorking * work, guint8 encoding,
- gchar ** field1, gchar ** field2);
+ const gchar * tag_name, const gchar * tag_str);
+/* Parse a single string into an array of gchar* */
+static void parse_split_strings (guint8 encoding, gchar * data, gint data_size,
+ GArray ** out_fields);
+static void free_tag_strings (GArray * fields);
+static gboolean
+id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
+ GArray * tag_fields);
#define ID3V2_ENCODING_ISO8859 0x00
#define ID3V2_ENCODING_UTF16 0x01
@@ -57,6 +63,7 @@ id3demux_id3v2_parse_frame (ID3TagsWorking * work)
guint8 *frame_data = work->hdr.frame_data;
guint frame_data_size = work->cur_frame_size;
gchar *tag_str = NULL;
+ GArray *tag_fields = NULL;
/* Check that the frame id is valid */
for (i = 0; i < 5 && work->frame_id[i] != '\0'; i++) {
@@ -118,7 +125,7 @@ id3demux_id3v2_parse_frame (ID3TagsWorking * work)
if (work->frame_id[0] == 'T') {
if (strcmp (work->frame_id, "TXXX") != 0) {
/* Text identification frame */
- tag_str = parse_text_identification_frame (work);
+ tag_fields = parse_text_identification_frame (work);
} else {
/* Handle user text frame */
}
@@ -142,6 +149,16 @@ id3demux_id3v2_parse_frame (ID3TagsWorking * work)
result = id3v2_tag_to_taglist (work, tag_name, tag_str);
g_free (tag_str);
}
+ if (tag_fields != NULL) {
+ if (strcmp (work->frame_id, "TCON") == 0) {
+ /* Genre strings need special treatment */
+ result |= id3v2_genre_fields_to_taglist (work, tag_name, tag_fields);
+ } else {
+ tag_str = g_array_index (tag_fields, gchar *, 0);
+ result |= id3v2_tag_to_taglist (work, tag_name, tag_str);
+ }
+ free_tag_strings (tag_fields);
+ }
return result;
}
@@ -151,9 +168,9 @@ parse_comment_frame (ID3TagsWorking * work)
{
guint8 encoding;
gchar language[4];
- gchar *description = NULL;
- gchar *text = NULL;
+ GArray *fields = NULL;
gchar *out_str = NULL;
+ gchar *description, *text;
if (work->parse_size < 6)
return NULL;
@@ -164,12 +181,15 @@ parse_comment_frame (ID3TagsWorking * work)
language[2] = work->parse_data[3];
language[3] = 0;
- parse_split_strings (work, encoding, &description, &text);
+ parse_split_strings (encoding, (gchar *) work->parse_data + 4,
+ work->parse_size - 4, &fields);
- if (text == NULL || description == NULL) {
+ if (fields == NULL || fields->len < 2) {
GST_WARNING ("Failed to decode comment frame");
goto fail;
}
+ description = g_array_index (fields, gchar *, 0);
+ text = g_array_index (fields, gchar *, 1);
if (!g_utf8_validate (text, -1, NULL)) {
GST_WARNING ("Converted string is not valid utf-8");
@@ -184,53 +204,30 @@ parse_comment_frame (ID3TagsWorking * work)
}
fail:
- g_free (description);
- g_free (text);
+ free_tag_strings (fields);
return out_str;
}
+/* Split the payload of a text identification frame into its strings.
+ *
+ * The first byte of work->parse_data is taken as the text encoding; the
+ * remaining work->parse_size - 1 bytes are handed to parse_split_strings().
+ *
+ * Returns the array filled in by parse_split_strings(), or NULL when the
+ * frame is shorter than 2 bytes or no string was produced. The caller
+ * releases the result with free_tag_strings(). */
-static gchar *
+static GArray *
parse_text_identification_frame (ID3TagsWorking * work)
{
guchar encoding;
- gchar *text = NULL;
+ GArray *fields = NULL;
if (work->parse_size < 2)
return NULL;
encoding = work->parse_data[0];
+ parse_split_strings (encoding, (gchar *) work->parse_data + 1,
+ work->parse_size - 1, &fields);
- switch (encoding) {
- case ID3V2_ENCODING_ISO8859:
- text = g_convert ((gchar *) (work->parse_data + 1),
- work->parse_size - 1, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
- break;
- case ID3V2_ENCODING_UTF8:
- text = g_strndup ((gchar *) (work->parse_data + 1), work->parse_size - 1);
- break;
- case ID3V2_ENCODING_UTF16:
- text = g_convert ((gchar *) (work->parse_data + 1),
- work->parse_size - 1, "UTF-8", "UTF-16", NULL, NULL, NULL);
- break;
- case ID3V2_ENCODING_UTF16BE:
- text = g_convert ((gchar *) (work->parse_data + 1),
- work->parse_size - 1, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
- break;
- }
-
- if (text != NULL && !g_utf8_validate (text, -1, NULL)) {
- GST_WARNING ("Converted string is not valid utf-8");
- g_free (text);
- text = NULL;
- }
-
- return text;
+ return fields;
}
static gboolean
id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
- gchar * tag_str)
+ const gchar * tag_str)
{
GType tag_type = gst_tag_get_type (tag_name);
GstTagList *tag_list = work->tags;
@@ -243,17 +240,7 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
tmp = strtoul ((char *) tag_str, &check, 10);
- if (strcmp (tag_name, GST_TAG_DATE) == 0) {
- GDate *d;
-
- if (*check != '\0')
- break;
- if (tmp == 0)
- break;
- d = g_date_new_dmy (1, 1, tmp);
- tmp = g_date_get_julian (d);
- g_date_free (d);
- } else if (strcmp (tag_name, GST_TAG_TRACK_NUMBER) == 0) {
+ if (strcmp (tag_name, GST_TAG_TRACK_NUMBER) == 0) {
if (*check == '/') {
guint total;
@@ -290,7 +277,7 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
guint64 tmp;
g_assert (strcmp (tag_name, GST_TAG_DURATION) == 0);
- tmp = strtoul ((char *) tag_str, NULL, 10);
+ tmp = strtoul (tag_str, NULL, 10);
if (tmp == 0) {
break;
}
@@ -299,19 +286,41 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
break;
}
case G_TYPE_STRING:{
+ if (!strcmp (tag_name, GST_TAG_GENRE)) {
+ if (work->prev_genre && !strcmp (tag_str, work->prev_genre))
+ break; /* Same as the last genre */
+ g_free (work->prev_genre);
+ work->prev_genre = g_strdup (tag_str);
+ }
gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
- tag_name, (const gchar *) tag_str, NULL);
+ tag_name, tag_str, NULL);
break;
}
- /* handles GST_TYPE_DATE and anything else */
+
default:{
+ gchar *tmp = NULL;
+
+ if (tag_type == GST_TYPE_DATE) {
+ guint year = 1901, month = 1, day = 1;
+
+ /* Dates can be yyyy-MM-dd, yyyy-MM or yyyy, but we need
+ * the first type */
+ if (sscanf (tag_str, "%04u-%02u-%02u", &year, &month, &day) == 0)
+ break;
+
+ tmp = g_strdup_printf ("%04u-%02u-%02u", year, month, day);
+ tag_str = tmp;
+ break;
+ }
+
+ /* handles anything else */
GValue src = { 0, };
GValue dest = { 0, };
g_value_init (&src, G_TYPE_STRING);
g_value_set_string (&src, (const gchar *) tag_str);
-
g_value_init (&dest, tag_type);
+
if (g_value_transform (&src, &dest)) {
gst_tag_list_add_values (tag_list, GST_TAG_MERGE_APPEND,
tag_name, &dest, NULL);
@@ -319,8 +328,10 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
GST_WARNING ("Failed to transform tag from string to type '%s'",
g_type_name (tag_type));
}
+
g_value_unset (&src);
g_value_unset (&dest);
+ g_free (tmp);
break;
}
}
@@ -328,61 +339,196 @@ id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
return TRUE;
}
+/* Report whether the first size characters of chars are all ASCII digits.
+ * A size of zero or less yields TRUE, since no character is rejected. */
+static gboolean
+id3v2_are_digits (const gchar * chars, gint size)
+{
+  const gchar *cur = chars;
+  gint remaining = size;
+
+  while (remaining > 0) {
+    if (!g_ascii_isdigit (*cur))
+      return FALSE;
+    cur++;
+    remaining--;
+  }
+
+  return TRUE;
+}
+
+/* Add a single genre token to the tag list.
+ *
+ * tag_str must be NUL-terminated; len is the number of its characters to
+ * examine. The token is handled as follows:
+ *  - all digits: looked up with gst_tag_id3_genre_get() and, when the
+ *    lookup succeeds, added under the returned name;
+ *  - "RX" / "CR" (compared case-insensitively): added as "Remix" / "Cover";
+ *  - anything else, including a number the lookup does not know: added
+ *    verbatim.
+ *
+ * Returns FALSE for a NULL or empty token, otherwise the result of
+ * id3v2_tag_to_taglist(). */
+static gboolean
+id3v2_genre_string_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
+    const gchar * tag_str, gint len)
+{
+  g_return_val_if_fail (tag_str != NULL, FALSE);
+
+  /* An empty token (e.g. from "()") is not a number; without this check
+   * id3v2_are_digits() is vacuously TRUE and strtol() would select genre 0 */
+  if (len <= 0)
+    return FALSE;
+
+  /* If it's a number, it might be a defined genre */
+  if (id3v2_are_digits (tag_str, len)) {
+    const gchar *genre = gst_tag_id3_genre_get (strtol (tag_str, NULL, 10));
+
+    if (genre != NULL)
+      return id3v2_tag_to_taglist (work, tag_name, genre);
+    /* Unknown number: tag_str is left untouched so the code below can
+     * still add it as plain text instead of dereferencing NULL */
+  }
+
+  /* Otherwise it might be "RX" or "CR" */
+  if (len == 2) {
+    if (g_ascii_strncasecmp ("rx", tag_str, len) == 0)
+      return id3v2_tag_to_taglist (work, tag_name, "Remix");
+
+    if (g_ascii_strncasecmp ("cr", tag_str, len) == 0)
+      return id3v2_tag_to_taglist (work, tag_name, "Cover");
+  }
+
+  /* Otherwise it's a string */
+  return id3v2_tag_to_taglist (work, tag_name, tag_str);
+}
+
+/* Convert every string of a genre (TCON) frame into genre tags.
+ *
+ * Each non-NULL entry of tag_fields is processed in turn. When
+ * work->hdr.version is <= 0x300, leading "(token)" references are peeled
+ * off one by one and passed to id3v2_genre_string_to_taglist(); a leading
+ * "((" is the escape for a literal '(' and ends the references. Whatever
+ * text remains afterwards is passed on as one more genre string.
+ *
+ * Returns the OR of the results of all id3v2_genre_string_to_taglist()
+ * calls, i.e. FALSE when nothing was added. */
+static gboolean
+id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
+    GArray * tag_fields)
+{
+  gboolean result = FALSE;
+  guint i;
+
+  for (i = 0; i < tag_fields->len; i++) {
+    /* Index with i: reading element 0 on every pass would re-add the
+     * first field and silently drop all the others */
+    const gchar *tag_str = g_array_index (tag_fields, gchar *, i);
+    gint len;
+
+    if (tag_str == NULL)
+      continue;
+
+    len = strlen (tag_str);
+    if (work->hdr.version <= 0x300) {   /* <= 2.3.0 */
+      /* Check for genre numbers wrapped in parentheses, possibly
+       * followed by a string */
+      while (len >= 2) {
+        gint pos;
+        gboolean found = FALSE;
+
+        /* "((" escapes a literal '(' at the start of the free text:
+         * drop the escape character and stop looking for references */
+        if (tag_str[0] == '(' && tag_str[1] == '(') {
+          tag_str++;
+          len--;
+          break;
+        }
+
+        /* A reference has to open with '('; otherwise a ')' later in
+         * plain text such as "Rock (live)" would be mistaken for one */
+        if (tag_str[0] != '(')
+          break;
+
+        for (pos = 1; pos < len; pos++) {
+          if (tag_str[pos] == ')') {
+            gchar *tmp_str;
+
+            /* Copy the text between the parentheses so it is
+             * NUL-terminated for the callee */
+            tmp_str = g_strndup (tag_str + 1, pos - 1);
+            result |=
+                id3v2_genre_string_to_taglist (work, tag_name, tmp_str,
+                pos - 1);
+            g_free (tmp_str);
+            tag_str += pos + 1;
+            len -= pos + 1;
+            found = TRUE;
+            break;
+          }
+        }
+        if (!found)
+          break;                /* There was no closing parenthesis */
+      }
+    }
+
+    if (len > 0)
+      result |= id3v2_genre_string_to_taglist (work, tag_name, tag_str, len);
+  }
+  return result;
+}
+
+/* Split data_size bytes at data into separate strings and convert each
+ * one to UTF-8.
+ *
+ * encoding selects the source encoding (one of the ID3V2_ENCODING_*
+ * values); an unrecognised value produces no strings. For ISO-8859-1 and
+ * UTF-8 every zero byte ends a string; for the UTF-16 variants a pair of
+ * zero bytes does. Bytes left after the last terminator are converted as
+ * a final string. Strings whose conversion fails are skipped.
+ *
+ * On return *out_fields points to a new GArray of gchar * when at least
+ * one string was produced; otherwise *out_fields is left untouched.
+ * Release the array with free_tag_strings(). */
static void
-parse_split_strings (ID3TagsWorking * work, guint8 encoding,
- gchar ** field1, gchar ** field2)
+parse_split_strings (guint8 encoding, gchar * data, gint data_size,
+ GArray ** out_fields)
{
- guint text_pos;
+ GArray *fields;
+ gchar *field;
+ gint text_pos;
+ gint prev = 0;
- *field1 = *field2 = NULL;
+ g_return_if_fail (out_fields != NULL);
+
+ /* Allocate only after the argument check so that a failed check
+ * cannot leak the array */
+ fields = g_array_new (FALSE, TRUE, sizeof (gchar *));
switch (encoding) {
case ID3V2_ENCODING_ISO8859:
- for (text_pos = 4; text_pos < work->parse_size - 5; text_pos++) {
- if (work->parse_data[text_pos] == 0) {
- *field1 = g_convert ((gchar *) (work->parse_data + 4),
- text_pos - 4, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
- *field2 = g_convert ((gchar *) (work->parse_data + text_pos + 5),
- work->parse_size - text_pos - 5,
+ for (text_pos = 0; text_pos < data_size; text_pos++) {
+ if (data[text_pos] == 0) {
+ field = g_convert (data + prev, text_pos - prev + 1,
"UTF-8", "ISO-8859-1", NULL, NULL, NULL);
- break;
+ if (field)
+ g_array_append_val (fields, field);
+ prev = text_pos + 1;
}
}
+ if (data_size - prev > 0 && data[prev] != 0x00) {
+ field = g_convert (data + prev, data_size - prev,
+ "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
+ if (field)
+ g_array_append_val (fields, field);
+ }
+
break;
case ID3V2_ENCODING_UTF8:
- *field1 = g_strndup ((gchar *) (work->parse_data + 4),
- work->parse_size - 4);
- text_pos = 4 + strlen (*field1) + 1; /* Offset by one more for the null */
- if (text_pos < work->parse_size) {
- *field2 = g_strndup ((gchar *) (work->parse_data + text_pos),
- work->parse_size - text_pos);
+ for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) {
+ /* Split on the NUL terminator, as the ISO-8859-1 case does; testing
+ * for a non-zero byte would cut a field after every character */
+ if (data[text_pos] == 0) {
+ field = g_strndup (data + prev, text_pos - prev + 1);
+ if (field)
+ g_array_append_val (fields, field);
+ prev = text_pos + 1;
+ }
+ }
+ if (data_size - prev > 0 && data[prev] != 0x00) {
+ field = g_strndup (data + prev, data_size - prev);
+ if (field)
+ g_array_append_val (fields, field);
}
break;
case ID3V2_ENCODING_UTF16:
case ID3V2_ENCODING_UTF16BE:
{
/* Find '\0\0' terminator */
- for (text_pos = 4; text_pos < work->parse_size - 6; text_pos++) {
- if (work->parse_data[text_pos] == 0 &&
- work->parse_data[text_pos + 1] == 0) {
- /* found our delimiter */
+ for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) {
+ if (data[text_pos] == 0 && data[text_pos + 1] == 0) {
+ /* found a delimiter */
if (encoding == ID3V2_ENCODING_UTF16) {
- *field1 = g_convert ((gchar *) (work->parse_data + 4),
- text_pos - 4, "UTF-8", "UTF-16", NULL, NULL, NULL);
- *field2 = g_convert ((gchar *) (work->parse_data + text_pos + 6),
- work->parse_size - text_pos - 6,
+ field = g_convert (data + prev, text_pos - prev + 2,
"UTF-8", "UTF-16", NULL, NULL, NULL);
} else {
- *field1 = g_convert ((gchar *) (work->parse_data + 4),
- text_pos - 4, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
- *field2 = g_convert ((gchar *) (work->parse_data + text_pos + 6),
- work->parse_size - text_pos - 6,
+ field = g_convert (data + prev, text_pos - prev + 2,
"UTF-8", "UTF-16BE", NULL, NULL, NULL);
}
+ if (field)
+ g_array_append_val (fields, field);
+ text_pos++; /* Advance to the 2nd NUL terminator byte */
+ prev = text_pos + 1;
+ /* NOTE(review): the scan stops at the first delimiter, so UTF-16
+ * data yields at most two fields - confirm this is intended */
break;
}
}
+ if (data_size - prev > 1 &&
+ (data[prev] != 0x00 || data[prev + 1] != 0x00)) {
+ /* There were 2 or more non-null chars left, convert those too */
+ if (encoding == ID3V2_ENCODING_UTF16) {
+ field = g_convert (data + prev, data_size - prev,
+ "UTF-8", "UTF-16", NULL, NULL, NULL);
+ } else {
+ field = g_convert (data + prev, data_size - prev,
+ "UTF-8", "UTF-16BE", NULL, NULL, NULL);
+ }
+ if (field)
+ g_array_append_val (fields, field);
+ }
break;
}
}
+ /* Hand the array over only when it holds something; otherwise drop it */
+ if (fields->len > 0)
+ *out_fields = fields;
+ else
+ g_array_free (fields, TRUE);
+}
+
+/* Release an array of gchar * together with every string stored in it.
+ * Passing NULL is allowed and does nothing. */
+static void
+free_tag_strings (GArray * fields)
+{
+  guint idx;
+
+  if (fields == NULL)
+    return;
+
+  for (idx = 0; idx < fields->len; idx++)
+    g_free (g_array_index (fields, gchar *, idx));
+
+  g_array_free (fields, TRUE);
}