diff options
-rw-r--r-- | ChangeLog | 16 | ||||
-rw-r--r-- | gst/autodetect/gstautoaudiosink.c | 5 | ||||
-rw-r--r-- | gst/autodetect/gstautovideosink.c | 5 | ||||
-rw-r--r-- | gst/id3demux/id3v2frames.c | 112 |
4 files changed, 93 insertions, 45 deletions
@@ -1,3 +1,19 @@ +2006-05-16 Jan Schmidt <thaytan@mad.scientist.com> + + * gst/autodetect/gstautoaudiosink.c: + (gst_auto_audio_sink_find_best): + * gst/autodetect/gstautovideosink.c: + (gst_auto_video_sink_find_best): + Make the name of the child element be based on the name of the + parent, so that debug output is more useful. + + * gst/id3demux/id3v2frames.c: (find_utf16_bom), + (parse_insert_string_field), (parse_split_strings): + Rework string parsing to always walk over BOM markers in UTF16 + strings, using the endianness indicated by the innermost one, + then trying the opposite endianness if that fails to convert + to valid UTF-8. Fixes #341774 + 2006-05-16 Zaheer Abbas Merali <zaheerabbas at merali dot org> Patch from: Matthieu <matthieu at fluendo dot com> diff --git a/gst/autodetect/gstautoaudiosink.c b/gst/autodetect/gstautoaudiosink.c index 5743ef33..5e70ac79 100644 --- a/gst/autodetect/gstautoaudiosink.c +++ b/gst/autodetect/gstautoaudiosink.c @@ -159,6 +159,8 @@ gst_auto_audio_sink_find_best (GstAutoAudioSink * sink) GstMessage *message = NULL; GSList *errors = NULL; GstBus *bus = gst_bus_new (); + gchar *child_name = g_strdup_printf ("%s-actual-sink", + GST_OBJECT_NAME (sink)); list = gst_registry_feature_filter (gst_registry_get_default (), (GstPluginFeatureFilter) gst_auto_audio_sink_factory_filter, FALSE, sink); @@ -178,7 +180,7 @@ gst_auto_audio_sink_find_best (GstAutoAudioSink * sink) GstElementFactory *f = GST_ELEMENT_FACTORY (item->data); GstElement *el; - if ((el = gst_element_factory_create (f, "actual-sink"))) { + if ((el = gst_element_factory_create (f, child_name))) { /* FIXME: no element actually has this property as far as I can tell. * also, this is a nasty uncheckable way of supporting something that * amounts to being an interface. */ @@ -247,6 +249,7 @@ done: ("Failed to find a supported audio sink")); } } + g_free (child_name); gst_object_unref (bus); gst_plugin_feature_list_free (list); g_slist_foreach (errors, (GFunc) gst_mini_object_unref, NULL); diff --git a/gst/autodetect/gstautovideosink.c b/gst/autodetect/gstautovideosink.c index 15d89091..779acf12 100644 --- a/gst/autodetect/gstautovideosink.c +++ b/gst/autodetect/gstautovideosink.c @@ -155,6 +155,8 @@ gst_auto_video_sink_find_best (GstAutoVideoSink * sink) { GstElement *choice = NULL; GList *list, *walk; + gchar *child_name = g_strdup_printf ("%s-actual-sink", + GST_OBJECT_NAME (sink)); list = gst_registry_feature_filter (gst_registry_get_default (), (GstPluginFeatureFilter) gst_auto_video_sink_factory_filter, FALSE, sink); @@ -165,7 +167,7 @@ gst_auto_video_sink_find_best (GstAutoVideoSink * sink) GstElement *el; GST_DEBUG_OBJECT (sink, "Trying %s", GST_PLUGIN_FEATURE (f)->name); - if ((el = gst_element_factory_create (f, "actual-sink"))) { + if ((el = gst_element_factory_create (f, child_name))) { GstStateChangeReturn ret; GST_DEBUG_OBJECT (sink, "Changing state to READY"); @@ -188,6 +190,7 @@ gst_auto_video_sink_find_best (GstAutoVideoSink * sink) } done: + g_free (child_name); gst_plugin_feature_list_free (list); return choice; diff --git a/gst/id3demux/id3v2frames.c b/gst/id3demux/id3v2frames.c index 6690f5a5..21ca4f80 100644 --- a/gst/id3demux/id3v2frames.c +++ b/gst/id3demux/id3v2frames.c @@ -667,39 +667,21 @@ id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name, return result; } -static void -parse_insert_string_field (const gchar * encoding, gchar * data, gint data_size, - GArray * fields) -{ - gchar *field = NULL; - - if (strcmp (encoding, "UTF-8") != 0) { - field = g_convert (data, data_size, "UTF-8", encoding, NULL, NULL, NULL); - if (field == NULL) { - GST_WARNING ("could not convert string from %s to UTF-8. Ignoring", - encoding); - } - } else if (g_utf8_validate (data, data_size, NULL)) { - field = g_strndup (data, data_size); - } else { - GST_WARNING ("alleged UTF-8 string is not valid UTF-8. Ignoring"); - } - - if (field) - g_array_append_val (fields, field); -} +static const gchar utf16enc[] = "UTF-16"; +static const gchar utf16leenc[] = "UTF-16LE"; +static const gchar utf16beenc[] = "UTF-16BE"; static gboolean -has_utf16_bom (gchar * data, const gchar ** p_in_encoding) +find_utf16_bom (gchar * data, const gchar ** p_in_encoding) { guint16 marker = (GST_READ_UINT8 (data) << 8) | GST_READ_UINT8 (data + 1); switch (marker) { case 0xFFFE: - *p_in_encoding = "UTF16LE"; + *p_in_encoding = utf16leenc; return TRUE; case 0xFEFF: - *p_in_encoding = "UTF16BE"; + *p_in_encoding = utf16beenc; return TRUE; default: break; @@ -708,6 +690,63 @@ has_utf16_bom (gchar * data, const gchar ** p_in_encoding) } static void +parse_insert_string_field (guint8 encoding, gchar * data, gint data_size, + GArray * fields) +{ + gchar *field = NULL; + + switch (encoding) { + case ID3V2_ENCODING_UTF16: + case ID3V2_ENCODING_UTF16BE: + { + const gchar *in_encode; + + if (encoding == ID3V2_ENCODING_UTF16) + in_encode = utf16enc; + else + in_encode = utf16beenc; + + /* Sometimes we see strings with multiple BOM markers at the start. + * In that case, we assume the innermost one is correct. If that fails + * to produce valid UTF-8, we try the other endianness anyway */ + while (data_size > 2 && find_utf16_bom (data, &in_encode)) { + data += 2; /* skip BOM */ + data_size -= 2; + } + + field = g_convert (data, data_size, "UTF-8", in_encode, NULL, NULL, NULL); + + if (field == NULL || g_utf8_validate (field, -1, NULL) == FALSE) { + /* As a fallback, try interpreting UTF-16 in the other endianness */ + if (in_encode == utf16beenc) + field = g_convert (data, data_size, "UTF-8", utf16leenc, + NULL, NULL, NULL); + } + } + + break; + case ID3V2_ENCODING_ISO8859: + field = g_convert (data, data_size, "UTF-8", "ISO-8859-1", + NULL, NULL, NULL); + break; + default: + field = g_strndup (data, data_size); + break; + } + + if (field) { + if (g_utf8_validate (field, -1, NULL)) { + g_array_append_val (fields, field); + return; + } + + GST_DEBUG ("%s was bad UTF-8 after conversion from encoding %d. Ignoring", + field, encoding); + g_free (field); + } +} + +static void parse_split_strings (guint8 encoding, gchar * data, gint data_size, GArray ** out_fields) { @@ -721,13 +760,13 @@ parse_split_strings (guint8 encoding, gchar * data, gint data_size, case ID3V2_ENCODING_ISO8859: for (text_pos = 0; text_pos < data_size; text_pos++) { if (data[text_pos] == 0) { - parse_insert_string_field ("ISO-8859-1", data + prev, + parse_insert_string_field (encoding, data + prev, text_pos - prev + 1, fields); prev = text_pos + 1; } } if (data_size - prev > 0 && data[prev] != 0x00) { - parse_insert_string_field ("ISO-8859-1", data + prev, + parse_insert_string_field (encoding, data + prev, data_size - prev, fields); } @@ -735,34 +774,24 @@ parse_split_strings (guint8 encoding, gchar * data, gint data_size, case ID3V2_ENCODING_UTF8: for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) { if (data[text_pos] == '\0') { - parse_insert_string_field ("UTF-8", data + prev, + parse_insert_string_field (encoding, data + prev, text_pos - prev + 1, fields); prev = text_pos + 1; } } if (data_size - prev > 0 && data[prev] != 0x00) { - parse_insert_string_field ("UTF-8", data + prev, + parse_insert_string_field (encoding, data + prev, data_size - prev, fields); } break; case ID3V2_ENCODING_UTF16: case ID3V2_ENCODING_UTF16BE: { - const gchar *in_encode; - - if (encoding == ID3V2_ENCODING_UTF16) - in_encode = "UTF-16"; - else - in_encode = "UTF-16BE"; - /* Find '\0\0' terminator */ for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) { if (data[text_pos] == '\0' && data[text_pos + 1] == '\0') { - if (has_utf16_bom (data + prev, &in_encode)) { - prev += 2; /* skip BOM */ - } /* found a delimiter */ - parse_insert_string_field (in_encode, data + prev, + parse_insert_string_field (encoding, data + prev, text_pos - prev + 2, fields); text_pos++; /* Advance to the 2nd NULL terminator */ prev = text_pos + 1; @@ -771,11 +800,8 @@ parse_split_strings (guint8 encoding, gchar * data, gint data_size, } if (data_size - prev > 1 && (data[prev] != 0x00 || data[prev + 1] != 0x00)) { - if (has_utf16_bom (data + prev, &in_encode)) { - prev += 2; /* skip BOM */ - } /* There were 2 or more non-null chars left, convert those too */ - parse_insert_string_field (in_encode, data + prev, + parse_insert_string_field (encoding, data + prev, data_size - prev, fields); } break; |