Diffstat (limited to 'gst/deinterlace/tvtime')
-rw-r--r--  gst/deinterlace/tvtime/greedy.c  488
-rw-r--r--  gst/deinterlace/tvtime/greedyh.asm  250
-rw-r--r--  gst/deinterlace/tvtime/greedyh.c  420
-rw-r--r--  gst/deinterlace/tvtime/greedyhmacros.h  75
-rw-r--r--  gst/deinterlace/tvtime/linear.c  214
-rw-r--r--  gst/deinterlace/tvtime/linearblend.c  231
-rw-r--r--  gst/deinterlace/tvtime/mmx.h  723
-rw-r--r--  gst/deinterlace/tvtime/plugins.h  54
-rw-r--r--  gst/deinterlace/tvtime/scalerbob.c  74
-rw-r--r--  gst/deinterlace/tvtime/sse.h  992
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp.c  211
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoop0A.inc  15
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoopBottom.inc  174
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA.inc  11
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA8.inc  12
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA.inc  10
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA2.inc  5
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA6.inc  11
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH.inc  10
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH2.inc  5
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoopTop.inc  254
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoopVA.inc  6
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/SearchLoopVAH.inc  6
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/StrangeBob.inc  435
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll.inc  241
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll2.inc  243
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/WierdBob.inc  286
-rw-r--r--  gst/deinterlace/tvtime/tomsmocomp/tomsmocompmacros.h  164
-rw-r--r--  gst/deinterlace/tvtime/vfir.c  187
-rw-r--r--  gst/deinterlace/tvtime/weave.c  82
-rw-r--r--  gst/deinterlace/tvtime/weavebff.c  88
-rw-r--r--  gst/deinterlace/tvtime/weavetff.c  88
-rw-r--r--  gst/deinterlace/tvtime/x86-64_macros.inc  82
33 files changed, 6147 insertions, 0 deletions
diff --git a/gst/deinterlace/tvtime/greedy.c b/gst/deinterlace/tvtime/greedy.c
new file mode 100644
index 00000000..293d82fa
--- /dev/null
+++ b/gst/deinterlace/tvtime/greedy.c
@@ -0,0 +1,488 @@
+/*
+ *
+ * GStreamer
+ * Copyright (c) 2000 Tom Barry All rights reserved.
+ * mmx.h port copyright (c) 2002 Billy Biggs <vektor@dumbterm.net>.
+ *
+ * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry
+ * and Billy Biggs.
+ * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_GREEDY_L (gst_deinterlace_method_greedy_l_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_GREEDY_L(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L))
+#define GST_IS_DEINTERLACE_METHOD_GREEDY_L_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L))
+#define GST_DEINTERLACE_METHOD_GREEDY_L_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyLClass))
+#define GST_DEINTERLACE_METHOD_GREEDY_L(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyL))
+#define GST_DEINTERLACE_METHOD_GREEDY_L_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyLClass))
+#define GST_DEINTERLACE_METHOD_GREEDY_L_CAST(obj) ((GstDeinterlaceMethodGreedyL*)(obj))
+
+GType gst_deinterlace_method_greedy_l_get_type (void);
+
+typedef struct
+{
+ GstDeinterlaceMethod parent;
+
+ guint max_comb;
+} GstDeinterlaceMethodGreedyL;
+
+typedef struct
+{
+ GstDeinterlaceMethodClass parent_class;
+ void (*scanline) (GstDeinterlaceMethodGreedyL * self, uint8_t * L2,
+ uint8_t * L1, uint8_t * L3, uint8_t * L2P, uint8_t * Dest, int size);
+} GstDeinterlaceMethodGreedyLClass;
+
+// This is a simple lightweight DeInterlace method that uses little CPU time
+// but gives very good results for low or intermediate motion.
+// It defers frames by one field, but that does not seem to produce noticeable
+// lip sync problems.
+//
+// The method used is to take either the older or newer weave pixel depending
+// upon which gives the smaller comb factor, and then clip to avoid large damage
+// when wrong.
+//
+// I'd intended this to be part of a larger, more elaborate method added to
+// Blended Clip, but it gives results too good for its CPU cost to ignore here.
+
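+// Worked example with hypothetical pixel values: t1 = 100, b1 = 140,
+// m0 (newest weave) = 180, m2 (previous weave) = 118, max_comb = 15:
+//   avg        = (100 + 140) / 2 = 120
+//   |m0 - avg| = 60, |m2 - avg| = 2   ->  pick m2 (118) as "best"
+//   clip range = [MIN (t1, b1) - max_comb, MAX (t1, b1) + max_comb] = [85, 155]
+//   output     = CLAMP (118, 85, 155) = 118
+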
+static inline void
+deinterlace_greedy_packed422_scanline_c (GstDeinterlaceMethodGreedyL * self,
+ uint8_t * m0, uint8_t * t1,
+ uint8_t * b1, uint8_t * m2, uint8_t * output, int width)
+{
+ int avg, l2_diff, lp2_diff, max, min, best;
+ guint max_comb = self->max_comb;
+
+ // L2 == m0
+ // L1 == t1
+ // L3 == b1
+ // LP2 == m2
+
+ while (width--) {
+ avg = (*t1 + *b1) / 2;
+
+ l2_diff = ABS (*m0 - avg);
+ lp2_diff = ABS (*m2 - avg);
+
+ if (l2_diff > lp2_diff)
+ best = *m2;
+ else
+ best = *m0;
+
+ max = MAX (*t1, *b1);
+ min = MIN (*t1, *b1);
+
+ if (max < 256 - max_comb)
+ max += max_comb;
+ else
+ max = 255;
+
+ if (min > max_comb)
+ min -= max_comb;
+ else
+ min = 0;
+
+ *output = CLAMP (best, min, max);
+
+ // Advance to the next set of pixels.
+ output += 1;
+ m0 += 1;
+ t1 += 1;
+ b1 += 1;
+ m2 += 1;
+ }
+}
+
+#ifdef BUILD_X86_ASM
+#include "mmx.h"
+static void
+deinterlace_greedy_packed422_scanline_mmx (GstDeinterlaceMethodGreedyL * self,
+ uint8_t * m0, uint8_t * t1,
+ uint8_t * b1, uint8_t * m2, uint8_t * output, int width)
+{
+ mmx_t MaxComb;
+ mmx_t ShiftMask;
+
+ // How badly do we let it weave? 0-255
+ MaxComb.ub[0] = self->max_comb;
+ MaxComb.ub[1] = self->max_comb;
+ MaxComb.ub[2] = self->max_comb;
+ MaxComb.ub[3] = self->max_comb;
+ MaxComb.ub[4] = self->max_comb;
+ MaxComb.ub[5] = self->max_comb;
+ MaxComb.ub[6] = self->max_comb;
+ MaxComb.ub[7] = self->max_comb;
+
+ ShiftMask.ub[0] = 0x7f;
+ ShiftMask.ub[1] = 0x7f;
+ ShiftMask.ub[2] = 0x7f;
+ ShiftMask.ub[3] = 0x7f;
+ ShiftMask.ub[4] = 0x7f;
+ ShiftMask.ub[5] = 0x7f;
+ ShiftMask.ub[6] = 0x7f;
+ ShiftMask.ub[7] = 0x7f;
+
+ // L2 == m0
+ // L1 == t1
+ // L3 == b1
+ // LP2 == m2
+
+ movq_m2r (MaxComb, mm6);
+
+ for (; width > 7; width -= 8) {
+ movq_m2r (*t1, mm1); // L1
+ movq_m2r (*m0, mm2); // L2
+ movq_m2r (*b1, mm3); // L3
+ movq_m2r (*m2, mm0); // LP2
+
+ // average L1 and L3 leave result in mm4
+ movq_r2r (mm1, mm4); // L1
+ movq_r2r (mm3, mm5); // L3
+ psrlw_i2r (1, mm4); // L1/2
+ pand_m2r (ShiftMask, mm4);
+ psrlw_i2r (1, mm5); // L3/2
+ pand_m2r (ShiftMask, mm5);
+ paddusb_r2r (mm5, mm4); // (L1 + L3) / 2
+
+ // get abs value of possible L2 comb
+ movq_r2r (mm2, mm7); // L2
+ psubusb_r2r (mm4, mm7); // L2 - avg
+ movq_r2r (mm4, mm5); // avg
+ psubusb_r2r (mm2, mm5); // avg - L2
+ por_r2r (mm7, mm5); // abs(avg-L2)
+
+ // get abs value of possible LP2 comb
+ movq_r2r (mm0, mm7); // LP2
+ psubusb_r2r (mm4, mm7); // LP2 - avg
+ psubusb_r2r (mm0, mm4); // avg - LP2
+ por_r2r (mm7, mm4); // abs(avg-LP2)
+
+ // use L2 or LP2 depending upon which makes smaller comb
+ psubusb_r2r (mm5, mm4); // see if it goes to zero
+ psubusb_r2r (mm5, mm5); // 0
+ pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0
+ pcmpeqb_r2r (mm4, mm5); // opposite of mm4
+
+ // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5=ff
+ pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0
+ pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0
+ por_r2r (mm5, mm4); // may the best win
+
+ // Now let's clip our chosen value so that it is not outside the
+ // high/low range of L1-L3 by more than MaxComb.
+ // This allows some comb but limits the damage and also allows more
+ // detail than a boring oversmoothed clip.
+
+ movq_r2r (mm1, mm2); // copy L1
+ psubusb_r2r (mm3, mm2); // - L3, with saturation
+ paddusb_r2r (mm3, mm2); // now = Max(L1,L3)
+
+ pcmpeqb_r2r (mm7, mm7); // all ffffffff
+ psubusb_r2r (mm1, mm7); // - L1
+ paddusb_r2r (mm7, mm3); // add, may sat at fff..
+ psubusb_r2r (mm7, mm3); // now = Min(L1,L3)
+
+ // allow the value to be above the high or below the low by amt of MaxComb
+ paddusb_r2r (mm6, mm2); // increase max by diff
+ psubusb_r2r (mm6, mm3); // lower min by diff
+
+ psubusb_r2r (mm3, mm4); // best - Min
+ paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
+
+ pcmpeqb_r2r (mm7, mm7); // all ffffffff
+ psubusb_r2r (mm4, mm7); // - Max(best,Min(best,L3)
+ paddusb_r2r (mm7, mm2); // add may sat at FFF..
+ psubusb_r2r (mm7, mm2); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped
+
+ movq_r2m (mm2, *output); // move in our clipped best
+
+ // Advance to the next set of pixels.
+ output += 8;
+ m0 += 8;
+ t1 += 8;
+ b1 += 8;
+ m2 += 8;
+ }
+ emms ();
+ if (width > 0)
+ deinterlace_greedy_packed422_scanline_c (self, m0, t1, b1, m2, output,
+ width);
+}
+
+#include "sse.h"
+
+static void
+deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlaceMethodGreedyL *
+ self, uint8_t * m0, uint8_t * t1, uint8_t * b1, uint8_t * m2,
+ uint8_t * output, int width)
+{
+ mmx_t MaxComb;
+
+ // How badly do we let it weave? 0-255
+ MaxComb.ub[0] = self->max_comb;
+ MaxComb.ub[1] = self->max_comb;
+ MaxComb.ub[2] = self->max_comb;
+ MaxComb.ub[3] = self->max_comb;
+ MaxComb.ub[4] = self->max_comb;
+ MaxComb.ub[5] = self->max_comb;
+ MaxComb.ub[6] = self->max_comb;
+ MaxComb.ub[7] = self->max_comb;
+
+ // L2 == m0
+ // L1 == t1
+ // L3 == b1
+ // LP2 == m2
+
+ movq_m2r (MaxComb, mm6);
+
+ for (; width > 7; width -= 8) {
+ movq_m2r (*t1, mm1); // L1
+ movq_m2r (*m0, mm2); // L2
+ movq_m2r (*b1, mm3); // L3
+ movq_m2r (*m2, mm0); // LP2
+
+ // average L1 and L3 leave result in mm4
+ movq_r2r (mm1, mm4); // L1
+ pavgb_r2r (mm3, mm4); // (L1 + L3)/2
+
+ // get abs value of possible L2 comb
+ movq_r2r (mm2, mm7); // L2
+ psubusb_r2r (mm4, mm7); // L2 - avg
+ movq_r2r (mm4, mm5); // avg
+ psubusb_r2r (mm2, mm5); // avg - L2
+ por_r2r (mm7, mm5); // abs(avg-L2)
+
+ // get abs value of possible LP2 comb
+ movq_r2r (mm0, mm7); // LP2
+ psubusb_r2r (mm4, mm7); // LP2 - avg
+ psubusb_r2r (mm0, mm4); // avg - LP2
+ por_r2r (mm7, mm4); // abs(avg-LP2)
+
+ // use L2 or LP2 depending upon which makes smaller comb
+ psubusb_r2r (mm5, mm4); // see if it goes to zero
+ pxor_r2r (mm5, mm5); // 0
+ pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0
+ pcmpeqb_r2r (mm4, mm5); // opposite of mm4
+
+ // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5=ff
+ pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0
+ pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0
+ por_r2r (mm5, mm4); // may the best win
+
+ // Now let's clip our chosen value so that it is not outside the
+ // high/low range of L1-L3 by more than MaxComb.
+ // This allows some comb but limits the damage and also allows more
+ // detail than a boring oversmoothed clip.
+
+ movq_r2r (mm1, mm2); // copy L1
+ pmaxub_r2r (mm3, mm2); // now = Max(L1,L3)
+
+ pminub_r2r (mm1, mm3); // now = Min(L1,L3)
+
+ // allow the value to be above the high or below the low by amt of MaxComb
+ paddusb_r2r (mm6, mm2); // increase max by diff
+ psubusb_r2r (mm6, mm3); // lower min by diff
+
+
+ pmaxub_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
+ pminub_r2r (mm4, mm2); // now = Min( Max(best, Min(L1,L3)), L2 )=L2 clipped
+
+ movq_r2m (mm2, *output); // move in our clipped best
+
+ // Advance to the next set of pixels.
+ output += 8;
+ m0 += 8;
+ t1 += 8;
+ b1 += 8;
+ m2 += 8;
+ }
+ emms ();
+
+ if (width > 0)
+ deinterlace_greedy_packed422_scanline_c (self, m0, t1, b1, m2, output,
+ width);
+}
+
+#endif
+
+static void
+deinterlace_frame_di_greedy (GstDeinterlaceMethod * d_method,
+ GstDeinterlace * object, GstBuffer * outbuf)
+{
+ GstDeinterlaceMethodGreedyL *self =
+ GST_DEINTERLACE_METHOD_GREEDY_L (d_method);
+ GstDeinterlaceMethodGreedyLClass *klass =
+ GST_DEINTERLACE_METHOD_GREEDY_L_GET_CLASS (self);
+ int InfoIsOdd = 0;
+ int Line;
+ unsigned int Pitch = object->field_stride;
+ unsigned char *L1; // ptr to Line1, of 3
+ unsigned char *L2; // ptr to Line2, the weave line
+ unsigned char *L3; // ptr to Line3
+
+ unsigned char *L2P; // ptr to prev Line2
+ unsigned char *Dest = GST_BUFFER_DATA (outbuf);
+
+ // copy first even line no matter what, and the first odd line if we're
+ // processing an EVEN field. (note diff from other deint rtns.)
+
+ if (object->field_history[object->history_count - 1].flags ==
+ PICTURE_INTERLACED_BOTTOM) {
+ InfoIsOdd = 1;
+
+ L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf);
+ L2 = GST_BUFFER_DATA (object->field_history[object->history_count - 1].buf);
+ L3 = L1 + Pitch;
+ L2P =
+ GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf);
+
+ // copy first even line
+ oil_memcpy (Dest, L1, object->row_stride);
+ Dest += object->row_stride;
+ } else {
+ InfoIsOdd = 0;
+ L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf);
+ L2 = GST_BUFFER_DATA (object->field_history[object->history_count -
+ 1].buf) + Pitch;
+ L3 = L1 + Pitch;
+ L2P =
+ GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf) +
+ Pitch;
+
+ // copy first even line
+ oil_memcpy (Dest, GST_BUFFER_DATA (object->field_history[0].buf),
+ object->row_stride);
+ Dest += object->row_stride;
+ // then first odd line
+ oil_memcpy (Dest, L1, object->row_stride);
+ Dest += object->row_stride;
+ }
+
+ for (Line = 0; Line < (object->field_height - 1); ++Line) {
+ klass->scanline (self, L2, L1, L3, L2P, Dest, object->row_stride);
+ Dest += object->row_stride;
+ oil_memcpy (Dest, L3, object->row_stride);
+ Dest += object->row_stride;
+
+ L1 += Pitch;
+ L2 += Pitch;
+ L3 += Pitch;
+ L2P += Pitch;
+ }
+
+ if (InfoIsOdd) {
+ oil_memcpy (Dest, L2, object->row_stride);
+ }
+}
+
+
+G_DEFINE_TYPE (GstDeinterlaceMethodGreedyL, gst_deinterlace_method_greedy_l,
+ GST_TYPE_DEINTERLACE_METHOD);
+
+enum
+{
+ ARG_0,
+ ARG_MAX_COMB
+};
+
+static void
+gst_deinterlace_method_greedy_l_set_property (GObject * object, guint prop_id,
+ const GValue * value, GParamSpec * pspec)
+{
+ GstDeinterlaceMethodGreedyL *self = GST_DEINTERLACE_METHOD_GREEDY_L (object);
+
+ switch (prop_id) {
+ case ARG_MAX_COMB:
+ self->max_comb = g_value_get_uint (value);
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+gst_deinterlace_method_greedy_l_get_property (GObject * object, guint prop_id,
+ GValue * value, GParamSpec * pspec)
+{
+ GstDeinterlaceMethodGreedyL *self = GST_DEINTERLACE_METHOD_GREEDY_L (object);
+
+ switch (prop_id) {
+ case ARG_MAX_COMB:
+ g_value_set_uint (value, self->max_comb);
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+gst_deinterlace_method_greedy_l_class_init (GstDeinterlaceMethodGreedyLClass *
+ klass)
+{
+ GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+ GObjectClass *gobject_class = (GObjectClass *) klass;
+#ifdef BUILD_X86_ASM
+ guint cpu_flags = oil_cpu_get_flags ();
+#endif
+
+ gobject_class->set_property = gst_deinterlace_method_greedy_l_set_property;
+ gobject_class->get_property = gst_deinterlace_method_greedy_l_get_property;
+
+ g_object_class_install_property (gobject_class, ARG_MAX_COMB,
+ g_param_spec_uint ("max-comb",
+ "Max comb",
+ "Max Comb", 0, 255, 15, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)
+ );
+
+ dim_class->fields_required = 4;
+ dim_class->deinterlace_frame = deinterlace_frame_di_greedy;
+ dim_class->name = "Motion Adaptive: Simple Detection";
+ dim_class->nick = "greedyl";
+ dim_class->latency = 1;
+
+#ifdef BUILD_X86_ASM
+ if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) {
+ klass->scanline = deinterlace_greedy_packed422_scanline_mmxext;
+ } else if (cpu_flags & OIL_IMPL_FLAG_MMX) {
+ klass->scanline = deinterlace_greedy_packed422_scanline_mmx;
+ } else {
+ klass->scanline = deinterlace_greedy_packed422_scanline_c;
+ }
+#else
+ klass->scanline = deinterlace_greedy_packed422_scanline_c;
+#endif
+}
+
+static void
+gst_deinterlace_method_greedy_l_init (GstDeinterlaceMethodGreedyL * self)
+{
+ self->max_comb = 15;
+}
diff --git a/gst/deinterlace/tvtime/greedyh.asm b/gst/deinterlace/tvtime/greedyh.asm
new file mode 100644
index 00000000..86e97c58
--- /dev/null
+++ b/gst/deinterlace/tvtime/greedyh.asm
@@ -0,0 +1,250 @@
+/*
+ *
+ * GStreamer
+ * Copyright (c) 2001 Tom Barry. All rights reserved.
+ * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+
+/*
+ * Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry.
+ * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
+ */
+
+
+#include "x86-64_macros.inc"
+
+void
+FUNCT_NAME (GstDeinterlaceMethodGreedyH *self, uint8_t * L1, uint8_t * L2, uint8_t * L3, uint8_t * L2P,
+ uint8_t * Dest, int size)
+{
+
+ // in tight loop some vars are accessed faster in local storage
+ int64_t YMask = 0x00ff00ff00ff00ffull; // to keep only luma
+ int64_t UVMask = 0xff00ff00ff00ff00ull; // to keep only chroma
+ int64_t ShiftMask = 0xfefefefefefefefeull; // to avoid shifting chroma to luma
+ int64_t QW256 = 0x0100010001000100ull; // 4 256's
+ int64_t MaxComb;
+ int64_t MotionThreshold;
+ int64_t MotionSense;
+ int64_t i;
+ long LoopCtr;
+ long oldbx;
+
+ int64_t QW256B;
+ int64_t LastAvg = 0; //interp value from left qword
+
+ // FIXME: Use C implementation if the width is not a multiple of 4
+ // Do something more optimal later
+ if (size % 8 != 0)
+ greedyDScaler_C (self, L1, L2, L3, L2P, Dest, size);
+
+ // Set up our two parms that are actually evaluated for each pixel
+ i = self->max_comb;
+ MaxComb =
+ i << 56 | i << 48 | i << 40 | i << 32 | i << 24 | i << 16 | i << 8 | i;
+
+ i = self->motion_threshold; // scale to range of 0-257
+ MotionThreshold = i << 48 | i << 32 | i << 16 | i | UVMask;
+
+ i = self->motion_sense; // scale to range of 0-257
+ MotionSense = i << 48 | i << 32 | i << 16 | i;
+
+ i = 0xffffffff - 256;
+ QW256B = i << 48 | i << 32 | i << 16 | i; // save a couple instr on PMINSW instruct.
+
+ LoopCtr = size / 8 - 1; // there are LineLength / 8 qwords per line but do 1 less, adj at end of loop
+
+ // For ease of reading, the comments below assume that we're operating on an odd
+ // field (i.e., that InfoIsOdd is true). Assume the obvious for even lines..
+ __asm__ __volatile__ (
+ // save ebx (-fPIC)
+ MOVX " %%" XBX ", %[oldbx]\n\t"
+ MOVX " %[L1], %%" XAX "\n\t"
+ LEAX " 8(%%" XAX "), %%" XBX "\n\t" // next qword needed by DJR
+ MOVX " %[L3], %%" XCX "\n\t"
+ SUBX " %%" XAX ", %%" XCX "\n\t" // carry L3 addr as an offset
+ MOVX " %[L2P], %%" XDX "\n\t"
+ MOVX " %[L2], %%" XSI "\n\t"
+ MOVX " %[Dest], %%" XDI "\n\t" // DL1 if Odd or DL2 if Even
+
+ ".align 8\n\t"
+ "1:\n\t"
+ "movq (%%" XSI "), %%mm0\n\t" // L2 - the newest weave pixel value
+ "movq (%%" XAX "), %%mm1\n\t" // L1 - the top pixel
+ "movq (%%" XDX "), %%mm2\n\t" // L2P - the prev weave pixel
+ "movq (%%" XAX ", %%" XCX "), %%mm3\n\t" // L3, next odd row
+ "movq %%mm1, %%mm6\n\t" // L1 - get simple single pixel interp
+
+ // pavgb mm6, mm3 // use macro below
+ V_PAVGB ("%%mm6", "%%mm3", "%%mm4", "%[ShiftMask]")
+
+ // DJR - Diagonal Jaggie Reduction
+ // In the event that we are going to use an average (Bob) pixel we do not want a jagged
+ // stair-step effect. To combat this we average in the 2 horizontally adjacent pixels into the
+ // interpolated Bob mix. This will do horizontal smoothing for only the Bob'd pixels.
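+ //
+ // In scalar terms (illustrative only), per output pixel:
+ //   bob      = avg (L1, L3)              // simple vertical interpolation
+ //   neighbor = avg (bob_left, bob_right) // bob values of the adjacent pixels
+ //   bob      = avg (bob, neighbor)       // horizontally smoothed result
+ // (the MMXEXT/3DNOW builds below apply two further averaging steps)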
+
+ "movq %[LastAvg], %%mm4\n\t" // the bob value from prev qword in row
+ "movq %%mm6, %[LastAvg]\n\t" // save for next pass
+ "psrlq $48, %%mm4\n\t" // right justify 1 pixel
+ "movq %%mm6, %%mm7\n\t" // copy of simple bob pixel
+ "psllq $16, %%mm7\n\t" // left justify 3 pixels
+ "por %%mm7, %%mm4\n\t" // and combine
+ "movq (%%" XBX "), %%mm5\n\t" // next horiz qword from L1
+ // pavgb mm5, qword ptr[ebx+ecx] // next horiz qword from L3, use macro below
+
+ V_PAVGB ("%%mm5", "(%%" XBX ",%%" XCX ")", "%%mm7", "%[ShiftMask]")
+ "psllq $48, %%mm5\n\t" // left just 1 pixel
+ "movq %%mm6, %%mm7\n\t" // another copy of simple bob pixel
+ "psrlq $16, %%mm7\n\t" // right just 3 pixels
+ "por %%mm7, %%mm5\n\t" // combine
+ // pavgb mm4, mm5 // avg of forward and prev by 1 pixel, use macro
+ V_PAVGB ("%%mm4", "%%mm5", "%%mm5", "%[ShiftMask]") // mm5 gets modified if MMX
+ // pavgb mm6, mm4 // avg of center and surround interp vals, use macro
+ V_PAVGB ("%%mm6", "%%mm4", "%%mm7", "%[ShiftMask]")
+
+ // Don't do any more averaging than needed for mmx. It hurts performance and causes rounding errors.
+#ifndef IS_MMX
+ // pavgb mm4, mm6 // 1/4 center, 3/4 adjacent
+ V_PAVGB ("%%mm4", "%%mm6", "%%mm7", "%[ShiftMask]")
+ // pavgb mm6, mm4 // 3/8 center, 5/8 adjacent
+ V_PAVGB ("%%mm6", "%%mm4", "%%mm7", "%[ShiftMask]")
+#endif
+
+ // get abs value of possible L2 comb
+ "movq %%mm6, %%mm4\n\t" // work copy of interp val
+ "movq %%mm2, %%mm7\n\t" // L2
+ "psubusb %%mm4, %%mm7\n\t" // L2 - avg
+ "movq %%mm4, %%mm5\n\t" // avg
+ "psubusb %%mm2, %%mm5\n\t" // avg - L2
+ "por %%mm7, %%mm5\n\t" // abs(avg-L2)
+
+ // get abs value of possible L2P comb
+ "movq %%mm0, %%mm7\n\t" // L2P
+ "psubusb %%mm4, %%mm7\n\t" // L2P - avg
+ "psubusb %%mm0, %%mm4\n\t" // avg - L2P
+ "por %%mm7, %%mm4\n\t" // abs(avg-L2P)
+
+ // use L2 or L2P depending upon which makes smaller comb
+ "psubusb %%mm5, %%mm4\n\t" // see if it goes to zero
+ "psubusb %%mm5, %%mm5\n\t" // 0
+ "pcmpeqb %%mm5, %%mm4\n\t" // if (mm4=0) then FF else 0
+ "pcmpeqb %%mm4, %%mm5\n\t" // opposite of mm4
+
+ // if Comb(L2P) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5=ff
+ "pand %%mm2, %%mm5\n\t" // use L2 if mm5 == ff, else 0
+ "pand %%mm0, %%mm4\n\t" // use L2P if mm4 = ff, else 0
+ "por %%mm5, %%mm4\n\t" // may the best win
+
+ // Inventory: at this point we have the following values:
+ // mm0 = L2P (or L2)
+ // mm1 = L1
+ // mm2 = L2 (or L2P)
+ // mm3 = L3
+ // mm4 = the best of L2,L2P weave pixel, base upon comb
+ // mm6 = the avg interpolated value, if we need to use it
+ // Let's measure movement, as how much the weave pixel has changed
+
+ "movq %%mm2, %%mm7\n\t"
+ "psubusb %%mm0, %%mm2\n\t"
+ "psubusb %%mm7, %%mm0\n\t"
+ "por %%mm2, %%mm0\n\t" // abs value of change, used later
+
+ // Now let's clip our chosen value so that it is not outside the
+ // high/low range of L1-L3 by more than MaxComb.
+ // This allows some comb but limits the damage and also allows more
+ // detail than a boring oversmoothed clip.
+
+ "movq %%mm1, %%mm2\n\t" // copy L1
+ // pmaxub mm2, mm3 // use macro
+ V_PMAXUB ("%%mm2", "%%mm3") // now = Max(L1,L3)
+ "movq %%mm1, %%mm5\n\t" // copy L1
+ // pminub mm5, mm3 // now = Min(L1,L3), use macro
+ V_PMINUB ("%%mm5", "%%mm3", "%%mm7")
+
+ // allow the value to be above the high or below the low by amt of MaxComb
+ "psubusb %[MaxComb], %%mm5\n\t" // lower min by diff
+ "paddusb %[MaxComb], %%mm2\n\t" // increase max by diff
+ // pmaxub mm4, mm5 // now = Max(best,Min(L1,L3) use macro
+ V_PMAXUB ("%%mm4", "%%mm5")
+ // pminub mm4, mm2 // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped
+ V_PMINUB ("%%mm4", "%%mm2", "%%mm7")
+
+ // Blend weave pixel with bob pixel, depending on motion val in mm0
+ "psubusb %[MotionThreshold], %%mm0\n\t" // test Threshold, clear chroma change >>>??
+ "pmullw %[MotionSense], %%mm0\n\t" // mul by user factor, keep low 16 bits
+ "movq %[QW256], %%mm7\n\t"
+#ifdef IS_MMXEXT
+ "pminsw %%mm7, %%mm0\n\t" // max = 256
+#else
+ "paddusw %[QW256B], %%mm0\n\t" // add, may sat at fff..
+ "psubusw %[QW256B], %%mm0\n\t" // now = Min(L1,256)
+#endif
+ "psubusw %%mm0, %%mm7\n\t" // so the 2 sum to 256, weighted avg
+ "movq %%mm4, %%mm2\n\t" // save weave chroma info before trashing
+ "pand %[YMask], %%mm4\n\t" // keep only luma from calc'd value
+ "pmullw %%mm7, %%mm4\n\t" // use more weave for less motion
+ "pand %[YMask], %%mm6\n\t" // keep only luma from calc'd value
+ "pmullw %%mm0, %%mm6\n\t" // use more bob for large motion
+ "paddusw %%mm6, %%mm4\n\t" // combine
+ "psrlw $8, %%mm4\n\t" // div by 256 to get weighted avg
+ // chroma comes from weave pixel
+ "pand %[UVMask], %%mm2\n\t" // keep chroma
+ "por %%mm4, %%mm2\n\t" // and combine
+ V_MOVNTQ ("(%%" XDI ")", "%%mm2") // move in our clipped best, use macro
+ // bump ptrs and loop
+ LEAX " 8(%%" XAX "), %%" XAX "\n\t"
+ LEAX " 8(%%" XBX "), %%" XBX "\n\t"
+ LEAX " 8(%%" XDX "), %%" XDX "\n\t"
+ LEAX " 8(%%" XDI "), %%" XDI "\n\t"
+ LEAX " 8(%%" XSI "), %%" XSI "\n\t"
+ DECX " %[LoopCtr]\n\t"
+
+ "jg 1b\n\t" // loop if not to last line
+ // note P-III default assumes backward branches taken
+ "jl 1f\n\t" // done
+ MOVX " %%" XAX ", %%" XBX "\n\t" // sharpness lookahead 1 byte only, be wrong on 1
+ "jmp 1b\n\t"
+
+ "1:\n\t"
+ MOVX " %[oldbx], %%" XBX "\n\t"
+ "emms\n\t": /* no outputs */
+
+ :[LastAvg] "m" (LastAvg),
+ [L1] "m" (L1),
+ [L3] "m" (L3),
+ [L2P] "m" (L2P),
+ [L2] "m" (L2),
+ [Dest] "m" (Dest),
+ [ShiftMask] "m" (ShiftMask),
+ [MaxComb] "m" (MaxComb),
+ [MotionThreshold] "m" (MotionThreshold),
+ [MotionSense] "m" (MotionSense),
+ [QW256B] "m" (QW256B),
+ [YMask] "m" (YMask),
+ [UVMask] "m" (UVMask),
+ [LoopCtr] "m" (LoopCtr),
+ [QW256] "m" (QW256),
+ [oldbx] "m" (oldbx)
+ : XAX, XCX, XDX, XSI, XDI,
+ "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
+#ifdef __MMX__
+ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+#endif
+ "memory", "cc");
+}
diff --git a/gst/deinterlace/tvtime/greedyh.c b/gst/deinterlace/tvtime/greedyh.c
new file mode 100644
index 00000000..5d050ce0
--- /dev/null
+++ b/gst/deinterlace/tvtime/greedyh.c
@@ -0,0 +1,420 @@
+/*
+ *
+ * GStreamer
+ * Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
+ * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs.
+ * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "greedyhmacros.h"
+
+#include <stdlib.h>
+#include "_stdint.h"
+#include <string.h>
+
+#include "gst/gst.h"
+#include "plugins.h"
+#include "gstdeinterlace.h"
+
+#define GST_TYPE_DEINTERLACE_METHOD_GREEDY_H (gst_deinterlace_method_greedy_h_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_GREEDY_H(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_H))
+#define GST_IS_DEINTERLACE_METHOD_GREEDY_H_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_H))
+#define GST_DEINTERLACE_METHOD_GREEDY_H_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_H, GstDeinterlaceMethodGreedyHClass))
+#define GST_DEINTERLACE_METHOD_GREEDY_H(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_H, GstDeinterlaceMethodGreedyH))
+#define GST_DEINTERLACE_METHOD_GREEDY_H_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_H, GstDeinterlaceMethodGreedyHClass))
+#define GST_DEINTERLACE_METHOD_GREEDY_H_CAST(obj) ((GstDeinterlaceMethodGreedyH*)(obj))
+
+GType gst_deinterlace_method_greedy_h_get_type (void);
+
+typedef struct
+{
+ GstDeinterlaceMethod parent;
+
+ guint max_comb, motion_threshold, motion_sense;
+} GstDeinterlaceMethodGreedyH;
+
+typedef struct
+{
+ GstDeinterlaceMethodClass parent_class;
+ void (*scanline) (GstDeinterlaceMethodGreedyH * self, uint8_t * L2,
+ uint8_t * L1, uint8_t * L3, uint8_t * L2P, uint8_t * Dest, int size);
+} GstDeinterlaceMethodGreedyHClass;
+
+void
+greedyDScaler_C (GstDeinterlaceMethodGreedyH * self, uint8_t * L1, uint8_t * L2,
+ uint8_t * L3, uint8_t * L2P, uint8_t * Dest, int size)
+{
+ int Pos;
+ uint8_t l1_l, l1_1_l, l3_l, l3_1_l;
+ uint8_t l1_c, l1_1_c, l3_c, l3_1_c;
+ uint8_t avg_l, avg_c, avg_l_1, avg_c_1;
+ uint8_t avg_l__1 = 0, avg_c__1 = 0;
+ uint8_t avg_s_l, avg_s_c;
+ uint8_t avg_sc_l, avg_sc_c;
+ uint8_t best_l, best_c;
+ uint16_t mov_l;
+ uint8_t out_l, out_c;
+ uint8_t l2_l, l2_c, lp2_l, lp2_c;
+ uint8_t l2_l_diff, l2_c_diff, lp2_l_diff, lp2_c_diff;
+ uint8_t min_l, min_c, max_l, max_c;
+ guint max_comb = self->max_comb;
+ guint motion_sense = self->motion_sense;
+ guint motion_threshold = self->motion_threshold;
+
+ for (Pos = 0; Pos < size; Pos += 2) {
+ l1_l = L1[0];
+ l1_c = L1[1];
+ l3_l = L3[0];
+ l3_c = L3[1];
+
+ if (Pos == size - 1) {
+ l1_1_l = l1_l;
+ l1_1_c = l1_c;
+ l3_1_l = l3_l;
+ l3_1_c = l3_c;
+ } else {
+ l1_1_l = L1[2];
+ l1_1_c = L1[3];
+ l3_1_l = L3[2];
+ l3_1_c = L3[3];
+ }
+
+ /* Average of L1 and L3 */
+ avg_l = (l1_l + l3_l) / 2;
+ avg_c = (l1_c + l3_c) / 2;
+
+ if (Pos == 0) {
+ avg_l__1 = avg_l;
+ avg_c__1 = avg_c;
+ }
+
+ /* Average of next L1 and next L3 */
+ avg_l_1 = (l1_1_l + l3_1_l) / 2;
+ avg_c_1 = (l1_1_c + l3_1_c) / 2;
+
+ /* Calculate average of one pixel forward and previous */
+ avg_s_l = (avg_l__1 + avg_l_1) / 2;
+ avg_s_c = (avg_c__1 + avg_c_1) / 2;
+
+ /* Calculate average of center and surrounding pixels */
+ avg_sc_l = (avg_l + avg_s_l) / 2;
+ avg_sc_c = (avg_c + avg_s_c) / 2;
+
+ /* move forward */
+ avg_l__1 = avg_l;
+ avg_c__1 = avg_c;
+
+ /* Get best L2/L2P, i.e. least diff from above average */
+ l2_l = L2[0];
+ l2_c = L2[1];
+ lp2_l = L2P[0];
+ lp2_c = L2P[1];
+
+ l2_l_diff = ABS (l2_l - avg_sc_l);
+ l2_c_diff = ABS (l2_c - avg_sc_c);
+
+ lp2_l_diff = ABS (lp2_l - avg_sc_l);
+ lp2_c_diff = ABS (lp2_c - avg_sc_c);
+
+ if (l2_l_diff > lp2_l_diff)
+ best_l = lp2_l;
+ else
+ best_l = l2_l;
+
+ if (l2_c_diff > lp2_c_diff)
+ best_c = lp2_c;
+ else
+ best_c = l2_c;
+
+ /* Clip this best L2/L2P by L1/L3 and allow to differ by GreedyMaxComb */
+ max_l = MAX (l1_l, l3_l);
+ min_l = MIN (l1_l, l3_l);
+
+ if (max_l < 256 - max_comb)
+ max_l += max_comb;
+ else
+ max_l = 255;
+
+ if (min_l > max_comb)
+ min_l -= max_comb;
+ else
+ min_l = 0;
+
+ max_c = MAX (l1_c, l3_c);
+ min_c = MIN (l1_c, l3_c);
+
+ if (max_c < 256 - max_comb)
+ max_c += max_comb;
+ else
+ max_c = 255;
+
+ if (min_c > max_comb)
+ min_c -= max_comb;
+ else
+ min_c = 0;
+
+ out_l = CLAMP (best_l, min_l, max_l);
+ out_c = CLAMP (best_c, min_c, max_c);
+
+ /* Do motion compensation for luma, i.e. how much
+ * the weave pixel differs */
+ mov_l = ABS (l2_l - lp2_l);
+ if (mov_l > motion_threshold)
+ mov_l -= motion_threshold;
+ else
+ mov_l = 0;
+
+ mov_l = mov_l * motion_sense;
+ if (mov_l > 256)
+ mov_l = 256;
+
+ /* Weighted sum on clipped weave pixel and average */
+ out_l = (out_l * (256 - mov_l) + avg_sc_l * mov_l) / 256;
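+ /* Illustrative numbers, using the default motion_threshold = 25 and
+ * motion_sense = 30: a luma change of |l2_l - lp2_l| = 40 gives
+ * mov_l = (40 - 25) * 30 = 450, clipped to 256, so the output is
+ * entirely the interpolated average; a change of 20 or less gives
+ * mov_l = 0, so the clipped weave pixel is used unchanged. */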
+
+ Dest[0] = out_l;
+ Dest[1] = out_c;
+
+ Dest += 2;
+ L1 += 2;
+ L2 += 2;
+ L3 += 2;
+ L2P += 2;
+ }
+}
+
+#ifdef BUILD_X86_ASM
+
+#define IS_MMXEXT
+#define SIMD_TYPE MMXEXT
+#define FUNCT_NAME greedyDScaler_MMXEXT
+#include "greedyh.asm"
+#undef SIMD_TYPE
+#undef IS_MMXEXT
+#undef FUNCT_NAME
+
+#define IS_3DNOW
+#define SIMD_TYPE 3DNOW
+#define FUNCT_NAME greedyDScaler_3DNOW
+#include "greedyh.asm"
+#undef SIMD_TYPE
+#undef IS_3DNOW
+#undef FUNCT_NAME
+
+#define IS_MMX
+#define SIMD_TYPE MMX
+#define FUNCT_NAME greedyDScaler_MMX
+#include "greedyh.asm"
+#undef SIMD_TYPE
+#undef IS_MMX
+#undef FUNCT_NAME
+
+#endif
+
+static void
+deinterlace_frame_di_greedyh (GstDeinterlaceMethod * d_method,
+ GstDeinterlace * object, GstBuffer * outbuf)
+{
+ GstDeinterlaceMethodGreedyH *self =
+ GST_DEINTERLACE_METHOD_GREEDY_H (d_method);
+ GstDeinterlaceMethodGreedyHClass *klass =
+ GST_DEINTERLACE_METHOD_GREEDY_H_GET_CLASS (self);
+ int InfoIsOdd = 0;
+ int Line;
+ unsigned int Pitch = object->field_stride;
+
+ unsigned char *L1; // ptr to Line1, of 3
+ unsigned char *L2; // ptr to Line2, the weave line
+ unsigned char *L3; // ptr to Line3
+
+ unsigned char *L2P; // ptr to prev Line2
+ unsigned char *Dest = GST_BUFFER_DATA (outbuf);
+
+ // copy first even line no matter what, and the first odd line if we're
+ // processing an EVEN field. (note diff from other deint rtns.)
+
+ if (object->field_history[object->history_count - 1].flags ==
+ PICTURE_INTERLACED_BOTTOM) {
+ InfoIsOdd = 1;
+
+ L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf);
+ L2 = GST_BUFFER_DATA (object->field_history[object->history_count - 1].buf);
+ L3 = L1 + Pitch;
+ L2P =
+ GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf);
+
+ // copy first even line
+ oil_memcpy (Dest, L1, object->row_stride);
+ Dest += object->row_stride;
+ } else {
+ InfoIsOdd = 0;
+ L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf);
+ L2 = GST_BUFFER_DATA (object->field_history[object->history_count -
+ 1].buf) + Pitch;
+ L3 = L1 + Pitch;
+ L2P =
+ GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf) +
+ Pitch;
+
+ // copy first even line
+ oil_memcpy (Dest, GST_BUFFER_DATA (object->field_history[0].buf),
+ object->row_stride);
+ Dest += object->row_stride;
+ // then first odd line
+ oil_memcpy (Dest, L1, object->row_stride);
+ Dest += object->row_stride;
+ }
+
+ for (Line = 0; Line < (object->field_height - 1); ++Line) {
+ klass->scanline (self, L1, L2, L3, L2P, Dest, object->row_stride);
+ Dest += object->row_stride;
+ oil_memcpy (Dest, L3, object->row_stride);
+ Dest += object->row_stride;
+
+ L1 += Pitch;
+ L2 += Pitch;
+ L3 += Pitch;
+ L2P += Pitch;
+ }
+
+ if (InfoIsOdd) {
+ oil_memcpy (Dest, L2, object->row_stride);
+ }
+}
+
+G_DEFINE_TYPE (GstDeinterlaceMethodGreedyH, gst_deinterlace_method_greedy_h,
+ GST_TYPE_DEINTERLACE_METHOD);
+
+enum
+{
+ ARG_0,
+ ARG_MAX_COMB,
+ ARG_MOTION_THRESHOLD,
+ ARG_MOTION_SENSE
+};
+
+static void
+gst_deinterlace_method_greedy_h_set_property (GObject * object, guint prop_id,
+ const GValue * value, GParamSpec * pspec)
+{
+ GstDeinterlaceMethodGreedyH *self = GST_DEINTERLACE_METHOD_GREEDY_H (object);
+
+ switch (prop_id) {
+ case ARG_MAX_COMB:
+ self->max_comb = g_value_get_uint (value);
+ break;
+ case ARG_MOTION_THRESHOLD:
+ self->motion_threshold = g_value_get_uint (value);
+ break;
+ case ARG_MOTION_SENSE:
+ self->motion_sense = g_value_get_uint (value);
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+gst_deinterlace_method_greedy_h_get_property (GObject * object, guint prop_id,
+ GValue * value, GParamSpec * pspec)
+{
+ GstDeinterlaceMethodGreedyH *self = GST_DEINTERLACE_METHOD_GREEDY_H (object);
+
+ switch (prop_id) {
+ case ARG_MAX_COMB:
+ g_value_set_uint (value, self->max_comb);
+ break;
+ case ARG_MOTION_THRESHOLD:
+ g_value_set_uint (value, self->motion_threshold);
+ break;
+ case ARG_MOTION_SENSE:
+ g_value_set_uint (value, self->motion_sense);
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+gst_deinterlace_method_greedy_h_class_init (GstDeinterlaceMethodGreedyHClass *
+ klass)
+{
+ GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+ GObjectClass *gobject_class = (GObjectClass *) klass;
+#ifdef BUILD_X86_ASM
+ guint cpu_flags = oil_cpu_get_flags ();
+#endif
+
+ gobject_class->set_property = gst_deinterlace_method_greedy_h_set_property;
+ gobject_class->get_property = gst_deinterlace_method_greedy_h_get_property;
+
+ g_object_class_install_property (gobject_class, ARG_MAX_COMB,
+ g_param_spec_uint ("max-comb",
+ "Max comb",
+ "Max Comb", 0, 255, 5, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)
+ );
+
+ g_object_class_install_property (gobject_class, ARG_MOTION_THRESHOLD,
+ g_param_spec_uint ("motion-threshold",
+ "Motion Threshold",
+ "Motion Threshold",
+ 0, 255, 25, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)
+ );
+
+ g_object_class_install_property (gobject_class, ARG_MOTION_SENSE,
+ g_param_spec_uint ("motion-sense",
+ "Motion Sense",
+ "Motion Sense",
+ 0, 255, 30, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)
+ );
+
+ dim_class->fields_required = 4;
+ dim_class->deinterlace_frame = deinterlace_frame_di_greedyh;
+ dim_class->name = "Motion Adaptive: Advanced Detection";
+ dim_class->nick = "greedyh";
+ dim_class->latency = 1;
+
+#ifdef BUILD_X86_ASM
+ if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) {
+ klass->scanline = greedyDScaler_MMXEXT;
+ } else if (cpu_flags & OIL_IMPL_FLAG_3DNOW) {
+ klass->scanline = greedyDScaler_3DNOW;
+ } else if (cpu_flags & OIL_IMPL_FLAG_MMX) {
+ klass->scanline = greedyDScaler_MMX;
+ } else {
+ klass->scanline = greedyDScaler_C;
+ }
+#else
+ klass->scanline = greedyDScaler_C;
+#endif
+}
+
+static void
+gst_deinterlace_method_greedy_h_init (GstDeinterlaceMethodGreedyH * self)
+{
+ self->max_comb = 5;
+ self->motion_threshold = 25;
+ self->motion_sense = 30;
+}
diff --git a/gst/deinterlace/tvtime/greedyhmacros.h b/gst/deinterlace/tvtime/greedyhmacros.h
new file mode 100644
index 00000000..0386c28e
--- /dev/null
+++ b/gst/deinterlace/tvtime/greedyhmacros.h
@@ -0,0 +1,75 @@
+/////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2001 Tom Barry. All rights reserved.
+/////////////////////////////////////////////////////////////////////////////
+//
+// This file is subject to the terms of the GNU General Public License as
+// published by the Free Software Foundation. A copy of this license is
+// included with this software distribution in the file COPYING. If you
+// do not have a copy, you may obtain a copy by writing to the Free
+// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+//
+// This software is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details
+//
+/////////////////////////////////////////////////////////////////////////////
+
+// Define a few macros for CPU dependent instructions.
+// I suspect I don't really understand how the C macro preprocessor works but
+// this seems to get the job done. // TRB 7/01
+
+// BEFORE USING THESE YOU MUST SET:
+
+// #define SIMD_TYPE MMXEXT (or MMX or 3DNOW)
+
+// some macros for pavgb instruction
+// V_PAVGB(mmr1, mmr2, mmr work register, smask) mmr2 may = mmrw if you can trash it
+
+#define V_PAVGB_MMX(mmr1, mmr2, mmrw, smask) \
+ "movq "mmr2", "mmrw"\n\t" \
+ "pand "smask", "mmrw"\n\t" \
+ "psrlw $1, "mmrw"\n\t" \
+ "pand "smask", "mmr1"\n\t" \
+ "psrlw $1, "mmr1"\n\t" \
+ "paddusb "mmrw", "mmr1"\n\t"
+#define V_PAVGB_MMXEXT(mmr1, mmr2, mmrw, smask) "pavgb "mmr2", "mmr1"\n\t"
+#define V_PAVGB_3DNOW(mmr1, mmr2, mmrw, smask) "pavgusb "mmr2", "mmr1"\n\t"
+#define V_PAVGB(mmr1, mmr2, mmrw, smask) V_PAVGB2(mmr1, mmr2, mmrw, smask, SIMD_TYPE)
+#define V_PAVGB2(mmr1, mmr2, mmrw, smask, simd_type) V_PAVGB3(mmr1, mmr2, mmrw, smask, simd_type)
+#define V_PAVGB3(mmr1, mmr2, mmrw, smask, simd_type) V_PAVGB_##simd_type(mmr1, mmr2, mmrw, smask)
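+
+// Example (illustrative): with SIMD_TYPE set to MMXEXT,
+//   V_PAVGB ("%%mm6", "%%mm3", "%%mm4", "%[ShiftMask]")
+// expands to
+//   "pavgb %%mm3, %%mm6\n\t"
+// while the MMX variant expands to the mask/shift/add sequence above.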
+
+// some macros for pmaxub instruction
+#define V_PMAXUB_MMX(mmr1, mmr2) \
+ "psubusb "mmr2", "mmr1"\n\t" \
+ "paddusb "mmr2", "mmr1"\n\t"
+#define V_PMAXUB_MMXEXT(mmr1, mmr2) "pmaxub "mmr2", "mmr1"\n\t"
+#define V_PMAXUB_3DNOW(mmr1, mmr2) V_PMAXUB_MMX(mmr1, mmr2) // use MMX version
+#define V_PMAXUB(mmr1, mmr2) V_PMAXUB2(mmr1, mmr2, SIMD_TYPE)
+#define V_PMAXUB2(mmr1, mmr2, simd_type) V_PMAXUB3(mmr1, mmr2, simd_type)
+#define V_PMAXUB3(mmr1, mmr2, simd_type) V_PMAXUB_##simd_type(mmr1, mmr2)
+
+// some macros for pminub instruction
+// V_PMINUB(mmr1, mmr2, mmr work register) mmr2 may NOT = mmrw
+#define V_PMINUB_MMX(mmr1, mmr2, mmrw) \
+ "pcmpeqb "mmrw", "mmrw"\n\t" \
+ "psubusb "mmr2", "mmrw"\n\t" \
+ "paddusb "mmrw", "mmr1"\n\t" \
+ "psubusb "mmrw", "mmr1"\n\t"
+#define V_PMINUB_MMXEXT(mmr1, mmr2, mmrw) "pminub "mmr2", "mmr1"\n\t"
+#define V_PMINUB_3DNOW(mmr1, mmr2, mmrw) V_PMINUB_MMX(mmr1, mmr2, mmrw) // use MMX version
+#define V_PMINUB(mmr1, mmr2, mmrw) V_PMINUB2(mmr1, mmr2, mmrw, SIMD_TYPE)
+#define V_PMINUB2(mmr1, mmr2, mmrw, simd_type) V_PMINUB3(mmr1, mmr2, mmrw, simd_type)
+#define V_PMINUB3(mmr1, mmr2, mmrw, simd_type) V_PMINUB_##simd_type(mmr1, mmr2, mmrw)
+
+// some macros for movntq instruction
+// V_MOVNTQ(mmr1, mmr2)
+#define V_MOVNTQ_MMX(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ_3DNOW(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ_MMXEXT(mmr1, mmr2) "movntq "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ(mmr1, mmr2) V_MOVNTQ2(mmr1, mmr2, SIMD_TYPE)
+#define V_MOVNTQ2(mmr1, mmr2, simd_type) V_MOVNTQ3(mmr1, mmr2, simd_type)
+#define V_MOVNTQ3(mmr1, mmr2, simd_type) V_MOVNTQ_##simd_type(mmr1, mmr2)
+
+// end of macros
+
diff --git a/gst/deinterlace/tvtime/linear.c b/gst/deinterlace/tvtime/linear.c
new file mode 100644
index 00000000..8a13d8a1
--- /dev/null
+++ b/gst/deinterlace/tvtime/linear.c
@@ -0,0 +1,214 @@
+/**
+ * Copyright (C) 2002 Billy Biggs <vektor@dumbterm.net>.
+ * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_LINEAR (gst_deinterlace_method_linear_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_LINEAR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR))
+#define GST_IS_DEINTERLACE_METHOD_LINEAR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_LINEAR))
+#define GST_DEINTERLACE_METHOD_LINEAR_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR, GstDeinterlaceMethodLinearClass))
+#define GST_DEINTERLACE_METHOD_LINEAR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR, GstDeinterlaceMethodLinear))
+#define GST_DEINTERLACE_METHOD_LINEAR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_LINEAR, GstDeinterlaceMethodLinearClass))
+#define GST_DEINTERLACE_METHOD_LINEAR_CAST(obj) ((GstDeinterlaceMethodLinear*)(obj))
+
+GType gst_deinterlace_method_linear_get_type (void);
+
+typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodLinear;
+
+typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodLinearClass;
+
+static void
+deinterlace_scanline_linear_c (GstDeinterlaceMethod * self,
+ GstDeinterlace * parent, guint8 * out,
+ GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ gint i;
+
+ width *= 2;
+ for (i = 0; i < width; i++)
+ out[i] = (scanlines->t0[i] + scanlines->b0[i]) / 2;
+}
+
+#ifdef BUILD_X86_ASM
+#include "mmx.h"
+static void
+deinterlace_scanline_linear_mmx (GstDeinterlaceMethod * self,
+ GstDeinterlace * parent, guint8 * out,
+ GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ const mmx_t shiftmask = { 0xfefffefffefffeffULL }; /* To avoid shifting chroma to luma. */
+ int i;
+ guint8 *bot = scanlines->b0, *top = scanlines->t0;
+
+ for (i = width / 16; i; --i) {
+ movq_m2r (*bot, mm0);
+ movq_m2r (*top, mm1);
+ movq_m2r (*(bot + 8), mm2);
+ movq_m2r (*(top + 8), mm3);
+ movq_m2r (*(bot + 16), mm4);
+ movq_m2r (*(top + 16), mm5);
+ movq_m2r (*(bot + 24), mm6);
+ movq_m2r (*(top + 24), mm7);
+ pand_m2r (shiftmask, mm0);
+ pand_m2r (shiftmask, mm1);
+ pand_m2r (shiftmask, mm2);
+ pand_m2r (shiftmask, mm3);
+ pand_m2r (shiftmask, mm4);
+ pand_m2r (shiftmask, mm5);
+ pand_m2r (shiftmask, mm6);
+ pand_m2r (shiftmask, mm7);
+ psrlw_i2r (1, mm0);
+ psrlw_i2r (1, mm1);
+ psrlw_i2r (1, mm2);
+ psrlw_i2r (1, mm3);
+ psrlw_i2r (1, mm4);
+ psrlw_i2r (1, mm5);
+ psrlw_i2r (1, mm6);
+ psrlw_i2r (1, mm7);
+ paddb_r2r (mm1, mm0);
+ paddb_r2r (mm3, mm2);
+ paddb_r2r (mm5, mm4);
+ paddb_r2r (mm7, mm6);
+ movq_r2m (mm0, *out);
+ movq_r2m (mm2, *(out + 8));
+ movq_r2m (mm4, *(out + 16));
+ movq_r2m (mm6, *(out + 24));
+ out += 32;
+ top += 32;
+ bot += 32;
+ }
+ width = (width & 0xf);
+
+ for (i = width / 4; i; --i) {
+ movq_m2r (*bot, mm0);
+ movq_m2r (*top, mm1);
+ pand_m2r (shiftmask, mm0);
+ pand_m2r (shiftmask, mm1);
+ psrlw_i2r (1, mm0);
+ psrlw_i2r (1, mm1);
+ paddb_r2r (mm1, mm0);
+ movq_r2m (mm0, *out);
+ out += 8;
+ top += 8;
+ bot += 8;
+ }
+ width = width & 0x7;
+
+ /* Handle last few pixels. */
+ for (i = width * 2; i; --i) {
+ *out++ = ((*top++) + (*bot++)) >> 1;
+ }
+
+ emms ();
+}
+
+#include "sse.h"
+static void
+deinterlace_scanline_linear_mmxext (GstDeinterlaceMethod * self,
+ GstDeinterlace * parent, guint8 * out,
+ GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ gint i;
+ guint8 *bot = scanlines->b0, *top = scanlines->t0;
+
+ for (i = width / 16; i; --i) {
+ movq_m2r (*bot, mm0);
+ movq_m2r (*top, mm1);
+ movq_m2r (*(bot + 8), mm2);
+ movq_m2r (*(top + 8), mm3);
+ movq_m2r (*(bot + 16), mm4);
+ movq_m2r (*(top + 16), mm5);
+ movq_m2r (*(bot + 24), mm6);
+ movq_m2r (*(top + 24), mm7);
+ pavgb_r2r (mm1, mm0);
+ pavgb_r2r (mm3, mm2);
+ pavgb_r2r (mm5, mm4);
+ pavgb_r2r (mm7, mm6);
+ movntq_r2m (mm0, *out);
+ movntq_r2m (mm2, *(out + 8));
+ movntq_r2m (mm4, *(out + 16));
+ movntq_r2m (mm6, *(out + 24));
+ out += 32;
+ top += 32;
+ bot += 32;
+ }
+ width = (width & 0xf);
+
+ for (i = width / 4; i; --i) {
+ movq_m2r (*bot, mm0);
+ movq_m2r (*top, mm1);
+ pavgb_r2r (mm1, mm0);
+ movntq_r2m (mm0, *out);
+ out += 8;
+ top += 8;
+ bot += 8;
+ }
+ width = width & 0x7;
+
+ /* Handle last few pixels. */
+ for (i = width * 2; i; --i) {
+ *out++ = ((*top++) + (*bot++)) >> 1;
+ }
+
+ emms ();
+}
+
+#endif
+
+G_DEFINE_TYPE (GstDeinterlaceMethodLinear, gst_deinterlace_method_linear,
+ GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+gst_deinterlace_method_linear_class_init (GstDeinterlaceMethodLinearClass *
+ klass)
+{
+ GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+ GstDeinterlaceSimpleMethodClass *dism_class =
+ (GstDeinterlaceSimpleMethodClass *) klass;
+#ifdef BUILD_X86_ASM
+ guint cpu_flags = oil_cpu_get_flags ();
+#endif
+
+ dim_class->fields_required = 1;
+ dim_class->name = "Television: Full resolution";
+ dim_class->nick = "linear";
+ dim_class->latency = 0;
+
+ dism_class->interpolate_scanline = deinterlace_scanline_linear_c;
+
+#ifdef BUILD_X86_ASM
+ if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) {
+ dism_class->interpolate_scanline = deinterlace_scanline_linear_mmxext;
+ } else if (cpu_flags & OIL_IMPL_FLAG_MMX) {
+ dism_class->interpolate_scanline = deinterlace_scanline_linear_mmx;
+ }
+#endif
+}
+
+static void
+gst_deinterlace_method_linear_init (GstDeinterlaceMethodLinear * self)
+{
+}
diff --git a/gst/deinterlace/tvtime/linearblend.c b/gst/deinterlace/tvtime/linearblend.c
new file mode 100644
index 00000000..5ecffd6e
--- /dev/null
+++ b/gst/deinterlace/tvtime/linearblend.c
@@ -0,0 +1,231 @@
+/**
+ * Linear blend deinterlacing plugin. The idea for this algorithm came
+ * from the linear blend deinterlacer which originated in the mplayer
+ * sources.
+ *
+ * Copyright (C) 2002 Billy Biggs <vektor@dumbterm.net>.
+ * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND (gst_deinterlace_method_linear_blend_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_LINEAR_BLEND(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND))
+#define GST_IS_DEINTERLACE_METHOD_LINEAR_BLEND_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND))
+#define GST_DEINTERLACE_METHOD_LINEAR_BLEND_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND, GstDeinterlaceMethodLinearBlendClass))
+#define GST_DEINTERLACE_METHOD_LINEAR_BLEND(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND, GstDeinterlaceMethodLinearBlend))
+#define GST_DEINTERLACE_METHOD_LINEAR_BLEND_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND, GstDeinterlaceMethodLinearBlendClass))
+#define GST_DEINTERLACE_METHOD_LINEAR_BLEND_CAST(obj) ((GstDeinterlaceMethodLinearBlend*)(obj))
+
+GType gst_deinterlace_method_linear_blend_get_type (void);
+
+typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodLinearBlend;
+
+typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodLinearBlendClass;
+
+
+static inline void
+deinterlace_scanline_linear_blend_c (GstDeinterlaceMethod * self,
+ GstDeinterlace * parent, guint8 * out,
+ GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ guint8 *t0 = scanlines->t0;
+ guint8 *b0 = scanlines->b0;
+ guint8 *m1 = scanlines->m1;
+
+ width *= 2;
+
+ while (width--) {
+ *out++ = (*t0++ + *b0++ + (*m1++ << 1)) >> 2;
+ }
+}
+
+static inline void
+deinterlace_scanline_linear_blend2_c (GstDeinterlaceMethod * self,
+ GstDeinterlace * parent, guint8 * out,
+ GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ guint8 *m0 = scanlines->m0;
+ guint8 *t1 = scanlines->t1;
+ guint8 *b1 = scanlines->b1;
+
+ width *= 2;
+ while (width--) {
+ *out++ = (*t1++ + *b1++ + (*m0++ << 1)) >> 2;
+ }
+}
+
+#ifdef BUILD_X86_ASM
+#include "mmx.h"
+static inline void
+deinterlace_scanline_linear_blend_mmx (GstDeinterlaceMethod * self,
+ GstDeinterlace * parent, guint8 * out,
+ GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ guint8 *t0 = scanlines->t0;
+ guint8 *b0 = scanlines->b0;
+ guint8 *m1 = scanlines->m1;
+ gint i;
+
+ // Get width in bytes.
+ width *= 2;
+ i = width / 8;
+ width -= i * 8;
+
+ pxor_r2r (mm7, mm7);
+ while (i--) {
+ movd_m2r (*t0, mm0);
+ movd_m2r (*b0, mm1);
+ movd_m2r (*m1, mm2);
+
+ movd_m2r (*(t0 + 4), mm3);
+ movd_m2r (*(b0 + 4), mm4);
+ movd_m2r (*(m1 + 4), mm5);
+
+ punpcklbw_r2r (mm7, mm0);
+ punpcklbw_r2r (mm7, mm1);
+ punpcklbw_r2r (mm7, mm2);
+
+ punpcklbw_r2r (mm7, mm3);
+ punpcklbw_r2r (mm7, mm4);
+ punpcklbw_r2r (mm7, mm5);
+
+ psllw_i2r (1, mm2);
+ psllw_i2r (1, mm5);
+ paddw_r2r (mm0, mm2);
+ paddw_r2r (mm3, mm5);
+ paddw_r2r (mm1, mm2);
+ paddw_r2r (mm4, mm5);
+ psrlw_i2r (2, mm2);
+ psrlw_i2r (2, mm5);
+ packuswb_r2r (mm2, mm2);
+ packuswb_r2r (mm5, mm5);
+
+ movd_r2m (mm2, *out);
+ movd_r2m (mm5, *(out + 4));
+ out += 8;
+ t0 += 8;
+ b0 += 8;
+ m1 += 8;
+ }
+ while (width--) {
+ *out++ = (*t0++ + *b0++ + (*m1++ << 1)) >> 2;
+ }
+ emms ();
+}
+
+static inline void
+deinterlace_scanline_linear_blend2_mmx (GstDeinterlaceMethod * self,
+ GstDeinterlace * parent, guint8 * out,
+ GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ guint8 *m0 = scanlines->m0;
+ guint8 *t1 = scanlines->t1;
+ guint8 *b1 = scanlines->b1;
+ gint i;
+
+ // Get width in bytes.
+ width *= 2;
+ i = width / 8;
+ width -= i * 8;
+
+ pxor_r2r (mm7, mm7);
+ while (i--) {
+ movd_m2r (*t1, mm0);
+ movd_m2r (*b1, mm1);
+ movd_m2r (*m0, mm2);
+
+ movd_m2r (*(t1 + 4), mm3);
+ movd_m2r (*(b1 + 4), mm4);
+ movd_m2r (*(m0 + 4), mm5);
+
+ punpcklbw_r2r (mm7, mm0);
+ punpcklbw_r2r (mm7, mm1);
+ punpcklbw_r2r (mm7, mm2);
+
+ punpcklbw_r2r (mm7, mm3);
+ punpcklbw_r2r (mm7, mm4);
+ punpcklbw_r2r (mm7, mm5);
+
+ psllw_i2r (1, mm2);
+ psllw_i2r (1, mm5);
+ paddw_r2r (mm0, mm2);
+ paddw_r2r (mm3, mm5);
+ paddw_r2r (mm1, mm2);
+ paddw_r2r (mm4, mm5);
+ psrlw_i2r (2, mm2);
+ psrlw_i2r (2, mm5);
+ packuswb_r2r (mm2, mm2);
+ packuswb_r2r (mm5, mm5);
+
+ movd_r2m (mm2, *out);
+ movd_r2m (mm5, *(out + 4));
+ out += 8;
+ t1 += 8;
+ b1 += 8;
+ m0 += 8;
+ }
+ while (width--) {
+ *out++ = (*t1++ + *b1++ + (*m0++ << 1)) >> 2;
+ }
+ emms ();
+}
+
+#endif
+
+G_DEFINE_TYPE (GstDeinterlaceMethodLinearBlend,
+ gst_deinterlace_method_linear_blend, GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+ gst_deinterlace_method_linear_blend_class_init
+ (GstDeinterlaceMethodLinearBlendClass * klass)
+{
+ GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+ GstDeinterlaceSimpleMethodClass *dism_class =
+ (GstDeinterlaceSimpleMethodClass *) klass;
+#ifdef BUILD_X86_ASM
+ guint cpu_flags = oil_cpu_get_flags ();
+#endif
+
+ dim_class->fields_required = 2;
+ dim_class->name = "Blur: Temporal";
+ dim_class->nick = "linearblend";
+ dim_class->latency = 0;
+
+ dism_class->interpolate_scanline = deinterlace_scanline_linear_blend_c;
+ dism_class->copy_scanline = deinterlace_scanline_linear_blend2_c;
+
+#ifdef BUILD_X86_ASM
+ if (cpu_flags & OIL_IMPL_FLAG_MMX) {
+ dism_class->interpolate_scanline = deinterlace_scanline_linear_blend_mmx;
+ dism_class->copy_scanline = deinterlace_scanline_linear_blend2_mmx;
+ }
+#endif
+}
+
+static void
+gst_deinterlace_method_linear_blend_init (GstDeinterlaceMethodLinearBlend *
+ self)
+{
+}
diff --git a/gst/deinterlace/tvtime/mmx.h b/gst/deinterlace/tvtime/mmx.h
new file mode 100644
index 00000000..3627e61b
--- /dev/null
+++ b/gst/deinterlace/tvtime/mmx.h
@@ -0,0 +1,723 @@
+/* mmx.h
+
+ MultiMedia eXtensions GCC interface library for IA32.
+
+ To use this library, simply include this header file
+ and compile with GCC. You MUST have inlining enabled
+ in order for mmx_ok() to work; this can be done by
+ simply using -O on the GCC command line.
+
+ Compiling with -DMMX_TRACE will cause detailed trace
+ output to be sent to stderr for each mmx operation.
+ This adds lots of code, and obviously slows execution to
+ a crawl, but can be very useful for debugging.
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
+ LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ AND FITNESS FOR ANY PARTICULAR PURPOSE.
+
+ 1997-98 by H. Dietz and R. Fisher
+
+ History:
+ 97-98* R.Fisher Early versions
+ 980501 R.Fisher Original Release
+ 980611* H.Dietz Rewrite, correctly implementing inlines, and
+ R.Fisher including direct register accesses.
+ 980616 R.Fisher Release of 980611 as 980616.
+ 980714 R.Fisher Minor corrections to Makefile, etc.
+ 980715 R.Fisher mmx_ok() now prevents optimizer from using
+ clobbered values.
+ mmx_ok() now checks if cpuid instruction is
+ available before trying to use it.
+ 980726* R.Fisher mm_support() searches for AMD 3DNow, Cyrix
+ Extended MMX, and standard MMX. It returns a
+ value which is positive if any of these are
+ supported, and can be masked with constants to
+ see which. mmx_ok() is now a call to this
+ 980726* R.Fisher Added i2r support for shift functions
+ 980919 R.Fisher Fixed AMD extended feature recognition bug.
+ 980921 R.Fisher Added definition/check for _MMX_H.
+ Added "float s[2]" to mmx_t for use with
+ 3DNow and EMMX. So same mmx_t can be used.
+ 981013 R.Fisher Fixed cpuid function 1 bug (looked at wrong reg)
+ Fixed psllq_i2r error in mmxtest.c
+
+ * Unreleased (internal or interim) versions
+
+ Notes:
+ It appears that the latest gas has the pand problem fixed, therefore
+ I'll undefine BROKEN_PAND by default.
+ String compares may be quicker than the multiple test/jumps in vendor
+ test sequence in mmx_ok(), but I'm not concerned with that right now.
+
+ Acknowledgments:
+ Jussi Laako for pointing out the errors ultimately found to be
+ connected to the failure to notify the optimizer of clobbered values.
+ Roger Hardiman for reminding us that CPUID isn't everywhere, and that
+ someone may actually try to use this on a machine without CPUID.
+ Also for suggesting code for checking this.
+ Robert Dale for pointing out the AMD recognition bug.
+ Jimmy Mayfield and Carl Witty for pointing out the Intel recognition
+ bug.
+ Carl Witty for pointing out the psllq_i2r test bug.
+*/
+
+#ifndef _MMX_H
+#define _MMX_H
+
+/*#define MMX_TRACE */
+
+/* Warning: at this writing, the version of GAS packaged
+ with most Linux distributions does not handle the
+ parallel AND operation mnemonic correctly. If the
+ symbol BROKEN_PAND is defined, a slower alternative
+ coding will be used. If execution of mmxtest results
+ in an illegal instruction fault, define this symbol.
+*/
+#undef BROKEN_PAND
+
+
+/* The type of a value that fits in an MMX register
+ (note that long long constant values MUST be suffixed
+ by LL and unsigned long long values by ULL, lest
+ they be truncated by the compiler)
+*/
+typedef union {
+ long long q; /* Quadword (64-bit) value */
+ unsigned long long uq; /* Unsigned Quadword */
+ int d[2]; /* 2 Doubleword (32-bit) values */
+ unsigned int ud[2]; /* 2 Unsigned Doubleword */
+ short w[4]; /* 4 Word (16-bit) values */
+ unsigned short uw[4]; /* 4 Unsigned Word */
+ char b[8]; /* 8 Byte (8-bit) values */
+ unsigned char ub[8]; /* 8 Unsigned Byte */
+	float	s[2];	/* 2 Single-precision (32-bit) values */
+} mmx_t;
+
+
+/* Function to test if multimedia instructions are supported...
+*/
+inline extern int
+mm_support(void)
+{
+ /* Returns 1 if MMX instructions are supported,
+ 3 if Cyrix MMX and Extended MMX instructions are supported
+ 5 if AMD MMX and 3DNow! instructions are supported
+ 0 if hardware does not support any of these
+ */
+ register int rval = 0;
+
+ __asm__ __volatile__ (
+ /* See if CPUID instruction is supported ... */
+ /* ... Get copies of EFLAGS into eax and ecx */
+ "pushf\n\t"
+ "popl %%eax\n\t"
+ "movl %%eax, %%ecx\n\t"
+
+ /* ... Toggle the ID bit in one copy and store */
+ /* to the EFLAGS reg */
+ "xorl $0x200000, %%eax\n\t"
+ "push %%eax\n\t"
+ "popf\n\t"
+
+ /* ... Get the (hopefully modified) EFLAGS */
+ "pushf\n\t"
+ "popl %%eax\n\t"
+
+ /* ... Compare and test result */
+ "xorl %%eax, %%ecx\n\t"
+ "testl $0x200000, %%ecx\n\t"
+ "jz NotSupported1\n\t" /* Nothing supported */
+
+
+ /* Get standard CPUID information, and
+ go to a specific vendor section */
+ "movl $0, %%eax\n\t"
+ "cpuid\n\t"
+
+ /* Check for Intel */
+ "cmpl $0x756e6547, %%ebx\n\t"
+ "jne TryAMD\n\t"
+ "cmpl $0x49656e69, %%edx\n\t"
+ "jne TryAMD\n\t"
+ "cmpl $0x6c65746e, %%ecx\n"
+ "jne TryAMD\n\t"
+ "jmp Intel\n\t"
+
+ /* Check for AMD */
+ "\nTryAMD:\n\t"
+ "cmpl $0x68747541, %%ebx\n\t"
+ "jne TryCyrix\n\t"
+ "cmpl $0x69746e65, %%edx\n\t"
+ "jne TryCyrix\n\t"
+ "cmpl $0x444d4163, %%ecx\n"
+ "jne TryCyrix\n\t"
+ "jmp AMD\n\t"
+
+ /* Check for Cyrix */
+ "\nTryCyrix:\n\t"
+ "cmpl $0x69727943, %%ebx\n\t"
+ "jne NotSupported2\n\t"
+ "cmpl $0x736e4978, %%edx\n\t"
+ "jne NotSupported3\n\t"
+ "cmpl $0x64616574, %%ecx\n\t"
+ "jne NotSupported4\n\t"
+ /* Drop through to Cyrix... */
+
+
+ /* Cyrix Section */
+ /* See if extended CPUID is supported */
+ "movl $0x80000000, %%eax\n\t"
+ "cpuid\n\t"
+ "cmpl $0x80000000, %%eax\n\t"
+ "jl MMXtest\n\t" /* Try standard CPUID instead */
+
+ /* Extended CPUID supported, so get extended features */
+ "movl $0x80000001, %%eax\n\t"
+ "cpuid\n\t"
+ "testl $0x00800000, %%eax\n\t" /* Test for MMX */
+ "jz NotSupported5\n\t" /* MMX not supported */
+ "testl $0x01000000, %%eax\n\t" /* Test for Ext'd MMX */
+ "jnz EMMXSupported\n\t"
+ "movl $1, %0:\n\n\t" /* MMX Supported */
+ "jmp Return\n\n"
+ "EMMXSupported:\n\t"
+ "movl $3, %0:\n\n\t" /* EMMX and MMX Supported */
+ "jmp Return\n\t"
+
+
+ /* AMD Section */
+ "AMD:\n\t"
+
+ /* See if extended CPUID is supported */
+ "movl $0x80000000, %%eax\n\t"
+ "cpuid\n\t"
+ "cmpl $0x80000000, %%eax\n\t"
+ "jl MMXtest\n\t" /* Try standard CPUID instead */
+
+ /* Extended CPUID supported, so get extended features */
+ "movl $0x80000001, %%eax\n\t"
+ "cpuid\n\t"
+ "testl $0x00800000, %%edx\n\t" /* Test for MMX */
+ "jz NotSupported6\n\t" /* MMX not supported */
+ "testl $0x80000000, %%edx\n\t" /* Test for 3DNow! */
+ "jnz ThreeDNowSupported\n\t"
+ "movl $1, %0:\n\n\t" /* MMX Supported */
+ "jmp Return\n\n"
+ "ThreeDNowSupported:\n\t"
+ "movl $5, %0:\n\n\t" /* 3DNow! and MMX Supported */
+ "jmp Return\n\t"
+
+
+ /* Intel Section */
+ "Intel:\n\t"
+
+ /* Check for MMX */
+ "MMXtest:\n\t"
+ "movl $1, %%eax\n\t"
+ "cpuid\n\t"
+ "testl $0x00800000, %%edx\n\t" /* Test for MMX */
+ "jz NotSupported7\n\t" /* MMX Not supported */
+ "movl $1, %0:\n\n\t" /* MMX Supported */
+ "jmp Return\n\t"
+
+ /* Nothing supported */
+ "\nNotSupported1:\n\t"
+ "#movl $101, %0:\n\n\t"
+ "\nNotSupported2:\n\t"
+ "#movl $102, %0:\n\n\t"
+ "\nNotSupported3:\n\t"
+ "#movl $103, %0:\n\n\t"
+ "\nNotSupported4:\n\t"
+ "#movl $104, %0:\n\n\t"
+ "\nNotSupported5:\n\t"
+ "#movl $105, %0:\n\n\t"
+ "\nNotSupported6:\n\t"
+ "#movl $106, %0:\n\n\t"
+ "\nNotSupported7:\n\t"
+ "#movl $107, %0:\n\n\t"
+ "movl $0, %0:\n\n\t"
+
+ "Return:\n\t"
+ : "=a" (rval)
+ : /* no input */
+ : "eax", "ebx", "ecx", "edx"
+ );
+
+ /* Return */
+ return(rval);
+}
+
+/* Function to test if mmx instructions are supported...
+*/
+inline extern int
+mmx_ok(void)
+{
+ /* Returns 1 if MMX instructions are supported, 0 otherwise */
+ return ( mm_support() & 0x1 );
+}
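+
+/* Illustrative usage sketch (not part of the original header): callers
+   normally probe the CPU once and then pick an implementation, e.g.
+
+     if (mmx_ok ())
+       interpolate_scanline_mmx (out, top, bottom, width);
+     else
+       interpolate_scanline_c (out, top, bottom, width);
+
+   where interpolate_scanline_mmx/_c are hypothetical wrappers around the
+   macros below and around plain C code respectively.
+*/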
+
+
+/* Helper functions for the instruction macros that follow...
+ (note that memory-to-register, m2r, instructions are nearly
+ as efficient as register-to-register, r2r, instructions;
+ however, memory-to-memory instructions are really simulated
+ as a convenience, and are only 1/3 as efficient)
+*/
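+
+/* A minimal composition sketch (illustrative only, not part of the original
+   header): a saturating add of two 8-byte pixel runs, assuming src and dst
+   point at 8 bytes of unsigned char data:
+
+     movq_m2r (*src, mm0);        load 8 bytes from memory into mm0
+     paddusb_m2r (*dst, mm0);     8x8 unsigned saturating add from memory
+     movq_r2m (mm0, *dst);        store the 8 result bytes back
+     emms ();                     reset the shared FPU/MMX state when done
+*/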
+#ifdef MMX_TRACE
+
+/* Include the stuff for printing a trace to stderr...
+*/
+
+#include <stdio.h>
+
+#define mmx_i2r(op, imm, reg) \
+ { \
+ mmx_t mmx_trace; \
+ mmx_trace = (imm); \
+ fprintf(stderr, #op "_i2r(" #imm "=0x%016llx, ", mmx_trace.q); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (imm)); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \
+ }
+
+#define mmx_m2r(op, mem, reg) \
+ { \
+ mmx_t mmx_trace; \
+ mmx_trace = (mem); \
+ fprintf(stderr, #op "_m2r(" #mem "=0x%016llx, ", mmx_trace.q); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (mem)); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \
+ }
+
+#define mmx_r2m(op, reg, mem) \
+ { \
+ mmx_t mmx_trace; \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #op "_r2m(" #reg "=0x%016llx, ", mmx_trace.q); \
+ mmx_trace = (mem); \
+ fprintf(stderr, #mem "=0x%016llx) => ", mmx_trace.q); \
+ __asm__ __volatile__ (#op " %%" #reg ", %0" \
+ : "=X" (mem) \
+ : /* nothing */ ); \
+ mmx_trace = (mem); \
+ fprintf(stderr, #mem "=0x%016llx\n", mmx_trace.q); \
+ }
+
+#define mmx_r2r(op, regs, regd) \
+ { \
+ mmx_t mmx_trace; \
+ __asm__ __volatile__ ("movq %%" #regs ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #op "_r2r(" #regs "=0x%016llx, ", mmx_trace.q); \
+ __asm__ __volatile__ ("movq %%" #regd ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #regd "=0x%016llx) => ", mmx_trace.q); \
+ __asm__ __volatile__ (#op " %" #regs ", %" #regd); \
+ __asm__ __volatile__ ("movq %%" #regd ", %0" \
+ : "=X" (mmx_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #regd "=0x%016llx\n", mmx_trace.q); \
+ }
+
+#define mmx_m2m(op, mems, memd) \
+ { \
+ mmx_t mmx_trace; \
+ mmx_trace = (mems); \
+ fprintf(stderr, #op "_m2m(" #mems "=0x%016llx, ", mmx_trace.q); \
+ mmx_trace = (memd); \
+ fprintf(stderr, #memd "=0x%016llx) => ", mmx_trace.q); \
+ __asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+ #op " %1, %%mm0\n\t" \
+ "movq %%mm0, %0" \
+ : "=X" (memd) \
+ : "X" (mems)); \
+ mmx_trace = (memd); \
+ fprintf(stderr, #memd "=0x%016llx\n", mmx_trace.q); \
+ }
+
+#else
+
+/* These macros are a lot simpler without the tracing...
+*/
+
+#define mmx_i2r(op, imm, reg) \
+ __asm__ __volatile__ (#op " $" #imm ", %%" #reg \
+ : /* nothing */ \
+ : /* nothing */);
+
+#define mmx_m2r(op, mem, reg) \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "m" (mem))
+
+#define mmx_r2m(op, reg, mem) \
+ __asm__ __volatile__ (#op " %%" #reg ", %0" \
+ : "=m" (mem) \
+ : /* nothing */ )
+
+#define mmx_r2r(op, regs, regd) \
+ __asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+#define mmx_m2m(op, mems, memd) \
+ __asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+ #op " %1, %%mm0\n\t" \
+ "movq %%mm0, %0" \
+ : "=m" (memd) \
+ : "m" (mems))
+
+#endif
+
+
+/* 1x64 MOVe Quadword
+ (this is both a load and a store...
+ in fact, it is the only way to store)
+*/
+#define movq_m2r(var, reg) mmx_m2r(movq, var, reg)
+#define movq_r2m(reg, var) mmx_r2m(movq, reg, var)
+#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd)
+#define movq(vars, vard) \
+ __asm__ __volatile__ ("movq %1, %%mm0\n\t" \
+ "movq %%mm0, %0" \
+ : "=X" (vard) \
+ : "X" (vars))
+
+
+/* 1x32 MOVe Doubleword
+ (like movq, this is both load and store...
+ but is most useful for moving things between
+ mmx registers and ordinary registers)
+*/
+#define movd_m2r(var, reg) mmx_m2r(movd, var, reg)
+#define movd_r2m(reg, var) mmx_r2m(movd, reg, var)
+#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd)
+#define movd(vars, vard) \
+ __asm__ __volatile__ ("movd %1, %%mm0\n\t" \
+ "movd %%mm0, %0" \
+ : "=X" (vard) \
+ : "X" (vars))
+
+
+/* 2x32, 4x16, and 8x8 Parallel ADDs
+*/
+#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg)
+#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd)
+#define paddd(vars, vard) mmx_m2m(paddd, vars, vard)
+
+#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg)
+#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd)
+#define paddw(vars, vard) mmx_m2m(paddw, vars, vard)
+
+#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg)
+#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd)
+#define paddb(vars, vard) mmx_m2m(paddb, vars, vard)
+
+
+/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic
+*/
+#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg)
+#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd)
+#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard)
+
+#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg)
+#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd)
+#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard)
+
+
+/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
+*/
+#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg)
+#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd)
+#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard)
+
+#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg)
+#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd)
+#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard)
+
+
+/* 2x32, 4x16, and 8x8 Parallel SUBs
+*/
+#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg)
+#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd)
+#define psubd(vars, vard) mmx_m2m(psubd, vars, vard)
+
+#define psubw_m2r(var, reg) mmx_m2r(psubw, var, reg)
+#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd)
+#define psubw(vars, vard) mmx_m2m(psubw, vars, vard)
+
+#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg)
+#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd)
+#define psubb(vars, vard) mmx_m2m(psubb, vars, vard)
+
+
+/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic
+*/
+#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg)
+#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd)
+#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard)
+
+#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg)
+#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd)
+#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard)
+
+
+/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
+*/
+#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg)
+#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd)
+#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard)
+
+#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg)
+#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd)
+#define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard)
+
+
+/* 4x16 Parallel MULs giving Low 4x16 portions of results
+*/
+#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg)
+#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd)
+#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard)
+
+
+/* 4x16 Parallel MULs giving High 4x16 portions of results
+*/
+#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg)
+#define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd)
+#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard)
+
+
+/* 4x16->2x32 Parallel Mul-ADD
+ (muls like pmullw, then adds adjacent 16-bit fields
+ in the multiply result to make the final 2x32 result)
+*/
+#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg)
+#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd)
+#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard)
+
+
+/* 1x64 bitwise AND
+*/
+#ifdef BROKEN_PAND
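+/* The fallback emulates pand with two pandn operations: pandn computes
+   dst = (~dst) & src, so the first pass against all-ones leaves ~dst in
+   the register and the second pass yields ~(~dst) & src = dst & src. */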
+#define pand_m2r(var, reg) \
+ { \
+ mmx_m2r(pandn, (mmx_t) -1LL, reg); \
+ mmx_m2r(pandn, var, reg); \
+ }
+#define pand_r2r(regs, regd) \
+ { \
+ mmx_m2r(pandn, (mmx_t) -1LL, regd); \
+ mmx_r2r(pandn, regs, regd); \
+ }
+#define pand(vars, vard) \
+ { \
+ movq_m2r(vard, mm0); \
+ mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
+ mmx_m2r(pandn, vars, mm0); \
+ movq_r2m(mm0, vard); \
+ }
+#else
+#define pand_m2r(var, reg) mmx_m2r(pand, var, reg)
+#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd)
+#define pand(vars, vard) mmx_m2m(pand, vars, vard)
+#endif
+
+
+/* 1x64 bitwise AND with Not the destination
+*/
+#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg)
+#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd)
+#define pandn(vars, vard) mmx_m2m(pandn, vars, vard)
+
+
+/* 1x64 bitwise OR
+*/
+#define por_m2r(var, reg) mmx_m2r(por, var, reg)
+#define por_r2r(regs, regd) mmx_r2r(por, regs, regd)
+#define por(vars, vard) mmx_m2m(por, vars, vard)
+
+
+/* 1x64 bitwise eXclusive OR
+*/
+#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg)
+#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd)
+#define pxor(vars, vard) mmx_m2m(pxor, vars, vard)
+
+
+/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
+ (resulting fields are either 0 or -1)
+*/
+#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg)
+#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd)
+#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard)
+
+#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg)
+#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd)
+#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard)
+
+#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg)
+#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd)
+#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard)
+
+
+/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
+ (resulting fields are either 0 or -1)
+*/
+#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg)
+#define pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd)
+#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard)
+
+#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg)
+#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd)
+#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard)
+
+#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg)
+#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd)
+#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard)
+
+
+/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical
+*/
+#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg)
+#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg)
+#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd)
+#define psllq(vars, vard) mmx_m2m(psllq, vars, vard)
+
+#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg)
+#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg)
+#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd)
+#define pslld(vars, vard) mmx_m2m(pslld, vars, vard)
+
+#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg)
+#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg)
+#define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd)
+#define psllw(vars, vard) mmx_m2m(psllw, vars, vard)
+
+
+/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical
+*/
+#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg)
+#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg)
+#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd)
+#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard)
+
+#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg)
+#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg)
+#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd)
+#define psrld(vars, vard) mmx_m2m(psrld, vars, vard)
+
+#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg)
+#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg)
+#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd)
+#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard)
+
+
+/* 2x32 and 4x16 Parallel Shift Right Arithmetic
+*/
+#define psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg)
+#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg)
+#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd)
+#define psrad(vars, vard) mmx_m2m(psrad, vars, vard)
+
+#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg)
+#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg)
+#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd)
+#define psraw(vars, vard) mmx_m2m(psraw, vars, vard)
+
+
+/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
+ (packs source and dest fields into dest in that order)
+*/
+#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg)
+#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
+#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard)
+
+#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg)
+#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
+#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard)
+
+
+/* 4x16->8x8 PACK and Unsigned Saturate
+ (packs source and dest fields into dest in that order)
+*/
+#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg)
+#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
+#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard)
+
+
+/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
+ (interleaves low half of dest with low half of source
+ as padding in each result field)
+*/
+#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg)
+#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
+#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard)
+
+#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg)
+#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
+#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard)
+
+#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg)
+#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
+#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard)
+
+
+/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
+ (interleaves high half of dest with high half of source
+ as padding in each result field)
+*/
+#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg)
+#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
+#define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard)
+
+#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg)
+#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
+#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard)
+
+#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg)
+#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
+#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard)
+
+
+/* Empty MMx State
+ (used to clean-up when going from mmx to float use
+   (used to clean up when going from mmx to float use
+ there is no float-to-mmx operation needed, because
+ only the float tag word info is corruptible)
+*/
+#ifdef MMX_TRACE
+
+#define emms() \
+ { \
+ fprintf(stderr, "emms()\n"); \
+ __asm__ __volatile__ ("emms"); \
+ }
+
+#else
+
+#define emms() __asm__ __volatile__ ("emms")
+
+#endif
+
+#endif
diff --git a/gst/deinterlace/tvtime/plugins.h b/gst/deinterlace/tvtime/plugins.h
new file mode 100644
index 00000000..8fb01af5
--- /dev/null
+++ b/gst/deinterlace/tvtime/plugins.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * GStreamer
+ * Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
+ * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs.
+ * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
+ */
+
+#ifndef TVTIME_PLUGINS_H_INCLUDED
+#define TVTIME_PLUGINS_H_INCLUDED
+
+#define GST_TYPE_DEINTERLACE_TOMSMOCOMP (gst_deinterlace_method_tomsmocomp_get_type ())
+#define GST_TYPE_DEINTERLACE_GREEDY_H (gst_deinterlace_method_greedy_h_get_type ())
+#define GST_TYPE_DEINTERLACE_GREEDY_L (gst_deinterlace_method_greedy_l_get_type ())
+#define GST_TYPE_DEINTERLACE_VFIR (gst_deinterlace_method_vfir_get_type ())
+#define GST_TYPE_DEINTERLACE_LINEAR (gst_deinterlace_method_linear_get_type ())
+#define GST_TYPE_DEINTERLACE_LINEAR_BLEND (gst_deinterlace_method_linear_blend_get_type ())
+#define GST_TYPE_DEINTERLACE_SCALER_BOB (gst_deinterlace_method_scaler_bob_get_type ())
+#define GST_TYPE_DEINTERLACE_WEAVE (gst_deinterlace_method_weave_get_type ())
+#define GST_TYPE_DEINTERLACE_WEAVE_TFF (gst_deinterlace_method_weave_tff_get_type ())
+#define GST_TYPE_DEINTERLACE_WEAVE_BFF (gst_deinterlace_method_weave_bff_get_type ())
+
+GType gst_deinterlace_method_tomsmocomp_get_type (void);
+GType gst_deinterlace_method_greedy_h_get_type (void);
+GType gst_deinterlace_method_greedy_l_get_type (void);
+GType gst_deinterlace_method_vfir_get_type (void);
+
+GType gst_deinterlace_method_linear_get_type (void);
+GType gst_deinterlace_method_linear_blend_get_type (void);
+GType gst_deinterlace_method_scaler_bob_get_type (void);
+GType gst_deinterlace_method_weave_get_type (void);
+GType gst_deinterlace_method_weave_tff_get_type (void);
+GType gst_deinterlace_method_weave_bff_get_type (void);
+
+#endif /* TVTIME_PLUGINS_H_INCLUDED */
diff --git a/gst/deinterlace/tvtime/scalerbob.c b/gst/deinterlace/tvtime/scalerbob.c
new file mode 100644
index 00000000..a7bca169
--- /dev/null
+++ b/gst/deinterlace/tvtime/scalerbob.c
@@ -0,0 +1,74 @@
+/**
+ * Double lines
+ * Copyright (C) 2008 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB (gst_deinterlace_method_scaler_bob_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_SCALER_BOB(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB))
+#define GST_IS_DEINTERLACE_METHOD_SCALER_BOB_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB))
+#define GST_DEINTERLACE_METHOD_SCALER_BOB_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB, GstDeinterlaceMethodScalerBobClass))
+#define GST_DEINTERLACE_METHOD_SCALER_BOB(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB, GstDeinterlaceMethodScalerBob))
+#define GST_DEINTERLACE_METHOD_SCALER_BOB_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB, GstDeinterlaceMethodScalerBobClass))
+#define GST_DEINTERLACE_METHOD_SCALER_BOB_CAST(obj) ((GstDeinterlaceMethodScalerBob*)(obj))
+
+GType gst_deinterlace_method_scaler_bob_get_type (void);
+
+typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodScalerBob;
+
+typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodScalerBobClass;
+
+
+static void
+deinterlace_scanline_scaler_bob (GstDeinterlaceMethod * self,
+ GstDeinterlace * parent, guint8 * out,
+ GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ oil_memcpy (out, scanlines->t0, parent->row_stride);
+}
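+
+/* Scaler bob simply repeats the nearest line of the current field (t0) to
+   fill the missing scanline; no temporal information is used, which is why
+   fields_required is set to 1 below. */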
+
+G_DEFINE_TYPE (GstDeinterlaceMethodScalerBob, gst_deinterlace_method_scaler_bob,
+ GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+gst_deinterlace_method_scaler_bob_class_init (GstDeinterlaceMethodScalerBobClass
+ * klass)
+{
+ GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+ GstDeinterlaceSimpleMethodClass *dism_class =
+ (GstDeinterlaceSimpleMethodClass *) klass;
+
+ dim_class->fields_required = 1;
+ dim_class->name = "Double lines";
+ dim_class->nick = "scalerbob";
+ dim_class->latency = 0;
+
+ dism_class->interpolate_scanline = deinterlace_scanline_scaler_bob;
+}
+
+static void
+gst_deinterlace_method_scaler_bob_init (GstDeinterlaceMethodScalerBob * self)
+{
+}
diff --git a/gst/deinterlace/tvtime/sse.h b/gst/deinterlace/tvtime/sse.h
new file mode 100644
index 00000000..2e00ee0c
--- /dev/null
+++ b/gst/deinterlace/tvtime/sse.h
@@ -0,0 +1,992 @@
+/* sse.h
+
+	Streaming SIMD Extensions (a.k.a. Katmai New Instructions)
+ GCC interface library for IA32.
+
+ To use this library, simply include this header file
+ and compile with GCC. You MUST have inlining enabled
+ in order for sse_ok() to work; this can be done by
+ simply using -O on the GCC command line.
+
+ Compiling with -DSSE_TRACE will cause detailed trace
+ output to be sent to stderr for each sse operation.
+ This adds lots of code, and obviously slows execution to
+ a crawl, but can be very useful for debugging.
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
+ LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ AND FITNESS FOR ANY PARTICULAR PURPOSE.
+
+ 1999 by R. Fisher
+ Based on libmmx by H. Dietz and R. Fisher
+
+ Notes:
+ This is still extremely alpha.
+ Because this library depends on an assembler which understands the
+ SSE opcodes, you probably won't be able to use this yet.
+ For now, do not use TRACE versions. These both make use
+ of the MMX registers, not the SSE registers. This will be resolved
+ at a later date.
+ ToDo:
+ Rewrite TRACE macros
+ Major Debugging Work
+*/
+
+#ifndef _SSE_H
+#define _SSE_H
+
+
+
+/* The type of a value that fits in an SSE register
+ (note that long long constant values MUST be suffixed
+ by LL and unsigned long long values by ULL, lest
+ they be truncated by the compiler)
+*/
+typedef union {
+	float	sf[4];	/* 4 Single-precision (32-bit) values */
+} __attribute__ ((aligned (16))) sse_t; /* On a 16 byte (128-bit) boundary */
+
+
+#if 0
+/* Function to test if multimedia instructions are supported...
+*/
+inline extern int
+mm_support(void)
+{
+ /* Returns 1 if MMX instructions are supported,
+ 3 if Cyrix MMX and Extended MMX instructions are supported
+ 5 if AMD MMX and 3DNow! instructions are supported
+ 9 if MMX and SSE instructions are supported
+ 0 if hardware does not support any of these
+ */
+ register int rval = 0;
+
+ __asm__ __volatile__ (
+ /* See if CPUID instruction is supported ... */
+ /* ... Get copies of EFLAGS into eax and ecx */
+ "pushf\n\t"
+ "popl %%eax\n\t"
+ "movl %%eax, %%ecx\n\t"
+
+ /* ... Toggle the ID bit in one copy and store */
+ /* to the EFLAGS reg */
+ "xorl $0x200000, %%eax\n\t"
+ "push %%eax\n\t"
+ "popf\n\t"
+
+ /* ... Get the (hopefully modified) EFLAGS */
+ "pushf\n\t"
+ "popl %%eax\n\t"
+
+ /* ... Compare and test result */
+ "xorl %%eax, %%ecx\n\t"
+ "testl $0x200000, %%ecx\n\t"
+ "jz NotSupported1\n\t" /* CPUID not supported */
+
+
+ /* Get standard CPUID information, and
+ go to a specific vendor section */
+ "movl $0, %%eax\n\t"
+ "cpuid\n\t"
+
+ /* Check for Intel */
+ "cmpl $0x756e6547, %%ebx\n\t"
+ "jne TryAMD\n\t"
+ "cmpl $0x49656e69, %%edx\n\t"
+ "jne TryAMD\n\t"
+ "cmpl $0x6c65746e, %%ecx\n"
+ "jne TryAMD\n\t"
+ "jmp Intel\n\t"
+
+ /* Check for AMD */
+ "\nTryAMD:\n\t"
+ "cmpl $0x68747541, %%ebx\n\t"
+ "jne TryCyrix\n\t"
+ "cmpl $0x69746e65, %%edx\n\t"
+ "jne TryCyrix\n\t"
+ "cmpl $0x444d4163, %%ecx\n"
+ "jne TryCyrix\n\t"
+ "jmp AMD\n\t"
+
+ /* Check for Cyrix */
+ "\nTryCyrix:\n\t"
+ "cmpl $0x69727943, %%ebx\n\t"
+ "jne NotSupported2\n\t"
+ "cmpl $0x736e4978, %%edx\n\t"
+ "jne NotSupported3\n\t"
+ "cmpl $0x64616574, %%ecx\n\t"
+ "jne NotSupported4\n\t"
+ /* Drop through to Cyrix... */
+
+
+ /* Cyrix Section */
+ /* See if extended CPUID level 80000001 is supported */
+ /* The value of CPUID/80000001 for the 6x86MX is undefined
+ according to the Cyrix CPU Detection Guide (Preliminary
+ Rev. 1.01 table 1), so we'll check the value of eax for
+ CPUID/0 to see if standard CPUID level 2 is supported.
+ According to the table, the only CPU which supports level
+ 2 is also the only one which supports extended CPUID levels.
+ */
+ "cmpl $0x2, %%eax\n\t"
+ "jne MMXtest\n\t" /* Use standard CPUID instead */
+
+ /* Extended CPUID supported (in theory), so get extended
+ features */
+ "movl $0x80000001, %%eax\n\t"
+ "cpuid\n\t"
+ "testl $0x00800000, %%eax\n\t" /* Test for MMX */
+ "jz NotSupported5\n\t" /* MMX not supported */
+ "testl $0x01000000, %%eax\n\t" /* Test for Ext'd MMX */
+ "jnz EMMXSupported\n\t"
+ "movl $1, %0:\n\n\t" /* MMX Supported */
+ "jmp Return\n\n"
+ "EMMXSupported:\n\t"
+ "movl $3, %0:\n\n\t" /* EMMX and MMX Supported */
+ "jmp Return\n\t"
+
+
+ /* AMD Section */
+ "AMD:\n\t"
+
+ /* See if extended CPUID is supported */
+ "movl $0x80000000, %%eax\n\t"
+ "cpuid\n\t"
+ "cmpl $0x80000000, %%eax\n\t"
+ "jl MMXtest\n\t" /* Use standard CPUID instead */
+
+ /* Extended CPUID supported, so get extended features */
+ "movl $0x80000001, %%eax\n\t"
+ "cpuid\n\t"
+ "testl $0x00800000, %%edx\n\t" /* Test for MMX */
+ "jz NotSupported6\n\t" /* MMX not supported */
+ "testl $0x80000000, %%edx\n\t" /* Test for 3DNow! */
+ "jnz ThreeDNowSupported\n\t"
+ "movl $1, %0:\n\n\t" /* MMX Supported */
+ "jmp Return\n\n"
+ "ThreeDNowSupported:\n\t"
+ "movl $5, %0:\n\n\t" /* 3DNow! and MMX Supported */
+ "jmp Return\n\t"
+
+
+ /* Intel Section */
+ "Intel:\n\t"
+
+ /* Check for SSE */
+ "SSEtest:\n\t"
+ "movl $1, %%eax\n\t"
+ "cpuid\n\t"
+ "testl $0x02000000, %%edx\n\t" /* Test for SSE */
+ "jz MMXtest\n\t" /* SSE Not supported */
+ "movl $9, %0:\n\n\t" /* SSE Supported */
+ "jmp Return\n\t"
+
+ /* Check for MMX */
+ "MMXtest:\n\t"
+ "movl $1, %%eax\n\t"
+ "cpuid\n\t"
+ "testl $0x00800000, %%edx\n\t" /* Test for MMX */
+ "jz NotSupported7\n\t" /* MMX Not supported */
+ "movl $1, %0:\n\n\t" /* MMX Supported */
+ "jmp Return\n\t"
+
+ /* Nothing supported */
+ "\nNotSupported1:\n\t"
+ "#movl $101, %0:\n\n\t"
+ "\nNotSupported2:\n\t"
+ "#movl $102, %0:\n\n\t"
+ "\nNotSupported3:\n\t"
+ "#movl $103, %0:\n\n\t"
+ "\nNotSupported4:\n\t"
+ "#movl $104, %0:\n\n\t"
+ "\nNotSupported5:\n\t"
+ "#movl $105, %0:\n\n\t"
+ "\nNotSupported6:\n\t"
+ "#movl $106, %0:\n\n\t"
+ "\nNotSupported7:\n\t"
+ "#movl $107, %0:\n\n\t"
+ "movl $0, %0:\n\n\t"
+
+ "Return:\n\t"
+ : "=a" (rval)
+ : /* no input */
+ : "eax", "ebx", "ecx", "edx"
+ );
+
+ /* Return */
+ return(rval);
+}
+
+/* Function to test if sse instructions are supported...
+*/
+inline extern int
+sse_ok(void)
+{
+ /* Returns 1 if SSE instructions are supported, 0 otherwise */
+ return ( (mm_support() & 0x8) >> 3 );
+}
+#endif
+
+
+
+/* Helper functions for the instruction macros that follow...
+ (note that memory-to-register, m2r, instructions are nearly
+ as efficient as register-to-register, r2r, instructions;
+ however, memory-to-memory instructions are really simulated
+ as a convenience, and are only 1/3 as efficient)
+*/
+#ifdef SSE_TRACE
+
+/* Include the stuff for printing a trace to stderr...
+*/
+
+#include <stdio.h>
+
+#define sse_i2r(op, imm, reg) \
+ { \
+ sse_t sse_trace; \
+ sse_trace.uq = (imm); \
+ fprintf(stderr, #op "_i2r(" #imm "=0x%08x%08x, ", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (sse_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #reg "=0x%08x%08x) => ", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (imm)); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (sse_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #reg "=0x%08x%08x\n", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ }
+
+#define sse_m2r(op, mem, reg) \
+ { \
+ sse_t sse_trace; \
+ sse_trace = (mem); \
+ fprintf(stderr, #op "_m2r(" #mem "=0x%08x%08x, ", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (sse_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #reg "=0x%08x%08x) => ", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (mem)); \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (sse_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #reg "=0x%08x%08x\n", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ }
+
+#define sse_r2m(op, reg, mem) \
+ { \
+ sse_t sse_trace; \
+ __asm__ __volatile__ ("movq %%" #reg ", %0" \
+ : "=X" (sse_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #op "_r2m(" #reg "=0x%08x%08x, ", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ sse_trace = (mem); \
+ fprintf(stderr, #mem "=0x%08x%08x) => ", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ __asm__ __volatile__ (#op " %%" #reg ", %0" \
+ : "=X" (mem) \
+ : /* nothing */ ); \
+ sse_trace = (mem); \
+ fprintf(stderr, #mem "=0x%08x%08x\n", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ }
+
+#define sse_r2r(op, regs, regd) \
+ { \
+ sse_t sse_trace; \
+ __asm__ __volatile__ ("movq %%" #regs ", %0" \
+ : "=X" (sse_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #op "_r2r(" #regs "=0x%08x%08x, ", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ __asm__ __volatile__ ("movq %%" #regd ", %0" \
+ : "=X" (sse_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #regd "=0x%08x%08x) => ", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ __asm__ __volatile__ (#op " %" #regs ", %" #regd); \
+ __asm__ __volatile__ ("movq %%" #regd ", %0" \
+ : "=X" (sse_trace) \
+ : /* nothing */ ); \
+ fprintf(stderr, #regd "=0x%08x%08x\n", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ }
+
+#define sse_m2m(op, mems, memd) \
+ { \
+ sse_t sse_trace; \
+ sse_trace = (mems); \
+ fprintf(stderr, #op "_m2m(" #mems "=0x%08x%08x, ", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ sse_trace = (memd); \
+ fprintf(stderr, #memd "=0x%08x%08x) => ", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ __asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+ #op " %1, %%mm0\n\t" \
+ "movq %%mm0, %0" \
+ : "=X" (memd) \
+ : "X" (mems)); \
+ sse_trace = (memd); \
+ fprintf(stderr, #memd "=0x%08x%08x\n", \
+ sse_trace.d[1], sse_trace.d[0]); \
+ }
+
+#else
+
+/* These macros are a lot simpler without the tracing...
+*/
+
+#define sse_i2r(op, imm, reg) \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (imm) )
+
+#define sse_m2r(op, mem, reg) \
+ __asm__ __volatile__ (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "X" (mem))
+
+#define sse_r2m(op, reg, mem) \
+ __asm__ __volatile__ (#op " %%" #reg ", %0" \
+ : "=X" (mem) \
+ : /* nothing */ )
+
+#define sse_r2r(op, regs, regd) \
+ __asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+#define sse_r2ri(op, regs, regd, imm) \
+ __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
+ : /* nothing */ \
+ : "X" (imm) )
+
+/* Load data from mems to xmmreg, operate on xmmreg, and store data to memd */
+#define sse_m2m(op, mems, memd, xmmreg) \
+ __asm__ __volatile__ ("movups %0, %%xmm0\n\t" \
+ #op " %1, %%xmm0\n\t" \
+                        "movups %%xmm0, %0" \
+ : "=X" (memd) \
+ : "X" (mems))
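+/* Note: xmm0 is always used as the scratch register here, regardless of the
+   xmmreg argument, so callers must treat xmm0 as clobbered. */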
+
+#define sse_m2ri(op, mem, reg, subop) \
+ __asm__ __volatile__ (#op " %0, %%" #reg ", " #subop \
+ : /* nothing */ \
+ : "X" (mem))
+
+#define sse_m2mi(op, mems, memd, xmmreg, subop) \
+ __asm__ __volatile__ ("movups %0, %%xmm0\n\t" \
+ #op " %1, %%xmm0, " #subop "\n\t" \
+                        "movups %%xmm0, %0" \
+ : "=X" (memd) \
+ : "X" (mems))
+#endif
+
+
+
+
+/* 1x128 MOVe Aligned four Packed Single-fp
+*/
+#define movaps_m2r(var, reg) sse_m2r(movaps, var, reg)
+#define movaps_r2m(reg, var) sse_r2m(movaps, reg, var)
+#define movaps_r2r(regs, regd) sse_r2r(movaps, regs, regd)
+#define movaps(vars, vard) \
+  __asm__ __volatile__ ("movaps %1, %%xmm0\n\t" \
+                        "movaps %%xmm0, %0" \
+ : "=X" (vard) \
+ : "X" (vars))
+
+
+/* 1x128 MOVe aligned Non-Temporal four Packed Single-fp
+*/
+#define movntps_r2m(xmmreg, var) sse_r2m(movntps, xmmreg, var)
+
+
+/* 1x64 MOVe Non-Temporal Quadword
+*/
+#define movntq_r2m(mmreg, var) sse_r2m(movntq, mmreg, var)
+
+
+/* 1x128 MOVe Unaligned four Packed Single-fp
+*/
+#define movups_m2r(var, reg) sse_m2r(movups, var, reg)
+#define movups_r2m(reg, var) sse_r2m(movups, reg, var)
+#define movups_r2r(regs, regd) sse_r2r(movups, regs, regd)
+#define movups(vars, vard) \
+  __asm__ __volatile__ ("movups %1, %%xmm0\n\t" \
+                        "movups %%xmm0, %0" \
+ : "=X" (vard) \
+ : "X" (vars))
+
+
+/* MOVe High to Low Packed Single-fp
+ high half of 4x32f (x) -> low half of 4x32f (y)
+*/
+#define movhlps_r2r(regs, regd) sse_r2r(movhlps, regs, regd)
+
+
+/* MOVe Low to High Packed Single-fp
+ low half of 4x32f (x) -> high half of 4x32f (y)
+*/
+#define movlhps_r2r(regs, regd) sse_r2r(movlhps, regs, regd)
+
+
+/* MOVe High Packed Single-fp
+ 2x32f -> high half of 4x32f
+*/
+#define movhps_m2r(var, reg) sse_m2r(movhps, var, reg)
+#define movhps_r2m(reg, var) sse_r2m(movhps, reg, var)
+#define movhps(vars, vard) \
+  __asm__ __volatile__ ("movhps %1, %%xmm0\n\t" \
+                        "movhps %%xmm0, %0" \
+ : "=X" (vard) \
+ : "X" (vars))
+
+
+/* MOVe Low Packed Single-fp
+ 2x32f -> low half of 4x32f
+*/
+#define movlps_m2r(var, reg) sse_m2r(movlps, var, reg)
+#define movlps_r2m(reg, var) sse_r2m(movlps, reg, var)
+#define movlps(vars, vard) \
+  __asm__ __volatile__ ("movlps %1, %%xmm0\n\t" \
+                        "movlps %%xmm0, %0" \
+ : "=X" (vard) \
+ : "X" (vars))
+
+
+/* MOVe Scalar Single-fp
+ lowest field of 4x32f (x) -> lowest field of 4x32f (y)
+*/
+#define movss_m2r(var, reg) sse_m2r(movss, var, reg)
+#define movss_r2m(reg, var) sse_r2m(movss, reg, var)
+#define movss_r2r(regs, regd) sse_r2r(movss, regs, regd)
+#define movss(vars, vard) \
+  __asm__ __volatile__ ("movss %1, %%xmm0\n\t" \
+                        "movss %%xmm0, %0" \
+ : "=X" (vard) \
+ : "X" (vars))
+
+
+/* 4x16 Packed SHUFfle Word
+*/
+#define pshufw_m2r(var, reg, index) sse_m2ri(pshufw, var, reg, index)
+#define pshufw_r2r(regs, regd, index) sse_r2ri(pshufw, regs, regd, index)
+
+
+/* 1x128 SHUFfle Packed Single-fp
+*/
+#define shufps_m2r(var, reg, index) sse_m2ri(shufps, var, reg, index)
+#define shufps_r2r(regs, regd, index) sse_r2ri(shufps, regs, regd, index)
+
+
+/* ConVerT Packed signed Int32 to(2) Packed Single-fp
+*/
+#define cvtpi2ps_m2r(var, xmmreg) sse_m2r(cvtpi2ps, var, xmmreg)
+#define cvtpi2ps_r2r(mmreg, xmmreg) sse_r2r(cvtpi2ps, mmreg, xmmreg)
+
+
+/* ConVerT Packed Single-fp to(2) Packed signed Int32
+*/
+#define cvtps2pi_m2r(var, mmreg) sse_m2r(cvtps2pi, var, mmreg)
+#define cvtps2pi_r2r(xmmreg, mmreg)	sse_r2r(cvtps2pi, xmmreg, mmreg)
+
+
+/* ConVerT with Truncate Packed Single-fp to(2) Packed Int32
+*/
+#define cvttps2pi_m2r(var, mmreg) sse_m2r(cvttps2pi, var, mmreg)
+#define cvttps2pi_r2r(xmmreg, mmreg)	sse_r2r(cvttps2pi, xmmreg, mmreg)
+
+
+/* ConVerT Signed Int32 to(2) Single-fp (Scalar)
+*/
+#define cvtsi2ss_m2r(var, xmmreg) sse_m2r(cvtsi2ss, var, xmmreg)
+#define cvtsi2ss_r2r(reg, xmmreg) sse_r2r(cvtsi2ss, reg, xmmreg)
+
+
+/* ConVerT Scalar Single-fp to(2) Signed Int32
+*/
+#define cvtss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg)
+#define cvtss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg)
+
+
+/* ConVerT with Truncate Scalar Single-fp to(2) Signed Int32
+*/
+#define cvttss2si_m2r(var, reg)	sse_m2r(cvttss2si, var, reg)
+#define cvttss2si_r2r(xmmreg, reg)	sse_r2r(cvttss2si, xmmreg, reg)
+
+
+/* Parallel EXTRact Word from 4x16
+*/
+#define pextrw_r2r(mmreg, reg, field) sse_r2ri(pextrw, mmreg, reg, field)
+
+
+/* Parallel INSeRt Word from 4x16
+*/
+#define pinsrw_r2r(reg, mmreg, field) sse_r2ri(pinsrw, reg, mmreg, field)
+
+
+
+/* MOVe MaSK from Packed Single-fp
+*/
+#ifdef SSE_TRACE
+ #define movmskps(xmmreg, reg) \
+ { \
+ fprintf(stderr, "movmskps()\n"); \
+ __asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg) \
+ }
+#else
+ #define movmskps(xmmreg, reg) \
+ __asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg)
+#endif
+
+
+/* Parallel MOVe MaSK from mmx reg to 32-bit reg
+*/
+#ifdef SSE_TRACE
+  #define pmovmskb(mmreg, reg) \
+  { \
+    fprintf(stderr, "pmovmskb()\n"); \
+    __asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg) \
+  }
+#else
+  #define pmovmskb(mmreg, reg) \
+    __asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg)
+#endif
+
+
+/* MASKed MOVe from 8x8 to memory pointed to by (e)di register
+*/
+#define maskmovq(mmregs, fieldreg)	sse_r2r(maskmovq, mmregs, fieldreg)
+
+
+
+
+/* 4x32f Parallel ADDs
+*/
+#define addps_m2r(var, reg) sse_m2r(addps, var, reg)
+#define addps_r2r(regs, regd) sse_r2r(addps, regs, regd)
+#define addps(vars, vard, xmmreg) sse_m2m(addps, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Parallel ADDs
+*/
+#define addss_m2r(var, reg) sse_m2r(addss, var, reg)
+#define addss_r2r(regs, regd) sse_r2r(addss, regs, regd)
+#define addss(vars, vard, xmmreg) sse_m2m(addss, vars, vard, xmmreg)
+
+
+/* 4x32f Parallel SUBs
+*/
+#define subps_m2r(var, reg) sse_m2r(subps, var, reg)
+#define subps_r2r(regs, regd) sse_r2r(subps, regs, regd)
+#define subps(vars, vard, xmmreg) sse_m2m(subps, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Parallel SUBs
+*/
+#define subss_m2r(var, reg) sse_m2r(subss, var, reg)
+#define subss_r2r(regs, regd) sse_r2r(subss, regs, regd)
+#define subss(vars, vard, xmmreg) sse_m2m(subss, vars, vard, xmmreg)
+
+
+/* 8x8u -> 4x16u Packed Sum of Absolute Differences
+*/
+#define psadbw_m2r(var, reg) sse_m2r(psadbw, var, reg)
+#define psadbw_r2r(regs, regd) sse_r2r(psadbw, regs, regd)
+#define psadbw(vars, vard, mmreg) sse_m2m(psadbw, vars, vard, mmreg)
+
+
+/* 4x16u Parallel MUL High Unsigned
+*/
+#define pmulhuw_m2r(var, reg) sse_m2r(pmulhuw, var, reg)
+#define pmulhuw_r2r(regs, regd) sse_r2r(pmulhuw, regs, regd)
+#define pmulhuw(vars, vard, mmreg) sse_m2m(pmulhuw, vars, vard, mmreg)
+
+
+/* 4x32f Parallel MULs
+*/
+#define mulps_m2r(var, reg) sse_m2r(mulps, var, reg)
+#define mulps_r2r(regs, regd) sse_r2r(mulps, regs, regd)
+#define mulps(vars, vard, xmmreg) sse_m2m(mulps, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Parallel MULs
+*/
+#define mulss_m2r(var, reg) sse_m2r(mulss, var, reg)
+#define mulss_r2r(regs, regd) sse_r2r(mulss, regs, regd)
+#define mulss(vars, vard, xmmreg) sse_m2m(mulss, vars, vard, xmmreg)
+
+
+/* 4x32f Parallel DIVs
+*/
+#define divps_m2r(var, reg) sse_m2r(divps, var, reg)
+#define divps_r2r(regs, regd) sse_r2r(divps, regs, regd)
+#define divps(vars, vard, xmmreg) sse_m2m(divps, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Parallel DIVs
+*/
+#define divss_m2r(var, reg) sse_m2r(divss, var, reg)
+#define divss_r2r(regs, regd) sse_r2r(divss, regs, regd)
+#define divss(vars, vard, xmmreg) sse_m2m(divss, vars, vard, xmmreg)
+
+
+/* 4x32f Parallel Reciprocals
+*/
+#define rcpps_m2r(var, reg) sse_m2r(rcpps, var, reg)
+#define rcpps_r2r(regs, regd) sse_r2r(rcpps, regs, regd)
+#define rcpps(vars, vard, xmmreg) sse_m2m(rcpps, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Parallel Reciprocals
+*/
+#define rcpss_m2r(var, reg) sse_m2r(rcpss, var, reg)
+#define rcpss_r2r(regs, regd) sse_r2r(rcpss, regs, regd)
+#define rcpss(vars, vard, xmmreg) sse_m2m(rcpss, vars, vard, xmmreg)
+
+
+/* 4x32f Parallel Square Root of Reciprocals
+*/
+#define rsqrtps_m2r(var, reg) sse_m2r(rsqrtps, var, reg)
+#define rsqrtps_r2r(regs, regd) sse_r2r(rsqrtps, regs, regd)
+#define rsqrtps(vars, vard, xmmreg) sse_m2m(rsqrtps, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Parallel Square Root of Reciprocals
+*/
+#define rsqrtss_m2r(var, reg) sse_m2r(rsqrtss, var, reg)
+#define rsqrtss_r2r(regs, regd) sse_r2r(rsqrtss, regs, regd)
+#define rsqrtss(vars, vard, xmmreg) sse_m2m(rsqrtss, vars, vard, xmmreg)
+
+
+/* 4x32f Parallel Square Roots
+*/
+#define sqrtps_m2r(var, reg) sse_m2r(sqrtps, var, reg)
+#define sqrtps_r2r(regs, regd) sse_r2r(sqrtps, regs, regd)
+#define sqrtps(vars, vard, xmmreg) sse_m2m(sqrtps, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Parallel Square Roots
+*/
+#define sqrtss_m2r(var, reg) sse_m2r(sqrtss, var, reg)
+#define sqrtss_r2r(regs, regd) sse_r2r(sqrtss, regs, regd)
+#define sqrtss(vars, vard, xmmreg) sse_m2m(sqrtss, vars, vard, xmmreg)
+
+
+/* 8x8u and 4x16u Parallel AVeraGe
+*/
+#define pavgb_m2r(var, reg) sse_m2r(pavgb, var, reg)
+#define pavgb_r2r(regs, regd) sse_r2r(pavgb, regs, regd)
+#define pavgb(vars, vard, mmreg) sse_m2m(pavgb, vars, vard, mmreg)
+
+#define pavgw_m2r(var, reg) sse_m2r(pavgw, var, reg)
+#define pavgw_r2r(regs, regd) sse_r2r(pavgw, regs, regd)
+#define pavgw(vars, vard, mmreg) sse_m2m(pavgw, vars, vard, mmreg)
+
+
+/* 1x128 bitwise AND
+*/
+#define andps_m2r(var, reg) sse_m2r(andps, var, reg)
+#define andps_r2r(regs, regd) sse_r2r(andps, regs, regd)
+#define andps(vars, vard, xmmreg) sse_m2m(andps, vars, vard, xmmreg)
+
+
+/* 1x128 bitwise AND with Not the destination
+*/
+#define andnps_m2r(var, reg) sse_m2r(andnps, var, reg)
+#define andnps_r2r(regs, regd) sse_r2r(andnps, regs, regd)
+#define andnps(vars, vard, xmmreg) sse_m2m(andnps, vars, vard, xmmreg)
+
+
+/* 1x128 bitwise OR
+*/
+#define orps_m2r(var, reg) sse_m2r(orps, var, reg)
+#define orps_r2r(regs, regd) sse_r2r(orps, regs, regd)
+#define orps(vars, vard, xmmreg) sse_m2m(orps, vars, vard, xmmreg)
+
+
+/* 1x128 bitwise eXclusive OR
+*/
+#define xorps_m2r(var, reg) sse_m2r(xorps, var, reg)
+#define xorps_r2r(regs, regd) sse_r2r(xorps, regs, regd)
+#define xorps(vars, vard, xmmreg) sse_m2m(xorps, vars, vard, xmmreg)
+
+
+/* 8x8u, 4x16, and 4x32f Parallel Maximum
+*/
+#define pmaxub_m2r(var, reg) sse_m2r(pmaxub, var, reg)
+#define pmaxub_r2r(regs, regd) sse_r2r(pmaxub, regs, regd)
+#define pmaxub(vars, vard, mmreg) sse_m2m(pmaxub, vars, vard, mmreg)
+
+#define pmaxsw_m2r(var, reg) sse_m2r(pmaxsw, var, reg)
+#define pmaxsw_r2r(regs, regd) sse_r2r(pmaxsw, regs, regd)
+#define pmaxsw(vars, vard, mmreg) sse_m2m(pmaxsw, vars, vard, mmreg)
+
+#define maxps_m2r(var, reg) sse_m2r(maxps, var, reg)
+#define maxps_r2r(regs, regd) sse_r2r(maxps, regs, regd)
+#define maxps(vars, vard, xmmreg) sse_m2m(maxps, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Parallel Maximum
+*/
+#define maxss_m2r(var, reg) sse_m2r(maxss, var, reg)
+#define maxss_r2r(regs, regd) sse_r2r(maxss, regs, regd)
+#define maxss(vars, vard, xmmreg) sse_m2m(maxss, vars, vard, xmmreg)
+
+
+/* 8x8u, 4x16, and 4x32f Parallel Minimum
+*/
+#define pminub_m2r(var, reg) sse_m2r(pminub, var, reg)
+#define pminub_r2r(regs, regd) sse_r2r(pminub, regs, regd)
+#define pminub(vars, vard, mmreg) sse_m2m(pminub, vars, vard, mmreg)
+
+#define pminsw_m2r(var, reg) sse_m2r(pminsw, var, reg)
+#define pminsw_r2r(regs, regd) sse_r2r(pminsw, regs, regd)
+#define pminsw(vars, vard, mmreg) sse_m2m(pminsw, vars, vard, mmreg)
+
+#define minps_m2r(var, reg) sse_m2r(minps, var, reg)
+#define minps_r2r(regs, regd) sse_r2r(minps, regs, regd)
+#define minps(vars, vard, xmmreg) sse_m2m(minps, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Parallel Minimum
+*/
+#define minss_m2r(var, reg) sse_m2r(minss, var, reg)
+#define minss_r2r(regs, regd) sse_r2r(minss, regs, regd)
+#define minss(vars, vard, xmmreg) sse_m2m(minss, vars, vard, xmmreg)
+
+
+/* 4x32f Parallel CoMPares
+ (resulting fields are either 0 or -1)
+*/
+#define cmpps_m2r(var, reg, op) sse_m2ri(cmpps, var, reg, op)
+#define cmpps_r2r(regs, regd, op) sse_r2ri(cmpps, regs, regd, op)
+#define cmpps(vars, vard, op, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, op)
+
+#define cmpeqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 0)
+#define cmpeqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 0)
+#define cmpeqps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 0)
+
+#define cmpltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 1)
+#define cmpltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 1)
+#define cmpltps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 1)
+
+#define cmpleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 2)
+#define cmpleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 2)
+#define cmpleps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 2)
+
+#define cmpunordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 3)
+#define cmpunordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 3)
+#define cmpunordps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 3)
+
+#define cmpneqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 4)
+#define cmpneqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 4)
+#define cmpneqps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 4)
+
+#define cmpnltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 5)
+#define cmpnltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 5)
+#define cmpnltps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 5)
+
+#define cmpnleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 6)
+#define cmpnleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 6)
+#define cmpnleps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 6)
+
+#define cmpordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 7)
+#define cmpordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 7)
+#define cmpordps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 7)
+
+
+/* Lowest Field of 4x32f Parallel CoMPares
+ (resulting fields are either 0 or -1)
+*/
+#define cmpss_m2r(var, reg, op) sse_m2ri(cmpss, var, reg, op)
+#define cmpss_r2r(regs, regd, op) sse_r2ri(cmpss, regs, regd, op)
+#define cmpss(vars, vard, op, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, op)
+
+#define cmpeqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 0)
+#define cmpeqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 0)
+#define cmpeqss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 0)
+
+#define cmpltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 1)
+#define cmpltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 1)
+#define cmpltss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 1)
+
+#define cmpless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 2)
+#define cmpless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 2)
+#define cmpless(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 2)
+
+#define cmpunordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 3)
+#define cmpunordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 3)
+#define cmpunordss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 3)
+
+#define cmpneqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 4)
+#define cmpneqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 4)
+#define cmpneqss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 4)
+
+#define cmpnltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 5)
+#define cmpnltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 5)
+#define cmpnltss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 5)
+
+#define cmpnless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 6)
+#define cmpnless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 6)
+#define cmpnless(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 6)
+
+#define cmpordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 7)
+#define cmpordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 7)
+#define cmpordss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 7)
+
+
+/* Lowest Field of 4x32f Parallel CoMPares to set EFLAGS
+   (sets ZF, PF and CF in EFLAGS rather than writing a mask)
+*/
+#define comiss_m2r(var, reg) sse_m2r(comiss, var, reg)
+#define comiss_r2r(regs, regd) sse_r2r(comiss, regs, regd)
+#define comiss(vars, vard, xmmreg) sse_m2m(comiss, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Unordered Parallel CoMPares to set EFLAGS
+   (sets ZF, PF and CF in EFLAGS rather than writing a mask)
+*/
+#define ucomiss_m2r(var, reg) sse_m2r(ucomiss, var, reg)
+#define ucomiss_r2r(regs, regd) sse_r2r(ucomiss, regs, regd)
+#define ucomiss(vars, vard, xmmreg) sse_m2m(ucomiss, vars, vard, xmmreg)
+
+
+/* 2-(4x32f) -> 4x32f UNPaCK Low Packed Single-fp
+ (interleaves low half of dest with low half of source
+ as padding in each result field)
+*/
+#define unpcklps_m2r(var, reg) sse_m2r(unpcklps, var, reg)
+#define unpcklps_r2r(regs, regd) sse_r2r(unpcklps, regs, regd)
+
+
+/* 2-(4x32f) -> 4x32f UNPaCK High Packed Single-fp
+ (interleaves high half of dest with high half of source
+ as padding in each result field)
+*/
+#define unpckhps_m2r(var, reg) sse_m2r(unpckhps, var, reg)
+#define unpckhps_r2r(regs, regd) sse_r2r(unpckhps, regs, regd)
+
+
+
+/* Fp and mmX ReSTORe state
+*/
+#ifdef SSE_TRACE
+ #define fxrstor(mem) \
+ { \
+ fprintf(stderr, "fxrstor()\n"); \
+ __asm__ __volatile__ ("fxrstor %0" \
+ : /* nothing */ \
+ : "X" (mem)) \
+ }
+#else
+ #define fxrstor(mem) \
+ __asm__ __volatile__ ("fxrstor %0" \
+ : /* nothing */ \
+ : "X" (mem))
+#endif
+
+
+/* Fp and mmX SAVE state
+*/
+#ifdef SSE_TRACE
+ #define fxsave(mem) \
+ { \
+ fprintf(stderr, "fxsave()\n"); \
+ __asm__ __volatile__ ("fxsave %0" \
+ : /* nothing */ \
+ : "X" (mem)) \
+ }
+#else
+ #define fxsave(mem) \
+ __asm__ __volatile__ ("fxsave %0" \
+ : /* nothing */ \
+ : "X" (mem))
+#endif
+
+
+/* STore streaMing simd eXtensions Control/Status Register
+*/
+#ifdef SSE_TRACE
+ #define stmxcsr(mem) \
+ { \
+ fprintf(stderr, "stmxcsr()\n"); \
+ __asm__ __volatile__ ("stmxcsr %0" \
+ : /* nothing */ \
+ : "X" (mem)) \
+ }
+#else
+ #define stmxcsr(mem) \
+ __asm__ __volatile__ ("stmxcsr %0" \
+ : /* nothing */ \
+ : "X" (mem))
+#endif
+
+
+/* LoaD streaMing simd eXtensions Control/Status Register
+*/
+#ifdef SSE_TRACE
+ #define ldmxcsr(mem) \
+ { \
+ fprintf(stderr, "ldmxcsr()\n"); \
+ __asm__ __volatile__ ("ldmxcsr %0" \
+ : /* nothing */ \
+ : "X" (mem)) \
+ }
+#else
+ #define ldmxcsr(mem) \
+ __asm__ __volatile__ ("ldmxcsr %0" \
+ : /* nothing */ \
+ : "X" (mem))
+#endif
+
+
+/* Store FENCE - enforce ordering of stores before fence vs. stores
+   occurring after fence in source code.
+*/
+#ifdef SSE_TRACE
+ #define sfence() \
+ { \
+ fprintf(stderr, "sfence()\n"); \
+ __asm__ __volatile__ ("sfence\n\t") \
+ }
+#else
+ #define sfence() \
+ __asm__ __volatile__ ("sfence\n\t")
+#endif
+
+
+/* PREFETCH data using T0, T1, T2, or NTA hint
+ T0 = Prefetch into all cache levels
+ T1 = Prefetch into all cache levels except 0th level
+ T2 = Prefetch into all cache levels except 0th and 1st levels
+ NTA = Prefetch data into non-temporal cache structure
+*/
+#ifdef SSE_TRACE
+	#define prefetch(mem, hint) \
+	{ \
+		fprintf(stderr, "prefetch" #hint "()\n"); \
+		__asm__ __volatile__ ("prefetch" #hint " %0" \
+							  : /* nothing */ \
+							  : "X" (mem)) \
+	}
+
+	#define prefetcht0(mem)  prefetch(mem, t0)
+	#define prefetcht1(mem)  prefetch(mem, t1)
+	#define prefetcht2(mem)  prefetch(mem, t2)
+	#define prefetchnta(mem) prefetch(mem, nta)
+#else
+ #define prefetch(mem, hint) \
+ __asm__ __volatile__ ("prefetch" #hint " %0" \
+ : /* nothing */ \
+ : "X" (mem))
+
+ #define prefetcht0(mem) prefetch(mem, t0)
+ #define prefetcht1(mem) prefetch(mem, t1)
+ #define prefetcht2(mem) prefetch(mem, t2)
+ #define prefetchnta(mem) prefetch(mem, nta)
+#endif
+
+
+
+#endif
diff --git a/gst/deinterlace/tvtime/tomsmocomp.c b/gst/deinterlace/tvtime/tomsmocomp.c
new file mode 100644
index 00000000..3141fbac
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp.c
@@ -0,0 +1,211 @@
+/**
+ * Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
+ * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+#include "_stdint.h"
+#include <string.h>
+
+#include "gst/gst.h"
+#include "gstdeinterlace.h"
+#include "plugins.h"
+
+#define GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP (gst_deinterlace_method_tomsmocomp_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_TOMSMOCOMP(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP))
+#define GST_IS_DEINTERLACE_METHOD_TOMSMOCOMP_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP))
+#define GST_DEINTERLACE_METHOD_TOMSMOCOMP_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP, GstDeinterlaceMethodTomsMoCompClass))
+#define GST_DEINTERLACE_METHOD_TOMSMOCOMP(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP, GstDeinterlaceMethodTomsMoComp))
+#define GST_DEINTERLACE_METHOD_TOMSMOCOMP_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP, GstDeinterlaceMethodTomsMoCompClass))
+#define GST_DEINTERLACE_METHOD_TOMSMOCOMP_CAST(obj) ((GstDeinterlaceMethodTomsMoComp*)(obj))
+
+GType gst_deinterlace_method_tomsmocomp_get_type (void);
+
+typedef struct
+{
+ GstDeinterlaceMethod parent;
+
+ guint search_effort;
+ gboolean strange_bob;
+} GstDeinterlaceMethodTomsMoComp;
+
+typedef struct
+{
+ GstDeinterlaceMethodClass parent_class;
+} GstDeinterlaceMethodTomsMoCompClass;
+
+static int
+Fieldcopy (void *dest, const void *src, size_t count,
+ int rows, int dst_pitch, int src_pitch)
+{
+ unsigned char *pDest = (unsigned char *) dest;
+ unsigned char *pSrc = (unsigned char *) src;
+
+ int i;
+
+ for (i = 0; i < rows; i++) {
+ oil_memcpy (pDest, pSrc, count);
+ pSrc += src_pitch;
+ pDest += dst_pitch;
+ }
+ return 0;
+}
+
+#define USE_FOR_DSCALER
+
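+/* Each inclusion of TomsMoCompAll.inc below builds one complete
+ * implementation (FUNCT_NAME) of this method for the given SIMD_TYPE;
+ * class_init below picks the best variant the CPU supports at runtime. */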
+#define IS_C
+#define SIMD_TYPE C
+#define FUNCT_NAME tomsmocompDScaler_C
+#include "tomsmocomp/TomsMoCompAll.inc"
+#undef IS_C
+#undef SIMD_TYPE
+#undef FUNCT_NAME
+
+#ifdef BUILD_X86_ASM
+
+#include "tomsmocomp/tomsmocompmacros.h"
+#include "x86-64_macros.inc"
+
+#define IS_MMX
+#define SIMD_TYPE MMX
+#define FUNCT_NAME tomsmocompDScaler_MMX
+#include "tomsmocomp/TomsMoCompAll.inc"
+#undef IS_MMX
+#undef SIMD_TYPE
+#undef FUNCT_NAME
+
+#define IS_3DNOW
+#define SIMD_TYPE 3DNOW
+#define FUNCT_NAME tomsmocompDScaler_3DNOW
+#include "tomsmocomp/TomsMoCompAll.inc"
+#undef IS_3DNOW
+#undef SIMD_TYPE
+#undef FUNCT_NAME
+
+#define IS_MMXEXT
+#define SIMD_TYPE MMXEXT
+#define FUNCT_NAME tomsmocompDScaler_MMXEXT
+#include "tomsmocomp/TomsMoCompAll.inc"
+#undef IS_MMXEXT
+#undef SIMD_TYPE
+#undef FUNCT_NAME
+
+#endif
+
+G_DEFINE_TYPE (GstDeinterlaceMethodTomsMoComp,
+ gst_deinterlace_method_tomsmocomp, GST_TYPE_DEINTERLACE_METHOD);
+
+enum
+{
+ ARG_0,
+ ARG_SEARCH_EFFORT,
+ ARG_STRANGE_BOB
+};
+
+static void
+gst_deinterlace_method_tomsmocomp_set_property (GObject * object, guint prop_id,
+ const GValue * value, GParamSpec * pspec)
+{
+ GstDeinterlaceMethodTomsMoComp *self =
+ GST_DEINTERLACE_METHOD_TOMSMOCOMP (object);
+
+ switch (prop_id) {
+ case ARG_SEARCH_EFFORT:
+ self->search_effort = g_value_get_uint (value);
+ break;
+ case ARG_STRANGE_BOB:
+ self->strange_bob = g_value_get_boolean (value);
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+gst_deinterlace_method_tomsmocomp_get_property (GObject * object, guint prop_id,
+ GValue * value, GParamSpec * pspec)
+{
+ GstDeinterlaceMethodTomsMoComp *self =
+ GST_DEINTERLACE_METHOD_TOMSMOCOMP (object);
+
+ switch (prop_id) {
+ case ARG_SEARCH_EFFORT:
+ g_value_set_uint (value, self->search_effort);
+ break;
+ case ARG_STRANGE_BOB:
+ g_value_set_boolean (value, self->strange_bob);
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+ gst_deinterlace_method_tomsmocomp_class_init
+ (GstDeinterlaceMethodTomsMoCompClass * klass)
+{
+ GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+ GObjectClass *gobject_class = (GObjectClass *) klass;
+#ifdef BUILD_X86_ASM
+ guint cpu_flags = oil_cpu_get_flags ();
+#endif
+
+ gobject_class->set_property = gst_deinterlace_method_tomsmocomp_set_property;
+ gobject_class->get_property = gst_deinterlace_method_tomsmocomp_get_property;
+
+ g_object_class_install_property (gobject_class, ARG_SEARCH_EFFORT,
+ g_param_spec_uint ("search-effort",
+ "Search Effort",
+ "Search Effort", 0, 27, 5, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)
+ );
+
+ g_object_class_install_property (gobject_class, ARG_STRANGE_BOB,
+ g_param_spec_boolean ("strange-bob",
+ "Strange Bob",
+ "Use strange bob", FALSE, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)
+ );
+
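+  /* the method needs 4 fields of history: two fields of each parity are
+   * copied, weaved and compared (see the field_history indexing in
+   * TomsMoCompAll.inc) */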
+ dim_class->fields_required = 4;
+ dim_class->name = "Motion Adaptive: Motion Search";
+ dim_class->nick = "tomsmocomp";
+ dim_class->latency = 1;
+
+#ifdef BUILD_X86_ASM
+ if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) {
+ dim_class->deinterlace_frame = tomsmocompDScaler_MMXEXT;
+ } else if (cpu_flags & OIL_IMPL_FLAG_3DNOW) {
+ dim_class->deinterlace_frame = tomsmocompDScaler_3DNOW;
+ } else if (cpu_flags & OIL_IMPL_FLAG_MMX) {
+ dim_class->deinterlace_frame = tomsmocompDScaler_MMX;
+ } else {
+ dim_class->deinterlace_frame = tomsmocompDScaler_C;
+ }
+#else
+ dim_class->deinterlace_frame = tomsmocompDScaler_C;
+#endif
+}
+
+static void
+gst_deinterlace_method_tomsmocomp_init (GstDeinterlaceMethodTomsMoComp * self)
+{
+ self->search_effort = 5;
+ self->strange_bob = FALSE;
+}
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoop0A.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoop0A.inc
new file mode 100644
index 00000000..b1d9aeca
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoop0A.inc
@@ -0,0 +1,15 @@
+// -*- c++ -*-
+
+// Searches just the center pixel, in both the old
+// and new fields, but takes averages. This is an even
+// pixel address. Any chroma match will be used. (YUY2)
+// We prefer finding zero motion, so we bias everything found previously
+// up by a little, and adjust later
+
+#ifdef IS_SSE2
+ "paddusb "_ONES", %%xmm7\n\t" // bias toward no motion
+#else
+ "paddusb "_ONES", %%mm7\n\t" // bias toward no motion
+#endif
+
+ MERGE4PIXavg("(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")") // center, in old and new
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopBottom.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopBottom.inc
new file mode 100644
index 00000000..e1560353
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopBottom.inc
@@ -0,0 +1,174 @@
+// -*- c++ -*-
+
+// Version for non-SSE2
+
+#ifndef IS_C
+
+#ifdef SKIP_SEARCH
+	"movq %%mm6, %%mm0\n\t"	// just use the results of our weird bob
+#else
+
+
+ // JA 9/Dec/2002
+ // failed experiment
+ // but leave in placeholder for me to play about
+#ifdef DONT_USE_STRANGE_BOB
+	// Use the best weave if the diffs are less than 10, as that
+	// means the image is still or moving cleanly;
+	// if there is motion we will clip, which will catch anything
+ "psubusb "_FOURS", %%mm7\n\t" // sets bits to zero if weave diff < 4
+ "pxor %%mm0, %%mm0\n\t"
+ "pcmpeqb %%mm0, %%mm7\n\t" // all ff where weave better, else 00
+ "pcmpeqb %%mm7, %%mm0\n\t" // all ff where bob better, else 00
+ "pand %%mm6, %%mm0\n\t" // use bob for these pixel values
+ "pand %%mm5, %%mm7\n\t" // use weave for these
+ "por %%mm7, %%mm0\n\t" // combine both
+#else
+ // Use the better of bob or weave
+ // pminub mm4, TENS // the most we care about
+ V_PMINUB ("%%mm4", _TENS, "%%mm0") // the most we care about
+
+	"psubusb %%mm4, %%mm7\n\t"	// forgive that much from the weave estimate?
+ "psubusb "_FOURS", %%mm7\n\t" // bias it a bit toward weave
+ "pxor %%mm0, %%mm0\n\t"
+ "pcmpeqb %%mm0, %%mm7\n\t" // all ff where weave better, else 00
+ "pcmpeqb %%mm7, %%mm0\n\t" // all ff where bob better, else 00
+ "pand %%mm6, %%mm0\n\t" // use bob for these pixel values
+ "pand %%mm5, %%mm7\n\t" // use weave for these
+ "por %%mm7, %%mm0\n\t" // combine both
+#endif
+
+
+ // pminub mm0, Max_Vals // but clip to catch the stray error
+ V_PMINUB ("%%mm0", _Max_Vals, "%%mm1") // but clip to catch the stray error
+ // pmaxub mm0, Min_Vals
+ V_PMAXUB ("%%mm0", _Min_Vals)
+
+#endif
+
+
+ MOVX" "_pDest", %%"XAX"\n\t"
+
+#ifdef USE_VERTICAL_FILTER
+ "movq %%mm0, %%mm1\n\t"
+ // pavgb mm0, qword ptr["XBX"]
+ V_PAVGB ("%%mm0", "(%%"XBX")", "%%mm2", _ShiftMask)
+ // movntq qword ptr["XAX"+"XDX"], mm0
+	V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm0")
+ // pavgb mm1, qword ptr["XBX"+"XCX"]
+ V_PAVGB ("%%mm1", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask)
+ //FIXME: XDX or XAX!!
+ "addq "_dst_pitchw", %%"XBX
+ // movntq qword ptr["XAX"+"XDX"], mm1
+ V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm1")
+#else
+
+ // movntq qword ptr["XAX"+"XDX"], mm0
+ V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm0")
+#endif
+
+ LEAX" 8(%%"XDX"), %%"XDX"\n\t" // bump offset pointer
+ CMPX" "_Last8", %%"XDX"\n\t" // done with line?
+ "jb 1b\n\t" // y
+
+ MOVX" "_oldbx", %%"XBX"\n\t"
+
+ : /* no outputs */
+
+ : "m"(pBob),
+ "m"(src_pitch2),
+ "m"(ShiftMask),
+ "m"(pDest),
+ "m"(dst_pitchw),
+ "m"(Last8),
+ "m"(pSrc),
+ "m"(pSrcP),
+ "m"(pBobP),
+ "m"(DiffThres),
+ "m"(Min_Vals),
+ "m"(Max_Vals),
+ "m"(FOURS),
+ "m"(TENS),
+ "m"(ONES),
+ "m"(UVMask),
+ "m"(Max_Mov),
+ "m"(YMask),
+ "m"(oldbx)
+
+ : XAX, XCX, XDX, XSI, XDI,
+ "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
+#ifdef __MMX__
+ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+#endif
+ "memory", "cc"
+ );
+
+ // adjust for next line
+ pSrc += src_pitch2;
+ pSrcP += src_pitch2;
+ pDest += dst_pitch2;
+ pBob += src_pitch2;
+ pBobP += src_pitch2;
+ }
+
+ return 0;
+#else
+#ifdef SKIP_SEARCH
+  out[0] = best[0]; // just use the results of our weird bob
+ out[1] = best[1];
+#else
+ diff[0] = diff[0] - MIN (diff[0], 10) - 4;
+  diff[1] = diff[1] - MIN (diff[1], 10) - 4;
+ if (diff[0] < 0)
+ out[0] = weave[0];
+ else
+ out[0] = best[0];
+
+ if (diff[1] < 0)
+ out[1] = weave[1];
+ else
+ out[1] = best[1];
+
+
+ out[0] = CLAMP (out[0], MinVals[0], MaxVals[0]);
+ out[1] = CLAMP (out[1], MinVals[1], MaxVals[1]);
+#endif
+
+#ifdef USE_VERTICAL_FILTER
+ pDest[x] = (out[0] + pBob[0]) / 2;
+ pDest[x + dst_pitchw] = (pBob[src_pitch2] + out[0]) / 2;
+ pDest[x + 1] = (out[1] + pBob[1]) / 2;
+ pDest[x + 1 + dst_pitchw] = (pBob[src_pitch2 + 1] + out[1]) / 2;
+#else
+ pDest[x] = out[0];
+ pDest[x+1] = out[1];
+#endif
+ pBob += 2;
+ pBobP += 2;
+ pSrc += 2;
+ pSrcP += 2;
+ }
+ // adjust for next line
+ pSrc = src_pitch2 * (y+1) + pWeaveSrc;
+ pSrcP = src_pitch2 * (y+1) + pWeaveSrcP;
+ pDest = dst_pitch2 * (y+1) + pWeaveDest + dst_pitch2;
+
+
+ if (TopFirst)
+ {
+ pBob = pCopySrc + src_pitch2;
+ pBobP = pCopySrcP + src_pitch2;
+ }
+ else
+ {
+ pBob = pCopySrc;
+ pBobP = pCopySrcP;
+ }
+
+ pBob += src_pitch2 * (y+1);
+ pBobP += src_pitch2 * (y+1);
+ }
+
+ return 0;
+
+#endif
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA.inc
new file mode 100644
index 00000000..6208fe8c
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA.inc
@@ -0,0 +1,11 @@
+// -*- c++ -*-
+
+// Searches 2 pixels to the left and right, in both the old
+// and new fields, but takes averages. These are even
+// pixel addresses. Chroma match will be used. (YUY2)
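+// (the offsets below are in bytes: at 2 bytes per YUY2 pixel, +/-4 bytes is 2 pixels)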
+ MERGE4PIXavg("-4(%%"XDI")", "4(%%"XSI", %%"XCX", 2)") // up left, down right
+ MERGE4PIXavg("4(%%"XDI")", "-4(%%"XSI", %%"XCX", 2)") // up right, down left
+ MERGE4PIXavg("-4(%%"XDI", %%"XCX")", "4(%%"XSI", %%"XCX")") // left, right
+ MERGE4PIXavg("4(%%"XDI", %%"XCX")", "-4(%%"XSI", %%"XCX")") // right, left
+ MERGE4PIXavg("-4(%%"XDI", %%"XCX", 2)", "4(%%"XSI")") // down left, up right
+ MERGE4PIXavg("4(%%"XDI", %%"XCX", 2)", "-4(%%"XSI")") // down right, up left
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA8.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA8.inc
new file mode 100644
index 00000000..2841c3f6
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA8.inc
@@ -0,0 +1,12 @@
+// -*- c++ -*-
+
+// Searches 4 pixels to the left and right, in both the old
+// and new fields, but takes averages. These are even
+// pixel addresses. Chroma match will be used. (YUY2)
+ MERGE4PIXavg("-8(%%"XDI")", "8(%%"XSI", %%"XCX", 2)") // up left, down right
+ MERGE4PIXavg("8(%%"XDI")", "-8(%%"XSI", %%"XCX", 2)") // up right, down left
+ MERGE4PIXavg("-8(%%"XDI", %%"XCX")", "8(%%"XSI", %%"XCX")") // left, right
+ MERGE4PIXavg("8(%%"XDI", %%"XCX")", "-8(%%"XSI", %%"XCX")") // right, left
+ MERGE4PIXavg("-8(%%"XDI", %%"XCX", 2)", "8(%%"XSI")") // down left, up right
+ MERGE4PIXavg("8(%%"XDI", %%"XCX", 2)", "-8(%%"XSI")") // down right, up left
+
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA.inc
new file mode 100644
index 00000000..ab5375f4
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA.inc
@@ -0,0 +1,10 @@
+// -*- c++ -*-
+
+// Searches 1 pixel to the left and right, in both the old
+// and new fields, but takes averages. These are odd
+// pixel addresses. Any chroma match will not be used. (YUY2)
+ MERGE4PIXavg("-2(%%"XDI")", "2(%%"XSI", %%"XCX", 2)") // up left, down right
+ MERGE4PIXavg("2(%%"XDI")", "-2(%%"XSI", %%"XCX", 2)") // up right, down left
+ MERGE4PIXavg("-2(%%"XDI", %%"XCX", 2)", "2(%%"XSI")") // down left, up right
+ MERGE4PIXavg("2(%%"XDI", %%"XCX", 2)", "-2(%%"XSI")") // down right, up left
+#include "SearchLoopOddA2.inc"
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA2.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA2.inc
new file mode 100644
index 00000000..fd3f6fb0
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA2.inc
@@ -0,0 +1,5 @@
+// Searches 1 pixel to the left and right, in both the old
+// and new fields, but takes averages. These are odd
+// pixel addresses. Any chroma match will not be used. (YUY2)
+ MERGE4PIXavg("-2(%%"XDI", %%"XCX")", "2(%%"XSI", %%"XCX")") // left, right
+ MERGE4PIXavg("2(%%"XDI", %%"XCX")", "-2(%%"XSI", %%"XCX")") // right, left
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA6.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA6.inc
new file mode 100644
index 00000000..cbae014e
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA6.inc
@@ -0,0 +1,11 @@
+// -*- c++ -*-
+
+// Searches 3 pixels to the left and right, in both the old
+// and new fields, but takes averages. These are odd
+// pixel addresses. Any chroma match will not be used. (YUY2)
+ MERGE4PIXavg("-6(%%"XDI")", "6(%%"XSI", %%"XCX", 2)") // up left, down right
+ MERGE4PIXavg("6(%%"XDI")", "-6(%%"XSI", %%"XCX", 2)") // up right, down left
+ MERGE4PIXavg("-6(%%"XDI", %%"XCX")", "6(%%"XSI", %%"XCX")") // left, right
+ MERGE4PIXavg("6(%%"XDI", %%"XCX")", "-6(%%"XSI", %%"XCX")") // right, left
+ MERGE4PIXavg("-6(%%"XDI", %%"XCX", 2)", "6(%%"XSI")") // down left, up right
+ MERGE4PIXavg("6(%%"XDI", %%"XCX", 2)", "-6(%%"XSI")") // down right, up left
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH.inc
new file mode 100644
index 00000000..e59e3c7e
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH.inc
@@ -0,0 +1,10 @@
+// Searches 1 pixel to the left and right, in both the old
+// and new fields, but takes v-half pel averages. These are odd
+// pixel addresses. Any chroma match will not be used. (YUY2)
+	MERGE4PIXavgH("-2(%%"XDI")", "-2(%%"XDI", %%"XCX")", "2(%%"XSI", %%"XCX")", "2(%%"XSI", %%"XCX", 2)") // up left, down right
+	MERGE4PIXavgH("2(%%"XDI")", "2(%%"XDI", %%"XCX")", "-2(%%"XSI", %%"XCX")", "-2(%%"XSI", %%"XCX", 2)") // up right, down left
+	MERGE4PIXavgH("-2(%%"XDI", %%"XCX", 2)", "-2(%%"XDI", %%"XCX")", "2(%%"XSI", %%"XCX")", "2(%%"XSI")") // down left, up right
+	MERGE4PIXavgH("2(%%"XDI", %%"XCX", 2)", "2(%%"XDI", %%"XCX")", "-2(%%"XSI", %%"XCX")", "-2(%%"XSI")") // down right, up left
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH2.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH2.inc
new file mode 100644
index 00000000..cd7d812a
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH2.inc
@@ -0,0 +1,5 @@
+// Searches 1 pixel to the left and right, in both the old
+// and new fields, but takes vertical averages. These are odd
+// pixel addresses. Any chroma match will not be used. (YUY2)
+ MERGE4PIXavgH("-2(%%"XDI", %%"XCX")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "2(%%"XSI", %%"XCX")") // left, right
+ MERGE4PIXavgH("2(%%"XDI", %%"XCX")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "-2(%%"XSI", %%"XCX")") // right, left
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopTop.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopTop.inc
new file mode 100644
index 00000000..9d6a490f
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopTop.inc
@@ -0,0 +1,254 @@
+// -*- c++ -*-
+
+unsigned char* pDest;
+const unsigned char* pSrcP;
+const unsigned char* pSrc;
+const unsigned char* pBob;
+const unsigned char* pBobP;
+
+// long is int32 on ARCH_386, int64 on ARCH_AMD64. Declaring it this way
+// saves a lot of xors otherwise needed to clear 64-bit garbage.
+
+#if defined(DBL_RESIZE) || defined(USE_FOR_DSCALER)
+long src_pitch2 = src_pitch; // even & odd lines are not interleaved in DScaler
+#else
+long src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avisynth
+#endif
+
+
+long dst_pitch2 = 2 * dst_pitch;
+long y;
+
+long Last8;
+
+ pSrc = pWeaveSrc; // points 1 weave line above
+ pSrcP = pWeaveSrcP; // "
+
+#ifdef DBL_RESIZE
+
+#ifdef USE_VERTICAL_FILTER
+ pDest = pWeaveDest + dst_pitch2;
+#else
+ pDest = pWeaveDest + 3*dst_pitch;
+#endif
+
+#else
+
+#ifdef USE_VERTICAL_FILTER
+ pDest = pWeaveDest + dst_pitch;
+#else
+ pDest = pWeaveDest + dst_pitch2;
+#endif
+
+#endif
+
+ if (TopFirst)
+ {
+ pBob = pCopySrc + src_pitch2; // remember one weave line just copied previously
+ pBobP = pCopySrcP + src_pitch2;
+ }
+ else
+ {
+ pBob = pCopySrc;
+ pBobP = pCopySrcP;
+ }
+
+#ifndef IS_C
+
+#ifndef _pBob
+#define _pBob "%0"
+#define _src_pitch2 "%1"
+#define _ShiftMask "%2"
+#define _pDest "%3"
+#define _dst_pitchw "%4"
+#define _Last8 "%5"
+#define _pSrc "%6"
+#define _pSrcP "%7"
+#define _pBobP "%8"
+#define _DiffThres "%9"
+#define _Min_Vals "%10"
+#define _Max_Vals "%11"
+#define _FOURS "%12"
+#define _TENS "%13"
+#define _ONES "%14"
+#define _UVMask "%15"
+#define _Max_Mov "%16"
+#define _YMask "%17"
+#define _oldbx "%18"
+#endif
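+// These names map positionally onto the input operand list of the asm
+// statement (the list is in SearchLoopBottom.inc, which closes the block),
+// e.g. _pBob is "%0", the "m"(pBob) operand, and _oldbx is "%18".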
+ Last8 = (rowsize-8);
+
+ for (y=1; y < FldHeight-1; y++)
+ {
+    long dst_pitchw = dst_pitch; // local copy so the asm can reference it
+ int64_t Max_Mov = 0x0404040404040404ull;
+ int64_t DiffThres = 0x0f0f0f0f0f0f0f0full;
+ int64_t YMask = 0x00ff00ff00ff00ffull; // keeps only luma
+ int64_t UVMask = 0xff00ff00ff00ff00ull; // keeps only chroma
+ int64_t TENS = 0x0a0a0a0a0a0a0a0aull;
+ int64_t FOURS = 0x0404040404040404ull;
+ int64_t ONES = 0x0101010101010101ull;
+ int64_t Min_Vals = 0x0000000000000000ull;
+ int64_t Max_Vals = 0x0000000000000000ull;
+ int64_t ShiftMask = 0xfefffefffefffeffull;
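+    // The packed 8-byte values above (and the Min_Vals/Max_Vals scratch
+    // space) are referenced by the asm as "m" operands, since the MMX
+    // instructions used here have no immediate forms.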
+
+ long oldbx;
+
+ // pretend it's indented -->>
+ __asm__ __volatile__
+ (
+ // Loop general reg usage
+ //
+ // XAX - pBobP, then pDest
+ // XBX - pBob
+ // XCX - src_pitch2
+ // XDX - current offset
+ // XDI - prev weave pixels, 1 line up
+ // XSI - next weave pixels, 1 line up
+
+ // Save "XBX" (-fPIC)
+ MOVX" %%"XBX", "_oldbx"\n\t"
+
+ // simple bob first 8 bytes
+ MOVX" "_pBob", %%"XBX"\n\t"
+ MOVX" "_src_pitch2", %%"XCX"\n\t"
+
+#ifdef USE_VERTICAL_FILTER
+ "movq (%%"XBX"), %%mm0\n\t"
+ "movq (%%"XBX", %%"XCX"), %%mm1\n\t" //, qword ptr["XBX"+"XCX"]
+ "movq %%mm0, %%mm2\n\t"
+ V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between
+ V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way
+ V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way
+ MOVX" "_pDest", %%"XDI"\n\t"
+ MOVX" "_dst_pitchw", %%"XAX"\n\t"
+ V_MOVNTQ ("(%%"XDI")", "%%mm0")
+ V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1
+
+ // simple bob last 8 bytes
+ MOVX" "_Last8", %%"XDX"\n\t"
+ LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" // ["XBX"+"XDX"]
+ "movq (%%"XSI"), %%mm0\n\t"
+ "movq (%%"XSI", %%"XCX"), %%mm1\n\t" // qword ptr["XSI"+"XCX"]
+ "movq %%mm0, %%mm2\n\t"
+ V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between
+ V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way
+ V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way
+ ADDX" %%"XDX", %%"XDI"\n\t" // last 8 bytes of dest
+	V_MOVNTQ ("(%%"XDI")", "%%mm0")
+ V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1)
+
+#else
+ "movq (%%"XBX"), %%mm0\n\t"
+ // pavgb mm0, qword ptr["XBX"+"XCX"]
+ V_PAVGB ("%%mm0", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XBX"+"XCX"], mm2, ShiftMask)
+ MOVX" "_pDest", %%"XDI"\n\t"
+ V_MOVNTQ ("(%%"XDI")", "%%mm0")
+
+ // simple bob last 8 bytes
+ MOVX" "_Last8", %%"XDX"\n\t"
+ LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" //"XSI", ["XBX"+"XDX"]
+ "movq (%%"XSI"), %%mm0\n\t"
+ // pavgb mm0, qword ptr["XSI"+"XCX"]
+ V_PAVGB ("%%mm0", "(%%"XSI", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XSI"+"XCX"], mm2, ShiftMask)
+ V_MOVNTQ ("(%%"XDI", %%"XDX")", "%%mm0") // qword ptr["XDI"+"XDX"], mm0)
+#endif
+ // now loop and get the middle qwords
+ MOVX" "_pSrc", %%"XSI"\n\t"
+ MOVX" "_pSrcP", %%"XDI"\n\t"
+        MOVX" $8, %%"XDX"\n\t"            // current offset into all lines
+
+ "1:\n\t"
+ MOVX" "_pBobP", %%"XAX"\n\t"
+ ADDX" $8, %%"XDI"\n\t"
+ ADDX" $8, %%"XSI"\n\t"
+ ADDX" $8, %%"XBX"\n\t"
+ ADDX" %%"XDX", %%"XAX"\n\t"
+
+#ifdef USE_STRANGE_BOB
+#include "StrangeBob.inc"
+#else
+#include "WierdBob.inc"
+#endif
+
+ // For non-SSE2:
+        // throughout most of the rest of this loop we will maintain
+ // mm4 our min bob value
+ // mm5 best weave pixels so far
+ // mm6 our max Bob value
+ // mm7 best weighted pixel ratings so far
+
+ // We will keep a slight bias to using the weave pixels
+ // from the current location, by rating them by the min distance
+ // from the Bob value instead of the avg distance from that value.
+ // our best and only rating so far
+ "pcmpeqb %%mm7, %%mm7\n\t" // ffff, say we didn't find anything good yet
+
+#else
+ Last8 = (rowsize - 4);
+
+ for (y=1; y < FldHeight-1; y++)
+ {
+ #ifdef USE_STRANGE_BOB
+ long DiffThres = 0x0f;
+ #endif
+
+ #ifndef SKIP_SEARCH
+    long weave[2], MaxVals[2], MinVals[2], mov[2];
+ #endif
+
+ long diff[2], best[2], avg[2], diff2[2], out[2], x;
+
+#ifdef USE_VERTICAL_FILTER
+ pDest[0] = (3 * pBob[0] + pBob[src_pitch2]) / 4;
+ pDest[1] = (3 * pBob[1] + pBob[src_pitch2 + 1]) / 4;
+ pDest[2] = (3 * pBob[2] + pBob[src_pitch2 + 2]) / 4;
+ pDest[3] = (3 * pBob[3] + pBob[src_pitch2 + 3]) / 4;
+ pDest[dst_pitchw] = (pBob[0] + 3 * pBob[src_pitch2]) / 4;
+ pDest[dst_pitchw + 1] = (pBob[1] + 3 * pBob[src_pitch2 + 1]) / 4;
+ pDest[dst_pitchw + 2] = (pBob[2] + 3 * pBob[src_pitch2 + 2]) / 4;
+ pDest[dst_pitchw + 3] = (pBob[3] + 3 * pBob[src_pitch2 + 3]) / 4;
+
+  // simple bob last 4 bytes
+ pDest[Last8] = (3 * pBob[Last8] + pBob[Last8 + src_pitch2]) / 4;
+ pDest[Last8 + 1] = (3 * pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 4;
+ pDest[Last8 + 2] = (3 * pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 4;
+ pDest[Last8 + 3] = (3 * pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 4;
+ pDest[Last8 + src_pitch2] = (pBob[Last8] + 3 * pBob[Last8 + src_pitch2]) / 4;
+ pDest[Last8 + src_pitch2 + 1] = (pBob[Last8 + 1] + 3 * pBob[Last8 + src_pitch2 + 1]) / 4;
+ pDest[Last8 + src_pitch2 + 2] = (pBob[Last8 + 2] + 3 * pBob[Last8 + src_pitch2 + 2]) / 4;
+ pDest[Last8 + src_pitch2 + 3] = (pBob[Last8 + 3] + 3 * pBob[Last8 + src_pitch2 + 3]) / 4;
+#else
+  pDest[0] = (pBob[0] + pBob[src_pitch2]) / 2;
+ pDest[1] = (pBob[1] + pBob[src_pitch2 + 1]) / 2;
+ pDest[2] = (pBob[2] + pBob[src_pitch2 + 2]) / 2;
+ pDest[3] = (pBob[3] + pBob[src_pitch2 + 3]) / 2;
+
+  // simple bob last 4 bytes
+ pDest[Last8] = (pBob[Last8] + pBob[Last8 + src_pitch2]) / 2;
+ pDest[Last8 + 1] = (pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 2;
+ pDest[Last8 + 2] = (pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 2;
+ pDest[Last8 + 3] = (pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 2;
+#endif
+
+ pBob += 4;
+ pBobP += 4;
+ pSrc += 4;
+ pSrcP += 4;
+
+ for (x=4; x < Last8; x += 2) {
+
+#ifdef USE_STRANGE_BOB
+#include "StrangeBob.inc"
+#else
+#include "WierdBob.inc"
+#endif
+
+ // We will keep a slight bias to using the weave pixels
+ // from the current location, by rating them by the min distance
+ // from the Bob value instead of the avg distance from that value.
+ // our best and only rating so far
+ diff[0] = diff[1] = 255;
+
+
+#endif
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVA.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVA.inc
new file mode 100644
index 00000000..3e3d19b5
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVA.inc
@@ -0,0 +1,6 @@
+// -*- c++ -*-
+
+// Searches the center vertical line above center and below, in both the old
+// and new fields, but takes averages. These are even pixel addresses.
+ MERGE4PIXavg("(%%"XDI", %%"XCX", 2)", "(%%"XSI")") // down, up
+ MERGE4PIXavg("(%%"XDI")", "(%%"XSI", %%"XCX", 2)") // up, down
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVAH.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVAH.inc
new file mode 100644
index 00000000..33155bc1
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVAH.inc
@@ -0,0 +1,6 @@
+// -*- c++ -*-
+
+// Searches the center vertical line above center and below, in both the old
+// and new fields, but takes averages. These are even pixel addresses.
+ MERGE4PIXavgH("(%%"XDI", %%"XCX", 2)", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "(%%"XSI")") // down, up
+ MERGE4PIXavgH("(%%"XDI")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "(%%"XSI", %%"XCX", 2)") // up, down
diff --git a/gst/deinterlace/tvtime/tomsmocomp/StrangeBob.inc b/gst/deinterlace/tvtime/tomsmocomp/StrangeBob.inc
new file mode 100644
index 00000000..45b4c865
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/StrangeBob.inc
@@ -0,0 +1,435 @@
+// -*- c++ -*-
+
+ // First, get and save our possible Bob values
+	// Assume our pixels are laid out as follows with x the calc'd bob value
+ // and the other pixels are from the current field
+ //
+ // j a b c k current field
+ // x calculated line
+ // m d e f n current field
+ //
+	// we calc the bob luma value as:
+ // if |j - n| < Thres && |a - m| > Thres
+ // avg(j,n)
+ // end if
+ // if |k - m| < Thres && |c - n| > Thres
+ // avg(k,m)
+ // end if
+ // if |c - d| < Thres && |b - f| > Thres
+ // avg(c,d)
+ // end if
+ // if |a - f| < Thres && |b - d| > Thres
+ // avg(a,f)
+ // end if
+ // if |b - e| < Thres
+ // avg(b,e)
+ // end if
+	// pick up anything not yet set with avg(b,e)
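+	// e.g. on a diagonal edge running from j down to n, |j - n| stays small
+	// while |a - m| is large, so x is taken from avg(j,n) and the edge is
+	// followed instead of being blurred by the plain vertical avg(b,e)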
+
+#ifndef IS_C
+
+ // j, n
+ "pxor %%mm5, %%mm5\n\t"
+ "pxor %%mm6, %%mm6\n\t"
+ "pxor %%mm7, %%mm7\n\t"
+
+ "movq -2(%%"XBX"), %%mm0\n\t" // value a from top left
+	"movq	-4(%%"XBX", %%"XCX"), %%mm1\n\t"	// value m from bottom left
+
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm3\n\t"
+ "psubusb %%mm0, %%mm1\n\t"
+ "por %%mm1, %%mm3\n\t" // abs(a,m)
+
+ "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(a,m) > Thres else 0
+ "pxor %%mm4, %%mm4\n\t"
+ "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(a,m) < Thres, else 00
+ "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(a,m) > Thres, else 00
+
+
+ "movq -4(%%"XBX"), %%mm0\n\t" // value j
+ "movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n
+ "movq %%mm0, %%mm2\n\t"
+ V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(j,n)
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm0\n\t"
+ "psubusb %%mm3, %%mm1\n\t"
+ "por %%mm1, %%mm0\n\t" // abs(j,n)
+
+ "movq %%mm0, %%mm1\n\t"
+ "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(j,n) > Thres else 0
+ "pxor %%mm3, %%mm3\n\t"
+ "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(j,n) < Thres, else 00
+
+ "pand %%mm4, %%mm1\n\t"
+ "pand %%mm1, %%mm2\n\t"
+ "pand %%mm1, %%mm0\n\t"
+
+ "movq %%mm1, %%mm3\n\t"
+ "pxor %%mm5, %%mm3\n\t"
+ "pand %%mm3, %%mm6\n\t"
+ "pand %%mm3, %%mm7\n\t"
+ "pand %%mm3, %%mm5\n\t"
+
+ "por %%mm1, %%mm5\n\t"
+ "por %%mm2, %%mm6\n\t"
+ "por %%mm0, %%mm7\n\t"
+
+ // k & m
+	"movq	2(%%"XBX"), %%mm0\n\t"	// value c from top right
+ "movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n from bottom right
+
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm3\n\t"
+ "psubusb %%mm0, %%mm1\n\t"
+ "por %%mm1, %%mm3\n\t" // abs(c,n)
+
+ "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(c,n) > Thres else 0
+ "pxor %%mm4, %%mm4\n\t"
+ "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(c,n) < Thres, else 00
+ "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(c,n) > Thres, else 00
+
+
+ "movq 4(%%"XBX"), %%mm0\n\t" // value k
+ "movq -4(%%"XBX", %%"XCX"), %%mm1\n\t" // value m
+ "movq %%mm0, %%mm2\n\t"
+ V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(k,m)
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm0\n\t"
+ "psubusb %%mm3, %%mm1\n\t"
+ "por %%mm1, %%mm0\n\t" // abs(k,m)
+
+ "movq %%mm0, %%mm1\n\t"
+ "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(k,m) > Thres else 0
+ "pxor %%mm3, %%mm3\n\t"
+ "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(k,m) < Thres, else 00
+
+ "pand %%mm4, %%mm1\n\t"
+
+ "pand %%mm1, %%mm2\n\t"
+ "pand %%mm1, %%mm0\n\t"
+
+ "movq %%mm1, %%mm3\n\t"
+ "pxor %%mm5, %%mm3\n\t"
+ "pand %%mm3, %%mm6\n\t"
+ "pand %%mm3, %%mm7\n\t"
+ "pand %%mm3, %%mm5\n\t"
+
+ "por %%mm1, %%mm5\n\t"
+ "por %%mm2, %%mm6\n\t"
+ "por %%mm0, %%mm7\n\t"
+
+
+ // c & d
+ "movq (%%"XBX"), %%mm0\n\t" // value b from top left
+ "movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f from bottom right
+
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm3\n\t"
+ "psubusb %%mm0, %%mm1\n\t"
+ "por %%mm1, %%mm3\n\t" // abs(b,f)
+
+ "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(b,f) > Thres else 0
+ "pxor %%mm4, %%mm4\n\t"
+ "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(b,f) < Thres, else 00
+ "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(b,f) > Thres, else 00
+
+ "movq 2(%%"XBX"), %%mm0\n\t" // value c
+ "movq -2(%%"XBX", %%"XCX"), %%mm1\n\t" // value d
+ "movq %%mm0, %%mm2\n\t"
+ V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(c,d)
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm0\n\t"
+ "psubusb %%mm3, %%mm1\n\t"
+ "por %%mm1, %%mm0\n\t" // abs(c,d)
+
+ "movq %%mm0, %%mm1\n\t"
+ "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(c,d) > Thres else 0
+ "pxor %%mm3, %%mm3\n\t"
+ "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(c,d) < Thres, else 00
+
+ "pand %%mm4, %%mm1\n\t"
+
+ "pand %%mm1, %%mm2\n\t"
+ "pand %%mm1, %%mm0\n\t"
+
+ "movq %%mm1, %%mm3\n\t"
+ "pxor %%mm5, %%mm3\n\t"
+ "pand %%mm3, %%mm6\n\t"
+ "pand %%mm3, %%mm7\n\t"
+ "pand %%mm3, %%mm5\n\t"
+
+ "por %%mm1, %%mm5\n\t"
+ "por %%mm2, %%mm6\n\t"
+ "por %%mm0, %%mm7\n\t"
+
+ // a & f
+ "movq (%%"XBX"), %%mm0\n\t" // value b from top left
+	"movq	-2(%%"XBX", %%"XCX"), %%mm1\n\t"	// value d from bottom left
+
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm3\n\t"
+ "psubusb %%mm0, %%mm1\n\t"
+ "por %%mm1, %%mm3\n\t" // abs(b,d)
+
+ "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(b,d) > Thres else 0
+ "pxor %%mm4, %%mm4\n\t"
+ "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(b,d) < Thres, else 00
+ "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(b,d) > Thres, else 00
+
+ "movq -2(%%"XBX"), %%mm0\n\t" // value a
+ "movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f
+ "movq %%mm0, %%mm2\n\t"
+ V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(a,f)
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm0\n\t"
+ "psubusb %%mm3, %%mm1\n\t"
+ "por %%mm1, %%mm0\n\t" // abs(a,f)
+
+ "movq %%mm0, %%mm1\n\t"
+ "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(a,f) > Thres else 0
+ "pxor %%mm3, %%mm3\n\t"
+ "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(a,f) < Thres, else 00
+
+ "pand %%mm4, %%mm1\n\t"
+
+ "pand %%mm1, %%mm2\n\t"
+ "pand %%mm1, %%mm0\n\t"
+
+ "movq %%mm1, %%mm3\n\t"
+ "pxor %%mm5, %%mm3\n\t"
+ "pand %%mm3, %%mm6\n\t"
+ "pand %%mm3, %%mm7\n\t"
+ "pand %%mm3, %%mm5\n\t"
+
+ "por %%mm1, %%mm5\n\t"
+ "por %%mm2, %%mm6\n\t"
+ "por %%mm0, %%mm7\n\t"
+
+ "pand "_YMask", %%mm5\n\t" // mask out chroma from here
+ "pand "_YMask", %%mm6\n\t" // mask out chroma from here
+ "pand "_YMask", %%mm7\n\t" // mask out chroma from here
+
+ // b,e
+ "movq (%%"XBX"), %%mm0\n\t" // value b from top
+ "movq (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom
+ "movq %%mm0, %%mm2\n\t"
+ V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(b,e)
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm0\n\t"
+ "psubusb %%mm3, %%mm1\n\t"
+ "por %%mm1, %%mm0\n\t" // abs(b,e)
+
+ "movq %%mm0, %%mm1\n\t"
+ "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(b,e) > Thres else 0
+ "pxor %%mm3, %%mm3\n\t"
+ "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(b,e) < Thres, else 00
+
+ "pand %%mm1, %%mm2\n\t"
+ "pand %%mm1, %%mm0\n\t"
+
+ "movq %%mm1, %%mm3\n\t"
+ "pxor %%mm5, %%mm3\n\t"
+ "pand %%mm3, %%mm6\n\t"
+ "pand %%mm3, %%mm7\n\t"
+ "pand %%mm3, %%mm5\n\t"
+
+ "por %%mm1, %%mm5\n\t"
+ "por %%mm2, %%mm6\n\t"
+ "por %%mm0, %%mm7\n\t"
+
+ // bob in any leftovers
+ "movq (%%"XBX"), %%mm0\n\t" // value b from top
+ "movq (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom
+
+
+// We will also calc here the max/min values to later limit comb
+// so the max excursion will not exceed the Max_Comb constant
+
+#ifdef SKIP_SEARCH
+ "movq %%mm0, %%mm2\n\t"
+// pminub %%mm2, %%mm1
+ V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
+
+// pmaxub %%mm6, %%mm2 // clip our current results so far to be above this
+ V_PMAXUB ("%%mm6", "%%mm2")
+ "movq %%mm0, %%mm2\n\t"
+ V_PMAXUB ("%%mm2", "%%mm1")
+// pminub %%mm6, %%mm2 // clip our current results so far to be below this
+ V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
+
+#else
+ "movq %%mm0, %%mm2\n\t"
+ "movq (%%"XAX"), %%mm4\n\t"
+ "psubusb %%mm4, %%mm2\n\t"
+ "psubusb %%mm0, %%mm4\n\t"
+ "por %%mm2, %%mm4\n\t" // abs diff
+
+ "movq %%mm1, %%mm2\n\t"
+ "movq (%%"XAX", %%"XCX"), %%mm3\n\t"
+ "psubusb %%mm3, %%mm2\n\t"
+ "psubusb %%mm1, %%mm3\n\t"
+ "por %%mm2, %%mm3\n\t" // abs diff
+// pmaxub %%mm3, %%mm4 // top or bottom pixel moved most
+ V_PMAXUB ("%%mm3", "%%mm4") // top or bottom pixel moved most
+ "psubusb "_DiffThres", %%mm3\n\t" // moved more than allowed? or goes to 0?
+ "pxor %%mm4, %%mm4\n\t"
+ "pcmpeqb %%mm4, %%mm3\n\t" // now ff where low motion, else high motion
+
+ "movq %%mm0, %%mm2\n\t"
+// pminub %%mm2, %%mm1
+ V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
+
+// pmaxub %%mm6, %%mm2 // clip our current results so far to be above this
+ V_PMAXUB ("%%mm6", "%%mm2")
+
+ "psubusb %%mm3, %%mm2\n\t" // maybe decrease it to 0000.. if no surround motion
+ "movq %%mm2, "_Min_Vals"\n\t"
+
+ "movq %%mm0, %%mm2\n\t"
+ V_PMAXUB ("%%mm2", "%%mm1")
+// pminub %%mm6, %%mm2 // clip our current results so far to be below this
+ V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
+ "paddusb %%mm3, %%mm2\n\t" // maybe increase it to ffffff if no surround motion
+ "movq %%mm2, "_Max_Vals"\n\t"
+#endif
+
+ "movq %%mm0, %%mm2\n\t"
+// pavgb %%mm2, %%mm1 // avg(b,e)
+ V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(b,e)
+
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm3\n\t"
+ "psubusb %%mm0, %%mm1\n\t"
+ "por %%mm1, %%mm3\n\t" // abs(b,e)
+ "movq %%mm3, %%mm1\n\t" // keep copy of diffs
+
+ "pxor %%mm4, %%mm4\n\t"
+ "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0
+ "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00
+ "pcmpeqb %%mm0, %%mm0\n\t"
+ "pandn %%mm0, %%mm5\n\t"
+ "por %%mm5, %%mm3\n\t"
+ "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00
+
+ "pand %%mm3, %%mm1\n\t"
+ "pand %%mm3, %%mm2\n\t"
+
+ "pand %%mm4, %%mm6\n\t"
+ "pand %%mm4, %%mm7\n\t"
+
+ "por %%mm2, %%mm6\n\t" // our x2 value
+ "por %%mm1, %%mm7\n\t" // our x2 diffs
+ "movq %%mm7, %%mm4\n\t" // save as bob uncertainty indicator
+
+#else
+
+ diff[0] = -1;
+ diff[1] = -1;
+ best[0] = 0;
+ best[1] = 0;
+  // j, n: use avg(j,n) if |j - n| < Thres and |a - m| > Thres
+  if (ABS (pBob[-4] - pBob[src_pitch2 + 4]) < DiffThres &&
+      ABS (pBob[-2] - pBob[src_pitch2 - 4]) > DiffThres) {
+    best[0] = (pBob[-4] + pBob[src_pitch2 + 4]) / 2;
+    diff[0] = ABS (pBob[-4] - pBob[src_pitch2 + 4]);
+  }
+  if (ABS (pBob[-3] - pBob[src_pitch2 + 5]) < DiffThres &&
+      ABS (pBob[-1] - pBob[src_pitch2 - 3]) > DiffThres) {
+    best[1] = (pBob[-3] + pBob[src_pitch2 + 5]) / 2;
+    diff[1] = ABS (pBob[-3] - pBob[src_pitch2 + 5]);
+  }
+
+  // k & m: use avg(k,m) if |k - m| < Thres and |c - n| > Thres
+  if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < DiffThres &&
+      ABS (pBob[2] - pBob[src_pitch2 + 4]) > DiffThres) {
+    best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+    diff[0] = ABS (pBob[4] - pBob[src_pitch2 - 4]);
+  }
+
+  if (ABS (pBob[5] - pBob[src_pitch2 - 3]) < DiffThres &&
+      ABS (pBob[3] - pBob[src_pitch2 + 5]) > DiffThres) {
+    best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2;
+    diff[1] = ABS (pBob[5] - pBob[src_pitch2 - 3]);
+  }
+
+  // c & d: use avg(c,d) if |c - d| < Thres and |b - f| > Thres
+  if (ABS (pBob[2] - pBob[src_pitch2 - 2]) < DiffThres &&
+      ABS (pBob[0] - pBob[src_pitch2 + 2]) > DiffThres) {
+    best[0] = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
+    diff[0] = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+  }
+
+  if (ABS (pBob[3] - pBob[src_pitch2 - 1]) < DiffThres &&
+      ABS (pBob[1] - pBob[src_pitch2 + 3]) > DiffThres) {
+    best[1] = (pBob[3] + pBob[src_pitch2 - 1]) / 2;
+    diff[1] = ABS (pBob[3] - pBob[src_pitch2 - 1]);
+  }
+
+  // a & f: use avg(a,f) if |a - f| < Thres and |b - d| > Thres
+  if (ABS (pBob[-2] - pBob[src_pitch2 + 2]) < DiffThres &&
+      ABS (pBob[0] - pBob[src_pitch2 - 2]) > DiffThres) {
+    best[0] = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
+    diff[0] = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+  }
+
+  if (ABS (pBob[-1] - pBob[src_pitch2 + 3]) < DiffThres &&
+      ABS (pBob[1] - pBob[src_pitch2 - 1]) > DiffThres) {
+    best[1] = (pBob[-1] + pBob[src_pitch2 + 3]) / 2;
+    diff[1] = ABS (pBob[-1] - pBob[src_pitch2 + 3]);
+  }
+
+ // b,e
+ if (ABS (pBob[0] - pBob[src_pitch2]) < DiffThres) {
+ best[0] = (pBob[0] + pBob[src_pitch2]) / 2;
+ diff[0] = ABS (pBob[0] - pBob[src_pitch2]);
+ }
+
+ if (ABS (pBob[1] - pBob[src_pitch2 + 1]) < DiffThres) {
+ best[1] = (pBob[1] + pBob[src_pitch2 + 1]) / 2;
+ diff[1] = ABS (pBob[1] - pBob[src_pitch2 + 1]);
+ }
+
+
+// We will also calc here the max/min values to later limit comb
+// so the max excursion will not exceed the Max_Comb constant
+
+#ifdef SKIP_SEARCH
+ best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
+#else
+ mov[0] = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+ mov[1] = MAX (ABS (pBob[1] - pBobP[1]), ABS (pBob[src_pitch2 + 1] - pBobP[src_pitch2 + 1]));
+
+ MinVals[0] = 0;
+ MinVals[1] = 0;
+ MaxVals[0] = 255;
+ MaxVals[1] = 255;
+ if (mov[0] > DiffThres) {
+ MinVals[0] = MAX (MIN (pBob[0], pBob[src_pitch2]), best[0]);
+ MaxVals[0] = MIN (MAX (pBob[0], pBob[src_pitch2]), best[0]);
+ }
+
+ if (mov[1] > DiffThres) {
+ MinVals[1] = MAX (MIN (pBob[1], pBob[src_pitch2+1]), best[1]);
+ MaxVals[1] = MIN (MAX (pBob[1], pBob[src_pitch2+1]), best[1]);
+ }
+
+ best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
+#endif
+ avg[0] = (pBob[src_pitch2] + pBob[0]) / 2;
+ avg[1] = (pBob[src_pitch2 + 1] + pBob[1]) / 2;
+  diff2[0] = ABS (pBob[src_pitch2] - pBob[0]);
+ diff2[1] = ABS (pBob[src_pitch2 + 1] - pBob[1]);
+
+ if (diff[0] == -1 || diff2[0] < diff[0]) {
+ best[0] = avg[0];
+ diff[0] = diff2[0];
+ }
+
+ if (diff[1] == -1 || diff2[1] < diff[1]) {
+ best[1] = avg[1];
+ diff[1] = diff2[1];
+ }
+#endif
diff --git a/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll.inc b/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll.inc
new file mode 100644
index 00000000..e8883dd3
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll.inc
@@ -0,0 +1,241 @@
+/*
+ * GStreamer
+ * Copyright (c) 2002 Tom Barry All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry.
+ * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
+ */
+
+
+#ifndef TopFirst
+#define TopFirst IsOdd
+#endif
+
+#ifdef SEFUNC
+#undef SEFUNC
+#endif
+
+#if defined(IS_MMXEXT)
+#define SEFUNC(x) Search_Effort_MMXEXT_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
+#elif defined(IS_3DNOW)
+#define SEFUNC(x) Search_Effort_3DNOW_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
+#elif defined(IS_MMX)
+#define SEFUNC(x) Search_Effort_MMX_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
+#else
+#define SEFUNC(x) Search_Effort_C_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
+#endif
+
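+/* On this first pass SEFUNC(x) expands to a full function header, so the
+   two inclusions of TomsMoCompAll2.inc below define every
+   Search_Effort_<SIMD>_<n> function, first the normal set and then the
+   strange-bob (<n>SB) set. */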
+#include "TomsMoCompAll2.inc"
+
+#define USE_STRANGE_BOB
+
+#include "TomsMoCompAll2.inc"
+
+#undef USE_STRANGE_BOB
+
+#undef SEFUNC
+#if defined(IS_MMXEXT)
+#define SEFUNC(x) Search_Effort_MMXEXT_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
+#elif defined(IS_3DNOW)
+#define SEFUNC(x) Search_Effort_3DNOW_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
+#elif defined(IS_MMX)
+#define SEFUNC(x) Search_Effort_MMX_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
+#else
+#define SEFUNC(x) Search_Effort_C_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
+#endif
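+/* From here on SEFUNC(x) expands to a call of one of the functions defined
+   above, e.g. with IS_MMX defined SEFUNC(5) becomes
+   Search_Effort_MMX_5 (src_pitch, dst_pitch, rowsize, ...). */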
+
+void FUNCT_NAME(GstDeinterlaceMethod *d_method, GstDeinterlace* object, GstBuffer *outbuf)
+{
+ GstDeinterlaceMethodTomsMoComp *self = GST_DEINTERLACE_METHOD_TOMSMOCOMP (d_method);
+ long SearchEffort = self->search_effort;
+ int UseStrangeBob = self->strange_bob;
+ int IsOdd;
+ const unsigned char *pWeaveSrc;
+ const unsigned char *pWeaveSrcP;
+ unsigned char *pWeaveDest;
+ const unsigned char *pCopySrc;
+ const unsigned char *pCopySrcP;
+ unsigned char *pCopyDest;
+ int src_pitch;
+ int dst_pitch;
+ int rowsize;
+ int FldHeight;
+
+  /* double the stride to address only every other (odd/even) scanline */
+ src_pitch = object->field_stride;
+ dst_pitch = object->row_stride;
+ rowsize = object->row_stride;
+ FldHeight = object->field_height;
+
+ pCopySrc = GST_BUFFER_DATA(object->field_history[object->history_count-1].buf);
+ pCopySrcP = GST_BUFFER_DATA(object->field_history[object->history_count-3].buf);
+ pWeaveSrc = GST_BUFFER_DATA(object->field_history[object->history_count-2].buf);
+ pWeaveSrcP = GST_BUFFER_DATA(object->field_history[object->history_count-4].buf);
+
+ /* use bottom field and interlace top field */
+ if (object->field_history[object->history_count-2].flags == PICTURE_INTERLACED_BOTTOM) {
+ IsOdd = 1;
+
+ // if we have an odd field we copy an even field and weave an odd field
+ pCopyDest = GST_BUFFER_DATA(outbuf);
+ pWeaveDest = pCopyDest + dst_pitch;
+ }
+  /* and vice versa */
+ else {
+
+ IsOdd = 0;
+ // if we have an even field we copy an odd field and weave an even field
+ pCopyDest = GST_BUFFER_DATA(outbuf) + dst_pitch;
+ pWeaveDest = GST_BUFFER_DATA(outbuf);
+ }
+
+
+ // copy 1st and last weave lines
+ Fieldcopy(pWeaveDest, pCopySrc, rowsize,
+ 1, dst_pitch*2, src_pitch);
+ Fieldcopy(pWeaveDest+(FldHeight-1)*dst_pitch*2,
+ pCopySrc+(FldHeight-1)*src_pitch, rowsize,
+ 1, dst_pitch*2, src_pitch);
+
+#ifdef USE_VERTICAL_FILTER
+ // Vertical Filter currently not implemented for DScaler !!
+ // copy 1st and last lines the copy field
+ Fieldcopy(pCopyDest, pCopySrc, rowsize,
+ 1, dst_pitch*2, src_pitch);
+ Fieldcopy(pCopyDest+(FldHeight-1)*dst_pitch*2,
+ pCopySrc+(FldHeight-1)*src_pitch, rowsize,
+ 1, dst_pitch*2, src_pitch);
+#else
+
+ // copy all of the copy field
+ Fieldcopy(pCopyDest, pCopySrc, rowsize,
+ FldHeight, dst_pitch*2, src_pitch);
+#endif
+ // then go fill in the hard part, being variously lazy depending upon
+ // SearchEffort
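+	// (roughly: 0 = no search, 1-5 = center and +-1 pixel, 9-13 = 3x3 area,
+	// 15-19 = 5x3 area, 21 = 7x3 area, above that 9x3 -- see the
+	// Search_Effort_* functions in TomsMoCompAll2.inc)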
+
+ if(!UseStrangeBob) {
+ if (SearchEffort == 0)
+ {
+ SEFUNC(0);
+ }
+ else if (SearchEffort <= 1)
+ {
+ SEFUNC(1);
+ }
+ /* else if (SearchEffort <= 2)
+ {
+ SEFUNC(2);
+ }
+ */
+ else if (SearchEffort <= 3)
+ {
+ SEFUNC(3);
+ }
+ else if (SearchEffort <= 5)
+ {
+ SEFUNC(5);
+ }
+ else if (SearchEffort <= 9)
+ {
+ SEFUNC(9);
+ }
+ else if (SearchEffort <= 11)
+ {
+ SEFUNC(11);
+ }
+ else if (SearchEffort <= 13)
+ {
+ SEFUNC(13);
+ }
+ else if (SearchEffort <= 15)
+ {
+ SEFUNC(15);
+ }
+ else if (SearchEffort <= 19)
+ {
+ SEFUNC(19);
+ }
+ else if (SearchEffort <= 21)
+ {
+ SEFUNC(21);
+ }
+ else
+ {
+ SEFUNC(Max);
+ }
+ }
+ else
+ {
+ if (SearchEffort == 0)
+ {
+ SEFUNC(0SB);
+ }
+ else if (SearchEffort <= 1)
+ {
+ SEFUNC(1SB);
+ }
+ /* else if (SearchEffort <= 2)
+ {
+ SEFUNC(2SB);
+ }
+ */
+ else if (SearchEffort <= 3)
+ {
+ SEFUNC(3SB);
+ }
+ else if (SearchEffort <= 5)
+ {
+ SEFUNC(5SB);
+ }
+ else if (SearchEffort <= 9)
+ {
+ SEFUNC(9SB);
+ }
+ else if (SearchEffort <= 11)
+ {
+ SEFUNC(11SB);
+ }
+ else if (SearchEffort <= 13)
+ {
+ SEFUNC(13SB);
+ }
+ else if (SearchEffort <= 15)
+ {
+ SEFUNC(15SB);
+ }
+ else if (SearchEffort <= 19)
+ {
+ SEFUNC(19SB);
+ }
+ else if (SearchEffort <= 21)
+ {
+ SEFUNC(21SB);
+ }
+ else
+ {
+ SEFUNC(MaxSB);
+ }
+ }
+
+#if defined(BUILD_X86_ASM) && !defined(IS_C)
+ __asm__ __volatile__("emms");
+#endif
+}
diff --git a/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll2.inc b/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll2.inc
new file mode 100644
index 00000000..f6344eab
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll2.inc
@@ -0,0 +1,243 @@
+// -*- c++ -*-
+
+#ifdef SEARCH_EFFORT_FUNC
+#undef SEARCH_EFFORT_FUNC
+#endif
+
+#ifdef USE_STRANGE_BOB
+#define SEARCH_EFFORT_FUNC(n) SEFUNC(n##SB)
+#else
+#define SEARCH_EFFORT_FUNC(n) SEFUNC(n)
+#endif
+
+static inline int SEARCH_EFFORT_FUNC(0) // we don't try at all ;-)
+{
+ //see Search_Effort_Max() for comments
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+}
+
+static inline int SEARCH_EFFORT_FUNC(1)
+{
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
+ //see Search_Effort_Max() for comments
+#include "SearchLoopTop.inc"
+    RESET_CHROMA // pretend chroma diffs were 255 each
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+#endif
+}
+
+static inline int SEARCH_EFFORT_FUNC(3)
+{
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
+ //see Search_Effort_Max() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA2.inc"
+    RESET_CHROMA // pretend chroma diffs were 255 each
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+#endif
+}
+
+static inline int SEARCH_EFFORT_FUNC(5)
+{
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
+ //see Search_Effort_Max() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA2.inc"
+#include "SearchLoopOddAH2.inc"
+    RESET_CHROMA // pretend chroma diffs were 255 each
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+#endif
+}
+
+// 3x3 search
+static inline int SEARCH_EFFORT_FUNC(9)
+{
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
+ //see SearchEffortMax() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA.inc"
+    RESET_CHROMA // pretend chroma diffs were 255 each
+#include "SearchLoopVA.inc"
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+#endif
+}
+
+// Search 9 with 2 H-half pels added
+static inline int SEARCH_EFFORT_FUNC(11)
+{
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
+ //see SearchEffortMax() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA.inc"
+#include "SearchLoopOddAH2.inc"
+    RESET_CHROMA // pretend chroma diffs were 255 each
+#include "SearchLoopVA.inc"
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+#endif
+}
+
+// Search 11 with 2 V-half pels added
+static inline int SEARCH_EFFORT_FUNC(13)
+{
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
+ //see SearchEffortMax() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA.inc"
+#include "SearchLoopOddAH2.inc"
+    RESET_CHROMA // pretend chroma diffs were 255 each
+#include "SearchLoopVAH.inc"
+#include "SearchLoopVA.inc"
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+#endif
+}
+
+// 5x3
+static inline int SEARCH_EFFORT_FUNC(15)
+{
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
+ //see SearchEffortMax() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA.inc"
+    RESET_CHROMA // pretend chroma diffs were 255 each
+#include "SearchLoopEdgeA.inc"
+#include "SearchLoopVA.inc"
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+#endif
+}
+
+// 5x3 + 4 half pels
+static inline int SEARCH_EFFORT_FUNC(19)
+{
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
+ //see SearchEffortMax() for comments
+#include "SearchLoopTop.inc"
+#include "SearchLoopOddA.inc"
+#include "SearchLoopOddAH2.inc"
+    RESET_CHROMA // pretend chroma diffs were 255 each
+#include "SearchLoopEdgeA.inc"
+#include "SearchLoopVAH.inc"
+#include "SearchLoopVA.inc"
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+#endif
+}
+
+// Handle one 4x1 block of pixels
+// Search a 7x3 area, no half pels
+
+static inline int SEARCH_EFFORT_FUNC(21)
+{
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
+ //see SearchLoopTop.inc for comments
+#include "SearchLoopTop.inc"
+
+  // odd addresses -- pixels at odd addresses wouldn't generate
+  // good chroma values but we will mask those off
+
+#include "SearchLoopOddA6.inc" // 6 odd pels, 3 to left & right
+#include "SearchLoopOddA.inc" // 6 odd pels, 1 to left & right
+
+    RESET_CHROMA // pretend chroma diffs were 255 each
+
+ // even addresses -- use both luma and chroma from these
+ // search averages of 2 pixels left and right
+#include "SearchLoopEdgeA.inc"
+ // search vertical line and averages, -1,0,+1
+#include "SearchLoopVA.inc"
+ // blend our results and loop
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+#endif
+}
+
+// Handle one 4x1 block of pixels
+// Search a 9x3 area, no half pels
+static inline int SEARCH_EFFORT_FUNC(Max)
+{
+#ifdef IS_C
+#define SKIP_SEARCH
+#include "SearchLoopTop.inc"
+#include "SearchLoopBottom.inc"
+#undef SKIP_SEARCH
+#else
+ //see SearchLoopTop.inc for comments
+#include "SearchLoopTop.inc"
+
+  // odd addresses -- pixels at odd addresses wouldn't generate
+  // good chroma values but we will mask those off
+
+#include "SearchLoopOddA6.inc" // 6 odd pels, 3 to left & right
+#include "SearchLoopOddA.inc" // 6 odd pels, 1 to left & right
+
+    RESET_CHROMA // pretend chroma diffs were 255 each
+
+ // even addresses -- use both luma and chroma from these
+ // search averages of 4 pixels left and right
+#include "SearchLoopEdgeA8.inc"
+ // search averages of 2 pixels left and right
+#include "SearchLoopEdgeA.inc"
+ // search vertical line and averages, -1,0,+1
+#include "SearchLoopVA.inc"
+ // blend our results and loop
+#include "SearchLoop0A.inc"
+#include "SearchLoopBottom.inc"
+#endif
+}
+
+#undef SEARCH_EFFORT_FUNC
+
diff --git a/gst/deinterlace/tvtime/tomsmocomp/WierdBob.inc b/gst/deinterlace/tvtime/tomsmocomp/WierdBob.inc
new file mode 100644
index 00000000..f4bbb830
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/WierdBob.inc
@@ -0,0 +1,286 @@
+// -*- c++ -*-
+
+ // First, get and save our possible Bob values
+	// Assume our pixels are laid out as follows with x the calc'd bob value
+ // and the other pixels are from the current field
+ //
+ // j a b c k current field
+ // x calculated line
+ // m d e f n current field
+ //
+ // we calc the bob value as:
+ // x2 = either avg(a,f), avg(c,d), avg(b,e), avg(j,n), or avg(k,m)
+
+ // selected for the smallest of abs(a,f), abs(c,d), or abs(b,e), etc.
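+	// i.e. of the five candidate pairs through x, keep the one whose two
+	// samples agree best (smallest absolute difference), so diagonal edges
+	// are followed rather than blurred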
+
+#ifndef IS_C
+ // a,f
+ "movq -2(%%"XBX"), %%mm0\n\t" // value a from top left
+ "movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f from bottom right
+ "movq %%mm0, %%mm6\n\t"
+// pavgb %%mm6, %%mm1 // avg(a,f), also best so far
+ V_PAVGB ("%%mm6", "%%mm1", "%%mm7", _ShiftMask) // avg(a,f), also best so far
+ "movq %%mm0, %%mm7\n\t"
+ "psubusb %%mm1, %%mm7\n\t"
+ "psubusb %%mm0, %%mm1\n\t"
+ "por %%mm1, %%mm7\n\t" // abs diff, also best so far
+
+ // c,d
+	"movq	2(%%"XBX"), %%mm0\n\t"	// value c from top right
+	"movq	-2(%%"XBX", %%"XCX"), %%mm1\n\t"	// value d from bottom left
+ "movq %%mm0, %%mm2\n\t"
+// pavgb %%mm2, %%mm1 // avg(c,d)
+ V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(c,d)
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm3\n\t"
+ "psubusb %%mm0, %%mm1\n\t"
+ "por %%mm1, %%mm3\n\t" // abs(c,d)
+ "movq %%mm3, %%mm1\n\t" // keep copy
+
+ "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0
+ "pxor %%mm4, %%mm4\n\t"
+ "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00
+ "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00
+
+ "pand %%mm3, %%mm1\n\t" // keep only better new avg and abs
+ "pand %%mm3, %%mm2\n\t"
+
+ "pand %%mm4, %%mm6\n\t"
+ "pand %%mm4, %%mm7\n\t"
+
+ "por %%mm2, %%mm6\n\t" // and merge new & old vals keeping best
+ "por %%mm1, %%mm7\n\t"
+ "por "_UVMask", %%mm7\n\t" // but we know chroma is worthless so far
+ "pand "_YMask", %%mm5\n\t" // mask out chroma from here also
+
+ // j,n
+ "movq -4(%%"XBX"), %%mm0\n\t" // value j from top left
+ "movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n from bottom right
+ "movq %%mm0, %%mm2\n\t"
+// pavgb %%mm2, %%mm1 // avg(j,n)
+ V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(j,n)
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm3\n\t"
+ "psubusb %%mm0, %%mm1\n\t"
+ "por %%mm1, %%mm3\n\t" // abs(j-n)
+ "movq %%mm3, %%mm1\n\t" // keep copy
+
+ "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0
+ "pxor %%mm4, %%mm4\n\t"
+ "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00
+ "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00
+
+ "pand %%mm3, %%mm1\n\t" // keep only better new avg and abs
+ "pand %%mm2, %%mm3\n\t"
+
+ "pand %%mm4, %%mm6\n\t"
+ "pand %%mm4, %%mm7\n\t"
+
+ "por %%mm3, %%mm6\n\t" // and merge new & old vals keeping best
+ "por %%mm1, %%mm7\n\t" // "
+
+ // k, m
+ "movq 4(%%"XBX"), %%mm0\n\t" // value k from top right
+ "movq -4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n from bottom left
+ "movq %%mm0, %%mm4\n\t"
+// pavgb %%mm4, %%mm1 // avg(k,m)
+ V_PAVGB ("%%mm4", "%%mm1", "%%mm3", _ShiftMask) // avg(k,m)
+
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm3\n\t"
+ "psubusb %%mm0, %%mm1\n\t"
+ "por %%mm1, %%mm3\n\t" // abs(k,m)
+ "movq %%mm3, %%mm1\n\t" // keep copy
+
+ "movq %%mm4, %%mm2\n\t" // avg(k,m)
+
+ "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0
+ "pxor %%mm4, %%mm4\n\t"
+ "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00
+ "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00
+
+ "pand %%mm3, %%mm1\n\t" // keep only better new avg and abs
+ "pand %%mm2, %%mm3\n\t"
+
+ "pand %%mm4, %%mm6\n\t"
+ "pand %%mm4, %%mm7\n\t"
+
+ "por %%mm3, %%mm6\n\t" // and merge new & old vals keeping best
+ "por %%mm1, %%mm7\n\t" // "
+
+ // b,e
+ "movq (%%"XBX"), %%mm0\n\t" // value b from top
+ "movq (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom
+
+// We will also calc here the max/min values to later limit comb
+// so the max excursion will not exceed the Max_Comb constant
+
+#ifdef SKIP_SEARCH
+ "movq %%mm0, %%mm2\n\t"
+// pminub %%mm2, %%mm1
+ V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
+
+// pmaxub %%mm6, %%mm2 // clip our current results so far to be above this
+ V_PMAXUB ("%%mm6", "%%mm2")
+ "movq %%mm0, %%mm2\n\t"
+ V_PMAXUB ("%%mm2", "%%mm1")
+// pminub %%mm6, %%mm2 // clip our current results so far to be below this
+ V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
+
+#else
+ "movq %%mm0, %%mm2\n\t"
+ "movq (%%"XAX"), %%mm4\n\t"
+ "psubusb %%mm4, %%mm2\n\t"
+ "psubusb %%mm0, %%mm4\n\t"
+ "por %%mm2, %%mm4\n\t" // abs diff
+
+ "movq %%mm1, %%mm2\n\t"
+ "movq (%%"XAX", %%"XCX"), %%mm3\n\t"
+ "psubusb %%mm3, %%mm2\n\t"
+ "psubusb %%mm1, %%mm3\n\t"
+ "por %%mm2, %%mm3\n\t" // abs diff
+// pmaxub %%mm3, %%mm4 // top or bottom pixel moved most
+ V_PMAXUB ("%%mm3", "%%mm4") // top or bottom pixel moved most
+ "psubusb "_Max_Mov", %%mm3\n\t" // moved more than allowed? or goes to 0?
+ "pxor %%mm4, %%mm4\n\t"
+ "pcmpeqb %%mm4, %%mm3\n\t" // now ff where low motion, else high motion
+
+ "movq %%mm0, %%mm2\n\t"
+// pminub %%mm2, %%mm1
+ V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
+
+// pmaxub %%mm6, %%mm2 // clip our current results so far to be above this
+ V_PMAXUB ("%%mm6", "%%mm2")
+
+ "psubusb %%mm3, %%mm2\n\t" // maybe decrease it to 0000.. if no surround motion
+ "movq %%mm2, "_Min_Vals"\n\t"
+
+ "movq %%mm0, %%mm2\n\t"
+ V_PMAXUB ("%%mm2", "%%mm1")
+// pminub %%mm6, %%mm2 // clip our current results so far to be below this
+ V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
+ "paddusb %%mm3, %%mm2\n\t" // maybe increase it to ffffff if no surround motion
+ "movq %%mm2, "_Max_Vals"\n\t"
+#endif
+
+ "movq %%mm0, %%mm2\n\t"
+// pavgb %%mm2, %%mm1 // avg(b,e)
+ V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(b,e)
+
+ "movq %%mm0, %%mm3\n\t"
+ "psubusb %%mm1, %%mm3\n\t"
+ "psubusb %%mm0, %%mm1\n\t"
+ "por %%mm1, %%mm3\n\t" // abs(c,d)
+ "movq %%mm3, %%mm1\n\t" // keep copy of diffs
+
+ "pxor %%mm4, %%mm4\n\t"
+ "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0
+ "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00
+
+ "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00
+
+ "pand %%mm3, %%mm1\n\t"
+ "pand %%mm3, %%mm2\n\t"
+
+ "pand %%mm4, %%mm6\n\t"
+ "pand %%mm4, %%mm7\n\t"
+
+ "por %%mm2, %%mm6\n\t" // our x2 value
+ "por %%mm1, %%mm7\n\t" // our x2 diffs
+ "movq %%mm7, %%mm4\n\t" // save as bob uncertainty indicator
+
+#else
+
+ // a,f
+ best[0] = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
+ diff[0] = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+ best[1] = (pBob[-1] + pBob[src_pitch2 + 3]) / 2;
+ diff[1] = ABS (pBob[-1] - pBob[src_pitch2 + 3]);
+
+ // c,d
+ if (ABS (pBob[2] - pBob[src_pitch2 - 2]) < diff[0]) {
+ best[0] = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
+ diff[0] = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+ }
+
+ if (ABS (pBob[3] - pBob[src_pitch2 - 1]) < diff[1]) {
+ best[1] = (pBob[3] + pBob[src_pitch2 - 1]) / 2;
+ diff[1] = ABS (pBob[3] - pBob[src_pitch2 - 1]);
+ }
+
+ // j,n
+ if (ABS (pBob[-4] - pBob[src_pitch2 + 4]) < diff[0]) {
+ best[0] = (pBob[-4] + pBob[src_pitch2 + 4]) / 2;
+ diff[0] = ABS (pBob[-4] - pBob[src_pitch2 + 4]);
+ }
+
+ if (ABS (pBob[-3] - pBob[src_pitch2 + 5]) < diff[1]) {
+ best[1] = (pBob[-3] + pBob[src_pitch2 + 5]) / 2;
+ diff[1] = ABS (pBob[-3] - pBob[src_pitch2 + 5]);
+ }
+
+ // k,m
+ if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff[0]) {
+ best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+ diff[0] = ABS (pBob[4] - pBob[src_pitch2 - 4]);
+ }
+
+ if (ABS (pBob[5] - pBob[src_pitch2 - 3]) < diff[1]) {
+ best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2;
+ diff[1] = ABS (pBob[5] - pBob[src_pitch2 - 3]);
+ }
+
+// We will also calc here the max/min values to later limit comb
+// so the max excursion will not exceed the Max_Comb constant
+
+#ifdef SKIP_SEARCH
+ best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
+#else
+ mov[0] = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+ mov[1] = MAX (ABS (pBob[1] - pBobP[1]), ABS (pBob[src_pitch2 + 1] - pBobP[src_pitch2 + 1]));
+
+ MinVals[0] = 0;
+ MinVals[1] = 0;
+ MaxVals[0] = 255;
+ MaxVals[1] = 255;
+
+ if (mov[0] > Max_Mov[0]) {
+ MinVals[0] = MAX (MIN (pBob[0], pBob[src_pitch2]), best[0]);
+ MaxVals[0] = MIN (MAX (pBob[0], pBob[src_pitch2]), best[0]);
+ }
+
+ if (mov[1] > Max_Mov[1]) {
+ MinVals[1] = MAX (MIN (pBob[1], pBob[src_pitch2 + 1]), best[1]);
+ MaxVals[1] = MIN (MAX (pBob[1], pBob[src_pitch2 + 1]), best[1]);
+ }
+
+ best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
+#endif
+
+ avg[0] = (pBob[src_pitch2] + pBob[0]) / 2;
+ avg[1] = (pBob[src_pitch2 + 1] + pBob[1]) / 2;
+ diff2[0] = ABS (pBob[src_pitch2] - pBob[0]);
+ diff2[1] = ABS (pBob[src_pitch2 + 1] - pBob[1]);
+
+ if (diff2[0] < diff[0]) {
+ best[0] = avg[0];
+ diff[0] = diff2[0];
+ }
+
+ if (diff2[1] < diff[1]) {
+ best[1] = avg[1];
+ diff[1] = diff2[1];
+ }
+#endif
diff --git a/gst/deinterlace/tvtime/tomsmocomp/tomsmocompmacros.h b/gst/deinterlace/tvtime/tomsmocomp/tomsmocompmacros.h
new file mode 100644
index 00000000..7e8147ec
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/tomsmocompmacros.h
@@ -0,0 +1,164 @@
+#include <string.h>
+#include <math.h>
+
+// Define a few macros for CPU dependent instructions.
+// I suspect I don't really understand how the C macro preprocessor works but
+// this seems to get the job done. // TRB 7/01
+
+// BEFORE USING THESE YOU MUST SET:
+
+// #define SIMD_TYPE MMXEXT (or MMX or 3DNOW)
+
+// some macros for pavgb instruction
+// V_PAVGB(mmr1, mmr2, mmr work register, smask) mmr2 may = mmrw if you can trash it
+
+#define V_PAVGB_MMX(mmr1, mmr2, mmrw, smask) \
+ "movq "mmr2", "mmrw"\n\t" \
+ "pand "smask", "mmrw"\n\t" \
+ "psrlw $1, "mmrw"\n\t" \
+ "pand "smask", "mmr1"\n\t" \
+ "psrlw $1, "mmr1"\n\t" \
+ "paddusb "mmrw", "mmr1"\n\t"
+#define V_PAVGB_MMXEXT(mmr1, mmr2, mmrw, smask) "pavgb "mmr2", "mmr1"\n\t"
+#define V_PAVGB_3DNOW(mmr1, mmr2, mmrw, smask) "pavgusb "mmr2", "mmr1"\n\t"
+#define V_PAVGB(mmr1, mmr2, mmrw, smask) V_PAVGB2(mmr1, mmr2, mmrw, smask, SIMD_TYPE)
+#define V_PAVGB2(mmr1, mmr2, mmrw, smask, simd_type) V_PAVGB3(mmr1, mmr2, mmrw, smask, simd_type)
+#define V_PAVGB3(mmr1, mmr2, mmrw, smask, simd_type) V_PAVGB_##simd_type(mmr1, mmr2, mmrw, smask)
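+
+// For example, with "#define SIMD_TYPE MMXEXT" the two-step V_PAVGB2/V_PAVGB3
+// indirection lets the preprocessor expand SIMD_TYPE before token pasting, so
+//   V_PAVGB ("%%mm0", "%%mm1", "%%mm2", _ShiftMask)
+// becomes V_PAVGB_MMXEXT (...), i.e. "pavgb %%mm1, %%mm0\n\t".
+// The pmaxub/pminub/movntq macros below use the same dispatch pattern.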
+
+// some macros for pmaxub instruction
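+// (the MMX fallback below relies on unsigned saturation:
+//  (a -us b) +us b == max (a, b) for unsigned bytes)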
+#define V_PMAXUB_MMX(mmr1, mmr2) \
+ "psubusb "mmr2", "mmr1"\n\t" \
+ "paddusb "mmr2", "mmr1"\n\t"
+#define V_PMAXUB_MMXEXT(mmr1, mmr2) "pmaxub "mmr2", "mmr1"\n\t"
+#define V_PMAXUB_3DNOW(mmr1, mmr2) V_PMAXUB_MMX(mmr1, mmr2) // use MMX version
+#define V_PMAXUB(mmr1, mmr2) V_PMAXUB2(mmr1, mmr2, SIMD_TYPE)
+#define V_PMAXUB2(mmr1, mmr2, simd_type) V_PMAXUB3(mmr1, mmr2, simd_type)
+#define V_PMAXUB3(mmr1, mmr2, simd_type) V_PMAXUB_##simd_type(mmr1, mmr2)
+
+// some macros for pminub instruction
+// V_PMINUB(mmr1, mmr2, mmr work register) mmr2 may NOT = mmrw
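+// (the MMX fallback below computes min (a, b) by adding and then subtracting
+//  the complement 255 - b, both with unsigned saturation)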
+#define V_PMINUB_MMX(mmr1, mmr2, mmrw) \
+ "pcmpeqb "mmrw", "mmrw"\n\t" \
+ "psubusb "mmr2", "mmrw"\n\t" \
+ "paddusb "mmrw", "mmr1"\n\t" \
+ "psubusb "mmrw", "mmr1"\n\t"
+#define V_PMINUB_MMXEXT(mmr1, mmr2, mmrw) "pminub "mmr2", "mmr1"\n\t"
+#define V_PMINUB_3DNOW(mmr1, mmr2, mmrw) V_PMINUB_MMX(mmr1, mmr2, mmrw) // use MMX version
+#define V_PMINUB(mmr1, mmr2, mmrw) V_PMINUB2(mmr1, mmr2, mmrw, SIMD_TYPE)
+#define V_PMINUB2(mmr1, mmr2, mmrw, simd_type) V_PMINUB3(mmr1, mmr2, mmrw, simd_type)
+#define V_PMINUB3(mmr1, mmr2, mmrw, simd_type) V_PMINUB_##simd_type(mmr1, mmr2, mmrw)
+
+// some macros for movntq instruction
+// V_MOVNTQ(mmr1, mmr2)
+#define V_MOVNTQ_MMX(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ_3DNOW(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ_MMXEXT(mmr1, mmr2) "movntq "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ(mmr1, mmr2) V_MOVNTQ2(mmr1, mmr2, SIMD_TYPE)
+#define V_MOVNTQ2(mmr1, mmr2, simd_type) V_MOVNTQ3(mmr1, mmr2, simd_type)
+#define V_MOVNTQ3(mmr1, mmr2, simd_type) V_MOVNTQ_##simd_type(mmr1, mmr2)
+
+// end of macros
+
+#ifdef IS_SSE2
+
+#define MERGE4PIXavg(PADDR1, PADDR2) \
+ "movdqu "PADDR1", %%xmm0\n\t" /* our 4 pixels */ \
+ "movdqu "PADDR2", %%xmm1\n\t" /* our pixel2 value */ \
+ "movdqa %%xmm0, %%xmm2\n\t" /* another copy of our pixel1 value */ \
+ "movdqa %%xmm1, %%xmm3\n\t" /* another copy of our pixel1 value */ \
+ "psubusb %%xmm1, %%xmm2\n\t" \
+ "psubusb %%xmm0, %%xmm3\n\t" \
+ "por %%xmm3, %%xmm2\n\t" \
+ "pavgb %%xmm1, %%xmm0\n\t" /* avg of 2 pixels */ \
+ "movdqa %%xmm2, %%xmm3\n\t" /* another copy of our our weights */ \
+ "pxor %%xmm1, %%xmm1\n\t" \
+ "psubusb %%xmm7, %%xmm3\n\t" /* nonzero where old weights lower, else 0 */ \
+ "pcmpeqb %%xmm1, %%xmm3\n\t" /* now ff where new better, else 00 */ \
+ "pcmpeqb %%xmm3, %%xmm1\n\t" /* here ff where old better, else 00 */ \
+ "pand %%xmm3, %%xmm0\n\t" /* keep only better new pixels */ \
+ "pand %%xmm3, %%xmm2\n\t" /* and weights */ \
+ "pand %%xmm1, %%xmm5\n\t" /* keep only better old pixels */ \
+ "pand %%xmm1, %%xmm7\n\t" \
+ "por %%xmm0, %%xmm5\n\t" /* and merge new & old vals */ \
+ "por %%xmm2, %%xmm7\n\t"
+
+#define MERGE4PIXavgH(PADDR1A, PADDR1B, PADDR2A, PADDR2B) \
+ "movdqu "PADDR1A", %%xmm0\n\t" /* our 4 pixels */ \
+ "movdqu "PADDR2A", %%xmm1\n\t" /* our pixel2 value */ \
+ "movdqu "PADDR1B", %%xmm2\n\t" /* our 4 pixels */ \
+ "movdqu "PADDR2B", %%xmm3\n\t" /* our pixel2 value */ \
+ "pavgb %%xmm2, %%xmm0\n\t" \
+ "pavgb %%xmm3, %%xmm1\n\t" \
+ "movdqa %%xmm0, %%xmm2\n\t" /* another copy of our pixel1 value */ \
+ "movdqa %%xmm1, %%xmm3\n\t" /* another copy of our pixel1 value */ \
+ "psubusb %%xmm1, %%xmm2\n\t" \
+ "psubusb %%xmm0, %%xmm3\n\t" \
+ "por %%xmm3, %%xmm2\n\t" \
+ "pavgb %%xmm1, %%xmm0\n\t" /* avg of 2 pixels */ \
+ "movdqa %%xmm2, %%xmm3\n\t" /* another copy of our our weights */ \
+ "pxor %%xmm1, %%xmm1\n\t" \
+ "psubusb %%xmm7, %%xmm3\n\t" /* nonzero where old weights lower, else 0 */ \
+ "pcmpeqb %%xmm1, %%xmm3\n\t" /* now ff where new better, else 00 */ \
+ "pcmpeqb %%xmm3, %%xmm1\n\t" /* here ff where old better, else 00 */ \
+ "pand %%xmm3, %%xmm0\n\t" /* keep only better new pixels */ \
+ "pand %%xmm3, %%xmm2\n\t" /* and weights */ \
+ "pand %%xmm1, %%xmm5\n\t" /* keep only better old pixels */ \
+ "pand %%xmm1, %%xmm7\n\t" \
+ "por %%xmm0, %%xmm5\n\t" /* and merge new & old vals */ \
+ "por %%xmm2, %%xmm7\n\t"
+
+#define RESET_CHROMA "por "_UVMask", %%xmm7\n\t"
+
+#else // ifdef IS_SSE2
+
+#define MERGE4PIXavg(PADDR1, PADDR2) \
+ "movq "PADDR1", %%mm0\n\t" /* our 4 pixels */ \
+ "movq "PADDR2", %%mm1\n\t" /* our pixel2 value */ \
+ "movq %%mm0, %%mm2\n\t" /* another copy of our pixel1 value */ \
+ "movq %%mm1, %%mm3\n\t" /* another copy of our pixel1 value */ \
+ "psubusb %%mm1, %%mm2\n\t" \
+ "psubusb %%mm0, %%mm3\n\t" \
+ "por %%mm3, %%mm2\n\t" \
+ V_PAVGB ("%%mm0", "%%mm1", "%%mm3", _ShiftMask) /* avg of 2 pixels */ \
+ "movq %%mm2, %%mm3\n\t" /* another copy of our our weights */ \
+ "pxor %%mm1, %%mm1\n\t" \
+ "psubusb %%mm7, %%mm3\n\t" /* nonzero where old weights lower, else 0 */ \
+ "pcmpeqb %%mm1, %%mm3\n\t" /* now ff where new better, else 00 */ \
+ "pcmpeqb %%mm3, %%mm1\n\t" /* here ff where old better, else 00 */ \
+ "pand %%mm3, %%mm0\n\t" /* keep only better new pixels */ \
+ "pand %%mm3, %%mm2\n\t" /* and weights */ \
+ "pand %%mm1, %%mm5\n\t" /* keep only better old pixels */ \
+ "pand %%mm1, %%mm7\n\t" \
+ "por %%mm0, %%mm5\n\t" /* and merge new & old vals */ \
+ "por %%mm2, %%mm7\n\t"
+
+#define MERGE4PIXavgH(PADDR1A, PADDR1B, PADDR2A, PADDR2B) \
+ "movq "PADDR1A", %%mm0\n\t" /* our 4 pixels */ \
+ "movq "PADDR2A", %%mm1\n\t" /* our pixel2 value */ \
+ "movq "PADDR1B", %%mm2\n\t" /* our 4 pixels */ \
+ "movq "PADDR2B", %%mm3\n\t" /* our pixel2 value */ \
+ V_PAVGB("%%mm0", "%%mm2", "%%mm2", _ShiftMask) \
+ V_PAVGB("%%mm1", "%%mm3", "%%mm3", _ShiftMask) \
+ "movq %%mm0, %%mm2\n\t" /* another copy of our pixel1 value */ \
+ "movq %%mm1, %%mm3\n\t" /* another copy of our pixel1 value */ \
+ "psubusb %%mm1, %%mm2\n\t" \
+ "psubusb %%mm0, %%mm3\n\t" \
+ "por %%mm3, %%mm2\n\t" \
+ V_PAVGB("%%mm0", "%%mm1", "%%mm3", _ShiftMask) /* avg of 2 pixels */ \
+ "movq %%mm2, %%mm3\n\t" /* another copy of our our weights */ \
+ "pxor %%mm1, %%mm1\n\t" \
+ "psubusb %%mm7, %%mm3\n\t" /* nonzero where old weights lower, else 0 */ \
+ "pcmpeqb %%mm1, %%mm3\n\t" /* now ff where new better, else 00 */ \
+ "pcmpeqb %%mm3, %%mm1\n\t" /* here ff where old better, else 00 */ \
+ "pand %%mm3, %%mm0\n\t" /* keep only better new pixels */ \
+ "pand %%mm3, %%mm2\n\t" /* and weights */ \
+ "pand %%mm1, %%mm5\n\t" /* keep only better old pixels */ \
+ "pand %%mm1, %%mm7\n\t" \
+ "por %%mm0, %%mm5\n\t" /* and merge new & old vals */ \
+ "por %%mm2, %%mm7\n\t"
+
+#define RESET_CHROMA "por "_UVMask", %%mm7\n\t"
+
+#endif
+
+
diff --git a/gst/deinterlace/tvtime/vfir.c b/gst/deinterlace/tvtime/vfir.c
new file mode 100644
index 00000000..b3ebaae1
--- /dev/null
+++ b/gst/deinterlace/tvtime/vfir.c
@@ -0,0 +1,187 @@
+/*
+ *
+ * GStreamer
+ * Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
+ * Copyright (c) 2001, 2002, 2003 Fabrice Bellard.
+ * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * This file contains code from ffmpeg, see http://ffmpeg.org/ (LGPL)
+ * and modifications by Billy Biggs.
+ *
+ * Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs.
+ * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_VFIR (gst_deinterlace_method_vfir_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_VFIR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_VFIR))
+#define GST_IS_DEINTERLACE_METHOD_VFIR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_VFIR))
+#define GST_DEINTERLACE_METHOD_VFIR_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_VFIR, GstDeinterlaceMethodVFIRClass))
+#define GST_DEINTERLACE_METHOD_VFIR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_VFIR, GstDeinterlaceMethodVFIR))
+#define GST_DEINTERLACE_METHOD_VFIR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_VFIR, GstDeinterlaceMethodVFIRClass))
+#define GST_DEINTERLACE_METHOD_VFIR_CAST(obj) ((GstDeinterlaceMethodVFIR*)(obj))
+
+GType gst_deinterlace_method_vfir_get_type (void);
+
+typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodVFIR;
+
+typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodVFIRClass;
+
+/*
+ * The MPEG2 spec uses a slightly harsher filter, they specify
+ * [-1 8 2 8 -1]. ffmpeg uses a similar filter but with more of
+ * a tendancy to blur than to use the local information. The
+ * filter taps here are: [-1 4 2 4 -1].
+ */
+
+/**
+ * C implementation.
+ */
+static inline void
+deinterlace_line_c (GstDeinterlaceMethod * self, GstDeinterlace * parent,
+ guint8 * dst, GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ gint sum;
+ guint8 *lum_m4 = scanlines->tt1;
+ guint8 *lum_m3 = scanlines->t0;
+ guint8 *lum_m2 = scanlines->m1;
+ guint8 *lum_m1 = scanlines->b0;
+ guint8 *lum = scanlines->bb1;
+ gint size = width * 2;
+
+ for (; size > 0; size--) {
+ sum = -lum_m4[0];
+ sum += lum_m3[0] << 2;
+ sum += lum_m2[0] << 1;
+ sum += lum_m1[0] << 2;
+ sum += -lum[0];
+ dst[0] = CLAMP ((sum + 4) >> 3, 0, 255); // clip to 0..255
+ lum_m4++;
+ lum_m3++;
+ lum_m2++;
+ lum_m1++;
+ lum++;
+ dst++;
+ }
+}
+
+#ifdef BUILD_X86_ASM
+#include "mmx.h"
+static void
+deinterlace_line_mmx (GstDeinterlaceMethod * self, GstDeinterlace * parent,
+ guint8 * dst, GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ mmx_t rounder;
+ guint8 *lum_m4 = scanlines->tt1;
+ guint8 *lum_m3 = scanlines->t0;
+ guint8 *lum_m2 = scanlines->m1;
+ guint8 *lum_m1 = scanlines->b0;
+ guint8 *lum = scanlines->bb1;
+
+ rounder.uw[0] = 4;
+ rounder.uw[1] = 4;
+ rounder.uw[2] = 4;
+ rounder.uw[3] = 4;
+ pxor_r2r (mm7, mm7);
+ movq_m2r (rounder, mm6);
+
+ for (; width > 1; width -= 2) {
+ movd_m2r (*lum_m4, mm0);
+ movd_m2r (*lum_m3, mm1);
+ movd_m2r (*lum_m2, mm2);
+ movd_m2r (*lum_m1, mm3);
+ movd_m2r (*lum, mm4);
+ punpcklbw_r2r (mm7, mm0);
+ punpcklbw_r2r (mm7, mm1);
+ punpcklbw_r2r (mm7, mm2);
+ punpcklbw_r2r (mm7, mm3);
+ punpcklbw_r2r (mm7, mm4);
+ paddw_r2r (mm3, mm1); // lum_m3 + lum_m1
+ psllw_i2r (1, mm2); // 2 * lum_m2
+ paddw_r2r (mm4, mm0); // lum_m4 + lum
+ psllw_i2r (2, mm1); // 4 * (lum_m3 + lum_m1)
+ paddw_r2r (mm6, mm2); // add rounder of 4
+ paddw_r2r (mm2, mm1); // 4*(lum_m3 + lum_m1) + 2*lum_m2 + 4
+ psubusw_r2r (mm0, mm1); // subtract the outer taps, saturating at 0
+ psrlw_i2r (3, mm1); // divide by 8
+ packuswb_r2r (mm7, mm1); // saturate to 0..255 and pack to bytes
+ movd_r2m (mm1, *dst);
+ lum_m4 += 4;
+ lum_m3 += 4;
+ lum_m2 += 4;
+ lum_m1 += 4;
+ lum += 4;
+ dst += 4;
+ }
+ emms ();
+
+ /* Handle odd widths */
+ if (width > 0) {
+ scanlines->tt1 = lum_m4;
+ scanlines->t0 = lum_m3;
+ scanlines->m1 = lum_m2;
+ scanlines->b0 = lum_m1;
+ scanlines->bb1 = lum;
+
+ deinterlace_line_c (self, parent, dst, scanlines, width);
+ }
+}
+#endif
+
+G_DEFINE_TYPE (GstDeinterlaceMethodVFIR, gst_deinterlace_method_vfir,
+ GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+gst_deinterlace_method_vfir_class_init (GstDeinterlaceMethodVFIRClass * klass)
+{
+ GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+ GstDeinterlaceSimpleMethodClass *dism_class =
+ (GstDeinterlaceSimpleMethodClass *) klass;
+#ifdef BUILD_X86_ASM
+ guint cpu_flags = oil_cpu_get_flags ();
+#endif
+
+ dim_class->fields_required = 2;
+ dim_class->name = "Blur Vertical";
+ dim_class->nick = "vfir";
+ dim_class->latency = 0;
+
+#ifdef BUILD_X86_ASM
+ if (cpu_flags & OIL_IMPL_FLAG_MMX) {
+ dism_class->interpolate_scanline = deinterlace_line_mmx;
+ } else {
+ dism_class->interpolate_scanline = deinterlace_line_c;
+ }
+#else
+ dism_class->interpolate_scanline = deinterlace_line_c;
+#endif
+}
+
+static void
+gst_deinterlace_method_vfir_init (GstDeinterlaceMethodVFIR * self)
+{
+}
diff --git a/gst/deinterlace/tvtime/weave.c b/gst/deinterlace/tvtime/weave.c
new file mode 100644
index 00000000..1a86170e
--- /dev/null
+++ b/gst/deinterlace/tvtime/weave.c
@@ -0,0 +1,82 @@
+/**
+ * Weave frames
+ * Copyright (C) 2002 Billy Biggs <vektor@dumbterm.net>.
+ * Copyright (C) 2008 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_WEAVE (gst_deinterlace_method_weave_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_WEAVE(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE))
+#define GST_IS_DEINTERLACE_METHOD_WEAVE_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE))
+#define GST_DEINTERLACE_METHOD_WEAVE_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE, GstDeinterlaceMethodWeaveClass))
+#define GST_DEINTERLACE_METHOD_WEAVE(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE, GstDeinterlaceMethodWeave))
+#define GST_DEINTERLACE_METHOD_WEAVE_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE, GstDeinterlaceMethodWeaveClass))
+#define GST_DEINTERLACE_METHOD_WEAVE_CAST(obj) ((GstDeinterlaceMethodWeave*)(obj))
+
+GType gst_deinterlace_method_weave_get_type (void);
+
+typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodWeave;
+
+typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodWeaveClass;
+
+
+static void
+deinterlace_scanline_weave (GstDeinterlaceMethod * self,
+ GstDeinterlace * parent, guint8 * out,
+ GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ oil_memcpy (out, scanlines->m1, parent->row_stride);
+}
+
+static void
+copy_scanline (GstDeinterlaceMethod * self, GstDeinterlace * parent,
+ guint8 * out, GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ oil_memcpy (out, scanlines->m0, parent->row_stride);
+}
+
+G_DEFINE_TYPE (GstDeinterlaceMethodWeave, gst_deinterlace_method_weave,
+ GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+gst_deinterlace_method_weave_class_init (GstDeinterlaceMethodWeaveClass * klass)
+{
+ GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+ GstDeinterlaceSimpleMethodClass *dism_class =
+ (GstDeinterlaceSimpleMethodClass *) klass;
+
+ dim_class->fields_required = 2;
+ dim_class->name = "Weave";
+ dim_class->nick = "weave";
+ dim_class->latency = 0;
+
+ dism_class->interpolate_scanline = deinterlace_scanline_weave;
+ dism_class->copy_scanline = copy_scanline;
+}
+
+static void
+gst_deinterlace_method_weave_init (GstDeinterlaceMethodWeave * self)
+{
+}
diff --git a/gst/deinterlace/tvtime/weavebff.c b/gst/deinterlace/tvtime/weavebff.c
new file mode 100644
index 00000000..eb983cf2
--- /dev/null
+++ b/gst/deinterlace/tvtime/weavebff.c
@@ -0,0 +1,88 @@
+/**
+ * Weave frames, bottom-field-first.
+ * Copyright (C) 2003 Billy Biggs <vektor@dumbterm.net>.
+ * Copyright (C) 2008 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF (gst_deinterlace_method_weave_bff_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_WEAVE_BFF(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF))
+#define GST_IS_DEINTERLACE_METHOD_WEAVE_BFF_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF))
+#define GST_DEINTERLACE_METHOD_WEAVE_BFF_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF, GstDeinterlaceMethodWeaveBFFClass))
+#define GST_DEINTERLACE_METHOD_WEAVE_BFF(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF, GstDeinterlaceMethodWeaveBFF))
+#define GST_DEINTERLACE_METHOD_WEAVE_BFF_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF, GstDeinterlaceMethodWeaveBFFClass))
+#define GST_DEINTERLACE_METHOD_WEAVE_BFF_CAST(obj) ((GstDeinterlaceMethodWeaveBFF*)(obj))
+
+GType gst_deinterlace_method_weave_bff_get_type (void);
+
+typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodWeaveBFF;
+
+typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodWeaveBFFClass;
+
+
+static void
+deinterlace_scanline_weave (GstDeinterlaceMethod * self,
+ GstDeinterlace * parent, guint8 * out,
+ GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ oil_memcpy (out, scanlines->m1, parent->row_stride);
+}
+
+static void
+copy_scanline (GstDeinterlaceMethod * self, GstDeinterlace * parent,
+ guint8 * out, GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ /* FIXME: original code used m2 and m0 but this looks really bad */
+ if (scanlines->bottom_field) {
+ oil_memcpy (out, scanlines->bb2, parent->row_stride);
+ } else {
+ oil_memcpy (out, scanlines->bb0, parent->row_stride);
+ }
+}
+
+G_DEFINE_TYPE (GstDeinterlaceMethodWeaveBFF, gst_deinterlace_method_weave_bff,
+ GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+gst_deinterlace_method_weave_bff_class_init (GstDeinterlaceMethodWeaveBFFClass *
+ klass)
+{
+ GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+ GstDeinterlaceSimpleMethodClass *dism_class =
+ (GstDeinterlaceSimpleMethodClass *) klass;
+
+ dim_class->fields_required = 3;
+ dim_class->name = "Progressive: Bottom Field First";
+ dim_class->nick = "weavebff";
+ dim_class->latency = 0;
+
+ dism_class->interpolate_scanline = deinterlace_scanline_weave;
+ dism_class->copy_scanline = copy_scanline;
+}
+
+static void
+gst_deinterlace_method_weave_bff_init (GstDeinterlaceMethodWeaveBFF * self)
+{
+}
diff --git a/gst/deinterlace/tvtime/weavetff.c b/gst/deinterlace/tvtime/weavetff.c
new file mode 100644
index 00000000..4885b63b
--- /dev/null
+++ b/gst/deinterlace/tvtime/weavetff.c
@@ -0,0 +1,88 @@
+/**
+ * Weave frames, top-field-first.
+ * Copyright (C) 2003 Billy Biggs <vektor@dumbterm.net>.
+ * Copyright (C) 2008 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF (gst_deinterlace_method_weave_tff_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_WEAVE_TFF(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF))
+#define GST_IS_DEINTERLACE_METHOD_WEAVE_TFF_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF))
+#define GST_DEINTERLACE_METHOD_WEAVE_TFF_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF, GstDeinterlaceMethodWeaveTFFClass))
+#define GST_DEINTERLACE_METHOD_WEAVE_TFF(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF, GstDeinterlaceMethodWeaveTFF))
+#define GST_DEINTERLACE_METHOD_WEAVE_TFF_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF, GstDeinterlaceMethodWeaveTFFClass))
+#define GST_DEINTERLACE_METHOD_WEAVE_TFF_CAST(obj) ((GstDeinterlaceMethodWeaveTFF*)(obj))
+
+GType gst_deinterlace_method_weave_tff_get_type (void);
+
+typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodWeaveTFF;
+
+typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodWeaveTFFClass;
+
+
+static void
+deinterlace_scanline_weave (GstDeinterlaceMethod * self,
+ GstDeinterlace * parent, guint8 * out,
+ GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ oil_memcpy (out, scanlines->m1, parent->row_stride);
+}
+
+static void
+copy_scanline (GstDeinterlaceMethod * self, GstDeinterlace * parent,
+ guint8 * out, GstDeinterlaceScanlineData * scanlines, gint width)
+{
+ /* FIXME: original code used m2 and m0 but this looks really bad */
+ if (scanlines->bottom_field) {
+ oil_memcpy (out, scanlines->bb0, parent->row_stride);
+ } else {
+ oil_memcpy (out, scanlines->bb2, parent->row_stride);
+ }
+}
+
+G_DEFINE_TYPE (GstDeinterlaceMethodWeaveTFF, gst_deinterlace_method_weave_tff,
+ GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+gst_deinterlace_method_weave_tff_class_init (GstDeinterlaceMethodWeaveTFFClass *
+ klass)
+{
+ GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+ GstDeinterlaceSimpleMethodClass *dism_class =
+ (GstDeinterlaceSimpleMethodClass *) klass;
+
+ dim_class->fields_required = 3;
+ dim_class->name = "Progressive: Top Field First";
+ dim_class->nick = "weavetff";
+ dim_class->latency = 0;
+
+ dism_class->interpolate_scanline = deinterlace_scanline_weave;
+ dism_class->copy_scanline = copy_scanline;
+}
+
+static void
+gst_deinterlace_method_weave_tff_init (GstDeinterlaceMethodWeaveTFF * self)
+{
+}
diff --git a/gst/deinterlace/tvtime/x86-64_macros.inc b/gst/deinterlace/tvtime/x86-64_macros.inc
new file mode 100644
index 00000000..2e9df758
--- /dev/null
+++ b/gst/deinterlace/tvtime/x86-64_macros.inc
@@ -0,0 +1,82 @@
+/*
+ *
+ * GStreamer
+ * Copyright (C) 2004 Dirk Ziegelmeier <dziegel@gmx.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ *
+ * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
+ */
+
+/*
+ * This file is copied from TVTIME's sources.
+ * Original author: Achim Schneider <batchall@mordor.ch>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef XAX
+
+#if defined (HAVE_CPU_I386) && !defined(HAVE_CPU_X86_64)
+
+#define XAX "eax"
+#define XBX "ebx"
+#define XCX "ecx"
+#define XDX "edx"
+#define XSI "esi"
+#define XDI "edi"
+#define XSP "esp"
+#define MOVX "movl"
+#define LEAX "leal"
+#define DECX "decl"
+#define PUSHX "pushl"
+#define POPX "popl"
+#define CMPX "cmpl"
+#define ADDX "addl"
+#define SHLX "shll"
+#define SHRX "shrl"
+#define SUBX "subl"
+
+#elif defined (HAVE_CPU_X86_64)
+
+#define XAX "rax"
+#define XBX "rbx"
+#define XCX "rcx"
+#define XDX "rdx"
+#define XSI "rsi"
+#define XDI "rdi"
+#define XSP "rsp"
+#define MOVX "movq"
+#define LEAX "leaq"
+#define DECX "decq"
+#define PUSHX "pushq"
+#define POPX "popq"
+#define CMPX "cmpq"
+#define ADDX "addq"
+#define SHLX "shlq"
+#define SHRX "shrq"
+#define SUBX "subq"
+
+#else
+#error Undefined architecture. Define either HAVE_CPU_I386 or HAVE_CPU_X86_64.
+#endif
+
+#endif
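+
+/*
+ * Usage sketch (illustrative): inline assembly written against these macros
+ * stays portable between i386 and x86-64. For example
+ *   "movq (%%"XBX", %%"XCX"), %%mm0\n\t"
+ * addresses memory through ebx/ecx on i386 and rbx/rcx on x86-64, while
+ *   MOVX" %0, %%"XAX"\n\t"
+ * picks movl or movq to match the register width.
+ */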