Diffstat (limited to 'gst/deinterlace/tvtime')
33 files changed, 6147 insertions, 0 deletions
diff --git a/gst/deinterlace/tvtime/greedy.c b/gst/deinterlace/tvtime/greedy.c new file mode 100644 index 00000000..293d82fa --- /dev/null +++ b/gst/deinterlace/tvtime/greedy.c @@ -0,0 +1,488 @@ +/* + * + * GStreamer + * Copyright (c) 2000 Tom Barry All rights reserved. + * mmx.h port copyright (c) 2002 Billy Biggs <vektor@dumbterm.net>. + * + * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry + * and Billy Biggs. + * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578 + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "_stdint.h" + +#include "gstdeinterlace.h" +#include <string.h> + +#define GST_TYPE_DEINTERLACE_METHOD_GREEDY_L (gst_deinterlace_method_greedy_l_get_type ()) +#define GST_IS_DEINTERLACE_METHOD_GREEDY_L(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L)) +#define GST_IS_DEINTERLACE_METHOD_GREEDY_L_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L)) +#define GST_DEINTERLACE_METHOD_GREEDY_L_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyLClass)) +#define GST_DEINTERLACE_METHOD_GREEDY_L(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyL)) +#define GST_DEINTERLACE_METHOD_GREEDY_L_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyLClass)) +#define GST_DEINTERLACE_METHOD_GREEDY_L_CAST(obj) ((GstDeinterlaceMethodGreedyL*)(obj)) + +GType gst_deinterlace_method_greedy_l_get_type (void); + +typedef struct +{ + GstDeinterlaceMethod parent; + + guint max_comb; +} GstDeinterlaceMethodGreedyL; + +typedef struct +{ + GstDeinterlaceMethodClass parent_class; + void (*scanline) (GstDeinterlaceMethodGreedyL * self, uint8_t * L2, + uint8_t * L1, uint8_t * L3, uint8_t * L2P, uint8_t * Dest, int size); +} GstDeinterlaceMethodGreedyLClass; + +// This is a simple lightweight DeInterlace method that uses little CPU time +// but gives very good results for low or intermediate motion. +// It defers frames by one field, but that does not seem to produce noticeable +// lip sync problems. +// +// The method used is to take either the older or newer weave pixel depending +// upon which gives the smaller comb factor, and then clip to avoid large damage +// when wrong. +// +// I'd intended this to be part of a larger more elaborate method added to +// Blended Clip but this gives too good results for the CPU to ignore here.
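+// A worked instance of the rule above (illustrative values only, assuming
+// the GLib ABS/MIN/MAX/CLAMP macros that this file already uses):
+//
+//   t1 = 100, b1 = 120          ->  avg = (t1 + b1) / 2 = 110
+//   m0 = 180 (newest weave)     ->  ABS (m0 - avg) = 70
+//   m2 = 105 (previous weave)   ->  ABS (m2 - avg) = 5, so best = m2 = 105
+//   max_comb = 15               ->  clip range = [MIN (t1, b1) - 15,
+//                                   MAX (t1, b1) + 15] = [85, 135]
+//   output = CLAMP (best, 85, 135) = 105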
+ +static inline void +deinterlace_greedy_packed422_scanline_c (GstDeinterlaceMethodGreedyL * self, + uint8_t * m0, uint8_t * t1, + uint8_t * b1, uint8_t * m2, uint8_t * output, int width) +{ + int avg, l2_diff, lp2_diff, max, min, best; + guint max_comb = self->max_comb; + + // L2 == m0 + // L1 == t1 + // L3 == b1 + // LP2 == m2 + + while (width--) { + avg = (*t1 + *b1) / 2; + + l2_diff = ABS (*m0 - avg); + lp2_diff = ABS (*m2 - avg); + + if (l2_diff > lp2_diff) + best = *m2; + else + best = *m0; + + max = MAX (*t1, *b1); + min = MIN (*t1, *b1); + + if (max < 256 - max_comb) + max += max_comb; + else + max = 255; + + if (min > max_comb) + min -= max_comb; + else + min = 0; + + *output = CLAMP (best, min, max); + + // Advance to the next set of pixels. + output += 1; + m0 += 1; + t1 += 1; + b1 += 1; + m2 += 1; + } +} + +#ifdef BUILD_X86_ASM +#include "mmx.h" +static void +deinterlace_greedy_packed422_scanline_mmx (GstDeinterlaceMethodGreedyL * self, + uint8_t * m0, uint8_t * t1, + uint8_t * b1, uint8_t * m2, uint8_t * output, int width) +{ + mmx_t MaxComb; + mmx_t ShiftMask; + + // How badly do we let it weave? 0-255 + MaxComb.ub[0] = self->max_comb; + MaxComb.ub[1] = self->max_comb; + MaxComb.ub[2] = self->max_comb; + MaxComb.ub[3] = self->max_comb; + MaxComb.ub[4] = self->max_comb; + MaxComb.ub[5] = self->max_comb; + MaxComb.ub[6] = self->max_comb; + MaxComb.ub[7] = self->max_comb; + + ShiftMask.ub[0] = 0x7f; + ShiftMask.ub[1] = 0x7f; + ShiftMask.ub[2] = 0x7f; + ShiftMask.ub[3] = 0x7f; + ShiftMask.ub[4] = 0x7f; + ShiftMask.ub[5] = 0x7f; + ShiftMask.ub[6] = 0x7f; + ShiftMask.ub[7] = 0x7f; + + // L2 == m0 + // L1 == t1 + // L3 == b1 + // LP2 == m2 + + movq_m2r (MaxComb, mm6); + + for (; width > 7; width -= 8) { + movq_m2r (*t1, mm1); // L1 + movq_m2r (*m0, mm2); // L2 + movq_m2r (*b1, mm3); // L3 + movq_m2r (*m2, mm0); // LP2 + + // average L1 and L3 leave result in mm4 + movq_r2r (mm1, mm4); // L1 + movq_r2r (mm3, mm5); // L3 + psrlw_i2r (1, mm4); // L1/2 + pand_m2r (ShiftMask, mm4); + psrlw_i2r (1, mm5); // L3/2 + pand_m2r (ShiftMask, mm5); + paddusb_r2r (mm5, mm4); // (L1 + L3) / 2 + + // get abs value of possible L2 comb + movq_r2r (mm2, mm7); // L2 + psubusb_r2r (mm4, mm7); // L2 - avg + movq_r2r (mm4, mm5); // avg + psubusb_r2r (mm2, mm5); // avg - L2 + por_r2r (mm7, mm5); // abs(avg-L2) + + // get abs value of possible LP2 comb + movq_r2r (mm0, mm7); // LP2 + psubusb_r2r (mm4, mm7); // LP2 - avg + psubusb_r2r (mm0, mm4); // avg - LP2 + por_r2r (mm7, mm4); // abs(avg-LP2) + + // use L2 or LP2 depending upon which makes smaller comb + psubusb_r2r (mm5, mm4); // see if it goes to zero + psubusb_r2r (mm5, mm5); // 0 + pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0 + pcmpeqb_r2r (mm4, mm5); // opposite of mm4 + + // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55 + pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0 + pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0 + por_r2r (mm5, mm4); // may the best win + + // Now lets clip our chosen value to be not outside of the range + // of the high/low range L1-L3 by more than abs(L1-L3) + // This allows some comb but limits the damages and also allows more + // detail than a boring oversmoothed clip. + + movq_r2r (mm1, mm2); // copy L1 + psubusb_r2r (mm3, mm2); // - L3, with saturation + paddusb_r2r (mm3, mm2); // now = Max(L1,L3) + + pcmpeqb_r2r (mm7, mm7); // all ffffffff + psubusb_r2r (mm1, mm7); // - L1 + paddusb_r2r (mm7, mm3); // add, may sat at fff.. 
+ psubusb_r2r (mm7, mm3); // now = Min(L1,L3) + + // allow the value to be above the high or below the low by amt of MaxComb + paddusb_r2r (mm6, mm2); // increase max by diff + psubusb_r2r (mm6, mm3); // lower min by diff + + psubusb_r2r (mm3, mm4); // best - Min + paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3) + + pcmpeqb_r2r (mm7, mm7); // all ffffffff + psubusb_r2r (mm4, mm7); // - Max(best,Min(best,L3) + paddusb_r2r (mm7, mm2); // add may sat at FFF.. + psubusb_r2r (mm7, mm2); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped + + movq_r2m (mm2, *output); // move in our clipped best + + // Advance to the next set of pixels. + output += 8; + m0 += 8; + t1 += 8; + b1 += 8; + m2 += 8; + } + emms (); + if (width > 0) + deinterlace_greedy_packed422_scanline_c (self, m0, t1, b1, m2, output, + width); +} + +#include "sse.h" + +static void +deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlaceMethodGreedyL * + self, uint8_t * m0, uint8_t * t1, uint8_t * b1, uint8_t * m2, + uint8_t * output, int width) +{ + mmx_t MaxComb; + + // How badly do we let it weave? 0-255 + MaxComb.ub[0] = self->max_comb; + MaxComb.ub[1] = self->max_comb; + MaxComb.ub[2] = self->max_comb; + MaxComb.ub[3] = self->max_comb; + MaxComb.ub[4] = self->max_comb; + MaxComb.ub[5] = self->max_comb; + MaxComb.ub[6] = self->max_comb; + MaxComb.ub[7] = self->max_comb; + + // L2 == m0 + // L1 == t1 + // L3 == b1 + // LP2 == m2 + + movq_m2r (MaxComb, mm6); + + for (; width > 7; width -= 8) { + movq_m2r (*t1, mm1); // L1 + movq_m2r (*m0, mm2); // L2 + movq_m2r (*b1, mm3); // L3 + movq_m2r (*m2, mm0); // LP2 + + // average L1 and L3 leave result in mm4 + movq_r2r (mm1, mm4); // L1 + pavgb_r2r (mm3, mm4); // (L1 + L3)/2 + + // get abs value of possible L2 comb + movq_r2r (mm2, mm7); // L2 + psubusb_r2r (mm4, mm7); // L2 - avg + movq_r2r (mm4, mm5); // avg + psubusb_r2r (mm2, mm5); // avg - L2 + por_r2r (mm7, mm5); // abs(avg-L2) + + // get abs value of possible LP2 comb + movq_r2r (mm0, mm7); // LP2 + psubusb_r2r (mm4, mm7); // LP2 - avg + psubusb_r2r (mm0, mm4); // avg - LP2 + por_r2r (mm7, mm4); // abs(avg-LP2) + + // use L2 or LP2 depending upon which makes smaller comb + psubusb_r2r (mm5, mm4); // see if it goes to zero + pxor_r2r (mm5, mm5); // 0 + pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0 + pcmpeqb_r2r (mm4, mm5); // opposite of mm4 + + // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55 + pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0 + pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0 + por_r2r (mm5, mm4); // may the best win + + // Now lets clip our chosen value to be not outside of the range + // of the high/low range L1-L3 by more than abs(L1-L3) + // This allows some comb but limits the damages and also allows more + // detail than a boring oversmoothed clip. + + movq_r2r (mm1, mm2); // copy L1 + pmaxub_r2r (mm3, mm2); // now = Max(L1,L3) + + pminub_r2r (mm1, mm3); // now = Min(L1,L3) + + // allow the value to be above the high or below the low by amt of MaxComb + paddusb_r2r (mm6, mm2); // increase max by diff + psubusb_r2r (mm6, mm3); // lower min by diff + + + pmaxub_r2r (mm3, mm4); // now = Max(best,Min(L1,L3) + pminub_r2r (mm4, mm2); // now = Min( Max(best, Min(L1,L3)), L2 )=L2 clipped + + movq_r2m (mm2, *output); // move in our clipped best + + // Advance to the next set of pixels. 
+ output += 8; + m0 += 8; + t1 += 8; + b1 += 8; + m2 += 8; + } + emms (); + + if (width > 0) + deinterlace_greedy_packed422_scanline_c (self, m0, t1, b1, m2, output, + width); +} + +#endif + +static void +deinterlace_frame_di_greedy (GstDeinterlaceMethod * d_method, + GstDeinterlace * object, GstBuffer * outbuf) +{ + GstDeinterlaceMethodGreedyL *self = + GST_DEINTERLACE_METHOD_GREEDY_L (d_method); + GstDeinterlaceMethodGreedyLClass *klass = + GST_DEINTERLACE_METHOD_GREEDY_L_GET_CLASS (self); + int InfoIsOdd = 0; + int Line; + unsigned int Pitch = object->field_stride; + unsigned char *L1; // ptr to Line1, of 3 + unsigned char *L2; // ptr to Line2, the weave line + unsigned char *L3; // ptr to Line3 + + unsigned char *L2P; // ptr to prev Line2 + unsigned char *Dest = GST_BUFFER_DATA (outbuf); + + // copy first even line no matter what, and the first odd line if we're + // processing an EVEN field. (note diff from other deint rtns.) + + if (object->field_history[object->history_count - 1].flags == + PICTURE_INTERLACED_BOTTOM) { + InfoIsOdd = 1; + + L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf); + L2 = GST_BUFFER_DATA (object->field_history[object->history_count - 1].buf); + L3 = L1 + Pitch; + L2P = + GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf); + + // copy first even line + oil_memcpy (Dest, L1, object->row_stride); + Dest += object->row_stride; + } else { + InfoIsOdd = 0; + L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf); + L2 = GST_BUFFER_DATA (object->field_history[object->history_count - + 1].buf) + Pitch; + L3 = L1 + Pitch; + L2P = + GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf) + + Pitch; + + // copy first even line + oil_memcpy (Dest, GST_BUFFER_DATA (object->field_history[0].buf), + object->row_stride); + Dest += object->row_stride; + // then first odd line + oil_memcpy (Dest, L1, object->row_stride); + Dest += object->row_stride; + } + + for (Line = 0; Line < (object->field_height - 1); ++Line) { + klass->scanline (self, L2, L1, L3, L2P, Dest, object->row_stride); + Dest += object->row_stride; + oil_memcpy (Dest, L3, object->row_stride); + Dest += object->row_stride; + + L1 += Pitch; + L2 += Pitch; + L3 += Pitch; + L2P += Pitch; + } + + if (InfoIsOdd) { + oil_memcpy (Dest, L2, object->row_stride); + } +} + + +G_DEFINE_TYPE (GstDeinterlaceMethodGreedyL, gst_deinterlace_method_greedy_l, + GST_TYPE_DEINTERLACE_METHOD); + +enum +{ + ARG_0, + ARG_MAX_COMB +}; + +static void +gst_deinterlace_method_greedy_l_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstDeinterlaceMethodGreedyL *self = GST_DEINTERLACE_METHOD_GREEDY_L (object); + + switch (prop_id) { + case ARG_MAX_COMB: + self->max_comb = g_value_get_uint (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + } +} + +static void +gst_deinterlace_method_greedy_l_get_property (GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec) +{ + GstDeinterlaceMethodGreedyL *self = GST_DEINTERLACE_METHOD_GREEDY_L (object); + + switch (prop_id) { + case ARG_MAX_COMB: + g_value_set_uint (value, self->max_comb); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + } +} + +static void +gst_deinterlace_method_greedy_l_class_init (GstDeinterlaceMethodGreedyLClass * + klass) +{ + GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass; + GObjectClass *gobject_class = (GObjectClass *) klass; 
+#ifdef BUILD_X86_ASM + guint cpu_flags = oil_cpu_get_flags (); +#endif + + gobject_class->set_property = gst_deinterlace_method_greedy_l_set_property; + gobject_class->get_property = gst_deinterlace_method_greedy_l_get_property; + + g_object_class_install_property (gobject_class, ARG_MAX_COMB, + g_param_spec_uint ("max-comb", + "Max comb", + "Max Comb", 0, 255, 15, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS) + ); + + dim_class->fields_required = 4; + dim_class->deinterlace_frame = deinterlace_frame_di_greedy; + dim_class->name = "Motion Adaptive: Simple Detection"; + dim_class->nick = "greedyl"; + dim_class->latency = 1; + +#ifdef BUILD_X86_ASM + if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) { + klass->scanline = deinterlace_greedy_packed422_scanline_mmxext; + } else if (cpu_flags & OIL_IMPL_FLAG_MMX) { + klass->scanline = deinterlace_greedy_packed422_scanline_mmx; + } else { + klass->scanline = deinterlace_greedy_packed422_scanline_c; + } +#else + klass->scanline = deinterlace_greedy_packed422_scanline_c; +#endif +} + +static void +gst_deinterlace_method_greedy_l_init (GstDeinterlaceMethodGreedyL * self) +{ + self->max_comb = 15; +} diff --git a/gst/deinterlace/tvtime/greedyh.asm b/gst/deinterlace/tvtime/greedyh.asm new file mode 100644 index 00000000..86e97c58 --- /dev/null +++ b/gst/deinterlace/tvtime/greedyh.asm @@ -0,0 +1,250 @@ +/* + * + * GStreamer + * Copyright (c) 2001 Tom Barry. All rights reserved. + * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + + +/* + * Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry. 
+ * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578 + */ + + +#include "x86-64_macros.inc" + +void +FUNCT_NAME (GstDeinterlaceMethodGreedyH *self, uint8_t * L1, uint8_t * L2, uint8_t * L3, uint8_t * L2P, + uint8_t * Dest, int size) +{ + + // in tight loop some vars are accessed faster in local storage + int64_t YMask = 0x00ff00ff00ff00ffull; // to keep only luma + int64_t UVMask = 0xff00ff00ff00ff00ull; // to keep only chroma + int64_t ShiftMask = 0xfefefefefefefefeull; // to avoid shifting chroma to luma + int64_t QW256 = 0x0100010001000100ull; // 4 256's + int64_t MaxComb; + int64_t MotionThreshold; + int64_t MotionSense; + int64_t i; + long LoopCtr; + long oldbx; + + int64_t QW256B; + int64_t LastAvg = 0; //interp value from left qword + + // FIXME: Use C implementation if the width is not a multiple of 4 + // Do something more optimal later + if (size % 8 != 0) { + // fall back to the C version for odd widths; without the early return + // the assembly below would process the same line a second time + greedyDScaler_C (self, L1, L2, L3, L2P, Dest, size); + return; + } + + // Set up our two parms that are actually evaluated for each pixel + i = self->max_comb; + MaxComb = + i << 56 | i << 48 | i << 40 | i << 32 | i << 24 | i << 16 | i << 8 | i; + + i = self->motion_threshold; // scale to range of 0-257 + MotionThreshold = i << 48 | i << 32 | i << 16 | i | UVMask; + + i = self->motion_sense; // scale to range of 0-257 + MotionSense = i << 48 | i << 32 | i << 16 | i; + + i = 0xffffffff - 256; + QW256B = i << 48 | i << 32 | i << 16 | i; // save a couple instr on PMINSW instruct. + + LoopCtr = size / 8 - 1; // there are LineLength / 8 qwords per line but do 1 less, adj at end of loop + + // For ease of reading, the comments below assume that we're operating on an odd + // field (i.e., that InfoIsOdd is true). Assume the obvious for even lines. + __asm__ __volatile__ ( + // save ebx (-fPIC) + MOVX " %%" XBX ", %[oldbx]\n\t" + MOVX " %[L1], %%" XAX "\n\t" + LEAX " 8(%%" XAX "), %%" XBX "\n\t" // next qword needed by DJR + MOVX " %[L3], %%" XCX "\n\t" + SUBX " %%" XAX ", %%" XCX "\n\t" // carry L3 addr as an offset + MOVX " %[L2P], %%" XDX "\n\t" + MOVX " %[L2], %%" XSI "\n\t" + MOVX " %[Dest], %%" XDI "\n\t" // DL1 if Odd or DL2 if Even + + ".align 8\n\t" + "1:\n\t" + "movq (%%" XSI "), %%mm0\n\t" // L2 - the newest weave pixel value + "movq (%%" XAX "), %%mm1\n\t" // L1 - the top pixel + "movq (%%" XDX "), %%mm2\n\t" // L2P - the prev weave pixel + "movq (%%" XAX ", %%" XCX "), %%mm3\n\t" // L3, next odd row + "movq %%mm1, %%mm6\n\t" // L1 - get simple single pixel interp + + // pavgb mm6, mm3 // use macro below + V_PAVGB ("%%mm6", "%%mm3", "%%mm4", "%[ShiftMask]") + + // DJR - Diagonal Jaggie Reduction + // In the event that we are going to use an average (Bob) pixel we do not want a jagged + // stair step effect. To combat this we avg in the 2 horizontally adjacent pixels into the + // interpolated Bob mix. This will do horizontal smoothing for only the Bob'd pixels.
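+ // Net effect of the DJR mix below, ignoring pavgb rounding: on the MMX
+ // path the bob pixel ends up as 1/2 centre + 1/4 left neighbour + 1/4
+ // right neighbour; the two extra averages taken on the non-MMX paths
+ // shift the weighting to roughly 3/8 centre, 5/8 adjacent, matching the
+ // comment further down.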
+ + "movq %[LastAvg], %%mm4\n\t" // the bob value from prev qword in row + "movq %%mm6, %[LastAvg]\n\t" // save for next pass + "psrlq $48, %%mm4\n\t" // right justify 1 pixel + "movq %%mm6, %%mm7\n\t" // copy of simple bob pixel + "psllq $16, %%mm7\n\t" // left justify 3 pixels + "por %%mm7, %%mm4\n\t" // and combine + "movq (%%" XBX "), %%mm5\n\t" // next horiz qword from L1 + // pavgb mm5, qword ptr[ebx+ecx] // next horiz qword from L3, use macro below + + V_PAVGB ("%%mm5", "(%%" XBX ",%%" XCX ")", "%%mm7", "%[ShiftMask]") + "psllq $48, %%mm5\n\t" // left just 1 pixel + "movq %%mm6, %%mm7\n\t" // another copy of simple bob pixel + "psrlq $16, %%mm7\n\t" // right just 3 pixels + "por %%mm7, %%mm5\n\t" // combine + // pavgb mm4, mm5 // avg of forward and prev by 1 pixel, use macro + V_PAVGB ("%%mm4", "%%mm5", "%%mm5", "%[ShiftMask]") // mm5 gets modified if MMX + // pavgb mm6, mm4 // avg of center and surround interp vals, use macro + V_PAVGB ("%%mm6", "%%mm4", "%%mm7", "%[ShiftMask]") + + // Don't do any more averaging than needed for mmx. It hurts performance and causes rounding errors. +#ifndef IS_MMX + // pavgb mm4, mm6 // 1/4 center, 3/4 adjacent + V_PAVGB ("%%mm4", "%%mm6", "%%mm7", "%[ShiftMask]") + // pavgb mm6, mm4 // 3/8 center, 5/8 adjacent + V_PAVGB ("%%mm6", "%%mm4", "%%mm7", "%[ShiftMask]") +#endif + + // get abs value of possible L2 comb + "movq %%mm6, %%mm4\n\t" // work copy of interp val + "movq %%mm2, %%mm7\n\t" // L2 + "psubusb %%mm4, %%mm7\n\t" // L2 - avg + "movq %%mm4, %%mm5\n\t" // avg + "psubusb %%mm2, %%mm5\n\t" // avg - L2 + "por %%mm7, %%mm5\n\t" // abs(avg-L2) + + // get abs value of possible L2P comb + "movq %%mm0, %%mm7\n\t" // L2P + "psubusb %%mm4, %%mm7\n\t" // L2P - avg + "psubusb %%mm0, %%mm4\n\t" // avg - L2P + "por %%mm7, %%mm4\n\t" // abs(avg-L2P) + + // use L2 or L2P depending upon which makes smaller comb + "psubusb %%mm5, %%mm4\n\t" // see if it goes to zero + "psubusb %%mm5, %%mm5\n\t" // 0 + "pcmpeqb %%mm5, %%mm4\n\t" // if (mm4=0) then FF else 0 + "pcmpeqb %%mm4, %%mm5\n\t" // opposite of mm4 + + // if Comb(L2P) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55 + "pand %%mm2, %%mm5\n\t" // use L2 if mm5 == ff, else 0 + "pand %%mm0, %%mm4\n\t" // use L2P if mm4 = ff, else 0 + "por %%mm5, %%mm4\n\t" // may the best win + + // Inventory: at this point we have the following values: + // mm0 = L2P (or L2) + // mm1 = L1 + // mm2 = L2 (or L2P) + // mm3 = L3 + // mm4 = the best of L2,L2P weave pixel, base upon comb + // mm6 = the avg interpolated value, if we need to use it + // Let's measure movement, as how much the weave pixel has changed + + "movq %%mm2, %%mm7\n\t" + "psubusb %%mm0, %%mm2\n\t" + "psubusb %%mm7, %%mm0\n\t" + "por %%mm2, %%mm0\n\t" // abs value of change, used later + + // Now lets clip our chosen value to be not outside of the range + // of the high/low range L1-L3 by more than MaxComb. + // This allows some comb but limits the damages and also allows more + // detail than a boring oversmoothed clip. 
+ + "movq %%mm1, %%mm2\n\t" // copy L1 + // pmaxub mm2, mm3 // use macro + V_PMAXUB ("%%mm2", "%%mm3") // now = Max(L1,L3) + "movq %%mm1, %%mm5\n\t" // copy L1 + // pminub mm5, mm3 // now = Min(L1,L3), use macro + V_PMINUB ("%%mm5", "%%mm3", "%%mm7") + + // allow the value to be above the high or below the low by amt of MaxComb + "psubusb %[MaxComb], %%mm5\n\t" // lower min by diff + "paddusb %[MaxComb], %%mm2\n\t" // increase max by diff + // pmaxub mm4, mm5 // now = Max(best,Min(L1,L3) use macro + V_PMAXUB ("%%mm4", "%%mm5") + // pminub mm4, mm2 // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped + V_PMINUB ("%%mm4", "%%mm2", "%%mm7") + + // Blend weave pixel with bob pixel, depending on motion val in mm0 + "psubusb %[MotionThreshold], %%mm0\n\t" // test Threshold, clear chroma change >>>?? + "pmullw %[MotionSense], %%mm0\n\t" // mul by user factor, keep low 16 bits + "movq %[QW256], %%mm7\n\t" +#ifdef IS_MMXEXT + "pminsw %%mm7, %%mm0\n\t" // max = 256 +#else + "paddusw %[QW256B], %%mm0\n\t" // add, may sat at fff.. + "psubusw %[QW256B], %%mm0\n\t" // now = Min(L1,256) +#endif + "psubusw %%mm0, %%mm7\n\t" // so the 2 sum to 256, weighted avg + "movq %%mm4, %%mm2\n\t" // save weave chroma info before trashing + "pand %[YMask], %%mm4\n\t" // keep only luma from calc'd value + "pmullw %%mm7, %%mm4\n\t" // use more weave for less motion + "pand %[YMask], %%mm6\n\t" // keep only luma from calc'd value + "pmullw %%mm0, %%mm6\n\t" // use more bob for large motion + "paddusw %%mm6, %%mm4\n\t" // combine + "psrlw $8, %%mm4\n\t" // div by 256 to get weighted avg + // chroma comes from weave pixel + "pand %[UVMask], %%mm2\n\t" // keep chroma + "por %%mm4, %%mm2\n\t" // and combine + V_MOVNTQ ("(%%" XDI ")", "%%mm2") // move in our clipped best, use macro + // bump ptrs and loop + LEAX " 8(%%" XAX "), %%" XAX "\n\t" + LEAX " 8(%%" XBX "), %%" XBX "\n\t" + LEAX " 8(%%" XDX "), %%" XDX "\n\t" + LEAX " 8(%%" XDI "), %%" XDI "\n\t" + LEAX " 8(%%" XSI "), %%" XSI "\n\t" + DECX " %[LoopCtr]\n\t" + + "jg 1b\n\t" // loop if not to last line + // note P-III default assumes backward branches taken + "jl 1f\n\t" // done + MOVX " %%" XAX ", %%" XBX "\n\t" // sharpness lookahead 1 byte only, be wrong on 1 + "jmp 1b\n\t" + + "1:\n\t" + MOVX " %[oldbx], %%" XBX "\n\t" + "emms\n\t": /* no outputs */ + + :[LastAvg] "m" (LastAvg), + [L1] "m" (L1), + [L3] "m" (L3), + [L2P] "m" (L2P), + [L2] "m" (L2), + [Dest] "m" (Dest), + [ShiftMask] "m" (ShiftMask), + [MaxComb] "m" (MaxComb), + [MotionThreshold] "m" (MotionThreshold), + [MotionSense] "m" (MotionSense), + [QW256B] "m" (QW256B), + [YMask] "m" (YMask), + [UVMask] "m" (UVMask), + [LoopCtr] "m" (LoopCtr), + [QW256] "m" (QW256), + [oldbx] "m" (oldbx) + : XAX, XCX, XDX, XSI, XDI, + "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", +#ifdef __MMX__ + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", +#endif + "memory", "cc"); +} diff --git a/gst/deinterlace/tvtime/greedyh.c b/gst/deinterlace/tvtime/greedyh.c new file mode 100644 index 00000000..5d050ce0 --- /dev/null +++ b/gst/deinterlace/tvtime/greedyh.c @@ -0,0 +1,420 @@ +/* + * + * GStreamer + * Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net> + * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs. + * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578 + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "greedyhmacros.h" + +#include <stdlib.h> +#include "_stdint.h" +#include <string.h> + +#include "gst/gst.h" +#include "plugins.h" +#include "gstdeinterlace.h" + +#define GST_TYPE_DEINTERLACE_METHOD_GREEDY_H (gst_deinterlace_method_greedy_h_get_type ()) +#define GST_IS_DEINTERLACE_METHOD_GREEDY_H(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_H)) +#define GST_IS_DEINTERLACE_METHOD_GREEDY_H_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_H)) +#define GST_DEINTERLACE_METHOD_GREEDY_H_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_H, GstDeinterlaceMethodGreedyHClass)) +#define GST_DEINTERLACE_METHOD_GREEDY_H(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_H, GstDeinterlaceMethodGreedyH)) +#define GST_DEINTERLACE_METHOD_GREEDY_H_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_H, GstDeinterlaceMethodGreedyHClass)) +#define GST_DEINTERLACE_METHOD_GREEDY_H_CAST(obj) ((GstDeinterlaceMethodGreedyH*)(obj)) + +GType gst_deinterlace_method_greedy_h_get_type (void); + +typedef struct +{ + GstDeinterlaceMethod parent; + + guint max_comb, motion_threshold, motion_sense; +} GstDeinterlaceMethodGreedyH; + +typedef struct +{ + GstDeinterlaceMethodClass parent_class; + void (*scanline) (GstDeinterlaceMethodGreedyH * self, uint8_t * L2, + uint8_t * L1, uint8_t * L3, uint8_t * L2P, uint8_t * Dest, int size); +} GstDeinterlaceMethodGreedyHClass; + +void +greedyDScaler_C (GstDeinterlaceMethodGreedyH * self, uint8_t * L1, uint8_t * L2, + uint8_t * L3, uint8_t * L2P, uint8_t * Dest, int size) +{ + int Pos; + uint8_t l1_l, l1_1_l, l3_l, l3_1_l; + uint8_t l1_c, l1_1_c, l3_c, l3_1_c; + uint8_t avg_l, avg_c, avg_l_1, avg_c_1; + uint8_t avg_l__1 = 0, avg_c__1 = 0; + uint8_t avg_s_l, avg_s_c; + uint8_t avg_sc_l, avg_sc_c; + uint8_t best_l, best_c; + uint16_t mov_l; + uint8_t out_l, out_c; + uint8_t l2_l, l2_c, lp2_l, lp2_c; + uint8_t l2_l_diff, l2_c_diff, lp2_l_diff, lp2_c_diff; + uint8_t min_l, min_c, max_l, max_c; + guint max_comb = self->max_comb; + guint motion_sense = self->motion_sense; + guint motion_threshold = self->motion_threshold; + + for (Pos = 0; Pos < size; Pos += 2) { + l1_l = L1[0]; + l1_c = L1[1]; + l3_l = L3[0]; + l3_c = L3[1]; + + if (Pos == size - 1) { + l1_1_l = l1_l; + l1_1_c = l1_c; + l3_1_l = l3_l; + l3_1_c = l3_c; + } else { + l1_1_l = L1[2]; + l1_1_c = L1[3]; + l3_1_l = L3[2]; + l3_1_c = L3[3]; + } + + /* Average of L1 and L3 */ + avg_l = (l1_l + l3_l) / 2; + avg_c = (l1_c + l3_c) / 2; + + if (Pos == 0) { + avg_l__1 = avg_l; + avg_c__1 = avg_c; + } + + /* Average of next L1 and next L3 */ + avg_l_1 = (l1_1_l + l3_1_l) / 2; + avg_c_1 = (l1_1_c + l3_1_c) / 2; + + /* Calculate average of one pixel forward and previous */ + avg_s_l = (avg_l__1 + 
avg_l_1) / 2; + avg_s_c = (avg_c__1 + avg_c_1) / 2; + + /* Calculate average of center and surrounding pixels */ + avg_sc_l = (avg_l + avg_s_l) / 2; + avg_sc_c = (avg_c + avg_s_c) / 2; + + /* move forward */ + avg_l__1 = avg_l; + avg_c__1 = avg_c; + + /* Get best L2/L2P, i.e. least diff from above average */ + l2_l = L2[0]; + l2_c = L2[1]; + lp2_l = L2P[0]; + lp2_c = L2P[1]; + + l2_l_diff = ABS (l2_l - avg_sc_l); + l2_c_diff = ABS (l2_c - avg_sc_c); + + lp2_l_diff = ABS (lp2_l - avg_sc_l); + lp2_c_diff = ABS (lp2_c - avg_sc_c); + + if (l2_l_diff > lp2_l_diff) + best_l = lp2_l; + else + best_l = l2_l; + + if (l2_c_diff > lp2_c_diff) + best_c = lp2_c; + else + best_c = l2_c; + + /* Clip this best L2/L2P by L1/L3 and allow to differ by GreedyMaxComb */ + max_l = MAX (l1_l, l3_l); + min_l = MIN (l1_l, l3_l); + + if (max_l < 256 - max_comb) + max_l += max_comb; + else + max_l = 255; + + if (min_l > max_comb) + min_l -= max_comb; + else + min_l = 0; + + max_c = MAX (l1_c, l3_c); + min_c = MIN (l1_c, l3_c); + + if (max_c < 256 - max_comb) + max_c += max_comb; + else + max_c = 255; + + if (min_c > max_comb) + min_c -= max_comb; + else + min_c = 0; + + out_l = CLAMP (best_l, min_l, max_l); + out_c = CLAMP (best_c, min_c, max_c); + + /* Do motion compensation for luma, i.e. how much + * the weave pixel differs */ + mov_l = ABS (l2_l - lp2_l); + if (mov_l > motion_threshold) + mov_l -= motion_threshold; + else + mov_l = 0; + + mov_l = mov_l * motion_sense; + if (mov_l > 256) + mov_l = 256; + + /* Weighted sum on clipped weave pixel and average */ + out_l = (out_l * (256 - mov_l) + avg_sc_l * mov_l) / 256; + + Dest[0] = out_l; + Dest[1] = out_c; + + Dest += 2; + L1 += 2; + L2 += 2; + L3 += 2; + L2P += 2; + } +} + +#ifdef BUILD_X86_ASM + +#define IS_MMXEXT +#define SIMD_TYPE MMXEXT +#define FUNCT_NAME greedyDScaler_MMXEXT +#include "greedyh.asm" +#undef SIMD_TYPE +#undef IS_MMXEXT +#undef FUNCT_NAME + +#define IS_3DNOW +#define SIMD_TYPE 3DNOW +#define FUNCT_NAME greedyDScaler_3DNOW +#include "greedyh.asm" +#undef SIMD_TYPE +#undef IS_3DNOW +#undef FUNCT_NAME + +#define IS_MMX +#define SIMD_TYPE MMX +#define FUNCT_NAME greedyDScaler_MMX +#include "greedyh.asm" +#undef SIMD_TYPE +#undef IS_MMX +#undef FUNCT_NAME + +#endif + +static void +deinterlace_frame_di_greedyh (GstDeinterlaceMethod * d_method, + GstDeinterlace * object, GstBuffer * outbuf) +{ + GstDeinterlaceMethodGreedyH *self = + GST_DEINTERLACE_METHOD_GREEDY_H (d_method); + GstDeinterlaceMethodGreedyHClass *klass = + GST_DEINTERLACE_METHOD_GREEDY_H_GET_CLASS (self); + int InfoIsOdd = 0; + int Line; + unsigned int Pitch = object->field_stride; + + unsigned char *L1; // ptr to Line1, of 3 + unsigned char *L2; // ptr to Line2, the weave line + unsigned char *L3; // ptr to Line3 + + unsigned char *L2P; // ptr to prev Line2 + unsigned char *Dest = GST_BUFFER_DATA (outbuf); + + // copy first even line no matter what, and the first odd line if we're + // processing an EVEN field. (note diff from other deint rtns.) 
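+ // Field-history layout used below: L2 is the newest field
+ // (history_count - 1), L1 and L3 are consecutive lines of the
+ // opposite-parity field before it (history_count - 2), and L2P is the
+ // previous field of the same parity as L2 (history_count - 3).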
+ + if (object->field_history[object->history_count - 1].flags == + PICTURE_INTERLACED_BOTTOM) { + InfoIsOdd = 1; + + L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf); + L2 = GST_BUFFER_DATA (object->field_history[object->history_count - 1].buf); + L3 = L1 + Pitch; + L2P = + GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf); + + // copy first even line + oil_memcpy (Dest, L1, object->row_stride); + Dest += object->row_stride; + } else { + InfoIsOdd = 0; + L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf); + L2 = GST_BUFFER_DATA (object->field_history[object->history_count - + 1].buf) + Pitch; + L3 = L1 + Pitch; + L2P = + GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf) + + Pitch; + + // copy first even line + oil_memcpy (Dest, GST_BUFFER_DATA (object->field_history[0].buf), + object->row_stride); + Dest += object->row_stride; + // then first odd line + oil_memcpy (Dest, L1, object->row_stride); + Dest += object->row_stride; + } + + for (Line = 0; Line < (object->field_height - 1); ++Line) { + klass->scanline (self, L1, L2, L3, L2P, Dest, object->row_stride); + Dest += object->row_stride; + oil_memcpy (Dest, L3, object->row_stride); + Dest += object->row_stride; + + L1 += Pitch; + L2 += Pitch; + L3 += Pitch; + L2P += Pitch; + } + + if (InfoIsOdd) { + oil_memcpy (Dest, L2, object->row_stride); + } +} + +G_DEFINE_TYPE (GstDeinterlaceMethodGreedyH, gst_deinterlace_method_greedy_h, + GST_TYPE_DEINTERLACE_METHOD); + +enum +{ + ARG_0, + ARG_MAX_COMB, + ARG_MOTION_THRESHOLD, + ARG_MOTION_SENSE +}; + +static void +gst_deinterlace_method_greedy_h_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstDeinterlaceMethodGreedyH *self = GST_DEINTERLACE_METHOD_GREEDY_H (object); + + switch (prop_id) { + case ARG_MAX_COMB: + self->max_comb = g_value_get_uint (value); + break; + case ARG_MOTION_THRESHOLD: + self->motion_threshold = g_value_get_uint (value); + break; + case ARG_MOTION_SENSE: + self->motion_sense = g_value_get_uint (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + } +} + +static void +gst_deinterlace_method_greedy_h_get_property (GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec) +{ + GstDeinterlaceMethodGreedyH *self = GST_DEINTERLACE_METHOD_GREEDY_H (object); + + switch (prop_id) { + case ARG_MAX_COMB: + g_value_set_uint (value, self->max_comb); + break; + case ARG_MOTION_THRESHOLD: + g_value_set_uint (value, self->motion_threshold); + break; + case ARG_MOTION_SENSE: + g_value_set_uint (value, self->motion_sense); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + } +} + +static void +gst_deinterlace_method_greedy_h_class_init (GstDeinterlaceMethodGreedyHClass * + klass) +{ + GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass; + GObjectClass *gobject_class = (GObjectClass *) klass; +#ifdef BUILD_X86_ASM + guint cpu_flags = oil_cpu_get_flags (); +#endif + + gobject_class->set_property = gst_deinterlace_method_greedy_h_set_property; + gobject_class->get_property = gst_deinterlace_method_greedy_h_get_property; + + g_object_class_install_property (gobject_class, ARG_MAX_COMB, + g_param_spec_uint ("max-comb", + "Max comb", + "Max Comb", 0, 255, 5, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS) + ); + + g_object_class_install_property (gobject_class, ARG_MOTION_THRESHOLD, + g_param_spec_uint ("motion-threshold", + "Motion Threshold", + "Motion 
Threshold", + 0, 255, 25, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS) + ); + + g_object_class_install_property (gobject_class, ARG_MOTION_SENSE, + g_param_spec_uint ("motion-sense", + "Motion Sense", + "Motion Sense", + 0, 255, 30, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS) + ); + + dim_class->fields_required = 4; + dim_class->deinterlace_frame = deinterlace_frame_di_greedyh; + dim_class->name = "Motion Adaptive: Advanced Detection"; + dim_class->nick = "greedyh"; + dim_class->latency = 1; + +#ifdef BUILD_X86_ASM + if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) { + klass->scanline = greedyDScaler_MMXEXT; + } else if (cpu_flags & OIL_IMPL_FLAG_3DNOW) { + klass->scanline = greedyDScaler_3DNOW; + } else if (cpu_flags & OIL_IMPL_FLAG_MMX) { + klass->scanline = greedyDScaler_MMX; + } else { + klass->scanline = greedyDScaler_C; + } +#else + klass->scanline = greedyDScaler_C; +#endif +} + +static void +gst_deinterlace_method_greedy_h_init (GstDeinterlaceMethodGreedyH * self) +{ + self->max_comb = 5; + self->motion_threshold = 25; + self->motion_sense = 30; +} diff --git a/gst/deinterlace/tvtime/greedyhmacros.h b/gst/deinterlace/tvtime/greedyhmacros.h new file mode 100644 index 00000000..0386c28e --- /dev/null +++ b/gst/deinterlace/tvtime/greedyhmacros.h @@ -0,0 +1,75 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2001 Tom Barry. All rights reserved. +///////////////////////////////////////////////////////////////////////////// +// +// This file is subject to the terms of the GNU General Public License as +// published by the Free Software Foundation. A copy of this license is +// included with this software distribution in the file COPYING. If you +// do not have a copy, you may obtain a copy by writing to the Free +// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +// +// This software is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details +// +///////////////////////////////////////////////////////////////////////////// + +// Define a few macros for CPU dependent instructions. +// I suspect I don't really understand how the C macro preprocessor works but +// this seems to get the job done. 
// TRB 7/01 + +// BEFORE USING THESE YOU MUST SET: + +// #define SIMD_TYPE MMXEXT (or MMX or 3DNOW) + +// some macros for pavgb instruction +// V_PAVGB(mmr1, mmr2, mmr work register, smask) mmr2 may = mmrw if you can trash it + +#define V_PAVGB_MMX(mmr1, mmr2, mmrw, smask) \ + "movq "mmr2", "mmrw"\n\t" \ + "pand "smask", "mmrw"\n\t" \ + "psrlw $1, "mmrw"\n\t" \ + "pand "smask", "mmr1"\n\t" \ + "psrlw $1, "mmr1"\n\t" \ + "paddusb "mmrw", "mmr1"\n\t" +#define V_PAVGB_MMXEXT(mmr1, mmr2, mmrw, smask) "pavgb "mmr2", "mmr1"\n\t" +#define V_PAVGB_3DNOW(mmr1, mmr2, mmrw, smask) "pavgusb "mmr2", "mmr1"\n\t" +#define V_PAVGB(mmr1, mmr2, mmrw, smask) V_PAVGB2(mmr1, mmr2, mmrw, smask, SIMD_TYPE) +#define V_PAVGB2(mmr1, mmr2, mmrw, smask, simd_type) V_PAVGB3(mmr1, mmr2, mmrw, smask, simd_type) +#define V_PAVGB3(mmr1, mmr2, mmrw, smask, simd_type) V_PAVGB_##simd_type(mmr1, mmr2, mmrw, smask) + +// some macros for pmaxub instruction +#define V_PMAXUB_MMX(mmr1, mmr2) \ + "psubusb "mmr2", "mmr1"\n\t" \ + "paddusb "mmr2", "mmr1"\n\t" +#define V_PMAXUB_MMXEXT(mmr1, mmr2) "pmaxub "mmr2", "mmr1"\n\t" +#define V_PMAXUB_3DNOW(mmr1, mmr2) V_PMAXUB_MMX(mmr1, mmr2) // use MMX version +#define V_PMAXUB(mmr1, mmr2) V_PMAXUB2(mmr1, mmr2, SIMD_TYPE) +#define V_PMAXUB2(mmr1, mmr2, simd_type) V_PMAXUB3(mmr1, mmr2, simd_type) +#define V_PMAXUB3(mmr1, mmr2, simd_type) V_PMAXUB_##simd_type(mmr1, mmr2) + +// some macros for pminub instruction +// V_PMINUB(mmr1, mmr2, mmr work register) mmr2 may NOT = mmrw +#define V_PMINUB_MMX(mmr1, mmr2, mmrw) \ + "pcmpeqb "mmrw", "mmrw"\n\t" \ + "psubusb "mmr2", "mmrw"\n\t" \ + "paddusb "mmrw", "mmr1"\n\t" \ + "psubusb "mmrw", "mmr1"\n\t" +#define V_PMINUB_MMXEXT(mmr1, mmr2, mmrw) "pminub "mmr2", "mmr1"\n\t" +#define V_PMINUB_3DNOW(mmr1, mmr2, mmrw) V_PMINUB_MMX(mmr1, mmr2, mmrw) // use MMX version +#define V_PMINUB(mmr1, mmr2, mmrw) V_PMINUB2(mmr1, mmr2, mmrw, SIMD_TYPE) +#define V_PMINUB2(mmr1, mmr2, mmrw, simd_type) V_PMINUB3(mmr1, mmr2, mmrw, simd_type) +#define V_PMINUB3(mmr1, mmr2, mmrw, simd_type) V_PMINUB_##simd_type(mmr1, mmr2, mmrw) + +// some macros for movntq instruction +// V_MOVNTQ(mmr1, mmr2) +#define V_MOVNTQ_MMX(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t" +#define V_MOVNTQ_3DNOW(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t" +#define V_MOVNTQ_MMXEXT(mmr1, mmr2) "movntq "mmr2", "mmr1"\n\t" +#define V_MOVNTQ(mmr1, mmr2) V_MOVNTQ2(mmr1, mmr2, SIMD_TYPE) +#define V_MOVNTQ2(mmr1, mmr2, simd_type) V_MOVNTQ3(mmr1, mmr2, simd_type) +#define V_MOVNTQ3(mmr1, mmr2, simd_type) V_MOVNTQ_##simd_type(mmr1, mmr2) + +// end of macros + diff --git a/gst/deinterlace/tvtime/linear.c b/gst/deinterlace/tvtime/linear.c new file mode 100644 index 00000000..8a13d8a1 --- /dev/null +++ b/gst/deinterlace/tvtime/linear.c @@ -0,0 +1,214 @@ +/** + * Copyright (C) 2002 Billy Biggs <vektor@dumbterm.net>. + * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "_stdint.h" +#include "gstdeinterlace.h" +#include <string.h> + +#define GST_TYPE_DEINTERLACE_METHOD_LINEAR (gst_deinterlace_method_linear_get_type ()) +#define GST_IS_DEINTERLACE_METHOD_LINEAR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR)) +#define GST_IS_DEINTERLACE_METHOD_LINEAR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_LINEAR)) +#define GST_DEINTERLACE_METHOD_LINEAR_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR, GstDeinterlaceMethodLinearClass)) +#define GST_DEINTERLACE_METHOD_LINEAR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR, GstDeinterlaceMethodLinear)) +#define GST_DEINTERLACE_METHOD_LINEAR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_LINEAR, GstDeinterlaceMethodLinearClass)) +#define GST_DEINTERLACE_METHOD_LINEAR_CAST(obj) ((GstDeinterlaceMethodLinear*)(obj)) + +GType gst_deinterlace_method_linear_get_type (void); + +typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodLinear; + +typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodLinearClass; + +static void +deinterlace_scanline_linear_c (GstDeinterlaceMethod * self, + GstDeinterlace * parent, guint8 * out, + GstDeinterlaceScanlineData * scanlines, gint width) +{ + gint i; + + width *= 2; + for (i = 0; i < width; i++) + out[i] = (scanlines->t0[i] + scanlines->b0[i]) / 2; +} + +#ifdef BUILD_X86_ASM +#include "mmx.h" +static void +deinterlace_scanline_linear_mmx (GstDeinterlaceMethod * self, + GstDeinterlace * parent, guint8 * out, + GstDeinterlaceScanlineData * scanlines, gint width) +{ + const mmx_t shiftmask = { 0xfefffefffefffeffULL }; /* To avoid shifting chroma to luma. */ + int i; + guint8 *bot = scanlines->b0, *top = scanlines->t0; + + for (i = width / 16; i; --i) { + movq_m2r (*bot, mm0); + movq_m2r (*top, mm1); + movq_m2r (*(bot + 8), mm2); + movq_m2r (*(top + 8), mm3); + movq_m2r (*(bot + 16), mm4); + movq_m2r (*(top + 16), mm5); + movq_m2r (*(bot + 24), mm6); + movq_m2r (*(top + 24), mm7); + pand_m2r (shiftmask, mm0); + pand_m2r (shiftmask, mm1); + pand_m2r (shiftmask, mm2); + pand_m2r (shiftmask, mm3); + pand_m2r (shiftmask, mm4); + pand_m2r (shiftmask, mm5); + pand_m2r (shiftmask, mm6); + pand_m2r (shiftmask, mm7); + psrlw_i2r (1, mm0); + psrlw_i2r (1, mm1); + psrlw_i2r (1, mm2); + psrlw_i2r (1, mm3); + psrlw_i2r (1, mm4); + psrlw_i2r (1, mm5); + psrlw_i2r (1, mm6); + psrlw_i2r (1, mm7); + paddb_r2r (mm1, mm0); + paddb_r2r (mm3, mm2); + paddb_r2r (mm5, mm4); + paddb_r2r (mm7, mm6); + movq_r2m (mm0, *out); + movq_r2m (mm2, *(out + 8)); + movq_r2m (mm4, *(out + 16)); + movq_r2m (mm6, *(out + 24)); + out += 32; + top += 32; + bot += 32; + } + width = (width & 0xf); + + for (i = width / 4; i; --i) { + movq_m2r (*bot, mm0); + movq_m2r (*top, mm1); + pand_m2r (shiftmask, mm0); + pand_m2r (shiftmask, mm1); + psrlw_i2r (1, mm0); + psrlw_i2r (1, mm1); + paddb_r2r (mm1, mm0); + movq_r2m (mm0, *out); + out += 8; + top += 8; + bot += 8; + } + width = width & 0x7; + + /* Handle last few pixels. 
*/ + for (i = width * 2; i; --i) { + *out++ = ((*top++) + (*bot++)) >> 1; + } + + emms (); +} + +#include "sse.h" +static void +deinterlace_scanline_linear_mmxext (GstDeinterlaceMethod * self, + GstDeinterlace * parent, guint8 * out, + GstDeinterlaceScanlineData * scanlines, gint width) +{ + gint i; + guint8 *bot = scanlines->b0, *top = scanlines->t0; + + for (i = width / 16; i; --i) { + movq_m2r (*bot, mm0); + movq_m2r (*top, mm1); + movq_m2r (*(bot + 8), mm2); + movq_m2r (*(top + 8), mm3); + movq_m2r (*(bot + 16), mm4); + movq_m2r (*(top + 16), mm5); + movq_m2r (*(bot + 24), mm6); + movq_m2r (*(top + 24), mm7); + pavgb_r2r (mm1, mm0); + pavgb_r2r (mm3, mm2); + pavgb_r2r (mm5, mm4); + pavgb_r2r (mm7, mm6); + movntq_r2m (mm0, *out); + movntq_r2m (mm2, *(out + 8)); + movntq_r2m (mm4, *(out + 16)); + movntq_r2m (mm6, *(out + 24)); + out += 32; + top += 32; + bot += 32; + } + width = (width & 0xf); + + for (i = width / 4; i; --i) { + movq_m2r (*bot, mm0); + movq_m2r (*top, mm1); + pavgb_r2r (mm1, mm0); + movntq_r2m (mm0, *out); + out += 8; + top += 8; + bot += 8; + } + width = width & 0x7; + + /* Handle last few pixels. */ + for (i = width * 2; i; --i) { + *out++ = ((*top++) + (*bot++)) >> 1; + } + + emms (); +} + +#endif + +G_DEFINE_TYPE (GstDeinterlaceMethodLinear, gst_deinterlace_method_linear, + GST_TYPE_DEINTERLACE_SIMPLE_METHOD); + +static void +gst_deinterlace_method_linear_class_init (GstDeinterlaceMethodLinearClass * + klass) +{ + GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass; + GstDeinterlaceSimpleMethodClass *dism_class = + (GstDeinterlaceSimpleMethodClass *) klass; +#ifdef BUILD_X86_ASM + guint cpu_flags = oil_cpu_get_flags (); +#endif + + dim_class->fields_required = 1; + dim_class->name = "Television: Full resolution"; + dim_class->nick = "linear"; + dim_class->latency = 0; + + dism_class->interpolate_scanline = deinterlace_scanline_linear_c; + +#ifdef BUILD_X86_ASM + if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) { + dism_class->interpolate_scanline = deinterlace_scanline_linear_mmxext; + } else if (cpu_flags & OIL_IMPL_FLAG_MMX) { + dism_class->interpolate_scanline = deinterlace_scanline_linear_mmx; + } +#endif +} + +static void +gst_deinterlace_method_linear_init (GstDeinterlaceMethodLinear * self) +{ +} diff --git a/gst/deinterlace/tvtime/linearblend.c b/gst/deinterlace/tvtime/linearblend.c new file mode 100644 index 00000000..5ecffd6e --- /dev/null +++ b/gst/deinterlace/tvtime/linearblend.c @@ -0,0 +1,231 @@ +/** + * Linear blend deinterlacing plugin. The idea for this algorithm came + * from the linear blend deinterlacer which originated in the mplayer + * sources. + * + * Copyright (C) 2002 Billy Biggs <vektor@dumbterm.net>. + * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA.
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "_stdint.h" +#include "gstdeinterlace.h" +#include <string.h> + +#define GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND (gst_deinterlace_method_linear_blend_get_type ()) +#define GST_IS_DEINTERLACE_METHOD_LINEAR_BLEND(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND)) +#define GST_IS_DEINTERLACE_METHOD_LINEAR_BLEND_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND)) +#define GST_DEINTERLACE_METHOD_LINEAR_BLEND_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND, GstDeinterlaceMethodLinearBlendClass)) +#define GST_DEINTERLACE_METHOD_LINEAR_BLEND(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND, GstDeinterlaceMethodLinearBlend)) +#define GST_DEINTERLACE_METHOD_LINEAR_BLEND_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_LINEAR_BLEND, GstDeinterlaceMethodLinearBlendClass)) +#define GST_DEINTERLACE_METHOD_LINEAR_BLEND_CAST(obj) ((GstDeinterlaceMethodLinearBlend*)(obj)) + +GType gst_deinterlace_method_linear_blend_get_type (void); + +typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodLinearBlend; + +typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodLinearBlendClass; + + +static inline void +deinterlace_scanline_linear_blend_c (GstDeinterlaceMethod * self, + GstDeinterlace * parent, guint8 * out, + GstDeinterlaceScanlineData * scanlines, gint width) +{ + guint8 *t0 = scanlines->t0; + guint8 *b0 = scanlines->b0; + guint8 *m1 = scanlines->m1; + + width *= 2; + + while (width--) { + *out++ = (*t0++ + *b0++ + (*m1++ << 1)) >> 2; + } +} + +static inline void +deinterlace_scanline_linear_blend2_c (GstDeinterlaceMethod * self, + GstDeinterlace * parent, guint8 * out, + GstDeinterlaceScanlineData * scanlines, gint width) +{ + guint8 *m0 = scanlines->m0; + guint8 *t1 = scanlines->t1; + guint8 *b1 = scanlines->b1; + + width *= 2; + while (width--) { + *out++ = (*t1++ + *b1++ + (*m0++ << 1)) >> 2; + } +} + +#ifdef BUILD_X86_ASM +#include "mmx.h" +static inline void +deinterlace_scanline_linear_blend_mmx (GstDeinterlaceMethod * self, + GstDeinterlace * parent, guint8 * out, + GstDeinterlaceScanlineData * scanlines, gint width) +{ + guint8 *t0 = scanlines->t0; + guint8 *b0 = scanlines->b0; + guint8 *m1 = scanlines->m1; + gint i; + + // Get width in bytes. 
+ width *= 2; + i = width / 8; + width -= i * 8; + + pxor_r2r (mm7, mm7); + while (i--) { + movd_m2r (*t0, mm0); + movd_m2r (*b0, mm1); + movd_m2r (*m1, mm2); + + movd_m2r (*(t0 + 4), mm3); + movd_m2r (*(b0 + 4), mm4); + movd_m2r (*(m1 + 4), mm5); + + punpcklbw_r2r (mm7, mm0); + punpcklbw_r2r (mm7, mm1); + punpcklbw_r2r (mm7, mm2); + + punpcklbw_r2r (mm7, mm3); + punpcklbw_r2r (mm7, mm4); + punpcklbw_r2r (mm7, mm5); + + psllw_i2r (1, mm2); + psllw_i2r (1, mm5); + paddw_r2r (mm0, mm2); + paddw_r2r (mm3, mm5); + paddw_r2r (mm1, mm2); + paddw_r2r (mm4, mm5); + psrlw_i2r (2, mm2); + psrlw_i2r (2, mm5); + packuswb_r2r (mm2, mm2); + packuswb_r2r (mm5, mm5); + + movd_r2m (mm2, *out); + movd_r2m (mm5, *(out + 4)); + out += 8; + t0 += 8; + b0 += 8; + m1 += 8; + } + while (width--) { + *out++ = (*t0++ + *b0++ + (*m1++ << 1)) >> 2; + } + emms (); +} + +static inline void +deinterlace_scanline_linear_blend2_mmx (GstDeinterlaceMethod * self, + GstDeinterlace * parent, guint8 * out, + GstDeinterlaceScanlineData * scanlines, gint width) +{ + guint8 *m0 = scanlines->m0; + guint8 *t1 = scanlines->t1; + guint8 *b1 = scanlines->b1; + gint i; + + // Get width in bytes. + width *= 2; + i = width / 8; + width -= i * 8; + + pxor_r2r (mm7, mm7); + while (i--) { + movd_m2r (*t1, mm0); + movd_m2r (*b1, mm1); + movd_m2r (*m0, mm2); + + movd_m2r (*(t1 + 4), mm3); + movd_m2r (*(b1 + 4), mm4); + movd_m2r (*(m0 + 4), mm5); + + punpcklbw_r2r (mm7, mm0); + punpcklbw_r2r (mm7, mm1); + punpcklbw_r2r (mm7, mm2); + + punpcklbw_r2r (mm7, mm3); + punpcklbw_r2r (mm7, mm4); + punpcklbw_r2r (mm7, mm5); + + psllw_i2r (1, mm2); + psllw_i2r (1, mm5); + paddw_r2r (mm0, mm2); + paddw_r2r (mm3, mm5); + paddw_r2r (mm1, mm2); + paddw_r2r (mm4, mm5); + psrlw_i2r (2, mm2); + psrlw_i2r (2, mm5); + packuswb_r2r (mm2, mm2); + packuswb_r2r (mm5, mm5); + + movd_r2m (mm2, *out); + movd_r2m (mm5, *(out + 4)); + out += 8; + t1 += 8; + b1 += 8; + m0 += 8; + } + while (width--) { + *out++ = (*t1++ + *b1++ + (*m0++ << 1)) >> 2; + } + emms (); +} + +#endif + +G_DEFINE_TYPE (GstDeinterlaceMethodLinearBlend, + gst_deinterlace_method_linear_blend, GST_TYPE_DEINTERLACE_SIMPLE_METHOD); + +static void + gst_deinterlace_method_linear_blend_class_init + (GstDeinterlaceMethodLinearBlendClass * klass) +{ + GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass; + GstDeinterlaceSimpleMethodClass *dism_class = + (GstDeinterlaceSimpleMethodClass *) klass; +#ifdef BUILD_X86_ASM + guint cpu_flags = oil_cpu_get_flags (); +#endif + + dim_class->fields_required = 2; + dim_class->name = "Blur: Temporal"; + dim_class->nick = "linearblend"; + dim_class->latency = 0; + + dism_class->interpolate_scanline = deinterlace_scanline_linear_blend_c; + dism_class->copy_scanline = deinterlace_scanline_linear_blend2_c; + +#ifdef BUILD_X86_ASM + if (cpu_flags & OIL_IMPL_FLAG_MMX) { + dism_class->interpolate_scanline = deinterlace_scanline_linear_blend_mmx; + dism_class->copy_scanline = deinterlace_scanline_linear_blend2_mmx; + } +#endif +} + +static void +gst_deinterlace_method_linear_blend_init (GstDeinterlaceMethodLinearBlend * + self) +{ +} diff --git a/gst/deinterlace/tvtime/mmx.h b/gst/deinterlace/tvtime/mmx.h new file mode 100644 index 00000000..3627e61b --- /dev/null +++ b/gst/deinterlace/tvtime/mmx.h @@ -0,0 +1,723 @@ +/* mmx.h + + MultiMedia eXtensions GCC interface library for IA32. + + To use this library, simply include this header file + and compile with GCC. 
You MUST have inlining enabled + in order for mmx_ok() to work; this can be done by + simply using -O on the GCC command line. + + Compiling with -DMMX_TRACE will cause detailed trace + output to be sent to stderr for each mmx operation. + This adds lots of code, and obviously slows execution to + a crawl, but can be very useful for debugging. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT + LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR ANY PARTICULAR PURPOSE. + + 1997-98 by H. Dietz and R. Fisher + + History: + 97-98* R.Fisher Early versions + 980501 R.Fisher Original Release + 980611* H.Dietz Rewrite, correctly implementing inlines, and + R.Fisher including direct register accesses. + 980616 R.Fisher Release of 980611 as 980616. + 980714 R.Fisher Minor corrections to Makefile, etc. + 980715 R.Fisher mmx_ok() now prevents optimizer from using + clobbered values. + mmx_ok() now checks if cpuid instruction is + available before trying to use it. + 980726* R.Fisher mm_support() searches for AMD 3DNow, Cyrix + Extended MMX, and standard MMX. It returns a + value which is positive if any of these are + supported, and can be masked with constants to + see which. mmx_ok() is now a call to this + 980726* R.Fisher Added i2r support for shift functions + 980919 R.Fisher Fixed AMD extended feature recognition bug. + 980921 R.Fisher Added definition/check for _MMX_H. + Added "float s[2]" to mmx_t for use with + 3DNow and EMMX. So same mmx_t can be used. + 981013 R.Fisher Fixed cpuid function 1 bug (looked at wrong reg) + Fixed psllq_i2r error in mmxtest.c + + * Unreleased (internal or interim) versions + + Notes: + It appears that the latest gas has the pand problem fixed, therefore + I'll undefine BROKEN_PAND by default. + String compares may be quicker than the multiple test/jumps in vendor + test sequence in mmx_ok(), but I'm not concerned with that right now. + + Acknowledgments: + Jussi Laako for pointing out the errors ultimately found to be + connected to the failure to notify the optimizer of clobbered values. + Roger Hardiman for reminding us that CPUID isn't everywhere, and that + someone may actually try to use this on a machine without CPUID. + Also for suggesting code for checking this. + Robert Dale for pointing out the AMD recognition bug. + Jimmy Mayfield and Carl Witty for pointing out the Intel recognition + bug. + Carl Witty for pointing out the psllq_i2r test bug. +*/ + +#ifndef _MMX_H +#define _MMX_H + +/*#define MMX_TRACE */ + +/* Warning: at this writing, the version of GAS packaged + with most Linux distributions does not handle the + parallel AND operation mnemonic correctly. If the + symbol BROKEN_PAND is defined, a slower alternative + coding will be used. If execution of mmxtest results + in an illegal instruction fault, define this symbol. 
+*/ +#undef BROKEN_PAND + + +/* The type of an value that fits in an MMX register + (note that long long constant values MUST be suffixed + by LL and unsigned long long values by ULL, lest + they be truncated by the compiler) +*/ +typedef union { + long long q; /* Quadword (64-bit) value */ + unsigned long long uq; /* Unsigned Quadword */ + int d[2]; /* 2 Doubleword (32-bit) values */ + unsigned int ud[2]; /* 2 Unsigned Doubleword */ + short w[4]; /* 4 Word (16-bit) values */ + unsigned short uw[4]; /* 4 Unsigned Word */ + char b[8]; /* 8 Byte (8-bit) values */ + unsigned char ub[8]; /* 8 Unsigned Byte */ + float s[2]; /* Single-precision (32-bit) value */ +} mmx_t; + + +/* Function to test if multimedia instructions are supported... +*/ +inline extern int +mm_support(void) +{ + /* Returns 1 if MMX instructions are supported, + 3 if Cyrix MMX and Extended MMX instructions are supported + 5 if AMD MMX and 3DNow! instructions are supported + 0 if hardware does not support any of these + */ + register int rval = 0; + + __asm__ __volatile__ ( + /* See if CPUID instruction is supported ... */ + /* ... Get copies of EFLAGS into eax and ecx */ + "pushf\n\t" + "popl %%eax\n\t" + "movl %%eax, %%ecx\n\t" + + /* ... Toggle the ID bit in one copy and store */ + /* to the EFLAGS reg */ + "xorl $0x200000, %%eax\n\t" + "push %%eax\n\t" + "popf\n\t" + + /* ... Get the (hopefully modified) EFLAGS */ + "pushf\n\t" + "popl %%eax\n\t" + + /* ... Compare and test result */ + "xorl %%eax, %%ecx\n\t" + "testl $0x200000, %%ecx\n\t" + "jz NotSupported1\n\t" /* Nothing supported */ + + + /* Get standard CPUID information, and + go to a specific vendor section */ + "movl $0, %%eax\n\t" + "cpuid\n\t" + + /* Check for Intel */ + "cmpl $0x756e6547, %%ebx\n\t" + "jne TryAMD\n\t" + "cmpl $0x49656e69, %%edx\n\t" + "jne TryAMD\n\t" + "cmpl $0x6c65746e, %%ecx\n" + "jne TryAMD\n\t" + "jmp Intel\n\t" + + /* Check for AMD */ + "\nTryAMD:\n\t" + "cmpl $0x68747541, %%ebx\n\t" + "jne TryCyrix\n\t" + "cmpl $0x69746e65, %%edx\n\t" + "jne TryCyrix\n\t" + "cmpl $0x444d4163, %%ecx\n" + "jne TryCyrix\n\t" + "jmp AMD\n\t" + + /* Check for Cyrix */ + "\nTryCyrix:\n\t" + "cmpl $0x69727943, %%ebx\n\t" + "jne NotSupported2\n\t" + "cmpl $0x736e4978, %%edx\n\t" + "jne NotSupported3\n\t" + "cmpl $0x64616574, %%ecx\n\t" + "jne NotSupported4\n\t" + /* Drop through to Cyrix... */ + + + /* Cyrix Section */ + /* See if extended CPUID is supported */ + "movl $0x80000000, %%eax\n\t" + "cpuid\n\t" + "cmpl $0x80000000, %%eax\n\t" + "jl MMXtest\n\t" /* Try standard CPUID instead */ + + /* Extended CPUID supported, so get extended features */ + "movl $0x80000001, %%eax\n\t" + "cpuid\n\t" + "testl $0x00800000, %%eax\n\t" /* Test for MMX */ + "jz NotSupported5\n\t" /* MMX not supported */ + "testl $0x01000000, %%eax\n\t" /* Test for Ext'd MMX */ + "jnz EMMXSupported\n\t" + "movl $1, %0:\n\n\t" /* MMX Supported */ + "jmp Return\n\n" + "EMMXSupported:\n\t" + "movl $3, %0:\n\n\t" /* EMMX and MMX Supported */ + "jmp Return\n\t" + + + /* AMD Section */ + "AMD:\n\t" + + /* See if extended CPUID is supported */ + "movl $0x80000000, %%eax\n\t" + "cpuid\n\t" + "cmpl $0x80000000, %%eax\n\t" + "jl MMXtest\n\t" /* Try standard CPUID instead */ + + /* Extended CPUID supported, so get extended features */ + "movl $0x80000001, %%eax\n\t" + "cpuid\n\t" + "testl $0x00800000, %%edx\n\t" /* Test for MMX */ + "jz NotSupported6\n\t" /* MMX not supported */ + "testl $0x80000000, %%edx\n\t" /* Test for 3DNow! 
*/ + "jnz ThreeDNowSupported\n\t" + "movl $1, %0:\n\n\t" /* MMX Supported */ + "jmp Return\n\n" + "ThreeDNowSupported:\n\t" + "movl $5, %0:\n\n\t" /* 3DNow! and MMX Supported */ + "jmp Return\n\t" + + + /* Intel Section */ + "Intel:\n\t" + + /* Check for MMX */ + "MMXtest:\n\t" + "movl $1, %%eax\n\t" + "cpuid\n\t" + "testl $0x00800000, %%edx\n\t" /* Test for MMX */ + "jz NotSupported7\n\t" /* MMX Not supported */ + "movl $1, %0:\n\n\t" /* MMX Supported */ + "jmp Return\n\t" + + /* Nothing supported */ + "\nNotSupported1:\n\t" + "#movl $101, %0:\n\n\t" + "\nNotSupported2:\n\t" + "#movl $102, %0:\n\n\t" + "\nNotSupported3:\n\t" + "#movl $103, %0:\n\n\t" + "\nNotSupported4:\n\t" + "#movl $104, %0:\n\n\t" + "\nNotSupported5:\n\t" + "#movl $105, %0:\n\n\t" + "\nNotSupported6:\n\t" + "#movl $106, %0:\n\n\t" + "\nNotSupported7:\n\t" + "#movl $107, %0:\n\n\t" + "movl $0, %0:\n\n\t" + + "Return:\n\t" + : "=a" (rval) + : /* no input */ + : "eax", "ebx", "ecx", "edx" + ); + + /* Return */ + return(rval); +} + +/* Function to test if mmx instructions are supported... +*/ +inline extern int +mmx_ok(void) +{ + /* Returns 1 if MMX instructions are supported, 0 otherwise */ + return ( mm_support() & 0x1 ); +} + + +/* Helper functions for the instruction macros that follow... + (note that memory-to-register, m2r, instructions are nearly + as efficient as register-to-register, r2r, instructions; + however, memory-to-memory instructions are really simulated + as a convenience, and are only 1/3 as efficient) +*/ +#ifdef MMX_TRACE + +/* Include the stuff for printing a trace to stderr... +*/ + +#include <stdio.h> + +#define mmx_i2r(op, imm, reg) \ + { \ + mmx_t mmx_trace; \ + mmx_trace = (imm); \ + fprintf(stderr, #op "_i2r(" #imm "=0x%016llx, ", mmx_trace.q); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (imm)); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \ + } + +#define mmx_m2r(op, mem, reg) \ + { \ + mmx_t mmx_trace; \ + mmx_trace = (mem); \ + fprintf(stderr, #op "_m2r(" #mem "=0x%016llx, ", mmx_trace.q); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem)); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \ + } + +#define mmx_r2m(op, reg, mem) \ + { \ + mmx_t mmx_trace; \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #op "_r2m(" #reg "=0x%016llx, ", mmx_trace.q); \ + mmx_trace = (mem); \ + fprintf(stderr, #mem "=0x%016llx) => ", mmx_trace.q); \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=X" (mem) \ + : /* nothing */ ); \ + mmx_trace = (mem); \ + fprintf(stderr, #mem "=0x%016llx\n", mmx_trace.q); \ + } + +#define mmx_r2r(op, regs, regd) \ + { \ + mmx_t mmx_trace; \ + __asm__ __volatile__ ("movq %%" #regs ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #op "_r2r(" #regs "=0x%016llx, ", mmx_trace.q); \ + __asm__ __volatile__ ("movq %%" #regd ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #regd "=0x%016llx) => ", mmx_trace.q); \ 
+ __asm__ __volatile__ (#op " %" #regs ", %" #regd); \ + __asm__ __volatile__ ("movq %%" #regd ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #regd "=0x%016llx\n", mmx_trace.q); \ + } + +#define mmx_m2m(op, mems, memd) \ + { \ + mmx_t mmx_trace; \ + mmx_trace = (mems); \ + fprintf(stderr, #op "_m2m(" #mems "=0x%016llx, ", mmx_trace.q); \ + mmx_trace = (memd); \ + fprintf(stderr, #memd "=0x%016llx) => ", mmx_trace.q); \ + __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ + #op " %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=X" (memd) \ + : "X" (mems)); \ + mmx_trace = (memd); \ + fprintf(stderr, #memd "=0x%016llx\n", mmx_trace.q); \ + } + +#else + +/* These macros are a lot simpler without the tracing... +*/ + +#define mmx_i2r(op, imm, reg) \ + __asm__ __volatile__ (#op " $" #imm ", %%" #reg \ + : /* nothing */ \ + : /* nothing */); + +#define mmx_m2r(op, mem, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "m" (mem)) + +#define mmx_r2m(op, reg, mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=m" (mem) \ + : /* nothing */ ) + +#define mmx_r2r(op, regs, regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + +#define mmx_m2m(op, mems, memd) \ + __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ + #op " %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=m" (memd) \ + : "m" (mems)) + +#endif + + +/* 1x64 MOVe Quadword + (this is both a load and a store... + in fact, it is the only way to store) +*/ +#define movq_m2r(var, reg) mmx_m2r(movq, var, reg) +#define movq_r2m(reg, var) mmx_r2m(movq, reg, var) +#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd) +#define movq(vars, vard) \ + __asm__ __volatile__ ("movq %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=X" (vard) \ + : "X" (vars)) + + +/* 1x32 MOVe Doubleword + (like movq, this is both load and store... 
+ but is most useful for moving things between + mmx registers and ordinary registers) +*/ +#define movd_m2r(var, reg) mmx_m2r(movd, var, reg) +#define movd_r2m(reg, var) mmx_r2m(movd, reg, var) +#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd) +#define movd(vars, vard) \ + __asm__ __volatile__ ("movd %1, %%mm0\n\t" \ + "movd %%mm0, %0" \ + : "=X" (vard) \ + : "X" (vars)) + + +/* 2x32, 4x16, and 8x8 Parallel ADDs +*/ +#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg) +#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd) +#define paddd(vars, vard) mmx_m2m(paddd, vars, vard) + +#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg) +#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd) +#define paddw(vars, vard) mmx_m2m(paddw, vars, vard) + +#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg) +#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd) +#define paddb(vars, vard) mmx_m2m(paddb, vars, vard) + + +/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic +*/ +#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg) +#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd) +#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard) + +#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg) +#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd) +#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard) + + +/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic +*/ +#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg) +#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd) +#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard) + +#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg) +#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd) +#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel SUBs +*/ +#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg) +#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd) +#define psubd(vars, vard) mmx_m2m(psubd, vars, vard) + +#define psubw_m2r(var, reg) mmx_m2r(psubw, var, reg) +#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd) +#define psubw(vars, vard) mmx_m2m(psubw, vars, vard) + +#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg) +#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd) +#define psubb(vars, vard) mmx_m2m(psubb, vars, vard) + + +/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic +*/ +#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg) +#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd) +#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard) + +#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg) +#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd) +#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard) + + +/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic +*/ +#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg) +#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd) +#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard) + +#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg) +#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd) +#define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard) + + +/* 4x16 Parallel MULs giving Low 4x16 portions of results +*/ +#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg) +#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd) +#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard) + + +/* 4x16 Parallel MULs giving High 4x16 portions of results +*/ +#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg) +#define 
pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd) +#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard) + + +/* 4x16->2x32 Parallel Mul-ADD + (muls like pmullw, then adds adjacent 16-bit fields + in the multiply result to make the final 2x32 result) +*/ +#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg) +#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd) +#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard) + + +/* 1x64 bitwise AND +*/ +#ifdef BROKEN_PAND +#define pand_m2r(var, reg) \ + { \ + mmx_m2r(pandn, (mmx_t) -1LL, reg); \ + mmx_m2r(pandn, var, reg); \ + } +#define pand_r2r(regs, regd) \ + { \ + mmx_m2r(pandn, (mmx_t) -1LL, regd); \ + mmx_r2r(pandn, regs, regd); \ + } +#define pand(vars, vard) \ + { \ + movq_m2r(vard, mm0); \ + mmx_m2r(pandn, (mmx_t) -1LL, mm0); \ + mmx_m2r(pandn, vars, mm0); \ + movq_r2m(mm0, vard); \ + } +#else +#define pand_m2r(var, reg) mmx_m2r(pand, var, reg) +#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd) +#define pand(vars, vard) mmx_m2m(pand, vars, vard) +#endif + + +/* 1x64 bitwise AND with Not the destination +*/ +#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg) +#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd) +#define pandn(vars, vard) mmx_m2m(pandn, vars, vard) + + +/* 1x64 bitwise OR +*/ +#define por_m2r(var, reg) mmx_m2r(por, var, reg) +#define por_r2r(regs, regd) mmx_r2r(por, regs, regd) +#define por(vars, vard) mmx_m2m(por, vars, vard) + + +/* 1x64 bitwise eXclusive OR +*/ +#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg) +#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd) +#define pxor(vars, vard) mmx_m2m(pxor, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality + (resulting fields are either 0 or -1) +*/ +#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg) +#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd) +#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard) + +#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg) +#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd) +#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard) + +#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg) +#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd) +#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than + (resulting fields are either 0 or -1) +*/ +#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg) +#define pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd) +#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard) + +#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg) +#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd) +#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard) + +#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg) +#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd) +#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard) + + +/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical +*/ +#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg) +#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg) +#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd) +#define psllq(vars, vard) mmx_m2m(psllq, vars, vard) + +#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg) +#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg) +#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd) +#define pslld(vars, vard) mmx_m2m(pslld, vars, vard) + +#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg) +#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg) +#define 
psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd) +#define psllw(vars, vard) mmx_m2m(psllw, vars, vard) + + +/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical +*/ +#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg) +#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg) +#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd) +#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard) + +#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg) +#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg) +#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd) +#define psrld(vars, vard) mmx_m2m(psrld, vars, vard) + +#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg) +#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg) +#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd) +#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard) + + +/* 2x32 and 4x16 Parallel Shift Right Arithmetic +*/ +#define psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg) +#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg) +#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd) +#define psrad(vars, vard) mmx_m2m(psrad, vars, vard) + +#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg) +#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg) +#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd) +#define psraw(vars, vard) mmx_m2m(psraw, vars, vard) + + +/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate + (packs source and dest fields into dest in that order) +*/ +#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg) +#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd) +#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard) + +#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg) +#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd) +#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard) + + +/* 4x16->8x8 PACK and Unsigned Saturate + (packs source and dest fields into dest in that order) +*/ +#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg) +#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd) +#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard) + + +/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low + (interleaves low half of dest with low half of source + as padding in each result field) +*/ +#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg) +#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd) +#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard) + +#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg) +#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd) +#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard) + +#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg) +#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd) +#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard) + + +/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High + (interleaves high half of dest with high half of source + as padding in each result field) +*/ +#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg) +#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd) +#define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard) + +#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg) +#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd) +#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard) + +#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg) +#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd) +#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, 
vars, vard) + + +/* Empty MMx State + (used to clean-up when going from mmx to float use + of the registers that are shared by both; note that + there is no float-to-mmx operation needed, because + only the float tag word info is corruptible) +*/ +#ifdef MMX_TRACE + +#define emms() \ + { \ + fprintf(stderr, "emms()\n"); \ + __asm__ __volatile__ ("emms"); \ + } + +#else + +#define emms() __asm__ __volatile__ ("emms") + +#endif + +#endif diff --git a/gst/deinterlace/tvtime/plugins.h b/gst/deinterlace/tvtime/plugins.h new file mode 100644 index 00000000..8fb01af5 --- /dev/null +++ b/gst/deinterlace/tvtime/plugins.h @@ -0,0 +1,54 @@ +/* + * + * GStreamer + * Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net> + * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs. + * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578 + */ + +#ifndef TVTIME_PLUGINS_H_INCLUDED +#define TVTIME_PLUGINS_H_INCLUDED + +#define GST_TYPE_DEINTERLACE_TOMSMOCOMP (gst_deinterlace_method_tomsmocomp_get_type ()) +#define GST_TYPE_DEINTERLACE_GREEDY_H (gst_deinterlace_method_greedy_h_get_type ()) +#define GST_TYPE_DEINTERLACE_GREEDY_L (gst_deinterlace_method_greedy_l_get_type ()) +#define GST_TYPE_DEINTERLACE_VFIR (gst_deinterlace_method_vfir_get_type ()) +#define GST_TYPE_DEINTERLACE_LINEAR (gst_deinterlace_method_linear_get_type ()) +#define GST_TYPE_DEINTERLACE_LINEAR_BLEND (gst_deinterlace_method_linear_blend_get_type ()) +#define GST_TYPE_DEINTERLACE_SCALER_BOB (gst_deinterlace_method_scaler_bob_get_type ()) +#define GST_TYPE_DEINTERLACE_WEAVE (gst_deinterlace_method_weave_get_type ()) +#define GST_TYPE_DEINTERLACE_WEAVE_TFF (gst_deinterlace_method_weave_tff_get_type ()) +#define GST_TYPE_DEINTERLACE_WEAVE_BFF (gst_deinterlace_method_weave_bff_get_type ()) + +GType gst_deinterlace_method_tomsmocomp_get_type (void); +GType gst_deinterlace_method_greedy_h_get_type (void); +GType gst_deinterlace_method_greedy_l_get_type (void); +GType gst_deinterlace_method_vfir_get_type (void); + +GType gst_deinterlace_method_linear_get_type (void); +GType gst_deinterlace_method_linear_blend_get_type (void); +GType gst_deinterlace_method_scaler_bob_get_type (void); +GType gst_deinterlace_method_weave_get_type (void); +GType gst_deinterlace_method_weave_tff_get_type (void); +GType gst_deinterlace_method_weave_bff_get_type (void); + +#endif /* TVTIME_PLUGINS_H_INCLUDED */ diff --git a/gst/deinterlace/tvtime/scalerbob.c b/gst/deinterlace/tvtime/scalerbob.c new file mode 100644 index 00000000..a7bca169 --- /dev/null +++ b/gst/deinterlace/tvtime/scalerbob.c @@ -0,0 +1,74 @@ +/** + * Double lines + * Copyright (C) 2008 Sebastian Dröge <sebastian.droege@collabora.co.uk> + * + * This library is free 
software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB (gst_deinterlace_method_scaler_bob_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_SCALER_BOB(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB))
+#define GST_IS_DEINTERLACE_METHOD_SCALER_BOB_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB))
+#define GST_DEINTERLACE_METHOD_SCALER_BOB_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB, GstDeinterlaceMethodScalerBobClass))
+#define GST_DEINTERLACE_METHOD_SCALER_BOB(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB, GstDeinterlaceMethodScalerBob))
+#define GST_DEINTERLACE_METHOD_SCALER_BOB_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_SCALER_BOB, GstDeinterlaceMethodScalerBobClass))
+#define GST_DEINTERLACE_METHOD_SCALER_BOB_CAST(obj) ((GstDeinterlaceMethodScalerBob*)(obj))
+
+GType gst_deinterlace_method_scaler_bob_get_type (void);
+
+typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodScalerBob;
+
+typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodScalerBobClass;
+
+
+static void
+deinterlace_scanline_scaler_bob (GstDeinterlaceMethod * self,
+    GstDeinterlace * parent, guint8 * out,
+    GstDeinterlaceScanlineData * scanlines, gint width)
+{
+  oil_memcpy (out, scanlines->t0, parent->row_stride);
+}
+
+G_DEFINE_TYPE (GstDeinterlaceMethodScalerBob, gst_deinterlace_method_scaler_bob,
+    GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+gst_deinterlace_method_scaler_bob_class_init (GstDeinterlaceMethodScalerBobClass
+    * klass)
+{
+  GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+  GstDeinterlaceSimpleMethodClass *dism_class =
+      (GstDeinterlaceSimpleMethodClass *) klass;
+
+  dim_class->fields_required = 1;
+  dim_class->name = "Double lines";
+  dim_class->nick = "scalerbob";
+  dim_class->latency = 0;
+
+  dism_class->interpolate_scanline = deinterlace_scanline_scaler_bob;
+}
+
+static void
+gst_deinterlace_method_scaler_bob_init (GstDeinterlaceMethodScalerBob * self)
+{
+} diff --git a/gst/deinterlace/tvtime/sse.h b/gst/deinterlace/tvtime/sse.h new file mode 100644 index 00000000..2e00ee0c --- /dev/null +++ b/gst/deinterlace/tvtime/sse.h @@ -0,0 +1,992 @@ +/* sse.h
+
+	Streaming SIMD Extensions (a.k.a. Katmai New Instructions)
+	GCC interface library for IA32.
+
+	To use this library, simply include this header file
+	and compile with GCC.  You MUST have inlining enabled
+	in order for sse_ok() to work; this can be done by
+	simply using -O on the GCC command line.
+ + Compiling with -DSSE_TRACE will cause detailed trace + output to be sent to stderr for each sse operation. + This adds lots of code, and obviously slows execution to + a crawl, but can be very useful for debugging. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT + LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR ANY PARTICULAR PURPOSE. + + 1999 by R. Fisher + Based on libmmx by H. Dietz and R. Fisher + + Notes: + This is still extremely alpha. + Because this library depends on an assembler which understands the + SSE opcodes, you probably won't be able to use this yet. + For now, do not use TRACE versions. These both make use + of the MMX registers, not the SSE registers. This will be resolved + at a later date. + ToDo: + Rewrite TRACE macros + Major Debugging Work +*/ + +#ifndef _SSE_H +#define _SSE_H + + + +/* The type of an value that fits in an SSE register + (note that long long constant values MUST be suffixed + by LL and unsigned long long values by ULL, lest + they be truncated by the compiler) +*/ +typedef union { + float sf[4]; /* Single-precision (32-bit) value */ +} __attribute__ ((aligned (16))) sse_t; /* On a 16 byte (128-bit) boundary */ + + +#if 0 +/* Function to test if multimedia instructions are supported... +*/ +inline extern int +mm_support(void) +{ + /* Returns 1 if MMX instructions are supported, + 3 if Cyrix MMX and Extended MMX instructions are supported + 5 if AMD MMX and 3DNow! instructions are supported + 9 if MMX and SSE instructions are supported + 0 if hardware does not support any of these + */ + register int rval = 0; + + __asm__ __volatile__ ( + /* See if CPUID instruction is supported ... */ + /* ... Get copies of EFLAGS into eax and ecx */ + "pushf\n\t" + "popl %%eax\n\t" + "movl %%eax, %%ecx\n\t" + + /* ... Toggle the ID bit in one copy and store */ + /* to the EFLAGS reg */ + "xorl $0x200000, %%eax\n\t" + "push %%eax\n\t" + "popf\n\t" + + /* ... Get the (hopefully modified) EFLAGS */ + "pushf\n\t" + "popl %%eax\n\t" + + /* ... Compare and test result */ + "xorl %%eax, %%ecx\n\t" + "testl $0x200000, %%ecx\n\t" + "jz NotSupported1\n\t" /* CPUID not supported */ + + + /* Get standard CPUID information, and + go to a specific vendor section */ + "movl $0, %%eax\n\t" + "cpuid\n\t" + + /* Check for Intel */ + "cmpl $0x756e6547, %%ebx\n\t" + "jne TryAMD\n\t" + "cmpl $0x49656e69, %%edx\n\t" + "jne TryAMD\n\t" + "cmpl $0x6c65746e, %%ecx\n" + "jne TryAMD\n\t" + "jmp Intel\n\t" + + /* Check for AMD */ + "\nTryAMD:\n\t" + "cmpl $0x68747541, %%ebx\n\t" + "jne TryCyrix\n\t" + "cmpl $0x69746e65, %%edx\n\t" + "jne TryCyrix\n\t" + "cmpl $0x444d4163, %%ecx\n" + "jne TryCyrix\n\t" + "jmp AMD\n\t" + + /* Check for Cyrix */ + "\nTryCyrix:\n\t" + "cmpl $0x69727943, %%ebx\n\t" + "jne NotSupported2\n\t" + "cmpl $0x736e4978, %%edx\n\t" + "jne NotSupported3\n\t" + "cmpl $0x64616574, %%ecx\n\t" + "jne NotSupported4\n\t" + /* Drop through to Cyrix... */ + + + /* Cyrix Section */ + /* See if extended CPUID level 80000001 is supported */ + /* The value of CPUID/80000001 for the 6x86MX is undefined + according to the Cyrix CPU Detection Guide (Preliminary + Rev. 1.01 table 1), so we'll check the value of eax for + CPUID/0 to see if standard CPUID level 2 is supported. + According to the table, the only CPU which supports level + 2 is also the only one which supports extended CPUID levels. 
+ */ + "cmpl $0x2, %%eax\n\t" + "jne MMXtest\n\t" /* Use standard CPUID instead */ + + /* Extended CPUID supported (in theory), so get extended + features */ + "movl $0x80000001, %%eax\n\t" + "cpuid\n\t" + "testl $0x00800000, %%eax\n\t" /* Test for MMX */ + "jz NotSupported5\n\t" /* MMX not supported */ + "testl $0x01000000, %%eax\n\t" /* Test for Ext'd MMX */ + "jnz EMMXSupported\n\t" + "movl $1, %0:\n\n\t" /* MMX Supported */ + "jmp Return\n\n" + "EMMXSupported:\n\t" + "movl $3, %0:\n\n\t" /* EMMX and MMX Supported */ + "jmp Return\n\t" + + + /* AMD Section */ + "AMD:\n\t" + + /* See if extended CPUID is supported */ + "movl $0x80000000, %%eax\n\t" + "cpuid\n\t" + "cmpl $0x80000000, %%eax\n\t" + "jl MMXtest\n\t" /* Use standard CPUID instead */ + + /* Extended CPUID supported, so get extended features */ + "movl $0x80000001, %%eax\n\t" + "cpuid\n\t" + "testl $0x00800000, %%edx\n\t" /* Test for MMX */ + "jz NotSupported6\n\t" /* MMX not supported */ + "testl $0x80000000, %%edx\n\t" /* Test for 3DNow! */ + "jnz ThreeDNowSupported\n\t" + "movl $1, %0:\n\n\t" /* MMX Supported */ + "jmp Return\n\n" + "ThreeDNowSupported:\n\t" + "movl $5, %0:\n\n\t" /* 3DNow! and MMX Supported */ + "jmp Return\n\t" + + + /* Intel Section */ + "Intel:\n\t" + + /* Check for SSE */ + "SSEtest:\n\t" + "movl $1, %%eax\n\t" + "cpuid\n\t" + "testl $0x02000000, %%edx\n\t" /* Test for SSE */ + "jz MMXtest\n\t" /* SSE Not supported */ + "movl $9, %0:\n\n\t" /* SSE Supported */ + "jmp Return\n\t" + + /* Check for MMX */ + "MMXtest:\n\t" + "movl $1, %%eax\n\t" + "cpuid\n\t" + "testl $0x00800000, %%edx\n\t" /* Test for MMX */ + "jz NotSupported7\n\t" /* MMX Not supported */ + "movl $1, %0:\n\n\t" /* MMX Supported */ + "jmp Return\n\t" + + /* Nothing supported */ + "\nNotSupported1:\n\t" + "#movl $101, %0:\n\n\t" + "\nNotSupported2:\n\t" + "#movl $102, %0:\n\n\t" + "\nNotSupported3:\n\t" + "#movl $103, %0:\n\n\t" + "\nNotSupported4:\n\t" + "#movl $104, %0:\n\n\t" + "\nNotSupported5:\n\t" + "#movl $105, %0:\n\n\t" + "\nNotSupported6:\n\t" + "#movl $106, %0:\n\n\t" + "\nNotSupported7:\n\t" + "#movl $107, %0:\n\n\t" + "movl $0, %0:\n\n\t" + + "Return:\n\t" + : "=a" (rval) + : /* no input */ + : "eax", "ebx", "ecx", "edx" + ); + + /* Return */ + return(rval); +} + +/* Function to test if sse instructions are supported... +*/ +inline extern int +sse_ok(void) +{ + /* Returns 1 if SSE instructions are supported, 0 otherwise */ + return ( (mm_support() & 0x8) >> 3 ); +} +#endif + + + +/* Helper functions for the instruction macros that follow... + (note that memory-to-register, m2r, instructions are nearly + as efficient as register-to-register, r2r, instructions; + however, memory-to-memory instructions are really simulated + as a convenience, and are only 1/3 as efficient) +*/ +#ifdef SSE_TRACE + +/* Include the stuff for printing a trace to stderr... 
+*/ + +#include <stdio.h> + +#define sse_i2r(op, imm, reg) \ + { \ + sse_t sse_trace; \ + sse_trace.uq = (imm); \ + fprintf(stderr, #op "_i2r(" #imm "=0x%08x%08x, ", \ + sse_trace.d[1], sse_trace.d[0]); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (sse_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%08x%08x) => ", \ + sse_trace.d[1], sse_trace.d[0]); \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (imm)); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (sse_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%08x%08x\n", \ + sse_trace.d[1], sse_trace.d[0]); \ + } + +#define sse_m2r(op, mem, reg) \ + { \ + sse_t sse_trace; \ + sse_trace = (mem); \ + fprintf(stderr, #op "_m2r(" #mem "=0x%08x%08x, ", \ + sse_trace.d[1], sse_trace.d[0]); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (sse_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%08x%08x) => ", \ + sse_trace.d[1], sse_trace.d[0]); \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem)); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (sse_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%08x%08x\n", \ + sse_trace.d[1], sse_trace.d[0]); \ + } + +#define sse_r2m(op, reg, mem) \ + { \ + sse_t sse_trace; \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (sse_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #op "_r2m(" #reg "=0x%08x%08x, ", \ + sse_trace.d[1], sse_trace.d[0]); \ + sse_trace = (mem); \ + fprintf(stderr, #mem "=0x%08x%08x) => ", \ + sse_trace.d[1], sse_trace.d[0]); \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=X" (mem) \ + : /* nothing */ ); \ + sse_trace = (mem); \ + fprintf(stderr, #mem "=0x%08x%08x\n", \ + sse_trace.d[1], sse_trace.d[0]); \ + } + +#define sse_r2r(op, regs, regd) \ + { \ + sse_t sse_trace; \ + __asm__ __volatile__ ("movq %%" #regs ", %0" \ + : "=X" (sse_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #op "_r2r(" #regs "=0x%08x%08x, ", \ + sse_trace.d[1], sse_trace.d[0]); \ + __asm__ __volatile__ ("movq %%" #regd ", %0" \ + : "=X" (sse_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #regd "=0x%08x%08x) => ", \ + sse_trace.d[1], sse_trace.d[0]); \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd); \ + __asm__ __volatile__ ("movq %%" #regd ", %0" \ + : "=X" (sse_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #regd "=0x%08x%08x\n", \ + sse_trace.d[1], sse_trace.d[0]); \ + } + +#define sse_m2m(op, mems, memd) \ + { \ + sse_t sse_trace; \ + sse_trace = (mems); \ + fprintf(stderr, #op "_m2m(" #mems "=0x%08x%08x, ", \ + sse_trace.d[1], sse_trace.d[0]); \ + sse_trace = (memd); \ + fprintf(stderr, #memd "=0x%08x%08x) => ", \ + sse_trace.d[1], sse_trace.d[0]); \ + __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ + #op " %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=X" (memd) \ + : "X" (mems)); \ + sse_trace = (memd); \ + fprintf(stderr, #memd "=0x%08x%08x\n", \ + sse_trace.d[1], sse_trace.d[0]); \ + } + +#else + +/* These macros are a lot simpler without the tracing... 
+*/
+
+#define sse_i2r(op, imm, reg) \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "X" (imm) )
+
+#define sse_m2r(op, mem, reg) \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "X" (mem))
+
+#define sse_r2m(op, reg, mem) \
+	__asm__ __volatile__ (#op " %%" #reg ", %0" \
+			      : "=X" (mem) \
+			      : /* nothing */ )
+
+#define sse_r2r(op, regs, regd) \
+	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+#define sse_r2ri(op, regs, regd, imm) \
+	__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
+			      : /* nothing */ \
+			      : "X" (imm) )
+
+/* Load data from mems to xmm0, operate on xmm0, and store data to memd */
+#define sse_m2m(op, mems, memd, xmmreg) \
+	__asm__ __volatile__ ("movups %0, %%xmm0\n\t" \
+			      #op " %1, %%xmm0\n\t" \
+			      "movups %%xmm0, %0" \
+			      : "=X" (memd) \
+			      : "X" (mems))
+
+#define sse_m2ri(op, mem, reg, subop) \
+	__asm__ __volatile__ (#op " %0, %%" #reg ", " #subop \
+			      : /* nothing */ \
+			      : "X" (mem))
+
+#define sse_m2mi(op, mems, memd, xmmreg, subop) \
+	__asm__ __volatile__ ("movups %0, %%xmm0\n\t" \
+			      #op " %1, %%xmm0, " #subop "\n\t" \
+			      "movups %%xmm0, %0" \
+			      : "=X" (memd) \
+			      : "X" (mems))
+#endif
+
+
+
+
+/* 1x128 MOVe Aligned four Packed Single-fp
+*/
+#define movaps_m2r(var, reg) sse_m2r(movaps, var, reg)
+#define movaps_r2m(reg, var) sse_r2m(movaps, reg, var)
+#define movaps_r2r(regs, regd) sse_r2r(movaps, regs, regd)
+#define movaps(vars, vard) \
+	__asm__ __volatile__ ("movaps %1, %%xmm0\n\t" \
+			      "movaps %%xmm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/* 1x128 MOVe aligned Non-Temporal four Packed Single-fp
+*/
+#define movntps_r2m(xmmreg, var) sse_r2m(movntps, xmmreg, var)
+
+
+/* 1x64 MOVe Non-Temporal Quadword
+*/
+#define movntq_r2m(mmreg, var) sse_r2m(movntq, mmreg, var)
+
+
+/* 1x128 MOVe Unaligned four Packed Single-fp
+*/
+#define movups_m2r(var, reg) sse_m2r(movups, var, reg)
+#define movups_r2m(reg, var) sse_r2m(movups, reg, var)
+#define movups_r2r(regs, regd) sse_r2r(movups, regs, regd)
+#define movups(vars, vard) \
+	__asm__ __volatile__ ("movups %1, %%xmm0\n\t" \
+			      "movups %%xmm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/* MOVe High to Low Packed Single-fp
+   high half of 4x32f (x) -> low half of 4x32f (y)
+*/
+#define movhlps_r2r(regs, regd) sse_r2r(movhlps, regs, regd)
+
+
+/* MOVe Low to High Packed Single-fp
+   low half of 4x32f (x) -> high half of 4x32f (y)
+*/
+#define movlhps_r2r(regs, regd) sse_r2r(movlhps, regs, regd)
+
+
+/* MOVe High Packed Single-fp
+   2x32f -> high half of 4x32f
+*/
+#define movhps_m2r(var, reg) sse_m2r(movhps, var, reg)
+#define movhps_r2m(reg, var) sse_r2m(movhps, reg, var)
+#define movhps(vars, vard) \
+	__asm__ __volatile__ ("movhps %1, %%xmm0\n\t" \
+			      "movhps %%xmm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/* MOVe Low Packed Single-fp
+   2x32f -> low half of 4x32f
+*/
+#define movlps_m2r(var, reg) sse_m2r(movlps, var, reg)
+#define movlps_r2m(reg, var) sse_r2m(movlps, reg, var)
+#define movlps(vars, vard) \
+	__asm__ __volatile__ ("movlps %1, %%xmm0\n\t" \
+			      "movlps %%xmm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/* MOVe Scalar Single-fp
+   lowest field of 4x32f (x) -> lowest field of 4x32f (y)
+*/
+#define movss_m2r(var, reg) sse_m2r(movss, var, reg)
+#define movss_r2m(reg, var) sse_r2m(movss, reg, var)
+#define movss_r2r(regs, regd) sse_r2r(movss, regs, regd)
+#define movss(vars, vard) \
+	__asm__ __volatile__ ("movss %1, %%xmm0\n\t" \
+			      "movss %%xmm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
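+
+
+/* Usage sketch (an illustration added here, not part of the original
+   tvtime/DScaler header): summing two packed-float vectors with the
+   wrapper macros above.  The function name is hypothetical; it assumes
+   the caller has already verified SSE support at runtime.  The add is
+   spelled through the raw sse_m2r() helper because the addps_m2r macro
+   is only defined further down in this header.  Like the rest of the
+   header, it relies on xmm0 implicitly and declares no clobbers.
+*/
+static inline void
+sse_add4f_sketch (sse_t *a, sse_t *b, sse_t *sum)
+{
+	movups_m2r (*a, xmm0);		/* xmm0 = four floats loaded from a */
+	sse_m2r (addps, *b, xmm0);	/* xmm0 += b, four parallel float adds */
+	movups_r2m (xmm0, *sum);	/* store the packed sums back to memory */
+}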
+
+
+/* 4x16 Packed SHUFfle Word
+*/
+#define pshufw_m2r(var, reg, index) sse_m2ri(pshufw, var, reg, index)
+#define pshufw_r2r(regs, regd, index) sse_r2ri(pshufw, regs, regd, index)
+
+
+/* 1x128 SHUFfle Packed Single-fp
+*/
+#define shufps_m2r(var, reg, index) sse_m2ri(shufps, var, reg, index)
+#define shufps_r2r(regs, regd, index) sse_r2ri(shufps, regs, regd, index)
+
+
+/* ConVerT Packed signed Int32 to(2) Packed Single-fp
+*/
+#define cvtpi2ps_m2r(var, xmmreg) sse_m2r(cvtpi2ps, var, xmmreg)
+#define cvtpi2ps_r2r(mmreg, xmmreg) sse_r2r(cvtpi2ps, mmreg, xmmreg)
+
+
+/* ConVerT Packed Single-fp to(2) Packed signed Int32
+*/
+#define cvtps2pi_m2r(var, mmreg) sse_m2r(cvtps2pi, var, mmreg)
+#define cvtps2pi_r2r(xmmreg, mmreg) sse_r2r(cvtps2pi, xmmreg, mmreg)
+
+
+/* ConVerT with Truncate Packed Single-fp to(2) Packed Int32
+*/
+#define cvttps2pi_m2r(var, mmreg) sse_m2r(cvttps2pi, var, mmreg)
+#define cvttps2pi_r2r(xmmreg, mmreg) sse_r2r(cvttps2pi, xmmreg, mmreg)
+
+
+/* ConVerT Signed Int32 to(2) Single-fp (Scalar)
+*/
+#define cvtsi2ss_m2r(var, xmmreg) sse_m2r(cvtsi2ss, var, xmmreg)
+#define cvtsi2ss_r2r(reg, xmmreg) sse_r2r(cvtsi2ss, reg, xmmreg)
+
+
+/* ConVerT Scalar Single-fp to(2) Signed Int32
+*/
+#define cvtss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg)
+#define cvtss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg)
+
+
+/* ConVerT with Truncate Scalar Single-fp to(2) Signed Int32
+*/
+#define cvttss2si_m2r(var, reg) sse_m2r(cvttss2si, var, reg)
+#define cvttss2si_r2r(xmmreg, reg) sse_r2r(cvttss2si, xmmreg, reg)
+
+
+/* Parallel EXTRact Word from 4x16
+*/
+#define pextrw_r2r(mmreg, reg, field) sse_r2ri(pextrw, mmreg, reg, field)
+
+
+/* Parallel INSeRt Word from 4x16
+*/
+#define pinsrw_r2r(reg, mmreg, field) sse_r2ri(pinsrw, reg, mmreg, field)
+
+
+
+/* MOVe MaSK from Packed Single-fp
+*/
+#ifdef SSE_TRACE
+	#define movmskps(xmmreg, reg) \
+	{ \
+		fprintf(stderr, "movmskps()\n"); \
+		__asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg); \
+	}
+#else
+	#define movmskps(xmmreg, reg) \
+	__asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg)
+#endif
+
+
+/* Parallel MOVe MaSK from mmx reg to 32-bit reg
+*/
+#ifdef SSE_TRACE
+	#define pmovmskb(mmreg, reg) \
+	{ \
+		fprintf(stderr, "pmovmskb()\n"); \
+		__asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg); \
+	}
+#else
+	#define pmovmskb(mmreg, reg) \
+	__asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg)
+#endif
+
+
+/* MASKed MOVe from 8x8 to memory pointed to by (e)di register
+*/
+#define maskmovq(mmregs, fieldreg) sse_r2r(maskmovq, mmregs, fieldreg)
+
+
+
+
+/* 4x32f Parallel ADDs
+*/
+#define addps_m2r(var, reg) sse_m2r(addps, var, reg)
+#define addps_r2r(regs, regd) sse_r2r(addps, regs, regd)
+#define addps(vars, vard, xmmreg) sse_m2m(addps, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Parallel ADDs
+*/
+#define addss_m2r(var, reg) sse_m2r(addss, var, reg)
+#define addss_r2r(regs, regd) sse_r2r(addss, regs, regd)
+#define addss(vars, vard, xmmreg) sse_m2m(addss, vars, vard, xmmreg)
+
+
+/* 4x32f Parallel SUBs
+*/
+#define subps_m2r(var, reg) sse_m2r(subps, var, reg)
+#define subps_r2r(regs, regd) sse_r2r(subps, regs, regd)
+#define subps(vars, vard, xmmreg) sse_m2m(subps, vars, vard, xmmreg)
+
+
+/* Lowest Field of 4x32f Parallel SUBs
+*/
+#define subss_m2r(var, reg) sse_m2r(subss, var, reg)
+#define subss_r2r(regs, regd) sse_r2r(subss, regs, regd)
+#define subss(vars, vard, xmmreg) sse_m2m(subss, vars, vard, xmmreg)
+
+
+/* 8x8u -> 4x16u Packed Sum of Absolute Differences
+*/
+#define psadbw_m2r(var, reg) sse_m2r(psadbw, var, reg)
+#define psadbw_r2r(regs, regd) sse_r2r(psadbw, regs, regd)
+#define psadbw(vars, vard, mmreg) sse_m2m(psadbw, vars, vard, mmreg) + + +/* 4x16u Parallel MUL High Unsigned +*/ +#define pmulhuw_m2r(var, reg) sse_m2r(pmulhuw, var, reg) +#define pmulhuw_r2r(regs, regd) sse_r2r(pmulhuw, regs, regd) +#define pmulhuw(vars, vard, mmreg) sse_m2m(pmulhuw, vars, vard, mmreg) + + +/* 4x32f Parallel MULs +*/ +#define mulps_m2r(var, reg) sse_m2r(mulps, var, reg) +#define mulps_r2r(regs, regd) sse_r2r(mulps, regs, regd) +#define mulps(vars, vard, xmmreg) sse_m2m(mulps, vars, vard, xmmreg) + + +/* Lowest Field of 4x32f Parallel MULs +*/ +#define mulss_m2r(var, reg) sse_m2r(mulss, var, reg) +#define mulss_r2r(regs, regd) sse_r2r(mulss, regs, regd) +#define mulss(vars, vard, xmmreg) sse_m2m(mulss, vars, vard, xmmreg) + + +/* 4x32f Parallel DIVs +*/ +#define divps_m2r(var, reg) sse_m2r(divps, var, reg) +#define divps_r2r(regs, regd) sse_r2r(divps, regs, regd) +#define divps(vars, vard, xmmreg) sse_m2m(divps, vars, vard, xmmreg) + + +/* Lowest Field of 4x32f Parallel DIVs +*/ +#define divss_m2r(var, reg) sse_m2r(divss, var, reg) +#define divss_r2r(regs, regd) sse_r2r(divss, regs, regd) +#define divss(vars, vard, xmmreg) sse_m2m(divss, vars, vard, xmmreg) + + +/* 4x32f Parallel Reciprocals +*/ +#define rcpps_m2r(var, reg) sse_m2r(rcpps, var, reg) +#define rcpps_r2r(regs, regd) sse_r2r(rcpps, regs, regd) +#define rcpps(vars, vard, xmmreg) sse_m2m(rcpps, vars, vard, xmmreg) + + +/* Lowest Field of 4x32f Parallel Reciprocals +*/ +#define rcpss_m2r(var, reg) sse_m2r(rcpss, var, reg) +#define rcpss_r2r(regs, regd) sse_r2r(rcpss, regs, regd) +#define rcpss(vars, vard, xmmreg) sse_m2m(rcpss, vars, vard, xmmreg) + + +/* 4x32f Parallel Square Root of Reciprocals +*/ +#define rsqrtps_m2r(var, reg) sse_m2r(rsqrtps, var, reg) +#define rsqrtps_r2r(regs, regd) sse_r2r(rsqrtps, regs, regd) +#define rsqrtps(vars, vard, xmmreg) sse_m2m(rsqrtps, vars, vard, xmmreg) + + +/* Lowest Field of 4x32f Parallel Square Root of Reciprocals +*/ +#define rsqrtss_m2r(var, reg) sse_m2r(rsqrtss, var, reg) +#define rsqrtss_r2r(regs, regd) sse_r2r(rsqrtss, regs, regd) +#define rsqrtss(vars, vard, xmmreg) sse_m2m(rsqrtss, vars, vard, xmmreg) + + +/* 4x32f Parallel Square Roots +*/ +#define sqrtps_m2r(var, reg) sse_m2r(sqrtps, var, reg) +#define sqrtps_r2r(regs, regd) sse_r2r(sqrtps, regs, regd) +#define sqrtps(vars, vard, xmmreg) sse_m2m(sqrtps, vars, vard, xmmreg) + + +/* Lowest Field of 4x32f Parallel Square Roots +*/ +#define sqrtss_m2r(var, reg) sse_m2r(sqrtss, var, reg) +#define sqrtss_r2r(regs, regd) sse_r2r(sqrtss, regs, regd) +#define sqrtss(vars, vard, xmmreg) sse_m2m(sqrtss, vars, vard, xmmreg) + + +/* 8x8u and 4x16u Parallel AVeraGe +*/ +#define pavgb_m2r(var, reg) sse_m2r(pavgb, var, reg) +#define pavgb_r2r(regs, regd) sse_r2r(pavgb, regs, regd) +#define pavgb(vars, vard, mmreg) sse_m2m(pavgb, vars, vard, mmreg) + +#define pavgw_m2r(var, reg) sse_m2r(pavgw, var, reg) +#define pavgw_r2r(regs, regd) sse_r2r(pavgw, regs, regd) +#define pavgw(vars, vard, mmreg) sse_m2m(pavgw, vars, vard, mmreg) + + +/* 1x128 bitwise AND +*/ +#define andps_m2r(var, reg) sse_m2r(andps, var, reg) +#define andps_r2r(regs, regd) sse_r2r(andps, regs, regd) +#define andps(vars, vard, xmmreg) sse_m2m(andps, vars, vard, xmmreg) + + +/* 1x128 bitwise AND with Not the destination +*/ +#define andnps_m2r(var, reg) sse_m2r(andnps, var, reg) +#define andnps_r2r(regs, regd) sse_r2r(andnps, regs, regd) +#define andnps(vars, vard, xmmreg) sse_m2m(andnps, vars, vard, xmmreg) + + +/* 1x128 bitwise OR +*/ +#define orps_m2r(var, reg) 
sse_m2r(orps, var, reg) +#define orps_r2r(regs, regd) sse_r2r(orps, regs, regd) +#define orps(vars, vard, xmmreg) sse_m2m(orps, vars, vard, xmmreg) + + +/* 1x128 bitwise eXclusive OR +*/ +#define xorps_m2r(var, reg) sse_m2r(xorps, var, reg) +#define xorps_r2r(regs, regd) sse_r2r(xorps, regs, regd) +#define xorps(vars, vard, xmmreg) sse_m2m(xorps, vars, vard, xmmreg) + + +/* 8x8u, 4x16, and 4x32f Parallel Maximum +*/ +#define pmaxub_m2r(var, reg) sse_m2r(pmaxub, var, reg) +#define pmaxub_r2r(regs, regd) sse_r2r(pmaxub, regs, regd) +#define pmaxub(vars, vard, mmreg) sse_m2m(pmaxub, vars, vard, mmreg) + +#define pmaxsw_m2r(var, reg) sse_m2r(pmaxsw, var, reg) +#define pmaxsw_r2r(regs, regd) sse_r2r(pmaxsw, regs, regd) +#define pmaxsw(vars, vard, mmreg) sse_m2m(pmaxsw, vars, vard, mmreg) + +#define maxps_m2r(var, reg) sse_m2r(maxps, var, reg) +#define maxps_r2r(regs, regd) sse_r2r(maxps, regs, regd) +#define maxps(vars, vard, xmmreg) sse_m2m(maxps, vars, vard, xmmreg) + + +/* Lowest Field of 4x32f Parallel Maximum +*/ +#define maxss_m2r(var, reg) sse_m2r(maxss, var, reg) +#define maxss_r2r(regs, regd) sse_r2r(maxss, regs, regd) +#define maxss(vars, vard, xmmreg) sse_m2m(maxss, vars, vard, xmmreg) + + +/* 8x8u, 4x16, and 4x32f Parallel Minimum +*/ +#define pminub_m2r(var, reg) sse_m2r(pminub, var, reg) +#define pminub_r2r(regs, regd) sse_r2r(pminub, regs, regd) +#define pminub(vars, vard, mmreg) sse_m2m(pminub, vars, vard, mmreg) + +#define pminsw_m2r(var, reg) sse_m2r(pminsw, var, reg) +#define pminsw_r2r(regs, regd) sse_r2r(pminsw, regs, regd) +#define pminsw(vars, vard, mmreg) sse_m2m(pminsw, vars, vard, mmreg) + +#define minps_m2r(var, reg) sse_m2r(minps, var, reg) +#define minps_r2r(regs, regd) sse_r2r(minps, regs, regd) +#define minps(vars, vard, xmmreg) sse_m2m(minps, vars, vard, xmmreg) + + +/* Lowest Field of 4x32f Parallel Minimum +*/ +#define minss_m2r(var, reg) sse_m2r(minss, var, reg) +#define minss_r2r(regs, regd) sse_r2r(minss, regs, regd) +#define minss(vars, vard, xmmreg) sse_m2m(minss, vars, vard, xmmreg) + + +/* 4x32f Parallel CoMPares + (resulting fields are either 0 or -1) +*/ +#define cmpps_m2r(var, reg, op) sse_m2ri(cmpps, var, reg, op) +#define cmpps_r2r(regs, regd, op) sse_r2ri(cmpps, regs, regd, op) +#define cmpps(vars, vard, op, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, op) + +#define cmpeqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 0) +#define cmpeqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 0) +#define cmpeqps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 0) + +#define cmpltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 1) +#define cmpltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 1) +#define cmpltps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 1) + +#define cmpleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 2) +#define cmpleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 2) +#define cmpleps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 2) + +#define cmpunordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 3) +#define cmpunordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 3) +#define cmpunordps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 3) + +#define cmpneqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 4) +#define cmpneqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 4) +#define cmpneqps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 4) + +#define cmpnltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 5) +#define cmpnltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 5) +#define cmpnltps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, 
xmmreg, 5) + +#define cmpnleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 6) +#define cmpnleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 6) +#define cmpnleps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 6) + +#define cmpordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 7) +#define cmpordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 7) +#define cmpordps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 7) + + +/* Lowest Field of 4x32f Parallel CoMPares + (resulting fields are either 0 or -1) +*/ +#define cmpss_m2r(var, reg, op) sse_m2ri(cmpss, var, reg, op) +#define cmpss_r2r(regs, regd, op) sse_r2ri(cmpss, regs, regd, op) +#define cmpss(vars, vard, op, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, op) + +#define cmpeqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 0) +#define cmpeqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 0) +#define cmpeqss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 0) + +#define cmpltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 1) +#define cmpltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 1) +#define cmpltss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 1) + +#define cmpless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 2) +#define cmpless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 2) +#define cmpless(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 2) + +#define cmpunordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 3) +#define cmpunordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 3) +#define cmpunordss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 3) + +#define cmpneqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 4) +#define cmpneqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 4) +#define cmpneqss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 4) + +#define cmpnltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 5) +#define cmpnltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 5) +#define cmpnltss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 5) + +#define cmpnless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 6) +#define cmpnless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 6) +#define cmpnless(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 6) + +#define cmpordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 7) +#define cmpordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 7) +#define cmpordss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 7) + + +/* Lowest Field of 4x32f Parallel CoMPares to set EFLAGS + (resulting fields are either 0 or -1) +*/ +#define comiss_m2r(var, reg) sse_m2r(comiss, var, reg) +#define comiss_r2r(regs, regd) sse_r2r(comiss, regs, regd) +#define comiss(vars, vard, xmmreg) sse_m2m(comiss, vars, vard, xmmreg) + + +/* Lowest Field of 4x32f Unordered Parallel CoMPares to set EFLAGS + (resulting fields are either 0 or -1) +*/ +#define ucomiss_m2r(var, reg) sse_m2r(ucomiss, var, reg) +#define ucomiss_r2r(regs, regd) sse_r2r(ucomiss, regs, regd) +#define ucomiss(vars, vard, xmmreg) sse_m2m(ucomiss, vars, vard, xmmreg) + + +/* 2-(4x32f) -> 4x32f UNPaCK Low Packed Single-fp + (interleaves low half of dest with low half of source + as padding in each result field) +*/ +#define unpcklps_m2r(var, reg) sse_m2r(unpcklps, var, reg) +#define unpcklps_r2r(regs, regd) sse_r2r(unpcklps, regs, regd) + + +/* 2-(4x32f) -> 4x32f UNPaCK High Packed Single-fp + (interleaves high half of dest with high half of source + as padding in each result field) +*/ +#define unpckhps_m2r(var, reg) sse_m2r(unpckhps, var, reg) +#define unpckhps_r2r(regs, regd) sse_r2r(unpckhps, regs, regd) + + + +/* Fp and mmX ReSTORe state +*/ 
+#ifdef SSE_TRACE
+	#define fxrstor(mem) \
+	{ \
+		fprintf(stderr, "fxrstor()\n"); \
+		__asm__ __volatile__ ("fxrstor %0" \
+				      : /* nothing */ \
+				      : "X" (mem)); \
+	}
+#else
+	#define fxrstor(mem) \
+	__asm__ __volatile__ ("fxrstor %0" \
+			      : /* nothing */ \
+			      : "X" (mem))
+#endif
+
+
+/* Fp and mmX SAVE state
+*/
+#ifdef SSE_TRACE
+	#define fxsave(mem) \
+	{ \
+		fprintf(stderr, "fxsave()\n"); \
+		__asm__ __volatile__ ("fxsave %0" \
+				      : /* nothing */ \
+				      : "X" (mem)); \
+	}
+#else
+	#define fxsave(mem) \
+	__asm__ __volatile__ ("fxsave %0" \
+			      : /* nothing */ \
+			      : "X" (mem))
+#endif
+
+
+/* STore streaMing simd eXtensions Control/Status Register
+*/
+#ifdef SSE_TRACE
+	#define stmxcsr(mem) \
+	{ \
+		fprintf(stderr, "stmxcsr()\n"); \
+		__asm__ __volatile__ ("stmxcsr %0" \
+				      : /* nothing */ \
+				      : "X" (mem)); \
+	}
+#else
+	#define stmxcsr(mem) \
+	__asm__ __volatile__ ("stmxcsr %0" \
+			      : /* nothing */ \
+			      : "X" (mem))
+#endif
+
+
+/* LoaD streaMing simd eXtensions Control/Status Register
+*/
+#ifdef SSE_TRACE
+	#define ldmxcsr(mem) \
+	{ \
+		fprintf(stderr, "ldmxcsr()\n"); \
+		__asm__ __volatile__ ("ldmxcsr %0" \
+				      : /* nothing */ \
+				      : "X" (mem)); \
+	}
+#else
+	#define ldmxcsr(mem) \
+	__asm__ __volatile__ ("ldmxcsr %0" \
+			      : /* nothing */ \
+			      : "X" (mem))
+#endif
+
+
+/* Store FENCE - enforce ordering of stores before fence vs. stores
+   occurring after fence in source code.
+*/
+#ifdef SSE_TRACE
+	#define sfence() \
+	{ \
+		fprintf(stderr, "sfence()\n"); \
+		__asm__ __volatile__ ("sfence\n\t"); \
+	}
+#else
+	#define sfence() \
+	__asm__ __volatile__ ("sfence\n\t")
+#endif
+
+
+/* PREFETCH data using T0, T1, T2, or NTA hint
+   T0  = Prefetch into all cache levels
+   T1  = Prefetch into all cache levels except 0th level
+   T2  = Prefetch into all cache levels except 0th and 1st levels
+   NTA = Prefetch data into non-temporal cache structure
+*/
+#ifdef SSE_TRACE
+#else
+	#define prefetch(mem, hint) \
+	__asm__ __volatile__ ("prefetch" #hint " %0" \
+			      : /* nothing */ \
+			      : "X" (mem))
+
+	#define prefetcht0(mem)  prefetch(mem, t0)
+	#define prefetcht1(mem)  prefetch(mem, t1)
+	#define prefetcht2(mem)  prefetch(mem, t2)
+	#define prefetchnta(mem) prefetch(mem, nta)
+#endif
+
+
+
+#endif diff --git a/gst/deinterlace/tvtime/tomsmocomp.c b/gst/deinterlace/tvtime/tomsmocomp.c new file mode 100644 index 00000000..3141fbac --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp.c @@ -0,0 +1,211 @@ +/**
+ * Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
+ * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <stdlib.h> +#include "_stdint.h" +#include <string.h> + +#include "gst/gst.h" +#include "gstdeinterlace.h" +#include "plugins.h" + +#define GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP (gst_deinterlace_method_tomsmocomp_get_type ()) +#define GST_IS_DEINTERLACE_METHOD_TOMSMOCOMP(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP)) +#define GST_IS_DEINTERLACE_METHOD_TOMSMOCOMP_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP)) +#define GST_DEINTERLACE_METHOD_TOMSMOCOMP_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP, GstDeinterlaceMethodTomsMoCompClass)) +#define GST_DEINTERLACE_METHOD_TOMSMOCOMP(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP, GstDeinterlaceMethodTomsMoComp)) +#define GST_DEINTERLACE_METHOD_TOMSMOCOMP_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP, GstDeinterlaceMethodTomsMoCompClass)) +#define GST_DEINTERLACE_METHOD_TOMSMOCOMP_CAST(obj) ((GstDeinterlaceMethodTomsMoComp*)(obj)) + +GType gst_deinterlace_method_tomsmocomp_get_type (void); + +typedef struct +{ + GstDeinterlaceMethod parent; + + guint search_effort; + gboolean strange_bob; +} GstDeinterlaceMethodTomsMoComp; + +typedef struct +{ + GstDeinterlaceMethodClass parent_class; +} GstDeinterlaceMethodTomsMoCompClass; + +static int +Fieldcopy (void *dest, const void *src, size_t count, + int rows, int dst_pitch, int src_pitch) +{ + unsigned char *pDest = (unsigned char *) dest; + unsigned char *pSrc = (unsigned char *) src; + + int i; + + for (i = 0; i < rows; i++) { + oil_memcpy (pDest, pSrc, count); + pSrc += src_pitch; + pDest += dst_pitch; + } + return 0; +} + +#define USE_FOR_DSCALER + +#define IS_C +#define SIMD_TYPE C +#define FUNCT_NAME tomsmocompDScaler_C +#include "tomsmocomp/TomsMoCompAll.inc" +#undef IS_C +#undef SIMD_TYPE +#undef FUNCT_NAME + +#ifdef BUILD_X86_ASM + +#include "tomsmocomp/tomsmocompmacros.h" +#include "x86-64_macros.inc" + +#define IS_MMX +#define SIMD_TYPE MMX +#define FUNCT_NAME tomsmocompDScaler_MMX +#include "tomsmocomp/TomsMoCompAll.inc" +#undef IS_MMX +#undef SIMD_TYPE +#undef FUNCT_NAME + +#define IS_3DNOW +#define SIMD_TYPE 3DNOW +#define FUNCT_NAME tomsmocompDScaler_3DNOW +#include "tomsmocomp/TomsMoCompAll.inc" +#undef IS_3DNOW +#undef SIMD_TYPE +#undef FUNCT_NAME + +#define IS_MMXEXT +#define SIMD_TYPE MMXEXT +#define FUNCT_NAME tomsmocompDScaler_MMXEXT +#include "tomsmocomp/TomsMoCompAll.inc" +#undef IS_MMXEXT +#undef SIMD_TYPE +#undef FUNCT_NAME + +#endif + +G_DEFINE_TYPE (GstDeinterlaceMethodTomsMoComp, + gst_deinterlace_method_tomsmocomp, GST_TYPE_DEINTERLACE_METHOD); + +enum +{ + ARG_0, + ARG_SEARCH_EFFORT, + ARG_STRANGE_BOB +}; + +static void +gst_deinterlace_method_tomsmocomp_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstDeinterlaceMethodTomsMoComp *self = + GST_DEINTERLACE_METHOD_TOMSMOCOMP (object); + + switch (prop_id) { + case ARG_SEARCH_EFFORT: + self->search_effort = g_value_get_uint (value); + break; + case ARG_STRANGE_BOB: + self->strange_bob = g_value_get_boolean (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + } +} + +static void +gst_deinterlace_method_tomsmocomp_get_property (GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec) +{ + GstDeinterlaceMethodTomsMoComp *self = + GST_DEINTERLACE_METHOD_TOMSMOCOMP 
(object); + + switch (prop_id) { + case ARG_SEARCH_EFFORT: + g_value_set_uint (value, self->search_effort); + break; + case ARG_STRANGE_BOB: + g_value_set_boolean (value, self->strange_bob); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + } +} + +static void + gst_deinterlace_method_tomsmocomp_class_init + (GstDeinterlaceMethodTomsMoCompClass * klass) +{ + GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass; + GObjectClass *gobject_class = (GObjectClass *) klass; +#ifdef BUILD_X86_ASM + guint cpu_flags = oil_cpu_get_flags (); +#endif + + gobject_class->set_property = gst_deinterlace_method_tomsmocomp_set_property; + gobject_class->get_property = gst_deinterlace_method_tomsmocomp_get_property; + + g_object_class_install_property (gobject_class, ARG_SEARCH_EFFORT, + g_param_spec_uint ("search-effort", + "Search Effort", + "Search Effort", 0, 27, 5, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS) + ); + + g_object_class_install_property (gobject_class, ARG_STRANGE_BOB, + g_param_spec_boolean ("strange-bob", + "Strange Bob", + "Use strange bob", FALSE, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS) + ); + + dim_class->fields_required = 4; + dim_class->name = "Motion Adaptive: Motion Search"; + dim_class->nick = "tomsmocomp"; + dim_class->latency = 1; + +#ifdef BUILD_X86_ASM + if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) { + dim_class->deinterlace_frame = tomsmocompDScaler_MMXEXT; + } else if (cpu_flags & OIL_IMPL_FLAG_3DNOW) { + dim_class->deinterlace_frame = tomsmocompDScaler_3DNOW; + } else if (cpu_flags & OIL_IMPL_FLAG_MMX) { + dim_class->deinterlace_frame = tomsmocompDScaler_MMX; + } else { + dim_class->deinterlace_frame = tomsmocompDScaler_C; + } +#else + dim_class->deinterlace_frame = tomsmocompDScaler_C; +#endif +} + +static void +gst_deinterlace_method_tomsmocomp_init (GstDeinterlaceMethodTomsMoComp * self) +{ + self->search_effort = 5; + self->strange_bob = FALSE; +} diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoop0A.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoop0A.inc new file mode 100644 index 00000000..b1d9aeca --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoop0A.inc @@ -0,0 +1,15 @@ +// -*- c++ -*- + +// Searches just the center pixel, in both the old +// and new fields, but takes averages. This is an even +// pixel address. Any chroma match will be used. 
(YUY2)
+// We like finding 0 motion best, so we bias everything we found previously
+// up by a little, and adjust later
+
+#ifdef IS_SSE2
+ "paddusb "_ONES", %%xmm7\n\t" // bias toward no motion
+#else
+ "paddusb "_ONES", %%mm7\n\t" // bias toward no motion
+#endif
+
+ MERGE4PIXavg("(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")") // center, in old and new
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopBottom.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopBottom.inc new file mode 100644 index 00000000..e1560353 --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopBottom.inc @@ -0,0 +1,174 @@
+// -*- c++ -*-
+
+// Version for non-SSE2
+
+#ifndef IS_C
+
+#ifdef SKIP_SEARCH
+ "movq %%mm6, %%mm0\n\t" // just use the results of our weird bob
+#else
+
+
+ // JA 9/Dec/2002
+ // failed experiment
+ // but leave in placeholder for me to play about
+#ifdef DONT_USE_STRANGE_BOB
+ // Use the best weave if diffs less than 10 as that
+ // means the image is still or moving cleanly
+ // if there is motion we will clip which will catch anything
+ "psubusb "_FOURS", %%mm7\n\t" // sets bits to zero if weave diff < 4
+ "pxor %%mm0, %%mm0\n\t"
+ "pcmpeqb %%mm0, %%mm7\n\t" // all ff where weave better, else 00
+ "pcmpeqb %%mm7, %%mm0\n\t" // all ff where bob better, else 00
+ "pand %%mm6, %%mm0\n\t" // use bob for these pixel values
+ "pand %%mm5, %%mm7\n\t" // use weave for these
+ "por %%mm7, %%mm0\n\t" // combine both
+#else
+ // Use the better of bob or weave
+ // pminub mm4, TENS // the most we care about
+ V_PMINUB ("%%mm4", _TENS, "%%mm0") // the most we care about
+
+ "psubusb %%mm4, %%mm7\n\t" // forgive that much from weave est?
+ "psubusb "_FOURS", %%mm7\n\t" // bias it a bit toward weave
+ "pxor %%mm0, %%mm0\n\t"
+ "pcmpeqb %%mm0, %%mm7\n\t" // all ff where weave better, else 00
+ "pcmpeqb %%mm7, %%mm0\n\t" // all ff where bob better, else 00
+ "pand %%mm6, %%mm0\n\t" // use bob for these pixel values
+ "pand %%mm5, %%mm7\n\t" // use weave for these
+ "por %%mm7, %%mm0\n\t" // combine both
+#endif
+
+
+ // pminub mm0, Max_Vals // but clip to catch the stray error
+ V_PMINUB ("%%mm0", _Max_Vals, "%%mm1") // but clip to catch the stray error
+ // pmaxub mm0, Min_Vals
+ V_PMAXUB ("%%mm0", _Min_Vals)
+
+#endif
+
+
+ MOVX" "_pDest", %%"XAX"\n\t"
+
+#ifdef USE_VERTICAL_FILTER
+ "movq %%mm0, %%mm1\n\t"
+ // pavgb mm0, qword ptr["XBX"]
+ V_PAVGB ("%%mm0", "(%%"XBX")", "%%mm2", _ShiftMask)
+ // movntq qword ptr["XAX"+"XDX"], mm0
+ V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm0")
+ // pavgb mm1, qword ptr["XBX"+"XCX"]
+ V_PAVGB ("%%mm1", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask)
+ //FIXME: XDX or XAX!!
+ "addq "_dst_pitchw", %%"XBX"\n\t"
+ // movntq qword ptr["XAX"+"XDX"], mm1
+ V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm1")
+#else
+
+ // movntq qword ptr["XAX"+"XDX"], mm0
+ V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm0")
+#endif
+
+ LEAX" 8(%%"XDX"), %%"XDX"\n\t" // bump offset pointer
+ CMPX" "_Last8", %%"XDX"\n\t" // done with line?
+ "jb 1b\n\t" // y + + MOVX" "_oldbx", %%"XBX"\n\t" + + : /* no outputs */ + + : "m"(pBob), + "m"(src_pitch2), + "m"(ShiftMask), + "m"(pDest), + "m"(dst_pitchw), + "m"(Last8), + "m"(pSrc), + "m"(pSrcP), + "m"(pBobP), + "m"(DiffThres), + "m"(Min_Vals), + "m"(Max_Vals), + "m"(FOURS), + "m"(TENS), + "m"(ONES), + "m"(UVMask), + "m"(Max_Mov), + "m"(YMask), + "m"(oldbx) + + : XAX, XCX, XDX, XSI, XDI, + "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", +#ifdef __MMX__ + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", +#endif + "memory", "cc" + ); + + // adjust for next line + pSrc += src_pitch2; + pSrcP += src_pitch2; + pDest += dst_pitch2; + pBob += src_pitch2; + pBobP += src_pitch2; + } + + return 0; +#else +#ifdef SKIP_SEARCH + out[0] = best[0]; // just use the results of our wierd bob + out[1] = best[1]; +#else + diff[0] = diff[0] - MIN (diff[0], 10) - 4; + diff[1] = diff[1] - MIN (diff[1] - 10) - 4; + if (diff[0] < 0) + out[0] = weave[0]; + else + out[0] = best[0]; + + if (diff[1] < 0) + out[1] = weave[1]; + else + out[1] = best[1]; + + + out[0] = CLAMP (out[0], MinVals[0], MaxVals[0]); + out[1] = CLAMP (out[1], MinVals[1], MaxVals[1]); +#endif + +#ifdef USE_VERTICAL_FILTER + pDest[x] = (out[0] + pBob[0]) / 2; + pDest[x + dst_pitchw] = (pBob[src_pitch2] + out[0]) / 2; + pDest[x + 1] = (out[1] + pBob[1]) / 2; + pDest[x + 1 + dst_pitchw] = (pBob[src_pitch2 + 1] + out[1]) / 2; +#else + pDest[x] = out[0]; + pDest[x+1] = out[1]; +#endif + pBob += 2; + pBobP += 2; + pSrc += 2; + pSrcP += 2; + } + // adjust for next line + pSrc = src_pitch2 * (y+1) + pWeaveSrc; + pSrcP = src_pitch2 * (y+1) + pWeaveSrcP; + pDest = dst_pitch2 * (y+1) + pWeaveDest + dst_pitch2; + + + if (TopFirst) + { + pBob = pCopySrc + src_pitch2; + pBobP = pCopySrcP + src_pitch2; + } + else + { + pBob = pCopySrc; + pBobP = pCopySrcP; + } + + pBob += src_pitch2 * (y+1); + pBobP += src_pitch2 * (y+1); + } + + return 0; + +#endif diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA.inc new file mode 100644 index 00000000..6208fe8c --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA.inc @@ -0,0 +1,11 @@ +// -*- c++ -*- + +// Searches 2 pixel to the left and right, in both the old +// and new fields, but takes averages. These are even +// pixel addresses. Chroma match will be used. (YUY2) + MERGE4PIXavg("-4(%%"XDI")", "4(%%"XSI", %%"XCX", 2)") // up left, down right + MERGE4PIXavg("4(%%"XDI")", "-4(%%"XSI", %%"XCX", 2)") // up right, down left + MERGE4PIXavg("-4(%%"XDI", %%"XCX")", "4(%%"XSI", %%"XCX")") // left, right + MERGE4PIXavg("4(%%"XDI", %%"XCX")", "-4(%%"XSI", %%"XCX")") // right, left + MERGE4PIXavg("-4(%%"XDI", %%"XCX", 2)", "4(%%"XSI")") // down left, up right + MERGE4PIXavg("4(%%"XDI", %%"XCX", 2)", "-4(%%"XSI")") // down right, up left diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA8.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA8.inc new file mode 100644 index 00000000..2841c3f6 --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopEdgeA8.inc @@ -0,0 +1,12 @@ +// -*- c++ -*- + +// Searches 4 pixel to the left and right, in both the old +// and new fields, but takes averages. These are even +// pixel addresses. Chroma match will be used. 
(YUY2) + MERGE4PIXavg("-8(%%"XDI")", "8(%%"XSI", %%"XCX", 2)") // up left, down right + MERGE4PIXavg("8(%%"XDI")", "-8(%%"XSI", %%"XCX", 2)") // up right, down left + MERGE4PIXavg("-8(%%"XDI", %%"XCX")", "8(%%"XSI", %%"XCX")") // left, right + MERGE4PIXavg("8(%%"XDI", %%"XCX")", "-8(%%"XSI", %%"XCX")") // right, left + MERGE4PIXavg("-8(%%"XDI", %%"XCX", 2)", "8(%%"XSI")") // down left, up right + MERGE4PIXavg("8(%%"XDI", %%"XCX", 2)", "-8(%%"XSI")") // down right, up left + diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA.inc new file mode 100644 index 00000000..ab5375f4 --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA.inc @@ -0,0 +1,10 @@ +// -*- c++ -*- + +// Searches 1 pixel to the left and right, in both the old +// and new fields, but takes averages. These are odd +// pixel addresses. Any chroma match will not be used. (YUY2) + MERGE4PIXavg("-2(%%"XDI")", "2(%%"XSI", %%"XCX", 2)") // up left, down right + MERGE4PIXavg("2(%%"XDI")", "-2(%%"XSI", %%"XCX", 2)") // up right, down left + MERGE4PIXavg("-2(%%"XDI", %%"XCX", 2)", "2(%%"XSI")") // down left, up right + MERGE4PIXavg("2(%%"XDI", %%"XCX", 2)", "-2(%%"XSI")") // down right, up left +#include "SearchLoopOddA2.inc" diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA2.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA2.inc new file mode 100644 index 00000000..fd3f6fb0 --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA2.inc @@ -0,0 +1,5 @@ +// Searches 1 pixel to the left and right, in both the old +// and new fields, but takes averages. These are odd +// pixel addresses. Any chroma match will not be used. (YUY2) + MERGE4PIXavg("-2(%%"XDI", %%"XCX")", "2(%%"XSI", %%"XCX")") // left, right + MERGE4PIXavg("2(%%"XDI", %%"XCX")", "-2(%%"XSI", %%"XCX")") // right, left diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA6.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA6.inc new file mode 100644 index 00000000..cbae014e --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddA6.inc @@ -0,0 +1,11 @@ +// -*- c++ -*- + +// Searches 3 pixels to the left and right, in both the old +// and new fields, but takes averages. These are odd +// pixel addresses. Any chroma match will not be used. (YUY2) + MERGE4PIXavg("-6(%%"XDI")", "6(%%"XSI", %%"XCX", 2)") // up left, down right + MERGE4PIXavg("6(%%"XDI")", "-6(%%"XSI", %%"XCX", 2)") // up right, down left + MERGE4PIXavg("-6(%%"XDI", %%"XCX")", "6(%%"XSI", %%"XCX")") // left, right + MERGE4PIXavg("6(%%"XDI", %%"XCX")", "-6(%%"XSI", %%"XCX")") // right, left + MERGE4PIXavg("-6(%%"XDI", %%"XCX", 2)", "6(%%"XSI")") // down left, up right + MERGE4PIXavg("6(%%"XDI", %%"XCX", 2)", "-6(%%"XSI")") // down right, up left diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH.inc new file mode 100644 index 00000000..e59e3c7e --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH.inc @@ -0,0 +1,10 @@ +// Searches 1 pixel to the left and right, in both the old
+// and new fields, but takes v-half pel averages. These are odd
+// pixel addresses. Any chroma match will not be used. (YUY2)
+ MERGE4PIXavgH("-2(%%"XDI")", "-2(%%"XDI", %%"XCX")", "2(%%"XSI", %%"XCX")", "2(%%"XSI", %%"XCX", 2)") // up left, down right
+ MERGE4PIXavgH("2(%%"XDI")", "2(%%"XDI", %%"XCX")", "-2(%%"XSI", %%"XCX")", "-2(%%"XSI", %%"XCX", 2)") // up right, down left
+ MERGE4PIXavgH("-2(%%"XDI", %%"XCX", 2)", "-2(%%"XDI", %%"XCX")", "2(%%"XSI", %%"XCX")", "2(%%"XSI")") // down left, up right
+ MERGE4PIXavgH("2(%%"XDI", %%"XCX", 2)", "2(%%"XDI", %%"XCX")", "-2(%%"XSI", %%"XCX")", "-2(%%"XSI")") // down right, up left
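+
+// A scalar sketch of what each MERGE4PIXavgH candidate above computes
+// (an illustrative helper, not part of the original sources): the pair of
+// addresses from the previous field and the pair from the next field are
+// each averaged into a half-pel value, the two half-pel values are averaged
+// into a candidate, and the candidate is kept if the two fields disagree
+// less than the best match found so far.
+#if 0
+static inline void
+merge4pix_avg_h_scalar (int old_a, int old_b, int new_a, int new_b,
+    int *best, int *best_diff)
+{
+  int old_half = (old_a + old_b) / 2;   /* half-pel value in the old field */
+  int new_half = (new_a + new_b) / 2;   /* half-pel value in the new field */
+  int diff = ABS (old_half - new_half); /* how well the two fields agree */
+
+  if (diff < *best_diff) {
+    *best = (old_half + new_half) / 2;  /* candidate output pixel */
+    *best_diff = diff;
+  }
+}
+#endif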
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH2.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH2.inc new file mode 100644 index 00000000..cd7d812a --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopOddAH2.inc @@ -0,0 +1,5 @@ +// Searches 1 pixel to the left and right, in both the old +// and new fields, but takes vertical averages. These are odd +// pixel addresses. Any chroma match will not be used. (YUY2) + MERGE4PIXavgH("-2(%%"XDI", %%"XCX")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "2(%%"XSI", %%"XCX")") // left, right + MERGE4PIXavgH("2(%%"XDI", %%"XCX")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "-2(%%"XSI", %%"XCX")") // right, left diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopTop.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopTop.inc new file mode 100644 index 00000000..9d6a490f --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopTop.inc @@ -0,0 +1,254 @@ +// -*- c++ -*- + +unsigned char* pDest; +const unsigned char* pSrcP; +const unsigned char* pSrc; +const unsigned char* pBob; +const unsigned char* pBobP; + +// long is int32 on ARCH_368, int64 on ARCH_AMD64. Declaring it this way +// saves a lot of xor's to delete 64bit garbage. + +#if defined(DBL_RESIZE) || defined(USE_FOR_DSCALER) +long src_pitch2 = src_pitch; // even & odd lines are not interleaved in DScaler +#else +long src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avisynth +#endif + + +long dst_pitch2 = 2 * dst_pitch; +long y; + +long Last8; + + pSrc = pWeaveSrc; // points 1 weave line above + pSrcP = pWeaveSrcP; // " + +#ifdef DBL_RESIZE + +#ifdef USE_VERTICAL_FILTER + pDest = pWeaveDest + dst_pitch2; +#else + pDest = pWeaveDest + 3*dst_pitch; +#endif + +#else + +#ifdef USE_VERTICAL_FILTER + pDest = pWeaveDest + dst_pitch; +#else + pDest = pWeaveDest + dst_pitch2; +#endif + +#endif + + if (TopFirst) + { + pBob = pCopySrc + src_pitch2; // remember one weave line just copied previously + pBobP = pCopySrcP + src_pitch2; + } + else + { + pBob = pCopySrc; + pBobP = pCopySrcP; + } + +#ifndef IS_C + +#ifndef _pBob +#define _pBob "%0" +#define _src_pitch2 "%1" +#define _ShiftMask "%2" +#define _pDest "%3" +#define _dst_pitchw "%4" +#define _Last8 "%5" +#define _pSrc "%6" +#define _pSrcP "%7" +#define _pBobP "%8" +#define _DiffThres "%9" +#define _Min_Vals "%10" +#define _Max_Vals "%11" +#define _FOURS "%12" +#define _TENS "%13" +#define _ONES "%14" +#define _UVMask "%15" +#define _Max_Mov "%16" +#define _YMask "%17" +#define _oldbx "%18" +#endif + Last8 = (rowsize-8); + + for (y=1; y < FldHeight-1; y++) + { + long dst_pitchw = dst_pitch; // local stor so asm can ref + int64_t Max_Mov = 0x0404040404040404ull; + int64_t DiffThres = 0x0f0f0f0f0f0f0f0full; + int64_t YMask = 0x00ff00ff00ff00ffull; // keeps only luma + int64_t UVMask = 0xff00ff00ff00ff00ull; // keeps only chroma + int64_t TENS = 0x0a0a0a0a0a0a0a0aull; + int64_t FOURS = 0x0404040404040404ull; + int64_t ONES = 0x0101010101010101ull; + int64_t Min_Vals = 0x0000000000000000ull; + int64_t Max_Vals = 0x0000000000000000ull; + int64_t ShiftMask = 0xfefffefffefffeffull; + + long oldbx; + + // pretend it's indented -->> + __asm__ __volatile__ + ( + // Loop general reg usage + // + // XAX - pBobP, then pDest + // XBX - pBob + // XCX - src_pitch2 + // XDX - current offset + // XDI - prev weave pixels, 1 line up + // XSI - next weave pixels, 1 line up + + // Save "XBX" (-fPIC) + MOVX" %%"XBX", "_oldbx"\n\t" + + // simple bob first 8 bytes + MOVX" "_pBob", %%"XBX"\n\t" + MOVX" "_src_pitch2", 
%%"XCX"\n\t" + +#ifdef USE_VERTICAL_FILTER + "movq (%%"XBX"), %%mm0\n\t" + "movq (%%"XBX", %%"XCX"), %%mm1\n\t" //, qword ptr["XBX"+"XCX"] + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between + V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way + V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way + MOVX" "_pDest", %%"XDI"\n\t" + MOVX" "_dst_pitchw", %%"XAX"\n\t" + V_MOVNTQ ("(%%"XDI")", "%%mm0") + V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1 + + // simple bob last 8 bytes + MOVX" "_Last8", %%"XDX"\n\t" + LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" // ["XBX"+"XDX"] + "movq (%%"XSI"), %%mm0\n\t" + "movq (%%"XSI", %%"XCX"), %%mm1\n\t" // qword ptr["XSI"+"XCX"] + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // halfway between + V_PAVGB ("%%mm0", "%%mm2", "%%mm3", _ShiftMask) // 1/4 way + V_PAVGB ("%%mm1", "%%mm2", "%%mm3", _ShiftMask) // 3/4 way + ADDX" %%"XDX", %%"XDI"\n\t" // last 8 bytes of dest + V_MOVNTQ ("%%"XDI"", "%%mm0") + V_MOVNTQ ("(%%"XDI", %%"XAX")", "%%mm1") // qword ptr["XDI"+"XAX"], mm1) + +#else + "movq (%%"XBX"), %%mm0\n\t" + // pavgb mm0, qword ptr["XBX"+"XCX"] + V_PAVGB ("%%mm0", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XBX"+"XCX"], mm2, ShiftMask) + MOVX" "_pDest", %%"XDI"\n\t" + V_MOVNTQ ("(%%"XDI")", "%%mm0") + + // simple bob last 8 bytes + MOVX" "_Last8", %%"XDX"\n\t" + LEAX" (%%"XBX", %%"XDX"), %%"XSI"\n\t" //"XSI", ["XBX"+"XDX"] + "movq (%%"XSI"), %%mm0\n\t" + // pavgb mm0, qword ptr["XSI"+"XCX"] + V_PAVGB ("%%mm0", "(%%"XSI", %%"XCX")", "%%mm2", _ShiftMask) // qword ptr["XSI"+"XCX"], mm2, ShiftMask) + V_MOVNTQ ("(%%"XDI", %%"XDX")", "%%mm0") // qword ptr["XDI"+"XDX"], mm0) +#endif + // now loop and get the middle qwords + MOVX" "_pSrc", %%"XSI"\n\t" + MOVX" "_pSrcP", %%"XDI"\n\t" + MOVX" $8, %%"XDX"\n\t" // curr offset longo all lines + + "1:\n\t" + MOVX" "_pBobP", %%"XAX"\n\t" + ADDX" $8, %%"XDI"\n\t" + ADDX" $8, %%"XSI"\n\t" + ADDX" $8, %%"XBX"\n\t" + ADDX" %%"XDX", %%"XAX"\n\t" + +#ifdef USE_STRANGE_BOB +#include "StrangeBob.inc" +#else +#include "WierdBob.inc" +#endif + + // For non-SSE2: + // through out most of the rest of this loop we will maintain + // mm4 our min bob value + // mm5 best weave pixels so far + // mm6 our max Bob value + // mm7 best weighted pixel ratings so far + + // We will keep a slight bias to using the weave pixels + // from the current location, by rating them by the min distance + // from the Bob value instead of the avg distance from that value. 
+ // our best and only rating so far
+ "pcmpeqb %%mm7, %%mm7\n\t" // ffff, say we didn't find anything good yet
+
+#else
+ Last8 = (rowsize - 4);
+
+ for (y=1; y < FldHeight-1; y++)
+ {
+ #ifdef USE_STRANGE_BOB
+ long DiffThres = 0x0f;
+ #endif
+
+ #ifndef SKIP_SEARCH
+ long weave[2], MaxVals[2], MinVals[2], mov[2];
+ #endif
+
+ long diff[2], best[2], avg[2], diff2[2], out[2], x;
+
+#ifdef USE_VERTICAL_FILTER
+ pDest[0] = (3 * pBob[0] + pBob[src_pitch2]) / 4;
+ pDest[1] = (3 * pBob[1] + pBob[src_pitch2 + 1]) / 4;
+ pDest[2] = (3 * pBob[2] + pBob[src_pitch2 + 2]) / 4;
+ pDest[3] = (3 * pBob[3] + pBob[src_pitch2 + 3]) / 4;
+ pDest[dst_pitchw] = (pBob[0] + 3 * pBob[src_pitch2]) / 4;
+ pDest[dst_pitchw + 1] = (pBob[1] + 3 * pBob[src_pitch2 + 1]) / 4;
+ pDest[dst_pitchw + 2] = (pBob[2] + 3 * pBob[src_pitch2 + 2]) / 4;
+ pDest[dst_pitchw + 3] = (pBob[3] + 3 * pBob[src_pitch2 + 3]) / 4;
+
+ // simple bob last 4 bytes
+ pDest[Last8] = (3 * pBob[Last8] + pBob[Last8 + src_pitch2]) / 4;
+ pDest[Last8 + 1] = (3 * pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 4;
+ pDest[Last8 + 2] = (3 * pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 4;
+ pDest[Last8 + 3] = (3 * pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 4;
+ pDest[Last8 + src_pitch2] = (pBob[Last8] + 3 * pBob[Last8 + src_pitch2]) / 4;
+ pDest[Last8 + src_pitch2 + 1] = (pBob[Last8 + 1] + 3 * pBob[Last8 + src_pitch2 + 1]) / 4;
+ pDest[Last8 + src_pitch2 + 2] = (pBob[Last8 + 2] + 3 * pBob[Last8 + src_pitch2 + 2]) / 4;
+ pDest[Last8 + src_pitch2 + 3] = (pBob[Last8 + 3] + 3 * pBob[Last8 + src_pitch2 + 3]) / 4;
+#else
+ pDest[0] = (pBob[0] + pBob[src_pitch2]) / 2;
+ pDest[1] = (pBob[1] + pBob[src_pitch2 + 1]) / 2;
+ pDest[2] = (pBob[2] + pBob[src_pitch2 + 2]) / 2;
+ pDest[3] = (pBob[3] + pBob[src_pitch2 + 3]) / 2;
+
+ // simple bob last 4 bytes
+ pDest[Last8] = (pBob[Last8] + pBob[Last8 + src_pitch2]) / 2;
+ pDest[Last8 + 1] = (pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 2;
+ pDest[Last8 + 2] = (pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 2;
+ pDest[Last8 + 3] = (pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 2;
+#endif
+
+ pBob += 4;
+ pBobP += 4;
+ pSrc += 4;
+ pSrcP += 4;
+
+ for (x=4; x < Last8; x += 2) {
+
+#ifdef USE_STRANGE_BOB
+#include "StrangeBob.inc"
+#else
+#include "WierdBob.inc"
+#endif
+
+ // We will keep a slight bias to using the weave pixels
+ // from the current location, by rating them by the min distance
+ // from the Bob value instead of the avg distance from that value.
+ // our best and only rating so far
+ diff[0] = diff[1] = 255;
+
+
+#endif
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVA.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVA.inc new file mode 100644 index 00000000..3e3d19b5 --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVA.inc @@ -0,0 +1,6 @@
+// -*- c++ -*-
+
+// Searches the center vertical line above center and below, in both the old
+// and new fields, but takes averages. These are even pixel addresses.
+ MERGE4PIXavg("(%%"XDI", %%"XCX", 2)", "(%%"XSI")") // down, up
+ MERGE4PIXavg("(%%"XDI")", "(%%"XSI", %%"XCX", 2)") // up, down
diff --git a/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVAH.inc b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVAH.inc new file mode 100644 index 00000000..33155bc1 --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/SearchLoopVAH.inc @@ -0,0 +1,6 @@
+// -*- c++ -*-
+
+// Searches the center vertical line above center and below, in both the old
+// and new fields, but takes v-half pel averages. These are even pixel addresses.
+ MERGE4PIXavgH("(%%"XDI", %%"XCX", 2)", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "(%%"XSI")") // down, up + MERGE4PIXavgH("(%%"XDI")", "(%%"XDI", %%"XCX")", "(%%"XSI", %%"XCX")", "(%%"XSI", %%"XCX", 2)") // up, down diff --git a/gst/deinterlace/tvtime/tomsmocomp/StrangeBob.inc b/gst/deinterlace/tvtime/tomsmocomp/StrangeBob.inc new file mode 100644 index 00000000..45b4c865 --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/StrangeBob.inc @@ -0,0 +1,435 @@ +// -*- c++ -*- + + // First, get and save our possible Bob values + // Assume our pixels are layed out as follows with x the calc'd bob value + // and the other pixels are from the current field + // + // j a b c k current field + // x calculated line + // m d e f n current field + // + // we calc the bob value luma value as: + // if |j - n| < Thres && |a - m| > Thres + // avg(j,n) + // end if + // if |k - m| < Thres && |c - n| > Thres + // avg(k,m) + // end if + // if |c - d| < Thres && |b - f| > Thres + // avg(c,d) + // end if + // if |a - f| < Thres && |b - d| > Thres + // avg(a,f) + // end if + // if |b - e| < Thres + // avg(b,e) + // end if + // pickup any thing not yet set with avg(b,e) + +#ifndef IS_C + + // j, n + "pxor %%mm5, %%mm5\n\t" + "pxor %%mm6, %%mm6\n\t" + "pxor %%mm7, %%mm7\n\t" + + "movq -2(%%"XBX"), %%mm0\n\t" // value a from top left + "movq -4(%%"XBX", %%"XCX"), %%mm1\n\t" // value m from bottom right + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(a,m) + + "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(a,m) > Thres else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(a,m) < Thres, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(a,m) > Thres, else 00 + + + "movq -4(%%"XBX"), %%mm0\n\t" // value j + "movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(j,n) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm0\n\t" + "psubusb %%mm3, %%mm1\n\t" + "por %%mm1, %%mm0\n\t" // abs(j,n) + + "movq %%mm0, %%mm1\n\t" + "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(j,n) > Thres else 0 + "pxor %%mm3, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(j,n) < Thres, else 00 + + "pand %%mm4, %%mm1\n\t" + "pand %%mm1, %%mm2\n\t" + "pand %%mm1, %%mm0\n\t" + + "movq %%mm1, %%mm3\n\t" + "pxor %%mm5, %%mm3\n\t" + "pand %%mm3, %%mm6\n\t" + "pand %%mm3, %%mm7\n\t" + "pand %%mm3, %%mm5\n\t" + + "por %%mm1, %%mm5\n\t" + "por %%mm2, %%mm6\n\t" + "por %%mm0, %%mm7\n\t" + + // k & m + "movq 2(%%"XBX"), %%mm0\n\t" // value c from top left + "movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n from bottom right + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(c,n) + + "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(c,n) > Thres else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(c,n) < Thres, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(c,n) > Thres, else 00 + + + "movq 4(%%"XBX"), %%mm0\n\t" // value k + "movq -4(%%"XBX", %%"XCX"), %%mm1\n\t" // value m + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(k,m) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm0\n\t" + "psubusb %%mm3, %%mm1\n\t" + "por %%mm1, %%mm0\n\t" // abs(k,m) + + "movq %%mm0, %%mm1\n\t" + "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(k,m) > Thres else 0 + "pxor %%mm3, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm1\n\t" // 
now ff where abs(k,m) < Thres, else 00 + + "pand %%mm4, %%mm1\n\t" + + "pand %%mm1, %%mm2\n\t" + "pand %%mm1, %%mm0\n\t" + + "movq %%mm1, %%mm3\n\t" + "pxor %%mm5, %%mm3\n\t" + "pand %%mm3, %%mm6\n\t" + "pand %%mm3, %%mm7\n\t" + "pand %%mm3, %%mm5\n\t" + + "por %%mm1, %%mm5\n\t" + "por %%mm2, %%mm6\n\t" + "por %%mm0, %%mm7\n\t" + + + // c & d + "movq (%%"XBX"), %%mm0\n\t" // value b from top left + "movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f from bottom right + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(b,f) + + "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(b,f) > Thres else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(b,f) < Thres, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(b,f) > Thres, else 00 + + "movq 2(%%"XBX"), %%mm0\n\t" // value c + "movq -2(%%"XBX", %%"XCX"), %%mm1\n\t" // value d + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(c,d) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm0\n\t" + "psubusb %%mm3, %%mm1\n\t" + "por %%mm1, %%mm0\n\t" // abs(c,d) + + "movq %%mm0, %%mm1\n\t" + "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(c,d) > Thres else 0 + "pxor %%mm3, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(c,d) < Thres, else 00 + + "pand %%mm4, %%mm1\n\t" + + "pand %%mm1, %%mm2\n\t" + "pand %%mm1, %%mm0\n\t" + + "movq %%mm1, %%mm3\n\t" + "pxor %%mm5, %%mm3\n\t" + "pand %%mm3, %%mm6\n\t" + "pand %%mm3, %%mm7\n\t" + "pand %%mm3, %%mm5\n\t" + + "por %%mm1, %%mm5\n\t" + "por %%mm2, %%mm6\n\t" + "por %%mm0, %%mm7\n\t" + + // a & f + "movq (%%"XBX"), %%mm0\n\t" // value b from top left + "movq -2(%%"XBX", %%"XCX"), %%mm1\n\t" // value d from bottom right + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(b,d) + + "psubusb "_DiffThres", %%mm3\n\t" // nonzero where abs(b,d) > Thres else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where abs(b,d) < Thres, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where abs(b,d) > Thres, else 00 + + "movq -2(%%"XBX"), %%mm0\n\t" // value a + "movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(a,f) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm0\n\t" + "psubusb %%mm3, %%mm1\n\t" + "por %%mm1, %%mm0\n\t" // abs(a,f) + + "movq %%mm0, %%mm1\n\t" + "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(a,f) > Thres else 0 + "pxor %%mm3, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(a,f) < Thres, else 00 + + "pand %%mm4, %%mm1\n\t" + + "pand %%mm1, %%mm2\n\t" + "pand %%mm1, %%mm0\n\t" + + "movq %%mm1, %%mm3\n\t" + "pxor %%mm5, %%mm3\n\t" + "pand %%mm3, %%mm6\n\t" + "pand %%mm3, %%mm7\n\t" + "pand %%mm3, %%mm5\n\t" + + "por %%mm1, %%mm5\n\t" + "por %%mm2, %%mm6\n\t" + "por %%mm0, %%mm7\n\t" + + "pand "_YMask", %%mm5\n\t" // mask out chroma from here + "pand "_YMask", %%mm6\n\t" // mask out chroma from here + "pand "_YMask", %%mm7\n\t" // mask out chroma from here + + // b,e + "movq (%%"XBX"), %%mm0\n\t" // value b from top + "movq (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom + "movq %%mm0, %%mm2\n\t" + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(b,e) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm0\n\t" + "psubusb %%mm3, %%mm1\n\t" + "por %%mm1, %%mm0\n\t" // abs(b,e) + + "movq %%mm0, %%mm1\n\t" + "psubusb "_DiffThres", %%mm1\n\t" // nonzero where abs(b,e) > Thres 
else 0 + "pxor %%mm3, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(b,e) < Thres, else 00 + + "pand %%mm1, %%mm2\n\t" + "pand %%mm1, %%mm0\n\t" + + "movq %%mm1, %%mm3\n\t" + "pxor %%mm5, %%mm3\n\t" + "pand %%mm3, %%mm6\n\t" + "pand %%mm3, %%mm7\n\t" + "pand %%mm3, %%mm5\n\t" + + "por %%mm1, %%mm5\n\t" + "por %%mm2, %%mm6\n\t" + "por %%mm0, %%mm7\n\t" + + // bob in any leftovers + "movq (%%"XBX"), %%mm0\n\t" // value b from top + "movq (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom + + +// We will also calc here the max/min values to later limit comb +// so the max excursion will not exceed the Max_Comb constant + +#ifdef SKIP_SEARCH + "movq %%mm0, %%mm2\n\t" +// pminub %%mm2, %%mm1 + V_PMINUB ("%%mm2", "%%mm1", "%%mm4") + +// pmaxub %%mm6, %%mm2 // clip our current results so far to be above this + V_PMAXUB ("%%mm6", "%%mm2") + "movq %%mm0, %%mm2\n\t" + V_PMAXUB ("%%mm2", "%%mm1") +// pminub %%mm6, %%mm2 // clip our current results so far to be below this + V_PMINUB ("%%mm6", "%%mm2", "%%mm4") + +#else + "movq %%mm0, %%mm2\n\t" + "movq (%%"XAX"), %%mm4\n\t" + "psubusb %%mm4, %%mm2\n\t" + "psubusb %%mm0, %%mm4\n\t" + "por %%mm2, %%mm4\n\t" // abs diff + + "movq %%mm1, %%mm2\n\t" + "movq (%%"XAX", %%"XCX"), %%mm3\n\t" + "psubusb %%mm3, %%mm2\n\t" + "psubusb %%mm1, %%mm3\n\t" + "por %%mm2, %%mm3\n\t" // abs diff +// pmaxub %%mm3, %%mm4 // top or bottom pixel moved most + V_PMAXUB ("%%mm3", "%%mm4") // top or bottom pixel moved most + "psubusb "_DiffThres", %%mm3\n\t" // moved more than allowed? or goes to 0? + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where low motion, else high motion + + "movq %%mm0, %%mm2\n\t" +// pminub %%mm2, %%mm1 + V_PMINUB ("%%mm2", "%%mm1", "%%mm4") + +// pmaxub %%mm6, %%mm2 // clip our current results so far to be above this + V_PMAXUB ("%%mm6", "%%mm2") + + "psubusb %%mm3, %%mm2\n\t" // maybe decrease it to 0000.. 
if no surround motion + "movq %%mm2, "_Min_Vals"\n\t" + + "movq %%mm0, %%mm2\n\t" + V_PMAXUB ("%%mm2", "%%mm1") +// pminub %%mm6, %%mm2 // clip our current results so far to be below this + V_PMINUB ("%%mm6", "%%mm2", "%%mm4") + "paddusb %%mm3, %%mm2\n\t" // maybe increase it to ffffff if no surround motion + "movq %%mm2, "_Max_Vals"\n\t" +#endif + + "movq %%mm0, %%mm2\n\t" +// pavgb %%mm2, %%mm1 // avg(b,e) + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(b,e) + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(b,e) + "movq %%mm3, %%mm1\n\t" // keep copy of diffs + + "pxor %%mm4, %%mm4\n\t" + "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0 + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00 + "pcmpeqb %%mm0, %%mm0\n\t" + "pandn %%mm0, %%mm5\n\t" + "por %%mm5, %%mm3\n\t" + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00 + + "pand %%mm3, %%mm1\n\t" + "pand %%mm3, %%mm2\n\t" + + "pand %%mm4, %%mm6\n\t" + "pand %%mm4, %%mm7\n\t" + + "por %%mm2, %%mm6\n\t" // our x2 value + "por %%mm1, %%mm7\n\t" // our x2 diffs + "movq %%mm7, %%mm4\n\t" // save as bob uncertainty indicator + +#else + + diff[0] = -1; + diff[1] = -1; + best[0] = 0; + best[1] = 0; + // j, n + if (ABS (pBob[-2] - pBob[src_pitch2 - 4]) < DiffThres && + ABS (pBob[-4] - pBob[src_pitch2 + 4]) > DiffThres) { + best[0] = (pBob[-2] + pBob[src_pitch2 - 4]) / 2; + diff[0] = ABS (pBob[-2] - pBob[src_pitch2 - 4]); + } + if (ABS (pBob[-1] - pBob[src_pitch2 - 3]) < DiffThres && + ABS (pBob[-3] - pBob[src_pitch2 + 5]) > DiffThres) { + best[1] = (pBob[-1] + pBob[src_pitch2 - 3]) / 2; + diff[1] = ABS (pBob[-1] - pBob[src_pitch2 - 3]); + } + + // k & m + if (ABS (pBob[2] - pBob[src_pitch2 + 4]) < DiffThres && + ABS (pBob[4] - pBob[src_pitch2 - 4]) > DiffThres) { + best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2; + diff[0] = ABS (pBob[4] - pBob[src_pitch2 - 4]); + } + + if (ABS (pBob[3] - pBob[src_pitch2 + 5]) < DiffThres && + ABS (pBob[5] - pBob[src_pitch2 - 3]) > DiffThres) { + best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2; + diff[1] = ABS (pBob[5] - pBob[src_pitch2 - 3]); + } + + // c & d + if (ABS (pBob[0] - pBob[src_pitch2 + 2]) < DiffThres && + ABS (pBob[2] - pBob[src_pitch2 - 2]) > DiffThres) { + best[0] = (pBob[2] + pBob[src_pitch2 - 2]) / 2; + diff[0] = ABS (pBob[2] - pBob[src_pitch2 - 2]); + } + + if (ABS (pBob[1] - pBob[src_pitch2 + 3]) < DiffThres && + ABS (pBob[3] - pBob[src_pitch2 - 1]) > DiffThres) { + best[1] = (pBob[3] + pBob[src_pitch2 - 1]) / 2; + diff[1] = ABS (pBob[3] - pBob[src_pitch2 - 1]); + } + + // a & f + if (ABS (pBob[0] - pBob[src_pitch2 - 2]) < DiffThres && + ABS (pBob[-2] - pBob[src_pitch2 + 2]) > DiffThres) { + best[0] = (pBob[-2] + pBob[src_pitch2 + 2]) / 2; + diff[0] = ABS (pBob[-2] - pBob[src_pitch2 + 2]); + } + + if (ABS (pBob[1] - pBob[src_pitch2 - 1]) < DiffThres && + ABS (pBob[-1] - pBob[src_pitch2 + 3]) > DiffThres) { + best[1] = (pBob[-1] + pBob[src_pitch2 + 3]) / 2; + diff[1] = ABS (pBob[-1] - pBob[src_pitch2 + 3]); + } + + // b,e + if (ABS (pBob[0] - pBob[src_pitch2]) < DiffThres) { + best[0] = (pBob[0] + pBob[src_pitch2]) / 2; + diff[0] = ABS (pBob[0] - pBob[src_pitch2]); + } + + if (ABS (pBob[1] - pBob[src_pitch2 + 1]) < DiffThres) { + best[1] = (pBob[1] + pBob[src_pitch2 + 1]) / 2; + diff[1] = ABS (pBob[1] - pBob[src_pitch2 + 1]); + } + + +// We will also calc here the max/min values to later limit comb +// so the max excursion will not exceed the Max_Comb constant + +#ifdef 
SKIP_SEARCH
+ best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
+#else
+ mov[0] = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+ mov[1] = MAX (ABS (pBob[1] - pBobP[1]), ABS (pBob[src_pitch2 + 1] - pBobP[src_pitch2 + 1]));
+
+ MinVals[0] = 0;
+ MinVals[1] = 0;
+ MaxVals[0] = 255;
+ MaxVals[1] = 255;
+ if (mov[0] > DiffThres) {
+ MinVals[0] = MAX (MIN (pBob[0], pBob[src_pitch2]), best[0]);
+ MaxVals[0] = MIN (MAX (pBob[0], pBob[src_pitch2]), best[0]);
+ }
+
+ if (mov[1] > DiffThres) {
+ MinVals[1] = MAX (MIN (pBob[1], pBob[src_pitch2+1]), best[1]);
+ MaxVals[1] = MIN (MAX (pBob[1], pBob[src_pitch2+1]), best[1]);
+ }
+
+ best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+ best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
+#endif
+ avg[0] = (pBob[src_pitch2] + pBob[0]) / 2;
+ avg[1] = (pBob[src_pitch2 + 1] + pBob[1]) / 2;
+ diff2[0] = ABS (pBob[src_pitch2] - pBob[0]);
+ diff2[1] = ABS (pBob[src_pitch2 + 1] - pBob[1]);
+
+ if (diff[0] == -1 || diff2[0] < diff[0]) {
+ best[0] = avg[0];
+ diff[0] = diff2[0];
+ }
+
+ if (diff[1] == -1 || diff2[1] < diff[1]) {
+ best[1] = avg[1];
+ diff[1] = diff2[1];
+ }
+#endif
diff --git a/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll.inc b/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll.inc new file mode 100644 index 00000000..e8883dd3 --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll.inc @@ -0,0 +1,241 @@
+/*
+ * GStreamer
+ * Copyright (c) 2002 Tom Barry All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry.
+ * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578 + */ + + +#ifndef TopFirst +#define TopFirst IsOdd +#endif + +#ifdef SEFUNC +#undef SEFUNC +#endif + +#if defined(IS_MMXEXT) +#define SEFUNC(x) Search_Effort_MMXEXT_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight) +#elif defined(IS_3DNOW) +#define SEFUNC(x) Search_Effort_3DNOW_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight) +#elif defined(IS_MMX) +#define SEFUNC(x) Search_Effort_MMX_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight) +#else +#define SEFUNC(x) Search_Effort_C_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight) +#endif + +#include "TomsMoCompAll2.inc" + +#define USE_STRANGE_BOB + +#include "TomsMoCompAll2.inc" + +#undef USE_STRANGE_BOB + +#undef SEFUNC +#if defined(IS_MMXEXT) +#define SEFUNC(x) Search_Effort_MMXEXT_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight) +#elif defined(IS_3DNOW) +#define SEFUNC(x) Search_Effort_3DNOW_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight) +#elif defined(IS_MMX) +#define SEFUNC(x) Search_Effort_MMX_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight) +#else +#define SEFUNC(x) Search_Effort_C_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight) +#endif + +void FUNCT_NAME(GstDeinterlaceMethod *d_method, GstDeinterlace* object, GstBuffer *outbuf) +{ + GstDeinterlaceMethodTomsMoComp *self = GST_DEINTERLACE_METHOD_TOMSMOCOMP (d_method); + long SearchEffort = self->search_effort; + int UseStrangeBob = self->strange_bob; + int IsOdd; + const unsigned char *pWeaveSrc; + const unsigned char *pWeaveSrcP; + unsigned char *pWeaveDest; + const unsigned char *pCopySrc; + const unsigned char *pCopySrcP; + unsigned char *pCopyDest; + int src_pitch; + int dst_pitch; + int rowsize; + int FldHeight; + + /* double stride do address just every odd/even scanline */ + src_pitch = object->field_stride; + dst_pitch = object->row_stride; + rowsize = object->row_stride; + FldHeight = object->field_height; + + pCopySrc = GST_BUFFER_DATA(object->field_history[object->history_count-1].buf); + pCopySrcP = GST_BUFFER_DATA(object->field_history[object->history_count-3].buf); + pWeaveSrc = GST_BUFFER_DATA(object->field_history[object->history_count-2].buf); + pWeaveSrcP = GST_BUFFER_DATA(object->field_history[object->history_count-4].buf); + + /* use bottom field and interlace top field */ + if (object->field_history[object->history_count-2].flags == PICTURE_INTERLACED_BOTTOM) { + IsOdd = 1; + + // if we have an odd field we copy an even field and weave an odd field + pCopyDest = GST_BUFFER_DATA(outbuf); + pWeaveDest = pCopyDest + dst_pitch; + } + /* do it vice verca */ + else { + + IsOdd = 
0; + // if we have an even field we copy an odd field and weave an even field + pCopyDest = GST_BUFFER_DATA(outbuf) + dst_pitch; + pWeaveDest = GST_BUFFER_DATA(outbuf); + } + + + // copy 1st and last weave lines + Fieldcopy(pWeaveDest, pCopySrc, rowsize, + 1, dst_pitch*2, src_pitch); + Fieldcopy(pWeaveDest+(FldHeight-1)*dst_pitch*2, + pCopySrc+(FldHeight-1)*src_pitch, rowsize, + 1, dst_pitch*2, src_pitch); + +#ifdef USE_VERTICAL_FILTER + // Vertical Filter currently not implemented for DScaler !! + // copy 1st and last lines the copy field + Fieldcopy(pCopyDest, pCopySrc, rowsize, + 1, dst_pitch*2, src_pitch); + Fieldcopy(pCopyDest+(FldHeight-1)*dst_pitch*2, + pCopySrc+(FldHeight-1)*src_pitch, rowsize, + 1, dst_pitch*2, src_pitch); +#else + + // copy all of the copy field + Fieldcopy(pCopyDest, pCopySrc, rowsize, + FldHeight, dst_pitch*2, src_pitch); +#endif + // then go fill in the hard part, being variously lazy depending upon + // SearchEffort + + if(!UseStrangeBob) { + if (SearchEffort == 0) + { + SEFUNC(0); + } + else if (SearchEffort <= 1) + { + SEFUNC(1); + } + /* else if (SearchEffort <= 2) + { + SEFUNC(2); + } + */ + else if (SearchEffort <= 3) + { + SEFUNC(3); + } + else if (SearchEffort <= 5) + { + SEFUNC(5); + } + else if (SearchEffort <= 9) + { + SEFUNC(9); + } + else if (SearchEffort <= 11) + { + SEFUNC(11); + } + else if (SearchEffort <= 13) + { + SEFUNC(13); + } + else if (SearchEffort <= 15) + { + SEFUNC(15); + } + else if (SearchEffort <= 19) + { + SEFUNC(19); + } + else if (SearchEffort <= 21) + { + SEFUNC(21); + } + else + { + SEFUNC(Max); + } + } + else + { + if (SearchEffort == 0) + { + SEFUNC(0SB); + } + else if (SearchEffort <= 1) + { + SEFUNC(1SB); + } + /* else if (SearchEffort <= 2) + { + SEFUNC(2SB); + } + */ + else if (SearchEffort <= 3) + { + SEFUNC(3SB); + } + else if (SearchEffort <= 5) + { + SEFUNC(5SB); + } + else if (SearchEffort <= 9) + { + SEFUNC(9SB); + } + else if (SearchEffort <= 11) + { + SEFUNC(11SB); + } + else if (SearchEffort <= 13) + { + SEFUNC(13SB); + } + else if (SearchEffort <= 15) + { + SEFUNC(15SB); + } + else if (SearchEffort <= 19) + { + SEFUNC(19SB); + } + else if (SearchEffort <= 21) + { + SEFUNC(21SB); + } + else + { + SEFUNC(MaxSB); + } + } + +#if defined(BUILD_X86_ASM) && !defined(IS_C) + __asm__ __volatile__("emms"); +#endif +} diff --git a/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll2.inc b/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll2.inc new file mode 100644 index 00000000..f6344eab --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/TomsMoCompAll2.inc @@ -0,0 +1,243 @@ +// -*- c++ -*- + +#ifdef SEARCH_EFFORT_FUNC +#undef SEARCH_EFFORT_FUNC +#endif + +#ifdef USE_STRANGE_BOB +#define SEARCH_EFFORT_FUNC(n) SEFUNC(n##SB) +#else +#define SEARCH_EFFORT_FUNC(n) SEFUNC(n) +#endif + +static inline int SEARCH_EFFORT_FUNC(0) // we don't try at all ;-) +{ + //see Search_Effort_Max() for comments +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +} + +static inline int SEARCH_EFFORT_FUNC(1) +{ +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else + //see Search_Effort_Max() for comments +#include "SearchLoopTop.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +#endif +} + +static inline int SEARCH_EFFORT_FUNC(3) +{ +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else + 
//see Search_Effort_Max() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA2.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +#endif +} + +static inline int SEARCH_EFFORT_FUNC(5) +{ +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else + //see Search_Effort_Max() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA2.inc" +#include "SearchLoopOddAH2.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +#endif +} + +// 3x3 search +static inline int SEARCH_EFFORT_FUNC(9) +{ +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else + //see SearchEffortMax() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoopVA.inc" +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +#endif +} + +// Search 9 with 2 H-half pels added +static inline int SEARCH_EFFORT_FUNC(11) +{ +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else + //see SearchEffortMax() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA.inc" +#include "SearchLoopOddAH2.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoopVA.inc" +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +#endif +} + +// Search 11 with 2 V-half pels added +static inline int SEARCH_EFFORT_FUNC(13) +{ +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else + //see SearchEffortMax() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA.inc" +#include "SearchLoopOddAH2.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoopVAH.inc" +#include "SearchLoopVA.inc" +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +#endif +} + +// 5x3 +static inline int SEARCH_EFFORT_FUNC(15) +{ +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else + //see SearchEffortMax() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoopEdgeA.inc" +#include "SearchLoopVA.inc" +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +#endif +} + +// 5x3 + 4 half pels +static inline int SEARCH_EFFORT_FUNC(19) +{ +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else + //see SearchEffortMax() for comments +#include "SearchLoopTop.inc" +#include "SearchLoopOddA.inc" +#include "SearchLoopOddAH2.inc" + RESET_CHROMA // pretend chroma diffs was 255 each +#include "SearchLoopEdgeA.inc" +#include "SearchLoopVAH.inc" +#include "SearchLoopVA.inc" +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +#endif +} + +// Handle one 4x1 block of pixels +// Search a 7x3 area, no half pels + +static inline int SEARCH_EFFORT_FUNC(21) +{ +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else + //see SearchLoopTop.inc for comments +#include "SearchLoopTop.inc" + + // odd addresses -- the pixels at odd address wouldn't generate + // good luma values but we will mask those 
off + +#include "SearchLoopOddA6.inc" // 4 odd v half pels, 3 to left & right +#include "SearchLoopOddA.inc" // 6 odd pels, 1 to left & right + + RESET_CHROMA // pretend chroma diffs was 255 each + + // even addresses -- use both luma and chroma from these + // search averages of 2 pixels left and right +#include "SearchLoopEdgeA.inc" + // search vertical line and averages, -1,0,+1 +#include "SearchLoopVA.inc" + // blend our results and loop +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +#endif +} + +// Handle one 4x1 block of pixels +// Search a 9x3 area, no half pels +static inline int SEARCH_EFFORT_FUNC(Max) +{ +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else + //see SearchLoopTop.inc for comments +#include "SearchLoopTop.inc" + + // odd addresses -- the pixels at odd address wouldn't generate + // good luma values but we will mask those off + +#include "SearchLoopOddA6.inc" // 4 odd v half pels, 3 to left & right +#include "SearchLoopOddA.inc" // 6 odd pels, 1 to left & right + + RESET_CHROMA // pretend chroma diffs was 255 each + + // even addresses -- use both luma and chroma from these + // search averages of 4 pixels left and right +#include "SearchLoopEdgeA8.inc" + // search averages of 2 pixels left and right +#include "SearchLoopEdgeA.inc" + // search vertical line and averages, -1,0,+1 +#include "SearchLoopVA.inc" + // blend our results and loop +#include "SearchLoop0A.inc" +#include "SearchLoopBottom.inc" +#endif +} + +#undef SEARCH_EFFORT_FUNC + diff --git a/gst/deinterlace/tvtime/tomsmocomp/WierdBob.inc b/gst/deinterlace/tvtime/tomsmocomp/WierdBob.inc new file mode 100644 index 00000000..f4bbb830 --- /dev/null +++ b/gst/deinterlace/tvtime/tomsmocomp/WierdBob.inc @@ -0,0 +1,286 @@ +// -*- c++ -*- + + // First, get and save our possible Bob values + // Assume our pixels are layed out as follows with x the calc'd bob value + // and the other pixels are from the current field + // + // j a b c k current field + // x calculated line + // m d e f n current field + // + // we calc the bob value as: + // x2 = either avg(a,f), avg(c,d), avg(b,e), avg(j,n), or avg(k,m) + + // selected for the smallest of abs(a,f), abs(c,d), or abs(b,e), etc. 
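+
+// Restating the selection rule above as plain C (an illustrative sketch, not
+// part of the original file; the real scalar path is the non-asm branch
+// further down): try each candidate pair and keep the average of the pair
+// whose members agree best across the missing line.
+#if 0
+static inline int
+wierd_bob_scalar (int j, int a, int b, int c, int k,
+    int m, int d, int e, int f, int n, int *out_diff)
+{
+  /* candidate pairs, one pixel above and one below the missing line */
+  const int pairs[5][2] = {
+    { a, f }, { c, d }, { b, e }, { j, n }, { k, m }
+  };
+  int best = (a + f) / 2;
+  int best_diff = ABS (a - f);
+  int i;
+
+  for (i = 1; i < 5; i++) {
+    int diff = ABS (pairs[i][0] - pairs[i][1]);
+    if (diff < best_diff) {
+      best = (pairs[i][0] + pairs[i][1]) / 2;
+      best_diff = diff;
+    }
+  }
+  *out_diff = best_diff;   /* uncertainty rating for this bob value */
+  return best;
+}
+#endif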
+ +#ifndef IS_C + // a,f + "movq -2(%%"XBX"), %%mm0\n\t" // value a from top left + "movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f from bottom right + "movq %%mm0, %%mm6\n\t" +// pavgb %%mm6, %%mm1 // avg(a,f), also best so far + V_PAVGB ("%%mm6", "%%mm1", "%%mm7", _ShiftMask) // avg(a,f), also best so far + "movq %%mm0, %%mm7\n\t" + "psubusb %%mm1, %%mm7\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm7\n\t" // abs diff, also best so far + + // c,d + "movq 2(%%"XBX"), %%mm0\n\t" // value a from top left + "movq -2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f from bottom right + "movq %%mm0, %%mm2\n\t" +// pavgb %%mm2, %%mm1 // avg(c,d) + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(c,d) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(c,d) + "movq %%mm3, %%mm1\n\t" // keep copy + + "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00 + + "pand %%mm3, %%mm1\n\t" // keep only better new avg and abs + "pand %%mm3, %%mm2\n\t" + + "pand %%mm4, %%mm6\n\t" + "pand %%mm4, %%mm7\n\t" + + "por %%mm2, %%mm6\n\t" // and merge new & old vals keeping best + "por %%mm1, %%mm7\n\t" + "por "_UVMask", %%mm7\n\t" // but we know chroma is worthless so far + "pand "_YMask", %%mm5\n\t" // mask out chroma from here also + + // j,n + "movq -4(%%"XBX"), %%mm0\n\t" // value j from top left + "movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n from bottom right + "movq %%mm0, %%mm2\n\t" +// pavgb %%mm2, %%mm1 // avg(j,n) + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(j,n) + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(j-n) + "movq %%mm3, %%mm1\n\t" // keep copy + + "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00 + + "pand %%mm3, %%mm1\n\t" // keep only better new avg and abs + "pand %%mm2, %%mm3\n\t" + + "pand %%mm4, %%mm6\n\t" + "pand %%mm4, %%mm7\n\t" + + "por %%mm3, %%mm6\n\t" // and merge new & old vals keeping best + "por %%mm1, %%mm7\n\t" // " + + // k, m + "movq 4(%%"XBX"), %%mm0\n\t" // value k from top right + "movq -4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n from bottom left + "movq %%mm0, %%mm4\n\t" +// pavgb %%mm4, %%mm1 // avg(k,m) + V_PAVGB ("%%mm4", "%%mm1", "%%mm3", _ShiftMask) // avg(k,m) + + "movq %%mm0, %%mm3\n\t" + "psubusb %%mm1, %%mm3\n\t" + "psubusb %%mm0, %%mm1\n\t" + "por %%mm1, %%mm3\n\t" // abs(k,m) + "movq %%mm3, %%mm1\n\t" // keep copy + + "movq %%mm4, %%mm2\n\t" // avg(k,m) + + "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0 + "pxor %%mm4, %%mm4\n\t" + "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00 + + "pand %%mm3, %%mm1\n\t" // keep only better new avg and abs + "pand %%mm2, %%mm3\n\t" + + "pand %%mm4, %%mm6\n\t" + "pand %%mm4, %%mm7\n\t" + + "por %%mm3, %%mm6\n\t" // and merge new & old vals keeping best + "por %%mm1, %%mm7\n\t" // " + + // b,e + "movq (%%"XBX"), %%mm0\n\t" // value b from top + "movq (%%"XBX", %%"XCX"), %%mm1\n\t" // value e from bottom + +// We will also calc here the max/min values to later limit comb +// so the max excursion will not exceed the Max_Comb 
+// We will also calc here the max/min values to later limit comb
+// so the max excursion will not exceed the Max_Comb constant
+
+#ifdef SKIP_SEARCH
+	"movq %%mm0, %%mm2\n\t"
+//	pminub %%mm2, %%mm1
+	V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
+
+//	pmaxub %%mm6, %%mm2			// clip our current results so far to be above this
+	V_PMAXUB ("%%mm6", "%%mm2")
+	"movq %%mm0, %%mm2\n\t"
+	V_PMAXUB ("%%mm2", "%%mm1")
+//	pminub %%mm6, %%mm2			// clip our current results so far to be below this
+	V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
+
+#else
+	"movq %%mm0, %%mm2\n\t"
+	"movq (%%"XAX"), %%mm4\n\t"
+	"psubusb %%mm4, %%mm2\n\t"
+	"psubusb %%mm0, %%mm4\n\t"
+	"por %%mm2, %%mm4\n\t"			// abs diff
+
+	"movq %%mm1, %%mm2\n\t"
+	"movq (%%"XAX", %%"XCX"), %%mm3\n\t"
+	"psubusb %%mm3, %%mm2\n\t"
+	"psubusb %%mm1, %%mm3\n\t"
+	"por %%mm2, %%mm3\n\t"			// abs diff
+//	pmaxub %%mm3, %%mm4			// top or bottom pixel moved most
+	V_PMAXUB ("%%mm3", "%%mm4")		// top or bottom pixel moved most
+	"psubusb "_Max_Mov", %%mm3\n\t"		// moved more than allowed? or goes to 0?
+	"pxor %%mm4, %%mm4\n\t"
+	"pcmpeqb %%mm4, %%mm3\n\t"		// now ff where low motion, else high motion
+
+	"movq %%mm0, %%mm2\n\t"
+//	pminub %%mm2, %%mm1
+	V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
+
+//	pmaxub %%mm6, %%mm2			// clip our current results so far to be above this
+	V_PMAXUB ("%%mm6", "%%mm2")
+
+	"psubusb %%mm3, %%mm2\n\t"		// maybe decrease it to 0000.. if no surround motion
+	"movq %%mm2, "_Min_Vals"\n\t"
+
+	"movq %%mm0, %%mm2\n\t"
+	V_PMAXUB ("%%mm2", "%%mm1")
+//	pminub %%mm6, %%mm2			// clip our current results so far to be below this
+	V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
+	"paddusb %%mm3, %%mm2\n\t"		// maybe increase it to ffffff if no surround motion
+	"movq %%mm2, "_Max_Vals"\n\t"
+#endif
+
+	"movq %%mm0, %%mm2\n\t"
+//	pavgb %%mm2, %%mm1			// avg(b,e)
+	V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)	// avg(b,e)
+
+	"movq %%mm0, %%mm3\n\t"
+	"psubusb %%mm1, %%mm3\n\t"
+	"psubusb %%mm0, %%mm1\n\t"
+	"por %%mm1, %%mm3\n\t"			// abs(b-e)
+	"movq %%mm3, %%mm1\n\t"			// keep copy of diffs
+
+	"pxor %%mm4, %%mm4\n\t"
+	"psubusb %%mm7, %%mm3\n\t"		// nonzero where new weights bigger, else 0
+	"pcmpeqb %%mm4, %%mm3\n\t"		// now ff where new better, else 00
+
+	"pcmpeqb %%mm3, %%mm4\n\t"		// here ff where old better, else 00
+
+	"pand %%mm3, %%mm1\n\t"
+	"pand %%mm3, %%mm2\n\t"
+
+	"pand %%mm4, %%mm6\n\t"
+	"pand %%mm4, %%mm7\n\t"
+
+	"por %%mm2, %%mm6\n\t"			// our x2 value
+	"por %%mm1, %%mm7\n\t"			// our x2 diffs
+	"movq %%mm7, %%mm4\n\t"			// save as bob uncertainty indicator
+
+#else
+
+	// a,f
+	best[0] = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
+	diff[0] = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
+	best[1] = (pBob[-1] + pBob[src_pitch2 + 3]) / 2;
+	diff[1] = ABS (pBob[-1] - pBob[src_pitch2 + 3]);
+
+	// c,d
+	if (ABS (pBob[2] - pBob[src_pitch2 - 2]) < diff[0]) {
+	  best[0] = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
+	  diff[0] = ABS (pBob[2] - pBob[src_pitch2 - 2]);
+	}
+
+	if (ABS (pBob[3] - pBob[src_pitch2 - 1]) < diff[1]) {
+	  best[1] = (pBob[3] + pBob[src_pitch2 - 1]) / 2;
+	  diff[1] = ABS (pBob[3] - pBob[src_pitch2 - 1]);
+	}
+
+	// j,n
+	if (ABS (pBob[-4] - pBob[src_pitch2 + 4]) < diff[0]) {
+	  best[0] = (pBob[-4] + pBob[src_pitch2 + 4]) / 2;
+	  diff[0] = ABS (pBob[-4] - pBob[src_pitch2 + 4]);
+	}
+
+	if (ABS (pBob[-3] - pBob[src_pitch2 + 5]) < diff[1]) {
+	  best[1] = (pBob[-3] + pBob[src_pitch2 + 5]) / 2;
+	  diff[1] = ABS (pBob[-3] - pBob[src_pitch2 + 5]);
+	}
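	/* Each of these pairs repeats the same update, so a hypothetical helper
	 * (not in the original) would make the luma pattern explicit:
	 *
	 *   #define TRY_PAIR(t, b)                                        \
	 *     if (ABS (pBob[(t)] - pBob[src_pitch2 + (b)]) < diff[0]) {   \
	 *       best[0] = (pBob[(t)] + pBob[src_pitch2 + (b)]) / 2;       \
	 *       diff[0] = ABS (pBob[(t)] - pBob[src_pitch2 + (b)]);       \
	 *     }
	 *
	 * e.g. TRY_PAIR (4, -4) for the k,m pair below; the [1] variants use
	 * the offsets t+1 and b+1 to handle the chroma byte to the right. */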
+	// k,m
+	if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff[0]) {
+	  best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
+	  diff[0] = ABS (pBob[4] - pBob[src_pitch2 - 4]);
+	}
+
+	if (ABS (pBob[5] - pBob[src_pitch2 - 3]) < diff[1]) {
+	  best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2;
+	  diff[1] = ABS (pBob[5] - pBob[src_pitch2 - 3]);
+	}
+
+// We will also calc here the max/min values to later limit comb
+// so the max excursion will not exceed the Max_Comb constant
+
+#ifdef SKIP_SEARCH
+	best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+	best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
+#else
+	mov[0] = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
+	mov[1] = MAX (ABS (pBob[1] - pBobP[1]), ABS (pBob[src_pitch2 + 1] - pBobP[src_pitch2 + 1]));
+
+	MinVals[0] = 0;
+	MinVals[1] = 0;
+	MaxVals[0] = 255;
+	MaxVals[1] = 255;
+
+	if (mov[0] > Max_Mov[0]) {
+	  MinVals[0] = MAX (MIN (pBob[0], pBob[src_pitch2]), best[0]);
+	  MaxVals[0] = MIN (MAX (pBob[0], pBob[src_pitch2]), best[0]);
+	}
+
+	if (mov[1] > Max_Mov[1]) {
+	  MinVals[1] = MAX (MIN (pBob[1], pBob[src_pitch2 + 1]), best[1]);
+	  MaxVals[1] = MIN (MAX (pBob[1], pBob[src_pitch2 + 1]), best[1]);
+	}
+
+	best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
+	best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
+#endif
+
+	avg[0] = (pBob[src_pitch2] + pBob[0]) / 2;
+	avg[1] = (pBob[src_pitch2 + 1] + pBob[1]) / 2;
+	diff2[0] = ABS (pBob[src_pitch2] - pBob[0]);
+	diff2[1] = ABS (pBob[src_pitch2 + 1] - pBob[1]);
+
+	if (diff2[0] < diff[0]) {
+	  best[0] = avg[0];
+	  diff[0] = diff2[0];
+	}
+
+	if (diff2[1] < diff[1]) {
+	  best[1] = avg[1];
+	  diff[1] = diff2[1];
+	}
+#endif
diff --git a/gst/deinterlace/tvtime/tomsmocomp/tomsmocompmacros.h b/gst/deinterlace/tvtime/tomsmocomp/tomsmocompmacros.h
new file mode 100644
index 00000000..7e8147ec
--- /dev/null
+++ b/gst/deinterlace/tvtime/tomsmocomp/tomsmocompmacros.h
@@ -0,0 +1,164 @@
+#include <string.h>
+#include <math.h>
+
+// Define a few macros for CPU dependent instructions.
+// I suspect I don't really understand how the C macro preprocessor works but
+// this seems to get the job done.
+// TRB 7/01
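// Why the two-level V_*2/V_*3 indirection below works: '##' pastes its
// arguments *before* expanding them, so a direct V_PAVGB_##SIMD_TYPE would
// paste the literal token SIMD_TYPE.  Routing the call through a second
// macro layer forces SIMD_TYPE to expand first.  Standalone sketch (names
// made up, not from the original sources):
//
//   #define SIMD_TYPE MMXEXT
//   #define OP_MMXEXT "pavgb"
//   #define OP(t)  OP2(t)      /* t expands to MMXEXT here...  */
//   #define OP2(t) OP_##t      /* ...and only then gets pasted */
//   /* OP(SIMD_TYPE) -> OP2(MMXEXT) -> OP_MMXEXT -> "pavgb" */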
+
+// BEFORE USING THESE YOU MUST SET:
+
+// #define SIMD_TYPE MMXEXT (or MMX or 3DNOW)
+
+// some macros for pavgb instruction
+//	V_PAVGB(mmr1, mmr2, mmr work register, smask)   mmr2 may = mmrw if you can trash it
+
+#define V_PAVGB_MMX(mmr1, mmr2, mmrw, smask) \
+	"movq "mmr2", "mmrw"\n\t" \
+	"pand "smask", "mmrw"\n\t" \
+	"psrlw $1, "mmrw"\n\t" \
+	"pand "smask", "mmr1"\n\t" \
+	"psrlw $1, "mmr1"\n\t" \
+	"paddusb "mmrw", "mmr1"\n\t"
+#define V_PAVGB_MMXEXT(mmr1, mmr2, mmrw, smask)	"pavgb "mmr2", "mmr1"\n\t"
+#define V_PAVGB_3DNOW(mmr1, mmr2, mmrw, smask)	"pavgusb "mmr2", "mmr1"\n\t"
+#define V_PAVGB(mmr1, mmr2, mmrw, smask)	V_PAVGB2(mmr1, mmr2, mmrw, smask, SIMD_TYPE)
+#define V_PAVGB2(mmr1, mmr2, mmrw, smask, simd_type) V_PAVGB3(mmr1, mmr2, mmrw, smask, simd_type)
+#define V_PAVGB3(mmr1, mmr2, mmrw, smask, simd_type) V_PAVGB_##simd_type(mmr1, mmr2, mmrw, smask)
+
+// some macros for pmaxub instruction
+#define V_PMAXUB_MMX(mmr1, mmr2) \
+	"psubusb "mmr2", "mmr1"\n\t" \
+	"paddusb "mmr2", "mmr1"\n\t"
+#define V_PMAXUB_MMXEXT(mmr1, mmr2)	"pmaxub "mmr2", "mmr1"\n\t"
+#define V_PMAXUB_3DNOW(mmr1, mmr2)	V_PMAXUB_MMX(mmr1, mmr2)  // use MMX version
+#define V_PMAXUB(mmr1, mmr2)		V_PMAXUB2(mmr1, mmr2, SIMD_TYPE)
+#define V_PMAXUB2(mmr1, mmr2, simd_type) V_PMAXUB3(mmr1, mmr2, simd_type)
+#define V_PMAXUB3(mmr1, mmr2, simd_type) V_PMAXUB_##simd_type(mmr1, mmr2)
+
+// some macros for pminub instruction
+//	V_PMINUB(mmr1, mmr2, mmr work register)   mmr2 may NOT = mmrw
+#define V_PMINUB_MMX(mmr1, mmr2, mmrw) \
+	"pcmpeqb "mmrw", "mmrw"\n\t" \
+	"psubusb "mmr2", "mmrw"\n\t" \
+	"paddusb "mmrw", "mmr1"\n\t" \
+	"psubusb "mmrw", "mmr1"\n\t"
+#define V_PMINUB_MMXEXT(mmr1, mmr2, mmrw)	"pminub "mmr2", "mmr1"\n\t"
+#define V_PMINUB_3DNOW(mmr1, mmr2, mmrw)	V_PMINUB_MMX(mmr1, mmr2, mmrw)  // use MMX version
+#define V_PMINUB(mmr1, mmr2, mmrw)		V_PMINUB2(mmr1, mmr2, mmrw, SIMD_TYPE)
+#define V_PMINUB2(mmr1, mmr2, mmrw, simd_type)	V_PMINUB3(mmr1, mmr2, mmrw, simd_type)
+#define V_PMINUB3(mmr1, mmr2, mmrw, simd_type)	V_PMINUB_##simd_type(mmr1, mmr2, mmrw)
+
+// some macros for movntq instruction
+//	V_MOVNTQ(mmr1, mmr2)
+#define V_MOVNTQ_MMX(mmr1, mmr2)	"movq "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ_3DNOW(mmr1, mmr2)	"movq "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ_MMXEXT(mmr1, mmr2)	"movntq "mmr2", "mmr1"\n\t"
+#define V_MOVNTQ(mmr1, mmr2)		V_MOVNTQ2(mmr1, mmr2, SIMD_TYPE)
+#define V_MOVNTQ2(mmr1, mmr2, simd_type) V_MOVNTQ3(mmr1, mmr2, simd_type)
+#define V_MOVNTQ3(mmr1, mmr2, simd_type) V_MOVNTQ_##simd_type(mmr1, mmr2)
+
+// end of macros
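/* The MMX fallbacks above lean on a property of saturating byte arithmetic:
 * unsigned max and min need no compare instruction.  Scalar sketches of what
 * V_PMAXUB_MMX and V_PMINUB_MMX compute per byte (illustrative only, not
 * part of the original header): */
static inline unsigned char
pmaxub_sketch (unsigned char a, unsigned char b)
{
  /* psubusb: (a - b) clamped at 0; paddusb b then yields MAX (a, b) */
  unsigned char d = (a > b) ? (unsigned char) (a - b) : 0;

  return (unsigned char) (d + b);       /* never overflows: d + b == MAX (a, b) */
}

static inline unsigned char
pminub_sketch (unsigned char a, unsigned char b)
{
  /* pcmpeqb/psubusb build w = 255 - b; then sat(a + w) - w == MIN (a, b) */
  unsigned char w = (unsigned char) (0xff - b);
  unsigned int t = (unsigned int) a + w;

  if (t > 0xff)
    t = 0xff;                           /* paddusb saturation */
  return (unsigned char) (t - w);       /* psubusb cannot underflow here */
}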
+ "por %%xmm0, %%xmm5\n\t" /* and merge new & old vals */ \ + "por %%xmm2, %%xmm7\n\t" + +#define MERGE4PIXavgH(PADDR1A, PADDR1B, PADDR2A, PADDR2B) \ + "movdqu "PADDR1A", %%xmm0\n\t" /* our 4 pixels */ \ + "movdqu "PADDR2A", %%xmm1\n\t" /* our pixel2 value */ \ + "movdqu "PADDR1B", %%xmm2\n\t" /* our 4 pixels */ \ + "movdqu "PADDR2B", %%xmm3\n\t" /* our pixel2 value */ \ + "pavgb %%xmm2, %%xmm0\n\t" \ + "pavgb %%xmm3, %%xmm1\n\t" \ + "movdqa %%xmm0, %%xmm2\n\t" /* another copy of our pixel1 value */ \ + "movdqa %%xmm1, %%xmm3\n\t" /* another copy of our pixel1 value */ \ + "psubusb %%xmm1, %%xmm2\n\t" \ + "psubusb %%xmm0, %%xmm3\n\t" \ + "por %%xmm3, %%xmm2\n\t" \ + "pavgb %%xmm1, %%xmm0\n\t" /* avg of 2 pixels */ \ + "movdqa %%xmm2, %%xmm3\n\t" /* another copy of our our weights */ \ + "pxor %%xmm1, %%xmm1\n\t" \ + "psubusb %%xmm7, %%xmm3\n\t" /* nonzero where old weights lower, else 0 */ \ + "pcmpeqb %%xmm1, %%xmm3\n\t" /* now ff where new better, else 00 */ \ + "pcmpeqb %%xmm3, %%xmm1\n\t" /* here ff where old better, else 00 */ \ + "pand %%xmm3, %%xmm0\n\t" /* keep only better new pixels */ \ + "pand %%xmm3, %%xmm2\n\t" /* and weights */ \ + "pand %%xmm1, %%xmm5\n\t" /* keep only better old pixels */ \ + "pand %%xmm1, %%xmm7\n\t" \ + "por %%xmm0, %%xmm5\n\t" /* and merge new & old vals */ \ + "por %%xmm2, %%xmm7\n\t" + +#define RESET_CHROMA "por "_UVMask", %%xmm7\n\t" + +#else // ifdef IS_SSE2 + +#define MERGE4PIXavg(PADDR1, PADDR2) \ + "movq "PADDR1", %%mm0\n\t" /* our 4 pixels */ \ + "movq "PADDR2", %%mm1\n\t" /* our pixel2 value */ \ + "movq %%mm0, %%mm2\n\t" /* another copy of our pixel1 value */ \ + "movq %%mm1, %%mm3\n\t" /* another copy of our pixel1 value */ \ + "psubusb %%mm1, %%mm2\n\t" \ + "psubusb %%mm0, %%mm3\n\t" \ + "por %%mm3, %%mm2\n\t" \ + V_PAVGB ("%%mm0", "%%mm1", "%%mm3", _ShiftMask) /* avg of 2 pixels */ \ + "movq %%mm2, %%mm3\n\t" /* another copy of our our weights */ \ + "pxor %%mm1, %%mm1\n\t" \ + "psubusb %%mm7, %%mm3\n\t" /* nonzero where old weights lower, else 0 */ \ + "pcmpeqb %%mm1, %%mm3\n\t" /* now ff where new better, else 00 */ \ + "pcmpeqb %%mm3, %%mm1\n\t" /* here ff where old better, else 00 */ \ + "pand %%mm3, %%mm0\n\t" /* keep only better new pixels */ \ + "pand %%mm3, %%mm2\n\t" /* and weights */ \ + "pand %%mm1, %%mm5\n\t" /* keep only better old pixels */ \ + "pand %%mm1, %%mm7\n\t" \ + "por %%mm0, %%mm5\n\t" /* and merge new & old vals */ \ + "por %%mm2, %%mm7\n\t" + +#define MERGE4PIXavgH(PADDR1A, PADDR1B, PADDR2A, PADDR2B) \ + "movq "PADDR1A", %%mm0\n\t" /* our 4 pixels */ \ + "movq "PADDR2A", %%mm1\n\t" /* our pixel2 value */ \ + "movq "PADDR1B", %%mm2\n\t" /* our 4 pixels */ \ + "movq "PADDR2B", %%mm3\n\t" /* our pixel2 value */ \ + V_PAVGB("%%mm0", "%%mm2", "%%mm2", _ShiftMask) \ + V_PAVGB("%%mm1", "%%mm3", "%%mm3", _ShiftMask) \ + "movq %%mm0, %%mm2\n\t" /* another copy of our pixel1 value */ \ + "movq %%mm1, %%mm3\n\t" /* another copy of our pixel1 value */ \ + "psubusb %%mm1, %%mm2\n\t" \ + "psubusb %%mm0, %%mm3\n\t" \ + "por %%mm3, %%mm2\n\t" \ + V_PAVGB("%%mm0", "%%mm1", "%%mm3", _ShiftMask) /* avg of 2 pixels */ \ + "movq %%mm2, %%mm3\n\t" /* another copy of our our weights */ \ + "pxor %%mm1, %%mm1\n\t" \ + "psubusb %%mm7, %%mm3\n\t" /* nonzero where old weights lower, else 0 */ \ + "pcmpeqb %%mm1, %%mm3\n\t" /* now ff where new better, else 00 */ \ + "pcmpeqb %%mm3, %%mm1\n\t" /* here ff where old better, else 00 */ \ + "pand %%mm3, %%mm0\n\t" /* keep only better new pixels */ \ + "pand %%mm3, %%mm2\n\t" /* and weights */ \ + 
"pand %%mm1, %%mm5\n\t" /* keep only better old pixels */ \ + "pand %%mm1, %%mm7\n\t" \ + "por %%mm0, %%mm5\n\t" /* and merge new & old vals */ \ + "por %%mm2, %%mm7\n\t" + +#define RESET_CHROMA "por "_UVMask", %%mm7\n\t" + +#endif + + diff --git a/gst/deinterlace/tvtime/vfir.c b/gst/deinterlace/tvtime/vfir.c new file mode 100644 index 00000000..b3ebaae1 --- /dev/null +++ b/gst/deinterlace/tvtime/vfir.c @@ -0,0 +1,187 @@ +/* + * + * GStreamer + * Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net> + * Copyright (c) 2001, 2002, 2003 Fabrice Bellard. + * Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * This file contains code from ffmpeg, see http://ffmpeg.org/ (LGPL) + * and modifications by Billy Biggs. + * + * Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs. + * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578 + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "_stdint.h" +#include "gstdeinterlace.h" +#include <string.h> + +#define GST_TYPE_DEINTERLACE_METHOD_VFIR (gst_deinterlace_method_vfir_get_type ()) +#define GST_IS_DEINTERLACE_METHOD_VFIR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_VFIR)) +#define GST_IS_DEINTERLACE_METHOD_VFIR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_VFIR)) +#define GST_DEINTERLACE_METHOD_VFIR_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_VFIR, GstDeinterlaceMethodVFIRClass)) +#define GST_DEINTERLACE_METHOD_VFIR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_VFIR, GstDeinterlaceMethodVFIR)) +#define GST_DEINTERLACE_METHOD_VFIR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_VFIR, GstDeinterlaceMethodVFIRClass)) +#define GST_DEINTERLACE_METHOD_VFIR_CAST(obj) ((GstDeinterlaceMethodVFIR*)(obj)) + +GType gst_deinterlace_method_vfir_get_type (void); + +typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodVFIR; + +typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodVFIRClass; + +/* + * The MPEG2 spec uses a slightly harsher filter, they specify + * [-1 8 2 8 -1]. ffmpeg uses a similar filter but with more of + * a tendancy to blur than to use the local information. The + * filter taps here are: [-1 4 2 4 -1]. + */ + +/** + * C implementation. 
+/**
+ * C implementation.
+ */
+static inline void
+deinterlace_line_c (GstDeinterlaceMethod * self, GstDeinterlace * parent,
+    guint8 * dst, GstDeinterlaceScanlineData * scanlines, gint width)
+{
+  gint sum;
+  guint8 *lum_m4 = scanlines->tt1;
+  guint8 *lum_m3 = scanlines->t0;
+  guint8 *lum_m2 = scanlines->m1;
+  guint8 *lum_m1 = scanlines->b0;
+  guint8 *lum = scanlines->bb1;
+  gint size = width * 2;
+
+  for (; size >= 0; size--) {
+    sum = -lum_m4[0];
+    sum += lum_m3[0] << 2;
+    sum += lum_m2[0] << 1;
+    sum += lum_m1[0] << 2;
+    sum += -lum[0];
+    dst[0] = CLAMP ((sum + 4) >> 3, 0, 255);    /* clip to [0, 255], as ffmpeg's cm[] table did */
+    lum_m4++;
+    lum_m3++;
+    lum_m2++;
+    lum_m1++;
+    lum++;
+    dst++;
+  }
+}
+
+#ifdef BUILD_X86_ASM
+#include "mmx.h"
+static void
+deinterlace_line_mmx (GstDeinterlaceMethod * self, GstDeinterlace * parent,
+    guint8 * dst, GstDeinterlaceScanlineData * scanlines, gint width)
+{
+  mmx_t rounder;
+  guint8 *lum_m4 = scanlines->tt1;
+  guint8 *lum_m3 = scanlines->t0;
+  guint8 *lum_m2 = scanlines->m1;
+  guint8 *lum_m1 = scanlines->b0;
+  guint8 *lum = scanlines->bb1;
+
+  rounder.uw[0] = 4;
+  rounder.uw[1] = 4;
+  rounder.uw[2] = 4;
+  rounder.uw[3] = 4;
+  pxor_r2r (mm7, mm7);
+  movq_m2r (rounder, mm6);
+
+  for (; width > 1; width -= 2) {
+    movd_m2r (*lum_m4, mm0);
+    movd_m2r (*lum_m3, mm1);
+    movd_m2r (*lum_m2, mm2);
+    movd_m2r (*lum_m1, mm3);
+    movd_m2r (*lum, mm4);
+    punpcklbw_r2r (mm7, mm0);
+    punpcklbw_r2r (mm7, mm1);
+    punpcklbw_r2r (mm7, mm2);
+    punpcklbw_r2r (mm7, mm3);
+    punpcklbw_r2r (mm7, mm4);
+    paddw_r2r (mm3, mm1);
+    psllw_i2r (1, mm2);
+    paddw_r2r (mm4, mm0);
+    psllw_i2r (2, mm1);         // (lum_m3 + lum_m1) * 4
+    paddw_r2r (mm6, mm2);
+    paddw_r2r (mm2, mm1);
+    psubusw_r2r (mm0, mm1);
+    psrlw_i2r (3, mm1);         // / 8
+    packuswb_r2r (mm7, mm1);
+    movd_r2m (mm1, *dst);
+    lum_m4 += 4;
+    lum_m3 += 4;
+    lum_m2 += 4;
+    lum_m1 += 4;
+    lum += 4;
+    dst += 4;
+  }
+  emms ();
+
+  /* Handle odd widths */
+  if (width > 0) {
+    scanlines->tt1 = lum_m4;
+    scanlines->t0 = lum_m3;
+    scanlines->m1 = lum_m2;
+    scanlines->b0 = lum_m1;
+    scanlines->bb1 = lum;
+
+    deinterlace_line_c (self, parent, dst, scanlines, width);
+  }
+}
+#endif
+
+G_DEFINE_TYPE (GstDeinterlaceMethodVFIR, gst_deinterlace_method_vfir,
+    GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+gst_deinterlace_method_vfir_class_init (GstDeinterlaceMethodVFIRClass * klass)
+{
+  GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+  GstDeinterlaceSimpleMethodClass *dism_class =
+      (GstDeinterlaceSimpleMethodClass *) klass;
+#ifdef BUILD_X86_ASM
+  guint cpu_flags = oil_cpu_get_flags ();
+#endif
+
+  dim_class->fields_required = 2;
+  dim_class->name = "Blur Vertical";
+  dim_class->nick = "vfir";
+  dim_class->latency = 0;
+
+#ifdef BUILD_X86_ASM
+  if (cpu_flags & OIL_IMPL_FLAG_MMX) {
+    dism_class->interpolate_scanline = deinterlace_line_mmx;
+  } else {
+    dism_class->interpolate_scanline = deinterlace_line_c;
+  }
+#else
+  dism_class->interpolate_scanline = deinterlace_line_c;
+#endif
+}
+
+static void
+gst_deinterlace_method_vfir_init (GstDeinterlaceMethodVFIR * self)
+{
+}
diff --git a/gst/deinterlace/tvtime/weave.c b/gst/deinterlace/tvtime/weave.c
new file mode 100644
index 00000000..1a86170e
--- /dev/null
+++ b/gst/deinterlace/tvtime/weave.c
@@ -0,0 +1,82 @@
+/**
+ * Weave frames
+ * Copyright (C) 2002 Billy Biggs <vektor@dumbterm.net>.
+ * Copyright (C) 2008 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_WEAVE	(gst_deinterlace_method_weave_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_WEAVE(obj)	(G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE))
+#define GST_IS_DEINTERLACE_METHOD_WEAVE_CLASS(klass)	(G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE))
+#define GST_DEINTERLACE_METHOD_WEAVE_GET_CLASS(obj)	(G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE, GstDeinterlaceMethodWeaveClass))
+#define GST_DEINTERLACE_METHOD_WEAVE(obj)	(G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE, GstDeinterlaceMethodWeave))
+#define GST_DEINTERLACE_METHOD_WEAVE_CLASS(klass)	(G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE, GstDeinterlaceMethodWeaveClass))
+#define GST_DEINTERLACE_METHOD_WEAVE_CAST(obj)	((GstDeinterlaceMethodWeave*)(obj))
+
+GType gst_deinterlace_method_weave_get_type (void);
+
+typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodWeave;
+
+typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodWeaveClass;
+
+
+static void
+deinterlace_scanline_weave (GstDeinterlaceMethod * self,
+    GstDeinterlace * parent, guint8 * out,
+    GstDeinterlaceScanlineData * scanlines, gint width)
+{
+  oil_memcpy (out, scanlines->m1, parent->row_stride);
+}
+
+static void
+copy_scanline (GstDeinterlaceMethod * self, GstDeinterlace * parent,
+    guint8 * out, GstDeinterlaceScanlineData * scanlines, gint width)
+{
+  oil_memcpy (out, scanlines->m0, parent->row_stride);
+}
+
+G_DEFINE_TYPE (GstDeinterlaceMethodWeave, gst_deinterlace_method_weave,
+    GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+gst_deinterlace_method_weave_class_init (GstDeinterlaceMethodWeaveClass * klass)
+{
+  GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+  GstDeinterlaceSimpleMethodClass *dism_class =
+      (GstDeinterlaceSimpleMethodClass *) klass;
+
+  dim_class->fields_required = 2;
+  dim_class->name = "Weave";
+  dim_class->nick = "weave";
+  dim_class->latency = 0;
+
+  dism_class->interpolate_scanline = deinterlace_scanline_weave;
+  dism_class->copy_scanline = copy_scanline;
+}
+
+static void
+gst_deinterlace_method_weave_init (GstDeinterlaceMethodWeave * self)
+{
+}
diff --git a/gst/deinterlace/tvtime/weavebff.c b/gst/deinterlace/tvtime/weavebff.c
new file mode 100644
index 00000000..eb983cf2
--- /dev/null
+++ b/gst/deinterlace/tvtime/weavebff.c
@@ -0,0 +1,88 @@
+/**
+ * Weave frames, bottom-field-first.
+ * Copyright (C) 2003 Billy Biggs <vektor@dumbterm.net>.
+ * Copyright (C) 2008 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF	(gst_deinterlace_method_weave_bff_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_WEAVE_BFF(obj)	(G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF))
+#define GST_IS_DEINTERLACE_METHOD_WEAVE_BFF_CLASS(klass)	(G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF))
+#define GST_DEINTERLACE_METHOD_WEAVE_BFF_GET_CLASS(obj)	(G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF, GstDeinterlaceMethodWeaveBFFClass))
+#define GST_DEINTERLACE_METHOD_WEAVE_BFF(obj)	(G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF, GstDeinterlaceMethodWeaveBFF))
+#define GST_DEINTERLACE_METHOD_WEAVE_BFF_CLASS(klass)	(G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE_BFF, GstDeinterlaceMethodWeaveBFFClass))
+#define GST_DEINTERLACE_METHOD_WEAVE_BFF_CAST(obj)	((GstDeinterlaceMethodWeaveBFF*)(obj))
+
+GType gst_deinterlace_method_weave_bff_get_type (void);
+
+typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodWeaveBFF;
+
+typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodWeaveBFFClass;
+
+
+static void
+deinterlace_scanline_weave (GstDeinterlaceMethod * self,
+    GstDeinterlace * parent, guint8 * out,
+    GstDeinterlaceScanlineData * scanlines, gint width)
+{
+  oil_memcpy (out, scanlines->m1, parent->row_stride);
+}
+
+static void
+copy_scanline (GstDeinterlaceMethod * self, GstDeinterlace * parent,
+    guint8 * out, GstDeinterlaceScanlineData * scanlines, gint width)
+{
+  /* FIXME: original code used m2 and m0 but this looks really bad */
+  if (scanlines->bottom_field) {
+    oil_memcpy (out, scanlines->bb2, parent->row_stride);
+  } else {
+    oil_memcpy (out, scanlines->bb0, parent->row_stride);
+  }
+}
+
+G_DEFINE_TYPE (GstDeinterlaceMethodWeaveBFF, gst_deinterlace_method_weave_bff,
+    GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+gst_deinterlace_method_weave_bff_class_init (GstDeinterlaceMethodWeaveBFFClass *
+    klass)
+{
+  GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+  GstDeinterlaceSimpleMethodClass *dism_class =
+      (GstDeinterlaceSimpleMethodClass *) klass;
+
+  dim_class->fields_required = 3;
+  dim_class->name = "Progressive: Bottom Field First";
+  dim_class->nick = "weavebff";
+  dim_class->latency = 0;
+
+  dism_class->interpolate_scanline = deinterlace_scanline_weave;
+  dism_class->copy_scanline = copy_scanline;
+}
+
+static void
+gst_deinterlace_method_weave_bff_init (GstDeinterlaceMethodWeaveBFF * self)
+{
+}
diff --git a/gst/deinterlace/tvtime/weavetff.c b/gst/deinterlace/tvtime/weavetff.c
new file mode 100644
index 00000000..4885b63b
--- /dev/null
+++ b/gst/deinterlace/tvtime/weavetff.c
@@ -0,0 +1,88 @@
+/**
+ * Weave frames, top-field-first.
+ * Copyright (C) 2003 Billy Biggs <vektor@dumbterm.net>.
+ * Copyright (C) 2008 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "_stdint.h"
+#include "gstdeinterlace.h"
+#include <string.h>
+
+#define GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF	(gst_deinterlace_method_weave_tff_get_type ())
+#define GST_IS_DEINTERLACE_METHOD_WEAVE_TFF(obj)	(G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF))
+#define GST_IS_DEINTERLACE_METHOD_WEAVE_TFF_CLASS(klass)	(G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF))
+#define GST_DEINTERLACE_METHOD_WEAVE_TFF_GET_CLASS(obj)	(G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF, GstDeinterlaceMethodWeaveTFFClass))
+#define GST_DEINTERLACE_METHOD_WEAVE_TFF(obj)	(G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF, GstDeinterlaceMethodWeaveTFF))
+#define GST_DEINTERLACE_METHOD_WEAVE_TFF_CLASS(klass)	(G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_WEAVE_TFF, GstDeinterlaceMethodWeaveTFFClass))
+#define GST_DEINTERLACE_METHOD_WEAVE_TFF_CAST(obj)	((GstDeinterlaceMethodWeaveTFF*)(obj))
+
+GType gst_deinterlace_method_weave_tff_get_type (void);
+
+typedef GstDeinterlaceSimpleMethod GstDeinterlaceMethodWeaveTFF;
+
+typedef GstDeinterlaceSimpleMethodClass GstDeinterlaceMethodWeaveTFFClass;
+
+
+static void
+deinterlace_scanline_weave (GstDeinterlaceMethod * self,
+    GstDeinterlace * parent, guint8 * out,
+    GstDeinterlaceScanlineData * scanlines, gint width)
+{
+  oil_memcpy (out, scanlines->m1, parent->row_stride);
+}
+
+static void
+copy_scanline (GstDeinterlaceMethod * self, GstDeinterlace * parent,
+    guint8 * out, GstDeinterlaceScanlineData * scanlines, gint width)
+{
+  /* FIXME: original code used m2 and m0 but this looks really bad */
+  if (scanlines->bottom_field) {
+    oil_memcpy (out, scanlines->bb0, parent->row_stride);
+  } else {
+    oil_memcpy (out, scanlines->bb2, parent->row_stride);
+  }
+}
+
+G_DEFINE_TYPE (GstDeinterlaceMethodWeaveTFF, gst_deinterlace_method_weave_tff,
+    GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
+
+static void
+gst_deinterlace_method_weave_tff_class_init (GstDeinterlaceMethodWeaveTFFClass *
+    klass)
+{
+  GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
+  GstDeinterlaceSimpleMethodClass *dism_class =
+      (GstDeinterlaceSimpleMethodClass *) klass;
+
+  dim_class->fields_required = 3;
+  dim_class->name = "Progressive: Top Field First";
+  dim_class->nick = "weavetff";
+  dim_class->latency = 0;
+
+  dism_class->interpolate_scanline = deinterlace_scanline_weave;
+  dism_class->copy_scanline = copy_scanline;
+}
+
+static void
+gst_deinterlace_method_weave_tff_init (GstDeinterlaceMethodWeaveTFF * self)
+{
+}
diff --git a/gst/deinterlace/tvtime/x86-64_macros.inc b/gst/deinterlace/tvtime/x86-64_macros.inc
new file mode 100644
index 00000000..2e9df758
--- /dev/null
+++ b/gst/deinterlace/tvtime/x86-64_macros.inc
@@ -0,0 +1,82 @@
+/*
+ *
+ * GStreamer
+ * Copyright (C) 2004 Dirk Ziegelmeier <dziegel@gmx.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ *
+ * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
+ */
+
+/*
+ * This file is copied from TVTIME's sources.
+ * Original author: Achim Schneider <batchall@mordor.ch>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef XAX
+
+#if defined (HAVE_CPU_I386) && !defined(HAVE_CPU_X86_64)
+
+#define XAX "eax"
+#define XBX "ebx"
+#define XCX "ecx"
+#define XDX "edx"
+#define XSI "esi"
+#define XDI "edi"
+#define XSP "esp"
+#define MOVX "movl"
+#define LEAX "leal"
+#define DECX "decl"
+#define PUSHX "pushl"
+#define POPX "popl"
+#define CMPX "cmpl"
+#define ADDX "addl"
+#define SHLX "shll"
+#define SHRX "shrl"
+#define SUBX "subl"
+
+#elif defined (HAVE_CPU_X86_64)
+
+#define XAX "rax"
+#define XBX "rbx"
+#define XCX "rcx"
+#define XDX "rdx"
+#define XSI "rsi"
+#define XDI "rdi"
+#define XSP "rsp"
+#define MOVX "movq"
+#define LEAX "leaq"
+#define DECX "decq"
+#define PUSHX "pushq"
+#define POPX "popq"
+#define CMPX "cmpq"
+#define ADDX "addq"
+#define SHLX "shlq"
+#define SHRX "shrq"
+#define SUBX "subq"
+
+#else
+#error Undefined architecture. Define either HAVE_CPU_I386 or HAVE_CPU_X86_64.
+#endif
+
+#endif
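/* Usage sketch (not part of the file above): because the macros expand to
 * the register names and mnemonic suffixes that match the build, one
 * inline-asm body can serve both i386 and x86-64.  Hypothetical example,
 * copying 8-byte blocks with MMX (the mm0 clobber is omitted for brevity;
 * real code should account for it and for MMX availability): */
static inline void
copy_quads_sketch (unsigned char *dst, const unsigned char *src, long quads)
{
  __asm__ __volatile__ (
      "1:\n\t"
      "movq (%%" XSI "), %%mm0\n\t"     /* load 8 bytes */
      "movq %%mm0, (%%" XDI ")\n\t"     /* store 8 bytes */
      ADDX " $8, %%" XSI "\n\t"         /* advance source */
      ADDX " $8, %%" XDI "\n\t"         /* advance destination */
      DECX " %%" XCX "\n\t"             /* one block done */
      "jnz 1b\n\t"
      "emms\n\t"
      : "+S" (src), "+D" (dst), "+c" (quads)
      :
      : "memory");
}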