//
// Copyright 2003 (c) Kevin Atkinson under the GNU GPL license version
// 2.0.  You should have received a copy of the GPL license along with
// this program if you did not you can find it at http://www.gnu.org/.
//

#include <windows.h>
#include <io.h>

#include <stdio.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdlib.h>
#include <assert.h>

#include <cmath>
#include <algorithm>

using std::swap;
using std::abs;
using std::min;
using std::max;
typedef unsigned char byte;
// Parity helpers; they look only at the lowest bit of x.
static inline bool is_odd(int x) {return (x & 1) != 0;}
static inline bool is_even(int x) {return (x & 1) == 0;}

// GCC attribute selecting the regparm(3) calling convention; applied below to
// the routines declared with REGPARM.
#define REGPARM __attribute__ (( regparm(3) ))

#include "avisynth_c.h"

////////////////////////////////////////////////////////////////////
//
//

// Console log stream: opened by alloc_clog(), closed by free_clog(), and
// handed to Log objects by Log::use_clog().  Null while no console is open.
static FILE * gclog = 0;

// Small logger.  A message can go to a console stream, to a log file and to
// the debugger output at the same time; see Log::print.
struct Log
{
  FILE * clog;    // console stream, set by use_clog(); not closed here
  FILE * flog;    // log file, set by open_log_file(); closed by ~Log
  bool debug_p;   // when true print() also calls OutputDebugString
  int log_level;  // print() drops messages whose level is above this

  Log()
    : clog(0)
    , flog(0)
    , debug_p(false)
    , log_level(2)
  {}

  ~Log()
  {
    if (flog != 0)
      fclose(flog);
  }

  void use_clog();
  bool open_log_file(const char * fn);
  void print(int l, const char * fmt, ...)
    __attribute__ ((format (printf, 3, 4)));
};

void alloc_clog()
{
  if (gclog) return;
  AllocConsole();
  HANDLE h = GetStdHandle( STD_OUTPUT_HANDLE );
  int fd = _open_osfhandle( (long) h, _O_TEXT );
  gclog = fdopen(fd, "w");
}

// Exit callback (registered through avs_at_exit): closes the console stream
// and releases the console, if one was opened.  Both arguments are unused.
void free_clog(void *, AVS_ScriptEnvironment *)
{
  if (gclog) {
    fclose(gclog);
    gclog = 0;
    FreeConsole();
  }
}

void Log::use_clog()
{
  alloc_clog();
  clog = gclog;
}

// Open fn in append mode as the log file unless a log file is already open.
// Returns true when a log file is open afterwards.
bool Log::open_log_file(const char * fn)
{
  if (!flog)
    flog = fopen(fn, "a");
  return flog != 0;
}

// __ZN3Log5printEiPKcz
// Format one message (printf style), append a newline and send it to every
// enabled sink: the console stream, the log file and the debugger output.
// Messages whose level l is above log_level are dropped.  Text that does not
// fit into the fixed-size buffer is truncated.
void Log::print(int l, const char * fmt, ...)
{
  if (l > log_level) return;
  char buf[80];
  // Two bytes of buf are reserved for the trailing "\n\0", so the message
  // text may use at most sizeof(buf) - 2 characters.
  const int max_text = (int)sizeof(buf) - 2;
  va_list vl;
  va_start(vl, fmt);
  int n = vsnprintf(buf, max_text + 1, fmt, vl);
  va_end(vl);
  // n is negative on error (and on truncation with some C runtimes) or is
  // the untruncated length; either way clamp it to what the buffer holds so
  // that buf[n] and buf[n+1] stay inside buf.
  if (n < 0 || n > max_text) n = max_text;
  buf[n] = '\n'; buf[n+1] = '\0';
  if (clog) {
    fputs(buf, clog);
    fflush(clog);}
  if (flog) {
    fputs(buf, flog);}
  if (debug_p)
    OutputDebugString(buf);
}

////////////////////////////////////////////////////////////////////
//
//

// Writable view of image rows: pointer to the first byte of a row plus the
// distance in bytes between consecutive rows.
struct MutableRow {
  byte * data;
  int pitch;
  // pitch is zeroed too, so a default-constructed row never carries an
  // indeterminate value when it is copied.
  MutableRow() : data(0), pitch(0) {}
  MutableRow(byte * d, int p) : data(d), pitch(p) {}
};

// Read-only view of image rows: pointer to the first byte of a row plus the
// distance in bytes between consecutive rows.  Converts implicitly from
// MutableRow.
struct ConstRow {
  const byte * data;
  int pitch;
  // pitch is zeroed too, so a default-constructed row never carries an
  // indeterminate value when it is copied.
  ConstRow() : data(0), pitch(0) {}
  ConstRow(MutableRow o) : data(o.data), pitch(o.pitch) {}
  // Takes a const pointer; callers holding a mutable pointer still convert
  // implicitly.
  ConstRow(const byte * d, int p) : data(d), pitch(p) {}
};

// Owning handle for an AVS_VideoFrame: the held frame is released when the
// handle is reset or destroyed, unless release() handed it out first.
class VideoFramePtr
{
  AVS_VideoFrame * ptr;

  // Copy operations are private, so the handle cannot be copied from outside.
  VideoFramePtr(const VideoFramePtr &);
  void operator= (const VideoFramePtr &);

public:
  VideoFramePtr(AVS_VideoFrame * p = 0) : ptr(p) {}

  ~VideoFramePtr()
  {
    if (ptr != 0)
      avs_release_video_frame(ptr);
  }

  // Adopt p; only valid while the handle is empty.
  void operator= (AVS_VideoFrame * p)
  {
    assert(ptr == 0);
    ptr = p;
  }

  operator AVS_VideoFrame * () {return ptr;}

  // Release the current frame (if any) and hold p instead.
  void reset(AVS_VideoFrame * p = 0)
  {
    if (ptr != 0)
      avs_release_video_frame(ptr);
    ptr = p;
  }

  // Give up ownership without releasing the frame.
  AVS_VideoFrame * release()
  {
    AVS_VideoFrame * held = ptr;
    ptr = 0;
    return held;
  }
};

////////////////////////////////////////////////////////////////////
//
//

// Pixel formats the filter accepts (smart_decimate rejects anything else).
enum VideoFormat {VF_YV12, VF_YUY2};
// Field order of the source; smart_decimate picks FO_TFF or FO_BFF from the
// parity of source frame 0.
enum FieldOrder {FO_Unknown, FO_TFF, FO_BFF};
struct Filter;

struct FieldDiff
{
  int num;
  float diff;
  int   what;
  AVS_VideoFrame * frame;
  void clear() {num = -9; diff = 1.0; what = 2;}
  FieldDiff() {clear();}
  FieldDiff(int n, float d, int w) : num(n), diff(d), what(w) {}
};

// Cache slot pairing a source frame with its frame number.  The slot owns
// the frame and releases it when cleared or destroyed.
struct FrameWNum
{
  int num;                  // frame number, -9 while the slot is empty
  AVS_VideoFrame * frame;   // held frame or null

  FrameWNum() : num(-9), frame(0) {}

  ~FrameWNum()
  {
    if (frame != 0)
      avs_release_video_frame(frame);
  }

  // Release the held frame, if any, and mark the slot as empty.
  void clear()
  {
    num = -9;
    if (frame != 0)
      avs_release_video_frame(frame);
    frame = 0;
  }
};

// Which source frames supply the even and odd lines of an output frame, as
// offsets that Filter::render adds to cur_src_fn.  Equal offsets make
// Filter::render take the frame from the bob clip instead of weaving.
struct FrameInfo {
  int even;
  int odd; // only used for weave

  FrameInfo()
  {
    even = 0;
    odd = 0;
  }

  FrameInfo(int t, int b)
  {
    even = t;
    odd = b;
  }
};

//
//
//

// Horizontal subsampling factor: the comparison works on width/HSCALE values
// per line (see smart_decimate).
static const int HSCALE = 2;
// Signature of a line subsampler: (source bytes, output values, source row
// size in bytes).
typedef REGPARM void (SubsampleLine)(const byte *, short *, size_t);
// One subsampler per supported pixel format.
extern "C" SubsampleLine subsample_line_yuy2_2, subsample_line_yv12_2;

// Parameters and scratch buffers for compare_fields().
// Hand-written assembler code uses this structure, so its layout is fixed:
// only append new members to it.
struct CompareParms
{
  SubsampleLine * subsample_line;  // subsampler matching the pixel format
  int row_size;                    // source row size in bytes
  int s_height;                    // number of comparison rows produced
  int s_width;                     // subsampled values per line
  int s_pitch;                     // s_width rounded up to a multiple of 16 (alloc_data)
  void * data;                     // raw allocation backing every buffer below
  float scale;                     // factor applied to the accumulated result
  Log * log;
  short * a, * b, * c, * d, * e;            // five subsampled source lines
  short * prev0, * cur0, * next0;           // first-stage row buffers
  float * prev1, * cur1, * next1;           // second-stage row buffers
  CompareParms() : data(0) {}
  // Allocates data and points the buffers into it; s_width must be set first.
  void alloc_data();
};

// Allocate the zero-filled scratch memory for compare_fields() and carve it
// into eight short buffers and three float buffers of s_pitch elements each,
// starting at a 32-byte aligned address.  s_width must be set beforehand.
// On allocation failure data and all buffer pointers are left null; callers
// should check data before using the buffers.
void CompareParms::alloc_data()
{
  // Drop a buffer from an earlier call so a repeated call does not leak.
  if (data) {free(data); data = 0;}
  a = b = c = d = e = 0;
  prev0 = cur0 = next0 = 0;
  prev1 = cur1 = next1 = 0;

  s_pitch = s_width;
  if (s_pitch % 16 != 0) s_pitch += 16 - (s_pitch % 16);
  // +32 leaves room to move the start up to the next 32-byte boundary.
  size_t data_size = (size_t)s_pitch*(sizeof(short)*8 + sizeof(float)*3) + 32;
  data = calloc(1, data_size);
  if (!data) return;
  // size_t rather than unsigned long: long is only 32 bits wide on 64-bit
  // Windows and would truncate the address.
  size_t addr = (size_t)data;
  if (addr % 32 != 0) addr += 32 - (addr % 32);
  short * sp = (short *)addr;
  a = sp + 0*s_pitch;
  b = sp + 1*s_pitch;
  c = sp + 2*s_pitch;
  d = sp + 3*s_pitch;
  e = sp + 4*s_pitch;
  prev0 = sp + 5*s_pitch;
  cur0  = sp + 6*s_pitch;
  next0 = sp + 7*s_pitch;
  float * fp = (float *)(sp + 8*s_pitch);
  prev1 = fp + 0*s_pitch;
  cur1  = fp + 1*s_pitch;
  next1 = fp + 2*s_pitch;
}

// Log a clock-cycle count together with the count per subsampled value
// (num divided by l->s_width) at log level 1.
extern "C"
REGPARM void print_cs(unsigned int num, CompareParms * l)
{
  // num is unsigned, so it must be printed with %u rather than %d.
  l->log->print(1, "Clock Cycles %u %f", num, (double)num/l->s_width);
}

// Compare two fields given as row views and return the scaled difference
// measure.  A C++ definition follows further down unless SSE is defined.
extern "C"
float compare_fields  (const CompareParms * p, ConstRow even, ConstRow odd);

// Adaptive threshold state that turns the stream of comparison values into
// FieldDiff classifications (see Analysis::adv).  The upper-case members are
// tuning parameters filled in by smart_decimate().
struct Analysis
{
  // adv() resets the thresholds to their defaults once last_set exceeds this
  // (and pure_tel < 0.72).
  int MAX_LAST_SET;

  float T1_DEF, T2_DEF;  // default values of thres1 / thres2
  float T1_MAX, T2_MAX;  // upper caps applied to thres1 / thres2

  float FLOOR, FLOOR_ADJ;         // used by the floor adjustment in adv()
  float SML_PEAK, SML_TAIL_DIFF;  // limits for accepting a small peak
  float LRG_PEAK, LRG_TAIL_DIFF;  // limits for accepting a large peak

  float pure_tel;
  float old_thres1;      // thresholds as of the previous adv(), used only to
  float old_thres2;      // log changes
  float thres1;          // values below this are classified 0
  float thres2;          // values below this (but not thres1) are classified 1
  float vals[3];         // the three most recent comparison values
  int last_set;          // -1, or a count incremented on every adv() since the
                         // thresholds were last adjusted
  int ready;             // how many of vals[0..1] have been filled so far
  int num; // frame number the next adv will give
  
  Log * log;

  void reset(int num);
  FieldDiff adv(float val); // delay of 2
};

// Per-clip state of the SmartDecimate filter.  Created and filled in by
// smart_decimate(), destroyed by Filter::free_filter().
struct Filter
{
  Log log;
  
  float pure_tel; // 0.45, 0.60, 0.65
  
  FieldOrder field_order;
  
  VideoFormat video_format;
  
  float ratio;      // source frames per output frame (denm/numr)
  float round_div;  // rounding point used by chose_next_frame()

  size_t width;
  size_t row_size, c_row_size;  // row sizes in bytes: first plane / U plane
  size_t height, c_height;      // heights in rows: first plane / U plane
 
  AVS_Clip * child;     // source clip (fi->child)
  AVS_Clip * bob_src;   // clip used when no weave is possible; released by ~Filter
  AVS_ScriptEnvironment * env;
  const AVS_VideoInfo * dest_vi;  // video info of the output clip

  int prev_src_fn;  // source frame chosen for the previous output frame
  int cur_src_fn;   // source frame chosen for the current output frame
  int prev_fn;      // last output frame number served, -9 before the first
  Analysis anal;
  CompareParms comp;
  
  float accum;          // ideal source position minus cur_src_fn
  int num_src_frames;
  
  Filter() : bob_src(0), prev_fn(-9) {}
  ~Filter() {if (bob_src) avs_release_clip(bob_src); 
             if (comp.data) free(comp.data);}
  
  //
  // Cache
  //
  
  // Both caches are indexed with (frame number & CACHE_MASK).
  static const int CACHE_SIZE = 16;
  static const int CACHE_MASK = 15;
  FieldDiff fd_cache[CACHE_SIZE];
  FrameWNum frame_cache[CACHE_SIZE];

  AVS_VideoFrame * get_src_frame(int fn);
  
  // Largest FieldDiff::what between two offsets relative to cur_src_fn.
  int diff(int d1, int d2);
  bool same_group(int x, int y) {return diff(x,y) <= 0;}
  bool similar_group(int x, int y) {return diff(x,y) <= 1;}
  bool diff_group(int x, int y) {return diff(x,y) == 2;}
  
  void clear_cache();
  void clear_frames_to(int fn);
  void adv_fd_cache_to(int fn);

  //
  //
  //

  void reset(int num);
  
  float compare(int num);

  bool chose_next_frame();
  FrameInfo get_frame_info();
  AVS_VideoFrame * render(FrameInfo fi);
  
  // Callbacks installed into AVS_FilterInfo by smart_decimate().
  static AVS_VideoFrame * get_frame(AVS_FilterInfo *, int num);
  static int get_parity(AVS_FilterInfo *, int num);
  static void free_filter(AVS_FilterInfo *);
};

/////////////////////////////////////////////////////////////////////
//
//
//

// Argument-parsing helpers for smart_decimate().  They rely on that
// function's locals `ret`, `tmp`, `args`, `i` and `f` and on its `error:`
// label.

// Store an error value built from str in `ret` and jump to `error`.
#define ERR(str) do {ret = avs_new_value_error(str); goto error;} while (0)
// Fail unless low <= var <= high.  str is stringized with #str, so a
// string-literal argument shows up in the message including its quotes.
#define CHECK_RANGE(str,var,low,high) do {\
    if (var < low || var > high) ERR(#str " must be in the range "\
                                     #low " - " #high "."); } while(0)
// Fail when var < low.
// NOTE(review): a value equal to low passes although the message says
// "greater than" -- confirm which is intended.
#define CHECK_GT(str, var, low)  do {\
    if (var < low) ERR(str " must be greater than  " #low "."); } while(0)
// Fail when var1 > var2 (equal values pass).
#define CHECK_ORDER(str1, str2, var1, var2) do {\
    if (var1 > var2) ERR(str1 " must be less than " str2 ".");}while(0)
#define CHECK_ORDER_ANAL(var1, var2) CHECK_ORDER(#var1, #var2, f->anal.var1, f->anal.var2)
// Read the next script argument into f->anal.what as an int, or use def when
// the argument is not defined.
#define SET_I_ANAL(what, def) do {\
    tmp = avs_array_elt(args, i++);\
    if (avs_defined(tmp)) f->anal.what = avs_as_int(tmp);\
    else f->anal.what = def;} while (0)
// Same as SET_I_ANAL for a float argument.
#define SET_F_ANAL(what, def) do {\
    tmp = avs_array_elt(args, i++);\
    if (avs_defined(tmp)) f->anal.what = avs_as_float(tmp);\
    else f->anal.what = def;} while (0)
// SET_F_ANAL followed by a check against the range 0.0 - 1.0.
#define SET_F_R_ANAL(what, def) do {SET_F_ANAL(what, def); \
                               CHECK_RANGE(#what, f->anal.what, 0.0, 1.0); } while (0)
// SET_F_ANAL followed by CHECK_GT against 0.0.
#define SET_F_0_ANAL(what, def) do {SET_F_ANAL(what, def); \
                               CHECK_GT(#what, f->anal.what, 0.0); } while (0)
// SET_F_ANAL followed by CHECK_GT against 1.0.
#define SET_F_1_ANAL(what, def) do {SET_F_ANAL(what, def); \
                               CHECK_GT(#what, f->anal.what, 1.0); } while (0)

// Script entry point for SmartDecimate(clip, numr, denm, bob_clip, ...).
// Builds the filter clip, validates the script arguments (in the order given
// by the parameter string registered in avisynth_c_plugin_init), fills in
// the Filter state and adjusts the output video info.
// Returns the new clip as an AVS_Value, or an error value when an argument
// is invalid.
AVS_Value smart_decimate(AVS_ScriptEnvironment * env, AVS_Value args, void * )
{
  AVS_Value ret;
  AVS_Value tmp,tmp1;
  AVS_FilterInfo * fi;
  Filter * f = 0;
  int numr, denm;
  int i = 1;  
  AVS_Clip * clip = avs_new_c_filter(env, &fi, args.d.array[0], 1);
  // The extra scope lets `goto error` leave the lifetime of `frame` (and
  // every other local declared inside) cleanly.
{
  VideoFramePtr frame(avs_get_frame(fi->child, 0));
  
  f = new Filter;
  f->anal.log = &f->log;
  
  if (avs_is_yuy2(&fi->vi))
    f->video_format = VF_YUY2;
  else if (avs_is_yv12(&fi->vi))
    f->video_format = VF_YV12;
  else
    ERR("Video must be YUY2 or YV12");
  
  if (!avs_is_field_based(&fi->vi)) ERR("Use SeparateFieldsFirst.");
  
  if (fi->vi.width % 8 != 0) ERR("The width must be a multiple of eight.");
    
  // From here on the clip owns f through free_filter (see the error path).
  fi->user_data = f;
  fi->get_frame   = Filter::get_frame;
  fi->get_parity  = Filter::get_parity;
  fi->free_filter = Filter::free_filter;

  f->child   = fi->child;
  f->env     = fi->env;
  f->dest_vi = &fi->vi;

  numr = avs_as_int(avs_array_elt(args, i++));
  denm = avs_as_int(avs_array_elt(args, i++));
  // Validate before the values are used as divisors: zero must be rejected
  // too, otherwise (3*denm)/numr below divides by zero.
  if (numr <= 0) ERR("numerator must be greater than 0.");
  if (denm <= 0) ERR("denominator must be greater than 0.");
  f->ratio = (float)denm/(float)numr;
  
  f->bob_src = avs_take_clip(avs_array_elt(args, i++), f->env);
  
  tmp = avs_array_elt(args, i++); // tel
  if (avs_defined(tmp))  f->pure_tel = avs_as_float(tmp);
  else                   f->pure_tel = 0.50;    
  f->anal.pure_tel = f->pure_tel;
  CHECK_RANGE("tel", f->pure_tel, 0.0, 1.0);
  
  // t_max wins over noise; without either the noise default 0.50 is used.
  tmp  = avs_array_elt(args, i++); // noise
  tmp1 = avs_array_elt(args, i++); // t_max
  if (avs_defined(tmp1)) {
    f->anal.T1_MAX = avs_as_float(tmp1);
  } else if (avs_defined(tmp)) {
    f->anal.T1_MAX = expf(17.65*avs_as_float(tmp) - 21.26);
  } else {
    f->anal.T1_MAX = expf(17.65*0.50 - 21.26);
  }
  f->anal.T2_MAX = 3*f->anal.T1_MAX;
  
  tmp  = avs_array_elt(args, i++); // max_last_set
  if (avs_defined(tmp))
    f->anal.MAX_LAST_SET = avs_as_int(tmp);
  else
    f->anal.MAX_LAST_SET = (3*denm)/numr + 1;
  CHECK_GT("max_last_set", f->anal.MAX_LAST_SET, 0);
 
  SET_F_R_ANAL(T1_DEF, 5e-12);
  SET_F_R_ANAL(T2_DEF, 9e-12);
  
  // Explicit t1_max / t2_max override the values derived above.
  tmp  = avs_array_elt(args, i++); // t1_max
  if (avs_defined(tmp)) f->anal.T1_MAX = avs_as_float(tmp);
  tmp  = avs_array_elt(args, i++); // t2_max
  if (avs_defined(tmp)) f->anal.T2_MAX = avs_as_float(tmp);

  CHECK_RANGE("t1_max", f->anal.T1_MAX, 0.0, 1.0);  
  CHECK_RANGE("t2_max", f->anal.T2_MAX, 0.0, 1.0);  
  CHECK_ORDER_ANAL(T1_DEF, T2_DEF);
  CHECK_ORDER_ANAL(T1_MAX, T2_MAX);

  SET_F_R_ANAL(FLOOR, 0.05);
  SET_F_0_ANAL(FLOOR_ADJ, 0.50);
  //SET_F_1_ANAL(SML_PEAK, 1.05);
  SET_F_1_ANAL(SML_PEAK, 1.1);
  SET_F_0_ANAL(SML_TAIL_DIFF, 0.35);
  //SET_F_1_ANAL(LRG_PEAK, 1.50);
  SET_F_1_ANAL(LRG_PEAK, 2.0);
  SET_F_0_ANAL(LRG_TAIL_DIFF, 0.15);
  CHECK_ORDER_ANAL(SML_PEAK, LRG_PEAK);
  
  tmp = avs_array_elt(args, i++); // log_level
  if (avs_defined(tmp)) f->log.log_level = avs_as_int(tmp);
  CHECK_RANGE("log_level", f->log.log_level, 0, 5);
  
  tmp = avs_array_elt(args, i++); // log_file
  if (avs_defined(tmp)) f->log.open_log_file(avs_as_string(tmp));
  
  tmp = avs_array_elt(args, i++); // console
  if (avs_defined(tmp) && avs_as_bool(tmp)) f->log.use_clog();

  tmp = avs_array_elt(args, i++); // debug_print
  if (avs_defined(tmp) && avs_as_bool(tmp)) f->log.debug_p = true;
  
  f->num_src_frames = fi->vi.num_frames;

  f->width = fi->vi.width;
  
  f->row_size = avs_get_row_size(frame);  
  f->height   = avs_get_height(frame);
  
  f->c_row_size = avs_get_row_size_p(frame, AVS_PLANAR_U);
  f->c_height   = avs_get_height_p  (frame, AVS_PLANAR_U);
  
  if (avs_get_parity(fi->child, 0)) {
    f->field_order = FO_TFF;
    f->log.print(1, "Field Order Is TFF");
  } else {
    f->field_order = FO_BFF;
    f->log.print(1, "Field Order Is BFF");
  }
  
  f->log.print(3, "t1_max = %#g  t2_max = %#g  max_last_set = %d",
               f->anal.T1_MAX, f->anal.T2_MAX, f->anal.MAX_LAST_SET);
  
  // Set compare parms

  if (f->video_format == VF_YUY2)
    f->comp.subsample_line = subsample_line_yuy2_2;
  else
    f->comp.subsample_line = subsample_line_yv12_2;

  f->comp.row_size = f->row_size;
  f->comp.s_height = ((((f->height - 1)*2)/3)/4)*4;
  f->comp.s_width  = f->width/HSCALE;

  f->comp.alloc_data();
  if (!f->comp.data) ERR("SmartDecimate: out of memory.");
  
  f->comp.scale  = 256 * 6 * (2*HSCALE-1);
  f->comp.scale *= f->comp.scale;
  f->comp.scale *= f->comp.scale;
  f->comp.scale *= f->comp.s_width * f->comp.s_height;
  f->comp.scale  = 1/f->comp.scale;
  
  f->comp.log    = &f->log;
  
  const AVS_VideoInfo * bob_vi = avs_get_video_info(f->bob_src);
  if (fi->vi.width != bob_vi->width || fi->vi.height*2 != bob_vi->height)
    ERR("Bob source video has incorrect dimmensions.");
  if (abs((float)fi->vi.fps_numerator/fi->vi.fps_denominator -
          (float)bob_vi->fps_numerator/bob_vi->fps_denominator) > 0.001)
    ERR("Bob source frame rate incorrect.");

  // Set output video parms
  
  avs_clear_property(&fi->vi, AVS_IT_BFF);
  avs_clear_property(&fi->vi, AVS_IT_TFF);
  avs_clear_property(&fi->vi, AVS_IT_FIELDBASED);
  fi->vi.height *= 2;
  fi->vi.num_frames = (int)(fi->vi.num_frames/f->ratio);
  avs_set_fps(&fi->vi, fi->vi.fps_numerator * numr, 
                       fi->vi.fps_denominator * denm);
                       
  }
  ret = avs_new_value_clip(clip);
  avs_release_clip(clip); clip = 0;
  return ret;
error:
  // Once f has been installed as user_data, releasing the clip runs
  // Filter::free_filter, which deletes user_data.  Detach f first so it is
  // deleted exactly once, whether or not it was installed yet.
  if (fi->user_data == f) fi->user_data = 0;
  delete f;
  avs_release_clip(clip); clip = 0;
  return ret;  
}

// Plugin entry point: registers the SmartDecimate script function and the
// exit hook that closes the log console.
const char * avisynth_c_plugin_init(AVS_ScriptEnvironment * env)
{
  // Parameter string, in the order smart_decimate() reads its arguments.
  static const char params[] =
    "ciic"
    "[tel]f[noise]f[t_max]f"
    "[max_last_set]i"
    "[t1_def]f[t2_def]f[t1_max]f[t2_max]f"
    "[floor]f[floor_adj]f"
    "[sml_peak]f[sml_tail_diff]f[lrg_peak]f[lrg_tail_diff]f"
    "[log_level]i[log_file]s[console]b[debug_print]b";

  avs_add_function(env, "SmartDecimate", params, smart_decimate, 0);
  avs_at_exit(env, free_clog, env);
  return "";
}

//////////////////////////////////////////////////////////////////////
//
//
//

// get_frame callback.  When output frames are requested in sequence the next
// source frame is chosen incrementally; any other request resets the state
// to the requested position.  The chosen frame is then rendered.
AVS_VideoFrame * Filter::get_frame(AVS_FilterInfo * fi, int num)
{
  Filter * self = static_cast<Filter *>(fi->user_data);
  assert(num >= 0);
  assert(num < self->dest_vi->num_frames);

  const bool sequential = (self->prev_fn + 1 == num);
  if (sequential) {
    self->chose_next_frame();
    // Log the cached diffs of the source frames that were stepped over.
    int i = self->prev_src_fn;
    while (i < self->cur_src_fn) {
      const FieldDiff & fd = self->fd_cache[i&CACHE_MASK];
      self->log.print(2, "Diff %d: %d %#g", i, fd.what, fd.diff);
      ++i;
    }
  } else {
    self->reset(num);
  }

  FrameInfo inf = self->get_frame_info();
  if (inf.even != inf.odd)
    self->log.print(1, "FRAME %d (%d) = [%d,%d]", num, self->cur_src_fn, inf.even, inf.odd);
  else
    self->log.print(1, "FRAME %d (%d) = BOB", num, self->cur_src_fn);

  self->prev_fn = num;
  return self->render(inf);
}

// get_parity callback: always reports 0, whatever frame is asked for.
int Filter::get_parity(AVS_FilterInfo * /*fi*/, int /*num*/)
{
  const int parity = 0;
  return parity;
}

// free_filter callback: destroys the Filter stored in user_data and clears
// the pointer.
void Filter::free_filter(AVS_FilterInfo * fi)
{
  delete static_cast<Filter *>(fi->user_data);
  fi->user_data = 0;
}

// Restart at output frame num: empty both caches, derive the source position
// from num*ratio and prime the analysis with its first two comparisons.
void Filter::reset(int num)
{
  log.print(1, "Reseting to %d", num);
  clear_cache();

  const float ideal = num*ratio;
  cur_src_fn = (int)ideal;
  accum = ideal - cur_src_fn;
  round_div = 1.0;

  anal.reset(cur_src_fn);
  // Analysis::adv only buffers its first two values.
  for (int k = 0; k != 2; ++k)
    anal.adv(compare(anal.num + 2));
}

// Compare source frame num with source frame num+1 and return the value
// produced by compare_fields().  Returns 1.0 when either of the two frames
// lies outside the source clip.
float Filter::compare(int num)
{
  // Both num and num+1 are fetched below, so the last source frame has no
  // partner and must be rejected here as well.
  if (num < 0 || num + 1 >= num_src_frames)
    return 1.0;
  AVS_VideoFrame * cur = get_src_frame(num);
  int cur_o = avs_get_parity(child, num);
  AVS_VideoFrame * next = get_src_frame(num+1);
  int next_o = avs_get_parity(child, num+1);
  assert(cur_o != next_o);
  ConstRow cur_d, next_d;
  cur_d.data  = avs_get_read_ptr(cur);
  cur_d.pitch = avs_get_pitch(cur);
  next_d.data = avs_get_read_ptr(next);
  next_d.pitch = avs_get_pitch(next);
  // compare_fields takes its row arguments as (even, odd); the frame whose
  // parity is non-zero is passed first.
  float res;
  if (cur_o)
    res = compare_fields(&comp, cur_d, next_d);
  else
    res = compare_fields(&comp, next_d, cur_d);
  return res;
}

// Return source frame fn through the frame cache.  The cache slot keeps
// ownership of the returned frame.
AVS_VideoFrame * Filter::get_src_frame(int fn)
{
  FrameWNum & slot = frame_cache[fn & CACHE_MASK];
  if (slot.num == fn)
    return slot.frame;

  // Miss: drop whatever the slot held and fetch the requested frame.
  slot.clear();
  slot.num = fn;
  slot.frame = avs_get_frame(child, fn);
  return slot.frame;
}

void Filter::adv_fd_cache_to(int fn)
{
  assert(anal.num <= fn);
  while (anal.num <= fn)
  {
    FieldDiff fd = anal.adv(compare(anal.num + 2));
    fd_cache[fd.num & CACHE_MASK] = fd;
  }
}
  
int Filter::diff(int d1, int d2) {
  if (d2 < d1) swap(d1,d2);
  d1 += cur_src_fn; d2 += cur_src_fn;
  int mx = 0;
  for (; d1 < d2; ++d1) {
    FieldDiff & d = fd_cache[d1 & CACHE_MASK];
    if (d.num != d1) adv_fd_cache_to(d1);
    mx = max(mx, d.what);
  }
  return mx;
}

// Empty every slot of the diff cache and the frame cache.
void Filter::clear_cache() 
{
  int slot = 0;
  while (slot != CACHE_SIZE) {
    fd_cache[slot].clear();
    frame_cache[slot].clear();
    ++slot;
  }
}

void Filter::clear_frames_to(int fn) 
{
  for (int i = 0; i != CACHE_SIZE; ++i) {
    if (frame_cache[i].num <= fn)
      frame_cache[i].clear();
  }
}

//////////////////////////////////////////////////////////////////////
//
// Compare
//

#ifndef SSE

// Working state of one compare_fields() call: a copy of the parameters plus
// the read positions in both fields and the count of rows produced so far.
struct CompareData : public CompareParms {
  CompareData(const CompareParms & d) : CompareParms(d) {}
  ConstRow s_l[2];  // [0] even field, [1] odd field (set in compare_fields)
  int line;         // incremented once per compare_fields_row() call
};

// Produces one comparison row; defined below.
REGPARM void compare_fields_row(CompareData & d, short *);

// C++ implementation of compare_fields (compiled when SSE is not defined).
// Works on a copy of *p0, so the buffer pointer swaps below do not change
// the caller's CompareParms; the buffers themselves are shared with it.
// Returns the accumulated measure multiplied by p0->scale.
extern "C"
float compare_fields(const CompareParms * p0, ConstRow even, ConstRow odd)
{
  CompareData d(*p0);
  d.s_l[0] = even; d.s_l[1] = odd;
  d.line = 0;
  float accum = 0;
  // The "previous" rows start out as zero.
  memset(d.prev0, 0, sizeof(short)*d.s_pitch);
  memset(d.prev1, 0, sizeof(float)*d.s_pitch);
  // Prime d and e with the first line of each field; compare_fields_row
  // moves them into a and b.
  d.subsample_line(d.s_l[0].data, d.d, d.row_size);
  d.subsample_line(d.s_l[1].data, d.e, d.row_size);
  // Every pass makes four compare_fields_row calls, i.e. advances d.line by 4.
  while (d.line < d.s_height)
  {
    compare_fields_row(d,d.cur0);
    compare_fields_row(d,d.next0);
    for (int x = 0; x < d.s_width; ++x)
      d.cur1[x] = (d.prev0[x]+d.next0[x])*(float)d.cur0[x];
    swap(d.prev0,d.next0);
    compare_fields_row(d,d.cur0);
    compare_fields_row(d,d.next0);
    for (int x = 0; x < d.s_width; ++x)
      d.next1[x] = (d.prev0[x]+d.next0[x])*(float)d.cur0[x];
    swap(d.prev0,d.next0);
    // Second stage: the same neighbour-sum-times-centre form applied to the
    // first-stage products.
    for (int x = 0; x < d.s_width; ++x)
      accum += (d.prev1[x]+d.next1[x])*d.cur1[x];
    swap(d.prev1,d.next1);
  }
  accum *= d.scale;
  return accum;
}

// Produce one comparison row in o[0 .. s_width-1] and increment D.line.
// The five line buffers a..e hold consecutive subsampled lines taken
// alternately from the two fields; the old d/e become the new a/b and three
// further lines are read.
REGPARM void compare_fields_row(CompareData & D, short * o)
{
  swap(D.a,D.d);
  swap(D.b,D.e);
  int i = D.line & 1; // 0 if even, 1 if odd
  // Two more lines from field i go to c and e, one line from the other
  // field goes to d.  Each read first advances that field by one row.
  D.s_l[i].data += D.s_l[i].pitch; D.subsample_line(D.s_l[i].data, D.c, D.row_size);
  D.s_l[i].data += D.s_l[i].pitch; D.subsample_line(D.s_l[i].data, D.e, D.row_size);
  i = i ^ 1; // 1 if even, 0 if odd
  D.s_l[i].data += D.s_l[i].pitch; D.subsample_line(D.s_l[i].data, D.d, D.row_size);
  // All pointers are moved to the end of the row so the loop can count a
  // negative index up to zero.
  o += D.s_width;
  // now compare lines
  short * a = D.a + D.s_width, * b = D.b + D.s_width, 
        * c = D.c + D.s_width, * d = D.d + D.s_width,
        * e = D.e + D.s_width;
  for (int x = -D.s_width; x != 0; ++x) 
  {
    // The +1 keeps every output value at least 1.
    o[x] = abs((a[x] + 4*c[x] + e[x]) - 3*(b[x] + d[x])) + 1;
  }
  ++D.line;
}

void subsample_line_yuy2_2(const byte * x, short * o, size_t row_size)
{
  const byte * stop = x + row_size;
  short a,b;
  a = 0;
  while (x < stop) {
    b = x[2];
    o[0] = a + x[0] + b;
    a = x[6];
    o[1] = b + x[4] + a;
    x += 8;
    o += 2;
  }
}

void subsample_line_yv12_2(const byte * x, short * o, size_t row_size)
{
  const byte * stop = x + row_size;
  short a,b;
  a = 0;
  while (x < stop) {
    b = x[1];
    o[0] = a + x[0] + b;
    a = x[3];
    o[1] = b + x[2] + a;
    x += 4;
    o += 2;
  }
}

#endif // NDEF SSE

//////////////////////////////////////////////////////////////////////
//
// Analysis
//

// Ratio Analysis::adv keeps between thres2 and thres1 when it adjusts one of
// them from the other.
static const float T_MIN_RATIO = 1.1;

// Relative positions (0 = larger tail value, 1 = peak value) that bound the
// thresholds in Analysis::adv, for small peaks and for large peaks.
static const float SML_T_MIN = 0.50, SML_T_MAX = 0.8;
static const float LRG_T1_MIN = 0.01, LRG_T1_MAX = 0.30;
static const float LRG_T2_MIN = 0.60, LRG_T2_MAX = 0.90;

void Analysis::reset(int n)
{
  thres1 = 0;
  thres2 = 0;
  thres1 = T1_DEF;
  thres2 = T2_DEF;
  last_set = -1;
  num = n - 2 - 2;
  ready = 0;
}

// Weighted average of x and y: w = 0 yields x, w = 1 yields y.
static inline float w_avg(float x, float y, float w)
{
  const float from_x = x*(1-w);
  const float from_y = y*w;
  return from_x + from_y;
}
// Limit x to the range [min, max]; the lower bound is tested first.
static inline float clip(float x, float min, float max)
{
  return x < min ? min
       : x > max ? max
       : x;
}
static inline float clip(float x, float min, float max, float r_min, float r_max)
{
  return clip(x, w_avg(min,max,r_min), w_avg(min,max,r_max));
}

// Feed the next comparison value.  The first two calls after reset() only
// buffer their value and return an empty FieldDiff.  Every later call
// inspects the three most recent values, may adapt thres1/thres2, and
// returns the classification of the oldest of the three (frame num-1 after
// the increment below).
FieldDiff Analysis::adv(float val)
{
  ++num;
  if (ready < 2) {

    vals[ready] = val;
    ready++;
    return FieldDiff();

  } else {

    bool has_peak = false;

    vals[2] = val;
    // A peak: the middle value is larger than both of its neighbours.
    if (vals[0] < vals[1] && vals[2] < vals[1]) 
    {
      float p = vals[1] / vals[0];
      float n = vals[1] / vals[2];
      float peak = min(p,n);  // peak height relative to the larger neighbour
      float max_val = max(vals[0], vals[2]);
      float tail_max = max(vals[0],vals[2]);
      float tail_min = min(vals[0],vals[2]);
      // How far the two neighbours ("tails") differ, once relative to the
      // peak height and once relative to their own mean; the geometric mean
      // of both is what gets tested.
      float rel_tail_diff = (tail_max - tail_min) / (vals[1] - tail_min);
      float self_tail_diff = 2.0 * (tail_max - tail_min) / (tail_max + tail_min);
      float tail_diff = sqrtf(rel_tail_diff * self_tail_diff);
      log->print(4, "%d:  Peak %0.3f  Tail Diff %0.3f  (%0.3f %0.3f)", 
                 num, peak, tail_diff, rel_tail_diff, self_tail_diff); 
      if (max_val < T2_MAX && peak > LRG_PEAK && tail_diff < LRG_TAIL_DIFF)
      {
        // Large peak: both thresholds are confined to bands between the
        // larger tail and the peak.
        log->print(4, " is T2");
        last_set = 0;
        thres1 = clip(thres1, tail_max, vals[1], LRG_T1_MIN, LRG_T1_MAX);
        thres2 = clip(thres2, tail_max, vals[1], LRG_T2_MIN, LRG_T2_MAX);
        has_peak = true;
      }
      else if (max_val < T2_MAX && peak > SML_PEAK && tail_diff < SML_TAIL_DIFF)
      {
        // Small peak: only one threshold is placed between tail and peak,
        // the other one is kept at least T_MIN_RATIO away from it.
        last_set = 0;
        if (thres2 > vals[1] && (pure_tel > 0.45 || thres1 >= tail_min)) {
          log->print(4, " is T1-T1");
          thres1 = clip(thres1, tail_max, vals[1], SML_T_MIN, SML_T_MAX);
          thres2 = max(thres2, thres1*T_MIN_RATIO);
        } else {
          log->print(4, " is T1-T2");
          thres2 = clip(thres2, tail_max, vals[1], SML_T_MIN, SML_T_MAX);
          thres1 = min(thres1, thres2/T_MIN_RATIO);
        }
        has_peak = true;
      }
    }
    if (!has_peak && pure_tel > 0.55)
    {
      // No usable peak: when the three values are nearly equal and small,
      // raise thres1 above them.
      float avg = (vals[0] + vals[1] + vals[2])/3;
      float d1 = abs(vals[0]-vals[1]);
      float d2 = abs(vals[0]-vals[2]);
      float d3 = abs(vals[1]-vals[2]);
      float d_max = max(d1, max(d2, d3));
      float v_max = max(vals[0], max(vals[1], vals[2]));
      log->print(4, "%d:  Max Diff %0.6f",
                 num, d_max/avg);
      if (d_max < FLOOR*avg && v_max < T1_MAX) {
        last_set = 0;
        log->print(4, "  adjusting floor");      
        thres1 = max(thres1, v_max + d_max * FLOOR_ADJ);
        thres2 = max(thres2, thres1*T_MIN_RATIO);
      }
    }
    // Too many calls since the thresholds were last adjusted: back to the
    // defaults.
    if (pure_tel < 0.72 && last_set > MAX_LAST_SET) 
    {
      log->print(4, "Reseting Thres");
      thres1 = T1_DEF;
      thres2 = T2_DEF;
      last_set = -1;
    }
    if (thres1 > T1_MAX) thres1 = T1_MAX;
    if (thres2 > T2_MAX) thres2 = T2_MAX;
    // Log the thresholds only when they differ from the previous call.
    if (thres1 != old_thres1)
      log->print(3, "%d: Thres1 now %#g", num-1, thres1);
    if (thres2 != old_thres2)
      log->print(3, "%d: Thres2 now %#g", num-1, thres2);
    old_thres1 = thres1; old_thres2 = thres2;
    
    // last_set counts calls since the last adjustment; -1 means "not set".
    if (last_set >= 0)
      last_set ++;
    // Classify the oldest value, then slide the window by one.
    int what;
    if (vals[0] < thres1)       what = 0;
    else if (vals[0] < thres2)  what = 1;
    else                        what = 2;
    FieldDiff res(num-1, vals[0], what);
    vals[0] = vals[1];
    vals[1] = vals[2];
    return res;
  }
}

//////////////////////////////////////////////////////////////////////
//
// Find Best Frame
//

// Margin by which Filter::chose_next_frame moves round_div past frac.
static const float DELTA = 0.001;

// Pick the source frame for the next output frame.  The ideal position is
// the previous source frame plus the carried remainder plus ratio; the
// candidates are its integer part (cur) and cur + 1.  Updates prev_src_fn,
// cur_src_fn, accum and possibly round_div.  Always returns true.
bool Filter::chose_next_frame()
{
  prev_src_fn = cur_src_fn;
  float ideal = prev_src_fn + accum + ratio;
  int     cur = (int)ideal;
  assert(cur < num_src_frames);
  // The group tests below take offsets relative to cur_src_fn, so it has to
  // point at cur before they run.
  cur_src_fn = cur;
  int p = prev_src_fn - cur;  // note p does not nessary have to equal -1
  float frac  = ideal - cur;
  enum Chosen {Either, Cur, Next};
  int chosen = Cur;

  // The chain is evaluated strictly in order; the group tests may advance
  // the diff cache as a side effect.
  if (cur == num_src_frames - 1)                          chosen = Cur;
  else if (same_group(0, 1))                              chosen = Either;
  else if (same_group(p, 0))                              chosen = Next;
  else if (similar_group(p,0) && similar_group(-1,0)) {
    if (similar_group(0, 1) && !same_group(1, 2))         chosen = Either;
    else if (diff_group(0, 1))                            chosen = Next;
  } else if (diff_group(p, 0) && diff_group(-1, 0)
             && diff_group(1,2))                          chosen = Either;

  if (chosen == Either) {
    // Free choice: round at the current rounding point.
    cur_src_fn = frac < round_div ? cur : cur + 1;
  } else if (chosen == Cur) {
    // Forced choice: move the rounding point so that it agrees with it.
    if (round_div < frac) round_div = frac + DELTA;
    cur_src_fn = cur;
  } else if (chosen == Next) {
    if (round_div > frac) round_div = frac - DELTA;
    cur_src_fn = cur + 1;
  }

  accum = ideal - cur_src_fn;
  return true;
}

// Decide how the current output frame is assembled.  The current source
// frame is paired with the neighbour at offset +1 when cur_src_fn is even
// and -1 when it is odd (the "matching" side), or else with the neighbour on
// the other side.  When neither neighbour qualifies the result has equal
// offsets, which Filter::render takes as a request for the bob clip.
FrameInfo Filter::get_frame_info()
{
  const bool cur_is_even = is_even(cur_src_fn);
  const int matching = cur_is_even ? 1 : -1;
  const int other    = -matching;

  // Offset of the chosen partner frame; 0 means no partner was found.
  int partner = 0;
  if (same_group(0, matching))
    partner = matching;
  else if (same_group(0, other))
    partner = other;
  else if (pure_tel > 0.65) {
    if (similar_group(0, matching))
      partner = matching;
    else if (similar_group(0, other))
      partner = other;
  }

  if (partner == 0)
    return FrameInfo();

  // The current frame fills the even slot when the source is TFF and
  // cur_src_fn is even, or when the source is not TFF and cur_src_fn is odd;
  // otherwise it fills the odd slot.
  const bool cur_in_even_slot = ((field_order == FO_TFF) == cur_is_even);
  if (cur_in_even_slot)
    return FrameInfo(0, partner);
  return FrameInfo(partner, 0);
}

//////////////////////////////////////////////////////////////////////
//
// Render
//

// Writable frame together with row views of its planes.  The frame is held
// through VideoFramePtr and therefore released with this object unless
// frame.release() is called first.
struct MutableRawFrame
{
  VideoFramePtr frame;
  MutableRow y;          // first plane
  MutableRow chroma[2];  // [0] = U plane, [1] = V plane
  void init(AVS_VideoFrame * frame);
};

// Read-only frame together with row views of its planes.  The frame pointer
// is stored as a plain pointer; this struct never releases it.
struct ConstRawFrame
{
  AVS_VideoFrame * frame;
  ConstRow y;          // first plane
  ConstRow chroma[2];  // [0] = U plane, [1] = V plane
  void init(AVS_VideoFrame * frame);
};

// Fill both chroma planes of dest from the chroma planes of two source
// frames.  row_size and height describe one source chroma plane.
extern "C"
void weave_chroma(const ConstRawFrame & even, const ConstRawFrame & odd,
                  const MutableRawFrame & dest, 
                  size_t row_size, size_t height);

// Produce the output frame described by finf.  Equal offsets mean the frame
// is taken from the bob clip; otherwise the two referenced source frames are
// woven line by line into a new frame.  Afterwards source frames up to
// cur_src_fn-2 are dropped from the frame cache.
AVS_VideoFrame * Filter::render(FrameInfo finf)
{
  AVS_VideoFrame * out = 0;
  const bool use_bob = (finf.even == finf.odd);

  if (use_bob) {

    out = avs_get_frame(bob_src, cur_src_fn);

  } else {

    MutableRawFrame dest;
    dest.init(avs_new_video_frame(env, dest_vi, 8));

    ConstRawFrame even, odd;
    even.init(get_src_frame(finf.even + cur_src_fn));
    odd.init(get_src_frame(finf.odd + cur_src_fn));

    // Interleave: the even source goes to destination lines 0, 2, 4, ...,
    // the odd source to lines 1, 3, 5, ...
    const int woven_pitch = dest.y.pitch*2;
    byte * const even_lines = dest.y.data;
    byte * const odd_lines  = dest.y.data + dest.y.pitch;
    avs_bit_blt(env, even_lines, woven_pitch,
                even.y.data, even.y.pitch, row_size, height);
    avs_bit_blt(env, odd_lines, woven_pitch,
                odd.y.data, odd.y.pitch, row_size, height);

    if (video_format == VF_YV12)
      weave_chroma(even, odd, dest, c_row_size, c_height);

    // Hand the new frame to the caller instead of releasing it.
    out = dest.frame.release();

  }

  clear_frames_to(cur_src_fn-2);
  return out;

}

// Take ownership of f and record the write pointer and pitch of its first
// plane and of its U and V planes.
void MutableRawFrame::init(AVS_VideoFrame * f)
{
  frame = f;

  y.data  = avs_get_write_ptr(f);
  y.pitch = avs_get_pitch(f);

  static const int planes[2] = {AVS_PLANAR_U, AVS_PLANAR_V};
  for (int i = 0; i != 2; ++i) {
    chroma[i].data  = avs_get_write_ptr_p(f, planes[i]);
    chroma[i].pitch = avs_get_pitch_p(f, planes[i]);
  }
}

// Remember f (without taking ownership) and record the read pointer and
// pitch of its first plane and of its U and V planes.
void ConstRawFrame::init(AVS_VideoFrame * f)
{
  frame = f;

  y.data  = avs_get_read_ptr(f);
  y.pitch = avs_get_pitch(f);

  static const int planes[2] = {AVS_PLANAR_U, AVS_PLANAR_V};
  for (int i = 0; i != 2; ++i) {
    chroma[i].data  = avs_get_read_ptr_p(f, planes[i]);
    chroma[i].pitch = avs_get_pitch_p(f, planes[i]);
  }
}

// Build the chroma planes of a woven frame.  For both chroma planes every
// source row pair (one row from `even`, one from `odd`) is averaged with
// rounding up, and the result is written to two consecutive destination
// rows.  row_size and height describe one source chroma plane, so the
// destination receives 2*height rows.
void weave_chroma(const ConstRawFrame & even, const ConstRawFrame & odd,
                  const MutableRawFrame & dest, 
                  size_t row_size, size_t height)
{
  for (int i = 0; i != 2; ++i) {
    int    d_pitch = dest.chroma[i].pitch;
    byte * d0 = dest.chroma[i].data; byte * d1 = d0 + d_pitch;
    const byte * el = even.chroma[i].data; int e_pitch = even.chroma[i].pitch;
    const byte * ol = odd.chroma[i].data;  int o_pitch = odd.chroma[i].pitch;
    for (size_t y = 0; y != height; ++y) {
#ifdef SSE
      typedef int v8qi __attribute__ ((mode(V8QI)));
      // `<` instead of `!=`: the width is only required to be a multiple of
      // eight, so the chroma row size need not be, and with `!=` x would
      // step over row_size and never stop.  The last group may reach up to
      // 7 bytes past row_size; this assumes every row is padded to a
      // multiple of 8 bytes by its pitch -- TODO confirm for source frames.
      for (size_t x = 0; x < row_size; x += 8) {
        v8qi tmp = __builtin_ia32_pavgb(*(const v8qi *)(el + x),
                                        *(const v8qi *)(ol + x));
        *(v8qi *)(d0 + x) = tmp;
        *(v8qi *)(d1 + x) = tmp;
      }
#else
      for (size_t x = 0; x != row_size; ++x) {
        d0[x] = (el[x] + ol[x] + 1)/2;
        d1[x] = d0[x];
      }
#endif
      d0 += 2*d_pitch; d1 += 2*d_pitch;
      el += e_pitch; ol += o_pitch;
    }
  }
#ifdef SSE
  // pavgb works on the MMX registers, which alias the x87 stack; clear the
  // MMX state so floating-point code that runs afterwards is not corrupted.
  __builtin_ia32_emms();
#endif
}


