/* -*- C++ -*-
 * headarry.cpp
 * author: Sam Rushing
 * $Id: headarry.cpp 1.27 1997/04/05 00:26:27 dumoulin Exp $
 */

#include <windows.h>
#include <windowsx.h>
#include "wvglob.h"
#include "winvn.h"
#pragma hdrstop

#include <ctype.h>  // for isspace
#include "wvclass.h"


/* The header and thread arrays are set up as follows:
 * When we allocate space for the TypHeader array, we leave room for a
 * pointer at the very front of that space, which will either indicate 
 * that there is no thread index array (== NULL) or will point to that
 * array.  Since windows can move both of these arrays around, this
 * slot is updated whenever lock_headers is called.
 * 
 * After initialize_header_array is called, this sequence of
 * operations can be used to access the header array:
 * 1. header_p headers = lock_headers (header_handle, thread_handle);
 * 2. header_elt (headers, index);
 * 3. unlock_headers (header_handle, thread_handle);
 *
 * The thread_index array is an array of longs, each an index into the
 * the real header array.  If thread_index is allocated and filled,
 * header_elt will indirect through it.  Sorting the array of indices
 * will sort the headers accordingly.
 */

/* globals, yuck! */
header_p g_headers;
thread_array g_index, g_parent, g_parsort;
long g_length;
WMemPool WVHeaderData::m_ClassPool(sizeof(WVHeaderData), POOLBLOCKSIZE / sizeof(WVHeaderData));

/* lock the headers and the thread index array in memory for access */
header_p
lock_headers (HANDLE header_handle, HANDLE thread_handle)
{
  thread_array_p indirect;
  header_p headers;

  /* lock the header array in position */
  indirect = (thread_array_p) GlobalLock (header_handle);
  headers = (header_p) ((char_p) indirect + sizeof (char_p));

  /* if we've a valid thread_handle, lock the thread_array, too */
  if (thread_handle) {
    *indirect = (thread_array) GlobalLock (thread_handle);
  }
  else
    *indirect = NULL;

  return (headers);
}

/* unlock the headers and thread index array */
void
unlock_headers (HANDLE header_handle, HANDLE thread_handle)
{
  GlobalUnlock (header_handle);
  if (thread_handle)
    GlobalUnlock (thread_handle);
}


/* return the header memory to windows */
void
free_headers (HANDLE header_handle, HANDLE thread_handle)
{
  GlobalFree (header_handle);
  if (thread_handle)
    GlobalFree (thread_handle);
}


void
set_index_to_identity (HANDLE header_handle, HANDLE thread_handle, long length)
{
  long i;
  header_p headers;
  thread_array thread_index;
  thread_array_p thread_index_p;

  headers = lock_headers (header_handle, thread_handle);

  if (thread_handle) {
    thread_index_p = (thread_array_p) ((char_p) headers - sizeof (char_p));
    thread_index = *thread_index_p;

    /* Initialize with identity */
    for (i = 0; i < length; i++)
      thread_index[i] = i;
  }
  unlock_headers(header_handle, thread_handle);
}


/* set up the header array, and possibly the thread index array */
void
initialize_header_array (HANDLE header_handle, HANDLE thread_handle, long length)
{
  long i;
  header_p headers;
  thread_array thread_index;
  thread_array_p thread_index_p;

  headers = lock_headers (header_handle, thread_handle);

  if (thread_handle) {
    thread_index_p = (thread_array_p) ((char_p) headers - sizeof (char_p));
    thread_index = *thread_index_p;

    /* Initialize with identity */
    for (i = 0; i < length; i++)
      thread_index[i] = i;
  }

  //memset(&headers[0], 0, (size_t) length * sizeof(headers[0])); // now use ZEROINIT option
  
  for (i = 0; i < length; i++)
  {
    headers[i].pHD = new WVHeaderData();
  }

  unlock_headers (header_handle, thread_handle);
}


long
find_index_from_artnum (long artindex, HANDLE header_handle, HANDLE thread_handle, long length)
{
  long i;
  header_p headers;

  headers = lock_headers (header_handle, thread_handle);
  for (i = 0; i < length; i++) {
    if ((long) headers[i].number == artindex)
      break;
    }
  
  unlock_headers (header_handle, thread_handle);
  if (i >= length) return(-1);
  else
    return(i);
}

/* Use this routine to get at an element of the header array - it  */
/* will automatically indirect through the thread index array if it's */
/* there. */

header_p
header_elt (header_p headers, long index)
{

  thread_array thread_index;
  thread_index = *((thread_array_p) ((char_p) headers - sizeof (char_p)));

  if (thread_index) {
    return (&(headers[thread_index[index]]));
  }
  else
    return (&(headers[index]));
}

int
compare_artnum (header_p headers,
                long elem1, long elem2)
{
  long e1, e2;
  e1 = headers[elem1].number;
  e2 = headers[elem2].number;
  if (e1 == e2)
    return 0;
  else if (e1 < e2)
    return -1;
  else
    return 1;
}


int
compare_lines (header_p headers,
               long elem1, long elem2)
{
  long e1, e2;
  e1 = headers[elem1].lines;
  e2 = headers[elem2].lines;
  if (e1 == e2)
    return 0;
  else if (e1 < e2)
    return -1;
  else
    return 1;
}

int
compare_message_id (header_p headers,
                    long elem1, long elem2)
{
  return strcmp (headers[elem1].pHD->sMessage_id, headers[elem2].pHD->sMessage_id);
}


int
_compare_subject (const char * p1, const char * p2)
{
  // trim leading 're:' notation
  while (*p1 && !_strnicmp(p1, "Re:", 3)) {
    p1 += 3;
    while (*p1 && isspace(*p1)) p1++;
  }
  while (*p2 && !_strnicmp(p2, "Re:", 3)) {
    p2 += 3;
    while (*p2 && isspace(*p2)) p2++;
  }
  return stricmp (p1, p2);
}  

int
compare_subject (header_p headers,
                 long elem1, long elem2)
{
  return _compare_subject (headers[elem1].pHD->sSubject, headers[elem2].pHD->sSubject);
}

int
compare_from (header_p headers,
              long elem1, long elem2)
{
  return stricmp (headers[elem1].pHD->sFrom, headers[elem2].pHD->sFrom);
}

int
compare_date (header_p headers,
              long elem1, long elem2)
{
  long e1, e2;
  e1 = headers[elem1].pHD->date;
  e2 = headers[elem2].pHD->date;
  if (e1 == e2)
    return 0;
  else if (e1 < e2)
    return -1;
  else
    return 1;
}

/* this is the shell sort taken from shellsor.c and modified for */
/* threading purposes... The extra comparison is to make the sort */
/* stable w.r.t. article number */

void
shell_sort_index_array (header_p headers,
                        thread_array index,
                        long nElements,
                        int (*compare) (header_p headers,
                                        long elem1,
                                        long elem2))
{
#define STRIDE_FACTOR 3
  long c, d, stride;
  int found;

  stride = 1;
  while (stride <= nElements)
    stride = stride * STRIDE_FACTOR + 1;

  while (stride > (STRIDE_FACTOR - 1)) {
    stride = stride / STRIDE_FACTOR;
    for (c = stride; c < nElements; c++) {
      found = 0;
      d = c - stride;
      while ((d >= 0) && !found) {
        int comp = compare (headers, index[d + stride], index[d]);
        if ((comp < 0) ||
            ((comp == 0) &&
             (compare_artnum (headers, index[d + stride], index[d]) < 0))) {
          long tmp = index[d];
          index[d] = index[d + stride];
          index[d + stride] = tmp;
          d -= stride;
        }
        else {
          found = 1;
        }
      }
    }
  }
}


void
shell_sort_parent_array (thread_array index,
                         thread_array parents,
                         long n)
{
#define STRIDE_FACTOR 3
  int c, d, stride;
  int found;

  stride = 1;
  while (stride <= n)
    stride = stride * STRIDE_FACTOR + 1;

  while (stride > (STRIDE_FACTOR - 1)) {
    stride = stride / STRIDE_FACTOR;
    for (c = stride; c < n; c++) {
      found = 0;
      d = c - stride;
      while ((d >= 0) && !found) {
        long p1 = parents[index[d + stride]];
        long p2 = parents[index[d]];

        if ((p1 < p2) || ((p1 == p2) && (index[d + stride] > index[d]))) {
          long tmp = index[d];
          index[d] = index[d + stride];
          index[d + stride] = tmp;
          d -= stride;
        }
        else {
          found = 1;
        }
      }
    }
  }
}

long
bsearch_parsort_table (thread_array parsort,
                       thread_array parents,
                       long looking_for,
                       long n)
{
  long p;
  long high = n;
  long low = 0;

  while ((high - low) > 1) {
    p = (high + low) / 2;
    if (looking_for <= parents[parsort[p - 1]])
      high = p;
    else
      low = p;
  }
  if (looking_for == parents[parsort[high - 1]])
    return (high - 1);
  else
    return -1;
}


long
bsearch_mid_table (header_p headers,
                   thread_array index,  /* sorted index */
                   const char *mid,     /* message_id we're looking for */
                   long n)
{
  long p;
  long high = n;
  long low = 0;

  if (mid) {
    while ((high - low) > 1) {
       p = (high + low) / 2;
	   if (strcmp (mid, headers[index[p - 1]].pHD->sMessage_id) <= 0)
	  	  high = p;
	   else
        low = p;
    }
    if (strcmp (mid, headers[index[high - 1]].pHD->sMessage_id) == 0)
      return (high - 1);
    else
      return -1;
  }
  else return -1;
}

/*
 * All these bsearch & shell-sort routines are ugly, but I haven't
 * the time right now to generalize and debug it all (SMR 950123)
 */

long
bsearch_sub_table (header_p headers,
                   thread_array index,  /* sorted index */
                   const char * sub,    /* subject we're looking for */
                   long n)
{
  long p;
  long high = n;
  long low = 0;

  while ((high - low) > 1) {
    p = (high + low) / 2;
    if (_compare_subject (sub, headers[index[p - 1]].pHD->sSubject) <= 0)
      high = p;
    else
      low = p;
  }
  if (_compare_subject (sub, headers[index[high-1]].pHD->sSubject) == 0)
    return (high - 1);
  else
    return -1;
}

long
thread_sort (long root_index, long start, long end, int depth)
{
  long j;
  long num_children = 0;
  long child_start;

  if (start == end)
    return end;
  else {
    /* find the children of this node, pack them in the bottom */

    /* this will find the first child in the sorted table */
    child_start = bsearch_parsort_table (g_parsort,
                                         g_parent,
                                         root_index,
                                         g_length);

    /* for each child, find its index and push on the stack in g_index */
    if (child_start != -1) {
      while ((child_start < g_length) &&
             (g_parent[g_parsort[child_start]] == root_index)) {
        g_index[end - num_children - 1] = g_parsort[child_start];
        child_start++;
        num_children++;
      }
    }
    /* no children found */
    else
      return (start);

    /* apply sort-me to each of the children */

    if (num_children == 0)
      return (start);

    for (j = num_children; j > 0; j--) {
      g_index[start] = g_index[end - j];
      g_headers[g_index[start]].thread_depth = (char) depth;
      start = thread_sort (g_index[start], start + 1, end - (j - 1), depth + 1);
    }
    return (start);
  }
}

#if 0
/*
 * Routine SortParents will make 2 parents with the same subject contiguous in
 * the header list without disrupting the "stableness" of the children.
 *
 * this function rearranges the 'parsort' table (see end of documentation)
 * so that threads with the same subject are contiguous.
 * Unfortunately this is an n^2 algorithm (or close), and it was slowing
 * the thread process considerably (4000 arts took about 22 seconds on a 486/33)
 * (SMR 950123)
 */

void
SortParents (header_p headers,
             thread_array parsort,
             thread_array parents,
             long iSize)
{
  long nlast = 0;
  long temp;
  int32 i, j, k;

  for(i = 0; i < iSize; i++) {
    nlast = i-1;
    if(parents[parsort[i]] != -1) {
//    TRACE4("SortParents: i = %02d, parsort[i] = %2d, parents[parsort[i]] = %3d, nlast = %3d\n", 
//           i, parsort[i], parents[parsort[i]], nlast);
      break;
    }
  }
  if (nlast) {  // at least 2 items in list
    for (i = 0; i < nlast; i++) {
      for (j = i+2; j <= nlast; j++) {
        if (!compare_subject(headers, parsort[i], parsort[j])) {
          // char *p1 = headers[parsort[i]].subject;
          // char *p2 = headers[parsort[j]].subject;
          // TRACE2("SortParents: headers[%02d] = %s\n", parsort[i], p1);
          // TRACE2("             headers[%02d] = %s\n", parsort[j], p2);
          for (k = j-1; k > i; k--) {
            temp = parsort[k];
            parsort[k] = parsort[k+1];
            parsort[k+1] = temp;
          }
        }
      }
    }
  }
}

#endif

long
find_parent (thread_array parent_table, long start)
{
  long climber = start;
  while ((parent_table[climber] != -1) &&
         (climber != parent_table[climber])) {
    climber = parent_table[climber];
  }
  return (climber);
}

/* setup for threading algorithm:
 * 1. allocate an extra thread_array for holding a sorted message-id table
 * 2. using mid_table, allocate and create another table, a map from
 *    article_index->parent_index  
 * 3. sort this table by parent_index (must be a stable sort)
 * 
 * the recursive thread_sort will use these tables to do a stable sort
 * by threads...
 */

void
sort_by_threads (HANDLE header_handle, HANDLE thread_handle, long length, BOOL bSOpt)
{
  long i;
  header_p headers;
  HANDLE mid_handle, parent_handle;
  thread_array thread_index, mid_table, parent_table;
  
  headers = lock_headers (header_handle, thread_handle);
  thread_index = *((thread_array_p) ((char_p) headers - sizeof (char_p)));
  
  if (!thread_index)
    return;
  
  mid_handle = GlobalAlloc (GMEM_MOVEABLE, (long) (sizeof (long) * length));
  if (!mid_handle)
    return;
  
  parent_handle = GlobalAlloc (GMEM_MOVEABLE, (long) (sizeof (long) * length));
  if (!parent_handle) {
    GlobalFree (mid_handle);
    return;
  }
  
  mid_table = (thread_array) GlobalLock (mid_handle);
  parent_table = (thread_array) GlobalLock (parent_handle);
  
  /* these globals are needed in various sort & search routines */
  g_headers = headers;
  g_index = thread_index;
  g_parent = parent_table;
  g_parsort = mid_table;
  g_length = length;

  /* create the message_id table */
  for (i = 0; i < length; i++)
    mid_table[i] = i;
  shell_sort_index_array (headers, mid_table, length, compare_message_id);
  
  /* create the parent table */
  
  for (i = 0; i < length; i++) {
    long p = bsearch_mid_table (headers, mid_table, headers[i].pHD->sBest_ref, length);
    parent_table[i] = (p == -1) ? -1 : mid_table[p];
  }
  
  if(bSOpt) {
    // this should be moved into a separate routine, called something like
    // 'reunite_orphans'
    
    // At this point, parent_table maps from article -> parent.
    // We want to identify 'orphaned' threads, created by broken newsreaders
    //  and mangled reference lines, and reattach them to the first tree with
    //  the same subject.

    // reuse mid_table temporarily as an index sorted by subject (modulo 'Re: ')
    for (i = 0; i < length; i++) {
      mid_table[i] = i;
    } // set to identity
    shell_sort_index_array (headers, mid_table, length, compare_subject); // sort
    
    // In a single pass through parent_table, check each root 
    // subject, and attach them to the root of the first matching thread...
    
    for (i = 0; i < length; i++) {
      if (parent_table[i] == -1) {
        long p = bsearch_sub_table (headers, mid_table, headers[i].pHD->sSubject, length);
        // find a matching subject for this orphan
        if (p != -1) {
          // find the root node of this subject hit
          p = find_parent (parent_table, mid_table[p]);
          // if we didn't find ourself
          if (p != i) {
            int p_re, i_re;
            // Try to identify root art from the subject line
            p_re = (_strnicmp (headers[p].pHD->sSubject, "re:", 3) == 0);
            i_re = (_strnicmp (headers[i].pHD->sSubject, "re:", 3) == 0);
            if (p_re && !i_re) {
              parent_table[p] = i;
            } else if (i_re && !p_re) {
              parent_table[i] = p;
            // Try to identify root art by comparing dates,
            // which doesn't work very well because winvn's
            // current date parser does not handle timezones
            } else if (headers[p].pHD->date < headers[i].pHD->date) {
              parent_table[i] = p;
            } else {
              parent_table[p] = i;
            }
          }
        }
      }
    }
  }
    
  // re-use the mid_table again, as a sorted parent table.
  for (i = 0; i < length; i++)
    mid_table[i] = i;
  
  shell_sort_parent_array (mid_table, parent_table, length);
  
  // recursively construct the thread trees
  thread_sort (-1, 0, length, 0);
  
  GlobalUnlock (parent_handle);
  GlobalFree (parent_handle);
  GlobalUnlock (mid_handle);
  GlobalFree (mid_handle);
}


void sort_by_option(header_p headers, thread_array thread_index, BOOL threadOk,
                    long nLines, HANDLE header_handle, HANDLE thread_handle)
{
  switch(iSortOption)
    {
    case IDM_SORT_DATE:
      shell_sort_index_array (headers, thread_index,
                              nLines, compare_date);
      break;
      
    case IDM_SORT_SUBJECT:
      shell_sort_index_array (headers, thread_index,
                              nLines, compare_subject);
      break;
      
    case IDM_SORT_LINES:
      shell_sort_index_array (headers, thread_index,
                              nLines, compare_lines);
      break;
      
    case IDM_SORT_THREADSUB:
    case IDM_SORT_THREADS:
      if (threadOk)
        sort_by_threads(header_handle, thread_handle, nLines,
                        (iSortOption == IDM_SORT_THREADSUB) ? TRUE : FALSE);
      else
        MessageBox(NULL, "Threading disabled", "WinVN", MB_OK);
      break;
      
    case IDM_SORT_FROM:
      shell_sort_index_array (headers, thread_index,
                              nLines, compare_from);
      break;
      
    case IDM_SORT_ARTNUM:
    default:
      shell_sort_index_array (headers, thread_index,
                              nLines, compare_artnum);
      
      break;
    }
}

//   --------------------------------------------------------------------------
//
//   Thread sorting algorithm.
//
//   Here's an example, already threaded, showing the relationship between
//   parent, child, and original index.
//
//
//
//   0   5
//   1      2
//   2         3
//   3      0   
//   4   1   
//   5      4        
//
//   On the display, it may look like this:
//
//   5   Why my computer is better than yours...
//   2     Re: Why my computer is better than yours...
//   3       Why my _car_ is better than your computer (was Re: ...)
//   0     Re: Why my computer is better than yous...
//   1   What's the latest on the Amiga 6000???
//   4     What planet are you on ? (was Re: What's the latest...)
//
//   This shows that articles #2 and #0 are in response to article #5 (yes this can
//   and does happen), article #4 is in response to #1, etc...
//
//   This is the parent table.  It answers the question: "what is the index
//   of my parent".  '*' means root, or 'no' index - either there is no    
//   parent of this article, or we don't have it.  In the code, '*' == -1.
//
//     |--|
//   0 |5 |
//     |--|
//   1 |* | 
//     |--|
//   2 |5 |
//     |--|
//   3 |2 |
//     |--|
//   4 |1 |
//     |--|
//   5 |* |
//     |--|
//
//   This table was computed by
//   1) sorting by Message-ID (by creating 'mid_table') with a shell sort.
//   2) use 'mid_table' to find each article's parent (using a binary search),
//   thereby creating 'parent_table'.
//   3) mid_table's not needed any longer, so we now re-use it as a sorted
//   index into parent_table.  More on this later.
//
//   What we want from thread_sort is for the empty thread_index to hold
//   an array with these articles indices in the correct order:
//
//   |--|
//   |5 |
//   |--|
//   |2 |
//   |--|
//   |3 |
//   |--|
//   |0 |
//   |--|
//   |1 |
//   |--|
//   |4 |
//   |--|
//
//   ---------------------------------------------------------------------------
//   Now, for the actual algorithm.  The work is performed by the recursive
//   function thread_sort().
//
//   thread_sort (root_index, start, end, depth) { ... }
//   (depth is used to keep track of the depth of the recursion)
//
//   1) Start with an empty index table.  start = 0 (the beginning of the
//   table), and end = length (the length of the table).
//
//   2) Find all the children of the current root, (which is '*' to start
//   with), and pack them (in order) into the bottom of the table.
//   If there are no children, return 'start'.
//   If start == end, return 'start'.
//
//   3) Now, we will recurse [call thread_root()] for each of these
//   children, using the empty portion of thread_index to work with.
//   We do this by:
//
//   3a) Move child #1 into the top slot.
//   3b) calling thread_sort, with
//   root_index = child #1
//   start = start of the empty portion
//   end = end of the empty portion
//   depth = depth+1
//   3c) After thread_sort() does its magic, it will return a 
//   new value for 'start', indicating where the 'work area'
//   can start.  thread_sort() may have filled in an arbitrary
//   number of slots in this call, but will never overstep the
//   free space.  Don't worry, it all works out.  8^)
//
//   3d) Go to child #2, repeat 3(a-c), #3, #4, etc...
//
//   4) return 'start' (the start of the empty space).
//
//   Here's a trace of the algorithm using our example articles.
//   ---------------------------------------------------------------------------
//   The number above the stack of boxes indicates the parent that
//   we're finding the children of.
//
//      *                      
//     |--|  |--|                                                
//   0 |  |  |5 |   5                                            
//     |--|  |--|  |--|  |--|                          |--|  |--|
//   1 |  |  |  |  |  |  |2 |   2                      |2 |  |2 |
//     |--|  |--|  |--|  |--|  |--|  |--|        |--|  |--|  |--|
//   2 |  |  |  |  |  |  |  |  |  |  |3 |   3    |3 |  |3 |  |3 |    ==>
//     |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|
//   3 |  |  |  |  |2 |  |  |  |3 |  |  |  |  |  |  |  |  |  |  |
//     |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|  |--|
//   4 |5 |  |  |  |0 |  |0 |                                |0 |
//     |--|  |--|  |--|  |--|                                |--|
//   5 |1 |  |1 |                                                
//     |--|  |--|                                                
//
//
//   continued...
//
//                       |--|                    |--|
//   0                   |5 |                    |5 |
//                 |--|  |--|                    |--|
//   1             |2 |  |2 |                    |2 |
//                 |--|  |--|                    |--|
//   2             |3 |  |3 |                    |3 |
//     |--|        |--|  |--|                    |--|
//   3 |0 |   0    |0 |  |0 |                    |0 |
//     |--|  |--|  |--|  |--|  |--|              |--|
//   4 |  |  |  |  |  |  |  |  |1 |   1          |1 |
//     |--|  |--|  |--|  |--|  |--|  |--|        |--|
//   5                   |1 |  |  |  |4 |   4    |4 |
//                       |--|  |--|  |--|  |--|  |--|
//
//
//   ---------------------------------------------------------------------------
//   Now that you understand the algorithm 8^), we go back to parsort_table.  
//   At the start of each call to thread_sort(), we need to find all the
//   children of root_index.  We can do this quickly by using a sorted
//   version of parent_table, called parsort_table.  It contains a
//   convenient ordered list of children for us:
//
//   x -+
//   x  | all the children of 'x' 
//   x  |                   
//   x -+                        
//   y -+           
//   y -+ all the children of 'y'
//   z -+
//   z  |
//   z  | all the children of 'z' 
//   z  | 
//   z -+
//
//   A single call (order logn) to bsearch_parsort_table puts us at the
//   correct index for finding all the children we are looking for.
//
//   parsort_table
//     |--|
//   0 |1 |
//     |--|
//   1 |5 | 
//     |--|
//   2 |4 |
//     |--|
//   3 |3 |
//     |--|
//   4 |0 |
//     |--|
//   5 |2 |
//     |--|
//
//   ---------------------------------------------------------------------------
// (SMR 950123)
// Harvey Brydon has implemented a more sophisticated threading algorithm
// that attempts to reunite 'orphaned' articles to their parent threads by
// using the subject header.  I recoded the algorithm to make it O(nlogn).
// See the comments in the function 'sort_by_threads'.  The extra code executes
// just before the 'parsort' table is created.

/*
 * Local Variables:
 * tab-width: 4
 * end:
 */
