/*
    Copyright (C) 2004  Damien Guillaume
    
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.
    
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

package clustering;

import java.io.*;
import java.lang.*;
import java.net.*;
import java.util.Collections;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Vector;

import xml.*;

/**
 * To get the list of documents
 *
 * @version     2.0, 15 April 2004
 * @author      Damien Guillaume
 * @see         clustering.Clustering
 * @see         clustering.Node
 * @see         clustering.Cluster
 */

public class GetNodes {

  int tmpi,tmpj; // for the quicksort
  boolean use_keywords; // use keywords to create new links
  Vector terms; // sorted vector of String (the keywords)
  Vector term_nodes; // vector of vectors of Node
  Vector tkwds, tkptr;


  private void error(String s) {
    System.err.println(s);
  }
  
  private void nvsegment(Vector v, int g, int d) {
    String p;
    String s;
    Node n;

    p = ((Node)v.elementAt((g+d)/2)).url;
    tmpi = g;
    tmpj = d;
    while (tmpi <= tmpj) {
      while (((Node)v.elementAt(tmpi)).url.compareTo(p) < 0)
	tmpi++;
      while (((Node)v.elementAt(tmpj)).url.compareTo(p) > 0)
	tmpj--;
      if (tmpi <= tmpj) {
	/*s = ((Node)v.elementAt(tmpi)).url;
	((Node)v.elementAt(tmpi)).url = ((Node)v.elementAt(tmpj)).url;
	((Node)v.elementAt(tmpj)).url = s;
	*/
	n = (Node)v.elementAt(tmpi);
	v.setElementAt((Node)v.elementAt(tmpj), tmpi);
	v.setElementAt(n, tmpj);
	tmpi++;
	tmpj--;
      }
    }
  }

  private void nvquick2(Vector v, int m, int n) {
    int i,j;

    if (m < n) {
      nvsegment(v, m, n);
      i = tmpi;
      j = tmpj;
      nvquick2(v, m, j);
      nvquick2(v, i, n);
    }
  }

  /**
   * quicksort of a Vector of Node, on the url field
   */
  private void nvquicksort(Vector v) {

    nvquick2(v, 0, v.size()-1);
  }

  /**
   * same thing as in Cluster, but using Node.ident instead of Node.url as a key
   */
  private ResiField iFieldUni(Vector v, String s) {
    int a=0;
    int b=v.size()-1;
    int m;
    int r;
    ResiField res;

    res = new ResiField();
    if (b == -1) {
      res.ind = 0;
      res.found = false;
      return(res);
    }
    while (a+1 < b) {
      m = (a+b)/2;
      r = ((Node)v.elementAt(m)).ident.compareTo(s);
      if (r < 0)
	a = m;
      else if (r > 0)
	b = m;
      else {
	res.ind = m;
	res.found = true;
	return(res);
      }
    }
    r = ((Node)v.elementAt(a)).ident.compareTo(s);
    if (r == 0) {
      res.ind = a;
      res.found = true;
      return(res);
    } else if (r > 0) {
      res.ind = a;
      res.found = false;
      return(res);
    } else {
      r = ((Node)v.elementAt(b)).ident.compareTo(s);
      if (r == 0) {
	res.ind = b;
	res.found = true;
	return(res);
      } else if (r > 0) {
	res.ind = b;
	res.found = false;
	return(res);
      } else {
	res.ind = b+1;
	res.found = false;
	return(res);
      }
    }
  }

  /**
   * search for a String by dichotomy in a sorted Vector of String
   */
  private ResiField iVString(Vector v, String s) {
    int a=0;
    int b=v.size()-1;
    int m;
    int r;
    ResiField res;

    res = new ResiField();
    if (b == -1) {
      res.ind = 0;
      res.found = false;
      return(res);
    }
    while (a+1 < b) {
      m = (a+b)/2;
      r = ((String)v.elementAt(m)).compareTo(s);
      if (r < 0)
	a = m;
      else if (r > 0)
	b = m;
      else {
	res.ind = m;
	res.found = true;
	return(res);
      }
    }
    r = ((String)v.elementAt(a)).compareTo(s);
    if (r == 0) {
      res.ind = a;
      res.found = true;
      return(res);
    } else if (r > 0) {
      res.ind = a;
      res.found = false;
      return(res);
    } else {
      r = ((String)v.elementAt(b)).compareTo(s);
      if (r == 0) {
	res.ind = b;
	res.found = true;
	return(res);
      } else if (r > 0) {
	res.ind = b;
	res.found = false;
	return(res);
      } else {
	res.ind = b+1;
	res.found = false;
	return(res);
      }
    }
  }

  public synchronized void addTermNode(String term, Node node) {
    int i,noterm=-1;
    boolean found;
    Vector vnodes;
    ResiField res;
    
    /*for (i=0, found=false; (i<terms.size())&&(!found); i++)
      if (term.equals((String)terms.elementAt(i))) {
        found = true;
        noterm = i;
      }*/ // sorted now:
    term = term.toUpperCase();
    res = iVString(terms, term);
    noterm = res.ind;
    if (res.found) {
      vnodes = (Vector)term_nodes.elementAt(noterm);
      for (i=0, found=false; (i<vnodes.size())&&(!found); i++)
        if (node == (Node)vnodes.elementAt(i))
          found = true;
      if (!found)
        vnodes.addElement(node);
    } else {
      //terms.addElement(term);
      terms.insertElementAt(term, noterm);
      vnodes = new Vector();
      vnodes.addElement(node);
      //term_nodes.addElement(vnodes);
      term_nodes.insertElementAt(vnodes, noterm);
    }
    
    // add this keyword in our vocabulary, to search in the title
    res = iVString(tkwds, term);
    if (!res.found) {
      tkwds.insertElementAt(term, res.ind);
      tkptr.insertElementAt(term, res.ind);
    }
  }

  public synchronized void addOtherTerms(String s, Node node) {
    int i;
    String term;
    String s_up;
    int ind;
    
    s_up = s.toUpperCase();
    for (i=0; i<tkwds.size(); i++) {
      term = (String)tkwds.elementAt(i); // term is already upcase
      ind = s_up.indexOf(term);
      if (ind != -1) {
        if (((ind == 0) || (!Character.isLetter(s.charAt(ind-1)))) &&
          ((ind+term.length() == s.length()) || 
            (!Character.isLetter(s.charAt(ind+term.length()))))) {
          addTermNode(term, node);
          //System.out.println("found keyword "+term+" in the title: "+s);
        }
      }
    }
  }

  public synchronized void clusWait() {
    try {
      wait(400);
    } catch (InterruptedException e) { }
  }
  
/**
 * get all nodes from a document list
 */
  public void getAllNodesFromDoclist(Vector allNodes, File doclistFile) {
    XMLTree xmltree = new XMLTree(null, "", null, "");
    InputStream is = null;
    try {
        is = new FileInputStream(doclistFile);
    } catch (FileNotFoundException ex) {
        error("FileNotFoundException: " + ex.getMessage());
        return;
    }
    Parser_XML p = new Parser_XML(is, System.out, xmltree);
    p.parse(0);
    
    if ((xmltree == null) || (!xmltree.tag.equals("DOCLIST"))) {
        error("error reading DOCLIST file " + doclistFile.getPath());
        return;
    }
    
    Hashtable doctrees = new Hashtable();
    Hashtable idurls = new Hashtable();
    
    for (XMLTree doctree=xmltree.first_child; doctree!=null; doctree=doctree.next_brother) {
        String docid = doctree.getAttVal("id");
        XMLTree titleNode = doctree.getNode("TITLE");
        String title = null;
        if (titleNode != null)
            title = titleNode.val;
        
        XMLTree urlNode = doctree.getNode("URL");
        String url = null;
        if (urlNode != null)
            url = urlNode.val;
        XMLTree fileNode = doctree.getNode("FILE");
        if (fileNode != null) {
            if (url != null)
                System.err.println("Warning: URL and FILE are both defined for '" + title + "'");
            url = "file://" + fileNode.val;
        }
        
        Node nn = new Node(url);
        nn.title = title;
        allNodes.addElement(nn);
        doctrees.put(nn, doctree);
        idurls.put(docid, url);
        
        if (use_keywords) {
            XMLTree kwdTree = doctree.getNode("KEYWORDS");
            if (kwdTree != null) {
                for (XMLTree node1=kwdTree.first_child; node1!=null; node1=node1.next_brother)
                    addTermNode(node1.val, nn);
            }
        }
    }
    
    nvquicksort(allNodes);
    
    for (int i=0; i<allNodes.size(); i++) {
        Node nn = (Node)allNodes.elementAt(i);
        XMLTree doctree = (XMLTree)doctrees.get(nn);
        
        if (use_keywords) {
            XMLTree kwdTree = doctree.getNode("KEYWORDS");
            if (kwdTree != null) {
                for (XMLTree node1=kwdTree.first_child; node1!=null; node1=node1.next_brother)
                    addOtherTerms(node1.val, nn); // add keywords within the keyword
            }
        }
        
        addOtherTerms(nn.title, nn); // add keywords within the title
        
        XMLTree linksTree = doctree.getNode("LINKS");
        if (linksTree != null) {
            for (XMLTree linkn=linksTree.first_child; linkn!=null; linkn=linkn.next_brother) {
                String toid = linkn.getAttVal("toid");
                String role = linkn.getAttVal("role");
                String tourl = (String)idurls.get(toid);
                ResiField res = Cluster.iField(allNodes, tourl);
                if (res.found) {
                    nn.prelinks.addElement((Node)allNodes.elementAt(res.ind));
                    nn.preroles.addElement(role);
                } else
                    System.err.println("link target not found: " + toid);
            }
        }
    }
    return;
  }
}
