/*
    Copyright (C) 2004  Damien Guillaume
    
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.
    
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

package clustering;

import java.io.*;
import java.net.*;
import java.util.Vector;
import java.awt.*;
import java.awt.event.*;
import java.awt.image.*;

import xml.*;

import dispgrid.GridFrame;


/**
 * An application to do clustering using links and keywords, and
 * to reduce to a 2D grid and display the result.<br>
 *<br>
 * Input: list of documents (DOCLIST XML document)<br>
 * Output: grid of clusters (GRID XML document)<br>
 *<br>
 * Usage: java clustering.Clustering doclist.xml grid.xml<br>
 *<br>
 * initial version for AML documents, Damien Guillaume, 21/5/98<br>
 * changed to use any collection of documents, Damien Guillaume 4/2004<br>
 *
 * @version     2.0, 15 April 2004
 * @author      Damien Guillaume
 * @see         clustering.GetNodes
 * @see         clustering.NoisingPartitioning
 * @see         clustering.Node
 * @see         clustering.Cluster
 * @see         xml.XMLTree
 * @see         dispgrid.GridFrame
 */

public class Clustering extends Frame implements ActionListener {

  // Algorithm selector handed to the NoisingPartitioning constructor in letsgo().
  final int ALGO_PARTITIONING = 3;
  // 2 = noisingPartitioning, 3 = recursPartitioning
  // Dimensions of the output grid; written as the dimx/dimy attributes of the
  // GRID element, and DIMX*DIMY is the number of top-level clusters.
  final static int DIMX = 4;
  final static int DIMY = 4;
  // Set to DIMX*DIMY in letsgo().
  int nb_clusters;
  // Top-level clusters; allocated in letsgo() and passed to NoisingPartitioning.
  Cluster[] clusters;
  // Vector of Node, filled by GetNodes.getAllNodesFromDoclist() in letsgo().
  Vector allNodes;
  // Clusters indexed by [x][y]; rebuilt from 'clusters' in saveGrid().
  Cluster grid[][] = null;
  // Becomes true at the end of init(); error() only adds a label when it is set.
  boolean inited = false;
  int tmpi,tmpj; // for the quicksort
  // GUI controls created in init() and read in actionPerformed().
  Choice keywords_choice;
  Choice delnodes_choice;
  TextField doclist_field;
  // Path of the input doclist file (from the text field or the command line).
  String doclist;
  boolean use_keywords; // use keywords to create new links
  // When true, nodes with fewer than del_nb_links links are removed (see deleteNodes()).
  boolean del_nodes;
  int del_nb_links;
  Vector terms; // sorted vector of String (the keywords)
  Vector term_nodes; // vector of vectors of Node
  // Only referenced by the commented-out createKeywordsTable().
  final static String KEYWORDS_URL = "http://server_to_define/dir/keywords_uni_3";
  // Keyword replacement table: tkwds is searched with iVString() and tkptr holds,
  // at the same index, the keyword to use instead. Both stay empty while
  // createKeywordsTable() is disabled.
  Vector tkwds, tkptr;
  // State of the "Delete first doc" checkbox; only assigned in this class.
  boolean delfirst;
  Checkbox delfirst_chk;
  // Fake keyword nodes created by addKeywordLinks(); null until then.
  Vector keyword_nodes=null;
  // Label updated by progress().
  Label progress_lab;
  // The "Go!" button; disabled while the partitioning runs.
  Button bok;
  // Output file for the GRID document; may be replaced by the second command-line argument.
  String gridFilename = "grid.xml";

  // final Strings for roles, to save up memory:
  final static String OTHER_DATA = "other_data";
  final static String INCLUDES = "includes";
  final static String IS_INCLUDED_IN = "is_included_in";
  final static String IS_AN_IMAGE_OF = "is_an_image_of";
  final static String HAS_FOR_IMAGE = "has_for_image";
  final static String HAS_FOR_AUTHOR = "has_for_author";
  final static String IS_AUTHOR_OF = "is_author_of";
  final static String IS_STUDYING = "is_studying";
  final static String BEING_STUDIED_BY = "being_studied_by";
  final static String HAS_FOR_REF = "has_for_ref";
  final static String IS_REF_OF = "is_ref_of";
  // Prefix of keyword roles; strength() parses the text after it as the link weight.
  final static String KEYWORD = "KEYWORD";


  /**
   * Reports an error: the message always goes to the standard error stream
   * and, once init() has completed, it is also appended to the frame as a
   * left-aligned label.
   *
   * @param s the message to report
   */
  private void error(String s) {
    System.err.println(s);
    if (!inited)
      return;
    add(new Label(s, Label.LEFT));
    validate();
  }
  
  /**
   * Builds the user interface: a row to pick the doclist file, a row with the
   * clustering options and the "Go!" button, and a progress label. Also
   * installs the handler that exits the application when the window is closed.
   */
  public void init() {
    GridBagLayout layout = new GridBagLayout();
    GridBagConstraints endOfRow = new GridBagConstraints();
    endOfRow.gridwidth = GridBagConstraints.REMAINDER; // each component ends its row

    setLayout(layout);
    setFont(new Font("Helvetica", Font.PLAIN, 14));

    // first row: doclist file selection
    Panel filePanel = new Panel(new FlowLayout());
    filePanel.add(new Label("doclist file:", Label.CENTER));
    doclist_field = new TextField("", 38);
    filePanel.add(doclist_field);
    Button openButton = new Button("Open...");
    openButton.setActionCommand("open");
    openButton.addActionListener(this);
    filePanel.add(openButton);
    layout.setConstraints(filePanel, endOfRow);
    add(filePanel);

    // second row: options and start button
    Panel optionsPanel = new Panel(new FlowLayout());

    keywords_choice = new Choice();
    keywords_choice.add("use only the links");
    keywords_choice.add("use links and kwds");
    keywords_choice.select(1);
    optionsPanel.add(keywords_choice);

    delnodes_choice = new Choice();
    delnodes_choice.add("keep all nodes");
    delnodes_choice.add("del nodes with < 2 links");
    delnodes_choice.add("del nodes with < 3 links");
    optionsPanel.add(delnodes_choice);

    delfirst_chk = new Checkbox("Delete first doc");
    optionsPanel.add(delfirst_chk);

    bok = new Button("Go!");
    bok.setActionCommand("OK");
    bok.addActionListener(this);
    optionsPanel.add(bok);

    layout.setConstraints(optionsPanel, endOfRow);
    add(optionsPanel);

    // third row: progress display
    progress_lab = new Label("-------------------------------------");
    layout.setConstraints(progress_lab, endOfRow);
    add(progress_lab);

    // closing the window ends the application
    addWindowListener(new WindowAdapter() {
      public void windowClosing(WindowEvent e) {
        Window closing = e.getWindow();
        closing.setVisible(false);
        closing.dispose();
        System.exit(0);
      }
    });

    pack();

    inited = true;
  }


  /**
   * Displays a progress message in the progress label and requests a repaint
   * of the frame.
   *
   * @param s the text to display
   */
  public void progress(String s) {
    this.progress_lab.setText(s);
    this.repaint();
  }
  
  
  /**
   * Returns the weight of a link, given its role.
   * <p>
   * Known roles have a fixed weight. A role starting with {@link #KEYWORD}
   * carries its weight as the text following the prefix (e.g. "KEYWORD0.5").
   * A null role, an unknown role, or a keyword role whose suffix is not a
   * number all get the default weight 1.
   *
   * @param role the role of the link, may be null
   * @return the weight of the link
   */
  public static float strength(String role) {
    if (role == null)
      return 1;
    if (role.equals(OTHER_DATA))
      return 5;
    if (role.equals(INCLUDES) || role.equals(IS_INCLUDED_IN))
      return 4;
    if (role.equals(IS_AN_IMAGE_OF) || role.equals(HAS_FOR_IMAGE))
      return 4;
    if (role.equals(HAS_FOR_AUTHOR) || role.equals(IS_AUTHOR_OF))
      return 3;
    if (role.equals(IS_STUDYING) || role.equals(BEING_STUDIED_BY))
      return 2;
    if (role.equals(HAS_FOR_REF) || role.equals(IS_REF_OF))
      return 2;
    if (role.startsWith(KEYWORD)) {
      try {
        // the weight is whatever follows the prefix
        return Float.parseFloat(role.substring(KEYWORD.length()));
      } catch (NumberFormatException e) {
        // a role that merely starts with the prefix (no numeric weight):
        // treat it like any other unknown role instead of crashing
        System.err.println("Invalid keyword weight in role \"" + role + "\", using 1");
        return 1;
      }
    }
    return 1;
  }


  /**
   * Binary search of a String in a sorted Vector of String.
   *
   * @param v the sorted vector to search
   * @param s the string to look for
   * @return a ResiField whose <code>found</code> tells whether s is in v and
   *         whose <code>ind</code> is the index of s when found, or else the
   *         index at which s would have to be inserted to keep v sorted
   */
  private ResiField iVString(Vector v, String s) {
    ResiField result = new ResiField();
    int lo = 0;
    int hi = v.size() - 1;
    int cmp;

    // empty vector: s belongs at index 0
    if (hi < 0) {
      result.ind = 0;
      result.found = false;
      return result;
    }

    // narrow [lo, hi] until the bounds are equal or adjacent
    while (hi - lo > 1) {
      int mid = (lo + hi) / 2;
      cmp = ((String) v.elementAt(mid)).compareTo(s);
      if (cmp == 0) {
        result.ind = mid;
        result.found = true;
        return result;
      }
      if (cmp < 0)
        lo = mid;
      else
        hi = mid;
    }

    // s is at lo, or belongs before it
    cmp = ((String) v.elementAt(lo)).compareTo(s);
    if (cmp >= 0) {
      result.ind = lo;
      result.found = (cmp == 0);
      return result;
    }

    // s is at hi, belongs before it, or belongs right after it
    cmp = ((String) v.elementAt(hi)).compareTo(s);
    if (cmp >= 0) {
      result.ind = hi;
      result.found = (cmp == 0);
    } else {
      result.ind = hi + 1;
      result.found = false;
    }
    return result;
  }


  /**
   * Appends a CLUSTER element describing a cluster to an XML tree.
   * The element carries the x, y, centerURL and title attributes. Its
   * children are either the sub-clusters (added recursively) or, when there
   * are none, one NODE element (url and title attributes) per node.
   *
   * @param ag   the cluster to add; nothing is done when it is null
   * @param tree the tree that receives the CLUSTER element as a child
   */
  private void putClusterInTree(Cluster ag, XMLTree tree) {
    if (ag == null)
      return;

    Vector clusterAtts = new Vector();
    Attribute a;

    a = new Attribute();
    a.name = "x";
    a.val = "" + ag.x;
    clusterAtts.addElement(a);

    a = new Attribute();
    a.name = "y";
    a.val = "" + ag.y;
    clusterAtts.addElement(a);

    a = new Attribute();
    a.name = "centerURL";
    a.val = ag.centerURL;
    clusterAtts.addElement(a);

    a = new Attribute();
    a.name = "title";
    a.val = ag.title;
    clusterAtts.addElement(a);

    XMLTree clusterElt = tree.addChild("CLUSTER", clusterAtts, "");

    // a cluster with sub-clusters only lists those
    if (ag.subClusters != null) {
      for (int s = 0; s < ag.subClusters.length; s++)
        putClusterInTree(ag.subClusters[s], clusterElt);
      return;
    }

    // leaf cluster: list its nodes
    for (int n = 0; n < ag.nodes.size(); n++) {
      Node member = (Node) ag.nodes.elementAt(n);
      Vector nodeAtts = new Vector();

      a = new Attribute();
      a.name = "url";
      a.val = member.url;
      nodeAtts.addElement(a);

      a = new Attribute();
      a.name = "title";
      a.val = member.title;
      nodeAtts.addElement(a);

      clusterElt.addChild("NODE", nodeAtts, "");
    }
  }

  /**
   * Places the clusters on the grid, builds the GRID XML tree, writes it to
   * {@link #gridFilename} and opens a GridFrame displaying it.
   * <p>
   * A failure to write the file is reported on the standard error stream and
   * does not prevent the grid from being displayed. The output stream is
   * always closed, even when writing fails.
   */
  private void saveGrid() {
    XMLTree tree;
    int i,x,y;
    Vector vatt;

    // a freshly allocated array already contains only null cells
    grid = new Cluster[DIMX][DIMY];
    for (i=0; i < clusters.length; i++)
      if (clusters[i] != null)
        grid[clusters[i].x][clusters[i].y] = clusters[i];
      else
        System.out.println("Cluster number "+i+" is null !");

    vatt = new Vector();
    vatt.addElement(new Attribute("dimx", String.valueOf(DIMX)));
    vatt.addElement(new Attribute("dimy", String.valueOf(DIMY)));

    tree = new XMLTree(null,"GRID",vatt,"");
    for (x=0; x<DIMX; x++)
      for (y=0; y<DIMY; y++)
        putClusterInTree(grid[x][y], tree); // empty cells are skipped there

    System.out.println("Saving grid to "+gridFilename);

    OutputStream out = null;
    try {
      out = new FileOutputStream(gridFilename);
      tree.writeTree(out, "GRID");
    } catch (Exception e) {
      System.err.println("exception: " + e.getMessage());
      e.printStackTrace();
    } finally {
      // release the file even when writeTree failed
      if (out != null) {
        try {
          out.close();
        } catch (IOException e) {
          System.err.println("exception: " + e.getMessage());
          e.printStackTrace();
        }
      }
    }

    GridFrame gf;
    gf = new GridFrame(null);
    gf.init();
    gf.changeGridClusters(tree);
  }

  /**
   * Returns the square of a number.
   *
   * @param x the value to square
   * @return x multiplied by itself
   */
  private double sqr(double x) {
    double squared = x * x;
    return squared;
  }
  
  /**
   * Chooses the center and the title of every cluster of an array, and
   * recursively of their sub-clusters.
   * <p>
   * The center of a cluster becomes the node with the strongest links to
   * other nodes of the same cluster, and the title is that node's title (or
   * its ident when the title is empty). When keywords are used, a keyword
   * shared by several nodes of the cluster may replace the title if its score
   * is high enough; the center URL is then emptied. A cluster without nodes
   * gets an empty center URL and an empty title.
   * <p>
   * Null entries of the array are skipped, and the whole array is processed
   * whatever its length.
   *
   * @param clusters the clusters to update
   */
  private void modifClusterCenters(Cluster[] clusters) {
    int i,j,k;
    int nblinks;
    int nobest;
    Cluster ag;
    Node aNode;
    double fmaxlinks1, fmaxlinks2, links_ratio;

    // iterate on the array actually received: a sub-cluster array does not
    // have to contain nb_clusters entries
    for (i=0; i<clusters.length; i++) {
      ag = clusters[i];
      if (ag == null)
        continue;
      if (ag.nodes.size() == 0) {
        ag.centerURL = "";
        ag.title = ""; // will be an empty cluster title
      } else {
        // score for the links
        fmaxlinks1 = -1.0;
        nobest = -1;
        for (j=0; j<ag.nodes.size(); j++) {
          aNode = (Node)ag.nodes.elementAt(j);
          // we keep only the links in the same cluster
          links_ratio = 0.0;
          for (k=0; k<aNode.links.size(); k++)
            if (((Node)aNode.links.elementAt(k)).master == ag)
              links_ratio += ((MyFloat)aNode.roles.elementAt(k)).val;
          if (links_ratio > fmaxlinks1) {
            fmaxlinks1 = links_ratio; // (note: we don't use Node.count here)
            nobest = j;
          }
        }
        if (nobest == -1)
          aNode = (Node)ag.nodes.elementAt(0);
        else
          aNode = (Node)ag.nodes.elementAt(nobest);
        ag.centerURL = aNode.url;
        if (!aNode.title.equals(""))
          ag.title = aNode.title;
        else
          ag.title = aNode.ident;
        if (use_keywords) {
          // score for the keywords
          Vector vnodes;

          fmaxlinks2 = 0.0;
          nobest = -1;
          for (j=0; j<term_nodes.size(); j++) {
            nblinks = 0;
            vnodes = (Vector)term_nodes.elementAt(j);
            if (vnodes.size() > 1) {
              // number of nodes of this cluster having the keyword
              for (k=0; k<ag.nodes.size(); k++)
                if (vnodes.contains(ag.nodes.elementAt(k)))
                  nblinks++;
              if (nblinks > 1) {
                links_ratio = (double)nblinks / vnodes.size();
                // penalty for a keyword carried by fewer nodes than the cluster has
                if (vnodes.size() < ag.nodes.size())
                  links_ratio *= 1 - sqr(vnodes.size()-ag.nodes.size())/sqr(ag.nodes.size());
                if (links_ratio > fmaxlinks2) {
                  fmaxlinks2 = links_ratio;
                  nobest = j;
                }
              }
            }
          }
          // if the keyword is important, it becomes the cluster title
          if ((nobest != -1) && (fmaxlinks2*2 > fmaxlinks1/ag.nodes.size())) {
            ag.title = (String)terms.elementAt(nobest);
            ag.centerURL = "";
          }
        }
      }
      if (ag.subClusters != null)
        modifClusterCenters(ag.subClusters);
    }
  }

/* initial keyword list unused
  private void createKeywordsTable(Vector tkwds, Vector tkptr) {
    URL theURL;
    String line;
    ResiField res;
    int i;
    String kwd, kwd1=null;
    boolean isfirst;
    
    try {
      theURL = new URL(KEYWORDS_URL);
    } catch(Exception e) {
      error("Error in creating the URL connection to get the keywords synonyms table");
      return;
    }

    try{
      URLConnection connection = theURL.openConnection();
      InputStream out = connection.getInputStream();
      BufferedReader buf = new BufferedReader(new InputStreamReader(out));
      line = buf.readLine();
      while (line != null) {
        isfirst = true;
        for (; line != null; ) {
          i = line.indexOf(',');
          if (i == -1)
            i = line.length();
          kwd = line.substring(0,i);
          kwd = kwd.toUpperCase();
          if (isfirst) {
            kwd1 = kwd;
            isfirst = false;
          }
          res = iVString(tkwds, kwd);
          if (!res.found) {
            tkwds.insertElementAt(kwd, res.ind);
            tkptr.insertElementAt(kwd1, res.ind);
          } else
            System.err.println("Keyword "+kwd+" found 2 times in the table ?!?");
          if (i != line.length())
            line = line.substring(i+2);
          else
            line = null;
        }
        line = buf.readLine();
      }
      buf.close();
    } catch (Exception e) {
      error("Error while reading the keywords synonyms table");
      return;
    }
  }
*/
  
  /**
   * Turns the keywords into links between nodes.
   * <p>
   * Works on the parallel vectors <code>terms</code> (keywords) and
   * <code>term_nodes</code> (for each keyword, the Vector of Node having it),
   * in three passes: clean the keyword table, replace keywords according to
   * the tkwds/tkptr table, then create one fake node per keyword having more
   * than one node and link it both ways to each of these nodes.
   * The fake nodes are collected in <code>keyword_nodes</code>.
   *
   * @param allNodes the vector of all nodes; the fake keyword nodes are
   *                 inserted into it
   */
  private void addKeywordLinks(Vector allNodes) {
    int i,j;
    Vector vnodes;
    String role;
    Node node1,n;
    ResiField res,res1;
    String kwd,kwd1;
    Vector vnodes1;
    int nb_keywords;
    
    // checking some consistency (probably useful here)
    // pass 1: drop the nodes that are not in allNodes, and the keywords left
    // without any node; the indexes are decremented after each removal so
    // that the element shifted into the freed slot is examined too
    for (i=0; i<terms.size(); i++) {
      vnodes = (Vector)term_nodes.elementAt(i);
      for (j=0; j<vnodes.size(); j++)
        if (!allNodes.contains((Node)vnodes.elementAt(j))) {
          System.err.println("Node "+((Node)vnodes.elementAt(j)).ident+" in term_nodes but not in allNodes!!!");
          vnodes.removeElementAt(j);
          j--;
        }
      if (vnodes.size() == 0) {
        term_nodes.removeElementAt(i);
        terms.removeElementAt(i);
        i--;
      }
    }

    // pass 2: a keyword found in tkwds is replaced by the entry of tkptr at
    // the same index, when that entry is different
    for (i=0; i<terms.size(); i++) {
      kwd = (String)terms.elementAt(i);
      res = iVString(tkwds, kwd);
      if (res.found) {
        kwd1 = (String)tkptr.elementAt(res.ind);
        if (!kwd.equals(kwd1)) {
          // remove the keyword, then look for its replacement in terms
          vnodes = (Vector)term_nodes.elementAt(i);
          terms.removeElementAt(i);
          term_nodes.removeElementAt(i);
          res1 = iVString(terms, kwd1);
          i--;
          if (res1.found) {
            // the replacement already exists: merge the node lists
            vnodes1 = (Vector)term_nodes.elementAt(res1.ind);
            for (j=0; j<vnodes.size(); j++) {
              node1 = (Node)vnodes.elementAt(j);
              if (!vnodes1.contains(node1))
                vnodes1.addElement(node1);
            }
          } else {
            // insert the replacement where iVString says it belongs
            terms.insertElementAt(kwd1, res1.ind);
            term_nodes.insertElementAt(vnodes, res1.ind);
            // an insertion before the current position shifts the remaining
            // keywords by one
            if (res1.ind < i)
              i++;
          }
          System.out.println(kwd+" changed to "+kwd1);
        }
      }
    }
    
    // pass 3: one fake node per keyword having more than one node
    nb_keywords = 0;
    System.out.println("Keywords: ");
    keyword_nodes = new Vector();
    for (i=0; i<terms.size(); i++) {
      vnodes = (Vector)term_nodes.elementAt(i);
      if (vnodes.size() > 1) {
        System.out.print(((String)terms.elementAt(i)) + 
          "(" + vnodes.size() + "); ");
        nb_keywords++;
        // add a fake node for this keyword
        // (presumably the Node constructor argument becomes n.url -- confirm in Node)
        n = new Node(KEYWORD + (String)terms.elementAt(i));
        n.title = n.url;
        n.fake = true;
        keyword_nodes.addElement(n);
        // the fake node is inserted at the index given by Cluster.iField,
        // unless an entry is already found for its url
        res = Cluster.iField(allNodes, n.url);
	if (!res.found) {
          allNodes.insertElementAt(n, res.ind);
          // and link to all nodes with this keyword
          for (j=0; j<vnodes.size(); j++) {
            node1 = (Node)vnodes.elementAt(j);
            if (vnodes.size() > allNodes.size()) {
              System.err.println("Error: vnodes > allNodes");
            } else {
              // the role carries the weight of the link: 1 / number of nodes
              // having the keyword (parsed back by strength())
              role = KEYWORD + String.valueOf(1.0/vnodes.size());
              node1.addPrelinkrole(n, role);
              n.addPrelinkrole(node1, role);
            }
          }
        }
      }
    }
    System.out.println("\n"+nb_keywords+" keywords.");
  }
  
  /**
   * Makes the prelinks symmetric: whenever a node has a prelink to another
   * node that has no prelink back, the missing prelink is added to the other
   * node, with the inverse role as given by Cluster.inv_role.
   *
   * @param allNodes the vector of Node to check
   */
  private void checkLinksSymetry(Vector allNodes) {
    System.out.println("checking prelinks symetry");
    for (int src = 0; src < allNodes.size(); src++) {
      Node from = (Node) allNodes.elementAt(src);
      for (int l = 0; l < from.prelinks.size(); l++) {
        Node to = (Node) from.prelinks.elementAt(l);
        if (to.prelinks.indexOf(from) >= 0)
          continue; // the link back already exists
        String forwardRole = (String) from.preroles.elementAt(l);
        to.prelinks.addElement(from);
        to.preroles.addElement(Cluster.inv_role(forwardRole));
      }
    }
  }
  
  /**
   * Removes the fake (keyword) nodes from every cluster of an array, and
   * recursively from their sub-clusters.
   * <p>
   * Null entries of the array are skipped, and the whole array is processed
   * whatever its length.
   *
   * @param clusters the clusters to clean
   */
  private void clean_clusters(Cluster[] clusters) {
    // iterate on the array actually received: a sub-cluster array does not
    // have to contain nb_clusters entries
    for (int i = 0; i < clusters.length; i++) {
      Cluster ag = clusters[i];
      if (ag == null)
        continue;
      // walk backwards so that a removal does not shift the entries still to visit
      for (int j = ag.nodes.size() - 1; j >= 0; j--)
        if (((Node) ag.nodes.elementAt(j)).fake)
          ag.nodes.removeElementAt(j);
      if (ag.subClusters != null)
        clean_clusters(ag.subClusters);
    }
  }
  
  /**
   * Removes every trace of the fake (keyword) nodes: the links pointing to
   * them together with the matching roles, the fake nodes themselves in the
   * node vector, and their occurrences in the clusters.
   *
   * @param allNodes the vector of all nodes
   * @param clusters the clusters to clean
   */
  private void del_keyword_nodes(Vector allNodes, Cluster[] clusters) {
    // links and roles are parallel vectors: always remove at the same index
    for (int idx = 0; idx < allNodes.size(); idx++) {
      Node current = (Node) allNodes.elementAt(idx);
      for (int l = current.links.size() - 1; l >= 0; l--) {
        Node target = (Node) current.links.elementAt(l);
        if (!target.fake)
          continue;
        current.links.removeElementAt(l);
        current.roles.removeElementAt(l);
      }
    }

    // then the fake nodes themselves
    for (int idx = allNodes.size() - 1; idx >= 0; idx--)
      if (((Node) allNodes.elementAt(idx)).fake)
        allNodes.removeElementAt(idx);

    clean_clusters(clusters);
  }

  /**
   * Removes the weakly linked nodes.
   * <p>
   * After making the prelinks symmetric, marks (when <code>del_nodes</code>
   * is set) the nodes having fewer than <code>del_nb_links</code> prelinks,
   * then removes the prelinks pointing to marked nodes, the marked nodes from
   * the keyword table (dropping the keywords left without node), and finally
   * the marked nodes themselves. Ends with a consistency check that reports
   * and removes prelinks to nodes absent from the node vector.
   *
   * @param allNodes the vector of all nodes, modified in place
   */
  private void deleteNodes(Vector allNodes) {
    checkLinksSymetry(allNodes);

    // start with no node marked
    for (int idx = 0; idx < allNodes.size(); idx++)
      ((Node) allNodes.elementAt(idx)).mark = false;

    // mark the nodes to delete
    if (del_nodes) {
      System.out.println("deleting nodes with less than "+del_nb_links+" links...");
      for (int idx = 0; idx < allNodes.size(); idx++) {
        Node candidate = (Node) allNodes.elementAt(idx);
        if (candidate.prelinks.size() < del_nb_links)
          candidate.mark = true;
      }
    }

    // remove the prelinks (and matching roles) pointing to a marked node
    for (int idx = 0; idx < allNodes.size(); idx++) {
      Node current = (Node) allNodes.elementAt(idx);
      for (int l = current.prelinks.size() - 1; l >= 0; l--) {
        if (!((Node) current.prelinks.elementAt(l)).mark)
          continue;
        current.prelinks.removeElementAt(l);
        current.preroles.removeElementAt(l);
      }
    }

    // remove the marked nodes from the keyword table
    if (use_keywords) {
      for (int t = term_nodes.size() - 1; t >= 0; t--) {
        Vector holders = (Vector) term_nodes.elementAt(t);
        for (int h = holders.size() - 1; h >= 0; h--)
          if (((Node) holders.elementAt(h)).mark)
            holders.removeElementAt(h);
        if (holders.size() == 0) {
          terms.removeElementAt(t);
          term_nodes.removeElementAt(t);
        }
      }
    }

    // remove the marked nodes themselves
    for (int idx = allNodes.size() - 1; idx >= 0; idx--)
      if (((Node) allNodes.elementAt(idx)).mark)
        allNodes.removeElementAt(idx);
    System.out.println("There are "+allNodes.size()+" documents left.");

    // checking some consistency (maybe useless)
    for (int idx = 0; idx < allNodes.size(); idx++) {
      Node current = (Node) allNodes.elementAt(idx);
      if (current.mark)
        System.err.println("n.mark!!!");
      int l = 0;
      while (l < current.prelinks.size()) {
        Node target = (Node) current.prelinks.elementAt(l);
        if (target.mark)
          System.err.println("n2.mark!!!");
        if (allNodes.contains(target)) {
          l++;
          continue;
        }
        // dangling link: report it and drop it (the next link takes index l)
        System.err.println("Node "+target.ident+" in a link but not in allNodes!!!");
        current.prelinks.removeElementAt(l);
        current.preroles.removeElementAt(l);
      }
    }
  }
  
// kind of "main" procedure
  public void letsgo() {
    
    if (doclist == null || "".equals(doclist))
        return;
    
    Vector stopURL; // URLs not to read again (vector of String[2])
                    // (to avoid pb with people with different names...)
    Vector unique; // second index, sorted with Node.ident,
                   //to check for unicity
    int i,j;
    GetNodes gn = new GetNodes();
    
    setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
    
    gn.use_keywords = use_keywords;
    gn.terms = terms;
    gn.term_nodes = term_nodes;
    gn.tkwds = tkwds;
    gn.tkptr = tkptr;
    allNodes = new Vector();
    unique = new Vector();
    stopURL = new Vector();
    gn.getAllNodesFromDoclist(allNodes, new File(doclist));
    if (allNodes.size() == 0)
      return;
    // we no longer need the unique index
    unique = null;
    /*
    System.out.println("After the quicksort:");
    for (i=0; i<allNodes.size(); i++) {
      System.out.println(((Node)allNodes.elementAt(i)).ident+" "+((Node)allNodes.elementAt(i)).url);
    }
    */
    System.out.println("There are " + allNodes.size() + " documents.");

    if (allNodes.size() < DIMX*DIMY) {
      System.err.println("allNodes.size() < nb_clusters !!!");
      setCursor(Cursor.getDefaultCursor());
      return;
    }

    if (del_nodes)
      deleteNodes(allNodes);
        
    nb_clusters = DIMX * DIMY;
    clusters = new Cluster[DIMX*DIMY];
    /* nb_clusters is not used in noisingPartitioning but is used in 
        sortByAmlobj and modifClusterCenters */
    
    if (use_keywords)
        addKeywordLinks(allNodes);
    
    Node iNode, jNode;
    int k;
    
    //check links unicity & add link weigths
    System.out.println("checking links unicity");
    for (i=0; i<allNodes.size(); i++) {
    iNode = (Node)allNodes.elementAt(i);
    if (iNode.prelinks != null)
        for (j=0; j<iNode.prelinks.size(); j++) {
        jNode = (Node)iNode.prelinks.elementAt(j);
        for (k=j+1; k<iNode.prelinks.size(); k++)
            if (jNode == (Node)iNode.prelinks.elementAt(k)) {
            //System.err.println("Link duplicated from "+iNode.ident+" to "+jNode.ident+" ...deleting one...");
            String role1 = (String)iNode.preroles.elementAt(j);
            String role2 = (String)iNode.preroles.elementAt(k);
            if (role1 != null && !role1.equals(role2)) {
                // in this case, there are 2 links with a different meaning, and we want to keep both weights
                String newrole = KEYWORD + String.valueOf(strength(role1) + strength(role2));
                iNode.preroles.setElementAt(newrole, j);
                System.out.println("merged 2 links between "+iNode.ident+
                " and "+jNode.ident);
            }
            iNode.prelinks.removeElementAt(k);
            iNode.preroles.removeElementAt(k);
            k--;
            }
        }
    }
    //System.out.println("prelinks -> links");
    for (i=0; i<allNodes.size(); i++) {
    iNode = (Node)allNodes.elementAt(i);
    if (iNode.prelinks != null)
        iNode.links = iNode.prelinks;
    if (iNode.preroles != null)
        for (j=0; j<iNode.preroles.size(); j++)
        iNode.roles.addElement(new MyFloat(strength((String)iNode.preroles.elementAt(j))));
    iNode.prelinks = null;
    iNode.preroles = null;
    }
        
    float init_alpha;
    /*if (use_keywords)
    init_alpha = (float)0.20;
    else
    init_alpha = (float)0.25;*/
    init_alpha = (float)2.0; // for a big collection with only keyword links (less otherwise)
    
    NoisingPartitioning np = new NoisingPartitioning(this, ALGO_PARTITIONING, allNodes,
        clusters, init_alpha, use_keywords);
    
    bok.setEnabled(false);
    np.start();
  }
  
  /**
   * Finishes a clustering run: removes the fake keyword nodes if some were
   * created, chooses the cluster centers and titles, saves and displays the
   * grid, then re-enables the "Go!" button and restores the default cursor.
   */
  public void endClustering() {
    boolean hasKeywordNodes = use_keywords && (keyword_nodes != null);
    if (hasKeywordNodes)
      del_keyword_nodes(allNodes, clusters);

    modifClusterCenters(clusters);
    saveGrid();

    // the interface is available again
    bok.setEnabled(true);
    setCursor(Cursor.getDefaultCursor());
  }
  
  /**
   * Lets the user choose the document list with a file dialog, and puts the
   * path of the chosen file in the doclist text field. The field is left
   * unchanged when the dialog returns no file.
   */
  public void openDlg() {
    FileDialog dlg = new FileDialog(this, "Open document list", FileDialog.LOAD);
    // setVisible(true) replaces the deprecated show()
    dlg.setVisible(true);
    String sf = dlg.getFile();
    String sdir = dlg.getDirectory();
    if (sf != null) {
      File f;
      if (sdir != null)
        f = new File(sdir, sf);
      else
        f = new File(sf);
      doclist_field.setText(f.getPath());
    }
  }
  
  /**
   * Handles the buttons: "open" shows the file dialog, "OK" reads the options
   * chosen in the interface and starts the clustering. Other commands are
   * only logged.
   *
   * @param event the action event; its action command selects the operation
   */
  public void actionPerformed(ActionEvent event) {
    String command = event.getActionCommand();

    System.out.println("command: "+command);

    if (command.equals("open")) {
      openDlg();
      return;
    }
    if (!command.equals("OK"))
      return;

    // keyword option: the first entry of the choice means "links only"
    use_keywords = keywords_choice.getSelectedIndex() != 0;
    if (use_keywords) {
      terms = new Vector();
      term_nodes = new Vector();
      tkwds = new Vector();
      tkptr = new Vector();
      //createKeywordsTable(tkwds, tkptr); initial keyword list unused
    }

    // node deletion option: entry n (n > 0) means "fewer than n+1 links"
    int deletionChoice = delnodes_choice.getSelectedIndex();
    del_nodes = deletionChoice != 0;
    if (del_nodes)
      del_nb_links = deletionChoice + 1;

    doclist = doclist_field.getText();
    delfirst = delfirst_chk.getState();
    letsgo();
  }

  /**
   * Entry point: builds and shows the window. When a doclist file is given
   * as first argument, the clustering is started immediately, with keywords
   * and without node deletion; an optional second argument replaces the name
   * of the output grid file.
   *
   * @param args optional doclist file name, then optional grid file name
   */
  public static void main(String[] args) {
    Clustering clus = new Clustering();
    clus.init();
    // setVisible(true) replaces the deprecated show()
    clus.setVisible(true);
    if (args.length > 0) {
      clus.doclist = args[0];
      if (args.length > 1)
        clus.gridFilename = args[1];
      clus.use_keywords = true;
      clus.terms = new Vector();
      clus.term_nodes = new Vector();
      clus.tkwds = new Vector();
      clus.tkptr = new Vector();
      clus.del_nodes = false;
      clus.letsgo();
    }
  }

}
