Bug 2273: Removed unbuilt third-party code.
[controller.git] / third-party / net.sf.jung2 / src / main / java / edu / uci / ics / jung / algorithms / cluster / VoltageClusterer.java
diff --git a/third-party/net.sf.jung2/src/main/java/edu/uci/ics/jung/algorithms/cluster/VoltageClusterer.java b/third-party/net.sf.jung2/src/main/java/edu/uci/ics/jung/algorithms/cluster/VoltageClusterer.java
deleted file mode 100644 (file)
index 859c063..0000000
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * Copyright (c) 2004, the JUNG Project and the Regents of the University
- * of California
- * All rights reserved.
- *
- * This software is open-source under the BSD license; see either
- * "license.txt" or
- * http://jung.sourceforge.net/license.txt for a description.
- *
- * Created on Aug 12, 2004
- */
-package edu.uci.ics.jung.algorithms.cluster;
-
-import edu.uci.ics.jung.algorithms.scoring.VoltageScorer;
-import edu.uci.ics.jung.algorithms.util.DiscreteDistribution;
-import edu.uci.ics.jung.algorithms.util.KMeansClusterer;
-import edu.uci.ics.jung.algorithms.util.KMeansClusterer.NotEnoughClustersException;
-import edu.uci.ics.jung.graph.Graph;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
-
-/**
- * <p>Clusters vertices of a <code>Graph</code> based on their ranks as
- * calculated by <code>VoltageScorer</code>.  This algorithm is based on,
- * but not identical with, the method described in the paper below.
- * The primary difference is that Wu and Huberman assume a priori that the clusters
- * are of approximately the same size, and therefore use a more complex
- * method than k-means (which is used here) for determining cluster
- * membership based on co-occurrence data.</p>
- *
- * <p>The algorithm proceeds as follows:
- * <ul>
- * <li/>first, generate a set of candidate clusters as follows:
- *      <ul>
- *      <li/>pick (widely separated) vertex pair, run VoltageScorer
- *      <li/>group the vertices in two clusters according to their voltages
- *      <li/>store resulting candidate clusters
- *      </ul>
- * <li/>second, generate k-1 clusters as follows:
- *      <ul>
- *      <li/>pick a vertex v as a cluster 'seed'
- *           <br>(Wu/Huberman: most frequent vertex in candidate clusters)
- *      <li/>calculate co-occurrence over all candidate clusters of v with each other
- *           vertex
- *      <li/>separate co-occurrence counts into high/low;
- *           high vertices constitute a cluster
- *      <li/>remove v's vertices from candidate clusters; continue
- *      </ul>
- * <li/>finally, remaining unassigned vertices are assigned to the kth ("garbage")
- * cluster.
- * </ul></p>
- *
- * <p><b>NOTE</b>: Depending on how the co-occurrence data splits the data into
- * clusters, the number of clusters returned by this algorithm may be less than the
- * number of clusters requested.  The number of clusters will never be more than
- * the number requested, however.</p>
- *
- * @author Joshua O'Madadhain
- * @see "'Finding communities in linear time: a physics approach', Fang Wu and Bernardo Huberman, http://www.hpl.hp.com/research/idl/papers/linear/"
- * @see VoltageScorer
- * @see KMeansClusterer
- */
-public class VoltageClusterer<V,E>
-{
-    protected int num_candidates;
-    protected KMeansClusterer<V> kmc;
-    protected Random rand;
-    protected Graph<V,E> g;
-
-    /**
-     * Creates an instance of a VoltageCluster with the specified parameters.
-     * These are mostly parameters that are passed directly to VoltageScorer
-     * and KMeansClusterer.
-     *
-     * @param num_candidates    the number of candidate clusters to create
-     */
-    public VoltageClusterer(Graph<V,E> g, int num_candidates)
-    {
-        if (num_candidates < 1)
-            throw new IllegalArgumentException("must generate >=1 candidates");
-
-        this.num_candidates = num_candidates;
-        this.kmc = new KMeansClusterer<V>();
-        rand = new Random();
-        this.g = g;
-    }
-
-    protected void setRandomSeed(int random_seed)
-    {
-        rand = new Random(random_seed);
-    }
-
-    /**
-     * Returns a community (cluster) centered around <code>v</code>.
-     * @param v the vertex whose community we wish to discover
-     */
-    public Collection<Set<V>> getCommunity(V v)
-    {
-        return cluster_internal(v, 2);
-    }
-
-    /**
-     * Clusters the vertices of <code>g</code> into
-     * <code>num_clusters</code> clusters, based on their connectivity.
-     * @param num_clusters  the number of clusters to identify
-     */
-    public Collection<Set<V>> cluster(int num_clusters)
-    {
-        return cluster_internal(null, num_clusters);
-    }
-
-    /**
-     * Does the work of <code>getCommunity</code> and <code>cluster</code>.
-     * @param origin the vertex around which clustering is to be done
-     * @param num_clusters the (maximum) number of clusters to find
-     */
-    protected Collection<Set<V>> cluster_internal(V origin, int num_clusters)
-    {
-        // generate candidate clusters
-        // repeat the following 'samples' times:
-        // * pick (widely separated) vertex pair, run VoltageScorer
-        // * use k-means to identify 2 communities in ranked graph
-        // * store resulting candidate communities
-        ArrayList<V> v_array = new ArrayList<V>(g.getVertices());
-
-        LinkedList<Set<V>> candidates = new LinkedList<Set<V>>();
-
-        for (int j = 0; j < num_candidates; j++)
-        {
-            V source;
-            if (origin == null)
-                source = v_array.get((int)(rand.nextDouble() * v_array.size()));
-            else
-                source = origin;
-            V target = null;
-            do
-            {
-                target = v_array.get((int)(rand.nextDouble() * v_array.size()));
-            }
-            while (source == target);
-            VoltageScorer<V,E> vs = new VoltageScorer<V,E>(g, source, target);
-            vs.evaluate();
-
-            Map<V, double[]> voltage_ranks = new HashMap<V, double[]>();
-            for (V v : g.getVertices())
-                voltage_ranks.put(v, new double[] {vs.getVertexScore(v)});
-
-//            addOneCandidateCluster(candidates, voltage_ranks);
-            addTwoCandidateClusters(candidates, voltage_ranks);
-        }
-
-        // repeat the following k-1 times:
-        // * pick a vertex v as a cluster seed
-        //   (Wu/Huberman: most frequent vertex in candidates)
-        // * calculate co-occurrence (in candidate clusters)
-        //   of this vertex with all others
-        // * use k-means to separate co-occurrence counts into high/low;
-        //   high vertices are a cluster
-        // * remove v's vertices from candidate clusters
-
-        Collection<Set<V>> clusters = new LinkedList<Set<V>>();
-        Set<V> remaining = new HashSet<V>(g.getVertices());
-
-        List<V> seed_candidates = getSeedCandidates(candidates);
-        int seed_index = 0;
-
-        for (int j = 0; j < (num_clusters - 1); j++)
-        {
-            if (remaining.isEmpty())
-                break;
-
-            V seed;
-            if (seed_index == 0 && origin != null)
-                seed = origin;
-            else
-            {
-                do { seed = seed_candidates.get(seed_index++); }
-                while (!remaining.contains(seed));
-            }
-
-            Map<V, double[]> occur_counts = getObjectCounts(candidates, seed);
-            if (occur_counts.size() < 2)
-                break;
-
-            // now that we have the counts, cluster them...
-            try
-            {
-                Collection<Map<V, double[]>> high_low = kmc.cluster(occur_counts, 2);
-                // ...get the cluster with the highest-valued centroid...
-                Iterator<Map<V, double[]>> h_iter = high_low.iterator();
-                Map<V, double[]> cluster1 = h_iter.next();
-                Map<V, double[]> cluster2 = h_iter.next();
-                double[] centroid1 = DiscreteDistribution.mean(cluster1.values());
-                double[] centroid2 = DiscreteDistribution.mean(cluster2.values());
-                Set<V> new_cluster;
-                if (centroid1[0] >= centroid2[0])
-                    new_cluster = cluster1.keySet();
-                else
-                    new_cluster = cluster2.keySet();
-
-                // ...remove the elements of new_cluster from each candidate...
-                for (Set<V> cluster : candidates)
-                    cluster.removeAll(new_cluster);
-                clusters.add(new_cluster);
-                remaining.removeAll(new_cluster);
-            }
-            catch (NotEnoughClustersException nece)
-            {
-                // all remaining vertices are in the same cluster
-                break;
-            }
-        }
-
-        // identify remaining vertices (if any) as a 'garbage' cluster
-        if (!remaining.isEmpty())
-            clusters.add(remaining);
-
-        return clusters;
-    }
-
-    /**
-     * Do k-means with three intervals and pick the
-     * smaller two clusters (presumed to be on the ends); this is closer to the Wu-Huberman method.
-     * @param candidates
-     * @param voltage_ranks
-     */
-    protected void addTwoCandidateClusters(LinkedList<Set<V>> candidates,
-            Map<V, double[]> voltage_ranks)
-    {
-        try
-        {
-            List<Map<V, double[]>> clusters = new ArrayList<Map<V, double[]>>(kmc.cluster(voltage_ranks, 3));
-            boolean b01 = clusters.get(0).size() > clusters.get(1).size();
-            boolean b02 = clusters.get(0).size() > clusters.get(2).size();
-            boolean b12 = clusters.get(1).size() > clusters.get(2).size();
-            if (b01 && b02)
-            {
-                candidates.add(clusters.get(1).keySet());
-                candidates.add(clusters.get(2).keySet());
-            }
-            else if (!b01 && b12)
-            {
-                candidates.add(clusters.get(0).keySet());
-                candidates.add(clusters.get(2).keySet());
-            }
-            else if (!b02 && !b12)
-            {
-                candidates.add(clusters.get(0).keySet());
-                candidates.add(clusters.get(1).keySet());
-            }
-        }
-        catch (NotEnoughClustersException e)
-        {
-            // no valid candidates, continue
-        }
-    }
-
-    /**
-     * alternative to addTwoCandidateClusters(): cluster vertices by voltages into 2 clusters.
-     * We only consider the smaller of the two clusters returned
-     * by k-means to be a 'true' cluster candidate; the other is a garbage cluster.
-     * @param candidates
-     * @param voltage_ranks
-     */
-    protected void addOneCandidateCluster(LinkedList<Set<V>> candidates,
-            Map<V, double[]> voltage_ranks)
-    {
-        try
-        {
-            List<Map<V, double[]>> clusters;
-            clusters = new ArrayList<Map<V, double[]>>(kmc.cluster(voltage_ranks, 2));
-            if (clusters.get(0).size() < clusters.get(1).size())
-                candidates.add(clusters.get(0).keySet());
-            else
-                candidates.add(clusters.get(1).keySet());
-        }
-        catch (NotEnoughClustersException e)
-        {
-            // no valid candidates, continue
-        }
-    }
-
-    /**
-     * Returns an array of cluster seeds, ranked in decreasing order
-     * of number of appearances in the specified collection of candidate
-     * clusters.
-     * @param candidates
-     */
-    protected List<V> getSeedCandidates(Collection<Set<V>> candidates)
-    {
-        final Map<V, double[]> occur_counts = getObjectCounts(candidates, null);
-
-        ArrayList<V> occurrences = new ArrayList<V>(occur_counts.keySet());
-        Collections.sort(occurrences, new MapValueArrayComparator(occur_counts));
-
-        System.out.println("occurrences: ");
-        for (int i = 0; i < occurrences.size(); i++)
-            System.out.println(occur_counts.get(occurrences.get(i))[0]);
-
-        return occurrences;
-    }
-
-    protected Map<V, double[]> getObjectCounts(Collection<Set<V>> candidates, V seed)
-    {
-        Map<V, double[]> occur_counts = new HashMap<V, double[]>();
-        for (V v : g.getVertices())
-            occur_counts.put(v, new double[]{0});
-
-        for (Set<V> candidate : candidates)
-        {
-            if (seed == null)
-                System.out.println(candidate.size());
-            if (seed == null || candidate.contains(seed))
-            {
-                for (V element : candidate)
-                {
-                    double[] count = occur_counts.get(element);
-                    count[0]++;
-                }
-            }
-        }
-
-        if (seed == null)
-        {
-            System.out.println("occur_counts size: " + occur_counts.size());
-            for (V v : occur_counts.keySet())
-                System.out.println(occur_counts.get(v)[0]);
-        }
-
-        return occur_counts;
-    }
-
-    protected class MapValueArrayComparator implements Comparator<V>
-    {
-        private Map<V, double[]> map;
-
-        protected MapValueArrayComparator(Map<V, double[]> map)
-        {
-            this.map = map;
-        }
-
-        public int compare(V o1, V o2)
-        {
-            double[] count0 = map.get(o1);
-            double[] count1 = map.get(o2);
-            if (count0[0] < count1[0])
-                return 1;
-            else if (count0[0] > count1[0])
-                return -1;
-            return 0;
-        }
-
-    }
-
-}