wpi.associations.arminer
Class HashTree

java.lang.Object
  extended bywpi.associations.arminer.HashTree

public class HashTree
extends java.lang.Object

HashTree.java

A HashTree is a special data structure that is used to index a Vector of ARMinerItemset objects for more efficient processing.


Nested Class Summary
private static class HashTree.HashNode
           
private static class HashTree.ListNode
           
private static class HashTree.MyHashtable
           
private static class HashTree.Node
           
 
Field Summary
private  int counter
           
private static int DEFAULT_HASH_SIZE
           
private static int DEFAULT_LIST_SIZE
           
private static int HASH_NODE
           
private static int HASH_SIZE
           
private  java.util.Vector itemsets
           
private  java.util.Vector leaves
           
private static int LIST_NODE
           
private static int LIST_SIZE
           
private static int[] primes
           
private  HashTree.Node theRoot
           
 
Constructor Summary
HashTree(int listSize, int hashSize, java.util.Vector itemsets)
          Create a new HashTree.
HashTree(java.util.Vector itemsets)
          Create a new HashTree.
 
Method Summary
private  HashTree.Node add(HashTree.Node node, int level, int index)
           
 void add(int index)
          This method indexes in the HashTree the ARMinerItemset at index index from Vector itemsets which was passed to the constructor of this HashTree.
 void checkLargeness(ARMinerItemset itemset)
          Verifies if any of the indexed Itemsets is not large by checking whether they're included in the frequent itemset itemset.
private  void checkLargeness(HashTree.Node node, ARMinerItemset itemset, int index, int level)
           
 long countFrequentSubsets(ARMinerItemset itemset, long minWeight)
          Count how many frequent Itemsets (frequent = having weight greater than a specified minimum weight) are included in itemset
private  void countFrequentSubsets(HashTree.Node node, ARMinerItemset itemset, int index, long minWeight, int level)
           
 long countSubsets(ARMinerItemset itemset)
          Count how many Itemsets are included in itemset
private  void countSubsets(HashTree.Node node, ARMinerItemset itemset, int index, int level)
           
private static int hash(int item, int level)
           
private  void prepare(HashTree.Node node)
           
 void prepareForDescent()
          This method should be called before calling update() to gather all leaves of the HashTree for more efficient processing.
private  void unvisitLeaves()
           
 void update(ARMinerItemset row)
          Update the weights of all indexed Itemsets that are included in row
 void update(ARMinerItemset row, long[][] counts)
          Update the weights of all indexed Itemsets that are included in row and also update the matrix counts
private  void update(HashTree.Node node, ARMinerItemset row, int index, int level)
           
private  void update(HashTree.Node node, ARMinerItemset row, int index, long[][] counts, int level)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

LIST_NODE

private static final int LIST_NODE
See Also:
Constant Field Values

HASH_NODE

private static final int HASH_NODE
See Also:
Constant Field Values

DEFAULT_LIST_SIZE

private static final int DEFAULT_LIST_SIZE
See Also:
Constant Field Values

DEFAULT_HASH_SIZE

private static final int DEFAULT_HASH_SIZE
See Also:
Constant Field Values

LIST_SIZE

private static int LIST_SIZE

HASH_SIZE

private static int HASH_SIZE

primes

private static int[] primes

counter

private int counter

leaves

private java.util.Vector leaves

itemsets

private java.util.Vector itemsets

theRoot

private HashTree.Node theRoot
Constructor Detail

HashTree

public HashTree(int listSize,
                int hashSize,
                java.util.Vector itemsets)
Create a new HashTree. The listSize parameter determines after how many inserts in a ListNode we have to change it to a HashNode (i.e. perform a split). The hashSize parameter can be specified to improve the efficiency of the structure.

Parameters:
listSize - the size of the internal lists in the list nodes
hashSize - the size of the internal hashtables in the hash nodes
itemsets - the Vector of Itemsets that we should index
Throws:
java.lang.IllegalArgumentException - itemsets is null or listSize <= 0 or hashSize <= 0

HashTree

public HashTree(java.util.Vector itemsets)
Create a new HashTree. This initializes the HashTree with default parameters.

Parameters:
itemsets - the Vector of Itemsets that we should index
Throws:
java.lang.IllegalArgumentException - itemsets is null
Method Detail

unvisitLeaves

private void unvisitLeaves()

prepareForDescent

public void prepareForDescent()
This method should be called before calling update() to gather all leaves of the HashTree for more efficient processing.


prepare

private void prepare(HashTree.Node node)

hash

private static int hash(int item,
                        int level)

add

public void add(int index)
This method indexes in the HashTree the ARMinerItemset at index index from Vector itemsets which was passed to the constructor of this HashTree.

Parameters:
index - the index of the ARMinerItemset that we need to index in this HashTree.

add

private HashTree.Node add(HashTree.Node node,
                          int level,
                          int index)

update

public void update(ARMinerItemset row)
Update the weights of all indexed Itemsets that are included in row

Parameters:
row - the ARMinerItemset (normally a database row) against which we test for inclusion

update

private void update(HashTree.Node node,
                    ARMinerItemset row,
                    int index,
                    int level)

update

public void update(ARMinerItemset row,
                   long[][] counts)
Update the weights of all indexed Itemsets that are included in row and also update the matrix counts

Parameters:
row - the ARMinerItemset (normally a database row) against which we test for inclusion
counts - a matrix used by some algorithms to speed up computations; its rows correspond to Itemsets and its columns correspond to items; each value in the matrix tells for how many times had an item appeared together with an itemset in the rows of the database.

update

private void update(HashTree.Node node,
                    ARMinerItemset row,
                    int index,
                    long[][] counts,
                    int level)

countFrequentSubsets

public long countFrequentSubsets(ARMinerItemset itemset,
                                 long minWeight)
Count how many frequent Itemsets (frequent = having weight greater than a specified minimum weight) are included in itemset

Parameters:
itemset - the ARMinerItemset for which we count the subsets
minWeight - the minimum weight

countFrequentSubsets

private void countFrequentSubsets(HashTree.Node node,
                                  ARMinerItemset itemset,
                                  int index,
                                  long minWeight,
                                  int level)

countSubsets

public long countSubsets(ARMinerItemset itemset)
Count how many Itemsets are included in itemset

Parameters:
itemset - the ARMinerItemset for which we count the subsets

countSubsets

private void countSubsets(HashTree.Node node,
                          ARMinerItemset itemset,
                          int index,
                          int level)

checkLargeness

public void checkLargeness(ARMinerItemset itemset)
Verifies if any of the indexed Itemsets is not large by checking whether they're included in the frequent itemset itemset. If an ARMinerItemset is not large then it will be marked.

Parameters:
itemset - the ARMinerItemset we check

checkLargeness

private void checkLargeness(HashTree.Node node,
                            ARMinerItemset itemset,
                            int index,
                            int level)