Package org.apache.mahout.vectorizer
Class HighDFWordsPruner

java.lang.Object
  extended by org.apache.mahout.vectorizer.HighDFWordsPruner

public final class HighDFWordsPruner
extends Object


Field Summary
static String MAX_DF
           
static String MIN_DF
           
static String STD_CALC_DIR
           
 
Method Summary
static void mergePartialVectors(Iterable<org.apache.hadoop.fs.Path> partialVectorPaths, org.apache.hadoop.fs.Path output, org.apache.hadoop.conf.Configuration baseConf, float normPower, boolean logNormalize, int numReducers)
           
static void pruneVectors(org.apache.hadoop.fs.Path tfDir, org.apache.hadoop.fs.Path prunedTFDir, org.apache.hadoop.fs.Path prunedPartialTFDir, long maxDF, long minDF, org.apache.hadoop.conf.Configuration baseConf, Pair<Long[],List<org.apache.hadoop.fs.Path>> docFrequenciesFeatures, float normPower, boolean logNormalize, int numReducers)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

STD_CALC_DIR

public static final String STD_CALC_DIR
See Also:
Constant Field Values

MAX_DF

public static final String MAX_DF
See Also:
Constant Field Values

MIN_DF

public static final String MIN_DF
See Also:
Constant Field Values
Method Detail

pruneVectors

public static void pruneVectors(org.apache.hadoop.fs.Path tfDir,
                                org.apache.hadoop.fs.Path prunedTFDir,
                                org.apache.hadoop.fs.Path prunedPartialTFDir,
                                long maxDF,
                                long minDF,
                                org.apache.hadoop.conf.Configuration baseConf,
                                Pair<Long[],List<org.apache.hadoop.fs.Path>> docFrequenciesFeatures,
                                float normPower,
                                boolean logNormalize,
                                int numReducers)
                         throws IOException,
                                InterruptedException,
                                ClassNotFoundException
Throws:
IOException
InterruptedException
ClassNotFoundException

mergePartialVectors

public static void mergePartialVectors(Iterable<org.apache.hadoop.fs.Path> partialVectorPaths,
                                       org.apache.hadoop.fs.Path output,
                                       org.apache.hadoop.conf.Configuration baseConf,
                                       float normPower,
                                       boolean logNormalize,
                                       int numReducers)
                                throws IOException,
                                       InterruptedException,
                                       ClassNotFoundException
Throws:
IOException
InterruptedException
ClassNotFoundException


Copyright © 2008–2014 The Apache Software Foundation. All rights reserved.