org.apache.mahout.utils.clustering
Class ClusterDumper

java.lang.Object
  extended by org.apache.hadoop.conf.Configured
      extended by org.apache.mahout.common.AbstractJob
          extended by org.apache.mahout.utils.clustering.ClusterDumper
All Implemented Interfaces:
org.apache.hadoop.conf.Configurable, org.apache.hadoop.util.Tool

public final class ClusterDumper
extends AbstractJob


Nested Class Summary
static class ClusterDumper.OUTPUT_FORMAT
           
 
Field Summary
static String DICTIONARY_OPTION
           
static String DICTIONARY_TYPE_OPTION
           
static String EVALUATE_CLUSTERS
           
static String NUM_WORDS_OPTION
           
static String OUTPUT_FORMAT_OPT
           
static String POINTS_DIR_OPTION
           
static String SAMPLE_POINTS
           
static String SUBSTRING_OPTION
           
 
Fields inherited from class org.apache.mahout.common.AbstractJob
argMap, inputFile, inputPath, outputFile, outputPath, tempPath
 
Constructor Summary
ClusterDumper()
           
ClusterDumper(org.apache.hadoop.fs.Path seqFileDir, org.apache.hadoop.fs.Path pointsDir)
           
 
Method Summary
 Map<Integer,List<WeightedPropertyVectorWritable>> getClusterIdToPoints()
           
 long getMaxPointsPerCluster()
           
 int getNumTopFeatures()
           
 int getSubString()
           
 String getTermDictionary()
           
static void main(String[] args)
           
 void printClusters(String[] dictionary)
           
static Map<Integer,List<WeightedPropertyVectorWritable>> readPoints(org.apache.hadoop.fs.Path pointsPathDir, long maxPointsPerCluster, org.apache.hadoop.conf.Configuration conf)
           
 int run(String[] args)
           
 void setMaxPointsPerCluster(long maxPointsPerCluster)
           
 void setNumTopFeatures(int num)
           
 void setOutputFormat(ClusterDumper.OUTPUT_FORMAT of)
          Convenience method to set the output format during testing.
 void setSubString(int subString)
           
 void setTermDictionary(String termDictionary, String dictionaryType)
           
 
Methods inherited from class org.apache.mahout.common.AbstractJob
addFlag, addInputOption, addOption, addOption, addOption, addOption, addOutputOption, buildOption, buildOption, getAnalyzerClassFromOption, getCLIOption, getConf, getDimensions, getFloat, getFloat, getGroup, getInputFile, getInputPath, getInt, getInt, getOption, getOption, getOption, getOptions, getOutputFile, getOutputPath, getOutputPath, getTempPath, getTempPath, hasOption, keyFor, maybePut, parseArguments, parseArguments, parseDirectories, prepareJob, prepareJob, prepareJob, prepareJob, setConf, setS3SafeCombinedInputPath, shouldRunNextPhase
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

SAMPLE_POINTS

public static final String SAMPLE_POINTS
See Also:
Constant Field Values

DICTIONARY_TYPE_OPTION

public static final String DICTIONARY_TYPE_OPTION
See Also:
Constant Field Values

DICTIONARY_OPTION

public static final String DICTIONARY_OPTION
See Also:
Constant Field Values

POINTS_DIR_OPTION

public static final String POINTS_DIR_OPTION
See Also:
Constant Field Values

NUM_WORDS_OPTION

public static final String NUM_WORDS_OPTION
See Also:
Constant Field Values

SUBSTRING_OPTION

public static final String SUBSTRING_OPTION
See Also:
Constant Field Values

EVALUATE_CLUSTERS

public static final String EVALUATE_CLUSTERS
See Also:
Constant Field Values

OUTPUT_FORMAT_OPT

public static final String OUTPUT_FORMAT_OPT
See Also:
Constant Field Values

Constructor Detail

ClusterDumper

public ClusterDumper(org.apache.hadoop.fs.Path seqFileDir,
                     org.apache.hadoop.fs.Path pointsDir)

ClusterDumper

public ClusterDumper()

Method Detail

main

public static void main(String[] args)
                 throws Exception
Throws:
Exception

run

public int run(String[] args)
        throws Exception
Throws:
Exception

printClusters

public void printClusters(String[] dictionary)
                   throws Exception
Throws:
Exception

setOutputFormat

public void setOutputFormat(ClusterDumper.OUTPUT_FORMAT of)
Convenience method to set the output format during testing.


getSubString

public int getSubString()

setSubString

public void setSubString(int subString)

getClusterIdToPoints

public Map<Integer,List<WeightedPropertyVectorWritable>> getClusterIdToPoints()

getTermDictionary

public String getTermDictionary()

setTermDictionary

public void setTermDictionary(String termDictionary,
                              String dictionaryType)

setNumTopFeatures

public void setNumTopFeatures(int num)

getNumTopFeatures

public int getNumTopFeatures()

getMaxPointsPerCluster

public long getMaxPointsPerCluster()

setMaxPointsPerCluster

public void setMaxPointsPerCluster(long maxPointsPerCluster)

readPoints

public static Map<Integer,List<WeightedPropertyVectorWritable>> readPoints(org.apache.hadoop.fs.Path pointsPathDir,
                                                                           long maxPointsPerCluster,
                                                                           org.apache.hadoop.conf.Configuration conf)


Copyright © 2008–2014 The Apache Software Foundation. All rights reserved.