org.apache.mahout.text
Class SequenceFilesFromMailArchivesMapper

java.lang.Object
  extended by org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.IntWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>
      extended by org.apache.mahout.text.SequenceFilesFromMailArchivesMapper

public class SequenceFilesFromMailArchivesMapper
extends org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.IntWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>

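Mapper class for the SequenceFilesFromMailArchives job. It takes IntWritable keys with BytesWritable values as input and emits Text keys with Text values.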


Nested Class Summary
 
Nested classes/interfaces inherited from class org.apache.hadoop.mapreduce.Mapper
org.apache.hadoop.mapreduce.Mapper.Context
 
Constructor Summary
SequenceFilesFromMailArchivesMapper()
           
 
Method Summary
protected static String generateKey(String mboxFilename, String prefix, String messageId)
           
 void map(org.apache.hadoop.io.IntWritable key, org.apache.hadoop.io.BytesWritable value, org.apache.hadoop.mapreduce.Mapper.Context context)
           
 long parseMailboxLineByLine(String filename, InputStream mailBoxInputStream, org.apache.hadoop.mapreduce.Mapper.Context context)
           
 void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
           
 
Methods inherited from class org.apache.hadoop.mapreduce.Mapper
cleanup, run
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

SequenceFilesFromMailArchivesMapper

public SequenceFilesFromMailArchivesMapper()
Method Detail

setup

public void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
           throws IOException,
                  InterruptedException
Overrides:
setup in class org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.IntWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>
Throws:
IOException
InterruptedException

parseMailboxLineByLine

public long parseMailboxLineByLine(String filename,
                                   InputStream mailBoxInputStream,
                                   org.apache.hadoop.mapreduce.Mapper.Context context)
                            throws IOException,
                                   InterruptedException
Throws:
IOException
InterruptedException

generateKey

protected static String generateKey(String mboxFilename,
                                    String prefix,
                                    String messageId)

map

public void map(org.apache.hadoop.io.IntWritable key,
                org.apache.hadoop.io.BytesWritable value,
                org.apache.hadoop.mapreduce.Mapper.Context context)
         throws IOException,
                InterruptedException
Overrides:
map in class org.apache.hadoop.mapreduce.Mapper<org.apache.hadoop.io.IntWritable,org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>
Throws:
IOException
InterruptedException


Copyright © 2008–2014 The Apache Software Foundation. All rights reserved.