
// I keep getting this exception while running a Hadoop JAR that converts a PDF to text and parses it. Also, how do I build a runnable JAR for Hadoop in Eclipse?

java.lang.Exception: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable 
     at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:489) 
     at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:549) 
    Caused by: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable 
     at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1072) 
     at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:715) 
     at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89) 
     at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:112) 
     at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:125) 
     at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:146) 
     at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787) 
     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341) 
     at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:270) 
     at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
     at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) 
     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
     at java.lang.Thread.run(Thread.java:745) 
    17/06/08 19:12:10 INFO mapreduce.Job: Job job_local815278758_0001 running in uber mode : false 
    17/06/08 19:12:10 INFO mapreduce.Job: map 0% reduce 0% 
    17/06/08 19:12:10 INFO mapreduce.Job: Job job_local815278758_0001 failed with state FAILED due to: NA 
    17/06/08 19:12:10 INFO mapreduce.Job: Counters: 0 



//Mapper class 

import java.io.IOException; 
import java.util.StringTokenizer; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Mapper; 

public class WordCountMapper extends 
     Mapper<Object, Object, Object, Object> { 
    private Text word = new Text(); 
    private final static LongWritable one = new LongWritable(1); 

    protected void map(LongWritable key, Text value, Context context) 
      throws IOException, InterruptedException { 
     String line = value.toString(); 
     StringTokenizer tokenizer = new StringTokenizer(line); 
     while (tokenizer.hasMoreTokens()) { 
      word.set(tokenizer.nextToken()); 
      context.progress(); 
      context.write(word, one); 
     } 
    } 
} 


//Reducer class 
package com.amal.pdf; 

import java.io.IOException; 

import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Reducer; 

public class WordCountReducer extends 
     Reducer<Object, Object, Object, Object> { 
    protected void reduce(Text key, Iterable<LongWritable> values, 
      Context context) throws IOException, InterruptedException { 
     int sum = 0; 
     for (LongWritable value : values) { 
      sum += value.get(); 

     } 
     context.write(key, new LongWritable(sum)); 
    } 
} 

//PDF RecordReader class 

import java.io.IOException; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.FSDataInputStream; 
import org.apache.hadoop.fs.FileSystem; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.InputSplit; 
import org.apache.hadoop.mapreduce.RecordReader; 
import org.apache.hadoop.mapreduce.TaskAttemptContext; 
import org.apache.hadoop.mapreduce.lib.input.FileSplit; 
import org.apache.pdfbox.pdmodel.PDDocument; 
import org.apache.pdfbox.util.PDFTextStripper; 

public class PdfRecordReader extends RecordReader<Object, Object> { 

    private String[] lines = null; 
    private LongWritable key = null; 
    private Text value = null; 

    @Override 

    public void initialize(InputSplit genericSplit, TaskAttemptContext context) 
      throws IOException, InterruptedException { 


     FileSplit split = (FileSplit) genericSplit; 
     Configuration job = context.getConfiguration(); 
     final Path file = split.getPath(); 

     /*
      * The code below opens the file and seeks to the start of the split.
      * This is where the PDF parsing logic is applied.
      */

     FileSystem fs = file.getFileSystem(job); 

     FSDataInputStream fileIn = fs.open(split.getPath()); 
     PDDocument pdf = null; 
     String parsedText = null; 
     PDFTextStripper stripper; 
     pdf = PDDocument.load(fileIn); 
     stripper = new PDFTextStripper(); 
    //getting exception because of this line 
     parsedText = stripper.getText(pdf); 
     this.lines = parsedText.split("\n");
    }

    @Override 
    public boolean nextKeyValue() throws IOException, InterruptedException { 

     if (key == null) { 

      key = new LongWritable(); 
      key.set(1); 
      value = new Text(); 
      value.set(lines[0]); 
     } else { 

      int temp = (int) key.get(); 
      if (temp < (lines.length - 1)) { 
       int count = (int) key.get(); 
       value = new Text(); 
       value.set(lines[count]); 
       count = count + 1; 
       key = new LongWritable(count); 
      } else { 
       return false; 
      } 

     } 
     if (key == null || value == null) { 
      return false; 
     } else { 
      return true; 
     } 
    } 

    @Override 
    public LongWritable getCurrentKey() throws IOException, 
      InterruptedException { 

     return key; 
    } 

    @Override 
    public Text getCurrentValue() throws IOException, InterruptedException { 

     return value; 
    } 

    @Override 
    public float getProgress() throws IOException, InterruptedException { 

     return 0; 
    } 

    @Override 
    public void close() throws IOException { 

    } 

} 
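
A RecordReader like this is normally plugged into the job through a custom InputFormat. That class is not shown in the post, so the sketch below is only an illustration, assuming it is named PdfInputFormat and simply hands each (unsplittable) PDF file to the PdfRecordReader above.

//PDF InputFormat class (sketch, assumed -- not part of the original post)

package com.amal.pdf;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class PdfInputFormat extends FileInputFormat<Object, Object> {

    @Override
    public RecordReader<Object, Object> createRecordReader(InputSplit split,
            TaskAttemptContext context) {
        // Hand each split to the PdfRecordReader shown above.
        return new PdfRecordReader();
    }

    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        // A PDF cannot be parsed from an arbitrary byte offset,
        // so treat every file as a single split.
        return false;
    }
}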

// One more thing: can someone help me build a runnable JAR for this job? Eclipse does not show a launch configuration to select, since the main class is meant to run in the Hadoop environment.


Your title and the error are completely different. Can you add your Mapper and Reducer code? – philantrovert


I have edited my question. Thank you. – shubham

Answer


The error on the console says:

Caused by: java.io.IOException: 
Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable 

This means that the key/value types your mapper emits do not match what the job expects. Because the class is declared as Mapper&lt;Object, Object, Object, Object&gt;, your map(LongWritable key, Text value, Context context) method is an overload rather than an override of Mapper.map, so Hadoop runs the default identity mapper and writes the LongWritable byte offset as the map output key, while the job is configured to expect Text. An @Override annotation on map would have turned this into a compile-time error.

Your mapper class should look like this:

public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    private Text word = new Text();
    // LongWritable is used here so the map output value type matches the posted WordCountReducer
    private final static LongWritable one = new LongWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            context.progress();
            context.write(word, one);
        }
    }
}
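
The declared types also have to agree with the driver configuration. The post does not include a driver, so the following is only a sketch: the class name PdfWordCountDriver, the PdfInputFormat from the sketch above, and the command-line input/output paths are all assumptions. A class like this also gives Eclipse a main method to use as the Main-Class when exporting a runnable JAR (File > Export > Runnable JAR file), which addresses the side question.

package com.amal.pdf;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class PdfWordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "pdf word count");
        job.setJarByClass(PdfWordCountDriver.class);

        job.setInputFormatClass(PdfInputFormat.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        // These must match what the mapper actually emits, otherwise the
        // "Type mismatch in key from map" error from the question appears.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The job could then be run with something like hadoop jar pdfwordcount.jar com.amal.pdf.PdfWordCountDriver <input> <output> (the JAR name here is hypothetical).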