
I am working with Spark on YARN. I have provided the link to the plugins directory, but the exception shown below is thrown: file not found exception with spark-submit.

Exception: couldn't find:file:/home/centos/vopEngine1_2/lib/plugins/ANNIE/ 
    at gate.creole.CreoleRegisterImpl.registerDirectories(CreoleRegisterImpl.java:282) 
    at gate.creole.CreoleRegisterImpl.registerDirectories(CreoleRegisterImpl.java:317) 
    at com.scryAnalytics.NLPGeneric.GateGenericNLP.<init>(GateGenericNLP.java:47) 
    at com.scryAnalytics.vopEngine.NlpProcessing$$anonfun$5.apply(NlpProcessing.scala:95) 
    at com.scryAnalytics.vopEngine.NlpProcessing$$anonfun$5.apply(NlpProcessing.scala:94) 
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710) 
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710) 
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) 
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306) 
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270) 
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) 
    at org.apache.spark.scheduler.Task.run(Task.scala:89) 
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) 
    at java.lang.Thread.run(Thread.java:745) 

I am calling a wrapper, GateGenericNLP.java, which uses this plugins directory. The wrapper is invoked from the Spark Scala code NLPProcessing.scala.

GateGenericNLP.java

package com.scryAnalytics.NLPGeneric; 

import java.io.File; 
import java.net.MalformedURLException; 
import java.util.ArrayList; 
import java.util.List; 

import gate.Annotation; 
import gate.AnnotationSet; 
import gate.Corpus; 
import gate.Document; 
import gate.Factory; 
import gate.FeatureMap; 
import gate.Gate; 
import gate.Resource; 
import gate.creole.ExecutionException; 
import gate.creole.POSTagger; 
import gate.creole.ResourceInstantiationException; 
import gate.creole.SerialAnalyserController; 
import gate.util.GateException; 

import org.apache.log4j.Logger; 

import com.scryAnalytics.NLPGeneric.DAO.GateAnnotation; 
import com.scryAnalytics.NLPGeneric.DAO.Output; 

public class GateGenericNLP { 

static Logger logger = Logger.getLogger(GateGenericNLP.class.getName()); 
private SerialAnalyserController applicationPipeline; 
private Corpus corpus; 
private List<NLPEntities> entitiesToGenerate; 

public GateGenericNLP(String pluginHome, List<NLPEntities> entities) 
     throws GateException, MalformedURLException { 

    System.out.println("Initializing ....."); 
    entitiesToGenerate = entities; 
    if (!Gate.isInitialised()) { 
      Gate.runInSandbox(true); 
      Gate.init(); 
      Gate.setPluginsHome(new File(pluginHome)); 
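      // The stack trace above (GateGenericNLP.java:47) points into one of the
      // registerDirectories calls below: GATE cannot resolve the ANNIE plugin directory.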
      Gate.getCreoleRegister().registerDirectories(
          new File(Gate.getPluginsHome(), "ANNIE").toURI().toURL()); 
      Gate.getCreoleRegister().registerDirectories(
          new File(Gate.getPluginsHome(), "Tools").toURI().toURL()); 
      Gate.getCreoleRegister().registerDirectories(
          new File(Gate.getPluginsHome(), "Tagger_NP_Chunking") 
              .toURI().toURL()); 
      Gate.getCreoleRegister().registerDirectories(
        new File(Gate.getPluginsHome(), "Stemmer_Snowball").toURI().toURL()); 
    } 
    applicationPipeline = (SerialAnalyserController) Factory 
      .createResource("gate.creole.SerialAnalyserController"); 

    applicationPipeline.add((gate.LanguageAnalyser) Factory 
      .createResource("gate.creole.tokeniser.DefaultTokeniser")); 
    applicationPipeline.add((gate.LanguageAnalyser) Factory 
      .createResource("gate.creole.splitter.SentenceSplitter")); 
    applicationPipeline.add((gate.LanguageAnalyser) Factory 
      .createResource("stemmer.SnowballStemmer")); 

    if (entitiesToGenerate.contains(NLPEntities.POS_TAGGER)) { 
     applicationPipeline 
       .add((gate.LanguageAnalyser) createPOSTaggerPR()); 

    } 

    if (entitiesToGenerate.contains(NLPEntities.VP_CHUNKER)) { 
     applicationPipeline.add((gate.LanguageAnalyser) Factory 
       .createResource("gate.creole.VPChunker")); 

    } 

    if (entitiesToGenerate.contains(NLPEntities.NP_CHUNKER)) { 
     applicationPipeline.add((gate.LanguageAnalyser) Factory 
       .createResource("mark.chunking.GATEWrapper")); 

    } 

    corpus = Factory.newCorpus("VOCP Corpus"); 
    applicationPipeline.setCorpus(corpus); 

} 

public String generateNLPEntities(String message) { 

    Document doc = null; 
    String resultJson = ""; 

    try { 
     doc = Factory.newDocument(message); 
     System.out.println("String Added......."); 
     corpus.add(doc); 
     applicationPipeline.execute(); 

     resultJson = prepareResultJson(); 
     Factory.deleteResource(doc); 

    } catch (ResourceInstantiationException e) { 

     e.printStackTrace(); 
    } catch (ExecutionException e) { 

     e.printStackTrace(); 
    } finally { 
     logger.debug("clearing corpus....."); 
     corpus.clear(); 
     Factory.deleteResource(doc); 
    } 

    return resultJson; 

} 

private Resource createPOSTaggerPR() throws ResourceInstantiationException { 
    FeatureMap posTaggerParams = Factory.newFeatureMap(); 
    posTaggerParams.put(
      POSTagger.BASE_SENTENCE_ANNOTATION_TYPE_PARAMETER_NAME, 
      "Sentence"); 
    posTaggerParams.put(
      POSTagger.BASE_TOKEN_ANNOTATION_TYPE_PARAMETER_NAME, "Token"); 
    posTaggerParams.put(POSTagger.OUTPUT_ANNOTATION_TYPE_PARAMETER_NAME, 
      "Token"); 

    Resource posTagger = Factory.createResource("gate.creole.POSTagger", 
      posTaggerParams); 
    return posTagger; 

} 

private List<GateAnnotation> getTokens() { 
    List<GateAnnotation> tokens = new ArrayList<GateAnnotation>(); 
    Document doc = corpus.get(0); 
    AnnotationSet defaultAnnSet = doc.getAnnotations(); 
    AnnotationSet tokenSet = defaultAnnSet.get("Token"); 
    for (Annotation annotation : tokenSet) { 

     GateAnnotation annot = new GateAnnotation(annotation.getId(), 
       annotation.getStartNode(), annotation.getEndNode(), 
       annotation.getType(), annotation.getFeatures()); 
     annot.setAnnotatedText(gate.Utils.stringFor(doc, annotation)); 

     tokens.add(annot); 
    } 
    return tokens; 
} 

private List<GateAnnotation> getSpaceTokens() { 
    List<GateAnnotation> spaceTokens = new ArrayList<GateAnnotation>(); 
    Document doc = corpus.get(0); 
    AnnotationSet defaultAnnSet = doc.getAnnotations(); 
    AnnotationSet spaceTokenSet = defaultAnnSet.get("SpaceToken"); 
    for (Annotation annotation : spaceTokenSet) { 
     GateAnnotation annot = new GateAnnotation(annotation.getId(), 
       annotation.getStartNode(), annotation.getEndNode(), 
       annotation.getType(), annotation.getFeatures()); 
     annot.setAnnotatedText(gate.Utils.stringFor(doc, annotation)); 

     spaceTokens.add(annot); 
    } 
    return spaceTokens; 
} 

private List<GateAnnotation> getSentences() { 
    List<GateAnnotation> sentences = new ArrayList<GateAnnotation>(); 
    Document doc = corpus.get(0); 
    AnnotationSet defaultAnnSet = doc.getAnnotations(); 
    AnnotationSet sentencesSet = defaultAnnSet.get("Sentence"); 
    for (Annotation annotation : sentencesSet) { 
     GateAnnotation annot = new GateAnnotation(annotation.getId(), 
       annotation.getStartNode(), annotation.getEndNode(), 
       annotation.getType(), annotation.getFeatures()); 
     annot.setAnnotatedText(gate.Utils.stringFor(doc, annotation)); 

     sentences.add(annot); 
    } 
    return sentences; 
} 

private List<GateAnnotation> getVPChunks() { 
    List<GateAnnotation> vpChunks = new ArrayList<GateAnnotation>(); 
    Document doc = corpus.get(0); 
    AnnotationSet defaultAnnSet = doc.getAnnotations(); 
    AnnotationSet VGSet = defaultAnnSet.get("VG"); 
    for (Annotation annotation : VGSet) { 
     GateAnnotation annot = new GateAnnotation(annotation.getId(), 
       annotation.getStartNode(), annotation.getEndNode(), 
       annotation.getType(), annotation.getFeatures()); 
     annot.setAnnotatedText(gate.Utils.stringFor(doc, annotation)); 

     vpChunks.add(annot); 
    } 
    return vpChunks; 
} 

private List<GateAnnotation> getNounChunks() { 

    List<GateAnnotation> nounChunks = new ArrayList<GateAnnotation>(); 
    Document doc = corpus.get(0); 
    AnnotationSet defaultAnnSet = doc.getAnnotations(); 
    AnnotationSet nounChunksSet = defaultAnnSet.get("NounChunk"); 
    for (Annotation annotation : nounChunksSet) { 
     GateAnnotation annot = new GateAnnotation(annotation.getId(), 
       annotation.getStartNode(), annotation.getEndNode(), 
       annotation.getType(), annotation.getFeatures()); 
     annot.setAnnotatedText(gate.Utils.stringFor(doc, annotation)); 

     nounChunks.add(annot); 
    } 
    return nounChunks; 
} 

private List<GateAnnotation> getSplits() { 

    List<GateAnnotation> splits = new ArrayList<GateAnnotation>(); 
    Document doc = corpus.get(0); 
    AnnotationSet defaultAnnSet = doc.getAnnotations(); 
    AnnotationSet splitSet = defaultAnnSet.get("Split"); 
    for (Annotation annotation : splitSet) { 
     GateAnnotation annot = new GateAnnotation(annotation.getId(), 
       annotation.getStartNode(), annotation.getEndNode(), 
       annotation.getType(), annotation.getFeatures()); 
     annot.setAnnotatedText(gate.Utils.stringFor(doc, annotation)); 

     splits.add(annot); 
    } 
    return splits; 
} 

private String prepareResultJson() { 
    Output result = new Output(); 
    result.setToken(getTokens()); 
    result.setSpaceToken(getSpaceTokens()); 
    result.setSentence(getSentences()); 
    result.setSplit(getSplits()); 

    if (entitiesToGenerate.contains(NLPEntities.VP_CHUNKER)) { 
     result.setVG(getVPChunks()); 
    } 

    if (entitiesToGenerate.contains(NLPEntities.NP_CHUNKER)) { 
     result.setNounChunk(getNounChunks()); 
    } 

    String resultJson = Utility.objectToJson(result); 
    return resultJson; 
} 

public void close() { 
    Factory.deleteResource(corpus); 
    Factory.deleteResource(applicationPipeline); 
} 

}

NLPProcessing.scala

package com.scryAnalytics.vopEngine 

import java.util.Arrays 
import java.util.ArrayList 
import gate.util.GateException 
import java.net.MalformedURLException 
import org.apache.spark.SparkContext 
import org.apache.hadoop.hbase.{ HBaseConfiguration, HTableDescriptor, TableName } 
import org.apache.hadoop.hbase.HConstants 
import org.apache.hadoop.hbase.spark.HBaseContext 
import org.apache.hadoop.hbase.mapreduce.TableInputFormat 
import org.apache.hadoop.hbase.mapreduce.{ TableOutputFormat, MultiTableOutputFormat } 
import org.apache.hadoop.mapreduce.Job 
import org.apache.hadoop.hbase.client.HBaseAdmin 
import org.apache.hadoop.hbase.KeyValue.Type 
import org.apache.hadoop.hbase.util.Bytes 
import org.apache.hadoop.hbase.client.Put 
import com.scryAnalytics.NLPGeneric.NLPEntities 
import com.vocp.ner.main.GateNERImpl 
import com.scryAnalytics.NLPGeneric._ 
import java.util.ArrayList 
import org.apache.spark.rdd.RDD 
import org.apache.spark.{ SparkContext, SparkConf } 
import org.apache.log4j.{ Level, Logger } 
import com.scryAnalytics.vopEngine.DAO.{ GateNERDAO, GenericNLPDAO, NLPEntitiesDAO } 
import org.apache.hadoop.hbase.io.ImmutableBytesWritable 
import com.scryAnalytics.vopEngine.Configuration.VocpConfiguration 
import com.scryAnalytics.vopEngine.Configuration.VOCPConstants 

class NLPProcessingLog { 
    var log: Logger = Logger.getLogger(classOf[NLPProcessingLog]) 
    log.info("Logger Initialized .....") 
} 

object NlpProcessing { 

    val logger = new NLPProcessingLog 

    @throws(classOf[Exception]) 
    def nlpAnnotationExtraction(conf: org.apache.hadoop.conf.Configuration, batchString: String): Int = { 

    logger.log.info("In Main Object..") 

    //Initializing Spark Context 
    val sc = new SparkContext(new SparkConf().setAppName("NLPAnnotationController").setMaster("local")) 

    val batchId = 
     if (batchString == "newbatch") 
     java.lang.Long.toString(System.currentTimeMillis()) 
     else batchString 

    conf.set("batchId", batchId) 

    val inputCfs = Arrays.asList(conf.get(VOCPConstants.INPUTCOLUMNFAMILIES).split(","): _*) 

    try { 

     conf.set(TableInputFormat.INPUT_TABLE, conf.get(VOCPConstants.INPUTTABLE)) 
     conf.set(TableOutputFormat.OUTPUT_TABLE, conf.get(VOCPConstants.OUTPUTTABLE)) 

     val job: Job = Job.getInstance(conf, "NLPAnnotationJob") 
     job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, conf.get(VOCPConstants.OUTPUTTABLE)) 
     job.setOutputFormatClass(classOf[MultiTableOutputFormat]) 

     val admin = new HBaseAdmin(conf) 
     if (!admin.isTableAvailable(conf.get(VOCPConstants.OUTPUTTABLE))) { 
     val tableDesc = new HTableDescriptor(TableName.valueOf(conf.get(VOCPConstants.OUTPUTTABLE))) 
     admin.createTable(tableDesc) 
     } 

     val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], 
     classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], 
     classOf[org.apache.hadoop.hbase.client.Result]) 

     val processedFilteredRDD = hBaseRDD.map(x => x._2).filter { result => 
     val flag = Bytes.toString(result.getValue(Bytes.toBytes("f"), 
      Bytes.toBytes("is_processed"))) 
     (flag == null) || (flag == "0") // flag is a String; compare against "0", not the Int 0
     } 

     println(processedFilteredRDD.count()) 
     val messageRDD = processedFilteredRDD.filter { x => x != null }.map { result => 
     val message = Bytes.toString(result.getValue(Bytes.toBytes("p"), 
      Bytes.toBytes("message"))) 
     (Bytes.toString(result.getRow()), message) 

     } 

     println("Number of partitions " + messageRDD.getNumPartitions) 

     val pluginHome = conf.get(VOCPConstants.GATE_PLUGIN_ARCHIVE) 
     val requiredNLPEntities = new ArrayList[NLPEntities]() 
     requiredNLPEntities.add(NLPEntities.POS_TAGGER) 
     requiredNLPEntities.add(NLPEntities.VP_CHUNKER) 
     requiredNLPEntities.add(NLPEntities.NP_CHUNKER) 

     val nlpGenericRDD = messageRDD.mapPartitions { iter => 
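     // This closure runs on the executors: pluginHome must be a path that exists
     // on every worker node, not only on the host running spark-submit.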
     val nlpModule = new GateGenericNLP(pluginHome, requiredNLPEntities) 
     iter.map { x => 
      val nlpGenericJson = nlpModule.generateNLPEntities(x._2) 
      val genericNLPObject = Utility.jsonToGenericNLP(nlpGenericJson) 
      (x._1, x._2, genericNLPObject) 

     } 
     } 

     val requiredNEREntities = new ArrayList[String]() 
     requiredNEREntities.add("DRUG") 
     requiredNEREntities.add("SE") 
     requiredNEREntities.add("REG") 
     requiredNEREntities.add("ALT_THERAPY") 
     requiredNEREntities.add("ALT_DRUG") 

     val nlpRDD = nlpGenericRDD.mapPartitions { iter => 
     val nerModule = new GateNERImpl(pluginHome, requiredNEREntities) 
     iter.map { x => 
      val nerJson = nerModule.generateNER(x._2, Utility.objectToJson(x._3)) 
      val nerJsonObject = Utility.jsonToGateNer(nerJson) 

      val nlpEntities: NLPEntitiesDAO = new NLPEntitiesDAO 
      nlpEntities.setToken(x._3.getToken()) 
      nlpEntities.setSpaceToken(x._3.getSpaceToken()) 
      nlpEntities.setSentence(x._3.getSentence()) 
      nlpEntities.setSplit(x._3.getSplit()) 
      nlpEntities.setVG(x._3.getVG) 
      nlpEntities.setNounChunk(x._3.getNounChunk) 

      nlpEntities.setDRUG(nerJsonObject.getDRUG()) 
      nlpEntities.setREG(nerJsonObject.getREG()) 
      nlpEntities.setSE(nerJsonObject.getSE()) 
      nlpEntities.setALT_DRUG(nerJsonObject.getALT_DRUG()) 
      nlpEntities.setALT_THERAPY(nerJsonObject.getALT_THERAPY()) 
      (x._1, nlpEntities) 
     } 
     } 

     //outputRDD.foreach(println) 

     val newRDD = nlpRDD.map { k => convertToPut(k) } 
     newRDD.saveAsNewAPIHadoopDataset(job.getConfiguration()) 
     return 0 

    } catch { 
     case e: MalformedURLException => { 
     e.printStackTrace() 
     return 1 
     } 
     case e: GateException => 
     { 
      e.printStackTrace() 
      return 1 
     } 

    } 
    } 

    def convertToPut(genericNlpWithRowKey: (String, NLPEntitiesDAO)): (ImmutableBytesWritable, Put) = { 
    val rowkey = genericNlpWithRowKey._1 
    val genericNLP = genericNlpWithRowKey._2 
    val put = new Put(Bytes.toBytes(rowkey)) 
    val genCFDataBytes = Bytes.toBytes("gen") 
    val nerCFDataBytes = Bytes.toBytes("ner") 
    val flagCFDataBytes = Bytes.toBytes("f") 

    put.add(genCFDataBytes, Bytes.toBytes("token"), 
     Bytes.toBytes(Utility.objectToJson((genericNLP.getToken())))); 
    put.add(genCFDataBytes, Bytes.toBytes("spaceToken"), 
     Bytes.toBytes(Utility.objectToJson((genericNLP.getSpaceToken())))); 
    put.add(genCFDataBytes, Bytes.toBytes("sentence"), 
     Bytes.toBytes(Utility.objectToJson((genericNLP.getSentence())))); 
    put.add(genCFDataBytes, Bytes.toBytes("verbGroup"), 
     Bytes.toBytes(Utility.objectToJson((genericNLP.getVG())))); 
    put.add(genCFDataBytes, Bytes.toBytes("split"), 
     Bytes.toBytes(Utility.objectToJson((genericNLP.getSplit())))); 
    put.add(genCFDataBytes, Bytes.toBytes("nounChunk"), 
     Bytes.toBytes(Utility.objectToJson((genericNLP.getNounChunk())))); 

    put.add(nerCFDataBytes, Bytes.toBytes("drug"), 
     Bytes.toBytes(Utility.objectToJson((genericNLP.getDRUG())))) 
    put.add(nerCFDataBytes, Bytes.toBytes("sideEffect"), 
     Bytes.toBytes(Utility.objectToJson((genericNLP.getSE())))) 
    put.add(nerCFDataBytes, Bytes.toBytes("regimen"), 
     Bytes.toBytes(Utility.objectToJson((genericNLP.getREG())))) 
    put.add(nerCFDataBytes, Bytes.toBytes("altTherapy"), 
     Bytes.toBytes(Utility.objectToJson((genericNLP.getALT_THERAPY())))) 
    put.add(nerCFDataBytes, Bytes.toBytes("altDrug"), 
     Bytes.toBytes(Utility.objectToJson((genericNLP.getALT_DRUG())))) 

    put.add(flagCFDataBytes, Bytes.toBytes("is_processed"), 
     Bytes.toBytes("1")) 
    put.add(flagCFDataBytes, Bytes.toBytes("dStatus"), 
     Bytes.toBytes("0")) 
    put.add(flagCFDataBytes, Bytes.toBytes("rStatus"), 
     Bytes.toBytes("0")) 
    put.add(flagCFDataBytes, Bytes.toBytes("adStatus"), 
     Bytes.toBytes("0")) 
    put.add(flagCFDataBytes, Bytes.toBytes("atStatus"), 
     Bytes.toBytes("0")) 

    (new ImmutableBytesWritable(Bytes.toBytes(rowkey)), put) 

    } 

    def pipeLineExecute(args: Array[String]): Int = { 

    var batchString = "" 
    val usage = "Usage: NLPAnnotationController" + " -inputTable tableName -outputTable tableName" + 
     " -batchId batchId/-newbatch \n" 
    if (args.length == 0) { 
     System.err.println(usage) 
     return -1 
    } 

    val conf = VocpConfiguration.create 
    for (i <- 0 until args.length by 2) { 
     if ("-inputTable" == args(i)) { 
     conf.set(VOCPConstants.INPUTTABLE, args(i + 1)) 
     } else if ("-outputTable" == args(i)) { 
     conf.set(VOCPConstants.OUTPUTTABLE, args(i + 1)) 
     } else if ("-batchId" == args(i)) { 
     batchString = args(i) 
     } else if ("-newbatch" == args(i)) { 
     batchString = "newbatch" 
     } else { 
     throw new IllegalArgumentException("arg " + args(i) + " not recognized") 
     } 
    } 
    val result = nlpAnnotationExtraction(conf, batchString) 
    result 

    } 

    def main(args: Array[String]) { 
    val res = pipeLineExecute(args) 
    System.exit(res) 
    } 

} 

I ran the code with the following command:

spark-submit --driver-memory 8g --executor-memory 4g --master yarn --deploy-mode client --name NLPEntitiesGeneration --conf "spark.app.id=NLPEntitiesGeneration" target/vopEngine-0.0.1-SNAPSHOT-jar-with-dependencies.jar -inputTable parseddata_sample -outputTable parseddata_sample -newbatch 

The directory /home/centos/vopEngine1_2/lib/plugins/ANNIE/ exists. What could be the possible reason for the exception?
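
A quick way to check whether that path is visible from the executor hosts, and not only from the machine running spark-submit, would be a small probe like this (hypothetical, not part of the code above):

val pluginPath = "/home/centos/vopEngine1_2/lib/plugins/ANNIE"
// Hypothetical probe: run a few tasks and report whether the path exists on each executor host.
val report = sc.parallelize(1 to 4, 4).map { _ =>
  (java.net.InetAddress.getLocalHost.getHostName, new java.io.File(pluginPath).exists)
}.collect()
report.foreach(println)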

Thanks in advance.

EDIT: The file not found exception went away after copying the plugins folder to /tmp. However, a new exception now appears with the following details:

org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException: Failed 1 action: 9e941de5-84a7-43f4-9c8e-8c859976d9e1: 1 time, 
    at org.apache.hadoop.hbase.client.AsyncProcess$BatchErrors.makeException(AsyncProcess.java:247) 
    at org.apache.hadoop.hbase.client.AsyncProcess$BatchErrors.access$1800(AsyncProcess.java:227) 
    at org.apache.hadoop.hbase.client.AsyncProcess.waitForAllPreviousOpsAndReset(AsyncProcess.java:1766) 
    at org.apache.hadoop.hbase.client.BufferedMutatorImpl.backgroundFlushCommits(BufferedMutatorImpl.java:240) 
    at org.apache.hadoop.hbase.client.BufferedMutatorImpl.mutate(BufferedMutatorImpl.java:146) 
    at org.apache.hadoop.hbase.client.BufferedMutatorImpl.mutate(BufferedMutatorImpl.java:113) 
    at org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat$MultiTableRecordWriter.write(MultiTableOutputFormat.java:148) 
    at org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat$MultiTableRecordWriter.write(MultiTableOutputFormat.java:76) 
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply$mcV$sp(PairRDDFunctions.scala:1113) 
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply(PairRDDFunctions.scala:1111) 
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply(PairRDDFunctions.scala:1111) 
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1251) 
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1119) 
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1091) 
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) 
    at org.apache.spark.scheduler.Task.run(Task.scala:89) 
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) 
    at java.lang.Thread.run(Thread.java:745) 
    Suppressed: org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException: Failed 1 action: 9ca384fc-580c-4198-985c-6acc2af5f404: 1 time, 
     at org.apache.hadoop.hbase.client.AsyncProcess$BatchErrors.makeException(AsyncProcess.java:247) 
     at org.apache.hadoop.hbase.client.AsyncProcess$BatchErrors.access$1800(AsyncProcess.java:227) 
     at org.apache.hadoop.hbase.client.AsyncProcess.waitForAllPreviousOpsAndReset(AsyncProcess.java:1766) 
     at org.apache.hadoop.hbase.client.BufferedMutatorImpl.backgroundFlushCommits(BufferedMutatorImpl.java:240) 
     at org.apache.hadoop.hbase.client.BufferedMutatorImpl.close(BufferedMutatorImpl.java:163) 
     at org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat$MultiTableRecordWriter.close(MultiTableOutputFormat.java:123) 
     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$5.apply$mcV$sp(PairRDDFunctions.scala:1120) 
     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1260) 
     ... 8 more 

The YARN logs show the following error:

ERROR client.AsyncProcess: Failed to get region location 
org.apache.hadoop.hbase.TableNotFoundException: 9e941de5-84a7-43f4-9c8e-8c859976d9e1 
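
For reference, MultiTableOutputFormat takes the ImmutableBytesWritable key of each record as the name of the table to write to, while the row key lives inside the Put itself. A minimal sketch of the record shape it expects (with a hypothetical table name and row key) would be:

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes

// Sketch only: the key names the target table, the Put carries the row key.
val tableName = "parseddata_sample"           // hypothetical target table
val put = new Put(Bytes.toBytes("row-key-1")) // hypothetical row key
put.add(Bytes.toBytes("f"), Bytes.toBytes("is_processed"), Bytes.toBytes("1"))
val record = (new ImmutableBytesWritable(Bytes.toBytes(tableName)), put)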

Where is '--class com.scryAnalytics.vopEngine.NLPProcessingLog' in your spark-submit command? – toofrellik


I have mentioned it in my pom file. – wadhwasahil

Answer


Use this path: file:///home/centos/vopEngine1_2/lib/plugins/ANNIE/


The error (a new one this time) is: gate.util.GateException: couldn't find: file:/mnt/yarn/nm/usercache/centos/appcache/application_1480573656109_0046/container_1480573656109_0046_01_000002/file:/home/centos/vopEngine1_2/lib/plugins/ANNIE/ – wadhwasahil


What is this address? I suggest you change the whole file path to /tmp and put all the files in /tmp. – vahid


I think YARN creates a temporary folder under its own path; the ANNIE directory is not found there, though. – wadhwasahil
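
One way to make the plugin directory visible inside those YARN containers (a sketch, assuming the plugins directory is packed into a plugins.zip archive and shipped with the job via --archives) would be to resolve it relative to each container's working directory:

// Hypothetical: submit with
//   spark-submit ... --archives /home/centos/vopEngine1_2/lib/plugins.zip#plugins ...
// YARN unpacks the archive and links it as "plugins" inside each container's working directory.
val nlpGenericRDD = messageRDD.mapPartitions { iter =>
  val executorPluginHome = new java.io.File("plugins").getAbsolutePath
  val nlpModule = new GateGenericNLP(executorPluginHome, requiredNLPEntities)
  iter.map { x =>
    (x._1, x._2, Utility.jsonToGenericNLP(nlpModule.generateNLPEntities(x._2)))
  }
}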
