2016-04-15 3 views
0
Here is my SentiWordNet algorithm:



/**
 * Word-level sentiment lexicon built from a SentiWordNet style tab-separated file.
 *
 * <p>For every data line the loader reads column 0 as the part-of-speech letter,
 * columns 2 and 3 as the positive and negative score, and column 4 as a
 * space-separated list of {@code word#rank} terms. Each {@code word#pos} key ends
 * up with one score: the rank-weighted average of (positive - negative), where the
 * sense with rank {@code r} has weight {@code 1/r} and missing ranks count as 0.
 */
public class SWN3 {

    /** File location used by the no-argument constructor. */
    private static final String DEFAULT_PATH = "C:/Users/RAHUL/Desktop/SWN/SentiWordNet_3.0.0.txt";

    /** Path the lexicon was loaded from; kept for diagnostics. */
    private final String pathToSWN;

    /** Maps {@code word#pos} (pos is the letter from column 0, e.g. n, a, r, v) to its score. */
    private final HashMap<String, Double> _dict = new HashMap<String, Double>();

    /** Loads the lexicon from the default location. */
    public SWN3() {
        this(DEFAULT_PATH);
    }

    /**
     * Loads the lexicon from the given file.
     *
     * <p>Loading is best effort: if the file cannot be read, the problem is reported on
     * standard error and the lexicon contains whatever was read up to that point
     * (possibly nothing). Lines starting with {@code #}, blank lines and malformed
     * lines are skipped.
     *
     * @param pathToSWN path of the tab-separated lexicon file
     * @throws NullPointerException if {@code pathToSWN} is null
     */
    public SWN3(String pathToSWN) {
        if (pathToSWN == null) {
            throw new NullPointerException("pathToSWN");
        }
        this.pathToSWN = pathToSWN;

        // word#pos -> (sense rank -> score). Keyed by rank so that lines may arrive in
        // any rank order without shifting scores that were stored earlier.
        HashMap<String, HashMap<Integer, Double>> scoresByRank =
                new HashMap<String, HashMap<Integer, Double>>();

        try (BufferedReader reader = new BufferedReader(new FileReader(pathToSWN))) {
            String line;
            while ((line = reader.readLine()) != null) {
                collectLine(line, scoresByRank);
            }
        } catch (java.io.IOException e) {
            // Keep the original contract: construction never fails because of the file.
            System.err.println("SWN3: could not read lexicon file " + pathToSWN + ": " + e);
        }

        for (String key : scoresByRank.keySet()) {
            _dict.put(key, weightedAverage(scoresByRank.get(key)));
        }
    }

    /** Parses one line of the lexicon file and records the score of every term on it. */
    private static void collectLine(String line,
                                    HashMap<String, HashMap<Integer, Double>> scoresByRank) {
        String trimmed = line.trim();
        if (trimmed.isEmpty() || trimmed.startsWith("#")) {
            return; // comment / header / footer line
        }
        String[] data = line.split("\t");
        if (data.length < 5) {
            return; // not a data line
        }
        double score;
        try {
            score = Double.parseDouble(data[2]) - Double.parseDouble(data[3]);
        } catch (NumberFormatException ignored) {
            return; // malformed score columns: skip this line instead of aborting the whole load
        }
        for (String term : data[4].split(" ")) {
            int hash = term.lastIndexOf('#');
            if (hash <= 0) {
                continue; // no "word#rank" shape
            }
            int rank;
            try {
                rank = Integer.parseInt(term.substring(hash + 1));
            } catch (NumberFormatException ignored) {
                continue; // rank is not a number: skip just this term
            }
            if (rank < 1) {
                continue; // ranks are 1-based
            }
            String key = term.substring(0, hash) + "#" + data[0];
            HashMap<Integer, Double> ranks = scoresByRank.get(key);
            if (ranks == null) {
                ranks = new HashMap<Integer, Double>();
                scoresByRank.put(key, ranks);
            }
            ranks.put(rank, score);
        }
    }

    /**
     * Combines the per-sense scores of one key: sum(score_r / r) divided by
     * sum(1 / r) for r = 1..highest rank seen.
     */
    private static double weightedAverage(HashMap<Integer, Double> scoreByRank) {
        int maxRank = 0;
        double weighted = 0.0;
        for (Integer rank : scoreByRank.keySet()) {
            weighted += scoreByRank.get(rank) / rank;
            if (rank > maxRank) {
                maxRank = rank;
            }
        }
        double norm = 0.0;
        for (int r = 1; r <= maxRank; r++) {
            norm += 1.0 / r;
        }
        return weighted / norm;
    }

    /**
     * Returns the summed score of {@code word} over the four part-of-speech keys
     * {@code #n}, {@code #a}, {@code #r} and {@code #v}.
     *
     * @param word the word to look up, exactly as it is spelled in the lexicon
     * @return the sum of all scores found; {@code 0.0} (never null) when the word is unknown
     */
    public Double extract(String word) {
        double total = 0.0;
        for (String pos : new String[] {"#n", "#a", "#r", "#v"}) {
            Double score = _dict.get(word + pos);
            if (score != null) {
                total += score;
            }
        }
        return total;
    }

    /** Holds the lexicon used by {@link #SentiWord(String)}; loaded on first use only. */
    private static final class Shared {
        static final SWN3 INSTANCE = new SWN3();
    }

    /**
     * Scores a sentence by summing {@link #extract(String)} over its whitespace-separated
     * words (after removing every character that is not an ASCII letter) and maps the
     * total to a label.
     *
     * @param stri the sentence; null is treated like an empty sentence
     * @return the sentiment label, or the empty string when the total is exactly 0
     */
    public static String SentiWord(String stri) {
        if (stri == null) {
            return "";
        }
        // Reuse one loaded lexicon instead of re-reading the file on every call.
        SWN3 lexicon = Shared.INSTANCE;
        double totalScore = 0;
        for (String word : stri.split("\\s+")) {
            String cleaned = word.replaceAll("([^a-zA-Z\\s])", "");
            if (cleaned.isEmpty()) {
                continue; // token consisted only of digits/punctuation
            }
            totalScore += lexicon.extract(cleaned);
        }
        return classify(totalScore);
    }

    /**
     * Maps a total score to a label.
     *
     * <p>NOTE(review): only the two upper bands ({@code >= 0.75} and {@code (0.25, 0.75)})
     * were spelled out in the original snippet; the remaining branches were elided with
     * "....". The bands at and below 0.25 are a reconstruction that mirrors the positive
     * side and uses the label names from the original constructor - confirm against
     * the intended thresholds.
     */
    static String classify(double totalScore) {
        if (totalScore >= 0.75) {
            return "strong_positive";
        }
        if (totalScore > 0.25) {
            return "positive";
        }
        if (totalScore > 0) {
            return "weak_positive";
        }
        if (totalScore <= -0.75) {
            return "strong_negative";
        }
        if (totalScore < -0.25) {
            return "negative";
        }
        if (totalScore < 0) {
            return "weak_negative";
        }
        return "";
    }

}

Und hier ist meine POS-Tagger-Methode, die ich integrieren möchte. Momentan funktioniert dieser SentiWordNet-Code ohne POS-Tags, liefert aber keine guten Ergebnisse. Ich finde einfach nicht heraus, wie es geht. Bitte helfen Sie. Wie integriert man einen POS-Tagger in den SentiWordNet-Algorithmus?

Antwort

0

Sie könnten Ihre extract Methode in SWN3 wie folgt anpassen:

/**
 * Looks up the score of {@code word} for the part of speech indicated by a POS tag.
 *
 * <p>Tags containing {@code NN} select the {@code #n} entry, {@code VB} selects
 * {@code #v}, {@code JJ} selects {@code #a} and {@code RB} selects {@code #r},
 * checked in that order. The longer tags (NNS, NNP, VBD, JJR, RBS, ...) all contain
 * their two-letter prefix, so testing the prefix alone covers them.
 *
 * @param word the word to look up
 * @param tail the POS tag of the word; may be null
 * @return the stored score, or null when either argument is null, the tag is of
 *         another kind, or the lexicon has no entry for the word with that part of speech
 */
public Double extract(String word, String tail) {
    if (word == null || tail == null) {
        return null; // nothing to look up; avoids the NullPointerException on tail.contains
    }
    if (tail.contains("NN")) {
        return _dict.get(word + "#n");
    }
    if (tail.contains("VB")) {
        return _dict.get(word + "#v");
    }
    if (tail.contains("JJ")) {
        return _dict.get(word + "#a");
    }
    if (tail.contains("RB")) {
        return _dict.get(word + "#r");
    }
    return null;
}

Dabei werden die Tags auf die in SentiWordNet definierten Wortarten abgebildet. Ich schlage vor, Ihre Hauptmethode wie folgt zu ändern:

public static void main(String[] args) { 
    MaxentTagger tagger = new MaxentTagger("files/english-left3words-distsim.tagger"); 

    //String sample = "This is a sample text"; 
    String sample = "It works much better with this great example!"; 
    sample = sample.replaceAll("([^a-zA-Z\\s])", ""); 
    String[] words = sample.split("\\s+"); 

    String taggedSample = tagger.tagString(sample); 
    String[] taggedWords = taggedSample.split("\\s+"); 
    System.out.println(tagger.tagString(sample)); 

    double totalScore = 0; 
    SWN3 test = new SWN3(); 
    System.out.println("-----------"); 
    for (int i=0; i<taggedWords.length;i++) { 
     String tail = taggedWords[i].substring(words[i].length() + 1); 
     Double score = null; 
     if(tail!=null{ 
      score = test.extract(words[i], tail); 
      System.out.println(taggedWords[i] + "\t" + words[i] + "\t" + tail + "\t" + score); 
     } 
     if (score == null) 
      continue; 
     totalScore += score; 
    } 
    System.out.println("-----------"); 
    System.out.println(totalScore); 
} 

Ich habe in sample einen anderen Satz verwendet, bei dem es besser funktioniert. Beachten Sie, dass das Taggen des gesamten Satzes und das Taggen der einzelnen Wörter zu unterschiedlichen Ergebnissen führen können.

Ich hoffe es hilft.

+0

Beantwortet es Ihre Frage? – joel314

+1

Ja, tut es. Das Programm hat funktioniert und liefert definitiv bessere Ergebnisse als zuvor. Vielen Dank, dass Sie sich die Zeit genommen haben :) Viele Grüße –

+0

Die extract-Methode wirft manchmal eine NullPointerException in ihrer ersten Zeile, d. h. bei ---> if (tail.contains("NN") || tail.contains("NNS"). Bitte helfen Sie :( –