Ich implementiere Naive Bayesian Klassifizierer mit NLTK. Aber wenn ich Klassifizierer mit extrahierten Merkmalen trainiere, gibt es einen Fehler "zu viele Werte zum Entpacken". Ich bin nur Anfänger zu Python. Hier ist Code. Programm liest Text aus Dateien und extrahiert Features aus diesen Dateien.Sentimentklassifizierung mit NLTK Naive Baysian Klassifizierer
import nltk.classify.util,os,sys;
from nltk.classify import NaiveBayesClassifier;
from nltk.corpus import stopwords;
from nltk.tokenize import word_tokenize,RegexpTokenizer;
import re;
TAG_RE = re.compile(r'<[^>]+>')
def remove_tags(text):
return TAG_RE.sub('', text)
def word_feats(words):
return dict([(word,True) for word in words])
def feature_extractor(sentiment):
path = "train/"+sentiment+"/"
files = os.listdir(path);
feats = {};
i = 0;
for file in files:
f = open(path+file,"r", encoding='utf-8');
review = f.read();
review = remove_tags(review);
stopWords = (stopwords.words("english"))
tokenizer = RegexpTokenizer(r"\w+");
tokens = tokenizer.tokenize(review);
features = word_feats(tokens);
feats.update(features)
return feats;
posative_feat = feature_extractor("pos");
p = open("posFeat.txt","w", encoding='utf-8');
p.write(str(posative_feat));
negative_feat = feature_extractor("neg");
n = open("negFeat.txt","w", encoding='utf-8');
n.write(str(negative_feat));
plength = int(len(posative_feat)*3/4);
nlength = int(len(negative_feat)*3/4)
totalLength = plength+nlength;
trainFeatList = {}
testFeatList = {}
i = 0
for items in posative_feat.items():
i +=1;
value = {items[0]:items[1]}
if(i<plength):
trainFeatList.update(value);
else:
testFeatList.update(value);
j = 0
for items in negative_feat.items():
j +=1;
value = {items[0]:items[1]}
if(j<plength):
trainFeatList.update(value);
else:
testFeatList.update(value);
classifier = NaiveBayesClassifier.train(trainFeatList)
print(nltk.classify.util.accuracy(classifier,testFeatList));
classifier.show_most_informative_features();
Mögliches Duplikat [NLTK Genauigkeit: "Valueerror: zu viele Werte entpacken"] (http://stackoverflow.com/questions/31920199/nltk-accuracy-valueerror-too-many-values-to-unpack) – Pierre