auf str Hier ist mein Code:Python: Kann nicht 'Bytes' konvertieren Objekt implizit
class ReviewCategoryClassifier(object):
@classmethod
def load_data(cls, input_file):
job = category_predictor.CategoryPredictor()
category_counts = None
word_counts = {}
with open(input_file) as src:
for line in src:
category, counts = job.parse_output_line(line)
def __init__(self, input_file):
"""input_file: the output of the CategoryPredictor job."""
category_counts, word_counts = self.load_data(input_file)
self.word_given_cat_prob = {}
for cat, counts in word_counts.iteritems():
self.word_given_cat_prob[cat] = self.normalize_counts(counts)
# filter out categories which have no words
seen_categories = set(word_counts)
seen_category_counts = dict((cat, count) for cat, count in
category_counts.iteritems() \
if cat in seen_categories)
self.category_prob= self.normalize_counts(
seen_category_counts)
if __name__ == "__main__":
input_file = sys.argv[1]
text = sys.argv[2]
guesses = ReviewCategoryClassifier(input_file).classify(text)
btw CategoryPredictor() ist ein mrjob Projekt.
Jedes Mal, wenn ich in
getipptpython predict.py yelp_academic_dataset_review.json 'I like donut'
in der Befehlszeile, hat es einen Fehler, der sagt:
TypeError: Can't convert 'bytes' object to str implicitly
Aber Linie ist eine Zeichenfolge stattdessen ein Byte-Objekt. Was habe ich falsch gemacht?
Hier ist das vollständige Zurückverfolgungs
Traceback (most recent call last):
File "predict.py", line 116, in <module>
guesses = ReviewCategoryClassifier(input_file).classify(text)
File "predict.py", line 65, in __init__
category_counts, word_counts = self.load_data(input_file)
File "predict.py", line 44, in load_data
category, counts = job.parse_output_line(line)
File "//anaconda/lib/python3.5/site-packages/mrjob/job.py", line 961, in
parse_output_line
return self.output_protocol().read(line)
File "//anaconda/lib/python3.5/site-packages/mrjob/protocol.py", line 84, in
read
raw_key, raw_value = line.split(b'\t', 1)
TypeError: Can't convert 'bytes' object to str implicitly
Die 'b' \ t'' a Bytes anstelle ein 'str' ist. Versuchen Sie '' open (Eingabe_Datei, 'r', encoding = "utf-8") als src' zu verwenden. – stamaimer