2017-09-16

I am trying to scrape Facebook data with the script from the link below, but I always get the following error while scraping 2,000 statuses. Please guide me as to what the error is. Thank you. Facebook Graph API KeyError 'next'

Error >>

Traceback (most recent call last):
  File "posts.py", line 191, in <module>
    scrapeFacebookPageFeedStatus(page_id, access_token)
  File "posts.py", line 181, in scrapeFacebookPageFeedStatus
    statuses['paging']['next']))
KeyError: 'next'

Script Link->>https://nocodewebscraping.com/facebook-scraper/

Script >>

import urllib2
import json
import datetime
import csv
import time


page_id = raw_input("Please Paste Public Page Name:")

access_token = raw_input("Please Paste Your Access Token:")


def request_until_succeed(url):
    # Keep retrying the same request until the Graph API answers with HTTP 200.
    req = urllib2.Request(url)
    success = False
    while success is False:
        try:
            response = urllib2.urlopen(req)
            if response.getcode() == 200:
                success = True
        except Exception, e:
            print e
            time.sleep(5)

            print "Error for URL %s: %s" % (url, datetime.datetime.now())
            print "Retrying."

    return response.read()


def unicode_normalize(text):
    # Replace curly quotes and non-breaking spaces, then encode as UTF-8.
    return text.translate({0x2018: 0x27, 0x2019: 0x27, 0x201C: 0x22,
                           0x201D: 0x22, 0xa0: 0x20}).encode('utf-8')


def getFacebookPageFeedData(page_id, access_token, num_statuses):
    # Build the /{page-id}/posts request and return the decoded JSON response.
    base = "https://graph.facebook.com/v2.6"
    node = "/%s/posts" % page_id
    fields = "/?fields=message,link,permalink_url,created_time,type,name,id," + \
        "comments.limit(0).summary(true),shares,reactions" + \
        ".limit(0).summary(true)"
    parameters = "&limit=%s&access_token=%s" % (num_statuses, access_token)
    url = base + node + fields + parameters

    data = json.loads(request_until_succeed(url))

    return data


def getReactionsForStatus(status_id, access_token):
    # Fetch the per-type reaction counts for a single status.
    base = "https://graph.facebook.com/v2.6"
    node = "/%s" % status_id
    reactions = "/?fields=" \
        "reactions.type(LIKE).limit(0).summary(total_count).as(like)" \
        ",reactions.type(LOVE).limit(0).summary(total_count).as(love)" \
        ",reactions.type(WOW).limit(0).summary(total_count).as(wow)" \
        ",reactions.type(HAHA).limit(0).summary(total_count).as(haha)" \
        ",reactions.type(SAD).limit(0).summary(total_count).as(sad)" \
        ",reactions.type(ANGRY).limit(0).summary(total_count).as(angry)"
    parameters = "&access_token=%s" % access_token
    url = base + node + reactions + parameters

    data = json.loads(request_until_succeed(url))

    return data


def processFacebookPageFeedStatus(status, access_token):
    # Pull the fields of interest out of one status; missing fields default to
    # an empty string or zero.
    status_id = status['id']
    status_message = '' if 'message' not in status.keys() else \
        unicode_normalize(status['message'])
    link_name = '' if 'name' not in status.keys() else \
        unicode_normalize(status['name'])
    status_type = status['type']
    status_link = '' if 'link' not in status.keys() else \
        unicode_normalize(status['link'])
    status_permalink_url = '' if 'permalink_url' not in status.keys() else \
        unicode_normalize(status['permalink_url'])

    # Parse the UTC timestamp and shift it by -5 hours before formatting.
    status_published = datetime.datetime.strptime(
        status['created_time'], '%Y-%m-%dT%H:%M:%S+0000')
    status_published = status_published + datetime.timedelta(hours=-5)
    status_published = status_published.strftime('%Y-%m-%d %H:%M:%S')

    num_reactions = 0 if 'reactions' not in status else \
        status['reactions']['summary']['total_count']
    num_comments = 0 if 'comments' not in status else \
        status['comments']['summary']['total_count']
    num_shares = 0 if 'shares' not in status else status['shares']['count']

    # Reactions only exist for posts published after 24 February 2016; for
    # older posts the total reaction count is used as the like count.
    reactions = getReactionsForStatus(status_id, access_token) if \
        status_published > '2016-02-24 00:00:00' else {}

    num_likes = 0 if 'like' not in reactions else \
        reactions['like']['summary']['total_count']

    num_likes = num_reactions if status_published < '2016-02-24 00:00:00' \
        else num_likes

    def get_num_total_reactions(reaction_type, reactions):
        if reaction_type not in reactions:
            return 0
        else:
            return reactions[reaction_type]['summary']['total_count']

    num_loves = get_num_total_reactions('love', reactions)
    num_wows = get_num_total_reactions('wow', reactions)
    num_hahas = get_num_total_reactions('haha', reactions)
    num_sads = get_num_total_reactions('sad', reactions)
    num_angrys = get_num_total_reactions('angry', reactions)

    return (status_id, status_message, link_name, status_type, status_link,
            status_permalink_url, status_published, num_reactions,
            num_comments, num_shares, num_likes, num_loves, num_wows,
            num_hahas, num_sads, num_angrys)


def scrapeFacebookPageFeedStatus(page_id, access_token):
    with open('%s_facebook_statuses.csv' % page_id, 'wb') as file:
        w = csv.writer(file)
        w.writerow(["status_id", "status_message", "link_name", "status_type",
                    "status_link", "permalink_url", "status_published",
                    "num_reactions", "num_comments", "num_shares", "num_likes",
                    "num_loves", "num_wows", "num_hahas", "num_sads",
                    "num_angrys"])

        has_next_page = True
        num_processed = 0
        scrape_starttime = datetime.datetime.now()

        print "Scraping %s Facebook Page: %s\n" % (page_id, scrape_starttime)

        statuses = getFacebookPageFeedData(page_id, access_token, 100)

        while has_next_page:
            for status in statuses['data']:

                # Only statuses that carry a reactions summary are written out.
                if 'reactions' in status:
                    w.writerow(processFacebookPageFeedStatus(status,
                                                             access_token))

                num_processed += 1
                if num_processed % 100 == 0:
                    print "%s Statuses Processed: %s" % \
                        (num_processed, datetime.datetime.now())

            # Follow the pagination link to the next batch of statuses.
            if 'paging' in statuses.keys():
                # This is the statement the KeyError: 'next' traceback points at.
                statuses = json.loads(request_until_succeed(
                    statuses['paging']['next']))
            else:
                has_next_page = False

        print "\nDone!\n%s Statuses Processed in %s" % \
            (num_processed, datetime.datetime.now() - scrape_starttime)


if __name__ == '__main__':
    scrapeFacebookPageFeedStatus(page_id, access_token)

You should post your actual code, it will make it easier to help. From a high-level view, the problem is that statuses['paging'] has no 'next' key, which probably means you have reached the last page. – AMC
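
For context, a Graph API feed response typically ends in a paging object shaped roughly like the sketch below (an illustration only, written as Python dicts; the cursor strings and URL are placeholders, not real values):

    # Shape of a non-final page: 'paging' carries a 'next' URL to follow.
    page = {
        "data": ["<status objects>"],
        "paging": {
            "cursors": {"before": "BEFORE_CURSOR", "after": "AFTER_CURSOR"},
            "next": "https://graph.facebook.com/v2.6/PAGE_ID/posts?limit=100&after=AFTER_CURSOR",
        },
    }

    # Shape of the final page: no 'next' entry, which is exactly where
    # statuses['paging']['next'] raises the KeyError.
    last_page = {
        "data": ["<status objects>"],
        "paging": {
            "cursors": {"before": "BEFORE_CURSOR", "after": "AFTER_CURSOR"},
        },
    }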


I have edited the question to include the actual code, and I would like to know how I can fix the 'next' KeyError when the last page is reached, so that the data still gets written out to the CSV. Thank you. – Morris

Answer


Change the condition if 'paging' in statuses.keys(): to

if 'paging' in statuses.keys() and 'next' in statuses['paging'] and statuses['paging']['next']: 
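
With that guard the loop simply stops once the last page has been written out, instead of raising the KeyError. As a minimal sketch (assuming the rest of scrapeFacebookPageFeedStatus stays exactly as in the question), the tail of the while loop can equivalently be written with dict.get():

    # Inside the while loop, after the for-loop over statuses['data']:
    # only follow the pagination link when the response actually contains one;
    # on the last page there is no 'next' URL, so the loop ends cleanly.
    next_url = statuses.get('paging', {}).get('next')
    if next_url:
        statuses = json.loads(request_until_succeed(next_url))
    else:
        has_next_page = False

Both versions avoid indexing into a key that may not exist; the explicit condition above and the .get() form behave the same way.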