2017-02-15 2 views
-3

Ich bekomme immer einen Fehler, wenn ich dieses Python-Programm ausführe. Es heißt, es gebe keine Datei oder kein Verzeichnis '/path/to/times-testing.log'. Ich verstehe das nicht – kann mir jemand helfen, dieses Problem zu beheben? Vielen Dank im Voraus! (Fehler im Python-Skript?)

Hier ist der Code:

import urllib2 
import json 
import datetime 
import time 
import sys, os 
import logging 
from urllib2 import HTTPError 
from ConfigParser import SafeConfigParser 


# helper function to iterate through dates 
def daterange(start_date, end_date):
    """Yield every date from start_date to end_date, both inclusive.

    Steps forward when start_date <= end_date, backward otherwise, always
    beginning at start_date.
    """
    step = 1 if start_date <= end_date else -1
    span = abs((end_date - start_date).days) + 1
    for offset in range(span):
        yield start_date + datetime.timedelta(days=step * offset)

# helper function to get json into a form I can work with  
def convert(input): 
    if isinstance(input, dict): 
     return {convert(key): convert(value) for key, value in input.iteritems()} 
    elif isinstance(input, list): 
     return [convert(element) for element in input] 
    elif isinstance(input, unicode): 
     return input.encode('utf-8') 
    else: 
     return input 

# helpful function to figure out what to name individual JSON files   
def getJsonFileName(date, page, json_file_path):
    """Return the storage path for one page of results: <path><date>.<page>.json.

    Note: json_file_path is used as a raw prefix, so it must already end with
    a path separator if one is wanted.
    """
    return "%s%s.%s.json" % (json_file_path, date, page)

# helpful function for processing keywords, mostly  
def getMultiples(items, key):
    """Join the `key` field of every item with '; '.

    Args:
        items: list of dict-like objects that each carry `key`.
        key: field name to extract from each item.

    Returns:
        A single '; '-separated string, or "" when items is empty.
    """
    # str.join replaces the original's manual accumulator loop, which
    # rebuilt the string on every iteration.
    return "; ".join([item[key] for item in items])

# get the articles from the NYTimes Article API  
def getArticles(date, query, api_key, json_file_path): 
    # LOOP THROUGH THE 101 PAGES NYTIMES ALLOWS FOR THAT DATE 
    for page in range(101): 
     for n in range(5): # 5 tries 
      try: 
       request_string = "http://api.nytimes.com/svc/search/v2/articlesearch.json?begin_date=" + date + "&end_date=" + date + "&page=" + str(page) + "&api-key=" + api_key 
       response = urllib2.urlopen(request_string) 
       content = response.read() 
       if content: 
        articles = convert(json.loads(content)) 
        # if there are articles here 
        if len(articles["response"]["docs"]) >= 1: 
         json_file_name = getJsonFileName(date, page, json_file_path) 
         json_file = open(json_file_name, 'w') 
         json_file.write(content) 
         json_file.close() 
        # if no more articles, go to next date 
        else: 
         return 
       time.sleep(3) # wait so we don't overwhelm the API 
      except HTTPError as e: 
       logging.error("HTTPError on page %s on %s (err no. %s: %s) Here's the URL of the call: %s", page, date, e.code, e.reason, request_string) 
       if e.code == 403: 
        print "Script hit a snag and got an HTTPError 403. Check your log file for more info." 
        return 
       if e.code == 429: 
        print "Waiting. You've probably reached an API limit." 
        time.sleep(30) # wait 30 seconds and try again 
      except: 
       logging.error("Error on %s page %s: %s", date, file_number, sys.exc_info()[0]) 
       continue 

# parse the JSON files you stored into a tab-delimited file 
def parseArticles(date, tsv_file_name, json_file_path):
    """Parse the JSON files stored for `date` and append one tab-separated
    row per article to the TSV file.

    Args:
        date: date string (YYYYMMDD) used to locate the JSON files.
        tsv_file_name: path of the TSV file to append to.
        json_file_path: folder prefix where getArticles() stored the JSON.
    """
    for file_number in range(101):
        # get the articles and put them into a dictionary; stop at the first
        # missing file, since getArticles() writes pages contiguously from 0
        try:
            file_name = getJsonFileName(date, file_number, json_file_path)
            if os.path.isfile(file_name):
                in_file = open(file_name, 'r')
                try:
                    articles = convert(json.loads(in_file.read()))
                finally:
                    in_file.close()  # BUG FIX: close even if the JSON is malformed
            else:
                break
        except IOError as e:
            logging.error("IOError in %s page %s: %s %s", date, file_number, e.errno, e.strerror)
            continue

        # an empty docs list means nothing (more) to parse for this date
        if len(articles["response"]["docs"]) < 1:
            break

        # open the tsv for appending
        try:
            out_file = open(tsv_file_name, 'ab')
        except IOError as e:
            logging.error("IOError: %s %s %s %s", date, file_number, e.errno, e.strerror)
            continue

        # loop through the articles putting what we need in a tsv
        try:
            for article in articles["response"]["docs"]:
                keywords = getMultiples(article["keywords"], "value")

                # should probably pull these if/else checks into a module
                variables = [
                 article["pub_date"],
                 keywords,
                 str(article["headline"]["main"]).decode("utf8").replace("\n","") if "main" in article["headline"].keys() else "",
                 str(article["source"]).decode("utf8") if "source" in article.keys() else "",
                 str(article["document_type"]).decode("utf8") if "document_type" in article.keys() else "",
                 article["web_url"] if "web_url" in article.keys() else "",
                 str(article["news_desk"]).decode("utf8") if "news_desk" in article.keys() else "",
                 str(article["section_name"]).decode("utf8") if "section_name" in article.keys() else "",
                 str(article["snippet"]).decode("utf8").replace("\n","") if "snippet" in article.keys() else "",
                 str(article["lead_paragraph"]).decode("utf8").replace("\n","") if "lead_paragraph" in article.keys() else "",
                 ]
                line = "\t".join(variables)
                out_file.write(line.encode("utf8")+"\n")
        except KeyError as e:
            # BUG FIX: KeyError has no errno/strerror attributes; the original
            # raised AttributeError inside the handler and masked the real error.
            logging.error("KeyError in %s page %s: %s", date, file_number, e)
            continue
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            logging.error("Error on %s page %s: %s", date, file_number, sys.exc_info()[0])
            continue
        finally:
            # BUG FIX: the original leaked out_file whenever an exception
            # triggered `continue`; finally closes it on every path.
            out_file.close()

# Main function where stuff gets done 

def main():
    """Read config/settings.cfg, configure logging, then harvest and parse
    NYTimes articles for every date in the configured range."""
    parser = SafeConfigParser()
    base_dir = os.path.dirname(__file__)
    parser.read(os.path.join(base_dir, 'config/settings.cfg'))

    # file locations come from the [files] section of the config
    json_file_path = parser.get('files', 'json_folder')
    tsv_file_name = parser.get('files', 'tsv_file')
    log_file = parser.get('files', 'logfile')

    # API credentials, query and date range from the [nytimes] section
    api_key = parser.get('nytimes', 'api_key')
    start = datetime.date(year = int(parser.get('nytimes', 'start_year')), month = int(parser.get('nytimes', 'start_month')), day = int(parser.get('nytimes', 'start_day')))
    end = datetime.date(year = int(parser.get('nytimes', 'end_year')), month = int(parser.get('nytimes', 'end_month')), day = int(parser.get('nytimes', 'end_day')))
    query = parser.get('nytimes', 'query')

    # NOTE(review): basicConfig fails with IOError if log_file points to a
    # non-existent directory — the configured path must exist on disk.
    logging.basicConfig(filename=log_file, level=logging.INFO)
    logging.info("Getting started.")
    try:
        # LOOP THROUGH THE SPECIFIED DATES
        for day in daterange(start, end):
            date = day.strftime("%Y%m%d")
            logging.info("Working on %s." % date)
            getArticles(date, query, api_key, json_file_path)
            parseArticles(date, tsv_file_name, json_file_path)
    except:
        logging.error("Unexpected error: %s", str(sys.exc_info()[0]))
    finally:
        logging.info("Finished.")

# Script entry point: run main() only when executed directly, not on import.
if __name__ == '__main__' : 
    main() 

Es erzeugt den folgenden Fehler, wenn es ausgeführt wird:

Rakeshs-MacBook-Air:get-nytimes-articles-master niharika$ python getTimesArticles.py 
Traceback (most recent call last): 
    File "getTimesArticles.py", line 180, in <module> 
    main() 
    File "getTimesArticles.py", line 164, in main 
    logging.basicConfig(filename=log_file, level=logging.INFO) 
    File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 1545, in basicConfig 
    hdlr = FileHandler(filename, mode) 
    File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 911, in __init__ 
    StreamHandler.__init__(self, self._open()) 
    File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 941, in _open 
    stream = open(self.baseFilename, self.mode) 
IOError: [Errno 2] No such file or directory: '/path/to/times-testing.log' 
Rakeshs-MacBook-Air:get-nytimes-articles-master niharika$ 
+0

Vielleicht existiert die Logdatei nicht – legen Sie sie mit 'touch /path/to/times-testing.log' an. – mtt2p

+0

Was ist der Inhalt der Datei config/settings.cfg? Welchen Wert hat insbesondere der Schlüssel 'logfile'? Ist die entsprechende Datei auf der Festplatte vorhanden? – Antwane

+0

Ja, ich habe settings.cfg unter /desktop/nytpy/config/settings.cfg –

Antwort

0

Ihre main() Funktion:

def main(): 

    config = SafeConfigParser() 
    script_dir = os.path.dirname(__file__) 
    config_file = os.path.join(script_dir, 'config/settings.cfg') 
    config.read(config_file) 
    ... 
    log_file = config.get('files','logfile') 
    ... 
    logging.basicConfig(filename=log_file, level=logging.INFO) 

öffnet die Datei config/settings.cfg und liest daraus den Namen der Protokolldatei, der offenbar '/path/to/times-testing.log' ist. Sie müssen entweder diese Datei bzw. den zugehörigen Ordner erstellen (wahrscheinlich nicht die beste Idee) oder die Konfiguration so anpassen, dass sie auf die richtige Datei verweist.

+0

Vielen Dank, Ihre Antwort ist sehr aufschlussreich. Ich habe die Datei settings.cfg gefunden; darin stehen die Parameter logfile = /path/to/times-testing.log, json_folder = /full/path/ und tsv_file = /full/path/output.tsv. Sollte ich neue leere Dateien erstellen, z. B. output.tsv und times-testing.log, und deren Pfade dort eintragen? Und was sollte ich für den Parameter json_folder einstellen? Danke! –

+0

Nun, es liegt ganz bei Ihnen und wo Sie diese Dateien finden wollen, wirklich. :) Normalerweise werden Protokolle auf UNIX-Systemen (z. B. macOS) im Ordner "/ var/log /" gespeichert. – sxn

Verwandte Themen