2017-06-01 4 views
0

Also hier habe ich die folgende Liste und Wörterbücher:Mapping Wörterbücher in verschiedenen Liste

{'linked': {'instructors.v1': 
       [{'id': '3219339', 'fullName': 'Lisa Mazzola'}, 
        {'id': '6407572', 'fullName': 'Alan S. Miller '}, 
        {'id': '226710', 'fullName': 'Kevin Werbach'}, 
        {'id': '8054217', 'fullName': '许 肖潇'}, 
        {'id': '20696355', 'fullName': 'Варшалович Дмитрий Александрович'}, 
        {'id': '15622422', 'fullName': 'Prof. James Evans'}}], 

'elements': 
    [{ 'id': '69Bku0KoEeWZtA4u62x6lQ', 'name': 'Gamification','instructorIds': '226710'}] 

} 

Ich versuche, die ‚fullname‘ aus ‚instructors.v1‘ mit den ‚instructorIds‘ assoziiert zu erhalten, von den ‚Elementen 'indem man beides zusammenbringt. Mein Ansatz -> ein anderes Wörterbuch wie folgt erstellt:

{'3219339': 'Lisa Mazzola'} 
{'6407572': 'Alan S. Miller'} 
{'226710': 'Kevin Werbach'} 

Das gibt mir eine KeyError: „‚226710‘“, obwohl 226.710 nicht in der list.Please vorschlagen existieren einen anderen Ansatz, sonst, wo gehe ich hin falsch?

hier ist der Python-Code für Ihre Referenz:

import imp 
import importlib 
import requests 
import json 
import re 
from bs4 import BeautifulSoup 
import csv 
import sys 
import urllib.request 
from importlib import reload 

if __name__ == "__main__": 
headers = ({ 
    "x-user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 
(KHTML, like Gecko) Chrome/53.0.2785.92 Safari/537.36 
FKUA/website/41/website/Desktop"}) 

url = "https://api.coursera.org/api/courses.v1?start=0&limit=20&includes=instructorIds,partnerIds,specializations,s12nlds,v1Details,v2Details&fields=instructorIds,partnerIds,specializations,s12nlds,description" 
data = requests.get(url).json() 
# print(len(data['elements'])) 
print(data) 

with open("courserarough1.csv", 'a') as f: 
             ##### Header ##### 

    header = f.write(
     'instructors' + ',' + 'courseURL' + ',' + 'courseType' + ',' + 
    'CourseName' + ',' + 'partnerName' + ',' + 
     'slug' + ',' + 'specializations' + ',' + 'course_id' + ',' + 
    'description' + ',' + "\n") 


    for n in range(len(data['linked']['instructors.v1'])): 
     instructors = data['linked']['instructors.v1'][n]['fullName'] 
     instructors = str(instructors) 
     instructors = instructors.strip().replace(',', '') 

     instructorsid = data['linked']['instructors.v1'][n]['id'] 
     instructorsid = str(instructorsid) 
     instructorsid = instructorsid.strip().replace(',', '') 
     newdict = dict([(instructorsid,instructors)]) 
     print(newdict) 
    #print(data['linked']['instructors.v1']) 

    partnerlist = [] 
    for m in range(len(data['linked']['partners.v1'])): 
     partnerName = data['linked']['partners.v1'][m]['name'] 
     partnerName = str(partnerName) 
     partnerid = data['linked']['partners.v1'][m]['id'] 
     partnerid = str(partnerid) 

     partnerlist.append(partnerid) 

    #print(partnerlist) 


    for i in range(len(data['elements'])): 

     partnerIds = data['elements'][i]['partnerIds'] 
     #filtered = data[(np.where(partnerlist.__contains__(partnerIds)))] 
     #print(filtered) 
     courseType = data['elements'][i]['courseType'] 
     courseType = str(courseType) 
     if courseType: 
      courseType = courseType.rstrip().replace('v2.', '') 
     else: 
      courseType = ' ' 
     # print(courseType) 
     CourseName = data['elements'][i]['name'] 
     CourseName = str(CourseName) 

     CourseName = CourseName.strip().replace(',', '') 

     partnerIds = data['elements'][i]['partnerIds'] 
     partnerIds = str(partnerIds) 
     if partnerIds: 
      partnerIds = partnerIds.rstrip().replace(',', '') 
      partnerIds = partnerIds.rstrip().replace('\n', '') 
      partnerIds = partnerIds.rstrip().replace('u', '') 
      partnerIds = partnerIds.rstrip().replace('[', '') 
      partnerIds = partnerIds.rstrip().replace(']', '') 
     else: 
      partnerIds = ' ' 

     slug = data['elements'][i]['slug'] 
     slug = str(slug) 
     # print(slug) 
     specializations = data['elements'][i]['specializations'] 
     specializations = str(specializations) 
     if specializations: 
      specializations = specializations.rstrip().replace(',', '') 
      specializations = specializations.rstrip().replace('\n', '') 
      specializations = specializations.rstrip().replace('u', '') 
      specializations = specializations.rstrip().replace('[', '') 
      specializations = specializations.rstrip().replace(']', '') 
     else: 
      specializations = ' ' 

     course_id = data['elements'][i]['id'] 
     course_id = str(course_id) 

     description = data['elements'][i]['description'] 
     description = str(description) 

     if description: 
      description = description.strip().replace(',', '') 
      description = description.strip().replace('\n', '') 

     else: 
      description = ' ' 

     courseURL = "https://www.coursera.org/learn/" + slug 
     courseURL = str(courseURL) 

     instructorIds = data['elements'][i]['instructorIds'] 

     instructorIds = str(instructorIds) 
     if instructorIds: 
      instructorIds = instructorIds.rstrip().replace(',', '') 
      instructorIds = instructorIds.rstrip().replace('\n', '') 
      instructorIds = instructorIds.rstrip().replace('u', '') 
      instructorIds = instructorIds.rstrip().replace('[', '') 
      instructorIds = instructorIds.rstrip().replace(']', '') 

      instructorIds = re.sub(r'^"|"$', '', instructorIds) 
     else: 
      instructorIds = ' ' 
     #print(instructorIds) 

     instructors = newdict[instructorIds] 
     print(instructors) 






                   ##writing the 
     attributes in a csv file## 
     f.write(instructors + ',' + courseURL + ',' + courseType + ',' + 
     CourseName + ',' + partnerName + ',' + slug + 
      ',' + specializations + ',' + course_id + ',' + description + 
     "\n") 
+2

uns bitte zeigen, was u – DineshKumar

+0

versucht haben, wäre es sein hilfreich, wenn Sie auch einen Code hinzugefügt schnitten Sie verwendet, um die Daten zu extrahieren. Beachten Sie auch, dass "instructor.v1" ein Array von Wörterbüchern ist und in der '226710' ist ein Wert nicht der Schlüssel, der Schlüssel ist 'id'. –

+0

@AnisH_GauptA Ja genau..so das ist der Grund, warum ich ein anderes Wörterbuch namens 'newdict' erstellt habe, wo ich '226710' als Schlüssel verwenden kann, damit ich den Namen als Wert bekomme. –

Antwort

1
import requests 
import json 
import re 
from bs4 import BeautifulSoup 

# url = "https://api.coursera.org/api/courses.v1?start=0&limit=20&includes=instructorIds,partnerIds,specializations,s12nlds,v1Details,v2Details&fields=instructorIds,partnerIds,specializations,s12nlds,description" 
url = "https://api.coursera.org/api/courses.v1?start=0&limit=20&includes=instructorIds&fields=instructorIds" # for brief I have shorten api call 
data = requests.get(url).text 
json_data = json.loads(data) 
cmp1 = json_data['elements'] 
cmp2 = json_data['linked']['instructors.v1'] 
for element in cmp1: 
    new_list = [] 
    for ids in element['instructorIds']: 
     for inst in cmp2: 
      new_dict = {} 
      if ids in inst['id']: 
       new_dict[ids] = inst['fullName'] 
       new_list.append(new_dict) 
    element['instructorIds'] = new_list 

print(cmp1) 
json_data['elements'] = cmp1 
with open('data.json', 'w', encoding='utf-8') as fp: # file data.json will generated in the directory from which you execute this script. assign full path to store json file to your desire place. 
    json.dump(json_data, fp, sort_keys=False, indent=4, ensure_ascii=False) 

Einige der JSON Ausgabe Beispiel:

"elements": [ 
     { 
      "name": "Gamification", 
      "slug": "gamification", 
      "instructorIds": [ 
       { 
        "226710": "Kevin Werbach" 
       } 
      ], 
      "courseType": "v2.ondemand", 
      "id": "69Bku0KoEeWZtA4u62x6lQ" 
     }, 
     { 
      "name": "Dealing With Missing Data", 
      "slug": "missing-data", 
      "instructorIds": [ 
       { 
        "8394050": "Richard Valliant, Ph.D." 
       } 
      ], 
      "courseType": "v2.ondemand", 
      "id": "0HiU7Oe4EeWTAQ4yevf_oQ" 
     }, 
... 
... 
... 
... 
... 
     { 
      "name": "Accounting Analytics", 
      "slug": "accounting-analytics", 
      "instructorIds": [ 
       { 
        "1937011": "Brian J Bushee" 
       }, 
       { 
        "14757138": "Christopher D. Ittner" 
       } 
      ], 
      "courseType": "v2.ondemand", 
      "id": "rc5KG0aUEeWG1w6arGoEIQ" 
     }, 
     { 
      "name": "Municipal Solid Waste Management in Developing Countries", 
      "slug": "solid-waste-management", 
      "instructorIds": [ 
       { 
        "2387594": "Dr. Christian Zurbrügg" 
       }, 
       { 
        "7293234": "Imanol Zabaleta" 
       }, 
       { 
        "16974677": "Félix Schmidt" 
       } 
      ], 
      "courseType": "v2.ondemand", 
      "id": "gpAI9GK4EeWFkQ7sUCFGVQ" 
     }, 
... 
... 
... 
+0

Danke eine Tonne. Das hat geholfen! –

Verwandte Themen