2017-03-24 4 views
1

Unten steht der Code, der Marken- und Produktnamen von URLs scrapt; die URLs sind in einer XLSX-Datei gespeichert, und die Ausgabe ist eine XLS-Datei. Hinzufügen von Spaltenüberschriften zu xlsx mit Python?

import requests 
from bs4 import BeautifulSoup 
import xlrd 
import xlwt 

# Input workbook holding one product URL per row in its first column.
file_location = "C:/Users/Nitin Kansal/Desktop/Facets Project/Jabong ALL/Jabong/input.xlsx"

# Gather every value from column 0 of the first input sheet.
workbook = xlrd.open_workbook(file_location)
sheet = workbook.sheet_by_index(0)
products = [sheet.cell_value(row, 0) for row in range(sheet.nrows)]

# From here on `sheet` refers to the output worksheet.
book = xlwt.Workbook(encoding="utf-8", style_compression=0)
sheet = book.add_sheet("Sheet11", cell_overwrite_ok=True)

for index, url in enumerate(products):
    soup = BeautifulSoup(requests.get(url).content, "lxml")

    sheet.write(index, 0, url)

    # Column 1 receives the brand, column 2 the product title; any failure
    # while looking up or writing a value leaves that cell as an empty string.
    for column, selector in ((1, ".brand"), (2, ".product-title")):
        try:
            sheet.write(index, column, soup.select(selector)[0].text)
        except Exception:
            sheet.write(index, column, "")

book.save("Jabong Output.xls")

Die Ausgabe ist wie folgt:

http://www.jabong.com/belle-fille-Grey-Solid-Winter-Jacket-1310773.html   Belle Fille    Grey Solid Winter Jacket 
http://www.jabong.com/Femella-Red-Solid-Winter-Jacket-2880302.html     Femella    Red Solid Winter Jacket 
http://www.jabong.com/Style-Quotient-Fuchsia-Striped-Sweatshirt-2765328.html  Style Quotient Fuchsia Striped Sweatshirt 

Ich muss der Ausgabe Spaltenüberschriften (Header) hinzufügen, sodass sie wie unten aussieht:

URL                    Brand     Product_Name 
http://www.jabong.com/belle-fille-Grey-Solid-Winter-Jacket-1310773.html   Belle Fille    Grey Solid Winter Jacket 
http://www.jabong.com/Femella-Red-Solid-Winter-Jacket-2880302.html    Femella     Red Solid Winter Jacket 
http://www.jabong.com/Style-Quotient-Fuchsia-Striped-Sweatshirt-2765328.html  Style Quotient Fuchsia Striped Sweatshirt 

Antwort

1

Schreiben Sie zuerst die Spaltennamen, bevor Sie die Einträge schreiben.

import requests 
from bs4 import BeautifulSoup 
import xlrd 
import xlwt 

# Input workbook holding one product URL per row in its first column.
file_location = "C:/Users/Nitin Kansal/Desktop/Facets Project/Jabong ALL/Jabong/input.xlsx"


def _first_text(soup, selector):
    """Return the text of the first element matching *selector*, or "" if none."""
    matches = soup.select(selector)
    return matches[0].text if matches else ""


# Read the URLs from column 0 of the first input sheet. The input sheet gets
# its own name so it is not shadowed by the output worksheet created below.
workbook = xlrd.open_workbook(file_location)
input_sheet = workbook.sheet_by_index(0)
products = [input_sheet.cell_value(r, 0) for r in range(input_sheet.nrows)]

book = xlwt.Workbook(encoding="utf-8", style_compression=0)
sheet = book.add_sheet("Sheet11", cell_overwrite_ok=True)

# write column names
sheet.write(0, 0, "URL")
sheet.write(0, 1, "Brand")
sheet.write(0, 2, "Product_Name")

# start=1 shifts every data row down by one so row 0 keeps the headers.
for row, url in enumerate(products, start=1):
    # Without a timeout a stalled server would block the script forever.
    source = requests.get(url, timeout=30)
    soup = BeautifulSoup(source.content, "lxml")

    sheet.write(row, 0, url)
    # A page lacking the element yields an empty cell; genuine write errors
    # are no longer masked by a blanket `except Exception`.
    sheet.write(row, 1, _first_text(soup, ".brand"))
    sheet.write(row, 2, _first_text(soup, ".product-title"))

book.save("Jabong Output.xls")

Alternativ können Sie Pandas verwenden:

import pandas as pd 
# Build one (url, brand, product_name) tuple per URL in `products`.
rows = []
for url in products:
    # Without a timeout a stalled server would block the script forever.
    source = requests.get(url, timeout=30)
    soup = BeautifulSoup(source.content, "lxml")
    brand_matches = soup.select(".brand")
    title_matches = soup.select(".product-title")
    # A page lacking an element yields an empty string instead of an
    # IndexError, matching the blank-cell behaviour of the xlwt version.
    brand = brand_matches[0].text if brand_matches else ""
    product_name = title_matches[0].text if title_matches else ""
    rows.append((url, brand, product_name))

# The column names become the header row; index=False omits the row index.
df = pd.DataFrame(rows, columns=["URL", "Brand", "Product_Name"])
df.to_excel("output.xlsx", index=False)
Verwandte Themen