2016-06-03 13 views
4

Ich versuche, mehrere WebDriver-Instanzen zu starten, und verwende dazu den Code aus: http://www.ibm.com/developerworks/aix/library/au-threadingpython/ (Python WebDriver Multithreading)

import time 
import Queue 
import urllib2 
import threading 
from selenium import webdriver 
from BeautifulSoup import BeautifulSoup 
# URLs that are later put on the work queue by main().
hosts = ["http://yahoo.com", "http://google.com", "http://amazon.com", 
    "http://ibm.com", "http://apple.com"] 
# NOTE(review): there are no call parentheses on the next two lines, so both
# names are bound to the Queue.Queue class object itself, not to instances.
queue = Queue.Queue 
out_queue = Queue.Queue 

# Thread subclass that takes hosts from `queue`, loads them with a driver and
# puts the page content on `out_queue`.
class Login_Driver(threading.Thread): 
    # Stores both queues and the driver on the instance, then prints the
    # driver's title.
    def __init__(self, queue, out_queue, driver): 
     threading.Thread.__init__(self) 
     self.queue = queue 
     self.out_queue = out_queue 
     self.driver = driver 
     print driver.title 
    # Thread body: loops forever, handling one host per iteration.
    def run(self): 
     while True: 
      #grabs host from queue 
      host = self.queue.get() 
      #grabs urls of hosts and then grabs chunk of webpage 
      # NOTE(review): `driver` is a bare name here, not `self.driver`; it is
      # neither a parameter nor a local of run().
      driver.get(host) 
      # NOTE(review): page_source is invoked with () here -- confirm whether
      # the WebDriver object exposes it as a method or as a plain attribute.
      chunk = driver.page_source() 
      #place chunk into out queue 
      self.out_queue.put(chunk) 
      #signals to queue job is done 
      self.queue.task_done() 
# Thread subclass that takes page chunks from `out_queue`, parses them with
# BeautifulSoup and prints the <title> elements it finds.
class Poster(threading.Thread): 
    # Stores the queue and the driver, then prints the driver's name.
    # NOTE(review): threading.Thread.__init__ is not called in this
    # constructor, and the parameter order is (driver, out_queue).
    def __init__(self, driver, out_queue): 
     self.out_queue = out_queue 
     self.driver = driver 
     print driver.name 
    # Thread body: loops forever, handling one chunk per iteration.
    def run(self): 
     while True: 
      #grabs a page chunk from the out queue 
      chunk = self.out_queue.get() 
      #parse the chunk 
      soup = BeautifulSoup(chunk) 
      print soup.findAll(['title']) 
      #signals to queue job is done 
      self.out_queue.task_done() 
# Wall-clock start time, used for the elapsed-time report on the last line.
start = time.time() 
# Starts five Login_Driver threads (one Firefox instance each), queues every
# host, starts five Poster threads and then waits on both queues.
def main(): 
    #spawn a pool of threads, and pass them queue instance 
    for i in range(5): 
     driver = webdriver.Firefox() 
     t = Login_Driver(queue, out_queue, driver) 
     # daemon threads do not keep the interpreter alive once main() returns
     t.setDaemon(True) 
     t.start() 
     # pauses 20 seconds after starting each worker
     time.sleep(20) 
    #populate queue with data 
    for host in hosts: 
     queue.put(host) 
    for i in range(5): 
     # NOTE(review): only one argument is passed here, while Poster.__init__
     # in this snippet is declared as (self, driver, out_queue).
     dt = Poster(out_queue) 
     dt.setDaemon(True) 
     dt.start() 
    #wait on the queue until everything has been processed 
    queue.join() 
    out_queue.join() 
main() 
# Report how long the whole run took, in seconds.
print "Elapsed Time: %s" % (time.time() - start) 

Dabei tritt folgender Fehler auf: TypeError: unbound method get() must be called with Queue instance as first argument (got nothing instead)

Ich bin Neuling, was Threads, Klassen und Prozesse angeht. Können Sie mir bitte sagen, was hier besser geeignet ist – Threads oder Prozesse? Wenn Sie mir ein Beispiel geben könnten, wäre das großartig. Danke, Leute.

UPDATE

Funktionierender Code:

import time 
import Queue 
import urllib2 
import threading 
from selenium import webdriver 
from BeautifulSoup import BeautifulSoup 

# Sites that main() feeds to the Login_Driver workers.
hosts = [
    "http://yahoo.com",
    "http://google.com",
    "http://amazon.com",
    "http://ibm.com",
    "http://apple.com",
]
# `queue` carries host URLs to the workers; `out_queue` carries the fetched
# page sources on to the Poster threads.
queue, out_queue = Queue.Queue(), Queue.Queue()

class Login_Driver(threading.Thread):
    """Worker thread that loads queued hosts in its own WebDriver.

    Every host taken from ``queue`` is opened with ``driver``; the resulting
    page source is put on ``out_queue`` and the page title is printed.
    """

    def __init__(self, queue, out_queue, driver):
        """Remember the queues and the driver this worker operates on.

        queue     -- source of host URLs; consumed with get()/task_done().
        out_queue -- receives one page source per successfully loaded host.
        driver    -- object offering get(url), ``page_source`` and ``title``.
        """
        threading.Thread.__init__(self)
        self.queue = queue
        self.out_queue = out_queue
        self.driver = driver
        print("In init first class..")

    def run(self):
        """Process hosts forever; meant to be run as a daemon thread."""
        import traceback

        while True:
            host = self.queue.get()
            try:
                self.driver.get(host)
                chunk = self.driver.page_source
                self.out_queue.put(chunk)
                print(self.driver.title)
            except Exception:
                # Thread boundary: report the failure and keep the worker
                # alive so the remaining hosts are still processed.
                traceback.print_exc()
            finally:
                # Must run for every get(), even on failure; otherwise
                # queue.join() in the caller would block forever.
                self.queue.task_done()
class Poster(threading.Thread):
    """Worker thread that parses fetched pages and prints their titles.

    Every chunk taken from ``out_queue`` is parsed with BeautifulSoup; the
    ``title`` elements found and the driver's name are printed.
    """

    def __init__(self, out_queue, driver):
        """Remember the result queue and the driver whose name is reported.

        out_queue -- source of page-source chunks; consumed with
                     get()/task_done().
        driver    -- object offering a ``name`` attribute.
        """
        threading.Thread.__init__(self)
        self.out_queue = out_queue
        self.driver = driver
        print("In init a second class..")

    def run(self):
        """Process chunks forever; meant to be run as a daemon thread."""
        import traceback

        while True:
            chunk = self.out_queue.get()
            try:
                soup = BeautifulSoup(chunk)
                print(soup.findAll(['title']))
                print(self.driver.name)
            except Exception:
                # Thread boundary: report the failure and keep the worker
                # alive so the remaining chunks are still processed.
                traceback.print_exc()
            finally:
                # Must run for every get(), even on failure; otherwise
                # out_queue.join() in the caller would block forever.
                self.out_queue.task_done()
# Wall-clock start time, used for the elapsed-time report at the bottom.
start = time.time()


def main(num_workers=5):
    """Fetch every entry of ``hosts`` with a pool of Firefox-backed threads.

    Starts ``num_workers`` Login_Driver threads (one Firefox instance each),
    queues all hosts, starts the same number of Poster threads, and waits
    until both queues have been fully processed. Every browser that was
    started is quit again before returning, also when an error occurs.

    num_workers -- number of browser/parser thread pairs to start.
    """
    drivers = []
    try:
        # Spawn the fetcher pool; each thread owns exactly one browser.
        for i in range(num_workers):
            driver = webdriver.Firefox()
            # Track the browser right away so it is quit even if a later
            # step fails.
            drivers.append(driver)
            t = Login_Driver(queue, out_queue, driver)
            t.daemon = True
            t.start()
            print("Started webdriver: --- "+str(i)+" --- from main")
        print("All started")

        # No pauses are needed between the stages: Queue.get() blocks in the
        # worker threads until work arrives.
        for host in hosts:
            queue.put(host)
            print("Opening website: "+host)
        print("All sites passed for opening..")

        # Spawn the parser pool; each Poster is handed its driver explicitly
        # and only reads the driver's name.
        for i, driver in enumerate(drivers):
            dt = Poster(out_queue, driver)
            dt.daemon = True
            dt.start()
            print("Starting second class/title and name beautifull soup and webdriver: --- "+str(i)+" --- from main")
        print("Started secound class..")

        # Wait until every host was fetched and every chunk was parsed.
        queue.join()
        out_queue.join()
        print("out_queue.join()")
    finally:
        # Both worker classes finish using their driver before calling
        # task_done(), so after the joins no thread touches a browser.
        for driver in drivers:
            driver.quit()


main()
print("Elapsed Time: %s" % (time.time() - start))

Antwort

2

Sie müssen Queue.Queue() statt Queue.Queue verwenden.

2

Sie instanziieren die Queue nicht richtig. Anstelle von

queue = Queue.Queue 
out_queue = Queue.Queue 

sollte es wie folgt lauten:

queue = Queue.Queue() 
out_queue = Queue.Queue()