2016-12-01 6 views
1

Ich scrape eine Website und möchte das JSON der Variable data im folgenden JS-Code mithilfe eines Python-Regex extrahieren.

Extrahieren Sie Zeichenfolgen zwischen zwei Zeichenfolgen mit Regex

<script type="text/javascript"> 
P.when('A').register("ImageBlockATF", function(A){ 
    var data = { 
       'colorImages': { 'initial': [{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41SnVVzKChL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41SnVVzKChL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY355_.jpg":[355,270],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY450_.jpg":[450,342],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY550_.jpg":[550,419],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY606_.jpg":[606,461],"https://images-na.ssl-images-amazon.com/images/I/81Oo79kGp2L._SY679_.jpg":[679,517]},"variant":"MAIN","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/416rXB0xcmL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/416rXB0xcmL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SY355_.jpg":[355,276],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SY450_.jpg":[450,349],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SX425_.jpg":[547,425],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SX466_.jpg":[600,466],"https://images-na.ssl-images-amazon.com/images/I/81%2BGc-r4gLL._SX522_.jpg":[672,522]},"variant":"PT01","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/51gQxeLTYhL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/51gQxeLTYhL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX355_.jpg":[251,355],"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX450_.jpg":[318,450],"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX425_.jpg":[300,425],"https://images-na.ssl-images-a
mazon.com/images/I/817slrgsGbL._SX466_.jpg":[329,466],"https://images-na.ssl-images-amazon.com/images/I/817slrgsGbL._SX522_.jpg":[369,522]},"variant":"PT02","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41d9m8J4MbL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41d9m8J4MbL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX355_.jpg":[142,355],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX450_.jpg":[180,450],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX425_.jpg":[170,425],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX466_.jpg":[187,466],"https://images-na.ssl-images-amazon.com/images/I/71r3nXKZBmL._SX522_.jpg":[209,522]},"variant":"PT03","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41zh%2BCGamHL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41zh%2BCGamHL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY355_.jpg":[355,260],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY450_.jpg":[450,330],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY550_.jpg":[550,403],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY606_.jpg":[606,444],"https://images-na.ssl-images-amazon.com/images/I/81Uys4ccU4L._SY679_.jpg":[679,498]},"variant":"PT04","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41sMHp-WegL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41sMHp-WegL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY355_.jpg":[355,258],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY450_.jpg
":[450,327],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY550_.jpg":[550,400],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY606_.jpg":[606,441],"https://images-na.ssl-images-amazon.com/images/I/8179KoLoyGL._SY679_.jpg":[679,494]},"variant":"PT05","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SL1364_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/416TFrjOFlL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/416TFrjOFlL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX355_.jpg":[231,355],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX450_.jpg":[293,450],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX425_.jpg":[277,425],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX466_.jpg":[304,466],"https://images-na.ssl-images-amazon.com/images/I/71Sw2wrvy6L._SX522_.jpg":[340,522]},"variant":"PT06","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SL1341_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41%2BNMI0l9yL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41%2BNMI0l9yL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX355_.jpg":[190,355],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX450_.jpg":[240,450],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX425_.jpg":[227,425],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX466_.jpg":[249,466],"https://images-na.ssl-images-amazon.com/images/I/61GKBhtPKPL._SX522_.jpg":[279,522]},"variant":"PT07","lowRes":null},{"hiRes":null,"thumb":"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL._SY355_.jpg":[355,266],"https://images-
na.ssl-images-amazon.com/images/I/41ziorm06nL._SY450_.jpg":[450,338],"https://images-na.ssl-images-amazon.com/images/I/41ziorm06nL.jpg":[500,375]},"variant":"AW01","lowRes":null},{"hiRes":null,"thumb":"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL._SY355_.jpg":[355,266],"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL._SY450_.jpg":[450,338],"https://images-na.ssl-images-amazon.com/images/I/41lZ6jtPe%2BL.jpg":[500,375]},"variant":"AW02","lowRes":null},{"hiRes":null,"thumb":"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SY355_.jpg":[355,355],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SY450_.jpg":[450,450],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SX425_.jpg":[425,425],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL._SX466_.jpg":[466,466],"https://images-na.ssl-images-amazon.com/images/I/51JqQcNGjUL.jpg":[500,500]},"variant":"AW03","lowRes":null}]}, 
       'colorToAsin': {'initial': {}}, 
       'holderRatio': 1.0, 
       'holderMaxHeight': 700, 
       'heroImage': {'initial': []}, 
       'weblabs' : {} 
       }; 
    A.trigger('P.AboveTheFold'); // trigger ATF event. 
    return data; 
}); 
</script> 

Ich habe den folgenden Regex ausprobiert, aber er funktioniert nicht.

(var\s+data\s+=).*^[A.trigger('P.AboveTheFold')]$ 

Im Grunde brauche ich einen Regex, der die Zeichenfolge zwischen var data = und A.trigger('P.AboveTheFold') erfasst.

Antwort

1

Wenn Sie sicher sind, dass Ihre JSON-Daten kein ; enthalten, können Sie schreiben:

var data\s*=\s*([^;]*}); 

Es ist nicht sehr robust, und Sie sollten wahrscheinlich eine Parsing-Bibliothek verwenden. Die JSON-Daten befinden sich in der ersten Gruppe.

Siehe hier.

Wenn Sie sicher sind, dass Ihre Daten zwischen var data = und A.trigger('P.AboveTheFold') liegen, können Sie Folgendes verwenden:

(?<=var data =).*(?=A.trigger\('P\.AboveTheFold'\)) 

Siehe dort.

Die JSON-Daten sind hier die vollständige Übereinstimmung, dank positivem Lookbehind und Lookahead. Auch diese Lösung ist nicht robust: Schon ein anderer Leerraum zwischen data und = würde sie zum Beispiel brechen. Sie benötigen das Flag re.DOTALL, um Python mitzuteilen, dass . auch auf einen Zeilenumbruch passen soll.

Verwandte Themen