(Ich habe diesen Beitrag aktualisiert, um das Problem genauer darzustellen. Dazu gehört, dass ich weitere Informationen ergänzt habe, die ich anfangs ausgelassen hatte.) Teilstring aus komplexem HTML mit geschweiften und eckigen Klammern per regulärem Ausdruck (Regex) in Python extrahieren
Alles, was ich bisher versucht habe, um die gewünschten Zeichenfolgen zu erhalten, führt zu folgendem Fehler: AttributeError: 'NoneType' object has no attribute 'group'.
Hier ist mein Code:
image = re.search("photo: /\[[^\]]+\]/", text)
image = image.group(1)
Ich bin noch dabei, Regex zu lernen, aber dieses Problem bringt mich schon viel zu lange aus dem Konzept.
Ich möchte nur den Teil des JSON, der den Foto-Link enthält. Das ist alles, was vor "uploadTime" steht, mit Ausnahme von "id":
Hier ist das betreffende Stück JSON:
photo: [{
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418383-59832.jpg",
"uploadTime": {
"sec": 1498418386,
"usec": 192000
},
"extension": "jpg",
"md5": "6fac68fbcbdb31d17af7be277ab673be",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_0D993ADA-8AFC-4A79-8F9B-18E6F6C30B94.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418389-472609.jpg",
"uploadTime": {
"sec": 1498418392,
"usec": 118000
},
"extension": "jpg",
"md5": "6470e562d650099a1cafe9281f951c21",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_335B7BC0-F6DE-4E19-8489-3AA7B3920144.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418397-06491.jpg",
"uploadTime": {
"sec": 1498418400,
"usec": 161000
},
"extension": "jpg",
"md5": "5f2df3edfed164c062e739c0c3258970",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_9C57A971-9748-4DBD-919D-8D532C8D7C1A.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418403-391642.jpg",
"uploadTime": {
"sec": 1498418406,
"usec": 936000
},
"extension": "jpg",
"md5": "098dfa4d40e33c6897f62edc471670dd",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_A55BD209-3BFB-447E-AE59-40CF656664A8.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418409-263588.jpg",
"uploadTime": {
"sec": 1498418412,
"usec": 789000
},
"extension": "jpg",
"md5": "50b69c1db486f4bb6af723f7395a360b",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_8BCDC2F0-8CBA-442C-98F5-0389455C8014.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418415-54882.jpg",
"uploadTime": {
"sec": 1498418418,
"usec": 462000
},
"extension": "jpg",
"md5": "34296cda28b212a6c5590f233a2dca09",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_726D1636-E3A9-4515-9B95-55161FAAF730.jpg"
}, {
"id": "http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418421-389128.jpg",
"uploadTime": {
"sec": 1498418424,
"usec": 518000
},
"extension": "jpg",
"md5": "265087f19c17a99561a817f02a097b21",
"height": 600,
"width": 800,
"description": "",
"originalFilePath": "",
"originalFileName": "photo_09B01A71-46F2-4D8F-9153-CE0F0017495A.jpg"
}]
Dieses JSON-Stück ist Teil einer größeren Zeichenfolge:
<script type="text/javascript">
var listingData = {};
var userData = {};
window.detailPage = window.detailPage || {};
window.detailPage.listingData = {
id: 44782446,
status: "Active",
createTime: 1498418380,
displayTime: 1500694902,
expireTime: 1503286902,
title: "Yamaha RX-V461",
description: "Great Audio\/Video 5.1 surround receiver. Great condition ",
city: "South Jordan",
state: "UT",
zip: 84095,
contactName: "Robert",
contactHomePhone: "801-635-6040",
contactCellPhone: "801-635-6040",
contactEmail: "hasEmail",
lat: 40.5693,
lon: -111.9672,
latLon: "40.5693,-111.9672",
price: 50,
category: "Electronics",
subCategory: "Home Audio Receivers",
marketType: "Sale",
sellerType: "Private",
photo: [{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418383-59832.jpg","uploadTime":{"sec":1498418386,"usec":192000},"extension":"jpg","md5":"6fac68fbcbdb31d17af7be277ab673be","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_0D993ADA-8AFC-4A79-8F9B-18E6F6C30B94.jpg"},{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418389-472609.jpg","uploadTime":{"sec":1498418392,"usec":118000},"extension":"jpg","md5":"6470e562d650099a1cafe9281f951c21","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_335B7BC0-F6DE-4E19-8489-3AA7B3920144.jpg"},{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418397-06491.jpg","uploadTime":{"sec":1498418400,"usec":161000},"extension":"jpg","md5":"5f2df3edfed164c062e739c0c3258970","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_9C57A971-9748-4DBD-919D-8D532C8D7C1A.jpg"},{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418403-391642.jpg","uploadTime":{"sec":1498418406,"usec":936000},"extension":"jpg","md5":"098dfa4d40e33c6897f62edc471670dd","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_A55BD209-3BFB-447E-AE59-40CF656664A8.jpg"},{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418409-263588.jpg","uploadTime":{"sec":1498418412,"usec":789000},"extension":"jpg","md5":"50b69c1db486f4bb6af723f7395a360b","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_8BCDC2F0-8CBA-442C-98F5-0389455C8014.jpg"},{"id":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418415-54882.jpg","uploadTime":{"sec":1498418418,"usec":462000},"extension":"jpg","md5":"34296cda28b212a6c5590f233a2dca09","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_726D1636-E3A9-4515-9B95-55161FAAF730.jpg"},{"id":"http:\/\
/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418421-389128.jpg","uploadTime":{"sec":1498418424,"usec":518000},"extension":"jpg","md5":"265087f19c17a99561a817f02a097b21","height":600,"width":800,"description":"","originalFilePath":"","originalFileName":"photo_09B01A71-46F2-4D8F-9153-CE0F0017495A.jpg"}],
standardFeaturedDates: [],
favorited: 1,
pageViews: 68 };
window.detailPage.sellerData = {
sellerId: 1159545,
sellerAccountAge: "Nov 2010",
moreListingsFromSeller: [{"id":44782211,"displayTime":1500694907,"price":100,"title":"Moto Gear 3 Helmets and Alpine Star Tech 6 Boots S","photo":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498417151-456217.jpg"},{"id":44782400,"displayTime":1500694904,"price":30,"title":"Belts Pouch, Canteen Holsters For 2 Canteens","photo":"http:\/\/img.ksl.com\/mx\/mplace-classifieds.ksl.com\/1159545-1498418072-282620.jpg"}] };
window.detailPage.userData = {
testUser: Boolean(0)
};
</script>
Wie kann ich das Stück extrahieren, das ich haben möchte?
Danke für das Betrachten meiner Frage!
Woah. Dies ist kein HTML. Das ist JSON. Und Sie sollten einen JSON-Parser verwenden. –
Sie haben auch nicht angegeben, in welcher Sprache Sie eine Lösung wünschen. –
Wahrscheinlich ein Fake-Konto des Benutzers @AlexR, der eine Stunde zuvor [diese Frage](https://stackoverflow.com/questions/45257932/how-to-extract-string-from-complex-javascript-text-html-container-nested-in-betw) gepostet hat. –