main
p 3 years ago
parent a7bfe202c4
commit 86889e7f4a

@ -5,9 +5,3 @@
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
class PerilsPipeline:
    """Item pipeline that forwards every item without modifying it."""

    def process_item(self, item, spider):
        """Hand back the very same ``item`` object that was received.

        ``spider`` is accepted as part of the signature but is not used.
        """
        unchanged = item
        return unchanged

@ -9,13 +9,6 @@ class ScrapePerils(scrapy.Spider):
def parse(self, response):
# Visit every <li> found under a div whose class is "card", plus every <p>
# that is a direct child of such an <li>.
for adresses in response.xpath('//div[@class="card"]//li|//div[@class="card"]//li/p'):
item = PerilsItem()
# Earlier ItemLoader-based variant of the extraction below, left disabled.
# l = ItemLoader(item = PerilsItem(), selector=adresses)
# l.add_xpath('adrs', './text()')
# l.add_xpath('dernierA', './a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()')
# if adresses.xpath('./a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()').get() is None:
# l.add_xpath('dernierA', './a[last()]/text()')
# First direct text node of the current element.
# NOTE(review): the trailing comma makes the stored value a 1-tuple wrapping
# the .get() result rather than the bare value — confirm this is intended.
item['adrs'] = adresses.xpath('./text()').get(),
# Text of the last <a> child whose content does not contain "modif"; the
# translate() call lowercases the letters M, O, D, I, F before the check.
# NOTE(review): same trailing comma as above, so this is also a 1-tuple.
item['dernierA'] = adresses.xpath('./a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()').get(),
# Case where no non-"modif" link text was found; the body of this branch is
# not visible in this excerpt.
if adresses.xpath('./a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()').get() is None:
@ -24,7 +17,3 @@ class ScrapePerils(scrapy.Spider):
item['raw'] = adresses.xpath('.').get()
yield item
#response.xpath('//div[@class="card"]//li/text()[1]|//div[@class="card"]//li/p/text()[1]').getall()

@ -1,13 +0,0 @@
import scrapy
class ScrapePerils(scrapy.Spider):
    """Spider named "perils" that reads the <li> entries inside "card" divs."""

    name = "perils"
    start_urls = ["https://www.marseille.fr/logement-urbanisme/am%C3%A9lioration-de-lhabitat/arretes-de-peril"]

    def parse(self, response):
        """Yield one dict per <li> located under a div with class "card".

        Each dict holds the entry's first direct text node, the text of its
        last link that does not contain "modif", and the text of its last
        link overall. A field is None when its XPath matches nothing.
        """
        # translate() lowercases the letters M, O, D, I, F so the "modif"
        # test also matches upper- and mixed-case spellings.
        last_non_modif_link = './a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()'
        last_link = './a[last()]/text()'

        for entry in response.xpath('//div[@class="card"]//li'):
            record = {}
            record['adresse'] = entry.xpath('./text()').get()
            record['dernier arrêté hors modificatif'] = entry.xpath(last_non_modif_link).get()
            record['dernier arrêté'] = entry.xpath(last_link).get()
            yield record
Loading…
Cancel
Save