You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
1.5 KiB
31 lines
1.5 KiB
import scrapy
|
|
from perils.items import PerilsItem
|
|
# from scrapy.loader import ItemLoader
|
|
|
|
class ScrapePerils(scrapy.Spider):
    """Spider that extracts the list entries of the Marseille "arretes de peril" page.

    Each ``<li>`` (and each ``<p>`` directly inside an ``<li>``) found within a
    ``<div class="card">`` yields one ``PerilsItem``.
    """

    name = "perils"
    start_urls = [
        "https://www.marseille.fr/logement-urbanisme/am%C3%A9lioration-de-lhabitat/arretes-de-peril"
    ]

    def parse(self, response):
        """Yield one ``PerilsItem`` per list entry found in *response*.

        Fields populated on each item:

        * ``adrs``     -- first direct text node of the entry, or ``None``.
        * ``dernierA`` -- text of the last child link whose content does not
          contain "modif" (case-insensitive); when no such link exists, the
          text of the last child link of any kind, or ``None``.
        * ``As``       -- list with the text of every child link.
        * ``raw``      -- the serialized markup of the entry itself.
        """
        # translate() lowercases only the letters M, O, D, I, F, which is enough
        # to make the "modif" substring test case-insensitive in XPath 1.0.
        last_non_modif_link = (
            './a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()'
        )

        for entry in response.xpath('//div[@class="card"]//li|//div[@class="card"]//li/p'):
            # Evaluate the filtered XPath once and fall back to the very last
            # link only when every link mentions "modif" (or there is none).
            last_link_text = entry.xpath(last_non_modif_link).get()
            if last_link_text is None:
                last_link_text = entry.xpath('./a[last()]/text()').get()

            item = PerilsItem()
            # Plain values here: a trailing comma after the expression would
            # silently wrap the extracted string in a 1-tuple.
            item['adrs'] = entry.xpath('./text()').get()
            item['dernierA'] = last_link_text
            item['As'] = entry.xpath('./a/text()').getall()
            item['raw'] = entry.xpath('.').get()

            yield item
|
|
|
|
|
|
|
|
#response.xpath('//div[@class="card"]//li/text()[1]|//div[@class="card"]//li/p/text()[1]').getall()
|