import scrapy
from perils.items import PerilsItem
# from scrapy.loader import ItemLoader


class ScrapePerils(scrapy.Spider):
    """Spider that extracts one ``PerilsItem`` per list entry of the start page.

    Every ``<li>`` (and every ``<p>`` directly inside such an ``<li>``) found
    under a ``<div class="card">`` yields one item.
    """

    name = "perils"
    start_urls = ["https://www.marseille.fr/logement-urbanisme/am%C3%A9lioration-de-lhabitat/arretes-de-peril"]

    def parse(self, response):
        """Yield a ``PerilsItem`` for each matching node of *response*.

        Fields set on each item:

        * ``adrs``     -- first direct text node of the entry, or ``None``.
        * ``dernierA`` -- text of the last direct ``<a>`` child whose content
          does not contain "modif" (in any letter case); when that yields
          nothing, the text of the last direct ``<a>`` child; ``None`` when
          the entry has no usable link text at all.
        * ``As``       -- list with the text of every direct ``<a>`` child.
        * ``raw``      -- the serialized markup of the entry itself.

        Args:
            response: the Scrapy response for one of ``start_urls``.

        Yields:
            PerilsItem: one populated item per matched node.
        """
        entries_xpath = '//div[@class="card"]//li|//div[@class="card"]//li/p'
        # translate() lower-cases only the letters M, O, D, I, F, which is
        # enough to make the "modif" substring test case-insensitive.
        last_non_modif_xpath = (
            './a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()'
        )

        for adresses in response.xpath(entries_xpath):
            # Evaluate the filtered lookup once; fall back to the very last
            # link only when the filtered lookup produced no text.
            dernier = adresses.xpath(last_non_modif_xpath).get()
            if dernier is None:
                dernier = adresses.xpath('./a[last()]/text()').get()

            item = PerilsItem()
            # NOTE: values are stored as plain strings (or None). Earlier
            # revisions had a stray trailing comma on these assignments,
            # which wrapped each value in a 1-tuple.
            item['adrs'] = adresses.xpath('./text()').get()
            item['dernierA'] = dernier
            item['As'] = adresses.xpath('./a/text()').getall()
            item['raw'] = adresses.xpath('.').get()
            yield item