import scrapy
from perils.items import PerilsItem
# from scrapy.loader import ItemLoader


class ScrapePerils(scrapy.Spider):
    """Spider that extracts one ``PerilsItem`` per address entry.

    It starts from the "arretes-de-peril" page of marseille.fr and walks
    every ``<li>`` (and ``<li>/<p>``) found inside a ``div`` whose class
    is exactly ``card``.
    """

    name = "perils"
    start_urls = [
        "https://www.marseille.fr/logement-urbanisme/am%C3%A9lioration-de-lhabitat/arretes-de-peril"
    ]

    def parse(self, response):
        """Yield a ``PerilsItem`` for each matching node of the page.

        Fields populated on every item:

        * ``adrs``     -- first direct text node of the entry, or ``None``.
        * ``dernierA`` -- text of the last ``<a>`` child whose content does
          not contain "modif" (letters M/O/D/I/F are lower-cased before the
          comparison); if there is no such link, text of the last ``<a>``
          child; ``None`` when the entry has no link text at all.
        * ``As``       -- list with the text of every ``<a>`` child.
        * ``raw``      -- serialized markup of the entry itself.

        Args:
            response: the Scrapy response for one of ``start_urls``.

        Yields:
            PerilsItem: one item per selected node.
        """
        entries_xpath = '//div[@class="card"]//li|//div[@class="card"]//li/p'
        last_non_modif_link = (
            './a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()'
        )

        for entry in response.xpath(entries_xpath):
            item = PerilsItem()

            # Plain assignment (no trailing comma): the field holds the
            # string itself, consistently with 'As' and 'raw', instead of
            # a one-element tuple.
            item['adrs'] = entry.xpath('./text()').get()

            # Evaluate the "non-modif" query once and only fall back to the
            # very last link when it matched nothing.
            last_link_text = entry.xpath(last_non_modif_link).get()
            if last_link_text is None:
                last_link_text = entry.xpath('./a[last()]/text()').get()
            item['dernierA'] = last_link_text

            item['As'] = entry.xpath('./a/text()').getall()
            item['raw'] = entry.xpath('.').get()
            yield item


# Selector kept for manual inspection of the address text nodes:
# response.xpath('//div[@class="card"]//li/text()[1]|//div[@class="card"]//li/p/text()[1]').getall()