You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
20 lines
974 B
20 lines
974 B
import scrapy
|
|
from perils.items import PerilsItem
|
|
# from scrapy.loader import ItemLoader
|
|
|
|
class ScrapePerils(scrapy.Spider):
    """Spider that extracts address entries from the page in ``start_urls``.

    One ``PerilsItem`` is yielded for every ``<li>`` found under a ``<div>``
    whose ``class`` attribute is exactly ``"card"``, and additionally for
    every ``<p>`` that is a direct child of such an ``<li>``.
    """

    name = "perils"
    start_urls = ["https://www.marseille.fr/logement-urbanisme/am%C3%A9lioration-de-lhabitat/arretes-de-peril"]

    def parse(self, response):
        """Yield one ``PerilsItem`` per matched ``<li>`` / ``<li>/<p>`` node.

        Fields populated on each item:

        * ``adrs``     -- first direct text node of the matched node, or
          ``None`` when it has none.
        * ``dernierA`` -- text of the last direct ``<a>`` child whose content
          does not contain "modif" (letters M, O, D, I, F compared
          case-insensitively); when no such link exists, the text of the
          last direct ``<a>`` child; ``None`` when there is no link at all.
        * ``As``       -- list of the text nodes of all direct ``<a>`` children.
        * ``raw``      -- serialized markup of the matched node itself.
        """
        # translate() lowercases only the letters of "MODIF", which is enough
        # to make the contains(..., "modif") test case-insensitive.
        last_non_modif_link = (
            './a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()'
        )

        for adresses in response.xpath('//div[@class="card"]//li|//div[@class="card"]//li/p'):
            item = PerilsItem()

            # Plain assignments on purpose: a trailing comma after the
            # expression would store a 1-tuple instead of the string itself.
            item['adrs'] = adresses.xpath('./text()').get()

            # Evaluate the filtered XPath once and fall back to the very last
            # link only when every link mentions "modif" (or none matched).
            dernier = adresses.xpath(last_non_modif_link).get()
            if dernier is None:
                dernier = adresses.xpath('./a[last()]/text()').get()
            item['dernierA'] = dernier

            item['As'] = adresses.xpath('./a/text()').getall()
            item['raw'] = adresses.xpath('.').get()

            yield item