diff --git a/crawler/perils/pipelines.py b/crawler/perils/pipelines.py index bf3bba2..37ff3f3 100644 --- a/crawler/perils/pipelines.py +++ b/crawler/perils/pipelines.py @@ -5,9 +5,3 @@ # useful for handling different item types with a single interface -from itemadapter import ItemAdapter - - -class PerilsPipeline: - def process_item(self, item, spider): - return item diff --git a/crawler/perils/spiders/scraperils.py b/crawler/perils/spiders/scraperils.py index 4245fc3..b755348 100644 --- a/crawler/perils/spiders/scraperils.py +++ b/crawler/perils/spiders/scraperils.py @@ -9,13 +9,6 @@ class ScrapePerils(scrapy.Spider): def parse(self, response): for adresses in response.xpath('//div[@class="card"]//li|//div[@class="card"]//li/p'): item = PerilsItem() - # l = ItemLoader(item = PerilsItem(), selector=adresses) - - # l.add_xpath('adrs', './text()') - # l.add_xpath('dernierA', './a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()') - # if adresses.xpath('./a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()').get() is None: - # l.add_xpath('dernierA', './a[last()]/text()') - item['adrs'] = adresses.xpath('./text()').get(), item['dernierA'] = adresses.xpath('./a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()').get(), if adresses.xpath('./a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()').get() is None: @@ -24,7 +17,3 @@ class ScrapePerils(scrapy.Spider): item['raw'] = adresses.xpath('.').get() yield item - - - -#response.xpath('//div[@class="card"]//li/text()[1]|//div[@class="card"]//li/p/text()[1]').getall() diff --git a/crawler/perils/spiders/scraperils.py.bak b/crawler/perils/spiders/scraperils.py.bak deleted file mode 100644 index fd1f678..0000000 --- a/crawler/perils/spiders/scraperils.py.bak +++ /dev/null @@ -1,13 +0,0 @@ -import scrapy - -class ScrapePerils(scrapy.Spider): - name = "perils" - start_urls = ["https://www.marseille.fr/logement-urbanisme/am%C3%A9lioration-de-lhabitat/arretes-de-peril"] - - def parse(self, response): - for adresses in response.xpath('//div[@class="card"]//li'): - yield { - 'adresse': adresses.xpath('./text()').get(), - 'dernier arrêté hors modificatif' : adresses.xpath('./a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()').get(), - 'dernier arrêté' : adresses.xpath('./a[last()]/text()').get(), - }