Commit initial

3 years ago · 45a9729855
parent 9e26ef4202
commit 45a9729855
40 changed files with 2752 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,5 @@
 #Directory (KDE)
 .directory
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
--- a/README.md
+++ b/README.md
@ -1,3 +1,13 @@
-# marseille-perils-webcrawler
+# Environnement virtuel pour python
 python3 -m venv scrapy-env
 (unix-like)  source scrapy-env/bin/activate
 (windows)    scrapy-env\Scripts\activate.bat
-Un webcrawler basé sur Scrapy pour extraire les arrêtés de péril du site de la ville de Marseille
+# Installation des dépendances
 python -m pip install --upgrade pip
 python -m pip install regex
 python -m pip install scrapy
 # Lancement de scrapy
 cd src
 scrapy crawl perils -O perils.csv
--- a/perils-table.ods
+++ b/perils-table.ods
--- a/scrapy-env/bin/Activate.ps1
+++ b/scrapy-env/bin/Activate.ps1
@ -0,0 +1,247 @@
 <#
 .Synopsis
 Activate a Python virtual environment for the current PowerShell session.
 .Description
 Pushes the python executable for a virtual environment to the front of the
 $Env:PATH environment variable and sets the prompt to signify that you are
 in a Python virtual environment. Makes use of the command line switches as
 well as the `pyvenv.cfg` file values present in the virtual environment.
 .Parameter VenvDir
 Path to the directory that contains the virtual environment to activate. The
 default value for this is the parent of the directory that the Activate.ps1
 script is located within.
 .Parameter Prompt
 The prompt prefix to display when this virtual environment is activated. By
 default, this prompt is the name of the virtual environment folder (VenvDir)
 surrounded by parentheses and followed by a single space (ie. '(.venv) ').
 .Example
 Activate.ps1
 Activates the Python virtual environment that contains the Activate.ps1 script.
 .Example
 Activate.ps1 -Verbose
 Activates the Python virtual environment that contains the Activate.ps1 script,
 and shows extra information about the activation as it executes.
 .Example
 Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
 Activates the Python virtual environment located in the specified location.
 .Example
 Activate.ps1 -Prompt "MyPython"
 Activates the Python virtual environment that contains the Activate.ps1 script,
 and prefixes the current prompt with the specified string (surrounded in
 parentheses) while the virtual environment is active.
 .Notes
 On Windows, it may be required to enable this Activate.ps1 script by setting the
 execution policy for the user. You can do this by issuing the following PowerShell
 command:
 PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
 For more information on Execution Policies: 
 https://go.microsoft.com/fwlink/?LinkID=135170
 #>
 Param(
    [Parameter(Mandatory = $false)]
    [String]
    $VenvDir,
    [Parameter(Mandatory = $false)]
    [String]
    $Prompt
 )
 <# Function declarations --------------------------------------------------- #>
 <#
 .Synopsis
 Remove all shell session elements added by the Activate script, including the
 addition of the virtual environment's Python executable from the beginning of
 the PATH variable.
 .Parameter NonDestructive
 If present, do not remove this function from the global namespace for the
 session.
 #>
 function global:deactivate ([switch]$NonDestructive) {
    # Revert to original values
    # Each section below acts only when the corresponding saved item exists,
    # so calling this function when nothing is activated changes nothing.
    # The prior prompt:
    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
    }
    # The prior PYTHONHOME:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
    }
    # The prior PATH:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
    }
    # Just remove the VIRTUAL_ENV altogether:
    if (Test-Path -Path Env:VIRTUAL_ENV) {
        Remove-Item -Path env:VIRTUAL_ENV
    }
    # Just remove VIRTUAL_ENV_PROMPT altogether.
    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
    }
    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
    # (-Force is required because the activation code creates this variable
    # with -Option ReadOnly.)
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }
    # Leave deactivate function in the global namespace if requested:
    # without -NonDestructive the function deletes itself as its last step.
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
 }
 <#
 .Description
 Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
 given folder, and returns them in a map.
 For each line in the pyvenv.cfg file, if that line can be parsed into exactly
 two strings separated by `=` (with any amount of whitespace surrounding the =)
 then it is considered a `key = value` line. The left hand string is the key,
 the right hand is the value.
 If the value starts with a `'` or a `"` then the first and last character is
 stripped from the value before being captured.
 .Parameter ConfigDir
 Path to the directory that contains the `pyvenv.cfg` file.
 #>
 function Get-PyVenvConfig(
    [String]
    $ConfigDir
 ) {
    # Parse "$ConfigDir/pyvenv.cfg" into a hashtable of key -> value strings.
    # Returns an empty hashtable when the file cannot be resolved.
    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
    # An empty map will be returned if no config file is found.
    $pyvenvConfig = @{ }
    if ($pyvenvConfigPath) {
        Write-Verbose "File exists, parse `key = value` lines"
        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
        $pyvenvConfigContent | ForEach-Object {
            # Split on the first '=' only, so values may themselves contain '='.
            $keyval = $PSItem -split "\s*=\s*", 2
            if ($keyval[0] -and $keyval[1]) {
                $val = $keyval[1]
                # Remove extraneous quotations around a string value.
                # The length guard keeps a value that is a lone quote character
                # from reaching Substring(1, -1), which would throw.
                if (($val.Length -ge 2) -and ("'""".Contains($val.Substring(0, 1)))) {
                    $val = $val.Substring(1, $val.Length - 2)
                }
                $pyvenvConfig[$keyval[0]] = $val
                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
            }
        }
    }
    return $pyvenvConfig
 }
 <# Begin Activate script --------------------------------------------------- #>
 # Determine the containing directory of this script
 $VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
 $VenvExecDir = Get-Item -Path $VenvExecPath
 Write-Verbose "Activation script is located in path: '$VenvExecPath'"
 Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)'"
 Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)'"
 # Set values required in priority: CmdLine, ConfigFile, Default
 # First, get the location of the virtual environment, it might not be
 # VenvExecDir if specified on the command line.
 if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
 }
 else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
 }
 # Next, read the `pyvenv.cfg` file to determine any required value such
 # as `prompt`.
 $pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
 # Next, set the prompt from the command line, or the config file, or
 # just use the name of the virtual environment folder.
 if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
 }
 else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
 }
 Write-Verbose "Prompt = '$Prompt'"
 Write-Verbose "VenvDir='$VenvDir'"
 # Deactivate any currently active virtual environment, but leave the
 # deactivate function in place.
 deactivate -nondestructive
 # Now set the environment variable VIRTUAL_ENV, used by many tools to determine
 # that there is an activated venv.
 $env:VIRTUAL_ENV = $VenvDir
 if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
    Write-Verbose "Setting prompt to '$Prompt'"
    # Set the prompt to include the env name
    # Make sure _OLD_VIRTUAL_PROMPT is global
    # (declare a global placeholder first, then overwrite it with a copy of
    # the current prompt function so deactivate can restore it later).
    function global:_OLD_VIRTUAL_PROMPT { "" }
    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
    function global:prompt {
        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
        _OLD_VIRTUAL_PROMPT
    }
    $env:VIRTUAL_ENV_PROMPT = $Prompt
 }
 # Clear PYTHONHOME
 # (the previous value is kept in _OLD_VIRTUAL_PYTHONHOME for deactivate).
 if (Test-Path -Path Env:PYTHONHOME) {
    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
    Remove-Item -Path Env:PYTHONHOME
 }
 # Add the venv to the PATH
 # (the previous value is kept in _OLD_VIRTUAL_PATH for deactivate).
 Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
 $Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
--- a/scrapy-env/bin/activate
+++ b/scrapy-env/bin/activate
@ -0,0 +1,69 @@
 # This file must be used with "source bin/activate" *from bash*
 # you cannot run it directly
 deactivate () {
    # reset old environment variables
    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
        PATH="${_OLD_VIRTUAL_PATH:-}"
        export PATH
        unset _OLD_VIRTUAL_PATH
    fi
    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
        export PYTHONHOME
        unset _OLD_VIRTUAL_PYTHONHOME
    fi
    # This should detect bash and zsh, which have a hash command that must
    # be called to get it to forget past commands.  Without forgetting
    # past commands the $PATH changes we made may not be respected
    if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
        hash -r 2> /dev/null
    fi
    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
        PS1="${_OLD_VIRTUAL_PS1:-}"
        export PS1
        unset _OLD_VIRTUAL_PS1
    fi
    unset VIRTUAL_ENV
    unset VIRTUAL_ENV_PROMPT
    if [ ! "${1:-}" = "nondestructive" ] ; then
    # Self destruct!
        unset -f deactivate
    fi
 }
 # unset irrelevant variables
 deactivate nondestructive
 # NOTE: this absolute path is written literally into the script, so the
 # environment only activates correctly while it lives at this location.
 VIRTUAL_ENV="/home/p/Documents/Mapping/scraping/perils/scrapy-env"
 export VIRTUAL_ENV
 # Remember the current PATH so deactivate can restore it, then put the
 # environment's bin directory first.
 _OLD_VIRTUAL_PATH="$PATH"
 PATH="$VIRTUAL_ENV/bin:$PATH"
 export PATH
 # unset PYTHONHOME if set
 # this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
 # could use `if (set -u; : $PYTHONHOME) ;` in bash
 if [ -n "${PYTHONHOME:-}" ] ; then
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
 fi
 # Prefix the prompt with the environment name unless the user opted out
 # by setting VIRTUAL_ENV_DISABLE_PROMPT to a non-empty value.
 if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    PS1="(scrapy-env) ${PS1:-}"
    export PS1
    VIRTUAL_ENV_PROMPT="(scrapy-env) "
    export VIRTUAL_ENV_PROMPT
 fi
 # This should detect bash and zsh, which have a hash command that must
 # be called to get it to forget past commands.  Without forgetting
 # past commands the $PATH changes we made may not be respected
 # (two separate tests joined with || replace the obsolescent "-o").
 if [ -n "${BASH:-}" ] || [ -n "${ZSH_VERSION:-}" ] ; then
    hash -r 2> /dev/null
 fi
--- a/scrapy-env/bin/activate.csh
+++ b/scrapy-env/bin/activate.csh
@ -0,0 +1,26 @@
 # This file must be used with "source bin/activate.csh" *from csh*.
 # You cannot run it directly.
 # Created by Davide Di Blasi <davidedb@gmail.com>.
 # Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
 # "deactivate" is an alias that, in order: restores PATH from
 # _OLD_VIRTUAL_PATH when that variable is set, runs rehash, restores the
 # prompt from _OLD_VIRTUAL_PROMPT when that variable is set, unsets
 # VIRTUAL_ENV and VIRTUAL_ENV_PROMPT, and finally removes itself unless its
 # arguments ("\!:*") are exactly "nondestructive".
 alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
 # Unset irrelevant variables.
 deactivate nondestructive
 # NOTE: this absolute path is written literally into the script, so the
 # environment only activates correctly while it lives at this location.
 setenv VIRTUAL_ENV "/home/p/Documents/Mapping/scraping/perils/scrapy-env"
 # Save the current PATH for deactivate, then put the environment's bin
 # directory first.
 set _OLD_VIRTUAL_PATH="$PATH"
 setenv PATH "$VIRTUAL_ENV/bin:$PATH"
 # Save the current prompt for deactivate; only change the prompt when
 # VIRTUAL_ENV_DISABLE_PROMPT is not set.
 set _OLD_VIRTUAL_PROMPT="$prompt"
 if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
    set prompt = "(scrapy-env) $prompt"
    setenv VIRTUAL_ENV_PROMPT "(scrapy-env) "
 endif
 # Make "pydoc" run the pydoc module of whichever "python" is first on PATH.
 alias pydoc python -m pydoc
 rehash
--- a/scrapy-env/bin/activate.fish
+++ b/scrapy-env/bin/activate.fish
@ -0,0 +1,66 @@
 # This file must be used with "source <venv>/bin/activate.fish" *from fish*
 # (https://fishshell.com/); you cannot run it directly.
 # Undo the activation: restore PATH, PYTHONHOME and the prompt function from
 # their saved copies (when present) and erase the VIRTUAL_ENV* variables.
 # Pass "nondestructive" as the first argument to keep this function defined.
 function deactivate  -d "Exit virtual environment and return to normal shell environment"
    # reset old environment variables
    if test -n "$_OLD_VIRTUAL_PATH"
        set -gx PATH $_OLD_VIRTUAL_PATH
        set -e _OLD_VIRTUAL_PATH
    end
    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
        set -e _OLD_VIRTUAL_PYTHONHOME
    end
    # The prompt was overridden only if _OLD_FISH_PROMPT_OVERRIDE is set:
    # erase the override, then copy the saved _old_fish_prompt back into place.
    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
        functions -e fish_prompt
        set -e _OLD_FISH_PROMPT_OVERRIDE
        functions -c _old_fish_prompt fish_prompt
        functions -e _old_fish_prompt
    end
    set -e VIRTUAL_ENV
    set -e VIRTUAL_ENV_PROMPT
    if test "$argv[1]" != "nondestructive"
        # Self-destruct!
        functions -e deactivate
    end
 end
 # Unset irrelevant variables.
 deactivate nondestructive
 # NOTE: this absolute path is written literally into the script, so the
 # environment only activates correctly while it lives at this location.
 set -gx VIRTUAL_ENV "/home/p/Documents/Mapping/scraping/perils/scrapy-env"
 # Save the current PATH for deactivate, then put the environment's bin
 # directory first.
 set -gx _OLD_VIRTUAL_PATH $PATH
 set -gx PATH "$VIRTUAL_ENV/bin" $PATH
 # Unset PYTHONHOME if set.
 if set -q PYTHONHOME
    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
    set -e PYTHONHOME
 end
 if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
    # fish uses a function instead of an env var to generate the prompt.
    # Save the current fish_prompt function as the function _old_fish_prompt.
    functions -c fish_prompt _old_fish_prompt
    # With the original prompt function renamed, we can override with our own.
    function fish_prompt
        # Save the return status of the last command.
        set -l old_status $status
        # Output the venv prompt; color taken from the blue of the Python logo.
        printf "%s%s%s" (set_color 4B8BBE) "(scrapy-env) " (set_color normal)
        # Restore the return status of the previous command.
        # `source` reads the piped command from stdin; it replaces the
        # deprecated `.` alias.
        echo "exit $old_status" | source
        # Output the original/"old" prompt.
        _old_fish_prompt
    end
    # Marker checked by deactivate to know the prompt was overridden.
    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
    set -gx VIRTUAL_ENV_PROMPT "(scrapy-env) "
 end
--- a/scrapy-env/bin/automat-visualize
+++ b/scrapy-env/bin/automat-visualize
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs automat._visualize.tool().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from automat._visualize import tool
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever tool() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(tool())
--- a/scrapy-env/bin/cftp
+++ b/scrapy-env/bin/cftp
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs twisted.conch.scripts.cftp.run().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from twisted.conch.scripts.cftp import run
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever run() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(run())
--- a/scrapy-env/bin/ckeygen
+++ b/scrapy-env/bin/ckeygen
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs twisted.conch.scripts.ckeygen.run().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from twisted.conch.scripts.ckeygen import run
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever run() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(run())
--- a/scrapy-env/bin/conch
+++ b/scrapy-env/bin/conch
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs twisted.conch.scripts.conch.run().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from twisted.conch.scripts.conch import run
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever run() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(run())
--- a/scrapy-env/bin/jp.py
+++ b/scrapy-env/bin/jp.py
@ -0,0 +1,54 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 import sys
 import json
 import argparse
 from pprint import pformat
 import jmespath
 from jmespath import exceptions
 def main():
    """Evaluate a JMESPath expression against JSON input from the command line.

    Arguments are read from ``sys.argv``: a positional ``expression``, an
    optional ``-f/--filename`` naming the JSON input (stdin is read when it
    is omitted), and ``--ast`` to print the parsed expression instead of
    searching any data.

    Returns:
        0 on success; 1 after writing a one-line diagnostic to stderr when
        the expression fails with one of the handled JMESPath errors.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('expression')
    parser.add_argument('-f', '--filename',
                        help=('The filename containing the input data.  '
                              'If a filename is not given then data is '
                              'read from stdin.'))
    parser.add_argument('--ast', action='store_true',
                        help=('Pretty print the AST, do not search the data.'))
    args = parser.parse_args()
    expression = args.expression
    if args.ast:
        # Only print the AST
        # Report an unparsable expression the same way the search path
        # below does, instead of letting the exception escape.
        try:
            expression = jmespath.compile(args.expression)
        except exceptions.ParseError as e:
            sys.stderr.write("syntax-error: %s\n" % e)
            return 1
        sys.stdout.write(pformat(expression.parsed))
        sys.stdout.write('\n')
        return 0
    if args.filename:
        with open(args.filename, 'r') as f:
            data = json.load(f)
    else:
        data = sys.stdin.read()
        data = json.loads(data)
    try:
        sys.stdout.write(json.dumps(
            jmespath.search(expression, data), indent=4, ensure_ascii=False))
        sys.stdout.write('\n')
    except exceptions.ArityError as e:
        sys.stderr.write("invalid-arity: %s\n" % e)
        return 1
    except exceptions.JMESPathTypeError as e:
        sys.stderr.write("invalid-type: %s\n" % e)
        return 1
    except exceptions.UnknownFunctionError as e:
        sys.stderr.write("unknown-function: %s\n" % e)
        return 1
    except exceptions.ParseError as e:
        sys.stderr.write("syntax-error: %s\n" % e)
        return 1
    # Explicit success status, matching the --ast branch.
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/scrapy-env/bin/mailmail
+++ b/scrapy-env/bin/mailmail
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs twisted.mail.scripts.mailmail.run().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from twisted.mail.scripts.mailmail import run
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever run() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(run())
--- a/scrapy-env/bin/normalizer
+++ b/scrapy-env/bin/normalizer
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs charset_normalizer.cli.normalizer.cli_detect().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from charset_normalizer.cli.normalizer import cli_detect
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever cli_detect() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(cli_detect())
--- a/scrapy-env/bin/pip
+++ b/scrapy-env/bin/pip
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs pip._internal.cli.main.main().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from pip._internal.cli.main import main
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever main() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
--- a/scrapy-env/bin/pip3
+++ b/scrapy-env/bin/pip3
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs pip._internal.cli.main.main().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from pip._internal.cli.main import main
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever main() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
--- a/scrapy-env/bin/pip3.10
+++ b/scrapy-env/bin/pip3.10
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs pip._internal.cli.main.main().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from pip._internal.cli.main import main
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever main() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
--- a/scrapy-env/bin/pyhtmlizer
+++ b/scrapy-env/bin/pyhtmlizer
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs twisted.scripts.htmlizer.run().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from twisted.scripts.htmlizer import run
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever run() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(run())
--- a/scrapy-env/bin/python
+++ b/scrapy-env/bin/python
@ -0,0 +1 @@
 python3
--- a/scrapy-env/bin/python3
+++ b/scrapy-env/bin/python3
@ -0,0 +1 @@
 /usr/bin/python3
--- a/scrapy-env/bin/python3.10
+++ b/scrapy-env/bin/python3.10
@ -0,0 +1 @@
 python3
--- a/scrapy-env/bin/scrapy
+++ b/scrapy-env/bin/scrapy
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs scrapy.cmdline.execute().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from scrapy.cmdline import execute
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever execute() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(execute())
--- a/scrapy-env/bin/tkconch
+++ b/scrapy-env/bin/tkconch
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs twisted.conch.scripts.tkconch.run().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from twisted.conch.scripts.tkconch import run
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever run() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(run())
--- a/scrapy-env/bin/tldextract
+++ b/scrapy-env/bin/tldextract
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs tldextract.cli.main().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from tldextract.cli import main
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever main() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
--- a/scrapy-env/bin/trial
+++ b/scrapy-env/bin/trial
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs twisted.scripts.trial.run().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from twisted.scripts.trial import run
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever run() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(run())
--- a/scrapy-env/bin/twist
+++ b/scrapy-env/bin/twist
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs twisted.application.twist._twist.Twist.main().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from twisted.application.twist._twist import Twist
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever Twist.main() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(Twist.main())
--- a/scrapy-env/bin/twistd
+++ b/scrapy-env/bin/twistd
@ -0,0 +1,8 @@
 #!/home/p/Documents/Mapping/scraping/perils/scrapy-env/bin/python
 # -*- coding: utf-8 -*-
 # Command-line shim: runs twisted.scripts.twistd.run().
 # NOTE(review): the shebang is an absolute path into one user's home
 # directory, so this script only runs where that interpreter exists.
 import re
 import sys
 from twisted.scripts.twistd import run
 if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from argv[0], then exit with
    # whatever run() returns.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(run())
--- a/scrapy-env/lib64
+++ b/scrapy-env/lib64
@ -0,0 +1 @@
 lib
--- a/scrapy-env/pyvenv.cfg
+++ b/scrapy-env/pyvenv.cfg
@ -0,0 +1,3 @@
 home = /usr/bin
 include-system-site-packages = false
 version = 3.10.8
--- a/src/perils.csv
+++ b/src/perils.csv
--- a/src/perils/init.py
+++ b/src/perils/init.py
--- a/src/perils/items.py
+++ b/src/perils/items.py
@ -0,0 +1,33 @@
 import scrapy
 # import regex
 # from scrapy.loader import ItemLoader
 # from itemloaders.processors import TakeFirst, MapCompose
 # from w3lib.html import remove_tags
 # # Une fonction qui sépare les adresses en fonction des noms de rue
 # def splitStreet(value):
 #     # Le regex qui évalue là où il faut séparer la chaîne de caractère 
 #     # (présence de "et / - + ainsi que" 
 #     # sans que ce séparateur ne soit placé près d'un groupe de numéros de la même rue)
 #     expr = r"((?<!(\d\sa)|(\d\sb)|(\d\sbis)|(\d\ster)|(\d)|(\da)|(\db)|(\dbis)|(\dter)|(\d\s)|(\da\s)|(\db\s)|(\dbis\s)|(\dter\s)|(\d\sa\s)|(\d\sb\s)|(\d\sbis\s)|(\d\ster\s))\/|((?<!(\d))\s\bet)|(ainsi\sque)|((?<!(\d))\s-\s(?!(bt)|(bis)|(ter)))|(?<!(\d\sa)|(\d\sb)|(\d\sbis)|(\d\ster)|(\d)|(\da)|(\db)|(\dbis)|(\dter)|(\d\s)|(\da\s)|(\db\s)|(\dbis\s)|(\dter\s)|(\d\sa\s)|(\d\sb\s)|(\d\sbis\s)|(\d\ster\s))\+)"
 #     # Remplacement de chaque séparateur par un pipe ("|")
 #     subst = "|"
 #     repl = regex.sub(expr, subst, value, 0, regex.MULTILINE | regex.IGNORECASE)
 #     # Formatage (suppression des espaces insécables et des ":")
 #     filtered = regex.sub('\\xa0|:', '', repl, 0, regex.MULTILINE | regex.IGNORECASE)
 #     # Séparation en liste
 #     splitted = regex.split('\|', filtered, regex.MULTILINE | regex.IGNORECASE)
 #     # Suppression des espaces en début et fin de chaîne
 #     stripped = [x.strip() for x in splitted]
 #     # Suppression des "None" de la liste (failsafe, ne devrait pas être nécessaire)
 #     result = list(filter(None, stripped))
 #     return result
 class PerilsItem(scrapy.Item):
    """Scrapy item declaring the four fields collected by the crawl.

    NOTE(review): the meaning of each field is not visible in this file;
    the names suggest addresses (``adrs``), a latest order (``dernierA``),
    a list of orders (``As``) and the unprocessed text (``raw``) -- confirm
    against the spider that fills them.
    """
    # Declaration order is kept as in the original definition.
    adrs = scrapy.Field()
    dernierA = scrapy.Field()
    As = scrapy.Field()
    raw = scrapy.Field()
--- a/src/perils/middlewares.py
+++ b/src/perils/middlewares.py
@ -0,0 +1,141 @@
 # Define here the models for your spider middleware
 #
 # See documentation in:
 # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
 from scrapy import signals
 # useful for handling different item types with a single interface
 from itemadapter import is_item, ItemAdapter
 # from copy import deepcopy
 # import regex
 # class SplitByStreet:
 #         @classmethod
 #         # Une fonction qui sépare les adresses en fonction des noms de rue
 #         def splitStreet(self, value):
 #             # Le regex qui évalue là où il faut séparer la chaîne de caractère 
 #             # (présence de "et / - + ainsi que" 
 #             # sans que ce séparateur ne soit placé près d'un groupe de numéros de la même rue)
 #             expr = r"((?<!(\d\sa)|(\d\sb)|(\d\sbis)|(\d\ster)|(\d)|(\da)|(\db)|(\dbis)|(\dter)|(\d\s)|(\da\s)|(\db\s)|(\dbis\s)|(\dter\s)|(\d\sa\s)|(\d\sb\s)|(\d\sbis\s)|(\d\ster\s))\/|((?<!(\d))\s\bet)|(ainsi\sque)|((?<!(\d))\s-\s(?!(bt)|(bis)|(ter)))|(?<!(\d\sa)|(\d\sb)|(\d\sbis)|(\d\ster)|(\d)|(\da)|(\db)|(\dbis)|(\dter)|(\d\s)|(\da\s)|(\db\s)|(\dbis\s)|(\dter\s)|(\d\sa\s)|(\d\sb\s)|(\d\sbis\s)|(\d\ster\s))\+)"
 #             # Remplacement de chaque séparateur par un pipe ("|")
 #             subst = "|"
 #             repl = regex.sub(expr, subst, value, 0, regex.MULTILINE | regex.IGNORECASE)
 #             # Formatage (suppression des espaces insécables et des ":")
 #             filtered = regex.sub('\\xa0|:', '', repl, 0, regex.MULTILINE | regex.IGNORECASE)
 #             # Séparation en liste
 #             splitted = regex.split('\|', filtered, regex.MULTILINE | regex.IGNORECASE)
 #             # Suppression des espaces en début et fin de chaîne
 #             stripped = [x.strip() for x in splitted]
 #             # Suppression des "None" de la liste (failsafe, ne devrait pas être nécessaire)
 #             value = list(filter(None, stripped))
 #             return value
 #         def process_spider_output(self, response, result, spider):
 #             for r in result:
 #                 adresses = r.pop("adrs")
 #                 adresses = adresses[0]
 #                 indiv = self.splitStreet(adresses)
 #                 index = 0
 #                 for i in indiv:
 #                     d = {"adrs":indiv[index]}
 #                     index += 1
 #                     yield d
 class PerilsSpiderMiddleware:
    """Pass-through spider middleware.

    Every hook hands back what it receives without modification. None of
    the hooks is mandatory: when one is missing, Scrapy behaves as if the
    middleware leaves the passed objects untouched.
    """
    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the spider_opened signal."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened,
            signal=signals.spider_opened,
        )
        return middleware
    def process_spider_input(self, response, spider):
        """Hook for each response on its way into the spider.

        Should return None or raise an exception; this one returns None.
        """
        return None
    def process_spider_output(self, response, result, spider):
        """Hook for what the spider produced for ``response``.

        Must yield Request or item objects; every element of ``result`` is
        yielded unchanged, in order.
        """
        yield from result
    def process_spider_exception(self, response, exception, spider):
        """Hook for an exception raised by the spider or by another
        middleware's process_spider_input().

        Should return None or an iterable of Request or item objects; this
        one returns None.
        """
        return None
    def process_start_requests(self, start_requests, spider):
        """Hook for the spider's start requests (no response is associated).

        Must yield requests only; every element is yielded unchanged.
        """
        yield from start_requests
    def spider_opened(self, spider):
        """Log the name of the spider that was opened."""
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)
 class PerilsDownloaderMiddleware:
    """Pass-through downloader middleware.

    Requests and responses are handed on untouched. None of the hooks is
    mandatory: when one is missing, Scrapy behaves as if the middleware
    leaves the passed objects untouched.
    """
    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the spider_opened signal."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened,
            signal=signals.spider_opened,
        )
        return middleware
    def process_request(self, request, spider):
        """Hook for each request passing through the downloader middleware.

        Allowed outcomes are: return None (keep processing the request),
        return a Response, return a Request, or raise IgnoreRequest (the
        process_exception() methods of installed downloader middleware are
        then called). This one returns None.
        """
        return None
    def process_response(self, request, response, spider):
        """Hook for the response coming back from the downloader.

        Allowed outcomes are: return a Response, return a Request, or raise
        IgnoreRequest. This one returns ``response`` itself.
        """
        return response
    def process_exception(self, request, exception, spider):
        """Hook for an exception raised by a download handler or by another
        middleware's process_request().

        Allowed outcomes are: return None (keep processing the exception),
        or return a Response or a Request (either stops the
        process_exception() chain). This one returns None.
        """
        return None
    def spider_opened(self, spider):
        """Log the name of the spider that was opened."""
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)
--- a/src/perils/pipelines.py
+++ b/src/perils/pipelines.py
@ -0,0 +1,13 @@
 # Define your item pipelines here
 #
 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
 # useful for handling different item types with a single interface
 from itemadapter import ItemAdapter
 class PerilsPipeline:
    """Item pipeline that passes every item through unchanged."""
    def process_item(self, item, spider):
        """Return ``item`` exactly as received; ``spider`` is not used."""
        unchanged = item
        return unchanged
--- a/src/perils/settings.py
+++ b/src/perils/settings.py
@ -0,0 +1,93 @@
 # Scrapy settings for perils project
 #
 # For simplicity, this file contains only settings considered important or
 # commonly used. You can find more settings consulting the documentation:
 #
 #     https://docs.scrapy.org/en/latest/topics/settings.html
 #     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
 #     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
 BOT_NAME = 'perils'
 SPIDER_MODULES = ['perils.spiders']
 NEWSPIDER_MODULE = 'perils.spiders'
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
 #USER_AGENT = 'perils (+http://www.yourdomain.com)'
 # Obey robots.txt rules
 ROBOTSTXT_OBEY = True
 # Configure maximum concurrent requests performed by Scrapy (default: 16)
 #CONCURRENT_REQUESTS = 32
 # Configure a delay for requests for the same website (default: 0)
 # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
 # See also autothrottle settings and docs
 #DOWNLOAD_DELAY = 3
 # The download delay setting will honor only one of:
 #CONCURRENT_REQUESTS_PER_DOMAIN = 16
 #CONCURRENT_REQUESTS_PER_IP = 16
 # Disable cookies (enabled by default)
 #COOKIES_ENABLED = False
 # Disable Telnet Console (enabled by default)
 #TELNETCONSOLE_ENABLED = False
 # Override the default request headers:
 #DEFAULT_REQUEST_HEADERS = {
 #   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 #   'Accept-Language': 'en',
 #}
 # Enable or disable spider middlewares
 # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
 # The template middleware (PerilsSpiderMiddleware) is left commented out;
 # only the project's own SplitAndSort middleware, defined in
 # perils/splittermidware.py, is enabled, with order value 543.
 SPIDER_MIDDLEWARES = {
 #    'perils.middlewares.PerilsSpiderMiddleware': 543,
    'perils.splittermidware.SplitAndSort': 543,
 }
 # Enable or disable downloader middlewares
 # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
 #DOWNLOADER_MIDDLEWARES = {
 #    'perils.middlewares.PerilsDownloaderMiddleware': 543,
 #}
 # Enable or disable extensions
 # See https://docs.scrapy.org/en/latest/topics/extensions.html
 #EXTENSIONS = {
 #    'scrapy.extensions.telnet.TelnetConsole': None,
 #}
 # Configure item pipelines
 # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
 #ITEM_PIPELINES = {
 #    'perils.pipelines.PerilsPipeline': 300,
 #}
 # Enable and configure the AutoThrottle extension (disabled by default)
 # See https://docs.scrapy.org/en/latest/topics/autothrottle.html
 #AUTOTHROTTLE_ENABLED = True
 # The initial download delay
 #AUTOTHROTTLE_START_DELAY = 5
 # The maximum download delay to be set in case of high latencies
 #AUTOTHROTTLE_MAX_DELAY = 60
 # The average number of requests Scrapy should be sending in parallel to
 # each remote server
 #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
 # Enable showing throttling stats for every response received:
 #AUTOTHROTTLE_DEBUG = False
 # Enable and configure HTTP caching (disabled by default)
 # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
 #HTTPCACHE_ENABLED = True
 #HTTPCACHE_EXPIRATION_SECS = 0
 #HTTPCACHE_DIR = 'httpcache'
 #HTTPCACHE_IGNORE_HTTP_CODES = []
 #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
 # Set settings whose default value is deprecated to a future-proof value
 REQUEST_FINGERPRINTER_IMPLEMENTATION = '2.7'
 TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'
--- a/src/perils/spiders/init.py
+++ b/src/perils/spiders/init.py
@ -0,0 +1,4 @@
 # This package will contain the spiders of your Scrapy project
 #
 # Please refer to the documentation for information on how to create and manage
 # your spiders.
--- a/src/perils/spiders/scraperils.py
+++ b/src/perils/spiders/scraperils.py
@ -0,0 +1,30 @@
 import scrapy
 from perils.items import PerilsItem
 # from scrapy.loader import ItemLoader
 class ScrapePerils(scrapy.Spider):
    """Spider that reads the "arrêtés de péril" page of the Marseille city website.

    Each ``<li>`` (or ``<p>`` directly inside a ``<li>``) found within a
    ``div`` of class ``card`` produces one ``PerilsItem``.
    """
    name = "perils"
    start_urls = ["https://www.marseille.fr/logement-urbanisme/am%C3%A9lioration-de-lhabitat/arretes-de-peril"]

    def parse(self, response):
        """Yield one ``PerilsItem`` per list entry of the page.

        Fields set on every item:
          - ``adrs``: 1-tuple holding the first text node of the entry;
          - ``dernierA``: 1-tuple holding the text of the last link whose text
            does not contain "modif" (any letter case), or the text of the
            very last link when no such link exists;
          - ``As``: list with the text of every link of the entry;
          - ``raw``: the serialized entry itself.
        """
        entries_xpath = '//div[@class="card"]//li|//div[@class="card"]//li/p'
        last_non_modif_xpath = './a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()'

        for node in response.xpath(entries_xpath):
            # Prefer the last order that is not a "modificatif"; fall back to
            # the last link of the entry when there is none.
            latest = node.xpath(last_non_modif_xpath).get()
            if latest is None:
                latest = node.xpath('./a[last()]/text()').get()

            item = PerilsItem()
            # 'adrs' and 'dernierA' are deliberately stored as 1-tuples: the
            # SplitAndSort middleware reads them back with ``[0]``.
            item['adrs'] = (node.xpath('./text()').get(),)
            item['dernierA'] = (latest,)
            item['As'] = node.xpath('./a/text()').getall()
            item['raw'] = node.xpath('.').get()
            yield item
 #response.xpath('//div[@class="card"]//li/text()[1]|//div[@class="card"]//li/p/text()[1]').getall()
--- a/src/perils/spiders/scraperils.py.bak
+++ b/src/perils/spiders/scraperils.py.bak
@ -0,0 +1,13 @@
 import scrapy
 class ScrapePerils(scrapy.Spider):
    """Backup variant of the spider: yields one plain dict per ``<li>`` of a ``div.card``."""
    name = "perils"
    start_urls = ["https://www.marseille.fr/logement-urbanisme/am%C3%A9lioration-de-lhabitat/arretes-de-peril"]

    def parse(self, response):
        """Yield, for every list entry, its address text and two "last order" link texts.

        The dict keys are, in order: the first text node of the entry, the
        text of the last link not containing "modif" (any letter case), and
        the text of the very last link.
        """
        last_non_modif_xpath = './a[not(contains(translate(.,"MODIF","modif"),"modif"))][last()]/text()'
        for node in response.xpath('//div[@class="card"]//li'):
            row = {}
            row['adresse'] = node.xpath('./text()').get()
            row['dernier arrêté hors modificatif'] = node.xpath(last_non_modif_xpath).get()
            row['dernier arrêté'] = node.xpath('./a[last()]/text()').get()
            yield row
--- a/src/perils/splittermidware.py
+++ b/src/perils/splittermidware.py
@ -0,0 +1,211 @@
 # Dépendances
 from scrapy import signals
 # Python a une fonctionnalité regex native (re.py) mais elle a quelques limitations qui dans le cas présent rendent les regex inopérants
 # On importe donc regex.py (pip install regex) qui étend les capacités de re.py (notamment la tolérance aux erreurs)
 import regex
 class SplitAndSort:
        """Spider middleware that expands each scraped entry into one row per street number.

        ``process_spider_output`` is the entry point. The other methods are the
        text-processing steps it chains: street splitting, filtering, clean-up,
        number extraction, number splitting, range expansion and status
        detection. Several patterns need the third-party ``regex`` module
        (variable-width look-behind and fuzzy matching), which the file imports.
        """

        @classmethod
        def splitStreet(cls, value):
            """Split a raw address string into a list with one entry per street.

            A split happens on "et", "–", "/", "+", "ainsi que" and " - ",
            except where the separator directly follows a house number (with an
            optional a/b/bis/ter suffix), because it then separates numbers of
            the same street rather than streets.

            Non-breaking spaces and ":" are removed, every part is stripped and
            empty parts are dropped.
            """
            # Negative look-behind shared by three separators: "not right after
            # a number token".
            guard = (
                r"(?<!(\d\sa)|(\d\sb)|(\d\sbis)|(\d\ster)"
                r"|(\d)|(\da)|(\db)|(\dbis)|(\dter)"
                r"|(\d\s)|(\da\s)|(\db\s)|(\dbis\s)|(\dter\s)"
                r"|(\d\sa\s)|(\d\sb\s)|(\d\sbis\s)|(\d\ster\s))"
            )
            expr = (
                "("
                + guard + r"(–|\bet)"
                + "|" + guard + r"\/"
                + r"|(ainsi\sque)"
                + r"|((?<!(\d))\s-\s(?!(bt)|(bis)|(ter)))"
                + "|" + guard + r"\+"
                + ")"
            )
            flags = regex.MULTILINE | regex.IGNORECASE
            # Every separator becomes a pipe, then the string is cut on pipes.
            piped = regex.sub(expr, "|", value, flags=flags)
            cleaned = regex.sub(r"\xa0|:", "", piped)
            # str.split has no split limit; the previous regex.split call
            # received the flags in its ``maxsplit`` slot and stopped after 10.
            parts = (chunk.strip() for chunk in cleaned.split("|"))
            return [chunk for chunk in parts if chunk]

        def isWorth(self, value):
            """Return True when ``value`` looks like a "number + street name" pair.

            The text must contain at least one letter and one digit, and must
            not match one of the known false positives (for example
            "jardin public du 19 mars 1962"). More exceptions can be added to
            ``xcp`` if needed.
            """
            expr = r"(?=.*[a-z])(?=.*\d+).*"
            xcp = r"(jardin public)|(Bâtiment 12)"
            flags = regex.MULTILINE | regex.IGNORECASE
            if regex.search(xcp, value, flags=flags):
                return False
            return regex.search(expr, value, flags=flags) is not None

        def removeClutter(self, value):
            """Strip noise from an address and return the trimmed result.

            Removed, case-insensitively: parenthesised text, an unclosed
            parenthesis up to the end of the line, the words "Immeuble" and
            "Chapelle,", and everything from "Arrêté" onwards.
            """
            # The trailing empty alternative of the previous pattern was
            # dropped: it matched the empty string at every position.
            expr = r"\(.*\)|Immeuble|Chapelle,|Arrêté.*|\(.*"
            # Flags must be passed by keyword: the 4th positional argument of
            # regex.sub is ``count``, not ``flags``.
            result = regex.sub(expr, '', value, flags=regex.IGNORECASE | regex.MULTILINE)
            return result.strip()

        def separateNbr(self, value):
            """Split an address into ``(number group, street name)``.

            The number group is the first match of the number pattern; the
            street name is ``value`` with exactly that matched span removed and
            the result stripped. When no number group is found the method
            returns ``("", value.strip())`` instead of raising.
            """
            # Every alternative must be followed by at least 8 more characters
            # (the street name).
            tail = r"(?=\s?\S?.{8,})"
            cores = (
                r"((\d).*(\da\b))",
                r"((\d).*(\db\b))",
                r"((\d).*(\dt\b))",
                r"((\d).*(\dbis\b))",
                r"((\d).*(\dter\b))",
                r"((\d).*(\d))",
                r"(\da\b)",
                r"(\db\b)",
                r"(\dbis\b)",
                r"(\dt\b)",
                r"(\dter\b)",
                r"(\d)",
            )
            expr = "(" + "|".join(core + tail for core in cores) + ")"
            found = regex.search(expr, value, flags=regex.IGNORECASE | regex.MULTILINE)
            if found is None:
                return ("", value.strip())
            nbr = found.group(0)
            # Cut the matched span out by position. Re-using the matched text
            # as a pattern broke on characters such as "+" and also deleted
            # the same digits elsewhere in the street name.
            name = (value[:found.start()] + value[found.end():]).strip()
            return (nbr, name)

        def splitNumber(self, value):
            """Split a number group into individual tokens (ranges are left intact).

            Separators are "&", "et", "_", "-", "/", "+" and ",", optionally
            surrounded by one whitespace character, standing between a number
            (with an optional a/b/t/bis/ter suffix) and a digit. Returns the
            list of stripped, non-empty tokens.
            """
            sep = r"\s?(&|et|_|-|\/|\+|,)\s?(?=\d)"
            suffixes = ("", "a", "b", "t", "bis", "ter")
            expr = "|".join(r"((?<=\d" + suffix + ")" + sep + ")" for suffix in suffixes)
            piped = regex.sub(expr, "|", value, flags=regex.MULTILINE | regex.IGNORECASE)
            tokens = (chunk.strip() for chunk in piped.split("|"))
            return [chunk for chunk in tokens if chunk]

        def splitRanges(self, value):
            """Expand a range such as "2 au 8" into the numbers on the same side of the street.

            When ``value`` contains "au" or "à" between two numbers, the result
            is the list of integers from the first bound to the second one in
            steps of 2, so every number has the parity of the first bound. A
            letter suffix on a bound (for example "12bis") is ignored for the
            computation. The list is empty when the first bound is greater than
            the second. Only the first two bounds are used.

            When ``value`` is not a range it is returned unchanged (a string);
            callers tell the two cases apart with ``isinstance(result, list)``.
            """
            sep = r"\s?(au|à)\s?(?=\d)"
            suffixes = ("", "a", "b", "t", "bis", "ter")
            expr = "|".join(r"((?<=\d" + suffix + ")" + sep + ")" for suffix in suffixes)
            piped = regex.sub(expr, "|", value, flags=regex.MULTILINE | regex.IGNORECASE)
            bounds = [chunk.strip() for chunk in piped.split("|") if chunk.strip()]
            if len(bounds) < 2:
                return value
            # Digits that end the first bound (before an optional suffix) and
            # digits that start the second bound.
            first = regex.search(r"(\d+)\D*$", bounds[0])
            last = regex.match(r"\d+", bounds[1])
            if first is None or last is None:
                return value
            start = int(first.group(1))
            stop = int(last.group(0))
            return list(range(start, stop + 1, 2))

        def removeAbrog(self, value):
            """Classify an order title with respect to lifting/repeal.

            Returns 2 when the title fuzzily matches "main levée" or "abrog",
            1 when it additionally fuzzily matches "partiel", and 0 otherwise.
            """
            expr = r"(?:((main\s?-?levée)|(abrog))){i<=2,d<=2,e<=3}"
            part = r"(?:(partiel)){i<=2,d<=2,e<=3}"
            flags = regex.MULTILINE | regex.IGNORECASE
            if regex.search(expr, value, flags=flags) is None:
                return 0
            if regex.search(part, value, flags=flags) is not None:
                return 1  # partially lifted
            return 2  # lifted

        def typeOf(self, value):
            """Return the French label of the order type detected in ``value``.

            Patterns are fuzzy and are tried in this order; the first hit wins:
            "Périmètre de sécurité", "Interdiction d'occupation",
            "Déconstruction", "Astreinte administrative", then "Péril imminent"
            (péril/sécurité together with urgent/imminent/grave) or "Péril".
            An empty string is returned when nothing matches.
            """
            # "sécurité"/"péril" not preceded by "périmètre"
            peril = r"(?:((?<!périmètre)(sécurité|péril))){i<=2,d<=2,e<=2}"
            # security perimeter
            perimetre = r"(?:(périmètre)){i<=2,d<=2,e<=3}"
            # imminent danger
            urgent = r"(?:(urgent|imminent|grave)){i<=2,d<=2,e<=3}"
            # ban on occupation or use
            interdi = r"(?:((?<=(occup)).*(interdi)|(?<=(util)).*(interdi)|(interdi).*(?=(occup))|(interdi).*(?=(util)))){i<=2,d<=2,e<=3}"
            # demolition
            deconstr = r"(?:(déconstr)){i<=2,d<=2,e<=3}"
            # administrative penalty
            astr = r"(?:(astreinte)){i<=2,d<=2,e<=3}"
            flags = regex.MULTILINE | regex.IGNORECASE

            def hit(pattern):
                return regex.search(pattern, value, flags=flags) is not None

            if hit(perimetre):
                return "Périmètre de sécurité"
            if hit(interdi):
                return "Interdiction d'occupation"
            if hit(deconstr):
                return "Déconstruction"
            if hit(astr):
                return "Astreinte administrative"
            if hit(peril):
                return "Péril imminent" if hit(urgent) else "Péril"
            return ""

        def process_spider_output(self, response, result, spider):
            """Turn every item yielded by the spider into one dict per street number.

            For each item the fields ``adrs``, ``dernierA``, ``As`` and ``raw``
            are popped. The address text is split into streets, useless entries
            are filtered out, clutter is removed, the number group is separated
            from the street name, and the number group is split and its ranges
            expanded. One dict is yielded per resulting number.

            Results that are not mapping-like (no ``pop`` attribute) are
            yielded unchanged. Items without an address yield nothing. Entries
            in which no street number can be isolated are logged and skipped.
            """
            for entry in result:
                if not hasattr(entry, 'pop'):
                    # Not one of the spider's items: leave it alone.
                    yield entry
                    continue

                adresses = entry.pop('adrs')
                dernA = entry.pop('dernierA')
                As = entry.pop('As')
                raw = entry.pop('raw')

                # The spider stores these two fields as 1-tuples; unwrap them,
                # while still accepting plain strings.
                if isinstance(adresses, (tuple, list)):
                    adresses = adresses[0] if adresses else None
                if isinstance(dernA, (tuple, list)):
                    dernA = dernA[0] if dernA else None

                # Reset for every item so that an entry without any order link
                # neither fails nor inherits the status of the previous entry.
                typeof = ""
                if dernA:
                    abrog = self.removeAbrog(dernA)
                    if abrog == 1:
                        typeof = "(Mainlevée partielle) " + self.typeOf(dernA)
                    elif abrog == 2:
                        typeof = "Mainlevée"
                    else:
                        typeof = self.typeOf(dernA)

                # Entries without address text are dropped.
                if not adresses:
                    continue

                for street in self.splitStreet(adresses):
                    if not self.isWorth(street):
                        continue
                    nbr, name = self.separateNbr(self.removeClutter(street))
                    if not nbr:
                        spider.logger.warning("No street number found in %r; entry skipped", street)
                        continue
                    for token in self.splitNumber(nbr):
                        expanded = self.splitRanges(token)
                        spider.logger.debug("Number token %r expanded to %r", token, expanded)
                        # A list means an expanded range; anything else means
                        # the token is a single number and is used as is.
                        numbers = expanded if isinstance(expanded, list) else [token]
                        for n in numbers:
                            yield {
                                'N°': n,
                                'Nom de rue': name,
                                'Statut': typeof,
                                'Dernier arrêté (hors modificatif)': dernA,
                                'Arrêtés': As,
                                'Données brutes': raw,
                                "QGIS-RAW": str(n) + " " + name,
                                "QGIS-City": "Marseille",
                                "QGIS-Country": "France",
                            }
--- a/src/scrapy.cfg
+++ b/src/scrapy.cfg
@ -0,0 +1,11 @@
 # Automatically created by: scrapy startproject
 #
 # For more information about the [deploy] section see:
 # https://scrapyd.readthedocs.io/en/latest/deploy.html
 [settings]
 default = perils.settings
 [deploy]
 #url = http://localhost:6800/
 project = perils