2018 Scrapy Environment Enhance(3) Docker ENV

2018 Scrapy Environment Enhance(3) Docker ENV

Set Up Scrapy Ubuntu DEV

> sudo apt-get install -qy python python-dev python-distribute python-pip ipython

> sudo apt-get install -qy firefox xvfb

> sudo apt-get install -qy libffi-dev libxml2-dev libxslt-dev lib32z1-dev libssl-dev

> sudo apt-get install python3-venv

> sudo apt-get install python3-dev

> sudo apt install unzip

> sudo apt-get install libxi6 libgconf-2-4

> sudo apt-get install libnss3 libgconf-2-4

> sudo apt-get install chromium-browser

If needed, make git remember the username and password

> git config credential.helper 'cache --timeout=300000'

Create the virtual ENV and activate it

> python3 -m venv ./env

> source ./env/bin/activate

> pip install --upgrade pip

> pip install selenium pyvirtualdisplay

> pip install boto3

> pip install beautifulsoup4 requests

Install Twisted

> wget http://twistedmatrix.com/Releases/Twisted/17.9/Twisted-17.9.0.tar.bz2

> tar xjf Twisted-17.9.0.tar.bz2

> python setup.py install

> pip install lxml scrapy scrapyjs

Install Browser and Driver

> wget https://chromedriver.storage.googleapis.com/2.37/chromedriver_linux64.zip

> unzip chromedriver_linux64.zip

> chmod a+x chromedriver

> sudo mv chromedriver /usr/local/bin/

> chromedriver --version

ChromeDriver 2.37.544315 (730aa6a5fdba159ac9f4c1e8cbc59bf1b5ce12b7)

> chromium-browser -version

Chromium 65.0.3325.181 Built on Ubuntu, running on Ubuntu 16.04

Set Up Tor Network Proxy

> sudo apt-get install tor

> sudo apt-get install netcat

> sudo apt-get install curl

> sudo apt-get install privoxy

Check my Local IP

> curl http://icanhazip.com/

52.14.197.xxx

Set Up Tor

> tor --hash-password prxxxxxxxx

16:01D5D02xxxxxxxxxxxxxxxxxxxxxxxxxxx

> cat /etc/tor/torrc

ControlPort 9051

> cat /etc/tor/torrcpassword

HashedControlPassword 16:01D5D02EFA3D6A5xxxxxxxxxxxxxxxxxxx

Start Tor

> sudo service tor start

Verify that it changes my IP

> torify curl http://icanhazip.com/

192.36.27.4

This command does not work here

> echo -e 'AUTHENTICATE "pricemonitor1234"\r\nsignal NEWNYM\r\nQUIT' | nc 127.0.0.1 9051

Try to use Python to change the IP

> pip install stem

> python

Python 3.5.2 (default, Nov 23 2017, 16:37:01)

[GCC 5.4.0 20160609] on linux

Type "help", "copyright", "credits" or "license" for more information.

>>>

>>> from stem import Signal

>>> from stem.control import Controller

>>> with Controller.from_port(port=9051) as controller:

...     controller.authenticate()

...     controller.signal(Signal.NEWNYM)

...

That should work if the permission is right.

Configure the Proxy

> cat /etc/privoxy/config

forward-socks5t / 127.0.0.1:9050 .

Start the Service

> sudo service privoxy start

Verify the IP

> curl -x 127.0.0.1:8118 http://icanhazip.com/

185.220.101.6

Verify with the Requests API

> python

Python 3.5.2 (default, Nov 23 2017, 16:37:01)

[GCC 5.4.0 20160609] on linux

Type "help", "copyright", "credits" or "license" for more information.

>>>

>>>

>>> import requests

>>> response = requests.get('http://icanhazip.com/', proxies={'http':'127.0.0.1:8118'})

>>> response.text.strip()

'185.220.101.6'

Think About Docker Application

Dockerfile

# Run a scrapy server side (scrapyd) image.

# Prepare the OS
FROM ubuntu:16.04
MAINTAINER Carl Luo <[email protected]>
ENV DEBIAN_FRONTEND=noninteractive

# Prepare the dependencies.
# The index refresh, the upgrade and every OS package install share one layer,
# so a cached `apt-get update` layer is never reused with newer install lines.
RUN apt-get -qq update \
 && apt-get -qqy dist-upgrade \
 && apt-get install -qy \
        python3 python3-dev python-distribute python3-pip ipython python3-venv \
        firefox xvfb chromium-browser \
        libffi-dev libxml2-dev libxslt-dev lib32z1-dev libssl-dev \
        libxi6 libgconf-2-4 libnss3 \
        wget unzip git

# Python packages; lxml builds against the libxml2/libxslt headers installed above.
RUN pip3 install selenium pyvirtualdisplay
RUN pip3 install boto3 beautifulsoup4 requests
RUN pip3 install lxml scrapy scrapyjs
RUN pip3 install --upgrade pip

# add tool (COPY: plain local file, no archive extraction or URL fetch needed)
COPY install/chromedriver /usr/local/bin/
# NOTE(review): this uses `pip`, not `pip3` like the lines above — confirm it
# resolves to the Python 3 pip inside the image.
RUN pip install scrapyd

# copy the config
RUN mkdir -p /tool/scrapyd/
COPY conf/scrapyd.conf /tool/scrapyd/

# set up the app
EXPOSE 6801
RUN mkdir -p /app/
COPY start.sh /app/
WORKDIR /app/
CMD ["./start.sh"]

Makefile

# Image coordinates and container name; override on the command line,
# e.g. `make build TAG=ubuntu-scrapy-1.1`.
IMAGE := sillycat/public
TAG := ubuntu-scrapy-1.0
NAME := ubuntu-scrapy-1.0

# None of these targets produce a file of the same name.
.PHONY: docker-context build run debug clean logs publish

# Empty hook that `build` depends on; it has no recipe here.
docker-context:

# Build the image from the Dockerfile in the current directory.
build: docker-context
	docker build -t $(IMAGE):$(TAG) .

# Start the container detached, publishing port 6801.
run:
	docker run -d -p 6801:6801 --name $(NAME) $(IMAGE):$(TAG)

# Start the container interactively with a shell instead of the default CMD.
debug:
	docker run -p 6801:6801 --name $(NAME) -ti $(IMAGE):$(TAG) /bin/bash

# Stop and remove the named container.
clean:
	docker stop $(NAME)
	docker rm $(NAME)

# Show the container's output.
logs:
	docker logs $(NAME)

# Push the tag that `build` produced; the bare repository name would not
# reference $(TAG).
publish:
	docker push $(IMAGE):$(TAG)

start.sh

#!/bin/sh -ex
# Container entry script: -e aborts on the first failing command, -x traces commands.

# start the service
# Run from /tool/scrapyd/, where the image places scrapyd.conf.
cd /tool/scrapyd/
# exec replaces this shell with scrapyd so it receives the container's stop signals directly.
exec scrapyd

Configuration in conf/scrapyd.conf

# Daemon settings.
[scrapyd]
# Storage locations (relative paths); items_dir is left empty.
eggs_dir = eggs
logs_dir = logs
items_dir =
dbs_dir = dbs

# Retention and process limits.
jobs_to_keep = 100
finished_to_keep = 100
max_proc = 0
max_proc_per_cpu = 20
poll_interval = 5.0

# Network: the port matches the one EXPOSEd by the Dockerfile and published by the Makefile.
bind_address = 0.0.0.0
http_port = 6801
debug = off

# Component classes.
runner = scrapyd.runner
application = scrapyd.app.application
launcher = scrapyd.launcher.Launcher
webroot = scrapyd.website.Root

# JSON endpoints mapped to their handler classes.
[services]
schedule.json = scrapyd.webservice.Schedule
cancel.json = scrapyd.webservice.Cancel
addversion.json = scrapyd.webservice.AddVersion
listprojects.json = scrapyd.webservice.ListProjects
listversions.json = scrapyd.webservice.ListVersions
listspiders.json = scrapyd.webservice.ListSpiders
delproject.json = scrapyd.webservice.DeleteProject
delversion.json = scrapyd.webservice.DeleteVersion
listjobs.json = scrapyd.webservice.ListJobs
daemonstatus.json = scrapyd.webservice.DaemonStatus

References:

相关推荐