diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
index eefd91bb5..58aabc1ef 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
@@ -1,6 +1,12 @@
 package us.codecraft.webmagic;
 
+import java.util.ArrayList;
+import java.util.List;
+import us.codecraft.webmagic.downloader.Downloader;
 import us.codecraft.webmagic.model.HttpRequestBody;
+import us.codecraft.webmagic.pipeline.Pipeline;
+import us.codecraft.webmagic.processor.PageProcessor;
+import us.codecraft.webmagic.scheduler.Scheduler;
 import us.codecraft.webmagic.utils.Experimental;
 
 import java.io.Serializable;
@@ -53,6 +59,21 @@ public class Request implements Serializable {
 
     private String charset;
 
+    /**
+     * Downloader for this request only; when {@code null} the Spider falls back to its own.
+     * NOTE(review): Request is Serializable -- confirm the component references below are safe to serialize.
+     */
+    private Downloader downloader;
+
+    /** Page processor for this request only; when {@code null} the Spider falls back to its own. */
+    private PageProcessor pageProcessor;
+
+    /** Scheduler for this request only; when {@code null} the Spider falls back to its own. */
+    private Scheduler scheduler;
+
+    /** Pipelines for this request only; when empty the Spider falls back to its own. */
+    private List<Pipeline> pipelines = new ArrayList<Pipeline>();
+
     public Request() {
     }
 
@@ -188,6 +209,44 @@ public Request setCharset(String charset) {
         return this;
     }
 
+    public Downloader getDownloader() {
+        return downloader;
+    }
+
+    public Request setDownloader(Downloader downloader) {
+        this.downloader = downloader;
+        return this;
+    }
+
+    public PageProcessor getPageProcessor() {
+        return pageProcessor;
+    }
+
+    public Request setPageProcessor(PageProcessor pageProcessor) {
+        this.pageProcessor = pageProcessor;
+        return this;
+    }
+
+    public Scheduler getScheduler() {
+        return scheduler;
+    }
+
+    public Request setScheduler(Scheduler scheduler) {
+        this.scheduler = scheduler;
+        return this;
+    }
+
+    /** Returns the internal (live) list of pipelines registered for this request. */
+    public List<Pipeline> getPipelines() {
+        return pipelines;
+    }
+
+    /** Registers one pipeline to run for this request instead of the Spider's pipelines. */
+    public Request addPipelines(Pipeline pipeline) {
+        this.pipelines.add(pipeline);
+        return this;
+    }
+
     @Override
     public String toString() {
         return "Request{" +
diff --git a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
index 62c989f1d..ac124b408 100644
--- a/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
+++ b/webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
@@ -401,7 +401,12 @@ public void test(String... urls) {
     }
 
     private void processRequest(Request request) {
-        Page page = downloader.download(request, this);
+        // A downloader attached to the request takes precedence over the Spider's own.
+        Downloader dl = request.getDownloader();
+        if (dl == null) {
+            dl = downloader;
+        }
+        Page page = dl.download(request, this);
         if (page.isDownloadSuccess()){
             onDownloadSuccess(request, page);
         } else {
@@ -411,10 +416,19 @@ private void processRequest(Request request) {
 
     private void onDownloadSuccess(Request request, Page page) {
         if (site.getAcceptStatCode().contains(page.getStatusCode())){
-            pageProcessor.process(page);
+            // A page processor attached to the request takes precedence over the Spider's own.
+            PageProcessor pp = request.getPageProcessor();
+            if (pp == null) {
+                pp = pageProcessor;
+            }
+            pp.process(page);
             extractAndAddRequests(page, spawnUrl);
             if (!page.getResultItems().isSkip()) {
-                for (Pipeline pipeline : pipelines) {
+                // Pick the list to run WITHOUT copying the Spider's pipelines into the request:
+                // the request's own list must stay untouched so the request can be reused.
+                List<Pipeline> requestPipelines = request.getPipelines();
+                boolean useRequestPipelines = requestPipelines != null && !requestPipelines.isEmpty();
+                for (Pipeline pipeline : useRequestPipelines ? requestPipelines : pipelines) {
                     pipeline.process(page.getResultItems(), this);
                 }
             }
@@ -468,7 +482,12 @@ private void addRequest(Request request) {
         if (site.getDomain() == null && request != null && request.getUrl() != null) {
             site.setDomain(UrlUtils.getDomain(request.getUrl()));
         }
-        scheduler.push(request, this);
+        // The guard above tolerates a null request, so do not dereference it unconditionally.
+        Scheduler sc = request != null ? request.getScheduler() : null;
+        if (sc == null) {
+            sc = scheduler;
+        }
+        sc.push(request, this);
     }
 
     protected void checkIfRunning() {