Skip to content

Latest commit

 

History

History
24 lines (17 loc) · 996 Bytes

she-zhi-pa-qu-can-shu.md

File metadata and controls

24 lines (17 loc) · 996 Bytes

设置爬虫参数

`setAndGetSiteConfig` 方法是爬虫的配置方法,各项参数的含义请参看下方代码中的注释。

    /**
     * Builds and returns the site configuration for this crawler.
     *
     * <p>The configuration carries the site domain, the URL the crawl starts
     * from, the request headers (User-Agent, Accept, Accept-Encoding,
     * Accept-Language), the per-thread sleep time and the number of threads.
     *
     * @return the populated {@code SiteConfig} instance
     */
    @Override
    public SiteConfig setAndGetSiteConfig() {
        // Target site and crawl entry point.
        final String domain = "https://www.zhihu.com";
        final String startUrl = "https://www.zhihu.com/people/zhang-jia-wei/followers";

        // User-Agent string sent with each request.
        final String userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36";

        final int threadSleepMillis = 2000; // thread sleep time, in ms
        final int threadCount = 3;          // number of threads

        final SiteConfig config = SiteConfig.create();
        config.setDomain(domain)
                .setStartUrl(startUrl)
                .setUserAgent(userAgent)
                .addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
                .addHeader("Accept-Encoding", "gzip, deflate, sdch, br")
                .addHeader("Accept-Language", "zh-CN, zh; q=0.8, en; q=0.6")
                .setThreadSleep(threadSleepMillis)
                .setThreadNum(threadCount);
        return config;
    }