Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var AreaCodes2018 = &spider.Spider{ Name: "2018年统计用区划代码和城乡划分代码", Description: "2018年统计用区划代码和城乡划分代码。间隔不要小于100ms,不然容易触发验证码导致失败。总数据大概71万(暂停时长100ms,耗时2小时),所以适当做数据分批输出,不然出现内存溢出。", EnableCookie: false, RuleTree: &spider.RuleTree{ Root: func(ctx *spider.Context) { ctx.AddQueue(&request.Request{ URL: "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/index.html", Rule: "省", }) }, Trunk: map[string]*spider.Rule{ "省": { ItemFields: []string{ "名称", "代码", "级别", "上级", }, ParseFunc: func(ctx *spider.Context) { baseURL := ctx.GetRequest().URL baseURL = baseURL[:strings.LastIndex(baseURL, "/")+1] query := ctx.GetDom() query.Find("tr.provincetr").Each(func(i int, tr *goquery.Selection) { tr.Find("td a").Each(func(j int, a *goquery.Selection) { if url := a.Attr("href"); url.IsSome() { u := url.Unwrap() code := strings.Split(u, ".")[0] u = baseURL + u ctx.Output(map[int]interface{}{ 0: a.Text(), 1: code, 2: 0, 3: 0, }) ctx.AddQueue(&request.Request{URL: u, Rule: "市", Temp: request.Temp{"level": 0, "parent": code}}) } }) }) }, }, "市": { ItemFields: []string{ "名称", "代码", "级别", "上级", }, ParseFunc: func(ctx *spider.Context) { baseURL := ctx.GetRequest().URL baseURL = baseURL[:strings.LastIndex(baseURL, "/")+1] level := ctx.GetRequest().Temp["level"].(int) + 1 parent := ctx.GetRequest().Temp["parent"].(string) query := ctx.GetDom() if level == 4 { myCode := "" query.Find("tr.villagetr td").Each(func(i int, td *goquery.Selection) { if i%3 == 0 { myCode = td.Text() } if i%3 == 2 { ctx.Output(map[int]interface{}{ 0: td.Text(), 1: myCode, 2: level, 3: parent, }) } }) } else { myCode := "" query.Find("tr td a").Each(func(i int, a *goquery.Selection) { if i%2 == 0 { myCode = a.Text() } if i%2 == 1 { if url := a.Attr("href"); url.IsSome() { u := url.Unwrap() code := strings.Split(strings.Split(u, "/")[1], ".")[0] u = baseURL + u ctx.Output(map[int]interface{}{ 0: a.Text(), 1: myCode, 2: level, 3: parent, }) ctx.AddQueue(&request.Request{URL: u, Rule: "市", Temp: request.Temp{"level": level, 
"parent": code}}) } } }) } }, }, }, }, }
AreaCodes2018 is the spider for the 2018 statistical area codes and urban-rural division codes.
Created: 2019-09-06 09:23:55. Author: hailaz.
Functions ¶
This section is empty.
Types ¶
This section is empty.
Click to show internal directories.
Click to hide internal directories.