rules

package
v1.3.5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 28, 2026 License: Apache-2.0 Imports: 4 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// AreaCodes2018 crawls the 2018 statistical administrative-division codes and
// urban-rural classification codes published on www.stats.gov.cn.
//
// The crawl starts at the 2018 index page with rule "省", which queues every
// linked page under rule "市". Rule "市" re-queues itself for each deeper
// level until it reaches level 4, where it reads the village rows instead of
// following links. Every record is emitted with the fields 名称 (name),
// 代码 (code), 级别 (level) and 上级 (parent code).
var AreaCodes2018 = &spider.Spider{
	Name:        "2018年统计用区划代码和城乡划分代码",
	Description: "2018年统计用区划代码和城乡划分代码。间隔不要小于100ms,不然容易触发验证码导致失败。总数据大概71万(暂停时长100ms,耗时2小时),所以适当做数据分批输出,不然出现内存溢出。",

	EnableCookie: false,
	RuleTree: &spider.RuleTree{
		// Root seeds the crawl with the index page, handled by rule "省".
		Root: func(ctx *spider.Context) {
			ctx.AddQueue(&request.Request{
				Url:  "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2018/index.html",
				Rule: "省",
			})
		},

		Trunk: map[string]*spider.Rule{
			// "省" parses the index page: one record and one follow-up
			// request per link found in the tr.provincetr rows.
			"省": {
				ItemFields: []string{
					"名称",
					"代码",
					"级别",
					"上级",
				},
				ParseFunc: func(ctx *spider.Context) {
					// Links on the page are relative, so resolve them against
					// the directory of the current request URL.
					baseUrl := ctx.GetRequest().Url
					baseUrl = baseUrl[:strings.LastIndex(baseUrl, "/")+1]
					query := ctx.GetDom()

					query.Find("tr.provincetr").Each(func(i int, tr *goquery.Selection) {
						tr.Find("td a").Each(func(j int, a *goquery.Selection) {
							href, ok := a.Attr("href")
							if !ok {
								return
							}
							// The code is the link's file name without its extension.
							code := strings.Split(href, ".")[0]

							ctx.Output(map[int]interface{}{
								0: a.Text(),
								1: code,
								2: 0,
								3: 0,
							})
							ctx.AddQueue(&request.Request{
								Url:  baseUrl + href,
								Rule: "市",
								Temp: request.Temp{"level": 0, "parent": code},
							})
						})
					})
				},
			},
			// "市" parses every page below the index. The level and parent
			// code of the page being parsed are carried in the request's Temp.
			"市": {
				ItemFields: []string{
					"名称",
					"代码",
					"级别",
					"上级",
				},
				ParseFunc: func(ctx *spider.Context) {
					baseUrl := ctx.GetRequest().Url
					baseUrl = baseUrl[:strings.LastIndex(baseUrl, "/")+1]
					// NOTE(review): these assertions panic if Temp lacks the
					// keys or holds other types (e.g. after a serialization
					// round trip) — confirm how the framework restores Temp.
					level := ctx.GetRequest().Temp["level"].(int) + 1
					parent := ctx.GetRequest().Temp["parent"].(string)
					query := ctx.GetDom()

					if level == 4 {
						// Village rows are read as groups of three cells: the
						// first is kept as the code, the third is emitted as
						// the name, and the middle one is ignored.
						myCode := ""
						query.Find("tr.villagetr td").Each(func(i int, td *goquery.Selection) {
							switch i % 3 {
							case 0:
								myCode = td.Text()
							case 2:
								ctx.Output(map[int]interface{}{
									0: td.Text(),
									1: myCode,
									2: level,
									3: parent,
								})
							}
						})
						return
					}

					// On the other levels the anchors are read in pairs: the
					// even-indexed anchor's text is the code and the
					// odd-indexed anchor's text is the name.
					myCode := ""
					query.Find("tr td a").Each(func(i int, a *goquery.Selection) {
						if i%2 == 0 {
							myCode = a.Text()
							return
						}
						href, ok := a.Attr("href")
						if !ok {
							return
						}
						// The child's parent code is the link's file name
						// without its extension. Taking everything after the
						// last "/" (or the whole href when there is none)
						// avoids an index-out-of-range panic on hrefs that
						// contain no directory part.
						file := href[strings.LastIndex(href, "/")+1:]
						code := strings.Split(file, ".")[0]

						ctx.Output(map[int]interface{}{
							0: a.Text(),
							1: myCode,
							2: level,
							3: parent,
						})
						ctx.AddQueue(&request.Request{
							Url:  baseUrl + href,
							Rule: "市",
							Temp: request.Temp{"level": level, "parent": code},
						})
					})
				},
			},
		},
	},
}

AreaCodes2018 2018年统计用区划代码和城乡划分代码

createTime: 2019年09月06日 09:23:55 author: hailaz

Functions

This section is empty.

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL