rules

package
v1.4.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 3, 2026 License: Apache-2.0 Imports: 6 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// JDSpider defines the crawl rules for JD.com keyword search results
// (search.jd.com).
//
// The rule tree runs in three stages:
//
//  1. "判断页数" requests the first result page and reads the total page
//     count from the inline script containing `page_count:"N"`.
//  2. "生成请求" queues two requests per result page: the Search URL with an
//     odd page parameter and the s_new.php URL (scrolling=y) with the
//     following even page parameter.
//  3. "搜索结果" reads title, price, comment count and link from every
//     ".gl-item" element and outputs one record per item that has a title.
//
// NOTE(review): the search keyword is concatenated into the request URLs
// without query escaping; keywords containing '&', '#', spaces, etc. will
// produce malformed URLs. Escaping requires net/url to be imported.
var JDSpider = &spider.Spider{
	Name:        "京东搜索new",
	Description: "京东搜索结果 [search.jd.com]",

	Keyin:        spider.KEYIN,
	Limit:        spider.LIMIT,
	EnableCookie: false,
	RuleTree: &spider.RuleTree{
		// Root starts the crawl by asking "判断页数" to fetch the first page.
		Root: func(ctx *spider.Context) {
			ctx.Aid(map[string]interface{}{"Rule": "判断页数"}, "判断页数")
		},

		Trunk: map[string]*spider.Rule{

			"判断页数": {
				// AidFunc queues the first result page; the response is parsed
				// by the rule named in aid["Rule"].
				AidFunc: func(ctx *spider.Context, aid map[string]interface{}) interface{} {
					ctx.AddQueue(
						&request.Request{
							URL:  "http://search.jd.com/Search?keyword=" + ctx.GetKeyin() + "&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&bs=1&s=1&click=0&page=1",
							Rule: aid["Rule"].(string),
						},
					)
					return nil
				},
				// ParseFunc extracts the page count from the inline scripts and
				// hands it to "生成请求". A count of 0 is passed on when no
				// script carries a parsable page_count.
				ParseFunc: func(ctx *spider.Context) {
					// Compiled once per parse instead of once per <script> element.
					pageCountRe := regexp.MustCompile(`page_count:"(\d+)"`)

					pageCount := 0
					ctx.GetDom().Find("script").Each(func(_ int, s *goquery.Selection) {
						text := s.Text()
						if !strings.Contains(text, "page_count") {
							return
						}
						m := pageCountRe.FindStringSubmatch(text)
						if m == nil {
							// Mentions page_count but not in the expected form;
							// keep any value found in an earlier script.
							return
						}
						n, err := strconv.Atoi(m[1])
						if err != nil {
							// Only reachable on overflow; ignore the bogus value.
							return
						}
						pageCount = n
					})
					ctx.Aid(map[string]interface{}{"PageCount": pageCount}, "生成请求")
				},
			},

			"生成请求": {
				// AidFunc queues both halves of every result page, 1..PageCount
				// inclusive. Result page i maps to page parameters 2i-1 and 2i.
				AidFunc: func(ctx *spider.Context, aid map[string]interface{}) interface{} {
					pageCount := aid["PageCount"].(int)
					keyin := ctx.GetKeyin()

					// The bound is inclusive: with "<" the last page was never
					// requested and a single-page search produced no requests.
					for i := 1; i <= pageCount; i++ {
						ctx.AddQueue(
							&request.Request{
								URL:  "http://search.jd.com/Search?keyword=" + keyin + "&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&bs=1&s=1&click=0&page=" + strconv.Itoa(i*2-1),
								Rule: "搜索结果",
							},
						)
						ctx.AddQueue(
							&request.Request{
								URL:  "http://search.jd.com/s_new.php?keyword=" + keyin + "&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&bs=1&s=31&scrolling=y&pos=30&page=" + strconv.Itoa(i*2),
								Rule: "搜索结果",
							},
						)
					}
					return nil
				},
			},

			"搜索结果": {
				// Output columns, in the order used by ctx.Output below.
				ItemFields: []string{
					"标题",
					"价格",
					"评论数",
					"链接",
				},
				// ParseFunc outputs one record per ".gl-item" that has a
				// non-empty title.
				ParseFunc: func(ctx *spider.Context) {
					// Compiled once per parse instead of once per item.
					tagRe := regexp.MustCompile("\\<[\\S\\s]+?\\>")

					ctx.GetDom().Find(".gl-item").Each(func(_ int, s *goquery.Selection) {
						a := s.Find(".p-name.p-name-type-2 > a")

						title := tagRe.ReplaceAllString(a.Text(), " ")
						title = strings.Trim(title, " \t\n")
						if title == "" {
							return
						}

						price := s.Find(".p-price > strong > i").Text()
						discuss := s.Find(".p-commit > strong > a").Text()

						// Only protocol-relative hrefs ("//host/path") need a
						// scheme; absolute or missing hrefs are left untouched
						// instead of becoming "http:https://..." or "http:".
						link := a.Attr("href").UnwrapOr("")
						if strings.HasPrefix(link, "//") {
							link = "http:" + link
						}

						ctx.Output(map[int]interface{}{
							0: title,
							1: price,
							2: discuss,
							3: link,
						})
					})
				},
			},
		},
	},
}

Functions

This section is empty.

Types

This section is empty.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL