Documentation
¶
Index ¶
- func If(condition bool, trueVal, falseVal interface{}) interface{}
- type CustomForm
- type Field
- type Form
- func (f *Form) DealCoding(html string, header http2.Header) (string, error)
- func (f *Form) DownImg(url string, item Field, res *sync.Map) string
- func (f *Form) GetCharsetByContentType(contentType string) string
- func (f *Form) GetDir(path string, res *sync.Map) string
- func (f *Form) GetHref(href string) string
- func (f *Form) ResolveSelector(html string, selector map[string]Field, originUrl string) (map[string]string, error)
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type CustomForm ¶ added in v2.0.1
// CustomForm groups the crawl settings declared below: target site and
// pagination, list/detail selectors, image handling, Excel output, detail-page
// concurrency, and HTTP options.
type CustomForm struct {
Host string // site domain
Channel string // channel (list page) URL; the page number is represented by the [PAGE] placeholder
PageStart int // first page number
Length int // number of pages to crawl
ListSelector string // list selector
HrefSelector string // selector for the <a> link, relative to the list selector
DisableAutoCoding bool // whether automatic transcoding applies; NOTE(review): the name suggests true disables it — confirm
LazyImageAttrName string // lazy-load image attribute name; defaults to data-original
DisableImageExtensionCheck bool // disables the image extension check; once disabled, every image extension is forced to png
AllowImageExtension []string // image extensions allowed for download
DefaultImg func(form *Form, item Field) string // sets a default image when an image fails
DetailFields map[string]Field // field selectors for the detail page
ListFields map[string]Field // field selectors for the list page; API crawling is not supported yet
CustomExcelHeader bool // use a custom Excel table header
DetailCoroutineNumber int // number of goroutines used to crawl detail pages
HttpTimeout time.Duration // request timeout
HttpHeader map[string]string // header
}
type Field ¶
// Field describes one field to extract: its type, its selector, and the
// image and Excel options declared below.
type Field struct {
Types fileTypes.FieldTypes
Selector string // field selector
AttrKey string // attribute-value parameter; presumably the attribute name to read — confirm
ImagePrefix func(form *Form, path string) string // image path prefix; prepended to the image path, but does not create a folder
ImageDir string // image subfolder; supports variables: 1.[date:Y-m-d] 2.[random:1-100] 3.[singleField:title]
ExcelHeader string // Excel header column; requires CustomExcelHeader to be true, e.g. A
}
type Form ¶
// Form holds the crawl settings together with the runtime members declared
// below: HTTP client, mode, notice, wait groups, result channel, concurrency
// limiter, and progress counters.
type Form struct {
Host string // site domain
Channel string // channel (list page) URL; the page number is represented by the [PAGE] placeholder
PageStart int // first page number
Length int // number of pages to crawl
Client *request.Client // HTTP client
ListSelector string // list selector
HrefSelector string // selector for the <a> link, relative to the list selector
Mode mode.Mode
DisableAutoCoding bool // whether automatic transcoding applies; NOTE(review): the name suggests true disables it — confirm
Notice *notice.Notice
Wait sync.WaitGroup
LazyImageAttrName string // lazy-load image attribute name; defaults to data-original
DisableImageExtensionCheck bool // disables the image extension check; once disabled, every image extension is forced to png
AllowImageExtension []string // image extensions allowed for download
DefaultImg func(form *Form, item Field) string // sets a default image when an image fails
DetailFields map[string]Field // field selectors for the detail page
ListFields map[string]Field // field selectors for the list page; API crawling is not supported yet
Storage chan map[string]string // channel carrying result data
CustomExcelHeader bool // use a custom Excel table header
DetailCoroutineNumber int // number of goroutines used to crawl detail pages
DetailCoroutineChan chan bool // channel used to limit detail-page concurrency
DetailWait sync.WaitGroup
HttpTimeout time.Duration // request timeout
HttpHeader map[string]string // header
DetailSize int // number of detail entries per list
Total int // expected total number of items to crawl
CurrentIndex int // number of items crawled so far
}
func (*Form) DealCoding ¶
DealCoding 解决编码问题
func (*Form) GetCharsetByContentType ¶
GetCharsetByContentType 从contentType中获取编码
Click to show internal directories.
Click to hide internal directories.