ptt

package module
v0.0.0-...-240e486 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 23, 2020 License: GPL-3.0 Imports: 15 Imported by: 0

README

Ptt-Crawler-Go

A PTT crawler implemented in Go using goroutines.

Status: In development

Usage

import (
	crawler "github.com/jameshwc/Ptt-Crawler-Go"
)
const (
	pages        = 5000
	numOfRoutine = 100
	storePath    = "dat/"
)

func main() {
	p := crawler.NewPTT(storePath, pages, numOfRoutine)
	p.CrawlBoard("Gossiping")
}

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CrawlArticle

func CrawlArticle(url string) (article, error)

func CrawlArticleThread

func CrawlArticleThread(url string, ch chan article, sem chan int, wg *sync.WaitGroup)

Types

type Option

type Option struct {
	StartPage          int
	EndPage            int
	Board              string
	URLlistFileName    string
	OutputJsonFileName string
}

func NewOption

func NewOption(StartPage, EndPage int, Board string) *Option

type PTT

type PTT struct {
	// contains filtered or unexported fields
}

func NewPTT

func NewPTT(storePathFolder string, pages, numsOfRoutine, pagePerFile int, delayTime int64) *PTT

func (*PTT) CrawlBoard

func (p *PTT) CrawlBoard(board string)

func (*PTT) CrawlBoardWithPages

func (p *PTT) CrawlBoardWithPages(board string, startPage, endPage int)

func (*PTT) CrawlURLlistToFile

func (p *PTT) CrawlURLlistToFile(board string, startPage, endPage int, filename string)

func (*PTT) CrawlWithURLFile

func (p *PTT) CrawlWithURLFile(inputFile, outputFile string)

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL