Documentation
¶
Index ¶
- func FetchReviewsWithFallback(ctx context.Context, params fetchReviewsParams) (FetchReviewsResponse, []DOMReview, error)
- type About
- type Address
- type ContactExtractJob
- type ContactExtractJobOptions
- type DOMReview
- type EmailValidation
- type Entry
- type EntryWithDistance
- type FetchReviewsResponse
- type GmapJob
- type GmapJobOptions
- type Image
- type LinkSource
- type MapLocation
- type MapSearchParams
- type Option
- type Owner
- type PlaceJob
- type PlaceJobOptions
- type Review
- type SearchJob
- type SearchJobOptions
- type SocialLinks
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func FetchReviewsWithFallback ¶
func FetchReviewsWithFallback(ctx context.Context, params fetchReviewsParams) (FetchReviewsResponse, []DOMReview, error)
FetchReviewsWithFallback attempts RPC-based extraction first, then falls back to DOM-based extraction.
Types ¶
type ContactExtractJob ¶
type ContactExtractJob struct {
scrapemate.Job
Entry *Entry
ExitMonitor exiter.Exiter
Validator emailvalidator.Validator
}
func NewContactJob ¶
func NewContactJob(parentID string, entry *Entry, opts ...ContactExtractJobOptions) *ContactExtractJob
NewContactJob creates a new job to extract contacts (emails, social links, phones) from a website
func (*ContactExtractJob) Process ¶
func (j *ContactExtractJob) Process(ctx context.Context, resp *scrapemate.Response) (any, []scrapemate.IJob, error)
func (*ContactExtractJob) ProcessOnFetchError ¶
func (j *ContactExtractJob) ProcessOnFetchError() bool
func (*ContactExtractJob) UseInResults ¶
func (j *ContactExtractJob) UseInResults() bool
type ContactExtractJobOptions ¶
type ContactExtractJobOptions func(*ContactExtractJob)
func WithContactJobExitMonitor ¶
func WithContactJobExitMonitor(exitMonitor exiter.Exiter) ContactExtractJobOptions
func WithContactValidatorOption ¶
func WithContactValidatorOption(validator emailvalidator.Validator) ContactExtractJobOptions
type DOMReview ¶
type DOMReview struct {
AuthorName string
AuthorURL string
ProfilePicture string
Rating int
RelativeTimeDescription string
Text string
Images []string
}
DOMReview represents a review extracted from the DOM
type EmailValidation ¶
type EmailValidation struct {
Email string `json:"email"`
Status string `json:"status"` // valid, invalid, unknown, catch_all
Score float64 `json:"score"` // 0-100
Deliverable bool `json:"deliverable"`
Disposable bool `json:"disposable"`
RoleAccount bool `json:"role_account"`
FreeEmail bool `json:"free_email"`
CatchAll bool `json:"catch_all"`
Reason string `json:"reason"`
}
EmailValidation holds the validation result for a single email.
type Entry ¶
type Entry struct {
ID string `json:"input_id"`
Link string `json:"link"`
Cid string `json:"cid"`
Title string `json:"title"`
Categories []string `json:"categories"`
Category string `json:"category"`
Address string `json:"address"`
OpenHours map[string][]string `json:"open_hours"`
// PopularTimes is a map keyed by the day of the week;
// each value is a map from the hour to the traffic at that hour.
PopularTimes map[string]map[int]int `json:"popular_times"`
WebSite string `json:"web_site"`
Phone string `json:"phone"`
PlusCode string `json:"plus_code"`
ReviewCount int `json:"review_count"`
ReviewRating float64 `json:"review_rating"`
ReviewsPerRating map[int]int `json:"reviews_per_rating"`
Latitude float64 `json:"latitude"`
Longitude float64 `json:"longitude"`
Status string `json:"status"`
Description string `json:"description"`
ReviewsLink string `json:"reviews_link"`
Thumbnail string `json:"thumbnail"`
Timezone string `json:"timezone"`
PriceRange string `json:"price_range"`
DataID string `json:"data_id"`
PlaceID string `json:"place_id"`
Images []Image `json:"images"`
Reservations []LinkSource `json:"reservations"`
OrderOnline []LinkSource `json:"order_online"`
Menu LinkSource `json:"menu"`
Owner Owner `json:"owner"`
CompleteAddress Address `json:"complete_address"`
About []About `json:"about"`
UserReviews []Review `json:"user_reviews"`
UserReviewsExtended []Review `json:"user_reviews_extended"`
Emails []string `json:"emails"`
EmailValidations []EmailValidation `json:"email_validations,omitempty"`
Phones []string `json:"phones,omitempty"` // Phone numbers extracted from website
SocialLinks SocialLinks `json:"social_links"` // Social media and messaging contacts
}
func EntryFromJSON ¶
func ParseSearchResults ¶
func (*Entry) AddExtraReviews ¶
func (*Entry) CsvHeaders ¶
func (*Entry) IsWebsiteValidForEmail ¶
type EntryWithDistance ¶
type FetchReviewsResponse ¶
type FetchReviewsResponse struct {
// contains filtered or unexported fields
}
type GmapJob ¶
type GmapJob struct {
scrapemate.Job
MaxDepth int
LangCode string
Region string
ExtractEmail bool
Deduper deduper.Deduper
ExitMonitor exiter.Exiter
ExtractExtraReviews bool
EmailValidator emailvalidator.Validator
}
func NewGmapJob ¶
func (*GmapJob) BrowserActions ¶
func (j *GmapJob) BrowserActions(ctx context.Context, page scrapemate.BrowserPage) scrapemate.Response
func (*GmapJob) Process ¶
func (j *GmapJob) Process(ctx context.Context, resp *scrapemate.Response) (any, []scrapemate.IJob, error)
func (*GmapJob) UseInResults ¶
type GmapJobOptions ¶
type GmapJobOptions func(*GmapJob)
func WithDeduper ¶
func WithDeduper(d deduper.Deduper) GmapJobOptions
func WithEmailValidator ¶
func WithEmailValidator(v emailvalidator.Validator) GmapJobOptions
func WithExitMonitor ¶
func WithExitMonitor(e exiter.Exiter) GmapJobOptions
func WithExtraReviews ¶
func WithExtraReviews() GmapJobOptions
type LinkSource ¶
type MapLocation ¶
type MapSearchParams ¶
type MapSearchParams struct {
Location MapLocation
Query string
ViewportW int
ViewportH int
Hl string
}
type PlaceJob ¶
type PlaceJob struct {
scrapemate.Job
UsageInResults bool
ExtractEmail bool
ExitMonitor exiter.Exiter
ExtractExtraReviews bool
EmailValidator emailvalidator.Validator
}
func NewPlaceJob ¶
func NewPlaceJob(parentID, langCode, u string, extractEmail, extraExtraReviews bool, opts ...PlaceJobOptions) *PlaceJob
func (*PlaceJob) BrowserActions ¶
func (j *PlaceJob) BrowserActions(ctx context.Context, page scrapemate.BrowserPage) scrapemate.Response
func (*PlaceJob) Process ¶
func (j *PlaceJob) Process(_ context.Context, resp *scrapemate.Response) (any, []scrapemate.IJob, error)
func (*PlaceJob) UseInResults ¶
type PlaceJobOptions ¶
type PlaceJobOptions func(*PlaceJob)
func WithPlaceJobEmailValidator ¶
func WithPlaceJobEmailValidator(validator emailvalidator.Validator) PlaceJobOptions
func WithPlaceJobExitMonitor ¶
func WithPlaceJobExitMonitor(exitMonitor exiter.Exiter) PlaceJobOptions
type Review ¶
type Review struct {
Name string `json:"name"`
ProfilePicture string `json:"profile_picture"`
Rating int `json:"rating"`
Description string `json:"description"`
Images []string `json:"images"`
When string `json:"when"`
}
func ConvertDOMReviewsToReviews ¶
ConvertDOMReviewsToReviews converts DOMReview slice to Review slice
type SearchJob ¶
type SearchJob struct {
scrapemate.Job
ExitMonitor exiter.Exiter
// contains filtered or unexported fields
}
func NewSearchJob ¶
func NewSearchJob(params *MapSearchParams, opts ...SearchJobOptions) *SearchJob
type SearchJobOptions ¶
type SearchJobOptions func(*SearchJob)
func WithSearchJobExitMonitor ¶
func WithSearchJobExitMonitor(exitMonitor exiter.Exiter) SearchJobOptions
type SocialLinks ¶
type SocialLinks struct {
Facebook string `json:"facebook,omitempty"`
Instagram string `json:"instagram,omitempty"`
Twitter string `json:"twitter,omitempty"` // includes x.com
LinkedIn string `json:"linkedin,omitempty"`
TikTok string `json:"tiktok,omitempty"`
YouTube string `json:"youtube,omitempty"`
WhatsApp string `json:"whatsapp,omitempty"` // phone number (e.g., 6281234567890)
Telegram string `json:"telegram,omitempty"` // username or channel
}
SocialLinks holds all social media and messaging contacts extracted from a website.
func ClassifyWebsiteURL ¶
func ClassifyWebsiteURL(websiteURL string) (platform string, social SocialLinks)
ClassifyWebsiteURL determines if a website URL is a social media platform and extracts the relevant info if so
func ExtractContacts ¶
func ExtractContacts(doc *goquery.Document, body []byte) (SocialLinks, []string)
ExtractContacts extracts all contact information from an HTML document.
func (SocialLinks) IsEmpty ¶
func (s SocialLinks) IsEmpty() bool
IsEmpty returns true if no social links are set