Documentation
¶
Index ¶
- Constants
- Variables
- func CreateUrlPatternMatcher(pattern string) func(string) bool
- func FetchUrl(netreq *NetRequest) (*http.Response, error)
- func FileContentType(path string) string
- func First[T any](t T, args ...any) T
- func IsRootPath(abspath string) bool
- func MatchUrlPattern(pattern string, optionalUrl string) bool
- func MatchUrlPatterns(patterns []string, url string, matchempty bool) bool
- func Mime(str string) string
- func ParseExecUrlFilepath(urlObj *url.URL) string
- func ParseFileUrlFilepath(urlObj *url.URL, windows bool) string
- func ParseLocalDateTime(str string) (int64, error)
- func ParseLocalFileUrlFilepath(urlObj *url.URL) string
- func ParseProxyFromEnv(urlStr string) string
- func PrintJson(output io.Writer, value any) error
- func ReadCloserWithPrefix(rc io.ReadCloser, prefix []byte) io.ReadCloser
- func SplitCsv(str string) []string
- type ImpersonateProfile
- type NetRequest
- type RangesFile
- type TimestampTime
Constants ¶
const (
HTTP_HEADER_PLACEHOLDER = "\n"
)
Variables ¶
var ImpersonateProfiles = map[string]*ImpersonateProfile{ "chrome120": { Navigator: "chrome", Comment: "Chrome 120 on Windows 11 x64 en-US", Ja3: "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,65281-45-11-65037-18-5-51-0-23-27-43-16-10-35-17513-13,29-23-24,0", H2fingerprint: "1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p", Headers: [][]string{ {"Cache-Control", "max-age=0"}, {"Sec-Ch-Ua", `"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"`}, {"Sec-Ch-Ua-Mobile", `?0`}, {"Sec-Ch-Ua-Platform", `"Windows"`}, {"Upgrade-Insecure-Requests", "1"}, {"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}, {"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"}, {"Sec-Fetch-Site", `none`}, {"Sec-Fetch-Mode", `navigate`}, {"Sec-Fetch-User", `?1`}, {"Sec-Fetch-Dest", `document`}, {"Accept-Language", "en-US,en;q=0.9"}, {"Cookie", HTTP_HEADER_PLACEHOLDER}, }, }, "chrome122": { Name: "chrome122", Navigator: "chrome", Comment: "Chrome 122 on Windows 11 x64 en-US", Ja3: "771,4865-4866-4867-49195-49199-49196-49200-52393-52392-49171-49172-156-157-47-53,23-13-27-16-51-65281-45-5-17513-0-35-43-65037-11-18-10,29-23-24,0", H2fingerprint: "1:65536,2:0,4:6291456,6:262144|15663105|0|m,a,s,p", Headers: [][]string{ {"Cache-Control", "max-age=0"}, {"Sec-Ch-Ua", `"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"`}, {"Sec-Ch-Ua-Mobile", `?0`}, {"Sec-Ch-Ua-Platform", `"Windows"`}, {"Upgrade-Insecure-Requests", "1"}, {"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"}, {"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"}, {"Sec-Fetch-Site", `none`}, {"Sec-Fetch-Mode", `navigate`}, 
{"Sec-Fetch-User", `?1`}, {"Sec-Fetch-Dest", `document`}, {"Accept-Encoding", "gzip, deflate, br, zstd"}, {"Accept-Language", "en-US,en;q=0.9"}, {"Cookie", HTTP_HEADER_PLACEHOLDER}, }, }, "firefox121": { Navigator: "firefox", Comment: "Firefox 121 on Windows 11 x64 en-US", Ja3: "771,4865-4867-4866-49195-49199-52393-52392-49196-49200-49162-49161-49171-49172-156-157-47-53,0-23-65281-10-11-35-16-5-51-43-13-45-28-65037,29-23-24-25-256-257,0", H2fingerprint: "1:65536,4:131072,5:16384|12517377|3:0:0:201,5:0:0:101,7:0:0:1,9:0:7:1,11:0:3:1,13:0:0:241|m,p,a,s", Headers: [][]string{ {"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0"}, {"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"}, {"Accept-Language", "en-US,en;q=0.5"}, {"Accept-Encoding", "gzip, deflate, br"}, {"Cookie", HTTP_HEADER_PLACEHOLDER}, {"Upgrade-Insecure-Requests", "1"}, {"Sec-Fetch-Dest", `document`}, {"Sec-Fetch-Mode", `navigate`}, {"Sec-Fetch-Site", `none`}, {"Sec-Fetch-User", `?1`}, {"te", "trailers"}, }, }, }
var ( // all supported impersonate names Impersonates []string )
Functions ¶
func CreateUrlPatternMatcher ¶ added in v0.6.0
Create a Chrome extension match pattern style matcher that tests a url against the pattern. Pattern syntax: https://developer.chrome.com/docs/extensions/develop/concepts/match-patterns . Pattern examples: https://*/ , https://*/foo* , https://*.google.com/foo*bar . Adapted from https://github.com/nickclaw/url-match-patterns
func FileContentType ¶ added in v0.10.0
func IsRootPath ¶ added in v0.10.0
Tell if abspath is a file system root path (e.g. "/" or "C:\"). abspath should be a Cleaned absolute file path.
func MatchUrlPattern ¶ added in v0.6.0
func MatchUrlPatterns ¶ added in v0.9.0
func Mime ¶ added in v0.10.0
Get mime from str. str could be: already a mime (do nothing); a file ext (with or without leading dot); or a file name.
func ParseExecUrlFilepath ¶ added in v0.10.0
Parse binary file path from an "exec://" url. It's a custom scheme that differs from the "file://" scheme in that, in url parts, if host exists but pathname does not, it is treated as a binary in PATH: "exec://pwd" => "pwd".
func ParseFileUrlFilepath ¶ added in v0.10.0
Parse a "file://" (or custom scheme with the same structure) url and extract the full file system path. If the url is malformed or invalid, it just returns an empty string. E.g. "file:///root/a.txt" => "/root/a.txt". If windows is true, it will treat the url as a windows path: 1. Use "\" as path sep instead of `/`. 2. Support UNC paths: "file://server/folder/data.xml" or "file:////server/folder/data.xml" => "\\server\folder\data.xml". 3. Support drive letters in the url: "file:///D:/foo.txt" => "D:\foo.txt". If windows is false, it will treat a non-empty host in urlObj (except "localhost") as invalid. The returned path is NOT cleaned. Reference: https://en.wikipedia.org/wiki/File_URI_scheme .
func ParseLocalDateTime ¶ added in v0.9.0
func ParseLocalFileUrlFilepath ¶ added in v0.10.0
Similar to ParseFileUrlFilepath, but treat urlObj as a local file system url automatically. Also, the returned path is fullpath.Cleaned.
func ParseProxyFromEnv ¶ added in v0.4.0
Parse standard HTTP_PROXY, HTTPS_PROXY, NO_PROXY (and lowercase versions) envs, return proxy for urlStr.
func ReadCloserWithPrefix ¶ added in v0.10.0
func ReadCloserWithPrefix(rc io.ReadCloser, prefix []byte) io.ReadCloser
Return an io.ReadCloser that first returns prefix, then reads from rc.
Types ¶
type ImpersonateProfile ¶ added in v0.4.2
type NetRequest ¶ added in v0.10.0
type RangesFile ¶ added in v0.10.0
type RangesFile struct {
// contains filtered or unexported fields
}
func NewRangesFile ¶ added in v0.10.0
func NewRangesFile(file *os.File, contentType string, fileSize int64, rangeHeader string) (*RangesFile, error)
Reference: https://stackoverflow.com/questions/18315787/http-1-1-response-to-multiple-range . Return the body that will read from file according to ranges, which are parsed from the http request "Range" header. The returned body is intended to be used as an http response body. file will be closed when the body is closed. Note that http ranges are inclusive. E.g. "Range: bytes=0-499" : first 500 bytes.
func (*RangesFile) Close ¶ added in v0.10.0
func (rfb *RangesFile) Close() error
func (*RangesFile) SetHeader ¶ added in v0.10.0
func (rfb *RangesFile) SetHeader(header http.Header)
type TimestampTime ¶ added in v0.10.0
A custom time format that: when unmarshaled from JSON, can be parsed from multiple time formats; when marshaled into JSON, is serialized to a timestamp seconds number.
func (*TimestampTime) Format ¶ added in v0.10.0
func (ct *TimestampTime) Format(layout string) string
func (TimestampTime) MarshalJSON ¶ added in v0.10.0
func (ct TimestampTime) MarshalJSON() ([]byte, error)
func (*TimestampTime) String ¶ added in v0.10.0
func (ct *TimestampTime) String() string
func (*TimestampTime) UnmarshalJSON ¶ added in v0.10.0
func (ct *TimestampTime) UnmarshalJSON(b []byte) (err error)