Documentation ¶
Overview ¶
Archive holds all common model definitions for archivers 2.0.
TODO - turn "Metadata" into github.com/datatogether/metablocks.Metablock
Index ¶
- Variables
- func CalcHash(data []byte) (string, error)
- func ContentUrlsCount(db sqlutil.Queryable) (count int, err error)
- func CountPrimers(db sqlutil.Queryable) (count int64, err error)
- func CountSources(db sqlutil.Queryable) (count int, err error)
- func FileUrl(url *Url) string
- func MetadataCountByKey(db sqlutil.Queryable, keyId string) (count int, err error)
- func NormalizeURL(u *url.URL) *url.URL
- func NormalizeURLString(url string) (string, error)
- func ValidArchivingUrl(db sqlutil.Queryable, url string) error
- func WriteSnapshot(store datastore.Datastore, u *Url) error
- type Collection
- func (c Collection) DatastoreType() string
- func (c *Collection) Delete(store datastore.Datastore) error
- func (c *Collection) DeleteItems(store datastore.Datastore, items []*CollectionItem) error
- func (c Collection) GetId() string
- func (c *Collection) ItemCount(store datastore.Datastore) (count int, err error)
- func (c Collection) Key() datastore.Key
- func (c *Collection) NewSQLModel(key datastore.Key) sql_datastore.Model
- func (c *Collection) Read(store datastore.Datastore) error
- func (c *Collection) ReadItems(store datastore.Datastore, orderby string, limit, offset int) (items []*CollectionItem, err error)
- func (c *Collection) SQLParams(cmd sql_datastore.Cmd) []interface{}
- func (c Collection) SQLQuery(cmd sql_datastore.Cmd) string
- func (c *Collection) Save(store datastore.Datastore) (err error)
- func (c *Collection) SaveItems(store datastore.Datastore, items []*CollectionItem) error
- func (c *Collection) UnmarshalSQL(row sqlutil.Scannable) (err error)
- type CollectionItem
- func (c CollectionItem) DatastoreType() string
- func (c *CollectionItem) Delete(store datastore.Datastore) error
- func (c CollectionItem) GetId() string
- func (c CollectionItem) Key() datastore.Key
- func (c *CollectionItem) NewSQLModel(key datastore.Key) sql_datastore.Model
- func (c *CollectionItem) Read(store datastore.Datastore) error
- func (c *CollectionItem) SQLParams(cmd sql_datastore.Cmd) []interface{}
- func (c CollectionItem) SQLQuery(cmd sql_datastore.Cmd) string
- func (c *CollectionItem) Save(store datastore.Datastore) (err error)
- func (c *CollectionItem) UnmarshalSQL(row sqlutil.Scannable) (err error)
- type Consensus
- type CustomCrawl
- func (CustomCrawl) DatastoreType() string
- func (c *CustomCrawl) Delete(store datastore.Datastore) error
- func (c CustomCrawl) GetId() string
- func (u CustomCrawl) Key() datastore.Key
- func (c *CustomCrawl) NewSQLModel(key datastore.Key) sql_datastore.Model
- func (c *CustomCrawl) Read(store datastore.Datastore) error
- func (c *CustomCrawl) SQLParams(cmd sql_datastore.Cmd) []interface{}
- func (c *CustomCrawl) SQLQuery(cmd sql_datastore.Cmd) string
- func (c *CustomCrawl) Save(store datastore.Datastore) (err error)
- func (c *CustomCrawl) UnmarshalSQL(row sqlutil.Scannable) (err error)
- type DataRepo
- func (d *DataRepo) DatastoreType() string
- func (d *DataRepo) Delete(store datastore.Datastore) error
- func (d *DataRepo) GetId() string
- func (d *DataRepo) Key() datastore.Key
- func (d *DataRepo) NewSQLModel(key datastore.Key) sql_datastore.Model
- func (d *DataRepo) Read(store datastore.Datastore) error
- func (d DataRepo) SQLParams(cmd sql_datastore.Cmd) []interface{}
- func (d DataRepo) SQLQuery(cmd sql_datastore.Cmd) string
- func (d *DataRepo) Save(store datastore.Datastore) (err error)
- func (d *DataRepo) UnmarshalSQL(row sqlutil.Scannable) (err error)
- type File
- type Link
- func (l *Link) DatastoreType() string
- func (l *Link) Delete(store datastore.Datastore) error
- func (l *Link) GetId() string
- func (l *Link) Insert(store datastore.Datastore) error
- func (l *Link) Key() datastore.Key
- func (l *Link) NewSQLModel(key datastore.Key) sql_datastore.Model
- func (l *Link) Read(store datastore.Datastore) (err error)
- func (l *Link) SQLParams(cmd sql_datastore.Cmd) []interface{}
- func (l *Link) SQLQuery(cmd sql_datastore.Cmd) string
- func (l *Link) UnmarshalSQL(row sqlutil.Scannable) error
- func (l *Link) Update(store datastore.Datastore) error
- type Meta
- type Metadata
- func LatestMetadata(db sqlutil.Queryable, keyId, subject string) (m *Metadata, err error)
- func MetadataByKey(db sqlutil.Queryable, keyId string, limit, offset int) ([]*Metadata, error)
- func MetadataBySubject(db sqlutil.Queryable, subject string) ([]*Metadata, error)
- func NextMetadata(db sqlutil.Queryable, keyId, subject string) (*Metadata, error)
- func (m Metadata) DatastoreType() string
- func (m Metadata) GetId() string
- func (m *Metadata) HashMaps() (keyMap map[string]string, valueMap map[string]interface{}, err error)
- func (m *Metadata) HashableBytes() ([]byte, error)
- func (m Metadata) Key() datastore.Key
- func (m Metadata) String() string
- func (m *Metadata) UnmarshalSQL(row sqlutil.Scannable) error
- func (m *Metadata) Write(store datastore.Datastore) error
- type Primer
- func (p *Primer) CalcStats(db *sql.DB) error
- func (p Primer) DatastoreType() string
- func (p *Primer) Delete(store datastore.Datastore) error
- func (p Primer) GetId() string
- func (p Primer) Key() datastore.Key
- func (p *Primer) NewSQLModel(key datastore.Key) sql_datastore.Model
- func (p *Primer) Read(store datastore.Datastore) error
- func (p *Primer) ReadSources(db sqlutil.Queryable) error
- func (p *Primer) ReadSubPrimers(db sqlutil.Queryable) error
- func (p *Primer) SQLParams(cmd sql_datastore.Cmd) []interface{}
- func (p *Primer) SQLQuery(cmd sql_datastore.Cmd) string
- func (p *Primer) Save(store datastore.Datastore) (err error)
- func (p *Primer) UnmarshalSQL(row sqlutil.Scannable) error
- type PrimerStats
- type Snapshot
- type Source
- func (c *Source) AsUrl(db *sql.DB) (*Url, error)
- func (s *Source) CalcStats(db *sql.DB) error
- func (s Source) DatastoreType() string
- func (s *Source) Delete(store datastore.Datastore) error
- func (s *Source) DescribedContent(db sqlutil.Queryable, limit, offset int) ([]*Url, error)
- func (s Source) GetId() string
- func (s Source) Key() datastore.Key
- func (s *Source) MatchesUrl(rawurl string) bool
- func (s *Source) NewSQLModel(key datastore.Key) sql_datastore.Model
- func (s *Source) Read(store datastore.Datastore) error
- func (s *Source) SQLParams(cmd sql_datastore.Cmd) []interface{}
- func (s *Source) SQLQuery(cmd sql_datastore.Cmd) string
- func (s *Source) Save(store datastore.Datastore) (err error)
- func (s *Source) UndescribedContent(db sqlutil.Queryable, limit, offset int) ([]*Url, error)
- func (c *Source) UnmarshalSQL(row sqlutil.Scannable) error
- type SourceStats
- type Uncrawlable
- func (u Uncrawlable) DatastoreType() string
- func (u *Uncrawlable) Delete(store datastore.Datastore) error
- func (u Uncrawlable) GetId() string
- func (u Uncrawlable) Key() datastore.Key
- func (u *Uncrawlable) NewSQLModel(key datastore.Key) sql_datastore.Model
- func (u *Uncrawlable) Read(store datastore.Datastore) error
- func (u *Uncrawlable) SQLParams(cmd sql_datastore.Cmd) []interface{}
- func (u *Uncrawlable) SQLQuery(cmd sql_datastore.Cmd) string
- func (u *Uncrawlable) Save(store datastore.Datastore) (err error)
- func (u *Uncrawlable) UnmarshalSQL(row sqlutil.Scannable) (err error)
- type Url
- func ContentUrls(db sqlutil.Queryable, limit, skip int) ([]*Url, error)
- func FetchedUrls(db sqlutil.Queryable, limit, offset int) ([]*Url, error)
- func ListUrls(store datastore.Datastore, limit, offset int) ([]*Url, error)
- func Search(db sqlutil.Queryable, q string, limit, offset int) ([]*Url, error)
- func UnfetchedUrls(db sqlutil.Queryable, limit, offset int) ([]*Url, error)
- func UnmarshalBoundedUrls(rows *sql.Rows, limit int) ([]*Url, error)
- func UnmarshalUrls(rows *sql.Rows) ([]*Url, error)
- func UrlsForHash(db sqlutil.Queryable, hash string) ([]*Url, error)
- func (u Url) DatastoreType() string
- func (u *Url) Delete(store datastore.Datastore) error
- func (u *Url) ExtractDocLinks(store datastore.Datastore, doc *goquery.Document) ([]*Link, error)
- func (u *Url) File() (*File, error)
- func (u *Url) Get(store datastore.Datastore) (body []byte, links []*Link, err error)
- func (u Url) GetId() string
- func (u *Url) HandleGetResponse(store datastore.Datastore, res *http.Response) (body []byte, links []*Link, err error)
- func (u *Url) HeadersMap() (headers map[string]string)
- func (u *Url) InboundLinks(db sqlutil.Queryable) ([]string, error)
- func (u Url) Key() datastore.Key
- func (u *Url) NewSQLModel(key datastore.Key) sql_datastore.Model
- func (u *Url) OutboundLinks(db sqlutil.Queryable) ([]string, error)
- func (u *Url) ParsedUrl() (*url.URL, error)
- func (u *Url) Read(store datastore.Datastore) error
- func (u *Url) SQLParams(cmd sql_datastore.Cmd) []interface{}
- func (u *Url) SQLQuery(cmd sql_datastore.Cmd) string
- func (u *Url) Save(store datastore.Datastore) (err error)
- func (u *Url) ShouldEnqueueGet() bool
- func (u *Url) ShouldEnqueueHead() bool
- func (u *Url) ShouldPutS3() bool
- func (u *Url) SuspectedContentUrl() bool
- func (u *Url) UnmarshalSQL(row sqlutil.Scannable) (err error)
- func (u *Url) WarcRequest() *warc.Request
Constants ¶
This section is empty.
Variables ¶
var (
    // how long before a url is considered stale. default is 72 hours.
    StaleDuration = time.Hour * 72

    // all these need to be set for file saving to work
    AwsRegion          string
    AwsAccessKeyId     string
    AwsSecretAccessKey string
    AwsS3BucketName    string
    AwsS3BucketPath    string
)
var (
    ErrNotFound        = fmt.Errorf("Not Found")
    ErrInvalidResponse = fmt.Errorf("Datastore returned an invalid response")
)
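All of the Aws* variables above must be set before file saving will work. A minimal configuration sketch (values and environment variable names are placeholders, not part of this package):

func configureS3() {
    archive.AwsRegion = os.Getenv("AWS_REGION")
    archive.AwsAccessKeyId = os.Getenv("AWS_ACCESS_KEY_ID")
    archive.AwsSecretAccessKey = os.Getenv("AWS_SECRET_ACCESS_KEY")
    archive.AwsS3BucketName = "my-archive-bucket" // placeholder
    archive.AwsS3BucketPath = "collections/"      // placeholder
}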
Functions ¶
func CalcHash ¶
CalcHash calculates the multihash key for a given slice of bytes. TODO - find a proper home for this
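A minimal usage sketch (godoc example style, imports elided; the exact multihash encoding is whatever CalcHash produces):

func hashExample() error {
    hash, err := archive.CalcHash([]byte("hello, archive"))
    if err != nil {
        return err
    }
    // hash is a multihash string, usable as a fixed content identifier
    fmt.Println(hash)
    return nil
}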
func CountPrimers ¶
CountPrimers returns the total number of primers
func CountSources ¶
CountSources grabs the total number of sources
func MetadataCountByKey ¶
func NormalizeURL ¶
NormalizeURL removes inconsistencies from a given url
func NormalizeURLString ¶
NormalizeURLString removes inconsistencies from a given url string
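A usage sketch (the input url is a placeholder; the exact normalizations applied are whatever NormalizeURLString implements):

func normalizeExample() error {
    s, err := archive.NormalizeURLString("HTTP://Example.COM/some/path")
    if err != nil {
        return err
    }
    // s is the canonicalized form of the input url
    fmt.Println(s)
    return nil
}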
Types ¶
type Collection ¶
type Collection struct {
// version 4 uuid
Id string `json:"id"`
// Created timestamp rounded to seconds in UTC
Created time.Time `json:"created"`
// Updated timestamp rounded to seconds in UTC
Updated time.Time `json:"updated"`
// sha256 multihash of the public key that created this collection
Creator string `json:"creator"`
// human-readable title of the collection
Title string `json:"title"`
// description of the collection
Description string `json:"description"`
// url this collection originates from
Url string `json:"url,omitempty"`
}
Collections are generic groupings of content. A collection can be thought of as a csv file listing content hashes as the first column, and whatever other information is necessary in subsequent columns
func CollectionsByCreator ¶
func ListCollections ¶
func ListCollections(store datastore.Datastore, limit, offset int) ([]*Collection, error)
func (Collection) DatastoreType ¶
func (c Collection) DatastoreType() string
func (*Collection) Delete ¶
func (c *Collection) Delete(store datastore.Datastore) error
Delete a collection, should only be done for erroneous additions
func (*Collection) DeleteItems ¶
func (c *Collection) DeleteItems(store datastore.Datastore, items []*CollectionItem) error
DeleteItems removes a given list of items from the collection
func (Collection) GetId ¶
func (c Collection) GetId() string
func (*Collection) ItemCount ¶
func (c *Collection) ItemCount(store datastore.Datastore) (count int, err error)
ItemCount gets the number of items in the collection
func (Collection) Key ¶
func (c Collection) Key() datastore.Key
func (*Collection) NewSQLModel ¶
func (c *Collection) NewSQLModel(key datastore.Key) sql_datastore.Model
func (*Collection) Read ¶
func (c *Collection) Read(store datastore.Datastore) error
Read collection from db
func (*Collection) ReadItems ¶
func (c *Collection) ReadItems(store datastore.Datastore, orderby string, limit, offset int) (items []*CollectionItem, err error)
ReadItems reads a bounded set of items from the collection. The orderby param currently only supports SQL-style input of a single property, eg: "index" or "index DESC"
func (*Collection) SQLParams ¶
func (c *Collection) SQLParams(cmd sql_datastore.Cmd) []interface{}
func (Collection) SQLQuery ¶
func (c Collection) SQLQuery(cmd sql_datastore.Cmd) string
func (*Collection) Save ¶
func (c *Collection) Save(store datastore.Datastore) (err error)
Save a collection
func (*Collection) SaveItems ¶
func (c *Collection) SaveItems(store datastore.Datastore, items []*CollectionItem) error
SaveItems saves a slice of items to the collection. It's up to you to ensure that the "index" param doesn't get all messed up. TODO - validate / automate the Index param?
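A sketch of assigning Index values explicitly and reading a page back, assuming a configured datastore.Datastore (the datastore interface is assumed to be github.com/ipfs/go-datastore, per the signatures above):

func fillCollection(store datastore.Datastore, c *archive.Collection, urls []archive.Url) ([]*archive.CollectionItem, error) {
    items := make([]*archive.CollectionItem, len(urls))
    for i, u := range urls {
        // the caller is responsible for keeping Index values consistent
        items[i] = &archive.CollectionItem{Url: u, Index: i}
    }
    if err := c.SaveItems(store, items); err != nil {
        return nil, err
    }
    // read the first 20 items back, ordered by index
    return c.ReadItems(store, "index", 20, 0)
}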
func (*Collection) UnmarshalSQL ¶
func (c *Collection) UnmarshalSQL(row sqlutil.Scannable) (err error)
UnmarshalSQL reads an sql response into the collection receiver. It expects the request to have used collectionCols() for selection
type CollectionItem ¶
type CollectionItem struct {
// Collection Items are Url's at heart
Url
// this item's index in the collection
Index int `json:"index"`
// unique description of this item
Description string `json:"description"`
// contains filtered or unexported fields
}
CollectionItem is an item in a collection. They are urls with added collection-specific information. This has the effect of storing all of the "main properties" of a collection item in the common list of urls
func (CollectionItem) DatastoreType ¶
func (c CollectionItem) DatastoreType() string
DatastoreType is to satisfy sql_datastore.Model interface
func (*CollectionItem) Delete ¶
func (c *CollectionItem) Delete(store datastore.Datastore) error
Delete a collection item
func (CollectionItem) GetId ¶
func (c CollectionItem) GetId() string
GetId returns the Id of the collectionItem, which is the id of the underlying Url
func (CollectionItem) Key ¶
func (c CollectionItem) Key() datastore.Key
Key is somewhat special as CollectionItems always have a Collection as their parent. This relationship is represented in directory-form: /Collection:[collection-id]/CollectionItem:[item-id]
func (*CollectionItem) NewSQLModel ¶
func (c *CollectionItem) NewSQLModel(key datastore.Key) sql_datastore.Model
func (*CollectionItem) Read ¶
func (c *CollectionItem) Read(store datastore.Datastore) error
Read collection item from db
func (*CollectionItem) SQLParams ¶
func (c *CollectionItem) SQLParams(cmd sql_datastore.Cmd) []interface{}
SQLParams is to satisfy the sql_datastore.Model interface, it returns this CollectionItem's parameters for a given type of SQL command
func (CollectionItem) SQLQuery ¶
func (c CollectionItem) SQLQuery(cmd sql_datastore.Cmd) string
SQLQuery is to satisfy the sql_datastore.Model interface, it returns the concrete query for a given type of SQL command
func (*CollectionItem) Save ¶
func (c *CollectionItem) Save(store datastore.Datastore) (err error)
Save a collection item to a store
func (*CollectionItem) UnmarshalSQL ¶
func (c *CollectionItem) UnmarshalSQL(row sqlutil.Scannable) (err error)
UnmarshalSQL reads an sql response into the collection item receiver. It expects the request to have used collectionCols() for selection
type Consensus ¶
Consensus is an enumeration of Meta graph values arranged by key
func SumConsensus ¶
func SumConsensus(subject string, blocks []*Metadata) (c Consensus, values map[string]interface{}, err error)
SumConsensus tallies the consensus around a given subject hash from a provided Metadata slice
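A sketch of tallying consensus for a subject hash, assuming sqlutil is github.com/datatogether/sqlutil as the signatures suggest:

func consensusFor(db sqlutil.Queryable, subject string) (map[string]interface{}, error) {
    // gather all metadata blocks that describe this subject hash
    blocks, err := archive.MetadataBySubject(db, subject)
    if err != nil {
        return nil, err
    }
    // tally agreement across blocks into a single value map
    _, values, err := archive.SumConsensus(subject, blocks)
    return values, err
}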
type CustomCrawl ¶
type CustomCrawl struct {
// version 4 uuid
Id string `json:"id"`
// Created timestamp rounded to seconds in UTC
Created time.Time `json:"created"`
// Updated timestamp rounded to seconds in UTC
Updated time.Time `json:"updated"`
// Json Web token that created this request
Jwt string `json:"jwt"`
// MorphRunId
MorphRunId string `json:"morphRunId"`
// timestamp this run was completed
DateCompleted time.Time
// repository for code that ran the crawl
GithubRepo string `json:"githubRepo"`
// OriginalUrl
OriginalUrl string `json:"originalUrl"`
// SqliteChecksum
SqliteChecksum string `json:"sqliteChecksum"`
}
CustomCrawls are urls that contain content that cannot be extracted with traditional web crawling / scraping methods. This model classifies the nature of the custom crawl, setting the stage for writing custom scripts to extract the underlying content.
func ListCustomCrawls ¶
func ListCustomCrawls(store datastore.Datastore, limit, offset int) ([]*CustomCrawl, error)
func (CustomCrawl) DatastoreType ¶
func (CustomCrawl) DatastoreType() string
func (*CustomCrawl) Delete ¶
func (c *CustomCrawl) Delete(store datastore.Datastore) error
Delete a custom crawl, should only be done for erroneous additions
func (CustomCrawl) GetId ¶
func (c CustomCrawl) GetId() string
func (CustomCrawl) Key ¶
func (u CustomCrawl) Key() datastore.Key
func (*CustomCrawl) NewSQLModel ¶
func (c *CustomCrawl) NewSQLModel(key datastore.Key) sql_datastore.Model
func (*CustomCrawl) Read ¶
func (c *CustomCrawl) Read(store datastore.Datastore) error
Read custom crawl from db
func (*CustomCrawl) SQLParams ¶
func (c *CustomCrawl) SQLParams(cmd sql_datastore.Cmd) []interface{}
SQLParams formats a custom crawl struct for inserting / updating into postgres
func (*CustomCrawl) SQLQuery ¶
func (c *CustomCrawl) SQLQuery(cmd sql_datastore.Cmd) string
func (*CustomCrawl) Save ¶
func (c *CustomCrawl) Save(store datastore.Datastore) (err error)
Save a custom crawl
func (*CustomCrawl) UnmarshalSQL ¶
func (c *CustomCrawl) UnmarshalSQL(row sqlutil.Scannable) (err error)
UnmarshalSQL reads an sql response into the custom crawl receiver. It expects the request to have used customCrawlCols() for selection
type DataRepo ¶
type DataRepo struct {
// version 4 uuid
Id string
// Created timestamp rounded to seconds in UTC
Created time.Time `json:"created"`
// Updated timestamp rounded to seconds in UTC
Updated time.Time `json:"updated"`
// Title of this data repository
Title string `json:"title"`
// Human-readable description
Description string `json:"description"`
// Main url link to the DataRepository
Url string `json:"url"`
}
DataRepo is a place that holds data in a structured format
func (*DataRepo) DatastoreType ¶
func (*DataRepo) NewSQLModel ¶
func (d *DataRepo) NewSQLModel(key datastore.Key) sql_datastore.Model
func (DataRepo) SQLParams ¶
func (d DataRepo) SQLParams(cmd sql_datastore.Cmd) []interface{}
type File ¶
File is a buffered byte slice often made from a GET response body. It provides easy hash-calculation & storage to S3. TODO - deprecate, use s3-datastore, or, uh... the distributed web
func NewFileFromRes ¶
NewFileFromRes generates a new file by consuming & closing a given response body
type Link ¶
type Link struct {
// Calculated Hash for fixed ID purposes
Hash string
// created timestamp rounded to seconds in UTC
Created time.Time `json:"created"`
// updated timestamp rounded to seconds in UTC
Updated time.Time `json:"updated"`
// origin url of the linking document
Src *Url `json:"src"`
// absolute url of the <a> href property
Dst *Url `json:"dst"`
}
A Link represents an <a> tag in an html document src whose href attribute points to the url that resolves to dst. Both src & dst must be stored as urls
func ReadDstContentLinks ¶
ReadDstContentLinks returns a list of links that specify a given url as src that are content urls
func ReadDstLinks ¶
ReadDstLinks returns all links that specify a given url as src
func ReadSrcLinks ¶
ReadSrcLinks returns all links that specify a given url as dst
func (*Link) DatastoreType ¶
func (*Link) NewSQLModel ¶
func (l *Link) NewSQLModel(key datastore.Key) sql_datastore.Model
func (*Link) SQLParams ¶
func (l *Link) SQLParams(cmd sql_datastore.Cmd) []interface{}
type Meta ¶
type Meta struct {
Url string `json:"url"`
Date *time.Time `json:"date,omitempty"`
HeadersTook int `json:"headersTook,omitempty"`
Id string `json:"id"`
Status int `json:"status"`
ContentSniff string `json:"contentSniff,omitempty"`
RawHeaders []string `json:"rawHeaders"`
Headers map[string]string `json:"headers"`
DownloadTook int `json:"downloadTook,omitempty"`
Sha256 string `json:"sha256"`
Multihash string `json:"multihash"`
Consensus *Consensus `json:"consensus"`
InboundLinks []string `json:"inboundLinks,omitempty"`
OutboundLinks []string `json:"outboundLinks,omitempty"`
}
Meta is a struct for sharing our knowledge of a url with other services
type Metadata ¶
type Metadata struct {
// Hash is the sha256 multihash of all other fields in metadata
// as expressed by Metadata.HashableBytes()
Hash string `json:"hash"`
// Creation timestamp
Timestamp time.Time `json:"timestamp"`
// Sha256 multihash of the public key that signed this metadata
KeyId string `json:"keyId"`
// Sha256 multihash of the content this metadata is describing
Subject string `json:"subject"`
// Hash value of the metadata that came before this, if any
Prev string `json:"prev"`
// Actual metadata, a valid json Object
Meta map[string]interface{} `json:"meta"`
}
Metadata is a signed block of key-value information describing a subject hash. There can be many metadata entries for a given subject
func LatestMetadata ¶
LatestMetadata gives the metadata with the most recent timestamp for a given keyId & subject combination, if one exists
func MetadataByKey ¶
func MetadataBySubject ¶
MetadataBySubject returns all metadata for a given subject hash
func NextMetadata ¶
NextMetadata returns the next metadata block for a given subject. If no metablock exists a new one is created
func (Metadata) DatastoreType ¶
func (*Metadata) HashMaps ¶
func (m *Metadata) HashMaps() (keyMap map[string]string, valueMap map[string]interface{}, err error)
TODO - this is ripped from metablocks
func (*Metadata) HashableBytes ¶
HashableBytes returns the exact structure to be used for hash generation
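Per the Hash field's comment above, Hash is the multihash of HashableBytes(); a sketch of verifying a block under that assumption:

func verifyMetadataHash(m *archive.Metadata) (bool, error) {
    data, err := m.HashableBytes()
    if err != nil {
        return false, err
    }
    // recompute the multihash & compare against the stored value
    hash, err := archive.CalcHash(data)
    if err != nil {
        return false, err
    }
    return hash == m.Hash, nil
}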
func (*Metadata) UnmarshalSQL ¶
UnmarshalSQL reads an SQL result into the metadata receiver
type Primer ¶
type Primer struct {
// version 4 uuid
Id string `json:"id"`
// Created timestamp rounded to seconds in UTC
Created time.Time `json:"created"`
// Updated timestamp rounded to seconds in UTC
Updated time.Time `json:"updated"`
// shortest possible expression of this primer's name, usually an acronym
// called shortTitle b/c acronyms collide often & users should feel free to
// expand on acronyms
ShortTitle string `json:"shortTitle"`
// human-readable title of this primer.
Title string `json:"title"`
// long-form description of this primer.
// TODO - Maybe we should store this in markdown format?
Description string `json:"description"`
// parent primer (if any)
Parent *Primer `json:"parent"`
// child-primers list
SubPrimers []*Primer `json:"subPrimers,omitempty"`
// metadata to associate with this primer
Meta map[string]interface{} `json:"meta"`
// statistics about this primer
Stats *PrimerStats `json:"stats"`
// collection of child sources
Sources []*Source `json:"sources,omitempty"`
}
Primer is tracking information about an abstract group of content. For example, a government agency is a primer
func BasePrimers ¶
BasePrimers lists primers that have no parent
func ListPrimers ¶
ListPrimers
func UnmarshalBoundedPrimers ¶
UnmarshalBoundedPrimers turns sql.Rows into primers, expecting len(rows) <= limit
func (Primer) DatastoreType ¶
func (*Primer) NewSQLModel ¶
func (p *Primer) NewSQLModel(key datastore.Key) sql_datastore.Model
func (*Primer) ReadSources ¶
ReadSources reads child sources of this primer
func (*Primer) ReadSubPrimers ¶
ReadSubPrimers reads child primers of this primer
func (*Primer) SQLParams ¶
func (p *Primer) SQLParams(cmd sql_datastore.Cmd) []interface{}
type PrimerStats ¶
type PrimerStats struct {
UrlCount int `json:"urlCount"`
ArchivedUrlCount int `json:"archivedUrlCount"`
ContentUrlCount int `json:"contentUrlCount"`
ContentMetadataCount int `json:"contentMetadataCount"`
SourcesUrlCount int `json:"sourcesUrlCount"`
SourcesArchivedUrlCount int `json:"sourcesArchivedUrlCount"`
}
TODO - finish
type Snapshot ¶
type Snapshot struct {
// The url that was requested
Url string `json:"url"`
// Time this request was issued
Created time.Time `json:"date"`
// Returned Status
Status int `json:"status,omitempty"`
// Time to complete response in milliseconds
Duration int64 `json:"downloadTook,omitempty"`
// Record of all returned headers in [key,value,key,value...]
Headers []string `json:"headers,omitempty"`
// Multihash of response body (if any)
Hash string `json:"hash,omitempty"`
}
A snapshot is a record of a GET request to a url. There can be many snapshots of a given url
func SnapshotsForUrl ¶
SnapshotsForUrl returns all snapshots for a given url string
type Source ¶
type Source struct {
// version 4 uuid
Id string `json:"id"`
// Created timestamp rounded to seconds in UTC
Created time.Time `json:"created"`
// Updated timestamp rounded to seconds in UTC
Updated time.Time `json:"updated"`
// human-readable title for this source
Title string `json:"title"`
// description of the source, ideally one paragraph
Description string `json:"description"`
// absolute url to serve as the root of the
Url string `json:"url"`
// primer this source is connected to
Primer *Primer `json:"primer"`
// weather or not this url should be crawled be a web crawler
Crawl bool `json:"crawl"`
// amount of time before a link within this tree is considered in need
// of re-checking for changes. currently not in use, but planned.
StaleDuration time.Duration `json:"staleDuration"`
// yeah this'll probably get depricated. Part of a half-baked alerts feature idea.
LastAlertSent *time.Time `json:"lastAlertSent"`
// Metadata associated with this source that should be added to all
// child urls, currently not in use, but planned
Meta map[string]interface{} `json:"meta"`
// Stats about this source
Stats *SourceStats `json:"stats"`
}
Source is a concrete handle for archiving. Crawlers use a source's url as the base of a link tree. Sources are connected to a parent Primer to provide context & organization.
func CrawlingSources ¶
CrawlingSources lists sources with crawling = true, paginated
func ListSources ¶
ListSources lists all sources from most to least recent, paginated
func UnmarshalBoundedSources ¶
UnmarshalBoundedSources turns a standard sql.Rows of Source results into a *Source slice
func (*Source) AsUrl ¶
AsUrl retrieves the url that corresponds to the crawlUrl. If one doesn't exist, a new url is created & saved
func (Source) DatastoreType ¶
func (*Source) DescribedContent ¶
DescribedContent returns a list of content-urls from this subprimer that need work. TODO - this currently doesn't check the status of metadata, gonna need to do that
func (*Source) MatchesUrl ¶
MatchesUrl checks to see if the url pattern of Source is contained within the passed-in url string. TODO - make this more sophisticated, checking against the beginning of the url to avoid things like accidental matches, or urls in query params matching within rawurl
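A sketch of the containment behavior, and the accidental match the TODO warns about, assuming a plain substring check (urls are placeholders):

func matchExamples() {
    s := &archive.Source{Url: "https://example.gov/data"}
    // true: within the source's link tree
    fmt.Println(s.MatchesUrl("https://example.gov/data/report.csv"))
    // also true under a substring check - the query-param match the TODO describes
    fmt.Println(s.MatchesUrl("https://elsewhere.org/?ref=https://example.gov/data"))
}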
func (*Source) NewSQLModel ¶
func (s *Source) NewSQLModel(key datastore.Key) sql_datastore.Model
func (*Source) SQLParams ¶
func (s *Source) SQLParams(cmd sql_datastore.Cmd) []interface{}
func (*Source) UndescribedContent ¶
UndescribedContent returns a list of content-urls from this subprimer that need work. TODO - this currently doesn't check the status of metadata, gonna need to do that
type SourceStats ¶
type Uncrawlable ¶
type Uncrawlable struct {
// version 4 uuid
Id string `json:"id"`
// url from urls table, must be unique
Url string `json:"url"`
// Created timestamp rounded to seconds in UTC
Created time.Time `json:"created"`
// Updated timestamp rounded to seconds in UTC
Updated time.Time `json:"updated"`
// sha256 multihash of the public key that created this uncrawlable
Creator string `json:"creator"`
// name of person making submission
Name string `json:"name"`
// email address of person making submission
Email string `json:"email"`
// name of data rescue event where uncrawlable was added
EventName string `json:"eventName"`
// agency name
Agency string `json:"agency"`
// EDGI agency Id
AgencyId string `json:"agencyId"`
// EDGI subagency Id
SubagencyId string `json:"subagencyId"`
// EDGI organization Id
OrgId string `json:"orgId"`
// EDGI Suborganization Id
SuborgId string `json:"suborgId"`
// EDGI subprimer Id
SubprimerId string `json:"subprimerId"`
// flag for ftp content
Ftp bool `json:"ftp"`
// flag for 'database'
// TODO - refine this?
Database bool `json:"database"`
// flag for visualization / interactive content
// obfuscating data
Interactive bool `json:"interactive"`
// flag for a page that links to many files
ManyFiles bool `json:"manyFiles"`
// uncrawlable comments
Comments string `json:"comments"`
}
Uncrawlables are urls that contain content that cannot be extracted with traditional web crawling / scraping methods. This model classifies the nature of the uncrawlable, setting the stage for writing custom scripts to extract the underlying content.
func ListUncrawlables ¶
func ListUncrawlables(store datastore.Datastore, limit, offset int) ([]*Uncrawlable, error)
func (Uncrawlable) DatastoreType ¶
func (u Uncrawlable) DatastoreType() string
func (*Uncrawlable) Delete ¶
func (u *Uncrawlable) Delete(store datastore.Datastore) error
Delete an uncrawlable, should only be done for erroneous additions
func (Uncrawlable) GetId ¶
func (u Uncrawlable) GetId() string
func (Uncrawlable) Key ¶
func (u Uncrawlable) Key() datastore.Key
func (*Uncrawlable) NewSQLModel ¶
func (u *Uncrawlable) NewSQLModel(key datastore.Key) sql_datastore.Model
func (*Uncrawlable) Read ¶
func (u *Uncrawlable) Read(store datastore.Datastore) error
Read uncrawlable from db
func (*Uncrawlable) SQLParams ¶
func (u *Uncrawlable) SQLParams(cmd sql_datastore.Cmd) []interface{}
SQLParams formats an uncrawlable struct for inserting / updating into postgres
func (*Uncrawlable) SQLQuery ¶
func (u *Uncrawlable) SQLQuery(cmd sql_datastore.Cmd) string
func (*Uncrawlable) Save ¶
func (u *Uncrawlable) Save(store datastore.Datastore) (err error)
Save an uncrawlable
func (*Uncrawlable) UnmarshalSQL ¶
func (u *Uncrawlable) UnmarshalSQL(row sqlutil.Scannable) (err error)
UnmarshalSQL reads an sql response into the uncrawlable receiver. It expects the request to have used uncrawlableCols() for selection
type Url ¶
type Url struct {
// version 4 uuid
// urls can/should/must also be uniquely identified by Url
Id string `json:"id,omitempty"`
// A Url is uniquely identified by URI string without
// any normalization. Url strings must always be absolute.
Url string `json:"url"`
// Created timestamp rounded to seconds in UTC
Created time.Time `json:"created,omitempty"`
// Updated timestamp rounded to seconds in UTC
Updated time.Time `json:"updated,omitempty"`
// Timestamp for most recent GET request
LastGet *time.Time `json:"lastGet,omitempty"`
// Timestamp for most recent HEAD request
LastHead *time.Time `json:"lastHead,omitempty"`
// Returned HTTP status code
Status int `json:"status,omitempty"`
// Returned HTTP 'Content-Type' header
ContentType string `json:"contentType,omitempty"`
// Result of mime sniffing to GET response body, as detailed at https://mimesniff.spec.whatwg.org
ContentSniff string `json:"contentSniff,omitempty"`
// ContentLength in bytes, will be the header value if only a HEAD request has been issued
// After a valid GET response, it will be set to the length of the returned response
ContentLength int64 `json:"contentLength,omitempty"`
// best guess at a filename based on url string analysis
// if you just want to know what type of file this is, this is the field to use.
FileName string `json:"fileName,omitempty"`
// HTML Title tag attribute
Title string `json:"title,omitempty"`
// Time remote server took to transfer content in milliseconds.
// TODO - currently not implemented
DownloadTook int `json:"downloadTook,omitempty"`
// Time taken to fetch headers, in milliseconds. currently not implemented
HeadersTook int `json:"headersTook,omitempty"`
// key-value slice of returned headers from most recent HEAD or GET request
// stored in the form [key,value,key,value...]
Headers []string `json:"headers,omitempty"`
// any associative metadata
Meta map[string]interface{} `json:"meta,omitempty"`
// Hash is a multihash sha-256 of res.Body
Hash string `json:"hash,omitempty"`
// Url to saved content
ContentUrl string `json:"contentUrl,omitempty"`
// Uncrawlable information
Uncrawlable *Uncrawlable `json:"uncrawlable,omitempty"`
}
URL represents... a url. TODO - consider renaming to Resource
func UnmarshalUrls ¶
UnmarshalUrls takes an sql cursor & returns a slice of url pointers. It expects columns to match urlCols()
func (Url) DatastoreType ¶
func (*Url) ExtractDocLinks ¶
ExtractDocLinks extracts & stores a page's linked documents by selecting all a[href] links from a given goquery document, using the receiver *Url as the base
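A sketch of feeding a fetched page through goquery (github.com/PuerkitoBio/goquery) and into ExtractDocLinks, assuming a configured datastore.Datastore:

func collectLinks(store datastore.Datastore, u *archive.Url) ([]*archive.Link, error) {
    res, err := http.Get(u.Url)
    if err != nil {
        return nil, err
    }
    defer res.Body.Close()

    doc, err := goquery.NewDocumentFromReader(res.Body)
    if err != nil {
        return nil, err
    }
    // store each a[href] as a Link with u as the src
    return u.ExtractDocLinks(store, doc)
}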
func (*Url) File ¶
File leverages a url's hash to generate a file that can have its bytes read back
func (*Url) HandleGetResponse ¶
func (u *Url) HandleGetResponse(store datastore.Datastore, res *http.Response) (body []byte, links []*Link, err error)
HandleGetResponse performs all necessary actions in response to a GET request, regardless of whether it came from a crawl or archive request
func (*Url) HeadersMap ¶
HeadersMap formats u.Headers (a string slice) as a map[header]value
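Since Headers is stored as a flat [key,value,key,value...] slice (see the Url struct above), HeadersMap pairs entries up. For example:

func headersExample() {
    u := &archive.Url{
        Headers: []string{"Content-Type", "text/html", "Server", "nginx"},
    }
    m := u.HeadersMap()
    fmt.Println(m["Content-Type"]) // "text/html"
}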
func (*Url) InboundLinks ¶
InboundLinks returns a slice of url strings that link to this url
func (*Url) NewSQLModel ¶
func (u *Url) NewSQLModel(key datastore.Key) sql_datastore.Model
func (*Url) OutboundLinks ¶
OutboundLinks returns a slice of url strings that this url links to
func (*Url) SQLParams ¶
func (u *Url) SQLParams(cmd sql_datastore.Cmd) []interface{}
SQLParams formats a url struct for inserting / updating into postgres
func (*Url) ShouldEnqueueGet ¶
ShouldEnqueueGet returns whether the url can be added to the queue for a GET request. Keep in mind only urls whose domain is marked crawl: true in the domains list will be candidates for GET requests. It should return true if:
- the url is of http / https scheme
- it has never been GET'd, or hasn't been GET'd for a period longer than the stale duration
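A sketch of the staleness rule against the package-level StaleDuration (72 hours by default); the url value is a placeholder:

func staleExample() {
    lastGet := time.Now().Add(-100 * time.Hour) // older than the 72h StaleDuration
    u := &archive.Url{
        Url:     "https://example.gov/data.csv",
        LastGet: &lastGet,
    }
    // http(s) scheme & a stale last GET, so this should report true
    fmt.Println(u.ShouldEnqueueGet())
}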
func (*Url) ShouldEnqueueHead ¶
ShouldEnqueueHead returns whether the url can be added to the queue for a HEAD request. It should return true if:
- the url is of http / https scheme
- it has never been GET'd, or hasn't been GET'd for a period longer than the stale duration
func (*Url) ShouldPutS3 ¶
ShouldPutS3 is a chance to override whether the content should be stored
func (*Url) SuspectedContentUrl ¶
SuspectedContentUrl examines the url string, returns true if there's a reasonable chance the url leads to content
func (*Url) UnmarshalSQL ¶
UnmarshalSQL reads an sql response into the url receiver. It expects the request to have used urlCols() for selection