Documentation
¶
Index ¶
- Constants
- Variables
- type CleanupConfiguration
- type CleanupStrategy
- type Configuration
- type Directory
- type ExpirationConfiguration
- type File
- type FileSpec
- type FileState
- type Job
- type JobConfiguration
- type ListEntriesError
- type MatchError
- type Matcher
- type SegmentingConfiguration
- type Source
- type SourceUnmarshaler
- type StatsdConfiguration
- type SwiftLocation
- func (s *SwiftLocation) Connect() error
- func (s *SwiftLocation) DiscoverExistingFiles(matcher Matcher) error
- func (s *SwiftLocation) GetFile(path string, requestHeaders schwift.ObjectHeaders) (io.ReadCloser, FileState, error)
- func (s *SwiftLocation) ListAllFiles() ([]FileSpec, *ListEntriesError)
- func (s *SwiftLocation) ListEntries(path string) ([]FileSpec, *ListEntriesError)
- func (s *SwiftLocation) ObjectAtPath(path string) *schwift.Object
- func (s SwiftLocation) Validate(name string) []error
- type TransferResult
- type URLSource
- func (u *URLSource) Connect() error
- func (u URLSource) GetFile(directoryPath string, requestHeaders schwift.ObjectHeaders) (io.ReadCloser, FileState, error)
- func (u URLSource) ListAllFiles() ([]FileSpec, *ListEntriesError)
- func (u URLSource) ListEntries(directoryPath string) ([]FileSpec, *ListEntriesError)
- func (u *URLSource) Validate(name string) (result []error)
- type YumSource
- func (s *YumSource) Connect() error
- func (s *YumSource) GetFile(directoryPath string, requestHeaders schwift.ObjectHeaders) (body io.ReadCloser, sourceState FileState, err error)
- func (s *YumSource) ListAllFiles() ([]FileSpec, *ListEntriesError)
- func (s *YumSource) ListEntries(directoryPath string) ([]FileSpec, *ListEntriesError)
- func (s *YumSource) Validate(name string) []error
Constants ¶
const StatusSwiftRateLimit = 498
StatusSwiftRateLimit is the non-standard HTTP status code used by Swift to indicate Too Many Requests.
Variables ¶
var ErrListAllFilesNotSupported = &ListEntriesError{
Message: "ListAllFiles not supported by this source",
}
ErrListAllFilesNotSupported is returned by ListAllFiles() for sources that do not support it.
Functions ¶
This section is empty.
Types ¶
type CleanupConfiguration ¶
type CleanupConfiguration struct {
Strategy CleanupStrategy `yaml:"strategy"`
}
CleanupConfiguration contains the "cleanup" section of a JobConfiguration.
type CleanupStrategy ¶
type CleanupStrategy string
CleanupStrategy is an enum of legal values for the jobs[].cleanup.strategy configuration option.
const (
//KeepUnknownFiles is the default cleanup strategy.
KeepUnknownFiles CleanupStrategy = ""
//DeleteUnknownFiles is another strategy.
DeleteUnknownFiles CleanupStrategy = "delete"
//ReportUnknownFiles is another strategy.
ReportUnknownFiles CleanupStrategy = "report"
)
type Configuration ¶
type Configuration struct {
Swift SwiftLocation `yaml:"swift"`
WorkerCounts struct {
Transfer uint
} `yaml:"workers"`
Statsd StatsdConfiguration `yaml:"statsd"`
JobConfigs []JobConfiguration `yaml:"jobs"`
Jobs []*Job `yaml:"-"`
}
Configuration contains the contents of the configuration file.
func ReadConfiguration ¶
func ReadConfiguration(path string) (*Configuration, []error)
ReadConfiguration reads the configuration file.
type Directory ¶
type Directory struct {
Job *Job
Path string
//RetryCounter is increased by the actors.Scraper when scraping of this
//directory fails.
RetryCounter uint
}
Directory describes a directory on the source side which can be scraped.
type ExpirationConfiguration ¶
type ExpirationConfiguration struct {
EnabledIn *bool `yaml:"enabled"`
Enabled bool `yaml:"-"`
DelaySeconds uint32 `yaml:"delay_seconds"`
}
ExpirationConfiguration contains the "expiration" section of a JobConfiguration.
type File ¶
File describes a single file which is mirrored as part of a Job.
func (File) PerformTransfer ¶
func (f File) PerformTransfer() TransferResult
PerformTransfer transfers this file from the source to the target. The return value indicates if the transfer finished successfully.
func (File) TargetObject ¶
TargetObject returns the object corresponding to this file in the target container.
type FileSpec ¶
type FileSpec struct {
Path string
IsDirectory bool
//only set for symlinks (refers to a path below the ObjectPrefix in the same container)
SymlinkTargetPath string
//results of GET on this file
Contents []byte
Headers http.Header
}
FileSpec contains metadata for a File. The only required field is Path. Sources that download some files early (during scraping) can pass the downloaded contents and metadata in the remaining fields of the FileSpec to avoid double download.
type FileState ¶
type FileState struct {
Etag string
LastModified string
SizeBytes int64 //-1 if not known
ExpiryTime *time.Time //nil if not set
//the following fields are only used in `sourceState`, not `targetState`
SkipTransfer bool
ContentType string
}
FileState is used by Source.GetFile() to describe the state of a file.
type Job ¶
type Job struct {
Source Source
Target *SwiftLocation
Matcher Matcher
Segmenting *SegmentingConfiguration
Expiration ExpirationConfiguration
Cleanup CleanupConfiguration
}
Job describes a transfer job at runtime.
type JobConfiguration ¶
type JobConfiguration struct {
//basic options
Source SourceUnmarshaler `yaml:"from"`
Target *SwiftLocation `yaml:"to"`
//behavior options
ExcludePattern string `yaml:"except"`
IncludePattern string `yaml:"only"`
ImmutableFilePattern string `yaml:"immutable"`
Segmenting *SegmentingConfiguration `yaml:"segmenting"`
Expiration ExpirationConfiguration `yaml:"expiration"`
Cleanup CleanupConfiguration `yaml:"cleanup"`
}
JobConfiguration describes a transfer job in the configuration file.
func (JobConfiguration) Compile ¶
func (cfg JobConfiguration) Compile(name string, swift SwiftLocation) (job *Job, errors []error)
Compile validates the given JobConfiguration, then creates and prepares a Job from it.
type ListEntriesError ¶
type ListEntriesError struct {
//the location of the directory (e.g. a URL)
Location string
//error message
Message string
}
ListEntriesError is an error that occurs while scraping a directory.
type MatchError ¶
MatchError is returned by the functions on type Matcher.
func (MatchError) Error ¶
func (e MatchError) Error() string
Error implements the builtin/error interface.
type Matcher ¶
type Matcher struct {
ExcludeRx *regexp.Regexp //pointers because nil signifies absence
IncludeRx *regexp.Regexp
ImmutableFileRx *regexp.Regexp
}
Matcher determines if files shall be included or excluded in a transfer.
func (Matcher) Check ¶
Check checks whether the directory at `path` should be scraped, or whether the file at `path` should be transferred. If so, an empty string is returned. If not, a non-empty string is returned that contains a human-readable message explaining why the file is excluded from the transfer.
If `path` is a directory, `path` must have a trailing slash. If `path` is a file, `path` must not have a trailing slash.
func (Matcher) CheckFile ¶
CheckFile is like CheckRecursive, but uses `spec.Path` and appends a slash if `spec.IsDirectory`.
func (Matcher) CheckRecursive ¶
CheckRecursive is like Check(), but also checks each directory along the way.
For example, CheckRecursive("a/b/c") calls Check("a/"), Check("a/b/") and Check("a/b/c").
type SegmentingConfiguration ¶
type SegmentingConfiguration struct {
MinObjectSize uint64 `yaml:"min_bytes"`
SegmentSize uint64 `yaml:"segment_bytes"`
ContainerName string `yaml:"container"`
//Container is initialized by JobConfiguration.Compile().
Container *schwift.Container `yaml:"-"`
}
SegmentingConfiguration contains the "segmenting" section of a JobConfiguration.
type Source ¶
type Source interface {
//Validate reports errors if this source is malspecified.
Validate(name string) []error
//Connect performs source-specific one-time setup.
Connect() error
//ListAllFiles returns all files in the source (as paths relative to the
//source's root). If this returns ErrListAllFilesNotSupported, ListEntries
//must be used instead.
ListAllFiles() ([]FileSpec, *ListEntriesError)
//ListEntries returns all files and subdirectories at this path in the
//source. Each result value must have a "/" prefix for subdirectories, or
//none for files.
ListEntries(directoryPath string) ([]FileSpec, *ListEntriesError)
//GetFile retrieves the contents and metadata for the file at the given path
//in the source. The `headers` map contains additional HTTP request headers
//that shall be passed to the source in the GET request.
GetFile(directoryPath string, headers schwift.ObjectHeaders) (body io.ReadCloser, sourceState FileState, err error)
}
Source describes a place from which files can be fetched.
type SourceUnmarshaler ¶
type SourceUnmarshaler struct {
// contains filtered or unexported fields
}
SourceUnmarshaler provides a yaml.Unmarshaler implementation for the Source interface.
func (*SourceUnmarshaler) UnmarshalYAML ¶
func (u *SourceUnmarshaler) UnmarshalYAML(unmarshal func(interface{}) error) error
UnmarshalYAML implements the yaml.Unmarshaler interface.
type StatsdConfiguration ¶
type StatsdConfiguration struct {
HostName string `yaml:"hostname"`
Port int `yaml:"port"`
Prefix string `yaml:"prefix"`
}
StatsdConfiguration contains the configuration options relating to StatsD metric emission.
type SwiftLocation ¶
type SwiftLocation struct {
AuthURL string `yaml:"auth_url"`
UserName string `yaml:"user_name"`
UserDomainName string `yaml:"user_domain_name"`
ProjectName string `yaml:"project_name"`
ProjectDomainName string `yaml:"project_domain_name"`
Password string `yaml:"password"`
RegionName string `yaml:"region_name"`
ContainerName string `yaml:"container"`
ObjectNamePrefix string `yaml:"object_prefix"`
//configuration for Validate()
ValidateIgnoreEmptyContainer bool `yaml:"-"`
//Account and Container are filled by Connect(). Container will be nil if ContainerName is empty.
Account *schwift.Account `yaml:"-"`
Container *schwift.Container `yaml:"-"`
//FileExists is filled by DiscoverExistingFiles(). The keys are object names
//including the ObjectNamePrefix, if any.
FileExists map[string]bool `yaml:"-"`
}
SwiftLocation contains all parameters required to establish a Swift connection. It implements the Source interface, but is also used on the target side.
func (*SwiftLocation) Connect ¶
func (s *SwiftLocation) Connect() error
Connect implements the Source interface. It establishes the connection to Swift.
func (*SwiftLocation) DiscoverExistingFiles ¶
func (s *SwiftLocation) DiscoverExistingFiles(matcher Matcher) error
DiscoverExistingFiles finds all objects that currently exist in this location (i.e. in this Swift container below the given object name prefix) and fills s.FileExists accordingly.
The given Matcher is used to find out which files are to be considered as belonging to the transfer job in question.
func (*SwiftLocation) GetFile ¶
func (s *SwiftLocation) GetFile(path string, requestHeaders schwift.ObjectHeaders) (io.ReadCloser, FileState, error)
GetFile implements the Source interface.
func (*SwiftLocation) ListAllFiles ¶
func (s *SwiftLocation) ListAllFiles() ([]FileSpec, *ListEntriesError)
ListAllFiles implements the Source interface.
func (*SwiftLocation) ListEntries ¶
func (s *SwiftLocation) ListEntries(path string) ([]FileSpec, *ListEntriesError)
ListEntries implements the Source interface.
func (*SwiftLocation) ObjectAtPath ¶
func (s *SwiftLocation) ObjectAtPath(path string) *schwift.Object
ObjectAtPath returns an Object instance for the object at the given path (below the ObjectNamePrefix, if any) in this container.
func (SwiftLocation) Validate ¶
func (s SwiftLocation) Validate(name string) []error
Validate returns an empty list only if all required credentials are present.
type TransferResult ¶
type TransferResult uint
TransferResult is the return type for PerformTransfer().
const (
//TransferSuccess means that the file was newer on the source and was sent
//to the target.
TransferSuccess TransferResult = iota
//TransferSkipped means that the file was the same on both sides and
//nothing was transferred.
TransferSkipped
//TransferFailed means that an error occurred and was logged.
TransferFailed
)
type URLSource ¶
type URLSource struct {
URLString string `yaml:"url"`
URL *url.URL `yaml:"-"`
//auth options
ClientCertificatePath string `yaml:"cert"`
ClientCertificateKeyPath string `yaml:"key"`
ServerCAPath string `yaml:"ca"`
HTTPClient *http.Client `yaml:"-"`
//transfer options
SegmentingIn *bool `yaml:"segmenting"`
Segmenting bool `yaml:"-"`
SegmentSize uint64 `yaml:"segment_bytes"`
}
URLSource describes a source that's accessible via HTTP.
func (URLSource) GetFile ¶
func (u URLSource) GetFile(directoryPath string, requestHeaders schwift.ObjectHeaders) (io.ReadCloser, FileState, error)
GetFile implements the Source interface.
func (URLSource) ListAllFiles ¶
func (u URLSource) ListAllFiles() ([]FileSpec, *ListEntriesError)
ListAllFiles implements the Source interface.
func (URLSource) ListEntries ¶
func (u URLSource) ListEntries(directoryPath string) ([]FileSpec, *ListEntriesError)
ListEntries implements the Source interface.
type YumSource ¶
type YumSource struct {
//options from config file
URLString string `yaml:"url"`
ClientCertificatePath string `yaml:"cert"`
ClientCertificateKeyPath string `yaml:"key"`
ServerCAPath string `yaml:"ca"`
Architectures []string `yaml:"arch"`
// contains filtered or unexported fields
}
YumSource is a URLSource containing a Yum repository. This type reuses the Validate() and Connect() logic of URLSource, but adds a custom scraping implementation that reads the Yum repository metadata instead of relying on directory listings.
func (*YumSource) GetFile ¶
func (s *YumSource) GetFile(directoryPath string, requestHeaders schwift.ObjectHeaders) (body io.ReadCloser, sourceState FileState, err error)
GetFile implements the Source interface.
func (*YumSource) ListAllFiles ¶
func (s *YumSource) ListAllFiles() ([]FileSpec, *ListEntriesError)
ListAllFiles implements the Source interface.
func (*YumSource) ListEntries ¶
func (s *YumSource) ListEntries(directoryPath string) ([]FileSpec, *ListEntriesError)
ListEntries implements the Source interface.