Documentation
¶
Index ¶
- Constants
- Variables
- func Cleanup(objC obj.Client, chunks *Storage)
- func CopyN(w *Writer, r *Reader, n int64) error
- func RandSeq(n int) []byte
- type Chunk
- func (*Chunk) Descriptor() ([]byte, []int)
- func (m *Chunk) GetHash() string
- func (m *Chunk) Marshal() (dAtA []byte, err error)
- func (m *Chunk) MarshalTo(dAtA []byte) (int, error)
- func (*Chunk) ProtoMessage()
- func (m *Chunk) Reset()
- func (m *Chunk) Size() (n int)
- func (m *Chunk) String() string
- func (m *Chunk) Unmarshal(dAtA []byte) error
- func (m *Chunk) XXX_DiscardUnknown()
- func (m *Chunk) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (m *Chunk) XXX_Merge(src proto.Message)
- func (m *Chunk) XXX_Size() int
- func (m *Chunk) XXX_Unmarshal(b []byte) error
- type DataRef
- func (*DataRef) Descriptor() ([]byte, []int)
- func (m *DataRef) GetChunk() *Chunk
- func (m *DataRef) GetHash() string
- func (m *DataRef) GetOffsetBytes() int64
- func (m *DataRef) GetSizeBytes() int64
- func (m *DataRef) Marshal() (dAtA []byte, err error)
- func (m *DataRef) MarshalTo(dAtA []byte) (int, error)
- func (*DataRef) ProtoMessage()
- func (m *DataRef) Reset()
- func (m *DataRef) Size() (n int)
- func (m *DataRef) String() string
- func (m *DataRef) Unmarshal(dAtA []byte) error
- func (m *DataRef) XXX_DiscardUnknown()
- func (m *DataRef) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (m *DataRef) XXX_Merge(src proto.Message)
- func (m *DataRef) XXX_Size() int
- func (m *DataRef) XXX_Unmarshal(b []byte) error
- type Reader
- type Storage
- type Writer
Constants ¶
const (
	// MB is Megabytes.
	MB = 1024 * 1024
	// AverageBits determines the average chunk size (2^AverageBits).
	AverageBits = 23
	// WindowSize is the size of the rolling hash window.
	WindowSize = 64
)
Variables ¶
var (
	ErrInvalidLengthChunk = fmt.Errorf("proto: negative length found during unmarshaling")
	ErrIntOverflowChunk   = fmt.Errorf("proto: integer overflow")
)
Functions ¶
Types ¶
type Chunk ¶
type Chunk struct {
Hash string `protobuf:"bytes,1,opt,name=hash,proto3" json:"hash,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (*Chunk) Descriptor ¶
func (*Chunk) ProtoMessage ¶
func (*Chunk) ProtoMessage()
func (*Chunk) XXX_DiscardUnknown ¶
func (m *Chunk) XXX_DiscardUnknown()
func (*Chunk) XXX_Marshal ¶
func (*Chunk) XXX_Unmarshal ¶
type DataRef ¶
type DataRef struct {
// The chunk the referenced data is located in.
Chunk *Chunk `protobuf:"bytes,1,opt,name=chunk,proto3" json:"chunk,omitempty"`
// The hash of the data being referenced.
// This field is empty when it is equal to the chunk hash (the ref is the whole chunk).
Hash string `protobuf:"bytes,2,opt,name=hash,proto3" json:"hash,omitempty"`
// The offset and size used for accessing the data within the chunk.
OffsetBytes int64 `protobuf:"varint,3,opt,name=offset_bytes,json=offsetBytes,proto3" json:"offset_bytes,omitempty"`
SizeBytes int64 `protobuf:"varint,4,opt,name=size_bytes,json=sizeBytes,proto3" json:"size_bytes,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
DataRef is a reference to data within a chunk.
func (*DataRef) Descriptor ¶
func (*DataRef) GetOffsetBytes ¶
func (*DataRef) GetSizeBytes ¶
func (*DataRef) ProtoMessage ¶
func (*DataRef) ProtoMessage()
func (*DataRef) XXX_DiscardUnknown ¶
func (m *DataRef) XXX_DiscardUnknown()
func (*DataRef) XXX_Marshal ¶
func (*DataRef) XXX_Unmarshal ¶
type Reader ¶
type Reader struct {
// contains filtered or unexported fields
}
Reader reads a set of DataRefs from chunk storage.
func (*Reader) Close ¶
Close closes the reader. Currently a no-op, but will be used when streaming is implemented.
type Storage ¶
type Storage struct {
// contains filtered or unexported fields
}
Storage is the abstraction that manages chunk storage.
func LocalStorage ¶
LocalStorage creates a local chunk storage instance. Useful for storage layer tests.
func (*Storage) NewReader ¶
NewReader creates an io.ReadCloser for a chunk. (bryce) The whole chunk is in memory right now, which could be a problem for concurrency, particularly during the merge process. We may want to handle concurrency here (pass in multiple data refs).
type Writer ¶
type Writer struct {
// contains filtered or unexported fields
}
Writer splits a byte stream into content defined chunks that are hashed and deduplicated/uploaded to object storage. Chunk split points are determined by a bit pattern in a rolling hash function (buzhash64 at https://github.com/chmduquesne/rollinghash).
func (*Writer) Close ¶
Close closes the writer and flushes the remaining bytes to a chunk and finishes the final range.
func (*Writer) RangeCount ¶ added in v1.9.0
RangeCount returns a count of the number of ranges associated with the writer.
func (*Writer) StartRange ¶ added in v1.9.0
StartRange specifies the start of a range within the byte stream that is meaningful to the caller. When this range has ended (by calling StartRange again or Close) and all of the necessary chunks are written, the callback given during initialization will be called with DataRefs that can be used for accessing that range.