Documentation
¶
Index ¶
- type ColumnBufferType
- type ParquetReader
- func (pr *ParquetReader) GetFooterSize() (uint32, error)
- func (pr *ParquetReader) GetNumRows() int64
- func (pr *ParquetReader) Read(dstInterface interface{}) error
- func (pr *ParquetReader) ReadByNumber(maxReadNumber int) ([]interface{}, error)
- func (pr *ParquetReader) ReadColumnByIndex(index int64, num int64) (values []interface{}, rls []int32, dls []int32, err error)
- func (pr *ParquetReader) ReadColumnByPath(pathStr string, num int64) (values []interface{}, rls []int32, dls []int32, err error)
- func (pr *ParquetReader) ReadFooter() error
- func (pr *ParquetReader) ReadPartial(dstInterface interface{}, prefixPath string) error
- func (pr *ParquetReader) ReadPartialByNumber(maxReadNumber int, prefixPath string) ([]interface{}, error)
- func (pr *ParquetReader) ReadStop()
- func (pr *ParquetReader) RenameSchema()
- func (pr *ParquetReader) SetSchemaHandlerFromJSON(jsonSchema string) error
- func (pr *ParquetReader) SkipRows(num int64) error
- func (pr *ParquetReader) SkipRowsByIndex(index int64, num int64)
- func (pr *ParquetReader) SkipRowsByPath(pathStr string, num int64) error
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ColumnBufferType ¶
type ColumnBufferType struct {
PFile source.ParquetFile
ThriftReader *thrift.TBufferedTransport
SchemaHandler *schema.SchemaHandler
PathStr string
RowGroupIndex int64
ChunkHeader *parquet.ColumnChunk
ChunkReadValues int64
DictPage *layout.Page
DataTable *layout.Table
DataTableNumRows int64
}
func NewColumnBuffer ¶
func NewColumnBuffer(pFile source.ParquetFile, footer *parquet.FileMetaData, schemaHandler *schema.SchemaHandler, pathStr string) (*ColumnBufferType, error)
func (*ColumnBufferType) NextRowGroup ¶
func (cbt *ColumnBufferType) NextRowGroup() error
func (*ColumnBufferType) ReadPage ¶
func (cbt *ColumnBufferType) ReadPage() error
func (*ColumnBufferType) ReadPageForSkip ¶
func (cbt *ColumnBufferType) ReadPageForSkip() (*layout.Page, error)
func (*ColumnBufferType) ReadRows ¶
func (cbt *ColumnBufferType) ReadRows(num int64) (*layout.Table, int64)
func (*ColumnBufferType) SkipRows ¶
func (cbt *ColumnBufferType) SkipRows(num int64) int64
type ParquetReader ¶
type ParquetReader struct {
SchemaHandler *schema.SchemaHandler
NP int64 //parallel number
PFile source.ParquetFile
ColumnBuffers map[string]*ColumnBufferType
// One reader can only read objects of a single type.
ObjType reflect.Type
ObjPartialType reflect.Type
}
func NewParquetColumnReader ¶
func NewParquetColumnReader(pFile source.ParquetFile, np int64) (*ParquetReader, error)
NewParquetColumnReader creates a parquet column reader
func NewParquetReader ¶
func NewParquetReader(pFile source.ParquetFile, obj interface{}, np int64) (*ParquetReader, error)
NewParquetReader creates a parquet reader. obj is either an object with schema tags or a JSON schema string.
func (*ParquetReader) GetFooterSize ¶
func (pr *ParquetReader) GetFooterSize() (uint32, error)
Get the footer size
func (*ParquetReader) GetNumRows ¶
func (pr *ParquetReader) GetNumRows() int64
func (*ParquetReader) Read ¶
func (pr *ParquetReader) Read(dstInterface interface{}) error
Read rows of the parquet file and unmarshal them all into dstInterface.
func (*ParquetReader) ReadByNumber ¶
func (pr *ParquetReader) ReadByNumber(maxReadNumber int) ([]interface{}, error)
Read maxReadNumber objects
func (*ParquetReader) ReadColumnByIndex ¶
func (pr *ParquetReader) ReadColumnByIndex(index int64, num int64) (values []interface{}, rls []int32, dls []int32, err error)
ReadColumnByIndex reads column by index. The index of first column is 0.
func (*ParquetReader) ReadColumnByPath ¶
func (pr *ParquetReader) ReadColumnByPath(pathStr string, num int64) (values []interface{}, rls []int32, dls []int32, err error)
ReadColumnByPath reads column by path in schema.
func (*ParquetReader) ReadFooter ¶
func (pr *ParquetReader) ReadFooter() error
Read footer from parquet file
func (*ParquetReader) ReadPartial ¶
func (pr *ParquetReader) ReadPartial(dstInterface interface{}, prefixPath string) error
Read partial rows of the parquet file (selected by prefixPath) and unmarshal them all into dstInterface.
func (*ParquetReader) ReadPartialByNumber ¶
func (pr *ParquetReader) ReadPartialByNumber(maxReadNumber int, prefixPath string) ([]interface{}, error)
Read maxReadNumber partial objects
func (*ParquetReader) RenameSchema ¶
func (pr *ParquetReader) RenameSchema()
Rename each schema name to its inname.
func (*ParquetReader) SetSchemaHandlerFromJSON ¶
func (pr *ParquetReader) SetSchemaHandlerFromJSON(jsonSchema string) error
func (*ParquetReader) SkipRows ¶
func (pr *ParquetReader) SkipRows(num int64) error
Skip num rows of the parquet file.
func (*ParquetReader) SkipRowsByIndex ¶
func (pr *ParquetReader) SkipRowsByIndex(index int64, num int64)
func (*ParquetReader) SkipRowsByPath ¶
func (pr *ParquetReader) SkipRowsByPath(pathStr string, num int64) error
Click to show internal directories.
Click to hide internal directories.