loader

package
v0.0.0-...-85e3569 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 17, 2026 | License: Apache-2.0 | Imports: 19 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func Load

func Load(ctx context.Context, params LoadParams) (int64, error)

Load reads a Parquet file and bulk-loads it into the target database. Data is streamed one row group at a time to keep memory usage steady. Returns the number of rows loaded.

func Save

func Save(ctx context.Context, params SaveParams) (int64, error)

Save executes a SQL query and writes the results to a Parquet file. Returns the number of rows written.

Types

type ClickHouseDriver

type ClickHouseDriver struct{}

ClickHouseDriver implements the Driver interface for ClickHouse.

func (*ClickHouseDriver) ArrowType

func (d *ClickHouseDriver) ArrowType(dt arrow.DataType) (string, error)

ArrowType maps an Arrow data type to a ClickHouse column type string.

func (*ClickHouseDriver) BulkLoad

func (d *ClickHouseDriver) BulkLoad(ctx context.Context, db *sql.DB, params LoadParams, stream *parquetStream) (int64, error)

BulkLoad streams Arrow record batches into a ClickHouse table using batch inserts. The clickhouse-go driver accumulates rows in the prepared statement and sends them as a batch on tx.Commit().

func (*ClickHouseDriver) CreateTable

func (d *ClickHouseDriver) CreateTable(ctx context.Context, db *sql.DB, schema, table string, arrowSchema *arrow.Schema) error

CreateTable creates a table in the database from an Arrow schema.

func (*ClickHouseDriver) DefaultSchema

func (d *ClickHouseDriver) DefaultSchema() string

DefaultSchema returns an empty string; ClickHouse uses databases, not schemas.

func (*ClickHouseDriver) DropTable

func (d *ClickHouseDriver) DropTable(ctx context.Context, db *sql.DB, schema, table string) error

DropTable drops a table if it exists.

func (*ClickHouseDriver) QuoteIdentifier

func (d *ClickHouseDriver) QuoteIdentifier(name string) string

QuoteIdentifier wraps a name in backtick quoting for ClickHouse.

func (*ClickHouseDriver) SQLTypeToArrow

func (d *ClickHouseDriver) SQLTypeToArrow(dbTypeName string) (arrow.DataType, error)

SQLTypeToArrow maps a ClickHouse type name to an Arrow data type.

func (*ClickHouseDriver) TruncateTable

func (d *ClickHouseDriver) TruncateTable(ctx context.Context, db *sql.DB, schema, table string) error

TruncateTable truncates a table.

type Driver

// Driver abstracts database-specific bulk load and DDL operations.
// ClickHouseDriver, MSSQLDriver, OracleDriver and PostgresDriver each
// provide every method listed here.
type Driver interface {
	// BulkLoad streams the Arrow record batches from stream into the target
	// table described by params.
	// NOTE(review): the int64 result is presumably the number of rows
	// loaded (mirroring Load) — confirm against the implementations.
	BulkLoad(ctx context.Context, db *sql.DB, params LoadParams, stream *parquetStream) (int64, error)
	// CreateTable creates a table in the database from an Arrow schema.
	CreateTable(ctx context.Context, db *sql.DB, schema, table string, arrowSchema *arrow.Schema) error
	// DropTable drops a table if it exists.
	DropTable(ctx context.Context, db *sql.DB, schema, table string) error
	// TruncateTable truncates a table.
	TruncateTable(ctx context.Context, db *sql.DB, schema, table string) error
	// ArrowType maps an Arrow data type to the database's column type string.
	ArrowType(dt arrow.DataType) (string, error)
	// SQLTypeToArrow maps a database type name to an Arrow data type.
	SQLTypeToArrow(dbTypeName string) (arrow.DataType, error)
	// DefaultSchema returns the driver's default schema. It may be the empty
	// string: the ClickHouse and Oracle drivers document returning "".
	DefaultSchema() string
	// QuoteIdentifier wraps a name in the database's identifier quoting
	// (backticks for ClickHouse, brackets for MSSQL, double quotes for
	// Oracle and PostgreSQL).
	QuoteIdentifier(name string) string
}

Driver abstracts database-specific bulk load and DDL operations.

func GetDriver

func GetDriver(name string) (Driver, error)

GetDriver returns the Driver for the given name.

type LoadMode

type LoadMode string

LoadMode controls how data is loaded into the target table.

// Load modes accepted in LoadParams.Mode.
const (
	// ModeAppend is the "append" mode.
	// NOTE(review): presumably inserts into the existing table without
	// clearing it — confirm in Load.
	ModeAppend          LoadMode = "append"
	// ModeTruncateAndLoad is the "truncate_and_load" mode.
	// NOTE(review): presumably truncates the target table (see
	// Driver.TruncateTable) before loading — confirm in Load.
	ModeTruncateAndLoad LoadMode = "truncate_and_load"
	// ModeCreateOrReplace is the "create_or_replace" mode.
	// NOTE(review): presumably drops and recreates the target table (see
	// Driver.DropTable / Driver.CreateTable) before loading — confirm in Load.
	ModeCreateOrReplace LoadMode = "create_or_replace"
)

type LoadParams

// LoadParams configures a data load operation: which Parquet file to read,
// which table to load it into, how to load it, and how to connect.
type LoadParams struct {
	FilePath string   // path to the Parquet file
	Table    string   // target table name
	Schema   string   // target schema (default depends on driver)
	Mode     LoadMode // append, truncate_and_load, or create_or_replace
	ConnStr  string   // database connection string
}

LoadParams configures a data load operation.

type MSSQLDriver

type MSSQLDriver struct{}

MSSQLDriver implements the Driver interface for Microsoft SQL Server.

func (*MSSQLDriver) ArrowType

func (d *MSSQLDriver) ArrowType(dt arrow.DataType) (string, error)

ArrowType maps an Arrow data type to an MSSQL column type string.

func (*MSSQLDriver) BulkLoad

func (d *MSSQLDriver) BulkLoad(ctx context.Context, db *sql.DB, params LoadParams, stream *parquetStream) (int64, error)

BulkLoad streams Arrow record batches from the parquetStream into an MSSQL table. Only one row group's worth of data is held in memory at a time.

func (*MSSQLDriver) CreateTable

func (d *MSSQLDriver) CreateTable(ctx context.Context, db *sql.DB, schema, table string, arrowSchema *arrow.Schema) error

CreateTable creates a table in the database from an Arrow schema.

func (*MSSQLDriver) DefaultSchema

func (d *MSSQLDriver) DefaultSchema() string

DefaultSchema returns the default schema for MSSQL.

func (*MSSQLDriver) DropTable

func (d *MSSQLDriver) DropTable(ctx context.Context, db *sql.DB, schema, table string) error

DropTable drops a table if it exists.

func (*MSSQLDriver) QuoteIdentifier

func (d *MSSQLDriver) QuoteIdentifier(name string) string

QuoteIdentifier wraps a name in MSSQL bracket-quoting.

func (*MSSQLDriver) SQLTypeToArrow

func (d *MSSQLDriver) SQLTypeToArrow(dbTypeName string) (arrow.DataType, error)

SQLTypeToArrow maps an MSSQL type name to an Arrow data type.

func (*MSSQLDriver) TruncateTable

func (d *MSSQLDriver) TruncateTable(ctx context.Context, db *sql.DB, schema, table string) error

TruncateTable truncates a table.

type OracleDriver

type OracleDriver struct{}

OracleDriver implements the Driver interface for Oracle Database.

func (*OracleDriver) ArrowType

func (d *OracleDriver) ArrowType(dt arrow.DataType) (string, error)

ArrowType maps an Arrow data type to an Oracle column type string.

func (*OracleDriver) BulkLoad

func (d *OracleDriver) BulkLoad(ctx context.Context, db *sql.DB, params LoadParams, stream *parquetStream) (int64, error)

BulkLoad streams Arrow record batches into an Oracle table using prepared statements with Oracle bind variables (:1, :2, ...) within a transaction.

func (*OracleDriver) CreateTable

func (d *OracleDriver) CreateTable(ctx context.Context, db *sql.DB, schema, table string, arrowSchema *arrow.Schema) error

CreateTable creates a table in the database from an Arrow schema.

func (*OracleDriver) DefaultSchema

func (d *OracleDriver) DefaultSchema() string

DefaultSchema returns an empty string; Oracle derives the schema from the connection user.

func (*OracleDriver) DropTable

func (d *OracleDriver) DropTable(ctx context.Context, db *sql.DB, schema, table string) error

DropTable drops a table if it exists using PL/SQL to suppress ORA-00942.

func (*OracleDriver) QuoteIdentifier

func (d *OracleDriver) QuoteIdentifier(name string) string

QuoteIdentifier wraps a name in double quotes, upper-casing it, for Oracle.

func (*OracleDriver) SQLTypeToArrow

func (d *OracleDriver) SQLTypeToArrow(dbTypeName string) (arrow.DataType, error)

SQLTypeToArrow maps an Oracle database type name to an Arrow data type.

func (*OracleDriver) TruncateTable

func (d *OracleDriver) TruncateTable(ctx context.Context, db *sql.DB, schema, table string) error

TruncateTable truncates a table.

type PostgresDriver

type PostgresDriver struct{}

PostgresDriver implements the Driver interface for PostgreSQL.

func (*PostgresDriver) ArrowType

func (d *PostgresDriver) ArrowType(dt arrow.DataType) (string, error)

ArrowType maps an Arrow data type to a PostgreSQL column type string.

func (*PostgresDriver) BulkLoad

func (d *PostgresDriver) BulkLoad(ctx context.Context, db *sql.DB, params LoadParams, stream *parquetStream) (int64, error)

BulkLoad streams Arrow record batches into a PostgreSQL table using pgx COPY protocol. It opens a separate pgx native connection for the COPY operation (the db *sql.DB param is used by the shared Load() caller for DDL but is not needed here).

func (*PostgresDriver) CreateTable

func (d *PostgresDriver) CreateTable(ctx context.Context, db *sql.DB, schema, table string, arrowSchema *arrow.Schema) error

CreateTable creates a table in the database from an Arrow schema.

func (*PostgresDriver) DefaultSchema

func (d *PostgresDriver) DefaultSchema() string

DefaultSchema returns the default schema for PostgreSQL.

func (*PostgresDriver) DropTable

func (d *PostgresDriver) DropTable(ctx context.Context, db *sql.DB, schema, table string) error

DropTable drops a table if it exists.

func (*PostgresDriver) QuoteIdentifier

func (d *PostgresDriver) QuoteIdentifier(name string) string

QuoteIdentifier wraps a name in double quotes for PostgreSQL.

func (*PostgresDriver) SQLTypeToArrow

func (d *PostgresDriver) SQLTypeToArrow(dbTypeName string) (arrow.DataType, error)

SQLTypeToArrow maps a PostgreSQL type name to an Arrow data type.

func (*PostgresDriver) TruncateTable

func (d *PostgresDriver) TruncateTable(ctx context.Context, db *sql.DB, schema, table string) error

TruncateTable truncates a table.

type SaveParams

// SaveParams configures a query-to-Parquet save operation: the query to
// run, where to write the Parquet output, and how to connect.
type SaveParams struct {
	Query    string // SQL SELECT query
	FilePath string // output Parquet file path
	ConnStr  string // database connection string
}

SaveParams configures a query-to-Parquet save operation.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL