Documentation
¶
Index ¶
- Constants
- Variables
- func DecodeFile(path string, v any) error
- func DecodeReader(format string, r io.Reader, v any) error
- func FieldFillDefaults(field Field)
- func FieldParse(field Field) error
- func FieldValidate(field Field) []error
- type AppConfig
- type CSVConfig
- type Column
- type ColumnDateTimeParams
- type ColumnFloatParams
- type ColumnIntegerParams
- type ColumnParquetParams
- type ColumnStringParams
- type DataRow
- type DevNullConfig
- type Field
- type GenerationConfig
- func (gc *GenerationConfig) FillDefaults()
- func (gc *GenerationConfig) Parse() error
- func (gc *GenerationConfig) ParseFromFile(path string) error
- func (gc *GenerationConfig) ParseFromJSON(data []byte) error
- func (gc *GenerationConfig) ParseFromYAML(data []byte) error
- func (gc *GenerationConfig) PostProcess() error
- func (gc *GenerationConfig) Validate() []error
- type HTTPConfig
- type HTTPParams
- type Model
- type OpenAI
- type OutputConfig
- type Params
- type ParquetConfig
- type PartitionColumn
- type TCSConfig
Constants ¶
const (
FirstNameType = "first_name"
LastNameType = "last_name"
PhoneType = "phone"
TextType = "text"
)
const (
DefaultOutputDir = "output"
DefaultOutputType = "csv"
ParquetDateTimeMillisFormat = "millis"
ParquetDateTimeMicrosFormat = "micros"
)
const (
// DefaultWorkersPerCPU is the default workers-per-CPU multiplier.
// NOTE(review): presumably used to derive GenerationConfig.WorkersCount
// when it is not set — confirm against FillDefaults.
DefaultWorkersPerCPU = 4
)
Variables ¶
// DiskFilesOutputTypes is the subset of output types that write files.
var DiskFilesOutputTypes = []string{"csv", "parquet"} // output types that actually create files on disk
// OutputTypes lists the known output type identifiers.
// NOTE(review): presumably the accepted values of OutputConfig.Type — confirm.
var OutputTypes = []string{"csv", "devnull", "http", "tcs", "parquet"}
Functions ¶
func DecodeFile ¶
func DecodeFile(path string, v any) error
func FieldFillDefaults ¶
func FieldFillDefaults(field Field)
func FieldParse ¶
func FieldParse(field Field) error
func FieldValidate ¶
func FieldValidate(field Field) []error
Types ¶
type AppConfig ¶
// AppConfig describes the application-level config.
type AppConfig struct {
// LogFormat is read from the "log_format" key; accepted values are not
// visible in this view.
LogFormat string `json:"log_format" yaml:"log_format"`
// HTTPConfig is the nested section read from the "http" key.
HTTPConfig HTTPConfig `json:"http" yaml:"http"`
// OpenAI is the nested section read from the "open_ai" key.
OpenAI OpenAI `json:"open_ai" yaml:"open_ai"`
}
AppConfig type is used to describe application config.
func (*AppConfig) FillDefaults ¶
func (m *AppConfig) FillDefaults()
func (*AppConfig) ParseFromFile ¶
func (*AppConfig) PostProcess ¶
type CSVConfig ¶
// CSVConfig describes the output config for the CSV implementation.
//
// NOTE(review): the backup:"true" tag is consumed outside this view; its
// exact semantics are unverified here.
type CSVConfig struct {
// FloatPrecision is read from the "float_precision" key.
// NOTE(review): presumably the number of fractional digits written — confirm.
FloatPrecision int `json:"float_precision" yaml:"float_precision"`
// DatetimeFormat is read from the "datetime_format" key; the expected
// layout syntax is not visible here.
DatetimeFormat string `json:"datetime_format" yaml:"datetime_format"`
// Delimiter is read from the "delimiter" key.
Delimiter string `backup:"true" json:"delimiter" yaml:"delimiter"`
// WithoutHeaders is read from the "without_headers" key.
// NOTE(review): presumably suppresses the header row — confirm.
WithoutHeaders bool `backup:"true" json:"without_headers" yaml:"without_headers"`
}
CSVConfig type is used to describe the output config for the CSV implementation.
func (*CSVConfig) FillDefaults ¶
func (c *CSVConfig) FillDefaults()
type Column ¶
// Column describes a single column of a model.
//
// NOTE(review): the backup:"true" tag is consumed outside this view; its
// exact semantics are unverified here.
type Column struct {
// Name is read from the "name" key.
Name string `backup:"true" json:"name" yaml:"name"`
// Type is read from the "type" key; the accepted type names are not
// visible here.
Type string `backup:"true" json:"type" yaml:"type"`
// Params is declared inline, so its keys sit at the column level.
// NOTE(review): encoding/json has no ",inline" option, so JSON decoding
// presumably goes through another decoder — confirm.
Params *Params `json:",inline" yaml:",inline"` // moved into Ranges after parsing
// Ranges is read from the "ranges" key.
Ranges []*Params `backup:"true" json:"ranges" yaml:"ranges"`
// ForeignKey is read from the "foreign_key" key; the reference format is
// not visible here.
ForeignKey string `backup:"true" json:"foreign_key" yaml:"foreign_key"`
// ForeignKeyColumn is excluded from (de)serialization.
// NOTE(review): presumably resolved from ForeignKey after parsing — confirm.
ForeignKeyColumn *Column `json:"-" yaml:"-"`
// ForeignKeyOrder is read from the "foreign_key_order" key.
ForeignKeyOrder bool `backup:"true" json:"foreign_key_order" yaml:"foreign_key_order"`
// ParquetParams is read from the "parquet" key.
ParquetParams *ColumnParquetParams `backup:"true" json:"parquet" yaml:"parquet"`
}
Column type is used to describe column of one structure.
func (*Column) FillDefaults ¶
func (c *Column) FillDefaults()
type ColumnDateTimeParams ¶
// ColumnDateTimeParams describes params for DateTime fields.
type ColumnDateTimeParams struct {
// From is read from the "from" key.
// NOTE(review): presumably the lower bound of generated values; whether
// it is inclusive is not visible here — confirm.
From time.Time `backup:"true" json:"from" yaml:"from"`
// To is read from the "to" key.
// NOTE(review): presumably the upper bound of generated values — confirm.
To time.Time `backup:"true" json:"to" yaml:"to"`
}
ColumnDateTimeParams type is used to describe params for DateTime fields.
func (*ColumnDateTimeParams) FillDefaults ¶
func (p *ColumnDateTimeParams) FillDefaults()
func (*ColumnDateTimeParams) Parse ¶
func (p *ColumnDateTimeParams) Parse() error
func (*ColumnDateTimeParams) Validate ¶
func (p *ColumnDateTimeParams) Validate() []error
type ColumnFloatParams ¶
// ColumnFloatParams describes params for float fields.
type ColumnFloatParams struct {
// BitWidth is read from the "bit_width" key; the accepted widths are not
// visible here.
BitWidth int `backup:"true" json:"bit_width" yaml:"bit_width"`
// FromPtr is the value decoded from the "from" key.
// NOTE(review): the pointer presumably distinguishes "not set" from 0 — confirm.
FromPtr *float64 `backup:"true" json:"from" yaml:"from"`
// From is excluded from (de)serialization.
// NOTE(review): presumably the resolved value of FromPtr — confirm.
From float64 `json:"-" yaml:"-"`
// ToPtr is the value decoded from the "to" key.
ToPtr *float64 `backup:"true" json:"to" yaml:"to"`
// To is excluded from (de)serialization.
// NOTE(review): presumably the resolved value of ToPtr — confirm.
To float64 `json:"-" yaml:"-"`
}
ColumnFloatParams type is used to describe params for float fields.
func (*ColumnFloatParams) FillDefaults ¶
func (p *ColumnFloatParams) FillDefaults()
func (*ColumnFloatParams) Parse ¶
func (p *ColumnFloatParams) Parse() error
func (*ColumnFloatParams) Validate ¶
func (p *ColumnFloatParams) Validate() []error
type ColumnIntegerParams ¶
// ColumnIntegerParams describes params for integer fields.
type ColumnIntegerParams struct {
// BitWidth is read from the "bit_width" key; the accepted widths are not
// visible here.
BitWidth int `backup:"true" json:"bit_width" yaml:"bit_width"`
// FromPtr is the value decoded from the "from" key.
// NOTE(review): the pointer presumably distinguishes "not set" from 0 — confirm.
FromPtr *int64 `backup:"true" json:"from" yaml:"from"`
// From is excluded from (de)serialization.
// NOTE(review): presumably the resolved value of FromPtr — confirm.
From int64 `json:"-" yaml:"-"`
// ToPtr is the value decoded from the "to" key.
ToPtr *int64 `backup:"true" json:"to" yaml:"to"`
// To is excluded from (de)serialization.
// NOTE(review): presumably the resolved value of ToPtr — confirm.
To int64 `json:"-" yaml:"-"`
}
ColumnIntegerParams type is used to describe params for integer fields.
func (*ColumnIntegerParams) FillDefaults ¶
func (p *ColumnIntegerParams) FillDefaults()
func (*ColumnIntegerParams) Parse ¶
func (p *ColumnIntegerParams) Parse() error
func (*ColumnIntegerParams) Validate ¶
func (p *ColumnIntegerParams) Validate() []error
type ColumnParquetParams ¶
// ColumnParquetParams describes per-column params for parquet output.
type ColumnParquetParams struct {
// Encoding is read from the "encoding" key; the accepted encoding names
// are not visible here.
Encoding string `backup:"true" json:"encoding" yaml:"encoding"`
}
ColumnParquetParams type is used to describe params for parquet fields.
func (*ColumnParquetParams) FillDefaults ¶
func (p *ColumnParquetParams) FillDefaults()
func (*ColumnParquetParams) Parse ¶
func (p *ColumnParquetParams) Parse() error
func (*ColumnParquetParams) Validate ¶
func (p *ColumnParquetParams) Validate() []error
type ColumnStringParams ¶
// ColumnStringParams describes params for string fields.
//
// NOTE(review): the notes on individual fields are inferred from names and
// tags only; the generator that consumes them is not visible here.
type ColumnStringParams struct {
// MinLength is read from the "min_length" key.
MinLength int `backup:"true" json:"min_length" yaml:"min_length"`
// MaxLength is read from the "max_length" key.
MaxLength int `backup:"true" json:"max_length" yaml:"max_length"`
// Locale is read from the "locale" key; accepted locales are not visible here.
Locale string `backup:"true" json:"locale" yaml:"locale"`
// LogicalType is read from the "logical_type" key.
// NOTE(review): presumably one of the *Type constants of this package
// ("first_name", "last_name", "phone", "text") — confirm.
LogicalType string `backup:"true" json:"logical_type" yaml:"logical_type"`
// Template is read from the "template" key; the template syntax is not
// visible here.
Template string `backup:"true" json:"template" yaml:"template"`
// WithoutLargeLetters is read from the "without_large_letters" key.
WithoutLargeLetters bool `backup:"true" json:"without_large_letters" yaml:"without_large_letters"`
// WithoutSmallLetters is read from the "without_small_letters" key.
WithoutSmallLetters bool `backup:"true" json:"without_small_letters" yaml:"without_small_letters"`
// WithoutNumbers is read from the "without_numbers" key.
WithoutNumbers bool `backup:"true" json:"without_numbers" yaml:"without_numbers"`
// WithoutSpecialChars is read from the "without_special_chars" key.
WithoutSpecialChars bool `backup:"true" json:"without_special_chars" yaml:"without_special_chars"`
}
ColumnStringParams type is used to describe params for string fields.
func (*ColumnStringParams) FillDefaults ¶
func (p *ColumnStringParams) FillDefaults()
func (*ColumnStringParams) Parse ¶
func (p *ColumnStringParams) Parse() error
func (*ColumnStringParams) Validate ¶
func (p *ColumnStringParams) Validate() []error
type DataRow ¶
// DataRow represents a single generated data row.
type DataRow struct {
// Values holds the row's values as untyped elements.
// NOTE(review): presumably one entry per column, in model column order — confirm.
Values []any
}
DataRow type is used to represent any data row that was generated.
type DevNullConfig ¶
// DevNullConfig describes the output config for the devnull implementation.
type DevNullConfig struct {
// Handler receives a row and a model name and may return an error.
// It is excluded from (de)serialization, so it can only be set from code.
// NOTE(review): when and how often it is invoked is not visible here.
Handler func(row *DataRow, modelName string) error `json:"-" yaml:"-"`
}
DevNullConfig type is used to describe the output config for the devnull implementation.
func (*DevNullConfig) FillDefaults ¶
func (c *DevNullConfig) FillDefaults()
func (*DevNullConfig) Parse ¶
func (c *DevNullConfig) Parse() error
func (*DevNullConfig) Validate ¶
func (c *DevNullConfig) Validate() []error
type Field ¶
// Field summarizes the methods shared by model config fields.
type Field interface {
// Parse should parse all fields of type "any".
Parse() error
// FillDefaults should fill in all default values.
FillDefaults()
// Validate should validate all values and return the list of all errors found.
Validate() []error
}
Field interface is used to summarize model field methods.
type GenerationConfig ¶
// GenerationConfig describes the config and models of the generated data.
//
// NOTE(review): the backup:"true" tag is consumed outside this view; its
// exact semantics are unverified here.
type GenerationConfig struct {
// WorkersCount is read from the "workers_count" key.
WorkersCount int `json:"workers_count" yaml:"workers_count"`
// BatchSize is read from the "batch_size" key.
BatchSize uint64 `json:"batch_size" yaml:"batch_size"`
// RandomSeed is read from the "random_seed" key.
//nolint:lll
RandomSeed uint64 `backup:"true" json:"random_seed" yaml:"random_seed"` // only for backup
// RealRandomSeed is excluded from (de)serialization.
// NOTE(review): presumably the seed actually used at run time — confirm.
RealRandomSeed uint64 `json:"-" yaml:"-"`
// OutputConfig is read from the "output" key.
OutputConfig *OutputConfig `backup:"true" json:"output" yaml:"output"`
// Models is read from the "models" key and maps a string key to a model.
// NOTE(review): the key is presumably the model name — confirm.
Models map[string]*Model `backup:"true" json:"models" yaml:"models"`
// ModelsToIgnore is read from the "models_to_ignore" key.
ModelsToIgnore []string `json:"models_to_ignore" yaml:"models_to_ignore"`
}
GenerationConfig type is used to describe config and model of generated data.
func (*GenerationConfig) FillDefaults ¶
func (gc *GenerationConfig) FillDefaults()
func (*GenerationConfig) Parse ¶
func (gc *GenerationConfig) Parse() error
func (*GenerationConfig) ParseFromFile ¶
func (gc *GenerationConfig) ParseFromFile(path string) error
func (*GenerationConfig) ParseFromJSON ¶
func (gc *GenerationConfig) ParseFromJSON(data []byte) error
func (*GenerationConfig) ParseFromYAML ¶
func (gc *GenerationConfig) ParseFromYAML(data []byte) error
func (*GenerationConfig) PostProcess ¶
func (gc *GenerationConfig) PostProcess() error
func (*GenerationConfig) Validate ¶
func (gc *GenerationConfig) Validate() []error
type HTTPConfig ¶
// HTTPConfig describes the delivery config for the http implementation.
//
// NOTE(review): the field names mirror net/http.Server settings, but the
// consumer is not visible here — confirm how they are applied.
type HTTPConfig struct {
// ListenAddress is read from the "listen_address" key.
ListenAddress string `json:"listen_address" yaml:"listen_address"`
// ReadTimeout is read from the "read_timeout" key.
ReadTimeout time.Duration `json:"read_timeout" yaml:"read_timeout"`
// WriteTimeout is read from the "write_timeout" key.
WriteTimeout time.Duration `json:"write_timeout" yaml:"write_timeout"`
// IdleTimeout is read from the "idle_timeout" key.
IdleTimeout time.Duration `json:"idle_timeout" yaml:"idle_timeout"`
}
HTTPConfig type is used to describe the delivery config for the http implementation.
func (*HTTPConfig) FillDefaults ¶
func (c *HTTPConfig) FillDefaults()
func (*HTTPConfig) Validate ¶
func (c *HTTPConfig) Validate() []error
type HTTPParams ¶
// HTTPParams holds HTTP output parameters. It is the type of
// OutputConfig.HTTPParams and is embedded in TCSConfig.
type HTTPParams struct {
// Endpoint is read from the "endpoint" key.
Endpoint string `json:"endpoint" yaml:"endpoint"`
// Timeout is read from the "timeout" key.
Timeout time.Duration `json:"timeout" yaml:"timeout"`
// BatchSize is read from the "batch_size" key.
BatchSize int `json:"batch_size" yaml:"batch_size"`
// WorkersCount is read from the "workers_count" key.
WorkersCount int `json:"workers_count" yaml:"workers_count"`
// Headers is read from the "headers" key.
// NOTE(review): presumably sent as HTTP request headers — confirm.
Headers map[string]string `json:"headers" yaml:"headers"`
// FormatTemplate is read from the "format_template" key; the template
// syntax is not visible here.
FormatTemplate string `json:"format_template" yaml:"format_template"`
}
func (*HTTPParams) FillDefaults ¶
func (c *HTTPParams) FillDefaults()
func (*HTTPParams) Parse ¶
func (c *HTTPParams) Parse() error
func (*HTTPParams) Validate ¶
func (c *HTTPParams) Validate() []error
type Model ¶
// Model describes one model of generated data.
//
// NOTE(review): the backup:"true" tag is consumed outside this view; its
// exact semantics are unverified here.
type Model struct {
// Name carries no serialization tags.
// NOTE(review): presumably filled from the GenerationConfig.Models map key — confirm.
Name string
// RowsCount is read from the "rows_count" key.
RowsCount uint64 `backup:"true" json:"rows_count" yaml:"rows_count"`
// GenerateFrom is read from the "generate_from" key.
GenerateFrom uint64 `backup:"true" json:"generate_from" yaml:"generate_from"`
// GenerateToPtr is the value decoded from the "generate_to" key.
// NOTE(review): the pointer presumably distinguishes "not set" from 0 — confirm.
GenerateToPtr *uint64 `json:"generate_to" yaml:"generate_to"`
// GenerateTo is excluded from (de)serialization.
// NOTE(review): presumably the resolved value of GenerateToPtr — confirm.
GenerateTo uint64 `json:"-" yaml:"-"`
// RowsPerFile is read from the "rows_per_file" key.
RowsPerFile uint64 `backup:"true" json:"rows_per_file" yaml:"rows_per_file"`
// ModelDir is read from the "model_dir" key.
ModelDir string `backup:"true" json:"model_dir" yaml:"model_dir"`
// Columns of the partitioning key with PartitionColumn.WriteToOutput == false must be at the end of the slice.
Columns []*Column `backup:"true" json:"columns" yaml:"columns"`
// PartitionColumns is read from the "partition_columns" key.
PartitionColumns []*PartitionColumn `backup:"true" json:"partition_columns" yaml:"partition_columns"`
}
Model type is used to describe model of generated data.
func (*Model) FillDefaults ¶
func (m *Model) FillDefaults()
type OpenAI ¶
// OpenAI describes the OpenAI config.
type OpenAI struct {
// APIKey is read from the "api_key" key.
APIKey string `json:"api_key" yaml:"api_key"`
// BaseURL is read from the "base_url" key.
BaseURL string `json:"base_url" yaml:"base_url"`
// Model is read from the "model" key.
Model string `json:"model" yaml:"model"`
}
OpenAI type is used to describe the OpenAI config.
type OutputConfig ¶
// OutputConfig holds the output settings: a type identifier, common options,
// raw type-specific params, and one typed params field per output type.
//
// NOTE(review): the typed *Params fields are excluded from (de)serialization;
// presumably Parse fills the one matching Type from Params — confirm.
type OutputConfig struct {
// Type is read from the "type" key.
// NOTE(review): presumably one of OutputTypes — confirm.
Type string `backup:"true" json:"type" yaml:"type"`
// Dir is read from the "dir" key.
Dir string `backup:"true" json:"dir" yaml:"dir"`
// CheckpointInterval is read from the "checkpoint_interval" key.
CheckpointInterval time.Duration `json:"checkpoint_interval" yaml:"checkpoint_interval"`
// CreateModelDir is read from the "create_model_dir" key.
CreateModelDir bool `backup:"true" json:"create_model_dir" yaml:"create_model_dir"`
// Params is the untyped value decoded from the "params" key.
Params any `backup:"true" json:"params" yaml:"params"`
// DevNullParams is the typed params for the devnull output; not (de)serialized.
DevNullParams *DevNullConfig `json:"-" yaml:"-"`
// CSVParams is the typed params for the CSV output; not (de)serialized.
CSVParams *CSVConfig `json:"-" yaml:"-"`
// HTTPParams is the typed params for the http output; not (de)serialized.
HTTPParams *HTTPParams `json:"-" yaml:"-"`
// TCSParams is the typed params for the TCS output; not (de)serialized.
TCSParams *TCSConfig `json:"-" yaml:"-"`
// ParquetParams is the typed params for the parquet output; not (de)serialized.
ParquetParams *ParquetConfig `json:"-" yaml:"-"`
}
func (*OutputConfig) FillDefaults ¶
func (c *OutputConfig) FillDefaults()
func (*OutputConfig) Parse ¶
func (c *OutputConfig) Parse() error
func (*OutputConfig) Validate ¶
func (c *OutputConfig) Validate() []error
type Params ¶
// Params holds column value parameters. It is used both inline in Column and
// as the element type of Column.Ranges.
//
// NOTE(review): the typed *Params fields are excluded from (de)serialization;
// presumably the one matching ColumnType is filled from TypeParams — confirm.
type Params struct {
// ColumnType is excluded from (de)serialization.
// NOTE(review): presumably copied from Column.Type — confirm.
ColumnType string `json:"-" yaml:"-"`
// TypeParams is the untyped value decoded from the "type_params" key.
//nolint:lll
TypeParams any `backup:"true" json:"type_params" yaml:"type_params"` // only for config parsing
// IntegerParams is the typed params for integer columns; not (de)serialized.
IntegerParams *ColumnIntegerParams `json:"-" yaml:"-"`
// FloatParams is the typed params for float columns; not (de)serialized.
FloatParams *ColumnFloatParams `json:"-" yaml:"-"`
// StringParams is the typed params for string columns; not (de)serialized.
StringParams *ColumnStringParams `json:"-" yaml:"-"`
// DateTimeParams is the typed params for DateTime columns; not (de)serialized.
DateTimeParams *ColumnDateTimeParams `json:"-" yaml:"-"`
// Values is read from the "values" key.
// NOTE(review): presumably an explicit set of values to draw from — confirm.
Values []any `backup:"true" json:"values" yaml:"values"`
// NullPercentage is read from the "null_percentage" key; the scale
// (0–1 or 0–100) is not visible here.
NullPercentage float64 `backup:"true" json:"null_percentage" yaml:"null_percentage"`
// DistinctPercentage is read from the "distinct_percentage" key.
DistinctPercentage float64 `backup:"true" json:"distinct_percentage" yaml:"distinct_percentage"`
// DistinctCount is read from the "distinct_count" key.
DistinctCount uint64 `backup:"true" json:"distinct_count" yaml:"distinct_count"`
// RangePercentage is read from the "range_percentage" key.
RangePercentage float64 `backup:"true" json:"range_percentage" yaml:"range_percentage"`
// Ordered is read from the "ordered" key.
Ordered bool `backup:"true" json:"ordered" yaml:"ordered"`
}
func (*Params) FillDefaults ¶
func (p *Params) FillDefaults()
func (*Params) PostProcess ¶
type ParquetConfig ¶
// ParquetConfig describes the output config for the parquet implementation.
type ParquetConfig struct {
// CompressionCodec is read from the "compression_codec" key; the accepted
// codec names are not visible here.
CompressionCodec string `backup:"true" json:"compression_codec" yaml:"compression_codec"`
// FloatPrecision is read from the "float_precision" key.
FloatPrecision int `json:"float_precision" yaml:"float_precision"`
// DateTimeFormat is read from the "datetime_format" key.
// NOTE(review): presumably ParquetDateTimeMillisFormat ("millis") or
// ParquetDateTimeMicrosFormat ("micros") — confirm against Validate.
DateTimeFormat string `json:"datetime_format" yaml:"datetime_format"`
}
ParquetConfig type is used to describe the output config for the parquet implementation.
func (*ParquetConfig) FillDefaults ¶
func (c *ParquetConfig) FillDefaults()
func (*ParquetConfig) Parse ¶
func (c *ParquetConfig) Parse() error
func (*ParquetConfig) Validate ¶
func (c *ParquetConfig) Validate() []error
type PartitionColumn ¶
// PartitionColumn describes the partition parameters for a column.
type PartitionColumn struct {
// Name is read from the "name" key.
// NOTE(review): presumably refers to a Column.Name of the same model — confirm.
Name string `backup:"true" json:"name" yaml:"name"`
// WriteToOutput is read from the "write_to_output" key. Model.Columns
// documents that partition columns with WriteToOutput == false must be at
// the end of the columns slice.
WriteToOutput bool `backup:"true" json:"write_to_output" yaml:"write_to_output"`
}
PartitionColumn type is used to describe partition parameters for column.
func (*PartitionColumn) FillDefaults ¶
func (pc *PartitionColumn) FillDefaults()
func (*PartitionColumn) Validate ¶
func (pc *PartitionColumn) Validate() []error
type TCSConfig ¶
// TCSConfig describes the output config for the TCS implementation.
type TCSConfig struct {
// HTTPParams is embedded and declared inline, so its keys sit at the same
// level as the TCS params.
// NOTE(review): encoding/json has no ",inline" option (it flattens untagged
// embedded structs instead), so JSON decoding presumably goes through
// another decoder — confirm.
HTTPParams `json:",inline" yaml:",inline"`
}
TCSConfig type is used to describe the output config for the TCS implementation.
func (*TCSConfig) FillDefaults ¶
func (c *TCSConfig) FillDefaults()