models

package
v0.0.0-...-732dc05 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 15, 2025 License: MIT Imports: 20 Imported by: 0

Documentation

Index

Constants

View Source
// String type names.
// NOTE(review): presumably the accepted values of ColumnStringParams.LogicalType — confirm.
const (
	FirstNameType = "first_name"
	LastNameType  = "last_name"
	PhoneType     = "phone"
	TextType      = "text"
)
View Source
// Default output directory and output type.
// NOTE(review): presumably applied to OutputConfig.Dir and OutputConfig.Type — confirm.
const (
	DefaultOutputDir  = "output"
	DefaultOutputType = "csv"
)

// Names of the parquet datetime formats.
// NOTE(review): presumably the accepted values of ParquetConfig.DateTimeFormat — confirm.
const (
	ParquetDateTimeMillisFormat = "millis"
	ParquetDateTimeMicrosFormat = "micros"
)
View Source
// DefaultWorkersPerCPU is the default number of workers per CPU.
// NOTE(review): presumably used to derive the default GenerationConfig.WorkersCount — confirm.
const DefaultWorkersPerCPU = 4

Variables

View Source
// DiskFilesOutputTypes lists the output types that actually create files on disk.
var DiskFilesOutputTypes = []string{
	"csv",
	"parquet",
}
View Source
// OutputTypes lists the known output type names.
// NOTE(review): presumably OutputConfig.Type is validated against this list — confirm.
var OutputTypes = []string{
	"csv",
	"devnull",
	"http",
	"tcs",
	"parquet",
}

Functions

func DecodeFile

func DecodeFile(path string, v any) error

func DecodeReader

func DecodeReader(format string, r io.Reader, v any) error

func FieldFillDefaults

func FieldFillDefaults(field Field)

func FieldParse

func FieldParse(field Field) error

func FieldValidate

func FieldValidate(field Field) []error

Types

type AppConfig

// AppConfig describes the application config.
type AppConfig struct {
	// LogFormat is read from the "log_format" key.
	LogFormat  string     `json:"log_format" yaml:"log_format"`
	// HTTPConfig holds the HTTP settings read from the "http" key.
	HTTPConfig HTTPConfig `json:"http"       yaml:"http"`
	// OpenAI holds the OpenAI settings read from the "open_ai" key.
	OpenAI     OpenAI     `json:"open_ai"    yaml:"open_ai"`
}

AppConfig type is used to describe application config.

func (*AppConfig) FillDefaults

func (m *AppConfig) FillDefaults()

func (*AppConfig) ParseFromFile

func (m *AppConfig) ParseFromFile(path string) error

func (*AppConfig) PostProcess

func (m *AppConfig) PostProcess() error

func (*AppConfig) Validate

func (m *AppConfig) Validate() []error

type CSVConfig

// CSVConfig describes the output config for the CSV implementation.
type CSVConfig struct {
	// NOTE(review): presumably the number of digits written for float values — confirm.
	FloatPrecision int    `json:"float_precision" yaml:"float_precision"`
	// NOTE(review): presumably the layout used to format datetime values — confirm.
	DatetimeFormat string `json:"datetime_format" yaml:"datetime_format"`
	// Delimiter is read from the "delimiter" key.
	Delimiter      string `backup:"true"          json:"delimiter"       yaml:"delimiter"`
	// WithoutHeaders is read from the "without_headers" key.
	WithoutHeaders bool   `backup:"true"          json:"without_headers" yaml:"without_headers"`
}

CSVConfig type is used to describe output config for CSV implementation.

func (*CSVConfig) FillDefaults

func (c *CSVConfig) FillDefaults()

func (*CSVConfig) Parse

func (c *CSVConfig) Parse() error

func (*CSVConfig) Validate

func (c *CSVConfig) Validate() []error

type Column

// Column describes one column of a structure.
type Column struct {
	Name             string               `backup:"true"  json:"name"              yaml:"name"`
	Type             string               `backup:"true"  json:"type"              yaml:"type"`
	Params           *Params              `json:",inline" yaml:",inline"` // tagged inline in the config; moved to Ranges after parsing
	Ranges           []*Params            `backup:"true"  json:"ranges"            yaml:"ranges"`
	// ForeignKey is read from the "foreign_key" key.
	// NOTE(review): presumably a reference to a column of another model — confirm format.
	ForeignKey       string               `backup:"true"  json:"foreign_key"       yaml:"foreign_key"`
	// ForeignKeyColumn is excluded from JSON/YAML (tag "-").
	// NOTE(review): presumably resolved from ForeignKey after parsing — confirm.
	ForeignKeyColumn *Column              `json:"-"       yaml:"-"`
	ForeignKeyOrder  bool                 `backup:"true"  json:"foreign_key_order" yaml:"foreign_key_order"`
	// ParquetParams is read from the "parquet" key.
	ParquetParams    *ColumnParquetParams `backup:"true"  json:"parquet"           yaml:"parquet"`
}

Column type is used to describe column of one structure.

func (*Column) FillDefaults

func (c *Column) FillDefaults()

func (*Column) Parse

func (c *Column) Parse() error

func (*Column) String

func (c *Column) String() string

func (*Column) Validate

func (c *Column) Validate() []error

type ColumnDateTimeParams

// ColumnDateTimeParams describes params for DateTime fields.
type ColumnDateTimeParams struct {
	// From and To are read from the "from" and "to" keys.
	// NOTE(review): presumably the bounds of the generated values; inclusiveness is not visible here — confirm.
	From time.Time `backup:"true" json:"from" yaml:"from"`
	To   time.Time `backup:"true" json:"to"   yaml:"to"`
}

ColumnDateTimeParams type is used to describe params for DateTime fields.

func (*ColumnDateTimeParams) FillDefaults

func (p *ColumnDateTimeParams) FillDefaults()

func (*ColumnDateTimeParams) Parse

func (p *ColumnDateTimeParams) Parse() error

func (*ColumnDateTimeParams) Validate

func (p *ColumnDateTimeParams) Validate() []error

type ColumnFloatParams

// ColumnFloatParams describes params for float fields.
type ColumnFloatParams struct {
	BitWidth int      `backup:"true" json:"bit_width" yaml:"bit_width"`
	// FromPtr is the "from" value as read from the config; a pointer, so nil
	// distinguishes an absent key from an explicit zero.
	FromPtr  *float64 `backup:"true" json:"from"      yaml:"from"`
	// From is excluded from JSON/YAML (tag "-").
	// NOTE(review): presumably derived from FromPtr or a default — confirm.
	From     float64  `json:"-"      yaml:"-"`
	// ToPtr is the "to" value as read from the config; nil when the key is absent.
	ToPtr    *float64 `backup:"true" json:"to"        yaml:"to"`
	// To is excluded from JSON/YAML (tag "-").
	// NOTE(review): presumably derived from ToPtr or a default — confirm.
	To       float64  `json:"-"      yaml:"-"`
}

ColumnFloatParams type is used to describe params for float fields.

func (*ColumnFloatParams) FillDefaults

func (p *ColumnFloatParams) FillDefaults()

func (*ColumnFloatParams) Parse

func (p *ColumnFloatParams) Parse() error

func (*ColumnFloatParams) Validate

func (p *ColumnFloatParams) Validate() []error

type ColumnIntegerParams

// ColumnIntegerParams describes params for integer fields.
type ColumnIntegerParams struct {
	BitWidth int    `backup:"true" json:"bit_width" yaml:"bit_width"`
	// FromPtr is the "from" value as read from the config; a pointer, so nil
	// distinguishes an absent key from an explicit zero.
	FromPtr  *int64 `backup:"true" json:"from"      yaml:"from"`
	// From is excluded from JSON/YAML (tag "-").
	// NOTE(review): presumably derived from FromPtr or a default — confirm.
	From     int64  `json:"-"      yaml:"-"`
	// ToPtr is the "to" value as read from the config; nil when the key is absent.
	ToPtr    *int64 `backup:"true" json:"to"        yaml:"to"`
	// To is excluded from JSON/YAML (tag "-").
	// NOTE(review): presumably derived from ToPtr or a default — confirm.
	To       int64  `json:"-"      yaml:"-"`
}

ColumnIntegerParams type is used to describe params for integer fields.

func (*ColumnIntegerParams) FillDefaults

func (p *ColumnIntegerParams) FillDefaults()

func (*ColumnIntegerParams) Parse

func (p *ColumnIntegerParams) Parse() error

func (*ColumnIntegerParams) Validate

func (p *ColumnIntegerParams) Validate() []error

type ColumnParquetParams

// ColumnParquetParams describes params for parquet fields.
type ColumnParquetParams struct {
	// Encoding is read from the "encoding" key; the set of accepted values is
	// not visible here.
	Encoding string `backup:"true" json:"encoding" yaml:"encoding"`
}

ColumnParquetParams type is used to describe params for parquet fields.

func (*ColumnParquetParams) FillDefaults

func (p *ColumnParquetParams) FillDefaults()

func (*ColumnParquetParams) Parse

func (p *ColumnParquetParams) Parse() error

func (*ColumnParquetParams) Validate

func (p *ColumnParquetParams) Validate() []error

type ColumnStringParams

// ColumnStringParams describes params for string fields.
type ColumnStringParams struct {
	MinLength           int    `backup:"true" json:"min_length"            yaml:"min_length"`
	MaxLength           int    `backup:"true" json:"max_length"            yaml:"max_length"`
	Locale              string `backup:"true" json:"locale"                yaml:"locale"`
	// NOTE(review): presumably one of the *Type constants ("first_name", "last_name", "phone", "text") — confirm.
	LogicalType         string `backup:"true" json:"logical_type"          yaml:"logical_type"`
	Template            string `backup:"true" json:"template"              yaml:"template"`
	// The Without* flags are read from the matching "without_*" keys.
	// NOTE(review): presumably each excludes a character class from generated strings — confirm.
	WithoutLargeLetters bool   `backup:"true" json:"without_large_letters" yaml:"without_large_letters"`
	WithoutSmallLetters bool   `backup:"true" json:"without_small_letters" yaml:"without_small_letters"`
	WithoutNumbers      bool   `backup:"true" json:"without_numbers"       yaml:"without_numbers"`
	WithoutSpecialChars bool   `backup:"true" json:"without_special_chars" yaml:"without_special_chars"`
}

ColumnStringParams type is used to describe params for string fields.

func (*ColumnStringParams) FillDefaults

func (p *ColumnStringParams) FillDefaults()

func (*ColumnStringParams) Parse

func (p *ColumnStringParams) Parse() error

func (*ColumnStringParams) Validate

func (p *ColumnStringParams) Validate() []error

type DataRow

// DataRow represents any data row that was generated.
type DataRow struct {
	// Values holds the row's values.
	// NOTE(review): presumably one entry per column, in column order — confirm.
	Values []any
}

DataRow type is used to represent any data row that was generated.

type DevNullConfig

// DevNullConfig describes the output config for the devnull implementation.
type DevNullConfig struct {
	// Handler receives a row and a model name and may return an error.
	// It is excluded from JSON/YAML (tag "-"), so it can only be set from code.
	Handler func(row *DataRow, modelName string) error `json:"-" yaml:"-"`
}

DevNullConfig type is used to describe output config for devnull implementation.

func (*DevNullConfig) FillDefaults

func (c *DevNullConfig) FillDefaults()

func (*DevNullConfig) Parse

func (c *DevNullConfig) Parse() error

func (*DevNullConfig) Validate

func (c *DevNullConfig) Validate() []error

type Field

// Field summarizes the methods shared by model fields.
type Field interface {
	// Parse should parse all fields that have the "any" type.
	Parse() error
	// FillDefaults should fill in all default values.
	FillDefaults()
	// Validate should validate all values and return the list of all errors that occurred.
	Validate() []error
}

Field interface is used to summarize model field methods.

type GenerationConfig

// GenerationConfig describes the config and the models of the generated data.
type GenerationConfig struct {
	WorkersCount int    `json:"workers_count" yaml:"workers_count"`
	BatchSize    uint64 `json:"batch_size"    yaml:"batch_size"`
	// RandomSeed is read from the "random_seed" key.
	//nolint:lll
	RandomSeed     uint64            `backup:"true"           json:"random_seed"      yaml:"random_seed"` // only for backup
	// RealRandomSeed is excluded from JSON/YAML (tag "-").
	// NOTE(review): presumably the seed actually used for generation — confirm.
	RealRandomSeed uint64            `json:"-"                yaml:"-"`
	// OutputConfig is read from the "output" key.
	OutputConfig   *OutputConfig     `backup:"true"           json:"output"           yaml:"output"`
	// Models is read from the "models" key.
	// NOTE(review): the map key is presumably the model name — confirm.
	Models         map[string]*Model `backup:"true"           json:"models"           yaml:"models"`
	// ModelsToIgnore is read from the "models_to_ignore" key.
	ModelsToIgnore []string          `json:"models_to_ignore" yaml:"models_to_ignore"`
}

GenerationConfig type is used to describe config and model of generated data.

func (*GenerationConfig) FillDefaults

func (gc *GenerationConfig) FillDefaults()

func (*GenerationConfig) Parse

func (gc *GenerationConfig) Parse() error

func (*GenerationConfig) ParseFromFile

func (gc *GenerationConfig) ParseFromFile(path string) error

func (*GenerationConfig) ParseFromJSON

func (gc *GenerationConfig) ParseFromJSON(data []byte) error

func (*GenerationConfig) ParseFromYAML

func (gc *GenerationConfig) ParseFromYAML(data []byte) error

func (*GenerationConfig) PostProcess

func (gc *GenerationConfig) PostProcess() error

func (*GenerationConfig) Validate

func (gc *GenerationConfig) Validate() []error

type HTTPConfig

// HTTPConfig describes the delivery config for the http implementation.
type HTTPConfig struct {
	// ListenAddress is read from the "listen_address" key.
	ListenAddress string        `json:"listen_address" yaml:"listen_address"`
	// NOTE(review): the timeouts below presumably map to the same-named net/http server timeouts — confirm.
	ReadTimeout   time.Duration `json:"read_timeout"   yaml:"read_timeout"`
	WriteTimeout  time.Duration `json:"write_timeout"  yaml:"write_timeout"`
	IdleTimeout   time.Duration `json:"idle_timeout"   yaml:"idle_timeout"`
}

HTTPConfig type is used to describe delivery config for http implementation.

func (*HTTPConfig) FillDefaults

func (c *HTTPConfig) FillDefaults()

func (*HTTPConfig) Validate

func (c *HTTPConfig) Validate() []error

type HTTPParams

// HTTPParams holds HTTP output parameters. It is the type of
// OutputConfig.HTTPParams and is embedded in TCSConfig.
type HTTPParams struct {
	// Endpoint is read from the "endpoint" key.
	Endpoint       string            `json:"endpoint"        yaml:"endpoint"`
	Timeout        time.Duration     `json:"timeout"         yaml:"timeout"`
	BatchSize      int               `json:"batch_size"      yaml:"batch_size"`
	WorkersCount   int               `json:"workers_count"   yaml:"workers_count"`
	// Headers is read from the "headers" key.
	// NOTE(review): presumably sent with every request — confirm.
	Headers        map[string]string `json:"headers"         yaml:"headers"`
	// FormatTemplate is read from the "format_template" key; the template
	// syntax is not visible here.
	FormatTemplate string            `json:"format_template" yaml:"format_template"`
}

func (*HTTPParams) FillDefaults

func (c *HTTPParams) FillDefaults()

func (*HTTPParams) Parse

func (c *HTTPParams) Parse() error

func (*HTTPParams) Validate

func (c *HTTPParams) Validate() []error

type Model

// Model describes a model of generated data.
type Model struct {
	// Name carries no struct tags.
	// NOTE(review): presumably filled from the key in GenerationConfig.Models — confirm.
	Name          string
	RowsCount     uint64  `backup:"true"      json:"rows_count"    yaml:"rows_count"`
	GenerateFrom  uint64  `backup:"true"      json:"generate_from" yaml:"generate_from"`
	// GenerateToPtr is the "generate_to" value as read from the config; nil when
	// the key is absent.
	GenerateToPtr *uint64 `json:"generate_to" yaml:"generate_to"`
	// GenerateTo is excluded from JSON/YAML (tag "-").
	// NOTE(review): presumably derived from GenerateToPtr or a default — confirm.
	GenerateTo    uint64  `json:"-"           yaml:"-"`
	RowsPerFile   uint64  `backup:"true"      json:"rows_per_file" yaml:"rows_per_file"`
	ModelDir      string  `backup:"true"      json:"model_dir"     yaml:"model_dir"`
	// Columns from the partitioning key that have PartitionColumn.WriteToOutput == false
	// must be at the end of the slice.
	Columns          []*Column          `backup:"true" json:"columns"           yaml:"columns"`
	PartitionColumns []*PartitionColumn `backup:"true" json:"partition_columns" yaml:"partition_columns"`
}

Model type is used to describe model of generated data.

func (*Model) FillDefaults

func (m *Model) FillDefaults()

func (*Model) Parse

func (m *Model) Parse() error

func (*Model) Validate

func (m *Model) Validate() []error

type OpenAI

// OpenAI describes the OpenAI config.
type OpenAI struct {
	// APIKey is read from the "api_key" key.
	APIKey  string `json:"api_key"  yaml:"api_key"`
	// BaseURL is read from the "base_url" key.
	BaseURL string `json:"base_url" yaml:"base_url"`
	// Model is read from the "model" key.
	Model   string `json:"model"    yaml:"model"`
}

OpenAI type is used to describe OpenAI config.

type OutputConfig

// OutputConfig holds the output settings; it is the type of
// GenerationConfig.OutputConfig (config key "output").
type OutputConfig struct {
	// Type is read from the "type" key.
	// NOTE(review): presumably one of OutputTypes — confirm.
	Type               string         `backup:"true"              json:"type"                yaml:"type"`
	Dir                string         `backup:"true"              json:"dir"                 yaml:"dir"`
	CheckpointInterval time.Duration  `json:"checkpoint_interval" yaml:"checkpoint_interval"`
	CreateModelDir     bool           `backup:"true"              json:"create_model_dir"    yaml:"create_model_dir"`
	// Params is the untyped "params" value from the config.
	// NOTE(review): presumably parsed into one of the typed fields below, depending on Type — confirm.
	Params             any            `backup:"true"              json:"params"              yaml:"params"`
	// The typed parameter fields below are excluded from JSON/YAML (tag "-").
	DevNullParams      *DevNullConfig `json:"-"                   yaml:"-"`
	CSVParams          *CSVConfig     `json:"-"                   yaml:"-"`
	HTTPParams         *HTTPParams    `json:"-"                   yaml:"-"`
	TCSParams          *TCSConfig     `json:"-"                   yaml:"-"`
	ParquetParams      *ParquetConfig `json:"-"                   yaml:"-"`
}

func (*OutputConfig) FillDefaults

func (c *OutputConfig) FillDefaults()

func (*OutputConfig) Parse

func (c *OutputConfig) Parse() error

func (*OutputConfig) Validate

func (c *OutputConfig) Validate() []error

type Params

// Params holds column generation parameters; it is the type of Column.Params
// and of the elements of Column.Ranges.
type Params struct {
	// ColumnType is excluded from JSON/YAML (tag "-").
	// NOTE(review): presumably copied from the owning Column.Type — confirm.
	ColumnType string `json:"-" yaml:"-"`
	// TypeParams is the untyped "type_params" value from the config.
	//nolint:lll
	TypeParams         any                   `backup:"true" json:"type_params"         yaml:"type_params"` // only for config parsing
	// The typed parameter fields below are excluded from JSON/YAML (tag "-").
	// NOTE(review): presumably filled from TypeParams according to ColumnType — confirm.
	IntegerParams      *ColumnIntegerParams  `json:"-"      yaml:"-"`
	FloatParams        *ColumnFloatParams    `json:"-"      yaml:"-"`
	StringParams       *ColumnStringParams   `json:"-"      yaml:"-"`
	DateTimeParams     *ColumnDateTimeParams `json:"-"      yaml:"-"`
	// Values is read from the "values" key.
	Values             []any                 `backup:"true" json:"values"              yaml:"values"`
	// NOTE(review): the scale of the percentage fields (0-1 or 0-100) is not visible here — confirm.
	NullPercentage     float64               `backup:"true" json:"null_percentage"     yaml:"null_percentage"`
	DistinctPercentage float64               `backup:"true" json:"distinct_percentage" yaml:"distinct_percentage"`
	DistinctCount      uint64                `backup:"true" json:"distinct_count"      yaml:"distinct_count"`
	RangePercentage    float64               `backup:"true" json:"range_percentage"    yaml:"range_percentage"`
	Ordered            bool                  `backup:"true" json:"ordered"             yaml:"ordered"`
}

func (*Params) FillDefaults

func (p *Params) FillDefaults()

func (*Params) Parse

func (p *Params) Parse() error

func (*Params) PostProcess

func (p *Params) PostProcess() error

func (*Params) Validate

func (p *Params) Validate() []error

type ParquetConfig

// ParquetConfig describes the output config for the parquet implementation.
type ParquetConfig struct {
	// CompressionCodec is read from the "compression_codec" key; the set of
	// accepted codecs is not visible here.
	CompressionCodec string `backup:"true"          json:"compression_codec" yaml:"compression_codec"`
	FloatPrecision   int    `json:"float_precision" yaml:"float_precision"`
	// DateTimeFormat is read from the "datetime_format" key.
	// NOTE(review): presumably ParquetDateTimeMillisFormat or ParquetDateTimeMicrosFormat — confirm.
	DateTimeFormat   string `json:"datetime_format" yaml:"datetime_format"`
}

ParquetConfig type is used to describe output config for parquet implementation.

func (*ParquetConfig) FillDefaults

func (c *ParquetConfig) FillDefaults()

func (*ParquetConfig) Parse

func (c *ParquetConfig) Parse() error

func (*ParquetConfig) Validate

func (c *ParquetConfig) Validate() []error

type PartitionColumn

// PartitionColumn describes the partition parameters for a column.
type PartitionColumn struct {
	// Name is read from the "name" key.
	// NOTE(review): presumably the name of a column in Model.Columns — confirm.
	Name          string `backup:"true" json:"name"            yaml:"name"`
	// WriteToOutput is read from the "write_to_output" key.
	// NOTE(review): presumably controls whether the column is also written to the output — confirm.
	WriteToOutput bool   `backup:"true" json:"write_to_output" yaml:"write_to_output"`
}

PartitionColumn type is used to describe partition parameters for column.

func (*PartitionColumn) FillDefaults

func (pc *PartitionColumn) FillDefaults()

func (*PartitionColumn) Validate

func (pc *PartitionColumn) Validate() []error

type TCSConfig

// TCSConfig describes the output config for the TCS implementation.
type TCSConfig struct {
	// HTTPParams is embedded and tagged inline, so its fields are promoted to
	// TCSConfig.
	HTTPParams `json:",inline" yaml:",inline"`
}

TCSConfig type is used to describe output config for TCS implementation.

func (*TCSConfig) FillDefaults

func (c *TCSConfig) FillDefaults()

func (*TCSConfig) Parse

func (c *TCSConfig) Parse() error

func (*TCSConfig) Validate

func (c *TCSConfig) Validate() []error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL