cli

package
v0.0.0-...-cfe9fea Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 13, 2025 License: BSD-2-Clause Imports: 26 Imported by: 0

Documentation

Index

Constants

View Source
const (

	// GeneProductQuery is the AQL query for fetching gene products
	GeneProductQuery = `` /* 350-byte string literal not displayed */

	ListActiveGenesQ = `` /* 591-byte string literal not displayed */

	ListPubmedsByFeature = `` /* 184-byte string literal not displayed */

	ListSynonyms = `` /* 1457-byte string literal not displayed */

)
View Source
const DefaultAQLQuery = `` /* 742-byte string literal not displayed */

DefaultAQLQuery is the default AQL query for fetching gene data from ArangoDB. It is exported for use in flag.go.

View Source
const (
	// DefaultUserName is the default creator/updater for annotations
	DefaultUserName = "dcr@dictycr.org"
)
View Source
const (
	GeneProductTag = "gene product"
)

Constants for gene product processing

Variables

View Source
var AnnMap = map[string]string{
	"CGM_DDB_PASC": "pgaudet@northwestern.edu",
	"CGM_DDB_PFEY": "pfey@northwestern.edu",
	"CGM_DDB_BOBD": "robert-dodson@northwestern.edu",
	"CGM_DDB_KPIL": "kpilchar@northwestern.edu",
	"CGM_DDB":      "dictybase@northwestern.edu",
}

AnnMap maps legacy creator usernames to their email addresses.

Functions

func GeneProductFromCsvFlag

func GeneProductFromCsvFlag() []cli.Flag

func GeneProductUpdaterFlags

func GeneProductUpdaterFlags() []cli.Flag

GeneProductUpdaterFlags returns all flags required for the gene product updater command.

func GeneUpdaterFlags

func GeneUpdaterFlags() []cli.Flag

GeneUpdaterFlags returns all flags required for the gene updater command.

func LoadCSVToArangodb

func LoadCSVToArangodb(cltx *cli.Context) error

func LoadCSVToArangodbFlag

func LoadCSVToArangodbFlag() []cli.Flag

LoadCSVToArangodbFlag returns all flags required for loading CSV data into ArangoDB.

func LoadFeatureAnnotationFlag

func LoadFeatureAnnotationFlag() []cli.Flag

LoadFeatureAnnotationFlag returns all flags required for loading feature annotations

func LoadGeneProduct

func LoadGeneProduct(c *cli.Context) error

func LoadGeneProductFlag

func LoadGeneProductFlag() []cli.Flag

func RunFeatureAnnotationLoader

func RunFeatureAnnotationLoader(cltx *cli.Context) error

func RunGeneProductUpdater

func RunGeneProductUpdater(cltx *cli.Context) error

RunGeneProductUpdater is the main entry point for the gene product updater.

func RunGeneUpdater

func RunGeneUpdater(cltx *cli.Context) error

func RunSynonymLoader

func RunSynonymLoader(cltx *cli.Context) error

RunSynonymLoader is the main entry point for the synonym loader.

func SynonymLoaderFlags

func SynonymLoaderFlags() []cli.Flag

SynonymLoaderFlags returns all flags required for the synonym loader command.

Types

type AppConfig

type AppConfig struct {
	AQLQuery             string
	ArangoUser           string // For authorship in gRPC updates
	NumProcessingWorkers int
	NumGrpcWorkers       int
	Logger               *logrus.Entry
	Metrics              *ProcessingMetrics // Add this field
}

AppConfig holds all configuration for the application.

type ArangoProperty

type ArangoProperty struct {
	Name  string `json:"name"`
	Value string `json:"value"`
}

ArangoProperty represents a single property object from ArangoDB.

type ArangoResultDoc

type ArangoResultDoc struct {
	ID    string           `json:"id"` // This is dbx.accession, likely the feature_id
	Props []ArangoProperty `json:"props"`
}

ArangoResultDoc represents the structure of a document from ArangoDB.

type BatchGeneProductJob

type BatchGeneProductJob struct {
	GeneProducts []ProcessedGeneProduct
}

BatchGeneProductJob holds a slice of gene products for batch processing

type BatchGeneProductResult

type BatchGeneProductResult struct {
	GeneID         string
	Success        bool
	Message        string
	Error          error
	ProcessedCount int
	SkippedCount   int
}

BatchGeneProductResult holds the result of batch gene product processing

type FeatureAnnotationAppConfig

type FeatureAnnotationAppConfig struct {
	Ctx              context.Context
	NumPubmedWorkers int
	NumGrpcWorkers   int
	Logger           *logrus.Entry
	Metrics          *FeatureAnnotationMetrics
}

type FeatureAnnotationMetrics

type FeatureAnnotationMetrics struct {
	TotalProcessed int64
	SuccessCount   int64
	ErrorCount     int64
	StartTime      time.Time

	TotalFetchedFromArango      int64
	AllArangoDocsFetched        bool
	JobsSubmittedToPubmedPool   int64
	JobsCompletedFromPubmedPool int64
	JobsSubmittedToGrpcPool     int64
	JobsCompletedFromGrpcPool   int64
	// contains filtered or unexported fields
}

func (*FeatureAnnotationMetrics) IsComplete

func (m *FeatureAnnotationMetrics) IsComplete() bool

type FileContext

type FileContext struct {
	Setup  SetupConfig
	File   *os.File
	Reader *csv.Reader
	Error  error // To propagate errors
}

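FileContext carries the state for Stage 2 (file processing): the setup configuration, the opened file, its CSV reader, and any error to propagate.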

type Gene

type Gene struct {
	FeatureID int    `json:"feature_id"`
	GeneID    string `json:"gene_id"`
	Name      string `json:"name"`
	CreatedBy string `json:"created_by"`
}

type GeneInfo

type GeneInfo struct {
	Name      string `json:"name"`
	GeneID    string `json:"gene_id"`
	FeatureID int64  `json:"feature_id"`
	CreatedBy string `json:"created_by"`
}

GeneInfo holds gene information from ArangoDB

type GeneProduct

type GeneProduct struct {
	GeneID  string
	Product string
}

type GeneProductAppConfig

type GeneProductAppConfig struct {
	Ctx              context.Context
	LegacyDatabase   string
	NumLegacyWorkers int
	NumGrpcWorkers   int
	Logger           *logrus.Entry
	Metrics          *GeneProductMetrics
}

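GeneProductAppConfig holds configuration for gene product processing: the context, the legacy database, the legacy and gRPC worker counts, the logger, and the metrics.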

type GeneProductMetrics

type GeneProductMetrics struct {
	TotalProcessed int64
	SuccessCount   int64
	ErrorCount     int64
	SkippedCount   int64
	StartTime      time.Time

	TotalFetchedFromArango      int64
	AllArangoDocsFetched        bool
	JobsSubmittedToLegacyPool   int64
	JobsCompletedFromLegacyPool int64
	JobsSubmittedToGrpcPool     int64
	JobsCompletedFromGrpcPool   int64
	// contains filtered or unexported fields
}

GeneProductMetrics holds counters for tracking the progress of gene product processing.

func (*GeneProductMetrics) IsComplete

func (m *GeneProductMetrics) IsComplete() bool

IsComplete checks if all processing is finished

type GeneProductResult

type GeneProductResult struct {
	GeneProduct string     `json:"gene_product"`
	CreatedBy   string     `json:"created_by"`
	CreatedOn   LegacyTime `json:"created_on"`
}

GeneProductResult holds a gene product query result.

type GeneWithPubmed

type GeneWithPubmed struct {
	Gene
	Pubmeds    []string
	Skip       bool
	SkipReason string
}

type GrpcAnnotationResult

type GrpcAnnotationResult struct {
	GeneID  string
	Success bool
	Skipped bool
	Message string
	Error   error
}

type GrpcSynonymResult

type GrpcSynonymResult struct {
	GeneID  string
	Success bool
	Message string
	Error   error
}

GrpcSynonymResult holds the result of a gRPC update operation for synonyms.

type GrpcUpdateResult

type GrpcUpdateResult struct {
	GeneID  string
	Success bool
	Message string
	Error   error
}

GrpcUpdateResult holds the result of a gRPC update operation.

type LegacyTime

type LegacyTime struct {
	time.Time
}

LegacyTime handles the Oracle date format "DD-MON-YY" used by the legacy database.

func (*LegacyTime) UnmarshalJSON

func (lt *LegacyTime) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler for the Oracle date format.

type PipelineResult

type PipelineResult struct {
	File        *os.File
	Setup       SetupConfig
	UpdateCount int
	Error       error
}

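PipelineResult is the Stage 4 (data processing) output and the final result of the pipeline: the file, the setup configuration, the update count, and any error.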

type ProcessSingleRecordParams

type ProcessSingleRecordParams struct {
	Record             []string
	FeaturePropIDIndex int
	ValueIndex         int
	RowNumForLogging   int // Actual row number in the CSV file for logging
	Logger             *logrus.Entry
}

ProcessSingleRecordParams holds the parameters for the processSingleRecordAndValidate function.

type ProcessedGeneData

type ProcessedGeneData struct {
	GeneID            string
	StrippedPropsText []StrippedProperty
}

ProcessedGeneData holds the gene ID and its list of HTML-stripped property values.

type ProcessedGeneProduct

type ProcessedGeneProduct struct {
	GeneID      string
	GeneName    string
	GeneProduct string
	CreatedBy   string
	CreatedOn   time.Time
}

ProcessedGeneProduct holds a processed gene together with its gene product.

type ProcessingContext

type ProcessingContext struct {
	FileContext
	FeaturePropIDIndex int
	ValueIndex         int
}

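ProcessingContext carries the state for Stage 3 (header validation): it extends FileContext with the FeaturePropIDIndex and ValueIndex fields.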

type ProcessingMetrics

type ProcessingMetrics struct {
	TotalProcessed int64
	SuccessCount   int64
	ErrorCount     int64
	StartTime      time.Time

	// TotalFetchedFromArango stores the total number of items fetched by queryArango.
	// This field is set by queryArango once the total count is known.
	TotalFetchedFromArango int64
	// AllArangoDocsFetched is a flag set to true by queryArango after all documents
	// have been fetched and sent to the processing pipeline.
	AllArangoDocsFetched bool
	// Intermediate tracking counters for detailed pipeline monitoring
	JobsSubmittedToHTMLPool   int64
	JobsCompletedFromHTMLPool int64
	JobsSubmittedToGrpcPool   int64
	JobsCompletedFromGrpcPool int64
	// contains filtered or unexported fields
}

ProcessingMetrics holds counters for tracking progress.

type SetupConfig

type SetupConfig struct {
	Logger         *logrus.Entry
	DBH            *arangomanager.Database
	CSVFilePath    string
	CollectionName string
	BatchSize      int
	Delimiter      string
	Workers        int
}

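SetupConfig is the Stage 1 (setup) configuration: the logger, the database handle, the CSV file path, the collection name, the batch size, the delimiter, and the worker count.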

type StrippedProperty

type StrippedProperty struct {
	OriginalName string
	StrippedText string
}

StrippedProperty holds the original property name and its stripped text.

type SubmitBatchAndLogParams

type SubmitBatchAndLogParams struct {
	Setup            *SetupConfig
	Docs             []map[string]string
	Logger           *logrus.Entry
	BatchDescription string
}

SubmitBatchAndLogParams holds the parameters for the submitBatchAndLog function.

type SynonymAppConfig

type SynonymAppConfig struct {
	Ctx            context.Context
	NumGrpcWorkers int
	Logger         *logrus.Entry
	Metrics        *SynonymMetrics
}

SynonymAppConfig holds configuration for the synonym loader application.

type SynonymData

type SynonymData struct {
	Name     string   `json:"name"`
	GeneID   string   `json:"gene_id"`
	Synonyms []string `json:"synonyms"`
}

SynonymData holds gene synonym information from ArangoDB

type SynonymMetrics

type SynonymMetrics struct {
	TotalProcessed int64
	SuccessCount   int64
	NotFoundCount  int64
	ErrorCount     int64
	StartTime      time.Time

	TotalFetchedFromArango    int64
	AllArangoDocsFetched      bool
	JobsSubmittedToGrpcPool   int64
	JobsCompletedFromGrpcPool int64
	// contains filtered or unexported fields
}

SynonymMetrics holds processing metrics for the synonym loader.

func (*SynonymMetrics) IsComplete

func (m *SynonymMetrics) IsComplete() bool

IsComplete checks if all processing is finished.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL