Documentation
¶
Index ¶
- Constants
- Variables
- func GeneProductFromCsvFlag() []cli.Flag
- func GeneProductUpdaterFlags() []cli.Flag
- func GeneUpdaterFlags() []cli.Flag
- func LoadCSVToArangodb(cltx *cli.Context) error
- func LoadCSVToArangodbFlag() []cli.Flag
- func LoadFeatureAnnotationFlag() []cli.Flag
- func LoadGeneProduct(c *cli.Context) error
- func LoadGeneProductFlag() []cli.Flag
- func RunFeatureAnnotationLoader(cltx *cli.Context) error
- func RunGeneProductUpdater(cltx *cli.Context) error
- func RunGeneUpdater(cltx *cli.Context) error
- func RunSynonymLoader(cltx *cli.Context) error
- func SynonymLoaderFlags() []cli.Flag
- type AppConfig
- type ArangoProperty
- type ArangoResultDoc
- type BatchGeneProductJob
- type BatchGeneProductResult
- type FeatureAnnotationAppConfig
- type FeatureAnnotationMetrics
- type FileContext
- type Gene
- type GeneInfo
- type GeneProduct
- type GeneProductAppConfig
- type GeneProductMetrics
- type GeneProductResult
- type GeneWithPubmed
- type GrpcAnnotationResult
- type GrpcSynonymResult
- type GrpcUpdateResult
- type LegacyTime
- type PipelineResult
- type ProcessSingleRecordParams
- type ProcessedGeneData
- type ProcessedGeneProduct
- type ProcessingContext
- type ProcessingMetrics
- type SetupConfig
- type StrippedProperty
- type SubmitBatchAndLogParams
- type SynonymAppConfig
- type SynonymData
- type SynonymMetrics
Constants ¶
const (
	// GeneProductQuery is the AQL query for fetching gene products
	GeneProductQuery = `` /* 350-byte string literal not displayed */

	ListActiveGenesQ = `` /* 591-byte string literal not displayed */

	ListPubmedsByFeature = `` /* 184-byte string literal not displayed */

	ListSynonyms = `` /* 1457-byte string literal not displayed */
)
const DefaultAQLQuery = `` /* 742-byte string literal not displayed */
DefaultAQLQuery is the default AQL query for fetching gene data from ArangoDB. It is exported for use in flag.go.
const (
// DefaultUserName is the default creator/updater for annotations
DefaultUserName = "dcr@dictycr.org"
)
const (
GeneProductTag = "gene product"
)
Constants for gene product processing
Variables ¶
var AnnMap = map[string]string{
"CGM_DDB_PASC": "pgaudet@northwestern.edu",
"CGM_DDB_PFEY": "pfey@northwestern.edu",
"CGM_DDB_BOBD": "robert-dodson@northwestern.edu",
"CGM_DDB_KPIL": "kpilchar@northwestern.edu",
"CGM_DDB": "dictybase@northwestern.edu",
}
AnnMap maps legacy creator usernames to their email addresses.
Functions ¶
func GeneProductFromCsvFlag ¶
func GeneProductFromCsvFlag() []cli.Flag
func GeneProductUpdaterFlags ¶
func GeneProductUpdaterFlags() []cli.Flag
GeneProductUpdaterFlags returns the flags for the gene product updater.
func GeneUpdaterFlags ¶
func GeneUpdaterFlags() []cli.Flag
GeneUpdaterFlags returns all flags required for the gene updater command.
func LoadCSVToArangodb ¶
func LoadCSVToArangodb(cltx *cli.Context) error
func LoadCSVToArangodbFlag ¶
func LoadCSVToArangodbFlag() []cli.Flag
LoadCSVToArangodbFlag returns all flags required for loading CSV data to ArangoDB.
func LoadFeatureAnnotationFlag ¶
func LoadFeatureAnnotationFlag() []cli.Flag
LoadFeatureAnnotationFlag returns all flags required for loading feature annotations.
func LoadGeneProduct ¶
func LoadGeneProduct(c *cli.Context) error
func LoadGeneProductFlag ¶
func LoadGeneProductFlag() []cli.Flag
func RunFeatureAnnotationLoader ¶
func RunFeatureAnnotationLoader(cltx *cli.Context) error
func RunGeneProductUpdater ¶
func RunGeneProductUpdater(cltx *cli.Context) error
RunGeneProductUpdater is the main entry point for the gene product updater.
func RunGeneUpdater ¶
func RunGeneUpdater(cltx *cli.Context) error
func RunSynonymLoader ¶
func RunSynonymLoader(cltx *cli.Context) error
RunSynonymLoader is the main entry point for the synonym loader.
func SynonymLoaderFlags ¶
func SynonymLoaderFlags() []cli.Flag
SynonymLoaderFlags returns all flags required for the synonym loader command.
Types ¶
type AppConfig ¶
type AppConfig struct {
	AQLQuery             string
	ArangoUser           string // For authorship in gRPC updates
	NumProcessingWorkers int
	NumGrpcWorkers       int
	Logger               *logrus.Entry
	Metrics              *ProcessingMetrics // Add this field
}
AppConfig holds all configuration for the application.
type ArangoProperty ¶
ArangoProperty represents a single property object from ArangoDB.
type ArangoResultDoc ¶
type ArangoResultDoc struct {
	ID    string           `json:"id"` // This is dbx.accession, likely the feature_id
	Props []ArangoProperty `json:"props"`
}
ArangoResultDoc represents the structure of a document from ArangoDB.
type BatchGeneProductJob ¶
type BatchGeneProductJob struct {
GeneProducts []ProcessedGeneProduct
}
BatchGeneProductJob holds a slice of gene products for batch processing.
type BatchGeneProductResult ¶
type BatchGeneProductResult struct { GeneID string Success bool Message string Error error ProcessedCount int SkippedCount int }
BatchGeneProductResult holds the result of batch gene product processing.
type FeatureAnnotationMetrics ¶
type FeatureAnnotationMetrics struct { TotalProcessed int64 SuccessCount int64 ErrorCount int64 StartTime time.Time TotalFetchedFromArango int64 AllArangoDocsFetched bool JobsSubmittedToPubmedPool int64 JobsCompletedFromPubmedPool int64 JobsSubmittedToGrpcPool int64 JobsCompletedFromGrpcPool int64 // contains filtered or unexported fields }
func (*FeatureAnnotationMetrics) IsComplete ¶
func (m *FeatureAnnotationMetrics) IsComplete() bool
type FileContext ¶
type FileContext struct {
	Setup  SetupConfig
	File   *os.File
	Reader *csv.Reader
	Error  error // To propagate errors
}
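FileContext is the stage 2 (file processing) state: the setup configuration, the opened file, a CSV reader, and any error to propagate.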
type GeneInfo ¶
type GeneInfo struct { Name string `json:"name"` GeneID string `json:"gene_id"` FeatureID int64 `json:"feature_id"` CreatedBy string `json:"created_by"` }
GeneInfo holds gene information from ArangoDB.
type GeneProduct ¶
type GeneProductAppConfig ¶
type GeneProductAppConfig struct { Ctx context.Context LegacyDatabase string NumLegacyWorkers int NumGrpcWorkers int Logger *logrus.Entry Metrics *GeneProductMetrics }
GeneProductAppConfig holds the configuration for gene product processing.
type GeneProductMetrics ¶
type GeneProductMetrics struct { TotalProcessed int64 SuccessCount int64 ErrorCount int64 SkippedCount int64 StartTime time.Time TotalFetchedFromArango int64 AllArangoDocsFetched bool JobsSubmittedToLegacyPool int64 JobsCompletedFromLegacyPool int64 JobsSubmittedToGrpcPool int64 JobsCompletedFromGrpcPool int64 // contains filtered or unexported fields }
GeneProductMetrics holds processing metrics for gene product processing.
func (*GeneProductMetrics) IsComplete ¶
func (m *GeneProductMetrics) IsComplete() bool
IsComplete checks if all processing is finished.
type GeneProductResult ¶
type GeneProductResult struct { GeneProduct string `json:"gene_product"` CreatedBy string `json:"created_by"` CreatedOn LegacyTime `json:"created_on"` }
GeneProductResult holds a gene product query result.
type GeneWithPubmed ¶
type GrpcAnnotationResult ¶
type GrpcSynonymResult ¶
GrpcSynonymResult holds the result of a gRPC update operation for synonyms.
type GrpcUpdateResult ¶
GrpcUpdateResult holds the result of a gRPC update operation.
type LegacyTime ¶
LegacyTime handles the Oracle date format "DD-MON-YY" used by the legacy database.
func (*LegacyTime) UnmarshalJSON ¶
func (lt *LegacyTime) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler for the Oracle date format.
type PipelineResult ¶
type PipelineResult struct { File *os.File Setup SetupConfig UpdateCount int Error error }
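PipelineResult is the stage 4 (data processing) output and the final result of the pipeline: the file, the setup configuration, the update count, and any error.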
type ProcessSingleRecordParams ¶
type ProcessSingleRecordParams struct {
	Record             []string
	FeaturePropIDIndex int
	ValueIndex         int
	RowNumForLogging   int // Actual row number in the CSV file for logging
	Logger             *logrus.Entry
}
ProcessSingleRecordParams holds the parameters for the processSingleRecordAndValidate function.
type ProcessedGeneData ¶
type ProcessedGeneData struct { GeneID string StrippedPropsText []StrippedProperty }
ProcessedGeneData holds the gene ID and its list of HTML-stripped property values.
type ProcessedGeneProduct ¶
type ProcessedGeneProduct struct { GeneID string GeneName string GeneProduct string CreatedBy string CreatedOn time.Time }
ProcessedGeneProduct holds a processed gene together with its gene product.
type ProcessingContext ¶
type ProcessingContext struct { FileContext FeaturePropIDIndex int ValueIndex int }
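ProcessingContext is the stage 3 (header validation) state: the embedded FileContext plus the FeaturePropIDIndex and ValueIndex indexes.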
type ProcessingMetrics ¶
type ProcessingMetrics struct {
	TotalProcessed int64
	SuccessCount   int64
	ErrorCount     int64
	StartTime      time.Time

	// TotalFetchedFromArango stores the total number of items fetched by queryArango.
	// This field is set by queryArango once the total count is known.
	TotalFetchedFromArango int64

	// AllArangoDocsFetched is a flag set to true by queryArango after all documents
	// have been fetched and sent to the processing pipeline.
	AllArangoDocsFetched bool

	// Intermediate tracking counters for detailed pipeline monitoring
	JobsSubmittedToHTMLPool   int64
	JobsCompletedFromHTMLPool int64
	JobsSubmittedToGrpcPool   int64
	JobsCompletedFromGrpcPool int64
	// contains filtered or unexported fields
}
ProcessingMetrics holds counters for tracking progress.
type SetupConfig ¶
type SetupConfig struct { Logger *logrus.Entry DBH *arangomanager.Database CSVFilePath string CollectionName string BatchSize int Delimiter string Workers int }
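SetupConfig is the stage 1 (setup) configuration: logger, database handle, CSV file path, collection name, batch size, delimiter, and worker count.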
type StrippedProperty ¶
StrippedProperty holds the original property name and its stripped text.
type SubmitBatchAndLogParams ¶
type SubmitBatchAndLogParams struct { Setup *SetupConfig Docs []map[string]string Logger *logrus.Entry BatchDescription string }
SubmitBatchAndLogParams holds the parameters for the submitBatchAndLog function.
type SynonymAppConfig ¶
type SynonymAppConfig struct { Ctx context.Context NumGrpcWorkers int Logger *logrus.Entry Metrics *SynonymMetrics }
SynonymAppConfig holds configuration for the synonym loader application.
type SynonymData ¶
type SynonymData struct { Name string `json:"name"` GeneID string `json:"gene_id"` Synonyms []string `json:"synonyms"` }
SynonymData holds gene synonym information from ArangoDB.
type SynonymMetrics ¶
type SynonymMetrics struct { TotalProcessed int64 SuccessCount int64 NotFoundCount int64 ErrorCount int64 StartTime time.Time TotalFetchedFromArango int64 AllArangoDocsFetched bool JobsSubmittedToGrpcPool int64 JobsCompletedFromGrpcPool int64 // contains filtered or unexported fields }
SynonymMetrics holds processing metrics for the synonym loader.
func (*SynonymMetrics) IsComplete ¶
func (m *SynonymMetrics) IsComplete() bool
IsComplete checks if all processing is finished.