Documentation
¶
Overview ¶
Package database provides SQLite-backed storage for SRA metadata records, including studies, experiments, samples, runs, submissions, and analyses.
It also provides safe SQL utilities (whitelist-based table and column name validation) to prevent SQL injection.
Index ¶
- Variables
- func MustColumnName(column string) string
- func MustTableName(table string) string
- func SafeColumnName(column string) (string, error)
- func SafeTableName(table string) (string, error)
- func ValidateColumnName(column string) error
- func ValidateIdentifier(identifier string) error
- func ValidateTableName(table string) error
- type AccessionResult
- type Analysis
- type DB
- func (db *DB) BatchInsertExperiments(experiments []Experiment) error
- func (db *DB) CountSamplePools() (int, error)
- func (db *DB) CountTable(table string) (int64, error)
- func (db *DB) FindRecordsByIdentifier(idValue string) ([]Identifier, error)
- func (db *DB) FullTextSearch(query string) (interface{}, error)
- func (db *DB) GetAnalysis(accession string) (*Analysis, error)
- func (db *DB) GetAveragePoolSize() (float64, error)
- func (db *DB) GetExperiment(accession string) (*Experiment, error)
- func (db *DB) GetIdentifiers(recordType, recordAccession string) ([]Identifier, error)
- func (db *DB) GetInfo() (*DatabaseInfo, error)
- func (db *DB) GetLinks(recordType, recordAccession string) ([]Link, error)
- func (db *DB) GetMaxPoolSize() (int, error)
- func (db *DB) GetRun(accession string) (*Run, error)
- func (db *DB) GetSQLDB() *sql.DB
- func (db *DB) GetSample(accession string) (*Sample, error)
- func (db *DB) GetSamplePools(parentSample string) ([]SamplePool, error)
- func (db *DB) GetStatistics() (map[string]int64, error)
- func (db *DB) GetStats() (*DatabaseStats, error)
- func (db *DB) GetStudiesBatch(offset, limit int) ([]*Study, error)
- func (db *DB) GetStudy(accession string) (*Study, error)
- func (db *DB) GetSubmission(accession string) (*Submission, error)
- func (db *DB) InitializeStatistics() error
- func (db *DB) InsertAnalysis(analysis *Analysis) error
- func (db *DB) InsertExperiment(exp *Experiment) error
- func (db *DB) InsertIdentifier(identifier *Identifier) error
- func (db *DB) InsertLink(link *Link) error
- func (db *DB) InsertRun(run *Run) error
- func (db *DB) InsertSample(sample *Sample) error
- func (db *DB) InsertSamplePool(pool *SamplePool) error
- func (db *DB) InsertStudy(study *Study) error
- func (db *DB) InsertSubmission(submission *Submission) error
- func (db *DB) Ping() error
- func (db *DB) Query(query string, args ...interface{}) (*sql.Rows, error)
- func (db *DB) QueryRow(query string, args ...interface{}) *sql.Row
- func (db *DB) ScanExperiment(scanner interface{}, exp *Experiment) error
- func (db *DB) ScanRun(scanner interface{}, run *Run) error
- func (db *DB) ScanSample(scanner interface{}, sample *Sample) error
- func (db *DB) ScanStudy(scanner interface{}, study *Study) error
- func (db *DB) SearchByLibraryStrategy(strategy string, limit int) ([]Experiment, error)
- func (db *DB) SearchByOrganism(organism string, limit int) ([]Sample, error)
- func (db *DB) UpdateStatistics() error
- type DatabaseInfo
- type DatabaseStats
- type Experiment
- type FTS5Manager
- func NewFTS5Manager(db *DB) *FTS5Manager
- func (f *FTS5Manager) CreateFTSTables() error
- func (f *FTS5Manager) GetFTSStats() (map[string]int64, error)
- func (f *FTS5Manager) OptimizeFTSTables() error
- func (f *FTS5Manager) SearchAccessions(query string, limit int) ([]AccessionResult, error)
- func (f *FTS5Manager) SearchRuns(query string, limit int) ([]RunResult, error)
- func (f *FTS5Manager) SearchSamples(query string, limit int) ([]SampleResult, error)
- type Identifier
- type Link
- type Run
- type RunResult
- type Sample
- type SamplePool
- type SampleResult
- type Study
- type Submission
Constants ¶
This section is empty.
Variables ¶
// AllowedColumns is the whitelist of valid column names, used for dynamic
// column selection in queries. A name that is absent from this map is not a
// valid column name.
var AllowedColumns = map[string]bool{
	// Accession columns.
	"study_accession":      true,
	"experiment_accession": true,
	"sample_accession":     true,
	"run_accession":        true,
	"submission_accession": true,
	"analysis_accession":   true,

	// Text columns.
	"title":       true,
	"abstract":    true,
	"description": true,

	// Organism / taxonomy columns.
	"organism":        true,
	"scientific_name": true,
	"taxon_id":        true,

	// Platform columns.
	"platform":         true,
	"instrument_model": true,

	// Library columns.
	"library_strategy":  true,
	"library_source":    true,
	"library_selection": true,
	"library_layout":    true,

	// Date / timestamp columns.
	"created_at":      true,
	"updated_at":      true,
	"submission_date": true,
	"first_public":    true,
	"last_update":     true,

	// NOTE(review): presumably columns of the statistics table — confirm.
	"table_name": true,
	"row_count":  true,
}
AllowedColumns is the whitelist of valid column names. This is used for dynamic column selection in queries.
// AllowedTables is the whitelist of valid table names in the SRAKE database.
// Any table name not in this map is rejected to prevent SQL injection.
var AllowedTables = map[string]bool{
	// Record tables.
	"studies":     true,
	"experiments": true,
	"samples":     true,
	"runs":        true,
	"submissions": true,
	"analyses":    true,

	// Relationship and auxiliary tables.
	"sample_pool":        true,
	"identifiers":        true,
	"links":              true,
	"experiment_samples": true,

	// Tables with the fts_ prefix.
	"fts_accessions": true,
	"fts_samples":    true,
	"fts_runs":       true,

	// Statistics and progress-tracking tables.
	"statistics":     true,
	"sync_status":    true,
	"progress":       true,
	"index_progress": true,
}
AllowedTables is the whitelist of valid table names in the SRAKE database. Any table name not in this list will be rejected to prevent SQL injection.
// ErrInvalidColumnName is the sentinel error returned when a column name is
// not in the AllowedColumns whitelist.
var ErrInvalidColumnName = fmt.Errorf("invalid column name")
ErrInvalidColumnName is returned when a column name is not in the whitelist.
// ErrInvalidTableName is the sentinel error returned when a table name is
// not in the AllowedTables whitelist.
var ErrInvalidTableName = fmt.Errorf("invalid table name")
ErrInvalidTableName is returned when a table name is not in the whitelist.
Functions ¶
func MustColumnName ¶
MustColumnName returns the column name if valid, panics otherwise. Use this only for hardcoded column names that are known to be valid.
func MustTableName ¶
MustTableName returns the table name if valid, panics otherwise. Use this only for hardcoded table names that are known to be valid.
func SafeColumnName ¶
SafeColumnName returns the column name if valid, otherwise returns an error. Use this when you need the column name for SQL construction.
func SafeTableName ¶
SafeTableName returns the table name if valid, otherwise returns an error. Use this when you need the table name for SQL construction.
func ValidateColumnName ¶
ValidateColumnName checks if a column name is in the allowed list. Returns nil if valid, ErrInvalidColumnName otherwise.
func ValidateIdentifier ¶
ValidateIdentifier checks whether a string has a valid SQL identifier format. This is a fallback for dynamic identifiers that are not in the whitelists. Valid format: starts with a letter or underscore, followed by alphanumeric characters or underscores.
func ValidateTableName ¶
ValidateTableName checks if a table name is in the allowed list. Returns nil if valid, ErrInvalidTableName otherwise.
Types ¶
type AccessionResult ¶
// AccessionResult is a single accession match produced by an FTS5 search.
type AccessionResult struct {
	// String-valued parts of the match, in declaration order.
	Accession, Type, Title, Metadata string
	// Score is the BM25 relevance score of the match.
	Score float64
}
AccessionResult holds a single accession match from an FTS5 search, including its BM25 relevance score.
type Analysis ¶
// Analysis represents an analysis record with comprehensive fields.
//
// Fields whose trailing comment mentions JSON hold serialized JSON text in a
// plain string rather than a structured Go value.
type Analysis struct {
	AnalysisAccession string `json:"analysis_accession"`
	Alias string `json:"alias"`
	CenterName string `json:"center_name"`
	BrokerName string `json:"broker_name"`
	AnalysisCenter string `json:"analysis_center"`
	AnalysisDate *time.Time `json:"analysis_date"` // pointer, so a missing date can be nil
	StudyAccession string `json:"study_accession"`
	Title string `json:"title"`
	Description string `json:"description"`
	AnalysisType string `json:"analysis_type"`
	// Analysis-specific fields
	Targets string `json:"targets"` // JSON array of target SRA objects
	DataBlocks string `json:"data_blocks"` // JSON array of data blocks
	AssemblyRef string `json:"assembly_ref"` // JSON object for assembly reference
	RunLabels string `json:"run_labels"` // JSON array for run label mappings
	SeqLabels string `json:"seq_labels"` // JSON array for sequence label mappings
	Processing string `json:"processing"` // JSON object for pipeline info
	// Links and attributes
	AnalysisLinks string `json:"analysis_links"` // JSON array
	AnalysisAttributes string `json:"analysis_attributes"` // JSON array
	Metadata string `json:"metadata"` // JSON
}
Analysis represents an analysis record with comprehensive fields
type DB ¶
DB wraps the SQL database connection
func Initialize ¶
Initialize creates and configures the database connection
func (*DB) BatchInsertExperiments ¶
func (db *DB) BatchInsertExperiments(experiments []Experiment) error
BatchInsertExperiments inserts multiple experiments in a single transaction for performance.
func (*DB) CountSamplePools ¶
CountSamplePools counts total number of pool relationships
func (*DB) CountTable ¶
CountTable counts rows in a table. The table name is validated against the AllowedTables whitelist to prevent SQL injection attacks.
func (*DB) FindRecordsByIdentifier ¶
func (db *DB) FindRecordsByIdentifier(idValue string) ([]Identifier, error)
FindRecordsByIdentifier finds records with a specific identifier value
func (*DB) FullTextSearch ¶
FullTextSearch performs a LIKE-based text search across studies and experiments, returning results from both tables ranked by relevance.
func (*DB) GetAnalysis ¶
GetAnalysis retrieves an analysis by its accession identifier. Returns an error if the analysis is not found.
func (*DB) GetAveragePoolSize ¶
GetAveragePoolSize returns the average pool size
func (*DB) GetExperiment ¶
func (db *DB) GetExperiment(accession string) (*Experiment, error)
GetExperiment retrieves an experiment by its accession identifier. Returns an error if the experiment is not found.
func (*DB) GetIdentifiers ¶
func (db *DB) GetIdentifiers(recordType, recordAccession string) ([]Identifier, error)
GetIdentifiers retrieves identifiers for a record
func (*DB) GetInfo ¶
func (db *DB) GetInfo() (*DatabaseInfo, error)
GetInfo returns database information
func (*DB) GetMaxPoolSize ¶
GetMaxPoolSize returns the maximum pool size
func (*DB) GetRun ¶
GetRun retrieves a run by its accession identifier. Returns an error if the run is not found.
func (*DB) GetSample ¶
GetSample retrieves a sample by its accession identifier. Returns an error if the sample is not found.
func (*DB) GetSamplePools ¶
func (db *DB) GetSamplePools(parentSample string) ([]SamplePool, error)
GetSamplePools retrieves pool relationships for a parent sample
func (*DB) GetStatistics ¶
GetStatistics retrieves cached statistics from the statistics table
func (*DB) GetStats ¶
func (db *DB) GetStats() (*DatabaseStats, error)
GetStats returns live row counts for all core SRA tables.
func (*DB) GetStudiesBatch ¶
GetStudiesBatch retrieves a batch of studies with pagination
func (*DB) GetStudy ¶
GetStudy retrieves a study by its accession identifier. Returns an error if the study is not found.
func (*DB) GetSubmission ¶
func (db *DB) GetSubmission(accession string) (*Submission, error)
GetSubmission retrieves a submission by its accession identifier. Returns an error if the submission is not found.
func (*DB) InitializeStatistics ¶
InitializeStatistics ensures the statistics table exists but does NOT populate it. Population happens only via UpdateStatistics() after ingestion.
func (*DB) InsertAnalysis ¶
InsertAnalysis inserts or replaces an analysis record in the database.
func (*DB) InsertExperiment ¶
func (db *DB) InsertExperiment(exp *Experiment) error
InsertExperiment inserts or replaces an experiment record in the database.
func (*DB) InsertIdentifier ¶
func (db *DB) InsertIdentifier(identifier *Identifier) error
InsertIdentifier inserts a structured identifier
func (*DB) InsertLink ¶
InsertLink inserts a structured link
func (*DB) InsertSample ¶
InsertSample inserts or replaces a sample record in the database.
func (*DB) InsertSamplePool ¶
func (db *DB) InsertSamplePool(pool *SamplePool) error
InsertSamplePool inserts a pool relationship
func (*DB) InsertStudy ¶
InsertStudy inserts or replaces a study record in the database.
func (*DB) InsertSubmission ¶
func (db *DB) InsertSubmission(submission *Submission) error
InsertSubmission inserts or replaces a submission record in the database.
func (*DB) ScanExperiment ¶
func (db *DB) ScanExperiment(scanner interface{}, exp *Experiment) error
ScanExperiment scans a row into an Experiment struct
func (*DB) ScanSample ¶
ScanSample scans a row into a Sample struct
func (*DB) SearchByLibraryStrategy ¶
func (db *DB) SearchByLibraryStrategy(strategy string, limit int) ([]Experiment, error)
SearchByLibraryStrategy returns experiments matching the given library strategy (e.g., RNA-Seq, WGS).
func (*DB) SearchByOrganism ¶
SearchByOrganism returns samples matching the given organism name or scientific name.
func (*DB) UpdateStatistics ¶
UpdateStatistics recalculates and updates the statistics table. This should be called only after batch operations complete.
type DatabaseInfo ¶
DatabaseInfo holds database file size and cached table row counts.
type DatabaseStats ¶
// DatabaseStats holds aggregate counts for studies, experiments, samples and
// runs, together with a LastUpdate timestamp. It is the result type of
// (*DB).GetStats.
type DatabaseStats struct {
	TotalStudies int `json:"total_studies"`
	TotalExperiments int `json:"total_experiments"`
	TotalSamples int `json:"total_samples"`
	TotalRuns int `json:"total_runs"`
	LastUpdate time.Time `json:"last_update"` // NOTE(review): not visible here how this is populated — confirm
}
DatabaseStats holds aggregate counts for the core SRA tables (studies, experiments, samples, and runs), along with a last-update timestamp.
type Experiment ¶
// Experiment represents a comprehensive SRA experiment record.
//
// Fields whose trailing comment mentions JSON hold serialized JSON text in a
// plain string rather than a structured Go value.
type Experiment struct {
	// Primary key
	ExperimentAccession string `json:"experiment_accession"`
	// NameGroup attributes
	Alias string `json:"alias"`
	CenterName string `json:"center_name"`
	BrokerName string `json:"broker_name"`
	// References
	StudyAccession string `json:"study_accession"`
	SampleAccession string `json:"sample_accession"`
	// Core fields
	Title string `json:"title"`
	DesignDescription string `json:"design_description"`
	// Library information
	LibraryName string `json:"library_name"`
	// NOTE(review): presumably the field SearchByLibraryStrategy matches on
	// (e.g. RNA-Seq, WGS) — confirm against the query.
	LibraryStrategy string `json:"library_strategy"`
	LibrarySource string `json:"library_source"`
	LibrarySelection string `json:"library_selection"`
	LibraryLayout string `json:"library_layout"` // 'SINGLE' or 'PAIRED'
	LibraryConstructionProtocol string `json:"library_construction_protocol"`
	// Paired-end specific
	NominalLength int `json:"nominal_length"`
	NominalSdev float64 `json:"nominal_sdev"`
	// Platform information
	Platform string `json:"platform"`
	InstrumentModel string `json:"instrument_model"`
	// Targeted sequencing
	TargetedLoci string `json:"targeted_loci"` // JSON array
	// Pooling information
	PoolMemberCount int `json:"pool_member_count"`
	PoolInfo string `json:"pool_info"` // JSON object
	// Links and attributes
	ExperimentLinks string `json:"experiment_links"` // JSON array
	ExperimentAttributes string `json:"experiment_attributes"` // JSON array
	// Spot descriptor
	SpotLength int `json:"spot_length"`
	SpotDecodeSpec string `json:"spot_decode_spec"` // JSON object
	// Full metadata
	Metadata string `json:"metadata"` // JSON
}
Experiment represents a comprehensive SRA experiment record
type FTS5Manager ¶
type FTS5Manager struct {
// contains filtered or unexported fields
}
FTS5Manager manages SQLite FTS5 tables for fast text search
func NewFTS5Manager ¶
func NewFTS5Manager(db *DB) *FTS5Manager
NewFTS5Manager creates a new FTS5 manager
func (*FTS5Manager) CreateFTSTables ¶
func (f *FTS5Manager) CreateFTSTables() error
CreateFTSTables creates FTS5 tables for tier 3 search (samples and runs)
func (*FTS5Manager) GetFTSStats ¶
func (f *FTS5Manager) GetFTSStats() (map[string]int64, error)
GetFTSStats returns statistics about FTS5 tables
func (*FTS5Manager) OptimizeFTSTables ¶
func (f *FTS5Manager) OptimizeFTSTables() error
OptimizeFTSTables optimizes FTS5 tables for better performance
func (*FTS5Manager) SearchAccessions ¶
func (f *FTS5Manager) SearchAccessions(query string, limit int) ([]AccessionResult, error)
SearchAccessions searches for accessions using FTS5
func (*FTS5Manager) SearchRuns ¶
func (f *FTS5Manager) SearchRuns(query string, limit int) ([]RunResult, error)
SearchRuns searches runs using FTS5
func (*FTS5Manager) SearchSamples ¶
func (f *FTS5Manager) SearchSamples(query string, limit int) ([]SampleResult, error)
SearchSamples searches samples using FTS5
type Identifier ¶
// Identifier represents a structured identifier attached to a record.
type Identifier struct {
	// RecordType and RecordAccession name the record the identifier belongs
	// to; they mirror the parameters of (*DB).GetIdentifiers.
	RecordType string `json:"record_type"`
	RecordAccession string `json:"record_accession"`
	IDType string `json:"id_type"`
	IDNamespace string `json:"id_namespace"`
	// NOTE(review): presumably the value (*DB).FindRecordsByIdentifier
	// searches on — confirm against the query.
	IDValue string `json:"id_value"`
	IDLabel string `json:"id_label"`
}
Identifier represents a structured identifier
type Link ¶
// Link represents a structured link attached to a record.
type Link struct {
	// RecordType and RecordAccession name the record the link belongs to;
	// they mirror the parameters of (*DB).GetLinks.
	RecordType string `json:"record_type"`
	RecordAccession string `json:"record_accession"`
	LinkType string `json:"link_type"`
	DB string `json:"db"`
	ID string `json:"id"`
	Label string `json:"label"`
	URL string `json:"url"`
}
Link represents a structured link
type Run ¶
// Run represents a comprehensive SRA run record.
//
// Fields whose trailing comment mentions JSON hold serialized JSON text in a
// plain string rather than a structured Go value.
type Run struct {
	// Primary key
	RunAccession string `json:"run_accession"`
	// NameGroup attributes
	Alias string `json:"alias"`
	CenterName string `json:"center_name"`
	BrokerName string `json:"broker_name"`
	RunCenter string `json:"run_center"`
	// References
	ExperimentAccession string `json:"experiment_accession"`
	// Core fields
	Title string `json:"title"`
	RunDate *time.Time `json:"run_date"` // pointer, so a missing date can be nil
	// Statistics
	TotalSpots int64 `json:"total_spots"`
	TotalBases int64 `json:"total_bases"`
	TotalSize int64 `json:"total_size"`
	LoadDone bool `json:"load_done"`
	Published string `json:"published"`
	// File information
	DataFiles string `json:"data_files"` // JSON array
	// Links and attributes
	RunLinks string `json:"run_links"` // JSON array
	RunAttributes string `json:"run_attributes"` // JSON array
	// Quality metrics
	QualityScoreMean float64 `json:"quality_score_mean"`
	QualityScoreStd float64 `json:"quality_score_std"`
	ReadCountR1 int64 `json:"read_count_r1"`
	ReadCountR2 int64 `json:"read_count_r2"`
	// Full metadata
	Metadata string `json:"metadata"` // JSON
}
Run represents a comprehensive SRA run record
type RunResult ¶
// RunResult is a single run match produced by an FTS5 search.
type RunResult struct {
	// All four values are strings here, including TotalSpots and TotalBases.
	RunAccession, ExperimentAccession, TotalSpots, TotalBases string
	// Score is the BM25 relevance score of the match.
	Score float64
}
RunResult holds a single run match from an FTS5 search, including its BM25 relevance score.
type Sample ¶
// Sample represents a comprehensive SRA sample record.
//
// Fields whose trailing comment mentions JSON hold serialized JSON text in a
// plain string rather than a structured Go value.
type Sample struct {
	// Primary key
	SampleAccession string `json:"sample_accession"`
	// NameGroup attributes
	Alias string `json:"alias"`
	CenterName string `json:"center_name"`
	BrokerName string `json:"broker_name"`
	// Core fields
	Title string `json:"title"`
	Description string `json:"description"`
	// Taxonomy
	// NOTE(review): SearchByOrganism matches on "organism name or scientific
	// name"; presumably that means Organism and ScientificName — confirm.
	TaxonID int `json:"taxon_id"`
	ScientificName string `json:"scientific_name"`
	CommonName string `json:"common_name"`
	Organism string `json:"organism"`
	// Sample source information
	Tissue string `json:"tissue"`
	CellType string `json:"cell_type"`
	CellLine string `json:"cell_line"`
	Strain string `json:"strain"`
	Sex string `json:"sex"`
	Age string `json:"age"`
	Disease string `json:"disease"`
	Treatment string `json:"treatment"`
	// Geographic/environmental
	GeoLocName string `json:"geo_loc_name"`
	LatLon string `json:"lat_lon"`
	CollectionDate string `json:"collection_date"` // stored as a string, not a time.Time
	EnvBiome string `json:"env_biome"`
	EnvFeature string `json:"env_feature"`
	EnvMaterial string `json:"env_material"`
	// Links and attributes
	SampleLinks string `json:"sample_links"` // JSON array
	SampleAttributes string `json:"sample_attributes"` // JSON array
	// BioSample/BioProject references
	BiosampleAccession string `json:"biosample_accession"`
	BioprojectAccession string `json:"bioproject_accession"`
	// Full metadata
	Metadata string `json:"metadata"` // JSON
}
Sample represents a comprehensive SRA sample record
type SamplePool ¶
// SamplePool represents a pool/multiplex relationship between a parent
// sample and one member sample.
type SamplePool struct {
	PoolID int `json:"pool_id"`
	// NOTE(review): presumably the value (*DB).GetSamplePools filters on via
	// its parentSample argument — confirm against the query.
	ParentSample string `json:"parent_sample"`
	MemberSample string `json:"member_sample"`
	MemberName string `json:"member_name"`
	Proportion float64 `json:"proportion"`
	ReadLabel string `json:"read_label"`
}
SamplePool represents a pool/multiplex relationship
type SampleResult ¶
// SampleResult is a single sample match produced by an FTS5 search.
type SampleResult struct {
	// String-valued parts of the match, in declaration order.
	SampleAccession, Description, Organism, ScientificName string
	// Score is the BM25 relevance score of the match.
	Score float64
}
SampleResult holds a single sample match from an FTS5 search, including its BM25 relevance score.
type Study ¶
// Study represents a comprehensive SRA study record.
//
// Fields whose trailing comment mentions JSON hold serialized JSON text in a
// plain string rather than a structured Go value.
type Study struct {
	// Primary key
	StudyAccession string `json:"study_accession"`
	// NameGroup attributes
	Alias string `json:"alias"`
	CenterName string `json:"center_name"`
	BrokerName string `json:"broker_name"`
	// Core fields
	StudyTitle string `json:"study_title"`
	StudyType string `json:"study_type"`
	StudyAbstract string `json:"study_abstract"`
	StudyDescription string `json:"study_description"`
	CenterProjectName string `json:"center_project_name"`
	// Dates (pointers, so a missing date can be nil)
	SubmissionDate *time.Time `json:"submission_date"`
	FirstPublic *time.Time `json:"first_public"`
	LastUpdate *time.Time `json:"last_update"`
	// Identifiers (JSON)
	PrimaryID string `json:"primary_id"`
	SecondaryIDs string `json:"secondary_ids"` // JSON array
	ExternalIDs string `json:"external_ids"` // JSON array
	SubmitterIDs string `json:"submitter_ids"` // JSON array
	// Links and attributes (JSON)
	StudyLinks string `json:"study_links"` // JSON array
	StudyAttributes string `json:"study_attributes"` // JSON array
	RelatedStudies string `json:"related_studies"` // JSON array
	// Extracted organism
	Organism string `json:"organism"`
	// Full metadata
	Metadata string `json:"metadata"` // JSON
}
Study represents a comprehensive SRA study record
type Submission ¶
// Submission represents a submission record with enhanced fields.
//
// Fields whose trailing comment mentions JSON hold serialized JSON text in a
// plain string rather than a structured Go value.
type Submission struct {
	SubmissionAccession string `json:"submission_accession"`
	Alias string `json:"alias"`
	CenterName string `json:"center_name"`
	BrokerName string `json:"broker_name"`
	LabName string `json:"lab_name"`
	Title string `json:"title"`
	SubmissionDate *time.Time `json:"submission_date"` // pointer, so a missing date can be nil
	SubmissionComment string `json:"submission_comment"`
	Contacts string `json:"contacts"` // JSON array of contacts
	Actions string `json:"actions"` // JSON array of actions
	SubmissionLinks string `json:"submission_links"` // JSON array
	SubmissionAttributes string `json:"submission_attributes"` // JSON array
	Metadata string `json:"metadata"` // JSON
}
Submission represents a submission record with enhanced fields