Documentation
¶
Overview ¶
Package vcfgo implements a Reader and Writer for variant call format. It eases reading, filtering, and modifying VCFs even if they are not to spec. Example:
f, _ := os.Open("examples/test.auto_dom.no_parents.vcf")
rdr, err := vcfgo.NewReader(f)
if err != nil {
panic(err)
}
for {
variant := rdr.Read()
if variant == nil {
break
}
fmt.Printf("%s\t%d\t%s\t%s\n", variant.Chromosome, variant.Pos, variant.Ref, variant.Alt)
fmt.Printf("%s", variant.Info["DP"].(int) > 10)
sample := variant.Samples[0]
// we can get the PL field as a list (-1 is default in case of missing value)
fmt.Println("%s", variant.GetGenotypeField(sample, "PL", -1))
_ = sample.DP
}
fmt.Fprintln(os.Stderr, rdr.Error())
Example ¶
package main
import (
"fmt"
"os"
"github.com/brentp/vcfgo"
)
func main() {
f, _ := os.Open("examples/test.auto_dom.no_parents.vcf")
rdr, err := vcfgo.NewReader(f, false)
if err != nil {
panic(err)
}
for {
variant := rdr.Read()
if variant == nil {
break
}
fmt.Printf("%s\t%d\t%s\t%s\n", variant.Chromosome, variant.Pos, variant.Ref(), variant.Alt())
dp, _ := variant.Info().Get("DP")
fmt.Printf("%v", dp.(int) > 10)
Index ¶
- Constants
- func ItoS(k string, v interface{}) string
- func ParseHeaderContig(contig string) (map[string]string, error)
- func ParseHeaderExtraKV(kv string) ([]string, error)
- func ParseHeaderFileVersion(format string) (string, error)
- func ParseHeaderFilter(info string) ([]string, error)
- func ParseHeaderSample(line string) (string, error)
- func ParseOne(key, val, itype string) (interface{}, error)
- func ParseSampleLine(line string) ([]string, error)
- type Header
- type Info
- type InfoByte
- func (i *InfoByte) Add(key string, value interface{})
- func (i InfoByte) Bytes() []byte
- func (i InfoByte) Contains(key string) bool
- func (i *InfoByte) Delete(key string)
- func (i InfoByte) Get(key string) (interface{}, error)
- func (i InfoByte) Keys() []string
- func (i InfoByte) SGet(key string) []byte
- func (i *InfoByte) Set(key string, value interface{}) error
- func (i InfoByte) String() string
- func (i *InfoByte) UpdateHeader(key string, value interface{})
- type Reader
- func (vr *Reader) AddFormatToHeader(id string, num string, stype string, desc string)
- func (vr *Reader) AddInfoToHeader(id string, num string, stype string, desc string)
- func (vr *Reader) Clear()
- func (vr *Reader) Close() error
- func (vr *Reader) Error() error
- func (vr *Reader) GetHeaderType(field string) string
- func (vr *Reader) Parse(fields [][]byte) *Variant
- func (vr *Reader) Read() *Variant
- type SampleFormat
- type SampleGenotype
- type VCFError
- type Variant
- func (v *Variant) Alt() []string
- func (v *Variant) CIEnd() (uint32, uint32, bool)
- func (v *Variant) CIPos() (uint32, uint32, bool)
- func (v *Variant) Chrom() string
- func (v *Variant) End() uint32
- func (v *Variant) GetGenotypeField(g *SampleGenotype, field string, missing interface{}) (interface{}, error)
- func (v *Variant) Id() string
- func (v *Variant) Info() interfaces.Info
- func (v *Variant) Ref() string
- func (v *Variant) Start() uint32
- func (v *Variant) String() string
- type Writer
Examples ¶
Constants ¶
const MISSING_VAL = 256
MISSING_VAL is used for the quality score, which ranges from 0 to 255 but may also be given as "." (missing).
Variables ¶
This section is empty.
Functions ¶
func ParseHeaderExtraKV ¶
func ParseHeaderFileVersion ¶
func ParseHeaderFilter ¶
func ParseHeaderSample ¶
ParseHeaderSample returns just the sample id.
func ParseSampleLine ¶
Types ¶
type Header ¶
type Header struct {
sync.RWMutex
SampleNames []string
Infos map[string]*Info
SampleFormats map[string]*SampleFormat
Filters map[string]string
Extras []string
FileFormat string
// Contigs is a list of maps of length, URL, etc.
Contigs []map[string]string
// ##SAMPLE
Samples map[string]string
Pedigrees []string
}
Header holds all the type and format information for the variants.
func NewHeader ¶
func NewHeader() *Header
NewHeader returns a Header with the requisite allocations.
func (*Header) ParseSample ¶
func (h *Header) ParseSample(format []string, s string) (*SampleGenotype, []error)
func (*Header) ParseSamples ¶
Force parsing of the sample fields.
type Info ¶
type Info struct {
Id string
Description string
Number string // A G R . ''
Type string // STRING INTEGER FLOAT FLAG CHARACTER UNKNOWN
}
Info holds the Info and Format fields
func ParseHeaderInfo ¶
type InfoByte ¶
type InfoByte struct {
Info []byte
// contains filtered or unexported fields
}
func NewInfoByte ¶
func (*InfoByte) UpdateHeader ¶
type Reader ¶
Reader holds information about the current line number (for errors) and the VCF header that indicates the structure of records.
func NewReader ¶
NewReader returns a Reader. If lazySamples is true, then the user will have to call Reader.ParseSamples() in order to access sample info.
func (*Reader) AddFormatToHeader ¶
AddFormatToHeader adds a FORMAT field to the header.
func (*Reader) AddInfoToHeader ¶
AddInfoToHeader adds an INFO field to the header.
func (*Reader) GetHeaderType ¶
type SampleFormat ¶
type SampleFormat Info
SampleFormat holds the type info for Format fields.
func ParseHeaderFormat ¶
func ParseHeaderFormat(info string) (*SampleFormat, error)
func (*SampleFormat) String ¶
func (i *SampleFormat) String() string
String returns a string representation.
type SampleGenotype ¶
type SampleGenotype struct {
Phased bool
GT []int
DP int
GL []float64
GQ int
MQ int
Fields map[string]string
}
SampleGenotype holds the information about a sample. Several fields are pre-parsed, but all fields are kept in Fields as well.
func NewSampleGenotype ¶
func NewSampleGenotype() *SampleGenotype
NewSampleGenotype allocates the internals and returns a *SampleGenotype
func (*SampleGenotype) AltDepths ¶
func (s *SampleGenotype) AltDepths() ([]int, error)
AltDepths returns the depths of the alternates for this sample
func (*SampleGenotype) RefDepth ¶
func (s *SampleGenotype) RefDepth() (int, error)
RefDepth returns the depth of the reference allele for this sample.
func (*SampleGenotype) String ¶
func (sg *SampleGenotype) String(fields []string) string
String returns the string representation of the sample field.
type VCFError ¶
VCFError satisfies the error interface and allows multiple errors. This is useful because, for example, on a single line, every sample may have a field that doesn't match the description in the header. We want to keep parsing but also let the caller know about the error.
func (*VCFError) Add ¶
Add adds an error and the line number within the vcf where the error took place.
type Variant ¶
type Variant struct {
Chromosome string
Pos uint64
Id_ string
Reference string
Alternate []string
Quality float32
Filter string
Info_ interfaces.Info
Format []string
Samples []*SampleGenotype
Header *Header
LineNumber int64
// contains filtered or unexported fields
}
Variant holds the information about a single site. It is analogous to a row in a VCF file.
func NewVariant ¶
func (*Variant) CIEnd ¶
CIEnd reports the Left and Right end of an SV using the CIEND tag. It is in bed format so the end is +1'ed. E.g. If there is no CIEND, the return value is v.End() - 1, v.End()
func (*Variant) CIPos ¶
CIPos reports the Left and Right end of an SV using the CIPOS tag. It is in bed format so the end is +1'ed. E.g., if there is no CIPOS, the return value is v.Start(), v.Start() + 1
func (*Variant) GetGenotypeField ¶
func (v *Variant) GetGenotypeField(g *SampleGenotype, field string, missing interface{}) (interface{}, error)
GetGenotypeField uses the information from the header to parse the correct type from a genotype field. It returns an interface that can be asserted to the expected type.
func (*Variant) Info ¶
func (v *Variant) Info() interfaces.Info