cmd

package
v2.12.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 4, 2025 License: MIT Imports: 68 Imported by: 0

Documentation

Index

Constants

View Source
const BIG_SLEEP = 100 * time.Millisecond
View Source
const MICRO_SLEEP = time.Millisecond
View Source
const NAP_SLEEP = 10 * time.Millisecond
View Source
const VERSION = "2.12.0"

VERSION of seqkit

Variables

View Source
var IUPACAminoAcids, _ = asciiset.MakeASCIISet("ACDEFGHIKLMNPQRSTVWY")
View Source
var IUPACBases, _ = asciiset.MakeASCIISet("ACGTRYSWKMBDHVNUacgtryswkmbdhvnu")
View Source
var RootCmd = &cobra.Command{
	Use:   "seqkit",
	Short: "a cross-platform and ultrafast toolkit for FASTA/Q file manipulation",
	Long: fmt.Sprintf(`SeqKit -- a cross-platform and ultrafast toolkit for FASTA/Q file manipulation

Version: %s

Author: Wei Shen <shenwei356@gmail.com>

Documents  : http://bioinf.shenwei.me/seqkit
Source code: https://github.com/shenwei356/seqkit
Please cite: https://doi.org/10.1002/imt2.191


Seqkit utilizes the pgzip (https://github.com/klauspost/pgzip) package to
read and write gzip file, and the outputted gzip file would be slighty
larger than files generated by GNU gzip.

Seqkit writes gzip files very fast, much faster than the multi-threaded pigz,
therefore there's no need to pipe the result to gzip/pigz.

Seqkit also supports reading and writing xz (.xz) and zstd (.zst) formats since v2.2.0.
Bzip2 format is supported since v2.4.0.

Compression level:
  format   range   default  comment
  gzip     1-9     5        https://github.com/klauspost/pgzip sets 5 as the default value.
  xz       NA      NA       https://github.com/ulikunitz/xz does not support.
  zstd     1-4     2        roughly equals to zstd 1, 3, 7, 11, respectively.
  bzip     1-9     6        https://github.com/dsnet/compress

`, VERSION),
}

RootCmd represents the base command when called without any subcommands

View Source
var Threads = runtime.NumCPU()

Threads for bread.NewBufferedReader()

Functions

func Bam2Bundles

func Bam2Bundles(inBam string, outDir string, minBundle int, nrProcBam int, quiet, silent bool)

func BamToolAccStats

func BamToolAccStats(p *BamToolParams)

func BamToolAlnContext

func BamToolAlnContext(p *BamToolParams)

func BamToolDump

func BamToolDump(p *BamToolParams)

func BamToolbox

func BamToolbox(toolYaml string, inFile string, outFile string, quiet bool, silent bool, threads int)

func BashExec

func BashExec(command string)

BashExec executes a command via bash.

func CountReads

func CountReads(bamReader *bam.Reader, bamWriter *bam.Writer, countFile string, field string, rangeMin, rangeMax float64, printPass bool, printPrim bool, printLog bool, printBins int, binMode string, mapQual int, printFreq int, printDump bool, printDelay int, printPdf string, execBefore, execAfter string, includeIds map[string]bool, excludeIds map[string]bool, printQuiet bool)

CountReads counts total, secondary and supplementary reads mapped to each reference.

func Execute

func Execute()

Execute adds all child commands to the root command and sets flags appropriately. This is called by main.main(). It only needs to happen once to the rootCmd.

func FasLinesToSimpleSeq

func FasLinesToSimpleSeq(lines FqLines) (*simpleSeq, error)

FasLinesToSimpleSeq attempts to construct a valid sequence record from a buffer of parsed lines.

func FileExists

func FileExists(fn string) bool

FileExists checks if a file exists by calling os.Stat.

func FileSize

func FileSize(file string) int

FileSize gets size of a file by calling os.Stat.

func FqLinesToSimpleSeq

func FqLinesToSimpleSeq(lines FqLines, qBase int, gaps bool) (*simpleSeq, error)

FqLinesToSimpleSeq attempts to construct a valid fastq record from a buffer of parsed lines.

func GetSamAcc

func GetSamAcc(r *sam.Record) float64

func GetSamDump

func GetSamDump(field string, r *sam.Record) string

func GetSamEndPos

func GetSamEndPos(r *sam.Record) int

func GetSamHardClipped

func GetSamHardClipped(r *sam.Record) int

func GetSamIsSec

func GetSamIsSec(r *sam.Record) int

func GetSamIsSup

func GetSamIsSup(r *sam.Record) int

func GetSamLeftClip

func GetSamLeftClip(r *sam.Record) int

func GetSamLeftHardClip

func GetSamLeftHardClip(r *sam.Record) int

func GetSamLeftSoftClip

func GetSamLeftSoftClip(r *sam.Record) int

func GetSamLeftSoftClipSeq

func GetSamLeftSoftClipSeq(r *sam.Record) string

func GetSamMapQual

func GetSamMapQual(r *sam.Record) int

func GetSamMapped

func GetSamMapped(r *sam.Record) bool

func GetSamMeanBaseQual

func GetSamMeanBaseQual(r *sam.Record) float64

func GetSamName

func GetSamName(r *sam.Record) string

func GetSamPos

func GetSamPos(r *sam.Record) int

func GetSamReadAln

func GetSamReadAln(r *sam.Record) int

func GetSamReadAlnSeq

func GetSamReadAlnSeq(r *sam.Record) string

func GetSamReadCov

func GetSamReadCov(r *sam.Record) float64

func GetSamReadLen

func GetSamReadLen(r *sam.Record) int

func GetSamReadSeq

func GetSamReadSeq(r *sam.Record) string

func GetSamRef

func GetSamRef(r *sam.Record) string

func GetSamRefAln

func GetSamRefAln(r *sam.Record) int

func GetSamRefCov

func GetSamRefCov(r *sam.Record) float64

func GetSamRefLen

func GetSamRefLen(r *sam.Record) int

func GetSamReverse

func GetSamReverse(r *sam.Record) bool

func GetSamRightClip

func GetSamRightClip(r *sam.Record) int

func GetSamRightHardClip

func GetSamRightHardClip(r *sam.Record) int

func GetSamRightSoftClip

func GetSamRightSoftClip(r *sam.Record) int

func GetSamRightSoftClipSeq

func GetSamRightSoftClipSeq(r *sam.Record) string

func GetSamStrand

func GetSamStrand(r *sam.Record) int

func IsPidAlive

func IsPidAlive(pid int) bool

func LaunchFxWatchers

func LaunchFxWatchers(dirs []string, ctrlChan WatchCtrlChan, re *regexp.Regexp, inFmt, outFmt string, qBase int, allowGaps bool, delta int, timeout string, dropString string, waitPid int, findOnly bool, outw *xopen.Writer)

LaunchFxWatchers launches fastx watcher goroutines on multiple input directories.

func ListTools

func ListTools(p *BamToolParams)

func MaxInts

func MaxInts(s []int) (m int)

MaxInts calculates the maximum of a slice of integers.

func MinInts

func MinInts(s []int) (m int)

MinInts calculates the minimum of a slice of integers.

func NewAnonLinearSeq

func NewAnonLinearSeq(s string) *linear.Seq

NewAnonLinearSeq makes a new anonymous linear.Seq.

func NewBamReader

func NewBamReader(bamFile string, nrProc int) *bam.Reader

NewBamReader creates a new BAM reader from file.

func NewBamReaderChan

func NewBamReaderChan(inFile string, cp int, buff int, threads int) (chan *sam.Record, *bam.Reader)

func NewBamSinkChan

func NewBamSinkChan(cp int) (chan *sam.Record, chan bool)

func NewBamWriterChan

func NewBamWriterChan(inFile string, head *sam.Header, cp int, buff int, threads int) (chan *sam.Record, chan bool)

func NewFxWatcher

func NewFxWatcher(dir string, seqChan chan *simpleSeq, watcherCtrlChanIn, watcherCtrlChanOut WatchCtrlChan, re *regexp.Regexp, inFmt, outFmt string, qBase int, allowGaps bool, minDelta int, dropString string, findOnly bool)

NewFxWatcher streams records from fastx files under a directory.

func NewRawFastaStream

func NewRawFastaStream(name string, inFh *xopen.Reader, inReader *bufio.Reader, seqChan chan *simpleSeq, id string, ctrlChanIn, ctrlChanOut chan SeqStreamCtrl, gaps bool) chan *simpleSeq

NewRawFastaStream initializes a new channel for reading fasta records in a robust way.

func NewRawFastqStream

func NewRawFastqStream(name string, inFh *xopen.Reader, inReader *bufio.Reader, seqChan chan *simpleSeq, qBase int, id string, ctrlChanIn, ctrlChanOut chan SeqStreamCtrl, gaps bool) chan *simpleSeq

NewRawFastqStream initializes a new channel for reading fastq records in a robust way.

func NewRawSeqStreamFromFile

func NewRawSeqStreamFromFile(inFastq string, seqChan chan *simpleSeq, qBase int, format string, allowGaps bool) (chan SeqStreamCtrl, chan SeqStreamCtrl)

NewRawSeqStreamFromFile initializes a new channel for reading fastq records from a file in a robust way.

func NewSAMRecordFromAln

func NewSAMRecordFromAln(name string, ref *sam.Reference, refStart, refEnd, queryStart, queryEnd int, refAln, queryAln string, strand string, mapQ byte, seq string, qual []byte, aux []sam.Aux) (*sam.Record, error)

NewSAMRecordFromAln builds a new SAM record based on the provided local alignment and its reference/query coordinates.

func ParseByteSize

func ParseByteSize(val string) (int64, error)

ParseByteSize parses byte size from string

func PrintTsvLine

func PrintTsvLine(fields []string) string

func RevCompDNA

func RevCompDNA(s string) string

RevCompDNA reverse complements a DNA sequence string.

func ReverseInt

func ReverseInt(d []int) []int

ReverseInt reverses a slice of integers.

func SamDumper

func SamDumper(fields []string, r *sam.Record) []string

func SubLocationFlanking

func SubLocationFlanking(length, B, E, begin, end int, strictMode bool) (int, int, bool)

SubLocationFlanking returns location of a flanking range (begin:end, relative to amplicon). B/E: 0-based, location of amplicon. begin/end: 1-based, begin: relative location to 5' end of amplicon, end: relative location to 3' end of amplicon. Returned locations are 1-based.

            F
-----===============-----
 -3-1                        x/y
                    1 3 5    x/y
     F             R
-----=====-----=====-----
=====                        -5:-1
===                          -5:-3
                    =====     1:5
                      ===     3:5
    =================        -1:1
=========================    -5:5
                              x:-y (invalid)

func SubLocationInner

func SubLocationInner(length, B, E, begin, end int, strictMode bool) (int, int, bool)

SubLocationInner returns location of a range (begin:end, relative to amplicon). B/E: 0-based, location of amplicon. begin/end: 1-based, begin: relative location to 5' end of amplicon, end: relative location to 3' end of amplicon. Returned locations are 1-based.

            F
-----===============-----
     1 3 5                    x/y
              -5-3-1          x/y
     F             R
-----=====-----=====-----     x:y

     ===============          1:-1
     =======                  1:7
       =====                  3:7
          =====               6:10
          =====             -10:-6
             =====           -7:-3
                             -x:y (invalid)

func SumInts

func SumInts(s []int) (r int)

SumInts calculates the sum of a slice of integers.

func ValidateSeq

func ValidateSeq(seq *simpleSeq, gaps bool) error

ValidateSeq validates simpleSeq objects.

Types

type AlignedSeq

type AlignedSeq struct {
	Ref        *Reference
	Query      *Query
	QueryAln   string
	RefAln     string
	RefStart   int
	RefEnd     int
	QueryStart int
	QueryEnd   int
	Score      float64
	Best       bool
	Detector   *SeqDetector
}

AlignedSeq holds alignment results.

func AlignInfo

func AlignInfo(r *Reference, q *Query, f []feat.Pair) *AlignedSeq

AlignInfo constructs an *AlignedSeq structure based on raw alignment results.

func PairwiseAlignSW

func PairwiseAlignSW(r *Reference, q *Query, alnParams *AlnParams) *AlignedSeq

PairwiseAlignSW performs pairwise local alignment of two sequences using the biogo implementation of the Smith-Waterman algorithm.

func (*AlignedSeq) AlnString

func (a *AlignedSeq) AlnString() string

func (*AlignedSeq) Fields

func (a *AlignedSeq) Fields() []string

Fields returns the fields of AlignedSeq in a defined order.

func (*AlignedSeq) String

func (a *AlignedSeq) String() string

String generates a string representation of an *AlignedSeq.

type AlnDetails

type AlnDetails struct {
	Match         int
	Mismatch      int
	MatchMismatch int
	Insertion     int
	Deletion      int
	Skip          int
	Len           int
	Acc           float64
	WAcc          float64
}

func GetSamAlnDetails

func GetSamAlnDetails(r *sam.Record) *AlnDetails

type AlnParams

type AlnParams struct {
	Match     int
	Mismatch  int
	GapOpen   int
	GapExtend int
}

AlnParams holds the alignment parameters.

type AmpliconFinder

type AmpliconFinder struct {
	Seq []byte
	F   []byte // Forward primer
	R   []byte // R should be reverse complementary sequence of reverse primer

	MaxMismatch int
	FMindex     *fmi.FMIndex
	// contains filtered or unexported fields
}

AmpliconFinder is a struct for locating amplicon via primer(s).

func NewAmpliconFinder

func NewAmpliconFinder(sequence, forwardPrimer, reversePrimerRC []byte, maxMismatch int) (*AmpliconFinder, error)

NewAmpliconFinder returns an AmpliconFinder struct.

func (*AmpliconFinder) Locate

func (finder *AmpliconFinder) Locate() ([]int, []int, error)

Locate returns the location of the amplicon. Locations are 1-based; nil is returned if not found.

func (*AmpliconFinder) LocateRange

func (finder *AmpliconFinder) LocateRange(begin, end int, flanking bool, strictMode bool) ([]int, []int, error)

LocateRange returns location of the range (begin:end, 1-based).

func (*AmpliconFinder) Location

func (finder *AmpliconFinder) Location() ([]int, []int, error)

Location returns the location of the amplicon. Locations are 1-based; nil is returned if not found.

func (*AmpliconFinder) Reset added in v2.8.0

func (finder *AmpliconFinder) Reset(sequence []byte, maxMismatch int) error

type BamTool

type BamTool struct {
	Name string
	Desc string
	Use  func(params *BamToolParams)
}

type BamToolParams

type BamToolParams struct {
	Yaml    *syaml.Yaml
	InChan  chan *sam.Record
	OutChan chan *sam.Record
	Quiet   bool
	Silent  bool
	Threads int
	Rank    int
	Shed    Toolshed
}

type BedFeature

type BedFeature struct {
	Chr    string
	Start  int // 1based
	End    int // end included
	Name   *string
	Strand *string
}

BedFeature is the BED feature struct.

func ReadBedFeatures

func ReadBedFeatures(file string) ([]BedFeature, error)

ReadBedFeatures returns the BedFeatures read from a file.

func ReadBedFilteredFeatures

func ReadBedFilteredFeatures(file string, chrs []string) ([]BedFeature, error)

ReadBedFilteredFeatures returns the BedFeatures of the selected chrs read from a file.

type ColorCycler

type ColorCycler struct {
	Dummy   bool
	Index   int
	Palette []au.Color
}

ColorCycler is a utility object to cycle between colors and colorize text.

func NewColorCycler

func NewColorCycler(dummy bool) *ColorCycler

NewColorCycler returns a new color cycler object.

func PrettyPrintTsv

func PrettyPrintTsv(cols []string, fields [][]string, width int, color bool) (string, *ColorCycler)

PrettyPrintTsv pretty prints and optionally colorizes a "data frame".

func (*ColorCycler) Colorize

func (p *ColorCycler) Colorize(s string) string

Colorize adds the current ANSI color to the text.

func (*ColorCycler) Fancy

func (p *ColorCycler) Fancy(s string, head bool) string

Fancy colorizes text with normal or header styles.

func (*ColorCycler) Header

func (p *ColorCycler) Header(s string) string

Header adds the current ANSI color to the text with a header style.

func (*ColorCycler) Next

func (p *ColorCycler) Next()

Next switches to the next color.

func (*ColorCycler) WrapWriter

func (p *ColorCycler) WrapWriter(fh *os.File) io.Writer

WrapWriter wraps a file into a go-colorable object if necessary.

type Config

type Config struct {
	Alphabet               *seq.Alphabet
	ChunkSize              int
	BufferSize             int
	Threads                int
	LineWidth              int
	IDRegexp               string
	IDNCBI                 bool
	OutFile                string
	Quiet                  bool
	AlphabetGuessSeqLength int
	ValidateSeqLength      int
	CompressionLevel       int
	SkipFileCheck          bool
}

Config holds the global flags.

type FqLine

type FqLine struct {
	Line     string
	FqlState FqlState
	LineNr   int
}

type FqLines

type FqLines []FqLine

type FqlState

type FqlState struct {
	Header  bool
	Seq     bool
	Plus    bool
	Qual    bool
	Partial bool
	Invalid bool
}

type FxWatcher

type FxWatcher struct {
	Base string
	Pool *WatchedFxPool
}

type Locus

type Locus struct {
	Chrom     string
	Start     int
	End       int
	Order     int
	NrRecords int
	Size      int
}

type Queries

type Queries []*Query

Queries is a slice of pointers to Query.

type Query

type Query struct {
	Name      string
	Seq       string
	Strand    string
	NullScore float64
}

Query holds information about a query sequence.

type Range

type Range struct {
	Start float64
	End   float64
}

Range defines a half-open slice over a sequence [Start, End).

func (Range) Len

func (r Range) Len() float64

Len returns the length of a range.

type Ranges

type Ranges []Range

Ranges is a slice of ranges.

type ReadCounts

type ReadCounts []*RefCounts

ReadCounts holds read counts for all references.

func NewReadCounts

func NewReadCounts(refs []*sam.Reference) ReadCounts

NewReadCounts initializes a new read count slice.

func (ReadCounts) Sorted

func (c ReadCounts) Sorted() ReadCounts

Sorted creates a sorted copy of a read counts slice.

type RecordLoopBuffer

type RecordLoopBuffer struct {
	Size, Capacity int
	Current        *RecordNode
}

RecordLoopBuffer is a loop buffer for FASTA/Q records

func NewRecordLoopBuffer

func NewRecordLoopBuffer(capacity int) (*RecordLoopBuffer, error)

NewRecordLoopBuffer creates a new RecordLoopBuffer object with a certain capacity.

func (*RecordLoopBuffer) Add

func (buf *RecordLoopBuffer) Add(value *fastx.Record)

Add adds a new RecordNode.

func (*RecordLoopBuffer) Backward

func (buf *RecordLoopBuffer) Backward(n int)

Backward moves the current pointer backward N nodes

func (*RecordLoopBuffer) Next

func (buf *RecordLoopBuffer) Next() *RecordNode

Next returns next node

func (*RecordLoopBuffer) Prev

func (buf *RecordLoopBuffer) Prev() *RecordNode

Prev returns previous node

type RecordNode

type RecordNode struct {
	Value *fastx.Record
	// contains filtered or unexported fields
}

RecordNode is the node for the doubly-linked loop list.

func (RecordNode) String

func (node RecordNode) String() string

type RefCounts

type RefCounts struct {
	Ref      *sam.Reference
	Count    float64
	SecCount float64
	SupCount float64
}

RefCounts is a structure holding read count information for a given reference.

type RefWithFaidx

type RefWithFaidx struct {
	Fasta   string
	IdxFile string

	Cache bool
	// contains filtered or unexported fields
}

func NewRefWitdFaidx

func NewRefWitdFaidx(file string, cache bool, quiet bool) *RefWithFaidx

func (*RefWithFaidx) IdxSubSeq

func (idx *RefWithFaidx) IdxSubSeq(chrom string, start, end int) (string, error)

type Reference

type Reference struct {
	Name   string
	Seq    string
	Ranges Ranges
}

Reference holds information about a reference sequence along with the target ranges.

type Scorer

type Scorer interface {
	Score() int
}

Scorer is an interface for getting alignment score.

type SeqColorizer

type SeqColorizer struct {
	NucPalette    map[byte]au.Color
	ProtPalette   map[byte]au.Color
	QualPalette   map[byte]au.Color
	QualBgPalette map[byte]au.Color
	Alphabet      string
}

SeqColorizer is a sequence colorizer object.

func NewSeqColorizer

func NewSeqColorizer(alphabet string) *SeqColorizer

NewSeqColorizer returns a new sequence colorizer object.

func (*SeqColorizer) Color

func (p *SeqColorizer) Color(seq []byte) []byte

Color adds ANSI colors to DNA/RNA or protein sequences.

func (*SeqColorizer) ColorAmino

func (p *SeqColorizer) ColorAmino(seq []byte) []byte

ColorAmino adds ANSI colors to protein sequences.

func (*SeqColorizer) ColorNucleic

func (p *SeqColorizer) ColorNucleic(seq []byte) []byte

ColorNucleic adds ANSI colors to DNA/RNA sequences.

func (*SeqColorizer) ColorNucleicWithQuals

func (p *SeqColorizer) ColorNucleicWithQuals(seq []byte, quals []byte) []byte

ColorNucleicWithQuals adds ANSI colors to DNA/RNA sequences, using the quality palette as background.

func (*SeqColorizer) ColorQuals

func (p *SeqColorizer) ColorQuals(quals []byte) []byte

ColorQuals adds grayscale colors to quality values.

func (*SeqColorizer) ColorWithQuals

func (p *SeqColorizer) ColorWithQuals(seq []byte, quals []byte) []byte

ColorWithQuals adds ANSI colors to DNA/RNA or protein sequences, using the quality palette as background.

func (*SeqColorizer) WrapWriter

func (p *SeqColorizer) WrapWriter(fh *os.File) io.Writer

WrapWriter wraps a file into a go-colorable object if necessary.

type SeqDetector

type SeqDetector struct {
	Queries   Queries
	SearchAll bool
	Stranded  bool
	NullMode  string
	Cutoff    float64
	AlnParams *AlnParams
}

SeqDetector holds parameters for sequence detection.

func NewSeqDetector

func NewSeqDetector(searchAll bool, stranded bool, nullMode string, cutoff float64, alnParams *AlnParams) *SeqDetector

NewSeqDetector initializes a SeqDetector object.

func (*SeqDetector) AddAnonQueries

func (d *SeqDetector) AddAnonQueries(qrs []string)

AddAnonQueries adds anonymous queries from a list of comma separated strings.

func (*SeqDetector) Detect

func (d *SeqDetector) Detect(r *Reference, rec bool) []*AlignedSeq

Detect performs optionally recursive alignments of the queries against a given reference sequence.

func (*SeqDetector) LoadQueries

func (d *SeqDetector) LoadQueries(fx string)

LoadQueries loads queries from a fasta file and calculates null scores for each.

type SeqStreamCtrl

type SeqStreamCtrl int
const (
	StreamTry SeqStreamCtrl = iota
	StreamQuit
	StreamEOF
	StreamExited
)

type SumResult added in v2.2.0

type SumResult struct {
	File   string
	SeqNum int
	SeqLen int
	Digest string
}

type Toolshed

type Toolshed map[string]BamTool

func NewToolshed

func NewToolshed() Toolshed

func (Toolshed) String

func (s Toolshed) String() string

type TopBuffer

type TopBuffer []topEntry

TopBuffer is a slice of topEntries.

type WatchCtrl

type WatchCtrl int

type WatchCtrlChan

type WatchCtrlChan chan WatchCtrl

type WatchedFx

type WatchedFx struct {
	Name        string
	LastSize    int64
	LastTry     time.Time
	BytesRead   int64
	IsDir       bool
	SeqChan     chan *simpleSeq
	CtrlChanIn  chan SeqStreamCtrl
	CtrlChanOut chan SeqStreamCtrl
}

type WatchedFxPool

type WatchedFxPool struct {
	Map *sync.Map
}

func (*WatchedFxPool) Delete

func (m *WatchedFxPool) Delete(k string)

func (*WatchedFxPool) Get

func (m *WatchedFxPool) Get(k string) *WatchedFx

func (*WatchedFxPool) Insert

func (m *WatchedFxPool) Insert(k string, v *WatchedFx)

func (*WatchedFxPool) IsEmpty

func (m *WatchedFxPool) IsEmpty() bool

func (*WatchedFxPool) Range

func (m *WatchedFxPool) Range(f func(key, value interface{}) bool)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL