Documentation
¶
Index ¶
- Constants
- func AA2Index(aa rune) (idx int, err error)
- func AlphabetFromString(alphabet string) int
- func Complement(seq []rune) (err error)
- func DetectAlphabet(seq string) int
- func Index2AA(index int) (aa rune, err error)
- func Index2Nt(index int) (nt rune, err error)
- func NewAlign(alphabet int) *align
- func NewPwAligner(seq1, seq2 Sequence, algo int) *pwaligner
- func NewSeqBag(alphabet int) *seqbag
- func NewSequence(name string, sequence []rune, comment string) *seq
- func Nt2Index(nt rune) (idx int, err error)
- func RandomSequence(alphabet, length int) ([]rune, error)
- func Reverse(seq []rune)
- type AlignChannel
- type Alignment
- type PairwiseAligner
- type PartitionSet
- func (ps *PartitionSet) AddRange(partName, modelName string, start, end, modulo int) (err error)
- func (ps *PartitionSet) AliLength() int
- func (ps *PartitionSet) CheckSites() (err error)
- func (ps *PartitionSet) ModeleName(code int) string
- func (ps *PartitionSet) NPartitions() int
- func (ps *PartitionSet) Partition(position int) int
- func (ps *PartitionSet) PartitionName(code int) string
- func (ps *PartitionSet) String() string
- type PhasedSequence
- type Phaser
- type SeqBag
- type Sequence
Constants ¶
const ( ALIGN_UP = iota ALIGN_LEFT ALIGN_DIAG ALIGN_STOP ALIGN_ALGO_SW = iota ALIGN_ALGO_ATG )
const ( AMINOACIDS = 0 // Amino acid sequence alphabet NUCLEOTIDS = 1 // Nucleotide sequence alphabet BOTH = 2 // Could be both UNKNOWN = 3 // Unknown alphabet GAP = '-' POINT = '.' OTHER = '*' ALL_AMINO = 'X' ALL_NUCLE = 'N' PSSM_NORM_NONE = 0 // No normalization PSSM_NORM_FREQ = 1 // Normalization by freq in the site PSSM_NORM_DATA = 2 // Normalization by aa/nt frequency in data PSSM_NORM_UNIF = 3 // Normalization by uniform frequency PSSM_NORM_LOGO = 4 // Normalization like LOGO : v(site)=freq*(log2(alphabet)-H(site)-pseudocount) FORMAT_FASTA = 0 FORMAT_PHYLIP = 1 FORMAT_NEXUS = 2 FORMAT_CLUSTAL = 3 POSITION_IDENTICAL = 0 // All characters in a position are the same POSITION_CONSERVED = 1 // Same strong group POSITION_SEMI_CONSERVED = 2 // Same weak group POSITION_NOT_CONSERVED = 3 // None of the above values GENETIC_CODE_STANDARD = 0 // Standard genetic code GENETIC_CODE_VETEBRATE_MITO = 1 // Vertebrate mitochondrial genetic code GENETIC_CODE_INVETEBRATE_MITO = 2 // Invertebrate mitochondrial genetic code )
Variables ¶
This section is empty.
Functions ¶
func AlphabetFromString ¶ added in v0.2.3
func DetectAlphabet ¶
func NewPwAligner ¶ added in v0.3.0
func NewSequence ¶
func RandomSequence ¶ added in v0.1.3
Types ¶
type AlignChannel ¶ added in v0.2.4
type Alignment ¶
type Alignment interface {
SeqBag
AddGaps(rate, lenprop float64)
AvgAllelesPerSite() float64
BuildBootstrap() Alignment // Bootstrap alignment
CharStatsSite(site int) (map[rune]int, error)
Clone() (Alignment, error)
CodonAlign(ntseqs SeqBag) (codonAl *align, err error)
// Removes identical patterns/sites and returns the number of occurrences
// of each pattern (order of patterns/sites may have changed)
Compress() []int
// concatenates the given alignment with this alignment
Concat(Alignment) error
// Compares all sequences to the first one and counts all differences per sequence
//
// - alldiffs: The set of all differences that have been seen at least once
// - diffs : The number of occurrences of each difference, for each sequence
// Sequences are ordered as the original alignment. Differences are
// written as REFNEW, ex: diffs["AC"]=12 .
CountDifferences() (alldiffs []string, diffs []map[string]int)
// Compares all sequences to the first one and replace identical characters with .
DiffWithFirst()
Entropy(site int, removegaps bool) (float64, error) // Entropy of the given site
// Positions of potential frameshifts
// if startingGapsAsIncomplete is true, then considers gaps at the beginning
// as an incomplete sequence, and takes the right phase
Frameshifts(startingGapsAsIncomplete bool) []struct{ Start, End int }
// Positions of potential stop in frame
// if startingGapsAsIncomplete is true, then considers gaps at the beginning
// as an incomplete sequence, and takes the right phase
Stops(startingGapsAsIncomplete bool, geneticode int) (stops []int, err error)
Length() int // Length of the alignment
Mask(start, length int) error // Masks given positions
MaxCharStats() ([]rune, []int)
Mutate(rate float64) // Adds uniform substitutions in the alignment (~sequencing errors)
NbVariableSites() int // Nb of variable sites
Pssm(log bool, pseudocount float64, normalization int) (pssm map[rune][]float64, err error) // Normalization: PSSM_NORM_NONE, PSSM_NORM_UNIF, PSSM_NORM_DATA
Rarefy(nb int, counts map[string]int) (Alignment, error) // Take a new rarefied sample taking weights into account
RandSubAlign(length int) (Alignment, error) // Extract a random subalignment with given length from this alignment
Recombine(rate float64, lenprop float64)
RemoveGapSeqs(cutoff float64) // Removes sequences having >= cutoff gaps
RemoveGapSites(cutoff float64, ends bool) // Removes sites having >= cutoff gaps
// Replaces match characters (.) by their corresponding characters on the first sequence
ReplaceMatchChars()
Sample(nb int) (Alignment, error) // generate a sub sample of the sequences
ShuffleSites(rate float64, roguerate float64, randroguefirst bool) []string
SimulateRogue(prop float64, proplen float64) ([]string, []string) // add "rogue" sequences
SiteConservation(position int) (int, error) // Conservation status of the site (presumably one of the POSITION_* constants)
Split(part *PartitionSet) ([]Alignment, error) // Splits the alignment given the partitions in argument
SubAlign(start, length int) (Alignment, error) // Extract a subalignment from this alignment
Swap(rate float64)
TrimSequences(trimsize int, fromStart bool) error
}
func RandomAlignment ¶ added in v0.1.3
type PairwiseAligner ¶ added in v0.3.0
type PairwiseAligner interface {
AlignEnds() (int, int)
AlignStarts() (int, int)
Seq1Ali() []rune
Seq2Ali() []rune
SetGapOpenScore(open float64)
SetGapExtendScore(extend float64)
SetScore(match, mismatch float64)
MaxScore() float64 // Maximum score of the alignment
NbMatches() int // Number of matches
NbMisMatches() int // Number of mismatches
NbGaps() int // Number of gaps
Length() int // Length of the alignment
Alignment() (Alignment, error)
AlignmentStr() string
}
type PartitionSet ¶ added in v0.3.2
type PartitionSet struct {
// contains filtered or unexported fields
}
func NewPartitionSet ¶ added in v0.3.2
func NewPartitionSet(alignmentLength int) (ps *PartitionSet)
func (*PartitionSet) AddRange ¶ added in v0.3.2
func (ps *PartitionSet) AddRange(partName, modelName string, start, end, modulo int) (err error)
func (*PartitionSet) AliLength ¶ added in v0.3.2
func (ps *PartitionSet) AliLength() int
returns the length of the alignment
func (*PartitionSet) CheckSites ¶ added in v0.3.2
func (ps *PartitionSet) CheckSites() (err error)
If not all sites are in a partition, returns an error
func (*PartitionSet) ModeleName ¶ added in v0.3.2
func (ps *PartitionSet) ModeleName(code int) string
Returns the name of the model associated with the given index. If the code does not exist, then returns ""
func (*PartitionSet) NPartitions ¶ added in v0.3.2
func (ps *PartitionSet) NPartitions() int
func (*PartitionSet) Partition ¶ added in v0.3.2
func (ps *PartitionSet) Partition(position int) int
Returns the partition code associated to the given position
If the position is outside the alignment, then returns -1
func (*PartitionSet) PartitionName ¶ added in v0.3.2
func (ps *PartitionSet) PartitionName(code int) string
Returns the name of the partition associated with the given index. If the code does not exist, then returns ""
func (*PartitionSet) String ¶ added in v0.3.2
func (ps *PartitionSet) String() string
type PhasedSequence ¶ added in v0.3.0
type PhasedSequence struct {
Err error
Removed bool
Position int
// phased nt sequence
NtSeq Sequence
// phased nt sequence
// with first nt corresponding to the
// first position of the aa codon
CodonSeq Sequence
// phased aa sequence
AaSeq Sequence
// Aligned sequences
// 1st: best found orf
// 2nd: sequence
Ali Alignment
}
type Phaser ¶ added in v0.3.0
type Phaser interface {
Phase(orfs, seqs SeqBag) (chan PhasedSequence, error)
SetLenCutoff(cutoff float64)
SetMatchCutoff(cutoff float64)
SetReverse(reverse bool)
SetCutEnd(cutend bool)
SetCpus(cpus int)
SetTranslate(translate bool, geneticcode int) (err error)
SetAlignScores(match, mismatch float64)
SetGapOpen(float64)
SetGapExtend(float64)
}
* If SetTranslate(true):
Aligns all sequences to the given ORF and trims sequences to the start position. If orf is nil, searches for the longest ORF (in 3 or 6 phases depending on reverse arg) in all sequences.
To do so, Phase() will:
- Translate the given ORF in aminoacids;
- For each sequence of the dataset: translate it in the 3 phases (forward) if reverse is false or 6 phases (forward and reverse) if reverse is true, align it with the translated orf, and take the phase giving the best alignment. If no phase gives a good alignment (>lencutoff * orf length, >matchcutoff matches over the align length and starting at first position of the ORF), then the sequence is discarded;
- For each sequence, take the Start corresponding to the Start of the ORF, and remove nucleotides before;
- Return the trimmed nucleotidic sequences (phased), the corresponding amino-acid sequences (phasedaa), the positions of starts in the nucleotidic sequences, and the removed sequence names.
If cutend is true, then also remove the end of sequences that do not align with orf ¶
It does not modify the input object ¶
* If SetTranslate(false):
align all sequences to the given ORF and trims sequences to the start position, it does not take into account protein information
If orf is nil, searches for the longest ORF (in forward only or both strands depending on reverse arg) in all sequences ¶
To do so:
1. If the alignment is bad (i.e. it does not satisfy: >lencutoff * orf length, >matchcutoff matches over the align length, and starting at first position of the ORF), then the sequence is discarded; 3. For each sequence, take the Start corresponding to the Start of the ORF, and remove nucleotides before; 4. Return the trimmed nucleotidic sequences (phased), the positions of starts in the nucleotidic sequences, and the removed sequence names. If cutend is true, then also remove the end of sequences that do not align with orf. It does not modify the input object.
type SeqBag ¶ added in v0.3.0
type SeqBag interface {
AddSequence(name string, sequence string, comment string) error
AddSequenceChar(name string, sequence []rune, comment string) error
AppendSeqIdentifier(identifier string, right bool)
Alphabet() int
AlphabetStr() string
AlphabetCharacters() []rune
AlphabetCharToIndex(c rune) int // Returns index of the character (nt or aa) in the AlphabetCharacters() array
AutoAlphabet() // detects and sets alphabet automatically for all the sequences
CharStats() map[rune]int64
CleanNames(namemap map[string]string) // Clean sequence names (newick special char)
Clear() // Removes all sequences
CloneSeqBag() (seqs SeqBag, err error) // Clones the seqbag
Deduplicate() (identical [][]string, err error) // Remove duplicate sequences
GetSequence(name string) (string, bool) // Get a sequence by name
GetSequenceById(ith int) (string, bool)
GetSequenceChar(name string) ([]rune, bool)
GetSequenceCharById(ith int) ([]rune, bool)
GetSequenceNameById(ith int) (string, bool)
SetSequenceChar(ithAlign, ithSite int, char rune) error
IgnoreIdentical(bool) // if true, then it won't add the sequence if a sequence with the same name AND same sequence exists
Sequence(ith int) (Sequence, bool)
SequenceByName(name string) (Sequence, bool)
Identical(SeqBag) bool
Iterate(it func(name string, sequence string))
IterateChar(it func(name string, sequence []rune))
IterateAll(it func(name string, sequence []rune, comment string))
Sequences() []Sequence
SequencesChan() chan Sequence
LongestORF(reverse bool) (orf Sequence, err error)
MaxNameLength() int // maximum sequence name length
NbSequences() int
Rename(namemap map[string]string)
RenameRegexp(regex, replace string, namemap map[string]string) error
Replace(old, new string, regex bool) error // Replaces old string with new string in sequences of the alignment
ShuffleSequences() // Shuffle sequence order
String() string // Raw string representation (just write all sequences)
Translate(phase int, geneticcode int) (err error) // Translates nt sequence in aa
TrimNames(namemap map[string]string, size int) error
TrimNamesAuto(namemap map[string]string, curid *int) error
Sort() // Sorts the sequences by name
Unalign() SeqBag
}
type Sequence ¶
type Sequence interface {
Sequence() string
SequenceChar() []rune
SameSequence([]rune) bool
CharAt(int) rune
Name() string
SetName(name string)
Comment() string
Length() int
LongestORF() (start, end int) // Detects the longest ORF in forward strand only
Reverse()
Complement() error // Returns an error if not nucleotide sequence
Translate(phase int, geneticcode int) (Sequence, error) // Translates the sequence using the given code
DetectAlphabet() int // Try to detect alphabet (nt or aa)
Clone() Sequence
}