Documentation
¶
Index ¶
- Constants
- Variables
- func AA2Index(aa uint8) (idx int, err error)
- func AlphabetFromString(alphabet string) int
- func Complement(seq []uint8) (err error)
- func DetectAlphabet(seq string) int
- func EqualOrCompatible(nt1, nt2 uint8) (ok bool, err error)
- func GenAllPossibleCodons(nt1, nt2, nt3 uint8) (codons []string)
- func Index2AA(index int) (aa uint8, err error)
- func Index2Nt(index int) (nt uint8, err error)
- func NewAlign(alphabet int) *align
- func NewPwAligner(seq1, seq2 Sequence, algo int) *pwaligner
- func NewSeqBag(alphabet int) *seqbag
- func NewSequence(name string, sequence []uint8, comment string) *seq
- func Nt2Index(nt uint8) (idx int, err error)
- func Nt2IndexIUPAC(nt uint8) (idx uint8, err error)
- func NtIUPACDifference(nt1, nt2 uint8) (diff float64, err error)
- func PossibleNtIUPAC(nt uint8) (idx []uint8, err error)
- func RandomSequence(alphabet, length int) ([]uint8, error)
- func Reverse(seq []uint8)
- type AlignChannel
- type Alignment
- type CountProfile
- func (p *CountProfile) AppendCount(i, count int) (err error)
- func (p *CountProfile) CheckLength(length int) bool
- func (p *CountProfile) Count(r uint8, site int) (count int, err error)
- func (p *CountProfile) CountAt(i, site int) (count int, err error)
- func (p *CountProfile) CountsAt(i int) (counts []int, err error)
- func (p *CountProfile) NameAt(i int) (name uint8, err error)
- func (p *CountProfile) NameIndex(r uint8) (index int, ok bool)
- func (p *CountProfile) NbCharacters() (nb int)
- func (p *CountProfile) Print()
- func (p *CountProfile) SetHeader(header []uint8)
- type Mutation
- type PairwiseAligner
- type PartitionSet
- func (ps *PartitionSet) AddRange(partName, modelName string, start, end, modulo int) (err error)
- func (ps *PartitionSet) AliLength() int
- func (ps *PartitionSet) CheckSites() (err error)
- func (ps *PartitionSet) ModeleName(code int) string
- func (ps *PartitionSet) NPartitions() int
- func (ps *PartitionSet) Partition(position int) int
- func (ps *PartitionSet) PartitionName(code int) string
- func (ps *PartitionSet) String() string
- type PhasedSequence
- type Phaser
- type SeqBag
- type Sequence
Constants ¶
// Pairwise alignment constants.
//
// ALIGN_UP, ALIGN_LEFT, ALIGN_DIAG and ALIGN_STOP take the values 0 to 3
// (presumably the traceback directions of the alignment matrix — TODO confirm).
//
// ALIGN_ALGO_SW and ALIGN_ALGO_ATG identify the alignment algorithm
// (see the algo argument of NewPwAligner). Because iota is the index of the
// ConstSpec inside this single const block and is not reset by the second
// "= iota", ALIGN_ALGO_SW is 4 and ALIGN_ALGO_ATG is 5.
const (
	ALIGN_UP = iota
	ALIGN_LEFT
	ALIGN_DIAG
	ALIGN_STOP

	ALIGN_ALGO_SW = iota
	ALIGN_ALGO_ATG
)
// Package-wide constants: alphabets, special characters, PSSM normalization
// modes, file formats, site conservation classes, genetic codes, duplicate
// handling modes and the IUPAC nucleotide bit codes.
const (
	AMINOACIDS = 0 // Amino acid sequence alphabet
	NUCLEOTIDS = 1 // Nucleotide sequence alphabet
	BOTH       = 2 // Could be both
	UNKNOWN    = 3 // Unknown alphabet

	GAP   = '-'
	POINT = '.'
	OTHER = '*'

	ALL_AMINO = 'X'
	ALL_NUCLE = 'N'

	PSSM_NORM_NONE = 0 // No normalization
	PSSM_NORM_FREQ = 1 // Normalization by freq in the site
	PSSM_NORM_DATA = 2 // Normalization by aa/nt frequency in data
	PSSM_NORM_UNIF = 3 // Normalization by uniform frequency
	PSSM_NORM_LOGO = 4 // Normalization like LOGO : v(site)=freq*(log2(alphabet)-H(site)-pseudocount

	FORMAT_FASTA     = 0
	FORMAT_PHYLIP    = 1
	FORMAT_NEXUS     = 2
	FORMAT_CLUSTAL   = 3
	FORMAT_STOCKHOLM = 4

	POSITION_IDENTICAL      = 0 // All characters in a position are the same
	POSITION_CONSERVED      = 1 // Same strong group
	POSITION_SEMI_CONSERVED = 2 // Same weak group
	POSITION_NOT_CONSERVED  = 3 // None of the above values

	GENETIC_CODE_STANDARD         = 0 // Standard genetic code
	GENETIC_CODE_VETEBRATE_MITO   = 1 // Vertebrate mitochondrial genetic code
	GENETIC_CODE_INVETEBRATE_MITO = 2 // Invertebrate mitochondrial genetic code

	IGNORE_NONE     = 0
	IGNORE_NAME     = 1
	IGNORE_SEQUENCE = 2

	// IUPAC Nucleotide Code : For bitwise operations.
	// Each unambiguous nucleotide owns one bit; every ambiguous code is the
	// bitwise OR of the nucleotides it may stand for.
	NT_OTHER = 0 // GAP, *, etc;?
	NT_A     = 1
	NT_C     = 2
	NT_G     = 4
	NT_T     = 8
	NT_R     = NT_A | NT_G
	NT_Y     = NT_C | NT_T
	NT_S     = NT_G | NT_C
	NT_W     = NT_A | NT_T
	NT_K     = NT_G | NT_T
	NT_M     = NT_A | NT_C
	NT_B     = NT_C | NT_G | NT_T
	NT_D     = NT_A | NT_G | NT_T
	NT_H     = NT_A | NT_C | NT_T
	NT_V     = NT_A | NT_C | NT_G
	NT_N     = NT_A | NT_C | NT_G | NT_T
)
Variables ¶
// IupacCode maps each IUPAC nucleotide character to the list of unambiguous
// nucleotides (A, C, G, T) it may stand for. The gap character '-' maps to
// itself.
var IupacCode = map[uint8][]uint8{
'A': {'A'},
'C': {'C'},
'G': {'G'},
'T': {'T'},
'R': {'A', 'G'},
'Y': {'C', 'T'},
'S': {'G', 'C'},
'W': {'A', 'T'},
'K': {'G', 'T'},
'M': {'A', 'C'},
'B': {'C', 'G', 'T'},
'D': {'A', 'G', 'T'},
'H': {'A', 'C', 'T'},
'V': {'A', 'C', 'G'},
'N': {'A', 'C', 'G', 'T'},
'-': {'-'},
}
Functions ¶
func AlphabetFromString ¶ added in v0.2.3
AlphabetFromString converts the alphabet name to its code. If the alphabet name is not known, it returns align.UNKNOWN.
func DetectAlphabet ¶
func EqualOrCompatible ¶ added in v0.3.4
EqualOrCompatible returns true if the two nucleotides are identical, or if they are compatible in case they are ambiguous.
For example: Y: {C | T} is compatible with S: {G | C} because there is one nt in common. If nt1 or nt2 are not nucleotides, then it returns an error. nt1 and nt2 values are from NT_... in const.go.
func GenAllPossibleCodons ¶ added in v0.3.4
GenAllPossibleCodons generates all possible codons given the 3 nucleotides in arguments. Multiple codons may exist if IUPAC code is employed (R=A|G, etc.). The 3 nucleotides in arguments are converted to upper case and U converted to T. If one character does not correspond to a known nucleotide in IUPAC code, then it returns an empty slice. If one of the nucleotides is a GAP, then it returns an empty slice.
For example GenAllPossibleCodons('A','G','N') should return {"AGA","AGC","AGG","AGT"}.
func NewPwAligner ¶ added in v0.3.0
func NewSequence ¶
func Nt2IndexIUPAC ¶ added in v0.3.4
Nt2IndexIUPAC returns the int code of the given nucleotide. It takes the upper case of the given uint8. Ex: 'B': NT_B.
func NtIUPACDifference ¶ added in v0.3.4
NtIUPACDifference returns the cost of the difference between the two potentially ambiguous nucleotides.
- If the two nucleotides are identical: returns 0.0
- If the two nucleotides are different:
  - If none are ambiguous: returns 1.0
  - Otherwise, returns 1-Card(I)/Card(U), I being the intersection of the sets of possible nucleotides of nt1 and nt2, and U being the union of the sets of possible nucleotides of nt1 and nt2.
For example, if we want to compare Y and S : Y = {C | T} and S = {G | C}. Card(I)=1, Card(U)=3, so diff=2/3
Notes:
- For N vs. A, for example, the difference will be 1-1/4 = 3/4
- For gaps: returns diff=1.0
nt1 and nt2 values are in NT_... of const.go
func PossibleNtIUPAC ¶ added in v0.3.4
PossibleNtIUPAC returns the possible meaning of the given iupac nucleotide Ex: NT_B : {NT_C, NT_G, NT_T}
func RandomSequence ¶ added in v0.1.3
Types ¶
type AlignChannel ¶ added in v0.2.4
AlignChannel is used for iterating over alignments
type Alignment ¶
// Alignment represents a set of aligned sequences (multiple sequence alignment).
// It embeds SeqBag and adds operations that depend on sites (columns).
type Alignment interface {
SeqBag
AddGaps(rate, lenprop float64)
AddAmbiguities(rate, lenprop float64)
Append(Alignment) error // Appends alignment sequences to this alignment
AvgAllelesPerSite() float64
BuildBootstrap(frac float64) Alignment // Bootstrap alignment
CharStatsSite(site int) (map[uint8]int, error)
Clone() (Alignment, error)
CodonAlign(ntseqs SeqBag) (codonAl *align, err error)
// Removes identical patterns/sites and returns the number of occurrences
// of each pattern (order of patterns/sites may have changed)
Compress() []int
// Concatenates the given alignment with this alignment
Concat(Alignment) error
// Computes the majority consensus of the given alignment
// To do so, it takes the majority character at each alignment site
// if ignoreGaps is true, then gaps are not taken into account for majority computation (except if only Gaps)
// if ignoreNs is true, then Ns are not taken into account for majority computation (except if only Ns)
Consensus(ignoreGaps, ignoreNs bool) *align
// Compares all sequences to the first one and counts all differences per sequence
//
// - alldiffs: The set of all differences that have been seen at least once
// - diffs : The number of occurrences of each difference, for each sequence
// Sequences are ordered as the original alignment. Differences are
// written as REFNEW, ex: diffs["AC"]=12 .
CountDifferences() (alldiffs []string, diffs []map[string]int)
// Compares all sequences to the first one and replaces identical characters with .
DiffWithFirst()
Entropy(site int, removegaps bool) (float64, error) // Entropy of the given site
// Positions of potential frameshifts
// if startingGapsAsIncomplete is true, then considers gaps at the beginning
// as incomplete sequence, then takes the right phase
Frameshifts(startingGapsAsIncomplete bool) []struct{ Start, End int }
// Returns informative positions of the alignment. Informative positions
// are sites that contain at least two characters that occur at least twice each
// X, N and GAPS are not considered in this definition
InformativeSites() (sites []int)
// Positions of potential stop in frame
// if startingGapsAsIncomplete is true, then considers gaps at the beginning
// as incomplete sequence, then takes the right phase
Stops(startingGapsAsIncomplete bool, geneticode int) (stops []int, err error)
Length() int // Length of the alignment
// maskreplace defines the replacing character. If maskreplace is "", then masked characters
// are replaced by "N" or "X" depending on the alphabet. Otherwise:
// 1) if maskreplace is AMBIG: just like ""
// 2) if maskreplace is MAJ: Replacing character is most frequent character of the column
// 3) if maskreplace is GAP: Replacing character is a GAP
// if nogap is true, then Mask will not replace gaps with the replacement character
// if noref is true, then does not replace the character if it is the same as the reference sequences (only if refseq is specified).
Mask(refseq string, start, length int, maskreplace string, nogap, noref bool) error // Masks given positions
// Masks unique mutations in the given alignment (not the gaps).
// If refseq is not "" then masks unique characters if
// 1) they are different from the given reference sequence
// 2) or if the reference is a GAP
// maskreplace defines the replacing character. If maskreplace is "", then masked characters
// are replaced by "N" or "X" depending on the alphabet. Otherwise:
// 1) if maskreplace is AMBIG: just like ""
// 2) if maskreplace is MAJ: Replacing character is most frequent character of the column
// 3) if maskreplace is GAP: Replacing character is a GAP
MaskUnique(refseq string, maskreplace string) error
// Masks mutations that appear at most the given maximum number of times in their columns (not the gaps).
// If refseq is not "" then masks these characters if
// 1) they are different from the given reference sequence
// 2) or if the reference is a GAP
// maskreplace defines the replacing character. If maskreplace is "", then masked characters
// are replaced by "N" or "X" depending on the alphabet. Otherwise:
// 1) if maskreplace is AMBIG: just like ""
// 2) if maskreplace is MAJ: Replacing character is most frequent character of the column
// 3) if maskreplace is GAP: Replacing character is a GAP
MaskOccurences(refseq string, maxOccurence int, maskreplace string) error
MaxCharStats(excludeGaps, excludeNs bool) (out []uint8, occur []int, total []int)
Mutate(rate float64) // Adds uniform substitutions in the alignment (~sequencing errors)
NbVariableSites() int // Nb of variable sites
// Number of Gaps in each sequence that are unique in their alignment site
NumGapsUniquePerSequence(countProfile *CountProfile) (numuniques []int, numnew []int, numboth []int, err error)
// Returns the number of characters in each sequence that are unique in their alignment site (gaps or others)
// It does not take into account 'N' and '-' as unique mutations
NumMutationsUniquePerSequence(profile *CountProfile) (numuniques []int, numnew []int, nummuts []int, err error)
Pssm(log bool, pseudocount float64, normalization int) (pssm map[uint8][]float64, err error) // Normalization: PSSM_NORM_NONE, PSSM_NORM_UNIF, PSSM_NORM_DATA
Rarefy(nb int, counts map[string]int) (Alignment, error) // Take a new rarefied sample taking weights into account
RandSubAlign(length int, consecutive bool) (Alignment, error) // Extract a random subalignment with given length from this alignment
Recombine(rate float64, lenprop float64, swap bool) error
// Converts coordinates on the given sequence to coordinates on the alignment
RefCoordinates(name string, refstart, refend int) (alistart, aliend int, err error)
// Converts sites on the given sequence to coordinates on the alignment
RefSites(name string, sites []int) (refsites []int, err error)
// Overwrites the character at position "site" of the sequence "seqname" by "newchar"
ReplaceChar(seqname string, site int, newchar uint8) error
// Removes sites having >= cutoff gaps, returns the number of consecutive removed sites at start and end of alignment
RemoveGapSites(cutoff float64, ends bool) (first, last int, kept, removed []int)
// Removes sites having >= cutoff character, returns the number of consecutive removed sites at start and end of alignment
RemoveCharacterSites(c []uint8, cutoff float64, ends bool, ignoreCase, ignoreGaps, ignoreNs, reverse bool) (first, last int, kept, removed []int)
// Removes sites having >= cutoff of the main character at these sites, returns the number of consecutive removed sites at start and end of alignment
RemoveMajorityCharacterSites(cutoff float64, ends, ignoreGaps, ignoreNs bool) (first, last int, kept, removed []int)
// Replaces match characters (.) by their corresponding characters on the first sequence
ReplaceMatchChars()
Sample(nb int) (Alignment, error) // Generate a sub sample of the sequences
ShuffleSites(rate float64, roguerate float64, randroguefirst bool) []string
SimulateRogue(prop float64, proplen float64) ([]string, []string) // Add "rogue" sequences
SiteConservation(position int) (int, error) // Conservation class of the site (presumably one of the POSITION_* constants — TODO confirm)
Split(part *PartitionSet) ([]Alignment, error) // Splits the alignment given the partitions in argument
SubAlign(start, length int) (Alignment, error) // Extract a subalignment from this alignment
SelectSites(sites []int) (Alignment, error) // Extract given sites from the alignment
InverseCoordinates(start, length int) (invstarts, invlengths []int, err error)
InversePositions(sites []int) (invsites []int, err error)
// Swap will exchange sequences from one seq to another of the alignment
// if rate>=0 and rate<=1 then it takes rate/2 sequences and exchanges sequences
// with rate/2 other sequences, from a random position
// if pos >=0 and <=1, then take this position (relative to align length) instead of a random one
Swap(rate, pos float64) error
// TranslateByReference translates the alignment codon by codon using the given reference sequence as guide
// We traverse reference nt 3 by 3
// The reference codon may have gaps between nt,
// ex 1:
// Ref: AC--GTACGT
// Seq: ACTTGTACGT
// In that case, the first ref codon is [0,1,4], corresponding to sequence ACTTG in seq
// ACTTG % 3 != 0 ==> Frameshift? => Replaced by X in the compared sequence.
// ex 2:
// Ref: AC---GTACGT
// Seq: ACTTTGTACGT
// ref codon: [0,1,5]
// seq : ACTTTG : Insertion - OK => Replaced by "T-" in ref and "TT" in seq
// ex 3:
// Ref: ACGTACGT
// Seq: A--TACGT
// ref codon: [0,1,2]
// seq : A--: Deletion: not ok : Frameshift? => Replaced by "T" in ref and "X" in comp
// ex 4:
// Ref: AC----GTACGT
// Seq: ACTT-TGTACGT
// ref codon: [0,1,6]
// seq : ACTTTG : Insertion - OK => Replaced by "T-" in ref and "TT" in seq
// ex 5:
// Ref: AC----GTACGT
// Seq: ACT--TGTACGT
// ref codon: [0,1,6]
// seq : ACTTTG : Insertion not OK : Frameshift? => Replaced by "T-" in ref and "XX" in seq
TranslateByReference(phase int, geneticcode int, refseq string) (err error)
Transpose() (Alignment, error) // Output sequences are made of sites and output sites are sequences
TrimSequences(trimsize int, fromStart bool) error
}
Alignment represents a set of aligned sequences (multiple Sequence Alignment)
func RandomAlignment ¶ added in v0.1.3
RandomAlignment generates a random alignment with a given alphabet, length and number of sequences. Each character is randomly chosen from a uniform distribution.
type CountProfile ¶ added in v0.3.4
type CountProfile struct {
// contains filtered or unexported fields
}
CountProfile represents a simple view of an alignment and stores the number of occurrences of each character at each position of an alignment.
func NewCountProfile ¶ added in v0.3.4
func NewCountProfile() (p *CountProfile)
NewCountProfile initializes a new Profile with nil attributes
func NewCountProfileFromAlignment ¶ added in v0.3.4
func NewCountProfileFromAlignment(al Alignment) (p *CountProfile)
NewCountProfileFromAlignment initializes a new CountProfile using an input alignment
func (*CountProfile) AppendCount ¶ added in v0.3.4
func (p *CountProfile) AppendCount(i, count int) (err error)
AppendCount appends a new site to the profile for the ith character, and associates count to it
func (*CountProfile) CheckLength ¶ added in v0.3.4
func (p *CountProfile) CheckLength(length int) bool
CheckLength returns true if the number of sites of the profile corresponds to the given length, false otherwise.
func (*CountProfile) Count ¶ added in v0.3.4
func (p *CountProfile) Count(r uint8, site int) (count int, err error)
Count returns the number of occurrences of the character r at the position site
func (*CountProfile) CountAt ¶ added in v0.3.4
func (p *CountProfile) CountAt(i, site int) (count int, err error)
CountAt returns the number of occurrences of the ith character at the position site
func (*CountProfile) CountsAt ¶ added in v0.3.4
func (p *CountProfile) CountsAt(i int) (counts []int, err error)
CountsAt returns the counts for all sites, for the ith character (arbitrary order of character)
func (*CountProfile) NameAt ¶ added in v0.3.4
func (p *CountProfile) NameAt(i int) (name uint8, err error)
NameAt returns the name of ith character in the header
func (*CountProfile) NameIndex ¶ added in v0.3.4
func (p *CountProfile) NameIndex(r uint8) (index int, ok bool)
NameIndex returns the index of the given character in the header. If the character does not exist, ok is false.
func (*CountProfile) NbCharacters ¶ added in v0.3.4
func (p *CountProfile) NbCharacters() (nb int)
NbCharacters returns the number of different characters in the profile
func (*CountProfile) Print ¶ added in v0.3.4
func (p *CountProfile) Print()
func (*CountProfile) SetHeader ¶ added in v0.3.4
func (p *CountProfile) SetHeader(header []uint8)
SetHeader sets the Header and initializes the count structure
type PairwiseAligner ¶ added in v0.3.0
// PairwiseAligner exposes the scoring parameters and the results of a
// pairwise alignment between two sequences (see NewPwAligner).
type PairwiseAligner interface {
AlignEnds() (int, int)
AlignStarts() (int, int)
Seq1Ali() []uint8
Seq2Ali() []uint8
SetGapOpenScore(open float64)
SetGapExtendScore(extend float64)
SetScore(match, mismatch float64)
MaxScore() float64 // Maximum score of the alignment
NbMatches() int // Number of matches
NbMisMatches() int // Number of mismatches
NbGaps() int // Number of gaps
Length() int // Length of the alignment
Alignment() (Alignment, error)
AlignmentStr() string
}
type PartitionSet ¶ added in v0.3.2
type PartitionSet struct {
// contains filtered or unexported fields
}
func NewPartitionSet ¶ added in v0.3.2
func NewPartitionSet(alignmentLength int) (ps *PartitionSet)
func (*PartitionSet) AddRange ¶ added in v0.3.2
func (ps *PartitionSet) AddRange(partName, modelName string, start, end, modulo int) (err error)
func (*PartitionSet) AliLength ¶ added in v0.3.2
func (ps *PartitionSet) AliLength() int
AliLength returns the length of the alignment.
func (*PartitionSet) CheckSites ¶ added in v0.3.2
func (ps *PartitionSet) CheckSites() (err error)
If not all sites are in a partition, returns an error
func (*PartitionSet) ModeleName ¶ added in v0.3.2
func (ps *PartitionSet) ModeleName(code int) string
Returns the name of the model associated to the given index. If the code does not exist, then returns "".
func (*PartitionSet) NPartitions ¶ added in v0.3.2
func (ps *PartitionSet) NPartitions() int
func (*PartitionSet) Partition ¶ added in v0.3.2
func (ps *PartitionSet) Partition(position int) int
Returns the partition code associated to the given position
If the position is outside the alignment, then returns -1
func (*PartitionSet) PartitionName ¶ added in v0.3.2
func (ps *PartitionSet) PartitionName(code int) string
Returns the name of the partition associated to the given index. If the code does not exist, then returns "".
func (*PartitionSet) String ¶ added in v0.3.2
func (ps *PartitionSet) String() string
type PhasedSequence ¶ added in v0.3.0
// PhasedSequence is the element type of the channel returned by Phaser.Phase.
type PhasedSequence struct {
// NOTE(review): presumably the error met while phasing this sequence — confirm.
Err error
// NOTE(review): presumably true when the sequence was discarded by the phaser — confirm.
Removed bool
// NOTE(review): presumably the start position found in the nucleotide sequence — confirm.
Position int
// phased nt sequence
NtSeq Sequence
// phased nt sequence
// with first nt corresponding to the
// first position of aa codon
CodonSeq Sequence
// phased aa sequence
AaSeq Sequence
// Aligned sequences
// 1st: best found orf
// 2nd: sequence
Ali Alignment
}
type Phaser ¶ added in v0.3.0
// Phaser phases sequences against ORFs: Phase takes the ORFs and the
// sequences and returns a channel of PhasedSequence results. The setters
// configure the run before calling Phase.
type Phaser interface {
Phase(orfs, seqs SeqBag) (chan PhasedSequence, error)
SetLenCutoff(cutoff float64) // NOTE(review): presumably the "lencutoff" alignment-length threshold — confirm
SetMatchCutoff(cutoff float64) // NOTE(review): presumably the "matchcutoff" match threshold — confirm
SetReverse(reverse bool) // NOTE(review): presumably also considers the reverse strand when true — confirm
SetCutEnd(cutend bool) // NOTE(review): presumably also trims sequence ends that do not align with the ORF — confirm
SetCpus(cpus int)
SetTranslate(translate bool, geneticcode int) (err error)
SetAlignScores(match, mismatch float64)
SetGapOpen(float64)
SetGapExtend(float64)
}
* If SetTranslate(true):
aligns all sequences to the given ORF and trims sequences to the start position. If orf is nil, searches for the longest ORF (in 3 or 6 phases depending on reverse arg) in all sequences.
To do so, Phase() will:
- Translate the given ORF in aminoacids;
- For each sequence of the dataset: translate it in the 3 phases (forward) if reverse is false or 6 phases (forward and reverse) if reverse is true, align it with the translated orf, and take the phase giving the best alignment; If no phase gives a good alignment (>lencutoff * orf length, >matchcutoff matches over the align length and starting at first position of the ORF), then the sequence is discarded;
- For each sequence, take the Start corresponding to the Start of the ORF, and remove nucleotides before;
- Return the trimmed nucleotidic sequences (phased), the corresponding amino-acid sequences (phasedaa), the positions of starts in the nucleotidic sequences, and the removed sequence names.
If cutend is true, then also remove the end of sequences that do not align with orf.
It does not modify the input object.
* If SetTranslate(false):
align all sequences to the given ORF and trims sequences to the start position, it does not take into account protein information
If orf is nil, searches for the longest ORF (in forward only or both strands depending on reverse arg) in all sequences.
To do so:
1. If the alignment is bad (i.e. it does not satisfy: >lencutoff * orf length, >matchcutoff matches over the align length and starting at first position of the ORF), then the sequence is discarded;
2. For each sequence, take the Start corresponding to the Start of the ORF, and remove nucleotides before;
3. Return the trimmed nucleotidic sequences (phased), the positions of starts in the nucleotidic sequences, and the removed sequence names.
If cutend is true, then also remove the end of sequences that do not align with orf.
It does not modify the input object.
type SeqBag ¶ added in v0.3.0
// SeqBag represents a set of unaligned sequences.
type SeqBag interface {
AddSequence(name string, sequence string, comment string) error
AddSequenceChar(name string, sequence []uint8, comment string) error
AppendSeqIdentifier(identifier string, right bool)
Alphabet() int
SetAlphabet(int) error // Sets the alphabet
AlphabetStr() string
AlphabetCharacters() []uint8
AlphabetCharToIndex(c uint8) int // Returns index of the character (nt or aa) in the AlphabetCharacters() array
AutoAlphabet() // Detects and sets alphabet automatically for all the sequences
DetectAlphabet() (alphabet int) // Detects the compatible alphabets
CharStats() map[uint8]int64
UniqueCharacters() []uint8
CharStatsSeq(idx int) (map[uint8]int, error) // Computes frequency of characters for the given sequence
CleanNames(namemap map[string]string) // Clean sequence names (newick special char)
Clear() // Removes all sequences
CloneSeqBag() (seqs SeqBag, err error) // Clones the seqbag
Deduplicate(nAsGap bool) (identical [][]string, err error) // Remove duplicate sequences (nAsGap is for considering N/X identical to gaps for sequence comparison)
FilterLength(minlength, maxlength int) error // Remove sequences whose length is <minlength or >maxlength
GetSequence(name string) (string, bool) // Get a sequence by name
GetSequenceById(ith int) (string, bool)
GetSequenceChar(name string) ([]uint8, bool)
GetSequenceCharById(ith int) ([]uint8, bool)
GetSequenceNameById(ith int) (string, bool)
GetSequenceByName(name string) (Sequence, bool)
GetSequenceIdByName(name string) (i int) // If the name does not exist, i < 0
SetSequenceChar(ithAlign, ithSite int, char uint8) error
// IgnoreIdentical sets the behavior when duplicate names are encountered while building the alignment
// If ignore is IGNORE_NONE: Does not ignore anything
// If ignore is IGNORE_NAME: Ignore sequences having the same name (keep the first one whatever their sequence)
// If ignore is IGNORE_SEQUENCE: Ignore sequences having the same name and the same sequence
// Otherwise, sets IGNORE_NONE
IgnoreIdentical(int)
SampleSeqBag(nb int) (SeqBag, error) // Generate a sub sample of the sequences
Sequence(ith int) (Sequence, bool)
SequenceByName(name string) (Sequence, bool)
Identical(SeqBag) bool
Iterate(it func(name string, sequence string) bool)
IterateChar(it func(name string, sequence []uint8) bool)
IterateAll(it func(name string, sequence []uint8, comment string) bool)
Sequences() []Sequence
SequencesChan() chan Sequence
LongestORF(reverse bool) (orf Sequence, err error)
MaxNameLength() int // Maximum sequence name length
NbSequences() int
RarefySeqBag(nb int, counts map[string]int) (SeqBag, error) // Take a new rarefied sample taking weights into account
// Removes sequences having >= cutoff gaps, returns number of removed sequences
RemoveGapSeqs(cutoff float64, ignoreNs bool) int
// Removes sequences having >= cutoff character, returns number of removed sequences
RemoveCharacterSeqs(c uint8, cutoff float64, ignoreCase, ignoreGaps, ignoreNs bool) int
Rename(namemap map[string]string)
RenameRegexp(regex, replace string, namemap map[string]string) error
Replace(old, new string, regex bool) error // Replaces old string with new string in sequences of the alignment
ReplaceStops(phase int, geneticode int) error // Replaces stop codons in the given phase using the given genetic code
ShuffleSequences() // Shuffle sequence order
String() string // Raw string representation (just write all sequences)
Translate(phase int, geneticcode int) (err error) // Translates nt sequence in aa
ToUpper() // Replaces lower case characters by upper case characters
ToLower() // Replaces upper case characters by lower case characters
ReverseComplement() (err error) // Reverse-complements the alignment
ReverseComplementSequences(name ...string) (err error) // Reverse-complements some sequences in the alignment
TrimNames(namemap map[string]string, size int) error
TrimNamesAuto(namemap map[string]string, curid *int) error
Sort() // Sorts the sequences by name
Unalign() SeqBag
}
SeqBag represents a set of unaligned sequences
type Sequence ¶
// Sequence is a single named sequence (nucleotides or amino acids) with an
// optional comment.
type Sequence interface {
Sequence() string
SequenceChar() []uint8
SameSequence([]uint8) bool
CharAt(int) uint8
Name() string
SetName(name string)
Comment() string
Length() int
LongestORF() (start, end int) // Detects the longest ORF in forward strand only
Reverse()
Complement() error // Returns an error if not nucleotide sequence
Translate(phase int, geneticcode int) (Sequence, error) // Translates the sequence using the given code
DetectAlphabet() int // Try to detect alphabet (nt or aa)
NumGaps() int // Number of Gaps
NumGapsOpenning() int // Number of gap openings: each stretch of gaps is counted only once
NumGapsFromStart() int // Number of Gaps from Start (until a non gap is encountered)
NumGapsFromEnd() int // Number of Gaps from End (until a non gap is encountered)
// Returns the number of differences between the reference sequence and each sequence of the alignment
// If lengths are different, returns an error
// It does not take into account 'N' and '-' in sequences as mutations compared to ref
// sequence (ref sequence can have a '-' or a 'N')
NumMutationsComparedToReferenceSequence(alphabet int, seq Sequence) (nummutations int, err error)
// Returns the list of differences between the reference sequence and each sequence of the alignment
// Counts only non N sites in each sequence (may be a gap or a N in the reference sequence though)
// If a character is ambiguous (IUPAC notation), then it is counted as a mutation only if it is incompatible with
// the reference character.
// if codon is true: the sequences are nucleotides and nucleotides are taken codon by codon of the reference sequence
// to list mutations.
// if translate is true (codon must be true also): Translate each codon. In case of insertion or a deletion in the target sequence: if %3==0: - or aa insert,
// otherwise "/" ~frameshift?
//
// If lengths are different, returns an error
ListMutationsComparedToReferenceSequence(alphabet int, refseq Sequence, codon bool, translate bool) (mutations []Mutation, err error)
Clone() Sequence
}