Clustal Omega  1.2.4
Macros | Functions
seq.c File Reference
#include <assert.h>
#include "squid/squid.h"
#include <ctype.h>
#include "util.h"
#include "log.h"
#include "seq.h"
Include dependency graph for seq.c:

Macros

#define ALLOW_ONLY_PROTEIN   0
 

Functions

void AliStat (mseq_t *prMSeq, bool bSampling, bool bReportAll)
 Stripped down version of squid's alistat. More...
 
void ShuffleMSeq (mseq_t *mseq)
 Shuffle mseq order. More...
 
void SeqSwap (mseq_t *prMSeq, int i, int j)
 Swap two sequences in an mseq_t structure. More...
 
void DealignMSeq (mseq_t *mseq)
 Dealigns all sequences in mseq structure, updates the sequence length info and sets aligned to FALSE. More...
 
void LogSqInfo (SQINFO *sqinfo)
 debug output of sqinfo struct More...
 
const char * SeqTypeToStr (int iSeqType)
 convert int-encoded iSeqType to string More...
 
int ReadSequences (mseq_t *prMSeq, char *seqfile, int iSeqType, int iSeqFmt, bool bIsProfile, bool bDealignInputSeqs, int iMaxNumSeq, int iMaxSeqLen, char *pcHMMBatch)
 reads sequences from file More...
 
void NewMSeq (mseq_t **prMSeq)
 allocate and initialise new mseq_t More...
 
void CopyMSeq (mseq_t **prMSeqDest_p, mseq_t *prMSeqSrc)
 copies an mseq structure More...
 
int FindSeqName (char *seqname, mseq_t *mseq)
 
void FreeMSeq (mseq_t **mseq)
 Frees an mseq_t and its members and zeros all members. More...
 
int WriteAlignment (mseq_t *mseq, const char *pcAlnOutfile, int outfmt, int iWrap, bool bResno)
 Write alignment to file. More...
 
void DealignSeq (char *seq)
 Removes all gap-characters from a sequence. More...
 
void SortMSeqByLength (mseq_t *prMSeq, const char cOrder)
 Sort sequences by length. More...
 
bool SeqsAreAligned (mseq_t *prMSeq, bool bIsProfile, bool bDealignInputSeqs)
 Checks if sequences in given mseq structure are aligned. By definition this is only true if the sequences are of the same length and at least one gap was found. More...
 
void AddSeq (mseq_t **prMSeqDest_p, char *pcSeqName, char *pcSeqRes)
 Creates a new sequence entry and appends it to an existing mseq structure. More...
 
void JoinMSeqs (mseq_t **prMSeqDest_p, mseq_t *prMSeqToAdd)
 Appends an mseq structure to an already existing one. filename will be left untouched. More...
 

Macro Definition Documentation

◆ ALLOW_ONLY_PROTEIN

#define ALLOW_ONLY_PROTEIN   0

Function Documentation

◆ AddSeq()

void AddSeq ( mseq_t **  prMSeqDest_p,
char *  pcSeqName,
char *  pcSeqRes 
)

Creates a new sequence entry and appends it to an existing mseq structure.

Parameters
[out]prMSeqDest_pAlready existing and initialised mseq structure
[in]pcSeqNamesequence name of the sequence to add
[in]pcSeqResthe actual sequence (residues) to add
Note
Don't forget to update the align and type flag if necessary!

FIXME allow adding of more features

◆ AliStat()

void AliStat ( mseq_t *  prMSeq,
bool  bSampling,
bool  bReportAll 
)

Stripped down version of squid's alistat.

Parameters
[in]prMSeqThe alignment to analyse
[in]bSamplingFor many sequences: samples from pool
[in]bReportAllReport identities for all sequence pairs

Don't have to worry about sequence case because our version of PairwiseIdentity is case insensitive

mseq to squid msa

FIXME code overlap with WriteAlignment. Make it a function and take code there (contains more comments) as template

◆ CopyMSeq()

void CopyMSeq ( mseq_t **  prMSeqDest_p,
mseq_t *  prMSeqSrc 
)

copies an mseq structure

Parameters
[out]prMSeqDest_pCopy of mseq structure
[in]prMSeqSrcSource mseq structure to copy
Note
caller has to free copy by calling FreeMSeq()

◆ DealignMSeq()

void DealignMSeq ( mseq_t *  mseq)

Dealigns all sequences in mseq structure, updates the sequence length info and sets aligned to FALSE.

Parameters
[out]mseqThe mseq structure to dealign

◆ DealignSeq()

void DealignSeq ( char *  seq)

Removes all gap-characters from a sequence.

Parameters
[out]seqSequence to dealign
Note
seq will not be reallocated

◆ FindSeqName()

int FindSeqName ( char *  seqname,
mseq_t *  mseq 
)
Parameters
[in]seqnameThe sequence name to search for
[in]mseqThe multiple sequence structure to search in
Returns
-1 on failure, sequence index of matching name otherwise
Warning
If sequence name happens to be used twice, only the first one will be reported back

◆ FreeMSeq()

void FreeMSeq ( mseq_t **  mseq)

Frees an mseq_t and its members and zeros all members.

Parameters
[in]mseqmseq_t to free
Note
use in conjunction with NewMSeq()
See also
NewMSeq

◆ JoinMSeqs()

void JoinMSeqs ( mseq_t **  prMSeqDest_p,
mseq_t *  prMSeqToAdd 
)

Appends an mseq structure to an already existing one. filename will be left untouched.

Parameters
[in,out]prMSeqDest_pMSeq structure to append to
[in]prMSeqToAddMSeq structure to be appended

◆ LogSqInfo()

void LogSqInfo ( SQINFO *  sqinfo)

debug output of sqinfo struct

Parameters
[in]sqinfoSquid's SQINFO struct for a certain sequence
Note
useful for debugging only

◆ NewMSeq()

void NewMSeq ( mseq_t **  prMSeq)

allocate and initialise new mseq_t

Parameters
[out]prMSeqnewly allocated and initialised mseq_t
Note
caller has to free by calling FreeMSeq()
See also
FreeMSeq

◆ ReadSequences()

int ReadSequences ( mseq_t *  prMSeq,
char *  seqfile,
int  iSeqType,
int  iSeqFmt,
bool  bIsProfile,
bool  bDealignInputSeqs,
int  iMaxNumSeq,
int  iMaxSeqLen,
char *  pcHMMBatch 
)

reads sequences from file

Parameters
[out]prMSeqMultiple sequence struct. Must be preallocated. FIXME: would make more sense to allocate it here.
[in]seqfileSequence file name. If '-' sequence will be read from stdin.
[in]iSeqTypeint-encoded sequence type. Set to SEQTYPE_UNKNOWN for autodetect (guessed from first sequence)
[in]iMaxNumSeqReturn an error if more than iMaxNumSeq sequences have been read
[in]iMaxSeqLenReturn an error if a sequence longer than iMaxSeqLen has been read
Returns
0 on success, -1 on error
Note
  • Depends heavily on squid
  • Sequence file format will be guessed
  • If supported by squid, gzipped files can be read as well.

◆ SeqsAreAligned()

bool SeqsAreAligned ( mseq_t *  prMSeq,
bool  bIsProfile,
bool  bDealignInputSeqs 
)

Checks if sequences in given mseq structure are aligned. By definition this is only true if the sequences are of the same length and at least one gap was found.

Parameters
[in]prMSeqSequences to check
Returns
TRUE if sequences are aligned, FALSE if not

◆ SeqSwap()

void SeqSwap ( mseq_t *  prMSeq,
int  i,
int  j 
)

Swap two sequences in an mseq_t structure.

Parameters
[out]prMSeqMultiple sequence struct
[in]iIndex of first sequence
[in]jIndex of second sequence

◆ SeqTypeToStr()

const char* SeqTypeToStr ( int  iSeqType)

convert int-encoded iSeqType to string

Parameters
[in]iSeqTypeint-encoded sequence type
Returns
character pointer describing the sequence type

◆ ShuffleMSeq()

void ShuffleMSeq ( mseq_t *  mseq)

Shuffle mseq order.

Parameters
[out]mseqmseq structure to shuffle

◆ SortMSeqByLength()

void SortMSeqByLength ( mseq_t *  prMSeq,
const char  cOrder 
)

Sort sequences by length.

Parameters
[out]prMSeqmseq to sort by length
[in]cOrderSorting order. 'd' for descending, 'a' for ascending.

◆ WriteAlignment()

int WriteAlignment ( mseq_t *  mseq,
const char *  pcAlnOutfile,
int  outfmt,
int  iWrap,
bool  bResno 
)

Write alignment to file.

Parameters
[in]mseqThe mseq_t struct containing the aligned sequences
[in]pcAlnOutfileThe name of the output file
[in]outfmtThe alignment output format (defined in squid.h)
[in]iWraplength of line for Clustal/Fasta format
Returns
Non-zero on error
Note
We create a temporary squid MSA struct in here because we never use it within clustal. We might be better off using the old clustal output routines instead.