29
29
* Version Creation Date: 10/01
33
33
* File Description: SeqAlign indexing, access, and manipulation functions
36
36
* --------------------------------------------------------------------------
37
37
* $Log: alignmgr2.h,v $
38
* Revision 6.21 2003/10/09 13:46:39 rsmith
39
* Add AlnMgr2GetFirstNForSipList.
41
* Revision 6.20 2003/04/23 20:37:06 rsmith
42
* Added four functions in section 11 to allow examination of Std-Seg alignments.
44
* Revision 6.19 2003/03/31 20:17:11 todorov
45
* Added AlnMgr2IndexSeqAlignEx
47
* Revision 6.18 2002/08/07 21:57:33 kans
48
* added AlignMgr2GetFirstNForStdSeg
50
* Revision 6.17 2002/07/11 14:35:51 kans
51
* fixed Mac complaints about prototypes
53
* Revision 6.16 2002/07/11 12:55:33 wheelan
54
* added support for std-seg alignments
56
* Revision 6.15 2002/05/21 12:26:25 wheelan
57
* added n5 field to AMSmallPtr
59
* Revision 6.14 2002/04/09 18:21:55 wheelan
60
* changed params for AlnMgr2IndexAsRows
62
* Revision 6.13 2002/03/04 17:19:29 wheelan
63
* added AlnMgr2FuseSet, changed behavior of RemoveInconsistent
65
* Revision 6.12 2002/01/30 19:12:20 wheelan
66
* added RemoveInconsistentAlnsFromSet, ExtractPairwiseSeqAlign, changed behavior of GetSubAlign, changed structures and behavior of GetNextAlnBit, added GetInterruptInfo
68
* Revision 6.11 2001/12/28 22:53:46 wheelan
69
* added AlnMgr2DupAlnAndIndexes, changed amaip struct
38
71
* Revision 6.10 2001/12/14 12:38:35 wheelan
39
72
* added functions for ddv
194
246
NLM_EXTERN Boolean LIBCALLBACK AMAlignIndex2Free2(VoidPtr index);
196
typedef struct am_adjacent {
199
} AMAdjac, PNTR AMAdjacPtr;
254
} AMInterrupt, PNTR AMInterruptPtr;
201
256
typedef struct am_msg2 {
202
257
/* fields filled in by calling function */
203
Int4 from_aln; /* from is in alignment coordinates */
204
Int4 to_aln; /* to is in alignment coordinates */
205
Int4 row_num; /* which row the function wants to retrieve */
258
Int4 from_aln; /* from is in alignment coordinates */
259
Int4 to_aln; /* to is in alignment coordinates */
260
Int4 row_num; /* which row the function wants to retrieve */
206
262
/* fields filled in by AlnMgr2GetNextAlnBit */
210
Uint1 type; /* AM_SEQ or AM_GAP */
211
AMAdjacPtr left_insert;
212
AMAdjacPtr right_insert;
213
AMAdjacPtr left_unaligned;
214
AMAdjacPtr right_unaligned;
266
Uint1 type; /* AM_SEQ or AM_GAP */
267
AMInterruptPtr left_interrupt;
268
AMInterruptPtr right_interrupt;
215
270
/* fields used internally */
218
273
} AlnMsg2, PNTR AlnMsg2Ptr;
220
275
NLM_EXTERN AlnMsg2Ptr AlnMsgNew2(void);
658
752
* 'from' to 'to' in the row coordinates specified, or if which_row is 0,
659
753
* 'from' and 'to' are assumed to be alignment coordinates. If 'to' is -1,
660
754
* the subalignment will go to the end of the specified row (or to the end
661
* of the whole alignment).
755
* of the whole alignment). If the alignment is discontinuous and fill_in
756
* is FALSE, the alignment will be returned as an SAS_DISC set, each piece
757
* represented by a single alignment. If the alignment is discontinuous and
758
* fill_in is TRUE, the unaligned regions will be added in to the alignment,
759
* with all gaps in all other rows. If the alignment is continuous, it
760
* doesn't matter whether fill_in is TRUE or FALSE. (SUBALIGN)
663
762
***************************************************************************/
664
NLM_EXTERN SeqAlignPtr AlnMgr2GetSubAlign(SeqAlignPtr sap, Int4 from, Int4 to, Int4 row);
763
NLM_EXTERN SeqAlignPtr AlnMgr2GetSubAlign(SeqAlignPtr sap, Int4 from, Int4 to, Int4 which_row, Boolean fill_in);
666
765
/***************************************************************************
763
862
***************************************************************************/
764
863
NLM_EXTERN SeqAlignPtr AlnMgr2PadConservatively(SeqAlignPtr sap);
865
/***************************************************************************
867
* AlnMgr2ExtractPairwiseSeqAlign takes an indexed alignment (parent or
868
* child, but must be fully indexed, not lite) and extracts a pairwise
869
* subalignment containing the two requested rows. The subalignment is
870
* unindexed and may have internal unaligned regions.
872
***************************************************************************/
873
NLM_EXTERN SeqAlignPtr AlnMgr2ExtractPairwiseSeqAlign(SeqAlignPtr sap, Int4 n1, Int4 n2);
875
/***************************************************************************
877
* AlnMgr2RemoveInconsistentAlnsFromSet takes an alignment that is
878
* indexed at least at the AM2_LITE level, and prunes the child
879
* alignments so that the remaining alignments form a consistent,
880
* nonoverlapping set. All alignments must have the same number of rows,
881
* and they must be the same rows (although not necessarily in the same
882
* order). The function uses a simple greedy algorithm to construct the
883
* nonoverlapping set, starting with the highest-scoring alignment.
884
* If fuzz is negative, the function creates the best nonoverlapping set
885
* by actually truncating alignments.
887
***************************************************************************/
888
NLM_EXTERN void AlnMgr2RemoveInconsistentAlnsFromSet(SeqAlignPtr sap_head, Int4 fuzz);
890
/***************************************************************************
892
* AlnMgr2FuseSet takes a set of alignments sharing all their rows and orders
893
* the alignments, then fuses together any adjacent alignments. If returnall
894
* is TRUE, all pieces are returned; if not, then only the largest piece is
895
* returned. This function will work best when called after
896
* AlnMgr2RemoveInconsistentAlnsFromSet(sap_head, -1).
898
***************************************************************************/
899
NLM_EXTERN SeqAlignPtr AlnMgr2FuseSet(SeqAlignPtr sap_head, Boolean returnall);
901
/* SECTION 11 -- functions for std-segs */
902
/* AlignMgr2GetFirstNForStdSeg takes an alignment (sap) and a sequence id
 * (sip) and returns an Int4.
 * NOTE: this function is spelled with the "AlignMgr2" prefix, not the
 * "AlnMgr2" prefix used by the neighboring std-seg declarations.
 * NOTE(review): presumably the result is the number of the first row of the
 * std-seg alignment that matches sip -- the return value for "not found" is
 * not visible here; confirm against the implementation. */
NLM_EXTERN Int4 AlignMgr2GetFirstNForStdSeg(SeqAlignPtr sap, SeqIdPtr sip);
903
/* AlnMgr2GetNthSeqIdPtrStdSeg takes an alignment (sap) and an index (n) and
 * returns a SeqIdPtr.
 * NOTE(review): presumably this is the sequence id of the nth row of a
 * std-seg alignment; whether n is 0- or 1-based and whether the caller owns
 * the returned pointer cannot be determined from this header -- confirm
 * against the implementation. */
NLM_EXTERN SeqIdPtr AlnMgr2GetNthSeqIdPtrStdSeg(SeqAlignPtr sap, Int4 n);
904
/* AlnMgr2GetNthSeqRangeInSAStdSeg takes an alignment (sap) and an index (n);
 * it returns nothing directly and has two Int4Ptr parameters (start, stop)
 * through which results can be passed back.
 * NOTE(review): presumably *start and *stop receive the sequence coordinate
 * range covered by the nth row of a std-seg alignment; whether either
 * pointer may be NULL is not visible here -- confirm against the
 * implementation. */
NLM_EXTERN void AlnMgr2GetNthSeqRangeInSAStdSeg(SeqAlignPtr sap, Int4 n, Int4Ptr start, Int4Ptr stop);
906
/***************************************************************************
908
* AlnMgr2GetSeqRangeForSipInSAStdSeg returns the smallest and largest sequence
909
* coordinates in a Std-Seg seqalign for a given Sequence Id. Also return the
910
* strand type if it is the same on every segment, else set it to Seq_strand_unknown.
911
* Either start, stop or strand can be NULL to only retrieve some of them.
912
* If start and stop are -1, either there was an error (not a std-seg), the SeqID does not participate in this
913
* alignment or the alignment is one big insert on that id. Returns true if the sip was found
914
* in the alignment with real coordinates, i.e. *start would not be -1. RANGE
916
***************************************************************************/
917
NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInSAStdSeg(SeqAlignPtr sap, SeqIdPtr sip, Int4Ptr start, Int4Ptr stop, Uint1Ptr strand);
919
/***************************************************************************
921
* AlnMgr2GetSeqRangeForSipInStdSeg returns the start and stop sequence
922
* coordinates in a Std-Segment for a given Sequence Id. Also return the
923
* strand type. Either start, stop, strand or segType can be NULL to only retrieve some of them.
924
* Returns false if the SeqID was not found in this segment, so no meaningful
925
* data was passed back in other arguments.
926
* Returns true if the sip was found, even if it is a gap (start, stop = -1).
927
* segType is set to AM_SEQ if the SeqID Sequence is not empty and one of
928
* the other sequences aligned with it is also not empty. To AM_GAP if
929
* the other sequences are all empty, and to AM_INSERT if the main sequence
933
***************************************************************************/
934
NLM_EXTERN Boolean AlnMgr2GetSeqRangeForSipInStdSeg(
940
Uint1Ptr segType); /* AM_SEQ, AM_GAP, AM_INSERT */
942
/***************************************************************************
944
* AlnMgr2GetNthStdSeg returns a pointer to the Nth segment of
945
* a standard segment alignment.
946
* returns NULL if there is no Nth segment or sap is not a std-seg alignment.
947
* Useful to pass its return value to AlnMgr2GetSeqRangeForSipInStdSeg()
949
***************************************************************************/
950
NLM_EXTERN StdSegPtr AlnMgr2GetNthStdSeg(SeqAlignPtr sap, Int2 n);
952
/***************************************************************************
954
* AlnMgr2GetNumStdSegs returns the number of segments in a standard-seg alignment.
955
* returns -1 if sap is null or not a standard-seg alignment.
956
* the Std-Seg version of AlnMgr2GetNumSegs
958
***************************************************************************/
959
NLM_EXTERN Int4 AlnMgr2GetNumStdSegs(SeqAlignPtr sap);
961
/***************************************************************************
963
* The two mapping functions act a little differently for std-segs. The
964
* alignment coordinates are 1:1 linearly correlated with the longest
965
* seqloc in the set; the others may be significantly shorter.
966
* The mapping functions deal with % lengths, and map those instead of
967
* coordinates (which may not be linear).
969
***************************************************************************/
970
NLM_EXTERN Int4 AlnMgr2MapBioseqToSeqAlignStdSeg(SeqAlignPtr sap, Int4 n, Int4 pos);
971
NLM_EXTERN Int4 AlnMgr2MapSeqAlignToBioseqStdSeg(SeqAlignPtr sap, Int4 n, Int4 pos);
972
NLM_EXTERN Int4 AlnMgr2GetAlnLengthStdSeg(SeqAlignPtr sap);
766
974
/***************************************************************************/