1
/************************************************************
2
* HMMER - Biological sequence analysis with profile HMMs
3
* Copyright (C) 1992-2003 Washington University School of Medicine
6
* This source code is distributed under the terms of the
7
* GNU General Public License. See the files COPYING and LICENSE
9
************************************************************/
13
* Routines for storing, sorting, displaying high scoring hits
16
*****************************************************************************
20
* AllocTophits() - allocation
21
* FreeTophits() - free'ing
22
* RegisterHit() - put information about a hit in the list
23
* GetRankedHit() - recovers information about a hit
24
* FullSortTophits() - sorts the top H hits.
26
*****************************************************************************
27
* Brief example of use:
29
* struct tophit_s *yourhits; // list of hits
30
* struct fancyali_s *ali; // (optional structure) alignment of a hit
32
* yourhits = AllocTophits(200);
33
* (for every hit in a search) {
35
* ali = Trace2FancyAli(); // You provide a function/structure here
36
* if (score > threshold)
37
* RegisterHit(yourhits, ...)
40
* FullSortTophits(yourhits); // Sort hits by evalue
41
* for (i = 0; i < 100; i++) // Recover hits out in ranked order
43
* GetRankedHit(yourhits, i, ...);
44
* // Presumably you'd print here...
46
* FreeTophits(yourhits);
47
***************************************************************************
49
* Estimated storage per hit:
52
* name/acc/desc: 192 bytes
53
* alignment: 1000 bytes total = ~1200 bytes with alignment;
54
* = ~200 bytes without
55
* Designed for: 10^5 hits (20 MB) or 10^4 alignments (10 MB)
61
/* Function: AllocTophits()
63
* Purpose: Allocate a struct tophit_s, for maintaining
64
* a list of top-scoring hits in a database search.
66
* Args: lumpsize - allocation lumpsize
68
* Return: An allocated struct hit_s. Caller must free.
71
AllocTophits(int lumpsize)
73
struct tophit_s *hitlist;
75
hitlist = (struct tophit_s*)MallocOrDie (sizeof(struct tophit_s));
77
hitlist->unsrt = (struct hit_s*)MallocOrDie (lumpsize * sizeof(struct hit_s));
78
hitlist->alloc = lumpsize;
80
hitlist->lump = lumpsize;
84
GrowTophits(struct tophit_s *h)
86
h->unsrt = (struct hit_s*)ReallocOrDie(h->unsrt,(h->alloc + h->lump) * sizeof(struct hit_s));
90
FreeTophits(struct tophit_s *h)
93
for (pos = 0; pos < h->num; pos++)
95
if (h->unsrt[pos].ali != NULL) FreeFancyAli(h->unsrt[pos].ali);
96
if (h->unsrt[pos].name != NULL) free(h->unsrt[pos].name);
97
if (h->unsrt[pos].acc != NULL) free(h->unsrt[pos].acc);
98
if (h->unsrt[pos].desc != NULL) free(h->unsrt[pos].desc);
101
if (h->hit != NULL) free(h->hit);
108
struct fancyali_s *ali;
110
ali = (struct fancyali_s*)MallocOrDie(sizeof(struct fancyali_s));
111
ali->rfline = ali->csline = ali->model = ali->mline = ali->aseq = NULL;
112
ali->query = ali->target = NULL;
113
ali->sqfrom = ali->sqto = 0;
117
FreeFancyAli(struct fancyali_s *ali)
120
if (ali->rfline != NULL) free(ali->rfline);
121
if (ali->csline != NULL) free(ali->csline);
122
if (ali->model != NULL) free(ali->model);
123
if (ali->mline != NULL) free(ali->mline);
124
if (ali->aseq != NULL) free(ali->aseq);
125
if (ali->query != NULL) free(ali->query);
126
if (ali->target != NULL) free(ali->target);
131
/* Function: RegisterHit()
133
* Purpose: Add a new hit to a list of top hits.
135
* "ali", if provided, is a pointer to allocated memory
136
* for an alignment output structure.
137
* Management is turned over to the top hits structure.
138
* Caller should not free them; they will be free'd by
139
* the FreeTophits() call.
141
* In contrast, "name", "acc", and "desc" are copied, so caller
142
* is still responsible for these.
144
* Number of args is unwieldy.
146
* Args: h - active top hit list
147
* key - value to sort by: bigger is better
148
* pvalue - P-value of this hit
149
* score - score of this hit
150
* motherp - P-value of parent whole sequence
151
* mothersc - score of parent whole sequence
152
* name - name of target
153
* acc - accession of target (may be NULL)
154
* desc - description of target (may be NULL)
155
* sqfrom - 1..L pos in target seq of start
156
* sqto - 1..L pos; sqfrom > sqto if rev comp
157
* sqlen - length of sequence, L
158
* hmmfrom - 0..M+1 pos in HMM of start
159
* hmmto - 0..M+1 pos in HMM of end
160
* hmmlen - length of HMM, M
161
* domidx - number of this domain
162
* ndom - total # of domains in sequence
163
* ali - optional printable alignment info
166
* hitlist is modified and possibly reallocated internally.
169
RegisterHit(struct tophit_s *h, double key,
170
double pvalue, float score, double motherp, float mothersc,
171
char *name, char *acc, char *desc,
172
int sqfrom, int sqto, int sqlen,
173
int hmmfrom, int hmmto, int hmmlen,
174
int domidx, int ndom,
175
struct fancyali_s *ali)
177
/* Check to see if list is full and we must realloc.
179
if (h->num == h->alloc) GrowTophits(h);
181
h->unsrt[h->num].name = Strdup(name);
182
h->unsrt[h->num].acc = Strdup(acc);
183
h->unsrt[h->num].desc = Strdup(desc);
184
h->unsrt[h->num].sortkey = key;
185
h->unsrt[h->num].pvalue = pvalue;
186
h->unsrt[h->num].score = score;
187
h->unsrt[h->num].motherp = motherp;
188
h->unsrt[h->num].mothersc= mothersc;
189
h->unsrt[h->num].sqfrom = sqfrom;
190
h->unsrt[h->num].sqto = sqto;
191
h->unsrt[h->num].sqlen = sqlen;
192
h->unsrt[h->num].hmmfrom = hmmfrom;
193
h->unsrt[h->num].hmmto = hmmto;
194
h->unsrt[h->num].hmmlen = hmmlen;
195
h->unsrt[h->num].domidx = domidx;
196
h->unsrt[h->num].ndom = ndom;
197
h->unsrt[h->num].ali = ali;
202
/* Function: GetRankedHit()
203
* Date: SRE, Tue Oct 28 10:06:48 1997 [Newton Institute, Cambridge UK]
205
* Purpose: Recover the data from the i'th ranked hit.
206
* Any of the data ptrs may be passed as NULL for fields
207
* you don't want. hitlist must have been sorted first.
209
* name, acc, desc, and ali are returned as pointers, not copies;
213
GetRankedHit(struct tophit_s *h, int rank,
214
double *r_pvalue, float *r_score,
215
double *r_motherp, float *r_mothersc,
216
char **r_name, char **r_acc, char **r_desc,
217
int *r_sqfrom, int *r_sqto, int *r_sqlen,
218
int *r_hmmfrom, int *r_hmmto, int *r_hmmlen,
219
int *r_domidx, int *r_ndom,
220
struct fancyali_s **r_ali)
222
if (r_pvalue != NULL) *r_pvalue = h->hit[rank]->pvalue;
223
if (r_score != NULL) *r_score = h->hit[rank]->score;
224
if (r_motherp != NULL) *r_motherp = h->hit[rank]->motherp;
225
if (r_mothersc!= NULL) *r_mothersc= h->hit[rank]->mothersc;
226
if (r_name != NULL) *r_name = h->hit[rank]->name;
227
if (r_acc != NULL) *r_acc = h->hit[rank]->acc;
228
if (r_desc != NULL) *r_desc = h->hit[rank]->desc;
229
if (r_sqfrom != NULL) *r_sqfrom = h->hit[rank]->sqfrom;
230
if (r_sqto != NULL) *r_sqto = h->hit[rank]->sqto;
231
if (r_sqlen != NULL) *r_sqlen = h->hit[rank]->sqlen;
232
if (r_hmmfrom != NULL) *r_hmmfrom = h->hit[rank]->hmmfrom;
233
if (r_hmmto != NULL) *r_hmmto = h->hit[rank]->hmmto;
234
if (r_hmmlen != NULL) *r_hmmlen = h->hit[rank]->hmmlen;
235
if (r_domidx != NULL) *r_domidx = h->hit[rank]->domidx;
236
if (r_ndom != NULL) *r_ndom = h->hit[rank]->ndom;
237
if (r_ali != NULL) *r_ali = h->hit[rank]->ali;
240
/* Function: TophitsMaxName()
242
* Purpose: Returns the maximum name length in a top hits list;
243
* doesn't need to be sorted yet.
246
TophitsMaxName(struct tophit_s *h)
252
for (i = 0; i < h->num; i++)
254
len = strlen(h->unsrt[i].name);
255
if (len > maxlen) maxlen = len;
260
/* Function: FullSortTophits()
262
* Purpose: Completely sort the top hits list. Calls
263
* qsort() to do the sorting, and uses
264
* hit_comparison() to do the comparison.
266
* Args: h - top hits structure
269
hit_comparison(const void *vh1, const void *vh2)
271
/* don't ask. don't change. Don't Panic. */
272
struct hit_s *h1 = *((struct hit_s **) vh1);
273
struct hit_s *h2 = *((struct hit_s **) vh2);
275
if (h1->sortkey < h2->sortkey) return 1;
276
else if (h1->sortkey > h2->sortkey) return -1;
277
else if (h1->sortkey == h2->sortkey) return 0;
282
FullSortTophits(struct tophit_s *h)
286
/* If we don't have /any/ hits, then don't
289
if (h->num == 0) return;
291
/* Assign the ptrs in h->hit.
293
h->hit = (struct hit_s **)MallocOrDie(h->num * sizeof(struct hit_s *));
294
for (i = 0; i < h->num; i++)
295
h->hit[i] = &(h->unsrt[i]);
297
/* Sort the pointers. Don't bother if we've only got one.
300
qsort(h->hit, h->num, sizeof(struct hit_s *), hit_comparison);