1
/*****************************************************************
2
* SQUID - a library of functions for biological sequence analysis
3
* Copyright (C) 1992-2002 Washington University School of Medicine
5
* This source code is freely distributed under the terms of the
6
* GNU General Public License. See the files COPYRIGHT and LICENSE
8
*****************************************************************/
14
/* Database indexing (SSI format support)
 * CVS $Id: ssi.h,v 1.7 2002/02/24 19:39:27 eddy Exp $
 *
 * See: ssi_format.tex in Docs/
 */
25
/* Capacity limits: the largest file count and key count an index may hold,
 * and the index size (in MB) allowed in memory before external sort mode.
 */
#define SSI_MAXFILES 32767        /* 2^15-1 */
#define SSI_MAXKEYS  2147483647L  /* 2^31-1 */
#define SSI_MAXRAM   200          /* allow 200MB indexes before external sort mode */
30
/* Use the union to save space, since the two offset types are
 * mutually exclusive, controlled by "mode"
 */
34
char mode; /* which offset member is valid: SSI_OFFSET_I32, for example */
36
sqd_uint32 i32; /* 32-bit form: an offset that fseek() can use */
37
sqd_uint64 i64; /* 64-bit form: an offset that e.g. fseeko64() can use */
40
typedef struct ssioffset_s SSIOFFSET; /* short public name for struct ssioffset_s */
41
/* Offset mode values: which width of file offset is in use. */
#define SSI_OFFSET_I32 0  /* 32-bit offsets */
#define SSI_OFFSET_I64 1  /* 64-bit offsets */
45
/* xref: SSI API documentation in ssi-format.tex */
48
FILE *fp; /* open SSI index file */
49
sqd_uint32 flags; /* optional per-index behavior flags (see SSI_USE64, SSI_USE64_INDEX) */
50
sqd_uint16 nfiles; /* number of files (a 16-bit count) */
51
sqd_uint32 nprimary; /* number of primary keys */
52
sqd_uint32 nsecondary; /* number of secondary keys */
53
sqd_uint32 flen; /* length of filenames (including '\0') */
54
sqd_uint32 plen; /* length of primary keys (including '\0') */
55
sqd_uint32 slen; /* length of secondary keys (including '\0') */
56
sqd_uint32 frecsize; /* # bytes in a file record */
57
sqd_uint32 precsize; /* # bytes in a primary key record */
58
sqd_uint32 srecsize; /* # bytes in a secondary key record */
59
SSIOFFSET foffset; /* disk offset, start of file records */
60
SSIOFFSET poffset; /* disk offset, start of primary key records */
61
SSIOFFSET soffset; /* disk offset, start of secondary key records */
63
char imode; /* mode for index file offsets, 32- vs. 64-bit */
64
char smode; /* mode for sequence file offsets, 32- vs. 64-bit */
68
char **filename; /* list of file names [0..nfiles-1] */
69
sqd_uint32 *fileformat; /* file formats */
70
sqd_uint32 *fileflags; /* optional per-file behavior flags (see SSI_FAST_SUBSEQ) */
71
sqd_uint32 *bpl; /* bytes per line in file */
72
sqd_uint32 *rpl; /* residues per line in file */
74
typedef struct ssifile_s SSIFILE; /* short public name for struct ssifile_s */
76
/* Optional per-index behavior flags in the SSIFILE structure's "flags".
 *
 * Each expansion is parenthesized so the flag keeps its value when it is
 * used inside a larger expression: '+' and '*' bind tighter than '<<',
 * so a bare 1<<1 followed by "+ 1" would evaluate as 1<<(1+1).
 */
#define SSI_USE64       (1<<0)  /* seq offsets are 64-bit */
#define SSI_USE64_INDEX (1<<1)  /* index file offsets are 64-bit */
81
/* Optional per-file behavior flags in fileflags.
 *
 * The expansion is parenthesized so the flag keeps its value when combined
 * with operators that bind tighter than '<<' (such as '+' or '*').
 */
#define SSI_FAST_SUBSEQ (1<<0)  /* can do subseq lookup in this file */
85
/* Structure: SSIINDEX
 *
 * Used when building up an index and writing it to disk
 */
89
struct ssipkey_s { /* Primary key data: */
90
char *key; /* primary key name */
91
sqd_uint16 fnum; /* file number */
92
SSIOFFSET r_off; /* record offset */
93
SSIOFFSET d_off; /* data offset */
94
sqd_uint32 len; /* sequence length */
96
struct ssiskey_s { /* Secondary key data: */
97
char *key; /* secondary key name */
98
char *pkey; /* name of the primary key this secondary key refers to */
101
int smode; /* sequence mode: SSI_OFFSET_I32 or SSI_OFFSET_I64 */
102
int imode; /* index mode: SSI_OFFSET_I32 or SSI_OFFSET_I64 */
103
int external; /* TRUE if pkeys and skeys are on disk */
104
int max_ram; /* maximum RAM in MB before switching to external */
107
sqd_uint32 *fileformat; /* file formats */
110
sqd_uint32 flen; /* length of longest filename, including '\0' */
113
struct ssipkey_s *pkeys; /* primary key data */
114
sqd_uint32 plen; /* length of longest pkey, including '\0' */
116
char *ptmpfile; /* name of primary key tmp file, for external sort mode */
117
FILE *ptmp; /* handle on open ptmpfile */
119
struct ssiskey_s *skeys; /* secondary key data */
120
sqd_uint32 slen; /* length of longest skey, including '\0' */
121
sqd_uint32 nsecondary; /* number of secondary keys */
122
char *stmpfile; /* name of secondary key tmp file, for external sort mode */
123
FILE *stmp; /* handle on open stmpfile */
125
typedef struct ssiindex_s SSIINDEX; /* short public name for struct ssiindex_s */
127
/* These control malloc and realloc chunk sizes for index data.
 * NOTE(review): judging by the names, one is the growth step for file
 * entries and the other for key entries -- confirm against the allocator.
 */
#define SSI_FILE_BLOCK 10
#define SSI_KEY_BLOCK  100
133
/* Error codes set by the API
 */
#define SSI_ERR_NODATA         1   /* no data? an fread() failed */
#define SSI_ERR_NO_SUCH_KEY    2   /* that key's not in the index */
#define SSI_ERR_MALLOC         3
#define SSI_ERR_NOFILE         4   /* no such file? an fopen() failed */
#define SSI_ERR_BADMAGIC       5   /* magic number mismatch in SSIOpen() */
#define SSI_ERR_BADFORMAT      6   /* didn't read what I expected to fread() */
#define SSI_ERR_NO64BIT        7   /* needed 64-bit support and didn't have it */
#define SSI_ERR_SEEK_FAILED    8   /* an fseek() (or similar) failed */
#define SSI_ERR_TELL_FAILED    9   /* an ftell() (or similar) failed */
#define SSI_ERR_NO_SUBSEQS     10  /* fast subseq is disallowed */
#define SSI_ERR_RANGE          11  /* subseq requested is out of range */
#define SSI_ERR_BADARG         12  /* something wrong with a function argument */
#define SSI_ERR_TOOMANY_FILES  13  /* ran out of range for files in an index */
#define SSI_ERR_TOOMANY_KEYS   14  /* ran out of range for keys in an index */
#define SSI_ERR_FWRITE         15
#define SSI_ERR_EXTERNAL_SORT  16  /* external sort failed */
153
/* The SSI file reading API:
155
extern int SSIOpen(char *filename, SSIFILE **ret_sfp);
156
extern int SSIGetOffsetByName(SSIFILE *sfp, char *key, int *ret_fh,
157
SSIOFFSET *ret_offset);
158
extern int SSIGetOffsetByNumber(SSIFILE *sfp, int n, int *ret_fh,
159
SSIOFFSET *ret_offset);
160
extern int SSIGetSubseqOffset(SSIFILE *sfp, char *key, int requested_start,
161
int *ret_fh, SSIOFFSET *record_offset,
162
SSIOFFSET *data_offset, int *ret_actual_start);
163
extern int SSISetFilePosition(FILE *fp, SSIOFFSET *offset);
164
extern int SSIFileInfo(SSIFILE *sfp, int fh, char **ret_filename, int *ret_format);
165
extern void SSIClose(SSIFILE *sfp);
167
/* The SSI index file writing API:
169
extern int SSIRecommendMode(char *file);
170
extern SSIINDEX *SSICreateIndex(int mode);
171
extern int SSIGetFilePosition(FILE *fp, int mode, SSIOFFSET *ret_offset);
172
extern int SSIAddFileToIndex(SSIINDEX *g, char *filename, int fmt, int *ret_fh);
173
extern int SSISetFileForSubseq(SSIINDEX *g, int fh, int bpl, int rpl);
174
extern int SSIAddPrimaryKeyToIndex(SSIINDEX *g, char *key, int fh,
175
SSIOFFSET *r_off, SSIOFFSET *d_off, /* NOTE(review): declaration is cut off here; the remaining parameter(s) and the closing ");" are not present and must be restored from the function definition */
177
/* Remaining index-writing entry points; each takes the SSIINDEX being built.
 * NOTE(review): the int results are presumably SSI_ERR_* codes -- confirm.
 */
extern int  SSIAddSecondaryKeyToIndex(SSIINDEX *g, char *key, char *pkey);
extern int  SSIWriteIndex(char *file, SSIINDEX *g);
extern void SSIFreeIndex(SSIINDEX *g);
181
/* The SSI misc. functions API:
 *
 * SSIErrorString() takes an int and returns a char pointer.
 * NOTE(review): n is presumably an SSI_ERR_* code and the result a
 * descriptive message; ownership of the returned string is not visible
 * here -- confirm before freeing or modifying it.
 */
extern char *SSIErrorString(int n);
185
/* The SSI debugging API:
187
extern void SSIForceExternalSort(SSIINDEX *g);
189
#endif /*SSIH_INCLUDED*/