1
/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3
/*********************************************************************
4
* Clustal Omega - Multiple sequence alignment
6
* Copyright (C) 2010 University College Dublin
8
* Clustal-Omega is free software; you can redistribute it and/or
9
* modify it under the terms of the GNU General Public License as
10
* published by the Free Software Foundation; either version 2 of the
11
* License, or (at your option) any later version.
13
* This file is part of Clustal-Omega.
15
********************************************************************/
18
* RCS $Id: symmatrix.c 230 2011-04-09 15:37:50Z andreas $
21
* Functions for symmetric (square) matrices including diagonal.
22
* supports the notion of non-square sub-matrices of a symmetric
23
* matrix, i.e. where |rows|<|cols|.
25
* FIXME Allocating one big chunk of memory is probably
26
* much faster and also easier to maintain.
38
#include "symmatrix.h"
45
#define MAX_BUF_SIZE 65536
48
* @brief Allocates symmat and its members and initialises them. Data
49
* will be calloced, i.e. initialised with zeros.
52
* newly allocated and initialised symmatrix instance
56
* ncols number of columns
58
* @return: non-zero on error
60
* @see FreeSymMatrix()
62
* @note: symmat data will be of fake shape nrows x ncols
66
NewSymMatrix(symmatrix_t **symmat, int nrows, int ncols)
70
assert(nrows>0 && ncols>0 && ncols>=nrows);
71
assert(ncols>0 && ncols>=nrows);
73
(*symmat) = (symmatrix_t *) malloc(1*sizeof(symmatrix_t));
74
if (NULL == (*symmat)) {
75
fprintf(stderr, "Couldn't allocate memory (%s|%s)\n",
76
__FILE__, __FUNCTION__);
80
(*symmat)->data = (double **) malloc(nrows * sizeof(double *));
81
if (NULL == (*symmat)->data) {
82
fprintf(stderr, "Couldn't allocate memory (%s|%s)\n",
83
__FILE__, __FUNCTION__);
88
for (i=0; i<nrows; i++) {
89
(*symmat)->data[i] = (double *) calloc((ncols-i), sizeof(double));
90
if (NULL == (*symmat)->data[i]) {
91
fprintf(stderr, "Couldn't allocate memory (%s|%s)\n",
92
__FILE__, __FUNCTION__);
94
free((*symmat)->data[i]);
96
free((*symmat)->data);
102
fprintf(stderr, "DEBUG(%s|%s():%d) initialising symmat with the number of the beast\n",
103
__FILE__, __FUNCTION__, __LINE__);
106
for (j=0; j<ncols-i; j++) {
107
(*symmat)->data[i][j] = -666.0;
114
(*symmat)->nrows = nrows;
115
(*symmat)->ncols = ncols;
119
/*** end: new_symmatrix ***/
123
* @brief Sets symmat data of given index to given value
126
* symmatrix_t whose data is to be set
132
* value used to set data point
134
* @see SymMatrixGetValue()
136
* @note This is a convenience function that checks index order.
140
SymMatrixSetValue(symmatrix_t *symmat, const int i, const int j, const double value)
142
assert(NULL != symmat);
145
assert(i < symmat->nrows && j < symmat->ncols);
146
symmat->data[i][j-i] = value;
148
assert(j < symmat->nrows && i < symmat->ncols);
149
symmat->data[j][i-j] = value;
152
/*** end: symmatrix_setvalue ***/
157
* @brief Returns element of symmat corresponding to given indices
160
* symmatrix_t of question
166
* @return requested value
168
* @see SymMatrixSetValue()
170
* @note This is a convenience function that checks index order.
173
SymMatrixGetValue(symmatrix_t *symmat, const int i, const int j)
175
assert(NULL != symmat);
178
assert(i < symmat->nrows && j < symmat->ncols);
179
return symmat->data[i][j-i];
181
assert(j < symmat->nrows && i < symmat->ncols);
182
return symmat->data[j][i-j];
185
/*** end: symmatrix_getvalue ***/
192
* @brief Returns a pointer to an element of symmat corresponding to
198
* symmatrix_t of question
204
* @return pointer to value
206
* @see SymMatrixGetValue()
208
* @note This is a convenience function that checks index order.
212
SymMatrixGetValueP(double **val, symmatrix_t *symmat,
213
const int i, const int j)
215
assert(NULL != symmat);
218
assert(i < symmat->nrows && j < symmat->ncols);
219
*val = &(symmat->data[i][j-i]);
221
assert(j < symmat->nrows && i < symmat->ncols);
222
*val = &(symmat->data[j][i-j]);
225
/*** end: symmatrix_getvaluep ***/
229
* @brief Frees memory allocated by data members of symmat and symmat
233
* symmatrix_t to be freed
235
* @note Use in conjunction with NewSymMatrix()
237
* @see NewSymMatrix()
241
FreeSymMatrix(symmatrix_t **symmat)
244
if (NULL != (*symmat)) {
245
if (NULL != (*symmat)->data) {
246
for (i=0; i<(*symmat)->nrows; i++) {
247
free((*symmat)->data[i]);
249
free((*symmat)->data);
255
/*** end: free_symmatrix ***/
260
* @brief Print out a symmat in phylip style. Distances are printed on
261
* one line per sequence/object. Since we also support matrices with
262
* rows\<cols, the first line can also be nrows by ncolumns instead
266
* the symmatrix_t to print
268
* sequence/objects labels/names. must be at least of length symmat nrows
270
* filename or NULL. If NULL stdout will be used.
274
SymMatrixPrint(symmatrix_t *symmat, char **labels, const char *path)
278
int max_label_len = 0;
280
if (NULL==symmat || NULL==labels) {
282
"One of the provided arguments is empty or NULL (print_matrix)\n");
288
if (NULL==(fp=fopen(path, "w"))) {
289
fprintf(stderr, "Couldn't open %s for writing.", path);
294
/* find maximum label length for pretty printing
296
for (i=0; i<symmat->nrows; i++) {
298
assert(NULL != labels[i]);
299
this_len = strlen(labels[i]);
300
if (this_len>max_label_len)
301
max_label_len = this_len;
304
if (symmat->ncols==symmat->nrows) {
305
fprintf(fp, "%u\n", symmat->ncols);
307
/* this is not standard phylip, but necessary to indicate
309
fprintf(fp, "%u x %u\n", symmat->nrows, symmat->ncols);
311
for (i=0; i<symmat->nrows; i++) {
312
/* phylip restriction is 10 characters. we don't care here
314
fprintf(fp, "%-*s", max_label_len, labels[i]);
315
/* we are lazy and do it like fastdist: write all distances
316
* for one seq to one line
318
for (j=0; j<symmat->ncols; j++) {
319
fprintf(fp, " %f", SymMatrixGetValue(symmat, i, j));
321
fprintf(fp, "%s", "\n");
329
/*** end: SymMatrixPrint ***/
335
* @brief Read a distance matrix in phylip format
337
* @param[in] pcFileIn
338
* distance matrix filename
339
* @param[out] prSymMat_p
340
* the symmatrix_t. will be allocated here.
345
* FIXME untested code
348
SymMatrixRead(char *pcFileIn, symmatrix_t **prSymMat_p)
352
/* number of parsed sequences */
354
/* number of parsed distances per sequence */
355
int iNParsedDists = 0;
356
/* total number of sequences/objects */
360
assert(NULL!=pcFileIn);
362
/* FIXME: test correctness and implement label checking */
363
fprintf(stderr, "WARNING: Reading of distance matrix from file not thoroughly tested!\n");
364
fprintf(stderr, "WARNING: Assuming same order of sequences in sequence file and distance matrix file (matching of labels not implemented)\n");
366
if (NULL == (buf = (char *) malloc(MAX_BUF_SIZE * sizeof(char)))) {
367
fprintf(stderr, "ERROR: couldn't allocate memory at %s:%s:%d\n",
368
__FILE__, __FUNCTION__, __LINE__);
372
if (NULL == (prFilePointer = fopen(pcFileIn, "r"))) {
373
fprintf(stderr, "ERROR: Couldn't open %s for reading\n", pcFileIn);
378
/* get number of sequences from first line and allocate memory for
382
if (NULL == fgets(buf, MAX_BUF_SIZE, prFilePointer) ) {
383
fprintf(stderr, "Couldn't read first line from %s\n", pcFileIn);
385
goto closefile_and_freebuf;
387
if (strlen(buf)==MAX_BUF_SIZE-1) {
388
fprintf(stderr, "%s\n", "Looks like I couldn't read complete line. Wrong format (or too small MAX_BUF_SIZE)");
390
goto closefile_and_freebuf;
392
if (sscanf(buf, "%d", &iTotalNSeq)!=1) {
393
fprintf(stderr, "ERROR: couldn't parse number of sequences from first line of %s\n", pcFileIn);
395
goto closefile_and_freebuf;
399
Log(&rLog, LOG_FORCED_DEBUG, "iTotalNSeq parsed from %s is %d\n", pcFileIn, iTotalNSeq);
402
if (NewSymMatrix(prSymMat_p, iTotalNSeq, iTotalNSeq)) {
403
fprintf(stderr, "FATAL %s", "Memory allocation for distance matrix failed");
405
goto closefile_and_freebuf;
409
/* parse file line by line
412
while (NULL != fgets(buf, MAX_BUF_SIZE, prFilePointer)) {
416
if (MAX_BUF_SIZE-1 == strlen(buf)) {
417
fprintf(stderr, "%s\n", "Looks like I couldn't read complete line. Wrong format (or too small MAX_BUF_SIZE)");
419
goto closefile_and_freebuf;
423
Log(&rLog, LOG_FORCED_DEBUG, "Got line: %s\n", buf);
426
/* new sequence label at beginning of line?
428
if (isblank(buf[0])) {
434
/* tokenise line and treat new sequence specially
436
szToken = strtok(buf, " \t");
441
/* if format is lower dimensional matrix then first
442
* sequence has no distances but might have newline
443
* character at it's end.
445
while (isspace(szToken[strlen(szToken)-1])) {
446
szToken[strlen(szToken)-1]='\0';
448
/* FIXME save label? */
449
szToken = strtok(NULL, " \t");
451
/* from here on it's only parsing of distances */
452
while (szToken != NULL) {
456
/* only parse what's needed */
457
if (iNParsedDists!=iNParsedSeq) {
458
/* parse and store distance
460
if (sscanf(szToken, "%lf", &dist)!=1) {
461
fprintf(stderr, "Couldn't parse float from entry '%s'\n", szToken);
463
goto closefile_and_freebuf;
466
Log(&rLog, LOG_FORCED_DEBUG, "Parsed distance %d for seq %d = %f\n", iNParsedDists-1, iNParsedSeq-1, dist);
468
SymMatrixSetValue(*prSymMat_p, iNParsedSeq-1, iNParsedDists-1, dist);
469
SymMatrixSetValue(*prSymMat_p, iNParsedDists-1, iNParsedSeq-1, dist);
471
szToken = strtok(NULL, " \t");
475
if (iTotalNSeq!=iNParsedSeq) {
476
fprintf(stderr, "expected %d seqs, but only parsed %d\n", iTotalNSeq, iNParsedSeq);
478
goto closefile_and_freebuf;
481
for (i=0; i<iNParsedSeq; i++) {
483
for (j=0; j<iNParsedSeq; j++) {
484
Log(&rLog, LOG_FORCED_DEBUG, "prSymMat_p[%d][%d]=%f\n", i, j, (*prSymMat_p)[i][j]);
489
closefile_and_freebuf:
490
fclose(prFilePointer);
495
/*** end: SymMatrixRead ***/