~ubuntu-branches/ubuntu/precise/exonerate/precise

« back to all changes in this revision

Viewing changes to src/hub/analysis.c

  • Committer: Bazaar Package Importer
  • Author(s): Charles Plessy, David Paleino, Charles Plessy
  • Date: 2008-07-02 11:24:48 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20080702112448-2kphz1t3xwqyi9jv
Tags: 2.0.0-1
[ David Paleino ]
* Updated to Standards-Version 3.8.0 (no changes needed)

[ Charles Plessy ]
* New upstream release (Closes: #479323)
  Quoting the upstream changelog:
  o Modified exonerate to work in Client:Server mode
  o Fixed --refine to work with SDP alignments
  o Disabled codegen warnings from bootstrapper during compilation
  o Added --geneticcode option for using alternative genetic codes
  o Added --splice5 and --splice3 to allow alternative splice site PSSMs
  o Fixed several bugs when scanning query sequences (eg. --forcescan query)
  o Fixed to report query on forward strand whenever possible.
  o Fixed --ryo sequence dumping with --bestn
  o Fixed a bug with missing seeds when using --annotation
  o Changed license to GPLv3
  o Added protein2dna:bestfit and protein2genome:bestfit models
    (works with exhaustive alignment only)
* Updated my email address.
* debian/control:
  - Building on glib-2.0 instead of 1.2.
  - Added a build dependancy on libreadline5-dev.
  - Augmented the long description and changed the short.
  - Added a build dependency on CDBS.
* debian/copyright made machine-readable.
* debian/rules switched to cdbs.
* debian/dirs: removed.

Show diffs side-by-side

added added

removed removed

Lines of Context:
3
3
*  Analysis module for exonerate                                 *
4
4
*                                                                *
5
5
*  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6
 
*  Copyright (C) 2000-2006.  All Rights Reserved.                *
 
6
*  Copyright (C) 2000-2008.  All Rights Reserved.                *
7
7
*                                                                *
8
8
*  This source code is distributed under the terms of the        *
9
 
*  GNU Lesser General Public License. See the file COPYING       *
10
 
*  or http://www.fsf.org/copyleft/lesser.html for details        *
 
9
*  GNU General Public License, version 3. See the file COPYING   *
 
10
*  or http://www.gnu.org/licenses/gpl.txt for details            *
11
11
*                                                                *
12
12
*  If you use this code, please keep this notice intact.         *
13
13
*                                                                *
16
16
#include "analysis.h"
17
17
#include "ungapped.h"
18
18
#include "compoundfile.h"
 
19
#include "hspset.h"
 
20
 
 
21
#include <stdlib.h> /* For atoi() */
 
22
#include <unistd.h> /* For sleep() */
 
23
#include <string.h> /* For strcmp() */
 
24
#include <ctype.h>  /* For isdigit() */
19
25
 
20
26
Analysis_ArgumentSet *Analysis_ArgumentSet_create(Argument *arg){
21
27
    register ArgumentSet *as;
35
41
        ArgumentSet_add_option(as, 0, "saturatethreshold", "int",
36
42
            "Word saturation threshold", "0",
37
43
            Argument_parse_int, &aas.saturate_threshold);
 
44
        /**/
 
45
        ArgumentSet_add_option(as, 0, "customserver", "command",
 
46
            "Custom command to send non-standard server", "NULL",
 
47
            Argument_parse_string, &aas.custom_server_command);
38
48
        Argument_absorb_ArgumentSet(arg, as);
39
49
        }
40
50
    return &aas;
47
57
    register Analysis *analysis = user_data;
48
58
    register GAM_Result *gam_result;
49
59
    g_assert(Comparison_has_hsps(comparison));
 
60
    if(analysis->scan_query){
 
61
        /* Swap back query and target after a query scan */
 
62
        Comparison_swap(comparison);
 
63
    } else {
 
64
        /* Revert target scan alignments to +ve query strand when possible */
 
65
        if((comparison->query->alphabet->type == Alphabet_Type_DNA)
 
66
        && (comparison->target->alphabet->type == Alphabet_Type_DNA)
 
67
        && (comparison->query->strand == Sequence_Strand_REVCOMP)
 
68
        && (comparison->target->strand == Sequence_Strand_FORWARD)
 
69
        && (!analysis->gam->translate_both))
 
70
            Comparison_revcomp(comparison);
 
71
        }
50
72
    if(Model_Type_is_gapped(analysis->gam->gas->type)){
51
73
        gam_result = GAM_Result_heuristic_create(analysis->gam,
52
74
                                                 comparison);
156
178
static void Analysis_FastaPipe_Seeder_init_func(gpointer user_data){
157
179
    register Analysis *analysis = user_data;
158
180
    g_assert(!analysis->curr_seeder);
 
181
    register Comparison_Param *comparison_param
 
182
           = Comparison_Param_share(analysis->comparison_param);
 
183
    if(analysis->scan_query)
 
184
        comparison_param = Comparison_Param_swap(comparison_param);
159
185
    analysis->curr_seeder = Seeder_create(analysis->verbosity,
160
 
            analysis->comparison_param,
161
 
            analysis->aas->saturate_threshold,
162
 
            analysis->scan_query,
 
186
            comparison_param, analysis->aas->saturate_threshold,
163
187
            Analysis_report_func, analysis);
 
188
    Comparison_Param_destroy(comparison_param);
164
189
    return;
165
190
    }
166
191
/* Called before query pipeline loading */
273
298
    return;
274
299
    }
275
300
 
 
301
/**/
 
302
 
 
303
static SocketClient *Analysis_Client_connect(gchar *path){
 
304
    register SocketClient *sc;
 
305
    register gint i, divider = 0, port;
 
306
    register gchar *server;
 
307
    register gint connection_attempts = 10;
 
308
    for(i = 0; path[i]; i++)
 
309
        if(path[i] == ':'){
 
310
            divider = i;
 
311
            break;
 
312
            }
 
313
    if(divider){
 
314
        port = atoi(path+divider+1);
 
315
        server = g_strndup(path, divider);
 
316
        for(i = connection_attempts; i >= 0; i--){
 
317
            sc = SocketClient_create(server, port);
 
318
            if(sc)
 
319
                break;
 
320
            g_warning("Failed connection to server (retrys left: %d", i);
 
321
            sleep(1);
 
322
            }
 
323
        g_free(server);
 
324
        return sc;
 
325
        }
 
326
    return NULL;
 
327
    }
 
328
 
 
329
static gchar *AnalysisClient_send(SocketClient *sc, gchar *msg,
 
330
                                  gchar *expect, gboolean multi_line_reply){
 
331
    register gchar *reply = SocketClient_send(sc, msg);
 
332
    register gchar *line, *p = reply, *processed_reply;
 
333
    register gint line_count = 0;
 
334
    register GString *str = g_string_sized_new(64);
 
335
    do {
 
336
        while(*p){
 
337
            if(!isspace(*p)) /* Strip blank lines */
 
338
                break;
 
339
            p++;
 
340
            }
 
341
        line = p;
 
342
        if(!strncmp(p, "error:", 6))
 
343
            g_error("Error from server: [%s]", p);
 
344
        if(!strncmp(p, "warning:", 8)){
 
345
            g_warning("Warning from server: [%s]", p);
 
346
        } else if(!strncmp(p, expect, strlen(expect))){
 
347
            while(*p){ /* Skip to end of line */
 
348
                if(*p == '\n')
 
349
                    break;
 
350
                p++;
 
351
                }
 
352
            line_count++;
 
353
            *p = '\0';
 
354
            g_string_append(str, line);
 
355
            g_string_append_c(str, '\n');
 
356
            *p = '\n';
 
357
        } else {
 
358
            if(!*p)
 
359
                break;
 
360
            g_error("Unexpected line from server [%s]", p);
 
361
            }
 
362
    } while(TRUE);
 
363
    g_free(reply);
 
364
    if(!line_count)
 
365
        g_error("No reply received from server msg=[%s]", msg);
 
366
    if((!multi_line_reply) && (line_count > 1))
 
367
        g_error("Unexpected multi-line reply from server");
 
368
    processed_reply = str->str;
 
369
    g_string_free(str, FALSE);
 
370
    return processed_reply;
 
371
    }
 
372
 
 
373
static void Analysis_Client_set_param(SocketClient *sc){
 
374
    register HSPset_ArgumentSet *has = HSPset_ArgumentSet_create(NULL);
 
375
    register Analysis_ArgumentSet *aas = Analysis_ArgumentSet_create(NULL);
 
376
    register gchar *msg, *reply;
 
377
    /**/
 
378
    if(aas->custom_server_command){
 
379
        msg = g_strdup_printf("%s\n", aas->custom_server_command);
 
380
        reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
 
381
        g_free(msg);
 
382
        g_free(reply);
 
383
        }
 
384
    /**/
 
385
    msg = g_strdup_printf("set param seedrepeat %d", has->seed_repeat);
 
386
    reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
 
387
    g_free(msg);
 
388
    g_free(reply);
 
389
    /**/
 
390
    msg = g_strdup_printf("set param dnahspthreshold %d", has->dna_hsp_threshold);
 
391
    reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
 
392
    g_free(msg);
 
393
    g_free(reply);
 
394
    /**/
 
395
    msg = g_strdup_printf("set param proteinhspthreshold %d",
 
396
                          has->protein_hsp_threshold);
 
397
    reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
 
398
    g_free(msg);
 
399
    g_free(reply);
 
400
    /**/
 
401
    msg = g_strdup_printf("set param codonhspthreshold %d",
 
402
                          has->codon_hsp_threshold);
 
403
    reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
 
404
    g_free(msg);
 
405
    g_free(reply);
 
406
    /**/
 
407
    msg = g_strdup_printf("set param geneseedthreshold %d",
 
408
                          has->geneseed_threshold);
 
409
    reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
 
410
    g_free(msg);
 
411
    g_free(reply);
 
412
    /**/
 
413
    msg = g_strdup_printf("set param geneseedrepeat %d",
 
414
                          has->geneseed_repeat);
 
415
    reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
 
416
    g_free(msg);
 
417
    g_free(reply);
 
418
    return;
 
419
    }
 
420
 
 
421
static Analysis_Client *Analysis_Client_create(gchar *path){
 
422
    register Analysis_Client *aclient;
 
423
    register SocketClient *sc = Analysis_Client_connect(path);
 
424
    register Alphabet_Type alphabet_type;
 
425
    register gchar *dbinfo, **dbinfo_word;
 
426
    if(!sc)
 
427
        return NULL;
 
428
    aclient = g_new(Analysis_Client, 1);
 
429
    aclient->sc = sc;
 
430
    aclient->probe_fdb = NULL;
 
431
    dbinfo = AnalysisClient_send(sc, "dbinfo", "dbinfo:", FALSE);
 
432
    dbinfo_word = g_strsplit(dbinfo, " ", 8);
 
433
    /**/
 
434
    g_assert(dbinfo_word[0]);
 
435
    g_assert(dbinfo_word[1]);
 
436
    g_assert(dbinfo_word[2]);
 
437
    g_assert(dbinfo_word[3]);
 
438
    g_assert(dbinfo_word[4]);
 
439
    /**/
 
440
    alphabet_type = Alphabet_Type_UNKNOWN;
 
441
    if(!strcmp(dbinfo_word[1], "dna"))
 
442
        alphabet_type = Alphabet_Type_DNA;
 
443
    if(!strcmp(dbinfo_word[1], "protein"))
 
444
        alphabet_type = Alphabet_Type_PROTEIN;
 
445
    /**/
 
446
    aclient->is_masked = FALSE;
 
447
    if(!strcmp(dbinfo_word[2], "masked"))
 
448
        aclient->is_masked = TRUE;
 
449
    /**/
 
450
    aclient->server_alphabet = Alphabet_create(alphabet_type, aclient->is_masked);
 
451
    aclient->num_seqs      = atoll(dbinfo_word[3]);
 
452
    aclient->max_seq_len   = atoll(dbinfo_word[4]);
 
453
    aclient->total_seq_len = atoll(dbinfo_word[5]);
 
454
    /**/
 
455
    g_strfreev(dbinfo_word);
 
456
    g_free(dbinfo);
 
457
    aclient->curr_query = NULL;
 
458
    aclient->seq_cache = g_new0(Sequence*, aclient->num_seqs);
 
459
    Analysis_Client_set_param(sc);
 
460
    return aclient;
 
461
    }
 
462
 
 
463
static void Analysis_Client_destroy(Analysis_Client *aclient){
 
464
    register gint i;
 
465
    register Sequence *seq;
 
466
    if(aclient->curr_query)
 
467
        Sequence_destroy(aclient->curr_query);
 
468
    for(i = 0; i < aclient->num_seqs; i++){
 
469
        seq = aclient->seq_cache[i];
 
470
        if(seq)
 
471
            Sequence_destroy(seq);
 
472
        }
 
473
    g_free(aclient->seq_cache);
 
474
    if(aclient->probe_fdb)
 
475
        FastaDB_close(aclient->probe_fdb);
 
476
    SocketClient_destroy(aclient->sc);
 
477
    Alphabet_destroy(aclient->server_alphabet);
 
478
    g_free(aclient);
 
479
    return;
 
480
    }
 
481
 
 
482
static void Analysis_Client_set_probe_fdb(Analysis_Client *aclient,
 
483
                                          FastaDB *probe_fdb){
 
484
    g_assert(!aclient->probe_fdb);
 
485
    aclient->probe_fdb = FastaDB_share(probe_fdb);
 
486
    return;
 
487
    }
 
488
 
 
489
static void Analysis_Client_set_query(Analysis_Client *aclient, Sequence *seq){
 
490
    register gchar *seq_str = Sequence_get_str(seq);
 
491
    register gchar *msg = g_strdup_printf("set query %s", seq_str);
 
492
    register gchar *reply = AnalysisClient_send(aclient->sc, msg, "ok:", FALSE);
 
493
    register gchar **word;
 
494
    register gint len, checksum;
 
495
    if(strncmp(reply, "ok:", 3))
 
496
        g_error("Could not set query [%s] on server", seq->id);
 
497
    word = g_strsplit(reply+4, " ", 4);
 
498
    len = atoi(word[0]);
 
499
    checksum = atoi(word[1]);
 
500
    if(seq->len != len)
 
501
        g_error("Query length mismatch on server %d %d", seq->len, len);
 
502
    if(Sequence_checksum(seq) != checksum)
 
503
        g_error("Query checksum mismatch on server %d %d",
 
504
                Sequence_checksum(seq), checksum);
 
505
    g_strfreev(word);
 
506
    g_free(reply);
 
507
    g_free(msg);
 
508
    g_free(seq_str);
 
509
    if(aclient->curr_query)
 
510
        Sequence_destroy(aclient->curr_query);
 
511
    aclient->curr_query = Sequence_share(seq);
 
512
    return;
 
513
    }
 
514
 
 
515
static void Analysis_Client_revcomp_query(Analysis_Client *aclient){
 
516
    register gchar *reply = AnalysisClient_send(aclient->sc,
 
517
            "revcomp query", "ok: query strand revcomp", FALSE);
 
518
    register Sequence *curr_query;
 
519
    curr_query = aclient->curr_query;
 
520
    aclient->curr_query = Sequence_revcomp(aclient->curr_query);
 
521
    Sequence_destroy(curr_query);
 
522
    g_free(reply);
 
523
    return;
 
524
    }
 
525
 
 
526
static void Analysis_Client_revcomp_target(Analysis_Client *aclient){
 
527
    register gchar *reply = AnalysisClient_send(aclient->sc,
 
528
            "revcomp target", "ok: target strand", FALSE);
 
529
    g_free(reply);
 
530
    return;
 
531
    }
 
532
 
 
533
/**/
 
534
 
 
535
typedef struct {
 
536
    Analysis_Client *aclient;
 
537
               gint  target_id;
 
538
               gint  seq_len;
 
539
} Analysis_Client_Key;
 
540
 
 
541
static Analysis_Client_Key *Analysis_Client_Key_create(Analysis_Client *aclient,
 
542
                                                   gint target_id, gint seq_len){
 
543
    register Analysis_Client_Key *key = g_new(Analysis_Client_Key, 1);
 
544
    key->aclient = aclient;
 
545
    key->target_id = target_id;
 
546
    key->seq_len = seq_len;
 
547
    return key;
 
548
    }
 
549
 
 
550
static void Analysis_Client_Key_destroy(Analysis_Client_Key *key){
 
551
    g_free(key);
 
552
    return;
 
553
    }
 
554
 
 
555
static gpointer Analysis_Client_SparseCache_get_func(gint pos,
 
556
                                                     gpointer page_data,
 
557
                                                     gpointer user_data){
 
558
    return GINT_TO_POINTER((gint)((gchar*)page_data)[pos]);
 
559
    }
 
560
 
 
561
static SparseCache_Page *Analysis_Client_SparseCache_fill_func(gint start,
 
562
                                                     gpointer user_data){
 
563
    register Analysis_Client_Key *key = user_data;
 
564
    register SparseCache_Page *page = g_new(SparseCache_Page, 1);
 
565
    register gint len = MIN(SparseCache_PAGE_SIZE, key->seq_len-start),
 
566
                  page_len;
 
567
    register gchar *msg = g_strdup_printf("get subseq %d %d %d",
 
568
            key->target_id, start, len);
 
569
    register gchar *reply = AnalysisClient_send(key->aclient->sc, msg,
 
570
                                                "subseq:", FALSE);
 
571
    if(strncmp(reply, "subseq:", 7))
 
572
        g_error("Failed to get subseq for target (%d,%d,%d) [%s]",
 
573
                key->target_id, start, len, reply);
 
574
    page->get_func = Analysis_Client_SparseCache_get_func;
 
575
    page_len = strlen(reply+8)-1;
 
576
    page->data = g_strndup(reply+8, page_len);
 
577
    page->data_size = sizeof(gchar)*page_len;
 
578
    g_free(msg);
 
579
    g_free(reply);
 
580
    FastaDB_SparseCache_compress(page, page_len);
 
581
    return page;
 
582
    }
 
583
/* FIXME: move compression stuff to SeqPage in Sequence */
 
584
 
 
585
static void Analysis_Client_SparseCache_free_func(gpointer user_data){
 
586
    register Analysis_Client_Key *key = user_data;
 
587
    Analysis_Client_Key_destroy(key);
 
588
    return;
 
589
    }
 
590
 
 
591
static SparseCache *Analysis_Client_get_SparseCache(Analysis_Client *aclient,
 
592
                                                    gint sequence_id, gint len){
 
593
    register Analysis_Client_Key *key
 
594
        = Analysis_Client_Key_create(aclient, sequence_id, len);
 
595
    return SparseCache_create(len, Analysis_Client_SparseCache_fill_func, NULL,
 
596
                              Analysis_Client_SparseCache_free_func, key);
 
597
    }
 
598
 
 
599
static Sequence *Analysis_Client_get_Sequence(Analysis_Client *aclient,
 
600
                                              gint sequence_id,
 
601
                                              gboolean revcomp_target){
 
602
    register gchar *msg, *reply, *id, *def;
 
603
    register SparseCache *cache;
 
604
    register Sequence *seq = aclient->seq_cache[sequence_id];
 
605
    register gint len, checksum;
 
606
    register gchar **seqinfo_word;
 
607
    if(seq){
 
608
        if(revcomp_target)
 
609
            return Sequence_revcomp(seq);
 
610
        return Sequence_share(seq);
 
611
        }
 
612
    msg = g_strdup_printf("get info %d", sequence_id);
 
613
    reply = AnalysisClient_send(aclient->sc, msg, "seqinfo:", FALSE);
 
614
    /**/
 
615
    if(strncmp(reply, "seqinfo:", 8))
 
616
        g_error("Failed to set info for target [%d]", sequence_id);
 
617
    /* parse seqinfo for <len> <checksum> <id> and [<def>] */
 
618
    seqinfo_word = g_strsplit(reply+9, " ", 4);
 
619
    len = atoi(seqinfo_word[0]);
 
620
    checksum = atoi(seqinfo_word[1]);
 
621
    id = seqinfo_word[2];
 
622
    def = seqinfo_word[3];
 
623
    /* Strip any trailing newlines */
 
624
    if(id[strlen(id)-1] == '\n')
 
625
        id[strlen(id)-1] = '\0';
 
626
    if(def && (def[strlen(def)-1] == '\n'))
 
627
        def[strlen(def)-1] = '\0';
 
628
    /**/
 
629
    cache = Analysis_Client_get_SparseCache(aclient, sequence_id, len);
 
630
    seq = Sequence_create_extmem(id, def, len,
 
631
                       (aclient->server_alphabet->type == Alphabet_Type_DNA)
 
632
                       ?Sequence_Strand_FORWARD:Sequence_Strand_UNKNOWN,
 
633
                       aclient->server_alphabet, cache);
 
634
    g_assert(!aclient->seq_cache[sequence_id]);
 
635
    aclient->seq_cache[sequence_id] = seq;
 
636
    g_strfreev(seqinfo_word);
 
637
    SparseCache_destroy(cache);
 
638
    g_free(reply);
 
639
    g_free(msg);
 
640
    if(revcomp_target)
 
641
        return Sequence_revcomp(seq);
 
642
    return Sequence_share(seq);
 
643
    }
 
644
 
 
645
typedef enum {
 
646
   Analysis_Client_HSP_TOKEN_BEGIN_SET,
 
647
   Analysis_Client_HSP_TOKEN_END_SET,
 
648
   Analysis_Client_HSP_TOKEN_INT,
 
649
   Analysis_Client_HSP_TOKEN_FINISH
 
650
} Analysis_Client_HSP_TOKEN;
 
651
 
 
652
static Analysis_Client_HSP_TOKEN Analysis_Client_get_hsp_token(gchar *str,
 
653
                                                   gint *pos, gint *intval){
 
654
    register gint ch;
 
655
    gchar *endptr;
 
656
    g_assert(str);
 
657
    while((ch = str[(*pos)])){
 
658
        switch(ch){
 
659
            case 'h':
 
660
                if(strncmp(str+(*pos), "hspset:", 7))
 
661
                    g_error("Unexpected string in HSPset list");
 
662
                (*pos) += 7;
 
663
                if(strncmp(str+(*pos), " empty\n", 7)){
 
664
                    return Analysis_Client_HSP_TOKEN_BEGIN_SET;
 
665
                } else {
 
666
                    (*pos) += 7;
 
667
                    break;
 
668
                    }
 
669
            case ' ':
 
670
                (*pos)++;
 
671
                break;
 
672
            case '\n':
 
673
                (*pos)++;
 
674
                return Analysis_Client_HSP_TOKEN_END_SET;
 
675
            default:
 
676
                if(isdigit(ch)){
 
677
                    (*intval) = strtol(str+(*pos), &endptr, 10);
 
678
                    (*pos) = endptr-str;
 
679
                    return Analysis_Client_HSP_TOKEN_INT;
 
680
                } else {
 
681
                    g_error("Unexpected character [%d] in HSPset list", ch);
 
682
                    }
 
683
                break;
 
684
            }
 
685
        }
 
686
    return Analysis_Client_HSP_TOKEN_FINISH;
 
687
    }
 
688
 
 
689
static void Analysis_Client_get_hsp_sets(Analysis_Client *aclient,
 
690
                                         Analysis *analysis,
 
691
                                         gboolean swap_chains,
 
692
                                         gboolean revcomp_target){
 
693
    register gchar *reply = AnalysisClient_send(aclient->sc,
 
694
                                                "get hsps", "hspset:", TRUE);
 
695
    gint pos = 0, intval = 0;
 
696
    register gboolean ok = TRUE;
 
697
    register Analysis_Client_HSP_TOKEN token;
 
698
    register gint target_id = -1, query_pos = -1, target_pos = -1, length;
 
699
    register Comparison *comparison = NULL;
 
700
    register Sequence *target = NULL;
 
701
    register Match_Type match_type
 
702
           = Match_Type_find(aclient->curr_query->alphabet->type,
 
703
                             aclient->server_alphabet->type, FALSE);
 
704
    /* FIXME: use Match_Type_find with translate_both for codon alignments */
 
705
    register HSPset *hsp_set = NULL;
 
706
    do {
 
707
        token = Analysis_Client_get_hsp_token(reply, &pos, &intval);
 
708
        switch(token){
 
709
            case Analysis_Client_HSP_TOKEN_BEGIN_SET:
 
710
                break;
 
711
            case Analysis_Client_HSP_TOKEN_INT:
 
712
                if(target_id == -1){
 
713
                    target_id = intval;
 
714
                    target = Analysis_Client_get_Sequence(aclient,
 
715
                                               target_id, revcomp_target);
 
716
                    g_assert(!comparison);
 
717
                    g_assert(aclient->curr_query);
 
718
                    /* FIXME: temp : make work with other HSP types */
 
719
                    /* FIXME: should take necessary HSP params from server */
 
720
                    if(swap_chains)
 
721
                        comparison = Comparison_create(analysis->comparison_param,
 
722
                                                   target, aclient->curr_query);
 
723
                    else
 
724
                        comparison = Comparison_create(analysis->comparison_param,
 
725
                                                   aclient->curr_query, target);
 
726
                    /* FIXME: should ensure that the HSPset is created
 
727
                     * without a horizon
 
728
                     */
 
729
                    Sequence_destroy(target);
 
730
                    target = NULL;
 
731
                } else if(query_pos == -1){
 
732
                    query_pos = intval;
 
733
                } else if(target_pos == -1){
 
734
                    target_pos = intval;
 
735
                } else {
 
736
                    length = intval;
 
737
                    g_assert(comparison);
 
738
                    /*
 
739
                    g_message("adding one [%d,%d,%d]", query_pos, target_pos,
 
740
                            length);
 
741
                    */
 
742
                    /* FIXME: need fix to work with for other match types */
 
743
                    switch(match_type){
 
744
                        case Match_Type_DNA2DNA:
 
745
                            hsp_set = comparison->dna_hspset;
 
746
                            break;
 
747
                        case Match_Type_PROTEIN2PROTEIN:
 
748
                        case Match_Type_PROTEIN2DNA:
 
749
                        case Match_Type_DNA2PROTEIN:
 
750
                            hsp_set = comparison->protein_hspset;
 
751
                            break;
 
752
                        default:
 
753
                            g_error("Match_Type not supported [%s]",
 
754
                                    Match_Type_get_name(match_type));
 
755
                        }
 
756
                    g_assert(hsp_set);
 
757
                    if(swap_chains)
 
758
                        HSPset_add_known_hsp(hsp_set, target_pos, query_pos,
 
759
                                             length);
 
760
                    else
 
761
                        HSPset_add_known_hsp(hsp_set, query_pos, target_pos,
 
762
                                             length);
 
763
                    query_pos = target_pos = -1;
 
764
                    }
 
765
                break;
 
766
            case Analysis_Client_HSP_TOKEN_END_SET:
 
767
                /* FIXME: needs to work for other hsp_set types */
 
768
                Comparison_finalise(comparison);
 
769
                if(Comparison_has_hsps(comparison)){
 
770
#if 0
 
771
                    /* FIXME: move to use scan_query swap in report */
 
772
                    if(swap_chains)
 
773
                        Comparison_swap(comparison);
 
774
#endif /* 0 */
 
775
                    Analysis_report_func(comparison, analysis);
 
776
                    }
 
777
                Comparison_destroy(comparison);
 
778
                comparison = NULL;
 
779
                target_id = -1;
 
780
                break;
 
781
            case Analysis_Client_HSP_TOKEN_FINISH:
 
782
                ok = FALSE;
 
783
                break;
 
784
            }
 
785
    } while(ok);
 
786
    g_assert(target_id == -1);
 
787
    /* format: <HSPSET> <TARGETID> { <QSTART TSTART LEN> } */
 
788
    /* tokens <BEGIN_HSPSET> <INT> <ENDHSPSET> */
 
789
    g_free(reply);
 
790
    return;
 
791
    }
 
792
/* FIXME: only working for single hspset comparisons */
 
793
 
 
794
static void Analysis_Client_process_query(Analysis_Client *aclient,
 
795
                                          Analysis *analysis,
 
796
                                          Sequence *query,
 
797
                                          gboolean swap_chains,
 
798
                                          gboolean revcomp_target){
 
799
    Analysis_Client_set_query(aclient, query);
 
800
    Analysis_Client_get_hsp_sets(aclient, analysis, swap_chains, revcomp_target);
 
801
    /* Revcomp query if DNA */
 
802
    if(aclient->curr_query->alphabet->type == Alphabet_Type_DNA){
 
803
        Analysis_Client_revcomp_query(aclient);
 
804
        Analysis_Client_get_hsp_sets(aclient, analysis,
 
805
                                     swap_chains, revcomp_target);
 
806
        }
 
807
    return;
 
808
    }
 
809
 
 
810
static void Analysis_Client_process(Analysis_Client *aclient, Analysis *analysis,
 
811
                                    gboolean swap_chains){
 
812
    register FastaDB_Seq *fdbs;
 
813
    /* FIXME: need to check for appropriate database type */
 
814
    while((fdbs = FastaDB_next(aclient->probe_fdb, FastaDB_Mask_ALL))){
 
815
        Analysis_Client_process_query(aclient, analysis, fdbs->seq,
 
816
                                      swap_chains, FALSE);
 
817
        /* Revcomp target if protein vs DNA or translate_both */
 
818
        if(((aclient->curr_query->alphabet->type == Alphabet_Type_PROTEIN)
 
819
          && (aclient->server_alphabet->type == Alphabet_Type_DNA))
 
820
           || analysis->gam->translate_both){
 
821
            Analysis_Client_revcomp_target(aclient);
 
822
            Analysis_Client_process_query(aclient, analysis,
 
823
                                          fdbs->seq, swap_chains, TRUE);
 
824
            Analysis_Client_revcomp_target(aclient);
 
825
            }
 
826
        FastaDB_Seq_destroy(fdbs);
 
827
        }
 
828
    return;
 
829
    }
 
830
 
 
831
/**/
 
832
 
276
833
Analysis *Analysis_create(
277
834
              GPtrArray *query_path_list, Alphabet_Type query_type,
278
835
              gint query_chunk_id, gint query_chunk_total,
280
837
              gint target_chunk_id, gint target_chunk_total,
281
838
              gint verbosity){
282
839
    register Analysis *analysis = g_new0(Analysis, 1);
283
 
    register FastaDB *query_fdb, *target_fdb,
 
840
    register FastaDB *query_fdb = NULL, *target_fdb = NULL,
284
841
                     *seeder_query_fdb, *seeder_target_fdb;
285
842
    register Match *match;
286
843
    Match *dna_match = NULL, *protein_match = NULL,
295
852
    g_assert(target_path_list->len);
296
853
    analysis->aas = Analysis_ArgumentSet_create(NULL);
297
854
    analysis->verbosity = verbosity;
 
855
    /**/
 
856
    if(query_path_list->len == 1)
 
857
        analysis->query_ac
 
858
                = Analysis_Client_create((gchar*)query_path_list->pdata[0]);
 
859
    if(target_path_list->len == 1)
 
860
        analysis->target_ac
 
861
                = Analysis_Client_create((gchar*)target_path_list->pdata[0]);
 
862
    /**/
298
863
    if(query_type == Alphabet_Type_UNKNOWN){
299
 
        query_type = FastaDB_guess_type(
300
 
                          (gchar*)query_path_list->pdata[0]);
301
 
        if(verbosity > 1)
302
 
            g_message("Guessed query type [%s]",
303
 
                    Alphabet_Type_get_name(query_type));
 
864
        if(analysis->query_ac){
 
865
            query_type = analysis->query_ac->server_alphabet->type;
 
866
        } else {
 
867
            query_type = FastaDB_guess_type(
 
868
                              (gchar*)query_path_list->pdata[0]);
 
869
            if(verbosity > 1)
 
870
                g_message("Guessed query type [%s]",
 
871
                        Alphabet_Type_get_name(query_type));
 
872
            }
304
873
        }
305
874
    if(target_type == Alphabet_Type_UNKNOWN){
306
 
        target_type = FastaDB_guess_type(
307
 
                          (gchar*)target_path_list->pdata[0]);
308
 
        if(verbosity > 1)
309
 
            g_message("Guessed target type [%s]",
310
 
                    Alphabet_Type_get_name(target_type));
 
875
        if(analysis->target_ac){
 
876
            target_type = analysis->target_ac->server_alphabet->type;
 
877
        } else {
 
878
            target_type = FastaDB_guess_type(
 
879
                              (gchar*)target_path_list->pdata[0]);
 
880
            if(verbosity > 1)
 
881
                g_message("Guessed target type [%s]",
 
882
                        Alphabet_Type_get_name(target_type));
 
883
            }
311
884
        }
312
885
    g_assert((query_type == Alphabet_Type_DNA)
313
886
           ||(query_type == Alphabet_Type_PROTEIN));
331
904
    if(!match)
332
905
        match = codon_match;
333
906
    g_assert(match);
334
 
    query_fdb = FastaDB_open_list_with_limit(query_path_list,
335
 
            match->query->alphabet, query_chunk_id, query_chunk_total);
336
 
    target_fdb = FastaDB_open_list_with_limit(target_path_list,
337
 
            match->target->alphabet, target_chunk_id, target_chunk_total);
 
907
    if(!analysis->query_ac)
 
908
        query_fdb = FastaDB_open_list_with_limit(query_path_list,
 
909
                match->query->alphabet, query_chunk_id, query_chunk_total);
 
910
    if(!analysis->target_ac)
 
911
       target_fdb = FastaDB_open_list_with_limit(target_path_list,
 
912
               match->target->alphabet, target_chunk_id, target_chunk_total);
338
913
    if(analysis->aas->use_exhaustive){
 
914
        if(analysis->query_ac || analysis->target_ac) /* FIXME: ni */
 
915
            g_error("Exhaustive alignment against server not implemented");
339
916
        analysis->fasta_pipe = FastaPipe_create(
340
917
                                  query_fdb, target_fdb,
341
918
                                  Analysis_FastaPipe_Pair_init_func,
347
924
                                  analysis->gam->translate_both);
348
925
        analysis->curr_query = NULL;
349
926
    } else { /* Not exhaustive */
350
 
        Analysis_find_matches(analysis, &dna_match, &protein_match,
351
 
                                        &codon_match);
352
 
        /**/
353
 
        use_horizon = !analysis->aas->use_bigseq;
 
927
        use_horizon = (analysis->aas->use_bigseq
 
928
                    || analysis->query_ac
 
929
                    || analysis->target_ac)?FALSE:TRUE;
354
930
        dna_hsp_param = dna_match
355
931
                      ? HSP_Param_create(dna_match, use_horizon)
356
932
                      : NULL;
388
964
                    = analysis->gam->gas->threshold;
389
965
            }
390
966
        /* Don't need HSP horizon for bigseq comparison */
391
 
        if(analysis->aas->use_bigseq){
392
 
            analysis->bsam = BSAM_create(analysis->comparison_param,
393
 
                    analysis->aas->saturate_threshold,
394
 
                    verbosity);
395
 
            analysis->fasta_pipe = FastaPipe_create(
396
 
                                  query_fdb, target_fdb,
397
 
                                  Analysis_FastaPipe_Pair_init_func,
398
 
                                  Analysis_FastaPipe_Pair_prep_func,
399
 
                                  Analysis_FastaPipe_Pair_term_func,
400
 
                                  Analysis_FastaPipe_Pair_query_func,
401
 
                                  Analysis_FastaPipe_Pair_target_func,
402
 
                                  FastaDB_Mask_ALL,
403
 
                                  analysis->gam->translate_both);
404
 
            analysis->curr_query = NULL;
405
 
        } else { /* Use Seeder */
406
 
            analysis->scan_query = Analysis_decide_scan_query(query_fdb,
407
 
                                                    target_fdb,
408
 
                                             analysis->aas->force_scan);
409
 
            if(verbosity > 1)
410
 
                g_message("Applying FSM scan to [%s]",
411
 
                          analysis->scan_query?"query":"target");
412
 
            /* Swap paths and types
413
 
             * for query and target when scan on query
414
 
             */
415
 
            if(analysis->scan_query){
416
 
                seeder_query_fdb = target_fdb;
417
 
                seeder_target_fdb = query_fdb;
 
967
        if(analysis->query_ac || analysis->target_ac){
 
968
            if(analysis->query_ac && analysis->target_ac)
 
969
                g_error("Server vs server comparison not impelemented");
 
970
            analysis->fasta_pipe = NULL;
 
971
            if(analysis->query_ac){
 
972
                Analysis_Client_set_probe_fdb(analysis->query_ac, target_fdb);
418
973
            } else {
419
 
                seeder_query_fdb = query_fdb;
420
 
                seeder_target_fdb = target_fdb;
421
 
                }
422
 
            analysis->curr_seeder = NULL;
423
 
            analysis->fasta_pipe = FastaPipe_create(
424
 
                seeder_query_fdb, seeder_target_fdb,
425
 
                Analysis_FastaPipe_Seeder_init_func,
426
 
                Analysis_FastaPipe_Seeder_prep_func,
427
 
                Analysis_FastaPipe_Seeder_term_func,
428
 
                Analysis_FastaPipe_Seeder_query_func,
429
 
                Analysis_FastaPipe_Seeder_target_func,
430
 
                FastaDB_Mask_ALL, analysis->gam->translate_both);
 
974
                g_assert(analysis->target_ac);
 
975
                Analysis_Client_set_probe_fdb(analysis->target_ac, query_fdb);
 
976
                }
 
977
        } else {
 
978
            if(analysis->aas->use_bigseq){
 
979
                analysis->bsam = BSAM_create(analysis->comparison_param,
 
980
                        analysis->aas->saturate_threshold,
 
981
                        verbosity);
 
982
                analysis->fasta_pipe = FastaPipe_create(
 
983
                                      query_fdb, target_fdb,
 
984
                                      Analysis_FastaPipe_Pair_init_func,
 
985
                                      Analysis_FastaPipe_Pair_prep_func,
 
986
                                      Analysis_FastaPipe_Pair_term_func,
 
987
                                      Analysis_FastaPipe_Pair_query_func,
 
988
                                      Analysis_FastaPipe_Pair_target_func,
 
989
                                      FastaDB_Mask_ALL,
 
990
                                      analysis->gam->translate_both);
 
991
                analysis->curr_query = NULL;
 
992
            } else { /* Use Seeder */
 
993
                analysis->scan_query = Analysis_decide_scan_query(query_fdb,
 
994
                                                        target_fdb,
 
995
                                                 analysis->aas->force_scan);
 
996
                if(verbosity > 1)
 
997
                    g_message("Applying FSM scan to [%s]",
 
998
                              analysis->scan_query?"query":"target");
 
999
                /* Swap paths and types
 
1000
                 * for query and target when scan on query
 
1001
                 */
 
1002
                if(analysis->scan_query){
 
1003
                    seeder_query_fdb = target_fdb;
 
1004
                    seeder_target_fdb = query_fdb;
 
1005
                } else {
 
1006
                    seeder_query_fdb = query_fdb;
 
1007
                    seeder_target_fdb = target_fdb;
 
1008
                    }
 
1009
                analysis->curr_seeder = NULL;
 
1010
                analysis->fasta_pipe = FastaPipe_create(
 
1011
                    seeder_query_fdb, seeder_target_fdb,
 
1012
                    Analysis_FastaPipe_Seeder_init_func,
 
1013
                    Analysis_FastaPipe_Seeder_prep_func,
 
1014
                    Analysis_FastaPipe_Seeder_term_func,
 
1015
                    Analysis_FastaPipe_Seeder_query_func,
 
1016
                    Analysis_FastaPipe_Seeder_target_func,
 
1017
                    FastaDB_Mask_ALL, analysis->gam->translate_both);
 
1018
                }
431
1019
            }
432
1020
        }
433
 
    FastaDB_close(query_fdb);
434
 
    FastaDB_close(target_fdb);
 
1021
    if(query_fdb)
 
1022
        FastaDB_close(query_fdb);
 
1023
    if(target_fdb)
 
1024
        FastaDB_close(target_fdb);
 
1025
    /**/
435
1026
    return analysis;
436
1027
    }
437
1028
 
438
1029
void Analysis_destroy(Analysis *analysis){
439
 
    FastaPipe_destroy(analysis->fasta_pipe);
 
1030
    if(analysis->fasta_pipe)
 
1031
        FastaPipe_destroy(analysis->fasta_pipe);
440
1032
    if(analysis->curr_query)
441
1033
        FastaDB_Seq_destroy(analysis->curr_query);
442
1034
    if(analysis->curr_seeder)
445
1037
        BSAM_destroy(analysis->bsam);
446
1038
    if(analysis->comparison_param)
447
1039
        Comparison_Param_destroy(analysis->comparison_param);
 
1040
    if(analysis->query_ac)
 
1041
        Analysis_Client_destroy(analysis->query_ac);
 
1042
    if(analysis->target_ac)
 
1043
        Analysis_Client_destroy(analysis->target_ac);
448
1044
    GAM_destroy(analysis->gam);
449
1045
    g_free(analysis);
450
1046
    return;
451
1047
    }
452
1048
 
453
1049
void Analysis_process(Analysis *analysis){
454
 
    while(FastaPipe_process(analysis->fasta_pipe, analysis));
 
1050
    if(analysis->query_ac){
 
1051
        Analysis_Client_process(analysis->query_ac, analysis, TRUE);
 
1052
    } else if(analysis->target_ac){
 
1053
        Analysis_Client_process(analysis->target_ac, analysis, FALSE);
 
1054
    } else {
 
1055
        while(FastaPipe_process(analysis->fasta_pipe, analysis));
 
1056
        }
455
1057
    GAM_report(analysis->gam);
456
1058
    return;
457
1059
    }
458
1060
 
 
1061
/**/
 
1062