303
/* Open a socket connection to the server described by path.
 *
 * From the visible lines: the port is parsed with atoi() from the text
 * after path[divider], the host name is the first `divider` characters
 * of path (copied with g_strndup), and SocketClient_create() is called
 * inside a loop that counts i down from connection_attempts to 0,
 * warning with the remaining retry count.
 * NOTE(review): the body of the scan over path that sets `divider`, the
 * success test after SocketClient_create(), and the return statement are
 * not visible in this excerpt - confirm against the full file.
 */
static SocketClient *Analysis_Client_connect(gchar *path){
304
register SocketClient *sc;
305
register gint i, divider = 0, port;
306
register gchar *server;
307
/* Starting value for the retry countdown below */
register gint connection_attempts = 10;
308
for(i = 0; path[i]; i++)
314
/* Port number is parsed from the text one past the divider position */
port = atoi(path+divider+1);
315
/* Host name is a fresh copy of everything before the divider */
server = g_strndup(path, divider);
316
for(i = connection_attempts; i >= 0; i--){
317
sc = SocketClient_create(server, port);
320
g_warning("Failed connection to server (retrys left: %d", i);
329
/* Send msg to the server over sc and return the filtered reply.
 *
 * The raw reply from SocketClient_send() is examined line by line:
 *   - a line starting with "error:" is reported with g_error()
 *   - a line starting with "warning:" is reported with g_warning()
 *   - a line starting with `expect` is appended to the result,
 *     followed by a newline
 *   - the visible code also calls g_error() for an unexpected line
 * g_error() is also called when no reply was received, and when more
 * than one line was collected while multi_line_reply is FALSE.
 *
 * Returns the character data of the accumulated GString; the GString
 * wrapper is released with g_string_free(str, FALSE), which leaves the
 * character buffer allocated for the caller.
 * NOTE(review): the loop structure, the increment of line_count and the
 * handling of the raw `reply` buffer are not visible in this excerpt.
 */
static gchar *AnalysisClient_send(SocketClient *sc, gchar *msg,
330
gchar *expect, gboolean multi_line_reply){
331
register gchar *reply = SocketClient_send(sc, msg);
332
register gchar *line, *p = reply, *processed_reply;
333
register gint line_count = 0;
334
/* Accumulates the accepted reply lines */
register GString *str = g_string_sized_new(64);
337
if(!isspace(*p)) /* Strip blank lines */
342
if(!strncmp(p, "error:", 6))
343
g_error("Error from server: [%s]", p);
344
if(!strncmp(p, "warning:", 8)){
345
g_warning("Warning from server: [%s]", p);
346
/* Accept only lines that begin with the expected prefix */
} else if(!strncmp(p, expect, strlen(expect))){
347
while(*p){ /* Skip to end of line */
354
g_string_append(str, line);
355
g_string_append_c(str, '\n');
360
g_error("Unexpected line from server [%s]", p);
365
g_error("No reply received from server msg=[%s]", msg);
366
/* A single-line request must not yield several accepted lines */
if((!multi_line_reply) && (line_count > 1))
367
g_error("Unexpected multi-line reply from server");
368
/* Keep the character data, drop only the GString wrapper */
processed_reply = str->str;
369
g_string_free(str, FALSE);
370
return processed_reply;
373
/* Push the local HSP parameters to the server over sc.
 *
 * If aas->custom_server_command is set, it is sent first (with a
 * trailing newline appended).  Then one "set param <name> <value>"
 * message is sent for each of: seedrepeat, dnahspthreshold,
 * proteinhspthreshold, codonhspthreshold, geneseedthreshold and
 * geneseedrepeat, each expecting a single-line reply starting "ok:".
 * NOTE(review): the release of msg/reply after each exchange is not
 * visible in this excerpt.
 */
static void Analysis_Client_set_param(SocketClient *sc){
374
register HSPset_ArgumentSet *has = HSPset_ArgumentSet_create(NULL);
375
register Analysis_ArgumentSet *aas = Analysis_ArgumentSet_create(NULL);
376
register gchar *msg, *reply;
378
/* Optional user-supplied command is forwarded before the parameters */
if(aas->custom_server_command){
379
msg = g_strdup_printf("%s\n", aas->custom_server_command);
380
reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
385
msg = g_strdup_printf("set param seedrepeat %d", has->seed_repeat);
386
reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
390
msg = g_strdup_printf("set param dnahspthreshold %d", has->dna_hsp_threshold);
391
reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
395
msg = g_strdup_printf("set param proteinhspthreshold %d",
396
has->protein_hsp_threshold);
397
reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
401
msg = g_strdup_printf("set param codonhspthreshold %d",
402
has->codon_hsp_threshold);
403
reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
407
msg = g_strdup_printf("set param geneseedthreshold %d",
408
has->geneseed_threshold);
409
reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
413
msg = g_strdup_printf("set param geneseedrepeat %d",
414
has->geneseed_repeat);
415
reply = AnalysisClient_send(sc, msg, "ok:", FALSE);
421
/* Create an Analysis_Client for the server named by path.
 *
 * Connects via Analysis_Client_connect(), sends "dbinfo" and splits the
 * "dbinfo:" reply on spaces (at most 8 fields).  From the fields:
 *   [1] "dna" / "protein" selects the server alphabet type
 *   [2] "masked" sets is_masked
 *   [3] [4] [5] are parsed with atoll() into num_seqs, max_seq_len and
 *       total_seq_len
 * A zero-filled sequence cache with num_seqs slots is allocated, and the
 * HSP parameters are sent with Analysis_Client_set_param().
 * NOTE(review): only fields [0]..[4] are asserted non-NULL in the
 * visible lines, yet field [5] is read - confirm that an assertion for
 * it exists in the elided lines.
 * NOTE(review): the assignment of aclient->sc and the return statement
 * are not visible in this excerpt.
 */
static Analysis_Client *Analysis_Client_create(gchar *path){
422
register Analysis_Client *aclient;
423
register SocketClient *sc = Analysis_Client_connect(path);
424
register Alphabet_Type alphabet_type;
425
register gchar *dbinfo, **dbinfo_word;
428
aclient = g_new(Analysis_Client, 1);
430
aclient->probe_fdb = NULL;
431
dbinfo = AnalysisClient_send(sc, "dbinfo", "dbinfo:", FALSE);
432
/* Space-separated fields of the dbinfo reply */
dbinfo_word = g_strsplit(dbinfo, " ", 8);
434
g_assert(dbinfo_word[0]);
435
g_assert(dbinfo_word[1]);
436
g_assert(dbinfo_word[2]);
437
g_assert(dbinfo_word[3]);
438
g_assert(dbinfo_word[4]);
440
/* Stays UNKNOWN unless the server reports "dna" or "protein" */
alphabet_type = Alphabet_Type_UNKNOWN;
441
if(!strcmp(dbinfo_word[1], "dna"))
442
alphabet_type = Alphabet_Type_DNA;
443
if(!strcmp(dbinfo_word[1], "protein"))
444
alphabet_type = Alphabet_Type_PROTEIN;
446
aclient->is_masked = FALSE;
447
if(!strcmp(dbinfo_word[2], "masked"))
448
aclient->is_masked = TRUE;
450
aclient->server_alphabet = Alphabet_create(alphabet_type, aclient->is_masked);
451
aclient->num_seqs = atoll(dbinfo_word[3]);
452
aclient->max_seq_len = atoll(dbinfo_word[4]);
453
aclient->total_seq_len = atoll(dbinfo_word[5]);
455
g_strfreev(dbinfo_word);
457
aclient->curr_query = NULL;
458
/* One zero-initialised cache slot per server sequence */
aclient->seq_cache = g_new0(Sequence*, aclient->num_seqs);
459
Analysis_Client_set_param(sc);
463
/* Release the resources held by aclient.
 *
 * Destroys the current query (if any), walks the num_seqs slots of the
 * sequence cache destroying sequences, frees the cache array, closes
 * the probe FastaDB (if set), and destroys the socket client and the
 * server alphabet.
 * NOTE(review): the declaration of `i`, any NULL check around
 * Sequence_destroy(seq), and the freeing of aclient itself are not
 * visible in this excerpt.
 */
static void Analysis_Client_destroy(Analysis_Client *aclient){
465
register Sequence *seq;
466
if(aclient->curr_query)
467
Sequence_destroy(aclient->curr_query);
468
/* Visit every slot of the per-sequence cache */
for(i = 0; i < aclient->num_seqs; i++){
469
seq = aclient->seq_cache[i];
471
Sequence_destroy(seq);
473
g_free(aclient->seq_cache);
474
if(aclient->probe_fdb)
475
FastaDB_close(aclient->probe_fdb);
476
SocketClient_destroy(aclient->sc);
477
Alphabet_destroy(aclient->server_alphabet);
482
/* Attach a probe FastaDB to aclient.
 *
 * Asserts that no probe database is set yet, then stores the result of
 * FastaDB_share(probe_fdb).
 * NOTE(review): the remainder of the parameter list is not visible in
 * this excerpt.
 */
static void Analysis_Client_set_probe_fdb(Analysis_Client *aclient,
484
g_assert(!aclient->probe_fdb);
485
aclient->probe_fdb = FastaDB_share(probe_fdb);
489
/* Upload seq to the server as the current query and record it locally.
 *
 * Sends "set query <sequence>" and expects a single-line "ok:" reply.
 * The text after the first four characters of the reply is split on
 * spaces; the second field is parsed as the checksum and compared with
 * Sequence_checksum(seq).  A length mismatch is also reported with
 * g_error().  Finally any previous current query is destroyed and
 * replaced with Sequence_share(seq).
 * NOTE(review): the parsing of `len`, the condition guarding the length
 * mismatch error, and the release of seq_str/msg/reply/word are not
 * visible in this excerpt.
 */
static void Analysis_Client_set_query(Analysis_Client *aclient, Sequence *seq){
490
register gchar *seq_str = Sequence_get_str(seq);
491
register gchar *msg = g_strdup_printf("set query %s", seq_str);
492
register gchar *reply = AnalysisClient_send(aclient->sc, msg, "ok:", FALSE);
493
register gchar **word;
494
register gint len, checksum;
495
if(strncmp(reply, "ok:", 3))
496
g_error("Could not set query [%s] on server", seq->id);
497
/* Skip the first four characters of the reply before splitting */
word = g_strsplit(reply+4, " ", 4);
499
checksum = atoi(word[1]);
501
g_error("Query length mismatch on server %d %d", seq->len, len);
502
if(Sequence_checksum(seq) != checksum)
503
g_error("Query checksum mismatch on server %d %d",
504
Sequence_checksum(seq), checksum);
509
/* Replace the previously held query with a shared reference to seq */
if(aclient->curr_query)
510
Sequence_destroy(aclient->curr_query);
511
aclient->curr_query = Sequence_share(seq);
515
/* Reverse-complement the query on both the server and the client.
 *
 * Sends "revcomp query" expecting a reply starting
 * "ok: query strand revcomp", then replaces aclient->curr_query with
 * Sequence_revcomp() of it and destroys the previous sequence.
 * NOTE(review): the handling of `reply` is not visible in this excerpt.
 */
static void Analysis_Client_revcomp_query(Analysis_Client *aclient){
516
register gchar *reply = AnalysisClient_send(aclient->sc,
517
"revcomp query", "ok: query strand revcomp", FALSE);
518
register Sequence *curr_query;
519
/* Keep the old pointer so it can be destroyed after the swap */
curr_query = aclient->curr_query;
520
aclient->curr_query = Sequence_revcomp(aclient->curr_query);
521
Sequence_destroy(curr_query);
526
/* Ask the server to reverse-complement the target.
 *
 * Sends "revcomp target" and expects a single-line reply starting
 * "ok: target strand".
 * NOTE(review): the handling of `reply` is not visible in this excerpt.
 */
static void Analysis_Client_revcomp_target(Analysis_Client *aclient){
527
register gchar *reply = AnalysisClient_send(aclient->sc,
528
"revcomp target", "ok: target strand", FALSE);
536
Analysis_Client *aclient;
539
} Analysis_Client_Key;
541
/* Allocate an Analysis_Client_Key and fill in its aclient, target_id
 * and seq_len fields from the arguments.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
static Analysis_Client_Key *Analysis_Client_Key_create(Analysis_Client *aclient,
542
gint target_id, gint seq_len){
543
register Analysis_Client_Key *key = g_new(Analysis_Client_Key, 1);
544
key->aclient = aclient;
545
key->target_id = target_id;
546
key->seq_len = seq_len;
550
static void Analysis_Client_Key_destroy(Analysis_Client_Key *key){
555
/* SparseCache accessor: treats page_data as an array of gchar and
 * returns the element at pos, widened to gint and packed into a
 * pointer with GINT_TO_POINTER.
 * NOTE(review): the remainder of the parameter list is not visible in
 * this excerpt.
 */
static gpointer Analysis_Client_SparseCache_get_func(gint pos,
558
return GINT_TO_POINTER((gint)((gchar*)page_data)[pos]);
561
/* SparseCache fill callback: fetch one page of target sequence from the
 * server.
 *
 * user_data is the Analysis_Client_Key for the target.  The request is
 * "get subseq <target_id> <start> <len>", where len is the smaller of
 * SparseCache_PAGE_SIZE and the remaining sequence length.  The reply
 * must begin with "subseq:"; the page data is copied from offset 8 of
 * the reply, omitting its final character, and then passed to
 * FastaDB_SparseCache_compress().
 * NOTE(review): the rest of the parameter list, the declaration of
 * page_len, the expect/flag arguments of the send call, the release of
 * msg/reply and the return statement are not visible in this excerpt.
 */
static SparseCache_Page *Analysis_Client_SparseCache_fill_func(gint start,
563
register Analysis_Client_Key *key = user_data;
564
register SparseCache_Page *page = g_new(SparseCache_Page, 1);
565
/* Clamp the request to what is left of the sequence */
register gint len = MIN(SparseCache_PAGE_SIZE, key->seq_len-start),
567
register gchar *msg = g_strdup_printf("get subseq %d %d %d",
568
key->target_id, start, len);
569
register gchar *reply = AnalysisClient_send(key->aclient->sc, msg,
571
if(strncmp(reply, "subseq:", 7))
572
g_error("Failed to get subseq for target (%d,%d,%d) [%s]",
573
key->target_id, start, len, reply);
574
page->get_func = Analysis_Client_SparseCache_get_func;
575
/* Payload starts at offset 8; the last character of the reply is dropped */
page_len = strlen(reply+8)-1;
576
page->data = g_strndup(reply+8, page_len);
577
page->data_size = sizeof(gchar)*page_len;
580
FastaDB_SparseCache_compress(page, page_len);
583
/* FIXME: move compression stuff to SeqPage in Sequence */
585
/* SparseCache free callback: user_data is the Analysis_Client_Key that
 * was supplied when the cache was created; destroy it.
 */
static void Analysis_Client_SparseCache_free_func(gpointer user_data){
586
register Analysis_Client_Key *key = user_data;
587
Analysis_Client_Key_destroy(key);
591
/* Build a SparseCache of length len for server sequence sequence_id.
 *
 * A new Analysis_Client_Key (aclient, sequence_id, len) is passed as the
 * cache's user data, with Analysis_Client_SparseCache_fill_func as the
 * fill callback and Analysis_Client_SparseCache_free_func as the free
 * callback; the third argument to SparseCache_create() is NULL.
 */
static SparseCache *Analysis_Client_get_SparseCache(Analysis_Client *aclient,
592
gint sequence_id, gint len){
593
register Analysis_Client_Key *key
594
= Analysis_Client_Key_create(aclient, sequence_id, len);
595
return SparseCache_create(len, Analysis_Client_SparseCache_fill_func, NULL,
596
Analysis_Client_SparseCache_free_func, key);
599
/* Return a Sequence for server sequence sequence_id, using the
 * client-side cache.
 *
 * The cache slot aclient->seq_cache[sequence_id] is consulted first.
 * Otherwise "get info <id>" is sent; the "seqinfo:" reply is split
 * (from offset 9) into <len> <checksum> <id> and an optional <def>,
 * trailing newlines are stripped from id and def, and a Sequence backed
 * by a SparseCache is created with Sequence_create_extmem() and stored
 * in the cache slot.  The strand is FORWARD when the server alphabet is
 * DNA, otherwise UNKNOWN.
 * The result is either Sequence_revcomp(seq) or Sequence_share(seq).
 * NOTE(review): the conditions choosing between the two returns
 * (presumably revcomp_target) and the cache-hit test are not visible in
 * this excerpt, nor is the sequence_id parameter declaration or the
 * release of msg/reply.
 */
static Sequence *Analysis_Client_get_Sequence(Analysis_Client *aclient,
601
gboolean revcomp_target){
602
register gchar *msg, *reply, *id, *def;
603
register SparseCache *cache;
604
/* Previously created Sequence for this id, if any */
register Sequence *seq = aclient->seq_cache[sequence_id];
605
register gint len, checksum;
606
register gchar **seqinfo_word;
609
return Sequence_revcomp(seq);
610
return Sequence_share(seq);
612
msg = g_strdup_printf("get info %d", sequence_id);
613
reply = AnalysisClient_send(aclient->sc, msg, "seqinfo:", FALSE);
615
if(strncmp(reply, "seqinfo:", 8))
616
g_error("Failed to set info for target [%d]", sequence_id);
617
/* parse seqinfo for <len> <checksum> <id> and [<def>] */
618
seqinfo_word = g_strsplit(reply+9, " ", 4);
619
len = atoi(seqinfo_word[0]);
620
checksum = atoi(seqinfo_word[1]);
621
id = seqinfo_word[2];
622
/* def is NULL when the reply has no fourth field */
def = seqinfo_word[3];
623
/* Strip any trailing newlines */
624
if(id[strlen(id)-1] == '\n')
625
id[strlen(id)-1] = '\0';
626
if(def && (def[strlen(def)-1] == '\n'))
627
def[strlen(def)-1] = '\0';
629
cache = Analysis_Client_get_SparseCache(aclient, sequence_id, len);
630
seq = Sequence_create_extmem(id, def, len,
631
(aclient->server_alphabet->type == Alphabet_Type_DNA)
632
?Sequence_Strand_FORWARD:Sequence_Strand_UNKNOWN,
633
aclient->server_alphabet, cache);
634
/* The slot must still be empty before the new Sequence is stored */
g_assert(!aclient->seq_cache[sequence_id]);
635
aclient->seq_cache[sequence_id] = seq;
636
g_strfreev(seqinfo_word);
637
/* Drop the local cache handle now that seq has been created from it */
SparseCache_destroy(cache);
641
return Sequence_revcomp(seq);
642
return Sequence_share(seq);
646
Analysis_Client_HSP_TOKEN_BEGIN_SET,
647
Analysis_Client_HSP_TOKEN_END_SET,
648
Analysis_Client_HSP_TOKEN_INT,
649
Analysis_Client_HSP_TOKEN_FINISH
650
} Analysis_Client_HSP_TOKEN;
652
/* Read the next token from the HSPset list in str, starting at *pos.
 *
 * Visible behaviour: text is checked against the "hspset:" prefix
 * (g_error if it does not match) and against " empty\n"; integers are
 * parsed with strtol() base 10 into *intval; an unexpected character is
 * reported with g_error().  Returns BEGIN_SET, END_SET or INT for the
 * respective token, and FINISH once the scan loop ends at the string
 * terminator.
 * NOTE(review): the character dispatch, the updates of *pos and the
 * declarations of ch/endptr are not visible in this excerpt.
 */
static Analysis_Client_HSP_TOKEN Analysis_Client_get_hsp_token(gchar *str,
653
gint *pos, gint *intval){
657
/* Loop until the terminating NUL of str */
while((ch = str[(*pos)])){
660
if(strncmp(str+(*pos), "hspset:", 7))
661
g_error("Unexpected string in HSPset list");
663
/* strncmp is non-zero when the set is NOT marked " empty" */
if(strncmp(str+(*pos), " empty\n", 7)){
664
return Analysis_Client_HSP_TOKEN_BEGIN_SET;
674
return Analysis_Client_HSP_TOKEN_END_SET;
677
(*intval) = strtol(str+(*pos), &endptr, 10);
679
return Analysis_Client_HSP_TOKEN_INT;
681
g_error("Unexpected character [%d] in HSPset list", ch);
686
return Analysis_Client_HSP_TOKEN_FINISH;
689
/* Fetch the HSP sets for the current query from the server and report
 * them.
 *
 * Sends "get hsps" (multi-line reply allowed) and consumes the reply
 * with Analysis_Client_get_hsp_token().  Visible handling per token:
 *   INT      - values are taken in turn as target id, query position,
 *              target position (tracked through the -1 sentinels); the
 *              target Sequence is fetched, a Comparison is created, and
 *              HSPs are added with HSPset_add_known_hsp() to the DNA or
 *              protein HSPset chosen by match_type
 *   END_SET  - the Comparison is finalised, reported through
 *              Analysis_report_func() when it has HSPs, and destroyed
 * Both argument orders appear for Comparison_create() and
 * HSPset_add_known_hsp(); presumably swap_chains selects between them -
 * TODO confirm, the selecting conditions are not visible here.
 * NOTE(review): the `analysis` parameter declaration, the token loop and
 * switch headers, and the release of `reply` are not visible in this
 * excerpt.
 */
static void Analysis_Client_get_hsp_sets(Analysis_Client *aclient,
691
gboolean swap_chains,
692
gboolean revcomp_target){
693
register gchar *reply = AnalysisClient_send(aclient->sc,
694
"get hsps", "hspset:", TRUE);
695
/* Not register: their addresses are passed to the tokenizer */
gint pos = 0, intval = 0;
696
register gboolean ok = TRUE;
697
register Analysis_Client_HSP_TOKEN token;
698
/* -1 marks a value that has not been read yet for the current HSP */
register gint target_id = -1, query_pos = -1, target_pos = -1, length;
699
register Comparison *comparison = NULL;
700
register Sequence *target = NULL;
701
register Match_Type match_type
702
= Match_Type_find(aclient->curr_query->alphabet->type,
703
aclient->server_alphabet->type, FALSE);
704
/* FIXME: use Match_Type_find with translate_both for codon alignments */
705
register HSPset *hsp_set = NULL;
707
token = Analysis_Client_get_hsp_token(reply, &pos, &intval);
709
case Analysis_Client_HSP_TOKEN_BEGIN_SET:
711
case Analysis_Client_HSP_TOKEN_INT:
714
target = Analysis_Client_get_Sequence(aclient,
715
target_id, revcomp_target);
716
g_assert(!comparison);
717
g_assert(aclient->curr_query);
718
/* FIXME: temp : make work with other HSP types */
719
/* FIXME: should take necessary HSP params from server */
721
comparison = Comparison_create(analysis->comparison_param,
722
target, aclient->curr_query);
724
comparison = Comparison_create(analysis->comparison_param,
725
aclient->curr_query, target);
726
/* FIXME: should ensure that the HSPset is created
729
Sequence_destroy(target);
731
} else if(query_pos == -1){
733
} else if(target_pos == -1){
737
g_assert(comparison);
739
g_message("adding one [%d,%d,%d]", query_pos, target_pos,
742
/* FIXME: need fix to work with for other match types */
744
case Match_Type_DNA2DNA:
745
hsp_set = comparison->dna_hspset;
747
/* All protein-involving match types share the protein HSPset */
case Match_Type_PROTEIN2PROTEIN:
748
case Match_Type_PROTEIN2DNA:
749
case Match_Type_DNA2PROTEIN:
750
hsp_set = comparison->protein_hspset;
753
g_error("Match_Type not supported [%s]",
754
Match_Type_get_name(match_type));
758
HSPset_add_known_hsp(hsp_set, target_pos, query_pos,
761
HSPset_add_known_hsp(hsp_set, query_pos, target_pos,
763
/* Reset the position sentinels ready for the next HSP triple */
query_pos = target_pos = -1;
766
case Analysis_Client_HSP_TOKEN_END_SET:
767
/* FIXME: needs to work for other hsp_set types */
768
Comparison_finalise(comparison);
769
if(Comparison_has_hsps(comparison)){
771
/* FIXME: move to use scan_query swap in report */
773
Comparison_swap(comparison);
775
Analysis_report_func(comparison, analysis);
777
Comparison_destroy(comparison);
781
case Analysis_Client_HSP_TOKEN_FINISH:
786
g_assert(target_id == -1);
787
/* format: <HSPSET> <TARGETID> { <QSTART TSTART LEN> } */
788
/* tokens <BEGIN_HSPSET> <INT> <ENDHSPSET> */
792
/* FIXME: only working for single hspset comparisons */
794
/* Run one query against the server.
 *
 * Sets the query on the server, fetches and reports its HSP sets, and -
 * when the current query alphabet is DNA - reverse-complements the
 * query and fetches the HSP sets a second time.
 * NOTE(review): the `analysis` and `query` parameter declarations are
 * not visible in this excerpt.
 */
static void Analysis_Client_process_query(Analysis_Client *aclient,
797
gboolean swap_chains,
798
gboolean revcomp_target){
799
Analysis_Client_set_query(aclient, query);
800
Analysis_Client_get_hsp_sets(aclient, analysis, swap_chains, revcomp_target);
801
/* Revcomp query if DNA */
802
if(aclient->curr_query->alphabet->type == Alphabet_Type_DNA){
803
Analysis_Client_revcomp_query(aclient);
804
Analysis_Client_get_hsp_sets(aclient, analysis,
805
swap_chains, revcomp_target);
810
/* Process every sequence of the probe database against the server.
 *
 * Each sequence returned by FastaDB_next() on aclient->probe_fdb is run
 * through Analysis_Client_process_query().  When the query is protein
 * and the server alphabet is DNA, or when analysis->gam->translate_both
 * is set, the target is reverse-complemented on the server, the query
 * is processed again with revcomp_target TRUE, and the target is then
 * reverse-complemented a second time.  Each FastaDB_Seq is destroyed
 * after use.
 * NOTE(review): the trailing arguments of the first process_query call
 * are not visible in this excerpt.
 */
static void Analysis_Client_process(Analysis_Client *aclient, Analysis *analysis,
811
gboolean swap_chains){
812
register FastaDB_Seq *fdbs;
813
/* FIXME: need to check for appropriate database type */
814
while((fdbs = FastaDB_next(aclient->probe_fdb, FastaDB_Mask_ALL))){
815
Analysis_Client_process_query(aclient, analysis, fdbs->seq,
817
/* Revcomp target if protein vs DNA or translate_both */
818
if(((aclient->curr_query->alphabet->type == Alphabet_Type_PROTEIN)
819
&& (aclient->server_alphabet->type == Alphabet_Type_DNA))
820
|| analysis->gam->translate_both){
821
Analysis_Client_revcomp_target(aclient);
822
Analysis_Client_process_query(aclient, analysis,
823
fdbs->seq, swap_chains, TRUE);
824
/* Second call after the reverse-strand pass */
Analysis_Client_revcomp_target(aclient);
826
FastaDB_Seq_destroy(fdbs);
276
833
Analysis *Analysis_create(
277
834
GPtrArray *query_path_list, Alphabet_Type query_type,
278
835
gint query_chunk_id, gint query_chunk_total,
388
964
= analysis->gam->gas->threshold;
390
966
/* Don't need HSP horizon for bigseq comparison */
391
if(analysis->aas->use_bigseq){
392
analysis->bsam = BSAM_create(analysis->comparison_param,
393
analysis->aas->saturate_threshold,
395
analysis->fasta_pipe = FastaPipe_create(
396
query_fdb, target_fdb,
397
Analysis_FastaPipe_Pair_init_func,
398
Analysis_FastaPipe_Pair_prep_func,
399
Analysis_FastaPipe_Pair_term_func,
400
Analysis_FastaPipe_Pair_query_func,
401
Analysis_FastaPipe_Pair_target_func,
403
analysis->gam->translate_both);
404
analysis->curr_query = NULL;
405
} else { /* Use Seeder */
406
analysis->scan_query = Analysis_decide_scan_query(query_fdb,
408
analysis->aas->force_scan);
410
g_message("Applying FSM scan to [%s]",
411
analysis->scan_query?"query":"target");
412
/* Swap paths and types
413
* for query and target when scan on query
415
if(analysis->scan_query){
416
seeder_query_fdb = target_fdb;
417
seeder_target_fdb = query_fdb;
967
if(analysis->query_ac || analysis->target_ac){
968
if(analysis->query_ac && analysis->target_ac)
969
g_error("Server vs server comparison not impelemented");
970
analysis->fasta_pipe = NULL;
971
if(analysis->query_ac){
972
Analysis_Client_set_probe_fdb(analysis->query_ac, target_fdb);
419
seeder_query_fdb = query_fdb;
420
seeder_target_fdb = target_fdb;
422
analysis->curr_seeder = NULL;
423
analysis->fasta_pipe = FastaPipe_create(
424
seeder_query_fdb, seeder_target_fdb,
425
Analysis_FastaPipe_Seeder_init_func,
426
Analysis_FastaPipe_Seeder_prep_func,
427
Analysis_FastaPipe_Seeder_term_func,
428
Analysis_FastaPipe_Seeder_query_func,
429
Analysis_FastaPipe_Seeder_target_func,
430
FastaDB_Mask_ALL, analysis->gam->translate_both);
974
g_assert(analysis->target_ac);
975
Analysis_Client_set_probe_fdb(analysis->target_ac, query_fdb);
978
if(analysis->aas->use_bigseq){
979
analysis->bsam = BSAM_create(analysis->comparison_param,
980
analysis->aas->saturate_threshold,
982
analysis->fasta_pipe = FastaPipe_create(
983
query_fdb, target_fdb,
984
Analysis_FastaPipe_Pair_init_func,
985
Analysis_FastaPipe_Pair_prep_func,
986
Analysis_FastaPipe_Pair_term_func,
987
Analysis_FastaPipe_Pair_query_func,
988
Analysis_FastaPipe_Pair_target_func,
990
analysis->gam->translate_both);
991
analysis->curr_query = NULL;
992
} else { /* Use Seeder */
993
analysis->scan_query = Analysis_decide_scan_query(query_fdb,
995
analysis->aas->force_scan);
997
g_message("Applying FSM scan to [%s]",
998
analysis->scan_query?"query":"target");
999
/* Swap paths and types
1000
* for query and target when scan on query
1002
if(analysis->scan_query){
1003
seeder_query_fdb = target_fdb;
1004
seeder_target_fdb = query_fdb;
1006
seeder_query_fdb = query_fdb;
1007
seeder_target_fdb = target_fdb;
1009
analysis->curr_seeder = NULL;
1010
analysis->fasta_pipe = FastaPipe_create(
1011
seeder_query_fdb, seeder_target_fdb,
1012
Analysis_FastaPipe_Seeder_init_func,
1013
Analysis_FastaPipe_Seeder_prep_func,
1014
Analysis_FastaPipe_Seeder_term_func,
1015
Analysis_FastaPipe_Seeder_query_func,
1016
Analysis_FastaPipe_Seeder_target_func,
1017
FastaDB_Mask_ALL, analysis->gam->translate_both);
433
FastaDB_close(query_fdb);
434
FastaDB_close(target_fdb);
1022
FastaDB_close(query_fdb);
1024
FastaDB_close(target_fdb);
435
1026
return analysis;
438
1029
void Analysis_destroy(Analysis *analysis){
439
FastaPipe_destroy(analysis->fasta_pipe);
1030
if(analysis->fasta_pipe)
1031
FastaPipe_destroy(analysis->fasta_pipe);
440
1032
if(analysis->curr_query)
441
1033
FastaDB_Seq_destroy(analysis->curr_query);
442
1034
if(analysis->curr_seeder)