~ubuntu-branches/ubuntu/trusty/qiime/trusty

« back to all changes in this revision

Viewing changes to tests/test_parse.py

  • Committer: Package Import Robot
  • Author(s): Andreas Tille
  • Date: 2013-06-17 18:28:26 UTC
  • mfrom: (9.1.2 sid)
  • Revision ID: package-import@ubuntu.com-20130617182826-376az5ad080a0sfe
Tags: 1.7.0+dfsg-1
Upload preparations done for BioLinux to Debian

Show diffs side-by-side

added

removed

Lines of Context:
3
3
 
4
4
__author__ = "Rob Knight"
5
5
__copyright__ = "Copyright 2011, The QIIME Project"
6
 
__credits__ = ["Rob Knight", "Justin Kuczynski", "Greg Caporaso",\
7
 
                "Cathy Lozupone", "Jens Reeder", "Daniel McDonald"] #remember to add yourself
 
6
__credits__ = ["Rob Knight", "Justin Kuczynski", "Greg Caporaso",
 
7
               "Cathy Lozupone", "Jens Reeder", "Daniel McDonald",
 
8
               "Jai Ram Rideout","Will Van Treuren"] #remember to add yourself
8
9
__license__ = "GPL"
9
 
__version__ = "1.5.0"
 
10
__version__ = "1.7.0"
10
11
__maintainer__ = "Greg Caporaso"
11
12
__email__ = "gregcaporaso@gmail.com"
12
13
__status__ = "Release"
27
28
    parse_illumina_line, parse_qual_score, parse_qual_scores, QiimeParseError,
28
29
    parse_newick,parse_trflp,parse_taxa_summary_table, parse_prefs_file,
29
30
    parse_mapping_file_to_dict, mapping_file_to_dict, MinimalQualParser,
30
 
    parse_denoiser_mapping,parse_otu_map,parse_taxonomy_to_otu_metadata)
 
31
    parse_denoiser_mapping, parse_otu_map, parse_sample_id_map,
 
32
    parse_taxonomy_to_otu_metadata, is_casava_v180_or_later, MinimalSamParser)
31
33
 
32
34
class TopLevelTests(TestCase):
33
35
    """Tests of top-level functions"""
67
69
        
68
70
        self.legacy_otu_table1 = legacy_otu_table1
69
71
        self.otu_table1 = otu_table1
 
72
        self.otu_table_without_leading_comment = \
 
73
            otu_table_without_leading_comment
70
74
        self.expected_lineages1 = expected_lineages1
71
75
        self.taxa_summary1 = taxa_summary1
72
76
        self.taxa_summary1_expected = taxa_summary1_expected
73
77
        self.otu_table1_floats=otu_table1_floats
74
78
        self.files_to_remove = []
75
79
        self.denoiser_mapping1 = denoiser_mapping1.split('\n')
 
80
        self.sam_data1 = sam_data1.split("\n")
 
81
        self.sam1_expected = sam1_expected
76
82
    
77
83
    def tearDown(self):
78
84
        remove_files(self.files_to_remove)
79
 
        
 
85
    
 
86
    def test_MinimalSamParser(self):
 
87
        """MinimalSamParser functions as expected"""
 
88
        actual = list(MinimalSamParser(self.sam_data1))
 
89
        expected = self.sam1_expected
 
90
        self.assertEqual(actual,expected)
 
91
 
 
92
    def test_is_casava_v180_or_later(self):
 
93
        """ is_casava_v180_or_later functions as expected """
 
94
        # handles trailing \n
 
95
        header_line = "@M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0\n"
 
96
        self.assertTrue(is_casava_v180_or_later(header_line))
 
97
        # same with no trailing \n
 
98
        header_line = "@M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0"
 
99
        self.assertTrue(is_casava_v180_or_later(header_line))
 
100
        
 
101
        header_line = "@HWUSI-EAS552R_0357:8:1:10040:6364#0/1"
 
102
        self.assertFalse(is_casava_v180_or_later(header_line))
 
103
        header_line = "@ some misc junk..."
 
104
        self.assertFalse(is_casava_v180_or_later(header_line))
 
105
        
 
106
        # non-header line raises error
 
107
        header_line = "HWUSI-EAS552R_0357:8:1:10040:6364#0/1"
 
108
        self.assertRaises(AssertionError,is_casava_v180_or_later,header_line)
 
109
        header_line = "M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0"
 
110
        self.assertRaises(AssertionError,is_casava_v180_or_later,header_line)
 
111
 
80
112
    def test_parse_taxa_summary_table(self):
81
113
        """ parse_taxa_summary_table functions as expected """
82
114
        actual = parse_taxa_summary_table(self.taxa_summary1.split('\n'))
471
503
                      [1722,4903,17], [589,2074,34]]),
472
504
               self.expected_lineages1)
473
505
        self.assertEqual(obs, exp)
 
506
 
 
507
        # test that the modified parse_classic performs correctly on OTU tables
 
508
        # without leading comments
 
509
        data = self.otu_table_without_leading_comment
 
510
        data_f = (data.split('\n'))
 
511
        obs = parse_classic_otu_table(data_f)
 
512
        sams = ['let-7i','miR-7','miR-17n','miR-18a','miR-19a','miR-22',
 
513
            'miR-25','miR-26a']
 
514
        otus = ['A2M', 'AAAS', 'AACS', 'AADACL1']
 
515
        vals = array([\
 
516
            [-0.2,  0.03680505,  0.205,  0.23,  0.66,  0.08,  -0.373,  0.26],
 
517
            [-0.09,  -0.25,  0.274,  0.15,  0.12,  0.29,  0.029,  -0.1148452],
 
518
            [0.33,  0.19,  0.27,  0.28,  0.19,  0.25,  0.089,  0.14],
 
519
            [0.49,  -0.92,  -0.723,  -0.23,  0.08,  0.49,  -0.386,  -0.64]])
 
520
        exp = (sams, otus, vals, []) # no lineages
 
521
        # because float comparisons in arrays always error
 
522
        self.assertEqual(obs[0], exp[0])
 
523
        self.assertEqual(obs[1], exp[1])
 
524
        self.assertEqual(obs[3], exp[3])
 
525
        self.assertTrue(all((obs[2]==exp[2]).tolist()))
474
526
    
475
527
    def test_parse_classic_otu_table_floats_in_table(self):
476
528
        """parse_classic_otu_table functions using an OTU table containing floats
579
631
        
580
632
    def test_parse_qiime_parameters(self):
581
633
        """parse_qiime_parameters: functions with valid input """
582
 
        lines = ["#Don't edit this file!",\
583
 
                 "pick_otus:similarity 0.94",\
584
 
                 "pick_otus:otu_picking_method\tcdhit",\
585
 
                 "align_seqs:verbose",\
586
 
                 "assign_taxonomy:use_rdp\ttRuE",\
587
 
                 "assign_taxonomy:something\tNone",\
588
 
                 "",\
589
 
                 "#some_script:fake_parameter\t99.0"]
 
634
        lines = ["#Don't edit this file!",
 
635
                 "pick_otus:similarity 0.94#this is not a comment...",
 
636
                 "pick_otus:otu_picking_method\tcdhit  # useful comment  ",
 
637
                 "align_seqs:verbose",
 
638
                 "assign_taxonomy:use_rdp\ttRuE # another great ## comment!",
 
639
                 "assign_taxonomy:something\tNone",
 
640
                 "",
 
641
                 "#some_script:fake_parameter\t99.0",
 
642
                 'summarize_taxa:md_identifier "Consensus Lineage"']
590
643
        actual = parse_qiime_parameters(lines)
591
 
        expected = {'pick_otus':\
592
 
                     {'similarity':'0.94', 'otu_picking_method':'cdhit'},\
593
 
                    'assign_taxonomy':\
594
 
                     {'use_rdp':None}}
 
644
        expected = {'pick_otus':
 
645
                     {'similarity':'0.94#this is not a comment...',
 
646
                      'otu_picking_method':'cdhit'},
 
647
                    'assign_taxonomy':
 
648
                     {'use_rdp':None},
 
649
                    'summarize_taxa':
 
650
                     {'md_identifier':'"Consensus Lineage"'}}
595
651
        self.assertEqual(actual,expected)
596
652
        
597
653
        # default dict functions as expected -- looking up non-existent key
682
738
 '338':{'something':1.990}}
683
739
        self.assertEqual(actual,expected)
684
740
 
685
 
 
 
741
    def test_parse_taxonomy_to_otu_metadata_extra_fields_ignored(self):
 
742
        """parsing of taxonomy file to otu metadata format functions as expected
 
743
        """
 
744
        example_tax = \
 
745
"""412 PC.635_647       Root;Bacteria;Firmicutes;"Clostridia";Clostridiales     0.930
 
746
319 PC.355_281  Root;Bacteria;Bacteroidetes     some text
 
747
353 PC.634_154  Root;Bacteria;Bacteroidetes     0.830
 
748
17 PC.607_302   Root;Bacteria;Bacteroidetes     0.960
 
749
13      Root;Bacteria;Firmicutes;"Clostridia";Clostridiales     0.870
 
750
338 PC.593_1314 Root;Bacteria   0.990   42556   Additional fields ignored"""
 
751
        expected = {'412':{'taxonomy':['Root','Bacteria','Firmicutes','"Clostridia"','Clostridiales']},
 
752
                    '319':{'taxonomy':['Root','Bacteria','Bacteroidetes']},
 
753
                    '353':{'taxonomy':['Root','Bacteria','Bacteroidetes']},
 
754
                    '17':{'taxonomy':['Root','Bacteria','Bacteroidetes']},
 
755
                    '13':{'taxonomy':['Root','Bacteria','Firmicutes','"Clostridia"','Clostridiales']},
 
756
                    '338':{'taxonomy':['Root','Bacteria']}}
 
757
        actual = parse_taxonomy_to_otu_metadata(example_tax.split('\n'),labels=['taxonomy'])
 
758
        self.assertEqual(actual,expected)
686
759
 
687
760
    def test_parse_taxonomy_to_otu_metadata_invalid_input(self):
688
 
        """parsing of taxonomy file to otu metadata format functions as expected
 
761
        """parsing of taxonomy file to otu metadata format fails when too few process functions are given
689
762
        """
690
763
        example_tax = \
691
764
"""412 PC.635_647       Root;Bacteria;Firmicutes;"Clostridia";Clostridiales     0.930
693
766
353 PC.634_154  Root;Bacteria;Bacteroidetes     0.830
694
767
17 PC.607_302   Root;Bacteria;Bacteroidetes     0.960
695
768
13      Root;Bacteria;Firmicutes;"Clostridia";Clostridiales     0.870
696
 
338 PC.593_1314 Root;Bacteria   0.990   42556   Additional fields raise error"""
697
 
        self.assertRaises(ValueError,parse_taxonomy_to_otu_metadata,example_tax.split('\n'))
 
769
338 PC.593_1314 Root;Bacteria   0.990"""
 
770
        self.assertRaises(ValueError,parse_taxonomy_to_otu_metadata,example_tax.split('\n'),labels=['taxonomy','score'],process_fs=[str])
698
771
 
699
772
    def test_parse_qiime_config_files(self):
700
773
        """ parse_qiime_config_files functions as expected """
709
782
        
710
783
        # empty dict on empty input
711
784
        self.assertEqual(parse_qiime_config_files([]),{})
 
785
        
 
786
        # test with an env variable - if it gets expanded
 
787
        # there won't be a $ in the output
 
788
        fake_file3 = ['key2\t$HOME', 'key3\thello $HOME']
 
789
        actual = parse_qiime_config_files([fake_file3])
 
790
        self.assertTrue('$' not in actual['key2'])
 
791
        self.assertTrue('$' not in actual['key3'])
712
792
 
713
793
 
714
794
    def test_parse_metadata_state_descriptions(self):
718
798
        s = 'Study:Twin,Hand,Dog;BodySite:Palm,Stool'
719
799
        self.assertEqual(parse_metadata_state_descriptions(s), {'Study':set(['Twin','Hand','Dog']),
720
800
            'BodySite':set(['Palm','Stool'])})
721
 
            
722
 
        
 
801
 
 
802
        # category names with colons, i.e. ontology-derived
 
803
        s = 'Study:Twin,Hand,Dog;site:UBERON:feces,UBERON:ear canal;'+\
 
804
            'env_feature:ENVO:farm soil,ENVO:national park'
 
805
        self.assertEqual(parse_metadata_state_descriptions(s), {'Study':
 
806
            set(['Twin', 'Hand', 'Dog']), 'site':set(['UBERON:feces',
 
807
            'UBERON:ear canal']), 'env_feature':set(['ENVO:farm soil',
 
808
            'ENVO:national park'])})
 
809
 
 
810
        s = "Treatment:A,B,C;env_matter:ENVO:nitsol,ENVO:farm soil;env_biom:"+\
 
811
            "ENVO:Tropical dry (including Monsoon forests) and woodlands,"+\
 
812
            "ENVO:Forest: including woodlands;country:GAZ:Persnickety Islands"+\
 
813
            ",St. Kitt's and Nevis"
 
814
        self.assertEqual(parse_metadata_state_descriptions(s), {"country":
 
815
            set(["GAZ:Persnickety Islands", "St. Kitt's and Nevis"]),
 
816
            "env_biom":set(["ENVO:Tropical dry (including Monsoon forests) "+\
 
817
            "and woodlands", "ENVO:Forest: including woodlands"]), "env_matter":
 
818
            set(["ENVO:nitsol","ENVO:farm soil"]), 'Treatment':set(["A", "B",
 
819
            "C"])})
 
820
 
 
821
 
723
822
    def test_parse_illumina_line_barcode_in_header(self):
724
823
        """parse_illumina_line: handles barcode in header correctly """
725
824
        illumina_line0 = illumina_read1[0]
935
1034
        self.assertEqual(actual[1],expected_sids)
936
1035
        self.assertEqual(actual[2],expected_oids)
937
1036
 
 
1037
    def test_parse_sample_id_map(self):
 
1038
        """Test parsing a sample id map functions correctly."""
 
1039
        sample_id_map = ['\t\t\n', '', ' ', '\n', 'S1\ta',
 
1040
                         'S2\tb', '\n \t', 'T1\ta', 'T2\tb']
 
1041
        exp = {'S1':'a', 'S2':'b', 'T1':'a', 'T2':'b'}
 
1042
        obs = parse_sample_id_map(sample_id_map)
 
1043
        self.assertEqual(obs, exp)
 
1044
 
 
1045
    def test_parse_sample_id_map_repeat_sample_ids(self):
 
1046
        """Test parsing a sample id map with non-unique first column fails."""
 
1047
        sample_id_map = ['\t\t\n', '', ' ', '\n', 'S1\ta',
 
1048
                         'S2\tb', '\n \t', 'S1\tc']
 
1049
        self.assertRaises(ValueError, parse_sample_id_map,
 
1050
                          sample_id_map)
 
1051
 
 
1052
    def test_parse_sample_id_map_many_to_one_mapping(self):
 
1053
        """Test parsing a sample id map with many-to-one mapping fails."""
 
1054
        sample_id_map = ['S1\ta', 'T1\ta', 'S2\ta']
 
1055
        self.assertRaises(ValueError, parse_sample_id_map,
 
1056
                          sample_id_map)
 
1057
 
 
1058
 
938
1059
illumina_read1 = """HWI-6X_9267:1:1:4:1699#ACCACCC/1:TACGGAGGGTGCGAGCGTTAATCGCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGAAAAAAAAAAAAAAAAAAAAAAA:abbbbbbbbbb`_`bbbbbb`bb^aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaDaabbBBBBBBBBBBBBBBBBBBB
939
1060
HWI-6X_9267:1:1:4:390#ACCTCCC/1:GACAGGAGGAGCAAGTGTTATTCAAATTATGCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAA:aaaaaaaaaa```aa\^_aa``aVaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaBaaaaa""".split('\n')
940
1061
 
986
1107
4       589.6   2074.4  34.5    Bacteria; Cyanobacteria; Chloroplasts; vectors
987
1108
"""
988
1109
 
 
1110
 
 
1111
otu_table_without_leading_comment = '#OTU ID\tlet-7i\tmiR-7\tmiR-17n\tmiR-18a\tmiR-19a\tmiR-22\tmiR-25\tmiR-26a\nA2M\t-0.2\t0.03680505\t0.205\t0.23\t0.66\t0.08\t-0.373\t0.26\nAAAS\t-0.09\t-0.25\t0.274\t0.15\t0.12\t0.29\t0.029\t-0.114845199\nAACS\t0.33\t0.19\t0.27\t0.28\t0.19\t0.25\t0.089\t0.14\nAADACL1\t0.49\t-0.92\t-0.723\t-0.23\t0.08\t0.49\t-0.386\t-0.64'
 
1112
 
 
1113
 
 
1114
 
989
1115
taxa_summary1 = """#Full OTU Counts
990
1116
Taxon   Even1   Even2   Even3
991
1117
Bacteria;Actinobacteria;Actinobacteria(class);Actinobacteridae  0.0880247251673 0.0721968465746 0.081371761759
1023
1149
Read2:
1024
1150
Read3:\tRead6"""
1025
1151
 
 
1152
sam_data1 = """@SQ      SN:s1_1 LN:66
 
1153
@SQ     SN:s2_2 LN:1131
 
1154
@SQ     SN:s1_3 LN:348
 
1155
@SQ     SN:s1_4 LN:348
 
1156
@SQ     SN:s1_5 LN:1131
 
1157
@SQ     SN:s1_6 LN:1132
 
1158
s1_1    0       s1_1    2       136     1S65M   *       0       0       atgaaacgcattagcaccaccattaccaccaccatcaccattaccacaggtaacggtgcgggctga      *       AS:i:65 XS:i:0  XF:i:3  XE:i:2  NM:i:0
 
1159
s2_2    0       s1_5    1       0       1131M   *       0       0       atggctaagcaagattattacgagattttaggcgtttccaaaacagcggaagagcgtgaaatcagaaaggcctacaaacgcctggccatgaaataccacccggaccgtaaccagggtgacaaagaggccgaggcgaaatttaaagagatcaaggaagcttatgaagttctgaccgactcgcaaaaacgtgcggcatacgatcagtatggtcatgctgcgtttgagcaaggtggcatgggcggcggcggttttggcggcggcgcagacttcagcgatatttttggtgacgttttcggcgatatttttggcggcggacgtggtcgtcaacgtgcggcgcgcggtgctgatttacgctataacatggagctcaccctcgaagaagctgtacgtggcgtgaccaaagagatccgcattccgactctggaagagtgtgacgtttgccacggtagcggtgcaaaaccaggtacacagccgcagacttgtccgacctgtcatggttctggtcaggtgcagatgcgccagggattcttcgctgtacagcagacctgtccacactgtcagggccgcggtacgctgatcaaagatccgtgcaacaaatgtcatggtcatggtcgtgttgagcgcagcaaaacgctgtccgttaaaatcccggcaggggtggacactggagaccgcatccgtcttgcgggcgaaggtgaagcgggcgagcatggcgcaccggcaggcgatctgtacgttcaggttcaggttaaacagcacccgattttcgagcgtgaaggcaacaacctgtattgcgaagtcccgatcaacttcgctatggcggcgctgggtggcgaaatcgaagtaccgacccttgatggtcgcgtcaaactgaaagtgcctggcgaaacccagaccggtaagctattccgtatgcgcggtaaaggcgtcaagtctgtccgcggtggcgcacagggtgatttgctgtgccgcgttgtcgtcgaaacaccggtaggcctgaacgaaaggcagaaacagctgctgcaagagctgcaagaaagcttcggtggcccaaccggcgagcacaacagcccgcgctcaaagagcttctttgatggtgtgaagaagttttttgacgacctgacccgctaa     *       AS:i:1131       XS:i:1131       XF:i:0  XE:i:24 NM:i:0
 
1160
s1_3    0       s1_3    1       9       348M    *       0       0       atgaagacgtttttcagaacagtgttattcggcagcctgatggccgtctgcgcaaacagttacgcgctcagcgagtctgaagccgaagatatggccgatttaacggcagtttttgtctttctgaagaacgattgtggttaccagaacttacctaacgggcaaattcgtcgcgcactggtctttttcgctcagcaaaaccagtgggacctcagtaattacgacaccttcgacatgaaagccctcggtgaagacagctaccgcgatctcagcggcattggcattcccgtcgctaaaaaatgcaaagccctggcccgcgattccttaagcctgcttgcctacgtcaaataa    *       AS:i:348        XS:i:336        XF:i:0  XE:i:9  NM:i:0
 
1161
s1_4    0       s1_4    1       9       348M    *       0       0       atgaagaaaattttcagaacagtgttattcggcagcctgatggccgtctgcgcaaacagttacgcgctcagcgagtctgaagccgaagatatggccgatttaacggcagtttttgtctttctgaagaacgattgtggttaccagaacttacctaacgggcaaattcgtcgcgcactggtctttttcgctcagcaaaaccagtgggacctcagtaattacgacaccttcgacatgaaagccctcggtgaagacagctaccgcgatctcagcggcattggcattcccgtcgctaaaaaatgcaaagccctggcccgcgattccttaagcctgcttgcctacgtcaaatcc    *       AS:i:348        XS:i:336        XF:i:0  XE:i:9  NM:i:0
 
1162
s1_5    0       s2_2    1       0       1131M   *       0       0       atggctaagcaagattattacgagattttaggcgtttccaaaacagcggaagagcgtgaaatcagaaaggcctacaaacgcctggccatgaaataccacccggaccgtaaccagggtgacaaagaggccgaggcgaaatttaaagagatcaaggaagcttatgaagttctgaccgactcgcaaaaacgtgcggcatacgatcagtatggtcatgctgcgtttgagcaaggtggcatgggcggcggcggttttggcggcggcgcagacttcagcgatatttttggtgacgttttcggcgatatttttggcggcggacgtggtcgtcaacgtgcggcgcgcggtgctgatttacgctataacatggagctcaccctcgaagaagctgtacgtggcgtgaccaaagagatccgcattccgactctggaagagtgtgacgtttgccacggtagcggtgcaaaaccaggtacacagccgcagacttgtccgacctgtcatggttctggtcaggtgcagatgcgccagggattcttcgctgtacagcagacctgtccacactgtcagggccgcggtacgctgatcaaagatccgtgcaacaaatgtcatggtcatggtcgtgttgagcgcagcaaaacgctgtccgttaaaatcccggcaggggtggacactggagaccgcatccgtcttgcgggcgaaggtgaagcgggcgagcatggcgcaccggcaggcgatctgtacgttcaggttcaggttaaacagcacccgattttcgagcgtgaaggcaacaacctgtattgcgaagtcccgatcaacttcgctatggcggcgctgggtggcgaaatcgaagtaccgacccttgatggtcgcgtcaaactgaaagtgcctggcgaaacccagaccggtaagctattccgtatgcgcggtaaaggcgtcaagtctgtccgcggtggcgcacagggtgatttgctgtgccgcgttgtcgtcgaaacaccggtaggcctgaacgaaaggcagaaacagctgctgcaagagctgcaagaaagcttcggtggcccaaccggcgagcacaacagcccgcgctcaaagagcttctttgatggtgtgaagaagttttttgacgacctgacccgctaa     *       AS:i:1131       XS:i:1131       XF:i:0  XE:i:24 NM:i:0
 
1163
s1_6    0       s1_6    1       1       1132M   *       0       0       aatgactaagcaagattattacgagattttaggcgtttccaaaacagcggaagagcgtgaaatcagaaaggcctacaaacgcctggccatgaaataccacccggaccgtaaccagggtgacaaagaggccgaggcgaaatttaaagagatcaaggaagcttatgaagttctgaccgactcgcaaaaacgtgcggcatacgatcagtatggtcatgctgcgtttgagcaaggtggcatgggcggcggcggttttggcggcggcgcagacttcagcgatatttttggtgacgttttcggcgatatttttggcggcggacgtggtcgtcaacgtgcggcgcgcggtgctgatttacgctataacatggagctcaccctcgaagaagctgtacgtggcgtgaccaaagagatccgcattccgactctggaagagtgtgacgtttgccacggtagcggtgcaaaaccaggtacacagccgcagacttgtccgacctgtcatggttctggtcaggtgcagatgcgccagggattcttcgctgtacagcagacctgtccacactgtcagggccgcggtacgctgatcaaagatccgtgcaacaaatgtcatggtcatggtcgtgttgagcgcagcaaaacgctgtccgttaaaatcccggcaggggtggacactggagaccgcatccgtcttgcgggcgaaggtgaagcgggcgagcatggcgcaccggcaggcgatctgtacgttcaggttcaggttaaacagcacccgattttcgagcgtgaaggcaacaacctgtattgcgaagtcccgatcaacttcgctatggcggcgctgggtggcgaaatcgaagtaccgacccttgatggtcgcgtcaaactgaaagtgcctggcgaaacccagaccggtaagctattccgtatgcgcggtaaaggcgtcaagtctgtccgcggtggcgcacagggtgatttgctgtgccgcgttgtcgtcgaaacaccggtaggcctgaacgaaaggcagaaacagctgctgcaagagctgcaagaaagcttcggtggcccaaccggcgagcacaacagcccgcgctcaaagagcttctttgatggtgtgaagaagttttttgacgacctgacccgctaa    *       AS:i:1132       XS:i:1128       XF:i:0  XE:i:24 NM:i:0
 
1164
"""
 
1165
 
 
1166
sam1_expected = [["s1_1", "0", "s1_1", "2", "136", "1S65M", "*", "0", "0", "atgaaacgcattagcaccaccattaccaccaccatcaccattaccacaggtaacggtgcgggctga", "*", "AS:i:65", "XS:i:0", "XF:i:3", "XE:i:2", "NM:i:0"],
 
1167
["s2_2", "0", "s1_5", "1", "0", "1131M", "*", "0", "0", "atggctaagcaagattattacgagattttaggcgtttccaaaacagcggaagagcgtgaaatcagaaaggcctacaaacgcctggccatgaaataccacccggaccgtaaccagggtgacaaagaggccgaggcgaaatttaaagagatcaaggaagcttatgaagttctgaccgactcgcaaaaacgtgcggcatacgatcagtatggtcatgctgcgtttgagcaaggtggcatgggcggcggcggttttggcggcggcgcagacttcagcgatatttttggtgacgttttcggcgatatttttggcggcggacgtggtcgtcaacgtgcggcgcgcggtgctgatttacgctataacatggagctcaccctcgaagaagctgtacgtggcgtgaccaaagagatccgcattccgactctggaagagtgtgacgtttgccacggtagcggtgcaaaaccaggtacacagccgcagacttgtccgacctgtcatggttctggtcaggtgcagatgcgccagggattcttcgctgtacagcagacctgtccacactgtcagggccgcggtacgctgatcaaagatccgtgcaacaaatgtcatggtcatggtcgtgttgagcgcagcaaaacgctgtccgttaaaatcccggcaggggtggacactggagaccgcatccgtcttgcgggcgaaggtgaagcgggcgagcatggcgcaccggcaggcgatctgtacgttcaggttcaggttaaacagcacccgattttcgagcgtgaaggcaacaacctgtattgcgaagtcccgatcaacttcgctatggcggcgctgggtggcgaaatcgaagtaccgacccttgatggtcgcgtcaaactgaaagtgcctggcgaaacccagaccggtaagctattccgtatgcgcggtaaaggcgtcaagtctgtccgcggtggcgcacagggtgatttgctgtgccgcgttgtcgtcgaaacaccggtaggcctgaacgaaaggcagaaacagctgctgcaagagctgcaagaaagcttcggtggcccaaccggcgagcacaacagcccgcgctcaaagagcttctttgatggtgtgaagaagttttttgacgacctgacccgctaa", "*", "AS:i:1131", "XS:i:1131", "XF:i:0", "XE:i:24", "NM:i:0"],
 
1168
["s1_3", "0", "s1_3", "1", "9", "348M", "*", "0", "0", "atgaagacgtttttcagaacagtgttattcggcagcctgatggccgtctgcgcaaacagttacgcgctcagcgagtctgaagccgaagatatggccgatttaacggcagtttttgtctttctgaagaacgattgtggttaccagaacttacctaacgggcaaattcgtcgcgcactggtctttttcgctcagcaaaaccagtgggacctcagtaattacgacaccttcgacatgaaagccctcggtgaagacagctaccgcgatctcagcggcattggcattcccgtcgctaaaaaatgcaaagccctggcccgcgattccttaagcctgcttgcctacgtcaaataa", "*", "AS:i:348", "XS:i:336", "XF:i:0", "XE:i:9", "NM:i:0"],
 
1169
["s1_4", "0", "s1_4", "1", "9", "348M", "*", "0", "0", "atgaagaaaattttcagaacagtgttattcggcagcctgatggccgtctgcgcaaacagttacgcgctcagcgagtctgaagccgaagatatggccgatttaacggcagtttttgtctttctgaagaacgattgtggttaccagaacttacctaacgggcaaattcgtcgcgcactggtctttttcgctcagcaaaaccagtgggacctcagtaattacgacaccttcgacatgaaagccctcggtgaagacagctaccgcgatctcagcggcattggcattcccgtcgctaaaaaatgcaaagccctggcccgcgattccttaagcctgcttgcctacgtcaaatcc", "*", "AS:i:348", "XS:i:336", "XF:i:0", "XE:i:9", "NM:i:0"],
 
1170
["s1_5", "0", "s2_2", "1", "0", "1131M", "*", "0", "0", "atggctaagcaagattattacgagattttaggcgtttccaaaacagcggaagagcgtgaaatcagaaaggcctacaaacgcctggccatgaaataccacccggaccgtaaccagggtgacaaagaggccgaggcgaaatttaaagagatcaaggaagcttatgaagttctgaccgactcgcaaaaacgtgcggcatacgatcagtatggtcatgctgcgtttgagcaaggtggcatgggcggcggcggttttggcggcggcgcagacttcagcgatatttttggtgacgttttcggcgatatttttggcggcggacgtggtcgtcaacgtgcggcgcgcggtgctgatttacgctataacatggagctcaccctcgaagaagctgtacgtggcgtgaccaaagagatccgcattccgactctggaagagtgtgacgtttgccacggtagcggtgcaaaaccaggtacacagccgcagacttgtccgacctgtcatggttctggtcaggtgcagatgcgccagggattcttcgctgtacagcagacctgtccacactgtcagggccgcggtacgctgatcaaagatccgtgcaacaaatgtcatggtcatggtcgtgttgagcgcagcaaaacgctgtccgttaaaatcccggcaggggtggacactggagaccgcatccgtcttgcgggcgaaggtgaagcgggcgagcatggcgcaccggcaggcgatctgtacgttcaggttcaggttaaacagcacccgattttcgagcgtgaaggcaacaacctgtattgcgaagtcccgatcaacttcgctatggcggcgctgggtggcgaaatcgaagtaccgacccttgatggtcgcgtcaaactgaaagtgcctggcgaaacccagaccggtaagctattccgtatgcgcggtaaaggcgtcaagtctgtccgcggtggcgcacagggtgatttgctgtgccgcgttgtcgtcgaaacaccggtaggcctgaacgaaaggcagaaacagctgctgcaagagctgcaagaaagcttcggtggcccaaccggcgagcacaacagcccgcgctcaaagagcttctttgatggtgtgaagaagttttttgacgacctgacccgctaa", "*", "AS:i:1131", "XS:i:1131", "XF:i:0", "XE:i:24", "NM:i:0"],
 
1171
["s1_6", "0", "s1_6", "1", "1", "1132M", "*", "0", "0", "aatgactaagcaagattattacgagattttaggcgtttccaaaacagcggaagagcgtgaaatcagaaaggcctacaaacgcctggccatgaaataccacccggaccgtaaccagggtgacaaagaggccgaggcgaaatttaaagagatcaaggaagcttatgaagttctgaccgactcgcaaaaacgtgcggcatacgatcagtatggtcatgctgcgtttgagcaaggtggcatgggcggcggcggttttggcggcggcgcagacttcagcgatatttttggtgacgttttcggcgatatttttggcggcggacgtggtcgtcaacgtgcggcgcgcggtgctgatttacgctataacatggagctcaccctcgaagaagctgtacgtggcgtgaccaaagagatccgcattccgactctggaagagtgtgacgtttgccacggtagcggtgcaaaaccaggtacacagccgcagacttgtccgacctgtcatggttctggtcaggtgcagatgcgccagggattcttcgctgtacagcagacctgtccacactgtcagggccgcggtacgctgatcaaagatccgtgcaacaaatgtcatggtcatggtcgtgttgagcgcagcaaaacgctgtccgttaaaatcccggcaggggtggacactggagaccgcatccgtcttgcgggcgaaggtgaagcgggcgagcatggcgcaccggcaggcgatctgtacgttcaggttcaggttaaacagcacccgattttcgagcgtgaaggcaacaacctgtattgcgaagtcccgatcaacttcgctatggcggcgctgggtggcgaaatcgaagtaccgacccttgatggtcgcgtcaaactgaaagtgcctggcgaaacccagaccggtaagctattccgtatgcgcggtaaaggcgtcaagtctgtccgcggtggcgcacagggtgatttgctgtgccgcgttgtcgtcgaaacaccggtaggcctgaacgaaaggcagaaacagctgctgcaagagctgcaagaaagcttcggtggcccaaccggcgagcacaacagcccgcgctcaaagagcttctttgatggtgtgaagaagttttttgacgacctgacccgctaa", "*", "AS:i:1132", "XS:i:1128", "XF:i:0", "XE:i:24", "NM:i:0"]]
1026
1172
 
1027
1173
if __name__ =='__main__':
1028
1174
    main()