~ubuntu-branches/ubuntu/natty/python-cogent/natty

Viewing changes to cogent/parse/binary_sff.py

Committer: Bazaar Package Importer
Author(s): Steffen Moeller
Date: 2010-12-04 22:30:35 UTC
mfrom: (1.1.1 upstream)
Revision ID: james.westby@ubuntu.com-20101204223035-j11kinhcrrdgg2p2

Tags: 1.5-1

* Bumped standard to 3.9.1, no changes required.
* New upstream version.
  - major additions to Cookbook
  - added AlleleFreqs attribute to ensembl Variation objects.
  - added getGeneByStableId method to genome objects.
  - added Introns attribute to Transcript objects and an Intron class.
  - added Mann-Whitney test and a Monte-Carlo version
  - exploratory and confirmatory period estimation techniques (suitable for
    symbolic and continuous data)
  - Information theoretic measures (AIC and BIC) added
  - drawing of trees with collapsed nodes
  - progress display indicator support for terminal and GUI apps
  - added parser for illumina HiSeq2000 and GAiix sequence files as
    cogent.parse.illumina_sequence.MinimalIlluminaSequenceParser.
  - added parser for FASTQ files, one of the output options for Illumina's
    workflow; also added cookbook demo.
  - added functionality for parsing of SFF files without the Roche tools in
    cogent.parse.binary_sff
  - thousand fold performance improvement to nmds
  - >10-fold performance improvements to some Table operations

files added:
cogent/cluster/approximate_mds.py

cogent/maths/_period.c

cogent/maths/_period.pyx

cogent/maths/period.py

cogent/maths/stats/information_criteria.py

cogent/maths/stats/period.py

cogent/parse/binary_sff.py

cogent/parse/fastq.py

cogent/parse/illumina_sequence.py

cogent/parse/kegg_ko.py

cogent/parse/kegg_pos.py

cogent/parse/kegg_taxonomy.py

cogent/util/progress_display.py

cogent/util/terminal.py

doc/_static

doc/_static/google_feed.js

doc/cookbook/alphabet.rst

doc/cookbook/checkpointing_long_running.rst

doc/cookbook/ensembl.rst

doc/cookbook/loading_sequences.rst

doc/cookbook/managing_trees.rst

doc/cookbook/moltypesequence.rst

doc/cookbook/parallel_tasks.rst

doc/cookbook/phylonodes.rst

doc/cookbook/structural_contacts.rst

doc/cookbook/structural_data_2.rst

doc/data/1HQF.pdb

doc/data/Crump_et_al_example_env_file.txt

doc/data/Crump_example_tree_newick.txt

doc/data/inseqs_protein.fasta

doc/data/refseqs_protein.fasta

doc/examples/building_and_using_an_application_controller.rst

doc/examples/period_estimation.rst

doc/examples/seqsim_alignment_simulation.rst

doc/examples/seqsim_aln_sim_user_alphabet.rst

doc/examples/seqsim_tree_sim.rst

tests/data/F6AVWTA01.sff

tests/data/fastq.txt

tests/test_cluster/test_approximate_mds.py

tests/test_maths/test_period.py

tests/test_maths/test_stats/test_information_criteria.py

tests/test_maths/test_stats/test_period.py

tests/test_parse/test_binary_sff.py

tests/test_parse/test_fastq.py

tests/test_parse/test_illumina_sequence.py

tests/test_parse/test_kegg_ko.py

tests/test_parse/test_kegg_pos.py

tests/test_parse/test_kegg_taxonomy.py

tests/test_parse/test_mothur.py

tests/test_parse/test_pdb.py

tests/test_parse/test_rna_plot.py

tests/test_parse/test_structure.py

files removed:
tests/test_core/test_tree2.py

files modified:
.pc/fix_python_shebang_line.patch/cogent/align/dp_calculation.py

.pc/fix_python_shebang_line.patch/cogent/data/molecular_weight.py

.pc/fix_python_shebang_line.patch/cogent/format/text_tree.py

.pc/fix_python_shebang_line.patch/cogent/phylo/maximum_likelihood.py

.pc/fix_python_shebang_line.patch/cogent/recalculation/__init__.py

.pc/fix_python_shebang_line.patch/cogent/recalculation/setting.py

ChangeLog

cogent/__init__.py

cogent/align/__init__.py

cogent/align/_compare.c

cogent/align/_compare.pyx

cogent/align/_pairwise_pogs.c

cogent/align/_pairwise_pogs.pyx

cogent/align/_pairwise_seqs.c

cogent/align/_pairwise_seqs.pyx

cogent/align/algorithm.py

cogent/align/align.py

cogent/align/dp_calculation.py

cogent/align/indel_model.py

cogent/align/indel_positions.py

cogent/align/pairwise.py

cogent/align/partial_order_graph.py

cogent/align/progressive.py

cogent/align/pycompare.py

cogent/align/traceback.py

cogent/align/weights/__init__.py

cogent/align/weights/methods.py

cogent/align/weights/util.py

cogent/app/__init__.py

cogent/app/blast.py

cogent/app/carnac.py

cogent/app/cd_hit.py

cogent/app/clearcut.py

cogent/app/clustalw.py

cogent/app/cmfinder.py

cogent/app/comrna.py

cogent/app/consan.py

cogent/app/contrafold.py

cogent/app/cove.py

cogent/app/dialign.py

cogent/app/dotur.py

cogent/app/dynalign.py

cogent/app/fasttree.py

cogent/app/fasttree_v1.py

cogent/app/foldalign.py

cogent/app/formatdb.py

cogent/app/gctmpca.py

cogent/app/ilm.py

cogent/app/infernal.py

cogent/app/knetfold.py

cogent/app/mafft.py

cogent/app/mfold.py

cogent/app/mothur.py

cogent/app/msms.py

cogent/app/muscle.py

cogent/app/nupack.py

cogent/app/parameters.py

cogent/app/pfold.py

cogent/app/pknotsrg.py

cogent/app/raxml.py

cogent/app/rdp_classifier.py

cogent/app/rnaalifold.py

cogent/app/rnaforester.py

cogent/app/rnashapes.py

cogent/app/rnaview.py

cogent/app/sfffile.py

cogent/app/sffinfo.py

cogent/app/sfold.py

cogent/app/stride.py

cogent/app/uclust.py

cogent/app/unafold.py

cogent/app/util.py

cogent/app/vienna_package.py

cogent/cluster/UPGMA.py

cogent/cluster/__init__.py

cogent/cluster/goodness_of_fit.py

cogent/cluster/metric_scaling.py

cogent/cluster/nmds.py

cogent/cluster/procrustes.py

cogent/core/__init__.py

cogent/core/alignment.py

cogent/core/alphabet.py

cogent/core/annotation.py

cogent/core/bitvector.py

cogent/core/entity.py

cogent/core/genetic_code.py

cogent/core/info.py

cogent/core/location.py

cogent/core/moltype.py

cogent/core/profile.py

cogent/core/sequence.py

cogent/core/tree.py

cogent/core/usage.py

cogent/data/__init__.py

cogent/data/energy_params.py

cogent/data/ligand_properties.py

cogent/data/molecular_weight.py

cogent/data/nucleic_properties.py

cogent/data/protein_properties.py

cogent/db/__init__.py

cogent/db/ensembl/__init__.py

cogent/db/ensembl/assembly.py

cogent/db/ensembl/compara.py

cogent/db/ensembl/database.py

cogent/db/ensembl/feature_level.py

cogent/db/ensembl/genome.py

cogent/db/ensembl/host.py

cogent/db/ensembl/name.py

cogent/db/ensembl/region.py

cogent/db/ensembl/related_region.py

cogent/db/ensembl/sequence.py

cogent/db/ensembl/species.py

cogent/db/ensembl/util.py

cogent/db/ncbi.py

cogent/db/pdb.py

cogent/db/rfam.py

cogent/db/util.py

cogent/draw/__init__.py

cogent/draw/arrow_rates.py

cogent/draw/codon_usage.py

cogent/draw/dendrogram.py

cogent/draw/dinuc.py

cogent/draw/dotplot.py

cogent/draw/fancy_arrow.py

cogent/draw/legend.py

cogent/draw/linear.py

cogent/draw/multivariate_plot.py

cogent/draw/rlg2mpl.py

cogent/draw/util.py

cogent/evolve/__init__.py

cogent/evolve/_likelihood_tree.c

cogent/evolve/_likelihood_tree.pyx

cogent/evolve/best_likelihood.py

cogent/evolve/bootstrap.py

cogent/evolve/coevolution.py

cogent/evolve/discrete_markov.py

cogent/evolve/likelihood_calculation.py

cogent/evolve/likelihood_function.py

cogent/evolve/likelihood_tree.py

cogent/evolve/models.py

cogent/evolve/motif_prob_model.py

cogent/evolve/parameter_controller.py

cogent/evolve/predicate.py

cogent/evolve/simulate.py

cogent/evolve/substitution_calculation.py

cogent/evolve/substitution_model.py

cogent/format/__init__.py

cogent/format/alignment.py

cogent/format/clustal.py

cogent/format/fasta.py

cogent/format/mage.py

cogent/format/motif.py

cogent/format/nexus.py

cogent/format/pdb.py

cogent/format/pdb_color.py

cogent/format/phylip.py

cogent/format/rna_struct.py

cogent/format/stockholm.py

cogent/format/structure.py

cogent/format/table.py

cogent/format/text_tree.py

cogent/format/xyzrn.py

cogent/maths/__init__.py

cogent/maths/_matrix_exponentiation.c

cogent/maths/_matrix_exponentiation.pyx

cogent/maths/distance_transform.py

cogent/maths/eigen.c

cogent/maths/function_optimisation.py

cogent/maths/geometry.py

cogent/maths/markov.py

cogent/maths/matrix/__init__.py

cogent/maths/matrix/distance.py

cogent/maths/matrix_exponentiation.py

cogent/maths/matrix_invert.c

cogent/maths/matrix_logarithm.py

cogent/maths/optimiser.py

cogent/maths/optimisers.py

cogent/maths/scipy_optimisers.py

cogent/maths/scipy_optimize.py

cogent/maths/simannealingoptimiser.py

cogent/maths/solve.py

cogent/maths/spatial/__init__.py

cogent/maths/spatial/ckd3.c

cogent/maths/spatial/ckd3.pyx

cogent/maths/stats/__init__.py

cogent/maths/stats/alpha_diversity.py

cogent/maths/stats/cai/__init__.py

cogent/maths/stats/cai/adaptor.py

cogent/maths/stats/cai/get_by_cai.py

cogent/maths/stats/cai/util.py

cogent/maths/stats/distribution.py

cogent/maths/stats/histogram.py

cogent/maths/stats/kendall.py

cogent/maths/stats/ks.py

cogent/maths/stats/rarefaction.py

cogent/maths/stats/special.py

cogent/maths/stats/test.py

cogent/maths/stats/util.py

cogent/maths/svd.py

cogent/maths/unifrac/__init__.py

cogent/maths/unifrac/fast_tree.py

cogent/maths/unifrac/fast_unifrac.py

cogent/motif/__init__.py

cogent/motif/k_word.py

cogent/motif/util.py

cogent/parse/__init__.py

cogent/parse/aaindex.py

cogent/parse/agilent_microarray.py

cogent/parse/blast.py

cogent/parse/blast_xml.py

cogent/parse/bpseq.py

cogent/parse/carnac.py

cogent/parse/cigar.py

cogent/parse/clustal.py

cogent/parse/cmfinder.py

cogent/parse/column.py

cogent/parse/comrna.py

cogent/parse/consan.py

cogent/parse/contrafold.py

cogent/parse/cove.py

cogent/parse/ct.py

cogent/parse/cut.py

cogent/parse/cutg.py

cogent/parse/dialign.py

cogent/parse/dotur.py

cogent/parse/dynalign.py

cogent/parse/ebi.py

cogent/parse/fasta.py

cogent/parse/flowgram.py

cogent/parse/flowgram_collection.py

cogent/parse/flowgram_parser.py

cogent/parse/foldalign.py

cogent/parse/gbseq.py

cogent/parse/gcg.py

cogent/parse/genbank.py

cogent/parse/gff.py

cogent/parse/gibbs.py

cogent/parse/ilm.py

cogent/parse/infernal.py

cogent/parse/knetfold.py

cogent/parse/locuslink.py

cogent/parse/macsim.py

cogent/parse/mage.py

cogent/parse/meme.py

cogent/parse/mfold.py

cogent/parse/mothur.py

cogent/parse/msms.py

cogent/parse/ncbi_taxonomy.py

cogent/parse/newick.py

cogent/parse/nexus.py

cogent/parse/nupack.py

cogent/parse/paml.py

cogent/parse/paml_matrix.py

cogent/parse/pdb.py

cogent/parse/pfold.py

cogent/parse/phylip.py

cogent/parse/pknotsrg.py

cogent/parse/rdb.py

cogent/parse/record.py

cogent/parse/record_finder.py

cogent/parse/rfam.py

cogent/parse/rna_fold.py

cogent/parse/rna_plot.py

cogent/parse/rnaalifold.py

cogent/parse/rnaforester.py

cogent/parse/rnashapes.py

cogent/parse/rnaview.py

cogent/parse/sequence.py

cogent/parse/sfold.py

cogent/parse/sprinzl.py

cogent/parse/stride.py

cogent/parse/structure.py

cogent/parse/table.py

cogent/parse/tinyseq.py

cogent/parse/tree.py

cogent/parse/tree_xml.py

cogent/parse/unafold.py

cogent/parse/unigene.py

cogent/phylo/__init__.py

cogent/phylo/compatibility.py

cogent/phylo/consensus.py

cogent/phylo/distance.py

cogent/phylo/least_squares.py

cogent/phylo/maximum_likelihood.py

cogent/phylo/nj.py

cogent/phylo/tree_collection.py

cogent/phylo/tree_space.py

cogent/phylo/util.py

cogent/recalculation/__init__.py

cogent/recalculation/calculation.py

cogent/recalculation/definition.py

cogent/recalculation/scope.py

cogent/recalculation/setting.py

cogent/seqsim/__init__.py

cogent/seqsim/analysis.py

cogent/seqsim/birth_death.py

cogent/seqsim/markov.py

cogent/seqsim/microarray.py

cogent/seqsim/microarray_normalize.py

cogent/seqsim/randomization.py

cogent/seqsim/searchpath.py

cogent/seqsim/sequence_generators.py

cogent/seqsim/tree.py

cogent/seqsim/usage.py

cogent/struct/__init__.py

cogent/struct/_asa.c

cogent/struct/_asa.pyx

cogent/struct/_contact.c

cogent/struct/_contact.pyx

cogent/struct/annotation.py

cogent/struct/asa.py

cogent/struct/contact.py

cogent/struct/dihedral.py

cogent/struct/knots.py

cogent/struct/manipulation.py

cogent/struct/pairs_util.py

cogent/struct/rna2d.py

cogent/struct/selection.py

cogent/util/__init__.py

cogent/util/array.py

cogent/util/checkpointing.py

cogent/util/datatypes.py

cogent/util/dict2d.py

cogent/util/dict_array.py

cogent/util/misc.py

cogent/util/modules.py

cogent/util/organizer.py

cogent/util/parallel.py

cogent/util/recode_alignment.py

cogent/util/table.py

cogent/util/transform.py

cogent/util/trie.py

cogent/util/unit_test.py

cogent/util/update_version.py

cogent/util/warning.py

debian/changelog

debian/control

doc/conf.py

doc/cookbook/DNA_and_RNA_sequences.rst

doc/cookbook/accessing_databases.rst

doc/cookbook/alignments.rst

doc/cookbook/analysis_of_sequence_composition.rst

doc/cookbook/annotations.rst

doc/cookbook/blast.rst

doc/cookbook/building_alignments.rst

doc/cookbook/building_phylogenies.rst

doc/cookbook/community_analysis.rst

doc/cookbook/dealing_with_hts_data.rst

doc/cookbook/genetic_code.rst

doc/cookbook/hpc_environments.rst

doc/cookbook/index.rst

doc/cookbook/introduction.rst

doc/cookbook/manipulating_biological_data.rst

doc/cookbook/multivariate_data_analysis.rst

doc/cookbook/simple_trees.rst

doc/cookbook/standard_statistical_analyses.rst

doc/cookbook/structural_data.rst

doc/cookbook/tips_for_using_python.rst

doc/cookbook/useful_utilities.rst

doc/cookbook/using_likelihood_to_perform_evolutionary_analyses.rst

doc/data_file_links.rst

doc/examples/alignment_app_controllers.rst

doc/examples/application_controller_framework.rst

doc/examples/calculate_UPGMA_cluster.rst

doc/examples/calculate_neigbourjoining_tree.rst

doc/examples/calculate_pairwise_distances.rst

doc/examples/codon_models.rst

doc/examples/draw_dendrogram.rst

doc/examples/draw_dotplot.rst

doc/examples/empirical_protein_models.rst

doc/examples/estimate_startingpoint.rst

doc/examples/genetic_code_aa_index.rst

doc/examples/handling_3dstructures.rst

doc/examples/hmm_par_heterogeneity.rst

doc/examples/index.rst

doc/examples/maketree_from_proteinseqs.rst

doc/examples/neutral_test.rst

doc/examples/parametric_bootstrap.rst

doc/examples/perform_PCoA_analysis.rst

doc/examples/phylo_by_ls.rst

doc/examples/phylogeny_app_controllers.rst

doc/examples/query_ensembl.rst

doc/examples/query_ncbi.rst

doc/examples/rate_heterogeneity.rst

doc/examples/relative_rate.rst

doc/examples/reuse_results.rst

doc/examples/scope_model_params_on_trees.rst

doc/examples/simple.rst

doc/examples/testing_multi_loci.rst

doc/examples/unrestricted_nucleotide.rst

doc/index.rst

doc/install.rst

doc/templates/layout.html

include/array_interface.h

include/numerical_pyrex.pyx

setup.py

tests/__init__.py

tests/alltests.py

tests/benchmark.py

tests/benchmark_aligning.py

tests/test_align/__init__.py

tests/test_align/test_algorithm.py

tests/test_align/test_align.py

tests/test_align/test_weights/__init__.py

tests/test_align/test_weights/test_methods.py

tests/test_align/test_weights/test_util.py

tests/test_app/__init__.py

tests/test_app/test_blast.py

tests/test_app/test_carnac.py

tests/test_app/test_cd_hit.py

tests/test_app/test_clearcut.py

tests/test_app/test_clustalw.py

tests/test_app/test_cmfinder.py

tests/test_app/test_comrna.py

tests/test_app/test_consan.py

tests/test_app/test_contrafold.py

tests/test_app/test_cove.py

tests/test_app/test_dialign.py

tests/test_app/test_dotur.py

tests/test_app/test_dynalign.py

tests/test_app/test_fasttree.py

tests/test_app/test_fasttree_v1.py

tests/test_app/test_foldalign.py

tests/test_app/test_formatdb.py

tests/test_app/test_gctmpca.py

tests/test_app/test_ilm.py

tests/test_app/test_infernal.py

tests/test_app/test_knetfold.py

tests/test_app/test_mafft.py

tests/test_app/test_mfold.py

tests/test_app/test_mothur.py

tests/test_app/test_msms.py

tests/test_app/test_muscle.py

tests/test_app/test_nupack.py

tests/test_app/test_parameters.py

tests/test_app/test_pfold.py

tests/test_app/test_pknotsrg.py

tests/test_app/test_raxml.py

tests/test_app/test_rdp_classifier.py

tests/test_app/test_rnaalifold.py

tests/test_app/test_rnaforester.py

tests/test_app/test_rnaview.py

tests/test_app/test_sfffile.py

tests/test_app/test_sffinfo.py

tests/test_app/test_sfold.py

tests/test_app/test_stride.py

tests/test_app/test_uclust.py

tests/test_app/test_unafold.py

tests/test_app/test_util.py

tests/test_app/test_vienna_package.py

tests/test_cluster/__init__.py

tests/test_cluster/test_UPGMA.py

tests/test_cluster/test_goodness_of_fit.py

tests/test_cluster/test_metric_scaling.py

tests/test_cluster/test_nmds.py

tests/test_cluster/test_procrustes.py

tests/test_core/__init__.py

tests/test_core/test_alignment.py

tests/test_core/test_alphabet.py

tests/test_core/test_annotation.py

tests/test_core/test_bitvector.py

tests/test_core/test_core_standalone.py

tests/test_core/test_entity.py

tests/test_core/test_genetic_code.py

tests/test_core/test_info.py

tests/test_core/test_location.py

tests/test_core/test_maps.py

tests/test_core/test_moltype.py

tests/test_core/test_profile.py

tests/test_core/test_seq_aln_integration.py

tests/test_core/test_sequence.py

tests/test_core/test_tree.py

tests/test_core/test_usage.py

tests/test_data/__init__.py

tests/test_data/test_molecular_weight.py

tests/test_db/__init__.py

tests/test_db/test_ensembl/__init__.py

tests/test_db/test_ensembl/test_assembly.py

tests/test_db/test_ensembl/test_compara.py

tests/test_db/test_ensembl/test_database.py

tests/test_db/test_ensembl/test_feature_level.py

tests/test_db/test_ensembl/test_genome.py

tests/test_db/test_ensembl/test_host.py

tests/test_db/test_ensembl/test_species.py

tests/test_db/test_ncbi.py

tests/test_db/test_pdb.py

tests/test_db/test_rfam.py

tests/test_db/test_util.py

tests/test_draw.py

tests/test_draw/test_matplotlib/test_arrow_rates.py

tests/test_draw/test_matplotlib/test_codon_usage.py

tests/test_draw/test_matplotlib/test_dinuc.py

tests/test_draw/test_matplotlib/test_multivariate_plot.py

tests/test_evolve/__init__.py

tests/test_evolve/test_best_likelihood.py

tests/test_evolve/test_bootstrap.py

tests/test_evolve/test_coevolution.py

tests/test_evolve/test_likelihood_function.py

tests/test_evolve/test_models.py

tests/test_evolve/test_motifchange.py

tests/test_evolve/test_newq.py

tests/test_evolve/test_parameter_controller.py

tests/test_evolve/test_scale_rules.py

tests/test_evolve/test_simulation.py

tests/test_evolve/test_substitution_model.py

tests/test_format/__init__.py

tests/test_format/test_clustal.py

tests/test_format/test_fasta.py

tests/test_format/test_mage.py

tests/test_format/test_pdb_color.py

tests/test_format/test_stockholm.py

tests/test_format/test_xyzrn.py

tests/test_maths/__init__.py

tests/test_maths/test_distance_transform.py

tests/test_maths/test_function_optimisation.py

tests/test_maths/test_geometry.py

tests/test_maths/test_matrix/__init__.py

tests/test_maths/test_matrix/test_distance.py

tests/test_maths/test_matrix_logarithm.py

tests/test_maths/test_optimisers.py

tests/test_maths/test_spatial/__init__.py

tests/test_maths/test_spatial/test_ckd3.py

tests/test_maths/test_stats/__init__.py

tests/test_maths/test_stats/test_alpha_diversity.py

tests/test_maths/test_stats/test_cai/__init__.py

tests/test_maths/test_stats/test_cai/test_adaptor.py

tests/test_maths/test_stats/test_cai/test_get_by_cai.py

tests/test_maths/test_stats/test_cai/test_util.py

tests/test_maths/test_stats/test_distribution.py

tests/test_maths/test_stats/test_histogram.py

tests/test_maths/test_stats/test_ks.py

tests/test_maths/test_stats/test_rarefaction.py

tests/test_maths/test_stats/test_special.py

tests/test_maths/test_stats/test_test.py

tests/test_maths/test_stats/test_util.py

tests/test_maths/test_svd.py

tests/test_maths/test_unifrac/__init__.py

tests/test_maths/test_unifrac/test_fast_tree.py

tests/test_maths/test_unifrac/test_fast_unifrac.py

tests/test_motif/__init__.py

tests/test_motif/test_util.py

tests/test_parse/__init__.py

tests/test_parse/test_aaindex.py

tests/test_parse/test_agilent_microarray.py

tests/test_parse/test_blast.py

tests/test_parse/test_blast_xml.py

tests/test_parse/test_bpseq.py

tests/test_parse/test_cigar.py

tests/test_parse/test_clustal.py

tests/test_parse/test_column.py

tests/test_parse/test_comrna.py

tests/test_parse/test_consan.py

tests/test_parse/test_cove.py

tests/test_parse/test_ct.py

tests/test_parse/test_cut.py

tests/test_parse/test_cutg.py

tests/test_parse/test_dialign.py

tests/test_parse/test_dotur.py

tests/test_parse/test_ebi.py

tests/test_parse/test_fasta.py

tests/test_parse/test_flowgram.py

tests/test_parse/test_flowgram_collection.py

tests/test_parse/test_flowgram_parser.py

tests/test_parse/test_genbank.py

tests/test_parse/test_gff.py

tests/test_parse/test_gibbs.py

tests/test_parse/test_ilm.py

tests/test_parse/test_infernal.py

tests/test_parse/test_locuslink.py

tests/test_parse/test_mage.py

tests/test_parse/test_meme.py

tests/test_parse/test_msms.py

tests/test_parse/test_ncbi_taxonomy.py

tests/test_parse/test_nexus.py

tests/test_parse/test_nupack.py

tests/test_parse/test_phylip.py

tests/test_parse/test_pknotsrg.py

tests/test_parse/test_rdb.py

tests/test_parse/test_record.py

tests/test_parse/test_record_finder.py

tests/test_parse/test_rfam.py

tests/test_parse/test_rna_fold.py

tests/test_parse/test_rnaalifold.py

tests/test_parse/test_rnaforester.py

tests/test_parse/test_rnaview.py

tests/test_parse/test_sprinzl.py

tests/test_parse/test_stride.py

tests/test_parse/test_tree.py

tests/test_parse/test_unigene.py

tests/test_phylo.py

tests/test_recalculation.rst

tests/test_seqsim/__init__.py

tests/test_seqsim/test_analysis.py

tests/test_seqsim/test_birth_death.py

tests/test_seqsim/test_markov.py

tests/test_seqsim/test_microarray.py

tests/test_seqsim/test_microarray_normalize.py

tests/test_seqsim/test_randomization.py

tests/test_seqsim/test_searchpath.py

tests/test_seqsim/test_sequence_generators.py

tests/test_seqsim/test_tree.py

tests/test_seqsim/test_usage.py

tests/test_struct/__init__.py

tests/test_struct/test_annotation.py

tests/test_struct/test_asa.py

tests/test_struct/test_contact.py

tests/test_struct/test_dihedral.py

tests/test_struct/test_knots.py

tests/test_struct/test_manipulation.py

tests/test_struct/test_pairs_util.py

tests/test_struct/test_rna2d.py

tests/test_struct/test_selection.py

tests/test_util/__init__.py

tests/test_util/test_array.py

tests/test_util/test_dict2d.py

tests/test_util/test_misc.py

tests/test_util/test_organizer.py

tests/test_util/test_recode_alignment.py

tests/test_util/test_table.rst

tests/test_util/test_transform.py

tests/test_util/test_trie.py

tests/test_util/test_unit_test.py

tests/timetrial.py

Show diffs side-by-side

added added

removed removed

cogent/parse/binary_sff.py

#!/usr/bin/env python

"""Parser for 454 Flowgram files in native binary format."""

__author__ = 'Kyle Bittinger'

__license__ = 'GPL'

__version__ = "1.5.0"

__credits__ = ['Kyle Bittinger']

__maintainer__ = 'Kyle Bittinger'

__email__ = 'kylebittinger@gmail.com'

__status__ = 'Prototype'

from cStringIO import StringIO

import string

import struct

# Sections were inspired by, but not derived from, several other implementations:

# * BioPython (biopython.org)

# * sff_extract (www.melogen.upv.es/sff_extract)

# * Mothur (mothur.org)

class NamedStruct(struct.Struct):
    """Struct whose items are addressed by name rather than by position.

    Each entry of ``keys`` labels the item at the same position in the
    struct format string.
    """

    def __init__(self, format, keys):
        """Compile ``format`` and remember the item names given in ``keys``."""
        super(NamedStruct, self).__init__(format)
        self.keys = keys

    def read_from(self, file):
        """Read ``self.size`` bytes from ``file`` and unpack them into a dict."""
        return self.unpack(file.read(self.size))

    def pack(self, dict_of_vals):
        """Pack the values found under ``self.keys`` in ``dict_of_vals``.

        Values are looked up in the order of ``self.keys``.
        """
        ordered = tuple(dict_of_vals[name] for name in self.keys)
        return super(NamedStruct, self).pack(*ordered)

    def unpack(self, buffer):
        """Unpack ``buffer`` and return a dict mapping each key to its value."""
        values = super(NamedStruct, self).unpack(buffer)
        named = {}
        for name, value in zip(self.keys, values):
            named[name] = value
        return named

def seek_pad(file, unit=8):
    """Advance ``file`` to the next position that is a multiple of ``unit``.

    The position is left untouched when it is already aligned.
    """
    overshoot = file.tell() % unit
    if overshoot == 0:
        return
    # whence=1: seek relative to the current position
    file.seek(unit - overshoot, 1)

def write_pad(file, unit=8):
    """Write zero bytes until the file's position is a multiple of ``unit``.

    Nothing is written when the current position is already aligned.

    file: a writable file object supporting ``tell()``.
    unit: alignment boundary in bytes (default 8).
    """
    rem = file.tell() % unit
    if rem != 0:
        # A bytes literal keeps the padding binary on Python 3 as well;
        # on Python 2, b'\x00' is the same object type as '\x00'.
        file.write(b'\x00' * (unit - rem))

# Names of the fixed-size Common Header items, listed in packing order.
# The position of each name matches the corresponding code in the struct
# format string passed to common_header_struct below.
common_header_fields = [

'magic_number',

'version',

'index_offset',

'index_length',

'number_of_reads',

'header_length',

'key_length',

'number_of_flows_per_read',

'flowgram_format_code',

]

# '>IIQIIHHHB' is big-endian with standard sizes: I = 4-byte unsigned,
# Q = 8-byte unsigned, H = 2-byte unsigned, B = 1-byte unsigned.
common_header_struct = NamedStruct('>IIQIIHHHB', common_header_fields)

def parse_common_header(sff_file):
    """Read the Common Header section of a binary SFF file into a dict.

    The fixed-size items are stored under the names listed in
    common_header_fields; the variable-length 'flow_chars' and
    'key_sequence' items are read next, using the lengths found in the
    fixed-size part.

    Side effect: the file position is moved past the Common Header,
    including any padding up to the next multiple of 8.
    """
    header = common_header_struct.read_from(sff_file)

    flow_count = header['number_of_flows_per_read']
    header['flow_chars'] = sff_file.read(flow_count)

    key_size = header['key_length']
    header['key_sequence'] = sff_file.read(key_size)

    seek_pad(sff_file)
    return header

100

101

def write_common_header(sff_file, header):
    """Write a Common Header section to a binary SFF file.

    The fixed-size items are packed first, followed by the
    'flow_chars' and 'key_sequence' values, then zero padding up to
    the next multiple of 8.
    """
    sff_file.write(common_header_struct.pack(header))
    for field in ('flow_chars', 'key_sequence'):
        sff_file.write(header[field])
    write_pad(sff_file)

109

110

111

# Text templates for the fixed-size Common Header items, one per entry
# and in the same order as the names in common_header_fields.  Each
# template takes a single value via the % operator.
common_header_formats = [
    ' Magic Number: 0x%X\n',
    ' Version: %04d\n',
    ' Index Offset: %d\n',
    ' Index Length: %d\n',
    ' # of Reads: %d\n',
    ' Header Length: %d\n',
    ' Key Length: %d\n',
    ' # of Flows: %d\n',
    ' Flowgram Code: %d\n',
    ]

122

123

124

def format_common_header(header):
    """Render a Common Header dict as multi-line text.

    The output starts with a 'Common Header:' line, followed by one
    line per fixed-size item and finally the flow chars and key
    sequence.
    """
    text = StringIO()
    text.write('Common Header:\n')

    for field, template in zip(common_header_fields, common_header_formats):
        text.write(template % header[field])

    trailing = (
        (' Flow Chars: %s\n', 'flow_chars'),
        (' Key Sequence: %s\n', 'key_sequence'),
        )
    for template, field in trailing:
        text.write(template % header[field])

    return text.getvalue()

135

136

137

class UnsupportedSffError(Exception):
    """Raised when an SFF Common Header holds a value that is not supported."""

139

140

141

def validate_common_header(header):
    """Check that a Common Header dict describes a supported SFF file.

    The magic number, version and flowgram format code must each equal
    the single supported value.  Returns None when all of them match.

    Raises an UnsupportedSffError naming the first mismatching item
    that is encountered.
    """
    required = {
        'magic_number': 0x2E736666,
        'version': 1,
        'flowgram_format_code': 1,
        }
    for field, wanted in required.items():
        found = header[field]
        if found != wanted:
            message = '%s not supported. (Expected %s, observed %s)' % (
                field, wanted, found)
            raise UnsupportedSffError(message)

157

158

159

# Names of the fixed-size Read Header items, listed in packing order.
read_header_fields = [
    'read_header_length',
    'name_length',
    'number_of_bases',
    'clip_qual_left',
    'clip_qual_right',
    'clip_adapter_left',
    'clip_adapter_right',
    ]

168

169

170

# Fixed-size part of a Read Header.  '>HHIHHHH' is big-endian with standard
# sizes: H = 2-byte unsigned, I = 4-byte unsigned; one code per field name.
read_header_struct = NamedStruct('>HHIHHHH', read_header_fields)

171

172

173

def parse_read_header(sff_file):
    """Read a Read Header section of a binary SFF file into a dict.

    The fixed-size items are stored under the names listed in
    read_header_fields; the read name, whose length is given by
    'name_length', is stored under 'Name'.

    Side effect: the file position is moved past the Read Header,
    including any padding up to the next multiple of 8.
    """
    header = read_header_struct.read_from(sff_file)
    name_size = header['name_length']
    header['Name'] = sff_file.read(name_size)
    seek_pad(sff_file)
    return header

186

187

188

def write_read_header(sff_file, read_header):
    """Write a Read Header section to a binary SFF file.

    The fixed-size items are packed first, followed by the 'Name'
    value, then zero padding up to the next multiple of 8.
    """
    sff_file.write(read_header_struct.pack(read_header))
    sff_file.write(read_header['Name'])
    write_pad(sff_file)

195

196

197

# Text templates for the fixed-size Read Header items, one per entry and
# in the same order as the names in read_header_fields.  Each template
# takes a single value via the % operator.
read_header_formats = [
    ' Read Header Len: %d\n',
    ' Name Length: %d\n',
    ' # of Bases: %d\n',
    ' Clip Qual Left: %d\n',
    ' Clip Qual Right: %d\n',
    ' Clip Adap Left: %d\n',
    ' Clip Adap Right: %d\n',
    ]

206

207

208

def format_read_header(read_header):
    """Render a Read Header dict as multi-line text.

    The read name is printed first, then the run prefix, region and
    XY location obtained from decode_accession(), then one line per
    fixed-size item.
    """
    name = read_header['Name']

    text = StringIO()
    text.write('\n>%s\n' % name)

    # The second element returned by decode_accession is not printed.
    timestamp, _hashchar, region, location = decode_accession(name)
    text.write(' Run Prefix: R_%d_%02d_%02d_%02d_%02d_%02d_\n' % timestamp)
    text.write(' Region #: %d\n' % region)
    text.write(' XY Location: %04d_%04d\n' % location)
    text.write('\n')

    for field, template in zip(read_header_fields, read_header_formats):
        text.write(template % read_header[field])

    return text.getvalue()

222

223

224

def parse_read_data(sff_file, number_of_bases, number_of_flows=400):
    """Read a Read Data section of a binary SFF file into a dict.

    The result holds four items, read in this order:
      'flowgram_values'     - number_of_flows big-endian 2-byte unsigned ints
      'flow_index_per_base' - number_of_bases 1-byte unsigned ints
      'Bases'               - number_of_bases bytes, returned as read
      'quality_scores'      - number_of_bases 1-byte unsigned ints

    Side effect: the file position is moved past the Read Data
    section, including any padding up to the next multiple of 8.
    """
    flow_struct = struct.Struct('>' + 'H' * number_of_flows)
    base_struct = struct.Struct('>' + 'B' * number_of_bases)

    flowgram = flow_struct.unpack(sff_file.read(flow_struct.size))
    flow_index = base_struct.unpack(sff_file.read(base_struct.size))
    bases = sff_file.read(number_of_bases)
    quality = base_struct.unpack(sff_file.read(base_struct.size))

    seek_pad(sff_file)
    return {
        'flowgram_values': flowgram,
        'flow_index_per_base': flow_index,
        'Bases': bases,
        'quality_scores': quality,
        }

252

253

254

def write_read_data(sff_file, read_data):
    """Write a Read Data section to a binary SFF file.

    The number of flows is taken from the length of 'flowgram_values'
    and the number of bases from the length of 'quality_scores'.  The
    flowgram values, flow indexes, bases and quality scores are
    written in that order, followed by zero padding up to the next
    multiple of 8.
    """
    flowgram = read_data['flowgram_values']
    quality = read_data['quality_scores']
    flow_struct = struct.Struct('>' + 'H' * len(flowgram))
    base_struct = struct.Struct('>' + 'B' * len(quality))

    sff_file.write(flow_struct.pack(*flowgram))
    sff_file.write(base_struct.pack(*read_data['flow_index_per_base']))
    sff_file.write(read_data['Bases'])
    sff_file.write(base_struct.pack(*quality))

    write_pad(sff_file)

274

275

276

def format_read_data(read_data, read_header):
    """Render the Read Data items of an SFF read as tab-separated text.

    The flowgram values are expected in native (integer) format; they
    are multiplied by 0.01 for display.  Flow indexes are printed as a
    running total of the per-base increments.  Bases inside the quality
    clip range given by read_header are upper-cased, all others are
    lower-cased.
    """
    text = StringIO()
    text.write('\n')

    text.write('Flowgram:')
    text.write(''.join(
        ['\t%01.2f' % (value * 0.01) for value in read_data['flowgram_values']]))
    text.write('\n')

    text.write('Flow Indexes:')
    running_total = 0
    for step in read_data['flow_index_per_base']:
        running_total += step
        text.write('\t%d' % running_total)
    text.write('\n')

    text.write('Bases:\t')
    # Clip positions are 1-based (Roche convention); convert to 0-based.
    first_kept = read_header['clip_qual_left'] - 1
    last_kept = read_header['clip_qual_right'] - 1
    for position, base in enumerate(read_data['Bases']):
        if first_kept <= position <= last_kept:
            text.write(base.upper())
        else:
            text.write(base.lower())
    text.write('\n')

    text.write('Quality Scores:')
    text.write(''.join(
        ['\t%d' % score for score in read_data['quality_scores']]))
    text.write('\n')

    return text.getvalue()

313

314

315

def parse_read(sff_file, number_of_flows=400):
    """Parse one read (header plus data) from a binary SFF file.

    The result is a single dict holding the Read Data fields updated
    with the Read Header fields; key names follow the Roche
    documentation for those two sections.

    Parsing advances the file object, leaving it positioned at the end
    of the Read Data section.
    """
    header_fields = parse_read_header(sff_file)
    base_count = header_fields['number_of_bases']

    combined = parse_read_data(sff_file, base_count, number_of_flows)
    # Header entries are merged last, so they win on any key collision.
    combined.update(header_fields)
    return combined

329

330

331

def write_read(sff_file, read):
    """Write one read to a binary SFF file: header first, then data.

    The same read dict is handed to both section writers.
    """
    for write_section in (write_read_header, write_read_data):
        write_section(sff_file, read)

336

337

338

def format_read(read):
    """Return the text form of an SFF read (header text, then data text).
    """
    header_text = format_read_header(read)
    # The combined read dict doubles as both the data and the header.
    data_text = format_read_data(read, read)
    return ''.join((header_text, data_text))

345

346

347

def parse_binary_sff(sff_file, native_flowgram_values=False):
    """Parse a binary SFF file into its common header and its reads.

    Returns a (header, reads) pair, where reads is a generator that
    parses one read from sff_file each time it is advanced.

    The file stores flowgram values as integers 100 times larger than
    the normalized floating point values.  By default each value is
    multiplied by 0.01; pass native_flowgram_values=True to skip that
    relatively expensive conversion and keep the stored integers.
    """
    header = parse_common_header(sff_file)
    number_of_flows = header['number_of_flows_per_read']
    validate_common_header(header)

    def read_iterator():
        for _ in range(header['number_of_reads']):
            # When positioned exactly at the index section, jump over it
            # (whence=1 means relative to the current position).
            at_index = sff_file.tell() == header['index_offset']
            if at_index:
                sff_file.seek(header['index_length'], 1)

            record = parse_read(sff_file, number_of_flows)

            if not native_flowgram_values:
                raw_values = record['flowgram_values']
                record['flowgram_values'] = [
                    raw * 0.01 for raw in raw_values]

            yield record

    return header, read_iterator()

373

374

375

def write_binary_sff(sff_file, header, reads):
    """Write a complete binary SFF file from a header dict and read dicts.

    Any existing content of sff_file is discarded: the file is rewound
    to the start and truncated before the common header and then each
    read are written.
    """
    # Start from an empty file.
    sff_file.seek(0)
    sff_file.truncate()

    write_common_header(sff_file, header)
    for record in reads:
        write_read(sff_file, record)

383

384

385

def format_binary_sff(sff_file, output_file=None):
    """Write a text rendering of a binary SFF file and return the output.

    When output_file is None, a new in-memory StringIO buffer is
    created, written to and returned; otherwise the given file-like
    object is written to and returned.
    """
    destination = StringIO() if output_file is None else output_file

    # Reads are parsed with native (integer) flowgram values.
    header, reads = parse_binary_sff(sff_file, True)

    destination.write(format_common_header(header))
    for record in reads:
        destination.write(format_read(record))
    return destination

398

399

400

def base36_encode(n):
    """Convert a non-negative integer to a base36 string.

    Following the conventions outlined in the Roche 454 manual, the
    numbers 0-25 are represented by letters, and the numbers 26-35 are
    represented by digits.

    The most significant digit comes first, which is the order that
    base36_decode expects.  Zero is encoded as the single digit 'A'.

    Raises ValueError if n is negative.

    Based on the code example at http://en.wikipedia.org/wiki/Base_36
    """
    if n < 0:
        raise ValueError('Only positive numbers are supported.')
    if n == 0:
        # Without this the loop below never runs and the result would be
        # an empty string, which cannot be decoded.
        return base36_encode.alphabet[0]
    chars = []
    while n != 0:
        n, remainder = divmod(n, 36)
        chars.append(base36_encode.alphabet[remainder])
    # divmod peels off the least significant digit first, so the digits
    # must be reversed to put the most significant one in front.
    chars.reverse()
    return ''.join(chars)


base36_encode.alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'

419

420

421

def base36_decode(base36_str):
    """Convert a base36 string to a positive integer.

    Following the conventions outlined in the Roche 454 manual, the
    numbers 0-25 are represented by letters, and the numbers 26-35 are
    represented by digits.

    The string is first translated into the digit ordering used by
    int(..., 36), where 0-9 come before A-Z, and then parsed with the
    most significant digit first.
    """
    base36_str = base36_str.translate(base36_decode.translation)
    return int(base36_str, 36)


# string.maketrans only exists on Python 2; fall back to str.maketrans so
# the table can also be built on Python 3.
base36_decode.translation = (
    getattr(string, 'maketrans', None) or str.maketrans
)(
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789',
    '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ',
)

436

437

438

def decode_location(location_str):
    """Decode a base36-encoded well location, in Roche 454 format.

    The decoded integer is split by 4096 and returned as a
    (quotient, remainder) pair.

    Such locations are embedded in the final 5 characters of Roche
    "universal" accession numbers.
    """
    packed = base36_decode(location_str)
    return packed // 4096, packed % 4096

445

446

447

def decode_timestamp(timestamp_str):
    """Decode a base36-encoded timestamp, in Roche 454 format.

    Returns a (year, month, day, hour, minute, second) tuple.  The
    encoding packs the fields into one number of seconds, using
    32-day months and 13-month years, with the year counted from 2000.

    Such timestamps are embedded in the first 6 characters of Roche
    "universal" accession numbers and SFF filenames.
    """
    remaining = base36_decode(timestamp_str)

    # Seconds per year, month, day, hour and minute in the packed encoding.
    unit_sizes = (
        13 * 32 * 24 * 60 * 60,
        32 * 24 * 60 * 60,
        24 * 60 * 60,
        60 * 60,
        60,
    )
    fields = []
    for unit in unit_sizes:
        count, remaining = divmod(remaining, unit)
        fields.append(count)
    # Whatever is left after removing whole minutes is the seconds field.
    fields.append(remaining)

    fields[0] = fields[0] + 2000
    return tuple(fields)

461

462

463

def decode_accession(accession):
    """Decode a Roche 454 "universal" accession number.

    The accession must be exactly 14 characters long.  Returns a
    (timestamp, hashchar, region, location) tuple taken from characters
    0-5, 6, 7-8 and 9-13 respectively.
    """
    assert len(accession) == 14

    stamp_part = accession[:6]
    hashchar = accession[6]
    region_part = accession[7:9]
    well_part = accession[9:14]

    timestamp = decode_timestamp(stamp_part)
    region = int(region_part)
    location = decode_location(well_part)
    return timestamp, hashchar, region, location

472

473

474

def decode_sff_filename(sff_filename):
    """Decode a Roche 454 SFF filename into (timestamp, hashchar, region).

    The filename must be exactly 13 characters long and end in '.sff'.
    The timestamp comes from characters 0-5, the hash character is
    character 6 and the region is the integer in characters 7-8.
    """
    assert len(sff_filename) == 13
    assert sff_filename.endswith('.sff')

    stamp_part = sff_filename[:6]
    hashchar = sff_filename[6]
    region_part = sff_filename[7:9]

    timestamp = decode_timestamp(stamp_part)
    region = int(region_part)
    return timestamp, hashchar, region

Older »