~ubuntu-branches/ubuntu/karmic/python-docutils/karmic

« back to all changes in this revision

Viewing changes to tools/dev/unicode2rstsubs.py

Committer: Bazaar Package Importer
Author(s): martin f. krafft
Date: 2006-07-10 11:45:05 UTC
mfrom: (2.1.4 edgy)
Revision ID: james.westby@ubuntu.com-20060710114505-otkhqcslevewxmz5

Tags: 0.4-3

http://bugs.debian.org/377580

Added build dependency on python-central (closes: #377580).

files added:
BUGS.txt

PKG-INFO

RELEASE-NOTES.txt

THANKS.txt

debian/patches

debian/patches/00list

debian/patches/01_upstream_04_branch_r4646.dpatch

debian/patches/10_doc_debian_paths.dpatch

debian/patches/11_not_usrbinenv_python.dpatch

debian/patches/12_remove_unnecessary_hashbang.dpatch

debian/patches/13_nodes_namespace.dpatch

debian/patches/14_distutils_setup.dpatch

debian/patches/99_old-diffs.dpatch

debian/rst2html.1

docs/api

docs/api/cmdline-tool.txt

docs/api/publisher.txt

docs/api/runtime-settings.txt

docs/dev

docs/dev/distributing.txt

docs/dev/enthought-plan.txt

docs/dev/enthought-rfp.txt

docs/dev/hacking.txt

docs/dev/policies.txt

docs/dev/pysource.dtd

docs/dev/pysource.txt

docs/dev/release.txt

docs/dev/repository.txt

docs/dev/rst

docs/dev/rst/alternatives.txt

docs/dev/rst/problems.txt

docs/dev/semantics.txt

docs/dev/testing.txt

docs/dev/todo.txt

docs/dev/website.txt

docs/howto

docs/howto/html-stylesheets.txt

docs/howto/i18n.txt

docs/howto/rst-directives.txt

docs/howto/rst-roles.txt

docs/index.txt

docs/peps

docs/peps/pep-0256.txt

docs/peps/pep-0257.txt

docs/peps/pep-0258.txt

docs/peps/pep-0287.txt

docs/ref

docs/ref/doctree.txt

docs/ref/docutils.dtd

docs/ref/rst

docs/ref/rst/definitions.txt

docs/ref/rst/directives.txt

docs/ref/rst/introduction.txt

docs/ref/rst/restructuredtext.txt

docs/ref/rst/roles.txt

docs/ref/soextblx.dtd

docs/ref/transforms.txt

docs/user

docs/user/Makefile.docutils-update

docs/user/config.txt

docs/user/emacs.txt

docs/user/images

docs/user/images/big-black.png

docs/user/images/big-white.png

docs/user/images/default.png

docs/user/images/happy_monkey.png

docs/user/images/medium-black.png

docs/user/images/medium-white.png

docs/user/images/rsp-all.png

docs/user/images/rsp-breaks.png

docs/user/images/rsp-covers.png

docs/user/images/rsp-cuts.png

docs/user/images/rsp-empty.png

docs/user/images/rsp-objects.png

docs/user/images/rsp.svg

docs/user/images/s5-files.png

docs/user/images/s5-files.svg

docs/user/images/small-black.png

docs/user/images/small-white.png

docs/user/latex.txt

docs/user/links.txt

docs/user/mailing-lists.txt

docs/user/rst

docs/user/rst/cheatsheet.txt

docs/user/rst/demo.txt

docs/user/rst/images

docs/user/rst/images/ball1.gif

docs/user/rst/images/biohazard.png

docs/user/rst/images/title.png

docs/user/rst/quickref.html

docs/user/rst/quickstart.txt

docs/user/slide-shows.txt

docs/user/tools.txt

docutils/docutils.conf

docutils/languages/ca.py

docutils/languages/fi.py

docutils/languages/ja.py

docutils/languages/nl.py

docutils/languages/zh_cn.py

docutils/languages/zh_tw.py

docutils/parsers/null.py

docutils/parsers/rst/directives/tables.py

docutils/parsers/rst/include

docutils/parsers/rst/include/README.txt

docutils/parsers/rst/include/isoamsa.txt

docutils/parsers/rst/include/isoamsb.txt

docutils/parsers/rst/include/isoamsc.txt

docutils/parsers/rst/include/isoamsn.txt

docutils/parsers/rst/include/isoamso.txt

docutils/parsers/rst/include/isoamsr.txt

docutils/parsers/rst/include/isobox.txt

docutils/parsers/rst/include/isocyr1.txt

docutils/parsers/rst/include/isocyr2.txt

docutils/parsers/rst/include/isodia.txt

docutils/parsers/rst/include/isogrk1.txt

docutils/parsers/rst/include/isogrk2.txt

docutils/parsers/rst/include/isogrk3.txt

docutils/parsers/rst/include/isogrk4-wide.txt

docutils/parsers/rst/include/isogrk4.txt

docutils/parsers/rst/include/isolat1.txt

docutils/parsers/rst/include/isolat2.txt

docutils/parsers/rst/include/isomfrk-wide.txt

docutils/parsers/rst/include/isomfrk.txt

docutils/parsers/rst/include/isomopf-wide.txt

docutils/parsers/rst/include/isomopf.txt

docutils/parsers/rst/include/isomscr-wide.txt

docutils/parsers/rst/include/isomscr.txt

docutils/parsers/rst/include/isonum.txt

docutils/parsers/rst/include/isopub.txt

docutils/parsers/rst/include/isotech.txt

docutils/parsers/rst/include/mmlalias.txt

docutils/parsers/rst/include/mmlextra-wide.txt

docutils/parsers/rst/include/mmlextra.txt

docutils/parsers/rst/include/s5defs.txt

docutils/parsers/rst/include/xhtml1-lat1.txt

docutils/parsers/rst/include/xhtml1-special.txt

docutils/parsers/rst/include/xhtml1-symbol.txt

docutils/parsers/rst/languages/ca.py

docutils/parsers/rst/languages/fi.py

docutils/parsers/rst/languages/ja.py

docutils/parsers/rst/languages/nl.py

docutils/parsers/rst/languages/zh_cn.py

docutils/parsers/rst/languages/zh_tw.py

docutils/readers/doctree.py

docutils/transforms/writer_aux.py

docutils/writers/html4css1

docutils/writers/html4css1/__init__.py

docutils/writers/html4css1/html4css1.css

docutils/writers/latex2e

docutils/writers/latex2e/__init__.py

docutils/writers/latex2e/latex2e.tex

docutils/writers/newlatex2e

docutils/writers/newlatex2e/__init__.py

docutils/writers/newlatex2e/base.tex

docutils/writers/newlatex2e/unicode_map.py

docutils/writers/null.py

docutils/writers/pep_html

docutils/writers/pep_html/__init__.py

docutils/writers/pep_html/pep.css

docutils/writers/pep_html/template.txt

docutils/writers/s5_html

docutils/writers/s5_html/__init__.py

docutils/writers/s5_html/themes

docutils/writers/s5_html/themes/README.txt

docutils/writers/s5_html/themes/big-black

docutils/writers/s5_html/themes/big-black/__base__

docutils/writers/s5_html/themes/big-black/framing.css

docutils/writers/s5_html/themes/big-black/pretty.css

docutils/writers/s5_html/themes/big-white

docutils/writers/s5_html/themes/big-white/framing.css

docutils/writers/s5_html/themes/big-white/pretty.css

docutils/writers/s5_html/themes/default

docutils/writers/s5_html/themes/default/blank.gif

docutils/writers/s5_html/themes/default/framing.css

docutils/writers/s5_html/themes/default/iepngfix.htc

docutils/writers/s5_html/themes/default/opera.css

docutils/writers/s5_html/themes/default/outline.css

docutils/writers/s5_html/themes/default/pretty.css

docutils/writers/s5_html/themes/default/print.css

docutils/writers/s5_html/themes/default/s5-core.css

docutils/writers/s5_html/themes/default/slides.css

docutils/writers/s5_html/themes/default/slides.js

docutils/writers/s5_html/themes/medium-black

docutils/writers/s5_html/themes/medium-black/__base__

docutils/writers/s5_html/themes/medium-black/pretty.css

docutils/writers/s5_html/themes/medium-white

docutils/writers/s5_html/themes/medium-white/framing.css

docutils/writers/s5_html/themes/medium-white/pretty.css

docutils/writers/s5_html/themes/small-black

docutils/writers/s5_html/themes/small-black/__base__

docutils/writers/s5_html/themes/small-black/pretty.css

docutils/writers/s5_html/themes/small-white

docutils/writers/s5_html/themes/small-white/framing.css

docutils/writers/s5_html/themes/small-white/pretty.css

test/coverage.sh

test/data/csv_data.txt

test/data/csv_dep.txt

test/data/dependencies.txt

test/data/include.txt

test/data/raw.txt

test/data/stylesheet.txt

test/docutils_difflib.py

test/functional

test/functional/README.txt

test/functional/expected

test/functional/expected/compact_lists.html

test/functional/expected/dangerous.html

test/functional/expected/field_name_limit.html

test/functional/expected/latex_docinfo.tex

test/functional/expected/misc_rst_html4css1.html

test/functional/expected/pep_html.html

test/functional/expected/standalone_rst_html4css1.html

test/functional/expected/standalone_rst_latex.tex

test/functional/expected/standalone_rst_pseudoxml.txt

test/functional/expected/standalone_rst_s5_html_1.html

test/functional/expected/standalone_rst_s5_html_2.html

test/functional/expected/ui

test/functional/expected/ui/default

test/functional/expected/ui/default/blank.gif

test/functional/expected/ui/default/framing.css

test/functional/expected/ui/default/iepngfix.htc

test/functional/expected/ui/default/opera.css

test/functional/expected/ui/default/outline.css

test/functional/expected/ui/default/pretty.css

test/functional/expected/ui/default/print.css

test/functional/expected/ui/default/s5-core.css

test/functional/expected/ui/default/slides.css

test/functional/expected/ui/default/slides.js

test/functional/expected/ui/small-black

test/functional/expected/ui/small-black/blank.gif

test/functional/expected/ui/small-black/framing.css

test/functional/expected/ui/small-black/iepngfix.htc

test/functional/expected/ui/small-black/opera.css

test/functional/expected/ui/small-black/outline.css

test/functional/expected/ui/small-black/pretty.css

test/functional/expected/ui/small-black/print.css

test/functional/expected/ui/small-black/s5-core.css

test/functional/expected/ui/small-black/slides.css

test/functional/expected/ui/small-black/slides.js

test/functional/input

test/functional/input/compact_lists.txt

test/functional/input/dangerous.txt

test/functional/input/data

test/functional/input/data/custom_roles.txt

test/functional/input/data/errors.txt

test/functional/input/data/header_footer.txt

test/functional/input/data/latex.txt

test/functional/input/data/list_table.txt

test/functional/input/data/nonalphanumeric.txt

test/functional/input/data/standard.txt

test/functional/input/data/table_colspan.txt

test/functional/input/data/table_complex.txt

test/functional/input/data/table_rowspan.txt

test/functional/input/data/unicode.txt

test/functional/input/field_list.txt

test/functional/input/latex_docinfo.txt

test/functional/input/pep_html.txt

test/functional/input/simple.txt

test/functional/input/standalone_rst_html4css1.txt

test/functional/input/standalone_rst_latex.txt

test/functional/input/standalone_rst_newlatex.txt

test/functional/input/standalone_rst_pseudoxml.txt

test/functional/input/standalone_rst_s5_html.txt

test/functional/output

test/functional/output/README.txt

test/functional/tests

test/functional/tests/_default.py

test/functional/tests/_standalone_rst_defaults.py

test/functional/tests/compact_lists.py

test/functional/tests/dangerous.py

test/functional/tests/field_name_limit.py

test/functional/tests/latex_docinfo.py

test/functional/tests/misc_rst_html4css1.py

test/functional/tests/pep_html.py

test/functional/tests/standalone_rst_html4css1.py

test/functional/tests/standalone_rst_latex.py

test/functional/tests/standalone_rst_pseudoxml.py

test/functional/tests/standalone_rst_s5_html_1.py

test/functional/tests/standalone_rst_s5_html_2.py

test/test_dependencies.py

test/test_functional.py

test/test_io.py

test/test_parsers/test_rst/__init__.py

test/test_parsers/test_rst/test_directives/__init__.py

test/test_parsers/test_rst/test_directives/empty.txt

test/test_parsers/test_rst/test_directives/include 11.txt

test/test_parsers/test_rst/test_directives/test_compound.py

test/test_parsers/test_rst/test_directives/test_container.py

test/test_parsers/test_rst/test_directives/test_date.py

test/test_parsers/test_rst/test_directives/test_decorations.py

test/test_parsers/test_rst/test_directives/test_default_role.py

test/test_parsers/test_rst/test_directives/test_line_blocks.py

test/test_parsers/test_rst/test_directives/test_sidebars.py

test/test_parsers/test_rst/test_directives/test_tables.py

test/test_parsers/test_rst/test_directives/test_target_notes.py

test/test_parsers/test_rst/test_directives/utf-16.csv

test/test_parsers/test_rst/test_east_asian_text.py

test/test_parsers/test_rst/test_line_blocks.py

test/test_publisher.py

test/test_readers/test_pep/__init__.py

test/test_transforms/test___init__.py

test/test_transforms/test_expose_internals.py

test/test_transforms/test_strip_comments.py

test/test_transforms/test_target_notes.py

test/test_transforms/test_transitions.py

test/test_transforms/test_writer_aux.py

test/test_writers/test_html4css1.py

test/test_writers/test_html4css1_misc.py

test/test_writers/test_null.py

tools/dev

tools/dev/README.txt

tools/dev/create_unimap.py

tools/dev/profile_docutils.py

tools/dev/unicode2rstsubs.py

tools/editors/emacs/README.txt

tools/editors/emacs/docutils.conf

tools/editors/emacs/rst.el

tools/editors/emacs/tests

tools/editors/emacs/tests/Makefile

tools/editors/emacs/tests/README

tools/editors/emacs/tests/tests-adjust-section.el

tools/editors/emacs/tests/tests-basic.el

tools/editors/emacs/tests/tests-runner.el

tools/rst2newlatex.py

tools/rst2pseudoxml.py

tools/rst2s5.py

tools/rst2xml.py

tools/rstpep2html.py

files removed:
COPYING-orig.txt

MANIFEST.in

debian/NEWS.Debian

debian/python-difflib.copyright

debian/python-textwrap.copyright

debian/python2.1-difflib.copyright

debian/python2.1-textwrap.copyright

debian/python2.2-docutils.copyright

debian/python2.2-textwrap.copyright

debian/python2.3-docutils.copyright

docs/config.txt

docs/latex.txt

docs/rst

docs/rst/quickref.html

docs/rst/quickstart.txt

docs/tools.txt

docutils/writers/html4css1.py

docutils/writers/latex2e.py

docutils/writers/pep_html.py

extras/difflib.py

man/pep2html.1

man/rest2html.1

man/rest2latex.1

man/rest2xml.1

roman.py

setup_difflib.py

setup_roman.py

setup_textwrap.py

spec

spec/doctree.txt

spec/docutils.dtd

spec/howto

spec/howto/i18n.txt

spec/howto/rst-directives.txt

spec/howto/rst-roles.txt

spec/notes.txt

spec/pep-0256.txt

spec/pep-0257.txt

spec/pep-0258.txt

spec/pep-0287.txt

spec/pysource.dtd

spec/pysource.txt

spec/rst

spec/rst/alternatives.txt

spec/rst/directives.txt

spec/rst/interpreted.txt

spec/rst/introduction.txt

spec/rst/problems.txt

spec/rst/reStructuredText.txt

spec/semantics.txt

spec/transforms.txt

test/difflib.py

test/test_pep

test/test_pep/__init__.py

test/test_pep/test_inline_markup.py

test/test_pep/test_rfc2822.py

test/test_rst

test/test_rst/__init__.py

test/test_rst/test_SimpleTableParser.py

test/test_rst/test_TableParser.py

test/test_rst/test_block_quotes.py

test/test_rst/test_bullet_lists.py

test/test_rst/test_citations.py

test/test_rst/test_comments.py

test/test_rst/test_definition_lists.py

test/test_rst/test_directives

test/test_rst/test_directives/__init__.py

test/test_rst/test_directives/test_admonitions.py

test/test_rst/test_directives/test_contents.py

test/test_rst/test_directives/test_figures.py

test/test_rst/test_directives/test_images.py

test/test_rst/test_directives/test_meta.py

test/test_rst/test_directives/test_test_directives.py

test/test_rst/test_directives/test_unknown.py

test/test_rst/test_doctest_blocks.py

test/test_rst/test_enumerated_lists.py

test/test_rst/test_field_lists.py

test/test_rst/test_footnotes.py

test/test_rst/test_functions.py

test/test_rst/test_inline_markup.py

test/test_rst/test_literal_blocks.py

test/test_rst/test_option_lists.py

test/test_rst/test_outdenting.py

test/test_rst/test_paragraphs.py

test/test_rst/test_section_headers.py

test/test_rst/test_substitutions.py

test/test_rst/test_tables.py

test/test_rst/test_targets.py

test/test_rst/test_transitions.py

test/test_transforms/test_final_checks.py

test/test_writers/test_html4css1

test/test_writers/test_html4css1/__init__.py

test/test_writers/test_html4css1/test_parts.py

tools/default.css

tools/docutils-xml.py

tools/editors/emacs/restructuredtext.el

tools/editors/emacs/rst-html.el

tools/editors/emacs/rst-mode.el

tools/html.py

tools/pep-html-template

tools/pep.py

tools/pep2html.py

tools/publish.py

tools/stylesheets

tools/stylesheets/default.css

tools/stylesheets/pep.css

tools/stylesheets/style.tex

tools/test.txt

tools/unicode2rstsubs.py

files modified:
COPYING.txt

FAQ.txt

HISTORY.txt

README.txt

debian/README.Debian

debian/changelog

debian/compat

debian/control

debian/emacs-start.el

debian/python-docutils.copyright

debian/python-docutils.manpages

debian/python-roman.copyright

debian/rules

docutils/__init__.py

docutils/core.py

docutils/examples.py

docutils/frontend.py

docutils/io.py

docutils/languages/__init__.py

docutils/languages/af.py

docutils/languages/cs.py

docutils/languages/de.py

docutils/languages/en.py

docutils/languages/eo.py

docutils/languages/es.py

docutils/languages/fr.py

docutils/languages/it.py

docutils/languages/pt_br.py

docutils/languages/ru.py

docutils/languages/sk.py

docutils/languages/sv.py

docutils/nodes.py

docutils/parsers/__init__.py

docutils/parsers/rst/__init__.py

docutils/parsers/rst/directives/__init__.py

docutils/parsers/rst/directives/admonitions.py

docutils/parsers/rst/directives/body.py

docutils/parsers/rst/directives/html.py

docutils/parsers/rst/directives/images.py

docutils/parsers/rst/directives/misc.py

docutils/parsers/rst/directives/parts.py

docutils/parsers/rst/directives/references.py

docutils/parsers/rst/languages/__init__.py

docutils/parsers/rst/languages/af.py

docutils/parsers/rst/languages/cs.py

docutils/parsers/rst/languages/de.py

docutils/parsers/rst/languages/en.py

docutils/parsers/rst/languages/eo.py

docutils/parsers/rst/languages/es.py

docutils/parsers/rst/languages/fr.py

docutils/parsers/rst/languages/it.py

docutils/parsers/rst/languages/pt_br.py

docutils/parsers/rst/languages/ru.py

docutils/parsers/rst/languages/sk.py

docutils/parsers/rst/languages/sv.py

docutils/parsers/rst/roles.py

docutils/parsers/rst/states.py

docutils/parsers/rst/tableparser.py

docutils/readers/__init__.py

docutils/readers/pep.py

docutils/readers/python/__init__.py

docutils/readers/python/moduleparser.py

docutils/readers/python/pynodes.py

docutils/readers/standalone.py

docutils/statemachine.py

docutils/transforms/__init__.py

docutils/transforms/components.py

docutils/transforms/frontmatter.py

docutils/transforms/misc.py

docutils/transforms/parts.py

docutils/transforms/peps.py

docutils/transforms/references.py

docutils/transforms/universal.py

docutils/urischemes.py

docutils/utils.py

docutils/writers/__init__.py

docutils/writers/docutils_xml.py

docutils/writers/pseudoxml.py

extras/optparse.py

extras/textwrap.py

install.py

setup.py

test/DocutilsTestSupport.py

test/alltests.py

test/data/config_1.txt

test/data/config_2.txt

test/docutils.conf

test/package_unittest.py

test/test_language.py *

test/test_nodes.py

test/test_parsers/__init__.py

test/test_parsers/test_rst/test_SimpleTableParser.py *

test/test_parsers/test_rst/test_TableParser.py

test/test_parsers/test_rst/test_block_quotes.py

test/test_parsers/test_rst/test_bullet_lists.py

test/test_parsers/test_rst/test_citations.py

test/test_parsers/test_rst/test_comments.py

test/test_parsers/test_rst/test_definition_lists.py

test/test_parsers/test_rst/test_directives/test_admonitions.py

test/test_parsers/test_rst/test_directives/test_contents.py

test/test_parsers/test_rst/test_directives/test_figures.py

test/test_parsers/test_rst/test_directives/test_images.py

test/test_parsers/test_rst/test_directives/test_include.py

test/test_parsers/test_rst/test_directives/test_meta.py

test/test_parsers/test_rst/test_directives/test_raw.py

test/test_parsers/test_rst/test_directives/test_replace.py

test/test_parsers/test_rst/test_directives/test_role.py

test/test_parsers/test_rst/test_directives/test_rubrics.py

test/test_parsers/test_rst/test_directives/test_test_directives.py

test/test_parsers/test_rst/test_directives/test_topics.py *

test/test_parsers/test_rst/test_directives/test_unicode.py

test/test_parsers/test_rst/test_directives/test_unknown.py

test/test_parsers/test_rst/test_doctest_blocks.py

test/test_parsers/test_rst/test_enumerated_lists.py

test/test_parsers/test_rst/test_field_lists.py

test/test_parsers/test_rst/test_footnotes.py

test/test_parsers/test_rst/test_functions.py

test/test_parsers/test_rst/test_inline_markup.py

test/test_parsers/test_rst/test_interpreted.py *

test/test_parsers/test_rst/test_literal_blocks.py

test/test_parsers/test_rst/test_option_lists.py

test/test_parsers/test_rst/test_outdenting.py

test/test_parsers/test_rst/test_paragraphs.py

test/test_parsers/test_rst/test_section_headers.py

test/test_parsers/test_rst/test_substitutions.py

test/test_parsers/test_rst/test_tables.py

test/test_parsers/test_rst/test_targets.py

test/test_parsers/test_rst/test_transitions.py

test/test_readers/__init__.py

test/test_readers/test_pep/test_inline_markup.py *

test/test_readers/test_pep/test_rfc2822.py *

test/test_readers/test_python/__init__.py

test/test_readers/test_python/test_functions.py *

test/test_readers/test_python/test_parser.py *

test/test_readers/test_python/test_token_parser.py *

test/test_settings.py

test/test_statemachine.py

test/test_transforms/__init__.py

test/test_transforms/test_class.py

test/test_transforms/test_contents.py

test/test_transforms/test_docinfo.py

test/test_transforms/test_doctitle.py

test/test_transforms/test_filter.py *

test/test_transforms/test_footnotes.py

test/test_transforms/test_hyperlinks.py

test/test_transforms/test_messages.py

test/test_transforms/test_peps.py *

test/test_transforms/test_sectnum.py *

test/test_transforms/test_substitutions.py

test/test_utils.py

test/test_viewlist.py *

test/test_writers/__init__.py

test/test_writers/test_docutils_xml.py *

test/test_writers/test_latex2e.py *

test/test_writers/test_pseudoxml.py *

tools/buildhtml.py

tools/docutils.conf

tools/editors/README.txt

tools/quicktest.py

tools/rst2html.py

tools/rst2latex.py

Show diffs side-by-side

added added

removed removed

tools/dev/unicode2rstsubs.py

#! /usr/bin/env python

# Author: David Goodger

# Contact: goodger@python.org

# Revision: $Revision: 3532 $

# Date: $Date: 2005-06-20 20:59:29 +0200 (Mon, 20 Jun 2005) $

# Copyright: This program has been placed in the public domain.

"""

unicode2rstsubs.py -- produce character entity files (reStructuredText

substitutions) from the W3C master unicode.xml file.

This program extracts character entity and entity set information from a

unicode.xml file and produces multiple reStructuredText files (in the current

directory) containing substitutions. Entity sets are from ISO 8879 & ISO

9573-13 (combined), MathML, and HTML4. One or two files are produced for each

entity set; a second file with a "-wide.txt" suffix is produced if there are

wide-Unicode characters in the set.

The input file, unicode.xml, is maintained as part of the MathML 2

Recommendation XML source, and is available from

<http://www.w3.org/2003/entities/xml/>.

"""

import sys

import os

import optparse

import re

from xml.parsers.expat import ParserCreate

usage_msg = """Usage: %s [unicode.xml]"""


def usage(prog, status=0, msg=None):
    """
    Print the usage line (and optional message) to stderr, then exit.

    `prog` is the program name substituted into `usage_msg`; `status` is
    the process exit status passed to `sys.exit`; `msg`, if true, is
    written on its own line after the usage line.

    Always raises `SystemExit`.
    """
    # ``sys.stderr.write`` instead of ``print >>sys.stderr``: the chevron
    # form is a syntax error on Python 3, while this works on 2 and 3.
    sys.stderr.write(usage_msg % prog + '\n')
    if msg:
        sys.stderr.write('%s\n' % (msg,))
    sys.exit(status)

def main(argv=None):
    """
    Command-line entry point.

    `argv` defaults to `sys.argv`.  With one argument, that argument is the
    input path; with none, ``unicode.xml`` in the current directory is used;
    with more than one, the usage message is printed and the process exits
    with status 2.  A path that is not an existing file exits with status 1.

    Returns ``None`` on success.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) == 2:
        inpath = argv[1]
    elif len(argv) > 2:
        # usage() does not return (it calls sys.exit).
        usage(argv[0], 2,
              'Too many arguments (%s): only 1 expected.' % (len(argv) - 1))
    else:
        inpath = 'unicode.xml'
    if not os.path.isfile(inpath):
        usage(argv[0], 1, 'No such file: "%s".' % inpath)
    # Binary mode: the expat parser does its own decoding, and on Python 3
    # ParseFile() requires read() to return bytes.
    infile = open(inpath, 'rb')
    try:
        process(infile)
    finally:
        # Release the handle even if parsing or writing fails.
        infile.close()

def process(infile):
    """
    Parse `infile` (an open unicode.xml file) and write the substitution
    files: build an extractor, group the entities by set, write each set.
    """
    extractor = CharacterEntitySetExtractor(infile)
    extractor.group()
    extractor.write_sets()

class CharacterEntitySetExtractor:

    """
    Extracts character entity information from unicode.xml file, groups it by
    entity set, and writes out reStructuredText substitution files.

    Typical use: instantiate with an open input file, call `group()` to
    parse it, then `write_sets()` to write one ``<set>.txt`` file per entity
    set (plus ``<set>-wide.txt`` where needed) into the current directory.
    """

    unwanted_entity_sets = ['stix',         # unknown, buggy set
                            'predefined']

    # Written at the top of every output file.  The continuation lines are
    # indented so that they stay inside the reStructuredText comment.
    header = """\
.. This data file has been placed in the public domain.
.. Derived from the Unicode character mappings available from
   <http://www.w3.org/2003/entities/xml/>.
   Processed by unicode2rstsubs.py, part of Docutils:
   <http://docutils.sourceforge.net>.
"""

    def __init__(self, infile):
        self.infile = infile
        """Input unicode.xml file."""

        self.parser = self.setup_parser()
        """XML parser."""

        self.elements = []
        """Stack of element names.  Last is current element."""

        self.sets = {}
        """Mapping of charent set name to set dict."""

        self.charid = None
        """Current character's "id" attribute value."""

        self.descriptions = {}
        """Mapping of character ID to description."""

    def setup_parser(self):
        """Return an expat parser wired to this object's handler methods."""
        parser = ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        parser.EndElementHandler = self.EndElementHandler
        parser.CharacterDataHandler = self.CharacterDataHandler
        return parser

    def group(self):
        """Parse the input file, filling `self.sets` & `self.descriptions`."""
        self.parser.ParseFile(self.infile)

    def StartElementHandler(self, name, attributes):
        """Push `name` and dispatch to ``<name>_start``, if defined."""
        self.elements.append(name)
        handler = name + '_start'
        if hasattr(self, handler):
            getattr(self, handler)(name, attributes)

    def EndElementHandler(self, name):
        """Pop `name` and dispatch to ``<name>_end``, if defined."""
        # The message used to reference the nonexistent ``self.element``,
        # turning a failed check into an AttributeError.
        assert self.elements[-1] == name, \
               'unknown end-tag %r (%r)' % (name, self.elements)
        self.elements.pop()
        handler = name + '_end'
        if hasattr(self, handler):
            getattr(self, handler)(name)

    def CharacterDataHandler(self, data):
        """Dispatch text to ``<current element>_data``, if defined."""
        handler = self.elements[-1] + '_data'
        if hasattr(self, handler):
            getattr(self, handler)(data)

    def character_start(self, name, attributes):
        """Remember the "id" of the <character> element being parsed."""
        self.charid = attributes['id']

    def entity_start(self, name, attributes):
        """Record an <entity> of the current character in its entity set."""
        set_name = self.entity_set_name(attributes['set'])
        if not set_name:
            return                      # unwanted entity set
        # entity_set_name() has already created self.sets[set_name].
        entities = self.sets[set_name]
        entity = attributes['id']
        # An entity name may recur only if it maps to the same character.
        assert (entity not in entities
                or entities[entity] == self.charid), \
                ('sets[%r][%r] == %r (!= %r)'
                 % (set_name, entity, entities[entity], self.charid))
        entities[entity] = self.charid

    def description_data(self, data):
        """Append `data` to the current character's description."""
        # expat may deliver one text node in several chunks; accumulate.
        self.descriptions.setdefault(self.charid, '')
        self.descriptions[self.charid] += data

    entity_set_name_pat = re.compile(r'[0-9-]*(.+)$')
    """Pattern to strip ISO numbers off the beginning of set names."""

    def entity_set_name(self, name):
        """
        Return lowercased and standard-number-free entity set name.
        Return ``None`` for unwanted entity sets.

        As a side effect, an empty dict is registered in `self.sets` for
        every wanted set name.
        """
        match = self.entity_set_name_pat.match(name)
        name = match.group(1).lower()
        if name in self.unwanted_entity_sets:
            return None
        self.sets.setdefault(name, {})
        return name

    def write_sets(self):
        """Write the output file(s) for every entity set, in name order."""
        set_names = list(self.sets.keys())
        set_names.sort()
        for set_name in set_names:
            self.write_set(set_name)

    def write_set(self, set_name, wide=None):
        """
        Write the substitution file for the entity set `set_name`.

        If `wide` is false, ``<set_name>.txt`` is written without the
        entities that contain a code point above U+FFFF; if any were left
        out, this method calls itself with `wide` true, which writes every
        entity of the set to ``<set_name>-wide.txt``.
        """
        if wide:
            outname = set_name + '-wide.txt'
        else:
            outname = set_name + '.txt'
        outfile = open(outname, 'w')
        has_wide = None
        try:
            sys.stdout.write('writing file "%s"\n' % outname)
            outfile.write(self.header + '\n')
            entity_set = self.sets[set_name]
            # Sort case-insensitively, with the original name as tie-breaker.
            entities = [(e.lower(), e) for e in entity_set.keys()]
            entities.sort()
            longest = 0
            for _, entity_name in entities:
                longest = max(longest, len(entity_name))
            for _, entity_name in entities:
                has_wide = self.write_entity(
                    entity_set, set_name, entity_name, outfile, longest,
                    wide) or has_wide
        finally:
            # Close (and flush) the file even if an entity fails to write.
            outfile.close()
        if has_wide and not wide:
            self.write_set(set_name, 1)

    def write_entity(self, set, set_name, entity_name, outfile, longest,
                     wide=None):
        """
        Write one substitution definition line for `entity_name`.

        `set` maps entity names to character IDs; a character ID is one
        prefix character followed by hexadecimal code points separated by
        hyphens.  `longest` is the length of the longest entity name in the
        set and is used to align the output.

        If `wide` is false and any code point is above U+FFFF, nothing is
        written and 1 is returned; otherwise the line is written and
        ``None`` is returned.
        """
        charid = set[entity_name]
        code_points = charid[1:].split('-')
        if not wide:
            for code in code_points:
                if int(code, 16) > 0xFFFF:
                    return 1            # wide-Unicode character
        codes = ' '.join(['U+%s' % code for code in code_points])
        outfile.write('.. %-*s unicode:: %s .. %s\n'
                      % (longest + 2, '|' + entity_name + '|',
                         codes, self.descriptions[charid]))

202

203

if __name__ == '__main__':
    # Run as a script: main()'s return value becomes the exit status.
    exit_status = main()
    sys.exit(exit_status)

Older »