~ubuntu-branches/ubuntu/vivid/golang/vivid

Viewing changes to src/pkg/exp/regexp/syntax/parse.go

Committer: Bazaar Package Importer
Author(s): Ondřej Surý
Date: 2011-08-03 17:04:59 UTC
mfrom: (14.1.2 sid)
Revision ID: james.westby@ubuntu.com-20110803170459-wzd99m3567y80ila

Tags: 1:59-1

http://bugs.debian.org/634270

* Imported Upstream version 59
* Refresh patches to a new release
* Fix FTBFS on ARM (Closes: #634270)
* Update version.bash to work with Debian packaging and not hg
repository

files added:
.pc/010-getgoversion_from_debian_changelog.patch

.pc/010-getgoversion_from_debian_changelog.patch/src

.pc/010-getgoversion_from_debian_changelog.patch/src/version.bash

debian/patches/010-getgoversion_from_debian_changelog.patch

doc/gopher/appenginegopher.jpg

doc/gopher/appenginegophercolor.jpg

doc/gopher/appenginelogo.gif

doc/progs/file_windows.go

misc/dashboard/godashboard/auth.py

src/cmd/godefs/test.sh

src/cmd/godefs/testdata.c

src/cmd/godefs/testdata_darwin_386.golden

src/cmd/godefs/testdata_darwin_amd64.golden

src/cmd/godoc/filesystem.go

src/cmd/godoc/parser.go

src/cmd/gofix/filepath.go

src/cmd/gofix/filepath_test.go

src/cmd/gofix/httpfs.go

src/cmd/gofix/httpfs_test.go

src/cmd/gofix/httpheaders.go

src/cmd/gofix/httpheaders_test.go

src/cmd/gofix/oserrorstring.go

src/cmd/gofix/oserrorstring_test.go

src/cmd/gofix/sorthelpers.go

src/cmd/gofix/sorthelpers_test.go

src/cmd/gofix/sortslice.go

src/cmd/gofix/sortslice_test.go

src/cmd/gofix/stringssplit.go

src/cmd/gofix/stringssplit_test.go

src/pkg/crypto/openpgp/elgamal

src/pkg/crypto/openpgp/elgamal/Makefile

src/pkg/crypto/openpgp/elgamal/elgamal.go

src/pkg/crypto/openpgp/elgamal/elgamal_test.go

src/pkg/csv

src/pkg/csv/Makefile

src/pkg/csv/reader.go

src/pkg/csv/reader_test.go

src/pkg/csv/writer.go

src/pkg/csv/writer_test.go

src/pkg/debug/elf/testdata/go-relocation-test-gcc424-x86-64.obj

src/pkg/debug/elf/testdata/go-relocation-test-gcc441-x86-64.obj

src/pkg/debug/elf/testdata/go-relocation-test-gcc441-x86.obj

src/pkg/exec/lp_plan9.go

src/pkg/exp/regexp

src/pkg/exp/regexp/syntax

src/pkg/exp/regexp/syntax/Makefile

src/pkg/exp/regexp/syntax/compile.go

src/pkg/exp/regexp/syntax/make_perl_groups.pl

src/pkg/exp/regexp/syntax/parse.go

src/pkg/exp/regexp/syntax/parse_test.go

src/pkg/exp/regexp/syntax/perl_groups.go

src/pkg/exp/regexp/syntax/prog.go

src/pkg/exp/regexp/syntax/prog_test.go

src/pkg/exp/regexp/syntax/regexp.go

src/pkg/exp/regexp/syntax/simplify.go

src/pkg/exp/regexp/syntax/simplify_test.go

src/pkg/exp/template

src/pkg/exp/template/Makefile

src/pkg/exp/template/exec.go

src/pkg/exp/template/exec_test.go

src/pkg/exp/template/funcs.go

src/pkg/exp/template/lex.go

src/pkg/exp/template/lex_test.go

src/pkg/exp/template/parse.go

src/pkg/exp/template/parse_test.go

src/pkg/exp/template/set.go

src/pkg/exp/template/set_test.go

src/pkg/go/build/cgotest/cgotest.go

src/pkg/go/build/cmdtest

src/pkg/go/build/cmdtest/main.go

src/pkg/go/build/pkgtest

src/pkg/go/build/pkgtest/pkgtest.go

src/pkg/go/build/pkgtest/sqrt_386.s

src/pkg/go/build/pkgtest/sqrt_amd64.s

src/pkg/go/build/pkgtest/sqrt_arm.s

src/pkg/net/dnsclient_unix.go

src/pkg/net/interface_windows.go

src/pkg/net/lookup_unix.go

src/pkg/net/lookup_windows.go

src/pkg/net/sendfile_windows.go

src/pkg/os/str.go

src/pkg/runtime/386/atomic.c

src/pkg/runtime/amd64/atomic.c

src/pkg/runtime/arm/atomic.c

src/pkg/runtime/sema_test.go

src/pkg/runtime/windows/amd64

src/pkg/runtime/windows/amd64/defs.h

src/pkg/runtime/windows/amd64/rt0.s

src/pkg/runtime/windows/amd64/signal.c

src/pkg/runtime/windows/amd64/sys.s

src/pkg/syscall/asm_windows_amd64.s

src/pkg/syscall/bpf_bsd.go

src/pkg/syscall/lsf_linux.go

src/pkg/syscall/sockcmsg_unix.go

src/pkg/syscall/syscall_windows_amd64.go

src/pkg/syscall/zerrors_windows.go

src/pkg/syscall/zerrors_windows_amd64.go

src/pkg/syscall/zsyscall_windows_amd64.go

src/pkg/syscall/zsysnum_windows_amd64.go

src/pkg/syscall/ztypes_windows.go

src/pkg/syscall/ztypes_windows_amd64.go

src/pkg/time/sys_plan9.go

src/pkg/time/sys_posix.go

src/pkg/time/zoneinfo_plan9.go

src/pkg/time/zoneinfo_posix.go

src/pkg/xml/atom_test.go

src/pkg/xml/marshal.go

src/pkg/xml/marshal_test.go

test/fixedbugs/bug345.dir

test/fixedbugs/bug345.dir/io.go

test/fixedbugs/bug345.dir/main.go

test/fixedbugs/bug345.go

test/fixedbugs/bug346.go

test/goprint.go

test/goto.go

test/sizeof.go

files removed:
.pc/010-fix_arm_ftbfs.patch

.pc/010-fix_arm_ftbfs.patch/src

.pc/010-fix_arm_ftbfs.patch/src/cmd

.pc/010-fix_arm_ftbfs.patch/src/cmd/5g

.pc/010-fix_arm_ftbfs.patch/src/cmd/5g/reg.c

.pc/010-fix_arm_ftbfs.patch/src/cmd/5l

.pc/010-fix_arm_ftbfs.patch/src/cmd/5l/noop.c

debian/patches/010-fix_arm_ftbfs.patch

doc/prog.sh

misc/nacl

misc/nacl/naclrun

src/cmd/8a/l.s

src/cmd/goinstall/parse.go

src/cmd/goinstall/path.go

src/cmd/goinstall/syslist_test.go

src/pkg/debug/elf/testdata/go-relocation-test-gcc424-x86-64.o

src/pkg/debug/elf/testdata/go-relocation-test-gcc441-x86-64.o

src/pkg/debug/elf/testdata/go-relocation-test-gcc441-x86.o

src/pkg/go/build/cgotest/file.go

src/pkg/net/lookup.go

src/pkg/net/resolv_windows.go

files modified:
.pc/001-allow_IPv4_on_IPv6_sockets.patch/src/pkg/net/sock.go

.pc/005-goinstall_dont_call_home_by_default.patch/src/cmd/goinstall/doc.go

.pc/005-goinstall_dont_call_home_by_default.patch/src/cmd/goinstall/main.go

.pc/006-fix_kfreebsd_build.patch/src/cmd/cov/Makefile

.pc/006-fix_kfreebsd_build.patch/src/cmd/prof/Makefile

.pc/006-fix_kfreebsd_build.patch/src/pkg/net/Makefile

.pc/006-fix_kfreebsd_build.patch/src/pkg/syscall/mkerrors.sh

.pc/007-use_native_dynamic_linker_on_kfreebsd.patch/src/cmd/6l/asm.c

.pc/007-use_native_dynamic_linker_on_kfreebsd.patch/src/cmd/8l/asm.c

.pc/008-emacs-mode-key-literal-indent.patch/misc/emacs/go-mode.el

.pc/009-goinstall_can_install_to_GOPATH.patch/src/cmd/goinstall/main.go

.pc/applied-patches

AUTHORS

CONTRIBUTORS

debian/changelog

debian/control

debian/patches/005-goinstall_dont_call_home_by_default.patch

debian/patches/006-fix_kfreebsd_build.patch

debian/patches/009-goinstall_can_install_to_GOPATH.patch

debian/patches/series

doc/devel/release.html

doc/devel/weekly.html

doc/effective_go.html

doc/go_mem.html

doc/go_spec.html

doc/go_tutorial.html

doc/go_tutorial.txt

doc/htmlgen.go

doc/install.html

doc/makehtml

doc/progs/run

doc/progs/sort.go

doc/progs/sortmain.go

include/u.h

lib/codereview/codereview.py

misc/dashboard/builder/http.go

misc/dashboard/builder/main.go

misc/dashboard/builder/package.go

misc/dashboard/godashboard/gobuild.py

misc/dashboard/godashboard/index.yaml

misc/dashboard/godashboard/package.html

misc/dashboard/godashboard/package.py

misc/dashboard/godashboard/static/style.css

misc/emacs/go-mode.el

misc/vim/ftdetect/gofiletype.vim

src/Make.ccmd

src/Make.clib

src/all-qemu.bash

src/cmd/5c/gc.h

src/cmd/5c/txt.c

src/cmd/5g/ggen.c

src/cmd/5g/gobj.c

src/cmd/5g/gsubr.c

src/cmd/5g/reg.c

src/cmd/5l/asm.c

src/cmd/5l/mkenam

src/cmd/6c/cgen.c

src/cmd/6c/gc.h

src/cmd/6g/ggen.c

src/cmd/6g/gobj.c

src/cmd/6g/gsubr.c

src/cmd/6g/reg.c

src/cmd/6l/asm.c

src/cmd/6l/mkenam

src/cmd/8a/a.h

src/cmd/8a/a.y

src/cmd/8a/lex.c

src/cmd/8c/cgen.c

src/cmd/8c/gc.h

src/cmd/8c/swt.c

src/cmd/8g/ggen.c

src/cmd/8g/gobj.c

src/cmd/8g/gsubr.c

src/cmd/8g/reg.c

src/cmd/8l/asm.c

src/cmd/8l/list.c

src/cmd/8l/mkenam

src/cmd/8l/obj.c

src/cmd/8l/pass.c

src/cmd/8l/span.c

src/cmd/cc/acid.c

src/cmd/cc/bits.c

src/cmd/cc/cc.h

src/cmd/cc/cc.y

src/cmd/cc/com.c

src/cmd/cc/com64.c

src/cmd/cc/dcl.c

src/cmd/cc/dpchk.c

src/cmd/cc/funct.c

src/cmd/cc/godefs.c

src/cmd/cc/lex.c

src/cmd/cc/lexbody

src/cmd/cc/mac.c

src/cmd/cc/macbody

src/cmd/cc/omachcap.c

src/cmd/cc/pgen.c

src/cmd/cc/scon.c

src/cmd/cc/sub.c

src/cmd/cgo/gcc.go

src/cmd/cgo/main.go

src/cmd/cgo/out.go

src/cmd/cgo/util.go

src/cmd/cov/Makefile

src/cmd/ebnflint/ebnflint.go

src/cmd/gc/builtin.c.boot

src/cmd/gc/closure.c

src/cmd/gc/dcl.c

src/cmd/gc/gen.c

src/cmd/gc/go.h

src/cmd/gc/go.y

src/cmd/gc/lex.c

src/cmd/gc/pgen.c

src/cmd/gc/runtime.go

src/cmd/gc/subr.c

src/cmd/gc/typecheck.c

src/cmd/gc/unsafe.c

src/cmd/gc/unsafe.go

src/cmd/gc/walk.c

src/cmd/godefs/Makefile

src/cmd/godoc/Makefile

src/cmd/godoc/codewalk.go

src/cmd/godoc/dirtrees.go

src/cmd/godoc/godoc.go

src/cmd/godoc/index.go

src/cmd/godoc/main.go

src/cmd/godoc/mapping.go

src/cmd/godoc/utils.go

src/cmd/gofix/Makefile

src/cmd/gofix/httpfinalurl.go

src/cmd/gofix/main.go

src/cmd/gofix/osopen.go

src/cmd/gofix/testdata/reflect.decoder.go.out

src/cmd/gofix/testdata/reflect.encoder.go.out

src/cmd/gofix/testdata/reflect.export.go.out

src/cmd/gofix/testdata/reflect.print.go.out

src/cmd/gofix/testdata/reflect.read.go.out

src/cmd/gofix/testdata/reflect.scan.go.out

src/cmd/gofix/testdata/reflect.template.go.in

src/cmd/gofix/testdata/reflect.template.go.out

src/cmd/gofix/testdata/reflect.type.go.out

src/cmd/gofmt/gofmt_test.go

src/cmd/gofmt/rewrite.go

src/cmd/goinstall/Makefile

src/cmd/goinstall/doc.go

src/cmd/goinstall/download.go

src/cmd/goinstall/main.go

src/cmd/goinstall/make.go

src/cmd/gopack/Makefile

src/cmd/gotest/doc.go

src/cmd/gotest/flag.go

src/cmd/gotype/gotype.go

src/cmd/govet/Makefile

src/cmd/govet/govet.go

src/cmd/goyacc/goyacc.go

src/cmd/hgpatch/main.go

src/cmd/ld/data.c

src/cmd/ld/dwarf.c

src/cmd/ld/elf.h

src/cmd/ld/ldelf.c

src/cmd/ld/ldmacho.c

src/cmd/ld/ldpe.c

src/cmd/ld/lib.c

src/cmd/ld/lib.h

src/cmd/ld/pe.c

src/cmd/ld/symtab.c

src/cmd/nm/Makefile

src/cmd/prof/Makefile

src/env.bash

src/lib9/Makefile

src/libmach/8db.c

src/libmach/darwin.c

src/libmach/linux.c

src/pkg/Makefile

src/pkg/archive/tar/reader.go

src/pkg/asn1/asn1.go

src/pkg/asn1/asn1_test.go

src/pkg/asn1/common.go

src/pkg/asn1/marshal.go

src/pkg/asn1/marshal_test.go

src/pkg/big/int.go

src/pkg/big/int_test.go

src/pkg/big/nat.go

src/pkg/big/rat.go

src/pkg/bufio/bufio.go

src/pkg/bufio/bufio_test.go

src/pkg/bytes/buffer.go

src/pkg/bytes/bytes.go

src/pkg/bytes/bytes_test.go

src/pkg/compress/gzip/gunzip.go

src/pkg/compress/lzw/reader_test.go

src/pkg/compress/zlib/reader.go

src/pkg/crypto/aes/cipher.go

src/pkg/crypto/blowfish/cipher.go

src/pkg/crypto/cast5/cast5.go

src/pkg/crypto/dsa/dsa.go

src/pkg/crypto/ocsp/ocsp.go

src/pkg/crypto/openpgp/keys.go

src/pkg/crypto/openpgp/packet/encrypted_key.go

src/pkg/crypto/openpgp/packet/encrypted_key_test.go

src/pkg/crypto/openpgp/packet/one_pass_signature.go

src/pkg/crypto/openpgp/packet/packet.go

src/pkg/crypto/openpgp/packet/private_key.go

src/pkg/crypto/openpgp/packet/private_key_test.go

src/pkg/crypto/openpgp/packet/public_key.go

src/pkg/crypto/openpgp/packet/signature.go

src/pkg/crypto/openpgp/packet/signature_test.go

src/pkg/crypto/openpgp/packet/symmetric_key_encrypted.go

src/pkg/crypto/openpgp/packet/symmetrically_encrypted.go

src/pkg/crypto/openpgp/packet/symmetrically_encrypted_test.go

src/pkg/crypto/openpgp/read.go

src/pkg/crypto/openpgp/read_test.go

src/pkg/crypto/openpgp/write.go

src/pkg/crypto/openpgp/write_test.go

src/pkg/crypto/rand/rand_windows.go

src/pkg/crypto/rsa/pkcs1v15.go

src/pkg/crypto/rsa/rsa.go

src/pkg/crypto/tls/conn.go

src/pkg/crypto/tls/generate_cert.go

src/pkg/crypto/tls/handshake_client.go

src/pkg/crypto/tls/handshake_server.go

src/pkg/crypto/tls/key_agreement.go

src/pkg/crypto/tls/tls.go

src/pkg/crypto/twofish/twofish.go

src/pkg/crypto/x509/pkix/pkix.go

src/pkg/crypto/x509/verify.go

src/pkg/crypto/x509/verify_test.go

src/pkg/crypto/x509/x509.go

src/pkg/crypto/xtea/cipher.go

src/pkg/debug/dwarf/type.go

src/pkg/debug/elf/file.go

src/pkg/debug/elf/file_test.go

src/pkg/debug/proc/proc_linux.go

src/pkg/encoding/pem/pem.go

src/pkg/exec/Makefile

src/pkg/exec/exec.go

src/pkg/exec/exec_test.go

src/pkg/exec/lp_unix.go

src/pkg/exec/lp_windows.go

src/pkg/exp/ogle/cmd.go

src/pkg/flag/flag.go

src/pkg/fmt/doc.go

src/pkg/fmt/fmt_test.go

src/pkg/fmt/format.go

src/pkg/fmt/print.go

src/pkg/fmt/scan.go

src/pkg/fmt/scan_test.go

src/pkg/go/ast/print_test.go

src/pkg/go/build/Makefile

src/pkg/go/build/build.go

src/pkg/go/build/build_test.go

src/pkg/go/build/dir.go

src/pkg/go/build/path.go

src/pkg/go/doc/comment.go

src/pkg/go/doc/doc.go

src/pkg/go/parser/interface.go

src/pkg/go/printer/nodes.go

src/pkg/go/printer/testdata/statements.golden

src/pkg/go/printer/testdata/statements.input

src/pkg/go/scanner/scanner.go

src/pkg/go/scanner/scanner_test.go

src/pkg/go/types/exportdata.go

src/pkg/go/types/gcimporter.go

src/pkg/go/types/testdata/exports.go

src/pkg/gob/codec_test.go

src/pkg/gob/decode.go

src/pkg/gob/decoder.go

src/pkg/gob/doc.go

src/pkg/gob/encode.go

src/pkg/gob/encoder.go

src/pkg/gob/gobencdec_test.go

src/pkg/gob/type.go

src/pkg/hash/fnv/fnv.go

src/pkg/html/parse.go

src/pkg/html/token_test.go

src/pkg/http/cgi/child.go

src/pkg/http/cgi/child_test.go

src/pkg/http/cgi/host.go

src/pkg/http/cgi/host_test.go

src/pkg/http/cgi/testdata/test.cgi

src/pkg/http/chunked.go

src/pkg/http/client.go

src/pkg/http/client_test.go

src/pkg/http/cookie.go

src/pkg/http/cookie_test.go

src/pkg/http/fs.go

src/pkg/http/fs_test.go

src/pkg/http/header.go

src/pkg/http/header_test.go

src/pkg/http/persist.go

src/pkg/http/readrequest_test.go

src/pkg/http/request.go

src/pkg/http/requestwrite_test.go

src/pkg/http/response.go

src/pkg/http/reverseproxy.go

src/pkg/http/reverseproxy_test.go

src/pkg/http/serve_test.go

src/pkg/http/server.go

src/pkg/http/spdy/read.go

src/pkg/http/spdy/spdy_test.go

src/pkg/http/spdy/types.go

src/pkg/http/spdy/write.go

src/pkg/http/transfer.go

src/pkg/http/transport.go

src/pkg/http/url.go

src/pkg/image/draw/draw.go

src/pkg/image/draw/draw_test.go

src/pkg/image/gif/reader.go

src/pkg/image/image.go

src/pkg/image/image_test.go

src/pkg/index/suffixarray/suffixarray.go

src/pkg/index/suffixarray/suffixarray_test.go

src/pkg/io/io.go

src/pkg/io/ioutil/ioutil.go

src/pkg/json/decode.go

src/pkg/json/decode_test.go

src/pkg/json/encode.go

src/pkg/json/scanner_test.go

src/pkg/mail/message.go

src/pkg/mail/message_test.go

src/pkg/mime/grammar.go

src/pkg/mime/mediatype.go

src/pkg/mime/mediatype_test.go

src/pkg/mime/multipart/formdata.go

src/pkg/mime/multipart/formdata_test.go

src/pkg/mime/multipart/multipart.go

src/pkg/mime/multipart/multipart_test.go

src/pkg/mime/multipart/writer.go

src/pkg/mime/multipart/writer_test.go

src/pkg/net/Makefile

src/pkg/net/dial.go

src/pkg/net/dnsclient.go

src/pkg/net/dnsmsg.go

src/pkg/net/dnsmsg_test.go

src/pkg/net/dnsname_test.go

src/pkg/net/fd_darwin.go

src/pkg/net/fd_windows.go

src/pkg/net/hosts_test.go

src/pkg/net/interface.go

src/pkg/net/interface_bsd.go

src/pkg/net/interface_linux.go

src/pkg/net/interface_stub.go

src/pkg/net/interface_test.go

src/pkg/net/iprawsock.go

src/pkg/net/ipsock.go

src/pkg/net/net.go

src/pkg/net/net_test.go

src/pkg/net/newpollserver.go

src/pkg/net/sock.go

src/pkg/net/sock_windows.go

src/pkg/net/udpsock.go

src/pkg/netchan/common.go

src/pkg/netchan/export.go

src/pkg/netchan/import.go

src/pkg/os/Makefile

src/pkg/os/env_windows.go

src/pkg/os/error.go

src/pkg/os/error_plan9.go

src/pkg/os/exec.go

src/pkg/os/exec_plan9.go

src/pkg/os/exec_posix.go

src/pkg/os/exec_windows.go

src/pkg/os/file.go

src/pkg/os/file_plan9.go

src/pkg/os/file_posix.go

src/pkg/os/file_unix.go

src/pkg/os/file_windows.go

src/pkg/os/mkunixsignals.sh

src/pkg/os/os_test.go

src/pkg/os/path_test.go

src/pkg/os/path_windows.go

src/pkg/os/stat_plan9.go

src/pkg/os/types.go

src/pkg/patch/patch.go

src/pkg/patch/textdiff.go

src/pkg/path/filepath/match.go

src/pkg/path/filepath/path.go

src/pkg/path/filepath/path_test.go

src/pkg/rand/rand_test.go

src/pkg/reflect/all_test.go

src/pkg/reflect/type.go

src/pkg/reflect/value.go

src/pkg/regexp/regexp.go

src/pkg/rpc/client.go

src/pkg/rpc/jsonrpc/all_test.go

src/pkg/rpc/jsonrpc/client.go

src/pkg/rpc/jsonrpc/server.go

src/pkg/rpc/server.go

src/pkg/rpc/server_test.go

src/pkg/runtime/Makefile

src/pkg/runtime/cgo/Makefile

src/pkg/runtime/cgo/darwin_386.c

src/pkg/runtime/cgo/darwin_amd64.c

src/pkg/runtime/cgo/freebsd_386.c

src/pkg/runtime/cgo/freebsd_amd64.c

src/pkg/runtime/cgo/linux_386.c

src/pkg/runtime/cgo/linux_amd64.c

src/pkg/runtime/cgo/windows_amd64.c

src/pkg/runtime/debug/stack.go

src/pkg/runtime/debug/stack_test.go

src/pkg/runtime/error.go

src/pkg/runtime/linux/mem.c

src/pkg/runtime/malloc.goc

src/pkg/runtime/mem.go

src/pkg/runtime/mkasmh.sh

src/pkg/runtime/plan9/386/sys.s

src/pkg/runtime/plan9/os.h

src/pkg/runtime/plan9/thread.c

src/pkg/runtime/pprof/pprof_test.go

src/pkg/runtime/proc.c

src/pkg/runtime/runtime.h

src/pkg/runtime/sema.goc

src/pkg/runtime/windows/mem.c

src/pkg/runtime/windows/os.h

src/pkg/runtime/windows/thread.c

src/pkg/smtp/smtp.go

src/pkg/smtp/smtp_test.go

src/pkg/sort/search.go

src/pkg/sort/search_test.go

src/pkg/sort/sort.go

src/pkg/sort/sort_test.go

src/pkg/strconv/atoi.go

src/pkg/strconv/fp_test.go

src/pkg/strconv/quote.go

src/pkg/strings/reader.go

src/pkg/strings/strings.go

src/pkg/strings/strings_test.go

src/pkg/sync/mutex.go

src/pkg/sync/mutex_test.go

src/pkg/sync/once.go

src/pkg/sync/once_test.go

src/pkg/syscall/Makefile

src/pkg/syscall/asm_windows_386.s

src/pkg/syscall/exec_plan9.go

src/pkg/syscall/exec_unix.go

src/pkg/syscall/exec_windows.go

src/pkg/syscall/mkall.sh

src/pkg/syscall/mkerrors.sh

src/pkg/syscall/mksyscall.pl

src/pkg/syscall/mksyscall_windows.pl

src/pkg/syscall/mksysnum_darwin.pl

src/pkg/syscall/mksysnum_freebsd.pl

src/pkg/syscall/mksysnum_linux.pl

src/pkg/syscall/route_bsd.go

src/pkg/syscall/syscall.go

src/pkg/syscall/syscall_bsd.go

src/pkg/syscall/syscall_darwin.go

src/pkg/syscall/syscall_darwin_386.go

src/pkg/syscall/syscall_darwin_amd64.go

src/pkg/syscall/syscall_freebsd_386.go

src/pkg/syscall/syscall_freebsd_amd64.go

src/pkg/syscall/syscall_linux.go

src/pkg/syscall/syscall_linux_386.go

src/pkg/syscall/syscall_linux_amd64.go

src/pkg/syscall/syscall_linux_arm.go

src/pkg/syscall/syscall_plan9.go

src/pkg/syscall/syscall_unix.go

src/pkg/syscall/syscall_windows.go

src/pkg/syscall/syscall_windows_386.go

src/pkg/syscall/types_linux.c

src/pkg/syscall/types_plan9.c

src/pkg/syscall/zerrors_darwin_386.go

src/pkg/syscall/zerrors_darwin_amd64.go

src/pkg/syscall/zerrors_freebsd_386.go

src/pkg/syscall/zerrors_freebsd_amd64.go

src/pkg/syscall/zerrors_linux_386.go

src/pkg/syscall/zerrors_linux_amd64.go

src/pkg/syscall/zerrors_linux_arm.go

src/pkg/syscall/zerrors_plan9_386.go

src/pkg/syscall/zerrors_windows_386.go

src/pkg/syscall/zsyscall_darwin_386.go

src/pkg/syscall/zsyscall_darwin_amd64.go

src/pkg/syscall/zsyscall_freebsd_386.go

src/pkg/syscall/zsyscall_freebsd_amd64.go

src/pkg/syscall/zsyscall_linux_amd64.go

src/pkg/syscall/zsyscall_linux_arm.go

src/pkg/syscall/zsyscall_windows_386.go

src/pkg/syscall/zsysnum_darwin_386.go

src/pkg/syscall/zsysnum_darwin_amd64.go

src/pkg/syscall/ztypes_linux_386.go

src/pkg/syscall/ztypes_linux_amd64.go

src/pkg/syscall/ztypes_linux_arm.go

src/pkg/syscall/ztypes_plan9_386.go

src/pkg/syscall/ztypes_windows_386.go

src/pkg/syslog/syslog_unix.go

src/pkg/template/execute.go

src/pkg/template/parse.go

src/pkg/testing/benchmark.go

src/pkg/testing/iotest/reader.go

src/pkg/testing/testing.go

src/pkg/time/Makefile

src/pkg/time/format.go

src/pkg/time/sleep_test.go

src/pkg/time/sys.go

src/pkg/time/tick.go

src/pkg/time/time_test.go

src/pkg/time/zoneinfo_unix.go

src/pkg/unicode/letter.go

src/pkg/unicode/letter_test.go

src/pkg/unicode/maketables.go

src/pkg/unicode/tables.go

src/pkg/websocket/client.go

src/pkg/xml/Makefile

src/pkg/xml/embed_test.go

src/pkg/xml/read.go

src/pkg/xml/read_test.go

src/pkg/xml/xml.go

src/pkg/xml/xml_test.go

src/run.bash

src/version.bash

test/errchk

test/fixedbugs/bug140.go

test/fixedbugs/bug178.go

test/fixedbugs/bug238.go

test/fixedbugs/bug243.go

test/fixedbugs/bug274.go

test/fixedbugs/bug344.go

test/golden.out

test/run

Show diffs side-by-side

added added

removed removed

src/pkg/exp/regexp/syntax/parse.go

// Use of this source code is governed by a BSD-style

// license that can be found in the LICENSE file.

package syntax

import (

"os"

"sort"

"strings"

"unicode"

"utf8"

)

// An Error describes a failure to parse a regular expression

// and gives the offending expression.

type Error struct {

Code ErrorCode

Expr string

}

func (e *Error) String() string {

return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"

}

// An ErrorCode describes a failure to parse a regular expression.

type ErrorCode string

const (

// Unexpected error

ErrInternalError ErrorCode = "regexp/syntax: internal error"

// Parse errors

ErrInvalidCharClass ErrorCode = "invalid character class"

ErrInvalidCharRange ErrorCode = "invalid character class range"

ErrInvalidEscape ErrorCode = "invalid escape sequence"

ErrInvalidNamedCapture ErrorCode = "invalid named capture"

ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax"

ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator"

ErrInvalidRepeatSize ErrorCode = "invalid repeat count"

ErrInvalidUTF8 ErrorCode = "invalid UTF-8"

ErrMissingBracket ErrorCode = "missing closing ]"

ErrMissingParen ErrorCode = "missing closing )"

ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"

ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"

)

func (e ErrorCode) String() string {

return string(e)

}

// Flags control the behavior of the parser and record information about regexp context.

type Flags uint16

const (

FoldCase Flags = 1 << iota // case-insensitive match

Literal // treat pattern as literal string

ClassNL // allow character classes like [^a-z] and [[:space:]] to match newline

DotNL // allow . to match newline

OneLine // treat ^ and $ as only matching at beginning and end of text

NonGreedy // make repetition operators default to non-greedy

PerlX // allow Perl extensions

UnicodeGroups // allow \p{Han}, \P{Han} for Unicode group and negation

WasDollar // regexp OpEndText was $, not \z

Simple // regexp contains no counted repetition

MatchNL = ClassNL | DotNL

Perl = ClassNL | OneLine | PerlX | UnicodeGroups // as close to Perl as possible

POSIX Flags = 0 // POSIX syntax

)

// Pseudo-ops for parsing stack.

const (

opLeftParen = opPseudo + iota

opVerticalBar

)

type parser struct {

flags Flags // parse mode flags

stack []*Regexp // stack of parsed expressions

free *Regexp

numCap int // number of capturing groups seen

wholeRegexp string

tmpClass []int // temporary char class work space

}

func (p *parser) newRegexp(op Op) *Regexp {

re := p.free

if re != nil {

p.free = re.Sub0[0]

*re = Regexp{}

} else {

re = new(Regexp)

}

re.Op = op

return re

}

100

func (p *parser) reuse(re *Regexp) {

101

re.Sub0[0] = p.free

102

p.free = re

103

}

104

105

// Parse stack manipulation.

106

107

// push pushes the regexp re onto the parse stack and returns the regexp.

108

func (p *parser) push(re *Regexp) *Regexp {

109

if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {

110

// Single rune.

111

if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {

112

return nil

113

}

114

re.Op = OpLiteral

115

re.Rune = re.Rune[:1]

116

re.Flags = p.flags &^ FoldCase

117

} else if re.Op == OpCharClass && len(re.Rune) == 4 &&

118

re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] &&

119

unicode.SimpleFold(re.Rune[0]) == re.Rune[2] &&

120

unicode.SimpleFold(re.Rune[2]) == re.Rune[0] ||

121

re.Op == OpCharClass && len(re.Rune) == 2 &&

122

re.Rune[0]+1 == re.Rune[1] &&

123

unicode.SimpleFold(re.Rune[0]) == re.Rune[1] &&

124

unicode.SimpleFold(re.Rune[1]) == re.Rune[0] {

125

// Case-insensitive rune like [Aa] or [Δδ].

126

if p.maybeConcat(re.Rune[0], p.flags|FoldCase) {

127

return nil

128

}

129

130

// Rewrite as (case-insensitive) literal.

131

re.Op = OpLiteral

132

re.Rune = re.Rune[:1]

133

re.Flags = p.flags | FoldCase

134

} else {

135

// Incremental concatenation.

136

p.maybeConcat(-1, 0)

137

}

138

139

p.stack = append(p.stack, re)

140

return re

141

}

142

143

// maybeConcat implements incremental concatenation

144

// of literal runes into string nodes. The parser calls this

145

// before each push, so only the top fragment of the stack

146

// might need processing. Since this is called before a push,

147

// the topmost literal is no longer subject to operators like *

148

// (Otherwise ab* would turn into (ab)*.)

149

// If r >= 0 and there's a node left over, maybeConcat uses it

150

// to push r with the given flags.

151

// maybeConcat reports whether r was pushed.

152

func (p *parser) maybeConcat(r int, flags Flags) bool {

153

n := len(p.stack)

154

if n < 2 {

155

return false

156

}

157

158

re1 := p.stack[n-1]

159

re2 := p.stack[n-2]

160

if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase {

161

return false

162

}

163

164

// Push re1 into re2.

165

re2.Rune = append(re2.Rune, re1.Rune...)

166

167

// Reuse re1 if possible.

168

if r >= 0 {

169

re1.Rune = re1.Rune0[:1]

170

re1.Rune[0] = r

171

re1.Flags = flags

172

return true

173

}

174

175

p.stack = p.stack[:n-1]

176

p.reuse(re1)

177

return false // did not push r

178

}

179

180

// newLiteral returns a new OpLiteral Regexp with the given flags

181

func (p *parser) newLiteral(r int, flags Flags) *Regexp {

182

re := p.newRegexp(OpLiteral)

183

re.Flags = flags

184

re.Rune0[0] = r

185

re.Rune = re.Rune0[:1]

186

return re

187

}

188

189

// literal pushes a literal regexp for the rune r on the stack

190

// and returns that regexp.

191

func (p *parser) literal(r int) {

192

p.push(p.newLiteral(r, p.flags))

193

}

194

195

// op pushes a regexp with the given op onto the stack

196

// and returns that regexp.

197

func (p *parser) op(op Op) *Regexp {

198

re := p.newRegexp(op)

199

re.Flags = p.flags

200

return p.push(re)

201

}

202

203

// repeat replaces the top stack element with itself repeated

204

// according to op.

205

func (p *parser) repeat(op Op, min, max int, opstr, t, lastRepeat string) (string, os.Error) {

206

flags := p.flags

207

if p.flags&PerlX != 0 {

208

if len(t) > 0 && t[0] == '?' {

209

t = t[1:]

210

flags ^= NonGreedy

211

}

212

if lastRepeat != "" {

213

// In Perl it is not allowed to stack repetition operators:

214

// a** is a syntax error, not a doubled star, and a++ means

215

// something else entirely, which we don't support!

216

return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(t)]}

217

}

218

}

219

n := len(p.stack)

220

if n == 0 {

221

return "", &Error{ErrMissingRepeatArgument, opstr}

222

}

223

sub := p.stack[n-1]

224

re := p.newRegexp(op)

225

re.Min = min

226

re.Max = max

227

re.Flags = flags

228

re.Sub = re.Sub0[:1]

229

re.Sub[0] = sub

230

p.stack[n-1] = re

231

return t, nil

232

}

233

234

// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.

235

func (p *parser) concat() *Regexp {

236

p.maybeConcat(-1, 0)

237

238

// Scan down to find pseudo-operator | or (.

239

i := len(p.stack)

240

for i > 0 && p.stack[i-1].Op < opPseudo {

241

i--

242

}

243

subs := p.stack[i:]

244

p.stack = p.stack[:i]

245

246

// Empty concatenation is special case.

247

if len(subs) == 0 {

248

return p.push(p.newRegexp(OpEmptyMatch))

249

}

250

251

return p.push(p.collapse(subs, OpConcat))

252

}

253

254

// alternate replaces the top of the stack (above the topmost '(') with its alternation.

255

func (p *parser) alternate() *Regexp {

256

// Scan down to find pseudo-operator (.

257

// There are no | above (.

258

i := len(p.stack)

259

for i > 0 && p.stack[i-1].Op < opPseudo {

260

i--

261

}

262

subs := p.stack[i:]

263

p.stack = p.stack[:i]

264

265

// Make sure top class is clean.

266

// All the others already are (see swapVerticalBar).

267

if len(subs) > 0 {

268

cleanAlt(subs[len(subs)-1])

269

}

270

271

// Empty alternate is special case

272

// (shouldn't happen but easy to handle).

273

if len(subs) == 0 {

274

return p.push(p.newRegexp(OpNoMatch))

275

}

276

277

return p.push(p.collapse(subs, OpAlternate))

278

}

279

280

// cleanAlt cleans re for eventual inclusion in an alternation.

281

func cleanAlt(re *Regexp) {

282

switch re.Op {

283

case OpCharClass:

284

re.Rune = cleanClass(&re.Rune)

285

if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune {

286

re.Rune = nil

287

re.Op = OpAnyChar

288

return

289

}

290

if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune {

291

re.Rune = nil

292

re.Op = OpAnyCharNotNL

293

return

294

}

295

if cap(re.Rune)-len(re.Rune) > 100 {

296

// re.Rune will not grow any more.

297

// Make a copy or inline to reclaim storage.

298

re.Rune = append(re.Rune0[:0], re.Rune...)

299

}

300

}

301

}

302

303

// collapse returns the result of applying op to sub.

304

// If sub contains op nodes, they all get hoisted up

305

// so that there is never a concat of a concat or an

306

// alternate of an alternate.

307

func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {

308

if len(subs) == 1 {

309

return subs[0]

310

}

311

re := p.newRegexp(op)

312

re.Sub = re.Sub0[:0]

313

for _, sub := range subs {

314

if sub.Op == op {

315

re.Sub = append(re.Sub, sub.Sub...)

316

p.reuse(sub)

317

} else {

318

re.Sub = append(re.Sub, sub)

319

}

320

}

321

if op == OpAlternate {

322

re.Sub = p.factor(re.Sub, re.Flags)

323

if len(re.Sub) == 1 {

324

old := re

325

re = re.Sub[0]

326

p.reuse(old)

327

}

328

}

329

return re

330

}

331

332

// factor factors common prefixes from the alternation list sub.

333

// It returns a replacement list that reuses the same storage and

334

// frees (passes to p.reuse) any removed *Regexps.

335

336

// For example,

337

// ABC|ABD|AEF|BCX|BCY

338

// simplifies by literal prefix extraction to

339

// A(B(C|D)|EF)|BC(X|Y)

340

// which simplifies by character class introduction to

341

// A(B[CD]|EF)|BC[XY]

342

343

func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {

344

if len(sub) < 2 {

345

return sub

346

}

347

348

// Round 1: Factor out common literal prefixes.

349

var str []int

350

var strflags Flags

351

start := 0

352

out := sub[:0]

353

for i := 0; i <= len(sub); i++ {

354

// Invariant: the Regexps that were in sub[0:start] have been

355

// used or marked for reuse, and the slice space has been reused

356

// for out (len(out) <= start).

357

358

// Invariant: sub[start:i] consists of regexps that all begin

359

// with str as modified by strflags.

360

var istr []int

361

var iflags Flags

362

if i < len(sub) {

363

istr, iflags = p.leadingString(sub[i])

364

if iflags == strflags {

365

same := 0

366

for same < len(str) && same < len(istr) && str[same] == istr[same] {

367

same++

368

}

369

if same > 0 {

370

// Matches at least one rune in current range.

371

// Keep going around.

372

str = str[:same]

373

continue

374

}

375

}

376

}

377

378

// Found end of a run with common leading literal string:

379

// sub[start:i] all begin with str[0:len(str)], but sub[i]

380

// does not even begin with str[0].

381

382

// Factor out common string and append factored expression to out.

383

if i == start {

384

// Nothing to do - run of length 0.

385

} else if i == start+1 {

386

// Just one: don't bother factoring.

387

out = append(out, sub[start])

388

} else {

389

// Construct factored form: prefix(suffix1|suffix2|...)

390

prefix := p.newRegexp(OpLiteral)

391

prefix.Flags = strflags

392

prefix.Rune = append(prefix.Rune[:0], str...)

393

394

for j := start; j < i; j++ {

395

sub[j] = p.removeLeadingString(sub[j], len(str))

396

}

397

suffix := p.collapse(sub[start:i], OpAlternate) // recurse

398

399

re := p.newRegexp(OpConcat)

400

re.Sub = append(re.Sub[:0], prefix, suffix)

401

out = append(out, re)

402

}

403

404

// Prepare for next iteration.

405

start = i

406

str = istr

407

strflags = iflags

408

}

409

sub = out

410

411

// Round 2: Factor out common complex prefixes,

412

// just the first piece of each concatenation,

413

// whatever it is. This is good enough a lot of the time.

414

start = 0

415

out = sub[:0]

416

var first *Regexp

417

for i := 0; i <= len(sub); i++ {

418

// Invariant: the Regexps that were in sub[0:start] have been

419

// used or marked for reuse, and the slice space has been reused

420

// for out (len(out) <= start).

421

422

// Invariant: sub[start:i] consists of regexps that all begin

423

// with str as modified by strflags.

424

var ifirst *Regexp

425

if i < len(sub) {

426

ifirst = p.leadingRegexp(sub[i])

427

if first != nil && first.Equal(ifirst) {

428

continue

429

}

430

}

431

432

// Found end of a run with common leading regexp:

433

// sub[start:i] all begin with first but sub[i] does not.

434

435

// Factor out common regexp and append factored expression to out.

436

if i == start {

437

// Nothing to do - run of length 0.

438

} else if i == start+1 {

439

// Just one: don't bother factoring.

440

out = append(out, sub[start])

441

} else {

442

// Construct factored form: prefix(suffix1|suffix2|...)

443

prefix := first

444

445

for j := start; j < i; j++ {

446

reuse := j != start // prefix came from sub[start]

447

sub[j] = p.removeLeadingRegexp(sub[j], reuse)

448

}

449

suffix := p.collapse(sub[start:i], OpAlternate) // recurse

450

451

re := p.newRegexp(OpConcat)

452

re.Sub = append(re.Sub[:0], prefix, suffix)

453

out = append(out, re)

454

}

455

456

// Prepare for next iteration.

457

start = i

458

first = ifirst

459

}

460

sub = out

461

462

// Round 3: Collapse runs of single literals into character classes.

463

start = 0

464

out = sub[:0]

465

for i := 0; i <= len(sub); i++ {

466

// Invariant: the Regexps that were in sub[0:start] have been

467

// used or marked for reuse, and the slice space has been reused

468

// for out (len(out) <= start).

469

470

// Invariant: sub[start:i] consists of regexps that are either

471

// literal runes or character classes.

472

if i < len(sub) && isCharClass(sub[i]) {

473

continue

474

}

475

476

// sub[i] is not a char or char class;

477

// emit char class for sub[start:i]...

478

if i == start {

479

// Nothing to do - run of length 0.

480

} else if i == start+1 {

481

out = append(out, sub[start])

482

} else {

483

// Make new char class.

484

// Start with most complex regexp in sub[start].

485

max := start

486

for j := start + 1; j < i; j++ {

487

if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) {

488

max = j

489

}

490

}

491

sub[start], sub[max] = sub[max], sub[start]

492

493

for j := start + 1; j < i; j++ {

494

mergeCharClass(sub[start], sub[j])

495

p.reuse(sub[j])

496

}

497

cleanAlt(sub[start])

498

out = append(out, sub[start])

499

}

500

501

// ... and then emit sub[i].

502

if i < len(sub) {

503

out = append(out, sub[i])

504

}

505

start = i + 1

506

}

507

sub = out

508

509

// Round 4: Collapse runs of empty matches into a single empty match.

510

start = 0

511

out = sub[:0]

512

for i := range sub {

513

if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch {

514

continue

515

}

516

out = append(out, sub[i])

517

}

518

sub = out

519

520

return sub

521

}

522

523

// leadingString returns the leading literal string that re begins with.

524

// The string refers to storage in re or its children.

525

func (p *parser) leadingString(re *Regexp) ([]int, Flags) {

526

if re.Op == OpConcat && len(re.Sub) > 0 {

527

re = re.Sub[0]

528

}

529

if re.Op != OpLiteral {

530

return nil, 0

531

}

532

return re.Rune, re.Flags & FoldCase

533

}

534

535

// removeLeadingString removes the first n leading runes

536

// from the beginning of re. It returns the replacement for re.

537

func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp {

538

if re.Op == OpConcat && len(re.Sub) > 0 {

539

// Removing a leading string in a concatenation

540

// might simplify the concatenation.

541

sub := re.Sub[0]

542

sub = p.removeLeadingString(sub, n)

543

re.Sub[0] = sub

544

if sub.Op == OpEmptyMatch {

545

p.reuse(sub)

546

switch len(re.Sub) {

547

case 0, 1:

548

// Impossible but handle.

549

re.Op = OpEmptyMatch

550

re.Sub = nil

551

case 2:

552

old := re

553

re = re.Sub[1]

554

p.reuse(old)

555

default:

556

copy(re.Sub, re.Sub[1:])

557

re.Sub = re.Sub[:len(re.Sub)-1]

558

}

559

}

560

return re

561

}

562

563

if re.Op == OpLiteral {

564

re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])]

565

if len(re.Rune) == 0 {

566

re.Op = OpEmptyMatch

567

}

568

}

569

return re

570

}

571

572

// leadingRegexp returns the leading regexp that re begins with.

573

// The regexp refers to storage in re or its children.

574

func (p *parser) leadingRegexp(re *Regexp) *Regexp {

575

if re.Op == OpEmptyMatch {

576

return nil

577

}

578

if re.Op == OpConcat && len(re.Sub) > 0 {

579

sub := re.Sub[0]

580

if sub.Op == OpEmptyMatch {

581

return nil

582

}

583

return sub

584

}

585

return re

586

}

587

588

// removeLeadingRegexp removes the leading regexp in re.

589

// It returns the replacement for re.

590

// If reuse is true, it passes the removed regexp (if no longer needed) to p.reuse.

591

func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp {

592

if re.Op == OpConcat && len(re.Sub) > 0 {

593

if reuse {

594

p.reuse(re.Sub[0])

595

}

596

re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])]

597

switch len(re.Sub) {

598

case 0:

599

re.Op = OpEmptyMatch

600

re.Sub = nil

601

case 1:

602

old := re

603

re = re.Sub[0]

604

p.reuse(old)

605

}

606

return re

607

}

608

re.Op = OpEmptyMatch

609

return re

610

}

611

612

func literalRegexp(s string, flags Flags) *Regexp {

613

re := &Regexp{Op: OpLiteral}

614

re.Flags = flags

615

re.Rune = re.Rune0[:0] // use local storage for small strings

616

for _, c := range s {

617

if len(re.Rune) >= cap(re.Rune) {

618

// string is too long to fit in Rune0. let Go handle it

619

re.Rune = []int(s)

620

break

621

}

622

re.Rune = append(re.Rune, c)

623

}

624

return re

625

}

626

627

// Parsing.

628

629

func Parse(s string, flags Flags) (*Regexp, os.Error) {

630

if flags&Literal != 0 {

631

// Trivial parser for literal string.

632

if err := checkUTF8(s); err != nil {

633

return nil, err

634

}

635

return literalRegexp(s, flags), nil

636

}

637

638

// Otherwise, must do real work.

639

var (

640

p parser

641

err os.Error

642

c int

643

op Op

644

lastRepeat string

645

min, max int

646

)

647

p.flags = flags

648

p.wholeRegexp = s

649

t := s

650

for t != "" {

651

repeat := ""

652

BigSwitch:

653

switch t[0] {

654

default:

655

if c, t, err = nextRune(t); err != nil {

656

return nil, err

657

}

658

p.literal(c)

659

660

case '(':

661

if p.flags&PerlX != 0 && len(t) >= 2 && t[1] == '?' {

662

// Flag changes and non-capturing groups.

663

if t, err = p.parsePerlFlags(t); err != nil {

664

return nil, err

665

}

666

break

667

}

668

p.numCap++

669

p.op(opLeftParen).Cap = p.numCap

670

t = t[1:]

671

case '|':

672

if err = p.parseVerticalBar(); err != nil {

673

return nil, err

674

}

675

t = t[1:]

676

case ')':

677

if err = p.parseRightParen(); err != nil {

678

return nil, err

679

}

680

t = t[1:]

681

case '^':

682

if p.flags&OneLine != 0 {

683

p.op(OpBeginText)

684

} else {

685

p.op(OpBeginLine)

686

}

687

t = t[1:]

688

case '$':

689

if p.flags&OneLine != 0 {

690

p.op(OpEndText).Flags |= WasDollar

691

} else {

692

p.op(OpEndLine)

693

}

694

t = t[1:]

695

case '.':

696

if p.flags&DotNL != 0 {

697

p.op(OpAnyChar)

698

} else {

699

p.op(OpAnyCharNotNL)

700

}

701

t = t[1:]

702

case '[':

703

if t, err = p.parseClass(t); err != nil {

704

return nil, err

705

}

706

case '*', '+', '?':

707

switch t[0] {

708

case '*':

709

op = OpStar

710

case '+':

711

op = OpPlus

712

case '?':

713

op = OpQuest

714

}

715

if t, err = p.repeat(op, min, max, t[:1], t[1:], lastRepeat); err != nil {

716

return nil, err

717

}

718

case '{':

719

op = OpRepeat

720

min, max, tt, ok := p.parseRepeat(t)

721

if !ok {

722

// If the repeat cannot be parsed, { is a literal.

723

p.literal('{')

724

t = t[1:]

725

break

726

}

727

if t, err = p.repeat(op, min, max, t[:len(t)-len(tt)], tt, lastRepeat); err != nil {

728

return nil, err

729

}

730

case '\\':

731

if p.flags&PerlX != 0 && len(t) >= 2 {

732

switch t[1] {

733

case 'A':

734

p.op(OpBeginText)

735

t = t[2:]

736

break BigSwitch

737

case 'b':

738

p.op(OpWordBoundary)

739

t = t[2:]

740

break BigSwitch

741

case 'B':

742

p.op(OpNoWordBoundary)

743

t = t[2:]

744

break BigSwitch

745

case 'C':

746

// any byte; not supported

747

return nil, &Error{ErrInvalidEscape, t[:2]}

748

case 'Q':

749

// \Q ... \E: the ... is always literals

750

var lit string

751

if i := strings.Index(t, `\E`); i < 0 {

752

lit = t[2:]

753

t = ""

754

} else {

755

lit = t[2:i]

756

t = t[i+2:]

757

}

758

p.push(literalRegexp(lit, p.flags))

759

break BigSwitch

760

case 'z':

761

p.op(OpEndText)

762

t = t[2:]

763

break BigSwitch

764

}

765

}

766

767

re := p.newRegexp(OpCharClass)

768

re.Flags = p.flags

769

770

// Look for Unicode character group like \p{Han}

771

if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') {

772

r, rest, err := p.parseUnicodeClass(t, re.Rune0[:0])

773

if err != nil {

774

return nil, err

775

}

776

if r != nil {

777

re.Rune = r

778

t = rest

779

p.push(re)

780

break BigSwitch

781

}

782

}

783

784

// Perl character class escape.

785

if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil {

786

re.Rune = r

787

t = rest

788

p.push(re)

789

break BigSwitch

790

}

791

p.reuse(re)

792

793

// Ordinary single-character escape.

794

if c, t, err = p.parseEscape(t); err != nil {

795

return nil, err

796

}

797

p.literal(c)

798

}

799

lastRepeat = repeat

800

}

801

802

p.concat()

803

if p.swapVerticalBar() {

804

// pop vertical bar

805

p.stack = p.stack[:len(p.stack)-1]

806

}

807

p.alternate()

808

809

n := len(p.stack)

810

if n != 1 {

811

return nil, &Error{ErrMissingParen, s}

812

}

813

return p.stack[0], nil

814

}

815

816

// parseRepeat parses {min} (max=min) or {min,} (max=-1) or {min,max}.

817

// If s is not of that form, it returns ok == false.

818

func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {

819

if s == "" || s[0] != '{' {

820

return

821

}

822

s = s[1:]

823

if min, s, ok = p.parseInt(s); !ok {

824

return

825

}

826

if s == "" {

827

return

828

}

829

if s[0] != ',' {

830

max = min

831

} else {

832

s = s[1:]

833

if s == "" {

834

return

835

}

836

if s[0] == '}' {

837

max = -1

838

} else if max, s, ok = p.parseInt(s); !ok {

839

return

840

}

841

}

842

if s == "" || s[0] != '}' {

843

return

844

}

845

rest = s[1:]

846

ok = true

847

return

848

}

849

850

// parsePerlFlags parses a Perl flag setting or non-capturing group or both,

851

// like (?i) or (?: or (?i:. It removes the prefix from s and updates the parse state.

852

// The caller must have ensured that s begins with "(?".

853

func (p *parser) parsePerlFlags(s string) (rest string, err os.Error) {

854

t := s

855

856

// Check for named captures, first introduced in Python's regexp library.

857

// As usual, there are three slightly different syntaxes:

858

859

// (?P<name>expr) the original, introduced by Python

860

// (?<name>expr) the .NET alteration, adopted by Perl 5.10

861

// (?'name'expr) another .NET alteration, adopted by Perl 5.10

862

863

// Perl 5.10 gave in and implemented the Python version too,

864

// but they claim that the last two are the preferred forms.

865

// PCRE and languages based on it (specifically, PHP and Ruby)

866

// support all three as well. EcmaScript 4 uses only the Python form.

867

868

// In both the open source world (via Code Search) and the

869

// Google source tree, (?P<expr>name) is the dominant form,

870

// so that's the one we implement. One is enough.

871

if len(t) > 4 && t[2] == 'P' && t[3] == '<' {

872

// Pull out name.

873

end := strings.IndexRune(t, '>')

874

if end < 0 {

875

if err = checkUTF8(t); err != nil {

876

return "", err

877

}

878

return "", &Error{ErrInvalidNamedCapture, s}

879

}

880

881

capture := t[:end+1] // "(?P<name>"

882

name := t[4:end] // "name"

883

if err = checkUTF8(name); err != nil {

884

return "", err

885

}

886

if !isValidCaptureName(name) {

887

return "", &Error{ErrInvalidNamedCapture, capture}

888

}

889

890

// Like ordinary capture, but named.

891

p.numCap++

892

re := p.op(opLeftParen)

893

re.Cap = p.numCap

894

re.Name = name

895

return t[end+1:], nil

896

}

897

898

// Non-capturing group. Might also twiddle Perl flags.

899

var c int

900

t = t[2:] // skip (?

901

flags := p.flags

902

sign := +1

903

sawFlag := false

904

Loop:

905

for t != "" {

906

if c, t, err = nextRune(t); err != nil {

907

return "", err

908

}

909

switch c {

910

default:

911

break Loop

912

913

// Flags.

914

case 'i':

915

flags |= FoldCase

916

sawFlag = true

917

case 'm':

918

flags &^= OneLine

919

sawFlag = true

920

case 's':

921

flags |= DotNL

922

sawFlag = true

923

case 'U':

924

flags |= NonGreedy

925

sawFlag = true

926

927

// Switch to negation.

928

case '-':

929

if sign < 0 {

930

break Loop

931

}

932

sign = -1

933

// Invert flags so that | above turn into &^ and vice versa.

934

// We'll invert flags again before using it below.

935

flags = ^flags

936

sawFlag = false

937

938

// End of flags, starting group or not.

939

case ':', ')':

940

if sign < 0 {

941

if !sawFlag {

942

break Loop

943

}

944

flags = ^flags

945

}

946

if c == ':' {

947

// Open new group

948

p.op(opLeftParen)

949

}

950

p.flags = flags

951

return t, nil

952

}

953

}

954

955

return "", &Error{ErrInvalidPerlOp, s[:len(s)-len(t)]}

956

}

957

958

// isValidCaptureName reports whether name

959

// is a valid capture name: [A-Za-z0-9_]+.

960

// PCRE limits names to 32 bytes.

961

// Python rejects names starting with digits.

962

// We don't enforce either of those.

963

func isValidCaptureName(name string) bool {

964

if name == "" {

965

return false

966

}

967

for _, c := range name {

968

if c != '_' && !isalnum(c) {

969

return false

970

}

971

}

972

return true

973

}

974

975

// parseInt parses a decimal integer.

976

func (p *parser) parseInt(s string) (n int, rest string, ok bool) {

977

if s == "" || s[0] < '0' || '9' < s[0] {

978

return

979

}

980

// Disallow leading zeros.

981

if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' {

982

return

983

}

984

for s != "" && '0' <= s[0] && s[0] <= '9' {

985

// Avoid overflow.

986

if n >= 1e8 {

987

return

988

}

989

n = n*10 + int(s[0]) - '0'

990

s = s[1:]

991

}

992

rest = s

993

ok = true

994

return

995

}

996

997

// can this be represented as a character class?

998

// single-rune literal string, char class, ., and .|\n.

999

func isCharClass(re *Regexp) bool {

1000

return re.Op == OpLiteral && len(re.Rune) == 1 ||

1001

re.Op == OpCharClass ||

1002

re.Op == OpAnyCharNotNL ||

1003

re.Op == OpAnyChar

1004

}

1005

1006

// does re match r?

1007

func matchRune(re *Regexp, r int) bool {

1008

switch re.Op {

1009

case OpLiteral:

1010

return len(re.Rune) == 1 && re.Rune[0] == r

1011

case OpCharClass:

1012

for i := 0; i < len(re.Rune); i += 2 {

1013

if re.Rune[i] <= r && r <= re.Rune[i+1] {

1014

return true

1015

}

1016

}

1017

return false

1018

case OpAnyCharNotNL:

1019

return r != '\n'

1020

case OpAnyChar:

1021

return true

1022

}

1023

return false

1024

}

1025

1026

// parseVerticalBar handles a | in the input.

1027

func (p *parser) parseVerticalBar() os.Error {

1028

p.concat()

1029

1030

// The concatenation we just parsed is on top of the stack.

1031

// If it sits above an opVerticalBar, swap it below

1032

// (things below an opVerticalBar become an alternation).

1033

// Otherwise, push a new vertical bar.

1034

if !p.swapVerticalBar() {

1035

p.op(opVerticalBar)

1036

}

1037

1038

return nil

1039

}

1040

1041

// mergeCharClass makes dst = dst|src.

1042

// The caller must ensure that dst.Op >= src.Op,

1043

// to reduce the amount of copying.

1044

func mergeCharClass(dst, src *Regexp) {

1045

switch dst.Op {

1046

case OpAnyChar:

1047

// src doesn't add anything.

1048

case OpAnyCharNotNL:

1049

// src might add \n

1050

if matchRune(src, '\n') {

1051

dst.Op = OpAnyChar

1052

}

1053

case OpCharClass:

1054

// src is simpler, so either literal or char class

1055

if src.Op == OpLiteral {

1056

dst.Rune = appendRange(dst.Rune, src.Rune[0], src.Rune[0])

1057

} else {

1058

dst.Rune = appendClass(dst.Rune, src.Rune)

1059

}

1060

case OpLiteral:

1061

// both literal

1062

if src.Rune[0] == dst.Rune[0] {

1063

break

1064

}

1065

dst.Op = OpCharClass

1066

dst.Rune = append(dst.Rune, dst.Rune[0])

1067

dst.Rune = appendRange(dst.Rune, src.Rune[0], src.Rune[0])

1068

}

1069

}

1070

1071

// If the top of the stack is an element followed by an opVerticalBar

1072

// swapVerticalBar swaps the two and returns true.

1073

// Otherwise it returns false.

1074

func (p *parser) swapVerticalBar() bool {

1075

// If above and below vertical bar are literal or char class,

1076

// can merge into a single char class.

1077

n := len(p.stack)

1078

if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) {

1079

re1 := p.stack[n-1]

1080

re3 := p.stack[n-3]

1081

// Make re3 the more complex of the two.

1082

if re1.Op > re3.Op {

1083

re1, re3 = re3, re1

1084

p.stack[n-3] = re3

1085

}

1086

mergeCharClass(re3, re1)

1087

p.reuse(re1)

1088

p.stack = p.stack[:n-1]

1089

return true

1090

}

1091

1092

if n >= 2 {

1093

re1 := p.stack[n-1]

1094

re2 := p.stack[n-2]

1095

if re2.Op == opVerticalBar {

1096

if n >= 3 {

1097

// Now out of reach.

1098

// Clean opportunistically.

1099

cleanAlt(p.stack[n-3])

1100

}

1101

p.stack[n-2] = re1

1102

p.stack[n-1] = re2

1103

return true

1104

}

1105

}

1106

return false

1107

}

1108

1109

// parseRightParen handles a ) in the input.

1110

func (p *parser) parseRightParen() os.Error {

1111

p.concat()

1112

if p.swapVerticalBar() {

1113

// pop vertical bar

1114

p.stack = p.stack[:len(p.stack)-1]

1115

}

1116

p.alternate()

1117

1118

n := len(p.stack)

1119

if n < 2 {

1120

return &Error{ErrInternalError, ""}

1121

}

1122

re1 := p.stack[n-1]

1123

re2 := p.stack[n-2]

1124

p.stack = p.stack[:n-2]

1125

if re2.Op != opLeftParen {

1126

return &Error{ErrMissingParen, p.wholeRegexp}

1127

}

1128

if re2.Cap == 0 {

1129

// Just for grouping.

1130

p.push(re1)

1131

} else {

1132

re2.Op = OpCapture

1133

re2.Sub = re2.Sub0[:1]

1134

re2.Sub[0] = re1

1135

p.push(re2)

1136

}

1137

return nil

1138

}

1139

1140

// parseEscape parses an escape sequence at the beginning of s

1141

// and returns the rune.

1142

func (p *parser) parseEscape(s string) (r int, rest string, err os.Error) {

1143

t := s[1:]

1144

if t == "" {

1145

return 0, "", &Error{ErrTrailingBackslash, ""}

1146

}

1147

c, t, err := nextRune(t)

1148

if err != nil {

1149

return 0, "", err

1150

}

1151

1152

Switch:

1153

switch c {

1154

default:

1155

if c < utf8.RuneSelf && !isalnum(c) {

1156

// Escaped non-word characters are always themselves.

1157

// PCRE is not quite so rigorous: it accepts things like

1158

// \q, but we don't. We once rejected \_, but too many

1159

// programs and people insist on using it, so allow \_.

1160

return c, t, nil

1161

}

1162

1163

// Octal escapes.

1164

case '1', '2', '3', '4', '5', '6', '7':

1165

// Single non-zero digit is a backreference; not supported

1166

if t == "" || t[0] < '0' || t[0] > '7' {

1167

break

1168

}

1169

fallthrough

1170

case '0':

1171

// Consume up to three octal digits; already have one.

1172

r = c - '0'

1173

for i := 1; i < 3; i++ {

1174

if t == "" || t[0] < '0' || t[0] > '7' {

1175

break

1176

}

1177

r = r*8 + int(t[0]) - '0'

1178

t = t[1:]

1179

}

1180

return r, t, nil

1181

1182

// Hexadecimal escapes.

1183

case 'x':

1184

if t == "" {

1185

break

1186

}

1187

if c, t, err = nextRune(t); err != nil {

1188

return 0, "", err

1189

}

1190

if c == '{' {

1191

// Any number of digits in braces.

1192

// Perl accepts any text at all; it ignores all text

1193

// after the first non-hex digit. We require only hex digits,

1194

// and at least one.

1195

nhex := 0

1196

r = 0

1197

for {

1198

if t == "" {

1199

break Switch

1200

}

1201

if c, t, err = nextRune(t); err != nil {

1202

return 0, "", err

1203

}

1204

if c == '}' {

1205

break

1206

}

1207

v := unhex(c)

1208

if v < 0 {

1209

break Switch

1210

}

1211

r = r*16 + v

1212

if r > unicode.MaxRune {

1213

break Switch

1214

}

1215

nhex++

1216

}

1217

if nhex == 0 {

1218

break Switch

1219

}

1220

return r, t, nil

1221

}

1222

1223

// Easy case: two hex digits.

1224

x := unhex(c)

1225

if c, t, err = nextRune(t); err != nil {

1226

return 0, "", err

1227

}

1228

y := unhex(c)

1229

if x < 0 || y < 0 {

1230

break

1231

}

1232

return x*16 + y, t, nil

1233

1234

// C escapes. There is no case 'b', to avoid misparsing

1235

// the Perl word-boundary \b as the C backspace \b

1236

// when in POSIX mode. In Perl, /\b/ means word-boundary

1237

// but /[\b]/ means backspace. We don't support that.

1238

// If you want a backspace, embed a literal backspace

1239

// character or use \x08.

1240

case 'a':

1241

return '\a', t, err

1242

case 'f':

1243

return '\f', t, err

1244

case 'n':

1245

return '\n', t, err

1246

case 'r':

1247

return '\r', t, err

1248

case 't':

1249

return '\t', t, err

1250

case 'v':

1251

return '\v', t, err

1252

}

1253

return 0, "", &Error{ErrInvalidEscape, s[:len(s)-len(t)]}

1254

}

1255

1256

// parseClassChar parses a character class character at the beginning of s

1257

// and returns it.

1258

func (p *parser) parseClassChar(s, wholeClass string) (r int, rest string, err os.Error) {

1259

if s == "" {

1260

return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}

1261

}

1262

1263

// Allow regular escape sequences even though

1264

// many need not be escaped in this context.

1265

if s[0] == '\\' {

1266

return p.parseEscape(s)

1267

}

1268

1269

return nextRune(s)

1270

}

1271

1272

type charGroup struct {

1273

sign int

1274

class []int

1275

}

1276

1277

// parsePerlClassEscape parses a leading Perl character class escape like \d

1278

// from the beginning of s. If one is present, it appends the characters to r

1279

// and returns the new slice r and the remainder of the string.

1280

func (p *parser) parsePerlClassEscape(s string, r []int) (out []int, rest string) {

1281

if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {

1282

return

1283

}

1284

g := perlGroup[s[0:2]]

1285

if g.sign == 0 {

1286

return

1287

}

1288

return p.appendGroup(r, g), s[2:]

1289

}

1290

1291

// parseNamedClass parses a leading POSIX named character class like [:alnum:]

1292

// from the beginning of s. If one is present, it appends the characters to r

1293

// and returns the new slice r and the remainder of the string.

1294

func (p *parser) parseNamedClass(s string, r []int) (out []int, rest string, err os.Error) {

1295

if len(s) < 2 || s[0] != '[' || s[1] != ':' {

1296

return

1297

}

1298

1299

i := strings.Index(s[2:], ":]")

1300

if i < 0 {

1301

return

1302

}

1303

i += 2

1304

name, s := s[0:i+2], s[i+2:]

1305

g := posixGroup[name]

1306

if g.sign == 0 {

1307

return nil, "", &Error{ErrInvalidCharRange, name}

1308

}

1309

return p.appendGroup(r, g), s, nil

1310

}

1311

1312

func (p *parser) appendGroup(r []int, g charGroup) []int {

1313

if p.flags&FoldCase == 0 {

1314

if g.sign < 0 {

1315

r = appendNegatedClass(r, g.class)

1316

} else {

1317

r = appendClass(r, g.class)

1318

}

1319

} else {

1320

tmp := p.tmpClass[:0]

1321

tmp = appendFoldedClass(tmp, g.class)

1322

p.tmpClass = tmp

1323

tmp = cleanClass(&p.tmpClass)

1324

if g.sign < 0 {

1325

r = appendNegatedClass(r, tmp)

1326

} else {

1327

r = appendClass(r, tmp)

1328

}

1329

}

1330

return r

1331

}

1332

1333

// unicodeTable returns the unicode.RangeTable identified by name

1334

// and the table of additional fold-equivalent code points.

1335

func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {

1336

if t := unicode.Categories[name]; t != nil {

1337

return t, unicode.FoldCategory[name]

1338

}

1339

if t := unicode.Scripts[name]; t != nil {

1340

return t, unicode.FoldScript[name]

1341

}

1342

return nil, nil

1343

}

1344

1345

// parseUnicodeClass parses a leading Unicode character class like \p{Han}

1346

// from the beginning of s. If one is present, it appends the characters to r

1347

// and returns the new slice r and the remainder of the string.

1348

func (p *parser) parseUnicodeClass(s string, r []int) (out []int, rest string, err os.Error) {

1349

if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {

1350

return

1351

}

1352

1353

// Committed to parse or return error.

1354

sign := +1

1355

if s[1] == 'P' {

1356

sign = -1

1357

}

1358

t := s[2:]

1359

c, t, err := nextRune(t)

1360

if err != nil {

1361

return

1362

}

1363

var seq, name string

1364

if c != '{' {

1365

// Single-letter name.

1366

seq = s[:len(s)-len(t)]

1367

name = seq[2:]

1368

} else {

1369

// Name is in braces.

1370

end := strings.IndexRune(s, '}')

1371

if end < 0 {

1372

if err = checkUTF8(s); err != nil {

1373

return

1374

}

1375

return nil, "", &Error{ErrInvalidCharRange, s}

1376

}

1377

seq, t = s[:end+1], s[end+1:]

1378

name = s[3:end]

1379

if err = checkUTF8(name); err != nil {

1380

return

1381

}

1382

}

1383

1384

// Group can have leading negation too. \p{^Han} == \P{Han}, \P{^Han} == \p{Han}.

1385

if name != "" && name[0] == '^' {

1386

sign = -sign

1387

name = name[1:]

1388

}

1389

1390

tab, fold := unicodeTable(name)

1391

if tab == nil {

1392

return nil, "", &Error{ErrInvalidCharRange, seq}

1393

}

1394

1395

if p.flags&FoldCase == 0 || fold == nil {

1396

if sign > 0 {

1397

r = appendTable(r, tab)

1398

} else {

1399

r = appendNegatedTable(r, tab)

1400

}

1401

} else {

1402

// Merge and clean tab and fold in a temporary buffer.

1403

// This is necessary for the negative case and just tidy

1404

// for the positive case.

1405

tmp := p.tmpClass[:0]

1406

tmp = appendTable(tmp, tab)

1407

tmp = appendTable(tmp, fold)

1408

p.tmpClass = tmp

1409

tmp = cleanClass(&p.tmpClass)

1410

if sign > 0 {

1411

r = appendClass(r, tmp)

1412

} else {

1413

r = appendNegatedClass(r, tmp)

1414

}

1415

}

1416

return r, t, nil

1417

}

1418

1419

// parseClass parses a character class at the beginning of s

1420

// and pushes it onto the parse stack.

1421

func (p *parser) parseClass(s string) (rest string, err os.Error) {

1422

t := s[1:] // chop [

1423

re := p.newRegexp(OpCharClass)

1424

re.Flags = p.flags

1425

re.Rune = re.Rune0[:0]

1426

1427

sign := +1

1428

if t != "" && t[0] == '^' {

1429

sign = -1

1430

t = t[1:]

1431

1432

// If character class does not match \n, add it here,

1433

// so that negation later will do the right thing.

1434

if p.flags&ClassNL == 0 {

1435

re.Rune = append(re.Rune, '\n', '\n')

1436

}

1437

}

1438

1439

class := re.Rune

1440

first := true // ] and - are okay as first char in class

1441

for t == "" || t[0] != ']' || first {

1442

// POSIX: - is only okay unescaped as first or last in class.

1443

// Perl: - is okay anywhere.

1444

if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') {

1445

_, size := utf8.DecodeRuneInString(t[1:])

1446

return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}

1447

}

1448

first = false

1449

1450

// Look for POSIX [:alnum:] etc.

1451

if len(t) > 2 && t[0] == '[' && t[1] == ':' {

1452

nclass, nt, err := p.parseNamedClass(t, class)

1453

if err != nil {

1454

return "", err

1455

}

1456

if nclass != nil {

1457

class, t = nclass, nt

1458

continue

1459

}

1460

}

1461

1462

// Look for Unicode character group like \p{Han}.

1463

nclass, nt, err := p.parseUnicodeClass(t, class)

1464

if err != nil {

1465

return "", err

1466

}

1467

if nclass != nil {

1468

class, t = nclass, nt

1469

continue

1470

}

1471

1472

// Look for Perl character class symbols (extension).

1473

if nclass, nt := p.parsePerlClassEscape(t, class); nclass != nil {

1474

class, t = nclass, nt

1475

continue

1476

}

1477

1478

// Single character or simple range.

1479

rng := t

1480

var lo, hi int

1481

if lo, t, err = p.parseClassChar(t, s); err != nil {

1482

return "", err

1483

}

1484

hi = lo

1485

// [a-] means (a|-) so check for final ].

1486

if len(t) >= 2 && t[0] == '-' && t[1] != ']' {

1487

t = t[1:]

1488

if hi, t, err = p.parseClassChar(t, s); err != nil {

1489

return "", err

1490

}

1491

if hi < lo {

1492

rng = rng[:len(rng)-len(t)]

1493

return "", &Error{Code: ErrInvalidCharRange, Expr: rng}

1494

}

1495

}

1496

if p.flags&FoldCase == 0 {

1497

class = appendRange(class, lo, hi)

1498

} else {

1499

class = appendFoldedRange(class, lo, hi)

1500

}

1501

}

1502

t = t[1:] // chop ]

1503

1504

// Use &re.Rune instead of &class to avoid allocation.

1505

re.Rune = class

1506

class = cleanClass(&re.Rune)

1507

if sign < 0 {

1508

class = negateClass(class)

1509

}

1510

re.Rune = class

1511

p.push(re)

1512

return t, nil

1513

}

1514

1515

// cleanClass sorts the ranges (pairs of elements of r),

1516

// merges them, and eliminates duplicates.

1517

func cleanClass(rp *[]int) []int {

1518

1519

// Sort by lo increasing, hi decreasing to break ties.

1520

sort.Sort(ranges{rp})

1521

1522

r := *rp

1523

if len(r) < 2 {

1524

return r

1525

}

1526

1527

// Merge abutting, overlapping.

1528

w := 2 // write index

1529

for i := 2; i < len(r); i += 2 {

1530

lo, hi := r[i], r[i+1]

1531

if lo <= r[w-1]+1 {

1532

// merge with previous range

1533

if hi > r[w-1] {

1534

r[w-1] = hi

1535

}

1536

continue

1537

}

1538

// new disjoint range

1539

r[w] = lo

1540

r[w+1] = hi

1541

w += 2

1542

}

1543

1544

return r[:w]

1545

}

1546

1547

// appendRange returns the result of appending the range lo-hi to the class r.

1548

func appendRange(r []int, lo, hi int) []int {

1549

// Expand last range or next to last range if it overlaps or abuts.

1550

// Checking two ranges helps when appending case-folded

1551

// alphabets, so that one range can be expanding A-Z and the

1552

// other expanding a-z.

1553

n := len(r)

1554

for i := 2; i <= 4; i += 2 { // twice, using i=2, i=4

1555

if n >= i {

1556

rlo, rhi := r[n-i], r[n-i+1]

1557

if lo <= rhi+1 && rlo <= hi+1 {

1558

if lo < rlo {

1559

r[n-i] = lo

1560

}

1561

if hi > rhi {

1562

r[n-i+1] = hi

1563

}

1564

return r

1565

}

1566

}

1567

}

1568

1569

return append(r, lo, hi)

1570

}

1571

1572

const (

1573

// minimum and maximum runes involved in folding.

1574

// checked during test.

1575

minFold = 0x0041

1576

maxFold = 0x1044f

1577

)

1578

1579

// appendFoldedRange returns the result of appending the range lo-hi

1580

// and its case folding-equivalent runes to the class r.

1581

func appendFoldedRange(r []int, lo, hi int) []int {

1582

// Optimizations.

1583

if lo <= minFold && hi >= maxFold {

1584

// Range is full: folding can't add more.

1585

return appendRange(r, lo, hi)

1586

}

1587

if hi < minFold || lo > maxFold {

1588

// Range is outside folding possibilities.

1589

return appendRange(r, lo, hi)

1590

}

1591

if lo < minFold {

1592

// [lo, minFold-1] needs no folding.

1593

r = appendRange(r, lo, minFold-1)

1594

lo = minFold

1595

}

1596

if hi > maxFold {

1597

// [maxFold+1, hi] needs no folding.

1598

r = appendRange(r, maxFold+1, hi)

1599

hi = maxFold

1600

}

1601

1602

// Brute force. Depend on appendRange to coalesce ranges on the fly.

1603

for c := lo; c <= hi; c++ {

1604

r = appendRange(r, c, c)

1605

f := unicode.SimpleFold(c)

1606

for f != c {

1607

r = appendRange(r, f, f)

1608

f = unicode.SimpleFold(f)

1609

}

1610

}

1611

return r

1612

}

1613

1614

// appendClass returns the result of appending the class x to the class r.

1615

// It assume x is clean.

1616

func appendClass(r []int, x []int) []int {

1617

for i := 0; i < len(x); i += 2 {

1618

r = appendRange(r, x[i], x[i+1])

1619

}

1620

return r

1621

}

1622

1623

// appendFolded returns the result of appending the case folding of the class x to the class r.

1624

func appendFoldedClass(r []int, x []int) []int {

1625

for i := 0; i < len(x); i += 2 {

1626

r = appendFoldedRange(r, x[i], x[i+1])

1627

}

1628

return r

1629

}

1630

1631

// appendNegatedClass returns the result of appending the negation of the class x to the class r.

1632

// It assumes x is clean.

1633

func appendNegatedClass(r []int, x []int) []int {

1634

nextLo := 0

1635

for i := 0; i < len(x); i += 2 {

1636

lo, hi := x[i], x[i+1]

1637

if nextLo <= lo-1 {

1638

r = appendRange(r, nextLo, lo-1)

1639

}

1640

nextLo = hi + 1

1641

}

1642

if nextLo <= unicode.MaxRune {

1643

r = appendRange(r, nextLo, unicode.MaxRune)

1644

}

1645

return r

1646

}

1647

1648

// appendTable returns the result of appending x to the class r.

1649

func appendTable(r []int, x *unicode.RangeTable) []int {

1650

for _, xr := range x.R16 {

1651

lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)

1652

if stride == 1 {

1653

r = appendRange(r, lo, hi)

1654

continue

1655

}

1656

for c := lo; c <= hi; c += stride {

1657

r = appendRange(r, c, c)

1658

}

1659

}

1660

for _, xr := range x.R32 {

1661

lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)

1662

if stride == 1 {

1663

r = appendRange(r, lo, hi)

1664

continue

1665

}

1666

for c := lo; c <= hi; c += stride {

1667

r = appendRange(r, c, c)

1668

}

1669

}

1670

return r

1671

}

1672

1673

// appendNegatedTable returns the result of appending the negation of x to the class r.

1674

func appendNegatedTable(r []int, x *unicode.RangeTable) []int {

1675

nextLo := 0 // lo end of next class to add

1676

for _, xr := range x.R16 {

1677

lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)

1678

if stride == 1 {

1679

if nextLo <= lo-1 {

1680

r = appendRange(r, nextLo, lo-1)

1681

}

1682

nextLo = hi + 1

1683

continue

1684

}

1685

for c := lo; c <= hi; c += stride {

1686

if nextLo <= c-1 {

1687

r = appendRange(r, nextLo, c-1)

1688

}

1689

nextLo = c + 1

1690

}

1691

}

1692

for _, xr := range x.R32 {

1693

lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)

1694

if stride == 1 {

1695

if nextLo <= lo-1 {

1696

r = appendRange(r, nextLo, lo-1)

1697

}

1698

nextLo = hi + 1

1699

continue

1700

}

1701

for c := lo; c <= hi; c += stride {

1702

if nextLo <= c-1 {

1703

r = appendRange(r, nextLo, c-1)

1704

}

1705

nextLo = c + 1

1706

}

1707

}

1708

if nextLo <= unicode.MaxRune {

1709

r = appendRange(r, nextLo, unicode.MaxRune)

1710

}

1711

return r

1712

}

1713

1714

// negateClass overwrites r and returns r's negation.

1715

// It assumes the class r is already clean.

1716

func negateClass(r []int) []int {

1717

nextLo := 0 // lo end of next class to add

1718

w := 0 // write index

1719

for i := 0; i < len(r); i += 2 {

1720

lo, hi := r[i], r[i+1]

1721

if nextLo <= lo-1 {

1722

r[w] = nextLo

1723

r[w+1] = lo - 1

1724

w += 2

1725

}

1726

nextLo = hi + 1

1727

}

1728

r = r[:w]

1729

if nextLo <= unicode.MaxRune {

1730

// It's possible for the negation to have one more

1731

// range - this one - than the original class, so use append.

1732

r = append(r, nextLo, unicode.MaxRune)

1733

}

1734

return r

1735

}

1736

1737

// ranges implements sort.Interface on a []rune.

1738

// The choice of receiver type definition is strange

1739

// but avoids an allocation since we already have

1740

// a *[]int.

1741

type ranges struct {

1742

p *[]int

1743

}

1744

1745

func (ra ranges) Less(i, j int) bool {

1746

p := *ra.p

1747

i *= 2

1748

j *= 2

1749

return p[i] < p[j] || p[i] == p[j] && p[i+1] > p[j+1]

1750

}

1751

1752

func (ra ranges) Len() int {

1753

return len(*ra.p) / 2

1754

}

1755

1756

func (ra ranges) Swap(i, j int) {

1757

p := *ra.p

1758

i *= 2

1759

j *= 2

1760

p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1]

1761

}

1762

1763

1764

func checkUTF8(s string) os.Error {

1765

for s != "" {

1766

rune, size := utf8.DecodeRuneInString(s)

1767

if rune == utf8.RuneError && size == 1 {

1768

return &Error{Code: ErrInvalidUTF8, Expr: s}

1769

}

1770

s = s[size:]

1771

}

1772

return nil

1773

}

1774

1775

func nextRune(s string) (c int, t string, err os.Error) {

1776

c, size := utf8.DecodeRuneInString(s)

1777

if c == utf8.RuneError && size == 1 {

1778

return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}

1779

}

1780

return c, s[size:], nil

1781

}

1782

1783

func isalnum(c int) bool {

1784

return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'

1785

}

1786

1787

func unhex(c int) int {

1788

if '0' <= c && c <= '9' {

1789

return c - '0'

1790

}

1791

if 'a' <= c && c <= 'f' {

1792

return c - 'a' + 10

1793

}

1794

if 'A' <= c && c <= 'F' {

1795

return c - 'A' + 10

1796

}

1797

return -1

1798

}

Older »