1
// Copyright 2013 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
5
// Package charmap provides simple character encodings such as IBM Code Page 437
12
"code.google.com/p/go.text/transform"
15
// utf8Enc holds a rune's UTF-8 encoding in data[:len].
21
// charmap describes an 8-bit character set encoding.
23
// name is the encoding's name.
25
// asciiSuperset states whether the encoding is a superset of ASCII.
27
// low is the lower bound of the encoded byte for a non-ASCII rune. If
28
// charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
30
// replacement is the encoded replacement character.
32
// decode is the map from encoded byte to UTF-8.
34
// encode is the map from runes to encoded bytes. Each entry is a
35
// uint32: the high 8 bits are the encoded byte and the low 24 bits are
36
// the rune. The table entries are sorted by ascending rune.
40
func (m *charmap) NewDecoder() transform.Transformer {
41
return charmapDecoder{charmap: m}
44
func (m *charmap) NewEncoder() transform.Transformer {
45
return charmapEncoder{charmap: m}
48
func (m *charmap) String() string {
52
// charmapDecoder implements transform.Transformer by decoding to UTF-8.
53
type charmapDecoder struct {
57
func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
58
for i, c := range src {
59
if m.charmap.asciiSuperset && c < utf8.RuneSelf {
61
err = transform.ErrShortDst
70
decode := &m.charmap.decode[c]
72
if nDst+n > len(dst) {
73
err = transform.ErrShortDst
76
// It's 15% faster to avoid calling copy for these tiny slices.
77
for j := 0; j < n; j++ {
78
dst[nDst] = decode.data[j]
83
return nDst, nSrc, err
86
// charmapEncoder implements transform.Transformer by encoding from UTF-8.
87
type charmapEncoder struct {
91
func (m charmapEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
95
err = transform.ErrShortDst
100
// Decode a 1-byte rune.
101
if r < utf8.RuneSelf {
103
if m.charmap.asciiSuperset {
110
// Decode a multi-byte rune.
111
r, size = utf8.DecodeRune(src[nSrc:])
113
// All valid runes of size 1 (those below utf8.RuneSelf) were
114
// handled above. We have invalid UTF-8 or we haven't seen the
115
// full character yet.
116
if !atEOF && !utf8.FullRune(src[nSrc:]) {
117
err = transform.ErrShortSrc
122
if r == utf8.RuneError {
123
dst[nDst] = m.charmap.replacement
129
// Binary search in [low, high) for that rune in the m.charmap.encode table.
130
for low, high := int(m.charmap.low), 0x100; ; {
132
dst[nDst] = m.charmap.replacement
136
mid := (low + high) / 2
137
got := m.charmap.encode[mid]
138
gotRune := rune(got & (1<<24 - 1))
141
} else if gotRune > r {
144
dst[nDst] = byte(got >> 24)
150
return nDst, nSrc, err