~ubuntu-branches/ubuntu/raring/libencode-perl/raring

« back to all changes in this revision

Viewing changes to t/gsm0338.t

Committer: Bazaar Package Importer
Author(s): Jose Luis Rivas
Date: 2007-05-18 23:49:27 UTC
Revision ID: james.westby@ubuntu.com-20070518234927-bs37c807cty7i1ny

Tags: upstream-2.21

Import upstream version 2.21

files added:

AUTHORS

Byte

Byte/Byte.pm

Byte/Makefile.PL

CN/CN.pm

CN/Makefile.PL

Changes

EBCDIC

EBCDIC/EBCDIC.pm

EBCDIC/Makefile.PL

Encode

Encode.pm

Encode.xs

Encode/Changes.e2x

Encode/ConfigLocal_PM.e2x

Encode/Makefile_PL.e2x

Encode/README.e2x

Encode/_PM.e2x

Encode/_T.e2x

Encode/encode.h

JP/JP.pm

JP/Makefile.PL

KR/KR.pm

KR/Makefile.PL

MANIFEST

META.yml

Makefile.PL

README

Symbol

Symbol/Makefile.PL

Symbol/Symbol.pm

TW/Makefile.PL

TW/TW.pm

Unicode

Unicode/Makefile.PL

Unicode/Unicode.pm

Unicode/Unicode.xs

bin/enc2xs

bin/piconv

bin/ucm2table

bin/ucmlint

bin/ucmsort

bin/unidump

encengine.c

encoding.pm

lib/Encode

lib/Encode/Alias.pm

lib/Encode/CJKConstants.pm

lib/Encode/CN

lib/Encode/CN/HZ.pm

lib/Encode/Config.pm

lib/Encode/Encoder.pm

lib/Encode/Encoding.pm

lib/Encode/GSM0338.pm

lib/Encode/Guess.pm

lib/Encode/JP

lib/Encode/JP/H2Z.pm

lib/Encode/JP/JIS7.pm

lib/Encode/KR

lib/Encode/KR/2022_KR.pm

lib/Encode/MIME

lib/Encode/MIME/Header

lib/Encode/MIME/Header.pm

lib/Encode/MIME/Header/ISO_2022_JP.pm

lib/Encode/MIME/Name.pm

lib/Encode/PerlIO.pod

lib/Encode/Supported.pod

lib/Encode/Unicode

lib/Encode/Unicode/UTF7.pm

t/Aliases.t

t/CJKT.t

t/Encode.t

t/Encoder.t

t/Mod_EUCJP.pm

t/Unicode.t

t/at-cn.t

t/at-tw.t

t/big5-eten.enc

t/big5-eten.utf

t/big5-hkscs.enc

t/big5-hkscs.utf

t/enc_data.t

t/enc_eucjp.t

t/enc_module.enc

t/enc_module.t

t/enc_utf8.t

t/encoding.t

t/fallback.t

t/from_to.t

t/gb2312.enc

t/gb2312.utf

t/grow.t

t/gsm0338.t

t/guess.t

t/jis7-fallback.t

t/jisx0201.enc

t/jisx0201.utf

t/jisx0208.enc

t/jisx0208.utf

t/jisx0212.enc

t/jisx0212.utf

t/jperl.t

t/ksc5601.enc

t/ksc5601.utf

t/mime-header.t

t/mime-name.t

t/mime_header_iso2022jp.t

t/perlio.t

t/rt.pl

t/unibench.pl

t/utf8strict.t

ucm/8859-1.ucm

ucm/8859-10.ucm

ucm/8859-11.ucm

ucm/8859-13.ucm

ucm/8859-14.ucm

ucm/8859-15.ucm

ucm/8859-16.ucm

ucm/8859-2.ucm

ucm/8859-3.ucm

ucm/8859-4.ucm

ucm/8859-5.ucm

ucm/8859-6.ucm

ucm/8859-7.ucm

ucm/8859-8.ucm

ucm/8859-9.ucm

ucm/adobeStdenc.ucm

ucm/adobeSymbol.ucm

ucm/adobeZdingbat.ucm

ucm/ascii.ucm

ucm/big5-eten.ucm

ucm/big5-hkscs.ucm

ucm/cp037.ucm

ucm/cp1006.ucm

ucm/cp1026.ucm

ucm/cp1047.ucm

ucm/cp1250.ucm

ucm/cp1251.ucm

ucm/cp1252.ucm

ucm/cp1253.ucm

ucm/cp1254.ucm

ucm/cp1255.ucm

ucm/cp1256.ucm

ucm/cp1257.ucm

ucm/cp1258.ucm

ucm/cp424.ucm

ucm/cp437.ucm

ucm/cp500.ucm

ucm/cp737.ucm

ucm/cp775.ucm

ucm/cp850.ucm

ucm/cp852.ucm

ucm/cp855.ucm

ucm/cp856.ucm

ucm/cp857.ucm

ucm/cp860.ucm

ucm/cp861.ucm

ucm/cp862.ucm

ucm/cp863.ucm

ucm/cp864.ucm

ucm/cp865.ucm

ucm/cp866.ucm

ucm/cp869.ucm

ucm/cp874.ucm

ucm/cp875.ucm

ucm/cp932.ucm

ucm/cp936.ucm

ucm/cp949.ucm

ucm/cp950.ucm

ucm/ctrl.ucm

ucm/dingbats.ucm

ucm/euc-cn.ucm

ucm/euc-jp.ucm

ucm/euc-kr.ucm

ucm/gb12345.ucm

ucm/gb2312.ucm

ucm/hp-roman8.ucm

ucm/ir-165.ucm

ucm/jis0201.ucm

ucm/jis0208.ucm

ucm/jis0212.ucm

ucm/johab.ucm

ucm/koi8-f.ucm

ucm/koi8-r.ucm

ucm/koi8-u.ucm

ucm/ksc5601.ucm

ucm/macArabic.ucm

ucm/macCentEuro.ucm

ucm/macChinsimp.ucm

ucm/macChintrad.ucm

ucm/macCroatian.ucm

ucm/macCyrillic.ucm

ucm/macDingbats.ucm

ucm/macFarsi.ucm

ucm/macGreek.ucm

ucm/macHebrew.ucm

ucm/macIceland.ucm

ucm/macJapanese.ucm

ucm/macKorean.ucm

ucm/macROMnn.ucm

ucm/macRUMnn.ucm

ucm/macRoman.ucm

ucm/macSami.ucm

ucm/macSymbol.ucm

ucm/macThai.ucm

ucm/macTurkish.ucm

ucm/macUkraine.ucm

ucm/nextstep.ucm

ucm/null.ucm

ucm/posix-bc.ucm

ucm/shiftjis.ucm

ucm/symbol.ucm

ucm/viscii.ucm

Show diffs side-by-side

added added

removed removed

t/gsm0338.t

# Test bootstrap.  It lives in a BEGIN block so that it runs as soon as it
# has been parsed, i.e. before the "use" lines that follow are processed.
BEGIN {
    # Inside the perl core tree: run from t/ and load the in-tree library.
    if ( $ENV{PERL_CORE} ) {
        chdir 't';
        unshift @INC, '../lib';
    }

    require Config;
    Config->import;

    # Emit a TAP "skip everything" plan when this perl lacks Encode.
    unless ( $Config{extensions} =~ /\bEncode\b/ ) {
        print "1..0 # Skip: Encode was not built\n";
        exit 0;
    }

    # Autoflush the currently selected output handle.
    $| = 1;
}

use strict;

use utf8;

use Test::More tests => 778;

use Encode;

use Encode::GSM0338;

# The specification of GSM 03.38 is not awfully clear.

# (http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT)

# The various combinations of 0x00 and 0x1B as leading bytes

# are unclear, as is the semantics of those bytes as standalone

# or as final single bytes.

# CHECK argument handed to most encode()/decode() calls in this test.
my $chk = Encode::LEAVE_SRC();

# escapes
# see http://www.csoft.co.uk/sms/character_sets/gsm.htm
#
# Unicode character => two-byte GSM 03.38 sequence (ESC, 0x1B, plus one byte).
my %esc_seq = (
    "\x{20ac}" => "\x1b\x65",
    "\x0c"     => "\x1b\x0A",
    "["        => "\x1b\x3C",
    "\\"       => "\x1b\x2F",
    "]"        => "\x1b\x3E",
    "^"        => "\x1b\x14",
    "{"        => "\x1b\x28",
    "|"        => "\x1b\x40",
    "}"        => "\x1b\x29",
    "~"        => "\x1b\x3D",
);

# Inverse lookup: escape sequence => Unicode character.
my %unesc_seq;
while ( my ( $char, $sequence ) = each %esc_seq ) {
    $unesc_seq{$sequence} = $char;
}

# Render a string for use in a test description.
#
# If the string contains a character in the range 0x00-0x1F, the result is
# the text \x{XXXX}, where XXXX is the upper-case hex code point of the
# string's first character.  Otherwise the result is the string encoded as
# UTF-8 octets.
sub eu {
    my ($text) = @_;

    return encode_utf8($text) unless $text =~ /[\x00-\x1f]/;
    return sprintf( "\\x{%04X}", ord($text) );
}

# Exercise every 7-bit GSM 03.38 byte (0x00-0x7F).
#
# Each pass runs six assertions, or seven when ESC followed by the byte is
# one of the sequences in %unesc_seq: 128 * 6 + 10 = 778, the planned count.
for my $c ( map { chr } 0 .. 127 ) {

    # Expected Unicode character, taken from the module's own table.
    my $u = $Encode::GSM0338::GSM2UNI{$c};

    # default character set
    is decode( "gsm0338", $c, $chk ), $u,
      sprintf( "decode \\x%02X", ord($c) );

    # The test expects decode to die when the byte "\xff" is appended;
    # the error text doubles as the test description.
    eval { decode( "gsm0338", $c . "\xff", $chk ) };
    ok( $@, $@ );
    is encode( "gsm0338", $u, $chk ), $c, sprintf( "encode %s", eu($u) );

    # Likewise, encode is expected to die when U+3000 is appended.
    eval { encode( "gsm0338", $u . "\x{3000}", $chk ) };
    ok( $@, $@ );

    # nasty atmark
    if ( $c eq "\x00" ) {

        # Two consecutive 0x00 bytes are expected to decode to one "\x00".
        is decode( "gsm0338", "\x00" . $c, $chk ), "\x00",
          sprintf( '@@ =>: \x00+\x%02X', ord($c) );
    }
    else {

        # 0x00 followed by another byte is expected to decode to '@'
        # followed by that byte's own decoding.
        is decode( "gsm0338", "\x00" . $c ), '@' . decode( "gsm0338", $c ),
          sprintf( '@: decode \x00+\x%02X', ord($c) );
    }

    # escape seq.
    my $ecs = "\x1b" . $c;
    if ( $unesc_seq{$ecs} ) {

        # Known escape sequence: must round-trip in both directions.
        is decode( "gsm0338", $ecs, $chk ), $unesc_seq{$ecs},
          sprintf( "ESC: decode ESC+\\x%02X", ord($c) );
        is encode( "gsm0338", $unesc_seq{$ecs}, $chk ), $ecs,
          sprintf( "ESC: encode %s ", eu( $unesc_seq{$ecs} ) );
    }
    else {

        # Any other ESC+byte is expected to decode to "\xA0" followed by
        # the byte's own decoding.
        is decode( "gsm0338", $ecs, $chk ),
          "\xA0" . decode( "gsm0338", $c ),
          sprintf( "decode ESC+\\x%02X", ord($c) );
    }
}

__END__

for my $c (map { chr } 0..127){

my $b = "\x1b$c";

my $u = $Encode::GSM0338::GSM2UNI{$b};

next unless $u;

$u ||= "\xA0" . $Encode::GSM0338::GSM2UNI{$c};

is decode("gsm0338", $b), $u, sprintf("decode ESC+\\x%02X", ord($c) );

}

__END__

100

# old test follows

101

sub t { is(decode("gsm0338", my $t = $_[0]), $_[1]) }

102

103

# t("\x00", "\x00"); # ???

104

105

# "Round-trip".

106

t("\x41", "\x41");

107

108

t("\x01", "\xA3");

109

t("\x02", "\x24");

110

t("\x03", "\xA5");

111

t("\x09", "\xE7");

112

113

t("\x00\x00", "\x00\x00"); # Maybe?

114

t("\x00\x1B", "\x40\xA0"); # Maybe?

115

t("\x00\x41", "\x40\x41");

116

117

# t("\x1B", "\x1B"); # ???

118

119

# Escape with no special second byte is just a NBSP.

120

t("\x1B\x41", "\xA0\x41");

121

122

t("\x1B\x00", "\xA0\x40"); # Maybe?

123

124

# Special escape characters.

125

t("\x1B\x0A", "\x0C");

126

t("\x1B\x14", "\x5E");

127

t("\x1B\x28", "\x7B");

128

t("\x1B\x29", "\x7D");

129

t("\x1B\x2F", "\x5C");

130

t("\x1B\x3C", "\x5B");

131

t("\x1B\x3D", "\x7E");

132

t("\x1B\x3E", "\x5D");

133

t("\x1B\x40", "\x7C");

134

t("\x1B\x40", "\x7C");

135

t("\x1B\x65", "\x{20AC}");

Older »