1
{-# LANGUAGE ForeignFunctionInterface #-}
3
-- Copyright (C) 2010 John Millikin <jmillikin@gmail.com>
5
-- This program is free software: you can redistribute it and/or modify
6
-- it under the terms of the GNU General Public License as published by
7
-- the Free Software Foundation, either version 3 of the License, or
10
-- This program is distributed in the hope that it will be useful,
11
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
-- GNU General Public License for more details.
15
-- You should have received a copy of the GNU General Public License
16
-- along with this program. If not, see <http://www.gnu.org/licenses/>.
18
-- | Punycode is a simple and efficient transfer encoding syntax designed
19
-- for use with Internationalized Domain Names in Applications (IDNA). It
20
-- uniquely and reversibly transforms a Unicode string into ASCII. ASCII
21
-- characters in the Unicode string are represented literally, and non-ASCII
22
-- characters are represented by ASCII characters that are allowed in host
23
-- name labels (letters, digits, and hyphens).
24
module Data.Text.IDN.Punycode
29
import Control.Exception (ErrorCall(..), throwIO)
30
import Control.Monad (unless)
31
import Data.List (unfoldr)
32
import qualified Data.ByteString as B
33
import qualified Data.Text as T
38
import Data.Text.IDN.Internal (toUCS4, fromUCS4)
42
-- | Encode unicode into an ASCII-only 'B.ByteString'. If provided, the
43
-- case predicate indicates whether to uppercase the corresponding character
46
-> Maybe (Integer -> Bool)
48
encode input maybeIsCase = unsafePerformIO io where
49
inSize = T.length input
51
flags = flip fmap maybeIsCase $ \isCase -> let
52
step idx = Just (fromBool (isCase idx), idx + 1)
55
io = maybeWith (withArray . take inSize) flags impl
57
impl caseBuf = withArray (toUCS4 input) (loop caseBuf inSize . castPtr)
59
loop caseBuf outMax inBuf = do
60
res <- tryEnc caseBuf outMax inBuf
62
Nothing -> loop caseBuf (outMax + 50) inBuf
63
Just (Right bytes) -> return bytes
64
Just (Left rc) -> cToError rc
66
tryEnc caseBuf outMax inBuf =
67
allocaBytes outMax $ \outBuf ->
68
alloca $ \outSizeBuf -> do
69
poke outSizeBuf (fromIntegral outMax)
70
c_rc <- {# call punycode_encode #}
77
let rc = fromIntegral c_rc
78
if rc == fromEnum OVERFLOW
80
else if rc == fromEnum SUCCESS
82
outSize <- peek outSizeBuf
83
bytes <- peekOut outBuf outSize
84
return (Just (Right bytes))
85
else return (Just (Left c_rc))
87
peekOut outBuf outSize = B.packCStringLen cstr where
88
cstr = (outBuf, fromIntegral outSize)
90
-- | Decode a 'B.ByteString' into unicode. The second component of the
91
-- result is a case predicate; it indicates whether a particular character
92
-- position of the result string should be upper-cased.
94
-- Returns 'Nothing' if the input is invalid.
95
decode :: B.ByteString
96
-> Maybe (T.Text, (Integer -> Bool))
97
decode input = unsafePerformIO $
98
let outMax = B.length input in
99
B.useAsCStringLen input $ \(inBuf, inSize) ->
100
alloca $ \outSizeBuf ->
101
allocaArray outMax $ \outBuf -> do
103
flagForeign <- mallocForeignPtrArray outMax
104
poke outSizeBuf (fromIntegral outMax)
106
c_rc <- withForeignPtr flagForeign $ \flagBuf ->
107
{# call punycode_decode #}
108
(fromIntegral inSize)
114
let rc = fromIntegral c_rc
115
if rc == fromEnum BAD_INPUT
118
unless (rc == fromEnum SUCCESS) (cToError c_rc)
120
outSize <- peek outSizeBuf
121
ucs4 <- peekArray (fromIntegral outSize) (castPtr outBuf)
122
let text = fromUCS4 ucs4
123
return (Just (text, checkCaseFlag flagForeign outSize))
125
type SizeT = {# type size_t #}
127
{# enum Punycode_status {} with prefix = "PUNYCODE_" #}
129
checkCaseFlag :: ForeignPtr CUChar -> SizeT -> Integer -> Bool
130
checkCaseFlag ptr csize = checkIdx where
131
intsize = toInteger csize
132
checkIdx idx | idx < 0 = False
133
checkIdx idx | idx >= intsize = False
136
withForeignPtr ptr $ \buf -> do
137
cuchar <- peekElemOff buf (fromInteger idx)
138
return (toBool cuchar)
140
cToError :: CInt -> IO a
142
str <- peekCString =<< {# call punycode_strerror #} rc
143
throwIO (ErrorCall str)