fix encoding unicode strings, and Text support

This commit is contained in:
Hideyuki Tanaka 2011-03-29 16:15:29 +09:00
parent 5c5f16f148
commit ace4f1e7f0
5 changed files with 95 additions and 25 deletions

View File

@ -23,6 +23,7 @@ Library
Build-depends: base >=4 && <5,
transformers >= 0.2 && < 0.3,
bytestring >= 0.9 && < 0.10,
text >= 0.11 && < 0.12,
vector >= 0.7 && < 0.8,
attoparsec >= 0.8 && < 0.9,
binary >= 0.5.0 && < 0.5.1,
@ -41,6 +42,9 @@ Library
Data.MessagePack.Object
Data.MessagePack.Derive
Other-modules:
Data.MessagePack.Internal.Utf8
Source-repository head
Type: git
Location: git://github.com/msgpack/msgpack.git

View File

@ -0,0 +1,28 @@
module Data.MessagePack.Internal.Utf8 (
encodeUtf8,
decodeUtf8,
skipChar,
toLBS,
fromLBS,
) where
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Encoding.Error as T
encodeUtf8 :: String -> B.ByteString
encodeUtf8 = T.encodeUtf8 . T.pack
decodeUtf8 :: B.ByteString -> String
decodeUtf8 = T.unpack . T.decodeUtf8With skipChar
skipChar :: T.OnDecodeError
skipChar _ _ = Nothing
toLBS :: B.ByteString -> BL.ByteString
toLBS bs = BL.fromChunks [bs]
fromLBS :: BL.ByteString -> B.ByteString
fromLBS = B.concat . BL.toChunks

View File

@ -32,12 +32,17 @@ import Control.Monad
import Control.Monad.Trans.Error ()
import qualified Data.Attoparsec as A
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Data.Typeable
import Data.MessagePack.Assoc
import Data.MessagePack.Pack
import Data.MessagePack.Unpack
import Data.MessagePack.Internal.Utf8
-- | Object Representation of MessagePack data.
data Object
@ -149,14 +154,29 @@ instance OBJECT Float where
tryFromObject (ObjectFloat f) = Right f
tryFromObject _ = tryFromObjectError
instance OBJECT String where
toObject = toObject . encodeUtf8
tryFromObject obj = liftM decodeUtf8 $ tryFromObject obj
instance OBJECT B.ByteString where
toObject = ObjectRAW
tryFromObject (ObjectRAW bs) = Right bs
tryFromObject _ = tryFromObjectError
instance OBJECT String where
toObject = toObject . C8.pack
tryFromObject obj = liftM C8.unpack $ tryFromObject obj
instance OBJECT BL.ByteString where
toObject = ObjectRAW . fromLBS
tryFromObject (ObjectRAW bs) = Right $ toLBS bs
tryFromObject _ = tryFromObjectError
instance OBJECT T.Text where
toObject = ObjectRAW . T.encodeUtf8
tryFromObject (ObjectRAW bs) = Right $ T.decodeUtf8With skipChar bs
tryFromObject _ = tryFromObjectError
instance OBJECT TL.Text where
toObject = ObjectRAW . fromLBS . TL.encodeUtf8
tryFromObject (ObjectRAW bs) = Right $ TL.decodeUtf8With skipChar $ toLBS bs
tryFromObject _ = tryFromObjectError
instance OBJECT a => OBJECT [a] where
toObject = ObjectArray . map toObject

View File

@ -27,11 +27,15 @@ import Data.Binary.Put
import Data.Binary.IEEE754
import Data.Bits
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import qualified Data.Vector as V
import Data.MessagePack.Assoc
import Data.MessagePack.Internal.Utf8
-- | Serializable class
class Packable a where
@ -39,7 +43,7 @@ class Packable a where
put :: a -> Put
-- | Pack Haskell data to MessagePack string.
pack :: Packable a => a -> L.ByteString
pack :: Packable a => a -> BL.ByteString
pack = runPut . put
instance Packable Int where
@ -93,17 +97,24 @@ instance Packable Double where
putFloat64be d
instance Packable String where
put = putString length (putByteString . B8.pack)
put = putString encodeUtf8 B.length putByteString
instance Packable B.ByteString where
put = putString B.length putByteString
put = putString id B.length putByteString
instance Packable L.ByteString where
put = putString (fromIntegral . L.length) putLazyByteString
instance Packable BL.ByteString where
put = putString id (fromIntegral . BL.length) putLazyByteString
putString :: (s -> Int) -> (s -> Put) -> s -> Put
putString lf pf str = do
case lf str of
instance Packable T.Text where
put = putString T.encodeUtf8 B.length putByteString
instance Packable TL.Text where
put = putString TL.encodeUtf8 (fromIntegral . BL.length) putLazyByteString
putString :: (s -> t) -> (t -> Int) -> (t -> Put) -> s -> Put
putString cnv lf pf str = do
let bs = cnv str
case lf bs of
len | len <= 31 -> do
putWord8 $ 0xA0 .|. fromIntegral len
len | len < 0x10000 -> do
@ -112,7 +123,7 @@ putString lf pf str = do
len -> do
putWord8 0xDB
putWord32be $ fromIntegral len
pf str
pf bs
instance Packable a => Packable [a] where
put = putArray length (mapM_ put)

View File

@ -36,8 +36,11 @@ import Data.Binary.Get
import Data.Binary.IEEE754
import Data.Bits
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Data.Int
import Data.Typeable
import qualified Data.Vector as V
@ -45,6 +48,7 @@ import Data.Word
import Text.Printf
import Data.MessagePack.Assoc
import Data.MessagePack.Internal.Utf8
-- | Deserializable class
class Unpackable a where
@ -57,8 +61,8 @@ class IsByteString s where
instance IsByteString B.ByteString where
toBS = id
instance IsByteString L.ByteString where
toBS = B.concat . L.toChunks
instance IsByteString BL.ByteString where
toBS = B.concat . BL.toChunks
-- | The exception of unpack
data UnpackError =
@ -153,13 +157,19 @@ instance Unpackable Double where
fail $ printf "invlid double tag: 0x%02X" c
instance Unpackable String where
get = parseString (\n -> return . B8.unpack =<< A.take n)
get = parseString (\n -> return . decodeUtf8 =<< A.take n)
instance Unpackable B.ByteString where
get = parseString A.take
instance Unpackable L.ByteString where
get = parseString (\n -> do bs <- A.take n; return $ L.fromChunks [bs])
instance Unpackable BL.ByteString where
get = parseString (\n -> return . toLBS =<< A.take n)
instance Unpackable T.Text where
get = parseString (\n -> return . T.decodeUtf8With skipChar =<< A.take n)
instance Unpackable TL.Text where
get = parseString (\n -> return . TL.decodeUtf8With skipChar . toLBS =<< A.take n)
parseString :: (Int -> A.Parser a) -> A.Parser a
parseString aget = do
@ -311,6 +321,3 @@ parseInt32 = return . fromIntegral =<< parseUint32
parseInt64 :: A.Parser Int64
parseInt64 = return . fromIntegral =<< parseUint64
toLBS :: B.ByteString -> L.ByteString
toLBS bs = L.fromChunks [bs]