mirror of
https://github.com/msgpack/msgpack-c.git
synced 2025-04-17 07:13:27 +02:00
fix encoding unicode strings, and Text support
This commit is contained in:
parent
5c5f16f148
commit
ace4f1e7f0
@ -23,6 +23,7 @@ Library
|
|||||||
Build-depends: base >=4 && <5,
|
Build-depends: base >=4 && <5,
|
||||||
transformers >= 0.2 && < 0.3,
|
transformers >= 0.2 && < 0.3,
|
||||||
bytestring >= 0.9 && < 0.10,
|
bytestring >= 0.9 && < 0.10,
|
||||||
|
text >= 0.11 && < 0.12,
|
||||||
vector >= 0.7 && < 0.8,
|
vector >= 0.7 && < 0.8,
|
||||||
attoparsec >= 0.8 && < 0.9,
|
attoparsec >= 0.8 && < 0.9,
|
||||||
binary >= 0.5.0 && < 0.5.1,
|
binary >= 0.5.0 && < 0.5.1,
|
||||||
@ -41,6 +42,9 @@ Library
|
|||||||
Data.MessagePack.Object
|
Data.MessagePack.Object
|
||||||
Data.MessagePack.Derive
|
Data.MessagePack.Derive
|
||||||
|
|
||||||
|
Other-modules:
|
||||||
|
Data.MessagePack.Internal.Utf8
|
||||||
|
|
||||||
Source-repository head
|
Source-repository head
|
||||||
Type: git
|
Type: git
|
||||||
Location: git://github.com/msgpack/msgpack.git
|
Location: git://github.com/msgpack/msgpack.git
|
||||||
|
28
haskell/src/Data/MessagePack/Internal/Utf8.hs
Normal file
28
haskell/src/Data/MessagePack/Internal/Utf8.hs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
module Data.MessagePack.Internal.Utf8 (
|
||||||
|
encodeUtf8,
|
||||||
|
decodeUtf8,
|
||||||
|
skipChar,
|
||||||
|
toLBS,
|
||||||
|
fromLBS,
|
||||||
|
) where
|
||||||
|
|
||||||
|
import qualified Data.ByteString as B
|
||||||
|
import qualified Data.ByteString.Lazy as BL
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import qualified Data.Text.Encoding as T
|
||||||
|
import qualified Data.Text.Encoding.Error as T
|
||||||
|
|
||||||
|
encodeUtf8 :: String -> B.ByteString
|
||||||
|
encodeUtf8 = T.encodeUtf8 . T.pack
|
||||||
|
|
||||||
|
decodeUtf8 :: B.ByteString -> String
|
||||||
|
decodeUtf8 = T.unpack . T.decodeUtf8With skipChar
|
||||||
|
|
||||||
|
skipChar :: T.OnDecodeError
|
||||||
|
skipChar _ _ = Nothing
|
||||||
|
|
||||||
|
toLBS :: B.ByteString -> BL.ByteString
|
||||||
|
toLBS bs = BL.fromChunks [bs]
|
||||||
|
|
||||||
|
fromLBS :: BL.ByteString -> B.ByteString
|
||||||
|
fromLBS = B.concat . BL.toChunks
|
@ -32,12 +32,17 @@ import Control.Monad
|
|||||||
import Control.Monad.Trans.Error ()
|
import Control.Monad.Trans.Error ()
|
||||||
import qualified Data.Attoparsec as A
|
import qualified Data.Attoparsec as A
|
||||||
import qualified Data.ByteString as B
|
import qualified Data.ByteString as B
|
||||||
import qualified Data.ByteString.Char8 as C8
|
import qualified Data.ByteString.Lazy as BL
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import qualified Data.Text.Encoding as T
|
||||||
|
import qualified Data.Text.Lazy as TL
|
||||||
|
import qualified Data.Text.Lazy.Encoding as TL
|
||||||
import Data.Typeable
|
import Data.Typeable
|
||||||
|
|
||||||
import Data.MessagePack.Assoc
|
import Data.MessagePack.Assoc
|
||||||
import Data.MessagePack.Pack
|
import Data.MessagePack.Pack
|
||||||
import Data.MessagePack.Unpack
|
import Data.MessagePack.Unpack
|
||||||
|
import Data.MessagePack.Internal.Utf8
|
||||||
|
|
||||||
-- | Object Representation of MessagePack data.
|
-- | Object Representation of MessagePack data.
|
||||||
data Object
|
data Object
|
||||||
@ -149,14 +154,29 @@ instance OBJECT Float where
|
|||||||
tryFromObject (ObjectFloat f) = Right f
|
tryFromObject (ObjectFloat f) = Right f
|
||||||
tryFromObject _ = tryFromObjectError
|
tryFromObject _ = tryFromObjectError
|
||||||
|
|
||||||
|
instance OBJECT String where
|
||||||
|
toObject = toObject . encodeUtf8
|
||||||
|
tryFromObject obj = liftM decodeUtf8 $ tryFromObject obj
|
||||||
|
|
||||||
instance OBJECT B.ByteString where
|
instance OBJECT B.ByteString where
|
||||||
toObject = ObjectRAW
|
toObject = ObjectRAW
|
||||||
tryFromObject (ObjectRAW bs) = Right bs
|
tryFromObject (ObjectRAW bs) = Right bs
|
||||||
tryFromObject _ = tryFromObjectError
|
tryFromObject _ = tryFromObjectError
|
||||||
|
|
||||||
instance OBJECT String where
|
instance OBJECT BL.ByteString where
|
||||||
toObject = toObject . C8.pack
|
toObject = ObjectRAW . fromLBS
|
||||||
tryFromObject obj = liftM C8.unpack $ tryFromObject obj
|
tryFromObject (ObjectRAW bs) = Right $ toLBS bs
|
||||||
|
tryFromObject _ = tryFromObjectError
|
||||||
|
|
||||||
|
instance OBJECT T.Text where
|
||||||
|
toObject = ObjectRAW . T.encodeUtf8
|
||||||
|
tryFromObject (ObjectRAW bs) = Right $ T.decodeUtf8With skipChar bs
|
||||||
|
tryFromObject _ = tryFromObjectError
|
||||||
|
|
||||||
|
instance OBJECT TL.Text where
|
||||||
|
toObject = ObjectRAW . fromLBS . TL.encodeUtf8
|
||||||
|
tryFromObject (ObjectRAW bs) = Right $ TL.decodeUtf8With skipChar $ toLBS bs
|
||||||
|
tryFromObject _ = tryFromObjectError
|
||||||
|
|
||||||
instance OBJECT a => OBJECT [a] where
|
instance OBJECT a => OBJECT [a] where
|
||||||
toObject = ObjectArray . map toObject
|
toObject = ObjectArray . map toObject
|
||||||
|
@ -27,11 +27,15 @@ import Data.Binary.Put
|
|||||||
import Data.Binary.IEEE754
|
import Data.Binary.IEEE754
|
||||||
import Data.Bits
|
import Data.Bits
|
||||||
import qualified Data.ByteString as B
|
import qualified Data.ByteString as B
|
||||||
import qualified Data.ByteString.Char8 as B8
|
import qualified Data.ByteString.Lazy as BL
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.Text as T
|
||||||
|
import qualified Data.Text.Encoding as T
|
||||||
|
import qualified Data.Text.Lazy as TL
|
||||||
|
import qualified Data.Text.Lazy.Encoding as TL
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
|
|
||||||
import Data.MessagePack.Assoc
|
import Data.MessagePack.Assoc
|
||||||
|
import Data.MessagePack.Internal.Utf8
|
||||||
|
|
||||||
-- | Serializable class
|
-- | Serializable class
|
||||||
class Packable a where
|
class Packable a where
|
||||||
@ -39,7 +43,7 @@ class Packable a where
|
|||||||
put :: a -> Put
|
put :: a -> Put
|
||||||
|
|
||||||
-- | Pack Haskell data to MessagePack string.
|
-- | Pack Haskell data to MessagePack string.
|
||||||
pack :: Packable a => a -> L.ByteString
|
pack :: Packable a => a -> BL.ByteString
|
||||||
pack = runPut . put
|
pack = runPut . put
|
||||||
|
|
||||||
instance Packable Int where
|
instance Packable Int where
|
||||||
@ -93,17 +97,24 @@ instance Packable Double where
|
|||||||
putFloat64be d
|
putFloat64be d
|
||||||
|
|
||||||
instance Packable String where
|
instance Packable String where
|
||||||
put = putString length (putByteString . B8.pack)
|
put = putString encodeUtf8 B.length putByteString
|
||||||
|
|
||||||
instance Packable B.ByteString where
|
instance Packable B.ByteString where
|
||||||
put = putString B.length putByteString
|
put = putString id B.length putByteString
|
||||||
|
|
||||||
instance Packable L.ByteString where
|
instance Packable BL.ByteString where
|
||||||
put = putString (fromIntegral . L.length) putLazyByteString
|
put = putString id (fromIntegral . BL.length) putLazyByteString
|
||||||
|
|
||||||
putString :: (s -> Int) -> (s -> Put) -> s -> Put
|
instance Packable T.Text where
|
||||||
putString lf pf str = do
|
put = putString T.encodeUtf8 B.length putByteString
|
||||||
case lf str of
|
|
||||||
|
instance Packable TL.Text where
|
||||||
|
put = putString TL.encodeUtf8 (fromIntegral . BL.length) putLazyByteString
|
||||||
|
|
||||||
|
putString :: (s -> t) -> (t -> Int) -> (t -> Put) -> s -> Put
|
||||||
|
putString cnv lf pf str = do
|
||||||
|
let bs = cnv str
|
||||||
|
case lf bs of
|
||||||
len | len <= 31 -> do
|
len | len <= 31 -> do
|
||||||
putWord8 $ 0xA0 .|. fromIntegral len
|
putWord8 $ 0xA0 .|. fromIntegral len
|
||||||
len | len < 0x10000 -> do
|
len | len < 0x10000 -> do
|
||||||
@ -112,7 +123,7 @@ putString lf pf str = do
|
|||||||
len -> do
|
len -> do
|
||||||
putWord8 0xDB
|
putWord8 0xDB
|
||||||
putWord32be $ fromIntegral len
|
putWord32be $ fromIntegral len
|
||||||
pf str
|
pf bs
|
||||||
|
|
||||||
instance Packable a => Packable [a] where
|
instance Packable a => Packable [a] where
|
||||||
put = putArray length (mapM_ put)
|
put = putArray length (mapM_ put)
|
||||||
|
@ -36,8 +36,11 @@ import Data.Binary.Get
|
|||||||
import Data.Binary.IEEE754
|
import Data.Binary.IEEE754
|
||||||
import Data.Bits
|
import Data.Bits
|
||||||
import qualified Data.ByteString as B
|
import qualified Data.ByteString as B
|
||||||
import qualified Data.ByteString.Char8 as B8
|
import qualified Data.ByteString.Lazy as BL
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.Text as T
|
||||||
|
import qualified Data.Text.Encoding as T
|
||||||
|
import qualified Data.Text.Lazy as TL
|
||||||
|
import qualified Data.Text.Lazy.Encoding as TL
|
||||||
import Data.Int
|
import Data.Int
|
||||||
import Data.Typeable
|
import Data.Typeable
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
@ -45,6 +48,7 @@ import Data.Word
|
|||||||
import Text.Printf
|
import Text.Printf
|
||||||
|
|
||||||
import Data.MessagePack.Assoc
|
import Data.MessagePack.Assoc
|
||||||
|
import Data.MessagePack.Internal.Utf8
|
||||||
|
|
||||||
-- | Deserializable class
|
-- | Deserializable class
|
||||||
class Unpackable a where
|
class Unpackable a where
|
||||||
@ -57,8 +61,8 @@ class IsByteString s where
|
|||||||
instance IsByteString B.ByteString where
|
instance IsByteString B.ByteString where
|
||||||
toBS = id
|
toBS = id
|
||||||
|
|
||||||
instance IsByteString L.ByteString where
|
instance IsByteString BL.ByteString where
|
||||||
toBS = B.concat . L.toChunks
|
toBS = B.concat . BL.toChunks
|
||||||
|
|
||||||
-- | The exception of unpack
|
-- | The exception of unpack
|
||||||
data UnpackError =
|
data UnpackError =
|
||||||
@ -153,13 +157,19 @@ instance Unpackable Double where
|
|||||||
fail $ printf "invlid double tag: 0x%02X" c
|
fail $ printf "invlid double tag: 0x%02X" c
|
||||||
|
|
||||||
instance Unpackable String where
|
instance Unpackable String where
|
||||||
get = parseString (\n -> return . B8.unpack =<< A.take n)
|
get = parseString (\n -> return . decodeUtf8 =<< A.take n)
|
||||||
|
|
||||||
instance Unpackable B.ByteString where
|
instance Unpackable B.ByteString where
|
||||||
get = parseString A.take
|
get = parseString A.take
|
||||||
|
|
||||||
instance Unpackable L.ByteString where
|
instance Unpackable BL.ByteString where
|
||||||
get = parseString (\n -> do bs <- A.take n; return $ L.fromChunks [bs])
|
get = parseString (\n -> return . toLBS =<< A.take n)
|
||||||
|
|
||||||
|
instance Unpackable T.Text where
|
||||||
|
get = parseString (\n -> return . T.decodeUtf8With skipChar =<< A.take n)
|
||||||
|
|
||||||
|
instance Unpackable TL.Text where
|
||||||
|
get = parseString (\n -> return . TL.decodeUtf8With skipChar . toLBS =<< A.take n)
|
||||||
|
|
||||||
parseString :: (Int -> A.Parser a) -> A.Parser a
|
parseString :: (Int -> A.Parser a) -> A.Parser a
|
||||||
parseString aget = do
|
parseString aget = do
|
||||||
@ -311,6 +321,3 @@ parseInt32 = return . fromIntegral =<< parseUint32
|
|||||||
|
|
||||||
parseInt64 :: A.Parser Int64
|
parseInt64 :: A.Parser Int64
|
||||||
parseInt64 = return . fromIntegral =<< parseUint64
|
parseInt64 = return . fromIntegral =<< parseUint64
|
||||||
|
|
||||||
toLBS :: B.ByteString -> L.ByteString
|
|
||||||
toLBS bs = L.fromChunks [bs]
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user