mirror of
https://github.com/msgpack/msgpack-c.git
synced 2025-04-15 22:50:35 +02:00
fix encoding unicode strings, and Text support
This commit is contained in:
parent
5c5f16f148
commit
ace4f1e7f0
@ -23,6 +23,7 @@ Library
|
||||
Build-depends: base >=4 && <5,
|
||||
transformers >= 0.2 && < 0.3,
|
||||
bytestring >= 0.9 && < 0.10,
|
||||
text >= 0.11 && < 0.12,
|
||||
vector >= 0.7 && < 0.8,
|
||||
attoparsec >= 0.8 && < 0.9,
|
||||
binary >= 0.5.0 && < 0.5.1,
|
||||
@ -41,6 +42,9 @@ Library
|
||||
Data.MessagePack.Object
|
||||
Data.MessagePack.Derive
|
||||
|
||||
Other-modules:
|
||||
Data.MessagePack.Internal.Utf8
|
||||
|
||||
Source-repository head
|
||||
Type: git
|
||||
Location: git://github.com/msgpack/msgpack.git
|
||||
|
28
haskell/src/Data/MessagePack/Internal/Utf8.hs
Normal file
28
haskell/src/Data/MessagePack/Internal/Utf8.hs
Normal file
@ -0,0 +1,28 @@
|
||||
module Data.MessagePack.Internal.Utf8 (
|
||||
encodeUtf8,
|
||||
decodeUtf8,
|
||||
skipChar,
|
||||
toLBS,
|
||||
fromLBS,
|
||||
) where
|
||||
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Encoding.Error as T
|
||||
|
||||
encodeUtf8 :: String -> B.ByteString
|
||||
encodeUtf8 = T.encodeUtf8 . T.pack
|
||||
|
||||
decodeUtf8 :: B.ByteString -> String
|
||||
decodeUtf8 = T.unpack . T.decodeUtf8With skipChar
|
||||
|
||||
skipChar :: T.OnDecodeError
|
||||
skipChar _ _ = Nothing
|
||||
|
||||
toLBS :: B.ByteString -> BL.ByteString
|
||||
toLBS bs = BL.fromChunks [bs]
|
||||
|
||||
fromLBS :: BL.ByteString -> B.ByteString
|
||||
fromLBS = B.concat . BL.toChunks
|
@ -32,12 +32,17 @@ import Control.Monad
|
||||
import Control.Monad.Trans.Error ()
|
||||
import qualified Data.Attoparsec as A
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Char8 as C8
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
import Data.Typeable
|
||||
|
||||
import Data.MessagePack.Assoc
|
||||
import Data.MessagePack.Pack
|
||||
import Data.MessagePack.Unpack
|
||||
import Data.MessagePack.Internal.Utf8
|
||||
|
||||
-- | Object Representation of MessagePack data.
|
||||
data Object
|
||||
@ -149,14 +154,29 @@ instance OBJECT Float where
|
||||
tryFromObject (ObjectFloat f) = Right f
|
||||
tryFromObject _ = tryFromObjectError
|
||||
|
||||
instance OBJECT String where
|
||||
toObject = toObject . encodeUtf8
|
||||
tryFromObject obj = liftM decodeUtf8 $ tryFromObject obj
|
||||
|
||||
instance OBJECT B.ByteString where
|
||||
toObject = ObjectRAW
|
||||
tryFromObject (ObjectRAW bs) = Right bs
|
||||
tryFromObject _ = tryFromObjectError
|
||||
|
||||
instance OBJECT String where
|
||||
toObject = toObject . C8.pack
|
||||
tryFromObject obj = liftM C8.unpack $ tryFromObject obj
|
||||
instance OBJECT BL.ByteString where
|
||||
toObject = ObjectRAW . fromLBS
|
||||
tryFromObject (ObjectRAW bs) = Right $ toLBS bs
|
||||
tryFromObject _ = tryFromObjectError
|
||||
|
||||
instance OBJECT T.Text where
|
||||
toObject = ObjectRAW . T.encodeUtf8
|
||||
tryFromObject (ObjectRAW bs) = Right $ T.decodeUtf8With skipChar bs
|
||||
tryFromObject _ = tryFromObjectError
|
||||
|
||||
instance OBJECT TL.Text where
|
||||
toObject = ObjectRAW . fromLBS . TL.encodeUtf8
|
||||
tryFromObject (ObjectRAW bs) = Right $ TL.decodeUtf8With skipChar $ toLBS bs
|
||||
tryFromObject _ = tryFromObjectError
|
||||
|
||||
instance OBJECT a => OBJECT [a] where
|
||||
toObject = ObjectArray . map toObject
|
||||
|
@ -27,11 +27,15 @@ import Data.Binary.Put
|
||||
import Data.Binary.IEEE754
|
||||
import Data.Bits
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Char8 as B8
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
import qualified Data.Vector as V
|
||||
|
||||
import Data.MessagePack.Assoc
|
||||
import Data.MessagePack.Internal.Utf8
|
||||
|
||||
-- | Serializable class
|
||||
class Packable a where
|
||||
@ -39,7 +43,7 @@ class Packable a where
|
||||
put :: a -> Put
|
||||
|
||||
-- | Pack Haskell data to MessagePack string.
|
||||
pack :: Packable a => a -> L.ByteString
|
||||
pack :: Packable a => a -> BL.ByteString
|
||||
pack = runPut . put
|
||||
|
||||
instance Packable Int where
|
||||
@ -93,17 +97,24 @@ instance Packable Double where
|
||||
putFloat64be d
|
||||
|
||||
instance Packable String where
|
||||
put = putString length (putByteString . B8.pack)
|
||||
put = putString encodeUtf8 B.length putByteString
|
||||
|
||||
instance Packable B.ByteString where
|
||||
put = putString B.length putByteString
|
||||
put = putString id B.length putByteString
|
||||
|
||||
instance Packable L.ByteString where
|
||||
put = putString (fromIntegral . L.length) putLazyByteString
|
||||
instance Packable BL.ByteString where
|
||||
put = putString id (fromIntegral . BL.length) putLazyByteString
|
||||
|
||||
putString :: (s -> Int) -> (s -> Put) -> s -> Put
|
||||
putString lf pf str = do
|
||||
case lf str of
|
||||
instance Packable T.Text where
|
||||
put = putString T.encodeUtf8 B.length putByteString
|
||||
|
||||
instance Packable TL.Text where
|
||||
put = putString TL.encodeUtf8 (fromIntegral . BL.length) putLazyByteString
|
||||
|
||||
putString :: (s -> t) -> (t -> Int) -> (t -> Put) -> s -> Put
|
||||
putString cnv lf pf str = do
|
||||
let bs = cnv str
|
||||
case lf bs of
|
||||
len | len <= 31 -> do
|
||||
putWord8 $ 0xA0 .|. fromIntegral len
|
||||
len | len < 0x10000 -> do
|
||||
@ -112,7 +123,7 @@ putString lf pf str = do
|
||||
len -> do
|
||||
putWord8 0xDB
|
||||
putWord32be $ fromIntegral len
|
||||
pf str
|
||||
pf bs
|
||||
|
||||
instance Packable a => Packable [a] where
|
||||
put = putArray length (mapM_ put)
|
||||
|
@ -36,8 +36,11 @@ import Data.Binary.Get
|
||||
import Data.Binary.IEEE754
|
||||
import Data.Bits
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Char8 as B8
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as T
|
||||
import qualified Data.Text.Lazy as TL
|
||||
import qualified Data.Text.Lazy.Encoding as TL
|
||||
import Data.Int
|
||||
import Data.Typeable
|
||||
import qualified Data.Vector as V
|
||||
@ -45,6 +48,7 @@ import Data.Word
|
||||
import Text.Printf
|
||||
|
||||
import Data.MessagePack.Assoc
|
||||
import Data.MessagePack.Internal.Utf8
|
||||
|
||||
-- | Deserializable class
|
||||
class Unpackable a where
|
||||
@ -57,8 +61,8 @@ class IsByteString s where
|
||||
instance IsByteString B.ByteString where
|
||||
toBS = id
|
||||
|
||||
instance IsByteString L.ByteString where
|
||||
toBS = B.concat . L.toChunks
|
||||
instance IsByteString BL.ByteString where
|
||||
toBS = B.concat . BL.toChunks
|
||||
|
||||
-- | The exception of unpack
|
||||
data UnpackError =
|
||||
@ -153,13 +157,19 @@ instance Unpackable Double where
|
||||
fail $ printf "invlid double tag: 0x%02X" c
|
||||
|
||||
instance Unpackable String where
|
||||
get = parseString (\n -> return . B8.unpack =<< A.take n)
|
||||
get = parseString (\n -> return . decodeUtf8 =<< A.take n)
|
||||
|
||||
instance Unpackable B.ByteString where
|
||||
get = parseString A.take
|
||||
|
||||
instance Unpackable L.ByteString where
|
||||
get = parseString (\n -> do bs <- A.take n; return $ L.fromChunks [bs])
|
||||
instance Unpackable BL.ByteString where
|
||||
get = parseString (\n -> return . toLBS =<< A.take n)
|
||||
|
||||
instance Unpackable T.Text where
|
||||
get = parseString (\n -> return . T.decodeUtf8With skipChar =<< A.take n)
|
||||
|
||||
instance Unpackable TL.Text where
|
||||
get = parseString (\n -> return . TL.decodeUtf8With skipChar . toLBS =<< A.take n)
|
||||
|
||||
parseString :: (Int -> A.Parser a) -> A.Parser a
|
||||
parseString aget = do
|
||||
@ -311,6 +321,3 @@ parseInt32 = return . fromIntegral =<< parseUint32
|
||||
|
||||
parseInt64 :: A.Parser Int64
|
||||
parseInt64 = return . fromIntegral =<< parseUint64
|
||||
|
||||
toLBS :: B.ByteString -> L.ByteString
|
||||
toLBS bs = L.fromChunks [bs]
|
||||
|
Loading…
x
Reference in New Issue
Block a user