Initial commit

This commit is contained in:
La Ancapo 2026-01-25 02:27:22 +01:00
commit c101616e62
309 changed files with 53937 additions and 0 deletions

View file

@ -0,0 +1,847 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE BangPatterns #-}
-----------------------------------------------------------------------------
-- |
-- Module : Data.Serialize.Get
-- Copyright : Lennart Kolmodin, Galois Inc. 2009
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Trevor Elliott <trevor@galois.com>
-- Stability :
-- Portability :
--
-- The Get monad. A monad for efficiently building structures from
-- strict ByteStrings
--
-----------------------------------------------------------------------------
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#include "MachDeps.h"
#endif
module Data.Serialize.Get (
-- * The Get type
Get
, runGet
, runGetLazy
, runGetState
, runGetLazyState
-- ** Incremental interface
, Result(..)
, runGetPartial
, runGetChunk
-- * Parsing
, ensure
, isolate
, label
, skip
, uncheckedSkip
, lookAhead
, lookAheadM
, lookAheadE
, uncheckedLookAhead
, bytesRead
-- * Utility
, getBytes
, remaining
, isEmpty
-- * Parsing particular types
, getWord8
, getInt8
-- ** ByteStrings
, getByteString
, getLazyByteString
, getShortByteString
-- ** Big-endian reads
, getWord16be
, getWord32be
, getWord64be
, getInt16be
, getInt32be
, getInt64be
-- ** Little-endian reads
, getWord16le
, getWord32le
, getWord64le
, getInt16le
, getInt32le
, getInt64le
-- ** Host-endian, unaligned reads
, getWordhost
, getWord16host
, getWord32host
, getWord64host
-- ** Containers
, getTwoOf
, getListOf
, getIArrayOf
, getTreeOf
, getSeqOf
, getMapOf
, getIntMapOf
, getSetOf
, getIntSetOf
, getMaybeOf
, getEitherOf
, getNested
) where
import qualified Control.Applicative as A
import qualified Control.Monad as M
import Control.Monad (unless)
import qualified Control.Monad.Fail as Fail
import Data.Array.IArray (IArray,listArray)
import Data.Ix (Ix)
import Data.List (intercalate)
import Data.Maybe (isNothing,fromMaybe)
import Foreign
import System.IO.Unsafe (unsafeDupablePerformIO)
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Unsafe as B
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Short as BS
import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified Data.Map as Map
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import qualified Data.Tree as T
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
import GHC.Base
import GHC.Word
#endif
-- | The result of a parse.
data Result r = Fail String B.ByteString
-- ^ The parse failed. The 'String' is the
-- message describing the error, if any.
| Partial (B.ByteString -> Result r)
-- ^ Supply this continuation with more input so that
-- the parser can resume. To indicate that no more
-- input is available, use an 'B.empty' string.
| Done r B.ByteString
-- ^ The parse succeeded. The 'B.ByteString' is the
-- input that had not yet been consumed (if any) when
-- the parse succeeded.
instance Show r => Show (Result r) where
show (Fail msg _) = "Fail " ++ show msg
show (Partial _) = "Partial _"
show (Done r bs) = "Done " ++ show r ++ " " ++ show bs
instance Functor Result where
fmap _ (Fail msg rest) = Fail msg rest
fmap f (Partial k) = Partial (fmap f . k)
fmap f (Done r bs) = Done (f r) bs
-- | The Get monad is an Exception and State monad.
newtype Get a = Get
{ unGet :: forall r. Input -> Buffer -> More
-> Int -> Failure r
-> Success a r -> Result r }
type Input = B.ByteString
type Buffer = Maybe B.ByteString
emptyBuffer :: Buffer
emptyBuffer = Just B.empty
extendBuffer :: Buffer -> B.ByteString -> Buffer
extendBuffer buf chunk =
do bs <- buf
return $! bs `B.append` chunk
{-# INLINE extendBuffer #-}
append :: Buffer -> Buffer -> Buffer
append l r = B.append `fmap` l A.<*> r
{-# INLINE append #-}
bufferBytes :: Buffer -> B.ByteString
bufferBytes = fromMaybe B.empty
{-# INLINE bufferBytes #-}
type Failure r = Input -> Buffer -> More -> [String] -> String -> Result r
type Success a r = Input -> Buffer -> More -> Int -> a -> Result r
-- | Have we read all available input?
data More
= Complete
| Incomplete (Maybe Int)
deriving (Eq)
moreLength :: More -> Int
moreLength m = case m of
Complete -> 0
Incomplete mb -> fromMaybe 0 mb
instance Functor Get where
fmap p m = Get $ \ s0 b0 m0 w0 kf ks ->
unGet m s0 b0 m0 w0 kf $ \ s1 b1 m1 w1 a -> ks s1 b1 m1 w1 (p a)
instance A.Applicative Get where
pure a = Get $ \ s0 b0 m0 w _ ks -> ks s0 b0 m0 w a
{-# INLINE pure #-}
f <*> x = Get $ \ s0 b0 m0 w0 kf ks ->
unGet f s0 b0 m0 w0 kf $ \ s1 b1 m1 w1 g ->
unGet x s1 b1 m1 w1 kf $ \ s2 b2 m2 w2 y -> ks s2 b2 m2 w2 (g y)
{-# INLINE (<*>) #-}
m *> k = Get $ \ s0 b0 m0 w0 kf ks ->
unGet m s0 b0 m0 w0 kf $ \ s1 b1 m1 w1 _ -> unGet k s1 b1 m1 w1 kf ks
{-# INLINE (*>) #-}
instance A.Alternative Get where
empty = failDesc "empty"
{-# INLINE empty #-}
(<|>) = M.mplus
{-# INLINE (<|>) #-}
-- Definition directly from Control.Monad.State.Strict
instance Monad Get where
return = A.pure
{-# INLINE return #-}
m >>= g = Get $ \ s0 b0 m0 w0 kf ks ->
unGet m s0 b0 m0 w0 kf $ \ s1 b1 m1 w1 a -> unGet (g a) s1 b1 m1 w1 kf ks
{-# INLINE (>>=) #-}
(>>) = (A.*>)
{-# INLINE (>>) #-}
#if !(MIN_VERSION_base(4,13,0))
fail = Fail.fail
{-# INLINE fail #-}
#endif
instance Fail.MonadFail Get where
fail = failDesc
{-# INLINE fail #-}
instance M.MonadPlus Get where
mzero = failDesc "mzero"
{-# INLINE mzero #-}
-- TODO: Test this!
mplus a b =
Get $ \s0 b0 m0 w0 kf ks ->
let ks' s1 b1 = ks s1 (b0 `append` b1)
kf' _ b1 m1 = kf (s0 `B.append` bufferBytes b1)
(b0 `append` b1) m1
try _ b1 m1 _ _ = unGet b (s0 `B.append` bufferBytes b1)
b1 m1 w0 kf' ks'
in unGet a s0 emptyBuffer m0 w0 try ks'
{-# INLINE mplus #-}
------------------------------------------------------------------------
formatTrace :: [String] -> String
formatTrace [] = "Empty call stack"
formatTrace ls = "From:\t" ++ intercalate "\n\t" ls ++ "\n"
get :: Get B.ByteString
get = Get (\s0 b0 m0 w _ k -> k s0 b0 m0 w s0)
{-# INLINE get #-}
put :: B.ByteString -> Int -> Get ()
put s !w = Get (\_ b0 m _ _ k -> k s b0 m w ())
{-# INLINE put #-}
label :: String -> Get a -> Get a
label l m =
Get $ \ s0 b0 m0 w0 kf ks ->
let kf' s1 b1 m1 ls = kf s1 b1 m1 (l:ls)
in unGet m s0 b0 m0 w0 kf' ks
finalK :: Success a a
finalK s _ _ _ a = Done a s
failK :: Failure a
failK s b _ ls msg =
Fail (unlines [msg, formatTrace ls]) (s `B.append` bufferBytes b)
-- | Run the Get monad applies a 'get'-based parser on the input ByteString
runGet :: Get a -> B.ByteString -> Either String a
runGet m str =
case unGet m str Nothing Complete 0 failK finalK of
Fail i _ -> Left i
Done a _ -> Right a
Partial{} -> Left "Failed reading: Internal error: unexpected Partial."
{-# INLINE runGet #-}
-- | Run the get monad on a single chunk, providing an optional length for the
-- remaining, unseen input, with Nothing indicating that it's not clear how much
-- input is left. For example, with a lazy ByteString, the optional length
-- represents the sum of the lengths of all remaining chunks.
runGetChunk :: Get a -> Maybe Int -> B.ByteString -> Result a
runGetChunk m mbLen str = unGet m str Nothing (Incomplete mbLen) 0 failK finalK
{-# INLINE runGetChunk #-}
-- | Run the Get monad applies a 'get'-based parser on the input ByteString
runGetPartial :: Get a -> B.ByteString -> Result a
runGetPartial m = runGetChunk m Nothing
{-# INLINE runGetPartial #-}
-- | Run the Get monad applies a 'get'-based parser on the input
-- ByteString, starting at the specified offset. In addition to the result of get
-- it returns the rest of the input.
runGetState :: Get a -> B.ByteString -> Int
-> Either String (a, B.ByteString)
runGetState m str off = case runGetState' m str off of
(Right a,bs) -> Right (a,bs)
(Left i,_) -> Left i
{-# INLINE runGetState #-}
-- | Run the Get monad applies a 'get'-based parser on the input
-- ByteString, starting at the specified offset. In addition to the result of get
-- it returns the rest of the input, even in the event of a failure.
runGetState' :: Get a -> B.ByteString -> Int
-> (Either String a, B.ByteString)
runGetState' m str off =
case unGet m (B.drop off str) Nothing Complete 0 failK finalK of
Fail i bs -> (Left i,bs)
Done a bs -> (Right a, bs)
Partial{} -> (Left "Failed reading: Internal error: unexpected Partial.",B.empty)
{-# INLINE runGetState' #-}
-- Lazy Get --------------------------------------------------------------------
runGetLazy' :: Get a -> L.ByteString -> (Either String a,L.ByteString)
runGetLazy' m lstr =
case L.toChunks lstr of
[c] -> wrapStrict (runGetState' m c 0)
[] -> wrapStrict (runGetState' m B.empty 0)
c:cs -> loop (runGetChunk m (Just (len - B.length c)) c) cs
where
len = fromIntegral (L.length lstr)
wrapStrict (e,s) = (e,L.fromChunks [s])
loop result chunks = case result of
Fail str rest -> (Left str, L.fromChunks (rest : chunks))
Partial k -> case chunks of
c:cs -> loop (k c) cs
[] -> loop (k B.empty) []
Done r rest -> (Right r, L.fromChunks (rest : chunks))
{-# INLINE runGetLazy' #-}
-- | Run the Get monad over a Lazy ByteString. Note that this will not run the
-- Get parser lazily, but will operate on lazy ByteStrings.
runGetLazy :: Get a -> L.ByteString -> Either String a
runGetLazy m lstr = fst (runGetLazy' m lstr)
{-# INLINE runGetLazy #-}
-- | Run the Get monad over a Lazy ByteString. Note that this does not run the
-- Get parser lazily, but will operate on lazy ByteStrings.
runGetLazyState :: Get a -> L.ByteString -> Either String (a,L.ByteString)
runGetLazyState m lstr = case runGetLazy' m lstr of
(Right a,rest) -> Right (a,rest)
(Left err,_) -> Left err
{-# INLINE runGetLazyState #-}
------------------------------------------------------------------------
-- | If at least @n@ bytes of input are available, return the current
-- input, otherwise fail.
{-# INLINE ensure #-}
ensure :: Int -> Get B.ByteString
ensure n0 = n0 `seq` Get $ \ s0 b0 m0 w0 kf ks -> let
n' = n0 - B.length s0
in if n' <= 0
then ks s0 b0 m0 w0 s0
else getMore n' s0 [] b0 m0 w0 kf ks
where
-- The "accumulate and concat" pattern here is important not to incur
-- in quadratic behavior, see <https://github.com/GaloisInc/cereal/issues/48>
finalInput s0 ss = B.concat (reverse (s0 : ss))
finalBuffer b0 s0 ss = extendBuffer b0 (B.concat (reverse (init (s0 : ss))))
getMore !n s0 ss b0 m0 w0 kf ks = let
tooFewBytes = let
!s = finalInput s0 ss
!b = finalBuffer b0 s0 ss
in kf s b m0 ["demandInput"] "too few bytes"
in case m0 of
Complete -> tooFewBytes
Incomplete mb -> Partial $ \s ->
if B.null s
then tooFewBytes
else let
!mb' = case mb of
Just l -> Just $! l - B.length s
Nothing -> Nothing
in checkIfEnough n s (s0 : ss) b0 (Incomplete mb') w0 kf ks
checkIfEnough !n s0 ss b0 m0 w0 kf ks = let
n' = n - B.length s0
in if n' <= 0
then let
!s = finalInput s0 ss
!b = finalBuffer b0 s0 ss
in ks s b m0 w0 s
else getMore n' s0 ss b0 m0 w0 kf ks
-- | Isolate an action to operating within a fixed block of bytes. The action
-- is required to consume all the bytes that it is isolated to.
isolate :: Int -> Get a -> Get a
isolate n m = do
M.when (n < 0) (fail "Attempted to isolate a negative number of bytes")
s <- ensure n
let (s',rest) = B.splitAt n s
cur <- bytesRead
put s' cur
a <- m
used <- get
unless (B.null used) (fail "not all bytes parsed in isolate")
put rest (cur + n)
return a
failDesc :: String -> Get a
failDesc err = do
let msg = "Failed reading: " ++ err
Get (\s0 b0 m0 _ kf _ -> kf s0 b0 m0 [] msg)
-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
skip :: Int -> Get ()
skip n = do
s <- ensure n
cur <- bytesRead
put (B.drop n s) (cur + n)
-- | Skip ahead up to @n@ bytes in the current chunk. No error if there aren't
-- enough bytes, or if less than @n@ bytes are skipped.
uncheckedSkip :: Int -> Get ()
uncheckedSkip n = do
s <- get
cur <- bytesRead
put (B.drop n s) (cur + n)
-- | Run @ga@, but return without consuming its input.
-- Fails if @ga@ fails.
lookAhead :: Get a -> Get a
lookAhead ga = Get $ \ s0 b0 m0 w0 kf ks ->
-- the new continuation extends the old input with the new buffered bytes, and
-- appends the new buffer to the old one, if there was one.
let ks' _ b1 = ks (s0 `B.append` bufferBytes b1) (b0 `append` b1)
kf' _ b1 = kf s0 (b0 `append` b1)
in unGet ga s0 emptyBuffer m0 w0 kf' ks'
-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
-- Fails if @gma@ fails.
lookAheadM :: Get (Maybe a) -> Get (Maybe a)
lookAheadM gma = do
s <- get
pre <- bytesRead
ma <- gma
M.when (isNothing ma) (put s pre)
return ma
-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
-- Fails if @gea@ fails.
lookAheadE :: Get (Either a b) -> Get (Either a b)
lookAheadE gea = do
s <- get
pre <- bytesRead
ea <- gea
case ea of
Left _ -> put s pre
_ -> return ()
return ea
-- | Get the next up to @n@ bytes as a ByteString until end of this chunk,
-- without consuming them.
uncheckedLookAhead :: Int -> Get B.ByteString
uncheckedLookAhead n = do
s <- get
return (B.take n s)
------------------------------------------------------------------------
-- Utility
-- | Get the number of remaining unparsed bytes. Useful for checking whether
-- all input has been consumed.
--
-- WARNING: when run with @runGetPartial@, remaining will only return the number
-- of bytes that are remaining in the current input.
remaining :: Get Int
remaining = Get (\ s0 b0 m0 w0 _ ks -> ks s0 b0 m0 w0 (B.length s0 + moreLength m0))
-- | Test whether all input has been consumed.
--
-- WARNING: when run with @runGetPartial@, isEmpty will only tell you if you're
-- at the end of the current chunk.
isEmpty :: Get Bool
isEmpty = Get (\ s0 b0 m0 w0 _ ks -> ks s0 b0 m0 w0 (B.null s0 && moreLength m0 == 0))
------------------------------------------------------------------------
-- Utility with ByteStrings
-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
-- than @n@ bytes are left in the input. This function creates a fresh
-- copy of the underlying bytes.
getByteString :: Int -> Get B.ByteString
getByteString n = do
bs <- getBytes n
return $! B.copy bs
getLazyByteString :: Int64 -> Get L.ByteString
getLazyByteString n = f `fmap` getByteString (fromIntegral n)
where f bs = L.fromChunks [bs]
getShortByteString :: Int -> Get BS.ShortByteString
getShortByteString n = do
bs <- getBytes n
return $! BS.toShort bs
------------------------------------------------------------------------
-- Helpers
-- | Pull @n@ bytes from the input, as a strict ByteString.
getBytes :: Int -> Get B.ByteString
getBytes n | n < 0 = fail "getBytes: negative length requested"
getBytes n = do
s <- ensure n
let consume = B.unsafeTake n s
rest = B.unsafeDrop n s
-- (consume,rest) = B.splitAt n s
cur <- bytesRead
put rest (cur + n)
return consume
{-# INLINE getBytes #-}
------------------------------------------------------------------------
-- Primtives
-- helper, get a raw Ptr onto a strict ByteString copied out of the
-- underlying strict byteString.
getPtr :: Storable a => Int -> Get a
getPtr n = do
(fp,o,_) <- B.toForeignPtr `fmap` getBytes n
let k p = peek (castPtr (p `plusPtr` o))
return (unsafeDupablePerformIO (withForeignPtr fp k))
{-# INLINE getPtr #-}
-----------------------------------------------------------------------
-- | Read a Int8 from the monad state
getInt8 :: Get Int8
getInt8 = do
s <- getBytes 1
return $! fromIntegral (B.unsafeHead s)
-- | Read a Int16 in big endian format
getInt16be :: Get Int16
getInt16be = do
s <- getBytes 2
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 1) )
-- | Read a Int16 in little endian format
getInt16le :: Get Int16
getInt16le = do
s <- getBytes 2
return $! (fromIntegral (s `B.unsafeIndex` 1) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
-- | Read a Int32 in big endian format
getInt32be :: Get Int32
getInt32be = do
s <- getBytes 4
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftL` 24) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftL` 16) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 3) )
-- | Read a Int32 in little endian format
getInt32le :: Get Int32
getInt32le = do
s <- getBytes 4
return $! (fromIntegral (s `B.unsafeIndex` 3) `shiftL` 24) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftL` 16) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
-- | Read a Int64 in big endian format
getInt64be :: Get Int64
getInt64be = do
s <- getBytes 8
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftL` 56) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftL` 48) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftL` 40) .|.
(fromIntegral (s `B.unsafeIndex` 3) `shiftL` 32) .|.
(fromIntegral (s `B.unsafeIndex` 4) `shiftL` 24) .|.
(fromIntegral (s `B.unsafeIndex` 5) `shiftL` 16) .|.
(fromIntegral (s `B.unsafeIndex` 6) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 7) )
-- | Read a Int64 in little endian format
getInt64le :: Get Int64
getInt64le = do
s <- getBytes 8
return $! (fromIntegral (s `B.unsafeIndex` 7) `shiftL` 56) .|.
(fromIntegral (s `B.unsafeIndex` 6) `shiftL` 48) .|.
(fromIntegral (s `B.unsafeIndex` 5) `shiftL` 40) .|.
(fromIntegral (s `B.unsafeIndex` 4) `shiftL` 32) .|.
(fromIntegral (s `B.unsafeIndex` 3) `shiftL` 24) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftL` 16) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
{-# INLINE getInt8 #-}
{-# INLINE getInt16be #-}
{-# INLINE getInt16le #-}
{-# INLINE getInt32be #-}
{-# INLINE getInt32le #-}
{-# INLINE getInt64be #-}
{-# INLINE getInt64le #-}
------------------------------------------------------------------------
-- | Read a Word8 from the monad state
getWord8 :: Get Word8
getWord8 = do
s <- getBytes 1
return (B.unsafeHead s)
-- | Read a Word16 in big endian format
getWord16be :: Get Word16
getWord16be = do
s <- getBytes 2
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftl_w16` 8) .|.
(fromIntegral (s `B.unsafeIndex` 1))
-- | Read a Word16 in little endian format
getWord16le :: Get Word16
getWord16le = do
s <- getBytes 2
return $! (fromIntegral (s `B.unsafeIndex` 1) `shiftl_w16` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
-- | Read a Word32 in big endian format
getWord32be :: Get Word32
getWord32be = do
s <- getBytes 4
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftl_w32` 24) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftl_w32` 16) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftl_w32` 8) .|.
(fromIntegral (s `B.unsafeIndex` 3) )
-- | Read a Word32 in little endian format
getWord32le :: Get Word32
getWord32le = do
s <- getBytes 4
return $! (fromIntegral (s `B.unsafeIndex` 3) `shiftl_w32` 24) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftl_w32` 16) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftl_w32` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
-- | Read a Word64 in big endian format
getWord64be :: Get Word64
getWord64be = do
s <- getBytes 8
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftl_w64` 56) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftl_w64` 48) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftl_w64` 40) .|.
(fromIntegral (s `B.unsafeIndex` 3) `shiftl_w64` 32) .|.
(fromIntegral (s `B.unsafeIndex` 4) `shiftl_w64` 24) .|.
(fromIntegral (s `B.unsafeIndex` 5) `shiftl_w64` 16) .|.
(fromIntegral (s `B.unsafeIndex` 6) `shiftl_w64` 8) .|.
(fromIntegral (s `B.unsafeIndex` 7) )
-- | Read a Word64 in little endian format
getWord64le :: Get Word64
getWord64le = do
s <- getBytes 8
return $! (fromIntegral (s `B.unsafeIndex` 7) `shiftl_w64` 56) .|.
(fromIntegral (s `B.unsafeIndex` 6) `shiftl_w64` 48) .|.
(fromIntegral (s `B.unsafeIndex` 5) `shiftl_w64` 40) .|.
(fromIntegral (s `B.unsafeIndex` 4) `shiftl_w64` 32) .|.
(fromIntegral (s `B.unsafeIndex` 3) `shiftl_w64` 24) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftl_w64` 16) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftl_w64` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
{-# INLINE getWord8 #-}
{-# INLINE getWord16be #-}
{-# INLINE getWord16le #-}
{-# INLINE getWord32be #-}
{-# INLINE getWord32le #-}
{-# INLINE getWord64be #-}
{-# INLINE getWord64le #-}
------------------------------------------------------------------------
-- Host-endian reads
-- | /O(1)./ Read a single native machine word. The word is read in
-- host order, host endian form, for the machine you're on. On a 64 bit
-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
getWordhost :: Get Word
getWordhost = getPtr (sizeOf (undefined :: Word))
-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
getWord16host :: Get Word16
getWord16host = getPtr (sizeOf (undefined :: Word16))
-- | /O(1)./ Read a Word32 in native host order and host endianness.
getWord32host :: Get Word32
getWord32host = getPtr (sizeOf (undefined :: Word32))
-- | /O(1)./ Read a Word64 in native host order and host endianness.
getWord64host :: Get Word64
getWord64host = getPtr (sizeOf (undefined :: Word64))
------------------------------------------------------------------------
-- Unchecked shifts
shiftl_w16 :: Word16 -> Int -> Word16
shiftl_w32 :: Word32 -> Int -> Word32
shiftl_w64 :: Word64 -> Int -> Word64
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#if MIN_VERSION_base(4,16,0)
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftLWord16#` i)
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftLWord32#` i)
#else
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i)
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i)
#endif
#if WORD_SIZE_IN_BITS < 64
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftL64"
uncheckedShiftL64# :: Word64# -> Int# -> Word64#
#endif
#else
#if MIN_VERSION_base(4,17,0)
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
#else
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
#endif
#endif
#else
shiftl_w16 = shiftL
shiftl_w32 = shiftL
shiftl_w64 = shiftL
#endif
-- Containers ------------------------------------------------------------------
getTwoOf :: Get a -> Get b -> Get (a,b)
getTwoOf ma mb = M.liftM2 (,) ma mb
-- | Get a list in the following format:
-- Word64 (big endian format)
-- element 1
-- ...
-- element n
getListOf :: Get a -> Get [a]
getListOf m = go [] =<< getWord64be
where
go as 0 = return $! reverse as
go as i = do x <- m
x `seq` go (x:as) (i - 1)
-- | Get an IArray in the following format:
-- index (lower bound)
-- index (upper bound)
-- Word64 (big endian format)
-- element 1
-- ...
-- element n
getIArrayOf :: (Ix i, IArray a e) => Get i -> Get e -> Get (a i e)
getIArrayOf ix e = M.liftM2 listArray (getTwoOf ix ix) (getListOf e)
-- | Get a sequence in the following format:
-- Word64 (big endian format)
-- element 1
-- ...
-- element n
getSeqOf :: Get a -> Get (Seq.Seq a)
getSeqOf m = go Seq.empty =<< getWord64be
where
go xs 0 = return $! xs
go xs n = xs `seq` n `seq` do
x <- m
go (xs Seq.|> x) (n - 1)
-- | Read as a list of lists.
getTreeOf :: Get a -> Get (T.Tree a)
getTreeOf m = M.liftM2 T.Node m (getListOf (getTreeOf m))
-- | Read as a list of pairs of key and element.
getMapOf :: Ord k => Get k -> Get a -> Get (Map.Map k a)
getMapOf k m = Map.fromList `fmap` getListOf (getTwoOf k m)
-- | Read as a list of pairs of int and element.
getIntMapOf :: Get Int -> Get a -> Get (IntMap.IntMap a)
getIntMapOf i m = IntMap.fromList `fmap` getListOf (getTwoOf i m)
-- | Read as a list of elements.
getSetOf :: Ord a => Get a -> Get (Set.Set a)
getSetOf m = Set.fromList `fmap` getListOf m
-- | Read as a list of ints.
getIntSetOf :: Get Int -> Get IntSet.IntSet
getIntSetOf m = IntSet.fromList `fmap` getListOf m
-- | Read in a Maybe in the following format:
-- Word8 (0 for Nothing, anything else for Just)
-- element (when Just)
getMaybeOf :: Get a -> Get (Maybe a)
getMaybeOf m = do
tag <- getWord8
case tag of
0 -> return Nothing
_ -> Just `fmap` m
-- | Read an Either, in the following format:
-- Word8 (0 for Left, anything else for Right)
-- element a when 0, element b otherwise
getEitherOf :: Get a -> Get b -> Get (Either a b)
getEitherOf ma mb = do
tag <- getWord8
case tag of
0 -> Left `fmap` ma
_ -> Right `fmap` mb
-- | Read in a length and then read a nested structure
-- of that length.
getNested :: Get Int -> Get a -> Get a
getNested getLen getVal = do
n <- getLen
isolate n getVal
-- | Get the number of bytes read up to this point
bytesRead :: Get Int
bytesRead = Get (\i b m w _ k -> k i b m w w)

View file

@ -0,0 +1,84 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
#ifndef MIN_VERSION_base
#define MIN_VERSION_base(x,y,z) 1
#endif
-- | IEEE-754 parsing, as described in this stack-overflow article:
--
-- <http://stackoverflow.com/questions/6976684/converting-ieee-754-floating-point-in-haskell-word32-64-to-and-from-haskell-float/7002812#7002812>
module Data.Serialize.IEEE754 (
-- * IEEE-754 reads
getFloat32le
, getFloat32be
, getFloat64le
, getFloat64be
-- * IEEE-754 writes
, putFloat32le
, putFloat32be
, putFloat64le
, putFloat64be
) where
import Data.Word ( Word32, Word64 )
import Data.Serialize.Get
import Data.Serialize.Put
import qualified Data.ByteString.Builder as Builder
import System.IO.Unsafe (unsafeDupablePerformIO)
import Foreign.Marshal.Alloc (alloca)
import Foreign.Storable (peek, poke)
import Foreign.Ptr (castPtr, Ptr)
#if !(MIN_VERSION_base(4,8,0))
import Control.Applicative ( (<$>) )
#endif
-- | Read a Float in little endian IEEE-754 format
getFloat32le :: Get Float
getFloat32le = wordToFloat <$> getWord32le
-- | Read a Float in big endian IEEE-754 format
getFloat32be :: Get Float
getFloat32be = wordToFloat <$> getWord32be
-- | Read a Double in little endian IEEE-754 format
getFloat64le :: Get Double
getFloat64le = wordToDouble <$> getWord64le
-- | Read a Double in big endian IEEE-754 format
getFloat64be :: Get Double
getFloat64be = wordToDouble <$> getWord64be
-- | Write a Float in little endian IEEE-754 format
putFloat32le :: Float -> Put
putFloat32le = putBuilder . Builder.floatLE
-- | Write a Float in big endian IEEE-754 format
putFloat32be :: Float -> Put
putFloat32be = putBuilder . Builder.floatBE
-- | Write a Double in little endian IEEE-754 format
putFloat64le :: Double -> Put
putFloat64le = putBuilder . Builder.doubleLE
-- | Write a Double in big endian IEEE-754 format
putFloat64be :: Double -> Put
putFloat64be = putBuilder . Builder.doubleBE
{-# INLINE wordToFloat #-}
wordToFloat :: Word32 -> Float
wordToFloat w = unsafeDupablePerformIO $ alloca $ \(ptr :: Ptr Word32) -> do
poke ptr w
peek (castPtr ptr)
{-# INLINE wordToDouble #-}
wordToDouble :: Word64 -> Double
wordToDouble w = unsafeDupablePerformIO $ alloca $ \(ptr :: Ptr Word64) -> do
poke ptr w
peek (castPtr ptr)

View file

@ -0,0 +1,484 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleInstances #-}
#ifndef MIN_VERSION_base
#define MIN_VERSION_base(x,y,z) 0
#endif
#ifndef MIN_VERSION_bytestring
#define MIN_VERSION_bytestring(x,y,z) 0
#endif
-----------------------------------------------------------------------------
-- |
-- Module : Data.Serialize.Put
-- Copyright : Lennart Kolmodin, Galois Inc. 2009
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Trevor Elliott <trevor@galois.com>
-- Stability :
-- Portability :
--
-- The Put monad. A monad for efficiently constructing bytestrings.
--
-----------------------------------------------------------------------------
module Data.Serialize.Put (
-- * The Put type
Put
, PutM(..)
, Putter
, runPut
, runPutM
, runPutLazy
, runPutMLazy
, runPutMBuilder
, putBuilder
, execPut
-- * Flushing the implicit parse state
, flush
-- * Primitives
, putWord8
, putInt8
, putByteString
, putLazyByteString
, putShortByteString
-- * Big-endian primitives
, putWord16be
, putWord32be
, putWord64be
, putInt16be
, putInt32be
, putInt64be
-- * Little-endian primitives
, putWord16le
, putWord32le
, putWord64le
, putInt16le
, putInt32le
, putInt64le
-- * Host-endian, unaligned writes
, putWordhost
, putWord16host
, putWord32host
, putWord64host
, putInthost
, putInt16host
, putInt32host
, putInt64host
-- * Containers
, putTwoOf
, putListOf
, putIArrayOf
, putSeqOf
, putTreeOf
, putMapOf
, putIntMapOf
, putSetOf
, putIntSetOf
, putMaybeOf
, putEitherOf
, putNested
) where
import Data.ByteString.Builder (Builder, toLazyByteString)
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Extra as B
import qualified Data.ByteString.Short as BS
import qualified Control.Applicative as A
import Data.Array.Unboxed
#if MIN_VERSION_base(4,9,0)
import qualified Data.Semigroup as M
#endif
import qualified Data.Monoid as M
import qualified Data.Foldable as F
import Data.Word
import Data.Int
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified Data.Map as Map
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import qualified Data.Tree as T
#if !(MIN_VERSION_base(4,8,0))
import Control.Applicative
import Data.Foldable (foldMap)
import Data.Monoid
#endif
#if !(MIN_VERSION_bytestring(0,10,0))
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Marshal.Utils (copyBytes)
import Foreign.Ptr (plusPtr)
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Lazy.Internal as L
#endif
------------------------------------------------------------------------
-- XXX Strict in builder only.
data PairS a = PairS a !Builder
sndS :: PairS a -> Builder
sndS (PairS _ b) = b
-- | The PutM type. A Writer monad over the efficient Builder monoid.
newtype PutM a = Put { unPut :: PairS a }
-- | Put merely lifts Builder into a Writer monad, applied to ().
type Put = PutM ()
type Putter a = a -> Put
instance Functor PutM where
fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
{-# INLINE fmap #-}
instance A.Applicative PutM where
pure a = Put (PairS a M.mempty)
{-# INLINE pure #-}
m <*> k = Put $
let PairS f w = unPut m
PairS x w' = unPut k
in PairS (f x) (w `M.mappend` w')
{-# INLINE (<*>) #-}
m *> k = Put $
let PairS _ w = unPut m
PairS b w' = unPut k
in PairS b (w `M.mappend` w')
{-# INLINE (*>) #-}
instance Monad PutM where
return = pure
{-# INLINE return #-}
m >>= k = Put $
let PairS a w = unPut m
PairS b w' = unPut (k a)
in PairS b (w `M.mappend` w')
{-# INLINE (>>=) #-}
(>>) = (*>)
{-# INLINE (>>) #-}
#if MIN_VERSION_base(4,9,0)
instance M.Semigroup (PutM ()) where
(<>) = (*>)
{-# INLINE (<>) #-}
#endif
instance Monoid (PutM ()) where
mempty = pure ()
{-# INLINE mempty #-}
#if !(MIN_VERSION_base(4,11,0))
mappend = (*>)
{-# INLINE mappend #-}
#endif
tell :: Putter Builder
tell b = Put $! PairS () b
{-# INLINE tell #-}
putBuilder :: Putter Builder
putBuilder = tell
{-# INLINE putBuilder #-}
-- | Run the 'Put' monad
execPut :: PutM a -> Builder
execPut = sndS . unPut
{-# INLINE execPut #-}
-- | Run the 'Put' monad with a serialiser
runPut :: Put -> S.ByteString
runPut = lazyToStrictByteString . runPutLazy
{-# INLINE runPut #-}
-- | Run the 'Put' monad with a serialiser and get its result
runPutM :: PutM a -> (a, S.ByteString)
runPutM (Put (PairS f s)) = (f, lazyToStrictByteString (toLazyByteString s))
{-# INLINE runPutM #-}
-- | Run the 'Put' monad with a serialiser
runPutLazy :: Put -> L.ByteString
runPutLazy = toLazyByteString . sndS . unPut
{-# INLINE runPutLazy #-}
-- | Run the 'Put' monad with a serialiser
runPutMLazy :: PutM a -> (a, L.ByteString)
runPutMLazy (Put (PairS f s)) = (f, toLazyByteString s)
{-# INLINE runPutMLazy #-}
-- | Run the 'Put' monad and get the result and underlying 'Builder'
runPutMBuilder :: PutM a -> (a, Builder)
runPutMBuilder (Put (PairS f s)) = (f, s)
{-# INLINE runPutMBuilder #-}
------------------------------------------------------------------------
-- | Pop the ByteString we have constructed so far, if any, yielding a
-- new chunk in the result ByteString.
flush :: Put
flush = tell B.flush
{-# INLINE flush #-}
-- | Efficiently write a byte into the output buffer
putWord8 :: Putter Word8
putWord8 = tell . B.word8
{-# INLINE putWord8 #-}
-- | Efficiently write an int into the output buffer
putInt8 :: Putter Int8
putInt8 = tell . B.int8
{-# INLINE putInt8 #-}
-- | An efficient primitive to write a strict ByteString into the output buffer.
-- It flushes the current buffer, and writes the argument into a new chunk.
putByteString :: Putter S.ByteString
putByteString = tell . B.byteString
{-# INLINE putByteString #-}
putShortByteString :: Putter BS.ShortByteString
putShortByteString = tell . B.shortByteString
-- | Write a lazy ByteString efficiently, simply appending the lazy
-- ByteString chunks to the output buffer
putLazyByteString :: Putter L.ByteString
putLazyByteString = tell . B.lazyByteString
{-# INLINE putLazyByteString #-}
-- | Write a Word16 in big endian format
putWord16be :: Putter Word16
putWord16be = tell . B.word16BE
{-# INLINE putWord16be #-}
-- | Write a Word16 in little endian format
putWord16le :: Putter Word16
putWord16le = tell . B.word16LE
{-# INLINE putWord16le #-}
-- | Write a Word32 in big endian format
putWord32be :: Putter Word32
putWord32be = tell . B.word32BE
{-# INLINE putWord32be #-}
-- | Write a Word32 in little endian format
putWord32le :: Putter Word32
putWord32le = tell . B.word32LE
{-# INLINE putWord32le #-}
-- | Write a Word64 in big endian format
putWord64be :: Putter Word64
putWord64be = tell . B.word64BE
{-# INLINE putWord64be #-}
-- | Write a Word64 in little endian format
putWord64le :: Putter Word64
putWord64le = tell . B.word64LE
{-# INLINE putWord64le #-}
------------------------------------------------------------------------
-- | /O(1)./ Write a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Putter Word
putWordhost = tell . B.wordHost
{-# INLINE putWordhost #-}
-- | /O(1)./ Write a Word16 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord16host :: Putter Word16
putWord16host = tell . B.word16Host
{-# INLINE putWord16host #-}
-- | /O(1)./ Write a Word32 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord32host :: Putter Word32
putWord32host = tell . B.word32Host
{-# INLINE putWord32host #-}
-- | /O(1)./ Write a Word64 in native host order
-- On a 32 bit machine we write two host order Word32s, in big endian form.
-- For portability issues see @putWordhost@.
putWord64host :: Putter Word64
putWord64host = tell . B.word64Host
{-# INLINE putWord64host #-}
-- | Write a Int16 in big endian format
putInt16be :: Putter Int16
putInt16be = tell . B.int16BE
{-# INLINE putInt16be #-}
-- | Write a Int16 in little endian format
putInt16le :: Putter Int16
putInt16le = tell . B.int16LE
{-# INLINE putInt16le #-}
-- | Write a Int32 in big endian format
putInt32be :: Putter Int32
putInt32be = tell . B.int32BE
{-# INLINE putInt32be #-}
-- | Write a Int32 in little endian format
putInt32le :: Putter Int32
putInt32le = tell . B.int32LE
{-# INLINE putInt32le #-}
-- | Write a Int64 in big endian format
putInt64be :: Putter Int64
putInt64be = tell . B.int64BE
{-# INLINE putInt64be #-}
-- | Write a Int64 in little endian format
putInt64le :: Putter Int64
putInt64le = tell . B.int64LE
{-# INLINE putInt64le #-}
------------------------------------------------------------------------
-- | /O(1)./ Write a single native machine int. The int is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Int is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or int sized machines, without conversion.
--
putInthost :: Putter Int
putInthost = tell . B.intHost
{-# INLINE putInthost #-}
-- | /O(1)./ Write a Int16 in native host order and host endianness.
-- For portability issues see @putInthost@.
putInt16host :: Putter Int16
putInt16host = tell . B.int16Host
{-# INLINE putInt16host #-}
-- | /O(1)./ Write a Int32 in native host order and host endianness.
-- For portability issues see @putInthost@.
putInt32host :: Putter Int32
putInt32host = tell . B.int32Host
{-# INLINE putInt32host #-}
-- | /O(1)./ Write a Int64 in native host order
-- On a 32 bit machine we write two host order Int32s, in big endian form.
-- For portability issues see @putInthost@.
putInt64host :: Putter Int64
putInt64host = tell . B.int64Host
{-# INLINE putInt64host #-}
-- Containers ------------------------------------------------------------------
encodeListOf :: (a -> Builder) -> [a] -> Builder
encodeListOf f = -- allow inlining with just a single argument
\xs -> execPut (putWord64be (fromIntegral $ length xs)) `M.mappend`
F.foldMap f xs
{-# INLINE encodeListOf #-}
putTwoOf :: Putter a -> Putter b -> Putter (a,b)
putTwoOf pa pb (a,b) = pa a >> pb b
{-# INLINE putTwoOf #-}
putListOf :: Putter a -> Putter [a]
putListOf pa = \l -> do
putWord64be (fromIntegral (length l))
mapM_ pa l
{-# INLINE putListOf #-}
putIArrayOf :: (Ix i, IArray a e) => Putter i -> Putter e -> Putter (a i e)
putIArrayOf pix pe a = do
putTwoOf pix pix (bounds a)
putListOf pe (elems a)
{-# INLINE putIArrayOf #-}
putSeqOf :: Putter a -> Putter (Seq.Seq a)
putSeqOf pa = \s -> do
putWord64be (fromIntegral $ Seq.length s)
F.mapM_ pa s
{-# INLINE putSeqOf #-}
putTreeOf :: Putter a -> Putter (T.Tree a)
putTreeOf pa =
tell . go
where
go (T.Node x cs) = execPut (pa x) `M.mappend` encodeListOf go cs
{-# INLINE putTreeOf #-}
putMapOf :: Putter k -> Putter a -> Putter (Map.Map k a)
putMapOf pk pa = putListOf (putTwoOf pk pa) . Map.toAscList
{-# INLINE putMapOf #-}
putIntMapOf :: Putter Int -> Putter a -> Putter (IntMap.IntMap a)
putIntMapOf pix pa = putListOf (putTwoOf pix pa) . IntMap.toAscList
{-# INLINE putIntMapOf #-}
putSetOf :: Putter a -> Putter (Set.Set a)
putSetOf pa = putListOf pa . Set.toAscList
{-# INLINE putSetOf #-}
putIntSetOf :: Putter Int -> Putter IntSet.IntSet
putIntSetOf pix = putListOf pix . IntSet.toAscList
{-# INLINE putIntSetOf #-}
putMaybeOf :: Putter a -> Putter (Maybe a)
putMaybeOf _ Nothing = putWord8 0
putMaybeOf pa (Just a) = putWord8 1 >> pa a
{-# INLINE putMaybeOf #-}
putEitherOf :: Putter a -> Putter b -> Putter (Either a b)
putEitherOf pa _ (Left a) = putWord8 0 >> pa a
putEitherOf _ pb (Right b) = putWord8 1 >> pb b
{-# INLINE putEitherOf #-}
-- | Put a nested structure by first putting a length
-- field and then putting the encoded value.
putNested :: Putter Int -> Put -> Put
putNested putLen putVal = do
let bs = runPut putVal
putLen (S.length bs)
putByteString bs
-------------------------------------------------------------------------------
-- pre-bytestring-0.10 compatibility
-------------------------------------------------------------------------------
{-# INLINE lazyToStrictByteString #-}
lazyToStrictByteString :: L.ByteString -> S.ByteString
#if MIN_VERSION_bytestring(0,10,0)
lazyToStrictByteString = L.toStrict
#else
lazyToStrictByteString = packChunks
-- packChunks is taken from the blaze-builder package.
-- | Pack the chunks of a lazy bytestring into a single strict bytestring.
packChunks :: L.ByteString -> S.ByteString
packChunks lbs = S.unsafeCreate (fromIntegral $ L.length lbs) (copyChunks lbs)
where
copyChunks !L.Empty !_pf = return ()
copyChunks !(L.Chunk (S.PS fpbuf o l) lbs') !pf = do
withForeignPtr fpbuf $ \pbuf ->
copyBytes pf (pbuf `plusPtr` o) l
copyChunks lbs' (pf `plusPtr` l)
#endif