Initial commit

This commit is contained in:
La Ancapo 2026-01-25 02:27:22 +01:00
commit c101616e62
309 changed files with 53937 additions and 0 deletions

34
bundled/Data/ByteArray.hs Normal file
View file

@ -0,0 +1,34 @@
-- |
-- Module : Data.ByteArray
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : stable
-- Portability : Good
--
-- Simple and efficient byte array types
--
-- This module should be imported qualified.
--
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Data.ByteArray
(
-- * ByteArray Classes
module Data.ByteArray.Types
-- * ByteArray built-in types
, module Data.ByteArray.Bytes
, module Data.ByteArray.ScrubbedBytes
, module Data.ByteArray.MemView
, module Data.ByteArray.View
-- * ByteArray methods
, module Data.ByteArray.Methods
) where
import Data.ByteArray.Types
import Data.ByteArray.Methods
import Data.ByteArray.ScrubbedBytes (ScrubbedBytes)
import Data.ByteArray.Bytes (Bytes)
import Data.ByteArray.MemView (MemView(..))
import Data.ByteArray.View (View, view, takeView, dropView)

View file

@ -0,0 +1,216 @@
-- |
-- Module : Data.ByteArray.Bytes
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : stable
-- Portability : Good
--
-- Simple and efficient byte array types
--
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE DeriveDataTypeable #-}
module Data.ByteArray.Bytes
( Bytes
) where
#if MIN_VERSION_base(4,15,0)
import GHC.Exts (unsafeCoerce#)
#endif
import GHC.Word
import GHC.Char (chr)
import GHC.Types
import GHC.Prim
import GHC.Ptr
#if MIN_VERSION_base(4,9,0)
import Data.Semigroup
import Data.Foldable (toList)
#else
import Data.Monoid
#endif
import Data.Memory.PtrMethods
import Data.Memory.Internal.Imports
import Data.Memory.Internal.CompatPrim
import Data.Memory.Internal.Compat (unsafeDoIO)
import Data.ByteArray.Types
import Data.Typeable
#ifdef MIN_VERSION_basement
import Basement.NormalForm
#endif
import Basement.IntegralConv
-- | Simplest Byte Array
data Bytes = Bytes (MutableByteArray# RealWorld)
deriving (Typeable)
instance Show Bytes where
showsPrec p b r = showsPrec p (bytesUnpackChars b []) r
instance Eq Bytes where
(==) = bytesEq
instance Ord Bytes where
compare = bytesCompare
#if MIN_VERSION_base(4,9,0)
instance Semigroup Bytes where
b1 <> b2 = unsafeDoIO $ bytesAppend b1 b2
sconcat = unsafeDoIO . bytesConcat . toList
#endif
instance Monoid Bytes where
mempty = unsafeDoIO (newBytes 0)
#if !(MIN_VERSION_base(4,11,0))
mappend b1 b2 = unsafeDoIO $ bytesAppend b1 b2
mconcat = unsafeDoIO . bytesConcat
#endif
instance NFData Bytes where
rnf b = b `seq` ()
#ifdef MIN_VERSION_basement
instance NormalForm Bytes where
toNormalForm b = b `seq` ()
#endif
instance ByteArrayAccess Bytes where
length = bytesLength
withByteArray = withBytes
instance ByteArray Bytes where
allocRet = bytesAllocRet
------------------------------------------------------------------------
newBytes :: Int -> IO Bytes
newBytes (I# sz)
| booleanPrim (sz <# 0#) = error "Bytes: size must be >= 0"
| otherwise = IO $ \s ->
case newAlignedPinnedByteArray# sz 8# s of
(# s', mbarr #) -> (# s', Bytes mbarr #)
touchBytes :: Bytes -> IO ()
touchBytes (Bytes mba) = IO $ \s -> case touch# mba s of s' -> (# s', () #)
{-# INLINE touchBytes #-}
sizeofBytes :: Bytes -> Int
sizeofBytes (Bytes mba) = I# (sizeofMutableByteArray# mba)
{-# INLINE sizeofBytes #-}
withPtr :: Bytes -> (Ptr p -> IO a) -> IO a
withPtr b@(Bytes mba) f = do
a <- f (Ptr (byteArrayContents# (unsafeCoerce# mba)))
touchBytes b
return a
------------------------------------------------------------------------
bytesAlloc :: Int -> (Ptr p -> IO ()) -> IO Bytes
bytesAlloc sz f = do
ba <- newBytes sz
withPtr ba f
return ba
bytesConcat :: [Bytes] -> IO Bytes
bytesConcat l = bytesAlloc retLen (copy l)
where
!retLen = sum $ map bytesLength l
copy [] _ = return ()
copy (x:xs) dst = do
withPtr x $ \src -> memCopy dst src chunkLen
copy xs (dst `plusPtr` chunkLen)
where
!chunkLen = bytesLength x
bytesAppend :: Bytes -> Bytes -> IO Bytes
bytesAppend b1 b2 = bytesAlloc retLen $ \dst -> do
withPtr b1 $ \s1 -> memCopy dst s1 len1
withPtr b2 $ \s2 -> memCopy (dst `plusPtr` len1) s2 len2
where
!len1 = bytesLength b1
!len2 = bytesLength b2
!retLen = len1 + len2
bytesAllocRet :: Int -> (Ptr p -> IO a) -> IO (a, Bytes)
bytesAllocRet sz f = do
ba <- newBytes sz
r <- withPtr ba f
return (r, ba)
bytesLength :: Bytes -> Int
bytesLength = sizeofBytes
{-# LANGUAGE bytesLength #-}
withBytes :: Bytes -> (Ptr p -> IO a) -> IO a
withBytes = withPtr
bytesEq :: Bytes -> Bytes -> Bool
bytesEq b1@(Bytes m1) b2@(Bytes m2)
| l1 /= l2 = False
| otherwise = unsafeDoIO $ IO $ \s -> loop 0# s
where
!l1@(I# len) = bytesLength b1
!l2 = bytesLength b2
loop i s
| booleanPrim (i ==# len) = (# s, True #)
| otherwise =
case readWord8Array# m1 i s of
(# s', e1 #) -> case readWord8Array# m2 i s' of
(# s'', e2 #) ->
if (W8# e1) == (W8# e2)
then loop (i +# 1#) s''
else (# s'', False #)
{-# INLINE loop #-}
bytesCompare :: Bytes -> Bytes -> Ordering
bytesCompare b1@(Bytes m1) b2@(Bytes m2) = unsafeDoIO $ loop 0
where
!l1 = bytesLength b1
!l2 = bytesLength b2
!len = min l1 l2
loop !i
| i == len =
if l1 == l2
then pure EQ
else if l1 > l2 then pure GT
else pure LT
| otherwise = do
e1 <- read8 m1 i
e2 <- read8 m2 i
if e1 == e2
then loop (i+1)
else if e1 < e2 then pure LT
else pure GT
read8 m (I# i) = IO $ \s -> case readWord8Array# m i s of
(# s2, e #) -> (# s2, W8# e #)
bytesUnpackChars :: Bytes -> String -> String
bytesUnpackChars (Bytes mba) xs = chunkLoop 0#
where
!len = sizeofMutableByteArray# mba
-- chunk 64 bytes at a time
chunkLoop :: Int# -> [Char]
chunkLoop idx
| booleanPrim (len ==# idx) = []
| booleanPrim ((len -# idx) ># 63#) =
bytesLoop idx 64# (chunkLoop (idx +# 64#))
| otherwise =
bytesLoop idx (len -# idx) xs
bytesLoop idx chunkLenM1 paramAcc = unsafeDoIO $
loop (idx +# chunkLenM1 -# 1#) paramAcc
where loop i acc
| booleanPrim (i ==# idx) = do
c <- rChar i
return (c : acc)
| otherwise = do
c <- rChar i
loop (i -# 1#) (c : acc)
rChar :: Int# -> IO Char
rChar idx = IO $ \s ->
case readWord8Array# mba idx s of
(# s2, w #) -> (# s2, chr (integralUpsize (W8# w)) #)
{-
bytesShowHex :: Bytes -> String
bytesShowHex b = showHexadecimal (withPtr b) (bytesLength b)
{-# NOINLINE bytesShowHex #-}
-}

View file

@ -0,0 +1,162 @@
-- |
-- Module : Data.ByteArray.Encoding
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : unknown
--
-- Base conversions for 'ByteArray'.
--
module Data.ByteArray.Encoding
( convertToBase
, convertFromBase
, Base(..)
) where
import Data.ByteArray.Types
import qualified Data.ByteArray.Types as B
import qualified Data.ByteArray.Methods as B
import Data.Memory.Internal.Compat
import Data.Memory.Encoding.Base16
import Data.Memory.Encoding.Base32
import Data.Memory.Encoding.Base64
-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.ByteString
-- | The different bases that can be used.
--
-- See <http://tools.ietf.org/html/rfc4648 RFC4648> for details.
-- In particular, Base64 can be standard or
-- <http://tools.ietf.org/html/rfc4648#section-5 URL-safe>. URL-safe
-- encoding is often used in other specifications without
-- <http://tools.ietf.org/html/rfc4648#section-3.2 padding> characters.
--
-- <https://www.ietf.org/rfc/rfc2045.txt RFC 2045>
-- defines a separate Base64 encoding, which is not supported. This format
-- requires a newline at least every 76 encoded characters, which works around
-- limitations of older email programs that could not handle long lines.
-- Be aware that other languages, such as Ruby, encode the RFC 2045 version
-- by default. To decode their output, remove all newlines before decoding.
--
-- ==== Examples
--
-- A quick example to show the differences:
--
-- >>> let input = "Is 3 > 2?" :: ByteString
-- >>> let convertedTo base = convertToBase base input :: ByteString
-- >>> convertedTo Base16
-- "49732033203e20323f"
-- >>> convertedTo Base32
-- "JFZSAMZAHYQDEPY="
-- >>> convertedTo Base64
-- "SXMgMyA+IDI/"
-- >>> convertedTo Base64URLUnpadded
-- "SXMgMyA-IDI_"
-- >>> convertedTo Base64OpenBSD
-- "QVKeKw.8GBG9"
--
data Base = Base16 -- ^ similar to hexadecimal
| Base32
| Base64 -- ^ standard Base64
| Base64URLUnpadded -- ^ unpadded URL-safe Base64
| Base64OpenBSD -- ^ Base64 as used in OpenBSD password encoding (such as bcrypt)
deriving (Show,Eq)
-- | Encode some bytes to the equivalent representation in a specific 'Base'.
--
-- ==== Examples
--
-- Convert a 'ByteString' to base-64:
--
-- >>> convertToBase Base64 ("foobar" :: ByteString) :: ByteString
-- "Zm9vYmFy"
--
convertToBase :: (ByteArrayAccess bin, ByteArray bout) => Base -> bin -> bout
convertToBase base b = case base of
Base16 -> doConvert (binLength * 2) toHexadecimal
Base32 -> let (q,r) = binLength `divMod` 5
outLen = 8 * (if r == 0 then q else q + 1)
in doConvert outLen toBase32
Base64 -> doConvert base64Length toBase64
-- Base64URL -> doConvert base64Length (toBase64URL True)
Base64URLUnpadded -> doConvert base64UnpaddedLength (toBase64URL False)
Base64OpenBSD -> doConvert base64UnpaddedLength toBase64OpenBSD
where
binLength = B.length b
base64Length = let (q,r) = binLength `divMod` 3
in 4 * (if r == 0 then q else q+1)
base64UnpaddedLength = let (q,r) = binLength `divMod` 3
in 4 * q + (if r == 0 then 0 else r+1)
doConvert l f =
B.unsafeCreate l $ \bout ->
B.withByteArray b $ \bin ->
f bout bin binLength
-- | Try to decode some bytes from the equivalent representation in a specific 'Base'.
--
-- ==== Examples
--
-- Successfully convert from base-64 to a 'ByteString':
--
-- >>> convertFromBase Base64 ("Zm9vYmFy" :: ByteString) :: Either String ByteString
-- Right "foobar"
--
-- Trying to decode invalid data will return an error string:
--
-- >>> convertFromBase Base64 ("!!!" :: ByteString) :: Either String ByteString
-- Left "base64: input: invalid length"
--
convertFromBase :: (ByteArrayAccess bin, ByteArray bout) => Base -> bin -> Either String bout
convertFromBase Base16 b
| odd (B.length b) = Left "base16: input: invalid length"
| otherwise = unsafeDoIO $ do
(ret, out) <-
B.allocRet (B.length b `div` 2) $ \bout ->
B.withByteArray b $ \bin ->
fromHexadecimal bout bin (B.length b)
case ret of
Nothing -> return $ Right out
Just ofs -> return $ Left ("base16: input: invalid encoding at offset: " ++ show ofs)
convertFromBase Base32 b = unsafeDoIO $
withByteArray b $ \bin -> do
mDstLen <- unBase32Length bin (B.length b)
case mDstLen of
Nothing -> return $ Left "base32: input: invalid length"
Just dstLen -> do
(ret, out) <- B.allocRet dstLen $ \bout -> fromBase32 bout bin (B.length b)
case ret of
Nothing -> return $ Right out
Just ofs -> return $ Left ("base32: input: invalid encoding at offset: " ++ show ofs)
convertFromBase Base64 b = unsafeDoIO $
withByteArray b $ \bin -> do
mDstLen <- unBase64Length bin (B.length b)
case mDstLen of
Nothing -> return $ Left "base64: input: invalid length"
Just dstLen -> do
(ret, out) <- B.allocRet dstLen $ \bout -> fromBase64 bout bin (B.length b)
case ret of
Nothing -> return $ Right out
Just ofs -> return $ Left ("base64: input: invalid encoding at offset: " ++ show ofs)
convertFromBase Base64URLUnpadded b = unsafeDoIO $
withByteArray b $ \bin ->
case unBase64LengthUnpadded (B.length b) of
Nothing -> return $ Left "base64URL unpadded: input: invalid length"
Just dstLen -> do
(ret, out) <- B.allocRet dstLen $ \bout -> fromBase64URLUnpadded bout bin (B.length b)
case ret of
Nothing -> return $ Right out
Just ofs -> return $ Left ("base64URL unpadded: input: invalid encoding at offset: " ++ show ofs)
convertFromBase Base64OpenBSD b = unsafeDoIO $
withByteArray b $ \bin ->
case unBase64LengthUnpadded (B.length b) of
Nothing -> return $ Left "base64 unpadded: input: invalid length"
Just dstLen -> do
(ret, out) <- B.allocRet dstLen $ \bout -> fromBase64OpenBSD bout bin (B.length b)
case ret of
Nothing -> return $ Right out
Just ofs -> return $ Left ("base64 unpadded: input: invalid encoding at offset: " ++ show ofs)

View file

@ -0,0 +1,78 @@
-- |
-- Module : Data.ByteArray.Hash
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : good
--
-- provide the SipHash algorithm.
-- reference: <http://131002.net/siphash/siphash.pdf>
--
{-# LANGUAGE BangPatterns #-}
module Data.ByteArray.Hash
(
-- * SipHash
SipKey(..)
, SipHash(..)
, sipHash
, sipHashWith
-- * FNV1 and FNV1a (32 and 64 bits)
, FnvHash32(..)
, FnvHash64(..)
, fnv1Hash
, fnv1aHash
, fnv1_64Hash
, fnv1a_64Hash
) where
import Data.Memory.Internal.Compat
import Data.Memory.Hash.SipHash
import Data.Memory.Hash.FNV
import qualified Data.ByteArray.Types as B
-- | Compute the SipHash tag of a byte array for a given key.
--
-- 'sipHash` is equivalent to 'sipHashWith 2 4'
sipHash :: B.ByteArrayAccess ba
=> SipKey
-> ba
-> SipHash
sipHash key ba = unsafeDoIO $ B.withByteArray ba $ \p -> hash key p (B.length ba)
-- | Compute the SipHash tag of a byte array for a given key.
--
-- The user can choose the C and D numbers of rounds.
--
-- calling 'sipHash` is equivalent to 'sipHashWith 2 4'
sipHashWith :: B.ByteArrayAccess ba
=> Int -- ^ c rounds
-> Int -- ^ d rounds
-> SipKey -- ^ key
-> ba -- ^ data to hash
-> SipHash
sipHashWith c d key ba = unsafeDoIO $ B.withByteArray ba $ \p -> hashWith c d key p (B.length ba)
-- | Compute the FNV1 32 bit hash value of a byte array
fnv1Hash :: B.ByteArrayAccess ba
=> ba
-> FnvHash32
fnv1Hash ba = unsafeDoIO $ B.withByteArray ba $ \p -> fnv1 p (B.length ba)
-- | Compute the FNV1a 32 bit hash value of a byte array
fnv1aHash :: B.ByteArrayAccess ba
=> ba
-> FnvHash32
fnv1aHash ba = unsafeDoIO $ B.withByteArray ba $ \p -> fnv1a p (B.length ba)
-- | Compute the FNV1 64 bit hash value of a byte array
fnv1_64Hash :: B.ByteArrayAccess ba
=> ba
-> FnvHash64
fnv1_64Hash ba = unsafeDoIO $ B.withByteArray ba $ \p -> fnv1_64 p (B.length ba)
-- | Compute the FNV1a 64 bit hash value of a byte array
fnv1a_64Hash :: B.ByteArrayAccess ba
=> ba
-> FnvHash64
fnv1a_64Hash ba = unsafeDoIO $ B.withByteArray ba $ \p -> fnv1a_64 p (B.length ba)

View file

@ -0,0 +1,84 @@
-- |
-- Module : Data.ByteArray.Mapping
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : stable
-- Portability : Good
--
module Data.ByteArray.Mapping
( toW64BE
, toW64LE
, fromW64BE
, mapAsWord64
, mapAsWord128
) where
import Data.ByteArray.Types
import Data.ByteArray.Methods
import Data.Memory.Internal.Compat
import Data.Memory.Internal.Imports hiding (empty)
import Data.Memory.Endian
import Data.Memory.ExtendedWords
import Foreign.Storable
import Foreign.Ptr
import Prelude hiding (length, take, drop, span, concat, replicate, splitAt, null, pred)
-- | Transform a bytearray at a specific offset into
-- a Word64 tagged as BE (Big Endian)
--
-- no bounds checking. unsafe
toW64BE :: ByteArrayAccess bs => bs -> Int -> BE Word64
toW64BE bs ofs = unsafeDoIO $ withByteArray bs $ \p -> peek (p `plusPtr` ofs)
-- | Transform a bytearray at a specific offset into
-- a Word64 tagged as LE (Little Endian)
--
-- no bounds checking. unsafe
toW64LE :: ByteArrayAccess bs => bs -> Int -> LE Word64
toW64LE bs ofs = unsafeDoIO $ withByteArray bs $ \p -> peek (p `plusPtr` ofs)
-- | Serialize a @Word64@ to a @ByteArray@ in big endian format
fromW64BE :: (ByteArray ba) => Word64 -> ba
fromW64BE n = allocAndFreeze 8 $ \p -> poke p (toBE n)
-- | map blocks of 128 bits of a bytearray, creating a new bytestring
-- of equivalent size where each blocks has been mapped through @f@
--
-- no length checking is done. unsafe
mapAsWord128 :: ByteArray bs => (Word128 -> Word128) -> bs -> bs
mapAsWord128 f bs =
unsafeCreate len $ \dst ->
withByteArray bs $ \src ->
loop (len `div` 16) dst src
where
len = length bs
loop :: Int -> Ptr (BE Word64) -> Ptr (BE Word64) -> IO ()
loop 0 _ _ = return ()
loop i d s = do
w1 <- peek s
w2 <- peek (s `plusPtr` 8)
let (Word128 r1 r2) = f (Word128 (fromBE w1) (fromBE w2))
poke d (toBE r1)
poke (d `plusPtr` 8) (toBE r2)
loop (i-1) (d `plusPtr` 16) (s `plusPtr` 16)
-- | map blocks of 64 bits of a bytearray, creating a new bytestring
-- of equivalent size where each blocks has been mapped through @f@
--
-- no length checking is done. unsafe
mapAsWord64 :: ByteArray bs => (Word64 -> Word64) -> bs -> bs
mapAsWord64 f bs =
unsafeCreate len $ \dst ->
withByteArray bs $ \src ->
loop (len `div` 8) dst src
where
len = length bs
loop :: Int -> Ptr (BE Word64) -> Ptr (BE Word64) -> IO ()
loop 0 _ _ = return ()
loop i d s = do
w <- peek s
let r = f (fromBE w)
poke d (toBE r)
loop (i-1) (d `plusPtr` 8) (s `plusPtr` 8)

View file

@ -0,0 +1,38 @@
-- |
-- Module : Data.ByteArray.MemView
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : stable
-- Portability : Good
--
module Data.ByteArray.MemView
( MemView(..)
, memViewPlus
) where
import Foreign.Ptr
import Data.ByteArray.Types
import Data.Memory.Internal.Imports
-- | A simple abstraction to a piece of memory.
--
-- Do beware that garbage collection related to
-- piece of memory could be triggered before this
-- is used.
--
-- Only use with the appropriate handler has been
-- used (e.g. withForeignPtr on ForeignPtr)
--
data MemView = MemView {-# UNPACK #-} !(Ptr Word8) {-# UNPACK #-} !Int
deriving (Show,Eq)
instance ByteArrayAccess MemView where
length (MemView _ l) = l
withByteArray (MemView p _) f = f (castPtr p)
-- | Increase the memory view while reducing the size of the window
--
-- this is useful as an abstraction to represent the current offset
-- in a buffer, and the remaining bytes left.
memViewPlus :: MemView -> Int -> MemView
memViewPlus (MemView p len) n = MemView (p `plusPtr` n) (len - n)

View file

@ -0,0 +1,312 @@
-- |
-- Module : Data.ByteArray.Methods
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : stable
-- Portability : Good
--
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
module Data.ByteArray.Methods
( alloc
, allocAndFreeze
, create
, unsafeCreate
, pack
, unpack
, uncons
, empty
, singleton
, cons
, snoc
, null
, replicate
, zero
, copy
, take
, drop
, span
, reverse
, convert
, copyRet
, copyAndFreeze
, splitAt
, xor
, index
, eq
, constEq
, any
, all
, append
, concat
) where
import Data.ByteArray.Types
import Data.Memory.Internal.Compat
import Data.Memory.Internal.Imports hiding (empty)
import Data.Memory.PtrMethods
import Data.Monoid
import Foreign.Storable
import Foreign.Ptr
import Prelude hiding (length, take, drop, span, reverse, concat, replicate, splitAt, null, pred, last, any, all)
import qualified Prelude
#if defined(WITH_BYTESTRING_SUPPORT) && defined(WITH_BASEMENT_SUPPORT)
import qualified Data.ByteString as SPE (ByteString)
import qualified Basement.UArray as SPE (UArray)
import qualified Basement.Block as SPE (Block)
#endif
-- | Allocate a new bytearray of specific size, and run the initializer on this memory
alloc :: ByteArray ba => Int -> (Ptr p -> IO ()) -> IO ba
alloc n f
| n < 0 = alloc 0 f
| otherwise = snd `fmap` allocRet n f
{-# INLINE alloc #-}
-- | Allocate a new bytearray of specific size, and run the initializer on this memory
create :: ByteArray ba => Int -> (Ptr p -> IO ()) -> IO ba
create n f = alloc n f
-- | similar to 'alloc' but hide the allocation and initializer in a pure context
allocAndFreeze :: ByteArray a => Int -> (Ptr p -> IO ()) -> a
allocAndFreeze sz f = unsafeDoIO (alloc sz f)
{-# NOINLINE allocAndFreeze #-}
-- | similar to 'create' but hide the allocation and initializer in a pure context
unsafeCreate :: ByteArray a => Int -> (Ptr p -> IO ()) -> a
unsafeCreate sz f = unsafeDoIO (alloc sz f)
{-# NOINLINE unsafeCreate #-}
inlineUnsafeCreate :: ByteArray a => Int -> (Ptr p -> IO ()) -> a
inlineUnsafeCreate !sz f = unsafeDoIO (alloc sz f)
{-# INLINE inlineUnsafeCreate #-}
-- | Create an empty byte array
empty :: ByteArray a => a
empty = unsafeDoIO (alloc 0 $ \_ -> return ())
-- | Check if a byte array is empty
null :: ByteArrayAccess a => a -> Bool
null b = length b == 0
-- | Pack a list of bytes into a bytearray
pack :: ByteArray a => [Word8] -> a
pack l = inlineUnsafeCreate (Prelude.length l) (fill l)
where fill [] _ = return ()
fill (x:xs) !p = poke p x >> fill xs (p `plusPtr` 1)
{-# INLINE fill #-}
{-# NOINLINE pack #-}
-- | Un-pack a bytearray into a list of bytes
unpack :: ByteArrayAccess a => a -> [Word8]
unpack bs = loop 0
where !len = length bs
loop i
| i == len = []
| otherwise =
let !v = unsafeDoIO $ withByteArray bs (\p -> peekByteOff p i)
in v : loop (i+1)
-- | returns the first byte, and the remaining bytearray if the bytearray is not null
uncons :: ByteArray a => a -> Maybe (Word8, a)
uncons a
| null a = Nothing
| otherwise = Just (index a 0, drop 1 a)
-- | Create a byte array from a single byte
singleton :: ByteArray a => Word8 -> a
singleton b = unsafeCreate 1 (\p -> pokeByteOff p 0 b)
-- | prepend a single byte to a byte array
cons :: ByteArray a => Word8 -> a -> a
cons b ba = unsafeCreate (len + 1) $ \d -> withByteArray ba $ \s -> do
pokeByteOff d 0 b
memCopy (d `plusPtr` 1) s len
where len = length ba
-- | append a single byte to a byte array
snoc :: ByteArray a => a -> Word8 -> a
snoc ba b = unsafeCreate (len + 1) $ \d -> withByteArray ba $ \s -> do
memCopy d s len
pokeByteOff d len b
where len = length ba
-- | Create a xor of bytes between a and b.
--
-- the returns byte array is the size of the smallest input.
xor :: (ByteArrayAccess a, ByteArrayAccess b, ByteArray c) => a -> b -> c
xor a b =
unsafeCreate n $ \pc ->
withByteArray a $ \pa ->
withByteArray b $ \pb ->
memXor pc pa pb n
where
n = min la lb
la = length a
lb = length b
-- | return a specific byte indexed by a number from 0 in a bytearray
--
-- unsafe, no bound checking are done
index :: ByteArrayAccess a => a -> Int -> Word8
index b i = unsafeDoIO $ withByteArray b $ \p -> peek (p `plusPtr` i)
-- | Split a bytearray at a specific length in two bytearray
splitAt :: ByteArray bs => Int -> bs -> (bs, bs)
splitAt n bs
| n <= 0 = (empty, bs)
| n >= len = (bs, empty)
| otherwise = unsafeDoIO $ do
withByteArray bs $ \p -> do
b1 <- alloc n $ \r -> memCopy r p n
b2 <- alloc (len - n) $ \r -> memCopy r (p `plusPtr` n) (len - n)
return (b1, b2)
where len = length bs
-- | Take the first @n@ byte of a bytearray
take :: ByteArray bs => Int -> bs -> bs
take n bs
| n <= 0 = empty
| otherwise = unsafeCreate m $ \d -> withByteArray bs $ \s -> memCopy d s m
where
!m = min len n
!len = length bs
-- | drop the first @n@ byte of a bytearray
drop :: ByteArray bs => Int -> bs -> bs
drop n bs
| n <= 0 = bs
| nb == 0 = empty
| otherwise = unsafeCreate nb $ \d -> withByteArray bs $ \s -> memCopy d (s `plusPtr` ofs) nb
where
ofs = min len n
nb = len - ofs
len = length bs
-- | Split a bytearray at the point where @pred@ becomes invalid
span :: ByteArray bs => (Word8 -> Bool) -> bs -> (bs, bs)
span pred bs
| null bs = (bs, bs)
| otherwise = let n = loop 0 in (take n bs, drop n bs)
where loop !i
| i >= len = len
| pred (index bs i) = loop (i+1)
| otherwise = i
len = length bs
-- | Reverse a bytearray
reverse :: ByteArray bs => bs -> bs
reverse bs = unsafeCreate n $ \d -> withByteArray bs $ \s -> memReverse d s n
where n = length bs
-- | Concatenate bytearray into a larger bytearray
concat :: (ByteArrayAccess bin, ByteArray bout) => [bin] -> bout
concat l = unsafeCreate retLen (loopCopy l)
where
retLen = sum $ map length l
loopCopy [] _ = return ()
loopCopy (x:xs) dst = do
copyByteArrayToPtr x dst
loopCopy xs (dst `plusPtr` chunkLen)
where
!chunkLen = length x
-- | append one bytearray to the other
append :: ByteArray bs => bs -> bs -> bs
append = mappend
-- | Duplicate a bytearray into another bytearray, and run an initializer on it
copy :: (ByteArrayAccess bs1, ByteArray bs2) => bs1 -> (Ptr p -> IO ()) -> IO bs2
copy bs f =
alloc (length bs) $ \d -> do
copyByteArrayToPtr bs d
f (castPtr d)
-- | Similar to 'copy' but also provide a way to return a value from the initializer
copyRet :: (ByteArrayAccess bs1, ByteArray bs2) => bs1 -> (Ptr p -> IO a) -> IO (a, bs2)
copyRet bs f =
allocRet (length bs) $ \d -> do
copyByteArrayToPtr bs d
f (castPtr d)
-- | Similiar to 'copy' but expect the resulting bytearray in a pure context
copyAndFreeze :: (ByteArrayAccess bs1, ByteArray bs2) => bs1 -> (Ptr p -> IO ()) -> bs2
copyAndFreeze bs f =
inlineUnsafeCreate (length bs) $ \d -> do
copyByteArrayToPtr bs d
f (castPtr d)
{-# NOINLINE copyAndFreeze #-}
-- | Create a bytearray of a specific size containing a repeated byte value
replicate :: ByteArray ba => Int -> Word8 -> ba
replicate 0 _ = empty
replicate n b
| n < 0 = empty
| otherwise = inlineUnsafeCreate n $ \ptr -> memSet ptr b n
{-# NOINLINE replicate #-}
-- | Create a bytearray of a specific size initialized to 0
zero :: ByteArray ba => Int -> ba
zero 0 = empty
zero n
| n < 0 = empty
| otherwise = unsafeCreate n $ \ptr -> memSet ptr 0 n
{-# NOINLINE zero #-}
-- | Check if two bytearray are equals
--
-- This is not constant time, as soon some byte differs the function will
-- returns. use 'constEq' in sensitive context where timing matters.
eq :: (ByteArrayAccess bs1, ByteArrayAccess bs2) => bs1 -> bs2 -> Bool
eq b1 b2
| l1 /= l2 = False
| otherwise = unsafeDoIO $ withByteArray b1 $ \p1 -> withByteArray b2 $ \p2 -> memEqual p1 p2 l1
where
l1 = length b1
l2 = length b2
-- | A constant time equality test for 2 ByteArrayAccess values.
--
-- If values are of 2 different sizes, the function will abort early
-- without comparing any bytes.
--
-- compared to == , this function will go over all the bytes
-- present before yielding a result even when knowing the
-- overall result early in the processing.
constEq :: (ByteArrayAccess bs1, ByteArrayAccess bs2) => bs1 -> bs2 -> Bool
constEq b1 b2
| l1 /= l2 = False
| otherwise = unsafeDoIO $ withByteArray b1 $ \p1 -> withByteArray b2 $ \p2 -> memConstEqual p1 p2 l1
where
!l1 = length b1
!l2 = length b2
-- | Check if any element of a byte array satisfies a predicate
any :: (ByteArrayAccess ba) => (Word8 -> Bool) -> ba -> Bool
any f b
| null b = False
| otherwise = unsafeDoIO $ withByteArray b $ \p -> loop p 0
where
len = length b
loop p i
| i == len = return False
| otherwise = do
w <- peekByteOff p i
if f w then return True else loop p (i+1)
-- | Check if all elements of a byte array satisfy a predicate
all :: (ByteArrayAccess ba) => (Word8 -> Bool) -> ba -> Bool
all f b = not (any (not . f) b)
-- | Convert a bytearray to another type of bytearray
convert :: (ByteArrayAccess bin, ByteArray bout) => bin -> bout
convert bs = inlineUnsafeCreate (length bs) (copyByteArrayToPtr bs)
#if defined(WITH_BYTESTRING_SUPPORT) && defined(WITH_BASEMENT_SUPPORT)
{-# SPECIALIZE convert :: SPE.ByteString -> SPE.UArray Word8 #-}
{-# SPECIALIZE convert :: SPE.UArray Word8 -> SPE.ByteString #-}
{-# SPECIALIZE convert :: SPE.ByteString -> SPE.Block Word8 #-}
{-# SPECIALIZE convert :: SPE.Block Word8 -> SPE.ByteString #-}
#endif

View file

@ -0,0 +1,145 @@
-- |
-- Module : Data.ByteArray.Pack
-- License : BSD-Style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : unknown
--
-- Simple Byte Array packer
--
-- Simple example:
--
-- > > flip pack 20 $ putWord8 0x41 >> putByteString "BCD" >> putWord8 0x20 >> putStorable (42 :: Word32)
-- > Right (ABCD *\NUL\NUL\NUL")
--
-- Original code from <https://hackage.haskell.org/package/bspack>
-- generalized and adapted to run on 'memory', and spellchecked / tweaked. (2015-05)
-- Copyright (c) 2014 Nicolas DI PRIMA
--
module Data.ByteArray.Pack
( Packer
, Result(..)
, fill
, pack
-- * Operations
-- ** put
, putWord8
, putWord16
, putWord32
, putStorable
, putBytes
, fillList
, fillUpWith
-- ** skip
, skip
, skipStorable
) where
import Data.Word
import Foreign.Ptr
import Foreign.Storable
import Data.Memory.Internal.Imports ()
import Data.Memory.Internal.Compat
import Data.Memory.PtrMethods
import Data.ByteArray.Pack.Internal
import Data.ByteArray (ByteArray, ByteArrayAccess, MemView(..))
import qualified Data.ByteArray as B
-- | Fill a given sized buffer with the result of the Packer action
fill :: ByteArray byteArray => Int -> Packer a -> Either String byteArray
fill len packing = unsafeDoIO $ do
(val, out) <- B.allocRet len $ \ptr -> runPacker_ packing (MemView ptr len)
case val of
PackerMore _ (MemView _ r)
| r == 0 -> return $ Right out
| otherwise -> return $ Left ("remaining unpacked bytes " ++ show r ++ " at the end of buffer")
PackerFail err -> return $ Left err
-- | Pack the given packer into the given bytestring
pack :: ByteArray byteArray => Packer a -> Int -> Either String byteArray
pack packing len = fill len packing
{-# DEPRECATED pack "use fill instead" #-}
fillUpWithWord8' :: Word8 -> Packer ()
fillUpWithWord8' w = Packer $ \(MemView ptr size) -> do
memSet ptr w size
return $ PackerMore () (MemView (ptr `plusPtr` size) 0)
-- | Put a storable from the current position in the stream
putStorable :: Storable storable => storable -> Packer ()
putStorable s = actionPacker (sizeOf s) (\ptr -> poke (castPtr ptr) s)
-- | Put a Byte Array from the current position in the stream
--
-- If the ByteArray is null, then do nothing
putBytes :: ByteArrayAccess ba => ba -> Packer ()
putBytes bs
| neededLength == 0 = return ()
| otherwise =
actionPacker neededLength $ \dstPtr -> B.withByteArray bs $ \srcPtr ->
memCopy dstPtr srcPtr neededLength
where
neededLength = B.length bs
-- | Skip some bytes from the current position in the stream
skip :: Int -> Packer ()
skip n = actionPacker n (\_ -> return ())
-- | Skip the size of a storable from the current position in the stream
skipStorable :: Storable storable => storable -> Packer ()
skipStorable = skip . sizeOf
-- | Fill up from the current position in the stream to the end
--
-- It is equivalent to:
--
-- > fillUpWith s == fillList (repeat s)
--
fillUpWith :: Storable storable => storable -> Packer ()
fillUpWith s = fillList $ repeat s
{-# RULES "fillUpWithWord8" forall s . fillUpWith s = fillUpWithWord8' s #-}
{-# NOINLINE fillUpWith #-}
-- | Will put the given storable list from the current position in the stream
-- to the end.
--
-- This function will fail with not enough storage if the given storable can't
-- be written (not enough space)
--
-- Example:
--
-- > > pack (fillList $ [1..] :: Word8) 9
-- > "\1\2\3\4\5\6\7\8\9"
-- > > pack (fillList $ [1..] :: Word32) 4
-- > "\1\0\0\0"
-- > > pack (fillList $ [1..] :: Word32) 64
-- > .. <..succesful..>
-- > > pack (fillList $ [1..] :: Word32) 1
-- > .. <.. not enough space ..>
-- > > pack (fillList $ [1..] :: Word32) 131
-- > .. <.. not enough space ..>
--
fillList :: Storable storable => [storable] -> Packer ()
fillList [] = return ()
fillList (x:xs) = putStorable x >> fillList xs
------------------------------------------------------------------------------
-- Common packer --
------------------------------------------------------------------------------
-- | put Word8 in the current position in the stream
putWord8 :: Word8 -> Packer ()
putWord8 = putStorable
{-# INLINE putWord8 #-}
-- | put Word16 in the current position in the stream
-- /!\ use Host Endianness
putWord16 :: Word16 -> Packer ()
putWord16 = putStorable
{-# INLINE putWord16 #-}
-- | put Word32 in the current position in the stream
-- /!\ use Host Endianness
putWord32 :: Word32 -> Packer ()
putWord32 = putStorable
{-# INLINE putWord32 #-}

View file

@ -0,0 +1,88 @@
-- |
-- Module : Data.ByteArray.Pack.Internal
-- License : BSD-Style
-- Copyright : Copyright © 2014 Nicolas DI PRIMA
--
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : unknown
--
module Data.ByteArray.Pack.Internal
( Result(..)
, Packer(..)
, actionPacker
, actionPackerWithRemain
) where
import Foreign.Ptr (Ptr)
import Data.ByteArray.MemView
import Data.Memory.Internal.Imports
-- | Packing result:
--
-- * PackerMore: the next state of Packing with an arbitrary value
-- * PackerFail: an error happened
data Result a =
PackerMore a MemView
| PackerFail String
deriving (Show)
-- | Simple ByteArray Packer
newtype Packer a = Packer { runPacker_ :: MemView -> IO (Result a) }
instance Functor Packer where
fmap = fmapPacker
instance Applicative Packer where
pure = returnPacker
(<*>) = appendPacker
instance Monad Packer where
return = pure
(>>=) = bindPacker
fmapPacker :: (a -> b) -> Packer a -> Packer b
fmapPacker f p = Packer $ \cache -> do
rv <- runPacker_ p cache
return $ case rv of
PackerMore v cache' -> PackerMore (f v) cache'
PackerFail err -> PackerFail err
{-# INLINE fmapPacker #-}
returnPacker :: a -> Packer a
returnPacker v = Packer $ \cache -> return $ PackerMore v cache
{-# INLINE returnPacker #-}
bindPacker :: Packer a -> (a -> Packer b) -> Packer b
bindPacker p fp = Packer $ \cache -> do
rv <- runPacker_ p cache
case rv of
PackerMore v cache' -> runPacker_ (fp v) cache'
PackerFail err -> return $ PackerFail err
{-# INLINE bindPacker #-}
appendPacker :: Packer (a -> b) -> Packer a -> Packer b
appendPacker p1f p2 = p1f >>= \p1 -> p2 >>= \v -> return (p1 v)
{-# INLINE appendPacker #-}
-- | run a sized action
actionPacker :: Int -> (Ptr Word8 -> IO a) -> Packer a
actionPacker s action = Packer $ \m@(MemView ptr size) ->
case compare size s of
LT -> return $ PackerFail "Not enough space in destination"
_ -> do
v <- action ptr
return $ PackerMore v (m `memViewPlus` s)
{-# INLINE actionPacker #-}
-- | run a sized action
actionPackerWithRemain :: Int -> (Ptr Word8 -> Int -> IO (Int, a)) -> Packer a
actionPackerWithRemain s action = Packer $ \m@(MemView ptr size) ->
case compare size s of
LT -> return $ PackerFail "Not enough space in destination"
_ -> do
(remain, v) <- action ptr size
return $ if remain > s
then PackerFail "remaining bytes higher than the destination's size"
else PackerMore v (m `memViewPlus` (s+remain))
{-# INLINE actionPackerWithRemain #-}

View file

@ -0,0 +1,258 @@
-- |
-- Module : Data.ByteArray.Parse
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : portable
--
-- A very simple bytearray parser related to Parsec and Attoparsec
--
-- Simple example:
--
-- > > parse ((,,) <$> take 2 <*> byte 0x20 <*> (bytes "abc" *> anyByte)) "xx abctest"
-- > ParseOK "est" ("xx", 116)
--
{-# LANGUAGE CPP #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE OverloadedStrings #-}
module Data.ByteArray.Parse
( Parser
, Result(..)
-- * run the Parser
, parse
, parseFeed
-- * Parser methods
, hasMore
, byte
, anyByte
, bytes
, take
, takeWhile
, takeAll
, skip
, skipWhile
, skipAll
, takeStorable
) where
import Control.Monad
import qualified Control.Monad.Fail as Fail
import Foreign.Storable (Storable, peek, sizeOf)
import Data.Word
import Data.Memory.Internal.Imports
import Data.Memory.Internal.Compat
import Data.ByteArray.Types (ByteArrayAccess, ByteArray)
import qualified Data.ByteArray.Types as B
import qualified Data.ByteArray.Methods as B
import Prelude hiding (take, takeWhile)
-- | Simple parsing result, that represent respectively:
--
-- * failure: with the error message
--
-- * continuation: that need for more input data
--
-- * success: the remaining unparsed data and the parser value
data Result byteArray a =
ParseFail String
| ParseMore (Maybe byteArray -> Result byteArray a)
| ParseOK byteArray a
instance (Show ba, Show a) => Show (Result ba a) where
show (ParseFail err) = "ParseFailure: " ++ err
show (ParseMore _) = "ParseMore _"
show (ParseOK b a) = "ParseOK " ++ show a ++ " " ++ show b
-- | The continuation of the current buffer, and the error string
type Failure byteArray r = byteArray -> String -> Result byteArray r
-- | The continuation of the next buffer value, and the parsed value
type Success byteArray a r = byteArray -> a -> Result byteArray r
-- | Simple ByteString parser structure
newtype Parser byteArray a = Parser
{ runParser :: forall r . byteArray
-> Failure byteArray r
-> Success byteArray a r
-> Result byteArray r }
instance Functor (Parser byteArray) where
fmap f p = Parser $ \buf err ok ->
runParser p buf err (\b a -> ok b (f a))
instance Applicative (Parser byteArray) where
pure v = Parser $ \buf _ ok -> ok buf v
(<*>) d e = d >>= \b -> e >>= \a -> return (b a)
instance Monad (Parser byteArray) where
#if !(MIN_VERSION_base(4,13,0))
fail = Fail.fail
#endif
return = pure
m >>= k = Parser $ \buf err ok ->
runParser m buf err (\buf' a -> runParser (k a) buf' err ok)
instance Fail.MonadFail (Parser byteArray) where
fail errorMsg = Parser $ \buf err _ -> err buf ("Parser failed: " ++ errorMsg)
instance MonadPlus (Parser byteArray) where
mzero = fail "MonadPlus.mzero"
mplus f g = Parser $ \buf err ok ->
-- rewrite the err callback of @f to call @g
runParser f buf (\_ _ -> runParser g buf err ok) ok
instance Alternative (Parser byteArray) where
empty = fail "Alternative.empty"
(<|>) = mplus
-- | Run a parser on an @initial byteArray.
--
-- If the Parser need more data than available, the @feeder function
-- is automatically called and fed to the More continuation.
parseFeed :: (ByteArrayAccess byteArray, Monad m)
=> m (Maybe byteArray)
-> Parser byteArray a
-> byteArray
-> m (Result byteArray a)
parseFeed feeder p initial = loop $ parse p initial
where loop (ParseMore k) = feeder >>= (loop . k)
loop r = return r
-- | Run a Parser on a ByteString and return a 'Result'
parse :: ByteArrayAccess byteArray
=> Parser byteArray a -> byteArray -> Result byteArray a
parse p s = runParser p s (\_ msg -> ParseFail msg) (\b a -> ParseOK b a)
------------------------------------------------------------
-- When needing more data, getMore append the next data
-- to the current buffer. if no further data, then
-- the err callback is called.
getMore :: ByteArray byteArray => Parser byteArray ()
getMore = Parser $ \buf err ok -> ParseMore $ \nextChunk ->
case nextChunk of
Nothing -> err buf "EOL: need more data"
Just nc
| B.null nc -> runParser getMore buf err ok
| otherwise -> ok (B.append buf nc) ()
-- Only used by takeAll, which accumulate all the remaining data
-- until ParseMore is fed a Nothing value.
--
-- getAll cannot fail.
getAll :: ByteArray byteArray => Parser byteArray ()
getAll = Parser $ \buf err ok -> ParseMore $ \nextChunk ->
case nextChunk of
Nothing -> ok buf ()
Just nc -> runParser getAll (B.append buf nc) err ok
-- Only used by skipAll, which flush all the remaining data
-- until ParseMore is fed a Nothing value.
--
-- flushAll cannot fail.
flushAll :: ByteArray byteArray => Parser byteArray ()
flushAll = Parser $ \buf err ok -> ParseMore $ \nextChunk ->
case nextChunk of
Nothing -> ok buf ()
Just _ -> runParser flushAll B.empty err ok
------------------------------------------------------------
hasMore :: ByteArray byteArray => Parser byteArray Bool
hasMore = Parser $ \buf err ok ->
if B.null buf
then ParseMore $ \nextChunk ->
case nextChunk of
Nothing -> ok buf False
Just nc -> runParser hasMore nc err ok
else ok buf True
-- | Get the next byte from the parser
anyByte :: ByteArray byteArray => Parser byteArray Word8
anyByte = Parser $ \buf err ok ->
case B.uncons buf of
Nothing -> runParser (getMore >> anyByte) buf err ok
Just (c1,b2) -> ok b2 c1
-- | Parse a specific byte at current position
--
-- if the byte is different than the expected on,
-- this parser will raise a failure.
byte :: ByteArray byteArray => Word8 -> Parser byteArray ()
byte w = Parser $ \buf err ok ->
case B.uncons buf of
Nothing -> runParser (getMore >> byte w) buf err ok
Just (c1,b2) | c1 == w -> ok b2 ()
| otherwise -> err buf ("byte " ++ show w ++ " : failed : got " ++ show c1)
-- | Parse a sequence of bytes from current position
--
-- if the following bytes don't match the expected
-- bytestring completely, the parser will raise a failure
bytes :: (Show ba, Eq ba, ByteArray ba) => ba -> Parser ba ()
bytes allExpected = consumeEq allExpected
where errMsg = "bytes " ++ show allExpected ++ " : failed"
-- partially consume as much as possible or raise an error.
consumeEq expected = Parser $ \actual err ok ->
let eLen = B.length expected in
if B.length actual >= eLen
then -- enough data for doing a full match
let (aMatch,aRem) = B.splitAt eLen actual
in if aMatch == expected
then ok aRem ()
else err actual errMsg
else -- not enough data, match as much as we have, and then recurse.
let (eMatch, eRem) = B.splitAt (B.length actual) expected
in if actual == eMatch
then runParser (getMore >> consumeEq eRem) B.empty err ok
else err actual errMsg
------------------------------------------------------------
-- | Take a storable from the current position in the stream
takeStorable :: (ByteArray byteArray, Storable d)
=> Parser byteArray d
takeStorable = anyStorable undefined
where
anyStorable :: ByteArray byteArray => Storable d => d -> Parser byteArray d
anyStorable a = do
buf <- take (sizeOf a)
return $ unsafeDoIO $ B.withByteArray buf $ \ptr -> peek ptr
-- | Take @n bytes from the current position in the stream
take :: ByteArray byteArray => Int -> Parser byteArray byteArray
take n = Parser $ \buf err ok ->
if B.length buf >= n
then let (b1,b2) = B.splitAt n buf in ok b2 b1
else runParser (getMore >> take n) buf err ok
-- | Take bytes while the @predicate hold from the current position in the stream
takeWhile :: ByteArray byteArray => (Word8 -> Bool) -> Parser byteArray byteArray
takeWhile predicate = Parser $ \buf err ok ->
let (b1, b2) = B.span predicate buf
in if B.null b2
then runParser (getMore >> takeWhile predicate) buf err ok
else ok b2 b1
-- | Take the remaining bytes from the current position in the stream
takeAll :: ByteArray byteArray => Parser byteArray byteArray
takeAll = Parser $ \buf err ok ->
runParser (getAll >> returnBuffer) buf err ok
where
returnBuffer = Parser $ \buf _ ok -> ok B.empty buf
-- | Skip @n bytes from the current position in the stream
skip :: ByteArray byteArray => Int -> Parser byteArray ()
skip n = Parser $ \buf err ok ->
if B.length buf >= n
then ok (B.drop n buf) ()
else runParser (getMore >> skip (n - B.length buf)) B.empty err ok
-- | Skip bytes while the @predicate hold from the current position in the stream
skipWhile :: ByteArray byteArray => (Word8 -> Bool) -> Parser byteArray ()
skipWhile p = Parser $ \buf err ok ->
let (_, b2) = B.span p buf
in if B.null b2
then runParser (getMore >> skipWhile p) B.empty err ok
else ok b2 ()
-- | Skip all the remaining bytes from the current position in the stream
skipAll :: ByteArray byteArray => Parser byteArray ()
skipAll = Parser $ \buf err ok -> runParser flushAll buf err ok

View file

@ -0,0 +1,205 @@
-- |
-- Module : Data.ByteArray.ScrubbedBytes
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : Stable
-- Portability : GHC
--
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
module Data.ByteArray.ScrubbedBytes
( ScrubbedBytes
) where
import GHC.Types
import GHC.Prim
import GHC.Ptr
import GHC.Word
#if MIN_VERSION_base(4,15,0)
import GHC.Exts (unsafeCoerce#)
#endif
#if MIN_VERSION_base(4,9,0)
import Data.Semigroup
import Data.Foldable (toList)
#else
import Data.Monoid
#endif
import Data.String (IsString(..))
import Data.Typeable
import Data.Memory.PtrMethods
import Data.Memory.Internal.CompatPrim
import Data.Memory.Internal.Compat (unsafeDoIO)
import Data.Memory.Internal.Imports
import Data.ByteArray.Types
import Foreign.Storable
#ifdef MIN_VERSION_basement
import Basement.NormalForm
#endif
-- | ScrubbedBytes is a memory chunk which have the properties of:
--
-- * Being scrubbed after its goes out of scope.
--
-- * A Show instance that doesn't actually show any content
--
-- * A Eq instance that is constant time
--
data ScrubbedBytes = ScrubbedBytes (MutableByteArray# RealWorld)
deriving (Typeable)
instance Show ScrubbedBytes where
show _ = "<scrubbed-bytes>"
instance Eq ScrubbedBytes where
(==) = scrubbedBytesEq
instance Ord ScrubbedBytes where
compare = scrubbedBytesCompare
#if MIN_VERSION_base(4,9,0)
instance Semigroup ScrubbedBytes where
b1 <> b2 = unsafeDoIO $ scrubbedBytesAppend b1 b2
sconcat = unsafeDoIO . scrubbedBytesConcat . toList
#endif
instance Monoid ScrubbedBytes where
mempty = unsafeDoIO (newScrubbedBytes 0)
#if !(MIN_VERSION_base(4,11,0))
mappend b1 b2 = unsafeDoIO $ scrubbedBytesAppend b1 b2
mconcat = unsafeDoIO . scrubbedBytesConcat
#endif
instance NFData ScrubbedBytes where
rnf b = b `seq` ()
#ifdef MIN_VERSION_basement
instance NormalForm ScrubbedBytes where
toNormalForm b = b `seq` ()
#endif
instance IsString ScrubbedBytes where
fromString = scrubbedFromChar8
instance ByteArrayAccess ScrubbedBytes where
length = sizeofScrubbedBytes
withByteArray = withPtr
instance ByteArray ScrubbedBytes where
allocRet = scrubbedBytesAllocRet
newScrubbedBytes :: Int -> IO ScrubbedBytes
newScrubbedBytes (I# sz)
| booleanPrim (sz <# 0#) = error "ScrubbedBytes: size must be >= 0"
| booleanPrim (sz ==# 0#) = IO $ \s ->
case newAlignedPinnedByteArray# 0# 8# s of
(# s2, mba #) -> (# s2, ScrubbedBytes mba #)
| otherwise = IO $ \s ->
case newAlignedPinnedByteArray# sz 8# s of
(# s1, mbarr #) ->
let !scrubber = getScrubber (byteArrayContents# (unsafeCoerce# mbarr))
!mba = ScrubbedBytes mbarr
in case mkWeak# mbarr () (finalize scrubber mba) s1 of
(# s2, _ #) -> (# s2, mba #)
where
getScrubber :: Addr# -> State# RealWorld -> State# RealWorld
getScrubber addr s =
let IO scrubBytes = memSet (Ptr addr) 0 (I# sz)
in case scrubBytes s of
(# s', _ #) -> s'
#if __GLASGOW_HASKELL__ >= 800
finalize :: (State# RealWorld -> State# RealWorld) -> ScrubbedBytes -> State# RealWorld -> (# State# RealWorld, () #)
finalize scrubber mba@(ScrubbedBytes _) = \s1 ->
case scrubber s1 of
s2 -> case touch# mba s2 of
s3 -> (# s3, () #)
#else
finalize :: (State# RealWorld -> State# RealWorld) -> ScrubbedBytes -> IO ()
finalize scrubber mba@(ScrubbedBytes _) = IO $ \s1 -> do
case scrubber s1 of
s2 -> case touch# mba s2 of
s3 -> (# s3, () #)
#endif
scrubbedBytesAllocRet :: Int -> (Ptr p -> IO a) -> IO (a, ScrubbedBytes)
scrubbedBytesAllocRet sz f = do
ba <- newScrubbedBytes sz
r <- withPtr ba f
return (r, ba)
scrubbedBytesAlloc :: Int -> (Ptr p -> IO ()) -> IO ScrubbedBytes
scrubbedBytesAlloc sz f = do
ba <- newScrubbedBytes sz
withPtr ba f
return ba
scrubbedBytesConcat :: [ScrubbedBytes] -> IO ScrubbedBytes
scrubbedBytesConcat l = scrubbedBytesAlloc retLen (copy l)
where
retLen = sum $ map sizeofScrubbedBytes l
copy [] _ = return ()
copy (x:xs) dst = do
withPtr x $ \src -> memCopy dst src chunkLen
copy xs (dst `plusPtr` chunkLen)
where
chunkLen = sizeofScrubbedBytes x
scrubbedBytesAppend :: ScrubbedBytes -> ScrubbedBytes -> IO ScrubbedBytes
scrubbedBytesAppend b1 b2 = scrubbedBytesAlloc retLen $ \dst -> do
withPtr b1 $ \s1 -> memCopy dst s1 len1
withPtr b2 $ \s2 -> memCopy (dst `plusPtr` len1) s2 len2
where
len1 = sizeofScrubbedBytes b1
len2 = sizeofScrubbedBytes b2
retLen = len1 + len2
sizeofScrubbedBytes :: ScrubbedBytes -> Int
sizeofScrubbedBytes (ScrubbedBytes mba) = I# (sizeofMutableByteArray# mba)
withPtr :: ScrubbedBytes -> (Ptr p -> IO a) -> IO a
withPtr b@(ScrubbedBytes mba) f = do
a <- f (Ptr (byteArrayContents# (unsafeCoerce# mba)))
touchScrubbedBytes b
return a
touchScrubbedBytes :: ScrubbedBytes -> IO ()
touchScrubbedBytes (ScrubbedBytes mba) = IO $ \s -> case touch# mba s of s' -> (# s', () #)
scrubbedBytesEq :: ScrubbedBytes -> ScrubbedBytes -> Bool
scrubbedBytesEq a b
| l1 /= l2 = False
| otherwise = unsafeDoIO $ withPtr a $ \p1 -> withPtr b $ \p2 -> memConstEqual p1 p2 l1
where
l1 = sizeofScrubbedBytes a
l2 = sizeofScrubbedBytes b
scrubbedBytesCompare :: ScrubbedBytes -> ScrubbedBytes -> Ordering
scrubbedBytesCompare b1@(ScrubbedBytes m1) b2@(ScrubbedBytes m2) = unsafeDoIO $ loop 0
where
!l1 = sizeofScrubbedBytes b1
!l2 = sizeofScrubbedBytes b2
!len = min l1 l2
loop !i
| i == len =
if l1 == l2
then pure EQ
else if l1 > l2 then pure GT
else pure LT
| otherwise = do
e1 <- read8 m1 i
e2 <- read8 m2 i
if e1 == e2
then loop (i+1)
else if e1 < e2 then pure LT
else pure GT
read8 m (I# i) = IO $ \s -> case readWord8Array# m i s of
(# s2, e #) -> (# s2, W8# e #)
scrubbedFromChar8 :: [Char] -> ScrubbedBytes
scrubbedFromChar8 l = unsafeDoIO $ scrubbedBytesAlloc len (fill l)
where
len = Prelude.length l
fill :: [Char] -> Ptr Word8 -> IO ()
fill [] _ = return ()
fill (x:xs) !p = poke p (fromIntegral $ fromEnum x) >> fill xs (p `plusPtr` 1)

View file

@ -0,0 +1,398 @@
-- |
-- Module : Data.ByteArray.Sized
-- License : BSD-style
-- Maintainer : Nicolas Di Prima <nicolas@primetype.co.uk>
-- Stability : stable
-- Portability : Good
--
{-# LANGUAGE CPP #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE UndecidableInstances #-}
#if __GLASGOW_HASKELL__ >= 806
{-# LANGUAGE NoStarIsType #-}
#endif
module Data.ByteArray.Sized
( ByteArrayN(..)
, SizedByteArray
, unSizedByteArray
, sizedByteArray
, unsafeSizedByteArray
, -- * ByteArrayN operators
alloc
, create
, allocAndFreeze
, unsafeCreate
, inlineUnsafeCreate
, empty
, pack
, unpack
, cons
, snoc
, xor
, index
, splitAt
, take
, drop
, append
, copy
, copyRet
, copyAndFreeze
, replicate
, zero
, convert
, fromByteArrayAccess
, unsafeFromByteArrayAccess
) where
import Basement.Imports
import Basement.NormalForm
import Basement.Nat
import Basement.Numerical.Additive ((+))
import Basement.Numerical.Subtractive ((-))
import Basement.Sized.List (ListN, unListN, toListN)
import Foreign.Storable
import Foreign.Ptr
import Data.Maybe (fromMaybe)
import Data.Memory.Internal.Compat
import Data.Memory.PtrMethods
import Data.Proxy (Proxy(..))
import Data.ByteArray.Types (ByteArrayAccess(..), ByteArray)
import qualified Data.ByteArray.Types as ByteArray (allocRet)
#if MIN_VERSION_basement(0,0,7)
import Basement.BlockN (BlockN)
import qualified Basement.BlockN as BlockN
import qualified Basement.PrimType as Base
import Basement.Types.OffsetSize (Countable)
#endif
-- | Type class to emulate exactly the behaviour of 'ByteArray' but with
-- a known length at compile time
--
class (ByteArrayAccess c, KnownNat n) => ByteArrayN (n :: Nat) c | c -> n where
-- | just like 'allocRet' but with the size at the type level
allocRet :: forall p a
. Proxy n
-> (Ptr p -> IO a)
-> IO (a, c)
-- | Wrapper around any collection type with the size as type parameter
--
newtype SizedByteArray (n :: Nat) ba = SizedByteArray { unSizedByteArray :: ba }
deriving (Eq, Show, Typeable, Ord, NormalForm)
-- | create a 'SizedByteArray' from the given 'ByteArrayAccess' if the
-- size is the same as the target size.
--
sizedByteArray :: forall n ba . (KnownNat n, ByteArrayAccess ba)
=> ba
-> Maybe (SizedByteArray n ba)
sizedByteArray ba
| length ba == n = Just $ SizedByteArray ba
| otherwise = Nothing
where
n = fromInteger $ natVal (Proxy @n)
-- | just like the 'sizedByteArray' function but throw an exception if
-- the size is invalid.
unsafeSizedByteArray :: forall n ba . (ByteArrayAccess ba, KnownNat n) => ba -> SizedByteArray n ba
unsafeSizedByteArray = fromMaybe (error "The size is invalid") . sizedByteArray
instance (ByteArrayAccess ba, KnownNat n) => ByteArrayAccess (SizedByteArray n ba) where
length _ = fromInteger $ natVal (Proxy @n)
withByteArray (SizedByteArray ba) = withByteArray ba
instance (KnownNat n, ByteArray ba) => ByteArrayN n (SizedByteArray n ba) where
allocRet p f = do
(a, ba) <- ByteArray.allocRet n f
pure (a, SizedByteArray ba)
where
n = fromInteger $ natVal p
#if MIN_VERSION_basement(0,0,7)
instance ( ByteArrayAccess (BlockN n ty)
, PrimType ty
, KnownNat n
, Countable ty n
, KnownNat nbytes
, nbytes ~ (Base.PrimSize ty * n)
) => ByteArrayN nbytes (BlockN n ty) where
allocRet _ f = do
mba <- BlockN.new @n
a <- BlockN.withMutablePtrHint True False mba (f . castPtr)
ba <- BlockN.freeze mba
return (a, ba)
#endif
-- | Allocate a new bytearray of specific size, and run the initializer on this memory
alloc :: forall n ba p . (ByteArrayN n ba, KnownNat n)
=> (Ptr p -> IO ())
-> IO ba
alloc f = snd <$> allocRet (Proxy @n) f
-- | Allocate a new bytearray of specific size, and run the initializer on this memory
create :: forall n ba p . (ByteArrayN n ba, KnownNat n)
=> (Ptr p -> IO ())
-> IO ba
create = alloc @n
{-# NOINLINE create #-}
-- | similar to 'allocN' but hide the allocation and initializer in a pure context
allocAndFreeze :: forall n ba p . (ByteArrayN n ba, KnownNat n)
=> (Ptr p -> IO ()) -> ba
allocAndFreeze f = unsafeDoIO (alloc @n f)
{-# NOINLINE allocAndFreeze #-}
-- | similar to 'createN' but hide the allocation and initializer in a pure context
unsafeCreate :: forall n ba p . (ByteArrayN n ba, KnownNat n)
=> (Ptr p -> IO ()) -> ba
unsafeCreate f = unsafeDoIO (alloc @n f)
{-# NOINLINE unsafeCreate #-}
inlineUnsafeCreate :: forall n ba p . (ByteArrayN n ba, KnownNat n)
=> (Ptr p -> IO ()) -> ba
inlineUnsafeCreate f = unsafeDoIO (alloc @n f)
{-# INLINE inlineUnsafeCreate #-}
-- | Create an empty byte array
empty :: forall ba . ByteArrayN 0 ba => ba
empty = unsafeDoIO (alloc @0 $ \_ -> return ())
-- | Pack a list of bytes into a bytearray
pack :: forall n ba . (ByteArrayN n ba, KnownNat n) => ListN n Word8 -> ba
pack l = inlineUnsafeCreate @n (fill $ unListN l)
where fill [] _ = return ()
fill (x:xs) !p = poke p x >> fill xs (p `plusPtr` 1)
{-# INLINE fill #-}
{-# NOINLINE pack #-}
-- | Un-pack a bytearray into a list of bytes
unpack :: forall n ba
. (ByteArrayN n ba, KnownNat n, NatWithinBound Int n, ByteArrayAccess ba)
=> ba -> ListN n Word8
unpack bs = fromMaybe (error "the impossible appened") $ toListN @n $ loop 0
where !len = length bs
loop i
| i == len = []
| otherwise =
let !v = unsafeDoIO $ withByteArray bs (`peekByteOff` i)
in v : loop (i+1)
-- | prepend a single byte to a byte array
cons :: forall ni no bi bo
. ( ByteArrayN ni bi, ByteArrayN no bo, ByteArrayAccess bi
, KnownNat ni, KnownNat no
, (ni + 1) ~ no
)
=> Word8 -> bi -> bo
cons b ba = unsafeCreate @no $ \d -> withByteArray ba $ \s -> do
pokeByteOff d 0 b
memCopy (d `plusPtr` 1) s len
where
!len = fromInteger $ natVal (Proxy @ni)
-- | append a single byte to a byte array
snoc :: forall bi bo ni no
. ( ByteArrayN ni bi, ByteArrayN no bo, ByteArrayAccess bi
, KnownNat ni, KnownNat no
, (ni + 1) ~ no
)
=> bi -> Word8 -> bo
snoc ba b = unsafeCreate @no $ \d -> withByteArray ba $ \s -> do
memCopy d s len
pokeByteOff d len b
where
!len = fromInteger $ natVal (Proxy @ni)
-- | Create a xor of bytes between a and b.
--
-- the returns byte array is the size of the smallest input.
xor :: forall n a b c
. ( ByteArrayN n a, ByteArrayN n b, ByteArrayN n c
, ByteArrayAccess a, ByteArrayAccess b
, KnownNat n
)
=> a -> b -> c
xor a b =
unsafeCreate @n $ \pc ->
withByteArray a $ \pa ->
withByteArray b $ \pb ->
memXor pc pa pb n
where
n = fromInteger (natVal (Proxy @n))
-- | return a specific byte indexed by a number from 0 in a bytearray
--
-- unsafe, no bound checking are done
index :: forall n na ba
. ( ByteArrayN na ba, ByteArrayAccess ba
, KnownNat na, KnownNat n
, n <= na
)
=> ba -> Proxy n -> Word8
index b pi = unsafeDoIO $ withByteArray b $ \p -> peek (p `plusPtr` i)
where
i = fromInteger $ natVal pi
-- | Split a bytearray at a specific length in two bytearray
splitAt :: forall nblhs nbi nbrhs bi blhs brhs
. ( ByteArrayN nbi bi, ByteArrayN nblhs blhs, ByteArrayN nbrhs brhs
, ByteArrayAccess bi
, KnownNat nbi, KnownNat nblhs, KnownNat nbrhs
, nblhs <= nbi, (nbrhs + nblhs) ~ nbi
)
=> bi -> (blhs, brhs)
splitAt bs = unsafeDoIO $
withByteArray bs $ \p -> do
b1 <- alloc @nblhs $ \r -> memCopy r p n
b2 <- alloc @nbrhs $ \r -> memCopy r (p `plusPtr` n) (len - n)
return (b1, b2)
where
n = fromInteger $ natVal (Proxy @nblhs)
len = length bs
-- | Take the first @n@ byte of a bytearray
take :: forall nbo nbi bi bo
. ( ByteArrayN nbi bi, ByteArrayN nbo bo
, ByteArrayAccess bi
, KnownNat nbi, KnownNat nbo
, nbo <= nbi
)
=> bi -> bo
take bs = unsafeCreate @nbo $ \d -> withByteArray bs $ \s -> memCopy d s m
where
!m = min len n
!len = length bs
!n = fromInteger $ natVal (Proxy @nbo)
-- | drop the first @n@ byte of a bytearray
drop :: forall n nbi nbo bi bo
. ( ByteArrayN nbi bi, ByteArrayN nbo bo
, ByteArrayAccess bi
, KnownNat n, KnownNat nbi, KnownNat nbo
, (nbo + n) ~ nbi
)
=> Proxy n -> bi -> bo
drop pn bs = unsafeCreate @nbo $ \d ->
withByteArray bs $ \s ->
memCopy d (s `plusPtr` ofs) nb
where
ofs = min len n
nb = len - ofs
len = length bs
n = fromInteger $ natVal pn
-- | append one bytearray to the other
append :: forall nblhs nbrhs nbout blhs brhs bout
. ( ByteArrayN nblhs blhs, ByteArrayN nbrhs brhs, ByteArrayN nbout bout
, ByteArrayAccess blhs, ByteArrayAccess brhs
, KnownNat nblhs, KnownNat nbrhs, KnownNat nbout
, (nbrhs + nblhs) ~ nbout
)
=> blhs -> brhs -> bout
append blhs brhs = unsafeCreate @nbout $ \p ->
withByteArray blhs $ \plhs ->
withByteArray brhs $ \prhs -> do
memCopy p plhs (length blhs)
memCopy (p `plusPtr` length blhs) prhs (length brhs)
-- | Duplicate a bytearray into another bytearray, and run an initializer on it
copy :: forall n bs1 bs2 p
. ( ByteArrayN n bs1, ByteArrayN n bs2
, ByteArrayAccess bs1
, KnownNat n
)
=> bs1 -> (Ptr p -> IO ()) -> IO bs2
copy bs f = alloc @n $ \d -> do
withByteArray bs $ \s -> memCopy d s (length bs)
f (castPtr d)
-- | Similar to 'copy' but also provide a way to return a value from the initializer
copyRet :: forall n bs1 bs2 p a
. ( ByteArrayN n bs1, ByteArrayN n bs2
, ByteArrayAccess bs1
, KnownNat n
)
=> bs1 -> (Ptr p -> IO a) -> IO (a, bs2)
copyRet bs f =
allocRet (Proxy @n) $ \d -> do
withByteArray bs $ \s -> memCopy d s (length bs)
f (castPtr d)
-- | Similiar to 'copy' but expect the resulting bytearray in a pure context
copyAndFreeze :: forall n bs1 bs2 p
. ( ByteArrayN n bs1, ByteArrayN n bs2
, ByteArrayAccess bs1
, KnownNat n
)
=> bs1 -> (Ptr p -> IO ()) -> bs2
copyAndFreeze bs f =
inlineUnsafeCreate @n $ \d -> do
copyByteArrayToPtr bs d
f (castPtr d)
{-# NOINLINE copyAndFreeze #-}
-- | Create a bytearray of a specific size containing a repeated byte value
replicate :: forall n ba . (ByteArrayN n ba, KnownNat n)
=> Word8 -> ba
replicate b = inlineUnsafeCreate @n $ \ptr -> memSet ptr b (fromInteger $ natVal $ Proxy @n)
{-# NOINLINE replicate #-}
-- | Create a bytearray of a specific size initialized to 0
zero :: forall n ba . (ByteArrayN n ba, KnownNat n) => ba
zero = unsafeCreate @n $ \ptr -> memSet ptr 0 (fromInteger $ natVal $ Proxy @n)
{-# NOINLINE zero #-}
-- | Convert a bytearray to another type of bytearray
convert :: forall n bin bout
. ( ByteArrayN n bin, ByteArrayN n bout
, KnownNat n
)
=> bin -> bout
convert bs = inlineUnsafeCreate @n (copyByteArrayToPtr bs)
-- | Convert a ByteArrayAccess to another type of bytearray
--
-- This function returns nothing if the size is not compatible
fromByteArrayAccess :: forall n bin bout
. ( ByteArrayAccess bin, ByteArrayN n bout
, KnownNat n
)
=> bin -> Maybe bout
fromByteArrayAccess bs
| l == n = Just $ inlineUnsafeCreate @n (copyByteArrayToPtr bs)
| otherwise = Nothing
where
l = length bs
n = fromInteger $ natVal (Proxy @n)
-- | Convert a ByteArrayAccess to another type of bytearray
unsafeFromByteArrayAccess :: forall n bin bout
. ( ByteArrayAccess bin, ByteArrayN n bout
, KnownNat n
)
=> bin -> bout
unsafeFromByteArrayAccess bs = case fromByteArrayAccess @n @bin @bout bs of
Nothing -> error "Invalid Size"
Just v -> v

View file

@ -0,0 +1,133 @@
-- |
-- Module : Data.ByteArray.Types
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : stable
-- Portability : Good
--
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
module Data.ByteArray.Types
( ByteArrayAccess(..)
, ByteArray(..)
) where
import Foreign.Ptr
import Data.Monoid
#ifdef WITH_BYTESTRING_SUPPORT
import qualified Data.ByteString as Bytestring (length)
import qualified Data.ByteString.Internal as Bytestring
import Foreign.ForeignPtr (withForeignPtr)
#endif
import Data.Memory.PtrMethods (memCopy)
import Data.Proxy (Proxy(..))
import Data.Word (Word8)
import qualified Basement.Types.OffsetSize as Base
import qualified Basement.UArray as Base
import qualified Basement.String as Base (String, toBytes, Encoding(UTF8))
import qualified Basement.UArray.Mutable as BaseMutable (withMutablePtrHint)
import qualified Basement.Block as Block
import qualified Basement.Block.Mutable as Block
import Basement.Nat
import qualified Basement.Sized.Block as BlockN
import Prelude hiding (length)
-- | Class to Access size properties and data of a ByteArray
class ByteArrayAccess ba where
-- | Return the length in bytes of a bytearray
length :: ba -> Int
-- | Allow to use using a pointer
withByteArray :: ba -> (Ptr p -> IO a) -> IO a
-- | Copy the data of a bytearray to a ptr
copyByteArrayToPtr :: ba -> Ptr p -> IO ()
copyByteArrayToPtr a dst = withByteArray a $ \src -> memCopy (castPtr dst) src (length a)
-- | Class to allocate new ByteArray of specific size
class (Eq ba, Ord ba, Monoid ba, ByteArrayAccess ba) => ByteArray ba where
-- | allocate `n` bytes and perform the given operation
allocRet :: Int
-- ^ number of bytes to allocate. i.e. might not match the
-- size of the given type `ba`.
-> (Ptr p -> IO a)
-> IO (a, ba)
#ifdef WITH_BYTESTRING_SUPPORT
instance ByteArrayAccess Bytestring.ByteString where
length = Bytestring.length
withByteArray (Bytestring.PS fptr off _) f = withForeignPtr fptr $ \ptr -> f $! (ptr `plusPtr` off)
instance ByteArray Bytestring.ByteString where
allocRet sz f = do
fptr <- Bytestring.mallocByteString sz
r <- withForeignPtr fptr (f . castPtr)
return (r, Bytestring.PS fptr 0 sz)
#endif
#ifdef WITH_BASEMENT_SUPPORT
baseBlockRecastW8 :: Base.PrimType ty => Block.Block ty -> Block.Block Word8
baseBlockRecastW8 = Block.unsafeCast -- safe with Word8 destination
instance Base.PrimType ty => ByteArrayAccess (Block.Block ty) where
length a = let Base.CountOf i = Block.length (baseBlockRecastW8 a) in i
withByteArray a f = Block.withPtr (baseBlockRecastW8 a) (f . castPtr)
copyByteArrayToPtr ba dst = do
mb <- Block.unsafeThaw (baseBlockRecastW8 ba)
Block.copyToPtr mb 0 (castPtr dst) (Block.length $ baseBlockRecastW8 ba)
instance (KnownNat n, Base.PrimType ty, Base.Countable ty n) => ByteArrayAccess (BlockN.BlockN n ty) where
length a = let Base.CountOf i = BlockN.lengthBytes a in i
withByteArray a f = BlockN.withPtr a (f . castPtr)
copyByteArrayToPtr bna = copyByteArrayToPtr (BlockN.toBlock bna)
baseUarrayRecastW8 :: Base.PrimType ty => Base.UArray ty -> Base.UArray Word8
baseUarrayRecastW8 = Base.recast
instance Base.PrimType ty => ByteArrayAccess (Base.UArray ty) where
length a = let Base.CountOf i = Base.length (baseUarrayRecastW8 a) in i
withByteArray a f = Base.withPtr (baseUarrayRecastW8 a) (f . castPtr)
copyByteArrayToPtr ba dst = Base.copyToPtr ba (castPtr dst)
instance ByteArrayAccess Base.String where
length str = let Base.CountOf i = Base.length bytes in i
where
-- the Foundation's length return a number of elements not a number of
-- bytes. For @ByteArrayAccess@, because we are using an @Int@, we
-- didn't see that we were returning the wrong @CountOf@.
bytes = Base.toBytes Base.UTF8 str
withByteArray s f = withByteArray (Base.toBytes Base.UTF8 s) f
instance (Ord ty, Base.PrimType ty) => ByteArray (Block.Block ty) where
allocRet sz f = do
mba <- Block.new $ sizeRecastBytes sz Proxy
a <- Block.withMutablePtrHint True False mba (f . castPtr)
ba <- Block.unsafeFreeze mba
return (a, ba)
instance (Ord ty, Base.PrimType ty) => ByteArray (Base.UArray ty) where
allocRet sz f = do
mba <- Base.new $ sizeRecastBytes sz Proxy
a <- BaseMutable.withMutablePtrHint True False mba (f . castPtr)
ba <- Base.unsafeFreeze mba
return (a, ba)
sizeRecastBytes :: Base.PrimType ty => Int -> Proxy ty -> Base.CountOf ty
sizeRecastBytes w p = Base.CountOf $
let (q,r) = w `Prelude.quotRem` szTy
in q + (if r == 0 then 0 else 1)
where !(Base.CountOf szTy) = Base.primSizeInBytes p
{-# INLINE [1] sizeRecastBytes #-}
#endif

View file

@ -0,0 +1,128 @@
-- |
-- Module : Data.ByteArray.View
-- License : BSD-style
-- Maintainer : Nicolas DI PRIMA <nicolas@di-prima.fr>
-- Stability : stable
-- Portability : Good
--
-- a View on a given ByteArrayAccess
--
module Data.ByteArray.View
( View
, view
, takeView
, dropView
) where
import Data.ByteArray.Methods
import Data.ByteArray.Types
import Data.Memory.PtrMethods
import Data.Memory.Internal.Compat
import Foreign.Ptr (plusPtr)
import Prelude hiding (length, take, drop)
-- | a view on a given bytes
--
-- Equality test in constant time
data View bytes = View
{ viewOffset :: !Int
, viewSize :: !Int
, unView :: !bytes
}
instance ByteArrayAccess bytes => Eq (View bytes) where
(==) = constEq
instance ByteArrayAccess bytes => Ord (View bytes) where
compare v1 v2 = unsafeDoIO $
withByteArray v1 $ \ptr1 ->
withByteArray v2 $ \ptr2 -> do
ret <- memCompare ptr1 ptr2 (min (viewSize v1) (viewSize v2))
return $ case ret of
EQ | length v1 > length v2 -> GT
| length v1 < length v2 -> LT
| length v1 == length v2 -> EQ
_ -> ret
instance ByteArrayAccess bytes => Show (View bytes) where
showsPrec p v r = showsPrec p (viewUnpackChars v []) r
instance ByteArrayAccess bytes => ByteArrayAccess (View bytes) where
length = viewSize
withByteArray v f = withByteArray (unView v) $ \ptr -> f (ptr `plusPtr` (viewOffset v))
viewUnpackChars :: ByteArrayAccess bytes
=> View bytes
-> String
-> String
viewUnpackChars v xs = chunkLoop 0
where
len = length v
chunkLoop :: Int -> [Char]
chunkLoop idx
| len == idx = []
| (len - idx) > 63 =
bytesLoop idx (idx + 64) (chunkLoop (idx + 64))
| otherwise =
bytesLoop idx (len - idx) xs
bytesLoop :: Int -> Int -> [Char] -> [Char]
bytesLoop idx chunkLenM1 paramAcc =
loop (idx + chunkLenM1 - 1) paramAcc
where
loop i acc
| i == idx = (rChar i : acc)
| otherwise = loop (i - 1) (rChar i : acc)
rChar :: Int -> Char
rChar idx = toEnum $ fromIntegral $ index v idx
-- | create a view on a given bytearray
--
-- This function update the offset and the size in order to guarantee:
--
-- * offset >= 0
-- * size >= 0
-- * offset < length
-- * size =< length - offset
--
view :: ByteArrayAccess bytes
=> bytes -- ^ the byte array we put a view on
-> Int -- ^ the offset to start the byte array on
-> Int -- ^ the size of the view
-> View bytes
view b offset'' size'' = View offset size b
where
-- make sure offset is not negative
offset' :: Int
offset' = max offset'' 0
-- make sure the offset is not out of bound
offset :: Int
offset = min offset' (length b - 1)
-- make sure length is not negative
size' :: Int
size' = max size'' 0
-- make sure the length is not out of the bound
size :: Int
size = min size' (length b - offset)
-- | create a view from the given bytearray
takeView :: ByteArrayAccess bytes
=> bytes -- ^ byte aray
-> Int -- ^ size of the view
-> View bytes
takeView b size = view b 0 size
-- | create a view from the given byte array
-- starting after having dropped the fist n bytes
dropView :: ByteArrayAccess bytes
=> bytes -- ^ byte array
-> Int -- ^ the number of bytes do dropped before creating the view
-> View bytes
dropView b offset = view b offset (length b - offset)

View file

@ -0,0 +1,225 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Base32
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.ByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32
-- encoding format. This includes padded and unpadded decoding variants, as well as
-- internal and external validation for canonicity.
--
module Data.ByteString.Base32
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
-- , decodeBase32Lenient
-- * Validation
, isBase32
, isValidBase32
) where
import qualified Data.ByteString as BS
import Data.ByteString.Internal (ByteString(..))
import Data.ByteString.Base32.Internal
import Data.ByteString.Base32.Internal.Tables
import Data.Either (isRight)
import Data.Text (Text)
import qualified Data.Text.Encoding as T
import System.IO.Unsafe (unsafeDupablePerformIO)
-- | Encode a 'ByteString' value as a Base32 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "KN2W4==="
--
encodeBase32 :: ByteString -> Text
encodeBase32 = T.decodeUtf8 . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ByteString' value as a Base32 'ByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32' "Sun"
-- "KN2W4==="
--
encodeBase32' :: ByteString -> ByteString
encodeBase32' = encodeBase32_ "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"#
{-# INLINE encodeBase32' #-}
-- | Decode an arbitrarily padded Base32-encoded 'ByteString' value. If its length
-- is not a multiple of 8, then padding characters will be added to fill out the
-- input to a multiple of 8 for safe decoding, as Base32-encoded values are
-- optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32 "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ByteString -> Either Text ByteString
decodeBase32 bs@(BS _ !l)
| l == 0 = Right bs
| r == 0 = unsafeDupablePerformIO $ decodeBase32_ stdDecodeTable bs
| r == 2 = unsafeDupablePerformIO $ decodeBase32_ stdDecodeTable (BS.append bs "======")
| r == 4 = validateLastNPads 2 bs $ decodeBase32_ stdDecodeTable (BS.append bs "====")
| r == 5 = validateLastNPads 3 bs $ decodeBase32_ stdDecodeTable (BS.append bs "===")
| r == 7 = validateLastNPads 5 bs $ decodeBase32_ stdDecodeTable (BS.append bs "=")
| otherwise = Left "Base32-encoded bytestring has invalid size"
where
!r = l `rem` 8
{-# INLINE decodeBase32 #-}
-- | Encode a 'ByteString' value as a Base32 'Text' value without padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded "Sun"
-- "KN2W4"
--
encodeBase32Unpadded :: ByteString -> Text
encodeBase32Unpadded = T.decodeUtf8 . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ByteString' value as a Base32 'ByteString' value without padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "KN2W4"
--
encodeBase32Unpadded' :: ByteString -> ByteString
encodeBase32Unpadded' = encodeBase32NoPad_ "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"#
{-# INLINE encodeBase32Unpadded' #-}
-- | Decode an unpadded Base32-encoded 'ByteString' value.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "KN2W4==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ByteString -> Either Text ByteString
decodeBase32Unpadded bs@(BS _ !l)
| l == 0 = Right bs
| r == 0 = validateLastNPads 1 bs $ decodeBase32_ stdDecodeTable bs
| r == 2 = unsafeDupablePerformIO $ decodeBase32_ stdDecodeTable (BS.append bs "======")
| r == 4 = validateLastNPads 1 bs $ decodeBase32_ stdDecodeTable (BS.append bs "====")
| r == 5 = validateLastNPads 1 bs $ decodeBase32_ stdDecodeTable (BS.append bs "===")
| r == 7 = validateLastNPads 1 bs $ decodeBase32_ stdDecodeTable (BS.append bs "=")
| otherwise = Left "Base32-encoded bytestring has invalid size"
where
!r = l `rem` 8
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32-encoded 'ByteString' value.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "KN2W4"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ByteString -> Either Text ByteString
decodeBase32Padded bs@(BS _ !l)
| l == 0 = Right bs
| r == 1 = Left "Base32-encoded bytestring has invalid size"
| r == 3 = Left "Base32-encoded bytestring has invalid size"
| r == 6 = Left "Base32-encoded bytestring has invalid size"
| r /= 0 = Left "Base32-encoded bytestring requires padding"
| otherwise = unsafeDupablePerformIO $ decodeBase32_ stdDecodeTable bs
where
!r = l `rem` 8
{-# INLINE decodeBase32Padded #-}
-- -- | Leniently decode an unpadded Base32-encoded 'ByteString' value. This function
-- -- will not generate parse errors. If input data contains padding chars,
-- -- then the input will be parsed up until the first pad character.
-- --
-- -- __Note:__ This is not RFC 4648-compliant.
-- --
-- decodeBase32Lenient :: ByteString -> ByteString
-- decodeBase32Lenient = decodeBase32Lenient_ decodeB32Table
-- {-# INLINE decodeBase32Lenient #-}
-- | Tell whether a 'ByteString' value is encoded in padded or unpadded Base32 format
--
-- === __Examples__:
--
-- >>> isBase32 "KN2W4"
-- True
--
-- >>> isBase32 "KN2W4==="
-- True
--
-- >>> isBase32 "KN2W4=="
-- False
--
isBase32 :: ByteString -> Bool
isBase32 bs = isValidBase32 bs && isRight (decodeBase32 bs)
{-# INLINE isBase32 #-}
-- | Tell whether a 'ByteString' value is a valid Base32 format.
--
-- This will not tell you whether or not this is a correct Base32 representation,
-- only that it conforms to the correct shape (including padding/size etc.).
-- To check whether it is a true Base32 encoded 'ByteString' value, use 'isBase32'.
--
-- === __Examples__:
--
-- >>> isValidBase32 "KN2W4"
-- True
--
-- >>> isValidBase32 "KN2W4="
-- False
--
-- >>> isValidBase32 "KN2W4%"
-- False
--
isValidBase32 :: ByteString -> Bool
isValidBase32 = validateBase32 "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
{-# INLINE isValidBase32 #-}

View file

@ -0,0 +1,215 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Base32.Hex
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.ByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32hex
-- encoding format. This includes padded and unpadded decoding variants, as well as
-- internal and external validation for canonicity.
--
module Data.ByteString.Base32.Hex
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
-- , decodeBase32Lenient
-- * Validation
, isBase32Hex
, isValidBase32Hex
) where
import qualified Data.ByteString as BS
import Data.ByteString.Internal (ByteString(..))
import Data.ByteString.Base32.Internal
import Data.ByteString.Base32.Internal.Tables
import Data.Either (isRight)
import Data.Text (Text)
import qualified Data.Text.Encoding as T
import System.IO.Unsafe (unsafeDupablePerformIO)
-- | Encode a 'ByteString' value as a Base32hex 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "ADQMS==="
--
encodeBase32 :: ByteString -> Text
encodeBase32 = T.decodeUtf8 . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ByteString' value as a Base32hex 'ByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32' "Sun"
-- "ADQMS==="
--
encodeBase32' :: ByteString -> ByteString
encodeBase32' = encodeBase32_ "0123456789ABCDEFGHIJKLMNOPQRSTUV"#
{-# INLINE encodeBase32' #-}
-- | Decode an arbitrarily padded Base32hex-encoded 'ByteString' value. If its length
-- is not a multiple of 8, then padding characters will be added to fill out the
-- input to a multiple of 8 for safe decoding, as Base32hex-encoded values are
-- optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32 "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32 "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32 "ADQM==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ByteString -> Either Text ByteString
decodeBase32 bs@(BS _ !l)
| l == 0 = Right bs
| r == 0 = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable bs
| r == 2 = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable (BS.append bs "======")
| r == 4 = validateLastNPads 2 bs $ decodeBase32_ hexDecodeTable (BS.append bs "====")
| r == 5 = validateLastNPads 3 bs $ decodeBase32_ hexDecodeTable (BS.append bs "===")
| r == 7 = validateLastNPads 5 bs $ decodeBase32_ hexDecodeTable (BS.append bs "=")
| otherwise = Left "Base32-encoded bytestring has invalid size"
where
!r = l `rem` 8
{-# INLINE decodeBase32 #-}
-- | Encode a 'ByteString' value as a Base32hex 'Text' value without padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded :: ByteString -> Text
encodeBase32Unpadded = T.decodeUtf8 . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ByteString' value as a Base32hex 'ByteString' value without padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded' :: ByteString -> ByteString
encodeBase32Unpadded' = encodeBase32NoPad_ "0123456789ABCDEFGHIJKLMNOPQRSTUV"#
{-# INLINE encodeBase32Unpadded' #-}
-- | Decode an unpadded Base32hex-encoded 'ByteString' value.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "ADQMS==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ByteString -> Either Text ByteString
decodeBase32Unpadded bs@(BS _ !l)
| l == 0 = Right bs
| r == 0 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable bs
| r == 2 = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable (BS.append bs "======")
| r == 4 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable (BS.append bs "====")
| r == 5 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable (BS.append bs "===")
| r == 7 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable (BS.append bs "=")
| otherwise = Left "Base32-encoded bytestring has invalid size"
where
!r = l `rem` 8
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32hex-encoded 'ByteString' value.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "ADQMS"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ByteString -> Either Text ByteString
decodeBase32Padded bs@(BS _ !l)
| l == 0 = Right bs
| r == 1 = Left "Base32-encoded bytestring has invalid size"
| r == 3 = Left "Base32-encoded bytestring has invalid size"
| r == 6 = Left "Base32-encoded bytestring has invalid size"
| r /= 0 = Left "Base32-encoded bytestring requires padding"
| otherwise = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable bs
where
!r = l `rem` 8
{-# INLINE decodeBase32Padded #-}
-- | Tell whether a 'ByteString' value is encoded in padded or unpadded Base32hex format
--
-- === __Examples__:
--
-- >>> isBase32Hex "ADQMS"
-- True
--
-- >>> isBase32Hex "ADQMS==="
-- True
--
-- >>> isBase32Hex "ADQMS=="
-- False
--
isBase32Hex :: ByteString -> Bool
isBase32Hex bs = isValidBase32Hex bs && isRight (decodeBase32 bs)
{-# INLINE isBase32Hex #-}
-- | Tell whether a 'ByteString' value is a valid Base32hex format.
--
-- This will not tell you whether or not this is a correct Base32hex representation,
-- only that it conforms to the correct shape (including padding/size etc.).
-- To check whether it is a true Base32hex encoded 'ByteString' value, use 'isBase32'.
--
-- === __Examples__:
--
-- >>> isValidBase32Hex "ADQMS"
-- True
--
-- >>> isValidBase32Hex "ADQMS="
-- False
--
-- >>> isValidBase32Hex "ADQMS%"
-- False
--
isValidBase32Hex :: ByteString -> Bool
isValidBase32Hex = validateBase32 "0123456789ABCDEFGHIJKLMNOPQRSTUV"
{-# INLINE isValidBase32Hex #-}

View file

@ -0,0 +1,120 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
-- |
-- Module : Data.ByteString.Base32.Internal
-- Copyright : (c) 2020 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : Experimental
-- Portability : portable
--
-- Internal module defining the encoding and decoding
-- processes and tables.
--
module Data.ByteString.Base32.Internal
( encodeBase32_
, encodeBase32NoPad_
, decodeBase32_
, validateBase32
, validateLastNPads
) where
import qualified Data.ByteString as BS
import Data.ByteString.Internal
import Data.ByteString.Base32.Internal.Head
import Data.Text (Text)
import Foreign.ForeignPtr
import Foreign.Ptr
import Foreign.Storable
import GHC.Word
import System.IO.Unsafe
-- -------------------------------------------------------------------------- --
-- Validating Base32
-- | Validate a base32-encoded bytestring against some alphabet.
--
validateBase32 :: ByteString -> ByteString -> Bool
validateBase32 !alphabet bs@(BS _ l)
| l == 0 = True
| r == 0 = f bs
| r == 2 = f (BS.append bs "======")
| r == 4 = f (BS.append bs "====")
| r == 5 = f (BS.append bs "===")
| r == 7 = f (BS.append bs "=")
| otherwise = False
where
r = l `rem` 8
f (BS fp l') = accursedUnutterablePerformIO $ withForeignPtr fp $ \p ->
go p (plusPtr p l')
go !p !end
| p == end = return True
| otherwise = do
w <- peek p
let check a
| a == 0x3d, plusPtr p 1 == end = True
| a == 0x3d, plusPtr p 2 == end = True
| a == 0x3d, plusPtr p 3 == end = True
| a == 0x3d, plusPtr p 4 == end = True
| a == 0x3d, plusPtr p 5 == end = True
| a == 0x3d, plusPtr p 6 == end = True
| a == 0x3d = False
| otherwise = BS.elem a alphabet
if check w then go (plusPtr p 1) end else return False
{-# INLINE validateBase32 #-}
-- | This function checks that the last N-chars of a bytestring are '='
-- and, if true, fails with a message or completes some io action.
--
-- This is necessary to check when decoding permissively (i.e. filling in padding chars).
-- Consider the following 8 cases of a string of length l:
--
-- - @l = 0 mod 8@: No pad chars are added, since the input is assumed to be good.
-- - @l = 1 mod 8@: Never an admissible length in base32
-- - @l = 2 mod 8@: 6 padding chars are added. If padding chars are present in the string, they will fail as to decode as final quanta
-- - @l = 3 mod 8@: Never an admissible length in base32
-- - @l = 4 mod 8@: 4 padding chars are added. If 2 padding chars are present in the string this can be "completed" in the sense that
-- it now acts like a string `l == 2 mod 8` with 6 padding chars, and could potentially form corrupted data.
-- - @l = 5 mod 8@: 3 padding chars are added. If 3 padding chars are present in the string, this could form corrupted data like in the
-- previous case.
-- - @l = 6 mod 8@: Never an admissible length in base32
-- - @l = 7 mod 8@: 1 padding char is added. If 5 padding chars are present in the string, this could form corrupted data like the
-- previous cases.
--
-- Hence, permissive decodes should only fill in padding chars when it makes sense to add them. That is,
-- if an input is degenerate, it should never succeed when we add padding chars. We need the following invariant to hold:
--
-- @
-- B32.decodeUnpadded <|> B32.decodePadded ~ B32.decode
-- @
--
validateLastNPads
:: Int
-> ByteString
-> IO (Either Text ByteString)
-> Either Text ByteString
validateLastNPads !n (BS !fp !l) io
| not valid = Left "Base32-encoded bytestring has invalid padding"
| otherwise = unsafeDupablePerformIO io
where
valid = accursedUnutterablePerformIO $ withForeignPtr fp $ \p -> do
let end = plusPtr p l
let go :: Ptr Word8 -> IO Bool
go !q
| q == end = return True
| otherwise = do
a <- peek q
if a == 0x3d then return False else go (plusPtr q 1)
go (plusPtr p (l - n))
{-# INLINE validateLastNPads #-}

View file

@ -0,0 +1,76 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE TypeApplications #-}
module Data.ByteString.Base32.Internal.Head
( encodeBase32_
, encodeBase32NoPad_
, decodeBase32_
) where
import Data.ByteString.Internal
import Data.ByteString.Base32.Internal.Loop
import Data.ByteString.Base32.Internal.Tail
import Data.Text (Text)
import Foreign.Ptr
import Foreign.ForeignPtr
import GHC.Exts
import GHC.ForeignPtr
import GHC.Word
import System.IO.Unsafe
-- | Head of the padded base32 encoding loop.
--
-- This function takes an alphabet in the form of an unboxed 'Addr#',
-- allocates the correct number of bytes that will be written, and
-- executes the inner encoding loop against that data.
--
encodeBase32_ :: Addr# -> ByteString -> ByteString
encodeBase32_ !lut (BS !sfp !l) = unsafeDupablePerformIO $ do
dfp <- mallocPlainForeignPtrBytes dlen
withForeignPtr dfp $ \dptr ->
withForeignPtr sfp $ \sptr -> do
let !end = plusPtr sptr l
innerLoop lut
(castPtr dptr) sptr
end (loopTail lut dfp dptr end)
where
!dlen = ceiling (fromIntegral @_ @Double l / 5) * 8
-- | Head of the unpadded base32 encoding loop.
--
-- This function takes an alphabet in the form of an unboxed 'Addr#',
-- allocates the correct number of bytes that will be written, and
-- executes the inner encoding loop against that data.
--
encodeBase32NoPad_ :: Addr# -> ByteString -> ByteString
encodeBase32NoPad_ !lut (BS !sfp !l) = unsafeDupablePerformIO $ do
!dfp <- mallocPlainForeignPtrBytes dlen
withForeignPtr dfp $ \dptr ->
withForeignPtr sfp $ \sptr -> do
let !end = plusPtr sptr l
innerLoop lut
(castPtr dptr) sptr
end (loopTailNoPad lut dfp dptr end)
where
!dlen = ceiling (fromIntegral @_ @Double l / 5) * 8
-- | Head of the base32 decoding loop.
--
-- This function takes a base32-decoding lookup table and base32-encoded
-- bytestring, allocates the correct number of bytes that will be written,
-- and executes the inner decoding loop against that data.
--
decodeBase32_ :: Ptr Word8 -> ByteString -> IO (Either Text ByteString)
decodeBase32_ (Ptr !dtable) (BS !sfp !slen) =
withForeignPtr sfp $ \sptr -> do
dfp <- mallocPlainForeignPtrBytes dlen
withForeignPtr dfp $ \dptr -> do
let !end = plusPtr sptr slen
decodeLoop dtable dfp dptr sptr end
where
!dlen = ceiling (fromIntegral @_ @Double slen / 1.6)

View file

@ -0,0 +1,204 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE TypeApplications #-}
module Data.ByteString.Base32.Internal.Loop
( innerLoop
, decodeLoop
) where
import Data.Bits
import Data.ByteString.Internal (ByteString(..))
import Data.ByteString.Base32.Internal.Utils
import Data.Text (Text)
import qualified Data.Text as T
import Foreign.Ptr
import Foreign.ForeignPtr
import Foreign.Storable
import GHC.Exts
import GHC.Word
-- ------------------------------------------------------------------------ --
-- Encoding loops
innerLoop
:: Addr#
-> Ptr Word64
-> Ptr Word8
-> Ptr Word8
-> (Ptr Word8 -> Ptr Word8 -> IO ByteString)
-> IO ByteString
innerLoop !lut !dptr !sptr !end finish = go dptr sptr
where
lix a = w64 (aix (fromIntegral a .&. 0x1f) lut)
{-# INLINE lix #-}
go !dst !src
| plusPtr src 4 >= end = finish (castPtr dst) src
| otherwise = do
!t <- peekWord32BE (castPtr src)
!u <- w32 <$> peek (plusPtr src 4)
let !a = lix (unsafeShiftR t 27)
!b = lix (unsafeShiftR t 22)
!c = lix (unsafeShiftR t 17)
!d = lix (unsafeShiftR t 12)
!e = lix (unsafeShiftR t 7)
!f = lix (unsafeShiftR t 2)
!g = lix (unsafeShiftL t 3 .|. unsafeShiftR u 5)
!h = lix u
let !w = a
.|. unsafeShiftL b 8
.|. unsafeShiftL c 16
.|. unsafeShiftL d 24
.|. unsafeShiftL e 32
.|. unsafeShiftL f 40
.|. unsafeShiftL g 48
.|. unsafeShiftL h 56
poke dst w
go (plusPtr dst 8) (plusPtr src 5)
{-# INLINE innerLoop #-}
-- ------------------------------------------------------------------------ --
-- Decoding loops
decodeLoop
:: Addr#
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> IO (Either Text ByteString)
decodeLoop !lut !dfp !dptr !sptr !end = go dptr sptr
where
lix a = w64 (aix (fromIntegral a) lut)
err :: Ptr Word8 -> IO (Either Text ByteString)
err p = return . Left . T.pack
$ "invalid character at offset: "
++ show (p `minusPtr` sptr)
padErr :: Ptr Word8 -> IO (Either Text ByteString)
padErr p = return . Left . T.pack
$ "invalid padding at offset: "
++ show (p `minusPtr` sptr)
look :: Ptr Word8 -> IO Word64
look !p = lix <$> peek @Word8 p
go !dst !src
| plusPtr src 8 >= end = do
a <- look src
b <- look (plusPtr src 1)
c <- look (plusPtr src 2)
d <- look (plusPtr src 3)
e <- look (plusPtr src 4)
f <- look (plusPtr src 5)
g <- look (plusPtr src 6)
h <- look (plusPtr src 7)
finalChunk dst src a b c d e f g h
| otherwise = do
!t <- peekWord64BE (castPtr src)
let a = lix (unsafeShiftR t 56)
b = lix (unsafeShiftR t 48)
c = lix (unsafeShiftR t 40)
d = lix (unsafeShiftR t 32)
e = lix (unsafeShiftR t 24)
f = lix (unsafeShiftR t 16)
g = lix (unsafeShiftR t 8)
h = lix t
decodeChunk dst src a b c d e f g h
finalChunk !dst !src !a !b !c !d !e !f !g !h
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| e == 0xff = err (plusPtr src 4)
| f == 0xff = err (plusPtr src 5)
| g == 0xff = err (plusPtr src 6)
| h == 0xff = err (plusPtr src 7)
| otherwise = do
let !o1 = (fromIntegral a `unsafeShiftL` 3) .|. (fromIntegral b `unsafeShiftR` 2)
!o2 = (fromIntegral b `unsafeShiftL` 6)
.|. (fromIntegral c `unsafeShiftL` 1)
.|. (fromIntegral d `unsafeShiftR` 4)
!o3 = (fromIntegral d `unsafeShiftL` 4) .|. (fromIntegral e `unsafeShiftR` 1)
!o4 = (fromIntegral e `unsafeShiftL` 7)
.|. (fromIntegral f `unsafeShiftL` 2)
.|. (fromIntegral g `unsafeShiftR` 3)
!o5 = (fromIntegral g `unsafeShiftL` 5) .|. fromIntegral h
poke @Word8 dst o1
poke @Word8 (plusPtr dst 1) o2
case (c,d,e,f,g,h) of
(0x63,0x63,0x63,0x63,0x63,0x63) ->
return (Right (BS dfp (1 + minusPtr dst dptr)))
(0x63,_,_,_,_,_) -> padErr (plusPtr src 3)
(_,0x63,0x63,0x63,0x63,0x63) -> padErr (plusPtr src 3)
(_,0x63,_,_,_,_) -> padErr (plusPtr src 4)
(_,_,0x63,0x63,0x63,0x63) -> do
poke @Word8 (plusPtr dst 2) o3
return (Right (BS dfp (2 + minusPtr dst dptr)))
(_,_,0x63,_,_,_) -> padErr (plusPtr src 5)
(_,_,_,0x63,0x63,0x63) -> do
poke @Word8 (plusPtr dst 2) o3
poke @Word8 (plusPtr dst 3) o4
return (Right (BS dfp (3 + minusPtr dst dptr)))
(_,_,_,0x63,_,_) -> padErr (plusPtr src 6)
(_,_,_,_,0x63,0x63) -> padErr (plusPtr src 6)
(_,_,_,_,0x63,_) -> padErr (plusPtr src 7)
(_,_,_,_,_,0x63) -> do
poke @Word8 (plusPtr dst 2) o3
poke @Word8 (plusPtr dst 3) o4
poke @Word8 (plusPtr dst 4) o5
return (Right (BS dfp (4 + minusPtr dst dptr)))
(_,_,_,_,_,_) -> do
poke @Word8 (plusPtr dst 2) o3
poke @Word8 (plusPtr dst 3) o4
poke @Word8 (plusPtr dst 4) o5
return (Right (BS dfp (5 + minusPtr dst dptr)))
decodeChunk !dst !src !a !b !c !d !e !f !g !h
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| c == 0x63 = padErr (plusPtr src 2)
| d == 0x63 = padErr (plusPtr src 3)
| e == 0x63 = padErr (plusPtr src 4)
| f == 0x63 = padErr (plusPtr src 5)
| g == 0x63 = padErr (plusPtr src 6)
| h == 0x63 = padErr (plusPtr src 7)
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| e == 0xff = err (plusPtr src 4)
| f == 0xff = err (plusPtr src 5)
| g == 0xff = err (plusPtr src 6)
| h == 0xff = err (plusPtr src 7)
| otherwise = do
let !w = (unsafeShiftL a 35
.|. unsafeShiftL b 30
.|. unsafeShiftL c 25
.|. unsafeShiftL d 20
.|. unsafeShiftL e 15
.|. unsafeShiftL f 10
.|. unsafeShiftL g 5
.|. h) :: Word64
poke @Word32 (castPtr dst) (byteSwap32 (fromIntegral (unsafeShiftR w 8)))
poke @Word8 (plusPtr dst 4) (fromIntegral w)
go (plusPtr dst 5) (plusPtr src 8)

View file

@ -0,0 +1,54 @@
{-# LANGUAGE TypeApplications #-}
module Data.ByteString.Base32.Internal.Tables
( stdDecodeTable
, hexDecodeTable
) where
import Data.ByteString.Base32.Internal.Utils (writeNPlainPtrBytes)
import GHC.Word (Word8)
import GHC.Ptr (Ptr)
stdDecodeTable :: Ptr Word8
stdDecodeTable = writeNPlainPtrBytes @Word8 256
[ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0x63,0xff,0xff
, 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e
, 0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff
, 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e
, 0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
]
{-# NOINLINE stdDecodeTable #-}
hexDecodeTable :: Ptr Word8
hexDecodeTable = writeNPlainPtrBytes @Word8 256
[ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0xff,0xff,0xff,0x63,0xff,0xff
, 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18
, 0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18
, 0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
]
{-# NOINLINE hexDecodeTable #-}

View file

@ -0,0 +1,191 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Data.ByteString.Base32.Internal.Tail
( loopTail
, loopTailNoPad
) where
import Data.Bits
import Data.ByteString.Internal
import Data.ByteString.Base32.Internal.Utils
import Foreign.Ptr
import Foreign.ForeignPtr
import Foreign.Storable
import GHC.Exts
import GHC.Word
-- | Unroll final quantum encoding for base32
--
loopTail
:: Addr#
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> IO ByteString
loopTail !lut !dfp !dptr !end !dst !src
| src == end = return (BS dfp (minusPtr dst dptr))
| plusPtr src 1 == end = do -- 2 6
!a <- peek src
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2)
poke dst t
poke (plusPtr dst 1) u
padN (plusPtr dst 2) 6
return (BS dfp (8 + minusPtr dst dptr))
| plusPtr src 2 == end = do -- 4 4
!a <- peek src
!b <- peek (plusPtr src 1)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
padN (plusPtr dst 4) 4
return (BS dfp (8 + minusPtr dst dptr))
| plusPtr src 3 == end = do -- 5 3
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
padN (plusPtr dst 5) 3
return (BS dfp (8 + minusPtr dst dptr))
| otherwise = do -- 7 1
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
!d <- peek (plusPtr src 3)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1 .|. unsafeShiftR (d .&. 0x80) 7)
!y = look (unsafeShiftR (d .&. 0x7c) 2)
!z = look (unsafeShiftL (d .&. 0x03) 3)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
poke (plusPtr dst 5) y
poke (plusPtr dst 6) z
padN (plusPtr dst 7) 1
return (BS dfp (8 + minusPtr dst dptr))
where
look !n = aix n lut
padN :: Ptr Word8 -> Int -> IO ()
padN !_ 0 = return ()
padN !p n = poke p 0x3d >> padN (plusPtr p 1) (n - 1)
{-# INLINE loopTail #-}
-- | Unroll final quantum encoding for base32
--
loopTailNoPad
:: Addr#
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> IO ByteString
loopTailNoPad !lut !dfp !dptr !end !dst !src
| src == end = return (BS dfp (minusPtr dst dptr))
| plusPtr src 1 == end = do -- 2 6
!a <- peek src
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2)
poke dst t
poke (plusPtr dst 1) u
return (BS dfp (2 + minusPtr dst dptr))
| plusPtr src 2 == end = do -- 4 4
!a <- peek src
!b <- peek (plusPtr src 1)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
return (BS dfp (4 + minusPtr dst dptr))
| plusPtr src 3 == end = do -- 5 3
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
return (BS dfp (5 + minusPtr dst dptr))
| otherwise = do -- 7 1
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
!d <- peek (plusPtr src 3)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1 .|. unsafeShiftR (d .&. 0x80) 7)
!y = look (unsafeShiftR (d .&. 0x7c) 2)
!z = look (unsafeShiftL (d .&. 0x03) 3)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
poke (plusPtr dst 5) y
poke (plusPtr dst 6) z
return (BS dfp (7 + minusPtr dst dptr))
where
look !i = aix i lut
{-# INLINE loopTailNoPad #-}

View file

@ -0,0 +1,98 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Data.ByteString.Base32.Internal.Utils
( aix
, peekWord32BE
, peekWord64BE
, reChunkN
, w32
, w64
, w64_32
, writeNPlainPtrBytes
) where
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Foreign.Ptr
import Foreign.Storable
import GHC.ByteOrder
import GHC.Exts
import GHC.Word
import System.IO.Unsafe
import Foreign.Marshal.Alloc (mallocBytes)
-- | Read 'Word8' index off alphabet addr
--
aix :: Word8 -> Addr# -> Word8
aix w8 alpha = W8# (indexWord8OffAddr# alpha i)
where
!(I# i) = fromIntegral w8
{-# INLINE aix #-}
w32 :: Word8 -> Word32
w32 = fromIntegral
{-# INLINE w32 #-}
w64_32 :: Word32 -> Word64
w64_32 = fromIntegral
{-# INLINE w64_32 #-}
w64 :: Word8 -> Word64
w64 = fromIntegral
{-# INLINE w64 #-}
-- | Allocate and fill @n@ bytes with some data
--
writeNPlainPtrBytes
:: Storable a
=> Int
-> [a]
-> Ptr a
writeNPlainPtrBytes !n as = unsafeDupablePerformIO $ do
p <- mallocBytes n
go p as
return p
where
go !_ [] = return ()
go !p (x:xs) = poke p x >> go (plusPtr p 1) xs
{-# INLINE writeNPlainPtrBytes #-}
peekWord32BE :: Ptr Word32 -> IO Word32
peekWord32BE p = case targetByteOrder of
LittleEndian -> byteSwap32 <$> peek p
BigEndian -> peek p
{-# inline peekWord32BE #-}
peekWord64BE :: Ptr Word64 -> IO Word64
peekWord64BE p = case targetByteOrder of
LittleEndian -> byteSwap64 <$> peek p
BigEndian -> peek p
{-# inline peekWord64BE #-}
-- | Rechunk a list of bytestrings in multiples of @n@
--
reChunkN :: Int -> [ByteString] -> [ByteString]
reChunkN n = go
where
go [] = []
go (b:bs) = case divMod (BS.length b) n of
(_, 0) -> b : go bs
(d, _) -> case BS.splitAt (d * n) b of
~(h, t) -> h : accum t bs
accum acc [] = [acc]
accum acc (c:cs) =
case BS.splitAt (n - BS.length acc) c of
~(h, t) ->
let acc' = BS.append acc h
in if BS.length acc' == n
then
let cs' = if BS.null t then cs else t : cs
in acc' : go cs'
else accum acc' cs
{-# INLINE reChunkN #-}

View file

@ -0,0 +1,77 @@
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
-- |
-- Module : Data.ByteString.Base64
-- Copyright : (c) 2010 Bryan O'Sullivan
--
-- License : BSD-style
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>,
-- Herbert Valerio Riedel <hvr@gnu.org>,
-- Mikhail Glushenkov <mikhail.glushenkov@gmail.com>
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base64-encoded strings.
--
-- @since 0.1.0.0
module Data.ByteString.Base64
( encode
, decode
, decodeLenient
) where
import Data.ByteString.Base64.Internal
import qualified Data.ByteString as B
import Data.ByteString.Internal (ByteString(..))
import Data.Word (Word8)
import Foreign.ForeignPtr (ForeignPtr)
-- | Encode a string into base64 form. The result will always be a
-- multiple of 4 bytes in length.
encode :: ByteString -> ByteString
encode s = encodeWith Padded (mkEncodeTable alphabet) s
-- | Decode a base64-encoded string. This function strictly follows
-- the specification in
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>.
--
-- (Note: this means that even @"\\n"@ and @"\\r\\n"@ as line breaks are rejected
-- rather than ignored. If you are using this in the context of a
-- standard that overrules RFC 4648 such as HTTP multipart mime bodies,
-- consider using 'decodeLenient'.)
decode :: ByteString -> Either String ByteString
decode s = decodeWithTable Padded decodeFP s
-- | Decode a base64-encoded string. This function is lenient in
-- following the specification from
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>, and will not
-- generate parse errors no matter how poor its input.
decodeLenient :: ByteString -> ByteString
decodeLenient s = decodeLenientWithTable decodeFP s
alphabet :: ByteString
alphabet = B.pack $ [65..90] ++ [97..122] ++ [48..57] ++ [43,47]
{-# NOINLINE alphabet #-}
decodeFP :: ForeignPtr Word8
#if MIN_VERSION_bytestring(0,11,0)
BS decodeFP _ =
#else
PS decodeFP _ _ =
#endif
B.pack $ replicate 43 x
++ [62,x,x,x,63]
++ [52..61]
++ [x,x,x,done,x,x,x]
++ [0..25]
++ [x,x,x,x,x,x]
++ [26..51]
++ replicate 133 x
{-# NOINLINE decodeFP #-}
x :: Integral a => a
x = 255
{-# INLINE x #-}

View file

@ -0,0 +1,446 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DoAndIfThenElse #-}
-- |
-- Module : Data.ByteString.Base64.Internal
-- Copyright : (c) 2010 Bryan O'Sullivan
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base64-encoded strings.
module Data.ByteString.Base64.Internal
( encodeWith
, decodeWithTable
, decodeLenientWithTable
, mkEncodeTable
, done
, peek8, poke8, peek8_32
, reChunkIn
, Padding(..)
, withBS
, mkBS
) where
import Data.Bits ((.|.), (.&.), shiftL, shiftR)
import qualified Data.ByteString as B
import Data.ByteString.Internal (ByteString(..), mallocByteString)
import Data.Word (Word8, Word16, Word32)
import Foreign.ForeignPtr (ForeignPtr, withForeignPtr, castForeignPtr)
import Foreign.Ptr (Ptr, castPtr, minusPtr, plusPtr)
import Foreign.Storable (peek, peekElemOff, poke)
import System.IO.Unsafe (unsafePerformIO)
peek8 :: Ptr Word8 -> IO Word8
peek8 = peek
poke8 :: Ptr Word8 -> Word8 -> IO ()
poke8 = poke
peek8_32 :: Ptr Word8 -> IO Word32
peek8_32 = fmap fromIntegral . peek8
data Padding = Padded | Don'tCare | Unpadded deriving Eq
-- | Encode a string into base64 form. The result will always be a multiple
-- of 4 bytes in length.
encodeWith :: Padding -> EncodeTable -> ByteString -> ByteString
encodeWith !padding (ET alfaFP encodeTable) !bs = withBS bs go
where
go !sptr !slen
| slen > maxBound `div` 4 =
error "Data.ByteString.Base64.encode: input too long"
| otherwise = do
let dlen = (slen + 2) `div` 3 * 4
dfp <- mallocByteString dlen
withForeignPtr alfaFP $ \aptr ->
withForeignPtr encodeTable $ \ep -> do
let aidx n = peek8 (aptr `plusPtr` n)
sEnd = sptr `plusPtr` slen
finish !n = return $ mkBS dfp n
fill !dp !sp !n
| sp `plusPtr` 2 >= sEnd = complete (castPtr dp) sp n
| otherwise = {-# SCC "encode/fill" #-} do
i <- peek8_32 sp
j <- peek8_32 (sp `plusPtr` 1)
k <- peek8_32 (sp `plusPtr` 2)
let w = i `shiftL` 16 .|. j `shiftL` 8 .|. k
enc = peekElemOff ep . fromIntegral
poke dp =<< enc (w `shiftR` 12)
poke (dp `plusPtr` 2) =<< enc (w .&. 0xfff)
fill (dp `plusPtr` 4) (sp `plusPtr` 3) (n + 4)
complete dp sp n
| sp == sEnd = finish n
| otherwise = {-# SCC "encode/complete" #-} do
let peekSP m f = (f . fromIntegral) `fmap` peek8 (sp `plusPtr` m)
twoMore = sp `plusPtr` 2 == sEnd
equals = 0x3d :: Word8
doPad = padding == Padded
{-# INLINE equals #-}
!a <- peekSP 0 ((`shiftR` 2) . (.&. 0xfc))
!b <- peekSP 0 ((`shiftL` 4) . (.&. 0x03))
poke8 dp =<< aidx a
if twoMore
then do
!b' <- peekSP 1 ((.|. b) . (`shiftR` 4) . (.&. 0xf0))
!c <- aidx =<< peekSP 1 ((`shiftL` 2) . (.&. 0x0f))
poke8 (dp `plusPtr` 1) =<< aidx b'
poke8 (dp `plusPtr` 2) c
if doPad
then poke8 (dp `plusPtr` 3) equals >> finish (n + 4)
else finish (n + 3)
else do
poke8 (dp `plusPtr` 1) =<< aidx b
if doPad
then do
poke8 (dp `plusPtr` 2) equals
poke8 (dp `plusPtr` 3) equals
finish (n + 4)
else finish (n + 2)
withForeignPtr dfp (\dptr -> fill (castPtr dptr) sptr 0)
data EncodeTable = ET !(ForeignPtr Word8) !(ForeignPtr Word16)
-- The encoding table is constructed such that the expansion of a 12-bit
-- block to a 16-bit block can be done by a single Word16 copy from the
-- correspoding table entry to the target address. The 16-bit blocks are
-- stored in big-endian order, as the indices into the table are built in
-- big-endian order.
mkEncodeTable :: ByteString -> EncodeTable
#if MIN_VERSION_bytestring(0,11,0)
mkEncodeTable alphabet@(BS afp _) =
case table of BS fp _ -> ET afp (castForeignPtr fp)
#else
mkEncodeTable alphabet@(PS afp _ _) =
case table of PS fp _ _ -> ET afp (castForeignPtr fp)
#endif
where
ix = fromIntegral . B.index alphabet
table = B.pack $ concat $ [ [ix j, ix k] | j <- [0..63], k <- [0..63] ]
-- | Decode a base64-encoded string. This function strictly follows
-- the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>.
--
-- This function takes the decoding table (for @base64@ or @base64url@) as
-- the first parameter.
--
-- For validation of padding properties, see note: $Validation
--
decodeWithTable :: Padding -> ForeignPtr Word8 -> ByteString -> Either String ByteString
decodeWithTable padding !decodeFP bs
| B.length bs == 0 = Right B.empty
| otherwise = case padding of
Padded
| r == 0 -> withBS bs go
| r == 1 -> Left "Base64-encoded bytestring has invalid size"
| otherwise -> Left "Base64-encoded bytestring is unpadded or has invalid padding"
Don'tCare
| r == 0 -> withBS bs go
| r == 2 -> withBS (B.append bs (B.replicate 2 0x3d)) go
| r == 3 -> validateLastPad bs invalidPad $ withBS (B.append bs (B.replicate 1 0x3d)) go
| otherwise -> Left "Base64-encoded bytestring has invalid size"
Unpadded
| r == 0 -> validateLastPad bs noPad $ withBS bs go
| r == 2 -> validateLastPad bs noPad $ withBS (B.append bs (B.replicate 2 0x3d)) go
| r == 3 -> validateLastPad bs noPad $ withBS (B.append bs (B.replicate 1 0x3d)) go
| otherwise -> Left "Base64-encoded bytestring has invalid size"
where
!r = B.length bs `rem` 4
noPad = "Base64-encoded bytestring required to be unpadded"
invalidPad = "Base64-encoded bytestring has invalid padding"
go !sptr !slen = do
dfp <- mallocByteString (slen `quot` 4 * 3)
withForeignPtr decodeFP (\ !decptr ->
withForeignPtr dfp (\dptr ->
decodeLoop decptr sptr dptr (sptr `plusPtr` slen) dfp))
decodeLoop
:: Ptr Word8
-- ^ decoding table pointer
-> Ptr Word8
-- ^ source pointer
-> Ptr Word8
-- ^ destination pointer
-> Ptr Word8
-- ^ source end pointer
-> ForeignPtr Word8
-- ^ destination foreign pointer (used for finalizing string)
-> IO (Either String ByteString)
decodeLoop !dtable !sptr !dptr !end !dfp = go dptr sptr
where
err p = return . Left
$ "invalid character at offset: "
++ show (p `minusPtr` sptr)
padErr p = return . Left
$ "invalid padding at offset: "
++ show (p `minusPtr` sptr)
canonErr p = return . Left
$ "non-canonical encoding detected at offset: "
++ show (p `minusPtr` sptr)
look :: Ptr Word8 -> IO Word32
look !p = do
!i <- peek p
!v <- peekElemOff dtable (fromIntegral i)
return (fromIntegral v)
go !dst !src
| plusPtr src 4 >= end = do
!a <- look src
!b <- look (src `plusPtr` 1)
!c <- look (src `plusPtr` 2)
!d <- look (src `plusPtr` 3)
finalChunk dst src a b c d
| otherwise = do
!a <- look src
!b <- look (src `plusPtr` 1)
!c <- look (src `plusPtr` 2)
!d <- look (src `plusPtr` 3)
decodeChunk dst src a b c d
-- | Decodes chunks of 4 bytes at a time, recombining into
-- 3 bytes. Note that in the inner loop stage, no padding
-- characters are admissible.
--
decodeChunk !dst !src !a !b !c !d
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| c == 0x63 = padErr (plusPtr src 2)
| d == 0x63 = padErr (plusPtr src 3)
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| otherwise = do
let !w = (shiftL a 18
.|. shiftL b 12
.|. shiftL c 6
.|. d) :: Word32
poke8 dst (fromIntegral (shiftR w 16))
poke8 (plusPtr dst 1) (fromIntegral (shiftR w 8))
poke8 (plusPtr dst 2) (fromIntegral w)
go (plusPtr dst 3) (plusPtr src 4)
-- | Decode the final 4 bytes in the string, recombining into
-- 3 bytes. Note that in this stage, we can have padding chars
-- but only in the final 2 positions.
--
finalChunk !dst !src a b c d
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| c == 0x63 && d /= 0x63 = err (plusPtr src 3) -- make sure padding is coherent.
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| otherwise = do
let !w = (shiftL a 18
.|. shiftL b 12
.|. shiftL c 6
.|. d) :: Word32
poke8 dst (fromIntegral (shiftR w 16))
if c == 0x63 && d == 0x63
then
if sanityCheckPos b mask_4bits
then return $ Right $ mkBS dfp (1 + (dst `minusPtr` dptr))
else canonErr (plusPtr src 1)
else if d == 0x63
then
if sanityCheckPos c mask_2bits
then do
poke8 (plusPtr dst 1) (fromIntegral (shiftR w 8))
return $ Right $ mkBS dfp (2 + (dst `minusPtr` dptr))
else canonErr (plusPtr src 2)
else do
poke8 (plusPtr dst 1) (fromIntegral (shiftR w 8))
poke8 (plusPtr dst 2) (fromIntegral w)
return $ Right $ mkBS dfp (3 + (dst `minusPtr` dptr))
-- | Decode a base64-encoded string. This function is lenient in
-- following the specification from
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>, and will not
-- generate parse errors no matter how poor its input. This function
-- takes the decoding table (for @base64@ or @base64url@) as the first
-- paramert.
decodeLenientWithTable :: ForeignPtr Word8 -> ByteString -> ByteString
decodeLenientWithTable !decodeFP !bs = withBS bs go
where
go !sptr !slen
| dlen <= 0 = return B.empty
| otherwise = do
dfp <- mallocByteString dlen
withForeignPtr decodeFP $ \ !decptr -> do
let finish dbytes
| dbytes > 0 = return $ mkBS dfp dbytes
| otherwise = return B.empty
sEnd = sptr `plusPtr` slen
fill !dp !sp !n
| sp >= sEnd = finish n
| otherwise = {-# SCC "decodeLenientWithTable/fill" #-}
let look :: Bool -> Ptr Word8
-> (Ptr Word8 -> Word32 -> IO ByteString)
-> IO ByteString
{-# INLINE look #-}
look skipPad p0 f = go' p0
where
go' p | p >= sEnd = f (sEnd `plusPtr` (-1)) done
| otherwise = {-# SCC "decodeLenient/look" #-} do
ix <- fromIntegral `fmap` peek8 p
v <- peek8 (decptr `plusPtr` ix)
if v == x || v == done && skipPad
then go' (p `plusPtr` 1)
else f (p `plusPtr` 1) (fromIntegral v)
in look True sp $ \ !aNext !aValue ->
look True aNext $ \ !bNext !bValue ->
if aValue == done || bValue == done
then finish n
else
look False bNext $ \ !cNext !cValue ->
look False cNext $ \ !dNext !dValue -> do
let w = aValue `shiftL` 18 .|. bValue `shiftL` 12 .|.
cValue `shiftL` 6 .|. dValue
poke8 dp $ fromIntegral (w `shiftR` 16)
if cValue == done
then finish (n + 1)
else do
poke8 (dp `plusPtr` 1) $ fromIntegral (w `shiftR` 8)
if dValue == done
then finish (n + 2)
else do
poke8 (dp `plusPtr` 2) $ fromIntegral w
fill (dp `plusPtr` 3) dNext (n+3)
withForeignPtr dfp $ \dptr -> fill dptr sptr 0
where
!dlen = (slen + 3) `div` 4 * 3
x :: Integral a => a
x = 255
{-# INLINE x #-}
done :: Integral a => a
done = 99
{-# INLINE done #-}
-- This takes a list of ByteStrings, and returns a list in which each
-- (apart from possibly the last) has length that is a multiple of n
reChunkIn :: Int -> [ByteString] -> [ByteString]
reChunkIn !n = go
where
go [] = []
go (y : ys) = case B.length y `divMod` n of
(_, 0) -> y : go ys
(d, _) -> case B.splitAt (d * n) y of
(prefix, suffix) -> prefix : fixup suffix ys
fixup acc [] = [acc]
fixup acc (z : zs) = case B.splitAt (n - B.length acc) z of
(prefix, suffix) ->
let acc' = acc `B.append` prefix
in if B.length acc' == n
then let zs' = if B.null suffix
then zs
else suffix : zs
in acc' : go zs'
else -- suffix must be null
fixup acc' zs
-- $Validation
--
-- This function checks that the last char of a bytestring is '='
-- and, if true, fails with a message or completes some io action.
--
-- This is necessary to check when decoding permissively (i.e. filling in padding chars).
-- Consider the following 4 cases of a string of length l:
--
-- l = 0 mod 4: No pad chars are added, since the input is assumed to be good.
-- l = 1 mod 4: Never an admissible length in base64
-- l = 2 mod 4: 2 padding chars are added. If padding chars are present in the last 4 chars of the string,
-- they will fail to decode as final quanta.
-- l = 3 mod 4: 1 padding char is added. In this case a string is of the form <body> + <padchar>. If adding the
-- pad char "completes" the string so that it is `l = 0 mod 4`, then this may possibly form corrupted data.
-- This case is degenerate and should be disallowed.
--
-- Hence, permissive decodes should only fill in padding chars when it makes sense to add them. That is,
-- if an input is degenerate, it should never succeed when we add padding chars. We need the following invariant to hold:
--
-- @
-- B64U.decodeUnpadded <|> B64U.decodePadded ~ B64U.decodePadded
-- @
--
-- This means the only char we need to check is the last one, and only to disallow `l = 3 mod 4`.
--
validateLastPad
:: ByteString
-- ^ input to validate
-> String
-- ^ error msg
-> Either String ByteString
-> Either String ByteString
validateLastPad !bs err !io
| B.last bs == 0x3d = Left err
| otherwise = io
{-# INLINE validateLastPad #-}
-- | Sanity check an index against a bitmask to make sure
-- it's coherent. If pos & mask == 0, we're good. If not, we should fail.
--
sanityCheckPos :: Word32 -> Word8 -> Bool
sanityCheckPos pos mask = fromIntegral pos .&. mask == 0
{-# INLINE sanityCheckPos #-}
-- | Mask 2 bits
--
mask_2bits :: Word8
mask_2bits = 3 -- (1 << 2) - 1
{-# NOINLINE mask_2bits #-}
-- | Mask 4 bits
--
mask_4bits :: Word8
mask_4bits = 15 -- (1 << 4) - 1
{-# NOINLINE mask_4bits #-}
-- | Back-compat shim for bytestring >=0.11. Constructs a
-- bytestring from a foreign ptr and a length. Offset is 0.
--
mkBS :: ForeignPtr Word8 -> Int -> ByteString
#if MIN_VERSION_bytestring(0,11,0)
mkBS dfp n = BS dfp n
#else
mkBS dfp n = PS dfp 0 n
#endif
{-# INLINE mkBS #-}
-- | Back-compat shim for bytestring >=0.11. Unwraps the foreign ptr of
-- a bytestring, executing an IO action as a function of the underlying
-- pointer and some starting length.
--
-- Note: in `unsafePerformIO`.
--
withBS :: ByteString -> (Ptr Word8 -> Int -> IO a) -> a
#if MIN_VERSION_bytestring(0,11,0)
withBS (BS !sfp !slen) f = unsafePerformIO $
withForeignPtr sfp $ \p -> f p slen
#else
withBS (PS !sfp !soff !slen) f = unsafePerformIO $
withForeignPtr sfp $ \p -> f (plusPtr p soff) slen
#endif
{-# INLINE withBS #-}

View file

@ -0,0 +1,64 @@
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
-- |
-- Module : Data.ByteString.Base64.Lazy
-- Copyright : (c) 2012 Ian Lynagh
--
-- License : BSD-style
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>,
-- Herbert Valerio Riedel <hvr@gnu.org>,
-- Mikhail Glushenkov <mikhail.glushenkov@gmail.com>
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base64-encoded
-- lazy bytestrings.
--
-- @since 1.0.0.0
module Data.ByteString.Base64.Lazy
(
encode
, decode
, decodeLenient
) where
import Data.ByteString.Base64.Internal
import qualified Data.ByteString.Base64 as B64
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as LC
import Data.Char
-- | Encode a string into base64 form. The result will always be a
-- multiple of 4 bytes in length.
encode :: L.ByteString -> L.ByteString
encode = L.fromChunks . map B64.encode . reChunkIn 3 . L.toChunks
-- | Decode a base64-encoded string. This function strictly follows
-- the specification in
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>.
decode :: L.ByteString -> Either String L.ByteString
decode b = -- Returning an Either type means that the entire result will
-- need to be in memory at once anyway, so we may as well
-- keep it simple and just convert to and from a strict byte
-- string
-- TODO: Use L.{fromStrict,toStrict} once we can rely on
-- a new enough bytestring
case B64.decode $ S.concat $ L.toChunks b of
Left err -> Left err
Right b' -> Right $ L.fromChunks [b']
-- | Decode a base64-encoded string. This function is lenient in
-- following the specification from
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>, and will not generate
-- parse errors no matter how poor its input.
decodeLenient :: L.ByteString -> L.ByteString
decodeLenient = L.fromChunks . map B64.decodeLenient . reChunkIn 4 . L.toChunks
. LC.filter goodChar
where -- We filter out and '=' padding here, but B64.decodeLenient
-- handles that
goodChar c = isDigit c || isAsciiUpper c || isAsciiLower c
|| c == '+' || c == '/'

View file

@ -0,0 +1,99 @@
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
-- |
-- Module : Data.ByteString.Base64.URL
-- Copyright : (c) 2012 Deian Stefan
--
-- License : BSD-style
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>,
-- Herbert Valerio Riedel <hvr@gnu.org>,
-- Mikhail Glushenkov <mikhail.glushenkov@gmail.com>
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base64url-encoded strings.
--
-- @since 0.1.1.0
module Data.ByteString.Base64.URL
( encode
, encodeUnpadded
, decode
, decodePadded
, decodeUnpadded
, decodeLenient
) where
import Data.ByteString.Base64.Internal
import qualified Data.ByteString as B
import Data.ByteString.Internal (ByteString(..))
import Data.Word (Word8)
import Foreign.ForeignPtr (ForeignPtr)
-- | Encode a string into base64url form. The result will always be a
-- multiple of 4 bytes in length.
encode :: ByteString -> ByteString
encode = encodeWith Padded (mkEncodeTable alphabet)
-- | Encode a string into unpadded base64url form.
--
-- @since 1.1.0.0
encodeUnpadded :: ByteString -> ByteString
encodeUnpadded = encodeWith Unpadded (mkEncodeTable alphabet)
-- | Decode a base64url-encoded string applying padding if necessary.
-- This function follows the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>
-- and in <https://tools.ietf.org/html/rfc7049#section-2.4.4.2 RFC 7049 2.4>
decode :: ByteString -> Either String ByteString
decode = decodeWithTable Don'tCare decodeFP
-- | Decode a padded base64url-encoded string, failing if input is improperly padded.
-- This function follows the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>
-- and in <https://tools.ietf.org/html/rfc7049#section-2.4.4.2 RFC 7049 2.4>
--
-- @since 1.1.0.0
decodePadded :: ByteString -> Either String ByteString
decodePadded = decodeWithTable Padded decodeFP
-- | Decode a unpadded base64url-encoded string, failing if input is padded.
-- This function follows the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>
-- and in <https://tools.ietf.org/html/rfc7049#section-2.4.4.2 RFC 7049 2.4>
--
-- @since 1.1.0.0
decodeUnpadded :: ByteString -> Either String ByteString
decodeUnpadded = decodeWithTable Unpadded decodeFP
-- | Decode a base64url-encoded string. This function is lenient in
-- following the specification from
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>, and will not
-- generate parse errors no matter how poor its input.
decodeLenient :: ByteString -> ByteString
decodeLenient = decodeLenientWithTable decodeFP
alphabet :: ByteString
alphabet = B.pack $ [65..90] ++ [97..122] ++ [48..57] ++ [45,95]
{-# NOINLINE alphabet #-}
decodeFP :: ForeignPtr Word8
#if MIN_VERSION_bytestring(0,11,0)
BS decodeFP _ =
#else
PS decodeFP _ _ =
#endif
B.pack $ replicate 45 x
++ [62,x,x]
++ [52..61]
++ [x,x,x,done,x,x,x]
++ [0..25]
++ [x,x,x,x,63,x]
++ [26..51]
++ replicate 133 x
{-# NOINLINE decodeFP #-}
x :: Integral a => a
x = 255
{-# INLINE x #-}

View file

@ -0,0 +1,95 @@
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
-- |
-- Module : Data.ByteString.Base64.URL.Lazy
-- Copyright : (c) 2012 Ian Lynagh
--
-- License : BSD-style
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>,
-- Herbert Valerio Riedel <hvr@gnu.org>,
-- Mikhail Glushenkov <mikhail.glushenkov@gmail.com>
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base64-encoded
-- lazy bytestrings.
--
-- @since 1.0.0.0
module Data.ByteString.Base64.URL.Lazy
(
encode
, encodeUnpadded
, decode
, decodeUnpadded
, decodePadded
, decodeLenient
) where
import Data.ByteString.Base64.Internal
import qualified Data.ByteString.Base64.URL as B64
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as LC
import Data.Char
-- | Encode a string into base64 form. The result will always be a
-- multiple of 4 bytes in length.
encode :: L.ByteString -> L.ByteString
encode = L.fromChunks . map B64.encode . reChunkIn 3 . L.toChunks
-- | Encode a string into unpadded base64url form.
--
-- @since 1.1.0.0
encodeUnpadded :: L.ByteString -> L.ByteString
encodeUnpadded = L.fromChunks
. map B64.encodeUnpadded
. reChunkIn 3
. L.toChunks
-- | Decode a base64-encoded string. This function strictly follows
-- the specification in
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>.
decode :: L.ByteString -> Either String L.ByteString
decode b = -- Returning an Either type means that the entire result will
-- need to be in memory at once anyway, so we may as well
-- keep it simple and just convert to and from a strict byte
-- string
-- TODO: Use L.{fromStrict,toStrict} once we can rely on
-- a new enough bytestring
case B64.decode $ S.concat $ L.toChunks b of
Left err -> Left err
Right b' -> Right $ L.fromChunks [b']
-- | Decode a unpadded base64url-encoded string, failing if input is padded.
-- This function follows the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>
-- and in <https://tools.ietf.org/html/rfc7049#section-2.4.4.2 RFC 7049 2.4>
--
-- @since 1.1.0.0
decodeUnpadded :: L.ByteString -> Either String L.ByteString
decodeUnpadded bs = case B64.decodeUnpadded $ S.concat $ L.toChunks bs of
Right b -> Right $ L.fromChunks [b]
Left e -> Left e
-- | Decode a padded base64url-encoded string, failing if input is improperly padded.
-- This function follows the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>
-- and in <https://tools.ietf.org/html/rfc7049#section-2.4.4.2 RFC 7049 2.4>
--
-- @since 1.1.0.0
decodePadded :: L.ByteString -> Either String L.ByteString
decodePadded bs = case B64.decodePadded $ S.concat $ L.toChunks bs of
Right b -> Right $ L.fromChunks [b]
Left e -> Left e
-- | Decode a base64-encoded string. This function is lenient in
-- following the specification from
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>, and will not generate
-- parse errors no matter how poor its input.
decodeLenient :: L.ByteString -> L.ByteString
decodeLenient = L.fromChunks . map B64.decodeLenient . reChunkIn 4 . L.toChunks
. LC.filter goodChar
where -- We filter out and '=' padding here, but B64.decodeLenient
-- handles that
goodChar c = isAlphaNum c || c == '-' || c == '_'

View file

@ -0,0 +1,94 @@
{-# LANGUAGE OverloadedStrings, Safe #-}
module Data.ByteString.Builder.Scientific
( scientificBuilder
, formatScientificBuilder
, FPFormat(..)
) where
import Data.Scientific (Scientific)
import qualified Data.Scientific as Scientific
import Data.Text.Lazy.Builder.RealFloat (FPFormat(..))
import qualified Data.ByteString.Char8 as BC8
import Data.ByteString.Builder (Builder, string8, char8, intDec)
import Data.ByteString.Builder.Extra (byteStringCopy)
import Utils (roundTo, i2d)
import Data.Monoid ((<>))
-- | A @ByteString@ @Builder@ which renders a scientific number to full
-- precision, using standard decimal notation for arguments whose
-- absolute value lies between @0.1@ and @9,999,999@, and scientific
-- notation otherwise.
scientificBuilder :: Scientific -> Builder
scientificBuilder = formatScientificBuilder Generic Nothing
-- | Like 'scientificBuilder' but provides rendering options.
formatScientificBuilder :: FPFormat
-> Maybe Int -- ^ Number of decimal places to render.
-> Scientific
-> Builder
formatScientificBuilder fmt decs scntfc
| scntfc < 0 = char8 '-' <> doFmt fmt (Scientific.toDecimalDigits (-scntfc))
| otherwise = doFmt fmt (Scientific.toDecimalDigits scntfc)
where
doFmt format (is, e) =
let ds = map i2d is in
case format of
Generic ->
doFmt (if e < 0 || e > 7 then Exponent else Fixed)
(is,e)
Exponent ->
case decs of
Nothing ->
let show_e' = intDec (e-1) in
case ds of
"0" -> byteStringCopy "0.0e0"
[d] -> char8 d <> byteStringCopy ".0e" <> show_e'
(d:ds') -> char8 d <> char8 '.' <> string8 ds' <> char8 'e' <> show_e'
[] -> error $ "Data.ByteString.Builder.Scientific.formatScientificBuilder" ++
"/doFmt/Exponent: []"
Just dec ->
let dec' = max dec 1 in
case is of
[0] -> byteStringCopy "0." <>
byteStringCopy (BC8.replicate dec' '0') <>
byteStringCopy "e0"
_ ->
let (ei,is') = roundTo (dec'+1) is
in case map i2d (if ei > 0 then init is' else is') of
[] -> mempty
d:ds' -> char8 d <> char8 '.' <> string8 ds' <> char8 'e' <> intDec (e-1+ei)
Fixed ->
let
mk0 ls = case ls of { "" -> char8 '0' ; _ -> string8 ls}
in
case decs of
Nothing
| e <= 0 -> byteStringCopy "0." <>
byteStringCopy (BC8.replicate (-e) '0') <>
string8 ds
| otherwise ->
let
f 0 s rs = mk0 (reverse s) <> char8 '.' <> mk0 rs
f n s "" = f (n-1) ('0':s) ""
f n s (r:rs) = f (n-1) (r:s) rs
in
f e "" ds
Just dec ->
let dec' = max dec 0 in
if e >= 0 then
let
(ei,is') = roundTo (dec' + e) is
(ls,rs) = splitAt (e+ei) (map i2d is')
in
mk0 ls <> (if null rs then mempty else char8 '.' <> string8 rs)
else
let (ei,is') = roundTo dec' (replicate (-e) 0 ++ is)
in case map i2d (if ei > 0 then is' else 0:is') of
[] -> mempty
d:ds' -> char8 d <> (if null ds' then mempty else char8 '.' <> string8 ds')

View file

@ -0,0 +1,243 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Lazy.Base32
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.Lazy.ByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32
-- encoding format. This includes strictly padded/unpadded
-- decoding variants, as well as internal and external validation for canonicity.
--
module Data.ByteString.Lazy.Base32
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
--, decodeBase32Lenient
-- * Validation
, isBase32
, isValidBase32
) where
import Prelude hiding (all, elem)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Base32 as B32
import Data.ByteString.Base32.Internal.Utils (reChunkN)
import Data.ByteString.Lazy (elem, fromChunks, toChunks)
import Data.ByteString.Lazy.Internal (ByteString(..))
import Data.Either (isRight)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
-- | Encode a 'ByteString' value as a Base32 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "KN2W4==="
--
encodeBase32 :: ByteString -> TL.Text
encodeBase32 = TL.decodeUtf8 . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ByteString' as a Base32 'ByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "KN2W4==="
--
encodeBase32' :: ByteString -> ByteString
encodeBase32' = fromChunks
. fmap B32.encodeBase32'
. reChunkN 5
. toChunks
-- | Decode an arbitrarily padded Base32 encoded 'ByteString' value. If its length is not a multiple
-- of 4, then padding chars will be added to fill out the input to a multiple of
-- 4 for safe decoding as Base32-encoded values are optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32 "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ByteString -> Either T.Text ByteString
decodeBase32 = fmap (fromChunks . (:[]))
. B32.decodeBase32
. BS.concat
. toChunks
{-# INLINE decodeBase32 #-}
-- | Encode a 'ByteString' value as Base32 'Text' without padding. Note that for Base32,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32 and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded "Sun"
-- "KN2W4"
--
encodeBase32Unpadded :: ByteString -> TL.Text
encodeBase32Unpadded = TL.decodeUtf8 . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ByteString' value as Base32 without padding. Note that for Base32,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32 and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "KN2W4"
--
encodeBase32Unpadded' :: ByteString -> ByteString
encodeBase32Unpadded' = fromChunks
. fmap B32.encodeBase32Unpadded'
. reChunkN 5
. toChunks
-- | Decode an unpadded Base32-encoded 'ByteString' value. Input strings are
-- required to be unpadded, and will undergo validation prior to decoding to
-- confirm.
--
-- In general, unless unpadded Base32 is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "KN2W4==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ByteString -> Either T.Text ByteString
decodeBase32Unpadded = fmap (fromChunks . (:[]))
. B32.decodeBase32Unpadded
. BS.concat
. toChunks
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32-encoded 'ByteString' value. Input strings are
-- required to be correctly padded, and will be validated prior to decoding
-- to confirm.
--
-- In general, unless padded Base32 is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "KN2W4"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ByteString -> Either T.Text ByteString
decodeBase32Padded = fmap (fromChunks . (:[]))
. B32.decodeBase32Padded
. BS.concat
. toChunks
{-# INLINE decodeBase32Padded #-}
-- -- | Leniently decode an unpadded Base32-encoded 'ByteString'. This function
-- -- will not generate parse errors. If input data contains padding chars,
-- -- then the input will be parsed up until the first pad character.
-- --
-- -- __Note:__ This is not RFC 4648-compliant.
-- --
-- -- === __Examples__:
-- --
-- -- >>> decodeBase32Lenient "PDw_Pj4="
-- -- "<<?>>"
-- --
-- -- >>> decodeBase32Lenient "PDw_%%%$}Pj4"
-- -- "<<?>>"
-- --
-- decodeBase32Lenient :: ByteString -> ByteString
-- decodeBase32Lenient = fromChunks
-- . fmap B32.decodeBase32Lenient
-- . reChunkN 8
-- . fmap (BS.filter (flip elem "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567="))
-- . toChunks
-- {-# INLINE decodeBase32Lenient #-}
-- | Tell whether a 'ByteString' is Base32-encoded.
--
-- === __Examples__:
--
-- >>> isBase32 "KN2W4"
-- True
--
-- >>> isBase32 "KN2W4==="
-- True
--
-- >>> isBase32 "KN2W4=="
-- False
--
isBase32 :: ByteString -> Bool
isBase32 bs = isValidBase32 bs && isRight (decodeBase32 bs)
{-# INLINE isBase32 #-}
-- | Tell whether a 'ByteString' is a valid Base32 format.
--
-- This will not tell you whether or not this is a correct Base32 representation,
-- only that it conforms to the correct shape. To check whether it is a true
-- Base32 encoded 'ByteString' value, use 'isBase32'.
--
-- === __Examples__:
--
-- >>> isValidBase32 "KN2W4"
-- True
--
-- >>> isValidBase32 "KN2W4="
-- False
--
-- >>> isValidBase32 "KN2W4%"
-- False
--
isValidBase32 :: ByteString -> Bool
isValidBase32 = go . toChunks
where
go [] = True
go [c] = B32.isValidBase32 c
go (c:cs) = -- note the lack of padding char
BS.all (flip elem "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567") c
&& go cs
{-# INLINE isValidBase32 #-}

View file

@ -0,0 +1,244 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Lazy.Base32.Hex
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.Lazy.ByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32hex
-- encoding format. This includes strictly padded/unpadded
-- decoding variants, as well as internal and external validation for canonicity.
--
module Data.ByteString.Lazy.Base32.Hex
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
--, decodeBase32Lenient
-- * Validation
, isBase32Hex
, isValidBase32Hex
) where
import Prelude hiding (all, elem)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Base32.Hex as B32H
import Data.ByteString.Base32.Internal.Utils (reChunkN)
import Data.ByteString.Lazy (elem, fromChunks, toChunks)
import Data.ByteString.Lazy.Internal (ByteString(..))
import Data.Either (isRight)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
-- | Encode a 'ByteString' value as a Base32hex 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "ADQMS==="
--
encodeBase32 :: ByteString -> TL.Text
encodeBase32 = TL.decodeUtf8 . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ByteString' as a Base32hex 'ByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32' "Sun"
-- "ADQMS==="
--
encodeBase32' :: ByteString -> ByteString
encodeBase32' = fromChunks
. fmap B32H.encodeBase32'
. reChunkN 5
. toChunks
-- | Decode an arbitrarily padded Base32hex encoded 'ByteString' value. If its length is not a multiple
-- of 4, then padding chars will be added to fill out the input to a multiple of
-- 4 for safe decoding as Base32hex-encoded values are optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32 "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32 "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32 "ADQMS==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ByteString -> Either T.Text ByteString
decodeBase32 = fmap (fromChunks . (:[]))
. B32H.decodeBase32
. BS.concat
. toChunks
{-# INLINE decodeBase32 #-}
-- | Encode a 'ByteString' value as Base32hex 'Text' without padding. Note that for Base32hex,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32hex and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded "Sun"
-- "ADQMS"
--
encodeBase32Unpadded :: ByteString -> TL.Text
encodeBase32Unpadded = TL.decodeUtf8 . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ByteString' value as Base32hex without padding. Note that for Base32hex,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32hex and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded' :: ByteString -> ByteString
encodeBase32Unpadded' = fromChunks
. fmap B32H.encodeBase32Unpadded'
. reChunkN 5
. toChunks
-- | Decode an unpadded Base32hex-encoded 'ByteString' value. Input strings are
-- required to be unpadded, and will undergo validation prior to decoding to
-- confirm.
--
-- In general, unless unpadded Base32hex is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "ADQMS==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ByteString -> Either T.Text ByteString
decodeBase32Unpadded = fmap (fromChunks . (:[]))
. B32H.decodeBase32Unpadded
. BS.concat
. toChunks
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32hex-encoded 'ByteString' value. Input strings are
-- required to be correctly padded, and will be validated prior to decoding
-- to confirm.
--
-- In general, unless padded Base32hex is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "ADQMS"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ByteString -> Either T.Text ByteString
decodeBase32Padded = fmap (fromChunks . (:[]))
. B32H.decodeBase32Padded
. BS.concat
. toChunks
{-# INLINE decodeBase32Padded #-}
-- -- | Leniently decode an unpadded Base32hex-encoded 'ByteString'. This function
-- -- will not generate parse errors. If input data contains padding chars,
-- -- then the input will be parsed up until the first pad character.
-- --
-- -- __Note:__ This is not RFC 4648-compliant.
-- --
-- -- === __Examples__:
-- --
-- -- >>> decodeBase32Lenient "PDw_Pj4="
-- -- "<<?>>"
-- --
-- -- >>> decodeBase32Lenient "PDw_%%%$}Pj4"
-- -- "<<?>>"
-- --
-- decodeBase32Lenient :: ByteString -> ByteString
-- decodeBase32Lenient = fromChunks
-- . fmap B32H.decodeBase32Lenient
-- . reChunkN 8
-- . fmap (BS.filter (flip elem "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567="))
-- . toChunks
-- {-# INLINE decodeBase32Lenient #-}
-- | Tell whether a 'ByteString' is Base32hex-encoded.
--
-- === __Examples__:
--
-- >>> isBase32Hex "ADQMS"
-- True
--
-- >>> isBase32Hex "ADQMS==="
-- True
--
-- >>> isBase32Hex "ADQMS=="
-- False
--
isBase32Hex :: ByteString -> Bool
isBase32Hex bs = isValidBase32Hex bs && isRight (decodeBase32 bs)
{-# INLINE isBase32Hex #-}
-- | Tell whether a 'ByteString' is a valid Base32hex format.
--
-- This will not tell you whether or not this is a correct Base32hex representation,
-- only that it conforms to the correct shape. To check whether it is a true
-- Base32hex encoded 'ByteString' value, use 'isBase32Hex'.
--
-- === __Examples__:
--
--
-- >>> isValidBase32Hex "ADQMS"
-- True
--
-- >>> isValidBase32Hex "ADQMS="
-- False
--
-- >>> isValidBase32Hex "ADQMS%"
-- False
--
isValidBase32Hex :: ByteString -> Bool
isValidBase32Hex = go . toChunks
where
go [] = True
go [c] = B32H.isValidBase32Hex c
go (c:cs) = -- note the lack of padding char
BS.all (flip elem "0123456789ABCDEFGHIJKLMNOPQRSTUV") c
&& go cs
{-# INLINE isValidBase32Hex #-}

View file

@ -0,0 +1,212 @@
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Short.Base32
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.Short.ShortByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32
-- encoding format. This includes strictly padded/unpadded decoding
-- variants, as well as internal and external validation for canonicity.
--
module Data.ByteString.Short.Base32
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
--, decodeBase32Lenient
-- * Validation
, isBase32
, isValidBase32
) where
import qualified Data.ByteString.Base32 as B32
import Data.ByteString.Short (ShortByteString, fromShort, toShort)
import Data.Text (Text)
import Data.Text.Short (ShortText)
import Data.Text.Short.Unsafe (fromShortByteStringUnsafe)
-- | Encode a 'ShortByteString' value as a Base32 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "KN2W4==="
--
encodeBase32 :: ShortByteString -> ShortText
encodeBase32 = fromShortByteStringUnsafe . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ShortByteString' as a Base32 'ShortByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32' "Sun"
-- "KN2W4==="
--
encodeBase32' :: ShortByteString -> ShortByteString
encodeBase32' = toShort . B32.encodeBase32' . fromShort
-- | Decode an arbitrarily padded Base32 encoded 'ShortByteString' value. If its length is not a multiple
-- of 4, then padding chars will be added to fill out the input to a multiple of
-- 4 for safe decoding as Base32-encoded values are optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32 "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ShortByteString -> Either Text ShortByteString
decodeBase32 = fmap toShort . B32.decodeBase32 . fromShort
{-# INLINE decodeBase32 #-}
-- | Encode a 'ShortByteString' value as Base32 'Text' without padding. Note that for Base32,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32 and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded "Sun"
-- "KN2W4"
--
encodeBase32Unpadded :: ShortByteString -> ShortText
encodeBase32Unpadded = fromShortByteStringUnsafe . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ShortByteString' value as Base32 without padding. Note that for Base32,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32 and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "KN2W4"
--
encodeBase32Unpadded' :: ShortByteString -> ShortByteString
encodeBase32Unpadded' = toShort . B32.encodeBase32Unpadded' . fromShort
-- | Decode an unpadded Base32-encoded 'ShortByteString' value. Input strings are
-- required to be unpadded, and will undergo validation prior to decoding to
-- confirm.
--
-- In general, unless unpadded Base32 is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "KN2W4==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ShortByteString -> Either Text ShortByteString
decodeBase32Unpadded = fmap toShort . B32.decodeBase32Unpadded . fromShort
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32-encoded 'ShortByteString' value. Input strings are
-- required to be correctly padded, and will be validated prior to decoding
-- to confirm.
--
-- In general, unless padded Base32 is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "KN2W4"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ShortByteString -> Either Text ShortByteString
decodeBase32Padded = fmap toShort . B32.decodeBase32Padded . fromShort
{-# INLINE decodeBase32Padded #-}
-- -- | Leniently decode an unpadded Base32-encoded 'ShortByteString'. This function
-- -- will not generate parse errors. If input data contains padding chars,
-- -- then the input will be parsed up until the first pad character.
-- --
-- -- __Note:__ This is not RFC 4648-compliant.
-- --
-- -- === __Examples__:
-- --
-- -- >>> decodeBase32Lenient "PDw_Pj4="
-- -- "<<?>>"
-- --
-- -- >>> decodeBase32Lenient "PDw_%%%$}Pj4"
-- -- "<<?>>"
-- --
-- decodeBase32Lenient :: ShortByteString -> ShortByteString
-- decodeBase32Lenient = toShort . B32.decodeBase32Lenient . fromShort
-- {-# INLINE decodeBase32Lenient #-}
-- | Tell whether a 'ShortByteString' is Base32-encoded.
--
-- === __Examples__:
--
-- >>> isBase32 "KN2W4"
-- True
--
-- >>> isBase32 "KN2W4==="
-- True
--
-- >>> isBase32 "KN2W4=="
-- False
--
isBase32 :: ShortByteString -> Bool
isBase32 = B32.isBase32 . fromShort
{-# INLINE isBase32 #-}
-- | Tell whether a 'ShortByteString' is a valid Base32 format.
--
-- This will not tell you whether or not this is a correct Base32 representation,
-- only that it conforms to the correct shape. To check whether it is a true
-- Base32 encoded 'ShortByteString' value, use 'isBase32'.
--
-- === __Examples__:
--
-- >>> isValidBase32 "KN2W4"
-- True
--
-- >>> isValidBase32 "KN2W4="
-- False
--
-- >>> isValidBase32 "KN2W4%"
-- False
--
isValidBase32 :: ShortByteString -> Bool
isValidBase32 = B32.isValidBase32 . fromShort
{-# INLINE isValidBase32 #-}

View file

@ -0,0 +1,210 @@
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Short.Base32.Hex
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.Short.ShortByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32hex
-- encoding format. This includes strictly padded/unpadded and decoding
-- variants, as well as internal and external validation for canonicity.
--
module Data.ByteString.Short.Base32.Hex
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
--, decodeBase32Lenient
-- * Validation
, isBase32Hex
, isValidBase32Hex
) where
import qualified Data.ByteString.Base32.Hex as B32H
import Data.ByteString.Short (ShortByteString, fromShort, toShort)
import Data.Text (Text)
import Data.Text.Short (ShortText)
import Data.Text.Short.Unsafe (fromShortByteStringUnsafe)
-- | Encode a 'ShortByteString' value as a Base32hex 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "ADQMS==="
--
encodeBase32 :: ShortByteString -> ShortText
encodeBase32 = fromShortByteStringUnsafe . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ShortByteString' as a Base32hex 'ShortByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32' "Sun"
-- "ADQMS==="
--
encodeBase32' :: ShortByteString -> ShortByteString
encodeBase32' = toShort . B32H.encodeBase32' . fromShort
-- | Decode an arbitrarily padded Base32hex encoded 'ShortByteString' value. If its length is not a multiple
-- of 4, then padding chars will be added to fill out the input to a multiple of
-- 4 for safe decoding as Base32hex-encoded values are optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32 "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32 "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32 "ADQM==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ShortByteString -> Either Text ShortByteString
decodeBase32 = fmap toShort . B32H.decodeBase32 . fromShort
{-# INLINE decodeBase32 #-}
-- | Encode a 'ShortByteString' value as Base32hex 'Text' without padding. Note that for Base32hex,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32hex and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded :: ShortByteString -> ShortText
encodeBase32Unpadded = fromShortByteStringUnsafe . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ShortByteString' value as Base32hex without padding. Note that for Base32hex,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32hex and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded' :: ShortByteString -> ShortByteString
encodeBase32Unpadded' = toShort . B32H.encodeBase32Unpadded' . fromShort
-- | Decode an unpadded Base32hex-encoded 'ShortByteString' value. Input strings are
-- required to be unpadded, and will undergo validation prior to decoding to
-- confirm.
--
-- In general, unless unpadded Base32hex is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "ADQMS==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ShortByteString -> Either Text ShortByteString
decodeBase32Unpadded = fmap toShort . B32H.decodeBase32Unpadded . fromShort
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32hex-encoded 'ShortByteString' value. Input strings are
-- required to be correctly padded, and will be validated prior to decoding
-- to confirm.
--
-- In general, unless padded Base32hex is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "ADQMS"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ShortByteString -> Either Text ShortByteString
decodeBase32Padded = fmap toShort . B32H.decodeBase32Padded . fromShort
{-# INLINE decodeBase32Padded #-}
-- -- | Leniently decode an unpadded Base32hex-encoded 'ShortByteString'. This function
-- -- will not generate parse errors. If input data contains padding chars,
-- -- then the input will be parsed up until the first pad character.
-- --
-- -- __Note:__ This is not RFC 4648-compliant.
-- --
-- -- === __Examples__:
-- --
-- -- >>> decodeBase32Lenient "PDw_Pj4="
-- -- "<<?>>"
-- --
-- -- >>> decodeBase32Lenient "PDw_%%%$}Pj4"
-- -- "<<?>>"
-- --
-- decodeBase32Lenient :: ShortByteString -> ShortByteString
-- decodeBase32Lenient = toShort . B32H.decodeBase32Lenient . fromShort
-- {-# INLINE decodeBase32Lenient #-}
-- | Tell whether a 'ShortByteString' is Base32hex-encoded.
--
-- === __Examples__:
--
-- >>> isBase32Hex "ADQMS"
-- True
--
-- >>> isBase32Hex "ADQMS==="
-- True
--
-- >>> isBase32Hex "ADQMS=="
-- False
--
isBase32Hex :: ShortByteString -> Bool
isBase32Hex = B32H.isBase32Hex . fromShort
{-# INLINE isBase32Hex #-}
-- | Tell whether a 'ShortByteString' is a valid Base32hex format.
--
-- This will not tell you whether or not this is a correct Base32hex representation,
-- only that it conforms to the correct shape. To check whether it is a true
-- Base32 encoded 'ShortByteString' value, use 'isBase32Hex'.
--
-- === __Examples__:
--
-- >>> isValidBase32Hex "ADQMS"
-- True
--
-- >>> isValidBase32Hex "ADQMS="
-- False
--
-- >>> isValidBase32Hex "ADQMS%"
-- False
--
isValidBase32Hex :: ShortByteString -> Bool
isValidBase32Hex = B32H.isValidBase32Hex . fromShort
{-# INLINE isValidBase32Hex #-}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,176 @@
-- |
-- Module : Data.Memory.Encoding.Base16
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : unknown
--
-- Low-level Base16 encoding and decoding.
--
-- If you just want to encode or decode some bytes, you probably want to use
-- the "Data.ByteArray.Encoding" module.
--
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Rank2Types #-}
module Data.Memory.Encoding.Base16
( showHexadecimal
, toHexadecimal
, fromHexadecimal
) where
import Data.Memory.Internal.Compat
import Data.Word
import Basement.Bits
import Basement.IntegralConv
import GHC.Prim
import GHC.Types
import GHC.Word
import GHC.Char (chr)
import Control.Monad
import Foreign.Storable
import Foreign.Ptr (Ptr)
-- | Transform a raw memory to an hexadecimal 'String'
--
-- user beware, no checks are made
showHexadecimal :: (forall a . (Ptr Word8 -> IO a) -> IO a) -- ^ a 'with' type of function to hold reference to the object
-> Int -- ^ length in bytes
-> String
showHexadecimal withPtr = doChunks 0
where
doChunks ofs len
| len < 4 = doUnique ofs len
| otherwise = do
let !(a, b, c, d) = unsafeDoIO $ withPtr (read4 ofs)
!(# w1, w2 #) = convertByte a
!(# w3, w4 #) = convertByte b
!(# w5, w6 #) = convertByte c
!(# w7, w8 #) = convertByte d
in wToChar w1 : wToChar w2 : wToChar w3 : wToChar w4
: wToChar w5 : wToChar w6 : wToChar w7 : wToChar w8
: doChunks (ofs + 4) (len - 4)
doUnique ofs len
| len == 0 = []
| otherwise =
let !b = unsafeDoIO $ withPtr (byteIndex ofs)
!(# w1, w2 #) = convertByte b
in wToChar w1 : wToChar w2 : doUnique (ofs + 1) (len - 1)
read4 :: Int -> Ptr Word8 -> IO (Word8, Word8, Word8, Word8)
read4 ofs p =
liftM4 (,,,) (byteIndex ofs p) (byteIndex (ofs+1) p)
(byteIndex (ofs+2) p) (byteIndex (ofs+3) p)
wToChar :: Word8 -> Char
wToChar w = chr (integralUpsize w)
byteIndex :: Int -> Ptr Word8 -> IO Word8
byteIndex i p = peekByteOff p i
-- | Transform a number of bytes pointed by.@src in the hexadecimal binary representation in @dst
--
-- destination memory need to be of correct size, otherwise it will lead
-- to really bad things.
toHexadecimal :: Ptr Word8 -- ^ destination memory
-> Ptr Word8 -- ^ source memory
-> Int -- ^ number of bytes
-> IO ()
toHexadecimal bout bin n = loop 0
where loop i
| i == n = return ()
| otherwise = do
!w <- peekByteOff bin i
let !(# !w1, !w2 #) = convertByte w
pokeByteOff bout (i * 2) w1
pokeByteOff bout (i * 2 + 1) w2
loop (i+1)
-- | Convert a value Word# to two Word#s containing
-- the hexadecimal representation of the Word#
convertByte :: Word8 -> (# Word8, Word8 #)
convertByte bwrap = (# r tableHi b, r tableLo b #)
where
!(W# b) = integralUpsize bwrap
r :: Addr# -> Word# -> Word8
r table index = W8# (indexWord8OffAddr# table (word2Int# index))
!tableLo =
"0123456789abcdef0123456789abcdef\
\0123456789abcdef0123456789abcdef\
\0123456789abcdef0123456789abcdef\
\0123456789abcdef0123456789abcdef\
\0123456789abcdef0123456789abcdef\
\0123456789abcdef0123456789abcdef\
\0123456789abcdef0123456789abcdef\
\0123456789abcdef0123456789abcdef"#
!tableHi =
"00000000000000001111111111111111\
\22222222222222223333333333333333\
\44444444444444445555555555555555\
\66666666666666667777777777777777\
\88888888888888889999999999999999\
\aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbb\
\ccccccccccccccccdddddddddddddddd\
\eeeeeeeeeeeeeeeeffffffffffffffff"#
{-# INLINE convertByte #-}
-- | convert a base16 @src in @dst.
--
-- n need to even
fromHexadecimal :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int)
fromHexadecimal dst src n
| odd n = error "fromHexadecimal: invalid odd length."
| otherwise = loop 0 0
where loop di i
| i == n = return Nothing
| otherwise = do
a <- rHi `fmap` peekByteOff src i
b <- rLo `fmap` peekByteOff src (i+1)
if a == 0xff || b == 0xff
then return $ Just i
else pokeByteOff dst di (a .|. b) >> loop (di+1) (i+2)
rLo, rHi :: Word8 -> Word8
rLo index = W8# (indexWord8OffAddr# tableLo (word2Int# widx))
where !(W# widx) = integralUpsize index
rHi index = W8# (indexWord8OffAddr# tableHi (word2Int# widx))
where !(W# widx) = integralUpsize index
!tableLo =
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff\
\\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
!tableHi =
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\x00\x10\x20\x30\x40\x50\x60\x70\x80\x90\xff\xff\xff\xff\xff\xff\
\\xff\xa0\xb0\xc0\xd0\xe0\xf0\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xa0\xb0\xc0\xd0\xe0\xf0\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#

View file

@ -0,0 +1,256 @@
-- |
-- Module : Data.Memory.Encoding.Base32
-- License : BSD-style
-- Maintainer : Nicolas DI PRIMA <nicolas@di-prima.fr>
-- Stability : experimental
-- Portability : unknown
--
-- Low-level Base32 encoding and decoding.
--
-- If you just want to encode or decode some bytes, you probably want to use
-- the "Data.ByteArray.Encoding" module.
--
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Rank2Types #-}
module Data.Memory.Encoding.Base32
( toBase32
, unBase32Length
, fromBase32
) where
import Data.Memory.Internal.Compat
import Data.Word
import Basement.Bits
import Basement.IntegralConv
import GHC.Prim
import GHC.Word
import Control.Monad
import Foreign.Storable
import Foreign.Ptr (Ptr)
-- | Transform a number of bytes pointed by.@src in the base32 binary representation in @dst
--
-- destination memory need to be of correct size, otherwise it will lead
-- to really bad things.
toBase32 :: Ptr Word8 -- ^ input
-> Ptr Word8 -- ^ output
-> Int -- ^ input len
-> IO ()
toBase32 dst src len = loop 0 0
where
eqChar :: Word8
eqChar = 0x3d
peekOrZero :: Int -> IO Word8
peekOrZero i
| i >= len = return 0
| otherwise = peekByteOff src i
pokeOrPadding :: Int -- for the test
-> Int -- src index
-> Word8 -- the value
-> IO ()
pokeOrPadding i di v
| i < len = pokeByteOff dst di v
| otherwise = pokeByteOff dst di eqChar
loop :: Int -- index input
-> Int -- index output
-> IO ()
loop i di
| i >= len = return ()
| otherwise = do
i1 <- peekByteOff src i
i2 <- peekOrZero (i + 1)
i3 <- peekOrZero (i + 2)
i4 <- peekOrZero (i + 3)
i5 <- peekOrZero (i + 4)
let (o1,o2,o3,o4,o5,o6,o7,o8) = toBase32Per5Bytes (i1, i2, i3, i4, i5)
pokeByteOff dst di o1
pokeByteOff dst (di + 1) o2
pokeOrPadding (i + 1) (di + 2) o3
pokeOrPadding (i + 1) (di + 3) o4
pokeOrPadding (i + 2) (di + 4) o5
pokeOrPadding (i + 3) (di + 5) o6
pokeOrPadding (i + 3) (di + 6) o7
pokeOrPadding (i + 4) (di + 7) o8
loop (i+5) (di+8)
toBase32Per5Bytes :: (Word8, Word8, Word8, Word8, Word8)
-> (Word8, Word8, Word8, Word8, Word8, Word8, Word8, Word8)
toBase32Per5Bytes (!i1, !i2, !i3, !i4, !i5) =
(index o1, index o2, index o3, index o4, index o5, index o6, index o7, index o8)
where
-- 1111 1000 >> 3
!o1 = (i1 .&. 0xF8) .>>. 3
-- 0000 0111 << 2 | 1100 0000 >> 6
!o2 = ((i1 .&. 0x07) .<<. 2) .|. ((i2 .&. 0xC0) .>>. 6)
-- 0011 1110 >> 1
!o3 = ((i2 .&. 0x3E) .>>. 1)
-- 0000 0001 << 4 | 1111 0000 >> 4
!o4 = ((i2 .&. 0x01) .<<. 4) .|. ((i3 .&. 0xF0) .>>. 4)
-- 0000 1111 << 1 | 1000 0000 >> 7
!o5 = ( (i3 .&. 0x0F) .<<. 1) .|. ((i4 .&. 0x80) .>>. 7)
-- 0111 1100 >> 2
!o6 = (i4 .&. 0x7C) .>>. 2
-- 0000 0011 << 3 | 1110 0000 >> 5
!o7 = ((i4 .&. 0x03) .<<. 3) .|. ((i5 .&. 0xE0) .>>. 5)
-- 0001 1111
!o8 = i5 .&. 0x1F
!set = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"#
index :: Word8 -> Word8
index idx = W8# (indexWord8OffAddr# set (word2Int# widx))
where !(W# widx) = integralUpsize idx
-- | Get the length needed for the destination buffer for a base32 decoding.
--
-- if the length is not a multiple of 8, Nothing is returned
unBase32Length :: Ptr Word8 -> Int -> IO (Maybe Int)
unBase32Length src len
| len < 1 = return $ Just 0
| (len `mod` 8) /= 0 = return Nothing
| otherwise = do
last1Byte <- peekByteOff src (len - 1)
last2Byte <- peekByteOff src (len - 2)
last3Byte <- peekByteOff src (len - 3)
last4Byte <- peekByteOff src (len - 4)
last5Byte <- peekByteOff src (len - 5)
last6Byte <- peekByteOff src (len - 6)
let dstLen = caseByte last1Byte last2Byte last3Byte last4Byte last5Byte last6Byte
return $ Just $ (len `div` 8) * 5 - dstLen
where
caseByte :: Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Int
caseByte last1 last2 last3 last4 last5 last6
| last6 == eqAscii = 4
| last5 == eqAscii = 3 -- error this padding is not expected (error will be detected in fromBase32)
| last4 == eqAscii = 3
| last3 == eqAscii = 2
| last2 == eqAscii = 1 -- error this padding is not expected (error will be detected in fromBase32)
| last1 == eqAscii = 1
| otherwise = 0
eqAscii :: Word8
eqAscii = 0x3D
-- | convert from base32 in @src to binary in @dst, using the number of bytes specified
--
-- the user should use unBase32Length to compute the correct length, or check that
-- the length specification is proper. no check is done here.
fromBase32 :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int)
fromBase32 dst src len
| len == 0 = return Nothing
| otherwise = loop 0 0
where
loop :: Int -- the index dst
-> Int -- the index src
-> IO (Maybe Int)
loop di i
| i == (len - 8) = do
i1 <- peekByteOff src i
i2 <- peekByteOff src (i + 1)
i3 <- peekByteOff src (i + 2)
i4 <- peekByteOff src (i + 3)
i5 <- peekByteOff src (i + 4)
i6 <- peekByteOff src (i + 5)
i7 <- peekByteOff src (i + 6)
i8 <- peekByteOff src (i + 7)
let (nbBytes, i3', i4', i5', i6', i7', i8') =
case (i3, i4, i5, i6, i7, i8) of
(0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D) -> (6, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41)
(0x3D, _ , _ , _ , _ , _ ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid
(_ , 0x3D, 0x3D, 0x3D, 0x3D, 0x3D) -> (5, i3 , 0x41, 0x41, 0x41, 0x41, 0x41)
(_ , 0x3D, _ , _ , _ , _ ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid
(_ , _ , 0x3D, 0x3D, 0x3D, 0x3D) -> (4, i3 , i4 , 0x41, 0x41, 0x41, 0x41)
(_ , _ , 0x3D, _ , _ , _ ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid
(_ , _ , _ , 0x3D, 0x3D, 0x3D) -> (3, i3 , i4 , i5 , 0x41, 0x41, 0x41)
(_ , _ , _ , 0x3D, _ , _ ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid
(_ , _ , _ , _ , 0x3D, 0x3D) -> (2, i3 , i4 , i5 , i6 , 0x41, 0x41)
(_ , _ , _ , _ , 0x3D, _ ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid
(_ , _ , _ , _ , _ , 0x3D) -> (1, i3 , i4 , i5 , i6 , i7 , 0x41)
(_ , _ , _ , _ , _ , _ ) -> (0 :: Int, i3, i4, i5, i6, i7, i8)
case fromBase32Per8Bytes (i1, i2, i3', i4', i5', i6', i7', i8') of
Left ofs -> return $ Just (i + ofs)
Right (o1, o2, o3, o4, o5) -> do
pokeByteOff dst di o1
pokeByteOff dst (di+1) o2
when (nbBytes < 5) $ pokeByteOff dst (di+2) o3
when (nbBytes < 4) $ pokeByteOff dst (di+3) o4
when (nbBytes < 2) $ pokeByteOff dst (di+4) o5
return Nothing
| otherwise = do
i1 <- peekByteOff src i
i2 <- peekByteOff src (i + 1)
i3 <- peekByteOff src (i + 2)
i4 <- peekByteOff src (i + 3)
i5 <- peekByteOff src (i + 4)
i6 <- peekByteOff src (i + 5)
i7 <- peekByteOff src (i + 6)
i8 <- peekByteOff src (i + 7)
case fromBase32Per8Bytes (i1, i2, i3, i4, i5, i6, i7, i8) of
Left ofs -> return $ Just (i + ofs)
Right (o1, o2, o3, o4, o5) -> do
pokeByteOff dst di o1
pokeByteOff dst (di+1) o2
pokeByteOff dst (di+2) o3
pokeByteOff dst (di+3) o4
pokeByteOff dst (di+4) o5
loop (di+5) (i+8)
fromBase32Per8Bytes :: (Word8, Word8, Word8, Word8, Word8, Word8, Word8, Word8)
-> Either Int (Word8, Word8, Word8, Word8, Word8)
fromBase32Per8Bytes (i1, i2, i3, i4, i5, i6, i7, i8) =
case (rset i1, rset i2, rset i3, rset i4, rset i5, rset i6, rset i7, rset i8) of
(0xFF, _ , _ , _ , _ , _ , _ , _ ) -> Left 0
(_ , 0xFF, _ , _ , _ , _ , _ , _ ) -> Left 1
(_ , _ , 0xFF, _ , _ , _ , _ , _ ) -> Left 2
(_ , _ , _ , 0xFF, _ , _ , _ , _ ) -> Left 3
(_ , _ , _ , _ , 0xFF, _ , _ , _ ) -> Left 4
(_ , _ , _ , _ , _ , 0xFF, _ , _ ) -> Left 5
(_ , _ , _ , _ , _ , _ , 0xFF, _ ) -> Left 6
(_ , _ , _ , _ , _ , _ , _ , 0xFF) -> Left 7
(ri1 , ri2 , ri3 , ri4 , ri5 , ri6 , ri7 , ri8 ) ->
-- 0001 1111 << 3 | 0001 11xx >> 2
let o1 = (ri1 `unsafeShiftL` 3) .|. (ri2 `unsafeShiftR` 2)
-- 000x xx11 << 6 | 0001 1111 << 1 | 0001 xxxx >> 4
o2 = (ri2 `unsafeShiftL` 6) .|. (ri3 `unsafeShiftL` 1) .|. (ri4 `unsafeShiftR` 4)
-- 000x 1111 << 4 | 0001 111x >> 1
o3 = (ri4 `unsafeShiftL` 4) .|. (ri5 `unsafeShiftR` 1)
-- 000x xxx1 << 7 | 0001 1111 << 2 | 0001 1xxx >> 3
o4 = (ri5 `unsafeShiftL` 7) .|. (ri6 `unsafeShiftL` 2) .|. (ri7 `unsafeShiftR` 3)
-- 000x x111 << 5 | 0001 1111
o5 = (ri7 `unsafeShiftL` 5) .|. ri8
in Right (o1, o2, o3, o4, o5)
where
rset :: Word8 -> Word8
rset w = W8# (indexWord8OffAddr# rsetTable (word2Int# widx))
where !(W# widx) = integralUpsize w
!rsetTable = "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\x1A\x1B\x1C\x1D\x1E\x1F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\
\\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
\\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"#

View file

@ -0,0 +1,328 @@
-- |
-- Module : Data.Memory.Encoding.Base64
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : unknown
--
-- Low-level Base64 encoding and decoding.
--
-- If you just want to encode or decode some bytes, you probably want to use
-- the "Data.ByteArray.Encoding" module.
--
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Rank2Types #-}
module Data.Memory.Encoding.Base64
( toBase64
, toBase64URL
, toBase64OpenBSD
, unBase64Length
, unBase64LengthUnpadded
, fromBase64
, fromBase64URLUnpadded
, fromBase64OpenBSD
) where
import Data.Memory.Internal.Compat
import Data.Memory.Internal.Imports
import Basement.Bits
import Basement.IntegralConv (integralUpsize)
import GHC.Prim
import GHC.Word
import Foreign.Storable
import Foreign.Ptr (Ptr)
-- | Transform a number of bytes pointed by @src@ to base64 binary representation in @dst@
--
-- The destination memory need to be of correct size, otherwise it will lead
-- to really bad things.
toBase64 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
toBase64 dst src len = toBase64Internal set dst src len True
where
!set = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"#
-- | Transform a number of bytes pointed by @src@ to, URL-safe base64 binary
-- representation in @dst@. The result will be either padded or unpadded,
-- depending on the boolean @padded@ argument.
--
-- The destination memory need to be of correct size, otherwise it will lead
-- to really bad things.
toBase64URL :: Bool -> Ptr Word8 -> Ptr Word8 -> Int -> IO ()
toBase64URL padded dst src len = toBase64Internal set dst src len padded
where
!set = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"#
toBase64OpenBSD :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
toBase64OpenBSD dst src len = toBase64Internal set dst src len False
where
!set = "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"#
toBase64Internal :: Addr# -> Ptr Word8 -> Ptr Word8 -> Int -> Bool -> IO ()
toBase64Internal table dst src len padded = loop 0 0
where
eqChar = 0x3d :: Word8
loop i di
| i >= len = return ()
| otherwise = do
a <- peekByteOff src i
b <- if i + 1 >= len then return 0 else peekByteOff src (i+1)
c <- if i + 2 >= len then return 0 else peekByteOff src (i+2)
let (w,x,y,z) = convert3 table a b c
pokeByteOff dst di w
pokeByteOff dst (di+1) x
if i + 1 < len
then
pokeByteOff dst (di+2) y
else
when padded (pokeByteOff dst (di+2) eqChar)
if i + 2 < len
then
pokeByteOff dst (di+3) z
else
when padded (pokeByteOff dst (di+3) eqChar)
loop (i+3) (di+4)
convert3 :: Addr# -> Word8 -> Word8 -> Word8 -> (Word8, Word8, Word8, Word8)
convert3 table !a !b !c =
let !w = a .>>. 2
!x = ((a .<<. 4) .&. 0x30) .|. (b .>>. 4)
!y = ((b .<<. 2) .&. 0x3c) .|. (c .>>. 6)
!z = c .&. 0x3f
in (index w, index x, index y, index z)
where
index :: Word8 -> Word8
index !idxb = W8# (indexWord8OffAddr# table (word2Int# idx))
where !(W# idx) = integralUpsize idxb
-- | Get the length needed for the destination buffer for a base64 decoding.
--
-- if the length is not a multiple of 4, Nothing is returned
unBase64Length :: Ptr Word8 -> Int -> IO (Maybe Int)
unBase64Length src len
| len < 1 = return $ Just 0
| (len `mod` 4) /= 0 = return Nothing
| otherwise = do
last1Byte <- peekByteOff src (len - 1)
last2Byte <- peekByteOff src (len - 2)
let dstLen = if last1Byte == eqAscii
then if last2Byte == eqAscii then 2 else 1
else 0
return $ Just $ (len `div` 4) * 3 - dstLen
where
eqAscii :: Word8
eqAscii = fromIntegral (fromEnum '=')
-- | Get the length needed for the destination buffer for an
-- <http://tools.ietf.org/html/rfc4648#section-3.2 unpadded> base64 decoding.
--
-- If the length of the encoded string is a multiple of 4, plus one, Nothing is
-- returned. Any other value can be valid without padding.
unBase64LengthUnpadded :: Int -> Maybe Int
unBase64LengthUnpadded len = case r of
0 -> Just (3*q)
2 -> Just (3*q + 1)
3 -> Just (3*q + 2)
_ -> Nothing
where (q, r) = len `divMod` 4
fromBase64OpenBSD :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int)
fromBase64OpenBSD dst src len = fromBase64Unpadded rsetOpenBSD dst src len
fromBase64URLUnpadded :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int)
fromBase64URLUnpadded dst src len = fromBase64Unpadded rsetURL dst src len
fromBase64Unpadded :: (Word8 -> Word8) -> Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int)
fromBase64Unpadded rset dst src len = loop 0 0
where loop di i
| i == len = return Nothing
| i == len - 1 = return Nothing -- Shouldn't happen if len is valid
| i == len - 2 = do
a <- peekByteOff src i
b <- peekByteOff src (i+1)
case decode2 a b of
Left ofs -> return $ Just (i + ofs)
Right x -> do
pokeByteOff dst di x
return Nothing
| i == len - 3 = do
a <- peekByteOff src i
b <- peekByteOff src (i+1)
c <- peekByteOff src (i+2)
case decode3 a b c of
Left ofs -> return $ Just (i + ofs)
Right (x,y) -> do
pokeByteOff dst di x
pokeByteOff dst (di+1) y
return Nothing
| otherwise = do
a <- peekByteOff src i
b <- peekByteOff src (i+1)
c <- peekByteOff src (i+2)
d <- peekByteOff src (i+3)
case decode4 a b c d of
Left ofs -> return $ Just (i + ofs)
Right (x,y,z) -> do
pokeByteOff dst di x
pokeByteOff dst (di+1) y
pokeByteOff dst (di+2) z
loop (di + 3) (i + 4)
decode2 :: Word8 -> Word8 -> Either Int Word8
decode2 a b =
case (rset a, rset b) of
(0xff, _ ) -> Left 0
(_ , 0xff) -> Left 1
(ra , rb ) -> Right ((ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4))
decode3 :: Word8 -> Word8 -> Word8 -> Either Int (Word8, Word8)
decode3 a b c =
case (rset a, rset b, rset c) of
(0xff, _ , _ ) -> Left 0
(_ , 0xff, _ ) -> Left 1
(_ , _ , 0xff) -> Left 2
(ra , rb , rc ) ->
let x = (ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4)
y = (rb `unsafeShiftL` 4) .|. (rc `unsafeShiftR` 2)
in Right (x,y)
decode4 :: Word8 -> Word8 -> Word8 -> Word8 -> Either Int (Word8, Word8, Word8)
decode4 a b c d =
case (rset a, rset b, rset c, rset d) of
(0xff, _ , _ , _ ) -> Left 0
(_ , 0xff, _ , _ ) -> Left 1
(_ , _ , 0xff, _ ) -> Left 2
(_ , _ , _ , 0xff) -> Left 3
(ra , rb , rc , rd ) ->
let x = (ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4)
y = (rb `unsafeShiftL` 4) .|. (rc `unsafeShiftR` 2)
z = (rc `unsafeShiftL` 6) .|. rd
in Right (x,y,z)
rsetURL :: Word8 -> Word8
rsetURL !w = W8# (indexWord8OffAddr# rsetTable (word2Int# widx))
where !(W# widx) = integralUpsize w
!rsetTable = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x3e\xff\xff\
\\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xff\xff\xff\
\\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\
\\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\x3f\
\\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\
\\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
rsetOpenBSD :: Word8 -> Word8
rsetOpenBSD !w = W8# (indexWord8OffAddr# rsetTable (word2Int# widx))
where !(W# widx) = integralUpsize w
!rsetTable = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x01\
\\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f\xff\xff\xff\xff\xff\xff\
\\xff\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\
\\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\xff\xff\xff\xff\xff\
\\xff\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\
\\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34\x35\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
-- | convert from base64 in @src@ to binary in @dst@, using the number of bytes specified
--
-- the user should use unBase64Length to compute the correct length, or check that
-- the length specification is proper. no check is done here.
fromBase64 :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int)
fromBase64 dst src len
| len == 0 = return Nothing
| otherwise = loop 0 0
where loop di i
| i == (len-4) = do
a <- peekByteOff src i
b <- peekByteOff src (i+1)
c <- peekByteOff src (i+2)
d <- peekByteOff src (i+3)
let (nbBytes, c',d') =
case (c,d) of
(0x3d, 0x3d) -> (2, 0x30, 0x30)
(0x3d, _ ) -> (0, c, d) -- invalid: automatically 'c' will make it error out
(_ , 0x3d) -> (1, c, 0x30)
(_ , _ ) -> (0 :: Int, c, d)
case decode4 a b c' d' of
Left ofs -> return $ Just (i + ofs)
Right (x,y,z) -> do
pokeByteOff dst di x
when (nbBytes < 2) $ pokeByteOff dst (di+1) y
when (nbBytes < 1) $ pokeByteOff dst (di+2) z
return Nothing
| otherwise = do
a <- peekByteOff src i
b <- peekByteOff src (i+1)
c <- peekByteOff src (i+2)
d <- peekByteOff src (i+3)
case decode4 a b c d of
Left ofs -> return $ Just (i + ofs)
Right (x,y,z) -> do
pokeByteOff dst di x
pokeByteOff dst (di+1) y
pokeByteOff dst (di+2) z
loop (di + 3) (i + 4)
decode4 :: Word8 -> Word8 -> Word8 -> Word8 -> Either Int (Word8, Word8, Word8)
decode4 a b c d =
case (rset a, rset b, rset c, rset d) of
(0xff, _ , _ , _ ) -> Left 0
(_ , 0xff, _ , _ ) -> Left 1
(_ , _ , 0xff, _ ) -> Left 2
(_ , _ , _ , 0xff) -> Left 3
(ra , rb , rc , rd ) ->
let x = (ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4)
y = (rb `unsafeShiftL` 4) .|. (rc `unsafeShiftR` 2)
z = (rc `unsafeShiftL` 6) .|. rd
in Right (x,y,z)
rset :: Word8 -> Word8
rset !w = W8# (indexWord8OffAddr# rsetTable (word2Int# widx))
where !(W# widx) = integralUpsize w
!rsetTable = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x3e\xff\xff\xff\x3f\
\\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xff\xff\xff\
\\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\
\\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\xff\
\\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\
\\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
\\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#

View file

@ -0,0 +1,121 @@
-- |
-- Module : Data.Memory.Endian
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : stable
-- Portability : good
--
{-# LANGUAGE CPP #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Data.Memory.Endian
( Endianness(..)
, getSystemEndianness
, BE(..), LE(..)
, fromBE, toBE
, fromLE, toLE
, ByteSwap
) where
import Data.Word (Word16, Word32, Word64)
import Foreign.Storable
#if !defined(ARCH_IS_LITTLE_ENDIAN) && !defined(ARCH_IS_BIG_ENDIAN)
import Data.Word (Word8)
import Data.Memory.Internal.Compat (unsafeDoIO)
import Foreign.Marshal.Alloc
import Foreign.Ptr
#endif
import Data.Memory.Internal.Compat (byteSwap64, byteSwap32, byteSwap16)
-- | represent the CPU endianness
--
-- Big endian system stores bytes with the MSB as the first byte.
-- Little endian system stores bytes with the LSB as the first byte.
--
-- middle endian is purposely avoided.
data Endianness = LittleEndian
| BigEndian
deriving (Show,Eq)
-- | Return the system endianness
getSystemEndianness :: Endianness
#ifdef ARCH_IS_LITTLE_ENDIAN
getSystemEndianness = LittleEndian
#elif ARCH_IS_BIG_ENDIAN
getSystemEndianness = BigEndian
#else
getSystemEndianness
| isLittleEndian = LittleEndian
| isBigEndian = BigEndian
| otherwise = error "cannot determine endianness"
where
isLittleEndian = endianCheck == 2
isBigEndian = endianCheck == 1
endianCheck = unsafeDoIO $ alloca $ \p -> do
poke p (0x01000002 :: Word32)
peek (castPtr p :: Ptr Word8)
#endif
-- | Little Endian value
newtype LE a = LE { unLE :: a }
deriving (Show,Eq,Storable)
-- | Big Endian value
newtype BE a = BE { unBE :: a }
deriving (Show,Eq,Storable)
-- | Convert a value in cpu endianess to big endian
toBE :: ByteSwap a => a -> BE a
#ifdef ARCH_IS_LITTLE_ENDIAN
toBE = BE . byteSwap
#elif ARCH_IS_BIG_ENDIAN
toBE = BE
#else
toBE = BE . (if getSystemEndianness == LittleEndian then byteSwap else id)
#endif
{-# INLINE toBE #-}
-- | Convert from a big endian value to the cpu endianness
fromBE :: ByteSwap a => BE a -> a
#ifdef ARCH_IS_LITTLE_ENDIAN
fromBE (BE a) = byteSwap a
#elif ARCH_IS_BIG_ENDIAN
fromBE (BE a) = a
#else
fromBE (BE a) = if getSystemEndianness == LittleEndian then byteSwap a else a
#endif
{-# INLINE fromBE #-}
-- | Convert a value in cpu endianess to little endian
toLE :: ByteSwap a => a -> LE a
#ifdef ARCH_IS_LITTLE_ENDIAN
toLE = LE
#elif ARCH_IS_BIG_ENDIAN
toLE = LE . byteSwap
#else
toLE = LE . (if getSystemEndianness == LittleEndian then id else byteSwap)
#endif
{-# INLINE toLE #-}
-- | Convert from a little endian value to the cpu endianness
fromLE :: ByteSwap a => LE a -> a
#ifdef ARCH_IS_LITTLE_ENDIAN
fromLE (LE a) = a
#elif ARCH_IS_BIG_ENDIAN
fromLE (LE a) = byteSwap a
#else
fromLE (LE a) = if getSystemEndianness == LittleEndian then a else byteSwap a
#endif
{-# INLINE fromLE #-}
-- | Class of types that can be byte-swapped.
--
-- e.g. Word16, Word32, Word64
class Storable a => ByteSwap a where
byteSwap :: a -> a
instance ByteSwap Word16 where
byteSwap = byteSwap16
instance ByteSwap Word32 where
byteSwap = byteSwap32
instance ByteSwap Word64 where
byteSwap = byteSwap64

View file

@ -0,0 +1,17 @@
-- |
-- Module : Data.Memory.ExtendedWords
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : unknown
--
-- Extra Word size
--
module Data.Memory.ExtendedWords
( Word128(..)
) where
import Data.Word (Word64)
-- | A simple Extended Word128 composed of 2 Word64
data Word128 = Word128 !Word64 !Word64 deriving (Show, Eq)

View file

@ -0,0 +1,106 @@
-- |
-- Module : Data.Memory.Hash.FNV
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : good
--
-- Fowler Noll Vo Hash (1 and 1a / 32 / 64 bits versions)
-- <http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function>
--
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE BangPatterns #-}
module Data.Memory.Hash.FNV
(
-- * types
FnvHash32(..)
, FnvHash64(..)
-- * methods
, fnv1
, fnv1a
, fnv1_64
, fnv1a_64
) where
import Basement.Bits
import Basement.IntegralConv
import Data.Memory.Internal.Compat ()
import Data.Memory.Internal.Imports
import GHC.Word
import GHC.Prim hiding (Word64#, Int64#)
import GHC.Types
import GHC.Ptr
-- | FNV1(a) hash (32 bit variants)
newtype FnvHash32 = FnvHash32 Word32
deriving (Show,Eq,Ord,NFData)
-- | FNV1(a) hash (64 bit variants)
newtype FnvHash64 = FnvHash64 Word64
deriving (Show,Eq,Ord,NFData)
fnv1_32_Mix8 :: Word8 -> FnvHash32 -> FnvHash32
fnv1_32_Mix8 !w (FnvHash32 acc) = FnvHash32 ((0x01000193 * acc) .^. integralUpsize w)
{-# INLINE fnv1_32_Mix8 #-}
fnv1a_32_Mix8 :: Word8 -> FnvHash32 -> FnvHash32
fnv1a_32_Mix8 !w (FnvHash32 acc) = FnvHash32 (0x01000193 * (acc .^. integralUpsize w))
{-# INLINE fnv1a_32_Mix8 #-}
fnv1_64_Mix8 :: Word8 -> FnvHash64 -> FnvHash64
fnv1_64_Mix8 !w (FnvHash64 acc) = FnvHash64 ((0x100000001b3 * acc) .^. integralUpsize w)
{-# INLINE fnv1_64_Mix8 #-}
fnv1a_64_Mix8 :: Word8 -> FnvHash64 -> FnvHash64
fnv1a_64_Mix8 !w (FnvHash64 acc) = FnvHash64 (0x100000001b3 * (acc .^. integralUpsize w))
{-# INLINE fnv1a_64_Mix8 #-}
-- | compute FNV1 (32 bit variant) of a raw piece of memory
fnv1 :: Ptr Word8 -> Int -> IO FnvHash32
fnv1 (Ptr addr) n = loop (FnvHash32 0x811c9dc5) 0
where
loop :: FnvHash32 -> Int -> IO FnvHash32
loop !acc !i
| i == n = pure $ acc
| otherwise = do
v <- read8 addr i
loop (fnv1_32_Mix8 v acc) (i + 1)
-- | compute FNV1a (32 bit variant) of a raw piece of memory
fnv1a :: Ptr Word8 -> Int -> IO FnvHash32
fnv1a (Ptr addr) n = loop (FnvHash32 0x811c9dc5) 0
where
loop :: FnvHash32 -> Int -> IO FnvHash32
loop !acc !i
| i == n = pure $ acc
| otherwise = do
v <- read8 addr i
loop (fnv1a_32_Mix8 v acc) (i + 1)
-- | compute FNV1 (64 bit variant) of a raw piece of memory
fnv1_64 :: Ptr Word8 -> Int -> IO FnvHash64
fnv1_64 (Ptr addr) n = loop (FnvHash64 0xcbf29ce484222325) 0
where
loop :: FnvHash64 -> Int -> IO FnvHash64
loop !acc !i
| i == n = pure $ acc
| otherwise = do
v <- read8 addr i
loop (fnv1_64_Mix8 v acc) (i + 1)
-- | compute FNV1a (64 bit variant) of a raw piece of memory
fnv1a_64 :: Ptr Word8 -> Int -> IO FnvHash64
fnv1a_64 (Ptr addr) n = loop (FnvHash64 0xcbf29ce484222325) 0
where
loop :: FnvHash64 -> Int -> IO FnvHash64
loop !acc !i
| i == n = pure $ acc
| otherwise = do
v <- read8 addr i
loop (fnv1a_64_Mix8 v acc) (i + 1)
read8 :: Addr# -> Int -> IO Word8
read8 addr (I# i) = IO $ \s -> case readWord8OffAddr# addr i s of
(# s2, e #) -> (# s2, W8# e #)

View file

@ -0,0 +1,163 @@
-- |
-- Module : Data.Memory.Hash.SipHash
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : good
--
-- provide the SipHash algorithm.
-- reference: <http://131002.net/siphash/siphash.pdf>
--
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
module Data.Memory.Hash.SipHash
( SipKey(..)
, SipHash(..)
, hash
, hashWith
) where
import Data.Memory.Endian
import Data.Memory.Internal.Compat
import Data.Word
import Data.Bits
import Data.Typeable (Typeable)
import Control.Monad
import Foreign.Ptr
import Foreign.Storable
-- | SigHash Key
data SipKey = SipKey {-# UNPACK #-} !Word64 {-# UNPACK #-} !Word64
-- | Siphash tag value
newtype SipHash = SipHash Word64
deriving (Show,Eq,Ord,Typeable)
data InternalState = InternalState {-# UNPACK #-} !Word64 {-# UNPACK #-} !Word64 {-# UNPACK #-} !Word64 {-# UNPACK #-} !Word64
-- | produce a siphash with a key and a memory pointer + length.
hash :: SipKey -> Ptr Word8 -> Int -> IO SipHash
hash = hashWith 2 4
-- | same as 'hash', except also specifies the number of sipround iterations for compression and digest.
hashWith :: Int -- ^ siphash C
-> Int -- ^ siphash D
-> SipKey -- ^ key for the hash
-> Ptr Word8 -- ^ memory pointer
-> Int -- ^ length of the data
-> IO SipHash
hashWith c d key startPtr totalLen = runHash (initSip key) startPtr totalLen
where runHash !st !ptr l
| l > 7 = peek (castPtr ptr) >>= \v -> runHash (process st (fromLE v)) (ptr `plusPtr` 8) (l-8)
| otherwise = do
let !lengthBlock = (fromIntegral totalLen `mod` 256) `unsafeShiftL` 56
(finish . process st) `fmap` case l of
0 -> do return lengthBlock
1 -> do v0 <- peekByteOff ptr 0
return (lengthBlock .|. to64 v0)
2 -> do (v0,v1) <- liftM2 (,) (peekByteOff ptr 0) (peekByteOff ptr 1)
return (lengthBlock
.|. (to64 v1 `unsafeShiftL` 8)
.|. to64 v0)
3 -> do (v0,v1,v2) <- liftM3 (,,) (peekByteOff ptr 0) (peekByteOff ptr 1) (peekByteOff ptr 2)
return ( lengthBlock
.|. (to64 v2 `unsafeShiftL` 16)
.|. (to64 v1 `unsafeShiftL` 8)
.|. to64 v0)
4 -> do (v0,v1,v2,v3) <- liftM4 (,,,) (peekByteOff ptr 0) (peekByteOff ptr 1) (peekByteOff ptr 2)
(peekByteOff ptr 3)
return ( lengthBlock
.|. (to64 v3 `unsafeShiftL` 24)
.|. (to64 v2 `unsafeShiftL` 16)
.|. (to64 v1 `unsafeShiftL` 8)
.|. to64 v0)
5 -> do (v0,v1,v2,v3,v4) <- liftM5 (,,,,) (peekByteOff ptr 0) (peekByteOff ptr 1) (peekByteOff ptr 2)
(peekByteOff ptr 3) (peekByteOff ptr 4)
return ( lengthBlock
.|. (to64 v4 `unsafeShiftL` 32)
.|. (to64 v3 `unsafeShiftL` 24)
.|. (to64 v2 `unsafeShiftL` 16)
.|. (to64 v1 `unsafeShiftL` 8)
.|. to64 v0)
6 -> do v0 <- peekByteOff ptr 0
v1 <- peekByteOff ptr 1
v2 <- peekByteOff ptr 2
v3 <- peekByteOff ptr 3
v4 <- peekByteOff ptr 4
v5 <- peekByteOff ptr 5
return ( lengthBlock
.|. (to64 v5 `unsafeShiftL` 40)
.|. (to64 v4 `unsafeShiftL` 32)
.|. (to64 v3 `unsafeShiftL` 24)
.|. (to64 v2 `unsafeShiftL` 16)
.|. (to64 v1 `unsafeShiftL` 8)
.|. to64 v0)
7 -> do v0 <- peekByteOff ptr 0
v1 <- peekByteOff ptr 1
v2 <- peekByteOff ptr 2
v3 <- peekByteOff ptr 3
v4 <- peekByteOff ptr 4
v5 <- peekByteOff ptr 5
v6 <- peekByteOff ptr 6
return ( lengthBlock
.|. (to64 v6 `unsafeShiftL` 48)
.|. (to64 v5 `unsafeShiftL` 40)
.|. (to64 v4 `unsafeShiftL` 32)
.|. (to64 v3 `unsafeShiftL` 24)
.|. (to64 v2 `unsafeShiftL` 16)
.|. (to64 v1 `unsafeShiftL` 8)
.|. to64 v0)
_ -> error "siphash: internal error: cannot happens"
{-# INLINE to64 #-}
to64 :: Word8 -> Word64
to64 = fromIntegral
{-# INLINE process #-}
process istate m = newState
where newState = postInject $! runRoundsCompression $! preInject istate
preInject (InternalState v0 v1 v2 v3) = InternalState v0 v1 v2 (v3 `xor` m)
postInject (InternalState v0 v1 v2 v3) = InternalState (v0 `xor` m) v1 v2 v3
{-# INLINE finish #-}
finish istate = getDigest $! runRoundsDigest $! preInject istate
where getDigest (InternalState v0 v1 v2 v3) = SipHash (v0 `xor` v1 `xor` v2 `xor` v3)
preInject (InternalState v0 v1 v2 v3) = InternalState v0 v1 (v2 `xor` 0xff) v3
{-# INLINE doRound #-}
doRound (InternalState v0 v1 v2 v3) =
let !v0' = v0 + v1
!v2' = v2 + v3
!v1' = v1 `rotateL` 13
!v3' = v3 `rotateL` 16
!v1'' = v1' `xor` v0'
!v3'' = v3' `xor` v2'
!v0'' = v0' `rotateL` 32
!v2'' = v2' + v1''
!v0''' = v0'' + v3''
!v1''' = v1'' `rotateL` 17
!v3''' = v3'' `rotateL` 21
!v1'''' = v1''' `xor` v2''
!v3'''' = v3''' `xor` v0'''
!v2''' = v2'' `rotateL` 32
in InternalState v0''' v1'''' v2''' v3''''
{-# INLINE runRoundsCompression #-}
runRoundsCompression st
| c == 2 = doRound $! doRound st
| otherwise = loopRounds c st
{-# INLINE runRoundsDigest #-}
runRoundsDigest st
| d == 4 = doRound $! doRound $! doRound $! doRound st
| otherwise = loopRounds d st
{-# INLINE loopRounds #-}
loopRounds 1 !v = doRound v
loopRounds n !v = loopRounds (n-1) (doRound v)
{-# INLINE initSip #-}
initSip (SipKey k0 k1) = InternalState (k0 `xor` 0x736f6d6570736575)
(k1 `xor` 0x646f72616e646f6d)
(k0 `xor` 0x6c7967656e657261)
(k1 `xor` 0x7465646279746573)

View file

@ -0,0 +1,76 @@
-- |
-- Module : Data.Memory.Internal.Compat
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : stable
-- Portability : Good
--
-- This module try to keep all the difference between versions of base
-- or other needed packages, so that modules don't need to use CPP
--
{-# LANGUAGE CPP #-}
module Data.Memory.Internal.Compat
( unsafeDoIO
, popCount
, unsafeShiftL
, unsafeShiftR
, byteSwap64
, byteSwap32
, byteSwap16
) where
import System.IO.Unsafe
import Data.Word
import Data.Bits
-- | perform io for hashes that do allocation and ffi.
-- unsafeDupablePerformIO is used when possible as the
-- computation is pure and the output is directly linked
-- to the input. we also do not modify anything after it has
-- been returned to the user.
unsafeDoIO :: IO a -> a
#if __GLASGOW_HASKELL__ > 704
unsafeDoIO = unsafeDupablePerformIO
#else
unsafeDoIO = unsafePerformIO
#endif
#if !(MIN_VERSION_base(4,5,0))
popCount :: Word64 -> Int
popCount n = loop 0 n
where loop c 0 = c
loop c i = loop (c + if testBit c 0 then 1 else 0) (i `shiftR` 1)
#endif
#if !(MIN_VERSION_base(4,7,0))
byteSwap64 :: Word64 -> Word64
byteSwap64 w =
(w `shiftR` 56) .|. (w `shiftL` 56)
.|. ((w `shiftR` 40) .&. 0xff00) .|. ((w .&. 0xff00) `shiftL` 40)
.|. ((w `shiftR` 24) .&. 0xff0000) .|. ((w .&. 0xff0000) `shiftL` 24)
.|. ((w `shiftR` 8) .&. 0xff000000) .|. ((w .&. 0xff000000) `shiftL` 8)
#endif
#if !(MIN_VERSION_base(4,7,0))
byteSwap32 :: Word32 -> Word32
byteSwap32 w =
(w `shiftR` 24)
.|. (w `shiftL` 24)
.|. ((w `shiftR` 8) .&. 0xff00)
.|. ((w .&. 0xff00) `shiftL` 8)
#endif
#if !(MIN_VERSION_base(4,7,0))
byteSwap16 :: Word16 -> Word16
byteSwap16 w =
(w `shiftR` 8) .|. (w `shiftL` 8)
#endif
#if !(MIN_VERSION_base(4,5,0))
unsafeShiftL :: Bits a => a -> Int -> a
unsafeShiftL = shiftL
unsafeShiftR :: Bits a => a -> Int -> a
unsafeShiftR = shiftR
#endif

View file

@ -0,0 +1,70 @@
-- |
-- Module : Data.Memory.Internal.CompatPrim
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : stable
-- Portability : Compat
--
-- This module try to keep all the difference between versions of ghc primitive
-- or other needed packages, so that modules don't need to use CPP.
--
-- Note that MagicHash and CPP conflicts in places, making it "more interesting"
-- to write compat code for primitives
--
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Data.Memory.Internal.CompatPrim
( be32Prim
, le32Prim
, byteswap32Prim
, booleanPrim
) where
import GHC.Prim
-- | byteswap Word# to or from Big Endian
--
-- on a big endian machine, this function is a nop.
be32Prim :: Word# -> Word#
#ifdef ARCH_IS_LITTLE_ENDIAN
be32Prim = byteswap32Prim
#else
be32Prim w = w
#endif
-- | byteswap Word# to or from Little Endian
--
-- on a little endian machine, this function is a nop.
le32Prim :: Word# -> Word#
#ifdef ARCH_IS_LITTLE_ENDIAN
le32Prim w = w
#else
le32Prim = byteswap32Prim
#endif
-- | Simple compatibility for byteswap the lower 32 bits of a Word#
-- at the primitive level
byteswap32Prim :: Word# -> Word#
#if __GLASGOW_HASKELL__ >= 708
byteswap32Prim w = byteSwap32# w
#else
byteswap32Prim w =
let !a = uncheckedShiftL# w 24#
!b = and# (uncheckedShiftL# w 8#) 0x00ff0000##
!c = and# (uncheckedShiftRL# w 8#) 0x0000ff00##
!d = and# (uncheckedShiftRL# w 24#) 0x000000ff##
in or# a (or# b (or# c d))
#endif
-- | Simple wrapper to handle pre 7.8 and future, where
-- most comparaison functions don't returns a boolean
-- anymore.
#if __GLASGOW_HASKELL__ >= 708
booleanPrim :: Int# -> Bool
booleanPrim v = tagToEnum# v
#else
booleanPrim :: Bool -> Bool
booleanPrim b = b
#endif
{-# INLINE booleanPrim #-}

View file

@ -0,0 +1,169 @@
-- |
-- Module : Data.Memory.Internal.CompatPrim
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : stable
-- Portability : Compat
--
-- This module try to keep all the difference between versions of ghc primitive
-- or other needed packages, so that modules don't need to use CPP.
--
-- Note that MagicHash and CPP conflicts in places, making it "more interesting"
-- to write compat code for primitives
--
{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
#include "MachDeps.h"
module Data.Memory.Internal.CompatPrim64
( Word64#
, Int64#
, eqInt64#
, neInt64#
, ltInt64#
, leInt64#
, gtInt64#
, geInt64#
, quotInt64#
, remInt64#
, eqWord64#
, neWord64#
, ltWord64#
, leWord64#
, gtWord64#
, geWord64#
, and64#
, or64#
, xor64#
, not64#
, timesWord64#
, uncheckedShiftL64#
, uncheckedShiftRL64#
, int64ToWord64#
, word64ToInt64#
, intToInt64#
, int64ToInt#
, wordToWord64#
, word64ToWord#
, w64#
) where
#if WORD_SIZE_IN_BITS == 64
import GHC.Prim hiding (Word64#, Int64#)
#if __GLASGOW_HASKELL__ >= 708
type OutBool = Int#
#else
type OutBool = Bool
#endif
type Word64# = Word#
type Int64# = Int#
#if __GLASGOW_HASKELL__ < 904
eqWord64# :: Word64# -> Word64# -> OutBool
eqWord64# = eqWord#
neWord64# :: Word64# -> Word64# -> OutBool
neWord64# = neWord#
ltWord64# :: Word64# -> Word64# -> OutBool
ltWord64# = ltWord#
leWord64# :: Word64# -> Word64# -> OutBool
leWord64# = leWord#
gtWord64# :: Word64# -> Word64# -> OutBool
gtWord64# = gtWord#
geWord64# :: Word64# -> Word64# -> OutBool
geWord64# = geWord#
eqInt64# :: Int64# -> Int64# -> OutBool
eqInt64# = (==#)
neInt64# :: Int64# -> Int64# -> OutBool
neInt64# = (/=#)
ltInt64# :: Int64# -> Int64# -> OutBool
ltInt64# = (<#)
leInt64# :: Int64# -> Int64# -> OutBool
leInt64# = (<=#)
gtInt64# :: Int64# -> Int64# -> OutBool
gtInt64# = (>#)
geInt64# :: Int64# -> Int64# -> OutBool
geInt64# = (<=#)
quotInt64# :: Int64# -> Int64# -> Int64#
quotInt64# = quotInt#
remInt64# :: Int64# -> Int64# -> Int64#
remInt64# = remInt#
and64# :: Word64# -> Word64# -> Word64#
and64# = and#
or64# :: Word64# -> Word64# -> Word64#
or64# = or#
xor64# :: Word64# -> Word64# -> Word64#
xor64# = xor#
not64# :: Word64# -> Word64#
not64# = not#
uncheckedShiftL64# :: Word64# -> Int# -> Word64#
uncheckedShiftL64# = uncheckedShiftL#
uncheckedShiftRL64# :: Word64# -> Int# -> Word64#
uncheckedShiftRL64# = uncheckedShiftL#
int64ToWord64# :: Int64# -> Word64#
int64ToWord64# = int2Word#
word64ToInt64# :: Word64# -> Int64#
word64ToInt64# = word2Int#
intToInt64# :: Int# -> Int64#
intToInt64# w = w
int64ToInt# :: Int64# -> Int#
int64ToInt# w = w
wordToWord64# :: Word# -> Word64#
wordToWord64# w = w
word64ToWord# :: Word64# -> Word#
word64ToWord# w = w
timesWord64# :: Word64# -> Word64# -> Word64#
timesWord64# = timesWord#
#endif
w64# :: Word# -> Word# -> Word# -> Word64#
w64# w _ _ = w
#elif WORD_SIZE_IN_BITS == 32
import GHC.IntWord64
import GHC.Prim (Word#)
timesWord64# :: Word64# -> Word64# -> Word64#
timesWord64# a b =
let !ai = word64ToInt64# a
!bi = word64ToInt64# b
in int64ToWord64# (timesInt64# ai bi)
w64# :: Word# -> Word# -> Word# -> Word64#
w64# _ hw lw =
let !h = wordToWord64# hw
!l = wordToWord64# lw
in or64# (uncheckedShiftL64# h 32#) l
#else
#error "not a supported architecture. supported WORD_SIZE_IN_BITS is 32 bits or 64 bits"
#endif

View file

@ -0,0 +1,28 @@
-- |
-- Module : Data.Memory.Internal.DeepSeq
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : unknown
--
-- Simple abstraction module to allow compilation without deepseq
-- by defining our own NFData class if not compiling with deepseq
-- support.
--
{-# LANGUAGE CPP #-}
module Data.Memory.Internal.DeepSeq
( NFData(..)
) where
#ifdef WITH_DEEPSEQ_SUPPORT
import Control.DeepSeq
#else
import Data.Word
class NFData a where rnf :: a -> ()
instance NFData Word8 where rnf w = w `seq` ()
instance NFData Word16 where rnf w = w `seq` ()
instance NFData Word32 where rnf w = w `seq` ()
instance NFData Word64 where rnf w = w `seq` ()
#endif

View file

@ -0,0 +1,17 @@
-- |
-- Module : Data.Memory.Internal.Imports
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : unknown
--
{-# LANGUAGE CPP #-}
module Data.Memory.Internal.Imports
( module X
) where
import Data.Word as X
import Control.Applicative as X
import Control.Monad as X (forM, forM_, void, when)
import Control.Arrow as X (first, second)
import Data.Memory.Internal.DeepSeq as X

View file

@ -0,0 +1,222 @@
-----------------------------------------------------------------------------
-- |
-- Module : Data.Memory.MemMap.Posix
-- Copyright : (c) Vincent Hanquez 2014
-- License : BSD-style
--
-- Maintainer : Vincent Hanquez
-- Stability : provisional
-- Portability : non-portable (requires POSIX)
--
-- Functions defined by the POSIX standards for manipulating memory maps
--
-- When a function that calls an underlying POSIX function fails, the errno
-- code is converted to an 'IOError' using 'Foreign.C.Error.errnoToIOError'.
-- For a list of which errno codes may be generated, consult the POSIX
-- documentation for the underlying function.
--
-----------------------------------------------------------------------------
#include <sys/mman.h>
#include <unistd.h>
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE CPP #-}
module Data.Memory.MemMap.Posix
( memoryMap
, memoryUnmap
, memoryAdvise
, memoryLock
, memoryUnlock
, memoryProtect
, memorySync
-- * Flags types
, MemoryMapFlag(..)
, MemoryProtection(..)
, MemoryAdvice(..)
, MemorySyncFlag(..)
-- * system page size
, sysconfPageSize
) where
import System.Posix.Types
import Foreign.Ptr
import Foreign.C.Types
import Foreign.C.Error
import Data.Bits
foreign import ccall unsafe "mmap"
c_mmap :: Ptr a -> CSize -> CInt -> CInt -> CInt -> COff -> IO (Ptr a)
foreign import ccall unsafe "munmap"
c_munmap :: Ptr a -> CSize -> IO CInt
#if defined(POSIX_MADV_NORMAL)
foreign import ccall unsafe "posix_madvise"
c_madvise :: Ptr a -> CSize -> CInt -> IO CInt
#else
foreign import ccall unsafe "madvise"
c_madvise :: Ptr a -> CSize -> CInt -> IO CInt
#endif
foreign import ccall unsafe "msync"
c_msync :: Ptr a -> CSize -> CInt -> IO CInt
foreign import ccall unsafe "mprotect"
c_mprotect :: Ptr a -> CSize -> CInt -> IO CInt
#ifndef __HAIKU__
foreign import ccall unsafe "mlock"
c_mlock :: Ptr a -> CSize -> IO CInt
#else
c_mlock :: Ptr a -> CSize -> IO CInt
c_mlock _ _ = return (-1)
#endif
#ifndef __HAIKU__
foreign import ccall unsafe "munlock"
c_munlock :: Ptr a -> CSize -> IO CInt
#else
c_munlock :: Ptr a -> CSize -> IO CInt
c_munlock _ _ = return (-1)
#endif
foreign import ccall unsafe "sysconf"
c_sysconf :: CInt -> CLong
-- | Mapping flag
data MemoryMapFlag =
MemoryMapShared -- ^ memory changes are shared between process
| MemoryMapPrivate -- ^ memory changes are private to process
deriving (Show,Read,Eq)
-- | Memory protection
data MemoryProtection =
MemoryProtectionNone
| MemoryProtectionRead
| MemoryProtectionWrite
| MemoryProtectionExecute
deriving (Show,Read,Eq)
-- | Advice to put on memory.
--
-- only define the posix one.
data MemoryAdvice =
MemoryAdviceNormal -- ^ no specific advice, the default.
| MemoryAdviceRandom -- ^ Expect page references in random order. No readahead should occur.
| MemoryAdviceSequential -- ^ Expect page references in sequential order. Page should be readahead aggressively.
| MemoryAdviceWillNeed -- ^ Expect access in the near future. Probably a good idea to readahead early
| MemoryAdviceDontNeed -- ^ Do not expect access in the near future.
deriving (Show,Read,Eq)
-- | Memory synchronization flags
data MemorySyncFlag =
MemorySyncAsync -- ^ perform asynchronous write.
| MemorySyncSync -- ^ perform synchronous write.
| MemorySyncInvalidate -- ^ invalidate cache data.
deriving (Show,Read,Eq)
cvalueOfMemoryProts :: [MemoryProtection] -> CInt
cvalueOfMemoryProts = foldl (.|.) 0 . map toProt
where toProt :: MemoryProtection -> CInt
toProt MemoryProtectionNone = (#const PROT_NONE)
toProt MemoryProtectionRead = (#const PROT_READ)
toProt MemoryProtectionWrite = (#const PROT_WRITE)
toProt MemoryProtectionExecute = (#const PROT_EXEC)
cvalueOfMemorySync :: [MemorySyncFlag] -> CInt
cvalueOfMemorySync = foldl (.|.) 0 . map toSync
where toSync MemorySyncAsync = (#const MS_ASYNC)
toSync MemorySyncSync = (#const MS_SYNC)
toSync MemorySyncInvalidate = (#const MS_INVALIDATE)
-- | Map pages of memory.
--
-- If fd is present, this memory will represent the file associated.
-- Otherwise, the memory will be an anonymous mapping.
--
-- use 'mmap'
memoryMap :: Maybe (Ptr a) -- ^ The address to map to if MapFixed is used.
-> CSize -- ^ The length of the mapping
-> [MemoryProtection] -- ^ the memory protection associated with the mapping
-> MemoryMapFlag -- ^
-> Maybe Fd
-> COff
-> IO (Ptr a)
memoryMap initPtr sz prots flag mfd off =
throwErrnoIf (== m1ptr) "mmap" (c_mmap (maybe nullPtr id initPtr) sz cprot cflags fd off)
where m1ptr = nullPtr `plusPtr` (-1)
fd = maybe (-1) (\(Fd v) -> v) mfd
cprot = cvalueOfMemoryProts prots
cflags = maybe cMapAnon (const 0) mfd
.|. maybe 0 (const cMapFixed) initPtr
.|. toMapFlag flag
#ifdef __APPLE__
cMapAnon = (#const MAP_ANON)
#else
cMapAnon = (#const MAP_ANONYMOUS)
#endif
cMapFixed = (#const MAP_FIXED)
toMapFlag MemoryMapShared = (#const MAP_SHARED)
toMapFlag MemoryMapPrivate = (#const MAP_PRIVATE)
-- | Unmap pages of memory
--
-- use 'munmap'
memoryUnmap :: Ptr a -> CSize -> IO ()
memoryUnmap ptr sz = throwErrnoIfMinus1_ "munmap" (c_munmap ptr sz)
-- | give advice to the operating system about use of memory
--
-- call 'madvise'
memoryAdvise :: Ptr a -> CSize -> MemoryAdvice -> IO ()
memoryAdvise ptr sz adv = throwErrnoIfMinus1_ "madvise" (c_madvise ptr sz cadv)
where cadv = toAdvice adv
#if defined(POSIX_MADV_NORMAL)
toAdvice MemoryAdviceNormal = (#const POSIX_MADV_NORMAL)
toAdvice MemoryAdviceRandom = (#const POSIX_MADV_RANDOM)
toAdvice MemoryAdviceSequential = (#const POSIX_MADV_SEQUENTIAL)
toAdvice MemoryAdviceWillNeed = (#const POSIX_MADV_WILLNEED)
toAdvice MemoryAdviceDontNeed = (#const POSIX_MADV_DONTNEED)
#else
toAdvice MemoryAdviceNormal = (#const MADV_NORMAL)
toAdvice MemoryAdviceRandom = (#const MADV_RANDOM)
toAdvice MemoryAdviceSequential = (#const MADV_SEQUENTIAL)
toAdvice MemoryAdviceWillNeed = (#const MADV_WILLNEED)
toAdvice MemoryAdviceDontNeed = (#const MADV_DONTNEED)
#endif
-- | lock a range of process address space
--
-- call 'mlock'
memoryLock :: Ptr a -> CSize -> IO ()
memoryLock ptr sz = throwErrnoIfMinus1_ "mlock" (c_mlock ptr sz)
-- | unlock a range of process address space
--
-- call 'munlock'
memoryUnlock :: Ptr a -> CSize -> IO ()
memoryUnlock ptr sz = throwErrnoIfMinus1_ "munlock" (c_munlock ptr sz)
-- | set protection of memory mapping
--
-- call 'mprotect'
memoryProtect :: Ptr a -> CSize -> [MemoryProtection] -> IO ()
memoryProtect ptr sz prots = throwErrnoIfMinus1_ "mprotect" (c_mprotect ptr sz cprot)
where cprot = cvalueOfMemoryProts prots
-- | memorySync synchronize memory with physical storage.
--
-- On an anonymous mapping this function doesn't have any effect.
-- call 'msync'
memorySync :: Ptr a -> CSize -> [MemorySyncFlag] -> IO ()
memorySync ptr sz flags = throwErrnoIfMinus1_ "msync" (c_msync ptr sz cflags)
where cflags = cvalueOfMemorySync flags
-- | Return the operating system page size.
--
-- call 'sysconf'
sysconfPageSize :: Int
sysconfPageSize = fromIntegral $ c_sysconf (#const _SC_PAGESIZE)

View file

@ -0,0 +1,12 @@
-----------------------------------------------------------------------------
-- |
-- Module : Data.Memory.MemMap.Windows
-- Copyright : (c) Vincent Hanquez 2014
-- License : BSD-style
-- Maintainer : Vincent Hanquez
-- Stability : provisional
-- Portability : non-portable (requires Windows)
--
module Data.Memory.MemMap.Windows
(
) where

View file

@ -0,0 +1,120 @@
-- |
-- Module : Data.Memory.PtrMethods
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : unknown
--
-- methods to manipulate raw memory representation
--
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE ForeignFunctionInterface #-}
module Data.Memory.PtrMethods
( memCreateTemporary
, memXor
, memXorWith
, memCopy
, memSet
, memReverse
, memEqual
, memConstEqual
, memCompare
) where
import Data.Memory.Internal.Imports
import Foreign.Ptr (Ptr, plusPtr)
import Foreign.Storable (peek, poke, peekByteOff)
import Foreign.C.Types
import Foreign.Marshal.Alloc (allocaBytesAligned)
import Data.Bits ((.|.), xor)
-- | Create a new temporary buffer
memCreateTemporary :: Int -> (Ptr Word8 -> IO a) -> IO a
memCreateTemporary size f = allocaBytesAligned size 8 f
-- | xor bytes from source1 and source2 to destination
--
-- d = s1 xor s2
--
-- s1, nor s2 are modified unless d point to s1 or s2
memXor :: Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> Int -> IO ()
memXor _ _ _ 0 = return ()
memXor d s1 s2 n = do
(xor <$> peek s1 <*> peek s2) >>= poke d
memXor (d `plusPtr` 1) (s1 `plusPtr` 1) (s2 `plusPtr` 1) (n-1)
-- | xor bytes from source with a specific value to destination
--
-- d = replicate (sizeof s) v `xor` s
memXorWith :: Ptr Word8 -> Word8 -> Ptr Word8 -> Int -> IO ()
memXorWith destination !v source bytes
| destination == source = loopInplace source bytes
| otherwise = loop destination source bytes
where
loop !d !s n = when (n > 0) $ do
peek s >>= poke d . xor v
loop (d `plusPtr` 1) (s `plusPtr` 1) (n-1)
loopInplace !s n = when (n > 0) $ do
peek s >>= poke s . xor v
loopInplace (s `plusPtr` 1) (n-1)
-- | Copy a set number of bytes from @src to @dst
memCopy :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
memCopy dst src n = c_memcpy dst src (fromIntegral n)
{-# INLINE memCopy #-}
-- | Set @n number of bytes to the same value @v
memSet :: Ptr Word8 -> Word8 -> Int -> IO ()
memSet start v n = c_memset start v (fromIntegral n) >>= \_ -> return ()
{-# INLINE memSet #-}
-- | Reverse a set number of bytes from @src@ to @dst@. Memory
-- locations should not overlap.
memReverse :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
memReverse d s n
| n > 0 = do peekByteOff s (n - 1) >>= poke d
memReverse (d `plusPtr` 1) s (n - 1)
| otherwise = return ()
-- | Check if two piece of memory are equals
memEqual :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool
memEqual p1 p2 n = loop 0
where
loop i
| i == n = return True
| otherwise = do
e <- (==) <$> peekByteOff p1 i <*> (peekByteOff p2 i :: IO Word8)
if e then loop (i+1) else return False
-- | Compare two piece of memory and returns how they compare
memCompare :: Ptr Word8 -> Ptr Word8 -> Int -> IO Ordering
memCompare p1 p2 n = loop 0
where
loop i
| i == n = return EQ
| otherwise = do
e <- compare <$> peekByteOff p1 i <*> (peekByteOff p2 i :: IO Word8)
if e == EQ then loop (i+1) else return e
-- | A constant time equality test for 2 Memory buffers
--
-- compared to normal equality function, this function will go
-- over all the bytes present before yielding a result even when
-- knowing the overall result early in the processing.
memConstEqual :: Ptr Word8 -> Ptr Word8 -> Int -> IO Bool
memConstEqual p1 p2 n = loop 0 0
where
loop i !acc
| i == n = return $! acc == 0
| otherwise = do
e <- xor <$> peekByteOff p1 i <*> (peekByteOff p2 i :: IO Word8)
loop (i+1) (acc .|. e)
foreign import ccall unsafe "memset"
c_memset :: Ptr Word8 -> Word8 -> CSize -> IO ()
foreign import ccall unsafe "memcpy"
c_memcpy :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()

1095
bundled/Data/Scientific.hs Normal file

File diff suppressed because it is too large Load diff

709
bundled/Data/Serialize.hs Normal file
View file

@ -0,0 +1,709 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DefaultSignatures
, TypeOperators
, BangPatterns
, KindSignatures
, ScopedTypeVariables #-}
#ifndef MIN_VERSION_base
#define MIN_VERSION_base(x,y,z) 1
#endif
-----------------------------------------------------------------------------
-- |
-- Module : Data.Serialize
-- Copyright : Lennart Kolmodin, Galois Inc. 2009
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Trevor Elliott <trevor@galois.com>
-- Stability :
-- Portability :
--
-----------------------------------------------------------------------------
module Data.Serialize (
-- * The Serialize class
Serialize(..)
-- $example
-- * Serialize serialisation
, encode, encodeLazy
, decode, decodeLazy
, expect
, module Data.Serialize.Get
, module Data.Serialize.Put
, module Data.Serialize.IEEE754
-- * Generic deriving
, GSerializePut(..)
, GSerializeGet(..)
) where
import Data.Serialize.Put
import Data.Serialize.Get
import Data.Serialize.IEEE754
import Control.Monad
import Data.Array.Unboxed
import Data.ByteString (ByteString)
import Data.Char (chr,ord)
import Data.List (unfoldr)
import Data.Word
import Foreign
-- And needed for the instances:
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Short as S
import qualified Data.Map as Map
import qualified Data.Monoid as M
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified Data.Ratio as R
import qualified Data.Tree as T
import qualified Data.Sequence as Seq
import GHC.Generics
#if !(MIN_VERSION_base(4,8,0))
import Control.Applicative ((*>),(<*>),(<$>),pure)
#endif
#if MIN_VERSION_base(4,8,0)
import Numeric.Natural
#endif
------------------------------------------------------------------------
-- | If your compiler has support for the @DeriveGeneric@ and
-- @DefaultSignatures@ language extensions (@ghc >= 7.2.1@), the 'put' and 'get'
-- methods will have default generic implementations.
--
-- To use this option, simply add a @deriving 'Generic'@ clause to your datatype
-- and declare a 'Serialize' instance for it without giving a definition for
-- 'put' and 'get'.
class Serialize t where
-- | Encode a value in the Put monad.
put :: Putter t
-- | Decode a value in the Get monad
get :: Get t
default put :: (Generic t, GSerializePut (Rep t)) => Putter t
put = gPut . from
default get :: (Generic t, GSerializeGet (Rep t)) => Get t
get = to <$> gGet
------------------------------------------------------------------------
-- Wrappers to run the underlying monad
-- | Encode a value using binary serialization to a strict ByteString.
encode :: Serialize a => a -> ByteString
encode = runPut . put
-- | Encode a value using binary serialization to a lazy ByteString.
encodeLazy :: Serialize a => a -> L.ByteString
encodeLazy = runPutLazy . put
-- | Decode a value from a strict ByteString, reconstructing the original
-- structure.
decode :: Serialize a => ByteString -> Either String a
decode = runGet get
-- | Decode a value from a lazy ByteString, reconstructing the original
-- structure.
decodeLazy :: Serialize a => L.ByteString -> Either String a
decodeLazy = runGetLazy get
------------------------------------------------------------------------
-- Combinators
-- | Perform an action, failing if the read result does not match the argument
-- provided.
expect :: (Eq a, Serialize a) => a -> Get a
expect x = get >>= \y -> if x == y then return x else mzero
------------------------------------------------------------------------
-- Simple instances
-- The () type need never be written to disk: values of singleton type
-- can be reconstructed from the type alone
instance Serialize () where
put () = return ()
get = return ()
{-# INLINE boolToWord8 #-}
boolToWord8 :: Bool -> Word8
boolToWord8 False = 0
boolToWord8 True = 1
{-# INLINE boolFromWord8 #-}
boolFromWord8 :: Word8 -> Get Bool
boolFromWord8 0 = return False
boolFromWord8 1 = return True
boolFromWord8 w = fail ("Invalid Bool encoding " ++ show w)
{-# INLINE orderingToWord8 #-}
orderingToWord8 :: Ordering -> Word8
orderingToWord8 LT = 0
orderingToWord8 EQ = 1
orderingToWord8 GT = 2
{-# INLINE orderingFromWord8 #-}
orderingFromWord8 :: Word8 -> Get Ordering
orderingFromWord8 0 = return LT
orderingFromWord8 1 = return EQ
orderingFromWord8 2 = return GT
orderingFromWord8 w = fail ("Invalid Ordering encoding " ++ show w)
-- Bools are encoded as a byte in the range 0 .. 1
instance Serialize Bool where
put = putWord8 . boolToWord8
get = boolFromWord8 =<< getWord8
-- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
instance Serialize Ordering where
put = putWord8 . orderingToWord8
get = orderingFromWord8 =<< getWord8
------------------------------------------------------------------------
-- Words and Ints
-- Words8s are written as bytes
instance Serialize Word8 where
put = putWord8
get = getWord8
-- Words16s are written as 2 bytes in big-endian (network) order
instance Serialize Word16 where
put = putWord16be
get = getWord16be
-- Words32s are written as 4 bytes in big-endian (network) order
instance Serialize Word32 where
put = putWord32be
get = getWord32be
-- Words64s are written as 8 bytes in big-endian (network) order
instance Serialize Word64 where
put = putWord64be
get = getWord64be
-- Int8s are written as a single byte.
instance Serialize Int8 where
put = putInt8
get = getInt8
-- Int16s are written as a 2 bytes in big endian format
instance Serialize Int16 where
put = putInt16be
get = getInt16be
-- Int32s are written as a 4 bytes in big endian format
instance Serialize Int32 where
put = putInt32be
get = getInt32be
-- Int64s are written as a 8 bytes in big endian format
instance Serialize Int64 where
put = putInt64be
get = getInt64be
------------------------------------------------------------------------
-- Words are are written as Word64s, that is, 8 bytes in big endian format
instance Serialize Word where
put i = put (fromIntegral i :: Word64)
get = liftM fromIntegral (get :: Get Word64)
-- Ints are are written as Int64s, that is, 8 bytes in big endian format
instance Serialize Int where
put i = put (fromIntegral i :: Int64)
get = liftM fromIntegral (get :: Get Int64)
------------------------------------------------------------------------
--
-- Portable, and pretty efficient, serialisation of Integer
--
-- Fixed-size type for a subset of Integer
type SmallInt = Int32
-- Integers are encoded in two ways: if they fit inside a SmallInt,
-- they're written as a byte tag, and that value. If the Integer value
-- is too large to fit in a SmallInt, it is written as a byte array,
-- along with a sign and length field.
instance Serialize Integer where
put n | n >= lo && n <= hi = do
putWord8 0
put (fromIntegral n :: SmallInt) -- fast path
where
lo = fromIntegral (minBound :: SmallInt) :: Integer
hi = fromIntegral (maxBound :: SmallInt) :: Integer
put n = do
putWord8 1
put sign
let len = ((nrBits (abs n) + 7) `div` 8)
putWord64be (fromIntegral len)
mapM_ put (unroll (abs n)) -- unroll the bytes
where
sign = fromIntegral (signum n) :: Word8
get = do
tag <- get :: Get Word8
case tag of
0 -> liftM fromIntegral (get :: Get SmallInt)
_ -> do sign <- get
bytes <- get
let v = roll bytes
return $! if sign == (1 :: Word8) then v else - v
--
-- Fold and unfold an Integer to and from a list of its bytes
--
unroll :: (Integral a, Bits a) => a -> [Word8]
unroll = unfoldr step
where
step 0 = Nothing
step i = Just (fromIntegral i, i `shiftR` 8)
roll :: (Integral a, Bits a) => [Word8] -> a
roll = foldr unstep 0
where
unstep b a = a `shiftL` 8 .|. fromIntegral b
nrBits :: (Ord a, Integral a) => a -> Int
nrBits k =
let expMax = until (\e -> 2 ^ e > k) (* 2) 1
findNr :: Int -> Int -> Int
findNr lo hi
| mid == lo = hi
| 2 ^ mid <= k = findNr mid hi
| otherwise = findNr lo mid
where mid = (lo + hi) `div` 2
in findNr (expMax `div` 2) expMax
instance (Serialize a,Integral a) => Serialize (R.Ratio a) where
put r = put (R.numerator r) >> put (R.denominator r)
get = liftM2 (R.%) get get
#if MIN_VERSION_base(4,8,0)
-- Fixed-size type for a subset of Natural
type NaturalWord = Word64
instance Serialize Natural where
{-# INLINE put #-}
put n | n <= hi = do
putWord8 0
put (fromIntegral n :: NaturalWord) -- fast path
where
hi = fromIntegral (maxBound :: NaturalWord) :: Natural
put n = do
putWord8 1
let len = ((nrBits (abs n) + 7) `div` 8)
putWord64be (fromIntegral len)
mapM_ put (unroll (abs n)) -- unroll the bytes
{-# INLINE get #-}
get = do
tag <- get :: Get Word8
case tag of
0 -> liftM fromIntegral (get :: Get NaturalWord)
_ -> do bytes <- get
return $! roll bytes
#endif
------------------------------------------------------------------------
-- Safely wrap `chr` to avoid exceptions.
-- `chr` source: http://hackage.haskell.org/package/base-4.7.0.2/docs/src/GHC-Char.html#chr
chrEither :: Int -> Either String Char
chrEither i
| i <= 0x10FFFF = Right (chr i) -- Or: C# (chr# i#)
| otherwise =
Left ("bad argument: " ++ show i)
-- Char is serialised as UTF-8
instance Serialize Char where
put a | c <= 0x7f = put (fromIntegral c :: Word8)
| c <= 0x7ff = do put (0xc0 .|. y)
put (0x80 .|. z)
| c <= 0xffff = do put (0xe0 .|. x)
put (0x80 .|. y)
put (0x80 .|. z)
| c <= 0x10ffff = do put (0xf0 .|. w)
put (0x80 .|. x)
put (0x80 .|. y)
put (0x80 .|. z)
| otherwise = error "Not a valid Unicode code point"
where
c = ord a
z, y, x, w :: Word8
z = fromIntegral (c .&. 0x3f)
y = fromIntegral (shiftR c 6 .&. 0x3f)
x = fromIntegral (shiftR c 12 .&. 0x3f)
w = fromIntegral (shiftR c 18 .&. 0x7)
get = do
let getByte = liftM (fromIntegral :: Word8 -> Int) get
shiftL6 = flip shiftL 6 :: Int -> Int
w <- getByte
r <- case () of
_ | w < 0x80 -> return w
| w < 0xe0 -> do
x <- liftM (xor 0x80) getByte
return (x .|. shiftL6 (xor 0xc0 w))
| w < 0xf0 -> do
x <- liftM (xor 0x80) getByte
y <- liftM (xor 0x80) getByte
return (y .|. shiftL6 (x .|. shiftL6
(xor 0xe0 w)))
| otherwise -> do
x <- liftM (xor 0x80) getByte
y <- liftM (xor 0x80) getByte
z <- liftM (xor 0x80) getByte
return (z .|. shiftL6 (y .|. shiftL6
(x .|. shiftL6 (xor 0xf0 w))))
case chrEither r of
Right r' ->
return $! r'
Left err ->
fail err
------------------------------------------------------------------------
-- Instances for the first few tuples
instance (Serialize a, Serialize b) => Serialize (a,b) where
put = putTwoOf put put
get = getTwoOf get get
instance (Serialize a, Serialize b, Serialize c) => Serialize (a,b,c) where
put (a,b,c) = put a >> put b >> put c
get = liftM3 (,,) get get get
instance (Serialize a, Serialize b, Serialize c, Serialize d)
=> Serialize (a,b,c,d) where
put (a,b,c,d) = put a >> put b >> put c >> put d
get = liftM4 (,,,) get get get get
instance (Serialize a, Serialize b, Serialize c, Serialize d, Serialize e)
=> Serialize (a,b,c,d,e) where
put (a,b,c,d,e) = put a >> put b >> put c >> put d >> put e
get = liftM5 (,,,,) get get get get get
--
-- and now just recurse:
--
instance (Serialize a, Serialize b, Serialize c, Serialize d, Serialize e
, Serialize f)
=> Serialize (a,b,c,d,e,f) where
put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
instance (Serialize a, Serialize b, Serialize c, Serialize d, Serialize e
, Serialize f, Serialize g)
=> Serialize (a,b,c,d,e,f,g) where
put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
instance (Serialize a, Serialize b, Serialize c, Serialize d, Serialize e,
Serialize f, Serialize g, Serialize h)
=> Serialize (a,b,c,d,e,f,g,h) where
put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
get = do (a,(b,c,d,e,f,g,h)) <- get
return (a,b,c,d,e,f,g,h)
instance (Serialize a, Serialize b, Serialize c, Serialize d, Serialize e,
Serialize f, Serialize g, Serialize h, Serialize i)
=> Serialize (a,b,c,d,e,f,g,h,i) where
put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
get = do (a,(b,c,d,e,f,g,h,i)) <- get
return (a,b,c,d,e,f,g,h,i)
instance (Serialize a, Serialize b, Serialize c, Serialize d, Serialize e,
Serialize f, Serialize g, Serialize h, Serialize i, Serialize j)
=> Serialize (a,b,c,d,e,f,g,h,i,j) where
put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
get = do (a,(b,c,d,e,f,g,h,i,j)) <- get
return (a,b,c,d,e,f,g,h,i,j)
------------------------------------------------------------------------
-- Monoid newtype wrappers
instance Serialize a => Serialize (M.Dual a) where
put = put . M.getDual
get = fmap M.Dual get
instance Serialize M.All where
put = put . M.getAll
get = fmap M.All get
instance Serialize M.Any where
put = put . M.getAny
get = fmap M.Any get
instance Serialize a => Serialize (M.Sum a) where
put = put . M.getSum
get = fmap M.Sum get
instance Serialize a => Serialize (M.Product a) where
put = put . M.getProduct
get = fmap M.Product get
instance Serialize a => Serialize (M.First a) where
put = put . M.getFirst
get = fmap M.First get
instance Serialize a => Serialize (M.Last a) where
put = put . M.getLast
get = fmap M.Last get
------------------------------------------------------------------------
-- Container types
instance Serialize a => Serialize [a] where
put = putListOf put
get = getListOf get
instance (Serialize a) => Serialize (Maybe a) where
put = putMaybeOf put
get = getMaybeOf get
instance (Serialize a, Serialize b) => Serialize (Either a b) where
put = putEitherOf put put
get = getEitherOf get get
------------------------------------------------------------------------
-- ByteStrings (have specially efficient instances)
instance Serialize B.ByteString where
put bs = do put (B.length bs :: Int)
putByteString bs
get = get >>= getByteString
instance Serialize L.ByteString where
put bs = do put (L.length bs :: Int64)
putLazyByteString bs
get = get >>= getLazyByteString
instance Serialize S.ShortByteString where
put sbs = do put (S.length sbs)
putShortByteString sbs
get = get >>= getShortByteString
------------------------------------------------------------------------
-- Maps and Sets
instance (Ord a, Serialize a) => Serialize (Set.Set a) where
put = putSetOf put
get = getSetOf get
instance (Ord k, Serialize k, Serialize e) => Serialize (Map.Map k e) where
put = putMapOf put put
get = getMapOf get get
instance Serialize IntSet.IntSet where
put = putIntSetOf put
get = getIntSetOf get
instance (Serialize e) => Serialize (IntMap.IntMap e) where
put = putIntMapOf put put
get = getIntMapOf get get
------------------------------------------------------------------------
-- Queues and Sequences
instance (Serialize e) => Serialize (Seq.Seq e) where
put = putSeqOf put
get = getSeqOf get
------------------------------------------------------------------------
-- Floating point
instance Serialize Double where
put = putFloat64be
get = getFloat64be
instance Serialize Float where
put = putFloat32be
get = getFloat32be
------------------------------------------------------------------------
-- Trees
instance (Serialize e) => Serialize (T.Tree e) where
put = putTreeOf put
get = getTreeOf get
------------------------------------------------------------------------
-- Arrays
instance (Serialize i, Ix i, Serialize e) => Serialize (Array i e) where
put = putIArrayOf put put
get = getIArrayOf get get
--
-- The IArray UArray e constraint is non portable. Requires flexible instances
--
instance (Serialize i, Ix i, Serialize e, IArray UArray e)
=> Serialize (UArray i e) where
put = putIArrayOf put put
get = getIArrayOf get get
------------------------------------------------------------------------
-- Generic Serialze
class GSerializePut f where
gPut :: Putter (f a)
class GSerializeGet f where
gGet :: Get (f a)
instance GSerializePut a => GSerializePut (M1 i c a) where
gPut = gPut . unM1
{-# INLINE gPut #-}
instance GSerializeGet a => GSerializeGet (M1 i c a) where
gGet = M1 <$> gGet
{-# INLINE gGet #-}
instance Serialize a => GSerializePut (K1 i a) where
gPut = put . unK1
{-# INLINE gPut #-}
instance Serialize a => GSerializeGet (K1 i a) where
gGet = K1 <$> get
{-# INLINE gGet #-}
instance GSerializePut U1 where
gPut _ = pure ()
{-# INLINE gPut #-}
instance GSerializeGet U1 where
gGet = pure U1
{-# INLINE gGet #-}
-- | Always fails to serialize
instance GSerializePut V1 where
gPut v = v `seq` error "GSerializePut.V1"
{-# INLINE gPut #-}
-- | Always fails to deserialize
instance GSerializeGet V1 where
gGet = fail "GSerializeGet.V1"
{-# INLINE gGet #-}
instance (GSerializePut a, GSerializePut b) => GSerializePut (a :*: b) where
gPut (a :*: b) = gPut a *> gPut b
{-# INLINE gPut #-}
instance (GSerializeGet a, GSerializeGet b) => GSerializeGet (a :*: b) where
gGet = (:*:) <$> gGet <*> gGet
{-# INLINE gGet #-}
-- The following GSerialize* instance for sums has support for serializing types
-- with up to 2^64-1 constructors. It will use the minimal number of bytes
-- needed to encode the constructor. For example when a type has 2^8
-- constructors or less it will use a single byte to encode the constructor. If
-- it has 2^16 constructors or less it will use two bytes, and so on till 2^64-1.
#define GUARD(WORD) (size - 1) <= fromIntegral (maxBound :: WORD)
#define PUTSUM(WORD) GUARD(WORD) = putSum (0 :: WORD) (fromIntegral size)
#define GETSUM(WORD) GUARD(WORD) = (get :: Get WORD) >>= checkGetSum (fromIntegral size)
instance ( PutSum a, PutSum b
, SumSize a, SumSize b) => GSerializePut (a :+: b) where
gPut | PUTSUM(Word8) | PUTSUM(Word16) | PUTSUM(Word32) | PUTSUM(Word64)
| otherwise = sizeError "encode" size
where
size = unTagged (sumSize :: Tagged (a :+: b) Word64)
{-# INLINE gPut #-}
instance ( GetSum a, GetSum b
, SumSize a, SumSize b) => GSerializeGet (a :+: b) where
gGet | GETSUM(Word8) | GETSUM(Word16) | GETSUM(Word32) | GETSUM(Word64)
| otherwise = sizeError "decode" size
where
size = unTagged (sumSize :: Tagged (a :+: b) Word64)
{-# INLINE gGet #-}
sizeError :: Show size => String -> size -> error
sizeError s size = error $ "Can't " ++ s ++ " a type with " ++ show size ++ " constructors"
------------------------------------------------------------------------
class PutSum f where
putSum :: (Num word, Bits word, Serialize word) => word -> word -> Putter (f a)
instance (PutSum a, PutSum b) => PutSum (a :+: b) where
putSum !code !size s = case s of
L1 x -> putSum code sizeL x
R1 x -> putSum (code + sizeL) sizeR x
where
#if MIN_VERSION_base(4,5,0)
sizeL = size `unsafeShiftR` 1
#else
sizeL = size `shiftR` 1
#endif
sizeR = size - sizeL
{-# INLINE putSum #-}
instance GSerializePut a => PutSum (C1 c a) where
putSum !code _ x = put code *> gPut x
{-# INLINE putSum #-}
------------------------------------------------------------------------
checkGetSum :: (Ord word, Num word, Bits word, GetSum f)
=> word -> word -> Get (f a)
checkGetSum size code | code < size = getSum code size
| otherwise = fail "Unknown encoding for constructor"
{-# INLINE checkGetSum #-}
class GetSum f where
getSum :: (Ord word, Num word, Bits word) => word -> word -> Get (f a)
instance (GetSum a, GetSum b) => GetSum (a :+: b) where
getSum !code !size | code < sizeL = L1 <$> getSum code sizeL
| otherwise = R1 <$> getSum (code - sizeL) sizeR
where
#if MIN_VERSION_base(4,5,0)
sizeL = size `unsafeShiftR` 1
#else
sizeL = size `shiftR` 1
#endif
sizeR = size - sizeL
{-# INLINE getSum #-}
instance GSerializeGet a => GetSum (C1 c a) where
getSum _ _ = gGet
{-# INLINE getSum #-}
------------------------------------------------------------------------
class SumSize f where
sumSize :: Tagged f Word64
newtype Tagged (s :: * -> *) b = Tagged {unTagged :: b}
instance (SumSize a, SumSize b) => SumSize (a :+: b) where
sumSize = Tagged $ unTagged (sumSize :: Tagged a Word64) +
unTagged (sumSize :: Tagged b Word64)
instance SumSize (C1 c a) where
sumSize = Tagged 1

View file

@ -0,0 +1,847 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE Rank2Types #-}
{-# LANGUAGE BangPatterns #-}
-----------------------------------------------------------------------------
-- |
-- Module : Data.Serialize.Get
-- Copyright : Lennart Kolmodin, Galois Inc. 2009
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Trevor Elliott <trevor@galois.com>
-- Stability :
-- Portability :
--
-- The Get monad. A monad for efficiently building structures from
-- strict ByteStrings
--
-----------------------------------------------------------------------------
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#include "MachDeps.h"
#endif
module Data.Serialize.Get (
-- * The Get type
Get
, runGet
, runGetLazy
, runGetState
, runGetLazyState
-- ** Incremental interface
, Result(..)
, runGetPartial
, runGetChunk
-- * Parsing
, ensure
, isolate
, label
, skip
, uncheckedSkip
, lookAhead
, lookAheadM
, lookAheadE
, uncheckedLookAhead
, bytesRead
-- * Utility
, getBytes
, remaining
, isEmpty
-- * Parsing particular types
, getWord8
, getInt8
-- ** ByteStrings
, getByteString
, getLazyByteString
, getShortByteString
-- ** Big-endian reads
, getWord16be
, getWord32be
, getWord64be
, getInt16be
, getInt32be
, getInt64be
-- ** Little-endian reads
, getWord16le
, getWord32le
, getWord64le
, getInt16le
, getInt32le
, getInt64le
-- ** Host-endian, unaligned reads
, getWordhost
, getWord16host
, getWord32host
, getWord64host
-- ** Containers
, getTwoOf
, getListOf
, getIArrayOf
, getTreeOf
, getSeqOf
, getMapOf
, getIntMapOf
, getSetOf
, getIntSetOf
, getMaybeOf
, getEitherOf
, getNested
) where
import qualified Control.Applicative as A
import qualified Control.Monad as M
import Control.Monad (unless)
import qualified Control.Monad.Fail as Fail
import Data.Array.IArray (IArray,listArray)
import Data.Ix (Ix)
import Data.List (intercalate)
import Data.Maybe (isNothing,fromMaybe)
import Foreign
import System.IO.Unsafe (unsafeDupablePerformIO)
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Unsafe as B
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Short as BS
import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified Data.Map as Map
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import qualified Data.Tree as T
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
import GHC.Base
import GHC.Word
#endif
-- | The result of a parse.
data Result r = Fail String B.ByteString
-- ^ The parse failed. The 'String' is the
-- message describing the error, if any.
| Partial (B.ByteString -> Result r)
-- ^ Supply this continuation with more input so that
-- the parser can resume. To indicate that no more
-- input is available, use an 'B.empty' string.
| Done r B.ByteString
-- ^ The parse succeeded. The 'B.ByteString' is the
-- input that had not yet been consumed (if any) when
-- the parse succeeded.
instance Show r => Show (Result r) where
show (Fail msg _) = "Fail " ++ show msg
show (Partial _) = "Partial _"
show (Done r bs) = "Done " ++ show r ++ " " ++ show bs
instance Functor Result where
fmap _ (Fail msg rest) = Fail msg rest
fmap f (Partial k) = Partial (fmap f . k)
fmap f (Done r bs) = Done (f r) bs
-- | The Get monad is an Exception and State monad.
newtype Get a = Get
{ unGet :: forall r. Input -> Buffer -> More
-> Int -> Failure r
-> Success a r -> Result r }
type Input = B.ByteString
type Buffer = Maybe B.ByteString
emptyBuffer :: Buffer
emptyBuffer = Just B.empty
extendBuffer :: Buffer -> B.ByteString -> Buffer
extendBuffer buf chunk =
do bs <- buf
return $! bs `B.append` chunk
{-# INLINE extendBuffer #-}
append :: Buffer -> Buffer -> Buffer
append l r = B.append `fmap` l A.<*> r
{-# INLINE append #-}
bufferBytes :: Buffer -> B.ByteString
bufferBytes = fromMaybe B.empty
{-# INLINE bufferBytes #-}
type Failure r = Input -> Buffer -> More -> [String] -> String -> Result r
type Success a r = Input -> Buffer -> More -> Int -> a -> Result r
-- | Have we read all available input?
data More
= Complete
| Incomplete (Maybe Int)
deriving (Eq)
moreLength :: More -> Int
moreLength m = case m of
Complete -> 0
Incomplete mb -> fromMaybe 0 mb
instance Functor Get where
fmap p m = Get $ \ s0 b0 m0 w0 kf ks ->
unGet m s0 b0 m0 w0 kf $ \ s1 b1 m1 w1 a -> ks s1 b1 m1 w1 (p a)
instance A.Applicative Get where
pure a = Get $ \ s0 b0 m0 w _ ks -> ks s0 b0 m0 w a
{-# INLINE pure #-}
f <*> x = Get $ \ s0 b0 m0 w0 kf ks ->
unGet f s0 b0 m0 w0 kf $ \ s1 b1 m1 w1 g ->
unGet x s1 b1 m1 w1 kf $ \ s2 b2 m2 w2 y -> ks s2 b2 m2 w2 (g y)
{-# INLINE (<*>) #-}
m *> k = Get $ \ s0 b0 m0 w0 kf ks ->
unGet m s0 b0 m0 w0 kf $ \ s1 b1 m1 w1 _ -> unGet k s1 b1 m1 w1 kf ks
{-# INLINE (*>) #-}
instance A.Alternative Get where
empty = failDesc "empty"
{-# INLINE empty #-}
(<|>) = M.mplus
{-# INLINE (<|>) #-}
-- Definition directly from Control.Monad.State.Strict
instance Monad Get where
return = A.pure
{-# INLINE return #-}
m >>= g = Get $ \ s0 b0 m0 w0 kf ks ->
unGet m s0 b0 m0 w0 kf $ \ s1 b1 m1 w1 a -> unGet (g a) s1 b1 m1 w1 kf ks
{-# INLINE (>>=) #-}
(>>) = (A.*>)
{-# INLINE (>>) #-}
#if !(MIN_VERSION_base(4,13,0))
fail = Fail.fail
{-# INLINE fail #-}
#endif
instance Fail.MonadFail Get where
fail = failDesc
{-# INLINE fail #-}
instance M.MonadPlus Get where
mzero = failDesc "mzero"
{-# INLINE mzero #-}
-- TODO: Test this!
mplus a b =
Get $ \s0 b0 m0 w0 kf ks ->
let ks' s1 b1 = ks s1 (b0 `append` b1)
kf' _ b1 m1 = kf (s0 `B.append` bufferBytes b1)
(b0 `append` b1) m1
try _ b1 m1 _ _ = unGet b (s0 `B.append` bufferBytes b1)
b1 m1 w0 kf' ks'
in unGet a s0 emptyBuffer m0 w0 try ks'
{-# INLINE mplus #-}
------------------------------------------------------------------------
formatTrace :: [String] -> String
formatTrace [] = "Empty call stack"
formatTrace ls = "From:\t" ++ intercalate "\n\t" ls ++ "\n"
get :: Get B.ByteString
get = Get (\s0 b0 m0 w _ k -> k s0 b0 m0 w s0)
{-# INLINE get #-}
put :: B.ByteString -> Int -> Get ()
put s !w = Get (\_ b0 m _ _ k -> k s b0 m w ())
{-# INLINE put #-}
label :: String -> Get a -> Get a
label l m =
Get $ \ s0 b0 m0 w0 kf ks ->
let kf' s1 b1 m1 ls = kf s1 b1 m1 (l:ls)
in unGet m s0 b0 m0 w0 kf' ks
finalK :: Success a a
finalK s _ _ _ a = Done a s
failK :: Failure a
failK s b _ ls msg =
Fail (unlines [msg, formatTrace ls]) (s `B.append` bufferBytes b)
-- | Run the Get monad applies a 'get'-based parser on the input ByteString
runGet :: Get a -> B.ByteString -> Either String a
runGet m str =
case unGet m str Nothing Complete 0 failK finalK of
Fail i _ -> Left i
Done a _ -> Right a
Partial{} -> Left "Failed reading: Internal error: unexpected Partial."
{-# INLINE runGet #-}
-- | Run the get monad on a single chunk, providing an optional length for the
-- remaining, unseen input, with Nothing indicating that it's not clear how much
-- input is left. For example, with a lazy ByteString, the optional length
-- represents the sum of the lengths of all remaining chunks.
runGetChunk :: Get a -> Maybe Int -> B.ByteString -> Result a
runGetChunk m mbLen str = unGet m str Nothing (Incomplete mbLen) 0 failK finalK
{-# INLINE runGetChunk #-}
-- | Run the Get monad applies a 'get'-based parser on the input ByteString
runGetPartial :: Get a -> B.ByteString -> Result a
runGetPartial m = runGetChunk m Nothing
{-# INLINE runGetPartial #-}
-- | Run the Get monad applies a 'get'-based parser on the input
-- ByteString, starting at the specified offset. In addition to the result of get
-- it returns the rest of the input.
runGetState :: Get a -> B.ByteString -> Int
-> Either String (a, B.ByteString)
runGetState m str off = case runGetState' m str off of
(Right a,bs) -> Right (a,bs)
(Left i,_) -> Left i
{-# INLINE runGetState #-}
-- | Run the Get monad applies a 'get'-based parser on the input
-- ByteString, starting at the specified offset. In addition to the result of get
-- it returns the rest of the input, even in the event of a failure.
runGetState' :: Get a -> B.ByteString -> Int
-> (Either String a, B.ByteString)
runGetState' m str off =
case unGet m (B.drop off str) Nothing Complete 0 failK finalK of
Fail i bs -> (Left i,bs)
Done a bs -> (Right a, bs)
Partial{} -> (Left "Failed reading: Internal error: unexpected Partial.",B.empty)
{-# INLINE runGetState' #-}
-- Lazy Get --------------------------------------------------------------------
runGetLazy' :: Get a -> L.ByteString -> (Either String a,L.ByteString)
runGetLazy' m lstr =
case L.toChunks lstr of
[c] -> wrapStrict (runGetState' m c 0)
[] -> wrapStrict (runGetState' m B.empty 0)
c:cs -> loop (runGetChunk m (Just (len - B.length c)) c) cs
where
len = fromIntegral (L.length lstr)
wrapStrict (e,s) = (e,L.fromChunks [s])
loop result chunks = case result of
Fail str rest -> (Left str, L.fromChunks (rest : chunks))
Partial k -> case chunks of
c:cs -> loop (k c) cs
[] -> loop (k B.empty) []
Done r rest -> (Right r, L.fromChunks (rest : chunks))
{-# INLINE runGetLazy' #-}
-- | Run the Get monad over a Lazy ByteString. Note that this will not run the
-- Get parser lazily, but will operate on lazy ByteStrings.
runGetLazy :: Get a -> L.ByteString -> Either String a
runGetLazy m lstr = fst (runGetLazy' m lstr)
{-# INLINE runGetLazy #-}
-- | Run the Get monad over a Lazy ByteString. Note that this does not run the
-- Get parser lazily, but will operate on lazy ByteStrings.
runGetLazyState :: Get a -> L.ByteString -> Either String (a,L.ByteString)
runGetLazyState m lstr = case runGetLazy' m lstr of
(Right a,rest) -> Right (a,rest)
(Left err,_) -> Left err
{-# INLINE runGetLazyState #-}
------------------------------------------------------------------------
-- | If at least @n@ bytes of input are available, return the current
-- input, otherwise fail.
{-# INLINE ensure #-}
ensure :: Int -> Get B.ByteString
ensure n0 = n0 `seq` Get $ \ s0 b0 m0 w0 kf ks -> let
n' = n0 - B.length s0
in if n' <= 0
then ks s0 b0 m0 w0 s0
else getMore n' s0 [] b0 m0 w0 kf ks
where
-- The "accumulate and concat" pattern here is important not to incur
-- in quadratic behavior, see <https://github.com/GaloisInc/cereal/issues/48>
finalInput s0 ss = B.concat (reverse (s0 : ss))
finalBuffer b0 s0 ss = extendBuffer b0 (B.concat (reverse (init (s0 : ss))))
getMore !n s0 ss b0 m0 w0 kf ks = let
tooFewBytes = let
!s = finalInput s0 ss
!b = finalBuffer b0 s0 ss
in kf s b m0 ["demandInput"] "too few bytes"
in case m0 of
Complete -> tooFewBytes
Incomplete mb -> Partial $ \s ->
if B.null s
then tooFewBytes
else let
!mb' = case mb of
Just l -> Just $! l - B.length s
Nothing -> Nothing
in checkIfEnough n s (s0 : ss) b0 (Incomplete mb') w0 kf ks
checkIfEnough !n s0 ss b0 m0 w0 kf ks = let
n' = n - B.length s0
in if n' <= 0
then let
!s = finalInput s0 ss
!b = finalBuffer b0 s0 ss
in ks s b m0 w0 s
else getMore n' s0 ss b0 m0 w0 kf ks
-- | Isolate an action to operating within a fixed block of bytes. The action
-- is required to consume all the bytes that it is isolated to.
isolate :: Int -> Get a -> Get a
isolate n m = do
M.when (n < 0) (fail "Attempted to isolate a negative number of bytes")
s <- ensure n
let (s',rest) = B.splitAt n s
cur <- bytesRead
put s' cur
a <- m
used <- get
unless (B.null used) (fail "not all bytes parsed in isolate")
put rest (cur + n)
return a
failDesc :: String -> Get a
failDesc err = do
let msg = "Failed reading: " ++ err
Get (\s0 b0 m0 _ kf _ -> kf s0 b0 m0 [] msg)
-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
skip :: Int -> Get ()
skip n = do
s <- ensure n
cur <- bytesRead
put (B.drop n s) (cur + n)
-- | Skip ahead up to @n@ bytes in the current chunk. No error if there aren't
-- enough bytes, or if less than @n@ bytes are skipped.
uncheckedSkip :: Int -> Get ()
uncheckedSkip n = do
s <- get
cur <- bytesRead
put (B.drop n s) (cur + n)
-- | Run @ga@, but return without consuming its input.
-- Fails if @ga@ fails.
lookAhead :: Get a -> Get a
lookAhead ga = Get $ \ s0 b0 m0 w0 kf ks ->
-- the new continuation extends the old input with the new buffered bytes, and
-- appends the new buffer to the old one, if there was one.
let ks' _ b1 = ks (s0 `B.append` bufferBytes b1) (b0 `append` b1)
kf' _ b1 = kf s0 (b0 `append` b1)
in unGet ga s0 emptyBuffer m0 w0 kf' ks'
-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
-- Fails if @gma@ fails.
lookAheadM :: Get (Maybe a) -> Get (Maybe a)
lookAheadM gma = do
s <- get
pre <- bytesRead
ma <- gma
M.when (isNothing ma) (put s pre)
return ma
-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
-- Fails if @gea@ fails.
lookAheadE :: Get (Either a b) -> Get (Either a b)
lookAheadE gea = do
s <- get
pre <- bytesRead
ea <- gea
case ea of
Left _ -> put s pre
_ -> return ()
return ea
-- | Get the next up to @n@ bytes as a ByteString until end of this chunk,
-- without consuming them.
uncheckedLookAhead :: Int -> Get B.ByteString
uncheckedLookAhead n = do
s <- get
return (B.take n s)
------------------------------------------------------------------------
-- Utility
-- | Get the number of remaining unparsed bytes. Useful for checking whether
-- all input has been consumed.
--
-- WARNING: when run with @runGetPartial@, remaining will only return the number
-- of bytes that are remaining in the current input.
remaining :: Get Int
remaining = Get (\ s0 b0 m0 w0 _ ks -> ks s0 b0 m0 w0 (B.length s0 + moreLength m0))
-- | Test whether all input has been consumed.
--
-- WARNING: when run with @runGetPartial@, isEmpty will only tell you if you're
-- at the end of the current chunk.
isEmpty :: Get Bool
isEmpty = Get (\ s0 b0 m0 w0 _ ks -> ks s0 b0 m0 w0 (B.null s0 && moreLength m0 == 0))
------------------------------------------------------------------------
-- Utility with ByteStrings
-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
-- than @n@ bytes are left in the input. This function creates a fresh
-- copy of the underlying bytes.
getByteString :: Int -> Get B.ByteString
getByteString n = do
bs <- getBytes n
return $! B.copy bs
getLazyByteString :: Int64 -> Get L.ByteString
getLazyByteString n = f `fmap` getByteString (fromIntegral n)
where f bs = L.fromChunks [bs]
getShortByteString :: Int -> Get BS.ShortByteString
getShortByteString n = do
bs <- getBytes n
return $! BS.toShort bs
------------------------------------------------------------------------
-- Helpers
-- | Pull @n@ bytes from the input, as a strict ByteString.
getBytes :: Int -> Get B.ByteString
getBytes n | n < 0 = fail "getBytes: negative length requested"
getBytes n = do
s <- ensure n
let consume = B.unsafeTake n s
rest = B.unsafeDrop n s
-- (consume,rest) = B.splitAt n s
cur <- bytesRead
put rest (cur + n)
return consume
{-# INLINE getBytes #-}
------------------------------------------------------------------------
-- Primtives
-- helper, get a raw Ptr onto a strict ByteString copied out of the
-- underlying strict byteString.
getPtr :: Storable a => Int -> Get a
getPtr n = do
(fp,o,_) <- B.toForeignPtr `fmap` getBytes n
let k p = peek (castPtr (p `plusPtr` o))
return (unsafeDupablePerformIO (withForeignPtr fp k))
{-# INLINE getPtr #-}
-----------------------------------------------------------------------
-- | Read a Int8 from the monad state
getInt8 :: Get Int8
getInt8 = do
s <- getBytes 1
return $! fromIntegral (B.unsafeHead s)
-- | Read a Int16 in big endian format
getInt16be :: Get Int16
getInt16be = do
s <- getBytes 2
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 1) )
-- | Read a Int16 in little endian format
getInt16le :: Get Int16
getInt16le = do
s <- getBytes 2
return $! (fromIntegral (s `B.unsafeIndex` 1) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
-- | Read a Int32 in big endian format
getInt32be :: Get Int32
getInt32be = do
s <- getBytes 4
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftL` 24) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftL` 16) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 3) )
-- | Read a Int32 in little endian format
getInt32le :: Get Int32
getInt32le = do
s <- getBytes 4
return $! (fromIntegral (s `B.unsafeIndex` 3) `shiftL` 24) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftL` 16) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
-- | Read a Int64 in big endian format
getInt64be :: Get Int64
getInt64be = do
s <- getBytes 8
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftL` 56) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftL` 48) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftL` 40) .|.
(fromIntegral (s `B.unsafeIndex` 3) `shiftL` 32) .|.
(fromIntegral (s `B.unsafeIndex` 4) `shiftL` 24) .|.
(fromIntegral (s `B.unsafeIndex` 5) `shiftL` 16) .|.
(fromIntegral (s `B.unsafeIndex` 6) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 7) )
-- | Read a Int64 in little endian format
getInt64le :: Get Int64
getInt64le = do
s <- getBytes 8
return $! (fromIntegral (s `B.unsafeIndex` 7) `shiftL` 56) .|.
(fromIntegral (s `B.unsafeIndex` 6) `shiftL` 48) .|.
(fromIntegral (s `B.unsafeIndex` 5) `shiftL` 40) .|.
(fromIntegral (s `B.unsafeIndex` 4) `shiftL` 32) .|.
(fromIntegral (s `B.unsafeIndex` 3) `shiftL` 24) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftL` 16) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftL` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
{-# INLINE getInt8 #-}
{-# INLINE getInt16be #-}
{-# INLINE getInt16le #-}
{-# INLINE getInt32be #-}
{-# INLINE getInt32le #-}
{-# INLINE getInt64be #-}
{-# INLINE getInt64le #-}
------------------------------------------------------------------------
-- | Read a Word8 from the monad state
getWord8 :: Get Word8
getWord8 = do
s <- getBytes 1
return (B.unsafeHead s)
-- | Read a Word16 in big endian format
getWord16be :: Get Word16
getWord16be = do
s <- getBytes 2
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftl_w16` 8) .|.
(fromIntegral (s `B.unsafeIndex` 1))
-- | Read a Word16 in little endian format
getWord16le :: Get Word16
getWord16le = do
s <- getBytes 2
return $! (fromIntegral (s `B.unsafeIndex` 1) `shiftl_w16` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
-- | Read a Word32 in big endian format
getWord32be :: Get Word32
getWord32be = do
s <- getBytes 4
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftl_w32` 24) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftl_w32` 16) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftl_w32` 8) .|.
(fromIntegral (s `B.unsafeIndex` 3) )
-- | Read a Word32 in little endian format
getWord32le :: Get Word32
getWord32le = do
s <- getBytes 4
return $! (fromIntegral (s `B.unsafeIndex` 3) `shiftl_w32` 24) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftl_w32` 16) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftl_w32` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
-- | Read a Word64 in big endian format
getWord64be :: Get Word64
getWord64be = do
s <- getBytes 8
return $! (fromIntegral (s `B.unsafeIndex` 0) `shiftl_w64` 56) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftl_w64` 48) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftl_w64` 40) .|.
(fromIntegral (s `B.unsafeIndex` 3) `shiftl_w64` 32) .|.
(fromIntegral (s `B.unsafeIndex` 4) `shiftl_w64` 24) .|.
(fromIntegral (s `B.unsafeIndex` 5) `shiftl_w64` 16) .|.
(fromIntegral (s `B.unsafeIndex` 6) `shiftl_w64` 8) .|.
(fromIntegral (s `B.unsafeIndex` 7) )
-- | Read a Word64 in little endian format
getWord64le :: Get Word64
getWord64le = do
s <- getBytes 8
return $! (fromIntegral (s `B.unsafeIndex` 7) `shiftl_w64` 56) .|.
(fromIntegral (s `B.unsafeIndex` 6) `shiftl_w64` 48) .|.
(fromIntegral (s `B.unsafeIndex` 5) `shiftl_w64` 40) .|.
(fromIntegral (s `B.unsafeIndex` 4) `shiftl_w64` 32) .|.
(fromIntegral (s `B.unsafeIndex` 3) `shiftl_w64` 24) .|.
(fromIntegral (s `B.unsafeIndex` 2) `shiftl_w64` 16) .|.
(fromIntegral (s `B.unsafeIndex` 1) `shiftl_w64` 8) .|.
(fromIntegral (s `B.unsafeIndex` 0) )
{-# INLINE getWord8 #-}
{-# INLINE getWord16be #-}
{-# INLINE getWord16le #-}
{-# INLINE getWord32be #-}
{-# INLINE getWord32le #-}
{-# INLINE getWord64be #-}
{-# INLINE getWord64le #-}
------------------------------------------------------------------------
-- Host-endian reads
-- | /O(1)./ Read a single native machine word. The word is read in
-- host order, host endian form, for the machine you're on. On a 64 bit
-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
getWordhost :: Get Word
getWordhost = getPtr (sizeOf (undefined :: Word))
-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
getWord16host :: Get Word16
getWord16host = getPtr (sizeOf (undefined :: Word16))
-- | /O(1)./ Read a Word32 in native host order and host endianness.
getWord32host :: Get Word32
getWord32host = getPtr (sizeOf (undefined :: Word32))
-- | /O(1)./ Read a Word64 in native host order and host endianness.
getWord64host :: Get Word64
getWord64host = getPtr (sizeOf (undefined :: Word64))
------------------------------------------------------------------------
-- Unchecked shifts
shiftl_w16 :: Word16 -> Int -> Word16
shiftl_w32 :: Word32 -> Int -> Word32
shiftl_w64 :: Word64 -> Int -> Word64
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#if MIN_VERSION_base(4,16,0)
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftLWord16#` i)
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftLWord32#` i)
#else
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i)
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i)
#endif
#if WORD_SIZE_IN_BITS < 64
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftL64"
uncheckedShiftL64# :: Word64# -> Int# -> Word64#
#endif
#else
#if MIN_VERSION_base(4,17,0)
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
#else
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
#endif
#endif
#else
shiftl_w16 = shiftL
shiftl_w32 = shiftL
shiftl_w64 = shiftL
#endif
-- Containers ------------------------------------------------------------------
getTwoOf :: Get a -> Get b -> Get (a,b)
getTwoOf ma mb = M.liftM2 (,) ma mb
-- | Get a list in the following format:
-- Word64 (big endian format)
-- element 1
-- ...
-- element n
getListOf :: Get a -> Get [a]
getListOf m = go [] =<< getWord64be
where
go as 0 = return $! reverse as
go as i = do x <- m
x `seq` go (x:as) (i - 1)
-- | Get an IArray in the following format:
-- index (lower bound)
-- index (upper bound)
-- Word64 (big endian format)
-- element 1
-- ...
-- element n
getIArrayOf :: (Ix i, IArray a e) => Get i -> Get e -> Get (a i e)
getIArrayOf ix e = M.liftM2 listArray (getTwoOf ix ix) (getListOf e)
-- | Get a sequence in the following format:
-- Word64 (big endian format)
-- element 1
-- ...
-- element n
getSeqOf :: Get a -> Get (Seq.Seq a)
getSeqOf m = go Seq.empty =<< getWord64be
where
go xs 0 = return $! xs
go xs n = xs `seq` n `seq` do
x <- m
go (xs Seq.|> x) (n - 1)
-- | Read as a list of lists.
getTreeOf :: Get a -> Get (T.Tree a)
getTreeOf m = M.liftM2 T.Node m (getListOf (getTreeOf m))
-- | Read as a list of pairs of key and element.
getMapOf :: Ord k => Get k -> Get a -> Get (Map.Map k a)
getMapOf k m = Map.fromList `fmap` getListOf (getTwoOf k m)
-- | Read as a list of pairs of int and element.
getIntMapOf :: Get Int -> Get a -> Get (IntMap.IntMap a)
getIntMapOf i m = IntMap.fromList `fmap` getListOf (getTwoOf i m)
-- | Read as a list of elements.
getSetOf :: Ord a => Get a -> Get (Set.Set a)
getSetOf m = Set.fromList `fmap` getListOf m
-- | Read as a list of ints.
getIntSetOf :: Get Int -> Get IntSet.IntSet
getIntSetOf m = IntSet.fromList `fmap` getListOf m
-- | Read in a Maybe in the following format:
-- Word8 (0 for Nothing, anything else for Just)
-- element (when Just)
getMaybeOf :: Get a -> Get (Maybe a)
getMaybeOf m = do
tag <- getWord8
case tag of
0 -> return Nothing
_ -> Just `fmap` m
-- | Read an Either, in the following format:
-- Word8 (0 for Left, anything else for Right)
-- element a when 0, element b otherwise
getEitherOf :: Get a -> Get b -> Get (Either a b)
getEitherOf ma mb = do
tag <- getWord8
case tag of
0 -> Left `fmap` ma
_ -> Right `fmap` mb
-- | Read in a length and then read a nested structure
-- of that length.
getNested :: Get Int -> Get a -> Get a
getNested getLen getVal = do
n <- getLen
isolate n getVal
-- | Get the number of bytes read up to this point
bytesRead :: Get Int
bytesRead = Get (\i b m w _ k -> k i b m w w)

View file

@ -0,0 +1,84 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
#ifndef MIN_VERSION_base
#define MIN_VERSION_base(x,y,z) 1
#endif
-- | IEEE-754 parsing, as described in this stack-overflow article:
--
-- <http://stackoverflow.com/questions/6976684/converting-ieee-754-floating-point-in-haskell-word32-64-to-and-from-haskell-float/7002812#7002812>
module Data.Serialize.IEEE754 (
-- * IEEE-754 reads
getFloat32le
, getFloat32be
, getFloat64le
, getFloat64be
-- * IEEE-754 writes
, putFloat32le
, putFloat32be
, putFloat64le
, putFloat64be
) where
import Data.Word ( Word32, Word64 )
import Data.Serialize.Get
import Data.Serialize.Put
import qualified Data.ByteString.Builder as Builder
import System.IO.Unsafe (unsafeDupablePerformIO)
import Foreign.Marshal.Alloc (alloca)
import Foreign.Storable (peek, poke)
import Foreign.Ptr (castPtr, Ptr)
#if !(MIN_VERSION_base(4,8,0))
import Control.Applicative ( (<$>) )
#endif
-- | Read a Float in little endian IEEE-754 format
getFloat32le :: Get Float
getFloat32le = wordToFloat <$> getWord32le
-- | Read a Float in big endian IEEE-754 format
getFloat32be :: Get Float
getFloat32be = wordToFloat <$> getWord32be
-- | Read a Double in little endian IEEE-754 format
getFloat64le :: Get Double
getFloat64le = wordToDouble <$> getWord64le
-- | Read a Double in big endian IEEE-754 format
getFloat64be :: Get Double
getFloat64be = wordToDouble <$> getWord64be
-- | Write a Float in little endian IEEE-754 format
putFloat32le :: Float -> Put
putFloat32le = putBuilder . Builder.floatLE
-- | Write a Float in big endian IEEE-754 format
putFloat32be :: Float -> Put
putFloat32be = putBuilder . Builder.floatBE
-- | Write a Double in little endian IEEE-754 format
putFloat64le :: Double -> Put
putFloat64le = putBuilder . Builder.doubleLE
-- | Write a Double in big endian IEEE-754 format
putFloat64be :: Double -> Put
putFloat64be = putBuilder . Builder.doubleBE
{-# INLINE wordToFloat #-}
wordToFloat :: Word32 -> Float
wordToFloat w = unsafeDupablePerformIO $ alloca $ \(ptr :: Ptr Word32) -> do
poke ptr w
peek (castPtr ptr)
{-# INLINE wordToDouble #-}
wordToDouble :: Word64 -> Double
wordToDouble w = unsafeDupablePerformIO $ alloca $ \(ptr :: Ptr Word64) -> do
poke ptr w
peek (castPtr ptr)

View file

@ -0,0 +1,484 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleInstances #-}
#ifndef MIN_VERSION_base
#define MIN_VERSION_base(x,y,z) 0
#endif
#ifndef MIN_VERSION_bytestring
#define MIN_VERSION_bytestring(x,y,z) 0
#endif
-----------------------------------------------------------------------------
-- |
-- Module : Data.Serialize.Put
-- Copyright : Lennart Kolmodin, Galois Inc. 2009
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Trevor Elliott <trevor@galois.com>
-- Stability :
-- Portability :
--
-- The Put monad. A monad for efficiently constructing bytestrings.
--
-----------------------------------------------------------------------------
module Data.Serialize.Put (
-- * The Put type
Put
, PutM(..)
, Putter
, runPut
, runPutM
, runPutLazy
, runPutMLazy
, runPutMBuilder
, putBuilder
, execPut
-- * Flushing the implicit parse state
, flush
-- * Primitives
, putWord8
, putInt8
, putByteString
, putLazyByteString
, putShortByteString
-- * Big-endian primitives
, putWord16be
, putWord32be
, putWord64be
, putInt16be
, putInt32be
, putInt64be
-- * Little-endian primitives
, putWord16le
, putWord32le
, putWord64le
, putInt16le
, putInt32le
, putInt64le
-- * Host-endian, unaligned writes
, putWordhost
, putWord16host
, putWord32host
, putWord64host
, putInthost
, putInt16host
, putInt32host
, putInt64host
-- * Containers
, putTwoOf
, putListOf
, putIArrayOf
, putSeqOf
, putTreeOf
, putMapOf
, putIntMapOf
, putSetOf
, putIntSetOf
, putMaybeOf
, putEitherOf
, putNested
) where
import Data.ByteString.Builder (Builder, toLazyByteString)
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Extra as B
import qualified Data.ByteString.Short as BS
import qualified Control.Applicative as A
import Data.Array.Unboxed
#if MIN_VERSION_base(4,9,0)
import qualified Data.Semigroup as M
#endif
import qualified Data.Monoid as M
import qualified Data.Foldable as F
import Data.Word
import Data.Int
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified Data.Map as Map
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import qualified Data.Tree as T
#if !(MIN_VERSION_base(4,8,0))
import Control.Applicative
import Data.Foldable (foldMap)
import Data.Monoid
#endif
#if !(MIN_VERSION_bytestring(0,10,0))
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Marshal.Utils (copyBytes)
import Foreign.Ptr (plusPtr)
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Lazy.Internal as L
#endif
------------------------------------------------------------------------
-- XXX Strict in builder only.
data PairS a = PairS a !Builder
sndS :: PairS a -> Builder
sndS (PairS _ b) = b
-- | The PutM type. A Writer monad over the efficient Builder monoid.
newtype PutM a = Put { unPut :: PairS a }
-- | Put merely lifts Builder into a Writer monad, applied to ().
type Put = PutM ()
type Putter a = a -> Put
instance Functor PutM where
fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
{-# INLINE fmap #-}
instance A.Applicative PutM where
pure a = Put (PairS a M.mempty)
{-# INLINE pure #-}
m <*> k = Put $
let PairS f w = unPut m
PairS x w' = unPut k
in PairS (f x) (w `M.mappend` w')
{-# INLINE (<*>) #-}
m *> k = Put $
let PairS _ w = unPut m
PairS b w' = unPut k
in PairS b (w `M.mappend` w')
{-# INLINE (*>) #-}
instance Monad PutM where
return = pure
{-# INLINE return #-}
m >>= k = Put $
let PairS a w = unPut m
PairS b w' = unPut (k a)
in PairS b (w `M.mappend` w')
{-# INLINE (>>=) #-}
(>>) = (*>)
{-# INLINE (>>) #-}
#if MIN_VERSION_base(4,9,0)
instance M.Semigroup (PutM ()) where
(<>) = (*>)
{-# INLINE (<>) #-}
#endif
instance Monoid (PutM ()) where
mempty = pure ()
{-# INLINE mempty #-}
#if !(MIN_VERSION_base(4,11,0))
mappend = (*>)
{-# INLINE mappend #-}
#endif
tell :: Putter Builder
tell b = Put $! PairS () b
{-# INLINE tell #-}
putBuilder :: Putter Builder
putBuilder = tell
{-# INLINE putBuilder #-}
-- | Run the 'Put' monad
execPut :: PutM a -> Builder
execPut = sndS . unPut
{-# INLINE execPut #-}
-- | Run the 'Put' monad with a serialiser
runPut :: Put -> S.ByteString
runPut = lazyToStrictByteString . runPutLazy
{-# INLINE runPut #-}
-- | Run the 'Put' monad with a serialiser and get its result
runPutM :: PutM a -> (a, S.ByteString)
runPutM (Put (PairS f s)) = (f, lazyToStrictByteString (toLazyByteString s))
{-# INLINE runPutM #-}
-- | Run the 'Put' monad with a serialiser
runPutLazy :: Put -> L.ByteString
runPutLazy = toLazyByteString . sndS . unPut
{-# INLINE runPutLazy #-}
-- | Run the 'Put' monad with a serialiser
runPutMLazy :: PutM a -> (a, L.ByteString)
runPutMLazy (Put (PairS f s)) = (f, toLazyByteString s)
{-# INLINE runPutMLazy #-}
-- | Run the 'Put' monad and get the result and underlying 'Builder'
runPutMBuilder :: PutM a -> (a, Builder)
runPutMBuilder (Put (PairS f s)) = (f, s)
{-# INLINE runPutMBuilder #-}
------------------------------------------------------------------------
-- | Pop the ByteString we have constructed so far, if any, yielding a
-- new chunk in the result ByteString.
flush :: Put
flush = tell B.flush
{-# INLINE flush #-}
-- | Efficiently write a byte into the output buffer
putWord8 :: Putter Word8
putWord8 = tell . B.word8
{-# INLINE putWord8 #-}
-- | Efficiently write an int into the output buffer
putInt8 :: Putter Int8
putInt8 = tell . B.int8
{-# INLINE putInt8 #-}
-- | An efficient primitive to write a strict ByteString into the output buffer.
-- It flushes the current buffer, and writes the argument into a new chunk.
putByteString :: Putter S.ByteString
putByteString = tell . B.byteString
{-# INLINE putByteString #-}
putShortByteString :: Putter BS.ShortByteString
putShortByteString = tell . B.shortByteString
-- | Write a lazy ByteString efficiently, simply appending the lazy
-- ByteString chunks to the output buffer
putLazyByteString :: Putter L.ByteString
putLazyByteString = tell . B.lazyByteString
{-# INLINE putLazyByteString #-}
-- | Write a Word16 in big endian format
putWord16be :: Putter Word16
putWord16be = tell . B.word16BE
{-# INLINE putWord16be #-}
-- | Write a Word16 in little endian format
putWord16le :: Putter Word16
putWord16le = tell . B.word16LE
{-# INLINE putWord16le #-}
-- | Write a Word32 in big endian format
putWord32be :: Putter Word32
putWord32be = tell . B.word32BE
{-# INLINE putWord32be #-}
-- | Write a Word32 in little endian format
putWord32le :: Putter Word32
putWord32le = tell . B.word32LE
{-# INLINE putWord32le #-}
-- | Write a Word64 in big endian format
putWord64be :: Putter Word64
putWord64be = tell . B.word64BE
{-# INLINE putWord64be #-}
-- | Write a Word64 in little endian format
putWord64le :: Putter Word64
putWord64le = tell . B.word64LE
{-# INLINE putWord64le #-}
------------------------------------------------------------------------
-- | /O(1)./ Write a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Putter Word
putWordhost = tell . B.wordHost
{-# INLINE putWordhost #-}
-- | /O(1)./ Write a Word16 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord16host :: Putter Word16
putWord16host = tell . B.word16Host
{-# INLINE putWord16host #-}
-- | /O(1)./ Write a Word32 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord32host :: Putter Word32
putWord32host = tell . B.word32Host
{-# INLINE putWord32host #-}
-- | /O(1)./ Write a Word64 in native host order
-- On a 32 bit machine we write two host order Word32s, in big endian form.
-- For portability issues see @putWordhost@.
putWord64host :: Putter Word64
putWord64host = tell . B.word64Host
{-# INLINE putWord64host #-}
-- | Write a Int16 in big endian format
putInt16be :: Putter Int16
putInt16be = tell . B.int16BE
{-# INLINE putInt16be #-}
-- | Write a Int16 in little endian format
putInt16le :: Putter Int16
putInt16le = tell . B.int16LE
{-# INLINE putInt16le #-}
-- | Write a Int32 in big endian format
putInt32be :: Putter Int32
putInt32be = tell . B.int32BE
{-# INLINE putInt32be #-}
-- | Write a Int32 in little endian format
putInt32le :: Putter Int32
putInt32le = tell . B.int32LE
{-# INLINE putInt32le #-}
-- | Write a Int64 in big endian format
putInt64be :: Putter Int64
putInt64be = tell . B.int64BE
{-# INLINE putInt64be #-}
-- | Write a Int64 in little endian format
putInt64le :: Putter Int64
putInt64le = tell . B.int64LE
{-# INLINE putInt64le #-}
------------------------------------------------------------------------
-- | /O(1)./ Write a single native machine int. The int is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Int is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or int sized machines, without conversion.
--
putInthost :: Putter Int
putInthost = tell . B.intHost
{-# INLINE putInthost #-}
-- | /O(1)./ Write a Int16 in native host order and host endianness.
-- For portability issues see @putInthost@.
putInt16host :: Putter Int16
putInt16host = tell . B.int16Host
{-# INLINE putInt16host #-}
-- | /O(1)./ Write a Int32 in native host order and host endianness.
-- For portability issues see @putInthost@.
putInt32host :: Putter Int32
putInt32host = tell . B.int32Host
{-# INLINE putInt32host #-}
-- | /O(1)./ Write a Int64 in native host order
-- On a 32 bit machine we write two host order Int32s, in big endian form.
-- For portability issues see @putInthost@.
putInt64host :: Putter Int64
putInt64host = tell . B.int64Host
{-# INLINE putInt64host #-}
-- Containers ------------------------------------------------------------------
encodeListOf :: (a -> Builder) -> [a] -> Builder
encodeListOf f = -- allow inlining with just a single argument
\xs -> execPut (putWord64be (fromIntegral $ length xs)) `M.mappend`
F.foldMap f xs
{-# INLINE encodeListOf #-}
putTwoOf :: Putter a -> Putter b -> Putter (a,b)
putTwoOf pa pb (a,b) = pa a >> pb b
{-# INLINE putTwoOf #-}
putListOf :: Putter a -> Putter [a]
putListOf pa = \l -> do
putWord64be (fromIntegral (length l))
mapM_ pa l
{-# INLINE putListOf #-}
putIArrayOf :: (Ix i, IArray a e) => Putter i -> Putter e -> Putter (a i e)
putIArrayOf pix pe a = do
putTwoOf pix pix (bounds a)
putListOf pe (elems a)
{-# INLINE putIArrayOf #-}
putSeqOf :: Putter a -> Putter (Seq.Seq a)
putSeqOf pa = \s -> do
putWord64be (fromIntegral $ Seq.length s)
F.mapM_ pa s
{-# INLINE putSeqOf #-}
putTreeOf :: Putter a -> Putter (T.Tree a)
putTreeOf pa =
tell . go
where
go (T.Node x cs) = execPut (pa x) `M.mappend` encodeListOf go cs
{-# INLINE putTreeOf #-}
putMapOf :: Putter k -> Putter a -> Putter (Map.Map k a)
putMapOf pk pa = putListOf (putTwoOf pk pa) . Map.toAscList
{-# INLINE putMapOf #-}
putIntMapOf :: Putter Int -> Putter a -> Putter (IntMap.IntMap a)
putIntMapOf pix pa = putListOf (putTwoOf pix pa) . IntMap.toAscList
{-# INLINE putIntMapOf #-}
putSetOf :: Putter a -> Putter (Set.Set a)
putSetOf pa = putListOf pa . Set.toAscList
{-# INLINE putSetOf #-}
putIntSetOf :: Putter Int -> Putter IntSet.IntSet
putIntSetOf pix = putListOf pix . IntSet.toAscList
{-# INLINE putIntSetOf #-}
putMaybeOf :: Putter a -> Putter (Maybe a)
putMaybeOf _ Nothing = putWord8 0
putMaybeOf pa (Just a) = putWord8 1 >> pa a
{-# INLINE putMaybeOf #-}
putEitherOf :: Putter a -> Putter b -> Putter (Either a b)
putEitherOf pa _ (Left a) = putWord8 0 >> pa a
putEitherOf _ pb (Right b) = putWord8 1 >> pb b
{-# INLINE putEitherOf #-}
-- | Put a nested structure by first putting a length
-- field and then putting the encoded value.
putNested :: Putter Int -> Put -> Put
putNested putLen putVal = do
let bs = runPut putVal
putLen (S.length bs)
putByteString bs
-------------------------------------------------------------------------------
-- pre-bytestring-0.10 compatibility
-------------------------------------------------------------------------------
{-# INLINE lazyToStrictByteString #-}
lazyToStrictByteString :: L.ByteString -> S.ByteString
#if MIN_VERSION_bytestring(0,10,0)
lazyToStrictByteString = L.toStrict
#else
lazyToStrictByteString = packChunks
-- packChunks is taken from the blaze-builder package.
-- | Pack the chunks of a lazy bytestring into a single strict bytestring.
packChunks :: L.ByteString -> S.ByteString
packChunks lbs = S.unsafeCreate (fromIntegral $ L.length lbs) (copyChunks lbs)
where
copyChunks !L.Empty !_pf = return ()
copyChunks !(L.Chunk (S.PS fpbuf o l) lbs') !pf = do
withForeignPtr fpbuf $ \pbuf ->
copyBytes pf (pbuf `plusPtr` o) l
copyChunks lbs' (pf `plusPtr` l)
#endif

View file

@ -0,0 +1,88 @@
{-# LANGUAGE OverloadedStrings, Safe #-}
module Data.Text.Lazy.Builder.Scientific
( scientificBuilder
, formatScientificBuilder
, FPFormat(..)
) where
import Data.Scientific (Scientific)
import qualified Data.Scientific as Scientific
import Data.Text.Lazy.Builder.RealFloat (FPFormat(..))
import Data.Text.Lazy.Builder (Builder, fromString, singleton, fromText)
import Data.Text.Lazy.Builder.Int (decimal)
import qualified Data.Text as T (replicate)
import Utils (roundTo, i2d)
import Data.Monoid ((<>))
-- | A @Text@ @Builder@ which renders a scientific number to full
-- precision, using standard decimal notation for arguments whose
-- absolute value lies between @0.1@ and @9,999,999@, and scientific
-- notation otherwise.
scientificBuilder :: Scientific -> Builder
scientificBuilder = formatScientificBuilder Generic Nothing
-- | Like 'scientificBuilder' but provides rendering options.
formatScientificBuilder :: FPFormat
-> Maybe Int -- ^ Number of decimal places to render.
-> Scientific
-> Builder
formatScientificBuilder fmt decs scntfc
| scntfc < 0 = singleton '-' <> doFmt fmt (Scientific.toDecimalDigits (-scntfc))
| otherwise = doFmt fmt (Scientific.toDecimalDigits scntfc)
where
doFmt format (is, e) =
let ds = map i2d is in
case format of
Generic ->
doFmt (if e < 0 || e > 7 then Exponent else Fixed)
(is,e)
Exponent ->
case decs of
Nothing ->
let show_e' = decimal (e-1) in
case ds of
"0" -> "0.0e0"
[d] -> singleton d <> ".0e" <> show_e'
(d:ds') -> singleton d <> singleton '.' <> fromString ds' <> singleton 'e' <> show_e'
[] -> error $ "Data.Text.Lazy.Builder.Scientific.formatScientificBuilder" ++
"/doFmt/Exponent: []"
Just dec ->
let dec' = max dec 1 in
case is of
[0] -> "0." <> fromText (T.replicate dec' "0") <> "e0"
_ ->
let (ei,is') = roundTo (dec'+1) is
in case map i2d (if ei > 0 then init is' else is') of
[] -> mempty
d:ds' -> singleton d <> singleton '.' <> fromString ds' <> singleton 'e' <> decimal (e-1+ei)
Fixed ->
let
mk0 ls = case ls of { "" -> "0" ; _ -> fromString ls}
in
case decs of
Nothing
| e <= 0 -> "0." <> fromText (T.replicate (-e) "0") <> fromString ds
| otherwise ->
let
f 0 s rs = mk0 (reverse s) <> singleton '.' <> mk0 rs
f n s "" = f (n-1) ('0':s) ""
f n s (r:rs) = f (n-1) (r:s) rs
in
f e "" ds
Just dec ->
let dec' = max dec 0 in
if e >= 0 then
let
(ei,is') = roundTo (dec' + e) is
(ls,rs) = splitAt (e+ei) (map i2d is')
in
mk0 ls <> (if null rs then "" else singleton '.' <> fromString rs)
else
let (ei,is') = roundTo dec' (replicate (-e) 0 ++ is)
in case map i2d (if ei > 0 then is' else 0:is') of
[] -> mempty
d:ds' -> singleton d <> (if null ds' then "" else singleton '.' <> fromString ds')