Initial commit

This commit is contained in:
La Ancapo 2026-01-25 02:27:22 +01:00
commit c101616e62
309 changed files with 53937 additions and 0 deletions

View file

@ -0,0 +1,215 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Base32.Hex
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.ByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32hex
-- encoding format. This includes padded and unpadded decoding variants, as well as
-- internal and external validation for canonicity.
--
module Data.ByteString.Base32.Hex
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
-- , decodeBase32Lenient
-- * Validation
, isBase32Hex
, isValidBase32Hex
) where
import qualified Data.ByteString as BS
import Data.ByteString.Internal (ByteString(..))
import Data.ByteString.Base32.Internal
import Data.ByteString.Base32.Internal.Tables
import Data.Either (isRight)
import Data.Text (Text)
import qualified Data.Text.Encoding as T
import System.IO.Unsafe (unsafeDupablePerformIO)
-- | Encode a 'ByteString' value as a Base32hex 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "ADQMS==="
--
encodeBase32 :: ByteString -> Text
encodeBase32 = T.decodeUtf8 . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ByteString' value as a Base32hex 'ByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32' "Sun"
-- "ADQMS==="
--
encodeBase32' :: ByteString -> ByteString
encodeBase32' = encodeBase32_ "0123456789ABCDEFGHIJKLMNOPQRSTUV"#
{-# INLINE encodeBase32' #-}
-- | Decode an arbitrarily padded Base32hex-encoded 'ByteString' value. If its length
-- is not a multiple of 8, then padding characters will be added to fill out the
-- input to a multiple of 8 for safe decoding, as Base32hex-encoded values are
-- optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32 "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32 "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32 "ADQM==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ByteString -> Either Text ByteString
decodeBase32 bs@(BS _ !l)
| l == 0 = Right bs
| r == 0 = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable bs
| r == 2 = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable (BS.append bs "======")
| r == 4 = validateLastNPads 2 bs $ decodeBase32_ hexDecodeTable (BS.append bs "====")
| r == 5 = validateLastNPads 3 bs $ decodeBase32_ hexDecodeTable (BS.append bs "===")
| r == 7 = validateLastNPads 5 bs $ decodeBase32_ hexDecodeTable (BS.append bs "=")
| otherwise = Left "Base32-encoded bytestring has invalid size"
where
!r = l `rem` 8
{-# INLINE decodeBase32 #-}
-- | Encode a 'ByteString' value as a Base32hex 'Text' value without padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded :: ByteString -> Text
encodeBase32Unpadded = T.decodeUtf8 . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ByteString' value as a Base32hex 'ByteString' value without padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded' :: ByteString -> ByteString
encodeBase32Unpadded' = encodeBase32NoPad_ "0123456789ABCDEFGHIJKLMNOPQRSTUV"#
{-# INLINE encodeBase32Unpadded' #-}
-- | Decode an unpadded Base32hex-encoded 'ByteString' value.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "ADQMS==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ByteString -> Either Text ByteString
decodeBase32Unpadded bs@(BS _ !l)
| l == 0 = Right bs
| r == 0 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable bs
| r == 2 = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable (BS.append bs "======")
| r == 4 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable (BS.append bs "====")
| r == 5 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable (BS.append bs "===")
| r == 7 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable (BS.append bs "=")
| otherwise = Left "Base32-encoded bytestring has invalid size"
where
!r = l `rem` 8
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32hex-encoded 'ByteString' value.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "ADQMS"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ByteString -> Either Text ByteString
decodeBase32Padded bs@(BS _ !l)
| l == 0 = Right bs
| r == 1 = Left "Base32-encoded bytestring has invalid size"
| r == 3 = Left "Base32-encoded bytestring has invalid size"
| r == 6 = Left "Base32-encoded bytestring has invalid size"
| r /= 0 = Left "Base32-encoded bytestring requires padding"
| otherwise = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable bs
where
!r = l `rem` 8
{-# INLINE decodeBase32Padded #-}
-- | Tell whether a 'ByteString' value is encoded in padded or unpadded Base32hex format
--
-- === __Examples__:
--
-- >>> isBase32Hex "ADQMS"
-- True
--
-- >>> isBase32Hex "ADQMS==="
-- True
--
-- >>> isBase32Hex "ADQMS=="
-- False
--
isBase32Hex :: ByteString -> Bool
isBase32Hex bs = isValidBase32Hex bs && isRight (decodeBase32 bs)
{-# INLINE isBase32Hex #-}
-- | Tell whether a 'ByteString' value is a valid Base32hex format.
--
-- This will not tell you whether or not this is a correct Base32hex representation,
-- only that it conforms to the correct shape (including padding/size etc.).
-- To check whether it is a true Base32hex encoded 'ByteString' value, use 'isBase32'.
--
-- === __Examples__:
--
-- >>> isValidBase32Hex "ADQMS"
-- True
--
-- >>> isValidBase32Hex "ADQMS="
-- False
--
-- >>> isValidBase32Hex "ADQMS%"
-- False
--
isValidBase32Hex :: ByteString -> Bool
isValidBase32Hex = validateBase32 "0123456789ABCDEFGHIJKLMNOPQRSTUV"
{-# INLINE isValidBase32Hex #-}

View file

@ -0,0 +1,120 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
-- |
-- Module : Data.ByteString.Base32.Internal
-- Copyright : (c) 2020 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : Experimental
-- Portability : portable
--
-- Internal module defining the encoding and decoding
-- processes and tables.
--
module Data.ByteString.Base32.Internal
( encodeBase32_
, encodeBase32NoPad_
, decodeBase32_
, validateBase32
, validateLastNPads
) where
import qualified Data.ByteString as BS
import Data.ByteString.Internal
import Data.ByteString.Base32.Internal.Head
import Data.Text (Text)
import Foreign.ForeignPtr
import Foreign.Ptr
import Foreign.Storable
import GHC.Word
import System.IO.Unsafe
-- -------------------------------------------------------------------------- --
-- Validating Base32
-- | Validate a base32-encoded bytestring against some alphabet.
--
validateBase32 :: ByteString -> ByteString -> Bool
validateBase32 !alphabet bs@(BS _ l)
| l == 0 = True
| r == 0 = f bs
| r == 2 = f (BS.append bs "======")
| r == 4 = f (BS.append bs "====")
| r == 5 = f (BS.append bs "===")
| r == 7 = f (BS.append bs "=")
| otherwise = False
where
r = l `rem` 8
f (BS fp l') = accursedUnutterablePerformIO $ withForeignPtr fp $ \p ->
go p (plusPtr p l')
go !p !end
| p == end = return True
| otherwise = do
w <- peek p
let check a
| a == 0x3d, plusPtr p 1 == end = True
| a == 0x3d, plusPtr p 2 == end = True
| a == 0x3d, plusPtr p 3 == end = True
| a == 0x3d, plusPtr p 4 == end = True
| a == 0x3d, plusPtr p 5 == end = True
| a == 0x3d, plusPtr p 6 == end = True
| a == 0x3d = False
| otherwise = BS.elem a alphabet
if check w then go (plusPtr p 1) end else return False
{-# INLINE validateBase32 #-}
-- | This function checks that the last N-chars of a bytestring are '='
-- and, if true, fails with a message or completes some io action.
--
-- This is necessary to check when decoding permissively (i.e. filling in padding chars).
-- Consider the following 8 cases of a string of length l:
--
-- - @l = 0 mod 8@: No pad chars are added, since the input is assumed to be good.
-- - @l = 1 mod 8@: Never an admissible length in base32
-- - @l = 2 mod 8@: 6 padding chars are added. If padding chars are present in the string, they will fail as to decode as final quanta
-- - @l = 3 mod 8@: Never an admissible length in base32
-- - @l = 4 mod 8@: 4 padding chars are added. If 2 padding chars are present in the string this can be "completed" in the sense that
-- it now acts like a string `l == 2 mod 8` with 6 padding chars, and could potentially form corrupted data.
-- - @l = 5 mod 8@: 3 padding chars are added. If 3 padding chars are present in the string, this could form corrupted data like in the
-- previous case.
-- - @l = 6 mod 8@: Never an admissible length in base32
-- - @l = 7 mod 8@: 1 padding char is added. If 5 padding chars are present in the string, this could form corrupted data like the
-- previous cases.
--
-- Hence, permissive decodes should only fill in padding chars when it makes sense to add them. That is,
-- if an input is degenerate, it should never succeed when we add padding chars. We need the following invariant to hold:
--
-- @
-- B32.decodeUnpadded <|> B32.decodePadded ~ B32.decode
-- @
--
validateLastNPads
:: Int
-> ByteString
-> IO (Either Text ByteString)
-> Either Text ByteString
validateLastNPads !n (BS !fp !l) io
| not valid = Left "Base32-encoded bytestring has invalid padding"
| otherwise = unsafeDupablePerformIO io
where
valid = accursedUnutterablePerformIO $ withForeignPtr fp $ \p -> do
let end = plusPtr p l
let go :: Ptr Word8 -> IO Bool
go !q
| q == end = return True
| otherwise = do
a <- peek q
if a == 0x3d then return False else go (plusPtr q 1)
go (plusPtr p (l - n))
{-# INLINE validateLastNPads #-}

View file

@ -0,0 +1,76 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE TypeApplications #-}
module Data.ByteString.Base32.Internal.Head
( encodeBase32_
, encodeBase32NoPad_
, decodeBase32_
) where
import Data.ByteString.Internal
import Data.ByteString.Base32.Internal.Loop
import Data.ByteString.Base32.Internal.Tail
import Data.Text (Text)
import Foreign.Ptr
import Foreign.ForeignPtr
import GHC.Exts
import GHC.ForeignPtr
import GHC.Word
import System.IO.Unsafe
-- | Head of the padded base32 encoding loop.
--
-- This function takes an alphabet in the form of an unboxed 'Addr#',
-- allocates the correct number of bytes that will be written, and
-- executes the inner encoding loop against that data.
--
encodeBase32_ :: Addr# -> ByteString -> ByteString
encodeBase32_ !lut (BS !sfp !l) = unsafeDupablePerformIO $ do
dfp <- mallocPlainForeignPtrBytes dlen
withForeignPtr dfp $ \dptr ->
withForeignPtr sfp $ \sptr -> do
let !end = plusPtr sptr l
innerLoop lut
(castPtr dptr) sptr
end (loopTail lut dfp dptr end)
where
!dlen = ceiling (fromIntegral @_ @Double l / 5) * 8
-- | Head of the unpadded base32 encoding loop.
--
-- This function takes an alphabet in the form of an unboxed 'Addr#',
-- allocates the correct number of bytes that will be written, and
-- executes the inner encoding loop against that data.
--
encodeBase32NoPad_ :: Addr# -> ByteString -> ByteString
encodeBase32NoPad_ !lut (BS !sfp !l) = unsafeDupablePerformIO $ do
!dfp <- mallocPlainForeignPtrBytes dlen
withForeignPtr dfp $ \dptr ->
withForeignPtr sfp $ \sptr -> do
let !end = plusPtr sptr l
innerLoop lut
(castPtr dptr) sptr
end (loopTailNoPad lut dfp dptr end)
where
!dlen = ceiling (fromIntegral @_ @Double l / 5) * 8
-- | Head of the base32 decoding loop.
--
-- This function takes a base32-decoding lookup table and base32-encoded
-- bytestring, allocates the correct number of bytes that will be written,
-- and executes the inner decoding loop against that data.
--
decodeBase32_ :: Ptr Word8 -> ByteString -> IO (Either Text ByteString)
decodeBase32_ (Ptr !dtable) (BS !sfp !slen) =
withForeignPtr sfp $ \sptr -> do
dfp <- mallocPlainForeignPtrBytes dlen
withForeignPtr dfp $ \dptr -> do
let !end = plusPtr sptr slen
decodeLoop dtable dfp dptr sptr end
where
!dlen = ceiling (fromIntegral @_ @Double slen / 1.6)

View file

@ -0,0 +1,204 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE TypeApplications #-}
module Data.ByteString.Base32.Internal.Loop
( innerLoop
, decodeLoop
) where
import Data.Bits
import Data.ByteString.Internal (ByteString(..))
import Data.ByteString.Base32.Internal.Utils
import Data.Text (Text)
import qualified Data.Text as T
import Foreign.Ptr
import Foreign.ForeignPtr
import Foreign.Storable
import GHC.Exts
import GHC.Word
-- ------------------------------------------------------------------------ --
-- Encoding loops
innerLoop
:: Addr#
-> Ptr Word64
-> Ptr Word8
-> Ptr Word8
-> (Ptr Word8 -> Ptr Word8 -> IO ByteString)
-> IO ByteString
innerLoop !lut !dptr !sptr !end finish = go dptr sptr
where
lix a = w64 (aix (fromIntegral a .&. 0x1f) lut)
{-# INLINE lix #-}
go !dst !src
| plusPtr src 4 >= end = finish (castPtr dst) src
| otherwise = do
!t <- peekWord32BE (castPtr src)
!u <- w32 <$> peek (plusPtr src 4)
let !a = lix (unsafeShiftR t 27)
!b = lix (unsafeShiftR t 22)
!c = lix (unsafeShiftR t 17)
!d = lix (unsafeShiftR t 12)
!e = lix (unsafeShiftR t 7)
!f = lix (unsafeShiftR t 2)
!g = lix (unsafeShiftL t 3 .|. unsafeShiftR u 5)
!h = lix u
let !w = a
.|. unsafeShiftL b 8
.|. unsafeShiftL c 16
.|. unsafeShiftL d 24
.|. unsafeShiftL e 32
.|. unsafeShiftL f 40
.|. unsafeShiftL g 48
.|. unsafeShiftL h 56
poke dst w
go (plusPtr dst 8) (plusPtr src 5)
{-# INLINE innerLoop #-}
-- ------------------------------------------------------------------------ --
-- Decoding loops
decodeLoop
:: Addr#
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> IO (Either Text ByteString)
decodeLoop !lut !dfp !dptr !sptr !end = go dptr sptr
where
lix a = w64 (aix (fromIntegral a) lut)
err :: Ptr Word8 -> IO (Either Text ByteString)
err p = return . Left . T.pack
$ "invalid character at offset: "
++ show (p `minusPtr` sptr)
padErr :: Ptr Word8 -> IO (Either Text ByteString)
padErr p = return . Left . T.pack
$ "invalid padding at offset: "
++ show (p `minusPtr` sptr)
look :: Ptr Word8 -> IO Word64
look !p = lix <$> peek @Word8 p
go !dst !src
| plusPtr src 8 >= end = do
a <- look src
b <- look (plusPtr src 1)
c <- look (plusPtr src 2)
d <- look (plusPtr src 3)
e <- look (plusPtr src 4)
f <- look (plusPtr src 5)
g <- look (plusPtr src 6)
h <- look (plusPtr src 7)
finalChunk dst src a b c d e f g h
| otherwise = do
!t <- peekWord64BE (castPtr src)
let a = lix (unsafeShiftR t 56)
b = lix (unsafeShiftR t 48)
c = lix (unsafeShiftR t 40)
d = lix (unsafeShiftR t 32)
e = lix (unsafeShiftR t 24)
f = lix (unsafeShiftR t 16)
g = lix (unsafeShiftR t 8)
h = lix t
decodeChunk dst src a b c d e f g h
finalChunk !dst !src !a !b !c !d !e !f !g !h
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| e == 0xff = err (plusPtr src 4)
| f == 0xff = err (plusPtr src 5)
| g == 0xff = err (plusPtr src 6)
| h == 0xff = err (plusPtr src 7)
| otherwise = do
let !o1 = (fromIntegral a `unsafeShiftL` 3) .|. (fromIntegral b `unsafeShiftR` 2)
!o2 = (fromIntegral b `unsafeShiftL` 6)
.|. (fromIntegral c `unsafeShiftL` 1)
.|. (fromIntegral d `unsafeShiftR` 4)
!o3 = (fromIntegral d `unsafeShiftL` 4) .|. (fromIntegral e `unsafeShiftR` 1)
!o4 = (fromIntegral e `unsafeShiftL` 7)
.|. (fromIntegral f `unsafeShiftL` 2)
.|. (fromIntegral g `unsafeShiftR` 3)
!o5 = (fromIntegral g `unsafeShiftL` 5) .|. fromIntegral h
poke @Word8 dst o1
poke @Word8 (plusPtr dst 1) o2
case (c,d,e,f,g,h) of
(0x63,0x63,0x63,0x63,0x63,0x63) ->
return (Right (BS dfp (1 + minusPtr dst dptr)))
(0x63,_,_,_,_,_) -> padErr (plusPtr src 3)
(_,0x63,0x63,0x63,0x63,0x63) -> padErr (plusPtr src 3)
(_,0x63,_,_,_,_) -> padErr (plusPtr src 4)
(_,_,0x63,0x63,0x63,0x63) -> do
poke @Word8 (plusPtr dst 2) o3
return (Right (BS dfp (2 + minusPtr dst dptr)))
(_,_,0x63,_,_,_) -> padErr (plusPtr src 5)
(_,_,_,0x63,0x63,0x63) -> do
poke @Word8 (plusPtr dst 2) o3
poke @Word8 (plusPtr dst 3) o4
return (Right (BS dfp (3 + minusPtr dst dptr)))
(_,_,_,0x63,_,_) -> padErr (plusPtr src 6)
(_,_,_,_,0x63,0x63) -> padErr (plusPtr src 6)
(_,_,_,_,0x63,_) -> padErr (plusPtr src 7)
(_,_,_,_,_,0x63) -> do
poke @Word8 (plusPtr dst 2) o3
poke @Word8 (plusPtr dst 3) o4
poke @Word8 (plusPtr dst 4) o5
return (Right (BS dfp (4 + minusPtr dst dptr)))
(_,_,_,_,_,_) -> do
poke @Word8 (plusPtr dst 2) o3
poke @Word8 (plusPtr dst 3) o4
poke @Word8 (plusPtr dst 4) o5
return (Right (BS dfp (5 + minusPtr dst dptr)))
decodeChunk !dst !src !a !b !c !d !e !f !g !h
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| c == 0x63 = padErr (plusPtr src 2)
| d == 0x63 = padErr (plusPtr src 3)
| e == 0x63 = padErr (plusPtr src 4)
| f == 0x63 = padErr (plusPtr src 5)
| g == 0x63 = padErr (plusPtr src 6)
| h == 0x63 = padErr (plusPtr src 7)
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| e == 0xff = err (plusPtr src 4)
| f == 0xff = err (plusPtr src 5)
| g == 0xff = err (plusPtr src 6)
| h == 0xff = err (plusPtr src 7)
| otherwise = do
let !w = (unsafeShiftL a 35
.|. unsafeShiftL b 30
.|. unsafeShiftL c 25
.|. unsafeShiftL d 20
.|. unsafeShiftL e 15
.|. unsafeShiftL f 10
.|. unsafeShiftL g 5
.|. h) :: Word64
poke @Word32 (castPtr dst) (byteSwap32 (fromIntegral (unsafeShiftR w 8)))
poke @Word8 (plusPtr dst 4) (fromIntegral w)
go (plusPtr dst 5) (plusPtr src 8)

View file

@ -0,0 +1,54 @@
{-# LANGUAGE TypeApplications #-}
module Data.ByteString.Base32.Internal.Tables
( stdDecodeTable
, hexDecodeTable
) where
import Data.ByteString.Base32.Internal.Utils (writeNPlainPtrBytes)
import GHC.Word (Word8)
import GHC.Ptr (Ptr)
stdDecodeTable :: Ptr Word8
stdDecodeTable = writeNPlainPtrBytes @Word8 256
[ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0x63,0xff,0xff
, 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e
, 0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff
, 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e
, 0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
]
{-# NOINLINE stdDecodeTable #-}
hexDecodeTable :: Ptr Word8
hexDecodeTable = writeNPlainPtrBytes @Word8 256
[ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0xff,0xff,0xff,0x63,0xff,0xff
, 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18
, 0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18
, 0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
]
{-# NOINLINE hexDecodeTable #-}

View file

@ -0,0 +1,191 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Data.ByteString.Base32.Internal.Tail
( loopTail
, loopTailNoPad
) where
import Data.Bits
import Data.ByteString.Internal
import Data.ByteString.Base32.Internal.Utils
import Foreign.Ptr
import Foreign.ForeignPtr
import Foreign.Storable
import GHC.Exts
import GHC.Word
-- | Unroll final quantum encoding for base32
--
loopTail
:: Addr#
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> IO ByteString
loopTail !lut !dfp !dptr !end !dst !src
| src == end = return (BS dfp (minusPtr dst dptr))
| plusPtr src 1 == end = do -- 2 6
!a <- peek src
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2)
poke dst t
poke (plusPtr dst 1) u
padN (plusPtr dst 2) 6
return (BS dfp (8 + minusPtr dst dptr))
| plusPtr src 2 == end = do -- 4 4
!a <- peek src
!b <- peek (plusPtr src 1)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
padN (plusPtr dst 4) 4
return (BS dfp (8 + minusPtr dst dptr))
| plusPtr src 3 == end = do -- 5 3
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
padN (plusPtr dst 5) 3
return (BS dfp (8 + minusPtr dst dptr))
| otherwise = do -- 7 1
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
!d <- peek (plusPtr src 3)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1 .|. unsafeShiftR (d .&. 0x80) 7)
!y = look (unsafeShiftR (d .&. 0x7c) 2)
!z = look (unsafeShiftL (d .&. 0x03) 3)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
poke (plusPtr dst 5) y
poke (plusPtr dst 6) z
padN (plusPtr dst 7) 1
return (BS dfp (8 + minusPtr dst dptr))
where
look !n = aix n lut
padN :: Ptr Word8 -> Int -> IO ()
padN !_ 0 = return ()
padN !p n = poke p 0x3d >> padN (plusPtr p 1) (n - 1)
{-# INLINE loopTail #-}
-- | Unroll final quantum encoding for base32
--
loopTailNoPad
:: Addr#
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> IO ByteString
loopTailNoPad !lut !dfp !dptr !end !dst !src
| src == end = return (BS dfp (minusPtr dst dptr))
| plusPtr src 1 == end = do -- 2 6
!a <- peek src
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2)
poke dst t
poke (plusPtr dst 1) u
return (BS dfp (2 + minusPtr dst dptr))
| plusPtr src 2 == end = do -- 4 4
!a <- peek src
!b <- peek (plusPtr src 1)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
return (BS dfp (4 + minusPtr dst dptr))
| plusPtr src 3 == end = do -- 5 3
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
return (BS dfp (5 + minusPtr dst dptr))
| otherwise = do -- 7 1
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
!d <- peek (plusPtr src 3)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1 .|. unsafeShiftR (d .&. 0x80) 7)
!y = look (unsafeShiftR (d .&. 0x7c) 2)
!z = look (unsafeShiftL (d .&. 0x03) 3)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
poke (plusPtr dst 5) y
poke (plusPtr dst 6) z
return (BS dfp (7 + minusPtr dst dptr))
where
look !i = aix i lut
{-# INLINE loopTailNoPad #-}

View file

@ -0,0 +1,98 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Data.ByteString.Base32.Internal.Utils
( aix
, peekWord32BE
, peekWord64BE
, reChunkN
, w32
, w64
, w64_32
, writeNPlainPtrBytes
) where
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Foreign.Ptr
import Foreign.Storable
import GHC.ByteOrder
import GHC.Exts
import GHC.Word
import System.IO.Unsafe
import Foreign.Marshal.Alloc (mallocBytes)
-- | Read 'Word8' index off alphabet addr
--
aix :: Word8 -> Addr# -> Word8
aix w8 alpha = W8# (indexWord8OffAddr# alpha i)
where
!(I# i) = fromIntegral w8
{-# INLINE aix #-}
w32 :: Word8 -> Word32
w32 = fromIntegral
{-# INLINE w32 #-}
w64_32 :: Word32 -> Word64
w64_32 = fromIntegral
{-# INLINE w64_32 #-}
w64 :: Word8 -> Word64
w64 = fromIntegral
{-# INLINE w64 #-}
-- | Allocate and fill @n@ bytes with some data
--
writeNPlainPtrBytes
:: Storable a
=> Int
-> [a]
-> Ptr a
writeNPlainPtrBytes !n as = unsafeDupablePerformIO $ do
p <- mallocBytes n
go p as
return p
where
go !_ [] = return ()
go !p (x:xs) = poke p x >> go (plusPtr p 1) xs
{-# INLINE writeNPlainPtrBytes #-}
peekWord32BE :: Ptr Word32 -> IO Word32
peekWord32BE p = case targetByteOrder of
LittleEndian -> byteSwap32 <$> peek p
BigEndian -> peek p
{-# inline peekWord32BE #-}
peekWord64BE :: Ptr Word64 -> IO Word64
peekWord64BE p = case targetByteOrder of
LittleEndian -> byteSwap64 <$> peek p
BigEndian -> peek p
{-# inline peekWord64BE #-}
-- | Rechunk a list of bytestrings in multiples of @n@
--
reChunkN :: Int -> [ByteString] -> [ByteString]
reChunkN n = go
where
go [] = []
go (b:bs) = case divMod (BS.length b) n of
(_, 0) -> b : go bs
(d, _) -> case BS.splitAt (d * n) b of
~(h, t) -> h : accum t bs
accum acc [] = [acc]
accum acc (c:cs) =
case BS.splitAt (n - BS.length acc) c of
~(h, t) ->
let acc' = BS.append acc h
in if BS.length acc' == n
then
let cs' = if BS.null t then cs else t : cs
in acc' : go cs'
else accum acc' cs
{-# INLINE reChunkN #-}