Initial commit

This commit is contained in:
La Ancapo 2026-01-25 02:27:22 +01:00
commit c101616e62
309 changed files with 53937 additions and 0 deletions

View file

@ -0,0 +1,225 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Base32
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.ByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32
-- encoding format. This includes padded and unpadded decoding variants, as well as
-- internal and external validation for canonicity.
--
module Data.ByteString.Base32
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
-- , decodeBase32Lenient
-- * Validation
, isBase32
, isValidBase32
) where
import qualified Data.ByteString as BS
import Data.ByteString.Internal (ByteString(..))
import Data.ByteString.Base32.Internal
import Data.ByteString.Base32.Internal.Tables
import Data.Either (isRight)
import Data.Text (Text)
import qualified Data.Text.Encoding as T
import System.IO.Unsafe (unsafeDupablePerformIO)
-- | Encode a 'ByteString' value as a Base32 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "KN2W4==="
--
encodeBase32 :: ByteString -> Text
encodeBase32 = T.decodeUtf8 . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ByteString' value as a Base32 'ByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32' "Sun"
-- "KN2W4==="
--
encodeBase32' :: ByteString -> ByteString
encodeBase32' = encodeBase32_ "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"#
{-# INLINE encodeBase32' #-}
-- | Decode an arbitrarily padded Base32-encoded 'ByteString' value. If its length
-- is not a multiple of 8, then padding characters will be added to fill out the
-- input to a multiple of 8 for safe decoding, as Base32-encoded values are
-- optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32 "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ByteString -> Either Text ByteString
decodeBase32 bs@(BS _ !l)
| l == 0 = Right bs
| r == 0 = unsafeDupablePerformIO $ decodeBase32_ stdDecodeTable bs
| r == 2 = unsafeDupablePerformIO $ decodeBase32_ stdDecodeTable (BS.append bs "======")
| r == 4 = validateLastNPads 2 bs $ decodeBase32_ stdDecodeTable (BS.append bs "====")
| r == 5 = validateLastNPads 3 bs $ decodeBase32_ stdDecodeTable (BS.append bs "===")
| r == 7 = validateLastNPads 5 bs $ decodeBase32_ stdDecodeTable (BS.append bs "=")
| otherwise = Left "Base32-encoded bytestring has invalid size"
where
!r = l `rem` 8
{-# INLINE decodeBase32 #-}
-- | Encode a 'ByteString' value as a Base32 'Text' value without padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded "Sun"
-- "KN2W4"
--
encodeBase32Unpadded :: ByteString -> Text
encodeBase32Unpadded = T.decodeUtf8 . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ByteString' value as a Base32 'ByteString' value without padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "KN2W4"
--
encodeBase32Unpadded' :: ByteString -> ByteString
encodeBase32Unpadded' = encodeBase32NoPad_ "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"#
{-# INLINE encodeBase32Unpadded' #-}
-- | Decode an unpadded Base32-encoded 'ByteString' value.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "KN2W4==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ByteString -> Either Text ByteString
decodeBase32Unpadded bs@(BS _ !l)
| l == 0 = Right bs
| r == 0 = validateLastNPads 1 bs $ decodeBase32_ stdDecodeTable bs
| r == 2 = unsafeDupablePerformIO $ decodeBase32_ stdDecodeTable (BS.append bs "======")
| r == 4 = validateLastNPads 1 bs $ decodeBase32_ stdDecodeTable (BS.append bs "====")
| r == 5 = validateLastNPads 1 bs $ decodeBase32_ stdDecodeTable (BS.append bs "===")
| r == 7 = validateLastNPads 1 bs $ decodeBase32_ stdDecodeTable (BS.append bs "=")
| otherwise = Left "Base32-encoded bytestring has invalid size"
where
!r = l `rem` 8
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32-encoded 'ByteString' value.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "KN2W4"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ByteString -> Either Text ByteString
decodeBase32Padded bs@(BS _ !l)
| l == 0 = Right bs
| r == 1 = Left "Base32-encoded bytestring has invalid size"
| r == 3 = Left "Base32-encoded bytestring has invalid size"
| r == 6 = Left "Base32-encoded bytestring has invalid size"
| r /= 0 = Left "Base32-encoded bytestring requires padding"
| otherwise = unsafeDupablePerformIO $ decodeBase32_ stdDecodeTable bs
where
!r = l `rem` 8
{-# INLINE decodeBase32Padded #-}
-- -- | Leniently decode an unpadded Base32-encoded 'ByteString' value. This function
-- -- will not generate parse errors. If input data contains padding chars,
-- -- then the input will be parsed up until the first pad character.
-- --
-- -- __Note:__ This is not RFC 4648-compliant.
-- --
-- decodeBase32Lenient :: ByteString -> ByteString
-- decodeBase32Lenient = decodeBase32Lenient_ decodeB32Table
-- {-# INLINE decodeBase32Lenient #-}
-- | Tell whether a 'ByteString' value is encoded in padded or unpadded Base32 format
--
-- === __Examples__:
--
-- >>> isBase32 "KN2W4"
-- True
--
-- >>> isBase32 "KN2W4==="
-- True
--
-- >>> isBase32 "KN2W4=="
-- False
--
isBase32 :: ByteString -> Bool
isBase32 bs = isValidBase32 bs && isRight (decodeBase32 bs)
{-# INLINE isBase32 #-}
-- | Tell whether a 'ByteString' value is a valid Base32 format.
--
-- This will not tell you whether or not this is a correct Base32 representation,
-- only that it conforms to the correct shape (including padding/size etc.).
-- To check whether it is a true Base32 encoded 'ByteString' value, use 'isBase32'.
--
-- === __Examples__:
--
-- >>> isValidBase32 "KN2W4"
-- True
--
-- >>> isValidBase32 "KN2W4="
-- False
--
-- >>> isValidBase32 "KN2W4%"
-- False
--
isValidBase32 :: ByteString -> Bool
isValidBase32 = validateBase32 "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
{-# INLINE isValidBase32 #-}

View file

@ -0,0 +1,215 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Base32.Hex
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.ByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32hex
-- encoding format. This includes padded and unpadded decoding variants, as well as
-- internal and external validation for canonicity.
--
module Data.ByteString.Base32.Hex
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
-- , decodeBase32Lenient
-- * Validation
, isBase32Hex
, isValidBase32Hex
) where
import qualified Data.ByteString as BS
import Data.ByteString.Internal (ByteString(..))
import Data.ByteString.Base32.Internal
import Data.ByteString.Base32.Internal.Tables
import Data.Either (isRight)
import Data.Text (Text)
import qualified Data.Text.Encoding as T
import System.IO.Unsafe (unsafeDupablePerformIO)
-- | Encode a 'ByteString' value as a Base32hex 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "ADQMS==="
--
encodeBase32 :: ByteString -> Text
encodeBase32 = T.decodeUtf8 . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ByteString' value as a Base32hex 'ByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32' "Sun"
-- "ADQMS==="
--
encodeBase32' :: ByteString -> ByteString
encodeBase32' = encodeBase32_ "0123456789ABCDEFGHIJKLMNOPQRSTUV"#
{-# INLINE encodeBase32' #-}
-- | Decode an arbitrarily padded Base32hex-encoded 'ByteString' value. If its length
-- is not a multiple of 8, then padding characters will be added to fill out the
-- input to a multiple of 8 for safe decoding, as Base32hex-encoded values are
-- optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32 "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32 "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32 "ADQM==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ByteString -> Either Text ByteString
decodeBase32 bs@(BS _ !l)
| l == 0 = Right bs
| r == 0 = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable bs
| r == 2 = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable (BS.append bs "======")
| r == 4 = validateLastNPads 2 bs $ decodeBase32_ hexDecodeTable (BS.append bs "====")
| r == 5 = validateLastNPads 3 bs $ decodeBase32_ hexDecodeTable (BS.append bs "===")
| r == 7 = validateLastNPads 5 bs $ decodeBase32_ hexDecodeTable (BS.append bs "=")
| otherwise = Left "Base32-encoded bytestring has invalid size"
where
!r = l `rem` 8
{-# INLINE decodeBase32 #-}
-- | Encode a 'ByteString' value as a Base32hex 'Text' value without padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded :: ByteString -> Text
encodeBase32Unpadded = T.decodeUtf8 . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ByteString' value as a Base32hex 'ByteString' value without padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded' :: ByteString -> ByteString
encodeBase32Unpadded' = encodeBase32NoPad_ "0123456789ABCDEFGHIJKLMNOPQRSTUV"#
{-# INLINE encodeBase32Unpadded' #-}
-- | Decode an unpadded Base32hex-encoded 'ByteString' value.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "ADQMS==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ByteString -> Either Text ByteString
decodeBase32Unpadded bs@(BS _ !l)
| l == 0 = Right bs
| r == 0 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable bs
| r == 2 = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable (BS.append bs "======")
| r == 4 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable (BS.append bs "====")
| r == 5 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable (BS.append bs "===")
| r == 7 = validateLastNPads 1 bs $ decodeBase32_ hexDecodeTable (BS.append bs "=")
| otherwise = Left "Base32-encoded bytestring has invalid size"
where
!r = l `rem` 8
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32hex-encoded 'ByteString' value.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "ADQMS"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ByteString -> Either Text ByteString
decodeBase32Padded bs@(BS _ !l)
| l == 0 = Right bs
| r == 1 = Left "Base32-encoded bytestring has invalid size"
| r == 3 = Left "Base32-encoded bytestring has invalid size"
| r == 6 = Left "Base32-encoded bytestring has invalid size"
| r /= 0 = Left "Base32-encoded bytestring requires padding"
| otherwise = unsafeDupablePerformIO $ decodeBase32_ hexDecodeTable bs
where
!r = l `rem` 8
{-# INLINE decodeBase32Padded #-}
-- | Tell whether a 'ByteString' value is encoded in padded or unpadded Base32hex format
--
-- === __Examples__:
--
-- >>> isBase32Hex "ADQMS"
-- True
--
-- >>> isBase32Hex "ADQMS==="
-- True
--
-- >>> isBase32Hex "ADQMS=="
-- False
--
isBase32Hex :: ByteString -> Bool
isBase32Hex bs = isValidBase32Hex bs && isRight (decodeBase32 bs)
{-# INLINE isBase32Hex #-}
-- | Tell whether a 'ByteString' value is a valid Base32hex format.
--
-- This will not tell you whether or not this is a correct Base32hex representation,
-- only that it conforms to the correct shape (including padding/size etc.).
-- To check whether it is a true Base32hex encoded 'ByteString' value, use 'isBase32'.
--
-- === __Examples__:
--
-- >>> isValidBase32Hex "ADQMS"
-- True
--
-- >>> isValidBase32Hex "ADQMS="
-- False
--
-- >>> isValidBase32Hex "ADQMS%"
-- False
--
isValidBase32Hex :: ByteString -> Bool
isValidBase32Hex = validateBase32 "0123456789ABCDEFGHIJKLMNOPQRSTUV"
{-# INLINE isValidBase32Hex #-}

View file

@ -0,0 +1,120 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
-- |
-- Module : Data.ByteString.Base32.Internal
-- Copyright : (c) 2020 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : Experimental
-- Portability : portable
--
-- Internal module defining the encoding and decoding
-- processes and tables.
--
module Data.ByteString.Base32.Internal
( encodeBase32_
, encodeBase32NoPad_
, decodeBase32_
, validateBase32
, validateLastNPads
) where
import qualified Data.ByteString as BS
import Data.ByteString.Internal
import Data.ByteString.Base32.Internal.Head
import Data.Text (Text)
import Foreign.ForeignPtr
import Foreign.Ptr
import Foreign.Storable
import GHC.Word
import System.IO.Unsafe
-- -------------------------------------------------------------------------- --
-- Validating Base32
-- | Validate a base32-encoded bytestring against some alphabet.
--
validateBase32 :: ByteString -> ByteString -> Bool
validateBase32 !alphabet bs@(BS _ l)
| l == 0 = True
| r == 0 = f bs
| r == 2 = f (BS.append bs "======")
| r == 4 = f (BS.append bs "====")
| r == 5 = f (BS.append bs "===")
| r == 7 = f (BS.append bs "=")
| otherwise = False
where
r = l `rem` 8
f (BS fp l') = accursedUnutterablePerformIO $ withForeignPtr fp $ \p ->
go p (plusPtr p l')
go !p !end
| p == end = return True
| otherwise = do
w <- peek p
let check a
| a == 0x3d, plusPtr p 1 == end = True
| a == 0x3d, plusPtr p 2 == end = True
| a == 0x3d, plusPtr p 3 == end = True
| a == 0x3d, plusPtr p 4 == end = True
| a == 0x3d, plusPtr p 5 == end = True
| a == 0x3d, plusPtr p 6 == end = True
| a == 0x3d = False
| otherwise = BS.elem a alphabet
if check w then go (plusPtr p 1) end else return False
{-# INLINE validateBase32 #-}
-- | This function checks that the last N-chars of a bytestring are '='
-- and, if true, fails with a message or completes some io action.
--
-- This is necessary to check when decoding permissively (i.e. filling in padding chars).
-- Consider the following 8 cases of a string of length l:
--
-- - @l = 0 mod 8@: No pad chars are added, since the input is assumed to be good.
-- - @l = 1 mod 8@: Never an admissible length in base32
-- - @l = 2 mod 8@: 6 padding chars are added. If padding chars are present in the string, they will fail as to decode as final quanta
-- - @l = 3 mod 8@: Never an admissible length in base32
-- - @l = 4 mod 8@: 4 padding chars are added. If 2 padding chars are present in the string this can be "completed" in the sense that
-- it now acts like a string `l == 2 mod 8` with 6 padding chars, and could potentially form corrupted data.
-- - @l = 5 mod 8@: 3 padding chars are added. If 3 padding chars are present in the string, this could form corrupted data like in the
-- previous case.
-- - @l = 6 mod 8@: Never an admissible length in base32
-- - @l = 7 mod 8@: 1 padding char is added. If 5 padding chars are present in the string, this could form corrupted data like the
-- previous cases.
--
-- Hence, permissive decodes should only fill in padding chars when it makes sense to add them. That is,
-- if an input is degenerate, it should never succeed when we add padding chars. We need the following invariant to hold:
--
-- @
-- B32.decodeUnpadded <|> B32.decodePadded ~ B32.decode
-- @
--
validateLastNPads
:: Int
-> ByteString
-> IO (Either Text ByteString)
-> Either Text ByteString
validateLastNPads !n (BS !fp !l) io
| not valid = Left "Base32-encoded bytestring has invalid padding"
| otherwise = unsafeDupablePerformIO io
where
valid = accursedUnutterablePerformIO $ withForeignPtr fp $ \p -> do
let end = plusPtr p l
let go :: Ptr Word8 -> IO Bool
go !q
| q == end = return True
| otherwise = do
a <- peek q
if a == 0x3d then return False else go (plusPtr q 1)
go (plusPtr p (l - n))
{-# INLINE validateLastNPads #-}

View file

@ -0,0 +1,76 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE TypeApplications #-}
module Data.ByteString.Base32.Internal.Head
( encodeBase32_
, encodeBase32NoPad_
, decodeBase32_
) where
import Data.ByteString.Internal
import Data.ByteString.Base32.Internal.Loop
import Data.ByteString.Base32.Internal.Tail
import Data.Text (Text)
import Foreign.Ptr
import Foreign.ForeignPtr
import GHC.Exts
import GHC.ForeignPtr
import GHC.Word
import System.IO.Unsafe
-- | Head of the padded base32 encoding loop.
--
-- This function takes an alphabet in the form of an unboxed 'Addr#',
-- allocates the correct number of bytes that will be written, and
-- executes the inner encoding loop against that data.
--
encodeBase32_ :: Addr# -> ByteString -> ByteString
encodeBase32_ !lut (BS !sfp !l) = unsafeDupablePerformIO $ do
dfp <- mallocPlainForeignPtrBytes dlen
withForeignPtr dfp $ \dptr ->
withForeignPtr sfp $ \sptr -> do
let !end = plusPtr sptr l
innerLoop lut
(castPtr dptr) sptr
end (loopTail lut dfp dptr end)
where
!dlen = ceiling (fromIntegral @_ @Double l / 5) * 8
-- | Head of the unpadded base32 encoding loop.
--
-- This function takes an alphabet in the form of an unboxed 'Addr#',
-- allocates the correct number of bytes that will be written, and
-- executes the inner encoding loop against that data.
--
encodeBase32NoPad_ :: Addr# -> ByteString -> ByteString
encodeBase32NoPad_ !lut (BS !sfp !l) = unsafeDupablePerformIO $ do
!dfp <- mallocPlainForeignPtrBytes dlen
withForeignPtr dfp $ \dptr ->
withForeignPtr sfp $ \sptr -> do
let !end = plusPtr sptr l
innerLoop lut
(castPtr dptr) sptr
end (loopTailNoPad lut dfp dptr end)
where
!dlen = ceiling (fromIntegral @_ @Double l / 5) * 8
-- | Head of the base32 decoding loop.
--
-- This function takes a base32-decoding lookup table and base32-encoded
-- bytestring, allocates the correct number of bytes that will be written,
-- and executes the inner decoding loop against that data.
--
decodeBase32_ :: Ptr Word8 -> ByteString -> IO (Either Text ByteString)
decodeBase32_ (Ptr !dtable) (BS !sfp !slen) =
withForeignPtr sfp $ \sptr -> do
dfp <- mallocPlainForeignPtrBytes dlen
withForeignPtr dfp $ \dptr -> do
let !end = plusPtr sptr slen
decodeLoop dtable dfp dptr sptr end
where
!dlen = ceiling (fromIntegral @_ @Double slen / 1.6)

View file

@ -0,0 +1,204 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE TypeApplications #-}
module Data.ByteString.Base32.Internal.Loop
( innerLoop
, decodeLoop
) where
import Data.Bits
import Data.ByteString.Internal (ByteString(..))
import Data.ByteString.Base32.Internal.Utils
import Data.Text (Text)
import qualified Data.Text as T
import Foreign.Ptr
import Foreign.ForeignPtr
import Foreign.Storable
import GHC.Exts
import GHC.Word
-- ------------------------------------------------------------------------ --
-- Encoding loops
innerLoop
:: Addr#
-> Ptr Word64
-> Ptr Word8
-> Ptr Word8
-> (Ptr Word8 -> Ptr Word8 -> IO ByteString)
-> IO ByteString
innerLoop !lut !dptr !sptr !end finish = go dptr sptr
where
lix a = w64 (aix (fromIntegral a .&. 0x1f) lut)
{-# INLINE lix #-}
go !dst !src
| plusPtr src 4 >= end = finish (castPtr dst) src
| otherwise = do
!t <- peekWord32BE (castPtr src)
!u <- w32 <$> peek (plusPtr src 4)
let !a = lix (unsafeShiftR t 27)
!b = lix (unsafeShiftR t 22)
!c = lix (unsafeShiftR t 17)
!d = lix (unsafeShiftR t 12)
!e = lix (unsafeShiftR t 7)
!f = lix (unsafeShiftR t 2)
!g = lix (unsafeShiftL t 3 .|. unsafeShiftR u 5)
!h = lix u
let !w = a
.|. unsafeShiftL b 8
.|. unsafeShiftL c 16
.|. unsafeShiftL d 24
.|. unsafeShiftL e 32
.|. unsafeShiftL f 40
.|. unsafeShiftL g 48
.|. unsafeShiftL h 56
poke dst w
go (plusPtr dst 8) (plusPtr src 5)
{-# INLINE innerLoop #-}
-- ------------------------------------------------------------------------ --
-- Decoding loops
decodeLoop
:: Addr#
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> IO (Either Text ByteString)
decodeLoop !lut !dfp !dptr !sptr !end = go dptr sptr
where
lix a = w64 (aix (fromIntegral a) lut)
err :: Ptr Word8 -> IO (Either Text ByteString)
err p = return . Left . T.pack
$ "invalid character at offset: "
++ show (p `minusPtr` sptr)
padErr :: Ptr Word8 -> IO (Either Text ByteString)
padErr p = return . Left . T.pack
$ "invalid padding at offset: "
++ show (p `minusPtr` sptr)
look :: Ptr Word8 -> IO Word64
look !p = lix <$> peek @Word8 p
go !dst !src
| plusPtr src 8 >= end = do
a <- look src
b <- look (plusPtr src 1)
c <- look (plusPtr src 2)
d <- look (plusPtr src 3)
e <- look (plusPtr src 4)
f <- look (plusPtr src 5)
g <- look (plusPtr src 6)
h <- look (plusPtr src 7)
finalChunk dst src a b c d e f g h
| otherwise = do
!t <- peekWord64BE (castPtr src)
let a = lix (unsafeShiftR t 56)
b = lix (unsafeShiftR t 48)
c = lix (unsafeShiftR t 40)
d = lix (unsafeShiftR t 32)
e = lix (unsafeShiftR t 24)
f = lix (unsafeShiftR t 16)
g = lix (unsafeShiftR t 8)
h = lix t
decodeChunk dst src a b c d e f g h
finalChunk !dst !src !a !b !c !d !e !f !g !h
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| e == 0xff = err (plusPtr src 4)
| f == 0xff = err (plusPtr src 5)
| g == 0xff = err (plusPtr src 6)
| h == 0xff = err (plusPtr src 7)
| otherwise = do
let !o1 = (fromIntegral a `unsafeShiftL` 3) .|. (fromIntegral b `unsafeShiftR` 2)
!o2 = (fromIntegral b `unsafeShiftL` 6)
.|. (fromIntegral c `unsafeShiftL` 1)
.|. (fromIntegral d `unsafeShiftR` 4)
!o3 = (fromIntegral d `unsafeShiftL` 4) .|. (fromIntegral e `unsafeShiftR` 1)
!o4 = (fromIntegral e `unsafeShiftL` 7)
.|. (fromIntegral f `unsafeShiftL` 2)
.|. (fromIntegral g `unsafeShiftR` 3)
!o5 = (fromIntegral g `unsafeShiftL` 5) .|. fromIntegral h
poke @Word8 dst o1
poke @Word8 (plusPtr dst 1) o2
case (c,d,e,f,g,h) of
(0x63,0x63,0x63,0x63,0x63,0x63) ->
return (Right (BS dfp (1 + minusPtr dst dptr)))
(0x63,_,_,_,_,_) -> padErr (plusPtr src 3)
(_,0x63,0x63,0x63,0x63,0x63) -> padErr (plusPtr src 3)
(_,0x63,_,_,_,_) -> padErr (plusPtr src 4)
(_,_,0x63,0x63,0x63,0x63) -> do
poke @Word8 (plusPtr dst 2) o3
return (Right (BS dfp (2 + minusPtr dst dptr)))
(_,_,0x63,_,_,_) -> padErr (plusPtr src 5)
(_,_,_,0x63,0x63,0x63) -> do
poke @Word8 (plusPtr dst 2) o3
poke @Word8 (plusPtr dst 3) o4
return (Right (BS dfp (3 + minusPtr dst dptr)))
(_,_,_,0x63,_,_) -> padErr (plusPtr src 6)
(_,_,_,_,0x63,0x63) -> padErr (plusPtr src 6)
(_,_,_,_,0x63,_) -> padErr (plusPtr src 7)
(_,_,_,_,_,0x63) -> do
poke @Word8 (plusPtr dst 2) o3
poke @Word8 (plusPtr dst 3) o4
poke @Word8 (plusPtr dst 4) o5
return (Right (BS dfp (4 + minusPtr dst dptr)))
(_,_,_,_,_,_) -> do
poke @Word8 (plusPtr dst 2) o3
poke @Word8 (plusPtr dst 3) o4
poke @Word8 (plusPtr dst 4) o5
return (Right (BS dfp (5 + minusPtr dst dptr)))
decodeChunk !dst !src !a !b !c !d !e !f !g !h
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| c == 0x63 = padErr (plusPtr src 2)
| d == 0x63 = padErr (plusPtr src 3)
| e == 0x63 = padErr (plusPtr src 4)
| f == 0x63 = padErr (plusPtr src 5)
| g == 0x63 = padErr (plusPtr src 6)
| h == 0x63 = padErr (plusPtr src 7)
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| e == 0xff = err (plusPtr src 4)
| f == 0xff = err (plusPtr src 5)
| g == 0xff = err (plusPtr src 6)
| h == 0xff = err (plusPtr src 7)
| otherwise = do
let !w = (unsafeShiftL a 35
.|. unsafeShiftL b 30
.|. unsafeShiftL c 25
.|. unsafeShiftL d 20
.|. unsafeShiftL e 15
.|. unsafeShiftL f 10
.|. unsafeShiftL g 5
.|. h) :: Word64
poke @Word32 (castPtr dst) (byteSwap32 (fromIntegral (unsafeShiftR w 8)))
poke @Word8 (plusPtr dst 4) (fromIntegral w)
go (plusPtr dst 5) (plusPtr src 8)

View file

@ -0,0 +1,54 @@
{-# LANGUAGE TypeApplications #-}
module Data.ByteString.Base32.Internal.Tables
( stdDecodeTable
, hexDecodeTable
) where
import Data.ByteString.Base32.Internal.Utils (writeNPlainPtrBytes)
import GHC.Word (Word8)
import GHC.Ptr (Ptr)
stdDecodeTable :: Ptr Word8
stdDecodeTable = writeNPlainPtrBytes @Word8 256
[ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0x63,0xff,0xff
, 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e
, 0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff
, 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e
, 0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
]
{-# NOINLINE stdDecodeTable #-}
hexDecodeTable :: Ptr Word8
hexDecodeTable = writeNPlainPtrBytes @Word8 256
[ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0xff,0xff,0xff,0x63,0xff,0xff
, 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18
, 0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18
, 0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
, 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
]
{-# NOINLINE hexDecodeTable #-}

View file

@ -0,0 +1,191 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Data.ByteString.Base32.Internal.Tail
( loopTail
, loopTailNoPad
) where
import Data.Bits
import Data.ByteString.Internal
import Data.ByteString.Base32.Internal.Utils
import Foreign.Ptr
import Foreign.ForeignPtr
import Foreign.Storable
import GHC.Exts
import GHC.Word
-- | Unroll final quantum encoding for base32
--
loopTail
:: Addr#
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> IO ByteString
loopTail !lut !dfp !dptr !end !dst !src
| src == end = return (BS dfp (minusPtr dst dptr))
| plusPtr src 1 == end = do -- 2 6
!a <- peek src
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2)
poke dst t
poke (plusPtr dst 1) u
padN (plusPtr dst 2) 6
return (BS dfp (8 + minusPtr dst dptr))
| plusPtr src 2 == end = do -- 4 4
!a <- peek src
!b <- peek (plusPtr src 1)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
padN (plusPtr dst 4) 4
return (BS dfp (8 + minusPtr dst dptr))
| plusPtr src 3 == end = do -- 5 3
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
padN (plusPtr dst 5) 3
return (BS dfp (8 + minusPtr dst dptr))
| otherwise = do -- 7 1
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
!d <- peek (plusPtr src 3)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1 .|. unsafeShiftR (d .&. 0x80) 7)
!y = look (unsafeShiftR (d .&. 0x7c) 2)
!z = look (unsafeShiftL (d .&. 0x03) 3)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
poke (plusPtr dst 5) y
poke (plusPtr dst 6) z
padN (plusPtr dst 7) 1
return (BS dfp (8 + minusPtr dst dptr))
where
look !n = aix n lut
padN :: Ptr Word8 -> Int -> IO ()
padN !_ 0 = return ()
padN !p n = poke p 0x3d >> padN (plusPtr p 1) (n - 1)
{-# INLINE loopTail #-}
-- | Unroll final quantum encoding for base32
--
loopTailNoPad
:: Addr#
-> ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> IO ByteString
loopTailNoPad !lut !dfp !dptr !end !dst !src
| src == end = return (BS dfp (minusPtr dst dptr))
| plusPtr src 1 == end = do -- 2 6
!a <- peek src
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2)
poke dst t
poke (plusPtr dst 1) u
return (BS dfp (2 + minusPtr dst dptr))
| plusPtr src 2 == end = do -- 4 4
!a <- peek src
!b <- peek (plusPtr src 1)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
return (BS dfp (4 + minusPtr dst dptr))
| plusPtr src 3 == end = do -- 5 3
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
return (BS dfp (5 + minusPtr dst dptr))
| otherwise = do -- 7 1
!a <- peek src
!b <- peek (plusPtr src 1)
!c <- peek (plusPtr src 2)
!d <- peek (plusPtr src 3)
let !t = look (unsafeShiftR (a .&. 0xf8) 3)
!u = look (unsafeShiftL (a .&. 0x07) 2 .|. unsafeShiftR (b .&. 0xc0) 6)
!v = look (unsafeShiftR (b .&. 0x3e) 1)
!w = look (unsafeShiftL (b .&. 0x01) 4 .|. unsafeShiftR (c .&. 0xf0) 4)
!x = look (unsafeShiftL (c .&. 0x0f) 1 .|. unsafeShiftR (d .&. 0x80) 7)
!y = look (unsafeShiftR (d .&. 0x7c) 2)
!z = look (unsafeShiftL (d .&. 0x03) 3)
poke dst t
poke (plusPtr dst 1) u
poke (plusPtr dst 2) v
poke (plusPtr dst 3) w
poke (plusPtr dst 4) x
poke (plusPtr dst 5) y
poke (plusPtr dst 6) z
return (BS dfp (7 + minusPtr dst dptr))
where
look !i = aix i lut
{-# INLINE loopTailNoPad #-}

View file

@ -0,0 +1,98 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Data.ByteString.Base32.Internal.Utils
( aix
, peekWord32BE
, peekWord64BE
, reChunkN
, w32
, w64
, w64_32
, writeNPlainPtrBytes
) where
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Foreign.Ptr
import Foreign.Storable
import GHC.ByteOrder
import GHC.Exts
import GHC.Word
import System.IO.Unsafe
import Foreign.Marshal.Alloc (mallocBytes)
-- | Read 'Word8' index off alphabet addr
--
aix :: Word8 -> Addr# -> Word8
aix w8 alpha = W8# (indexWord8OffAddr# alpha i)
where
!(I# i) = fromIntegral w8
{-# INLINE aix #-}
w32 :: Word8 -> Word32
w32 = fromIntegral
{-# INLINE w32 #-}
w64_32 :: Word32 -> Word64
w64_32 = fromIntegral
{-# INLINE w64_32 #-}
w64 :: Word8 -> Word64
w64 = fromIntegral
{-# INLINE w64 #-}
-- | Allocate and fill @n@ bytes with some data
--
writeNPlainPtrBytes
:: Storable a
=> Int
-> [a]
-> Ptr a
writeNPlainPtrBytes !n as = unsafeDupablePerformIO $ do
p <- mallocBytes n
go p as
return p
where
go !_ [] = return ()
go !p (x:xs) = poke p x >> go (plusPtr p 1) xs
{-# INLINE writeNPlainPtrBytes #-}
peekWord32BE :: Ptr Word32 -> IO Word32
peekWord32BE p = case targetByteOrder of
LittleEndian -> byteSwap32 <$> peek p
BigEndian -> peek p
{-# inline peekWord32BE #-}
peekWord64BE :: Ptr Word64 -> IO Word64
peekWord64BE p = case targetByteOrder of
LittleEndian -> byteSwap64 <$> peek p
BigEndian -> peek p
{-# inline peekWord64BE #-}
-- | Rechunk a list of bytestrings in multiples of @n@
--
reChunkN :: Int -> [ByteString] -> [ByteString]
reChunkN n = go
where
go [] = []
go (b:bs) = case divMod (BS.length b) n of
(_, 0) -> b : go bs
(d, _) -> case BS.splitAt (d * n) b of
~(h, t) -> h : accum t bs
accum acc [] = [acc]
accum acc (c:cs) =
case BS.splitAt (n - BS.length acc) c of
~(h, t) ->
let acc' = BS.append acc h
in if BS.length acc' == n
then
let cs' = if BS.null t then cs else t : cs
in acc' : go cs'
else accum acc' cs
{-# INLINE reChunkN #-}

View file

@ -0,0 +1,77 @@
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
-- |
-- Module : Data.ByteString.Base64
-- Copyright : (c) 2010 Bryan O'Sullivan
--
-- License : BSD-style
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>,
-- Herbert Valerio Riedel <hvr@gnu.org>,
-- Mikhail Glushenkov <mikhail.glushenkov@gmail.com>
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base64-encoded strings.
--
-- @since 0.1.0.0
module Data.ByteString.Base64
( encode
, decode
, decodeLenient
) where
import Data.ByteString.Base64.Internal
import qualified Data.ByteString as B
import Data.ByteString.Internal (ByteString(..))
import Data.Word (Word8)
import Foreign.ForeignPtr (ForeignPtr)
-- | Encode a string into base64 form. The result will always be a
-- multiple of 4 bytes in length.
encode :: ByteString -> ByteString
encode s = encodeWith Padded (mkEncodeTable alphabet) s
-- | Decode a base64-encoded string. This function strictly follows
-- the specification in
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>.
--
-- (Note: this means that even @"\\n"@ and @"\\r\\n"@ as line breaks are rejected
-- rather than ignored. If you are using this in the context of a
-- standard that overrules RFC 4648 such as HTTP multipart mime bodies,
-- consider using 'decodeLenient'.)
decode :: ByteString -> Either String ByteString
decode s = decodeWithTable Padded decodeFP s
-- | Decode a base64-encoded string. This function is lenient in
-- following the specification from
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>, and will not
-- generate parse errors no matter how poor its input.
decodeLenient :: ByteString -> ByteString
decodeLenient s = decodeLenientWithTable decodeFP s
alphabet :: ByteString
alphabet = B.pack $ [65..90] ++ [97..122] ++ [48..57] ++ [43,47]
{-# NOINLINE alphabet #-}
decodeFP :: ForeignPtr Word8
#if MIN_VERSION_bytestring(0,11,0)
BS decodeFP _ =
#else
PS decodeFP _ _ =
#endif
B.pack $ replicate 43 x
++ [62,x,x,x,63]
++ [52..61]
++ [x,x,x,done,x,x,x]
++ [0..25]
++ [x,x,x,x,x,x]
++ [26..51]
++ replicate 133 x
{-# NOINLINE decodeFP #-}
x :: Integral a => a
x = 255
{-# INLINE x #-}

View file

@ -0,0 +1,446 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DoAndIfThenElse #-}
-- |
-- Module : Data.ByteString.Base64.Internal
-- Copyright : (c) 2010 Bryan O'Sullivan
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base64-encoded strings.
module Data.ByteString.Base64.Internal
( encodeWith
, decodeWithTable
, decodeLenientWithTable
, mkEncodeTable
, done
, peek8, poke8, peek8_32
, reChunkIn
, Padding(..)
, withBS
, mkBS
) where
import Data.Bits ((.|.), (.&.), shiftL, shiftR)
import qualified Data.ByteString as B
import Data.ByteString.Internal (ByteString(..), mallocByteString)
import Data.Word (Word8, Word16, Word32)
import Foreign.ForeignPtr (ForeignPtr, withForeignPtr, castForeignPtr)
import Foreign.Ptr (Ptr, castPtr, minusPtr, plusPtr)
import Foreign.Storable (peek, peekElemOff, poke)
import System.IO.Unsafe (unsafePerformIO)
peek8 :: Ptr Word8 -> IO Word8
peek8 = peek
poke8 :: Ptr Word8 -> Word8 -> IO ()
poke8 = poke
peek8_32 :: Ptr Word8 -> IO Word32
peek8_32 = fmap fromIntegral . peek8
data Padding = Padded | Don'tCare | Unpadded deriving Eq
-- | Encode a string into base64 form. The result will always be a multiple
-- of 4 bytes in length.
encodeWith :: Padding -> EncodeTable -> ByteString -> ByteString
encodeWith !padding (ET alfaFP encodeTable) !bs = withBS bs go
where
go !sptr !slen
| slen > maxBound `div` 4 =
error "Data.ByteString.Base64.encode: input too long"
| otherwise = do
let dlen = (slen + 2) `div` 3 * 4
dfp <- mallocByteString dlen
withForeignPtr alfaFP $ \aptr ->
withForeignPtr encodeTable $ \ep -> do
let aidx n = peek8 (aptr `plusPtr` n)
sEnd = sptr `plusPtr` slen
finish !n = return $ mkBS dfp n
fill !dp !sp !n
| sp `plusPtr` 2 >= sEnd = complete (castPtr dp) sp n
| otherwise = {-# SCC "encode/fill" #-} do
i <- peek8_32 sp
j <- peek8_32 (sp `plusPtr` 1)
k <- peek8_32 (sp `plusPtr` 2)
let w = i `shiftL` 16 .|. j `shiftL` 8 .|. k
enc = peekElemOff ep . fromIntegral
poke dp =<< enc (w `shiftR` 12)
poke (dp `plusPtr` 2) =<< enc (w .&. 0xfff)
fill (dp `plusPtr` 4) (sp `plusPtr` 3) (n + 4)
complete dp sp n
| sp == sEnd = finish n
| otherwise = {-# SCC "encode/complete" #-} do
let peekSP m f = (f . fromIntegral) `fmap` peek8 (sp `plusPtr` m)
twoMore = sp `plusPtr` 2 == sEnd
equals = 0x3d :: Word8
doPad = padding == Padded
{-# INLINE equals #-}
!a <- peekSP 0 ((`shiftR` 2) . (.&. 0xfc))
!b <- peekSP 0 ((`shiftL` 4) . (.&. 0x03))
poke8 dp =<< aidx a
if twoMore
then do
!b' <- peekSP 1 ((.|. b) . (`shiftR` 4) . (.&. 0xf0))
!c <- aidx =<< peekSP 1 ((`shiftL` 2) . (.&. 0x0f))
poke8 (dp `plusPtr` 1) =<< aidx b'
poke8 (dp `plusPtr` 2) c
if doPad
then poke8 (dp `plusPtr` 3) equals >> finish (n + 4)
else finish (n + 3)
else do
poke8 (dp `plusPtr` 1) =<< aidx b
if doPad
then do
poke8 (dp `plusPtr` 2) equals
poke8 (dp `plusPtr` 3) equals
finish (n + 4)
else finish (n + 2)
withForeignPtr dfp (\dptr -> fill (castPtr dptr) sptr 0)
data EncodeTable = ET !(ForeignPtr Word8) !(ForeignPtr Word16)
-- The encoding table is constructed such that the expansion of a 12-bit
-- block to a 16-bit block can be done by a single Word16 copy from the
-- correspoding table entry to the target address. The 16-bit blocks are
-- stored in big-endian order, as the indices into the table are built in
-- big-endian order.
mkEncodeTable :: ByteString -> EncodeTable
#if MIN_VERSION_bytestring(0,11,0)
mkEncodeTable alphabet@(BS afp _) =
case table of BS fp _ -> ET afp (castForeignPtr fp)
#else
mkEncodeTable alphabet@(PS afp _ _) =
case table of PS fp _ _ -> ET afp (castForeignPtr fp)
#endif
where
ix = fromIntegral . B.index alphabet
table = B.pack $ concat $ [ [ix j, ix k] | j <- [0..63], k <- [0..63] ]
-- | Decode a base64-encoded string. This function strictly follows
-- the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>.
--
-- This function takes the decoding table (for @base64@ or @base64url@) as
-- the first parameter.
--
-- For validation of padding properties, see note: $Validation
--
decodeWithTable :: Padding -> ForeignPtr Word8 -> ByteString -> Either String ByteString
decodeWithTable padding !decodeFP bs
| B.length bs == 0 = Right B.empty
| otherwise = case padding of
Padded
| r == 0 -> withBS bs go
| r == 1 -> Left "Base64-encoded bytestring has invalid size"
| otherwise -> Left "Base64-encoded bytestring is unpadded or has invalid padding"
Don'tCare
| r == 0 -> withBS bs go
| r == 2 -> withBS (B.append bs (B.replicate 2 0x3d)) go
| r == 3 -> validateLastPad bs invalidPad $ withBS (B.append bs (B.replicate 1 0x3d)) go
| otherwise -> Left "Base64-encoded bytestring has invalid size"
Unpadded
| r == 0 -> validateLastPad bs noPad $ withBS bs go
| r == 2 -> validateLastPad bs noPad $ withBS (B.append bs (B.replicate 2 0x3d)) go
| r == 3 -> validateLastPad bs noPad $ withBS (B.append bs (B.replicate 1 0x3d)) go
| otherwise -> Left "Base64-encoded bytestring has invalid size"
where
!r = B.length bs `rem` 4
noPad = "Base64-encoded bytestring required to be unpadded"
invalidPad = "Base64-encoded bytestring has invalid padding"
go !sptr !slen = do
dfp <- mallocByteString (slen `quot` 4 * 3)
withForeignPtr decodeFP (\ !decptr ->
withForeignPtr dfp (\dptr ->
decodeLoop decptr sptr dptr (sptr `plusPtr` slen) dfp))
decodeLoop
:: Ptr Word8
-- ^ decoding table pointer
-> Ptr Word8
-- ^ source pointer
-> Ptr Word8
-- ^ destination pointer
-> Ptr Word8
-- ^ source end pointer
-> ForeignPtr Word8
-- ^ destination foreign pointer (used for finalizing string)
-> IO (Either String ByteString)
decodeLoop !dtable !sptr !dptr !end !dfp = go dptr sptr
where
err p = return . Left
$ "invalid character at offset: "
++ show (p `minusPtr` sptr)
padErr p = return . Left
$ "invalid padding at offset: "
++ show (p `minusPtr` sptr)
canonErr p = return . Left
$ "non-canonical encoding detected at offset: "
++ show (p `minusPtr` sptr)
look :: Ptr Word8 -> IO Word32
look !p = do
!i <- peek p
!v <- peekElemOff dtable (fromIntegral i)
return (fromIntegral v)
go !dst !src
| plusPtr src 4 >= end = do
!a <- look src
!b <- look (src `plusPtr` 1)
!c <- look (src `plusPtr` 2)
!d <- look (src `plusPtr` 3)
finalChunk dst src a b c d
| otherwise = do
!a <- look src
!b <- look (src `plusPtr` 1)
!c <- look (src `plusPtr` 2)
!d <- look (src `plusPtr` 3)
decodeChunk dst src a b c d
-- | Decodes chunks of 4 bytes at a time, recombining into
-- 3 bytes. Note that in the inner loop stage, no padding
-- characters are admissible.
--
decodeChunk !dst !src !a !b !c !d
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| c == 0x63 = padErr (plusPtr src 2)
| d == 0x63 = padErr (plusPtr src 3)
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| otherwise = do
let !w = (shiftL a 18
.|. shiftL b 12
.|. shiftL c 6
.|. d) :: Word32
poke8 dst (fromIntegral (shiftR w 16))
poke8 (plusPtr dst 1) (fromIntegral (shiftR w 8))
poke8 (plusPtr dst 2) (fromIntegral w)
go (plusPtr dst 3) (plusPtr src 4)
-- | Decode the final 4 bytes in the string, recombining into
-- 3 bytes. Note that in this stage, we can have padding chars
-- but only in the final 2 positions.
--
finalChunk !dst !src a b c d
| a == 0x63 = padErr src
| b == 0x63 = padErr (plusPtr src 1)
| c == 0x63 && d /= 0x63 = err (plusPtr src 3) -- make sure padding is coherent.
| a == 0xff = err src
| b == 0xff = err (plusPtr src 1)
| c == 0xff = err (plusPtr src 2)
| d == 0xff = err (plusPtr src 3)
| otherwise = do
let !w = (shiftL a 18
.|. shiftL b 12
.|. shiftL c 6
.|. d) :: Word32
poke8 dst (fromIntegral (shiftR w 16))
if c == 0x63 && d == 0x63
then
if sanityCheckPos b mask_4bits
then return $ Right $ mkBS dfp (1 + (dst `minusPtr` dptr))
else canonErr (plusPtr src 1)
else if d == 0x63
then
if sanityCheckPos c mask_2bits
then do
poke8 (plusPtr dst 1) (fromIntegral (shiftR w 8))
return $ Right $ mkBS dfp (2 + (dst `minusPtr` dptr))
else canonErr (plusPtr src 2)
else do
poke8 (plusPtr dst 1) (fromIntegral (shiftR w 8))
poke8 (plusPtr dst 2) (fromIntegral w)
return $ Right $ mkBS dfp (3 + (dst `minusPtr` dptr))
-- | Decode a base64-encoded string. This function is lenient in
-- following the specification from
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>, and will not
-- generate parse errors no matter how poor its input. This function
-- takes the decoding table (for @base64@ or @base64url@) as the first
-- paramert.
decodeLenientWithTable :: ForeignPtr Word8 -> ByteString -> ByteString
decodeLenientWithTable !decodeFP !bs = withBS bs go
where
go !sptr !slen
| dlen <= 0 = return B.empty
| otherwise = do
dfp <- mallocByteString dlen
withForeignPtr decodeFP $ \ !decptr -> do
let finish dbytes
| dbytes > 0 = return $ mkBS dfp dbytes
| otherwise = return B.empty
sEnd = sptr `plusPtr` slen
fill !dp !sp !n
| sp >= sEnd = finish n
| otherwise = {-# SCC "decodeLenientWithTable/fill" #-}
let look :: Bool -> Ptr Word8
-> (Ptr Word8 -> Word32 -> IO ByteString)
-> IO ByteString
{-# INLINE look #-}
look skipPad p0 f = go' p0
where
go' p | p >= sEnd = f (sEnd `plusPtr` (-1)) done
| otherwise = {-# SCC "decodeLenient/look" #-} do
ix <- fromIntegral `fmap` peek8 p
v <- peek8 (decptr `plusPtr` ix)
if v == x || v == done && skipPad
then go' (p `plusPtr` 1)
else f (p `plusPtr` 1) (fromIntegral v)
in look True sp $ \ !aNext !aValue ->
look True aNext $ \ !bNext !bValue ->
if aValue == done || bValue == done
then finish n
else
look False bNext $ \ !cNext !cValue ->
look False cNext $ \ !dNext !dValue -> do
let w = aValue `shiftL` 18 .|. bValue `shiftL` 12 .|.
cValue `shiftL` 6 .|. dValue
poke8 dp $ fromIntegral (w `shiftR` 16)
if cValue == done
then finish (n + 1)
else do
poke8 (dp `plusPtr` 1) $ fromIntegral (w `shiftR` 8)
if dValue == done
then finish (n + 2)
else do
poke8 (dp `plusPtr` 2) $ fromIntegral w
fill (dp `plusPtr` 3) dNext (n+3)
withForeignPtr dfp $ \dptr -> fill dptr sptr 0
where
!dlen = (slen + 3) `div` 4 * 3
x :: Integral a => a
x = 255
{-# INLINE x #-}
done :: Integral a => a
done = 99
{-# INLINE done #-}
-- This takes a list of ByteStrings, and returns a list in which each
-- (apart from possibly the last) has length that is a multiple of n
reChunkIn :: Int -> [ByteString] -> [ByteString]
reChunkIn !n = go
where
go [] = []
go (y : ys) = case B.length y `divMod` n of
(_, 0) -> y : go ys
(d, _) -> case B.splitAt (d * n) y of
(prefix, suffix) -> prefix : fixup suffix ys
fixup acc [] = [acc]
fixup acc (z : zs) = case B.splitAt (n - B.length acc) z of
(prefix, suffix) ->
let acc' = acc `B.append` prefix
in if B.length acc' == n
then let zs' = if B.null suffix
then zs
else suffix : zs
in acc' : go zs'
else -- suffix must be null
fixup acc' zs
-- $Validation
--
-- This function checks that the last char of a bytestring is '='
-- and, if true, fails with a message or completes some io action.
--
-- This is necessary to check when decoding permissively (i.e. filling in padding chars).
-- Consider the following 4 cases of a string of length l:
--
-- l = 0 mod 4: No pad chars are added, since the input is assumed to be good.
-- l = 1 mod 4: Never an admissible length in base64
-- l = 2 mod 4: 2 padding chars are added. If padding chars are present in the last 4 chars of the string,
-- they will fail to decode as final quanta.
-- l = 3 mod 4: 1 padding char is added. In this case a string is of the form <body> + <padchar>. If adding the
-- pad char "completes" the string so that it is `l = 0 mod 4`, then this may possibly form corrupted data.
-- This case is degenerate and should be disallowed.
--
-- Hence, permissive decodes should only fill in padding chars when it makes sense to add them. That is,
-- if an input is degenerate, it should never succeed when we add padding chars. We need the following invariant to hold:
--
-- @
-- B64U.decodeUnpadded <|> B64U.decodePadded ~ B64U.decodePadded
-- @
--
-- This means the only char we need to check is the last one, and only to disallow `l = 3 mod 4`.
--
validateLastPad
:: ByteString
-- ^ input to validate
-> String
-- ^ error msg
-> Either String ByteString
-> Either String ByteString
validateLastPad !bs err !io
| B.last bs == 0x3d = Left err
| otherwise = io
{-# INLINE validateLastPad #-}
-- | Sanity check an index against a bitmask to make sure
-- it's coherent. If pos & mask == 0, we're good. If not, we should fail.
--
sanityCheckPos :: Word32 -> Word8 -> Bool
sanityCheckPos pos mask = fromIntegral pos .&. mask == 0
{-# INLINE sanityCheckPos #-}
-- | Mask 2 bits
--
mask_2bits :: Word8
mask_2bits = 3 -- (1 << 2) - 1
{-# NOINLINE mask_2bits #-}
-- | Mask 4 bits
--
mask_4bits :: Word8
mask_4bits = 15 -- (1 << 4) - 1
{-# NOINLINE mask_4bits #-}
-- | Back-compat shim for bytestring >=0.11. Constructs a
-- bytestring from a foreign ptr and a length. Offset is 0.
--
mkBS :: ForeignPtr Word8 -> Int -> ByteString
#if MIN_VERSION_bytestring(0,11,0)
mkBS dfp n = BS dfp n
#else
mkBS dfp n = PS dfp 0 n
#endif
{-# INLINE mkBS #-}
-- | Back-compat shim for bytestring >=0.11. Unwraps the foreign ptr of
-- a bytestring, executing an IO action as a function of the underlying
-- pointer and some starting length.
--
-- Note: in `unsafePerformIO`.
--
withBS :: ByteString -> (Ptr Word8 -> Int -> IO a) -> a
#if MIN_VERSION_bytestring(0,11,0)
withBS (BS !sfp !slen) f = unsafePerformIO $
withForeignPtr sfp $ \p -> f p slen
#else
withBS (PS !sfp !soff !slen) f = unsafePerformIO $
withForeignPtr sfp $ \p -> f (plusPtr p soff) slen
#endif
{-# INLINE withBS #-}

View file

@ -0,0 +1,64 @@
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
-- |
-- Module : Data.ByteString.Base64.Lazy
-- Copyright : (c) 2012 Ian Lynagh
--
-- License : BSD-style
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>,
-- Herbert Valerio Riedel <hvr@gnu.org>,
-- Mikhail Glushenkov <mikhail.glushenkov@gmail.com>
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base64-encoded
-- lazy bytestrings.
--
-- @since 1.0.0.0
module Data.ByteString.Base64.Lazy
(
encode
, decode
, decodeLenient
) where
import Data.ByteString.Base64.Internal
import qualified Data.ByteString.Base64 as B64
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as LC
import Data.Char
-- | Encode a string into base64 form. The result will always be a
-- multiple of 4 bytes in length.
encode :: L.ByteString -> L.ByteString
encode = L.fromChunks . map B64.encode . reChunkIn 3 . L.toChunks
-- | Decode a base64-encoded string. This function strictly follows
-- the specification in
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>.
decode :: L.ByteString -> Either String L.ByteString
decode b = -- Returning an Either type means that the entire result will
-- need to be in memory at once anyway, so we may as well
-- keep it simple and just convert to and from a strict byte
-- string
-- TODO: Use L.{fromStrict,toStrict} once we can rely on
-- a new enough bytestring
case B64.decode $ S.concat $ L.toChunks b of
Left err -> Left err
Right b' -> Right $ L.fromChunks [b']
-- | Decode a base64-encoded string. This function is lenient in
-- following the specification from
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>, and will not generate
-- parse errors no matter how poor its input.
decodeLenient :: L.ByteString -> L.ByteString
decodeLenient = L.fromChunks . map B64.decodeLenient . reChunkIn 4 . L.toChunks
. LC.filter goodChar
where -- We filter out and '=' padding here, but B64.decodeLenient
-- handles that
goodChar c = isDigit c || isAsciiUpper c || isAsciiLower c
|| c == '+' || c == '/'

View file

@ -0,0 +1,99 @@
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
-- |
-- Module : Data.ByteString.Base64.URL
-- Copyright : (c) 2012 Deian Stefan
--
-- License : BSD-style
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>,
-- Herbert Valerio Riedel <hvr@gnu.org>,
-- Mikhail Glushenkov <mikhail.glushenkov@gmail.com>
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base64url-encoded strings.
--
-- @since 0.1.1.0
module Data.ByteString.Base64.URL
( encode
, encodeUnpadded
, decode
, decodePadded
, decodeUnpadded
, decodeLenient
) where
import Data.ByteString.Base64.Internal
import qualified Data.ByteString as B
import Data.ByteString.Internal (ByteString(..))
import Data.Word (Word8)
import Foreign.ForeignPtr (ForeignPtr)
-- | Encode a string into base64url form. The result will always be a
-- multiple of 4 bytes in length.
encode :: ByteString -> ByteString
encode = encodeWith Padded (mkEncodeTable alphabet)
-- | Encode a string into unpadded base64url form.
--
-- @since 1.1.0.0
encodeUnpadded :: ByteString -> ByteString
encodeUnpadded = encodeWith Unpadded (mkEncodeTable alphabet)
-- | Decode a base64url-encoded string applying padding if necessary.
-- This function follows the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>
-- and in <https://tools.ietf.org/html/rfc7049#section-2.4.4.2 RFC 7049 2.4>
decode :: ByteString -> Either String ByteString
decode = decodeWithTable Don'tCare decodeFP
-- | Decode a padded base64url-encoded string, failing if input is improperly padded.
-- This function follows the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>
-- and in <https://tools.ietf.org/html/rfc7049#section-2.4.4.2 RFC 7049 2.4>
--
-- @since 1.1.0.0
decodePadded :: ByteString -> Either String ByteString
decodePadded = decodeWithTable Padded decodeFP
-- | Decode a unpadded base64url-encoded string, failing if input is padded.
-- This function follows the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>
-- and in <https://tools.ietf.org/html/rfc7049#section-2.4.4.2 RFC 7049 2.4>
--
-- @since 1.1.0.0
decodeUnpadded :: ByteString -> Either String ByteString
decodeUnpadded = decodeWithTable Unpadded decodeFP
-- | Decode a base64url-encoded string. This function is lenient in
-- following the specification from
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>, and will not
-- generate parse errors no matter how poor its input.
decodeLenient :: ByteString -> ByteString
decodeLenient = decodeLenientWithTable decodeFP
alphabet :: ByteString
alphabet = B.pack $ [65..90] ++ [97..122] ++ [48..57] ++ [45,95]
{-# NOINLINE alphabet #-}
decodeFP :: ForeignPtr Word8
#if MIN_VERSION_bytestring(0,11,0)
BS decodeFP _ =
#else
PS decodeFP _ _ =
#endif
B.pack $ replicate 45 x
++ [62,x,x]
++ [52..61]
++ [x,x,x,done,x,x,x]
++ [0..25]
++ [x,x,x,x,63,x]
++ [26..51]
++ replicate 133 x
{-# NOINLINE decodeFP #-}
x :: Integral a => a
x = 255
{-# INLINE x #-}

View file

@ -0,0 +1,95 @@
{-# LANGUAGE CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
-- |
-- Module : Data.ByteString.Base64.URL.Lazy
-- Copyright : (c) 2012 Ian Lynagh
--
-- License : BSD-style
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>,
-- Herbert Valerio Riedel <hvr@gnu.org>,
-- Mikhail Glushenkov <mikhail.glushenkov@gmail.com>
-- Stability : experimental
-- Portability : GHC
--
-- Fast and efficient encoding and decoding of base64-encoded
-- lazy bytestrings.
--
-- @since 1.0.0.0
module Data.ByteString.Base64.URL.Lazy
(
encode
, encodeUnpadded
, decode
, decodeUnpadded
, decodePadded
, decodeLenient
) where
import Data.ByteString.Base64.Internal
import qualified Data.ByteString.Base64.URL as B64
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as LC
import Data.Char
-- | Encode a string into base64 form. The result will always be a
-- multiple of 4 bytes in length.
encode :: L.ByteString -> L.ByteString
encode = L.fromChunks . map B64.encode . reChunkIn 3 . L.toChunks
-- | Encode a string into unpadded base64url form.
--
-- @since 1.1.0.0
encodeUnpadded :: L.ByteString -> L.ByteString
encodeUnpadded = L.fromChunks
. map B64.encodeUnpadded
. reChunkIn 3
. L.toChunks
-- | Decode a base64-encoded string. This function strictly follows
-- the specification in
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>.
decode :: L.ByteString -> Either String L.ByteString
decode b = -- Returning an Either type means that the entire result will
-- need to be in memory at once anyway, so we may as well
-- keep it simple and just convert to and from a strict byte
-- string
-- TODO: Use L.{fromStrict,toStrict} once we can rely on
-- a new enough bytestring
case B64.decode $ S.concat $ L.toChunks b of
Left err -> Left err
Right b' -> Right $ L.fromChunks [b']
-- | Decode a unpadded base64url-encoded string, failing if input is padded.
-- This function follows the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>
-- and in <https://tools.ietf.org/html/rfc7049#section-2.4.4.2 RFC 7049 2.4>
--
-- @since 1.1.0.0
decodeUnpadded :: L.ByteString -> Either String L.ByteString
decodeUnpadded bs = case B64.decodeUnpadded $ S.concat $ L.toChunks bs of
Right b -> Right $ L.fromChunks [b]
Left e -> Left e
-- | Decode a padded base64url-encoded string, failing if input is improperly padded.
-- This function follows the specification in <http://tools.ietf.org/rfc/rfc4648 RFC 4648>
-- and in <https://tools.ietf.org/html/rfc7049#section-2.4.4.2 RFC 7049 2.4>
--
-- @since 1.1.0.0
decodePadded :: L.ByteString -> Either String L.ByteString
decodePadded bs = case B64.decodePadded $ S.concat $ L.toChunks bs of
Right b -> Right $ L.fromChunks [b]
Left e -> Left e
-- | Decode a base64-encoded string. This function is lenient in
-- following the specification from
-- <http://tools.ietf.org/rfc/rfc4648 RFC 4648>, and will not generate
-- parse errors no matter how poor its input.
decodeLenient :: L.ByteString -> L.ByteString
decodeLenient = L.fromChunks . map B64.decodeLenient . reChunkIn 4 . L.toChunks
. LC.filter goodChar
where -- We filter out and '=' padding here, but B64.decodeLenient
-- handles that
goodChar c = isAlphaNum c || c == '-' || c == '_'

View file

@ -0,0 +1,94 @@
{-# LANGUAGE OverloadedStrings, Safe #-}
module Data.ByteString.Builder.Scientific
( scientificBuilder
, formatScientificBuilder
, FPFormat(..)
) where
import Data.Scientific (Scientific)
import qualified Data.Scientific as Scientific
import Data.Text.Lazy.Builder.RealFloat (FPFormat(..))
import qualified Data.ByteString.Char8 as BC8
import Data.ByteString.Builder (Builder, string8, char8, intDec)
import Data.ByteString.Builder.Extra (byteStringCopy)
import Utils (roundTo, i2d)
import Data.Monoid ((<>))
-- | A @ByteString@ @Builder@ which renders a scientific number to full
-- precision, using standard decimal notation for arguments whose
-- absolute value lies between @0.1@ and @9,999,999@, and scientific
-- notation otherwise.
scientificBuilder :: Scientific -> Builder
scientificBuilder = formatScientificBuilder Generic Nothing
-- | Like 'scientificBuilder' but provides rendering options.
formatScientificBuilder :: FPFormat
-> Maybe Int -- ^ Number of decimal places to render.
-> Scientific
-> Builder
formatScientificBuilder fmt decs scntfc
| scntfc < 0 = char8 '-' <> doFmt fmt (Scientific.toDecimalDigits (-scntfc))
| otherwise = doFmt fmt (Scientific.toDecimalDigits scntfc)
where
doFmt format (is, e) =
let ds = map i2d is in
case format of
Generic ->
doFmt (if e < 0 || e > 7 then Exponent else Fixed)
(is,e)
Exponent ->
case decs of
Nothing ->
let show_e' = intDec (e-1) in
case ds of
"0" -> byteStringCopy "0.0e0"
[d] -> char8 d <> byteStringCopy ".0e" <> show_e'
(d:ds') -> char8 d <> char8 '.' <> string8 ds' <> char8 'e' <> show_e'
[] -> error $ "Data.ByteString.Builder.Scientific.formatScientificBuilder" ++
"/doFmt/Exponent: []"
Just dec ->
let dec' = max dec 1 in
case is of
[0] -> byteStringCopy "0." <>
byteStringCopy (BC8.replicate dec' '0') <>
byteStringCopy "e0"
_ ->
let (ei,is') = roundTo (dec'+1) is
in case map i2d (if ei > 0 then init is' else is') of
[] -> mempty
d:ds' -> char8 d <> char8 '.' <> string8 ds' <> char8 'e' <> intDec (e-1+ei)
Fixed ->
let
mk0 ls = case ls of { "" -> char8 '0' ; _ -> string8 ls}
in
case decs of
Nothing
| e <= 0 -> byteStringCopy "0." <>
byteStringCopy (BC8.replicate (-e) '0') <>
string8 ds
| otherwise ->
let
f 0 s rs = mk0 (reverse s) <> char8 '.' <> mk0 rs
f n s "" = f (n-1) ('0':s) ""
f n s (r:rs) = f (n-1) (r:s) rs
in
f e "" ds
Just dec ->
let dec' = max dec 0 in
if e >= 0 then
let
(ei,is') = roundTo (dec' + e) is
(ls,rs) = splitAt (e+ei) (map i2d is')
in
mk0 ls <> (if null rs then mempty else char8 '.' <> string8 rs)
else
let (ei,is') = roundTo dec' (replicate (-e) 0 ++ is)
in case map i2d (if ei > 0 then is' else 0:is') of
[] -> mempty
d:ds' -> char8 d <> (if null ds' then mempty else char8 '.' <> string8 ds')

View file

@ -0,0 +1,243 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Lazy.Base32
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.Lazy.ByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32
-- encoding format. This includes strictly padded/unpadded
-- decoding variants, as well as internal and external validation for canonicity.
--
module Data.ByteString.Lazy.Base32
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
--, decodeBase32Lenient
-- * Validation
, isBase32
, isValidBase32
) where
import Prelude hiding (all, elem)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Base32 as B32
import Data.ByteString.Base32.Internal.Utils (reChunkN)
import Data.ByteString.Lazy (elem, fromChunks, toChunks)
import Data.ByteString.Lazy.Internal (ByteString(..))
import Data.Either (isRight)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
-- | Encode a 'ByteString' value as a Base32 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "KN2W4==="
--
encodeBase32 :: ByteString -> TL.Text
encodeBase32 = TL.decodeUtf8 . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ByteString' as a Base32 'ByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "KN2W4==="
--
encodeBase32' :: ByteString -> ByteString
encodeBase32' = fromChunks
. fmap B32.encodeBase32'
. reChunkN 5
. toChunks
-- | Decode an arbitrarily padded Base32 encoded 'ByteString' value. If its length is not a multiple
-- of 4, then padding chars will be added to fill out the input to a multiple of
-- 4 for safe decoding as Base32-encoded values are optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32 "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ByteString -> Either T.Text ByteString
decodeBase32 = fmap (fromChunks . (:[]))
. B32.decodeBase32
. BS.concat
. toChunks
{-# INLINE decodeBase32 #-}
-- | Encode a 'ByteString' value as Base32 'Text' without padding. Note that for Base32,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32 and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded "Sun"
-- "KN2W4"
--
encodeBase32Unpadded :: ByteString -> TL.Text
encodeBase32Unpadded = TL.decodeUtf8 . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ByteString' value as Base32 without padding. Note that for Base32,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32 and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "KN2W4"
--
encodeBase32Unpadded' :: ByteString -> ByteString
encodeBase32Unpadded' = fromChunks
. fmap B32.encodeBase32Unpadded'
. reChunkN 5
. toChunks
-- | Decode an unpadded Base32-encoded 'ByteString' value. Input strings are
-- required to be unpadded, and will undergo validation prior to decoding to
-- confirm.
--
-- In general, unless unpadded Base32 is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "KN2W4==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ByteString -> Either T.Text ByteString
decodeBase32Unpadded = fmap (fromChunks . (:[]))
. B32.decodeBase32Unpadded
. BS.concat
. toChunks
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32-encoded 'ByteString' value. Input strings are
-- required to be correctly padded, and will be validated prior to decoding
-- to confirm.
--
-- In general, unless padded Base32 is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "KN2W4"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ByteString -> Either T.Text ByteString
decodeBase32Padded = fmap (fromChunks . (:[]))
. B32.decodeBase32Padded
. BS.concat
. toChunks
{-# INLINE decodeBase32Padded #-}
-- -- | Leniently decode an unpadded Base32-encoded 'ByteString'. This function
-- -- will not generate parse errors. If input data contains padding chars,
-- -- then the input will be parsed up until the first pad character.
-- --
-- -- __Note:__ This is not RFC 4648-compliant.
-- --
-- -- === __Examples__:
-- --
-- -- >>> decodeBase32Lenient "PDw_Pj4="
-- -- "<<?>>"
-- --
-- -- >>> decodeBase32Lenient "PDw_%%%$}Pj4"
-- -- "<<?>>"
-- --
-- decodeBase32Lenient :: ByteString -> ByteString
-- decodeBase32Lenient = fromChunks
-- . fmap B32.decodeBase32Lenient
-- . reChunkN 8
-- . fmap (BS.filter (flip elem "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567="))
-- . toChunks
-- {-# INLINE decodeBase32Lenient #-}
-- | Tell whether a 'ByteString' is Base32-encoded.
--
-- === __Examples__:
--
-- >>> isBase32 "KN2W4"
-- True
--
-- >>> isBase32 "KN2W4==="
-- True
--
-- >>> isBase32 "KN2W4=="
-- False
--
isBase32 :: ByteString -> Bool
isBase32 bs = isValidBase32 bs && isRight (decodeBase32 bs)
{-# INLINE isBase32 #-}
-- | Tell whether a 'ByteString' is a valid Base32 format.
--
-- This will not tell you whether or not this is a correct Base32 representation,
-- only that it conforms to the correct shape. To check whether it is a true
-- Base32 encoded 'ByteString' value, use 'isBase32'.
--
-- === __Examples__:
--
-- >>> isValidBase32 "KN2W4"
-- True
--
-- >>> isValidBase32 "KN2W4="
-- False
--
-- >>> isValidBase32 "KN2W4%"
-- False
--
isValidBase32 :: ByteString -> Bool
isValidBase32 = go . toChunks
where
go [] = True
go [c] = B32.isValidBase32 c
go (c:cs) = -- note the lack of padding char
BS.all (flip elem "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567") c
&& go cs
{-# INLINE isValidBase32 #-}

View file

@ -0,0 +1,244 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Lazy.Base32.Hex
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.Lazy.ByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32hex
-- encoding format. This includes strictly padded/unpadded
-- decoding variants, as well as internal and external validation for canonicity.
--
module Data.ByteString.Lazy.Base32.Hex
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
--, decodeBase32Lenient
-- * Validation
, isBase32Hex
, isValidBase32Hex
) where
import Prelude hiding (all, elem)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Base32.Hex as B32H
import Data.ByteString.Base32.Internal.Utils (reChunkN)
import Data.ByteString.Lazy (elem, fromChunks, toChunks)
import Data.ByteString.Lazy.Internal (ByteString(..))
import Data.Either (isRight)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
-- | Encode a 'ByteString' value as a Base32hex 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "ADQMS==="
--
encodeBase32 :: ByteString -> TL.Text
encodeBase32 = TL.decodeUtf8 . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ByteString' as a Base32hex 'ByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32' "Sun"
-- "ADQMS==="
--
encodeBase32' :: ByteString -> ByteString
encodeBase32' = fromChunks
. fmap B32H.encodeBase32'
. reChunkN 5
. toChunks
-- | Decode an arbitrarily padded Base32hex encoded 'ByteString' value. If its length is not a multiple
-- of 4, then padding chars will be added to fill out the input to a multiple of
-- 4 for safe decoding as Base32hex-encoded values are optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32 "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32 "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32 "ADQMS==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ByteString -> Either T.Text ByteString
decodeBase32 = fmap (fromChunks . (:[]))
. B32H.decodeBase32
. BS.concat
. toChunks
{-# INLINE decodeBase32 #-}
-- | Encode a 'ByteString' value as Base32hex 'Text' without padding. Note that for Base32hex,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32hex and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded "Sun"
-- "ADQMS"
--
encodeBase32Unpadded :: ByteString -> TL.Text
encodeBase32Unpadded = TL.decodeUtf8 . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ByteString' value as Base32hex without padding. Note that for Base32hex,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32hex and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded' :: ByteString -> ByteString
encodeBase32Unpadded' = fromChunks
. fmap B32H.encodeBase32Unpadded'
. reChunkN 5
. toChunks
-- | Decode an unpadded Base32hex-encoded 'ByteString' value. Input strings are
-- required to be unpadded, and will undergo validation prior to decoding to
-- confirm.
--
-- In general, unless unpadded Base32hex is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "ADQMS==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ByteString -> Either T.Text ByteString
decodeBase32Unpadded = fmap (fromChunks . (:[]))
. B32H.decodeBase32Unpadded
. BS.concat
. toChunks
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32hex-encoded 'ByteString' value. Input strings are
-- required to be correctly padded, and will be validated prior to decoding
-- to confirm.
--
-- In general, unless padded Base32hex is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "ADQMS"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ByteString -> Either T.Text ByteString
decodeBase32Padded = fmap (fromChunks . (:[]))
. B32H.decodeBase32Padded
. BS.concat
. toChunks
{-# INLINE decodeBase32Padded #-}
-- -- | Leniently decode an unpadded Base32hex-encoded 'ByteString'. This function
-- -- will not generate parse errors. If input data contains padding chars,
-- -- then the input will be parsed up until the first pad character.
-- --
-- -- __Note:__ This is not RFC 4648-compliant.
-- --
-- -- === __Examples__:
-- --
-- -- >>> decodeBase32Lenient "PDw_Pj4="
-- -- "<<?>>"
-- --
-- -- >>> decodeBase32Lenient "PDw_%%%$}Pj4"
-- -- "<<?>>"
-- --
-- decodeBase32Lenient :: ByteString -> ByteString
-- decodeBase32Lenient = fromChunks
-- . fmap B32H.decodeBase32Lenient
-- . reChunkN 8
-- . fmap (BS.filter (flip elem "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567="))
-- . toChunks
-- {-# INLINE decodeBase32Lenient #-}
-- | Tell whether a 'ByteString' is Base32hex-encoded.
--
-- === __Examples__:
--
-- >>> isBase32Hex "ADQMS"
-- True
--
-- >>> isBase32Hex "ADQMS==="
-- True
--
-- >>> isBase32Hex "ADQMS=="
-- False
--
isBase32Hex :: ByteString -> Bool
isBase32Hex bs = isValidBase32Hex bs && isRight (decodeBase32 bs)
{-# INLINE isBase32Hex #-}
-- | Tell whether a 'ByteString' is a valid Base32hex format.
--
-- This will not tell you whether or not this is a correct Base32hex representation,
-- only that it conforms to the correct shape. To check whether it is a true
-- Base32hex encoded 'ByteString' value, use 'isBase32Hex'.
--
-- === __Examples__:
--
--
-- >>> isValidBase32Hex "ADQMS"
-- True
--
-- >>> isValidBase32Hex "ADQMS="
-- False
--
-- >>> isValidBase32Hex "ADQMS%"
-- False
--
isValidBase32Hex :: ByteString -> Bool
isValidBase32Hex = go . toChunks
where
go [] = True
go [c] = B32H.isValidBase32Hex c
go (c:cs) = -- note the lack of padding char
BS.all (flip elem "0123456789ABCDEFGHIJKLMNOPQRSTUV") c
&& go cs
{-# INLINE isValidBase32Hex #-}

View file

@ -0,0 +1,212 @@
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Short.Base32
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.Short.ShortByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32
-- encoding format. This includes strictly padded/unpadded decoding
-- variants, as well as internal and external validation for canonicity.
--
module Data.ByteString.Short.Base32
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
--, decodeBase32Lenient
-- * Validation
, isBase32
, isValidBase32
) where
import qualified Data.ByteString.Base32 as B32
import Data.ByteString.Short (ShortByteString, fromShort, toShort)
import Data.Text (Text)
import Data.Text.Short (ShortText)
import Data.Text.Short.Unsafe (fromShortByteStringUnsafe)
-- | Encode a 'ShortByteString' value as a Base32 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "KN2W4==="
--
encodeBase32 :: ShortByteString -> ShortText
encodeBase32 = fromShortByteStringUnsafe . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ShortByteString' as a Base32 'ShortByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32' "Sun"
-- "KN2W4==="
--
encodeBase32' :: ShortByteString -> ShortByteString
encodeBase32' = toShort . B32.encodeBase32' . fromShort
-- | Decode an arbitrarily padded Base32 encoded 'ShortByteString' value. If its length is not a multiple
-- of 4, then padding chars will be added to fill out the input to a multiple of
-- 4 for safe decoding as Base32-encoded values are optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32 "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32 "KN2W==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ShortByteString -> Either Text ShortByteString
decodeBase32 = fmap toShort . B32.decodeBase32 . fromShort
{-# INLINE decodeBase32 #-}
-- | Encode a 'ShortByteString' value as Base32 'Text' without padding. Note that for Base32,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32 and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded "Sun"
-- "KN2W4"
--
encodeBase32Unpadded :: ShortByteString -> ShortText
encodeBase32Unpadded = fromShortByteStringUnsafe . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ShortByteString' value as Base32 without padding. Note that for Base32,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32 and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "KN2W4"
--
encodeBase32Unpadded' :: ShortByteString -> ShortByteString
encodeBase32Unpadded' = toShort . B32.encodeBase32Unpadded' . fromShort
-- | Decode an unpadded Base32-encoded 'ShortByteString' value. Input strings are
-- required to be unpadded, and will undergo validation prior to decoding to
-- confirm.
--
-- In general, unless unpadded Base32 is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "KN2W4"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "KN2W4==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ShortByteString -> Either Text ShortByteString
decodeBase32Unpadded = fmap toShort . B32.decodeBase32Unpadded . fromShort
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32-encoded 'ShortByteString' value. Input strings are
-- required to be correctly padded, and will be validated prior to decoding
-- to confirm.
--
-- In general, unless padded Base32 is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-6 RFC-4648 section 6>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "KN2W4==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "KN2W4"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ShortByteString -> Either Text ShortByteString
decodeBase32Padded = fmap toShort . B32.decodeBase32Padded . fromShort
{-# INLINE decodeBase32Padded #-}
-- -- | Leniently decode an unpadded Base32-encoded 'ShortByteString'. This function
-- -- will not generate parse errors. If input data contains padding chars,
-- -- then the input will be parsed up until the first pad character.
-- --
-- -- __Note:__ This is not RFC 4648-compliant.
-- --
-- -- === __Examples__:
-- --
-- -- >>> decodeBase32Lenient "PDw_Pj4="
-- -- "<<?>>"
-- --
-- -- >>> decodeBase32Lenient "PDw_%%%$}Pj4"
-- -- "<<?>>"
-- --
-- decodeBase32Lenient :: ShortByteString -> ShortByteString
-- decodeBase32Lenient = toShort . B32.decodeBase32Lenient . fromShort
-- {-# INLINE decodeBase32Lenient #-}
-- | Tell whether a 'ShortByteString' is Base32-encoded.
--
-- === __Examples__:
--
-- >>> isBase32 "KN2W4"
-- True
--
-- >>> isBase32 "KN2W4==="
-- True
--
-- >>> isBase32 "KN2W4=="
-- False
--
isBase32 :: ShortByteString -> Bool
isBase32 = B32.isBase32 . fromShort
{-# INLINE isBase32 #-}
-- | Tell whether a 'ShortByteString' is a valid Base32 format.
--
-- This will not tell you whether or not this is a correct Base32 representation,
-- only that it conforms to the correct shape. To check whether it is a true
-- Base32 encoded 'ShortByteString' value, use 'isBase32'.
--
-- === __Examples__:
--
-- >>> isValidBase32 "KN2W4"
-- True
--
-- >>> isValidBase32 "KN2W4="
-- False
--
-- >>> isValidBase32 "KN2W4%"
-- False
--
isValidBase32 :: ShortByteString -> Bool
isValidBase32 = B32.isValidBase32 . fromShort
{-# INLINE isValidBase32 #-}

View file

@ -0,0 +1,210 @@
{-# LANGUAGE Trustworthy #-}
-- |
-- Module : Data.ByteString.Short.Base32.Hex
-- Copyright : (c) 2019-2023 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : stable
-- Portability : non-portable
--
-- This module contains 'Data.ByteString.Short.ShortByteString'-valued combinators for
-- implementing the RFC 4648 specification of the Base32hex
-- encoding format. This includes strictly padded/unpadded and decoding
-- variants, as well as internal and external validation for canonicity.
--
module Data.ByteString.Short.Base32.Hex
( -- * Encoding
encodeBase32
, encodeBase32'
, encodeBase32Unpadded
, encodeBase32Unpadded'
-- * Decoding
, decodeBase32
, decodeBase32Unpadded
, decodeBase32Padded
--, decodeBase32Lenient
-- * Validation
, isBase32Hex
, isValidBase32Hex
) where
import qualified Data.ByteString.Base32.Hex as B32H
import Data.ByteString.Short (ShortByteString, fromShort, toShort)
import Data.Text (Text)
import Data.Text.Short (ShortText)
import Data.Text.Short.Unsafe (fromShortByteStringUnsafe)
-- | Encode a 'ShortByteString' value as a Base32hex 'Text' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32 "Sun"
-- "ADQMS==="
--
encodeBase32 :: ShortByteString -> ShortText
encodeBase32 = fromShortByteStringUnsafe . encodeBase32'
{-# INLINE encodeBase32 #-}
-- | Encode a 'ShortByteString' as a Base32hex 'ShortByteString' value with padding.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32' "Sun"
-- "ADQMS==="
--
encodeBase32' :: ShortByteString -> ShortByteString
encodeBase32' = toShort . B32H.encodeBase32' . fromShort
-- | Decode an arbitrarily padded Base32hex encoded 'ShortByteString' value. If its length is not a multiple
-- of 4, then padding chars will be added to fill out the input to a multiple of
-- 4 for safe decoding as Base32hex-encoded values are optionally padded.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32 "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32 "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32 "ADQM==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32 :: ShortByteString -> Either Text ShortByteString
decodeBase32 = fmap toShort . B32H.decodeBase32 . fromShort
{-# INLINE decodeBase32 #-}
-- | Encode a 'ShortByteString' value as Base32hex 'Text' without padding. Note that for Base32hex,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32hex and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded :: ShortByteString -> ShortText
encodeBase32Unpadded = fromShortByteStringUnsafe . encodeBase32Unpadded'
{-# INLINE encodeBase32Unpadded #-}
-- | Encode a 'ShortByteString' value as Base32hex without padding. Note that for Base32hex,
-- padding is optional. If you call this function, you will simply be encoding
-- as Base32hex and stripping padding chars from the output.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> encodeBase32Unpadded' "Sun"
-- "ADQMS"
--
encodeBase32Unpadded' :: ShortByteString -> ShortByteString
encodeBase32Unpadded' = toShort . B32H.encodeBase32Unpadded' . fromShort
-- | Decode an unpadded Base32hex-encoded 'ShortByteString' value. Input strings are
-- required to be unpadded, and will undergo validation prior to decoding to
-- confirm.
--
-- In general, unless unpadded Base32hex is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Unpadded "ADQMS"
-- Right "Sun"
--
-- >>> decodeBase32Unpadded "ADQMS==="
-- Left "Base32-encoded bytestring has invalid padding"
--
decodeBase32Unpadded :: ShortByteString -> Either Text ShortByteString
decodeBase32Unpadded = fmap toShort . B32H.decodeBase32Unpadded . fromShort
{-# INLINE decodeBase32Unpadded #-}
-- | Decode a padded Base32hex-encoded 'ShortByteString' value. Input strings are
-- required to be correctly padded, and will be validated prior to decoding
-- to confirm.
--
-- In general, unless padded Base32hex is explicitly required, it is
-- safer to call 'decodeBase32'.
--
-- See: <https://tools.ietf.org/html/rfc4648#section-7 RFC-4648 section 7>
--
-- === __Examples__:
--
-- >>> decodeBase32Padded "ADQMS==="
-- Right "Sun"
--
-- >>> decodeBase32Padded "ADQMS"
-- Left "Base32-encoded bytestring requires padding"
--
decodeBase32Padded :: ShortByteString -> Either Text ShortByteString
decodeBase32Padded = fmap toShort . B32H.decodeBase32Padded . fromShort
{-# INLINE decodeBase32Padded #-}
-- -- | Leniently decode an unpadded Base32hex-encoded 'ShortByteString'. This function
-- -- will not generate parse errors. If input data contains padding chars,
-- -- then the input will be parsed up until the first pad character.
-- --
-- -- __Note:__ This is not RFC 4648-compliant.
-- --
-- -- === __Examples__:
-- --
-- -- >>> decodeBase32Lenient "PDw_Pj4="
-- -- "<<?>>"
-- --
-- -- >>> decodeBase32Lenient "PDw_%%%$}Pj4"
-- -- "<<?>>"
-- --
-- decodeBase32Lenient :: ShortByteString -> ShortByteString
-- decodeBase32Lenient = toShort . B32H.decodeBase32Lenient . fromShort
-- {-# INLINE decodeBase32Lenient #-}
-- | Tell whether a 'ShortByteString' is Base32hex-encoded.
--
-- === __Examples__:
--
-- >>> isBase32Hex "ADQMS"
-- True
--
-- >>> isBase32Hex "ADQMS==="
-- True
--
-- >>> isBase32Hex "ADQMS=="
-- False
--
isBase32Hex :: ShortByteString -> Bool
isBase32Hex = B32H.isBase32Hex . fromShort
{-# INLINE isBase32Hex #-}
-- | Tell whether a 'ShortByteString' is a valid Base32hex format.
--
-- This will not tell you whether or not this is a correct Base32hex representation,
-- only that it conforms to the correct shape. To check whether it is a true
-- Base32 encoded 'ShortByteString' value, use 'isBase32Hex'.
--
-- === __Examples__:
--
-- >>> isValidBase32Hex "ADQMS"
-- True
--
-- >>> isValidBase32Hex "ADQMS="
-- False
--
-- >>> isValidBase32Hex "ADQMS%"
-- False
--
isValidBase32Hex :: ShortByteString -> Bool
isValidBase32Hex = B32H.isValidBase32Hex . fromShort
{-# INLINE isValidBase32Hex #-}