basic key encryption

This commit is contained in:
La Ancapo 2026-01-26 23:09:58 +01:00
parent eff0fb49b6
commit 559900fd3a
31 changed files with 5501 additions and 4 deletions

View file

@ -2,6 +2,20 @@
The aim is to create a trustworthy Stellar transaction signer (and, by necessity, a pretty printer) using only Glasgow Haskell compiler code and Haskell Core libraries, reducing the possible supply chain attack surface.
## Installation and usage
To build and run it, install `cabal-install` and use `cabal run`. The program expects your transaction coming from the standard input, and your private (S...) key residing in `$HOME/~/.stellar-veritas-key`. It will produce the decoded transaction description afterwards, with simple descriptions of the most popular operations. When it is ran from an interactive terminal, it will ask for a confirmation before signing the transaction.
You can put the binary (`cabal list-bin stellar-veritas` to get the path) into your `$PATH` and use it directly.
## Encryption
The project supports key encryption. Encrypting your key protects you from key file theft (cracking a 8-character password is estimated to cost over $1M (in 2026), scales with password complexity and the performance of the computer at the moment of encryption).
To encrypt your key, run `cabal run stellar-veritas -- encrypt` (or just `stellar-veritas encrypt` after installation) and enter your password. Afterwards you'll be prompted your password every time you want to sign anything. `encrypt`ing an already-encrpyted key would result in decryption and re-encryption with new parameters and salt.
## Technical details
The project contains the code of trimmed-down non-core dependencies, mainly cryptographic libraries. To avoid using bundled libraries (to build against the current Hackage), do the same in the `src` directory. To further reduce the amount of code under audit, weeder can be used, although the utility is dubious.
Key encryption is implemented via XOR with Argon2i KDF with an adaptive iteration count.

View file

@ -0,0 +1,157 @@
-- |
-- Module : Crypto.KDF.Argon2
-- License : BSD-style
-- Maintainer : Vincent Hanquez <vincent@snarc.org>
-- Stability : experimental
-- Portability : unknown
--
-- Argon2 hashing function (P-H-C winner)
--
-- Recommended to use this module qualified
--
-- File started from Argon2.hs, from Oliver Charles
-- at https://github.com/ocharles/argon2
--
module Crypto.KDF.Argon2
(
Options(..)
, TimeCost
, MemoryCost
, Parallelism
, Variant(..)
, Version(..)
, defaultOptions
-- * Hashing function
, hash
) where
import Crypto.Internal.ByteArray (ByteArray, ByteArrayAccess)
import qualified Crypto.Internal.ByteArray as B
import Crypto.Error
import Control.Monad (when)
import Data.Word
import Foreign.C
import Foreign.Ptr
-- | Which variant of Argon2 to use. You should choose the variant that is most
-- applicable to your intention to hash inputs.
data Variant =
Argon2d -- ^ Argon2d is faster than Argon2i and uses data-depending memory access,
-- which makes it suitable for cryptocurrencies and applications with no
-- threats from side-channel timing attacks.
| Argon2i -- ^ Argon2i uses data-independent memory access, which is preferred
-- for password hashing and password-based key derivation. Argon2i
-- is slower as it makes more passes over the memory to protect from
-- tradeoff attacks.
| Argon2id -- ^ Argon2id is a hybrid of Argon2i and Argon2d, using a combination
-- of data-depending and data-independent memory accesses, which gives
-- some of Argon2i's resistance to side-channel cache timing attacks
-- and much of Argon2d's resistance to GPU cracking attacks
deriving (Eq,Ord,Read,Show,Enum,Bounded)
-- | Which version of Argon2 to use
data Version = Version10 | Version13
deriving (Eq,Ord,Read,Show,Enum,Bounded)
-- | The time cost, which defines the amount of computation realized and therefore the execution time, given in number of iterations.
--
-- 'FFI.ARGON2_MIN_TIME' <= 'hashIterations' <= 'FFI.ARGON2_MAX_TIME'
type TimeCost = Word32
-- | The memory cost, which defines the memory usage, given in kibibytes.
--
-- max 'FFI.ARGON2_MIN_MEMORY' (8 * 'hashParallelism') <= 'hashMemory' <= 'FFI.ARGON2_MAX_MEMORY'
type MemoryCost = Word32
-- | A parallelism degree, which defines the number of parallel threads.
--
-- 'FFI.ARGON2_MIN_LANES' <= 'hashParallelism' <= 'FFI.ARGON2_MAX_LANES' && 'FFI.ARGON_MIN_THREADS' <= 'hashParallelism' <= 'FFI.ARGON2_MAX_THREADS'
type Parallelism = Word32
-- | Parameters that can be adjusted to change the runtime performance of the
-- hashing.
data Options = Options
{ iterations :: !TimeCost
, memory :: !MemoryCost
, parallelism :: !Parallelism
, variant :: !Variant -- ^ Which variant of Argon2 to use.
, version :: !Version -- ^ Which version of Argon2 to use.
}
deriving (Eq,Ord,Read,Show)
saltMinLength :: Int
saltMinLength = 8
outputMinLength :: Int
outputMinLength = 4
-- specification allows up to 2^32-1 but this is too big for a signed Int
-- on a 32-bit architecture, so we limit tag length to 2^31-1 bytes
outputMaxLength :: Int
outputMaxLength = 0x7fffffff
defaultOptions :: Options
defaultOptions =
Options { iterations = 1
, memory = 2 ^ (17 :: Int)
, parallelism = 4
, variant = Argon2i
, version = Version13
}
hash :: (ByteArrayAccess password, ByteArrayAccess salt, ByteArray out)
=> Options
-> password
-> salt
-> Int
-> CryptoFailable out
hash options password salt outLen
| saltLen < saltMinLength = CryptoFailed CryptoError_SaltTooSmall
| outLen < outputMinLength = CryptoFailed CryptoError_OutputLengthTooSmall
| outLen > outputMaxLength = CryptoFailed CryptoError_OutputLengthTooBig
| otherwise = CryptoPassed $ B.allocAndFreeze outLen $ \out -> do
res <- B.withByteArray password $ \pPass ->
B.withByteArray salt $ \pSalt ->
argon2_hash (iterations options)
(memory options)
(parallelism options)
pPass
(csizeOfInt passwordLen)
pSalt
(csizeOfInt saltLen)
out
(csizeOfInt outLen)
(cOfVariant $ variant options)
(cOfVersion $ version options)
when (res /= 0) $ error "argon2: hash: internal error"
where
saltLen = B.length salt
passwordLen = B.length password
data Pass
data Salt
data HashOut
type CVariant = CInt -- valid value is 0 (Argon2d), 1 (Argon2i) and 2 (Argon2id)
type CVersion = CInt -- valid value is 0x10, 0x13
cOfVersion :: Version -> CVersion
cOfVersion Version10 = 0x10
cOfVersion Version13 = 0x13
cOfVariant :: Variant -> CVariant
cOfVariant Argon2d = 0
cOfVariant Argon2i = 1
cOfVariant Argon2id = 2
csizeOfInt :: Int -> CSize
csizeOfInt = fromIntegral
foreign import ccall unsafe "cryptonite_argon2_hash"
argon2_hash :: Word32 -> Word32 -> Word32
-> Ptr Pass -> CSize
-> Ptr Salt -> CSize
-> Ptr HashOut -> CSize
-> CVariant
-> CVersion
-> IO CInt

View file

@ -0,0 +1,142 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "argon2.h"
#include "core.c"
int cryptonite_argon2_ctx(argon2_context *context, argon2_type type) {
/* 1. Validate all inputs */
int result = validate_inputs(context);
uint32_t memory_blocks, segment_length;
argon2_instance_t instance;
if (ARGON2_OK != result) {
return result;
}
if (Argon2_d != type && Argon2_i != type && Argon2_id != type) {
return ARGON2_INCORRECT_TYPE;
}
/* 2. Align memory size */
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
memory_blocks = context->m_cost;
if (memory_blocks < 2 * ARGON2_SYNC_POINTS * context->lanes) {
memory_blocks = 2 * ARGON2_SYNC_POINTS * context->lanes;
}
segment_length = memory_blocks / (context->lanes * ARGON2_SYNC_POINTS);
/* Ensure that all segments have equal length */
memory_blocks = segment_length * (context->lanes * ARGON2_SYNC_POINTS);
instance.version = context->version;
instance.memory = NULL;
instance.passes = context->t_cost;
instance.memory_blocks = memory_blocks;
instance.segment_length = segment_length;
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
instance.lanes = context->lanes;
instance.threads = context->threads;
instance.type = type;
/* 3. Initialization: Hashing inputs, allocating memory, filling first
* blocks
*/
result = initialize(&instance, context);
if (ARGON2_OK != result) {
return result;
}
/* 4. Filling memory */
result = fill_memory_blocks(&instance);
if (ARGON2_OK != result) {
return result;
}
/* 5. Finalization */
finalize(context, &instance);
return ARGON2_OK;
}
int cryptonite_argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt, const size_t saltlen,
void *hash, const size_t hashlen, argon2_type type,
const uint32_t version){
argon2_context context;
int result;
uint8_t *out;
if (hashlen > ARGON2_MAX_OUTLEN) {
return ARGON2_OUTPUT_TOO_LONG;
}
if (hashlen < ARGON2_MIN_OUTLEN) {
return ARGON2_OUTPUT_TOO_SHORT;
}
out = malloc(hashlen);
if (!out) {
return ARGON2_MEMORY_ALLOCATION_ERROR;
}
context.out = (uint8_t *)out;
context.outlen = (uint32_t)hashlen;
context.pwd = CONST_CAST(uint8_t *)pwd;
context.pwdlen = (uint32_t)pwdlen;
context.salt = CONST_CAST(uint8_t *)salt;
context.saltlen = (uint32_t)saltlen;
context.secret = NULL;
context.secretlen = 0;
context.ad = NULL;
context.adlen = 0;
context.t_cost = t_cost;
context.m_cost = m_cost;
context.lanes = parallelism;
context.threads = parallelism;
context.allocate_cbk = NULL;
context.free_cbk = NULL;
context.flags = ARGON2_DEFAULT_FLAGS;
context.version = version;
result = cryptonite_argon2_ctx(&context, type);
if (result != ARGON2_OK) {
clear_internal_memory(out, hashlen);
free(out);
return result;
}
/* if raw hash requested, write it */
if (hash) {
memcpy(hash, out, hashlen);
}
clear_internal_memory(out, hashlen);
free(out);
return ARGON2_OK;
}

View file

@ -0,0 +1,267 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#ifndef ARGON2_H
#define ARGON2_H
#include <stdint.h>
#include <stddef.h>
#include <limits.h>
#if defined(__cplusplus)
extern "C" {
#endif
/* Symbols visibility control */
#ifdef A2_VISCTL
#define ARGON2_PUBLIC __attribute__((visibility("default")))
#elif _MSC_VER
#define ARGON2_PUBLIC __declspec(dllexport)
#else
#define ARGON2_PUBLIC
#endif
/*
* Argon2 input parameter restrictions
*/
/* Minimum and maximum number of lanes (degree of parallelism) */
#define ARGON2_MIN_LANES UINT32_C(1)
#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)
/* Minimum and maximum number of threads */
#define ARGON2_MIN_THREADS UINT32_C(1)
#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)
/* Number of synchronization points between lanes per pass */
#define ARGON2_SYNC_POINTS UINT32_C(4)
/* Minimum and maximum digest size in bytes */
#define ARGON2_MIN_OUTLEN UINT32_C(4)
#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)
/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */
#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */
#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */
#define ARGON2_MAX_MEMORY_BITS \
ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
#define ARGON2_MAX_MEMORY \
ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)
/* Minimum and maximum number of passes */
#define ARGON2_MIN_TIME UINT32_C(1)
#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)
/* Minimum and maximum password length in bytes */
#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum associated data length in bytes */
#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum salt length in bytes */
#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum key length in bytes */
#define ARGON2_MIN_SECRET UINT32_C(0)
#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)
/* Flags to determine which fields are securely wiped (default = no wipe). */
#define ARGON2_DEFAULT_FLAGS UINT32_C(0)
#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
/* Global flag to determine if we are wiping internal memory buffers. This flag
* is defined in core.c and deafults to 1 (wipe internal memory). */
//extern int FLAG_clear_internal_memory;
/* Error codes */
typedef enum Argon2_ErrorCodes {
ARGON2_OK = 0,
ARGON2_OUTPUT_PTR_NULL = -1,
ARGON2_OUTPUT_TOO_SHORT = -2,
ARGON2_OUTPUT_TOO_LONG = -3,
ARGON2_PWD_TOO_SHORT = -4,
ARGON2_PWD_TOO_LONG = -5,
ARGON2_SALT_TOO_SHORT = -6,
ARGON2_SALT_TOO_LONG = -7,
ARGON2_AD_TOO_SHORT = -8,
ARGON2_AD_TOO_LONG = -9,
ARGON2_SECRET_TOO_SHORT = -10,
ARGON2_SECRET_TOO_LONG = -11,
ARGON2_TIME_TOO_SMALL = -12,
ARGON2_TIME_TOO_LARGE = -13,
ARGON2_MEMORY_TOO_LITTLE = -14,
ARGON2_MEMORY_TOO_MUCH = -15,
ARGON2_LANES_TOO_FEW = -16,
ARGON2_LANES_TOO_MANY = -17,
ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */
ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */
ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */
ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */
ARGON2_MEMORY_ALLOCATION_ERROR = -22,
ARGON2_FREE_MEMORY_CBK_NULL = -23,
ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24,
ARGON2_INCORRECT_PARAMETER = -25,
ARGON2_INCORRECT_TYPE = -26,
ARGON2_OUT_PTR_MISMATCH = -27,
ARGON2_THREADS_TOO_FEW = -28,
ARGON2_THREADS_TOO_MANY = -29,
ARGON2_MISSING_ARGS = -30,
ARGON2_ENCODING_FAIL = -31,
ARGON2_DECODING_FAIL = -32,
ARGON2_THREAD_FAIL = -33,
ARGON2_DECODING_LENGTH_FAIL = -34,
ARGON2_VERIFY_MISMATCH = -35
} argon2_error_codes;
/* Memory allocator types --- for external allocation */
typedef int (*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate);
typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate);
/* Argon2 external data structures */
/*
*****
* Context: structure to hold Argon2 inputs:
* output array and its length,
* password and its length,
* salt and its length,
* secret and its length,
* associated data and its length,
* number of passes, amount of used memory (in KBytes, can be rounded up a bit)
* number of parallel threads that will be run.
* All the parameters above affect the output hash value.
* Additionally, two function pointers can be provided to allocate and
* deallocate the memory (if NULL, memory will be allocated internally).
* Also, three flags indicate whether to erase password, secret as soon as they
* are pre-hashed (and thus not needed anymore), and the entire memory
*****
* Simplest situation: you have output array out[8], password is stored in
* pwd[32], salt is stored in salt[16], you do not have keys nor associated
* data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with
* 4 parallel lanes.
* You want to erase the password, but you're OK with last pass not being
* erased. You want to use the default memory allocator.
* Then you initialize:
Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false)
*/
typedef struct Argon2_Context {
uint8_t *out; /* output array */
uint32_t outlen; /* digest length */
uint8_t *pwd; /* password array */
uint32_t pwdlen; /* password length */
uint8_t *salt; /* salt array */
uint32_t saltlen; /* salt length */
uint8_t *secret; /* key array */
uint32_t secretlen; /* key length */
uint8_t *ad; /* associated data array */
uint32_t adlen; /* associated data length */
uint32_t t_cost; /* number of passes */
uint32_t m_cost; /* amount of memory requested (KB) */
uint32_t lanes; /* number of lanes */
uint32_t threads; /* maximum number of threads */
uint32_t version; /* version number */
allocate_fptr allocate_cbk; /* pointer to memory allocator */
deallocate_fptr free_cbk; /* pointer to memory deallocator */
uint32_t flags; /* array of bool options */
} argon2_context;
/* Argon2 primitive type */
typedef enum Argon2_type {
Argon2_d = 0,
Argon2_i = 1,
Argon2_id = 2
} argon2_type;
/* Version of the algorithm */
typedef enum Argon2_version {
ARGON2_VERSION_10 = 0x10,
ARGON2_VERSION_13 = 0x13,
ARGON2_VERSION_NUMBER = ARGON2_VERSION_13
} argon2_version;
/*
* Function that performs memory-hard hashing with certain degree of parallelism
* @param context Pointer to the Argon2 internal structure
* @return Error code if smth is wrong, ARGON2_OK otherwise
*/
ARGON2_PUBLIC int cryptonite_argon2_ctx(argon2_context *context, argon2_type type);
/**
* Hashes a password with Argon2i, producing a raw hash by allocating memory at
* @hash
* @param t_cost Number of iterations
* @param m_cost Sets memory usage to m_cost kibibytes
* @param parallelism Number of threads and compute lanes
* @param pwd Pointer to password
* @param pwdlen Password size in bytes
* @param salt Pointer to salt
* @param saltlen Salt size in bytes
* @param hash Buffer where to write the raw hash - updated by the function
* @param hashlen Desired length of the hash in bytes
* @pre Different parallelism levels will give different results
* @pre Returns ARGON2_OK if successful
*/
/* generic function underlying the above ones */
ARGON2_PUBLIC int cryptonite_argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash,
const size_t hashlen, argon2_type type,
const uint32_t version);
#if defined(__cplusplus)
}
#endif
#endif

View file

@ -0,0 +1,180 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#ifndef BLAKE_ROUND_MKA_OPT_H
#define BLAKE_ROUND_MKA_OPT_H
#include "blake2-impl.h"
#include <emmintrin.h>
#if defined(__SSSE3__)
#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
#endif
#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__))
#include <x86intrin.h>
#endif
#if !defined(__XOP__)
#if defined(__SSSE3__)
#define r16 \
(_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
#define r24 \
(_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
#define _mm_roti_epi64(x, c) \
(-(c) == 32) \
? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
: (-(c) == 24) \
? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) \
? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) \
? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_slli_epi64((x), 64 - (-(c))))
#else /* defined(__SSE2__) */
#define _mm_roti_epi64(r, c) \
_mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
#endif
#else
#endif
static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
const __m128i z = _mm_mul_epu32(x, y);
return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
}
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = fBlaMka(A0, B0); \
A1 = fBlaMka(A1, B1); \
\
D0 = _mm_xor_si128(D0, A0); \
D1 = _mm_xor_si128(D1, A1); \
\
D0 = _mm_roti_epi64(D0, -32); \
D1 = _mm_roti_epi64(D1, -32); \
\
C0 = fBlaMka(C0, D0); \
C1 = fBlaMka(C1, D1); \
\
B0 = _mm_xor_si128(B0, C0); \
B1 = _mm_xor_si128(B1, C1); \
\
B0 = _mm_roti_epi64(B0, -24); \
B1 = _mm_roti_epi64(B1, -24); \
} while ((void)0, 0)
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = fBlaMka(A0, B0); \
A1 = fBlaMka(A1, B1); \
\
D0 = _mm_xor_si128(D0, A0); \
D1 = _mm_xor_si128(D1, A1); \
\
D0 = _mm_roti_epi64(D0, -16); \
D1 = _mm_roti_epi64(D1, -16); \
\
C0 = fBlaMka(C0, D0); \
C1 = fBlaMka(C1, D1); \
\
B0 = _mm_xor_si128(B0, C0); \
B1 = _mm_xor_si128(B1, C1); \
\
B0 = _mm_roti_epi64(B0, -63); \
B1 = _mm_roti_epi64(B1, -63); \
} while ((void)0, 0)
#if defined(__SSSE3__)
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
__m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
B0 = t0; \
B1 = t1; \
\
t0 = C0; \
C0 = C1; \
C1 = t0; \
\
t0 = _mm_alignr_epi8(D1, D0, 8); \
t1 = _mm_alignr_epi8(D0, D1, 8); \
D0 = t1; \
D1 = t0; \
} while ((void)0, 0)
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
__m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
B0 = t0; \
B1 = t1; \
\
t0 = C0; \
C0 = C1; \
C1 = t0; \
\
t0 = _mm_alignr_epi8(D0, D1, 8); \
t1 = _mm_alignr_epi8(D1, D0, 8); \
D0 = t1; \
D1 = t0; \
} while ((void)0, 0)
#else /* SSE2 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = D0; \
__m128i t1 = B0; \
D0 = C0; \
C0 = C1; \
C1 = D0; \
D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
} while ((void)0, 0)
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0, t1; \
t0 = C0; \
C0 = C1; \
C1 = t0; \
t0 = B0; \
t1 = D0; \
B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
} while ((void)0, 0)
#endif
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
#endif

View file

@ -0,0 +1,56 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#ifndef BLAKE_ROUND_MKA_H
#define BLAKE_ROUND_MKA_H
#include "blake2.h"
#include "blake2-impl.h"
/*designed by the Lyra PHC team */
static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
const uint64_t m = UINT64_C(0xFFFFFFFF);
const uint64_t xy = (x & m) * (y & m);
return x + y + 2 * xy;
}
#define G(a, b, c, d) \
do { \
a = fBlaMka(a, b); \
d = rotr64(d ^ a, 32); \
c = fBlaMka(c, d); \
b = rotr64(b ^ c, 24); \
a = fBlaMka(a, b); \
d = rotr64(d ^ a, 16); \
c = fBlaMka(c, d); \
b = rotr64(b ^ c, 63); \
} while ((void)0, 0)
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
v12, v13, v14, v15) \
do { \
G(v0, v4, v8, v12); \
G(v1, v5, v9, v13); \
G(v2, v6, v10, v14); \
G(v3, v7, v11, v15); \
G(v0, v5, v10, v15); \
G(v1, v6, v11, v12); \
G(v2, v7, v8, v13); \
G(v3, v4, v9, v14); \
} while ((void)0, 0)
#endif

701
bundled/cbits/argon2/core.c Normal file
View file

@ -0,0 +1,701 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
/*For memory wiping*/
#ifdef _MSC_VER
#include <windows.h>
#include <winbase.h> /* For SecureZeroMemory */
#endif
#if defined __STDC_LIB_EXT1__
#define __STDC_WANT_LIB_EXT1__ 1
#endif
#define VC_GE_2005(version) (version >= 1400)
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "core.h"
#include "thread.c"
#include "blake2.h"
#include "blake2-impl.h"
#ifdef GENKAT
#include "genkat.h"
#endif
#ifdef SUPPORT_SSE
#include "opt.c"
#else
#include "ref.c"
#endif
#if defined(__clang__)
#if __has_attribute(optnone)
#define NOT_OPTIMIZED __attribute__((optnone))
#endif
#elif defined(__GNUC__)
#define GCC_VERSION \
(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#if GCC_VERSION >= 40400
#define NOT_OPTIMIZED __attribute__((optimize("O0")))
#endif
#endif
#ifndef NOT_OPTIMIZED
#define NOT_OPTIMIZED
#endif
/* Argon2 Team - Begin Code */
static int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
uint8_t *out = (uint8_t *)pout;
blake2b_state blake_state;
uint8_t outlen_bytes[sizeof(uint32_t)] = {0};
int ret = -1;
if (outlen > UINT32_MAX) {
goto fail;
}
/* Ensure little-endian byte order! */
store32(outlen_bytes, (uint32_t)outlen);
#define TRY(statement) \
do { \
ret = statement; \
if (ret < 0) { \
goto fail; \
} \
} while ((void)0, 0)
if (outlen <= BLAKE2B_OUTBYTES) {
TRY(_cryptonite_blake2b_init(&blake_state, outlen));
TRY(_cryptonite_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(_cryptonite_blake2b_update(&blake_state, in, inlen));
TRY(_cryptonite_blake2b_final(&blake_state, out, outlen));
} else {
uint32_t toproduce;
uint8_t out_buffer[BLAKE2B_OUTBYTES];
uint8_t in_buffer[BLAKE2B_OUTBYTES];
TRY(_cryptonite_blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
TRY(_cryptonite_blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(_cryptonite_blake2b_update(&blake_state, in, inlen));
TRY(_cryptonite_blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
out += BLAKE2B_OUTBYTES / 2;
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
while (toproduce > BLAKE2B_OUTBYTES) {
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
TRY(_cryptonite_blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
BLAKE2B_OUTBYTES, NULL, 0));
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
out += BLAKE2B_OUTBYTES / 2;
toproduce -= BLAKE2B_OUTBYTES / 2;
}
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
TRY(_cryptonite_blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
0));
memcpy(out, out_buffer, toproduce);
}
fail:
clear_internal_memory(&blake_state, sizeof(blake_state));
return ret;
#undef TRY
}
/* Argon2 Team - End Code */
/***************Instance and Position constructors**********/
static void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
static void copy_block(block *dst, const block *src) {
memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK);
}
static void xor_block(block *dst, const block *src) {
int i;
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
dst->v[i] ^= src->v[i];
}
}
static void load_block(block *dst, const void *input) {
unsigned i;
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i]));
}
}
static void store_block(void *output, const block *src) {
unsigned i;
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]);
}
}
/***************Memory functions*****************/
static
int allocate_memory(const argon2_context *context, uint8_t **memory,
size_t num, size_t size) {
size_t memory_size = num*size;
if (memory == NULL) {
return ARGON2_MEMORY_ALLOCATION_ERROR;
}
/* 1. Check for multiplication overflow */
if (size != 0 && memory_size / size != num) {
return ARGON2_MEMORY_ALLOCATION_ERROR;
}
/* 2. Try to allocate with appropriate allocator */
if (context->allocate_cbk) {
(context->allocate_cbk)(memory, memory_size);
} else {
*memory = malloc(memory_size);
}
if (*memory == NULL) {
return ARGON2_MEMORY_ALLOCATION_ERROR;
}
return ARGON2_OK;
}
static
void free_memory(const argon2_context *context, uint8_t *memory,
size_t num, size_t size) {
size_t memory_size = num*size;
clear_internal_memory(memory, memory_size);
if (context->free_cbk) {
(context->free_cbk)(memory, memory_size);
} else {
free(memory);
}
}
void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) {
#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER)
SecureZeroMemory(v, n);
#elif defined memset_s
memset_s(v, n, 0, n);
#elif defined(__OpenBSD__)
explicit_bzero(v, n);
#else
static void *(*const volatile memset_sec)(void *, int, size_t) = &memset;
memset_sec(v, 0, n);
#endif
}
/* Memory clear flag defaults to true. */
static int FLAG_clear_internal_memory = 1;
static void clear_internal_memory(void *v, size_t n) {
if (FLAG_clear_internal_memory && v) {
secure_wipe_memory(v, n);
}
}
static void finalize(const argon2_context *context, argon2_instance_t *instance) {
if (context != NULL && instance != NULL) {
block blockhash;
uint32_t l;
copy_block(&blockhash, instance->memory + instance->lane_length - 1);
/* XOR the last blocks */
for (l = 1; l < instance->lanes; ++l) {
uint32_t last_block_in_lane =
l * instance->lane_length + (instance->lane_length - 1);
xor_block(&blockhash, instance->memory + last_block_in_lane);
}
/* Hash the result */
{
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
store_block(blockhash_bytes, &blockhash);
blake2b_long(context->out, context->outlen, blockhash_bytes,
ARGON2_BLOCK_SIZE);
/* clear blockhash and blockhash_bytes */
clear_internal_memory(blockhash.v, ARGON2_BLOCK_SIZE);
clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
}
#ifdef GENKAT
print_tag(context->out, context->outlen);
#endif
free_memory(context, (uint8_t *)instance->memory,
instance->memory_blocks, sizeof(block));
}
}
static uint32_t index_alpha(const argon2_instance_t *instance,
const argon2_position_t *position, uint32_t pseudo_rand,
int same_lane) {
/*
* Pass 0:
* This lane : all already finished segments plus already constructed
* blocks in this segment
* Other lanes : all already finished segments
* Pass 1+:
* This lane : (SYNC_POINTS - 1) last segments plus already constructed
* blocks in this segment
* Other lanes : (SYNC_POINTS - 1) last segments
*/
uint32_t reference_area_size;
uint64_t relative_position;
uint32_t start_position, absolute_position;
if (0 == position->pass) {
/* First pass */
if (0 == position->slice) {
/* First slice */
reference_area_size =
position->index - 1; /* all but the previous */
} else {
if (same_lane) {
/* The same lane => add current segment */
reference_area_size =
position->slice * instance->segment_length +
position->index - 1;
} else {
reference_area_size =
position->slice * instance->segment_length +
((position->index == 0) ? (-1) : 0);
}
}
} else {
/* Second pass */
if (same_lane) {
reference_area_size = instance->lane_length -
instance->segment_length + position->index -
1;
} else {
reference_area_size = instance->lane_length -
instance->segment_length +
((position->index == 0) ? (-1) : 0);
}
}
/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
* relative position */
relative_position = pseudo_rand;
relative_position = relative_position * relative_position >> 32;
relative_position = reference_area_size - 1 -
(reference_area_size * relative_position >> 32);
/* 1.2.5 Computing starting position */
start_position = 0;
if (0 != position->pass) {
start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
? 0
: (position->slice + 1) * instance->segment_length;
}
/* 1.2.6. Computing absolute position */
absolute_position = (start_position + relative_position) %
instance->lane_length; /* absolute position */
return absolute_position;
}
/* Single-threaded version for p=1 case */
static int fill_memory_blocks_st(argon2_instance_t *instance) {
uint32_t r, s, l;
for (r = 0; r < instance->passes; ++r) {
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
for (l = 0; l < instance->lanes; ++l) {
argon2_position_t position = {r, l, (uint8_t)s, 0};
fill_segment(instance, position);
}
}
#ifdef GENKAT
internal_kat(instance, r); /* Print all memory blocks */
#endif
}
return ARGON2_OK;
}
#if !defined(ARGON2_NO_THREADS)
#ifdef _WIN32
static unsigned __stdcall fill_segment_thr(void *thread_data)
#else
static void *fill_segment_thr(void *thread_data)
#endif
{
argon2_thread_data *my_data = thread_data;
fill_segment(my_data->instance_ptr, my_data->pos);
argon2_thread_exit();
return 0;
}
/* Multi-threaded version for p > 1 case */
static int fill_memory_blocks_mt(argon2_instance_t *instance) {
uint32_t r, s;
argon2_thread_handle_t *thread = NULL;
argon2_thread_data *thr_data = NULL;
int rc = ARGON2_OK;
/* 1. Allocating space for threads */
thread = calloc(instance->lanes, sizeof(argon2_thread_handle_t));
if (thread == NULL) {
rc = ARGON2_MEMORY_ALLOCATION_ERROR;
goto fail;
}
thr_data = calloc(instance->lanes, sizeof(argon2_thread_data));
if (thr_data == NULL) {
rc = ARGON2_MEMORY_ALLOCATION_ERROR;
goto fail;
}
for (r = 0; r < instance->passes; ++r) {
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
uint32_t l;
/* 2. Calling threads */
for (l = 0; l < instance->lanes; ++l) {
argon2_position_t position;
/* 2.1 Join a thread if limit is exceeded */
if (l >= instance->threads) {
if (argon2_thread_join(thread[l - instance->threads])) {
rc = ARGON2_THREAD_FAIL;
goto fail;
}
}
/* 2.2 Create thread */
position.pass = r;
position.lane = l;
position.slice = (uint8_t)s;
position.index = 0;
thr_data[l].instance_ptr =
instance; /* preparing the thread input */
memcpy(&(thr_data[l].pos), &position,
sizeof(argon2_position_t));
if (argon2_thread_create(&thread[l], &fill_segment_thr,
(void *)&thr_data[l])) {
rc = ARGON2_THREAD_FAIL;
goto fail;
}
/* fill_segment(instance, position); */
/*Non-thread equivalent of the lines above */
}
/* 3. Joining remaining threads */
for (l = instance->lanes - instance->threads; l < instance->lanes;
++l) {
if (argon2_thread_join(thread[l])) {
rc = ARGON2_THREAD_FAIL;
goto fail;
}
}
}
#ifdef GENKAT
internal_kat(instance, r); /* Print all memory blocks */
#endif
}
fail:
if (thread != NULL) {
free(thread);
}
if (thr_data != NULL) {
free(thr_data);
}
return rc;
}
#endif /* ARGON2_NO_THREADS */
static
int fill_memory_blocks(argon2_instance_t *instance) {
if (instance == NULL || instance->lanes == 0) {
return ARGON2_INCORRECT_PARAMETER;
}
#if defined(ARGON2_NO_THREADS)
return fill_memory_blocks_st(instance);
#else
return instance->threads == 1 ?
fill_memory_blocks_st(instance) : fill_memory_blocks_mt(instance);
#endif
}
static
int validate_inputs(const argon2_context *context) {
if (NULL == context) {
return ARGON2_INCORRECT_PARAMETER;
}
if (NULL == context->out) {
return ARGON2_OUTPUT_PTR_NULL;
}
/* Validate output length */
if (ARGON2_MIN_OUTLEN > context->outlen) {
return ARGON2_OUTPUT_TOO_SHORT;
}
if (ARGON2_MAX_OUTLEN < context->outlen) {
return ARGON2_OUTPUT_TOO_LONG;
}
/* Validate password (required param) */
if (NULL == context->pwd) {
if (0 != context->pwdlen) {
return ARGON2_PWD_PTR_MISMATCH;
}
}
if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) {
return ARGON2_PWD_TOO_SHORT;
}
if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) {
return ARGON2_PWD_TOO_LONG;
}
/* Validate salt (required param) */
if (NULL == context->salt) {
if (0 != context->saltlen) {
return ARGON2_SALT_PTR_MISMATCH;
}
}
if (ARGON2_MIN_SALT_LENGTH > context->saltlen) {
return ARGON2_SALT_TOO_SHORT;
}
if (ARGON2_MAX_SALT_LENGTH < context->saltlen) {
return ARGON2_SALT_TOO_LONG;
}
/* Validate secret (optional param) */
if (NULL == context->secret) {
if (0 != context->secretlen) {
return ARGON2_SECRET_PTR_MISMATCH;
}
} else {
if (ARGON2_MIN_SECRET > context->secretlen) {
return ARGON2_SECRET_TOO_SHORT;
}
if (ARGON2_MAX_SECRET < context->secretlen) {
return ARGON2_SECRET_TOO_LONG;
}
}
/* Validate associated data (optional param) */
if (NULL == context->ad) {
if (0 != context->adlen) {
return ARGON2_AD_PTR_MISMATCH;
}
} else {
if (ARGON2_MIN_AD_LENGTH > context->adlen) {
return ARGON2_AD_TOO_SHORT;
}
if (ARGON2_MAX_AD_LENGTH < context->adlen) {
return ARGON2_AD_TOO_LONG;
}
}
/* Validate memory cost */
if (ARGON2_MIN_MEMORY > context->m_cost) {
return ARGON2_MEMORY_TOO_LITTLE;
}
if (ARGON2_MAX_MEMORY < context->m_cost) {
return ARGON2_MEMORY_TOO_MUCH;
}
if (context->m_cost < 8 * context->lanes) {
return ARGON2_MEMORY_TOO_LITTLE;
}
/* Validate time cost */
if (ARGON2_MIN_TIME > context->t_cost) {
return ARGON2_TIME_TOO_SMALL;
}
if (ARGON2_MAX_TIME < context->t_cost) {
return ARGON2_TIME_TOO_LARGE;
}
/* Validate lanes */
if (ARGON2_MIN_LANES > context->lanes) {
return ARGON2_LANES_TOO_FEW;
}
if (ARGON2_MAX_LANES < context->lanes) {
return ARGON2_LANES_TOO_MANY;
}
/* Validate threads */
if (ARGON2_MIN_THREADS > context->threads) {
return ARGON2_THREADS_TOO_FEW;
}
if (ARGON2_MAX_THREADS < context->threads) {
return ARGON2_THREADS_TOO_MANY;
}
if (NULL != context->allocate_cbk && NULL == context->free_cbk) {
return ARGON2_FREE_MEMORY_CBK_NULL;
}
if (NULL == context->allocate_cbk && NULL != context->free_cbk) {
return ARGON2_ALLOCATE_MEMORY_CBK_NULL;
}
return ARGON2_OK;
}
static
void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
uint32_t l;
/* Make the first and second block in each lane as G(H0||i||0) or
G(H0||i||1) */
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
for (l = 0; l < instance->lanes; ++l) {
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
ARGON2_PREHASH_SEED_LENGTH);
load_block(&instance->memory[l * instance->lane_length + 0],
blockhash_bytes);
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
ARGON2_PREHASH_SEED_LENGTH);
load_block(&instance->memory[l * instance->lane_length + 1],
blockhash_bytes);
}
clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
}
static
void initial_hash(uint8_t *blockhash, argon2_context *context,
argon2_type type) {
blake2b_state BlakeHash;
uint8_t value[sizeof(uint32_t)];
if (NULL == context || NULL == blockhash) {
return;
}
_cryptonite_blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
store32(&value, context->lanes);
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->outlen);
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->m_cost);
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->t_cost);
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->version);
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, (uint32_t)type);
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->pwdlen);
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->pwd != NULL) {
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
context->pwdlen);
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
secure_wipe_memory(context->pwd, context->pwdlen);
context->pwdlen = 0;
}
}
store32(&value, context->saltlen);
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->salt != NULL) {
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)context->salt,
context->saltlen);
}
store32(&value, context->secretlen);
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->secret != NULL) {
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)context->secret,
context->secretlen);
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
secure_wipe_memory(context->secret, context->secretlen);
context->secretlen = 0;
}
}
store32(&value, context->adlen);
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->ad != NULL) {
_cryptonite_blake2b_update(&BlakeHash, (const uint8_t *)context->ad,
context->adlen);
}
_cryptonite_blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
}
static
int initialize(argon2_instance_t *instance, argon2_context *context) {
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
int result = ARGON2_OK;
if (instance == NULL || context == NULL)
return ARGON2_INCORRECT_PARAMETER;
instance->context_ptr = context;
/* 1. Memory allocation */
result = allocate_memory(context, (uint8_t **)&(instance->memory),
instance->memory_blocks, sizeof(block));
if (result != ARGON2_OK) {
return result;
}
/* 2. Initial hashing */
/* H_0 + 8 extra bytes to produce the first blocks */
/* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */
/* Hashing all inputs */
initial_hash(blockhash, context, instance->type);
/* Zeroing 8 extra bytes */
clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
ARGON2_PREHASH_SEED_LENGTH -
ARGON2_PREHASH_DIGEST_LENGTH);
#ifdef GENKAT
initial_kat(blockhash, context, instance->type);
#endif
/* 3. Creating first blocks, we always have at least two blocks in a slice
*/
fill_first_blocks(blockhash, instance);
/* Clearing the hash */
clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
return ARGON2_OK;
}

234
bundled/cbits/argon2/core.h Normal file
View file

@ -0,0 +1,234 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#ifndef ARGON2_CORE_H
#define ARGON2_CORE_H
#include "argon2.h"
#if defined(_MSC_VER)
#define ALIGN(n) __declspec(align(16))
#elif defined(__GNUC__) || defined(__clang)
#define ALIGN(x) __attribute__((__aligned__(x)))
#else
#define ALIGN(x)
#endif
#define CONST_CAST(x) (x)(uintptr_t)
/**********************Argon2 internal constants*******************************/
enum argon2_core_constants {
/* Memory block size in bytes */
ARGON2_BLOCK_SIZE = 1024,
ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
/* Number of pseudo-random values generated by one call to Blake in Argon2i
to
generate reference block positions */
ARGON2_ADDRESSES_IN_BLOCK = 128,
/* Pre-hashing digest length and its extension*/
ARGON2_PREHASH_DIGEST_LENGTH = 64,
ARGON2_PREHASH_SEED_LENGTH = 72
};
/*************************Argon2 internal data types***********************/
/*
* Structure for the (1KB) memory block implemented as 128 64-bit words.
* Memory blocks can be copied, XORed. Internal words can be accessed by [] (no
* bounds checking).
*/
typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block;
/*****************Functions that work with the block******************/
/* Initialize each byte of the block with @in */
static void init_block_value(block *b, uint8_t in);
/* Copy block @src to block @dst */
static void copy_block(block *dst, const block *src);
/* XOR @src onto @dst bytewise */
static void xor_block(block *dst, const block *src);
/*
* Argon2 instance: memory pointer, number of passes, amount of memory, type,
* and derived values.
* Used to evaluate the number and location of blocks to construct in each
* thread
*/
typedef struct Argon2_instance_t {
block *memory; /* Memory pointer */
uint32_t version;
uint32_t passes; /* Number of passes */
uint32_t memory_blocks; /* Number of blocks in memory */
uint32_t segment_length;
uint32_t lane_length;
uint32_t lanes;
uint32_t threads;
argon2_type type;
int print_internals; /* whether to print the memory blocks */
argon2_context *context_ptr; /* points back to original context */
} argon2_instance_t;
/*
* Argon2 position: where we construct the block right now. Used to distribute
* work between threads.
*/
typedef struct Argon2_position_t {
uint32_t pass;
uint32_t lane;
uint8_t slice;
uint32_t index;
} argon2_position_t;
/*Struct that holds the inputs for thread handling FillSegment*/
typedef struct Argon2_thread_data {
argon2_instance_t *instance_ptr;
argon2_position_t pos;
} argon2_thread_data;
/*************************Argon2 core functions********************************/
/* Allocates memory to the given pointer, uses the appropriate allocator as
* specified in the context. Total allocated memory is num*size.
* @param context argon2_context which specifies the allocator
* @param memory pointer to the pointer to the memory
* @param size the size in bytes for each element to be allocated
* @param num the number of elements to be allocated
* @return ARGON2_OK if @memory is a valid pointer and memory is allocated
*/
static int allocate_memory(const argon2_context *context, uint8_t **memory,
size_t num, size_t size);
/*
* Frees memory at the given pointer, uses the appropriate deallocator as
* specified in the context. Also cleans the memory using clear_internal_memory.
* @param context argon2_context which specifies the deallocator
* @param memory pointer to buffer to be freed
* @param size the size in bytes for each element to be deallocated
* @param num the number of elements to be deallocated
*/
static void free_memory(const argon2_context *context, uint8_t *memory,
size_t num, size_t size);
/* Function that securely cleans the memory. This ignores any flags set
* regarding clearing memory. Usually one just calls clear_internal_memory.
* @param mem Pointer to the memory
* @param s Memory size in bytes
*/
static void secure_wipe_memory(void *v, size_t n);
/* Function that securely clears the memory if FLAG_clear_internal_memory is
* set. If the flag isn't set, this function does nothing.
* @param mem Pointer to the memory
* @param s Memory size in bytes
*/
static void clear_internal_memory(void *v, size_t n);
/*
* Computes absolute position of reference block in the lane following a skewed
* distribution and using a pseudo-random value as input
* @param instance Pointer to the current instance
* @param position Pointer to the current position
* @param pseudo_rand 32-bit pseudo-random value used to determine the position
* @param same_lane Indicates if the block will be taken from the current lane.
* If so we can reference the current segment
* @pre All pointers must be valid
*/
static uint32_t index_alpha(const argon2_instance_t *instance,
const argon2_position_t *position, uint32_t pseudo_rand,
int same_lane);
/*
* Function that validates all inputs against predefined restrictions and return
* an error code
* @param context Pointer to current Argon2 context
* @return ARGON2_OK if everything is all right, otherwise one of error codes
* (all defined in <argon2.h>
*/
static int validate_inputs(const argon2_context *context);
/*
* Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears
* password and secret if needed
* @param context Pointer to the Argon2 internal structure containing memory
* pointer, and parameters for time and space requirements.
* @param blockhash Buffer for pre-hashing digest
* @param type Argon2 type
* @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes
* allocated
*/
static void initial_hash(uint8_t *blockhash, argon2_context *context,
argon2_type type);
/*
* Function creates first 2 blocks per lane
* @param instance Pointer to the current instance
* @param blockhash Pointer to the pre-hashing digest
* @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values
*/
static void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
/*
* Function allocates memory, hashes the inputs with Blake, and creates first
* two blocks. Returns the pointer to the main memory with 2 blocks per lane
* initialized
* @param context Pointer to the Argon2 internal structure containing memory
* pointer, and parameters for time and space requirements.
* @param instance Current Argon2 instance
* @return Zero if successful, -1 if memory failed to allocate. @context->state
* will be modified if successful.
*/
static int initialize(argon2_instance_t *instance, argon2_context *context);
/*
* XORing the last block of each lane, hashing it, making the tag. Deallocates
* the memory.
* @param context Pointer to current Argon2 context (use only the out parameters
* from it)
* @param instance Pointer to current instance of Argon2
* @pre instance->state must point to necessary amount of memory
* @pre context->out must point to outlen bytes of memory
* @pre if context->free_cbk is not NULL, it should point to a function that
* deallocates memory
*/
static void finalize(const argon2_context *context, argon2_instance_t *instance);
/*
* Function that fills the segment using previous segments also from other
* threads
* @param context current context
* @param instance Pointer to the current instance
* @param position Current position
* @pre all block pointers must be valid
*/
static void fill_segment(const argon2_instance_t *instance,
argon2_position_t position);
/*
* Function that fills the entire memory t_cost times based on the first two
* blocks in each lane
* @param instance Pointer to the current instance
* @return ARGON2_OK if successful, @context->state
*/
static int fill_memory_blocks(argon2_instance_t *instance);
#endif

186
bundled/cbits/argon2/opt.c Normal file
View file

@ -0,0 +1,186 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "argon2.h"
#include "opt.h"
#include "blake2/blake2.h"
#include "blake2/blamka-round-opt.h"
static void fill_block(__m128i *state, const block *ref_block, block *next_block,
int with_xor) {
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
unsigned int i;
if (with_xor) {
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
state[i] = _mm_xor_si128(
state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
block_XY[i] = _mm_xor_si128(
state[i], _mm_loadu_si128((const __m128i *)next_block->v + i));
}
} else {
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
block_XY[i] = state[i] = _mm_xor_si128(
state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
}
}
for (i = 0; i < 8; ++i) {
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
state[8 * i + 6], state[8 * i + 7]);
}
for (i = 0; i < 8; ++i) {
BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
state[8 * 6 + i], state[8 * 7 + i]);
}
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
state[i] = _mm_xor_si128(state[i], block_XY[i]);
_mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
}
}
static void next_addresses(block *address_block, block *input_block) {
/*Temporary zero-initialized blocks*/
__m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
__m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];
memset(zero_block, 0, sizeof(zero_block));
memset(zero2_block, 0, sizeof(zero2_block));
/*Increasing index counter*/
input_block->v[6]++;
/*First iteration of G*/
fill_block(zero_block, input_block, address_block, 0);
/*Second iteration of G*/
fill_block(zero2_block, address_block, address_block, 0);
}
static void fill_segment(const argon2_instance_t *instance,
argon2_position_t position) {
block *ref_block = NULL, *curr_block = NULL;
block address_block, input_block;
uint64_t pseudo_rand, ref_index, ref_lane;
uint32_t prev_offset, curr_offset;
uint32_t starting_index, i;
__m128i state[64];
int data_independent_addressing;
if (instance == NULL) {
return;
}
data_independent_addressing =
(instance->type == Argon2_i) ||
(instance->type == Argon2_id && (position.pass == 0) &&
(position.slice < ARGON2_SYNC_POINTS / 2));
if (data_independent_addressing) {
init_block_value(&input_block, 0);
input_block.v[0] = position.pass;
input_block.v[1] = position.lane;
input_block.v[2] = position.slice;
input_block.v[3] = instance->memory_blocks;
input_block.v[4] = instance->passes;
input_block.v[5] = instance->type;
}
starting_index = 0;
if ((0 == position.pass) && (0 == position.slice)) {
starting_index = 2; /* we have already generated the first two blocks */
/* Don't forget to generate the first block of addresses: */
if (data_independent_addressing) {
next_addresses(&address_block, &input_block);
}
}
/* Offset of the current block */
curr_offset = position.lane * instance->lane_length +
position.slice * instance->segment_length + starting_index;
if (0 == curr_offset % instance->lane_length) {
/* Last block in this lane */
prev_offset = curr_offset + instance->lane_length - 1;
} else {
/* Previous block */
prev_offset = curr_offset - 1;
}
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
for (i = starting_index; i < instance->segment_length;
++i, ++curr_offset, ++prev_offset) {
/*1.1 Rotating prev_offset if needed */
if (curr_offset % instance->lane_length == 1) {
prev_offset = curr_offset - 1;
}
/* 1.2 Computing the index of the reference block */
/* 1.2.1 Taking pseudo-random value from the previous block */
if (data_independent_addressing) {
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
next_addresses(&address_block, &input_block);
}
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
} else {
pseudo_rand = instance->memory[prev_offset].v[0];
}
/* 1.2.2 Computing the lane of the reference block */
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
if ((position.pass == 0) && (position.slice == 0)) {
/* Can not reference other lanes yet */
ref_lane = position.lane;
}
/* 1.2.3 Computing the number of possible reference block within the
* lane.
*/
position.index = i;
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
ref_lane == position.lane);
/* 2 Creating a new block */
ref_block =
instance->memory + instance->lane_length * ref_lane + ref_index;
curr_block = instance->memory + curr_offset;
if (ARGON2_VERSION_10 == instance->version) {
/* version 1.2.1 and earlier: overwrite, not XOR */
fill_block(state, ref_block, curr_block, 0);
} else {
if(0 == position.pass) {
fill_block(state, ref_block, curr_block, 0);
} else {
fill_block(state, ref_block, curr_block, 1);
}
}
}
}

View file

@ -0,0 +1,35 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#ifndef ARGON2_OPT_H
#define ARGON2_OPT_H
#include "core.h"
#include <emmintrin.h>
/*
* Function fills a new memory block and optionally XORs the old block over the new one.
* Memory must be initialized.
* @param state Pointer to the just produced block. Content will be updated(!)
* @param ref_block Pointer to the reference block
* @param next_block Pointer to the block to be XORed over. May coincide with @ref_block
* @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
* @pre all block pointers must be valid
*/
static void fill_block(__m128i *s, const block *ref_block, block *next_block, int with_xor);
#endif /* ARGON2_OPT_H */

185
bundled/cbits/argon2/ref.c Normal file
View file

@ -0,0 +1,185 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "argon2.h"
#include "ref.h"
#include "blamka-round-ref.h"
#include "blake2-impl.h"
#include "blake2.h"
static void fill_block(const block *prev_block, const block *ref_block,
block *next_block, int with_xor) {
block blockR, block_tmp;
unsigned i;
copy_block(&blockR, ref_block);
xor_block(&blockR, prev_block);
copy_block(&block_tmp, &blockR);
/* Now blockR = ref_block + prev_block and block_tmp = ref_block + prev_block */
if (with_xor) {
/* Saving the next block contents for XOR over: */
xor_block(&block_tmp, next_block);
/* Now blockR = ref_block + prev_block and
block_tmp = ref_block + prev_block + next_block */
}
/* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then
(16,17,..31)... finally (112,113,...127) */
for (i = 0; i < 8; ++i) {
BLAKE2_ROUND_NOMSG(
blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2],
blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5],
blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8],
blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11],
blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14],
blockR.v[16 * i + 15]);
}
/* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then
(2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */
for (i = 0; i < 8; i++) {
BLAKE2_ROUND_NOMSG(
blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16],
blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33],
blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64],
blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81],
blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112],
blockR.v[2 * i + 113]);
}
copy_block(next_block, &block_tmp);
xor_block(next_block, &blockR);
}
static void next_addresses(block *address_block, block *input_block,
const block *zero_block) {
input_block->v[6]++;
fill_block(zero_block, input_block, address_block, 0);
fill_block(zero_block, address_block, address_block, 0);
}
static void fill_segment(const argon2_instance_t *instance,
argon2_position_t position) {
block *ref_block = NULL, *curr_block = NULL;
block address_block, input_block, zero_block;
uint64_t pseudo_rand, ref_index, ref_lane;
uint32_t prev_offset, curr_offset;
uint32_t starting_index;
uint32_t i;
int data_independent_addressing;
if (instance == NULL) {
return;
}
data_independent_addressing =
(instance->type == Argon2_i) ||
(instance->type == Argon2_id && (position.pass == 0) &&
(position.slice < ARGON2_SYNC_POINTS / 2));
if (data_independent_addressing) {
init_block_value(&zero_block, 0);
init_block_value(&input_block, 0);
input_block.v[0] = position.pass;
input_block.v[1] = position.lane;
input_block.v[2] = position.slice;
input_block.v[3] = instance->memory_blocks;
input_block.v[4] = instance->passes;
input_block.v[5] = instance->type;
}
starting_index = 0;
if ((0 == position.pass) && (0 == position.slice)) {
starting_index = 2; /* we have already generated the first two blocks */
/* Don't forget to generate the first block of addresses: */
if (data_independent_addressing) {
next_addresses(&address_block, &input_block, &zero_block);
}
}
/* Offset of the current block */
curr_offset = position.lane * instance->lane_length +
position.slice * instance->segment_length + starting_index;
if (0 == curr_offset % instance->lane_length) {
/* Last block in this lane */
prev_offset = curr_offset + instance->lane_length - 1;
} else {
/* Previous block */
prev_offset = curr_offset - 1;
}
for (i = starting_index; i < instance->segment_length;
++i, ++curr_offset, ++prev_offset) {
/*1.1 Rotating prev_offset if needed */
if (curr_offset % instance->lane_length == 1) {
prev_offset = curr_offset - 1;
}
/* 1.2 Computing the index of the reference block */
/* 1.2.1 Taking pseudo-random value from the previous block */
if (data_independent_addressing) {
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
next_addresses(&address_block, &input_block, &zero_block);
}
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
} else {
pseudo_rand = instance->memory[prev_offset].v[0];
}
/* 1.2.2 Computing the lane of the reference block */
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
if ((position.pass == 0) && (position.slice == 0)) {
/* Can not reference other lanes yet */
ref_lane = position.lane;
}
/* 1.2.3 Computing the number of possible reference block within the
* lane.
*/
position.index = i;
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
ref_lane == position.lane);
/* 2 Creating a new block */
ref_block =
instance->memory + instance->lane_length * ref_lane + ref_index;
curr_block = instance->memory + curr_offset;
if (ARGON2_VERSION_10 == instance->version) {
/* version 1.2.1 and earlier: overwrite, not XOR */
fill_block(instance->memory + prev_offset, ref_block, curr_block, 0);
} else {
if(0 == position.pass) {
fill_block(instance->memory + prev_offset, ref_block,
curr_block, 0);
} else {
fill_block(instance->memory + prev_offset, ref_block,
curr_block, 1);
}
}
}
}

View file

@ -0,0 +1,35 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#ifndef ARGON2_REF_H
#define ARGON2_REF_H
#include "core.h"
/*
* Function fills a new memory block and optionally XORs the old block over the new one.
* @next_block must be initialized.
* @param prev_block Pointer to the previous block
* @param ref_block Pointer to the reference block
* @param next_block Pointer to the block to be constructed
* @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
* @pre all block pointers must be valid
*/
static void fill_block(const block *prev_block, const block *ref_block,
block *next_block, int with_xor);
#endif /* ARGON2_REF_H */

View file

@ -0,0 +1,57 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#if !defined(ARGON2_NO_THREADS)
#include "thread.h"
#if defined(_WIN32)
#include <windows.h>
#endif
static int argon2_thread_create(argon2_thread_handle_t *handle,
argon2_thread_func_t func, void *args) {
if (NULL == handle || func == NULL) {
return -1;
}
#if defined(_WIN32)
*handle = _beginthreadex(NULL, 0, func, args, 0, NULL);
return *handle != 0 ? 0 : -1;
#else
return pthread_create(handle, NULL, func, args);
#endif
}
static int argon2_thread_join(argon2_thread_handle_t handle) {
#if defined(_WIN32)
if (WaitForSingleObject((HANDLE)handle, INFINITE) == WAIT_OBJECT_0) {
return CloseHandle((HANDLE)handle) != 0 ? 0 : -1;
}
return -1;
#else
return pthread_join(handle, NULL);
#endif
}
static void argon2_thread_exit(void) {
#if defined(_WIN32)
_endthreadex(0);
#else
pthread_exit(NULL);
#endif
}
#endif /* ARGON2_NO_THREADS */

View file

@ -0,0 +1,67 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#ifndef ARGON2_THREAD_H
#define ARGON2_THREAD_H
#if !defined(ARGON2_NO_THREADS)
/*
Here we implement an abstraction layer for the simpĺe requirements
of the Argon2 code. We only require 3 primitives---thread creation,
joining, and termination---so full emulation of the pthreads API
is unwarranted. Currently we wrap pthreads and Win32 threads.
The API defines 2 types: the function pointer type,
argon2_thread_func_t,
and the type of the thread handle---argon2_thread_handle_t.
*/
#if defined(_WIN32)
#include <process.h>
typedef unsigned(__stdcall *argon2_thread_func_t)(void *);
typedef uintptr_t argon2_thread_handle_t;
#else
#include <pthread.h>
typedef void *(*argon2_thread_func_t)(void *);
typedef pthread_t argon2_thread_handle_t;
#endif
/* Creates a thread
* @param handle pointer to a thread handle, which is the output of this
* function. Must not be NULL.
* @param func A function pointer for the thread's entry point. Must not be
* NULL.
* @param args Pointer that is passed as an argument to @func. May be NULL.
* @return 0 if @handle and @func are valid pointers and a thread is successfuly
* created.
*/
static int argon2_thread_create(argon2_thread_handle_t *handle,
argon2_thread_func_t func, void *args);
/* Waits for a thread to terminate
* @param handle Handle to a thread created with argon2_thread_create.
* @return 0 if @handle is a valid handle, and joining completed successfully.
*/
static int argon2_thread_join(argon2_thread_handle_t handle);
/* Terminate the current thread. Must be run inside a thread created by
* argon2_thread_create.
*/
static void argon2_thread_exit(void);
#endif /* ARGON2_NO_THREADS */
#endif

View file

@ -0,0 +1,72 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2_CONFIG_H
#define BLAKE2_CONFIG_H
/* These don't work everywhere */
#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__)
#define HAVE_SSE2
#endif
#if defined(__SSSE3__)
#define HAVE_SSSE3
#endif
#if defined(__SSE4_1__)
#define HAVE_SSE41
#endif
#if defined(__AVX__)
#define HAVE_AVX
#endif
#if defined(__XOP__)
#define HAVE_XOP
#endif
#ifdef HAVE_AVX2
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif
#ifdef HAVE_XOP
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif
#ifdef HAVE_AVX
#ifndef HAVE_SSE41
#define HAVE_SSE41
#endif
#endif
#ifdef HAVE_SSE41
#ifndef HAVE_SSSE3
#define HAVE_SSSE3
#endif
#endif
#ifdef HAVE_SSSE3
#define HAVE_SSE2
#endif
#if !defined(HAVE_SSE2)
#error "This code requires at least SSE2."
#endif
#endif

View file

@ -0,0 +1,160 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2_IMPL_H
#define BLAKE2_IMPL_H
#include <stdint.h>
#include <string.h>
#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
#if defined(_MSC_VER)
#define BLAKE2_INLINE __inline
#elif defined(__GNUC__)
#define BLAKE2_INLINE __inline__
#else
#define BLAKE2_INLINE
#endif
#else
#define BLAKE2_INLINE inline
#endif
static BLAKE2_INLINE uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint32_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return (( uint32_t )( p[0] ) << 0) |
(( uint32_t )( p[1] ) << 8) |
(( uint32_t )( p[2] ) << 16) |
(( uint32_t )( p[3] ) << 24) ;
#endif
}
static BLAKE2_INLINE uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint64_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return (( uint64_t )( p[0] ) << 0) |
(( uint64_t )( p[1] ) << 8) |
(( uint64_t )( p[2] ) << 16) |
(( uint64_t )( p[3] ) << 24) |
(( uint64_t )( p[4] ) << 32) |
(( uint64_t )( p[5] ) << 40) |
(( uint64_t )( p[6] ) << 48) |
(( uint64_t )( p[7] ) << 56) ;
#endif
}
static BLAKE2_INLINE uint16_t load16( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint16_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return ( uint16_t )((( uint32_t )( p[0] ) << 0) |
(( uint32_t )( p[1] ) << 8));
#endif
}
static BLAKE2_INLINE void store16( void *dst, uint16_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
#endif
}
static BLAKE2_INLINE void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
#endif
}
static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
p[4] = (uint8_t)(w >> 32);
p[5] = (uint8_t)(w >> 40);
p[6] = (uint8_t)(w >> 48);
p[7] = (uint8_t)(w >> 56);
#endif
}
static BLAKE2_INLINE uint64_t load48( const void *src )
{
const uint8_t *p = ( const uint8_t * )src;
return (( uint64_t )( p[0] ) << 0) |
(( uint64_t )( p[1] ) << 8) |
(( uint64_t )( p[2] ) << 16) |
(( uint64_t )( p[3] ) << 24) |
(( uint64_t )( p[4] ) << 32) |
(( uint64_t )( p[5] ) << 40) ;
}
static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
p[4] = (uint8_t)(w >> 32);
p[5] = (uint8_t)(w >> 40);
}
static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 32 - c ) );
}
static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 64 - c ) );
}
/* prevents compiler optimizing out memset() */
static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
{
static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
memset_v(v, 0, n);
}
#endif

View file

@ -0,0 +1,195 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2_H
#define BLAKE2_H
#include <stddef.h>
#include <stdint.h>
#if defined(_MSC_VER)
#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop))
#else
#define BLAKE2_PACKED(x) x __attribute__((packed))
#endif
#if defined(__cplusplus)
extern "C" {
#endif
enum blake2s_constant
{
BLAKE2S_BLOCKBYTES = 64,
BLAKE2S_OUTBYTES = 32,
BLAKE2S_KEYBYTES = 32,
BLAKE2S_SALTBYTES = 8,
BLAKE2S_PERSONALBYTES = 8
};
enum blake2b_constant
{
BLAKE2B_BLOCKBYTES = 128,
BLAKE2B_OUTBYTES = 64,
BLAKE2B_KEYBYTES = 64,
BLAKE2B_SALTBYTES = 16,
BLAKE2B_PERSONALBYTES = 16
};
typedef struct blake2s_state__
{
uint32_t h[8];
uint32_t t[2];
uint32_t f[2];
uint8_t buf[BLAKE2S_BLOCKBYTES];
size_t buflen;
size_t outlen;
uint8_t last_node;
} blake2s_state;
typedef struct blake2b_state__
{
uint64_t h[8];
uint64_t t[2];
uint64_t f[2];
uint8_t buf[BLAKE2B_BLOCKBYTES];
size_t buflen;
size_t outlen;
uint8_t last_node;
} blake2b_state;
typedef struct blake2sp_state__
{
blake2s_state S[8][1];
blake2s_state R[1];
uint8_t buf[8 * BLAKE2S_BLOCKBYTES];
size_t buflen;
size_t outlen;
} blake2sp_state;
typedef struct blake2bp_state__
{
blake2b_state S[4][1];
blake2b_state R[1];
uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
size_t buflen;
size_t outlen;
} blake2bp_state;
BLAKE2_PACKED(struct blake2s_param__
{
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint32_t node_offset; /* 12 */
uint16_t xof_length; /* 14 */
uint8_t node_depth; /* 15 */
uint8_t inner_length; /* 16 */
/* uint8_t reserved[0]; */
uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */
uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */
});
typedef struct blake2s_param__ blake2s_param;
BLAKE2_PACKED(struct blake2b_param__
{
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint32_t node_offset; /* 12 */
uint32_t xof_length; /* 16 */
uint8_t node_depth; /* 17 */
uint8_t inner_length; /* 18 */
uint8_t reserved[14]; /* 32 */
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
});
typedef struct blake2b_param__ blake2b_param;
typedef struct blake2xs_state__
{
blake2s_state S[1];
blake2s_param P[1];
} blake2xs_state;
typedef struct blake2xb_state__
{
blake2b_state S[1];
blake2b_param P[1];
} blake2xb_state;
/* Padded structs result in a compile-time error */
enum {
BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES),
BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES)
};
/* Streaming API */
int _cryptonite_blake2s_init( blake2s_state *S, size_t outlen );
int _cryptonite_blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int _cryptonite_blake2s_init_param( blake2s_state *S, const blake2s_param *P );
int _cryptonite_blake2s_update( blake2s_state *S, const void *in, size_t inlen );
int _cryptonite_blake2s_final( blake2s_state *S, void *out, size_t outlen );
int _cryptonite_blake2b_init( blake2b_state *S, size_t outlen );
int _cryptonite_blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int _cryptonite_blake2b_init_param( blake2b_state *S, const blake2b_param *P );
int _cryptonite_blake2b_update( blake2b_state *S, const void *in, size_t inlen );
int _cryptonite_blake2b_final( blake2b_state *S, void *out, size_t outlen );
int _cryptonite_blake2sp_init( blake2sp_state *S, size_t outlen );
int _cryptonite_blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen );
int _cryptonite_blake2sp_update( blake2sp_state *S, const void *in, size_t inlen );
int _cryptonite_blake2sp_final( blake2sp_state *S, void *out, size_t outlen );
int _cryptonite_blake2bp_init( blake2bp_state *S, size_t outlen );
int _cryptonite_blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen );
int _cryptonite_blake2bp_update( blake2bp_state *S, const void *in, size_t inlen );
int _cryptonite_blake2bp_final( blake2bp_state *S, void *out, size_t outlen );
/* Variable output length API */
int _cryptonite_blake2xs_init( blake2xs_state *S, const size_t outlen );
int _cryptonite_blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen );
int _cryptonite_blake2xs_update( blake2xs_state *S, const void *in, size_t inlen );
int _cryptonite_blake2xs_final(blake2xs_state *S, void *out, size_t outlen);
int _cryptonite_blake2xb_init( blake2xb_state *S, const size_t outlen );
int _cryptonite_blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen );
int _cryptonite_blake2xb_update( blake2xb_state *S, const void *in, size_t inlen );
int _cryptonite_blake2xb_final(blake2xb_state *S, void *out, size_t outlen);
/* Simple API */
int _cryptonite_blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int _cryptonite_blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int _cryptonite_blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int _cryptonite_blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int _cryptonite_blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int _cryptonite_blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
/* This is simply an alias for blake2b */
int _cryptonite_blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
#if defined(__cplusplus)
}
#endif
#endif

View file

@ -0,0 +1,68 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2B_LOAD_SSE2_H
#define BLAKE2B_LOAD_SSE2_H
#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13)
#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9)
#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2)
#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12)
#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1)
#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8)
#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6)
#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11)
#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3)
#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1)
#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4)
#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7)
#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6)
#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3)
#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12)
#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
#endif

View file

@ -0,0 +1,402 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2B_LOAD_SSE41_H
#define BLAKE2B_LOAD_SSE41_H
#define LOAD_MSG_0_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_0_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_1_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_1_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_1_3(b0, b1) \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_1_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)
#define LOAD_MSG_2_1(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m6, m5, 8); \
b1 = _mm_unpackhi_epi64(m2, m7); \
} while(0)
#define LOAD_MSG_2_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m0); \
b1 = _mm_blend_epi16(m1, m6, 0xF0); \
} while(0)
#define LOAD_MSG_2_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m5, m1, 0xF0); \
b1 = _mm_unpackhi_epi64(m3, m4); \
} while(0)
#define LOAD_MSG_2_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m3); \
b1 = _mm_alignr_epi8(m2, m0, 8); \
} while(0)
#define LOAD_MSG_3_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_unpackhi_epi64(m6, m5); \
} while(0)
#define LOAD_MSG_3_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m0); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_3_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m1, m2, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_3_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m5); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_4_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m2); \
b1 = _mm_unpacklo_epi64(m1, m5); \
} while(0)
#define LOAD_MSG_4_2(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m0, m3, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_4_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m7, m5, 0xF0); \
b1 = _mm_blend_epi16(m3, m1, 0xF0); \
} while(0)
#define LOAD_MSG_4_4(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m6, m0, 8); \
b1 = _mm_blend_epi16(m4, m6, 0xF0); \
} while(0)
#define LOAD_MSG_5_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m3); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_5_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m5); \
b1 = _mm_unpackhi_epi64(m5, m1); \
} while(0)
#define LOAD_MSG_5_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m2, m3, 0xF0); \
b1 = _mm_unpackhi_epi64(m7, m0); \
} while(0)
#define LOAD_MSG_5_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m2); \
b1 = _mm_blend_epi16(m7, m4, 0xF0); \
} while(0)
#define LOAD_MSG_6_1(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m6, m0, 0xF0); \
b1 = _mm_unpacklo_epi64(m7, m2); \
} while(0)
#define LOAD_MSG_6_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_alignr_epi8(m5, m6, 8); \
} while(0)
#define LOAD_MSG_6_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m3); \
b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
} while(0)
#define LOAD_MSG_6_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_blend_epi16(m1, m5, 0xF0); \
} while(0)
#define LOAD_MSG_7_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m3); \
b1 = _mm_blend_epi16(m6, m1, 0xF0); \
} while(0)
#define LOAD_MSG_7_2(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpackhi_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_7_3(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_unpacklo_epi64(m4, m1); \
} while(0)
#define LOAD_MSG_7_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m2); \
b1 = _mm_unpacklo_epi64(m3, m5); \
} while(0)
#define LOAD_MSG_8_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m7); \
b1 = _mm_alignr_epi8(m0, m5, 8); \
} while(0)
#define LOAD_MSG_8_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_alignr_epi8(m4, m1, 8); \
} while(0)
#define LOAD_MSG_8_3(b0, b1) \
do \
{ \
b0 = m6; \
b1 = _mm_alignr_epi8(m5, m0, 8); \
} while(0)
#define LOAD_MSG_8_4(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m1, m3, 0xF0); \
b1 = m2; \
} while(0)
#define LOAD_MSG_9_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_unpackhi_epi64(m3, m0); \
} while(0)
#define LOAD_MSG_9_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m2); \
b1 = _mm_blend_epi16(m3, m2, 0xF0); \
} while(0)
#define LOAD_MSG_9_3(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_unpackhi_epi64(m1, m6); \
} while(0)
#define LOAD_MSG_9_4(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpacklo_epi64(m6, m0); \
} while(0)
#define LOAD_MSG_10_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_10_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_11_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_11_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_11_3(b0, b1) \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_11_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)
#endif

View file

@ -0,0 +1,157 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2B_ROUND_H
#define BLAKE2B_ROUND_H
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) )
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
#define TOF(reg) _mm_castsi128_ps((reg))
#define TOI(reg) _mm_castps_si128((reg))
#define LIKELY(x) __builtin_expect((x),1)
/* Microarchitecture-specific macros */
#ifndef HAVE_XOP
#ifdef HAVE_SSSE3
#define _mm_roti_epi64(x, c) \
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
#else
#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) ))
#endif
#else
/* ... */
#endif
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -32); \
row4h = _mm_roti_epi64(row4h, -32); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -24); \
row2h = _mm_roti_epi64(row2h, -24); \
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
\
row4l = _mm_xor_si128(row4l, row1l); \
row4h = _mm_xor_si128(row4h, row1h); \
\
row4l = _mm_roti_epi64(row4l, -16); \
row4h = _mm_roti_epi64(row4h, -16); \
\
row3l = _mm_add_epi64(row3l, row4l); \
row3h = _mm_add_epi64(row3h, row4h); \
\
row2l = _mm_xor_si128(row2l, row3l); \
row2h = _mm_xor_si128(row2h, row3h); \
\
row2l = _mm_roti_epi64(row2l, -63); \
row2h = _mm_roti_epi64(row2h, -63); \
#if defined(HAVE_SSSE3)
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
row4l = t1; \
row4h = t0;
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
row4l = t1; \
row4h = t0;
#else
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = row4l;\
t1 = row2l;\
row4l = row3l;\
row3l = row3h;\
row3h = row4l;\
row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \
row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \
row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \
row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1))
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = row3l;\
row3l = row3h;\
row3h = t0;\
t0 = row2l;\
t1 = row4l;\
row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \
row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \
row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \
row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1))
#endif
#if defined(HAVE_SSE41)
#include "blake2b-load-sse41.h"
#else
#include "blake2b-load-sse2.h"
#endif
#define ROUND(r) \
LOAD_MSG_ ##r ##_1(b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_2(b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
LOAD_MSG_ ##r ##_3(b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_4(b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
#endif

View file

@ -0,0 +1,373 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
#include "blake2-config.h"
#ifdef _MSC_VER
#include <intrin.h> /* for _mm_set_epi64x */
#endif
#include <emmintrin.h>
#if defined(HAVE_SSSE3)
#include <tmmintrin.h>
#endif
#if defined(HAVE_SSE41)
#include <smmintrin.h>
#endif
#if defined(HAVE_AVX)
#include <immintrin.h>
#endif
#if defined(HAVE_XOP)
#include <x86intrin.h>
#endif
#include "blake2b-round.h"
static const uint64_t blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
/* Some helper functions */
static void blake2b_set_lastnode( blake2b_state *S )
{
S->f[1] = (uint64_t)-1;
}
static int blake2b_is_lastblock( const blake2b_state *S )
{
return S->f[0] != 0;
}
static void blake2b_set_lastblock( blake2b_state *S )
{
if( S->last_node ) blake2b_set_lastnode( S );
S->f[0] = (uint64_t)-1;
}
static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
}
/* init xors IV with input parameter block */
int _cryptonite_blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
size_t i;
/*blake2b_init0( S ); */
const unsigned char * v = ( const unsigned char * )( blake2b_IV );
const unsigned char * p = ( const unsigned char * )( P );
unsigned char * h = ( unsigned char * )( S->h );
/* IV XOR ParamBlock */
memset( S, 0, sizeof( blake2b_state ) );
for( i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
S->outlen = P->digest_length;
return 0;
}
/* Some sort of default parameter block initialization, for sequential blake2b */
int _cryptonite_blake2b_init( blake2b_state *S, size_t outlen )
{
blake2b_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
P->digest_length = (uint8_t)outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store32( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return _cryptonite_blake2b_init_param( S, P );
}
int _cryptonite_blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
blake2b_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store32( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
if( _cryptonite_blake2b_init_param( S, P ) < 0 )
return 0;
{
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
_cryptonite_blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
__m128i row1l, row1h;
__m128i row2l, row2h;
__m128i row3l, row3h;
__m128i row4l, row4h;
__m128i b0, b1;
__m128i t0, t1;
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
#endif
#if defined(HAVE_SSE41)
const __m128i m0 = LOADU( block + 00 );
const __m128i m1 = LOADU( block + 16 );
const __m128i m2 = LOADU( block + 32 );
const __m128i m3 = LOADU( block + 48 );
const __m128i m4 = LOADU( block + 64 );
const __m128i m5 = LOADU( block + 80 );
const __m128i m6 = LOADU( block + 96 );
const __m128i m7 = LOADU( block + 112 );
#else
const uint64_t m0 = load64(block + 0 * sizeof(uint64_t));
const uint64_t m1 = load64(block + 1 * sizeof(uint64_t));
const uint64_t m2 = load64(block + 2 * sizeof(uint64_t));
const uint64_t m3 = load64(block + 3 * sizeof(uint64_t));
const uint64_t m4 = load64(block + 4 * sizeof(uint64_t));
const uint64_t m5 = load64(block + 5 * sizeof(uint64_t));
const uint64_t m6 = load64(block + 6 * sizeof(uint64_t));
const uint64_t m7 = load64(block + 7 * sizeof(uint64_t));
const uint64_t m8 = load64(block + 8 * sizeof(uint64_t));
const uint64_t m9 = load64(block + 9 * sizeof(uint64_t));
const uint64_t m10 = load64(block + 10 * sizeof(uint64_t));
const uint64_t m11 = load64(block + 11 * sizeof(uint64_t));
const uint64_t m12 = load64(block + 12 * sizeof(uint64_t));
const uint64_t m13 = load64(block + 13 * sizeof(uint64_t));
const uint64_t m14 = load64(block + 14 * sizeof(uint64_t));
const uint64_t m15 = load64(block + 15 * sizeof(uint64_t));
#endif
row1l = LOADU( &S->h[0] );
row1h = LOADU( &S->h[2] );
row2l = LOADU( &S->h[4] );
row2h = LOADU( &S->h[6] );
row3l = LOADU( &blake2b_IV[0] );
row3h = LOADU( &blake2b_IV[2] );
row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
ROUND( 10 );
ROUND( 11 );
row1l = _mm_xor_si128( row3l, row1l );
row1h = _mm_xor_si128( row3h, row1h );
STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
row2l = _mm_xor_si128( row4l, row2l );
row2h = _mm_xor_si128( row4h, row2h );
STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
}
int _cryptonite_blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
if( inlen > 0 )
{
size_t left = S->buflen;
size_t fill = BLAKE2B_BLOCKBYTES - left;
if( inlen > fill )
{
S->buflen = 0;
memcpy( S->buf + left, in, fill ); /* Fill buffer */
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf ); /* Compress */
in += fill; inlen -= fill;
while(inlen > BLAKE2B_BLOCKBYTES) {
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
blake2b_compress( S, in );
in += BLAKE2B_BLOCKBYTES;
inlen -= BLAKE2B_BLOCKBYTES;
}
}
memcpy( S->buf + S->buflen, in, inlen );
S->buflen += inlen;
}
return 0;
}
int _cryptonite_blake2b_final( blake2b_state *S, void *out, size_t outlen )
{
if( out == NULL || outlen < S->outlen )
return -1;
if( blake2b_is_lastblock( S ) )
return -1;
blake2b_increment_counter( S, S->buflen );
blake2b_set_lastblock( S );
memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
blake2b_compress( S, S->buf );
memcpy( out, &S->h[0], S->outlen );
return 0;
}
int _cryptonite_blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
blake2b_state S[1];
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if( NULL == key && keylen > 0 ) return -1;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( keylen > BLAKE2B_KEYBYTES ) return -1;
if( keylen )
{
if( _cryptonite_blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
}
else
{
if( _cryptonite_blake2b_init( S, outlen ) < 0 ) return -1;
}
_cryptonite_blake2b_update( S, ( const uint8_t * )in, inlen );
_cryptonite_blake2b_final( S, out, outlen );
return 0;
}
int _cryptonite_blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) {
return _cryptonite_blake2b(out, outlen, in, inlen, key, keylen);
}
#if defined(SUPERCOP)
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
return _cryptonite_blake2b( out, BLAKE2B_OUTBYTES, in, inlen, NULL, 0 );
}
#endif
#if defined(BLAKE2B_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2B_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( i = 0; i < BLAKE2B_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2B_OUTBYTES];
blake2b( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES );
if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2B_OUTBYTES];
blake2b_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = _cryptonite_blake2b_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = _cryptonite_blake2b_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = _cryptonite_blake2b_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = _cryptonite_blake2b_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

View file

@ -0,0 +1,361 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#if defined(_OPENMP)
#include <omp.h>
#endif
#include "blake2.h"
#include "blake2-impl.h"
#define PARALLELISM_DEGREE 4
/*
blake2b_init_param defaults to setting the expecting output length
from the digest_length parameter block field.
In some cases, however, we do not want this, as the output length
of these instances is given by inner_length instead.
*/
static int blake2bp_init_leaf_param( blake2b_state *S, const blake2b_param *P )
{
int err = _cryptonite_blake2b_init_param(S, P);
S->outlen = P->inner_length;
return err;
}
static int blake2bp_init_leaf( blake2b_state *S, size_t outlen, size_t keylen, uint64_t offset )
{
blake2b_param P[1];
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
P->leaf_length = 0;
P->node_offset = offset;
P->xof_length = 0;
P->node_depth = 0;
P->inner_length = BLAKE2B_OUTBYTES;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2bp_init_leaf_param( S, P );
}
static int blake2bp_init_root( blake2b_state *S, size_t outlen, size_t keylen )
{
blake2b_param P[1];
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
P->leaf_length = 0;
P->node_offset = 0;
P->xof_length = 0;
P->node_depth = 1;
P->inner_length = BLAKE2B_OUTBYTES;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return _cryptonite_blake2b_init_param( S, P );
}
int _cryptonite_blake2bp_init( blake2bp_state *S, size_t outlen )
{
size_t i;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2bp_init_root( S->R, outlen, 0 ) < 0 )
return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
return 0;
}
int _cryptonite_blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen )
{
size_t i;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2bp_init_root( S->R, outlen, keylen ) < 0 )
return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
{
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
_cryptonite_blake2b_update( S->S[i], block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
int _cryptonite_blake2bp_update( blake2bp_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
size_t left = S->buflen;
size_t fill = sizeof( S->buf ) - left;
size_t i;
if( left && inlen >= fill )
{
memcpy( S->buf + left, in, fill );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
_cryptonite_blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );
in += fill;
inlen -= fill;
left = 0;
}
#if defined(_OPENMP)
#pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t i = omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2B_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
{
_cryptonite_blake2b_update( S->S[i], in__, BLAKE2B_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
}
}
in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES );
inlen %= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
if( inlen > 0 )
memcpy( S->buf + left, in, inlen );
S->buflen = left + inlen;
return 0;
}
int _cryptonite_blake2bp_final( blake2bp_state *S, void *out, size_t outlen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
size_t i;
if(out == NULL || outlen < S->outlen) {
return -1;
}
for( i = 0; i < PARALLELISM_DEGREE; ++i )
{
if( S->buflen > i * BLAKE2B_BLOCKBYTES )
{
size_t left = S->buflen - i * BLAKE2B_BLOCKBYTES;
if( left > BLAKE2B_BLOCKBYTES ) left = BLAKE2B_BLOCKBYTES;
_cryptonite_blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, left );
}
_cryptonite_blake2b_final( S->S[i], hash[i], BLAKE2B_OUTBYTES );
}
for( i = 0; i < PARALLELISM_DEGREE; ++i )
_cryptonite_blake2b_update( S->R, hash[i], BLAKE2B_OUTBYTES );
return _cryptonite_blake2b_final( S->R, out, S->outlen );
}
int _cryptonite_blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
blake2b_state S[PARALLELISM_DEGREE][1];
blake2b_state FS[1];
size_t i;
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if( NULL == key && keylen > 0 ) return -1;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( keylen > BLAKE2B_KEYBYTES ) return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */
if( keylen > 0 )
{
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
_cryptonite_blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
}
#if defined(_OPENMP)
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
#else
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t i = omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2B_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
{
_cryptonite_blake2b_update( S[i], in__, BLAKE2B_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
}
if( inlen__ > i * BLAKE2B_BLOCKBYTES )
{
const size_t left = inlen__ - i * BLAKE2B_BLOCKBYTES;
const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES;
_cryptonite_blake2b_update( S[i], in__, len );
}
_cryptonite_blake2b_final( S[i], hash[i], BLAKE2B_OUTBYTES );
}
if( blake2bp_init_root( FS, outlen, keylen ) < 0 )
return -1;
FS->last_node = 1; /* Mark as last node */
for( i = 0; i < PARALLELISM_DEGREE; ++i )
_cryptonite_blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES );
return _cryptonite_blake2b_final( FS, out, outlen );
}
#if defined(BLAKE2BP_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2B_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( i = 0; i < BLAKE2B_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2B_OUTBYTES];
_cryptonite_blake2bp( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES );
if( 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2B_OUTBYTES];
blake2bp_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = _cryptonite_blake2bp_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = _cryptonite_blake2bp_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = _cryptonite_blake2bp_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = _cryptonite_blake2bp_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

View file

@ -0,0 +1,60 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2S_LOAD_SSE2_H
#define BLAKE2S_LOAD_SSE2_H
#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0)
#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1)
#define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m14,m12,m10,m8)
#define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m15,m13,m11,m9)
#define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13,m9,m4,m14)
#define LOAD_MSG_1_2(buf) buf = _mm_set_epi32(m6,m15,m8,m10)
#define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m5,m11,m0,m1)
#define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m3,m7,m2,m12)
#define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15,m5,m12,m11)
#define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13,m2,m0,m8)
#define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m9,m7,m3,m10)
#define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m4,m1,m6,m14)
#define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11,m13,m3,m7)
#define LOAD_MSG_3_2(buf) buf = _mm_set_epi32(m14,m12,m1,m9)
#define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m15,m4,m5,m2)
#define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m8,m0,m10,m6)
#define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10,m2,m5,m9)
#define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15,m4,m7,m0)
#define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m3,m6,m11,m14)
#define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m13,m8,m12,m1)
#define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8,m0,m6,m2)
#define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3,m11,m10,m12)
#define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m1,m15,m7,m4)
#define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m9,m14,m5,m13)
#define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4,m14,m1,m12)
#define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10,m13,m15,m5)
#define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m8,m9,m6,m0)
#define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m11,m2,m3,m7)
#define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3,m12,m7,m13)
#define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9,m1,m14,m11)
#define LOAD_MSG_7_3(buf) buf = _mm_set_epi32(m2,m8,m15,m5)
#define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m10,m6,m4,m0)
#define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0,m11,m14,m6)
#define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8,m3,m9,m15)
#define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m10,m1,m13,m12)
#define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m5,m4,m7,m2)
#define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1,m7,m8,m10)
#define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5,m6,m4,m2)
#define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m13,m3,m9,m15)
#define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m0,m12,m14,m11)
#endif

View file

@ -0,0 +1,229 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2S_LOAD_SSE41_H
#define BLAKE2S_LOAD_SSE41_H
#define LOAD_MSG_0_1(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0)));
#define LOAD_MSG_0_2(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1)));
#define LOAD_MSG_0_3(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(2,0,2,0)));
#define LOAD_MSG_0_4(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(3,1,3,1)));
#define LOAD_MSG_1_1(buf) \
t0 = _mm_blend_epi16(m1, m2, 0x0C); \
t1 = _mm_slli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3));
#define LOAD_MSG_1_2(buf) \
t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \
t1 = _mm_blend_epi16(m1,m3,0xC0); \
t2 = _mm_blend_epi16(t0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
#define LOAD_MSG_1_3(buf) \
t0 = _mm_slli_si128(m1, 4); \
t1 = _mm_blend_epi16(m2, t0, 0x30); \
t2 = _mm_blend_epi16(m0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
#define LOAD_MSG_1_4(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_slli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0, t1, 0x0C); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
#define LOAD_MSG_2_1(buf) \
t0 = _mm_unpackhi_epi32(m2,m3); \
t1 = _mm_blend_epi16(m3,m1,0x0C); \
t2 = _mm_blend_epi16(t0, t1, 0x0F); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
#define LOAD_MSG_2_2(buf) \
t0 = _mm_unpacklo_epi32(m2,m0); \
t1 = _mm_blend_epi16(t0, m0, 0xF0); \
t2 = _mm_slli_si128(m3, 8); \
buf = _mm_blend_epi16(t1, t2, 0xC0);
#define LOAD_MSG_2_3(buf) \
t0 = _mm_blend_epi16(m0, m2, 0x3C); \
t1 = _mm_srli_si128(m1, 12); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,3,2));
#define LOAD_MSG_2_4(buf) \
t0 = _mm_slli_si128(m3, 4); \
t1 = _mm_blend_epi16(m0, m1, 0x33); \
t2 = _mm_blend_epi16(t1, t0, 0xC0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,1,2,3));
#define LOAD_MSG_3_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_unpackhi_epi32(t0, m2); \
t2 = _mm_blend_epi16(t1, m3, 0x0C); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
#define LOAD_MSG_3_2(buf) \
t0 = _mm_slli_si128(m2, 8); \
t1 = _mm_blend_epi16(m3,m0,0x0C); \
t2 = _mm_blend_epi16(t1, t0, 0xC0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
#define LOAD_MSG_3_3(buf) \
t0 = _mm_blend_epi16(m0,m1,0x0F); \
t1 = _mm_blend_epi16(t0, m3, 0xC0); \
buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
#define LOAD_MSG_3_4(buf) \
t0 = _mm_unpacklo_epi32(m0,m2); \
t1 = _mm_unpackhi_epi32(m1,m2); \
buf = _mm_unpacklo_epi64(t1,t0);
#define LOAD_MSG_4_1(buf) \
t0 = _mm_unpacklo_epi64(m1,m2); \
t1 = _mm_unpackhi_epi64(m0,m2); \
t2 = _mm_blend_epi16(t0,t1,0x33); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
#define LOAD_MSG_4_2(buf) \
t0 = _mm_unpackhi_epi64(m1,m3); \
t1 = _mm_unpacklo_epi64(m0,m1); \
buf = _mm_blend_epi16(t0,t1,0x33);
#define LOAD_MSG_4_3(buf) \
t0 = _mm_unpackhi_epi64(m3,m1); \
t1 = _mm_unpackhi_epi64(m2,m0); \
buf = _mm_blend_epi16(t1,t0,0x33);
#define LOAD_MSG_4_4(buf) \
t0 = _mm_blend_epi16(m0,m2,0x03); \
t1 = _mm_slli_si128(t0, 8); \
t2 = _mm_blend_epi16(t1,m3,0x0F); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,0,3));
#define LOAD_MSG_5_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_unpacklo_epi32(m0,m2); \
buf = _mm_unpacklo_epi64(t0,t1);
#define LOAD_MSG_5_2(buf) \
t0 = _mm_srli_si128(m2, 4); \
t1 = _mm_blend_epi16(m0,m3,0x03); \
buf = _mm_blend_epi16(t1,t0,0x3C);
#define LOAD_MSG_5_3(buf) \
t0 = _mm_blend_epi16(m1,m0,0x0C); \
t1 = _mm_srli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0,t1,0x30); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0));
#define LOAD_MSG_5_4(buf) \
t0 = _mm_unpacklo_epi64(m1,m2); \
t1= _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,2,0,1)); \
buf = _mm_blend_epi16(t0,t1,0x33);
#define LOAD_MSG_6_1(buf) \
t0 = _mm_slli_si128(m1, 12); \
t1 = _mm_blend_epi16(m0,m3,0x33); \
buf = _mm_blend_epi16(t1,t0,0xC0);
#define LOAD_MSG_6_2(buf) \
t0 = _mm_blend_epi16(m3,m2,0x30); \
t1 = _mm_srli_si128(m1, 4); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0));
#define LOAD_MSG_6_3(buf) \
t0 = _mm_unpacklo_epi64(m0,m2); \
t1 = _mm_srli_si128(m1, 4); \
buf = _mm_shuffle_epi32(_mm_blend_epi16(t0,t1,0x0C), _MM_SHUFFLE(2,3,1,0));
#define LOAD_MSG_6_4(buf) \
t0 = _mm_unpackhi_epi32(m1,m2); \
t1 = _mm_unpackhi_epi64(m0,t0); \
buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
#define LOAD_MSG_7_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_blend_epi16(t0,m3,0x0F); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1));
#define LOAD_MSG_7_2(buf) \
t0 = _mm_blend_epi16(m2,m3,0x30); \
t1 = _mm_srli_si128(m0,4); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3));
#define LOAD_MSG_7_3(buf) \
t0 = _mm_unpackhi_epi64(m0,m3); \
t1 = _mm_unpacklo_epi64(m1,m2); \
t2 = _mm_blend_epi16(t0,t1,0x3C); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,3,1));
#define LOAD_MSG_7_4(buf) \
t0 = _mm_unpacklo_epi32(m0,m1); \
t1 = _mm_unpackhi_epi32(m1,m2); \
buf = _mm_unpacklo_epi64(t0,t1);
#define LOAD_MSG_8_1(buf) \
t0 = _mm_unpackhi_epi32(m1,m3); \
t1 = _mm_unpacklo_epi64(t0,m0); \
t2 = _mm_blend_epi16(t1,m2,0xC0); \
buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2));
#define LOAD_MSG_8_2(buf) \
t0 = _mm_unpackhi_epi32(m0,m3); \
t1 = _mm_blend_epi16(m2,t0,0xF0); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3));
#define LOAD_MSG_8_3(buf) \
t0 = _mm_blend_epi16(m2,m0,0x0C); \
t1 = _mm_slli_si128(t0,4); \
buf = _mm_blend_epi16(t1,m3,0x0F);
#define LOAD_MSG_8_4(buf) \
t0 = _mm_blend_epi16(m1,m0,0x30); \
buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(1,0,3,2));
#define LOAD_MSG_9_1(buf) \
t0 = _mm_blend_epi16(m0,m2,0x03); \
t1 = _mm_blend_epi16(m1,m2,0x30); \
t2 = _mm_blend_epi16(t1,t0,0x0F); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2));
#define LOAD_MSG_9_2(buf) \
t0 = _mm_slli_si128(m0,4); \
t1 = _mm_blend_epi16(m1,t0,0xC0); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3));
#define LOAD_MSG_9_3(buf) \
t0 = _mm_unpackhi_epi32(m0,m3); \
t1 = _mm_unpacklo_epi32(m2,m3); \
t2 = _mm_unpackhi_epi64(t0,t1); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(3,0,2,1));
#define LOAD_MSG_9_4(buf) \
t0 = _mm_blend_epi16(m3,m2,0xC0); \
t1 = _mm_unpacklo_epi32(m0,m3); \
t2 = _mm_blend_epi16(t0,t1,0x0F); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3));
#endif

View file

@ -0,0 +1,191 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2S_LOAD_XOP_H
#define BLAKE2S_LOAD_XOP_H
#define TOB(x) ((x)*4*0x01010101 + 0x03020100) /* ..or not TOB */
#if 0
/* Basic VPPERM emulation, for testing purposes */
static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel)
{
const __m128i sixteen = _mm_set1_epi8(16);
const __m128i t0 = _mm_shuffle_epi8(src1, sel);
const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen));
const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen),
_mm_cmpgt_epi8(sel, sixteen)); /* (>=16) = 0xff : 00 */
return _mm_blendv_epi8(t0, s1, mask);
}
#endif
#define LOAD_MSG_0_1(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );
#define LOAD_MSG_0_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );
#define LOAD_MSG_0_3(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );
#define LOAD_MSG_0_4(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );
#define LOAD_MSG_1_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(5),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_1_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(2),TOB(0),TOB(4),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_1_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_1_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(7),TOB(2),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );
#define LOAD_MSG_2_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(4),TOB(0)) );
#define LOAD_MSG_2_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(2),TOB(0),TOB(4)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_2_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(7),TOB(3),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_2_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(1),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_3_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(3),TOB(7)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(1),TOB(0)) );
#define LOAD_MSG_3_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(1),TOB(5)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(1),TOB(0)) );
#define LOAD_MSG_3_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(5),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_3_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(6),TOB(0)) );
#define LOAD_MSG_4_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(5)) );
#define LOAD_MSG_4_2(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_4_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(6),TOB(0),TOB(0)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_4_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(4),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(4),TOB(0)) );
#define LOAD_MSG_5_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_5_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );
#define LOAD_MSG_5_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(0),TOB(7),TOB(4)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_5_4(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(5),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(5)) );
#define LOAD_MSG_6_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(4)) );
#define LOAD_MSG_6_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(6),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(7),TOB(0)) );
#define LOAD_MSG_6_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(5),TOB(1),TOB(0)) );
#define LOAD_MSG_6_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(3),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_7_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(0),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(5)) );
#define LOAD_MSG_7_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(5),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );
#define LOAD_MSG_7_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(2),TOB(0),TOB(0),TOB(5)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) );
#define LOAD_MSG_7_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(6),TOB(4),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_8_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );
#define LOAD_MSG_8_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(4),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) );
#define LOAD_MSG_8_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) ); \
#define LOAD_MSG_8_4(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) );
#define LOAD_MSG_9_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(7),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(4),TOB(6)) );
#define LOAD_MSG_9_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(6),TOB(4),TOB(2)) );
#define LOAD_MSG_9_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(7)) );
#define LOAD_MSG_9_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) );
#endif

View file

@ -0,0 +1,88 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2S_ROUND_H
#define BLAKE2S_ROUND_H
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) )
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
#define TOF(reg) _mm_castsi128_ps((reg))
#define TOI(reg) _mm_castps_si128((reg))
#define LIKELY(x) __builtin_expect((x),1)
/* Microarchitecture-specific macros */
#ifndef HAVE_XOP
#ifdef HAVE_SSSE3
#define _mm_roti_epi32(r, c) ( \
(8==-(c)) ? _mm_shuffle_epi8(r,r8) \
: (16==-(c)) ? _mm_shuffle_epi8(r,r16) \
: _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) )
#else
#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) ))
#endif
#else
/* ... */
#endif
#define G1(row1,row2,row3,row4,buf) \
row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \
row4 = _mm_xor_si128( row4, row1 ); \
row4 = _mm_roti_epi32(row4, -16); \
row3 = _mm_add_epi32( row3, row4 ); \
row2 = _mm_xor_si128( row2, row3 ); \
row2 = _mm_roti_epi32(row2, -12);
#define G2(row1,row2,row3,row4,buf) \
row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \
row4 = _mm_xor_si128( row4, row1 ); \
row4 = _mm_roti_epi32(row4, -8); \
row3 = _mm_add_epi32( row3, row4 ); \
row2 = _mm_xor_si128( row2, row3 ); \
row2 = _mm_roti_epi32(row2, -7);
#define DIAGONALIZE(row1,row2,row3,row4) \
row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(2,1,0,3) ); \
row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \
row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(0,3,2,1) );
#define UNDIAGONALIZE(row1,row2,row3,row4) \
row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(0,3,2,1) ); \
row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \
row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(2,1,0,3) );
#if defined(HAVE_XOP)
#include "blake2s-load-xop.h"
#elif defined(HAVE_SSE41)
#include "blake2s-load-sse41.h"
#else
#include "blake2s-load-sse2.h"
#endif
#define ROUND(r) \
LOAD_MSG_ ##r ##_1(buf1); \
G1(row1,row2,row3,row4,buf1); \
LOAD_MSG_ ##r ##_2(buf2); \
G2(row1,row2,row3,row4,buf2); \
DIAGONALIZE(row1,row2,row3,row4); \
LOAD_MSG_ ##r ##_3(buf3); \
G1(row1,row2,row3,row4,buf3); \
LOAD_MSG_ ##r ##_4(buf4); \
G2(row1,row2,row3,row4,buf4); \
UNDIAGONALIZE(row1,row2,row3,row4); \
#endif

View file

@ -0,0 +1,363 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
#include "blake2-config.h"
#include <emmintrin.h>
#if defined(HAVE_SSSE3)
#include <tmmintrin.h>
#endif
#if defined(HAVE_SSE41)
#include <smmintrin.h>
#endif
#if defined(HAVE_AVX)
#include <immintrin.h>
#endif
#if defined(HAVE_XOP)
#include <x86intrin.h>
#endif
#include "blake2s-round.h"
static const uint32_t blake2s_IV[8] =
{
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
/* Some helper functions */
static void blake2s_set_lastnode( blake2s_state *S )
{
S->f[1] = (uint32_t)-1;
}
static int blake2s_is_lastblock( const blake2s_state *S )
{
return S->f[0] != 0;
}
static void blake2s_set_lastblock( blake2s_state *S )
{
if( S->last_node ) blake2s_set_lastnode( S );
S->f[0] = (uint32_t)-1;
}
static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0];
t += inc;
S->t[0] = ( uint32_t )( t >> 0 );
S->t[1] = ( uint32_t )( t >> 32 );
}
/* init2 xors IV with input parameter block */
int _cryptonite_blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
size_t i;
/*blake2s_init0( S ); */
const uint8_t * v = ( const uint8_t * )( blake2s_IV );
const uint8_t * p = ( const uint8_t * )( P );
uint8_t * h = ( uint8_t * )( S->h );
/* IV XOR ParamBlock */
memset( S, 0, sizeof( blake2s_state ) );
for( i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
S->outlen = P->digest_length;
return 0;
}
/* Some sort of default parameter block initialization, for sequential blake2s */
int _cryptonite_blake2s_init( blake2s_state *S, size_t outlen )
{
blake2s_param P[1];
/* Move interval verification here? */
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
P->digest_length = (uint8_t)outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store16( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
/* memset(P->reserved, 0, sizeof(P->reserved) ); */
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return _cryptonite_blake2s_init_param( S, P );
}
int _cryptonite_blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
blake2s_param P[1];
/* Move interval verification here? */
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1;
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store16( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
/* memset(P->reserved, 0, sizeof(P->reserved) ); */
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
if( _cryptonite_blake2s_init_param( S, P ) < 0 )
return -1;
{
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
_cryptonite_blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
static void blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
{
__m128i row1, row2, row3, row4;
__m128i buf1, buf2, buf3, buf4;
#if defined(HAVE_SSE41)
__m128i t0, t1;
#if !defined(HAVE_XOP)
__m128i t2;
#endif
#endif
__m128i ff0, ff1;
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 );
const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 );
#endif
#if defined(HAVE_SSE41)
const __m128i m0 = LOADU( block + 00 );
const __m128i m1 = LOADU( block + 16 );
const __m128i m2 = LOADU( block + 32 );
const __m128i m3 = LOADU( block + 48 );
#else
const uint32_t m0 = load32(block + 0 * sizeof(uint32_t));
const uint32_t m1 = load32(block + 1 * sizeof(uint32_t));
const uint32_t m2 = load32(block + 2 * sizeof(uint32_t));
const uint32_t m3 = load32(block + 3 * sizeof(uint32_t));
const uint32_t m4 = load32(block + 4 * sizeof(uint32_t));
const uint32_t m5 = load32(block + 5 * sizeof(uint32_t));
const uint32_t m6 = load32(block + 6 * sizeof(uint32_t));
const uint32_t m7 = load32(block + 7 * sizeof(uint32_t));
const uint32_t m8 = load32(block + 8 * sizeof(uint32_t));
const uint32_t m9 = load32(block + 9 * sizeof(uint32_t));
const uint32_t m10 = load32(block + 10 * sizeof(uint32_t));
const uint32_t m11 = load32(block + 11 * sizeof(uint32_t));
const uint32_t m12 = load32(block + 12 * sizeof(uint32_t));
const uint32_t m13 = load32(block + 13 * sizeof(uint32_t));
const uint32_t m14 = load32(block + 14 * sizeof(uint32_t));
const uint32_t m15 = load32(block + 15 * sizeof(uint32_t));
#endif
row1 = ff0 = LOADU( &S->h[0] );
row2 = ff1 = LOADU( &S->h[4] );
row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] );
row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &S->t[0] ) );
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
}
int _cryptonite_blake2s_update( blake2s_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
if( inlen > 0 )
{
size_t left = S->buflen;
size_t fill = BLAKE2S_BLOCKBYTES - left;
if( inlen > fill )
{
S->buflen = 0;
memcpy( S->buf + left, in, fill ); /* Fill buffer */
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
blake2s_compress( S, S->buf ); /* Compress */
in += fill; inlen -= fill;
while(inlen > BLAKE2S_BLOCKBYTES) {
blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES);
blake2s_compress( S, in );
in += BLAKE2S_BLOCKBYTES;
inlen -= BLAKE2S_BLOCKBYTES;
}
}
memcpy( S->buf + S->buflen, in, inlen );
S->buflen += inlen;
}
return 0;
}
int _cryptonite_blake2s_final( blake2s_state *S, void *out, size_t outlen )
{
uint8_t buffer[BLAKE2S_OUTBYTES] = {0};
size_t i;
if( out == NULL || outlen < S->outlen )
return -1;
if( blake2s_is_lastblock( S ) )
return -1;
blake2s_increment_counter( S, (uint32_t)S->buflen );
blake2s_set_lastblock( S );
memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
blake2s_compress( S, S->buf );
for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );
memcpy( out, buffer, S->outlen );
secure_zero_memory( buffer, sizeof(buffer) );
return 0;
}
/* inlen, at least, should be uint64_t. Others can be size_t. */
int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
blake2s_state S[1];
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key && keylen > 0) return -1;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( keylen > BLAKE2S_KEYBYTES ) return -1;
if( keylen > 0 )
{
if( _cryptonite_blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
}
else
{
if( _cryptonite_blake2s_init( S, outlen ) < 0 ) return -1;
}
_cryptonite_blake2s_update( S, ( const uint8_t * )in, inlen );
_cryptonite_blake2s_final( S, out, outlen );
return 0;
}
#if defined(SUPERCOP)
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 );
}
#endif
#if defined(BLAKE2S_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2S_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2S_OUTBYTES];
blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2S_OUTBYTES];
blake2s_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = _cryptonite_blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = _cryptonite_blake2s_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = _cryptonite_blake2s_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = _cryptonite_blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

View file

@ -0,0 +1,358 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#if defined(_OPENMP)
#include <omp.h>
#endif
#include "blake2.h"
#include "blake2-impl.h"
#define PARALLELISM_DEGREE 8
/*
blake2sp_init_param defaults to setting the expecting output length
from the digest_length parameter block field.
In some cases, however, we do not want this, as the output length
of these instances is given by inner_length instead.
*/
static int blake2sp_init_leaf_param( blake2s_state *S, const blake2s_param *P )
{
int err = _cryptonite_blake2s_init_param(S, P);
S->outlen = P->inner_length;
return err;
}
static int blake2sp_init_leaf( blake2s_state *S, size_t outlen, size_t keylen, uint64_t offset )
{
blake2s_param P[1];
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
P->leaf_length = 0;
P->node_offset = offset;
P->xof_length = 0;
P->node_depth = 0;
P->inner_length = BLAKE2S_OUTBYTES;
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2sp_init_leaf_param( S, P );
}
static int blake2sp_init_root( blake2s_state *S, size_t outlen, size_t keylen )
{
blake2s_param P[1];
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
P->leaf_length = 0;
P->node_offset = 0;
P->xof_length = 0;
P->node_depth = 1;
P->inner_length = BLAKE2S_OUTBYTES;
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return _cryptonite_blake2s_init_param( S, P );
}
int _cryptonite_blake2sp_init( blake2sp_state *S, size_t outlen )
{
size_t i;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2sp_init_root( S->R, outlen, 0 ) < 0 )
return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
return 0;
}
int _cryptonite_blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen )
{
size_t i;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2sp_init_root( S->R, outlen, keylen ) < 0 )
return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
{
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
_cryptonite_blake2s_update( S->S[i], block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
int _cryptonite_blake2sp_update( blake2sp_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
size_t left = S->buflen;
size_t fill = sizeof( S->buf ) - left;
size_t i;
if( left && inlen >= fill )
{
memcpy( S->buf + left, in, fill );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
_cryptonite_blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
in += fill;
inlen -= fill;
left = 0;
}
#if defined(_OPENMP)
#pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t i = omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2S_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
{
_cryptonite_blake2s_update( S->S[i], in__, BLAKE2S_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
}
}
in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES );
inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
if( inlen > 0 )
memcpy( S->buf + left, in, inlen );
S->buflen = left + inlen;
return 0;
}
int _cryptonite_blake2sp_final( blake2sp_state *S, void *out, size_t outlen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
size_t i;
if(out == NULL || outlen < S->outlen) {
return -1;
}
for( i = 0; i < PARALLELISM_DEGREE; ++i )
{
if( S->buflen > i * BLAKE2S_BLOCKBYTES )
{
size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES;
if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES;
_cryptonite_blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left );
}
_cryptonite_blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES );
}
for( i = 0; i < PARALLELISM_DEGREE; ++i )
_cryptonite_blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES );
return _cryptonite_blake2s_final( S->R, out, S->outlen );
}
int _cryptonite_blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
blake2s_state S[PARALLELISM_DEGREE][1];
blake2s_state FS[1];
size_t i;
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key && keylen > 0) return -1;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( keylen > BLAKE2S_KEYBYTES ) return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */
if( keylen > 0 )
{
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
_cryptonite_blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
}
#if defined(_OPENMP)
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
#else
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t i = omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2S_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
{
_cryptonite_blake2s_update( S[i], in__, BLAKE2S_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
}
if( inlen__ > i * BLAKE2S_BLOCKBYTES )
{
const size_t left = inlen__ - i * BLAKE2S_BLOCKBYTES;
const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES;
_cryptonite_blake2s_update( S[i], in__, len );
}
_cryptonite_blake2s_final( S[i], hash[i], BLAKE2S_OUTBYTES );
}
if( blake2sp_init_root( FS, outlen, keylen ) < 0 )
return -1;
FS->last_node = 1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
_cryptonite_blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES );
return _cryptonite_blake2s_final( FS, out, outlen );
}
#if defined(BLAKE2SP_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2S_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2S_OUTBYTES];
_cryptonite_blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
if( 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2S_OUTBYTES];
blake2sp_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = _cryptonite_blake2sp_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = _cryptonite_blake2sp_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = _cryptonite_blake2sp_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = _cryptonite_blake2sp_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

74
src/Encryption.hs Normal file
View file

@ -0,0 +1,74 @@
{-# LANGUAGE OverloadedStrings #-}
module Encryption where
import Control.Monad
import Crypto.Error
import Crypto.KDF.Argon2
import Crypto.Random.Entropy
import Data.Binary.Get (getWord32be, runGet)
import Data.Binary.Put (putWord32be, runPut)
import Data.Bits (xor)
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC8
import qualified Data.ByteString.Lazy as BL
import Data.Word (Word32)
import System.CPUTime
import System.IO
-- picoseconds
measureIteration = do
start <- getCPUTime
throwaway <- throwCryptoErrorIO $ hash defaultOptions ("666" :: ByteString) ("solasgsdgdsgsd" :: ByteString) 32 :: IO ByteString
end <- seq throwaway getCPUTime
pure $ fromIntegral (end - start) / 1e12
getPassword = do
tty <- openFile "/dev/tty" ReadWriteMode
BC8.hPutStr tty "Password: "
hFlush tty
old <- hGetEcho tty
hSetEcho tty False
response <- BC8.hGetLine tty
hSetEcho tty old
BC8.hPutStrLn tty ""
hClose tty
pure response
xorWithKey :: ByteString -> ByteString -> ByteString
xorWithKey key bs
| B.null key = error "key must not be empty"
| otherwise = B.pack $ B.zipWith xor bs key
-- format: E[16-byte salt][4-byte big-endian iterations][ciphertext]
encryptData :: ByteString -> IO ByteString
encryptData privkey = do
iterationTime <- measureIteration
print iterationTime
let iterations = ceiling $ 1 / iterationTime
print iterations
salt <- getEntropy 16 :: IO ByteString
--let password = "password" :: ByteString
password <- getPassword
key <- throwCryptoErrorIO $ hash defaultOptions { iterations = iterations } password salt 56 :: IO ByteString
let ciphertext = xorWithKey key privkey
-- encode iterations as Word32 big-endian
let iterWord :: Word32
iterWord = fromIntegral iterations
iterBytes = BL.toStrict $ runPut (putWord32be iterWord)
pure $ B.concat ["E", salt, iterBytes, ciphertext]
decryptData :: ByteString -> IO ByteString
decryptData contents = do
password <- getPassword
-- parse salt (16 bytes), iterations (4 bytes BE), then ciphertext
when (B.length contents < 76) $ error "encrypted key is unexpectedly short"
let (salt, rest) = B.splitAt 16 contents
(iterBytes, ciphertext) = B.splitAt 4 rest
iterations = runGet getWord32be (BL.fromStrict iterBytes)
key <- throwCryptoErrorIO $ hash defaultOptions { iterations = iterations } password salt 56 :: IO ByteString
let plaintext = xorWithKey key ciphertext
case B.head plaintext of
0x53 -> pure plaintext
_ -> error "wrong password"

View file

@ -7,16 +7,30 @@ import qualified Data.ByteString as B
import qualified Data.ByteString.Base64 as B64
import qualified Data.ByteString.Char8 as BC8
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T
import Network.ONCRPC.XDR.Serial
import Network.Stellar.TransactionXdr
import qualified Stellar.Simple as S
import System.Directory
import System.Environment
import System.IO
import Encryption
import Pretty
main = do
args <- getArgs
case args of
["encrypt"] -> encrypt
_ -> sign
encrypt = do
encryptedFile <- T.encodeUtf8 <$> getPrivateKey
path <- getPrivateKeyPath
B.writeFile path =<< encryptData encryptedFile
sign = do
transactionB64 <- B.getContents
let transaction = case B64.decode $ BC8.strip transactionB64 of
Left err -> error err
@ -31,10 +45,17 @@ main = do
let sd = S.signWithSecret key parsedTransaction
T.putStrLn $ S.xdrSerializeBase64T sd
getPrivateKey = do
getPrivateKeyPath :: IO FilePath
getPrivateKeyPath = do
home <- getHomeDirectory
keyFile <- T.readFile $ home ++ "/.stellar-veritas-key"
pure $ T.strip keyFile
pure $ home ++ "/.stellar-veritas-key"
getPrivateKey = do
file <- B.readFile =<< getPrivateKeyPath
case B.head file of
0x53 -> pure $ T.strip $ T.decodeUtf8 file -- bare unencrypted secret key
0x45 -> T.decodeUtf8 <$> decryptData (B.tail file)
_ -> error "garbled private key file"
confirm :: IO Bool
confirm = do

View file

@ -13,6 +13,7 @@ tested-with: GHC == 9.12.3
executable stellar-veritas
main-is: Main.hs
CPP-options: -DWITH_BYTESTRING_SUPPORT
ghc-options: -W -Wcompat -Wno-missing-home-modules -fno-warn-tabs -rtsopts=ignoreAll -optl-Wl,-s
other-modules:
build-depends: base >= 4.9 && < 5,
@ -46,4 +47,12 @@ executable stellar-veritas
TypeOperators
UnliftedFFITypes
c-sources: bundled/cbits/ref10/ed25519.c
include-dirs: bundled/cbits/ref10 bundled/cbits/ref10/include
bundled/cbits/argon2/argon2.c
bundled/cbits/blake2/sse/blake2s.c
bundled/cbits/blake2/sse/blake2sp.c
bundled/cbits/blake2/sse/blake2b.c
bundled/cbits/blake2/sse/blake2bp.c
include-dirs: bundled/cbits/ref10
bundled/cbits/ref10/include
bundled/cbits/argon2
bundled/cbits/blake2/sse