From 0979e178d55f6fbcdebab4eec4b3f222548645c0 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Tue, 10 Dec 2024 23:37:28 +0800 Subject: [PATCH 1/5] Refactor KEX Common algorithm specific code is moved from common-kex.c to kex-*.c send_msg_kexdh_init() is simplified - parameters are no longer reused between first-follows and a subsequent kexdh_init. The default gen_kexcurve25519_param() is relatively fast, so recomputing is OK. --- Makefile.in | 3 +- src/algo.h | 1 + src/cli-kex.c | 70 ++++++-------- src/common-kex.c | 233 ----------------------------------------------- src/kex-dh.c | 120 ++++++++++++++++++++++++ src/kex-ecdh.c | 64 +++++++++++++ src/kex-x25519.c | 74 +++++++++++++++ src/session.h | 2 - 8 files changed, 290 insertions(+), 277 deletions(-) create mode 100644 src/kex-dh.c create mode 100644 src/kex-ecdh.c create mode 100644 src/kex-x25519.c diff --git a/Makefile.in b/Makefile.in index 5ebfca2e3..eb2bfc5af 100644 --- a/Makefile.in +++ b/Makefile.in @@ -63,7 +63,8 @@ CLIOBJS = $(patsubst %,$(OBJ_DIR)/%,$(_CLIOBJS)) _CLISVROBJS=common-session.o packet.o common-algo.o common-kex.o \ common-channel.o common-chansession.o termcodes.o loginrec.o \ tcp-accept.o listener.o process-packet.o dh_groups.o \ - common-runopts.o circbuffer.o list.o netio.o chachapoly.o gcm.o + common-runopts.o circbuffer.o list.o netio.o chachapoly.o gcm.o \ + kex-x25519.o kex-dh.o kex-ecdh.o CLISVROBJS = $(patsubst %,$(OBJ_DIR)/%,$(_CLISVROBJS)) _KEYOBJS=dropbearkey.o diff --git a/src/algo.h b/src/algo.h index c46b40985..1055601cb 100644 --- a/src/algo.h +++ b/src/algo.h @@ -113,6 +113,7 @@ struct dropbear_kex { #if DROPBEAR_ECDH const struct dropbear_ecc_curve *ecc_curve; #else + /* NULL for non-ecc curves */ const void* dummy; #endif diff --git a/src/cli-kex.c b/src/cli-kex.c index 6d2e4ac2d..a5d7accc2 100644 --- a/src/cli-kex.c +++ b/src/cli-kex.c @@ -42,6 +42,27 @@ static void checkhostkey(const unsigned char* keyblob, unsigned int keybloblen); #define MAX_KNOWNHOSTS_LINE 4500 
+static void cli_kex_free_param(void) { +#if DROPBEAR_NORMAL_DH + if (cli_ses.dh_param) { + free_kexdh_param(cli_ses.dh_param); + cli_ses.dh_param = NULL; + } +#endif +#if DROPBEAR_ECDH + if (cli_ses.ecdh_param) { + free_kexecdh_param(cli_ses.ecdh_param); + cli_ses.ecdh_param = NULL; + } +#endif +#if DROPBEAR_CURVE25519 + if (cli_ses.curve25519_param) { + free_kexcurve25519_param(cli_ses.curve25519_param); + cli_ses.curve25519_param = NULL; + } +#endif +} + void send_msg_kexdh_init() { TRACE(("send_msg_kexdh_init()")) @@ -53,47 +74,30 @@ void send_msg_kexdh_init() { } #endif + cli_kex_free_param(); + buf_putbyte(ses.writepayload, SSH_MSG_KEXDH_INIT); switch (ses.newkeys->algo_kex->mode) { #if DROPBEAR_NORMAL_DH case DROPBEAR_KEX_NORMAL_DH: - if (ses.newkeys->algo_kex != cli_ses.param_kex_algo - || !cli_ses.dh_param) { - if (cli_ses.dh_param) { - free_kexdh_param(cli_ses.dh_param); - } - cli_ses.dh_param = gen_kexdh_param(); - } + cli_ses.dh_param = gen_kexdh_param(); buf_putmpint(ses.writepayload, &cli_ses.dh_param->pub); break; #endif #if DROPBEAR_ECDH case DROPBEAR_KEX_ECDH: - if (ses.newkeys->algo_kex != cli_ses.param_kex_algo - || !cli_ses.ecdh_param) { - if (cli_ses.ecdh_param) { - free_kexecdh_param(cli_ses.ecdh_param); - } - cli_ses.ecdh_param = gen_kexecdh_param(); - } + cli_ses.ecdh_param = gen_kexecdh_param(); buf_put_ecc_raw_pubkey_string(ses.writepayload, &cli_ses.ecdh_param->key); break; #endif #if DROPBEAR_CURVE25519 case DROPBEAR_KEX_CURVE25519: - if (ses.newkeys->algo_kex != cli_ses.param_kex_algo - || !cli_ses.curve25519_param) { - if (cli_ses.curve25519_param) { - free_kexcurve25519_param(cli_ses.curve25519_param); - } - cli_ses.curve25519_param = gen_kexcurve25519_param(); - } + cli_ses.curve25519_param = gen_kexcurve25519_param(); buf_putstring(ses.writepayload, cli_ses.curve25519_param->pub, CURVE25519_LEN); break; #endif } - cli_ses.param_kex_algo = ses.newkeys->algo_kex; encrypt_packet(); } @@ -132,6 +136,7 @@ void recv_msg_kexdh_reply() { 
dropbear_exit("Bad KEX packet"); } + /* Derive the shared secret */ switch (ses.newkeys->algo_kex->mode) { #if DROPBEAR_NORMAL_DH case DROPBEAR_KEX_NORMAL_DH: @@ -168,26 +173,9 @@ void recv_msg_kexdh_reply() { #endif } -#if DROPBEAR_NORMAL_DH - if (cli_ses.dh_param) { - free_kexdh_param(cli_ses.dh_param); - cli_ses.dh_param = NULL; - } -#endif -#if DROPBEAR_ECDH - if (cli_ses.ecdh_param) { - free_kexecdh_param(cli_ses.ecdh_param); - cli_ses.ecdh_param = NULL; - } -#endif -#if DROPBEAR_CURVE25519 - if (cli_ses.curve25519_param) { - free_kexcurve25519_param(cli_ses.curve25519_param); - cli_ses.curve25519_param = NULL; - } -#endif + /* Clear the local parameter */ + cli_kex_free_param(); - cli_ses.param_kex_algo = NULL; if (buf_verify(ses.payload, hostkey, ses.newkeys->algo_signature, ses.hash) != DROPBEAR_SUCCESS) { dropbear_exit("Bad hostkey signature"); diff --git a/src/common-kex.c b/src/common-kex.c index c7b8f8db3..bc65d3f0e 100644 --- a/src/common-kex.c +++ b/src/common-kex.c @@ -29,15 +29,11 @@ #include "buffer.h" #include "session.h" #include "kex.h" -#include "dh_groups.h" #include "ssh.h" #include "packet.h" #include "bignum.h" #include "dbrandom.h" #include "runopts.h" -#include "ecc.h" -#include "curve25519.h" -#include "crypto_desc.h" static void kexinitialise(void); static void gen_new_keys(void); @@ -572,235 +568,6 @@ void recv_msg_kexinit() { TRACE(("leave recv_msg_kexinit")) } -#if DROPBEAR_NORMAL_DH -static void load_dh_p(mp_int * dh_p) -{ - bytes_to_mp(dh_p, ses.newkeys->algo_kex->dh_p_bytes, - ses.newkeys->algo_kex->dh_p_len); -} - -/* Initialises and generate one side of the diffie-hellman key exchange values. 
- * See the transport rfc 4253 section 8 for details */ -/* dh_pub and dh_priv MUST be already initialised */ -struct kex_dh_param *gen_kexdh_param() { - struct kex_dh_param *param = NULL; - - DEF_MP_INT(dh_p); - DEF_MP_INT(dh_q); - DEF_MP_INT(dh_g); - - TRACE(("enter gen_kexdh_vals")) - - param = m_malloc(sizeof(*param)); - m_mp_init_multi(¶m->pub, ¶m->priv, &dh_g, &dh_p, &dh_q, NULL); - - /* read the prime and generator*/ - load_dh_p(&dh_p); - - mp_set_ul(&dh_g, DH_G_VAL); - - /* calculate q = (p-1)/2 */ - /* dh_priv is just a temp var here */ - if (mp_sub_d(&dh_p, 1, ¶m->priv) != MP_OKAY) { - dropbear_exit("Diffie-Hellman error"); - } - if (mp_div_2(¶m->priv, &dh_q) != MP_OKAY) { - dropbear_exit("Diffie-Hellman error"); - } - - /* Generate a private portion 0 < dh_priv < dh_q */ - gen_random_mpint(&dh_q, ¶m->priv); - - /* f = g^y mod p */ - if (mp_exptmod(&dh_g, ¶m->priv, &dh_p, ¶m->pub) != MP_OKAY) { - dropbear_exit("Diffie-Hellman error"); - } - mp_clear_multi(&dh_g, &dh_p, &dh_q, NULL); - return param; -} - -void free_kexdh_param(struct kex_dh_param *param) -{ - mp_clear_multi(¶m->pub, ¶m->priv, NULL); - m_free(param); -} - -/* This function is fairly common between client/server, with some substitution - * of dh_e/dh_f etc. Hence these arguments: - * dh_pub_us is 'e' for the client, 'f' for the server. dh_pub_them is - * vice-versa. 
dh_priv is the x/y value corresponding to dh_pub_us */ -void kexdh_comb_key(struct kex_dh_param *param, mp_int *dh_pub_them, - sign_key *hostkey) { - - DEF_MP_INT(dh_p); - DEF_MP_INT(dh_p_min1); - mp_int *dh_e = NULL, *dh_f = NULL; - - m_mp_init_multi(&dh_p, &dh_p_min1, NULL); - load_dh_p(&dh_p); - - if (mp_sub_d(&dh_p, 1, &dh_p_min1) != MP_OKAY) { - dropbear_exit("Diffie-Hellman error"); - } - - /* Check that dh_pub_them (dh_e or dh_f) is in the range [2, p-2] */ - if (mp_cmp(dh_pub_them, &dh_p_min1) != MP_LT - || mp_cmp_d(dh_pub_them, 1) != MP_GT) { - dropbear_exit("Diffie-Hellman error"); - } - - /* K = e^y mod p = f^x mod p */ - m_mp_alloc_init_multi(&ses.dh_K, NULL); - if (mp_exptmod(dh_pub_them, ¶m->priv, &dh_p, ses.dh_K) != MP_OKAY) { - dropbear_exit("Diffie-Hellman error"); - } - - /* clear no longer needed vars */ - mp_clear_multi(&dh_p, &dh_p_min1, NULL); - - /* From here on, the code needs to work with the _same_ vars on each side, - * not vice-versaing for client/server */ - if (IS_DROPBEAR_CLIENT) { - dh_e = ¶m->pub; - dh_f = dh_pub_them; - } else { - dh_e = dh_pub_them; - dh_f = ¶m->pub; - } - - /* Create the remainder of the hash buffer, to generate the exchange hash */ - /* K_S, the host key */ - buf_put_pub_key(ses.kexhashbuf, hostkey, ses.newkeys->algo_hostkey); - /* e, exchange value sent by the client */ - buf_putmpint(ses.kexhashbuf, dh_e); - /* f, exchange value sent by the server */ - buf_putmpint(ses.kexhashbuf, dh_f); - /* K, the shared secret */ - buf_putmpint(ses.kexhashbuf, ses.dh_K); - - /* calculate the hash H to sign */ - finish_kexhashbuf(); -} -#endif - -#if DROPBEAR_ECDH -struct kex_ecdh_param *gen_kexecdh_param() { - struct kex_ecdh_param *param = m_malloc(sizeof(*param)); - if (ecc_make_key_ex(NULL, dropbear_ltc_prng, - ¶m->key, ses.newkeys->algo_kex->ecc_curve->dp) != CRYPT_OK) { - dropbear_exit("ECC error"); - } - return param; -} - -void free_kexecdh_param(struct kex_ecdh_param *param) { - ecc_free(¶m->key); - m_free(param); - 
-} -void kexecdh_comb_key(struct kex_ecdh_param *param, buffer *pub_them, - sign_key *hostkey) { - const struct dropbear_kex *algo_kex = ses.newkeys->algo_kex; - /* public keys from client and server */ - ecc_key *Q_C, *Q_S, *Q_them; - - Q_them = buf_get_ecc_raw_pubkey(pub_them, algo_kex->ecc_curve); - if (Q_them == NULL) { - dropbear_exit("ECC error"); - } - - ses.dh_K = dropbear_ecc_shared_secret(Q_them, ¶m->key); - - /* Create the remainder of the hash buffer, to generate the exchange hash - See RFC5656 section 4 page 7 */ - if (IS_DROPBEAR_CLIENT) { - Q_C = ¶m->key; - Q_S = Q_them; - } else { - Q_C = Q_them; - Q_S = ¶m->key; - } - - /* K_S, the host key */ - buf_put_pub_key(ses.kexhashbuf, hostkey, ses.newkeys->algo_hostkey); - /* Q_C, client's ephemeral public key octet string */ - buf_put_ecc_raw_pubkey_string(ses.kexhashbuf, Q_C); - /* Q_S, server's ephemeral public key octet string */ - buf_put_ecc_raw_pubkey_string(ses.kexhashbuf, Q_S); - /* K, the shared secret */ - buf_putmpint(ses.kexhashbuf, ses.dh_K); - - ecc_free(Q_them); - m_free(Q_them); - - /* calculate the hash H to sign */ - finish_kexhashbuf(); -} -#endif /* DROPBEAR_ECDH */ - -#if DROPBEAR_CURVE25519 -struct kex_curve25519_param *gen_kexcurve25519_param() { - /* Per http://cr.yp.to/ecdh.html */ - struct kex_curve25519_param *param = m_malloc(sizeof(*param)); - const unsigned char basepoint[32] = {9}; - - genrandom(param->priv, CURVE25519_LEN); - dropbear_curve25519_scalarmult(param->pub, param->priv, basepoint); - - return param; -} - -void free_kexcurve25519_param(struct kex_curve25519_param *param) { - m_burn(param->priv, CURVE25519_LEN); - m_free(param); -} - -void kexcurve25519_comb_key(const struct kex_curve25519_param *param, const buffer *buf_pub_them, - sign_key *hostkey) { - unsigned char out[CURVE25519_LEN]; - const unsigned char* Q_C = NULL; - const unsigned char* Q_S = NULL; - char zeroes[CURVE25519_LEN] = {0}; - - if (buf_pub_them->len != CURVE25519_LEN) - { - dropbear_exit("Bad 
curve25519"); - } - - dropbear_curve25519_scalarmult(out, param->priv, buf_pub_them->data); - - if (constant_time_memcmp(zeroes, out, CURVE25519_LEN) == 0) { - dropbear_exit("Bad curve25519"); - } - - m_mp_alloc_init_multi(&ses.dh_K, NULL); - bytes_to_mp(ses.dh_K, out, CURVE25519_LEN); - m_burn(out, sizeof(out)); - - /* Create the remainder of the hash buffer, to generate the exchange hash. - See RFC5656 section 4 page 7 */ - if (IS_DROPBEAR_CLIENT) { - Q_C = param->pub; - Q_S = buf_pub_them->data; - } else { - Q_S = param->pub; - Q_C = buf_pub_them->data; - } - - /* K_S, the host key */ - buf_put_pub_key(ses.kexhashbuf, hostkey, ses.newkeys->algo_hostkey); - /* Q_C, client's ephemeral public key octet string */ - buf_putstring(ses.kexhashbuf, (const char*)Q_C, CURVE25519_LEN); - /* Q_S, server's ephemeral public key octet string */ - buf_putstring(ses.kexhashbuf, (const char*)Q_S, CURVE25519_LEN); - /* K, the shared secret */ - buf_putmpint(ses.kexhashbuf, ses.dh_K); - - /* calculate the hash H to sign */ - finish_kexhashbuf(); -} -#endif /* DROPBEAR_CURVE25519 */ - void finish_kexhashbuf(void) { hash_state hs; diff --git a/src/kex-dh.c b/src/kex-dh.c new file mode 100644 index 000000000..fa289a60b --- /dev/null +++ b/src/kex-dh.c @@ -0,0 +1,120 @@ +#include "includes.h" +#include "algo.h" +#include "buffer.h" +#include "session.h" +#include "bignum.h" +#include "dbrandom.h" +#include "crypto_desc.h" +#include "dh_groups.h" +#include "kex.h" + +#if DROPBEAR_NORMAL_DH +static void load_dh_p(mp_int * dh_p) +{ + bytes_to_mp(dh_p, ses.newkeys->algo_kex->dh_p_bytes, + ses.newkeys->algo_kex->dh_p_len); +} + +/* Initialises and generate one side of the diffie-hellman key exchange values. 
+ * See the transport rfc 4253 section 8 for details */ +/* dh_pub and dh_priv MUST be already initialised */ +struct kex_dh_param *gen_kexdh_param() { + struct kex_dh_param *param = NULL; + + DEF_MP_INT(dh_p); + DEF_MP_INT(dh_q); + DEF_MP_INT(dh_g); + + TRACE(("enter gen_kexdh_vals")) + + param = m_malloc(sizeof(*param)); + m_mp_init_multi(¶m->pub, ¶m->priv, &dh_g, &dh_p, &dh_q, NULL); + + /* read the prime and generator*/ + load_dh_p(&dh_p); + + mp_set_ul(&dh_g, DH_G_VAL); + + /* calculate q = (p-1)/2 */ + /* dh_priv is just a temp var here */ + if (mp_sub_d(&dh_p, 1, ¶m->priv) != MP_OKAY) { + dropbear_exit("Diffie-Hellman error"); + } + if (mp_div_2(¶m->priv, &dh_q) != MP_OKAY) { + dropbear_exit("Diffie-Hellman error"); + } + + /* Generate a private portion 0 < dh_priv < dh_q */ + gen_random_mpint(&dh_q, ¶m->priv); + + /* f = g^y mod p */ + if (mp_exptmod(&dh_g, ¶m->priv, &dh_p, ¶m->pub) != MP_OKAY) { + dropbear_exit("Diffie-Hellman error"); + } + mp_clear_multi(&dh_g, &dh_p, &dh_q, NULL); + return param; +} + +void free_kexdh_param(struct kex_dh_param *param) +{ + mp_clear_multi(¶m->pub, ¶m->priv, NULL); + m_free(param); +} + +/* This function is fairly common between client/server, with some substitution + * of dh_e/dh_f etc. Hence these arguments: + * dh_pub_us is 'e' for the client, 'f' for the server. dh_pub_them is + * vice-versa. 
dh_priv is the x/y value corresponding to dh_pub_us */ +void kexdh_comb_key(struct kex_dh_param *param, mp_int *dh_pub_them, + sign_key *hostkey) { + + DEF_MP_INT(dh_p); + DEF_MP_INT(dh_p_min1); + mp_int *dh_e = NULL, *dh_f = NULL; + + m_mp_init_multi(&dh_p, &dh_p_min1, NULL); + load_dh_p(&dh_p); + + if (mp_sub_d(&dh_p, 1, &dh_p_min1) != MP_OKAY) { + dropbear_exit("Diffie-Hellman error"); + } + + /* Check that dh_pub_them (dh_e or dh_f) is in the range [2, p-2] */ + if (mp_cmp(dh_pub_them, &dh_p_min1) != MP_LT + || mp_cmp_d(dh_pub_them, 1) != MP_GT) { + dropbear_exit("Diffie-Hellman error"); + } + + /* K = e^y mod p = f^x mod p */ + m_mp_alloc_init_multi(&ses.dh_K, NULL); + if (mp_exptmod(dh_pub_them, ¶m->priv, &dh_p, ses.dh_K) != MP_OKAY) { + dropbear_exit("Diffie-Hellman error"); + } + + /* clear no longer needed vars */ + mp_clear_multi(&dh_p, &dh_p_min1, NULL); + + /* From here on, the code needs to work with the _same_ vars on each side, + * not vice-versaing for client/server */ + if (IS_DROPBEAR_CLIENT) { + dh_e = ¶m->pub; + dh_f = dh_pub_them; + } else { + dh_e = dh_pub_them; + dh_f = ¶m->pub; + } + + /* Create the remainder of the hash buffer, to generate the exchange hash */ + /* K_S, the host key */ + buf_put_pub_key(ses.kexhashbuf, hostkey, ses.newkeys->algo_hostkey); + /* e, exchange value sent by the client */ + buf_putmpint(ses.kexhashbuf, dh_e); + /* f, exchange value sent by the server */ + buf_putmpint(ses.kexhashbuf, dh_f); + /* K, the shared secret */ + buf_putmpint(ses.kexhashbuf, ses.dh_K); + + /* calculate the hash H to sign */ + finish_kexhashbuf(); +} +#endif diff --git a/src/kex-ecdh.c b/src/kex-ecdh.c new file mode 100644 index 000000000..668be9a67 --- /dev/null +++ b/src/kex-ecdh.c @@ -0,0 +1,64 @@ +#include "includes.h" +#include "algo.h" +#include "buffer.h" +#include "session.h" +#include "dbrandom.h" +#include "crypto_desc.h" +#include "ecc.h" +#include "kex.h" + +#if DROPBEAR_ECDH +struct kex_ecdh_param *gen_kexecdh_param() { + 
struct kex_ecdh_param *param = m_malloc(sizeof(*param)); + if (ecc_make_key_ex(NULL, dropbear_ltc_prng, + ¶m->key, ses.newkeys->algo_kex->ecc_curve->dp) != CRYPT_OK) { + dropbear_exit("ECC error"); + } + return param; +} + +void free_kexecdh_param(struct kex_ecdh_param *param) { + ecc_free(¶m->key); + m_free(param); + +} +void kexecdh_comb_key(struct kex_ecdh_param *param, buffer *pub_them, + sign_key *hostkey) { + const struct dropbear_kex *algo_kex = ses.newkeys->algo_kex; + /* public keys from client and server */ + ecc_key *Q_C, *Q_S, *Q_them; + + Q_them = buf_get_ecc_raw_pubkey(pub_them, algo_kex->ecc_curve); + if (Q_them == NULL) { + dropbear_exit("ECC error"); + } + + ses.dh_K = dropbear_ecc_shared_secret(Q_them, ¶m->key); + + /* Create the remainder of the hash buffer, to generate the exchange hash + See RFC5656 section 4 page 7 */ + if (IS_DROPBEAR_CLIENT) { + Q_C = ¶m->key; + Q_S = Q_them; + } else { + Q_C = Q_them; + Q_S = ¶m->key; + } + + /* K_S, the host key */ + buf_put_pub_key(ses.kexhashbuf, hostkey, ses.newkeys->algo_hostkey); + /* Q_C, client's ephemeral public key octet string */ + buf_put_ecc_raw_pubkey_string(ses.kexhashbuf, Q_C); + /* Q_S, server's ephemeral public key octet string */ + buf_put_ecc_raw_pubkey_string(ses.kexhashbuf, Q_S); + /* K, the shared secret */ + buf_putmpint(ses.kexhashbuf, ses.dh_K); + + ecc_free(Q_them); + m_free(Q_them); + + /* calculate the hash H to sign */ + finish_kexhashbuf(); +} +#endif /* DROPBEAR_ECDH */ + diff --git a/src/kex-x25519.c b/src/kex-x25519.c new file mode 100644 index 000000000..b7bc8ebad --- /dev/null +++ b/src/kex-x25519.c @@ -0,0 +1,74 @@ +#include "includes.h" +#include "algo.h" +#include "buffer.h" +#include "session.h" +#include "bignum.h" +#include "dbrandom.h" +#include "crypto_desc.h" +#include "curve25519.h" +#include "kex.h" + +#if DROPBEAR_CURVE25519 + +struct kex_curve25519_param *gen_kexcurve25519_param() { + /* Per http://cr.yp.to/ecdh.html */ + struct kex_curve25519_param *param = 
m_malloc(sizeof(*param)); + const unsigned char basepoint[32] = {9}; + + genrandom(param->priv, CURVE25519_LEN); + dropbear_curve25519_scalarmult(param->pub, param->priv, basepoint); + + return param; +} + +void free_kexcurve25519_param(struct kex_curve25519_param *param) { + m_burn(param->priv, CURVE25519_LEN); + m_free(param); +} + +void kexcurve25519_comb_key(const struct kex_curve25519_param *param, const buffer *buf_pub_them, + sign_key *hostkey) { + unsigned char out[CURVE25519_LEN]; + const unsigned char* Q_C = NULL; + const unsigned char* Q_S = NULL; + char zeroes[CURVE25519_LEN] = {0}; + + if (buf_pub_them->len != CURVE25519_LEN) + { + dropbear_exit("Bad curve25519"); + } + + dropbear_curve25519_scalarmult(out, param->priv, buf_pub_them->data); + + if (constant_time_memcmp(zeroes, out, CURVE25519_LEN) == 0) { + dropbear_exit("Bad curve25519"); + } + + m_mp_alloc_init_multi(&ses.dh_K, NULL); + bytes_to_mp(ses.dh_K, out, CURVE25519_LEN); + m_burn(out, sizeof(out)); + + /* Create the remainder of the hash buffer, to generate the exchange hash. + See RFC5656 section 4 page 7 */ + if (IS_DROPBEAR_CLIENT) { + Q_C = param->pub; + Q_S = buf_pub_them->data; + } else { + Q_S = param->pub; + Q_C = buf_pub_them->data; + } + + /* K_S, the host key */ + buf_put_pub_key(ses.kexhashbuf, hostkey, ses.newkeys->algo_hostkey); + /* Q_C, client's ephemeral public key octet string */ + buf_putstring(ses.kexhashbuf, (const char*)Q_C, CURVE25519_LEN); + /* Q_S, server's ephemeral public key octet string */ + buf_putstring(ses.kexhashbuf, (const char*)Q_S, CURVE25519_LEN); + /* K, the shared secret */ + buf_putmpint(ses.kexhashbuf, ses.dh_K); + + /* calculate the hash H to sign */ + finish_kexhashbuf(); +} + +#endif /* DROPBEAR_CURVE25519 */ diff --git a/src/session.h b/src/session.h index 08b5ad5ee..570a86593 100644 --- a/src/session.h +++ b/src/session.h @@ -292,11 +292,9 @@ typedef enum { struct clientsession { - /* XXX - move these to kexstate? 
*/ struct kex_dh_param *dh_param; struct kex_ecdh_param *ecdh_param; struct kex_curve25519_param *curve25519_param; - const struct dropbear_kex *param_kex_algo; /* KEX algorithm corresponding to current dh_e and dh_x */ cli_kex_state kex_state; /* Used for progressing KEX */ cli_state state; /* Used to progress auth/channelsession etc */ From f65f423432dd40a9b3430d3c9555af812e1055df Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Fri, 13 Dec 2024 23:06:28 +0800 Subject: [PATCH 2/5] Add general post-quantum hybrid key exchange This implements draft-kampanakis-curdle-ssh-pq-ke-04, a mechanism combining a post-quantum KEM with x25519. Adding a post-quantum KEM can avoid harvest-now-decrypt-later style attacks (captured traffic decrypted in future by a quantum computer, if they are created). Combining it with existing x25519 ensures that security is not weakened compared to the present status, if a weakness in the PQ KEM is discovered. A future commit will add concrete PQ methods, currently this code is unused. x25519 could be generalised later if needed. --- .github/workflows/build.yml | 4 + Makefile.in | 2 +- src/algo.h | 22 ++++-- src/buffer.c | 9 +++ src/buffer.h | 1 + src/cli-kex.c | 21 ++++++ src/common-kex.c | 13 +++- src/common-session.c | 3 + src/curve25519.c | 12 +-- src/kex-ecdh.c | 8 +- src/kex-pqhybrid.c | 142 ++++++++++++++++++++++++++++++++++++ src/kex-x25519.c | 26 +++++-- src/kex.h | 31 +++++++- src/session.h | 5 ++ src/svr-kex.c | 16 +++- src/sysoptions.h | 4 + 16 files changed, 289 insertions(+), 30 deletions(-) create mode 100644 src/kex-pqhybrid.c diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4360d2bbf..50df8e469 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -150,6 +150,10 @@ jobs: #define DROPBEAR_SHA2_512_HMAC 1 #define DROPBEAR_CLI_PUBKEY_AUTH 0 + - name: pq, no plain x25519 + localoptions: | + #define DROPBEAR_CURVE25519 0 + # # Fuzzers run standalone. 
A bit superfluous with cifuzz, but # # good to run the whole corpus to keep it working. # - name: fuzzing with address sanitizer diff --git a/Makefile.in b/Makefile.in index eb2bfc5af..263153f62 100644 --- a/Makefile.in +++ b/Makefile.in @@ -64,7 +64,7 @@ _CLISVROBJS=common-session.o packet.o common-algo.o common-kex.o \ common-channel.o common-chansession.o termcodes.o loginrec.o \ tcp-accept.o listener.o process-packet.o dh_groups.o \ common-runopts.o circbuffer.o list.o netio.o chachapoly.o gcm.o \ - kex-x25519.o kex-dh.o kex-ecdh.o + kex-x25519.o kex-dh.o kex-ecdh.o kex-pqhybrid.o CLISVROBJS = $(patsubst %,$(OBJ_DIR)/%,$(_CLISVROBJS)) _KEYOBJS=dropbearkey.o diff --git a/src/algo.h b/src/algo.h index 1055601cb..3572844c8 100644 --- a/src/algo.h +++ b/src/algo.h @@ -100,6 +100,9 @@ enum dropbear_kex_mode { #if DROPBEAR_CURVE25519 DROPBEAR_KEX_CURVE25519, #endif +#if DROPBEAR_PQHYBRID + DROPBEAR_KEX_PQHYBRID, +#endif }; struct dropbear_kex { @@ -109,18 +112,23 @@ struct dropbear_kex { const unsigned char *dh_p_bytes; const int dh_p_len; - /* elliptic curve DH KEX */ -#if DROPBEAR_ECDH - const struct dropbear_ecc_curve *ecc_curve; -#else - /* NULL for non-ecc curves */ - const void* dummy; -#endif + /* kex specific, could be ecc_curve or pqhybrid_desc */ + const void* details; /* both */ const struct ltc_hash_descriptor *hash_desc; }; +struct dropbear_kem_desc { + unsigned int public_len; + unsigned int secret_len; + unsigned int ciphertext_len; + unsigned int output_len; + int (*kem_gen)(unsigned char *pk, unsigned char *sk); + int (*kem_enc)(unsigned char *c, unsigned char *k, const unsigned char *pk); + int (*kem_dec)(unsigned char *k, const unsigned char *c, const unsigned char *sk); +}; + /* Includes all algorithms is useall is set */ void buf_put_algolist_all(buffer * buf, const algo_type localalgos[], int useall); /* Includes "usable" algorithms */ diff --git a/src/buffer.c b/src/buffer.c index 1377b775b..0222ffb84 100644 --- a/src/buffer.c +++ 
b/src/buffer.c @@ -252,6 +252,15 @@ buffer * buf_getbuf(buffer *buf) { return buf_getstringbuf_int(buf, 1); } +/* Returns the equivalent of buf_getptr() as a new buffer. */ +buffer * buf_getptrcopy(const buffer* buf, unsigned int len) { + unsigned char *src = buf_getptr(buf, len); + buffer *ret = buf_new(len); + buf_putbytes(ret, src, len); + buf_setpos(ret, 0); + return ret; +} + /* Just increment the buffer position the same as if we'd used buf_getstring, * but don't bother copying/malloc()ing for it */ void buf_eatstring(buffer *buf) { diff --git a/src/buffer.h b/src/buffer.h index 0ba6683f8..1b16d684c 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -60,6 +60,7 @@ unsigned char* buf_getwriteptr(const buffer* buf, unsigned int len); char* buf_getstring(buffer* buf, unsigned int *retlen); buffer * buf_getstringbuf(buffer *buf); buffer * buf_getbuf(buffer *buf); +buffer * buf_getptrcopy(const buffer* buf, unsigned int len); void buf_eatstring(buffer *buf); void buf_putint(buffer* buf, unsigned int val); void buf_putstring(buffer* buf, const char* str, unsigned int len); diff --git a/src/cli-kex.c b/src/cli-kex.c index a5d7accc2..2e8ac91d1 100644 --- a/src/cli-kex.c +++ b/src/cli-kex.c @@ -61,6 +61,12 @@ static void cli_kex_free_param(void) { cli_ses.curve25519_param = NULL; } #endif +#if DROPBEAR_PQHYBRID + if (cli_ses.pqhybrid_param) { + free_kexpqhybrid_param(cli_ses.pqhybrid_param); + cli_ses.pqhybrid_param = NULL; + } +#endif } void send_msg_kexdh_init() { @@ -95,6 +101,12 @@ void send_msg_kexdh_init() { cli_ses.curve25519_param = gen_kexcurve25519_param(); buf_putstring(ses.writepayload, cli_ses.curve25519_param->pub, CURVE25519_LEN); break; +#endif +#if DROPBEAR_PQHYBRID + case DROPBEAR_KEX_PQHYBRID: + cli_ses.pqhybrid_param = gen_kexpqhybrid_param(); + buf_putbufstring(ses.writepayload, cli_ses.pqhybrid_param->concat_public); + break; #endif } @@ -170,6 +182,15 @@ void recv_msg_kexdh_reply() { buf_free(ecdh_qs); } break; +#endif +#if DROPBEAR_PQHYBRID + case 
DROPBEAR_KEX_PQHYBRID: + { + buffer *ecdh_qs = buf_getstringbuf(ses.payload); + kexpqhybrid_comb_key(cli_ses.pqhybrid_param, ecdh_qs, hostkey); + buf_free(ecdh_qs); + } + break; #endif } diff --git a/src/common-kex.c b/src/common-kex.c index bc65d3f0e..5aa04e80e 100644 --- a/src/common-kex.c +++ b/src/common-kex.c @@ -313,9 +313,16 @@ static void gen_new_keys() { /* the dh_K and hash are the start of all hashes, we make use of that */ hash_desc->init(&hs); - hash_process_mp(hash_desc, &hs, ses.dh_K); - mp_clear(ses.dh_K); - m_free(ses.dh_K); + if (ses.dh_K) { + hash_process_mp(hash_desc, &hs, ses.dh_K); + mp_clear(ses.dh_K); + m_free(ses.dh_K); + } + if (ses.dh_K_bytes) { + hash_desc->process(&hs, ses.dh_K_bytes->data, ses.dh_K_bytes->len); + buf_burn_free(ses.dh_K_bytes); + ses.dh_K_bytes = NULL; + } hash_desc->process(&hs, ses.hash->data, ses.hash->len); buf_burn_free(ses.hash); ses.hash = NULL; diff --git a/src/common-session.c b/src/common-session.c index a045adfc7..bea2d788d 100644 --- a/src/common-session.c +++ b/src/common-session.c @@ -353,6 +353,9 @@ void session_cleanup() { mp_clear(ses.dh_K); } m_free(ses.dh_K); + if (ses.dh_K_bytes) { + buf_burn_free(ses.dh_K_bytes); + } m_burn(ses.keys, sizeof(struct key_context)); m_free(ses.keys); diff --git a/src/curve25519.c b/src/curve25519.c index 5f743e358..aa16434db 100644 --- a/src/curve25519.c +++ b/src/curve25519.c @@ -26,7 +26,7 @@ #include "dbrandom.h" #include "curve25519.h" -#if DROPBEAR_CURVE25519 || DROPBEAR_ED25519 +#if DROPBEAR_CURVE25519_DEP || DROPBEAR_ED25519 /* Modified TweetNaCl version 20140427, a self-contained public-domain C library. 
* https://tweetnacl.cr.yp.to/ */ @@ -40,10 +40,10 @@ typedef unsigned long long u64; typedef long long i64; typedef i64 gf[16]; -#if DROPBEAR_CURVE25519 +#if DROPBEAR_CURVE25519_DEP static const gf _121665 = {0xDB41,1}; -#endif /* DROPBEAR_CURVE25519 */ +#endif /* DROPBEAR_CURVE25519_DEP */ #if DROPBEAR_ED25519 static const gf gf0, @@ -207,7 +207,7 @@ sv pow2523(gf o,const gf i) } #endif /* DROPBEAR_ED25519 && DROPBEAR_SIGNKEY_VERIFY */ -#if DROPBEAR_CURVE25519 +#if DROPBEAR_CURVE25519_DEP void dropbear_curve25519_scalarmult(u8 *q,const u8 *n,const u8 *p) { u8 z[32]; @@ -257,7 +257,7 @@ void dropbear_curve25519_scalarmult(u8 *q,const u8 *n,const u8 *p) M(x+16,x+16,x+32); pack25519(q,x+16); } -#endif /* DROPBEAR_CURVE25519 */ +#endif /* DROPBEAR_CURVE25519_DEP */ #if DROPBEAR_ED25519 static int crypto_hash(u8 *out,const u8 *m,u64 n) @@ -494,4 +494,4 @@ int dropbear_ed25519_verify(const u8 *m,u32 mlen,const u8 *s,u32 slen,const u8 * #endif /* DROPBEAR_ED25519 */ -#endif /* DROPBEAR_CURVE25519 || DROPBEAR_ED25519 */ +#endif /* DROPBEAR_CURVE25519_DEP || DROPBEAR_ED25519 */ diff --git a/src/kex-ecdh.c b/src/kex-ecdh.c index 668be9a67..262278877 100644 --- a/src/kex-ecdh.c +++ b/src/kex-ecdh.c @@ -10,8 +10,9 @@ #if DROPBEAR_ECDH struct kex_ecdh_param *gen_kexecdh_param() { struct kex_ecdh_param *param = m_malloc(sizeof(*param)); + const struct dropbear_ecc_curve *curve = ses.newkeys->algo_kex->details; if (ecc_make_key_ex(NULL, dropbear_ltc_prng, - ¶m->key, ses.newkeys->algo_kex->ecc_curve->dp) != CRYPT_OK) { + ¶m->key, curve->dp) != CRYPT_OK) { dropbear_exit("ECC error"); } return param; @@ -24,11 +25,12 @@ void free_kexecdh_param(struct kex_ecdh_param *param) { } void kexecdh_comb_key(struct kex_ecdh_param *param, buffer *pub_them, sign_key *hostkey) { - const struct dropbear_kex *algo_kex = ses.newkeys->algo_kex; + const struct dropbear_ecc_curve *curve + = ses.newkeys->algo_kex->details; /* public keys from client and server */ ecc_key *Q_C, *Q_S, *Q_them; - Q_them 
= buf_get_ecc_raw_pubkey(pub_them, algo_kex->ecc_curve); + Q_them = buf_get_ecc_raw_pubkey(pub_them, curve); if (Q_them == NULL) { dropbear_exit("ECC error"); } diff --git a/src/kex-pqhybrid.c b/src/kex-pqhybrid.c new file mode 100644 index 000000000..81c8fd42a --- /dev/null +++ b/src/kex-pqhybrid.c @@ -0,0 +1,142 @@ +#include "includes.h" +#include "algo.h" +#include "buffer.h" +#include "session.h" +#include "bignum.h" +#include "dbrandom.h" +#include "crypto_desc.h" +#include "curve25519.h" +#include "kex.h" + +#if DROPBEAR_PQHYBRID + +struct kex_pqhybrid_param *gen_kexpqhybrid_param() { + struct kex_pqhybrid_param *param = m_malloc(sizeof(*param)); + const struct dropbear_kem_desc *kem = ses.newkeys->algo_kex->details; + + param->curve25519 = gen_kexcurve25519_param(); + + if (IS_DROPBEAR_CLIENT) { + param->kem_cli_secret = buf_new(kem->secret_len); + param->concat_public = buf_new(kem->public_len + CURVE25519_LEN); + kem->kem_gen( + buf_getwriteptr(param->concat_public, kem->public_len), + buf_getwriteptr(param->kem_cli_secret, kem->secret_len)); + buf_incrwritepos(param->concat_public, kem->public_len); + buf_incrwritepos(param->kem_cli_secret, kem->secret_len); + buf_setpos(param->kem_cli_secret, 0); + /* Append the curve25519 parameter */ + buf_putbytes(param->concat_public, param->curve25519->pub, CURVE25519_LEN); + } + + return param; +} + +void free_kexpqhybrid_param(struct kex_pqhybrid_param *param) { + free_kexcurve25519_param(param->curve25519); + if (param->kem_cli_secret) { + buf_burn_free(param->kem_cli_secret); + param->kem_cli_secret = NULL; + } + buf_free(param->concat_public); + m_free(param); +} + +void kexpqhybrid_comb_key(struct kex_pqhybrid_param *param, + buffer *buf_pub, sign_key *hostkey) { + + const struct dropbear_kem_desc *kem = ses.newkeys->algo_kex->details; + const struct ltc_hash_descriptor *hash_desc + = ses.newkeys->algo_kex->hash_desc; + + /* Either public key (from client) or ciphertext (from server) */ + unsigned char 
*remote_pub_kem = NULL; + buffer *pub_25519 = NULL; + buffer *k_out = NULL; + unsigned int remote_len; + hash_state hs; + const buffer * Q_C = NULL; + const buffer * Q_S = NULL; + + /* Extract input parts from the remote peer */ + if (IS_DROPBEAR_CLIENT) { + /* S_REPLY = S_CT2 || S_PK1 */ + remote_len = kem->ciphertext_len; + } else { + /* C_INIT = C_PK2 || C_PK1 */ + remote_len = kem->public_len; + } + remote_pub_kem = buf_getptr(buf_pub, remote_len); + buf_incrpos(buf_pub, remote_len); + pub_25519 = buf_getptrcopy(buf_pub, CURVE25519_LEN); + buf_incrpos(buf_pub, CURVE25519_LEN); + /* Check all is consumed */ + if (buf_pub->pos != buf_pub->len) { + dropbear_exit("Bad sntrup"); + } + + /* k_out = K_PQ || K_CL */ + k_out = buf_new(kem->output_len + CURVE25519_LEN); + + /* Derive pq kem part (K_PQ) */ + if (IS_DROPBEAR_CLIENT) { + kem->kem_dec( + buf_getwriteptr(k_out, kem->output_len), + remote_pub_kem, + buf_getptr(param->kem_cli_secret, kem->secret_len)); + buf_burn_free(param->kem_cli_secret); + param->kem_cli_secret = NULL; + } else { + /* Server returns ciphertext */ + assert(param->concat_public == NULL); + param->concat_public = buf_new(kem->ciphertext_len + CURVE25519_LEN); + kem->kem_enc( + buf_getwriteptr(param->concat_public, kem->ciphertext_len), + buf_getwriteptr(k_out, kem->output_len), + remote_pub_kem); + buf_incrwritepos(param->concat_public, kem->ciphertext_len); + /* Append the curve25519 parameter */ + buf_putbytes(param->concat_public, param->curve25519->pub, CURVE25519_LEN); + } + buf_incrwritepos(k_out, kem->output_len); + + /* Derive ec part (K_CL) */ + kexcurve25519_derive(param->curve25519, pub_25519, + buf_getwriteptr(k_out, CURVE25519_LEN)); + buf_incrwritepos(k_out, CURVE25519_LEN); + + /* dh_K_bytes = HASH(k_out) + dh_K_bytes is a SSH string with length prefix, since + that is what needs to be hashed in gen_new_keys() */ + ses.dh_K_bytes = buf_new(4 + hash_desc->hashsize); + buf_putint(ses.dh_K_bytes, hash_desc->hashsize); + 
hash_desc->init(&hs); + hash_desc->process(&hs, k_out->data, k_out->len); + hash_desc->done(&hs, buf_getwriteptr(ses.dh_K_bytes, hash_desc->hashsize)); + m_burn(&hs, sizeof(hash_state)); + buf_incrwritepos(ses.dh_K_bytes, hash_desc->hashsize); + + /* Create the remainder of the hash buffer */ + if (IS_DROPBEAR_CLIENT) { + Q_C = param->concat_public; + Q_S = buf_pub; + } else { + Q_S = param->concat_public; + Q_C = buf_pub; + } + + /* K_S, the host key */ + buf_put_pub_key(ses.kexhashbuf, hostkey, ses.newkeys->algo_hostkey); + buf_putbufstring(ses.kexhashbuf, Q_C); + buf_putbufstring(ses.kexhashbuf, Q_S); + /* K, the shared secret */ + buf_putbytes(ses.kexhashbuf, ses.dh_K_bytes->data, ses.dh_K_bytes->len); + + /* calculate the hash H to sign */ + finish_kexhashbuf(); + + buf_burn_free(k_out); + buf_free(pub_25519); +} + +#endif /* DROPBEAR_PQHYBRID */ diff --git a/src/kex-x25519.c b/src/kex-x25519.c index b7bc8ebad..00212c34a 100644 --- a/src/kex-x25519.c +++ b/src/kex-x25519.c @@ -8,7 +8,8 @@ #include "curve25519.h" #include "kex.h" -#if DROPBEAR_CURVE25519 +/* PQ hybrids also use curve25519 internally */ +#if DROPBEAR_CURVE25519_DEP struct kex_curve25519_param *gen_kexcurve25519_param() { /* Per http://cr.yp.to/ecdh.html */ @@ -26,13 +27,10 @@ void free_kexcurve25519_param(struct kex_curve25519_param *param) { m_free(param); } -void kexcurve25519_comb_key(const struct kex_curve25519_param *param, const buffer *buf_pub_them, - sign_key *hostkey) { - unsigned char out[CURVE25519_LEN]; - const unsigned char* Q_C = NULL; - const unsigned char* Q_S = NULL; +/* out must be CURVE25519_LEN */ +void kexcurve25519_derive(const struct kex_curve25519_param *param, const buffer *buf_pub_them, + unsigned char *out) { char zeroes[CURVE25519_LEN] = {0}; - if (buf_pub_them->len != CURVE25519_LEN) { dropbear_exit("Bad curve25519"); @@ -43,6 +41,20 @@ void kexcurve25519_comb_key(const struct kex_curve25519_param *param, const buff if (constant_time_memcmp(zeroes, out, 
CURVE25519_LEN) == 0) { dropbear_exit("Bad curve25519"); } +} + +#endif /* DROPBEAR_CURVE25519_DEP */ + +#if DROPBEAR_CURVE25519 + +/* Only required for x25519 directly */ +void kexcurve25519_comb_key(const struct kex_curve25519_param *param, const buffer *buf_pub_them, + sign_key *hostkey) { + unsigned char out[CURVE25519_LEN]; + const unsigned char* Q_C = NULL; + const unsigned char* Q_S = NULL; + + kexcurve25519_derive(param, buf_pub_them, out); m_mp_alloc_init_multi(&ses.dh_K, NULL); bytes_to_mp(ses.dh_K, out, CURVE25519_LEN); diff --git a/src/kex.h b/src/kex.h index 7fcc3c252..dad599178 100644 --- a/src/kex.h +++ b/src/kex.h @@ -50,13 +50,24 @@ void kexecdh_comb_key(struct kex_ecdh_param *param, buffer *pub_them, sign_key *hostkey); #endif -#if DROPBEAR_CURVE25519 +#if DROPBEAR_CURVE25519_DEP struct kex_curve25519_param *gen_kexcurve25519_param(void); void free_kexcurve25519_param(struct kex_curve25519_param *param); +void kexcurve25519_derive(const struct kex_curve25519_param *param, const buffer *buf_pub_them, + unsigned char *out); +#endif +#if DROPBEAR_CURVE25519 void kexcurve25519_comb_key(const struct kex_curve25519_param *param, const buffer *pub_them, sign_key *hostkey); #endif +#if DROPBEAR_PQHYBRID +struct kex_pqhybrid_param *gen_kexpqhybrid_param(void); +void free_kexpqhybrid_param(struct kex_pqhybrid_param *param); +void kexpqhybrid_comb_key(struct kex_pqhybrid_param *param, + buffer *buf_pub, sign_key *hostkey); +#endif + #ifndef DISABLE_ZLIB int is_compress_trans(void); int is_compress_recv(void); @@ -105,7 +116,7 @@ struct kex_ecdh_param { }; #endif -#if DROPBEAR_CURVE25519 +#if DROPBEAR_CURVE25519_DEP #define CURVE25519_LEN 32 struct kex_curve25519_param { unsigned char priv[CURVE25519_LEN]; @@ -113,4 +124,20 @@ struct kex_curve25519_param { }; #endif +#if DROPBEAR_PQHYBRID +struct kex_pqhybrid_param { + struct kex_curve25519_param *curve25519; + + /* The public part sent, concatenated PQ and EC parts. 
+	   Client sets it in gen_kexpqhybrid_param(). + C_INIT = C_PK2 || C_PK1 + Server sets it in kexpqhybrid_comb_key(). + S_REPLY = S_CT2 || S_PK1 + */ + buffer *concat_public; + /* pq secret, only used by the client */ + buffer *kem_cli_secret; +}; +#endif + + #endif /* DROPBEAR_KEX_H_ */ diff --git a/src/session.h b/src/session.h index 570a86593..f37e7ff42 100644 --- a/src/session.h +++ b/src/session.h @@ -189,7 +189,11 @@ struct sshsession { struct key_context *newkeys; buffer *session_id; /* this is the hash from the first kex */ /* The below are used temporarily during kex, are freed after use */ + /* Either dh_K or dh_K_bytes is set, depending on kex type */ mp_int * dh_K; /* SSH_MSG_KEXDH_REPLY and sending SSH_MSH_NEWKEYS */ + /* dh_K_bytes holds a SSH string, including length prefix */ + buffer * dh_K_bytes; /* SSH_MSG_KEXDH_REPLY and sending SSH_MSH_NEWKEYS */ + buffer *hash; /* the session hash */ buffer* kexhashbuf; /* session hash buffer calculated from various packets*/ buffer* transkexinit; /* the kexinit packet we send should be kept so we @@ -295,6 +299,7 @@ struct clientsession { struct kex_dh_param *dh_param; struct kex_ecdh_param *ecdh_param; struct kex_curve25519_param *curve25519_param; + struct kex_pqhybrid_param *pqhybrid_param; cli_kex_state kex_state; /* Used for progressing KEX */ cli_state state; /* Used to progress auth/channelsession etc */ diff --git a/src/svr-kex.c b/src/svr-kex.c index 7d0f12c8c..d243469c5 100644 --- a/src/svr-kex.c +++ b/src/svr-kex.c @@ -70,7 +70,10 @@ void recv_msg_kexdh_init() { #if DROPBEAR_CURVE25519 case DROPBEAR_KEX_CURVE25519: #endif -#if DROPBEAR_ECDH || DROPBEAR_CURVE25519 +#if DROPBEAR_PQHYBRID + case DROPBEAR_KEX_PQHYBRID: +#endif +#if DROPBEAR_ECDH || DROPBEAR_CURVE25519 || DROPBEAR_PQHYBRID ecdh_qs = buf_getstringbuf(ses.payload); break; #endif @@ -244,6 +247,17 @@ static void send_msg_kexdh_reply(mp_int *dh_e, buffer *ecdh_qs) { free_kexcurve25519_param(param); } break; +#endif +#if DROPBEAR_PQHYBRID + case 
DROPBEAR_KEX_PQHYBRID: + { + struct kex_pqhybrid_param *param = gen_kexpqhybrid_param(); + kexpqhybrid_comb_key(param, ecdh_qs, svr_opts.hostkey); + + buf_putbufstring(ses.writepayload, param->concat_public); + free_kexpqhybrid_param(param); + } + break; #endif } diff --git a/src/sysoptions.h b/src/sysoptions.h index 9b325d5ba..d992a7aac 100644 --- a/src/sysoptions.h +++ b/src/sysoptions.h @@ -208,6 +208,10 @@ #define DROPBEAR_SK_ED25519 ((DROPBEAR_SK_KEYS) && (DROPBEAR_ED25519)) #endif +/* XXX: Not actually used */ +#define DROPBEAR_PQHYBRID 1 +#define DROPBEAR_CURVE25519_DEP (DROPBEAR_CURVE25519 || DROPBEAR_PQHYBRID) + /* Dropbear only uses server-sig-algs, only needed if we have rsa-sha256 pubkey auth */ #define DROPBEAR_EXT_INFO ((DROPBEAR_RSA_SHA256) \ && ((DROPBEAR_CLI_PUBKEY_AUTH) || (DROPBEAR_SVR_PUBKEY_AUTH))) From b0846ba95f9803f91ef6f0c4a1f4d066a8be94a2 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Fri, 13 Dec 2024 23:09:15 +0800 Subject: [PATCH 3/5] Add sntrup761x25519-sha512 post-quantum key exchange This follows draft-ietf-sshm-ntruprime-ssh-01, using the sntrup761 implementation from supercop. It is available under both sntrup761x25519-sha512 and sntrup761x25519-sha512@openssh.com names. Interoperability has been tested against OpenSSH 9.8 (client/server) and PuTTY 0.82 client. sntrup761.sh is taken from OpenSSH, to extract the code from the supercop distribution. KEX hash buffer size calculation has been updated to current algorithm limits, since sntrup761 was larger than the previous limit. Code size increases by approx 9kB (209 to 218kB) for a 32-bit armv7 build. 
--- .github/workflows/build.yml | 7 +- Makefile.in | 3 +- src/common-algo.c | 26 +- src/default_options.h | 6 + src/sntrup761.c | 2218 +++++++++++++++++++++++++++++++++++ src/sntrup761.h | 13 + src/sntrup761_compat.h | 44 + src/sysoptions.h | 36 +- 8 files changed, 2342 insertions(+), 11 deletions(-) create mode 100644 src/sntrup761.c create mode 100644 src/sntrup761.h create mode 100644 src/sntrup761_compat.h diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 50df8e469..bd341bf8f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -61,6 +61,9 @@ jobs: # enable all options nondefault: 1 configure_flags: --enable-pam + # sntrup761.c is not c89 compliant + localoptions: | + #define DROPBEAR_SNTRUP761 0 - name: macos 14 os: macos-14 @@ -134,7 +137,7 @@ jobs: #define DROPBEAR_USE_PASSWORD_ENV 0 #define DROPBEAR_SFTPSERVER 0 - - name: other algo combos + - name: no sha1 runcheck: 'no' # disables all sha1 localoptions: | @@ -217,7 +220,7 @@ jobs: if: ${{ matrix.nondefault }} run: | # Turn on anything that's off by default. 
Rough but seems sufficient - grep ' 0$' src/default_options.h | sed 's/0$/1/' > localoptions.h + grep ' 0$' src/default_options.h | sed 's/0$/1/' >> localoptions.h # PAM clashes with password echo "#define DROPBEAR_SVR_PASSWORD_AUTH 0" >> localoptions.h # 1 second timeout is too short diff --git a/Makefile.in b/Makefile.in index 263153f62..979a763be 100644 --- a/Makefile.in +++ b/Makefile.in @@ -64,7 +64,8 @@ _CLISVROBJS=common-session.o packet.o common-algo.o common-kex.o \ common-channel.o common-chansession.o termcodes.o loginrec.o \ tcp-accept.o listener.o process-packet.o dh_groups.o \ common-runopts.o circbuffer.o list.o netio.o chachapoly.o gcm.o \ - kex-x25519.o kex-dh.o kex-ecdh.o kex-pqhybrid.o + kex-x25519.o kex-dh.o kex-ecdh.o kex-pqhybrid.o \ + sntrup761.o CLISVROBJS = $(patsubst %,$(OBJ_DIR)/%,$(_CLISVROBJS)) _KEYOBJS=dropbearkey.o diff --git a/src/common-algo.c b/src/common-algo.c index 41c1c6247..e3713bc44 100644 --- a/src/common-algo.c +++ b/src/common-algo.c @@ -33,6 +33,7 @@ #include "gcm.h" #include "chachapoly.h" #include "ssh.h" +#include "sntrup761.h" /* This file (algo.c) organises the ciphers which can be used, and is used to * decide which ciphers/hashes/compression/signing to use during key exchange*/ @@ -266,12 +267,35 @@ static const struct dropbear_kex kex_ecdh_nistp521 = {DROPBEAR_KEX_ECDH, NULL, 0 #endif /* DROPBEAR_ECDH */ #if DROPBEAR_CURVE25519 -/* Referred to directly */ static const struct dropbear_kex kex_curve25519 = {DROPBEAR_KEX_CURVE25519, NULL, 0, NULL, &sha256_desc }; #endif + +#if DROPBEAR_SNTRUP761 +static const struct dropbear_kem_desc sntrup761_desc = { + .public_len = crypto_kem_sntrup761_PUBLICKEYBYTES, + .secret_len = crypto_kem_sntrup761_SECRETKEYBYTES, + .ciphertext_len = crypto_kem_sntrup761_CIPHERTEXTBYTES, + .output_len = crypto_kem_sntrup761_BYTES, + .kem_gen = crypto_kem_sntrup761_keypair, + .kem_enc = crypto_kem_sntrup761_enc, + .kem_dec = crypto_kem_sntrup761_dec, +}; +static const struct dropbear_kex 
kex_sntrup761 = {DROPBEAR_KEX_PQHYBRID, NULL, 0, &sntrup761_desc, &sha512_desc }; +#endif + +/* For sntrup761 */ +volatile int16_t crypto_int16_optblocker = 0; +volatile int32_t crypto_int32_optblocker = 0; +volatile int64_t crypto_int64_optblocker = 0; + + /* data == NULL for non-kex algorithm identifiers */ algo_type sshkex[] = { +#if DROPBEAR_SNTRUP761 + {"sntrup761x25519-sha512", 0, &kex_sntrup761, 1, NULL}, + {"sntrup761x25519-sha512@openssh.com", 0, &kex_sntrup761, 1, NULL}, +#endif #if DROPBEAR_CURVE25519 {"curve25519-sha256", 0, &kex_curve25519, 1, NULL}, {"curve25519-sha256@libssh.org", 0, &kex_curve25519, 1, NULL}, diff --git a/src/default_options.h b/src/default_options.h index 1518996ea..eca87398b 100644 --- a/src/default_options.h +++ b/src/default_options.h @@ -185,6 +185,7 @@ IMPORTANT: Some options will require "make clean" after changes */ * group1 - 1024 bit, sha1 * curve25519 - elliptic curve DH * ecdh - NIST elliptic curve DH (256, 384, 521) + * sntrup761 - post-quantum hybrid with x25519. * * group1 is too small for security though is necessary if you need compatibility with some implementations such as Dropbear versions < 0.53 @@ -194,6 +195,10 @@ IMPORTANT: Some options will require "make clean" after changes */ * curve25519 increases binary size by ~2,5kB on x86-64 * including either ECDH or ECDSA increases binary size by ~30kB on x86-64 + * sntrup761 is recommended to avoid possible decryption + * by future quantum computers. + * It is fast, but adds ~9kB code size (32-bit armv7) + * Small systems should generally include either curve25519 or ecdh for performance. 
* curve25519 is less widely supported but is faster */ @@ -201,6 +206,7 @@ IMPORTANT: Some options will require "make clean" after changes */ #define DROPBEAR_DH_GROUP14_SHA256 1 #define DROPBEAR_DH_GROUP16 0 #define DROPBEAR_CURVE25519 1 +#define DROPBEAR_SNTRUP761 1 #define DROPBEAR_ECDH 1 #define DROPBEAR_DH_GROUP1 0 diff --git a/src/sntrup761.c b/src/sntrup761.c new file mode 100644 index 000000000..c4cea2263 --- /dev/null +++ b/src/sntrup761.c @@ -0,0 +1,2218 @@ +/* + * Public Domain, Authors: + * - Daniel J. Bernstein + * - Chitchanok Chuengsatiansup + * - Tanja Lange + * - Christine van Vredendaal + */ + +#include +#include "sntrup761_compat.h" + +#if DROPBEAR_SNTRUP761 + +#define crypto_declassify(x, y) do {} while (0) + +#define int8 crypto_int8 +#define uint8 crypto_uint8 +#define int16 crypto_int16 +#define uint16 crypto_uint16 +#define int32 crypto_int32 +#define uint32 crypto_uint32 +#define int64 crypto_int64 +#define uint64 crypto_uint64 +extern volatile crypto_int16 crypto_int16_optblocker; +extern volatile crypto_int32 crypto_int32_optblocker; +extern volatile crypto_int64 crypto_int64_optblocker; + +/* from supercop-20241022/cryptoint/crypto_int16.h */ +/* auto-generated: cd cryptoint; ./autogen */ +/* cryptoint 20241003 */ + +#ifndef crypto_int16_h +#define crypto_int16_h + +#define crypto_int16 int16_t +#define crypto_int16_unsigned uint16_t + + + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_load(const unsigned char *crypto_int16_s) { + crypto_int16 crypto_int16_z = 0; + crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 0; + crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 8; + return crypto_int16_z; +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_load_bigendian(const unsigned char *crypto_int16_s) { + crypto_int16 crypto_int16_z = 0; + crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 8; + crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 0; + return crypto_int16_z; 
+} + +__attribute__((unused)) +static inline +void crypto_int16_store(unsigned char *crypto_int16_s,crypto_int16 crypto_int16_x) { + *crypto_int16_s++ = crypto_int16_x >> 0; + *crypto_int16_s++ = crypto_int16_x >> 8; +} + +__attribute__((unused)) +static inline +void crypto_int16_store_bigendian(unsigned char *crypto_int16_s,crypto_int16 crypto_int16_x) { + *crypto_int16_s++ = crypto_int16_x >> 8; + *crypto_int16_s++ = crypto_int16_x >> 0; +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_negative_mask(crypto_int16 crypto_int16_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("sarw $15,%0" : "+r"(crypto_int16_x) : : "cc"); + return crypto_int16_x; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_y; + __asm__ ("sbfx %w0,%w1,15,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); + return crypto_int16_y; +#else + crypto_int16_x >>= 16-6; + crypto_int16_x += crypto_int16_optblocker; + crypto_int16_x >>= 5; + return crypto_int16_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int16_unsigned crypto_int16_unsigned_topbit_01(crypto_int16_unsigned crypto_int16_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("shrw $15,%0" : "+r"(crypto_int16_x) : : "cc"); + return crypto_int16_x; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_y; + __asm__ ("ubfx %w0,%w1,15,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); + return crypto_int16_y; +#else + crypto_int16_x >>= 16-6; + crypto_int16_x += crypto_int16_optblocker; + crypto_int16_x >>= 5; + return crypto_int16_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_negative_01(crypto_int16 crypto_int16_x) { + return crypto_int16_unsigned_topbit_01(crypto_int16_x); +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_topbit_mask(crypto_int16 crypto_int16_x) { + return crypto_int16_negative_mask(crypto_int16_x); +} + +__attribute__((unused)) +static inline 
+crypto_int16 crypto_int16_topbit_01(crypto_int16 crypto_int16_x) { + return crypto_int16_unsigned_topbit_01(crypto_int16_x); +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_bottombit_mask(crypto_int16 crypto_int16_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("andw $1,%0" : "+r"(crypto_int16_x) : : "cc"); + return -crypto_int16_x; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_y; + __asm__ ("sbfx %w0,%w1,0,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); + return crypto_int16_y; +#else + crypto_int16_x &= 1 + crypto_int16_optblocker; + return -crypto_int16_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_bottombit_01(crypto_int16 crypto_int16_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("andw $1,%0" : "+r"(crypto_int16_x) : : "cc"); + return crypto_int16_x; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_y; + __asm__ ("ubfx %w0,%w1,0,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); + return crypto_int16_y; +#else + crypto_int16_x &= 1 + crypto_int16_optblocker; + return crypto_int16_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_bitinrangepublicpos_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("sxth %w0,%w0\n asr %w0,%w0,%w1" : "+&r"(crypto_int16_x) : "r"(crypto_int16_s) : ); +#else + crypto_int16_x >>= crypto_int16_s ^ crypto_int16_optblocker; +#endif + return crypto_int16_bottombit_mask(crypto_int16_x); +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_bitinrangepublicpos_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : 
"cc"); +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("sxth %w0,%w0\n asr %w0,%w0,%w1" : "+&r"(crypto_int16_x) : "r"(crypto_int16_s) : ); +#else + crypto_int16_x >>= crypto_int16_s ^ crypto_int16_optblocker; +#endif + return crypto_int16_bottombit_01(crypto_int16_x); +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_shlmod(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16_s &= 15; + __asm__ ("shlw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("and %w0,%w0,15\n and %w1,%w1,65535\n lsl %w1,%w1,%w0" : "+&r"(crypto_int16_s), "+r"(crypto_int16_x) : : ); +#else + int crypto_int16_k, crypto_int16_l; + for (crypto_int16_l = 0,crypto_int16_k = 1;crypto_int16_k < 16;++crypto_int16_l,crypto_int16_k *= 2) + crypto_int16_x ^= (crypto_int16_x ^ (crypto_int16_x << crypto_int16_k)) & crypto_int16_bitinrangepublicpos_mask(crypto_int16_s,crypto_int16_l); +#endif + return crypto_int16_x; +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_shrmod(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16_s &= 15; + __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("and %w0,%w0,15\n sxth %w1,%w1\n asr %w1,%w1,%w0" : "+&r"(crypto_int16_s), "+r"(crypto_int16_x) : : ); +#else + int crypto_int16_k, crypto_int16_l; + for (crypto_int16_l = 0,crypto_int16_k = 1;crypto_int16_k < 16;++crypto_int16_l,crypto_int16_k *= 2) + crypto_int16_x ^= (crypto_int16_x ^ (crypto_int16_x >> crypto_int16_k)) & crypto_int16_bitinrangepublicpos_mask(crypto_int16_s,crypto_int16_l); +#endif + return crypto_int16_x; +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_bitmod_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { + 
crypto_int16_x = crypto_int16_shrmod(crypto_int16_x,crypto_int16_s); + return crypto_int16_bottombit_mask(crypto_int16_x); +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_bitmod_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { + crypto_int16_x = crypto_int16_shrmod(crypto_int16_x,crypto_int16_s); + return crypto_int16_bottombit_01(crypto_int16_x); +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_nonzero_mask(crypto_int16 crypto_int16_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("tst %w1,65535\n csetm %w0,ne" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#else + crypto_int16_x |= -crypto_int16_x; + return crypto_int16_negative_mask(crypto_int16_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_nonzero_01(crypto_int16 crypto_int16_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("tst %w1,65535\n cset %w0,ne" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#else + crypto_int16_x |= -crypto_int16_x; + return crypto_int16_unsigned_topbit_01(crypto_int16_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_positive_mask(crypto_int16 crypto_int16_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw 
$-1,%1\n testw %2,%2\n cmovgw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("sxth %w0,%w1\n cmp %w0,0\n csetm %w0,gt" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#else + crypto_int16 crypto_int16_z = -crypto_int16_x; + crypto_int16_z ^= crypto_int16_x & crypto_int16_z; + return crypto_int16_negative_mask(crypto_int16_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_positive_01(crypto_int16 crypto_int16_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovgw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("sxth %w0,%w1\n cmp %w0,0\n cset %w0,gt" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#else + crypto_int16 crypto_int16_z = -crypto_int16_x; + crypto_int16_z ^= crypto_int16_x & crypto_int16_z; + return crypto_int16_unsigned_topbit_01(crypto_int16_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_zero_mask(crypto_int16 crypto_int16_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("tst %w1,65535\n csetm %w0,eq" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#else + return ~crypto_int16_nonzero_mask(crypto_int16_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 
crypto_int16_zero_01(crypto_int16 crypto_int16_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("tst %w1,65535\n cset %w0,eq" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); + return crypto_int16_z; +#else + return 1-crypto_int16_nonzero_01(crypto_int16_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_unequal_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n csetm %w0,ne" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#else + return crypto_int16_nonzero_mask(crypto_int16_x ^ crypto_int16_y); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_unequal_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n cset %w0,ne" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return 
crypto_int16_z; +#else + return crypto_int16_nonzero_01(crypto_int16_x ^ crypto_int16_y); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_equal_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n csetm %w0,eq" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#else + return ~crypto_int16_unequal_mask(crypto_int16_x,crypto_int16_y); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_equal_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n cset %w0,eq" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#else + return 1-crypto_int16_unequal_01(crypto_int16_x,crypto_int16_y); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_min(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("cmpw %1,%0\n cmovgw %1,%0" : "+r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); + return crypto_int16_x; +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("sxth %w0,%w0\n cmp %w0,%w1,sxth\n csel 
%w0,%w0,%w1,lt" : "+&r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); + return crypto_int16_x; +#else + crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; + crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; + crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); + crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); + crypto_int16_z &= crypto_int16_r; + return crypto_int16_x ^ crypto_int16_z; +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_max(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("cmpw %1,%0\n cmovlw %1,%0" : "+r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); + return crypto_int16_x; +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("sxth %w0,%w0\n cmp %w0,%w1,sxth\n csel %w0,%w1,%w0,lt" : "+&r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); + return crypto_int16_x; +#else + crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; + crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; + crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); + crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); + crypto_int16_z &= crypto_int16_r; + return crypto_int16_y ^ crypto_int16_z; +#endif +} + +__attribute__((unused)) +static inline +void crypto_int16_minmax(crypto_int16 *crypto_int16_p,crypto_int16 *crypto_int16_q) { + crypto_int16 crypto_int16_x = *crypto_int16_p; + crypto_int16 crypto_int16_y = *crypto_int16_q; +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_z; + __asm__ ("cmpw %2,%1\n movw %1,%0\n cmovgw %2,%1\n cmovgw %0,%2" : "=&r"(crypto_int16_z), "+&r"(crypto_int16_x), "+r"(crypto_int16_y) : : "cc"); + *crypto_int16_p = crypto_int16_x; + *crypto_int16_q = crypto_int16_y; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_r, crypto_int16_s; + __asm__ ("sxth %w0,%w0\n cmp %w0,%w3,sxth\n csel %w1,%w0,%w3,lt\n 
csel %w2,%w3,%w0,lt" : "+&r"(crypto_int16_x), "=&r"(crypto_int16_r), "=r"(crypto_int16_s) : "r"(crypto_int16_y) : "cc"); + *crypto_int16_p = crypto_int16_r; + *crypto_int16_q = crypto_int16_s; +#else + crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; + crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; + crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); + crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); + crypto_int16_z &= crypto_int16_r; + crypto_int16_x ^= crypto_int16_z; + crypto_int16_y ^= crypto_int16_z; + *crypto_int16_p = crypto_int16_x; + *crypto_int16_q = crypto_int16_y; +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_smaller_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovlw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n csetm %w0,lt" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#else + crypto_int16 crypto_int16_r = crypto_int16_x ^ crypto_int16_y; + crypto_int16 crypto_int16_z = crypto_int16_x - crypto_int16_y; + crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_x); + return crypto_int16_negative_mask(crypto_int16_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_smaller_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovlw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return 
crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n cset %w0,lt" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#else + crypto_int16 crypto_int16_r = crypto_int16_x ^ crypto_int16_y; + crypto_int16 crypto_int16_z = crypto_int16_x - crypto_int16_y; + crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_x); + return crypto_int16_unsigned_topbit_01(crypto_int16_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_leq_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovlew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n csetm %w0,le" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#else + return ~crypto_int16_smaller_mask(crypto_int16_y,crypto_int16_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int16 crypto_int16_leq_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 crypto_int16_q,crypto_int16_z; + __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovlew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int16 crypto_int16_z; + __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n cset %w0,le" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); + return crypto_int16_z; +#else + return 
1-crypto_int16_smaller_01(crypto_int16_y,crypto_int16_x); +#endif +} + +__attribute__((unused)) +static inline +int crypto_int16_ones_num(crypto_int16 crypto_int16_x) { + crypto_int16_unsigned crypto_int16_y = crypto_int16_x; + const crypto_int16 C0 = 0x5555; + const crypto_int16 C1 = 0x3333; + const crypto_int16 C2 = 0x0f0f; + crypto_int16_y -= ((crypto_int16_y >> 1) & C0); + crypto_int16_y = (crypto_int16_y & C1) + ((crypto_int16_y >> 2) & C1); + crypto_int16_y = (crypto_int16_y + (crypto_int16_y >> 4)) & C2; + crypto_int16_y = (crypto_int16_y + (crypto_int16_y >> 8)) & 0xff; + return crypto_int16_y; +} + +__attribute__((unused)) +static inline +int crypto_int16_bottomzeros_num(crypto_int16 crypto_int16_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int16 fallback = 16; + __asm__ ("bsfw %0,%0\n cmovew %1,%0" : "+&r"(crypto_int16_x) : "r"(fallback) : "cc"); + return crypto_int16_x; +#elif defined(__GNUC__) && defined(__aarch64__) + int64_t crypto_int16_y; + __asm__ ("orr %w0,%w1,-65536\n rbit %w0,%w0\n clz %w0,%w0" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); + return crypto_int16_y; +#else + crypto_int16 crypto_int16_y = crypto_int16_x ^ (crypto_int16_x-1); + crypto_int16_y = ((crypto_int16) crypto_int16_y) >> 1; + crypto_int16_y &= ~(crypto_int16_x & (((crypto_int16) 1) << (16-1))); + return crypto_int16_ones_num(crypto_int16_y); +#endif +} + +#endif + +/* from supercop-20241022/cryptoint/crypto_int32.h */ +/* auto-generated: cd cryptoint; ./autogen */ +/* cryptoint 20241003 */ + +#ifndef crypto_int32_h +#define crypto_int32_h + +#define crypto_int32 int32_t +#define crypto_int32_unsigned uint32_t + + + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_load(const unsigned char *crypto_int32_s) { + crypto_int32 crypto_int32_z = 0; + crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 0; + crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 8; + crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 16; + 
crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 24; + return crypto_int32_z; +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_load_bigendian(const unsigned char *crypto_int32_s) { + crypto_int32 crypto_int32_z = 0; + crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 24; + crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 16; + crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 8; + crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 0; + return crypto_int32_z; +} + +__attribute__((unused)) +static inline +void crypto_int32_store(unsigned char *crypto_int32_s,crypto_int32 crypto_int32_x) { + *crypto_int32_s++ = crypto_int32_x >> 0; + *crypto_int32_s++ = crypto_int32_x >> 8; + *crypto_int32_s++ = crypto_int32_x >> 16; + *crypto_int32_s++ = crypto_int32_x >> 24; +} + +__attribute__((unused)) +static inline +void crypto_int32_store_bigendian(unsigned char *crypto_int32_s,crypto_int32 crypto_int32_x) { + *crypto_int32_s++ = crypto_int32_x >> 24; + *crypto_int32_s++ = crypto_int32_x >> 16; + *crypto_int32_s++ = crypto_int32_x >> 8; + *crypto_int32_s++ = crypto_int32_x >> 0; +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_negative_mask(crypto_int32 crypto_int32_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("sarl $31,%0" : "+r"(crypto_int32_x) : : "cc"); + return crypto_int32_x; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_y; + __asm__ ("asr %w0,%w1,31" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); + return crypto_int32_y; +#else + crypto_int32_x >>= 32-6; + crypto_int32_x += crypto_int32_optblocker; + crypto_int32_x >>= 5; + return crypto_int32_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int32_unsigned crypto_int32_unsigned_topbit_01(crypto_int32_unsigned crypto_int32_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("shrl $31,%0" : "+r"(crypto_int32_x) : : "cc"); + return crypto_int32_x; +#elif 
defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_y; + __asm__ ("lsr %w0,%w1,31" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); + return crypto_int32_y; +#else + crypto_int32_x >>= 32-6; + crypto_int32_x += crypto_int32_optblocker; + crypto_int32_x >>= 5; + return crypto_int32_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_negative_01(crypto_int32 crypto_int32_x) { + return crypto_int32_unsigned_topbit_01(crypto_int32_x); +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_topbit_mask(crypto_int32 crypto_int32_x) { + return crypto_int32_negative_mask(crypto_int32_x); +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_topbit_01(crypto_int32 crypto_int32_x) { + return crypto_int32_unsigned_topbit_01(crypto_int32_x); +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_bottombit_mask(crypto_int32 crypto_int32_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("andl $1,%0" : "+r"(crypto_int32_x) : : "cc"); + return -crypto_int32_x; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_y; + __asm__ ("sbfx %w0,%w1,0,1" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); + return crypto_int32_y; +#else + crypto_int32_x &= 1 + crypto_int32_optblocker; + return -crypto_int32_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_bottombit_01(crypto_int32 crypto_int32_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("andl $1,%0" : "+r"(crypto_int32_x) : : "cc"); + return crypto_int32_x; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_y; + __asm__ ("ubfx %w0,%w1,0,1" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); + return crypto_int32_y; +#else + crypto_int32_x &= 1 + crypto_int32_optblocker; + return crypto_int32_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_bitinrangepublicpos_mask(crypto_int32 
crypto_int32_x,crypto_int32 crypto_int32_s) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); +#else + crypto_int32_x >>= crypto_int32_s ^ crypto_int32_optblocker; +#endif + return crypto_int32_bottombit_mask(crypto_int32_x); +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_bitinrangepublicpos_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); +#else + crypto_int32_x >>= crypto_int32_s ^ crypto_int32_optblocker; +#endif + return crypto_int32_bottombit_01(crypto_int32_x); +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_shlmod(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("shll %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("lsl %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); +#else + int crypto_int32_k, crypto_int32_l; + for (crypto_int32_l = 0,crypto_int32_k = 1;crypto_int32_k < 32;++crypto_int32_l,crypto_int32_k *= 2) + crypto_int32_x ^= (crypto_int32_x ^ (crypto_int32_x << crypto_int32_k)) & crypto_int32_bitinrangepublicpos_mask(crypto_int32_s,crypto_int32_l); +#endif + return crypto_int32_x; +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_shrmod(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); +#elif defined(__GNUC__) && 
defined(__aarch64__) + __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); +#else + int crypto_int32_k, crypto_int32_l; + for (crypto_int32_l = 0,crypto_int32_k = 1;crypto_int32_k < 32;++crypto_int32_l,crypto_int32_k *= 2) + crypto_int32_x ^= (crypto_int32_x ^ (crypto_int32_x >> crypto_int32_k)) & crypto_int32_bitinrangepublicpos_mask(crypto_int32_s,crypto_int32_l); +#endif + return crypto_int32_x; +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_bitmod_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { + crypto_int32_x = crypto_int32_shrmod(crypto_int32_x,crypto_int32_s); + return crypto_int32_bottombit_mask(crypto_int32_x); +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_bitmod_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { + crypto_int32_x = crypto_int32_shrmod(crypto_int32_x,crypto_int32_s); + return crypto_int32_bottombit_01(crypto_int32_x); +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_nonzero_mask(crypto_int32 crypto_int32_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,0\n csetm %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); + return crypto_int32_z; +#else + crypto_int32_x |= -crypto_int32_x; + return crypto_int32_negative_mask(crypto_int32_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_nonzero_01(crypto_int32 crypto_int32_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); + return 
crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,0\n cset %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); + return crypto_int32_z; +#else + crypto_int32_x |= -crypto_int32_x; + return crypto_int32_unsigned_topbit_01(crypto_int32_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_positive_mask(crypto_int32 crypto_int32_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovgl %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,0\n csetm %w0,gt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); + return crypto_int32_z; +#else + crypto_int32 crypto_int32_z = -crypto_int32_x; + crypto_int32_z ^= crypto_int32_x & crypto_int32_z; + return crypto_int32_negative_mask(crypto_int32_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_positive_01(crypto_int32 crypto_int32_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovgl %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,0\n cset %w0,gt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); + return crypto_int32_z; +#else + crypto_int32 crypto_int32_z = -crypto_int32_x; + crypto_int32_z ^= crypto_int32_x & crypto_int32_z; + return crypto_int32_unsigned_topbit_01(crypto_int32_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_zero_mask(crypto_int32 crypto_int32_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 
crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,0\n csetm %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); + return crypto_int32_z; +#else + return ~crypto_int32_nonzero_mask(crypto_int32_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_zero_01(crypto_int32 crypto_int32_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,0\n cset %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); + return crypto_int32_z; +#else + return 1-crypto_int32_nonzero_01(crypto_int32_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_unequal_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,%w2\n csetm %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#else + return crypto_int32_nonzero_mask(crypto_int32_x ^ crypto_int32_y); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_unequal_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { +#if defined(__GNUC__) && 
defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,%w2\n cset %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#else + return crypto_int32_nonzero_01(crypto_int32_x ^ crypto_int32_y); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_equal_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,%w2\n csetm %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#else + return ~crypto_int32_unequal_mask(crypto_int32_x,crypto_int32_y); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_equal_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,%w2\n cset %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#else + return 1-crypto_int32_unequal_01(crypto_int32_x,crypto_int32_y); 
+#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_min(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("cmpl %1,%0\n cmovgl %1,%0" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); + return crypto_int32_x; +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("cmp %w0,%w1\n csel %w0,%w0,%w1,lt" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); + return crypto_int32_x; +#else + crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ (crypto_int64)crypto_int32_x; + crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; + crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); + crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); + crypto_int32_z &= crypto_int32_r; + return crypto_int32_x ^ crypto_int32_z; +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_max(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("cmpl %1,%0\n cmovll %1,%0" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); + return crypto_int32_x; +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("cmp %w0,%w1\n csel %w0,%w1,%w0,lt" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); + return crypto_int32_x; +#else + crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ (crypto_int64)crypto_int32_x; + crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; + crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); + crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); + crypto_int32_z &= crypto_int32_r; + return crypto_int32_y ^ crypto_int32_z; +#endif +} + +__attribute__((unused)) +static inline +void crypto_int32_minmax(crypto_int32 *crypto_int32_p,crypto_int32 *crypto_int32_q) { + crypto_int32 crypto_int32_x = *crypto_int32_p; + crypto_int32 crypto_int32_y = 
*crypto_int32_q; +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmpl %2,%1\n movl %1,%0\n cmovgl %2,%1\n cmovgl %0,%2" : "=&r"(crypto_int32_z), "+&r"(crypto_int32_x), "+r"(crypto_int32_y) : : "cc"); + *crypto_int32_p = crypto_int32_x; + *crypto_int32_q = crypto_int32_y; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_r, crypto_int32_s; + __asm__ ("cmp %w2,%w3\n csel %w0,%w2,%w3,lt\n csel %w1,%w3,%w2,lt" : "=&r"(crypto_int32_r), "=r"(crypto_int32_s) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + *crypto_int32_p = crypto_int32_r; + *crypto_int32_q = crypto_int32_s; +#else + crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ (crypto_int64)crypto_int32_x; + crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; + crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); + crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); + crypto_int32_z &= crypto_int32_r; + crypto_int32_x ^= crypto_int32_z; + crypto_int32_y ^= crypto_int32_z; + *crypto_int32_p = crypto_int32_x; + *crypto_int32_q = crypto_int32_y; +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_smaller_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovll %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,%w2\n csetm %w0,lt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#else + crypto_int32 crypto_int32_r = crypto_int32_x ^ crypto_int32_y; + crypto_int32 crypto_int32_z = crypto_int32_x - crypto_int32_y; + crypto_int32_z ^= crypto_int32_r & (crypto_int32_z 
^ crypto_int32_x); + return crypto_int32_negative_mask(crypto_int32_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_smaller_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovll %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,%w2\n cset %w0,lt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#else + crypto_int32 crypto_int32_r = crypto_int32_x ^ crypto_int32_y; + crypto_int32 crypto_int32_z = crypto_int32_x - crypto_int32_y; + crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_x); + return crypto_int32_unsigned_topbit_01(crypto_int32_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_leq_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovlel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,%w2\n csetm %w0,le" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#else + return ~crypto_int32_smaller_mask(crypto_int32_y,crypto_int32_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int32 crypto_int32_leq_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 crypto_int32_q,crypto_int32_z; + __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl 
%3,%2\n cmovlel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int32 crypto_int32_z; + __asm__ ("cmp %w1,%w2\n cset %w0,le" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); + return crypto_int32_z; +#else + return 1-crypto_int32_smaller_01(crypto_int32_y,crypto_int32_x); +#endif +} + +__attribute__((unused)) +static inline +int crypto_int32_ones_num(crypto_int32 crypto_int32_x) { + crypto_int32_unsigned crypto_int32_y = crypto_int32_x; + const crypto_int32 C0 = 0x55555555; + const crypto_int32 C1 = 0x33333333; + const crypto_int32 C2 = 0x0f0f0f0f; + crypto_int32_y -= ((crypto_int32_y >> 1) & C0); + crypto_int32_y = (crypto_int32_y & C1) + ((crypto_int32_y >> 2) & C1); + crypto_int32_y = (crypto_int32_y + (crypto_int32_y >> 4)) & C2; + crypto_int32_y += crypto_int32_y >> 8; + crypto_int32_y = (crypto_int32_y + (crypto_int32_y >> 16)) & 0xff; + return crypto_int32_y; +} + +__attribute__((unused)) +static inline +int crypto_int32_bottomzeros_num(crypto_int32 crypto_int32_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int32 fallback = 32; + __asm__ ("bsfl %0,%0\n cmovel %1,%0" : "+&r"(crypto_int32_x) : "r"(fallback) : "cc"); + return crypto_int32_x; +#elif defined(__GNUC__) && defined(__aarch64__) + int64_t crypto_int32_y; + __asm__ ("rbit %w0,%w1\n clz %w0,%w0" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); + return crypto_int32_y; +#else + crypto_int32 crypto_int32_y = crypto_int32_x ^ (crypto_int32_x-1); + crypto_int32_y = ((crypto_int32) crypto_int32_y) >> 1; + crypto_int32_y &= ~(crypto_int32_x & (((crypto_int32) 1) << (32-1))); + return crypto_int32_ones_num(crypto_int32_y); +#endif +} + +#endif + +/* from supercop-20241022/cryptoint/crypto_int64.h */ +/* auto-generated: cd cryptoint; ./autogen */ +/* cryptoint 20241003 */ + +#ifndef crypto_int64_h +#define crypto_int64_h + +#define 
crypto_int64 int64_t +#define crypto_int64_unsigned uint64_t + + + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_load(const unsigned char *crypto_int64_s) { + crypto_int64 crypto_int64_z = 0; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 0; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 8; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 16; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 24; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 32; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 40; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 48; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 56; + return crypto_int64_z; +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_load_bigendian(const unsigned char *crypto_int64_s) { + crypto_int64 crypto_int64_z = 0; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 56; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 48; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 40; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 32; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 24; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 16; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 8; + crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 0; + return crypto_int64_z; +} + +__attribute__((unused)) +static inline +void crypto_int64_store(unsigned char *crypto_int64_s,crypto_int64 crypto_int64_x) { + *crypto_int64_s++ = crypto_int64_x >> 0; + *crypto_int64_s++ = crypto_int64_x >> 8; + *crypto_int64_s++ = crypto_int64_x >> 16; + *crypto_int64_s++ = crypto_int64_x >> 24; + *crypto_int64_s++ = crypto_int64_x >> 32; + *crypto_int64_s++ = crypto_int64_x >> 40; + *crypto_int64_s++ = crypto_int64_x >> 48; + *crypto_int64_s++ = crypto_int64_x >> 56; +} + +__attribute__((unused)) +static inline +void 
crypto_int64_store_bigendian(unsigned char *crypto_int64_s,crypto_int64 crypto_int64_x) { + *crypto_int64_s++ = crypto_int64_x >> 56; + *crypto_int64_s++ = crypto_int64_x >> 48; + *crypto_int64_s++ = crypto_int64_x >> 40; + *crypto_int64_s++ = crypto_int64_x >> 32; + *crypto_int64_s++ = crypto_int64_x >> 24; + *crypto_int64_s++ = crypto_int64_x >> 16; + *crypto_int64_s++ = crypto_int64_x >> 8; + *crypto_int64_s++ = crypto_int64_x >> 0; +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_negative_mask(crypto_int64 crypto_int64_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("sarq $63,%0" : "+r"(crypto_int64_x) : : "cc"); + return crypto_int64_x; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_y; + __asm__ ("asr %0,%1,63" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); + return crypto_int64_y; +#else + crypto_int64_x >>= 64-6; + crypto_int64_x += crypto_int64_optblocker; + crypto_int64_x >>= 5; + return crypto_int64_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int64_unsigned crypto_int64_unsigned_topbit_01(crypto_int64_unsigned crypto_int64_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("shrq $63,%0" : "+r"(crypto_int64_x) : : "cc"); + return crypto_int64_x; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_y; + __asm__ ("lsr %0,%1,63" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); + return crypto_int64_y; +#else + crypto_int64_x >>= 64-6; + crypto_int64_x += crypto_int64_optblocker; + crypto_int64_x >>= 5; + return crypto_int64_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_negative_01(crypto_int64 crypto_int64_x) { + return crypto_int64_unsigned_topbit_01(crypto_int64_x); +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_topbit_mask(crypto_int64 crypto_int64_x) { + return crypto_int64_negative_mask(crypto_int64_x); +} + +__attribute__((unused)) +static inline +crypto_int64 
crypto_int64_topbit_01(crypto_int64 crypto_int64_x) { + return crypto_int64_unsigned_topbit_01(crypto_int64_x); +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_bottombit_mask(crypto_int64 crypto_int64_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("andq $1,%0" : "+r"(crypto_int64_x) : : "cc"); + return -crypto_int64_x; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_y; + __asm__ ("sbfx %0,%1,0,1" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); + return crypto_int64_y; +#else + crypto_int64_x &= 1 + crypto_int64_optblocker; + return -crypto_int64_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_bottombit_01(crypto_int64 crypto_int64_x) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("andq $1,%0" : "+r"(crypto_int64_x) : : "cc"); + return crypto_int64_x; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_y; + __asm__ ("ubfx %0,%1,0,1" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); + return crypto_int64_y; +#else + crypto_int64_x &= 1 + crypto_int64_optblocker; + return crypto_int64_x; +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_bitinrangepublicpos_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); +#else + crypto_int64_x >>= crypto_int64_s ^ crypto_int64_optblocker; +#endif + return crypto_int64_bottombit_mask(crypto_int64_x); +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_bitinrangepublicpos_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); +#elif defined(__GNUC__) && 
defined(__aarch64__) + __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); +#else + crypto_int64_x >>= crypto_int64_s ^ crypto_int64_optblocker; +#endif + return crypto_int64_bottombit_01(crypto_int64_x); +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_shlmod(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("shlq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("lsl %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); +#else + int crypto_int64_k, crypto_int64_l; + for (crypto_int64_l = 0,crypto_int64_k = 1;crypto_int64_k < 64;++crypto_int64_l,crypto_int64_k *= 2) + crypto_int64_x ^= (crypto_int64_x ^ (crypto_int64_x << crypto_int64_k)) & crypto_int64_bitinrangepublicpos_mask(crypto_int64_s,crypto_int64_l); +#endif + return crypto_int64_x; +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_shrmod(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); +#else + int crypto_int64_k, crypto_int64_l; + for (crypto_int64_l = 0,crypto_int64_k = 1;crypto_int64_k < 64;++crypto_int64_l,crypto_int64_k *= 2) + crypto_int64_x ^= (crypto_int64_x ^ (crypto_int64_x >> crypto_int64_k)) & crypto_int64_bitinrangepublicpos_mask(crypto_int64_s,crypto_int64_l); +#endif + return crypto_int64_x; +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_bitmod_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { + crypto_int64_x = crypto_int64_shrmod(crypto_int64_x,crypto_int64_s); + return crypto_int64_bottombit_mask(crypto_int64_x); +} + +__attribute__((unused)) +static inline +crypto_int64 
crypto_int64_bitmod_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { + crypto_int64_x = crypto_int64_shrmod(crypto_int64_x,crypto_int64_s); + return crypto_int64_bottombit_01(crypto_int64_x); +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_nonzero_mask(crypto_int64 crypto_int64_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,0\n csetm %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#else + crypto_int64_x |= -crypto_int64_x; + return crypto_int64_negative_mask(crypto_int64_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_nonzero_01(crypto_int64 crypto_int64_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,0\n cset %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#else + crypto_int64_x |= -crypto_int64_x; + return crypto_int64_unsigned_topbit_01(crypto_int64_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_positive_mask(crypto_int64 crypto_int64_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmovgq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + 
crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,0\n csetm %0,gt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#else + crypto_int64 crypto_int64_z = -crypto_int64_x; + crypto_int64_z ^= crypto_int64_x & crypto_int64_z; + return crypto_int64_negative_mask(crypto_int64_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_positive_01(crypto_int64 crypto_int64_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmovgq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,0\n cset %0,gt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#else + crypto_int64 crypto_int64_z = -crypto_int64_x; + crypto_int64_z ^= crypto_int64_x & crypto_int64_z; + return crypto_int64_unsigned_topbit_01(crypto_int64_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_zero_mask(crypto_int64 crypto_int64_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,0\n csetm %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#else + return ~crypto_int64_nonzero_mask(crypto_int64_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_zero_01(crypto_int64 crypto_int64_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), 
"=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,0\n cset %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); + return crypto_int64_z; +#else + return 1-crypto_int64_nonzero_01(crypto_int64_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_unequal_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,%2\n csetm %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#else + return crypto_int64_nonzero_mask(crypto_int64_x ^ crypto_int64_y); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_unequal_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,%2\n cset %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#else + return crypto_int64_nonzero_01(crypto_int64_x ^ crypto_int64_y); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_equal_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 
crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,%2\n csetm %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#else + return ~crypto_int64_unequal_mask(crypto_int64_x,crypto_int64_y); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_equal_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,%2\n cset %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#else + return 1-crypto_int64_unequal_01(crypto_int64_x,crypto_int64_y); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_min(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("cmpq %1,%0\n cmovgq %1,%0" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); + return crypto_int64_x; +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("cmp %0,%1\n csel %0,%0,%1,lt" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); + return crypto_int64_x; +#else + crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x; + crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x; + crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y); + crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z); + crypto_int64_z 
&= crypto_int64_r; + return crypto_int64_x ^ crypto_int64_z; +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_max(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { +#if defined(__GNUC__) && defined(__x86_64__) + __asm__ ("cmpq %1,%0\n cmovlq %1,%0" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); + return crypto_int64_x; +#elif defined(__GNUC__) && defined(__aarch64__) + __asm__ ("cmp %0,%1\n csel %0,%1,%0,lt" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc"); + return crypto_int64_x; +#else + crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x; + crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x; + crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y); + crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z); + crypto_int64_z &= crypto_int64_r; + return crypto_int64_y ^ crypto_int64_z; +#endif +} + +__attribute__((unused)) +static inline +void crypto_int64_minmax(crypto_int64 *crypto_int64_p,crypto_int64 *crypto_int64_q) { + crypto_int64 crypto_int64_x = *crypto_int64_p; + crypto_int64 crypto_int64_y = *crypto_int64_q; +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmpq %2,%1\n movq %1,%0\n cmovgq %2,%1\n cmovgq %0,%2" : "=&r"(crypto_int64_z), "+&r"(crypto_int64_x), "+r"(crypto_int64_y) : : "cc"); + *crypto_int64_p = crypto_int64_x; + *crypto_int64_q = crypto_int64_y; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_r, crypto_int64_s; + __asm__ ("cmp %2,%3\n csel %0,%2,%3,lt\n csel %1,%3,%2,lt" : "=&r"(crypto_int64_r), "=r"(crypto_int64_s) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + *crypto_int64_p = crypto_int64_r; + *crypto_int64_q = crypto_int64_s; +#else + crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x; + crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x; + crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y); + crypto_int64_z = 
crypto_int64_negative_mask(crypto_int64_z); + crypto_int64_z &= crypto_int64_r; + crypto_int64_x ^= crypto_int64_z; + crypto_int64_y ^= crypto_int64_z; + *crypto_int64_p = crypto_int64_x; + *crypto_int64_q = crypto_int64_y; +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_smaller_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovlq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,%2\n csetm %0,lt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#else + crypto_int64 crypto_int64_r = crypto_int64_x ^ crypto_int64_y; + crypto_int64 crypto_int64_z = crypto_int64_x - crypto_int64_y; + crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_x); + return crypto_int64_negative_mask(crypto_int64_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_smaller_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovlq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,%2\n cset %0,lt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#else + crypto_int64 crypto_int64_r = crypto_int64_x ^ crypto_int64_y; + crypto_int64 crypto_int64_z = crypto_int64_x - crypto_int64_y; + crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_x); + 
return crypto_int64_unsigned_topbit_01(crypto_int64_z); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_leq_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovleq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,%2\n csetm %0,le" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#else + return ~crypto_int64_smaller_mask(crypto_int64_y,crypto_int64_x); +#endif +} + +__attribute__((unused)) +static inline +crypto_int64 crypto_int64_leq_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 crypto_int64_q,crypto_int64_z; + __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovleq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#elif defined(__GNUC__) && defined(__aarch64__) + crypto_int64 crypto_int64_z; + __asm__ ("cmp %1,%2\n cset %0,le" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); + return crypto_int64_z; +#else + return 1-crypto_int64_smaller_01(crypto_int64_y,crypto_int64_x); +#endif +} + +__attribute__((unused)) +static inline +int crypto_int64_ones_num(crypto_int64 crypto_int64_x) { + crypto_int64_unsigned crypto_int64_y = crypto_int64_x; + const crypto_int64 C0 = 0x5555555555555555; + const crypto_int64 C1 = 0x3333333333333333; + const crypto_int64 C2 = 0x0f0f0f0f0f0f0f0f; + crypto_int64_y -= ((crypto_int64_y >> 1) & C0); + crypto_int64_y = (crypto_int64_y & C1) + ((crypto_int64_y >> 2) & C1); + crypto_int64_y = (crypto_int64_y + (crypto_int64_y >> 4)) & C2; + 
crypto_int64_y += crypto_int64_y >> 8; + crypto_int64_y += crypto_int64_y >> 16; + crypto_int64_y = (crypto_int64_y + (crypto_int64_y >> 32)) & 0xff; + return crypto_int64_y; +} + +__attribute__((unused)) +static inline +int crypto_int64_bottomzeros_num(crypto_int64 crypto_int64_x) { +#if defined(__GNUC__) && defined(__x86_64__) + crypto_int64 fallback = 64; + __asm__ ("bsfq %0,%0\n cmoveq %1,%0" : "+&r"(crypto_int64_x) : "r"(fallback) : "cc"); + return crypto_int64_x; +#elif defined(__GNUC__) && defined(__aarch64__) + int64_t crypto_int64_y; + __asm__ ("rbit %0,%1\n clz %0,%0" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); + return crypto_int64_y; +#else + crypto_int64 crypto_int64_y = crypto_int64_x ^ (crypto_int64_x-1); + crypto_int64_y = ((crypto_int64) crypto_int64_y) >> 1; + crypto_int64_y &= ~(crypto_int64_x & (((crypto_int64) 1) << (64-1))); + return crypto_int64_ones_num(crypto_int64_y); +#endif +} + +#endif + +/* from supercop-20241022/crypto_sort/int32/portable4/sort.c */ +#define int32_MINMAX(a,b) crypto_int32_minmax(&a,&b) + +static void crypto_sort_int32(void *array,long long n) +{ + long long top,p,q,r,i,j; + int32 *x = array; + + if (n < 2) return; + top = 1; + while (top < n - top) top += top; + + for (p = top;p >= 1;p >>= 1) { + i = 0; + while (i + 2 * p <= n) { + for (j = i;j < i + p;++j) + int32_MINMAX(x[j],x[j+p]); + i += 2 * p; + } + for (j = i;j < n - p;++j) + int32_MINMAX(x[j],x[j+p]); + + i = 0; + j = 0; + for (q = top;q > p;q >>= 1) { + if (j != i) for (;;) { + if (j == n - q) goto done; + int32 a = x[j + p]; + for (r = q;r > p;r >>= 1) + int32_MINMAX(a,x[j + r]); + x[j + p] = a; + ++j; + if (j == i + p) { + i += 2 * p; + break; + } + } + while (i + p <= n - q) { + for (j = i;j < i + p;++j) { + int32 a = x[j + p]; + for (r = q;r > p;r >>= 1) + int32_MINMAX(a,x[j+r]); + x[j + p] = a; + } + i += 2 * p; + } + /* now i + p > n - q */ + j = i; + while (j < n - q) { + int32 a = x[j + p]; + for (r = q;r > p;r >>= 1) + int32_MINMAX(a,x[j+r]); + 
x[j + p] = a; + ++j; + } + + done: ; + } + } +} + +/* from supercop-20241022/crypto_sort/uint32/useint32/sort.c */ + +/* can save time by vectorizing xor loops */ +/* can save time by integrating xor loops with int32_sort */ + +static void crypto_sort_uint32(void *array,long long n) +{ + crypto_uint32 *x = array; + long long j; + for (j = 0;j < n;++j) x[j] ^= 0x80000000; + crypto_sort_int32(array,n); + for (j = 0;j < n;++j) x[j] ^= 0x80000000; +} + +/* from supercop-20241022/crypto_kem/sntrup761/compact/kem.c */ +// 20240806 djb: some automated conversion to cryptoint + +#define p 761 +#define q 4591 +#define w 286 +#define q12 ((q - 1) / 2) +typedef int8_t small; +typedef int16_t Fq; +#define Hash_bytes 32 +#define Small_bytes ((p + 3) / 4) +typedef small Inputs[p]; +#define SecretKeys_bytes (2 * Small_bytes) +#define Confirm_bytes 32 + +static small F3_freeze(int16_t x) { return x - 3 * ((10923 * x + 16384) >> 15); } + +static Fq Fq_freeze(int32_t x) { + const int32_t q16 = (0x10000 + q / 2) / q; + const int32_t q20 = (0x100000 + q / 2) / q; + const int32_t q28 = (0x10000000 + q / 2) / q; + x -= q * ((q16 * x) >> 16); + x -= q * ((q20 * x) >> 20); + return x - q * ((q28 * x + 0x8000000) >> 28); +} + +static int Weightw_mask(small *r) { + int i, weight = 0; + for (i = 0; i < p; ++i) weight += crypto_int64_bottombit_01(r[i]); + return crypto_int16_nonzero_mask(weight - w); +} + +static void uint32_divmod_uint14(uint32_t *Q, uint16_t *r, uint32_t x, uint16_t m) { + uint32_t qpart, mask, v = 0x80000000 / m; + qpart = (x * (uint64_t)v) >> 31; + x -= qpart * m; + *Q = qpart; + qpart = (x * (uint64_t)v) >> 31; + x -= qpart * m; + *Q += qpart; + x -= m; + *Q += 1; + mask = crypto_int32_negative_mask(x); + x += mask & (uint32_t)m; + *Q += mask; + *r = x; +} + +static uint16_t uint32_mod_uint14(uint32_t x, uint16_t m) { + uint32_t Q; + uint16_t r; + uint32_divmod_uint14(&Q, &r, x, m); + return r; +} + +static void Encode(unsigned char *out, const uint16_t *R, const 
uint16_t *M, long long len) { + if (len == 1) { + uint16_t r = R[0], m = M[0]; + while (m > 1) { + *out++ = r; + r >>= 8; + m = (m + 255) >> 8; + } + } + if (len > 1) { + uint16_t R2[(len + 1) / 2], M2[(len + 1) / 2]; + long long i; + for (i = 0; i < len - 1; i += 2) { + uint32_t m0 = M[i]; + uint32_t r = R[i] + R[i + 1] * m0; + uint32_t m = M[i + 1] * m0; + while (m >= 16384) { + *out++ = r; + r >>= 8; + m = (m + 255) >> 8; + } + R2[i / 2] = r; + M2[i / 2] = m; + } + if (i < len) { + R2[i / 2] = R[i]; + M2[i / 2] = M[i]; + } + Encode(out, R2, M2, (len + 1) / 2); + } +} + +static void Decode(uint16_t *out, const unsigned char *S, const uint16_t *M, long long len) { + if (len == 1) { + if (M[0] == 1) + *out = 0; + else if (M[0] <= 256) + *out = uint32_mod_uint14(S[0], M[0]); + else + *out = uint32_mod_uint14(S[0] + (((uint16_t)S[1]) << 8), M[0]); + } + if (len > 1) { + uint16_t R2[(len + 1) / 2], M2[(len + 1) / 2], bottomr[len / 2]; + uint32_t bottomt[len / 2]; + long long i; + for (i = 0; i < len - 1; i += 2) { + uint32_t m = M[i] * (uint32_t)M[i + 1]; + if (m > 256 * 16383) { + bottomt[i / 2] = 256 * 256; + bottomr[i / 2] = S[0] + 256 * S[1]; + S += 2; + M2[i / 2] = (((m + 255) >> 8) + 255) >> 8; + } else if (m >= 16384) { + bottomt[i / 2] = 256; + bottomr[i / 2] = S[0]; + S += 1; + M2[i / 2] = (m + 255) >> 8; + } else { + bottomt[i / 2] = 1; + bottomr[i / 2] = 0; + M2[i / 2] = m; + } + } + if (i < len) M2[i / 2] = M[i]; + Decode(R2, S, M2, (len + 1) / 2); + for (i = 0; i < len - 1; i += 2) { + uint32_t r1, r = bottomr[i / 2]; + uint16_t r0; + r += bottomt[i / 2] * R2[i / 2]; + uint32_divmod_uint14(&r1, &r0, r, M[i]); + r1 = uint32_mod_uint14(r1, M[i + 1]); + *out++ = r0; + *out++ = r1; + } + if (i < len) *out++ = R2[i / 2]; + } +} + +static void R3_fromRq(small *out, const Fq *r) { + int i; + for (i = 0; i < p; ++i) out[i] = F3_freeze(r[i]); +} + +static void R3_mult(small *h, const small *f, const small *g) { + int16_t fg[p + p - 1]; + int i, j; + for (i = 0; i 
< p + p - 1; ++i) fg[i] = 0; + for (i = 0; i < p; ++i) + for (j = 0; j < p; ++j) fg[i + j] += f[i] * (int16_t)g[j]; + for (i = p; i < p + p - 1; ++i) fg[i - p] += fg[i]; + for (i = p; i < p + p - 1; ++i) fg[i - p + 1] += fg[i]; + for (i = 0; i < p; ++i) h[i] = F3_freeze(fg[i]); +} + +static int R3_recip(small *out, const small *in) { + small f[p + 1], g[p + 1], v[p + 1], r[p + 1]; + int sign, swap, t, i, loop, delta = 1; + for (i = 0; i < p + 1; ++i) v[i] = 0; + for (i = 0; i < p + 1; ++i) r[i] = 0; + r[0] = 1; + for (i = 0; i < p; ++i) f[i] = 0; + f[0] = 1; + f[p - 1] = f[p] = -1; + for (i = 0; i < p; ++i) g[p - 1 - i] = in[i]; + g[p] = 0; + for (loop = 0; loop < 2 * p - 1; ++loop) { + for (i = p; i > 0; --i) v[i] = v[i - 1]; + v[0] = 0; + sign = -g[0] * f[0]; + swap = crypto_int16_negative_mask(-delta) & crypto_int16_nonzero_mask(g[0]); + delta ^= swap & (delta ^ -delta); + delta += 1; + for (i = 0; i < p + 1; ++i) { + t = swap & (f[i] ^ g[i]); + f[i] ^= t; + g[i] ^= t; + t = swap & (v[i] ^ r[i]); + v[i] ^= t; + r[i] ^= t; + } + for (i = 0; i < p + 1; ++i) g[i] = F3_freeze(g[i] + sign * f[i]); + for (i = 0; i < p + 1; ++i) r[i] = F3_freeze(r[i] + sign * v[i]); + for (i = 0; i < p; ++i) g[i] = g[i + 1]; + g[p] = 0; + } + sign = f[0]; + for (i = 0; i < p; ++i) out[i] = sign * v[p - 1 - i]; + return crypto_int16_nonzero_mask(delta); +} + +static void Rq_mult_small(Fq *h, const Fq *f, const small *g) { + int32_t fg[p + p - 1]; + int i, j; + for (i = 0; i < p + p - 1; ++i) fg[i] = 0; + for (i = 0; i < p; ++i) + for (j = 0; j < p; ++j) fg[i + j] += f[i] * (int32_t)g[j]; + for (i = p; i < p + p - 1; ++i) fg[i - p] += fg[i]; + for (i = p; i < p + p - 1; ++i) fg[i - p + 1] += fg[i]; + for (i = 0; i < p; ++i) h[i] = Fq_freeze(fg[i]); +} + +static void Rq_mult3(Fq *h, const Fq *f) { + int i; + for (i = 0; i < p; ++i) h[i] = Fq_freeze(3 * f[i]); +} + +static Fq Fq_recip(Fq a1) { + int i = 1; + Fq ai = a1; + while (i < q - 2) { + ai = Fq_freeze(a1 * (int32_t)ai); + i += 1; + 
} + return ai; +} + +static int Rq_recip3(Fq *out, const small *in) { + Fq f[p + 1], g[p + 1], v[p + 1], r[p + 1], scale; + int swap, t, i, loop, delta = 1; + int32_t f0, g0; + for (i = 0; i < p + 1; ++i) v[i] = 0; + for (i = 0; i < p + 1; ++i) r[i] = 0; + r[0] = Fq_recip(3); + for (i = 0; i < p; ++i) f[i] = 0; + f[0] = 1; + f[p - 1] = f[p] = -1; + for (i = 0; i < p; ++i) g[p - 1 - i] = in[i]; + g[p] = 0; + for (loop = 0; loop < 2 * p - 1; ++loop) { + for (i = p; i > 0; --i) v[i] = v[i - 1]; + v[0] = 0; + swap = crypto_int16_negative_mask(-delta) & crypto_int16_nonzero_mask(g[0]); + delta ^= swap & (delta ^ -delta); + delta += 1; + for (i = 0; i < p + 1; ++i) { + t = swap & (f[i] ^ g[i]); + f[i] ^= t; + g[i] ^= t; + t = swap & (v[i] ^ r[i]); + v[i] ^= t; + r[i] ^= t; + } + f0 = f[0]; + g0 = g[0]; + for (i = 0; i < p + 1; ++i) g[i] = Fq_freeze(f0 * g[i] - g0 * f[i]); + for (i = 0; i < p + 1; ++i) r[i] = Fq_freeze(f0 * r[i] - g0 * v[i]); + for (i = 0; i < p; ++i) g[i] = g[i + 1]; + g[p] = 0; + } + scale = Fq_recip(f[0]); + for (i = 0; i < p; ++i) out[i] = Fq_freeze(scale * (int32_t)v[p - 1 - i]); + return crypto_int16_nonzero_mask(delta); +} + +static void Round(Fq *out, const Fq *a) { + int i; + for (i = 0; i < p; ++i) out[i] = a[i] - F3_freeze(a[i]); +} + +static void Short_fromlist(small *out, const uint32_t *in) { + uint32_t L[p]; + int i; + for (i = 0; i < w; ++i) L[i] = in[i] & (uint32_t)-2; + for (i = w; i < p; ++i) L[i] = (in[i] & (uint32_t)-3) | 1; + crypto_sort_uint32(L, p); + for (i = 0; i < p; ++i) out[i] = (L[i] & 3) - 1; +} + +static void Hash_prefix(unsigned char *out, int b, const unsigned char *in, int inlen) { + unsigned char x[inlen + 1], h[64]; + int i; + x[0] = b; + for (i = 0; i < inlen; ++i) x[i + 1] = in[i]; + crypto_hash_sha512(h, x, inlen + 1); + for (i = 0; i < 32; ++i) out[i] = h[i]; +} + +static uint32_t urandom32(void) { + unsigned char c[4]; + uint32_t result = 0; + int i; + randombytes(c, 4); + for (i = 0; i < 4; ++i) result += 
((uint32_t)c[i]) << (8 * i); + return result; +} + +static void Short_random(small *out) { + uint32_t L[p]; + int i; + for (i = 0; i < p; ++i) L[i] = urandom32(); + Short_fromlist(out, L); +} + +static void Small_random(small *out) { + int i; + for (i = 0; i < p; ++i) out[i] = (((urandom32() & 0x3fffffff) * 3) >> 30) - 1; +} + +static void KeyGen(Fq *h, small *f, small *ginv) { + small g[p]; + Fq finv[p]; + for (;;) { + int result; + Small_random(g); + result = R3_recip(ginv, g); + crypto_declassify(&result, sizeof result); + if (result == 0) break; + } + Short_random(f); + Rq_recip3(finv, f); + Rq_mult_small(h, finv, g); +} + +static void Encrypt(Fq *c, const small *r, const Fq *h) { + Fq hr[p]; + Rq_mult_small(hr, h, r); + Round(c, hr); +} + +static void Decrypt(small *r, const Fq *c, const small *f, const small *ginv) { + Fq cf[p], cf3[p]; + small e[p], ev[p]; + int mask, i; + Rq_mult_small(cf, c, f); + Rq_mult3(cf3, cf); + R3_fromRq(e, cf3); + R3_mult(ev, e, ginv); + mask = Weightw_mask(ev); + for (i = 0; i < w; ++i) r[i] = ((ev[i] ^ 1) & ~mask) ^ 1; + for (i = w; i < p; ++i) r[i] = ev[i] & ~mask; +} + +static void Small_encode(unsigned char *s, const small *f) { + int i, j; + for (i = 0; i < p / 4; ++i) { + small x = 0; + for (j = 0;j < 4;++j) x += (*f++ + 1) << (2 * j); + *s++ = x; + } + *s = *f++ + 1; +} + +static void Small_decode(small *f, const unsigned char *s) { + int i, j; + for (i = 0; i < p / 4; ++i) { + unsigned char x = *s++; + for (j = 0;j < 4;++j) *f++ = ((small)((x >> (2 * j)) & 3)) - 1; + } + *f++ = ((small)(*s & 3)) - 1; +} + +static void Rq_encode(unsigned char *s, const Fq *r) { + uint16_t R[p], M[p]; + int i; + for (i = 0; i < p; ++i) R[i] = r[i] + q12; + for (i = 0; i < p; ++i) M[i] = q; + Encode(s, R, M, p); +} + +static void Rq_decode(Fq *r, const unsigned char *s) { + uint16_t R[p], M[p]; + int i; + for (i = 0; i < p; ++i) M[i] = q; + Decode(R, s, M, p); + for (i = 0; i < p; ++i) r[i] = ((Fq)R[i]) - q12; +} + +static void 
Rounded_encode(unsigned char *s, const Fq *r) { + uint16_t R[p], M[p]; + int i; + for (i = 0; i < p; ++i) R[i] = ((r[i] + q12) * 10923) >> 15; + for (i = 0; i < p; ++i) M[i] = (q + 2) / 3; + Encode(s, R, M, p); +} + +static void Rounded_decode(Fq *r, const unsigned char *s) { + uint16_t R[p], M[p]; + int i; + for (i = 0; i < p; ++i) M[i] = (q + 2) / 3; + Decode(R, s, M, p); + for (i = 0; i < p; ++i) r[i] = R[i] * 3 - q12; +} + +static void ZKeyGen(unsigned char *pk, unsigned char *sk) { + Fq h[p]; + small f[p], v[p]; + KeyGen(h, f, v); + Rq_encode(pk, h); + Small_encode(sk, f); + Small_encode(sk + Small_bytes, v); +} + +static void ZEncrypt(unsigned char *C, const Inputs r, const unsigned char *pk) { + Fq h[p], c[p]; + Rq_decode(h, pk); + Encrypt(c, r, h); + Rounded_encode(C, c); +} + +static void ZDecrypt(Inputs r, const unsigned char *C, const unsigned char *sk) { + small f[p], v[p]; + Fq c[p]; + Small_decode(f, sk); + Small_decode(v, sk + Small_bytes); + Rounded_decode(c, C); + Decrypt(r, c, f, v); +} + +static void HashConfirm(unsigned char *h, const unsigned char *r, const unsigned char *cache) { + unsigned char x[Hash_bytes * 2]; + int i; + Hash_prefix(x, 3, r, Small_bytes); + for (i = 0; i < Hash_bytes; ++i) x[Hash_bytes + i] = cache[i]; + Hash_prefix(h, 2, x, sizeof x); +} + +static void HashSession(unsigned char *k, int b, const unsigned char *y, const unsigned char *z) { + unsigned char x[Hash_bytes + crypto_kem_sntrup761_CIPHERTEXTBYTES]; + int i; + Hash_prefix(x, 3, y, Small_bytes); + for (i = 0; i < crypto_kem_sntrup761_CIPHERTEXTBYTES; ++i) x[Hash_bytes + i] = z[i]; + Hash_prefix(k, b, x, sizeof x); +} + +int crypto_kem_sntrup761_keypair(unsigned char *pk, unsigned char *sk) { + int i; + ZKeyGen(pk, sk); + sk += SecretKeys_bytes; + for (i = 0; i < crypto_kem_sntrup761_PUBLICKEYBYTES; ++i) *sk++ = pk[i]; + randombytes(sk, Small_bytes); + Hash_prefix(sk + Small_bytes, 4, pk, crypto_kem_sntrup761_PUBLICKEYBYTES); + return 0; +} + +static void 
Hide(unsigned char *c, unsigned char *r_enc, const Inputs r, const unsigned char *pk, const unsigned char *cache) { + Small_encode(r_enc, r); + ZEncrypt(c, r, pk); + HashConfirm(c + crypto_kem_sntrup761_CIPHERTEXTBYTES - Confirm_bytes, r_enc, cache); +} + +int crypto_kem_sntrup761_enc(unsigned char *c, unsigned char *k, const unsigned char *pk) { + Inputs r; + unsigned char r_enc[Small_bytes], cache[Hash_bytes]; + Hash_prefix(cache, 4, pk, crypto_kem_sntrup761_PUBLICKEYBYTES); + Short_random(r); + Hide(c, r_enc, r, pk, cache); + HashSession(k, 1, r_enc, c); + return 0; +} + +static int Ciphertexts_diff_mask(const unsigned char *c, const unsigned char *c2) { + uint16_t differentbits = 0; + int len = crypto_kem_sntrup761_CIPHERTEXTBYTES; + while (len-- > 0) differentbits |= (*c++) ^ (*c2++); + return (crypto_int64_bitmod_01((differentbits - 1),8)) - 1; +} + +int crypto_kem_sntrup761_dec(unsigned char *k, const unsigned char *c, const unsigned char *sk) { + const unsigned char *pk = sk + SecretKeys_bytes; + const unsigned char *rho = pk + crypto_kem_sntrup761_PUBLICKEYBYTES; + const unsigned char *cache = rho + Small_bytes; + Inputs r; + unsigned char r_enc[Small_bytes], cnew[crypto_kem_sntrup761_CIPHERTEXTBYTES]; + int mask, i; + ZDecrypt(r, c, sk); + Hide(cnew, r_enc, r, pk, cache); + mask = Ciphertexts_diff_mask(c, cnew); + for (i = 0; i < Small_bytes; ++i) r_enc[i] ^= mask & (r_enc[i] ^ rho[i]); + HashSession(k, 1 + mask, r_enc, c); + return 0; +} + +#endif /* DROPBEAR_SNTRUP761 */ diff --git a/src/sntrup761.h b/src/sntrup761.h new file mode 100644 index 000000000..7078c0345 --- /dev/null +++ b/src/sntrup761.h @@ -0,0 +1,13 @@ +#ifndef SNTRUP761_H +#define SNTRUP761_H + +#define crypto_kem_sntrup761_PUBLICKEYBYTES 1158 +#define crypto_kem_sntrup761_SECRETKEYBYTES 1763 +#define crypto_kem_sntrup761_CIPHERTEXTBYTES 1039 +#define crypto_kem_sntrup761_BYTES 32 + +int crypto_kem_sntrup761_keypair(unsigned char *pk, unsigned char *sk); +int 
crypto_kem_sntrup761_enc(unsigned char *c, unsigned char *k, const unsigned char *pk); +int crypto_kem_sntrup761_dec(unsigned char *k, const unsigned char *c, const unsigned char *sk); + +#endif /* SNTRUP761_H */ diff --git a/src/sntrup761_compat.h b/src/sntrup761_compat.h new file mode 100644 index 000000000..47832d616 --- /dev/null +++ b/src/sntrup761_compat.h @@ -0,0 +1,44 @@ +/* To be included only by sntrup761.c, provides + * random and sha512 implementation from Dropbear. + * Partially based on OpenSSH crypto_api.h */ +/* + * Assembled from generated headers and source files by Markus Friedl. + * Placed in the public domain. + */ + +#include "includes.h" +#include "dbrandom.h" +#include "sntrup761.h" + +#if DROPBEAR_SNTRUP761 + +typedef int8_t crypto_int8; +typedef uint8_t crypto_uint8; +typedef int16_t crypto_int16; +typedef uint16_t crypto_uint16; +typedef int32_t crypto_int32; +typedef uint32_t crypto_uint32; +typedef int64_t crypto_int64; +typedef uint64_t crypto_uint64; + +static inline void randombytes(unsigned char* buf, unsigned int len) { + genrandom(buf, len); +} + +static inline uint32_t small_random32(void) { + uint32_t v; + genrandom((unsigned char*)&v, sizeof(v)); + return v; +} + +static int crypto_hash_sha512(uint8_t *out, const uint8_t *m, + unsigned long long n) +{ + hash_state hs; + + sha512_init(&hs); + sha512_process(&hs, m, n); + return sha512_done(&hs, out); +} + +#endif /* DROPBEAR_SNTRUP761 */ diff --git a/src/sysoptions.h b/src/sysoptions.h index d992a7aac..29d92e7a7 100644 --- a/src/sysoptions.h +++ b/src/sysoptions.h @@ -208,8 +208,7 @@ #define DROPBEAR_SK_ED25519 ((DROPBEAR_SK_KEYS) && (DROPBEAR_ED25519)) #endif -/* XXX: Not actually used */ -#define DROPBEAR_PQHYBRID 1 +#define DROPBEAR_PQHYBRID DROPBEAR_SNTRUP761 #define DROPBEAR_CURVE25519_DEP (DROPBEAR_CURVE25519 || DROPBEAR_PQHYBRID) /* Dropbear only uses server-sig-algs, only needed if we have rsa-sha256 pubkey auth */ @@ -245,16 +244,39 @@ #define MAX_STRING_LEN 
(MAX(MAX_CMD_LEN, 2400)) /* Sun SSH needs 2400 for algos, MAX_CMD_LEN is usually longer */ -/* For a 4096 bit DSS key, empirically determined */ -#define MAX_PUBKEY_SIZE 1700 -/* For a 4096 bit DSS key, empirically determined */ + +/* Key type sizes are ordered large to small, all are + determined empirically, and rounded up */ +#if DROPBEAR_RSA +/* 4096 bit RSA key */ +#define MAX_PUBKEY_SIZE 600 #define MAX_PRIVKEY_SIZE 1700 +#elif DROPBEAR_DSS +#define MAX_PUBKEY_SIZE 500 +#define MAX_PRIVKEY_SIZE 500 +#else +/* 521 bit ecdsa key */ +#define MAX_PUBKEY_SIZE 200 +#define MAX_PRIVKEY_SIZE 200 +#endif + +/* For kex hash buffer, worst case size for Q_C || Q_S || K */ +#if DROPBEAR_SNTRUP761 +/* 2337 */ +#define MAX_KEX_PARTS (2*4 + 1158 + 1039 + 32*2 + 68) +#elif DROPBEAR_DH_GROUP16 +/* 4096 bit group */ +#define MAX_KEX_PARTS (3 * 520) +#else +/* Sufficent for 2048 bit group14, or ecdsa521 */ +#define MAX_KEX_PARTS 1000 +#endif #define MAX_HOSTKEYS 4 /* The maximum size of the bignum portion of the kexhash buffer */ -/* Sect. 8 of the transport rfc 4253, K_S + e + f + K */ -#define KEXHASHBUF_MAX_INTS (1700 + 130 + 130 + 130) +/* K_S + Q_C + Q_S + K */ +#define KEXHASHBUF_MAX_INTS (MAX_PUBKEY_SIZE + MAX_KEX_PARTS) #define DROPBEAR_MAX_SOCKS 2 /* IPv4, IPv6 are all we'll get for now. Revisit in a few years time.... */ From 18169c3529d52a7ad2c13ce248346e2e88a2cb0d Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Sat, 14 Dec 2024 21:29:38 +0800 Subject: [PATCH 4/5] Fix incorrect naming of ec_qs Change to q_s and q_c since pqhybrid isn't only EC. 
--- fuzz/fuzzer-kexcurve25519.c | 6 +++--- fuzz/fuzzer-kexecdh.c | 6 +++--- src/cli-kex.c | 6 +++--- src/svr-kex.c | 22 +++++++++++----------- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/fuzz/fuzzer-kexcurve25519.c b/fuzz/fuzzer-kexcurve25519.c index 78aaaeaf4..9651ca6e0 100644 --- a/fuzz/fuzzer-kexcurve25519.c +++ b/fuzz/fuzzer-kexcurve25519.c @@ -45,14 +45,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { unsigned int e = buf_getint(fuzz.input); struct kex_curve25519_param *curve25519_param = curve25519_params[e % NUM_PARAMS]; - buffer * ecdh_qs = buf_getstringbuf(fuzz.input); + buffer * ecdh_qc = buf_getstringbuf(fuzz.input); ses.kexhashbuf = buf_new(KEXHASHBUF_MAX_INTS); - kexcurve25519_comb_key(curve25519_param, ecdh_qs, svr_opts.hostkey); + kexcurve25519_comb_key(curve25519_param, ecdh_qc, svr_opts.hostkey); mp_clear(ses.dh_K); m_free(ses.dh_K); - buf_free(ecdh_qs); + buf_free(ecdh_qc); buf_free(ses.hash); buf_free(ses.session_id); diff --git a/fuzz/fuzzer-kexecdh.c b/fuzz/fuzzer-kexecdh.c index 217d730c0..b633c59d4 100644 --- a/fuzz/fuzzer-kexecdh.c +++ b/fuzz/fuzzer-kexecdh.c @@ -58,14 +58,14 @@ int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { unsigned int e = buf_getint(fuzz.input); struct kex_ecdh_param *ecdh_param = ecdh_params[e % NUM_PARAMS]; - buffer * ecdh_qs = buf_getstringbuf(fuzz.input); + buffer * ecdh_qc = buf_getstringbuf(fuzz.input); ses.kexhashbuf = buf_new(KEXHASHBUF_MAX_INTS); - kexecdh_comb_key(ecdh_param, ecdh_qs, svr_opts.hostkey); + kexecdh_comb_key(ecdh_param, ecdh_qc, svr_opts.hostkey); mp_clear(ses.dh_K); m_free(ses.dh_K); - buf_free(ecdh_qs); + buf_free(ecdh_qc); buf_free(ses.hash); buf_free(ses.session_id); diff --git a/src/cli-kex.c b/src/cli-kex.c index 2e8ac91d1..6881b360d 100644 --- a/src/cli-kex.c +++ b/src/cli-kex.c @@ -186,9 +186,9 @@ void recv_msg_kexdh_reply() { #if DROPBEAR_PQHYBRID case DROPBEAR_KEX_PQHYBRID: { - buffer *ecdh_qs = buf_getstringbuf(ses.payload); - 
kexpqhybrid_comb_key(cli_ses.pqhybrid_param, ecdh_qs, hostkey); - buf_free(ecdh_qs); + buffer *q_s = buf_getstringbuf(ses.payload); + kexpqhybrid_comb_key(cli_ses.pqhybrid_param, q_s, hostkey); + buf_free(q_s); } break; #endif diff --git a/src/svr-kex.c b/src/svr-kex.c index d243469c5..14df08a1a 100644 --- a/src/svr-kex.c +++ b/src/svr-kex.c @@ -37,7 +37,7 @@ #include "ecc.h" #include "gensignkey.h" -static void send_msg_kexdh_reply(mp_int *dh_e, buffer *ecdh_qs); +static void send_msg_kexdh_reply(mp_int *dh_e, buffer *q_c); #if DROPBEAR_EXT_INFO static void send_msg_ext_info(void); #endif @@ -48,7 +48,7 @@ static void send_msg_ext_info(void); * that function, then brings the new keys into use */ void recv_msg_kexdh_init() { DEF_MP_INT(dh_e); - buffer *ecdh_qs = NULL; + buffer *q_c = NULL; TRACE(("enter recv_msg_kexdh_init")) if (!ses.kexstate.recvkexinit) { @@ -74,7 +74,7 @@ void recv_msg_kexdh_init() { case DROPBEAR_KEX_PQHYBRID: #endif #if DROPBEAR_ECDH || DROPBEAR_CURVE25519 || DROPBEAR_PQHYBRID - ecdh_qs = buf_getstringbuf(ses.payload); + q_c = buf_getstringbuf(ses.payload); break; #endif } @@ -82,12 +82,12 @@ void recv_msg_kexdh_init() { dropbear_exit("Bad kex value"); } - send_msg_kexdh_reply(&dh_e, ecdh_qs); + send_msg_kexdh_reply(&dh_e, q_c); mp_clear(&dh_e); - if (ecdh_qs) { - buf_free(ecdh_qs); - ecdh_qs = NULL; + if (q_c) { + buf_free(q_c); + q_c = NULL; } send_msg_newkeys(); @@ -189,7 +189,7 @@ static void svr_ensure_hostkey() { * * See the transport RFC4253 section 8 for details * or RFC5656 section 4 for elliptic curve variant. 
*/ -static void send_msg_kexdh_reply(mp_int *dh_e, buffer *ecdh_qs) { +static void send_msg_kexdh_reply(mp_int *dh_e, buffer *q_c) { TRACE(("enter send_msg_kexdh_reply")) /* we can start creating the kexdh_reply packet */ @@ -230,7 +230,7 @@ static void send_msg_kexdh_reply(mp_int *dh_e, buffer *ecdh_qs) { case DROPBEAR_KEX_ECDH: { struct kex_ecdh_param *ecdh_param = gen_kexecdh_param(); - kexecdh_comb_key(ecdh_param, ecdh_qs, svr_opts.hostkey); + kexecdh_comb_key(ecdh_param, q_c, svr_opts.hostkey); buf_put_ecc_raw_pubkey_string(ses.writepayload, &ecdh_param->key); free_kexecdh_param(ecdh_param); @@ -241,7 +241,7 @@ static void send_msg_kexdh_reply(mp_int *dh_e, buffer *ecdh_qs) { case DROPBEAR_KEX_CURVE25519: { struct kex_curve25519_param *param = gen_kexcurve25519_param(); - kexcurve25519_comb_key(param, ecdh_qs, svr_opts.hostkey); + kexcurve25519_comb_key(param, q_c, svr_opts.hostkey); buf_putstring(ses.writepayload, param->pub, CURVE25519_LEN); free_kexcurve25519_param(param); @@ -252,7 +252,7 @@ static void send_msg_kexdh_reply(mp_int *dh_e, buffer *ecdh_qs) { case DROPBEAR_KEX_PQHYBRID: { struct kex_pqhybrid_param *param = gen_kexpqhybrid_param(); - kexpqhybrid_comb_key(param, ecdh_qs, svr_opts.hostkey); + kexpqhybrid_comb_key(param, q_c, svr_opts.hostkey); buf_putbufstring(ses.writepayload, param->concat_public); free_kexpqhybrid_param(param); From 0ca5d5ea29fe3d4e18d51c7f1a4a06f40ecd7441 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Sat, 14 Dec 2024 21:39:08 +0800 Subject: [PATCH 5/5] Add fuzzers for sntrup Use separate fuzzers for client vs server since KEMs have an asymmetric API. 
--- Makefile.in | 3 +- fuzz/fuzzer-kexsntrup-cli.c | 55 +++++++++++++++++++++++++++++++++++++ fuzz/fuzzer-kexsntrup-srv.c | 54 ++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 fuzz/fuzzer-kexsntrup-cli.c create mode 100644 fuzz/fuzzer-kexsntrup-srv.c diff --git a/Makefile.in b/Makefile.in index 979a763be..62bfe0bfc 100644 --- a/Makefile.in +++ b/Makefile.in @@ -297,7 +297,8 @@ check: lint # list of fuzz targets FUZZ_TARGETS=fuzzer-preauth fuzzer-pubkey fuzzer-verify fuzzer-preauth_nomaths \ fuzzer-kexdh fuzzer-kexecdh fuzzer-kexcurve25519 fuzzer-client fuzzer-client_nomaths \ - fuzzer-postauth_nomaths fuzzer-cliconf + fuzzer-postauth_nomaths fuzzer-cliconf \ + fuzzer-kexsntrup-srv fuzzer-kexsntrup-cli FUZZER_OPTIONS = $(addsuffix .options, $(FUZZ_TARGETS)) FUZZ_OBJS = $(addprefix fuzz/,$(addsuffix .o,$(FUZZ_TARGETS))) \ diff --git a/fuzz/fuzzer-kexsntrup-cli.c b/fuzz/fuzzer-kexsntrup-cli.c new file mode 100644 index 000000000..7753034ae --- /dev/null +++ b/fuzz/fuzzer-kexsntrup-cli.c @@ -0,0 +1,55 @@ +#include "fuzz.h" +#include "session.h" +#include "fuzz-wrapfd.h" +#include "debug.h" +#include "runopts.h" +#include "algo.h" + +static struct key_context* keep_newkeys = NULL; + +static void setup() __attribute__((constructor)); +static void setup() { + fuzz_common_setup(); + fuzz_cli_setup(); + + keep_newkeys = (struct key_context*)m_malloc(sizeof(struct key_context)); + keep_newkeys->algo_kex = fuzz_get_algo(sshkex, "sntrup761x25519-sha512"); +} + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (fuzz_set_input(Data, Size) == DROPBEAR_FAILURE) { + return 0; + } + + m_malloc_set_epoch(1); + + if (setjmp(fuzz.jmp) == 0) { + /* Arbitrary key to write into a buffer */ + sign_key *hostkey = cli_opts.privkeys->first; + ses.newkeys = keep_newkeys; + + struct kex_pqhybrid_param *param = gen_kexpqhybrid_param(); + + buffer * q_s = buf_getstringbuf(fuzz.input); + + ses.kexhashbuf = 
buf_new(KEXHASHBUF_MAX_INTS); + kexpqhybrid_comb_key(param, q_s, hostkey); + + free_kexpqhybrid_param(param); + + buf_free(ses.dh_K_bytes); + buf_free(q_s); + + buf_free(ses.hash); + buf_free(ses.session_id); + /* kexhashbuf is freed in kexpqhybrid_comb_key */ + + m_malloc_free_epoch(1, 0); + } else { + m_malloc_free_epoch(1, 1); + TRACE(("dropbear_exit longjmped")) + /* dropbear_exit jumped here */ + } + + return 0; +} diff --git a/fuzz/fuzzer-kexsntrup-srv.c b/fuzz/fuzzer-kexsntrup-srv.c new file mode 100644 index 000000000..b023fef2a --- /dev/null +++ b/fuzz/fuzzer-kexsntrup-srv.c @@ -0,0 +1,54 @@ +#include "fuzz.h" +#include "session.h" +#include "fuzz-wrapfd.h" +#include "debug.h" +#include "runopts.h" +#include "algo.h" + +static struct key_context* keep_newkeys = NULL; + +static void setup() __attribute__((constructor)); +static void setup() { + fuzz_common_setup(); + fuzz_svr_setup(); + + keep_newkeys = (struct key_context*)m_malloc(sizeof(struct key_context)); + keep_newkeys->algo_kex = fuzz_get_algo(sshkex, "sntrup761x25519-sha512"); + keep_newkeys->algo_hostkey = DROPBEAR_SIGNKEY_ED25519; +} + +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (fuzz_set_input(Data, Size) == DROPBEAR_FAILURE) { + return 0; + } + + m_malloc_set_epoch(1); + + if (setjmp(fuzz.jmp) == 0) { + ses.newkeys = keep_newkeys; + + struct kex_pqhybrid_param *param = gen_kexpqhybrid_param(); + + buffer * q_c = buf_getstringbuf(fuzz.input); + + ses.kexhashbuf = buf_new(KEXHASHBUF_MAX_INTS); + kexpqhybrid_comb_key(param, q_c, svr_opts.hostkey); + + free_kexpqhybrid_param(param); + + buf_free(ses.dh_K_bytes); + buf_free(q_c); + + buf_free(ses.hash); + buf_free(ses.session_id); + /* kexhashbuf is freed in kexpqhybrid_comb_key */ + + m_malloc_free_epoch(1, 0); + } else { + m_malloc_free_epoch(1, 1); + TRACE(("dropbear_exit longjmped")) + /* dropbear_exit jumped here */ + } + + return 0; +}