diff --git a/client/CMakeLists.txt b/client/CMakeLists.txt index ed60b07fa14e3afcf2383721813cfb0515467b63..92e4c3b1dd1edd31d8bff6704aa9320cd2e5b9ac 100644 --- a/client/CMakeLists.txt +++ b/client/CMakeLists.txt @@ -1,9 +1,9 @@ cmake_minimum_required(VERSION 2.8) project (dapclient) -set(CLIENT_SRCS dap_client_remote.c dap_client.c) +set(CLIENT_SRCS dap_client.c dap_client_internal.c) -add_library(${PROJECT_NAME} STATIC ${CORE_SRCS}) +add_library(${PROJECT_NAME} STATIC ${CLIENT_SRCS}) set(${PROJECT_NAME}_DEFINITIONS CACHE INTERNAL "${PROJECT_NAME}: Definitions" FORCE) diff --git a/client/dap_client.c b/client/dap_client.c index 62dc62e0aabeab2ea971f1071690151d19bbe2cd..ab6eb6f2192eeb1742382b11d2138bdb4b11c559 100644 --- a/client/dap_client.c +++ b/client/dap_client.c @@ -1,167 +1,161 @@ +#include <stddef.h> #include "common.h" -#include "sap_client.h" -#include "sap_client_internal.h" +#include "dap_client.h" +#include "dap_client_internal.h" -#define LOG_TAG "sap_client" +#define LOG_TAG "dap_client" /** - * @brief sap_client_init + * @brief dap_client_init * @return */ -int sap_client_init() +int dap_client_init() { - log_it(L_INFO, "Init SAP client module"); + log_it(L_INFO, "Init DAP client module"); return 0; } /** - * @brief sap_client_deinit + * @brief dap_client_deinit */ -void sap_client_deinit() +void dap_client_deinit() { - log_it(L_INFO, "Deinit SAP client module"); + log_it(L_INFO, "Deinit DAP client module"); } /** - * @brief sap_client_new + * @brief dap_client_new * @param a_stage_status_callback * @return */ -sap_client_t * sap_client_new(sap_client_callback_t a_stage_status_callback) +dap_client_t * dap_client_new(dap_client_callback_t a_stage_status_callback) { } /** - * @brief sap_client_delete + * @brief dap_client_delete * @param a_client */ -void sap_client_delete(sap_client_t * a_client) +void dap_client_delete(dap_client_t * a_client) { } /** - * @brief sap_client_go_stage + * @brief dap_client_go_stage * @param a_client * @param a_stage_end 
*/ -void sap_client_go_stage(sap_client_t * a_client, sap_client_stage_t a_stage_end, sap_client_callback_t a_stage_end_callback) +void dap_client_go_stage(dap_client_t * a_client, dap_client_stage_t a_stage_end, + dap_client_callback_t a_stage_end_callback) { } /** - * @brief sap_client_session_request + * @brief dap_client_session_request * @param a_client * @param a_path * @param a_request * @param a_request_size * @param a_response_proc */ -void sap_client_session_request(sap_client_t * a_client, const char * a_path, void * a_request, size_t a_request_size, - sap_client_callback_t a_response_proc) +void dap_client_session_request(dap_client_t * a_client, const char * a_path, void * a_request, size_t a_request_size, + dap_client_callback_t a_response_proc) { } /** - * @brief sap_client_set_uplink + * @brief dap_client_set_uplink * @param a_client * @param a_addr * @param a_port */ -void sap_client_set_uplink(sap_client_t * a_client,const char* a_addr, uint16_t a_port) +void dap_client_set_uplink(dap_client_t * a_client,const char* a_addr, uint16_t a_port) { } /** - * @brief sap_client_set_credentials + * @brief dap_client_set_credentials * @param a_client * @param a_user * @param a_password */ -void sap_client_set_credentials(sap_client_t * a_client,const char* a_user, const char * a_password) +void dap_client_set_credentials(dap_client_t * a_client,const char* a_user, const char * a_password) { } /** - * @brief sap_client_error_str + * @brief dap_client_error_str * @param a_client_error * @return */ -const char * sap_client_error_str(sap_client_error_t a_client_error) +const char * dap_client_error_str(dap_client_error_t a_client_error) { switch(a_client_error){ - case SAP_CLIENT_ERROR_ENC_NO_KEY: return "ENC_NO_KEY"; - case SAP_CLIENT_ERROR_ENC_WRONG_KEY: return "ENC_WRONG_KEY"; - case SAP_CLIENT_ERROR_AUTH_WRONG_COOKIE: return "AUTH_WRONG_COOKIE"; - case SAP_CLIENT_ERROR_AUTH_WRONG_CREDENTIALS: return "AUTH_WRONG_CREDENTIALS"; - case 
SAP_CLIENT_ERROR_NETWORK_CONNECTION_TIMEOUT: return "NETWORK_CONNECTION_TIMEOUT"; - case SAP_CLIENT_ERROR_NETWORK_CONNECTION_REFUSE: return "NETWORK_CONNECTION_REFUSE"; - case SAP_CLIENT_ERROR_NETWORK_DISCONNECTED: return "NETWORK_DISCONNECTED"; - case SAP_CLIENT_ERROR_STREAM_RESPONSE_WRONG: return "STREAM_RESPONSE_WRONG"; - case SAP_CLIENT_ERROR_STREAM_RESPONSE_TIMEOUT: return "STREAM_RESPONSE_TIMEOUT"; - case SAP_CLIENT_ERROR_STREAM_FREEZED: return "STREAM_FREEZED"; - case SAP_CLIENT_ERROR_LICENSE: return "LICENSE_ERROR"; + case DAP_CLIENT_ERROR_ENC_NO_KEY: return "ENC_NO_KEY"; + case DAP_CLIENT_ERROR_ENC_WRONG_KEY: return "ENC_WRONG_KEY"; + case DAP_CLIENT_ERROR_AUTH_WRONG_COOKIE: return "AUTH_WRONG_COOKIE"; + case DAP_CLIENT_ERROR_AUTH_WRONG_CREDENTIALS: return "AUTH_WRONG_CREDENTIALS"; + case DAP_CLIENT_ERROR_NETWORK_CONNECTION_TIMEOUT: return "NETWORK_CONNECTION_TIMEOUT"; + case DAP_CLIENT_ERROR_NETWORK_CONNECTION_REFUSE: return "NETWORK_CONNECTION_REFUSE"; + case DAP_CLIENT_ERROR_NETWORK_DISCONNECTED: return "NETWORK_DISCONNECTED"; default : return "UNDEFINED"; } } /** - * @brief sap_client_get_stage + * @brief dap_client_get_stage * @param a_client * @return */ -sap_client_stage_t sap_client_get_stage(sap_client_t * a_client) +dap_client_stage_t dap_client_get_stage(dap_client_t * a_client) { - return SAP_CLIENT_INTERNAL(a_client)->stage; + return DAP_CLIENT_INTERNAL(a_client)->stage; } /** - * @brief sap_client_get_stage_status_str + * @brief dap_client_get_stage_status_str * @param a_client * @return */ -const char * sap_client_get_stage_status_str(sap_client_t *a_client) +const char * dap_client_get_stage_status_str(dap_client_t *a_client) { - switch(SAP_CLIENT_INTERNAL(a_client)->stage_status){ - case SAP_CLIENT_STAGE_STATUS_NONE: return "NONE"; - case SAP_CLIENT_STAGE_STATUS_IN_PROGRESS: return "IN_PROGRESS"; - case SAP_CLIENT_STAGE_STATUS_ERROR: return "ERROR"; - case SAP_CLIENT_STAGE_STATUS_DONE: return "DONE"; + 
switch(DAP_CLIENT_INTERNAL(a_client)->stage_status){ + case DAP_CLIENT_STAGE_STATUS_NONE: return "NONE"; + case DAP_CLIENT_STAGE_STATUS_IN_PROGRESS: return "IN_PROGRESS"; + case DAP_CLIENT_STAGE_STATUS_ERROR: return "ERROR"; + case DAP_CLIENT_STAGE_STATUS_DONE: return "DONE"; default: return "UNDEFINED"; } } /** - * @brief sap_client_get_stage_str + * @brief dap_client_get_stage_str * @param a_client * @return */ -const char * sap_client_get_stage_str(sap_client_t * a_client) +const char * dap_client_get_stage_str(dap_client_t * a_client) { - switch(SAP_CLIENT_INTERNAL(a_client)->stage){ - case SAP_CLIENT_STAGE_BEGIN: return "BEGIN"; - case SAP_CLIENT_STAGE_ENC: return "ENC"; - case SAP_CLIENT_STAGE_AUTH: return "AUTH"; - case SAP_CLIENT_STAGE_STREAM_CTL: return "STREAM_CTL"; - case SAP_CLIENT_STAGE_STREAM: return "STREAM"; - case SAP_CLIENT_STAGE_NETCONF: return "NETCONF"; - case SAP_CLIENT_STAGE_TUNNEL: return "TUNNEL"; + switch(DAP_CLIENT_INTERNAL(a_client)->stage){ + case DAP_CLIENT_STAGE_BEGIN: return "BEGIN"; + case DAP_CLIENT_STAGE_ENC: return "ENC"; + case DAP_CLIENT_STAGE_AUTH: return "AUTH"; default: return "UNDEFINED"; } } /** - * @brief sap_client_get_stage_status + * @brief dap_client_get_stage_status * @param a_client * @return */ -sap_client_stage_status_t sap_client_get_stage_status(sap_client_t * a_client) +dap_client_stage_status_t dap_client_get_stage_status(dap_client_t * a_client) { - return SAP_CLIENT_INTERNAL(a_client)->stage_status; + return DAP_CLIENT_INTERNAL(a_client)->stage_status; } diff --git a/client/dap_client.h b/client/dap_client.h index a09cbadeeef4abfea666a937a78fe268a060f89d..14206d30b027a0c2acc634184f9db3f55962f4f7 100644 --- a/client/dap_client.h +++ b/client/dap_client.h @@ -1,5 +1,5 @@ -#ifndef _SAP_CLIENT_H_ -#define _SAP_CLIENT_H_ +#ifndef _DAP_CLIENT_H_ +#define _DAP_CLIENT_H_ #include <stdint.h> @@ -24,6 +24,8 @@ typedef enum dap_client_error { DAP_CLIENT_ERROR_UNDEFINED = 0, DAP_CLIENT_ERROR_ENC_NO_KEY, 
DAP_CLIENT_ERROR_ENC_WRONG_KEY, + DAP_CLIENT_ERROR_AUTH_WRONG_COOKIE, + DAP_CLIENT_ERROR_AUTH_WRONG_CREDENTIALS, DAP_CLIENT_ERROR_NETWORK_CONNECTION_TIMEOUT, DAP_CLIENT_ERROR_NETWORK_CONNECTION_REFUSE, DAP_CLIENT_ERROR_NETWORK_DISCONNECTED, @@ -53,6 +55,8 @@ void dap_client_delete(dap_client_t * a_client); void dap_client_set_uplink(dap_client_t * a_client,const char* a_addr, uint16_t a_port); void dap_client_go_stage(dap_client_t * a_client, dap_client_stage_t a_stage_end, dap_client_callback_t a_stage_end_callback); +void dap_client_set_credentials(dap_client_t * a_client,const char* a_user, const char * a_password); + void dap_client_enc_request(dap_client_t * a_client, const char * a_path, void * a_request, size_t a_request_size, dap_client_callback_t a_response_proc); diff --git a/client/dap_client_internal.c b/client/dap_client_internal.c new file mode 100644 index 0000000000000000000000000000000000000000..a9a836a9dfc8a716a5a7086db251d3f13592f2a2 --- /dev/null +++ b/client/dap_client_internal.c @@ -0,0 +1 @@ +#include "dap_client_internal.h" diff --git a/client/dap_client_internal.h b/client/dap_client_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..95c2ced41451627e4956db00ec4d12a2f333878f --- /dev/null +++ b/client/dap_client_internal.h @@ -0,0 +1,23 @@ +#ifndef _DAP_CLIENT_INTERNAL_H_ +#define _DAP_CLIENT_INTERNAL_H_ + +#include "dap_client.h" + +typedef struct dap_client_remote dap_client_remote_t; +typedef struct dap_enc_key dap_enc_key_t; + +typedef struct dap_client_remote_internal +{ + dap_client_t * client; + dap_client_remote_t * es; + + dap_enc_key_t * session_key; + + dap_client_stage_t stage; + dap_client_stage_status_t stage_status; + + dap_client_callback_t stage_status_callback; +} dap_client_internal_t; + +#define DAP_CLIENT_INTERNAL(a) ((dap_client_internal_t*) a->_internal ) +#endif diff --git a/client/dap_client_remote.c b/client/dap_client_remote.c index 
d2f9d4aefae5b4c53682c575a832f71e84929afc..d730d16407adcf99d3f821a638ea0b88014a0635 100644 --- a/client/dap_client_remote.c +++ b/client/dap_client_remote.c @@ -25,7 +25,7 @@ #include <unistd.h> #include <string.h> #include "common.h" -#include "dap_server.h" +#include "dap_loop.h" #include "dap_client.h" #include <ev.h> #define LOG_TAG "client" diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index b4ce8067765dbfb4775e1a1a5a9986d86ba9b1cf..ec9cd642cafbc6200ee205ab5050f28b20cd2b1b 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 2.8) project (dapcore) -set(CORE_SRCS common.c dap_client_remote.c) +set(CORE_SRCS dap_common.c ) add_library(${PROJECT_NAME} STATIC ${CORE_SRCS}) diff --git a/core/common.c b/core/common.c deleted file mode 100644 index dfa60d7da51bacdf7b8a1a0997f136f4472848fb..0000000000000000000000000000000000000000 --- a/core/common.c +++ /dev/null @@ -1,251 +0,0 @@ -/* - Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc - All rights reserved. - - This file is part of DAP (Deus Applications Prototypes) the open source project - - DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - DAP is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. 
-*/ - - -#include <string.h> -#include <stdarg.h> -#include <pthread.h> -#include <stdio.h> -#include <syslog.h> -#include <libconfig.h> -#include <unistd.h> -#include "common.h" -#include "config.h" -#define LAST_ERROR_MAX 255 - -#define LOG_TAG "common" - -char last_error[LAST_ERROR_MAX]={0}; -enum log_level log_level=DEBUG; -FILE * lf=NULL; - -int common_init() -{ - const char * fn = (my_config.log_file)? my_config.log_file : DEF_LOG ; - lf=fopen(fn, "a"); - if(lf==NULL){ - fprintf(stderr,"Can't open log file %s to append\n", fn); - lf=stdout; - return -1; - } - - //printf("Common init\n"); - // lf=fopen("/dev/stdout","a"); - //lf=stdout; - //strcpy(last_error,"undefined"); - log_it(INFO,"Common modules init (%s)", fn); - return 0; -} - -void common_deinit() -{ - if(lf) fclose(lf); -} - -void _log_it(const char * log_tag,enum log_level ll, const char * format,...) -{ -// branch predictor optimization -#if defined(__GNUC__)||defined(__GNUG__)||defined(__clang__) - if (__builtin_expect(!lf,0)) -#else - if (!lf) -#endif - common_init(); - - va_list ap,ap2; - - static pthread_mutex_t mutex=PTHREAD_MUTEX_INITIALIZER; - - if(ll<log_level) - return; - - pthread_mutex_lock(&mutex); - time_t t=time(NULL); - struct tm* tmp=localtime(&t); - static char s_time[1024]={0}; - strftime(s_time,sizeof(s_time),"%x-%X",tmp); - - va_start(ap,format); - va_copy(ap2,ap); - fprintf(lf,"[%s] ",s_time); - printf("[%s] ",s_time); - /*if(ll>=ERROR){ - vsnprintf(last_error,LAST_ERROR_MAX,format,ap); - }*/ - if(ll==DEBUG){ - fprintf(lf,"[DBG] "); - printf( "\x1b[37;2m[DBG] "); - }else if(ll==INFO){ - fprintf(lf,"[ ] "); - printf("\x1b[32;2m[ ] "); - }else if(ll==NOTICE){ - fprintf(lf,"[ * ] "); - printf("\x1b[32m[ * ] "); - }else if(ll==WARNING){ - fprintf(lf,"[WRN] "); - printf("\x1b[31;2m[WRN] "); - }else if(ll==ERROR){ - fprintf(lf,"[ERR] "); - printf("\x1b[31m[ERR] "); - }else if(ll==CRITICAL){ - fprintf(lf,"[!!!] "); - printf("\x1b[1;5;31m[!!!] 
"); - } - fprintf(lf,"[%8s]\t",log_tag); - printf("[%8s]\t",log_tag); - - vfprintf(lf,format,ap); - vprintf(format,ap2); - fprintf(lf,"\n"); - printf("\x1b[0m\n"); - va_end(ap); - va_end(ap2); - fflush(lf); - fflush(stdout); - pthread_mutex_unlock(&mutex); -} - -const char * log_error() -{ - return last_error; -} - -#define INT_DIGITS 19 /* enough for 64 bit integer */ - -char *itoa(int i) -{ - /* Room for INT_DIGITS digits, - and '\0' */ - static char buf[INT_DIGITS + 2]; - char *p = buf + INT_DIGITS + 1; /* points to terminating '\0' */ - if (i >= 0) { - do { - *--p = '0' + (i % 10); - i /= 10; - } while (i != 0); - return p; - } - else { /* i < 0 */ - do { - *--p = '0' - (i % 10); - i /= 10; - } while (i != 0); - *--p = '-'; - } - return p; -} - -/** - * @brief time_to_rfc822 Convert time_t to string with RFC822 formatted date and time - * @param out Output buffer - * @param out_size_mac Maximum size of output buffer - * @param t UNIX time - * @return Length of resulting string if ok or lesser than zero if not - */ -int time_to_rfc822(char * out, size_t out_size_max, time_t t) -{ - struct tm *tmp; - tmp=localtime(&t); - if(tmp== NULL){ - log_it(ERROR,"Can't convert data from unix fromat to structured one"); - return -2; - }else{ - int ret; - ret=strftime(out, out_size_max,"%a, %d %b %y %T %z",tmp); - //free(tmp); - if(ret>0){ - return ret; - }else{ - log_it(ERROR,"Can't print formatted time in string"); - return -1; - } - } -} - -/** - * @brief get_select_breaker - * @return - */ -static int breaker_set[2] = { -1, -1 }; -static int initialized = 0; -static struct timespec break_latency = { tv_sec: 0, tv_nsec: 1 * 1000 * 1000 }; -int get_select_breaker() -{ - if (!initialized) - { - if (pipe(breaker_set) < 0) return -1; - else initialized = 1; - } - - return breaker_set[0]; -} - -int send_select_break() -{ - if (!initialized) return -1; - char buffer[1]; - if (write(breaker_set[1], "\0", 1) <= 0) return -1; - nanosleep(&break_latency, NULL); - if 
(read(breaker_set[0], buffer, 1) <= 0 || buffer[0] != '\0') return -1; - return 0; -} - - -void hexdump(const void* data, size_t size) -{ - char ascii[17]; - size_t i, j; - ascii[16] = '\0'; - for (i = 0; i < size; ++i) { - printf("%02X ", ((unsigned char*)data)[i]); - if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') { - ascii[i % 16] = ((unsigned char*)data)[i]; - } else { - ascii[i % 16] = '.'; - } - if ((i+1) % 8 == 0 || i+1 == size) { - printf(" "); - if ((i+1) % 16 == 0) { - printf("| %s \n", ascii); - } else if (i+1 == size) { - ascii[(i+1) % 16] = '\0'; - if ((i+1) % 16 <= 8) { - printf(" "); - } - for (j = (i+1) % 16; j < 16; ++j) { - printf(" "); - } - printf("| %s \n", ascii); - } - } - } -} - -/** -* @brief get_utc_date_time -* @param buf_out ( not less 20 bytes ) -* @return example: 2017-08-12 13:28:36 -*/ -void get_utc_date_time(char buf_out[]) -{ - struct tm *local; - time_t t = time(NULL); - local = gmtime(&t); - strftime(buf_out, 20, "%Y-%m-%d %H:%M:%S", local); -} diff --git a/core/common.h b/core/common.h deleted file mode 100644 index 5fd34ca0092f3a9a259cfbdbc33726be44a67798..0000000000000000000000000000000000000000 --- a/core/common.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc - All rights reserved. - - This file is part of DAP (Deus Applications Prototypes) the open source project - - DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - DAP is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public License - along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. -*/ - - -#ifndef COMMON_H -#define COMMON_H -#include <stdarg.h> -#include <stddef.h> -#include <stdlib.h> -#include <time.h> - -#define CALLOC(a) ((a *) calloc(1,sizeof(a))) -#define DUP(a) (__typeof(a) ret = memcpy(ret,a,sizeof(*a)) ) - -#define DEF_LOG "/opt/dapserver/log/dapserver.log" -//#define DEF_LOG "/opt/DAP/log/confcall_server.log" - -enum log_level{CRITICAL=5,ERROR=4, WARNING=3,NOTICE=2,INFO=1,DEBUG=0}; -extern enum log_level log_level; - -extern int common_init(); -extern void common_deinit(); - -extern void _log_it(const char * log_tag, enum log_level, const char * format,...); -#define log_it(_log_level,...) _log_it(LOG_TAG,_log_level,##__VA_ARGS__) - -extern const char * log_error(); - -extern char *itoa(int i); -extern int time_to_rfc822(char * out, size_t out_size_max, time_t t); - -extern void get_utc_date_time(char buf_out[]); -extern void hexdump(const void* data, size_t size); -extern int send_select_break(); -extern int get_select_breaker(); -#endif diff --git a/core/dap_common.c b/core/dap_common.c new file mode 100644 index 0000000000000000000000000000000000000000..8f6a9ad4d7761c99a4ac2aebc39c2f72010fd78c --- /dev/null +++ b/core/dap_common.c @@ -0,0 +1,304 @@ +#ifdef SAP_OS_ANDROID +#include <android/log.h> +#endif + +#ifndef _MSC_VER +#include <unistd.h> /* 'pipe', 'read', 'write' */ +#include <pthread.h> +#include <syslog.h> +#elif defined(_MSC_VER) +#include <stdio.h> +#include <stdlib.h> +#include <windows.h> +#include <process.h> +typedef HANDLE pthread_mutex_t; +#define popen _popen +#define pclose _pclose +#define PTHREAD_MUTEX_INITIALIZER 0 +int pthread_mutex_lock(HANDLE **obj) +{ + return (( *obj = (HANDLE) CreateMutex(0, 1, 0) ) == NULL) ? 0 : 1; +} +int pthread_mutex_unlock(HANDLE *obj) { + return (ReleaseMutex(obj) == 0) ? 
0 : 1; +} +#endif +#include <time.h> /* 'nanosleep' */ +#include <string.h> +#include <stdarg.h> +#include <stdio.h> +#include "dap_common.h" +#define LAST_ERROR_MAX 255 + +#define LOG_TAG "dap_common" + +char last_error[LAST_ERROR_MAX]={0}; +enum log_level log_level=L_DEBUG; +static FILE * s_lf=NULL; + +int dap_common_init( const char * a_log_file ) +{ + if ( a_log_file ) { + s_lf=fopen( a_log_file , "a"); + if(s_lf==NULL){ + fprintf(stderr,"Can't open log file %s to append\n", a_log_file); + s_lf=stdout; + return -1; + } + } + + return 0; +} + +void dap_common_deinit() +{ + if(s_lf) fclose(s_lf); +} + +void _log_it(const char * log_tag,enum log_level ll, const char * format,...) +{ + if(ll<log_level) + return; + + va_list ap; + + + + va_start(ap,format); + _vlog_it(log_tag,ll, format,ap); + va_end(ap); +} + +void _vlog_it(const char * log_tag,enum log_level ll, const char * format,va_list ap) +{ + va_list ap2; + + static pthread_mutex_t mutex=PTHREAD_MUTEX_INITIALIZER; + + pthread_mutex_lock(&mutex); +#ifdef SAP_OS_ANDROID + char buf[4096]; + vsnprintf(buf,sizeof(buf),format,ap); + switch (ll) { + case L_INFO: + __android_log_write(ANDROID_LOG_INFO,SAP_BRAND,buf); + break; + case L_WARNING: + __android_log_write(ANDROID_LOG_WARN,SAP_BRAND,buf); + break; + case L_ERROR: + __android_log_write(ANDROID_LOG_ERROR,SAP_BRAND,buf); + break; + case L_CRITICAL: + __android_log_write(ANDROID_LOG_FATAL,SAP_BRAND,buf); + abort(); + break; + case L_DEBUG: + default: + __android_log_write(ANDROID_LOG_DEBUG,SAP_BRAND,buf); + } +#endif + time_t t=time(NULL); + struct tm* tmp=localtime(&t); + static char s_time[1024]={0}; + strftime(s_time,sizeof(s_time),"%x-%X",tmp); + + + va_copy(ap2,ap); + if (s_lf ) fprintf(s_lf,"[%s] ",s_time); + printf("[%s] ",s_time); + /*if(ll>=ERROR){ + vsnprintf(last_error,LAST_ERROR_MAX,format,ap); + }*/ + + if(ll==L_DEBUG){ + if (s_lf ) fprintf(s_lf,"[DBG] "); + printf( "\x1b[37;2m[DBG] "); + }else if(ll==L_INFO){ + if (s_lf ) fprintf(s_lf,"[ ] "); + 
printf("\x1b[32;2m[ ] "); + }else if(ll==L_NOTICE){ + if (s_lf ) fprintf(s_lf,"[ * ] "); + printf("\x1b[32m[ * ] "); + }else if(ll==L_WARNING){ + if (s_lf ) fprintf(s_lf,"[WRN] "); + printf("\x1b[31;2m[WRN] "); + }else if(ll==L_ERROR){ + if (s_lf ) fprintf(s_lf,"[ERR] "); + printf("\x1b[31m[ERR] "); + }else if(ll==L_CRITICAL){ + if (s_lf ) fprintf(s_lf,"[!!!] "); + printf("\x1b[1;5;31m[!!!] "); + } + if (s_lf ) fprintf(s_lf,"[%8s]\t",log_tag); + printf("[%8s]\t",log_tag); + + if (s_lf ) vfprintf(s_lf,format,ap); + vprintf(format,ap2); + if (s_lf ) fprintf(s_lf,"\n"); + printf("\x1b[0m\n"); + va_end(ap2); + if (s_lf ) fflush(s_lf); + fflush(stdout); + pthread_mutex_unlock(&mutex); +} + +const char * log_error() +{ + return last_error; +} + +#define INT_DIGITS 19 /* enough for 64 bit integer */ + +char *itoa(int i) +{ + /* Room for INT_DIGITS digits, - and '\0' */ + static char buf[INT_DIGITS + 2]; + char *p = buf + INT_DIGITS + 1; /* points to terminating '\0' */ + if (i >= 0) { + do { + *--p = '0' + (i % 10); + i /= 10; + } while (i != 0); + return p; + } + else { /* i < 0 */ + do { + *--p = '0' - (i % 10); + i /= 10; + } while (i != 0); + *--p = '-'; + } + return p; +} + +/** + * @brief time_to_rfc822 Convert time_t to string with RFC822 formatted date and time + * @param out Output buffer + * @param out_size_mac Maximum size of output buffer + * @param t UNIX time + * @return Length of resulting string if ok or lesser than zero if not + */ +int time_to_rfc822(char * out, size_t out_size_max, time_t t) +{ + struct tm *tmp; + tmp=localtime(&t); + if(tmp== NULL){ + log_it(L_ERROR,"Can't convert data from unix fromat to structured one"); + return -2; + }else{ + int ret; + ret=strftime(out, out_size_max,"%a, %d %b %y %T %z",tmp); + //free(tmp); + if(ret>0){ + return ret; + }else{ + log_it(L_ERROR,"Can't print formatted time in string"); + return -1; + } + } +} + + + +static int breaker_set[2] = { -1, -1 }; +static int initialized = 0; +static struct timespec 
break_latency = {0, 1 * 1000 * 1000 }; +#ifndef _MSC_VER +int get_select_breaker() +{ + if (!initialized) + { + if (pipe(breaker_set) < 0) return -1; + else initialized = 1; + } + + return breaker_set[0]; +} + +int send_select_break() +{ + if (!initialized) return -1; + char buffer[1]; + if (write(breaker_set[1], "\0", 1) <= 0) return -1; + nanosleep(&break_latency, NULL); + if (read(breaker_set[0], buffer, 1) <= 0 || buffer[0] != '\0') return -1; + return 0; +} +#else +char *strndup(const char *s, size_t n) { + char *p = memchr(s, '\0', n); + if (p != NULL) + n = p - s; + p = malloc(n + 1); + if (p != NULL) { + memcpy(p, s, n); + p[n] = '\0'; + } + return p; +} +#endif + +#ifdef ANDROID1 +static u_long myNextRandom = 1; + +double atof(const char *nptr) +{ + return (strtod(nptr, NULL)); +} + +int rand(void) +{ + return (int)((myNextRandom = (1103515245 * myNextRandom) + 12345) % ((u_long)RAND_MAX + 1)); +} + +void srand(u_int seed) +{ + myNextRandom = seed; +} + +#endif + +/** + * @brief exec_with_ret + * @param a_cmd + * @return + */ +char * exec_with_ret(const char * a_cmd) +{ + FILE * fp; + size_t buf_len = 0; + char buf[4096] = {0}; + fp= popen(a_cmd, "r"); + if (!fp) { + goto FIN; + } + memset(buf,0,sizeof(buf)); + fgets(buf,sizeof(buf)-1,fp); + pclose(fp); + buf_len=strlen(buf); + if(buf[buf_len-1] =='\n')buf[buf_len-1] ='\0'; +FIN: + return strdup(buf); +} + +char * exec_with_ret_multistring(const char * a_cmd) +{ + FILE * fp; + size_t buf_len = 0; + char buf[4096] = {0}; + fp= popen(a_cmd, "r"); + if (!fp) { + goto FIN; + } + memset(buf,0,sizeof(buf)); + char retbuf[4096] = {0}; + while(fgets(buf,sizeof(buf)-1,fp)) { + strcat(retbuf, buf); + } + pclose(fp); + buf_len=strlen(retbuf); + if(retbuf[buf_len-1] =='\n')retbuf[buf_len-1] ='\0'; +FIN: + return strdup(retbuf); +} diff --git a/core/dap_common.h b/core/dap_common.h new file mode 100644 index 0000000000000000000000000000000000000000..023cdd7d8693dcecbf3f8c857ba2d094e5b80fb2 --- /dev/null +++ 
b/core/dap_common.h @@ -0,0 +1,46 @@ +#ifndef COMMON_H +#define COMMON_H + +#include <stdarg.h> +#include <stddef.h> +#include <stdlib.h> +#include <time.h> + +#define MALLOC(a) ((a *) malloc(sizeof(a))) +#define CALLOC(a) ((a *) calloc(1,sizeof(a))) +#define DUP(a) (__typeof(a) ret = memcpy(ret,a,sizeof(*a)) ) + +enum log_level{L_CRITICAL=5,L_ERROR=4, L_WARNING=3,L_NOTICE=2,L_INFO=1,L_DEBUG=0}; +extern enum log_level log_level; + +#ifdef __cplusplus +extern "C" { +#endif + +int dap_common_init( const char * a_log_file ); +void dap_common_deinit(); + +void _log_it(const char * log_tag, enum log_level, const char * format,...); +void _vlog_it(const char * log_tag, enum log_level, const char * format, va_list ap ); +#define log_it(_log_level,...) _log_it(LOG_TAG,_log_level,##__VA_ARGS__) +#define vlog_it(a_log_level,a_format,a_ap) _vlog_it(LOG_TAG,a_log_level,a_format,a_ap) + +const char * log_error(); + +#ifdef __GNUC__ +char *itoa(int i); +#elif _MSC_VER +char *strndup(const char *s, size_t n); +#endif +int time_to_rfc822(char * out, size_t out_size_max, time_t t); + +int get_select_breaker(); +int send_select_break(); +char * exec_with_ret(const char * a_cmd); +char * exec_with_ret_multistring(const char * a_cmd); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/crypt/CMakeLists.txt b/crypt/CMakeLists.txt index 764bd9b2537ee3ee6cc2b63523db8e6a3b78c28c..32287691a0e3aebc8804286a59234f1aa91ee5a0 100644 --- a/crypt/CMakeLists.txt +++ b/crypt/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 2.8) project (dapcrypt) -set(CRYPT_SRCS enc.c enc_fnam2.c enc_key.c ) +set(CRYPT_SRCS dap_enc.c dap_enc_key.c ) include_directories("${dapcore_INCLUDE_DIRS}") add_definitions ("${dapcore_DEFINITIONS}") diff --git a/crypt/dap_enc.c b/crypt/dap_enc.c new file mode 100644 index 0000000000000000000000000000000000000000..5260cbbbb660b74128ad02f8873ecaaadb81f064 --- /dev/null +++ b/crypt/dap_enc.c @@ -0,0 +1,113 @@ +/* + Copyright (c) 2017-2018 (c) Project "DeM Labs 
Inc" https://github.com/demlabsinc + All rights reserved. + + This file is part of DAP (Deus Applications Prototypes) the open source project + + DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + DAP is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. +*/ + + +#include <arpa/inet.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <time.h> +#include "dap_enc.h" +#include "dap_enc_key.h" +#include "dap_common.h" + +#define LOG_TAG "dap_enc" + +/** + * @brief enc_init + * @return + */ +int dap_enc_init() +{ + srand(time(NULL)); + + return 0; +} + + +/** + * @brief dap_enc_code Encode data with key + * @param key_private Private key + * @param buf Input buffer + * @param buf_size Input buffer size + * @param buf_out Output buffer + * @return bytes actualy written in the output buffer + */ +size_t dap_enc_code(struct dap_enc_key * key,const void * buf,const size_t buf_size, void * buf_out, dap_enc_data_type_t data_type_out) +{ + //log_it(NOTICE,"In enc code"); + if(key->enc){ + void *proc_buf; + switch(data_type_out) + { + case ENC_DATA_TYPE_B64:{ + proc_buf=calloc(1,buf_size*2); + }break; + case ENC_DATA_TYPE_RAW:{ + proc_buf=buf_out; + }break; + } + size_t ret=key->enc(key,buf,buf_size,proc_buf); + if(data_type_out==ENC_DATA_TYPE_B64){ + ret=enc_base64_encode(proc_buf,ret,buf_out); + free(proc_buf); + return ret; + } + return ret; + }else{ + return 0; + } +} + +/** + * @brief enc_decode 
Decode data with key + * @param key_public Public key + * @param buf Input buffer + * @param buf_size Input buffer size + * @param buf_out Output buffer + * @param buf_out_max Maximum size of output buffer + * @return bytes actualy written in the output buffer + */ +size_t dap_enc_decode(struct dap_enc_key * key,const void * buf, const size_t buf_size, void * buf_out, dap_enc_data_type_t data_type_in) +{ + void *proc_buf; + const void *proc_buf_const; + size_t proc_buf_size; + switch(data_type_in){ + case ENC_DATA_TYPE_B64:{ + proc_buf=calloc(1,buf_size); + proc_buf_size= enc_base64_decode((const char*) buf,buf_size,proc_buf); + proc_buf_const=proc_buf; + }break; + case ENC_DATA_TYPE_RAW:{ + proc_buf_const=buf; + proc_buf_size=buf_size; + }break; + } + + if(key->dec){ + size_t ret=key->dec(key,proc_buf_const,proc_buf_size,buf_out); + if(data_type_in==ENC_DATA_TYPE_B64) + free(proc_buf); + return ret; + }else{ + return 0; + } +} diff --git a/crypt/enc_fnam2.h b/crypt/dap_enc.h similarity index 68% rename from crypt/enc_fnam2.h rename to crypt/dap_enc.h index a287deb30a2d6623a2ea95a559b000b4abf8faea..9d83ce423f358888c33e6653e1893190dec440cc 100644 --- a/crypt/enc_fnam2.h +++ b/crypt/dap_enc.h @@ -18,16 +18,19 @@ along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. 
*/ - -#ifndef _ENC_FNAM2_H_ -#define _ENC_FNAM2_H_ +#ifndef _DAP_ENC_H_ +#define _DAP_ENC_H_ #include <stddef.h> +#include <stdbool.h> + +#include "dap_enc_key.h" -struct enc_key; +int dap_enc_init(); -extern void enc_fnam2_key_new(struct enc_key * key); +size_t dap_enc_code(struct dap_enc_key * key, const void * buf, const size_t buf_size, void * buf_out, + dap_enc_data_type_t data_type_out); +size_t dap_enc_decode(struct dap_enc_key * key, const void * buf, const size_t buf_size, void * buf_out, + dap_enc_data_type_t data_type_in); -extern size_t enc_fnam2_decode(struct enc_key * key, const void * in, size_t in_size,void * out); -extern size_t enc_fnam2_encode(struct enc_key * key,const void * in, size_t in_size,void * out); #endif diff --git a/crypt/dap_enc_aes.c b/crypt/dap_enc_aes.c new file mode 100755 index 0000000000000000000000000000000000000000..b2ba81a5b8e0b3b73147e09b910e27442f8aacf6 --- /dev/null +++ b/crypt/dap_enc_aes.c @@ -0,0 +1,103 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "enc_aes.h" +#include "enc_key.h" +#include "sap_aes.h" + +typedef struct enc_aes_key{ + KeySchedule ks; + byte salt[SALT_LEN*2]; +} enc_aes_key_t; + +#define ENC_AES_KEY(a) ((enc_aes_key_t *)((a)->internal) ) + +/** + * @brief enc_aes_key_new + * @param key + */ +void enc_aes_key_new(struct enc_key * key) +{ + char str[64]; + size_t i; + for(i=0;i<sizeof(str);i++) + str[i]=64+rand()%30; + str[sizeof(str)-1]=0; + enc_aes_key_create(key,str); +} + +/** + * @brief enc_aes_key_new + * @param key + */ +void enc_aes_key_create(struct enc_key * key, const char *password_string) +{ + char *p1; + char *p2; + key->data= (unsigned char*) calloc(1,33); + key->data_size=32; + key->internal = calloc(1,sizeof(enc_aes_key_t) ); + key->enc=enc_aes_encode; + key->dec=enc_aes_decode; + + size_t p_len=strlen(password_string)/2; + p1= calloc(1,p_len+1); + p2= calloc(1,p_len+1); + memcpy(p1,password_string,p_len); + memcpy(p2,password_string+p_len,p_len); + + 
Aes_KeyFromPassword(256,p1,key->data); + Aes_KeyFromPassword(256,p2,ENC_AES_KEY(key)->salt); + Aes_KeyExpansion( key->data , ENC_AES_KEY(key)->ks ); + if (p1) + free(p1); + if (p2) + free(p2); + //Aes_GenSalt(ENC_AES_KEY(key)->salt); +} + +void enc_aes_key_delete(struct enc_key *key) +{ + (void) key; +} + +/** + * @brief enc_aes_public_decode + * @param key + * @param key_size + * @param in + * @param in_size + * @param out + * @return + */ +size_t enc_aes_decode(struct enc_key* key, const void * in, size_t in_size,void * out) +{ + memcpy(out,in,in_size); + Aes_DecryptBlks( out,in_size,ENC_AES_KEY(key)->salt,ENC_AES_KEY(key)->ks ); + return in_size; + +} + +/** + * @brief enc_aes_public_encode + * @param key + * @param key_size + * @param in + * @param in_size + * @param out + * @return + */ +size_t enc_aes_encode(struct enc_key* key, const void * in, size_t in_size,void * out) +{ + size_t ret=(in_size%AES_BLOCKSIZE) ? ( in_size+ (AES_BLOCKSIZE- (in_size%AES_BLOCKSIZE) ) ): in_size ; + memcpy(out,in,in_size); + if(ret-in_size) + memset((unsigned char*)out+in_size,0,ret-in_size); + Aes_EncryptBlks(out,ret,ENC_AES_KEY(key)->salt,ENC_AES_KEY(key)->ks ); + return ret; +} + + + + + diff --git a/crypt/dap_enc_aes.h b/crypt/dap_enc_aes.h new file mode 100755 index 0000000000000000000000000000000000000000..698bfdf7ac5de35666465255e6d7cee0788c3ae4 --- /dev/null +++ b/crypt/dap_enc_aes.h @@ -0,0 +1,15 @@ +#ifndef _ENC_AES_H_ +#define _ENC_AES_H_ + +#include <stddef.h> + +struct enc_key; + +void enc_aes_key_new(struct enc_key * key); +void enc_aes_key_create(struct enc_key * key, const char *password_string); +void enc_aes_key_delete(struct enc_key *key); + +size_t enc_aes_decode(struct enc_key* key, const void * in, size_t in_size,void * out); +size_t enc_aes_encode(struct enc_key* key, const void * in, size_t in_size,void * out); + +#endif diff --git a/crypt/dap_enc_base64.c b/crypt/dap_enc_base64.c new file mode 100755 index 
0000000000000000000000000000000000000000..2b399c4b281eefe263f4ea274da56ad7275f0dca --- /dev/null +++ b/crypt/dap_enc_base64.c @@ -0,0 +1,371 @@ +#include <math.h> +#include <stdio.h> +#include <stdint.h> +#include <ctype.h> +#include <stdlib.h> +#include "enc_base64.h" + +#define B64_TRUE 1 +#define B64_FALSE 0 + +typedef unsigned char byte; + +// get the size of the result buffer required for Base-64 +// encoding/decoding. +// sz - size of original buffer to be encoded/decoded +// isEncoded - true (1) when encoding the original buffer; +// false (0) when decoding the original buffer. +int B64_GetSize( int sz, int isEncode ); + +// Base-64 encode the given byte array +// outChars - buffer of length returned by GetSize(), filled upon return +void B64_Encode( const byte* srcBytes, int srcLen, char* outChars ); + +// Base-64 decode the given string +// srcChars - characters to be decoded +// outBytes - buffer of length returned by GetSize(), filled upon return +void B64_Decode( const char* srcChars, int srcLen, byte* outBytes ); + +// return the Base-64 encoded char for the given source byte +char B64_EncodeByte( byte b ); + +// return the Base-64 decoded byte for the given source char +// <returns></returns> +byte B64_DecodeByte( byte b ); + +#ifndef b64_malloc +# define b64_malloc(ptr) malloc(ptr) +#endif +#ifndef b64_realloc +# define b64_realloc(ptr, size) realloc(ptr, size) +#endif + +/** + * Base64 index table. + */ + +static const char b64_table[] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', + 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', + 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', + 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '+', '/' +}; + +/** + * Encode `unsigned char *' source with `size_t' size. + * Returns a `char *' base64 encoded string. 
+ */ + +char * +b64_encode (const unsigned char *, size_t); + +/** + * Dencode `char *' source with `size_t' size. + * Returns a `unsigned char *' base64 decoded string. + */ +unsigned char * +b64_decode (const char *, size_t); + +/** + * Dencode `char *' source with `size_t' size. + * Returns a `unsigned char *' base64 decoded string + size of decoded string. + */ +unsigned char * +b64_decode_ex (const char *, size_t, size_t *); + + + +size_t enc_base64_decode(const char * in, size_t in_size,void * out) +{ + //B64_Decode( in, in_size, (byte*) out ); + //return B64_GetSize( in_size,0 ); + uint8_t * out_bytes = (uint8_t*) out; + + int i = 0; + int j = 0; + int l = 0; + size_t l_size = 0; + unsigned char buf[3]; + unsigned char tmp[4]; + + // alloc + //dec = (unsigned char *) b64_malloc(1); + if (NULL == out) { return 0; } + + // parse until end of source + while (in_size--) { + // break if char is `=' or not base64 char + if ('=' == in[j]) { break; } + if (!(isalnum(in[j]) || '+' == in[j] || '/' == in[j])) { break; } + + // read up to 4 bytes at a time into `tmp' + tmp[i++] = in[j++]; + + // if 4 bytes read then decode into `buf' + if (4 == i) { + // translate values in `tmp' from table + for (i = 0; i < 4; ++i) { + // find translation char in `b64_table' + for (l = 0; l < 64; ++l) { + if (tmp[i] == b64_table[l]) { + tmp[i] = l; + break; + } + } + } + + // decode + buf[0] = (tmp[0] << 2) + ((tmp[1] & 0x30) >> 4); + buf[1] = ((tmp[1] & 0xf) << 4) + ((tmp[2] & 0x3c) >> 2); + buf[2] = ((tmp[2] & 0x3) << 6) + tmp[3]; + + // write decoded buffer to `dec' + for (i = 0; i < 3; ++i) { + out_bytes[l_size++] = buf[i]; + } + + // reset + i = 0; + } + } + + // remainder + if (i > 0) { + // fill `tmp' with `\0' at most 4 times + for (j = i; j < 4; ++j) { + tmp[j] = '\0'; + } + + // translate remainder + for (j = 0; j < 4; ++j) { + // find translation char in `b64_table' + for (l = 0; l < 64; ++l) { + if (tmp[j] == b64_table[l]) { + tmp[j] = l; + break; + } + } + } + + // decode 
remainder + buf[0] = (tmp[0] << 2) + ((tmp[1] & 0x30) >> 4); + buf[1] = ((tmp[1] & 0xf) << 4) + ((tmp[2] & 0x3c) >> 2); + buf[2] = ((tmp[2] & 0x3) << 6) + tmp[3]; + + // write remainer decoded buffer to `dec' + for (j = 0; (j < i - 1); ++j) { + out_bytes[l_size++] = buf[j]; + } + + } + +// out[l_size] = '\0'; + + return l_size; +} + +size_t enc_base64_encode(const void * a_in, size_t a_in_size, char * a_out) +{ + int i = 0; + int j = 0; + size_t size = 0; + unsigned char buf[4]; + unsigned char tmp[3]; + const unsigned char * l_in_bytes = (const unsigned char*) a_in; + + if (NULL == a_out) { return 0; } + + // parse until end of source + while (a_in_size--) { + // read up to 3 bytes at a time into `tmp' + tmp[i++] = *( l_in_bytes++); + + // if 3 bytes read then encode into `buf' + if (3 == i) { + buf[0] = (tmp[0] & 0xfc) >> 2; + buf[1] = ((tmp[0] & 0x03) << 4) + ((tmp[1] & 0xf0) >> 4); + buf[2] = ((tmp[1] & 0x0f) << 2) + ((tmp[2] & 0xc0) >> 6); + buf[3] = tmp[2] & 0x3f; + + for (i = 0; i < 4; ++i) { + a_out[size++] = b64_table[buf[i]]; + } + + // reset index + i = 0; + } + } + + // remainder + if (i > 0) { + // fill `tmp' with `\0' at most 3 times + for (j = i; j < 3; ++j) { + tmp[j] = '\0'; + } + + // perform same codec as above + buf[0] = (tmp[0] & 0xfc) >> 2; + buf[1] = ((tmp[0] & 0x03) << 4) + ((tmp[1] & 0xf0) >> 4); + buf[2] = ((tmp[1] & 0x0f) << 2) + ((tmp[2] & 0xc0) >> 6); + buf[3] = tmp[2] & 0x3f; + + // perform same write to `enc` with new allocation + for (j = 0; (j < i + 1); ++j) { + a_out[size++] = b64_table[buf[j]]; + } + + // while there is still a remainder + // append `=' to `enc' + while ((i++ < 3)) { + a_out[size++] = '='; + } + } + + // Make sure we have enough space to add '\0' character at end. + a_out[size] = '\0'; + return size; +} + + +// get the size of the result buffer required for Base-64 +// encoding/decoding. 
+// sz - size of original buffer to be encoded/decoded +// isEncoded - true (1) when encoding the original buffer; +// false (0) when decoding the original buffer. +int B64_GetSize( int sz, int isEncode ) +{ + int n = 0; + + if( isEncode ) { + n = ceil ( ((double) sz) / 3.0 ) * 4.0; + switch( sz % 3 ) { + case 0: break; + case 1: n += 2; break; + case 2: n += 3; break; + } + } + else { + n = ceil ( ((double) sz) / 4.0 ) * 3.0; + switch( sz % 4 ) { + case 0: break; + case 1: break; + case 2: n += 1; break; + case 3: n += 2; break; + } + } + return n; +} + + +// Base-64 encode the given byte array +// outChars - buffer of length returned by GetSize(), filled upon return +void B64_Encode( const byte* srcBytes, int srcLen, char* outChars ) +{ + byte b1, b2, b3; + byte* destBytes = (byte*)outChars; + + // walk through the source, taking 3 bytes at a time + int srcNdx = 0; + int destNdx = 0; + int remaining = srcLen; + for( ; remaining > 2; remaining -= 3 ) { + b1 = srcBytes[ srcNdx++ ]; + b2 = srcBytes[ srcNdx++ ]; + b3 = srcBytes[ srcNdx++ ]; + destBytes[destNdx++] = B64_EncodeByte( (byte)( b1 >> 2 ) ); + destBytes[destNdx++] = B64_EncodeByte( (byte)( ( b1 << 4 ) | ( b2 >> 4 ) ) ); + destBytes[destNdx++] = B64_EncodeByte( (byte)( ( b2 << 2 ) | ( b3 >> 6 ) ) ); + destBytes[destNdx++] = B64_EncodeByte( (byte)b3 ); + } + + // process the remaining bytes + b2 = 0; + if( remaining > 0 ) { + b1 = srcBytes[srcNdx++]; + if( remaining == 2 ) + b2 = srcBytes[srcNdx++]; + + destBytes[destNdx++] = B64_EncodeByte( (byte)( b1 >> 2 ) ); + destBytes[destNdx++] = B64_EncodeByte( (byte)( ( b1 << 4 ) | ( b2 >> 4 ) ) ); + if( remaining == 2 ) + destBytes[destNdx++] = B64_EncodeByte( (byte)( b2 << 2 ) ); + } +} + + +// Base-64 decode the given string +// srcChars - characters to be decoded +// outBytes - buffer of length returned by GetSize(), filled upon return +void B64_Decode( const char* srcChars, int srcLen, byte* outBytes ) +{ + byte b1, b2, b3, b4; + const byte* srcBytes = 
(byte*)srcChars; + byte* destBytes = outBytes; + + // walk through the source, taking 4 bytes at a time + int srcNdx = 0; + int destNdx = 0; + int remaining = srcLen; + for( ; remaining > 3; remaining -= 4 ) { + b1 = B64_DecodeByte( srcBytes[srcNdx++] ); + b2 = B64_DecodeByte( srcBytes[srcNdx++] ); + b3 = B64_DecodeByte( srcBytes[srcNdx++] ); + b4 = B64_DecodeByte( srcBytes[srcNdx++] ); + + destBytes[destNdx++] = (byte)( ( b1 << 2 ) | ( b2 >> 4 ) ); + destBytes[destNdx++] = (byte)( ( b2 << 4 ) | ( b3 >> 2 ) ); + destBytes[destNdx++] = (byte)( ( b3 << 6 ) | b4 ); + } + + // process the remaining bytes + b2 = b3 = 0; + if( remaining > 0 ) { + b1 = B64_DecodeByte( srcBytes[srcNdx++] ); + if( remaining > 1 ) + b2 = B64_DecodeByte( srcBytes[srcNdx++] ); + if( remaining == 3 ) + b3 = B64_DecodeByte( srcBytes[srcNdx++] ); + + destBytes[destNdx++] = (byte)( ( b1 << 2 ) | ( b2 >> 4 ) ); + if( remaining == 3 ) + destBytes[destNdx++] = (byte)( ( b2 << 4 ) | ( b3 >> 2 ) ); + } +} + + +// return the Base-64 encoded char for the given source byte +char B64_EncodeByte( byte b ) +{ + b &= 0x3f; + if( b <= 25 ) + return (byte)( b +'A' ); + if( b <= 51 ) + return (byte)( b - 26 + 'a' ); + if( b <= 61 ) + return (byte)( b - 52 + '0' ); + if( b == 62 ) + return (byte)'-'; + //if( b == 63 ) + return (byte)'_'; +} + + +// return the Base-64 decoded byte for the given source char +// <returns></returns> +byte B64_DecodeByte( byte b ) +{ + if (( b == '+' ) || (b =='-') ) + return 62; + if( (b == '/' ) || (b == '_') ) + return 63; + if( b <= '9' ) + return (byte)( b - '0' + 52 ); + if( b <= 'Z' ) + return (byte)( b - 'A' ); + return (byte)( b - 'a' + 26 ); +} + diff --git a/crypt/dap_enc_base64.h b/crypt/dap_enc_base64.h new file mode 100755 index 0000000000000000000000000000000000000000..bd3658e97413b4af438702d14679816abc51f433 --- /dev/null +++ b/crypt/dap_enc_base64.h @@ -0,0 +1,16 @@ +#ifndef _ENC_BASE64_H_ +#define _ENC_BASE64_H_ +#include <stddef.h> + +#ifdef __cplusplus +extern "C" 
{ +#endif + +size_t enc_base64_decode(const char * in, size_t in_size,void * out); +size_t enc_base64_encode(const void * in, size_t in_size,char * out); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/crypt/enc_key.c b/crypt/dap_enc_key.c similarity index 100% rename from crypt/enc_key.c rename to crypt/dap_enc_key.c diff --git a/crypt/dap_enc_key.h b/crypt/dap_enc_key.h new file mode 100644 index 0000000000000000000000000000000000000000..768b2c9e5aaca89167a942d506d87e2ddea38d19 --- /dev/null +++ b/crypt/dap_enc_key.h @@ -0,0 +1,105 @@ +/* + Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc + All rights reserved. + + This file is part of DAP (Deus Applications Prototypes) the open source project + + DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + DAP is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _DAP_ENC_KEY_H_ +#define _DAP_ENC_KEY_H_ + +#include <stddef.h> + +typedef enum dap_enc_data_type{DAP_ENC_DATA_TYPE_RAW, + DAP_ENC_DATA_TYPE_B64, + } dap_enc_data_type_t; + +typedef enum dap_enc_key_type{ DAP_ENC_KEY_TYPE_AES, // Symmetric AES + + DAP_ENC_KEY_rlwe_bcns15, // key exchange from the ring learning with errors problem + // (Bos, Costello, Naehrig, Stebila, + // IEEE Symposium on Security & Privacy 2015, + // https://eprint.iacr.org/2014/599) + + DAP_ENC_KEY_rlwe_newhope, // "NewHope": key exchange from the ring learning with errors problem + // (Alkim, Ducas, Pöppelmann, Schwabe, USENIX Security 2016 ) + // Using the reference C implementation of NewHope + // from https://github.com/tpoeppelmann/newhop + // https://eprint.iacr.org/2015/1092 + + DAP_ENC_KEY_rlwe_msrln16, // Microsoft Research implementation of Peikert's ring-LWE key exchange + // (Longa, Naehrig, CANS 2016, https://eprint.iacr.org/2016/504) + // based on the implementation of Alkim, Ducas, Pöppelmann, and Schwabe, + // with improvements from Longa and Naehrig, + // https://www.microsoft.com/en-us/research/project/lattice-cryptography-library/ + + DAP_ENC_KEY_lwe_frodo, // "Frodo": key exchange from the learning with errors problem + // Bos, Costello, Ducas, Mironov, Naehrig, Nikolaenko, Raghunathan, Stebila + // ACM Conference on Computer and Communications Security 2016 + // https://eprint.iacr.org/2016/659 + + DAP_ENC_KEY_sidh_cln16, // Key exchange from the supersingular isogeny Diffie-Hellman problem + // (Costello, Naehrig, Longa, CRYPTO 2016, https://eprint.iacr.org/2016/413) + // using the implementation of Microsoft Research + // https://www.microsoft.com/en-us/research/project/sidh-library/ + + DAP_ENC_KEY_sidh_iqc_ref, // key exchange from the supersingular isogeny Diffie-Hellman problem + // (De Feo, Jao, Plût, J. Math. Cryptol. 
8(3):209, 2014 + // https://eprint.iacr.org/2011/506 + // + DAP_ENC_KEY_code_mcbits, // "McBits": key exchange from the error correcting codes, + // specifically Niederreiter's form of McEliece public key encryption + // using hidden Goppa codes (Bernstein, Chou, Schwabe, CHES 2013, https://eprint.iacr.org/2015/610) + // using the implementation of McBits from https://www.win.tue.nl/~tchou/mcbits/ + + DAP_ENC_KEY_ntru, // NTRU: key transport using NTRU public key encryption + // (Hoffstein, Pipher, Silverman, ANTS 1998) with the EES743EP1 parameter set + // wrapper around the implementation from the NTRU Open Source project + // https://github.com/NTRUOpenSourceProject/NTRUEncrypt) + + DAP_ENC_KEY_mlwe_kyber, // Kyber: a CCA-secure module-lattice-based key exchange mechanism + // (Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Shanck, Stehlé) + // Real World Crypto 2017, https://eprint.iacr.org/2017/634) + // using the reference C implementation of Kyber from pq-crystals/kyber + DAP_ENC_KEY_sig_picnic, // signature based on zero-knowledge proof as specified in + // Post-Quantum Zero-Knowledge and Signatures from Symmetric-Key Primitives + // (Melissa Chase and David Derler and Steven Goldfeder and Claudio Orlandi + // and Sebastian Ramacher and Christian Rechberger and Daniel Slamanig and Greg Zaverucha + // https://eprint.iacr.org/2017/279.pdf), using the optimized implemenation + // from https://github.com/IAIK/Picnic + } enc_key_type_t; + +struct enc_key; +typedef size_t (*enc_callback_t)(struct enc_key *, const void * , const size_t ,void *); + +typedef struct enc_key{ + unsigned char * data; + size_t data_size; + enc_key_type_t type; + + enc_callback_t enc; + enc_callback_t dec; + + void * internal; +} enc_key_t; + +extern enc_key_t *enc_key_new(size_t key_size,enc_key_type_t key_type); +extern enc_key_t *enc_key_generate(enc_data_type_t v_type, rsa_key_t* key_session_pair); +extern enc_key_t *enc_key_create(const char * key_input,enc_key_type_t v_type); 
+extern void enc_key_delete(enc_key_t * key); +extern rsa_key_t* enc_key_session_pair_create(const char* client_pub_key, u_int16_t key_len); + +#endif diff --git a/crypt/enc.c b/crypt/enc.c deleted file mode 100644 index d704dc7da7468a403a05b1bbf26270e67db9e3a6..0000000000000000000000000000000000000000 --- a/crypt/enc.c +++ /dev/null @@ -1,562 +0,0 @@ -/* - Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc - All rights reserved. - - This file is part of DAP (Deus Applications Prototypes) the open source project - - DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - DAP is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. 
-*/ - - -#include <stdlib.h> -#include <stdint.h> -#include <string.h> -#include <time.h> -#include "enc.h" -#include "enc_key.h" -#include "common.h" -#include <openssl/aes.h> -#include <openssl/evp.h> -#include <openssl/pem.h> -#include <openssl/err.h> -#include <openssl/rand.h> - -#define LOG_TAG "enc" - -#include <arpa/inet.h> - -////////////////////////////////////////// BASE64 PART -static size_t b64_get_encodet_size(size_t in_size); -static size_t b64_get_decodet_size(size_t in_size); -static unsigned char b64_byte_decode(unsigned char b); -static unsigned char b64_byte_encode(unsigned char b); - -static void Base64Decode(const char* in, size_t srcLen, unsigned char* out); -static void Base64Encode(const unsigned char* in, size_t srcLen, char* out); - -static size_t b64_get_encodet_size(size_t in_size) -{ - return (in_size/3)*4 + ((in_size%3==1) ?2: (in_size%3==2) ? 3:0); -} - -static size_t b64_get_decodet_size(size_t in_size) -{ - return (in_size/4)*3 + ((in_size%4==2) ?1: (in_size%5==3) ? 
2:0); -} - -static unsigned char b64_byte_decode(unsigned char b) -{ - if (( b == '+' ) || (b =='-') ) - return 62; - if( (b == '/' ) || (b == '_') ) - return 63; - if( b <= '9' ) - return (b - '0' + 52); - if(b <= 'Z') - return (b - 'A'); - return (b - 'a' + 26); -} - -static unsigned char b64_byte_encode(unsigned char b) -{ - b &= 0x3f; - if(b <= 25) - return (b +'A'); - if(b <= 51) - return (b - 26 + 'a'); - if(b <= 61) - return (b - 52 + '0'); - if(b == 62) - return '-'; - return '_'; -} - -static void Base64Decode(const char* source, size_t srcLen, unsigned char* out) -{ - unsigned char b1, b2, b3, b4; - const unsigned char* srcBytes = (unsigned char*)source; - unsigned char* dest = out; - - size_t dec_length = b64_get_decodet_size(srcLen); - unsigned char *buffer = (unsigned char*)malloc(dec_length + 1); - buffer[dec_length] = '\0'; - - // walk through the source, taking 4 bytes at a time - size_t source_index = 0; - size_t dest_index = 0; - size_t remaining = srcLen; - for( ; remaining > 3; remaining -= 4 ) { - b1 = b64_byte_decode(srcBytes[source_index++]); - b2 = b64_byte_decode(srcBytes[source_index++]); - b3 = b64_byte_decode(srcBytes[source_index++]); - b4 = b64_byte_decode(srcBytes[source_index++]); - - dest[dest_index++] = (unsigned char)( ( b1 << 2 ) | ( b2 >> 4 ) ); - dest[dest_index++] = (unsigned char)( ( b2 << 4 ) | ( b3 >> 2 ) ); - dest[dest_index++] = (unsigned char)( ( b3 << 6 ) | b4 ); - } - - // process the remaining bytes - b2 = b3 = 0; - if( remaining > 0 ) { - b1 = b64_byte_decode( srcBytes[source_index++] ); - if( remaining > 1 ) - b2 = b64_byte_decode( srcBytes[source_index++] ); - if( remaining == 3 ) - b3 = b64_byte_decode( srcBytes[source_index++] ); - - dest[dest_index++] = (unsigned char)( ( b1 << 2 ) | ( b2 >> 4 ) ); - if( remaining == 3 ) - dest[dest_index++] = (unsigned char)( ( b2 << 4 ) | ( b3 >> 2 ) ); - } -} - -static void Base64Encode(const unsigned char* source, size_t srcLen, char* out) -{ - unsigned char b1, b2, b3; - 
unsigned char* dest = (unsigned char*)out; - - // walk through the source, taking 3 bytes at a time - size_t source_index = 0; - size_t dest_index = 0; - size_t remaining = srcLen; - for( ; remaining > 2; remaining -= 3 ) { - b1 = source[ source_index++ ]; - b2 = source[ source_index++ ]; - b3 = source[ source_index++ ]; - dest[dest_index++] = b64_byte_encode( (unsigned char)( b1 >> 2 ) ); - dest[dest_index++] = b64_byte_encode( (unsigned char)( ( b1 << 4 ) | ( b2 >> 4 ) ) ); - dest[dest_index++] = b64_byte_encode( (unsigned char)( ( b2 << 2 ) | ( b3 >> 6 ) ) ); - dest[dest_index++] = b64_byte_encode( (unsigned char)b3 ); - } - - // process the remaining bytes - b2 = 0; - if( remaining > 0 ) { - b1 = source[source_index++]; - if( remaining == 2 ) - b2 = source[source_index++]; - - dest[dest_index++] = b64_byte_encode( (unsigned char)( b1 >> 2 ) ); - dest[dest_index++] = b64_byte_encode( (unsigned char)( ( b1 << 4 ) | ( b2 >> 4 ) ) ); - if( remaining == 2 ) - dest[dest_index++] = b64_byte_encode( (unsigned char)( b2 << 2 ) ); - } -} - -size_t enc_base64_encode(const void * in, size_t in_size, char * out) -{ - size_t ret= b64_get_encodet_size(in_size); - Base64Encode((const unsigned char*) in, in_size, out); - out[ret]='\0'; - return ret; -} - -size_t enc_base64_decode(const char *in, size_t in_size, void *out) -{ - Base64Decode(in, in_size, (unsigned char*) out); - return b64_get_decodet_size(in_size); -} -////////////////////////////////////////////////////// end of BASE64 PART - -////////////////////////////////////////// AES PART -#include <openssl/evp.h> -#include <openssl/err.h> -#include <openssl/rand.h> -#include "enc_key.h" -typedef unsigned char KeySchedule[4*(14+1)][4]; - -static int _crypto_inited = 0; - -typedef struct enc_aes_key{ - KeySchedule ks; - unsigned char salt[AES_BLOCK_SIZE*2]; -} enc_aes_key_t; - -#define ENC_AES_KEY(a) ((enc_aes_key_t *)((a)->internal) ) - -/** - * @brief enc_aes_key_new - * @param key - */ -void enc_aes_key_new(struct 
enc_key * key) -{ - char str[64]; - int i; - for(i=0;i<sizeof(str);i++) - str[i]=64+rand()%30; - str[sizeof(str)-1]=0; - enc_aes_key_create(key,str); -} - -/** - * @brief enc_aes_key_new - * @param key - */ -void enc_aes_key_create(struct enc_key * key, const char *str_key) -{ - key->data_size = strlen(str_key); - key->data= (unsigned char*) malloc(key->data_size); - memcpy(key->data, str_key, key->data_size); - key->internal = calloc(1,sizeof(enc_aes_key_t) ); - key->enc=enc_aes_encode; - key->dec=enc_aes_decode; - -} - -void enc_aes_key_delete(struct enc_key *key) -{ - (void) key; -} - - -size_t enc_aes_decode(struct enc_key* key, const void * in, size_t in_size,void * out) -{ - unsigned char *iv_dec = (unsigned char*)malloc(sizeof(unsigned char) *AES_BLOCK_SIZE); - memset(iv_dec, 0, sizeof(unsigned char) *AES_BLOCK_SIZE); - - AES_KEY dec_key; - AES_set_decrypt_key(key->data, 256, &dec_key); - AES_cbc_encrypt(in, out, in_size, - &dec_key,iv_dec, AES_DECRYPT); - - free(iv_dec); - - return in_size; - -} - -size_t enc_aes_encode(struct enc_key* key, const void * in, size_t in_size,void * out) -{ - size_t ret = (in_size % AES_BLOCK_SIZE) ? 
( in_size+ (AES_BLOCK_SIZE- (in_size%AES_BLOCK_SIZE) ) ) : in_size ; - - unsigned char *iv_enc = (unsigned char*) malloc( sizeof(unsigned char) *AES_BLOCK_SIZE); - memset(iv_enc, 0, sizeof(unsigned char) *AES_BLOCK_SIZE); - - AES_KEY enc_key; - AES_set_encrypt_key(key->data, 256, &enc_key); - AES_cbc_encrypt(in, out, in_size, &enc_key, - iv_enc, AES_ENCRYPT); - - free(iv_enc); - return ret; -} - -////////////////////////////////////////// end of AES PART - -/** - * @brief enc_init - * @return - */ -int enc_init() -{ - if (_crypto_inited) - return 0; - _crypto_inited = 1; - - srand(time(NULL)); - - ERR_load_crypto_strings(); - OpenSSL_add_all_algorithms(); - - return 0; -} - - -/** - * @brief enc_code Encode data with key - * @param key_private Private key - * @param buf Input buffer - * @param buf_size Input buffer size - * @param buf_out Output buffer - * @return bytes actualy written in the output buffer - */ -size_t enc_code(struct enc_key * key,const void * buf,const size_t buf_size, void * buf_out, enc_data_type_t data_type_out) -{ - //log_it(NOTICE,"In enc code"); - if(key->enc){ - void *proc_buf; - switch(data_type_out) - { - case ENC_DATA_TYPE_B64:{ - proc_buf=calloc(1,buf_size*2); - }break; - case ENC_DATA_TYPE_RAW:{ - proc_buf=buf_out; - }break; - } - size_t ret=key->enc(key,buf,buf_size,proc_buf); - if(data_type_out==ENC_DATA_TYPE_B64){ - ret=enc_base64_encode(proc_buf,ret,buf_out); - free(proc_buf); - return ret; - } - return ret; - }else{ - return 0; - } -} - -/** - * @brief enc_decode Decode data with key - * @param key_public Public key - * @param buf Input buffer - * @param buf_size Input buffer size - * @param buf_out Output buffer - * @param buf_out_max Maximum size of output buffer - * @return bytes actualy written in the output buffer - */ -size_t enc_decode(struct enc_key * key,const void * buf, const size_t buf_size, void * buf_out, enc_data_type_t data_type_in) -{ - void *proc_buf; - const void *proc_buf_const; - size_t proc_buf_size; - 
switch(data_type_in){ - case ENC_DATA_TYPE_B64:{ - proc_buf=calloc(1,buf_size); - proc_buf_size= enc_base64_decode((const char*) buf,buf_size,proc_buf); - proc_buf_const=proc_buf; - }break; - case ENC_DATA_TYPE_RAW:{ - proc_buf_const=buf; - proc_buf_size=buf_size; - }break; - } - - if(key->dec){ - size_t ret=key->dec(key,proc_buf_const,proc_buf_size,buf_out); - if(data_type_in==ENC_DATA_TYPE_B64) - free(proc_buf); - return ret; - }else{ - return 0; - } -} - -/** - * @brief read_key_from_bio - * @param bio - * @return - */ -char* read_key_from_bio(BIO * bio) -{ - size_t length = BIO_pending(bio); - char *buff = (char*)malloc((length + 1)*sizeof(char)); - BIO_read(bio, buff, length); - buff[length] = '\0'; - return buff; -} - - -/** - * @brief bioToString - * @param bio - * @param string - * @details make string from bio - * @return - */ - -int bioToString(BIO *bio, unsigned char **string) -{ - - if( bio == NULL) - { - log_it(ERROR,"bioToString() BIO == NULL!"); - return -1; - } - - size_t bioLength = BIO_pending(bio); - - *string = (unsigned char*)malloc(bioLength + 1); - - if(string == NULL) - { - log_it(ERROR,"bioToString failed.\n"); - return -1; - } - - BIO_read(bio, *string, bioLength); - - (*string)[bioLength] = '\0'; - - BIO_free_all(bio); - - return (int)bioLength; -} - -/** - * @brief enc_rsa_decode - * @param key - * @param in - * @param in_size - * @param out - * @details decode by server local rsa key - * @return - */ -size_t enc_rsa_decode(struct enc_key* key, const void * in, size_t in_size,void * out) -{ - size_t decrypt_len; - - if(in == NULL) - { - log_it(ERROR,"enc_rsa_decode failed (empty message for decode)"); - return 0; - } - - if(key == NULL) - { - log_it(ERROR,"enc_rsa_decode failed (empty key for decode)"); - return 0; - } - - if((decrypt_len = RSA_private_decrypt(in_size, (unsigned char*)in, (unsigned char*)out, - ((rsa_key_t*)key->internal)->server_key, RSA_PKCS1_PADDING)) == -1) - { - log_it(ERROR,"enc_rsa_decode failed (incorrect 
decode)"); - return 0; - } - - memset(out + decrypt_len, 0, 1); - - //log_it(INFO, "Decode out = %s",out); - - return decrypt_len; -} - - -/** - * @brief enc_rsa_encode - * @param key - * @param in - * @param in_size - * @param out - * @details encode by RSA Public key Client - * @return - */ -size_t enc_rsa_encode(struct enc_key* key, void * in, size_t in_size,void * out) -{ - size_t encrypt_len = 0; - - if(in == NULL || key == NULL) - { - log_it(ERROR,"enc_rsa_encode failed"); - } - - if((encrypt_len = RSA_public_encrypt(in_size, (unsigned char*)in, (unsigned char*)out, - (RSA*)((rsa_key_t*)key->internal)->client_public_key, RSA_PKCS1_PADDING)) == -1) - { - log_it(ERROR,"enc_rsa_encode Error Encrypt"); - return 0; - } - - //log_it(INFO,"Encrypt Len = %d",encrypt_len); - - return encrypt_len; -} - - -/** - * @brief getRsaKeyFromString - * @param str_key - * @param strLen - * @return - */ -void setRsaPubKeyFromString(char *str_key, size_t strLen, struct enc_key * key) -{ - if(str_key == NULL) - { - log_it(ERROR,"getRsaKeyFromString failed"); - return; - } - - BIO *bio = BIO_new(BIO_s_mem()); - BIO_write(bio, str_key,strLen); - - PEM_read_bio_RSAPublicKey( bio, (void*)&key->internal, NULL, NULL); - - BIO_free_all(bio); - - key->enc = (void*) enc_rsa_encode; - key->dec = (void*) enc_rsa_decode; - - if ( key == NULL) - { - log_it(ERROR,"getRsaKeyFromString failed"); - return; - } - -} - - -/** - * @brief getStringPrivateKeyFromRsa - * @param key - * @param out - * @details get string public key from RSA* key ( Allocated memory for ptr ) - * @return - */ -size_t getStringPrivateKeyFromRsa(RSA *key, char **out) -{ - BIO *bio = BIO_new(BIO_s_mem()); - - if(key == NULL) - { - log_it(ERROR,"getStringPubKeyFromRsa failed"); - return 0; - } - - PEM_write_bio_RSAPrivateKey(bio,key,NULL,NULL,0,NULL,NULL); - - size_t key_len = BIO_pending(bio); - *out = malloc(key_len + 1); - - BIO_read(bio, *out, key_len); - - BIO_free_all(bio); - - return key_len; -} - - -/** - * @brief 
getStringPubKeyFromRsa - * @param key - * @param out - * @details get string public key from RSA* key ( Allocated memory for ptr ) - * @return - */ -size_t getStringPubKeyFromRsa(RSA *key, char **out) -{ - BIO *bio = BIO_new(BIO_s_mem()); - - if(key == NULL) - { - log_it(ERROR,"getStringPubKeyFromRsa failed"); - return 0; - } - - PEM_write_bio_RSAPublicKey(bio, key); - - size_t key_len = BIO_pending(bio); - *out = malloc(key_len + 1); - - BIO_read(bio, *out, key_len); - //out[key_len] = '\0'; - - BIO_free_all(bio); - - return key_len; -} - - diff --git a/crypt/enc.h b/crypt/enc.h deleted file mode 100644 index c751c93360d993f95690e1c77a2a9f41eab4302d..0000000000000000000000000000000000000000 --- a/crypt/enc.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc - All rights reserved. - - This file is part of DAP (Deus Applications Prototypes) the open source project - - DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - DAP is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. 
-*/ - - -#ifndef _ENC_H_ -#define _ENC_H_ -#include <stddef.h> -#include <openssl/aes.h> -#include <openssl/evp.h> -#include <openssl/pem.h> -#include <openssl/err.h> -#include <openssl/rand.h> - -#define SA_ENC_TYPE_1 0x01 -#define RSA_KEY_LENGTH 4096 -#define PUB_EXP 3 - -struct enc_key; - -typedef enum enc_data_type{ENC_DATA_TYPE_RAW, ENC_DATA_TYPE_B64, ENC_KEY_TYPE_RSA} enc_data_type_t; - -typedef struct rsa_session_key { - RSA* server_key; - RSA* client_public_key; - time_t last_time_use_key; -} rsa_key_t; - -extern int enc_init(); - -/// BASE64 -extern size_t enc_base64_decode(const char * in, size_t in_size,void * out); -extern size_t enc_base64_encode(const void * in, size_t in_size,char * out); -/// - -/// AES -#include "common.h" -struct enc_key; - -extern size_t enc_rsa_decode(struct enc_key* key, const void * in, size_t in_size,void * out); -extern size_t enc_rsa_encode(struct enc_key* key, void * in, size_t in_size,void * out); - -extern void setRsaPubKeyFromString(char *str_key, size_t strLen, struct enc_key * key); -extern size_t getStringPubKeyFromRsa(RSA *key, char **out); -extern size_t getStringPrivateKeyFromRsa(RSA *key, char **out); - - -extern void enc_aes_key_new(struct enc_key * key); -extern void enc_aes_key_create(struct enc_key * key, const char *password_string); -extern void enc_aes_key_delete(struct enc_key *key); -extern size_t enc_aes_decode(struct enc_key* key, const void * in, size_t in_size,void * out); -extern size_t enc_aes_encode(struct enc_key* key, const void * in, size_t in_size,void * out); - - -size_t enc_code(struct enc_key * key, const void * buf, const size_t buf_size, void * buf_out, enc_data_type_t data_type_out); -size_t enc_decode(struct enc_key * key, const void * buf, const size_t buf_size, void * buf_out, enc_data_type_t data_type_in); - - -#endif diff --git a/crypt/enc_fnam2.c b/crypt/enc_fnam2.c deleted file mode 100644 index 1ce583102d7b2ba6733e80c92b5cdbd04a51d9d3..0000000000000000000000000000000000000000 --- 
a/crypt/enc_fnam2.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc - All rights reserved. - - This file is part of DAP (Deus Applications Prototypes) the open source project - - DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - DAP is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. -*/ - - -#include <stdlib.h> -#include <string.h> -#include <time.h> -#include "enc_key.h" -#include "enc_fnam2.h" - -void fnam2_crypt(int *key, int key_size, unsigned long *num_block, unsigned long b1,unsigned long b2,unsigned long b3,unsigned long b4, void * out ); -void fnam2_decrypt(int *key, int key_size, unsigned long *num_block, unsigned long b1,unsigned long b2,unsigned long b3,unsigned long b4, void * out ); - -/** - * @brief enc_fnam2_key_new - * @param key - */ -void enc_fnam2_key_new(struct enc_key * key) -{ - size_t i; - for(i=0;i<key->data_size;i++) - key->data[i] = rand()%255; -} - - -/** - * @brief enc_fnam2_decode - * @param key - * @param key_size - * @param in - * @param in_size - * @param out - * @return - */ -size_t enc_fnam2_decode(struct enc_key * key, const void * in, size_t in_size,void * out) -{ - unsigned long num_block=0; - int key_pos=0; - const size_t block_size=16; - const unsigned char * in_ul=(const unsigned char*) in; - - size_t pos; - - for (pos=0;pos<= in_size-block_size; pos+=block_size){ - fnam2_decrypt( (int *) 
(key->data+key_pos), block_size,&num_block, *((int*)(in_ul+pos)) , - *((int*)(in_ul+pos+4)), *((int*)(in_ul+pos+8)),*((int*)(in_ul+pos+12)),out+pos); - /*key_pos+=block_size; - if(key_pos+block_size>=key->data_size) - key_pos=0;*/ - } - - return pos; -} - -/** - * @brief enc_fnam2_encode - * @param key - * @param key_size - * @param in - * @param in_size - * @param out - * @return - */ -size_t enc_fnam2_encode(struct enc_key * key,const void * in, size_t in_size,void * out) -{ - unsigned long num_block=0; - int key_pos=0; - const size_t block_size=16; - const unsigned char * in_ul=(const unsigned char*) in; - - size_t pos; - - - for (pos=0;pos<= in_size-block_size; pos+=block_size){ - fnam2_crypt( (int *) (key->data+key_pos), block_size,&num_block, *((int*)(in_ul+pos)) , - *((int*)(in_ul+pos+4)), *((int*)(in_ul+pos+8)),*((int*)(in_ul+pos+12)),out+pos); - /* key_pos+=block_size; - if(key_pos+block_size>=key->data_size) - key_pos=0;*/ - } - - if(pos<in_size){ - char * buf = (char*) calloc(1,block_size); - memcpy(buf,in_ul+pos, in_size-pos); - fnam2_crypt(( int *)(key->data+key_pos), block_size,&num_block, *((int*)(buf)) , - *((int*)(buf+4)), *((int*)(buf+8)),*((int*)(buf+12)),out+pos); - pos+=block_size; - } - return pos; -} - -void fnam2_crypt(int *key, int key_size, unsigned long *num_block, unsigned long b1,unsigned long b2,unsigned long b3,unsigned long b4, void * out ) -{ - int subkey,i,ip,im; - unsigned long Num=*num_block; - int r; - - for(r=0;r<key_size*4;r++) { - //Selecting the part of key for a concrete stage - i=r%key_size; - if(i==key_size) {ip=1;im=key_size-1;} - if(i==1) {ip=2;im=key_size;} - else {ip=i+1;im=i-1;} - - //Generating the subkey on the basis of nmber part of a key, - //number of the block in a file and number of a round - subkey=key[i]*r+(key[im]*Num+key[ip]); - - //F - function - b1+=(((b2>>16)^((b2<<25)+subkey))+(subkey*(~(b2<<7)))); - b1=~b1; - r++; - - i=r%key_size; - if(i==key_size) {ip=1;im=key_size-1;} - if(i==1) {ip=2;im=key_size;} 
- else {ip=i+1;im=i-1;} - subkey=key[i]*r+(key[im]*Num+key[ip]); - b2+=(((b3>>16)^((b3<<25)+subkey))+(subkey*(~(b3<<7)))); - b2=~b2; - r++; - - i=r%key_size; - if(i==key_size) {ip=1;im=key_size-1;} - if(i==1) {ip=2;im=key_size;} - else {ip=i+1;im=i-1;} - subkey=key[i]*r+(key[im]*Num+key[ip]); - b3+=(((b4>>16)^((b4<<25)+subkey))+(subkey*(~(b4<<7)))); - b3=~b3; - r++; - - i=r%key_size; - if(i==key_size) {ip=1;im=key_size-1;} - if(i==1) {ip=2;im=key_size;} - else {ip=i+1;im=i-1;} - subkey=key[i]*r+(key[im]*Num+key[ip]); - b4+=(((b1>>16)^((b1<<25)+subkey))+(subkey*(~(b1<<7)))); - b4=~b4; - } - Num++; - *num_block=Num; - ((unsigned char*)out)[0]=b1; - ((unsigned char*)out)[1]=b2; - ((unsigned char*)out)[2]=b3; - ((unsigned char*)out)[3]=b4; -} - -void fnam2_decrypt(int *key, int key_size, unsigned long *num_block, unsigned long b1,unsigned long b2,unsigned long b3,unsigned long b4, void * out ) -{ - int subkey,i,ip,im; - unsigned long Num=*num_block; - int r; - for(r=key_size*sizeof(int)-1;r>=0;r--){ - i=r%key_size; - if(i==key_size) {ip=1;im=key_size-1;} - if(i==1) {ip=2;im=key_size;} - else {ip=i+1;im=i-1;} - subkey=key[i]*r+(key[im]*Num+key[ip]); - b4=~b4; - b4-=(((b1>>16)^((b1<<25)+subkey))+(subkey*(~(b1<<7)))); - r--; - - i=r%key_size; - if(i==key_size) {ip=1;im=key_size-1;} - if(i==1) {ip=2;im=key_size;} - else {ip=i+1;im=i-1;} - subkey=key[i]*r+(key[im]*Num+key[ip]); - b3=~b3; - b3-=(((b4>>16)^((b4<<25)+subkey))+(subkey*(~(b4<<7)))); - r--; - - i=r%key_size; - if(i==key_size) {ip=1;im=key_size-1;} - if(i==1) {ip=2;im=key_size;} - else {ip=i+1;im=i-1;} - subkey=key[i]*r+(key[im]*Num+key[ip]); - b2=~b2; - b2-=(((b3>>16)^((b3<<25)+subkey))+(subkey*(~(b3<<7)))); - r--; - - i=r%key_size; - if(i==key_size) {ip=1;im=key_size-1;} - if(i==1) {ip=2;im=key_size;} - else {ip=i+1;im=i-1;} - subkey=key[i]*r+(key[im]*Num+key[ip]); - b1=~b1; - b1-=(((b2>>16)^((b2<<25)+subkey))+(subkey*(~(b2<<7)))); - } - Num++; - *num_block=Num; - ((unsigned char*)out)[0]=b1; - ((unsigned 
char*)out)[1]=b2; - ((unsigned char*)out)[2]=b3; - ((unsigned char*)out)[3]=b4; -} diff --git a/crypt/enc_key.h b/crypt/enc_key.h deleted file mode 100644 index 58954225520844828204595f7b1eca1965f0510b..0000000000000000000000000000000000000000 --- a/crypt/enc_key.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc - All rights reserved. - - This file is part of DAP (Deus Applications Prototypes) the open source project - - DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - DAP is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with any DAP based project. If not, see <http://www.gnu.org/licenses/>. 
-*/ - -#ifndef _ENC_KEY_H_ -#define _ENC_KEY_H_ - -#include "enc.h" - -#include <stddef.h> -#include "enc_key.h" -typedef enum enc_key_type{ENC_KEY_TYPE_FNAM2, ENC_KEY_TYPE_AES,ENC_KEY_RSA_SESSION} enc_key_type_t; - -struct enc_key; -typedef size_t (*enc_callback_t)(struct enc_key *, const void * , const size_t ,void *); - -typedef struct enc_key{ - unsigned char * data; - size_t data_size; - enc_key_type_t type; - - enc_callback_t enc; - enc_callback_t dec; - - void * internal; -} enc_key_t; - -extern enc_key_t *enc_key_new(size_t key_size,enc_key_type_t key_type); -extern enc_key_t *enc_key_generate(enc_data_type_t v_type, rsa_key_t* key_session_pair); -extern enc_key_t *enc_key_create(const char * key_input,enc_key_type_t v_type); -extern void enc_key_delete(enc_key_t * key); -extern rsa_key_t* enc_key_session_pair_create(const char* client_pub_key, u_int16_t key_len); - -#endif diff --git a/crypt/liboqs/common/common.c b/crypt/liboqs/common/common.c new file mode 100644 index 0000000000000000000000000000000000000000..420aae263f282eded000d967e437e6e311d8ef0c --- /dev/null +++ b/crypt/liboqs/common/common.c @@ -0,0 +1,28 @@ +#include <oqs/common.h> + +#include <string.h> + +#if defined(WINDOWS) +#include <windows.h> +#endif + +void OQS_MEM_cleanse(void *ptr, size_t len) { +#if defined(WINDOWS) + SecureZeroMemory(ptr, len); +#elif defined(HAVE_MEMSET_S) + if (0U < len && memset_s(ptr, (rsize_t) len, 0, (rsize_t) len) != 0) { + abort(); + } +#else + typedef void *(*memset_t)(void *, int, size_t); + static volatile memset_t memset_func = memset; + memset_func(ptr, 0, len); +#endif +} + +void OQS_MEM_secure_free(void *ptr, size_t len) { + if (ptr != NULL) { + OQS_MEM_cleanse(ptr, len); + free(ptr); + } +} diff --git a/crypt/liboqs/common/common.h b/crypt/liboqs/common/common.h new file mode 100644 index 0000000000000000000000000000000000000000..010d9324d7033aaadc5c5e1da7a3ea11ea6a1bcf --- /dev/null +++ b/crypt/liboqs/common/common.h @@ -0,0 +1,19 @@ +#ifndef 
__OQS_COMMON_H +#define __OQS_COMMON_H + +#include <stdlib.h> + +#define OQS_SUCCESS 1 +#define OQS_ERROR 0 + +void OQS_MEM_cleanse(void *ptr, size_t len); +void OQS_MEM_secure_free(void *ptr, size_t len); + +#if __ANDROID__ +//android workaround +#define eprintf(...) printf(__VA_ARGS__); +#else +#define eprintf(...) fprintf(stderr, __VA_ARGS__); +#endif + +#endif diff --git a/crypt/liboqs/config.h b/crypt/liboqs/config.h new file mode 100644 index 0000000000000000000000000000000000000000..4d6f08aa132037e9c2dc0af8175a802e5641233a --- /dev/null +++ b/crypt/liboqs/config.h @@ -0,0 +1,197 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* "Define to 1 when MCBITS enabled" */ +/* #undef ENABLE_CODE_MCBITS */ + +/* "Define to 1 when FRODO enabled" */ +#define ENABLE_KEX_LWE_FRODO 1 + +/* "Define to 1 when KYBER enabled" */ +#define ENABLE_KEX_MLWE_KYBER 1 + +/* "Define to 1 when NTRU enabled" */ +#define ENABLE_KEX_NTRU 1 + +/* "Define to 1 when RLWE MSRLN16 enabled" */ +#define ENABLE_KEX_RLWE_MSRLN16 1 + +/* "Define to 1 when RLWE NEWHOPE enabled" */ +#define ENABLE_KEX_RLWE_NEWHOPE 1 + +/* "Define to 1 when SIDH CLN16 enabled" */ +#define ENABLE_KEX_SIDH_CLN16 1 + +/* "Define to 1 when SIDH IQC enabled" */ +/* #undef ENABLE_SIDH_IQC_REF */ + +/* GMP DIR used locally */ +/* #undef GMPDIR */ + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the <fcntl.h> header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the `gettimeofday' function. */ +#define HAVE_GETTIMEOFDAY 1 + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the <limits.h> header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if your system has a GNU libc compatible `malloc' function, and + to 0 otherwise. 
*/ +#define HAVE_MALLOC 1 + +/* Define to 1 if you have the `memmove' function. */ +#define HAVE_MEMMOVE 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `memset' function. */ +#define HAVE_MEMSET 1 + +/* Define to 1 if you have the `pow' function. */ +/* #undef HAVE_POW */ + +/* Define to 1 if you have the `sqrt' function. */ +/* #undef HAVE_SQRT */ + +/* Define to 1 if you have the <stddef.h> header file. */ +#define HAVE_STDDEF_H 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strdup' function. */ +#define HAVE_STRDUP 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/time.h> header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if the system has the type `_Bool'. */ +#define HAVE__BOOL 1 + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* M4RI DIR used locally */ +/* #undef M4RIDIR */ + +/* OPENSSL DIR used locally */ +/* #undef OPENSSLDIR */ + +/* Name of package */ +#define PACKAGE "liboqs" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "liboqs" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "liboqs 1.0.0" + +/* Define to the one symbol short name of this package. 
*/ +#define PACKAGE_TARNAME "liboqs" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "1.0.0" + +/* The size of `size_t', as computed by sizeof. */ +#define SIZEOF_SIZE_T 8 + +/* SODIUM DIR used locally */ +/* #undef SODIUMDIR */ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Version number of package */ +#define VERSION "1.0.0" + +/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>, + <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +/* #undef _UINT32_T */ + +/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>, + <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +/* #undef _UINT64_T */ + +/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>, + <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +/* #undef _UINT8_T */ + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + +/* Define to the type of a signed integer type of width exactly 16 bits if + such a type exists and the standard includes do not define it. */ +/* #undef int16_t */ + +/* Define to the type of a signed integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +/* #undef int32_t */ + +/* Define to the type of a signed integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +/* #undef int64_t */ + +/* Define to the type of a signed integer type of width exactly 8 bits if such + a type exists and the standard includes do not define it. 
*/ +/* #undef int8_t */ + +/* Define to rpl_malloc if the replacement function should be used. */ +/* #undef malloc */ + +/* Define to `unsigned int' if <sys/types.h> does not define. */ +/* #undef size_t */ + +/* Define to the type of an unsigned integer type of width exactly 16 bits if + such a type exists and the standard includes do not define it. */ +/* #undef uint16_t */ + +/* Define to the type of an unsigned integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +/* #undef uint32_t */ + +/* Define to the type of an unsigned integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +/* #undef uint64_t */ + +/* Define to the type of an unsigned integer type of width exactly 8 bits if + such a type exists and the standard includes do not define it. */ +/* #undef uint8_t */ diff --git a/crypt/liboqs/crypto/aes/Makefile.am b/crypt/liboqs/crypto/aes/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..1317fb2dc1fd50489dea0dd02e108d607ab8b3e1 --- /dev/null +++ b/crypt/liboqs/crypto/aes/Makefile.am @@ -0,0 +1,18 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libaes.la + +libaes_la_SOURCES = aes.c + +libaes_la_CPPFLAGS = -I../../../include +if USE_OPENSSL +libaes_la_CPPFLAGS += -I$(OPENSSL_DIR)/include +endif + +if USE_AES_NI +libaes_la_CPPFLAGS += -maes -msse2 +libaes_la_SOURCES += aes_ni.c +endif + +libaes_la_SOURCES += aes_c.c +libaes_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/crypto/aes/aes.c b/crypt/liboqs/crypto/aes/aes.c new file mode 100644 index 0000000000000000000000000000000000000000..c77a799ddc178c944eb79e060a5f175c31fec774 --- /dev/null +++ b/crypt/liboqs/crypto/aes/aes.c @@ -0,0 +1,194 @@ +#include <assert.h> + +#include "aes.h" +#include "aes_local.h" + +void OQS_AES128_load_schedule(const uint8_t *key, void **schedule, int for_encryption) { +#ifdef USE_OPENSSL + oqs_aes128_load_schedule_ossl(key, 
schedule, for_encryption); +#else + for_encryption++; // need some dummy operation to avoid unused parameter warning +#ifdef AES_ENABLE_NI + oqs_aes128_load_schedule_ni(key, schedule); +#else + oqs_aes128_load_schedule_c(key, schedule); +#endif +#endif +} + +void OQS_AES128_free_schedule(void *schedule) { +#ifdef USE_OPENSSL + oqs_aes128_free_schedule_ossl(schedule); +#else +#ifdef AES_ENABLE_NI + oqs_aes128_free_schedule_ni(schedule); +#else + oqs_aes128_free_schedule_c(schedule); +#endif +#endif +} + +void OQS_AES128_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) { +#ifdef USE_OPENSSL + oqs_aes128_ecb_enc_ossl(plaintext, plaintext_len, key, ciphertext); +#else +#ifdef AES_ENABLE_NI + oqs_aes128_ecb_enc_ni(plaintext, plaintext_len, key, ciphertext); +#else + oqs_aes128_ecb_enc_c(plaintext, plaintext_len, key, ciphertext); +#endif +#endif +} + +void OQS_AES128_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) { +#ifdef USE_OPENSSL + oqs_aes128_ecb_dec_ossl(ciphertext, ciphertext_len, key, plaintext); +#else +#ifdef AES_ENABLE_NI + oqs_aes128_ecb_dec_ni(ciphertext, ciphertext_len, key, plaintext); +#else + oqs_aes128_ecb_dec_c(ciphertext, ciphertext_len, key, plaintext); +#endif +#endif +} + +void OQS_AES128_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) { +#ifdef USE_OPENSSL + oqs_aes128_ecb_enc_sch_ossl(plaintext, plaintext_len, schedule, ciphertext); +#else +#ifdef AES_ENABLE_NI + oqs_aes128_ecb_enc_sch_ni(plaintext, plaintext_len, schedule, ciphertext); +#else + oqs_aes128_ecb_enc_sch_c(plaintext, plaintext_len, schedule, ciphertext); +#endif +#endif +} + +void OQS_AES128_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) { +#ifdef USE_OPENSSL + oqs_aes128_ecb_dec_sch_ossl(ciphertext, ciphertext_len, schedule, plaintext); +#else 
+#ifdef AES_ENABLE_NI + oqs_aes128_ecb_dec_sch_ni(ciphertext, ciphertext_len, schedule, plaintext); +#else + oqs_aes128_ecb_dec_sch_c(ciphertext, ciphertext_len, schedule, plaintext); +#endif +#endif +} + +#ifdef AES_ENABLE_NI +inline void oqs_aes128_ecb_enc_ni(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) { + void *schedule = NULL; + oqs_aes128_load_schedule_ni(key, &schedule); + oqs_aes128_ecb_enc_sch_ni(plaintext, plaintext_len, schedule, ciphertext); + oqs_aes128_free_schedule_ni(schedule); +} +#endif + +inline void oqs_aes128_ecb_enc_c(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) { + void *schedule = NULL; + oqs_aes128_load_schedule_c(key, &schedule); + oqs_aes128_ecb_enc_sch_c(plaintext, plaintext_len, schedule, ciphertext); + oqs_aes128_free_schedule_c(schedule); +} + +#ifdef AES_ENABLE_NI +inline void oqs_aes128_ecb_enc_sch_ni(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) { + assert(plaintext_len % 16 == 0); + for (size_t block = 0; block < plaintext_len / 16; block++) { + oqs_aes128_enc_ni(plaintext + (16 * block), schedule, ciphertext + (16 * block)); + } +} +#endif + +inline void oqs_aes128_ecb_enc_sch_c(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) { + assert(plaintext_len % 16 == 0); + for (size_t block = 0; block < plaintext_len / 16; block++) { + oqs_aes128_enc_c(plaintext + (16 * block), schedule, ciphertext + (16 * block)); + } +} + +#ifdef AES_ENABLE_NI +inline void oqs_aes128_ecb_dec_ni(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) { + void *schedule = NULL; + oqs_aes128_load_schedule_ni(key, &schedule); + oqs_aes128_ecb_dec_sch_ni(ciphertext, ciphertext_len, schedule, plaintext); + oqs_aes128_free_schedule_ni(schedule); +} +#endif + +inline void oqs_aes128_ecb_dec_c(const uint8_t *ciphertext, const 
size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) { + void *schedule = NULL; + oqs_aes128_load_schedule_c(key, &schedule); + oqs_aes128_ecb_dec_sch_c(ciphertext, ciphertext_len, schedule, plaintext); + oqs_aes128_free_schedule_c(schedule); +} + +#ifdef AES_ENABLE_NI +inline void oqs_aes128_ecb_dec_sch_ni(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) { + assert(ciphertext_len % 16 == 0); + for (size_t block = 0; block < ciphertext_len / 16; block++) { + oqs_aes128_dec_ni(ciphertext + (16 * block), schedule, plaintext + (16 * block)); + } +} +#endif + +inline void oqs_aes128_ecb_dec_sch_c(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) { + assert(ciphertext_len % 16 == 0); + for (size_t block = 0; block < ciphertext_len / 16; block++) { + oqs_aes128_dec_c(ciphertext + (16 * block), schedule, plaintext + (16 * block)); + } +} + +#ifdef USE_OPENSSL +#include <openssl/evp.h> + +inline void oqs_aes128_load_schedule_ossl(const uint8_t *key, void **schedule, int for_encryption) { + EVP_CIPHER_CTX *aes_ctx = EVP_CIPHER_CTX_new(); + assert(aes_ctx != NULL); + if (for_encryption) { + assert(1 == EVP_EncryptInit_ex(aes_ctx, EVP_aes_128_ecb(), NULL, key, NULL)); + } else { + assert(1 == EVP_DecryptInit_ex(aes_ctx, EVP_aes_128_ecb(), NULL, key, NULL)); + } + EVP_CIPHER_CTX_set_padding(aes_ctx, 0); + *schedule = aes_ctx; +} + +inline void oqs_aes128_free_schedule_ossl(void *schedule) { + if (schedule != NULL) { + EVP_CIPHER_CTX_free((EVP_CIPHER_CTX *) schedule); + } +} + +inline void oqs_aes128_ecb_enc_ossl(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) { + void *schedule = NULL; + oqs_aes128_load_schedule_ossl(key, &schedule, 1); + oqs_aes128_ecb_enc_sch_ossl(plaintext, plaintext_len, schedule, ciphertext); + oqs_aes128_free_schedule_ossl(schedule); +} + +inline void oqs_aes128_ecb_dec_ossl(const uint8_t *ciphertext, 
const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) { + void *schedule = NULL; + oqs_aes128_load_schedule_ossl(key, &schedule, 0); + oqs_aes128_ecb_dec_sch_ossl(ciphertext, ciphertext_len, schedule, plaintext); + oqs_aes128_free_schedule_ossl(schedule); +} + +inline void oqs_aes128_ecb_enc_sch_ossl(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) { + assert(plaintext_len % 16 == 0); + int outlen; + assert(1 == EVP_EncryptUpdate((EVP_CIPHER_CTX *) schedule, ciphertext, &outlen, plaintext, plaintext_len)); + assert((size_t) outlen == plaintext_len); + assert(1 == EVP_EncryptFinal_ex((EVP_CIPHER_CTX *) schedule, ciphertext, &outlen)); +} + +inline void oqs_aes128_ecb_dec_sch_ossl(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) { + assert(ciphertext_len % 16 == 0); + int outlen; + assert(1 == EVP_DecryptUpdate((EVP_CIPHER_CTX *) schedule, plaintext, &outlen, ciphertext, ciphertext_len)); + assert((size_t) outlen == ciphertext_len); + assert(1 == EVP_DecryptFinal_ex((EVP_CIPHER_CTX *) schedule, plaintext, &outlen)); +} + +#endif diff --git a/crypt/liboqs/crypto/aes/aes.h b/crypt/liboqs/crypto/aes/aes.h new file mode 100644 index 0000000000000000000000000000000000000000..f90574243aa0aeb8a5c0292d53a4f1a4801ebaca --- /dev/null +++ b/crypt/liboqs/crypto/aes/aes.h @@ -0,0 +1,66 @@ +/** + * \file aes.h + * \brief Header defining the API for OQS AES + */ + +#ifndef __OQS_AES_H +#define __OQS_AES_H + +#include <stdint.h> +#include <stdlib.h> + +/** + * Function to fill a key schedule given an initial key. + * + * @param key Initial Key. + * @param schedule Abstract data structure for a key schedule. + * @param forEncryption 1 if key schedule is for encryption, 0 if for decryption. + */ +void OQS_AES128_load_schedule(const uint8_t *key, void **schedule, int for_encryption); + +/** + * Function to free a key schedule. 
+ * + * @param schedule Schedule generated with OQS_AES128_load_schedule(). + */ +void OQS_AES128_free_schedule(void *schedule); + +/** + * Function to encrypt blocks of plaintext using ECB mode. + * A schedule based on the key is generated and used internally. + * + * @param plaintext Plaintext to be encrypted. + * @param plaintext_len Length on the plaintext in bytes. Must be a multiple of 16. + * @param key Key to be used for encryption. + * @param ciphertext Pointer to a block of memory which >= in size to the plaintext block. The result will be written here. + */ +void OQS_AES128_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext); + +/** + * Function to decrypt blocks of plaintext using ECB mode. + * A schedule based on the key is generated and used internally. + * + * @param ciphertext Ciphertext to be decrypted. + * @param ciphertext_len Length on the ciphertext in bytes. Must be a multiple of 16. + * @param key Key to be used for encryption. + * @param ciphertext Pointer to a block of memory which >= in size to the ciphertext block. The result will be written here. + */ +void OQS_AES128_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext); + +/** + * Same as OQS_AES128_ECB_enc() except a schedule generated by + * OQS_AES128_load_schedule() is passed rather then a key. This is faster + * if the same schedule is used for multiple encryptions since it does + * not have to be regenerated from the key. + */ +void OQS_AES128_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext); + +/** + * Same as OQS_AES128_ECB_dec() except a schedule generated by + * OQS_AES128_load_schedule() is passed rather then a key. This is faster + * if the same schedule is used for multiple encryptions since it does + * not have to be regenerated from the key. 
+ */ +void OQS_AES128_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext); + +#endif diff --git a/crypt/liboqs/crypto/aes/aes_c.c b/crypt/liboqs/crypto/aes/aes_c.c new file mode 100644 index 0000000000000000000000000000000000000000..553b4d1d7b622bfd638977cad7e40073f534c920 --- /dev/null +++ b/crypt/liboqs/crypto/aes/aes_c.c @@ -0,0 +1,340 @@ +// Simple, thoroughly commented implementation of 128-bit AES / Rijndael using C +// Chris Hulbert - chris.hulbert@gmail.com - http://splinter.com.au/blog +// References: +// http://en.wikipedia.org/wiki/Advanced_Encryption_Standard +// http://en.wikipedia.org/wiki/Rijndael_key_schedule +// http://en.wikipedia.org/wiki/Rijndael_mix_columns +// http://en.wikipedia.org/wiki/Rijndael_S-box +// This code is public domain, or any OSI-approved license, your choice. No warranty. + +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "aes.h" + +typedef unsigned char byte; + +// Here are all the lookup tables for the row shifts, rcon, s-boxes, and galois field multiplications +static const byte shift_rows_table[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11}; +static const byte shift_rows_table_inv[] = {0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3}; +static const byte lookup_rcon[] = { + 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a}; +static const byte lookup_sbox[] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 
0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}; +static const byte lookup_sbox_inv[] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 
0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d}; +static const byte lookup_g2[] = { + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, + 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, + 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, + 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, + 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, + 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, + 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, + 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, + 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, + 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, + 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, + 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, + 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 
0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, + 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, + 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5}; +static const byte lookup_g3[] = { + 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, + 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, + 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, + 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, + 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, + 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, + 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, + 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, + 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, + 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, + 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, + 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, + 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, + 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, + 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, + 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a}; +static const byte lookup_g9[] = { + 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, 
+ 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, + 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, + 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, + 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, + 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, + 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, + 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, + 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, + 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, + 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, + 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, + 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, + 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, + 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, + 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46}; +static const byte lookup_g11[] = { + 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, + 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, + 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, + 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, + 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, + 
0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, + 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, + 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, + 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, + 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, + 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, + 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, + 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, + 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, + 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, + 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3}; +static const byte lookup_g13[] = { + 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, + 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, + 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, + 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, + 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, + 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, + 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, + 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, + 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, + 
0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, + 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, + 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, + 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, + 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, + 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, + 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97}; +static const byte lookup_g14[] = { + 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, + 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, + 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, + 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, + 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, + 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, + 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, + 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, + 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, + 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, + 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, + 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, + 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, + 
0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, + 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, + 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d}; + +// Xor's all elements in a n byte array a by b +static void xor (byte * a, const byte *b, int n) { + int i; + for (i = 0; i < n; i++) { + a[i] ^= b[i]; + } +} + + // Xor the current cipher state by a specific round key + static void xor_round_key(byte *state, const byte *keys, int round) { + xor(state, keys + round * 16, 16); +} + +// Apply the rijndael s-box to all elements in an array +// http://en.wikipedia.org/wiki/Rijndael_S-box +static void sub_bytes(byte *a, int n) { + int i; + for (i = 0; i < n; i++) { + a[i] = lookup_sbox[a[i]]; + } +} +static void sub_bytes_inv(byte *a, int n) { + int i; + for (i = 0; i < n; i++) { + a[i] = lookup_sbox_inv[a[i]]; + } +} + +// Perform the core key schedule transform on 4 bytes, as part of the key expansion process +// http://en.wikipedia.org/wiki/Rijndael_key_schedule#Key_schedule_core +static void key_schedule_core(byte *a, int i) { + byte temp = a[0]; // Rotate the output eight bits to the left + a[0] = a[1]; + a[1] = a[2]; + a[2] = a[3]; + a[3] = temp; + sub_bytes(a, 4); // Apply Rijndael's S-box on all four individual bytes in the output word + a[0] ^= lookup_rcon[i]; // On just the first (leftmost) byte of the output word, perform the rcon operation with i + // as the input, and exclusive or the rcon output with the first byte of the output word +} + +// Expand the 16-byte key to 11 round keys (176 bytes) +// http://en.wikipedia.org/wiki/Rijndael_key_schedule#The_key_schedule +void oqs_aes128_load_schedule_c(const uint8_t *key, void **_schedule) { + *_schedule = malloc(16 * 11); + assert(*_schedule != NULL); + uint8_t *schedule = (uint8_t *) *_schedule; + int bytes = 16; // The count of how many bytes we've created so far + int 
i = 1; // The rcon iteration value i is set to 1 + int j; // For repeating the second stage 3 times + byte t[4]; // Temporary working area known as 't' in the Wiki article + memcpy(schedule, key, 16); // The first 16 bytes of the expanded key are simply the encryption key + + while (bytes < 176) { // Until we have 176 bytes of expanded key, we do the following: + memcpy(t, schedule + bytes - 4, 4); // We assign the value of the previous four bytes in the expanded key to t + key_schedule_core(t, i); // We perform the key schedule core on t, with i as the rcon iteration value + i++; // We increment i by 1 + xor(t, schedule + bytes - 16, 4); // We exclusive-or t with the four-byte block 16 bytes before the new expanded key. + memcpy(schedule + bytes, t, 4); // This becomes the next 4 bytes in the expanded key + bytes += 4; // Keep track of how many expanded key bytes we've added + + // We then do the following three times to create the next twelve bytes + for (j = 0; j < 3; j++) { + memcpy(t, schedule + bytes - 4, 4); // We assign the value of the previous 4 bytes in the expanded key to t + xor(t, schedule + bytes - 16, 4); // We exclusive-or t with the four-byte block n bytes before + memcpy(schedule + bytes, t, 4); // This becomes the next 4 bytes in the expanded key + bytes += 4; // Keep track of how many expanded key bytes we've added + } + } +} + +void oqs_aes128_free_schedule_c(void *schedule) { + if (schedule != NULL) { + free(schedule); + } +} + +// Apply the shift rows step on the 16 byte cipher state +// http://en.wikipedia.org/wiki/Advanced_Encryption_Standard#The_ShiftRows_step +static void shift_rows(byte *state) { + int i; + byte temp[16]; + memcpy(temp, state, 16); + for (i = 0; i < 16; i++) { + state[i] = temp[shift_rows_table[i]]; + } +} +static void shift_rows_inv(byte *state) { + int i; + byte temp[16]; + memcpy(temp, state, 16); + for (i = 0; i < 16; i++) { + state[i] = temp[shift_rows_table_inv[i]]; + } +} + +// Perform the mix columns matrix on 
one column of 4 bytes +// http://en.wikipedia.org/wiki/Rijndael_mix_columns +static void mix_col(byte *state) { + byte a0 = state[0]; + byte a1 = state[1]; + byte a2 = state[2]; + byte a3 = state[3]; + state[0] = lookup_g2[a0] ^ lookup_g3[a1] ^ a2 ^ a3; + state[1] = lookup_g2[a1] ^ lookup_g3[a2] ^ a3 ^ a0; + state[2] = lookup_g2[a2] ^ lookup_g3[a3] ^ a0 ^ a1; + state[3] = lookup_g2[a3] ^ lookup_g3[a0] ^ a1 ^ a2; +} + +// Perform the mix columns matrix on each column of the 16 bytes +static void mix_cols(byte *state) { + mix_col(state); + mix_col(state + 4); + mix_col(state + 8); + mix_col(state + 12); +} + +// Perform the inverse mix columns matrix on one column of 4 bytes +// http://en.wikipedia.org/wiki/Rijndael_mix_columns +static void mix_col_inv(byte *state) { + byte a0 = state[0]; + byte a1 = state[1]; + byte a2 = state[2]; + byte a3 = state[3]; + state[0] = lookup_g14[a0] ^ lookup_g9[a3] ^ lookup_g13[a2] ^ lookup_g11[a1]; + state[1] = lookup_g14[a1] ^ lookup_g9[a0] ^ lookup_g13[a3] ^ lookup_g11[a2]; + state[2] = lookup_g14[a2] ^ lookup_g9[a1] ^ lookup_g13[a0] ^ lookup_g11[a3]; + state[3] = lookup_g14[a3] ^ lookup_g9[a2] ^ lookup_g13[a1] ^ lookup_g11[a0]; +} + +// Perform the inverse mix columns matrix on each column of the 16 bytes +static void mix_cols_inv(byte *state) { + mix_col_inv(state); + mix_col_inv(state + 4); + mix_col_inv(state + 8); + mix_col_inv(state + 12); +} + +void oqs_aes128_enc_c(const uint8_t *plaintext, const void *_schedule, uint8_t *ciphertext) { + const uint8_t *schedule = (const uint8_t *) _schedule; + int i; // To count the rounds + + // First Round + memcpy(ciphertext, plaintext, 16); + xor_round_key(ciphertext, schedule, 0); + + // Middle rounds + for (i = 0; i < 9; i++) { + sub_bytes(ciphertext, 16); + shift_rows(ciphertext); + mix_cols(ciphertext); + xor_round_key(ciphertext, schedule, i + 1); + } + + // Final Round + sub_bytes(ciphertext, 16); + shift_rows(ciphertext); + xor_round_key(ciphertext, schedule, 10); +} + +void 
oqs_aes128_dec_c(const uint8_t *ciphertext, const void *_schedule, uint8_t *plaintext) { + const uint8_t *schedule = (const uint8_t *) _schedule; + int i; // To count the rounds + + // Reverse the final Round + memcpy(plaintext, ciphertext, 16); + xor_round_key(plaintext, schedule, 10); + shift_rows_inv(plaintext); + sub_bytes_inv(plaintext, 16); + + // Reverse the middle rounds + for (i = 0; i < 9; i++) { + xor_round_key(plaintext, schedule, 9 - i); + mix_cols_inv(plaintext); + shift_rows_inv(plaintext); + sub_bytes_inv(plaintext, 16); + } + + // Reverse the first Round + xor_round_key(plaintext, schedule, 0); +} diff --git a/crypt/liboqs/crypto/aes/aes_local.h b/crypt/liboqs/crypto/aes/aes_local.h new file mode 100644 index 0000000000000000000000000000000000000000..0b226c7845fc7a0016596f10677bf8c835a0b932 --- /dev/null +++ b/crypt/liboqs/crypto/aes/aes_local.h @@ -0,0 +1,39 @@ +/** + * \file aes_local.h + * \brief Header defining additional internal functions for OQS AES + */ + +#ifndef __OQS_AES_LOCAL_H +#define __OQS_AES_LOCAL_H + +#include <stdint.h> +#include <stdlib.h> + +void oqs_aes128_load_schedule_ni(const uint8_t *key, void **schedule); +void oqs_aes128_free_schedule_ni(void *schedule); +void oqs_aes128_enc_ni(const uint8_t *plaintext, const void *schedule, uint8_t *ciphertext); +void oqs_aes128_dec_ni(const uint8_t *ciphertext, const void *schedule, uint8_t *plaintext); +void oqs_aes128_ecb_enc_ni(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext); +void oqs_aes128_ecb_dec_ni(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext); +void oqs_aes128_ecb_enc_sch_ni(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext); +void oqs_aes128_ecb_dec_sch_ni(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext); + +void oqs_aes128_load_schedule_c(const uint8_t *key, void **schedule); +void 
oqs_aes128_free_schedule_c(void *schedule); +void oqs_aes128_enc_c(const uint8_t *plaintext, const void *schedule, uint8_t *ciphertext); +void oqs_aes128_dec_c(const uint8_t *ciphertext, const void *schedule, uint8_t *plaintext); +void oqs_aes128_ecb_enc_c(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext); +void oqs_aes128_ecb_dec_c(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext); +void oqs_aes128_ecb_enc_sch_c(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext); +void oqs_aes128_ecb_dec_sch_c(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext); + +#ifdef USE_OPENSSL +void oqs_aes128_load_schedule_ossl(const uint8_t *key, void **schedule, int for_encryption); +void oqs_aes128_free_schedule_ossl(void *schedule); +void oqs_aes128_ecb_enc_ossl(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext); +void oqs_aes128_ecb_dec_ossl(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext); +void oqs_aes128_ecb_enc_sch_ossl(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext); +void oqs_aes128_ecb_dec_sch_ossl(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext); +#endif + +#endif diff --git a/crypt/liboqs/crypto/aes/aes_ni.c b/crypt/liboqs/crypto/aes/aes_ni.c new file mode 100644 index 0000000000000000000000000000000000000000..86eec3b7e700d63b20ba0ff29f202f70754e4fdb --- /dev/null +++ b/crypt/liboqs/crypto/aes/aes_ni.c @@ -0,0 +1,100 @@ +#if defined(WINDOWS) +#define UNUSED +// __attribute__ not supported in VS, is there something else I should define? 
+#else +#define UNUSED __attribute__((unused)) +#endif + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> + +#ifndef AES_ENABLE_NI +#include <assert.h> +void oqs_aes128_load_schedule_ni(UNUSED const uint8_t *key, UNUSED void **_schedule) { + assert(0); +} +void oqs_aes128_free_schedule_ni(UNUSED void *_schedule) { + assert(0); +} +void oqs_aes128_enc_ni(UNUSED const uint8_t *plaintext, UNUSED const void *_schedule, UNUSED uint8_t *ciphertext) { + assert(0); +} +void oqs_aes128_dec_ni(UNUSED const uint8_t *ciphertext, UNUSED const void *_schedule, UNUSED uint8_t *plaintext) { + assert(0); +} +#else + +#include <wmmintrin.h> + +static __m128i key_expand(__m128i key, __m128i keygened) { + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + // The last 4 bytes from aeskeygenassist store the values we want so + // and they need to be xored all four sets of bytes in the result so + keygened = _mm_shuffle_epi32(keygened, _MM_SHUFFLE(3, 3, 3, 3)); + return _mm_xor_si128(key, keygened); +} + +//This is needed since the rcon argument to _mm_aeskeygenassist_si128 +//must be a compile time constaint + +#define key_exp(k, rcon) key_expand(k, _mm_aeskeygenassist_si128(k, rcon)) + +void oqs_aes128_load_schedule_ni(const uint8_t *key, void **_schedule) { + *_schedule = malloc(20 * 16); + assert(*_schedule != NULL); + __m128i *schedule = (__m128i *) *_schedule; + schedule[0] = _mm_loadu_si128((const __m128i *) key); + schedule[1] = key_exp(schedule[0], 0x01); + schedule[2] = key_exp(schedule[1], 0x02); + schedule[3] = key_exp(schedule[2], 0x04); + schedule[4] = key_exp(schedule[3], 0x08); + schedule[5] = key_exp(schedule[4], 0x10); + schedule[6] = key_exp(schedule[5], 0x20); + schedule[7] = key_exp(schedule[6], 0x40); + schedule[8] = key_exp(schedule[7], 0x80); + schedule[9] = key_exp(schedule[8], 0x1b); + schedule[10] = key_exp(schedule[9], 0x36); + // generate 
decryption keys in reverse order. + // schedule[10] is shared by last encryption and first decryption rounds + // schedule[0] is shared by first encryption round and last decryption round + for (size_t i = 0; i < 9; i++) { + schedule[11 + i] = _mm_aesimc_si128(schedule[9 - i]); + } +} + +void oqs_aes128_free_schedule_ni(void *schedule) { + if (schedule != NULL) { + free(schedule); + } +} + +void oqs_aes128_enc_ni(const uint8_t *plaintext, const void *_schedule, uint8_t *ciphertext) { + __m128i *schedule = (__m128i *) _schedule; + __m128i m = _mm_loadu_si128((__m128i *) plaintext); + + m = _mm_xor_si128(m, schedule[0]); + for (size_t i = 1; i < 10; i++) { + m = _mm_aesenc_si128(m, schedule[i]); + } + m = _mm_aesenclast_si128(m, schedule[10]); + + _mm_storeu_si128((__m128i *) ciphertext, m); +} + +void oqs_aes128_dec_ni(const uint8_t *ciphertext, const void *_schedule, uint8_t *plaintext) { + __m128i *schedule = (__m128i *) _schedule; + __m128i m = _mm_loadu_si128((__m128i *) ciphertext); + + m = _mm_xor_si128(m, schedule[10]); + for (size_t i = 1; i < 10; i++) { + m = _mm_aesdec_si128(m, schedule[10 + i]); + } + m = _mm_aesdeclast_si128(m, schedule[0]); + + _mm_storeu_si128((__m128i *) plaintext, m); +} + +#endif diff --git a/crypt/liboqs/crypto/aes/test_aes.c b/crypt/liboqs/crypto/aes/test_aes.c new file mode 100644 index 0000000000000000000000000000000000000000..62db255bfee1ca418fb6d77bb5fff162bcc016b3 --- /dev/null +++ b/crypt/liboqs/crypto/aes/test_aes.c @@ -0,0 +1,276 @@ +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <oqs/rand.h> + +#include "../../ds_benchmark.h" +#include "../../common/common.h" +#include "aes.h" +#include "aes_local.h" + +#define BENCH_DURATION 1 + +#define TEST_ITERATIONS 100 + +#define TEST_REPEATEDLY(x) \ + for (int i = 0; i < TEST_ITERATIONS; i++) { \ + int ok = (x); \ + if (ok != EXIT_SUCCESS) { \ + eprintf("Failure in %s (iteration %d)\n", #x, i); \ + return EXIT_FAILURE; \ + } \ + } + 
+static void print_bytes(uint8_t *bytes, size_t num_bytes) { + for (size_t i = 0; i < num_bytes; i++) { + printf("%02x", (unsigned) bytes[i]); + } +} + +static int test_aes128_correctness_c(OQS_RAND *rand) { + uint8_t key[16], plaintext[16], ciphertext[16], decrypted[16]; + void *schedule = NULL; + OQS_RAND_n(rand, key, 16); + OQS_RAND_n(rand, plaintext, 16); + oqs_aes128_load_schedule_c(key, &schedule); + oqs_aes128_enc_c(plaintext, schedule, ciphertext); + oqs_aes128_dec_c(ciphertext, schedule, decrypted); + oqs_aes128_free_schedule_c(schedule); + if (memcmp(plaintext, decrypted, 16) == 0) { + return EXIT_SUCCESS; + } else { + print_bytes(plaintext, 16); + printf("\n"); + print_bytes(decrypted, 16); + printf("\n"); + return EXIT_FAILURE; + } +} + +#ifdef AES_ENABLE_NI +static int test_aes128_correctness_ni(OQS_RAND *rand) { + uint8_t key[16], plaintext[16], ciphertext[16], decrypted[16]; + void *schedule = NULL; + OQS_RAND_n(rand, key, 16); + OQS_RAND_n(rand, plaintext, 16); + oqs_aes128_load_schedule_ni(key, &schedule); + oqs_aes128_enc_ni(plaintext, schedule, ciphertext); + oqs_aes128_dec_ni(ciphertext, schedule, decrypted); + oqs_aes128_free_schedule_ni(schedule); + if (memcmp(plaintext, decrypted, 16) == 0) { + return EXIT_SUCCESS; + } else { + print_bytes(plaintext, 16); + printf("\n"); + print_bytes(decrypted, 16); + printf("\n"); + return EXIT_FAILURE; + } +} + +static int test_aes128_c_equals_ni(OQS_RAND *rand) { + uint8_t key[16], plaintext[16], ciphertext_c[16], ciphertext_ni[16]; + void *schedule_c = NULL, *schedule_ni = NULL; + OQS_RAND_n(rand, key, 16); + OQS_RAND_n(rand, plaintext, 16); + oqs_aes128_load_schedule_c(key, &schedule_c); + oqs_aes128_load_schedule_ni(key, &schedule_ni); + oqs_aes128_enc_c(plaintext, schedule_c, ciphertext_c); + oqs_aes128_enc_ni(plaintext, schedule_ni, ciphertext_ni); + oqs_aes128_free_schedule_c(schedule_c); + oqs_aes128_free_schedule_ni(schedule_ni); + if (memcmp(ciphertext_c, ciphertext_ni, 16) == 0) { + return 
EXIT_SUCCESS; + } else { + print_bytes(ciphertext_c, 16); + printf("\n"); + print_bytes(ciphertext_ni, 16); + printf("\n"); + return EXIT_FAILURE; + } +} + +static int test_aes128_ecb_correctness_ni(OQS_RAND *rand) { + uint8_t key[16], plaintext[320], ciphertext[320], decrypted[320]; + void *schedule = NULL; + OQS_RAND_n(rand, key, 16); + OQS_RAND_n(rand, plaintext, 320); + oqs_aes128_load_schedule_ni(key, &schedule); + oqs_aes128_ecb_enc_ni(plaintext, 320, schedule, ciphertext); + oqs_aes128_ecb_dec_ni(ciphertext, 320, schedule, decrypted); + oqs_aes128_free_schedule_ni(schedule); + if (memcmp(plaintext, decrypted, 320) == 0) { + return EXIT_SUCCESS; + } else { + print_bytes(plaintext, 320); + printf("\n"); + print_bytes(decrypted, 320); + printf("\n"); + return EXIT_FAILURE; + } +} +#endif + +static int test_aes128_ecb_correctness_c(OQS_RAND *rand) { + uint8_t key[16], plaintext[320], ciphertext[320], decrypted[320]; + void *schedule = NULL; + OQS_RAND_n(rand, key, 16); + OQS_RAND_n(rand, plaintext, 320); + oqs_aes128_load_schedule_c(key, &schedule); + oqs_aes128_ecb_enc_c(plaintext, 320, schedule, ciphertext); + oqs_aes128_ecb_dec_c(ciphertext, 320, schedule, decrypted); + oqs_aes128_free_schedule_c(schedule); + if (memcmp(plaintext, decrypted, 320) == 0) { + return EXIT_SUCCESS; + } else { + print_bytes(plaintext, 320); + printf("\n"); + print_bytes(decrypted, 320); + printf("\n"); + return EXIT_FAILURE; + } +} + +#ifdef USE_OPENSSL +static int test_aes128_ecb_correctness_ossl(OQS_RAND *rand) { + uint8_t key[16], plaintext[320], ciphertext[320], decrypted[320]; + void *schedule = NULL; + OQS_RAND_n(rand, key, 16); + OQS_RAND_n(rand, plaintext, 320); + oqs_aes128_load_schedule_ossl(key, &schedule, 1); + oqs_aes128_ecb_enc_ossl(plaintext, 320, schedule, ciphertext); + oqs_aes128_free_schedule_ossl(schedule); + oqs_aes128_load_schedule_ossl(key, &schedule, 0); + oqs_aes128_ecb_dec_ossl(ciphertext, 320, schedule, decrypted); + 
oqs_aes128_free_schedule_ossl(schedule); + if (memcmp(plaintext, decrypted, 320) == 0) { + return EXIT_SUCCESS; + } else { + print_bytes(plaintext, 320); + printf("\n"); + print_bytes(decrypted, 320); + printf("\n"); + return EXIT_FAILURE; + } +} +#endif + +static void speed_aes128_c(OQS_RAND *rand) { + uint8_t key[16], plaintext[320], ciphertext[320], decrypted[320]; + void *schedule = NULL; + OQS_RAND_n(rand, key, 16); + OQS_RAND_n(rand, plaintext, 320); + TIME_OPERATION_SECONDS({ oqs_aes128_load_schedule_c(key, &schedule); oqs_aes128_free_schedule_c(schedule); }, "oqs_aes128_load_schedule_c", BENCH_DURATION); + + oqs_aes128_load_schedule_c(key, &schedule); + TIME_OPERATION_SECONDS(oqs_aes128_enc_c(plaintext, schedule, ciphertext), "oqs_aes128_enc_c", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_dec_c(ciphertext, schedule, decrypted), "oqs_aes128_dec_c", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_c(plaintext, 320, key, ciphertext), "oqs_aes128_ecb_enc_c", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_c(ciphertext, 320, key, decrypted), "oqs_aes128_ecb_dec_c", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_sch_c(plaintext, 320, schedule, ciphertext), "oqs_aes128_ecb_enc_sch_c", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_sch_c(ciphertext, 320, schedule, decrypted), "oqs_aes128_ecb_dec_sch_c", BENCH_DURATION); + oqs_aes128_free_schedule_c(schedule); +} + +#ifdef AES_ENABLE_NI + +static void speed_aes128_ni(OQS_RAND *rand) { + uint8_t key[16], plaintext[320], ciphertext[320], decrypted[320]; + void *schedule = NULL; + OQS_RAND_n(rand, key, 16); + OQS_RAND_n(rand, plaintext, 320); + TIME_OPERATION_SECONDS({ oqs_aes128_load_schedule_ni(key, &schedule); oqs_aes128_free_schedule_ni(schedule); }, "oqs_aes128_load_schedule_ni", BENCH_DURATION); + + oqs_aes128_load_schedule_ni(key, &schedule); + TIME_OPERATION_SECONDS(oqs_aes128_enc_ni(plaintext, schedule, ciphertext), "oqs_aes128_enc_ni", 
BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_dec_ni(ciphertext, schedule, decrypted), "oqs_aes128_dec_ni", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_ni(plaintext, 320, key, ciphertext), "oqs_aes128_ecb_enc_ni", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_ni(ciphertext, 320, key, decrypted), "oqs_aes128_ecb_dec_ni", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_sch_ni(plaintext, 320, schedule, ciphertext), "oqs_aes128_ecb_enc_sch_ni", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_sch_ni(ciphertext, 320, schedule, decrypted), "oqs_aes128_ecb_dec_sch_ni", BENCH_DURATION); + oqs_aes128_free_schedule_ni(schedule); +} +#endif + +#ifdef USE_OPENSSL +static void speed_aes128_ossl(OQS_RAND *rand) { + uint8_t key[16], plaintext[320], ciphertext[320]; + void *schedule = NULL; + OQS_RAND_n(rand, key, 16); + OQS_RAND_n(rand, plaintext, 320); + TIME_OPERATION_SECONDS(oqs_aes128_load_schedule_ossl(key, &schedule, 1), "oqs_aes128_load_schedule_ossl 1", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_load_schedule_ossl(key, &schedule, 0), "oqs_aes128_load_schedule_ossl 0", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_ossl(plaintext, 320, key, ciphertext), "oqs_aes128_ecb_enc_ossl", BENCH_DURATION); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_ossl(ciphertext, 320, key, plaintext), "oqs_aes128_ecb_dec_ossl", BENCH_DURATION); + oqs_aes128_load_schedule_ossl(key, &schedule, 1); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_sch_ossl(plaintext, 320, schedule, ciphertext), "oqs_aes128_ecb_enc_sch_ossl", BENCH_DURATION); + oqs_aes128_load_schedule_ossl(key, &schedule, 0); + TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_sch_ossl(ciphertext, 320, schedule, plaintext), "oqs_aes128_ecb_dec_sch_ossl", BENCH_DURATION); +} +#endif + +int main(int argc, char **argv) { + int ret; + bool bench = false; + + for (int i = 1; i < argc; i++) { + if (argv[i][0] == '-') { + if (strcmp(argv[i], "--bench") == 0 || 
strcmp(argv[i], "-b") == 0) { + bench = true; + } else { + printf("Usage: ./test_rand [options]\n"); + printf("\nOptions:\n"); + printf(" --bench, -b\n"); + printf(" Run benchmarks\n"); + if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "-help") == 0) || (strcmp(argv[i], "--help") == 0)) { + return EXIT_SUCCESS; + } else { + return EXIT_FAILURE; + } + } + } + } + + printf("=== test_aes correctness ===\n"); + OQS_RAND *rand = OQS_RAND_new(OQS_RAND_alg_default); + if (rand == NULL) { + eprintf("OQS_RAND_new() failed\n"); + goto err; + } + TEST_REPEATEDLY(test_aes128_correctness_c(rand)); +#ifdef AES_ENABLE_NI + TEST_REPEATEDLY(test_aes128_correctness_ni(rand)); + TEST_REPEATEDLY(test_aes128_c_equals_ni(rand)); +#endif + TEST_REPEATEDLY(test_aes128_ecb_correctness_c(rand)); +#ifdef AES_ENABLE_NI + TEST_REPEATEDLY(test_aes128_ecb_correctness_ni(rand)); +#endif +#ifdef USE_OPENSSL + TEST_REPEATEDLY(test_aes128_ecb_correctness_ossl(rand)); +#endif + printf("Tests passed.\n\n"); + + if (bench) { + printf("=== test_aes performance ===\n"); + PRINT_TIMER_HEADER + speed_aes128_c(rand); +#ifdef AES_ENABLE_NI + speed_aes128_ni(rand); +#endif +#ifdef USE_OPENSSL + speed_aes128_ossl(rand); +#endif + PRINT_TIMER_FOOTER + } + + ret = EXIT_SUCCESS; + goto cleanup; +err: + ret = EXIT_FAILURE; +cleanup: + OQS_RAND_free(rand); + return ret; +} diff --git a/crypt/liboqs/crypto/rand/Makefile.am b/crypt/liboqs/crypto/rand/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..ee421b82e26c6d4c01d9093bc3ab25d645872905 --- /dev/null +++ b/crypt/liboqs/crypto/rand/Makefile.am @@ -0,0 +1,16 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = librand.la + +librand_la_SOURCES = rand.c + +librand_la_CPPFLAGS = -I../../../include +if USE_OPENSSL +librand_la_CPPFLAGS += -I$(OPENSSL_DIR)/include +endif +librand_la_CPPFLAGS += $(AM_CPPFLAGS) + +librand_la_LDFLAGS = +if USE_OPENSSL +librand_la_LDFLAGS += -L$(OPENSSL_DIR)/lib +endif +librand_la_LDFLAGS += $(AM_LDFLAGS) 
/**
 * Statistical distance of the observed byte histogram from the uniform
 * distribution over 256 values:
 *
 *     SD(X,U) = 1/2 * sum_z | Pr[X=z] - 1/256 |
 *
 * @param occurrences  Per-byte-value sample counts.
 * @return Statistical distance in [0, 1].
 */
double OQS_RAND_test_statistical_distance_from_uniform(const unsigned long occurrences[256]) {

	const double uniform_p = 1.0 / 256.0;
	unsigned long total = 0;
	double sum_abs_diff = 0.0;

	/* Total number of recorded samples. */
	for (int z = 0; z < 256; z++) {
		total += occurrences[z];
	}

	/* Accumulate | 1/256 - empirical probability | for each byte value. */
	for (int z = 0; z < 256; z++) {
		sum_abs_diff += fabs(uniform_p - (double) occurrences[z] / (double) total);
	}

	return sum_abs_diff / 2.0;
}
< 5) { + return ZSCORE_SPARSE; + } + + for (chsq = i = 0; i < 256; i++) { + chsq += pow(occurrences[i] - total / 256., 2) * 256. / total; + } + + if (chsq <= quantiles[0]) { + return ZSCORE_BIGNEG; + } + for (i = 1; i < 102; i++) { + if (chsq <= quantiles[i]) { + return (i - 51) / 10.0; + } + } + return ZSCORE_BIGPOS; +} +// +// convenience function for statistics reporting +void OQS_RAND_report_statistics(const unsigned long occurrences[256], const char *indent) { + double zscore = OQS_RAND_zscore_deviation_from_uniform(occurrences); + printf("%sStatistical distance from uniform: %12.10f\n", indent, OQS_RAND_test_statistical_distance_from_uniform(occurrences)); + printf("%s Z-score deviation from uniform: ", indent); + if (zscore == ZSCORE_BIGNEG) { + printf("less than -5.0 sigma ***\n"); + } else if (zscore == ZSCORE_BIGPOS) { + printf("more than +5.0 sigma ***\n"); + } else if (zscore == ZSCORE_SPARSE) { + printf("(too few data)\n"); + } else { + printf("about %.1f sigma\n", zscore); + } + return; +} + +int OQS_RAND_get_system_entropy(uint8_t *buf, size_t n) { + int result = 0; + +#if !defined(WINDOWS) + int fd = 0; +#endif + + if (!buf) { + goto err; + } + +#if defined(WINDOWS) + HCRYPTPROV hCryptProv; + if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT) || + !CryptGenRandom(hCryptProv, (DWORD) n, buf)) { + goto err; + } +#else + fd = open("/dev/urandom", O_RDONLY); + if (fd <= 0) { + goto err; + } + size_t r = read(fd, buf, n); + if (r != n) { + goto err; + } +#endif + result = 1; + +err: +#if !defined(WINDOWS) + if (fd > 0) { + close(fd); + } +#endif + + return result; +} diff --git a/crypt/liboqs/crypto/rand/rand.h b/crypt/liboqs/crypto/rand/rand.h new file mode 100644 index 0000000000000000000000000000000000000000..710e449273cee3b5ea0880fc0b8033e16bdade83 --- /dev/null +++ b/crypt/liboqs/crypto/rand/rand.h @@ -0,0 +1,98 @@ +/** + * \file rand.h + * \brief Header defining the generic OQS PRNG + */ + +#ifndef __OQS_RAND_H 
+#define __OQS_RAND_H + +#include <stddef.h> +#include <stdint.h> + +enum OQS_RAND_alg_name { + OQS_RAND_alg_default, + OQS_RAND_alg_urandom_chacha20, + OQS_RAND_alg_urandom_aesctr, +}; + +typedef struct OQS_RAND OQS_RAND; + +/** + * OQS PRNG object + */ +struct OQS_RAND { + + /** + * Specifies the name of the random number function + */ + char *method_name; + + /** + * Estimated number of bits of security provided against a classical + * attacker + */ + uint16_t estimated_classical_security; + + /** + * Estimated number of bits of security provided against a quantum + * attacker + */ + uint16_t estimated_quantum_security; + + /** + * Pointer for storing the state of the PRNG + */ + void *ctx; + + /** + * Function which returns an 8-bit random unsigned integer + */ + uint8_t (*rand_8)(OQS_RAND *r); + + /** + * Function which returns an 32-bit random unsigned integer + */ + uint32_t (*rand_32)(OQS_RAND *r); + + /** + * Function which returns an 64-bit random unsigned integer + */ + uint64_t (*rand_64)(OQS_RAND *r); + + /** + * Function which generates n random 8-bit unsigned integers + * + * @param out : pointer to an array large enough to store the output integers (\f$\text{size} \geq n\f$) + * @param n : number of integers to generate + */ + void (*rand_n)(OQS_RAND *r, uint8_t *out, size_t n); + + /** + * Pointer to a function for freeing the allocated key exchange structure + * + * @param k : Key exchange structure + * + */ + void (*free)(OQS_RAND *r); +}; + +OQS_RAND *OQS_RAND_new(enum OQS_RAND_alg_name alg_name); + +uint8_t OQS_RAND_8(OQS_RAND *r); +uint32_t OQS_RAND_32(OQS_RAND *r); +uint64_t OQS_RAND_64(OQS_RAND *r); +void OQS_RAND_n(OQS_RAND *r, uint8_t *out, size_t n); + +void OQS_RAND_free(OQS_RAND *r); + +void OQS_RAND_test_record_occurrence(const unsigned char b, unsigned long occurrences[256]); +double OQS_RAND_test_statistical_distance_from_uniform(const unsigned long occurrences[256]); + +#define ZSCORE_SPARSE (999.999) +#define ZSCORE_BIGNEG (-100.0) 
+#define ZSCORE_BIGPOS (+100.0) +double OQS_RAND_zscore_deviation_from_uniform(const unsigned long occurrences[256]); +void OQS_RAND_report_statistics(const unsigned long occurrences[256], const char *indent); + +int OQS_RAND_get_system_entropy(uint8_t *buf, size_t n); +#endif diff --git a/crypt/liboqs/crypto/rand/test_rand.c b/crypt/liboqs/crypto/rand/test_rand.c new file mode 100644 index 0000000000000000000000000000000000000000..253a8e8ba17d3f0f059e17ae93ae94b1cb6277a3 --- /dev/null +++ b/crypt/liboqs/crypto/rand/test_rand.c @@ -0,0 +1,197 @@ +#include <inttypes.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <oqs/rand.h> + +#include "../../common/common.h" + +struct rand_testcase { + enum OQS_RAND_alg_name alg_name; +}; + +/* Add new testcases here */ +struct rand_testcase rand_testcases[] = { + {OQS_RAND_alg_urandom_chacha20}, + {OQS_RAND_alg_urandom_aesctr}, +}; + +#define RAND_TEST_ITERATIONS 10000000L + +static void rand_test_distribution_8(OQS_RAND *rand, unsigned long occurrences[256], int iterations) { + uint8_t b; + for (int i = 0; i < iterations; i++) { + b = OQS_RAND_8(rand); + OQS_RAND_test_record_occurrence(b, occurrences); + } +} + +static void rand_test_distribution_32(OQS_RAND *rand, unsigned long occurrences[256], int iterations) { + uint32_t x; + for (int i = 0; i < iterations; i++) { + x = OQS_RAND_32(rand); + uint8_t b; + for (size_t j = 0; j < sizeof(uint32_t); j++) { + b = (x >> j) & 0xFF; + OQS_RAND_test_record_occurrence(b, occurrences); + } + } +} + +static void rand_test_distribution_64(OQS_RAND *rand, unsigned long occurrences[256], int iterations) { + uint64_t x; + for (int i = 0; i < iterations; i++) { + x = OQS_RAND_64(rand); + uint8_t b; + for (size_t j = 0; j < sizeof(uint64_t); j++) { + b = (x >> j) & 0xFF; + OQS_RAND_test_record_occurrence(b, occurrences); + } + } +} + +static int rand_test_distribution_n(OQS_RAND *rand, unsigned long occurrences[256], int 
len) { + uint8_t *x = malloc(len); + if (x == NULL) { + return 0; + } + OQS_RAND_n(rand, x, len); + for (int i = 0; i < len; i++) { + OQS_RAND_test_record_occurrence(x[i], occurrences); + } + free(x); + return 1; +} + +#define PRINT_HEX_STRING(label, str, len) \ + { \ + printf("%-20s (%4zu bytes): ", (label), (size_t)(len)); \ + for (size_t i = 0; i < (len); i++) { \ + printf("%02X", ((unsigned char *) (str))[i]); \ + } \ + printf("\n"); \ + } + +static int rand_test_distribution_wrapper(enum OQS_RAND_alg_name alg_name, int iterations, bool quiet) { + + OQS_RAND *rand = OQS_RAND_new(alg_name); + if (rand == NULL) { + eprintf("rand is NULL\n"); + return 0; + } + + if (!quiet) { + printf("================================================================================\n"); + printf("Sample outputs of PRNG %s\n", rand->method_name); + printf("================================================================================\n"); + + uint8_t x[256]; + OQS_RAND_n(rand, x, 256); + PRINT_HEX_STRING("OQS_RAND_n, n = 256", x, 256) + + uint8_t y8 = OQS_RAND_8(rand); + PRINT_HEX_STRING("OQS_RAND_8", (uint8_t *) &y8, sizeof(y8)); + y8 = OQS_RAND_8(rand); + PRINT_HEX_STRING("OQS_RAND_8", (uint8_t *) &y8, sizeof(y8)); + + uint32_t y32 = OQS_RAND_32(rand); + PRINT_HEX_STRING("OQS_RAND_32", (uint8_t *) &y32, sizeof(y32)); + y32 = OQS_RAND_32(rand); + PRINT_HEX_STRING("OQS_RAND_32", (uint8_t *) &y32, sizeof(y32)); + + uint64_t y64 = OQS_RAND_64(rand); + PRINT_HEX_STRING("OQS_RAND_64", (uint8_t *) &y64, sizeof(y64)); + y64 = OQS_RAND_64(rand); + PRINT_HEX_STRING("OQS_RAND_64", (uint8_t *) &y64, sizeof(y64)); + + OQS_RAND_n(rand, x, 256); + PRINT_HEX_STRING("OQS_RAND_n, n = 256", x, 256) + } + + printf("================================================================================\n"); + printf("Testing distribution of PRNG %s\n", rand->method_name); + printf("================================================================================\n"); + + unsigned long occurrences[256]; + 
for (int i = 0; i < 256; i++) { + occurrences[i] = 0; + } + + printf("1-byte mode for %d iterations\n", 8 * iterations); + rand_test_distribution_8(rand, occurrences, 8 * iterations); + OQS_RAND_report_statistics(occurrences, " "); + + for (int i = 0; i < 256; i++) { + occurrences[i] = 0; + } + + printf("4-byte mode for %d iterations\n", 2 * iterations); + rand_test_distribution_32(rand, occurrences, 2 * iterations); + OQS_RAND_report_statistics(occurrences, " "); + + for (int i = 0; i < 256; i++) { + occurrences[i] = 0; + } + + printf("8-byte mode for %d iterations\n", iterations); + rand_test_distribution_64(rand, occurrences, iterations); + OQS_RAND_report_statistics(occurrences, " "); + + for (int i = 0; i < 256; i++) { + occurrences[i] = 0; + } + + printf("n-byte mode for %d bytes\n", 8 * iterations); + rand_test_distribution_n(rand, occurrences, 8 * iterations); + OQS_RAND_report_statistics(occurrences, " "); + + OQS_RAND_free(rand); + + return 1; +} + +int main(int argc, char **argv) { + + int success; + bool quiet = false; + + for (int i = 1; i < argc; i++) { + if (argv[i][0] == '-') { + if (strcmp(argv[i], "--quiet") == 0 || strcmp(argv[i], "-q") == 0) { + quiet = true; + } else { + printf("Usage: ./test_rand [options]\n"); + printf("\nOptions:\n"); + printf(" --quiet, -q\n"); + printf(" Less verbose output\n"); + if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "-help") == 0) || (strcmp(argv[i], "--help") == 0)) { + return EXIT_SUCCESS; + } else { + return EXIT_FAILURE; + } + } + } + } + + size_t rand_testcases_len = sizeof(rand_testcases) / sizeof(struct rand_testcase); + for (size_t i = 0; i < rand_testcases_len; i++) { + success = rand_test_distribution_wrapper(rand_testcases[i].alg_name, RAND_TEST_ITERATIONS, quiet); + if (success != 1) { + goto err; + } + } + + success = 1; + goto cleanup; + +err: + success = 0; + eprintf("ERROR!\n"); + +cleanup: + + return (success == 1) ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/crypt/liboqs/crypto/rand_urandom_aesctr/Makefile.am b/crypt/liboqs/crypto/rand_urandom_aesctr/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..b587d3cb90341eea827d4747736ce15f8e062790 --- /dev/null +++ b/crypt/liboqs/crypto/rand_urandom_aesctr/Makefile.am @@ -0,0 +1,7 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = librandaesctr.la + +librandaesctr_la_SOURCES = rand_urandom_aesctr.c +librandaesctr_la_CPPFLAGS = -I../../../include -I. +librandaesctr_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.c b/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.c new file mode 100644 index 0000000000000000000000000000000000000000..a10c41baf2253c0c66431da850b889cfd013a2df --- /dev/null +++ b/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.c @@ -0,0 +1,142 @@ +#include <sys/types.h> +#if defined(WINDOWS) +#include <windows.h> +#include <Wincrypt.h> +#else +#include <strings.h> +#include <sys/uio.h> +#include <unistd.h> +#endif +#include <fcntl.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> //memcpy + +#include <assert.h> +#include <oqs/aes.h> +#include <oqs/rand.h> +#include <oqs/rand_urandom_aesctr.h> + +#if defined(WINDOWS) +#define strdup _strdup // for strdup deprecation warning +#endif + +typedef struct oqs_rand_urandom_aesctr_ctx { + uint64_t ctr; + void *schedule; + uint8_t cache[64]; + size_t cache_next_byte; +} oqs_rand_urandom_aesctr_ctx; + +static oqs_rand_urandom_aesctr_ctx *oqs_rand_urandom_aesctr_ctx_new() { + oqs_rand_urandom_aesctr_ctx *rand_ctx = NULL; + rand_ctx = (oqs_rand_urandom_aesctr_ctx *) malloc(sizeof(oqs_rand_urandom_aesctr_ctx)); + if (rand_ctx == NULL) { + goto err; + } + uint8_t key[16]; + if (!OQS_RAND_get_system_entropy(key, 16)) { + goto err; + } + OQS_AES128_load_schedule(key, &rand_ctx->schedule, 1); + rand_ctx->cache_next_byte = 64; // cache is empty + rand_ctx->ctr = 
0; + goto okay; +err: + if (rand_ctx) { + free(rand_ctx); + } + return NULL; +okay: + return rand_ctx; +} + +void OQS_RAND_urandom_aesctr_n(OQS_RAND *r, uint8_t *out, size_t n) { + oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx; + const uint64_t num_full_blocks = n / 16; + uint64_t *half_blocks = (uint64_t *) out; + for (size_t i = 0; i < num_full_blocks; i++) { + half_blocks[2 * i] = rand_ctx->ctr++; + half_blocks[2 * i + 1] = rand_ctx->ctr++; + } + OQS_AES128_ECB_enc_sch(out, 16 * num_full_blocks, rand_ctx->schedule, out); + if (n % 16 > 0) { + uint8_t tmp_8[16]; + uint64_t *tmp_64 = (uint64_t *) tmp_8; + tmp_64[0] = rand_ctx->ctr++; + tmp_64[1] = rand_ctx->ctr++; + OQS_AES128_ECB_enc_sch(tmp_8, 16, rand_ctx->schedule, tmp_8); + memcpy(out + 16 * num_full_blocks, tmp_8, n % 16); + } +} + +static void OQS_RAND_urandom_aesctr_fill_cache(OQS_RAND *r) { + oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx; + OQS_RAND_urandom_aesctr_n(r, rand_ctx->cache, sizeof(rand_ctx->cache)); + rand_ctx->cache_next_byte = 0; +} + +uint8_t OQS_RAND_urandom_aesctr_8(OQS_RAND *r) { + oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx; + if (rand_ctx->cache_next_byte > sizeof(rand_ctx->cache) - 1) { + OQS_RAND_urandom_aesctr_fill_cache(r); + } + uint8_t out = rand_ctx->cache[rand_ctx->cache_next_byte]; + rand_ctx->cache_next_byte += 1; + return out; +} + +uint32_t OQS_RAND_urandom_aesctr_32(OQS_RAND *r) { + oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx; + if (rand_ctx->cache_next_byte > sizeof(rand_ctx->cache) - 4) { + OQS_RAND_urandom_aesctr_fill_cache(r); + } + uint32_t out; + memcpy(&out, &rand_ctx->cache[rand_ctx->cache_next_byte], 4); + rand_ctx->cache_next_byte += 4; + return out; +} + +uint64_t OQS_RAND_urandom_aesctr_64(OQS_RAND *r) { + oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx; + if (rand_ctx->cache_next_byte > 
sizeof(rand_ctx->cache) - 8) { + OQS_RAND_urandom_aesctr_fill_cache(r); + } + uint64_t out; + memcpy(&out, &rand_ctx->cache[rand_ctx->cache_next_byte], 8); + rand_ctx->cache_next_byte += 8; + return out; +} + +void OQS_RAND_urandom_aesctr_free(OQS_RAND *r) { + if (r) { + oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx; + if (rand_ctx) { + OQS_AES128_free_schedule(rand_ctx->schedule); + } + free(r->ctx); + free(r->method_name); + } + free(r); +} + +OQS_RAND *OQS_RAND_urandom_aesctr_new() { + OQS_RAND *r = malloc(sizeof(OQS_RAND)); + if (r == NULL) { + return NULL; + } + r->method_name = strdup("urandom_aesctr"); + r->ctx = oqs_rand_urandom_aesctr_ctx_new(); + if (r->ctx == NULL || r->method_name == NULL) { + OQS_RAND_urandom_aesctr_free(r); + return NULL; + } + r->estimated_classical_security = 128; + r->estimated_quantum_security = 64; // Grover search + r->rand_8 = &OQS_RAND_urandom_aesctr_8; + r->rand_32 = &OQS_RAND_urandom_aesctr_32; + r->rand_64 = &OQS_RAND_urandom_aesctr_64; + r->rand_n = &OQS_RAND_urandom_aesctr_n; + r->free = &OQS_RAND_urandom_aesctr_free; + return r; +} diff --git a/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.h b/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.h new file mode 100644 index 0000000000000000000000000000000000000000..d13df4f1f624071710662a588137b0fc00b58973 --- /dev/null +++ b/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.h @@ -0,0 +1,23 @@ +/** + * \file rand_urandom_aesctr.h + * \brief Header for the chacha implementation of OQS_RAND + */ + +#ifndef __OQS_RAND_URANDOM_AESCTR_H +#define __OQS_RAND_URANDOM_AESCTR_H + +#include <stddef.h> +#include <stdint.h> + +#include <oqs/rand.h> + +OQS_RAND *OQS_RAND_urandom_aesctr_new(); + +uint8_t OQS_RAND_urandom_aesctr_8(OQS_RAND *r); +uint32_t OQS_RAND_urandom_aesctr_32(OQS_RAND *r); +uint64_t OQS_RAND_urandom_aesctr_64(OQS_RAND *r); +void OQS_RAND_urandom_aesctr_n(OQS_RAND *r, uint8_t *out, size_t n); + +void 
OQS_RAND_urandom_aesctr_free(OQS_RAND *r); + +#endif diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/Makefile.am b/crypt/liboqs/crypto/rand_urandom_chacha20/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..56f5aad7e71c60e43d0d2ef61dd012cb2e79ca89 --- /dev/null +++ b/crypt/liboqs/crypto/rand_urandom_chacha20/Makefile.am @@ -0,0 +1,8 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = librandchacha20.la + +librandchacha20_la_SOURCES = rand_urandom_chacha20.c + +librandchacha20_la_CPPFLAGS = -I../../../include -I. +librandchacha20_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/external/LICENSE.txt b/crypt/liboqs/crypto/rand_urandom_chacha20/external/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..d21eeeb7a8dc655a6e6202844ffdc5c191b04fff --- /dev/null +++ b/crypt/liboqs/crypto/rand_urandom_chacha20/external/LICENSE.txt @@ -0,0 +1 @@ +Public domain. \ No newline at end of file diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/external/chacha20.c b/crypt/liboqs/crypto/rand_urandom_chacha20/external/chacha20.c new file mode 100644 index 0000000000000000000000000000000000000000..cc61d4c74aae7bda4ff9ee532a6bd8b99715b328 --- /dev/null +++ b/crypt/liboqs/crypto/rand_urandom_chacha20/external/chacha20.c @@ -0,0 +1,106 @@ +/* Adapted from chacha-ref.c version 20080118, D. J. Bernstein, Public domain. 
+ * http://cr.yp.to/streamciphers/timings/estreambench/submissions/salsa20/chacha8/ref/chacha.c + */ + +#include <stdint.h> +#include <string.h> + +#include "ecrypt-portable.h" + +#define ROTATE(v, c) (ROTL32(v, c)) +#define XOR(v, w) ((v) ^ (w)) +#define PLUS(v, w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v), 1)) + +#define QUARTERROUND(a, b, c, d) \ + x[a] = PLUS(x[a], x[b]); \ + x[d] = ROTATE(XOR(x[d], x[a]), 16); \ + x[c] = PLUS(x[c], x[d]); \ + x[b] = ROTATE(XOR(x[b], x[c]), 12); \ + x[a] = PLUS(x[a], x[b]); \ + x[d] = ROTATE(XOR(x[d], x[a]), 8); \ + x[c] = PLUS(x[c], x[d]); \ + x[b] = ROTATE(XOR(x[b], x[c]), 7); + +static void salsa20_wordtobyte(u8 output[64], const u32 input[16]) { + u32 x[16]; + int i; + + for (i = 0; i < 16; ++i) + x[i] = input[i]; + for (i = 8; i > 0; i -= 2) { + QUARTERROUND(0, 4, 8, 12) + QUARTERROUND(1, 5, 9, 13) + QUARTERROUND(2, 6, 10, 14) + QUARTERROUND(3, 7, 11, 15) + QUARTERROUND(0, 5, 10, 15) + QUARTERROUND(1, 6, 11, 12) + QUARTERROUND(2, 7, 8, 13) + QUARTERROUND(3, 4, 9, 14) + } + for (i = 0; i < 16; ++i) + x[i] = PLUS(x[i], input[i]); + for (i = 0; i < 16; ++i) + U32TO8_LITTLE(output + 4 * i, x[i]); +} + +static const char sigma[16] = "expand 32-byte k"; + +static void ECRYPT_keysetup(u32 input[16], const u8 k[32]) { + const char *constants; + + input[4] = U8TO32_LITTLE(k + 0); + input[5] = U8TO32_LITTLE(k + 4); + input[6] = U8TO32_LITTLE(k + 8); + input[7] = U8TO32_LITTLE(k + 12); + k += 16; + constants = sigma; + input[8] = U8TO32_LITTLE(k + 0); + input[9] = U8TO32_LITTLE(k + 4); + input[10] = U8TO32_LITTLE(k + 8); + input[11] = U8TO32_LITTLE(k + 12); + input[0] = U8TO32_LITTLE(constants + 0); + input[1] = U8TO32_LITTLE(constants + 4); + input[2] = U8TO32_LITTLE(constants + 8); + input[3] = U8TO32_LITTLE(constants + 12); +} + +static void ECRYPT_ivsetup(u32 input[16], const u8 iv[8]) { + input[12] = 0; + input[13] = 0; + input[14] = U8TO32_LITTLE(iv + 0); + input[15] = U8TO32_LITTLE(iv + 4); +} + +static void 
ECRYPT_encrypt_bytes(u32 input[16], const u8 *m, u8 *c, size_t bytes) { + u8 output[64]; + size_t i; + + if (!bytes) + return; + for (;;) { + salsa20_wordtobyte(output, input); + input[12] = PLUSONE(input[12]); + if (!input[12]) { + input[13] = PLUSONE(input[13]); + /* stopping at 2^70 bytes per nonce is user's responsibility */ + } + if (bytes <= 64) { + for (i = 0; i < bytes; ++i) + c[i] = m[i] ^ output[i]; + return; + } + for (i = 0; i < 64; ++i) + c[i] = m[i] ^ output[i]; + bytes -= 64; + c += 64; + m += 64; + } +} + +static void ECRYPT_keystream_bytes(u32 input[16], u8 *stream, u32 bytes) { + u32 i; + for (i = 0; i < bytes; ++i) + stream[i] = 0; + ECRYPT_encrypt_bytes(input, stream, stream, bytes); +} diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-config.h b/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-config.h new file mode 100644 index 0000000000000000000000000000000000000000..6525f4af72aff68ffae19a422ea452934106ea76 --- /dev/null +++ b/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-config.h @@ -0,0 +1,272 @@ +/* ecrypt-config.h */ + +/* *** Normally, it should not be necessary to edit this file. *** */ + +#ifndef ECRYPT_CONFIG +#define ECRYPT_CONFIG + +/* ------------------------------------------------------------------------- */ + +/* Guess the endianness of the target architecture. 
*/ + +/* + * The LITTLE endian machines: + */ +#if defined(__ultrix) /* Older MIPS */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(__alpha) /* Alpha */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(i386) /* x86 (gcc) */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(__i386) /* x86 (gcc) */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(_M_IX86) /* x86 (MSC, Borland) */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(_MSC_VER) /* x86 (surely MSC) */ +#define ECRYPT_LITTLE_ENDIAN +#elif defined(__INTEL_COMPILER) /* x86 (surely Intel compiler icl.exe) */ +#define ECRYPT_LITTLE_ENDIAN + +/* + * The BIG endian machines: + */ +#elif defined(sun) /* Newer Sparc's */ +#define ECRYPT_BIG_ENDIAN +#elif defined(__ppc__) /* PowerPC */ +#define ECRYPT_BIG_ENDIAN + +/* + * Finally machines with UNKNOWN endianness: + */ +#elif defined(_AIX) /* RS6000 */ +#define ECRYPT_UNKNOWN +#elif defined(__hpux) /* HP-PA */ +#define ECRYPT_UNKNOWN +#elif defined(__aux) /* 68K */ +#define ECRYPT_UNKNOWN +#elif defined(__dgux) /* 88K (but P6 in latest boxes) */ +#define ECRYPT_UNKNOWN +#elif defined(__sgi) /* Newer MIPS */ +#define ECRYPT_UNKNOWN +#else /* Any other processor */ +#define ECRYPT_UNKNOWN +#endif + +/* ------------------------------------------------------------------------- */ + +/* + * Find minimal-width types to store 8-bit, 16-bit, 32-bit, and 64-bit + * integers. + * + * Note: to enable 64-bit types on 32-bit compilers, it might be + * necessary to switch from ISO C90 mode to ISO C99 mode (e.g., gcc + * -std=c99). 
+ */ + +#include <limits.h> + +/* --- check char --- */ + +#if (UCHAR_MAX / 0xFU > 0xFU) +#ifndef I8T +#define I8T char +#define U8C(v) (v##U) + +#if (UCHAR_MAX == 0xFFU) +#define ECRYPT_I8T_IS_BYTE +#endif + +#endif + +#if (UCHAR_MAX / 0xFFU > 0xFFU) +#ifndef I16T +#define I16T char +#define U16C(v) (v##U) +#endif + +#if (UCHAR_MAX / 0xFFFFU > 0xFFFFU) +#ifndef I32T +#define I32T char +#define U32C(v) (v##U) +#endif + +#if (UCHAR_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU) +#ifndef I64T +#define I64T char +#define U64C(v) (v##U) +#define ECRYPT_NATIVE64 +#endif + +#endif +#endif +#endif +#endif + +/* --- check short --- */ + +#if (USHRT_MAX / 0xFU > 0xFU) +#ifndef I8T +#define I8T short +#define U8C(v) (v##U) + +#if (USHRT_MAX == 0xFFU) +#define ECRYPT_I8T_IS_BYTE +#endif + +#endif + +#if (USHRT_MAX / 0xFFU > 0xFFU) +#ifndef I16T +#define I16T short +#define U16C(v) (v##U) +#endif + +#if (USHRT_MAX / 0xFFFFU > 0xFFFFU) +#ifndef I32T +#define I32T short +#define U32C(v) (v##U) +#endif + +#if (USHRT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU) +#ifndef I64T +#define I64T short +#define U64C(v) (v##U) +#define ECRYPT_NATIVE64 +#endif + +#endif +#endif +#endif +#endif + +/* --- check int --- */ + +#if (UINT_MAX / 0xFU > 0xFU) +#ifndef I8T +#define I8T int +#define U8C(v) (v##U) + +#if (ULONG_MAX == 0xFFU) +#define ECRYPT_I8T_IS_BYTE +#endif + +#endif + +#if (UINT_MAX / 0xFFU > 0xFFU) +#ifndef I16T +#define I16T int +#define U16C(v) (v##U) +#endif + +#if (UINT_MAX / 0xFFFFU > 0xFFFFU) +#ifndef I32T +#define I32T int +#define U32C(v) (v##U) +#endif + +#if (UINT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU) +#ifndef I64T +#define I64T int +#define U64C(v) (v##U) +#define ECRYPT_NATIVE64 +#endif + +#endif +#endif +#endif +#endif + +/* --- check long --- */ + +#if (ULONG_MAX / 0xFUL > 0xFUL) +#ifndef I8T +#define I8T long +#define U8C(v) (v##UL) + +#if (ULONG_MAX == 0xFFUL) +#define ECRYPT_I8T_IS_BYTE +#endif + +#endif + +#if (ULONG_MAX / 0xFFUL > 0xFFUL) +#ifndef I16T +#define I16T long +#define U16C(v) 
(v##UL) +#endif + +#if (ULONG_MAX / 0xFFFFUL > 0xFFFFUL) +#ifndef I32T +#define I32T long +#define U32C(v) (v##UL) +#endif + +#if (ULONG_MAX / 0xFFFFFFFFUL > 0xFFFFFFFFUL) +#ifndef I64T +#define I64T long +#define U64C(v) (v##UL) +#define ECRYPT_NATIVE64 +#endif + +#endif +#endif +#endif +#endif + +/* --- check long long --- */ + +#ifdef ULLONG_MAX + +#if (ULLONG_MAX / 0xFULL > 0xFULL) +#ifndef I8T +#define I8T long long +#define U8C(v) (v##ULL) + +#if (ULLONG_MAX == 0xFFULL) +#define ECRYPT_I8T_IS_BYTE +#endif + +#endif + +#if (ULLONG_MAX / 0xFFULL > 0xFFULL) +#ifndef I16T +#define I16T long long +#define U16C(v) (v##ULL) +#endif + +#if (ULLONG_MAX / 0xFFFFULL > 0xFFFFULL) +#ifndef I32T +#define I32T long long +#define U32C(v) (v##ULL) +#endif + +#if (ULLONG_MAX / 0xFFFFFFFFULL > 0xFFFFFFFFULL) +#ifndef I64T +#define I64T long long +#define U64C(v) (v##ULL) +#endif + +#endif +#endif +#endif +#endif + +#endif + +/* --- check __int64 --- */ + +#ifdef _UI64_MAX + +#if (_UI64_MAX / 0xFFFFFFFFui64 > 0xFFFFFFFFui64) +#ifndef I64T +#define I64T __int64 +#define U64C(v) (v##ui64) +#endif + +#endif + +#endif + +/* ------------------------------------------------------------------------- */ + +#endif diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-portable.h b/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-portable.h new file mode 100644 index 0000000000000000000000000000000000000000..028ddf8e89d401f1fdb320706e6151cb59a5969d --- /dev/null +++ b/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-portable.h @@ -0,0 +1,295 @@ +/* ecrypt-portable.h */ + +/* + * WARNING: the conversions defined below are implemented as macros, + * and should be used carefully. They should NOT be used with + * parameters which perform some action. E.g., the following two lines + * are not equivalent: + * + * 1) ++x; y = ROTL32(x, n); + * 2) y = ROTL32(++x, n); + */ + +/* + * *** Please do not edit this file. 
*** + * + * The default macros can be overridden for specific architectures by + * editing 'ecrypt-machine.h'. + */ + +#ifndef ECRYPT_PORTABLE +#define ECRYPT_PORTABLE + +#include "ecrypt-config.h" + +/* ------------------------------------------------------------------------- */ + +/* + * The following types are defined (if available): + * + * u8: unsigned integer type, at least 8 bits + * u16: unsigned integer type, at least 16 bits + * u32: unsigned integer type, at least 32 bits + * u64: unsigned integer type, at least 64 bits + * + * s8, s16, s32, s64 -> signed counterparts of u8, u16, u32, u64 + * + * The selection of minimum-width integer types is taken care of by + * 'ecrypt-config.h'. Note: to enable 64-bit types on 32-bit + * compilers, it might be necessary to switch from ISO C90 mode to ISO + * C99 mode (e.g., gcc -std=c99). + */ + +#ifdef I8T +typedef signed I8T s8; +typedef unsigned I8T u8; +#endif + +#ifdef I16T +typedef signed I16T s16; +typedef unsigned I16T u16; +#endif + +#ifdef I32T +typedef signed I32T s32; +typedef unsigned I32T u32; +#endif + +#ifdef I64T +typedef signed I64T s64; +typedef unsigned I64T u64; +#endif + +/* + * The following macros are used to obtain exact-width results. + */ + +#define U8V(v) ((u8)(v) &U8C(0xFF)) +#define U16V(v) ((u16)(v) &U16C(0xFFFF)) +#define U32V(v) ((u32)(v) &U32C(0xFFFFFFFF)) +#define U64V(v) ((u64)(v) &U64C(0xFFFFFFFFFFFFFFFF)) + +/* ------------------------------------------------------------------------- */ + +/* + * The following macros return words with their bits rotated over n + * positions to the left/right. 
+ */ + +#define ECRYPT_DEFAULT_ROT + +#define ROTL8(v, n) \ + (U8V((v) << (n)) | ((v) >> (8 - (n)))) + +#define ROTL16(v, n) \ + (U16V((v) << (n)) | ((v) >> (16 - (n)))) + +#define ROTL32(v, n) \ + (U32V((v) << (n)) | ((v) >> (32 - (n)))) + +#define ROTL64(v, n) \ + (U64V((v) << (n)) | ((v) >> (64 - (n)))) + +#define ROTR8(v, n) ROTL8(v, 8 - (n)) +#define ROTR16(v, n) ROTL16(v, 16 - (n)) +#define ROTR32(v, n) ROTL32(v, 32 - (n)) +#define ROTR64(v, n) ROTL64(v, 64 - (n)) + +/* ------------------------------------------------------------------------- */ + +/* + * The following macros return a word with bytes in reverse order. + */ + +#define ECRYPT_DEFAULT_SWAP + +#define SWAP16(v) \ + ROTL16(v, 8) + +#define SWAP32(v) \ + ((ROTL32(v, 8) & U32C(0x00FF00FF)) | \ + (ROTL32(v, 24) & U32C(0xFF00FF00))) + +#ifdef ECRYPT_NATIVE64 +#define SWAP64(v) \ + ((ROTL64(v, 8) & U64C(0x000000FF000000FF)) | \ + (ROTL64(v, 24) & U64C(0x0000FF000000FF00)) | \ + (ROTL64(v, 40) & U64C(0x00FF000000FF0000)) | \ + (ROTL64(v, 56) & U64C(0xFF000000FF000000))) +#else +#define SWAP64(v) \ + (((u64) SWAP32(U32V(v)) << 32) | (u64) SWAP32(U32V(v >> 32))) +#endif + +#define ECRYPT_DEFAULT_WTOW + +#ifdef ECRYPT_LITTLE_ENDIAN +#define U16TO16_LITTLE(v) (v) +#define U32TO32_LITTLE(v) (v) +#define U64TO64_LITTLE(v) (v) + +#define U16TO16_BIG(v) SWAP16(v) +#define U32TO32_BIG(v) SWAP32(v) +#define U64TO64_BIG(v) SWAP64(v) +#endif + +#ifdef ECRYPT_BIG_ENDIAN +#define U16TO16_LITTLE(v) SWAP16(v) +#define U32TO32_LITTLE(v) SWAP32(v) +#define U64TO64_LITTLE(v) SWAP64(v) + +#define U16TO16_BIG(v) (v) +#define U32TO32_BIG(v) (v) +#define U64TO64_BIG(v) (v) +#endif + +/* + * The following macros load words from an array of bytes with + * different types of endianness, and vice versa. 
+ */ + +#define ECRYPT_DEFAULT_BTOW + +#if (!defined(ECRYPT_UNKNOWN) && defined(ECRYPT_I8T_IS_BYTE)) + +#define U8TO16_LITTLE(p) U16TO16_LITTLE(((u16 *) (p))[0]) +#define U8TO32_LITTLE(p) U32TO32_LITTLE(((u32 *) (p))[0]) +#define U8TO64_LITTLE(p) U64TO64_LITTLE(((u64 *) (p))[0]) + +#define U8TO16_BIG(p) U16TO16_BIG(((u16 *) (p))[0]) +#define U8TO32_BIG(p) U32TO32_BIG(((u32 *) (p))[0]) +#define U8TO64_BIG(p) U64TO64_BIG(((u64 *) (p))[0]) + +#define U16TO8_LITTLE(p, v) (((u16 *) (p))[0] = U16TO16_LITTLE(v)) +#define U32TO8_LITTLE(p, v) (((u32 *) (p))[0] = U32TO32_LITTLE(v)) +#define U64TO8_LITTLE(p, v) (((u64 *) (p))[0] = U64TO64_LITTLE(v)) + +#define U16TO8_BIG(p, v) (((u16 *) (p))[0] = U16TO16_BIG(v)) +#define U32TO8_BIG(p, v) (((u32 *) (p))[0] = U32TO32_BIG(v)) +#define U64TO8_BIG(p, v) (((u64 *) (p))[0] = U64TO64_BIG(v)) + +#else + +#define U8TO16_LITTLE(p) \ + (((u16)((p)[0])) | \ + ((u16)((p)[1]) << 8)) + +#define U8TO32_LITTLE(p) \ + (((u32)((p)[0])) | \ + ((u32)((p)[1]) << 8) | \ + ((u32)((p)[2]) << 16) | \ + ((u32)((p)[3]) << 24)) + +#ifdef ECRYPT_NATIVE64 +#define U8TO64_LITTLE(p) \ + (((u64)((p)[0])) | \ + ((u64)((p)[1]) << 8) | \ + ((u64)((p)[2]) << 16) | \ + ((u64)((p)[3]) << 24) | \ + ((u64)((p)[4]) << 32) | \ + ((u64)((p)[5]) << 40) | \ + ((u64)((p)[6]) << 48) | \ + ((u64)((p)[7]) << 56)) +#else +#define U8TO64_LITTLE(p) \ + ((u64) U8TO32_LITTLE(p) | ((u64) U8TO32_LITTLE((p) + 4) << 32)) +#endif + +#define U8TO16_BIG(p) \ + (((u16)((p)[0]) << 8) | \ + ((u16)((p)[1]))) + +#define U8TO32_BIG(p) \ + (((u32)((p)[0]) << 24) | \ + ((u32)((p)[1]) << 16) | \ + ((u32)((p)[2]) << 8) | \ + ((u32)((p)[3]))) + +#ifdef ECRYPT_NATIVE64 +#define U8TO64_BIG(p) \ + (((u64)((p)[0]) << 56) | \ + ((u64)((p)[1]) << 48) | \ + ((u64)((p)[2]) << 40) | \ + ((u64)((p)[3]) << 32) | \ + ((u64)((p)[4]) << 24) | \ + ((u64)((p)[5]) << 16) | \ + ((u64)((p)[6]) << 8) | \ + ((u64)((p)[7]))) +#else +#define U8TO64_BIG(p) \ + (((u64) U8TO32_BIG(p) << 32) | (u64) U8TO32_BIG((p) + 4)) 
+#endif + +#define U16TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v)); \ + (p)[1] = U8V((v) >> 8); \ + } while (0) + +#define U32TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v)); \ + (p)[1] = U8V((v) >> 8); \ + (p)[2] = U8V((v) >> 16); \ + (p)[3] = U8V((v) >> 24); \ + } while (0) + +#ifdef ECRYPT_NATIVE64 +#define U64TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v)); \ + (p)[1] = U8V((v) >> 8); \ + (p)[2] = U8V((v) >> 16); \ + (p)[3] = U8V((v) >> 24); \ + (p)[4] = U8V((v) >> 32); \ + (p)[5] = U8V((v) >> 40); \ + (p)[6] = U8V((v) >> 48); \ + (p)[7] = U8V((v) >> 56); \ + } while (0) +#else +#define U64TO8_LITTLE(p, v) \ + do { \ + U32TO8_LITTLE((p), U32V((v))); \ + U32TO8_LITTLE((p) + 4, U32V((v) >> 32)); \ + } while (0) +#endif + +#define U16TO8_BIG(p, v) \ + do { \ + (p)[0] = U8V((v)); \ + (p)[1] = U8V((v) >> 8); \ + } while (0) + +#define U32TO8_BIG(p, v) \ + do { \ + (p)[0] = U8V((v) >> 24); \ + (p)[1] = U8V((v) >> 16); \ + (p)[2] = U8V((v) >> 8); \ + (p)[3] = U8V((v)); \ + } while (0) + +#ifdef ECRYPT_NATIVE64 +#define U64TO8_BIG(p, v) \ + do { \ + (p)[0] = U8V((v) >> 56); \ + (p)[1] = U8V((v) >> 48); \ + (p)[2] = U8V((v) >> 40); \ + (p)[3] = U8V((v) >> 32); \ + (p)[4] = U8V((v) >> 24); \ + (p)[5] = U8V((v) >> 16); \ + (p)[6] = U8V((v) >> 8); \ + (p)[7] = U8V((v)); \ + } while (0) +#else +#define U64TO8_BIG(p, v) \ + do { \ + U32TO8_BIG((p), U32V((v) >> 32)); \ + U32TO8_BIG((p) + 4, U32V((v))); \ + } while (0) +#endif + +#endif + +/* ------------------------------------------------------------------------- */ + +#endif diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.c b/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.c new file mode 100644 index 0000000000000000000000000000000000000000..6dbfcfaebe640f2b70fd5f605edae58bd307e4b8 --- /dev/null +++ b/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.c @@ -0,0 +1,143 @@ +#if defined(WINDOWS) +#pragma warning(disable : 4267) +#endif + +#include <sys/types.h> +#if 
defined(WINDOWS) +#include <windows.h> +#include <Wincrypt.h> +#else +#include <strings.h> +#include <sys/uio.h> +#include <unistd.h> +#endif +#include <fcntl.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include <oqs/rand.h> +#include <oqs/rand_urandom_chacha20.h> + +#include "external/chacha20.c" + +#if defined(WINDOWS) +#define strdup _strdup // for strdup deprecation warning +#endif + +typedef struct OQS_RAND_urandom_chacha20_ctx { + uint8_t key[32]; + uint32_t nonce[2]; + uint8_t cache[64]; + size_t cache_next_byte; + uint32_t chacha20_input[16]; +} OQS_RAND_urandom_chacha20_ctx; + +static OQS_RAND_urandom_chacha20_ctx *OQS_RAND_urandom_chacha20_ctx_new(); +static void OQS_RAND_urandom_chacha20_fill_cache(OQS_RAND *r); +static void OQS_RAND_urandom_chacha20_ctx_free(void *rand_ctx); + +OQS_RAND *OQS_RAND_urandom_chacha20_new() { + OQS_RAND *r = malloc(sizeof(OQS_RAND)); + if (r == NULL) { + return NULL; + } + r->method_name = strdup("urandom_chacha20"); + r->ctx = OQS_RAND_urandom_chacha20_ctx_new(); + if (r->ctx == NULL || r->method_name == NULL) { + OQS_RAND_urandom_chacha20_free(r); + return NULL; + } + r->estimated_classical_security = 256; + r->estimated_quantum_security = 128; // Grover search + r->rand_8 = &OQS_RAND_urandom_chacha20_8; + r->rand_32 = &OQS_RAND_urandom_chacha20_32; + r->rand_64 = &OQS_RAND_urandom_chacha20_64; + r->rand_n = &OQS_RAND_urandom_chacha20_n; + r->free = &OQS_RAND_urandom_chacha20_free; + return r; +} + +static OQS_RAND_urandom_chacha20_ctx *OQS_RAND_urandom_chacha20_ctx_new() { + OQS_RAND_urandom_chacha20_ctx *rand_ctx = NULL; + rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) malloc(sizeof(OQS_RAND_urandom_chacha20_ctx)); + if (rand_ctx == NULL) { + goto err; + } + if (!OQS_RAND_get_system_entropy(rand_ctx->key, 32)) { + goto err; + } + memset(rand_ctx->nonce, 0, 8); + rand_ctx->cache_next_byte = 64; // cache is empty + ECRYPT_keysetup(rand_ctx->chacha20_input, rand_ctx->key); + goto okay; +err: + if 
(rand_ctx) { + free(rand_ctx); + } + return NULL; +okay: + return rand_ctx; +} + +static void OQS_RAND_urandom_chacha20_fill_cache(OQS_RAND *r) { + OQS_RAND_urandom_chacha20_ctx *rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) r->ctx; + r->rand_n(r, rand_ctx->cache, 64); + rand_ctx->cache_next_byte = 0; +} + +uint8_t OQS_RAND_urandom_chacha20_8(OQS_RAND *r) { + OQS_RAND_urandom_chacha20_ctx *rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) r->ctx; + if (rand_ctx->cache_next_byte > 64 - 1) { + OQS_RAND_urandom_chacha20_fill_cache(r); + } + uint8_t out = rand_ctx->cache[rand_ctx->cache_next_byte]; + rand_ctx->cache_next_byte += 1; + return out; +} + +uint32_t OQS_RAND_urandom_chacha20_32(OQS_RAND *r) { + OQS_RAND_urandom_chacha20_ctx *rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) r->ctx; + if (rand_ctx->cache_next_byte > 64 - 4) { + OQS_RAND_urandom_chacha20_fill_cache(r); + } + uint32_t out; + memcpy(&out, &rand_ctx->cache[rand_ctx->cache_next_byte], 4); + rand_ctx->cache_next_byte += 4; + return out; +} + +uint64_t OQS_RAND_urandom_chacha20_64(OQS_RAND *r) { + OQS_RAND_urandom_chacha20_ctx *rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) r->ctx; + if (rand_ctx->cache_next_byte > 64 - 8) { + OQS_RAND_urandom_chacha20_fill_cache(r); + } + uint64_t out; + memcpy(&out, &rand_ctx->cache[rand_ctx->cache_next_byte], 8); + rand_ctx->cache_next_byte += 8; + return out; +} + +void OQS_RAND_urandom_chacha20_n(OQS_RAND *r, uint8_t *out, size_t n) { + OQS_RAND_urandom_chacha20_ctx *rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) r->ctx; + rand_ctx->nonce[0]++; + if (rand_ctx->nonce[0] == 0) { + rand_ctx->nonce[1]++; + } + ECRYPT_ivsetup(rand_ctx->chacha20_input, (u8 *) rand_ctx->nonce); + ECRYPT_keystream_bytes(rand_ctx->chacha20_input, out, n); +} + +static void OQS_RAND_urandom_chacha20_ctx_free(void *rand_ctx) { + free(rand_ctx); +} + +void OQS_RAND_urandom_chacha20_free(OQS_RAND *r) { + if (r) { + OQS_RAND_urandom_chacha20_ctx_free(r->ctx); + } + if (r) { + free(r->method_name); + } + 
free(r); +} diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.h b/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.h new file mode 100644 index 0000000000000000000000000000000000000000..9617085187cd30d621c773dacb039ca00c44d607 --- /dev/null +++ b/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.h @@ -0,0 +1,23 @@ +/** + * \file rand_urandom_chacha20.h + * \brief Header for the chacha implementation of OQS_RAND + */ + +#ifndef __OQS_RAND_URANDOM_CHACHA20_H +#define __OQS_RAND_URANDOM_CHACHA20_H + +#include <stddef.h> +#include <stdint.h> + +#include <oqs/rand.h> + +OQS_RAND *OQS_RAND_urandom_chacha20_new(); + +uint8_t OQS_RAND_urandom_chacha20_8(OQS_RAND *r); +uint32_t OQS_RAND_urandom_chacha20_32(OQS_RAND *r); +uint64_t OQS_RAND_urandom_chacha20_64(OQS_RAND *r); +void OQS_RAND_urandom_chacha20_n(OQS_RAND *r, uint8_t *out, size_t n); + +void OQS_RAND_urandom_chacha20_free(OQS_RAND *r); + +#endif diff --git a/crypt/liboqs/crypto/sha3/Makefile.am b/crypt/liboqs/crypto/sha3/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..0e8f3512506582d2d684ff606a6d079c83e209ca --- /dev/null +++ b/crypt/liboqs/crypto/sha3/Makefile.am @@ -0,0 +1,8 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libsha3.la + +libsha3_la_SOURCES = sha3.c + +libsha3_la_CPPFLAGS = -I../../../include -I. +libsha3_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/crypto/sha3/sha3.c b/crypt/liboqs/crypto/sha3/sha3.c new file mode 100644 index 0000000000000000000000000000000000000000..1c229e6bcb52372f218e0761057946b703176173 --- /dev/null +++ b/crypt/liboqs/crypto/sha3/sha3.c @@ -0,0 +1,480 @@ +/* Based on the public domain implementation in + * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html + * by Ronny Van Keer + * and the public domain "TweetFips202" implementation + * from https://twitter.com/tweetfips202 + * by Gilles Van Assche, Daniel J. 
Bernstein, and Peter Schwabe */ + +#if defined(WINDOWS) +#pragma warning(disable : 4244) +#endif + +#include <assert.h> +#include <oqs/sha3.h> +#include <stdint.h> + +#define SHAKE128_RATE OQS_SHA3_SHAKE128_RATE +#define SHA3_256_RATE OQS_SHA3_SHA3_256_RATE +#define SHA3_512_RATE OQS_SHA3_SHA3_512_RATE +#define NROUNDS 24 +#define ROL(a, offset) ((a << offset) ^ (a >> (64 - offset))) + +static uint64_t load64(const unsigned char *x) { + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long) x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) { + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = u; + u >>= 8; + } +} + +static const uint64_t KeccakF_RoundConstants[NROUNDS] = { + (uint64_t) 0x0000000000000001ULL, + (uint64_t) 0x0000000000008082ULL, + (uint64_t) 0x800000000000808aULL, + (uint64_t) 0x8000000080008000ULL, + (uint64_t) 0x000000000000808bULL, + (uint64_t) 0x0000000080000001ULL, + (uint64_t) 0x8000000080008081ULL, + (uint64_t) 0x8000000000008009ULL, + (uint64_t) 0x000000000000008aULL, + (uint64_t) 0x0000000000000088ULL, + (uint64_t) 0x0000000080008009ULL, + (uint64_t) 0x000000008000000aULL, + (uint64_t) 0x000000008000808bULL, + (uint64_t) 0x800000000000008bULL, + (uint64_t) 0x8000000000008089ULL, + (uint64_t) 0x8000000000008003ULL, + (uint64_t) 0x8000000000008002ULL, + (uint64_t) 0x8000000000000080ULL, + (uint64_t) 0x000000000000800aULL, + (uint64_t) 0x800000008000000aULL, + (uint64_t) 0x8000000080008081ULL, + (uint64_t) 0x8000000000008080ULL, + (uint64_t) 0x0000000080000001ULL, + (uint64_t) 0x8000000080008008ULL}; + +static void KeccakF1600_StatePermute(uint64_t *state) { + int round; + + uint64_t Aba, Abe, Abi, Abo, Abu; + uint64_t Aga, Age, Agi, Ago, Agu; + uint64_t Aka, Ake, Aki, Ako, Aku; + uint64_t Ama, Ame, Ami, Amo, Amu; + uint64_t Asa, Ase, Asi, Aso, Asu; + uint64_t BCa, BCe, BCi, BCo, BCu; + uint64_t Da, De, Di, Do, Du; + uint64_t Eba, Ebe, Ebi, Ebo, Ebu; + uint64_t Ega, Ege, Egi, 
Ego, Egu; + uint64_t Eka, Eke, Eki, Eko, Eku; + uint64_t Ema, Eme, Emi, Emo, Emu; + uint64_t Esa, Ese, Esi, Eso, Esu; + + //copyFromState(A, state) + Aba = state[0]; + Abe = state[1]; + Abi = state[2]; + Abo = state[3]; + Abu = state[4]; + Aga = state[5]; + Age = state[6]; + Agi = state[7]; + Ago = state[8]; + Agu = state[9]; + Aka = state[10]; + Ake = state[11]; + Aki = state[12]; + Ako = state[13]; + Aku = state[14]; + Ama = state[15]; + Ame = state[16]; + Ami = state[17]; + Amo = state[18]; + Amu = state[19]; + Asa = state[20]; + Ase = state[21]; + Asi = state[22]; + Aso = state[23]; + Asu = state[24]; + + for (round = 0; round < NROUNDS; round += 2) { + // prepareTheta + BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; + BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase; + BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; + BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; + BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; + + //thetaRhoPiChiIotaPrepareTheta(round , A, E) + Da = BCu ^ ROL(BCe, 1); + De = BCa ^ ROL(BCi, 1); + Di = BCe ^ ROL(BCo, 1); + Do = BCi ^ ROL(BCu, 1); + Du = BCo ^ ROL(BCa, 1); + + Aba ^= Da; + BCa = Aba; + Age ^= De; + BCe = ROL(Age, 44); + Aki ^= Di; + BCi = ROL(Aki, 43); + Amo ^= Do; + BCo = ROL(Amo, 21); + Asu ^= Du; + BCu = ROL(Asu, 14); + Eba = BCa ^ ((~BCe) & BCi); + Eba ^= (uint64_t) KeccakF_RoundConstants[round]; + Ebe = BCe ^ ((~BCi) & BCo); + Ebi = BCi ^ ((~BCo) & BCu); + Ebo = BCo ^ ((~BCu) & BCa); + Ebu = BCu ^ ((~BCa) & BCe); + + Abo ^= Do; + BCa = ROL(Abo, 28); + Agu ^= Du; + BCe = ROL(Agu, 20); + Aka ^= Da; + BCi = ROL(Aka, 3); + Ame ^= De; + BCo = ROL(Ame, 45); + Asi ^= Di; + BCu = ROL(Asi, 61); + Ega = BCa ^ ((~BCe) & BCi); + Ege = BCe ^ ((~BCi) & BCo); + Egi = BCi ^ ((~BCo) & BCu); + Ego = BCo ^ ((~BCu) & BCa); + Egu = BCu ^ ((~BCa) & BCe); + + Abe ^= De; + BCa = ROL(Abe, 1); + Agi ^= Di; + BCe = ROL(Agi, 6); + Ako ^= Do; + BCi = ROL(Ako, 25); + Amu ^= Du; + BCo = ROL(Amu, 8); + Asa ^= Da; + BCu = ROL(Asa, 18); + Eka = BCa ^ ((~BCe) & BCi); + Eke = BCe ^ ((~BCi) & BCo); + Eki = BCi ^ ((~BCo) & 
BCu); + Eko = BCo ^ ((~BCu) & BCa); + Eku = BCu ^ ((~BCa) & BCe); + + Abu ^= Du; + BCa = ROL(Abu, 27); + Aga ^= Da; + BCe = ROL(Aga, 36); + Ake ^= De; + BCi = ROL(Ake, 10); + Ami ^= Di; + BCo = ROL(Ami, 15); + Aso ^= Do; + BCu = ROL(Aso, 56); + Ema = BCa ^ ((~BCe) & BCi); + Eme = BCe ^ ((~BCi) & BCo); + Emi = BCi ^ ((~BCo) & BCu); + Emo = BCo ^ ((~BCu) & BCa); + Emu = BCu ^ ((~BCa) & BCe); + + Abi ^= Di; + BCa = ROL(Abi, 62); + Ago ^= Do; + BCe = ROL(Ago, 55); + Aku ^= Du; + BCi = ROL(Aku, 39); + Ama ^= Da; + BCo = ROL(Ama, 41); + Ase ^= De; + BCu = ROL(Ase, 2); + Esa = BCa ^ ((~BCe) & BCi); + Ese = BCe ^ ((~BCi) & BCo); + Esi = BCi ^ ((~BCo) & BCu); + Eso = BCo ^ ((~BCu) & BCa); + Esu = BCu ^ ((~BCa) & BCe); + + // prepareTheta + BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa; + BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; + BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; + BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; + BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; + + //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) + Da = BCu ^ ROL(BCe, 1); + De = BCa ^ ROL(BCi, 1); + Di = BCe ^ ROL(BCo, 1); + Do = BCi ^ ROL(BCu, 1); + Du = BCo ^ ROL(BCa, 1); + + Eba ^= Da; + BCa = Eba; + Ege ^= De; + BCe = ROL(Ege, 44); + Eki ^= Di; + BCi = ROL(Eki, 43); + Emo ^= Do; + BCo = ROL(Emo, 21); + Esu ^= Du; + BCu = ROL(Esu, 14); + Aba = BCa ^ ((~BCe) & BCi); + Aba ^= (uint64_t) KeccakF_RoundConstants[round + 1]; + Abe = BCe ^ ((~BCi) & BCo); + Abi = BCi ^ ((~BCo) & BCu); + Abo = BCo ^ ((~BCu) & BCa); + Abu = BCu ^ ((~BCa) & BCe); + + Ebo ^= Do; + BCa = ROL(Ebo, 28); + Egu ^= Du; + BCe = ROL(Egu, 20); + Eka ^= Da; + BCi = ROL(Eka, 3); + Eme ^= De; + BCo = ROL(Eme, 45); + Esi ^= Di; + BCu = ROL(Esi, 61); + Aga = BCa ^ ((~BCe) & BCi); + Age = BCe ^ ((~BCi) & BCo); + Agi = BCi ^ ((~BCo) & BCu); + Ago = BCo ^ ((~BCu) & BCa); + Agu = BCu ^ ((~BCa) & BCe); + + Ebe ^= De; + BCa = ROL(Ebe, 1); + Egi ^= Di; + BCe = ROL(Egi, 6); + Eko ^= Do; + BCi = ROL(Eko, 25); + Emu ^= Du; + BCo = ROL(Emu, 8); + Esa ^= Da; + BCu = ROL(Esa, 18); + Aka = BCa ^ ((~BCe) 
& BCi); + Ake = BCe ^ ((~BCi) & BCo); + Aki = BCi ^ ((~BCo) & BCu); + Ako = BCo ^ ((~BCu) & BCa); + Aku = BCu ^ ((~BCa) & BCe); + + Ebu ^= Du; + BCa = ROL(Ebu, 27); + Ega ^= Da; + BCe = ROL(Ega, 36); + Eke ^= De; + BCi = ROL(Eke, 10); + Emi ^= Di; + BCo = ROL(Emi, 15); + Eso ^= Do; + BCu = ROL(Eso, 56); + Ama = BCa ^ ((~BCe) & BCi); + Ame = BCe ^ ((~BCi) & BCo); + Ami = BCi ^ ((~BCo) & BCu); + Amo = BCo ^ ((~BCu) & BCa); + Amu = BCu ^ ((~BCa) & BCe); + + Ebi ^= Di; + BCa = ROL(Ebi, 62); + Ego ^= Do; + BCe = ROL(Ego, 55); + Eku ^= Du; + BCi = ROL(Eku, 39); + Ema ^= Da; + BCo = ROL(Ema, 41); + Ese ^= De; + BCu = ROL(Ese, 2); + Asa = BCa ^ ((~BCe) & BCi); + Ase = BCe ^ ((~BCi) & BCo); + Asi = BCi ^ ((~BCo) & BCu); + Aso = BCo ^ ((~BCu) & BCa); + Asu = BCu ^ ((~BCa) & BCe); + } + + //copyToState(state, A) + state[0] = Aba; + state[1] = Abe; + state[2] = Abi; + state[3] = Abo; + state[4] = Abu; + state[5] = Aga; + state[6] = Age; + state[7] = Agi; + state[8] = Ago; + state[9] = Agu; + state[10] = Aka; + state[11] = Ake; + state[12] = Aki; + state[13] = Ako; + state[14] = Aku; + state[15] = Ama; + state[16] = Ame; + state[17] = Ami; + state[18] = Amo; + state[19] = Amu; + state[20] = Asa; + state[21] = Ase; + state[22] = Asi; + state[23] = Aso; + state[24] = Asu; + +#undef round +} + +#include <string.h> +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) + +static void keccak_absorb(uint64_t *s, + unsigned int r, + const unsigned char *m, unsigned long long int mlen, + unsigned char p) { + unsigned long long i; + unsigned char t[200]; + + for (i = 0; i < 25; ++i) + s[i] = 0; + + while (mlen >= r) { + for (i = 0; i < r / 8; ++i) + s[i] ^= load64(m + 8 * i); + + KeccakF1600_StatePermute(s); + mlen -= r; + m += r; + } + + for (i = 0; i < r; ++i) + t[i] = 0; + for (i = 0; i < mlen; ++i) + t[i] = m[i]; + t[i] = p; + t[r - 1] |= 128; + for (i = 0; i < r / 8; ++i) + s[i] ^= load64(t + 8 * i); +} + +void OQS_SHA3_keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, + uint64_t *s, + unsigned int r) { + unsigned int i; + while (nblocks > 0) { + KeccakF1600_StatePermute(s); + for (i = 0; i < (r >> 3); i++) { + store64(h + 8 * i, s[i]); + } + h += r; + nblocks--; + } +} + +void OQS_SHA3_sha3256(unsigned char *output, const unsigned char *input, unsigned int inputByteLen) { + uint64_t s[25]; + unsigned char t[SHA3_256_RATE]; + int i; + + keccak_absorb(s, SHA3_256_RATE, input, inputByteLen, 0x06); + OQS_SHA3_keccak_squeezeblocks(t, 1, s, SHA3_256_RATE); + for (i = 0; i < 32; i++) + output[i] = t[i]; +} + +void OQS_SHA3_sha3512(unsigned char *output, const unsigned char *input, unsigned int inputByteLen) { + uint64_t s[25]; + unsigned char t[SHA3_512_RATE]; + int i; + //TODO: not sure about 0x80 + keccak_absorb(s, SHA3_512_RATE, input, inputByteLen, 0x80); + OQS_SHA3_keccak_squeezeblocks(t, 1, s, SHA3_512_RATE); + for (i = 0; i < 64; i++) + output[i] = t[i]; +} + +void OQS_SHA3_shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen) { + keccak_absorb(s, SHAKE128_RATE, input, inputByteLen, 0x1F); +} + +void OQS_SHA3_shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s) { + OQS_SHA3_keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE); +} + +void OQS_SHA3_shake128(unsigned char *output, unsigned long long outlen, + const unsigned char 
*input, unsigned long long inlen) { + uint64_t s[25]; + unsigned char t[SHAKE128_RATE]; + unsigned long long nblocks = outlen / SHAKE128_RATE; + size_t i; + + for (i = 0; i < 25; ++i) + s[i] = 0; + + /* Absorb input */ + keccak_absorb(s, SHAKE128_RATE, input, inlen, 0x1F); + + /* Squeeze output */ + OQS_SHA3_keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE); + + output += nblocks * SHAKE128_RATE; + outlen -= nblocks * SHAKE128_RATE; + + if (outlen) { + OQS_SHA3_keccak_squeezeblocks(t, 1, s, SHAKE128_RATE); + for (i = 0; i < outlen; i++) + output[i] = t[i]; + } +} + +void OQS_SHA3_cshake128_simple_absorb(uint64_t s[25], + uint16_t cstm, // 2-byte domain separator + const unsigned char *in, unsigned long long inlen) { + unsigned char *sep = (unsigned char *) s; + unsigned int i; + + for (i = 0; i < 25; i++) + s[i] = 0; + + /* Absorb customization (domain-separation) string */ + sep[0] = 0x01; + sep[1] = 0xa8; + sep[2] = 0x01; + sep[3] = 0x00; + sep[4] = 0x01; + sep[5] = 16; // fixed bitlen of cstm + sep[6] = cstm & 0xff; + sep[7] = cstm >> 8; + + KeccakF1600_StatePermute(s); + + /* Absorb input */ + keccak_absorb(s, SHAKE128_RATE, in, inlen, 0x04); +} + +void OQS_SHA3_cshake128_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s) { + OQS_SHA3_keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE); +} + +void OQS_SHA3_cshake128_simple(unsigned char *output, unsigned long long outlen, + uint16_t cstm, // 2-byte domain separator + const unsigned char *in, unsigned long long inlen) { + uint64_t s[25]; + unsigned char t[SHAKE128_RATE]; + unsigned int i; + + OQS_SHA3_cshake128_simple_absorb(s, cstm, in, inlen); + + /* Squeeze output */ + OQS_SHA3_keccak_squeezeblocks(output, outlen / SHAKE128_RATE, s, SHAKE128_RATE); + output += (outlen / SHAKE128_RATE) * SHAKE128_RATE; + + if (outlen % SHAKE128_RATE) { + OQS_SHA3_keccak_squeezeblocks(t, 1, s, SHAKE128_RATE); + for (i = 0; i < outlen % SHAKE128_RATE; i++) + output[i] = t[i]; + } +} 
diff --git a/crypt/liboqs/crypto/sha3/sha3.h b/crypt/liboqs/crypto/sha3/sha3.h new file mode 100644 index 0000000000000000000000000000000000000000..5b02cb561c93c320cdd3e99ad4d6392a13e893f4 --- /dev/null +++ b/crypt/liboqs/crypto/sha3/sha3.h @@ -0,0 +1,35 @@ +/** + * \file sha3.h + * \brief Header defining the API for OQS SHA3 + */ + +#ifndef __OQS_SHA3_H +#define __OQS_SHA3_H + +#include <stdint.h> + +#define OQS_SHA3_STATESIZE 25 +#define OQS_SHA3_SHAKE128_RATE 168 +#define OQS_SHA3_SHA3_256_RATE 136 +#define OQS_SHA3_SHA3_512_RATE 72 + +void OQS_SHA3_keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, uint64_t *s, unsigned int r); +void OQS_SHA3_sha3256(unsigned char *output, const unsigned char *input, unsigned int inputByteLen); +void OQS_SHA3_sha3512(unsigned char *output, const unsigned char *input, unsigned int inputByteLen); + +// SHAKE128 +void OQS_SHA3_shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen); +void OQS_SHA3_shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s); +void OQS_SHA3_shake128(unsigned char *output, unsigned long long outlen, + const unsigned char *input, unsigned long long inlen); + +// cSHAKE128 +void OQS_SHA3_cshake128_simple_absorb(uint64_t *s, + uint16_t cstm, // 2-byte domain separator + const unsigned char *in, unsigned long long inlen); +void OQS_SHA3_cshake128_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s); +void OQS_SHA3_cshake128_simple(unsigned char *output, unsigned long long outlen, + uint16_t cstm, // 2-byte domain separator + const unsigned char *in, unsigned long long inlen); + +#endif diff --git a/crypt/liboqs/ds_benchmark.h b/crypt/liboqs/ds_benchmark.h new file mode 100644 index 0000000000000000000000000000000000000000..fc9efb02bddf8c18971673af3838494711db610e --- /dev/null +++ b/crypt/liboqs/ds_benchmark.h @@ -0,0 +1,240 @@ 
+/******************************************************************************************** + * ds_benchmark.h: Macros for simple benchmarking of C code. + * + * See instructions for usage below. + * Software originally developed by Douglas Stebila. + * Most recent version at https://gist.github.com/dstebila/6980008ec98209ef6075 + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * For more information, please refer to <http://unlicense.org> + ********************************************************************************************/ + +/** \file ds_benchmark.h + * Macros for simple benchmarking of C code. + */ + +#if 0 +/* example code: timing two operations */ +#include "ds_benchmark.h" +... 
+DEFINE_TIMER_VARIABLES +INITIALIZE_TIMER +START_TIMER +// your operation here +STOP_TIMER +START_TIMER +// another operation here +STOP_TIMER +FINALIZE_TIMER +PRINT_TIME_HEADER +PRINT_TIMER_AVG("my operation") +PRINT_TIMER_FOOTER + +/* example code: average multiple runs, run for e.g. 30 seconds */ +#include "ds_benchmark.h" +... +PRINT_TIMER_HEADER +TIME_OPERATION_SECONDS(MyFunction(myarg1, myarg2, ...), "my operation", 30) +TIME_OPERATION_SECONDS(MyOtherFunction(myarg3), "my other operation", 30) +PRINT_TIMER_FOOTER + +/* example code: average multiple runs, run for e.g. 100 iterations */ +#include "ds_benchmark.h" +... +PRINT_TIMER_HEADER +TIME_OPERATION_ITERATIONS(MyFunction(myarg1, myarg2, ...), "my operation", 1000) +TIME_OPERATION_ITERATIONS(MyOtherFunction(myarg3), "my other operation", 100) +PRINT_TIMER_FOOTER + +/* For most accurate results: + * - disable hyperthreading a.k.a. hardware multithreading + * (Linux instructions: http://bench.cr.yp.to/supercop.html) + * (Mac OS X instructions: Instruments -> Preferences -> CPUs -> uncheck "Hardware Multi-Threading" + * http://forums.macrumors.com/showthread.php?t=1484684) + * - disable TurboBoost + * (Linux instructions: http://bench.cr.yp.to/supercop.html) + * (Max OS X: use http://www.rugarciap.com/turbo-boost-switcher-for-os-x/) + * - run when the computer is idle (e.g., shut down all other applications, disable network access if possible, ...) 
+ */ +#endif + +#ifndef _DS_BENCHMARK_H +#define _DS_BENCHMARK_H + +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#if !defined(WINDOWS) +#include <sys/time.h> +#endif +#include <math.h> +#include <time.h> + +#if defined(WINDOWS) +#include <Windows.h> + +int gettimeofday(struct timeval *tp, struct timezone *tzp) { + // Note: some broken versions only have 8 trailing zero's, the correct epoch has 9 trailing zero's + static const uint64_t EPOCH = ((uint64_t) 116444736000000000ULL); + + SYSTEMTIME system_time; + FILETIME file_time; + uint64_t time; + + GetSystemTime(&system_time); + SystemTimeToFileTime(&system_time, &file_time); + time = ((uint64_t) file_time.dwLowDateTime); + time += ((uint64_t) file_time.dwHighDateTime) << 32; + tp->tv_sec = (long) ((time - EPOCH) / 10000000L); + tp->tv_usec = (long) (system_time.wMilliseconds * 1000); + return 0; +} +#endif + +static uint64_t rdtsc(void) { +#if defined(WINDOWS) + return __rdtsc(); +#elif defined(__aarch64__) + uint64_t x; + asm volatile("isb; mrs %0, cntvct_el0" + : "=r"(x)); + return x; +#elif defined(__arm__) + struct timespec time; + clock_gettime(CLOCK_REALTIME, &time); + return (int64_t)(time.tv_sec * 1e9 + time.tv_nsec); +#else + uint64_t x; + __asm__ volatile(".byte 0x0f, 0x31" + : "=A"(x)); + return x; +#endif +} + +#define DEFINE_TIMER_VARIABLES \ + volatile uint64_t _bench_cycles_start, _bench_cycles_end; \ + uint64_t _bench_cycles_cumulative = 0; \ + int64_t _bench_cycles_diff; \ + struct timeval _bench_timeval_start, _bench_timeval_end; \ + uint64_t _bench_iterations, _bench_time_cumulative; \ + double _bench_cycles_x, _bench_cycles_mean, _bench_cycles_delta, _bench_cycles_M2, _bench_cycles_stdev; \ + double _bench_time_x, _bench_time_mean, _bench_time_delta, _bench_time_M2, _bench_time_stdev; + +#define INITIALIZE_TIMER \ + _bench_iterations = 0; \ + _bench_cycles_mean = 0.0; \ + _bench_cycles_M2 = 0.0; \ + _bench_time_cumulative = 0; \ + _bench_time_mean = 0.0; \ + _bench_time_M2 = 
0.0; + +#define START_TIMER \ + gettimeofday(&_bench_timeval_start, NULL); \ + _bench_cycles_start = rdtsc(); + +// Mean and population standard deviation are calculated in an online way using the algorithm in +// http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm +#define STOP_TIMER \ + _bench_cycles_end = rdtsc(); \ + gettimeofday(&_bench_timeval_end, NULL); \ + _bench_iterations += 1; \ + if (_bench_cycles_end < _bench_cycles_start) { \ + _bench_cycles_end += (uint64_t) 1 << 32; \ + } \ + _bench_cycles_diff = _bench_cycles_end; \ + _bench_cycles_diff -= _bench_cycles_start; \ + _bench_cycles_cumulative += _bench_cycles_diff; \ + _bench_cycles_x = (double) (_bench_cycles_diff); \ + _bench_cycles_delta = _bench_cycles_x - _bench_cycles_mean; \ + _bench_cycles_mean += _bench_cycles_delta / (double) _bench_iterations; \ + _bench_cycles_M2 += _bench_cycles_delta * (_bench_cycles_x - _bench_cycles_mean); \ + _bench_time_x = (double) ((_bench_timeval_end.tv_sec * 1000000 + _bench_timeval_end.tv_usec) - (_bench_timeval_start.tv_sec * 1000000 + _bench_timeval_start.tv_usec)); \ + _bench_time_delta = _bench_time_x - _bench_time_mean; \ + _bench_time_mean += _bench_time_delta / (double) _bench_iterations; \ + _bench_time_M2 += _bench_time_delta * (_bench_time_x - _bench_time_mean); \ + _bench_time_cumulative += _bench_time_x; + +#define FINALIZE_TIMER \ + if (_bench_iterations == 2) { \ + _bench_cycles_stdev = 0.0; \ + } else { \ + _bench_cycles_stdev = sqrt(_bench_cycles_M2 / (double) _bench_iterations); \ + } \ + if (_bench_iterations == 2) { \ + _bench_time_stdev = 0.0; \ + } else { \ + _bench_time_stdev = sqrt(_bench_time_M2 / (double) _bench_iterations); \ + } + +#define PRINT_CURRENT_TIME \ + { \ + char _bench_time_buff[20]; \ + time_t _bench_time_now = time(0); \ + strftime(_bench_time_buff, 20, "%Y-%m-%d %H:%M:%S", localtime(&_bench_time_now)); \ + printf("%s", _bench_time_buff); \ + } + +#define PRINT_TIMER_HEADER \ + printf("Started 
at "); \ + PRINT_CURRENT_TIME \ + printf("\n"); \ + printf("%-30s | %10s | %14s | %15s | %10s | %16s | %10s\n", "Operation ", "Iterations", "Total time (s)", "Time (us): mean", "pop. stdev", "CPU cycles: mean", "pop. stdev"); \ + printf("%-30s | %10s:| %14s:| %15s:| %10s:| %16s:| %10s:\n", "------------------------------", "----------", "--------------", "---------------", "----------", "----------------", "----------"); +/* colons are used in above to right-align cell contents in Markdown */ + +#define PRINT_TIMER_FOOTER \ + printf("Ended at "); \ + PRINT_CURRENT_TIME \ + printf("\n"); + +#define PRINT_TIMER_AVG(op_name) \ + printf("%-30s | %10" PRIu64 " | %14.3f | %15.3f | %10.3f | %16.0f | %10.0f\n", (op_name), _bench_iterations, ((double) _bench_time_cumulative) / 1000000.0, _bench_time_mean, _bench_time_stdev, ((double) _bench_cycles_cumulative) / (double) _bench_iterations, _bench_cycles_stdev); + +#define TIME_OPERATION_ITERATIONS(op, op_name, it) \ + { \ + DEFINE_TIMER_VARIABLES \ + INITIALIZE_TIMER \ + for (int i = 0; i < (it); i++) { \ + START_TIMER { op; } \ + STOP_TIMER \ + } \ + FINALIZE_TIMER \ + PRINT_TIMER_AVG(op_name) \ + } + +#define TIME_OPERATION_SECONDS(op, op_name, secs) \ + { \ + DEFINE_TIMER_VARIABLES \ + INITIALIZE_TIMER \ + uint64_t _bench_time_goal_usecs = 1000000 * secs; \ + while (_bench_time_cumulative < _bench_time_goal_usecs) { \ + START_TIMER { op; } \ + STOP_TIMER \ + } \ + FINALIZE_TIMER \ + PRINT_TIMER_AVG(op_name) \ + } + +#endif diff --git a/crypt/liboqs/kex/Makefile.am b/crypt/liboqs/kex/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..311a3126c7281036c5e115bc9f9fd3ec8cfe7346 --- /dev/null +++ b/crypt/liboqs/kex/Makefile.am @@ -0,0 +1,8 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libkex.la + +libkex_la_SOURCES = kex.c + +libkex_la_CPPFLAGS = -I../../include +libkex_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/kex/kex.c b/crypt/liboqs/kex/kex.c new file mode 100644 index 
0000000000000000000000000000000000000000..539027c90424c4eab24b58a4274d6a19615dddab --- /dev/null +++ b/crypt/liboqs/kex/kex.c @@ -0,0 +1,141 @@ +#include <assert.h> + +#include <oqs/kex.h> + +#include <oqs/kex_lwe_frodo.h> +#include <oqs/kex_mlwe_kyber.h> +#include <oqs/kex_ntru.h> +#include <oqs/kex_rlwe_bcns15.h> +#include <oqs/kex_rlwe_msrln16.h> +#include <oqs/kex_rlwe_newhope.h> +#include <oqs/kex_sidh_cln16.h> + +#ifdef ENABLE_CODE_MCBITS +#include <oqs/kex_code_mcbits.h> +#endif + +#ifdef ENABLE_SIDH_IQC_REF +#include <oqs/kex_sidh_iqc_ref.h> +#endif +#ifdef ENABLE_KEX_RLWE_NEWHOPE_AVX2 +#include <oqs/kex_rlwe_newhope_avx2.h> +#endif + +#define UNUSED(expr) \ + do { \ + (void) (expr); \ + } while (0) + +OQS_KEX *OQS_KEX_new(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters) { + + //To disable warnings when the function arguments are not being used depending + //on which algorithm has been disabled + UNUSED(seed); + UNUSED(seed_len); + UNUSED(named_parameters); + + switch (alg_name) { + case OQS_KEX_alg_default: + return OQS_KEX_rlwe_bcns15_new(rand); +#ifdef ENABLE_KEX_LWE_FRODO + case OQS_KEX_alg_lwe_frodo: + return OQS_KEX_lwe_frodo_new_recommended(rand, seed, seed_len, named_parameters); +#else + assert(0); +#endif + case OQS_KEX_alg_code_mcbits: +#ifdef ENABLE_CODE_MCBITS + return OQS_KEX_code_mcbits_new(rand); +#else + assert(0); +#endif +#ifdef ENABLE_KEX_MLWE_KYBER + case OQS_KEX_alg_mlwe_kyber: + return OQS_KEX_mlwe_kyber_new(rand); +#else + assert(0); +#endif +#ifndef DISABLE_NTRU_ON_WINDOWS_BY_DEFAULT +#ifdef ENABLE_KEX_NTRU + case OQS_KEX_alg_ntru: + return OQS_KEX_ntru_new(rand); +#else + assert(0); +#endif +#endif + case OQS_KEX_alg_rlwe_bcns15: + return OQS_KEX_rlwe_bcns15_new(rand); +#ifdef ENABLE_KEX_RLWE_MSRLN16 + case OQS_KEX_alg_rlwe_msrln16: + return OQS_KEX_rlwe_msrln16_new(rand); +#else + assert(0); +#endif +#ifdef ENABLE_KEX_RLWE_NEWHOPE + case 
OQS_KEX_alg_rlwe_newhope: + return OQS_KEX_rlwe_newhope_new(rand); +#else + assert(0); +#endif +#ifdef ENABLE_KEX_RLWE_NEWHOPE_AVX2 + case OQS_KEX_alg_rlwe_newhope_avx2: + return OQS_KEX_rlwe_newhope_avx2_new(rand); +#else + assert(0); +#endif +#ifdef ENABLE_KEX_SIDH_CLN16 + case OQS_KEX_alg_sidh_cln16: + return OQS_KEX_sidh_cln16_new(rand, NULL); + case OQS_KEX_alg_sidh_cln16_compressed: + return OQS_KEX_sidh_cln16_new(rand, "compressedp751"); +#else + assert(0); +#endif + + case OQS_KEX_alg_sidh_iqc_ref: +#ifdef ENABLE_SIDH_IQC_REF + return OQS_KEX_sidh_iqc_ref_new(rand, named_parameters); +#else + assert(0); +#endif + default: + assert(0); + return NULL; + } +} + +int OQS_KEX_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) { + if (k == NULL) { + return 0; + } else { + return k->alice_0(k, alice_priv, alice_msg, alice_msg_len); + } +} + +int OQS_KEX_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) { + if (k == NULL) { + return 0; + } else { + return k->bob(k, alice_msg, alice_msg_len, bob_msg, bob_msg_len, key, key_len); + } +} + +int OQS_KEX_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) { + if (k == NULL) { + return 0; + } else { + return k->alice_1(k, alice_priv, bob_msg, bob_msg_len, key, key_len); + } +} + +void OQS_KEX_alice_priv_free(OQS_KEX *k, void *alice_priv) { + if (k) { + k->alice_priv_free(k, alice_priv); + } +} + +void OQS_KEX_free(OQS_KEX *k) { + if (k) { + k->free(k); + } +} diff --git a/crypt/liboqs/kex/kex.h b/crypt/liboqs/kex/kex.h new file mode 100644 index 0000000000000000000000000000000000000000..0decdf874b9a1a815738832f183f4a0b6ab479d1 --- /dev/null +++ b/crypt/liboqs/kex/kex.h @@ -0,0 +1,163 @@ +/** + * \file kex.h + * \brief Header defining the API for generic OQS Key exchange + */ + +#ifndef __OQS_KEX_H +#define __OQS_KEX_H + 
+#include <stddef.h> +#include <stdint.h> + +#include <oqs/rand.h> + +#if !defined(WINDOWS) +#include <oqs/config.h> +#endif + +enum OQS_KEX_alg_name { + OQS_KEX_alg_default, + OQS_KEX_alg_rlwe_bcns15, + OQS_KEX_alg_rlwe_newhope, + OQS_KEX_alg_rlwe_msrln16, + OQS_KEX_alg_lwe_frodo, + OQS_KEX_alg_sidh_cln16, + OQS_KEX_alg_sidh_cln16_compressed, + OQS_KEX_alg_code_mcbits, + OQS_KEX_alg_ntru, + OQS_KEX_alg_sidh_iqc_ref, + OQS_KEX_alg_mlwe_kyber, + OQS_KEX_alg_rlwe_newhope_avx2, +}; + +typedef struct OQS_KEX OQS_KEX; + +/** + * OQS key exchange object + */ +typedef struct OQS_KEX { + + /** + * PRNG + */ + OQS_RAND *rand; + + /** + * Specifies the name of the key exchange method + */ + char *method_name; + + /** + * Classical security in terms of the number of bits provided by the key + * exchange method. + */ + uint16_t estimated_classical_security; + + /** + * Equivalent quantum security in terms of the number of bits provided by the key + * exchange method. + */ + uint16_t estimated_quantum_security; + + /** + * An instance-specific seed, if any. + */ + uint8_t *seed; + + /** + * Size of instance-specific seed, if any. + */ + size_t seed_len; + + /** + * Named parameters for this key exchange method instance, if any. + */ + char *named_parameters; + + /** + * Opaque pointer for passing around instance-specific data + */ + void *params; + + /** + * Opaque pointer for passing around any computation context + */ + void *ctx; + + /** + * Pointer to a function for public and private key generation by Alice. + * + * @param k Key exchange structure + * @param alice_priv Alice's private key + * @param alice_msg Alice's message (public key + optional additional data) + * @param alice_msg_len Alice's message length + * @return 1 on success, or 0 on failure + */ + int (*alice_0)(OQS_KEX *k, void **alive_priv, uint8_t **alice_msg, size_t *alice_msg_len); + + /** + * Pointer to a function for shared key generation by Bob. 
+ * + * @param k Key exchange structure + * @param alice_msg Alice's message (public key + optional additional data) + * @param alice_msg_len Alice's message length + * @param bob_msg Bob's message (public key / encryption of shared key + optional additional data) + * @param bob_msg_len Bob's message length + * @param key Shared key + * @param key_len Shared key length + * @return 1 on success, or 0 on failure + */ + int (*bob)(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len); + + /** + * Pointer to a function for shared key generation by Alice. + * + * @param k Key exchange structure + * @param alice_priv Alice's private key + * @param bob_msg Bob's message (public key / encryption of shared key + optional additional data) + * @param bob_msg_len Bob's message length + * @param key Shared key + * @param key_len Shared key length + * @return 1 on success, or 0 on failure + */ + int (*alice_1)(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + + /** + * Pointer to a function for freeing Alice's private key + * + * @param k Key exchange structure + * @param alice_priv Alice's private key + */ + void (*alice_priv_free)(OQS_KEX *k, void *alice_priv); + + /** + * Pointer to a function for freeing the allocated key exchange structure + * + * @param k Key exchange structure + */ + void (*free)(OQS_KEX *k); + +} OQS_KEX; + +/** + * Allocate a new key exchange object. + * + * @param rand Random number generator. + * @param alg_name Algorithm to be instantiated + * @param seed An instance-specific seed, if any, or NULL. + * @param seed_len The length of seed, or 0. + * @param named_parameters Name or description of method-specific parameters + * to use for this instance (as a NULL-terminated C string), + * if any, or NULL. + * @return The object on success, or NULL on failure. 
+ */ +OQS_KEX *OQS_KEX_new(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters); + +int OQS_KEX_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len); +int OQS_KEX_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len); +int OQS_KEX_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + +void OQS_KEX_alice_priv_free(OQS_KEX *k, void *alice_priv); +void OQS_KEX_free(OQS_KEX *k); + +#endif diff --git a/crypt/liboqs/kex/test_kex.c b/crypt/liboqs/kex/test_kex.c new file mode 100644 index 0000000000000000000000000000000000000000..fbb32c42156be858251f3bbf03fbb61433d02604 --- /dev/null +++ b/crypt/liboqs/kex/test_kex.c @@ -0,0 +1,442 @@ +#if defined(WINDOWS) +#pragma warning(disable : 4244 4293) +#endif + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <oqs/kex.h> +#include <oqs/rand.h> + +#include "../ds_benchmark.h" +#include "../common/common.h" + +struct kex_testcase { + enum OQS_KEX_alg_name alg_name; + unsigned char *seed; + size_t seed_len; + char *named_parameters; + char *id; + int run; + int iter; +}; + +/* Add new testcases here */ +struct kex_testcase kex_testcases[] = { +#ifdef ENABLE_KEX_LWE_FRODO + {OQS_KEX_alg_lwe_frodo, (unsigned char *) "01234567890123456", 16, "recommended", "lwe_frodo_recommended", 0, 100}, +#endif +#ifdef ENABLE_CODE_MCBITS + {OQS_KEX_alg_code_mcbits, NULL, 0, NULL, "code_mcbits", 0, 25}, +#endif +#ifdef ENABLE_KEX_MLWE_KYBER + {OQS_KEX_alg_mlwe_kyber, NULL, 0, NULL, "mlwe_kyber", 0, 100}, +#endif +#ifndef DISABLE_NTRU_ON_WINDOWS_BY_DEFAULT +#ifdef ENABLE_KEX_NTRU + {OQS_KEX_alg_ntru, NULL, 0, NULL, "ntru", 0, 25}, +#endif +#endif + {OQS_KEX_alg_rlwe_bcns15, NULL, 0, NULL, "rlwe_bcns15", 0, 100}, +#ifdef ENABLE_KEX_RLWE_MSRLN16 + 
{OQS_KEX_alg_rlwe_msrln16, NULL, 0, NULL, "rlwe_msrln16", 0, 100}, +#endif +#ifdef ENABLE_KEX_RLWE_NEWHOPE + {OQS_KEX_alg_rlwe_newhope, NULL, 0, NULL, "rlwe_newhope", 0, 100}, +#endif +#ifdef ENABLE_KEX_SIDH_CLN16 + {OQS_KEX_alg_sidh_cln16, NULL, 0, NULL, "sidh_cln16", 0, 10}, + {OQS_KEX_alg_sidh_cln16_compressed, NULL, 0, NULL, "sidh_cln16_compressed", 0, 10}, +#endif +#ifdef ENABLE_SIDH_IQC_REF + {OQS_KEX_alg_sidh_iqc_ref, NULL, 0, "params771", "sidh_iqc_ref", 0, 10}, +#endif +#ifdef ENABLE_KEX_RLWE_NEWHOPE_AVX2 + {OQS_KEX_alg_rlwe_newhope_avx2, NULL, 0, NULL, "rlwe_newhope_avx2", 0, 100}, +#endif + +}; + +#define KEX_TEST_ITERATIONS 100 +#define KEX_BENCH_SECONDS_DEFAULT 1 + +#define PRINT_HEX_STRING(label, str, len) \ + { \ + printf("%-20s (%4zu bytes): ", (label), (size_t)(len)); \ + for (size_t i = 0; i < (len); i++) { \ + printf("%02X", ((unsigned char *) (str))[i]); \ + } \ + printf("\n"); \ + } + +static int kex_test_correctness(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters, const int print, unsigned long occurrences[256]) { + + OQS_KEX *kex = NULL; + int rc; + + void *alice_priv = NULL; + uint8_t *alice_msg = NULL; + size_t alice_msg_len; + uint8_t *alice_key = NULL; + size_t alice_key_len; + + uint8_t *bob_msg = NULL; + size_t bob_msg_len; + uint8_t *bob_key = NULL; + size_t bob_key_len; + + /* setup KEX */ + kex = OQS_KEX_new(rand, alg_name, seed, seed_len, named_parameters); + if (kex == NULL) { + eprintf("new_method failed\n"); + goto err; + } + + if (print) { + printf("================================================================================\n"); + printf("Sample computation for key exchange method %s\n", kex->method_name); + printf("================================================================================\n"); + } + + /* Alice's initial message */ + rc = OQS_KEX_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len); + if (rc != 1) { + eprintf("OQS_KEX_alice_0 
failed\n"); + goto err; + } + + if (print) { + PRINT_HEX_STRING("Alice message", alice_msg, alice_msg_len) + } + + /* Bob's response */ + rc = OQS_KEX_bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &bob_key, &bob_key_len); + if (rc != 1) { + eprintf("OQS_KEX_bob failed\n"); + goto err; + } + + if (print) { + PRINT_HEX_STRING("Bob message", bob_msg, bob_msg_len) + PRINT_HEX_STRING("Bob session key", bob_key, bob_key_len) + } + + /* Alice processes Bob's response */ + rc = OQS_KEX_alice_1(kex, alice_priv, bob_msg, bob_msg_len, &alice_key, &alice_key_len); + if (rc != 1) { + eprintf("OQS_KEX_alice_1 failed\n"); + goto err; + } + + if (print) { + PRINT_HEX_STRING("Alice session key", alice_key, alice_key_len) + } + + /* compare session key lengths and values */ + if (alice_key_len != bob_key_len) { + eprintf("ERROR: Alice's session key and Bob's session key are different lengths (%zu vs %zu)\n", alice_key_len, bob_key_len); + goto err; + } + rc = memcmp(alice_key, bob_key, alice_key_len); + if (rc != 0) { + eprintf("ERROR: Alice's session key and Bob's session key are not equal\n"); + PRINT_HEX_STRING("Alice session key", alice_key, alice_key_len) + PRINT_HEX_STRING("Bob session key", bob_key, bob_key_len) + goto err; + } + if (print) { + printf("Alice and Bob's session keys match.\n"); + printf("\n\n"); + } + + /* record generated bytes for statistical analysis */ + for (size_t i = 0; i < alice_key_len; i++) { + OQS_RAND_test_record_occurrence(alice_key[i], occurrences); + } + + rc = 1; + goto cleanup; + +err: + rc = 0; + +cleanup: + free(alice_msg); + free(alice_key); + free(bob_msg); + free(bob_key); + OQS_KEX_alice_priv_free(kex, alice_priv); + OQS_KEX_free(kex); + + return rc; +} + +static int kex_test_correctness_wrapper(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters, int iterations, bool quiet) { + OQS_KEX *kex = NULL; + int ret; + + unsigned long occurrences[256]; + for (int i 
= 0; i < 256; i++) { + occurrences[i] = 0; + } + + ret = kex_test_correctness(rand, alg_name, seed, seed_len, named_parameters, quiet ? 0 : 1, occurrences); + + if (ret != 1) { + goto err; + } + + /* setup KEX */ + kex = OQS_KEX_new(rand, alg_name, seed, seed_len, named_parameters); + if (kex == NULL) { + goto err; + } + + printf("================================================================================\n"); + printf("Testing correctness and randomness of key exchange method %s (params=%s) for %d iterations\n", + kex->method_name, named_parameters, iterations); + printf("================================================================================\n"); + for (int i = 0; i < iterations; i++) { + ret = kex_test_correctness(rand, alg_name, seed, seed_len, named_parameters, 0, occurrences); + if (ret != 1) { + goto err; + } + } + printf("All session keys matched.\n"); + OQS_RAND_report_statistics(occurrences, ""); + printf("\n\n"); + + ret = 1; + goto cleanup; + +err: + ret = 0; + +cleanup: + OQS_KEX_free(kex); + + return ret; +} + +static void cleanup_alice_0(OQS_KEX *kex, void *alice_priv, uint8_t *alice_msg) { + free(alice_msg); + OQS_KEX_alice_priv_free(kex, alice_priv); +} + +static void cleanup_bob(uint8_t *bob_msg, uint8_t *bob_key) { + free(bob_msg); + free(bob_key); +} + +static int kex_bench_wrapper(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters, const size_t seconds) { + + OQS_KEX *kex = NULL; + int rc; + + void *alice_priv = NULL; + uint8_t *alice_msg = NULL; + size_t alice_msg_len; + uint8_t *alice_key = NULL; + size_t alice_key_len; + + uint8_t *bob_msg = NULL; + size_t bob_msg_len; + uint8_t *bob_key = NULL; + size_t bob_key_len; + + /* setup KEX */ + kex = OQS_KEX_new(rand, alg_name, seed, seed_len, named_parameters); + if (kex == NULL) { + eprintf("new_method failed\n"); + goto err; + } + printf("%-30s | %10s | %14s | %15s | %10s | %16s | %10s\n", kex->method_name, "", 
"", "", "", "", ""); + + TIME_OPERATION_SECONDS({ OQS_KEX_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len); cleanup_alice_0(kex, alice_priv, alice_msg); }, "alice 0", seconds); + + OQS_KEX_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len); + TIME_OPERATION_SECONDS({ OQS_KEX_bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &bob_key, &bob_key_len); cleanup_bob(bob_msg, bob_key); }, "bob", seconds); + + OQS_KEX_bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &bob_key, &bob_key_len); + TIME_OPERATION_SECONDS({ OQS_KEX_alice_1(kex, alice_priv, bob_msg, bob_msg_len, &alice_key, &alice_key_len); free(alice_key); }, "alice 1", seconds); + alice_key = NULL; + + printf("Communication (bytes): A->B: %zu, B->A: %zu, total: %zu; classical/quantum security bits [%u:%u] \n", alice_msg_len, bob_msg_len, alice_msg_len + bob_msg_len, kex->estimated_classical_security, kex->estimated_quantum_security); + + rc = 1; + goto cleanup; + +err: + rc = 0; + +cleanup: + free(alice_msg); + free(alice_key); + free(bob_msg); + free(bob_key); + OQS_KEX_alice_priv_free(kex, alice_priv); + OQS_KEX_free(kex); + + return rc; +} + +static int kex_mem_bench_wrapper(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters) { + + OQS_KEX *kex = NULL; + int rc; + + void *alice_priv = NULL; + uint8_t *alice_msg = NULL; + size_t alice_msg_len; + uint8_t *alice_key = NULL; + size_t alice_key_len; + + uint8_t *bob_msg = NULL; + size_t bob_msg_len; + uint8_t *bob_key = NULL; + size_t bob_key_len; + + kex = OQS_KEX_new(rand, alg_name, seed, seed_len, named_parameters); + if (kex == NULL) { + fprintf(stderr, "new_method failed\n"); + goto err; + } + + printf("running %s..\n", kex->method_name); + + OQS_KEX_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len); + OQS_KEX_bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &bob_key, &bob_key_len); + OQS_KEX_alice_1(kex, alice_priv, bob_msg, bob_msg_len, &alice_key, 
&alice_key_len); + + rc = 1; + goto cleanup; + +err: + rc = 0; + +cleanup: + free(alice_msg); + free(alice_key); + free(bob_msg); + free(bob_key); + OQS_KEX_alice_priv_free(kex, alice_priv); + OQS_KEX_free(kex); + + return rc; +} + +void print_help() { + printf("Usage: ./test_kex [options] [algorithms]\n"); + printf("\nOptions:\n"); + printf(" --quiet, -q\n"); + printf(" Less verbose output\n"); + printf(" --bench, -b\n"); + printf(" Run benchmarks\n"); + printf(" --seconds -s [SECONDS]\n"); + printf(" Number of seconds to run benchmarks (default==%d)\n", KEX_BENCH_SECONDS_DEFAULT); + printf(" --mem-bench\n"); + printf(" Run memory benchmarks (run once and allocate only what is required)\n"); + printf("\nalgorithms:\n"); + size_t kex_testcases_len = sizeof(kex_testcases) / sizeof(struct kex_testcase); + for (size_t i = 0; i < kex_testcases_len; i++) { + printf(" %s\n", kex_testcases[i].id); + } +} + +int main(int argc, char **argv) { + + int success = 1; + bool run_all = true; + bool quiet = false; + bool bench = false; + bool mem_bench = false; + size_t kex_testcases_len = sizeof(kex_testcases) / sizeof(struct kex_testcase); + size_t kex_bench_seconds = KEX_BENCH_SECONDS_DEFAULT; + for (int i = 1; i < argc; i++) { + if (argv[i][0] == '-') { + if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "-help") == 0) || (strcmp(argv[i], "--help") == 0)) { + print_help(); + return EXIT_SUCCESS; + } else if (strcmp(argv[i], "--quiet") == 0 || strcmp(argv[i], "-q") == 0) { + quiet = true; + } else if (strcmp(argv[i], "--bench") == 0 || strcmp(argv[i], "-b") == 0) { + bench = true; + } else if (strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-s") == 0) { + if (++i == argc) { + print_help(); + return EXIT_SUCCESS; + } + char *end; + int kex_bench_seconds_input = strtol(argv[i], &end, 10); + if (kex_bench_seconds_input < 1) { + print_help(); + return EXIT_SUCCESS; + } + kex_bench_seconds = kex_bench_seconds_input; + } else if ((strcmp(argv[i], "--mem-bench") == 0 || 
strcmp(argv[i], "-m") == 0)) { + mem_bench = true; + } + } else { + run_all = false; + for (size_t j = 0; j < kex_testcases_len; j++) { + if (strcmp(argv[i], kex_testcases[j].id) == 0) { + kex_testcases[j].run = 1; + } + } + } + } + + /* setup RAND */ + OQS_RAND *rand = OQS_RAND_new(OQS_RAND_alg_urandom_chacha20); + if (rand == NULL) { + goto err; + } + + if (mem_bench) { + for (size_t i = 0; i < kex_testcases_len; i++) { + if (run_all || kex_testcases[i].run == 1) { + success = kex_mem_bench_wrapper(rand, kex_testcases[i].alg_name, kex_testcases[i].seed, kex_testcases[i].seed_len, kex_testcases[i].named_parameters); + } + if (success != 1) { + goto err; + } + } + printf("memory benchmarks done, exiting..\n"); + success = 1; + goto cleanup; + } + + for (size_t i = 0; i < kex_testcases_len; i++) { + if (run_all || kex_testcases[i].run == 1) { + int num_iter = kex_testcases[i].iter; + success = kex_test_correctness_wrapper(rand, kex_testcases[i].alg_name, kex_testcases[i].seed, kex_testcases[i].seed_len, kex_testcases[i].named_parameters, num_iter, quiet); + } + if (success != 1) { + goto err; + } + } + + if (bench) { + PRINT_TIMER_HEADER + for (size_t i = 0; i < kex_testcases_len; i++) { + if (run_all || kex_testcases[i].run == 1) { + kex_bench_wrapper(rand, kex_testcases[i].alg_name, kex_testcases[i].seed, kex_testcases[i].seed_len, kex_testcases[i].named_parameters, kex_bench_seconds); + } + } + PRINT_TIMER_FOOTER + } + + success = 1; + goto cleanup; + +err: + success = 0; + eprintf("ERROR!\n"); + +cleanup: + OQS_RAND_free(rand); + + return (success == 1) ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/crypt/liboqs/kex_code_mcbits/LICENSE.txt b/crypt/liboqs/kex_code_mcbits/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..cd487c9b5f55fb037d92c8f0d509df66d4cf06fc --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/LICENSE.txt @@ -0,0 +1,7 @@ +The files in this directory and its subdirectories (except kex_code_mcbits.*) +were originally written by Daniel J. Bernstein, Tung Chou, and Peter Schwabe +(https://www.win.tue.nl/~tchou/mcbits/). + +According to the distribution website (https://www.win.tue.nl/~tchou/mcbits/): + +"The software is in the public domain." diff --git a/crypt/liboqs/kex_code_mcbits/Makefile.am b/crypt/liboqs/kex_code_mcbits/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..2e29344b0db46d02c60f5b558286eeae5246676c --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/Makefile.am @@ -0,0 +1,9 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libmcbits.la + +libmcbits_la_SOURCES = external/operations.c kex_code_mcbits.c + +libmcbits_la_CPPFLAGS = -I../../include -I${SODIUM_DIR}/include + +libmcbits_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/kex_code_mcbits/external/api.h b/crypt/liboqs/kex_code_mcbits/external/api.h new file mode 100755 index 0000000000000000000000000000000000000000..5d9710852ac485320dc8d96507ac2a2daa75a355 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/api.h @@ -0,0 +1,5 @@ +#define CRYPTO_SECRETKEYBYTES 5984 +#define CRYPTO_PUBLICKEYBYTES 311736 +#define CRYPTO_BYTES 109 + +#define CRYPTO_VERSION "1.0" diff --git a/crypt/liboqs/kex_code_mcbits/external/benes.c b/crypt/liboqs/kex_code_mcbits/external/benes.c new file mode 100644 index 0000000000000000000000000000000000000000..053c4f933d748dafc84df8601ace733959aac2c5 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/benes.c @@ -0,0 +1,64 @@ +static void func(uint64_t *bs, uint64_t *cond_ptr, int low) { + int i, j, x, y; + + int high = 5 - low; + + 
uint64_t diff; + + // + + for (j = 0; j < (1 << low); j++) { + x = (0 << low) + j; + y = (1 << low) + j; + + for (i = 0; i < (1 << high); i++) { + diff = bs[x] ^ bs[y]; + diff &= (*cond_ptr++); + bs[x] ^= diff; + bs[y] ^= diff; + + x += (1 << (low + 1)); + y += (1 << (low + 1)); + } + } +} + +static void benes_compact(uint64_t *bs, uint64_t *cond, int rev) { + uint64_t *cond_ptr; + int inc, low; + + // + + if (rev == 0) { + inc = 32; + cond_ptr = cond; + } else { + inc = -32; + cond_ptr = &cond[704]; + } + + // + + for (low = 0; low <= 5; low++) { + func(bs, cond_ptr, low); + cond_ptr += inc; + } + + transpose_64x64_compact(bs, bs); + + for (low = 0; low <= 5; low++) { + func(bs, cond_ptr, low); + cond_ptr += inc; + } + for (low = 4; low >= 0; low--) { + func(bs, cond_ptr, low); + cond_ptr += inc; + } + + transpose_64x64_compact(bs, bs); + + for (low = 5; low >= 0; low--) { + func(bs, cond_ptr, low); + cond_ptr += inc; + } +} diff --git a/crypt/liboqs/kex_code_mcbits/external/bm.c b/crypt/liboqs/kex_code_mcbits/external/bm.c new file mode 100644 index 0000000000000000000000000000000000000000..cd0f23b76443cfaf82c29f7bf81a5e52b0c6dba2 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/bm.c @@ -0,0 +1,135 @@ +typedef uint16_t gf; + +static void into_vec(uint64_t *out, gf in) { + int i; + + for (i = 0; i < GFBITS; i++) { + out[i] = (in >> i) & 1; + out[i] = -out[i]; + } +} + +static gf vec_reduce(uint64_t *prod) { + int i; + + uint64_t tmp[GFBITS]; + gf ret = 0; + + for (i = 0; i < GFBITS; i++) { + tmp[i] = prod[i]; + } + + for (i = GFBITS - 1; i >= 0; i--) + tmp[i] ^= (tmp[i] >> 32); + for (i = GFBITS - 1; i >= 0; i--) + tmp[i] ^= (tmp[i] >> 16); + for (i = GFBITS - 1; i >= 0; i--) + tmp[i] ^= (tmp[i] >> 8); + for (i = GFBITS - 1; i >= 0; i--) + tmp[i] ^= (tmp[i] >> 4); + for (i = GFBITS - 1; i >= 0; i--) { + ret <<= 1; + ret |= (0x6996 >> (tmp[i] & 0xF)) & 1; + }; + + return ret; +} + +static uint64_t mask_nonzero_64bit(gf a) { + uint64_t ret = a; + + ret -= 
1; + ret >>= 63; + ret -= 1; + + return ret; +} + +static uint64_t mask_leq_64bit(uint16_t a, uint16_t b) { + uint64_t a_tmp = a; + uint64_t b_tmp = b; + uint64_t ret = b_tmp - a_tmp; + + ret >>= 63; + ret -= 1; + + return ret; +} + +static void vec_cmov(uint64_t *out, uint64_t *in, uint64_t mask) { + int i; + + for (i = 0; i < GFBITS; i++) + out[i] = (in[i] & mask) | (out[i] & ~mask); +} + +static void bm(uint64_t out[GFBITS], uint64_t in[][GFBITS]) { + uint16_t i; + uint16_t N, L; + + uint64_t C[GFBITS], B[GFBITS], prod[GFBITS]; + uint64_t in_tmp[GFBITS], r_vec[GFBITS], C_tmp[GFBITS]; + + uint64_t mask_nz, mask_leq; + uint16_t mask_16b; + + gf d, b, b_inv, r; + + // init + + C[0] = 1; + C[0] <<= 63; + B[0] = 1; + B[0] <<= 62; + + for (i = 1; i < GFBITS; i++) + B[i] = C[i] = 0; + + b = 1; + L = 0; + + // + + for (N = 0; N < SYS_T * 2; N++) { + // computing d + + if (N < 64) + for (i = 0; i < GFBITS; i++) + in_tmp[i] = in[0][i] << (63 - N); + + else + for (i = 0; i < GFBITS; i++) + in_tmp[i] = (in[0][i] >> (N - 63)) | (in[1][i] << (127 - N)); + + vec_mul(prod, C, in_tmp); + d = vec_reduce(prod); + + // 3 cases + + b_inv = gf_inv(b); + r = gf_mul(d, b_inv); + into_vec(r_vec, r); + vec_mul(C_tmp, r_vec, B); + + for (i = 0; i < GFBITS; i++) + C_tmp[i] ^= C[i]; + + mask_nz = mask_nonzero_64bit(d); + mask_leq = mask_leq_64bit(L * 2, N); + mask_16b = (mask_nz & mask_leq) & 0xFFFF; + + vec_cmov(B, C, mask_nz & mask_leq); + vec_copy(C, C_tmp); + + b = (d & mask_16b) | (b & ~mask_16b); + L = ((N + 1 - L) & mask_16b) | (L & ~mask_16b); + + for (i = 0; i < GFBITS; i++) + B[i] >>= 1; + } + + vec_copy(out, C); + + for (i = 0; i < GFBITS; i++) + out[i] >>= 64 - (SYS_T + 1); +} diff --git a/crypt/liboqs/kex_code_mcbits/external/consts.data b/crypt/liboqs/kex_code_mcbits/external/consts.data new file mode 100755 index 0000000000000000000000000000000000000000..a728344f04e0c04664655795e1fb9be363bc7863 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/consts.data @@ -0,0 
+1,888 @@ +//64 +{ + 0XF00F0FF0F00F0FF0, + 0XF0F00F0F0F0FF0F0, + 0X0FF00FF00FF00FF0, + 0XAA5555AAAA5555AA, + 0XF00F0FF0F00F0FF0, + 0X33CCCC33CC3333CC, + 0XFFFF0000FFFF0000, + 0XCC33CC3333CC33CC, + 0X33CC33CC33CC33CC, + 0X5A5A5A5A5A5A5A5A, + 0XFF00FF00FF00FF00, + 0XF00F0FF0F00F0FF0, +}, +//128 +{ + 0X3C3C3C3C3C3C3C3C, + 0XF0F0F0F0F0F0F0F0, + 0X5555AAAA5555AAAA, + 0XCC3333CCCC3333CC, + 0XC33CC33CC33CC33C, + 0X55555555AAAAAAAA, + 0X33333333CCCCCCCC, + 0X00FF00FFFF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0X0000000000000000, + 0X0000FFFFFFFF0000, + 0XF0F00F0F0F0FF0F0, +}, +{ + 0X3C3C3C3C3C3C3C3C, + 0X0F0F0F0F0F0F0F0F, + 0XAAAA5555AAAA5555, + 0XCC3333CCCC3333CC, + 0XC33CC33CC33CC33C, + 0X55555555AAAAAAAA, + 0X33333333CCCCCCCC, + 0XFF00FF0000FF00FF, + 0X0F0F0F0F0F0F0F0F, + 0X0000000000000000, + 0X0000FFFFFFFF0000, + 0XF0F00F0F0F0FF0F0, +}, +//256 +{ + 0XAA55AA5555AA55AA, + 0XCC33CC3333CC33CC, + 0X33CCCC33CC3333CC, + 0X55555555AAAAAAAA, + 0XFF0000FF00FFFF00, + 0X3CC33CC3C33CC33C, + 0X5555AAAA5555AAAA, + 0X0FF00FF00FF00FF0, + 0XCCCC33333333CCCC, + 0XF0F0F0F0F0F0F0F0, + 0X00FFFF0000FFFF00, + 0XC33CC33CC33CC33C, +}, +{ + 0X55AA55AAAA55AA55, + 0XCC33CC3333CC33CC, + 0XCC3333CC33CCCC33, + 0X55555555AAAAAAAA, + 0XFF0000FF00FFFF00, + 0XC33CC33C3CC33CC3, + 0XAAAA5555AAAA5555, + 0XF00FF00FF00FF00F, + 0X3333CCCCCCCC3333, + 0X0F0F0F0F0F0F0F0F, + 0XFF0000FFFF0000FF, + 0XC33CC33CC33CC33C, +}, +{ + 0XAA55AA5555AA55AA, + 0X33CC33CCCC33CC33, + 0XCC3333CC33CCCC33, + 0X55555555AAAAAAAA, + 0X00FFFF00FF0000FF, + 0X3CC33CC3C33CC33C, + 0X5555AAAA5555AAAA, + 0X0FF00FF00FF00FF0, + 0X3333CCCCCCCC3333, + 0XF0F0F0F0F0F0F0F0, + 0X00FFFF0000FFFF00, + 0XC33CC33CC33CC33C, +}, +{ + 0X55AA55AAAA55AA55, + 0X33CC33CCCC33CC33, + 0X33CCCC33CC3333CC, + 0X55555555AAAAAAAA, + 0X00FFFF00FF0000FF, + 0XC33CC33C3CC33CC3, + 0XAAAA5555AAAA5555, + 0XF00FF00FF00FF00F, + 0XCCCC33333333CCCC, + 0X0F0F0F0F0F0F0F0F, + 0XFF0000FFFF0000FF, + 0XC33CC33CC33CC33C, +}, +//512 +{ + 0X6699669999669966, + 0X33CCCC33CC3333CC, + 
0XA5A5A5A55A5A5A5A, + 0X3C3CC3C3C3C33C3C, + 0XF00FF00F0FF00FF0, + 0X55AA55AA55AA55AA, + 0X3C3CC3C3C3C33C3C, + 0X0F0F0F0FF0F0F0F0, + 0X55AA55AA55AA55AA, + 0X33CCCC33CC3333CC, + 0XF0F0F0F0F0F0F0F0, + 0XA55A5AA55AA5A55A, +}, +{ + 0X9966996666996699, + 0X33CCCC33CC3333CC, + 0XA5A5A5A55A5A5A5A, + 0X3C3CC3C3C3C33C3C, + 0X0FF00FF0F00FF00F, + 0XAA55AA55AA55AA55, + 0X3C3CC3C3C3C33C3C, + 0XF0F0F0F00F0F0F0F, + 0XAA55AA55AA55AA55, + 0XCC3333CC33CCCC33, + 0X0F0F0F0F0F0F0F0F, + 0XA55A5AA55AA5A55A, +}, +{ + 0X6699669999669966, + 0X33CCCC33CC3333CC, + 0X5A5A5A5AA5A5A5A5, + 0XC3C33C3C3C3CC3C3, + 0X0FF00FF0F00FF00F, + 0XAA55AA55AA55AA55, + 0XC3C33C3C3C3CC3C3, + 0X0F0F0F0FF0F0F0F0, + 0XAA55AA55AA55AA55, + 0X33CCCC33CC3333CC, + 0XF0F0F0F0F0F0F0F0, + 0XA55A5AA55AA5A55A, +}, +{ + 0X9966996666996699, + 0X33CCCC33CC3333CC, + 0X5A5A5A5AA5A5A5A5, + 0XC3C33C3C3C3CC3C3, + 0XF00FF00F0FF00FF0, + 0X55AA55AA55AA55AA, + 0XC3C33C3C3C3CC3C3, + 0XF0F0F0F00F0F0F0F, + 0X55AA55AA55AA55AA, + 0XCC3333CC33CCCC33, + 0X0F0F0F0F0F0F0F0F, + 0XA55A5AA55AA5A55A, +}, +{ + 0X6699669999669966, + 0XCC3333CC33CCCC33, + 0X5A5A5A5AA5A5A5A5, + 0X3C3CC3C3C3C33C3C, + 0X0FF00FF0F00FF00F, + 0X55AA55AA55AA55AA, + 0X3C3CC3C3C3C33C3C, + 0X0F0F0F0FF0F0F0F0, + 0X55AA55AA55AA55AA, + 0X33CCCC33CC3333CC, + 0XF0F0F0F0F0F0F0F0, + 0XA55A5AA55AA5A55A, +}, +{ + 0X9966996666996699, + 0XCC3333CC33CCCC33, + 0X5A5A5A5AA5A5A5A5, + 0X3C3CC3C3C3C33C3C, + 0XF00FF00F0FF00FF0, + 0XAA55AA55AA55AA55, + 0X3C3CC3C3C3C33C3C, + 0XF0F0F0F00F0F0F0F, + 0XAA55AA55AA55AA55, + 0XCC3333CC33CCCC33, + 0X0F0F0F0F0F0F0F0F, + 0XA55A5AA55AA5A55A, +}, +{ + 0X6699669999669966, + 0XCC3333CC33CCCC33, + 0XA5A5A5A55A5A5A5A, + 0XC3C33C3C3C3CC3C3, + 0XF00FF00F0FF00FF0, + 0XAA55AA55AA55AA55, + 0XC3C33C3C3C3CC3C3, + 0X0F0F0F0FF0F0F0F0, + 0XAA55AA55AA55AA55, + 0X33CCCC33CC3333CC, + 0XF0F0F0F0F0F0F0F0, + 0XA55A5AA55AA5A55A, +}, +{ + 0X9966996666996699, + 0XCC3333CC33CCCC33, + 0XA5A5A5A55A5A5A5A, + 0XC3C33C3C3C3CC3C3, + 0X0FF00FF0F00FF00F, + 0X55AA55AA55AA55AA, + 
0XC3C33C3C3C3CC3C3, + 0XF0F0F0F00F0F0F0F, + 0X55AA55AA55AA55AA, + 0XCC3333CC33CCCC33, + 0X0F0F0F0F0F0F0F0F, + 0XA55A5AA55AA5A55A, +}, +//1024 +{ + 0X9669699696696996, + 0X6996699669966996, + 0X6996699669966996, + 0X00FFFF0000FFFF00, + 0XFF00FF00FF00FF00, + 0XF00FF00F0FF00FF0, + 0XF0F00F0F0F0FF0F0, + 0XC33C3CC33CC3C33C, + 0XC33C3CC33CC3C33C, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X6996699669966996, + 0X6996699669966996, + 0X00FFFF0000FFFF00, + 0X00FF00FF00FF00FF, + 0X0FF00FF0F00FF00F, + 0X0F0FF0F0F0F00F0F, + 0X3CC3C33CC33C3CC3, + 0X3CC3C33CC33C3CC3, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X6996699669966996, + 0X6996699669966996, + 0XFF0000FFFF0000FF, + 0X00FF00FF00FF00FF, + 0X0FF00FF0F00FF00F, + 0X0F0FF0F0F0F00F0F, + 0XC33C3CC33CC3C33C, + 0XC33C3CC33CC3C33C, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X6996699669966996, + 0X6996699669966996, + 0XFF0000FFFF0000FF, + 0XFF00FF00FF00FF00, + 0XF00FF00F0FF00FF0, + 0XF0F00F0F0F0FF0F0, + 0X3CC3C33CC33C3CC3, + 0X3CC3C33CC33C3CC3, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X6996699669966996, + 0X9669966996699669, + 0XFF0000FFFF0000FF, + 0X00FF00FF00FF00FF, + 0XF00FF00F0FF00FF0, + 0XF0F00F0F0F0FF0F0, + 0XC33C3CC33CC3C33C, + 0XC33C3CC33CC3C33C, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X6996699669966996, + 0X9669966996699669, + 0XFF0000FFFF0000FF, + 0XFF00FF00FF00FF00, + 0X0FF00FF0F00FF00F, + 0X0F0FF0F0F0F00F0F, + 0X3CC3C33CC33C3CC3, + 0X3CC3C33CC33C3CC3, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X6996699669966996, + 0X9669966996699669, + 0X00FFFF0000FFFF00, + 0XFF00FF00FF00FF00, + 0X0FF00FF0F00FF00F, + 0X0F0FF0F0F0F00F0F, + 0XC33C3CC33CC3C33C, + 0XC33C3CC33CC3C33C, + 0XA55A5AA55AA5A55A, + 
0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X6996699669966996, + 0X9669966996699669, + 0X00FFFF0000FFFF00, + 0X00FF00FF00FF00FF, + 0XF00FF00F0FF00FF0, + 0XF0F00F0F0F0FF0F0, + 0X3CC3C33CC33C3CC3, + 0X3CC3C33CC33C3CC3, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X9669966996699669, + 0X9669966996699669, + 0X00FFFF0000FFFF00, + 0XFF00FF00FF00FF00, + 0XF00FF00F0FF00FF0, + 0XF0F00F0F0F0FF0F0, + 0XC33C3CC33CC3C33C, + 0XC33C3CC33CC3C33C, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X9669966996699669, + 0X9669966996699669, + 0X00FFFF0000FFFF00, + 0X00FF00FF00FF00FF, + 0X0FF00FF0F00FF00F, + 0X0F0FF0F0F0F00F0F, + 0X3CC3C33CC33C3CC3, + 0X3CC3C33CC33C3CC3, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X9669966996699669, + 0X9669966996699669, + 0XFF0000FFFF0000FF, + 0X00FF00FF00FF00FF, + 0X0FF00FF0F00FF00F, + 0X0F0FF0F0F0F00F0F, + 0XC33C3CC33CC3C33C, + 0XC33C3CC33CC3C33C, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X9669966996699669, + 0X9669966996699669, + 0XFF0000FFFF0000FF, + 0XFF00FF00FF00FF00, + 0XF00FF00F0FF00FF0, + 0XF0F00F0F0F0FF0F0, + 0X3CC3C33CC33C3CC3, + 0X3CC3C33CC33C3CC3, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X9669966996699669, + 0X6996699669966996, + 0XFF0000FFFF0000FF, + 0X00FF00FF00FF00FF, + 0XF00FF00F0FF00FF0, + 0XF0F00F0F0F0FF0F0, + 0XC33C3CC33CC3C33C, + 0XC33C3CC33CC3C33C, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X9669966996699669, + 0X6996699669966996, + 0XFF0000FFFF0000FF, + 0XFF00FF00FF00FF00, + 0X0FF00FF0F00FF00F, + 0X0F0FF0F0F0F00F0F, + 0X3CC3C33CC33C3CC3, + 0X3CC3C33CC33C3CC3, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X9669966996699669, + 
0X6996699669966996, + 0X00FFFF0000FFFF00, + 0XFF00FF00FF00FF00, + 0X0FF00FF0F00FF00F, + 0X0F0FF0F0F0F00F0F, + 0XC33C3CC33CC3C33C, + 0XC33C3CC33CC3C33C, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +{ + 0X9669699696696996, + 0X9669966996699669, + 0X6996699669966996, + 0X00FFFF0000FFFF00, + 0X00FF00FF00FF00FF, + 0XF00FF00F0FF00FF0, + 0XF0F00F0F0F0FF0F0, + 0X3CC3C33CC33C3CC3, + 0X3CC3C33CC33C3CC3, + 0XA55A5AA55AA5A55A, + 0XC33C3CC33CC3C33C, + 0X3CC3C33C3CC3C33C, +}, +//2048 +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 
0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 
0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 
0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 
0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +} diff --git a/crypt/liboqs/kex_code_mcbits/external/decrypt.c b/crypt/liboqs/kex_code_mcbits/external/decrypt.c new file mode 100644 index 0000000000000000000000000000000000000000..6d18ccc0f987bc554fd424b9befb9b079190c469 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/decrypt.c @@ -0,0 +1,185 @@ +static void scaling(uint64_t out[][GFBITS], uint64_t inv[][GFBITS], const unsigned char *sk, uint64_t *recv) { + int i, j; + uint64_t sk_int[GFBITS]; + + uint64_t eval[64][GFBITS]; + uint64_t tmp[GFBITS]; + + // computing inverses + + for (i = 0; i < GFBITS; i++) + sk_int[i] = load8(sk + i * 8); + + fft(eval, sk_int); + + for (i = 0; i < 64; i++) + vec_sq(eval[i], eval[i]); + + vec_copy(inv[0], eval[0]); + + for (i = 1; i < 64; i++) + vec_mul(inv[i], inv[i - 1], 
eval[i]); + + vec_inv(tmp, inv[63]); + + for (i = 62; i >= 0; i--) { + vec_mul(inv[i + 1], tmp, inv[i]); + vec_mul(tmp, tmp, eval[i + 1]); + } + + vec_copy(inv[0], tmp); + + // + + for (i = 0; i < 64; i++) + for (j = 0; j < GFBITS; j++) + out[i][j] = inv[i][j] & recv[i]; +} + +static void scaling_inv(uint64_t out[][GFBITS], uint64_t inv[][GFBITS], uint64_t *recv) { + int i, j; + + for (i = 0; i < 64; i++) + for (j = 0; j < GFBITS; j++) + out[i][j] = inv[i][j] & recv[i]; +} + +static void preprocess(uint64_t *recv, const unsigned char *s) { + int i; + + for (i = 0; i < 64; i++) + recv[i] = 0; + + for (i = 0; i < SYND_BYTES / 8; i++) + recv[i] = load8(s + i * 8); + + for (i = SYND_BYTES % 8 - 1; i >= 0; i--) { + recv[SYND_BYTES / 8] <<= 8; + recv[SYND_BYTES / 8] |= s[SYND_BYTES / 8 * 8 + i]; + } +} + +// + +static void acc(uint64_t *c, uint64_t v) { + int i; + + uint64_t carry = v; + uint64_t t; + + for (i = 0; i < 8; i++) { + t = c[i] ^ carry; + carry = c[i] & carry; + + c[i] = t; + } +} + +static int weight(uint64_t *v) { + int i; + int w; + + union { + uint64_t data_64[8]; + uint8_t data_8[64]; + } counter; + + // + + for (i = 0; i < 8; i++) + counter.data_64[i] = 0; + + for (i = 0; i < 64; i++) + acc(counter.data_64, v[i]); + + transpose_8x64(counter.data_64); + + // + + w = 0; + for (i = 0; i < 64; i++) + w += counter.data_8[i]; + + return w; +} + +// + +static void syndrome_adjust(uint64_t in[][GFBITS]) { + int i; + + for (i = 0; i < GFBITS; i++) { + in[1][i] <<= (128 - SYS_T * 2); + in[1][i] >>= (128 - SYS_T * 2); + } +} + +static int decrypt(unsigned char *e, const unsigned char *sk, const unsigned char *s) { + int i, j; + + uint64_t t; + + uint64_t diff; + + uint64_t inv[64][GFBITS]; + uint64_t scaled[64][GFBITS]; + uint64_t eval[64][GFBITS]; + + uint64_t error[64]; + + uint64_t s_priv[2][GFBITS]; + uint64_t s_priv_cmp[2][GFBITS]; + uint64_t locator[GFBITS]; + + uint64_t recv[64]; + uint64_t cond[COND_BYTES / 8]; + + // + + for (i = 0; i < COND_BYTES / 8; 
i++) + cond[i] = load8(sk + IRR_BYTES + i * 8); + + preprocess(recv, s); + benes_compact(recv, cond, 1); + scaling(scaled, inv, sk, recv); // scaling + fft_tr(s_priv, scaled); // transposed FFT + syndrome_adjust(s_priv); + bm(locator, s_priv); // Berlekamp Massey + fft(eval, locator); // FFT + + for (i = 0; i < 64; i++) { + error[i] = vec_or(eval[i]); + error[i] = ~error[i]; + } + + { + // reencrypt + + scaling_inv(scaled, inv, error); + fft_tr(s_priv_cmp, scaled); + syndrome_adjust(s_priv_cmp); + + diff = 0; + for (i = 0; i < 2; i++) + for (j = 0; j < GFBITS; j++) + diff |= s_priv[i][j] ^ s_priv_cmp[i][j]; + + diff |= diff >> 32; + diff |= diff >> 16; + diff |= diff >> 8; + t = diff & 0xFF; + } + + benes_compact(error, cond, 0); + + for (i = 0; i < 64; i++) + store8(e + i * 8, error[i]); + + // + + t |= weight(error) ^ SYS_T; + t -= 1; + t >>= 63; + + return (t - 1); +} diff --git a/crypt/liboqs/kex_code_mcbits/external/encrypt.c b/crypt/liboqs/kex_code_mcbits/external/encrypt.c new file mode 100644 index 0000000000000000000000000000000000000000..130d58db503f792ab8eb9a03ef78419a2872d1c2 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/encrypt.c @@ -0,0 +1,98 @@ +static void gen_e(unsigned char *e, OQS_RAND *r) { + int i, j, eq; + + uint16_t ind[SYS_T]; + uint64_t e_int[64]; + uint64_t one = 1; + uint64_t mask; + uint64_t val[SYS_T]; + + while (1) { + OQS_RAND_n(r, (uint8_t *) ind, sizeof(ind)); + + for (i = 0; i < SYS_T; i++) + ind[i] &= (1 << GFBITS) - 1; + + eq = 0; + for (i = 1; i < SYS_T; i++) + for (j = 0; j < i; j++) + if (ind[i] == ind[j]) + eq = 1; + + if (eq == 0) + break; + } + + for (j = 0; j < SYS_T; j++) + val[j] = one << (ind[j] & 63); + + for (i = 0; i < 64; i++) { + e_int[i] = 0; + + for (j = 0; j < SYS_T; j++) { + mask = i ^ (ind[j] >> 6); + mask -= 1; + mask >>= 63; + mask = -mask; + + e_int[i] |= val[j] & mask; + } + } + + for (i = 0; i < 64; i++) + store8(e + i * 8, e_int[i]); +} + +#define C ((PK_NCOLS + 63) / 64) + +static void 
syndrome(unsigned char *s, const unsigned char *pk, const unsigned char *e) { + int i, j, t; + + const unsigned char *e_ptr = e + SYND_BYTES; + + uint64_t e_int[C]; + uint64_t row_int[C]; + uint64_t tmp[8]; + + unsigned char b; + + // + + memcpy(s, e, SYND_BYTES); + + e_int[C - 1] = 0; + memcpy(e_int, e_ptr, PK_NCOLS / 8); + + for (i = 0; i < PK_NROWS; i += 8) { + for (t = 0; t < 8; t++) { + row_int[C - 1] = 0; + memcpy(row_int, &pk[(i + t) * (PK_NCOLS / 8)], PK_NCOLS / 8); + + tmp[t] = 0; + for (j = 0; j < C; j++) + tmp[t] ^= e_int[j] & row_int[j]; + } + + b = 0; + + for (t = 7; t >= 0; t--) + tmp[t] ^= (tmp[t] >> 32); + for (t = 7; t >= 0; t--) + tmp[t] ^= (tmp[t] >> 16); + for (t = 7; t >= 0; t--) + tmp[t] ^= (tmp[t] >> 8); + for (t = 7; t >= 0; t--) + tmp[t] ^= (tmp[t] >> 4); + for (t = 7; t >= 0; t--) { + b <<= 1; + b |= (0x6996 >> (tmp[t] & 0xF)) & 1; + } + + s[i / 8] ^= b; + } +} + +static void encrypt(unsigned char *s, unsigned char *e, const unsigned char *pk, OQS_RAND *r) { + gen_e(e, r); + syndrome(s, pk, e); +} diff --git a/crypt/liboqs/kex_code_mcbits/external/fft.c b/crypt/liboqs/kex_code_mcbits/external/fft.c new file mode 100644 index 0000000000000000000000000000000000000000..8e92b3401c93d8e98bfc8d4f7e81d32aa6d92fde --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/fft.c @@ -0,0 +1,79 @@ +static void radix_conversions(uint64_t *in) { + int i, j, k; + + const uint64_t mask[5][2] = { + {0x8888888888888888, 0x4444444444444444}, + {0xC0C0C0C0C0C0C0C0, 0x3030303030303030}, + {0xF000F000F000F000, 0x0F000F000F000F00}, + {0xFF000000FF000000, 0x00FF000000FF0000}, + {0xFFFF000000000000, 0x0000FFFF00000000}}; + + const uint64_t s[5][GFBITS] = { +#include "scalars.data" + }; + + // + + for (j = 0; j <= 4; j++) { + for (i = 0; i < GFBITS; i++) + for (k = 4; k >= j; k--) { + in[i] ^= (in[i] & mask[k][0]) >> (1 << k); + in[i] ^= (in[i] & mask[k][1]) >> (1 << k); + } + + vec_mul(in, in, s[j]); // scaling + } +} + +static void butterflies(uint64_t 
out[][GFBITS], uint64_t *in) { + int i, j, k, s, b; + + uint64_t tmp[GFBITS]; + uint64_t consts[63][GFBITS] = { +#include "consts.data" + }; + + uint64_t consts_ptr = 0; + + const unsigned char reversal[64] = { + 0, 32, 16, 48, 8, 40, 24, 56, + 4, 36, 20, 52, 12, 44, 28, 60, + 2, 34, 18, 50, 10, 42, 26, 58, + 6, 38, 22, 54, 14, 46, 30, 62, + 1, 33, 17, 49, 9, 41, 25, 57, + 5, 37, 21, 53, 13, 45, 29, 61, + 3, 35, 19, 51, 11, 43, 27, 59, + 7, 39, 23, 55, 15, 47, 31, 63}; + + // boradcast + + for (j = 0; j < 64; j++) + for (i = 0; i < GFBITS; i++) { + out[j][i] = (in[i] >> reversal[j]) & 1; + out[j][i] = -out[j][i]; + } + + // butterflies + + for (i = 0; i <= 5; i++) { + s = 1 << i; + + for (j = 0; j < 64; j += 2 * s) { + for (k = j; k < j + s; k++) { + vec_mul(tmp, out[k + s], consts[consts_ptr + (k - j)]); + + for (b = 0; b < GFBITS; b++) + out[k][b] ^= tmp[b]; + for (b = 0; b < GFBITS; b++) + out[k + s][b] ^= out[k][b]; + } + } + + consts_ptr += (1 << i); + } +} + +static void fft(uint64_t out[][GFBITS], uint64_t *in) { + radix_conversions(in); + butterflies(out, in); +} diff --git a/crypt/liboqs/kex_code_mcbits/external/fft_tr.c b/crypt/liboqs/kex_code_mcbits/external/fft_tr.c new file mode 100644 index 0000000000000000000000000000000000000000..2ed75d26a39b8b4a57725b25bab1b66c43ff2966 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/fft_tr.c @@ -0,0 +1,249 @@ +#define vec_add(z, x, y) \ + for (b = 0; b < GFBITS; b++) { \ + z[b] = x[b] ^ y[b]; \ + } + +static void radix_conversions_tr(uint64_t in[][GFBITS]) { + int i, j, k; + + const uint64_t mask[6][2] = { + {0x2222222222222222, 0x4444444444444444}, + {0x0C0C0C0C0C0C0C0C, 0x3030303030303030}, + {0x00F000F000F000F0, 0x0F000F000F000F00}, + {0x0000FF000000FF00, 0x00FF000000FF0000}, + {0x00000000FFFF0000, 0x0000FFFF00000000}, + {0xFFFFFFFF00000000, 0x00000000FFFFFFFF}}; + + const uint64_t s[5][2][GFBITS] = { +#include "scalars_2x.data" + }; + + // + + for (j = 5; j >= 0; j--) { + if (j < 5) { + vec_mul(in[0], 
in[0], s[j][0]); // scaling + vec_mul(in[1], in[1], s[j][1]); // scaling + } + + for (i = 0; i < GFBITS; i++) + for (k = j; k <= 4; k++) { + in[0][i] ^= (in[0][i] & mask[k][0]) << (1 << k); + in[0][i] ^= (in[0][i] & mask[k][1]) << (1 << k); + + in[1][i] ^= (in[1][i] & mask[k][0]) << (1 << k); + in[1][i] ^= (in[1][i] & mask[k][1]) << (1 << k); + } + + for (i = 0; i < GFBITS; i++) { + in[1][i] ^= (in[0][i] & mask[5][0]) >> 32; + in[1][i] ^= (in[1][i] & mask[5][1]) << 32; + } + } +} + +static void butterflies_tr(uint64_t out[][GFBITS], uint64_t in[][GFBITS]) { + int i, j, k, s, b; + + uint64_t tmp[GFBITS]; + uint64_t pre[6][GFBITS]; + uint64_t buf[64]; + + const uint64_t consts[63][GFBITS] = { +#include "consts.data" + }; + + uint64_t consts_ptr = 63; + + const unsigned char reversal[64] = { + 0, 32, 16, 48, 8, 40, 24, 56, + 4, 36, 20, 52, 12, 44, 28, 60, + 2, 34, 18, 50, 10, 42, 26, 58, + 6, 38, 22, 54, 14, 46, 30, 62, + 1, 33, 17, 49, 9, 41, 25, 57, + 5, 37, 21, 53, 13, 45, 29, 61, + 3, 35, 19, 51, 11, 43, 27, 59, + 7, 39, 23, 55, 15, 47, 31, 63}; + + const uint16_t beta[6] = {8, 1300, 3408, 1354, 2341, 1154}; + + // butterflies + + for (i = 5; i >= 0; i--) { + s = 1 << i; + consts_ptr -= s; + + for (j = 0; j < 64; j += 2 * s) + for (k = j; k < j + s; k++) { + vec_add(in[k], in[k], in[k + s]); + vec_mul(tmp, in[k], consts[consts_ptr + (k - j)]); + vec_add(in[k + s], in[k + s], tmp); + } + } + + // transpose + + for (i = 0; i < GFBITS; i++) { + for (j = 0; j < 64; j++) + buf[reversal[j]] = in[j][i]; + + transpose_64x64_compact(buf, buf); + + for (j = 0; j < 64; j++) + in[j][i] = buf[j]; + } + + // boradcast + + vec_copy(pre[0], in[32]); + vec_add(in[33], in[33], in[32]); + vec_copy(pre[1], in[33]); + vec_add(in[35], in[35], in[33]); + vec_add(pre[0], pre[0], in[35]); + vec_add(in[34], in[34], in[35]); + vec_copy(pre[2], in[34]); + vec_add(in[38], in[38], in[34]); + vec_add(pre[0], pre[0], in[38]); + vec_add(in[39], in[39], in[38]); + vec_add(pre[1], pre[1], in[39]); 
+ vec_add(in[37], in[37], in[39]); + vec_add(pre[0], pre[0], in[37]); + vec_add(in[36], in[36], in[37]); + vec_copy(pre[3], in[36]); + vec_add(in[44], in[44], in[36]); + vec_add(pre[0], pre[0], in[44]); + vec_add(in[45], in[45], in[44]); + vec_add(pre[1], pre[1], in[45]); + vec_add(in[47], in[47], in[45]); + vec_add(pre[0], pre[0], in[47]); + vec_add(in[46], in[46], in[47]); + vec_add(pre[2], pre[2], in[46]); + vec_add(in[42], in[42], in[46]); + vec_add(pre[0], pre[0], in[42]); + vec_add(in[43], in[43], in[42]); + vec_add(pre[1], pre[1], in[43]); + vec_add(in[41], in[41], in[43]); + vec_add(pre[0], pre[0], in[41]); + vec_add(in[40], in[40], in[41]); + vec_copy(pre[4], in[40]); + vec_add(in[56], in[56], in[40]); + vec_add(pre[0], pre[0], in[56]); + vec_add(in[57], in[57], in[56]); + vec_add(pre[1], pre[1], in[57]); + vec_add(in[59], in[59], in[57]); + vec_add(pre[0], pre[0], in[59]); + vec_add(in[58], in[58], in[59]); + vec_add(pre[2], pre[2], in[58]); + vec_add(in[62], in[62], in[58]); + vec_add(pre[0], pre[0], in[62]); + vec_add(in[63], in[63], in[62]); + vec_add(pre[1], pre[1], in[63]); + vec_add(in[61], in[61], in[63]); + vec_add(pre[0], pre[0], in[61]); + vec_add(in[60], in[60], in[61]); + vec_add(pre[3], pre[3], in[60]); + vec_add(in[52], in[52], in[60]); + vec_add(pre[0], pre[0], in[52]); + vec_add(in[53], in[53], in[52]); + vec_add(pre[1], pre[1], in[53]); + vec_add(in[55], in[55], in[53]); + vec_add(pre[0], pre[0], in[55]); + vec_add(in[54], in[54], in[55]); + vec_add(pre[2], pre[2], in[54]); + vec_add(in[50], in[50], in[54]); + vec_add(pre[0], pre[0], in[50]); + vec_add(in[51], in[51], in[50]); + vec_add(pre[1], pre[1], in[51]); + vec_add(in[49], in[49], in[51]); + vec_add(pre[0], pre[0], in[49]); + vec_add(in[48], in[48], in[49]); + vec_copy(pre[5], in[48]); + vec_add(in[16], in[16], in[48]); + vec_add(pre[0], pre[0], in[16]); + vec_add(in[17], in[17], in[16]); + vec_add(pre[1], pre[1], in[17]); + vec_add(in[19], in[19], in[17]); + vec_add(pre[0], pre[0], 
in[19]); + vec_add(in[18], in[18], in[19]); + vec_add(pre[2], pre[2], in[18]); + vec_add(in[22], in[22], in[18]); + vec_add(pre[0], pre[0], in[22]); + vec_add(in[23], in[23], in[22]); + vec_add(pre[1], pre[1], in[23]); + vec_add(in[21], in[21], in[23]); + vec_add(pre[0], pre[0], in[21]); + vec_add(in[20], in[20], in[21]); + vec_add(pre[3], pre[3], in[20]); + vec_add(in[28], in[28], in[20]); + vec_add(pre[0], pre[0], in[28]); + vec_add(in[29], in[29], in[28]); + vec_add(pre[1], pre[1], in[29]); + vec_add(in[31], in[31], in[29]); + vec_add(pre[0], pre[0], in[31]); + vec_add(in[30], in[30], in[31]); + vec_add(pre[2], pre[2], in[30]); + vec_add(in[26], in[26], in[30]); + vec_add(pre[0], pre[0], in[26]); + vec_add(in[27], in[27], in[26]); + vec_add(pre[1], pre[1], in[27]); + vec_add(in[25], in[25], in[27]); + vec_add(pre[0], pre[0], in[25]); + vec_add(in[24], in[24], in[25]); + vec_add(pre[4], pre[4], in[24]); + vec_add(in[8], in[8], in[24]); + vec_add(pre[0], pre[0], in[8]); + vec_add(in[9], in[9], in[8]); + vec_add(pre[1], pre[1], in[9]); + vec_add(in[11], in[11], in[9]); + vec_add(pre[0], pre[0], in[11]); + vec_add(in[10], in[10], in[11]); + vec_add(pre[2], pre[2], in[10]); + vec_add(in[14], in[14], in[10]); + vec_add(pre[0], pre[0], in[14]); + vec_add(in[15], in[15], in[14]); + vec_add(pre[1], pre[1], in[15]); + vec_add(in[13], in[13], in[15]); + vec_add(pre[0], pre[0], in[13]); + vec_add(in[12], in[12], in[13]); + vec_add(pre[3], pre[3], in[12]); + vec_add(in[4], in[4], in[12]); + vec_add(pre[0], pre[0], in[4]); + vec_add(in[5], in[5], in[4]); + vec_add(pre[1], pre[1], in[5]); + vec_add(in[7], in[7], in[5]); + vec_add(pre[0], pre[0], in[7]); + vec_add(in[6], in[6], in[7]); + vec_add(pre[2], pre[2], in[6]); + vec_add(in[2], in[2], in[6]); + vec_add(pre[0], pre[0], in[2]); + vec_add(in[3], in[3], in[2]); + vec_add(pre[1], pre[1], in[3]); + vec_add(in[1], in[1], in[3]); + + vec_add(pre[0], pre[0], in[1]); + vec_add(out[0], in[0], in[1]); + + // + + for (j = 0; j < 
GFBITS; j++) { + tmp[j] = (beta[0] >> j) & 1; + tmp[j] = -tmp[j]; + } + + vec_mul(out[1], pre[0], tmp); + + for (i = 1; i < 6; i++) { + for (j = 0; j < GFBITS; j++) { + tmp[j] = (beta[i] >> j) & 1; + tmp[j] = -tmp[j]; + } + + vec_mul(tmp, pre[i], tmp); + vec_add(out[1], out[1], tmp); + } +} + +static void fft_tr(uint64_t out[][GFBITS], uint64_t in[][GFBITS]) { + butterflies_tr(out, in); + radix_conversions_tr(out); +} diff --git a/crypt/liboqs/kex_code_mcbits/external/gf.c b/crypt/liboqs/kex_code_mcbits/external/gf.c new file mode 100644 index 0000000000000000000000000000000000000000..5c15192242a5eadd7273174267844c21abc3b034 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/gf.c @@ -0,0 +1,113 @@ +typedef uint16_t gf; + +static gf gf_mul(gf in0, gf in1) { + int i; + + uint32_t tmp; + uint32_t t0; + uint32_t t1; + uint32_t t; + + t0 = in0; + t1 = in1; + + tmp = t0 * (t1 & 1); + + for (i = 1; i < GFBITS; i++) + tmp ^= (t0 * (t1 & (1 << i))); + + t = tmp & 0x7FC000; + tmp ^= t >> 9; + tmp ^= t >> 12; + + t = tmp & 0x3000; + tmp ^= t >> 9; + tmp ^= t >> 12; + + return tmp & ((1 << GFBITS) - 1); +} + +static gf gf_sq(gf in) { + const uint32_t B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF}; + + uint32_t x = in; + uint32_t t; + + x = (x | (x << 8)) & B[3]; + x = (x | (x << 4)) & B[2]; + x = (x | (x << 2)) & B[1]; + x = (x | (x << 1)) & B[0]; + + t = x & 0x7FC000; + x ^= t >> 9; + x ^= t >> 12; + + t = x & 0x3000; + x ^= t >> 9; + x ^= t >> 12; + + return x & ((1 << GFBITS) - 1); +} + +static gf gf_inv(gf in) { + gf tmp_11; + gf tmp_1111; + + gf out = in; + + out = gf_sq(out); + tmp_11 = gf_mul(out, in); // 11 + + out = gf_sq(tmp_11); + out = gf_sq(out); + tmp_1111 = gf_mul(out, tmp_11); // 1111 + + out = gf_sq(tmp_1111); + out = gf_sq(out); + out = gf_sq(out); + out = gf_sq(out); + out = gf_mul(out, tmp_1111); // 11111111 + + out = gf_sq(out); + out = gf_sq(out); + out = gf_mul(out, tmp_11); // 1111111111 + + out = gf_sq(out); + out = gf_mul(out, in); // 
11111111111 + + return gf_sq(out); // 111111111110 +} + +static gf gf_diff(gf a, gf b) { + uint32_t t = (uint32_t)(a ^ b); + + t = ((t - 1) >> 20) ^ 0xFFF; + + return (gf) t; +} + +/////////////////////////////////////////////////////////// + +static void GF_mul(gf *out, gf *in0, gf *in1) { + int i, j; + + gf tmp[123]; + + for (i = 0; i < 123; i++) + tmp[i] = 0; + + for (i = 0; i < 62; i++) + for (j = 0; j < 62; j++) + tmp[i + j] ^= gf_mul(in0[i], in1[j]); + + // + + for (i = 122; i >= 62; i--) { + tmp[i - 55] ^= gf_mul(tmp[i], (gf) 1763); + tmp[i - 61] ^= gf_mul(tmp[i], (gf) 1722); + tmp[i - 62] ^= gf_mul(tmp[i], (gf) 4033); + } + + for (i = 0; i < 62; i++) + out[i] = tmp[i]; +} diff --git a/crypt/liboqs/kex_code_mcbits/external/implementors b/crypt/liboqs/kex_code_mcbits/external/implementors new file mode 100755 index 0000000000000000000000000000000000000000..757e33743d8a2b2c4d53f7cc88e4ed096439a205 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/implementors @@ -0,0 +1 @@ +Tung Chou diff --git a/crypt/liboqs/kex_code_mcbits/external/operations.c b/crypt/liboqs/kex_code_mcbits/external/operations.c new file mode 100644 index 0000000000000000000000000000000000000000..4946e80437eceb98401df844f97138a45920d775 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/operations.c @@ -0,0 +1,114 @@ +#ifdef ENABLE_CODE_MCBITS // don't want this file in Visual Studio if libsodium is not present +#include <assert.h> +#include <stdint.h> +#include <string.h> + +#include "params.h" +#include <oqs/rand.h> +#include <oqs/sha3.h> +#include <sodium/crypto_onetimeauth_poly1305.h> +#include <sodium/crypto_stream_salsa20.h> +#include <sodium/randombytes.h> + +// clang-format off +// (order of include matters) +#include "util.c" +#include "transpose.c" +#include "benes.c" +#include "gf.c" +#include "vec.c" +#include "bm.c" +#include "fft.c" +#include "fft_tr.c" +#include "sk_gen.c" +#include "pk_gen.c" + +#include "encrypt.c" +#include "decrypt.c" +// clang-format on + 
+int oqs_kex_mcbits_encrypt( + unsigned char *c, size_t *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *pk, + OQS_RAND *r) { + unsigned char e[1 << (GFBITS - 3)]; + unsigned char key[64]; + unsigned char nonce[8] = {0}; + +// + +#define ct (c + SYND_BYTES) +#define tag (ct + mlen) + + encrypt(c, e, pk, r); + + //crypto_hash_keccakc1024(key, e, sizeof(e)); TODO is this ok to replace with the below? + OQS_SHA3_sha3512(key, e, sizeof(e)); + + crypto_stream_salsa20_xor(ct, m, mlen, nonce, key); + crypto_onetimeauth_poly1305(tag, ct, mlen, key + 32); + + *clen = SYND_BYTES + mlen + 16; + +#undef ct +#undef tag + + return 0; +} + +int oqs_kex_mcbits_decrypt( + unsigned char *m, size_t *mlen, + const unsigned char *c, unsigned long long clen, + const unsigned char *sk) { + int ret; + int ret_verify; + int ret_decrypt; + + unsigned char key[64]; + unsigned char nonce[8] = {0}; + unsigned char e[1 << (GFBITS - 3)]; + + // + + if (clen < SYND_BYTES + 16) + return -1; + else + *mlen = clen - SYND_BYTES - 16; + +#define ct (c + SYND_BYTES) +#define tag (ct + *mlen) + + ret_decrypt = decrypt(e, sk, c); + + //crypto_hash_keccakc1024(key, e, sizeof(e)); TODO is this ok to replace with the below? 
+ OQS_SHA3_sha3512(key, e, sizeof(e)); + + ret_verify = crypto_onetimeauth_poly1305_verify(tag, ct, *mlen, key + 32); + crypto_stream_salsa20_xor(m, ct, *mlen, nonce, key); + + ret = ret_verify | ret_decrypt; + +#undef ct +#undef tag + + return ret; +} + +int oqs_kex_mcbits_gen_keypair( + unsigned char *pk, + unsigned char *sk, + OQS_RAND *r + + ) { + while (1) { + sk_gen(sk, r); + + if (pk_gen(pk, sk) == 0) + break; + } + + return 0; +} + +#endif diff --git a/crypt/liboqs/kex_code_mcbits/external/params.h b/crypt/liboqs/kex_code_mcbits/external/params.h new file mode 100644 index 0000000000000000000000000000000000000000..b4295d4a43c3356acaf4347f33854324201e504c --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/params.h @@ -0,0 +1,9 @@ +#define GFBITS 12 +#define SYS_T 62 + +#define PK_NROWS (SYS_T * GFBITS) +#define PK_NCOLS ((1 << GFBITS) - SYS_T * GFBITS) + +#define IRR_BYTES (GFBITS * 8) +#define COND_BYTES (736 * 8) +#define SYND_BYTES (PK_NROWS / 8) diff --git a/crypt/liboqs/kex_code_mcbits/external/pk_gen.c b/crypt/liboqs/kex_code_mcbits/external/pk_gen.c new file mode 100644 index 0000000000000000000000000000000000000000..c4b550a4ccd5c280f968910421de296f6ba11c10 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/pk_gen.c @@ -0,0 +1,121 @@ +static int pk_gen(unsigned char *pk, const unsigned char *sk) { + unsigned char *pk_ptr = pk; + + int i, j, k; + int row, c, tail; + + uint64_t mat[GFBITS * SYS_T][64]; + uint64_t mask; + uint64_t u; + + uint64_t points[64][GFBITS] = { +#include "points.data" + }; + + uint64_t sk_int[GFBITS]; + + uint64_t eval[64][GFBITS]; + uint64_t inv[64][GFBITS]; + uint64_t tmp[GFBITS]; + + uint64_t cond[COND_BYTES / 8]; + + // compute the inverses + + for (i = 0; i < GFBITS; i++) + sk_int[i] = load8(sk + i * 8); + + fft(eval, sk_int); + + vec_copy(inv[0], eval[0]); + + for (i = 1; i < 64; i++) + vec_mul(inv[i], inv[i - 1], eval[i]); + + vec_inv(tmp, inv[63]); + + for (i = 62; i >= 0; i--) { + vec_mul(inv[i + 1], tmp, 
inv[i]); + vec_mul(tmp, tmp, eval[i + 1]); + } + + vec_copy(inv[0], tmp); + + // fill matrix + + for (j = 0; j < 64; j++) + for (k = 0; k < GFBITS; k++) + mat[k][j] = inv[j][k]; + + for (i = 1; i < SYS_T; i++) + for (j = 0; j < 64; j++) { + vec_mul(inv[j], inv[j], points[j]); + + for (k = 0; k < GFBITS; k++) + mat[i * GFBITS + k][j] = inv[j][k]; + } + + // permute + + for (i = 0; i < COND_BYTES / 8; i++) + cond[i] = load8(sk + IRR_BYTES + i * 8); + + for (i = 0; i < GFBITS * SYS_T; i++) + benes_compact(mat[i], cond, 0); + + // gaussian elimination + + for (i = 0; i < (GFBITS * SYS_T + 63) / 64; i++) + for (j = 0; j < 64; j++) { + row = i * 64 + j; + + if (row >= GFBITS * SYS_T) + break; + + for (k = row + 1; k < GFBITS * SYS_T; k++) { + mask = mat[row][i] ^ mat[k][i]; + mask >>= j; + mask &= 1; + mask = -mask; + + for (c = 0; c < 64; c++) + mat[row][c] ^= mat[k][c] & mask; + } + + if (((mat[row][i] >> j) & 1) == 0) { // return if not invertible + return -1; + } + + for (k = 0; k < GFBITS * SYS_T; k++) { + if (k != row) { + mask = mat[k][i] >> j; + mask &= 1; + mask = -mask; + + for (c = 0; c < 64; c++) + mat[k][c] ^= mat[row][c] & mask; + } + } + } + + // store pk + + tail = ((GFBITS * SYS_T) & 63) >> 3; + + for (i = 0; i < GFBITS * SYS_T; i++) { + u = mat[i][(GFBITS * SYS_T + 63) / 64 - 1]; + + for (k = tail; k < 8; k++) + pk_ptr[k - tail] = (u >> (8 * k)) & 0xFF; + + pk_ptr += 8 - tail; + + for (j = (GFBITS * SYS_T + 63) / 64; j < 64; j++) { + store8(pk_ptr, mat[i][j]); + + pk_ptr += 8; + } + } + + return 0; +} diff --git a/crypt/liboqs/kex_code_mcbits/external/points.data b/crypt/liboqs/kex_code_mcbits/external/points.data new file mode 100755 index 0000000000000000000000000000000000000000..7ee9f689461dad87b9c9bae9e771ad806eebb569 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/points.data @@ -0,0 +1,896 @@ +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 
0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 
0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 
0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 
0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 
0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 
0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 
0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 
0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 
0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +}, +{ + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFF0000FFFF0000, + 0XFF00FF00FF00FF00, + 0XF0F0F0F0F0F0F0F0, + 0XCCCCCCCCCCCCCCCC, + 0XAAAAAAAAAAAAAAAA, +} diff --git a/crypt/liboqs/kex_code_mcbits/external/scalars.data b/crypt/liboqs/kex_code_mcbits/external/scalars.data new file mode 100755 index 0000000000000000000000000000000000000000..aa8f64b95195a7b914e4281fadd55708db751a1f --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/scalars.data @@ -0,0 +1,70 @@ +{ + 0XF3CFC030FC30F003, + 0X3FCF0F003C00C00C, + 0X30033CC300C0C03C, + 0XCCFF0F3C0F30F0C0, + 0X0300C03FF303C3F0, + 0X3FFF3C0FF0CCCCC0, + 0XF3FFF0C00F3C3CC0, + 0X3003333FFFC3C000, + 0X0FF30FFFC3FFF300, + 0XFFC0F300F0F0CC00, + 0XC0CFF3FCCC3CFC00, + 0XFC3C03F0F330C000, +}, +{ + 0X000F00000000F00F, + 0X00000F00F00000F0, + 0X0F00000F00000F00, + 0XF00F00F00F000000, + 
0X00F00000000000F0, + 0X0000000F00000000, + 0XF00000000F00F000, + 0X00F00F00000F0000, + 0X0000F00000F00F00, + 0X000F00F00F00F000, + 0X00F00F0000000000, + 0X0000000000F00000, +}, +{ + 0X0000FF00FF0000FF, + 0X0000FF000000FF00, + 0XFF0000FF00FF0000, + 0XFFFF0000FF000000, + 0X00FF00FF00FF0000, + 0X0000FFFFFF000000, + 0X00FFFF00FF000000, + 0XFFFFFF0000FF0000, + 0XFFFF00FFFF00FF00, + 0X0000FF0000000000, + 0XFFFFFF00FF000000, + 0X00FF000000000000, +}, +{ + 0X000000000000FFFF, + 0X00000000FFFF0000, + 0X0000000000000000, + 0XFFFF000000000000, + 0X00000000FFFF0000, + 0X0000FFFF00000000, + 0X0000000000000000, + 0X00000000FFFF0000, + 0X0000FFFF00000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, +}, +{ + 0X00000000FFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFFFFFF00000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, +} diff --git a/crypt/liboqs/kex_code_mcbits/external/scalars_2x.data b/crypt/liboqs/kex_code_mcbits/external/scalars_2x.data new file mode 100755 index 0000000000000000000000000000000000000000..e7c7fee5586e126a5ba27469fa3c7cc2ed0a1e47 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/scalars_2x.data @@ -0,0 +1,140 @@ +{{ + 0XF3CFC030FC30F003, + 0X3FCF0F003C00C00C, + 0X30033CC300C0C03C, + 0XCCFF0F3C0F30F0C0, + 0X0300C03FF303C3F0, + 0X3FFF3C0FF0CCCCC0, + 0XF3FFF0C00F3C3CC0, + 0X3003333FFFC3C000, + 0X0FF30FFFC3FFF300, + 0XFFC0F300F0F0CC00, + 0XC0CFF3FCCC3CFC00, + 0XFC3C03F0F330C000, +}, +{ + 0X000C03C0C3C0330C, + 0XF330CFFCC00F33C0, + 0XCCF330F00F3C0333, + 0XFF03FFF3FF0CF0C0, + 0X3CC3FCF00FCC303C, + 0X0F000C0FC30303F3, + 0XCF0FC3FF333CCF3C, + 0X003F3FC3C0FF333F, + 0X3CC3F0F3CF0FF00F, + 0XF3F33CC03FC30CC0, + 0X3CC330CFC333F33F, + 0X3CC0303FF3C3FFFC, +}}, +{{ + 0X000F00000000F00F, + 0X00000F00F00000F0, + 0X0F00000F00000F00, + 0XF00F00F00F000000, + 0X00F00000000000F0, + 0X0000000F00000000, + 
0XF00000000F00F000, + 0X00F00F00000F0000, + 0X0000F00000F00F00, + 0X000F00F00F00F000, + 0X00F00F0000000000, + 0X0000000000F00000, +}, +{ + 0X0F00F00F00000000, + 0XF00000000000F000, + 0X00000F00000000F0, + 0X0F00F00000F00000, + 0X000F00000F00F00F, + 0X00F00F00F00F0000, + 0X0F00F00000000000, + 0X000000000F000000, + 0X00F00000000F00F0, + 0X0000F00F00000F00, + 0XF00000F00000F00F, + 0X00000F00F00F00F0, +}}, +{{ + 0X0000FF00FF0000FF, + 0X0000FF000000FF00, + 0XFF0000FF00FF0000, + 0XFFFF0000FF000000, + 0X00FF00FF00FF0000, + 0X0000FFFFFF000000, + 0X00FFFF00FF000000, + 0XFFFFFF0000FF0000, + 0XFFFF00FFFF00FF00, + 0X0000FF0000000000, + 0XFFFFFF00FF000000, + 0X00FF000000000000, +}, +{ + 0XFF00FFFFFF000000, + 0XFF0000FFFF000000, + 0XFFFF00FFFF000000, + 0XFF00FFFFFFFFFF00, + 0X00000000FF00FF00, + 0XFFFFFFFF00FF0000, + 0X00FFFFFF00FF0000, + 0XFFFF00FFFF00FFFF, + 0XFFFF0000FFFFFFFF, + 0XFF00000000FF0000, + 0X000000FF00FF00FF, + 0X00FF00FF00FFFF00, +}}, +{{ + 0X000000000000FFFF, + 0X00000000FFFF0000, + 0X0000000000000000, + 0XFFFF000000000000, + 0X00000000FFFF0000, + 0X0000FFFF00000000, + 0X0000000000000000, + 0X00000000FFFF0000, + 0X0000FFFF00000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, +}, +{ + 0X0000000000000000, + 0XFFFF000000000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFF00000000FFFF, + 0X0000000000000000, + 0X0000FFFF00000000, + 0XFFFF00000000FFFF, + 0X00000000FFFF0000, + 0X0000000000000000, + 0XFFFF00000000FFFF, + 0X00000000FFFF0000, +}}, +{{ + 0X00000000FFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFFFFFF00000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0X0000000000000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0X0000000000000000, + 0X0000000000000000, + 0X0000000000000000, +}, +{ + 0X0000000000000000, + 0X0000000000000000, + 0X00000000FFFFFFFF, + 0XFFFFFFFF00000000, + 0XFFFFFFFF00000000, + 0X0000000000000000, + 0XFFFFFFFF00000000, + 0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, + 0X0000000000000000, + 
0XFFFFFFFFFFFFFFFF, + 0XFFFFFFFF00000000, +}} diff --git a/crypt/liboqs/kex_code_mcbits/external/sk_gen.c b/crypt/liboqs/kex_code_mcbits/external/sk_gen.c new file mode 100644 index 0000000000000000000000000000000000000000..860c70469a04c58a7885279cea093d5fa9d63b9f --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/sk_gen.c @@ -0,0 +1,98 @@ +static int irr_gen(gf *out, gf *f) { + int i, j, k, c; + + gf mat[SYS_T + 1][SYS_T]; + gf mask, inv, t; + + // fill matrix + + mat[0][0] = 1; + for (i = 1; i < SYS_T; i++) + mat[0][i] = 0; + + for (i = 0; i < SYS_T; i++) + mat[1][i] = f[i]; + + for (j = 2; j <= SYS_T; j++) + GF_mul(mat[j], mat[j - 1], f); + + // gaussian + + for (j = 0; j < SYS_T; j++) { + for (k = j + 1; k < SYS_T; k++) { + mask = gf_diff(mat[j][j], mat[j][k]); + + for (c = 0; c < SYS_T + 1; c++) + mat[c][j] ^= mat[c][k] & mask; + } + + if (mat[j][j] == 0) { // return if not invertible + return -1; + } + + // compute inverse + + inv = gf_inv(mat[j][j]); + + for (c = 0; c < SYS_T + 1; c++) + mat[c][j] = gf_mul(mat[c][j], inv); + + // + + for (k = 0; k < SYS_T; k++) { + t = mat[j][k]; + + if (k != j) { + for (c = 0; c < SYS_T + 1; c++) + mat[c][k] ^= gf_mul(mat[c][j], t); + } + } + } + + // + + for (i = 0; i < SYS_T; i++) + out[i] = mat[SYS_T][i]; + + out[SYS_T] = 1; + + return 0; +} + +static void sk_gen(unsigned char *sk, OQS_RAND *r) { + uint64_t cond[COND_BYTES / 8]; + uint64_t sk_int[GFBITS]; + + int i, j; + + gf irr[SYS_T + 1]; + gf f[SYS_T]; + + while (1) { + OQS_RAND_n(r, (uint8_t *) f, sizeof(f)); + + for (i = 0; i < SYS_T; i++) + f[i] &= (1 << GFBITS) - 1; + + if (irr_gen(irr, f) == 0) + break; + } + + for (i = 0; i < GFBITS; i++) { + sk_int[i] = 0; + + for (j = SYS_T; j >= 0; j--) { + sk_int[i] <<= 1; + sk_int[i] |= (irr[j] >> i) & 1; + } + + store8(sk + i * 8, sk_int[i]); + } + + // + + OQS_RAND_n(r, (uint8_t *) cond, sizeof(cond)); + + for (i = 0; i < COND_BYTES / 8; i++) + store8(sk + IRR_BYTES + i * 8, cond[i]); +} diff --git 
a/crypt/liboqs/kex_code_mcbits/external/transpose.c b/crypt/liboqs/kex_code_mcbits/external/transpose.c new file mode 100644 index 0000000000000000000000000000000000000000..c56f3b0e1285c094c1ff5da874a456ee368ee07f --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/transpose.c @@ -0,0 +1,122 @@ +static void transpose_64x64_compact(uint64_t *out, uint64_t *in) { + int i, j, s, p, idx0, idx1; + uint64_t x, y; + + const uint64_t mask[6][2] = { + {0X5555555555555555, 0XAAAAAAAAAAAAAAAA}, + {0X3333333333333333, 0XCCCCCCCCCCCCCCCC}, + {0X0F0F0F0F0F0F0F0F, 0XF0F0F0F0F0F0F0F0}, + {0X00FF00FF00FF00FF, 0XFF00FF00FF00FF00}, + {0X0000FFFF0000FFFF, 0XFFFF0000FFFF0000}, + {0X00000000FFFFFFFF, 0XFFFFFFFF00000000}}; + + // + + for (i = 0; i < 64; i++) + out[i] = in[i]; + + for (j = 5; j >= 0; j--) { + s = 1 << j; + + for (p = 0; p < 32 / s; p++) { + for (i = 0; i < s; i++) { + idx0 = p * 2 * s + i; + idx1 = p * 2 * s + i + s; + + x = (out[idx0] & mask[j][0]) | ((out[idx1] & mask[j][0]) << s); + y = ((out[idx0] & mask[j][1]) >> s) | (out[idx1] & mask[j][1]); + + out[idx0] = x; + out[idx1] = y; + } + } + } +} + +static void transpose_8x64(uint64_t *in) { + const uint64_t mask[3][2] = { + {0X5555555555555555, 0XAAAAAAAAAAAAAAAA}, + {0X3333333333333333, 0XCCCCCCCCCCCCCCCC}, + {0X0F0F0F0F0F0F0F0F, 0XF0F0F0F0F0F0F0F0}, + }; + + uint64_t x, y; + + // + + x = (in[0] & mask[2][0]) | ((in[4] & mask[2][0]) << 4); + y = ((in[0] & mask[2][1]) >> 4) | (in[4] & mask[2][1]); + + in[0] = x; + in[4] = y; + + x = (in[1] & mask[2][0]) | ((in[5] & mask[2][0]) << 4); + y = ((in[1] & mask[2][1]) >> 4) | (in[5] & mask[2][1]); + + in[1] = x; + in[5] = y; + + x = (in[2] & mask[2][0]) | ((in[6] & mask[2][0]) << 4); + y = ((in[2] & mask[2][1]) >> 4) | (in[6] & mask[2][1]); + + in[2] = x; + in[6] = y; + + x = (in[3] & mask[2][0]) | ((in[7] & mask[2][0]) << 4); + y = ((in[3] & mask[2][1]) >> 4) | (in[7] & mask[2][1]); + + in[3] = x; + in[7] = y; + + // + + x = (in[0] & mask[1][0]) | ((in[2] & mask[1][0]) 
<< 2); + y = ((in[0] & mask[1][1]) >> 2) | (in[2] & mask[1][1]); + + in[0] = x; + in[2] = y; + + x = (in[1] & mask[1][0]) | ((in[3] & mask[1][0]) << 2); + y = ((in[1] & mask[1][1]) >> 2) | (in[3] & mask[1][1]); + + in[1] = x; + in[3] = y; + + x = (in[4] & mask[1][0]) | ((in[6] & mask[1][0]) << 2); + y = ((in[4] & mask[1][1]) >> 2) | (in[6] & mask[1][1]); + + in[4] = x; + in[6] = y; + + x = (in[5] & mask[1][0]) | ((in[7] & mask[1][0]) << 2); + y = ((in[5] & mask[1][1]) >> 2) | (in[7] & mask[1][1]); + + in[5] = x; + in[7] = y; + + // + + x = (in[0] & mask[0][0]) | ((in[1] & mask[0][0]) << 1); + y = ((in[0] & mask[0][1]) >> 1) | (in[1] & mask[0][1]); + + in[0] = x; + in[1] = y; + + x = (in[2] & mask[0][0]) | ((in[3] & mask[0][0]) << 1); + y = ((in[2] & mask[0][1]) >> 1) | (in[3] & mask[0][1]); + + in[2] = x; + in[3] = y; + + x = (in[4] & mask[0][0]) | ((in[5] & mask[0][0]) << 1); + y = ((in[4] & mask[0][1]) >> 1) | (in[5] & mask[0][1]); + + in[4] = x; + in[5] = y; + + x = (in[6] & mask[0][0]) | ((in[7] & mask[0][0]) << 1); + y = ((in[6] & mask[0][1]) >> 1) | (in[7] & mask[0][1]); + + in[6] = x; + in[7] = y; +} diff --git a/crypt/liboqs/kex_code_mcbits/external/util.c b/crypt/liboqs/kex_code_mcbits/external/util.c new file mode 100644 index 0000000000000000000000000000000000000000..dc557755a8c905e481a10f7376d75fdef9c54308 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/util.c @@ -0,0 +1,22 @@ +static void store8(unsigned char *out, uint64_t in) { + out[0] = (in >> 0x00) & 0xFF; + out[1] = (in >> 0x08) & 0xFF; + out[2] = (in >> 0x10) & 0xFF; + out[3] = (in >> 0x18) & 0xFF; + out[4] = (in >> 0x20) & 0xFF; + out[5] = (in >> 0x28) & 0xFF; + out[6] = (in >> 0x30) & 0xFF; + out[7] = (in >> 0x38) & 0xFF; +} + +static uint64_t load8(const unsigned char *in) { + int i; + uint64_t ret = in[7]; + + for (i = 6; i >= 0; i--) { + ret <<= 8; + ret |= in[i]; + } + + return ret; +} diff --git a/crypt/liboqs/kex_code_mcbits/external/vec.c 
b/crypt/liboqs/kex_code_mcbits/external/vec.c new file mode 100644 index 0000000000000000000000000000000000000000..2812c17308440523d705e879a8ffc30334d14c8a --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/external/vec.c @@ -0,0 +1,322 @@ +static void vec_mul(uint64_t *h, uint64_t *f, const uint64_t *g) { + int i; + uint64_t result[2 * GFBITS - 1]; + + // + + uint64_t t1 = f[11] & g[11]; + uint64_t t2 = f[11] & g[9]; + uint64_t t3 = f[11] & g[10]; + uint64_t t4 = f[9] & g[11]; + uint64_t t5 = f[10] & g[11]; + uint64_t t6 = f[10] & g[10]; + uint64_t t7 = f[10] & g[9]; + uint64_t t8 = f[9] & g[10]; + uint64_t t9 = f[9] & g[9]; + uint64_t t10 = t8 ^ t7; + uint64_t t11 = t6 ^ t4; + uint64_t t12 = t11 ^ t2; + uint64_t t13 = t5 ^ t3; + uint64_t t14 = f[8] & g[8]; + uint64_t t15 = f[8] & g[6]; + uint64_t t16 = f[8] & g[7]; + uint64_t t17 = f[6] & g[8]; + uint64_t t18 = f[7] & g[8]; + uint64_t t19 = f[7] & g[7]; + uint64_t t20 = f[7] & g[6]; + uint64_t t21 = f[6] & g[7]; + uint64_t t22 = f[6] & g[6]; + uint64_t t23 = t21 ^ t20; + uint64_t t24 = t19 ^ t17; + uint64_t t25 = t24 ^ t15; + uint64_t t26 = t18 ^ t16; + uint64_t t27 = f[5] & g[5]; + uint64_t t28 = f[5] & g[3]; + uint64_t t29 = f[5] & g[4]; + uint64_t t30 = f[3] & g[5]; + uint64_t t31 = f[4] & g[5]; + uint64_t t32 = f[4] & g[4]; + uint64_t t33 = f[4] & g[3]; + uint64_t t34 = f[3] & g[4]; + uint64_t t35 = f[3] & g[3]; + uint64_t t36 = t34 ^ t33; + uint64_t t37 = t32 ^ t30; + uint64_t t38 = t37 ^ t28; + uint64_t t39 = t31 ^ t29; + uint64_t t40 = f[2] & g[2]; + uint64_t t41 = f[2] & g[0]; + uint64_t t42 = f[2] & g[1]; + uint64_t t43 = f[0] & g[2]; + uint64_t t44 = f[1] & g[2]; + uint64_t t45 = f[1] & g[1]; + uint64_t t46 = f[1] & g[0]; + uint64_t t47 = f[0] & g[1]; + uint64_t t48 = f[0] & g[0]; + uint64_t t49 = t47 ^ t46; + uint64_t t50 = t45 ^ t43; + uint64_t t51 = t50 ^ t41; + uint64_t t52 = t44 ^ t42; + uint64_t t53 = t52 ^ t35; + uint64_t t54 = t40 ^ t36; + uint64_t t55 = t39 ^ t22; + uint64_t t56 = t27 ^ t23; + 
uint64_t t57 = t26 ^ t9; + uint64_t t58 = t14 ^ t10; + uint64_t t59 = g[6] ^ g[9]; + uint64_t t60 = g[7] ^ g[10]; + uint64_t t61 = g[8] ^ g[11]; + uint64_t t62 = f[6] ^ f[9]; + uint64_t t63 = f[7] ^ f[10]; + uint64_t t64 = f[8] ^ f[11]; + uint64_t t65 = t64 & t61; + uint64_t t66 = t64 & t59; + uint64_t t67 = t64 & t60; + uint64_t t68 = t62 & t61; + uint64_t t69 = t63 & t61; + uint64_t t70 = t63 & t60; + uint64_t t71 = t63 & t59; + uint64_t t72 = t62 & t60; + uint64_t t73 = t62 & t59; + uint64_t t74 = t72 ^ t71; + uint64_t t75 = t70 ^ t68; + uint64_t t76 = t75 ^ t66; + uint64_t t77 = t69 ^ t67; + uint64_t t78 = g[0] ^ g[3]; + uint64_t t79 = g[1] ^ g[4]; + uint64_t t80 = g[2] ^ g[5]; + uint64_t t81 = f[0] ^ f[3]; + uint64_t t82 = f[1] ^ f[4]; + uint64_t t83 = f[2] ^ f[5]; + uint64_t t84 = t83 & t80; + uint64_t t85 = t83 & t78; + uint64_t t86 = t83 & t79; + uint64_t t87 = t81 & t80; + uint64_t t88 = t82 & t80; + uint64_t t89 = t82 & t79; + uint64_t t90 = t82 & t78; + uint64_t t91 = t81 & t79; + uint64_t t92 = t81 & t78; + uint64_t t93 = t91 ^ t90; + uint64_t t94 = t89 ^ t87; + uint64_t t95 = t94 ^ t85; + uint64_t t96 = t88 ^ t86; + uint64_t t97 = t53 ^ t48; + uint64_t t98 = t54 ^ t49; + uint64_t t99 = t38 ^ t51; + uint64_t t100 = t55 ^ t53; + uint64_t t101 = t56 ^ t54; + uint64_t t102 = t25 ^ t38; + uint64_t t103 = t57 ^ t55; + uint64_t t104 = t58 ^ t56; + uint64_t t105 = t12 ^ t25; + uint64_t t106 = t13 ^ t57; + uint64_t t107 = t1 ^ t58; + uint64_t t108 = t97 ^ t92; + uint64_t t109 = t98 ^ t93; + uint64_t t110 = t99 ^ t95; + uint64_t t111 = t100 ^ t96; + uint64_t t112 = t101 ^ t84; + uint64_t t113 = t103 ^ t73; + uint64_t t114 = t104 ^ t74; + uint64_t t115 = t105 ^ t76; + uint64_t t116 = t106 ^ t77; + uint64_t t117 = t107 ^ t65; + uint64_t t118 = g[3] ^ g[9]; + uint64_t t119 = g[4] ^ g[10]; + uint64_t t120 = g[5] ^ g[11]; + uint64_t t121 = g[0] ^ g[6]; + uint64_t t122 = g[1] ^ g[7]; + uint64_t t123 = g[2] ^ g[8]; + uint64_t t124 = f[3] ^ f[9]; + uint64_t t125 = f[4] 
^ f[10]; + uint64_t t126 = f[5] ^ f[11]; + uint64_t t127 = f[0] ^ f[6]; + uint64_t t128 = f[1] ^ f[7]; + uint64_t t129 = f[2] ^ f[8]; + uint64_t t130 = t129 & t123; + uint64_t t131 = t129 & t121; + uint64_t t132 = t129 & t122; + uint64_t t133 = t127 & t123; + uint64_t t134 = t128 & t123; + uint64_t t135 = t128 & t122; + uint64_t t136 = t128 & t121; + uint64_t t137 = t127 & t122; + uint64_t t138 = t127 & t121; + uint64_t t139 = t137 ^ t136; + uint64_t t140 = t135 ^ t133; + uint64_t t141 = t140 ^ t131; + uint64_t t142 = t134 ^ t132; + uint64_t t143 = t126 & t120; + uint64_t t144 = t126 & t118; + uint64_t t145 = t126 & t119; + uint64_t t146 = t124 & t120; + uint64_t t147 = t125 & t120; + uint64_t t148 = t125 & t119; + uint64_t t149 = t125 & t118; + uint64_t t150 = t124 & t119; + uint64_t t151 = t124 & t118; + uint64_t t152 = t150 ^ t149; + uint64_t t153 = t148 ^ t146; + uint64_t t154 = t153 ^ t144; + uint64_t t155 = t147 ^ t145; + uint64_t t156 = t121 ^ t118; + uint64_t t157 = t122 ^ t119; + uint64_t t158 = t123 ^ t120; + uint64_t t159 = t127 ^ t124; + uint64_t t160 = t128 ^ t125; + uint64_t t161 = t129 ^ t126; + uint64_t t162 = t161 & t158; + uint64_t t163 = t161 & t156; + uint64_t t164 = t161 & t157; + uint64_t t165 = t159 & t158; + uint64_t t166 = t160 & t158; + uint64_t t167 = t160 & t157; + uint64_t t168 = t160 & t156; + uint64_t t169 = t159 & t157; + uint64_t t170 = t159 & t156; + uint64_t t171 = t169 ^ t168; + uint64_t t172 = t167 ^ t165; + uint64_t t173 = t172 ^ t163; + uint64_t t174 = t166 ^ t164; + uint64_t t175 = t142 ^ t151; + uint64_t t176 = t130 ^ t152; + uint64_t t177 = t170 ^ t175; + uint64_t t178 = t171 ^ t176; + uint64_t t179 = t173 ^ t154; + uint64_t t180 = t174 ^ t155; + uint64_t t181 = t162 ^ t143; + uint64_t t182 = t177 ^ t138; + uint64_t t183 = t178 ^ t139; + uint64_t t184 = t179 ^ t141; + uint64_t t185 = t180 ^ t175; + uint64_t t186 = t181 ^ t176; + uint64_t t187 = t111 ^ t48; + uint64_t t188 = t112 ^ t49; + uint64_t t189 = t102 ^ t51; + 
uint64_t t190 = t113 ^ t108; + uint64_t t191 = t114 ^ t109; + uint64_t t192 = t115 ^ t110; + uint64_t t193 = t116 ^ t111; + uint64_t t194 = t117 ^ t112; + uint64_t t195 = t12 ^ t102; + uint64_t t196 = t13 ^ t113; + uint64_t t197 = t1 ^ t114; + uint64_t t198 = t187 ^ t138; + uint64_t t199 = t188 ^ t139; + uint64_t t200 = t189 ^ t141; + uint64_t t201 = t190 ^ t182; + uint64_t t202 = t191 ^ t183; + uint64_t t203 = t192 ^ t184; + uint64_t t204 = t193 ^ t185; + uint64_t t205 = t194 ^ t186; + uint64_t t206 = t195 ^ t154; + uint64_t t207 = t196 ^ t155; + uint64_t t208 = t197 ^ t143; + + result[0] = t48; + result[1] = t49; + result[2] = t51; + result[3] = t108; + result[4] = t109; + result[5] = t110; + result[6] = t198; + result[7] = t199; + result[8] = t200; + result[9] = t201; + result[10] = t202; + result[11] = t203; + result[12] = t204; + result[13] = t205; + result[14] = t206; + result[15] = t207; + result[16] = t208; + result[17] = t115; + result[18] = t116; + result[19] = t117; + result[20] = t12; + result[21] = t13; + result[22] = t1; + + // + + for (i = 2 * GFBITS - 2; i >= GFBITS; i--) { + result[i - 9] ^= result[i]; + result[i - GFBITS] ^= result[i]; + } + + // + + for (i = 0; i < GFBITS; i++) + h[i] = result[i]; +} + +static void vec_sq(uint64_t *out, uint64_t *in) { + int i; + uint64_t result[GFBITS]; + + // + + result[0] = in[0] ^ in[6]; + result[1] = in[11]; + result[2] = in[1] ^ in[7]; + result[3] = in[6]; + result[4] = in[2] ^ in[11] ^ in[8]; + result[5] = in[7]; + result[6] = in[3] ^ in[9]; + result[7] = in[8]; + result[8] = in[4] ^ in[10]; + result[9] = in[9]; + result[10] = in[5] ^ in[11]; + result[11] = in[10]; + + // + + for (i = 0; i < GFBITS; i++) + out[i] = result[i]; +} + +static void vec_copy(uint64_t *out, const uint64_t *in) { + int i; + + for (i = 0; i < GFBITS; i++) + out[i] = in[i]; +} + +static uint64_t vec_or(const uint64_t *in) { + int i; + uint64_t ret = in[0]; + + for (i = 1; i < GFBITS; i++) + ret |= in[i]; + + return ret; +} + +static 
void vec_inv(uint64_t *out, const uint64_t *in) { + uint64_t tmp_11[GFBITS]; + uint64_t tmp_1111[GFBITS]; + + vec_copy(out, in); + + vec_sq(out, out); + vec_mul(tmp_11, out, in); // 11 + + vec_sq(out, tmp_11); + vec_sq(out, out); + vec_mul(tmp_1111, out, tmp_11); // 1111 + + vec_sq(out, tmp_1111); + vec_sq(out, out); + vec_sq(out, out); + vec_sq(out, out); + vec_mul(out, out, tmp_1111); // 11111111 + + vec_sq(out, out); + vec_sq(out, out); + vec_mul(out, out, tmp_11); // 1111111111 + + vec_sq(out, out); + vec_mul(out, out, in); // 11111111111 + + vec_sq(out, out); // 111111111110 +} diff --git a/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.c b/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.c new file mode 100644 index 0000000000000000000000000000000000000000..11c37ad51758e70c7e5b012a3d405538dff0439d --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.c @@ -0,0 +1,168 @@ +#ifdef ENABLE_CODE_MCBITS + +#if defined(WINDOWS) +#define UNUSED +// __attribute__ not supported in VS, is there something else I should define? 
+#else +#define UNUSED __attribute__((unused)) +#endif + +#include <stdlib.h> +#include <string.h> +#if !defined(WINDOWS) +#include <strings.h> +#include <unistd.h> +#endif + +#include <oqs/kex.h> +#include <oqs/rand.h> + +#include "kex_code_mcbits.h" +#include "mcbits.h" + +#if defined(WINDOWS) +#define strdup _strdup // for strdup deprecation warning +#endif + +OQS_KEX *OQS_KEX_code_mcbits_new(OQS_RAND *rand) { + OQS_KEX *k = malloc(sizeof(OQS_KEX)); + if (k == NULL) { + return NULL; + } + k->method_name = strdup("Code Mcbits"); + k->estimated_classical_security = 0; //TODO : Add these + k->estimated_quantum_security = 0; + k->seed = NULL; + k->seed_len = 0; + k->named_parameters = 0; + k->rand = rand; + k->params = NULL; + k->alice_0 = &OQS_KEX_code_mcbits_alice_0; + k->bob = &OQS_KEX_code_mcbits_bob; + k->alice_1 = &OQS_KEX_code_mcbits_alice_1; + k->alice_priv_free = &OQS_KEX_code_mcbits_alice_priv_free; + k->free = &OQS_KEX_code_mcbits_free; + return k; +} + +int OQS_KEX_code_mcbits_alice_0(UNUSED OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) { + + int ret; + + *alice_priv = NULL; + *alice_msg = NULL; + + /* allocate public/private key pair */ + *alice_msg = malloc(CRYPTO_PUBLICKEYBYTES); + *alice_msg_len = CRYPTO_PUBLICKEYBYTES; + if (*alice_msg == NULL) { + goto err; + } + *alice_priv = malloc(CRYPTO_SECRETKEYBYTES); + if (*alice_priv == NULL) { + goto err; + } + + /* generate public/private key pair */ + + oqs_kex_mcbits_gen_keypair(*alice_msg, *alice_priv, k->rand); + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*alice_msg); + *alice_msg = NULL; + free(*alice_priv); + *alice_priv = NULL; + +cleanup: + + return ret; +} + +int OQS_KEX_code_mcbits_bob(UNUSED OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + + *bob_msg = NULL; + *key = NULL; + + if (alice_msg_len != CRYPTO_PUBLICKEYBYTES) { + goto err; + } + + /* 
allocate message and session key */ + *bob_msg = malloc(CRYPTO_BYTES + 32); + if (*bob_msg == NULL) { + goto err; + } + *key = malloc(32); + if (*key == NULL) { + goto err; + } + OQS_RAND_n(k->rand, *key, 32); + oqs_kex_mcbits_encrypt(*bob_msg, bob_msg_len, *key, 32, alice_msg, k->rand); + *key_len = 32; + + ret = 1; + goto cleanup; +err: + ret = 0; + free(*bob_msg); + *bob_msg = NULL; + free(*key); + *key = NULL; + +cleanup: + return ret; +} + +int OQS_KEX_code_mcbits_alice_1(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + + *key = NULL; + + if (bob_msg_len != (CRYPTO_BYTES + 32)) { + goto err; + } + + /* allocate session key */ + *key = malloc(32); + if (*key == NULL) { + goto err; + } + oqs_kex_mcbits_decrypt(*key, key_len, bob_msg, CRYPTO_BYTES + 32, alice_priv); + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*key); + *key = NULL; + +cleanup: + + return ret; +} + +void OQS_KEX_code_mcbits_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) { + if (alice_priv) { + free(alice_priv); + } +} + +void OQS_KEX_code_mcbits_free(OQS_KEX *k) { + if (k) { + free(k->named_parameters); + k->named_parameters = NULL; + free(k->method_name); + k->method_name = NULL; + } + free(k); +} + +#endif diff --git a/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.h b/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.h new file mode 100644 index 0000000000000000000000000000000000000000..d748a03c197ad133a956191f5427b7e924106d65 --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.h @@ -0,0 +1,24 @@ +/** + * \file kex_code_mcbits.h + * \brief Header for code-based key exchange protocol McBits + */ + +#ifndef __OQS_KEX_CODE_MCBITS_H +#define __OQS_KEX_CODE_MCBITS_H + +#include <stddef.h> +#include <stdint.h> + +#include <oqs/kex.h> +#include <oqs/rand.h> + +OQS_KEX *OQS_KEX_code_mcbits_new(OQS_RAND *rand); + +int OQS_KEX_code_mcbits_alice_0(OQS_KEX *k, void **alice_priv, uint8_t 
**alice_msg, size_t *alice_msg_len); +int OQS_KEX_code_mcbits_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len); +int OQS_KEX_code_mcbits_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + +void OQS_KEX_code_mcbits_alice_priv_free(OQS_KEX *k, void *alice_priv); +void OQS_KEX_code_mcbits_free(OQS_KEX *k); + +#endif diff --git a/crypt/liboqs/kex_code_mcbits/mcbits.h b/crypt/liboqs/kex_code_mcbits/mcbits.h new file mode 100644 index 0000000000000000000000000000000000000000..71bf80e452a635840c86b18bcd10cf0b50ea4e3b --- /dev/null +++ b/crypt/liboqs/kex_code_mcbits/mcbits.h @@ -0,0 +1,27 @@ +/** + * \file mcbits.h + * \brief Header for internal functions of the code-based key exchange protocol McBits + */ + +#ifndef __OQS_MCBITS_H +#define __OQS_MCBITS_H + +#include "external/api.h" + +int oqs_kex_mcbits_encrypt( + unsigned char *c, size_t *clen, + const unsigned char *m, unsigned long long mlen, + const unsigned char *pk, + OQS_RAND *r); + +int oqs_kex_mcbits_decrypt( + unsigned char *m, size_t *mlen, + const unsigned char *c, unsigned long long clen, + const unsigned char *sk); + +int oqs_kex_mcbits_gen_keypair( + unsigned char *pk, + unsigned char *sk, + OQS_RAND *r); + +#endif diff --git a/crypt/liboqs/kex_lwe_frodo/Makefile.am b/crypt/liboqs/kex_lwe_frodo/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..d91def87281b3c115e14cab615ea915324fd7988 --- /dev/null +++ b/crypt/liboqs/kex_lwe_frodo/Makefile.am @@ -0,0 +1,7 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libfrodo.la + +libfrodo_la_SOURCES = kex_lwe_frodo.c lwe.c lwe_noise.c +libfrodo_la_CPPFLAGS = -I../../include -I. 
+libfrodo_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.c b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.c new file mode 100644 index 0000000000000000000000000000000000000000..e9f6801af2c243b065b9905fad337b22763d79d6 --- /dev/null +++ b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.c @@ -0,0 +1,57 @@ +#if defined(WINDOWS) +#define UNUSED +#else +#define UNUSED __attribute__((unused)) +#endif + +#include <stdlib.h> +#include <string.h> +#if !defined(WINDOWS) +#include <strings.h> +#include <unistd.h> +#endif + +#include <oqs/common.h> +#include <oqs/kex.h> +#include <oqs/rand.h> + +#include "kex_lwe_frodo.h" +#include "local.h" + +#define LWE_DIV_ROUNDUP(x, y) (((x) + (y) -1) / y) + +#include <stdio.h> + +// pre-process code to obtain "recommended" functions +#include "recommended.h" +#define MACRIFY(NAME) NAME##_recommended +#include "kex_lwe_frodo_macrify.c" +// undefine macros to avoid any confusion later +#include "recommended.h" +#undef MACRIFY + +void OQS_KEX_lwe_frodo_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) { + free(alice_priv); +} + +void OQS_KEX_lwe_frodo_free(OQS_KEX *k) { + if (!k) { + return; + } + if (k->params) { + struct oqs_kex_lwe_frodo_params *params = (struct oqs_kex_lwe_frodo_params *) k->params; + free(params->cdf_table); + params->cdf_table = NULL; + free(params->seed); + params->seed = NULL; + free(params->param_name); + params->param_name = NULL; + free(k->params); + k->params = NULL; + } + free(k->named_parameters); + k->named_parameters = NULL; + free(k->method_name); + k->method_name = NULL; + free(k); +} diff --git a/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.h b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.h new file mode 100644 index 0000000000000000000000000000000000000000..cb2742d0edd2139877eed62c24e6828400422be5 --- /dev/null +++ b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.h @@ -0,0 +1,24 @@ +/** + * \file kex_lwe_frodo.h + * \brief Header for LWE key exchange protocol Frodo. 
+ */ + +#ifndef __OQS_KEX_LWE_FRODO_H +#define __OQS_KEX_LWE_FRODO_H + +#include <stddef.h> +#include <stdint.h> + +#include <oqs/kex.h> +#include <oqs/rand.h> + +OQS_KEX *OQS_KEX_lwe_frodo_new_recommended(OQS_RAND *rand, const uint8_t *seed, const size_t seed_len, const char *named_parameters); + +int OQS_KEX_lwe_frodo_alice_0_recommended(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len); +int OQS_KEX_lwe_frodo_bob_recommended(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len); +int OQS_KEX_lwe_frodo_alice_1_recommended(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + +void OQS_KEX_lwe_frodo_alice_priv_free(OQS_KEX *k, void *alice_priv); +void OQS_KEX_lwe_frodo_free(OQS_KEX *k); + +#endif diff --git a/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo_macrify.c b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo_macrify.c new file mode 100644 index 0000000000000000000000000000000000000000..5dcba11820244e845eda4eadd03f3373b70cba28 --- /dev/null +++ b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo_macrify.c @@ -0,0 +1,260 @@ +#if defined(WINDOWS) +#define strdup _strdup // for strdup deprecation warning +#endif + +OQS_KEX *MACRIFY(OQS_KEX_lwe_frodo_new)(OQS_RAND *rand, const uint8_t *seed, const size_t seed_len, const char *named_parameters) { + + OQS_KEX *k; + struct oqs_kex_lwe_frodo_params *params; + + if ((seed_len == 0) || (seed == NULL)) { + return NULL; + } + + k = malloc(sizeof(OQS_KEX)); + if (k == NULL) { + goto err; + } + k->named_parameters = NULL; + k->method_name = NULL; + + k->params = malloc(sizeof(struct oqs_kex_lwe_frodo_params)); + if (NULL == k->params) { + goto err; + } + params = (struct oqs_kex_lwe_frodo_params *) k->params; + params->cdf_table = NULL; + params->seed = NULL; + params->param_name = NULL; + + k->rand = rand; + k->ctx = NULL; + k->alice_priv_free = 
&OQS_KEX_lwe_frodo_alice_priv_free; + k->free = &OQS_KEX_lwe_frodo_free; + + if (strcmp(named_parameters, "recommended") == 0) { + + k->alice_0 = &OQS_KEX_lwe_frodo_alice_0_recommended; + k->bob = &OQS_KEX_lwe_frodo_bob_recommended; + k->alice_1 = &OQS_KEX_lwe_frodo_alice_1_recommended; + + k->method_name = strdup("LWE Frodo recommended"); + if (NULL == k->method_name) { + goto err; + } + k->estimated_classical_security = 144; + k->estimated_quantum_security = 130; + k->named_parameters = strdup(named_parameters); + if (k->named_parameters == NULL) { + goto err; + } + + params->seed = malloc(seed_len); + if (NULL == params->seed) { + goto err; + } + memcpy(params->seed, seed, seed_len); + params->seed_len = seed_len; + params->param_name = strdup("recommended"); + if (NULL == params->param_name) { + goto err; + } + params->log2_q = PARAMS_LOG2Q; + params->q = PARAMS_Q; + params->n = PARAMS_N; + params->extracted_bits = PARAMS_EXTRACTED_BITS; + params->nbar = PARAMS_NBAR; + params->key_bits = PARAMS_KEY_BITS; + params->rec_hint_len = PARAMS_REC_HINT_LENGTH; + params->pub_len = PARAMS_REC_PUB_LENGTH; + params->stripe_step = PARAMS_STRIPE_STEP; + params->sampler_num = 12; + params->cdf_table_len = 6; + params->cdf_table = malloc(params->cdf_table_len * sizeof(uint16_t)); + if (NULL == params->cdf_table) { + goto err; + } + uint16_t cdf_table_tmp[6] = {602, 1521, 1927, 2031, 2046, 2047}; + memcpy(params->cdf_table, cdf_table_tmp, sizeof(cdf_table_tmp)); + } else { + goto err; + } + return k; +err: + OQS_KEX_lwe_frodo_free(k); + return NULL; +} + +int MACRIFY(OQS_KEX_lwe_frodo_alice_0)(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) { + + int ret; + + struct oqs_kex_lwe_frodo_params *params = (struct oqs_kex_lwe_frodo_params *) k->params; + + *alice_priv = NULL; + *alice_msg = NULL; + + /* allocate private key, error, and outgoing message */ + *alice_priv = malloc(PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); + if (*alice_priv == NULL) { + goto 
err; + } + uint16_t b[PARAMS_N * PARAMS_NBAR]; + uint16_t e[PARAMS_N * PARAMS_NBAR]; + + *alice_msg = malloc(PARAMS_REC_PUB_LENGTH); + *alice_msg_len = PARAMS_REC_PUB_LENGTH; + if (*alice_msg == NULL) { + goto err; + } + + /* generate S and E */ + oqs_kex_lwe_frodo_sample_n(*alice_priv, PARAMS_N * PARAMS_NBAR, params, k->rand); + oqs_kex_lwe_frodo_sample_n(e, PARAMS_N * PARAMS_NBAR, params, k->rand); + + /* compute B = AS + E */ + MACRIFY(oqs_kex_lwe_frodo_mul_add_as_plus_e_on_the_fly) + (b, *alice_priv, e, params); + + oqs_kex_lwe_frodo_pack(*alice_msg, PARAMS_REC_PUB_LENGTH, b, PARAMS_N * PARAMS_NBAR, PARAMS_LOG2Q); + + ret = 1; + goto cleanup; + +err: + OQS_MEM_cleanse(e, sizeof(e)); + free(*alice_msg); + *alice_msg = NULL; + free(*alice_priv); + *alice_priv = NULL; + ret = 0; + +cleanup: + return ret; +} + +int MACRIFY(OQS_KEX_lwe_frodo_bob)(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + + struct oqs_kex_lwe_frodo_params *params = (struct oqs_kex_lwe_frodo_params *) k->params; + + uint8_t *bob_rec = NULL; + *bob_msg = NULL; + *key = NULL; + + /* check length of other party's public key */ + if (alice_msg_len != PARAMS_REC_PUB_LENGTH) { + goto err; + } + + /* allocate private key, errors, outgoing message, and key */ + uint16_t bob_priv[PARAMS_N * PARAMS_NBAR]; + uint16_t bprime[PARAMS_N * PARAMS_NBAR]; + uint16_t eprime[PARAMS_N * PARAMS_NBAR]; + uint16_t eprimeprime[PARAMS_N * PARAMS_NBAR]; + uint16_t b[PARAMS_N * PARAMS_NBAR]; + uint16_t v[PARAMS_N * PARAMS_NBAR]; + *bob_msg = malloc(PARAMS_REC_PUB_LENGTH + PARAMS_REC_HINT_LENGTH); + if (*bob_msg == NULL) { + goto err; + } + bob_rec = *bob_msg + PARAMS_REC_PUB_LENGTH; + *key = malloc(PARAMS_KEY_BYTES); + if (*key == NULL) { + goto err; + } + + /* generate S' and E' */ + oqs_kex_lwe_frodo_sample_n(bob_priv, PARAMS_N * PARAMS_NBAR, params, k->rand); + oqs_kex_lwe_frodo_sample_n(eprime, PARAMS_N * 
PARAMS_NBAR, params, k->rand); + + /* compute B' = S'A + E' */ + MACRIFY(oqs_kex_lwe_frodo_mul_add_sa_plus_e_on_the_fly) + (bprime, bob_priv, eprime, params); + + oqs_kex_lwe_frodo_pack(*bob_msg, PARAMS_REC_PUB_LENGTH, bprime, PARAMS_N * PARAMS_NBAR, PARAMS_LOG2Q); + + /* generate E'' */ + oqs_kex_lwe_frodo_sample_n(eprimeprime, PARAMS_NBAR * PARAMS_NBAR, params, k->rand); + + /* unpack B */ + oqs_kex_lwe_frodo_unpack(b, PARAMS_N * PARAMS_NBAR, alice_msg, alice_msg_len, PARAMS_LOG2Q); + + /* compute V = S'B + E'' */ + MACRIFY(oqs_kex_lwe_frodo_mul_add_sb_plus_e) + (v, b, bob_priv, eprimeprime); + + /* compute C = <V>_{2^B} */ + MACRIFY(oqs_kex_lwe_frodo_crossround2) + (bob_rec, v); + + /* compute K = round(V)_{2^B} */ + MACRIFY(oqs_kex_lwe_frodo_round2) + (*key, v); + + *bob_msg_len = PARAMS_REC_PUB_LENGTH + PARAMS_REC_HINT_LENGTH; + *key_len = PARAMS_KEY_BYTES; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*bob_msg); + *bob_msg = NULL; + OQS_MEM_secure_free(*key, PARAMS_KEY_BYTES); + *key = NULL; + +cleanup: + OQS_MEM_cleanse(eprime, sizeof(eprime)); + OQS_MEM_cleanse(eprimeprime, sizeof(eprimeprime)); + OQS_MEM_cleanse(v, sizeof(v)); + + return ret; +} + +int MACRIFY(OQS_KEX_lwe_frodo_alice_1)(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + *key = NULL; + + /* check length of other party's public key */ + if (bob_msg_len != PARAMS_REC_PUB_LENGTH + PARAMS_REC_HINT_LENGTH) { + goto err; + } + + /* allocate working values and session key */ + uint16_t bprime[PARAMS_N * PARAMS_NBAR]; + uint16_t w[PARAMS_N * PARAMS_NBAR]; + + *key = malloc(PARAMS_KEY_BYTES); + if (*key == NULL) { + goto err; + } + + /* unpack B' */ + oqs_kex_lwe_frodo_unpack(bprime, PARAMS_N * PARAMS_NBAR, bob_msg, PARAMS_REC_PUB_LENGTH, PARAMS_LOG2Q); + + /* compute W = B'S */ + MACRIFY(oqs_kex_lwe_frodo_mul_bs) + (w, bprime, (uint16_t *) alice_priv); + + /* compute K = rec(B'S, C) */ + const 
uint8_t *bob_rec = bob_msg + PARAMS_REC_PUB_LENGTH; + MACRIFY(oqs_kex_lwe_frodo_reconcile) + (*key, w, bob_rec); + + *key_len = PARAMS_KEY_BYTES; + + ret = 1; + goto cleanup; + +err: + ret = 0; + OQS_MEM_secure_free(*key, PARAMS_KEY_BYTES); + *key = NULL; + +cleanup: + return ret; +} diff --git a/crypt/liboqs/kex_lwe_frodo/local.h b/crypt/liboqs/kex_lwe_frodo/local.h new file mode 100644 index 0000000000000000000000000000000000000000..1d8afa47f91904b5d1ce91c237b497869debd1b3 --- /dev/null +++ b/crypt/liboqs/kex_lwe_frodo/local.h @@ -0,0 +1,42 @@ +#ifndef _OQS_KEX_LWE_FRODO_LOCAL_H_ +#define _OQS_KEX_LWE_FRODO_LOCAL_H_ + +#include <stdint.h> + +#include <oqs/rand.h> + +struct oqs_kex_lwe_frodo_params { + uint8_t *seed; + size_t seed_len; + char *param_name; + uint16_t log2_q; + uint16_t q; + uint16_t n; + uint16_t extracted_bits; + uint16_t nbar; + uint16_t key_bits; + uint16_t rec_hint_len; + uint32_t pub_len; + uint16_t stripe_step; + int sampler_num; + uint16_t *cdf_table; + size_t cdf_table_len; +}; + +void oqs_kex_lwe_frodo_crossround2_recommended(unsigned char *out, const uint16_t *in); +void oqs_kex_lwe_frodo_round2_recommended(unsigned char *out, uint16_t *in); +void oqs_kex_lwe_frodo_reconcile_recommended(unsigned char *out, uint16_t *w, const unsigned char *hint); + +void oqs_kex_lwe_frodo_key_round(uint16_t *vec, const size_t length, const int b); +void oqs_kex_lwe_frodo_key_round_hints(uint16_t *vec, const size_t length, const int b, const unsigned char *hint); +void oqs_kex_lwe_frodo_pack(unsigned char *out, const size_t outlen, const uint16_t *in, const size_t inlen, const unsigned char lsb); +void oqs_kex_lwe_frodo_unpack(uint16_t *out, const size_t outlen, const unsigned char *in, const size_t inlen, const unsigned char lsb); + +void oqs_kex_lwe_frodo_sample_n(uint16_t *s, const size_t n, struct oqs_kex_lwe_frodo_params *params, OQS_RAND *rand); + +void oqs_kex_lwe_frodo_mul_add_as_plus_e_on_the_fly_recommended(uint16_t *b, const uint16_t *s, const 
uint16_t *e, struct oqs_kex_lwe_frodo_params *params); +void oqs_kex_lwe_frodo_mul_add_sa_plus_e_on_the_fly_recommended(uint16_t *b, const uint16_t *s, const uint16_t *e, struct oqs_kex_lwe_frodo_params *params); +void oqs_kex_lwe_frodo_mul_add_sb_plus_e_recommended(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e); +void oqs_kex_lwe_frodo_mul_bs_recommended(uint16_t *out, const uint16_t *b, const uint16_t *s); + +#endif /* _OQS_KEX_RLWE_BCNS15_LOCAL_H_ */ diff --git a/crypt/liboqs/kex_lwe_frodo/lwe.c b/crypt/liboqs/kex_lwe_frodo/lwe.c new file mode 100644 index 0000000000000000000000000000000000000000..ec64b57e8a845b0e17c6667e747dfa773352c4f5 --- /dev/null +++ b/crypt/liboqs/kex_lwe_frodo/lwe.c @@ -0,0 +1,150 @@ +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "local.h" + +#include <oqs/aes.h> + +#define min(x, y) (((x) < (y)) ? (x) : (y)) + +// round all elements of a vector to the nearest multiple of 2^b +void oqs_kex_lwe_frodo_key_round(uint16_t *vec, const size_t length, const int b) { + size_t i; + uint16_t negmask = ~((1 << b) - 1); + uint16_t half = b > 0 ? 1 << (b - 1) : 0; + for (i = 0; i < length; i++) { + vec[i] = (vec[i] + half) & negmask; + } +} + +// Round all elements of a vector to the multiple of 2^b, with a hint for the +// direction of rounding when close to the boundary. 
+void oqs_kex_lwe_frodo_key_round_hints(uint16_t *vec, const size_t length, const int b, const unsigned char *hint) { + size_t i; + uint16_t whole = 1 << b; + uint16_t mask = whole - 1; + uint16_t negmask = ~mask; + uint16_t half = 1 << (b - 1); + uint16_t quarter = 1 << (b - 2); + + for (i = 0; i < length; i++) { + uint16_t remainder = vec[i] & mask; + uint16_t use_hint = ((remainder + quarter) >> (b - 1)) & 0x1; + + unsigned char h = (hint[i / 8] >> (i % 8)) % 2; // the hint + uint16_t shift = use_hint * (2 * h - 1) * quarter; + + // if use_hint = 1 and h = 0, adding -quarter forces rounding down + // h = 1, adding quarter forces rounding up + + vec[i] = (vec[i] + half + shift) & negmask; + } +} + +// Pack the input uint16 vector into a char output vector, copying lsb bits +// from each input element. If inlen * lsb / 8 > outlen, only outlen * 8 bits +// are copied. +void oqs_kex_lwe_frodo_pack(unsigned char *out, const size_t outlen, const uint16_t *in, const size_t inlen, const unsigned char lsb) { + memset(out, 0, outlen); + + size_t i = 0; // whole bytes already filled in + size_t j = 0; // whole uint16_t already copied + uint16_t w = 0; // the leftover, not yet copied + unsigned char bits = 0; // the number of lsb in w + while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) { + /* + in: | | |********|********| + ^ + j + w : | ****| + ^ + bits + out:|**|**|**|**|**|**|**|**|* | + ^^ + ib + */ + unsigned char b = 0; // bits in out[i] already filled in + while (b < 8) { + int nbits = min(8 - b, bits); + uint16_t mask = (1 << nbits) - 1; + unsigned char t = (w >> (bits - nbits)) & mask; // the bits to copy from w to out + out[i] += t << (8 - b - nbits); + b += nbits; + bits -= nbits; + w &= ~(mask << bits); // not strictly necessary; mostly for debugging + + if (bits == 0) { + if (j < inlen) { + w = in[j]; + bits = lsb; + j++; + } else { + break; // the input vector is exhausted + } + } + } + if (b == 8) { // out[i] is filled in + i++; + } + } +} + 
+// Unpack the input char vector into a uint16_t output vector, copying lsb bits +// for each output element from input. outlen must be at least ceil(inlen * 8 / +// lsb). +void oqs_kex_lwe_frodo_unpack(uint16_t *out, const size_t outlen, const unsigned char *in, const size_t inlen, const unsigned char lsb) { + memset(out, 0, outlen * sizeof(uint16_t)); + + size_t i = 0; // whole uint16_t already filled in + size_t j = 0; // whole bytes already copied + unsigned char w = 0; // the leftover, not yet copied + unsigned char bits = 0; // the number of lsb bits of w + while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) { + /* + in: | | | | | | |**|**|... + ^ + j + w : | *| + ^ + bits + out:| *****| *****| *** | |... + ^ ^ + i b + */ + unsigned char b = 0; // bits in out[i] already filled in + while (b < lsb) { + int nbits = min(lsb - b, bits); + uint16_t mask = (1 << nbits) - 1; + unsigned char t = (w >> (bits - nbits)) & mask; // the bits to copy from w to out + out[i] += t << (lsb - b - nbits); + b += nbits; + bits -= nbits; + w &= ~(mask << bits); // not strictly necessary; mostly for debugging + + if (bits == 0) { + if (j < inlen) { + w = in[j]; + bits = 8; + j++; + } else { + break; // the input vector is exhausted + } + } + } + if (b == lsb) { // out[i] is filled in + i++; + } + } +} + +// define parameters for "recommended" parameter set +#include "recommended.h" +// pre-process code to obtain "recommended" functions +#define MACRIFY(NAME) NAME##_recommended +#include "lwe_macrify.c" +// undefine macros to avoid any confusion later +#include "recommended.h" +#undef MACRIFY diff --git a/crypt/liboqs/kex_lwe_frodo/lwe_macrify.c b/crypt/liboqs/kex_lwe_frodo/lwe_macrify.c new file mode 100644 index 0000000000000000000000000000000000000000..f39b59ba404cc42a5fc4fe901edff7bada2fb090 --- /dev/null +++ b/crypt/liboqs/kex_lwe_frodo/lwe_macrify.c @@ -0,0 +1,160 @@ +// [.]_2 +void MACRIFY(oqs_kex_lwe_frodo_round2)(unsigned char *out, uint16_t *in) { + 
oqs_kex_lwe_frodo_key_round(in, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOG2Q - PARAMS_EXTRACTED_BITS); + for (int i = 0; i < PARAMS_NBAR * PARAMS_NBAR; i++) { + in[i] >>= PARAMS_LOG2Q - PARAMS_EXTRACTED_BITS; // drop bits that were zeroed out + } + + // out should have enough space for the key + oqs_kex_lwe_frodo_pack(out, PARAMS_KEY_BITS / 8, in, PARAMS_NBAR * PARAMS_NBAR, PARAMS_EXTRACTED_BITS); +} + +void MACRIFY(oqs_kex_lwe_frodo_crossround2)(unsigned char *out, const uint16_t *in) { + // out should have enough space for N_BAR * N_BAR bits + memset((unsigned char *) out, 0, PARAMS_REC_HINT_LENGTH); + + uint16_t whole = 1 << (PARAMS_LOG2Q - PARAMS_EXTRACTED_BITS); + uint16_t half = whole >> 1; + uint16_t mask = whole - 1; + + for (int i = 0; i < PARAMS_NBAR * PARAMS_NBAR; i++) { + uint16_t remainder = in[i] & mask; + out[i / 8] += (remainder >= half) << (i % 8); + } +} + +void MACRIFY(oqs_kex_lwe_frodo_reconcile)(unsigned char *out, uint16_t *w, const unsigned char *hint) { + oqs_kex_lwe_frodo_key_round_hints(w, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOG2Q - PARAMS_EXTRACTED_BITS, hint); + for (int i = 0; i < PARAMS_NBAR * PARAMS_NBAR; i++) { + w[i] >>= PARAMS_LOG2Q - PARAMS_EXTRACTED_BITS; // drop bits that were zeroed out + } + oqs_kex_lwe_frodo_pack(out, PARAMS_KEY_BITS / 8, w, PARAMS_NBAR * PARAMS_NBAR, PARAMS_EXTRACTED_BITS); +} + +// Generate-and-multiply: generate A row-wise, multiply by s on the right. 
+void MACRIFY(oqs_kex_lwe_frodo_mul_add_as_plus_e_on_the_fly)(uint16_t *out, const uint16_t *s, const uint16_t *e, struct oqs_kex_lwe_frodo_params *params) { + // A (N x N) + // s,e (N x N_BAR) + // out = A * s + e (N x N_BAR) + + memcpy(out, e, PARAMS_NBAR * PARAMS_N * sizeof(uint16_t)); + + // transpose s to store it in the column-major order + uint16_t s_transpose[PARAMS_NBAR * PARAMS_N]; + for (int j = 0; j < PARAMS_N; j++) { + for (int k = 0; k < PARAMS_NBAR; k++) { + s_transpose[k * PARAMS_N + j] = s[j * PARAMS_NBAR + k]; + } + } + + assert(params->seed_len == 16); + void *aes_key_schedule = NULL; + OQS_AES128_load_schedule(params->seed, &aes_key_schedule, 1); + + for (int i = 0; i < PARAMS_N; i++) { + uint16_t a_row[PARAMS_N] = {0}; + // go through A's rows + for (int j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) { + // Loading values in the little-endian order! + a_row[j] = i; + a_row[j + 1] = j; + } + + OQS_AES128_ECB_enc_sch((uint8_t *) a_row, sizeof(a_row), aes_key_schedule, (uint8_t *) a_row); + + for (int k = 0; k < PARAMS_NBAR; k++) { + uint16_t sum = 0; + for (int j = 0; j < PARAMS_N; j++) { + // matrix-vector multiplication happens here + sum += a_row[j] * s_transpose[k * PARAMS_N + j]; + } + out[i * PARAMS_NBAR + k] += sum; + //Equivalent to %= PARAMS_Q if PARAMS_Q is a power of 2 + out[i * PARAMS_NBAR + k] &= PARAMS_Q - 1; + } + } + + OQS_AES128_free_schedule(aes_key_schedule); +} + +// Generate-and-multiply: generate A column-wise, multiply by s' on the left. 
+void MACRIFY(oqs_kex_lwe_frodo_mul_add_sa_plus_e_on_the_fly)(uint16_t *out, const uint16_t *s, const uint16_t *e, struct oqs_kex_lwe_frodo_params *params) { + // a (N x N) + // s',e' (N_BAR x N) + // out = s'a + e' (N_BAR x N) + + memcpy(out, e, PARAMS_NBAR * PARAMS_N * sizeof(uint16_t)); + + assert(params->seed_len == 16); + + void *aes_key_schedule = NULL; + OQS_AES128_load_schedule(params->seed, &aes_key_schedule, 1); + + for (int kk = 0; kk < PARAMS_N; kk += PARAMS_STRIPE_STEP) { + // Go through A's columns, 8 (== PARAMS_STRIPE_STEP) columns at a time. + // a_cols stores 8 columns of A at a time. + uint16_t a_cols[PARAMS_N * PARAMS_STRIPE_STEP] = {0}; + for (int i = 0; i < PARAMS_N; i++) { + // Loading values in the little-endian order! + a_cols[i * PARAMS_STRIPE_STEP] = i; + a_cols[i * PARAMS_STRIPE_STEP + 1] = kk; + } + + OQS_AES128_ECB_enc_sch((uint8_t *) a_cols, sizeof(a_cols), aes_key_schedule, (uint8_t *) a_cols); + + // transpose a_cols to have access to it in the column-major order. 
+ uint16_t a_cols_t[PARAMS_N * PARAMS_STRIPE_STEP]; + for (int i = 0; i < PARAMS_N; i++) { + for (int k = 0; k < PARAMS_STRIPE_STEP; k++) { + a_cols_t[k * PARAMS_N + i] = a_cols[i * PARAMS_STRIPE_STEP + k]; + } + } + + for (int i = 0; i < PARAMS_NBAR; i++) { + for (int k = 0; k < PARAMS_STRIPE_STEP; k++) { + uint16_t sum = 0; + for (int j = 0; j < PARAMS_N; j++) { + sum += s[i * PARAMS_N + j] * a_cols_t[k * PARAMS_N + j]; + } + out[i * PARAMS_N + kk + k] += sum; + out[i * PARAMS_N + kk + k] &= PARAMS_Q - 1; //Works as long as PARAMS_Q is a power of 2 + } + } + } + OQS_AES128_free_schedule(aes_key_schedule); +} + +// multiply by s on the right +void MACRIFY(oqs_kex_lwe_frodo_mul_bs)(uint16_t *out, const uint16_t *b, const uint16_t *s) { + // b (N_BAR x N) + // s (N x N_BAR) + // out = bs + for (int i = 0; i < PARAMS_NBAR; i++) { + for (int j = 0; j < PARAMS_NBAR; j++) { + uint16_t sum = 0; + for (int k = 0; k < PARAMS_N; k++) { + sum += b[i * PARAMS_N + k] * s[k * PARAMS_NBAR + j]; + } + out[i * PARAMS_NBAR + j] = sum & (PARAMS_Q - 1); + } + } +} + +// multiply by s on the left +void MACRIFY(oqs_kex_lwe_frodo_mul_add_sb_plus_e)(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) { + // b (N x N_BAR) + // s (N_BAR x N) + // e (N_BAR x N_BAR) + // out = sb + e + memcpy(out, e, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); + for (int k = 0; k < PARAMS_NBAR; k++) { + for (int i = 0; i < PARAMS_NBAR; i++) { + uint16_t sum = 0; + for (int j = 0; j < PARAMS_N; j++) { + sum += s[k * PARAMS_N + j] * b[j * PARAMS_NBAR + i]; + } + out[k * PARAMS_NBAR + i] += sum; + out[k * PARAMS_NBAR + i] &= PARAMS_Q - 1; // not really necessary since LWE_Q is a power of 2. 
+ } + } +} diff --git a/crypt/liboqs/kex_lwe_frodo/lwe_noise.c b/crypt/liboqs/kex_lwe_frodo/lwe_noise.c new file mode 100644 index 0000000000000000000000000000000000000000..4dc4dc4ce2c28d81f24a59bf850b84fac3d7c542 --- /dev/null +++ b/crypt/liboqs/kex_lwe_frodo/lwe_noise.c @@ -0,0 +1,136 @@ +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <oqs/common.h> +#include <oqs/rand.h> + +#include "local.h" + +#define RECOMMENDED_N_ARRAY_SIZE (752 * 8) +#define RECOMMENDED_CDF_TABLE_LEN 6 +#if defined(WINDOWS) +// VS complains about arrays initialized with const param. On Windows, +// we use directly the recommended value passed down from calling functions. +// Currently there is only one set of params, so that works. Need to fix this +// in a more general setting (TODO). +#define IS_WINDOWS(windows, nix) (windows) +#else +#define IS_WINDOWS(windows, nix) (nix) +#endif + +static void lwe_sample_n_inverse_8(uint16_t *s, const size_t n, const uint8_t *cdf_table, const size_t cdf_table_len, OQS_RAND *rand) { + /* Fills vector s with n samples from the noise distribution which requires + * 8 bits to sample. The distribution is specified by its CDF. Super-constant + * timing: the CDF table is ingested for every sample. + */ + + uint8_t rndvec[IS_WINDOWS(RECOMMENDED_N_ARRAY_SIZE, n)]; + OQS_RAND_n(rand, rndvec, sizeof(rndvec)); + + for (size_t i = 0; i < n; ++i) { + uint8_t sample = 0; + uint8_t rnd = rndvec[i] >> 1; // drop the least significant bit + uint8_t sign = rndvec[i] & 0x1; // pick the least significant bit + + // No need to compare with the last value. + for (int64_t j = 0; j < (int64_t)(cdf_table_len - 1); j++) { + // Constant time comparison: 1 if cdf_table[j] < rnd, 0 otherwise. + // Critically uses the fact that cdf_table[j] and rnd fit in 7 bits. 
			sample += (uint8_t)(cdf_table[j] - rnd) >> 7;
		}
		// Assuming that sign is either 0 or 1, flips sample iff sign = 1
		s[i] = ((-sign) ^ sample) + sign;
	}
	OQS_MEM_cleanse(rndvec, sizeof(rndvec));
}

/*
 * Inverse-CDF noise sampler, 12-bit variant.
 *
 * Fills s[0..n-1] with signed samples from the distribution described by
 * cdf_table (cdf_table_len entries). Consumes 12 bits of uniform randomness
 * per output: 11 magnitude bits compared against the CDF plus 1 sign bit,
 * packed as two outputs per 3 random bytes.
 *
 * Timing discipline: the entire CDF table is scanned for every sample and the
 * comparison is branch-free (sign-bit extraction of the subtraction), so the
 * running time does not depend on the sampled values. Do not "optimize" the
 * inner loop with an early exit.
 */
static void lwe_sample_n_inverse_12(uint16_t *s, const size_t n, const uint16_t *cdf_table, const size_t cdf_table_len, OQS_RAND *rand) {
	/* Fills vector s with n samples from the noise distribution which requires
	 * 12 bits to sample. The distribution is specified by its CDF. Super-constant
	 * timing: the CDF table is ingested for every sample.
	 */

	uint8_t rnd[3 * ((IS_WINDOWS(RECOMMENDED_N_ARRAY_SIZE, n) + 1) / 2)]; // 12 bits of unif randomness per output element
	OQS_RAND_n(rand, rnd, sizeof(rnd));

	for (size_t i = 0; i < n; i += 2) { // two output elements at a time
		uint8_t *pRnd = (rnd + 3 * i / 2);

		uint16_t rnd1 = (((pRnd[0] << 8) + pRnd[1]) & 0xFFE0) >> 5; // first 11 bits (0..10)
		uint16_t rnd2 = (((pRnd[1] << 8) + pRnd[2]) & 0x1FFC) >> 2; // next 11 bits (11..21)

		uint8_t sample1 = 0;
		uint8_t sample2 = 0;

		// No need to compare with the last value.
		for (size_t j = 0; j < cdf_table_len - 1; j++) {
			// Constant time comparison: 1 if LWE_CDF_TABLE[j] < rnd1, 0 otherwise.
			// Critically uses the fact that LWE_CDF_TABLE[j] and rnd1 fit in 15 bits.
			sample1 += (uint16_t)(cdf_table[j] - rnd1) >> 15;
			sample2 += (uint16_t)(cdf_table[j] - rnd2) >> 15;
		}

		uint8_t sign1 = (pRnd[2] & 0x02) >> 1; // 22nd bit
		uint8_t sign2 = pRnd[2] & 0x01;        // 23rd bit

		// Assuming that sign1 is either 0 or 1, flips sample1 iff sign1 = 1
		s[i] = ((-sign1) ^ sample1) + sign1;

		// Odd n: the final pair has no second output slot.
		if (i + 1 < n) {
			s[i + 1] = ((-sign2) ^ sample2) + sign2;
		}
	}
	// Wipe the raw randomness; OQS_MEM_cleanse resists being optimized away.
	OQS_MEM_cleanse(rnd, sizeof(rnd));
}

static void lwe_sample_n_inverse_16(uint16_t *s, const size_t n, const uint16_t *cdf_table, const size_t cdf_table_len, OQS_RAND *rand) {
	/* Fills vector s with n samples from the noise distribution which requires
	 * 16 bits to sample.
The distribution is specified by its CDF. Super-constant
	 * timing: the CDF table is ingested for every sample.
	 */

	uint16_t rndvec[IS_WINDOWS(RECOMMENDED_N_ARRAY_SIZE, n)];
	OQS_RAND_n(rand, (uint8_t *) rndvec, sizeof(rndvec));

	for (size_t i = 0; i < n; ++i) {
		uint8_t sample = 0;
		uint16_t rnd = rndvec[i] >> 1;  // drop the least significant bit
		uint8_t sign = rndvec[i] & 0x1; // pick the least significant bit

		// No need to compare with the last value.
		for (size_t j = 0; j < cdf_table_len - 1; j++) {
			// Constant time comparison: 1 if LWE_CDF_TABLE[j] < rnd, 0 otherwise.
			// Critically uses the fact that LWE_CDF_TABLE[j] and rnd fit in 15 bits.
			sample += (uint16_t)(cdf_table[j] - rnd) >> 15;
		}
		// Assuming that sign is either 0 or 1, flips sample iff sign = 1
		s[i] = ((-sign) ^ sample) + sign;
	}
	// Wipe the raw randomness; OQS_MEM_cleanse resists being optimized away.
	OQS_MEM_cleanse(rndvec, sizeof(rndvec));
}

/*
 * Public entry point: fill s[0..n-1] with noise samples, dispatching to the
 * 8-, 12- or 16-bit inverse-CDF sampler selected by params->sampler_num.
 * Any other sampler_num is a programming error (assert).
 *
 * NOTE(review): the case-8 path assumes every cdf_table entry fits in 8 bits
 * (it truncates uint16_t -> uint8_t) — presumably guaranteed by the parameter
 * sets that select sampler_num == 8; confirm when adding new parameter sets.
 */
void oqs_kex_lwe_frodo_sample_n(uint16_t *s, const size_t n, struct oqs_kex_lwe_frodo_params *params, OQS_RAND *rand) {
	switch (params->sampler_num) {
	case 8: {
		// have to copy cdf_table from uint16_t to uint8_t
		uint8_t cdf_table_8[IS_WINDOWS(RECOMMENDED_CDF_TABLE_LEN, params->cdf_table_len) * sizeof(uint8_t)];

		for (size_t i = 0; i < params->cdf_table_len; i++) {
			cdf_table_8[i] = (uint8_t) params->cdf_table[i];
		}
		lwe_sample_n_inverse_8(s, n, cdf_table_8, params->cdf_table_len, rand);
	} break;
	case 12:
		lwe_sample_n_inverse_12(s, n, params->cdf_table, params->cdf_table_len, rand);
		break;
	case 16:
		lwe_sample_n_inverse_16(s, n, params->cdf_table, params->cdf_table_len, rand);
		break;
	default:
		assert(0); //ERROR
		break;
	}
}
diff --git a/crypt/liboqs/kex_lwe_frodo/recommended.h b/crypt/liboqs/kex_lwe_frodo/recommended.h
new file mode 100644
index 0000000000000000000000000000000000000000..725a94ac53e1cbd701914038f39970fbb90360a1
--- /dev/null
+++ b/crypt/liboqs/kex_lwe_frodo/recommended.h
@@ -0,0 +1,29 @@
//Recommended parameter set.
Include twice to undefine + +#ifndef OQS_LWE_FRODO_RECOMMENDED_H +#define OQS_LWE_FRODO_RECOMMENDED_H +#define PARAMS_N 752 +#define PARAMS_NBAR 8 +#define PARAMS_LOG2Q 15 +#define PARAMS_Q (1 << PARAMS_LOG2Q) +#define PARAMS_EXTRACTED_BITS 4 +#define PARAMS_KEY_BITS 256 +#define PARAMS_KEY_BYTES (PARAMS_KEY_BITS >> 3) +#define PARAMS_STRIPE_STEP 8 +#define LWE_DIV_ROUNDUP(x, y) (((x) + (y) -1) / y) +#define PARAMS_REC_HINT_LENGTH LWE_DIV_ROUNDUP(PARAMS_NBAR *PARAMS_NBAR, 8) +#define PARAMS_REC_PUB_LENGTH LWE_DIV_ROUNDUP(PARAMS_N *PARAMS_NBAR *PARAMS_LOG2Q, 8) + +#else + +#undef OQS_LWE_FRODO_RECOMMENDED_H +#undef PARAMS_N +#undef PARAMS_NBAR +#undef PARAMS_LOG2Q +#undef PARAMS_Q +#undef PARAMS_EXTRACTED_BITS +#undef PARAMS_KEY_BITS +#undef PARAMS_STRIPE_STEP +#undef LWE_DIV_ROUNDUP +#undef PARAMS_REC_HINT_LENGTH +#endif diff --git a/crypt/liboqs/kex_mlwe_kyber/LICENSE.txt b/crypt/liboqs/kex_mlwe_kyber/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..085eb6bc97c23c738822aad8e591d0efbcccda75 --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/LICENSE.txt @@ -0,0 +1,7 @@ +The files in this directory (except kex_mlwe_kyber.*) were originally published in https://github.com/pq-crystals/kyber + + +The following license applies to all files in the src/kex_mlwe_kyber directory. + + +Public domain. diff --git a/crypt/liboqs/kex_mlwe_kyber/Makefile.am b/crypt/liboqs/kex_mlwe_kyber/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..d486e89a69f502efc8e23e73f7d584c2dd76b4ac --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/Makefile.am @@ -0,0 +1,8 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libkyber.la + +libkyber_la_SOURCES = kex_mlwe_kyber.c + +libkyber_la_CPPFLAGS = -I../../include -I. 
+libkyber_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/kex_mlwe_kyber/indcpa.c b/crypt/liboqs/kex_mlwe_kyber/indcpa.c new file mode 100644 index 0000000000000000000000000000000000000000..cb15fe687320ad5c90d281460ac8fc208bc34f5c --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/indcpa.c @@ -0,0 +1,179 @@ +#include "params.h" +#include <oqs/rand.h> +#include <oqs/sha3.h> + +static void pack_pk(unsigned char *r, const polyvec *pk, const unsigned char *seed) { + int i; + polyvec_compress(r, pk); + for (i = 0; i < KYBER_SEEDBYTES; i++) + r[i + KYBER_POLYVECCOMPRESSEDBYTES] = seed[i]; +} + +static void unpack_pk(polyvec *pk, unsigned char *seed, const unsigned char *packedpk) { + int i; + polyvec_decompress(pk, packedpk); + + for (i = 0; i < KYBER_SEEDBYTES; i++) + seed[i] = packedpk[i + KYBER_POLYVECCOMPRESSEDBYTES]; +} + +static void pack_ciphertext(unsigned char *r, const polyvec *b, const poly *v) { + polyvec_compress(r, b); + poly_compress(r + KYBER_POLYVECCOMPRESSEDBYTES, v); +} + +static void unpack_ciphertext(polyvec *b, poly *v, const unsigned char *c) { + polyvec_decompress(b, c); + poly_decompress(v, c + KYBER_POLYVECCOMPRESSEDBYTES); +} + +static void pack_sk(unsigned char *r, const polyvec *sk) { + polyvec_tobytes(r, sk); +} + +static void unpack_sk(polyvec *sk, const unsigned char *packedsk) { + polyvec_frombytes(sk, packedsk); +} + +#define gen_a(A, B) gen_matrix(A, B, 0) +#define gen_at(A, B) gen_matrix(A, B, 1) + +/* Generate entry a_{i,j} of matrix A as Parse(SHAKE128(seed|i|j)) */ +static void gen_matrix(polyvec *a, const unsigned char *seed, int transposed) //XXX: Not static for benchmarking +{ + unsigned int pos = 0, ctr; + uint16_t val; + unsigned int nblocks = 4; + uint8_t buf[OQS_SHA3_SHAKE128_RATE * 4]; // was * nblocks, but VS doesn't like this buf init + int i, j; + uint16_t dsep; + uint64_t state[25]; // CSHAKE state + + for (i = 0; i < KYBER_D; i++) { + for (j = 0; j < KYBER_D; j++) { + ctr = pos = 0; + if (transposed) + dsep = j + 
(i << 8); + else + dsep = i + (j << 8); + + OQS_SHA3_cshake128_simple_absorb(state, dsep, seed, KYBER_SEEDBYTES); + OQS_SHA3_cshake128_simple_squeezeblocks(buf, nblocks, state); + + while (ctr < KYBER_N) { + val = (buf[pos] | ((uint16_t) buf[pos + 1] << 8)) & 0x1fff; + if (val < KYBER_Q) { + a[i].vec[j].coeffs[ctr++] = val; + } + pos += 2; + + if (pos > OQS_SHA3_SHAKE128_RATE * nblocks - 2) { + nblocks = 1; + OQS_SHA3_cshake128_simple_squeezeblocks(buf, nblocks, state); + pos = 0; + } + } + } + } +} + +static void indcpa_keypair(unsigned char *pk, + unsigned char *sk, OQS_RAND *rand) { + polyvec a[KYBER_D], e, pkpv, skpv; + unsigned char seed[KYBER_SEEDBYTES]; + unsigned char noiseseed[KYBER_COINBYTES]; + int i; + unsigned char nonce = 0; + + rand->rand_n(rand, seed, KYBER_SEEDBYTES); + OQS_SHA3_shake128(seed, KYBER_SEEDBYTES, seed, KYBER_SEEDBYTES); /* Don't send output of system RNG */ + rand->rand_n(rand, noiseseed, KYBER_COINBYTES); + + gen_a(a, seed); + + for (i = 0; i < KYBER_D; i++) + poly_getnoise(skpv.vec + i, noiseseed, nonce++); + + polyvec_ntt(&skpv); + + for (i = 0; i < KYBER_D; i++) + poly_getnoise(e.vec + i, noiseseed, nonce++); + + // matrix-vector multiplication + for (i = 0; i < KYBER_D; i++) + polyvec_pointwise_acc(&pkpv.vec[i], &skpv, a + i); + + polyvec_invntt(&pkpv); + polyvec_add(&pkpv, &pkpv, &e); + + pack_sk(sk, &skpv); + pack_pk(pk, &pkpv, seed); +} + +static void indcpa_enc(unsigned char *c, + const unsigned char *m, + const unsigned char *pk, + const unsigned char *coins) { + polyvec sp, pkpv, ep, at[KYBER_D], bp; + poly v, k, epp; + unsigned char seed[KYBER_SEEDBYTES]; + int i; + unsigned char nonce = 0; + + unpack_pk(&pkpv, seed, pk); + + poly_frommsg(&k, m); + + for (i = 0; i < KYBER_D; i++) + bitrev_vector(pkpv.vec[i].coeffs); + polyvec_ntt(&pkpv); + + gen_at(at, seed); + + for (i = 0; i < KYBER_D; i++) + poly_getnoise(sp.vec + i, coins, nonce++); + + polyvec_ntt(&sp); + + for (i = 0; i < KYBER_D; i++) + poly_getnoise(ep.vec + i, 
coins, nonce++); + + // matrix-vector multiplication + for (i = 0; i < KYBER_D; i++) + polyvec_pointwise_acc(&bp.vec[i], &sp, at + i); + + polyvec_invntt(&bp); + polyvec_add(&bp, &bp, &ep); + + polyvec_pointwise_acc(&v, &pkpv, &sp); + poly_invntt(&v); + + poly_getnoise(&epp, coins, nonce++); + + poly_add(&v, &v, &epp); + poly_add(&v, &v, &k); + + pack_ciphertext(c, &bp, &v); +} + +static void indcpa_dec(unsigned char *m, + const unsigned char *c, + const unsigned char *sk) { + polyvec bp, skpv; + poly v, mp; + size_t i; + + unpack_ciphertext(&bp, &v, c); + unpack_sk(&skpv, sk); + + for (i = 0; i < KYBER_D; i++) + bitrev_vector(bp.vec[i].coeffs); + polyvec_ntt(&bp); + + polyvec_pointwise_acc(&mp, &skpv, &bp); + poly_invntt(&mp); + + poly_sub(&mp, &mp, &v); + + poly_tomsg(m, &mp); +} diff --git a/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.c b/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.c new file mode 100644 index 0000000000000000000000000000000000000000..a00b5a905f2e5aa75355d9391e1e39d6ea7b0362 --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.c @@ -0,0 +1,171 @@ +#if defined(WINDOWS) +#define UNUSED +// __attribute__ not supported in VS, is there something else I should define? 
#else
#define UNUSED __attribute__((unused))
#endif

#include <stdlib.h>
#include <string.h>
#if !defined(WINDOWS)
#include <strings.h>
#include <unistd.h>
#endif

#include <oqs/kex.h>
#include <oqs/rand.h>

#include "kex_mlwe_kyber.h"
#include "kyber.c"
#include "params.h"

#if defined(WINDOWS)
#define strdup _strdup // for strdup deprecation warning
#endif

/*
 * Allocate and populate an OQS_KEX method object for MLWE Kyber.
 * Caller owns the result and must release it via OQS_KEX_mlwe_kyber_free.
 * Returns NULL if the top-level allocation fails.
 *
 * NOTE(review): the strdup of method_name is not checked for NULL; a later
 * free() is safe but callers reading method_name should tolerate NULL.
 */
OQS_KEX *OQS_KEX_mlwe_kyber_new(OQS_RAND *rand) {
	OQS_KEX *k = malloc(sizeof(OQS_KEX));
	if (k == NULL) {
		return NULL;
	}
	k->method_name = strdup("MLWE Kyber");
	k->estimated_classical_security = 178; // using https://github.com/pq-crystals/kyber/blob/master/scripts/Kyber.py
	k->estimated_quantum_security = 161;   // using https://github.com/pq-crystals/kyber/blob/master/scripts/Kyber.py
	k->seed = NULL;
	k->seed_len = 0;
	k->named_parameters = 0;
	k->rand = rand;
	k->params = NULL;
	k->alice_0 = &OQS_KEX_mlwe_kyber_alice_0;
	k->bob = &OQS_KEX_mlwe_kyber_bob;
	k->alice_1 = &OQS_KEX_mlwe_kyber_alice_1;
	k->alice_priv_free = &OQS_KEX_mlwe_kyber_alice_priv_free;
	k->free = &OQS_KEX_mlwe_kyber_free;
	return k;
}

/*
 * Alice's first move: generate a Kyber key pair.
 * On success (returns 1): *alice_msg is the public key (caller frees),
 * *alice_priv is the secret key (release via alice_priv_free), and
 * *alice_msg_len is set. On failure (returns 0) both out-pointers are NULL.
 *
 * NOTE(review): k is marked UNUSED but k->rand is dereferenced below — the
 * annotation is stale; harmless under GCC/Clang but worth removing.
 */
int OQS_KEX_mlwe_kyber_alice_0(UNUSED OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) {

	int ret;

	*alice_priv = NULL;
	*alice_msg = NULL;

	/* allocate public/private key pair */
	*alice_msg = malloc(KYBER_PUBLICKEYBYTES);
	if (*alice_msg == NULL) {
		goto err;
	}
	*alice_priv = malloc(KYBER_SECRETKEYBYTES);
	if (*alice_priv == NULL) {
		goto err;
	}

	/* generate public/private key pair */
	keygen(*alice_msg, (unsigned char *) *alice_priv, k->rand);
	*alice_msg_len = KYBER_PUBLICKEYBYTES;

	ret = 1;
	goto cleanup;

err:
	// goto-based cleanup: release whatever was allocated and null the outputs
	// so the caller never sees a half-initialized pair.
	ret = 0;
	free(*alice_msg);
	*alice_msg = NULL;
	free(*alice_priv);
	*alice_priv = NULL;

cleanup:

	return ret;
}

int OQS_KEX_mlwe_kyber_bob(UNUSED OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t
*key_len) { + + int ret; + + *bob_msg = NULL; + *key = NULL; + + if (alice_msg_len != KYBER_PUBLICKEYBYTES) { + goto err; + } + + /* allocate message and session key */ + *bob_msg = malloc(KYBER_BYTES); + if (*bob_msg == NULL) { + goto err; + } + *key = malloc(32); + if (*key == NULL) { + goto err; + } + + /* generate Bob's response */ + sharedb(*key, *bob_msg, alice_msg, k->rand); + *bob_msg_len = KYBER_BYTES; + *key_len = 32; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*bob_msg); + *bob_msg = NULL; + free(*key); + *key = NULL; + +cleanup: + + return ret; +} + +int OQS_KEX_mlwe_kyber_alice_1(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + + *key = NULL; + + if (bob_msg_len != KYBER_BYTES) { + goto err; + } + + /* allocate session key */ + *key = malloc(32); + if (*key == NULL) { + goto err; + } + + /* generate Alice's session key */ + shareda(*key, (unsigned char *) alice_priv, bob_msg); + *key_len = 32; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*key); + *key = NULL; + +cleanup: + + return ret; +} + +void OQS_KEX_mlwe_kyber_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) { + if (alice_priv) { + free(alice_priv); + } +} + +void OQS_KEX_mlwe_kyber_free(OQS_KEX *k) { + if (k) { + free(k->named_parameters); + k->named_parameters = NULL; + free(k->method_name); + k->method_name = NULL; + } + free(k); +} diff --git a/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.h b/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.h new file mode 100644 index 0000000000000000000000000000000000000000..b3a64683080d4121168f98d059618e795835fd47 --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.h @@ -0,0 +1,24 @@ +/** + * \file kex_mlwe_kyber.h + * \brief Header for module-LWE key exchange protocol Kyber + */ + +#ifndef __OQS_KEX_MLWE_KYBER_H +#define __OQS_KEX_MLWE_KYBER_H + +#include <stddef.h> +#include <stdint.h> + +#include <oqs/kex.h> +#include <oqs/rand.h> + 
+OQS_KEX *OQS_KEX_mlwe_kyber_new(OQS_RAND *rand); + +int OQS_KEX_mlwe_kyber_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len); +int OQS_KEX_mlwe_kyber_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len); +int OQS_KEX_mlwe_kyber_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + +void OQS_KEX_mlwe_kyber_alice_priv_free(OQS_KEX *k, void *alice_priv); +void OQS_KEX_mlwe_kyber_free(OQS_KEX *k); + +#endif diff --git a/crypt/liboqs/kex_mlwe_kyber/kyber.c b/crypt/liboqs/kex_mlwe_kyber/kyber.c new file mode 100644 index 0000000000000000000000000000000000000000..36cc119027db28d5da6876ae4f04fcb03b5b1fc8 --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/kyber.c @@ -0,0 +1,86 @@ +#include <stdint.h> + +#include <oqs/sha3.h> +#include <oqs/rand.h> + +// clang-format off +// (order of include matters) +#include "precomp.c" +#include "reduce.c" +#include "verify.c" +#include "ntt.c" +#include "poly.c" +#include "polyvec.c" +#include "indcpa.c" +// clang-format on + +// API FUNCTIONS + +/* Build a CCA-secure KEM from an IND-CPA-secure encryption scheme */ + +static void keygen(unsigned char *pk, unsigned char *sk, OQS_RAND *rand) { + size_t i; + indcpa_keypair(pk, sk, rand); + for (i = 0; i < KYBER_INDCPA_PUBLICKEYBYTES; i++) + sk[i + KYBER_INDCPA_SECRETKEYBYTES] = pk[i]; + OQS_SHA3_shake128(sk + KYBER_SECRETKEYBYTES - 64, 32, pk, KYBER_PUBLICKEYBYTES); + rand->rand_n(rand, sk + KYBER_SECRETKEYBYTES - KYBER_SHAREDKEYBYTES, KYBER_SHAREDKEYBYTES); /* Value z for pseudo-random output on reject */ +} + +static void sharedb(unsigned char *sharedkey, unsigned char *send, + const unsigned char *received, OQS_RAND *rand) { + unsigned char krq[96]; /* Will contain key, coins, qrom-hash */ + unsigned char buf[64]; + int i; + + rand->rand_n(rand, buf, 32); + OQS_SHA3_shake128(buf, 32, buf, 32); 
/* Don't release system RNG output */ + + OQS_SHA3_shake128(buf + 32, 32, received, KYBER_PUBLICKEYBYTES); /* Multitarget countermeasure for coins + contributory KEM */ + OQS_SHA3_shake128(krq, 96, buf, 64); + + indcpa_enc(send, buf, received, krq + 32); /* coins are in krq+32 */ + + for (i = 0; i < 32; i++) + send[i + KYBER_INDCPA_BYTES] = krq[i + 64]; + + OQS_SHA3_shake128(krq + 32, 32, send, KYBER_BYTES); /* overwrite coins in krq with h(c) */ + OQS_SHA3_shake128(sharedkey, 32, krq, 64); /* hash concatenation of pre-k and h(c) to k */ + +#ifndef STATISTICAL_TEST + OQS_SHA3_sha3256(sharedkey, sharedkey, 32); +#endif +} + +static void shareda(unsigned char *sharedkey, const unsigned char *sk, + const unsigned char *received) { + int i, fail; + unsigned char cmp[KYBER_BYTES]; + unsigned char buf[64]; + unsigned char krq[96]; /* Will contain key, coins, qrom-hash */ + const unsigned char *pk = sk + KYBER_INDCPA_SECRETKEYBYTES; + + indcpa_dec(buf, received, sk); + + // shake128(buf+32, 32, pk, KYBER_PUBLICKEYBYTES); /* Multitarget countermeasure for coins + contributory KEM */ + for (i = 0; i < 32; i++) /* Save hash by storing h(pk) in sk */ + buf[32 + i] = sk[KYBER_SECRETKEYBYTES - 64 + i]; + OQS_SHA3_shake128(krq, 96, buf, 64); + + indcpa_enc(cmp, buf, pk, krq + 32); /* coins are in krq+32 */ + + for (i = 0; i < 32; i++) + cmp[i + KYBER_INDCPA_BYTES] = krq[i + 64]; + + fail = verify(received, cmp, KYBER_BYTES); + + OQS_SHA3_shake128(krq + 32, 32, received, KYBER_BYTES); /* overwrite coins in krq with h(c) */ + + cmov(krq, sk + KYBER_SECRETKEYBYTES - KYBER_SHAREDKEYBYTES, KYBER_SHAREDKEYBYTES, fail); /* Overwrite pre-k with z on re-encryption failure */ + + OQS_SHA3_shake128(sharedkey, 32, krq, 64); /* hash concatenation of pre-k and h(c) to k */ + +#ifndef STATISTICAL_TEST + OQS_SHA3_sha3256(sharedkey, sharedkey, 32); +#endif +} diff --git a/crypt/liboqs/kex_mlwe_kyber/ntt.c b/crypt/liboqs/kex_mlwe_kyber/ntt.c new file mode 100644 index 
0000000000000000000000000000000000000000..14b60c8f7b3619ebdb4c74015487500a46b52598 --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/ntt.c @@ -0,0 +1,67 @@ +#include "params.h" + +static uint16_t bitrev_table[KYBER_N] = { + 0, 128, 64, 192, 32, 160, 96, 224, 16, 144, 80, 208, 48, 176, 112, 240, + 8, 136, 72, 200, 40, 168, 104, 232, 24, 152, 88, 216, 56, 184, 120, 248, + 4, 132, 68, 196, 36, 164, 100, 228, 20, 148, 84, 212, 52, 180, 116, 244, + 12, 140, 76, 204, 44, 172, 108, 236, 28, 156, 92, 220, 60, 188, 124, 252, + 2, 130, 66, 194, 34, 162, 98, 226, 18, 146, 82, 210, 50, 178, 114, 242, + 10, 138, 74, 202, 42, 170, 106, 234, 26, 154, 90, 218, 58, 186, 122, 250, + 6, 134, 70, 198, 38, 166, 102, 230, 22, 150, 86, 214, 54, 182, 118, 246, + 14, 142, 78, 206, 46, 174, 110, 238, 30, 158, 94, 222, 62, 190, 126, 254, + 1, 129, 65, 193, 33, 161, 97, 225, 17, 145, 81, 209, 49, 177, 113, 241, + 9, 137, 73, 201, 41, 169, 105, 233, 25, 153, 89, 217, 57, 185, 121, 249, + 5, 133, 69, 197, 37, 165, 101, 229, 21, 149, 85, 213, 53, 181, 117, 245, + 13, 141, 77, 205, 45, 173, 109, 237, 29, 157, 93, 221, 61, 189, 125, 253, + 3, 131, 67, 195, 35, 163, 99, 227, 19, 147, 83, 211, 51, 179, 115, 243, + 11, 139, 75, 203, 43, 171, 107, 235, 27, 155, 91, 219, 59, 187, 123, 251, + 7, 135, 71, 199, 39, 167, 103, 231, 23, 151, 87, 215, 55, 183, 119, 247, + 15, 143, 79, 207, 47, 175, 111, 239, 31, 159, 95, 223, 63, 191, 127, 255, +}; + +static void bitrev_vector(uint16_t *poly) { + unsigned int i, r; + uint16_t tmp; + + for (i = 0; i < KYBER_N; i++) { + r = bitrev_table[i]; + if (i < r) { + tmp = poly[i]; + poly[i] = poly[r]; + poly[r] = tmp; + } + } +} + +static void mul_coefficients(uint16_t *poly, const uint16_t *factors) { + unsigned int i; + + for (i = 0; i < KYBER_N; i++) + poly[i] = montgomery_reduce((poly[i] * factors[i])); +} + +/* GS_bo_to_no; omegas need to be in Montgomery domain */ +static void ntt(uint16_t *a, const uint16_t *omega) { + int start, j, jTwiddle, level; + uint16_t 
temp, W; + uint32_t t; + + for (level = 0; level < 8; level++) { + for (start = 0; start < (1 << level); start++) { + jTwiddle = 0; + for (j = start; j < KYBER_N - 1; j += 2 * (1 << level)) { + W = omega[jTwiddle++]; + temp = a[j]; + + if (level & 1) // odd level + a[j] = barrett_reduce((temp + a[j + (1 << level)])); + else + a[j] = (temp + a[j + (1 << level)]); // Omit reduction (be lazy) + + t = (W * ((uint32_t) temp + 4 * KYBER_Q - a[j + (1 << level)])); + + a[j + (1 << level)] = montgomery_reduce(t); + } + } + } +} diff --git a/crypt/liboqs/kex_mlwe_kyber/params.h b/crypt/liboqs/kex_mlwe_kyber/params.h new file mode 100644 index 0000000000000000000000000000000000000000..ba92261027ba72fb3190212a8961f2c34c193a2e --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/params.h @@ -0,0 +1,37 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#define KYBER_N 256 +#define KYBER_D 3 +#define KYBER_K 4 /* used in sampler */ +#define KYBER_Q 7681 + +#define KYBER_SEEDBYTES 32 +#define KYBER_NOISESEEDBYTES 32 +#define KYBER_COINBYTES 32 +#define KYBER_SHAREDKEYBYTES 32 + +#define KYBER_POLYBYTES 416 +#define KYBER_POLYCOMPRESSEDBYTES 96 +#define KYBER_POLYVECBYTES (KYBER_D * KYBER_POLYBYTES) +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_D * 352) + +#define KYBER_INDCPA_MSGBYTES 32 +#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_SEEDBYTES) +#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) +#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) + +#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) +#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 32 + KYBER_SHAREDKEYBYTES) +#define KYBER_BYTES (KYBER_INDCPA_BYTES + KYBER_INDCPA_MSGBYTES) /* Second part is for Targhi-Unruh */ + +extern uint16_t oqs_kex_mlwe_kyber_omegas_montgomery[]; +extern uint16_t oqs_kex_mlwe_kyber_omegas_inv_bitrev_montgomery[]; +extern uint16_t oqs_kex_mlwe_kyber_psis_inv_montgomery[]; +extern uint16_t 
oqs_kex_mlwe_kyber_psis_bitrev_montgomery[]; + +#if defined(WINDOWS) +typedef unsigned __int16 uint16_t; +#endif + +#endif diff --git a/crypt/liboqs/kex_mlwe_kyber/poly.c b/crypt/liboqs/kex_mlwe_kyber/poly.c new file mode 100644 index 0000000000000000000000000000000000000000..dc19b217cb7f24f43c22c4275f123580854bbccf --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/poly.c @@ -0,0 +1,169 @@ +#include "params.h" +#include <oqs/sha3.h> + +typedef struct { + uint16_t coeffs[KYBER_N]; +#if defined(WINDOWS) +} poly; +#else +} poly __attribute__((aligned(32))); +#endif + +/* include cbd.c */ +static uint32_t load_littleendian(const unsigned char *x) { + return x[0] | (((uint32_t) x[1]) << 8) | (((uint32_t) x[2]) << 16) | (((uint32_t) x[3]) << 24); +} + +static void cbd(poly *r, const unsigned char *buf) { +#if KYBER_K != 4 +#error "poly_getnoise in poly.c only supports k=4" +#endif + + uint32_t t, d, a[4], b[4]; + int i, j; + + for (i = 0; i < KYBER_N / 4; i++) { + t = load_littleendian(buf + 4 * i); + d = 0; + for (j = 0; j < 4; j++) + d += (t >> j) & 0x11111111; + + a[0] = d & 0xf; + b[0] = (d >> 4) & 0xf; + a[1] = (d >> 8) & 0xf; + b[1] = (d >> 12) & 0xf; + a[2] = (d >> 16) & 0xf; + b[2] = (d >> 20) & 0xf; + a[3] = (d >> 24) & 0xf; + b[3] = (d >> 28); + + r->coeffs[4 * i + 0] = a[0] + KYBER_Q - b[0]; + r->coeffs[4 * i + 1] = a[1] + KYBER_Q - b[1]; + r->coeffs[4 * i + 2] = a[2] + KYBER_Q - b[2]; + r->coeffs[4 * i + 3] = a[3] + KYBER_Q - b[3]; + } +} +/* end cbd.c */ + +static void poly_compress(unsigned char *r, const poly *a) { + uint32_t t[8]; + unsigned int i, j, k = 0; + + for (i = 0; i < KYBER_N; i += 8) { + for (j = 0; j < 8; j++) + t[j] = (((freeze(a->coeffs[i + j]) << 3) + KYBER_Q / 2) / KYBER_Q) & 7; + + r[k] = t[0] | (t[1] << 3) | (t[2] << 6); + r[k + 1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); + r[k + 2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); + k += 3; + } +} + +static void poly_decompress(poly *r, const unsigned char *a) { + unsigned int i; 
+ for (i = 0; i < KYBER_N; i += 8) { + r->coeffs[i + 0] = (((a[0] & 7) * KYBER_Q) + 4) >> 3; + r->coeffs[i + 1] = ((((a[0] >> 3) & 7) * KYBER_Q) + 4) >> 3; + r->coeffs[i + 2] = ((((a[0] >> 6) | ((a[1] << 2) & 4)) * KYBER_Q) + 4) >> 3; + r->coeffs[i + 3] = ((((a[1] >> 1) & 7) * KYBER_Q) + 4) >> 3; + r->coeffs[i + 4] = ((((a[1] >> 4) & 7) * KYBER_Q) + 4) >> 3; + r->coeffs[i + 5] = ((((a[1] >> 7) | ((a[2] << 1) & 6)) * KYBER_Q) + 4) >> 3; + r->coeffs[i + 6] = ((((a[2] >> 2) & 7) * KYBER_Q) + 4) >> 3; + r->coeffs[i + 7] = ((((a[2] >> 5)) * KYBER_Q) + 4) >> 3; + a += 3; + } +} + +static void poly_tobytes(unsigned char *r, const poly *a) { + int i, j; + uint16_t t[8]; + + for (i = 0; i < KYBER_N / 8; i++) { + for (j = 0; j < 8; j++) + t[j] = freeze(a->coeffs[8 * i + j]); + + r[13 * i + 0] = t[0] & 0xff; + r[13 * i + 1] = (t[0] >> 8) | ((t[1] & 0x07) << 5); + r[13 * i + 2] = (t[1] >> 3) & 0xff; + r[13 * i + 3] = (t[1] >> 11) | ((t[2] & 0x3f) << 2); + r[13 * i + 4] = (t[2] >> 6) | ((t[3] & 0x01) << 7); + r[13 * i + 5] = (t[3] >> 1) & 0xff; + r[13 * i + 6] = (t[3] >> 9) | ((t[4] & 0x0f) << 4); + r[13 * i + 7] = (t[4] >> 4) & 0xff; + r[13 * i + 8] = (t[4] >> 12) | ((t[5] & 0x7f) << 1); + r[13 * i + 9] = (t[5] >> 7) | ((t[6] & 0x03) << 6); + r[13 * i + 10] = (t[6] >> 2) & 0xff; + r[13 * i + 11] = (t[6] >> 10) | ((t[7] & 0x1f) << 3); + r[13 * i + 12] = (t[7] >> 5); + } +} + +static void poly_frombytes(poly *r, const unsigned char *a) { + int i; + for (i = 0; i < KYBER_N / 8; i++) { + r->coeffs[8 * i + 0] = a[13 * i + 0] | (((uint16_t) a[13 * i + 1] & 0x1f) << 8); + r->coeffs[8 * i + 1] = (a[13 * i + 1] >> 5) | (((uint16_t) a[13 * i + 2]) << 3) | (((uint16_t) a[13 * i + 3] & 0x03) << 11); + r->coeffs[8 * i + 2] = (a[13 * i + 3] >> 2) | (((uint16_t) a[13 * i + 4] & 0x7f) << 6); + r->coeffs[8 * i + 3] = (a[13 * i + 4] >> 7) | (((uint16_t) a[13 * i + 5]) << 1) | (((uint16_t) a[13 * i + 6] & 0x0f) << 9); + r->coeffs[8 * i + 4] = (a[13 * i + 6] >> 4) | (((uint16_t) a[13 * i + 7]) << 
4) | (((uint16_t) a[13 * i + 8] & 0x01) << 12); + r->coeffs[8 * i + 5] = (a[13 * i + 8] >> 1) | (((uint16_t) a[13 * i + 9] & 0x3f) << 7); + r->coeffs[8 * i + 6] = (a[13 * i + 9] >> 6) | (((uint16_t) a[13 * i + 10]) << 2) | (((uint16_t) a[13 * i + 11] & 0x07) << 10); + r->coeffs[8 * i + 7] = (a[13 * i + 11] >> 3) | (((uint16_t) a[13 * i + 12]) << 5); + } +} + +static void poly_getnoise(poly *r, const unsigned char *seed, unsigned char nonce) { + unsigned char buf[KYBER_N]; + + OQS_SHA3_cshake128_simple(buf, KYBER_N, nonce, seed, KYBER_NOISESEEDBYTES); + + cbd(r, buf); +} + +static void poly_ntt(poly *r) { + mul_coefficients(r->coeffs, oqs_kex_mlwe_kyber_psis_bitrev_montgomery); + ntt(r->coeffs, oqs_kex_mlwe_kyber_omegas_montgomery); +} + +static void poly_invntt(poly *r) { + bitrev_vector(r->coeffs); + ntt(r->coeffs, oqs_kex_mlwe_kyber_omegas_inv_bitrev_montgomery); + mul_coefficients(r->coeffs, oqs_kex_mlwe_kyber_psis_inv_montgomery); +} + +static void poly_add(poly *r, const poly *a, const poly *b) { + int i; + for (i = 0; i < KYBER_N; i++) + r->coeffs[i] = barrett_reduce(a->coeffs[i] + b->coeffs[i]); +} + +static void poly_sub(poly *r, const poly *a, const poly *b) { + int i; + for (i = 0; i < KYBER_N; i++) + r->coeffs[i] = barrett_reduce(a->coeffs[i] + 3 * KYBER_Q - b->coeffs[i]); +} + +static void poly_frommsg(poly *r, const unsigned char msg[KYBER_SHAREDKEYBYTES]) { + uint16_t i, j, mask; + + for (i = 0; i < KYBER_SHAREDKEYBYTES; i++) { + for (j = 0; j < 8; j++) { + mask = -((msg[i] >> j) & 1); + r->coeffs[8 * i + j] = mask & ((KYBER_Q + 1) / 2); + } + } +} + +static void poly_tomsg(unsigned char msg[KYBER_SHAREDKEYBYTES], const poly *a) { + uint16_t t; + int i, j; + + for (i = 0; i < KYBER_SHAREDKEYBYTES; i++) { + msg[i] = 0; + for (j = 0; j < 8; j++) { + t = (((freeze(a->coeffs[8 * i + j]) << 1) + KYBER_Q / 2) / KYBER_Q) & 1; + msg[i] |= t << j; + } + } +} diff --git a/crypt/liboqs/kex_mlwe_kyber/polyvec.c b/crypt/liboqs/kex_mlwe_kyber/polyvec.c new file 
mode 100644 index 0000000000000000000000000000000000000000..4f9d08ee95500c76f102e71b833642687958d821 --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/polyvec.c @@ -0,0 +1,196 @@ +#include "params.h" + +typedef struct { + poly vec[KYBER_D]; +#if defined(WINDOWS) +} polyvec; +#else +} polyvec __attribute__((aligned(32))); +#endif + +#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_D * 352)) +static void polyvec_compress(unsigned char *r, const polyvec *a) { + int i, j, k; + uint16_t t[8]; + for (i = 0; i < KYBER_D; i++) { + for (j = 0; j < KYBER_N / 8; j++) { + for (k = 0; k < 8; k++) + t[k] = ((((uint32_t) freeze(a->vec[i].coeffs[8 * j + k]) << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff; + + r[11 * j + 0] = t[0] & 0xff; + r[11 * j + 1] = (t[0] >> 8) | ((t[1] & 0x1f) << 3); + r[11 * j + 2] = (t[1] >> 5) | ((t[2] & 0x03) << 6); + r[11 * j + 3] = (t[2] >> 2) & 0xff; + r[11 * j + 4] = (t[2] >> 10) | ((t[3] & 0x7f) << 1); + r[11 * j + 5] = (t[3] >> 7) | ((t[4] & 0x0f) << 4); + r[11 * j + 6] = (t[4] >> 4) | ((t[5] & 0x01) << 7); + r[11 * j + 7] = (t[5] >> 1) & 0xff; + r[11 * j + 8] = (t[5] >> 9) | ((t[6] & 0x3f) << 2); + r[11 * j + 9] = (t[6] >> 6) | ((t[7] & 0x07) << 5); + r[11 * j + 10] = (t[7] >> 3); + } + r += 352; + } +} + +static void polyvec_decompress(polyvec *r, const unsigned char *a) { + int i, j; + for (i = 0; i < KYBER_D; i++) { + for (j = 0; j < KYBER_N / 8; j++) { + r->vec[i].coeffs[8 * j + 0] = (((a[11 * j + 0] | (((uint32_t) a[11 * j + 1] & 0x07) << 8)) * KYBER_Q) + 1024) >> 11; + r->vec[i].coeffs[8 * j + 1] = ((((a[11 * j + 1] >> 3) | (((uint32_t) a[11 * j + 2] & 0x3f) << 5)) * KYBER_Q) + 1024) >> 11; + r->vec[i].coeffs[8 * j + 2] = ((((a[11 * j + 2] >> 6) | (((uint32_t) a[11 * j + 3] & 0xff) << 2) | (((uint32_t) a[11 * j + 4] & 0x01) << 10)) * KYBER_Q) + 1024) >> 11; + r->vec[i].coeffs[8 * j + 3] = ((((a[11 * j + 4] >> 1) | (((uint32_t) a[11 * j + 5] & 0x0f) << 7)) * KYBER_Q) + 1024) >> 11; + r->vec[i].coeffs[8 * j + 4] = ((((a[11 * j + 5] >> 4) | (((uint32_t) 
a[11 * j + 6] & 0x7f) << 4)) * KYBER_Q) + 1024) >> 11; + r->vec[i].coeffs[8 * j + 5] = ((((a[11 * j + 6] >> 7) | (((uint32_t) a[11 * j + 7] & 0xff) << 1) | (((uint32_t) a[11 * j + 8] & 0x03) << 9)) * KYBER_Q) + 1024) >> 11; + r->vec[i].coeffs[8 * j + 6] = ((((a[11 * j + 8] >> 2) | (((uint32_t) a[11 * j + 9] & 0x1f) << 6)) * KYBER_Q) + 1024) >> 11; + r->vec[i].coeffs[8 * j + 7] = ((((a[11 * j + 9] >> 5) | (((uint32_t) a[11 * j + 10] & 0xff) << 3)) * KYBER_Q) + 1024) >> 11; + } + a += 352; + } +} + +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_D * 320)) + +static void polyvec_compress(unsigned char *r, const polyvec *a) { + int i, j, k; + uint16_t t[4]; + for (i = 0; i < KYBER_D; i++) { + for (j = 0; j < KYBER_N / 4; j++) { + for (k = 0; k < 4; k++) + t[k] = ((((uint32_t) freeze(a->vec[i].coeffs[4 * j + k]) << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; + + r[5 * j + 0] = t[0] & 0xff; + r[5 * j + 1] = (t[0] >> 8) | ((t[1] & 0x3f) << 2); + r[5 * j + 2] = (t[1] >> 6) | ((t[2] & 0x0f) << 4); + r[5 * j + 3] = (t[2] >> 4) | ((t[3] & 0x03) << 6); + r[5 * j + 4] = (t[3] >> 2); + } + r += 320; + } +} + +static void polyvec_decompress(polyvec *r, const unsigned char *a) { + int i, j; + for (i = 0; i < KYBER_D; i++) { + for (j = 0; j < KYBER_N / 4; j++) { + r->vec[i].coeffs[4 * j + 0] = (((a[5 * j + 0] | (((uint32_t) a[5 * j + 1] & 0x03) << 8)) * KYBER_Q) + 512) >> 10; + r->vec[i].coeffs[4 * j + 1] = ((((a[5 * j + 1] >> 2) | (((uint32_t) a[5 * j + 2] & 0x0f) << 6)) * KYBER_Q) + 512) >> 10; + r->vec[i].coeffs[4 * j + 2] = ((((a[5 * j + 2] >> 4) | (((uint32_t) a[5 * j + 3] & 0x3f) << 4)) * KYBER_Q) + 512) >> 10; + r->vec[i].coeffs[4 * j + 3] = ((((a[5 * j + 3] >> 6) | (((uint32_t) a[5 * j + 4] & 0xff) << 2)) * KYBER_Q) + 512) >> 10; + } + a += 320; + } +} + +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_D * 288)) + +static void polyvec_compress(unsigned char *r, const polyvec *a) { + int i, j, k; + uint16_t t[8]; + for (i = 0; i < KYBER_D; i++) { + for (j = 0; j < KYBER_N / 8; j++) 
{ + for (k = 0; k < 8; k++) + t[k] = ((((uint32_t) freeze(a->vec[i].coeffs[8 * j + k]) << 9) + KYBER_Q / 2) / KYBER_Q) & 0x1ff; + + r[9 * j + 0] = t[0] & 0xff; + r[9 * j + 1] = (t[0] >> 8) | ((t[1] & 0x7f) << 1); + r[9 * j + 2] = (t[1] >> 7) | ((t[2] & 0x3f) << 2); + r[9 * j + 3] = (t[2] >> 6) | ((t[3] & 0x1f) << 3); + r[9 * j + 4] = (t[3] >> 5) | ((t[4] & 0x0f) << 4); + r[9 * j + 5] = (t[4] >> 4) | ((t[5] & 0x07) << 5); + r[9 * j + 6] = (t[5] >> 3) | ((t[6] & 0x03) << 6); + r[9 * j + 7] = (t[6] >> 2) | ((t[7] & 0x01) << 7); + r[9 * j + 8] = (t[7] >> 1); + } + r += 288; + } +} + +static void polyvec_decompress(polyvec *r, const unsigned char *a) { + int i, j; + for (i = 0; i < KYBER_D; i++) { + for (j = 0; j < KYBER_N / 8; j++) { + r->vec[i].coeffs[8 * j + 0] = (((a[9 * j + 0] | (((uint32_t) a[9 * j + 1] & 0x01) << 8)) * KYBER_Q) + 256) >> 9; + r->vec[i].coeffs[8 * j + 1] = ((((a[9 * j + 1] >> 1) | (((uint32_t) a[9 * j + 2] & 0x03) << 7)) * KYBER_Q) + 256) >> 9; + r->vec[i].coeffs[8 * j + 2] = ((((a[9 * j + 2] >> 2) | (((uint32_t) a[9 * j + 3] & 0x07) << 6)) * KYBER_Q) + 256) >> 9; + r->vec[i].coeffs[8 * j + 3] = ((((a[9 * j + 3] >> 3) | (((uint32_t) a[9 * j + 4] & 0x0f) << 5)) * KYBER_Q) + 256) >> 9; + r->vec[i].coeffs[8 * j + 4] = ((((a[9 * j + 4] >> 4) | (((uint32_t) a[9 * j + 5] & 0x1f) << 4)) * KYBER_Q) + 256) >> 9; + r->vec[i].coeffs[8 * j + 5] = ((((a[9 * j + 5] >> 5) | (((uint32_t) a[9 * j + 6] & 0x3f) << 3)) * KYBER_Q) + 256) >> 9; + r->vec[i].coeffs[8 * j + 6] = ((((a[9 * j + 6] >> 6) | (((uint32_t) a[9 * j + 7] & 0x7f) << 2)) * KYBER_Q) + 256) >> 9; + r->vec[i].coeffs[8 * j + 7] = ((((a[9 * j + 7] >> 7) | (((uint32_t) a[9 * j + 8] & 0xff) << 1)) * KYBER_Q) + 256) >> 9; + } + a += 288; + } +} + +#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_D * 256)) + +static void polyvec_compress(unsigned char *r, const polyvec *a) { + int i, j, k; + uint16_t t; + for (i = 0; i < KYBER_D; i++) { + for (j = 0; j < KYBER_N; j++) { + r[j] = ((((uint32_t) 
freeze(a->vec[i].coeffs[j]) << 8) + KYBER_Q / 2) / KYBER_Q) & 0xff; + } + r += 256; + } +} + +static void polyvec_decompress(polyvec *r, const unsigned char *a) { + int i, j; + for (i = 0; i < KYBER_D; i++) { + for (j = 0; j < KYBER_N; j++) { + r->vec[i].coeffs[j] = ((a[j] * KYBER_Q) + 128) >> 8; + } + a += 256; + } +} + +#else +#error "Unsupported compression of polyvec" +#endif + +static void polyvec_tobytes(unsigned char *r, const polyvec *a) { + int i; + for (i = 0; i < KYBER_D; i++) + poly_tobytes(r + i * KYBER_POLYBYTES, &a->vec[i]); +} + +static void polyvec_frombytes(polyvec *r, const unsigned char *a) { + int i; + for (i = 0; i < KYBER_D; i++) + poly_frombytes(&r->vec[i], a + i * KYBER_POLYBYTES); +} + +static void polyvec_ntt(polyvec *r) { + int i; + for (i = 0; i < KYBER_D; i++) + poly_ntt(&r->vec[i]); +} + +static void polyvec_invntt(polyvec *r) { + int i; + for (i = 0; i < KYBER_D; i++) + poly_invntt(&r->vec[i]); +} + +static void polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b) { + int i, j; + uint16_t t; + for (j = 0; j < KYBER_N; j++) { + t = montgomery_reduce(4613 * (uint32_t) b->vec[0].coeffs[j]); // 4613 = 2^{2*18} % q + r->coeffs[j] = montgomery_reduce(a->vec[0].coeffs[j] * t); + for (i = 1; i < KYBER_D; i++) { + t = montgomery_reduce(4613 * (uint32_t) b->vec[i].coeffs[j]); + r->coeffs[j] += montgomery_reduce(a->vec[i].coeffs[j] * t); + } + r->coeffs[j] = barrett_reduce(r->coeffs[j]); + } +} + +static void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) { + int i; + for (i = 0; i < KYBER_D; i++) + poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); +} diff --git a/crypt/liboqs/kex_mlwe_kyber/precomp.c b/crypt/liboqs/kex_mlwe_kyber/precomp.c new file mode 100644 index 0000000000000000000000000000000000000000..e41122c2d4b2886b945dc29188d382b15cbd4262 --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/precomp.c @@ -0,0 +1,9 @@ +#include "params.h" + +uint16_t oqs_kex_mlwe_kyber_omegas_montgomery[KYBER_N / 2] = {990, 7427, 2634, 
6819, 578, 3281, 2143, 1095, 484, 6362, 3336, 5382, 6086, 3823, 877, 5656, 3583, 7010, 6414, 263, 1285, 291, 7143, 7338, 1581, 5134, 5184, 5932, 4042, 5775, 2468, 3, 606, 729, 5383, 962, 3240, 7548, 5129, 7653, 5929, 4965, 2461, 641, 1584, 2666, 1142, 157, 7407, 5222, 5602, 5142, 6140, 5485, 4931, 1559, 2085, 5284, 2056, 3538, 7269, 3535, 7190, 1957, 3465, 6792, 1538, 4664, 2023, 7643, 3660, 7673, 1694, 6905, 3995, 3475, 5939, 1859, 6910, 4434, 1019, 1492, 7087, 4761, 657, 4859, 5798, 2640, 1693, 2607, 2782, 5400, 6466, 1010, 957, 3851, 2121, 6392, 7319, 3367, 3659, 3375, 6430, 7583, 1549, 5856, 4773, 6084, 5544, 1650, 3997, 4390, 6722, 2915, 4245, 2635, 6128, 7676, 5737, 1616, 3457, 3132, 7196, 4702, 6239, 851, 2122, 3009}; + +uint16_t oqs_kex_mlwe_kyber_omegas_inv_bitrev_montgomery[KYBER_N / 2] = {990, 254, 862, 5047, 6586, 5538, 4400, 7103, 2025, 6804, 3858, 1595, 2299, 4345, 1319, 7197, 7678, 5213, 1906, 3639, 1749, 2497, 2547, 6100, 343, 538, 7390, 6396, 7418, 1267, 671, 4098, 5724, 491, 4146, 412, 4143, 5625, 2397, 5596, 6122, 2750, 2196, 1541, 2539, 2079, 2459, 274, 7524, 6539, 5015, 6097, 7040, 5220, 2716, 1752, 28, 2552, 133, 4441, 6719, 2298, 6952, 7075, 4672, 5559, 6830, 1442, 2979, 485, 4549, 4224, 6065, 1944, 5, 1553, 5046, 3436, 4766, 959, 3291, 3684, 6031, 2137, 1597, 2908, 1825, 6132, 98, 1251, 4306, 4022, 4314, 362, 1289, 5560, 3830, 6724, 6671, 1215, 2281, 4899, 5074, 5988, 5041, 1883, 2822, 7024, 2920, 594, 6189, 6662, 3247, 771, 5822, 1742, 4206, 3686, 776, 5987, 8, 4021, 38, 5658, 3017, 6143, 889, 4216}; + +uint16_t oqs_kex_mlwe_kyber_psis_bitrev_montgomery[KYBER_N] = {990, 7427, 2634, 6819, 578, 3281, 2143, 1095, 484, 6362, 3336, 5382, 6086, 3823, 877, 5656, 3583, 7010, 6414, 263, 1285, 291, 7143, 7338, 1581, 5134, 5184, 5932, 4042, 5775, 2468, 3, 606, 729, 5383, 962, 3240, 7548, 5129, 7653, 5929, 4965, 2461, 641, 1584, 2666, 1142, 157, 7407, 5222, 5602, 5142, 6140, 5485, 4931, 1559, 2085, 5284, 2056, 3538, 7269, 3535, 7190, 1957, 3465, 6792, 
1538, 4664, 2023, 7643, 3660, 7673, 1694, 6905, 3995, 3475, 5939, 1859, 6910, 4434, 1019, 1492, 7087, 4761, 657, 4859, 5798, 2640, 1693, 2607, 2782, 5400, 6466, 1010, 957, 3851, 2121, 6392, 7319, 3367, 3659, 3375, 6430, 7583, 1549, 5856, 4773, 6084, 5544, 1650, 3997, 4390, 6722, 2915, 4245, 2635, 6128, 7676, 5737, 1616, 3457, 3132, 7196, 4702, 6239, 851, 2122, 3009, 7613, 7295, 2007, 323, 5112, 3716, 2289, 6442, 6965, 2713, 7126, 3401, 963, 6596, 607, 5027, 7078, 4484, 5937, 944, 2860, 2680, 5049, 1777, 5850, 3387, 6487, 6777, 4812, 4724, 7077, 186, 6848, 6793, 3463, 5877, 1174, 7116, 3077, 5945, 6591, 590, 6643, 1337, 6036, 3991, 1675, 2053, 6055, 1162, 1679, 3883, 4311, 2106, 6163, 4486, 6374, 5006, 4576, 4288, 5180, 4102, 282, 6119, 7443, 6330, 3184, 4971, 2530, 5325, 4171, 7185, 5175, 5655, 1898, 382, 7211, 43, 5965, 6073, 1730, 332, 1577, 3304, 2329, 1699, 6150, 2379, 5113, 333, 3502, 4517, 1480, 1172, 5567, 651, 925, 4573, 599, 1367, 4109, 1863, 6929, 1605, 3866, 2065, 4048, 839, 5764, 2447, 2022, 3345, 1990, 4067, 2036, 2069, 3567, 7371, 2368, 339, 6947, 2159, 654, 7327, 2768, 6676, 987, 2214}; + +uint16_t oqs_kex_mlwe_kyber_psis_inv_montgomery[KYBER_N] = {1024, 4972, 5779, 6907, 4943, 4168, 315, 5580, 90, 497, 1123, 142, 4710, 5527, 2443, 4871, 698, 2489, 2394, 4003, 684, 2241, 2390, 7224, 5072, 2064, 4741, 1687, 6841, 482, 7441, 1235, 2126, 4742, 2802, 5744, 6287, 4933, 699, 3604, 1297, 2127, 5857, 1705, 3868, 3779, 4397, 2177, 159, 622, 2240, 1275, 640, 6948, 4572, 5277, 209, 2605, 1157, 7328, 5817, 3191, 1662, 2009, 4864, 574, 2487, 164, 6197, 4436, 7257, 3462, 4268, 4281, 3414, 4515, 3170, 1290, 2003, 5855, 7156, 6062, 7531, 1732, 3249, 4884, 7512, 3590, 1049, 2123, 1397, 6093, 3691, 6130, 6541, 3946, 6258, 3322, 1788, 4241, 4900, 2309, 1400, 1757, 400, 502, 6698, 2338, 3011, 668, 7444, 4580, 6516, 6795, 2959, 4136, 3040, 2279, 6355, 3943, 2913, 6613, 7416, 4084, 6508, 5556, 4054, 3782, 61, 6567, 2212, 779, 632, 5709, 5667, 4923, 4911, 6893, 4695, 4164, 
3536, 2287, 7594, 2848, 3267, 1911, 3128, 546, 1991, 156, 4958, 5531, 6903, 483, 875, 138, 250, 2234, 2266, 7222, 2842, 4258, 812, 6703, 232, 5207, 6650, 2585, 1900, 6225, 4932, 7265, 4701, 3173, 4635, 6393, 227, 7313, 4454, 4284, 6759, 1224, 5223, 1447, 395, 2608, 4502, 4037, 189, 3348, 54, 6443, 2210, 6230, 2826, 1780, 3002, 5995, 1955, 6102, 6045, 3938, 5019, 4417, 1434, 1262, 1507, 5847, 5917, 7157, 7177, 6434, 7537, 741, 4348, 1309, 145, 374, 2236, 4496, 5028, 6771, 6923, 7421, 1978, 1023, 3857, 6876, 1102, 7451, 4704, 6518, 1344, 765, 384, 5705, 1207, 1630, 4734, 1563, 6839, 5933, 1954, 4987, 7142, 5814, 7527, 4953, 7637, 4707, 2182, 5734, 2818, 541, 4097, 5641}; diff --git a/crypt/liboqs/kex_mlwe_kyber/reduce.c b/crypt/liboqs/kex_mlwe_kyber/reduce.c new file mode 100644 index 0000000000000000000000000000000000000000..8c390ac21a66742fffe9ee536d48547d081f8de6 --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/reduce.c @@ -0,0 +1,36 @@ +#include "params.h" + +static const uint32_t qinv = 7679; // -inverse_mod(q,2^18) +static const uint32_t rlog = 18; + +static uint16_t montgomery_reduce(uint32_t a) { + uint32_t u; + + u = (a * qinv); + u &= ((1 << rlog) - 1); + u *= KYBER_Q; + a = a + u; + return a >> rlog; +} + +static uint16_t barrett_reduce(uint16_t a) { + uint32_t u; + + u = a >> 13; + u *= KYBER_Q; + a -= u; + return a; +} + +static uint16_t freeze(uint16_t x) { + uint16_t m, r; + int16_t c; + r = barrett_reduce(x); + + m = r - KYBER_Q; + c = m; + c >>= 15; + r = m ^ ((r ^ m) & c); + + return r; +} diff --git a/crypt/liboqs/kex_mlwe_kyber/verify.c b/crypt/liboqs/kex_mlwe_kyber/verify.c new file mode 100644 index 0000000000000000000000000000000000000000..5e709092872c8abe72acdd9bef22dc10cd8e3b2d --- /dev/null +++ b/crypt/liboqs/kex_mlwe_kyber/verify.c @@ -0,0 +1,28 @@ +#include <string.h> +#include <stdint.h> + +#if defined(WINDOWS) +#pragma warning(disable : 4146 4244) +#endif + +/* returns 0 for equal strings, 1 for non-equal strings */ +static int 
verify(const unsigned char *a, const unsigned char *b, size_t len) { + uint64_t r; + size_t i; + r = 0; + + for (i = 0; i < len; i++) + r |= a[i] ^ b[i]; + + r = (-r) >> 63; + return r; +} + +/* b = 1 means mov, b = 0 means don't mov*/ +static void cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b) { + size_t i; + + b = -b; + for (i = 0; i < len; i++) + r[i] ^= b & (x[i] ^ r[i]); +} diff --git a/crypt/liboqs/kex_ntru/Makefile.am b/crypt/liboqs/kex_ntru/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..b767ea1d3aff92528e92382dd7d34899b3144f15 --- /dev/null +++ b/crypt/liboqs/kex_ntru/Makefile.am @@ -0,0 +1,25 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libntru.la + + +libntru_la_SOURCES = kex_ntru.c \ + ntru_crypto_drbg.c \ + ntru_crypto_hash.c \ + ntru_crypto_hmac.c \ + ntru_crypto_msbyte_uint32.c \ + ntru_crypto_ntru_convert.c \ + ntru_crypto_ntru_encrypt.c \ + ntru_crypto_ntru_encrypt_key.c \ + ntru_crypto_ntru_encrypt_param_sets.c \ + ntru_crypto_ntru_mgf1.c \ + ntru_crypto_ntru_poly.c \ + ntru_crypto_sha256.c \ + ntru_crypto_sha1.c \ + ntru_crypto_sha2.c \ + ntru_crypto_ntru_mult_indices.c \ + ntru_crypto_ntru_mult_coeffs_karat.c + + +libntru_la_CPPFLAGS = -I../../include +libntru_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/kex_ntru/kex_ntru.c b/crypt/liboqs/kex_ntru/kex_ntru.c new file mode 100644 index 0000000000000000000000000000000000000000..3984d00ada45027a2e872675893a20b3d7530ed8 --- /dev/null +++ b/crypt/liboqs/kex_ntru/kex_ntru.c @@ -0,0 +1,242 @@ +#ifndef DISABLE_NTRU_ON_WINDOWS_BY_DEFAULT + +#if defined(WINDOWS) +#define UNUSED +// __attribute__ not supported in VS +#else +#define UNUSED __attribute__((unused)) +#endif + +#include <fcntl.h> +#if defined(WINDOWS) +#include <windows.h> +#include <Wincrypt.h> +#else +#include <unistd.h> +#endif + +#include <oqs/kex.h> +#include <oqs/kex_ntru.h> +#include <oqs/rand.h> + +#include <ntru_crypto.h> + +#if defined(WINDOWS) +#define 
strdup _strdup // for strdup deprecation warning +#endif + +#define NTRU_PARAMETER_SELECTION NTRU_EES743EP1 +#define NTRU_PARAMETER_SELECTION_NAME "EES743EP1" + +OQS_KEX *OQS_KEX_ntru_new(OQS_RAND *rand) { + OQS_KEX *k = malloc(sizeof(OQS_KEX)); + if (k == NULL) + return NULL; + k->method_name = strdup("ntru " NTRU_PARAMETER_SELECTION_NAME); + k->estimated_classical_security = 256; // http://eprint.iacr.org/2015/708.pdf Table 3 N=743 product form search cost + k->estimated_quantum_security = 128; // need justification + k->rand = rand; + k->params = NULL; + k->alice_0 = &OQS_KEX_ntru_alice_0; + k->bob = &OQS_KEX_ntru_bob; + k->alice_1 = &OQS_KEX_ntru_alice_1; + k->alice_priv_free = &OQS_KEX_ntru_alice_priv_free; + k->free = &OQS_KEX_ntru_free; + return k; +} + +static uint8_t get_entropy_from_dev_urandom(ENTROPY_CMD cmd, uint8_t *out) { + if (cmd == INIT) { + return 1; + } + if (out == NULL) { + return 0; + } + if (cmd == GET_NUM_BYTES_PER_BYTE_OF_ENTROPY) { + *out = 1; + return 1; + } + if (cmd == GET_BYTE_OF_ENTROPY) { + // TODO: why is this called to get entropy bytes one by one? 
+ if (!OQS_RAND_get_system_entropy(out, 1)) { + return 0; + } + return 1; + } + return 0; +} + +typedef struct OQS_KEX_ntru_alice_priv { + uint16_t priv_key_len; + uint8_t *priv_key; +} OQS_KEX_ntru_alice_priv; + +int OQS_KEX_ntru_alice_0(UNUSED OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) { + + int ret = 0; + uint32_t rc; + DRBG_HANDLE drbg; + OQS_KEX_ntru_alice_priv *ntru_alice_priv = NULL; + + *alice_priv = NULL; + *alice_msg = NULL; + + /* initialize NTRU DRBG */ + rc = ntru_crypto_drbg_instantiate(256, (uint8_t *) "OQS Alice", strlen("OQS Alice"), (ENTROPY_FN) &get_entropy_from_dev_urandom, &drbg); + if (rc != DRBG_OK) + return 0; + + /* allocate private key */ + ntru_alice_priv = malloc(sizeof(OQS_KEX_ntru_alice_priv)); + if (ntru_alice_priv == NULL) + goto err; + ntru_alice_priv->priv_key = NULL; + *alice_priv = ntru_alice_priv; + + /* calculate length of public/private keys */ + uint16_t ntru_alice_msg_len; + rc = ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_PARAMETER_SELECTION, &ntru_alice_msg_len, NULL, &(ntru_alice_priv->priv_key_len), NULL); + if (rc != NTRU_OK) + goto err; + *alice_msg_len = (size_t) ntru_alice_msg_len; + + /* allocate private key bytes */ + ntru_alice_priv->priv_key = malloc(ntru_alice_priv->priv_key_len); + if (ntru_alice_priv->priv_key == NULL) + goto err; + /* allocate public key */ + *alice_msg = malloc(*alice_msg_len); + if (*alice_msg == NULL) + goto err; + + /* generate public/private key pair */ + rc = ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_PARAMETER_SELECTION, &ntru_alice_msg_len, *alice_msg, &(ntru_alice_priv->priv_key_len), ntru_alice_priv->priv_key); + if (rc != NTRU_OK) + goto err; + *alice_msg_len = (size_t) ntru_alice_msg_len; + + ret = 1; + goto cleanup; + +err: + ret = 0; + if (ntru_alice_priv != NULL) + free(ntru_alice_priv->priv_key); + free(ntru_alice_priv); + *alice_priv = NULL; + free(*alice_msg); + *alice_msg = NULL; +cleanup: + ntru_crypto_drbg_uninstantiate(drbg); + + return 
ret; +} + +int OQS_KEX_ntru_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + uint32_t rc; + DRBG_HANDLE drbg; + + *bob_msg = NULL; + *key = NULL; + + /* initialize NTRU DRBG */ + rc = ntru_crypto_drbg_instantiate(256, (uint8_t *) "OQS Bob", strlen("OQS Bob"), (ENTROPY_FN) &get_entropy_from_dev_urandom, &drbg); + if (rc != DRBG_OK) + return 0; + + /* generate random session key */ + *key_len = 256 / 8; + *key = malloc(*key_len); + if (*key == NULL) + goto err; + OQS_RAND_n(k->rand, *key, *key_len); + + /* calculate length of ciphertext */ + uint16_t ntru_bob_msg_len; + rc = ntru_crypto_ntru_encrypt(drbg, alice_msg_len, alice_msg, *key_len, *key, &ntru_bob_msg_len, NULL); + if (rc != NTRU_OK) + goto err; + *bob_msg_len = (size_t) ntru_bob_msg_len; + + /* allocate ciphertext */ + *bob_msg = malloc(*bob_msg_len); + if (*bob_msg == NULL) + goto err; + + /* encrypt session key */ + rc = ntru_crypto_ntru_encrypt(drbg, alice_msg_len, alice_msg, *key_len, *key, &ntru_bob_msg_len, *bob_msg); + if (rc != NTRU_OK) + goto err; + *bob_msg_len = (size_t) ntru_bob_msg_len; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*bob_msg); + *bob_msg = NULL; + free(*key); + *key = NULL; +cleanup: + ntru_crypto_drbg_uninstantiate(drbg); + + return ret; +} + +int OQS_KEX_ntru_alice_1(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + uint32_t rc; + + *key = NULL; + + OQS_KEX_ntru_alice_priv *ntru_alice_priv = (OQS_KEX_ntru_alice_priv *) alice_priv; + + /* calculate session key length */ + uint16_t ntru_key_len; + rc = ntru_crypto_ntru_decrypt(ntru_alice_priv->priv_key_len, ntru_alice_priv->priv_key, bob_msg_len, bob_msg, &ntru_key_len, NULL); + if (rc != NTRU_OK) + goto err; + *key_len = (size_t) ntru_key_len; + + /* allocate session key */ + *key = malloc(*key_len); + if (*key == NULL) 
+ goto err; + + /* decrypt session key */ + rc = ntru_crypto_ntru_decrypt(ntru_alice_priv->priv_key_len, ntru_alice_priv->priv_key, bob_msg_len, bob_msg, &ntru_key_len, *key); + if (rc != NTRU_OK) + goto err; + *key_len = (size_t) ntru_key_len; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*key); + *key = NULL; +cleanup: + + return ret; +} + +void OQS_KEX_ntru_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) { + if (alice_priv) { + OQS_KEX_ntru_alice_priv *ntru_alice_priv = (OQS_KEX_ntru_alice_priv *) alice_priv; + free(ntru_alice_priv->priv_key); + } + free(alice_priv); +} + +void OQS_KEX_ntru_free(OQS_KEX *k) { + if (k) + free(k->method_name); + free(k); +} + +#endif diff --git a/crypt/liboqs/kex_ntru/kex_ntru.h b/crypt/liboqs/kex_ntru/kex_ntru.h new file mode 100755 index 0000000000000000000000000000000000000000..517a2085b7d4c796021207a0ccaebc191c403259 --- /dev/null +++ b/crypt/liboqs/kex_ntru/kex_ntru.h @@ -0,0 +1,28 @@ +/** + * \file kex_ntru.h + * \brief Header for the NTRU implementation of OQS_KEX + */ + +#ifndef __OQS_KEX_NTRU_H +#define __OQS_KEX_NTRU_H + +#ifndef DISABLE_NTRU_ON_WINDOWS_BY_DEFAULT + +#include <stddef.h> +#include <stdint.h> + +#include <oqs/kex.h> +#include <oqs/rand.h> + +OQS_KEX *OQS_KEX_ntru_new(OQS_RAND *rand); + +int OQS_KEX_ntru_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len); +int OQS_KEX_ntru_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len); +int OQS_KEX_ntru_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + +void OQS_KEX_ntru_alice_priv_free(OQS_KEX *k, void *alice_priv); +void OQS_KEX_ntru_free(OQS_KEX *k); + +#endif + +#endif diff --git a/crypt/liboqs/kex_ntru/ntru_crypto.h b/crypt/liboqs/kex_ntru/ntru_crypto.h new file mode 100644 index 
0000000000000000000000000000000000000000..7799dd46baa77f5778f77040d50ed9a6c834eae7 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto.h @@ -0,0 +1,340 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto.h + * + * Contents: Public header file for NTRUEncrypt. 
+ * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_H +#define NTRU_CRYPTO_H + +#include "ntru_crypto_platform.h" +#include "ntru_crypto_drbg.h" +#include "ntru_crypto_error.h" + +#if !defined(NTRUCALL) +#if !defined(WIN32) || defined(NTRUCRYPTO_STATIC) +// Linux, or a Win32 static library +#define NTRUCALL extern uint32_t +#elif defined(NTRUCRYPTO_EXPORTS) +// Win32 DLL build +#define NTRUCALL extern __declspec(dllexport) uint32_t +#else +// Win32 DLL import +#define NTRUCALL extern __declspec(dllimport) uint32_t +#endif +#endif /* NTRUCALL */ + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +/* parameter set ID list */ + +typedef enum _NTRU_ENCRYPT_PARAM_SET_ID { + NTRU_EES401EP1, + NTRU_EES449EP1, + NTRU_EES677EP1, + NTRU_EES1087EP2, + NTRU_EES541EP1, + NTRU_EES613EP1, + NTRU_EES887EP1, + NTRU_EES1171EP1, + NTRU_EES659EP1, + NTRU_EES761EP1, + NTRU_EES1087EP1, + NTRU_EES1499EP1, + NTRU_EES401EP2, + NTRU_EES439EP1, + NTRU_EES593EP1, + NTRU_EES743EP1, + NTRU_EES443EP1, + NTRU_EES587EP1, +} NTRU_ENCRYPT_PARAM_SET_ID; + +/* error codes */ + +#define NTRU_OK 0 +#define NTRU_FAIL 1 +#define NTRU_BAD_PARAMETER 2 +#define NTRU_BAD_LENGTH 3 +#define NTRU_BUFFER_TOO_SMALL 4 +#define NTRU_INVALID_PARAMETER_SET 5 +#define NTRU_BAD_PUBLIC_KEY 6 +#define NTRU_BAD_PRIVATE_KEY 7 +#define NTRU_OUT_OF_MEMORY 8 +#define NTRU_BAD_ENCODING 9 +#define NTRU_OID_NOT_RECOGNIZED 10 +#define NTRU_UNSUPPORTED_PARAM_SET 11 + +#define NTRU_RESULT(r) ((uint32_t)((r) ? NTRU_ERROR_BASE + (r) : (r))) +#define NTRU_RET(r) return NTRU_RESULT((r)) + +/* function declarations */ + +/* ntru_crypto_ntru_encrypt + * + * Implements NTRU encryption (SVES) for the parameter set specified in + * the public key blob. 
+ * + * Before invoking this function, a DRBG must be instantiated using + * ntru_crypto_drbg_instantiate() to obtain a DRBG handle, and in that + * instantiation the requested security strength must be at least as large + * as the security strength of the NTRU parameter set being used. + * Failure to instantiate the DRBG with the proper security strength will + * result in this function returning DRBG_ERROR_BASE + DRBG_BAD_LENGTH. + * + * The required minimum size of the output ciphertext buffer (ct) may be + * queried by invoking this function with ct = NULL. In this case, no + * encryption is performed, NTRU_OK is returned, and the required minimum + * size for ct is returned in ct_len. + * + * When ct != NULL, at invocation *ct_len must be the size of the ct buffer. + * Upon return it is the actual size of the ciphertext. + * + * Returns NTRU_OK if successful. + * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if the DRBG handle is invalid. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer + * (other than ct) is NULL. + * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument + * (pubkey_blob_len or pt_len) is zero, or if pt_len exceeds the + * maximum plaintext length for the parameter set. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PUBLIC_KEY if the public-key blob is + * invalid (unknown format, corrupt, bad length). + * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the ciphertext buffer + * is too small. + * Returns NTRU_ERROR_BASE + NTRU_NO_MEMORY if memory needed cannot be + * allocated from the heap. + */ + +NTRUCALL +ntru_crypto_ntru_encrypt( + DRBG_HANDLE drbg_handle, /* in - handle for DRBG */ + uint16_t pubkey_blob_len, /* in - no. of octets in public key + blob */ + uint8_t const *pubkey_blob, /* in - pointer to public key */ + uint16_t pt_len, /* in - no. of octets in plaintext */ + uint8_t const *pt, /* in - pointer to plaintext */ + uint16_t *ct_len, /* in/out - no. of octets in ct, addr for + no. 
of octets in ciphertext */ + uint8_t *ct); /* out - address for ciphertext */ + +/* ntru_crypto_ntru_decrypt + * + * Implements NTRU decryption (SVES) for the parameter set specified in + * the private key blob. + * + * The maximum size of the output plaintext may be queried by invoking + * this function with pt = NULL. In this case, no decryption is performed, + * NTRU_OK is returned, and the maximum size the plaintext could be is + * returned in pt_len. + * Note that until the decryption is performed successfully, the actual size + * of the resulting plaintext cannot be known. + * + * When pt != NULL, at invocation *pt_len must be the size of the pt buffer. + * Upon return it is the actual size of the plaintext. + * + * Returns NTRU_OK if successful. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer + * (other than pt) is NULL. + * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument + * (privkey_blob) is zero, or if ct_len is invalid for the parameter set. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PRIVATE_KEY if the private-key blob is + * invalid (unknown format, corrupt, bad length). + * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the plaintext buffer + * is too small. + * Returns NTRU_ERROR_BASE + NTRU_NO_MEMORY if memory needed cannot be + * allocated from the heap. + * Returns NTRU_ERROR_BASE + NTRU_FAIL if a decryption error occurs. + */ + +NTRUCALL +ntru_crypto_ntru_decrypt( + uint16_t privkey_blob_len, /* in - no. of octets in private key + blob */ + uint8_t const *privkey_blob, /* in - pointer to private key */ + uint16_t ct_len, /* in - no. of octets in ciphertext */ + uint8_t const *ct, /* in - pointer to ciphertext */ + uint16_t *pt_len, /* in/out - no. of octets in pt, addr for + no. of octets in plaintext */ + uint8_t *pt); /* out - address for plaintext */ + +/* ntru_crypto_ntru_encrypt_keygen + * + * Implements key generation for NTRUEncrypt for the parameter set specified. 
+ * + * Before invoking this function, a DRBG must be instantiated using + * ntru_crypto_drbg_instantiate() to obtain a DRBG handle, and in that + * instantiation the requested security strength must be at least as large + * as the security strength of the NTRU parameter set being used. + * Failure to instantiate the DRBG with the proper security strength will + * result in this function returning DRBG_ERROR_BASE + DRBG_BAD_LENGTH. + * + * The required minimum size of the output public-key buffer (pubkey_blob) + * may be queried by invoking this function with pubkey_blob = NULL. + * In this case, no key generation is performed, NTRU_OK is returned, and + * the required minimum size for pubkey_blob is returned in pubkey_blob_len. + * + * The required minimum size of the output private-key buffer (privkey_blob) + * may be queried by invoking this function with privkey_blob = NULL. + * In this case, no key generation is performed, NTRU_OK is returned, and + * the required minimum size for privkey_blob is returned in privkey_blob_len. + * + * The required minimum sizes of both pubkey_blob and privkey_blob may be + * queried as described above, in a single invocation of this function. + * + * When pubkey_blob != NULL and privkey_blob != NULL, at invocation + * *pubkey_blob_len must be the size of the pubkey_blob buffer and + * *privkey_blob_len must be the size of the privkey_blob buffer. + * Upon return, *pubkey_blob_len is the actual size of the public-key blob + * and *privkey_blob_len is the actual size of the private-key blob. + * + * Returns NTRU_OK if successful. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer + * (other than pubkey_blob or privkey_blob) is NULL. + * Returns NTRU_ERROR_BASE + NTRU_INVALID_PARAMETER_SET if the parameter-set + * ID is invalid. + * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument is invalid. 
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if either the pubkey_blob + * buffer or the privkey_blob buffer is too small. + * Returns NTRU_ERROR_BASE + NTRU_NO_MEMORY if memory needed cannot be + * allocated from the heap. + * Returns NTRU_ERROR_BASE + NTRU_FAIL if the polynomial generated for f is + * not invertible in (Z/qZ)[X]/(X^N - 1), which is extremely unlikely. + * Should this occur, this function should simply be invoked again. + */ + +NTRUCALL +ntru_crypto_ntru_encrypt_keygen( + DRBG_HANDLE drbg_handle, /* in - handle of DRBG */ + NTRU_ENCRYPT_PARAM_SET_ID param_set_id, /* in - parameter set ID */ + uint16_t *pubkey_blob_len, /* in/out - no. of octets in + pubkey_blob, addr + for no. of octets + in pubkey_blob */ + uint8_t *pubkey_blob, /* out - address for + public key blob */ + uint16_t *privkey_blob_len, /* in/out - no. of octets in + privkey_blob, addr + for no. of octets + in privkey_blob */ + uint8_t *privkey_blob); /* out - address for + private key blob */ + +/* ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo + * + * DER-encodes an NTRUEncrypt public-key from a public-key blob into a + * SubjectPublicKeyInfo field for inclusion in an X.509 certificate. + * + * The required minimum size of the output SubjectPublicKeyInfo buffer + * (encoded_subjectPublicKeyInfo) may be queried by invoking this function + * with encoded_subjectPublicKeyInfo = NULL. In this case, no encoding is + * performed, NTRU_OK is returned, and the required minimum size for + * encoded_subjectPublicKeyInfo is returned in encoded_subjectPublicKeyInfo_len. + * + * When encoded_subjectPublicKeyInfo != NULL, at invocation + * *encoded_subjectPublicKeyInfo_len must be the size of the + * encoded_subjectPublicKeyInfo buffer. + * Upon return, it is the actual size of the encoded public key. + * + * Returns NTRU_OK if successful. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer + * (other than encoded_subjectPublicKeyInfo) is NULL. 
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if pubkey_blob_len is zero. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PUBLIC_KEY if the public-key blob is + * invalid (unknown format, corrupt, bad length). + * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the SubjectPublicKeyInfo + * buffer is too small. + */ + +NTRUCALL +ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo( + uint16_t pubkey_blob_len, /* in - no. of octets in public-key + blob */ + uint8_t const *pubkey_blob, /* in - ptr to public-key blob */ + uint16_t *encoded_subjectPublicKeyInfo_len, + /* in/out - no. of octets in encoded info, + address for no. of octets in + encoded info */ + uint8_t *encoded_subjectPublicKeyInfo); +/* out - address for encoded info */ + +/* ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey + * + * Decodes a DER-encoded NTRUEncrypt public-key from a + * SubjectPublicKeyInfo field in an X.509 certificate and returns the + * public-key blob itself. + * + * The required minimum size of the output public-key buffer (pubkey_blob) + * may be queried by invoking this function with pubkey_blob = NULL. + * In this case, no decoding is performed, NTRU_OK is returned, and the + * required minimum size for pubkey_blob is returned in pubkey_blob_len. + * + * When pubkey_blob != NULL, at invocation *pubkey_blob_len must be the + * size of the pubkey_blob buffer. + * Upon return, it is the actual size of the public-key blob. + * + * Returns NTRU_OK if successful. + * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if the encoded data buffer + * does not contain a full der prefix and public key. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer + * (other than pubkey_blob) is NULL. + * Returns NTRU_ERROR_BASE + NTRU_BAD_ENCODING if the encoded data is + * an invalid encoding of an NTRU public key. + * Returns NTRU_ERROR_BASE + NTRU_OID_NOT_RECOGNIZED if the + * encoded data contains an OID that identifies an object other than + * an NTRU public key. 
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the pubkey_blob buffer + * is too small. + */ + +NTRUCALL +ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey( + uint8_t const *encoded_data, /* in - ptr to subjectPublicKeyInfo + in the encoded data */ + uint16_t *pubkey_blob_len, /* in/out - no. of octets in pubkey blob, + address for no. of octets in + pubkey blob */ + uint8_t *pubkey_blob, /* out - address for pubkey blob */ + uint8_t **next, /* out - address for ptr to encoded + data following the + subjectPublicKeyInfo */ + uint32_t *remaining_data_len); /* in/out - number of bytes remaining in + buffer *next */ + +/* ntru_encrypt_get_param_set_name + * + * Returns pointer to null terminated parameter set name + * or NULL if parameter set ID is not found. + */ +const char * +ntru_encrypt_get_param_set_name( + NTRU_ENCRYPT_PARAM_SET_ID id); /* in - parameter-set id */ + +#if defined(__cplusplus) +} +#endif /* __cplusplus */ + +#endif /* NTRU_CRYPTO_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_drbg.c b/crypt/liboqs/kex_ntru/ntru_crypto_drbg.c new file mode 100644 index 0000000000000000000000000000000000000000..a94cacbfe93cb529dfa072f54f5e6f8d5dbaf109 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_drbg.c @@ -0,0 +1,849 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. 
You should have received a copy of
 * the creative commons license (CC0 1.0 universal) along with this program.
 * See the license file for more information.
 *
 *
 *********************************************************************************/

/******************************************************************************
 *
 * File: ntru_crypto_drbg.c
 *
 * Contents: Implementation of a SHA-256 HMAC-based deterministic random byte
 *           generator (HMAC_DRBG) as defined in ANSI X9.82, Part 3 - 2007.
 *
 *           This implementation:
 *             - allows for MAX_INSTANTIATIONS simultaneous drbg instantiations
 *               (may be overridden on compiler command line)
 *             - has a maximum security strength of 256 bits
 *             - automatically uses SHA-256 for all security strengths
 *             - allows a personalization string of length up to
 *               HMAC_DRBG_MAX_PERS_STR_BYTES bytes
 *             - implements reseeding
 *             - does not implement additional input for reseeding or generation
 *             - does not implement predictive resistance
 *             - limits the number of bytes requested in one invocation of
 *               generate to MAX_BYTES_PER_REQUEST
 *             - uses a callback function to allow the caller to supply the
 *               Get_entropy_input routine (entropy function)
 *             - limits the number of bytes returned from the entropy function
 *               to MAX_ENTROPY_NONCE_BYTES
 *             - gets the nonce bytes along with the entropy input from the
 *               entropy function
 *             - automatically reseeds an instantiation after MAX_REQUESTS
 *               calls to generate
 *
 *****************************************************************************/

#include "ntru_crypto.h"
#include "ntru_crypto_drbg.h"
#include "ntru_crypto_hmac.h"

/************************
 * HMAC_DRBG parameters *
 ************************/

/* Note: Combined entropy input and nonce are a total of 2 * sec_strength_bits
 * of randomness to provide quantum resistance */
#define HMAC_DRBG_MAX_MIN_ENTROPY_NONCE_BYTES \
    (2 * DRBG_MAX_SEC_STRENGTH_BITS) / 8
#define
HMAC_DRBG_MAX_ENTROPY_NONCE_BYTES \
    HMAC_DRBG_MAX_MIN_ENTROPY_NONCE_BYTES *DRBG_MAX_BYTES_PER_BYTE_OF_ENTROPY
/* NOTE(review): the two entropy macros above expand without outer parentheses;
 * the current use (2 * 256) / 8 * 8 groups correctly left-to-right, but the
 * expansions are fragile if ever embedded in a larger expression — confirm
 * before reusing them elsewhere. */
#define HMAC_DRBG_MAX_REQUESTS 0xffffffff

/*******************
 * DRBG structures *
 *******************/

/* SHA256_HMAC_DRBG state structure */

typedef struct {
    uint32_t sec_strength;          /* security strength in bits */
    uint32_t requests_left;         /* generation requests remaining
                                       before reseeding */
    ENTROPY_FN entropy_fn;          /* pointer to entropy function */
    NTRU_CRYPTO_HMAC_CTX *hmac_ctx; /* pointer to HMAC context */
    uint8_t V[33];                  /* md_len size internal state + 1:
                                       32-byte SHA-256 value V plus one
                                       trailing scratch byte used for the
                                       0x00/0x01 separator in
                                       sha256_hmac_drbg_update() */
} SHA256_HMAC_DRBG_STATE;

/* External DRBG state structure */

typedef struct {
    RANDOM_BYTES_FN randombytesfn;  /* caller-supplied random-bytes callback */
} EXTERNAL_DRBG_STATE;

/* DRBG state structure */

typedef struct {
    uint32_t handle;  /* identifier returned to the caller; only meaningful
                         while state != NULL */
    DRBG_TYPE type;   /* EXTERNAL_DRBG or SHA256_HMAC_DRBG */
    void *state;      /* NULL when the slot is free; otherwise points to a
                         SHA256_HMAC_DRBG_STATE or EXTERNAL_DRBG_STATE
                         matching `type` */
} DRBG_STATE;

/*************
 * DRBG DATA *
 *************/

/* array of drbg states; a slot with state == NULL is uninstantiated */

static DRBG_STATE drbg_state[DRBG_MAX_INSTANTIATIONS];

/******************************
 * SHA256 HMAC_DRBG functions *
 ******************************/

/* sha256_hmac_drbg_update
 *
 * This routine is the SHA-256 HMAC_DRBG derivation function for
 * instantiation, and reseeding, and it is used in generation as well.
 * It updates the internal state.
 *
 * For instantiation, provided_data1 holds the entropy input and nonce;
 * provided_data2 holds the optional personalization string.  Combined, this
 * is the seed material.
 *
 * For reseeding, provided_data1 holds the entropy input;
 * provided_data2 is NULL (because this implementation does not support
 * additional input).
 *
 * For byte generation, both provided_data1 and provided_data2 are NULL.
 *
 * Returns DRBG_OK if successful.
 * Returns HMAC errors if they occur.
*/

static uint32_t
sha256_hmac_drbg_update(
    SHA256_HMAC_DRBG_STATE *s,
    uint8_t *key, /* md_len size array; scratch for the new key K,
                     zeroized before returning */
    uint32_t md_len,
    uint8_t const *provided_data1,
    uint32_t provided_data1_bytes,
    uint8_t const *provided_data2,
    uint32_t provided_data2_bytes) {
    uint32_t result;

    /* new key = HMAC(K, V || 0x00 [|| provided data1 [|| provided data2]] */

    if ((result = ntru_crypto_hmac_init(s->hmac_ctx)) != NTRU_CRYPTO_HMAC_OK) {
        return result;
    }

    /* V[md_len] is the scratch byte reserved for the 0x00/0x01 domain
     * separator required by the HMAC_DRBG update function */
    s->V[md_len] = 0x00;

    if ((result = ntru_crypto_hmac_update(s->hmac_ctx, s->V, md_len + 1)) !=
        NTRU_CRYPTO_HMAC_OK) {
        return result;
    }

    if (provided_data1) {
        if ((result = ntru_crypto_hmac_update(s->hmac_ctx, provided_data1,
                                              provided_data1_bytes)) != NTRU_CRYPTO_HMAC_OK) {
            return result;
        }

        /* provided_data2 is only meaningful when provided_data1 is present
         * (personalization string during instantiation) */
        if (provided_data2) {
            if ((result = ntru_crypto_hmac_update(s->hmac_ctx, provided_data2,
                                                  provided_data2_bytes)) != NTRU_CRYPTO_HMAC_OK) {
                return result;
            }
        }
    }

    if ((result = ntru_crypto_hmac_final(s->hmac_ctx, key)) !=
        NTRU_CRYPTO_HMAC_OK) {
        return result;
    }

    /* install the freshly derived key K into the HMAC context */
    if ((result = ntru_crypto_hmac_set_key(s->hmac_ctx, key)) !=
        NTRU_CRYPTO_HMAC_OK) {
        return result;
    }

    /* new V = HMAC(K, V) */

    if ((result = ntru_crypto_hmac_init(s->hmac_ctx)) != NTRU_CRYPTO_HMAC_OK) {
        return result;
    }

    if ((result = ntru_crypto_hmac_update(s->hmac_ctx, s->V, md_len)) !=
        NTRU_CRYPTO_HMAC_OK) {
        return result;
    }

    if ((result = ntru_crypto_hmac_final(s->hmac_ctx, s->V)) !=
        NTRU_CRYPTO_HMAC_OK) {
        return result;
    }

    /* if provided data exists, update K and V again */

    if (provided_data1) {
        /* new key = HMAC(K, V || 0x01 || provided data1 [|| provided data2] */

        if ((result = ntru_crypto_hmac_init(s->hmac_ctx)) !=
            NTRU_CRYPTO_HMAC_OK) {
            return result;
        }

        s->V[md_len] = 0x01;

        if ((result = ntru_crypto_hmac_update(s->hmac_ctx, s->V, md_len + 1)) !=
            NTRU_CRYPTO_HMAC_OK) {
            return result;
        }

        if ((result = ntru_crypto_hmac_update(s->hmac_ctx, provided_data1,
                                              provided_data1_bytes)) != NTRU_CRYPTO_HMAC_OK) {
            return result;
        }

        if (provided_data2) {
            if ((result = ntru_crypto_hmac_update(s->hmac_ctx, provided_data2,
                                                  provided_data2_bytes)) != NTRU_CRYPTO_HMAC_OK) {
                return result;
            }
        }

        if ((result = ntru_crypto_hmac_final(s->hmac_ctx, key)) !=
            NTRU_CRYPTO_HMAC_OK) {
            return result;
        }

        if ((result = ntru_crypto_hmac_set_key(s->hmac_ctx, key)) !=
            NTRU_CRYPTO_HMAC_OK) {
            return result;
        }

        /* new V = HMAC(K, V) */

        if ((result = ntru_crypto_hmac_init(s->hmac_ctx)) !=
            NTRU_CRYPTO_HMAC_OK) {
            return result;
        }

        if ((result = ntru_crypto_hmac_update(s->hmac_ctx, s->V, md_len)) !=
            NTRU_CRYPTO_HMAC_OK) {
            return result;
        }

        if ((result = ntru_crypto_hmac_final(s->hmac_ctx, s->V)) !=
            NTRU_CRYPTO_HMAC_OK) {
            return result;
        }
    }

    /* scrub the key copy from the caller's scratch buffer.
     * NOTE(review): a plain memset of a dead buffer may be elided by the
     * optimizer; consider memset_s/explicit_bzero if available — confirm
     * against the project's platform layer. */
    memset(key, 0, md_len);
    DRBG_RET(DRBG_OK);
}

/* sha256_hmac_drbg_instantiate
 *
 * This routine allocates and initializes a SHA-256 HMAC_DRBG internal state.
 *
 * Returns DRBG_OK if successful.
 * Returns DRBG_BAD_LENGTH if the personalization string is too long.
 * Returns DRBG_OUT_OF_MEMORY if the internal state cannot be allocated.
 * Returns errors from HASH or SHA256 if those errors occur.
*/

static uint32_t
sha256_hmac_drbg_instantiate(
    uint32_t sec_strength_bits, /* strength to instantiate */
    uint8_t const *pers_str,
    uint32_t pers_str_bytes,
    ENTROPY_FN entropy_fn,
    SHA256_HMAC_DRBG_STATE **state) {
    uint8_t entropy_nonce[HMAC_DRBG_MAX_ENTROPY_NONCE_BYTES];
    uint32_t entropy_nonce_bytes;
    uint32_t min_bytes_of_entropy;
    uint8_t num_bytes_per_byte_of_entropy;
    uint8_t key[32]; /* array of md_len size */
    SHA256_HMAC_DRBG_STATE *s;
    uint32_t result;
    uint32_t i;

    /* check arguments */

    if (pers_str_bytes > HMAC_DRBG_MAX_PERS_STR_BYTES) {
        DRBG_RET(DRBG_BAD_LENGTH);
    }

    /* calculate number of bytes needed for the entropy input and nonce
     * for a SHA256_HMAC_DRBG, and get them from the entropy source
     */

    if (entropy_fn(GET_NUM_BYTES_PER_BYTE_OF_ENTROPY,
                   &num_bytes_per_byte_of_entropy) == 0) {
        DRBG_RET(DRBG_ENTROPY_FAIL);
    }

    /* reject a claimed entropy rate of 0 or one exceeding the cap the
     * entropy_nonce buffer was sized for */
    if ((num_bytes_per_byte_of_entropy == 0) ||
        (num_bytes_per_byte_of_entropy >
         DRBG_MAX_BYTES_PER_BYTE_OF_ENTROPY)) {
        DRBG_RET(DRBG_ENTROPY_FAIL);
    }

    /* 2 * sec_strength bits of entropy+nonce (see file header: quantum
     * resistance margin); scaled by the source's bytes-per-byte rate */
    min_bytes_of_entropy = (2 * sec_strength_bits) / 8;
    entropy_nonce_bytes = min_bytes_of_entropy * num_bytes_per_byte_of_entropy;

    for (i = 0; i < entropy_nonce_bytes; i++) {
        if (entropy_fn(GET_BYTE_OF_ENTROPY, entropy_nonce + i) == 0) {
            DRBG_RET(DRBG_ENTROPY_FAIL);
        }
    }

    /* allocate SHA256_HMAC_DRBG state */
    s = (SHA256_HMAC_DRBG_STATE *) MALLOC(sizeof(SHA256_HMAC_DRBG_STATE));
    if (s == NULL) {
        DRBG_RET(DRBG_OUT_OF_MEMORY);
    }

    /* allocate HMAC context, keyed with an all-zero initial key K */

    memset(key, 0, sizeof(key));
    if ((result = ntru_crypto_hmac_create_ctx(NTRU_CRYPTO_HASH_ALGID_SHA256,
                                              key, sizeof(key), &s->hmac_ctx)) != NTRU_CRYPTO_HMAC_OK) {
        FREE(s);
        return result;
    }

    /* init and update internal state: V starts as 0x01 repeated, then the
     * derivation function folds in entropy, nonce and personalization */

    memset(s->V, 0x01, sizeof(s->V));
    if ((result = sha256_hmac_drbg_update(s, key, sizeof(key),
                                          entropy_nonce, entropy_nonce_bytes,
                                          pers_str, pers_str_bytes)) != DRBG_OK) {
        /* undo partial construction and scrub secrets before returning */
        (void) ntru_crypto_hmac_destroy_ctx(s->hmac_ctx);
        memset(s->V, 0, sizeof(s->V));
        FREE(s);
        memset(entropy_nonce, 0, sizeof(entropy_nonce));
        return result;
    }

    memset(entropy_nonce, 0, sizeof(entropy_nonce));

    /* init instantiation parameters */

    s->sec_strength = sec_strength_bits;
    s->requests_left = HMAC_DRBG_MAX_REQUESTS;
    s->entropy_fn = entropy_fn;
    *state = s;

    return result;
}

/* sha256_hmac_drbg_free
 *
 * This routine zeroizes and frees a SHA-256 HMAC_DRBG internal state.
 *
 * Returns nothing (void); the caller must pass a valid, non-NULL state
 * pointer — it is dereferenced unconditionally.
 */

static void
sha256_hmac_drbg_free(
    SHA256_HMAC_DRBG_STATE *s) {
    if (s->hmac_ctx) {
        (void) ntru_crypto_hmac_destroy_ctx(s->hmac_ctx);
    }

    /* scrub internal state before releasing the memory */
    memset(s->V, 0, sizeof(s->V));
    s->sec_strength = 0;
    s->requests_left = 0;
    s->entropy_fn = NULL;
    FREE(s);
}

/* sha256_hmac_drbg_reseed
 *
 * This function reseeds an instantiated SHA256_HMAC DRBG.
 *
 * Returns DRBG_OK if successful.
 * Returns HMAC errors if they occur.
+ */ + +static uint32_t +sha256_hmac_drbg_reseed( + SHA256_HMAC_DRBG_STATE *s) { + uint8_t entropy[HMAC_DRBG_MAX_ENTROPY_NONCE_BYTES]; + uint32_t entropy_bytes; + uint32_t min_bytes_of_entropy; + uint8_t num_bytes_per_byte_of_entropy; + uint8_t key[32]; /* array of md_len size for sha256_hmac_drbg_update() */ + uint32_t result; + uint32_t i; + + /* calculate number of bytes needed for the entropy input + * for a SHA256_HMAC_DRBG, and get them from the entropy source + */ + + if (s->entropy_fn(GET_NUM_BYTES_PER_BYTE_OF_ENTROPY, + &num_bytes_per_byte_of_entropy) == 0) { + DRBG_RET(DRBG_ENTROPY_FAIL); + } + + if ((num_bytes_per_byte_of_entropy == 0) || + (num_bytes_per_byte_of_entropy > + DRBG_MAX_BYTES_PER_BYTE_OF_ENTROPY)) { + DRBG_RET(DRBG_ENTROPY_FAIL); + } + + /* note: factor of 2 here is probably unnecessary, but ensures quantum + * resistance even if internal state is leaked prior to reseed */ + min_bytes_of_entropy = (2 * s->sec_strength) / 8; + entropy_bytes = min_bytes_of_entropy * num_bytes_per_byte_of_entropy; + + for (i = 0; i < entropy_bytes; i++) { + if (s->entropy_fn(GET_BYTE_OF_ENTROPY, entropy + i) == 0) { + DRBG_RET(DRBG_ENTROPY_FAIL); + } + } + + /* update internal state */ + + if ((result = sha256_hmac_drbg_update(s, key, sizeof(key), + entropy, entropy_bytes, NULL, 0)) != DRBG_OK) { + return result; + } + + /* reset request counter */ + + s->requests_left = HMAC_DRBG_MAX_REQUESTS; + DRBG_RET(DRBG_OK); +} + +/* sha256_hmac_drbg_generate + * + * This routine generates pseudorandom bytes from a SHA256_HMAC DRBG. + * + * Returns DRBG_OK if successful. + * Returns DRBG_BAD_LENGTH if too many bytes are requested or the requested + * security strength is too large. + * Returns HMAC errors if they occur. 
*/

static uint32_t
sha256_hmac_drbg_generate(
    SHA256_HMAC_DRBG_STATE *s,
    uint32_t sec_strength_bits,
    uint32_t num_bytes,
    uint8_t *out) {
    uint8_t key[32]; /* array of md_len size for sha256_hmac_drbg_update();
                        written by the update function before it is read */
    uint32_t result;

    /* check if number of bytes requested exceeds the maximum allowed */

    if (num_bytes > HMAC_DRBG_MAX_BYTES_PER_REQUEST) {
        DRBG_RET(DRBG_BAD_LENGTH);
    }

    /* check if drbg has adequate security strength */

    if (sec_strength_bits > s->sec_strength) {
        DRBG_RET(DRBG_BAD_LENGTH);
    }

    /* check if max requests have been exceeded; reseed automatically */

    if (s->requests_left == 0) {
        if ((result = sha256_hmac_drbg_reseed(s)) != DRBG_OK) {
            return result;
        }
    }

    /* generate pseudorandom bytes */

    while (num_bytes > 0) {
        /* generate md_len bytes = V = HMAC(K, V) */

        if ((result = ntru_crypto_hmac_init(s->hmac_ctx)) !=
            NTRU_CRYPTO_HMAC_OK) {
            return result;
        }

        /* only the first md_len (= sizeof(key) = 32) bytes of V are hashed;
         * the scratch separator byte V[32] is excluded */
        if ((result = ntru_crypto_hmac_update(s->hmac_ctx, s->V,
                                              sizeof(key))) != NTRU_CRYPTO_HMAC_OK) {
            return result;
        }

        if ((result = ntru_crypto_hmac_final(s->hmac_ctx, s->V)) !=
            NTRU_CRYPTO_HMAC_OK) {
            return result;
        }

        /* copy generated bytes to output buffer */

        if (num_bytes < sizeof(key)) {
            /* final partial block */
            memcpy(out, s->V, num_bytes);
            num_bytes = 0;
        } else {
            memcpy(out, s->V, sizeof(key));
            out += sizeof(key);
            num_bytes -= sizeof(key);
        }
    }

    /* update internal state (backtracking resistance: derive fresh K, V) */

    if ((result = sha256_hmac_drbg_update(s, key, sizeof(key),
                                          NULL, 0, NULL, 0)) != DRBG_OK) {
        return result;
    }

    s->requests_left--;

    DRBG_RET(DRBG_OK);
}

/******************
 * DRBG functions *
 ******************/

/* drbg_get_new_drbg
 *
 * This routine finds an uninstantiated drbg state and returns a pointer to it.
 *
 * Returns a pointer to an uninstantiated drbg state if found.
 * Returns NULL if all drbg states are instantiated.
+ */ + +static DRBG_STATE * +drbg_get_new_drbg() { + int i; + + for (i = 0; i < DRBG_MAX_INSTANTIATIONS; i++) { + if (drbg_state[i].state == NULL) { + return drbg_state + i; + } + } + + return NULL; +} + +/* drbg_get_drbg + * + * This routine finds an instantiated drbg state given its handle, and returns + * a pointer to it. + * + * Returns a pointer to the drbg state if found. + * Returns NULL if the drbg state is not found. + */ + +static DRBG_STATE * +drbg_get_drbg( + DRBG_HANDLE handle) /* in/out - drbg handle */ +{ + int i; + + for (i = 0; i < DRBG_MAX_INSTANTIATIONS; i++) { + if ((drbg_state[i].handle == handle) && drbg_state[i].state) { + return drbg_state + i; + } + } + + return NULL; +} + +/* drbg_get_new_handle + * + * This routine gets a new, unique 32-bit handle. + * + * Returns the new DRBG handle. + */ + +static DRBG_HANDLE +drbg_get_new_handle(void) { + DRBG_HANDLE h = 0; + + /* ensure the new handle is unique: + * if it already exists, increment it + */ + + while (drbg_get_drbg(h) != NULL) { + ++h; + } + + return h; +} + +/******************** + * Public functions * + ********************/ + +/* ntru_crypto_drbg_instantiate + * + * This routine instantiates a drbg with the requested security strength. + * See ANS X9.82: Part 3-2007. This routine currently returns an instance + * of SHA-256 HMAC_DRBG for all requested security strengths. + * + * Returns DRBG_OK if successful. + * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if an argument pointer is NULL. + * Returns DRBG_ERROR_BASE + DRBG_BAD_LENGTH if the security strength requested + * or the personalization string is too large. + * Returns DRBG_ERROR_BASE + DRBG_NOT_AVAILABLE if there are no instantiation + * slots available + * Returns DRBG_ERROR_BASE + DRBG_OUT_OF_MEMORY if the internal state cannot be + * allocated from the heap. 
*/

uint32_t
ntru_crypto_drbg_instantiate(
    uint32_t sec_strength_bits, /* in - requested sec strength in bits */
    uint8_t const *pers_str,    /* in - ptr to personalization string */
    uint32_t pers_str_bytes,    /* in - no. personalization str bytes */
    ENTROPY_FN entropy_fn,      /* in - pointer to entropy function */
    DRBG_HANDLE *handle)        /* out - address for drbg handle */
{
    DRBG_STATE *drbg = NULL;
    SHA256_HMAC_DRBG_STATE *state = NULL;
    uint32_t result;

    /* check arguments: pers_str may be NULL only with a zero length */

    if ((!pers_str && pers_str_bytes) || !entropy_fn || !handle) {
        DRBG_RET(DRBG_BAD_PARAMETER);
    }

    if (sec_strength_bits > DRBG_MAX_SEC_STRENGTH_BITS) {
        DRBG_RET(DRBG_BAD_LENGTH);
    }

    /* normalize: a zero-length personalization string is no string at all */
    if (pers_str && (pers_str_bytes == 0)) {
        pers_str = NULL;
    }

    /* set security strength: round the request up to the next supported
     * level (112, 128, 192, 256) */

    if (sec_strength_bits <= 112) {
        sec_strength_bits = 112;
    } else if (sec_strength_bits <= 128) {
        sec_strength_bits = 128;
    } else if (sec_strength_bits <= 192) {
        sec_strength_bits = 192;
    } else {
        sec_strength_bits = 256;
    }

    /* get an uninstantiated drbg */

    if ((drbg = drbg_get_new_drbg()) == NULL) {
        DRBG_RET(DRBG_NOT_AVAILABLE);
    }

    /* init entropy function */

    if (entropy_fn(INIT, NULL) == 0) {
        DRBG_RET(DRBG_ENTROPY_FAIL);
    }

    /* instantiate a SHA-256 HMAC_DRBG */

    if ((result = sha256_hmac_drbg_instantiate(sec_strength_bits,
                                               pers_str, pers_str_bytes,
                                               entropy_fn,
                                               &state)) != DRBG_OK) {
        return result;
    }

    /* init drbg state; the slot only becomes "live" once state is set */

    drbg->handle = drbg_get_new_handle();
    drbg->type = SHA256_HMAC_DRBG;
    drbg->state = state;

    /* return drbg handle */

    *handle = drbg->handle;
    DRBG_RET(DRBG_OK);
}

/* ntru_crypto_drbg_external_instantiate
 *
 * This routine instruments an external DRBG so that ntru_crypto routines
 * can call it.  randombytesfn must be of type
 * uint32_t (*randombytesfn)(uint8_t *out, uint32_t num_bytes);
 * (the RANDOM_BYTES_FN typedef in ntru_crypto_drbg.h)
 * and should return DRBG_OK on success.
 *
 * Returns DRBG_OK if successful.
* Returns DRBG_ERROR_BASE + DRBG_NOT_AVAILABLE if there are no instantiation
 *         slots available
 * Returns DRBG_ERROR_BASE + DRBG_OUT_OF_MEMORY if the internal state cannot be
 *         allocated from the heap.
 */

uint32_t
ntru_crypto_drbg_external_instantiate(
    RANDOM_BYTES_FN randombytesfn, /* in - pointer to random bytes function */
    DRBG_HANDLE *handle)           /* out - address for drbg handle */
{
    DRBG_STATE *drbg = NULL;
    EXTERNAL_DRBG_STATE *state = NULL;

    /* check arguments */
    if (!randombytesfn || !handle) {
        DRBG_RET(DRBG_BAD_PARAMETER);
    }

    /* get an uninstantiated drbg */

    if ((drbg = drbg_get_new_drbg()) == NULL) {
        DRBG_RET(DRBG_NOT_AVAILABLE);
    }

    /* instantiate an External DRBG: only the callback is stored */

    state = (EXTERNAL_DRBG_STATE *) MALLOC(sizeof(EXTERNAL_DRBG_STATE));
    if (state == NULL) {
        DRBG_RET(DRBG_OUT_OF_MEMORY);
    }

    state->randombytesfn = randombytesfn;

    /* init drbg state */

    drbg->handle = drbg_get_new_handle();
    drbg->type = EXTERNAL_DRBG;
    drbg->state = state;

    /* return drbg handle */

    *handle = drbg->handle;

    DRBG_RET(DRBG_OK);
}

/* ntru_crypto_drbg_uninstantiate
 *
 * This routine frees a drbg given its handle.
 *
 * Returns DRBG_OK if successful.
 * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid.
 */

uint32_t
ntru_crypto_drbg_uninstantiate(
    DRBG_HANDLE handle) /* in - drbg handle */
{
    DRBG_STATE *drbg = NULL;

    /* find the instantiated drbg */

    if ((drbg = drbg_get_drbg(handle)) == NULL) {
        DRBG_RET(DRBG_BAD_PARAMETER);
    }

    /* zero and free drbg state; the HMAC variant scrubs its secrets in
     * sha256_hmac_drbg_free(), the external variant holds no secrets */

    if (drbg->state) {
        switch (drbg->type) {
        case EXTERNAL_DRBG:
            FREE(drbg->state);
            break;
        case SHA256_HMAC_DRBG:
            sha256_hmac_drbg_free((SHA256_HMAC_DRBG_STATE *) drbg->state);
            break;
        }
        /* marks the slot free for reuse by drbg_get_new_drbg() */
        drbg->state = NULL;
    }

    drbg->handle = 0;
    DRBG_RET(DRBG_OK);
}

/* ntru_crypto_drbg_reseed
 *
 * This routine reseeds an instantiated drbg.
 * See ANS X9.82: Part 3-2007.
 *
 * Returns DRBG_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid. + * Returns HMAC errors if they occur. + */ + +uint32_t +ntru_crypto_drbg_reseed( + DRBG_HANDLE handle) /* in - drbg handle */ +{ + DRBG_STATE *drbg = NULL; + + /* find the instantiated drbg */ + + if ((drbg = drbg_get_drbg(handle)) == NULL) { + DRBG_RET(DRBG_BAD_PARAMETER); + } + + if (drbg->type == EXTERNAL_DRBG) { + DRBG_RET(DRBG_BAD_PARAMETER); + } + + /* reseed the SHA-256 HMAC_DRBG */ + + return sha256_hmac_drbg_reseed((SHA256_HMAC_DRBG_STATE *) drbg->state); +} + +/* ntru_crypto_drbg_generate + * + * This routine generates pseudorandom bytes using an instantiated drbg. + * If the maximum number of requests has been reached, reseeding will occur. + * See ANS X9.82: Part 3-2007. + * + * Returns DRBG_OK if successful. + * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid or if + * an argument pointer is NULL. + * Returns DRBG_ERROR_BASE + DRBG_BAD_LENGTH if the security strength requested + * is too large or the number of bytes requested is zero or too large. + * Returns HMAC errors if they occur. 
*/

uint32_t
ntru_crypto_drbg_generate(
    DRBG_HANDLE handle,         /* in - drbg handle */
    uint32_t sec_strength_bits, /* in - requested sec strength in bits */
    uint32_t num_bytes,         /* in - number of octets to generate */
    uint8_t *out)               /* out - address for generated octets */
{
    DRBG_STATE *drbg = NULL;

    /* find the instantiated drbg */

    if ((drbg = drbg_get_drbg(handle)) == NULL) {
        DRBG_RET(DRBG_BAD_PARAMETER);
    }

    /* check arguments */

    if (!out) {
        DRBG_RET(DRBG_BAD_PARAMETER);
    }

    if (num_bytes == 0) {
        DRBG_RET(DRBG_BAD_LENGTH);
    }

    /* generate pseudorandom output from the SHA256_HMAC_DRBG.
     * Note: for an EXTERNAL_DRBG the sec_strength_bits argument is not
     * checked — only the callback is invoked. */

    switch (drbg->type) {
    case EXTERNAL_DRBG:
        return ((EXTERNAL_DRBG_STATE *) drbg->state)->randombytesfn(out, num_bytes);
    case SHA256_HMAC_DRBG:
        return sha256_hmac_drbg_generate(
            (SHA256_HMAC_DRBG_STATE *) drbg->state,
            sec_strength_bits, num_bytes, out);
    default:
        DRBG_RET(DRBG_BAD_PARAMETER);
    }
}
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_drbg.h b/crypt/liboqs/kex_ntru/ntru_crypto_drbg.h
new file mode 100644
index 0000000000000000000000000000000000000000..9fea19fe754e513f686edc500eb5ca4bf8acf436
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_drbg.h
/******************************************************************************
 * NTRU Cryptography Reference Source Code
 *
 * Copyright (C) 2009-2016 Security Innovation (SI)
 *
 * SI has dedicated the work to the public domain by waiving all of its rights
 * to the work worldwide under copyright law, including all related and
 * neighboring rights, to the extent allowed by law.
 *
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * You can copy, modify, distribute and perform the work, even for commercial
 * purposes, all without asking permission.
You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_drbg.h + * + * Contents: Public header file for ntru_crypto_drbg.c. + * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_DRBG_H +#define NTRU_CRYPTO_DRBG_H + +#include "ntru_crypto_platform.h" +#include "ntru_crypto_error.h" + +#if !defined(NTRUCALL) +#if !defined(WIN32) || defined(NTRUCRYPTO_STATIC) +/* Linux, or a Win32 static library */ +#define NTRUCALL extern uint32_t +#elif defined(NTRUCRYPTO_EXPORTS) +/* Win32 DLL build */ +#define NTRUCALL extern __declspec(dllexport) uint32_t +#else +/* Win32 DLL import */ +#define NTRUCALL extern __declspec(dllimport) uint32_t +#endif +#endif /* NTRUCALL */ + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +/******************* + * DRBG parameters * + *******************/ + +#if !defined(DRBG_MAX_INSTANTIATIONS) +#define DRBG_MAX_INSTANTIATIONS 4 +#endif +#define DRBG_MAX_SEC_STRENGTH_BITS 256 +#define DRBG_MAX_BYTES_PER_BYTE_OF_ENTROPY 8 + +/************************ + * HMAC_DRBG parameters * + ************************/ + +#define HMAC_DRBG_MAX_PERS_STR_BYTES 32 +#define HMAC_DRBG_MAX_BYTES_PER_REQUEST 1024 + +/******************** + * type definitions * + ********************/ + +typedef uint32_t DRBG_HANDLE; /* drbg handle */ + +typedef enum { /* drbg types */ + EXTERNAL_DRBG, + SHA256_HMAC_DRBG, +} DRBG_TYPE; + +typedef enum { /* entropy-function commands */ + GET_NUM_BYTES_PER_BYTE_OF_ENTROPY = 0, + INIT, + GET_BYTE_OF_ENTROPY, +} ENTROPY_CMD; +typedef uint8_t (*ENTROPY_FN)( /* get entropy function */ + ENTROPY_CMD cmd, /* command */ + uint8_t *out); /* address for output */ + +/* Type 
for external PRNG functions. Must return DRBG_OK on success */ +typedef uint32_t (*RANDOM_BYTES_FN)( /* random bytes function */ + uint8_t *out, /* output buffer */ + uint32_t num_bytes); /* number of bytes */ + +/*************** + * error codes * + ***************/ + +#define DRBG_OK 0x00000000 /* no errors */ +#define DRBG_OUT_OF_MEMORY 0x00000001 /* can't allocate memory */ +#define DRBG_BAD_PARAMETER 0x00000002 /* null pointer */ +#define DRBG_BAD_LENGTH 0x00000003 /* invalid no. of bytes */ +#define DRBG_NOT_AVAILABLE 0x00000004 /* no instantiation slot available */ +#define DRBG_ENTROPY_FAIL 0x00000005 /* entropy function failure */ + +/*************** + * error macro * + ***************/ + +#define DRBG_RESULT(r) ((uint32_t)((r) ? DRBG_ERROR_BASE + (r) : (r))) +#define DRBG_RET(r) return DRBG_RESULT(r); + +/************************* + * function declarations * + *************************/ + +/* ntru_crypto_drbg_instantiate + * + * This routine instantiates a drbg with the requested security strength. + * See ANS X9.82: Part 3-2007. + * + * Returns DRBG_OK if successful. + * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if an argument pointer is NULL. + * Returns DRBG_ERROR_BASE + DRBG_BAD_LENGTH if the security strength requested + * or the personalization string is too large. + * Returns DRBG_ERROR_BASE + DRBG_OUT_OF_MEMORY if the internal state cannot be + * allocated from the heap. + */ + +NTRUCALL +ntru_crypto_drbg_instantiate( + uint32_t sec_strength_bits, /* in - requested sec strength in bits */ + uint8_t const *pers_str, /* in - ptr to personalization string */ + uint32_t pers_str_bytes, /* in - no. personalization str bytes */ + ENTROPY_FN entropy_fn, /* in - pointer to entropy function */ + DRBG_HANDLE *handle); /* out - address for drbg handle */ + +/* ntru_crypto_drbg_external_instantiate + * + * This routine instruments an external DRBG so that ntru_crypto routines + * can call it. 
randombytesfn must be of type + * uint32_t (randombytesfn*)(unsigned char *out, unsigned long long num_bytes); + * and should return DRBG_OK on success. + * + * Returns DRBG_OK if successful. + * Returns DRBG_ERROR_BASE + DRBG_NOT_AVAILABLE if there are no instantiation + * slots available + * Returns DRBG_ERROR_BASE + DRBG_OUT_OF_MEMORY if the internal state cannot be + * allocated from the heap. + */ + +NTRUCALL +ntru_crypto_drbg_external_instantiate( + RANDOM_BYTES_FN randombytesfn, /* in - pointer to random bytes function */ + DRBG_HANDLE *handle); /* out - address for drbg handle */ + +/* ntru_crypto_drbg_uninstantiate + * + * This routine frees a drbg given its handle. + * + * Returns DRBG_OK if successful. + * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid. + */ + +NTRUCALL +ntru_crypto_drbg_uninstantiate( + DRBG_HANDLE handle); /* in - drbg handle */ + +/* ntru_crypto_drbg_reseed + * + * This routine reseeds an instantiated drbg. + * See ANS X9.82: Part 3-2007. + * + * Returns DRBG_OK if successful. + * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid. + * Returns NTRU_CRYPTO_HMAC errors if they occur. + */ + +NTRUCALL +ntru_crypto_drbg_reseed( + DRBG_HANDLE handle); /* in - drbg handle */ + +/* ntru_crypto_drbg_generate + * + * This routine generates pseudorandom bytes using an instantiated drbg. + * If the maximum number of requests has been reached, reseeding will occur. + * See ANS X9.82: Part 3-2007. + * + * Returns DRBG_OK if successful. + * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid or if + * an argument pointer is NULL. + * Returns DRBG_ERROR_BASE + DRBG_BAD_LENGTH if the security strength requested + * is too large or the number of bytes requested is zero or too large. + * Returns NTRU_CRYPTO_HMAC errors if they occur. 
+ */ + +NTRUCALL +ntru_crypto_drbg_generate( + DRBG_HANDLE handle, /* in - drbg handle */ + uint32_t sec_strength_bits, /* in - requested sec strength in bits */ + uint32_t num_bytes, /* in - number of octets to generate */ + uint8_t *out); /* out - address for generated octets */ + +#if defined(__cplusplus) +} +#endif /* __cplusplus */ + +#endif /* NTRU_CRYPTO_DRBG_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_error.h b/crypt/liboqs/kex_ntru/ntru_crypto_error.h new file mode 100644 index 0000000000000000000000000000000000000000..9252542949068cf9f642d6e6a9eea252e3233113 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_error.h @@ -0,0 +1,42 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_error.h + * + * Contents: Contains base values for crypto error codes. 
+ * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_ERROR_H +#define NTRU_CRYPTO_ERROR_H + +/* define base values for crypto error codes */ + +#define HASH_ERROR_BASE ((uint32_t) 0x00000100) +#define HMAC_ERROR_BASE ((uint32_t) 0x00000200) +#define SHA_ERROR_BASE ((uint32_t) 0x00000400) +#define DRBG_ERROR_BASE ((uint32_t) 0x00000a00) +#define NTRU_ERROR_BASE ((uint32_t) 0x00003000) +#define MGF1_ERROR_BASE ((uint32_t) 0x00004100) + +#endif /* NTRU_CRYPTO_ERROR_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_hash.c b/crypt/liboqs/kex_ntru/ntru_crypto_hash.c new file mode 100644 index 0000000000000000000000000000000000000000..549e2fac37b5593bb235d21088bb12171ae86c21 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_hash.c @@ -0,0 +1,307 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_hash.c + * + * Contents: Routines implementing the hash object abstraction. 
+ * + *****************************************************************************/ + +#include "ntru_crypto.h" +#include "ntru_crypto_hash.h" + +typedef uint32_t (*NTRU_CRYPTO_HASH_INIT_FN)( + void *c); +typedef uint32_t (*NTRU_CRYPTO_HASH_UPDATE_FN)( + void *c, + void const *data, + uint32_t len); +typedef uint32_t (*NTRU_CRYPTO_HASH_FINAL_FN)( + void *c, + void *md); +typedef uint32_t (*NTRU_CRYPTO_HASH_DIGEST_FN)( + void const *data, + uint32_t len, + void *md); + +typedef struct _NTRU_CRYPTO_HASH_ALG_PARAMS { + uint8_t algid; + uint16_t block_length; + uint16_t digest_length; + NTRU_CRYPTO_HASH_INIT_FN init; + NTRU_CRYPTO_HASH_UPDATE_FN update; + NTRU_CRYPTO_HASH_FINAL_FN final; + NTRU_CRYPTO_HASH_DIGEST_FN digest; +} NTRU_CRYPTO_HASH_ALG_PARAMS; + +static NTRU_CRYPTO_HASH_ALG_PARAMS const algs_params[] = { + { + NTRU_CRYPTO_HASH_ALGID_SHA1, + SHA_1_BLK_LEN, + SHA_1_MD_LEN, + (NTRU_CRYPTO_HASH_INIT_FN) SHA_1_INIT_FN, + (NTRU_CRYPTO_HASH_UPDATE_FN) SHA_1_UPDATE_FN, + (NTRU_CRYPTO_HASH_FINAL_FN) SHA_1_FINAL_FN, + (NTRU_CRYPTO_HASH_DIGEST_FN) SHA_1_DIGEST_FN, + }, + { + NTRU_CRYPTO_HASH_ALGID_SHA256, + SHA_256_BLK_LEN, + SHA_256_MD_LEN, + (NTRU_CRYPTO_HASH_INIT_FN) SHA_256_INIT_FN, + (NTRU_CRYPTO_HASH_UPDATE_FN) SHA_256_UPDATE_FN, + (NTRU_CRYPTO_HASH_FINAL_FN) SHA_256_FINAL_FN, + (NTRU_CRYPTO_HASH_DIGEST_FN) SHA_256_DIGEST_FN, + }, +}; + +static int const numalgs = (sizeof(algs_params) / sizeof(algs_params[0])); + +/* get_alg_params + * + * Return a pointer to the hash algorithm parameters for the hash algorithm + * specified, by looking for algid in the global algs_params table. + * If not found, return NULL. 
+ */ +static NTRU_CRYPTO_HASH_ALG_PARAMS const * +get_alg_params( + NTRU_CRYPTO_HASH_ALGID algid) /* in - the hash algorithm to find */ +{ + int i; + + for (i = 0; i < numalgs; i++) { + if (algs_params[i].algid == algid) { + return &algs_params[i]; + } + } + + return NULL; +} + +/* ntru_crypto_hash_set_alg + * + * Sets the hash algorithm for the hash context. This must be called before + * any calls to ntru_crypto_hash_block_length(), + * ntru_crypto_hash_digest_length(), or ntru_crypto_hash_init() are made. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the specified algorithm is not supported. + */ + +uint32_t +ntru_crypto_hash_set_alg( + NTRU_CRYPTO_HASH_ALGID algid, /* in - hash algorithm to be used */ + NTRU_CRYPTO_HASH_CTX *c) /* in/out - pointer to the hash context */ +{ + if (!c) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER); + } + + c->alg_params = get_alg_params(algid); + + if (!c->alg_params) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG); + } + + HASH_RET(NTRU_CRYPTO_HASH_OK); +} + +/* ntru_crypto_hash_block_length + * + * Gets the number of bytes in an input block for the hash algorithm + * specified in the hash context. The hash algorithm must have been set + * in the hash context with a call to ntru_crypto_hash_set_alg() prior to + * calling this function. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set. 
+ */ + +uint32_t +ntru_crypto_hash_block_length( + NTRU_CRYPTO_HASH_CTX *c, /* in - pointer to the hash context */ + uint16_t *blk_len) /* out - address for block length in bytes */ +{ + if (!c || !blk_len) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER); + } + + if (!c->alg_params) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG); + } + + *blk_len = c->alg_params->block_length; + HASH_RET(NTRU_CRYPTO_HASH_OK); +} + +/* ntru_crypto_hash_digest_length + * + * Gets the number of bytes needed to hold the message digest for the + * hash algorithm specified in the hash context. The algorithm must have + * been set in the hash context with a call to ntru_crypto_hash_set_alg() prior + * to calling this function. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set. + */ + +uint32_t +ntru_crypto_hash_digest_length( + NTRU_CRYPTO_HASH_CTX const *c, /* in - pointer to the hash context */ + uint16_t *md_len) /* out - addr for digest length in bytes */ +{ + if (!c || !md_len) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER); + } + + if (!c->alg_params) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG); + } + + *md_len = c->alg_params->digest_length; + HASH_RET(NTRU_CRYPTO_HASH_OK); +} + +/* ntru_crypto_hash_init + * + * This routine performs standard initialization of the hash state. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set. 
+ */ + +uint32_t +ntru_crypto_hash_init( + NTRU_CRYPTO_HASH_CTX *c) /* in/out - pointer to hash context */ +{ + if (!c) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER); + } + + if (!c->alg_params) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG); + } + + return c->alg_params->init(&c->alg_ctx); +} + +/* ntru_crypto_hash_update + * + * This routine processes input data and updates the hash calculation. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_OVERFLOW if too much text has been fed to the + * hash algorithm. The size limit is dependent on the hash algorithm, + * and not all algorithms have this limit. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set. + */ + +uint32_t +ntru_crypto_hash_update( + NTRU_CRYPTO_HASH_CTX *c, /* in/out - pointer to hash context */ + uint8_t const *data, /* in - pointer to input data */ + uint32_t data_len) /* in - number of bytes of input data */ +{ + if (!c || (data_len && !data)) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER); + } + + if (!c->alg_params) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG); + } + + return c->alg_params->update(&c->alg_ctx, data, data_len); +} + +/* ntru_crypto_hash_final + * + * This routine completes the hash calculation and returns the message digest. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set. 
+ */ + +uint32_t +ntru_crypto_hash_final( + NTRU_CRYPTO_HASH_CTX *c, /* in/out - pointer to hash context */ + uint8_t *md) /* out - address for message digest */ +{ + if (!c || !md) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER); + } + + if (!c->alg_params) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG); + } + + return c->alg_params->final(&c->alg_ctx, md); +} + +/* ntru_crypto_hash_digest + * + * This routine computes a message digest. It is assumed that the + * output buffer md is large enough to hold the output (see + * ntru_crypto_hash_digest_length) + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_OVERFLOW if too much text has been fed to the + * hash algorithm. The size limit is dependent on the hash algorithm, + * and not all algorithms have this limit. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the specified algorithm is not supported. 
+ */ + +uint32_t +ntru_crypto_hash_digest( + NTRU_CRYPTO_HASH_ALGID algid, /* in - the hash algorithm to use */ + uint8_t const *data, /* in - pointer to input data */ + uint32_t data_len, /* in - number of bytes of input data */ + uint8_t *md) /* out - address for message digest */ +{ + NTRU_CRYPTO_HASH_ALG_PARAMS const *alg_params = get_alg_params(algid); + + if (!alg_params) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG); + } + + if ((data_len && !data) || !md) { + HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER); + } + + return alg_params->digest(data, data_len, md); +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_hash.h b/crypt/liboqs/kex_ntru/ntru_crypto_hash.h new file mode 100644 index 0000000000000000000000000000000000000000..063493d7d5f325748b41db8dbba3985f1c682729 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_hash.h @@ -0,0 +1,201 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. 
+ * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_hash.h + * + * Contents: Definitions and declarations for the hash object abstraction. + * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_HASH_H +#define NTRU_CRYPTO_HASH_H + +#include "ntru_crypto_platform.h" +#include "ntru_crypto_error.h" +#include "ntru_crypto_hash_basics.h" +#include "ntru_crypto_sha1.h" +#include "ntru_crypto_sha256.h" + +/*************** + * error macro * + ***************/ + +#define HASH_RESULT(r) ((uint32_t)((r) ? HASH_ERROR_BASE + (r) : (r))) +#define HASH_RET(r) return HASH_RESULT(r); + +/************************* + * structure definitions * + *************************/ + +/* _NTRU_CRYPTO_HASH_ALG_PARAMS + * + * An opaque forward declaration for a private structure used + * internally by the hash object interface. + */ + +struct _NTRU_CRYPTO_HASH_ALG_PARAMS; + +/* NTRU_CRYPTO_HASH_CTX + * + * Hash object context information. + */ + +typedef struct { + struct _NTRU_CRYPTO_HASH_ALG_PARAMS const *alg_params; + union { + NTRU_CRYPTO_SHA1_CTX sha1; + NTRU_CRYPTO_SHA2_CTX sha256; + } alg_ctx; +} NTRU_CRYPTO_HASH_CTX; + +/************************* + * function declarations * + *************************/ + +/* ntru_crypto_hash_set_alg + * + * Sets the hash algorithm for the hash context. This must be called before + * any calls to crypto_hash_block_length(), crypto_hash_digest_length(), or + * crypto_hash_init() are made. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the specified algorithm is not supported. 
+ */ + +extern uint32_t +ntru_crypto_hash_set_alg( + NTRU_CRYPTO_HASH_ALGID algid, /* in - hash algoirithm to be used */ + NTRU_CRYPTO_HASH_CTX *c); /* in/out - pointer to the hash context */ + +/* ntru_crypto_hash_block_length + * + * Gets the number of bytes in an input block for the hash algorithm + * specified in the hash context. The hash algorithm must have been set + * in the hash context with a call to crypto_hash_set_alg() prior to + * calling this function. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set. + */ + +extern uint32_t +ntru_crypto_hash_block_length( + NTRU_CRYPTO_HASH_CTX *c, /* in - pointer to the hash context */ + uint16_t *blk_len); /* out - address for block length in bytes */ + +/* ntru_crypto_hash_digest_length + * + * Gets the number of bytes needed to hold the message digest for the + * hash algorithm specified in the hash context. The algorithm must have + * been set in the hash context with a call to crypto_hash_set_alg() prior + * to calling this function. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set. + */ + +extern uint32_t +ntru_crypto_hash_digest_length( + NTRU_CRYPTO_HASH_CTX const *c, /* in - pointer to the hash context */ + uint16_t *md_len); /*out - addr for digest length in bytes*/ + +/* ntru_crypto_hash_init + * + * This routine initializes the hash state. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set. 
+ */ + +extern uint32_t +ntru_crypto_hash_init( + NTRU_CRYPTO_HASH_CTX *c); /* in/out - pointer to hash context */ + +/* ntru_crypto_hash_update + * + * This routine processes input data and updates the hash calculation. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_OVERFLOW if too much text has been fed to the + * hash algorithm. The size limit is dependent on the hash algorithm, + * and not all algorithms have this limit. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set. + */ + +extern uint32_t +ntru_crypto_hash_update( + NTRU_CRYPTO_HASH_CTX *c, /* in/out - pointer to hash context */ + uint8_t const *data, /* in - pointer to input data */ + uint32_t data_len); /* in - number of bytes of input data */ + +/* ntru_crypto_hash_final + * + * This routine completes the hash calculation and returns the message digest. + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set. + */ + +extern uint32_t +ntru_crypto_hash_final( + NTRU_CRYPTO_HASH_CTX *c, /* in/out - pointer to hash context */ + uint8_t *md); /* out - address for message digest */ + +/* ntru_crypto_hash_digest + * + * This routine computes a message digest. It is assumed that the + * output buffer md is large enough to hold the output (see + * crypto_hash_digest_length) + * + * Returns NTRU_CRYPTO_HASH_OK on success. + * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns NTRU_CRYPTO_HASH_OVERFLOW if too much text has been fed to the + * hash algorithm. 
The size limit is dependent on the hash algorithm, + * and not all algorithms have this limit. + * Returns NTRU_CRYPTO_HASH_BAD_ALG if the specified algorithm is not supported. + */ + +extern uint32_t +ntru_crypto_hash_digest( + NTRU_CRYPTO_HASH_ALGID algid, /* in - the hash algorithm to use */ + uint8_t const *data, /* in - pointer to input data */ + uint32_t data_len, /* in - number of bytes of input data */ + uint8_t *md); /* out - address for message digest */ + +#endif /* NTRU_CRYPTO_HASH_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_hash_basics.h b/crypt/liboqs/kex_ntru/ntru_crypto_hash_basics.h new file mode 100644 index 0000000000000000000000000000000000000000..703d463eba73aeeb2b8134cc1da55602ceac0f01 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_hash_basics.h @@ -0,0 +1,67 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_hash_basics.h + * + * Contents: Common definitions for all hash algorithms. 
+ * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_HASH_BASICS_H +#define NTRU_CRYPTO_HASH_BASICS_H + +#include "ntru_crypto_platform.h" + +/************** + * algorithms * + **************/ + +typedef enum { + NTRU_CRYPTO_HASH_ALGID_NONE = 0, + NTRU_CRYPTO_HASH_ALGID_SHA1, + NTRU_CRYPTO_HASH_ALGID_SHA256, +} NTRU_CRYPTO_HASH_ALGID; + +/*************** + * error codes * + ***************/ + +#define NTRU_CRYPTO_HASH_OK ((uint32_t) 0x00) +#define NTRU_CRYPTO_HASH_FAIL ((uint32_t) 0x01) +#define NTRU_CRYPTO_HASH_BAD_PARAMETER ((uint32_t) 0x02) +#define NTRU_CRYPTO_HASH_OVERFLOW ((uint32_t) 0x03) +#define NTRU_CRYPTO_HASH_BAD_ALG ((uint32_t) 0x20) +#define NTRU_CRYPTO_HASH_OUT_OF_MEMORY ((uint32_t) 0x21) + +/* For backward-compatibility */ +typedef uint32_t NTRU_CRYPTO_HASH_ERROR; + +/********* + * flags * + *********/ + +#define HASH_DATA_ONLY 0 +#define HASH_INIT (1 << 0) +#define HASH_FINISH (1 << 1) + +#endif /* NTRU_CRYPTO_HASH_BASICS_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_hmac.c b/crypt/liboqs/kex_ntru/ntru_crypto_hmac.c new file mode 100644 index 0000000000000000000000000000000000000000..b307df9a33d80deda7e3b5acf7bbf68d31afc877 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_hmac.c @@ -0,0 +1,319 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_hmac.c + * + * Contents: Routines implementing the HMAC hash calculation. + * + *****************************************************************************/ + +#include "ntru_crypto.h" +#include "ntru_crypto_hmac.h" + +/* HMAC context */ + +struct _NTRU_CRYPTO_HMAC_CTX { + NTRU_CRYPTO_HASH_CTX hash_ctx; + uint8_t *k0; + uint16_t blk_len; + uint16_t md_len; +}; + +/* ntru_crypto_hmac_create_ctx + * + * This routine creates an HMAC context, setting the hash algorithm and + * the key to be used. + * + * Returns NTRU_CRYPTO_HMAC_OK if successful. + * Returns NTRU_CRYPTO_HMAC_BAD_ALG if the specified algorithm is not supported. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HMAC_OUT_OF_MEMORY if memory cannot be allocated. 
+ */ + +uint32_t +ntru_crypto_hmac_create_ctx( + NTRU_CRYPTO_HASH_ALGID algid, /* in - the hash algorithm to be used */ + uint8_t const *key, /* in - pointer to the HMAC key */ + uint32_t key_len, /* in - number of bytes in HMAC key */ + NTRU_CRYPTO_HMAC_CTX **c) /* out - address for pointer to HMAC + context */ +{ + NTRU_CRYPTO_HMAC_CTX *ctx = NULL; + uint32_t result; + + /* check parameters */ + + if (!c || !key) { + HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER); + } + + *c = NULL; + + /* allocate memory for an HMAC context */ + if (NULL == (ctx = (NTRU_CRYPTO_HMAC_CTX *) MALLOC(sizeof(NTRU_CRYPTO_HMAC_CTX)))) { + HMAC_RET(NTRU_CRYPTO_HMAC_OUT_OF_MEMORY); + } + + /* set the algorithm */ + + if ((result = ntru_crypto_hash_set_alg(algid, &ctx->hash_ctx))) { + FREE(ctx); + HMAC_RET(NTRU_CRYPTO_HMAC_BAD_ALG); + } + + /* set block length and digest length */ + + if ((result = ntru_crypto_hash_block_length(&ctx->hash_ctx, + &ctx->blk_len)) || + (result = ntru_crypto_hash_digest_length(&ctx->hash_ctx, + &ctx->md_len))) { + FREE(ctx); + return result; + } + + /* allocate memory for K0 */ + if ((ctx->k0 = (uint8_t *) MALLOC(ctx->blk_len)) == NULL) { + FREE(ctx); + HMAC_RET(NTRU_CRYPTO_HMAC_OUT_OF_MEMORY); + } + + /* calculate K0 and store in HMAC context */ + + memset(ctx->k0, 0, ctx->blk_len); + + /* check if key is too large */ + + if (key_len > ctx->blk_len) { + if ((result = ntru_crypto_hash_digest(algid, key, key_len, ctx->k0))) { + memset(ctx->k0, 0, ctx->blk_len); + FREE(ctx->k0); + FREE(ctx); + return result; + } + } else { + memcpy(ctx->k0, key, key_len); + } + + /* return pointer to HMAC context */ + + *c = ctx; + HMAC_RET(NTRU_CRYPTO_HMAC_OK); +} + +/* ntru_crypto_hmac_destroy_ctx + * + * Destroys an HMAC context. + * + * Returns NTRU_CRYPTO_HMAC_OK if successful. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. 
+ */ + +uint32_t +ntru_crypto_hmac_destroy_ctx( + NTRU_CRYPTO_HMAC_CTX *c) /* in/out - pointer to HMAC context */ +{ + if (!c || !c->k0) { + HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER); + } + + /* clear key and release memory */ + + memset(c->k0, 0, c->blk_len); + FREE(c->k0); + FREE(c); + + HMAC_RET(NTRU_CRYPTO_HMAC_OK); +} + +/* ntru_crypto_hmac_get_md_len + * + * This routine gets the digest length of the HMAC. + * + * Returns NTRU_CRYPTO_HMAC_OK on success. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + */ + +uint32_t +ntru_crypto_hmac_get_md_len( + NTRU_CRYPTO_HMAC_CTX const *c, /* in - pointer to HMAC context */ + uint16_t *md_len) /* out - address for digest length */ +{ + /* check parameters */ + + if (!c || !md_len) { + HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER); + } + + /* get digest length */ + + *md_len = c->md_len; + HMAC_RET(NTRU_CRYPTO_HMAC_OK); +} + +/* ntru_crypto_hmac_set_key + * + * This routine sets a digest-length key into the HMAC context. + * + * Returns NTRU_CRYPTO_HMAC_OK on success. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + */ + +uint32_t +ntru_crypto_hmac_set_key( + NTRU_CRYPTO_HMAC_CTX *c, /* in - pointer to HMAC context */ + uint8_t const *key) /* in - pointer to new HMAC key */ +{ + /* check parameters */ + + if (!c || !key) { + HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER); + } + + /* copy key */ + + memcpy(c->k0, key, c->md_len); + HMAC_RET(NTRU_CRYPTO_HMAC_OK); +} + +/* ntru_crypto_hmac_init + * + * This routine performs standard initialization of the HMAC state. + * + * Returns NTRU_CRYPTO_HMAC_OK on success. + * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. 
+ */ + +uint32_t +ntru_crypto_hmac_init( + NTRU_CRYPTO_HMAC_CTX *c) /* in/out - pointer to HMAC context */ +{ + uint32_t result; + int i; + + /* check parameters */ + + if (!c) { + HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER); + } + + /* init hash context and compute H(K0 ^ ipad) */ + + for (i = 0; i < c->blk_len; i++) { + c->k0[i] ^= 0x36; /* K0 ^ ipad */ + } + + if ((result = ntru_crypto_hash_init(&c->hash_ctx)) || + (result = ntru_crypto_hash_update(&c->hash_ctx, c->k0, c->blk_len))) { + return result; + } + + HMAC_RET(NTRU_CRYPTO_HMAC_OK); +} + +/* ntru_crypto_hmac_update + * + * This routine processes input data and updates the HMAC hash calculation. + * + * Returns NTRU_CRYPTO_HMAC_OK on success. + * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_OVERFLOW if more than bytes are hashed than the + * underlying hash algorithm can handle. + */ + +uint32_t +ntru_crypto_hmac_update( + NTRU_CRYPTO_HMAC_CTX *c, /* in/out - pointer to HMAC context */ + const uint8_t *data, /* in - pointer to input data */ + uint32_t data_len) /* in - no. of bytes of input data */ +{ + uint32_t result; + + /* check parameters */ + + if (!c || (data_len && !data)) { + HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER); + } + + if ((result = ntru_crypto_hash_update(&c->hash_ctx, data, data_len))) { + return result; + } + + HMAC_RET(NTRU_CRYPTO_HMAC_OK); +} + +/* ntru_crypto_hmac_final + * + * This routine completes the HMAC hash calculation and returns the + * message digest. + * + * Returns NTRU_CRYPTO_HMAC_OK on success. + * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. 
+ */ + +uint32_t +ntru_crypto_hmac_final( + NTRU_CRYPTO_HMAC_CTX *c, /* in/out - pointer to HMAC context */ + uint8_t *md) /* out - address for message digest */ +{ + uint32_t result = NTRU_CRYPTO_HMAC_OK; + int i; + + /* check parameters */ + + if (!c || !md) { + HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER); + } + + /* form K0 ^ opad + * complete md = H((K0 ^ ipad) || data) + * compute md = H((K0 ^ opad) || md) + * re-form K0 + */ + + for (i = 0; i < c->blk_len; i++) { + c->k0[i] ^= (0x36 ^ 0x5c); + } + + if ((result = ntru_crypto_hash_final(&c->hash_ctx, md)) || + (result = ntru_crypto_hash_init(&c->hash_ctx)) || + (result = ntru_crypto_hash_update(&c->hash_ctx, c->k0, c->blk_len)) || + (result = ntru_crypto_hash_update(&c->hash_ctx, md, c->md_len)) || + (result = ntru_crypto_hash_final(&c->hash_ctx, md))) { + } + + for (i = 0; i < c->blk_len; i++) { + c->k0[i] ^= 0x5c; + } + + return result; +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_hmac.h b/crypt/liboqs/kex_ntru/ntru_crypto_hmac.h new file mode 100644 index 0000000000000000000000000000000000000000..8878a7ef80098cb52a13ec6423fd096bd26e5d10 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_hmac.h @@ -0,0 +1,169 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. 
You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_hmac.h + * + * Contents: Definitions and declarations for the HMAC implementation. + * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_HMAC_H +#define NTRU_CRYPTO_HMAC_H + +#include "ntru_crypto_platform.h" +#include "ntru_crypto_hash.h" + +/*************** + * error codes * + ***************/ + +#define NTRU_CRYPTO_HMAC_OK ((uint32_t) NTRU_CRYPTO_HASH_OK) +#define NTRU_CRYPTO_HMAC_BAD_PARAMETER ((uint32_t) NTRU_CRYPTO_HASH_BAD_PARAMETER) +#define NTRU_CRYPTO_HMAC_BAD_ALG ((uint32_t) NTRU_CRYPTO_HASH_BAD_ALG) +#define NTRU_CRYPTO_HMAC_OUT_OF_MEMORY ((uint32_t) NTRU_CRYPTO_HASH_OUT_OF_MEMORY) + +#define HMAC_RESULT(e) ((uint32_t)((e) ? HMAC_ERROR_BASE + (e) : (e))) +#define HMAC_RET(e) return HMAC_RESULT(e) + +/************************* + * structure definitions * + *************************/ + +/* HMAC context structure */ + +struct _NTRU_CRYPTO_HMAC_CTX; /* opaque forward reference */ +typedef struct _NTRU_CRYPTO_HMAC_CTX NTRU_CRYPTO_HMAC_CTX; + +/************************* + * function declarations * + *************************/ + +/* ntru_crypto_hmac_create_ctx + * + * This routine creates an HMAC context, setting the hash algorithm and + * the key to be used. + * + * Returns NTRU_CRYPTO_HASH_OK if successful. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HASH_OUT_OF_MEMORY if memory cannot be allocated. 
+ */ + +extern uint32_t +ntru_crypto_hmac_create_ctx( + NTRU_CRYPTO_HASH_ALGID algid, /* in - the hash algorithm to be used */ + uint8_t const *key, /* in - pointer to the HMAC key */ + uint32_t key_len, /* in - number of bytes in HMAC key */ + NTRU_CRYPTO_HMAC_CTX **c); /* out - address for pointer to HMAC + context */ + +/* ntru_crypto_hmac_destroy_ctx + * + * Destroys an HMAC context. + * + * Returns NTRU_CRYPTO_HASH_OK if successful. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + */ + +extern uint32_t +ntru_crypto_hmac_destroy_ctx( + NTRU_CRYPTO_HMAC_CTX *c); /* in/out - pointer to HMAC context */ + +/* ntru_crypto_hmac_get_md_len + * + * This routine gets the digest length of the HMAC. + * + * Returns NTRU_CRYPTO_HMAC_OK on success. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + */ + +extern uint32_t +ntru_crypto_hmac_get_md_len( + NTRU_CRYPTO_HMAC_CTX const *c, /* in - pointer to HMAC context */ + uint16_t *md_len); /* out - address for digest length */ + +/* ntru_crypto_hmac_set_key + * + * This routine sets a digest-length key into the HMAC context. + * + * Returns NTRU_CRYPTO_HMAC_OK on success. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + */ + +extern uint32_t +ntru_crypto_hmac_set_key( + NTRU_CRYPTO_HMAC_CTX *c, /* in - pointer to HMAC context */ + uint8_t const *key); /* in - pointer to new HMAC key */ + +/* ntru_crypto_hmac_init + * + * This routine performs standard initialization of the HMAC state. + * + * Returns NTRU_CRYPTO_HMAC_OK on success. + * Returns NTRU_CRYPTO_HMAC_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + */ + +extern uint32_t +ntru_crypto_hmac_init( + NTRU_CRYPTO_HMAC_CTX *c); /* in/out - pointer to HMAC context */ + +/* ntru_crypto_hmac_update + * + * This routine processes input data and updates the HMAC hash calculation. 
+ * + * Returns NTRU_CRYPTO_HMAC_OK on success. + * Returns NTRU_CRYPTO_HMAC_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + * Returns NTRU_CRYPTO_HMAC_OVERFLOW if more than bytes are hashed than the underlying + * hash algorithm can handle. + */ + +extern uint32_t +ntru_crypto_hmac_update( + NTRU_CRYPTO_HMAC_CTX *c, /* in/out - pointer to HMAC context */ + uint8_t const *data, /* in - pointer to input data */ + uint32_t data_len); /* in - no. of bytes of input data */ + +/* ntru_crypto_hmac_final + * + * This routine completes the HMAC hash calculation and returns the + * message digest. + * + * Returns NTRU_CRYPTO_HMAC_OK on success. + * Returns NTRU_CRYPTO_HMAC_FAIL with corrupted context. + * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are + * passed. + */ + +extern uint32_t +ntru_crypto_hmac_final( + NTRU_CRYPTO_HMAC_CTX *c, /* in/out - pointer to HMAC context */ + uint8_t *md); /* out - address for message digest */ + +#endif /* NTRU_CRYPTO_HMAC_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_msbyte_uint32.c b/crypt/liboqs/kex_ntru/ntru_crypto_msbyte_uint32.c new file mode 100644 index 0000000000000000000000000000000000000000..12fc971ede649d20649b87284ad2580bbbbac3dd --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_msbyte_uint32.c @@ -0,0 +1,86 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/* ntru_crypto_msbyte_2_uint32()
 *
 * Converts a byte stream in network byte order (most-significant byte
 * first) into an array of uint32_t words: the first input byte becomes
 * the most significant byte of words[0].
 *
 * The input buffer MUST contain at least 4 * n bytes.
 */

void ntru_crypto_msbyte_2_uint32(
    uint32_t *words,      /* out - pointer to the output uint32_t array */
    uint8_t const *bytes, /* in  - pointer to the input byte array */
    uint32_t n)           /* in  - number of words in the output array */
{
    uint32_t w;

    for (w = 0; w < n; w++) {
        /* assemble one big-endian word from the next four bytes */
        uint32_t acc = ((uint32_t) bytes[0]) << 24;

        acc |= ((uint32_t) bytes[1]) << 16;
        acc |= ((uint32_t) bytes[2]) << 8;
        acc |= (uint32_t) bytes[3];

        words[w] = acc;
        bytes += 4;
    }
}
/* ntru_crypto_uint32_2_msbyte()
 *
 * Converts an array of uint32_t into a byte stream in network byte order:
 * the most significant byte of words[0] becomes the first output byte.
 *
 * The output buffer receives exactly 4 * n bytes.
 */

void ntru_crypto_uint32_2_msbyte(
    uint8_t *bytes,        /* out - pointer to the output byte array */
    uint32_t const *words, /* in  - pointer to the input uint32_t array */
    uint32_t n)            /* in  - number of words in the input array */
{
    uint32_t w;

    for (w = 0; w < n; w++) {
        /* emit one word big-endian, most significant byte first */
        uint32_t v = words[w];

        bytes[0] = (uint8_t)(v >> 24);
        bytes[1] = (uint8_t)(v >> 16);
        bytes[2] = (uint8_t)(v >> 8);
        bytes[3] = (uint8_t) v;

        bytes += 4;
    }
}
+ * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_MSBYTE_UINT32_H +#define NTRU_CRYPTO_MSBYTE_UINT32_H + +#include "ntru_crypto_platform.h" + +/* ntru_crypto_msbyte_2_uint32() + * + * This routine converts an array of bytes in network byte order to an array + * of uint32_t, placing the first byte in the most significant byte of the + * first uint32_t word. + * + * The number of bytes in the input stream MUST be at least 4 times the + * number of words expected in the output array. + */ + +extern void +ntru_crypto_msbyte_2_uint32( + uint32_t *words, /* out - pointer to the output uint32_t array */ + uint8_t const *bytes, /* in - pointer to the input byte array */ + uint32_t n); /* in - number of words in the output array */ + +/* ntru_crypto_uint32_2_msbyte() + * + * This routine converts an array of uint32_t to an array of bytes in + * network byte order, placing the most significant byte of the first uint32_t + * word as the first byte of the output array. + * + * The number of bytes in the output stream will be 4 times the number of words + * specified in the input array. 
+ */ + +extern void +ntru_crypto_uint32_2_msbyte( + uint8_t *bytes, /* out - pointer to the output byte array */ + uint32_t const *words, /* in - pointer to the input uint32_t array */ + uint32_t n); /* in - number of words in the input array */ + +#endif /* NTRU_CRYPTO_MSBYTE_UINT32_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.c new file mode 100644 index 0000000000000000000000000000000000000000..d6514056970e43c59ab1feef9dae65c2ddad3c62 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.c @@ -0,0 +1,556 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_ntru_convert.c + * + * Contents: Conversion routines for NTRUEncrypt, including packing, unpacking, + * and others. 
+ * + *****************************************************************************/ + +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_convert.h" + +/* 3-bit to 2-trit conversion tables: 2 represents -1 */ + +static uint8_t const bits_2_trit1[] = {0, 0, 0, 1, 1, 1, 2, 2}; +static uint8_t const bits_2_trit2[] = {0, 1, 2, 0, 1, 2, 0, 1}; + +/* ntru_bits_2_trits + * + * Each 3 bits in an array of octets is converted to 2 trits in an array + * of trits. + * + * The octet array may overlap the end of the trit array. + */ + +void ntru_bits_2_trits( + uint8_t const *octets, /* in - pointer to array of octets */ + uint16_t num_trits, /* in - number of trits to produce */ + uint8_t *trits) /* out - address for array of trits */ +{ + uint32_t bits24; + uint32_t bits3; + uint32_t shift; + + while (num_trits >= 16) { + /* get next three octets */ + + bits24 = ((uint32_t)(*octets++)) << 16; + bits24 |= ((uint32_t)(*octets++)) << 8; + bits24 |= (uint32_t)(*octets++); + + /* for each 3 bits in the three octets, output 2 trits */ + + bits3 = (bits24 >> 21) & 0x7; + *trits++ = bits_2_trit1[bits3]; + *trits++ = bits_2_trit2[bits3]; + + bits3 = (bits24 >> 18) & 0x7; + *trits++ = bits_2_trit1[bits3]; + *trits++ = bits_2_trit2[bits3]; + + bits3 = (bits24 >> 15) & 0x7; + *trits++ = bits_2_trit1[bits3]; + *trits++ = bits_2_trit2[bits3]; + + bits3 = (bits24 >> 12) & 0x7; + *trits++ = bits_2_trit1[bits3]; + *trits++ = bits_2_trit2[bits3]; + + bits3 = (bits24 >> 9) & 0x7; + *trits++ = bits_2_trit1[bits3]; + *trits++ = bits_2_trit2[bits3]; + + bits3 = (bits24 >> 6) & 0x7; + *trits++ = bits_2_trit1[bits3]; + *trits++ = bits_2_trit2[bits3]; + + bits3 = (bits24 >> 3) & 0x7; + *trits++ = bits_2_trit1[bits3]; + *trits++ = bits_2_trit2[bits3]; + + bits3 = bits24 & 0x7; + *trits++ = bits_2_trit1[bits3]; + *trits++ = bits_2_trit2[bits3]; + + num_trits -= 16; + } + + if (num_trits == 0) { + return; + } + + /* get three octets */ + + bits24 = ((uint32_t)(*octets++)) << 16; + bits24 |= 
((uint32_t)(*octets++)) << 8; + bits24 |= (uint32_t)(*octets++); + + shift = 21; + while (num_trits) { + /* for each 3 bits in the three octets, output up to 2 trits + * until all trits needed are produced + */ + + bits3 = (bits24 >> shift) & 0x7; + shift -= 3; + *trits++ = bits_2_trit1[bits3]; + + if (--num_trits) { + *trits++ = bits_2_trit2[bits3]; + --num_trits; + } + } + + return; +} + +/* ntru_trits_2_bits + * + * Each 2 trits in an array of trits is converted to 3 bits, and the bits + * are packed in an array of octets. A multiple of 3 octets is output. + * Any bits in the final octets not derived from trits are zero. + * + * Returns TRUE if all trits were valid. + * Returns FALSE if invalid trits were found. + */ + +bool ntru_trits_2_bits( + uint8_t const *trits, /* in - pointer to array of trits */ + uint32_t num_trits, /* in - number of trits to convert */ + uint8_t *octets) /* out - address for array of octets */ +{ + bool all_trits_valid = TRUE; + uint32_t bits24; + uint32_t bits3; + uint32_t shift; + + while (num_trits >= 16) { + + /* convert each 2 trits to 3 bits and pack */ + + bits3 = *trits++ * 3; + bits3 += *trits++; + + if (bits3 > 7) { + bits3 = 7; + all_trits_valid = FALSE; + } + + bits24 = (bits3 << 21); + bits3 = *trits++ * 3; + bits3 += *trits++; + + if (bits3 > 7) { + bits3 = 7; + all_trits_valid = FALSE; + } + + bits24 |= (bits3 << 18); + bits3 = *trits++ * 3; + bits3 += *trits++; + + if (bits3 > 7) { + bits3 = 7; + all_trits_valid = FALSE; + } + + bits24 |= (bits3 << 15); + bits3 = *trits++ * 3; + bits3 += *trits++; + + if (bits3 > 7) { + bits3 = 7; + all_trits_valid = FALSE; + } + + bits24 |= (bits3 << 12); + bits3 = *trits++ * 3; + bits3 += *trits++; + + if (bits3 > 7) { + bits3 = 7; + all_trits_valid = FALSE; + } + + bits24 |= (bits3 << 9); + bits3 = *trits++ * 3; + bits3 += *trits++; + + if (bits3 > 7) { + bits3 = 7; + all_trits_valid = FALSE; + } + + bits24 |= (bits3 << 6); + bits3 = *trits++ * 3; + bits3 += *trits++; + + if (bits3 > 
7) { + bits3 = 7; + all_trits_valid = FALSE; + } + + bits24 |= (bits3 << 3); + bits3 = *trits++ * 3; + bits3 += *trits++; + + if (bits3 > 7) { + bits3 = 7; + all_trits_valid = FALSE; + } + + bits24 |= bits3; + num_trits -= 16; + + /* output three octets */ + + *octets++ = (uint8_t)((bits24 >> 16) & 0xff); + *octets++ = (uint8_t)((bits24 >> 8) & 0xff); + *octets++ = (uint8_t)(bits24 & 0xff); + } + + bits24 = 0; + shift = 21; + + while (num_trits) { + + /* convert each 2 trits to 3 bits and pack */ + + bits3 = *trits++ * 3; + + if (--num_trits) { + bits3 += *trits++; + --num_trits; + } + + if (bits3 > 7) { + bits3 = 7; + all_trits_valid = FALSE; + } + + bits24 |= (bits3 << shift); + shift -= 3; + } + + /* output three octets */ + + *octets++ = (uint8_t)((bits24 >> 16) & 0xff); + *octets++ = (uint8_t)((bits24 >> 8) & 0xff); + *octets++ = (uint8_t)(bits24 & 0xff); + + return all_trits_valid; +} + +/* ntru_coeffs_mod4_2_octets + * + * Takes an array of ring element coefficients mod 4 and packs the + * results into an octet string. + */ + +void ntru_coeffs_mod4_2_octets( + uint16_t num_coeffs, /* in - number of coefficients */ + uint16_t const *coeffs, /* in - pointer to coefficients */ + uint8_t *octets) /* out - address for octets */ +{ + uint8_t bits2; + int shift; + uint16_t i; + + *octets = 0; + shift = 6; + for (i = 0; i < num_coeffs; i++) { + bits2 = (uint8_t)(coeffs[i] & 0x3); + *octets |= bits2 << shift; + shift -= 2; + + if (shift < 0) { + ++octets; + *octets = 0; + shift = 6; + } + } + + return; +} + +/* ntru_trits_2_octet + * + * Packs 5 trits in an octet, where a trit is 0, 1, or 2 (-1). + */ + +void ntru_trits_2_octet( + uint8_t const *trits, /* in - pointer to trits */ + uint8_t *octet) /* out - address for octet */ +{ + int i; + + *octet = 0; + for (i = 4; i >= 0; i--) { + *octet = (*octet * 3) + trits[i]; + } + + return; +} + +/* ntru_octet_2_trits + * + * Unpacks an octet to 5 trits, where a trit is 0, 1, or 2 (-1). 
/* ntru_octet_2_trits
 *
 * Unpacks one octet into 5 trits (each 0, 1, or 2, where 2 represents -1)
 * by repeatedly peeling off the least significant base-3 digit.
 */

void ntru_octet_2_trits(
    uint8_t octet,  /* in  - octet to be unpacked */
    uint8_t *trits) /* out - address for trits */
{
    int t;

    for (t = 0; t < 5; t++) {
        trits[t] = octet % 3;
        octet = (uint8_t)((octet - trits[t]) / 3);
    }
}

/* ntru_indices_2_trits
 *
 * Writes a trit marker (1 for +1 coefficients, 2 for -1 coefficients)
 * into the output polynomial at every position named by the index list.
 * Positions not listed are left untouched.
 */

void ntru_indices_2_trits(
    uint16_t in_len,    /* in  - no. of indices */
    uint16_t const *in, /* in  - pointer to list of indices */
    bool plus1,         /* in  - TRUE if list holds +1 coefficients */
    uint8_t *out)       /* out - address of output polynomial */
{
    uint8_t const mark = plus1 ? 1 : 2;
    uint16_t k;

    for (k = 0; k < in_len; k++) {
        out[in[k]] = mark;
    }
}

/* ntru_packed_trits_2_indices
 *
 * Unpacks an array of N packed trits (5 per octet) and records the array
 * indices of all +1 trits and all -1 trits into two separate lists.
 */

void ntru_packed_trits_2_indices(
    uint8_t const *in,        /* in  - pointer to packed-trit octets */
    uint16_t num_trits,       /* in  - no. of packed trits */
    uint16_t *indices_plus1,  /* out - address for indices of +1 trits */
    uint16_t *indices_minus1) /* out - address for indices of -1 trits */
{
    uint8_t unpacked[5];
    uint16_t pos = 0;
    int k;

    /* full octets: 5 trits each */

    while (num_trits >= 5) {
        ntru_octet_2_trits(*in++, unpacked);
        num_trits -= 5;

        for (k = 0; k < 5; k++, pos++) {
            if (unpacked[k] == 1) {
                *indices_plus1++ = pos;
            } else if (unpacked[k] == 2) {
                *indices_minus1++ = pos;
            }
        }
    }

    /* trailing partial octet */

    if (num_trits) {
        ntru_octet_2_trits(*in, unpacked);

        for (k = 0; num_trits && (k < 5); k++, pos++) {
            if (unpacked[k] == 1) {
                *indices_plus1++ = pos;
            } else if (unpacked[k] == 2) {
                *indices_minus1++ = pos;
            }

            --num_trits;
        }
    }
}
of trits in array */ + uint8_t *buf, /* in - temp buf, N octets */ + uint8_t *out) /* out - address for packed octets */ +{ + + /* convert indices to an array of trits */ + + memset(buf, 0, num_trits); + ntru_indices_2_trits(num_plus1, indices, TRUE, buf); + ntru_indices_2_trits(num_minus1, indices + num_plus1, FALSE, buf); + + /* pack the array of trits */ + + while (num_trits >= 5) { + ntru_trits_2_octet(buf, out); + num_trits -= 5; + buf += 5; + ++out; + } + + if (num_trits) { + uint8_t trits[5]; + + memcpy(trits, buf, num_trits); + memset(trits + num_trits, 0, sizeof(trits) - num_trits); + ntru_trits_2_octet(trits, out); + } + + return; +} + +/* ntru_elements_2_octets + * + * Packs an array of n-bit elements into an array of + * ((in_len * n_bits) + 7) / 8 octets. + * NOTE: Assumes 8 < n_bits < 16. + */ + +void ntru_elements_2_octets( + uint16_t in_len, /* in - no. of elements to be packed */ + uint16_t const *in, /* in - ptr to elements to be packed */ + uint8_t n_bits, /* in - no. of bits in input element */ + uint8_t *out) /* out - addr for output octets */ +{ + uint16_t temp; + uint16_t shift; + uint16_t i; + + /* pack */ + + temp = 0; + shift = n_bits - 8; + i = 0; + while (i < in_len) { + /* add bits to temp to fill an octet and output the octet */ + temp |= in[i] >> shift; + *out++ = (uint8_t)(temp & 0xff); + if (shift > 8) { + /* next full octet is in current input word */ + + shift = shift - 8; + temp = 0; + } else { + shift = 8 - shift; + /* put remaining bits of input word in temp as partial octet, + * and increment index to next input word + */ + temp = in[i] << shift; + shift = n_bits - shift; + + ++i; + } + } + + /* output any bits remaining in last input word */ + + if (shift != n_bits - 8) { + *out++ = (uint8_t)(temp & 0xff); + } + + return; +} + +/* ntru_octets_2_elements + * + * Unpacks an octet string into an array of ((in_len * 8) / n_bits) + * n-bit elements. Any extra bits are discarded. + * NOTE: Assumes 8 < n_bits < 16. 
+ */ + +void ntru_octets_2_elements( + uint16_t in_len, /* in - no. of octets to be unpacked */ + uint8_t const *in, /* in - ptr to octets to be unpacked */ + uint8_t n_bits, /* in - no. of bits in output element */ + uint16_t *out) /* out - addr for output elements */ +{ + uint16_t temp; + uint16_t mask; + uint16_t shift; + uint16_t i; + + /* unpack */ + + temp = 0; + mask = (1 << n_bits) - 1; + shift = n_bits; + i = 0; + + while (i < in_len) { + if (shift > 8) { + /* the current octet will not fill the current element */ + + shift = shift - 8; + temp |= ((uint16_t) in[i]) << shift; + } else { + /* add bits from the current octet to fill the current element and + * output the element + */ + + shift = 8 - shift; + + temp |= ((uint16_t) in[i]) >> shift; + *out++ = temp & mask; + + /* add the remaining bits of the current octet to start an element */ + shift = n_bits - shift; + temp = ((uint16_t) in[i]) << shift; + } + ++i; + } + + return; +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.h b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.h new file mode 100644 index 0000000000000000000000000000000000000000..73ddd7e46fb9c19bbd0ad05bd130bed305a6441e --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.h @@ -0,0 +1,167 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. 
You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_ntru_convert.h + * + * Contents: Definitions and declarations for conversion routines + * for NTRUEncrypt, including packing, unpacking and others. + * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_NTRU_CONVERT_H +#define NTRU_CRYPTO_NTRU_CONVERT_H + +#include "ntru_crypto.h" + +/* function declarations */ + +/* ntru_bits_2_trits + * + * Each 3 bits in an array of octets is converted to 2 trits in an array + * of trits. + */ + +extern void +ntru_bits_2_trits( + uint8_t const *octets, /* in - pointer to array of octets */ + uint16_t num_trits, /* in - number of trits to produce */ + uint8_t *trits); /* out - address for array of trits */ + +/* ntru_trits_2_bits + * + * Each 2 trits in an array of trits is converted to 3 bits, and the bits + * are packed in an array of octets. A multiple of 3 octets is output. + * Any bits in the final octets not derived from trits are zero. + * + * Returns TRUE if all trits were valid. + * Returns FALSE if invalid trits were found. + */ + +extern bool +ntru_trits_2_bits( + uint8_t const *trits, /* in - pointer to array of trits */ + uint32_t num_trits, /* in - number of trits to convert */ + uint8_t *octets); /* out - address for array of octets */ + +/* ntru_coeffs_mod4_2_octets + * + * Takes an array of coefficients mod 4 and packs the results into an + * octet string. 
+ */ + +extern void +ntru_coeffs_mod4_2_octets( + uint16_t num_coeffs, /* in - number of coefficients */ + uint16_t const *coeffs, /* in - pointer to coefficients */ + uint8_t *octets); /* out - address for octets */ + +/* ntru_trits_2_octet + * + * Packs 5 trits in an octet, where a trit is 0, 1, or 2 (-1). + */ + +extern void +ntru_trits_2_octet( + uint8_t const *trits, /* in - pointer to trits */ + uint8_t *octet); /* out - address for octet */ + +/* ntru_octet_2_trits + * + * Unpacks an octet to 5 trits, where a trit is 0, 1, or 2 (-1). + */ + +extern void +ntru_octet_2_trits( + uint8_t octet, /* in - octet to be unpacked */ + uint8_t *trits); /* out - address for trits */ + +/* ntru_indices_2_trits + * + * Converts a list of the nonzero indices of a polynomial into an array of + * trits. + */ + +extern void +ntru_indices_2_trits( + uint16_t in_len, /* in - no. of indices */ + uint16_t const *in, /* in - pointer to list of indices */ + bool plus1, /* in - if list is +1 coefficients */ + uint8_t *out); /* out - address of output polynomial */ + +/* ntru_packed_trits_2_indices + * + * Unpacks an array of N trits and creates a list of array indices + * corresponding to trits = +1, and list of array indices corresponding to + * trits = -1. + */ + +extern void +ntru_packed_trits_2_indices( + uint8_t const *in, /* in - pointer to packed-trit octets */ + uint16_t num_trits, /* in - no. of packed trits */ + uint16_t *indices_plus1, /* out - address for indices of +1 trits */ + uint16_t *indices_minus1); /* out - address for indices of -1 trits */ + +/* ntru_indices_2_packed_trits + * + * Takes a list of array indices corresponding to elements whose values + * are +1 or -1, and packs the N-element array of trits described by these + * lists into octets, 5 trits per octet. + */ + +extern void +ntru_indices_2_packed_trits( + uint16_t const *indices, /* in - pointer to indices */ + uint16_t num_plus1, /* in - no. 
of indices for +1 trits */ + uint16_t num_minus1, /* in - no. of indices for -1 trits */ + uint16_t num_trits, /* in - N, no. of trits in array */ + uint8_t *buf, /* in - temp buf, N octets */ + uint8_t *out); /* out - address for packed octets */ + +/* ntru_elements_2_octets + * + * Packs an array of n-bit elements into an array of + * ((in_len * n_bits) + 7) / 8 octets, 8 < n_bits < 16. + */ + +extern void +ntru_elements_2_octets( + uint16_t in_len, /* in - no. of elements to be packed */ + uint16_t const *in, /* in - ptr to elements to be packed */ + uint8_t n_bits, /* in - no. of bits in input element */ + uint8_t *out); /* out - addr for output octets */ + +/* ntru_octets_2_elements + * + * Unpacks an octet string into an array of ((in_len * 8) / n_bits) + * n-bit elements, 8 < n < 16. Any extra bits are discarded. + */ + +extern void +ntru_octets_2_elements( + uint16_t in_len, /* in - no. of octets to be unpacked */ + uint8_t const *in, /* in - ptr to octets to be unpacked */ + uint8_t n_bits, /* in - no. of bits in output element */ + uint16_t *out); /* out - addr for output elements */ + +#endif /* NTRU_CRYPTO_NTRU_CONVERT_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt.c new file mode 100644 index 0000000000000000000000000000000000000000..8d93985125461345dc1b1d06a1dbc7348110411a --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt.c @@ -0,0 +1,1395 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. 
+ * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_ntru_encrypt.c + * + * Contents: Routines implementing NTRUEncrypt encryption and decryption and + * key generation. + * + *****************************************************************************/ + +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_encrypt_param_sets.h" +#include "ntru_crypto_ntru_encrypt_key.h" +#include "ntru_crypto_ntru_convert.h" +#include "ntru_crypto_ntru_poly.h" +#include "ntru_crypto_ntru_mgf1.h" +#include "ntru_crypto_drbg.h" + +/* ntru_crypto_ntru_encrypt + * + * Implements NTRU encryption (SVES) for the parameter set specified in + * the public key blob. + * + * Before invoking this function, a DRBG must be instantiated using + * ntru_crypto_drbg_instantiate() to obtain a DRBG handle, and in that + * instantiation the requested security strength must be at least as large + * as the security strength of the NTRU parameter set being used. + * Failure to instantiate the DRBG with the proper security strength will + * result in this function returning DRBG_ERROR_BASE + DRBG_BAD_LENGTH. + * + * The required minimum size of the output ciphertext buffer (ct) may be + * queried by invoking this function with ct = NULL. In this case, no + * encryption is performed, NTRU_OK is returned, and the required minimum + * size for ct is returned in ct_len. 
+ * + * When ct != NULL, at invocation *ct_len must be the size of the ct buffer. + * Upon return it is the actual size of the ciphertext. + * + * Returns NTRU_OK if successful. + * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if the DRBG handle is invalid. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer + * (other than ct) is NULL. + * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument + * (pubkey_blob_len or pt_len) is zero, or if pt_len exceeds the + * maximum plaintext length for the parameter set. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PUBLIC_KEY if the public-key blob is + * invalid (unknown format, corrupt, bad length). + * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the ciphertext buffer + * is too small. + * Returns NTRU_ERROR_BASE + NTRU_NO_MEMORY if memory needed cannot be + * allocated from the heap. + */ + +uint32_t +ntru_crypto_ntru_encrypt( + DRBG_HANDLE drbg_handle, /* in - handle of DRBG */ + uint16_t pubkey_blob_len, /* in - no. of octets in public key + blob */ + uint8_t const *pubkey_blob, /* in - pointer to public key */ + uint16_t pt_len, /* in - no. of octets in plaintext */ + uint8_t const *pt, /* in - pointer to plaintext */ + uint16_t *ct_len, /* in/out - no. of octets in ct, addr for + no. 
of octets in ciphertext */ + uint8_t *ct) /* out - address for ciphertext */ +{ + NTRU_ENCRYPT_PARAM_SET *params = NULL; + uint8_t const *pubkey_packed = NULL; + uint8_t pubkey_pack_type = 0x00; + uint16_t packed_ct_len; + size_t scratch_buf_len; + uint32_t dr; + uint32_t dr1 = 0; + uint32_t dr2 = 0; + uint32_t dr3 = 0; + uint16_t num_scratch_polys; + uint16_t pad_deg; + uint16_t ring_mult_tmp_len; + uint16_t *scratch_buf = NULL; + uint16_t *ringel_buf = NULL; + uint16_t *r_buf = NULL; + uint8_t *b_buf = NULL; + uint8_t *tmp_buf = NULL; + bool msg_rep_good = FALSE; + NTRU_CRYPTO_HASH_ALGID hash_algid; + uint8_t md_len; + uint16_t mod_q_mask; + uint32_t result = NTRU_OK; + + /* check for bad parameters */ + + if (!pubkey_blob || !ct_len) { + NTRU_RET(NTRU_BAD_PARAMETER); + } + + if (pubkey_blob_len == 0) { + NTRU_RET(NTRU_BAD_LENGTH); + } + + /* get a pointer to the parameter-set parameters, the packing type for + * the public key, and a pointer to the packed public key + */ + + if (!ntru_crypto_ntru_encrypt_key_parse(TRUE /* pubkey */, pubkey_blob_len, + pubkey_blob, &pubkey_pack_type, + NULL, ¶ms, &pubkey_packed, + NULL)) { + NTRU_RET(NTRU_BAD_PUBLIC_KEY); + } + + if (params->q_bits <= 8 || params->q_bits >= 16 || pubkey_pack_type != NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS) { + NTRU_RET(NTRU_UNSUPPORTED_PARAM_SET); + } + + /* return the ciphertext size if requested */ + + packed_ct_len = (params->N * params->q_bits + 7) >> 3; + + if (!ct) { + *ct_len = packed_ct_len; + NTRU_RET(NTRU_OK); + } + + /* check the ciphertext buffer size */ + + if (*ct_len < packed_ct_len) { + NTRU_RET(NTRU_BUFFER_TOO_SMALL); + } + + /* check that a plaintext was provided */ + + if (!pt) { + NTRU_RET(NTRU_BAD_PARAMETER); + } + + /* check the plaintext length */ + + if (pt_len > params->m_len_max) { + NTRU_RET(NTRU_BAD_LENGTH); + } + + /* allocate memory for all operations */ + + ntru_ring_mult_indices_memreq(params->N, &num_scratch_polys, &pad_deg); + + if (params->is_product_form) { + dr1 
= params->dF_r & 0xff; + dr2 = (params->dF_r >> 8) & 0xff; + dr3 = (params->dF_r >> 16) & 0xff; + dr = dr1 + dr2 + dr3; + num_scratch_polys += 1; /* mult_product_indices needs space for a + mult_indices and one intermediate result */ + } else { + dr = params->dF_r; + } + ring_mult_tmp_len = num_scratch_polys * pad_deg; + + scratch_buf_len = (ring_mult_tmp_len << 1) + + /* X-byte temp buf for ring mult and + other intermediate results */ + (pad_deg << 1) + /* 2N-byte buffer for ring elements + and overflow from temp buffer */ + (dr << 2) + /* buffer for r indices */ + params->b_len; + /* buffer for b */ + scratch_buf = MALLOC(scratch_buf_len); + if (!scratch_buf) { + NTRU_RET(NTRU_OUT_OF_MEMORY); + } + + ringel_buf = scratch_buf + ring_mult_tmp_len; + r_buf = ringel_buf + pad_deg; + b_buf = (uint8_t *) (r_buf + (dr << 1)); + tmp_buf = (uint8_t *) scratch_buf; + + /* set hash algorithm and seed length based on security strength */ + + if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA1) { + hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA1; + md_len = SHA_1_MD_LEN; + } else if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA256) { + hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA256; + md_len = SHA_256_MD_LEN; + } else { + FREE(scratch_buf); + NTRU_RET(NTRU_UNSUPPORTED_PARAM_SET); + } + + /* set constants */ + + mod_q_mask = params->q - 1; + + /* loop until a message representative with proper weight is achieved */ + + do { + uint8_t *ptr = tmp_buf; + + /* get b */ + result = ntru_crypto_drbg_generate(drbg_handle, + params->sec_strength_len << 3, + params->b_len, b_buf); + + if (result == NTRU_OK) { + /* form sData (OID || m || b || hTrunc) */ + + memcpy(ptr, params->OID, 3); + ptr += 3; + memcpy(ptr, pt, pt_len); + ptr += pt_len; + memcpy(ptr, b_buf, params->b_len); + ptr += params->b_len; + memcpy(ptr, pubkey_packed, params->sec_strength_len); + ptr += params->sec_strength_len; + + /* generate r */ + + result = ntru_gen_poly(hash_algid, md_len, + params->min_IGF_hash_calls, + 
(uint16_t)(ptr - tmp_buf), + tmp_buf, tmp_buf, + params->N, params->c_bits, + params->no_bias_limit, + params->is_product_form, + params->dF_r << 1, r_buf); + } + + if (result == NTRU_OK) { + uint16_t pubkey_packed_len; + + /* unpack the public key */ + pubkey_packed_len = (params->N * params->q_bits + 7) >> 3; + ntru_octets_2_elements(pubkey_packed_len, pubkey_packed, + params->q_bits, ringel_buf); + + /* form R = h * r */ + + if (params->is_product_form) { + ntru_ring_mult_product_indices(ringel_buf, (uint16_t) dr1, + (uint16_t) dr2, (uint16_t) dr3, + r_buf, params->N, params->q, + scratch_buf, ringel_buf); + } else { + ntru_ring_mult_indices(ringel_buf, (uint16_t) dr, (uint16_t) dr, + r_buf, params->N, params->q, + scratch_buf, ringel_buf); + } + + /* form R mod 4 */ + + ntru_coeffs_mod4_2_octets(params->N, ringel_buf, tmp_buf); + + /* form mask */ + + result = ntru_mgftp1(hash_algid, md_len, + params->min_MGF_hash_calls, + (params->N + 3) / 4, tmp_buf, + tmp_buf + params->N, params->N, tmp_buf); + } + + if (result == NTRU_OK) { + uint8_t *Mtrin_buf = tmp_buf + params->N; + uint8_t *M_buf = Mtrin_buf + params->N - + (params->b_len + params->m_len_len + + params->m_len_max + 2); + uint16_t i; + + /* form the padded message M */ + + ptr = M_buf; + memcpy(ptr, b_buf, params->b_len); + ptr += params->b_len; + if (params->m_len_len == 2) + *ptr++ = (uint8_t)((pt_len >> 8) & 0xff); + *ptr++ = (uint8_t)(pt_len & 0xff); + memcpy(ptr, pt, pt_len); + ptr += pt_len; + + /* add an extra zero byte in case without it the bit string + * is not a multiple of 3 bits and therefore might not be + * able to produce enough trits + */ + + memset(ptr, 0, params->m_len_max - pt_len + 2); + + /* convert M to trits (Mbin to Mtrin) */ + + ntru_bits_2_trits(M_buf, params->N, Mtrin_buf); + + /* form the msg representative m' by adding Mtrin to mask, mod p */ + + for (i = 0; i < params->N; i++) { + tmp_buf[i] = tmp_buf[i] + Mtrin_buf[i]; + + if (tmp_buf[i] >= 3) { + tmp_buf[i] -= 3; + } + } 
+ + /* check that message representative meets minimum weight + * requirements + */ + msg_rep_good = ntru_poly_check_min_weight(params->N, tmp_buf, + params->min_msg_rep_wt); + } + } while ((result == NTRU_OK) && !msg_rep_good); + + if (result == NTRU_OK) { + uint16_t i; + + /* form ciphertext e by adding m' to R mod q */ + + for (i = 0; i < params->N; i++) { + if (tmp_buf[i] == 1) { + ringel_buf[i] = (ringel_buf[i] + 1) & mod_q_mask; + } else if (tmp_buf[i] == 2) { + ringel_buf[i] = (ringel_buf[i] - 1) & mod_q_mask; + } else { + ; + } + } + + /* pack ciphertext */ + + ntru_elements_2_octets(params->N, ringel_buf, params->q_bits, ct); + *ct_len = packed_ct_len; + } + + /* cleanup */ + + memset(scratch_buf, 0, scratch_buf_len); + FREE(scratch_buf); + + return result; +} + +/* ntru_crypto_ntru_decrypt + * + * Implements NTRU decryption (SVES) for the parameter set specified in + * the private key blob. + * + * The maximum size of the output plaintext may be queried by invoking + * this function with pt = NULL. In this case, no decryption is performed, + * NTRU_OK is returned, and the maximum size the plaintext could be is + * returned in pt_len. + * Note that until the decryption is performed successfully, the actual size + * of the resulting plaintext cannot be known. + * + * When pt != NULL, at invocation *pt_len must be the size of the pt buffer. + * Upon return it is the actual size of the plaintext. + * + * Returns NTRU_OK if successful. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer + * (other than pt) is NULL. + * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument + * (privkey_blob) is zero, or if ct_len is invalid for the parameter set. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PRIVATE_KEY if the private-key blob is + * invalid (unknown format, corrupt, bad length). + * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the plaintext buffer + * is too small. 
+ * Returns NTRU_ERROR_BASE + NTRU_NO_MEMORY if memory needed cannot be + * allocated from the heap. + * Returns NTRU_ERROR_BASE + NTRU_FAIL if a decryption error occurs. + */ + +uint32_t +ntru_crypto_ntru_decrypt( + uint16_t privkey_blob_len, /* in - no. of octets in private key + blob */ + uint8_t const *privkey_blob, /* in - pointer to private key */ + uint16_t ct_len, /* in - no. of octets in ciphertext */ + uint8_t const *ct, /* in - pointer to ciphertext */ + uint16_t *pt_len, /* in/out - no. of octets in pt, addr for + no. of octets in plaintext */ + uint8_t *pt) /* out - address for plaintext */ +{ + NTRU_ENCRYPT_PARAM_SET *params = NULL; + uint8_t const *privkey_packed = NULL; + uint8_t const *pubkey_packed = NULL; + uint8_t privkey_pack_type = 0x00; + uint8_t pubkey_pack_type = 0x00; + size_t scratch_buf_len; + uint32_t dF_r; + uint32_t dF_r1 = 0; + uint32_t dF_r2 = 0; + uint32_t dF_r3 = 0; + uint16_t num_scratch_polys; + uint16_t pad_deg; + uint16_t ring_mult_tmp_len; + uint16_t *scratch_buf = NULL; + uint16_t *ringel_buf1 = NULL; + uint16_t *ringel_buf2 = NULL; + uint16_t *i_buf = NULL; + uint8_t *m_buf = NULL; + uint8_t *tmp_buf = NULL; + uint8_t *Mtrin_buf = NULL; + uint8_t *M_buf = NULL; + uint8_t *ptr = NULL; + NTRU_CRYPTO_HASH_ALGID hash_algid; + uint8_t md_len; + uint16_t mod_q_mask; + uint16_t q_mod_p; + uint16_t cm_len = 0; + uint16_t num_zeros; + uint16_t i; + bool decryption_ok = TRUE; + uint32_t result = NTRU_OK; + + /* check for bad parameters */ + + if (!privkey_blob || !pt_len) { + NTRU_RET(NTRU_BAD_PARAMETER); + } + + if (privkey_blob_len == 0) { + NTRU_RET(NTRU_BAD_LENGTH); + } + + /* get a pointer to the parameter-set parameters, the packing types for + * the public and private keys, and pointers to the packed public and + * private keys + */ + + if (!ntru_crypto_ntru_encrypt_key_parse(FALSE /* privkey */, + privkey_blob_len, + privkey_blob, &pubkey_pack_type, + &privkey_pack_type, ¶ms, + &pubkey_packed, &privkey_packed)) { + 
NTRU_RET(NTRU_BAD_PRIVATE_KEY); + } + + if (params->q_bits <= 8 || params->q_bits >= 16 || params->N_bits <= 8 || params->N_bits >= 16 || pubkey_pack_type != NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS || (privkey_pack_type != NTRU_ENCRYPT_KEY_PACKED_TRITS && privkey_pack_type != NTRU_ENCRYPT_KEY_PACKED_INDICES)) { + NTRU_RET(NTRU_UNSUPPORTED_PARAM_SET); + } + + /* return the max plaintext size if requested */ + + if (!pt) { + *pt_len = params->m_len_max; + NTRU_RET(NTRU_OK); + } + + /* check that a ciphertext was provided */ + + if (!ct) { + NTRU_RET(NTRU_BAD_PARAMETER); + } + + /* cannot check the plaintext buffer size until after the plaintext + * is derived, if we allow plaintext buffers only as large as the + * actual plaintext + */ + + /* check the ciphertext length */ + + if (ct_len != (params->N * params->q_bits + 7) >> 3) { + NTRU_RET(NTRU_BAD_LENGTH); + } + + /* allocate memory for all operations */ + + ntru_ring_mult_indices_memreq(params->N, &num_scratch_polys, &pad_deg); + + if (params->is_product_form) { + dF_r1 = params->dF_r & 0xff; + dF_r2 = (params->dF_r >> 8) & 0xff; + dF_r3 = (params->dF_r >> 16) & 0xff; + dF_r = dF_r1 + dF_r2 + dF_r3; + num_scratch_polys += 1; /* mult_product_indices needs space for a + mult_indices and one intermediate result */ + } else { + dF_r = params->dF_r; + } + ring_mult_tmp_len = num_scratch_polys * pad_deg; + + scratch_buf_len = (ring_mult_tmp_len << 1) + + /* X-byte temp buf for ring mult and + other intermediate results */ + (pad_deg << 2) + /* 2 2N-byte bufs for ring elements + and overflow from temp buffer */ + (dF_r << 2) + /* buffer for F, r indices */ + params->m_len_max; /* buffer for plaintext */ + + scratch_buf = MALLOC(scratch_buf_len); + if (!scratch_buf) { + NTRU_RET(NTRU_OUT_OF_MEMORY); + } + + ringel_buf1 = scratch_buf + ring_mult_tmp_len; + ringel_buf2 = ringel_buf1 + pad_deg; + i_buf = ringel_buf2 + pad_deg; + m_buf = (uint8_t *) (i_buf + (dF_r << 1)); + tmp_buf = (uint8_t *) scratch_buf; + Mtrin_buf = 
(uint8_t *) ringel_buf1; + M_buf = Mtrin_buf + params->N; + + /* set hash algorithm and seed length based on security strength */ + + if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA1) { + hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA1; + md_len = SHA_1_MD_LEN; + } else if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA256) { + hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA256; + md_len = SHA_256_MD_LEN; + } else { + FREE(scratch_buf); + NTRU_RET(NTRU_UNSUPPORTED_PARAM_SET); + } + + /* set constants */ + + mod_q_mask = params->q - 1; + q_mod_p = params->q % 3; + + /* unpack the ciphertext */ + + ntru_octets_2_elements(ct_len, ct, params->q_bits, ringel_buf2); + + /* unpack the private key */ + + if (privkey_pack_type == NTRU_ENCRYPT_KEY_PACKED_TRITS) { + ntru_packed_trits_2_indices(privkey_packed, params->N, i_buf, + i_buf + dF_r); + + } else if (privkey_pack_type == NTRU_ENCRYPT_KEY_PACKED_INDICES) { + ntru_octets_2_elements( + (((uint16_t) dF_r << 1) * params->N_bits + 7) >> 3, + privkey_packed, params->N_bits, i_buf); + } else { + /* Unreachable due to supported parameter set check above */ + } + + /* form cm': + * F * e + * A = e * (1 + pF) mod q = e + pFe mod q + * a = A in the range [-q/2, q/2) + * cm' = a mod p + * + * first compute F*e w/o reduction mod q and store in ringel_buf1 + */ + if (params->is_product_form) { + ntru_ring_mult_product_indices(ringel_buf2, (uint16_t) dF_r1, + (uint16_t) dF_r2, (uint16_t) dF_r3, + i_buf, params->N, params->q, + scratch_buf, ringel_buf1); + } else { + ntru_ring_mult_indices(ringel_buf2, (uint16_t) dF_r, (uint16_t) dF_r, + i_buf, params->N, params->q, + scratch_buf, ringel_buf1); + } + + /* then let ringel_buf1 = e + 3*ringel_buf1 (mod q) = e + pFe mod q + * lift ringel_buf1 elements to integers in the range [-q/2, q/2) + * let Mtrin_buf = ringel_buf1 (mod 3) = cm' + */ + for (i = 0; i < params->N; i++) { + ringel_buf1[i] = (ringel_buf2[i] + 3 * ringel_buf1[i]) & mod_q_mask; + + if (ringel_buf1[i] >= (params->q >> 1)) { + 
ringel_buf1[i] = ringel_buf1[i] - q_mod_p; + } + + Mtrin_buf[i] = (uint8_t)(ringel_buf1[i] % 3); + } + + /* check that the candidate message representative meets minimum weight + * requirements + */ + if (!ntru_poly_check_min_weight(params->N, + Mtrin_buf, params->min_msg_rep_wt)) { + decryption_ok = FALSE; + } + + /* form cR = e - cm' mod q */ + + for (i = 0; i < params->N; i++) { + if (Mtrin_buf[i] == 1) { + ringel_buf2[i] = (ringel_buf2[i] - 1) & mod_q_mask; + } else if (Mtrin_buf[i] == 2) { + ringel_buf2[i] = (ringel_buf2[i] + 1) & mod_q_mask; + } else { + ; + } + } + + /* form cR mod 4 */ + + ntru_coeffs_mod4_2_octets(params->N, ringel_buf2, tmp_buf); + + /* form mask */ + + result = ntru_mgftp1(hash_algid, md_len, + params->min_MGF_hash_calls, + (params->N + 3) / 4, tmp_buf, + tmp_buf + params->N, params->N, tmp_buf); + + if (result == NTRU_OK) { + /* form cMtrin by subtracting mask from cm', mod p */ + + for (i = 0; i < params->N; i++) { + Mtrin_buf[i] = Mtrin_buf[i] - tmp_buf[i]; + + if (Mtrin_buf[i] >= 3) { + Mtrin_buf[i] += 3; + } + } + + /* convert cMtrin to cM (Mtrin to Mbin) */ + + if (!ntru_trits_2_bits(Mtrin_buf, params->N, M_buf)) { + decryption_ok = FALSE; + } + + /* validate the padded message cM and copy cm to m_buf */ + + ptr = M_buf + params->b_len; + + if (params->m_len_len == 2) { + cm_len = (uint16_t)(*ptr++) << 8; + } + + cm_len |= (uint16_t)(*ptr++); + + if (cm_len > params->m_len_max) { + cm_len = params->m_len_max; + decryption_ok = FALSE; + } + + memcpy(m_buf, ptr, cm_len); + ptr += cm_len; + num_zeros = params->m_len_max - cm_len + 1; + + for (i = 0; i < num_zeros; i++) { + if (ptr[i] != 0) { + decryption_ok = FALSE; + } + } + + /* form sData (OID || m || b || hTrunc) */ + + ptr = tmp_buf; + memcpy(ptr, params->OID, 3); + ptr += 3; + memcpy(ptr, m_buf, cm_len); + ptr += cm_len; + memcpy(ptr, M_buf, params->b_len); + ptr += params->b_len; + memcpy(ptr, pubkey_packed, params->sec_strength_len); + ptr += params->sec_strength_len; + + /* 
generate cr */ + + result = ntru_gen_poly(hash_algid, md_len, + params->min_IGF_hash_calls, + (uint16_t)(ptr - tmp_buf), + tmp_buf, tmp_buf, + params->N, params->c_bits, + params->no_bias_limit, + params->is_product_form, + params->dF_r << 1, i_buf); + } + + if (result == NTRU_OK) { + /* unpack the public key */ + + { + uint16_t pubkey_packed_len; + pubkey_packed_len = (params->N * params->q_bits + 7) >> 3; + ntru_octets_2_elements(pubkey_packed_len, pubkey_packed, + params->q_bits, ringel_buf1); + } + + /* form cR' = h * cr */ + + if (params->is_product_form) { + ntru_ring_mult_product_indices(ringel_buf1, (uint16_t) dF_r1, + (uint16_t) dF_r2, (uint16_t) dF_r3, + i_buf, params->N, params->q, + scratch_buf, ringel_buf1); + } else { + ntru_ring_mult_indices(ringel_buf1, (uint16_t) dF_r, (uint16_t) dF_r, + i_buf, params->N, params->q, + scratch_buf, ringel_buf1); + } + + /* compare cR' to cR */ + + for (i = 0; i < params->N; i++) { + if (ringel_buf1[i] != ringel_buf2[i]) { + decryption_ok = FALSE; + } + } + + /* output plaintext and plaintext length */ + + if (decryption_ok) { + if (*pt_len < cm_len) { + memset(scratch_buf, 0, scratch_buf_len); + FREE(scratch_buf); + NTRU_RET(NTRU_BUFFER_TOO_SMALL); + } + + memcpy(pt, m_buf, cm_len); + *pt_len = cm_len; + } + } + + /* cleanup */ + + memset(scratch_buf, 0, scratch_buf_len); + FREE(scratch_buf); + + if (!decryption_ok) { + NTRU_RET(NTRU_FAIL); + } + + return result; +} + +/* ntru_crypto_ntru_encrypt_keygen + * + * Implements key generation for NTRUEncrypt for the parameter set specified. + * + * Before invoking this function, a DRBG must be instantiated using + * ntru_crypto_drbg_instantiate() to obtain a DRBG handle, and in that + * instantiation the requested security strength must be at least as large + * as the security strength of the NTRU parameter set being used. + * Failure to instantiate the DRBG with the proper security strength will + * result in this function returning DRBG_ERROR_BASE + DRBG_BAD_LENGTH. 
+ * + * The required minimum size of the output public-key buffer (pubkey_blob) + * may be queried by invoking this function with pubkey_blob = NULL. + * In this case, no key generation is performed, NTRU_OK is returned, and + * the required minimum size for pubkey_blob is returned in pubkey_blob_len. + * + * The required minimum size of the output private-key buffer (privkey_blob) + * may be queried by invoking this function with privkey_blob = NULL. + * In this case, no key generation is performed, NTRU_OK is returned, and + * the required minimum size for privkey_blob is returned in privkey_blob_len. + * + * The required minimum sizes of both pubkey_blob and privkey_blob may be + * queried as described above, in a single invocation of this function. + * + * When pubkey_blob != NULL and privkey_blob != NULL, at invocation + * *pubkey_blob_len must be the size of the pubkey_blob buffer and + * *privkey_blob_len must be the size of the privkey_blob buffer. + * Upon return, *pubkey_blob_len is the actual size of the public-key blob + * and *privkey_blob_len is the actual size of the private-key blob. + * + * Returns NTRU_OK if successful. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer + * (other than pubkey_blob or privkey_blob) is NULL. + * Returns NTRU_ERROR_BASE + NTRU_INVALID_PARAMETER_SET if the parameter-set + * ID is invalid. + * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument is invalid. + * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if either the pubkey_blob + * buffer or the privkey_blob buffer is too small. + * Returns NTRU_ERROR_BASE + NTRU_NO_MEMORY if memory needed cannot be + * allocated from the heap. + * Returns NTRU_ERROR_BASE + NTRU_FAIL if the polynomial generated for f is + * not invertible in (Z/qZ)[X]/(X^N - 1), which is extremely unlikely. + * Should this occur, this function should simply be invoked again. 
+ */ + +uint32_t +ntru_crypto_ntru_encrypt_keygen( + DRBG_HANDLE drbg_handle, /* in - handle of DRBG */ + NTRU_ENCRYPT_PARAM_SET_ID param_set_id, /* in - parameter set ID */ + uint16_t *pubkey_blob_len, /* in/out - no. of octets in + pubkey_blob, addr + for no. of octets + in pubkey_blob */ + uint8_t *pubkey_blob, /* out - address for + public key blob */ + uint16_t *privkey_blob_len, /* in/out - no. of octets in + privkey_blob, addr + for no. of octets + in privkey_blob */ + uint8_t *privkey_blob) /* out - address for + private key blob */ +{ + NTRU_ENCRYPT_PARAM_SET *params = NULL; + uint16_t public_key_blob_len; + uint16_t private_key_blob_len; + uint8_t pubkey_pack_type; + uint8_t privkey_pack_type; + size_t scratch_buf_len; + uint32_t dF; + uint32_t dF1 = 0; + uint32_t dF2 = 0; + uint32_t dF3 = 0; + uint16_t pad_deg; + uint16_t total_polys; + uint16_t num_scratch_polys; + uint16_t *scratch_buf = NULL; + uint16_t *ringel_buf1 = NULL; + uint16_t *ringel_buf2 = NULL; + uint16_t *F_buf = NULL; + uint8_t *tmp_buf = NULL; + uint16_t mod_q_mask; + NTRU_CRYPTO_HASH_ALGID hash_algid; + uint8_t md_len; + uint16_t seed_len; + uint32_t result = NTRU_OK; + + /* get a pointer to the parameter-set parameters */ + + if ((params = ntru_encrypt_get_params_with_id(param_set_id)) == NULL) { + NTRU_RET(NTRU_INVALID_PARAMETER_SET); + } + + /* check for bad parameters */ + + if (!pubkey_blob_len || !privkey_blob_len) { + NTRU_RET(NTRU_BAD_PARAMETER); + } + + /* get public and private key packing types and blob lengths */ + + ntru_crypto_ntru_encrypt_key_get_blob_params(params, &pubkey_pack_type, + &public_key_blob_len, + &privkey_pack_type, + &private_key_blob_len); + + /* return the pubkey_blob size and/or privkey_blob size if requested */ + + if (!pubkey_blob || !privkey_blob) { + if (!pubkey_blob) { + *pubkey_blob_len = public_key_blob_len; + } + + if (!privkey_blob) { + *privkey_blob_len = private_key_blob_len; + } + + NTRU_RET(NTRU_OK); + } + + /* check size of output buffers 
*/ + + if ((*pubkey_blob_len < public_key_blob_len) || + (*privkey_blob_len < private_key_blob_len)) { + NTRU_RET(NTRU_BUFFER_TOO_SMALL); + } + + /* Allocate memory for all operations. We need: + * - 2 polynomials for results: ringel_buf1 and ringel_buf2. + * - scratch space for ntru_ring_mult_coefficients (which is + * implementation dependent) plus one additional polynomial + * of the same size for ntru_ring_lift_inv_pow2_x. + * - 2*dF coefficients for F + */ + ntru_ring_mult_coefficients_memreq(params->N, &num_scratch_polys, &pad_deg); + num_scratch_polys += 1; /* ntru_ring_lift_... */ + + total_polys = num_scratch_polys; + if (params->is_product_form) { + dF1 = params->dF_r & 0xff; + dF2 = (params->dF_r >> 8) & 0xff; + dF3 = (params->dF_r >> 16) & 0xff; + dF = dF1 + dF2 + dF3; + /* For product form keys we can overlap ringel_buf1 + * and the scratch space since mult. by f uses F_buf. + * so only add room for ringel_buf2 */ + num_scratch_polys -= 1; + total_polys += 1; + } else { + dF = params->dF_r; + total_polys += 2; /* ringel_buf{1,2} */ + } + + scratch_buf_len = ((size_t)(total_polys * pad_deg)) * sizeof(uint16_t); + scratch_buf_len += 2 * dF * sizeof(uint16_t); + scratch_buf = MALLOC(scratch_buf_len); + if (!scratch_buf) { + NTRU_RET(NTRU_OUT_OF_MEMORY); + } + memset(scratch_buf, 0, scratch_buf_len); + + ringel_buf1 = scratch_buf + num_scratch_polys * pad_deg; + ringel_buf2 = ringel_buf1 + pad_deg; + F_buf = ringel_buf2 + pad_deg; + tmp_buf = (uint8_t *) scratch_buf; + + /* set hash algorithm and seed length based on security strength */ + + if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA1) { + hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA1; + md_len = SHA_1_MD_LEN; + } else if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA256) { + hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA256; + md_len = SHA_256_MD_LEN; + } else { + FREE(scratch_buf); + NTRU_RET(NTRU_UNSUPPORTED_PARAM_SET); + } + + seed_len = 2 * params->sec_strength_len; + + /* set constants */ + + 
mod_q_mask = params->q - 1; + + /* get random bytes for seed for generating trinary F + * as a list of indices + */ + + result = ntru_crypto_drbg_generate(drbg_handle, + params->sec_strength_len << 3, + seed_len, tmp_buf); + + if (result == NTRU_OK) { + + /* generate F */ + + result = ntru_gen_poly(hash_algid, md_len, + params->min_IGF_hash_calls, + seed_len, tmp_buf, tmp_buf, + params->N, params->c_bits, + params->no_bias_limit, + params->is_product_form, + params->dF_r << 1, F_buf); + } + + if (result == NTRU_OK) { + uint32_t i; + + memset(ringel_buf1, 0, params->N * sizeof(uint16_t)); + + /* form F as a ring element */ + + if (params->is_product_form) { + uint32_t dF3_offset = (dF1 + dF2) << 1; + + /* form F1 as a ring element */ + + for (i = 0; i < dF1; i++) { + ringel_buf1[F_buf[i]] = 1; + } + + for (; i < (dF1 << 1); i++) { + ringel_buf1[F_buf[i]] = mod_q_mask; + } + + /* form F1 * F2 */ + + ntru_ring_mult_indices(ringel_buf1, (uint16_t) dF2, (uint16_t) dF2, + F_buf + (dF1 << 1), params->N, params->q, + scratch_buf, ringel_buf1); + + /* form (F1 * F2) + F3 */ + + for (i = 0; i < dF3; i++) { + uint16_t index = F_buf[dF3_offset + i]; + ringel_buf1[index] = (ringel_buf1[index] + 1) & mod_q_mask; + } + + for (; i < (dF3 << 1); i++) { + uint16_t index = F_buf[dF3_offset + i]; + ringel_buf1[index] = (ringel_buf1[index] - 1) & mod_q_mask; + } + + } else { + /* form F as a ring element */ + + for (i = 0; i < dF; i++) { + ringel_buf1[F_buf[i]] = 1; + } + + for (; i < (dF << 1); i++) { + ringel_buf1[F_buf[i]] = mod_q_mask; + } + } + + /* form f = 1 + pF */ + + for (i = 0; i < params->N; i++) { + ringel_buf1[i] = (ringel_buf1[i] * 3) & mod_q_mask; + } + + ringel_buf1[0] = (ringel_buf1[0] + 1) & mod_q_mask; + + /* find f^-1 in (Z/2Z)[X]/(X^N - 1) */ + + if (!ntru_ring_inv(ringel_buf1, params->N, scratch_buf, ringel_buf2)) { + result = NTRU_RESULT(NTRU_FAIL); + } + } + + if (result == NTRU_OK) { + /* lift f^-1 in (Z/2Z)[X]/(X^N - 1) to f^-1 in (Z/qZ)[X]/(X^N -1) */ + if 
(params->is_product_form) { + result = ntru_ring_lift_inv_pow2_product(ringel_buf2, + (uint16_t) dF1, (uint16_t) dF2, (uint16_t) dF3, + F_buf, params->N, params->q, scratch_buf); + } else { + result = ntru_ring_lift_inv_pow2_standard(ringel_buf2, + ringel_buf1, params->N, params->q, scratch_buf); + } + } + + if (result == NTRU_OK) { + + /* get random bytes for seed for generating trinary g + * as a list of indices + */ + result = ntru_crypto_drbg_generate(drbg_handle, + params->sec_strength_len << 3, + seed_len, tmp_buf); + } + + if (result == NTRU_OK) { + uint16_t min_IGF_hash_calls = + ((((params->dg << 2) + 2) * params->N_bits) + (md_len << 3) - 1) / + (md_len << 3); + + /* generate g */ + + result = ntru_gen_poly(hash_algid, md_len, + (uint8_t) min_IGF_hash_calls, + seed_len, tmp_buf, tmp_buf, + params->N, params->c_bits, + params->no_bias_limit, FALSE, + (params->dg << 1) + 1, ringel_buf1); + } + + if (result == NTRU_OK) { + uint16_t i; + + /* compute h = p * (f^-1 * g) mod q */ + + ntru_ring_mult_indices(ringel_buf2, params->dg + 1, params->dg, + ringel_buf1, params->N, params->q, scratch_buf, + ringel_buf2); + + for (i = 0; i < params->N; i++) { + ringel_buf2[i] = (ringel_buf2[i] * 3) & mod_q_mask; + } + + /* create public key blob */ + + result = ntru_crypto_ntru_encrypt_key_create_pubkey_blob(params, + ringel_buf2, pubkey_pack_type, pubkey_blob); + *pubkey_blob_len = public_key_blob_len; + } + + if (result == NTRU_OK) { + /* create private key blob */ + result = ntru_crypto_ntru_encrypt_key_create_privkey_blob(params, + ringel_buf2, F_buf, privkey_pack_type, tmp_buf, privkey_blob); + *privkey_blob_len = private_key_blob_len; + } + + /* cleanup */ + + memset(scratch_buf, 0, scratch_buf_len); + FREE(scratch_buf); + + return result; +} + +/* DER-encoding prefix template for NTRU public keys, + * with parameter-set-specific fields nomalized + */ + +static uint8_t const der_prefix_template[] = { + 0x30, 0x82, + 0x00, 0x25, /* add pubkey length 2 */ + 0x30, 
0x1a, 0x06, 0x0b, 0x2b, 0x06, 0x01, + 0x04, 0x01, 0xc1, 0x16, 0x01, 0x01, 0x01, + 0x01, /* end of NTRU OID compare */ + 0x06, 0x0b, 0x2b, 0x06, 0x01, 0x04, 0x01, + 0xc1, 0x16, 0x01, 0x01, 0x02, + 0x00, /* set param-set DER id 31 */ + 0x03, 0x82, + 0x00, 0x05, /* add pubkey length 34 */ + 0x00, 0x04, 0x82, + 0x00, 0x00, /* add pubkey length 39 */ +}; + +/* add_16_to_8s + * + * adds a 16-bit value to two bytes + */ + +static void +add_16_to_8s( + uint16_t a, + uint8_t *b) { + uint16_t tmp = ((uint16_t) b[0] << 8) + b[1]; + + tmp = tmp + a; + b[0] = (uint8_t)((tmp >> 8) & 0xff); + b[1] = (uint8_t)(tmp & 0xff); + + return; +} + +/* sub_16_from_8s + * + * subtracts a 16-bit value from two bytes + */ + +static void +sub_16_from_8s( + uint16_t a, + uint8_t *b) { + uint16_t tmp = ((uint16_t) b[0] << 8) + b[1]; + + tmp = tmp - a; + b[0] = (uint8_t)((tmp >> 8) & 0xff); + b[1] = (uint8_t)(tmp & 0xff); + + return; +} + +/* ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo + * + * DER-encodes an NTRUEncrypt public-key from a public-key blob into a + * SubjectPublicKeyInfo field for inclusion in an X.509 certificate. + * + * The required minimum size of the output SubjectPublicKeyInfo buffer + * (encoded_subjectPublicKeyInfo) may be queried by invoking this function + * with encoded_subjectPublicKeyInfo = NULL. In this case, no encoding is + * performed, NTRU_OK is returned, and the required minimum size for + * encoded_subjectPublicKeyInfo is returned in encoded_subjectPublicKeyInfo_len. + * + * When encoded_subjectPublicKeyInfo != NULL, at invocation + * *encoded_subjectPublicKeyInfo_len must be the size of the + * encoded_subjectPublicKeyInfo buffer. + * Upon return, it is the actual size of the encoded public key. + * + * Returns NTRU_OK if successful. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer + * (other than encoded_subjectPublicKeyInfo) is NULL. + * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if pubkey_blob_len is zero. 
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PUBLIC_KEY if the public-key blob is + * invalid (unknown format, corrupt, bad length). + * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the SubjectPublicKeyInfo + * buffer is too small. + */ + +uint32_t +ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo( + uint16_t pubkey_blob_len, /* in - no. of octets in public-key + blob */ + uint8_t const *pubkey_blob, /* in - ptr to public-key blob */ + uint16_t *encoded_subjectPublicKeyInfo_len, + /* in/out - no. of octets in encoded info, + address for no. of octets in + encoded info */ + uint8_t *encoded_subjectPublicKeyInfo) +/* out - address for encoded info */ +{ + NTRU_ENCRYPT_PARAM_SET *params = NULL; + uint8_t const *pubkey_packed = NULL; + uint8_t pubkey_pack_type; + uint16_t packed_pubkey_len; + uint16_t encoded_len; + + /* check for bad parameters */ + + if (!pubkey_blob || !encoded_subjectPublicKeyInfo_len) { + NTRU_RET(NTRU_BAD_PARAMETER); + } + + if (pubkey_blob_len == 0) { + NTRU_RET(NTRU_BAD_LENGTH); + } + + /* get a pointer to the parameter-set parameters, the packing type for + * the public key, and a pointer to the packed public key + */ + + if (!ntru_crypto_ntru_encrypt_key_parse(TRUE /* pubkey */, pubkey_blob_len, + pubkey_blob, &pubkey_pack_type, + NULL, ¶ms, &pubkey_packed, + NULL)) { + NTRU_RET(NTRU_BAD_PUBLIC_KEY); + } + + /* return the encoded_subjectPublicKeyInfo size if requested */ + + packed_pubkey_len = (params->N * params->q_bits + 7) >> 3; + encoded_len = sizeof(der_prefix_template) + packed_pubkey_len; + + if (!encoded_subjectPublicKeyInfo) { + *encoded_subjectPublicKeyInfo_len = encoded_len; + NTRU_RET(NTRU_OK); + } + + /* check the encoded_subjectPublicKeyInfo buffer size */ + + if (*encoded_subjectPublicKeyInfo_len < encoded_len) { + NTRU_RET(NTRU_BUFFER_TOO_SMALL); + } + + /* form the encoded subjectPublicKey */ + + memcpy(encoded_subjectPublicKeyInfo, der_prefix_template, + sizeof(der_prefix_template)); + + 
add_16_to_8s(packed_pubkey_len, encoded_subjectPublicKeyInfo + 2); + add_16_to_8s(packed_pubkey_len, encoded_subjectPublicKeyInfo + 34); + add_16_to_8s(packed_pubkey_len, encoded_subjectPublicKeyInfo + 39); + encoded_subjectPublicKeyInfo[31] = params->der_id; + + memcpy(encoded_subjectPublicKeyInfo + sizeof(der_prefix_template), + pubkey_packed, packed_pubkey_len); + + *encoded_subjectPublicKeyInfo_len = encoded_len; + + NTRU_RET(NTRU_OK); +} + +/* ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey + * + * Decodes a DER-encoded NTRUEncrypt public-key from a + * SubjectPublicKeyInfo field in an X.509 certificate and returns the + * public-key blob itself. + * + * The required minimum size of the output public-key buffer (pubkey_blob) + * may be queried by invoking this function with pubkey_blob = NULL. + * In this case, no decoding is performed, NTRU_OK is returned, and the + * required minimum size for pubkey_blob is returned in pubkey_blob_len. + * + * When pubkey_blob != NULL, at invocation *pubkey_blob_len must be the + * size of the pubkey_blob buffer. + * Upon return, it is the actual size of the public-key blob. + * + * Returns NTRU_OK if successful. + * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if the encoded data buffer + * does not contain a full der prefix and public key. + * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer + * (other than pubkey_blob) is NULL. + * Returns NTRU_ERROR_BASE + NTRU_BAD_ENCODING if the encoded data is + * an invalid encoding of an NTRU public key. + * Returns NTRU_ERROR_BASE + NTRU_OID_NOT_RECOGNIZED if the + * encoded data contains an OID that identifies an object other than + * an NTRU public key. + * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the pubkey_blob buffer + * is too small. + */ + +uint32_t +ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey( + uint8_t const *encoded_data, /* in - ptr to subjectPublicKeyInfo + in the encoded data */ + uint16_t *pubkey_blob_len, /* in/out - no. 
of octets in pubkey blob, + address for no. of octets in + pubkey blob */ + uint8_t *pubkey_blob, /* out - address for pubkey blob */ + uint8_t **next, /* out - address for ptr to encoded + data following the + subjectPublicKeyInfo */ + uint32_t *remaining_data_len) /* in/out - number of bytes remaining in + buffer *next */ +{ + NTRU_ENCRYPT_PARAM_SET *params = NULL; + uint8_t prefix_buf[41]; + bool der_id_valid; + uint16_t packed_pubkey_len = 0; + uint8_t pubkey_pack_type; + uint16_t public_key_blob_len; + uint8_t *data_ptr; + uint32_t data_len; + + /* check for bad parameters */ + + if (!encoded_data || !pubkey_blob_len || !next || !remaining_data_len) { + NTRU_RET(NTRU_BAD_PARAMETER); + } + + data_len = *remaining_data_len; + if (data_len < sizeof(prefix_buf)) { + NTRU_RET(NTRU_BAD_LENGTH); + } + + /* determine if data to be decoded is a valid encoding of an NTRU + * public key + */ + + data_ptr = (uint8_t *) encoded_data; + memcpy(prefix_buf, data_ptr, sizeof(prefix_buf)); + + /* get a pointer to the parameter-set parameters */ + + if ((params = ntru_encrypt_get_params_with_DER_id(data_ptr[31])) == NULL) { + der_id_valid = FALSE; + + /* normalize the prefix-buffer data used in an NTRU OID comparison */ + + prefix_buf[2] = der_prefix_template[2]; + prefix_buf[3] = der_prefix_template[3]; + + } else { + der_id_valid = TRUE; + + /* normalize the prefix-buffer data for the specific parameter set */ + + packed_pubkey_len = (params->N * params->q_bits + 7) >> 3; + sub_16_from_8s(packed_pubkey_len, prefix_buf + 2); + sub_16_from_8s(packed_pubkey_len, prefix_buf + 34); + sub_16_from_8s(packed_pubkey_len, prefix_buf + 39); + prefix_buf[31] = 0; + /*prefix_buf[40] = 0; */ + } + + /* validate the DER prefix encoding */ + + if (!der_id_valid || memcmp(prefix_buf, der_prefix_template, + sizeof(der_prefix_template))) { + + /* bad DER prefix, so determine if this is a bad NTRU encoding or an + * unknown OID by comparing the first 18 octets + */ + + if (memcmp(prefix_buf, 
der_prefix_template, 18) == 0) { + NTRU_RET(NTRU_OID_NOT_RECOGNIZED); + } else { + NTRU_RET(NTRU_BAD_ENCODING); + } + } + + /* done with prefix */ + + data_ptr += sizeof(prefix_buf); + data_len -= sizeof(prefix_buf); + + /* get public key packing type and blob length */ + + ntru_crypto_ntru_encrypt_key_get_blob_params(params, &pubkey_pack_type, + &public_key_blob_len, NULL, + NULL); + + /* return the pubkey_blob size if requested */ + + if (!pubkey_blob) { + *pubkey_blob_len = public_key_blob_len; + NTRU_RET(NTRU_OK); + } + + /* check size of output buffer */ + + if (*pubkey_blob_len < public_key_blob_len) { + NTRU_RET(NTRU_BUFFER_TOO_SMALL); + } + + /* check that blob contains additional data of length packed_pubkey_len */ + if (data_len < packed_pubkey_len) { + NTRU_RET(NTRU_BAD_LENGTH); + } + + /* check that the public key pack type is supported */ + if (pubkey_pack_type != NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS) { + NTRU_RET(NTRU_BAD_PUBLIC_KEY); + } + + /* create the public-key blob */ + ntru_crypto_ntru_encrypt_key_recreate_pubkey_blob(params, packed_pubkey_len, + data_ptr, pubkey_pack_type, pubkey_blob); + *pubkey_blob_len = public_key_blob_len; + + data_ptr += packed_pubkey_len; + data_len -= packed_pubkey_len; + + /* check whether the buffer is empty and update *next */ + if (data_len > 0) { + *next = data_ptr; + *remaining_data_len = data_len; + } else { + *next = NULL; + *remaining_data_len = 0; + } + + NTRU_RET(NTRU_OK); +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.c new file mode 100644 index 0000000000000000000000000000000000000000..5e515c006913f86636698ff6f2c41a42946aac9b --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.c @@ -0,0 +1,392 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to 
the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_ntru_encrypt_key.c + * + * Contents: Routines for exporting and importing public and private keys + * for NTRUEncrypt. + * + *****************************************************************************/ + +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_encrypt_key.h" + +/* ntru_crypto_ntru_encrypt_key_parse + * + * Parses an NTRUEncrypt key blob. + * If the blob is not corrupt, returns packing types for public and private + * keys, a pointer to the parameter set, a pointer to the public key, and + * a pointer to the private key if it exists. + * + * Returns TRUE if successful. + * Returns FALSE if the blob is invalid. + */ + +bool ntru_crypto_ntru_encrypt_key_parse( + bool pubkey_parse, /* in - if parsing pubkey + blob */ + uint16_t key_blob_len, /* in - no. 
octets in key + blob */ + uint8_t const *key_blob, /* in - pointer to key blob */ + uint8_t *pubkey_pack_type, /* out - addr for pubkey + packing type */ + uint8_t *privkey_pack_type, /* out - addr for privkey + packing type */ + NTRU_ENCRYPT_PARAM_SET **params, /* out - addr for ptr to + parameter set */ + uint8_t const **pubkey, /* out - addr for ptr to + packed pubkey */ + uint8_t const **privkey) /* out - addr for ptr to + packed privkey */ +{ + uint8_t tag; + + /* parse key blob based on tag */ + + tag = key_blob[0]; + switch (tag) { + case NTRU_ENCRYPT_PUBKEY_TAG: + + if (!pubkey_parse) { + return FALSE; + } + + break; + + case NTRU_ENCRYPT_PRIVKEY_DEFAULT_TAG: + case NTRU_ENCRYPT_PRIVKEY_TRITS_TAG: + case NTRU_ENCRYPT_PRIVKEY_INDICES_TAG: + + if (pubkey_parse) { + return FALSE; + } + break; + + default: + return FALSE; + break; + } + + switch (tag) { + case NTRU_ENCRYPT_PUBKEY_TAG: + case NTRU_ENCRYPT_PRIVKEY_DEFAULT_TAG: + case NTRU_ENCRYPT_PRIVKEY_TRITS_TAG: + case NTRU_ENCRYPT_PRIVKEY_INDICES_TAG: + + /* Version 0: + * byte 0: tag + * byte 1: no. 
of octets in OID + * bytes 2-4: OID + * bytes 5- : packed pubkey + * [packed privkey] + */ + + { + NTRU_ENCRYPT_PARAM_SET *p = NULL; + uint16_t pubkey_packed_len; + + /* check OID length and minimum blob length for tag and OID */ + + if ((key_blob_len < 5) || (key_blob[1] != 3)) { + return FALSE; + } + + /* get a pointer to the parameter set corresponding to the OID */ + + if ((p = ntru_encrypt_get_params_with_OID(key_blob + 2)) == NULL) { + return FALSE; + } + + /* check blob length and assign pointers to blob fields */ + + pubkey_packed_len = (p->N * p->q_bits + 7) / 8; + + if (pubkey_parse) /* public-key parsing */ + { + if (key_blob_len != 5 + pubkey_packed_len) { + return FALSE; + } + + *pubkey = key_blob + 5; + + } else /* private-key parsing */ + { + uint16_t privkey_packed_len; + uint16_t privkey_packed_trits_len = (p->N + 4) / 5; + uint16_t privkey_packed_indices_len; + uint16_t dF; + + /* check packing type for product-form private keys */ + + if (p->is_product_form && + (tag == NTRU_ENCRYPT_PRIVKEY_TRITS_TAG)) { + return FALSE; + } + + /* set packed-key length for packed indices */ + + if (p->is_product_form) { + dF = (uint16_t)((p->dF_r & 0xff) + /* df1 */ + ((p->dF_r >> 8) & 0xff) + /* df2 */ + ((p->dF_r >> 16) & 0xff)); /* df3 */ + } else { + dF = (uint16_t) p->dF_r; + } + + privkey_packed_indices_len = ((dF << 1) * p->N_bits + 7) >> 3; + + /* set private-key packing type if defaulted */ + + if (tag == NTRU_ENCRYPT_PRIVKEY_DEFAULT_TAG) { + if (p->is_product_form || + (privkey_packed_indices_len <= + privkey_packed_trits_len)) { + tag = NTRU_ENCRYPT_PRIVKEY_INDICES_TAG; + } else { + tag = NTRU_ENCRYPT_PRIVKEY_TRITS_TAG; + } + } + + if (tag == NTRU_ENCRYPT_PRIVKEY_TRITS_TAG) { + privkey_packed_len = privkey_packed_trits_len; + } else { + privkey_packed_len = privkey_packed_indices_len; + } + + if (key_blob_len != 5 + pubkey_packed_len + privkey_packed_len) { + return FALSE; + } + + *pubkey = key_blob + 5; + *privkey = *pubkey + pubkey_packed_len; + 
*privkey_pack_type = (tag == NTRU_ENCRYPT_PRIVKEY_TRITS_TAG) ? NTRU_ENCRYPT_KEY_PACKED_TRITS : NTRU_ENCRYPT_KEY_PACKED_INDICES; + } + + /* return parameter set pointer */ + + *pubkey_pack_type = NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS; + *params = p; + } + + default: + break; /* can't get here */ + } + + return TRUE; +} + +/* ntru_crypto_ntru_encrypt_key_get_blob_params + * + * Returns public and private key packing types and blob lengths given + * a packing format. For now, only a default packing format exists. + * + * Only public-key params may be returned by setting privkey_pack_type + * and privkey_blob_len to NULL. + */ + +void ntru_crypto_ntru_encrypt_key_get_blob_params( + NTRU_ENCRYPT_PARAM_SET const *params, /* in - pointer to + param set + parameters */ + uint8_t *pubkey_pack_type, /* out - addr for pubkey + packing type */ + uint16_t *pubkey_blob_len, /* out - addr for no. of + bytes in + pubkey blob */ + uint8_t *privkey_pack_type, /* out - addr for privkey + packing type */ + uint16_t *privkey_blob_len) /* out - addr for no. 
of + bytes in + privkey blob */ +{ + uint16_t pubkey_packed_len = (params->N * params->q_bits + 7) >> 3; + + *pubkey_pack_type = NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS; + *pubkey_blob_len = 5 + pubkey_packed_len; + + if (privkey_pack_type && privkey_blob_len) { + uint16_t privkey_packed_trits_len = (params->N + 4) / 5; + uint16_t privkey_packed_indices_len; + uint16_t dF; + + if (params->is_product_form) { + dF = (uint16_t)((params->dF_r & 0xff) + /* df1 */ + ((params->dF_r >> 8) & 0xff) + /* df2 */ + ((params->dF_r >> 16) & 0xff)); /* df3 */ + } else { + dF = (uint16_t) params->dF_r; + } + + privkey_packed_indices_len = ((dF << 1) * params->N_bits + 7) >> 3; + + if (params->is_product_form || + (privkey_packed_indices_len <= privkey_packed_trits_len)) { + *privkey_pack_type = NTRU_ENCRYPT_KEY_PACKED_INDICES; + *privkey_blob_len = + 5 + pubkey_packed_len + privkey_packed_indices_len; + } else { + *privkey_pack_type = NTRU_ENCRYPT_KEY_PACKED_TRITS; + *privkey_blob_len = + 5 + pubkey_packed_len + privkey_packed_trits_len; + } + } + + return; +} + +/* ntru_crypto_ntru_encrypt_key_create_pubkey_blob + * + * Returns a public key blob, packed according to the packing type provided. 
+ */ + +uint32_t +ntru_crypto_ntru_encrypt_key_create_pubkey_blob( + NTRU_ENCRYPT_PARAM_SET const *params, /* in - pointer to + param set + parameters */ + uint16_t const *pubkey, /* in - pointer to the + coefficients + of the pubkey */ + uint8_t pubkey_pack_type, /* out - pubkey packing + type */ + uint8_t *pubkey_blob) /* out - addr for the + pubkey blob */ +{ + + switch (pubkey_pack_type) { + case NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS: + *pubkey_blob++ = NTRU_ENCRYPT_PUBKEY_TAG; + *pubkey_blob++ = (uint8_t) sizeof(params->OID); + memcpy(pubkey_blob, params->OID, sizeof(params->OID)); + pubkey_blob += sizeof(params->OID); + ntru_elements_2_octets(params->N, pubkey, params->q_bits, + pubkey_blob); + break; + + default: + NTRU_RET(NTRU_BAD_PARAMETER); + } + + NTRU_RET(NTRU_OK); +} + +/* ntru_crypto_ntru_encrypt_key_recreate_pubkey_blob + * + * Returns a public key blob, recreated from an already-packed public key. + */ + +uint32_t +ntru_crypto_ntru_encrypt_key_recreate_pubkey_blob( + NTRU_ENCRYPT_PARAM_SET const *params, /* in - pointer to + param set + parameters */ + uint16_t packed_pubkey_len, /* in - no. octets in + packed pubkey */ + uint8_t const *packed_pubkey, /* in - pointer to the + packed pubkey */ + uint8_t pubkey_pack_type, /* out - pubkey packing + type */ + uint8_t *pubkey_blob) /* out - addr for the + pubkey blob */ +{ + + switch (pubkey_pack_type) { + case NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS: + *pubkey_blob++ = NTRU_ENCRYPT_PUBKEY_TAG; + *pubkey_blob++ = (uint8_t) sizeof(params->OID); + memcpy(pubkey_blob, params->OID, sizeof(params->OID)); + pubkey_blob += sizeof(params->OID); + memcpy(pubkey_blob, packed_pubkey, packed_pubkey_len); + break; + + default: + NTRU_RET(NTRU_BAD_PARAMETER); + } + + NTRU_RET(NTRU_OK); +} + +/* ntru_crypto_ntru_encrypt_key_create_privkey_blob + * + * Returns a private key blob, packed according to the packing type provided. 
+ */ + +uint32_t +ntru_crypto_ntru_encrypt_key_create_privkey_blob( + NTRU_ENCRYPT_PARAM_SET const *params, /* in - pointer to + param set + parameters */ + uint16_t const *pubkey, /* in - pointer to the + coefficients + of the pubkey */ + uint16_t const *privkey, /* in - pointer to the + indices of the + privkey */ + uint8_t privkey_pack_type, /* in - privkey packing + type */ + uint8_t *buf, /* in - temp, N bytes */ + uint8_t *privkey_blob) /* out - addr for the + privkey blob */ +{ + switch (privkey_pack_type) { + case NTRU_ENCRYPT_KEY_PACKED_TRITS: + case NTRU_ENCRYPT_KEY_PACKED_INDICES: + + /* format header and packed public key */ + + *privkey_blob++ = NTRU_ENCRYPT_PRIVKEY_DEFAULT_TAG; + *privkey_blob++ = (uint8_t) sizeof(params->OID); + memcpy(privkey_blob, params->OID, sizeof(params->OID)); + privkey_blob += sizeof(params->OID); + ntru_elements_2_octets(params->N, pubkey, params->q_bits, + privkey_blob); + privkey_blob += (params->N * params->q_bits + 7) >> 3; + + /* add packed private key */ + + if (privkey_pack_type == NTRU_ENCRYPT_KEY_PACKED_TRITS) { + ntru_indices_2_packed_trits(privkey, (uint16_t) params->dF_r, + (uint16_t) params->dF_r, + params->N, buf, privkey_blob); + } else { + uint32_t dF; + + if (params->is_product_form) { + dF = (params->dF_r & 0xff) + + ((params->dF_r >> 8) & 0xff) + + ((params->dF_r >> 16) & 0xff); + } else { + dF = params->dF_r; + } + + ntru_elements_2_octets((uint16_t) dF << 1, privkey, + params->N_bits, privkey_blob); + } + break; + + default: + NTRU_RET(NTRU_BAD_PARAMETER); + } + + NTRU_RET(NTRU_OK); +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.h b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.h new file mode 100644 index 0000000000000000000000000000000000000000..d2e3b03e3f942f249f91561ae4e1194f625b64a5 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.h @@ -0,0 +1,156 @@ +/****************************************************************************** + * NTRU Cryptography 
Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +#ifndef NTRU_CRYPTO_NTRU_ENCRYPT_KEY_H +#define NTRU_CRYPTO_NTRU_ENCRYPT_KEY_H + +#include "ntru_crypto_ntru_convert.h" +#include "ntru_crypto_ntru_encrypt_param_sets.h" + +/* key-blob definitions */ + +#define NTRU_ENCRYPT_PUBKEY_TAG 0x01 +#define NTRU_ENCRYPT_PRIVKEY_DEFAULT_TAG 0x02 +#define NTRU_ENCRYPT_PRIVKEY_TRITS_TAG 0xfe +#define NTRU_ENCRYPT_PRIVKEY_INDICES_TAG 0xff + +/* packing types */ + +#define NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS 0x01 +#define NTRU_ENCRYPT_KEY_PACKED_INDICES 0x02 +#define NTRU_ENCRYPT_KEY_PACKED_TRITS 0x03 + +/* function declarations */ + +/* ntru_crypto_ntru_encrypt_key_parse + * + * Parses an NTRUEncrypt key blob. + * If the blob is not corrupt, returns packing types for public and private + * keys, a pointer to the parameter set, a pointer to the public key, and + * a pointer to the private key if it exists. + * + * Returns TRUE if successful. + * Returns FALSE if the blob is invalid. + */ + +extern bool +ntru_crypto_ntru_encrypt_key_parse( + bool pubkey_parse, /* in - if parsing pubkey + blob */ + uint16_t key_blob_len, /* in - no. 
octets in key + blob */ + uint8_t const *key_blob, /* in - pointer to key blob */ + uint8_t *pubkey_pack_type, /* out - addr for pubkey + packing type */ + uint8_t *privkey_pack_type, /* out - addr for privkey + packing type */ + NTRU_ENCRYPT_PARAM_SET **params, /* out - addr for ptr to + parameter set */ + uint8_t const **pubkey, /* out - addr for ptr to + packed pubkey */ + uint8_t const **privkey); /* out - addr for ptr to + packed privkey */ + +/* ntru_crypto_ntru_encrypt_key_get_blob_params + * + * Returns public and private key packing types and blob lengths given + * a packing format. For now, only a default packing format exists. + * + * Only public-key params may be returned by setting privkey_pack_type + * and privkey_blob_len to NULL. + */ + +extern void +ntru_crypto_ntru_encrypt_key_get_blob_params( + NTRU_ENCRYPT_PARAM_SET const *params, /* in - pointer to + param set + parameters */ + uint8_t *pubkey_pack_type, /* out - addr for pubkey + packing type */ + uint16_t *pubkey_blob_len, /* out - addr for no. of + bytes in + pubkey blob */ + uint8_t *privkey_pack_type, /* out - addr for privkey + packing type */ + uint16_t *privkey_blob_len); /* out - addr for no. of + bytes in + privkey blob */ + +/* ntru_crypto_ntru_encrypt_key_create_pubkey_blob + * + * Returns a public key blob, packed according to the packing type provided. + */ + +extern uint32_t +ntru_crypto_ntru_encrypt_key_create_pubkey_blob( + NTRU_ENCRYPT_PARAM_SET const *params, /* in - pointer to + param set + parameters */ + uint16_t const *pubkey, /* in - pointer to the + coefficients + of the pubkey */ + uint8_t pubkey_pack_type, /* out - addr for pubkey + packing type */ + uint8_t *pubkey_blob); /* out - addr for the + pubkey blob */ + +/* ntru_crypto_ntru_encrypt_key_recreate_pubkey_blob + * + * Returns a public key blob, recreated from an already-packed public key. 
+ */ + +extern uint32_t +ntru_crypto_ntru_encrypt_key_recreate_pubkey_blob( + NTRU_ENCRYPT_PARAM_SET const *params, /* in - pointer to + param set + parameters */ + uint16_t packed_pubkey_len, /* in - no. octets in + packed pubkey */ + uint8_t const *packed_pubkey, /* in - pointer to the + packed pubkey */ + uint8_t pubkey_pack_type, /* out - pubkey packing + type */ + uint8_t *pubkey_blob); /* out - addr for the + pubkey blob */ + +/* ntru_crypto_ntru_encrypt_key_create_privkey_blob + * + * Returns a private key blob, packed according to the packing type provided. + */ + +extern uint32_t +ntru_crypto_ntru_encrypt_key_create_privkey_blob( + NTRU_ENCRYPT_PARAM_SET const *params, /* in - pointer to + param set + parameters */ + uint16_t const *pubkey, /* in - pointer to the + coefficients + of the pubkey */ + uint16_t const *privkey, /* in - pointer to the + indices of the + privkey */ + uint8_t privkey_pack_type, /* in - privkey packing + type */ + uint8_t *buf, /* in - temp, N bytes */ + uint8_t *privkey_blob); /* out - addr for the + privkey blob */ + +#endif /* NTRU_CRYPTO_NTRU_ENCRYPT_KEY_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.c new file mode 100644 index 0000000000000000000000000000000000000000..389e7a21a2c3714c270c54a130f7c4cb133cf901 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.c @@ -0,0 +1,577 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law.
+ * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_ntru_encrypt_param_sets.c + * + * Contents: Defines the NTRUEncrypt parameter sets. + * + *****************************************************************************/ + +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_encrypt_param_sets.h" + +/* parameter sets */ + +static NTRU_ENCRYPT_PARAM_SET ntruParamSets[] = { + + { + NTRU_EES401EP1, /* parameter-set id */ + "ees401ep1", /* human readable param set name */ + {0x00, 0x02, 0x04}, /* OID */ + 0x22, /* DER id */ + 9, /* no. of bits in N (i.e., in an index) */ + 401, /* N */ + 14, /* security strength in octets */ + 14, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 113, /* df, dr */ + 133, /* dg */ + 60, /* maxMsgLenBytes */ + 113, /* dm0 */ + 2005, /* 2^c - (2^c mod N) */ + 11, /* c */ + 1, /* lLen */ + 41, /* min. no. of hash calls for IGF-2 */ + 7, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES449EP1, /* parameter-set id */ + "ees449ep1", /* human readable param set name */ + {0x00, 0x03, 0x03}, /* OID */ + 0x23, /* DER id */ + 9, /* no. of bits in N (i.e., in an index) */ + 449, /* N */ + 16, /* security strength in octets */ + 16, /* no. 
of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 134, /* df, dr */ + 149, /* dg */ + 67, /* maxMsgLenBytes */ + 134, /* dm0 */ + 449, /* 2^c - (2^c mod N) */ + 9, /* c */ + 1, /* lLen */ + 47, /* min. no. of hash calls for IGF-2 */ + 8, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES677EP1, /* parameter-set id */ + "ees677ep1", /* human readable param set name */ + {0x00, 0x05, 0x03}, /* OID */ + 0x24, /* DER id */ + 10, /* no. of bits in N (i.e., in an index) */ + 677, /* N */ + 24, /* security strength in octets */ + 24, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 157, /* df, dr */ + 225, /* dg */ + 101, /* maxMsgLenBytes */ + 157, /* dm0 */ + 2031, /* 2^c - (2^c mod N) */ + 11, /* c */ + 1, /* lLen */ + 32, /* min. no. of hash calls for IGF-2 */ + 8, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES1087EP2, /* parameter-set id */ + "ees1087ep2", /* human readable param set name */ + {0x00, 0x06, 0x03}, /* OID */ + 0x25, /* DER id */ + 11, /* no. of bits in N (i.e., in an index) */ + 1087, /* N */ + 32, /* security strength in octets */ + 32, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 120, /* df, dr */ + 362, /* dg */ + 170, /* maxMsgLenBytes */ + 120, /* dm0 */ + 7609, /* 2^c - (2^c mod N) */ + 13, /* c */ + 1, /* lLen */ + 27, /* min. no. of hash calls for IGF-2 */ + 11, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. 
 */ + }, + + { + NTRU_EES541EP1, /* parameter-set id */ + "ees541ep1", /* human readable param set name */ + {0x00, 0x02, 0x05}, /* OID */ + 0x26, /* DER id */ + 10, /* no. of bits in N (i.e., in an index) */ + 541, /* N */ + 14, /* security strength in octets */ + 14, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 49, /* df, dr */ + 180, /* dg */ + 86, /* maxMsgLenBytes */ + 49, /* dm0 */ + 3787, /* 2^c - (2^c mod N) */ + 12, /* c */ + 1, /* lLen */ + 16, /* min. no. of hash calls for IGF-2 */ + 9, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES613EP1, /* parameter-set id */ + "ees613ep1", /* human readable param set name */ + {0x00, 0x03, 0x04}, /* OID */ + 0x27, /* DER id */ + 10, /* no. of bits in N (i.e., in an index) */ + 613, /* N */ + 16, /* security strength in octets */ + 16, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 55, /* df, dr */ + 204, /* dg */ + 97, /* maxMsgLenBytes */ + 55, /* dm0 */ + 1839, /* 2^c - (2^c mod N) */ + 11, /* c */ + 1, /* lLen */ + 18, /* min. no. of hash calls for IGF-2 */ + 10, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES887EP1, /* parameter-set id */ + "ees887ep1", /* human readable param set name */ + {0x00, 0x05, 0x04}, /* OID */ + 0x28, /* DER id */ + 10, /* no. of bits in N (i.e., in an index) */ + 887, /* N */ + 24, /* security strength in octets */ + 24, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 81, /* df, dr */ + 295, /* dg */ + 141, /* maxMsgLenBytes */ + 81, /* dm0 */ + 887, /* 2^c - (2^c mod N) */ + 10, /* c */ + 1, /* lLen */ + 16, /* min. no. 
of hash calls for IGF-2 */ + 9, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES1171EP1, /* parameter-set id */ + "ees1171ep1", /* human readable param set name */ + {0x00, 0x06, 0x04}, /* OID */ + 0x29, /* DER id */ + 11, /* no. of bits in N (i.e., in an index) */ + 1171, /* N */ + 32, /* security strength in octets */ + 32, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 106, /* df, dr */ + 390, /* dg */ + 186, /* maxMsgLenBytes */ + 106, /* dm0 */ + 3513, /* 2^c - (2^c mod N) */ + 12, /* c */ + 1, /* lLen */ + 25, /* min. no. of hash calls for IGF-2 */ + 12, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES659EP1, /* parameter-set id */ + "ees659ep1", /* human readable param set name */ + {0x00, 0x02, 0x06}, /* OID */ + 0x2a, /* DER id */ + 10, /* no. of bits in N (i.e., in an index) */ + 659, /* N */ + 14, /* security strength in octets */ + 14, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 38, /* df, dr */ + 219, /* dg */ + 108, /* maxMsgLenBytes */ + 38, /* dm0 */ + 1977, /* 2^c - (2^c mod N) */ + 11, /* c */ + 1, /* lLen */ + 11, /* min. no. of hash calls for IGF-2 */ + 10, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES761EP1, /* parameter-set id */ + "ees761ep1", /* human readable param set name */ + {0x00, 0x03, 0x05}, /* OID */ + 0x2b, /* DER id */ + 10, /* no. of bits in N (i.e., in an index) */ + 761, /* N */ + 16, /* security strength in octets */ + 16, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. 
of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 42, /* df, dr */ + 253, /* dg */ + 125, /* maxMsgLenBytes */ + 42, /* dm0 */ + 3805, /* 2^c - (2^c mod N) */ + 12, /* c */ + 1, /* lLen */ + 14, /* min. no. of hash calls for IGF-2 */ + 12, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES1087EP1, /* parameter-set id */ + "ees1087ep1", /* human readable param set name */ + {0x00, 0x05, 0x05}, /* OID */ + 0x2c, /* DER id */ + 11, /* no. of bits in N (i.e., in an index) */ + 1087, /* N */ + 24, /* security strength in octets */ + 24, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 63, /* df, dr */ + 362, /* dg */ + 178, /* maxMsgLenBytes */ + 63, /* dm0 */ + 7609, /* 2^c - (2^c mod N) */ + 13, /* c */ + 1, /* lLen */ + 14, /* min. no. of hash calls for IGF-2 */ + 11, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES1499EP1, /* parameter-set id */ + "ees1499ep1", /* human readable param set name */ + {0x00, 0x06, 0x05}, /* OID */ + 0x2d, /* DER id */ + 11, /* no. of bits in N (i.e., in an index) */ + 1499, /* N */ + 32, /* security strength in octets */ + 32, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + FALSE, /* product form */ + 79, /* df, dr */ + 499, /* dg */ + 247, /* maxMsgLenBytes */ + 79, /* dm0 */ + 7495, /* 2^c - (2^c mod N) */ + 13, /* c */ + 1, /* lLen */ + 18, /* min. no. of hash calls for IGF-2 */ + 14, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES401EP2, /* parameter-set id */ + "ees401ep2", /* human readable param set name */ + {0x00, 0x02, 0x10}, /* OID */ + 0x2e, /* DER id */ + 9, /* no. 
of bits in N (i.e., in an index) */ + 401, /* N */ + 14, /* security strength in octets */ + 14, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + TRUE, /* product form */ + 8 + (8 << 8) + (6 << 16), /* df, dr */ + 133, /* dg */ + 60, /* maxMsgLenBytes */ + 101, /* dm0 */ + 2005, /* 2^c - (2^c mod N) */ + 11, /* c */ + 1, /* lLen */ + 7, /* min. no. of hash calls for IGF-2 */ + 7, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES439EP1, /* parameter-set id */ + "ees439ep1", /* human readable param set name */ + {0x00, 0x03, 0x10}, /* OID */ + 0x2f, /* DER id */ + 9, /* no. of bits in N (i.e., in an index) */ + 439, /* N */ + 16, /* security strength in octets */ + 16, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + TRUE, /* product form */ + 9 + (8 << 8) + (5 << 16), /* df, dr */ + 146, /* dg */ + 65, /* maxMsgLenBytes */ + 112, /* dm0 */ + 439, /* 2^c - (2^c mod N) */ + 9, /* c */ + 1, /* lLen */ + 8, /* min. no. of hash calls for IGF-2 */ + 8, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES593EP1, /* parameter-set id */ + "ees593ep1", /* human readable param set name */ + {0x00, 0x05, 0x10}, /* OID */ + 0x30, /* DER id */ + 10, /* no. of bits in N (i.e., in an index) */ + 593, /* N */ + 24, /* security strength in octets */ + 24, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + TRUE, /* product form */ + 10 + (10 << 8) + (8 << 16), /* df, dr */ + 197, /* dg */ + 86, /* maxMsgLenBytes */ + 158, /* dm0 */ + 1779, /* 2^c - (2^c mod N) */ + 11, /* c */ + 1, /* lLen */ + 9, /* min. no. of hash calls for IGF-2 */ + 7, /* min. no. 
of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES743EP1, /* parameter-set id */ + "ees743ep1", /* human readable param set name */ + {0x00, 0x06, 0x10}, /* OID */ + 0x31, /* DER id */ + 10, /* no. of bits in N (i.e., in an index) */ + 743, /* N */ + 32, /* security strength in octets */ + 32, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + TRUE, /* product form */ + 11 + (11 << 8) + (15 << 16), /* df, dr */ + 247, /* dg */ + 106, /* maxMsgLenBytes */ + 204, /* dm0 */ + 8173, /* 2^c - (2^c mod N) */ + 13, /* c */ + 1, /* lLen */ + 9, /* min. no. of hash calls for IGF-2 */ + 9, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES443EP1, /* parameter-set id */ + "ees443ep1", /* human readable param set name */ + {0x00, 0x03, 0x11}, /* OID */ + 0x32, /* DER id */ + 9, /* no. of bits in N (i.e., in an index) */ + 443, /* N */ + 16, /* security strength in octets */ + 32, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. of bits in q (i.e., in a coeff) */ + TRUE, /* product form */ + 9 + (8 << 8) + (5 << 16), /* df, dr */ + 148, /* dg */ + 49, /* maxMsgLenBytes */ + 115, /* dm0 */ + 443, /* 2^c - (2^c mod N) */ + 9, /* c */ + 1, /* lLen */ + 5, /* min. no. of hash calls for IGF-2 */ + 5, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, + + { + NTRU_EES587EP1, /* parameter-set id */ + "ees587ep1", /* human readable param set name */ + {0x00, 0x05, 0x11}, /* OID */ + 0x33, /* DER id */ + 10, /* no. of bits in N (i.e., in an index) */ + 587, /* N */ + 24, /* security strength in octets */ + 32, /* no. of octets for random string b */ + 2048, /* q */ + 11, /* no. 
of bits in q (i.e., in a coeff) */ + TRUE, /* product form */ + 10 + (10 << 8) + (8 << 16), /* df, dr */ + 196, /* dg */ + 76, /* maxMsgLenBytes */ + 157, /* dm0 */ + 1761, /* 2^c - (2^c mod N) */ + 11, /* c */ + 1, /* lLen */ + 7, /* min. no. of hash calls for IGF-2 */ + 7, /* min. no. of hash calls for MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1, + HMAC-DRBG, etc. */ + }, +}; + +static size_t numParamSets = + sizeof(ntruParamSets) / sizeof(NTRU_ENCRYPT_PARAM_SET); + +/* functions */ + +/* ntru_encrypt_get_params_with_id + * + * Looks up a set of NTRUEncrypt parameters based on the id of the + * parameter set. + * + * Returns a pointer to the parameter set parameters if successful. + * Returns NULL if the parameter set cannot be found. + */ + +NTRU_ENCRYPT_PARAM_SET * +ntru_encrypt_get_params_with_id( + NTRU_ENCRYPT_PARAM_SET_ID id) /* in - parameter-set id */ +{ + size_t i; + + for (i = 0; i < numParamSets; i++) { + if (ntruParamSets[i].id == id) { + return &(ntruParamSets[i]); + } + } + + return NULL; +} + +/* ntru_encrypt_get_params_with_OID + * + * Looks up a set of NTRUEncrypt parameters based on the OID of the + * parameter set. + * + * Returns a pointer to the parameter set parameters if successful. + * Returns NULL if the parameter set cannot be found. + */ + +NTRU_ENCRYPT_PARAM_SET * +ntru_encrypt_get_params_with_OID( + uint8_t const *oid) /* in - pointer to parameter-set OID */ +{ + size_t i; + + for (i = 0; i < numParamSets; i++) { + if (!memcmp(ntruParamSets[i].OID, oid, 3)) { + return &(ntruParamSets[i]); + } + } + + return NULL; +} + +/* ntru_encrypt_get_params_with_DER_id + * + * Looks up a set of NTRUEncrypt parameters based on the DER id of the + * parameter set. + * + * Returns a pointer to the parameter set parameters if successful. + * Returns NULL if the parameter set cannot be found. 
+ */ + +NTRU_ENCRYPT_PARAM_SET * +ntru_encrypt_get_params_with_DER_id( + uint8_t der_id) /* in - parameter-set DER id */ +{ + size_t i; + + for (i = 0; i < numParamSets; i++) { + if (ntruParamSets[i].der_id == der_id) { + return &(ntruParamSets[i]); + } + } + return NULL; +} + +const char * +ntru_encrypt_get_param_set_name( + NTRU_ENCRYPT_PARAM_SET_ID id) /* in - parameter-set id */ +{ + size_t i; + + for (i = 0; i < numParamSets; i++) { + if (ntruParamSets[i].id == id) { + return ntruParamSets[i].name; + } + } + + return NULL; +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.h b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.h new file mode 100644 index 0000000000000000000000000000000000000000..780f0ed262d311d15dbfbe64a8906e0b6edc37c6 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.h @@ -0,0 +1,119 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. 
+ * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_ntru_encrypt_param_sets.h + * + * Contents: Definitions and declarations for the NTRUEncrypt parameter sets. + * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_NTRU_ENCRYPT_PARAM_SETS_H +#define NTRU_CRYPTO_NTRU_ENCRYPT_PARAM_SETS_H + +#include "ntru_crypto.h" +#include "ntru_crypto_hash_basics.h" + +/* structures */ + +typedef struct _NTRU_ENCRYPT_PARAM_SET { + NTRU_ENCRYPT_PARAM_SET_ID id; /* parameter-set ID */ + const char *name; /* human readable param set name */ + uint8_t const OID[3]; /* pointer to OID */ + uint8_t der_id; /* parameter-set DER id */ + uint8_t N_bits; /* no. of bits in N (i.e. in + an index */ + uint16_t N; /* ring dimension */ + uint16_t sec_strength_len; /* no. of octets of + security strength */ + uint16_t b_len; /* no. of octets for random + string b */ + uint16_t q; /* big modulus */ + uint8_t q_bits; /* no. of bits in q (i.e. in + a coefficient */ + bool is_product_form; /* if product form used */ + uint32_t dF_r; /* no. of 1 or -1 coefficients + in ring elements F, r */ + uint16_t dg; /* no. - 1 of 1 coefficients + or no. of -1 coefficients + in ring element g */ + uint16_t m_len_max; /* max no. of plaintext + octets */ + uint16_t min_msg_rep_wt; /* min. message + representative weight */ + uint16_t no_bias_limit; /* limit for no bias in + IGF-2 */ + uint8_t c_bits; /* no. bits in candidate for + deriving an index in + IGF-2 */ + uint8_t m_len_len; /* no. of octets to hold + mLenOctets */ + uint8_t min_IGF_hash_calls; /* min. no. of hash calls for + IGF-2 */ + uint8_t min_MGF_hash_calls; /* min. no. of hash calls for + MGF-TP-1 */ + NTRU_CRYPTO_HASH_ALGID hash_algid; /* hash function for MGF-TP-1, + HMAC-DRBG, etc. 
*/ +} NTRU_ENCRYPT_PARAM_SET; + +/* function declarations */ + +/* ntru_encrypt_get_params_with_id + * + * Looks up a set of NTRU Encrypt parameters based on the id of the + * parameter set. + * + * Returns a pointer to the parameter set parameters if successful. + * Returns NULL if the parameter set cannot be found. + */ + +extern NTRU_ENCRYPT_PARAM_SET * +ntru_encrypt_get_params_with_id( + NTRU_ENCRYPT_PARAM_SET_ID id); /* in - parameter-set id */ + +/* ntru_encrypt_get_params_with_OID + * + * Looks up a set of NTRU Encrypt parameters based on the OID of the + * parameter set. + * + * Returns a pointer to the parameter set parameters if successful. + * Returns NULL if the parameter set cannot be found. + */ + +extern NTRU_ENCRYPT_PARAM_SET * +ntru_encrypt_get_params_with_OID( + uint8_t const *oid); /* in - pointer to parameter-set OID */ + +/* ntru_encrypt_get_params_with_DER_id + * + * Looks up a set of NTRUEncrypt parameters based on the DER id of the + * parameter set. + * + * Returns a pointer to the parameter set parameters if successful. + * Returns NULL if the parameter set cannot be found. + */ + +extern NTRU_ENCRYPT_PARAM_SET * +ntru_encrypt_get_params_with_DER_id( + uint8_t der_id); /* in - parameter-set DER id */ + +#endif /* NTRU_CRYPTO_NTRU_ENCRYPT_PARAM_SETS_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.c new file mode 100644 index 0000000000000000000000000000000000000000..4660ac5b7c61f91e48f76374f0bf22f6ec850ccc --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.c @@ -0,0 +1,193 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. 
+ * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_ntru_mgf1.c + * + * Contents: Routines implementing MGF-TP-1 and MGF-1. + * + *****************************************************************************/ + +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_mgf1.h" +#include "ntru_crypto_ntru_convert.h" + +/* ntru_mgf1 + * + * Implements a basic mask-generation function, generating an arbitrary + * number of octets based on hashing a digest-length string concatenated + * with a 4-octet counter. + * + * The state (string and counter) is initialized when a seed is present. + * + * Returns NTRU_OK if successful. + * Returns NTRU_CRYPTO_HASH_ errors if they occur. + * + */ + +uint32_t +ntru_mgf1( + uint8_t *state, /* in/out - pointer to the state */ + NTRU_CRYPTO_HASH_ALGID algid, /* in - hash algorithm ID */ + uint8_t md_len, /* in - no. of octets in digest */ + uint8_t num_calls, /* in - no. of hash calls */ + uint16_t seed_len, /* in - no. 
of octets in seed */ + uint8_t const *seed, /* in - pointer to seed */ + uint8_t *out) /* out - address for output */ +{ + uint8_t *ctr = state + md_len; + uint32_t retcode; + + /* if seed present, init state */ + + if (seed) { + if ((retcode = ntru_crypto_hash_digest(algid, seed, seed_len, state)) != + NTRU_CRYPTO_HASH_OK) { + return retcode; + } + + memset(ctr, 0, 4); + } + + /* generate output */ + + while (num_calls-- > 0) { + if ((retcode = ntru_crypto_hash_digest(algid, state, md_len + 4, + out)) != NTRU_CRYPTO_HASH_OK) { + return retcode; + } + + out += md_len; + + /* increment counter */ + + if (++ctr[3] == 0) { + if (++ctr[2] == 0) { + if (++ctr[1] == 0) { + ++ctr[0]; + } + } + } + } + + NTRU_RET(NTRU_OK); +} + +/* ntru_mgftp1 + * + * Implements a mask-generation function for trinary polynomials, + * MGF-TP-1, generating an arbitrary number of octets based on hashing + * a digest-length string concatenated with a 4-octet counter. From + * these octets, N trits are derived. + * + * The state (string and counter) is initialized when a seed is present. + * + * Returns NTRU_OK if successful. + * Returns NTRU_CRYPTO_HASH_ errors if they occur. + * + */ + +uint32_t +ntru_mgftp1( + NTRU_CRYPTO_HASH_ALGID hash_algid, /* in - hash alg ID for + MGF-TP-1 */ + uint8_t md_len, /* in - no. of octets in + digest */ + uint8_t min_calls, /* in - minimum no. of hash + calls */ + uint16_t seed_len, /* in - no. of octets in seed */ + uint8_t *seed, /* in - pointer to seed */ + uint8_t *buf, /* in - pointer to working + buffer */ + uint16_t num_trits_needed, /* in - no. 
of trits in mask */ + uint8_t *mask) /* out - address for mask trits */ +{ + uint8_t *mgf_out; + uint8_t *octets; + uint16_t octets_available; + uint32_t retcode; + + /* generate minimum MGF1 output */ + + mgf_out = buf + md_len + 4; + if ((retcode = ntru_mgf1(buf, hash_algid, md_len, min_calls, + seed_len, seed, mgf_out)) != NTRU_OK) { + return retcode; + } + + octets = mgf_out; + octets_available = min_calls * md_len; + + /* get trits for mask */ + + while (num_trits_needed >= 5) { + /* get another octet and convert it to 5 trits */ + + if (octets_available == 0) { + if ((retcode = ntru_mgf1(buf, hash_algid, md_len, 1, + 0, NULL, mgf_out)) != NTRU_OK) { + return retcode; + } + + octets = mgf_out; + octets_available = md_len; + } + + if (*octets < 243) { + ntru_octet_2_trits(*octets, mask); + mask += 5; + num_trits_needed -= 5; + } + + octets++; + --octets_available; + } + + /* get any remaining trits */ + + while (num_trits_needed) { + uint8_t trits[5]; + + /* get another octet and convert it to remaining trits */ + + if (octets_available == 0) { + if ((retcode = ntru_mgf1(buf, hash_algid, md_len, 1, + 0, NULL, mgf_out)) != NTRU_OK) { + return retcode; + } + + octets = mgf_out; + octets_available = md_len; + } + + if (*octets < 243) { + ntru_octet_2_trits(*octets, trits); + memcpy(mask, trits, num_trits_needed); + num_trits_needed = 0; + } else { + octets++; + --octets_available; + } + } + + NTRU_RET(NTRU_OK); +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.h b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.h new file mode 100644 index 0000000000000000000000000000000000000000..546d4bf3e5eccd2760a6b367fc8d46c589a82850 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.h @@ -0,0 +1,90 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its 
rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_ntru_mgf1.h + * + * Contents: Public header file for MGF-1 in the NTRU algorithm. + * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_NTRU_MGF1_H +#define NTRU_CRYPTO_NTRU_MGF1_H + +#include "ntru_crypto.h" +#include "ntru_crypto_hash.h" + +/* function declarations */ + +/* ntru_mgf1 + * + * Implements a basic mask-generation function, generating an arbitrary + * number of octets based on hashing a digest-length string concatenated + * with a 4-octet counter. + * + * The state (string and counter) is initialized when a seed is present. + * + * Returns NTRU_OK if successful. + * Returns NTRU_CRYPTO_HASH_ errors if they occur. + * + */ + +extern uint32_t +ntru_mgf1( + uint8_t *state, /* in/out - pointer to the state */ + NTRU_CRYPTO_HASH_ALGID algid, /* in - hash algorithm ID */ + uint8_t md_len, /* in - no. of octets in digest */ + uint8_t num_calls, /* in - no. of hash calls */ + uint16_t seed_len, /* in - no. 
of octets in seed */ + uint8_t const *seed, /* in - pointer to seed */ + uint8_t *out); /* out - address for output */ + +/* ntru_mgftp1 + * + * Implements a mask-generation function for trinary polynomials, + * MGF-TP-1, generating an arbitrary number of octets based on hashing + * a digest-length string concatenated with a 4-octet counter. From + * these octets, N trits are derived. + * + * The state (string and counter) is initialized when a seed is present. + * + * Returns NTRU_OK if successful. + * Returns NTRU_CRYPTO_HASH_ errors if they occur. + * + */ + +extern uint32_t +ntru_mgftp1( + NTRU_CRYPTO_HASH_ALGID hash_algid, /* in - hash alg ID for + MGF-TP-1 */ + uint8_t md_len, /* in - no. of octets in + digest */ + uint8_t min_calls, /* in - minimum no. of hash + calls */ + uint16_t seed_len, /* in - no. of octets in seed */ + uint8_t *seed, /* in - pointer to seed */ + uint8_t *buf, /* in - pointer to working + buffer */ + uint16_t num_trits_needed, /* in - no. of trits in mask */ + uint8_t *mask); /* out - address for mask trits */ + +#endif /* NTRU_CRYPTO_NTRU_MGF1_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_karat.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_karat.c new file mode 100644 index 0000000000000000000000000000000000000000..1fa8e7e519761cbbc8cbb5b302cede15178c7495 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_karat.c @@ -0,0 +1,137 @@ +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_poly.h" + +#define PAD(N) ((N + 0x000f) & 0xfff0) + +static void +grade_school_mul( + uint16_t *res1, /* out - a * b in Z[x], must be length 2N */ + uint16_t const *a, /* in - polynomial */ + uint16_t const *b, /* in - polynomial */ + uint16_t const N) /* in - number of coefficients in a and b */ +{ + uint16_t i; + uint16_t j; + + for (j = 0; j < N; j++) { + res1[j] = a[0] * b[j]; + } + for (i = 1; i < N; i++) { + res1[i + N - 1] = 0; + for (j = 0; j < N; j++) { + res1[i + j] += a[i] * b[j]; + } + } + 
res1[2 * N - 1] = 0; + + return; +} + +static void +karatsuba( + uint16_t *res1, /* out - a * b in Z[x], must be length 2k */ + uint16_t *tmp1, /* in - k coefficients of scratch space */ + uint16_t const *a, /* in - polynomial */ + uint16_t const *b, /* in - polynomial */ + uint16_t const k) /* in - number of coefficients in a and b */ +{ + uint16_t i; + + uint16_t const p = k >> 1; + + uint16_t *res2; + uint16_t *res3; + uint16_t *res4; + uint16_t *tmp2; + uint16_t const *a2; + uint16_t const *b2; + + /* Grade school multiplication for small / odd inputs */ + if (k <= 38 || (k & 1) != 0) { + grade_school_mul(res1, a, b, k); + return; + } + + res2 = res1 + p; + res3 = res1 + k; + res4 = res1 + k + p; + tmp2 = tmp1 + p; + a2 = a + p; + b2 = b + p; + + for (i = 0; i < p; i++) { + res1[i] = a[i] - a2[i]; + res2[i] = b2[i] - b[i]; + } + + karatsuba(tmp1, res3, res1, res2, p); + + karatsuba(res3, res1, a2, b2, p); + + for (i = 0; i < p; i++) { + tmp1[i] += res3[i]; + } + + for (i = 0; i < p; i++) { + res2[i] = tmp1[i]; + tmp2[i] += res4[i]; + res3[i] += tmp2[i]; + } + + karatsuba(tmp1, res1, a, b, p); + + for (i = 0; i < p; i++) { + res1[i] = tmp1[i]; + res2[i] += tmp1[i] + tmp2[i]; + res3[i] += tmp2[i]; + } + + return; +} + +void ntru_ring_mult_coefficients_memreq( + uint16_t N, + uint16_t *tmp_polys, + uint16_t *poly_coeffs) { + if (tmp_polys) { + *tmp_polys = 3; + } + + if (poly_coeffs) { + *poly_coeffs = PAD(N); + } +} + +/* ntru_ring_mult_coefficients + * + * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b" + * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1). + * This is a convolution operation. + * + * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum + * beyond 16 bits does not matter. 
+ */ + +void ntru_ring_mult_coefficients( + uint16_t const *a, /* in - pointer to polynomial a */ + uint16_t const *b, /* in - pointer to polynomial b */ + uint16_t N, /* in - degree of (x^N - 1) */ + uint16_t q, /* in - large modulus */ + uint16_t *tmp, /* in - temp buffer of 3*padN elements */ + uint16_t *c) /* out - address for polynomial c */ +{ + uint16_t i; + uint16_t q_mask = q - 1; + + memset(tmp, 0, 3 * PAD(N) * sizeof(uint16_t)); + karatsuba(tmp, tmp + 2 * PAD(N), a, b, PAD(N)); + + for (i = 0; i < N; i++) { + c[i] = (tmp[i] + tmp[i + N]) & q_mask; + } + for (; i < PAD(N); i++) { + c[i] = 0; + } + + return; +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_simd.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_simd.c new file mode 100644 index 0000000000000000000000000000000000000000..88c0177b7f47ded8edb47db4c492ad12f05a5e9d --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_simd.c @@ -0,0 +1,131 @@ +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_poly.h" +#include <immintrin.h> + +#define PAD(N) ((N + 0x0007) & 0xfff8) + +static void +grade_school_mul( + uint16_t *res1, /* out - a * b in Z[x], must be length 2N */ + uint16_t const *a, /* in - polynomial */ + uint16_t const *b, /* in - polynomial */ + uint16_t const N) /* in - number of coefficients in a and b */ +{ + __m128i *T; + + uint16_t i; + uint16_t j; + uint16_t m; + + __m128i ai8; + __m128i ai8h; + __m128i ai8l; + __m128i abroad[8]; + + __m128i cur; + __m128i next; + + __m128i x1; + __m128i x2; + + T = (__m128i *) res1; + memset(T, 0, 2 * PAD(N) * sizeof(uint16_t)); + for (i = 0; i < PAD(N) / 8; i++) { + /* Broadcast each of the uint16's at a[8*i] into 8 + copies of that value in a separate __m128i. 
*/ + ai8 = _mm_load_si128((__m128i *) a + i); + ai8h = _mm_unpackhi_epi16(ai8, ai8); + ai8l = _mm_unpacklo_epi16(ai8, ai8); + abroad[0] = _mm_shuffle_epi32(ai8h, 0xFFFF); + abroad[1] = _mm_shuffle_epi32(ai8h, 0xAAAA); + abroad[2] = _mm_shuffle_epi32(ai8h, 0x5555); + abroad[3] = _mm_shuffle_epi32(ai8h, 0x0000); + + abroad[4] = _mm_shuffle_epi32(ai8l, 0xFFFF); + abroad[5] = _mm_shuffle_epi32(ai8l, 0xAAAA); + abroad[6] = _mm_shuffle_epi32(ai8l, 0x5555); + abroad[7] = _mm_shuffle_epi32(ai8l, 0x0000); + + /* Load a 256 bit section of b. + Shift it down by 2*(m+1) bytes and multiply the + low 128 bits by abroad[m]. Add all 8 of these + values to T[i+j]. */ + cur = _mm_setzero_si128(); + for (j = 0; j < PAD(N) / 8; j++) { + next = _mm_load_si128((__m128i *) b + j); + + x2 = _mm_xor_si128(x2, x2); + for (m = 0; m < 8; m++) { + cur = _mm_alignr_epi8(next, cur, 2); + next = _mm_srli_si128(next, 2); + + x1 = _mm_mullo_epi16(cur, abroad[m]); + x2 = _mm_add_epi16(x2, x1); + } + x2 = _mm_add_epi16(x2, _mm_load_si128(T + i + j)); + _mm_store_si128(T + i + j, x2); + } + + /* Handle the last N&7 coefficients from a */ + x2 = _mm_xor_si128(x2, x2); + for (m = 0; m < (N & 7); m++) { + cur = _mm_srli_si128(cur, 2); + + x1 = _mm_mullo_epi16(cur, abroad[m]); + x2 = _mm_add_epi16(x2, x1); + } + _mm_store_si128(T + i + j, x2); + } + + return; +} + +/* To multiply polynomials mod x^N - 1 this mult_coefficients implementation + * needs scratch space of size num_polys * num_coeffs * sizeof(uint16_t) */ +void ntru_ring_mult_coefficients_memreq( + uint16_t N, + uint16_t *num_polys, + uint16_t *num_coeffs) { + if (num_polys) { + *num_polys = 2; + } + + if (num_coeffs) { + *num_coeffs = PAD(N); + } +} + +/* ntru_ring_mult_coefficients + * + * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b" + * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1). + * This is a convolution operation. + * + * Ring element "b" has coefficients in the range [0,N). 
+ * + * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum + * beyond 16 bits does not matter. + */ + +void ntru_ring_mult_coefficients( + uint16_t const *a, /* in - pointer to polynomial a */ + uint16_t const *b, /* in - pointer to polynomial b */ + uint16_t N, /* in - degree of (x^N - 1) */ + uint16_t q, /* in - large modulus */ + uint16_t *tmp, /* in - temp buffer of 3*PAD(N) elements */ + uint16_t *c) /* out - address for polynomial c */ +{ + uint16_t i; + uint16_t q_mask = q - 1; + + grade_school_mul(tmp, a, b, N); + + for (i = 0; i < N; i++) { + c[i] = (tmp[i] + tmp[i + N]) & q_mask; + } + for (; i < PAD(N); i++) { + c[i] = 0; + } + + return; +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices.c new file mode 100644 index 0000000000000000000000000000000000000000..f532d0172ce2927708cec764b00a58513028b573 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices.c @@ -0,0 +1,98 @@ +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_poly.h" + +void ntru_ring_mult_indices_memreq( + uint16_t N, + uint16_t *tmp_polys, + uint16_t *poly_coeffs) { + if (tmp_polys) { + *tmp_polys = 1; + } + + if (poly_coeffs) { + *poly_coeffs = N; + } +} + +/* ntru_ring_mult_indices + * + * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b" + * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1). + * This is a convolution operation. + * + * Ring element "b" is a sparse trinary polynomial with coefficients -1, 0, + * and 1. It is specified by a list, bi, of its nonzero indices containing + * indices for the bi_P1_len +1 coefficients followed by the indices for the + * bi_M1_len -1 coefficients. + * The indices are in the range [0,N). + * + * The result array "c" may share the same memory space as input array "a", + * input array "b", or temp array "t". 
+ * + * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum + * beyond 16 bits does not matter. + */ + +void ntru_ring_mult_indices( + uint16_t const *a, /* in - pointer to ring element a */ + uint16_t const bi_P1_len, /* in - no. of +1 coefficients in b */ + uint16_t const bi_M1_len, /* in - no. of -1 coefficients in b */ + uint16_t const *bi, /* in - pointer to the list of nonzero + indices of ring element b, + containing indices for the +1 + coefficients followed by the + indices for -1 coefficients */ + uint16_t const N, /* in - no. of coefficients in a, b, c */ + uint16_t const q, /* in - large modulus */ + uint16_t *t, /* in - temp buffer of N elements */ + uint16_t *c) /* out - address for polynomial c */ +{ + uint16_t mod_q_mask = q - 1; + uint16_t i, j, k; + + /* t[(i+k)%N] = sum i=0 through N-1 of a[i], for b[k] = -1 */ + + for (k = 0; k < N; k++) { + t[k] = 0; + } + + for (j = bi_P1_len; j < bi_P1_len + bi_M1_len; j++) { + k = bi[j]; + + for (i = 0; k < N; ++i, ++k) { + t[k] = t[k] + a[i]; + } + + for (k = 0; i < N; ++i, ++k) { + t[k] = t[k] + a[i]; + } + } + + /* t[(i+k)%N] = -(sum i=0 through N-1 of a[i] for b[k] = -1) */ + + for (k = 0; k < N; k++) { + t[k] = -t[k]; + } + + /* t[(i+k)%N] += sum i=0 through N-1 of a[i] for b[k] = +1 */ + + for (j = 0; j < bi_P1_len; j++) { + k = bi[j]; + + for (i = 0; k < N; ++i, ++k) { + t[k] = t[k] + a[i]; + } + + for (k = 0; i < N; ++i, ++k) { + t[k] = t[k] + a[i]; + } + } + + /* c = (a * b) mod q */ + + for (k = 0; k < N; k++) { + c[k] = t[k] & mod_q_mask; + } + + return; +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_32.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_32.c new file mode 100644 index 0000000000000000000000000000000000000000..6712444071f2bbb9dcdbbb69b18115d3d3094a64 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_32.c @@ -0,0 +1,152 @@ +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_poly.h" + +/* ntru_ring_mult_indices + * + * 
Multiplies ring element (polynomial) "a" by ring element (polynomial) "b" + * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1). + * This is a convolution operation. + * + * Ring element "b" is a sparse trinary polynomial with coefficients -1, 0, + * and 1. It is specified by a list, bi, of its nonzero indices containing + * indices for the bi_P1_len +1 coefficients followed by the indices for the + * bi_M1_len -1 coefficients. + * The indices are in the range [0,N). + * + * The result array "c" may share the same memory space as input array "a", + * input array "b", or temp array "t". + * + * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum + * beyond 16 bits does not matter. + */ +void ntru_ring_mult_indices( + uint16_t const *a, /* in - pointer to ring element a */ + uint16_t const bi_P1_len, /* in - no. of +1 coefficients in b */ + uint16_t const bi_M1_len, /* in - no. of -1 coefficients in b */ + uint16_t const *bi, /* in - pointer to the list of nonzero + indices of ring element b, + containing indices for the +1 + coefficients followed by the + indices for -1 coefficients */ + uint16_t const N, /* in - no. 
of coefficients in a, b, c */ + uint16_t const q, /* in - large modulus */ + uint16_t *t, /* in - temp buffer of N elements */ + uint16_t *c) /* out - address for polynomial c */ +{ + uint16_t mod_q_mask; + uint32_t mask_interval; + uint16_t iA, iT, iB; /* Loop variables for the relevant arrays */ + uint16_t mask_time; + uint16_t end; + + uint32_t tmp1; + uint32_t tmp2; + + end = N & 0xfffe; /* 4 * floor((N-i)/4) */ + + mod_q_mask = q - 1; + mask_interval = ((1 << 16) / q); + mask_time = 0; + + /* t[(i+k)%N] = sum i=0 through N-1 of a[i], for b[k] = -1 */ + memset(t, 0, N * sizeof(uint16_t)); + for (iB = bi_P1_len; iB < bi_P1_len + bi_M1_len; iB++) { + /* first half -- iT from bi[iB] to N + iA from 0 to N - bi[iB] */ + iT = bi[iB]; + + for (iA = 0; iT < end; iA += 2, iT += 2) { + memcpy(&tmp1, t + iT, sizeof tmp1); + memcpy(&tmp2, a + iA, sizeof tmp2); + tmp1 += tmp2; + memcpy(t + iT, &tmp1, sizeof tmp1); + } + + if (iT < N) { + t[iT] += a[iA]; + iT++; + iA++; + } + + /* second half -- iT from 0 to bi[iB] + iA from bi[iB] to N */ + + for (iT = 0; iA < end; iA += 2, iT += 2) { + memcpy(&tmp1, t + iT, sizeof tmp1); + memcpy(&tmp2, a + iA, sizeof tmp2); + tmp1 += tmp2; + memcpy(t + iT, &tmp1, sizeof tmp1); + } + + if (iA < N) { + t[iT] += a[iA]; + iT++; + iA++; + } + + mask_time++; + if (mask_time == mask_interval) { + for (iT = 0; iT < N; iT++) { + t[iT] &= mod_q_mask; + } + mask_time = 0; + } + } /* for (iB = 0; iB < bi_M1_len; iB++) -- minus-index loop */ + + /* Minus everything */ + for (iT = 0; iT < N; iT++) { + t[iT] = -t[iT]; + t[iT] &= mod_q_mask; + } + mask_time = 0; + + for (iB = 0; iB < bi_P1_len; iB++) { + /* first half -- iT from bi[iB] to N + iA from 0 to N - bi[iB] */ + iT = bi[iB]; + + for (iA = 0; iT < end; iA += 2, iT += 2) { + memcpy(&tmp1, t + iT, sizeof tmp1); + memcpy(&tmp2, a + iA, sizeof tmp2); + tmp1 += tmp2; + memcpy(t + iT, &tmp1, sizeof tmp1); + } + + if (iT < N) { + t[iT] += a[iA]; + iT++; + iA++; + } + + /* second half -- iT from 0 to 
bi[iB] + iA from bi[iB] to N */ + for (iT = 0; iA < end; iA += 2, iT += 2) { + memcpy(&tmp1, t + iT, sizeof tmp1); + memcpy(&tmp2, a + iA, sizeof tmp2); + tmp1 += tmp2; + memcpy(t + iT, &tmp1, sizeof tmp1); + } + + if (iA < N) { + t[iT] += a[iA]; + iT++; + iA++; + } + + mask_time++; + if (mask_time == mask_interval) { + for (iT = 0; iT < N; iT++) { + t[iT] &= mod_q_mask; + } + mask_time = 0; + } + + } /* for (iB = 0; iB < bi_P1_len; iB++) -- plus-index loop */ + + /* c = (a * b) mod q */ + for (iT = 0; iT < N; iT++) { + c[iT] = t[iT] & mod_q_mask; + } + + return; +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_64.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_64.c new file mode 100644 index 0000000000000000000000000000000000000000..76eb59e32cc10882c75fdadf139c6ea5bd6301b3 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_64.c @@ -0,0 +1,186 @@ +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_poly.h" + +/* ntru_ring_mult_indices + * + * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b" + * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1). + * This is a convolution operation. + * + * Ring element "b" is a sparse trinary polynomial with coefficients -1, 0, + * and 1. It is specified by a list, bi, of its nonzero indices containing + * indices for the bi_P1_len +1 coefficients followed by the indices for the + * bi_M1_len -1 coefficients. + * The indices are in the range [0,N). + * + * The result array "c" may share the same memory space as input array "a", + * input array "b", or temp array "t". + * + * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum + * beyond 16 bits does not matter. + */ + +void ntru_ring_mult_indices_64( + uint16_t const *a, /* in - pointer to ring element a */ + uint16_t bi_P1_len, /* in - no. of +1 coefficients in b */ + uint16_t bi_M1_len, /* in - no. 
of -1 coefficients in b */ + uint16_t const *bi, /* in - pointer to the list of nonzero + indices of ring element b, + containing indices for the +1 + coefficients followed by the + indices for -1 coefficients */ + uint16_t N, /* in - no. of coefficients in a, b, c */ + uint16_t q, /* in - large modulus */ + uint16_t *t, /* in - temp buffer of N elements */ + uint16_t *c) /* out - address for polynomial c */ +{ + uint16_t i; + uint16_t mod_q_mask; + uint64_t full_mod_q_mask; + uint32_t mask_interval; + uint16_t iA, iT, iB; /* Loop variables for the relevant arrays */ + uint16_t mask_time; + uint16_t oend[4]; + uint16_t end; + uint16_t const Nmod4 = N & 3; + + uint64_t tmp1; + uint64_t tmp2; + + for (i = 0; i < 4; i++) { + oend[i] = (N - i) & 0xfffc; /* 4 * floor((N-i)/4) */ + } + + mod_q_mask = q - 1; + full_mod_q_mask = (mod_q_mask << 16) | mod_q_mask; + full_mod_q_mask |= (full_mod_q_mask << 32); + mask_interval = ((1 << 16) / q); + + /* t[(i+k)%N] = sum i=0 through N-1 of a[i], for b[k] = -1 */ + + mask_time = 0; + + memset(t, 0, N * sizeof(uint16_t)); + for (iB = bi_P1_len; iB < bi_P1_len + bi_M1_len; iB++) { + /* first half -- from iT to N */ + iT = bi[iB]; + end = oend[iT & 3]; + + for (iA = 0; iT < end; iA += 4, iT += 4) { + memcpy(&tmp1, t + iT, sizeof tmp1); + memcpy(&tmp2, a + iA, sizeof tmp2); + tmp1 += tmp2; + memcpy(t + iT, &tmp1, sizeof tmp1); + } + + while (iT < N) { + t[iT] += a[iA]; + iT++; + iA++; + } + + /* second half -- from 0 to start -1 */ + + /* at this point we have used (N-bi[iB + bi_P1_len]) and iA should be + * equal to bi[iB+bi_P1_len]+1. 
+ */ + end = oend[iA & 3]; + + for (iT = 0; iA < end; iA += 4, iT += 4) { + memcpy(&tmp1, t + iT, sizeof tmp1); + memcpy(&tmp2, a + iA, sizeof tmp2); + tmp1 += tmp2; + memcpy(t + iT, &tmp1, sizeof tmp1); + } + + while (iA < N) { + t[iT] += a[iA]; + iT++; + iA++; + } + + mask_time++; + if (mask_time == mask_interval) { + memcpy(&tmp1, t, sizeof tmp1); + tmp1 &= full_mod_q_mask; + memcpy(t, &tmp1, sizeof tmp1); + + end = oend[Nmod4]; + for (iT = Nmod4; iT < end; iT += 4) { + memcpy(&tmp1, t + iT, sizeof tmp1); + tmp1 &= full_mod_q_mask; + memcpy(t + iT, &tmp1, sizeof tmp1); + } + mask_time = 0; + } + } /* for (iB = 0; iB < bi_M1_len; iB++) -- minus-index loop */ + + /* Minus everything */ + for (iT = 0; iT < N; iT++) { + t[iT] = -t[iT]; + t[iT] &= mod_q_mask; + } + mask_time = 0; + + for (iB = 0; iB < bi_P1_len; iB++) { + /* first half -- from iT to N */ + iT = bi[iB]; + end = oend[iT & 3]; + + for (iA = 0; iT < end; iA += 4, iT += 4) { + memcpy(&tmp1, t + iT, sizeof tmp1); + memcpy(&tmp2, a + iA, sizeof tmp1); + tmp1 += tmp2; + memcpy(t + iT, &tmp1, sizeof tmp1); + } + + while (iT < N) { + t[iT] += a[iA]; + iT++; + iA++; + } + + /* second half -- from 0 to start -1 */ + + /* at this point we have used (N-bi[iB + bi_P1_len]) and iA should be + * equal to bi[iB+bi_P1_len]+1. 
+ */ + end = oend[iA & 3]; + + for (iT = 0; iA < end; iA += 4, iT += 4) { + memcpy(&tmp1, t + iT, sizeof tmp1); + memcpy(&tmp2, a + iA, sizeof tmp1); + tmp1 += tmp2; + memcpy(t + iT, &tmp1, sizeof tmp1); + } + + while (iA < N) { + t[iT] += a[iA]; + iT++; + iA++; + } + + mask_time++; + if (mask_time == mask_interval) { + memcpy(&tmp1, t, sizeof tmp1); + tmp1 &= full_mod_q_mask; + memcpy(t, &tmp1, sizeof tmp1); + + end = oend[Nmod4]; + for (iT = Nmod4; iT < end; iT += 4) { + memcpy(&tmp1, t + iT, sizeof tmp1); + tmp1 &= full_mod_q_mask; + memcpy(t + iT, &tmp1, sizeof tmp1); + } + mask_time = 0; + } + + } /* for (iB = 0; iB < bi_P1_len; iB++) -- plus-index loop */ + + /* c = (a * b) mod q */ + for (iT = 0; iT < N; iT++) { + c[iT] = t[iT] & mod_q_mask; + } + + return; +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_simd.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_simd.c new file mode 100644 index 0000000000000000000000000000000000000000..0f971923b4ce1bf887a3c3221e73fde392c0180e --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_simd.c @@ -0,0 +1,149 @@ +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_poly.h" +#include <immintrin.h> + +#define PAD(N) ((N + 0x0007) & 0xfff8) + +void ntru_ring_mult_indices_memreq( + uint16_t N, + uint16_t *tmp_polys, + uint16_t *poly_coeffs) { + if (tmp_polys) { + *tmp_polys = 2; + } + + if (poly_coeffs) { + *poly_coeffs = PAD(N); + } +} + +/* ntru_ring_mult_indices + * + * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b" + * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1). + * This is a convolution operation. + * + * Ring element "b" is a sparse trinary polynomial with coefficients -1, 0, + * and 1. It is specified by a list, bi, of its nonzero indices containing + * indices for the bi_P1_len +1 coefficients followed by the indices for the + * bi_M1_len -1 coefficients. + * The indices are in the range [0,N). 
+ * + * The result array "c" may share the same memory space as input array "a", + * input array "b", or temp array "t". + * + * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum + * beyond 16 bits does not matter. + */ +void ntru_ring_mult_indices( + uint16_t const *a, /* in - pointer to ring element a */ + uint16_t const bi_P1_len, /* in - no. of +1 coefficients in b */ + uint16_t const bi_M1_len, /* in - no. of -1 coefficients in b */ + uint16_t const *bi, /* in - pointer to the list of nonzero + indices of ring element b, + containing indices for the +1 + coefficients followed by the + indices for -1 coefficients */ + uint16_t const N, /* in - no. of coefficients in a, b, c */ + uint16_t const q, /* in - large modulus */ + uint16_t *t, /* in - temp buffer of N elements */ + uint16_t *c) /* out - address for polynomial c */ +{ + __m128i *T; + __m128i *Tp; + __m128i *Ti; + + uint16_t i; + uint16_t j; + uint16_t k; + uint16_t m; + uint16_t const mod_q_mask = q - 1; + + __m128i a0s[8]; + __m128i aNs[8]; + + __m128i neg; + __m128i x0; + __m128i x1; + __m128i x2; + __m128i x3; + __m128i x4; + + T = (__m128i *) t; + memset(T, 0, 2 * PAD(N) * sizeof(uint16_t)); + + a0s[0] = _mm_lddqu_si128((__m128i *) a); + aNs[0] = _mm_lddqu_si128((__m128i *) (a + N - 8)); + for (i = 1; i < 8; i++) { + a0s[i] = _mm_slli_si128(a0s[i - 1], 2); + aNs[i] = _mm_srli_si128(aNs[i - 1], 2); + } + + for (i = bi_P1_len; i < bi_P1_len + bi_M1_len; i++) { + k = bi[i]; + m = k & 7; + k /= 8; + Tp = T + k; + x2 = _mm_add_epi16(*Tp, a0s[m]); + _mm_store_si128(Tp, x2); + Tp++; + for (j = 8 - m; j <= (N - 8); j += 8) { + x3 = _mm_lddqu_si128((__m128i *) &a[j]); + x2 = _mm_add_epi16(*Tp, x3); + _mm_store_si128(Tp, x2); + Tp++; + } + if (j == N) + continue; + x2 = _mm_add_epi16(*Tp, aNs[j - (N - 8)]); + _mm_store_si128(Tp, x2); + } + + neg = _mm_setzero_si128(); + neg = _mm_cmpeq_epi8(neg, neg); + Tp = T; + for (i = 0; i < (2 * PAD(N)) / 8; i++) { + x1 = _mm_sign_epi16(*Tp, neg); + 
_mm_store_si128(Tp, x1); + Tp++; + } + + for (i = 0; i < bi_P1_len; i++) { + k = bi[i]; + m = k & 7; + k /= 8; + Tp = T + k; + x2 = _mm_add_epi16(*Tp, a0s[m]); + _mm_store_si128(Tp, x2); + Tp++; + for (j = 8 - m; j <= (N - 8); j += 8) { + x3 = _mm_lddqu_si128((__m128i *) &a[j]); + x2 = _mm_add_epi16(*Tp, x3); + _mm_store_si128(Tp, x2); + Tp++; + } + if (j == N) + continue; + x2 = _mm_add_epi16(*Tp, aNs[j - (N - 8)]); + _mm_store_si128(Tp, x2); + } + + Ti = T; + Tp = (__m128i *) (((uint16_t *) T) + N); + x0 = _mm_set1_epi16(mod_q_mask); + for (j = 0; j < N; j += 8) { + x1 = _mm_load_si128(Ti); + x2 = _mm_lddqu_si128(Tp); + x3 = _mm_add_epi16(x1, x2); + x4 = _mm_and_si128(x3, x0); + _mm_store_si128(Ti, x4); + Ti++; + Tp++; + } + memmove(c, T, N * sizeof(uint16_t)); + for (j = N; j < PAD(N); j++) { + c[j] = 0; + } + + return; +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.c new file mode 100644 index 0000000000000000000000000000000000000000..60389dd680d1dedba01b1819a42d5ad603d08cdb --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.c @@ -0,0 +1,547 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. 
+ * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_ntru_poly.c + * + * Contents: Routines for generating and operating on polynomials in the + * NTRU algorithm. + * + *****************************************************************************/ + +#include "ntru_crypto.h" +#include "ntru_crypto_ntru_poly.h" +#include "ntru_crypto_ntru_mgf1.h" + +/* ntru_gen_poly + * + * Generates polynomials by creating for each polynomial, a list of the + * indices of the +1 coefficients followed by a list of the indices of + * the -1 coefficients. + * + * If a single polynomial is generated (non-product form), indices_counts + * contains a single value of the total number of indices (for +1 and -1 + * coefficients combined). + * + * If multiple polynomials are generated (for product form), their lists of + * indices are sequentially stored in the indices buffer. Each byte of + * indices_counts contains the total number of indices (for +1 and -1 + * coefficients combined) for a single polynomial, beginning with the + * low-order byte for the first polynomial. The high-order byte is unused. + * + * Returns NTRU_OK if successful. + * Returns HASH_BAD_ALG if the algorithm is not supported. + * + */ + +uint32_t +ntru_gen_poly( + NTRU_CRYPTO_HASH_ALGID hash_algid, /* in - hash algorithm ID for + IGF-2 */ + uint8_t md_len, /* in - no. of octets in digest */ + uint8_t min_calls, /* in - minimum no. of hash + calls */ + uint16_t seed_len, /* in - no. of octets in seed */ + uint8_t *seed, /* in - pointer to seed */ + uint8_t *buf, /* in - pointer to working + buffer */ + uint16_t N, /* in - max index + 1 */ + uint8_t c_bits, /* in - no. 
bits for candidate */ + uint16_t limit, /* in - conversion to index + limit */ + bool is_product_form, /* in - if generating multiple + polys */ + uint32_t indices_counts, /* in - nos. of indices needed */ + uint16_t *indices) /* out - address for indices */ +{ + uint8_t *mgf_out; + uint8_t *octets; + uint8_t *used; + uint8_t num_polys; + uint16_t num_indices; + uint16_t octets_available; + uint16_t index_cnt = 0; + uint8_t left = 0; + uint8_t num_left = 0; + uint32_t retcode; + + /* generate minimum MGF1 output */ + + mgf_out = buf + md_len + 4; + if ((retcode = ntru_mgf1(buf, hash_algid, md_len, min_calls, + seed_len, seed, mgf_out)) != NTRU_OK) { + return retcode; + } + + octets = mgf_out; + octets_available = min_calls * md_len; + + /* init indices counts for number of polynomials being generated */ + + if (is_product_form) { + /* number of indices for poly1 is in low byte of indices_counts, + * number of indices for poly2 and poly3 are in next higher bytes + */ + + num_polys = 3; + num_indices = (uint16_t)(indices_counts & 0xff); + indices_counts >>= 8; + + } else { + /* number of bytes for poly is in low 16 bits of indices_counts */ + + num_polys = 1; + num_indices = (uint16_t) indices_counts; + } + + /* init used-index array */ + + used = mgf_out + octets_available; + memset(used, 0, N); + + /* generate indices (IGF-2) for all polynomials */ + + while (num_polys > 0) { + + /* generate indices for a single polynomial */ + + while (index_cnt < num_indices) { + uint16_t index; + uint8_t num_needed; + + /* form next index to convert to an index */ + + do { + /* use any leftover bits first */ + + if (num_left != 0) { + index = left << (c_bits - num_left); + } else { + index = 0; + } + + /* get the rest of the bits needed from new octets */ + + num_needed = c_bits - num_left; + while (num_needed != 0) { + /* get another octet */ + + if (octets_available == 0) { + if ((retcode = ntru_mgf1(buf, hash_algid, md_len, 1, + 0, NULL, mgf_out)) != NTRU_OK) { + return 
retcode; + } + + octets = mgf_out; + octets_available = md_len; + } + left = *octets++; + --octets_available; + + if (num_needed <= 8) { + /* all bits needed to fill the index are in this octet */ + + index |= ((uint16_t)(left)) >> (8 - num_needed); + num_left = 8 - num_needed; + num_needed = 0; + left &= 0xff >> (8 - num_left); + + } else { + /* another octet will be needed after using this + * whole octet + */ + + index |= ((uint16_t) left) << (num_needed - 8); + num_needed -= 8; + } + } + } while (index >= limit); + + /* form index and check if unique */ + + index %= N; + + if (!used[index]) { + used[index] = 1; + indices[index_cnt] = index; + ++index_cnt; + } + } + --num_polys; + + /* init for next polynomial if another polynomial to be generated */ + + if (num_polys > 0) { + memset(used, 0, N); + num_indices = num_indices + + (uint16_t)(indices_counts & 0xff); + indices_counts >>= 8; + } + } + + NTRU_RET(NTRU_OK); +} + +/* ntru_poly_check_min_weight + * + * Checks that the number of 0, +1, and -1 trinary ring elements meet or exceed + * a minimum weight. + */ + +bool ntru_poly_check_min_weight( + uint16_t num_els, /* in - degree of polynomial */ + uint8_t *ringels, /* in - pointer to trinary ring elements */ + uint16_t min_wt) /* in - minimum weight */ +{ + uint16_t wt[3]; + uint16_t i; + + wt[0] = wt[1] = wt[2] = 0; + + for (i = 0; i < num_els; i++) { + ++wt[ringels[i]]; + } + + if ((wt[0] < min_wt) || (wt[1] < min_wt) || (wt[2] < min_wt)) { + return FALSE; + } + + return TRUE; +} + +/* ntru_ring_mult_product_indices + * + * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b" + * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1). + * This is a convolution operation. + * + * Ring element "b" is represented by the product form b1 * b2 + b3, where + * b1, b2, and b3 are each a sparse trinary polynomial with coefficients -1, + * 0, and 1. 
It is specified by a list, bi, of the nonzero indices of b1, b2, + * and b3, containing the indices for the +1 coefficients followed by the + * indices for the -1 coefficients for each polynomial in that order. + * The indices are in the range [0,N). + * + * The result array "c" may share the same memory space as input array "a", + * or input array "b". + * + * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum + * beyond 16 bits does not matter. + */ + +void ntru_ring_mult_product_indices( + uint16_t const *a, /* in - pointer to ring element a */ + uint16_t const b1i_len, /* in - no. of +1 or -1 coefficients in b1 */ + uint16_t const b2i_len, /* in - no. of +1 or -1 coefficients in b2 */ + uint16_t const b3i_len, /* in - no. of +1 or -1 coefficients in b3 */ + uint16_t const *bi, /* in - pointer to the list of nonzero + indices of polynomials b1, b2, b3, + containing indices for the +1 + coefficients followed by the + indices for -1 coefficients for + each polynomial */ + uint16_t const N, /* in - no. 
of coefficients in a, b, c */ + uint16_t const q, /* in - large modulus */ + uint16_t *t, /* in - temp buffer of 2N elements */ + uint16_t *c) /* out - address for polynomial c */ +{ + uint16_t scratch_polys; + uint16_t poly_coeffs; + uint16_t *t2; + uint16_t mod_q_mask; + uint16_t i; + + ntru_ring_mult_indices_memreq(N, &scratch_polys, &poly_coeffs); + t2 = t + scratch_polys * poly_coeffs; + mod_q_mask = q - 1; + + /* t2 = a * b1 */ + + ntru_ring_mult_indices(a, b1i_len, b1i_len, bi, N, q, t, t2); + + /* t2 = (a * b1) * b2 */ + + ntru_ring_mult_indices(t2, b2i_len, b2i_len, bi + (b1i_len << 1), N, q, + t, t2); + + /* t = a * b3 */ + + ntru_ring_mult_indices(a, b3i_len, b3i_len, + bi + ((b1i_len + b2i_len) << 1), N, q, t, t); + + /* c = (a * b1 * b2) + (a * b3) */ + + for (i = 0; i < N; i++) { + c[i] = (t2[i] + t[i]) & mod_q_mask; + } + for (; i < poly_coeffs; i++) { + c[i] = 0; + } + + return; +} + +/* ntru_ring_inv + * + * Finds the inverse of a polynomial, a, in (Z/2Z)[X]/(X^N - 1). + */ + +bool ntru_ring_inv( + uint16_t *a, /* in - pointer to polynomial a */ + uint16_t N, /* in - no. 
of coefficients in a */ + uint16_t *t, /* in - temp buffer of 2N elements */ + uint16_t *a_inv) /* out - address for polynomial a^-1 */ +{ + uint8_t *b = (uint8_t *) t; /* b cannot be in a_inv since it must be + rotated and copied there as a^-1 mod 2 */ + uint8_t *c = b + N; /* c cannot be in a_inv since it exchanges + with b, and b cannot be in a_inv */ + uint8_t *f = c + N; + uint8_t *g = (uint8_t *) a_inv; /* g needs N + 1 bytes */ + uint16_t deg_b; + uint16_t deg_c; + uint16_t deg_f; + uint16_t deg_g; + uint16_t k = 0; + uint16_t i, j; + + if (a == NULL || t == NULL || a_inv == NULL) { + return FALSE; + } + + /* form a^-1 in (Z/2Z)[X]/(X^N - 1) */ + + memset(b, 0, (N << 1)); /* clear to init b, c */ + + /* b(X) = 1 */ + + b[0] = 1; + deg_b = 0; + + /* c(X) = 0 (cleared above) */ + + deg_c = 0; + + /* f(X) = a(X) mod 2 */ + + deg_f = 0; + j = 0; + for (i = 0; i < N; i++) { + f[i] = (uint8_t)(a[i] & 1); + j ^= f[i]; + if (f[i]) + deg_f = i; + } + + /* Parity is zero, not invertible */ + if (j == 0) { + return FALSE; + } + + /* g(X) = X^N - 1 */ + + g[0] = 1; + memset(g + 1, 0, N - 1); + g[N] = 1; + deg_g = N; + + /* until f(X) = 1 */ + + while (1) { + /* while f[0] = 0, f(X) /= X, c(X) *= X, k++ */ + + for (i = 0; (i <= deg_f) && (f[i] == 0); ++i) + ; + if (i > deg_f) + return FALSE; + if (i) { + k = k + i; + + f = f + i; + deg_f = deg_f - i; + + memmove(c + i, c, deg_c + 1); + memset(c, 0, i); + deg_c = deg_c + i; + } + + /* if f(X) = 1, done */ + + if (deg_f == 0) { + break; + } + + /* if deg_f < deg_g, f <-> g, b <-> c */ + + if (deg_f < deg_g) { + uint8_t *x; + + x = f; + f = g; + g = x; + deg_f ^= deg_g; + deg_g ^= deg_f; + deg_f ^= deg_g; + x = b; + b = c; + c = x; + deg_b ^= deg_c; + deg_c ^= deg_b; + deg_b ^= deg_c; + } + + /* f(X) += g(X) + * might change degree of f if deg_g >= deg_f + */ + for (i = 0; i <= deg_g; i++) { + f[i] ^= g[i]; + } + + if (deg_g == deg_f) { + while (deg_f > 0 && f[deg_f] == 0) { + --deg_f; + } + } + + /* b(X) += c(X) */ + for 
(i = 0; i <= deg_c; i++) { + b[i] ^= c[i]; + } + + if (deg_c >= deg_b) { + deg_b = deg_c; + while (deg_b > 0 && b[deg_b] == 0) { + --deg_b; + } + } + } + + /* a^-1 in (Z/2Z)[X]/(X^N - 1) = b(X) shifted left k coefficients */ + + j = 0; + + if (k >= N) { + k = k - N; + } + + for (i = k; i < N; i++) { + a_inv[j++] = (uint16_t)(b[i]); + } + + for (i = 0; i < k; i++) { + a_inv[j++] = (uint16_t)(b[i]); + } + + return TRUE; +} + +/* ntru_ring_lift_inv_pow2_product + * + * Lifts an element of (Z/2)[x]/(x^N - 1) to (Z/q)[x]/(x^N - 1) + * where q is a power of 2 such that 256 < q <= 65536. + * + * inv must be padded with zeros to the degree used by + * ntru_ring_mult_coefficients. + * + * inv is assumed to be the inverse mod 2 of the product form element + * given by (1 + 3*(F1*F2 + F3)). The lift is performed in place -- + * inv will be overwritten with the result. + * + * Requires scratch space for ntru_ring_mult_coefficients + one extra + * polynomial with the same padding. + */ +uint32_t +ntru_ring_lift_inv_pow2_product( + uint16_t *inv, + uint16_t const dF1, + uint16_t const dF2, + uint16_t const dF3, + uint16_t const *F_buf, + uint16_t const N, + uint16_t const q, + uint16_t *t) { + uint16_t i; + uint16_t j; + uint16_t mod_q_mask = q - 1; + uint16_t padN; + ntru_ring_mult_coefficients_memreq(N, NULL, &padN); + + for (j = 0; j < 4; ++j) /* assumes 256 < q <= 65536 */ + { + /* f^-1 = f^-1 * (2 - f * f^-1) mod q */ + ntru_ring_mult_product_indices(inv, (uint16_t) dF1, + (uint16_t) dF2, (uint16_t) dF3, + F_buf, N, q, + t, t); + for (i = 0; i < N; ++i) { + t[i] = -((inv[i] + 3 * t[i]) & mod_q_mask); + } + t[0] = t[0] + 2; + /* mult_indices works with degree N, mult_coefficients with padN */ + memset(t + N, 0, (padN - N) * sizeof(uint16_t)); + + ntru_ring_mult_coefficients(inv, t, N, q, t + padN, inv); + } + + NTRU_RET(NTRU_OK); +} + +/* ntru_ring_lift_inv_pow2_standard + * + * Lifts an element of (Z/2)[x]/(x^N - 1) to (Z/q)[x]/(x^N - 1) + * where q is a power of 2 such 
that 256 < q <= 65536. + * + * inv must be padded with zeros to the degree used by + * ntru_ring_mult_coefficients. + * + * inv is assumed to be the inverse mod 2 of the trinary element f. + * The lift is performed in place -- inv will be overwritten with the result. + * + * Requires scratch space for ntru_ring_mult_coefficients + one extra + * polynomial with the same padding. + */ +uint32_t +ntru_ring_lift_inv_pow2_standard( + uint16_t *inv, + uint16_t const *f, + uint16_t const N, + uint16_t const q, + uint16_t *t) { + uint16_t i; + uint16_t j; + uint16_t padN; + ntru_ring_mult_coefficients_memreq(N, NULL, &padN); + + for (j = 0; j < 4; ++j) /* assumes 256 < q <= 65536 */ + { + /* f^-1 = f^-1 * (2 - f * f^-1) mod q */ + ntru_ring_mult_coefficients(f, inv, N, q, t, t); + for (i = 0; i < N; ++i) { + t[i] = -t[i]; + } + t[0] = t[0] + 2; + + ntru_ring_mult_coefficients(inv, t, N, q, t + padN, inv); + } + + NTRU_RET(NTRU_OK); +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.h b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.h new file mode 100644 index 0000000000000000000000000000000000000000..6aa48c9ac32411ed6f101eca2666a3ee3231c94e --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.h @@ -0,0 +1,280 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. 
You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_ntru_poly.h + * + * Contents: Public header file for generating and operating on polynomials + * in the NTRU algorithm. + * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_NTRU_POLY_H +#define NTRU_CRYPTO_NTRU_POLY_H + +#include "ntru_crypto.h" +#include "ntru_crypto_hash_basics.h" + +/* function declarations */ + +/* ntru_gen_poly + * + * Generates polynomials by creating for each polynomial, a list of the + * indices of the +1 coefficients followed by a list of the indices of + * the -1 coefficients. + * + * If a single polynomial is generated (non-product form), indices_counts + * contains a single value of the total number of indices (for +1 and -1 + * coefficients combined). + * + * If multiple polynomials are generated (for product form), their lists of + * indices are sequentially stored in the indices buffer. Each byte of + * indices_counts contains the total number of indices (for +1 and -1 + * coefficients combined) for a single polynomial, beginning with the + * low-order byte for the first polynomial. The high-order byte is unused. + * + * Returns NTRU_OK if successful. + * Returns HASH_BAD_ALG if the algorithm is not supported. + * + */ + +extern uint32_t +ntru_gen_poly( + NTRU_CRYPTO_HASH_ALGID hash_algid, /* in - hash algorithm ID for + IGF-2 */ + uint8_t md_len, /* in - no. of octets in digest */ + uint8_t min_calls, /* in - minimum no. of hash + calls */ + uint16_t seed_len, /* in - no. 
of octets in seed */ + uint8_t *seed, /* in - pointer to seed */ + uint8_t *buf, /* in - pointer to working + buffer */ + uint16_t N, /* in - max index + 1 */ + uint8_t c_bits, /* in - no. bits for candidate */ + uint16_t limit, /* in - conversion to index + limit */ + bool is_product_form, /* in - if generating multiple + polys */ + uint32_t indices_counts, /* in - nos. of indices needed */ + uint16_t *indices); /* out - address for indices */ + +/* ntru_poly_check_min_weight + * + * Checks that the number of 0, +1, and -1 trinary ring elements meet or exceed + * a minimum weight. + */ + +extern bool +ntru_poly_check_min_weight( + uint16_t num_els, /* in - degree of polynomial */ + uint8_t *ringels, /* in - pointer to trinary ring elements */ + uint16_t min_wt); /* in - minimum weight */ + +/* ntru_ring_mult_indices + * + * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b" + * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1). + * This is a convolution operation. + * + * Ring element "b" is a sparse trinary polynomial with coefficients -1, 0, + * and 1. It is specified by a list, bi, of its nonzero indices containing + * indices for the bi_P1_len +1 coefficients followed by the indices for the + * bi_M1_len -1 coefficients. + * The indices are in the range [0,N). + * + * The result array "c" may share the same memory space as input array "a", + * or input array "b". + * + * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum + * beyond 16 bits does not matter. + */ + +/* wrapper */ +extern void +ntru_ring_mult_indices( + uint16_t const *a, /* in - pointer to ring element a */ + uint16_t const bi_P1_len, /* in - no. of +1 coefficients in b */ + uint16_t const bi_M1_len, /* in - no. 
of -1 coefficients in b */ + uint16_t const *bi, /* in - pointer to the list of nonzero + indices of ring element b, + containing indices for the +1 + coefficients followed by the + indices for -1 coefficients */ + uint16_t const N, /* in - no. of coefficients in a, b, c */ + uint16_t const q, /* in - large modulus */ + uint16_t *t, /* in - temp buffer. Size is impl dependent. + see ntru_ring_mult_indices_memreq */ + uint16_t *c); /* out - address for polynomial c */ + +/* ntru_ring_mult_product_indices + * + * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b" + * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1). + * This is a convolution operation. + * + * Ring element "b" is represented by the product form b1 * b2 + b3, where + * b1, b2, and b3 are each a sparse trinary polynomial with coefficients -1, + * 0, and 1. It is specified by a list, bi, of the nonzero indices of b1, b2, + * and b3, containing the indices for the +1 coefficients followed by the + * indices for the -1 coefficients for each polynomial in that order. + * The indices are in the range [0,N). + * + * The result array "c" may share the same memory space as input array "a", + * or input array "b". + * + * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum + * beyond 16 bits does not matter. + */ + +extern void +ntru_ring_mult_product_indices( + uint16_t const *a, /* in - pointer to ring element a */ + uint16_t const b1i_len, /* in - no. of +1 or -1 coefficients in b1 */ + uint16_t const b2i_len, /* in - no. of +1 or -1 coefficients in b2 */ + uint16_t const b3i_len, /* in - no. of +1 or -1 coefficients in b3 */ + uint16_t const *bi, /* in - pointer to the list of nonzero + indices of polynomials b1, b2, b3, + containing indices for the +1 + coefficients followed by the + indices for -1 coefficients for + each polynomial */ + uint16_t const N, /* in - no. 
of coefficients in a, b, c */ + uint16_t const q, /* in - large modulus */ + uint16_t *t, /* in - temp buffer. Size is impl dependent. + see ntru_ring_mult_indices_memreq */ + uint16_t *c); /* out - address for polynomial c */ + +/* ntru_ring_mult_coefficients + * + * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b" + * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1). + * This is a convolution operation. + * + * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum + * beyond 16 bits does not matter. + */ + +extern void +ntru_ring_mult_coefficients( + uint16_t const *a, /* in - pointer to polynomial a */ + uint16_t const *b, /* in - pointer to polynomial b */ + uint16_t N, /* in - degree of (x^N - 1) */ + uint16_t q, /* in - large modulus */ + uint16_t *tmp, /* in - temp buffer. Size is impl dependent. + see ntru_ring_mult_coefficients_memreq */ + uint16_t *c); /* out - address for polynomial c */ + +/* ntru_ring_inv + * + * Finds the inverse of a polynomial, a, in (Z/2^rZ)[X]/(X^N - 1). + * + * This assumes q is 2^r where 8 < r < 16, so that operations mod q can + * wait until the end, and only 16-bit arrays need to be used. + */ + +extern bool +ntru_ring_inv( + uint16_t *a, /* in - pointer to polynomial a */ + uint16_t N, /* in - no. of coefficients in a */ + uint16_t *t, /* in - temp buffer of 2N elements */ + uint16_t *a_inv); /* out - address for polynomial a^-1 */ + +/* ntru_ring_lift_inv_pow2_standard + * + * Lifts an element of (Z/2)[x]/(x^N - 1) to (Z/q)[x]/(x^N - 1) + * where q is a power of 2 such that 256 < q <= 65536. + * + * inv must be padded with zeros to the degree used by + * ntru_ring_mult_coefficients. + * + * inv is assumed to be the inverse mod 2 of the trinary element f. + * The lift is performed in place -- inv will be overwritten with the result. + * + * Requires scratch space for ntru_ring_mult_coefficients + one extra + * polynomial with the same padding. 
+ */ +uint32_t +ntru_ring_lift_inv_pow2_standard( + uint16_t *inv, + uint16_t const *f, + uint16_t const N, + uint16_t const q, + uint16_t *t); + +/* ntru_ring_lift_inv_pow2_product + * + * Lifts an element of (Z/2)[x]/(x^N - 1) to (Z/q)[x]/(x^N - 1) + * where q is a power of 2 such that 256 < q <= 65536. + * + * inv must be padded with zeros to the degree used by + * ntru_ring_mult_coefficients. + * + * inv is assumed to be the inverse mod 2 of the product form element + * given by (1 + 3*(F1*F2 + F3)). The lift is performed in place -- + * inv will be overwritten with the result. + * + * Requires scratch space for ntru_ring_mult_coefficients + one extra + * polynomial with the same padding. + */ +uint32_t +ntru_ring_lift_inv_pow2_product( + uint16_t *inv, + uint16_t const dF1, + uint16_t const dF2, + uint16_t const dF3, + uint16_t const *F_buf, + uint16_t const N, + uint16_t const q, + uint16_t *t); + +/* ntru_ring_mult_coefficients_memreq + * + * Different implementations of ntru_ring_mult_coefficients may + * have different memory requirements. + * + * This gets the memory requirements of ntru_ring_mult_coefficients as + * a number of scratch polynomials and the number of coefficients needed + * per polynomial. + */ +void ntru_ring_mult_coefficients_memreq( + uint16_t N, + uint16_t *num_scratch_polys, + uint16_t *pad_deg); + +/* ntru_ring_mult_indices_memreq + * + * Different implementations of ntru_ring_mult_indices may + * have different memory requirements. + * + * This gets the memory requirements of ntru_ring_mult_indices as + * a number of scratch polynomials (num_scratch_polys) and the number + * of coefficients needed per polynomial (pad_deg). + * + * Note that ntru_ring_mult_prod_indices requires one additional polynomial + * of degree pad_deg for holding a temporary result. 
+ */ +void ntru_ring_mult_indices_memreq( + uint16_t N, + uint16_t *num_scratch_polys, + uint16_t *pad_deg); + +#endif /* NTRU_CRYPTO_NTRU_POLY_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_platform.h b/crypt/liboqs/kex_ntru/ntru_crypto_platform.h new file mode 100644 index 0000000000000000000000000000000000000000..b18ff6a27894044836a968fda07a33beaf54929a --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_platform.h @@ -0,0 +1,92 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_platform.h + * + * Contents: Platform-specific basic definitions. + * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_PLATFORM_H +#define NTRU_CRYPTO_PLATFORM_H + +/* The default implementation is to use stdint.h, a part of the C99 standard. + * Systems that don't support this are handled on a case-by-case basis. 
+ */ + +#if defined(WIN32) && (_MSC_VER < 1600) + +#include <basetsd.h> +typedef unsigned char uint8_t; +typedef signed char int8_t; +typedef unsigned short int uint16_t; +typedef short int int16_t; +typedef UINT32 uint32_t; +typedef UINT64 uint64_t; + +#elif defined(linux) && defined(__KERNEL__) + +#include <linux/types.h> + +#else + +#include <stdint.h> + +#endif + +/* For linux kernel drivers: + * Use kmalloc and kfree in place of malloc / free + * Use BUG_ON in place of assert */ +#if defined(linux) && defined(__KERNEL__) + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/string.h> +#define MALLOC(size) (kmalloc(size, GFP_KERNEL)) +#define FREE(x) (kfree(x)) + +#else + +#include <stdlib.h> +#include <assert.h> +#include <string.h> +#define MALLOC(size) (malloc(size)) +#define FREE(x) (free(x)) + +#endif + +#if !defined(HAVE_BOOL) && !defined(__cplusplus) +#define HAVE_BOOL +typedef uint8_t bool; +#endif /* HAVE_BOOL */ + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#endif /* NTRU_CRYPTO_PLATFORM_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha.h b/crypt/liboqs/kex_ntru/ntru_crypto_sha.h new file mode 100644 index 0000000000000000000000000000000000000000..4c0bb7bef4832cf165d08df800fda61dcd88c3e2 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha.h @@ -0,0 +1,56 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_sha.h + * + * Contents: Definitions and declarations common to all SHA hash algorithms. + * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_SHA_H +#define NTRU_CRYPTO_SHA_H + +#include "ntru_crypto_error.h" +#include "ntru_crypto_hash_basics.h" + +/*************** + * error codes * + ***************/ + +#define SHA_OK ((uint32_t) NTRU_CRYPTO_HASH_OK) +#define SHA_FAIL ((uint32_t) NTRU_CRYPTO_HASH_FAIL) +#define SHA_BAD_PARAMETER ((uint32_t) NTRU_CRYPTO_HASH_BAD_PARAMETER) +#define SHA_OVERFLOW ((uint32_t) NTRU_CRYPTO_HASH_OVERFLOW) + +#define SHA_RESULT(r) ((uint32_t)((r) ? 
SHA_ERROR_BASE + (r) : (r))) +#define SHA_RET(r) return SHA_RESULT(r); + +/********* + * flags * + *********/ + +#define SHA_DATA_ONLY HASH_DATA_ONLY +#define SHA_INIT HASH_INIT +#define SHA_FINISH HASH_FINISH + +#endif /* NTRU_CRYPTO_SHA_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha1.c b/crypt/liboqs/kex_ntru/ntru_crypto_sha1.c new file mode 100644 index 0000000000000000000000000000000000000000..d17da12c30a02e1a40e7884d7e1d68d2ef1a8a95 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha1.c @@ -0,0 +1,679 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_sha1.c + * + * Contents: Routines implementing the SHA-1 hash calculation. 
+ * + *****************************************************************************/ + +#include "ntru_crypto.h" +#include "ntru_crypto_sha1.h" +#include "ntru_crypto_msbyte_uint32.h" + +/* chaining state elements */ + +#define H0 state[0] +#define H1 state[1] +#define H2 state[2] +#define H3 state[3] +#define H4 state[4] + +/* standard SHA-1 initialization values */ + +#define H0_INIT 0x67452301UL +#define H1_INIT 0xefcdab89UL +#define H2_INIT 0x98badcfeUL +#define H3_INIT 0x10325476UL +#define H4_INIT 0xc3d2e1f0UL + +/* sha1_blk() + * + * This routine updates the current hash output (chaining state) + * by performing SHA-1 on a 512-bit block of data represented as sixteen + * 32-bit words. + */ + +#define K00_19 0x5a827999UL +#define K20_39 0x6ed9eba1UL +#define K40_59 0x8f1bbcdcUL +#define K60_79 0xca62c1d6UL + +#define RL(a, n) (((a) << (n)) | ((a) >> (32 - (n)))) + +static void +sha1_blk( + uint32_t const *data, /* in - ptr to 16 32-bit word input block */ + uint32_t *state) /* in/out - ptr to 5 32-bit word chaining state */ +{ + uint32_t A, B, C, D, E; + uint32_t w[16]; + + /* init A - E */ + + A = H0; + B = H1; + C = H2; + D = H3; + E = H4; + + /* rounds 0 - 15 */ + + E += RL(A, 5) + K00_19 + ((B & (C ^ D)) ^ D) + data[0]; + B = RL(B, 30); + D += RL(E, 5) + K00_19 + ((A & (B ^ C)) ^ C) + data[1]; + A = RL(A, 30); + C += RL(D, 5) + K00_19 + ((E & (A ^ B)) ^ B) + data[2]; + E = RL(E, 30); + B += RL(C, 5) + K00_19 + ((D & (E ^ A)) ^ A) + data[3]; + D = RL(D, 30); + A += RL(B, 5) + K00_19 + ((C & (D ^ E)) ^ E) + data[4]; + C = RL(C, 30); + E += RL(A, 5) + K00_19 + ((B & (C ^ D)) ^ D) + data[5]; + B = RL(B, 30); + D += RL(E, 5) + K00_19 + ((A & (B ^ C)) ^ C) + data[6]; + A = RL(A, 30); + C += RL(D, 5) + K00_19 + ((E & (A ^ B)) ^ B) + data[7]; + E = RL(E, 30); + B += RL(C, 5) + K00_19 + ((D & (E ^ A)) ^ A) + data[8]; + D = RL(D, 30); + A += RL(B, 5) + K00_19 + ((C & (D ^ E)) ^ E) + data[9]; + C = RL(C, 30); + E += RL(A, 5) + K00_19 + ((B & (C ^ D)) ^ D) + 
data[10]; + B = RL(B, 30); + D += RL(E, 5) + K00_19 + ((A & (B ^ C)) ^ C) + data[11]; + A = RL(A, 30); + C += RL(D, 5) + K00_19 + ((E & (A ^ B)) ^ B) + data[12]; + E = RL(E, 30); + B += RL(C, 5) + K00_19 + ((D & (E ^ A)) ^ A) + data[13]; + D = RL(D, 30); + A += RL(B, 5) + K00_19 + ((C & (D ^ E)) ^ E) + data[14]; + C = RL(C, 30); + E += RL(A, 5) + K00_19 + ((B & (C ^ D)) ^ D) + data[15]; + B = RL(B, 30); + + /* rounds 16 - 19 */ + + w[0] = data[0] ^ data[2] ^ data[8] ^ data[13]; + w[0] = RL(w[0], 1); + D += RL(E, 5) + K00_19 + ((A & (B ^ C)) ^ C) + w[0]; + A = RL(A, 30); + w[1] = data[1] ^ data[3] ^ data[9] ^ data[14]; + w[1] = RL(w[1], 1); + C += RL(D, 5) + K00_19 + ((E & (A ^ B)) ^ B) + w[1]; + E = RL(E, 30); + w[2] = data[2] ^ data[4] ^ data[10] ^ data[15]; + w[2] = RL(w[2], 1); + B += RL(C, 5) + K00_19 + ((D & (E ^ A)) ^ A) + w[2]; + D = RL(D, 30); + w[3] = data[3] ^ data[5] ^ data[11] ^ w[0]; + w[3] = RL(w[3], 1); + A += RL(B, 5) + K00_19 + ((C & (D ^ E)) ^ E) + w[3]; + C = RL(C, 30); + + /* rounds 20 - 39 */ + + w[4] = data[4] ^ data[6] ^ data[12] ^ w[1]; + w[4] = RL(w[4], 1); + E += RL(A, 5) + K20_39 + (B ^ C ^ D) + w[4]; + B = RL(B, 30); + w[5] = data[5] ^ data[7] ^ data[13] ^ w[2]; + w[5] = RL(w[5], 1); + D += RL(E, 5) + K20_39 + (A ^ B ^ C) + w[5]; + A = RL(A, 30); + w[6] = data[6] ^ data[8] ^ data[14] ^ w[3]; + w[6] = RL(w[6], 1); + C += RL(D, 5) + K20_39 + (E ^ A ^ B) + w[6]; + E = RL(E, 30); + w[7] = data[7] ^ data[9] ^ data[15] ^ w[4]; + w[7] = RL(w[7], 1); + B += RL(C, 5) + K20_39 + (D ^ E ^ A) + w[7]; + D = RL(D, 30); + w[8] = data[8] ^ data[10] ^ w[0] ^ w[5]; + w[8] = RL(w[8], 1); + A += RL(B, 5) + K20_39 + (C ^ D ^ E) + w[8]; + C = RL(C, 30); + w[9] = data[9] ^ data[11] ^ w[1] ^ w[6]; + w[9] = RL(w[9], 1); + E += RL(A, 5) + K20_39 + (B ^ C ^ D) + w[9]; + B = RL(B, 30); + w[10] = data[10] ^ data[12] ^ w[2] ^ w[7]; + w[10] = RL(w[10], 1); + D += RL(E, 5) + K20_39 + (A ^ B ^ C) + w[10]; + A = RL(A, 30); + w[11] = data[11] ^ data[13] ^ w[3] ^ w[8]; + 
w[11] = RL(w[11], 1); + C += RL(D, 5) + K20_39 + (E ^ A ^ B) + w[11]; + E = RL(E, 30); + w[12] = data[12] ^ data[14] ^ w[4] ^ w[9]; + w[12] = RL(w[12], 1); + B += RL(C, 5) + K20_39 + (D ^ E ^ A) + w[12]; + D = RL(D, 30); + w[13] = data[13] ^ data[15] ^ w[5] ^ w[10]; + w[13] = RL(w[13], 1); + A += RL(B, 5) + K20_39 + (C ^ D ^ E) + w[13]; + C = RL(C, 30); + w[14] = data[14] ^ w[0] ^ w[6] ^ w[11]; + w[14] = RL(w[14], 1); + E += RL(A, 5) + K20_39 + (B ^ C ^ D) + w[14]; + B = RL(B, 30); + w[15] = data[15] ^ w[1] ^ w[7] ^ w[12]; + w[15] = RL(w[15], 1); + D += RL(E, 5) + K20_39 + (A ^ B ^ C) + w[15]; + A = RL(A, 30); + w[0] = w[0] ^ w[2] ^ w[8] ^ w[13]; + w[0] = RL(w[0], 1); + C += RL(D, 5) + K20_39 + (E ^ A ^ B) + w[0]; + E = RL(E, 30); + w[1] = w[1] ^ w[3] ^ w[9] ^ w[14]; + w[1] = RL(w[1], 1); + B += RL(C, 5) + K20_39 + (D ^ E ^ A) + w[1]; + D = RL(D, 30); + w[2] = w[2] ^ w[4] ^ w[10] ^ w[15]; + w[2] = RL(w[2], 1); + A += RL(B, 5) + K20_39 + (C ^ D ^ E) + w[2]; + C = RL(C, 30); + w[3] = w[3] ^ w[5] ^ w[11] ^ w[0]; + w[3] = RL(w[3], 1); + E += RL(A, 5) + K20_39 + (B ^ C ^ D) + w[3]; + B = RL(B, 30); + w[4] = w[4] ^ w[6] ^ w[12] ^ w[1]; + w[4] = RL(w[4], 1); + D += RL(E, 5) + K20_39 + (A ^ B ^ C) + w[4]; + A = RL(A, 30); + w[5] = w[5] ^ w[7] ^ w[13] ^ w[2]; + w[5] = RL(w[5], 1); + C += RL(D, 5) + K20_39 + (E ^ A ^ B) + w[5]; + E = RL(E, 30); + w[6] = w[6] ^ w[8] ^ w[14] ^ w[3]; + w[6] = RL(w[6], 1); + B += RL(C, 5) + K20_39 + (D ^ E ^ A) + w[6]; + D = RL(D, 30); + w[7] = w[7] ^ w[9] ^ w[15] ^ w[4]; + w[7] = RL(w[7], 1); + A += RL(B, 5) + K20_39 + (C ^ D ^ E) + w[7]; + C = RL(C, 30); + + /* rounds 40 - 59 */ + + w[8] = w[8] ^ w[10] ^ w[0] ^ w[5]; + w[8] = RL(w[8], 1); + E += RL(A, 5) + K40_59 + ((B & C) | (D & (B | C))) + w[8]; + B = RL(B, 30); + w[9] = w[9] ^ w[11] ^ w[1] ^ w[6]; + w[9] = RL(w[9], 1); + D += RL(E, 5) + K40_59 + ((A & B) | (C & (A | B))) + w[9]; + A = RL(A, 30); + w[10] = w[10] ^ w[12] ^ w[2] ^ w[7]; + w[10] = RL(w[10], 1); + C += RL(D, 5) + K40_59 + ((E & 
A) | (B & (E | A))) + w[10]; + E = RL(E, 30); + w[11] = w[11] ^ w[13] ^ w[3] ^ w[8]; + w[11] = RL(w[11], 1); + B += RL(C, 5) + K40_59 + ((D & E) | (A & (D | E))) + w[11]; + D = RL(D, 30); + w[12] = w[12] ^ w[14] ^ w[4] ^ w[9]; + w[12] = RL(w[12], 1); + A += RL(B, 5) + K40_59 + ((C & D) | (E & (C | D))) + w[12]; + C = RL(C, 30); + w[13] = w[13] ^ w[15] ^ w[5] ^ w[10]; + w[13] = RL(w[13], 1); + E += RL(A, 5) + K40_59 + ((B & C) | (D & (B | C))) + w[13]; + B = RL(B, 30); + w[14] = w[14] ^ w[0] ^ w[6] ^ w[11]; + w[14] = RL(w[14], 1); + D += RL(E, 5) + K40_59 + ((A & B) | (C & (A | B))) + w[14]; + A = RL(A, 30); + w[15] = w[15] ^ w[1] ^ w[7] ^ w[12]; + w[15] = RL(w[15], 1); + C += RL(D, 5) + K40_59 + ((E & A) | (B & (E | A))) + w[15]; + E = RL(E, 30); + w[0] = w[0] ^ w[2] ^ w[8] ^ w[13]; + w[0] = RL(w[0], 1); + B += RL(C, 5) + K40_59 + ((D & E) | (A & (D | E))) + w[0]; + D = RL(D, 30); + w[1] = w[1] ^ w[3] ^ w[9] ^ w[14]; + w[1] = RL(w[1], 1); + A += RL(B, 5) + K40_59 + ((C & D) | (E & (C | D))) + w[1]; + C = RL(C, 30); + w[2] = w[2] ^ w[4] ^ w[10] ^ w[15]; + w[2] = RL(w[2], 1); + E += RL(A, 5) + K40_59 + ((B & C) | (D & (B | C))) + w[2]; + B = RL(B, 30); + w[3] = w[3] ^ w[5] ^ w[11] ^ w[0]; + w[3] = RL(w[3], 1); + D += RL(E, 5) + K40_59 + ((A & B) | (C & (A | B))) + w[3]; + A = RL(A, 30); + w[4] = w[4] ^ w[6] ^ w[12] ^ w[1]; + w[4] = RL(w[4], 1); + C += RL(D, 5) + K40_59 + ((E & A) | (B & (E | A))) + w[4]; + E = RL(E, 30); + w[5] = w[5] ^ w[7] ^ w[13] ^ w[2]; + w[5] = RL(w[5], 1); + B += RL(C, 5) + K40_59 + ((D & E) | (A & (D | E))) + w[5]; + D = RL(D, 30); + w[6] = w[6] ^ w[8] ^ w[14] ^ w[3]; + w[6] = RL(w[6], 1); + A += RL(B, 5) + K40_59 + ((C & D) | (E & (C | D))) + w[6]; + C = RL(C, 30); + w[7] = w[7] ^ w[9] ^ w[15] ^ w[4]; + w[7] = RL(w[7], 1); + E += RL(A, 5) + K40_59 + ((B & C) | (D & (B | C))) + w[7]; + B = RL(B, 30); + w[8] = w[8] ^ w[10] ^ w[0] ^ w[5]; + w[8] = RL(w[8], 1); + D += RL(E, 5) + K40_59 + ((A & B) | (C & (A | B))) + w[8]; + A = RL(A, 30); + w[9] = 
w[9] ^ w[11] ^ w[1] ^ w[6]; + w[9] = RL(w[9], 1); + C += RL(D, 5) + K40_59 + ((E & A) | (B & (E | A))) + w[9]; + E = RL(E, 30); + w[10] = w[10] ^ w[12] ^ w[2] ^ w[7]; + w[10] = RL(w[10], 1); + B += RL(C, 5) + K40_59 + ((D & E) | (A & (D | E))) + w[10]; + D = RL(D, 30); + w[11] = w[11] ^ w[13] ^ w[3] ^ w[8]; + w[11] = RL(w[11], 1); + A += RL(B, 5) + K40_59 + ((C & D) | (E & (C | D))) + w[11]; + C = RL(C, 30); + + /* rounds 60 - 79 */ + + w[12] = w[12] ^ w[14] ^ w[4] ^ w[9]; + w[12] = RL(w[12], 1); + E += RL(A, 5) + K60_79 + (B ^ C ^ D) + w[12]; + B = RL(B, 30); + w[13] = w[13] ^ w[15] ^ w[5] ^ w[10]; + w[13] = RL(w[13], 1); + D += RL(E, 5) + K60_79 + (A ^ B ^ C) + w[13]; + A = RL(A, 30); + w[14] = w[14] ^ w[0] ^ w[6] ^ w[11]; + w[14] = RL(w[14], 1); + C += RL(D, 5) + K60_79 + (E ^ A ^ B) + w[14]; + E = RL(E, 30); + w[15] = w[15] ^ w[1] ^ w[7] ^ w[12]; + w[15] = RL(w[15], 1); + B += RL(C, 5) + K60_79 + (D ^ E ^ A) + w[15]; + D = RL(D, 30); + w[0] = w[0] ^ w[2] ^ w[8] ^ w[13]; + w[0] = RL(w[0], 1); + A += RL(B, 5) + K60_79 + (C ^ D ^ E) + w[0]; + C = RL(C, 30); + w[1] = w[1] ^ w[3] ^ w[9] ^ w[14]; + w[1] = RL(w[1], 1); + E += RL(A, 5) + K60_79 + (B ^ C ^ D) + w[1]; + B = RL(B, 30); + w[2] = w[2] ^ w[4] ^ w[10] ^ w[15]; + w[2] = RL(w[2], 1); + D += RL(E, 5) + K60_79 + (A ^ B ^ C) + w[2]; + A = RL(A, 30); + w[3] = w[3] ^ w[5] ^ w[11] ^ w[0]; + w[3] = RL(w[3], 1); + C += RL(D, 5) + K60_79 + (E ^ A ^ B) + w[3]; + E = RL(E, 30); + w[4] = w[4] ^ w[6] ^ w[12] ^ w[1]; + w[4] = RL(w[4], 1); + B += RL(C, 5) + K60_79 + (D ^ E ^ A) + w[4]; + D = RL(D, 30); + w[5] = w[5] ^ w[7] ^ w[13] ^ w[2]; + w[5] = RL(w[5], 1); + A += RL(B, 5) + K60_79 + (C ^ D ^ E) + w[5]; + C = RL(C, 30); + w[6] = w[6] ^ w[8] ^ w[14] ^ w[3]; + w[6] = RL(w[6], 1); + E += RL(A, 5) + K60_79 + (B ^ C ^ D) + w[6]; + B = RL(B, 30); + w[7] = w[7] ^ w[9] ^ w[15] ^ w[4]; + w[7] = RL(w[7], 1); + D += RL(E, 5) + K60_79 + (A ^ B ^ C) + w[7]; + A = RL(A, 30); + w[8] = w[8] ^ w[10] ^ w[0] ^ w[5]; + w[8] = RL(w[8], 1); + C 
+= RL(D, 5) + K60_79 + (E ^ A ^ B) + w[8]; + E = RL(E, 30); + w[9] = w[9] ^ w[11] ^ w[1] ^ w[6]; + w[9] = RL(w[9], 1); + B += RL(C, 5) + K60_79 + (D ^ E ^ A) + w[9]; + D = RL(D, 30); + w[10] = w[10] ^ w[12] ^ w[2] ^ w[7]; + w[10] = RL(w[10], 1); + A += RL(B, 5) + K60_79 + (C ^ D ^ E) + w[10]; + C = RL(C, 30); + w[11] = w[11] ^ w[13] ^ w[3] ^ w[8]; + w[11] = RL(w[11], 1); + E += RL(A, 5) + K60_79 + (B ^ C ^ D) + w[11]; + B = RL(B, 30); + w[12] = w[12] ^ w[14] ^ w[4] ^ w[9]; + w[12] = RL(w[12], 1); + D += RL(E, 5) + K60_79 + (A ^ B ^ C) + w[12]; + A = RL(A, 30); + w[13] = w[13] ^ w[15] ^ w[5] ^ w[10]; + C += RL(D, 5) + K60_79 + (E ^ A ^ B) + RL(w[13], 1); + E = RL(E, 30); + w[14] = w[14] ^ w[0] ^ w[6] ^ w[11]; + B += RL(C, 5) + K60_79 + (D ^ E ^ A) + RL(w[14], 1); + D = RL(D, 30); + + /* update H0 - H4 */ + + w[15] = w[15] ^ w[1] ^ w[7] ^ w[12]; + H0 += A + RL(B, 5) + K60_79 + (C ^ D ^ E) + RL(w[15], 1); + H1 += B; + H2 += RL(C, 30); + H3 += D; + H4 += E; + + /* clear temp variables */ + + A = B = C = D = E = 0; + memset(w, 0, sizeof(w)); +} + +/* ntru_crypto_sha1() + * + * This routine provides all operations for a SHA-1 hash, and the use + * of SHA-1 for DSA signing and key generation. + * It may be used to initialize, update, or complete a message digest, + * or any combination of those actions, as determined by the SHA_INIT flag, + * the in_len parameter, and the SHA_FINISH flag, respectively. + * + * When in_len == 0 (no data to hash), the parameter, in, may be NULL. + * When the SHA_FINISH flag is not set, the parameter, md, may be NULL. + * + * Initialization may be standard or use a specified initialization vector, + * and is indicated by setting the SHA_INIT flag. + * Setting init = NULL specifies standard initialization. Otherwise, init + * points to the array of five alternate initialization 32-bit words. + * + * The hash operation can be updated with any number of input bytes, including + * zero. + * + * Returns SHA_OK on success. 
 * Returns SHA_FAIL with corrupted context.
 * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
 * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
 */

uint32_t
ntru_crypto_sha1(
    NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */
    uint32_t const *init,    /* in     - pointer to alternate */
                             /*          initialization - may be NULL */
    uint8_t const *in,       /* in     - pointer to input data -
                                         may be NULL if in_len == 0 */
    uint32_t in_len,         /* in     - number of input data bytes */
    uint32_t flags,          /* in     - INIT, FINISH flags */
    uint8_t *md)             /* out    - address for message digest -
                                         may be NULL if not FINISH */
{
    uint32_t in_blk[16]; /* input block */
    uint32_t space;      /* bytes left in the 64-byte unhashed buffer */
    uint8_t *d = NULL;

    /* check error conditions */

    if (!c || (in_len && !in) || ((flags & SHA_FINISH) && !md)) {
        SHA_RET(SHA_BAD_PARAMETER)
    }

    /* initialize context if requested */

    if (flags & SHA_INIT) {

        /* init chaining state */

        if (!init) {
            c->state[0] = H0_INIT; /* standard initialization */
            c->state[1] = H1_INIT;
            c->state[2] = H2_INIT;
            c->state[3] = H3_INIT;
            c->state[4] = H4_INIT;
        } else {
            /* Non standard initialization values are not supported;
             * a non-NULL init is always rejected here.
             */
            SHA_RET(SHA_BAD_PARAMETER);
        }

        /* init bit count and number of unhashed data bytes */

        c->num_bits_hashed[0] = 0;
        c->num_bits_hashed[1] = 0;
        c->unhashed_len = 0;
    }

    /* determine space left in unhashed data buffer */

    if (c->unhashed_len > 63) {
        /* buffer length out of range: context is corrupted */
        SHA_RET(SHA_FAIL)
    }

    space = 64 - c->unhashed_len;

    /* process input if it exists */

    if (in_len) {

        /* update count of bits hashed: the 64-bit bit counter is kept
         * as two 32-bit words, num_bits_hashed[0] (low) and [1] (high)
         */

        {
            uint32_t bits0, bits1;

            bits0 = in_len << 3;  /* low 32 bits of in_len * 8 */
            bits1 = in_len >> 29; /* high bits of in_len * 8 */

            if ((c->num_bits_hashed[0] += bits0) < bits0) {
                bits1++; /* carry from low word */
            }

            if ((c->num_bits_hashed[1] += bits1) < bits1) {
                /* 64-bit bit counter overflowed: wipe state and fail */
                memset((uint8_t *) c, 0, sizeof(NTRU_CRYPTO_SHA1_CTX));
                memset((char *) in_blk, 0, sizeof(in_blk));
                SHA_RET(SHA_OVERFLOW)
            }
        }

        /* process input bytes */

        if (in_len < space) {

            /* input does not fill block buffer:
             *  add input to buffer
             */

            memcpy(c->unhashed + c->unhashed_len, in, in_len);
            c->unhashed_len += in_len;

        } else {
            uint32_t blks;

            /* input will fill block buffer:
             *  fill unhashed data buffer,
             *  convert to block buffer,
             *  and process block
             */

            in_len -= space;

            for (d = c->unhashed + c->unhashed_len; space; space--) {
                *d++ = *in++;
            }

            ntru_crypto_msbyte_2_uint32(in_blk, (uint8_t const *) c->unhashed,
                                        16);
            sha1_blk((uint32_t const *) in_blk, c->state);

            /* process any remaining full blocks */

            for (blks = in_len >> 6; blks--; in += 64) {
                ntru_crypto_msbyte_2_uint32(in_blk, in, 16);
                sha1_blk((uint32_t const *) in_blk, c->state);
            }

            /* put any remaining input (< 64 bytes) in the unhashed
             * data buffer
             */

            in_len &= 0x3f;
            memcpy(c->unhashed, in, in_len);
            c->unhashed_len = in_len;
        }
    }

    /* complete message digest if requested */

    if (flags & SHA_FINISH) {
        space = 64 - c->unhashed_len;

        /* add 0x80 padding byte to the unhashed data buffer
         * (there is always space since the buffer can't be full)
         */

        d = c->unhashed + c->unhashed_len;
        *d++ = 0x80;
        space--;

        /* check for space for bit count */

        if (space < 8) {

            /* no space for count:
             *  fill remainder of unhashed data buffer with zeros,
             *  convert to input block,
             *  process block,
             *  fill all but 8 bytes of unhashed data buffer with zeros
             */

            memset(d, 0, space);
            ntru_crypto_msbyte_2_uint32(in_blk,
                                        (uint8_t const *) c->unhashed, 16);
            sha1_blk((uint32_t const *) in_blk, c->state);
            memset(c->unhashed, 0, 56);

        } else {

            /* fill unhashed data buffer with zeros,
             * leaving space for bit count
             */

            for (space -= 8; space; space--) {
                *d++ = 0;
            }
        }

        /* convert partially filled unhashed data buffer to input block and
         * add bit count to input block (big-endian, per FIPS 180-4 padding)
         */

        ntru_crypto_msbyte_2_uint32(in_blk, (uint8_t const *) c->unhashed,
                                    14);
        in_blk[14] = c->num_bits_hashed[1];
        in_blk[15] = c->num_bits_hashed[0];

        /* process last block */

        sha1_blk((uint32_t const *) in_blk, c->state);

        /* copy result to message digest buffer */

        ntru_crypto_uint32_2_msbyte(md, c->state, 5);

        /* clear context and stack variables; after FINISH the context
         * must be re-initialized (SHA_INIT) before reuse
         */

        memset((uint8_t *) c, 0, sizeof(NTRU_CRYPTO_SHA1_CTX));
        memset((char *) in_blk, 0, sizeof(in_blk));
    }

    SHA_RET(SHA_OK)
}

/* ntru_crypto_sha1_init
 *
 * This routine performs standard initialization of the SHA-1 state.
 *
 * Returns SHA_OK on success.
 * Returns SHA_FAIL with corrupted context.
 * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
 */

uint32_t
ntru_crypto_sha1_init(
    NTRU_CRYPTO_SHA1_CTX *c) /* in/out - pointer to SHA-1 context */
{
    return ntru_crypto_sha1(c, NULL, NULL, 0, SHA_INIT, NULL);
}

/* ntru_crypto_sha1_update
 *
 * This routine processes input data and updates the SHA-1 hash calculation.
 *
 * Returns SHA_OK on success.
 * Returns SHA_FAIL with corrupted context.
 * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
 * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
 */

uint32_t
ntru_crypto_sha1_update(
    NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */
    uint8_t const *data,     /* in     - pointer to input data */
    uint32_t data_len)       /* in     - number of bytes of input data */
{
    return ntru_crypto_sha1(c, NULL, data, data_len, SHA_DATA_ONLY, NULL);
}

/* ntru_crypto_sha1_final
 *
 * This routine completes the SHA-1 hash calculation and returns the
 * message digest.
 *
 * Returns SHA_OK on success.
 * Returns SHA_FAIL with corrupted context.
 * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
 * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
 */

uint32_t
ntru_crypto_sha1_final(
    NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */
    uint8_t *md)             /* out    - address for message digest */
{
    /* finish only: no new input, so in may be NULL and in_len is 0 */
    return ntru_crypto_sha1(c, NULL, NULL, 0, SHA_FINISH, md);
}

/* ntru_crypto_sha1_digest
 *
 * This routine computes a SHA-1 message digest.
 *
 * Returns SHA_OK on success.
 * Returns SHA_FAIL with corrupted context.
 * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
 * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
 */

uint32_t
ntru_crypto_sha1_digest(
    uint8_t const *data, /* in  - pointer to input data */
    uint32_t data_len,   /* in  - number of bytes of input data */
    uint8_t *md)         /* out - address for message digest */
{
    /* one-shot hash: local context is INIT'ed, hashed, and FINISHed
     * (and wiped by the FINISH path) in a single call
     */
    NTRU_CRYPTO_SHA1_CTX c;

    return ntru_crypto_sha1(&c, NULL, data, data_len, SHA_INIT | SHA_FINISH, md);
}
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha1.h b/crypt/liboqs/kex_ntru/ntru_crypto_sha1.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a11de74ed61811fd643f95bd1c1cdaeb94705f7
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha1.h
@@ -0,0 +1,163 @@
/******************************************************************************
 * NTRU Cryptography Reference Source Code
 *
 * Copyright (C) 2009-2016 Security Innovation (SI)
 *
 * SI has dedicated the work to the public domain by waiving all of its rights
 * to the work worldwide under copyright law, including all related and
 * neighboring rights, to the extent allowed by law.
 *
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * You can copy, modify, distribute and perform the work, even for commercial
 * purposes, all without asking permission. You should have received a copy of
 * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_sha1.h + * + * Contents: Definitions and declarations for the SHA-1 implementation. + * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_SHA1_H +#define NTRU_CRYPTO_SHA1_H + +#include "ntru_crypto_platform.h" +#include "ntru_crypto_sha.h" + +/****************************************** + * macros needed for generic hash objects * + ******************************************/ + +#define SHA_1_CTX_LEN sizeof(SHA1_CTX) /* no. bytes in SHA-1 ctx */ +#define SHA_1_BLK_LEN 64 /* 64 bytes in input block */ +#define SHA_1_MD_LEN 20 /* 20 bytes in msg digest */ +#define SHA_1_INIT_FN &ntru_crypto_sha1_init /* init function */ +#define SHA_1_UPDATE_FN &ntru_crypto_sha1_update /* update function */ +#define SHA_1_FINAL_FN &ntru_crypto_sha1_final /* final function */ +#define SHA_1_DIGEST_FN &ntru_crypto_sha1_digest /* digest function */ + +/************************* + * structure definitions * + *************************/ + +/* SHA-1 context structure */ + +typedef struct { + uint32_t state[5]; /* chaining state */ + uint32_t num_bits_hashed[2]; /* number of bits hashed */ + uint8_t unhashed[64]; /* input data not yet hashed */ + uint32_t unhashed_len; /* number of bytes of unhashed input data */ +} NTRU_CRYPTO_SHA1_CTX; + +/************************* + * function declarations * + *************************/ + +/* ntru_crypto_sha1() + * + * This routine provides all operations for a SHA-1 hash, and the use + * of SHA-1 for DSA signing and key generation. + * It may be used to initialize, update, or complete a message digest, + * or any combination of those actions, as determined by the SHA_INIT flag, + * the in_len parameter, and the SHA_FINISH flag, respectively. 
+ * + * When in_len == 0 (no data to hash), the parameter, in, may be NULL. + * When the SHA_FINISH flag is not set, the parameter, md, may be NULL. + * + * Initialization may be standard or use a specified initialization vector, + * and is indicated by setting the SHA_INIT flag. + * Setting init = NULL specifies standard initialization. Otherwise, init + * points to the array of five alternate initialization 32-bit words. + * + * The hash operation can be updated with any number of input bytes, including + * zero. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. + */ + +extern uint32_t +ntru_crypto_sha1( + NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */ + uint32_t const *init, /* in - pointer to alternate */ + /* initialization - may be NULL */ + uint8_t const *in, /* in - pointer to input data - + may be NULL if in_len == 0 */ + uint32_t in_len, /* in - number of input data bytes */ + uint32_t flags, /* in - INIT, FINISH */ + uint8_t *md); /* out - address for message digest - + may be NULL if not FINISH */ + +/* ntru_crypto_sha1_init + * + * This routine performs standard initialization of the SHA-1 state. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + */ + +extern uint32_t +ntru_crypto_sha1_init( + NTRU_CRYPTO_SHA1_CTX *c); /* in/out - pointer to SHA-1 context */ + +/* ntru_crypto_sha1_update + * + * This routine processes input data and updates the SHA-1 hash calculation. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. 
+ */ + +extern uint32_t +ntru_crypto_sha1_update( + NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */ + uint8_t const *data, /* in - pointer to input data */ + uint32_t data_len); /* in - number of bytes of input data */ + +/* ntru_crypto_sha1_final + * + * This routine completes the SHA-1 hash calculation and returns the + * message digest. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. + */ + +extern uint32_t +ntru_crypto_sha1_final( + NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */ + uint8_t *md); /* out - address for message digest */ + +/* ntru_crypto_sha1_digest + * + * This routine computes a SHA-1 message digest. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. + */ + +uint32_t +ntru_crypto_sha1_digest( + uint8_t const *data, /* in - pointer to input data */ + uint32_t data_len, /* in - number of bytes of input data */ + uint8_t *md); /* out - address for message digest */ + +#endif /* NTRU_CRYPTO_SHA1_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha2.c b/crypt/liboqs/kex_ntru/ntru_crypto_sha2.c new file mode 100644 index 0000000000000000000000000000000000000000..8dc54a311bcedebf91bc17f6c7b0a129ceadfda3 --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha2.c @@ -0,0 +1,570 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. 
+ * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_sha2.c + * + * Contents: Routines implementing the SHA-256 hash calculation. + * + *****************************************************************************/ + +#include "ntru_crypto.h" +#include "ntru_crypto_sha2.h" +#include "ntru_crypto_msbyte_uint32.h" + +/* chaining state elements */ + +#define H0 state[0] +#define H1 state[1] +#define H2 state[2] +#define H3 state[3] +#define H4 state[4] +#define H5 state[5] +#define H6 state[6] +#define H7 state[7] + +/* standard SHA-256 initialization values */ + +#define H0_SHA256_INIT 0x6a09e667UL +#define H1_SHA256_INIT 0xbb67ae85UL +#define H2_SHA256_INIT 0x3c6ef372UL +#define H3_SHA256_INIT 0xa54ff53aUL +#define H4_SHA256_INIT 0x510e527fUL +#define H5_SHA256_INIT 0x9b05688cUL +#define H6_SHA256_INIT 0x1f83d9abUL +#define H7_SHA256_INIT 0x5be0cd19UL + +/* sha2_blk() + * + * This routine updates the current hash output (chaining state) + * by performing SHA-256 on a 512-bit block of data represented + * as sixteen 32-bit words. 
+ */ + +#define RR(a, n) (((a) >> (n)) | ((a) << (32 - (n)))) +#define S0(a) (RR((a), 2) ^ RR((a), 13) ^ RR((a), 22)) +#define S1(a) (RR((a), 6) ^ RR((a), 11) ^ RR((a), 25)) +#define s0(a) (RR((a), 7) ^ RR((a), 18) ^ ((a) >> 3)) +#define s1(a) (RR((a), 17) ^ RR((a), 19) ^ ((a) >> 10)) + +static void +sha2_blk( + uint32_t const *data, /* in - ptr to 16 32-bit word input block */ + uint32_t *state) /* in/out - ptr to 8 32-bit word chaining state */ +{ + uint32_t A, B, C, D, E, F, G, H; + uint32_t w[16]; + + /* init A - H */ + + A = H0; + B = H1; + C = H2; + D = H3; + E = H4; + F = H5; + G = H6; + H = H7; + + /* rounds 0 - 15 */ + + H += S1(E) + ((E & (F ^ G)) ^ G) + 0x428A2F98UL + data[0]; + D += H; + H += S0(A) + ((A & B) | (C & (A | B))); + G += S1(D) + ((D & (E ^ F)) ^ F) + 0x71374491UL + data[1]; + C += G; + G += S0(H) + ((H & A) | (B & (H | A))); + F += S1(C) + ((C & (D ^ E)) ^ E) + 0xB5C0FBCFUL + data[2]; + B += F; + F += S0(G) + ((G & H) | (A & (G | H))); + E += S1(B) + ((B & (C ^ D)) ^ D) + 0xE9B5DBA5UL + data[3]; + A += E; + E += S0(F) + ((F & G) | (H & (F | G))); + D += S1(A) + ((A & (B ^ C)) ^ C) + 0x3956C25BUL + data[4]; + H += D; + D += S0(E) + ((E & F) | (G & (E | F))); + C += S1(H) + ((H & (A ^ B)) ^ B) + 0x59F111F1UL + data[5]; + G += C; + C += S0(D) + ((D & E) | (F & (D | E))); + B += S1(G) + ((G & (H ^ A)) ^ A) + 0x923F82A4UL + data[6]; + F += B; + B += S0(C) + ((C & D) | (E & (C | D))); + A += S1(F) + ((F & (G ^ H)) ^ H) + 0xAB1C5ED5UL + data[7]; + E += A; + A += S0(B) + ((B & C) | (D & (B | C))); + H += S1(E) + ((E & (F ^ G)) ^ G) + 0xD807AA98UL + data[8]; + D += H; + H += S0(A) + ((A & B) | (C & (A | B))); + G += S1(D) + ((D & (E ^ F)) ^ F) + 0x12835B01UL + data[9]; + C += G; + G += S0(H) + ((H & A) | (B & (H | A))); + F += S1(C) + ((C & (D ^ E)) ^ E) + 0x243185BEUL + data[10]; + B += F; + F += S0(G) + ((G & H) | (A & (G | H))); + E += S1(B) + ((B & (C ^ D)) ^ D) + 0x550C7DC3UL + data[11]; + A += E; + E += S0(F) + ((F & G) | (H & (F | G))); + D 
+= S1(A) + ((A & (B ^ C)) ^ C) + 0x72BE5D74UL + data[12]; + H += D; + D += S0(E) + ((E & F) | (G & (E | F))); + C += S1(H) + ((H & (A ^ B)) ^ B) + 0x80DEB1FEUL + data[13]; + G += C; + C += S0(D) + ((D & E) | (F & (D | E))); + B += S1(G) + ((G & (H ^ A)) ^ A) + 0x9BDC06A7UL + data[14]; + F += B; + B += S0(C) + ((C & D) | (E & (C | D))); + A += S1(F) + ((F & (G ^ H)) ^ H) + 0xC19BF174UL + data[15]; + E += A; + A += S0(B) + ((B & C) | (D & (B | C))); + + /* rounds 16 - 63 */ + + w[0] = data[0] + s0(data[1]) + data[9] + s1(data[14]); + H += S1(E) + ((E & (F ^ G)) ^ G) + 0xE49B69C1UL + w[0]; + D += H; + H += S0(A) + ((A & B) | (C & (A | B))); + w[1] = data[1] + s0(data[2]) + data[10] + s1(data[15]); + G += S1(D) + ((D & (E ^ F)) ^ F) + 0xEFBE4786UL + w[1]; + C += G; + G += S0(H) + ((H & A) | (B & (H | A))); + w[2] = data[2] + s0(data[3]) + data[11] + s1(w[0]); + F += S1(C) + ((C & (D ^ E)) ^ E) + 0x0FC19DC6UL + w[2]; + B += F; + F += S0(G) + ((G & H) | (A & (G | H))); + w[3] = data[3] + s0(data[4]) + data[12] + s1(w[1]); + E += S1(B) + ((B & (C ^ D)) ^ D) + 0x240CA1CCUL + w[3]; + A += E; + E += S0(F) + ((F & G) | (H & (F | G))); + w[4] = data[4] + s0(data[5]) + data[13] + s1(w[2]); + D += S1(A) + ((A & (B ^ C)) ^ C) + 0x2DE92C6FUL + w[4]; + H += D; + D += S0(E) + ((E & F) | (G & (E | F))); + w[5] = data[5] + s0(data[6]) + data[14] + s1(w[3]); + C += S1(H) + ((H & (A ^ B)) ^ B) + 0x4A7484AAUL + w[5]; + G += C; + C += S0(D) + ((D & E) | (F & (D | E))); + w[6] = data[6] + s0(data[7]) + data[15] + s1(w[4]); + B += S1(G) + ((G & (H ^ A)) ^ A) + 0x5CB0A9DCUL + w[6]; + F += B; + B += S0(C) + ((C & D) | (E & (C | D))); + w[7] = data[7] + s0(data[8]) + w[0] + s1(w[5]); + A += S1(F) + ((F & (G ^ H)) ^ H) + 0x76F988DAUL + w[7]; + E += A; + A += S0(B) + ((B & C) | (D & (B | C))); + w[8] = data[8] + s0(data[9]) + w[1] + s1(w[6]); + H += S1(E) + ((E & (F ^ G)) ^ G) + 0x983E5152UL + w[8]; + D += H; + H += S0(A) + ((A & B) | (C & (A | B))); + w[9] = data[9] + s0(data[10]) + w[2] + 
s1(w[7]); + G += S1(D) + ((D & (E ^ F)) ^ F) + 0xA831C66DUL + w[9]; + C += G; + G += S0(H) + ((H & A) | (B & (H | A))); + w[10] = data[10] + s0(data[11]) + w[3] + s1(w[8]); + F += S1(C) + ((C & (D ^ E)) ^ E) + 0xB00327C8UL + w[10]; + B += F; + F += S0(G) + ((G & H) | (A & (G | H))); + w[11] = data[11] + s0(data[12]) + w[4] + s1(w[9]); + E += S1(B) + ((B & (C ^ D)) ^ D) + 0xBF597FC7UL + w[11]; + A += E; + E += S0(F) + ((F & G) | (H & (F | G))); + w[12] = data[12] + s0(data[13]) + w[5] + s1(w[10]); + D += S1(A) + ((A & (B ^ C)) ^ C) + 0xC6E00BF3UL + w[12]; + H += D; + D += S0(E) + ((E & F) | (G & (E | F))); + w[13] = data[13] + s0(data[14]) + w[6] + s1(w[11]); + C += S1(H) + ((H & (A ^ B)) ^ B) + 0xD5A79147UL + w[13]; + G += C; + C += S0(D) + ((D & E) | (F & (D | E))); + w[14] = data[14] + s0(data[15]) + w[7] + s1(w[12]); + B += S1(G) + ((G & (H ^ A)) ^ A) + 0x06CA6351UL + w[14]; + F += B; + B += S0(C) + ((C & D) | (E & (C | D))); + w[15] = data[15] + s0(w[0]) + w[8] + s1(w[13]); + A += S1(F) + ((F & (G ^ H)) ^ H) + 0x14292967UL + w[15]; + E += A; + A += S0(B) + ((B & C) | (D & (B | C))); + w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); + H += S1(E) + ((E & (F ^ G)) ^ G) + 0x27B70A85UL + w[0]; + D += H; + H += S0(A) + ((A & B) | (C & (A | B))); + w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); + G += S1(D) + ((D & (E ^ F)) ^ F) + 0x2E1B2138UL + w[1]; + C += G; + G += S0(H) + ((H & A) | (B & (H | A))); + w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); + F += S1(C) + ((C & (D ^ E)) ^ E) + 0x4D2C6DFCUL + w[2]; + B += F; + F += S0(G) + ((G & H) | (A & (G | H))); + w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); + E += S1(B) + ((B & (C ^ D)) ^ D) + 0x53380D13UL + w[3]; + A += E; + E += S0(F) + ((F & G) | (H & (F | G))); + w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); + D += S1(A) + ((A & (B ^ C)) ^ C) + 0x650A7354UL + w[4]; + H += D; + D += S0(E) + ((E & F) | (G & (E | F))); + w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); + C += S1(H) + ((H & (A ^ B)) ^ B) + 0x766A0ABBUL + w[5]; + G += C; + C += 
S0(D) + ((D & E) | (F & (D | E))); + w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); + B += S1(G) + ((G & (H ^ A)) ^ A) + 0x81C2C92EUL + w[6]; + F += B; + B += S0(C) + ((C & D) | (E & (C | D))); + w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); + A += S1(F) + ((F & (G ^ H)) ^ H) + 0x92722C85UL + w[7]; + E += A; + A += S0(B) + ((B & C) | (D & (B | C))); + w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); + H += S1(E) + ((E & (F ^ G)) ^ G) + 0xA2BFE8A1UL + w[8]; + D += H; + H += S0(A) + ((A & B) | (C & (A | B))); + w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); + G += S1(D) + ((D & (E ^ F)) ^ F) + 0xA81A664BUL + w[9]; + C += G; + G += S0(H) + ((H & A) | (B & (H | A))); + w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); + F += S1(C) + ((C & (D ^ E)) ^ E) + 0xC24B8B70UL + w[10]; + B += F; + F += S0(G) + ((G & H) | (A & (G | H))); + w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); + E += S1(B) + ((B & (C ^ D)) ^ D) + 0xC76C51A3UL + w[11]; + A += E; + E += S0(F) + ((F & G) | (H & (F | G))); + w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); + D += S1(A) + ((A & (B ^ C)) ^ C) + 0xD192E819UL + w[12]; + H += D; + D += S0(E) + ((E & F) | (G & (E | F))); + w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); + C += S1(H) + ((H & (A ^ B)) ^ B) + 0xD6990624UL + w[13]; + G += C; + C += S0(D) + ((D & E) | (F & (D | E))); + w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); + B += S1(G) + ((G & (H ^ A)) ^ A) + 0xF40E3585UL + w[14]; + F += B; + B += S0(C) + ((C & D) | (E & (C | D))); + w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); + A += S1(F) + ((F & (G ^ H)) ^ H) + 0x106AA070UL + w[15]; + E += A; + A += S0(B) + ((B & C) | (D & (B | C))); + w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]); + H += S1(E) + ((E & (F ^ G)) ^ G) + 0x19A4C116UL + w[0]; + D += H; + H += S0(A) + ((A & B) | (C & (A | B))); + w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]); + G += S1(D) + ((D & (E ^ F)) ^ F) + 0x1E376C08UL + w[1]; + C += G; + G += S0(H) + ((H & A) | (B & (H | A))); + w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]); + F += S1(C) + ((C & (D ^ E)) ^ E) + 
0x2748774CUL + w[2]; + B += F; + F += S0(G) + ((G & H) | (A & (G | H))); + w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]); + E += S1(B) + ((B & (C ^ D)) ^ D) + 0x34B0BCB5UL + w[3]; + A += E; + E += S0(F) + ((F & G) | (H & (F | G))); + w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]); + D += S1(A) + ((A & (B ^ C)) ^ C) + 0x391C0CB3UL + w[4]; + H += D; + D += S0(E) + ((E & F) | (G & (E | F))); + w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]); + C += S1(H) + ((H & (A ^ B)) ^ B) + 0x4ED8AA4AUL + w[5]; + G += C; + C += S0(D) + ((D & E) | (F & (D | E))); + w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]); + B += S1(G) + ((G & (H ^ A)) ^ A) + 0x5B9CCA4FUL + w[6]; + F += B; + B += S0(C) + ((C & D) | (E & (C | D))); + w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]); + A += S1(F) + ((F & (G ^ H)) ^ H) + 0x682E6FF3UL + w[7]; + E += A; + A += S0(B) + ((B & C) | (D & (B | C))); + w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]); + H += S1(E) + ((E & (F ^ G)) ^ G) + 0x748F82EEUL + w[8]; + D += H; + H += S0(A) + ((A & B) | (C & (A | B))); + w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]); + G += S1(D) + ((D & (E ^ F)) ^ F) + 0x78A5636FUL + w[9]; + C += G; + G += S0(H) + ((H & A) | (B & (H | A))); + w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]); + F += S1(C) + ((C & (D ^ E)) ^ E) + 0x84C87814UL + w[10]; + B += F; + F += S0(G) + ((G & H) | (A & (G | H))); + w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]); + E += S1(B) + ((B & (C ^ D)) ^ D) + 0x8CC70208UL + w[11]; + A += E; + E += S0(F) + ((F & G) | (H & (F | G))); + w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]); + D += S1(A) + ((A & (B ^ C)) ^ C) + 0x90BEFFFAUL + w[12]; + H += D; + D += S0(E) + ((E & F) | (G & (E | F))); + w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]); + C += S1(H) + ((H & (A ^ B)) ^ B) + 0xA4506CEBUL + w[13]; + G += C; + C += S0(D) + ((D & E) | (F & (D | E))); + w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]); + B += S1(G) + ((G & (H ^ A)) ^ A) + 0xBEF9A3F7UL + w[14]; + F += B; + B += S0(C) + ((C & D) | (E & (C | D))); + w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]); + A 
+= S1(F) + ((F & (G ^ H)) ^ H) + 0xC67178F2UL + w[15]; + E += A; + A += S0(B) + ((B & C) | (D & (B | C))); + + /* update H0 - H7 */ + + H0 += A; + H1 += B; + H2 += C; + H3 += D; + H4 += E; + H5 += F; + H6 += G; + H7 += H; + + /* clear temp variables */ + + A = B = C = D = E = F = G = H = 0; + memset(w, 0, sizeof(w)); +} + +/* ntru_crypto_sha2() + * + * This routine provides all operations for a SHA-256 hash, + * and the use of SHA-256 for DSA signing and key generation. + * It may be used to initialize, update, or complete a message digest, + * or any combination of those actions, as determined by the SHA_INIT flag, + * the in_len parameter, and the SHA_FINISH flag, respectively. + * + * When in_len == 0 (no data to hash), the parameter, in, may be NULL. + * When the SHA_FINISH flag is not set, the parameter, md, may be NULL. + * + * Initialization may be standard or use a specified initialization vector, + * and is indicated by setting the SHA_INIT flag. + * Setting init = NULL specifies standard initialization. Otherwise, init + * points to the array of eight alternate initialization 32-bit words. + * + * The hash operation can be updated with any number of input bytes, including + * zero. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. 
+ */ + +uint32_t +ntru_crypto_sha2( + NTRU_CRYPTO_HASH_ALGID algid, /* in - hash algorithm ID */ + NTRU_CRYPTO_SHA2_CTX *c, /* in/out - pointer to SHA-2 context */ + uint32_t const *init, /* in - pointer to alternate */ + /* initialization - may be NULL */ + uint8_t const *in, /* in - pointer to input data - */ + /* may be NULL if in_len == 0 */ + uint32_t in_len, /* in - number of input data bytes */ + uint32_t flags, /* in - INIT, FINISH flags */ + uint8_t *md) /* out - address for message digest - + * may be NULL if not FINISH */ +{ + uint32_t in_blk[16]; /* input block */ + uint32_t space; + uint8_t *d = NULL; + + /* check error conditions */ + + if (algid != NTRU_CRYPTO_HASH_ALGID_SHA256) { + SHA_RET(SHA_BAD_PARAMETER) + } + + if (!c || (in_len && !in) || ((flags & SHA_FINISH) && !md)) { + SHA_RET(SHA_BAD_PARAMETER) + } + + /* initialize context if requested */ + + if (flags & SHA_INIT) { + /* init chaining state */ + + if (!init) /* standard initialization */ + { + + c->state[0] = H0_SHA256_INIT; /* standard SHA-256 init */ + c->state[1] = H1_SHA256_INIT; + c->state[2] = H2_SHA256_INIT; + c->state[3] = H3_SHA256_INIT; + c->state[4] = H4_SHA256_INIT; + c->state[5] = H5_SHA256_INIT; + c->state[6] = H6_SHA256_INIT; + c->state[7] = H7_SHA256_INIT; + + } else { + /* Support for SHA-224 etc is disabled */ + SHA_RET(SHA_BAD_PARAMETER); + } + + /* init bit count and number of unhashed data bytes */ + + c->num_bits_hashed[0] = 0; + c->num_bits_hashed[1] = 0; + c->unhashed_len = 0; + } + + /* determine space left in unhashed data buffer */ + + if (c->unhashed_len > 63) { + SHA_RET(SHA_FAIL) + } + + space = 64 - c->unhashed_len; + + /* process input if it exists */ + + if (in_len) { + /* update count of bits hashed */ + + { + uint32_t bits0, bits1; + + bits0 = in_len << 3; + bits1 = in_len >> 29; + + if ((c->num_bits_hashed[0] += bits0) < bits0) { + bits1++; + } + + if ((c->num_bits_hashed[1] += bits1) < bits1) { + memset((uint8_t *) c, 0, sizeof(NTRU_CRYPTO_SHA2_CTX)); 
+ memset((char *) in_blk, 0, sizeof(in_blk)); + SHA_RET(SHA_OVERFLOW) + } + } + + /* process input bytes */ + + if (in_len < space) { + + /* input does not fill block buffer: + * add input to buffer + */ + + memcpy(c->unhashed + c->unhashed_len, in, in_len); + c->unhashed_len += in_len; + + } else { + uint32_t blks; + + /* input will fill block buffer: + * fill unhashed data buffer, + * convert to block buffer, + * and process block + */ + + in_len -= space; + + for (d = c->unhashed + c->unhashed_len; space; space--) { + *d++ = *in++; + } + + ntru_crypto_msbyte_2_uint32(in_blk, (uint8_t const *) c->unhashed, + 16); + sha2_blk((uint32_t const *) in_blk, c->state); + + /* process any remaining full blocks */ + + for (blks = in_len >> 6; blks--; in += 64) { + ntru_crypto_msbyte_2_uint32(in_blk, in, 16); + sha2_blk((uint32_t const *) in_blk, c->state); + } + + /* put any remaining input in the unhashed data buffer */ + + in_len &= 0x3f; + memcpy(c->unhashed, in, in_len); + c->unhashed_len = in_len; + } + } + + /* complete message digest if requested */ + + if (flags & SHA_FINISH) { + space = 64 - c->unhashed_len; + + /* add 0x80 padding byte to the unhashed data buffer + * (there is always space since the buffer can't be full) + */ + + d = c->unhashed + c->unhashed_len; + *d++ = 0x80; + space--; + + /* check for space for bit count */ + + if (space < 8) { + /* no space for count: + * fill remainder of unhashed data buffer with zeros, + * convert to input block, + * process block, + * fill all but 8 bytes of unhashed data buffer with zeros + */ + + memset(d, 0, space); + ntru_crypto_msbyte_2_uint32(in_blk, + (uint8_t const *) c->unhashed, 16); + sha2_blk((uint32_t const *) in_blk, c->state); + memset(c->unhashed, 0, 56); + + } else { + /* fill unhashed data buffer with zeros, + * leaving space for bit count + */ + + for (space -= 8; space; space--) { + *d++ = 0; + } + } + + /* convert partially filled unhashed data buffer to input block and + * add bit count to input 
block + */ + + ntru_crypto_msbyte_2_uint32(in_blk, (uint8_t const *) c->unhashed, + 14); + in_blk[14] = c->num_bits_hashed[1]; + in_blk[15] = c->num_bits_hashed[0]; + + /* process last block */ + + sha2_blk((uint32_t const *) in_blk, c->state); + + /* copy result to message digest buffer */ + + ntru_crypto_uint32_2_msbyte(md, c->state, 8); + + /* clear context and stack variables */ + + memset((uint8_t *) c, 0, sizeof(NTRU_CRYPTO_SHA2_CTX)); + memset((char *) in_blk, 0, sizeof(in_blk)); + } + + SHA_RET(SHA_OK) +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha2.h b/crypt/liboqs/kex_ntru/ntru_crypto_sha2.h new file mode 100644 index 0000000000000000000000000000000000000000..b674adcbd59bce9a89e82e8f74f98a096bf08a6e --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha2.h @@ -0,0 +1,91 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_sha2.h + * + * Contents: Definitions and declarations for the SHA-256 implementation. 
+ * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_SHA2_H +#define NTRU_CRYPTO_SHA2_H + +#include "ntru_crypto_platform.h" +#include "ntru_crypto_sha.h" + +/************************* + * structure definitions * + *************************/ + +/* SHA-256 context structure */ + +typedef struct { + uint32_t state[8]; /* chaining state */ + uint32_t num_bits_hashed[2]; /* number of bits hashed */ + uint8_t unhashed[64]; /* input data not yet hashed */ + uint32_t unhashed_len; /* number of bytes of unhashed input data */ +} NTRU_CRYPTO_SHA2_CTX; + +/************************* + * function declarations * + *************************/ + +/* ntru_crypto_sha2() + * + * This routine provides all operations for a SHA-256 hash, + * and the use of SHA-256 for DSA signing and key generation. + * It may be used to initialize, update, or complete a message digest, + * or any combination of those actions, as determined by the SHA_INIT flag, + * the in_len parameter, and the SHA_FINISH flag, respectively. + * + * When in_len == 0 (no data to hash), the parameter, in, may be NULL. + * When the SHA_FINISH flag is not set, the parameter, md, may be NULL. + * + * Initialization may be standard or use a specified initialization vector, + * and is indicated by setting the SHA_INIT flag. + * Setting init = NULL specifies standard initialization. Otherwise, init + * points to the array of eight alternate initialization 32-bit words. + * + * The hash operation can be updated with any number of input bytes, including + * zero. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. 
+ */ + +extern uint32_t +ntru_crypto_sha2( + NTRU_CRYPTO_HASH_ALGID algid, /* in - hash algorithm ID */ + NTRU_CRYPTO_SHA2_CTX *c, /* in/out - pointer to SHA-2 context */ + uint32_t const *init, /* in - pointer to alternate */ + /* initialization - may be NULL */ + uint8_t const *in, /* in - pointer to input data - + may be NULL if in_len == 0 */ + uint32_t in_len, /* in - number of input data bytes */ + uint32_t flags, /* in - INIT, FINISH */ + uint8_t *md); /* out - address for message digest - + may be NULL if not FINISH */ + +#endif /* NTRU_CRYPTO_SHA2_H */ diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha256.c b/crypt/liboqs/kex_ntru/ntru_crypto_sha256.c new file mode 100644 index 0000000000000000000000000000000000000000..544c51c53a915511f933d769e000c5ab7a2bda0e --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha256.c @@ -0,0 +1,109 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. 
+ * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_sha256.c + * + * Contents: Routines implementing the SHA-256 hash calculations. + * + *****************************************************************************/ + +#include "ntru_crypto_sha256.h" + +/* ntru_crypto_sha256_init + * + * This routine performs standard initialization of the SHA-256 state. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + */ + +uint32_t +ntru_crypto_sha256_init( + NTRU_CRYPTO_SHA2_CTX *c) /* in/out - pointer to SHA-2 context */ +{ + return ntru_crypto_sha2(NTRU_CRYPTO_HASH_ALGID_SHA256, c, NULL, NULL, 0, + SHA_INIT, NULL); +} + +/* ntru_crypto_sha256_update + * + * This routine processes input data and updates the SHA-256 hash calculation. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. + */ + +uint32_t +ntru_crypto_sha256_update( + NTRU_CRYPTO_SHA2_CTX *c, /* in/out - pointer to SHA-2 context */ + uint8_t const *data, /* in - pointer to input data */ + uint32_t data_len) /* in - no. of bytes of input data */ +{ + return ntru_crypto_sha2(NTRU_CRYPTO_HASH_ALGID_SHA256, c, NULL, data, + data_len, SHA_DATA_ONLY, NULL); +} + +/* ntru_crypto_sha256_final + * + * This routine completes the SHA-256 hash calculation and returns the + * message digest. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. 
+ */ + +uint32_t +ntru_crypto_sha256_final( + NTRU_CRYPTO_SHA2_CTX *c, /* in/out - pointer to SHA-2 context */ + uint8_t *md) /* out - address for message digest */ +{ + return ntru_crypto_sha2(NTRU_CRYPTO_HASH_ALGID_SHA256, c, NULL, NULL, 0, + SHA_FINISH, md); +} + +/* ntru_crypto_sha256_digest + * + * This routine computes a SHA-256 message digest. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. + */ + +uint32_t +ntru_crypto_sha256_digest( + uint8_t const *data, /* in - pointer to input data */ + uint32_t data_len, /* in - number of bytes of input data */ + uint8_t *md) /* out - address for message digest */ +{ + NTRU_CRYPTO_SHA2_CTX c; + + return ntru_crypto_sha2(NTRU_CRYPTO_HASH_ALGID_SHA256, &c, NULL, data, + data_len, SHA_INIT | SHA_FINISH, md); +} diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha256.h b/crypt/liboqs/kex_ntru/ntru_crypto_sha256.h new file mode 100644 index 0000000000000000000000000000000000000000..717cf0471d2601c4a213a74cac077c495767d04c --- /dev/null +++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha256.h @@ -0,0 +1,114 @@ +/****************************************************************************** + * NTRU Cryptography Reference Source Code + * + * Copyright (C) 2009-2016 Security Innovation (SI) + * + * SI has dedicated the work to the public domain by waiving all of its rights + * to the work worldwide under copyright law, including all related and + * neighboring rights, to the extent allowed by law. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * You can copy, modify, distribute and perform the work, even for commercial + * purposes, all without asking permission. 
You should have received a copy of + * the creative commons license (CC0 1.0 universal) along with this program. + * See the license file for more information. + * + * + *********************************************************************************/ + +/****************************************************************************** + * + * File: ntru_crypto_sha256.h + * + * Contents: Definitions and declarations for the SHA-256 implementation. + * + *****************************************************************************/ + +#ifndef NTRU_CRYPTO_SHA256_H +#define NTRU_CRYPTO_SHA256_H + +#include "ntru_crypto_platform.h" +#include "ntru_crypto_sha2.h" + +/****************************************** + * macros needed for generic hash objects * + ******************************************/ + +#define SHA_256_CTX_LEN sizeof(NTRU_CRYPTO_SHA2_CTX) +/* no. bytes in SHA-2 ctx */ +#define SHA_256_BLK_LEN 64 /* 64 bytes in input block */ +#define SHA_256_MD_LEN 32 /* 32 bytes in msg digest */ +#define SHA_256_INIT_FN &ntru_crypto_sha256_init /* init function */ +#define SHA_256_UPDATE_FN &ntru_crypto_sha256_update /* update function */ +#define SHA_256_FINAL_FN &ntru_crypto_sha256_final /* final function */ +#define SHA_256_DIGEST_FN &ntru_crypto_sha256_digest /* digest function */ + +/************************* + * function declarations * + *************************/ + +/* ntru_crypto_sha256_init + * + * This routine performs standard initialization of the SHA-256 state. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + */ + +extern uint32_t +ntru_crypto_sha256_init( + NTRU_CRYPTO_SHA2_CTX *c); /* in/out - pointer to SHA-2 context */ + +/* ntru_crypto_sha256_update + * + * This routine processes input data and updates the SHA-256 hash calculation. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. 
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. + */ + +extern uint32_t +ntru_crypto_sha256_update( + NTRU_CRYPTO_SHA2_CTX *c, /* in/out - pointer to SHA-2 context */ + uint8_t const *data, /* in - pointer to input data */ + uint32_t data_len); /* in - no. of bytes of input data */ + +/* ntru_crypto_sha256_final + * + * This routine completes the SHA-256 hash calculation and returns the + * message digest. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. + */ + +extern uint32_t +ntru_crypto_sha256_final( + NTRU_CRYPTO_SHA2_CTX *c, /* in/out - pointer to SHA-2 context */ + uint8_t *md); /* out - address for message digest */ + +/* ntru_crypto_sha256_digest + * + * This routine computes a SHA-256 message digest. + * + * Returns SHA_OK on success. + * Returns SHA_FAIL with corrupted context. + * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed. + * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed. + */ + +extern uint32_t +ntru_crypto_sha256_digest( + uint8_t const *data, /* in - pointer to input data */ + uint32_t data_len, /* in - number of bytes of input data */ + uint8_t *md); /* out - address for message digest */ + +#endif /* NTRU_CRYPTO_SHA256_H */ diff --git a/crypt/liboqs/kex_rlwe_bcns15/LICENSE.txt b/crypt/liboqs/kex_rlwe_bcns15/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..07f72eb90baf005fdfab0cc2e9e3a82c188bbaeb --- /dev/null +++ b/crypt/liboqs/kex_rlwe_bcns15/LICENSE.txt @@ -0,0 +1,32 @@ +The files in this directory (except kex_rlwe_bcns15.*) were originally written +by Joppe W. Bos, Craig Costello, Michael Naehrig, and Douglas Stebila +(https://github.com/dstebila/rlwekex). 
+ + +The following license applies to all files in the src/kex_rlwe_bcns15 directory. + + +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+ +For more information, please refer to <http://unlicense.org> diff --git a/crypt/liboqs/kex_rlwe_bcns15/Makefile.am b/crypt/liboqs/kex_rlwe_bcns15/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..d2d0eff9af7a25ebecd4cd55a58ec4378669dd94 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_bcns15/Makefile.am @@ -0,0 +1,9 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libbcns15.la + + +libbcns15_la_SOURCES = fft.c kex_rlwe_bcns15.c rlwe.c rlwe_kex.c + +libbcns15_la_CPPFLAGS = -I../../include +libbcns15_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/kex_rlwe_bcns15/fft.c b/crypt/liboqs/kex_rlwe_bcns15/fft.c new file mode 100644 index 0000000000000000000000000000000000000000..7515209178ddd81609f192387a3e3ee6b1ed4098 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_bcns15/fft.c @@ -0,0 +1,243 @@ +/* This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * See LICENSE for complete information. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "local.h" + +static void *(*volatile rlwe_memset_volatile)(void *, int, size_t) = memset; + +/* Reduction modulo p = 2^32 - 1. + * This is not a prime since 2^32-1 = (2^1+1)*(2^2+1)*(2^4+1)*(2^8+1)*(2^16+1). + * But since 2 is a unit in Z/pZ we can use it for computing FFTs in + * Z/pZ[X]/(X^(2^7)+1) + */ + +/* Caution: + * We use a redundant representation where the integer 0 is represented both + * by 0 and 2^32-1. + * This approach follows the description from the paper: + * Joppe W. Bos, Craig Costello, Huseyin Hisil, and Kristin Lauter: Fast Cryptography in Genus 2 + * EUROCRYPT 2013, Lecture Notes in Computer Science 7881, pp. 194-210, Springer, 2013. 
+ * More specifically see: Section 3 related to Modular Addition/Subtraction. + */ + +/* Compute: c = (a+b) mod (2^32-1) + * Let, t = a+b = t_1*2^32 + t0, where 0 <= t_1 <= 1, 0 <= t_0 < 2^32. + * Then t mod (2^32-1) = t0 + t1 */ + +/* NOTE: + * Implementing this arithmetic in asm might significantly + * increase performance. + */ + +#define modadd(c, a, b) \ + do { \ + uint32_t _t = a + b; \ + c = _t + (_t < a); \ + } while (0) + +#define modsub(c, a, b) c = (a - b) - (b > a) + +#define modmul(c, a, b) \ + do { \ + uint64_t _T = (uint64_t) a * (uint64_t) b; \ + modadd(c, ((uint32_t) _T), ((uint32_t)((uint64_t) _T >> (uint64_t) 32))); \ + } while (0) + +#define modmuladd(c, a, b) \ + do { \ + uint64_t _T = (uint64_t) a * (uint64_t) b + c; \ + modadd(c, ((uint32_t) _T), ((uint32_t)((uint64_t) _T >> (uint64_t) 32))); \ + } while (0) + +#define div2(c, a) c = (uint32_t)(((uint64_t)(a) + (uint64_t)((uint32_t)(0 - ((a) &1)) & 0xFFFFFFFF)) >> 1) +#define normalize(c, a) c = (a) + ((a) == 0xFFFFFFFF) + +/* Define the basic building blocks for the FFT. */ +#define SET_ZERO(x) (x) = 0 +#define add(c, a, b) modadd(c, a, b) +#define sub(c, a, b) modsub(c, a, b) +#define mul(c, a, b) modmul(c, a, b) +#define moddiv2(c, a) \ + normalize(c, a); \ + div2(c, c) +#define neg(c, a) \ + (c) = 0xFFFFFFFF - (a); \ + normalize(c, c) +#define squ(c, a) mul(c, a, a) +#define set(c, a) (c) = (a) + +/* Reverse the bits, approach from "Bit Twiddling Hacks" + * See: https://graphics.stanford.edu/~seander/bithacks.html + */ +static uint32_t reverse(uint32_t x) { + x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1)); + x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2)); + x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4)); + x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8)); + return ((x >> 16) | (x << 16)); +} + +/* Nussbaumer approach, see: + * H. J. Nussbaumer. Fast polynomial transform algorithms for digital convolution. 
Acoustics, Speech and + * Signal Processing, IEEE Transactions on, 28(2):205{215, 1980 + * We followed the description from Knuth: + * D. E. Knuth. Seminumerical Algorithms. The Art of Computer Programming. Addison-Wesley, Reading, + * Massachusetts, USA, 3rd edition, 1997 + * Exercise Exercise 4.6.4.59. + */ + +static void naive(uint32_t *z, const uint32_t *x, const uint32_t *y, unsigned int n) { + unsigned int i, j, k; + uint32_t A, B; + + for (i = 0; i < n; i++) { + SET_ZERO(B); + + mul(A, x[0], y[i]); + + for (j = 1; j <= i; j++) { + modmuladd(A, x[j], y[i - j]); + } + + for (k = 1; j < n; j++, k++) { + modmuladd(B, x[j], y[n - k]); + } + sub(z[i], A, B); + } +} + +static void nussbaumer_fft(uint32_t z[1024], const uint32_t x[1024], const uint32_t y[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx) { + uint32_t(*X1)[64] = ctx->x1; + uint32_t(*Y1)[64] = ctx->y1; + uint32_t(*Z1)[64] = ctx->z1; + uint32_t *T1 = ctx->t1; + unsigned int i; + int j; + + for (i = 0; i < 32; i++) { + for (j = 0; j < 32; j++) { + set(X1[i][j], x[32 * j + i]); + set(X1[i + 32][j], x[32 * j + i]); + + set(Y1[i][j], y[32 * j + i]); + set(Y1[i + 32][j], y[32 * j + i]); + } + } + + for (j = 4; j >= 0; j--) { + for (i = 0; i < (1U << (5 - j)); i++) { + unsigned int t, ssr = reverse(i); + for (t = 0; t < (1U << j); t++) { + unsigned int s, sr, I, L, a; + s = i; + sr = (ssr >> (32 - 5 + j)); + sr <<= j; + s <<= (j + 1); + + // X_i(w) = X_i(w) + w^kX_l(w) can be computed as + // X_ij = X_ij - X_l(j-k+r) for 0 <= j < k + // X_ij = X_ij + X_l(j-k) for k <= j < r + I = s + t, L = s + t + (1 << j); + + for (a = sr; a < 32; a++) { + set(T1[a], X1[L][a - sr]); + } + for (a = 0; a < sr; a++) { + neg(T1[a], X1[L][32 + a - sr]); + } + + for (a = 0; a < 32; a++) { + sub(X1[L][a], X1[I][a], T1[a]); + add(X1[I][a], X1[I][a], T1[a]); + } + + for (a = sr; a < 32; a++) { + set(T1[a], Y1[L][a - sr]); + } + for (a = 0; a < sr; a++) { + neg(T1[a], Y1[L][32 + a - sr]); + } + + for (a = 0; a < 32; a++) { + 
sub(Y1[L][a], Y1[I][a], T1[a]); + add(Y1[I][a], Y1[I][a], T1[a]); + } + } + } + } + + for (i = 0; i < 2 * 32; i++) { + naive(Z1[i], X1[i], Y1[i], 32); + } + + for (j = 0; j <= (int) 5; j++) { + for (i = 0; i < (1U << (5 - j)); i++) { + unsigned int t, ssr = reverse(i); + for (t = 0; t < (1U << j); t++) { + unsigned int s, sr, A, B, a; + s = i; + sr = (ssr >> (32 - 5 + j)); + sr <<= j; + s <<= (j + 1); + + A = s + t; + B = s + t + (1 << j); + for (a = 0; a < 32; a++) { + sub(T1[a], Z1[A][a], Z1[B][a]); + moddiv2(T1[a], T1[a]); + add(Z1[A][a], Z1[A][a], Z1[B][a]); + moddiv2(Z1[A][a], Z1[A][a]); + } + + // w^{-(r/m)s'} (Z_{s+t}(w)-Z_{s+t+2^j}(w)) + for (a = 0; a < 32 - sr; a++) { + set(Z1[B][a], T1[a + sr]); + } + for (a = 32 - sr; a < 32; a++) { + neg(Z1[B][a], T1[a - (32 - sr)]); + } + } + } + } + + for (i = 0; i < 32; i++) { + sub(z[i], Z1[i][0], Z1[32 + i][32 - 1]); + for (j = 1; j < 32; j++) { + add(z[32 * j + i], Z1[i][j], Z1[32 + i][j - 1]); + } + } +} + +void oqs_kex_rlwe_bcns15_fft_mul(uint32_t z[1024], const uint32_t x[1024], const uint32_t y[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx) { + nussbaumer_fft(z, x, y, ctx); +} + +void oqs_kex_rlwe_bcns15_fft_add(uint32_t z[1024], const uint32_t x[1024], const uint32_t y[1024]) { + int i; + for (i = 0; i < 1024; i++) { + add(z[i], x[i], y[i]); + } +} + +void oqs_kex_rlwe_bcns15_fft_ctx_clear(struct oqs_kex_rlwe_bcns15_fft_ctx *ctx) { + if (ctx == NULL) { + return; + } + for (int i = 0; i < 64; i++) { + rlwe_memset_volatile(ctx->x1[i], 0, 64 * sizeof(uint32_t)); + rlwe_memset_volatile(ctx->y1[i], 0, 64 * sizeof(uint32_t)); + rlwe_memset_volatile(ctx->z1[i], 0, 64 * sizeof(uint32_t)); + } + rlwe_memset_volatile(ctx->t1, 0, 64 * sizeof(uint32_t)); +} diff --git a/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.c b/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.c new file mode 100644 index 0000000000000000000000000000000000000000..8eee72ad308e06e1c784fc76b6ce122ef6006e58 --- /dev/null +++ 
b/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.c @@ -0,0 +1,198 @@ +#if defined(WINDOWS) +#define UNUSED +// __attribute__ not supported in VS, is there something else I should define? +#else +#define UNUSED __attribute__((unused)) +#endif + +#include <stdlib.h> +#include <string.h> +#if !defined(WINDOWS) +#include <strings.h> +#include <unistd.h> +#endif + +#include <oqs/common.h> +#include <oqs/kex.h> +#include <oqs/rand.h> + +#include "kex_rlwe_bcns15.h" +#include "local.h" + +#include "rlwe_a.h" + +#if defined(WINDOWS) +#define strdup _strdup // for strdup deprecation warning +#endif + +OQS_KEX *OQS_KEX_rlwe_bcns15_new(OQS_RAND *rand) { + + OQS_KEX *k = malloc(sizeof(OQS_KEX)); + if (k == NULL) { + return NULL; + } + + k->ctx = malloc(sizeof(struct oqs_kex_rlwe_bcns15_fft_ctx)); + if (k->ctx == NULL) { + free(k); + return NULL; + } + + k->method_name = strdup("RLWE BCNS15"); + k->estimated_classical_security = 163; + k->estimated_quantum_security = 76; + k->seed = NULL; + k->seed_len = 0; + k->named_parameters = NULL; + k->rand = rand; + k->params = NULL; + k->alice_0 = &OQS_KEX_rlwe_bcns15_alice_0; + k->bob = &OQS_KEX_rlwe_bcns15_bob; + k->alice_1 = &OQS_KEX_rlwe_bcns15_alice_1; + k->alice_priv_free = &OQS_KEX_rlwe_bcns15_alice_priv_free; + k->free = &OQS_KEX_rlwe_bcns15_free; + + return k; +} + +int OQS_KEX_rlwe_bcns15_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) { + + int ret; + uint32_t *alice_msg_32 = NULL; + + *alice_priv = NULL; + *alice_msg = NULL; + + /* allocate public/private key pair */ + alice_msg_32 = malloc(1024 * sizeof(uint32_t)); + if (alice_msg_32 == NULL) { + goto err; + } + *alice_priv = malloc(1024 * sizeof(uint32_t)); + if (*alice_priv == NULL) { + goto err; + } + + /* generate public/private key pair */ + oqs_kex_rlwe_bcns15_generate_keypair(oqs_kex_rlwe_bcns15_a, (uint32_t *) *alice_priv, alice_msg_32, k->ctx, k->rand); + *alice_msg = (uint8_t *) alice_msg_32; + *alice_msg_len = 1024 * 
sizeof(uint32_t); + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(alice_msg_32); + OQS_MEM_secure_free(*alice_priv, 1024 * sizeof(uint32_t)); + *alice_priv = NULL; + +cleanup: + return ret; +} + +int OQS_KEX_rlwe_bcns15_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + + uint32_t *bob_priv = NULL; + uint64_t *key_64 = NULL; + + *bob_msg = NULL; + *key = NULL; + + if (alice_msg_len != 1024 * sizeof(uint32_t)) { + goto err; + } + + bob_priv = malloc(1024 * sizeof(uint32_t)); + if (bob_priv == NULL) { + goto err; + } + /* allocate message and session key */ + *bob_msg = malloc(1024 * sizeof(uint32_t) + 16 * sizeof(uint64_t)); + if (*bob_msg == NULL) { + goto err; + } + key_64 = malloc(16 * sizeof(uint64_t)); + if (key_64 == NULL) { + goto err; + } + + /* generate public/private key pair */ + oqs_kex_rlwe_bcns15_generate_keypair(oqs_kex_rlwe_bcns15_a, bob_priv, (uint32_t *) *bob_msg, k->ctx, k->rand); + + /* generate Bob's response */ + uint8_t *bob_rec = *bob_msg + 1024 * sizeof(uint32_t); + oqs_kex_rlwe_bcns15_compute_key_bob((uint32_t *) alice_msg, bob_priv, (uint64_t *) bob_rec, key_64, k->ctx, k->rand); + *bob_msg_len = 1024 * sizeof(uint32_t) + 16 * sizeof(uint64_t); + *key = (uint8_t *) key_64; + *key_len = 16 * sizeof(uint64_t); + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*bob_msg); + *bob_msg = NULL; + OQS_MEM_secure_free(key_64, 16 * sizeof(uint64_t)); + +cleanup: + OQS_MEM_secure_free(bob_priv, 1024 * sizeof(uint32_t)); + + return ret; +} + +int OQS_KEX_rlwe_bcns15_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + + uint64_t *key_64 = NULL; + + *key = NULL; + + if (bob_msg_len != 1024 * sizeof(uint32_t) + 16 * sizeof(uint64_t)) { + goto err; + } + + /* allocate session key */ + key_64 = malloc(16 * sizeof(uint64_t)); + if (key_64 == NULL) { + 
goto err; + } + + /* generate Alice's session key */ + const uint8_t *bob_rec = bob_msg + 1024 * sizeof(uint32_t); + oqs_kex_rlwe_bcns15_compute_key_alice((uint32_t *) bob_msg, (uint32_t *) alice_priv, (uint64_t *) bob_rec, key_64, k->ctx); + *key = (uint8_t *) key_64; + *key_len = 16 * sizeof(uint64_t); + + ret = 1; + goto cleanup; + +err: + ret = 0; + OQS_MEM_secure_free(key_64, 16 * sizeof(uint64_t)); + +cleanup: + + return ret; +} + +void OQS_KEX_rlwe_bcns15_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) { + if (alice_priv) { + free(alice_priv); + } +} + +void OQS_KEX_rlwe_bcns15_free(OQS_KEX *k) { + if (!k) { + return; + } + free(k->method_name); + k->method_name = NULL; + free(k->ctx); + k->ctx = NULL; + free(k); +} diff --git a/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.h b/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.h new file mode 100644 index 0000000000000000000000000000000000000000..c098da4812bb43208e44edf2f5fbdbaae1a689ab --- /dev/null +++ b/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.h @@ -0,0 +1,24 @@ +/** + * \file kex_rlwe_bcns15.h + * \brief Header for ring-LWE key exchange protocol BCNS15 + */ + +#ifndef __OQS_KEX_RLWE_BCNS15_H +#define __OQS_KEX_RLWE_BCNS15_H + +#include <stddef.h> +#include <stdint.h> + +#include <oqs/kex.h> +#include <oqs/rand.h> + +OQS_KEX *OQS_KEX_rlwe_bcns15_new(OQS_RAND *rand); + +int OQS_KEX_rlwe_bcns15_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len); +int OQS_KEX_rlwe_bcns15_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len); +int OQS_KEX_rlwe_bcns15_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + +void OQS_KEX_rlwe_bcns15_alice_priv_free(OQS_KEX *k, void *alice_priv); +void OQS_KEX_rlwe_bcns15_free(OQS_KEX *k); + +#endif diff --git a/crypt/liboqs/kex_rlwe_bcns15/local.h b/crypt/liboqs/kex_rlwe_bcns15/local.h 
new file mode 100644 index 0000000000000000000000000000000000000000..ac07fe173f7b7ec6ac9340cd43807f9e1c28f7b6 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_bcns15/local.h @@ -0,0 +1,46 @@ +/* This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * See LICENSE for complete information. + */ + +#ifndef _OQS_KEX_RLWE_BCNS15_LOCAL_H_ +#define _OQS_KEX_RLWE_BCNS15_LOCAL_H_ + +#include <stdint.h> + +#include <oqs/rand.h> + +struct oqs_kex_rlwe_bcns15_fft_ctx { + uint32_t x1[64][64]; + uint32_t y1[64][64]; + uint32_t z1[64][64]; + uint32_t t1[64]; +}; + +void oqs_kex_rlwe_bcns15_fft_mul(uint32_t z[1024], const uint32_t x[1024], const uint32_t y[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx); +void oqs_kex_rlwe_bcns15_fft_add(uint32_t z[1024], const uint32_t x[1024], const uint32_t y[1024]); + +void oqs_kex_rlwe_bcns15_fft_ctx_clear(struct oqs_kex_rlwe_bcns15_fft_ctx *ctx); + +void oqs_kex_rlwe_bcns15_sample_ct(uint32_t s[1024], OQS_RAND *rand); +void oqs_kex_rlwe_bcns15_round2_ct(uint64_t out[16], const uint32_t in[1024]); +void oqs_kex_rlwe_bcns15_crossround2_ct(uint64_t out[16], const uint32_t in[1024], OQS_RAND *rand); +void oqs_kex_rlwe_bcns15_rec_ct(uint64_t out[16], const uint32_t w[1024], const uint64_t b[16]); + +void oqs_kex_rlwe_bcns15_sample(uint32_t s[1024], OQS_RAND *rand); +void oqs_kex_rlwe_bcns15_round2(uint64_t out[16], const uint32_t in[1024]); +void oqs_kex_rlwe_bcns15_crossround2(uint64_t out[16], const uint32_t in[1024], OQS_RAND *rand); +void oqs_kex_rlwe_bcns15_rec(uint64_t out[16], const uint32_t w[1024], const uint64_t b[16]); + +void oqs_kex_rlwe_bcns15_a_times_s_plus_e(uint32_t out[1024], const uint32_t a[1024], const uint32_t s[1024], const uint32_t e[1024], struct oqs_kex_rlwe_bcns15_fft_ctx 
*fft_ctx); + +void oqs_kex_rlwe_bcns15_generate_keypair(const uint32_t *a, uint32_t s[1024], uint32_t b[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx, OQS_RAND *rand); +void oqs_kex_rlwe_bcns15_compute_key_alice(const uint32_t b[1024], const uint32_t s[1024], const uint64_t c[16], uint64_t k[16], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx); +void oqs_kex_rlwe_bcns15_compute_key_bob(const uint32_t b[1024], const uint32_t s[1024], uint64_t c[16], uint64_t k[16], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx, OQS_RAND *rand); + +#endif /* _OQS_KEX_RLWE_BCNS15_LOCAL_H_ */ diff --git a/crypt/liboqs/kex_rlwe_bcns15/rlwe.c b/crypt/liboqs/kex_rlwe_bcns15/rlwe.c new file mode 100644 index 0000000000000000000000000000000000000000..f6fc5ac4c89eafa14d77781605036eebf668a177 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_bcns15/rlwe.c @@ -0,0 +1,296 @@ +/* This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * See LICENSE for complete information. 
+ */ + +#if defined(WINDOWS) +#pragma warning(disable : 4146 4244 4267) +#endif + +#include <inttypes.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <oqs/rand.h> + +#include "local.h" + +#include "rlwe_table.h" + +#define setbit(a, x) ((a)[(x) / 64] |= (((uint64_t) 1) << (uint64_t)((x) % 64))) +#define getbit(a, x) (((a)[(x) / 64] >> (uint64_t)((x) % 64)) & 1) +#define clearbit(a, x) ((a)[(x) / 64] &= ((~((uint64_t) 0)) - (((uint64_t) 1) << (uint64_t)((x) % 64)))) + +/* Auxiliary functions for constant-time comparison */ + +/* + * Returns 1 if x != 0 + * Returns 0 if x == 0 + * x and y are arbitrary unsigned 64-bit integers + */ +static uint64_t ct_isnonzero_u64(uint64_t x) { + return (x | -x) >> 63; +} + +/* + * Returns 1 if x != y + * Returns 0 if x == y + * x and y are arbitrary unsigned 64-bit integers + */ +static uint64_t ct_ne_u64(uint64_t x, uint64_t y) { + return ((x - y) | (y - x)) >> 63; +} + +/* + * Returns 1 if x == y + * Returns 0 if x != y + * x and y are arbitrary unsigned 64-bit integers + */ +static uint64_t ct_eq_u64(uint64_t x, uint64_t y) { + return 1 ^ ct_ne_u64(x, y); +} + +/* Returns 1 if x < y + * Returns 0 if x >= y + * x and y are arbitrary unsigned 64-bit integers + */ +static uint64_t ct_lt_u64(uint64_t x, uint64_t y) { + return (x ^ ((x ^ y) | ((x - y) ^ y))) >> 63; +} + +/* + * Returns 1 if x > y + * Returns 0 if x <= y + * x and y are arbitrary unsigned 64-bit integers + */ +static uint64_t ct_gt_u64(uint64_t x, uint64_t y) { + return ct_lt_u64(y, x); +} + +/* + * Returns 1 if x <= y + * Returns 0 if x > y + * x and y are arbitrary unsigned 64-bit integers + */ +static uint64_t ct_le_u64(uint64_t x, uint64_t y) { + return 1 ^ ct_gt_u64(x, y); +} + +/* + * Returns 1 if x >= y + * Returns 0 if x < y + * x and y are arbitrary unsigned 64-bit integers + */ +static uint64_t ct_ge_u64(uint64_t x, uint64_t y) { + return 1 ^ ct_lt_u64(x, y); +} + +/* Returns 0xFFFF..FFFF if bit != 0 + * 
Returns 0 if bit == 0 + */ +static uint64_t ct_mask_u64(uint64_t bit) { + return 0 - (uint64_t) ct_isnonzero_u64(bit); +} + +/* Conditionally return x or y depending on whether bit is set + * Equivalent to: return bit ? x : y + * x and y are arbitrary 64-bit unsigned integers + * bit must be either 0 or 1. + */ +static uint64_t ct_select_u64(uint64_t x, uint64_t y, uint64_t bit) { + uint64_t m = ct_mask_u64(bit); + return (x & m) | (y & ~m); +} + +/* Returns 0 if a >= b + * Returns 1 if a < b + * Where a and b are both 3-limb 64-bit integers. + * This function runs in constant time. + */ +static int cmplt_ct(uint64_t *a, uint64_t *b) { + uint64_t r = 0; /* result */ + uint64_t m = 0; /* mask */ + int i; + for (i = 2; i >= 0; --i) { + r |= ct_lt_u64(a[i], b[i]) & ~m; + m |= ct_mask_u64(ct_ne_u64(a[i], b[i])); /* stop when a[i] != b[i] */ + } + return r & 1; +} + +static uint32_t single_sample(uint64_t *in) { + size_t i = 0; + + while (cmplt_ct(rlwe_table[i], in)) { // ~3.5 comparisons in expectation + i++; + } + + return i; +} + +/* We assume that e contains two random bits in the two + * least significant positions. */ +static uint64_t dbl(const uint32_t in, int32_t e) { + // sample uniformly from [-1, 0, 0, 1] + // Hence, 0 is sampled with twice the probability of 1 + e = (((e >> 1) & 1) - ((int32_t)(e & 1))); + return (uint64_t)((((uint64_t) in) << (uint64_t) 1) - e); +} + +/* Constant time version. 
*/ +static uint32_t single_sample_ct(uint64_t *in) { + uint32_t index = 0, i; + for (i = 0; i < 52; i++) { + index = ct_select_u64(index, i + 1, cmplt_ct(in, rlwe_table[i])); + } + return index; +} + +void oqs_kex_rlwe_bcns15_sample_ct(uint32_t s[1024], OQS_RAND *rand) { + int i, j; + for (i = 0; i < 16; i++) { + uint64_t r = rand->rand_64(rand); + for (j = 0; j < 64; j++) { + uint64_t rnd[3]; + uint32_t m; + uint32_t t; + rnd[0] = rand->rand_64(rand); + rnd[1] = rand->rand_64(rand); + rnd[2] = rand->rand_64(rand); + m = (r & 1); + r >>= 1; + // use the constant time version single_sample + s[i * 64 + j] = single_sample_ct(rnd); + t = (uint32_t) -s[i * 64 + j]; + s[i * 64 + j] = ct_select_u64(t, s[i * 64 + j], ct_eq_u64(m, 0)); + } + } +} + +void oqs_kex_rlwe_bcns15_round2_ct(uint64_t out[16], const uint32_t in[1024]) { + int i; + memset(out, 0, 128); + for (i = 0; i < 1024; i++) { + uint64_t b = ct_ge_u64(in[i], 1073741824ULL) & + ct_le_u64(in[i], 3221225471ULL); + out[i / 64] |= b << (uint64_t)(i % 64); + } +} + +void oqs_kex_rlwe_bcns15_crossround2_ct(uint64_t out[16], const uint32_t in[1024], OQS_RAND *rand) { + int i, j; + memset(out, 0, 128); + for (i = 0; i < 64; i++) { + uint32_t e = rand->rand_32(rand); + for (j = 0; j < 16; j++) { + uint64_t dd; + uint64_t b; + dd = dbl(in[i * 16 + j], (int32_t) e); + e >>= 2; + b = (ct_ge_u64(dd, 2147483648ULL) & ct_le_u64(dd, 4294967295ULL)) | + (ct_ge_u64(dd, 6442450942ULL) & ct_le_u64(dd, 8589934590ULL)); + out[(i * 16 + j) / 64] |= (b << (uint64_t)((i * 16 + j) % 64)); + } + } +} + +void oqs_kex_rlwe_bcns15_rec_ct(uint64_t out[16], const uint32_t w[1024], const uint64_t b[16]) { + int i; + memset(out, 0, 128); + for (i = 0; i < 1024; i++) { + uint64_t coswi; + uint64_t B; + coswi = (((uint64_t) w[i]) << (uint64_t) 1); + B = (ct_eq_u64(getbit(b, i), 0) & ct_ge_u64(coswi, 3221225472ULL) & + ct_le_u64(coswi, 7516192766ULL)) | + (ct_eq_u64(getbit(b, i), 1) & ct_ge_u64(coswi, 1073741824ULL) & + ct_le_u64(coswi, 
5368709118ULL)); + out[i / 64] |= (B << (uint64_t)(i % 64)); + } +} + +void oqs_kex_rlwe_bcns15_sample(uint32_t s[1024], OQS_RAND *rand) { + int i, j; + for (i = 0; i < 16; i++) { + uint64_t r = rand->rand_64(rand); + for (j = 0; j < 64; j++) { + uint64_t rnd[3]; + int32_t m; + rnd[0] = rand->rand_64(rand); + rnd[1] = rand->rand_64(rand); + rnd[2] = rand->rand_64(rand); + m = (r & 1); + r >>= 1; + s[i * 64 + j] = single_sample(rnd); + if (m) { + s[i * 64 + j] = (uint32_t) -s[i * 64 + j]; + } + } + } +} + +void oqs_kex_rlwe_bcns15_round2(uint64_t out[16], const uint32_t in[1024]) { + int i; + + // out should have enough space for 1024-bits + memset(out, 0, 128); + + //q/4 and 3*q/4 + for (i = 0; i < 1024; i++) { + if (in[i] >= 1073741824 && in[i] <= 3221225471) { + setbit(out, i); + } + } +} + +void oqs_kex_rlwe_bcns15_crossround2(uint64_t out[16], const uint32_t in[1024], OQS_RAND *rand) { + int i, j; + // out should have enough space for 1024-bits + memset(out, 0, 128); + + for (i = 0; i < 64; i++) { + uint32_t e = rand->rand_32(rand); + for (j = 0; j < 16; j++) { + uint64_t dd = dbl(in[i * 16 + j], (int32_t) e); + e >>= 2; + //q/2 to q and 3*q/2 to 2*q + if ((dd >= (uint64_t) 2147483648 && dd <= (uint64_t) 4294967295) || (dd >= (uint64_t) 6442450942 && dd <= (uint64_t) 8589934590)) { + setbit(out, (i * 16 + j)); + } + } + } +} + +void oqs_kex_rlwe_bcns15_rec(uint64_t out[16], const uint32_t w[1024], const uint64_t b[16]) { + int i; + + // out should have enough space for 1024 bits + memset(out, 0, 128); + + for (i = 0; i < 1024; i++) { + uint64_t coswi = (((uint64_t) w[i]) << (uint64_t) 1); + if (getbit(b, i) == 0) { + //Ceiling(2*3*q/8)..Floor(2*7*q/8) + if (coswi >= (uint64_t) 3221225472 && coswi <= (uint64_t) 7516192766) { + setbit(out, i); + } + } else { + // Ceiling(2*q/8)..Floor(2*5*q/8) + if (coswi >= (uint64_t) 1073741824 && coswi <= (uint64_t) 5368709118) { + setbit(out, i); + } + } + } +} + +void oqs_kex_rlwe_bcns15_a_times_s_plus_e(uint32_t out[1024], 
const uint32_t a[1024], const uint32_t s[1024], const uint32_t e[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx) { + oqs_kex_rlwe_bcns15_fft_mul(out, a, s, ctx); + oqs_kex_rlwe_bcns15_fft_add(out, out, e); +} diff --git a/crypt/liboqs/kex_rlwe_bcns15/rlwe_a.h b/crypt/liboqs/kex_rlwe_bcns15/rlwe_a.h new file mode 100644 index 0000000000000000000000000000000000000000..aa5eb8ebb36c3950d55ca598bf04512f7aa2afcf --- /dev/null +++ b/crypt/liboqs/kex_rlwe_bcns15/rlwe_a.h @@ -0,0 +1,267 @@ +/* This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * See LICENSE for complete information. + */ + +uint32_t oqs_kex_rlwe_bcns15_a[1024] = { + 0x29FE0191, 0xDD1A457D, 0x3534EE4B, 0x6450ED74, + 0xBBFE9F64, 0x92BF0F31, 0x8DCF8995, 0x4C5E30D0, + 0x9E2ED04D, 0x8C18FE0B, 0x1A70F2E7, 0x2625CD93, + 0x0065DA14, 0x6E009722, 0xE6A70E8B, 0xAEF6EF56, + 0x8C6C06AF, 0x9E59E953, 0x4995F67B, 0xE918EE9D, + 0x8B4F41A7, 0x0D811041, 0xF5FE6458, 0x3C02B584, + 0xCBCFC8FD, 0x5A01F116, 0x73408361, 0x44D3A098, + 0xBBDEECF6, 0x90E09082, 0xF8538BA4, 0xF9600091, + 0xD8D30FEF, 0x56201487, 0xACB2159D, 0x38F47F77, + 0xED7A864F, 0x8FC785CA, 0x7CBD6108, 0x3CA577DE, + 0xFF44CCC2, 0xA1385A79, 0x5C88E3AD, 0x177C46A9, + 0xDA4A4DD8, 0x2AA3594F, 0xA4A5E629, 0x47CA6F6E, + 0xB2DF1BC6, 0x6841B78E, 0x0823F5A8, 0xA18C7D52, + 0x7634A0D1, 0xDA1751BA, 0x18B9D25D, 0x5B2643BC, + 0xACC6975D, 0x48E786F4, 0x05E3ED4E, 0x4DC86568, + 0x3F5C5F99, 0x585DBFD7, 0xEF6E0715, 0x7D36B823, + 0x12D872CD, 0xD7B78F27, 0xDD672BF5, 0x2DC7C7EB, + 0xA3033801, 0x50E48348, 0x9162A260, 0x0BE8F15B, + 0xABB563EC, 0x06624C5A, 0x812BF7BC, 0x8637AC35, + 0xF44504F3, 0xFF8577AB, 0x4A0161B0, 0x000AEB0E, + 0x311204AF, 0x2A76831B, 0x4D903F3A, 0x97204FA9, + 0x9EB524E3, 0x1757AFAC, 0xBA369FEC, 0xCD8F198D, + 0x6B33C246, 
0x51C13FCE, 0xB58ACC4E, 0x39ACF8DA, + 0x7BB7EBF7, 0xEDC1449D, 0xC7B47FDB, 0x9C39148D, + 0x4E688D7B, 0xFAD0C2C2, 0x296CE85C, 0x6045C89C, + 0x6441C0C6, 0x50C7C83A, 0xC11764DD, 0x58D7EEA2, + 0xE57B9D0E, 0x4E142770, 0xB8BFBB59, 0xE143EBAA, + 0xFF60C855, 0x238727F0, 0xE35B4A5B, 0x8F96940B, + 0x4498A6BA, 0x5911093A, 0x394DD002, 0x521B00D2, + 0x140BDAF9, 0xEAB67207, 0x21E631A6, 0xA04AADA9, + 0xA96A9843, 0x4B44CC9B, 0xE4D24C33, 0xC7E7AE78, + 0xE45A6C72, 0xCBE61D3C, 0xCE5A4869, 0x10442A52, + 0xDB11F194, 0x39FC415D, 0x7E7BDB76, 0xAE9EFA22, + 0x25F4F262, 0x472DD0A7, 0x42EBD7A0, 0xE8038ECE, + 0xD3DB002A, 0x8416D2EC, 0xDF88C989, 0x7FEA22D5, + 0xC7A3F6FE, 0x37409982, 0xF45B75E2, 0x9A4AC289, + 0x90406FD6, 0xEA1C74A5, 0x5777B39F, 0xD07F1FA3, + 0xCE6EDA0D, 0xD150ECFB, 0xBEFF71BA, 0x50129EFC, + 0x51CE65B9, 0xB9FB0AB8, 0x770C59CB, 0x11F2354F, + 0x8623D4BB, 0xD6FCAFD6, 0xB2B1697C, 0x0D7067E2, + 0x2BA5AFB9, 0xD369C585, 0x5B5E156C, 0xD8C81E6E, + 0x80CFDF16, 0xF6F441EB, 0xC173BAF5, 0x78099E3A, + 0xD38F027B, 0x4AC8D518, 0x8D0108A1, 0xE442B0F1, + 0x56F9EA3C, 0xD0D6BBCA, 0x4E17DCB4, 0x69BF743B, + 0x0CCE779F, 0xD5E59851, 0x63861EA2, 0xB1CB22C1, + 0xBBFD2ACE, 0xDDA390D1, 0xEDF1059F, 0x04F80F89, + 0xB13AF849, 0x58C66009, 0xE0D781C0, 0x588DC348, + 0xA305669D, 0x0D7AF67F, 0x32BC3C38, 0xD725EFBA, + 0xDC3D9434, 0x22BD7ED8, 0x2DFD2926, 0x4BDEAD3A, + 0xB2D5ECE6, 0x16B05C99, 0xFEEC7104, 0xF6CAC918, + 0x0944C774, 0xCE00633B, 0xC59DA01A, 0x41E8E924, + 0x335DF501, 0x3049E8EE, 0x5B4B8AAC, 0xC962FC91, + 0xD6BB22B3, 0x0AC870EB, 0xC3D99400, 0xA0CEAC28, + 0xAF07DE1E, 0x831C2824, 0x258C5DDC, 0x779417E6, + 0x41CB33D0, 0x4E51076A, 0xD1DB6038, 0x9E0B1C41, + 0xA9A1F90D, 0xF27E7705, 0x75892711, 0x5D9F1175, + 0x85CC508B, 0x5CA415BE, 0x1858C792, 0xFB18632F, + 0xC94111EB, 0x937C0D28, 0xC2A09970, 0x386209D9, + 0xBBDD9787, 0x2473F53A, 0xEF7E7637, 0xCFC8630B, + 0x2BA3B7F8, 0x3C0047AD, 0x10D76FF7, 0xB1D9414D, + 0xCEB7B902, 0xA5B543F5, 0x2E484905, 0xE0233C10, + 0xD061A1F8, 0xCED0A901, 0xAC373CAC, 0x04281F37, + 0x3609797F, 
0xDB80964D, 0x7B49A74F, 0x7699656F, + 0x0DCEC4BC, 0x0EC49C2D, 0xF1573A4E, 0xA3708464, + 0x9A1E89F0, 0x6B26DEB6, 0x2329FA10, 0xCA4F2BFF, + 0x9E012C8E, 0x788C1DFD, 0x2C758156, 0x2774C544, + 0x150A1F7D, 0x50156D6E, 0x7B675DE1, 0x5D634703, + 0xA7CEB801, 0x92733DAB, 0xB213C00B, 0x304A65B1, + 0x8856CF8E, 0x7FF7DD67, 0xD0912293, 0x30064297, + 0x663D051D, 0x01BC31B4, 0x2B1700BD, 0x39D7D18F, + 0x1EAD5C95, 0x6FB9CD8B, 0xA09993A6, 0xB42071C0, + 0x3C1F2195, 0x7FDF4CF8, 0xC7565A7E, 0x64703D34, + 0x14B250EF, 0x2FA338D2, 0xAEE576DC, 0x6CCED41D, + 0x612D0913, 0xD0680733, 0x8B4DBE8A, 0x6FFEA3D0, + 0x46197CA2, 0xA77F916F, 0xFA5D7BD6, 0x01E22AEB, + 0x18E462DD, 0x4EC9B937, 0xDE753212, 0x05113C94, + 0x7786FBD4, 0xFB379F71, 0x756CF595, 0xEAADCFAB, + 0xBBD74C2E, 0x1F234AC9, 0x85E28AEB, 0x329F7878, + 0xD48FDE09, 0x47A60D0A, 0xAE95163F, 0x72E70995, + 0x27F9FCBF, 0xBDCFCC41, 0x334BC498, 0xEE7931A1, + 0xDFA6AEF4, 0x1EC5E1BF, 0x6221870F, 0xCD54AE13, + 0x7B56EF58, 0x4847B490, 0x31640CD3, 0x10940E14, + 0x556CC334, 0xC9E9B521, 0x499611FF, 0xBEC8D592, + 0x44A7DCB7, 0x4AC2EABD, 0x7D387357, 0x1B76D4B6, + 0x2EACE8C9, 0x52B2D2A4, 0x0C1F2A64, 0x50EF2B9A, + 0x3B23F4F4, 0x8DDE415E, 0xF6B92D2D, 0x9DB0F840, + 0xE18F309D, 0x737B7733, 0xF9F563C5, 0x3C5D4AEE, + 0x8136B0AF, 0xC5AC5550, 0x6E93DEF9, 0x946BCCEC, + 0x5163A273, 0xB5C72175, 0x4919EFBD, 0x222E9B68, + 0x6E43D8EE, 0xAA039B23, 0x913FD80D, 0x42206F18, + 0x5552C01F, 0x35B1136D, 0xFDC18279, 0x5946202B, + 0xFAAE3A37, 0x4C764C88, 0x78075D9B, 0x844C8BA0, + 0xCC33419E, 0x4B0832F6, 0x10D15E89, 0xEE0DD05A, + 0x27432AF3, 0xE12CECA6, 0x60A231B3, 0xF81F258E, + 0xE0BA44D7, 0x144F471B, 0xB4C8451E, 0x3705395C, + 0xE8A69794, 0x3C23F27E, 0x186D2FBA, 0x3DAED36B, + 0xF04DEFF1, 0x0CFA7BDD, 0xFEE45A4F, 0x5E9A4684, + 0x98438C69, 0x5F1D921B, 0x7E43FD86, 0xBD0CF049, + 0x28F47D38, 0x7DF38246, 0x8EED8923, 0xE524E7FC, + 0x089BEC03, 0x15E3DE77, 0x78E8AE28, 0xCB79A298, + 0x9F604E2B, 0x3C6428F7, 0xDCDEABF3, 0x33BAF60A, + 0xBF801273, 0x247B0C3E, 0xE74A8192, 0xB45AC81D, + 0xFC0D2ABE, 
0xF17E99F5, 0x412BD1C1, 0x75DF4247, + 0xA90FC3C0, 0xB2A99C0E, 0x0D3999D7, 0xD04543BA, + 0x0FBC28A1, 0xEF68C7EF, 0x64327F30, 0xF11ECDBE, + 0x4DBD312C, 0xD71CE03A, 0xAEFDAD34, 0xE1CC7315, + 0x797A865C, 0xB9F1B1EB, 0xF7E68DFA, 0x816685B4, + 0x9F38D44B, 0x366911C8, 0x756A7336, 0x696B8261, + 0xC2FA21D2, 0x75085BF3, 0x2E5402B4, 0x75E6E744, + 0xEAD80B0C, 0x4E689F68, 0x7A9452C6, 0xA5E1958A, + 0x4B2B0A24, 0x97E0165E, 0xA4539B68, 0xF87A3096, + 0x6543CA9D, 0x92A8D398, 0xA7D7FDB4, 0x1EA966B3, + 0x75B50372, 0x4C63A778, 0x34E8E033, 0x87C60F82, + 0xFC47303B, 0x8469AB86, 0x2DAADA50, 0xCFBB663F, + 0x711C9C41, 0xE6C1C423, 0x8751BAA9, 0x861EC777, + 0x31BCCCE1, 0xC1333271, 0x06864BEE, 0x41B50595, + 0xD2267D30, 0x878BA5C5, 0x65267F56, 0x2118FB18, + 0xA6DDD3DE, 0x8D309B98, 0x68928CB2, 0xFAE967DC, + 0x3CEC52D0, 0x9CA8404B, 0xAADD68A8, 0x3AC6B1DF, + 0xD53D67EA, 0x95C8D163, 0xB5F03F1D, 0x3A4C28A7, + 0xE3C4B709, 0xB8EB7C65, 0xE76B42A3, 0x25E5A217, + 0x6B6DD2B4, 0xBEFC5DF4, 0x9ACA5758, 0xC17F14D3, + 0xB224A9D3, 0xDE1A7C8F, 0x1382911B, 0x627A2FB9, + 0xC66AE36E, 0x02CC60EF, 0xC6800B20, 0x7A583C77, + 0xE1CECEE8, 0xCA0001B4, 0x6A14CF16, 0xEF45DD21, + 0x64CAA7D5, 0xFF3F1D95, 0xD328C67E, 0xC85868B1, + 0x7FBF3FEB, 0x13D68388, 0x25373DD9, 0x8DE47EFB, + 0x47912F26, 0x65515942, 0xC5ED711D, 0x6A368929, + 0xA2405C50, 0xFFA9D6EB, 0xED39A0D4, 0xE456B8B5, + 0x53283330, 0x7837FD52, 0x6EE46629, 0xCAFC9D63, + 0xB781B08F, 0xDD61D834, 0xFB9ACF09, 0xEDA4444A, + 0xBB6AA57F, 0xAED2385C, 0x22C9474D, 0x36E90167, + 0xE6DF6150, 0xF1B0DA3B, 0xC3F6800E, 0x966302E0, + 0x7DB1F627, 0xF9632186, 0xB4933075, 0x81C5C817, + 0x878CA140, 0x4EDE8FED, 0x1AF347C1, 0xFDEB72BA, + 0x2DA7FF9A, 0xB9BA3638, 0x2BB883F1, 0x474D1417, + 0xC2F474A4, 0x1E2CF9F3, 0x231CB6B0, 0x7E574B53, + 0xEDA8E1DA, 0xE1ACB7BB, 0xD1E354A6, 0x7C32B431, + 0x8189991B, 0x25F9376A, 0x3FFA8782, 0xCD9038F1, + 0x119EDBD1, 0x5C571840, 0x3DCA350F, 0x83923909, + 0x9DC3CF55, 0x94D79DD0, 0xD683DE2B, 0xECF4316A, + 0x0FFF48D4, 0x5D8076ED, 0x12B42C97, 0x2284CDB4, + 0xCB245554, 
0x3025B4D9, 0xB0075F35, 0x43A3802E, + 0x18332B4D, 0x056C4467, 0xC597E3F7, 0x3F0EAF9D, + 0xF48EBB9F, 0x92F62731, 0xBDB76296, 0x516D4466, + 0x226102B3, 0x15E38046, 0xA683C4E0, 0x6C0D1962, + 0xE20CB6CA, 0xC90C1D70, 0xD0FF8692, 0xD1419690, + 0x2D6F1081, 0x34782E5E, 0xAE092CD5, 0x90C99193, + 0xE97C0405, 0xEAE201DA, 0x631FB5AC, 0x279A2821, + 0xDF47BA5B, 0xFBE587E2, 0x6810AD2D, 0xC63E94BD, + 0x9AF36B42, 0xF14F0855, 0x946CE350, 0x7E3320E0, + 0x34130DFF, 0x8C57C413, 0xAB0723B2, 0xF514C743, + 0x63694BA3, 0x5665D23D, 0x6292C0B5, 0x9D768323, + 0x2F8E447C, 0xB99A00FB, 0x6F8E5970, 0x69B3BB45, + 0x59253E02, 0x1C518A02, 0xDD7C1232, 0xC6416C38, + 0x77E10340, 0xCF6BEB9A, 0x006F9239, 0x0E99B50F, + 0x863AD247, 0x75F0451A, 0x096E9094, 0xE0C2B357, + 0x7CC81E15, 0x222759D4, 0xEE5BCFD0, 0x050F829B, + 0x723B8FA9, 0x76143C55, 0x3B455EAF, 0xC2683EFD, + 0xEE7874B4, 0x9BCE92F7, 0x6EED7461, 0x8E93898F, + 0xA4EBE1D0, 0xFA4F019F, 0x1B0AD6DA, 0xA39CDE2F, + 0x27002B33, 0x830D478D, 0x3EEA937E, 0x572E7DA3, + 0x4BFFA4D1, 0x5E53DB0B, 0x708D21EE, 0xB003E23B, + 0x12ED0756, 0x53CA0412, 0x73237D35, 0x438EC16B, + 0x295177B8, 0xC85F4EE6, 0xB67FD3B4, 0x5221BC81, + 0xD84E3094, 0x18C84200, 0x855E0795, 0x37BEC004, + 0xDF9FAFC9, 0x60BEB6CD, 0x8645F0C5, 0xB1D2F1C3, + 0xECDC4AE3, 0x424D17F1, 0x8429238C, 0x6155EAAB, + 0xA17BEE21, 0x218D3637, 0x88A462CC, 0x8A1A031E, + 0x3F671EA5, 0x9FA08639, 0xFF4A0F8E, 0x34167A7D, + 0x1A817F54, 0x3215F21E, 0x412DD498, 0x57B633E7, + 0xE8A2431F, 0x397BD699, 0x5A155288, 0xBB3538E8, + 0xA49806D2, 0x49438A07, 0x24963568, 0x40414C26, + 0xE45C08D4, 0x61D2435B, 0x2F36AEDE, 0x6580370C, + 0x02A56A5E, 0x53B18017, 0xAF2C83FC, 0xF4C83871, + 0xD9E5DDC3, 0x17B90B01, 0xED4A0904, 0xFA6DA26B, + 0x35D9840D, 0xA0C505E4, 0x3396D0B5, 0xEC66B509, + 0xC190E41C, 0x2F0CE5CF, 0x419C3E94, 0x220D42CA, + 0x2F611F4F, 0x47906734, 0x8C2CDB17, 0xD8658F1C, + 0x2F6745CD, 0x543D0D4F, 0x818F0469, 0x380FFDAE, + 0xF5DD91E2, 0xAD25E46A, 0xE7039205, 0xA9F47165, + 0xB2114C12, 0xCF7F626F, 0x54D2C9FF, 0xE4736A36, + 0x16DB09FC, 
0xE2B787BB, 0x9631709A, 0x72629F66, + 0x819EBA08, 0x7F5D73F3, 0xA0B0B91C, 0xFEDFBA71, + 0x252F14EE, 0xF26F8FA2, 0x92805F94, 0x43650F7F, + 0x3051124F, 0x72CA8EAD, 0x21973E34, 0xA5B70509, + 0xB36A41CC, 0xC52EDE5F, 0xF706A24E, 0x8AAF9F92, + 0xADF6D99A, 0x23746D73, 0x1DA39F70, 0x9660FC8F, + 0xA0A8CFEB, 0x83D5EFCA, 0x0AA4A72F, 0xEEF1B2DE, + 0x00CFCC66, 0x8A145369, 0x6376CEDA, 0xA3262E2E, + 0x3367BBA8, 0x01488C32, 0x5561A2AD, 0x40821BF2, + 0xF0C89F61, 0xC4FAA6B3, 0xD843377A, 0x67A76555, + 0xE8D9F1CE, 0x943034FF, 0x2BD468BD, 0xA514D935, + 0x50CDB19D, 0xA09C7E9E, 0x6FEBEC30, 0xB1B36CF7, + 0xCD7A30BC, 0x36C6FE0A, 0x2DF52C45, 0x45C9957F, + 0x65076A79, 0xBF783DEE, 0x718D37F0, 0x098F9117, + 0x9A70C430, 0x80EB1A53, 0x9F2505B1, 0x48D10D98, + 0xB8D781E9, 0xF2376133, 0xECF25B98, 0x5A3B0E18, + 0x2F623537, 0x9F0E34A4, 0xF1027EB6, 0xF9B16022, + 0xBA3FEC59, 0xEF7226FD, 0x9F3058AA, 0xBB51DE0E, + 0xD5435EA0, 0x8A6479D5, 0x077708B8, 0x9634876A, + 0x069A260A, 0x168D9E6A, 0x9FD18E94, 0x8A7ACD53, + 0x8E5A5869, 0x1B6F35FD, 0xA968913B, 0xC72F076B, + 0x7DDA354C, 0x25B0297C, 0xD07219D5, 0xA66862BA, + 0x87E8EE67, 0xFA28809B, 0x55762443, 0x31EF4956, + 0xF4F4A511, 0x9A9378CB, 0x42ABDBDE, 0x7AA484B7, + 0xE8EC22ED, 0xCADDEF61, 0x9D18538A, 0xA81B923E, + 0x9C32F92A, 0x6D278E58, 0x4CDFC716, 0xAB64814F, + 0xF832BF1A, 0xE2C1A36B, 0x20675610, 0xE78D855A, + 0x38332C3D, 0x5AE0EAD9, 0x2E23F22D, 0x3C8683C5, + 0xA351AF89, 0x54720D3B, 0xABC6E51F, 0x89330C8E, + 0x600D5650, 0x197EA0C6, 0x7D502A5D, 0x3A536EA7, + 0x7DF71F32, 0x456FE645, 0x3EF5E7A2, 0x6664BCAF, + 0xA9D074C2, 0xE9D9E478, 0x1AE9AB77, 0xFECE7160, + 0xC618EEEC, 0x771B0026, 0x2B54F43C, 0x145DA102, + 0x1B3D7949, 0xBB6E2D9D, 0xDB8FDC4A, 0x25397EBA, + 0x9228A6E9, 0x56B4C69D, 0x337B943C, 0xE35B716C, + 0xF7FE89A1, 0x023AC20D, 0x033165C8, 0x9F13B130, + 0xC1BAFB1D, 0xA2C42C8C, 0x58E4D431, 0xE10741E6, + 0x2547589A, 0x8D9EF7BD, 0x7E322280, 0xF49FDDC2, + 0xBE21A094, 0xA061178A, 0x34D9F13B, 0x694D652F, + 0x05084A2A, 0x2767B991, 0xE8536AB4, 0xEBFADF6F, + 0xF4C8DFAC, 
0xD9967CCA, 0xE04BCF3F, 0x232B3460, + 0x9FF6E88A, 0x6DF3A2B0, 0x0FE10E99, 0x7B059283, + 0x067BFB57, 0x8DDA26B0, 0xB7D6652F, 0x85705248, + 0x0826240C, 0x5DF7F52E, 0x47973463, 0xB9C22D37, + 0x9BEB265D, 0x493AB6FD, 0x10C0FB07, 0x947C102A, + 0x5FEC0608, 0x140E07AE, 0x8B330F43, 0x9364A649, + 0xC9AD63EF, 0xBE4B2475, 0x1A09AC77, 0x9E40A4B0, + 0xBA9C23E7, 0x7F4A798D, 0xE2C52D66, 0xA26EE9E0, + 0x8C79DCE7, 0xDD7F1C3D, 0x6AE83B20, 0x073DBA03, + 0xB1844D97, 0x16D7ED6E, 0x5E0DE0B1, 0xA497D717, + 0xFA507AA2, 0xC332649B, 0x21419E15, 0x384D9CCC, + 0x8B915A8B, 0xBA328FD5, 0xF99E8016, 0x545725EC, + 0xED9840ED, 0x71E5D78A, 0x21862496, 0x6F858B6C, + 0xF3736AE2, 0x8979FC2B, 0x5C8122D0, 0x0A20EB5A, + 0x2278AA6E, 0x55275E74, 0x22D57650, 0xE5FFDC96, + 0x6BA86E10, 0x4EC5BFCC, 0x05AFA305, 0xFB7FD007, + 0x726EA097, 0xF6A349C4, 0xCB2F71E4, 0x08DD80BA, + 0x892D0E23, 0xBD2E0A55, 0x40AC0CD3, 0xBFAF5688, + 0x6E40A6A5, 0x6DA1BBE0, 0x969557A9, 0xFB88629B, + 0x11F845C4, 0x5FC91C6F, 0x1B0C7E79, 0xD6946953, + 0x27A164A0, 0x55D20869, 0x29A2182D, 0x406AA963, + 0x74F40C59, 0x56A90570, 0x535AC9C6, 0x9521EF76, + 0xBA38759B, 0xCD6EF76E, 0xF2181DB9, 0x7BE78DA6, + 0xF88E4115, 0xABA7E166, 0xF60DC9B3, 0xFECA1EF3, + 0x43DF196A, 0xCC4FC9DD, 0x428A8961, 0xCF6B4560, + 0x87B30B57, 0x20E7BAC5, 0xBFBDCCDF, 0xF7D3F6BB, + 0x7FC311C8, 0x2C7835B5, 0xA24F6821, 0x6A38454C, + 0x460E42FD, 0x2B6BA832, 0xC7068C72, 0x28CDCE59, + 0xAE82A0B4, 0x25F39572, 0x9B6C7758, 0xE0FE9EBA, + 0xA8F03EE1, 0xD70B928E, 0x95E529D7, 0xDD91DB86, + 0xF912BA8C, 0x7F478A6A, 0x1F017850, 0x5A717E10, + 0xDAC243F9, 0xD235F314, 0x4F80AAE6, 0xA46364D8, + 0xA1E3A9E9, 0x495FEFB1, 0xB9058508, 0x23A20999, + 0x73D18118, 0xCA3EEE2A, 0x34E1C7E2, 0xAADBADBD}; diff --git a/crypt/liboqs/kex_rlwe_bcns15/rlwe_kex.c b/crypt/liboqs/kex_rlwe_bcns15/rlwe_kex.c new file mode 100644 index 0000000000000000000000000000000000000000..7bf28e38debf14bba667a31a9dcd4644970aa34b --- /dev/null +++ b/crypt/liboqs/kex_rlwe_bcns15/rlwe_kex.c @@ -0,0 +1,63 @@ +/* This is free and 
unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * See LICENSE for complete information. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <oqs/rand.h> + +#include "local.h" + +static void *(*volatile rlwe_memset_volatile)(void *, int, size_t) = memset; + +void oqs_kex_rlwe_bcns15_generate_keypair(const uint32_t *a, uint32_t s[1024], uint32_t b[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx, OQS_RAND *rand) { + uint32_t e[1024]; +#if CONSTANT_TIME + oqs_kex_rlwe_bcns15_sample_ct(s, rand); + oqs_kex_rlwe_bcns15_sample_ct(e, rand); +#else + oqs_kex_rlwe_bcns15_sample(s, rand); + oqs_kex_rlwe_bcns15_sample(e, rand); +#endif + oqs_kex_rlwe_bcns15_a_times_s_plus_e(b, a, s, e, ctx); + rlwe_memset_volatile(e, 0, 1024 * sizeof(uint32_t)); +} + +void oqs_kex_rlwe_bcns15_compute_key_alice(const uint32_t b[1024], const uint32_t s[1024], const uint64_t c[16], uint64_t k[16], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx) { + uint32_t w[1024]; + oqs_kex_rlwe_bcns15_fft_mul(w, b, s, ctx); +#if CONSTANT_TIME + oqs_kex_rlwe_bcns15_rec_ct(k, w, c); +#else + oqs_kex_rlwe_bcns15_rec(k, w, c); +#endif + rlwe_memset_volatile(w, 0, 1024 * sizeof(uint32_t)); +} + +void oqs_kex_rlwe_bcns15_compute_key_bob(const uint32_t b[1024], const uint32_t s[1024], uint64_t c[16], uint64_t k[16], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx, OQS_RAND *rand) { + uint32_t v[1024]; + uint32_t eprimeprime[1024]; +#if CONSTANT_TIME + oqs_kex_rlwe_bcns15_sample_ct(eprimeprime, rand); +#else + oqs_kex_rlwe_bcns15_sample(eprimeprime, rand); +#endif + oqs_kex_rlwe_bcns15_a_times_s_plus_e(v, b, s, eprimeprime, ctx); +#if CONSTANT_TIME + oqs_kex_rlwe_bcns15_crossround2_ct(c, v, rand); + oqs_kex_rlwe_bcns15_round2_ct(k, v); +#else + 
oqs_kex_rlwe_bcns15_crossround2(c, v, rand); + oqs_kex_rlwe_bcns15_round2(k, v); +#endif + rlwe_memset_volatile(v, 0, 1024 * sizeof(uint32_t)); + rlwe_memset_volatile(eprimeprime, 0, 1024 * sizeof(uint32_t)); +} diff --git a/crypt/liboqs/kex_rlwe_bcns15/rlwe_table.h b/crypt/liboqs/kex_rlwe_bcns15/rlwe_table.h new file mode 100644 index 0000000000000000000000000000000000000000..402e4fd8b18516b5930576f4c98fe51d5ddee0dd --- /dev/null +++ b/crypt/liboqs/kex_rlwe_bcns15/rlwe_table.h @@ -0,0 +1,63 @@ +/* This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * See LICENSE for complete information. + */ + +static uint64_t rlwe_table[52][3] = { + {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x1FFFFFFFFFFFFFFF}, + {0xE0C81DA0D6A8BD22, 0x161ABD186DA13542, 0x5CEF2C248806C827}, + {0x8D026C4E14BC7408, 0x4344C125B3533F22, 0x9186506BCC065F20}, + {0x10AC7CEC7D7E2A3B, 0x5D62CE65E6217813, 0xBAAB5F82BCDB43B3}, + {0x709C92996E94D801, 0x1411F551608E4D22, 0xD7D9769FAD23BCB1}, + {0x6287D827008404B7, 0x7E1526D618902F20, 0xEA9BE2F4D6DDB5ED}, + {0x34CBDC118C15F40E, 0xE7D2A13787E94674, 0xF58A99474919B8C9}, + {0xD521F7EBBBE8C3A2, 0xE8A773D9A1EA0AAB, 0xFB5117812753B7B8}, + {0xC3D9E58131089A6A, 0x148CB49FF716491B, 0xFE151BD0928596D3}, + {0x2E060C4A842A27F6, 0x07E44D009ADB0049, 0xFF487508BA9F7208}, + {0xFCEDEFCFAA887582, 0x1A5409BF5D4B039E, 0xFFC16686270CFC82}, + {0x4FE22E5DF9FAAC20, 0xFDC99BFE0F991958, 0xFFEC8AC3C159431B}, + {0xA36605F81B14FEDF, 0xA6FCD4C13F4AFCE0, 0xFFFA7DF4B6E92C28}, + {0x9D1FDCFF97BBC957, 0x4B869C6286ED0BB5, 0xFFFE94BB4554B5AC}, + {0x6B3EEBA74AAD104B, 0xEC72329E974D63C7, 0xFFFFAADE1B1CAA95}, + {0x48C8DA4009C10760, 0x337F6316C1FF0A59, 0xFFFFEDDC1C6436DC}, + {0x84480A71312F35E7, 0xD95E7B2CD6933C97, 0xFFFFFC7C9DC2569A}, + 
{0x23C01DAC1513FA0F, 0x8E0B132AE72F729F, 0xFFFFFF61BC337FED}, + {0x90C89D6570165907, 0x05B9D725AAEA5CAD, 0xFFFFFFE6B3CF05F7}, + {0x692E2A94C500EC7D, 0x99E8F72C370F27A6, 0xFFFFFFFC53EA610E}, + {0x28C2998CEAE37CC8, 0xC6E2F0D7CAFA9AB8, 0xFFFFFFFF841943DE}, + {0xC515CF4CB0130256, 0x4745913CB4F9E4DD, 0xFFFFFFFFF12D07EC}, + {0x39F0ECEA047D6E3A, 0xEE62D42142AC6544, 0xFFFFFFFFFE63E348}, + {0xDF11BB25B50462D6, 0x064A0C6CC136E943, 0xFFFFFFFFFFD762C7}, + {0xCDBA0DD69FD2EA0F, 0xC672F3A74DB0F175, 0xFFFFFFFFFFFC5E37}, + {0xFDB966A75F3604D9, 0x6ABEF8B144723D83, 0xFFFFFFFFFFFFB48F}, + {0x3C4FECBB600740D1, 0x697598CEADD71A15, 0xFFFFFFFFFFFFFA72}, + {0x1574CC916D60E673, 0x12F5A30DD99D7051, 0xFFFFFFFFFFFFFFA1}, + {0xDD3DCD1B9CB7321D, 0x4016ED3E05883572, 0xFFFFFFFFFFFFFFFA}, + {0xB4A4E8CF3DF79A7A, 0xAF22D9AFAD5A73CF, 0xFFFFFFFFFFFFFFFF}, + {0x91056A8196F74466, 0xFBF88681905332BA, 0xFFFFFFFFFFFFFFFF}, + {0x965B9ED9BD366C04, 0xFFD16385AF29A51F, 0xFFFFFFFFFFFFFFFF}, + {0xF05F75D38F2D28A3, 0xFFFE16FF8EA2B60C, 0xFFFFFFFFFFFFFFFF}, + {0x77E35C8980421EE8, 0xFFFFEDD3C9DDC7E8, 0xFFFFFFFFFFFFFFFF}, + {0x92783617956F140A, 0xFFFFFF63392B6E8F, 0xFFFFFFFFFFFFFFFF}, + {0xA536DC994639AD78, 0xFFFFFFFB3592B3D1, 0xFFFFFFFFFFFFFFFF}, + {0x8F3A871874DD9FD5, 0xFFFFFFFFDE04A5BB, 0xFFFFFFFFFFFFFFFF}, + {0x310DE3650170B717, 0xFFFFFFFFFF257152, 0xFFFFFFFFFFFFFFFF}, + {0x1F21A853A422F8CC, 0xFFFFFFFFFFFB057B, 0xFFFFFFFFFFFFFFFF}, + {0x3CA9D5C6DB4EE2BA, 0xFFFFFFFFFFFFE5AD, 0xFFFFFFFFFFFFFFFF}, + {0xCFD9CE958E59869C, 0xFFFFFFFFFFFFFF81, 0xFFFFFFFFFFFFFFFF}, + {0xDB8E1F91D955C452, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFF}, + {0xF78EE3A8E99E08C3, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0xFFE1D7858BABDA25, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0xFFFF9E52E32CAB4A, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0xFFFFFEE13217574F, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0xFFFFFFFD04888041, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0xFFFFFFFFF8CD8A56, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + 
{0xFFFFFFFFFFF04111, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0xFFFFFFFFFFFFE0C5, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0xFFFFFFFFFFFFFFC7, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}}; diff --git a/crypt/liboqs/kex_rlwe_msrln16/AMD64/consts.c b/crypt/liboqs/kex_rlwe_msrln16/AMD64/consts.c new file mode 100644 index 0000000000000000000000000000000000000000..9c1260e8729881ea6adfbee8a2fde4cc415c1dad --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/AMD64/consts.c @@ -0,0 +1,38 @@ +/**************************************************************************************** +* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: constants for the x64 assembly implementation +* +*****************************************************************************************/ + +#include "../LatticeCrypto_priv.h" +#include <stdint.h> + +uint32_t PRIME8x[8] = {OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q}; +uint8_t ONE32x[32] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +uint32_t MASK12x8[8] = {0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff}; +uint32_t PERM0246[4] = {0, 2, 4, 6}; +uint32_t PERM00224466[8] = {0, 0, 2, 2, 4, 4, 6, 6}; +uint32_t PERM02134657[8] = {0, 2, 1, 3, 4, 6, 5, 7}; +uint64_t PERM0145[4] = {0, 1, 4, 5}; +uint64_t PERM2367[4] = {2, 3, 6, 7}; +uint64_t MASK32[4] = {0xffffffff, 0, 0xffffffff, 0}; +uint64_t MASK42[4] = {0x3fff0000000, 0, 0x3fff0000000, 0}; + +uint64_t MASK14_1[4] = {0x3fff, 0, 0x3fff, 0}; +uint64_t MASK14_2[4] = {0xFFFC000, 0, 0xFFFC000, 0}; +uint64_t MASK14_3[4] = {0x3FFF0000000, 0, 0x3FFF0000000, 0}; +uint64_t 
MASK14_4[4] = {0xFFFC0000000000, 0, 0xFFFC0000000000, 0}; + +uint32_t ONE8x[8] = {1, 1, 1, 1, 1, 1, 1, 1}; +uint32_t THREE8x[8] = {3, 3, 3, 3, 3, 3, 3, 3}; +uint32_t FOUR8x[8] = {4, 4, 4, 4, 4, 4, 4, 4}; +uint32_t PARAM_Q4x8[8] = {3073, 3073, 3073, 3073, 3073, 3073, 3073, 3073}; +uint32_t PARAM_3Q4x8[8] = {9217, 9217, 9217, 9217, 9217, 9217, 9217, 9217}; +uint32_t PARAM_5Q4x8[8] = {15362, 15362, 15362, 15362, 15362, 15362, 15362, 15362}; +uint32_t PARAM_7Q4x8[8] = {21506, 21506, 21506, 21506, 21506, 21506, 21506, 21506}; +uint32_t PARAM_Q2x8[8] = {6145, 6145, 6145, 6145, 6145, 6145, 6145, 6145}; +uint32_t PARAM_3Q2x8[8] = {18434, 18434, 18434, 18434, 18434, 18434, 18434, 18434}; diff --git a/crypt/liboqs/kex_rlwe_msrln16/AMD64/error_asm.S b/crypt/liboqs/kex_rlwe_msrln16/AMD64/error_asm.S new file mode 100644 index 0000000000000000000000000000000000000000..d5d5478575293fd18c73249ba025529ac297bf5e --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/AMD64/error_asm.S @@ -0,0 +1,436 @@ +//**************************************************************************************** +// LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// +// +// Abstract: functions for error sampling and reconciliation in x64 assembly using AVX2 +// vector instructions for Linux +// +//**************************************************************************************** + +.intel_syntax noprefix + +// Registers that are used for parameter passing: +#define reg_p1 rdi +#define reg_p2 rsi +#define reg_p3 rdx +#define reg_p4 rcx +#define reg_p5 r8 + + +.text +//*********************************************************************** +// Error sampling from psi_12 +// Operation: c [reg_p2] <- sampling(a) [reg_p1] +//*********************************************************************** +.globl oqs_rlwe_msrln16_error_sampling_asm +oqs_rlwe_msrln16_error_sampling_asm: + vmovdqu ymm7, ONE32x + movq r11, 384 + movq r10, 32 + movq r8, 24 + xor rax, rax + xor rcx, rcx +loop1: + vmovdqu ymm0, YMMWORD PTR [reg_p1+4*rax] // sample + vmovdqu ymm2, YMMWORD PTR [reg_p1+4*rax+32] // sample + vmovdqu ymm4, YMMWORD PTR [reg_p1+4*rax+64] // sample + movq r9, 2 + +loop1b: + vpand ymm1, ymm0, ymm7 // Collecting 8 bits for first sample + vpsrlw ymm0, ymm0, 1 + vpand ymm3, ymm0, ymm7 + vpaddb ymm1, ymm1, ymm3 + vpsrlw ymm0, ymm0, 1 + vpand ymm3, ymm0, ymm7 + vpaddb ymm1, ymm1, ymm3 + vpsrlw ymm0, ymm0, 1 + vpand ymm3, ymm0, ymm7 + vpaddb ymm1, ymm1, ymm3 + vpsrlw ymm0, ymm0, 1 + vpand ymm3, ymm0, ymm7 + vpaddb ymm1, ymm1, ymm3 + vpsrlw ymm0, ymm0, 1 + vpand ymm3, ymm0, ymm7 + vpaddb ymm1, ymm1, ymm3 + vpsrlw ymm0, ymm0, 1 + vpand ymm3, ymm0, ymm7 + vpaddb ymm1, ymm1, ymm3 + vpsrlw ymm0, ymm0, 1 + vpand ymm3, ymm0, ymm7 + vpaddb ymm1, ymm1, ymm3 + + vpand ymm3, ymm2, ymm7 // Adding next 4 bits + vpaddb ymm1, ymm1, ymm3 + vpsrlw ymm2, ymm2, 1 + vpand ymm3, ymm2, ymm7 + vpaddb ymm1, ymm1, ymm3 + vpsrlw ymm2, ymm2, 1 + vpand ymm3, ymm2, ymm7 + vpaddb ymm1, ymm1, ymm3 + vpsrlw ymm2, ymm2, 1 + vpand ymm3, ymm2, ymm7 + vpaddb ymm1, ymm1, ymm3 + + vpsrlw ymm2, ymm2, 1 // Collecting 4-bits for second sample + vpand ymm5, ymm2, ymm7 + vpsrlw 
ymm2, ymm2, 1 + vpand ymm3, ymm2, ymm7 + vpaddb ymm5, ymm5, ymm3 + vpsrlw ymm2, ymm2, 1 + vpand ymm3, ymm2, ymm7 + vpaddb ymm5, ymm5, ymm3 + vpsrlw ymm2, ymm2, 1 + vpand ymm3, ymm2, ymm7 + vpaddb ymm5, ymm5, ymm3 + + vpand ymm3, ymm4, ymm7 // Adding next 8 bits + vpaddb ymm5, ymm5, ymm3 + vpsrlw ymm4, ymm4, 1 + vpand ymm3, ymm4, ymm7 + vpaddb ymm5, ymm5, ymm3 + vpsrlw ymm4, ymm4, 1 + vpand ymm3, ymm4, ymm7 + vpaddb ymm5, ymm5, ymm3 + vpsrlw ymm4, ymm4, 1 + vpand ymm3, ymm4, ymm7 + vpaddb ymm5, ymm5, ymm3 + vpsrlw ymm4, ymm4, 1 + vpand ymm3, ymm4, ymm7 + vpaddb ymm5, ymm5, ymm3 + vpsrlw ymm4, ymm4, 1 + vpand ymm3, ymm4, ymm7 + vpaddb ymm5, ymm5, ymm3 + vpsrlw ymm4, ymm4, 1 + vpand ymm3, ymm4, ymm7 + vpaddb ymm5, ymm5, ymm3 + vpsrlw ymm4, ymm4, 1 + vpand ymm3, ymm4, ymm7 + vpaddb ymm5, ymm5, ymm3 + + vpsubb ymm5, ymm1, ymm5 + vpermq ymm3, ymm5, 0x0e + vpmovsxbd ymm6, xmm5 + vpsrldq ymm5, ymm5, 8 + vpmovsxbd ymm7, xmm5 + vpmovsxbd ymm8, xmm3 + vpsrldq ymm3, ymm3, 8 + vpmovsxbd ymm9, xmm3 + vmovdqu YMMWORD PTR [reg_p2+4*rcx], ymm6 + vmovdqu YMMWORD PTR [reg_p2+4*rcx+32], ymm7 + vmovdqu YMMWORD PTR [reg_p2+4*rcx+64], ymm8 + vmovdqu YMMWORD PTR [reg_p2+4*rcx+96], ymm9 + + add rcx, r10 // i+32 + vpsrlw ymm0, ymm0, 1 + vpsrlw ymm2, ymm2, 1 + vpsrlw ymm4, ymm4, 1 + dec r9 + jnz loop1b + + add rax, r8 // j+24 + cmp rax, r11 + jl loop1 + ret + + +//*********************************************************************** +// Reconciliation helper function +// Operation: c [reg_p2] <- function(a) [reg_p1] +// [reg_p3] points to random bits +//*********************************************************************** +.globl oqs_rlwe_msrln16_helprec_asm +oqs_rlwe_msrln16_helprec_asm: + vmovdqu ymm8, ONE8x + movq r11, 256 + movq r10, 8 + xor rax, rax + vmovdqu ymm4, YMMWORD PTR [reg_p3] // rbits +loop2: + vmovdqu ymm0, YMMWORD PTR [reg_p1+4*rax] // x + vmovdqu ymm1, YMMWORD PTR [reg_p1+4*rax+4*256] // x+256 + vmovdqu ymm2, YMMWORD PTR [reg_p1+4*rax+4*512] // x+512 + vmovdqu ymm3, 
YMMWORD PTR [reg_p1+4*rax+4*768] // x+768 + + vpand ymm5, ymm4, ymm8 // Collecting 8 random bits + vpslld ymm0, ymm0, 1 // 2*x - rbits + vpslld ymm1, ymm1, 1 + vpslld ymm2, ymm2, 1 + vpslld ymm3, ymm3, 1 + vpsubd ymm0, ymm0, ymm5 + vpsubd ymm1, ymm1, ymm5 + vpsubd ymm2, ymm2, ymm5 + vpsubd ymm3, ymm3, ymm5 + + vmovdqu ymm15, PARAM_Q4x8 + vmovdqu ymm7, FOUR8x + vmovdqu ymm8, ymm7 + vmovdqu ymm9, ymm7 + vmovdqu ymm10, ymm7 + vpsubd ymm6, ymm0, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm7, ymm7, ymm6 + vpsubd ymm6, ymm1, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm8, ymm8, ymm6 + vpsubd ymm6, ymm2, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm9, ymm9, ymm6 + vpsubd ymm6, ymm3, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm10, ymm10, ymm6 + vmovdqu ymm15, PARAM_3Q4x8 + vpsubd ymm6, ymm0, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm7, ymm7, ymm6 + vpsubd ymm6, ymm1, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm8, ymm8, ymm6 + vpsubd ymm6, ymm2, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm9, ymm9, ymm6 + vpsubd ymm6, ymm3, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm10, ymm10, ymm6 + vmovdqu ymm15, PARAM_5Q4x8 + vpsubd ymm6, ymm0, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm7, ymm7, ymm6 + vpsubd ymm6, ymm1, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm8, ymm8, ymm6 + vpsubd ymm6, ymm2, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm9, ymm9, ymm6 + vpsubd ymm6, ymm3, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm10, ymm10, ymm6 + vmovdqu ymm15, PARAM_7Q4x8 + vpsubd ymm6, ymm0, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm7, ymm7, ymm6 // v0[0] + vpsubd ymm6, ymm1, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm8, ymm8, ymm6 // v0[1] + vpsubd ymm6, ymm2, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm9, ymm9, ymm6 // v0[2] + vpsubd ymm6, ymm3, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm10, ymm10, ymm6 // v0[3] + + vmovdqu ymm15, PARAM_Q2x8 + vmovdqu ymm11, THREE8x + vmovdqu ymm12, ymm11 + vmovdqu ymm13, ymm11 + vmovdqu ymm14, ymm11 + vpsubd ymm6, ymm0, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm11, ymm11, ymm6 + vpsubd 
ymm6, ymm1, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm12, ymm12, ymm6 + vpsubd ymm6, ymm2, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm13, ymm13, ymm6 + vpsubd ymm6, ymm3, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm14, ymm14, ymm6 + vmovdqu ymm15, PARAM_3Q2x8 + vpsubd ymm6, ymm0, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm11, ymm11, ymm6 + vpsubd ymm6, ymm1, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm12, ymm12, ymm6 + vpsubd ymm6, ymm2, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm13, ymm13, ymm6 + vpsubd ymm6, ymm3, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm14, ymm14, ymm6 + vmovdqu ymm15, PRIME8x + vpsubd ymm6, ymm0, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm11, ymm11, ymm6 // v1[0] + vpsubd ymm6, ymm1, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm12, ymm12, ymm6 // v1[1] + vpsubd ymm6, ymm2, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm13, ymm13, ymm6 // v1[2] + vpsubd ymm6, ymm3, ymm15 + vpsrld ymm6, ymm6, 31 + vpsubd ymm14, ymm14, ymm6 // v1[3] + + vpmulld ymm6, ymm7, ymm15 + vpslld ymm0, ymm0, 1 + vpsubd ymm0, ymm0, ymm6 + vpabsd ymm0, ymm0 + vpmulld ymm6, ymm8, ymm15 + vpslld ymm1, ymm1, 1 + vpsubd ymm1, ymm1, ymm6 + vpabsd ymm1, ymm1 + vpaddd ymm0, ymm0, ymm1 + vpmulld ymm6, ymm9, ymm15 + vpslld ymm2, ymm2, 1 + vpsubd ymm2, ymm2, ymm6 + vpabsd ymm2, ymm2 + vpaddd ymm0, ymm0, ymm2 + vpmulld ymm6, ymm10, ymm15 + vpslld ymm3, ymm3, 1 + vpsubd ymm3, ymm3, ymm6 + vpabsd ymm3, ymm3 + vpaddd ymm0, ymm0, ymm3 // norm + vpsubd ymm0, ymm0, ymm15 + vpsrad ymm0, ymm0, 31 // If norm < q then norm = 0xff...ff, else norm = 0 + + vpxor ymm7, ymm7, ymm11 // v0[i] = (norm & (v0[i] ^ v1[i])) ^ v1[i] + vpand ymm7, ymm7, ymm0 + vpxor ymm7, ymm7, ymm11 + vpxor ymm8, ymm8, ymm12 + vpand ymm8, ymm8, ymm0 + vpxor ymm8, ymm8, ymm12 + vpxor ymm9, ymm9, ymm13 + vpand ymm9, ymm9, ymm0 + vpxor ymm9, ymm9, ymm13 + vpxor ymm10, ymm10, ymm14 + vpand ymm10, ymm10, ymm0 + vpxor ymm10, ymm10, ymm14 + + vmovdqu ymm15, THREE8x + vmovdqu ymm14, ONE8x + vpsubd ymm7, ymm7, ymm10 + vpand ymm7, ymm7, ymm15 + 
vpsubd ymm8, ymm8, ymm10 + vpand ymm8, ymm8, ymm15 + vpsubd ymm9, ymm9, ymm10 + vpand ymm9, ymm9, ymm15 + vpslld ymm10, ymm10, 1 + vpxor ymm0, ymm0, ymm14 + vpand ymm0, ymm0, ymm14 + vpaddd ymm10, ymm0, ymm10 + vpand ymm10, ymm10, ymm15 + + vpsrld ymm4, ymm4, 1 + vmovdqu YMMWORD PTR [reg_p2+4*rax], ymm7 + vmovdqu YMMWORD PTR [reg_p2+4*rax+4*256], ymm8 + vmovdqu YMMWORD PTR [reg_p2+4*rax+4*512], ymm9 + vmovdqu YMMWORD PTR [reg_p2+4*rax+4*768], ymm10 + + add rax, r10 // j+8 + add rcx, r9 + cmp rax, r11 + jl loop2 + ret + + +//*********************************************************************** +// Reconciliation function +// Operation: c [reg_p3] <- function(a [reg_p1], b [reg_p2]) +//*********************************************************************** +.globl oqs_rlwe_msrln16_rec_asm +oqs_rlwe_msrln16_rec_asm: + vpxor ymm12, ymm12, ymm12 + vmovdqu ymm15, PRIME8x + vpslld ymm14, ymm15, 2 // 4*Q + vpslld ymm13, ymm15, 3 // 8*Q + vpsubd ymm12, ymm12, ymm13 // -8*Q + vpxor ymm11, ymm12, ymm13 // 8*Q ^ -8*Q + vmovdqu ymm10, ONE8x + movq r11, 256 + movq r10, 8 + xor rax, rax + xor rcx, rcx +loop3: + vmovdqu ymm0, YMMWORD PTR [reg_p1+4*rax] // x + vmovdqu ymm1, YMMWORD PTR [reg_p1+4*rax+4*256] // x+256 + vmovdqu ymm2, YMMWORD PTR [reg_p1+4*rax+4*512] // x+512 + vmovdqu ymm3, YMMWORD PTR [reg_p1+4*rax+4*768] // x+768 + vmovdqu ymm4, YMMWORD PTR [reg_p2+4*rax] // rvec + vmovdqu ymm5, YMMWORD PTR [reg_p2+4*rax+4*256] // rvec+256 + vmovdqu ymm6, YMMWORD PTR [reg_p2+4*rax+4*512] // rvec+512 + vmovdqu ymm7, YMMWORD PTR [reg_p2+4*rax+4*768] // rvec+768 + + vpslld ymm8, ymm4, 1 // 2*rvec + rvec + vpaddd ymm4, ymm7, ymm8 + vpslld ymm8, ymm5, 1 + vpaddd ymm5, ymm7, ymm8 + vpslld ymm8, ymm6, 1 + vpaddd ymm6, ymm7, ymm8 + vpmulld ymm4, ymm4, ymm15 + vpmulld ymm5, ymm5, ymm15 + vpmulld ymm6, ymm6, ymm15 + vpmulld ymm7, ymm7, ymm15 + vpslld ymm0, ymm0, 3 // 8*x + vpslld ymm1, ymm1, 3 + vpslld ymm2, ymm2, 3 + vpslld ymm3, ymm3, 3 + vpsubd ymm0, ymm0, ymm4 // t[i] + vpsubd ymm1, 
ymm1, ymm5 + vpsubd ymm2, ymm2, ymm6 + vpsubd ymm3, ymm3, ymm7 + + vpsrad ymm8, ymm0, 31 // mask1 + vpabsd ymm4, ymm0 + vpsubd ymm4, ymm14, ymm4 + vpsrad ymm4, ymm4, 31 // mask2 + vpand ymm8, ymm8, ymm11 // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q + vpxor ymm8, ymm8, ymm12 + vpand ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymm4 + vpabsd ymm0, ymm0 + vpsrad ymm8, ymm1, 31 // mask1 + vpabsd ymm4, ymm1 + vpsubd ymm4, ymm14, ymm4 + vpsrad ymm4, ymm4, 31 // mask2 + vpand ymm8, ymm8, ymm11 // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q + vpxor ymm8, ymm8, ymm12 + vpand ymm4, ymm4, ymm8 + vpaddd ymm1, ymm1, ymm4 + vpabsd ymm1, ymm1 + vpaddd ymm0, ymm0, ymm1 + vpsrad ymm8, ymm2, 31 // mask1 + vpabsd ymm4, ymm2 + vpsubd ymm4, ymm14, ymm4 + vpsrad ymm4, ymm4, 31 // mask2 + vpand ymm8, ymm8, ymm11 // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q + vpxor ymm8, ymm8, ymm12 + vpand ymm4, ymm4, ymm8 + vpaddd ymm2, ymm2, ymm4 + vpabsd ymm2, ymm2 + vpaddd ymm0, ymm0, ymm2 + vpsrad ymm8, ymm3, 31 // mask1 + vpabsd ymm4, ymm3 + vpsubd ymm4, ymm14, ymm4 + vpsrad ymm4, ymm4, 31 // mask2 + vpand ymm8, ymm8, ymm11 // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q + vpxor ymm8, ymm8, ymm12 + vpand ymm4, ymm4, ymm8 + vpaddd ymm3, ymm3, ymm4 + vpabsd ymm3, ymm3 + vpaddd ymm0, ymm0, ymm3 // norm + + vpsubd ymm0, ymm13, ymm0 // If norm < PARAMETER_Q then result = 1, else result = 0 + vpsrld ymm0, ymm0, 31 + vpxor ymm0, ymm0, ymm10 + + vpsrlq ymm1, ymm0, 31 + vpor ymm1, ymm0, ymm1 + vpsllq ymm2, ymm1, 2 + vpsrldq ymm2, ymm2, 8 + vpor ymm1, ymm2, ymm1 + vpsllq ymm2, ymm1, 4 + vpermq ymm2, ymm2, 0x56 + vpor ymm0, ymm1, ymm2 + vmovq r9, xmm0 + + mov BYTE PTR [reg_p3+rcx], r9b + + add rax, r10 // j+8 + inc rcx + cmp rax, r11 + jl loop3 + ret diff --git a/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64.c b/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64.c new file mode 100644 index 
0000000000000000000000000000000000000000..a143f849ec8a6eee4272445aa0f9fbb88f3aba3b --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64.c @@ -0,0 +1,51 @@ +/**************************************************************************************** +* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: NTT functions and other low-level operations +* +*****************************************************************************************/ + +#include "../LatticeCrypto_priv.h" + +void oqs_rlwe_msrln16_NTT_CT_std2rev_12289(int32_t *a, const int32_t *psi_rev, unsigned int N) { + oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm(a, psi_rev, N); +} + +void oqs_rlwe_msrln16_INTT_GS_rev2std_12289(int32_t *a, const int32_t *omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N) { + oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm(a, omegainv_rev, omegainv1N_rev, Ninv, N); +} + +void oqs_rlwe_msrln16_two_reduce12289(int32_t *a, unsigned int N) { + oqs_rlwe_msrln16_two_reduce12289_asm(a, N); +} + +void oqs_rlwe_msrln16_pmul(int32_t *a, int32_t *b, int32_t *c, unsigned int N) { + oqs_rlwe_msrln16_pmul_asm(a, b, c, N); +} + +void oqs_rlwe_msrln16_pmuladd(int32_t *a, int32_t *b, int32_t *c, int32_t *d, unsigned int N) { + oqs_rlwe_msrln16_pmuladd_asm(a, b, c, d, N); +} + +void oqs_rlwe_msrln16_smul(int32_t *a, int32_t scalar, unsigned int N) { + unsigned int i; + + for (i = 0; i < N; i++) { + a[i] = a[i] * scalar; + } +} + +void oqs_rlwe_msrln16_correction(int32_t *a, int32_t p, unsigned int N) { + unsigned int i; + int32_t mask; + + for (i = 0; i < N; i++) { + mask = a[i] >> (4 * sizeof(int32_t) - 1); + a[i] += (p & mask) - p; + mask = a[i] >> (4 * sizeof(int32_t) - 1); + a[i] += (p & mask); + } +} diff --git a/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64_asm.S b/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64_asm.S new file mode 100644 index 
0000000000000000000000000000000000000000..0da17f66b147281125442c6d621d487110b93bf6 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64_asm.S @@ -0,0 +1,979 @@ +//**************************************************************************************** +// LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// +// Abstract: NTT functions in x64 assembly using AVX2 vector instructions for Linux +// +//**************************************************************************************** + +.intel_syntax noprefix + +// Registers that are used for parameter passing: +#define reg_p1 rdi +#define reg_p2 rsi +#define reg_p3 rdx +#define reg_p4 rcx +#define reg_p5 r8 + + +.text +//*********************************************************************** +// Forward NTT +// Operation: a [reg_p1] <- NTT(a) [reg_p1], +// [reg_p2] points to table and +// reg_p3 contains parameter n +//*********************************************************************** +.globl oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm +oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm: + push r12 + push r13 + push r14 + +// Stages m=1 -> m=32 + mov r9, 1 // m = 1 + mov rax, reg_p3 + mov r12, reg_p3 + shr r12, 4 // n/16 + vmovdqu ymm14, MASK12x8 + vmovdqu ymm12, PERM0246 + mov r14, 16 + mov rcx, 11 +loop1: + shr rax, 1 // k = k/2 + dec rcx + xor rdx, rdx // i = 0 +loop2: + mov r10, rdx + mov r11, rax + dec r11 + shl r10, cl // j1 + add r11, r10 // j2 + mov r13, r9 + add r13, rdx // m+i + vbroadcastss ymm11, DWORD PTR [reg_p2+4*r13] // S + +loop3: + mov r13, r10 + add r13, rax // j+k + vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r13] // a[j+k] + vpmovsxdq ymm3, XMMWORD PTR [reg_p1+4*r13+16] // a[j+k] + vpmovsxdq ymm5, XMMWORD PTR [reg_p1+4*r13+32] // a[j+k] + vpmovsxdq ymm7, XMMWORD PTR [reg_p1+4*r13+48] // a[j+k] + + vpmuldq ymm1, ymm1, ymm11 // a[j+k].S + vpmuldq ymm3, ymm3, ymm11 + vpmuldq ymm5, ymm5, 
ymm11 + vpmuldq ymm7, ymm7, ymm11 + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j] + + vmovdqu ymm13, ymm1 + vpand ymm1, ymm14, ymm1 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm1, 1 // 2*c0 + vpsubd ymm13, ymm1, ymm13 // c0-c1 + vpaddd ymm13, ymm13, ymm15 // V = 3*c0-c1 + vpsubd ymm1, ymm0, ymm13 // a[j+k] = U - V + vpaddd ymm0, ymm0, ymm13 // a[j] = U + V + vpermd ymm1, ymm12, ymm1 + vpermd ymm0, ymm12, ymm0 + vpmovsxdq ymm2, XMMWORD PTR [reg_p1+4*r10+16] // U = a[j] + + vmovdqu ymm13, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm3, 1 // 2*c0 + vpsubd ymm13, ymm3, ymm13 // c0-c1 + vpaddd ymm13, ymm13, ymm15 // V = 3*c0-c1 + vpsubd ymm3, ymm2, ymm13 // a[j+k] = U - V + vpaddd ymm2, ymm2, ymm13 // a[j] = U + V + vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0 + vmovdqu XMMWORD PTR [reg_p1+4*r13], xmm1 + vpermd ymm3, ymm12, ymm3 + vpermd ymm2, ymm12, ymm2 + vpmovsxdq ymm4, XMMWORD PTR [reg_p1+4*r10+32] // U = a[j] + + vmovdqu ymm13, ymm5 + vpand ymm5, ymm14, ymm5 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm5, 1 // 2*c0 + vpsubd ymm13, ymm5, ymm13 // c0-c1 + vpaddd ymm13, ymm13, ymm15 // V = 3*c0-c1 + vpsubd ymm5, ymm4, ymm13 // a[j+k] = U - V + vpaddd ymm4, ymm4, ymm13 // a[j] = U + V + vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm2 + vmovdqu XMMWORD PTR [reg_p1+4*r13+16], xmm3 + vpermd ymm5, ymm12, ymm5 + vpermd ymm4, ymm12, ymm4 + vpmovsxdq ymm6, XMMWORD PTR [reg_p1+4*r10+48] // U = a[j] + + vmovdqu ymm13, ymm7 + vpand ymm7, ymm14, ymm7 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm7, 1 // 2*c0 + vpsubd ymm13, ymm7, ymm13 // c0-c1 + vpaddd ymm13, ymm13, ymm15 // V = 3*c0-c1 + vpsubd ymm7, ymm6, ymm13 // a[j+k] = U - V + vpaddd ymm6, ymm6, ymm13 // a[j] = U + V + vmovdqu XMMWORD PTR [reg_p1+4*r10+32], xmm4 + vmovdqu XMMWORD PTR [reg_p1+4*r13+32], xmm5 + vpermd ymm6, ymm12, ymm6 + vpermd ymm7, ymm12, ymm7 + vmovdqu XMMWORD PTR [reg_p1+4*r13+48], xmm7 + vmovdqu XMMWORD PTR [reg_p1+4*r10+48], 
xmm6 + + add r10, r14 + cmp r10, r11 + jl loop3 + inc rdx + cmp rdx, r9 + jl loop2 + shl r9, 1 + cmp r9, r12 + jl loop1 + +// Stage m=64 + xor rdx, rdx // i = 0 + xor r10, r10 // j1 = 0 +loop4: + vbroadcastss ymm11, DWORD PTR [reg_p2+4*rdx+4*64] // S + vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+32] // a[j+k] + vpmovsxdq ymm3, XMMWORD PTR [reg_p1+4*r10+48] // a[j+k] + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j] + vpmovsxdq ymm2, XMMWORD PTR [reg_p1+4*r10+16] // U = a[j] + vpmuldq ymm1, ymm1, ymm11 // a[j+k].S + vpmuldq ymm3, ymm3, ymm11 // a[j+k].S + + vmovdqu ymm13, ymm1 + vpand ymm1, ymm14, ymm1 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm1, 1 // 2*c0 + vpsubd ymm13, ymm1, ymm13 // c0-c1 + vpaddd ymm13, ymm13, ymm15 // V = 3*c0-c1 + + vmovdqu ymm10, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm10, ymm10, 12 // c1 + vpslld ymm15, ymm3, 1 // 2*c0 + vpsubd ymm10, ymm3, ymm10 // c0-c1 + vpaddd ymm10, ymm10, ymm15 // V = 3*c0-c1 + + vpsubd ymm1, ymm0, ymm13 // a[j+k] = U - V + vpaddd ymm0, ymm0, ymm13 // a[j] = U + V + vpsubd ymm3, ymm2, ymm10 // a[j+k] = U - V + vpaddd ymm2, ymm2, ymm10 // a[j] = U + V + + vpermd ymm0, ymm12, ymm0 + vpermd ymm1, ymm12, ymm1 + vpermd ymm2, ymm12, ymm2 + vpermd ymm3, ymm12, ymm3 + vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0 + vmovdqu XMMWORD PTR [reg_p1+4*r10+32], xmm1 + vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm2 + vmovdqu XMMWORD PTR [reg_p1+4*r10+48], xmm3 + + add r10, r14 // j+16 + inc rdx // i+1 + cmp rdx, r9 + jl loop4 + +// Stage m=128 + shl r9, 1 + xor rdx, rdx // i = 0 + xor r10, r10 // j1 = 0 + mov r13, 8 +loop6: + vbroadcastss ymm2, DWORD PTR [reg_p2+4*rdx+4*128] // S + vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+16] // a[j+k] + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j] + vpmuldq ymm1, ymm1, ymm2 // a[j+k].S + + vmovdqu ymm3, ymm0 + vpand ymm0, ymm14, ymm0 // c0 + vpsrad ymm3, ymm3, 12 // c1 + vpslld ymm4, ymm0, 1 // 2*c0 + vpsubd ymm3, ymm0, ymm3 // c0-c1 + vpaddd ymm0, ymm3, ymm4 // U = 
3*c0-c1 + + vmovdqu ymm3, ymm1 + vpand ymm1, ymm14, ymm1 // c0 + vpsrlq ymm4, ymm3, 24 // c2 + vpsrad ymm3, ymm3, 12 // xc1 + vpand ymm3, ymm14, ymm3 // c1 + vpslld ymm5, ymm1, 3 // 8*c0 + vpaddd ymm4, ymm1, ymm4 // c0+c2 + vpaddd ymm4, ymm4, ymm5 // 9*c0+c2 + vpslld ymm5, ymm3, 1 // 2*c1 + vpaddd ymm1, ymm0, ymm3 // U+c1 + vpsubd ymm0, ymm0, ymm3 // U-c1 + vpsubd ymm4, ymm4, ymm5 // 9*c0-2*c1+c2 + vpaddd ymm0, ymm0, ymm4 // U+(9*c0-3*c1+c2) + vpsubd ymm1, ymm1, ymm4 // U-(9*c0-3*c1+c2) + vpermd ymm0, ymm12, ymm0 + vpermd ymm1, ymm12, ymm1 + vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0 + vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm1 + + add r10, r13 // j+8 + inc rdx // i+1 + cmp rdx, r9 + jl loop6 + +// Stage m=256 + vmovdqu ymm9, PERM02134657 + shl r9, 1 + xor rdx, rdx // i = 0 + xor r10, r10 // j1 = 0 + mov r14, 32 +loop7: + vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*rdx+4*256] // S = psi[m+i]->psi[m+i+3] + vpermq ymm8, ymm2, 0x50 + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j]->a[j+3] + vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+16] // a[j+k]->a[j+k+3] + vpermq ymm3, ymm0, 0x4e + vinserti128 ymm0, ymm0, xmm1, 1 // U + vpblendd ymm1, ymm1, ymm3, 15 + vpmuldq ymm3, ymm1, ymm8 // a[j+k].S + vmovdqu ymm4, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm4, ymm4, 12 // c1 + vpslld ymm5, ymm3, 1 // 2*c0 + vpsubd ymm4, ymm3, ymm4 // c0-c1 + vpaddd ymm4, ymm4, ymm5 // V = 3*c0-c1 + vpsubd ymm1, ymm0, ymm4 // a[j+k] = U - V + vpaddd ymm0, ymm0, ymm4 // a[j] = U + V + vpslldq ymm1, ymm1, 4 + vpblendd ymm0, ymm0, ymm1, 0xaa + vpermd ymm0, ymm9, ymm0 + vmovdqu YMMWORD PTR [reg_p1+4*r10], ymm0 + + vpermq ymm8, ymm2, 0xfa + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10+32] // U = a[j]->a[j+3] + vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+48] // a[j+k]->a[j+k+3] + vpermq ymm3, ymm0, 0x4e + vinserti128 ymm0, ymm0, xmm1, 1 // U + vpblendd ymm1, ymm1, ymm3, 15 + vpmuldq ymm3, ymm1, ymm8 // a[j+k].S + vmovdqu ymm4, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm4, ymm4, 12 // c1 + vpslld 
ymm5, ymm3, 1 // 2*c0 + vpsubd ymm4, ymm3, ymm4 // c0-c1 + vpaddd ymm4, ymm4, ymm5 // V = 3*c0-c1 + vpsubd ymm1, ymm0, ymm4 // a[j+k] = U - V + vpaddd ymm0, ymm0, ymm4 // a[j] = U + V + vpslldq ymm1, ymm1, 4 + vpblendd ymm0, ymm0, ymm1, 0xaa + vpermd ymm0, ymm9, ymm0 + vmovdqu YMMWORD PTR [reg_p1+4*r10+32], ymm0 + + vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*rdx+4*256+16] // S = psi[m+i]->psi[m+i+3] + vpermq ymm8, ymm2, 0x50 + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10+64] // U = a[j]->a[j+3] + vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+80] // a[j+k]->a[j+k+3] + vpermq ymm3, ymm0, 0x4e + vinserti128 ymm0, ymm0, xmm1, 1 // U + vpblendd ymm1, ymm1, ymm3, 15 + vpmuldq ymm3, ymm1, ymm8 // a[j+k].S + vmovdqu ymm4, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm4, ymm4, 12 // c1 + vpslld ymm5, ymm3, 1 // 2*c0 + vpsubd ymm4, ymm3, ymm4 // c0-c1 + vpaddd ymm4, ymm4, ymm5 // V = 3*c0-c1 + vpsubd ymm1, ymm0, ymm4 // a[j+k] = U - V + vpaddd ymm0, ymm0, ymm4 // a[j] = U + V + vpslldq ymm1, ymm1, 4 + vpblendd ymm0, ymm0, ymm1, 0xaa + vpermd ymm0, ymm9, ymm0 + vmovdqu YMMWORD PTR [reg_p1+4*r10+64], ymm0 + + vpermq ymm8, ymm2, 0xfa + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10+96] // U = a[j]->a[j+3] + vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+112] // a[j+k]->a[j+k+3] + vpermq ymm3, ymm0, 0x4e + vinserti128 ymm0, ymm0, xmm1, 1 // U + vpblendd ymm1, ymm1, ymm3, 15 + vpmuldq ymm3, ymm1, ymm8 // a[j+k].S + vmovdqu ymm4, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm4, ymm4, 12 // c1 + vpslld ymm5, ymm3, 1 // 2*c0 + vpsubd ymm4, ymm3, ymm4 // c0-c1 + vpaddd ymm4, ymm4, ymm5 // V = 3*c0-c1 + vpsubd ymm1, ymm0, ymm4 // a[j+k] = U - V + vpaddd ymm0, ymm0, ymm4 // a[j] = U + V + vpslldq ymm1, ymm1, 4 + vpblendd ymm0, ymm0, ymm1, 0xaa + vpermd ymm0, ymm9, ymm0 + vmovdqu YMMWORD PTR [reg_p1+4*r10+96], ymm0 + + add r10, r14 // j+32 + add rdx, r13 // i+8 + cmp rdx, r9 + jl loop7 + +// Stage m=512 + vmovdqu ymm9, PERM00224466 + shl r9, 1 // m = n/2 + xor rdx, rdx // i = 0 + xor r10, r10 // j1 = 0 
+ mov r14, 4 +loop8: + vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*rdx+4*512] // S + vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10] // U = a[j] + vmovdqu ymm1, YMMWORD PTR [reg_p1+4*r10+4] // a[j+k] + vpmuldq ymm3, ymm1, ymm2 // a[j+k].S + vmovdqu ymm4, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm4, ymm4, 12 // c1 + vpslld ymm5, ymm3, 1 // 2*c0 + vpsubd ymm4, ymm3, ymm4 // c0-c1 + vpaddd ymm4, ymm4, ymm5 // V = 3*c0-c1 + vpsubd ymm1, ymm0, ymm4 // a[j+k] = U - V + vpaddd ymm0, ymm0, ymm4 // a[j] = U + V + vpermd ymm1, ymm9, ymm1 + vpblendd ymm0, ymm0, ymm1, 0xaa + vmovdqu YMMWORD PTR [reg_p1+4*r10], ymm0 + + add r10, r13 // j+8 + add rdx, r14 // i+4 + cmp rdx, r9 + jl loop8 + + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Inverse NTT +// Operation: a [reg_p1] <- INTT(a) [reg_p1], +// [reg_p2] points to table +// reg_p3 and reg_p4 point to constants for scaling and +// reg_p5 contains parameter n +//*********************************************************************** +.globl oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm +oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm: + push r12 + push r13 + push r14 + push r15 + push rbx + +// Stage m=1024 + vmovdqu ymm9, PERM00224466 + vmovdqu ymm14, MASK12x8 + mov r12, reg_p5 + shr r12, 1 // n/2 = 512 + xor r15, r15 // i = 0 + xor r10, r10 // j1 = 0 + mov r13, 8 + mov r14, 4 +loop1b: + vmovdqu ymm1, YMMWORD PTR [reg_p1+4*r10+4] // V = a[j+k] + vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10] // U = a[j] + vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*r15+4*512] // S + vpsubd ymm3, ymm0, ymm1 // U - V + vpaddd ymm0, ymm0, ymm1 // U + V + vpmuldq ymm3, ymm3, ymm2 // (U - V).S + vmovdqu ymm4, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm4, ymm4, 12 // c1 + vpslld ymm5, ymm3, 1 // 2*c0 + vpsubd ymm4, ymm3, ymm4 // c0-c1 + vpaddd ymm1, ymm4, ymm5 // 3*c0-c1 + vpermd ymm1, ymm9, ymm1 + vpblendd ymm0, ymm0, ymm1, 0xaa + vmovdqu YMMWORD PTR [reg_p1+4*r10], ymm0 + + add r10, r13 // j+8 + 
add r15, r14 // i+4 + cmp r15, r12 + jl loop1b + +// Stage m=512 + vmovdqu ymm9, PERM02134657 + vmovdqu ymm13, PERM0145 + vmovdqu ymm15, PERM2367 + shr r12, 1 // n/4 = 256 + xor r15, r15 // i = 0 + xor r10, r10 // j1 = 0 + mov r14, 32 +loop2b: + vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*r15+4*256] // S = psi[m+i]->psi[m+i+3] + vpermq ymm8, ymm2, 0x50 + vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10] // U = a[j]->a[j+7] + vpermd ymm1, ymm15, ymm0 + vpermd ymm0, ymm13, ymm0 + vpsubd ymm3, ymm0, ymm1 // U - V + vpaddd ymm0, ymm0, ymm1 // U + V + vpmuldq ymm3, ymm3, ymm8 // (U - V).S + vmovdqu ymm4, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm4, ymm4, 12 // c1 + vpslld ymm5, ymm3, 1 // 2*c0 + vpsubd ymm4, ymm3, ymm4 // c0-c1 + vpaddd ymm1, ymm4, ymm5 // 3*c0-c1 + vpslldq ymm1, ymm1, 4 + vpblendd ymm0, ymm0, ymm1, 0xaa + vpermd ymm0, ymm9, ymm0 + vmovdqu YMMWORD PTR [reg_p1+4*r10], ymm0 + + vpermq ymm8, ymm2, 0xfa + vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10+32] // U = a[j]->a[j+7] + vpermd ymm1, ymm15, ymm0 + vpermd ymm0, ymm13, ymm0 + vpsubd ymm3, ymm0, ymm1 // U - V + vpaddd ymm0, ymm0, ymm1 // U + V + vpmuldq ymm3, ymm3, ymm8 // (U - V).S + vmovdqu ymm4, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm4, ymm4, 12 // c1 + vpslld ymm5, ymm3, 1 // 2*c0 + vpsubd ymm4, ymm3, ymm4 // c0-c1 + vpaddd ymm1, ymm4, ymm5 // 3*c0-c1 + vpslldq ymm1, ymm1, 4 + vpblendd ymm0, ymm0, ymm1, 0xaa + vpermd ymm0, ymm9, ymm0 + vmovdqu YMMWORD PTR [reg_p1+4*r10+32], ymm0 + + vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*r15+4*256+16]// S = psi[m+i]->psi[m+i+3] + vpermq ymm8, ymm2, 0x50 + vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10+64] // U = a[j]->a[j+7] + vpermd ymm1, ymm15, ymm0 + vpermd ymm0, ymm13, ymm0 + vpsubd ymm3, ymm0, ymm1 // U - V + vpaddd ymm0, ymm0, ymm1 // U + V + vpmuldq ymm3, ymm3, ymm8 // (U - V).S + vmovdqu ymm4, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm4, ymm4, 12 // c1 + vpslld ymm5, ymm3, 1 // 2*c0 + vpsubd ymm4, ymm3, ymm4 // c0-c1 + vpaddd ymm1, ymm4, ymm5 // 3*c0-c1 + vpslldq 
ymm1, ymm1, 4 + vpblendd ymm0, ymm0, ymm1, 0xaa + vpermd ymm0, ymm9, ymm0 + vmovdqu YMMWORD PTR [reg_p1+4*r10+64], ymm0 + + vpermq ymm8, ymm2, 0xfa + vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10+96] // U = a[j]->a[j+7] + vpermd ymm1, ymm15, ymm0 + vpermd ymm0, ymm13, ymm0 + vpsubd ymm3, ymm0, ymm1 // U - V + vpaddd ymm0, ymm0, ymm1 // U + V + vpmuldq ymm3, ymm3, ymm8 // (U - V).S + vmovdqu ymm4, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm4, ymm4, 12 // c1 + vpslld ymm5, ymm3, 1 // 2*c0 + vpsubd ymm4, ymm3, ymm4 // c0-c1 + vpaddd ymm1, ymm4, ymm5 // 3*c0-c1 + vpslldq ymm1, ymm1, 4 + vpblendd ymm0, ymm0, ymm1, 0xaa + vpermd ymm0, ymm9, ymm0 + vmovdqu YMMWORD PTR [reg_p1+4*r10+96], ymm0 + + add r10, r14 // j+32 + add r15, r13 // i+8 + cmp r15, r12 + jl loop2b + +// Stage m=256 + vmovdqu ymm12, PERM0246 + shr r12, 1 // n/8 = 128 + xor r15, r15 // i = 0 + xor r10, r10 // j1 = 0 +loop3b: + vbroadcastss ymm2, DWORD PTR [reg_p2+4*r15+4*128] // S + vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+16] // V = a[j+k] + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j] + vpsubd ymm3, ymm0, ymm1 // U - V + vpaddd ymm0, ymm0, ymm1 // U + V + vpmuldq ymm3, ymm3, ymm2 // (U - V).S + vmovdqu ymm4, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm4, ymm4, 12 // c1 + vpslld ymm5, ymm3, 1 // 2*c0 + vpsubd ymm4, ymm3, ymm4 // c0-c1 + vpaddd ymm1, ymm4, ymm5 // 3*c0-c1 + vpermd ymm0, ymm12, ymm0 + vpermd ymm1, ymm12, ymm1 + vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0 + vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm1 + + add r10, r13 // j+8 + inc r15 // i+1 + cmp r15, r12 + jl loop3b + +// Stage m=128 + shr r12, 1 // n/16 = 64 + xor r15, r15 // i = 0 + xor r10, r10 // j1 = 0 + mov r14, 16 +loop4b: + vbroadcastss ymm11, DWORD PTR [reg_p2+4*r15+4*64] // S + vpmovsxdq ymm13, XMMWORD PTR [reg_p1+4*r10+32] // V = a[j+k] + vpmovsxdq ymm15, XMMWORD PTR [reg_p1+4*r10+48] // V = a[j+k] + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j] + vpmovsxdq ymm2, XMMWORD PTR [reg_p1+4*r10+16] // U = a[j] + 
vpsubd ymm1, ymm0, ymm13 // U - V + vpaddd ymm0, ymm0, ymm13 // U + V + vpsubd ymm3, ymm2, ymm15 // U - V + vpaddd ymm2, ymm2, ymm15 // U + V + vpmuldq ymm1, ymm1, ymm11 // (U - V).S + vpmuldq ymm3, ymm3, ymm11 // (U - V).S + + vmovdqu ymm13, ymm1 + vpand ymm1, ymm14, ymm1 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm1, 1 // 2*c0 + vpsubd ymm13, ymm1, ymm13 // c0-c1 + vpaddd ymm1, ymm13, ymm15 // 3*c0-c1 + + vmovdqu ymm13, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm3, 1 // 2*c0 + vpsubd ymm13, ymm3, ymm13 // c0-c1 + vpaddd ymm3, ymm13, ymm15 // 3*c0-c1 + + vpermd ymm0, ymm12, ymm0 + vpermd ymm1, ymm12, ymm1 + vpermd ymm2, ymm12, ymm2 + vpermd ymm3, ymm12, ymm3 + vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0 + vmovdqu XMMWORD PTR [reg_p1+4*r10+32], xmm1 + vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm2 + vmovdqu XMMWORD PTR [reg_p1+4*r10+48], xmm3 + + add r10, r14 // j+16 + inc r15 // i+1 + cmp r15, r12 + jl loop4b + +// Stages m=64 -> m=4 + mov r9, 5 // 5 iterations + mov rax, 8 +loop5b: + shl rax, 1 // k = 2*k + shr r12, 1 // m/2 + xor r15, r15 // i = 0 + xor r8, r8 +loop6b: + mov r10, r8 // Load j1 + mov r11, rax + dec r11 + add r11, r10 // j2 + mov r13, r12 + add r13, r15 // m/2+i + vbroadcastss ymm9, DWORD PTR [reg_p2+4*r13] // S + mov rbx, 4 + +loop7b: + mov r13, r10 + add r13, rax // j+k + vpmovsxdq ymm10, XMMWORD PTR [reg_p1+4*r13] // V = a[j+k] + vpmovsxdq ymm11, XMMWORD PTR [reg_p1+4*r13+16] // V = a[j+k] + vpmovsxdq ymm13, XMMWORD PTR [reg_p1+4*r13+32] // V = a[j+k] + vpmovsxdq ymm15, XMMWORD PTR [reg_p1+4*r13+48] // V = a[j+k] + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j] + vpmovsxdq ymm2, XMMWORD PTR [reg_p1+4*r10+16] // U = a[j] + vpmovsxdq ymm4, XMMWORD PTR [reg_p1+4*r10+32] // U = a[j] + vpmovsxdq ymm6, XMMWORD PTR [reg_p1+4*r10+48] // U = a[j] + + vpsubd ymm1, ymm0, ymm10 // U - V + vpaddd ymm0, ymm0, ymm10 // U + V + vpsubd ymm3, ymm2, ymm11 // U - V + vpaddd ymm2, ymm2, ymm11 // U + V + 
vpsubd ymm5, ymm4, ymm13 // U - V + vpaddd ymm4, ymm4, ymm13 // U + V + vpsubd ymm7, ymm6, ymm15 // U - V + vpaddd ymm6, ymm6, ymm15 // U + V + + vpmuldq ymm1, ymm1, ymm9 // (U - V).S + vpmuldq ymm3, ymm3, ymm9 + vpmuldq ymm5, ymm5, ymm9 + vpmuldq ymm7, ymm7, ymm9 + + vmovdqu ymm13, ymm1 + vpand ymm1, ymm14, ymm1 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm1, 1 // 2*c0 + vpsubd ymm13, ymm1, ymm13 // c0-c1 + vpaddd ymm1, ymm13, ymm15 // 3*c0-c1 + + cmp r9, rbx + jne skip1 + vmovdqu ymm13, ymm0 + vpand ymm0, ymm14, ymm0 // c0 + vpsrad ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm0, 1 // 2*c0 + vpsubd ymm13, ymm0, ymm13 // c0-c1 + vpaddd ymm0, ymm13, ymm15 // 3*c0-c1 + + vmovdqu ymm13, ymm1 + vpand ymm1, ymm14, ymm1 // c0 + vpsrad ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm1, 1 // 2*c0 + vpsubd ymm13, ymm1, ymm13 // c0-c1 + vpaddd ymm1, ymm13, ymm15 // 3*c0-c1 +skip1: + vpermd ymm1, ymm12, ymm1 + vpermd ymm0, ymm12, ymm0 + + vmovdqu ymm13, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm3, 1 // 2*c0 + vpsubd ymm13, ymm3, ymm13 // c0-c1 + vpaddd ymm3, ymm13, ymm15 // 3*c0-c1 + vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0 + vmovdqu XMMWORD PTR [reg_p1+4*r13], xmm1 + + cmp r9, rbx + jne skip2 + vmovdqu ymm13, ymm2 + vpand ymm2, ymm14, ymm2 // c0 + vpsrad ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm2, 1 // 2*c0 + vpsubd ymm13, ymm2, ymm13 // c0-c1 + vpaddd ymm2, ymm13, ymm15 // 3*c0-c1 + + vmovdqu ymm13, ymm3 + vpand ymm3, ymm14, ymm3 // c0 + vpsrad ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm3, 1 // 2*c0 + vpsubd ymm13, ymm3, ymm13 // c0-c1 + vpaddd ymm3, ymm13, ymm15 // 3*c0-c1 +skip2: + vpermd ymm3, ymm12, ymm3 + vpermd ymm2, ymm12, ymm2 + + vmovdqu ymm13, ymm5 + vpand ymm5, ymm14, ymm5 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm5, 1 // 2*c0 + vpsubd ymm13, ymm5, ymm13 // c0-c1 + vpaddd ymm5, ymm13, ymm15 // 3*c0-c1 + vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm2 + vmovdqu XMMWORD PTR [reg_p1+4*r13+16], xmm3 + + cmp 
r9, rbx + jne skip3 + vmovdqu ymm13, ymm4 + vpand ymm4, ymm14, ymm4 // c0 + vpsrad ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm4, 1 // 2*c0 + vpsubd ymm13, ymm4, ymm13 // c0-c1 + vpaddd ymm4, ymm13, ymm15 // 3*c0-c1 + + vmovdqu ymm13, ymm5 + vpand ymm5, ymm14, ymm5 // c0 + vpsrad ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm5, 1 // 2*c0 + vpsubd ymm13, ymm5, ymm13 // c0-c1 + vpaddd ymm5, ymm13, ymm15 // 3*c0-c1 +skip3: + vpermd ymm5, ymm12, ymm5 + vpermd ymm4, ymm12, ymm4 + + vmovdqu ymm13, ymm7 + vpand ymm7, ymm14, ymm7 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm7, 1 // 2*c0 + vpsubd ymm13, ymm7, ymm13 // c0-c1 + vpaddd ymm7, ymm13, ymm15 // 3*c0-c1 + vmovdqu XMMWORD PTR [reg_p1+4*r10+32], xmm4 + vmovdqu XMMWORD PTR [reg_p1+4*r13+32], xmm5 + + cmp r9, rbx + jne skip4 + vmovdqu ymm13, ymm6 + vpand ymm6, ymm14, ymm6 // c0 + vpsrad ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm6, 1 // 2*c0 + vpsubd ymm13, ymm6, ymm13 // c0-c1 + vpaddd ymm6, ymm13, ymm15 // 3*c0-c1 + + vmovdqu ymm13, ymm7 + vpand ymm7, ymm14, ymm7 // c0 + vpsrad ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm7, 1 // 2*c0 + vpsubd ymm13, ymm7, ymm13 // c0-c1 + vpaddd ymm7, ymm13, ymm15 // 3*c0-c1 +skip4: + vpermd ymm7, ymm12, ymm7 + vpermd ymm6, ymm12, ymm6 + vmovdqu XMMWORD PTR [reg_p1+4*r13+48], xmm7 + vmovdqu XMMWORD PTR [reg_p1+4*r10+48], xmm6 + + add r10, r14 + cmp r10, r11 + jl loop7b + mov rbx, rax + shl rbx, 1 // 2*k + add r8, rbx // j1+2*k + inc r15 + cmp r15, r12 + jl loop6b + dec r9 + jnz loop5b + +// Scaling step + shl rax, 1 // k = 2*k = 512 + xor r10, r10 // j = 0 + mov r14, 4 + movq xmm0, reg_p3 + vbroadcastsd ymm10, xmm0 // S = omegainv1N_rev + movq xmm0, reg_p4 + vbroadcastsd ymm11, xmm0 // T = Ninv +loop8b: + vpmovsxdq ymm13, XMMWORD PTR [reg_p1+4*r10+4*512] // V = a[j+k] + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j] + vpsubd ymm1, ymm0, ymm13 // U - V + vpaddd ymm0, ymm0, ymm13 // U + V + vpmuldq ymm1, ymm1, ymm10 // (U - V).S + vpmuldq ymm0, ymm0, ymm11 // (U + V).T + + 
vmovdqu ymm13, ymm0 + vpand ymm0, ymm14, ymm0 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm0, 1 // 2*c0 + vpsubd ymm13, ymm0, ymm13 // c0-c1 + vpaddd ymm0, ymm13, ymm15 // 3*c0-c1 + + vmovdqu ymm13, ymm1 + vpand ymm1, ymm14, ymm1 // c0 + vpsrlq ymm13, ymm13, 12 // c1 + vpslld ymm15, ymm1, 1 // 2*c0 + vpsubd ymm13, ymm1, ymm13 // c0-c1 + vpaddd ymm1, ymm13, ymm15 // 3*c0-c1 + + vpermd ymm0, ymm12, ymm0 + vpermd ymm1, ymm12, ymm1 + vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0 + vmovdqu XMMWORD PTR [reg_p1+4*r10+4*512], xmm1 + + add r10, r14 // j+4 + cmp r10, rax + jl loop8b +loop9b: + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Component-wise multiplication and addition +// Operation: d [reg_p4] <- a [reg_p1] * b [reg_p2] + c [reg_p3] +// reg_p5 contains parameter n +//*********************************************************************** +.globl oqs_rlwe_msrln16_pmuladd_asm +oqs_rlwe_msrln16_pmuladd_asm: + vmovdqu ymm5, PERM0246 + vmovdqu ymm6, MASK12x8 + xor rax, rax + movq r11, 4 +lazo2: + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*rax] // a + vpmovsxdq ymm1, XMMWORD PTR [reg_p2+4*rax] // b + vpmovsxdq ymm2, XMMWORD PTR [reg_p3+4*rax] // c + vpmuldq ymm0, ymm1, ymm0 + vpaddq ymm0, ymm2, ymm0 + + vmovdqu ymm3, ymm0 + vpand ymm0, ymm6, ymm0 // c0 + vpsrlq ymm3, ymm3, 12 // c1 + vpslld ymm4, ymm0, 1 // 2*c0 + vpsubd ymm3, ymm0, ymm3 // c0-c1 + vpaddd ymm0, ymm3, ymm4 // 3*c0-c1 + + vmovdqu ymm3, ymm0 + vpand ymm0, ymm6, ymm0 // c0 + vpsrad ymm3, ymm3, 12 // c1 + vpslld ymm4, ymm0, 1 // 2*c0 + vpsubd ymm3, ymm0, ymm3 // c0-c1 + vpaddd ymm0, ymm3, ymm4 // 3*c0-c1 + + vpermd ymm0, ymm5, ymm0 + vmovdqu XMMWORD PTR [reg_p4+4*rax], xmm0 + + add rax, r11 // j+4 + cmp rax, reg_p5 + jl lazo2 + ret + + +//*********************************************************************** +// Component-wise multiplication +// Operation: c [reg_p3] <- a [reg_p1] * b [reg_p2] +// reg_p4 contains 
parameter n +//*********************************************************************** +.globl oqs_rlwe_msrln16_pmul_asm +oqs_rlwe_msrln16_pmul_asm: + vmovdqu ymm5, PERM0246 + vmovdqu ymm6, MASK12x8 + xor rax, rax + movq r11, 4 +lazo3: + vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*rax] // a + vpmovsxdq ymm1, XMMWORD PTR [reg_p2+4*rax] // b + vpmuldq ymm0, ymm1, ymm0 + + vmovdqu ymm3, ymm0 + vpand ymm0, ymm6, ymm0 // c0 + vpsrlq ymm3, ymm3, 12 // c1 + vpslld ymm4, ymm0, 1 // 2*c0 + vpsubd ymm3, ymm0, ymm3 // c0-c1 + vpaddd ymm0, ymm3, ymm4 // 3*c0-c1 + + vmovdqu ymm3, ymm0 + vpand ymm0, ymm6, ymm0 // c0 + vpsrad ymm3, ymm3, 12 // c1 + vpslld ymm4, ymm0, 1 // 2*c0 + vpsubd ymm3, ymm0, ymm3 // c0-c1 + vpaddd ymm0, ymm3, ymm4 // 3*c0-c1 + + vpermd ymm0, ymm5, ymm0 + vmovdqu XMMWORD PTR [reg_p3+4*rax], xmm0 + + add rax, r11 // j+4 + cmp rax, reg_p4 + jl lazo3 + ret + + +//*********************************************************************** +// Two consecutive reductions +// Operation: c [reg_p1] <- a [reg_p1] +// reg_p2 contains parameter n +//*********************************************************************** +.globl oqs_rlwe_msrln16_two_reduce12289_asm +oqs_rlwe_msrln16_two_reduce12289_asm: + vmovdqu ymm6, MASK12x8 + vmovdqu ymm7, PRIME8x + xor rax, rax + movq r11, 8 +lazo4: + vmovdqu ymm0, YMMWORD PTR [reg_p1+4*rax] // a + + vmovdqu ymm3, ymm0 + vpand ymm0, ymm6, ymm0 // c0 + vpsrad ymm3, ymm3, 12 // c1 + vpslld ymm4, ymm0, 1 // 2*c0 + vpsubd ymm3, ymm0, ymm3 // c0-c1 + vpaddd ymm0, ymm3, ymm4 // 3*c0-c1 + + vmovdqu ymm3, ymm0 + vpand ymm0, ymm6, ymm0 // c0 + vpsrad ymm3, ymm3, 12 // c1 + vpslld ymm4, ymm0, 1 // 2*c0 + vpsubd ymm3, ymm0, ymm3 // c0-c1 + vpaddd ymm0, ymm3, ymm4 // 3*c0-c1 + + vpsrad ymm2, ymm0, 31 + vpand ymm2, ymm7, ymm2 + vpaddd ymm2, ymm0, ymm2 + vpsubd ymm0, ymm2, ymm7 + + vpsrad ymm2, ymm0, 31 + vpand ymm2, ymm7, ymm2 + vpaddd ymm0, ymm0, ymm2 + + vmovdqu YMMWORD PTR [reg_p1+4*rax], ymm0 + + add rax, r11 // j+8 + cmp rax, reg_p2 + jl lazo4 + ret 
+ + +//*********************************************************************** +// Encoding +// Operation: c [reg_p2] <- a [reg_p1] +//*********************************************************************** +.globl oqs_rlwe_msrln16_encode_asm +oqs_rlwe_msrln16_encode_asm: + vmovdqu ymm6, MASK32 + vmovdqu ymm7, MASK42 + mov r9, 1024 + xor rax, rax + xor r10, r10 + mov r11, 14 + mov rcx, 8 +lazo5: + vmovdqu ymm0, YMMWORD PTR [reg_p1+4*rax] // a + + vpsrlq ymm1, ymm0, 18 + vpsllq ymm2, ymm0, 4 + vpand ymm0, ymm0, ymm6 + vpsrldq ymm2, ymm2, 5 + vpsrlq ymm3, ymm1, 4 + vpand ymm1, ymm1, ymm6 + vpand ymm2, ymm2, ymm7 + vpsrldq ymm3, ymm3, 4 + vpor ymm0, ymm0, ymm1 + vpor ymm0, ymm0, ymm2 + vpor ymm0, ymm0, ymm3 + vpermq ymm1, ymm0, 0x0e + + vmovdqu XMMWORD PTR [reg_p2+r10], xmm0 + vmovdqu XMMWORD PTR [reg_p2+r10+7], xmm1 + + add r10, r11 + add rax, rcx // j+8 + cmp rax, r9 + jl lazo5 + ret + + +//*********************************************************************** +// Decoding +// Operation: c [reg_p2] <- a [reg_p1] +//*********************************************************************** +.globl oqs_rlwe_msrln16_decode_asm +oqs_rlwe_msrln16_decode_asm: + vmovdqu ymm6, MASK14_1 + vmovdqu ymm7, MASK14_2 + vmovdqu ymm8, MASK14_3 + vmovdqu ymm9, MASK14_4 + mov r9, 1024 + xor rax, rax + xor r10, r10 + mov r11, 14 + mov rcx, 8 +lazo6: + vmovdqu xmm0, XMMWORD PTR [reg_p1+r10] + vmovdqu xmm1, XMMWORD PTR [reg_p1+r10+7] + vinserti128 ymm0, ymm0, xmm1, 1 + + vpand ymm1, ymm0, ymm6 + vpand ymm2, ymm0, ymm7 + vpand ymm3, ymm0, ymm8 + vpand ymm4, ymm0, ymm9 + + vpsllq ymm2, ymm2, 18 + vpsllq ymm3, ymm3, 4 + vpslldq ymm3, ymm3, 4 + vpsrlq ymm4, ymm4, 2 + vpslldq ymm4, ymm4, 7 + + vpor ymm1, ymm1, ymm2 + vpor ymm1, ymm1, ymm3 + vpor ymm1, ymm1, ymm4 + + vmovdqu YMMWORD PTR [reg_p2+4*rax], ymm1 + + add r10, r11 + add rax, rcx // j+8 + cmp rax, r9 + jl lazo6 + ret diff --git a/crypt/liboqs/kex_rlwe_msrln16/LICENSE.txt b/crypt/liboqs/kex_rlwe_msrln16/LICENSE.txt new file mode 100644 
index 0000000000000000000000000000000000000000..4340e43be7ba94b0f64000b9410ebccbc5f25f11 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/LICENSE.txt @@ -0,0 +1,25 @@ +LatticeCrypto + +Copyright (c) Microsoft Corporation +All rights reserved. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +associated documentation files (the ""Software""), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +The library uses the public domain implementation of SHAKE128 by the Keccak team; see the header +of shake128.c for details. diff --git a/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto.h b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto.h new file mode 100644 index 0000000000000000000000000000000000000000..f921d879400f9e8cc56dfa087706afc89ab8004a --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto.h @@ -0,0 +1,90 @@ +/*************************************************************************************** +* LatticeCrypt: an efficient post-quantum Ring-Learning With Errors cryptography library +* +* Copyright (c) Microsoft Corporation. All rights reserved. 
+* +* +* Abstract: main header file +* +****************************************************************************************/ + +#ifndef __LatticeCrypt_H__ +#define __LatticeCrypt_H__ + +// For C++ +#ifdef __cplusplus +extern "C" { +#endif + +#include <oqs/rand.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +// NOTE: probably a better way to do this. +#if (defined(__x86_64__) || defined(__x86_64) || defined(__arch64__) || defined(_M_AMD64) || defined(_M_X64) || defined(_WIN64) || !defined(__LP64__)) +#define RADIX 64 +typedef uint64_t digit_t; // Unsigned 64-bit digit +typedef int64_t sdigit_t; // Signed 64-bit digit +#else +#define RADIX 32 +typedef uint32_t digit_t; // Unsigned 32-bit digit +typedef int32_t sdigit_t; // Signed 32-bit digit + +#endif + +// Definitions of the error-handling type and error codes + +typedef enum { + CRYPTO_SUCCESS, // 0x00 + CRYPTO_ERROR, // 0x01 + CRYPTO_ERROR_DURING_TEST, // 0x02 + CRYPTO_ERROR_UNKNOWN, // 0x03 + CRYPTO_ERROR_NOT_IMPLEMENTED, // 0x04 + CRYPTO_ERROR_NO_MEMORY, // 0x05 + CRYPTO_ERROR_INVALID_PARAMETER, // 0x06 + CRYPTO_ERROR_SHARED_KEY, // 0x07 + CRYPTO_ERROR_TOO_MANY_ITERATIONS, // 0x08 + CRYPTO_ERROR_END_OF_LIST +} CRYPTO_STATUS; + +#define CRYPTO_STATUS_TYPE_SIZE (CRYPTO_ERROR_END_OF_LIST) + +// Basic key-exchange constants +#define OQS_RLWE_MSRLN16_PKA_BYTES 1824 // Alice's public key size +#define OQS_RLWE_MSRLN16_PKB_BYTES 2048 // Bob's public key size +#define OQS_RLWE_MSRLN16_SHAREDKEY_BYTES 32 // Shared key size + +/******************** Function prototypes *******************/ + +// Clear digits from memory. "nwords" indicates the number of digits to be zeroed. +extern void oqs_rlwe_msrln16_clear_words(void *mem, digit_t nwords); + +/*********************** Key exchange API ***********************/ + +// Alice's key generation +// It produces a private key SecretKeyA and computes the public key PublicKeyA. 
+// Outputs: the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total) +// the public key PublicKeyA that occupies 1824 bytes +CRYPTO_STATUS oqs_rlwe_msrln16_KeyGeneration_A(int32_t *SecretKeyA, unsigned char *PublicKeyA, OQS_RAND *rand); + +// Bob's key generation and shared secret computation +// It produces a private key and computes the public key PublicKeyB. In combination with Alice's public key PublicKeyA, it computes +// the shared secret SharedSecretB. +// Input: Alice's public key PublicKeyA that consists of 1824 bytes +// Outputs: the public key PublicKeyB that occupies 2048 bytes. +// the 256-bit shared secret SharedSecretB. +CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_B(unsigned char *PublicKeyA, unsigned char *SharedSecretB, unsigned char *PublicKeyB, OQS_RAND *rand); + +// Alice's shared secret computation +// It computes the shared secret SharedSecretA using Bob's public key PublicKeyB and Alice's private key SecretKeyA. +// Inputs: Bob's public key PublicKeyB that consists of 2048 bytes +// the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total) +// Output: the 256-bit shared secret SharedSecretA. +CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_A(unsigned char *PublicKeyB, int32_t *SecretKeyA, unsigned char *SharedSecretA); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_kex.c b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_kex.c new file mode 100644 index 0000000000000000000000000000000000000000..5425366bf1602da99ae61172c19c42bf06482e22 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_kex.c @@ -0,0 +1,438 @@ +/**************************************************************************************** + * LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library + * + * Copyright (c) Microsoft Corporation. All rights reserved. 
+ * + * + * Abstract: Ring-LWE key exchange + * The implementation is based on the instantiation of Peikert's key exchange [1] + * due to Alkim, Ducas, Poppelmann and Schwabe [2]. + * + * [1] C. Peikert, "Lattice cryptography for the internet", in Post-Quantum Cryptography - + * 6th International Workshop (PQCrypto 2014), LNCS 8772, pp. 197-219. Springer, 2014. + * [2] E. Alkim, L. Ducas, T. Pöppelmann and P. Schwabe, "Post-quantum key exchange - a new + * hope", IACR Cryptology ePrint Archive, Report 2015/1092, 2015. + * + ******************************************************************************************/ + +#include "LatticeCrypto_priv.h" +#include "oqs/rand.h" +#include <oqs/sha3.h> + +extern const int32_t psi_rev_ntt1024_12289[1024]; +extern const int32_t omegainv_rev_ntt1024_12289[1024]; +extern const int32_t omegainv10N_rev_ntt1024_12289; +extern const int32_t Ninv11_ntt1024_12289; + +// import external code +#ifdef RLWE_ASM_AVX2 +#include "AMD64/consts.c" +#include "AMD64/ntt_x64.c" +#else +#include "generic/ntt.c" +#endif + +__inline void oqs_rlwe_msrln16_clear_words(void *mem, digit_t nwords) { + // Clear digits from memory. "nwords" indicates the number of digits to be zeroed. + // This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing. 
+ unsigned int i; + volatile digit_t *v = mem; + + for (i = 0; i < nwords; i++) { + v[i] = 0; + } +} + +void oqs_rlwe_msrln16_encode_A(const uint32_t *pk, const unsigned char *seed, unsigned char *m) { + // Alice's message encoding + unsigned int i = 0, j; +#if defined(RLWE_ASM_AVX2) + oqs_rlwe_msrln16_encode_asm(pk, m); + i = 1792; +#else + for (j = 0; j < 1024; j += 4) { + m[i] = (unsigned char) (pk[j] & 0xFF); + m[i + 1] = (unsigned char) ((pk[j] >> 8) | ((pk[j + 1] & 0x03) << 6)); + m[i + 2] = (unsigned char) ((pk[j + 1] >> 2) & 0xFF); + m[i + 3] = (unsigned char) ((pk[j + 1] >> 10) | ((pk[j + 2] & 0x0F) << 4)); + m[i + 4] = (unsigned char) ((pk[j + 2] >> 4) & 0xFF); + m[i + 5] = (unsigned char) ((pk[j + 2] >> 12) | ((pk[j + 3] & 0x3F) << 2)); + m[i + 6] = (unsigned char) (pk[j + 3] >> 6); + i += 7; + } +#endif + + for (j = 0; j < 32; j++) { + m[i + j] = seed[j]; + } +} + +void oqs_rlwe_msrln16_decode_A(const unsigned char *m, uint32_t *pk, unsigned char *seed) { + // Alice's message decoding + unsigned int i = 0, j; + +#if defined(RLWE_ASM_AVX2) + oqs_rlwe_msrln16_decode_asm(m, pk); + i = 1792; +#else + for (j = 0; j < 1024; j += 4) { + pk[j] = ((uint32_t) m[i] | (((uint32_t) m[i + 1] & 0x3F) << 8)); + pk[j + 1] = (((uint32_t) m[i + 1] >> 6) | ((uint32_t) m[i + 2] << 2) | (((uint32_t) m[i + 3] & 0x0F) << 10)); + pk[j + 2] = (((uint32_t) m[i + 3] >> 4) | ((uint32_t) m[i + 4] << 4) | (((uint32_t) m[i + 5] & 0x03) << 12)); + pk[j + 3] = (((uint32_t) m[i + 5] >> 2) | ((uint32_t) m[i + 6] << 6)); + i += 7; + } +#endif + + for (j = 0; j < 32; j++) { + seed[j] = m[i + j]; + } +} + +void oqs_rlwe_msrln16_encode_B(const uint32_t *pk, const uint32_t *rvec, unsigned char *m) { + // Bob's message encoding + unsigned int i = 0, j; + +#if defined(RLWE_ASM_AVX2) + oqs_rlwe_msrln16_encode_asm(pk, m); +#else + for (j = 0; j < 1024; j += 4) { + m[i] = (unsigned char) (pk[j] & 0xFF); + m[i + 1] = (unsigned char) ((pk[j] >> 8) | ((pk[j + 1] & 0x03) << 6)); + m[i + 2] = (unsigned 
char) ((pk[j + 1] >> 2) & 0xFF); + m[i + 3] = (unsigned char) ((pk[j + 1] >> 10) | ((pk[j + 2] & 0x0F) << 4)); + m[i + 4] = (unsigned char) ((pk[j + 2] >> 4) & 0xFF); + m[i + 5] = (unsigned char) ((pk[j + 2] >> 12) | ((pk[j + 3] & 0x3F) << 2)); + m[i + 6] = (unsigned char) (pk[j + 3] >> 6); + i += 7; + } +#endif + + i = 0; + for (j = 0; j < 1024 / 4; j++) { + m[1792 + j] = (unsigned char) (rvec[i] | (rvec[i + 1] << 2) | (rvec[i + 2] << 4) | (rvec[i + 3] << 6)); + i += 4; + } +} + +void oqs_rlwe_msrln16_decode_B(unsigned char *m, uint32_t *pk, uint32_t *rvec) { + // Bob's message decoding + unsigned int i = 0, j; + +#if defined(RLWE_ASM_AVX2) + oqs_rlwe_msrln16_decode_asm(m, pk); + i = 1792; +#else + for (j = 0; j < 1024; j += 4) { + pk[j] = ((uint32_t) m[i] | (((uint32_t) m[i + 1] & 0x3F) << 8)); + pk[j + 1] = (((uint32_t) m[i + 1] >> 6) | ((uint32_t) m[i + 2] << 2) | (((uint32_t) m[i + 3] & 0x0F) << 10)); + pk[j + 2] = (((uint32_t) m[i + 3] >> 4) | ((uint32_t) m[i + 4] << 4) | (((uint32_t) m[i + 5] & 0x03) << 12)); + pk[j + 3] = (((uint32_t) m[i + 5] >> 2) | ((uint32_t) m[i + 6] << 6)); + i += 7; + } +#endif + + i = 0; + for (j = 0; j < 1024 / 4; j++) { + rvec[i] = (uint32_t)(m[1792 + j] & 0x03); + rvec[i + 1] = (uint32_t)((m[1792 + j] >> 2) & 0x03); + rvec[i + 2] = (uint32_t)((m[1792 + j] >> 4) & 0x03); + rvec[i + 3] = (uint32_t)(m[1792 + j] >> 6); + i += 4; + } +} + +static __inline uint32_t Abs(int32_t value) { + // Compute absolute value + uint32_t mask; + + mask = (uint32_t)(value >> 31); + return ((mask ^ value) - mask); +} + +CRYPTO_STATUS oqs_rlwe_msrln16_HelpRec(const uint32_t *x, uint32_t *rvec, OQS_RAND *rand) { + // Reconciliation helper + unsigned int i, j, norm; + unsigned char bit, random_bits[32]; + uint32_t v0[4], v1[4]; + // OQS integration note: call to aux API replaced with direct call to OQS_RAND + rand->rand_n(rand, random_bits, 32); + +#if defined(RLWE_ASM_AVX2) + oqs_rlwe_msrln16_helprec_asm(x, rvec, random_bits); +#else + for (i = 0; i < 
256; i++) { + bit = 1 & (random_bits[i >> 3] >> (i & 0x07)); + rvec[i] = (x[i] << 1) - bit; + rvec[i + 256] = (x[i + 256] << 1) - bit; + rvec[i + 512] = (x[i + 512] << 1) - bit; + rvec[i + 768] = (x[i + 768] << 1) - bit; + + norm = 0; + v0[0] = 4; + v0[1] = 4; + v0[2] = 4; + v0[3] = 4; + v1[0] = 3; + v1[1] = 3; + v1[2] = 3; + v1[3] = 3; + for (j = 0; j < 4; j++) { + v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q4) >> 31; + v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_3Q4) >> 31; + v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_5Q4) >> 31; + v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_7Q4) >> 31; + v1[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q2) >> 31; + v1[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q) >> 31; + v1[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_3Q2) >> 31; + norm += Abs(2 * rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q * v0[j]); + } + + norm = (uint32_t)((int32_t)(norm - OQS_RLWE_MSRLN16_PARAMETER_Q) >> 31); // If norm < q then norm = 0xff...ff, else norm = 0 + v0[0] = (norm & (v0[0] ^ v1[0])) ^ v1[0]; + v0[1] = (norm & (v0[1] ^ v1[1])) ^ v1[1]; + v0[2] = (norm & (v0[2] ^ v1[2])) ^ v1[2]; + v0[3] = (norm & (v0[3] ^ v1[3])) ^ v1[3]; + rvec[i] = (v0[0] - v0[3]) & 0x03; + rvec[i + 256] = (v0[1] - v0[3]) & 0x03; + rvec[i + 512] = (v0[2] - v0[3]) & 0x03; + rvec[i + 768] = ((v0[3] << 1) + (1 & ~norm)) & 0x03; + } +#endif + + return CRYPTO_SUCCESS; +} + +static __inline uint32_t LDDecode(int32_t *t) { + // Low-density decoding + unsigned int i, norm = 0; + uint32_t mask1, mask2, value; + int32_t cneg = -8 * OQS_RLWE_MSRLN16_PARAMETER_Q; + + for (i = 0; i < 4; i++) { + mask1 = t[i] >> 31; // If t[i] < 0 then mask2 = 0xff...ff, else mask2 = 0 + mask2 = (4 * OQS_RLWE_MSRLN16_PARAMETER_Q - (int32_t) Abs(t[i])) >> 31; // If 4*PARAMETER_Q > Abs(t[i]) then mask2 = 0, else mask2 = 0xff...ff + + value = ((mask1 & (8 * OQS_RLWE_MSRLN16_PARAMETER_Q ^ cneg)) ^ cneg); + norm += Abs(t[i] + 
(mask2 & value)); + } + + return ((8 * OQS_RLWE_MSRLN16_PARAMETER_Q - norm) >> 31) ^ 1; // If norm < PARAMETER_Q then return 1, else return 0 +} + +void oqs_rlwe_msrln16_Rec(const uint32_t *x, const uint32_t *rvec, unsigned char *key) { +// Reconciliation + +#if defined(RLWE_ASM_AVX2) + oqs_rlwe_msrln16_rec_asm(x, rvec, key); +#else + unsigned int i; + uint32_t t[4]; + + for (i = 0; i < 32; i++) { + key[i] = 0; + } + for (i = 0; i < 256; i++) { + t[0] = 8 * x[i] - (2 * rvec[i] + rvec[i + 768]) * OQS_RLWE_MSRLN16_PARAMETER_Q; + t[1] = 8 * x[i + 256] - (2 * rvec[i + 256] + rvec[i + 768]) * OQS_RLWE_MSRLN16_PARAMETER_Q; + t[2] = 8 * x[i + 512] - (2 * rvec[i + 512] + rvec[i + 768]) * OQS_RLWE_MSRLN16_PARAMETER_Q; + t[3] = 8 * x[i + 768] - (rvec[i + 768]) * OQS_RLWE_MSRLN16_PARAMETER_Q; + + key[i >> 3] |= (unsigned char) LDDecode((int32_t *) t) << (i & 0x07); + } +#endif +} + +CRYPTO_STATUS oqs_rlwe_msrln16_get_error(int32_t *e, OQS_RAND *rand) { + // Error sampling + unsigned char stream[3 * OQS_RLWE_MSRLN16_PARAMETER_N]; + uint32_t *pstream = (uint32_t *) &stream; + uint32_t acc1, acc2, temp; + uint8_t *pacc1 = (uint8_t *) &acc1, *pacc2 = (uint8_t *) &acc2; + unsigned int i, j; + + // OQS integration note: call to aux API replaced with direct call to OQS_RAND + rand->rand_n(rand, stream, 3 * OQS_RLWE_MSRLN16_PARAMETER_N); + +#if defined(RLWE_ASM_AVX2) + oqs_rlwe_msrln16_error_sampling_asm(stream, e); +#else + for (i = 0; i < OQS_RLWE_MSRLN16_PARAMETER_N / 4; i++) { + acc1 = 0; + acc2 = 0; + for (j = 0; j < 8; j++) { + acc1 += (pstream[i] >> j) & 0x01010101; + acc2 += (pstream[i + OQS_RLWE_MSRLN16_PARAMETER_N / 4] >> j) & 0x01010101; + } + for (j = 0; j < 4; j++) { + temp = pstream[i + 2 * OQS_RLWE_MSRLN16_PARAMETER_N / 4] >> j; + acc1 += temp & 0x01010101; + acc2 += (temp >> 4) & 0x01010101; + } + e[2 * i] = pacc1[0] - pacc1[1]; + e[2 * i + 1] = pacc1[2] - pacc1[3]; + e[2 * i + OQS_RLWE_MSRLN16_PARAMETER_N / 2] = pacc2[0] - pacc2[1]; + e[2 * i + 
OQS_RLWE_MSRLN16_PARAMETER_N / 2 + 1] = pacc2[2] - pacc2[3]; + } +#endif + + return CRYPTO_SUCCESS; +} + +CRYPTO_STATUS oqs_rlwe_msrln16_generate_a(uint32_t *a, const unsigned char *seed) { + // Generation of parameter a + // OQS integration note: call to aux API replaced with direct call to shake128 + unsigned int pos = 0, ctr = 0; + uint16_t val; + unsigned int nblocks = 16; + uint8_t buf[OQS_SHA3_SHAKE128_RATE * 16]; // was * nblocks, but VS doesn't like this buf init + uint64_t state[OQS_SHA3_STATESIZE]; + OQS_SHA3_shake128_absorb(state, seed, OQS_RLWE_MSRLN16_SEED_BYTES); + OQS_SHA3_shake128_squeezeblocks((unsigned char *) buf, nblocks, state); + + while (ctr < OQS_RLWE_MSRLN16_PARAMETER_N) { + val = (buf[pos] | ((uint16_t) buf[pos + 1] << 8)) & 0x3fff; + if (val < OQS_RLWE_MSRLN16_PARAMETER_Q) { + a[ctr++] = val; + } + pos += 2; + if (pos > OQS_SHA3_SHAKE128_RATE * nblocks - 2) { + nblocks = 1; + OQS_SHA3_shake128_squeezeblocks((unsigned char *) buf, nblocks, state); + pos = 0; + } + } + + return CRYPTO_SUCCESS; +} + +CRYPTO_STATUS oqs_rlwe_msrln16_KeyGeneration_A(int32_t *SecretKeyA, unsigned char *PublicKeyA, OQS_RAND *rand) { + // Alice's key generation + // It produces a private key SecretKeyA and computes the public key PublicKeyA. + // Outputs: the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total) + // the public key PublicKeyA that occupies 1824 bytes + // pLatticeCrypto must be set up in advance using LatticeCrypto_initialize(). 
+ uint32_t a[OQS_RLWE_MSRLN16_PARAMETER_N]; + int32_t e[OQS_RLWE_MSRLN16_PARAMETER_N]; + unsigned char seed[OQS_RLWE_MSRLN16_SEED_BYTES]; + CRYPTO_STATUS Status = CRYPTO_ERROR_UNKNOWN; + + rand->rand_n(rand, seed, OQS_RLWE_MSRLN16_SEED_BYTES); + Status = oqs_rlwe_msrln16_generate_a(a, seed); + if (Status != CRYPTO_SUCCESS) { + goto cleanup; + } + + Status = oqs_rlwe_msrln16_get_error(SecretKeyA, rand); + if (Status != CRYPTO_SUCCESS) { + goto cleanup; + } + Status = oqs_rlwe_msrln16_get_error(e, rand); + if (Status != CRYPTO_SUCCESS) { + goto cleanup; + } + oqs_rlwe_msrln16_NTT_CT_std2rev_12289(SecretKeyA, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_NTT_CT_std2rev_12289(e, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_smul(e, 3, OQS_RLWE_MSRLN16_PARAMETER_N); + + oqs_rlwe_msrln16_pmuladd((int32_t *) a, SecretKeyA, e, (int32_t *) a, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_correction((int32_t *) a, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_encode_A(a, seed, PublicKeyA); + +cleanup: + oqs_rlwe_msrln16_clear_words((void *) e, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N)); + + return Status; +} + +CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_B(unsigned char *PublicKeyA, unsigned char *SharedSecretB, unsigned char *PublicKeyB, OQS_RAND *rand) { + // Bob's key generation and shared secret computation + // It produces a private key and computes the public key PublicKeyB. In combination with Alice's public key PublicKeyA, it computes + // the shared secret SharedSecretB. + // Input: Alice's public key PublicKeyA that consists of 1824 bytes + // Outputs: the public key PublicKeyB that occupies 2048 bytes. + // the 256-bit shared secret SharedSecretB. + // pLatticeCrypto must be set up in advance using LatticeCrypto_initialize(). 
+ uint32_t pk_A[OQS_RLWE_MSRLN16_PARAMETER_N], a[OQS_RLWE_MSRLN16_PARAMETER_N], v[OQS_RLWE_MSRLN16_PARAMETER_N], r[OQS_RLWE_MSRLN16_PARAMETER_N]; + int32_t sk_B[OQS_RLWE_MSRLN16_PARAMETER_N], e[OQS_RLWE_MSRLN16_PARAMETER_N]; + unsigned char seed[OQS_RLWE_MSRLN16_SEED_BYTES]; + CRYPTO_STATUS Status = CRYPTO_ERROR_UNKNOWN; + + oqs_rlwe_msrln16_decode_A(PublicKeyA, pk_A, seed); + Status = oqs_rlwe_msrln16_generate_a(a, seed); + if (Status != CRYPTO_SUCCESS) { + goto cleanup; + } + + Status = oqs_rlwe_msrln16_get_error(sk_B, rand); + if (Status != CRYPTO_SUCCESS) { + goto cleanup; + } + Status = oqs_rlwe_msrln16_get_error(e, rand); + if (Status != CRYPTO_SUCCESS) { + goto cleanup; + } + oqs_rlwe_msrln16_NTT_CT_std2rev_12289(sk_B, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_NTT_CT_std2rev_12289(e, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_smul(e, 3, OQS_RLWE_MSRLN16_PARAMETER_N); + + oqs_rlwe_msrln16_pmuladd((int32_t *) a, sk_B, e, (int32_t *) a, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_correction((int32_t *) a, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N); + + Status = oqs_rlwe_msrln16_get_error(e, rand); + if (Status != CRYPTO_SUCCESS) { + goto cleanup; + } + oqs_rlwe_msrln16_NTT_CT_std2rev_12289(e, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_smul(e, 81, OQS_RLWE_MSRLN16_PARAMETER_N); + + oqs_rlwe_msrln16_pmuladd((int32_t *) pk_A, sk_B, e, (int32_t *) v, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_INTT_GS_rev2std_12289((int32_t *) v, omegainv_rev_ntt1024_12289, omegainv10N_rev_ntt1024_12289, Ninv11_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_two_reduce12289((int32_t *) v, OQS_RLWE_MSRLN16_PARAMETER_N); +#if !defined(RLWE_ASM_AVX2) + oqs_rlwe_msrln16_correction((int32_t *) v, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N); +#endif + + Status = oqs_rlwe_msrln16_HelpRec(v, r, rand); + if (Status != CRYPTO_SUCCESS) { + 
goto cleanup; + } + oqs_rlwe_msrln16_Rec(v, r, SharedSecretB); + oqs_rlwe_msrln16_encode_B(a, r, PublicKeyB); + +cleanup: + oqs_rlwe_msrln16_clear_words((void *) sk_B, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N)); + oqs_rlwe_msrln16_clear_words((void *) e, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N)); + oqs_rlwe_msrln16_clear_words((void *) a, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N)); + oqs_rlwe_msrln16_clear_words((void *) v, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N)); + oqs_rlwe_msrln16_clear_words((void *) r, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N)); + + return Status; +} + +CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_A(unsigned char *PublicKeyB, int32_t *SecretKeyA, unsigned char *SharedSecretA) { + // Alice's shared secret computation + // It computes the shared secret SharedSecretA using Bob's public key PublicKeyB and Alice's private key SecretKeyA. + // Inputs: Bob's public key PublicKeyB that consists of 2048 bytes + // the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total) + // Output: the 256-bit shared secret SharedSecretA. 
+ uint32_t u[OQS_RLWE_MSRLN16_PARAMETER_N], r[OQS_RLWE_MSRLN16_PARAMETER_N]; + CRYPTO_STATUS Status = CRYPTO_SUCCESS; + + oqs_rlwe_msrln16_decode_B(PublicKeyB, u, r); + + oqs_rlwe_msrln16_pmul(SecretKeyA, (int32_t *) u, (int32_t *) u, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_INTT_GS_rev2std_12289((int32_t *) u, omegainv_rev_ntt1024_12289, omegainv10N_rev_ntt1024_12289, Ninv11_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N); + oqs_rlwe_msrln16_two_reduce12289((int32_t *) u, OQS_RLWE_MSRLN16_PARAMETER_N); +#if !defined(RLWE_ASM_AVX2) + oqs_rlwe_msrln16_correction((int32_t *) u, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N); +#endif + + oqs_rlwe_msrln16_Rec(u, r, SharedSecretA); + + // Cleanup + oqs_rlwe_msrln16_clear_words((void *) u, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N)); + oqs_rlwe_msrln16_clear_words((void *) r, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N)); + + return Status; +} diff --git a/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_priv.h b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_priv.h new file mode 100644 index 0000000000000000000000000000000000000000..5e8cd1db27abc534fb2a53aa1922233c56e52047 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_priv.h @@ -0,0 +1,117 @@ +/**************************************************************************************** +* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library +* +* Copyright (c) Microsoft Corporation. All rights reserved. 
+* +* +* Abstract: internal header file +* +*****************************************************************************************/ + +#ifndef __LatticeCrypto_priv_H__ +#define __LatticeCrypto_priv_H__ + +// For C++ +#ifdef __cplusplus +extern "C" { +#endif + +#include "LatticeCrypto.h" +#include <oqs/rand.h> + +// Basic constants +#define OQS_RLWE_MSRLN16_PARAMETER_N 1024 +#define OQS_RLWE_MSRLN16_PARAMETER_Q 12289 +#define OQS_RLWE_MSRLN16_SEED_BYTES 256 / 8 +#define OQS_RLWE_MSRLN16_PARAMETER_Q4 3073 +#define OQS_RLWE_MSRLN16_PARAMETER_3Q4 9217 +#define OQS_RLWE_MSRLN16_PARAMETER_5Q4 15362 +#define OQS_RLWE_MSRLN16_PARAMETER_7Q4 21506 +#define OQS_RLWE_MSRLN16_PARAMETER_Q2 6145 +#define OQS_RLWE_MSRLN16_PARAMETER_3Q2 18434 + +// Macro definitions + +#define OQS_RLWE_MSRLN16_NBITS_TO_NWORDS(nbits) (((nbits) + (sizeof(digit_t) * 8) - 1) / (sizeof(digit_t) * 8)) // Conversion macro from number of bits to number of computer words +#define OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(nbytes) (((nbytes) + sizeof(digit_t) - 1) / sizeof(digit_t)) // Conversion macro from number of bytes to number of computer words + +// Macro to avoid compiler warnings when detecting unreferenced parameters +#define OQS_RLWE_MSRLN16_UNREFERENCED_PARAMETER(PAR) (PAR) + +/******************** Function prototypes *******************/ +/******************* Polynomial functions *******************/ + +// Forward NTT +void oqs_rlwe_msrln16_NTT_CT_std2rev_12289(int32_t *a, const int32_t *psi_rev, unsigned int N); +void oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm(int32_t *a, const int32_t *psi_rev, unsigned int N); + +// Inverse NTT +void oqs_rlwe_msrln16_INTT_GS_rev2std_12289(int32_t *a, const int32_t *omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N); +void oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm(int32_t *a, const int32_t *omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N); + +// Reduction modulo q +int32_t 
oqs_rlwe_msrln16_reduce12289(int64_t a); + +// Two merged reductions modulo q +int32_t oqs_rlwe_msrln16_reduce12289_2x(int64_t a); + +// Two consecutive reductions modulo q +void oqs_rlwe_msrln16_two_reduce12289(int32_t *a, unsigned int N); +void oqs_rlwe_msrln16_two_reduce12289_asm(int32_t *a, unsigned int N); + +// Correction modulo q +void oqs_rlwe_msrln16_correction(int32_t *a, int32_t p, unsigned int N); + +// Component-wise multiplication +void oqs_rlwe_msrln16_pmul(int32_t *a, int32_t *b, int32_t *c, unsigned int N); +void oqs_rlwe_msrln16_pmul_asm(int32_t *a, int32_t *b, int32_t *c, unsigned int N); + +// Component-wise multiplication and addition +void oqs_rlwe_msrln16_pmuladd(int32_t *a, int32_t *b, int32_t *c, int32_t *d, unsigned int N); +void oqs_rlwe_msrln16_pmuladd_asm(int32_t *a, int32_t *b, int32_t *c, int32_t *d, unsigned int N); + +// Component-wise multiplication with scalar +void oqs_rlwe_msrln16_smul(int32_t *a, int32_t scalar, unsigned int N); + +/******************* Key exchange functions *******************/ + +// Alice's message encoding +void oqs_rlwe_msrln16_encode_A(const uint32_t *pk, const unsigned char *seed, unsigned char *m); + +// Alice's message decoding +void oqs_rlwe_msrln16_decode_A(const unsigned char *m, uint32_t *pk, unsigned char *seed); + +// Bob's message encoding +void oqs_rlwe_msrln16_encode_B(const uint32_t *pk, const uint32_t *rvec, unsigned char *m); + +// Bob's message decoding +void oqs_rlwe_msrln16_decode_B(unsigned char *m, uint32_t *pk, uint32_t *rvec); + +// Partial message encoding/decoding (assembly optimized) +void oqs_rlwe_msrln16_encode_asm(const uint32_t *pk, unsigned char *m); +void oqs_rlwe_msrln16_decode_asm(const unsigned char *m, uint32_t *pk); + +// Reconciliation helper +CRYPTO_STATUS oqs_rlwe_msrln16_HelpRec(const uint32_t *x, uint32_t *rvec, OQS_RAND *rand); + +// Partial reconciliation helper (assembly optimized) +void oqs_rlwe_msrln16_helprec_asm(const uint32_t *x, uint32_t *rvec, unsigned 
char *random_bits); + +// Reconciliation +void oqs_rlwe_msrln16_Rec(const uint32_t *x, const uint32_t *rvec, unsigned char *key); +void oqs_rlwe_msrln16_rec_asm(const uint32_t *x, const uint32_t *rvec, unsigned char *key); + +// Error sampling +CRYPTO_STATUS oqs_rlwe_msrln16_get_error(int32_t *e, OQS_RAND *rand); + +// Partial error sampling (assembly optimized) +void oqs_rlwe_msrln16_error_sampling_asm(unsigned char *stream, int32_t *e); + +// Generation of parameter a +CRYPTO_STATUS oqs_rlwe_msrln16_generate_a(uint32_t *a, const unsigned char *seed); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/crypt/liboqs/kex_rlwe_msrln16/Makefile.am b/crypt/liboqs/kex_rlwe_msrln16/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..0149e305e02d573eabafe3ed48193226cc6ad91c --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/Makefile.am @@ -0,0 +1,8 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libmsrln16.la + + +libmsrln16_la_SOURCES = kex_rlwe_msrln16.c LatticeCrypto_kex.c ntt_constants.c +libmsrln16_la_CPPFLAGS = -I../../include +libmsrln16_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/kex_rlwe_msrln16/README.txt b/crypt/liboqs/kex_rlwe_msrln16/README.txt new file mode 100644 index 0000000000000000000000000000000000000000..6ab71dbec2d9129b7a0b52c5644edbac74f80453 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/README.txt @@ -0,0 +1,42 @@ + LatticeCrypto v1.0 (C Edition) + ============================== + +LatticeCrypto is a post-quantum secure cryptography library based on the Ring-Learning with Errors (R-LWE) +problem. The version 1.0 of the library implements the instantiation of Peikert's key exchange [1] due to +Alkim, Ducas, Pöppelmann and Schwabe [2], and incorporates novel techniques to provide higher performance. + +The library [3] was developed by Microsoft Research for experimentation purposes. + +*** THE ORIGINAL README HAS BEEN TRIMMED LEAVING ONLY THE INFO RELEVANT FOR THE OQS INTEGRATION *** + +1. 
CONTENTS: + -------- + +/ - Library C and header files +AMD64/ - Optimized implementation of the NTT for x64 platforms +generic/ - Implementation of the NTT in portable C +README.txt - This readme file + + +2. MAIN FEATURES: + ------------- + +- Support arithmetic functions for computations in power-of-2 cyclotomic rings that are the basis for + implementing Ring-LWE-based cryptographic algorithms. +- Support key exchange providing at least 128 bits of quantum and classical security. +- All functions evaluating secret data have regular, constant-time execution, which provides protection + against timing and cache attacks. +- Basic implementation of the underlying arithmetic functions using portable C to enable support on + a wide range of platforms including x64, x86 and ARM. +- Optional high-performance implementation of the underlying arithmetic functions for x64 platforms on + Linux using assembly and AVX2 vector instructions. + + +REFERENCES +---------- + +[1] C. Peikert, "Lattice cryptography for the internet", in Post-Quantum Cryptography - 6th International + Workshop (PQCrypto 2014), LNCS 8772, pp. 197-219. Springer, 2014. +[2] E. Alkim, L. Ducas, T. Pöppelmann and P. Schwabe, "Post-quantum key exchange - a new hope", IACR Cryp- + tology ePrint Archive, Report 2015/1092, 2015. +[3] https://www.microsoft.com/en-us/research/project/lattice-cryptography-library/ diff --git a/crypt/liboqs/kex_rlwe_msrln16/generic/ntt.c b/crypt/liboqs/kex_rlwe_msrln16/generic/ntt.c new file mode 100644 index 0000000000000000000000000000000000000000..bd7c3f4b63899a5ebd3a880689f2af7238e69cff --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/generic/ntt.c @@ -0,0 +1,164 @@ +/**************************************************************************************** +* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library +* +* Copyright (c) Microsoft Corporation. All rights reserved. 
+* +* +* Abstract: NTT functions and other polynomial operations +* +*****************************************************************************************/ + +#include "../LatticeCrypto_priv.h" + +const uint32_t mask12 = ((uint64_t) 1 << 12) - 1; + +int32_t oqs_rlwe_msrln16_reduce12289(int64_t a) { // Reduction modulo q + int32_t c0, c1; + + c0 = (int32_t)(a & mask12); + c1 = (int32_t)(a >> 12); + + return (3 * c0 - c1); +} + +int32_t oqs_rlwe_msrln16_reduce12289_2x(int64_t a) { // Two merged reductions modulo q + int32_t c0, c1, c2; + + c0 = (int32_t)(a & mask12); + c1 = (int32_t)((a >> 12) & mask12); + c2 = (int32_t)(a >> 24); + + return (9 * c0 - 3 * c1 + c2); +} + +void oqs_rlwe_msrln16_NTT_CT_std2rev_12289(int32_t *a, const int32_t *psi_rev, unsigned int N) { // Forward NTT + unsigned int m, i, j, j1, j2, k = N; + int32_t S, U, V; + + for (m = 1; m < 128; m = 2 * m) { + k = k >> 1; + for (i = 0; i < m; i++) { + j1 = 2 * i * k; + j2 = j1 + k - 1; + S = psi_rev[m + i]; + for (j = j1; j <= j2; j++) { + U = a[j]; + V = oqs_rlwe_msrln16_reduce12289((int64_t) a[j + k] * S); + a[j] = U + V; + a[j + k] = U - V; + } + } + } + + k = 4; + for (i = 0; i < 128; i++) { + j1 = 8 * i; + j2 = j1 + 3; + S = psi_rev[i + 128]; + for (j = j1; j <= j2; j++) { + U = oqs_rlwe_msrln16_reduce12289((int64_t) a[j]); + V = oqs_rlwe_msrln16_reduce12289_2x((int64_t) a[j + 4] * S); + a[j] = U + V; + a[j + 4] = U - V; + } + } + + for (m = 256; m < N; m = 2 * m) { + k = k >> 1; + for (i = 0; i < m; i++) { + j1 = 2 * i * k; + j2 = j1 + k - 1; + S = psi_rev[m + i]; + for (j = j1; j <= j2; j++) { + U = a[j]; + V = oqs_rlwe_msrln16_reduce12289((int64_t) a[j + k] * S); + a[j] = U + V; + a[j + k] = U - V; + } + } + } + return; +} + +void oqs_rlwe_msrln16_INTT_GS_rev2std_12289(int32_t *a, const int32_t *omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N) { // Inverse NTT + unsigned int m, h, i, j, j1, j2, k = 1; + int32_t S, U, V; + int64_t temp; + + for (m = N; m > 2; 
m >>= 1) { + j1 = 0; + h = m >> 1; + for (i = 0; i < h; i++) { + j2 = j1 + k - 1; + S = omegainv_rev[h + i]; + for (j = j1; j <= j2; j++) { + U = a[j]; + V = a[j + k]; + a[j] = U + V; + temp = (int64_t)(U - V) * S; + if (m == 32) { + a[j] = oqs_rlwe_msrln16_reduce12289((int64_t) a[j]); + a[j + k] = oqs_rlwe_msrln16_reduce12289_2x(temp); + } else { + a[j + k] = oqs_rlwe_msrln16_reduce12289(temp); + } + } + j1 = j1 + 2 * k; + } + k = 2 * k; + } + for (j = 0; j < k; j++) { + U = a[j]; + V = a[j + k]; + a[j] = oqs_rlwe_msrln16_reduce12289((int64_t)(U + V) * Ninv); + a[j + k] = oqs_rlwe_msrln16_reduce12289((int64_t)(U - V) * omegainv1N_rev); + } + return; +} + +void oqs_rlwe_msrln16_two_reduce12289(int32_t *a, unsigned int N) { // Two consecutive reductions modulo q + unsigned int i; + + for (i = 0; i < N; i++) { + a[i] = oqs_rlwe_msrln16_reduce12289((int64_t) a[i]); + a[i] = oqs_rlwe_msrln16_reduce12289((int64_t) a[i]); + } +} + +void oqs_rlwe_msrln16_pmul(int32_t *a, int32_t *b, int32_t *c, unsigned int N) { // Component-wise multiplication + unsigned int i; + + for (i = 0; i < N; i++) { + c[i] = oqs_rlwe_msrln16_reduce12289((int64_t) a[i] * b[i]); + c[i] = oqs_rlwe_msrln16_reduce12289((int64_t) c[i]); + } +} + +void oqs_rlwe_msrln16_pmuladd(int32_t *a, int32_t *b, int32_t *c, int32_t *d, unsigned int N) { // Component-wise multiplication and addition + unsigned int i; + + for (i = 0; i < N; i++) { + d[i] = oqs_rlwe_msrln16_reduce12289((int64_t) a[i] * b[i] + c[i]); + d[i] = oqs_rlwe_msrln16_reduce12289((int64_t) d[i]); + } +} + +void oqs_rlwe_msrln16_smul(int32_t *a, int32_t scalar, unsigned int N) { // Component-wise multiplication with scalar + unsigned int i; + + for (i = 0; i < N; i++) { + a[i] = a[i] * scalar; + } +} + +void oqs_rlwe_msrln16_correction(int32_t *a, int32_t p, unsigned int N) { // Correction modulo q + unsigned int i; + int32_t mask; + + for (i = 0; i < N; i++) { + mask = a[i] >> (4 * sizeof(int32_t) - 1); + a[i] += (p & mask) - p; + mask = a[i] 
>> (4 * sizeof(int32_t) - 1); + a[i] += (p & mask); + } +} diff --git a/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.c b/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.c new file mode 100644 index 0000000000000000000000000000000000000000..8bcca49055a0ac47d8bfaa73d046f4ed8d494221 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.c @@ -0,0 +1,174 @@ +#if defined(WINDOWS) +#define UNUSED +#else +#define UNUSED __attribute__((unused)) +#endif + +#include <stdlib.h> +#include <string.h> +#if !defined(WINDOWS) +#include <strings.h> +#include <unistd.h> +#endif + +#include <oqs/kex.h> +#include <oqs/rand.h> + +#include "LatticeCrypto.h" +#include "LatticeCrypto_priv.h" +#include "kex_rlwe_msrln16.h" + +#if defined(WINDOWS) +#define strdup _strdup // for strdup deprecation warning +#endif + +OQS_KEX *OQS_KEX_rlwe_msrln16_new(OQS_RAND *rand) { + + OQS_KEX *k = malloc(sizeof(OQS_KEX)); + if (k == NULL) { + return NULL; + } + + k->ctx = NULL; + k->method_name = strdup("RLWE MSR LN16"); + k->estimated_classical_security = 128; + k->estimated_quantum_security = 128; + k->seed = NULL; + k->seed_len = 0; + k->named_parameters = NULL; + k->rand = rand; + k->params = NULL; + k->alice_0 = &OQS_KEX_rlwe_msrln16_alice_0; + k->bob = &OQS_KEX_rlwe_msrln16_bob; + k->alice_1 = &OQS_KEX_rlwe_msrln16_alice_1; + k->alice_priv_free = &OQS_KEX_rlwe_msrln16_alice_priv_free; + k->free = &OQS_KEX_rlwe_msrln16_free; + + return k; +} + +int OQS_KEX_rlwe_msrln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) { + + int ret; + + *alice_priv = NULL; + /* alice_msg is alice's public key */ + *alice_msg = NULL; + + *alice_msg = malloc(OQS_RLWE_MSRLN16_PKA_BYTES); + if (*alice_msg == NULL) { + goto err; + } + *alice_priv = malloc(1024 * sizeof(uint32_t)); + if (*alice_priv == NULL) { + goto err; + } + + if (oqs_rlwe_msrln16_KeyGeneration_A((int32_t *) *alice_priv, (unsigned char *) *alice_msg, k->rand) != CRYPTO_SUCCESS) { + goto err; + } + 
*alice_msg_len = OQS_RLWE_MSRLN16_PKA_BYTES; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*alice_msg); + *alice_msg = NULL; + free(*alice_priv); + *alice_priv = NULL; + +cleanup: + return ret; +} + +int OQS_KEX_rlwe_msrln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + + *bob_msg = NULL; + *key = NULL; + + if (alice_msg_len != OQS_RLWE_MSRLN16_PKA_BYTES) { + goto err; + } + *bob_msg = malloc(OQS_RLWE_MSRLN16_PKB_BYTES); + if (*bob_msg == NULL) { + goto err; + } + *key = malloc(OQS_RLWE_MSRLN16_SHAREDKEY_BYTES); + if (*key == NULL) { + goto err; + } + + if (oqs_rlwe_msrln16_SecretAgreement_B((unsigned char *) alice_msg, (unsigned char *) *key, (unsigned char *) *bob_msg, k->rand) != CRYPTO_SUCCESS) { + goto err; + } + + *key_len = OQS_RLWE_MSRLN16_SHAREDKEY_BYTES; + *bob_msg_len = OQS_RLWE_MSRLN16_PKB_BYTES; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*bob_msg); + *bob_msg = NULL; + free(*key); + *key = NULL; + +cleanup: + + return ret; +} + +int OQS_KEX_rlwe_msrln16_alice_1(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + + *key = NULL; + + if (bob_msg_len != OQS_RLWE_MSRLN16_PKB_BYTES) { + goto err; + } + + *key = malloc(OQS_RLWE_MSRLN16_SHAREDKEY_BYTES); + if (*key == NULL) { + goto err; + } + + if (oqs_rlwe_msrln16_SecretAgreement_A((unsigned char *) bob_msg, (int32_t *) alice_priv, (unsigned char *) *key) != CRYPTO_SUCCESS) { + goto err; + } + + *key_len = OQS_RLWE_MSRLN16_SHAREDKEY_BYTES; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*key); + *key = NULL; + +cleanup: + + return ret; +} + +void OQS_KEX_rlwe_msrln16_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) { + if (alice_priv) { + free(alice_priv); + } +} + +void OQS_KEX_rlwe_msrln16_free(OQS_KEX *k) { + if (!k) { + return; + } + free(k->method_name); + k->method_name = 
NULL; + free(k); +} diff --git a/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.h b/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.h new file mode 100644 index 0000000000000000000000000000000000000000..ad1ee4f5202998afed40e13de1783d237e7767ab --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.h @@ -0,0 +1,24 @@ +/** + * \file kex_rlwe_msrln16.h + * \brief Header for ring-LWE key exchange protocol from the Microsoft LatticeCrypto library + */ + +#ifndef __OQS_KEX_RLWE_MSRLN16_H +#define __OQS_KEX_RLWE_MSRLN16_H + +#include <stddef.h> +#include <stdint.h> + +#include <oqs/kex.h> +#include <oqs/rand.h> + +OQS_KEX *OQS_KEX_rlwe_msrln16_new(OQS_RAND *rand); + +int OQS_KEX_rlwe_msrln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len); +int OQS_KEX_rlwe_msrln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len); +int OQS_KEX_rlwe_msrln16_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + +void OQS_KEX_rlwe_msrln16_alice_priv_free(OQS_KEX *k, void *alice_priv); +void OQS_KEX_rlwe_msrln16_free(OQS_KEX *k); + +#endif diff --git a/crypt/liboqs/kex_rlwe_msrln16/ntt_constants.c b/crypt/liboqs/kex_rlwe_msrln16/ntt_constants.c new file mode 100644 index 0000000000000000000000000000000000000000..828324ac67e9b5bb55bc4dc0a425674924a41c2a --- /dev/null +++ b/crypt/liboqs/kex_rlwe_msrln16/ntt_constants.c @@ -0,0 +1,136 @@ +/**************************************************************************************** +* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library +* +* Copyright (c) Microsoft Corporation. All rights reserved. 
+* +* +* Abstract: fixed constants for the Number Theoretic Transform (NTT) +* +*****************************************************************************************/ + +#include "LatticeCrypto_priv.h" + +// N^-1 * prime_scale^-8 +const int32_t Ninv8_ntt1024_12289 = 8350; +// N^-1 * prime_scale^-7 * omegainv_rev_ntt1024_12289[1] +const int32_t omegainv7N_rev_ntt1024_12289 = 795; +// N^-1 * prime_scale^-11 +const int32_t Ninv11_ntt1024_12289 = 2585; +// N^-1 * prime_scale^-10 * omegainv_rev_ntt1024_12289[1] +const int32_t omegainv10N_rev_ntt1024_12289 = 10953; + +// Index-reversed matrices containing powers of psi (psi_rev_nttxxx_yyy) and inverse powers of omega (omegainv_rev_nttxxx_yyy), +// where xxx is parameter N and yyy is the prime q. + +const int32_t psi_rev_ntt1024_12289[1024] = { + 8193, 493, 6845, 9908, 1378, 10377, 7952, 435, 10146, 1065, 404, 7644, 1207, 3248, 11121, 5277, 2437, 3646, 2987, 6022, 9867, 6250, 10102, 9723, 1002, 7278, 4284, 7201, + 875, 3780, 1607, 4976, 8146, 4714, 242, 1537, 3704, 9611, 5019, 545, 5084, 10657, 4885, 11272, 3066, 12262, 3763, 10849, 2912, 5698, 11935, 4861, 7277, 9808, 11244, 2859, + 7188, 1067, 2401, 11847, 390, 11516, 8511, 3833, 2780, 7094, 4895, 1484, 2305, 5042, 8236, 2645, 7875, 9442, 2174, 7917, 1689, 3364, 4057, 3271, 10863, 4654, 1777, 10626, + 3636, 7351, 9585, 6998, 160, 3149, 4437, 12286, 10123, 3915, 7370, 12176, 4048, 2249, 2884, 1153, 9103, 6882, 2126, 10659, 3510, 5332, 2865, 9919, 9320, 8311, 9603, 9042, + 3016, 12046, 9289, 11618, 7098, 3136, 9890, 3400, 2178, 1544, 5559, 420, 8304, 4905, 476, 3531, 9326, 4896, 9923, 3051, 3091, 81, 1000, 4320, 1177, 8034, 9521, 10654, 11563, + 7678, 10436, 12149, 3014, 9088, 5086, 1326, 11119, 2319, 11334, 790, 2747, 7443, 3135, 3712, 1062, 9995, 7484, 8736, 9283, 2744, 11726, 2975, 9664, 949, 7468, 9650, 7266, + 5828, 6561, 7698, 3328, 6512, 1351, 7311, 8155, 5736, 722, 10984, 4043, 7143, 10810, 1, 8668, 2545, 3504, 8747, 11077, 1646, 9094, 5860, 1759, 8582, 3694, 
7110, 8907, 11934, + 8058, 9741, 9558, 3932, 5911, 4890, 3637, 8830, 5542, 12144, 5755, 7657, 7901, 11029, 11955, 9863, 10861, 1696, 3284, 2881, 7197, 2089, 9000, 2013, 729, 9048, 11809, 2842, + 11267, 9, 6498, 544, 2468, 339, 1381, 2525, 8112, 3584, 6958, 4989, 10616, 8011, 5374, 9452, 12159, 4354, 9893, 7837, 3296, 8340, 7222, 2197, 118, 2476, 5767, 827, 8541, + 11336, 3434, 3529, 2908, 12071, 2361, 1843, 3030, 8174, 6147, 9842, 8326, 576, 10335, 10238, 10484, 9407, 11836, 5908, 418, 3772, 7515, 5429, 7552, 10996, 12133, 2767, 3969, + 8298, 6413, 10008, 2031, 5333, 10800, 9789, 10706, 5942, 1263, 49, 5915, 10806, 11939, 10777, 1815, 5383, 3202, 4493, 6920, 10232, 1975, 8532, 2925, 347, 4754, 1858, 11863, + 8974, 9551, 5868, 9634, 5735, 11566, 12115, 10596, 3009, 6190, 11994, 6523, 652, 3762, 9370, 4016, 4077, 8561, 4049, 5990, 11130, 11143, 948, 325, 1404, 6992, 6119, 8333, + 10929, 1200, 5184, 2555, 6122, 1594, 10327, 7183, 5961, 2692, 12121, 4298, 3329, 5919, 4433, 8455, 7032, 1747, 3123, 3054, 6803, 5782, 10723, 9341, 2503, 683, 2459, 3656, + 64, 4240, 3570, 835, 6065, 4046, 11580, 10970, 3150, 10331, 4322, 2078, 1112, 4079, 11231, 441, 922, 1050, 4536, 6844, 8429, 2683, 11099, 3818, 6171, 8500, 12142, 6833, 4449, + 4749, 6752, 7500, 7822, 8214, 6974, 7965, 7373, 2169, 522, 5079, 3262, 10316, 6715, 1278, 9945, 3514, 11248, 11271, 5925, 468, 3988, 382, 11973, 5339, 6843, 6196, 8579, 2033, + 8291, 1922, 3879, 11035, 973, 6854, 10930, 5206, 6760, 3199, 56, 3565, 654, 1702, 10302, 5862, 6153, 5415, 8646, 11889, 10561, 7341, 6152, 7232, 4698, 8844, 4780, 10240, 4912, + 1321, 12097, 7048, 2920, 3127, 4169, 11502, 3482, 11279, 5468, 5874, 11612, 6055, 8953, 52, 3174, 10966, 9523, 151, 2127, 3957, 2839, 9784, 6383, 1579, 431, 7507, 5886, 3029, + 6695, 4213, 504, 11684, 2302, 8689, 9026, 4624, 6212, 11868, 4080, 6221, 8687, 1003, 8757, 241, 58, 5009, 10333, 885, 6281, 3438, 9445, 11314, 8077, 6608, 3477, 142, 1105, + 8841, 343, 4538, 1908, 1208, 4727, 7078, 10423, 
10125, 6873, 11573, 10179, 416, 814, 1705, 2450, 8700, 717, 9307, 1373, 8186, 2429, 10568, 10753, 7228, 11071, 438, 8774, 5993, + 3278, 4209, 6877, 3449, 1136, 3708, 3238, 2926, 1826, 4489, 3171, 8024, 8611, 1928, 464, 3205, 8930, 7080, 1092, 10900, 10221, 11943, 4404, 9126, 4032, 7449, 6127, 8067, 10763, + 125, 540, 8921, 8062, 612, 8051, 12229, 9572, 9089, 10754, 10029, 68, 6453, 7723, 4781, 4924, 1014, 448, 3942, 5232, 1327, 8682, 3744, 7326, 3056, 9761, 5845, 5588, 412, 7187, + 3975, 4883, 3087, 6454, 2257, 7784, 5676, 1417, 8400, 11710, 5596, 5987, 9175, 2769, 5966, 212, 6555, 11113, 5508, 11014, 1125, 4860, 10844, 1131, 4267, 6636, 2275, 9828, 5063, + 4176, 3765, 1518, 8794, 4564, 10224, 5826, 3534, 3961, 4145, 10533, 506, 11034, 6505, 10897, 2674, 10077, 3338, 9013, 3511, 6811, 11111, 2776, 1165, 2575, 8881, 10347, 377, + 4578, 11914, 10669, 10104, 392, 10453, 425, 9489, 193, 2231, 6197, 1038, 11366, 6204, 8122, 2894, 3654, 10975, 10545, 6599, 2455, 11951, 3947, 20, 5002, 5163, 4608, 8946, 8170, + 10138, 1522, 8665, 10397, 3344, 5598, 10964, 6565, 11260, 1945, 11041, 9847, 7174, 4939, 2148, 6330, 3959, 5797, 4913, 3528, 8054, 3825, 8914, 9998, 4335, 8896, 9342, 3982, + 6680, 11653, 7790, 6617, 1737, 622, 10485, 10886, 6195, 7100, 1687, 406, 12143, 5268, 9389, 12050, 994, 7735, 5464, 7383, 4670, 512, 364, 9929, 3028, 5216, 5518, 1226, 7550, + 8038, 7043, 7814, 11053, 3017, 3121, 7584, 2600, 11232, 6780, 12085, 5219, 1409, 9600, 4605, 8151, 12109, 463, 8882, 8308, 10821, 9247, 10945, 9806, 2054, 6203, 6643, 3120, + 6105, 8348, 8536, 6919, 8753, 11007, 8717, 9457, 2021, 9060, 4730, 3929, 10583, 3723, 845, 1936, 7, 5054, 3154, 3285, 4360, 3805, 11522, 2213, 4153, 12239, 12073, 5526, 769, + 4099, 3944, 5604, 5530, 11024, 9282, 2171, 3480, 7434, 8520, 3232, 11996, 9656, 1406, 2945, 5349, 7207, 4590, 11607, 11309, 5202, 844, 7082, 4050, 8016, 9068, 9694, 8452, 7000, + 5662, 567, 2941, 8619, 3808, 4987, 2373, 5135, 63, 7605, 3360, 11839, 10345, 578, 6921, 7628, 510, 
5386, 2622, 7806, 5703, 10783, 9224, 11379, 5900, 4719, 11538, 3502, 5789, + 10631, 5618, 826, 5043, 3090, 10891, 9951, 7596, 2293, 11872, 6151, 3469, 4443, 8871, 1555, 1802, 5103, 1891, 1223, 2334, 7878, 1590, 881, 365, 1927, 11274, 4510, 9652, 2946, + 6828, 1280, 614, 10918, 12265, 7250, 6742, 9804, 11385, 2276, 11307, 2593, 879, 7899, 8071, 3454, 8531, 3795, 9021, 5776, 1849, 7766, 7988, 457, 8, 530, 9663, 7785, 11511, 3578, + 7592, 10588, 3466, 8972, 9757, 3332, 139, 2046, 2940, 10808, 9332, 874, 2301, 5650, 12119, 150, 648, 8000, 9982, 9416, 2827, 2434, 11498, 6481, 12268, 9754, 11169, 11823, 11259, + 3821, 10608, 2929, 6263, 4649, 6320, 9687, 10388, 502, 5118, 8496, 6226, 10716, 8443, 7624, 6883, 9269, 6616, 8620, 5287, 944, 7519, 6125, 1882, 11249, 10254, 5410, 1251, 1790, + 5275, 8449, 10447, 4113, 72, 2828, 4352, 7455, 2712, 11048, 7911, 3451, 4094, 6508, 3045, 11194, 2643, 1783, 7211, 4974, 7724, 9811, 9449, 3019, 4194, 2730, 6878, 10421, 2253, + 4518, 9195, 7469, 11129, 9173, 12100, 1763, 2209, 9617, 5170, 865, 1279, 1694, 10759, 8420, 4423, 10555, 3815, 5832, 10939}; + +const int32_t omegainv_rev_ntt1024_12289[1024] = { + 8193, 11796, 2381, 5444, 11854, 4337, 1912, 10911, 7012, 1168, 9041, 11082, 4645, 11885, 11224, 2143, 7313, 10682, 8509, 11414, 5088, 8005, 5011, 11287, 2566, 2187, 6039, 2422, + 6267, 9302, 8643, 9852, 8456, 3778, 773, 11899, 442, 9888, 11222, 5101, 9430, 1045, 2481, 5012, 7428, 354, 6591, 9377, 1440, 8526, 27, 9223, 1017, 7404, 1632, 7205, 11744, 7270, + 2678, 8585, 10752, 12047, 7575, 4143, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 8889, 2399, 9153, 5191, 671, 3000, 243, 9273, 3247, 2686, 3978, 2969, 2370, 9424, 6957, + 8779, 1630, 10163, 5407, 3186, 11136, 9405, 10040, 8241, 113, 4919, 8374, 2166, 3, 7852, 9140, 12129, 5291, 2704, 4938, 8653, 1663, 10512, 7635, 1426, 9018, 8232, 8925, 10600, 4372, + 10115, 2847, 4414, 9644, 4053, 7247, 9984, 10805, 7394, 5195, 9509, 953, 3748, 11462, 6522, 9813, 12171, 10092, 5067, 3949, 
8993, 4452, 2396, 7935, 130, 2837, 6915, 4278, 1673, 7300, + 5331, 8705, 4177, 9764, 10908, 11950, 9821, 11745, 5791, 12280, 1022, 9447, 480, 3241, 11560, 10276, 3289, 10200, 5092, 9408, 9005, 10593, 1428, 2426, 334, 1260, 4388, 4632, 6534, + 145, 6747, 3459, 8652, 7399, 6378, 8357, 2731, 2548, 4231, 355, 3382, 5179, 8595, 3707, 10530, 6429, 3195, 10643, 1212, 3542, 8785, 9744, 3621, 12288, 1479, 5146, 8246, 1305, 11567, + 6553, 4134, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, 2639, 4821, 11340, 2625, 9314, 563, 9545, 3006, 3553, 4805, 2294, 11227, 8577, 9154, 4846, 9542, 11499, 955, 9970, 1170, + 10963, 7203, 3201, 9275, 140, 1853, 4611, 726, 1635, 2768, 4255, 11112, 7969, 11289, 12208, 9198, 9238, 2366, 7393, 2963, 11184, 12147, 8812, 5681, 4212, 975, 2844, 8851, 6008, 11404, + 1956, 7280, 12231, 12048, 3532, 11286, 3602, 6068, 8209, 421, 6077, 7665, 3263, 3600, 9987, 605, 11785, 8076, 5594, 9260, 6403, 4782, 11858, 10710, 5906, 2505, 9450, 8332, 10162, + 12138, 2766, 1323, 9115, 12237, 3336, 6234, 677, 6415, 6821, 1010, 8807, 787, 8120, 9162, 9369, 5241, 192, 10968, 7377, 2049, 7509, 3445, 7591, 5057, 6137, 4948, 1728, 400, 3643, + 6874, 6136, 6427, 1987, 10587, 11635, 8724, 12233, 9090, 5529, 7083, 1359, 5435, 11316, 1254, 8410, 10367, 3998, 10256, 3710, 6093, 5446, 6950, 316, 11907, 8301, 11821, 6364, 1018, + 1041, 8775, 2344, 11011, 5574, 1973, 9027, 7210, 11767, 10120, 4916, 4324, 5315, 4075, 4467, 4789, 5537, 7540, 7840, 5456, 147, 3789, 6118, 8471, 1190, 9606, 3860, 5445, 7753, 11239, + 11367, 11848, 1058, 8210, 11177, 10211, 7967, 1958, 9139, 1319, 709, 8243, 6224, 11454, 8719, 8049, 12225, 8633, 9830, 11606, 9786, 2948, 1566, 6507, 5486, 9235, 9166, 10542, 5257, + 3834, 7856, 6370, 8960, 7991, 168, 9597, 6328, 5106, 1962, 10695, 6167, 9734, 7105, 11089, 1360, 3956, 6170, 5297, 10885, 11964, 11341, 1146, 1159, 6299, 8240, 3728, 8212, 8273, 2919, + 8527, 11637, 5766, 295, 6099, 9280, 1693, 174, 723, 6554, 2655, 6421, 2738, 3315, 426, 10431, 7535, 
11942, 9364, 3757, 10314, 2057, 5369, 7796, 9087, 6906, 10474, 1512, 350, 1483, + 6374, 12240, 11026, 6347, 1583, 2500, 1489, 6956, 10258, 2281, 5876, 3991, 8320, 9522, 156, 1293, 4737, 6860, 4774, 8517, 11871, 6381, 453, 2882, 1805, 2051, 1954, 11713, 3963, 2447, + 6142, 4115, 9259, 10446, 9928, 218, 9381, 8760, 8855, 1350, 6457, 8474, 1734, 7866, 3869, 1530, 10595, 11010, 11424, 7119, 2672, 10080, 10526, 189, 3116, 1160, 4820, 3094, 7771, 10036, + 1868, 5411, 9559, 8095, 9270, 2840, 2478, 4565, 7315, 5078, 10506, 9646, 1095, 9244, 5781, 8195, 8838, 4378, 1241, 9577, 4834, 7937, 9461, 12217, 8176, 1842, 3840, 7014, 10499, 11038, + 6879, 2035, 1040, 10407, 6164, 4770, 11345, 7002, 3669, 5673, 3020, 5406, 4665, 3846, 1573, 6063, 3793, 7171, 11787, 1901, 2602, 5969, 7640, 6026, 9360, 1681, 8468, 1030, 466, 1120, + 2535, 21, 5808, 791, 9855, 9462, 2873, 2307, 4289, 11641, 12139, 170, 6639, 9988, 11415, 2957, 1481, 9349, 10243, 12150, 8957, 2532, 3317, 8823, 1701, 4697, 8711, 778, 4504, 2626, + 11759, 12281, 11832, 4301, 4523, 10440, 6513, 3268, 8494, 3758, 8835, 4218, 4390, 11410, 9696, 982, 10013, 904, 2485, 5547, 5039, 24, 1371, 11675, 11009, 5461, 9343, 2637, 7779, 1015, + 10362, 11924, 11408, 10699, 4411, 9955, 11066, 10398, 7186, 10487, 10734, 3418, 7846, 8820, 6138, 417, 9996, 4693, 2338, 1398, 9199, 7246, 11463, 6671, 1658, 6500, 8787, 751, 7570, + 6389, 910, 3065, 1506, 6586, 4483, 9667, 6903, 11779, 4661, 5368, 11711, 1944, 450, 8929, 4684, 12226, 7154, 9916, 7302, 8481, 3670, 9348, 11722, 6627, 5289, 3837, 2595, 3221, 4273, + 8239, 5207, 11445, 7087, 980, 682, 7699, 5082, 6940, 9344, 10883, 2633, 293, 9057, 3769, 4855, 8809, 10118, 3007, 1265, 6759, 6685, 8345, 8190, 11520, 6763, 216, 50, 8136, 10076, 767, + 8484, 7929, 9004, 9135, 7235, 12282, 10353, 11444, 8566, 1706, 8360, 7559, 3229, 10268, 2832, 3572, 1282, 3536, 5370, 3753, 3941, 6184, 9169, 5646, 6086, 10235, 2483, 1344, 3042, 1468, + 3981, 3407, 11826, 180, 4138, 7684, 2689, 10880, 7070, 204, 5509, 
1057, 9689, 4705, 9168, 9272, 1236, 4475, 5246, 4251, 4739, 11063, 6771, 7073, 9261, 2360, 11925, 11777, 7619, 4906, + 6825, 4554, 11295, 239, 2900, 7021, 146, 11883, 10602, 5189, 6094, 1403, 1804, 11667, 10552, 5672, 4499, 636, 5609, 8307, 2947, 3393, 7954, 2291, 3375, 8464, 4235, 8761, 7376, 6492, + 8330, 5959, 10141, 7350, 5115, 2442, 1248, 10344, 1029, 5724, 1325, 6691, 8945, 1892, 3624, 10767, 2151, 4119, 3343, 7681, 7126, 7287, 12269, 8342, 338, 9834, 5690, 1744, 1314, 8635, + 9395, 4167, 6085, 923, 11251, 6092, 10058, 12096, 2800, 11864, 1836, 11897, 2185, 1620, 375, 7711, 11912, 1942, 3408, 9714, 11124, 9513, 1178, 5478, 8778, 3276, 8951, 2212, 9615, 1392, + 5784, 1255, 11783, 1756, 8144, 8328, 8755, 6463, 2065, 7725, 3495, 10771, 8524, 8113, 7226, 2461, 10014, 5653, 8022, 11158, 1445, 7429, 11164, 1275, 6781, 1176, 5734, 12077, 6323, 9520, + 3114, 6302, 6693, 579, 3889, 10872, 6613, 4505, 10032, 5835, 9202, 7406, 8314, 5102, 11877, 6701, 6444, 2528, 9233, 4963, 8545, 3607, 10962, 7057, 8347, 11841, 11275, 7365, 7508, 4566, + 5836, 12221, 2260, 1535, 3200, 2717, 60, 4238, 11677, 4227, 3368, 11749, 12164, 1526, 4222, 6162, 4840, 8257, 3163, 7885, 346, 2068, 1389, 11197, 5209, 3359, 9084, 11825, 10361, 3678, + 4265, 9118, 7800, 10463, 9363, 9051, 8581, 11153, 8840, 5412, 8080, 9011, 6296, 3515, 11851, 1218, 5061, 1536, 1721, 9860, 4103, 10916, 2982, 11572, 3589, 9839, 10584, 11475, 11873, + 2110, 716, 5416, 2164, 1866, 5211, 7562, 11081, 10381, 7751, 11946, 3448}; + +const int32_t psi_rev_ntt512_12289[512] = { + 8193, 493, 6845, 9908, 1378, 10377, 7952, 435, 10146, 1065, 404, 7644, 1207, 3248, 11121, 5277, 2437, 3646, 2987, 6022, 9867, 6250, 10102, 9723, 1002, 7278, 4284, 7201, 875, 3780, 1607, + 4976, 8146, 4714, 242, 1537, 3704, 9611, 5019, 545, 5084, 10657, 4885, 11272, 3066, 12262, 3763, 10849, 2912, 5698, 11935, 4861, 7277, 9808, 11244, 2859, 7188, 1067, 2401, 11847, 390, + 11516, 8511, 3833, 2780, 7094, 4895, 1484, 2305, 5042, 8236, 2645, 7875, 9442, 
2174, 7917, 1689, 3364, 4057, 3271, 10863, 4654, 1777, 10626, 3636, 7351, 9585, 6998, 160, 3149, 4437, + 12286, 10123, 3915, 7370, 12176, 4048, 2249, 2884, 1153, 9103, 6882, 2126, 10659, 3510, 5332, 2865, 9919, 9320, 8311, 9603, 9042, 3016, 12046, 9289, 11618, 7098, 3136, 9890, 3400, 2178, + 1544, 5559, 420, 8304, 4905, 476, 3531, 9326, 4896, 9923, 3051, 3091, 81, 1000, 4320, 1177, 8034, 9521, 10654, 11563, 7678, 10436, 12149, 3014, 9088, 5086, 1326, 11119, 2319, 11334, 790, + 2747, 7443, 3135, 3712, 1062, 9995, 7484, 8736, 9283, 2744, 11726, 2975, 9664, 949, 7468, 9650, 7266, 5828, 6561, 7698, 3328, 6512, 1351, 7311, 8155, 5736, 722, 10984, 4043, 7143, 10810, + 1, 8668, 2545, 3504, 8747, 11077, 1646, 9094, 5860, 1759, 8582, 3694, 7110, 8907, 11934, 8058, 9741, 9558, 3932, 5911, 4890, 3637, 8830, 5542, 12144, 5755, 7657, 7901, 11029, 11955, 9863, + 10861, 1696, 3284, 2881, 7197, 2089, 9000, 2013, 729, 9048, 11809, 2842, 11267, 9, 6498, 544, 2468, 339, 1381, 2525, 8112, 3584, 6958, 4989, 10616, 8011, 5374, 9452, 12159, 4354, 9893, + 7837, 3296, 8340, 7222, 2197, 118, 2476, 5767, 827, 8541, 11336, 8855, 8760, 9381, 218, 9928, 10446, 9259, 4115, 6142, 2447, 3963, 11713, 1954, 2051, 1805, 2882, 453, 6381, 11871, 8517, + 4774, 6860, 4737, 1293, 156, 9522, 8320, 3991, 5876, 2281, 10258, 6956, 1489, 2500, 1583, 6347, 11026, 12240, 6374, 1483, 350, 1512, 10474, 6906, 9087, 7796, 5369, 2057, 10314, 3757, + 9364, 11942, 7535, 10431, 426, 3315, 2738, 6421, 2655, 6554, 723, 174, 1693, 9280, 6099, 295, 5766, 11637, 8527, 2919, 8273, 8212, 3728, 8240, 6299, 1159, 1146, 11341, 11964, 10885, 5297, + 6170, 3956, 1360, 11089, 7105, 9734, 6167, 10695, 1962, 5106, 6328, 9597, 168, 7991, 8960, 6370, 7856, 3834, 5257, 10542, 9166, 9235, 5486, 6507, 1566, 2948, 9786, 11606, 9830, 8633, + 12225, 8049, 8719, 11454, 6224, 8243, 709, 1319, 9139, 1958, 7967, 10211, 11177, 8210, 1058, 11848, 11367, 11239, 7753, 5445, 3860, 9606, 1190, 8471, 6118, 3789, 147, 5456, 7840, 7540, + 5537, 4789, 
4467, 4075, 5315, 4324, 4916, 10120, 11767, 7210, 9027, 1973, 5574, 11011, 2344, 8775, 1041, 1018, 6364, 11821, 8301, 11907, 316, 6950, 5446, 6093, 3710, 10256, 3998, 10367, + 8410, 1254, 11316, 5435, 1359, 7083, 5529, 9090, 12233, 8724, 11635, 10587, 1987, 6427, 6136, 6874, 3643, 400, 1728, 4948, 6137, 5057, 7591, 3445, 7509, 2049, 7377, 10968, 192, 5241, 9369, + 9162, 8120, 787, 8807, 1010, 6821, 6415, 677, 6234, 3336, 12237, 9115, 1323, 2766, 12138, 10162, 8332, 9450, 2505, 5906, 10710, 11858, 4782, 6403, 9260, 5594, 8076, 11785, 605, 9987, 3600, + 3263, 7665, 6077, 421, 8209, 6068, 3602, 11286, 3532, 12048, 12231, 7280, 1956, 11404, 6008, 8851, 2844, 975, 4212, 5681, 8812, 12147, 11184}; + +const int32_t omegainv_rev_ntt512_12289[512] = { + 8193, 11796, 2381, 5444, 11854, 4337, 1912, 10911, 7012, 1168, 9041, 11082, 4645, 11885, 11224, 2143, 7313, 10682, 8509, 11414, 5088, 8005, 5011, 11287, 2566, 2187, 6039, 2422, 6267, 9302, + 8643, 9852, 8456, 3778, 773, 11899, 442, 9888, 11222, 5101, 9430, 1045, 2481, 5012, 7428, 354, 6591, 9377, 1440, 8526, 27, 9223, 1017, 7404, 1632, 7205, 11744, 7270, 2678, 8585, 10752, + 12047, 7575, 4143, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 8889, 2399, 9153, 5191, 671, 3000, 243, 9273, 3247, 2686, 3978, 2969, 2370, 9424, 6957, 8779, 1630, 10163, 5407, 3186, + 11136, 9405, 10040, 8241, 113, 4919, 8374, 2166, 3, 7852, 9140, 12129, 5291, 2704, 4938, 8653, 1663, 10512, 7635, 1426, 9018, 8232, 8925, 10600, 4372, 10115, 2847, 4414, 9644, 4053, 7247, + 9984, 10805, 7394, 5195, 9509, 953, 3748, 11462, 6522, 9813, 12171, 10092, 5067, 3949, 8993, 4452, 2396, 7935, 130, 2837, 6915, 4278, 1673, 7300, 5331, 8705, 4177, 9764, 10908, 11950, 9821, + 11745, 5791, 12280, 1022, 9447, 480, 3241, 11560, 10276, 3289, 10200, 5092, 9408, 9005, 10593, 1428, 2426, 334, 1260, 4388, 4632, 6534, 145, 6747, 3459, 8652, 7399, 6378, 8357, 2731, 2548, + 4231, 355, 3382, 5179, 8595, 3707, 10530, 6429, 3195, 10643, 1212, 3542, 8785, 9744, 3621, 12288, 
1479, 5146, 8246, 1305, 11567, 6553, 4134, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, + 2639, 4821, 11340, 2625, 9314, 563, 9545, 3006, 3553, 4805, 2294, 11227, 8577, 9154, 4846, 9542, 11499, 955, 9970, 1170, 10963, 7203, 3201, 9275, 140, 1853, 4611, 726, 1635, 2768, 4255, + 11112, 7969, 11289, 12208, 9198, 9238, 2366, 7393, 2963, 1105, 142, 3477, 6608, 8077, 11314, 9445, 3438, 6281, 885, 10333, 5009, 58, 241, 8757, 1003, 8687, 6221, 4080, 11868, 6212, 4624, + 9026, 8689, 2302, 11684, 504, 4213, 6695, 3029, 5886, 7507, 431, 1579, 6383, 9784, 2839, 3957, 2127, 151, 9523, 10966, 3174, 52, 8953, 6055, 11612, 5874, 5468, 11279, 3482, 11502, 4169, + 3127, 2920, 7048, 12097, 1321, 4912, 10240, 4780, 8844, 4698, 7232, 6152, 7341, 10561, 11889, 8646, 5415, 6153, 5862, 10302, 1702, 654, 3565, 56, 3199, 6760, 5206, 10930, 6854, 973, 11035, + 3879, 1922, 8291, 2033, 8579, 6196, 6843, 5339, 11973, 382, 3988, 468, 5925, 11271, 11248, 3514, 9945, 1278, 6715, 10316, 3262, 5079, 522, 2169, 7373, 7965, 6974, 8214, 7822, 7500, 6752, + 4749, 4449, 6833, 12142, 8500, 6171, 3818, 11099, 2683, 8429, 6844, 4536, 1050, 922, 441, 11231, 4079, 1112, 2078, 4322, 10331, 3150, 10970, 11580, 4046, 6065, 835, 3570, 4240, 64, 3656, + 2459, 683, 2503, 9341, 10723, 5782, 6803, 3054, 3123, 1747, 7032, 8455, 4433, 5919, 3329, 4298, 12121, 2692, 5961, 7183, 10327, 1594, 6122, 2555, 5184, 1200, 10929, 8333, 6119, 6992, 1404, + 325, 948, 11143, 11130, 5990, 4049, 8561, 4077, 4016, 9370, 3762, 652, 6523, 11994, 6190, 3009, 10596, 12115, 11566, 5735, 9634, 5868, 9551, 8974, 11863, 1858, 4754, 347, 2925, 8532, 1975, + 10232, 6920, 4493, 3202, 5383, 1815, 10777, 11939, 10806, 5915, 49, 1263, 5942, 10706, 9789, 10800, 5333, 2031, 10008, 6413, 8298, 3969, 2767, 12133, 10996, 7552, 5429, 7515, 3772, 418, 5908, + 11836, 9407, 10484, 10238, 10335, 576, 8326, 9842, 6147, 8174, 3030, 1843, 2361, 12071, 2908, 3529, 3434}; diff --git a/crypt/liboqs/kex_rlwe_newhope/LICENSE.txt 
b/crypt/liboqs/kex_rlwe_newhope/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..ae012a47e6d65b106cb25938857c57975b37c368 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/LICENSE.txt @@ -0,0 +1,9 @@ +The files in this directory (except kex_rlwe_newhope.*) were originally written +by Erdem Alkim, Léo Ducas, Thomas Pöppelmann, and Peter Schwabe +(https://github.com/tpoeppelmann/newhope). + + +The following license applies to all files in the src/kex_rlwe_newhope directory. + + +Public domain. diff --git a/crypt/liboqs/kex_rlwe_newhope/Makefile.am b/crypt/liboqs/kex_rlwe_newhope/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..ae47e2c5f8fade19d0d6847a92676ff4c7a32859 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/Makefile.am @@ -0,0 +1,8 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libnewhope.la + +libnewhope_la_SOURCES = kex_rlwe_newhope.c + +libnewhope_la_CPPFLAGS = -I../../include -I. +libnewhope_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/Makefile.am b/crypt/liboqs/kex_rlwe_newhope/avx2/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..2efd00fb2f620cdc2e51f4ba04b10302ea2b6098 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/Makefile.am @@ -0,0 +1,12 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libnewhope_avx2.la librevert.la + +libnewhope_avx2_la_SOURCES = crypto_stream_chacha20.c precomp.c +libnewhope_avx2_la_SOURCES += crypto_hash_sha256.c chacha.S cbd.s consts.c omegas.c ntt_double.s +libnewhope_avx2_la_SOURCES += bitrev.s crypto_stream_aes256ctr.s hr.s rec.s poly_pointwise.s kex_rlwe_newhope_avx2.c + +libnewhope_avx2_la_CPPFLAGS = -I../../../include -I. 
+libnewhope_avx2_la_CPPFLAGS += $(AM_CPPFLAGS) -O3 -fomit-frame-pointer -msse2avx -mavx2 -march=corei7-avx + +librevert.la: + cd ../../../ && bash patches/cleanup-patch.sh kex_rlwe_newhope/avx2 diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/bitrev.s b/crypt/liboqs/kex_rlwe_newhope/avx2/bitrev.s new file mode 100644 index 0000000000000000000000000000000000000000..002a1eef30617801abba9ad860b6138985e72feb --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/bitrev.s @@ -0,0 +1,9976 @@ + +# qhasm: int64 input_0 + +# qhasm: int64 input_1 + +# qhasm: int64 input_2 + +# qhasm: int64 input_3 + +# qhasm: int64 input_4 + +# qhasm: int64 input_5 + +# qhasm: stack64 input_6 + +# qhasm: stack64 input_7 + +# qhasm: int64 caller_r11 + +# qhasm: int64 caller_r12 + +# qhasm: int64 caller_r13 + +# qhasm: int64 caller_r14 + +# qhasm: int64 caller_r15 + +# qhasm: int64 caller_rbx + +# qhasm: int64 caller_rbp + +# qhasm: int64 temp1 + +# qhasm: int64 temp2 + +# qhasm: int64 ap + +# qhasm: enter bitrev_vector +.p2align 5 +.global _bitrev_vector +.global bitrev_vector +_bitrev_vector: +bitrev_vector: +movq %rsp,%r11 +and $31,%r11 +add $0,%r11 +sub %r11,%rsp + +# qhasm: ap = input_0 +# asm 1: mov <input_0=int64#1,>ap=int64#1 +# asm 2: mov <input_0=%rdi,>ap=%rdi +mov %rdi,%rdi + +# qhasm: temp1 = mem64[ap + 4] +# asm 1: mov 4(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 4(<ap=%rdi),>temp1=%esi +mov 4(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2048] +# asm 1: mov 2048(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2048(<ap=%rdi),>temp2=%edx +mov 2048(%rdi),%edx + +# qhasm: mem64[ap + 2048] = temp1 +# asm 1: mov <temp1=int64#2,2048(<ap=int64#1) +# asm 2: mov <temp1=%esi,2048(<ap=%rdi) +mov %esi,2048(%rdi) + +# qhasm: mem64[ap + 4] = temp2 +# asm 1: mov <temp2=int64#3,4(<ap=int64#1) +# asm 2: mov <temp2=%edx,4(<ap=%rdi) +mov %edx,4(%rdi) + +# qhasm: temp1 = mem64[ap + 8] +# asm 1: mov 8(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 8(<ap=%rdi),>temp1=%esi +mov 8(%rdi),%esi + +# qhasm: temp2 = mem64[ap 
+ 1024] +# asm 1: mov 1024(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1024(<ap=%rdi),>temp2=%edx +mov 1024(%rdi),%edx + +# qhasm: mem64[ap + 1024] = temp1 +# asm 1: mov <temp1=int64#2,1024(<ap=int64#1) +# asm 2: mov <temp1=%esi,1024(<ap=%rdi) +mov %esi,1024(%rdi) + +# qhasm: mem64[ap + 8] = temp2 +# asm 1: mov <temp2=int64#3,8(<ap=int64#1) +# asm 2: mov <temp2=%edx,8(<ap=%rdi) +mov %edx,8(%rdi) + +# qhasm: temp1 = mem64[ap + 12] +# asm 1: mov 12(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 12(<ap=%rdi),>temp1=%esi +mov 12(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3072] +# asm 1: mov 3072(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3072(<ap=%rdi),>temp2=%edx +mov 3072(%rdi),%edx + +# qhasm: mem64[ap + 3072] = temp1 +# asm 1: mov <temp1=int64#2,3072(<ap=int64#1) +# asm 2: mov <temp1=%esi,3072(<ap=%rdi) +mov %esi,3072(%rdi) + +# qhasm: mem64[ap + 12] = temp2 +# asm 1: mov <temp2=int64#3,12(<ap=int64#1) +# asm 2: mov <temp2=%edx,12(<ap=%rdi) +mov %edx,12(%rdi) + +# qhasm: temp1 = mem64[ap + 16] +# asm 1: mov 16(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 16(<ap=%rdi),>temp1=%esi +mov 16(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 512] +# asm 1: mov 512(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 512(<ap=%rdi),>temp2=%edx +mov 512(%rdi),%edx + +# qhasm: mem64[ap + 512] = temp1 +# asm 1: mov <temp1=int64#2,512(<ap=int64#1) +# asm 2: mov <temp1=%esi,512(<ap=%rdi) +mov %esi,512(%rdi) + +# qhasm: mem64[ap + 16] = temp2 +# asm 1: mov <temp2=int64#3,16(<ap=int64#1) +# asm 2: mov <temp2=%edx,16(<ap=%rdi) +mov %edx,16(%rdi) + +# qhasm: temp1 = mem64[ap + 20] +# asm 1: mov 20(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 20(<ap=%rdi),>temp1=%esi +mov 20(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2560] +# asm 1: mov 2560(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2560(<ap=%rdi),>temp2=%edx +mov 2560(%rdi),%edx + +# qhasm: mem64[ap + 2560] = temp1 +# asm 1: mov <temp1=int64#2,2560(<ap=int64#1) +# asm 2: mov <temp1=%esi,2560(<ap=%rdi) +mov %esi,2560(%rdi) + +# qhasm: mem64[ap + 20] = temp2 +# asm 1: 
mov <temp2=int64#3,20(<ap=int64#1) +# asm 2: mov <temp2=%edx,20(<ap=%rdi) +mov %edx,20(%rdi) + +# qhasm: temp1 = mem64[ap + 24] +# asm 1: mov 24(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 24(<ap=%rdi),>temp1=%esi +mov 24(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1536] +# asm 1: mov 1536(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1536(<ap=%rdi),>temp2=%edx +mov 1536(%rdi),%edx + +# qhasm: mem64[ap + 1536] = temp1 +# asm 1: mov <temp1=int64#2,1536(<ap=int64#1) +# asm 2: mov <temp1=%esi,1536(<ap=%rdi) +mov %esi,1536(%rdi) + +# qhasm: mem64[ap + 24] = temp2 +# asm 1: mov <temp2=int64#3,24(<ap=int64#1) +# asm 2: mov <temp2=%edx,24(<ap=%rdi) +mov %edx,24(%rdi) + +# qhasm: temp1 = mem64[ap + 28] +# asm 1: mov 28(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 28(<ap=%rdi),>temp1=%esi +mov 28(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3584] +# asm 1: mov 3584(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3584(<ap=%rdi),>temp2=%edx +mov 3584(%rdi),%edx + +# qhasm: mem64[ap + 3584] = temp1 +# asm 1: mov <temp1=int64#2,3584(<ap=int64#1) +# asm 2: mov <temp1=%esi,3584(<ap=%rdi) +mov %esi,3584(%rdi) + +# qhasm: mem64[ap + 28] = temp2 +# asm 1: mov <temp2=int64#3,28(<ap=int64#1) +# asm 2: mov <temp2=%edx,28(<ap=%rdi) +mov %edx,28(%rdi) + +# qhasm: temp1 = mem64[ap + 32] +# asm 1: mov 32(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 32(<ap=%rdi),>temp1=%esi +mov 32(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 256] +# asm 1: mov 256(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 256(<ap=%rdi),>temp2=%edx +mov 256(%rdi),%edx + +# qhasm: mem64[ap + 256] = temp1 +# asm 1: mov <temp1=int64#2,256(<ap=int64#1) +# asm 2: mov <temp1=%esi,256(<ap=%rdi) +mov %esi,256(%rdi) + +# qhasm: mem64[ap + 32] = temp2 +# asm 1: mov <temp2=int64#3,32(<ap=int64#1) +# asm 2: mov <temp2=%edx,32(<ap=%rdi) +mov %edx,32(%rdi) + +# qhasm: temp1 = mem64[ap + 36] +# asm 1: mov 36(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 36(<ap=%rdi),>temp1=%esi +mov 36(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2304] +# asm 1: mov 
2304(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2304(<ap=%rdi),>temp2=%edx +mov 2304(%rdi),%edx + +# qhasm: mem64[ap + 2304] = temp1 +# asm 1: mov <temp1=int64#2,2304(<ap=int64#1) +# asm 2: mov <temp1=%esi,2304(<ap=%rdi) +mov %esi,2304(%rdi) + +# qhasm: mem64[ap + 36] = temp2 +# asm 1: mov <temp2=int64#3,36(<ap=int64#1) +# asm 2: mov <temp2=%edx,36(<ap=%rdi) +mov %edx,36(%rdi) + +# qhasm: temp1 = mem64[ap + 40] +# asm 1: mov 40(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 40(<ap=%rdi),>temp1=%esi +mov 40(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1280] +# asm 1: mov 1280(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1280(<ap=%rdi),>temp2=%edx +mov 1280(%rdi),%edx + +# qhasm: mem64[ap + 1280] = temp1 +# asm 1: mov <temp1=int64#2,1280(<ap=int64#1) +# asm 2: mov <temp1=%esi,1280(<ap=%rdi) +mov %esi,1280(%rdi) + +# qhasm: mem64[ap + 40] = temp2 +# asm 1: mov <temp2=int64#3,40(<ap=int64#1) +# asm 2: mov <temp2=%edx,40(<ap=%rdi) +mov %edx,40(%rdi) + +# qhasm: temp1 = mem64[ap + 44] +# asm 1: mov 44(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 44(<ap=%rdi),>temp1=%esi +mov 44(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3328] +# asm 1: mov 3328(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3328(<ap=%rdi),>temp2=%edx +mov 3328(%rdi),%edx + +# qhasm: mem64[ap + 3328] = temp1 +# asm 1: mov <temp1=int64#2,3328(<ap=int64#1) +# asm 2: mov <temp1=%esi,3328(<ap=%rdi) +mov %esi,3328(%rdi) + +# qhasm: mem64[ap + 44] = temp2 +# asm 1: mov <temp2=int64#3,44(<ap=int64#1) +# asm 2: mov <temp2=%edx,44(<ap=%rdi) +mov %edx,44(%rdi) + +# qhasm: temp1 = mem64[ap + 48] +# asm 1: mov 48(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 48(<ap=%rdi),>temp1=%esi +mov 48(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 768] +# asm 1: mov 768(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 768(<ap=%rdi),>temp2=%edx +mov 768(%rdi),%edx + +# qhasm: mem64[ap + 768] = temp1 +# asm 1: mov <temp1=int64#2,768(<ap=int64#1) +# asm 2: mov <temp1=%esi,768(<ap=%rdi) +mov %esi,768(%rdi) + +# qhasm: mem64[ap + 48] = temp2 +# asm 1: mov 
<temp2=int64#3,48(<ap=int64#1) +# asm 2: mov <temp2=%edx,48(<ap=%rdi) +mov %edx,48(%rdi) + +# qhasm: temp1 = mem64[ap + 52] +# asm 1: mov 52(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 52(<ap=%rdi),>temp1=%esi +mov 52(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2816] +# asm 1: mov 2816(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2816(<ap=%rdi),>temp2=%edx +mov 2816(%rdi),%edx + +# qhasm: mem64[ap + 2816] = temp1 +# asm 1: mov <temp1=int64#2,2816(<ap=int64#1) +# asm 2: mov <temp1=%esi,2816(<ap=%rdi) +mov %esi,2816(%rdi) + +# qhasm: mem64[ap + 52] = temp2 +# asm 1: mov <temp2=int64#3,52(<ap=int64#1) +# asm 2: mov <temp2=%edx,52(<ap=%rdi) +mov %edx,52(%rdi) + +# qhasm: temp1 = mem64[ap + 56] +# asm 1: mov 56(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 56(<ap=%rdi),>temp1=%esi +mov 56(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1792] +# asm 1: mov 1792(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1792(<ap=%rdi),>temp2=%edx +mov 1792(%rdi),%edx + +# qhasm: mem64[ap + 1792] = temp1 +# asm 1: mov <temp1=int64#2,1792(<ap=int64#1) +# asm 2: mov <temp1=%esi,1792(<ap=%rdi) +mov %esi,1792(%rdi) + +# qhasm: mem64[ap + 56] = temp2 +# asm 1: mov <temp2=int64#3,56(<ap=int64#1) +# asm 2: mov <temp2=%edx,56(<ap=%rdi) +mov %edx,56(%rdi) + +# qhasm: temp1 = mem64[ap + 60] +# asm 1: mov 60(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 60(<ap=%rdi),>temp1=%esi +mov 60(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3840] +# asm 1: mov 3840(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3840(<ap=%rdi),>temp2=%edx +mov 3840(%rdi),%edx + +# qhasm: mem64[ap + 3840] = temp1 +# asm 1: mov <temp1=int64#2,3840(<ap=int64#1) +# asm 2: mov <temp1=%esi,3840(<ap=%rdi) +mov %esi,3840(%rdi) + +# qhasm: mem64[ap + 60] = temp2 +# asm 1: mov <temp2=int64#3,60(<ap=int64#1) +# asm 2: mov <temp2=%edx,60(<ap=%rdi) +mov %edx,60(%rdi) + +# qhasm: temp1 = mem64[ap + 64] +# asm 1: mov 64(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 64(<ap=%rdi),>temp1=%esi +mov 64(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 128] +# asm 1: mov 
128(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 128(<ap=%rdi),>temp2=%edx +mov 128(%rdi),%edx + +# qhasm: mem64[ap + 128] = temp1 +# asm 1: mov <temp1=int64#2,128(<ap=int64#1) +# asm 2: mov <temp1=%esi,128(<ap=%rdi) +mov %esi,128(%rdi) + +# qhasm: mem64[ap + 64] = temp2 +# asm 1: mov <temp2=int64#3,64(<ap=int64#1) +# asm 2: mov <temp2=%edx,64(<ap=%rdi) +mov %edx,64(%rdi) + +# qhasm: temp1 = mem64[ap + 68] +# asm 1: mov 68(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 68(<ap=%rdi),>temp1=%esi +mov 68(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2176] +# asm 1: mov 2176(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2176(<ap=%rdi),>temp2=%edx +mov 2176(%rdi),%edx + +# qhasm: mem64[ap + 2176] = temp1 +# asm 1: mov <temp1=int64#2,2176(<ap=int64#1) +# asm 2: mov <temp1=%esi,2176(<ap=%rdi) +mov %esi,2176(%rdi) + +# qhasm: mem64[ap + 68] = temp2 +# asm 1: mov <temp2=int64#3,68(<ap=int64#1) +# asm 2: mov <temp2=%edx,68(<ap=%rdi) +mov %edx,68(%rdi) + +# qhasm: temp1 = mem64[ap + 72] +# asm 1: mov 72(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 72(<ap=%rdi),>temp1=%esi +mov 72(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1152] +# asm 1: mov 1152(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1152(<ap=%rdi),>temp2=%edx +mov 1152(%rdi),%edx + +# qhasm: mem64[ap + 1152] = temp1 +# asm 1: mov <temp1=int64#2,1152(<ap=int64#1) +# asm 2: mov <temp1=%esi,1152(<ap=%rdi) +mov %esi,1152(%rdi) + +# qhasm: mem64[ap + 72] = temp2 +# asm 1: mov <temp2=int64#3,72(<ap=int64#1) +# asm 2: mov <temp2=%edx,72(<ap=%rdi) +mov %edx,72(%rdi) + +# qhasm: temp1 = mem64[ap + 76] +# asm 1: mov 76(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 76(<ap=%rdi),>temp1=%esi +mov 76(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3200] +# asm 1: mov 3200(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3200(<ap=%rdi),>temp2=%edx +mov 3200(%rdi),%edx + +# qhasm: mem64[ap + 3200] = temp1 +# asm 1: mov <temp1=int64#2,3200(<ap=int64#1) +# asm 2: mov <temp1=%esi,3200(<ap=%rdi) +mov %esi,3200(%rdi) + +# qhasm: mem64[ap + 76] = temp2 +# asm 1: mov 
<temp2=int64#3,76(<ap=int64#1) +# asm 2: mov <temp2=%edx,76(<ap=%rdi) +mov %edx,76(%rdi) + +# qhasm: temp1 = mem64[ap + 80] +# asm 1: mov 80(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 80(<ap=%rdi),>temp1=%esi +mov 80(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 640] +# asm 1: mov 640(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 640(<ap=%rdi),>temp2=%edx +mov 640(%rdi),%edx + +# qhasm: mem64[ap + 640] = temp1 +# asm 1: mov <temp1=int64#2,640(<ap=int64#1) +# asm 2: mov <temp1=%esi,640(<ap=%rdi) +mov %esi,640(%rdi) + +# qhasm: mem64[ap + 80] = temp2 +# asm 1: mov <temp2=int64#3,80(<ap=int64#1) +# asm 2: mov <temp2=%edx,80(<ap=%rdi) +mov %edx,80(%rdi) + +# qhasm: temp1 = mem64[ap + 84] +# asm 1: mov 84(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 84(<ap=%rdi),>temp1=%esi +mov 84(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2688] +# asm 1: mov 2688(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2688(<ap=%rdi),>temp2=%edx +mov 2688(%rdi),%edx + +# qhasm: mem64[ap + 2688] = temp1 +# asm 1: mov <temp1=int64#2,2688(<ap=int64#1) +# asm 2: mov <temp1=%esi,2688(<ap=%rdi) +mov %esi,2688(%rdi) + +# qhasm: mem64[ap + 84] = temp2 +# asm 1: mov <temp2=int64#3,84(<ap=int64#1) +# asm 2: mov <temp2=%edx,84(<ap=%rdi) +mov %edx,84(%rdi) + +# qhasm: temp1 = mem64[ap + 88] +# asm 1: mov 88(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 88(<ap=%rdi),>temp1=%esi +mov 88(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1664] +# asm 1: mov 1664(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1664(<ap=%rdi),>temp2=%edx +mov 1664(%rdi),%edx + +# qhasm: mem64[ap + 1664] = temp1 +# asm 1: mov <temp1=int64#2,1664(<ap=int64#1) +# asm 2: mov <temp1=%esi,1664(<ap=%rdi) +mov %esi,1664(%rdi) + +# qhasm: mem64[ap + 88] = temp2 +# asm 1: mov <temp2=int64#3,88(<ap=int64#1) +# asm 2: mov <temp2=%edx,88(<ap=%rdi) +mov %edx,88(%rdi) + +# qhasm: temp1 = mem64[ap + 92] +# asm 1: mov 92(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 92(<ap=%rdi),>temp1=%esi +mov 92(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3712] +# asm 1: mov 
3712(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3712(<ap=%rdi),>temp2=%edx +mov 3712(%rdi),%edx + +# qhasm: mem64[ap + 3712] = temp1 +# asm 1: mov <temp1=int64#2,3712(<ap=int64#1) +# asm 2: mov <temp1=%esi,3712(<ap=%rdi) +mov %esi,3712(%rdi) + +# qhasm: mem64[ap + 92] = temp2 +# asm 1: mov <temp2=int64#3,92(<ap=int64#1) +# asm 2: mov <temp2=%edx,92(<ap=%rdi) +mov %edx,92(%rdi) + +# qhasm: temp1 = mem64[ap + 96] +# asm 1: mov 96(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 96(<ap=%rdi),>temp1=%esi +mov 96(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 384] +# asm 1: mov 384(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 384(<ap=%rdi),>temp2=%edx +mov 384(%rdi),%edx + +# qhasm: mem64[ap + 384] = temp1 +# asm 1: mov <temp1=int64#2,384(<ap=int64#1) +# asm 2: mov <temp1=%esi,384(<ap=%rdi) +mov %esi,384(%rdi) + +# qhasm: mem64[ap + 96] = temp2 +# asm 1: mov <temp2=int64#3,96(<ap=int64#1) +# asm 2: mov <temp2=%edx,96(<ap=%rdi) +mov %edx,96(%rdi) + +# qhasm: temp1 = mem64[ap + 100] +# asm 1: mov 100(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 100(<ap=%rdi),>temp1=%esi +mov 100(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2432] +# asm 1: mov 2432(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2432(<ap=%rdi),>temp2=%edx +mov 2432(%rdi),%edx + +# qhasm: mem64[ap + 2432] = temp1 +# asm 1: mov <temp1=int64#2,2432(<ap=int64#1) +# asm 2: mov <temp1=%esi,2432(<ap=%rdi) +mov %esi,2432(%rdi) + +# qhasm: mem64[ap + 100] = temp2 +# asm 1: mov <temp2=int64#3,100(<ap=int64#1) +# asm 2: mov <temp2=%edx,100(<ap=%rdi) +mov %edx,100(%rdi) + +# qhasm: temp1 = mem64[ap + 104] +# asm 1: mov 104(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 104(<ap=%rdi),>temp1=%esi +mov 104(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1408] +# asm 1: mov 1408(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1408(<ap=%rdi),>temp2=%edx +mov 1408(%rdi),%edx + +# qhasm: mem64[ap + 1408] = temp1 +# asm 1: mov <temp1=int64#2,1408(<ap=int64#1) +# asm 2: mov <temp1=%esi,1408(<ap=%rdi) +mov %esi,1408(%rdi) + +# qhasm: mem64[ap + 104] = temp2 +# asm 1: mov 
<temp2=int64#3,104(<ap=int64#1) +# asm 2: mov <temp2=%edx,104(<ap=%rdi) +mov %edx,104(%rdi) + +# qhasm: temp1 = mem64[ap + 108] +# asm 1: mov 108(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 108(<ap=%rdi),>temp1=%esi +mov 108(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3456] +# asm 1: mov 3456(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3456(<ap=%rdi),>temp2=%edx +mov 3456(%rdi),%edx + +# qhasm: mem64[ap + 3456] = temp1 +# asm 1: mov <temp1=int64#2,3456(<ap=int64#1) +# asm 2: mov <temp1=%esi,3456(<ap=%rdi) +mov %esi,3456(%rdi) + +# qhasm: mem64[ap + 108] = temp2 +# asm 1: mov <temp2=int64#3,108(<ap=int64#1) +# asm 2: mov <temp2=%edx,108(<ap=%rdi) +mov %edx,108(%rdi) + +# qhasm: temp1 = mem64[ap + 112] +# asm 1: mov 112(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 112(<ap=%rdi),>temp1=%esi +mov 112(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 896] +# asm 1: mov 896(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 896(<ap=%rdi),>temp2=%edx +mov 896(%rdi),%edx + +# qhasm: mem64[ap + 896] = temp1 +# asm 1: mov <temp1=int64#2,896(<ap=int64#1) +# asm 2: mov <temp1=%esi,896(<ap=%rdi) +mov %esi,896(%rdi) + +# qhasm: mem64[ap + 112] = temp2 +# asm 1: mov <temp2=int64#3,112(<ap=int64#1) +# asm 2: mov <temp2=%edx,112(<ap=%rdi) +mov %edx,112(%rdi) + +# qhasm: temp1 = mem64[ap + 116] +# asm 1: mov 116(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 116(<ap=%rdi),>temp1=%esi +mov 116(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2944] +# asm 1: mov 2944(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2944(<ap=%rdi),>temp2=%edx +mov 2944(%rdi),%edx + +# qhasm: mem64[ap + 2944] = temp1 +# asm 1: mov <temp1=int64#2,2944(<ap=int64#1) +# asm 2: mov <temp1=%esi,2944(<ap=%rdi) +mov %esi,2944(%rdi) + +# qhasm: mem64[ap + 116] = temp2 +# asm 1: mov <temp2=int64#3,116(<ap=int64#1) +# asm 2: mov <temp2=%edx,116(<ap=%rdi) +mov %edx,116(%rdi) + +# qhasm: temp1 = mem64[ap + 120] +# asm 1: mov 120(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 120(<ap=%rdi),>temp1=%esi +mov 120(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1920] +# asm 
1: mov 1920(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1920(<ap=%rdi),>temp2=%edx +mov 1920(%rdi),%edx + +# qhasm: mem64[ap + 1920] = temp1 +# asm 1: mov <temp1=int64#2,1920(<ap=int64#1) +# asm 2: mov <temp1=%esi,1920(<ap=%rdi) +mov %esi,1920(%rdi) + +# qhasm: mem64[ap + 120] = temp2 +# asm 1: mov <temp2=int64#3,120(<ap=int64#1) +# asm 2: mov <temp2=%edx,120(<ap=%rdi) +mov %edx,120(%rdi) + +# qhasm: temp1 = mem64[ap + 124] +# asm 1: mov 124(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 124(<ap=%rdi),>temp1=%esi +mov 124(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3968] +# asm 1: mov 3968(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3968(<ap=%rdi),>temp2=%edx +mov 3968(%rdi),%edx + +# qhasm: mem64[ap + 3968] = temp1 +# asm 1: mov <temp1=int64#2,3968(<ap=int64#1) +# asm 2: mov <temp1=%esi,3968(<ap=%rdi) +mov %esi,3968(%rdi) + +# qhasm: mem64[ap + 124] = temp2 +# asm 1: mov <temp2=int64#3,124(<ap=int64#1) +# asm 2: mov <temp2=%edx,124(<ap=%rdi) +mov %edx,124(%rdi) + +# qhasm: temp1 = mem64[ap + 132] +# asm 1: mov 132(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 132(<ap=%rdi),>temp1=%esi +mov 132(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2112] +# asm 1: mov 2112(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2112(<ap=%rdi),>temp2=%edx +mov 2112(%rdi),%edx + +# qhasm: mem64[ap + 2112] = temp1 +# asm 1: mov <temp1=int64#2,2112(<ap=int64#1) +# asm 2: mov <temp1=%esi,2112(<ap=%rdi) +mov %esi,2112(%rdi) + +# qhasm: mem64[ap + 132] = temp2 +# asm 1: mov <temp2=int64#3,132(<ap=int64#1) +# asm 2: mov <temp2=%edx,132(<ap=%rdi) +mov %edx,132(%rdi) + +# qhasm: temp1 = mem64[ap + 136] +# asm 1: mov 136(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 136(<ap=%rdi),>temp1=%esi +mov 136(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1088] +# asm 1: mov 1088(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1088(<ap=%rdi),>temp2=%edx +mov 1088(%rdi),%edx + +# qhasm: mem64[ap + 1088] = temp1 +# asm 1: mov <temp1=int64#2,1088(<ap=int64#1) +# asm 2: mov <temp1=%esi,1088(<ap=%rdi) +mov %esi,1088(%rdi) + +# qhasm: mem64[ap + 
136] = temp2 +# asm 1: mov <temp2=int64#3,136(<ap=int64#1) +# asm 2: mov <temp2=%edx,136(<ap=%rdi) +mov %edx,136(%rdi) + +# qhasm: temp1 = mem64[ap + 140] +# asm 1: mov 140(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 140(<ap=%rdi),>temp1=%esi +mov 140(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3136] +# asm 1: mov 3136(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3136(<ap=%rdi),>temp2=%edx +mov 3136(%rdi),%edx + +# qhasm: mem64[ap + 3136] = temp1 +# asm 1: mov <temp1=int64#2,3136(<ap=int64#1) +# asm 2: mov <temp1=%esi,3136(<ap=%rdi) +mov %esi,3136(%rdi) + +# qhasm: mem64[ap + 140] = temp2 +# asm 1: mov <temp2=int64#3,140(<ap=int64#1) +# asm 2: mov <temp2=%edx,140(<ap=%rdi) +mov %edx,140(%rdi) + +# qhasm: temp1 = mem64[ap + 144] +# asm 1: mov 144(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 144(<ap=%rdi),>temp1=%esi +mov 144(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 576] +# asm 1: mov 576(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 576(<ap=%rdi),>temp2=%edx +mov 576(%rdi),%edx + +# qhasm: mem64[ap + 576] = temp1 +# asm 1: mov <temp1=int64#2,576(<ap=int64#1) +# asm 2: mov <temp1=%esi,576(<ap=%rdi) +mov %esi,576(%rdi) + +# qhasm: mem64[ap + 144] = temp2 +# asm 1: mov <temp2=int64#3,144(<ap=int64#1) +# asm 2: mov <temp2=%edx,144(<ap=%rdi) +mov %edx,144(%rdi) + +# qhasm: temp1 = mem64[ap + 148] +# asm 1: mov 148(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 148(<ap=%rdi),>temp1=%esi +mov 148(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2624] +# asm 1: mov 2624(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2624(<ap=%rdi),>temp2=%edx +mov 2624(%rdi),%edx + +# qhasm: mem64[ap + 2624] = temp1 +# asm 1: mov <temp1=int64#2,2624(<ap=int64#1) +# asm 2: mov <temp1=%esi,2624(<ap=%rdi) +mov %esi,2624(%rdi) + +# qhasm: mem64[ap + 148] = temp2 +# asm 1: mov <temp2=int64#3,148(<ap=int64#1) +# asm 2: mov <temp2=%edx,148(<ap=%rdi) +mov %edx,148(%rdi) + +# qhasm: temp1 = mem64[ap + 152] +# asm 1: mov 152(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 152(<ap=%rdi),>temp1=%esi +mov 152(%rdi),%esi + +# qhasm: temp2 
= mem64[ap + 1600] +# asm 1: mov 1600(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1600(<ap=%rdi),>temp2=%edx +mov 1600(%rdi),%edx + +# qhasm: mem64[ap + 1600] = temp1 +# asm 1: mov <temp1=int64#2,1600(<ap=int64#1) +# asm 2: mov <temp1=%esi,1600(<ap=%rdi) +mov %esi,1600(%rdi) + +# qhasm: mem64[ap + 152] = temp2 +# asm 1: mov <temp2=int64#3,152(<ap=int64#1) +# asm 2: mov <temp2=%edx,152(<ap=%rdi) +mov %edx,152(%rdi) + +# qhasm: temp1 = mem64[ap + 156] +# asm 1: mov 156(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 156(<ap=%rdi),>temp1=%esi +mov 156(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3648] +# asm 1: mov 3648(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3648(<ap=%rdi),>temp2=%edx +mov 3648(%rdi),%edx + +# qhasm: mem64[ap + 3648] = temp1 +# asm 1: mov <temp1=int64#2,3648(<ap=int64#1) +# asm 2: mov <temp1=%esi,3648(<ap=%rdi) +mov %esi,3648(%rdi) + +# qhasm: mem64[ap + 156] = temp2 +# asm 1: mov <temp2=int64#3,156(<ap=int64#1) +# asm 2: mov <temp2=%edx,156(<ap=%rdi) +mov %edx,156(%rdi) + +# qhasm: temp1 = mem64[ap + 160] +# asm 1: mov 160(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 160(<ap=%rdi),>temp1=%esi +mov 160(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 320] +# asm 1: mov 320(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 320(<ap=%rdi),>temp2=%edx +mov 320(%rdi),%edx + +# qhasm: mem64[ap + 320] = temp1 +# asm 1: mov <temp1=int64#2,320(<ap=int64#1) +# asm 2: mov <temp1=%esi,320(<ap=%rdi) +mov %esi,320(%rdi) + +# qhasm: mem64[ap + 160] = temp2 +# asm 1: mov <temp2=int64#3,160(<ap=int64#1) +# asm 2: mov <temp2=%edx,160(<ap=%rdi) +mov %edx,160(%rdi) + +# qhasm: temp1 = mem64[ap + 164] +# asm 1: mov 164(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 164(<ap=%rdi),>temp1=%esi +mov 164(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2368] +# asm 1: mov 2368(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2368(<ap=%rdi),>temp2=%edx +mov 2368(%rdi),%edx + +# qhasm: mem64[ap + 2368] = temp1 +# asm 1: mov <temp1=int64#2,2368(<ap=int64#1) +# asm 2: mov <temp1=%esi,2368(<ap=%rdi) +mov %esi,2368(%rdi) + +# 
qhasm: mem64[ap + 164] = temp2 +# asm 1: mov <temp2=int64#3,164(<ap=int64#1) +# asm 2: mov <temp2=%edx,164(<ap=%rdi) +mov %edx,164(%rdi) + +# qhasm: temp1 = mem64[ap + 168] +# asm 1: mov 168(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 168(<ap=%rdi),>temp1=%esi +mov 168(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1344] +# asm 1: mov 1344(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1344(<ap=%rdi),>temp2=%edx +mov 1344(%rdi),%edx + +# qhasm: mem64[ap + 1344] = temp1 +# asm 1: mov <temp1=int64#2,1344(<ap=int64#1) +# asm 2: mov <temp1=%esi,1344(<ap=%rdi) +mov %esi,1344(%rdi) + +# qhasm: mem64[ap + 168] = temp2 +# asm 1: mov <temp2=int64#3,168(<ap=int64#1) +# asm 2: mov <temp2=%edx,168(<ap=%rdi) +mov %edx,168(%rdi) + +# qhasm: temp1 = mem64[ap + 172] +# asm 1: mov 172(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 172(<ap=%rdi),>temp1=%esi +mov 172(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3392] +# asm 1: mov 3392(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3392(<ap=%rdi),>temp2=%edx +mov 3392(%rdi),%edx + +# qhasm: mem64[ap + 3392] = temp1 +# asm 1: mov <temp1=int64#2,3392(<ap=int64#1) +# asm 2: mov <temp1=%esi,3392(<ap=%rdi) +mov %esi,3392(%rdi) + +# qhasm: mem64[ap + 172] = temp2 +# asm 1: mov <temp2=int64#3,172(<ap=int64#1) +# asm 2: mov <temp2=%edx,172(<ap=%rdi) +mov %edx,172(%rdi) + +# qhasm: temp1 = mem64[ap + 176] +# asm 1: mov 176(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 176(<ap=%rdi),>temp1=%esi +mov 176(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 832] +# asm 1: mov 832(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 832(<ap=%rdi),>temp2=%edx +mov 832(%rdi),%edx + +# qhasm: mem64[ap + 832] = temp1 +# asm 1: mov <temp1=int64#2,832(<ap=int64#1) +# asm 2: mov <temp1=%esi,832(<ap=%rdi) +mov %esi,832(%rdi) + +# qhasm: mem64[ap + 176] = temp2 +# asm 1: mov <temp2=int64#3,176(<ap=int64#1) +# asm 2: mov <temp2=%edx,176(<ap=%rdi) +mov %edx,176(%rdi) + +# qhasm: temp1 = mem64[ap + 180] +# asm 1: mov 180(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 180(<ap=%rdi),>temp1=%esi +mov 180(%rdi),%esi 
+ +# qhasm: temp2 = mem64[ap + 2880] +# asm 1: mov 2880(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2880(<ap=%rdi),>temp2=%edx +mov 2880(%rdi),%edx + +# qhasm: mem64[ap + 2880] = temp1 +# asm 1: mov <temp1=int64#2,2880(<ap=int64#1) +# asm 2: mov <temp1=%esi,2880(<ap=%rdi) +mov %esi,2880(%rdi) + +# qhasm: mem64[ap + 180] = temp2 +# asm 1: mov <temp2=int64#3,180(<ap=int64#1) +# asm 2: mov <temp2=%edx,180(<ap=%rdi) +mov %edx,180(%rdi) + +# qhasm: temp1 = mem64[ap + 184] +# asm 1: mov 184(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 184(<ap=%rdi),>temp1=%esi +mov 184(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1856] +# asm 1: mov 1856(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1856(<ap=%rdi),>temp2=%edx +mov 1856(%rdi),%edx + +# qhasm: mem64[ap + 1856] = temp1 +# asm 1: mov <temp1=int64#2,1856(<ap=int64#1) +# asm 2: mov <temp1=%esi,1856(<ap=%rdi) +mov %esi,1856(%rdi) + +# qhasm: mem64[ap + 184] = temp2 +# asm 1: mov <temp2=int64#3,184(<ap=int64#1) +# asm 2: mov <temp2=%edx,184(<ap=%rdi) +mov %edx,184(%rdi) + +# qhasm: temp1 = mem64[ap + 188] +# asm 1: mov 188(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 188(<ap=%rdi),>temp1=%esi +mov 188(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3904] +# asm 1: mov 3904(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3904(<ap=%rdi),>temp2=%edx +mov 3904(%rdi),%edx + +# qhasm: mem64[ap + 3904] = temp1 +# asm 1: mov <temp1=int64#2,3904(<ap=int64#1) +# asm 2: mov <temp1=%esi,3904(<ap=%rdi) +mov %esi,3904(%rdi) + +# qhasm: mem64[ap + 188] = temp2 +# asm 1: mov <temp2=int64#3,188(<ap=int64#1) +# asm 2: mov <temp2=%edx,188(<ap=%rdi) +mov %edx,188(%rdi) + +# qhasm: temp1 = mem64[ap + 196] +# asm 1: mov 196(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 196(<ap=%rdi),>temp1=%esi +mov 196(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2240] +# asm 1: mov 2240(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2240(<ap=%rdi),>temp2=%edx +mov 2240(%rdi),%edx + +# qhasm: mem64[ap + 2240] = temp1 +# asm 1: mov <temp1=int64#2,2240(<ap=int64#1) +# asm 2: mov <temp1=%esi,2240(<ap=%rdi) 
+mov %esi,2240(%rdi) + +# qhasm: mem64[ap + 196] = temp2 +# asm 1: mov <temp2=int64#3,196(<ap=int64#1) +# asm 2: mov <temp2=%edx,196(<ap=%rdi) +mov %edx,196(%rdi) + +# qhasm: temp1 = mem64[ap + 200] +# asm 1: mov 200(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 200(<ap=%rdi),>temp1=%esi +mov 200(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1216] +# asm 1: mov 1216(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1216(<ap=%rdi),>temp2=%edx +mov 1216(%rdi),%edx + +# qhasm: mem64[ap + 1216] = temp1 +# asm 1: mov <temp1=int64#2,1216(<ap=int64#1) +# asm 2: mov <temp1=%esi,1216(<ap=%rdi) +mov %esi,1216(%rdi) + +# qhasm: mem64[ap + 200] = temp2 +# asm 1: mov <temp2=int64#3,200(<ap=int64#1) +# asm 2: mov <temp2=%edx,200(<ap=%rdi) +mov %edx,200(%rdi) + +# qhasm: temp1 = mem64[ap + 204] +# asm 1: mov 204(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 204(<ap=%rdi),>temp1=%esi +mov 204(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3264] +# asm 1: mov 3264(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3264(<ap=%rdi),>temp2=%edx +mov 3264(%rdi),%edx + +# qhasm: mem64[ap + 3264] = temp1 +# asm 1: mov <temp1=int64#2,3264(<ap=int64#1) +# asm 2: mov <temp1=%esi,3264(<ap=%rdi) +mov %esi,3264(%rdi) + +# qhasm: mem64[ap + 204] = temp2 +# asm 1: mov <temp2=int64#3,204(<ap=int64#1) +# asm 2: mov <temp2=%edx,204(<ap=%rdi) +mov %edx,204(%rdi) + +# qhasm: temp1 = mem64[ap + 208] +# asm 1: mov 208(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 208(<ap=%rdi),>temp1=%esi +mov 208(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 704] +# asm 1: mov 704(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 704(<ap=%rdi),>temp2=%edx +mov 704(%rdi),%edx + +# qhasm: mem64[ap + 704] = temp1 +# asm 1: mov <temp1=int64#2,704(<ap=int64#1) +# asm 2: mov <temp1=%esi,704(<ap=%rdi) +mov %esi,704(%rdi) + +# qhasm: mem64[ap + 208] = temp2 +# asm 1: mov <temp2=int64#3,208(<ap=int64#1) +# asm 2: mov <temp2=%edx,208(<ap=%rdi) +mov %edx,208(%rdi) + +# qhasm: temp1 = mem64[ap + 212] +# asm 1: mov 212(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 
212(<ap=%rdi),>temp1=%esi +mov 212(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2752] +# asm 1: mov 2752(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2752(<ap=%rdi),>temp2=%edx +mov 2752(%rdi),%edx + +# qhasm: mem64[ap + 2752] = temp1 +# asm 1: mov <temp1=int64#2,2752(<ap=int64#1) +# asm 2: mov <temp1=%esi,2752(<ap=%rdi) +mov %esi,2752(%rdi) + +# qhasm: mem64[ap + 212] = temp2 +# asm 1: mov <temp2=int64#3,212(<ap=int64#1) +# asm 2: mov <temp2=%edx,212(<ap=%rdi) +mov %edx,212(%rdi) + +# qhasm: temp1 = mem64[ap + 216] +# asm 1: mov 216(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 216(<ap=%rdi),>temp1=%esi +mov 216(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1728] +# asm 1: mov 1728(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1728(<ap=%rdi),>temp2=%edx +mov 1728(%rdi),%edx + +# qhasm: mem64[ap + 1728] = temp1 +# asm 1: mov <temp1=int64#2,1728(<ap=int64#1) +# asm 2: mov <temp1=%esi,1728(<ap=%rdi) +mov %esi,1728(%rdi) + +# qhasm: mem64[ap + 216] = temp2 +# asm 1: mov <temp2=int64#3,216(<ap=int64#1) +# asm 2: mov <temp2=%edx,216(<ap=%rdi) +mov %edx,216(%rdi) + +# qhasm: temp1 = mem64[ap + 220] +# asm 1: mov 220(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 220(<ap=%rdi),>temp1=%esi +mov 220(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3776] +# asm 1: mov 3776(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3776(<ap=%rdi),>temp2=%edx +mov 3776(%rdi),%edx + +# qhasm: mem64[ap + 3776] = temp1 +# asm 1: mov <temp1=int64#2,3776(<ap=int64#1) +# asm 2: mov <temp1=%esi,3776(<ap=%rdi) +mov %esi,3776(%rdi) + +# qhasm: mem64[ap + 220] = temp2 +# asm 1: mov <temp2=int64#3,220(<ap=int64#1) +# asm 2: mov <temp2=%edx,220(<ap=%rdi) +mov %edx,220(%rdi) + +# qhasm: temp1 = mem64[ap + 224] +# asm 1: mov 224(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 224(<ap=%rdi),>temp1=%esi +mov 224(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 448] +# asm 1: mov 448(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 448(<ap=%rdi),>temp2=%edx +mov 448(%rdi),%edx + +# qhasm: mem64[ap + 448] = temp1 +# asm 1: mov <temp1=int64#2,448(<ap=int64#1) 
+# asm 2: mov <temp1=%esi,448(<ap=%rdi) +mov %esi,448(%rdi) + +# qhasm: mem64[ap + 224] = temp2 +# asm 1: mov <temp2=int64#3,224(<ap=int64#1) +# asm 2: mov <temp2=%edx,224(<ap=%rdi) +mov %edx,224(%rdi) + +# qhasm: temp1 = mem64[ap + 228] +# asm 1: mov 228(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 228(<ap=%rdi),>temp1=%esi +mov 228(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2496] +# asm 1: mov 2496(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2496(<ap=%rdi),>temp2=%edx +mov 2496(%rdi),%edx + +# qhasm: mem64[ap + 2496] = temp1 +# asm 1: mov <temp1=int64#2,2496(<ap=int64#1) +# asm 2: mov <temp1=%esi,2496(<ap=%rdi) +mov %esi,2496(%rdi) + +# qhasm: mem64[ap + 228] = temp2 +# asm 1: mov <temp2=int64#3,228(<ap=int64#1) +# asm 2: mov <temp2=%edx,228(<ap=%rdi) +mov %edx,228(%rdi) + +# qhasm: temp1 = mem64[ap + 232] +# asm 1: mov 232(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 232(<ap=%rdi),>temp1=%esi +mov 232(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1472] +# asm 1: mov 1472(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1472(<ap=%rdi),>temp2=%edx +mov 1472(%rdi),%edx + +# qhasm: mem64[ap + 1472] = temp1 +# asm 1: mov <temp1=int64#2,1472(<ap=int64#1) +# asm 2: mov <temp1=%esi,1472(<ap=%rdi) +mov %esi,1472(%rdi) + +# qhasm: mem64[ap + 232] = temp2 +# asm 1: mov <temp2=int64#3,232(<ap=int64#1) +# asm 2: mov <temp2=%edx,232(<ap=%rdi) +mov %edx,232(%rdi) + +# qhasm: temp1 = mem64[ap + 236] +# asm 1: mov 236(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 236(<ap=%rdi),>temp1=%esi +mov 236(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3520] +# asm 1: mov 3520(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3520(<ap=%rdi),>temp2=%edx +mov 3520(%rdi),%edx + +# qhasm: mem64[ap + 3520] = temp1 +# asm 1: mov <temp1=int64#2,3520(<ap=int64#1) +# asm 2: mov <temp1=%esi,3520(<ap=%rdi) +mov %esi,3520(%rdi) + +# qhasm: mem64[ap + 236] = temp2 +# asm 1: mov <temp2=int64#3,236(<ap=int64#1) +# asm 2: mov <temp2=%edx,236(<ap=%rdi) +mov %edx,236(%rdi) + +# qhasm: temp1 = mem64[ap + 240] +# asm 1: mov 
240(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 240(<ap=%rdi),>temp1=%esi +mov 240(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 960] +# asm 1: mov 960(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 960(<ap=%rdi),>temp2=%edx +mov 960(%rdi),%edx + +# qhasm: mem64[ap + 960] = temp1 +# asm 1: mov <temp1=int64#2,960(<ap=int64#1) +# asm 2: mov <temp1=%esi,960(<ap=%rdi) +mov %esi,960(%rdi) + +# qhasm: mem64[ap + 240] = temp2 +# asm 1: mov <temp2=int64#3,240(<ap=int64#1) +# asm 2: mov <temp2=%edx,240(<ap=%rdi) +mov %edx,240(%rdi) + +# qhasm: temp1 = mem64[ap + 244] +# asm 1: mov 244(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 244(<ap=%rdi),>temp1=%esi +mov 244(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3008] +# asm 1: mov 3008(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3008(<ap=%rdi),>temp2=%edx +mov 3008(%rdi),%edx + +# qhasm: mem64[ap + 3008] = temp1 +# asm 1: mov <temp1=int64#2,3008(<ap=int64#1) +# asm 2: mov <temp1=%esi,3008(<ap=%rdi) +mov %esi,3008(%rdi) + +# qhasm: mem64[ap + 244] = temp2 +# asm 1: mov <temp2=int64#3,244(<ap=int64#1) +# asm 2: mov <temp2=%edx,244(<ap=%rdi) +mov %edx,244(%rdi) + +# qhasm: temp1 = mem64[ap + 248] +# asm 1: mov 248(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 248(<ap=%rdi),>temp1=%esi +mov 248(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1984] +# asm 1: mov 1984(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1984(<ap=%rdi),>temp2=%edx +mov 1984(%rdi),%edx + +# qhasm: mem64[ap + 1984] = temp1 +# asm 1: mov <temp1=int64#2,1984(<ap=int64#1) +# asm 2: mov <temp1=%esi,1984(<ap=%rdi) +mov %esi,1984(%rdi) + +# qhasm: mem64[ap + 248] = temp2 +# asm 1: mov <temp2=int64#3,248(<ap=int64#1) +# asm 2: mov <temp2=%edx,248(<ap=%rdi) +mov %edx,248(%rdi) + +# qhasm: temp1 = mem64[ap + 252] +# asm 1: mov 252(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 252(<ap=%rdi),>temp1=%esi +mov 252(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4032] +# asm 1: mov 4032(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4032(<ap=%rdi),>temp2=%edx +mov 4032(%rdi),%edx + +# qhasm: mem64[ap + 4032] = temp1 +# 
asm 1: mov <temp1=int64#2,4032(<ap=int64#1) +# asm 2: mov <temp1=%esi,4032(<ap=%rdi) +mov %esi,4032(%rdi) + +# qhasm: mem64[ap + 252] = temp2 +# asm 1: mov <temp2=int64#3,252(<ap=int64#1) +# asm 2: mov <temp2=%edx,252(<ap=%rdi) +mov %edx,252(%rdi) + +# qhasm: temp1 = mem64[ap + 260] +# asm 1: mov 260(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 260(<ap=%rdi),>temp1=%esi +mov 260(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2080] +# asm 1: mov 2080(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2080(<ap=%rdi),>temp2=%edx +mov 2080(%rdi),%edx + +# qhasm: mem64[ap + 2080] = temp1 +# asm 1: mov <temp1=int64#2,2080(<ap=int64#1) +# asm 2: mov <temp1=%esi,2080(<ap=%rdi) +mov %esi,2080(%rdi) + +# qhasm: mem64[ap + 260] = temp2 +# asm 1: mov <temp2=int64#3,260(<ap=int64#1) +# asm 2: mov <temp2=%edx,260(<ap=%rdi) +mov %edx,260(%rdi) + +# qhasm: temp1 = mem64[ap + 264] +# asm 1: mov 264(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 264(<ap=%rdi),>temp1=%esi +mov 264(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1056] +# asm 1: mov 1056(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1056(<ap=%rdi),>temp2=%edx +mov 1056(%rdi),%edx + +# qhasm: mem64[ap + 1056] = temp1 +# asm 1: mov <temp1=int64#2,1056(<ap=int64#1) +# asm 2: mov <temp1=%esi,1056(<ap=%rdi) +mov %esi,1056(%rdi) + +# qhasm: mem64[ap + 264] = temp2 +# asm 1: mov <temp2=int64#3,264(<ap=int64#1) +# asm 2: mov <temp2=%edx,264(<ap=%rdi) +mov %edx,264(%rdi) + +# qhasm: temp1 = mem64[ap + 268] +# asm 1: mov 268(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 268(<ap=%rdi),>temp1=%esi +mov 268(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3104] +# asm 1: mov 3104(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3104(<ap=%rdi),>temp2=%edx +mov 3104(%rdi),%edx + +# qhasm: mem64[ap + 3104] = temp1 +# asm 1: mov <temp1=int64#2,3104(<ap=int64#1) +# asm 2: mov <temp1=%esi,3104(<ap=%rdi) +mov %esi,3104(%rdi) + +# qhasm: mem64[ap + 268] = temp2 +# asm 1: mov <temp2=int64#3,268(<ap=int64#1) +# asm 2: mov <temp2=%edx,268(<ap=%rdi) +mov %edx,268(%rdi) + +# qhasm: temp1 = 
mem64[ap + 272] +# asm 1: mov 272(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 272(<ap=%rdi),>temp1=%esi +mov 272(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 544] +# asm 1: mov 544(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 544(<ap=%rdi),>temp2=%edx +mov 544(%rdi),%edx + +# qhasm: mem64[ap + 544] = temp1 +# asm 1: mov <temp1=int64#2,544(<ap=int64#1) +# asm 2: mov <temp1=%esi,544(<ap=%rdi) +mov %esi,544(%rdi) + +# qhasm: mem64[ap + 272] = temp2 +# asm 1: mov <temp2=int64#3,272(<ap=int64#1) +# asm 2: mov <temp2=%edx,272(<ap=%rdi) +mov %edx,272(%rdi) + +# qhasm: temp1 = mem64[ap + 276] +# asm 1: mov 276(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 276(<ap=%rdi),>temp1=%esi +mov 276(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2592] +# asm 1: mov 2592(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2592(<ap=%rdi),>temp2=%edx +mov 2592(%rdi),%edx + +# qhasm: mem64[ap + 2592] = temp1 +# asm 1: mov <temp1=int64#2,2592(<ap=int64#1) +# asm 2: mov <temp1=%esi,2592(<ap=%rdi) +mov %esi,2592(%rdi) + +# qhasm: mem64[ap + 276] = temp2 +# asm 1: mov <temp2=int64#3,276(<ap=int64#1) +# asm 2: mov <temp2=%edx,276(<ap=%rdi) +mov %edx,276(%rdi) + +# qhasm: temp1 = mem64[ap + 280] +# asm 1: mov 280(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 280(<ap=%rdi),>temp1=%esi +mov 280(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1568] +# asm 1: mov 1568(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1568(<ap=%rdi),>temp2=%edx +mov 1568(%rdi),%edx + +# qhasm: mem64[ap + 1568] = temp1 +# asm 1: mov <temp1=int64#2,1568(<ap=int64#1) +# asm 2: mov <temp1=%esi,1568(<ap=%rdi) +mov %esi,1568(%rdi) + +# qhasm: mem64[ap + 280] = temp2 +# asm 1: mov <temp2=int64#3,280(<ap=int64#1) +# asm 2: mov <temp2=%edx,280(<ap=%rdi) +mov %edx,280(%rdi) + +# qhasm: temp1 = mem64[ap + 284] +# asm 1: mov 284(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 284(<ap=%rdi),>temp1=%esi +mov 284(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3616] +# asm 1: mov 3616(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3616(<ap=%rdi),>temp2=%edx +mov 3616(%rdi),%edx + +# qhasm: 
mem64[ap + 3616] = temp1 +# asm 1: mov <temp1=int64#2,3616(<ap=int64#1) +# asm 2: mov <temp1=%esi,3616(<ap=%rdi) +mov %esi,3616(%rdi) + +# qhasm: mem64[ap + 284] = temp2 +# asm 1: mov <temp2=int64#3,284(<ap=int64#1) +# asm 2: mov <temp2=%edx,284(<ap=%rdi) +mov %edx,284(%rdi) + +# qhasm: temp1 = mem64[ap + 292] +# asm 1: mov 292(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 292(<ap=%rdi),>temp1=%esi +mov 292(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2336] +# asm 1: mov 2336(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2336(<ap=%rdi),>temp2=%edx +mov 2336(%rdi),%edx + +# qhasm: mem64[ap + 2336] = temp1 +# asm 1: mov <temp1=int64#2,2336(<ap=int64#1) +# asm 2: mov <temp1=%esi,2336(<ap=%rdi) +mov %esi,2336(%rdi) + +# qhasm: mem64[ap + 292] = temp2 +# asm 1: mov <temp2=int64#3,292(<ap=int64#1) +# asm 2: mov <temp2=%edx,292(<ap=%rdi) +mov %edx,292(%rdi) + +# qhasm: temp1 = mem64[ap + 296] +# asm 1: mov 296(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 296(<ap=%rdi),>temp1=%esi +mov 296(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1312] +# asm 1: mov 1312(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1312(<ap=%rdi),>temp2=%edx +mov 1312(%rdi),%edx + +# qhasm: mem64[ap + 1312] = temp1 +# asm 1: mov <temp1=int64#2,1312(<ap=int64#1) +# asm 2: mov <temp1=%esi,1312(<ap=%rdi) +mov %esi,1312(%rdi) + +# qhasm: mem64[ap + 296] = temp2 +# asm 1: mov <temp2=int64#3,296(<ap=int64#1) +# asm 2: mov <temp2=%edx,296(<ap=%rdi) +mov %edx,296(%rdi) + +# qhasm: temp1 = mem64[ap + 300] +# asm 1: mov 300(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 300(<ap=%rdi),>temp1=%esi +mov 300(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3360] +# asm 1: mov 3360(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3360(<ap=%rdi),>temp2=%edx +mov 3360(%rdi),%edx + +# qhasm: mem64[ap + 3360] = temp1 +# asm 1: mov <temp1=int64#2,3360(<ap=int64#1) +# asm 2: mov <temp1=%esi,3360(<ap=%rdi) +mov %esi,3360(%rdi) + +# qhasm: mem64[ap + 300] = temp2 +# asm 1: mov <temp2=int64#3,300(<ap=int64#1) +# asm 2: mov <temp2=%edx,300(<ap=%rdi) +mov 
%edx,300(%rdi) + +# qhasm: temp1 = mem64[ap + 304] +# asm 1: mov 304(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 304(<ap=%rdi),>temp1=%esi +mov 304(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 800] +# asm 1: mov 800(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 800(<ap=%rdi),>temp2=%edx +mov 800(%rdi),%edx + +# qhasm: mem64[ap + 800] = temp1 +# asm 1: mov <temp1=int64#2,800(<ap=int64#1) +# asm 2: mov <temp1=%esi,800(<ap=%rdi) +mov %esi,800(%rdi) + +# qhasm: mem64[ap + 304] = temp2 +# asm 1: mov <temp2=int64#3,304(<ap=int64#1) +# asm 2: mov <temp2=%edx,304(<ap=%rdi) +mov %edx,304(%rdi) + +# qhasm: temp1 = mem64[ap + 308] +# asm 1: mov 308(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 308(<ap=%rdi),>temp1=%esi +mov 308(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2848] +# asm 1: mov 2848(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2848(<ap=%rdi),>temp2=%edx +mov 2848(%rdi),%edx + +# qhasm: mem64[ap + 2848] = temp1 +# asm 1: mov <temp1=int64#2,2848(<ap=int64#1) +# asm 2: mov <temp1=%esi,2848(<ap=%rdi) +mov %esi,2848(%rdi) + +# qhasm: mem64[ap + 308] = temp2 +# asm 1: mov <temp2=int64#3,308(<ap=int64#1) +# asm 2: mov <temp2=%edx,308(<ap=%rdi) +mov %edx,308(%rdi) + +# qhasm: temp1 = mem64[ap + 312] +# asm 1: mov 312(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 312(<ap=%rdi),>temp1=%esi +mov 312(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1824] +# asm 1: mov 1824(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1824(<ap=%rdi),>temp2=%edx +mov 1824(%rdi),%edx + +# qhasm: mem64[ap + 1824] = temp1 +# asm 1: mov <temp1=int64#2,1824(<ap=int64#1) +# asm 2: mov <temp1=%esi,1824(<ap=%rdi) +mov %esi,1824(%rdi) + +# qhasm: mem64[ap + 312] = temp2 +# asm 1: mov <temp2=int64#3,312(<ap=int64#1) +# asm 2: mov <temp2=%edx,312(<ap=%rdi) +mov %edx,312(%rdi) + +# qhasm: temp1 = mem64[ap + 316] +# asm 1: mov 316(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 316(<ap=%rdi),>temp1=%esi +mov 316(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3872] +# asm 1: mov 3872(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 
3872(<ap=%rdi),>temp2=%edx +mov 3872(%rdi),%edx + +# qhasm: mem64[ap + 3872] = temp1 +# asm 1: mov <temp1=int64#2,3872(<ap=int64#1) +# asm 2: mov <temp1=%esi,3872(<ap=%rdi) +mov %esi,3872(%rdi) + +# qhasm: mem64[ap + 316] = temp2 +# asm 1: mov <temp2=int64#3,316(<ap=int64#1) +# asm 2: mov <temp2=%edx,316(<ap=%rdi) +mov %edx,316(%rdi) + +# qhasm: temp1 = mem64[ap + 324] +# asm 1: mov 324(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 324(<ap=%rdi),>temp1=%esi +mov 324(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2208] +# asm 1: mov 2208(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2208(<ap=%rdi),>temp2=%edx +mov 2208(%rdi),%edx + +# qhasm: mem64[ap + 2208] = temp1 +# asm 1: mov <temp1=int64#2,2208(<ap=int64#1) +# asm 2: mov <temp1=%esi,2208(<ap=%rdi) +mov %esi,2208(%rdi) + +# qhasm: mem64[ap + 324] = temp2 +# asm 1: mov <temp2=int64#3,324(<ap=int64#1) +# asm 2: mov <temp2=%edx,324(<ap=%rdi) +mov %edx,324(%rdi) + +# qhasm: temp1 = mem64[ap + 328] +# asm 1: mov 328(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 328(<ap=%rdi),>temp1=%esi +mov 328(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1184] +# asm 1: mov 1184(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1184(<ap=%rdi),>temp2=%edx +mov 1184(%rdi),%edx + +# qhasm: mem64[ap + 1184] = temp1 +# asm 1: mov <temp1=int64#2,1184(<ap=int64#1) +# asm 2: mov <temp1=%esi,1184(<ap=%rdi) +mov %esi,1184(%rdi) + +# qhasm: mem64[ap + 328] = temp2 +# asm 1: mov <temp2=int64#3,328(<ap=int64#1) +# asm 2: mov <temp2=%edx,328(<ap=%rdi) +mov %edx,328(%rdi) + +# qhasm: temp1 = mem64[ap + 332] +# asm 1: mov 332(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 332(<ap=%rdi),>temp1=%esi +mov 332(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3232] +# asm 1: mov 3232(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3232(<ap=%rdi),>temp2=%edx +mov 3232(%rdi),%edx + +# qhasm: mem64[ap + 3232] = temp1 +# asm 1: mov <temp1=int64#2,3232(<ap=int64#1) +# asm 2: mov <temp1=%esi,3232(<ap=%rdi) +mov %esi,3232(%rdi) + +# qhasm: mem64[ap + 332] = temp2 +# asm 1: mov 
<temp2=int64#3,332(<ap=int64#1) +# asm 2: mov <temp2=%edx,332(<ap=%rdi) +mov %edx,332(%rdi) + +# qhasm: temp1 = mem64[ap + 336] +# asm 1: mov 336(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 336(<ap=%rdi),>temp1=%esi +mov 336(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 672] +# asm 1: mov 672(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 672(<ap=%rdi),>temp2=%edx +mov 672(%rdi),%edx + +# qhasm: mem64[ap + 672] = temp1 +# asm 1: mov <temp1=int64#2,672(<ap=int64#1) +# asm 2: mov <temp1=%esi,672(<ap=%rdi) +mov %esi,672(%rdi) + +# qhasm: mem64[ap + 336] = temp2 +# asm 1: mov <temp2=int64#3,336(<ap=int64#1) +# asm 2: mov <temp2=%edx,336(<ap=%rdi) +mov %edx,336(%rdi) + +# qhasm: temp1 = mem64[ap + 340] +# asm 1: mov 340(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 340(<ap=%rdi),>temp1=%esi +mov 340(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2720] +# asm 1: mov 2720(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2720(<ap=%rdi),>temp2=%edx +mov 2720(%rdi),%edx + +# qhasm: mem64[ap + 2720] = temp1 +# asm 1: mov <temp1=int64#2,2720(<ap=int64#1) +# asm 2: mov <temp1=%esi,2720(<ap=%rdi) +mov %esi,2720(%rdi) + +# qhasm: mem64[ap + 340] = temp2 +# asm 1: mov <temp2=int64#3,340(<ap=int64#1) +# asm 2: mov <temp2=%edx,340(<ap=%rdi) +mov %edx,340(%rdi) + +# qhasm: temp1 = mem64[ap + 344] +# asm 1: mov 344(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 344(<ap=%rdi),>temp1=%esi +mov 344(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1696] +# asm 1: mov 1696(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1696(<ap=%rdi),>temp2=%edx +mov 1696(%rdi),%edx + +# qhasm: mem64[ap + 1696] = temp1 +# asm 1: mov <temp1=int64#2,1696(<ap=int64#1) +# asm 2: mov <temp1=%esi,1696(<ap=%rdi) +mov %esi,1696(%rdi) + +# qhasm: mem64[ap + 344] = temp2 +# asm 1: mov <temp2=int64#3,344(<ap=int64#1) +# asm 2: mov <temp2=%edx,344(<ap=%rdi) +mov %edx,344(%rdi) + +# qhasm: temp1 = mem64[ap + 348] +# asm 1: mov 348(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 348(<ap=%rdi),>temp1=%esi +mov 348(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3744] +# asm 
1: mov 3744(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3744(<ap=%rdi),>temp2=%edx +mov 3744(%rdi),%edx + +# qhasm: mem64[ap + 3744] = temp1 +# asm 1: mov <temp1=int64#2,3744(<ap=int64#1) +# asm 2: mov <temp1=%esi,3744(<ap=%rdi) +mov %esi,3744(%rdi) + +# qhasm: mem64[ap + 348] = temp2 +# asm 1: mov <temp2=int64#3,348(<ap=int64#1) +# asm 2: mov <temp2=%edx,348(<ap=%rdi) +mov %edx,348(%rdi) + +# qhasm: temp1 = mem64[ap + 352] +# asm 1: mov 352(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 352(<ap=%rdi),>temp1=%esi +mov 352(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 416] +# asm 1: mov 416(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 416(<ap=%rdi),>temp2=%edx +mov 416(%rdi),%edx + +# qhasm: mem64[ap + 416] = temp1 +# asm 1: mov <temp1=int64#2,416(<ap=int64#1) +# asm 2: mov <temp1=%esi,416(<ap=%rdi) +mov %esi,416(%rdi) + +# qhasm: mem64[ap + 352] = temp2 +# asm 1: mov <temp2=int64#3,352(<ap=int64#1) +# asm 2: mov <temp2=%edx,352(<ap=%rdi) +mov %edx,352(%rdi) + +# qhasm: temp1 = mem64[ap + 356] +# asm 1: mov 356(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 356(<ap=%rdi),>temp1=%esi +mov 356(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2464] +# asm 1: mov 2464(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2464(<ap=%rdi),>temp2=%edx +mov 2464(%rdi),%edx + +# qhasm: mem64[ap + 2464] = temp1 +# asm 1: mov <temp1=int64#2,2464(<ap=int64#1) +# asm 2: mov <temp1=%esi,2464(<ap=%rdi) +mov %esi,2464(%rdi) + +# qhasm: mem64[ap + 356] = temp2 +# asm 1: mov <temp2=int64#3,356(<ap=int64#1) +# asm 2: mov <temp2=%edx,356(<ap=%rdi) +mov %edx,356(%rdi) + +# qhasm: temp1 = mem64[ap + 360] +# asm 1: mov 360(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 360(<ap=%rdi),>temp1=%esi +mov 360(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1440] +# asm 1: mov 1440(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1440(<ap=%rdi),>temp2=%edx +mov 1440(%rdi),%edx + +# qhasm: mem64[ap + 1440] = temp1 +# asm 1: mov <temp1=int64#2,1440(<ap=int64#1) +# asm 2: mov <temp1=%esi,1440(<ap=%rdi) +mov %esi,1440(%rdi) + +# qhasm: mem64[ap + 360] = 
temp2 +# asm 1: mov <temp2=int64#3,360(<ap=int64#1) +# asm 2: mov <temp2=%edx,360(<ap=%rdi) +mov %edx,360(%rdi) + +# qhasm: temp1 = mem64[ap + 364] +# asm 1: mov 364(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 364(<ap=%rdi),>temp1=%esi +mov 364(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3488] +# asm 1: mov 3488(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3488(<ap=%rdi),>temp2=%edx +mov 3488(%rdi),%edx + +# qhasm: mem64[ap + 3488] = temp1 +# asm 1: mov <temp1=int64#2,3488(<ap=int64#1) +# asm 2: mov <temp1=%esi,3488(<ap=%rdi) +mov %esi,3488(%rdi) + +# qhasm: mem64[ap + 364] = temp2 +# asm 1: mov <temp2=int64#3,364(<ap=int64#1) +# asm 2: mov <temp2=%edx,364(<ap=%rdi) +mov %edx,364(%rdi) + +# qhasm: temp1 = mem64[ap + 368] +# asm 1: mov 368(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 368(<ap=%rdi),>temp1=%esi +mov 368(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 928] +# asm 1: mov 928(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 928(<ap=%rdi),>temp2=%edx +mov 928(%rdi),%edx + +# qhasm: mem64[ap + 928] = temp1 +# asm 1: mov <temp1=int64#2,928(<ap=int64#1) +# asm 2: mov <temp1=%esi,928(<ap=%rdi) +mov %esi,928(%rdi) + +# qhasm: mem64[ap + 368] = temp2 +# asm 1: mov <temp2=int64#3,368(<ap=int64#1) +# asm 2: mov <temp2=%edx,368(<ap=%rdi) +mov %edx,368(%rdi) + +# qhasm: temp1 = mem64[ap + 372] +# asm 1: mov 372(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 372(<ap=%rdi),>temp1=%esi +mov 372(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2976] +# asm 1: mov 2976(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2976(<ap=%rdi),>temp2=%edx +mov 2976(%rdi),%edx + +# qhasm: mem64[ap + 2976] = temp1 +# asm 1: mov <temp1=int64#2,2976(<ap=int64#1) +# asm 2: mov <temp1=%esi,2976(<ap=%rdi) +mov %esi,2976(%rdi) + +# qhasm: mem64[ap + 372] = temp2 +# asm 1: mov <temp2=int64#3,372(<ap=int64#1) +# asm 2: mov <temp2=%edx,372(<ap=%rdi) +mov %edx,372(%rdi) + +# qhasm: temp1 = mem64[ap + 376] +# asm 1: mov 376(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 376(<ap=%rdi),>temp1=%esi +mov 376(%rdi),%esi + +# qhasm: temp2 = 
mem64[ap + 1952] +# asm 1: mov 1952(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1952(<ap=%rdi),>temp2=%edx +mov 1952(%rdi),%edx + +# qhasm: mem64[ap + 1952] = temp1 +# asm 1: mov <temp1=int64#2,1952(<ap=int64#1) +# asm 2: mov <temp1=%esi,1952(<ap=%rdi) +mov %esi,1952(%rdi) + +# qhasm: mem64[ap + 376] = temp2 +# asm 1: mov <temp2=int64#3,376(<ap=int64#1) +# asm 2: mov <temp2=%edx,376(<ap=%rdi) +mov %edx,376(%rdi) + +# qhasm: temp1 = mem64[ap + 380] +# asm 1: mov 380(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 380(<ap=%rdi),>temp1=%esi +mov 380(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4000] +# asm 1: mov 4000(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4000(<ap=%rdi),>temp2=%edx +mov 4000(%rdi),%edx + +# qhasm: mem64[ap + 4000] = temp1 +# asm 1: mov <temp1=int64#2,4000(<ap=int64#1) +# asm 2: mov <temp1=%esi,4000(<ap=%rdi) +mov %esi,4000(%rdi) + +# qhasm: mem64[ap + 380] = temp2 +# asm 1: mov <temp2=int64#3,380(<ap=int64#1) +# asm 2: mov <temp2=%edx,380(<ap=%rdi) +mov %edx,380(%rdi) + +# qhasm: temp1 = mem64[ap + 388] +# asm 1: mov 388(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 388(<ap=%rdi),>temp1=%esi +mov 388(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2144] +# asm 1: mov 2144(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2144(<ap=%rdi),>temp2=%edx +mov 2144(%rdi),%edx + +# qhasm: mem64[ap + 2144] = temp1 +# asm 1: mov <temp1=int64#2,2144(<ap=int64#1) +# asm 2: mov <temp1=%esi,2144(<ap=%rdi) +mov %esi,2144(%rdi) + +# qhasm: mem64[ap + 388] = temp2 +# asm 1: mov <temp2=int64#3,388(<ap=int64#1) +# asm 2: mov <temp2=%edx,388(<ap=%rdi) +mov %edx,388(%rdi) + +# qhasm: temp1 = mem64[ap + 392] +# asm 1: mov 392(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 392(<ap=%rdi),>temp1=%esi +mov 392(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1120] +# asm 1: mov 1120(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1120(<ap=%rdi),>temp2=%edx +mov 1120(%rdi),%edx + +# qhasm: mem64[ap + 1120] = temp1 +# asm 1: mov <temp1=int64#2,1120(<ap=int64#1) +# asm 2: mov <temp1=%esi,1120(<ap=%rdi) +mov %esi,1120(%rdi) 
+ +# qhasm: mem64[ap + 392] = temp2 +# asm 1: mov <temp2=int64#3,392(<ap=int64#1) +# asm 2: mov <temp2=%edx,392(<ap=%rdi) +mov %edx,392(%rdi) + +# qhasm: temp1 = mem64[ap + 396] +# asm 1: mov 396(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 396(<ap=%rdi),>temp1=%esi +mov 396(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3168] +# asm 1: mov 3168(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3168(<ap=%rdi),>temp2=%edx +mov 3168(%rdi),%edx + +# qhasm: mem64[ap + 3168] = temp1 +# asm 1: mov <temp1=int64#2,3168(<ap=int64#1) +# asm 2: mov <temp1=%esi,3168(<ap=%rdi) +mov %esi,3168(%rdi) + +# qhasm: mem64[ap + 396] = temp2 +# asm 1: mov <temp2=int64#3,396(<ap=int64#1) +# asm 2: mov <temp2=%edx,396(<ap=%rdi) +mov %edx,396(%rdi) + +# qhasm: temp1 = mem64[ap + 400] +# asm 1: mov 400(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 400(<ap=%rdi),>temp1=%esi +mov 400(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 608] +# asm 1: mov 608(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 608(<ap=%rdi),>temp2=%edx +mov 608(%rdi),%edx + +# qhasm: mem64[ap + 608] = temp1 +# asm 1: mov <temp1=int64#2,608(<ap=int64#1) +# asm 2: mov <temp1=%esi,608(<ap=%rdi) +mov %esi,608(%rdi) + +# qhasm: mem64[ap + 400] = temp2 +# asm 1: mov <temp2=int64#3,400(<ap=int64#1) +# asm 2: mov <temp2=%edx,400(<ap=%rdi) +mov %edx,400(%rdi) + +# qhasm: temp1 = mem64[ap + 404] +# asm 1: mov 404(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 404(<ap=%rdi),>temp1=%esi +mov 404(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2656] +# asm 1: mov 2656(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2656(<ap=%rdi),>temp2=%edx +mov 2656(%rdi),%edx + +# qhasm: mem64[ap + 2656] = temp1 +# asm 1: mov <temp1=int64#2,2656(<ap=int64#1) +# asm 2: mov <temp1=%esi,2656(<ap=%rdi) +mov %esi,2656(%rdi) + +# qhasm: mem64[ap + 404] = temp2 +# asm 1: mov <temp2=int64#3,404(<ap=int64#1) +# asm 2: mov <temp2=%edx,404(<ap=%rdi) +mov %edx,404(%rdi) + +# qhasm: temp1 = mem64[ap + 408] +# asm 1: mov 408(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 408(<ap=%rdi),>temp1=%esi +mov 
408(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1632] +# asm 1: mov 1632(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1632(<ap=%rdi),>temp2=%edx +mov 1632(%rdi),%edx + +# qhasm: mem64[ap + 1632] = temp1 +# asm 1: mov <temp1=int64#2,1632(<ap=int64#1) +# asm 2: mov <temp1=%esi,1632(<ap=%rdi) +mov %esi,1632(%rdi) + +# qhasm: mem64[ap + 408] = temp2 +# asm 1: mov <temp2=int64#3,408(<ap=int64#1) +# asm 2: mov <temp2=%edx,408(<ap=%rdi) +mov %edx,408(%rdi) + +# qhasm: temp1 = mem64[ap + 412] +# asm 1: mov 412(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 412(<ap=%rdi),>temp1=%esi +mov 412(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3680] +# asm 1: mov 3680(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3680(<ap=%rdi),>temp2=%edx +mov 3680(%rdi),%edx + +# qhasm: mem64[ap + 3680] = temp1 +# asm 1: mov <temp1=int64#2,3680(<ap=int64#1) +# asm 2: mov <temp1=%esi,3680(<ap=%rdi) +mov %esi,3680(%rdi) + +# qhasm: mem64[ap + 412] = temp2 +# asm 1: mov <temp2=int64#3,412(<ap=int64#1) +# asm 2: mov <temp2=%edx,412(<ap=%rdi) +mov %edx,412(%rdi) + +# qhasm: temp1 = mem64[ap + 420] +# asm 1: mov 420(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 420(<ap=%rdi),>temp1=%esi +mov 420(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2400] +# asm 1: mov 2400(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2400(<ap=%rdi),>temp2=%edx +mov 2400(%rdi),%edx + +# qhasm: mem64[ap + 2400] = temp1 +# asm 1: mov <temp1=int64#2,2400(<ap=int64#1) +# asm 2: mov <temp1=%esi,2400(<ap=%rdi) +mov %esi,2400(%rdi) + +# qhasm: mem64[ap + 420] = temp2 +# asm 1: mov <temp2=int64#3,420(<ap=int64#1) +# asm 2: mov <temp2=%edx,420(<ap=%rdi) +mov %edx,420(%rdi) + +# qhasm: temp1 = mem64[ap + 424] +# asm 1: mov 424(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 424(<ap=%rdi),>temp1=%esi +mov 424(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1376] +# asm 1: mov 1376(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1376(<ap=%rdi),>temp2=%edx +mov 1376(%rdi),%edx + +# qhasm: mem64[ap + 1376] = temp1 +# asm 1: mov <temp1=int64#2,1376(<ap=int64#1) +# asm 2: mov 
<temp1=%esi,1376(<ap=%rdi) +mov %esi,1376(%rdi) + +# qhasm: mem64[ap + 424] = temp2 +# asm 1: mov <temp2=int64#3,424(<ap=int64#1) +# asm 2: mov <temp2=%edx,424(<ap=%rdi) +mov %edx,424(%rdi) + +# qhasm: temp1 = mem64[ap + 428] +# asm 1: mov 428(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 428(<ap=%rdi),>temp1=%esi +mov 428(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3424] +# asm 1: mov 3424(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3424(<ap=%rdi),>temp2=%edx +mov 3424(%rdi),%edx + +# qhasm: mem64[ap + 3424] = temp1 +# asm 1: mov <temp1=int64#2,3424(<ap=int64#1) +# asm 2: mov <temp1=%esi,3424(<ap=%rdi) +mov %esi,3424(%rdi) + +# qhasm: mem64[ap + 428] = temp2 +# asm 1: mov <temp2=int64#3,428(<ap=int64#1) +# asm 2: mov <temp2=%edx,428(<ap=%rdi) +mov %edx,428(%rdi) + +# qhasm: temp1 = mem64[ap + 432] +# asm 1: mov 432(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 432(<ap=%rdi),>temp1=%esi +mov 432(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 864] +# asm 1: mov 864(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 864(<ap=%rdi),>temp2=%edx +mov 864(%rdi),%edx + +# qhasm: mem64[ap + 864] = temp1 +# asm 1: mov <temp1=int64#2,864(<ap=int64#1) +# asm 2: mov <temp1=%esi,864(<ap=%rdi) +mov %esi,864(%rdi) + +# qhasm: mem64[ap + 432] = temp2 +# asm 1: mov <temp2=int64#3,432(<ap=int64#1) +# asm 2: mov <temp2=%edx,432(<ap=%rdi) +mov %edx,432(%rdi) + +# qhasm: temp1 = mem64[ap + 436] +# asm 1: mov 436(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 436(<ap=%rdi),>temp1=%esi +mov 436(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2912] +# asm 1: mov 2912(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2912(<ap=%rdi),>temp2=%edx +mov 2912(%rdi),%edx + +# qhasm: mem64[ap + 2912] = temp1 +# asm 1: mov <temp1=int64#2,2912(<ap=int64#1) +# asm 2: mov <temp1=%esi,2912(<ap=%rdi) +mov %esi,2912(%rdi) + +# qhasm: mem64[ap + 436] = temp2 +# asm 1: mov <temp2=int64#3,436(<ap=int64#1) +# asm 2: mov <temp2=%edx,436(<ap=%rdi) +mov %edx,436(%rdi) + +# qhasm: temp1 = mem64[ap + 440] +# asm 1: mov 440(<ap=int64#1),>temp1=int64#2 +# asm 
2: mov 440(<ap=%rdi),>temp1=%esi +mov 440(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1888] +# asm 1: mov 1888(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1888(<ap=%rdi),>temp2=%edx +mov 1888(%rdi),%edx + +# qhasm: mem64[ap + 1888] = temp1 +# asm 1: mov <temp1=int64#2,1888(<ap=int64#1) +# asm 2: mov <temp1=%esi,1888(<ap=%rdi) +mov %esi,1888(%rdi) + +# qhasm: mem64[ap + 440] = temp2 +# asm 1: mov <temp2=int64#3,440(<ap=int64#1) +# asm 2: mov <temp2=%edx,440(<ap=%rdi) +mov %edx,440(%rdi) + +# qhasm: temp1 = mem64[ap + 444] +# asm 1: mov 444(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 444(<ap=%rdi),>temp1=%esi +mov 444(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3936] +# asm 1: mov 3936(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3936(<ap=%rdi),>temp2=%edx +mov 3936(%rdi),%edx + +# qhasm: mem64[ap + 3936] = temp1 +# asm 1: mov <temp1=int64#2,3936(<ap=int64#1) +# asm 2: mov <temp1=%esi,3936(<ap=%rdi) +mov %esi,3936(%rdi) + +# qhasm: mem64[ap + 444] = temp2 +# asm 1: mov <temp2=int64#3,444(<ap=int64#1) +# asm 2: mov <temp2=%edx,444(<ap=%rdi) +mov %edx,444(%rdi) + +# qhasm: temp1 = mem64[ap + 452] +# asm 1: mov 452(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 452(<ap=%rdi),>temp1=%esi +mov 452(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2272] +# asm 1: mov 2272(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2272(<ap=%rdi),>temp2=%edx +mov 2272(%rdi),%edx + +# qhasm: mem64[ap + 2272] = temp1 +# asm 1: mov <temp1=int64#2,2272(<ap=int64#1) +# asm 2: mov <temp1=%esi,2272(<ap=%rdi) +mov %esi,2272(%rdi) + +# qhasm: mem64[ap + 452] = temp2 +# asm 1: mov <temp2=int64#3,452(<ap=int64#1) +# asm 2: mov <temp2=%edx,452(<ap=%rdi) +mov %edx,452(%rdi) + +# qhasm: temp1 = mem64[ap + 456] +# asm 1: mov 456(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 456(<ap=%rdi),>temp1=%esi +mov 456(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1248] +# asm 1: mov 1248(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1248(<ap=%rdi),>temp2=%edx +mov 1248(%rdi),%edx + +# qhasm: mem64[ap + 1248] = temp1 +# asm 1: mov 
<temp1=int64#2,1248(<ap=int64#1) +# asm 2: mov <temp1=%esi,1248(<ap=%rdi) +mov %esi,1248(%rdi) + +# qhasm: mem64[ap + 456] = temp2 +# asm 1: mov <temp2=int64#3,456(<ap=int64#1) +# asm 2: mov <temp2=%edx,456(<ap=%rdi) +mov %edx,456(%rdi) + +# qhasm: temp1 = mem64[ap + 460] +# asm 1: mov 460(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 460(<ap=%rdi),>temp1=%esi +mov 460(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3296] +# asm 1: mov 3296(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3296(<ap=%rdi),>temp2=%edx +mov 3296(%rdi),%edx + +# qhasm: mem64[ap + 3296] = temp1 +# asm 1: mov <temp1=int64#2,3296(<ap=int64#1) +# asm 2: mov <temp1=%esi,3296(<ap=%rdi) +mov %esi,3296(%rdi) + +# qhasm: mem64[ap + 460] = temp2 +# asm 1: mov <temp2=int64#3,460(<ap=int64#1) +# asm 2: mov <temp2=%edx,460(<ap=%rdi) +mov %edx,460(%rdi) + +# qhasm: temp1 = mem64[ap + 464] +# asm 1: mov 464(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 464(<ap=%rdi),>temp1=%esi +mov 464(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 736] +# asm 1: mov 736(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 736(<ap=%rdi),>temp2=%edx +mov 736(%rdi),%edx + +# qhasm: mem64[ap + 736] = temp1 +# asm 1: mov <temp1=int64#2,736(<ap=int64#1) +# asm 2: mov <temp1=%esi,736(<ap=%rdi) +mov %esi,736(%rdi) + +# qhasm: mem64[ap + 464] = temp2 +# asm 1: mov <temp2=int64#3,464(<ap=int64#1) +# asm 2: mov <temp2=%edx,464(<ap=%rdi) +mov %edx,464(%rdi) + +# qhasm: temp1 = mem64[ap + 468] +# asm 1: mov 468(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 468(<ap=%rdi),>temp1=%esi +mov 468(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2784] +# asm 1: mov 2784(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2784(<ap=%rdi),>temp2=%edx +mov 2784(%rdi),%edx + +# qhasm: mem64[ap + 2784] = temp1 +# asm 1: mov <temp1=int64#2,2784(<ap=int64#1) +# asm 2: mov <temp1=%esi,2784(<ap=%rdi) +mov %esi,2784(%rdi) + +# qhasm: mem64[ap + 468] = temp2 +# asm 1: mov <temp2=int64#3,468(<ap=int64#1) +# asm 2: mov <temp2=%edx,468(<ap=%rdi) +mov %edx,468(%rdi) + +# qhasm: temp1 = mem64[ap + 472] +# asm 
1: mov 472(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 472(<ap=%rdi),>temp1=%esi +mov 472(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1760] +# asm 1: mov 1760(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1760(<ap=%rdi),>temp2=%edx +mov 1760(%rdi),%edx + +# qhasm: mem64[ap + 1760] = temp1 +# asm 1: mov <temp1=int64#2,1760(<ap=int64#1) +# asm 2: mov <temp1=%esi,1760(<ap=%rdi) +mov %esi,1760(%rdi) + +# qhasm: mem64[ap + 472] = temp2 +# asm 1: mov <temp2=int64#3,472(<ap=int64#1) +# asm 2: mov <temp2=%edx,472(<ap=%rdi) +mov %edx,472(%rdi) + +# qhasm: temp1 = mem64[ap + 476] +# asm 1: mov 476(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 476(<ap=%rdi),>temp1=%esi +mov 476(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3808] +# asm 1: mov 3808(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3808(<ap=%rdi),>temp2=%edx +mov 3808(%rdi),%edx + +# qhasm: mem64[ap + 3808] = temp1 +# asm 1: mov <temp1=int64#2,3808(<ap=int64#1) +# asm 2: mov <temp1=%esi,3808(<ap=%rdi) +mov %esi,3808(%rdi) + +# qhasm: mem64[ap + 476] = temp2 +# asm 1: mov <temp2=int64#3,476(<ap=int64#1) +# asm 2: mov <temp2=%edx,476(<ap=%rdi) +mov %edx,476(%rdi) + +# qhasm: temp1 = mem64[ap + 484] +# asm 1: mov 484(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 484(<ap=%rdi),>temp1=%esi +mov 484(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2528] +# asm 1: mov 2528(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2528(<ap=%rdi),>temp2=%edx +mov 2528(%rdi),%edx + +# qhasm: mem64[ap + 2528] = temp1 +# asm 1: mov <temp1=int64#2,2528(<ap=int64#1) +# asm 2: mov <temp1=%esi,2528(<ap=%rdi) +mov %esi,2528(%rdi) + +# qhasm: mem64[ap + 484] = temp2 +# asm 1: mov <temp2=int64#3,484(<ap=int64#1) +# asm 2: mov <temp2=%edx,484(<ap=%rdi) +mov %edx,484(%rdi) + +# qhasm: temp1 = mem64[ap + 488] +# asm 1: mov 488(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 488(<ap=%rdi),>temp1=%esi +mov 488(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1504] +# asm 1: mov 1504(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1504(<ap=%rdi),>temp2=%edx +mov 1504(%rdi),%edx + +# qhasm: mem64[ap + 
1504] = temp1 +# asm 1: mov <temp1=int64#2,1504(<ap=int64#1) +# asm 2: mov <temp1=%esi,1504(<ap=%rdi) +mov %esi,1504(%rdi) + +# qhasm: mem64[ap + 488] = temp2 +# asm 1: mov <temp2=int64#3,488(<ap=int64#1) +# asm 2: mov <temp2=%edx,488(<ap=%rdi) +mov %edx,488(%rdi) + +# qhasm: temp1 = mem64[ap + 492] +# asm 1: mov 492(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 492(<ap=%rdi),>temp1=%esi +mov 492(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3552] +# asm 1: mov 3552(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3552(<ap=%rdi),>temp2=%edx +mov 3552(%rdi),%edx + +# qhasm: mem64[ap + 3552] = temp1 +# asm 1: mov <temp1=int64#2,3552(<ap=int64#1) +# asm 2: mov <temp1=%esi,3552(<ap=%rdi) +mov %esi,3552(%rdi) + +# qhasm: mem64[ap + 492] = temp2 +# asm 1: mov <temp2=int64#3,492(<ap=int64#1) +# asm 2: mov <temp2=%edx,492(<ap=%rdi) +mov %edx,492(%rdi) + +# qhasm: temp1 = mem64[ap + 496] +# asm 1: mov 496(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 496(<ap=%rdi),>temp1=%esi +mov 496(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 992] +# asm 1: mov 992(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 992(<ap=%rdi),>temp2=%edx +mov 992(%rdi),%edx + +# qhasm: mem64[ap + 992] = temp1 +# asm 1: mov <temp1=int64#2,992(<ap=int64#1) +# asm 2: mov <temp1=%esi,992(<ap=%rdi) +mov %esi,992(%rdi) + +# qhasm: mem64[ap + 496] = temp2 +# asm 1: mov <temp2=int64#3,496(<ap=int64#1) +# asm 2: mov <temp2=%edx,496(<ap=%rdi) +mov %edx,496(%rdi) + +# qhasm: temp1 = mem64[ap + 500] +# asm 1: mov 500(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 500(<ap=%rdi),>temp1=%esi +mov 500(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3040] +# asm 1: mov 3040(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3040(<ap=%rdi),>temp2=%edx +mov 3040(%rdi),%edx + +# qhasm: mem64[ap + 3040] = temp1 +# asm 1: mov <temp1=int64#2,3040(<ap=int64#1) +# asm 2: mov <temp1=%esi,3040(<ap=%rdi) +mov %esi,3040(%rdi) + +# qhasm: mem64[ap + 500] = temp2 +# asm 1: mov <temp2=int64#3,500(<ap=int64#1) +# asm 2: mov <temp2=%edx,500(<ap=%rdi) +mov %edx,500(%rdi) + +# qhasm: 
temp1 = mem64[ap + 504] +# asm 1: mov 504(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 504(<ap=%rdi),>temp1=%esi +mov 504(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2016] +# asm 1: mov 2016(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2016(<ap=%rdi),>temp2=%edx +mov 2016(%rdi),%edx + +# qhasm: mem64[ap + 2016] = temp1 +# asm 1: mov <temp1=int64#2,2016(<ap=int64#1) +# asm 2: mov <temp1=%esi,2016(<ap=%rdi) +mov %esi,2016(%rdi) + +# qhasm: mem64[ap + 504] = temp2 +# asm 1: mov <temp2=int64#3,504(<ap=int64#1) +# asm 2: mov <temp2=%edx,504(<ap=%rdi) +mov %edx,504(%rdi) + +# qhasm: temp1 = mem64[ap + 508] +# asm 1: mov 508(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 508(<ap=%rdi),>temp1=%esi +mov 508(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4064] +# asm 1: mov 4064(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4064(<ap=%rdi),>temp2=%edx +mov 4064(%rdi),%edx + +# qhasm: mem64[ap + 4064] = temp1 +# asm 1: mov <temp1=int64#2,4064(<ap=int64#1) +# asm 2: mov <temp1=%esi,4064(<ap=%rdi) +mov %esi,4064(%rdi) + +# qhasm: mem64[ap + 508] = temp2 +# asm 1: mov <temp2=int64#3,508(<ap=int64#1) +# asm 2: mov <temp2=%edx,508(<ap=%rdi) +mov %edx,508(%rdi) + +# qhasm: temp1 = mem64[ap + 516] +# asm 1: mov 516(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 516(<ap=%rdi),>temp1=%esi +mov 516(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2064] +# asm 1: mov 2064(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2064(<ap=%rdi),>temp2=%edx +mov 2064(%rdi),%edx + +# qhasm: mem64[ap + 2064] = temp1 +# asm 1: mov <temp1=int64#2,2064(<ap=int64#1) +# asm 2: mov <temp1=%esi,2064(<ap=%rdi) +mov %esi,2064(%rdi) + +# qhasm: mem64[ap + 516] = temp2 +# asm 1: mov <temp2=int64#3,516(<ap=int64#1) +# asm 2: mov <temp2=%edx,516(<ap=%rdi) +mov %edx,516(%rdi) + +# qhasm: temp1 = mem64[ap + 520] +# asm 1: mov 520(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 520(<ap=%rdi),>temp1=%esi +mov 520(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1040] +# asm 1: mov 1040(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1040(<ap=%rdi),>temp2=%edx +mov 
1040(%rdi),%edx + +# qhasm: mem64[ap + 1040] = temp1 +# asm 1: mov <temp1=int64#2,1040(<ap=int64#1) +# asm 2: mov <temp1=%esi,1040(<ap=%rdi) +mov %esi,1040(%rdi) + +# qhasm: mem64[ap + 520] = temp2 +# asm 1: mov <temp2=int64#3,520(<ap=int64#1) +# asm 2: mov <temp2=%edx,520(<ap=%rdi) +mov %edx,520(%rdi) + +# qhasm: temp1 = mem64[ap + 524] +# asm 1: mov 524(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 524(<ap=%rdi),>temp1=%esi +mov 524(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3088] +# asm 1: mov 3088(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3088(<ap=%rdi),>temp2=%edx +mov 3088(%rdi),%edx + +# qhasm: mem64[ap + 3088] = temp1 +# asm 1: mov <temp1=int64#2,3088(<ap=int64#1) +# asm 2: mov <temp1=%esi,3088(<ap=%rdi) +mov %esi,3088(%rdi) + +# qhasm: mem64[ap + 524] = temp2 +# asm 1: mov <temp2=int64#3,524(<ap=int64#1) +# asm 2: mov <temp2=%edx,524(<ap=%rdi) +mov %edx,524(%rdi) + +# qhasm: temp1 = mem64[ap + 532] +# asm 1: mov 532(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 532(<ap=%rdi),>temp1=%esi +mov 532(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2576] +# asm 1: mov 2576(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2576(<ap=%rdi),>temp2=%edx +mov 2576(%rdi),%edx + +# qhasm: mem64[ap + 2576] = temp1 +# asm 1: mov <temp1=int64#2,2576(<ap=int64#1) +# asm 2: mov <temp1=%esi,2576(<ap=%rdi) +mov %esi,2576(%rdi) + +# qhasm: mem64[ap + 532] = temp2 +# asm 1: mov <temp2=int64#3,532(<ap=int64#1) +# asm 2: mov <temp2=%edx,532(<ap=%rdi) +mov %edx,532(%rdi) + +# qhasm: temp1 = mem64[ap + 536] +# asm 1: mov 536(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 536(<ap=%rdi),>temp1=%esi +mov 536(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1552] +# asm 1: mov 1552(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1552(<ap=%rdi),>temp2=%edx +mov 1552(%rdi),%edx + +# qhasm: mem64[ap + 1552] = temp1 +# asm 1: mov <temp1=int64#2,1552(<ap=int64#1) +# asm 2: mov <temp1=%esi,1552(<ap=%rdi) +mov %esi,1552(%rdi) + +# qhasm: mem64[ap + 536] = temp2 +# asm 1: mov <temp2=int64#3,536(<ap=int64#1) +# asm 2: mov 
<temp2=%edx,536(<ap=%rdi) +mov %edx,536(%rdi) + +# qhasm: temp1 = mem64[ap + 540] +# asm 1: mov 540(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 540(<ap=%rdi),>temp1=%esi +mov 540(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3600] +# asm 1: mov 3600(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3600(<ap=%rdi),>temp2=%edx +mov 3600(%rdi),%edx + +# qhasm: mem64[ap + 3600] = temp1 +# asm 1: mov <temp1=int64#2,3600(<ap=int64#1) +# asm 2: mov <temp1=%esi,3600(<ap=%rdi) +mov %esi,3600(%rdi) + +# qhasm: mem64[ap + 540] = temp2 +# asm 1: mov <temp2=int64#3,540(<ap=int64#1) +# asm 2: mov <temp2=%edx,540(<ap=%rdi) +mov %edx,540(%rdi) + +# qhasm: temp1 = mem64[ap + 548] +# asm 1: mov 548(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 548(<ap=%rdi),>temp1=%esi +mov 548(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2320] +# asm 1: mov 2320(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2320(<ap=%rdi),>temp2=%edx +mov 2320(%rdi),%edx + +# qhasm: mem64[ap + 2320] = temp1 +# asm 1: mov <temp1=int64#2,2320(<ap=int64#1) +# asm 2: mov <temp1=%esi,2320(<ap=%rdi) +mov %esi,2320(%rdi) + +# qhasm: mem64[ap + 548] = temp2 +# asm 1: mov <temp2=int64#3,548(<ap=int64#1) +# asm 2: mov <temp2=%edx,548(<ap=%rdi) +mov %edx,548(%rdi) + +# qhasm: temp1 = mem64[ap + 552] +# asm 1: mov 552(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 552(<ap=%rdi),>temp1=%esi +mov 552(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1296] +# asm 1: mov 1296(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1296(<ap=%rdi),>temp2=%edx +mov 1296(%rdi),%edx + +# qhasm: mem64[ap + 1296] = temp1 +# asm 1: mov <temp1=int64#2,1296(<ap=int64#1) +# asm 2: mov <temp1=%esi,1296(<ap=%rdi) +mov %esi,1296(%rdi) + +# qhasm: mem64[ap + 552] = temp2 +# asm 1: mov <temp2=int64#3,552(<ap=int64#1) +# asm 2: mov <temp2=%edx,552(<ap=%rdi) +mov %edx,552(%rdi) + +# qhasm: temp1 = mem64[ap + 556] +# asm 1: mov 556(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 556(<ap=%rdi),>temp1=%esi +mov 556(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3344] +# asm 1: mov 3344(<ap=int64#1),>temp2=int64#3 
+# asm 2: mov 3344(<ap=%rdi),>temp2=%edx +mov 3344(%rdi),%edx + +# qhasm: mem64[ap + 3344] = temp1 +# asm 1: mov <temp1=int64#2,3344(<ap=int64#1) +# asm 2: mov <temp1=%esi,3344(<ap=%rdi) +mov %esi,3344(%rdi) + +# qhasm: mem64[ap + 556] = temp2 +# asm 1: mov <temp2=int64#3,556(<ap=int64#1) +# asm 2: mov <temp2=%edx,556(<ap=%rdi) +mov %edx,556(%rdi) + +# qhasm: temp1 = mem64[ap + 560] +# asm 1: mov 560(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 560(<ap=%rdi),>temp1=%esi +mov 560(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 784] +# asm 1: mov 784(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 784(<ap=%rdi),>temp2=%edx +mov 784(%rdi),%edx + +# qhasm: mem64[ap + 784] = temp1 +# asm 1: mov <temp1=int64#2,784(<ap=int64#1) +# asm 2: mov <temp1=%esi,784(<ap=%rdi) +mov %esi,784(%rdi) + +# qhasm: mem64[ap + 560] = temp2 +# asm 1: mov <temp2=int64#3,560(<ap=int64#1) +# asm 2: mov <temp2=%edx,560(<ap=%rdi) +mov %edx,560(%rdi) + +# qhasm: temp1 = mem64[ap + 564] +# asm 1: mov 564(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 564(<ap=%rdi),>temp1=%esi +mov 564(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2832] +# asm 1: mov 2832(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2832(<ap=%rdi),>temp2=%edx +mov 2832(%rdi),%edx + +# qhasm: mem64[ap + 2832] = temp1 +# asm 1: mov <temp1=int64#2,2832(<ap=int64#1) +# asm 2: mov <temp1=%esi,2832(<ap=%rdi) +mov %esi,2832(%rdi) + +# qhasm: mem64[ap + 564] = temp2 +# asm 1: mov <temp2=int64#3,564(<ap=int64#1) +# asm 2: mov <temp2=%edx,564(<ap=%rdi) +mov %edx,564(%rdi) + +# qhasm: temp1 = mem64[ap + 568] +# asm 1: mov 568(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 568(<ap=%rdi),>temp1=%esi +mov 568(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1808] +# asm 1: mov 1808(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1808(<ap=%rdi),>temp2=%edx +mov 1808(%rdi),%edx + +# qhasm: mem64[ap + 1808] = temp1 +# asm 1: mov <temp1=int64#2,1808(<ap=int64#1) +# asm 2: mov <temp1=%esi,1808(<ap=%rdi) +mov %esi,1808(%rdi) + +# qhasm: mem64[ap + 568] = temp2 +# asm 1: mov 
<temp2=int64#3,568(<ap=int64#1) +# asm 2: mov <temp2=%edx,568(<ap=%rdi) +mov %edx,568(%rdi) + +# qhasm: temp1 = mem64[ap + 572] +# asm 1: mov 572(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 572(<ap=%rdi),>temp1=%esi +mov 572(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3856] +# asm 1: mov 3856(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3856(<ap=%rdi),>temp2=%edx +mov 3856(%rdi),%edx + +# qhasm: mem64[ap + 3856] = temp1 +# asm 1: mov <temp1=int64#2,3856(<ap=int64#1) +# asm 2: mov <temp1=%esi,3856(<ap=%rdi) +mov %esi,3856(%rdi) + +# qhasm: mem64[ap + 572] = temp2 +# asm 1: mov <temp2=int64#3,572(<ap=int64#1) +# asm 2: mov <temp2=%edx,572(<ap=%rdi) +mov %edx,572(%rdi) + +# qhasm: temp1 = mem64[ap + 580] +# asm 1: mov 580(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 580(<ap=%rdi),>temp1=%esi +mov 580(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2192] +# asm 1: mov 2192(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2192(<ap=%rdi),>temp2=%edx +mov 2192(%rdi),%edx + +# qhasm: mem64[ap + 2192] = temp1 +# asm 1: mov <temp1=int64#2,2192(<ap=int64#1) +# asm 2: mov <temp1=%esi,2192(<ap=%rdi) +mov %esi,2192(%rdi) + +# qhasm: mem64[ap + 580] = temp2 +# asm 1: mov <temp2=int64#3,580(<ap=int64#1) +# asm 2: mov <temp2=%edx,580(<ap=%rdi) +mov %edx,580(%rdi) + +# qhasm: temp1 = mem64[ap + 584] +# asm 1: mov 584(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 584(<ap=%rdi),>temp1=%esi +mov 584(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1168] +# asm 1: mov 1168(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1168(<ap=%rdi),>temp2=%edx +mov 1168(%rdi),%edx + +# qhasm: mem64[ap + 1168] = temp1 +# asm 1: mov <temp1=int64#2,1168(<ap=int64#1) +# asm 2: mov <temp1=%esi,1168(<ap=%rdi) +mov %esi,1168(%rdi) + +# qhasm: mem64[ap + 584] = temp2 +# asm 1: mov <temp2=int64#3,584(<ap=int64#1) +# asm 2: mov <temp2=%edx,584(<ap=%rdi) +mov %edx,584(%rdi) + +# qhasm: temp1 = mem64[ap + 588] +# asm 1: mov 588(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 588(<ap=%rdi),>temp1=%esi +mov 588(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3216] 
+# asm 1: mov 3216(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3216(<ap=%rdi),>temp2=%edx +mov 3216(%rdi),%edx + +# qhasm: mem64[ap + 3216] = temp1 +# asm 1: mov <temp1=int64#2,3216(<ap=int64#1) +# asm 2: mov <temp1=%esi,3216(<ap=%rdi) +mov %esi,3216(%rdi) + +# qhasm: mem64[ap + 588] = temp2 +# asm 1: mov <temp2=int64#3,588(<ap=int64#1) +# asm 2: mov <temp2=%edx,588(<ap=%rdi) +mov %edx,588(%rdi) + +# qhasm: temp1 = mem64[ap + 592] +# asm 1: mov 592(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 592(<ap=%rdi),>temp1=%esi +mov 592(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 656] +# asm 1: mov 656(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 656(<ap=%rdi),>temp2=%edx +mov 656(%rdi),%edx + +# qhasm: mem64[ap + 656] = temp1 +# asm 1: mov <temp1=int64#2,656(<ap=int64#1) +# asm 2: mov <temp1=%esi,656(<ap=%rdi) +mov %esi,656(%rdi) + +# qhasm: mem64[ap + 592] = temp2 +# asm 1: mov <temp2=int64#3,592(<ap=int64#1) +# asm 2: mov <temp2=%edx,592(<ap=%rdi) +mov %edx,592(%rdi) + +# qhasm: temp1 = mem64[ap + 596] +# asm 1: mov 596(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 596(<ap=%rdi),>temp1=%esi +mov 596(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2704] +# asm 1: mov 2704(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2704(<ap=%rdi),>temp2=%edx +mov 2704(%rdi),%edx + +# qhasm: mem64[ap + 2704] = temp1 +# asm 1: mov <temp1=int64#2,2704(<ap=int64#1) +# asm 2: mov <temp1=%esi,2704(<ap=%rdi) +mov %esi,2704(%rdi) + +# qhasm: mem64[ap + 596] = temp2 +# asm 1: mov <temp2=int64#3,596(<ap=int64#1) +# asm 2: mov <temp2=%edx,596(<ap=%rdi) +mov %edx,596(%rdi) + +# qhasm: temp1 = mem64[ap + 600] +# asm 1: mov 600(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 600(<ap=%rdi),>temp1=%esi +mov 600(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1680] +# asm 1: mov 1680(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1680(<ap=%rdi),>temp2=%edx +mov 1680(%rdi),%edx + +# qhasm: mem64[ap + 1680] = temp1 +# asm 1: mov <temp1=int64#2,1680(<ap=int64#1) +# asm 2: mov <temp1=%esi,1680(<ap=%rdi) +mov %esi,1680(%rdi) + +# qhasm: mem64[ap + 
600] = temp2 +# asm 1: mov <temp2=int64#3,600(<ap=int64#1) +# asm 2: mov <temp2=%edx,600(<ap=%rdi) +mov %edx,600(%rdi) + +# qhasm: temp1 = mem64[ap + 604] +# asm 1: mov 604(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 604(<ap=%rdi),>temp1=%esi +mov 604(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3728] +# asm 1: mov 3728(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3728(<ap=%rdi),>temp2=%edx +mov 3728(%rdi),%edx + +# qhasm: mem64[ap + 3728] = temp1 +# asm 1: mov <temp1=int64#2,3728(<ap=int64#1) +# asm 2: mov <temp1=%esi,3728(<ap=%rdi) +mov %esi,3728(%rdi) + +# qhasm: mem64[ap + 604] = temp2 +# asm 1: mov <temp2=int64#3,604(<ap=int64#1) +# asm 2: mov <temp2=%edx,604(<ap=%rdi) +mov %edx,604(%rdi) + +# qhasm: temp1 = mem64[ap + 612] +# asm 1: mov 612(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 612(<ap=%rdi),>temp1=%esi +mov 612(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2448] +# asm 1: mov 2448(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2448(<ap=%rdi),>temp2=%edx +mov 2448(%rdi),%edx + +# qhasm: mem64[ap + 2448] = temp1 +# asm 1: mov <temp1=int64#2,2448(<ap=int64#1) +# asm 2: mov <temp1=%esi,2448(<ap=%rdi) +mov %esi,2448(%rdi) + +# qhasm: mem64[ap + 612] = temp2 +# asm 1: mov <temp2=int64#3,612(<ap=int64#1) +# asm 2: mov <temp2=%edx,612(<ap=%rdi) +mov %edx,612(%rdi) + +# qhasm: temp1 = mem64[ap + 616] +# asm 1: mov 616(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 616(<ap=%rdi),>temp1=%esi +mov 616(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1424] +# asm 1: mov 1424(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1424(<ap=%rdi),>temp2=%edx +mov 1424(%rdi),%edx + +# qhasm: mem64[ap + 1424] = temp1 +# asm 1: mov <temp1=int64#2,1424(<ap=int64#1) +# asm 2: mov <temp1=%esi,1424(<ap=%rdi) +mov %esi,1424(%rdi) + +# qhasm: mem64[ap + 616] = temp2 +# asm 1: mov <temp2=int64#3,616(<ap=int64#1) +# asm 2: mov <temp2=%edx,616(<ap=%rdi) +mov %edx,616(%rdi) + +# qhasm: temp1 = mem64[ap + 620] +# asm 1: mov 620(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 620(<ap=%rdi),>temp1=%esi +mov 620(%rdi),%esi + +# 
qhasm: temp2 = mem64[ap + 3472] +# asm 1: mov 3472(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3472(<ap=%rdi),>temp2=%edx +mov 3472(%rdi),%edx + +# qhasm: mem64[ap + 3472] = temp1 +# asm 1: mov <temp1=int64#2,3472(<ap=int64#1) +# asm 2: mov <temp1=%esi,3472(<ap=%rdi) +mov %esi,3472(%rdi) + +# qhasm: mem64[ap + 620] = temp2 +# asm 1: mov <temp2=int64#3,620(<ap=int64#1) +# asm 2: mov <temp2=%edx,620(<ap=%rdi) +mov %edx,620(%rdi) + +# qhasm: temp1 = mem64[ap + 624] +# asm 1: mov 624(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 624(<ap=%rdi),>temp1=%esi +mov 624(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 912] +# asm 1: mov 912(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 912(<ap=%rdi),>temp2=%edx +mov 912(%rdi),%edx + +# qhasm: mem64[ap + 912] = temp1 +# asm 1: mov <temp1=int64#2,912(<ap=int64#1) +# asm 2: mov <temp1=%esi,912(<ap=%rdi) +mov %esi,912(%rdi) + +# qhasm: mem64[ap + 624] = temp2 +# asm 1: mov <temp2=int64#3,624(<ap=int64#1) +# asm 2: mov <temp2=%edx,624(<ap=%rdi) +mov %edx,624(%rdi) + +# qhasm: temp1 = mem64[ap + 628] +# asm 1: mov 628(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 628(<ap=%rdi),>temp1=%esi +mov 628(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2960] +# asm 1: mov 2960(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2960(<ap=%rdi),>temp2=%edx +mov 2960(%rdi),%edx + +# qhasm: mem64[ap + 2960] = temp1 +# asm 1: mov <temp1=int64#2,2960(<ap=int64#1) +# asm 2: mov <temp1=%esi,2960(<ap=%rdi) +mov %esi,2960(%rdi) + +# qhasm: mem64[ap + 628] = temp2 +# asm 1: mov <temp2=int64#3,628(<ap=int64#1) +# asm 2: mov <temp2=%edx,628(<ap=%rdi) +mov %edx,628(%rdi) + +# qhasm: temp1 = mem64[ap + 632] +# asm 1: mov 632(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 632(<ap=%rdi),>temp1=%esi +mov 632(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1936] +# asm 1: mov 1936(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1936(<ap=%rdi),>temp2=%edx +mov 1936(%rdi),%edx + +# qhasm: mem64[ap + 1936] = temp1 +# asm 1: mov <temp1=int64#2,1936(<ap=int64#1) +# asm 2: mov <temp1=%esi,1936(<ap=%rdi) +mov 
%esi,1936(%rdi) + +# qhasm: mem64[ap + 632] = temp2 +# asm 1: mov <temp2=int64#3,632(<ap=int64#1) +# asm 2: mov <temp2=%edx,632(<ap=%rdi) +mov %edx,632(%rdi) + +# qhasm: temp1 = mem64[ap + 636] +# asm 1: mov 636(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 636(<ap=%rdi),>temp1=%esi +mov 636(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3984] +# asm 1: mov 3984(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3984(<ap=%rdi),>temp2=%edx +mov 3984(%rdi),%edx + +# qhasm: mem64[ap + 3984] = temp1 +# asm 1: mov <temp1=int64#2,3984(<ap=int64#1) +# asm 2: mov <temp1=%esi,3984(<ap=%rdi) +mov %esi,3984(%rdi) + +# qhasm: mem64[ap + 636] = temp2 +# asm 1: mov <temp2=int64#3,636(<ap=int64#1) +# asm 2: mov <temp2=%edx,636(<ap=%rdi) +mov %edx,636(%rdi) + +# qhasm: temp1 = mem64[ap + 644] +# asm 1: mov 644(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 644(<ap=%rdi),>temp1=%esi +mov 644(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2128] +# asm 1: mov 2128(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2128(<ap=%rdi),>temp2=%edx +mov 2128(%rdi),%edx + +# qhasm: mem64[ap + 2128] = temp1 +# asm 1: mov <temp1=int64#2,2128(<ap=int64#1) +# asm 2: mov <temp1=%esi,2128(<ap=%rdi) +mov %esi,2128(%rdi) + +# qhasm: mem64[ap + 644] = temp2 +# asm 1: mov <temp2=int64#3,644(<ap=int64#1) +# asm 2: mov <temp2=%edx,644(<ap=%rdi) +mov %edx,644(%rdi) + +# qhasm: temp1 = mem64[ap + 648] +# asm 1: mov 648(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 648(<ap=%rdi),>temp1=%esi +mov 648(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1104] +# asm 1: mov 1104(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1104(<ap=%rdi),>temp2=%edx +mov 1104(%rdi),%edx + +# qhasm: mem64[ap + 1104] = temp1 +# asm 1: mov <temp1=int64#2,1104(<ap=int64#1) +# asm 2: mov <temp1=%esi,1104(<ap=%rdi) +mov %esi,1104(%rdi) + +# qhasm: mem64[ap + 648] = temp2 +# asm 1: mov <temp2=int64#3,648(<ap=int64#1) +# asm 2: mov <temp2=%edx,648(<ap=%rdi) +mov %edx,648(%rdi) + +# qhasm: temp1 = mem64[ap + 652] +# asm 1: mov 652(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 
652(<ap=%rdi),>temp1=%esi +mov 652(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3152] +# asm 1: mov 3152(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3152(<ap=%rdi),>temp2=%edx +mov 3152(%rdi),%edx + +# qhasm: mem64[ap + 3152] = temp1 +# asm 1: mov <temp1=int64#2,3152(<ap=int64#1) +# asm 2: mov <temp1=%esi,3152(<ap=%rdi) +mov %esi,3152(%rdi) + +# qhasm: mem64[ap + 652] = temp2 +# asm 1: mov <temp2=int64#3,652(<ap=int64#1) +# asm 2: mov <temp2=%edx,652(<ap=%rdi) +mov %edx,652(%rdi) + +# qhasm: temp1 = mem64[ap + 660] +# asm 1: mov 660(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 660(<ap=%rdi),>temp1=%esi +mov 660(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2640] +# asm 1: mov 2640(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2640(<ap=%rdi),>temp2=%edx +mov 2640(%rdi),%edx + +# qhasm: mem64[ap + 2640] = temp1 +# asm 1: mov <temp1=int64#2,2640(<ap=int64#1) +# asm 2: mov <temp1=%esi,2640(<ap=%rdi) +mov %esi,2640(%rdi) + +# qhasm: mem64[ap + 660] = temp2 +# asm 1: mov <temp2=int64#3,660(<ap=int64#1) +# asm 2: mov <temp2=%edx,660(<ap=%rdi) +mov %edx,660(%rdi) + +# qhasm: temp1 = mem64[ap + 664] +# asm 1: mov 664(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 664(<ap=%rdi),>temp1=%esi +mov 664(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1616] +# asm 1: mov 1616(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1616(<ap=%rdi),>temp2=%edx +mov 1616(%rdi),%edx + +# qhasm: mem64[ap + 1616] = temp1 +# asm 1: mov <temp1=int64#2,1616(<ap=int64#1) +# asm 2: mov <temp1=%esi,1616(<ap=%rdi) +mov %esi,1616(%rdi) + +# qhasm: mem64[ap + 664] = temp2 +# asm 1: mov <temp2=int64#3,664(<ap=int64#1) +# asm 2: mov <temp2=%edx,664(<ap=%rdi) +mov %edx,664(%rdi) + +# qhasm: temp1 = mem64[ap + 668] +# asm 1: mov 668(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 668(<ap=%rdi),>temp1=%esi +mov 668(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3664] +# asm 1: mov 3664(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3664(<ap=%rdi),>temp2=%edx +mov 3664(%rdi),%edx + +# qhasm: mem64[ap + 3664] = temp1 +# asm 1: mov 
<temp1=int64#2,3664(<ap=int64#1) +# asm 2: mov <temp1=%esi,3664(<ap=%rdi) +mov %esi,3664(%rdi) + +# qhasm: mem64[ap + 668] = temp2 +# asm 1: mov <temp2=int64#3,668(<ap=int64#1) +# asm 2: mov <temp2=%edx,668(<ap=%rdi) +mov %edx,668(%rdi) + +# qhasm: temp1 = mem64[ap + 676] +# asm 1: mov 676(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 676(<ap=%rdi),>temp1=%esi +mov 676(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2384] +# asm 1: mov 2384(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2384(<ap=%rdi),>temp2=%edx +mov 2384(%rdi),%edx + +# qhasm: mem64[ap + 2384] = temp1 +# asm 1: mov <temp1=int64#2,2384(<ap=int64#1) +# asm 2: mov <temp1=%esi,2384(<ap=%rdi) +mov %esi,2384(%rdi) + +# qhasm: mem64[ap + 676] = temp2 +# asm 1: mov <temp2=int64#3,676(<ap=int64#1) +# asm 2: mov <temp2=%edx,676(<ap=%rdi) +mov %edx,676(%rdi) + +# qhasm: temp1 = mem64[ap + 680] +# asm 1: mov 680(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 680(<ap=%rdi),>temp1=%esi +mov 680(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1360] +# asm 1: mov 1360(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1360(<ap=%rdi),>temp2=%edx +mov 1360(%rdi),%edx + +# qhasm: mem64[ap + 1360] = temp1 +# asm 1: mov <temp1=int64#2,1360(<ap=int64#1) +# asm 2: mov <temp1=%esi,1360(<ap=%rdi) +mov %esi,1360(%rdi) + +# qhasm: mem64[ap + 680] = temp2 +# asm 1: mov <temp2=int64#3,680(<ap=int64#1) +# asm 2: mov <temp2=%edx,680(<ap=%rdi) +mov %edx,680(%rdi) + +# qhasm: temp1 = mem64[ap + 684] +# asm 1: mov 684(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 684(<ap=%rdi),>temp1=%esi +mov 684(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3408] +# asm 1: mov 3408(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3408(<ap=%rdi),>temp2=%edx +mov 3408(%rdi),%edx + +# qhasm: mem64[ap + 3408] = temp1 +# asm 1: mov <temp1=int64#2,3408(<ap=int64#1) +# asm 2: mov <temp1=%esi,3408(<ap=%rdi) +mov %esi,3408(%rdi) + +# qhasm: mem64[ap + 684] = temp2 +# asm 1: mov <temp2=int64#3,684(<ap=int64#1) +# asm 2: mov <temp2=%edx,684(<ap=%rdi) +mov %edx,684(%rdi) + +# qhasm: temp1 = mem64[ap + 
688] +# asm 1: mov 688(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 688(<ap=%rdi),>temp1=%esi +mov 688(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 848] +# asm 1: mov 848(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 848(<ap=%rdi),>temp2=%edx +mov 848(%rdi),%edx + +# qhasm: mem64[ap + 848] = temp1 +# asm 1: mov <temp1=int64#2,848(<ap=int64#1) +# asm 2: mov <temp1=%esi,848(<ap=%rdi) +mov %esi,848(%rdi) + +# qhasm: mem64[ap + 688] = temp2 +# asm 1: mov <temp2=int64#3,688(<ap=int64#1) +# asm 2: mov <temp2=%edx,688(<ap=%rdi) +mov %edx,688(%rdi) + +# qhasm: temp1 = mem64[ap + 692] +# asm 1: mov 692(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 692(<ap=%rdi),>temp1=%esi +mov 692(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2896] +# asm 1: mov 2896(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2896(<ap=%rdi),>temp2=%edx +mov 2896(%rdi),%edx + +# qhasm: mem64[ap + 2896] = temp1 +# asm 1: mov <temp1=int64#2,2896(<ap=int64#1) +# asm 2: mov <temp1=%esi,2896(<ap=%rdi) +mov %esi,2896(%rdi) + +# qhasm: mem64[ap + 692] = temp2 +# asm 1: mov <temp2=int64#3,692(<ap=int64#1) +# asm 2: mov <temp2=%edx,692(<ap=%rdi) +mov %edx,692(%rdi) + +# qhasm: temp1 = mem64[ap + 696] +# asm 1: mov 696(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 696(<ap=%rdi),>temp1=%esi +mov 696(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1872] +# asm 1: mov 1872(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1872(<ap=%rdi),>temp2=%edx +mov 1872(%rdi),%edx + +# qhasm: mem64[ap + 1872] = temp1 +# asm 1: mov <temp1=int64#2,1872(<ap=int64#1) +# asm 2: mov <temp1=%esi,1872(<ap=%rdi) +mov %esi,1872(%rdi) + +# qhasm: mem64[ap + 696] = temp2 +# asm 1: mov <temp2=int64#3,696(<ap=int64#1) +# asm 2: mov <temp2=%edx,696(<ap=%rdi) +mov %edx,696(%rdi) + +# qhasm: temp1 = mem64[ap + 700] +# asm 1: mov 700(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 700(<ap=%rdi),>temp1=%esi +mov 700(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3920] +# asm 1: mov 3920(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3920(<ap=%rdi),>temp2=%edx +mov 3920(%rdi),%edx + +# qhasm: mem64[ap + 
3920] = temp1 +# asm 1: mov <temp1=int64#2,3920(<ap=int64#1) +# asm 2: mov <temp1=%esi,3920(<ap=%rdi) +mov %esi,3920(%rdi) + +# qhasm: mem64[ap + 700] = temp2 +# asm 1: mov <temp2=int64#3,700(<ap=int64#1) +# asm 2: mov <temp2=%edx,700(<ap=%rdi) +mov %edx,700(%rdi) + +# qhasm: temp1 = mem64[ap + 708] +# asm 1: mov 708(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 708(<ap=%rdi),>temp1=%esi +mov 708(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2256] +# asm 1: mov 2256(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2256(<ap=%rdi),>temp2=%edx +mov 2256(%rdi),%edx + +# qhasm: mem64[ap + 2256] = temp1 +# asm 1: mov <temp1=int64#2,2256(<ap=int64#1) +# asm 2: mov <temp1=%esi,2256(<ap=%rdi) +mov %esi,2256(%rdi) + +# qhasm: mem64[ap + 708] = temp2 +# asm 1: mov <temp2=int64#3,708(<ap=int64#1) +# asm 2: mov <temp2=%edx,708(<ap=%rdi) +mov %edx,708(%rdi) + +# qhasm: temp1 = mem64[ap + 712] +# asm 1: mov 712(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 712(<ap=%rdi),>temp1=%esi +mov 712(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1232] +# asm 1: mov 1232(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1232(<ap=%rdi),>temp2=%edx +mov 1232(%rdi),%edx + +# qhasm: mem64[ap + 1232] = temp1 +# asm 1: mov <temp1=int64#2,1232(<ap=int64#1) +# asm 2: mov <temp1=%esi,1232(<ap=%rdi) +mov %esi,1232(%rdi) + +# qhasm: mem64[ap + 712] = temp2 +# asm 1: mov <temp2=int64#3,712(<ap=int64#1) +# asm 2: mov <temp2=%edx,712(<ap=%rdi) +mov %edx,712(%rdi) + +# qhasm: temp1 = mem64[ap + 716] +# asm 1: mov 716(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 716(<ap=%rdi),>temp1=%esi +mov 716(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3280] +# asm 1: mov 3280(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3280(<ap=%rdi),>temp2=%edx +mov 3280(%rdi),%edx + +# qhasm: mem64[ap + 3280] = temp1 +# asm 1: mov <temp1=int64#2,3280(<ap=int64#1) +# asm 2: mov <temp1=%esi,3280(<ap=%rdi) +mov %esi,3280(%rdi) + +# qhasm: mem64[ap + 716] = temp2 +# asm 1: mov <temp2=int64#3,716(<ap=int64#1) +# asm 2: mov <temp2=%edx,716(<ap=%rdi) +mov %edx,716(%rdi) + +# 
qhasm: temp1 = mem64[ap + 724] +# asm 1: mov 724(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 724(<ap=%rdi),>temp1=%esi +mov 724(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2768] +# asm 1: mov 2768(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2768(<ap=%rdi),>temp2=%edx +mov 2768(%rdi),%edx + +# qhasm: mem64[ap + 2768] = temp1 +# asm 1: mov <temp1=int64#2,2768(<ap=int64#1) +# asm 2: mov <temp1=%esi,2768(<ap=%rdi) +mov %esi,2768(%rdi) + +# qhasm: mem64[ap + 724] = temp2 +# asm 1: mov <temp2=int64#3,724(<ap=int64#1) +# asm 2: mov <temp2=%edx,724(<ap=%rdi) +mov %edx,724(%rdi) + +# qhasm: temp1 = mem64[ap + 728] +# asm 1: mov 728(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 728(<ap=%rdi),>temp1=%esi +mov 728(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1744] +# asm 1: mov 1744(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1744(<ap=%rdi),>temp2=%edx +mov 1744(%rdi),%edx + +# qhasm: mem64[ap + 1744] = temp1 +# asm 1: mov <temp1=int64#2,1744(<ap=int64#1) +# asm 2: mov <temp1=%esi,1744(<ap=%rdi) +mov %esi,1744(%rdi) + +# qhasm: mem64[ap + 728] = temp2 +# asm 1: mov <temp2=int64#3,728(<ap=int64#1) +# asm 2: mov <temp2=%edx,728(<ap=%rdi) +mov %edx,728(%rdi) + +# qhasm: temp1 = mem64[ap + 732] +# asm 1: mov 732(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 732(<ap=%rdi),>temp1=%esi +mov 732(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3792] +# asm 1: mov 3792(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3792(<ap=%rdi),>temp2=%edx +mov 3792(%rdi),%edx + +# qhasm: mem64[ap + 3792] = temp1 +# asm 1: mov <temp1=int64#2,3792(<ap=int64#1) +# asm 2: mov <temp1=%esi,3792(<ap=%rdi) +mov %esi,3792(%rdi) + +# qhasm: mem64[ap + 732] = temp2 +# asm 1: mov <temp2=int64#3,732(<ap=int64#1) +# asm 2: mov <temp2=%edx,732(<ap=%rdi) +mov %edx,732(%rdi) + +# qhasm: temp1 = mem64[ap + 740] +# asm 1: mov 740(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 740(<ap=%rdi),>temp1=%esi +mov 740(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2512] +# asm 1: mov 2512(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2512(<ap=%rdi),>temp2=%edx +mov 
2512(%rdi),%edx + +# qhasm: mem64[ap + 2512] = temp1 +# asm 1: mov <temp1=int64#2,2512(<ap=int64#1) +# asm 2: mov <temp1=%esi,2512(<ap=%rdi) +mov %esi,2512(%rdi) + +# qhasm: mem64[ap + 740] = temp2 +# asm 1: mov <temp2=int64#3,740(<ap=int64#1) +# asm 2: mov <temp2=%edx,740(<ap=%rdi) +mov %edx,740(%rdi) + +# qhasm: temp1 = mem64[ap + 744] +# asm 1: mov 744(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 744(<ap=%rdi),>temp1=%esi +mov 744(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1488] +# asm 1: mov 1488(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1488(<ap=%rdi),>temp2=%edx +mov 1488(%rdi),%edx + +# qhasm: mem64[ap + 1488] = temp1 +# asm 1: mov <temp1=int64#2,1488(<ap=int64#1) +# asm 2: mov <temp1=%esi,1488(<ap=%rdi) +mov %esi,1488(%rdi) + +# qhasm: mem64[ap + 744] = temp2 +# asm 1: mov <temp2=int64#3,744(<ap=int64#1) +# asm 2: mov <temp2=%edx,744(<ap=%rdi) +mov %edx,744(%rdi) + +# qhasm: temp1 = mem64[ap + 748] +# asm 1: mov 748(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 748(<ap=%rdi),>temp1=%esi +mov 748(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3536] +# asm 1: mov 3536(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3536(<ap=%rdi),>temp2=%edx +mov 3536(%rdi),%edx + +# qhasm: mem64[ap + 3536] = temp1 +# asm 1: mov <temp1=int64#2,3536(<ap=int64#1) +# asm 2: mov <temp1=%esi,3536(<ap=%rdi) +mov %esi,3536(%rdi) + +# qhasm: mem64[ap + 748] = temp2 +# asm 1: mov <temp2=int64#3,748(<ap=int64#1) +# asm 2: mov <temp2=%edx,748(<ap=%rdi) +mov %edx,748(%rdi) + +# qhasm: temp1 = mem64[ap + 752] +# asm 1: mov 752(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 752(<ap=%rdi),>temp1=%esi +mov 752(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 976] +# asm 1: mov 976(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 976(<ap=%rdi),>temp2=%edx +mov 976(%rdi),%edx + +# qhasm: mem64[ap + 976] = temp1 +# asm 1: mov <temp1=int64#2,976(<ap=int64#1) +# asm 2: mov <temp1=%esi,976(<ap=%rdi) +mov %esi,976(%rdi) + +# qhasm: mem64[ap + 752] = temp2 +# asm 1: mov <temp2=int64#3,752(<ap=int64#1) +# asm 2: mov 
<temp2=%edx,752(<ap=%rdi) +mov %edx,752(%rdi) + +# qhasm: temp1 = mem64[ap + 756] +# asm 1: mov 756(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 756(<ap=%rdi),>temp1=%esi +mov 756(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3024] +# asm 1: mov 3024(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3024(<ap=%rdi),>temp2=%edx +mov 3024(%rdi),%edx + +# qhasm: mem64[ap + 3024] = temp1 +# asm 1: mov <temp1=int64#2,3024(<ap=int64#1) +# asm 2: mov <temp1=%esi,3024(<ap=%rdi) +mov %esi,3024(%rdi) + +# qhasm: mem64[ap + 756] = temp2 +# asm 1: mov <temp2=int64#3,756(<ap=int64#1) +# asm 2: mov <temp2=%edx,756(<ap=%rdi) +mov %edx,756(%rdi) + +# qhasm: temp1 = mem64[ap + 760] +# asm 1: mov 760(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 760(<ap=%rdi),>temp1=%esi +mov 760(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2000] +# asm 1: mov 2000(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2000(<ap=%rdi),>temp2=%edx +mov 2000(%rdi),%edx + +# qhasm: mem64[ap + 2000] = temp1 +# asm 1: mov <temp1=int64#2,2000(<ap=int64#1) +# asm 2: mov <temp1=%esi,2000(<ap=%rdi) +mov %esi,2000(%rdi) + +# qhasm: mem64[ap + 760] = temp2 +# asm 1: mov <temp2=int64#3,760(<ap=int64#1) +# asm 2: mov <temp2=%edx,760(<ap=%rdi) +mov %edx,760(%rdi) + +# qhasm: temp1 = mem64[ap + 764] +# asm 1: mov 764(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 764(<ap=%rdi),>temp1=%esi +mov 764(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4048] +# asm 1: mov 4048(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4048(<ap=%rdi),>temp2=%edx +mov 4048(%rdi),%edx + +# qhasm: mem64[ap + 4048] = temp1 +# asm 1: mov <temp1=int64#2,4048(<ap=int64#1) +# asm 2: mov <temp1=%esi,4048(<ap=%rdi) +mov %esi,4048(%rdi) + +# qhasm: mem64[ap + 764] = temp2 +# asm 1: mov <temp2=int64#3,764(<ap=int64#1) +# asm 2: mov <temp2=%edx,764(<ap=%rdi) +mov %edx,764(%rdi) + +# qhasm: temp1 = mem64[ap + 772] +# asm 1: mov 772(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 772(<ap=%rdi),>temp1=%esi +mov 772(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2096] +# asm 1: mov 2096(<ap=int64#1),>temp2=int64#3 
+# asm 2: mov 2096(<ap=%rdi),>temp2=%edx +mov 2096(%rdi),%edx + +# qhasm: mem64[ap + 2096] = temp1 +# asm 1: mov <temp1=int64#2,2096(<ap=int64#1) +# asm 2: mov <temp1=%esi,2096(<ap=%rdi) +mov %esi,2096(%rdi) + +# qhasm: mem64[ap + 772] = temp2 +# asm 1: mov <temp2=int64#3,772(<ap=int64#1) +# asm 2: mov <temp2=%edx,772(<ap=%rdi) +mov %edx,772(%rdi) + +# qhasm: temp1 = mem64[ap + 776] +# asm 1: mov 776(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 776(<ap=%rdi),>temp1=%esi +mov 776(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1072] +# asm 1: mov 1072(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1072(<ap=%rdi),>temp2=%edx +mov 1072(%rdi),%edx + +# qhasm: mem64[ap + 1072] = temp1 +# asm 1: mov <temp1=int64#2,1072(<ap=int64#1) +# asm 2: mov <temp1=%esi,1072(<ap=%rdi) +mov %esi,1072(%rdi) + +# qhasm: mem64[ap + 776] = temp2 +# asm 1: mov <temp2=int64#3,776(<ap=int64#1) +# asm 2: mov <temp2=%edx,776(<ap=%rdi) +mov %edx,776(%rdi) + +# qhasm: temp1 = mem64[ap + 780] +# asm 1: mov 780(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 780(<ap=%rdi),>temp1=%esi +mov 780(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3120] +# asm 1: mov 3120(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3120(<ap=%rdi),>temp2=%edx +mov 3120(%rdi),%edx + +# qhasm: mem64[ap + 3120] = temp1 +# asm 1: mov <temp1=int64#2,3120(<ap=int64#1) +# asm 2: mov <temp1=%esi,3120(<ap=%rdi) +mov %esi,3120(%rdi) + +# qhasm: mem64[ap + 780] = temp2 +# asm 1: mov <temp2=int64#3,780(<ap=int64#1) +# asm 2: mov <temp2=%edx,780(<ap=%rdi) +mov %edx,780(%rdi) + +# qhasm: temp1 = mem64[ap + 788] +# asm 1: mov 788(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 788(<ap=%rdi),>temp1=%esi +mov 788(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2608] +# asm 1: mov 2608(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2608(<ap=%rdi),>temp2=%edx +mov 2608(%rdi),%edx + +# qhasm: mem64[ap + 2608] = temp1 +# asm 1: mov <temp1=int64#2,2608(<ap=int64#1) +# asm 2: mov <temp1=%esi,2608(<ap=%rdi) +mov %esi,2608(%rdi) + +# qhasm: mem64[ap + 788] = temp2 +# asm 1: mov 
<temp2=int64#3,788(<ap=int64#1) +# asm 2: mov <temp2=%edx,788(<ap=%rdi) +mov %edx,788(%rdi) + +# qhasm: temp1 = mem64[ap + 792] +# asm 1: mov 792(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 792(<ap=%rdi),>temp1=%esi +mov 792(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1584] +# asm 1: mov 1584(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1584(<ap=%rdi),>temp2=%edx +mov 1584(%rdi),%edx + +# qhasm: mem64[ap + 1584] = temp1 +# asm 1: mov <temp1=int64#2,1584(<ap=int64#1) +# asm 2: mov <temp1=%esi,1584(<ap=%rdi) +mov %esi,1584(%rdi) + +# qhasm: mem64[ap + 792] = temp2 +# asm 1: mov <temp2=int64#3,792(<ap=int64#1) +# asm 2: mov <temp2=%edx,792(<ap=%rdi) +mov %edx,792(%rdi) + +# qhasm: temp1 = mem64[ap + 796] +# asm 1: mov 796(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 796(<ap=%rdi),>temp1=%esi +mov 796(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3632] +# asm 1: mov 3632(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3632(<ap=%rdi),>temp2=%edx +mov 3632(%rdi),%edx + +# qhasm: mem64[ap + 3632] = temp1 +# asm 1: mov <temp1=int64#2,3632(<ap=int64#1) +# asm 2: mov <temp1=%esi,3632(<ap=%rdi) +mov %esi,3632(%rdi) + +# qhasm: mem64[ap + 796] = temp2 +# asm 1: mov <temp2=int64#3,796(<ap=int64#1) +# asm 2: mov <temp2=%edx,796(<ap=%rdi) +mov %edx,796(%rdi) + +# qhasm: temp1 = mem64[ap + 804] +# asm 1: mov 804(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 804(<ap=%rdi),>temp1=%esi +mov 804(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2352] +# asm 1: mov 2352(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2352(<ap=%rdi),>temp2=%edx +mov 2352(%rdi),%edx + +# qhasm: mem64[ap + 2352] = temp1 +# asm 1: mov <temp1=int64#2,2352(<ap=int64#1) +# asm 2: mov <temp1=%esi,2352(<ap=%rdi) +mov %esi,2352(%rdi) + +# qhasm: mem64[ap + 804] = temp2 +# asm 1: mov <temp2=int64#3,804(<ap=int64#1) +# asm 2: mov <temp2=%edx,804(<ap=%rdi) +mov %edx,804(%rdi) + +# qhasm: temp1 = mem64[ap + 808] +# asm 1: mov 808(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 808(<ap=%rdi),>temp1=%esi +mov 808(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1328] 
+# asm 1: mov 1328(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1328(<ap=%rdi),>temp2=%edx +mov 1328(%rdi),%edx + +# qhasm: mem64[ap + 1328] = temp1 +# asm 1: mov <temp1=int64#2,1328(<ap=int64#1) +# asm 2: mov <temp1=%esi,1328(<ap=%rdi) +mov %esi,1328(%rdi) + +# qhasm: mem64[ap + 808] = temp2 +# asm 1: mov <temp2=int64#3,808(<ap=int64#1) +# asm 2: mov <temp2=%edx,808(<ap=%rdi) +mov %edx,808(%rdi) + +# qhasm: temp1 = mem64[ap + 812] +# asm 1: mov 812(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 812(<ap=%rdi),>temp1=%esi +mov 812(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3376] +# asm 1: mov 3376(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3376(<ap=%rdi),>temp2=%edx +mov 3376(%rdi),%edx + +# qhasm: mem64[ap + 3376] = temp1 +# asm 1: mov <temp1=int64#2,3376(<ap=int64#1) +# asm 2: mov <temp1=%esi,3376(<ap=%rdi) +mov %esi,3376(%rdi) + +# qhasm: mem64[ap + 812] = temp2 +# asm 1: mov <temp2=int64#3,812(<ap=int64#1) +# asm 2: mov <temp2=%edx,812(<ap=%rdi) +mov %edx,812(%rdi) + +# qhasm: temp1 = mem64[ap + 820] +# asm 1: mov 820(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 820(<ap=%rdi),>temp1=%esi +mov 820(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2864] +# asm 1: mov 2864(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2864(<ap=%rdi),>temp2=%edx +mov 2864(%rdi),%edx + +# qhasm: mem64[ap + 2864] = temp1 +# asm 1: mov <temp1=int64#2,2864(<ap=int64#1) +# asm 2: mov <temp1=%esi,2864(<ap=%rdi) +mov %esi,2864(%rdi) + +# qhasm: mem64[ap + 820] = temp2 +# asm 1: mov <temp2=int64#3,820(<ap=int64#1) +# asm 2: mov <temp2=%edx,820(<ap=%rdi) +mov %edx,820(%rdi) + +# qhasm: temp1 = mem64[ap + 824] +# asm 1: mov 824(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 824(<ap=%rdi),>temp1=%esi +mov 824(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1840] +# asm 1: mov 1840(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1840(<ap=%rdi),>temp2=%edx +mov 1840(%rdi),%edx + +# qhasm: mem64[ap + 1840] = temp1 +# asm 1: mov <temp1=int64#2,1840(<ap=int64#1) +# asm 2: mov <temp1=%esi,1840(<ap=%rdi) +mov %esi,1840(%rdi) + +# qhasm: 
mem64[ap + 824] = temp2 +# asm 1: mov <temp2=int64#3,824(<ap=int64#1) +# asm 2: mov <temp2=%edx,824(<ap=%rdi) +mov %edx,824(%rdi) + +# qhasm: temp1 = mem64[ap + 828] +# asm 1: mov 828(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 828(<ap=%rdi),>temp1=%esi +mov 828(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3888] +# asm 1: mov 3888(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3888(<ap=%rdi),>temp2=%edx +mov 3888(%rdi),%edx + +# qhasm: mem64[ap + 3888] = temp1 +# asm 1: mov <temp1=int64#2,3888(<ap=int64#1) +# asm 2: mov <temp1=%esi,3888(<ap=%rdi) +mov %esi,3888(%rdi) + +# qhasm: mem64[ap + 828] = temp2 +# asm 1: mov <temp2=int64#3,828(<ap=int64#1) +# asm 2: mov <temp2=%edx,828(<ap=%rdi) +mov %edx,828(%rdi) + +# qhasm: temp1 = mem64[ap + 836] +# asm 1: mov 836(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 836(<ap=%rdi),>temp1=%esi +mov 836(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2224] +# asm 1: mov 2224(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2224(<ap=%rdi),>temp2=%edx +mov 2224(%rdi),%edx + +# qhasm: mem64[ap + 2224] = temp1 +# asm 1: mov <temp1=int64#2,2224(<ap=int64#1) +# asm 2: mov <temp1=%esi,2224(<ap=%rdi) +mov %esi,2224(%rdi) + +# qhasm: mem64[ap + 836] = temp2 +# asm 1: mov <temp2=int64#3,836(<ap=int64#1) +# asm 2: mov <temp2=%edx,836(<ap=%rdi) +mov %edx,836(%rdi) + +# qhasm: temp1 = mem64[ap + 840] +# asm 1: mov 840(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 840(<ap=%rdi),>temp1=%esi +mov 840(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1200] +# asm 1: mov 1200(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1200(<ap=%rdi),>temp2=%edx +mov 1200(%rdi),%edx + +# qhasm: mem64[ap + 1200] = temp1 +# asm 1: mov <temp1=int64#2,1200(<ap=int64#1) +# asm 2: mov <temp1=%esi,1200(<ap=%rdi) +mov %esi,1200(%rdi) + +# qhasm: mem64[ap + 840] = temp2 +# asm 1: mov <temp2=int64#3,840(<ap=int64#1) +# asm 2: mov <temp2=%edx,840(<ap=%rdi) +mov %edx,840(%rdi) + +# qhasm: temp1 = mem64[ap + 844] +# asm 1: mov 844(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 844(<ap=%rdi),>temp1=%esi +mov 844(%rdi),%esi 
+ +# qhasm: temp2 = mem64[ap + 3248] +# asm 1: mov 3248(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3248(<ap=%rdi),>temp2=%edx +mov 3248(%rdi),%edx + +# qhasm: mem64[ap + 3248] = temp1 +# asm 1: mov <temp1=int64#2,3248(<ap=int64#1) +# asm 2: mov <temp1=%esi,3248(<ap=%rdi) +mov %esi,3248(%rdi) + +# qhasm: mem64[ap + 844] = temp2 +# asm 1: mov <temp2=int64#3,844(<ap=int64#1) +# asm 2: mov <temp2=%edx,844(<ap=%rdi) +mov %edx,844(%rdi) + +# qhasm: temp1 = mem64[ap + 852] +# asm 1: mov 852(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 852(<ap=%rdi),>temp1=%esi +mov 852(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2736] +# asm 1: mov 2736(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2736(<ap=%rdi),>temp2=%edx +mov 2736(%rdi),%edx + +# qhasm: mem64[ap + 2736] = temp1 +# asm 1: mov <temp1=int64#2,2736(<ap=int64#1) +# asm 2: mov <temp1=%esi,2736(<ap=%rdi) +mov %esi,2736(%rdi) + +# qhasm: mem64[ap + 852] = temp2 +# asm 1: mov <temp2=int64#3,852(<ap=int64#1) +# asm 2: mov <temp2=%edx,852(<ap=%rdi) +mov %edx,852(%rdi) + +# qhasm: temp1 = mem64[ap + 856] +# asm 1: mov 856(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 856(<ap=%rdi),>temp1=%esi +mov 856(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1712] +# asm 1: mov 1712(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1712(<ap=%rdi),>temp2=%edx +mov 1712(%rdi),%edx + +# qhasm: mem64[ap + 1712] = temp1 +# asm 1: mov <temp1=int64#2,1712(<ap=int64#1) +# asm 2: mov <temp1=%esi,1712(<ap=%rdi) +mov %esi,1712(%rdi) + +# qhasm: mem64[ap + 856] = temp2 +# asm 1: mov <temp2=int64#3,856(<ap=int64#1) +# asm 2: mov <temp2=%edx,856(<ap=%rdi) +mov %edx,856(%rdi) + +# qhasm: temp1 = mem64[ap + 860] +# asm 1: mov 860(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 860(<ap=%rdi),>temp1=%esi +mov 860(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3760] +# asm 1: mov 3760(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3760(<ap=%rdi),>temp2=%edx +mov 3760(%rdi),%edx + +# qhasm: mem64[ap + 3760] = temp1 +# asm 1: mov <temp1=int64#2,3760(<ap=int64#1) +# asm 2: mov <temp1=%esi,3760(<ap=%rdi) 
+mov %esi,3760(%rdi) + +# qhasm: mem64[ap + 860] = temp2 +# asm 1: mov <temp2=int64#3,860(<ap=int64#1) +# asm 2: mov <temp2=%edx,860(<ap=%rdi) +mov %edx,860(%rdi) + +# qhasm: temp1 = mem64[ap + 868] +# asm 1: mov 868(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 868(<ap=%rdi),>temp1=%esi +mov 868(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2480] +# asm 1: mov 2480(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2480(<ap=%rdi),>temp2=%edx +mov 2480(%rdi),%edx + +# qhasm: mem64[ap + 2480] = temp1 +# asm 1: mov <temp1=int64#2,2480(<ap=int64#1) +# asm 2: mov <temp1=%esi,2480(<ap=%rdi) +mov %esi,2480(%rdi) + +# qhasm: mem64[ap + 868] = temp2 +# asm 1: mov <temp2=int64#3,868(<ap=int64#1) +# asm 2: mov <temp2=%edx,868(<ap=%rdi) +mov %edx,868(%rdi) + +# qhasm: temp1 = mem64[ap + 872] +# asm 1: mov 872(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 872(<ap=%rdi),>temp1=%esi +mov 872(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1456] +# asm 1: mov 1456(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1456(<ap=%rdi),>temp2=%edx +mov 1456(%rdi),%edx + +# qhasm: mem64[ap + 1456] = temp1 +# asm 1: mov <temp1=int64#2,1456(<ap=int64#1) +# asm 2: mov <temp1=%esi,1456(<ap=%rdi) +mov %esi,1456(%rdi) + +# qhasm: mem64[ap + 872] = temp2 +# asm 1: mov <temp2=int64#3,872(<ap=int64#1) +# asm 2: mov <temp2=%edx,872(<ap=%rdi) +mov %edx,872(%rdi) + +# qhasm: temp1 = mem64[ap + 876] +# asm 1: mov 876(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 876(<ap=%rdi),>temp1=%esi +mov 876(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3504] +# asm 1: mov 3504(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3504(<ap=%rdi),>temp2=%edx +mov 3504(%rdi),%edx + +# qhasm: mem64[ap + 3504] = temp1 +# asm 1: mov <temp1=int64#2,3504(<ap=int64#1) +# asm 2: mov <temp1=%esi,3504(<ap=%rdi) +mov %esi,3504(%rdi) + +# qhasm: mem64[ap + 876] = temp2 +# asm 1: mov <temp2=int64#3,876(<ap=int64#1) +# asm 2: mov <temp2=%edx,876(<ap=%rdi) +mov %edx,876(%rdi) + +# qhasm: temp1 = mem64[ap + 880] +# asm 1: mov 880(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 
880(<ap=%rdi),>temp1=%esi +mov 880(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 944] +# asm 1: mov 944(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 944(<ap=%rdi),>temp2=%edx +mov 944(%rdi),%edx + +# qhasm: mem64[ap + 944] = temp1 +# asm 1: mov <temp1=int64#2,944(<ap=int64#1) +# asm 2: mov <temp1=%esi,944(<ap=%rdi) +mov %esi,944(%rdi) + +# qhasm: mem64[ap + 880] = temp2 +# asm 1: mov <temp2=int64#3,880(<ap=int64#1) +# asm 2: mov <temp2=%edx,880(<ap=%rdi) +mov %edx,880(%rdi) + +# qhasm: temp1 = mem64[ap + 884] +# asm 1: mov 884(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 884(<ap=%rdi),>temp1=%esi +mov 884(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2992] +# asm 1: mov 2992(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2992(<ap=%rdi),>temp2=%edx +mov 2992(%rdi),%edx + +# qhasm: mem64[ap + 2992] = temp1 +# asm 1: mov <temp1=int64#2,2992(<ap=int64#1) +# asm 2: mov <temp1=%esi,2992(<ap=%rdi) +mov %esi,2992(%rdi) + +# qhasm: mem64[ap + 884] = temp2 +# asm 1: mov <temp2=int64#3,884(<ap=int64#1) +# asm 2: mov <temp2=%edx,884(<ap=%rdi) +mov %edx,884(%rdi) + +# qhasm: temp1 = mem64[ap + 888] +# asm 1: mov 888(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 888(<ap=%rdi),>temp1=%esi +mov 888(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1968] +# asm 1: mov 1968(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1968(<ap=%rdi),>temp2=%edx +mov 1968(%rdi),%edx + +# qhasm: mem64[ap + 1968] = temp1 +# asm 1: mov <temp1=int64#2,1968(<ap=int64#1) +# asm 2: mov <temp1=%esi,1968(<ap=%rdi) +mov %esi,1968(%rdi) + +# qhasm: mem64[ap + 888] = temp2 +# asm 1: mov <temp2=int64#3,888(<ap=int64#1) +# asm 2: mov <temp2=%edx,888(<ap=%rdi) +mov %edx,888(%rdi) + +# qhasm: temp1 = mem64[ap + 892] +# asm 1: mov 892(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 892(<ap=%rdi),>temp1=%esi +mov 892(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4016] +# asm 1: mov 4016(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4016(<ap=%rdi),>temp2=%edx +mov 4016(%rdi),%edx + +# qhasm: mem64[ap + 4016] = temp1 +# asm 1: mov <temp1=int64#2,4016(<ap=int64#1) +# 
asm 2: mov <temp1=%esi,4016(<ap=%rdi) +mov %esi,4016(%rdi) + +# qhasm: mem64[ap + 892] = temp2 +# asm 1: mov <temp2=int64#3,892(<ap=int64#1) +# asm 2: mov <temp2=%edx,892(<ap=%rdi) +mov %edx,892(%rdi) + +# qhasm: temp1 = mem64[ap + 900] +# asm 1: mov 900(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 900(<ap=%rdi),>temp1=%esi +mov 900(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2160] +# asm 1: mov 2160(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2160(<ap=%rdi),>temp2=%edx +mov 2160(%rdi),%edx + +# qhasm: mem64[ap + 2160] = temp1 +# asm 1: mov <temp1=int64#2,2160(<ap=int64#1) +# asm 2: mov <temp1=%esi,2160(<ap=%rdi) +mov %esi,2160(%rdi) + +# qhasm: mem64[ap + 900] = temp2 +# asm 1: mov <temp2=int64#3,900(<ap=int64#1) +# asm 2: mov <temp2=%edx,900(<ap=%rdi) +mov %edx,900(%rdi) + +# qhasm: temp1 = mem64[ap + 904] +# asm 1: mov 904(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 904(<ap=%rdi),>temp1=%esi +mov 904(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1136] +# asm 1: mov 1136(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1136(<ap=%rdi),>temp2=%edx +mov 1136(%rdi),%edx + +# qhasm: mem64[ap + 1136] = temp1 +# asm 1: mov <temp1=int64#2,1136(<ap=int64#1) +# asm 2: mov <temp1=%esi,1136(<ap=%rdi) +mov %esi,1136(%rdi) + +# qhasm: mem64[ap + 904] = temp2 +# asm 1: mov <temp2=int64#3,904(<ap=int64#1) +# asm 2: mov <temp2=%edx,904(<ap=%rdi) +mov %edx,904(%rdi) + +# qhasm: temp1 = mem64[ap + 908] +# asm 1: mov 908(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 908(<ap=%rdi),>temp1=%esi +mov 908(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3184] +# asm 1: mov 3184(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3184(<ap=%rdi),>temp2=%edx +mov 3184(%rdi),%edx + +# qhasm: mem64[ap + 3184] = temp1 +# asm 1: mov <temp1=int64#2,3184(<ap=int64#1) +# asm 2: mov <temp1=%esi,3184(<ap=%rdi) +mov %esi,3184(%rdi) + +# qhasm: mem64[ap + 908] = temp2 +# asm 1: mov <temp2=int64#3,908(<ap=int64#1) +# asm 2: mov <temp2=%edx,908(<ap=%rdi) +mov %edx,908(%rdi) + +# qhasm: temp1 = mem64[ap + 916] +# asm 1: mov 
916(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 916(<ap=%rdi),>temp1=%esi +mov 916(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2672] +# asm 1: mov 2672(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2672(<ap=%rdi),>temp2=%edx +mov 2672(%rdi),%edx + +# qhasm: mem64[ap + 2672] = temp1 +# asm 1: mov <temp1=int64#2,2672(<ap=int64#1) +# asm 2: mov <temp1=%esi,2672(<ap=%rdi) +mov %esi,2672(%rdi) + +# qhasm: mem64[ap + 916] = temp2 +# asm 1: mov <temp2=int64#3,916(<ap=int64#1) +# asm 2: mov <temp2=%edx,916(<ap=%rdi) +mov %edx,916(%rdi) + +# qhasm: temp1 = mem64[ap + 920] +# asm 1: mov 920(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 920(<ap=%rdi),>temp1=%esi +mov 920(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1648] +# asm 1: mov 1648(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1648(<ap=%rdi),>temp2=%edx +mov 1648(%rdi),%edx + +# qhasm: mem64[ap + 1648] = temp1 +# asm 1: mov <temp1=int64#2,1648(<ap=int64#1) +# asm 2: mov <temp1=%esi,1648(<ap=%rdi) +mov %esi,1648(%rdi) + +# qhasm: mem64[ap + 920] = temp2 +# asm 1: mov <temp2=int64#3,920(<ap=int64#1) +# asm 2: mov <temp2=%edx,920(<ap=%rdi) +mov %edx,920(%rdi) + +# qhasm: temp1 = mem64[ap + 924] +# asm 1: mov 924(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 924(<ap=%rdi),>temp1=%esi +mov 924(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3696] +# asm 1: mov 3696(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3696(<ap=%rdi),>temp2=%edx +mov 3696(%rdi),%edx + +# qhasm: mem64[ap + 3696] = temp1 +# asm 1: mov <temp1=int64#2,3696(<ap=int64#1) +# asm 2: mov <temp1=%esi,3696(<ap=%rdi) +mov %esi,3696(%rdi) + +# qhasm: mem64[ap + 924] = temp2 +# asm 1: mov <temp2=int64#3,924(<ap=int64#1) +# asm 2: mov <temp2=%edx,924(<ap=%rdi) +mov %edx,924(%rdi) + +# qhasm: temp1 = mem64[ap + 932] +# asm 1: mov 932(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 932(<ap=%rdi),>temp1=%esi +mov 932(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2416] +# asm 1: mov 2416(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2416(<ap=%rdi),>temp2=%edx +mov 2416(%rdi),%edx + +# qhasm: mem64[ap + 2416] = 
temp1 +# asm 1: mov <temp1=int64#2,2416(<ap=int64#1) +# asm 2: mov <temp1=%esi,2416(<ap=%rdi) +mov %esi,2416(%rdi) + +# qhasm: mem64[ap + 932] = temp2 +# asm 1: mov <temp2=int64#3,932(<ap=int64#1) +# asm 2: mov <temp2=%edx,932(<ap=%rdi) +mov %edx,932(%rdi) + +# qhasm: temp1 = mem64[ap + 936] +# asm 1: mov 936(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 936(<ap=%rdi),>temp1=%esi +mov 936(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1392] +# asm 1: mov 1392(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1392(<ap=%rdi),>temp2=%edx +mov 1392(%rdi),%edx + +# qhasm: mem64[ap + 1392] = temp1 +# asm 1: mov <temp1=int64#2,1392(<ap=int64#1) +# asm 2: mov <temp1=%esi,1392(<ap=%rdi) +mov %esi,1392(%rdi) + +# qhasm: mem64[ap + 936] = temp2 +# asm 1: mov <temp2=int64#3,936(<ap=int64#1) +# asm 2: mov <temp2=%edx,936(<ap=%rdi) +mov %edx,936(%rdi) + +# qhasm: temp1 = mem64[ap + 940] +# asm 1: mov 940(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 940(<ap=%rdi),>temp1=%esi +mov 940(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3440] +# asm 1: mov 3440(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3440(<ap=%rdi),>temp2=%edx +mov 3440(%rdi),%edx + +# qhasm: mem64[ap + 3440] = temp1 +# asm 1: mov <temp1=int64#2,3440(<ap=int64#1) +# asm 2: mov <temp1=%esi,3440(<ap=%rdi) +mov %esi,3440(%rdi) + +# qhasm: mem64[ap + 940] = temp2 +# asm 1: mov <temp2=int64#3,940(<ap=int64#1) +# asm 2: mov <temp2=%edx,940(<ap=%rdi) +mov %edx,940(%rdi) + +# qhasm: temp1 = mem64[ap + 948] +# asm 1: mov 948(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 948(<ap=%rdi),>temp1=%esi +mov 948(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2928] +# asm 1: mov 2928(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2928(<ap=%rdi),>temp2=%edx +mov 2928(%rdi),%edx + +# qhasm: mem64[ap + 2928] = temp1 +# asm 1: mov <temp1=int64#2,2928(<ap=int64#1) +# asm 2: mov <temp1=%esi,2928(<ap=%rdi) +mov %esi,2928(%rdi) + +# qhasm: mem64[ap + 948] = temp2 +# asm 1: mov <temp2=int64#3,948(<ap=int64#1) +# asm 2: mov <temp2=%edx,948(<ap=%rdi) +mov %edx,948(%rdi) + +# qhasm: 
temp1 = mem64[ap + 952] +# asm 1: mov 952(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 952(<ap=%rdi),>temp1=%esi +mov 952(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1904] +# asm 1: mov 1904(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1904(<ap=%rdi),>temp2=%edx +mov 1904(%rdi),%edx + +# qhasm: mem64[ap + 1904] = temp1 +# asm 1: mov <temp1=int64#2,1904(<ap=int64#1) +# asm 2: mov <temp1=%esi,1904(<ap=%rdi) +mov %esi,1904(%rdi) + +# qhasm: mem64[ap + 952] = temp2 +# asm 1: mov <temp2=int64#3,952(<ap=int64#1) +# asm 2: mov <temp2=%edx,952(<ap=%rdi) +mov %edx,952(%rdi) + +# qhasm: temp1 = mem64[ap + 956] +# asm 1: mov 956(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 956(<ap=%rdi),>temp1=%esi +mov 956(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3952] +# asm 1: mov 3952(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3952(<ap=%rdi),>temp2=%edx +mov 3952(%rdi),%edx + +# qhasm: mem64[ap + 3952] = temp1 +# asm 1: mov <temp1=int64#2,3952(<ap=int64#1) +# asm 2: mov <temp1=%esi,3952(<ap=%rdi) +mov %esi,3952(%rdi) + +# qhasm: mem64[ap + 956] = temp2 +# asm 1: mov <temp2=int64#3,956(<ap=int64#1) +# asm 2: mov <temp2=%edx,956(<ap=%rdi) +mov %edx,956(%rdi) + +# qhasm: temp1 = mem64[ap + 964] +# asm 1: mov 964(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 964(<ap=%rdi),>temp1=%esi +mov 964(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2288] +# asm 1: mov 2288(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2288(<ap=%rdi),>temp2=%edx +mov 2288(%rdi),%edx + +# qhasm: mem64[ap + 2288] = temp1 +# asm 1: mov <temp1=int64#2,2288(<ap=int64#1) +# asm 2: mov <temp1=%esi,2288(<ap=%rdi) +mov %esi,2288(%rdi) + +# qhasm: mem64[ap + 964] = temp2 +# asm 1: mov <temp2=int64#3,964(<ap=int64#1) +# asm 2: mov <temp2=%edx,964(<ap=%rdi) +mov %edx,964(%rdi) + +# qhasm: temp1 = mem64[ap + 968] +# asm 1: mov 968(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 968(<ap=%rdi),>temp1=%esi +mov 968(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1264] +# asm 1: mov 1264(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1264(<ap=%rdi),>temp2=%edx +mov 
1264(%rdi),%edx + +# qhasm: mem64[ap + 1264] = temp1 +# asm 1: mov <temp1=int64#2,1264(<ap=int64#1) +# asm 2: mov <temp1=%esi,1264(<ap=%rdi) +mov %esi,1264(%rdi) + +# qhasm: mem64[ap + 968] = temp2 +# asm 1: mov <temp2=int64#3,968(<ap=int64#1) +# asm 2: mov <temp2=%edx,968(<ap=%rdi) +mov %edx,968(%rdi) + +# qhasm: temp1 = mem64[ap + 972] +# asm 1: mov 972(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 972(<ap=%rdi),>temp1=%esi +mov 972(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3312] +# asm 1: mov 3312(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3312(<ap=%rdi),>temp2=%edx +mov 3312(%rdi),%edx + +# qhasm: mem64[ap + 3312] = temp1 +# asm 1: mov <temp1=int64#2,3312(<ap=int64#1) +# asm 2: mov <temp1=%esi,3312(<ap=%rdi) +mov %esi,3312(%rdi) + +# qhasm: mem64[ap + 972] = temp2 +# asm 1: mov <temp2=int64#3,972(<ap=int64#1) +# asm 2: mov <temp2=%edx,972(<ap=%rdi) +mov %edx,972(%rdi) + +# qhasm: temp1 = mem64[ap + 980] +# asm 1: mov 980(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 980(<ap=%rdi),>temp1=%esi +mov 980(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2800] +# asm 1: mov 2800(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2800(<ap=%rdi),>temp2=%edx +mov 2800(%rdi),%edx + +# qhasm: mem64[ap + 2800] = temp1 +# asm 1: mov <temp1=int64#2,2800(<ap=int64#1) +# asm 2: mov <temp1=%esi,2800(<ap=%rdi) +mov %esi,2800(%rdi) + +# qhasm: mem64[ap + 980] = temp2 +# asm 1: mov <temp2=int64#3,980(<ap=int64#1) +# asm 2: mov <temp2=%edx,980(<ap=%rdi) +mov %edx,980(%rdi) + +# qhasm: temp1 = mem64[ap + 984] +# asm 1: mov 984(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 984(<ap=%rdi),>temp1=%esi +mov 984(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1776] +# asm 1: mov 1776(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1776(<ap=%rdi),>temp2=%edx +mov 1776(%rdi),%edx + +# qhasm: mem64[ap + 1776] = temp1 +# asm 1: mov <temp1=int64#2,1776(<ap=int64#1) +# asm 2: mov <temp1=%esi,1776(<ap=%rdi) +mov %esi,1776(%rdi) + +# qhasm: mem64[ap + 984] = temp2 +# asm 1: mov <temp2=int64#3,984(<ap=int64#1) +# asm 2: mov 
<temp2=%edx,984(<ap=%rdi) +mov %edx,984(%rdi) + +# qhasm: temp1 = mem64[ap + 988] +# asm 1: mov 988(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 988(<ap=%rdi),>temp1=%esi +mov 988(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3824] +# asm 1: mov 3824(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3824(<ap=%rdi),>temp2=%edx +mov 3824(%rdi),%edx + +# qhasm: mem64[ap + 3824] = temp1 +# asm 1: mov <temp1=int64#2,3824(<ap=int64#1) +# asm 2: mov <temp1=%esi,3824(<ap=%rdi) +mov %esi,3824(%rdi) + +# qhasm: mem64[ap + 988] = temp2 +# asm 1: mov <temp2=int64#3,988(<ap=int64#1) +# asm 2: mov <temp2=%edx,988(<ap=%rdi) +mov %edx,988(%rdi) + +# qhasm: temp1 = mem64[ap + 996] +# asm 1: mov 996(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 996(<ap=%rdi),>temp1=%esi +mov 996(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2544] +# asm 1: mov 2544(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2544(<ap=%rdi),>temp2=%edx +mov 2544(%rdi),%edx + +# qhasm: mem64[ap + 2544] = temp1 +# asm 1: mov <temp1=int64#2,2544(<ap=int64#1) +# asm 2: mov <temp1=%esi,2544(<ap=%rdi) +mov %esi,2544(%rdi) + +# qhasm: mem64[ap + 996] = temp2 +# asm 1: mov <temp2=int64#3,996(<ap=int64#1) +# asm 2: mov <temp2=%edx,996(<ap=%rdi) +mov %edx,996(%rdi) + +# qhasm: temp1 = mem64[ap + 1000] +# asm 1: mov 1000(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1000(<ap=%rdi),>temp1=%esi +mov 1000(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1520] +# asm 1: mov 1520(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1520(<ap=%rdi),>temp2=%edx +mov 1520(%rdi),%edx + +# qhasm: mem64[ap + 1520] = temp1 +# asm 1: mov <temp1=int64#2,1520(<ap=int64#1) +# asm 2: mov <temp1=%esi,1520(<ap=%rdi) +mov %esi,1520(%rdi) + +# qhasm: mem64[ap + 1000] = temp2 +# asm 1: mov <temp2=int64#3,1000(<ap=int64#1) +# asm 2: mov <temp2=%edx,1000(<ap=%rdi) +mov %edx,1000(%rdi) + +# qhasm: temp1 = mem64[ap + 1004] +# asm 1: mov 1004(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1004(<ap=%rdi),>temp1=%esi +mov 1004(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3568] +# asm 1: mov 
3568(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3568(<ap=%rdi),>temp2=%edx +mov 3568(%rdi),%edx + +# qhasm: mem64[ap + 3568] = temp1 +# asm 1: mov <temp1=int64#2,3568(<ap=int64#1) +# asm 2: mov <temp1=%esi,3568(<ap=%rdi) +mov %esi,3568(%rdi) + +# qhasm: mem64[ap + 1004] = temp2 +# asm 1: mov <temp2=int64#3,1004(<ap=int64#1) +# asm 2: mov <temp2=%edx,1004(<ap=%rdi) +mov %edx,1004(%rdi) + +# qhasm: temp1 = mem64[ap + 1012] +# asm 1: mov 1012(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1012(<ap=%rdi),>temp1=%esi +mov 1012(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3056] +# asm 1: mov 3056(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3056(<ap=%rdi),>temp2=%edx +mov 3056(%rdi),%edx + +# qhasm: mem64[ap + 3056] = temp1 +# asm 1: mov <temp1=int64#2,3056(<ap=int64#1) +# asm 2: mov <temp1=%esi,3056(<ap=%rdi) +mov %esi,3056(%rdi) + +# qhasm: mem64[ap + 1012] = temp2 +# asm 1: mov <temp2=int64#3,1012(<ap=int64#1) +# asm 2: mov <temp2=%edx,1012(<ap=%rdi) +mov %edx,1012(%rdi) + +# qhasm: temp1 = mem64[ap + 1016] +# asm 1: mov 1016(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1016(<ap=%rdi),>temp1=%esi +mov 1016(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2032] +# asm 1: mov 2032(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2032(<ap=%rdi),>temp2=%edx +mov 2032(%rdi),%edx + +# qhasm: mem64[ap + 2032] = temp1 +# asm 1: mov <temp1=int64#2,2032(<ap=int64#1) +# asm 2: mov <temp1=%esi,2032(<ap=%rdi) +mov %esi,2032(%rdi) + +# qhasm: mem64[ap + 1016] = temp2 +# asm 1: mov <temp2=int64#3,1016(<ap=int64#1) +# asm 2: mov <temp2=%edx,1016(<ap=%rdi) +mov %edx,1016(%rdi) + +# qhasm: temp1 = mem64[ap + 1020] +# asm 1: mov 1020(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1020(<ap=%rdi),>temp1=%esi +mov 1020(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4080] +# asm 1: mov 4080(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4080(<ap=%rdi),>temp2=%edx +mov 4080(%rdi),%edx + +# qhasm: mem64[ap + 4080] = temp1 +# asm 1: mov <temp1=int64#2,4080(<ap=int64#1) +# asm 2: mov <temp1=%esi,4080(<ap=%rdi) +mov %esi,4080(%rdi) + +# 
qhasm: mem64[ap + 1020] = temp2 +# asm 1: mov <temp2=int64#3,1020(<ap=int64#1) +# asm 2: mov <temp2=%edx,1020(<ap=%rdi) +mov %edx,1020(%rdi) + +# qhasm: temp1 = mem64[ap + 1028] +# asm 1: mov 1028(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1028(<ap=%rdi),>temp1=%esi +mov 1028(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2056] +# asm 1: mov 2056(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2056(<ap=%rdi),>temp2=%edx +mov 2056(%rdi),%edx + +# qhasm: mem64[ap + 2056] = temp1 +# asm 1: mov <temp1=int64#2,2056(<ap=int64#1) +# asm 2: mov <temp1=%esi,2056(<ap=%rdi) +mov %esi,2056(%rdi) + +# qhasm: mem64[ap + 1028] = temp2 +# asm 1: mov <temp2=int64#3,1028(<ap=int64#1) +# asm 2: mov <temp2=%edx,1028(<ap=%rdi) +mov %edx,1028(%rdi) + +# qhasm: temp1 = mem64[ap + 1036] +# asm 1: mov 1036(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1036(<ap=%rdi),>temp1=%esi +mov 1036(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3080] +# asm 1: mov 3080(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3080(<ap=%rdi),>temp2=%edx +mov 3080(%rdi),%edx + +# qhasm: mem64[ap + 3080] = temp1 +# asm 1: mov <temp1=int64#2,3080(<ap=int64#1) +# asm 2: mov <temp1=%esi,3080(<ap=%rdi) +mov %esi,3080(%rdi) + +# qhasm: mem64[ap + 1036] = temp2 +# asm 1: mov <temp2=int64#3,1036(<ap=int64#1) +# asm 2: mov <temp2=%edx,1036(<ap=%rdi) +mov %edx,1036(%rdi) + +# qhasm: temp1 = mem64[ap + 1044] +# asm 1: mov 1044(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1044(<ap=%rdi),>temp1=%esi +mov 1044(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2568] +# asm 1: mov 2568(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2568(<ap=%rdi),>temp2=%edx +mov 2568(%rdi),%edx + +# qhasm: mem64[ap + 2568] = temp1 +# asm 1: mov <temp1=int64#2,2568(<ap=int64#1) +# asm 2: mov <temp1=%esi,2568(<ap=%rdi) +mov %esi,2568(%rdi) + +# qhasm: mem64[ap + 1044] = temp2 +# asm 1: mov <temp2=int64#3,1044(<ap=int64#1) +# asm 2: mov <temp2=%edx,1044(<ap=%rdi) +mov %edx,1044(%rdi) + +# qhasm: temp1 = mem64[ap + 1048] +# asm 1: mov 1048(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 
1048(<ap=%rdi),>temp1=%esi +mov 1048(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1544] +# asm 1: mov 1544(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1544(<ap=%rdi),>temp2=%edx +mov 1544(%rdi),%edx + +# qhasm: mem64[ap + 1544] = temp1 +# asm 1: mov <temp1=int64#2,1544(<ap=int64#1) +# asm 2: mov <temp1=%esi,1544(<ap=%rdi) +mov %esi,1544(%rdi) + +# qhasm: mem64[ap + 1048] = temp2 +# asm 1: mov <temp2=int64#3,1048(<ap=int64#1) +# asm 2: mov <temp2=%edx,1048(<ap=%rdi) +mov %edx,1048(%rdi) + +# qhasm: temp1 = mem64[ap + 1052] +# asm 1: mov 1052(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1052(<ap=%rdi),>temp1=%esi +mov 1052(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3592] +# asm 1: mov 3592(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3592(<ap=%rdi),>temp2=%edx +mov 3592(%rdi),%edx + +# qhasm: mem64[ap + 3592] = temp1 +# asm 1: mov <temp1=int64#2,3592(<ap=int64#1) +# asm 2: mov <temp1=%esi,3592(<ap=%rdi) +mov %esi,3592(%rdi) + +# qhasm: mem64[ap + 1052] = temp2 +# asm 1: mov <temp2=int64#3,1052(<ap=int64#1) +# asm 2: mov <temp2=%edx,1052(<ap=%rdi) +mov %edx,1052(%rdi) + +# qhasm: temp1 = mem64[ap + 1060] +# asm 1: mov 1060(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1060(<ap=%rdi),>temp1=%esi +mov 1060(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2312] +# asm 1: mov 2312(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2312(<ap=%rdi),>temp2=%edx +mov 2312(%rdi),%edx + +# qhasm: mem64[ap + 2312] = temp1 +# asm 1: mov <temp1=int64#2,2312(<ap=int64#1) +# asm 2: mov <temp1=%esi,2312(<ap=%rdi) +mov %esi,2312(%rdi) + +# qhasm: mem64[ap + 1060] = temp2 +# asm 1: mov <temp2=int64#3,1060(<ap=int64#1) +# asm 2: mov <temp2=%edx,1060(<ap=%rdi) +mov %edx,1060(%rdi) + +# qhasm: temp1 = mem64[ap + 1064] +# asm 1: mov 1064(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1064(<ap=%rdi),>temp1=%esi +mov 1064(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1288] +# asm 1: mov 1288(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1288(<ap=%rdi),>temp2=%edx +mov 1288(%rdi),%edx + +# qhasm: mem64[ap + 1288] = temp1 +# asm 1: mov 
<temp1=int64#2,1288(<ap=int64#1) +# asm 2: mov <temp1=%esi,1288(<ap=%rdi) +mov %esi,1288(%rdi) + +# qhasm: mem64[ap + 1064] = temp2 +# asm 1: mov <temp2=int64#3,1064(<ap=int64#1) +# asm 2: mov <temp2=%edx,1064(<ap=%rdi) +mov %edx,1064(%rdi) + +# qhasm: temp1 = mem64[ap + 1068] +# asm 1: mov 1068(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1068(<ap=%rdi),>temp1=%esi +mov 1068(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3336] +# asm 1: mov 3336(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3336(<ap=%rdi),>temp2=%edx +mov 3336(%rdi),%edx + +# qhasm: mem64[ap + 3336] = temp1 +# asm 1: mov <temp1=int64#2,3336(<ap=int64#1) +# asm 2: mov <temp1=%esi,3336(<ap=%rdi) +mov %esi,3336(%rdi) + +# qhasm: mem64[ap + 1068] = temp2 +# asm 1: mov <temp2=int64#3,1068(<ap=int64#1) +# asm 2: mov <temp2=%edx,1068(<ap=%rdi) +mov %edx,1068(%rdi) + +# qhasm: temp1 = mem64[ap + 1076] +# asm 1: mov 1076(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1076(<ap=%rdi),>temp1=%esi +mov 1076(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2824] +# asm 1: mov 2824(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2824(<ap=%rdi),>temp2=%edx +mov 2824(%rdi),%edx + +# qhasm: mem64[ap + 2824] = temp1 +# asm 1: mov <temp1=int64#2,2824(<ap=int64#1) +# asm 2: mov <temp1=%esi,2824(<ap=%rdi) +mov %esi,2824(%rdi) + +# qhasm: mem64[ap + 1076] = temp2 +# asm 1: mov <temp2=int64#3,1076(<ap=int64#1) +# asm 2: mov <temp2=%edx,1076(<ap=%rdi) +mov %edx,1076(%rdi) + +# qhasm: temp1 = mem64[ap + 1080] +# asm 1: mov 1080(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1080(<ap=%rdi),>temp1=%esi +mov 1080(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1800] +# asm 1: mov 1800(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1800(<ap=%rdi),>temp2=%edx +mov 1800(%rdi),%edx + +# qhasm: mem64[ap + 1800] = temp1 +# asm 1: mov <temp1=int64#2,1800(<ap=int64#1) +# asm 2: mov <temp1=%esi,1800(<ap=%rdi) +mov %esi,1800(%rdi) + +# qhasm: mem64[ap + 1080] = temp2 +# asm 1: mov <temp2=int64#3,1080(<ap=int64#1) +# asm 2: mov <temp2=%edx,1080(<ap=%rdi) +mov %edx,1080(%rdi) + +# 
qhasm: temp1 = mem64[ap + 1084] +# asm 1: mov 1084(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1084(<ap=%rdi),>temp1=%esi +mov 1084(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3848] +# asm 1: mov 3848(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3848(<ap=%rdi),>temp2=%edx +mov 3848(%rdi),%edx + +# qhasm: mem64[ap + 3848] = temp1 +# asm 1: mov <temp1=int64#2,3848(<ap=int64#1) +# asm 2: mov <temp1=%esi,3848(<ap=%rdi) +mov %esi,3848(%rdi) + +# qhasm: mem64[ap + 1084] = temp2 +# asm 1: mov <temp2=int64#3,1084(<ap=int64#1) +# asm 2: mov <temp2=%edx,1084(<ap=%rdi) +mov %edx,1084(%rdi) + +# qhasm: temp1 = mem64[ap + 1092] +# asm 1: mov 1092(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1092(<ap=%rdi),>temp1=%esi +mov 1092(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2184] +# asm 1: mov 2184(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2184(<ap=%rdi),>temp2=%edx +mov 2184(%rdi),%edx + +# qhasm: mem64[ap + 2184] = temp1 +# asm 1: mov <temp1=int64#2,2184(<ap=int64#1) +# asm 2: mov <temp1=%esi,2184(<ap=%rdi) +mov %esi,2184(%rdi) + +# qhasm: mem64[ap + 1092] = temp2 +# asm 1: mov <temp2=int64#3,1092(<ap=int64#1) +# asm 2: mov <temp2=%edx,1092(<ap=%rdi) +mov %edx,1092(%rdi) + +# qhasm: temp1 = mem64[ap + 1096] +# asm 1: mov 1096(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1096(<ap=%rdi),>temp1=%esi +mov 1096(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1160] +# asm 1: mov 1160(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1160(<ap=%rdi),>temp2=%edx +mov 1160(%rdi),%edx + +# qhasm: mem64[ap + 1160] = temp1 +# asm 1: mov <temp1=int64#2,1160(<ap=int64#1) +# asm 2: mov <temp1=%esi,1160(<ap=%rdi) +mov %esi,1160(%rdi) + +# qhasm: mem64[ap + 1096] = temp2 +# asm 1: mov <temp2=int64#3,1096(<ap=int64#1) +# asm 2: mov <temp2=%edx,1096(<ap=%rdi) +mov %edx,1096(%rdi) + +# qhasm: temp1 = mem64[ap + 1100] +# asm 1: mov 1100(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1100(<ap=%rdi),>temp1=%esi +mov 1100(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3208] +# asm 1: mov 3208(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 
3208(<ap=%rdi),>temp2=%edx +mov 3208(%rdi),%edx + +# qhasm: mem64[ap + 3208] = temp1 +# asm 1: mov <temp1=int64#2,3208(<ap=int64#1) +# asm 2: mov <temp1=%esi,3208(<ap=%rdi) +mov %esi,3208(%rdi) + +# qhasm: mem64[ap + 1100] = temp2 +# asm 1: mov <temp2=int64#3,1100(<ap=int64#1) +# asm 2: mov <temp2=%edx,1100(<ap=%rdi) +mov %edx,1100(%rdi) + +# qhasm: temp1 = mem64[ap + 1108] +# asm 1: mov 1108(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1108(<ap=%rdi),>temp1=%esi +mov 1108(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2696] +# asm 1: mov 2696(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2696(<ap=%rdi),>temp2=%edx +mov 2696(%rdi),%edx + +# qhasm: mem64[ap + 2696] = temp1 +# asm 1: mov <temp1=int64#2,2696(<ap=int64#1) +# asm 2: mov <temp1=%esi,2696(<ap=%rdi) +mov %esi,2696(%rdi) + +# qhasm: mem64[ap + 1108] = temp2 +# asm 1: mov <temp2=int64#3,1108(<ap=int64#1) +# asm 2: mov <temp2=%edx,1108(<ap=%rdi) +mov %edx,1108(%rdi) + +# qhasm: temp1 = mem64[ap + 1112] +# asm 1: mov 1112(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1112(<ap=%rdi),>temp1=%esi +mov 1112(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1672] +# asm 1: mov 1672(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1672(<ap=%rdi),>temp2=%edx +mov 1672(%rdi),%edx + +# qhasm: mem64[ap + 1672] = temp1 +# asm 1: mov <temp1=int64#2,1672(<ap=int64#1) +# asm 2: mov <temp1=%esi,1672(<ap=%rdi) +mov %esi,1672(%rdi) + +# qhasm: mem64[ap + 1112] = temp2 +# asm 1: mov <temp2=int64#3,1112(<ap=int64#1) +# asm 2: mov <temp2=%edx,1112(<ap=%rdi) +mov %edx,1112(%rdi) + +# qhasm: temp1 = mem64[ap + 1116] +# asm 1: mov 1116(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1116(<ap=%rdi),>temp1=%esi +mov 1116(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3720] +# asm 1: mov 3720(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3720(<ap=%rdi),>temp2=%edx +mov 3720(%rdi),%edx + +# qhasm: mem64[ap + 3720] = temp1 +# asm 1: mov <temp1=int64#2,3720(<ap=int64#1) +# asm 2: mov <temp1=%esi,3720(<ap=%rdi) +mov %esi,3720(%rdi) + +# qhasm: mem64[ap + 1116] = temp2 +# asm 1: mov 
<temp2=int64#3,1116(<ap=int64#1) +# asm 2: mov <temp2=%edx,1116(<ap=%rdi) +mov %edx,1116(%rdi) + +# qhasm: temp1 = mem64[ap + 1124] +# asm 1: mov 1124(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1124(<ap=%rdi),>temp1=%esi +mov 1124(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2440] +# asm 1: mov 2440(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2440(<ap=%rdi),>temp2=%edx +mov 2440(%rdi),%edx + +# qhasm: mem64[ap + 2440] = temp1 +# asm 1: mov <temp1=int64#2,2440(<ap=int64#1) +# asm 2: mov <temp1=%esi,2440(<ap=%rdi) +mov %esi,2440(%rdi) + +# qhasm: mem64[ap + 1124] = temp2 +# asm 1: mov <temp2=int64#3,1124(<ap=int64#1) +# asm 2: mov <temp2=%edx,1124(<ap=%rdi) +mov %edx,1124(%rdi) + +# qhasm: temp1 = mem64[ap + 1128] +# asm 1: mov 1128(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1128(<ap=%rdi),>temp1=%esi +mov 1128(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1416] +# asm 1: mov 1416(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1416(<ap=%rdi),>temp2=%edx +mov 1416(%rdi),%edx + +# qhasm: mem64[ap + 1416] = temp1 +# asm 1: mov <temp1=int64#2,1416(<ap=int64#1) +# asm 2: mov <temp1=%esi,1416(<ap=%rdi) +mov %esi,1416(%rdi) + +# qhasm: mem64[ap + 1128] = temp2 +# asm 1: mov <temp2=int64#3,1128(<ap=int64#1) +# asm 2: mov <temp2=%edx,1128(<ap=%rdi) +mov %edx,1128(%rdi) + +# qhasm: temp1 = mem64[ap + 1132] +# asm 1: mov 1132(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1132(<ap=%rdi),>temp1=%esi +mov 1132(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3464] +# asm 1: mov 3464(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3464(<ap=%rdi),>temp2=%edx +mov 3464(%rdi),%edx + +# qhasm: mem64[ap + 3464] = temp1 +# asm 1: mov <temp1=int64#2,3464(<ap=int64#1) +# asm 2: mov <temp1=%esi,3464(<ap=%rdi) +mov %esi,3464(%rdi) + +# qhasm: mem64[ap + 1132] = temp2 +# asm 1: mov <temp2=int64#3,1132(<ap=int64#1) +# asm 2: mov <temp2=%edx,1132(<ap=%rdi) +mov %edx,1132(%rdi) + +# qhasm: temp1 = mem64[ap + 1140] +# asm 1: mov 1140(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1140(<ap=%rdi),>temp1=%esi +mov 1140(%rdi),%esi + +# 
qhasm: temp2 = mem64[ap + 2952] +# asm 1: mov 2952(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2952(<ap=%rdi),>temp2=%edx +mov 2952(%rdi),%edx + +# qhasm: mem64[ap + 2952] = temp1 +# asm 1: mov <temp1=int64#2,2952(<ap=int64#1) +# asm 2: mov <temp1=%esi,2952(<ap=%rdi) +mov %esi,2952(%rdi) + +# qhasm: mem64[ap + 1140] = temp2 +# asm 1: mov <temp2=int64#3,1140(<ap=int64#1) +# asm 2: mov <temp2=%edx,1140(<ap=%rdi) +mov %edx,1140(%rdi) + +# qhasm: temp1 = mem64[ap + 1144] +# asm 1: mov 1144(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1144(<ap=%rdi),>temp1=%esi +mov 1144(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1928] +# asm 1: mov 1928(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1928(<ap=%rdi),>temp2=%edx +mov 1928(%rdi),%edx + +# qhasm: mem64[ap + 1928] = temp1 +# asm 1: mov <temp1=int64#2,1928(<ap=int64#1) +# asm 2: mov <temp1=%esi,1928(<ap=%rdi) +mov %esi,1928(%rdi) + +# qhasm: mem64[ap + 1144] = temp2 +# asm 1: mov <temp2=int64#3,1144(<ap=int64#1) +# asm 2: mov <temp2=%edx,1144(<ap=%rdi) +mov %edx,1144(%rdi) + +# qhasm: temp1 = mem64[ap + 1148] +# asm 1: mov 1148(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1148(<ap=%rdi),>temp1=%esi +mov 1148(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3976] +# asm 1: mov 3976(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3976(<ap=%rdi),>temp2=%edx +mov 3976(%rdi),%edx + +# qhasm: mem64[ap + 3976] = temp1 +# asm 1: mov <temp1=int64#2,3976(<ap=int64#1) +# asm 2: mov <temp1=%esi,3976(<ap=%rdi) +mov %esi,3976(%rdi) + +# qhasm: mem64[ap + 1148] = temp2 +# asm 1: mov <temp2=int64#3,1148(<ap=int64#1) +# asm 2: mov <temp2=%edx,1148(<ap=%rdi) +mov %edx,1148(%rdi) + +# qhasm: temp1 = mem64[ap + 1156] +# asm 1: mov 1156(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1156(<ap=%rdi),>temp1=%esi +mov 1156(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2120] +# asm 1: mov 2120(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2120(<ap=%rdi),>temp2=%edx +mov 2120(%rdi),%edx + +# qhasm: mem64[ap + 2120] = temp1 +# asm 1: mov <temp1=int64#2,2120(<ap=int64#1) +# asm 2: mov 
<temp1=%esi,2120(<ap=%rdi) +mov %esi,2120(%rdi) + +# qhasm: mem64[ap + 1156] = temp2 +# asm 1: mov <temp2=int64#3,1156(<ap=int64#1) +# asm 2: mov <temp2=%edx,1156(<ap=%rdi) +mov %edx,1156(%rdi) + +# qhasm: temp1 = mem64[ap + 1164] +# asm 1: mov 1164(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1164(<ap=%rdi),>temp1=%esi +mov 1164(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3144] +# asm 1: mov 3144(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3144(<ap=%rdi),>temp2=%edx +mov 3144(%rdi),%edx + +# qhasm: mem64[ap + 3144] = temp1 +# asm 1: mov <temp1=int64#2,3144(<ap=int64#1) +# asm 2: mov <temp1=%esi,3144(<ap=%rdi) +mov %esi,3144(%rdi) + +# qhasm: mem64[ap + 1164] = temp2 +# asm 1: mov <temp2=int64#3,1164(<ap=int64#1) +# asm 2: mov <temp2=%edx,1164(<ap=%rdi) +mov %edx,1164(%rdi) + +# qhasm: temp1 = mem64[ap + 1172] +# asm 1: mov 1172(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1172(<ap=%rdi),>temp1=%esi +mov 1172(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2632] +# asm 1: mov 2632(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2632(<ap=%rdi),>temp2=%edx +mov 2632(%rdi),%edx + +# qhasm: mem64[ap + 2632] = temp1 +# asm 1: mov <temp1=int64#2,2632(<ap=int64#1) +# asm 2: mov <temp1=%esi,2632(<ap=%rdi) +mov %esi,2632(%rdi) + +# qhasm: mem64[ap + 1172] = temp2 +# asm 1: mov <temp2=int64#3,1172(<ap=int64#1) +# asm 2: mov <temp2=%edx,1172(<ap=%rdi) +mov %edx,1172(%rdi) + +# qhasm: temp1 = mem64[ap + 1176] +# asm 1: mov 1176(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1176(<ap=%rdi),>temp1=%esi +mov 1176(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1608] +# asm 1: mov 1608(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1608(<ap=%rdi),>temp2=%edx +mov 1608(%rdi),%edx + +# qhasm: mem64[ap + 1608] = temp1 +# asm 1: mov <temp1=int64#2,1608(<ap=int64#1) +# asm 2: mov <temp1=%esi,1608(<ap=%rdi) +mov %esi,1608(%rdi) + +# qhasm: mem64[ap + 1176] = temp2 +# asm 1: mov <temp2=int64#3,1176(<ap=int64#1) +# asm 2: mov <temp2=%edx,1176(<ap=%rdi) +mov %edx,1176(%rdi) + +# qhasm: temp1 = mem64[ap + 1180] +# asm 1: mov 
1180(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1180(<ap=%rdi),>temp1=%esi +mov 1180(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3656] +# asm 1: mov 3656(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3656(<ap=%rdi),>temp2=%edx +mov 3656(%rdi),%edx + +# qhasm: mem64[ap + 3656] = temp1 +# asm 1: mov <temp1=int64#2,3656(<ap=int64#1) +# asm 2: mov <temp1=%esi,3656(<ap=%rdi) +mov %esi,3656(%rdi) + +# qhasm: mem64[ap + 1180] = temp2 +# asm 1: mov <temp2=int64#3,1180(<ap=int64#1) +# asm 2: mov <temp2=%edx,1180(<ap=%rdi) +mov %edx,1180(%rdi) + +# qhasm: temp1 = mem64[ap + 1188] +# asm 1: mov 1188(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1188(<ap=%rdi),>temp1=%esi +mov 1188(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2376] +# asm 1: mov 2376(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2376(<ap=%rdi),>temp2=%edx +mov 2376(%rdi),%edx + +# qhasm: mem64[ap + 2376] = temp1 +# asm 1: mov <temp1=int64#2,2376(<ap=int64#1) +# asm 2: mov <temp1=%esi,2376(<ap=%rdi) +mov %esi,2376(%rdi) + +# qhasm: mem64[ap + 1188] = temp2 +# asm 1: mov <temp2=int64#3,1188(<ap=int64#1) +# asm 2: mov <temp2=%edx,1188(<ap=%rdi) +mov %edx,1188(%rdi) + +# qhasm: temp1 = mem64[ap + 1192] +# asm 1: mov 1192(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1192(<ap=%rdi),>temp1=%esi +mov 1192(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1352] +# asm 1: mov 1352(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1352(<ap=%rdi),>temp2=%edx +mov 1352(%rdi),%edx + +# qhasm: mem64[ap + 1352] = temp1 +# asm 1: mov <temp1=int64#2,1352(<ap=int64#1) +# asm 2: mov <temp1=%esi,1352(<ap=%rdi) +mov %esi,1352(%rdi) + +# qhasm: mem64[ap + 1192] = temp2 +# asm 1: mov <temp2=int64#3,1192(<ap=int64#1) +# asm 2: mov <temp2=%edx,1192(<ap=%rdi) +mov %edx,1192(%rdi) + +# qhasm: temp1 = mem64[ap + 1196] +# asm 1: mov 1196(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1196(<ap=%rdi),>temp1=%esi +mov 1196(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3400] +# asm 1: mov 3400(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3400(<ap=%rdi),>temp2=%edx +mov 3400(%rdi),%edx + +# 
qhasm: mem64[ap + 3400] = temp1 +# asm 1: mov <temp1=int64#2,3400(<ap=int64#1) +# asm 2: mov <temp1=%esi,3400(<ap=%rdi) +mov %esi,3400(%rdi) + +# qhasm: mem64[ap + 1196] = temp2 +# asm 1: mov <temp2=int64#3,1196(<ap=int64#1) +# asm 2: mov <temp2=%edx,1196(<ap=%rdi) +mov %edx,1196(%rdi) + +# qhasm: temp1 = mem64[ap + 1204] +# asm 1: mov 1204(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1204(<ap=%rdi),>temp1=%esi +mov 1204(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2888] +# asm 1: mov 2888(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2888(<ap=%rdi),>temp2=%edx +mov 2888(%rdi),%edx + +# qhasm: mem64[ap + 2888] = temp1 +# asm 1: mov <temp1=int64#2,2888(<ap=int64#1) +# asm 2: mov <temp1=%esi,2888(<ap=%rdi) +mov %esi,2888(%rdi) + +# qhasm: mem64[ap + 1204] = temp2 +# asm 1: mov <temp2=int64#3,1204(<ap=int64#1) +# asm 2: mov <temp2=%edx,1204(<ap=%rdi) +mov %edx,1204(%rdi) + +# qhasm: temp1 = mem64[ap + 1208] +# asm 1: mov 1208(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1208(<ap=%rdi),>temp1=%esi +mov 1208(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1864] +# asm 1: mov 1864(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1864(<ap=%rdi),>temp2=%edx +mov 1864(%rdi),%edx + +# qhasm: mem64[ap + 1864] = temp1 +# asm 1: mov <temp1=int64#2,1864(<ap=int64#1) +# asm 2: mov <temp1=%esi,1864(<ap=%rdi) +mov %esi,1864(%rdi) + +# qhasm: mem64[ap + 1208] = temp2 +# asm 1: mov <temp2=int64#3,1208(<ap=int64#1) +# asm 2: mov <temp2=%edx,1208(<ap=%rdi) +mov %edx,1208(%rdi) + +# qhasm: temp1 = mem64[ap + 1212] +# asm 1: mov 1212(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1212(<ap=%rdi),>temp1=%esi +mov 1212(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3912] +# asm 1: mov 3912(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3912(<ap=%rdi),>temp2=%edx +mov 3912(%rdi),%edx + +# qhasm: mem64[ap + 3912] = temp1 +# asm 1: mov <temp1=int64#2,3912(<ap=int64#1) +# asm 2: mov <temp1=%esi,3912(<ap=%rdi) +mov %esi,3912(%rdi) + +# qhasm: mem64[ap + 1212] = temp2 +# asm 1: mov <temp2=int64#3,1212(<ap=int64#1) +# asm 2: mov 
<temp2=%edx,1212(<ap=%rdi) +mov %edx,1212(%rdi) + +# qhasm: temp1 = mem64[ap + 1220] +# asm 1: mov 1220(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1220(<ap=%rdi),>temp1=%esi +mov 1220(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2248] +# asm 1: mov 2248(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2248(<ap=%rdi),>temp2=%edx +mov 2248(%rdi),%edx + +# qhasm: mem64[ap + 2248] = temp1 +# asm 1: mov <temp1=int64#2,2248(<ap=int64#1) +# asm 2: mov <temp1=%esi,2248(<ap=%rdi) +mov %esi,2248(%rdi) + +# qhasm: mem64[ap + 1220] = temp2 +# asm 1: mov <temp2=int64#3,1220(<ap=int64#1) +# asm 2: mov <temp2=%edx,1220(<ap=%rdi) +mov %edx,1220(%rdi) + +# qhasm: temp1 = mem64[ap + 1228] +# asm 1: mov 1228(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1228(<ap=%rdi),>temp1=%esi +mov 1228(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3272] +# asm 1: mov 3272(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3272(<ap=%rdi),>temp2=%edx +mov 3272(%rdi),%edx + +# qhasm: mem64[ap + 3272] = temp1 +# asm 1: mov <temp1=int64#2,3272(<ap=int64#1) +# asm 2: mov <temp1=%esi,3272(<ap=%rdi) +mov %esi,3272(%rdi) + +# qhasm: mem64[ap + 1228] = temp2 +# asm 1: mov <temp2=int64#3,1228(<ap=int64#1) +# asm 2: mov <temp2=%edx,1228(<ap=%rdi) +mov %edx,1228(%rdi) + +# qhasm: temp1 = mem64[ap + 1236] +# asm 1: mov 1236(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1236(<ap=%rdi),>temp1=%esi +mov 1236(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2760] +# asm 1: mov 2760(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2760(<ap=%rdi),>temp2=%edx +mov 2760(%rdi),%edx + +# qhasm: mem64[ap + 2760] = temp1 +# asm 1: mov <temp1=int64#2,2760(<ap=int64#1) +# asm 2: mov <temp1=%esi,2760(<ap=%rdi) +mov %esi,2760(%rdi) + +# qhasm: mem64[ap + 1236] = temp2 +# asm 1: mov <temp2=int64#3,1236(<ap=int64#1) +# asm 2: mov <temp2=%edx,1236(<ap=%rdi) +mov %edx,1236(%rdi) + +# qhasm: temp1 = mem64[ap + 1240] +# asm 1: mov 1240(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1240(<ap=%rdi),>temp1=%esi +mov 1240(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1736] +# asm 1: mov 
1736(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1736(<ap=%rdi),>temp2=%edx +mov 1736(%rdi),%edx + +# qhasm: mem64[ap + 1736] = temp1 +# asm 1: mov <temp1=int64#2,1736(<ap=int64#1) +# asm 2: mov <temp1=%esi,1736(<ap=%rdi) +mov %esi,1736(%rdi) + +# qhasm: mem64[ap + 1240] = temp2 +# asm 1: mov <temp2=int64#3,1240(<ap=int64#1) +# asm 2: mov <temp2=%edx,1240(<ap=%rdi) +mov %edx,1240(%rdi) + +# qhasm: temp1 = mem64[ap + 1244] +# asm 1: mov 1244(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1244(<ap=%rdi),>temp1=%esi +mov 1244(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3784] +# asm 1: mov 3784(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3784(<ap=%rdi),>temp2=%edx +mov 3784(%rdi),%edx + +# qhasm: mem64[ap + 3784] = temp1 +# asm 1: mov <temp1=int64#2,3784(<ap=int64#1) +# asm 2: mov <temp1=%esi,3784(<ap=%rdi) +mov %esi,3784(%rdi) + +# qhasm: mem64[ap + 1244] = temp2 +# asm 1: mov <temp2=int64#3,1244(<ap=int64#1) +# asm 2: mov <temp2=%edx,1244(<ap=%rdi) +mov %edx,1244(%rdi) + +# qhasm: temp1 = mem64[ap + 1252] +# asm 1: mov 1252(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1252(<ap=%rdi),>temp1=%esi +mov 1252(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2504] +# asm 1: mov 2504(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2504(<ap=%rdi),>temp2=%edx +mov 2504(%rdi),%edx + +# qhasm: mem64[ap + 2504] = temp1 +# asm 1: mov <temp1=int64#2,2504(<ap=int64#1) +# asm 2: mov <temp1=%esi,2504(<ap=%rdi) +mov %esi,2504(%rdi) + +# qhasm: mem64[ap + 1252] = temp2 +# asm 1: mov <temp2=int64#3,1252(<ap=int64#1) +# asm 2: mov <temp2=%edx,1252(<ap=%rdi) +mov %edx,1252(%rdi) + +# qhasm: temp1 = mem64[ap + 1256] +# asm 1: mov 1256(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1256(<ap=%rdi),>temp1=%esi +mov 1256(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1480] +# asm 1: mov 1480(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1480(<ap=%rdi),>temp2=%edx +mov 1480(%rdi),%edx + +# qhasm: mem64[ap + 1480] = temp1 +# asm 1: mov <temp1=int64#2,1480(<ap=int64#1) +# asm 2: mov <temp1=%esi,1480(<ap=%rdi) +mov %esi,1480(%rdi) + +# 
qhasm: mem64[ap + 1256] = temp2 +# asm 1: mov <temp2=int64#3,1256(<ap=int64#1) +# asm 2: mov <temp2=%edx,1256(<ap=%rdi) +mov %edx,1256(%rdi) + +# qhasm: temp1 = mem64[ap + 1260] +# asm 1: mov 1260(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1260(<ap=%rdi),>temp1=%esi +mov 1260(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3528] +# asm 1: mov 3528(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3528(<ap=%rdi),>temp2=%edx +mov 3528(%rdi),%edx + +# qhasm: mem64[ap + 3528] = temp1 +# asm 1: mov <temp1=int64#2,3528(<ap=int64#1) +# asm 2: mov <temp1=%esi,3528(<ap=%rdi) +mov %esi,3528(%rdi) + +# qhasm: mem64[ap + 1260] = temp2 +# asm 1: mov <temp2=int64#3,1260(<ap=int64#1) +# asm 2: mov <temp2=%edx,1260(<ap=%rdi) +mov %edx,1260(%rdi) + +# qhasm: temp1 = mem64[ap + 1268] +# asm 1: mov 1268(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1268(<ap=%rdi),>temp1=%esi +mov 1268(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3016] +# asm 1: mov 3016(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3016(<ap=%rdi),>temp2=%edx +mov 3016(%rdi),%edx + +# qhasm: mem64[ap + 3016] = temp1 +# asm 1: mov <temp1=int64#2,3016(<ap=int64#1) +# asm 2: mov <temp1=%esi,3016(<ap=%rdi) +mov %esi,3016(%rdi) + +# qhasm: mem64[ap + 1268] = temp2 +# asm 1: mov <temp2=int64#3,1268(<ap=int64#1) +# asm 2: mov <temp2=%edx,1268(<ap=%rdi) +mov %edx,1268(%rdi) + +# qhasm: temp1 = mem64[ap + 1272] +# asm 1: mov 1272(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1272(<ap=%rdi),>temp1=%esi +mov 1272(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1992] +# asm 1: mov 1992(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1992(<ap=%rdi),>temp2=%edx +mov 1992(%rdi),%edx + +# qhasm: mem64[ap + 1992] = temp1 +# asm 1: mov <temp1=int64#2,1992(<ap=int64#1) +# asm 2: mov <temp1=%esi,1992(<ap=%rdi) +mov %esi,1992(%rdi) + +# qhasm: mem64[ap + 1272] = temp2 +# asm 1: mov <temp2=int64#3,1272(<ap=int64#1) +# asm 2: mov <temp2=%edx,1272(<ap=%rdi) +mov %edx,1272(%rdi) + +# qhasm: temp1 = mem64[ap + 1276] +# asm 1: mov 1276(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 
1276(<ap=%rdi),>temp1=%esi +mov 1276(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4040] +# asm 1: mov 4040(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4040(<ap=%rdi),>temp2=%edx +mov 4040(%rdi),%edx + +# qhasm: mem64[ap + 4040] = temp1 +# asm 1: mov <temp1=int64#2,4040(<ap=int64#1) +# asm 2: mov <temp1=%esi,4040(<ap=%rdi) +mov %esi,4040(%rdi) + +# qhasm: mem64[ap + 1276] = temp2 +# asm 1: mov <temp2=int64#3,1276(<ap=int64#1) +# asm 2: mov <temp2=%edx,1276(<ap=%rdi) +mov %edx,1276(%rdi) + +# qhasm: temp1 = mem64[ap + 1284] +# asm 1: mov 1284(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1284(<ap=%rdi),>temp1=%esi +mov 1284(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2088] +# asm 1: mov 2088(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2088(<ap=%rdi),>temp2=%edx +mov 2088(%rdi),%edx + +# qhasm: mem64[ap + 2088] = temp1 +# asm 1: mov <temp1=int64#2,2088(<ap=int64#1) +# asm 2: mov <temp1=%esi,2088(<ap=%rdi) +mov %esi,2088(%rdi) + +# qhasm: mem64[ap + 1284] = temp2 +# asm 1: mov <temp2=int64#3,1284(<ap=int64#1) +# asm 2: mov <temp2=%edx,1284(<ap=%rdi) +mov %edx,1284(%rdi) + +# qhasm: temp1 = mem64[ap + 1292] +# asm 1: mov 1292(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1292(<ap=%rdi),>temp1=%esi +mov 1292(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3112] +# asm 1: mov 3112(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3112(<ap=%rdi),>temp2=%edx +mov 3112(%rdi),%edx + +# qhasm: mem64[ap + 3112] = temp1 +# asm 1: mov <temp1=int64#2,3112(<ap=int64#1) +# asm 2: mov <temp1=%esi,3112(<ap=%rdi) +mov %esi,3112(%rdi) + +# qhasm: mem64[ap + 1292] = temp2 +# asm 1: mov <temp2=int64#3,1292(<ap=int64#1) +# asm 2: mov <temp2=%edx,1292(<ap=%rdi) +mov %edx,1292(%rdi) + +# qhasm: temp1 = mem64[ap + 1300] +# asm 1: mov 1300(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1300(<ap=%rdi),>temp1=%esi +mov 1300(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2600] +# asm 1: mov 2600(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2600(<ap=%rdi),>temp2=%edx +mov 2600(%rdi),%edx + +# qhasm: mem64[ap + 2600] = temp1 +# asm 1: mov 
<temp1=int64#2,2600(<ap=int64#1) +# asm 2: mov <temp1=%esi,2600(<ap=%rdi) +mov %esi,2600(%rdi) + +# qhasm: mem64[ap + 1300] = temp2 +# asm 1: mov <temp2=int64#3,1300(<ap=int64#1) +# asm 2: mov <temp2=%edx,1300(<ap=%rdi) +mov %edx,1300(%rdi) + +# qhasm: temp1 = mem64[ap + 1304] +# asm 1: mov 1304(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1304(<ap=%rdi),>temp1=%esi +mov 1304(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1576] +# asm 1: mov 1576(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1576(<ap=%rdi),>temp2=%edx +mov 1576(%rdi),%edx + +# qhasm: mem64[ap + 1576] = temp1 +# asm 1: mov <temp1=int64#2,1576(<ap=int64#1) +# asm 2: mov <temp1=%esi,1576(<ap=%rdi) +mov %esi,1576(%rdi) + +# qhasm: mem64[ap + 1304] = temp2 +# asm 1: mov <temp2=int64#3,1304(<ap=int64#1) +# asm 2: mov <temp2=%edx,1304(<ap=%rdi) +mov %edx,1304(%rdi) + +# qhasm: temp1 = mem64[ap + 1308] +# asm 1: mov 1308(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1308(<ap=%rdi),>temp1=%esi +mov 1308(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3624] +# asm 1: mov 3624(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3624(<ap=%rdi),>temp2=%edx +mov 3624(%rdi),%edx + +# qhasm: mem64[ap + 3624] = temp1 +# asm 1: mov <temp1=int64#2,3624(<ap=int64#1) +# asm 2: mov <temp1=%esi,3624(<ap=%rdi) +mov %esi,3624(%rdi) + +# qhasm: mem64[ap + 1308] = temp2 +# asm 1: mov <temp2=int64#3,1308(<ap=int64#1) +# asm 2: mov <temp2=%edx,1308(<ap=%rdi) +mov %edx,1308(%rdi) + +# qhasm: temp1 = mem64[ap + 1316] +# asm 1: mov 1316(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1316(<ap=%rdi),>temp1=%esi +mov 1316(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2344] +# asm 1: mov 2344(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2344(<ap=%rdi),>temp2=%edx +mov 2344(%rdi),%edx + +# qhasm: mem64[ap + 2344] = temp1 +# asm 1: mov <temp1=int64#2,2344(<ap=int64#1) +# asm 2: mov <temp1=%esi,2344(<ap=%rdi) +mov %esi,2344(%rdi) + +# qhasm: mem64[ap + 1316] = temp2 +# asm 1: mov <temp2=int64#3,1316(<ap=int64#1) +# asm 2: mov <temp2=%edx,1316(<ap=%rdi) +mov %edx,1316(%rdi) + +# 
qhasm: temp1 = mem64[ap + 1324] +# asm 1: mov 1324(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1324(<ap=%rdi),>temp1=%esi +mov 1324(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3368] +# asm 1: mov 3368(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3368(<ap=%rdi),>temp2=%edx +mov 3368(%rdi),%edx + +# qhasm: mem64[ap + 3368] = temp1 +# asm 1: mov <temp1=int64#2,3368(<ap=int64#1) +# asm 2: mov <temp1=%esi,3368(<ap=%rdi) +mov %esi,3368(%rdi) + +# qhasm: mem64[ap + 1324] = temp2 +# asm 1: mov <temp2=int64#3,1324(<ap=int64#1) +# asm 2: mov <temp2=%edx,1324(<ap=%rdi) +mov %edx,1324(%rdi) + +# qhasm: temp1 = mem64[ap + 1332] +# asm 1: mov 1332(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1332(<ap=%rdi),>temp1=%esi +mov 1332(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2856] +# asm 1: mov 2856(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2856(<ap=%rdi),>temp2=%edx +mov 2856(%rdi),%edx + +# qhasm: mem64[ap + 2856] = temp1 +# asm 1: mov <temp1=int64#2,2856(<ap=int64#1) +# asm 2: mov <temp1=%esi,2856(<ap=%rdi) +mov %esi,2856(%rdi) + +# qhasm: mem64[ap + 1332] = temp2 +# asm 1: mov <temp2=int64#3,1332(<ap=int64#1) +# asm 2: mov <temp2=%edx,1332(<ap=%rdi) +mov %edx,1332(%rdi) + +# qhasm: temp1 = mem64[ap + 1336] +# asm 1: mov 1336(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1336(<ap=%rdi),>temp1=%esi +mov 1336(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1832] +# asm 1: mov 1832(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1832(<ap=%rdi),>temp2=%edx +mov 1832(%rdi),%edx + +# qhasm: mem64[ap + 1832] = temp1 +# asm 1: mov <temp1=int64#2,1832(<ap=int64#1) +# asm 2: mov <temp1=%esi,1832(<ap=%rdi) +mov %esi,1832(%rdi) + +# qhasm: mem64[ap + 1336] = temp2 +# asm 1: mov <temp2=int64#3,1336(<ap=int64#1) +# asm 2: mov <temp2=%edx,1336(<ap=%rdi) +mov %edx,1336(%rdi) + +# qhasm: temp1 = mem64[ap + 1340] +# asm 1: mov 1340(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1340(<ap=%rdi),>temp1=%esi +mov 1340(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3880] +# asm 1: mov 3880(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 
3880(<ap=%rdi),>temp2=%edx +mov 3880(%rdi),%edx + +# qhasm: mem64[ap + 3880] = temp1 +# asm 1: mov <temp1=int64#2,3880(<ap=int64#1) +# asm 2: mov <temp1=%esi,3880(<ap=%rdi) +mov %esi,3880(%rdi) + +# qhasm: mem64[ap + 1340] = temp2 +# asm 1: mov <temp2=int64#3,1340(<ap=int64#1) +# asm 2: mov <temp2=%edx,1340(<ap=%rdi) +mov %edx,1340(%rdi) + +# qhasm: temp1 = mem64[ap + 1348] +# asm 1: mov 1348(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1348(<ap=%rdi),>temp1=%esi +mov 1348(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2216] +# asm 1: mov 2216(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2216(<ap=%rdi),>temp2=%edx +mov 2216(%rdi),%edx + +# qhasm: mem64[ap + 2216] = temp1 +# asm 1: mov <temp1=int64#2,2216(<ap=int64#1) +# asm 2: mov <temp1=%esi,2216(<ap=%rdi) +mov %esi,2216(%rdi) + +# qhasm: mem64[ap + 1348] = temp2 +# asm 1: mov <temp2=int64#3,1348(<ap=int64#1) +# asm 2: mov <temp2=%edx,1348(<ap=%rdi) +mov %edx,1348(%rdi) + +# qhasm: temp1 = mem64[ap + 1356] +# asm 1: mov 1356(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1356(<ap=%rdi),>temp1=%esi +mov 1356(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3240] +# asm 1: mov 3240(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3240(<ap=%rdi),>temp2=%edx +mov 3240(%rdi),%edx + +# qhasm: mem64[ap + 3240] = temp1 +# asm 1: mov <temp1=int64#2,3240(<ap=int64#1) +# asm 2: mov <temp1=%esi,3240(<ap=%rdi) +mov %esi,3240(%rdi) + +# qhasm: mem64[ap + 1356] = temp2 +# asm 1: mov <temp2=int64#3,1356(<ap=int64#1) +# asm 2: mov <temp2=%edx,1356(<ap=%rdi) +mov %edx,1356(%rdi) + +# qhasm: temp1 = mem64[ap + 1364] +# asm 1: mov 1364(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1364(<ap=%rdi),>temp1=%esi +mov 1364(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2728] +# asm 1: mov 2728(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2728(<ap=%rdi),>temp2=%edx +mov 2728(%rdi),%edx + +# qhasm: mem64[ap + 2728] = temp1 +# asm 1: mov <temp1=int64#2,2728(<ap=int64#1) +# asm 2: mov <temp1=%esi,2728(<ap=%rdi) +mov %esi,2728(%rdi) + +# qhasm: mem64[ap + 1364] = temp2 +# asm 1: mov 
<temp2=int64#3,1364(<ap=int64#1) +# asm 2: mov <temp2=%edx,1364(<ap=%rdi) +mov %edx,1364(%rdi) + +# qhasm: temp1 = mem64[ap + 1368] +# asm 1: mov 1368(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1368(<ap=%rdi),>temp1=%esi +mov 1368(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1704] +# asm 1: mov 1704(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1704(<ap=%rdi),>temp2=%edx +mov 1704(%rdi),%edx + +# qhasm: mem64[ap + 1704] = temp1 +# asm 1: mov <temp1=int64#2,1704(<ap=int64#1) +# asm 2: mov <temp1=%esi,1704(<ap=%rdi) +mov %esi,1704(%rdi) + +# qhasm: mem64[ap + 1368] = temp2 +# asm 1: mov <temp2=int64#3,1368(<ap=int64#1) +# asm 2: mov <temp2=%edx,1368(<ap=%rdi) +mov %edx,1368(%rdi) + +# qhasm: temp1 = mem64[ap + 1372] +# asm 1: mov 1372(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1372(<ap=%rdi),>temp1=%esi +mov 1372(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3752] +# asm 1: mov 3752(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3752(<ap=%rdi),>temp2=%edx +mov 3752(%rdi),%edx + +# qhasm: mem64[ap + 3752] = temp1 +# asm 1: mov <temp1=int64#2,3752(<ap=int64#1) +# asm 2: mov <temp1=%esi,3752(<ap=%rdi) +mov %esi,3752(%rdi) + +# qhasm: mem64[ap + 1372] = temp2 +# asm 1: mov <temp2=int64#3,1372(<ap=int64#1) +# asm 2: mov <temp2=%edx,1372(<ap=%rdi) +mov %edx,1372(%rdi) + +# qhasm: temp1 = mem64[ap + 1380] +# asm 1: mov 1380(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1380(<ap=%rdi),>temp1=%esi +mov 1380(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2472] +# asm 1: mov 2472(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2472(<ap=%rdi),>temp2=%edx +mov 2472(%rdi),%edx + +# qhasm: mem64[ap + 2472] = temp1 +# asm 1: mov <temp1=int64#2,2472(<ap=int64#1) +# asm 2: mov <temp1=%esi,2472(<ap=%rdi) +mov %esi,2472(%rdi) + +# qhasm: mem64[ap + 1380] = temp2 +# asm 1: mov <temp2=int64#3,1380(<ap=int64#1) +# asm 2: mov <temp2=%edx,1380(<ap=%rdi) +mov %edx,1380(%rdi) + +# qhasm: temp1 = mem64[ap + 1384] +# asm 1: mov 1384(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1384(<ap=%rdi),>temp1=%esi +mov 1384(%rdi),%esi + +# 
qhasm: temp2 = mem64[ap + 1448] +# asm 1: mov 1448(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1448(<ap=%rdi),>temp2=%edx +mov 1448(%rdi),%edx + +# qhasm: mem64[ap + 1448] = temp1 +# asm 1: mov <temp1=int64#2,1448(<ap=int64#1) +# asm 2: mov <temp1=%esi,1448(<ap=%rdi) +mov %esi,1448(%rdi) + +# qhasm: mem64[ap + 1384] = temp2 +# asm 1: mov <temp2=int64#3,1384(<ap=int64#1) +# asm 2: mov <temp2=%edx,1384(<ap=%rdi) +mov %edx,1384(%rdi) + +# qhasm: temp1 = mem64[ap + 1388] +# asm 1: mov 1388(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1388(<ap=%rdi),>temp1=%esi +mov 1388(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3496] +# asm 1: mov 3496(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3496(<ap=%rdi),>temp2=%edx +mov 3496(%rdi),%edx + +# qhasm: mem64[ap + 3496] = temp1 +# asm 1: mov <temp1=int64#2,3496(<ap=int64#1) +# asm 2: mov <temp1=%esi,3496(<ap=%rdi) +mov %esi,3496(%rdi) + +# qhasm: mem64[ap + 1388] = temp2 +# asm 1: mov <temp2=int64#3,1388(<ap=int64#1) +# asm 2: mov <temp2=%edx,1388(<ap=%rdi) +mov %edx,1388(%rdi) + +# qhasm: temp1 = mem64[ap + 1396] +# asm 1: mov 1396(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1396(<ap=%rdi),>temp1=%esi +mov 1396(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2984] +# asm 1: mov 2984(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2984(<ap=%rdi),>temp2=%edx +mov 2984(%rdi),%edx + +# qhasm: mem64[ap + 2984] = temp1 +# asm 1: mov <temp1=int64#2,2984(<ap=int64#1) +# asm 2: mov <temp1=%esi,2984(<ap=%rdi) +mov %esi,2984(%rdi) + +# qhasm: mem64[ap + 1396] = temp2 +# asm 1: mov <temp2=int64#3,1396(<ap=int64#1) +# asm 2: mov <temp2=%edx,1396(<ap=%rdi) +mov %edx,1396(%rdi) + +# qhasm: temp1 = mem64[ap + 1400] +# asm 1: mov 1400(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1400(<ap=%rdi),>temp1=%esi +mov 1400(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1960] +# asm 1: mov 1960(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1960(<ap=%rdi),>temp2=%edx +mov 1960(%rdi),%edx + +# qhasm: mem64[ap + 1960] = temp1 +# asm 1: mov <temp1=int64#2,1960(<ap=int64#1) +# asm 2: mov 
<temp1=%esi,1960(<ap=%rdi) +mov %esi,1960(%rdi) + +# qhasm: mem64[ap + 1400] = temp2 +# asm 1: mov <temp2=int64#3,1400(<ap=int64#1) +# asm 2: mov <temp2=%edx,1400(<ap=%rdi) +mov %edx,1400(%rdi) + +# qhasm: temp1 = mem64[ap + 1404] +# asm 1: mov 1404(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1404(<ap=%rdi),>temp1=%esi +mov 1404(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4008] +# asm 1: mov 4008(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4008(<ap=%rdi),>temp2=%edx +mov 4008(%rdi),%edx + +# qhasm: mem64[ap + 4008] = temp1 +# asm 1: mov <temp1=int64#2,4008(<ap=int64#1) +# asm 2: mov <temp1=%esi,4008(<ap=%rdi) +mov %esi,4008(%rdi) + +# qhasm: mem64[ap + 1404] = temp2 +# asm 1: mov <temp2=int64#3,1404(<ap=int64#1) +# asm 2: mov <temp2=%edx,1404(<ap=%rdi) +mov %edx,1404(%rdi) + +# qhasm: temp1 = mem64[ap + 1412] +# asm 1: mov 1412(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1412(<ap=%rdi),>temp1=%esi +mov 1412(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2152] +# asm 1: mov 2152(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2152(<ap=%rdi),>temp2=%edx +mov 2152(%rdi),%edx + +# qhasm: mem64[ap + 2152] = temp1 +# asm 1: mov <temp1=int64#2,2152(<ap=int64#1) +# asm 2: mov <temp1=%esi,2152(<ap=%rdi) +mov %esi,2152(%rdi) + +# qhasm: mem64[ap + 1412] = temp2 +# asm 1: mov <temp2=int64#3,1412(<ap=int64#1) +# asm 2: mov <temp2=%edx,1412(<ap=%rdi) +mov %edx,1412(%rdi) + +# qhasm: temp1 = mem64[ap + 1420] +# asm 1: mov 1420(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1420(<ap=%rdi),>temp1=%esi +mov 1420(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3176] +# asm 1: mov 3176(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3176(<ap=%rdi),>temp2=%edx +mov 3176(%rdi),%edx + +# qhasm: mem64[ap + 3176] = temp1 +# asm 1: mov <temp1=int64#2,3176(<ap=int64#1) +# asm 2: mov <temp1=%esi,3176(<ap=%rdi) +mov %esi,3176(%rdi) + +# qhasm: mem64[ap + 1420] = temp2 +# asm 1: mov <temp2=int64#3,1420(<ap=int64#1) +# asm 2: mov <temp2=%edx,1420(<ap=%rdi) +mov %edx,1420(%rdi) + +# qhasm: temp1 = mem64[ap + 1428] +# asm 1: mov 
1428(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1428(<ap=%rdi),>temp1=%esi +mov 1428(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2664] +# asm 1: mov 2664(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2664(<ap=%rdi),>temp2=%edx +mov 2664(%rdi),%edx + +# qhasm: mem64[ap + 2664] = temp1 +# asm 1: mov <temp1=int64#2,2664(<ap=int64#1) +# asm 2: mov <temp1=%esi,2664(<ap=%rdi) +mov %esi,2664(%rdi) + +# qhasm: mem64[ap + 1428] = temp2 +# asm 1: mov <temp2=int64#3,1428(<ap=int64#1) +# asm 2: mov <temp2=%edx,1428(<ap=%rdi) +mov %edx,1428(%rdi) + +# qhasm: temp1 = mem64[ap + 1432] +# asm 1: mov 1432(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1432(<ap=%rdi),>temp1=%esi +mov 1432(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1640] +# asm 1: mov 1640(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1640(<ap=%rdi),>temp2=%edx +mov 1640(%rdi),%edx + +# qhasm: mem64[ap + 1640] = temp1 +# asm 1: mov <temp1=int64#2,1640(<ap=int64#1) +# asm 2: mov <temp1=%esi,1640(<ap=%rdi) +mov %esi,1640(%rdi) + +# qhasm: mem64[ap + 1432] = temp2 +# asm 1: mov <temp2=int64#3,1432(<ap=int64#1) +# asm 2: mov <temp2=%edx,1432(<ap=%rdi) +mov %edx,1432(%rdi) + +# qhasm: temp1 = mem64[ap + 1436] +# asm 1: mov 1436(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1436(<ap=%rdi),>temp1=%esi +mov 1436(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3688] +# asm 1: mov 3688(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3688(<ap=%rdi),>temp2=%edx +mov 3688(%rdi),%edx + +# qhasm: mem64[ap + 3688] = temp1 +# asm 1: mov <temp1=int64#2,3688(<ap=int64#1) +# asm 2: mov <temp1=%esi,3688(<ap=%rdi) +mov %esi,3688(%rdi) + +# qhasm: mem64[ap + 1436] = temp2 +# asm 1: mov <temp2=int64#3,1436(<ap=int64#1) +# asm 2: mov <temp2=%edx,1436(<ap=%rdi) +mov %edx,1436(%rdi) + +# qhasm: temp1 = mem64[ap + 1444] +# asm 1: mov 1444(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1444(<ap=%rdi),>temp1=%esi +mov 1444(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2408] +# asm 1: mov 2408(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2408(<ap=%rdi),>temp2=%edx +mov 2408(%rdi),%edx + +# 
qhasm: mem64[ap + 2408] = temp1 +# asm 1: mov <temp1=int64#2,2408(<ap=int64#1) +# asm 2: mov <temp1=%esi,2408(<ap=%rdi) +mov %esi,2408(%rdi) + +# qhasm: mem64[ap + 1444] = temp2 +# asm 1: mov <temp2=int64#3,1444(<ap=int64#1) +# asm 2: mov <temp2=%edx,1444(<ap=%rdi) +mov %edx,1444(%rdi) + +# qhasm: temp1 = mem64[ap + 1452] +# asm 1: mov 1452(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1452(<ap=%rdi),>temp1=%esi +mov 1452(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3432] +# asm 1: mov 3432(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3432(<ap=%rdi),>temp2=%edx +mov 3432(%rdi),%edx + +# qhasm: mem64[ap + 3432] = temp1 +# asm 1: mov <temp1=int64#2,3432(<ap=int64#1) +# asm 2: mov <temp1=%esi,3432(<ap=%rdi) +mov %esi,3432(%rdi) + +# qhasm: mem64[ap + 1452] = temp2 +# asm 1: mov <temp2=int64#3,1452(<ap=int64#1) +# asm 2: mov <temp2=%edx,1452(<ap=%rdi) +mov %edx,1452(%rdi) + +# qhasm: temp1 = mem64[ap + 1460] +# asm 1: mov 1460(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1460(<ap=%rdi),>temp1=%esi +mov 1460(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2920] +# asm 1: mov 2920(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2920(<ap=%rdi),>temp2=%edx +mov 2920(%rdi),%edx + +# qhasm: mem64[ap + 2920] = temp1 +# asm 1: mov <temp1=int64#2,2920(<ap=int64#1) +# asm 2: mov <temp1=%esi,2920(<ap=%rdi) +mov %esi,2920(%rdi) + +# qhasm: mem64[ap + 1460] = temp2 +# asm 1: mov <temp2=int64#3,1460(<ap=int64#1) +# asm 2: mov <temp2=%edx,1460(<ap=%rdi) +mov %edx,1460(%rdi) + +# qhasm: temp1 = mem64[ap + 1464] +# asm 1: mov 1464(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1464(<ap=%rdi),>temp1=%esi +mov 1464(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1896] +# asm 1: mov 1896(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1896(<ap=%rdi),>temp2=%edx +mov 1896(%rdi),%edx + +# qhasm: mem64[ap + 1896] = temp1 +# asm 1: mov <temp1=int64#2,1896(<ap=int64#1) +# asm 2: mov <temp1=%esi,1896(<ap=%rdi) +mov %esi,1896(%rdi) + +# qhasm: mem64[ap + 1464] = temp2 +# asm 1: mov <temp2=int64#3,1464(<ap=int64#1) +# asm 2: mov 
<temp2=%edx,1464(<ap=%rdi) +mov %edx,1464(%rdi) + +# qhasm: temp1 = mem64[ap + 1468] +# asm 1: mov 1468(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1468(<ap=%rdi),>temp1=%esi +mov 1468(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3944] +# asm 1: mov 3944(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3944(<ap=%rdi),>temp2=%edx +mov 3944(%rdi),%edx + +# qhasm: mem64[ap + 3944] = temp1 +# asm 1: mov <temp1=int64#2,3944(<ap=int64#1) +# asm 2: mov <temp1=%esi,3944(<ap=%rdi) +mov %esi,3944(%rdi) + +# qhasm: mem64[ap + 1468] = temp2 +# asm 1: mov <temp2=int64#3,1468(<ap=int64#1) +# asm 2: mov <temp2=%edx,1468(<ap=%rdi) +mov %edx,1468(%rdi) + +# qhasm: temp1 = mem64[ap + 1476] +# asm 1: mov 1476(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1476(<ap=%rdi),>temp1=%esi +mov 1476(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2280] +# asm 1: mov 2280(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2280(<ap=%rdi),>temp2=%edx +mov 2280(%rdi),%edx + +# qhasm: mem64[ap + 2280] = temp1 +# asm 1: mov <temp1=int64#2,2280(<ap=int64#1) +# asm 2: mov <temp1=%esi,2280(<ap=%rdi) +mov %esi,2280(%rdi) + +# qhasm: mem64[ap + 1476] = temp2 +# asm 1: mov <temp2=int64#3,1476(<ap=int64#1) +# asm 2: mov <temp2=%edx,1476(<ap=%rdi) +mov %edx,1476(%rdi) + +# qhasm: temp1 = mem64[ap + 1484] +# asm 1: mov 1484(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1484(<ap=%rdi),>temp1=%esi +mov 1484(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3304] +# asm 1: mov 3304(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3304(<ap=%rdi),>temp2=%edx +mov 3304(%rdi),%edx + +# qhasm: mem64[ap + 3304] = temp1 +# asm 1: mov <temp1=int64#2,3304(<ap=int64#1) +# asm 2: mov <temp1=%esi,3304(<ap=%rdi) +mov %esi,3304(%rdi) + +# qhasm: mem64[ap + 1484] = temp2 +# asm 1: mov <temp2=int64#3,1484(<ap=int64#1) +# asm 2: mov <temp2=%edx,1484(<ap=%rdi) +mov %edx,1484(%rdi) + +# qhasm: temp1 = mem64[ap + 1492] +# asm 1: mov 1492(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1492(<ap=%rdi),>temp1=%esi +mov 1492(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2792] +# asm 1: mov 
2792(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2792(<ap=%rdi),>temp2=%edx +mov 2792(%rdi),%edx + +# qhasm: mem64[ap + 2792] = temp1 +# asm 1: mov <temp1=int64#2,2792(<ap=int64#1) +# asm 2: mov <temp1=%esi,2792(<ap=%rdi) +mov %esi,2792(%rdi) + +# qhasm: mem64[ap + 1492] = temp2 +# asm 1: mov <temp2=int64#3,1492(<ap=int64#1) +# asm 2: mov <temp2=%edx,1492(<ap=%rdi) +mov %edx,1492(%rdi) + +# qhasm: temp1 = mem64[ap + 1496] +# asm 1: mov 1496(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1496(<ap=%rdi),>temp1=%esi +mov 1496(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1768] +# asm 1: mov 1768(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1768(<ap=%rdi),>temp2=%edx +mov 1768(%rdi),%edx + +# qhasm: mem64[ap + 1768] = temp1 +# asm 1: mov <temp1=int64#2,1768(<ap=int64#1) +# asm 2: mov <temp1=%esi,1768(<ap=%rdi) +mov %esi,1768(%rdi) + +# qhasm: mem64[ap + 1496] = temp2 +# asm 1: mov <temp2=int64#3,1496(<ap=int64#1) +# asm 2: mov <temp2=%edx,1496(<ap=%rdi) +mov %edx,1496(%rdi) + +# qhasm: temp1 = mem64[ap + 1500] +# asm 1: mov 1500(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1500(<ap=%rdi),>temp1=%esi +mov 1500(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3816] +# asm 1: mov 3816(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3816(<ap=%rdi),>temp2=%edx +mov 3816(%rdi),%edx + +# qhasm: mem64[ap + 3816] = temp1 +# asm 1: mov <temp1=int64#2,3816(<ap=int64#1) +# asm 2: mov <temp1=%esi,3816(<ap=%rdi) +mov %esi,3816(%rdi) + +# qhasm: mem64[ap + 1500] = temp2 +# asm 1: mov <temp2=int64#3,1500(<ap=int64#1) +# asm 2: mov <temp2=%edx,1500(<ap=%rdi) +mov %edx,1500(%rdi) + +# qhasm: temp1 = mem64[ap + 1508] +# asm 1: mov 1508(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1508(<ap=%rdi),>temp1=%esi +mov 1508(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2536] +# asm 1: mov 2536(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2536(<ap=%rdi),>temp2=%edx +mov 2536(%rdi),%edx + +# qhasm: mem64[ap + 2536] = temp1 +# asm 1: mov <temp1=int64#2,2536(<ap=int64#1) +# asm 2: mov <temp1=%esi,2536(<ap=%rdi) +mov %esi,2536(%rdi) + +# 
qhasm: mem64[ap + 1508] = temp2 +# asm 1: mov <temp2=int64#3,1508(<ap=int64#1) +# asm 2: mov <temp2=%edx,1508(<ap=%rdi) +mov %edx,1508(%rdi) + +# qhasm: temp1 = mem64[ap + 1516] +# asm 1: mov 1516(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1516(<ap=%rdi),>temp1=%esi +mov 1516(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3560] +# asm 1: mov 3560(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3560(<ap=%rdi),>temp2=%edx +mov 3560(%rdi),%edx + +# qhasm: mem64[ap + 3560] = temp1 +# asm 1: mov <temp1=int64#2,3560(<ap=int64#1) +# asm 2: mov <temp1=%esi,3560(<ap=%rdi) +mov %esi,3560(%rdi) + +# qhasm: mem64[ap + 1516] = temp2 +# asm 1: mov <temp2=int64#3,1516(<ap=int64#1) +# asm 2: mov <temp2=%edx,1516(<ap=%rdi) +mov %edx,1516(%rdi) + +# qhasm: temp1 = mem64[ap + 1524] +# asm 1: mov 1524(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1524(<ap=%rdi),>temp1=%esi +mov 1524(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3048] +# asm 1: mov 3048(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3048(<ap=%rdi),>temp2=%edx +mov 3048(%rdi),%edx + +# qhasm: mem64[ap + 3048] = temp1 +# asm 1: mov <temp1=int64#2,3048(<ap=int64#1) +# asm 2: mov <temp1=%esi,3048(<ap=%rdi) +mov %esi,3048(%rdi) + +# qhasm: mem64[ap + 1524] = temp2 +# asm 1: mov <temp2=int64#3,1524(<ap=int64#1) +# asm 2: mov <temp2=%edx,1524(<ap=%rdi) +mov %edx,1524(%rdi) + +# qhasm: temp1 = mem64[ap + 1528] +# asm 1: mov 1528(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1528(<ap=%rdi),>temp1=%esi +mov 1528(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2024] +# asm 1: mov 2024(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2024(<ap=%rdi),>temp2=%edx +mov 2024(%rdi),%edx + +# qhasm: mem64[ap + 2024] = temp1 +# asm 1: mov <temp1=int64#2,2024(<ap=int64#1) +# asm 2: mov <temp1=%esi,2024(<ap=%rdi) +mov %esi,2024(%rdi) + +# qhasm: mem64[ap + 1528] = temp2 +# asm 1: mov <temp2=int64#3,1528(<ap=int64#1) +# asm 2: mov <temp2=%edx,1528(<ap=%rdi) +mov %edx,1528(%rdi) + +# qhasm: temp1 = mem64[ap + 1532] +# asm 1: mov 1532(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 
1532(<ap=%rdi),>temp1=%esi +mov 1532(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4072] +# asm 1: mov 4072(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4072(<ap=%rdi),>temp2=%edx +mov 4072(%rdi),%edx + +# qhasm: mem64[ap + 4072] = temp1 +# asm 1: mov <temp1=int64#2,4072(<ap=int64#1) +# asm 2: mov <temp1=%esi,4072(<ap=%rdi) +mov %esi,4072(%rdi) + +# qhasm: mem64[ap + 1532] = temp2 +# asm 1: mov <temp2=int64#3,1532(<ap=int64#1) +# asm 2: mov <temp2=%edx,1532(<ap=%rdi) +mov %edx,1532(%rdi) + +# qhasm: temp1 = mem64[ap + 1540] +# asm 1: mov 1540(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1540(<ap=%rdi),>temp1=%esi +mov 1540(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2072] +# asm 1: mov 2072(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2072(<ap=%rdi),>temp2=%edx +mov 2072(%rdi),%edx + +# qhasm: mem64[ap + 2072] = temp1 +# asm 1: mov <temp1=int64#2,2072(<ap=int64#1) +# asm 2: mov <temp1=%esi,2072(<ap=%rdi) +mov %esi,2072(%rdi) + +# qhasm: mem64[ap + 1540] = temp2 +# asm 1: mov <temp2=int64#3,1540(<ap=int64#1) +# asm 2: mov <temp2=%edx,1540(<ap=%rdi) +mov %edx,1540(%rdi) + +# qhasm: temp1 = mem64[ap + 1548] +# asm 1: mov 1548(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1548(<ap=%rdi),>temp1=%esi +mov 1548(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3096] +# asm 1: mov 3096(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3096(<ap=%rdi),>temp2=%edx +mov 3096(%rdi),%edx + +# qhasm: mem64[ap + 3096] = temp1 +# asm 1: mov <temp1=int64#2,3096(<ap=int64#1) +# asm 2: mov <temp1=%esi,3096(<ap=%rdi) +mov %esi,3096(%rdi) + +# qhasm: mem64[ap + 1548] = temp2 +# asm 1: mov <temp2=int64#3,1548(<ap=int64#1) +# asm 2: mov <temp2=%edx,1548(<ap=%rdi) +mov %edx,1548(%rdi) + +# qhasm: temp1 = mem64[ap + 1556] +# asm 1: mov 1556(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1556(<ap=%rdi),>temp1=%esi +mov 1556(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2584] +# asm 1: mov 2584(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2584(<ap=%rdi),>temp2=%edx +mov 2584(%rdi),%edx + +# qhasm: mem64[ap + 2584] = temp1 +# asm 1: mov 
<temp1=int64#2,2584(<ap=int64#1) +# asm 2: mov <temp1=%esi,2584(<ap=%rdi) +mov %esi,2584(%rdi) + +# qhasm: mem64[ap + 1556] = temp2 +# asm 1: mov <temp2=int64#3,1556(<ap=int64#1) +# asm 2: mov <temp2=%edx,1556(<ap=%rdi) +mov %edx,1556(%rdi) + +# qhasm: temp1 = mem64[ap + 1564] +# asm 1: mov 1564(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1564(<ap=%rdi),>temp1=%esi +mov 1564(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3608] +# asm 1: mov 3608(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3608(<ap=%rdi),>temp2=%edx +mov 3608(%rdi),%edx + +# qhasm: mem64[ap + 3608] = temp1 +# asm 1: mov <temp1=int64#2,3608(<ap=int64#1) +# asm 2: mov <temp1=%esi,3608(<ap=%rdi) +mov %esi,3608(%rdi) + +# qhasm: mem64[ap + 1564] = temp2 +# asm 1: mov <temp2=int64#3,1564(<ap=int64#1) +# asm 2: mov <temp2=%edx,1564(<ap=%rdi) +mov %edx,1564(%rdi) + +# qhasm: temp1 = mem64[ap + 1572] +# asm 1: mov 1572(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1572(<ap=%rdi),>temp1=%esi +mov 1572(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2328] +# asm 1: mov 2328(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2328(<ap=%rdi),>temp2=%edx +mov 2328(%rdi),%edx + +# qhasm: mem64[ap + 2328] = temp1 +# asm 1: mov <temp1=int64#2,2328(<ap=int64#1) +# asm 2: mov <temp1=%esi,2328(<ap=%rdi) +mov %esi,2328(%rdi) + +# qhasm: mem64[ap + 1572] = temp2 +# asm 1: mov <temp2=int64#3,1572(<ap=int64#1) +# asm 2: mov <temp2=%edx,1572(<ap=%rdi) +mov %edx,1572(%rdi) + +# qhasm: temp1 = mem64[ap + 1580] +# asm 1: mov 1580(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1580(<ap=%rdi),>temp1=%esi +mov 1580(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3352] +# asm 1: mov 3352(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3352(<ap=%rdi),>temp2=%edx +mov 3352(%rdi),%edx + +# qhasm: mem64[ap + 3352] = temp1 +# asm 1: mov <temp1=int64#2,3352(<ap=int64#1) +# asm 2: mov <temp1=%esi,3352(<ap=%rdi) +mov %esi,3352(%rdi) + +# qhasm: mem64[ap + 1580] = temp2 +# asm 1: mov <temp2=int64#3,1580(<ap=int64#1) +# asm 2: mov <temp2=%edx,1580(<ap=%rdi) +mov %edx,1580(%rdi) + +# 
qhasm: temp1 = mem64[ap + 1588] +# asm 1: mov 1588(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1588(<ap=%rdi),>temp1=%esi +mov 1588(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2840] +# asm 1: mov 2840(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2840(<ap=%rdi),>temp2=%edx +mov 2840(%rdi),%edx + +# qhasm: mem64[ap + 2840] = temp1 +# asm 1: mov <temp1=int64#2,2840(<ap=int64#1) +# asm 2: mov <temp1=%esi,2840(<ap=%rdi) +mov %esi,2840(%rdi) + +# qhasm: mem64[ap + 1588] = temp2 +# asm 1: mov <temp2=int64#3,1588(<ap=int64#1) +# asm 2: mov <temp2=%edx,1588(<ap=%rdi) +mov %edx,1588(%rdi) + +# qhasm: temp1 = mem64[ap + 1592] +# asm 1: mov 1592(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1592(<ap=%rdi),>temp1=%esi +mov 1592(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1816] +# asm 1: mov 1816(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1816(<ap=%rdi),>temp2=%edx +mov 1816(%rdi),%edx + +# qhasm: mem64[ap + 1816] = temp1 +# asm 1: mov <temp1=int64#2,1816(<ap=int64#1) +# asm 2: mov <temp1=%esi,1816(<ap=%rdi) +mov %esi,1816(%rdi) + +# qhasm: mem64[ap + 1592] = temp2 +# asm 1: mov <temp2=int64#3,1592(<ap=int64#1) +# asm 2: mov <temp2=%edx,1592(<ap=%rdi) +mov %edx,1592(%rdi) + +# qhasm: temp1 = mem64[ap + 1596] +# asm 1: mov 1596(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1596(<ap=%rdi),>temp1=%esi +mov 1596(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3864] +# asm 1: mov 3864(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3864(<ap=%rdi),>temp2=%edx +mov 3864(%rdi),%edx + +# qhasm: mem64[ap + 3864] = temp1 +# asm 1: mov <temp1=int64#2,3864(<ap=int64#1) +# asm 2: mov <temp1=%esi,3864(<ap=%rdi) +mov %esi,3864(%rdi) + +# qhasm: mem64[ap + 1596] = temp2 +# asm 1: mov <temp2=int64#3,1596(<ap=int64#1) +# asm 2: mov <temp2=%edx,1596(<ap=%rdi) +mov %edx,1596(%rdi) + +# qhasm: temp1 = mem64[ap + 1604] +# asm 1: mov 1604(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1604(<ap=%rdi),>temp1=%esi +mov 1604(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2200] +# asm 1: mov 2200(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 
2200(<ap=%rdi),>temp2=%edx +mov 2200(%rdi),%edx + +# qhasm: mem64[ap + 2200] = temp1 +# asm 1: mov <temp1=int64#2,2200(<ap=int64#1) +# asm 2: mov <temp1=%esi,2200(<ap=%rdi) +mov %esi,2200(%rdi) + +# qhasm: mem64[ap + 1604] = temp2 +# asm 1: mov <temp2=int64#3,1604(<ap=int64#1) +# asm 2: mov <temp2=%edx,1604(<ap=%rdi) +mov %edx,1604(%rdi) + +# qhasm: temp1 = mem64[ap + 1612] +# asm 1: mov 1612(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1612(<ap=%rdi),>temp1=%esi +mov 1612(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3224] +# asm 1: mov 3224(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3224(<ap=%rdi),>temp2=%edx +mov 3224(%rdi),%edx + +# qhasm: mem64[ap + 3224] = temp1 +# asm 1: mov <temp1=int64#2,3224(<ap=int64#1) +# asm 2: mov <temp1=%esi,3224(<ap=%rdi) +mov %esi,3224(%rdi) + +# qhasm: mem64[ap + 1612] = temp2 +# asm 1: mov <temp2=int64#3,1612(<ap=int64#1) +# asm 2: mov <temp2=%edx,1612(<ap=%rdi) +mov %edx,1612(%rdi) + +# qhasm: temp1 = mem64[ap + 1620] +# asm 1: mov 1620(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1620(<ap=%rdi),>temp1=%esi +mov 1620(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2712] +# asm 1: mov 2712(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2712(<ap=%rdi),>temp2=%edx +mov 2712(%rdi),%edx + +# qhasm: mem64[ap + 2712] = temp1 +# asm 1: mov <temp1=int64#2,2712(<ap=int64#1) +# asm 2: mov <temp1=%esi,2712(<ap=%rdi) +mov %esi,2712(%rdi) + +# qhasm: mem64[ap + 1620] = temp2 +# asm 1: mov <temp2=int64#3,1620(<ap=int64#1) +# asm 2: mov <temp2=%edx,1620(<ap=%rdi) +mov %edx,1620(%rdi) + +# qhasm: temp1 = mem64[ap + 1624] +# asm 1: mov 1624(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1624(<ap=%rdi),>temp1=%esi +mov 1624(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1688] +# asm 1: mov 1688(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1688(<ap=%rdi),>temp2=%edx +mov 1688(%rdi),%edx + +# qhasm: mem64[ap + 1688] = temp1 +# asm 1: mov <temp1=int64#2,1688(<ap=int64#1) +# asm 2: mov <temp1=%esi,1688(<ap=%rdi) +mov %esi,1688(%rdi) + +# qhasm: mem64[ap + 1624] = temp2 +# asm 1: mov 
<temp2=int64#3,1624(<ap=int64#1) +# asm 2: mov <temp2=%edx,1624(<ap=%rdi) +mov %edx,1624(%rdi) + +# qhasm: temp1 = mem64[ap + 1628] +# asm 1: mov 1628(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1628(<ap=%rdi),>temp1=%esi +mov 1628(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3736] +# asm 1: mov 3736(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3736(<ap=%rdi),>temp2=%edx +mov 3736(%rdi),%edx + +# qhasm: mem64[ap + 3736] = temp1 +# asm 1: mov <temp1=int64#2,3736(<ap=int64#1) +# asm 2: mov <temp1=%esi,3736(<ap=%rdi) +mov %esi,3736(%rdi) + +# qhasm: mem64[ap + 1628] = temp2 +# asm 1: mov <temp2=int64#3,1628(<ap=int64#1) +# asm 2: mov <temp2=%edx,1628(<ap=%rdi) +mov %edx,1628(%rdi) + +# qhasm: temp1 = mem64[ap + 1636] +# asm 1: mov 1636(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1636(<ap=%rdi),>temp1=%esi +mov 1636(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2456] +# asm 1: mov 2456(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2456(<ap=%rdi),>temp2=%edx +mov 2456(%rdi),%edx + +# qhasm: mem64[ap + 2456] = temp1 +# asm 1: mov <temp1=int64#2,2456(<ap=int64#1) +# asm 2: mov <temp1=%esi,2456(<ap=%rdi) +mov %esi,2456(%rdi) + +# qhasm: mem64[ap + 1636] = temp2 +# asm 1: mov <temp2=int64#3,1636(<ap=int64#1) +# asm 2: mov <temp2=%edx,1636(<ap=%rdi) +mov %edx,1636(%rdi) + +# qhasm: temp1 = mem64[ap + 1644] +# asm 1: mov 1644(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1644(<ap=%rdi),>temp1=%esi +mov 1644(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3480] +# asm 1: mov 3480(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3480(<ap=%rdi),>temp2=%edx +mov 3480(%rdi),%edx + +# qhasm: mem64[ap + 3480] = temp1 +# asm 1: mov <temp1=int64#2,3480(<ap=int64#1) +# asm 2: mov <temp1=%esi,3480(<ap=%rdi) +mov %esi,3480(%rdi) + +# qhasm: mem64[ap + 1644] = temp2 +# asm 1: mov <temp2=int64#3,1644(<ap=int64#1) +# asm 2: mov <temp2=%edx,1644(<ap=%rdi) +mov %edx,1644(%rdi) + +# qhasm: temp1 = mem64[ap + 1652] +# asm 1: mov 1652(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1652(<ap=%rdi),>temp1=%esi +mov 1652(%rdi),%esi + +# 
qhasm: temp2 = mem64[ap + 2968] +# asm 1: mov 2968(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2968(<ap=%rdi),>temp2=%edx +mov 2968(%rdi),%edx + +# qhasm: mem64[ap + 2968] = temp1 +# asm 1: mov <temp1=int64#2,2968(<ap=int64#1) +# asm 2: mov <temp1=%esi,2968(<ap=%rdi) +mov %esi,2968(%rdi) + +# qhasm: mem64[ap + 1652] = temp2 +# asm 1: mov <temp2=int64#3,1652(<ap=int64#1) +# asm 2: mov <temp2=%edx,1652(<ap=%rdi) +mov %edx,1652(%rdi) + +# qhasm: temp1 = mem64[ap + 1656] +# asm 1: mov 1656(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1656(<ap=%rdi),>temp1=%esi +mov 1656(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1944] +# asm 1: mov 1944(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1944(<ap=%rdi),>temp2=%edx +mov 1944(%rdi),%edx + +# qhasm: mem64[ap + 1944] = temp1 +# asm 1: mov <temp1=int64#2,1944(<ap=int64#1) +# asm 2: mov <temp1=%esi,1944(<ap=%rdi) +mov %esi,1944(%rdi) + +# qhasm: mem64[ap + 1656] = temp2 +# asm 1: mov <temp2=int64#3,1656(<ap=int64#1) +# asm 2: mov <temp2=%edx,1656(<ap=%rdi) +mov %edx,1656(%rdi) + +# qhasm: temp1 = mem64[ap + 1660] +# asm 1: mov 1660(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1660(<ap=%rdi),>temp1=%esi +mov 1660(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3992] +# asm 1: mov 3992(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3992(<ap=%rdi),>temp2=%edx +mov 3992(%rdi),%edx + +# qhasm: mem64[ap + 3992] = temp1 +# asm 1: mov <temp1=int64#2,3992(<ap=int64#1) +# asm 2: mov <temp1=%esi,3992(<ap=%rdi) +mov %esi,3992(%rdi) + +# qhasm: mem64[ap + 1660] = temp2 +# asm 1: mov <temp2=int64#3,1660(<ap=int64#1) +# asm 2: mov <temp2=%edx,1660(<ap=%rdi) +mov %edx,1660(%rdi) + +# qhasm: temp1 = mem64[ap + 1668] +# asm 1: mov 1668(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1668(<ap=%rdi),>temp1=%esi +mov 1668(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2136] +# asm 1: mov 2136(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2136(<ap=%rdi),>temp2=%edx +mov 2136(%rdi),%edx + +# qhasm: mem64[ap + 2136] = temp1 +# asm 1: mov <temp1=int64#2,2136(<ap=int64#1) +# asm 2: mov 
<temp1=%esi,2136(<ap=%rdi) +mov %esi,2136(%rdi) + +# qhasm: mem64[ap + 1668] = temp2 +# asm 1: mov <temp2=int64#3,1668(<ap=int64#1) +# asm 2: mov <temp2=%edx,1668(<ap=%rdi) +mov %edx,1668(%rdi) + +# qhasm: temp1 = mem64[ap + 1676] +# asm 1: mov 1676(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1676(<ap=%rdi),>temp1=%esi +mov 1676(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3160] +# asm 1: mov 3160(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3160(<ap=%rdi),>temp2=%edx +mov 3160(%rdi),%edx + +# qhasm: mem64[ap + 3160] = temp1 +# asm 1: mov <temp1=int64#2,3160(<ap=int64#1) +# asm 2: mov <temp1=%esi,3160(<ap=%rdi) +mov %esi,3160(%rdi) + +# qhasm: mem64[ap + 1676] = temp2 +# asm 1: mov <temp2=int64#3,1676(<ap=int64#1) +# asm 2: mov <temp2=%edx,1676(<ap=%rdi) +mov %edx,1676(%rdi) + +# qhasm: temp1 = mem64[ap + 1684] +# asm 1: mov 1684(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1684(<ap=%rdi),>temp1=%esi +mov 1684(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2648] +# asm 1: mov 2648(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2648(<ap=%rdi),>temp2=%edx +mov 2648(%rdi),%edx + +# qhasm: mem64[ap + 2648] = temp1 +# asm 1: mov <temp1=int64#2,2648(<ap=int64#1) +# asm 2: mov <temp1=%esi,2648(<ap=%rdi) +mov %esi,2648(%rdi) + +# qhasm: mem64[ap + 1684] = temp2 +# asm 1: mov <temp2=int64#3,1684(<ap=int64#1) +# asm 2: mov <temp2=%edx,1684(<ap=%rdi) +mov %edx,1684(%rdi) + +# qhasm: temp1 = mem64[ap + 1692] +# asm 1: mov 1692(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1692(<ap=%rdi),>temp1=%esi +mov 1692(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3672] +# asm 1: mov 3672(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3672(<ap=%rdi),>temp2=%edx +mov 3672(%rdi),%edx + +# qhasm: mem64[ap + 3672] = temp1 +# asm 1: mov <temp1=int64#2,3672(<ap=int64#1) +# asm 2: mov <temp1=%esi,3672(<ap=%rdi) +mov %esi,3672(%rdi) + +# qhasm: mem64[ap + 1692] = temp2 +# asm 1: mov <temp2=int64#3,1692(<ap=int64#1) +# asm 2: mov <temp2=%edx,1692(<ap=%rdi) +mov %edx,1692(%rdi) + +# qhasm: temp1 = mem64[ap + 1700] +# asm 1: mov 
1700(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1700(<ap=%rdi),>temp1=%esi +mov 1700(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2392] +# asm 1: mov 2392(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2392(<ap=%rdi),>temp2=%edx +mov 2392(%rdi),%edx + +# qhasm: mem64[ap + 2392] = temp1 +# asm 1: mov <temp1=int64#2,2392(<ap=int64#1) +# asm 2: mov <temp1=%esi,2392(<ap=%rdi) +mov %esi,2392(%rdi) + +# qhasm: mem64[ap + 1700] = temp2 +# asm 1: mov <temp2=int64#3,1700(<ap=int64#1) +# asm 2: mov <temp2=%edx,1700(<ap=%rdi) +mov %edx,1700(%rdi) + +# qhasm: temp1 = mem64[ap + 1708] +# asm 1: mov 1708(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1708(<ap=%rdi),>temp1=%esi +mov 1708(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3416] +# asm 1: mov 3416(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3416(<ap=%rdi),>temp2=%edx +mov 3416(%rdi),%edx + +# qhasm: mem64[ap + 3416] = temp1 +# asm 1: mov <temp1=int64#2,3416(<ap=int64#1) +# asm 2: mov <temp1=%esi,3416(<ap=%rdi) +mov %esi,3416(%rdi) + +# qhasm: mem64[ap + 1708] = temp2 +# asm 1: mov <temp2=int64#3,1708(<ap=int64#1) +# asm 2: mov <temp2=%edx,1708(<ap=%rdi) +mov %edx,1708(%rdi) + +# qhasm: temp1 = mem64[ap + 1716] +# asm 1: mov 1716(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1716(<ap=%rdi),>temp1=%esi +mov 1716(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2904] +# asm 1: mov 2904(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2904(<ap=%rdi),>temp2=%edx +mov 2904(%rdi),%edx + +# qhasm: mem64[ap + 2904] = temp1 +# asm 1: mov <temp1=int64#2,2904(<ap=int64#1) +# asm 2: mov <temp1=%esi,2904(<ap=%rdi) +mov %esi,2904(%rdi) + +# qhasm: mem64[ap + 1716] = temp2 +# asm 1: mov <temp2=int64#3,1716(<ap=int64#1) +# asm 2: mov <temp2=%edx,1716(<ap=%rdi) +mov %edx,1716(%rdi) + +# qhasm: temp1 = mem64[ap + 1720] +# asm 1: mov 1720(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1720(<ap=%rdi),>temp1=%esi +mov 1720(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1880] +# asm 1: mov 1880(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1880(<ap=%rdi),>temp2=%edx +mov 1880(%rdi),%edx + +# 
qhasm: mem64[ap + 1880] = temp1 +# asm 1: mov <temp1=int64#2,1880(<ap=int64#1) +# asm 2: mov <temp1=%esi,1880(<ap=%rdi) +mov %esi,1880(%rdi) + +# qhasm: mem64[ap + 1720] = temp2 +# asm 1: mov <temp2=int64#3,1720(<ap=int64#1) +# asm 2: mov <temp2=%edx,1720(<ap=%rdi) +mov %edx,1720(%rdi) + +# qhasm: temp1 = mem64[ap + 1724] +# asm 1: mov 1724(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1724(<ap=%rdi),>temp1=%esi +mov 1724(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3928] +# asm 1: mov 3928(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3928(<ap=%rdi),>temp2=%edx +mov 3928(%rdi),%edx + +# qhasm: mem64[ap + 3928] = temp1 +# asm 1: mov <temp1=int64#2,3928(<ap=int64#1) +# asm 2: mov <temp1=%esi,3928(<ap=%rdi) +mov %esi,3928(%rdi) + +# qhasm: mem64[ap + 1724] = temp2 +# asm 1: mov <temp2=int64#3,1724(<ap=int64#1) +# asm 2: mov <temp2=%edx,1724(<ap=%rdi) +mov %edx,1724(%rdi) + +# qhasm: temp1 = mem64[ap + 1732] +# asm 1: mov 1732(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1732(<ap=%rdi),>temp1=%esi +mov 1732(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2264] +# asm 1: mov 2264(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2264(<ap=%rdi),>temp2=%edx +mov 2264(%rdi),%edx + +# qhasm: mem64[ap + 2264] = temp1 +# asm 1: mov <temp1=int64#2,2264(<ap=int64#1) +# asm 2: mov <temp1=%esi,2264(<ap=%rdi) +mov %esi,2264(%rdi) + +# qhasm: mem64[ap + 1732] = temp2 +# asm 1: mov <temp2=int64#3,1732(<ap=int64#1) +# asm 2: mov <temp2=%edx,1732(<ap=%rdi) +mov %edx,1732(%rdi) + +# qhasm: temp1 = mem64[ap + 1740] +# asm 1: mov 1740(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1740(<ap=%rdi),>temp1=%esi +mov 1740(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3288] +# asm 1: mov 3288(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3288(<ap=%rdi),>temp2=%edx +mov 3288(%rdi),%edx + +# qhasm: mem64[ap + 3288] = temp1 +# asm 1: mov <temp1=int64#2,3288(<ap=int64#1) +# asm 2: mov <temp1=%esi,3288(<ap=%rdi) +mov %esi,3288(%rdi) + +# qhasm: mem64[ap + 1740] = temp2 +# asm 1: mov <temp2=int64#3,1740(<ap=int64#1) +# asm 2: mov 
<temp2=%edx,1740(<ap=%rdi) +mov %edx,1740(%rdi) + +# qhasm: temp1 = mem64[ap + 1748] +# asm 1: mov 1748(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1748(<ap=%rdi),>temp1=%esi +mov 1748(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2776] +# asm 1: mov 2776(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2776(<ap=%rdi),>temp2=%edx +mov 2776(%rdi),%edx + +# qhasm: mem64[ap + 2776] = temp1 +# asm 1: mov <temp1=int64#2,2776(<ap=int64#1) +# asm 2: mov <temp1=%esi,2776(<ap=%rdi) +mov %esi,2776(%rdi) + +# qhasm: mem64[ap + 1748] = temp2 +# asm 1: mov <temp2=int64#3,1748(<ap=int64#1) +# asm 2: mov <temp2=%edx,1748(<ap=%rdi) +mov %edx,1748(%rdi) + +# qhasm: temp1 = mem64[ap + 1756] +# asm 1: mov 1756(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1756(<ap=%rdi),>temp1=%esi +mov 1756(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3800] +# asm 1: mov 3800(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3800(<ap=%rdi),>temp2=%edx +mov 3800(%rdi),%edx + +# qhasm: mem64[ap + 3800] = temp1 +# asm 1: mov <temp1=int64#2,3800(<ap=int64#1) +# asm 2: mov <temp1=%esi,3800(<ap=%rdi) +mov %esi,3800(%rdi) + +# qhasm: mem64[ap + 1756] = temp2 +# asm 1: mov <temp2=int64#3,1756(<ap=int64#1) +# asm 2: mov <temp2=%edx,1756(<ap=%rdi) +mov %edx,1756(%rdi) + +# qhasm: temp1 = mem64[ap + 1764] +# asm 1: mov 1764(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1764(<ap=%rdi),>temp1=%esi +mov 1764(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2520] +# asm 1: mov 2520(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2520(<ap=%rdi),>temp2=%edx +mov 2520(%rdi),%edx + +# qhasm: mem64[ap + 2520] = temp1 +# asm 1: mov <temp1=int64#2,2520(<ap=int64#1) +# asm 2: mov <temp1=%esi,2520(<ap=%rdi) +mov %esi,2520(%rdi) + +# qhasm: mem64[ap + 1764] = temp2 +# asm 1: mov <temp2=int64#3,1764(<ap=int64#1) +# asm 2: mov <temp2=%edx,1764(<ap=%rdi) +mov %edx,1764(%rdi) + +# qhasm: temp1 = mem64[ap + 1772] +# asm 1: mov 1772(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1772(<ap=%rdi),>temp1=%esi +mov 1772(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3544] +# asm 1: mov 
3544(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3544(<ap=%rdi),>temp2=%edx +mov 3544(%rdi),%edx + +# qhasm: mem64[ap + 3544] = temp1 +# asm 1: mov <temp1=int64#2,3544(<ap=int64#1) +# asm 2: mov <temp1=%esi,3544(<ap=%rdi) +mov %esi,3544(%rdi) + +# qhasm: mem64[ap + 1772] = temp2 +# asm 1: mov <temp2=int64#3,1772(<ap=int64#1) +# asm 2: mov <temp2=%edx,1772(<ap=%rdi) +mov %edx,1772(%rdi) + +# qhasm: temp1 = mem64[ap + 1780] +# asm 1: mov 1780(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1780(<ap=%rdi),>temp1=%esi +mov 1780(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3032] +# asm 1: mov 3032(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3032(<ap=%rdi),>temp2=%edx +mov 3032(%rdi),%edx + +# qhasm: mem64[ap + 3032] = temp1 +# asm 1: mov <temp1=int64#2,3032(<ap=int64#1) +# asm 2: mov <temp1=%esi,3032(<ap=%rdi) +mov %esi,3032(%rdi) + +# qhasm: mem64[ap + 1780] = temp2 +# asm 1: mov <temp2=int64#3,1780(<ap=int64#1) +# asm 2: mov <temp2=%edx,1780(<ap=%rdi) +mov %edx,1780(%rdi) + +# qhasm: temp1 = mem64[ap + 1784] +# asm 1: mov 1784(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1784(<ap=%rdi),>temp1=%esi +mov 1784(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2008] +# asm 1: mov 2008(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2008(<ap=%rdi),>temp2=%edx +mov 2008(%rdi),%edx + +# qhasm: mem64[ap + 2008] = temp1 +# asm 1: mov <temp1=int64#2,2008(<ap=int64#1) +# asm 2: mov <temp1=%esi,2008(<ap=%rdi) +mov %esi,2008(%rdi) + +# qhasm: mem64[ap + 1784] = temp2 +# asm 1: mov <temp2=int64#3,1784(<ap=int64#1) +# asm 2: mov <temp2=%edx,1784(<ap=%rdi) +mov %edx,1784(%rdi) + +# qhasm: temp1 = mem64[ap + 1788] +# asm 1: mov 1788(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1788(<ap=%rdi),>temp1=%esi +mov 1788(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4056] +# asm 1: mov 4056(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4056(<ap=%rdi),>temp2=%edx +mov 4056(%rdi),%edx + +# qhasm: mem64[ap + 4056] = temp1 +# asm 1: mov <temp1=int64#2,4056(<ap=int64#1) +# asm 2: mov <temp1=%esi,4056(<ap=%rdi) +mov %esi,4056(%rdi) + +# 
qhasm: mem64[ap + 1788] = temp2 +# asm 1: mov <temp2=int64#3,1788(<ap=int64#1) +# asm 2: mov <temp2=%edx,1788(<ap=%rdi) +mov %edx,1788(%rdi) + +# qhasm: temp1 = mem64[ap + 1796] +# asm 1: mov 1796(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1796(<ap=%rdi),>temp1=%esi +mov 1796(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2104] +# asm 1: mov 2104(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2104(<ap=%rdi),>temp2=%edx +mov 2104(%rdi),%edx + +# qhasm: mem64[ap + 2104] = temp1 +# asm 1: mov <temp1=int64#2,2104(<ap=int64#1) +# asm 2: mov <temp1=%esi,2104(<ap=%rdi) +mov %esi,2104(%rdi) + +# qhasm: mem64[ap + 1796] = temp2 +# asm 1: mov <temp2=int64#3,1796(<ap=int64#1) +# asm 2: mov <temp2=%edx,1796(<ap=%rdi) +mov %edx,1796(%rdi) + +# qhasm: temp1 = mem64[ap + 1804] +# asm 1: mov 1804(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1804(<ap=%rdi),>temp1=%esi +mov 1804(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3128] +# asm 1: mov 3128(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3128(<ap=%rdi),>temp2=%edx +mov 3128(%rdi),%edx + +# qhasm: mem64[ap + 3128] = temp1 +# asm 1: mov <temp1=int64#2,3128(<ap=int64#1) +# asm 2: mov <temp1=%esi,3128(<ap=%rdi) +mov %esi,3128(%rdi) + +# qhasm: mem64[ap + 1804] = temp2 +# asm 1: mov <temp2=int64#3,1804(<ap=int64#1) +# asm 2: mov <temp2=%edx,1804(<ap=%rdi) +mov %edx,1804(%rdi) + +# qhasm: temp1 = mem64[ap + 1812] +# asm 1: mov 1812(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1812(<ap=%rdi),>temp1=%esi +mov 1812(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2616] +# asm 1: mov 2616(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2616(<ap=%rdi),>temp2=%edx +mov 2616(%rdi),%edx + +# qhasm: mem64[ap + 2616] = temp1 +# asm 1: mov <temp1=int64#2,2616(<ap=int64#1) +# asm 2: mov <temp1=%esi,2616(<ap=%rdi) +mov %esi,2616(%rdi) + +# qhasm: mem64[ap + 1812] = temp2 +# asm 1: mov <temp2=int64#3,1812(<ap=int64#1) +# asm 2: mov <temp2=%edx,1812(<ap=%rdi) +mov %edx,1812(%rdi) + +# qhasm: temp1 = mem64[ap + 1820] +# asm 1: mov 1820(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 
1820(<ap=%rdi),>temp1=%esi +mov 1820(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3640] +# asm 1: mov 3640(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3640(<ap=%rdi),>temp2=%edx +mov 3640(%rdi),%edx + +# qhasm: mem64[ap + 3640] = temp1 +# asm 1: mov <temp1=int64#2,3640(<ap=int64#1) +# asm 2: mov <temp1=%esi,3640(<ap=%rdi) +mov %esi,3640(%rdi) + +# qhasm: mem64[ap + 1820] = temp2 +# asm 1: mov <temp2=int64#3,1820(<ap=int64#1) +# asm 2: mov <temp2=%edx,1820(<ap=%rdi) +mov %edx,1820(%rdi) + +# qhasm: temp1 = mem64[ap + 1828] +# asm 1: mov 1828(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1828(<ap=%rdi),>temp1=%esi +mov 1828(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2360] +# asm 1: mov 2360(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2360(<ap=%rdi),>temp2=%edx +mov 2360(%rdi),%edx + +# qhasm: mem64[ap + 2360] = temp1 +# asm 1: mov <temp1=int64#2,2360(<ap=int64#1) +# asm 2: mov <temp1=%esi,2360(<ap=%rdi) +mov %esi,2360(%rdi) + +# qhasm: mem64[ap + 1828] = temp2 +# asm 1: mov <temp2=int64#3,1828(<ap=int64#1) +# asm 2: mov <temp2=%edx,1828(<ap=%rdi) +mov %edx,1828(%rdi) + +# qhasm: temp1 = mem64[ap + 1836] +# asm 1: mov 1836(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1836(<ap=%rdi),>temp1=%esi +mov 1836(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3384] +# asm 1: mov 3384(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3384(<ap=%rdi),>temp2=%edx +mov 3384(%rdi),%edx + +# qhasm: mem64[ap + 3384] = temp1 +# asm 1: mov <temp1=int64#2,3384(<ap=int64#1) +# asm 2: mov <temp1=%esi,3384(<ap=%rdi) +mov %esi,3384(%rdi) + +# qhasm: mem64[ap + 1836] = temp2 +# asm 1: mov <temp2=int64#3,1836(<ap=int64#1) +# asm 2: mov <temp2=%edx,1836(<ap=%rdi) +mov %edx,1836(%rdi) + +# qhasm: temp1 = mem64[ap + 1844] +# asm 1: mov 1844(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1844(<ap=%rdi),>temp1=%esi +mov 1844(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2872] +# asm 1: mov 2872(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2872(<ap=%rdi),>temp2=%edx +mov 2872(%rdi),%edx + +# qhasm: mem64[ap + 2872] = temp1 +# asm 1: mov 
<temp1=int64#2,2872(<ap=int64#1) +# asm 2: mov <temp1=%esi,2872(<ap=%rdi) +mov %esi,2872(%rdi) + +# qhasm: mem64[ap + 1844] = temp2 +# asm 1: mov <temp2=int64#3,1844(<ap=int64#1) +# asm 2: mov <temp2=%edx,1844(<ap=%rdi) +mov %edx,1844(%rdi) + +# qhasm: temp1 = mem64[ap + 1852] +# asm 1: mov 1852(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1852(<ap=%rdi),>temp1=%esi +mov 1852(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3896] +# asm 1: mov 3896(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3896(<ap=%rdi),>temp2=%edx +mov 3896(%rdi),%edx + +# qhasm: mem64[ap + 3896] = temp1 +# asm 1: mov <temp1=int64#2,3896(<ap=int64#1) +# asm 2: mov <temp1=%esi,3896(<ap=%rdi) +mov %esi,3896(%rdi) + +# qhasm: mem64[ap + 1852] = temp2 +# asm 1: mov <temp2=int64#3,1852(<ap=int64#1) +# asm 2: mov <temp2=%edx,1852(<ap=%rdi) +mov %edx,1852(%rdi) + +# qhasm: temp1 = mem64[ap + 1860] +# asm 1: mov 1860(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1860(<ap=%rdi),>temp1=%esi +mov 1860(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2232] +# asm 1: mov 2232(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2232(<ap=%rdi),>temp2=%edx +mov 2232(%rdi),%edx + +# qhasm: mem64[ap + 2232] = temp1 +# asm 1: mov <temp1=int64#2,2232(<ap=int64#1) +# asm 2: mov <temp1=%esi,2232(<ap=%rdi) +mov %esi,2232(%rdi) + +# qhasm: mem64[ap + 1860] = temp2 +# asm 1: mov <temp2=int64#3,1860(<ap=int64#1) +# asm 2: mov <temp2=%edx,1860(<ap=%rdi) +mov %edx,1860(%rdi) + +# qhasm: temp1 = mem64[ap + 1868] +# asm 1: mov 1868(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1868(<ap=%rdi),>temp1=%esi +mov 1868(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3256] +# asm 1: mov 3256(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3256(<ap=%rdi),>temp2=%edx +mov 3256(%rdi),%edx + +# qhasm: mem64[ap + 3256] = temp1 +# asm 1: mov <temp1=int64#2,3256(<ap=int64#1) +# asm 2: mov <temp1=%esi,3256(<ap=%rdi) +mov %esi,3256(%rdi) + +# qhasm: mem64[ap + 1868] = temp2 +# asm 1: mov <temp2=int64#3,1868(<ap=int64#1) +# asm 2: mov <temp2=%edx,1868(<ap=%rdi) +mov %edx,1868(%rdi) + +# 
qhasm: temp1 = mem64[ap + 1876] +# asm 1: mov 1876(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1876(<ap=%rdi),>temp1=%esi +mov 1876(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2744] +# asm 1: mov 2744(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2744(<ap=%rdi),>temp2=%edx +mov 2744(%rdi),%edx + +# qhasm: mem64[ap + 2744] = temp1 +# asm 1: mov <temp1=int64#2,2744(<ap=int64#1) +# asm 2: mov <temp1=%esi,2744(<ap=%rdi) +mov %esi,2744(%rdi) + +# qhasm: mem64[ap + 1876] = temp2 +# asm 1: mov <temp2=int64#3,1876(<ap=int64#1) +# asm 2: mov <temp2=%edx,1876(<ap=%rdi) +mov %edx,1876(%rdi) + +# qhasm: temp1 = mem64[ap + 1884] +# asm 1: mov 1884(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1884(<ap=%rdi),>temp1=%esi +mov 1884(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3768] +# asm 1: mov 3768(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3768(<ap=%rdi),>temp2=%edx +mov 3768(%rdi),%edx + +# qhasm: mem64[ap + 3768] = temp1 +# asm 1: mov <temp1=int64#2,3768(<ap=int64#1) +# asm 2: mov <temp1=%esi,3768(<ap=%rdi) +mov %esi,3768(%rdi) + +# qhasm: mem64[ap + 1884] = temp2 +# asm 1: mov <temp2=int64#3,1884(<ap=int64#1) +# asm 2: mov <temp2=%edx,1884(<ap=%rdi) +mov %edx,1884(%rdi) + +# qhasm: temp1 = mem64[ap + 1892] +# asm 1: mov 1892(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1892(<ap=%rdi),>temp1=%esi +mov 1892(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2488] +# asm 1: mov 2488(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2488(<ap=%rdi),>temp2=%edx +mov 2488(%rdi),%edx + +# qhasm: mem64[ap + 2488] = temp1 +# asm 1: mov <temp1=int64#2,2488(<ap=int64#1) +# asm 2: mov <temp1=%esi,2488(<ap=%rdi) +mov %esi,2488(%rdi) + +# qhasm: mem64[ap + 1892] = temp2 +# asm 1: mov <temp2=int64#3,1892(<ap=int64#1) +# asm 2: mov <temp2=%edx,1892(<ap=%rdi) +mov %edx,1892(%rdi) + +# qhasm: temp1 = mem64[ap + 1900] +# asm 1: mov 1900(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1900(<ap=%rdi),>temp1=%esi +mov 1900(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3512] +# asm 1: mov 3512(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 
3512(<ap=%rdi),>temp2=%edx +mov 3512(%rdi),%edx + +# qhasm: mem64[ap + 3512] = temp1 +# asm 1: mov <temp1=int64#2,3512(<ap=int64#1) +# asm 2: mov <temp1=%esi,3512(<ap=%rdi) +mov %esi,3512(%rdi) + +# qhasm: mem64[ap + 1900] = temp2 +# asm 1: mov <temp2=int64#3,1900(<ap=int64#1) +# asm 2: mov <temp2=%edx,1900(<ap=%rdi) +mov %edx,1900(%rdi) + +# qhasm: temp1 = mem64[ap + 1908] +# asm 1: mov 1908(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1908(<ap=%rdi),>temp1=%esi +mov 1908(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3000] +# asm 1: mov 3000(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3000(<ap=%rdi),>temp2=%edx +mov 3000(%rdi),%edx + +# qhasm: mem64[ap + 3000] = temp1 +# asm 1: mov <temp1=int64#2,3000(<ap=int64#1) +# asm 2: mov <temp1=%esi,3000(<ap=%rdi) +mov %esi,3000(%rdi) + +# qhasm: mem64[ap + 1908] = temp2 +# asm 1: mov <temp2=int64#3,1908(<ap=int64#1) +# asm 2: mov <temp2=%edx,1908(<ap=%rdi) +mov %edx,1908(%rdi) + +# qhasm: temp1 = mem64[ap + 1912] +# asm 1: mov 1912(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1912(<ap=%rdi),>temp1=%esi +mov 1912(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 1976] +# asm 1: mov 1976(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 1976(<ap=%rdi),>temp2=%edx +mov 1976(%rdi),%edx + +# qhasm: mem64[ap + 1976] = temp1 +# asm 1: mov <temp1=int64#2,1976(<ap=int64#1) +# asm 2: mov <temp1=%esi,1976(<ap=%rdi) +mov %esi,1976(%rdi) + +# qhasm: mem64[ap + 1912] = temp2 +# asm 1: mov <temp2=int64#3,1912(<ap=int64#1) +# asm 2: mov <temp2=%edx,1912(<ap=%rdi) +mov %edx,1912(%rdi) + +# qhasm: temp1 = mem64[ap + 1916] +# asm 1: mov 1916(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1916(<ap=%rdi),>temp1=%esi +mov 1916(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4024] +# asm 1: mov 4024(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4024(<ap=%rdi),>temp2=%edx +mov 4024(%rdi),%edx + +# qhasm: mem64[ap + 4024] = temp1 +# asm 1: mov <temp1=int64#2,4024(<ap=int64#1) +# asm 2: mov <temp1=%esi,4024(<ap=%rdi) +mov %esi,4024(%rdi) + +# qhasm: mem64[ap + 1916] = temp2 +# asm 1: mov 
<temp2=int64#3,1916(<ap=int64#1) +# asm 2: mov <temp2=%edx,1916(<ap=%rdi) +mov %edx,1916(%rdi) + +# qhasm: temp1 = mem64[ap + 1924] +# asm 1: mov 1924(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1924(<ap=%rdi),>temp1=%esi +mov 1924(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2168] +# asm 1: mov 2168(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2168(<ap=%rdi),>temp2=%edx +mov 2168(%rdi),%edx + +# qhasm: mem64[ap + 2168] = temp1 +# asm 1: mov <temp1=int64#2,2168(<ap=int64#1) +# asm 2: mov <temp1=%esi,2168(<ap=%rdi) +mov %esi,2168(%rdi) + +# qhasm: mem64[ap + 1924] = temp2 +# asm 1: mov <temp2=int64#3,1924(<ap=int64#1) +# asm 2: mov <temp2=%edx,1924(<ap=%rdi) +mov %edx,1924(%rdi) + +# qhasm: temp1 = mem64[ap + 1932] +# asm 1: mov 1932(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1932(<ap=%rdi),>temp1=%esi +mov 1932(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3192] +# asm 1: mov 3192(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3192(<ap=%rdi),>temp2=%edx +mov 3192(%rdi),%edx + +# qhasm: mem64[ap + 3192] = temp1 +# asm 1: mov <temp1=int64#2,3192(<ap=int64#1) +# asm 2: mov <temp1=%esi,3192(<ap=%rdi) +mov %esi,3192(%rdi) + +# qhasm: mem64[ap + 1932] = temp2 +# asm 1: mov <temp2=int64#3,1932(<ap=int64#1) +# asm 2: mov <temp2=%edx,1932(<ap=%rdi) +mov %edx,1932(%rdi) + +# qhasm: temp1 = mem64[ap + 1940] +# asm 1: mov 1940(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1940(<ap=%rdi),>temp1=%esi +mov 1940(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2680] +# asm 1: mov 2680(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2680(<ap=%rdi),>temp2=%edx +mov 2680(%rdi),%edx + +# qhasm: mem64[ap + 2680] = temp1 +# asm 1: mov <temp1=int64#2,2680(<ap=int64#1) +# asm 2: mov <temp1=%esi,2680(<ap=%rdi) +mov %esi,2680(%rdi) + +# qhasm: mem64[ap + 1940] = temp2 +# asm 1: mov <temp2=int64#3,1940(<ap=int64#1) +# asm 2: mov <temp2=%edx,1940(<ap=%rdi) +mov %edx,1940(%rdi) + +# qhasm: temp1 = mem64[ap + 1948] +# asm 1: mov 1948(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1948(<ap=%rdi),>temp1=%esi +mov 1948(%rdi),%esi + +# 
qhasm: temp2 = mem64[ap + 3704] +# asm 1: mov 3704(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3704(<ap=%rdi),>temp2=%edx +mov 3704(%rdi),%edx + +# qhasm: mem64[ap + 3704] = temp1 +# asm 1: mov <temp1=int64#2,3704(<ap=int64#1) +# asm 2: mov <temp1=%esi,3704(<ap=%rdi) +mov %esi,3704(%rdi) + +# qhasm: mem64[ap + 1948] = temp2 +# asm 1: mov <temp2=int64#3,1948(<ap=int64#1) +# asm 2: mov <temp2=%edx,1948(<ap=%rdi) +mov %edx,1948(%rdi) + +# qhasm: temp1 = mem64[ap + 1956] +# asm 1: mov 1956(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1956(<ap=%rdi),>temp1=%esi +mov 1956(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2424] +# asm 1: mov 2424(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2424(<ap=%rdi),>temp2=%edx +mov 2424(%rdi),%edx + +# qhasm: mem64[ap + 2424] = temp1 +# asm 1: mov <temp1=int64#2,2424(<ap=int64#1) +# asm 2: mov <temp1=%esi,2424(<ap=%rdi) +mov %esi,2424(%rdi) + +# qhasm: mem64[ap + 1956] = temp2 +# asm 1: mov <temp2=int64#3,1956(<ap=int64#1) +# asm 2: mov <temp2=%edx,1956(<ap=%rdi) +mov %edx,1956(%rdi) + +# qhasm: temp1 = mem64[ap + 1964] +# asm 1: mov 1964(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1964(<ap=%rdi),>temp1=%esi +mov 1964(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3448] +# asm 1: mov 3448(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3448(<ap=%rdi),>temp2=%edx +mov 3448(%rdi),%edx + +# qhasm: mem64[ap + 3448] = temp1 +# asm 1: mov <temp1=int64#2,3448(<ap=int64#1) +# asm 2: mov <temp1=%esi,3448(<ap=%rdi) +mov %esi,3448(%rdi) + +# qhasm: mem64[ap + 1964] = temp2 +# asm 1: mov <temp2=int64#3,1964(<ap=int64#1) +# asm 2: mov <temp2=%edx,1964(<ap=%rdi) +mov %edx,1964(%rdi) + +# qhasm: temp1 = mem64[ap + 1972] +# asm 1: mov 1972(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1972(<ap=%rdi),>temp1=%esi +mov 1972(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2936] +# asm 1: mov 2936(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2936(<ap=%rdi),>temp2=%edx +mov 2936(%rdi),%edx + +# qhasm: mem64[ap + 2936] = temp1 +# asm 1: mov <temp1=int64#2,2936(<ap=int64#1) +# asm 2: mov 
<temp1=%esi,2936(<ap=%rdi) +mov %esi,2936(%rdi) + +# qhasm: mem64[ap + 1972] = temp2 +# asm 1: mov <temp2=int64#3,1972(<ap=int64#1) +# asm 2: mov <temp2=%edx,1972(<ap=%rdi) +mov %edx,1972(%rdi) + +# qhasm: temp1 = mem64[ap + 1980] +# asm 1: mov 1980(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1980(<ap=%rdi),>temp1=%esi +mov 1980(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3960] +# asm 1: mov 3960(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3960(<ap=%rdi),>temp2=%edx +mov 3960(%rdi),%edx + +# qhasm: mem64[ap + 3960] = temp1 +# asm 1: mov <temp1=int64#2,3960(<ap=int64#1) +# asm 2: mov <temp1=%esi,3960(<ap=%rdi) +mov %esi,3960(%rdi) + +# qhasm: mem64[ap + 1980] = temp2 +# asm 1: mov <temp2=int64#3,1980(<ap=int64#1) +# asm 2: mov <temp2=%edx,1980(<ap=%rdi) +mov %edx,1980(%rdi) + +# qhasm: temp1 = mem64[ap + 1988] +# asm 1: mov 1988(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1988(<ap=%rdi),>temp1=%esi +mov 1988(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2296] +# asm 1: mov 2296(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2296(<ap=%rdi),>temp2=%edx +mov 2296(%rdi),%edx + +# qhasm: mem64[ap + 2296] = temp1 +# asm 1: mov <temp1=int64#2,2296(<ap=int64#1) +# asm 2: mov <temp1=%esi,2296(<ap=%rdi) +mov %esi,2296(%rdi) + +# qhasm: mem64[ap + 1988] = temp2 +# asm 1: mov <temp2=int64#3,1988(<ap=int64#1) +# asm 2: mov <temp2=%edx,1988(<ap=%rdi) +mov %edx,1988(%rdi) + +# qhasm: temp1 = mem64[ap + 1996] +# asm 1: mov 1996(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 1996(<ap=%rdi),>temp1=%esi +mov 1996(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3320] +# asm 1: mov 3320(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3320(<ap=%rdi),>temp2=%edx +mov 3320(%rdi),%edx + +# qhasm: mem64[ap + 3320] = temp1 +# asm 1: mov <temp1=int64#2,3320(<ap=int64#1) +# asm 2: mov <temp1=%esi,3320(<ap=%rdi) +mov %esi,3320(%rdi) + +# qhasm: mem64[ap + 1996] = temp2 +# asm 1: mov <temp2=int64#3,1996(<ap=int64#1) +# asm 2: mov <temp2=%edx,1996(<ap=%rdi) +mov %edx,1996(%rdi) + +# qhasm: temp1 = mem64[ap + 2004] +# asm 1: mov 
2004(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2004(<ap=%rdi),>temp1=%esi +mov 2004(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2808] +# asm 1: mov 2808(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2808(<ap=%rdi),>temp2=%edx +mov 2808(%rdi),%edx + +# qhasm: mem64[ap + 2808] = temp1 +# asm 1: mov <temp1=int64#2,2808(<ap=int64#1) +# asm 2: mov <temp1=%esi,2808(<ap=%rdi) +mov %esi,2808(%rdi) + +# qhasm: mem64[ap + 2004] = temp2 +# asm 1: mov <temp2=int64#3,2004(<ap=int64#1) +# asm 2: mov <temp2=%edx,2004(<ap=%rdi) +mov %edx,2004(%rdi) + +# qhasm: temp1 = mem64[ap + 2012] +# asm 1: mov 2012(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2012(<ap=%rdi),>temp1=%esi +mov 2012(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3832] +# asm 1: mov 3832(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3832(<ap=%rdi),>temp2=%edx +mov 3832(%rdi),%edx + +# qhasm: mem64[ap + 3832] = temp1 +# asm 1: mov <temp1=int64#2,3832(<ap=int64#1) +# asm 2: mov <temp1=%esi,3832(<ap=%rdi) +mov %esi,3832(%rdi) + +# qhasm: mem64[ap + 2012] = temp2 +# asm 1: mov <temp2=int64#3,2012(<ap=int64#1) +# asm 2: mov <temp2=%edx,2012(<ap=%rdi) +mov %edx,2012(%rdi) + +# qhasm: temp1 = mem64[ap + 2020] +# asm 1: mov 2020(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2020(<ap=%rdi),>temp1=%esi +mov 2020(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2552] +# asm 1: mov 2552(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2552(<ap=%rdi),>temp2=%edx +mov 2552(%rdi),%edx + +# qhasm: mem64[ap + 2552] = temp1 +# asm 1: mov <temp1=int64#2,2552(<ap=int64#1) +# asm 2: mov <temp1=%esi,2552(<ap=%rdi) +mov %esi,2552(%rdi) + +# qhasm: mem64[ap + 2020] = temp2 +# asm 1: mov <temp2=int64#3,2020(<ap=int64#1) +# asm 2: mov <temp2=%edx,2020(<ap=%rdi) +mov %edx,2020(%rdi) + +# qhasm: temp1 = mem64[ap + 2028] +# asm 1: mov 2028(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2028(<ap=%rdi),>temp1=%esi +mov 2028(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3576] +# asm 1: mov 3576(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3576(<ap=%rdi),>temp2=%edx +mov 3576(%rdi),%edx + +# 
qhasm: mem64[ap + 3576] = temp1 +# asm 1: mov <temp1=int64#2,3576(<ap=int64#1) +# asm 2: mov <temp1=%esi,3576(<ap=%rdi) +mov %esi,3576(%rdi) + +# qhasm: mem64[ap + 2028] = temp2 +# asm 1: mov <temp2=int64#3,2028(<ap=int64#1) +# asm 2: mov <temp2=%edx,2028(<ap=%rdi) +mov %edx,2028(%rdi) + +# qhasm: temp1 = mem64[ap + 2036] +# asm 1: mov 2036(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2036(<ap=%rdi),>temp1=%esi +mov 2036(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3064] +# asm 1: mov 3064(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3064(<ap=%rdi),>temp2=%edx +mov 3064(%rdi),%edx + +# qhasm: mem64[ap + 3064] = temp1 +# asm 1: mov <temp1=int64#2,3064(<ap=int64#1) +# asm 2: mov <temp1=%esi,3064(<ap=%rdi) +mov %esi,3064(%rdi) + +# qhasm: mem64[ap + 2036] = temp2 +# asm 1: mov <temp2=int64#3,2036(<ap=int64#1) +# asm 2: mov <temp2=%edx,2036(<ap=%rdi) +mov %edx,2036(%rdi) + +# qhasm: temp1 = mem64[ap + 2044] +# asm 1: mov 2044(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2044(<ap=%rdi),>temp1=%esi +mov 2044(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4088] +# asm 1: mov 4088(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4088(<ap=%rdi),>temp2=%edx +mov 4088(%rdi),%edx + +# qhasm: mem64[ap + 4088] = temp1 +# asm 1: mov <temp1=int64#2,4088(<ap=int64#1) +# asm 2: mov <temp1=%esi,4088(<ap=%rdi) +mov %esi,4088(%rdi) + +# qhasm: mem64[ap + 2044] = temp2 +# asm 1: mov <temp2=int64#3,2044(<ap=int64#1) +# asm 2: mov <temp2=%edx,2044(<ap=%rdi) +mov %edx,2044(%rdi) + +# qhasm: temp1 = mem64[ap + 2060] +# asm 1: mov 2060(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2060(<ap=%rdi),>temp1=%esi +mov 2060(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3076] +# asm 1: mov 3076(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3076(<ap=%rdi),>temp2=%edx +mov 3076(%rdi),%edx + +# qhasm: mem64[ap + 3076] = temp1 +# asm 1: mov <temp1=int64#2,3076(<ap=int64#1) +# asm 2: mov <temp1=%esi,3076(<ap=%rdi) +mov %esi,3076(%rdi) + +# qhasm: mem64[ap + 2060] = temp2 +# asm 1: mov <temp2=int64#3,2060(<ap=int64#1) +# asm 2: mov 
<temp2=%edx,2060(<ap=%rdi) +mov %edx,2060(%rdi) + +# qhasm: temp1 = mem64[ap + 2068] +# asm 1: mov 2068(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2068(<ap=%rdi),>temp1=%esi +mov 2068(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2564] +# asm 1: mov 2564(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2564(<ap=%rdi),>temp2=%edx +mov 2564(%rdi),%edx + +# qhasm: mem64[ap + 2564] = temp1 +# asm 1: mov <temp1=int64#2,2564(<ap=int64#1) +# asm 2: mov <temp1=%esi,2564(<ap=%rdi) +mov %esi,2564(%rdi) + +# qhasm: mem64[ap + 2068] = temp2 +# asm 1: mov <temp2=int64#3,2068(<ap=int64#1) +# asm 2: mov <temp2=%edx,2068(<ap=%rdi) +mov %edx,2068(%rdi) + +# qhasm: temp1 = mem64[ap + 2076] +# asm 1: mov 2076(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2076(<ap=%rdi),>temp1=%esi +mov 2076(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3588] +# asm 1: mov 3588(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3588(<ap=%rdi),>temp2=%edx +mov 3588(%rdi),%edx + +# qhasm: mem64[ap + 3588] = temp1 +# asm 1: mov <temp1=int64#2,3588(<ap=int64#1) +# asm 2: mov <temp1=%esi,3588(<ap=%rdi) +mov %esi,3588(%rdi) + +# qhasm: mem64[ap + 2076] = temp2 +# asm 1: mov <temp2=int64#3,2076(<ap=int64#1) +# asm 2: mov <temp2=%edx,2076(<ap=%rdi) +mov %edx,2076(%rdi) + +# qhasm: temp1 = mem64[ap + 2084] +# asm 1: mov 2084(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2084(<ap=%rdi),>temp1=%esi +mov 2084(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2308] +# asm 1: mov 2308(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2308(<ap=%rdi),>temp2=%edx +mov 2308(%rdi),%edx + +# qhasm: mem64[ap + 2308] = temp1 +# asm 1: mov <temp1=int64#2,2308(<ap=int64#1) +# asm 2: mov <temp1=%esi,2308(<ap=%rdi) +mov %esi,2308(%rdi) + +# qhasm: mem64[ap + 2084] = temp2 +# asm 1: mov <temp2=int64#3,2084(<ap=int64#1) +# asm 2: mov <temp2=%edx,2084(<ap=%rdi) +mov %edx,2084(%rdi) + +# qhasm: temp1 = mem64[ap + 2092] +# asm 1: mov 2092(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2092(<ap=%rdi),>temp1=%esi +mov 2092(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3332] +# asm 1: mov 
3332(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3332(<ap=%rdi),>temp2=%edx +mov 3332(%rdi),%edx + +# qhasm: mem64[ap + 3332] = temp1 +# asm 1: mov <temp1=int64#2,3332(<ap=int64#1) +# asm 2: mov <temp1=%esi,3332(<ap=%rdi) +mov %esi,3332(%rdi) + +# qhasm: mem64[ap + 2092] = temp2 +# asm 1: mov <temp2=int64#3,2092(<ap=int64#1) +# asm 2: mov <temp2=%edx,2092(<ap=%rdi) +mov %edx,2092(%rdi) + +# qhasm: temp1 = mem64[ap + 2100] +# asm 1: mov 2100(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2100(<ap=%rdi),>temp1=%esi +mov 2100(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2820] +# asm 1: mov 2820(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2820(<ap=%rdi),>temp2=%edx +mov 2820(%rdi),%edx + +# qhasm: mem64[ap + 2820] = temp1 +# asm 1: mov <temp1=int64#2,2820(<ap=int64#1) +# asm 2: mov <temp1=%esi,2820(<ap=%rdi) +mov %esi,2820(%rdi) + +# qhasm: mem64[ap + 2100] = temp2 +# asm 1: mov <temp2=int64#3,2100(<ap=int64#1) +# asm 2: mov <temp2=%edx,2100(<ap=%rdi) +mov %edx,2100(%rdi) + +# qhasm: temp1 = mem64[ap + 2108] +# asm 1: mov 2108(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2108(<ap=%rdi),>temp1=%esi +mov 2108(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3844] +# asm 1: mov 3844(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3844(<ap=%rdi),>temp2=%edx +mov 3844(%rdi),%edx + +# qhasm: mem64[ap + 3844] = temp1 +# asm 1: mov <temp1=int64#2,3844(<ap=int64#1) +# asm 2: mov <temp1=%esi,3844(<ap=%rdi) +mov %esi,3844(%rdi) + +# qhasm: mem64[ap + 2108] = temp2 +# asm 1: mov <temp2=int64#3,2108(<ap=int64#1) +# asm 2: mov <temp2=%edx,2108(<ap=%rdi) +mov %edx,2108(%rdi) + +# qhasm: temp1 = mem64[ap + 2116] +# asm 1: mov 2116(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2116(<ap=%rdi),>temp1=%esi +mov 2116(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2180] +# asm 1: mov 2180(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2180(<ap=%rdi),>temp2=%edx +mov 2180(%rdi),%edx + +# qhasm: mem64[ap + 2180] = temp1 +# asm 1: mov <temp1=int64#2,2180(<ap=int64#1) +# asm 2: mov <temp1=%esi,2180(<ap=%rdi) +mov %esi,2180(%rdi) + +# 
qhasm: mem64[ap + 2116] = temp2 +# asm 1: mov <temp2=int64#3,2116(<ap=int64#1) +# asm 2: mov <temp2=%edx,2116(<ap=%rdi) +mov %edx,2116(%rdi) + +# qhasm: temp1 = mem64[ap + 2124] +# asm 1: mov 2124(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2124(<ap=%rdi),>temp1=%esi +mov 2124(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3204] +# asm 1: mov 3204(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3204(<ap=%rdi),>temp2=%edx +mov 3204(%rdi),%edx + +# qhasm: mem64[ap + 3204] = temp1 +# asm 1: mov <temp1=int64#2,3204(<ap=int64#1) +# asm 2: mov <temp1=%esi,3204(<ap=%rdi) +mov %esi,3204(%rdi) + +# qhasm: mem64[ap + 2124] = temp2 +# asm 1: mov <temp2=int64#3,2124(<ap=int64#1) +# asm 2: mov <temp2=%edx,2124(<ap=%rdi) +mov %edx,2124(%rdi) + +# qhasm: temp1 = mem64[ap + 2132] +# asm 1: mov 2132(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2132(<ap=%rdi),>temp1=%esi +mov 2132(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2692] +# asm 1: mov 2692(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2692(<ap=%rdi),>temp2=%edx +mov 2692(%rdi),%edx + +# qhasm: mem64[ap + 2692] = temp1 +# asm 1: mov <temp1=int64#2,2692(<ap=int64#1) +# asm 2: mov <temp1=%esi,2692(<ap=%rdi) +mov %esi,2692(%rdi) + +# qhasm: mem64[ap + 2132] = temp2 +# asm 1: mov <temp2=int64#3,2132(<ap=int64#1) +# asm 2: mov <temp2=%edx,2132(<ap=%rdi) +mov %edx,2132(%rdi) + +# qhasm: temp1 = mem64[ap + 2140] +# asm 1: mov 2140(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2140(<ap=%rdi),>temp1=%esi +mov 2140(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3716] +# asm 1: mov 3716(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3716(<ap=%rdi),>temp2=%edx +mov 3716(%rdi),%edx + +# qhasm: mem64[ap + 3716] = temp1 +# asm 1: mov <temp1=int64#2,3716(<ap=int64#1) +# asm 2: mov <temp1=%esi,3716(<ap=%rdi) +mov %esi,3716(%rdi) + +# qhasm: mem64[ap + 2140] = temp2 +# asm 1: mov <temp2=int64#3,2140(<ap=int64#1) +# asm 2: mov <temp2=%edx,2140(<ap=%rdi) +mov %edx,2140(%rdi) + +# qhasm: temp1 = mem64[ap + 2148] +# asm 1: mov 2148(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 
2148(<ap=%rdi),>temp1=%esi +mov 2148(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2436] +# asm 1: mov 2436(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2436(<ap=%rdi),>temp2=%edx +mov 2436(%rdi),%edx + +# qhasm: mem64[ap + 2436] = temp1 +# asm 1: mov <temp1=int64#2,2436(<ap=int64#1) +# asm 2: mov <temp1=%esi,2436(<ap=%rdi) +mov %esi,2436(%rdi) + +# qhasm: mem64[ap + 2148] = temp2 +# asm 1: mov <temp2=int64#3,2148(<ap=int64#1) +# asm 2: mov <temp2=%edx,2148(<ap=%rdi) +mov %edx,2148(%rdi) + +# qhasm: temp1 = mem64[ap + 2156] +# asm 1: mov 2156(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2156(<ap=%rdi),>temp1=%esi +mov 2156(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3460] +# asm 1: mov 3460(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3460(<ap=%rdi),>temp2=%edx +mov 3460(%rdi),%edx + +# qhasm: mem64[ap + 3460] = temp1 +# asm 1: mov <temp1=int64#2,3460(<ap=int64#1) +# asm 2: mov <temp1=%esi,3460(<ap=%rdi) +mov %esi,3460(%rdi) + +# qhasm: mem64[ap + 2156] = temp2 +# asm 1: mov <temp2=int64#3,2156(<ap=int64#1) +# asm 2: mov <temp2=%edx,2156(<ap=%rdi) +mov %edx,2156(%rdi) + +# qhasm: temp1 = mem64[ap + 2164] +# asm 1: mov 2164(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2164(<ap=%rdi),>temp1=%esi +mov 2164(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2948] +# asm 1: mov 2948(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2948(<ap=%rdi),>temp2=%edx +mov 2948(%rdi),%edx + +# qhasm: mem64[ap + 2948] = temp1 +# asm 1: mov <temp1=int64#2,2948(<ap=int64#1) +# asm 2: mov <temp1=%esi,2948(<ap=%rdi) +mov %esi,2948(%rdi) + +# qhasm: mem64[ap + 2164] = temp2 +# asm 1: mov <temp2=int64#3,2164(<ap=int64#1) +# asm 2: mov <temp2=%edx,2164(<ap=%rdi) +mov %edx,2164(%rdi) + +# qhasm: temp1 = mem64[ap + 2172] +# asm 1: mov 2172(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2172(<ap=%rdi),>temp1=%esi +mov 2172(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3972] +# asm 1: mov 3972(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3972(<ap=%rdi),>temp2=%edx +mov 3972(%rdi),%edx + +# qhasm: mem64[ap + 3972] = temp1 +# asm 1: mov 
<temp1=int64#2,3972(<ap=int64#1) +# asm 2: mov <temp1=%esi,3972(<ap=%rdi) +mov %esi,3972(%rdi) + +# qhasm: mem64[ap + 2172] = temp2 +# asm 1: mov <temp2=int64#3,2172(<ap=int64#1) +# asm 2: mov <temp2=%edx,2172(<ap=%rdi) +mov %edx,2172(%rdi) + +# qhasm: temp1 = mem64[ap + 2188] +# asm 1: mov 2188(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2188(<ap=%rdi),>temp1=%esi +mov 2188(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3140] +# asm 1: mov 3140(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3140(<ap=%rdi),>temp2=%edx +mov 3140(%rdi),%edx + +# qhasm: mem64[ap + 3140] = temp1 +# asm 1: mov <temp1=int64#2,3140(<ap=int64#1) +# asm 2: mov <temp1=%esi,3140(<ap=%rdi) +mov %esi,3140(%rdi) + +# qhasm: mem64[ap + 2188] = temp2 +# asm 1: mov <temp2=int64#3,2188(<ap=int64#1) +# asm 2: mov <temp2=%edx,2188(<ap=%rdi) +mov %edx,2188(%rdi) + +# qhasm: temp1 = mem64[ap + 2196] +# asm 1: mov 2196(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2196(<ap=%rdi),>temp1=%esi +mov 2196(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2628] +# asm 1: mov 2628(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2628(<ap=%rdi),>temp2=%edx +mov 2628(%rdi),%edx + +# qhasm: mem64[ap + 2628] = temp1 +# asm 1: mov <temp1=int64#2,2628(<ap=int64#1) +# asm 2: mov <temp1=%esi,2628(<ap=%rdi) +mov %esi,2628(%rdi) + +# qhasm: mem64[ap + 2196] = temp2 +# asm 1: mov <temp2=int64#3,2196(<ap=int64#1) +# asm 2: mov <temp2=%edx,2196(<ap=%rdi) +mov %edx,2196(%rdi) + +# qhasm: temp1 = mem64[ap + 2204] +# asm 1: mov 2204(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2204(<ap=%rdi),>temp1=%esi +mov 2204(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3652] +# asm 1: mov 3652(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3652(<ap=%rdi),>temp2=%edx +mov 3652(%rdi),%edx + +# qhasm: mem64[ap + 3652] = temp1 +# asm 1: mov <temp1=int64#2,3652(<ap=int64#1) +# asm 2: mov <temp1=%esi,3652(<ap=%rdi) +mov %esi,3652(%rdi) + +# qhasm: mem64[ap + 2204] = temp2 +# asm 1: mov <temp2=int64#3,2204(<ap=int64#1) +# asm 2: mov <temp2=%edx,2204(<ap=%rdi) +mov %edx,2204(%rdi) + +# 
qhasm: temp1 = mem64[ap + 2212] +# asm 1: mov 2212(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2212(<ap=%rdi),>temp1=%esi +mov 2212(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2372] +# asm 1: mov 2372(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2372(<ap=%rdi),>temp2=%edx +mov 2372(%rdi),%edx + +# qhasm: mem64[ap + 2372] = temp1 +# asm 1: mov <temp1=int64#2,2372(<ap=int64#1) +# asm 2: mov <temp1=%esi,2372(<ap=%rdi) +mov %esi,2372(%rdi) + +# qhasm: mem64[ap + 2212] = temp2 +# asm 1: mov <temp2=int64#3,2212(<ap=int64#1) +# asm 2: mov <temp2=%edx,2212(<ap=%rdi) +mov %edx,2212(%rdi) + +# qhasm: temp1 = mem64[ap + 2220] +# asm 1: mov 2220(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2220(<ap=%rdi),>temp1=%esi +mov 2220(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3396] +# asm 1: mov 3396(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3396(<ap=%rdi),>temp2=%edx +mov 3396(%rdi),%edx + +# qhasm: mem64[ap + 3396] = temp1 +# asm 1: mov <temp1=int64#2,3396(<ap=int64#1) +# asm 2: mov <temp1=%esi,3396(<ap=%rdi) +mov %esi,3396(%rdi) + +# qhasm: mem64[ap + 2220] = temp2 +# asm 1: mov <temp2=int64#3,2220(<ap=int64#1) +# asm 2: mov <temp2=%edx,2220(<ap=%rdi) +mov %edx,2220(%rdi) + +# qhasm: temp1 = mem64[ap + 2228] +# asm 1: mov 2228(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2228(<ap=%rdi),>temp1=%esi +mov 2228(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2884] +# asm 1: mov 2884(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2884(<ap=%rdi),>temp2=%edx +mov 2884(%rdi),%edx + +# qhasm: mem64[ap + 2884] = temp1 +# asm 1: mov <temp1=int64#2,2884(<ap=int64#1) +# asm 2: mov <temp1=%esi,2884(<ap=%rdi) +mov %esi,2884(%rdi) + +# qhasm: mem64[ap + 2228] = temp2 +# asm 1: mov <temp2=int64#3,2228(<ap=int64#1) +# asm 2: mov <temp2=%edx,2228(<ap=%rdi) +mov %edx,2228(%rdi) + +# qhasm: temp1 = mem64[ap + 2236] +# asm 1: mov 2236(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2236(<ap=%rdi),>temp1=%esi +mov 2236(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3908] +# asm 1: mov 3908(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 
3908(<ap=%rdi),>temp2=%edx +mov 3908(%rdi),%edx + +# qhasm: mem64[ap + 3908] = temp1 +# asm 1: mov <temp1=int64#2,3908(<ap=int64#1) +# asm 2: mov <temp1=%esi,3908(<ap=%rdi) +mov %esi,3908(%rdi) + +# qhasm: mem64[ap + 2236] = temp2 +# asm 1: mov <temp2=int64#3,2236(<ap=int64#1) +# asm 2: mov <temp2=%edx,2236(<ap=%rdi) +mov %edx,2236(%rdi) + +# qhasm: temp1 = mem64[ap + 2252] +# asm 1: mov 2252(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2252(<ap=%rdi),>temp1=%esi +mov 2252(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3268] +# asm 1: mov 3268(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3268(<ap=%rdi),>temp2=%edx +mov 3268(%rdi),%edx + +# qhasm: mem64[ap + 3268] = temp1 +# asm 1: mov <temp1=int64#2,3268(<ap=int64#1) +# asm 2: mov <temp1=%esi,3268(<ap=%rdi) +mov %esi,3268(%rdi) + +# qhasm: mem64[ap + 2252] = temp2 +# asm 1: mov <temp2=int64#3,2252(<ap=int64#1) +# asm 2: mov <temp2=%edx,2252(<ap=%rdi) +mov %edx,2252(%rdi) + +# qhasm: temp1 = mem64[ap + 2260] +# asm 1: mov 2260(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2260(<ap=%rdi),>temp1=%esi +mov 2260(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2756] +# asm 1: mov 2756(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2756(<ap=%rdi),>temp2=%edx +mov 2756(%rdi),%edx + +# qhasm: mem64[ap + 2756] = temp1 +# asm 1: mov <temp1=int64#2,2756(<ap=int64#1) +# asm 2: mov <temp1=%esi,2756(<ap=%rdi) +mov %esi,2756(%rdi) + +# qhasm: mem64[ap + 2260] = temp2 +# asm 1: mov <temp2=int64#3,2260(<ap=int64#1) +# asm 2: mov <temp2=%edx,2260(<ap=%rdi) +mov %edx,2260(%rdi) + +# qhasm: temp1 = mem64[ap + 2268] +# asm 1: mov 2268(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2268(<ap=%rdi),>temp1=%esi +mov 2268(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3780] +# asm 1: mov 3780(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3780(<ap=%rdi),>temp2=%edx +mov 3780(%rdi),%edx + +# qhasm: mem64[ap + 3780] = temp1 +# asm 1: mov <temp1=int64#2,3780(<ap=int64#1) +# asm 2: mov <temp1=%esi,3780(<ap=%rdi) +mov %esi,3780(%rdi) + +# qhasm: mem64[ap + 2268] = temp2 +# asm 1: mov 
<temp2=int64#3,2268(<ap=int64#1) +# asm 2: mov <temp2=%edx,2268(<ap=%rdi) +mov %edx,2268(%rdi) + +# qhasm: temp1 = mem64[ap + 2276] +# asm 1: mov 2276(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2276(<ap=%rdi),>temp1=%esi +mov 2276(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2500] +# asm 1: mov 2500(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2500(<ap=%rdi),>temp2=%edx +mov 2500(%rdi),%edx + +# qhasm: mem64[ap + 2500] = temp1 +# asm 1: mov <temp1=int64#2,2500(<ap=int64#1) +# asm 2: mov <temp1=%esi,2500(<ap=%rdi) +mov %esi,2500(%rdi) + +# qhasm: mem64[ap + 2276] = temp2 +# asm 1: mov <temp2=int64#3,2276(<ap=int64#1) +# asm 2: mov <temp2=%edx,2276(<ap=%rdi) +mov %edx,2276(%rdi) + +# qhasm: temp1 = mem64[ap + 2284] +# asm 1: mov 2284(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2284(<ap=%rdi),>temp1=%esi +mov 2284(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3524] +# asm 1: mov 3524(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3524(<ap=%rdi),>temp2=%edx +mov 3524(%rdi),%edx + +# qhasm: mem64[ap + 3524] = temp1 +# asm 1: mov <temp1=int64#2,3524(<ap=int64#1) +# asm 2: mov <temp1=%esi,3524(<ap=%rdi) +mov %esi,3524(%rdi) + +# qhasm: mem64[ap + 2284] = temp2 +# asm 1: mov <temp2=int64#3,2284(<ap=int64#1) +# asm 2: mov <temp2=%edx,2284(<ap=%rdi) +mov %edx,2284(%rdi) + +# qhasm: temp1 = mem64[ap + 2292] +# asm 1: mov 2292(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2292(<ap=%rdi),>temp1=%esi +mov 2292(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3012] +# asm 1: mov 3012(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3012(<ap=%rdi),>temp2=%edx +mov 3012(%rdi),%edx + +# qhasm: mem64[ap + 3012] = temp1 +# asm 1: mov <temp1=int64#2,3012(<ap=int64#1) +# asm 2: mov <temp1=%esi,3012(<ap=%rdi) +mov %esi,3012(%rdi) + +# qhasm: mem64[ap + 2292] = temp2 +# asm 1: mov <temp2=int64#3,2292(<ap=int64#1) +# asm 2: mov <temp2=%edx,2292(<ap=%rdi) +mov %edx,2292(%rdi) + +# qhasm: temp1 = mem64[ap + 2300] +# asm 1: mov 2300(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2300(<ap=%rdi),>temp1=%esi +mov 2300(%rdi),%esi + +# 
qhasm: temp2 = mem64[ap + 4036] +# asm 1: mov 4036(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4036(<ap=%rdi),>temp2=%edx +mov 4036(%rdi),%edx + +# qhasm: mem64[ap + 4036] = temp1 +# asm 1: mov <temp1=int64#2,4036(<ap=int64#1) +# asm 2: mov <temp1=%esi,4036(<ap=%rdi) +mov %esi,4036(%rdi) + +# qhasm: mem64[ap + 2300] = temp2 +# asm 1: mov <temp2=int64#3,2300(<ap=int64#1) +# asm 2: mov <temp2=%edx,2300(<ap=%rdi) +mov %edx,2300(%rdi) + +# qhasm: temp1 = mem64[ap + 2316] +# asm 1: mov 2316(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2316(<ap=%rdi),>temp1=%esi +mov 2316(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3108] +# asm 1: mov 3108(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3108(<ap=%rdi),>temp2=%edx +mov 3108(%rdi),%edx + +# qhasm: mem64[ap + 3108] = temp1 +# asm 1: mov <temp1=int64#2,3108(<ap=int64#1) +# asm 2: mov <temp1=%esi,3108(<ap=%rdi) +mov %esi,3108(%rdi) + +# qhasm: mem64[ap + 2316] = temp2 +# asm 1: mov <temp2=int64#3,2316(<ap=int64#1) +# asm 2: mov <temp2=%edx,2316(<ap=%rdi) +mov %edx,2316(%rdi) + +# qhasm: temp1 = mem64[ap + 2324] +# asm 1: mov 2324(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2324(<ap=%rdi),>temp1=%esi +mov 2324(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2596] +# asm 1: mov 2596(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2596(<ap=%rdi),>temp2=%edx +mov 2596(%rdi),%edx + +# qhasm: mem64[ap + 2596] = temp1 +# asm 1: mov <temp1=int64#2,2596(<ap=int64#1) +# asm 2: mov <temp1=%esi,2596(<ap=%rdi) +mov %esi,2596(%rdi) + +# qhasm: mem64[ap + 2324] = temp2 +# asm 1: mov <temp2=int64#3,2324(<ap=int64#1) +# asm 2: mov <temp2=%edx,2324(<ap=%rdi) +mov %edx,2324(%rdi) + +# qhasm: temp1 = mem64[ap + 2332] +# asm 1: mov 2332(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2332(<ap=%rdi),>temp1=%esi +mov 2332(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3620] +# asm 1: mov 3620(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3620(<ap=%rdi),>temp2=%edx +mov 3620(%rdi),%edx + +# qhasm: mem64[ap + 3620] = temp1 +# asm 1: mov <temp1=int64#2,3620(<ap=int64#1) +# asm 2: mov 
<temp1=%esi,3620(<ap=%rdi) +mov %esi,3620(%rdi) + +# qhasm: mem64[ap + 2332] = temp2 +# asm 1: mov <temp2=int64#3,2332(<ap=int64#1) +# asm 2: mov <temp2=%edx,2332(<ap=%rdi) +mov %edx,2332(%rdi) + +# qhasm: temp1 = mem64[ap + 2348] +# asm 1: mov 2348(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2348(<ap=%rdi),>temp1=%esi +mov 2348(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3364] +# asm 1: mov 3364(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3364(<ap=%rdi),>temp2=%edx +mov 3364(%rdi),%edx + +# qhasm: mem64[ap + 3364] = temp1 +# asm 1: mov <temp1=int64#2,3364(<ap=int64#1) +# asm 2: mov <temp1=%esi,3364(<ap=%rdi) +mov %esi,3364(%rdi) + +# qhasm: mem64[ap + 2348] = temp2 +# asm 1: mov <temp2=int64#3,2348(<ap=int64#1) +# asm 2: mov <temp2=%edx,2348(<ap=%rdi) +mov %edx,2348(%rdi) + +# qhasm: temp1 = mem64[ap + 2356] +# asm 1: mov 2356(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2356(<ap=%rdi),>temp1=%esi +mov 2356(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2852] +# asm 1: mov 2852(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2852(<ap=%rdi),>temp2=%edx +mov 2852(%rdi),%edx + +# qhasm: mem64[ap + 2852] = temp1 +# asm 1: mov <temp1=int64#2,2852(<ap=int64#1) +# asm 2: mov <temp1=%esi,2852(<ap=%rdi) +mov %esi,2852(%rdi) + +# qhasm: mem64[ap + 2356] = temp2 +# asm 1: mov <temp2=int64#3,2356(<ap=int64#1) +# asm 2: mov <temp2=%edx,2356(<ap=%rdi) +mov %edx,2356(%rdi) + +# qhasm: temp1 = mem64[ap + 2364] +# asm 1: mov 2364(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2364(<ap=%rdi),>temp1=%esi +mov 2364(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3876] +# asm 1: mov 3876(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3876(<ap=%rdi),>temp2=%edx +mov 3876(%rdi),%edx + +# qhasm: mem64[ap + 3876] = temp1 +# asm 1: mov <temp1=int64#2,3876(<ap=int64#1) +# asm 2: mov <temp1=%esi,3876(<ap=%rdi) +mov %esi,3876(%rdi) + +# qhasm: mem64[ap + 2364] = temp2 +# asm 1: mov <temp2=int64#3,2364(<ap=int64#1) +# asm 2: mov <temp2=%edx,2364(<ap=%rdi) +mov %edx,2364(%rdi) + +# qhasm: temp1 = mem64[ap + 2380] +# asm 1: mov 
2380(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2380(<ap=%rdi),>temp1=%esi +mov 2380(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3236] +# asm 1: mov 3236(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3236(<ap=%rdi),>temp2=%edx +mov 3236(%rdi),%edx + +# qhasm: mem64[ap + 3236] = temp1 +# asm 1: mov <temp1=int64#2,3236(<ap=int64#1) +# asm 2: mov <temp1=%esi,3236(<ap=%rdi) +mov %esi,3236(%rdi) + +# qhasm: mem64[ap + 2380] = temp2 +# asm 1: mov <temp2=int64#3,2380(<ap=int64#1) +# asm 2: mov <temp2=%edx,2380(<ap=%rdi) +mov %edx,2380(%rdi) + +# qhasm: temp1 = mem64[ap + 2388] +# asm 1: mov 2388(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2388(<ap=%rdi),>temp1=%esi +mov 2388(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2724] +# asm 1: mov 2724(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2724(<ap=%rdi),>temp2=%edx +mov 2724(%rdi),%edx + +# qhasm: mem64[ap + 2724] = temp1 +# asm 1: mov <temp1=int64#2,2724(<ap=int64#1) +# asm 2: mov <temp1=%esi,2724(<ap=%rdi) +mov %esi,2724(%rdi) + +# qhasm: mem64[ap + 2388] = temp2 +# asm 1: mov <temp2=int64#3,2388(<ap=int64#1) +# asm 2: mov <temp2=%edx,2388(<ap=%rdi) +mov %edx,2388(%rdi) + +# qhasm: temp1 = mem64[ap + 2396] +# asm 1: mov 2396(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2396(<ap=%rdi),>temp1=%esi +mov 2396(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3748] +# asm 1: mov 3748(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3748(<ap=%rdi),>temp2=%edx +mov 3748(%rdi),%edx + +# qhasm: mem64[ap + 3748] = temp1 +# asm 1: mov <temp1=int64#2,3748(<ap=int64#1) +# asm 2: mov <temp1=%esi,3748(<ap=%rdi) +mov %esi,3748(%rdi) + +# qhasm: mem64[ap + 2396] = temp2 +# asm 1: mov <temp2=int64#3,2396(<ap=int64#1) +# asm 2: mov <temp2=%edx,2396(<ap=%rdi) +mov %edx,2396(%rdi) + +# qhasm: temp1 = mem64[ap + 2404] +# asm 1: mov 2404(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2404(<ap=%rdi),>temp1=%esi +mov 2404(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2468] +# asm 1: mov 2468(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2468(<ap=%rdi),>temp2=%edx +mov 2468(%rdi),%edx + +# 
qhasm: mem64[ap + 2468] = temp1 +# asm 1: mov <temp1=int64#2,2468(<ap=int64#1) +# asm 2: mov <temp1=%esi,2468(<ap=%rdi) +mov %esi,2468(%rdi) + +# qhasm: mem64[ap + 2404] = temp2 +# asm 1: mov <temp2=int64#3,2404(<ap=int64#1) +# asm 2: mov <temp2=%edx,2404(<ap=%rdi) +mov %edx,2404(%rdi) + +# qhasm: temp1 = mem64[ap + 2412] +# asm 1: mov 2412(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2412(<ap=%rdi),>temp1=%esi +mov 2412(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3492] +# asm 1: mov 3492(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3492(<ap=%rdi),>temp2=%edx +mov 3492(%rdi),%edx + +# qhasm: mem64[ap + 3492] = temp1 +# asm 1: mov <temp1=int64#2,3492(<ap=int64#1) +# asm 2: mov <temp1=%esi,3492(<ap=%rdi) +mov %esi,3492(%rdi) + +# qhasm: mem64[ap + 2412] = temp2 +# asm 1: mov <temp2=int64#3,2412(<ap=int64#1) +# asm 2: mov <temp2=%edx,2412(<ap=%rdi) +mov %edx,2412(%rdi) + +# qhasm: temp1 = mem64[ap + 2420] +# asm 1: mov 2420(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2420(<ap=%rdi),>temp1=%esi +mov 2420(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2980] +# asm 1: mov 2980(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2980(<ap=%rdi),>temp2=%edx +mov 2980(%rdi),%edx + +# qhasm: mem64[ap + 2980] = temp1 +# asm 1: mov <temp1=int64#2,2980(<ap=int64#1) +# asm 2: mov <temp1=%esi,2980(<ap=%rdi) +mov %esi,2980(%rdi) + +# qhasm: mem64[ap + 2420] = temp2 +# asm 1: mov <temp2=int64#3,2420(<ap=int64#1) +# asm 2: mov <temp2=%edx,2420(<ap=%rdi) +mov %edx,2420(%rdi) + +# qhasm: temp1 = mem64[ap + 2428] +# asm 1: mov 2428(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2428(<ap=%rdi),>temp1=%esi +mov 2428(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4004] +# asm 1: mov 4004(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4004(<ap=%rdi),>temp2=%edx +mov 4004(%rdi),%edx + +# qhasm: mem64[ap + 4004] = temp1 +# asm 1: mov <temp1=int64#2,4004(<ap=int64#1) +# asm 2: mov <temp1=%esi,4004(<ap=%rdi) +mov %esi,4004(%rdi) + +# qhasm: mem64[ap + 2428] = temp2 +# asm 1: mov <temp2=int64#3,2428(<ap=int64#1) +# asm 2: mov 
<temp2=%edx,2428(<ap=%rdi) +mov %edx,2428(%rdi) + +# qhasm: temp1 = mem64[ap + 2444] +# asm 1: mov 2444(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2444(<ap=%rdi),>temp1=%esi +mov 2444(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3172] +# asm 1: mov 3172(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3172(<ap=%rdi),>temp2=%edx +mov 3172(%rdi),%edx + +# qhasm: mem64[ap + 3172] = temp1 +# asm 1: mov <temp1=int64#2,3172(<ap=int64#1) +# asm 2: mov <temp1=%esi,3172(<ap=%rdi) +mov %esi,3172(%rdi) + +# qhasm: mem64[ap + 2444] = temp2 +# asm 1: mov <temp2=int64#3,2444(<ap=int64#1) +# asm 2: mov <temp2=%edx,2444(<ap=%rdi) +mov %edx,2444(%rdi) + +# qhasm: temp1 = mem64[ap + 2452] +# asm 1: mov 2452(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2452(<ap=%rdi),>temp1=%esi +mov 2452(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2660] +# asm 1: mov 2660(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2660(<ap=%rdi),>temp2=%edx +mov 2660(%rdi),%edx + +# qhasm: mem64[ap + 2660] = temp1 +# asm 1: mov <temp1=int64#2,2660(<ap=int64#1) +# asm 2: mov <temp1=%esi,2660(<ap=%rdi) +mov %esi,2660(%rdi) + +# qhasm: mem64[ap + 2452] = temp2 +# asm 1: mov <temp2=int64#3,2452(<ap=int64#1) +# asm 2: mov <temp2=%edx,2452(<ap=%rdi) +mov %edx,2452(%rdi) + +# qhasm: temp1 = mem64[ap + 2460] +# asm 1: mov 2460(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2460(<ap=%rdi),>temp1=%esi +mov 2460(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3684] +# asm 1: mov 3684(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3684(<ap=%rdi),>temp2=%edx +mov 3684(%rdi),%edx + +# qhasm: mem64[ap + 3684] = temp1 +# asm 1: mov <temp1=int64#2,3684(<ap=int64#1) +# asm 2: mov <temp1=%esi,3684(<ap=%rdi) +mov %esi,3684(%rdi) + +# qhasm: mem64[ap + 2460] = temp2 +# asm 1: mov <temp2=int64#3,2460(<ap=int64#1) +# asm 2: mov <temp2=%edx,2460(<ap=%rdi) +mov %edx,2460(%rdi) + +# qhasm: temp1 = mem64[ap + 2476] +# asm 1: mov 2476(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2476(<ap=%rdi),>temp1=%esi +mov 2476(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3428] +# asm 1: mov 
3428(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3428(<ap=%rdi),>temp2=%edx +mov 3428(%rdi),%edx + +# qhasm: mem64[ap + 3428] = temp1 +# asm 1: mov <temp1=int64#2,3428(<ap=int64#1) +# asm 2: mov <temp1=%esi,3428(<ap=%rdi) +mov %esi,3428(%rdi) + +# qhasm: mem64[ap + 2476] = temp2 +# asm 1: mov <temp2=int64#3,2476(<ap=int64#1) +# asm 2: mov <temp2=%edx,2476(<ap=%rdi) +mov %edx,2476(%rdi) + +# qhasm: temp1 = mem64[ap + 2484] +# asm 1: mov 2484(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2484(<ap=%rdi),>temp1=%esi +mov 2484(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2916] +# asm 1: mov 2916(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2916(<ap=%rdi),>temp2=%edx +mov 2916(%rdi),%edx + +# qhasm: mem64[ap + 2916] = temp1 +# asm 1: mov <temp1=int64#2,2916(<ap=int64#1) +# asm 2: mov <temp1=%esi,2916(<ap=%rdi) +mov %esi,2916(%rdi) + +# qhasm: mem64[ap + 2484] = temp2 +# asm 1: mov <temp2=int64#3,2484(<ap=int64#1) +# asm 2: mov <temp2=%edx,2484(<ap=%rdi) +mov %edx,2484(%rdi) + +# qhasm: temp1 = mem64[ap + 2492] +# asm 1: mov 2492(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2492(<ap=%rdi),>temp1=%esi +mov 2492(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3940] +# asm 1: mov 3940(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3940(<ap=%rdi),>temp2=%edx +mov 3940(%rdi),%edx + +# qhasm: mem64[ap + 3940] = temp1 +# asm 1: mov <temp1=int64#2,3940(<ap=int64#1) +# asm 2: mov <temp1=%esi,3940(<ap=%rdi) +mov %esi,3940(%rdi) + +# qhasm: mem64[ap + 2492] = temp2 +# asm 1: mov <temp2=int64#3,2492(<ap=int64#1) +# asm 2: mov <temp2=%edx,2492(<ap=%rdi) +mov %edx,2492(%rdi) + +# qhasm: temp1 = mem64[ap + 2508] +# asm 1: mov 2508(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2508(<ap=%rdi),>temp1=%esi +mov 2508(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3300] +# asm 1: mov 3300(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3300(<ap=%rdi),>temp2=%edx +mov 3300(%rdi),%edx + +# qhasm: mem64[ap + 3300] = temp1 +# asm 1: mov <temp1=int64#2,3300(<ap=int64#1) +# asm 2: mov <temp1=%esi,3300(<ap=%rdi) +mov %esi,3300(%rdi) + +# 
qhasm: mem64[ap + 2508] = temp2 +# asm 1: mov <temp2=int64#3,2508(<ap=int64#1) +# asm 2: mov <temp2=%edx,2508(<ap=%rdi) +mov %edx,2508(%rdi) + +# qhasm: temp1 = mem64[ap + 2516] +# asm 1: mov 2516(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2516(<ap=%rdi),>temp1=%esi +mov 2516(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2788] +# asm 1: mov 2788(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2788(<ap=%rdi),>temp2=%edx +mov 2788(%rdi),%edx + +# qhasm: mem64[ap + 2788] = temp1 +# asm 1: mov <temp1=int64#2,2788(<ap=int64#1) +# asm 2: mov <temp1=%esi,2788(<ap=%rdi) +mov %esi,2788(%rdi) + +# qhasm: mem64[ap + 2516] = temp2 +# asm 1: mov <temp2=int64#3,2516(<ap=int64#1) +# asm 2: mov <temp2=%edx,2516(<ap=%rdi) +mov %edx,2516(%rdi) + +# qhasm: temp1 = mem64[ap + 2524] +# asm 1: mov 2524(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2524(<ap=%rdi),>temp1=%esi +mov 2524(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3812] +# asm 1: mov 3812(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3812(<ap=%rdi),>temp2=%edx +mov 3812(%rdi),%edx + +# qhasm: mem64[ap + 3812] = temp1 +# asm 1: mov <temp1=int64#2,3812(<ap=int64#1) +# asm 2: mov <temp1=%esi,3812(<ap=%rdi) +mov %esi,3812(%rdi) + +# qhasm: mem64[ap + 2524] = temp2 +# asm 1: mov <temp2=int64#3,2524(<ap=int64#1) +# asm 2: mov <temp2=%edx,2524(<ap=%rdi) +mov %edx,2524(%rdi) + +# qhasm: temp1 = mem64[ap + 2540] +# asm 1: mov 2540(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2540(<ap=%rdi),>temp1=%esi +mov 2540(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3556] +# asm 1: mov 3556(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3556(<ap=%rdi),>temp2=%edx +mov 3556(%rdi),%edx + +# qhasm: mem64[ap + 3556] = temp1 +# asm 1: mov <temp1=int64#2,3556(<ap=int64#1) +# asm 2: mov <temp1=%esi,3556(<ap=%rdi) +mov %esi,3556(%rdi) + +# qhasm: mem64[ap + 2540] = temp2 +# asm 1: mov <temp2=int64#3,2540(<ap=int64#1) +# asm 2: mov <temp2=%edx,2540(<ap=%rdi) +mov %edx,2540(%rdi) + +# qhasm: temp1 = mem64[ap + 2548] +# asm 1: mov 2548(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 
2548(<ap=%rdi),>temp1=%esi +mov 2548(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3044] +# asm 1: mov 3044(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3044(<ap=%rdi),>temp2=%edx +mov 3044(%rdi),%edx + +# qhasm: mem64[ap + 3044] = temp1 +# asm 1: mov <temp1=int64#2,3044(<ap=int64#1) +# asm 2: mov <temp1=%esi,3044(<ap=%rdi) +mov %esi,3044(%rdi) + +# qhasm: mem64[ap + 2548] = temp2 +# asm 1: mov <temp2=int64#3,2548(<ap=int64#1) +# asm 2: mov <temp2=%edx,2548(<ap=%rdi) +mov %edx,2548(%rdi) + +# qhasm: temp1 = mem64[ap + 2556] +# asm 1: mov 2556(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2556(<ap=%rdi),>temp1=%esi +mov 2556(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4068] +# asm 1: mov 4068(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4068(<ap=%rdi),>temp2=%edx +mov 4068(%rdi),%edx + +# qhasm: mem64[ap + 4068] = temp1 +# asm 1: mov <temp1=int64#2,4068(<ap=int64#1) +# asm 2: mov <temp1=%esi,4068(<ap=%rdi) +mov %esi,4068(%rdi) + +# qhasm: mem64[ap + 2556] = temp2 +# asm 1: mov <temp2=int64#3,2556(<ap=int64#1) +# asm 2: mov <temp2=%edx,2556(<ap=%rdi) +mov %edx,2556(%rdi) + +# qhasm: temp1 = mem64[ap + 2572] +# asm 1: mov 2572(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2572(<ap=%rdi),>temp1=%esi +mov 2572(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3092] +# asm 1: mov 3092(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3092(<ap=%rdi),>temp2=%edx +mov 3092(%rdi),%edx + +# qhasm: mem64[ap + 3092] = temp1 +# asm 1: mov <temp1=int64#2,3092(<ap=int64#1) +# asm 2: mov <temp1=%esi,3092(<ap=%rdi) +mov %esi,3092(%rdi) + +# qhasm: mem64[ap + 2572] = temp2 +# asm 1: mov <temp2=int64#3,2572(<ap=int64#1) +# asm 2: mov <temp2=%edx,2572(<ap=%rdi) +mov %edx,2572(%rdi) + +# qhasm: temp1 = mem64[ap + 2588] +# asm 1: mov 2588(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2588(<ap=%rdi),>temp1=%esi +mov 2588(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3604] +# asm 1: mov 3604(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3604(<ap=%rdi),>temp2=%edx +mov 3604(%rdi),%edx + +# qhasm: mem64[ap + 3604] = temp1 +# asm 1: mov 
<temp1=int64#2,3604(<ap=int64#1) +# asm 2: mov <temp1=%esi,3604(<ap=%rdi) +mov %esi,3604(%rdi) + +# qhasm: mem64[ap + 2588] = temp2 +# asm 1: mov <temp2=int64#3,2588(<ap=int64#1) +# asm 2: mov <temp2=%edx,2588(<ap=%rdi) +mov %edx,2588(%rdi) + +# qhasm: temp1 = mem64[ap + 2604] +# asm 1: mov 2604(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2604(<ap=%rdi),>temp1=%esi +mov 2604(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3348] +# asm 1: mov 3348(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3348(<ap=%rdi),>temp2=%edx +mov 3348(%rdi),%edx + +# qhasm: mem64[ap + 3348] = temp1 +# asm 1: mov <temp1=int64#2,3348(<ap=int64#1) +# asm 2: mov <temp1=%esi,3348(<ap=%rdi) +mov %esi,3348(%rdi) + +# qhasm: mem64[ap + 2604] = temp2 +# asm 1: mov <temp2=int64#3,2604(<ap=int64#1) +# asm 2: mov <temp2=%edx,2604(<ap=%rdi) +mov %edx,2604(%rdi) + +# qhasm: temp1 = mem64[ap + 2612] +# asm 1: mov 2612(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2612(<ap=%rdi),>temp1=%esi +mov 2612(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2836] +# asm 1: mov 2836(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2836(<ap=%rdi),>temp2=%edx +mov 2836(%rdi),%edx + +# qhasm: mem64[ap + 2836] = temp1 +# asm 1: mov <temp1=int64#2,2836(<ap=int64#1) +# asm 2: mov <temp1=%esi,2836(<ap=%rdi) +mov %esi,2836(%rdi) + +# qhasm: mem64[ap + 2612] = temp2 +# asm 1: mov <temp2=int64#3,2612(<ap=int64#1) +# asm 2: mov <temp2=%edx,2612(<ap=%rdi) +mov %edx,2612(%rdi) + +# qhasm: temp1 = mem64[ap + 2620] +# asm 1: mov 2620(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2620(<ap=%rdi),>temp1=%esi +mov 2620(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3860] +# asm 1: mov 3860(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3860(<ap=%rdi),>temp2=%edx +mov 3860(%rdi),%edx + +# qhasm: mem64[ap + 3860] = temp1 +# asm 1: mov <temp1=int64#2,3860(<ap=int64#1) +# asm 2: mov <temp1=%esi,3860(<ap=%rdi) +mov %esi,3860(%rdi) + +# qhasm: mem64[ap + 2620] = temp2 +# asm 1: mov <temp2=int64#3,2620(<ap=int64#1) +# asm 2: mov <temp2=%edx,2620(<ap=%rdi) +mov %edx,2620(%rdi) + +# 
qhasm: temp1 = mem64[ap + 2636] +# asm 1: mov 2636(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2636(<ap=%rdi),>temp1=%esi +mov 2636(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3220] +# asm 1: mov 3220(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3220(<ap=%rdi),>temp2=%edx +mov 3220(%rdi),%edx + +# qhasm: mem64[ap + 3220] = temp1 +# asm 1: mov <temp1=int64#2,3220(<ap=int64#1) +# asm 2: mov <temp1=%esi,3220(<ap=%rdi) +mov %esi,3220(%rdi) + +# qhasm: mem64[ap + 2636] = temp2 +# asm 1: mov <temp2=int64#3,2636(<ap=int64#1) +# asm 2: mov <temp2=%edx,2636(<ap=%rdi) +mov %edx,2636(%rdi) + +# qhasm: temp1 = mem64[ap + 2644] +# asm 1: mov 2644(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2644(<ap=%rdi),>temp1=%esi +mov 2644(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2708] +# asm 1: mov 2708(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2708(<ap=%rdi),>temp2=%edx +mov 2708(%rdi),%edx + +# qhasm: mem64[ap + 2708] = temp1 +# asm 1: mov <temp1=int64#2,2708(<ap=int64#1) +# asm 2: mov <temp1=%esi,2708(<ap=%rdi) +mov %esi,2708(%rdi) + +# qhasm: mem64[ap + 2644] = temp2 +# asm 1: mov <temp2=int64#3,2644(<ap=int64#1) +# asm 2: mov <temp2=%edx,2644(<ap=%rdi) +mov %edx,2644(%rdi) + +# qhasm: temp1 = mem64[ap + 2652] +# asm 1: mov 2652(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2652(<ap=%rdi),>temp1=%esi +mov 2652(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3732] +# asm 1: mov 3732(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3732(<ap=%rdi),>temp2=%edx +mov 3732(%rdi),%edx + +# qhasm: mem64[ap + 3732] = temp1 +# asm 1: mov <temp1=int64#2,3732(<ap=int64#1) +# asm 2: mov <temp1=%esi,3732(<ap=%rdi) +mov %esi,3732(%rdi) + +# qhasm: mem64[ap + 2652] = temp2 +# asm 1: mov <temp2=int64#3,2652(<ap=int64#1) +# asm 2: mov <temp2=%edx,2652(<ap=%rdi) +mov %edx,2652(%rdi) + +# qhasm: temp1 = mem64[ap + 2668] +# asm 1: mov 2668(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2668(<ap=%rdi),>temp1=%esi +mov 2668(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3476] +# asm 1: mov 3476(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 
3476(<ap=%rdi),>temp2=%edx +mov 3476(%rdi),%edx + +# qhasm: mem64[ap + 3476] = temp1 +# asm 1: mov <temp1=int64#2,3476(<ap=int64#1) +# asm 2: mov <temp1=%esi,3476(<ap=%rdi) +mov %esi,3476(%rdi) + +# qhasm: mem64[ap + 2668] = temp2 +# asm 1: mov <temp2=int64#3,2668(<ap=int64#1) +# asm 2: mov <temp2=%edx,2668(<ap=%rdi) +mov %edx,2668(%rdi) + +# qhasm: temp1 = mem64[ap + 2676] +# asm 1: mov 2676(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2676(<ap=%rdi),>temp1=%esi +mov 2676(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2964] +# asm 1: mov 2964(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2964(<ap=%rdi),>temp2=%edx +mov 2964(%rdi),%edx + +# qhasm: mem64[ap + 2964] = temp1 +# asm 1: mov <temp1=int64#2,2964(<ap=int64#1) +# asm 2: mov <temp1=%esi,2964(<ap=%rdi) +mov %esi,2964(%rdi) + +# qhasm: mem64[ap + 2676] = temp2 +# asm 1: mov <temp2=int64#3,2676(<ap=int64#1) +# asm 2: mov <temp2=%edx,2676(<ap=%rdi) +mov %edx,2676(%rdi) + +# qhasm: temp1 = mem64[ap + 2684] +# asm 1: mov 2684(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2684(<ap=%rdi),>temp1=%esi +mov 2684(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3988] +# asm 1: mov 3988(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3988(<ap=%rdi),>temp2=%edx +mov 3988(%rdi),%edx + +# qhasm: mem64[ap + 3988] = temp1 +# asm 1: mov <temp1=int64#2,3988(<ap=int64#1) +# asm 2: mov <temp1=%esi,3988(<ap=%rdi) +mov %esi,3988(%rdi) + +# qhasm: mem64[ap + 2684] = temp2 +# asm 1: mov <temp2=int64#3,2684(<ap=int64#1) +# asm 2: mov <temp2=%edx,2684(<ap=%rdi) +mov %edx,2684(%rdi) + +# qhasm: temp1 = mem64[ap + 2700] +# asm 1: mov 2700(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2700(<ap=%rdi),>temp1=%esi +mov 2700(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3156] +# asm 1: mov 3156(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3156(<ap=%rdi),>temp2=%edx +mov 3156(%rdi),%edx + +# qhasm: mem64[ap + 3156] = temp1 +# asm 1: mov <temp1=int64#2,3156(<ap=int64#1) +# asm 2: mov <temp1=%esi,3156(<ap=%rdi) +mov %esi,3156(%rdi) + +# qhasm: mem64[ap + 2700] = temp2 +# asm 1: mov 
<temp2=int64#3,2700(<ap=int64#1) +# asm 2: mov <temp2=%edx,2700(<ap=%rdi) +mov %edx,2700(%rdi) + +# qhasm: temp1 = mem64[ap + 2716] +# asm 1: mov 2716(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2716(<ap=%rdi),>temp1=%esi +mov 2716(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3668] +# asm 1: mov 3668(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3668(<ap=%rdi),>temp2=%edx +mov 3668(%rdi),%edx + +# qhasm: mem64[ap + 3668] = temp1 +# asm 1: mov <temp1=int64#2,3668(<ap=int64#1) +# asm 2: mov <temp1=%esi,3668(<ap=%rdi) +mov %esi,3668(%rdi) + +# qhasm: mem64[ap + 2716] = temp2 +# asm 1: mov <temp2=int64#3,2716(<ap=int64#1) +# asm 2: mov <temp2=%edx,2716(<ap=%rdi) +mov %edx,2716(%rdi) + +# qhasm: temp1 = mem64[ap + 2732] +# asm 1: mov 2732(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2732(<ap=%rdi),>temp1=%esi +mov 2732(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3412] +# asm 1: mov 3412(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3412(<ap=%rdi),>temp2=%edx +mov 3412(%rdi),%edx + +# qhasm: mem64[ap + 3412] = temp1 +# asm 1: mov <temp1=int64#2,3412(<ap=int64#1) +# asm 2: mov <temp1=%esi,3412(<ap=%rdi) +mov %esi,3412(%rdi) + +# qhasm: mem64[ap + 2732] = temp2 +# asm 1: mov <temp2=int64#3,2732(<ap=int64#1) +# asm 2: mov <temp2=%edx,2732(<ap=%rdi) +mov %edx,2732(%rdi) + +# qhasm: temp1 = mem64[ap + 2740] +# asm 1: mov 2740(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2740(<ap=%rdi),>temp1=%esi +mov 2740(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2900] +# asm 1: mov 2900(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2900(<ap=%rdi),>temp2=%edx +mov 2900(%rdi),%edx + +# qhasm: mem64[ap + 2900] = temp1 +# asm 1: mov <temp1=int64#2,2900(<ap=int64#1) +# asm 2: mov <temp1=%esi,2900(<ap=%rdi) +mov %esi,2900(%rdi) + +# qhasm: mem64[ap + 2740] = temp2 +# asm 1: mov <temp2=int64#3,2740(<ap=int64#1) +# asm 2: mov <temp2=%edx,2740(<ap=%rdi) +mov %edx,2740(%rdi) + +# qhasm: temp1 = mem64[ap + 2748] +# asm 1: mov 2748(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2748(<ap=%rdi),>temp1=%esi +mov 2748(%rdi),%esi + +# 
qhasm: temp2 = mem64[ap + 3924] +# asm 1: mov 3924(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3924(<ap=%rdi),>temp2=%edx +mov 3924(%rdi),%edx + +# qhasm: mem64[ap + 3924] = temp1 +# asm 1: mov <temp1=int64#2,3924(<ap=int64#1) +# asm 2: mov <temp1=%esi,3924(<ap=%rdi) +mov %esi,3924(%rdi) + +# qhasm: mem64[ap + 2748] = temp2 +# asm 1: mov <temp2=int64#3,2748(<ap=int64#1) +# asm 2: mov <temp2=%edx,2748(<ap=%rdi) +mov %edx,2748(%rdi) + +# qhasm: temp1 = mem64[ap + 2764] +# asm 1: mov 2764(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2764(<ap=%rdi),>temp1=%esi +mov 2764(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3284] +# asm 1: mov 3284(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3284(<ap=%rdi),>temp2=%edx +mov 3284(%rdi),%edx + +# qhasm: mem64[ap + 3284] = temp1 +# asm 1: mov <temp1=int64#2,3284(<ap=int64#1) +# asm 2: mov <temp1=%esi,3284(<ap=%rdi) +mov %esi,3284(%rdi) + +# qhasm: mem64[ap + 2764] = temp2 +# asm 1: mov <temp2=int64#3,2764(<ap=int64#1) +# asm 2: mov <temp2=%edx,2764(<ap=%rdi) +mov %edx,2764(%rdi) + +# qhasm: temp1 = mem64[ap + 2780] +# asm 1: mov 2780(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2780(<ap=%rdi),>temp1=%esi +mov 2780(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3796] +# asm 1: mov 3796(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3796(<ap=%rdi),>temp2=%edx +mov 3796(%rdi),%edx + +# qhasm: mem64[ap + 3796] = temp1 +# asm 1: mov <temp1=int64#2,3796(<ap=int64#1) +# asm 2: mov <temp1=%esi,3796(<ap=%rdi) +mov %esi,3796(%rdi) + +# qhasm: mem64[ap + 2780] = temp2 +# asm 1: mov <temp2=int64#3,2780(<ap=int64#1) +# asm 2: mov <temp2=%edx,2780(<ap=%rdi) +mov %edx,2780(%rdi) + +# qhasm: temp1 = mem64[ap + 2796] +# asm 1: mov 2796(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2796(<ap=%rdi),>temp1=%esi +mov 2796(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3540] +# asm 1: mov 3540(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3540(<ap=%rdi),>temp2=%edx +mov 3540(%rdi),%edx + +# qhasm: mem64[ap + 3540] = temp1 +# asm 1: mov <temp1=int64#2,3540(<ap=int64#1) +# asm 2: mov 
<temp1=%esi,3540(<ap=%rdi) +mov %esi,3540(%rdi) + +# qhasm: mem64[ap + 2796] = temp2 +# asm 1: mov <temp2=int64#3,2796(<ap=int64#1) +# asm 2: mov <temp2=%edx,2796(<ap=%rdi) +mov %edx,2796(%rdi) + +# qhasm: temp1 = mem64[ap + 2804] +# asm 1: mov 2804(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2804(<ap=%rdi),>temp1=%esi +mov 2804(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3028] +# asm 1: mov 3028(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3028(<ap=%rdi),>temp2=%edx +mov 3028(%rdi),%edx + +# qhasm: mem64[ap + 3028] = temp1 +# asm 1: mov <temp1=int64#2,3028(<ap=int64#1) +# asm 2: mov <temp1=%esi,3028(<ap=%rdi) +mov %esi,3028(%rdi) + +# qhasm: mem64[ap + 2804] = temp2 +# asm 1: mov <temp2=int64#3,2804(<ap=int64#1) +# asm 2: mov <temp2=%edx,2804(<ap=%rdi) +mov %edx,2804(%rdi) + +# qhasm: temp1 = mem64[ap + 2812] +# asm 1: mov 2812(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2812(<ap=%rdi),>temp1=%esi +mov 2812(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4052] +# asm 1: mov 4052(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4052(<ap=%rdi),>temp2=%edx +mov 4052(%rdi),%edx + +# qhasm: mem64[ap + 4052] = temp1 +# asm 1: mov <temp1=int64#2,4052(<ap=int64#1) +# asm 2: mov <temp1=%esi,4052(<ap=%rdi) +mov %esi,4052(%rdi) + +# qhasm: mem64[ap + 2812] = temp2 +# asm 1: mov <temp2=int64#3,2812(<ap=int64#1) +# asm 2: mov <temp2=%edx,2812(<ap=%rdi) +mov %edx,2812(%rdi) + +# qhasm: temp1 = mem64[ap + 2828] +# asm 1: mov 2828(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2828(<ap=%rdi),>temp1=%esi +mov 2828(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3124] +# asm 1: mov 3124(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3124(<ap=%rdi),>temp2=%edx +mov 3124(%rdi),%edx + +# qhasm: mem64[ap + 3124] = temp1 +# asm 1: mov <temp1=int64#2,3124(<ap=int64#1) +# asm 2: mov <temp1=%esi,3124(<ap=%rdi) +mov %esi,3124(%rdi) + +# qhasm: mem64[ap + 2828] = temp2 +# asm 1: mov <temp2=int64#3,2828(<ap=int64#1) +# asm 2: mov <temp2=%edx,2828(<ap=%rdi) +mov %edx,2828(%rdi) + +# qhasm: temp1 = mem64[ap + 2844] +# asm 1: mov 
2844(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2844(<ap=%rdi),>temp1=%esi +mov 2844(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3636] +# asm 1: mov 3636(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3636(<ap=%rdi),>temp2=%edx +mov 3636(%rdi),%edx + +# qhasm: mem64[ap + 3636] = temp1 +# asm 1: mov <temp1=int64#2,3636(<ap=int64#1) +# asm 2: mov <temp1=%esi,3636(<ap=%rdi) +mov %esi,3636(%rdi) + +# qhasm: mem64[ap + 2844] = temp2 +# asm 1: mov <temp2=int64#3,2844(<ap=int64#1) +# asm 2: mov <temp2=%edx,2844(<ap=%rdi) +mov %edx,2844(%rdi) + +# qhasm: temp1 = mem64[ap + 2860] +# asm 1: mov 2860(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2860(<ap=%rdi),>temp1=%esi +mov 2860(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3380] +# asm 1: mov 3380(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3380(<ap=%rdi),>temp2=%edx +mov 3380(%rdi),%edx + +# qhasm: mem64[ap + 3380] = temp1 +# asm 1: mov <temp1=int64#2,3380(<ap=int64#1) +# asm 2: mov <temp1=%esi,3380(<ap=%rdi) +mov %esi,3380(%rdi) + +# qhasm: mem64[ap + 2860] = temp2 +# asm 1: mov <temp2=int64#3,2860(<ap=int64#1) +# asm 2: mov <temp2=%edx,2860(<ap=%rdi) +mov %edx,2860(%rdi) + +# qhasm: temp1 = mem64[ap + 2876] +# asm 1: mov 2876(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2876(<ap=%rdi),>temp1=%esi +mov 2876(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3892] +# asm 1: mov 3892(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3892(<ap=%rdi),>temp2=%edx +mov 3892(%rdi),%edx + +# qhasm: mem64[ap + 3892] = temp1 +# asm 1: mov <temp1=int64#2,3892(<ap=int64#1) +# asm 2: mov <temp1=%esi,3892(<ap=%rdi) +mov %esi,3892(%rdi) + +# qhasm: mem64[ap + 2876] = temp2 +# asm 1: mov <temp2=int64#3,2876(<ap=int64#1) +# asm 2: mov <temp2=%edx,2876(<ap=%rdi) +mov %edx,2876(%rdi) + +# qhasm: temp1 = mem64[ap + 2892] +# asm 1: mov 2892(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2892(<ap=%rdi),>temp1=%esi +mov 2892(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3252] +# asm 1: mov 3252(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3252(<ap=%rdi),>temp2=%edx +mov 3252(%rdi),%edx + +# 
qhasm: mem64[ap + 3252] = temp1 +# asm 1: mov <temp1=int64#2,3252(<ap=int64#1) +# asm 2: mov <temp1=%esi,3252(<ap=%rdi) +mov %esi,3252(%rdi) + +# qhasm: mem64[ap + 2892] = temp2 +# asm 1: mov <temp2=int64#3,2892(<ap=int64#1) +# asm 2: mov <temp2=%edx,2892(<ap=%rdi) +mov %edx,2892(%rdi) + +# qhasm: temp1 = mem64[ap + 2908] +# asm 1: mov 2908(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2908(<ap=%rdi),>temp1=%esi +mov 2908(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3764] +# asm 1: mov 3764(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3764(<ap=%rdi),>temp2=%edx +mov 3764(%rdi),%edx + +# qhasm: mem64[ap + 3764] = temp1 +# asm 1: mov <temp1=int64#2,3764(<ap=int64#1) +# asm 2: mov <temp1=%esi,3764(<ap=%rdi) +mov %esi,3764(%rdi) + +# qhasm: mem64[ap + 2908] = temp2 +# asm 1: mov <temp2=int64#3,2908(<ap=int64#1) +# asm 2: mov <temp2=%edx,2908(<ap=%rdi) +mov %edx,2908(%rdi) + +# qhasm: temp1 = mem64[ap + 2924] +# asm 1: mov 2924(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2924(<ap=%rdi),>temp1=%esi +mov 2924(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3508] +# asm 1: mov 3508(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3508(<ap=%rdi),>temp2=%edx +mov 3508(%rdi),%edx + +# qhasm: mem64[ap + 3508] = temp1 +# asm 1: mov <temp1=int64#2,3508(<ap=int64#1) +# asm 2: mov <temp1=%esi,3508(<ap=%rdi) +mov %esi,3508(%rdi) + +# qhasm: mem64[ap + 2924] = temp2 +# asm 1: mov <temp2=int64#3,2924(<ap=int64#1) +# asm 2: mov <temp2=%edx,2924(<ap=%rdi) +mov %edx,2924(%rdi) + +# qhasm: temp1 = mem64[ap + 2932] +# asm 1: mov 2932(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2932(<ap=%rdi),>temp1=%esi +mov 2932(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 2996] +# asm 1: mov 2996(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 2996(<ap=%rdi),>temp2=%edx +mov 2996(%rdi),%edx + +# qhasm: mem64[ap + 2996] = temp1 +# asm 1: mov <temp1=int64#2,2996(<ap=int64#1) +# asm 2: mov <temp1=%esi,2996(<ap=%rdi) +mov %esi,2996(%rdi) + +# qhasm: mem64[ap + 2932] = temp2 +# asm 1: mov <temp2=int64#3,2932(<ap=int64#1) +# asm 2: mov 
<temp2=%edx,2932(<ap=%rdi) +mov %edx,2932(%rdi) + +# qhasm: temp1 = mem64[ap + 2940] +# asm 1: mov 2940(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2940(<ap=%rdi),>temp1=%esi +mov 2940(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4020] +# asm 1: mov 4020(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4020(<ap=%rdi),>temp2=%edx +mov 4020(%rdi),%edx + +# qhasm: mem64[ap + 4020] = temp1 +# asm 1: mov <temp1=int64#2,4020(<ap=int64#1) +# asm 2: mov <temp1=%esi,4020(<ap=%rdi) +mov %esi,4020(%rdi) + +# qhasm: mem64[ap + 2940] = temp2 +# asm 1: mov <temp2=int64#3,2940(<ap=int64#1) +# asm 2: mov <temp2=%edx,2940(<ap=%rdi) +mov %edx,2940(%rdi) + +# qhasm: temp1 = mem64[ap + 2956] +# asm 1: mov 2956(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2956(<ap=%rdi),>temp1=%esi +mov 2956(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3188] +# asm 1: mov 3188(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3188(<ap=%rdi),>temp2=%edx +mov 3188(%rdi),%edx + +# qhasm: mem64[ap + 3188] = temp1 +# asm 1: mov <temp1=int64#2,3188(<ap=int64#1) +# asm 2: mov <temp1=%esi,3188(<ap=%rdi) +mov %esi,3188(%rdi) + +# qhasm: mem64[ap + 2956] = temp2 +# asm 1: mov <temp2=int64#3,2956(<ap=int64#1) +# asm 2: mov <temp2=%edx,2956(<ap=%rdi) +mov %edx,2956(%rdi) + +# qhasm: temp1 = mem64[ap + 2972] +# asm 1: mov 2972(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2972(<ap=%rdi),>temp1=%esi +mov 2972(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3700] +# asm 1: mov 3700(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3700(<ap=%rdi),>temp2=%edx +mov 3700(%rdi),%edx + +# qhasm: mem64[ap + 3700] = temp1 +# asm 1: mov <temp1=int64#2,3700(<ap=int64#1) +# asm 2: mov <temp1=%esi,3700(<ap=%rdi) +mov %esi,3700(%rdi) + +# qhasm: mem64[ap + 2972] = temp2 +# asm 1: mov <temp2=int64#3,2972(<ap=int64#1) +# asm 2: mov <temp2=%edx,2972(<ap=%rdi) +mov %edx,2972(%rdi) + +# qhasm: temp1 = mem64[ap + 2988] +# asm 1: mov 2988(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 2988(<ap=%rdi),>temp1=%esi +mov 2988(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3444] +# asm 1: mov 
3444(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3444(<ap=%rdi),>temp2=%edx +mov 3444(%rdi),%edx + +# qhasm: mem64[ap + 3444] = temp1 +# asm 1: mov <temp1=int64#2,3444(<ap=int64#1) +# asm 2: mov <temp1=%esi,3444(<ap=%rdi) +mov %esi,3444(%rdi) + +# qhasm: mem64[ap + 2988] = temp2 +# asm 1: mov <temp2=int64#3,2988(<ap=int64#1) +# asm 2: mov <temp2=%edx,2988(<ap=%rdi) +mov %edx,2988(%rdi) + +# qhasm: temp1 = mem64[ap + 3004] +# asm 1: mov 3004(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3004(<ap=%rdi),>temp1=%esi +mov 3004(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3956] +# asm 1: mov 3956(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3956(<ap=%rdi),>temp2=%edx +mov 3956(%rdi),%edx + +# qhasm: mem64[ap + 3956] = temp1 +# asm 1: mov <temp1=int64#2,3956(<ap=int64#1) +# asm 2: mov <temp1=%esi,3956(<ap=%rdi) +mov %esi,3956(%rdi) + +# qhasm: mem64[ap + 3004] = temp2 +# asm 1: mov <temp2=int64#3,3004(<ap=int64#1) +# asm 2: mov <temp2=%edx,3004(<ap=%rdi) +mov %edx,3004(%rdi) + +# qhasm: temp1 = mem64[ap + 3020] +# asm 1: mov 3020(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3020(<ap=%rdi),>temp1=%esi +mov 3020(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3316] +# asm 1: mov 3316(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3316(<ap=%rdi),>temp2=%edx +mov 3316(%rdi),%edx + +# qhasm: mem64[ap + 3316] = temp1 +# asm 1: mov <temp1=int64#2,3316(<ap=int64#1) +# asm 2: mov <temp1=%esi,3316(<ap=%rdi) +mov %esi,3316(%rdi) + +# qhasm: mem64[ap + 3020] = temp2 +# asm 1: mov <temp2=int64#3,3020(<ap=int64#1) +# asm 2: mov <temp2=%edx,3020(<ap=%rdi) +mov %edx,3020(%rdi) + +# qhasm: temp1 = mem64[ap + 3036] +# asm 1: mov 3036(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3036(<ap=%rdi),>temp1=%esi +mov 3036(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3828] +# asm 1: mov 3828(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3828(<ap=%rdi),>temp2=%edx +mov 3828(%rdi),%edx + +# qhasm: mem64[ap + 3828] = temp1 +# asm 1: mov <temp1=int64#2,3828(<ap=int64#1) +# asm 2: mov <temp1=%esi,3828(<ap=%rdi) +mov %esi,3828(%rdi) + +# 
qhasm: mem64[ap + 3036] = temp2 +# asm 1: mov <temp2=int64#3,3036(<ap=int64#1) +# asm 2: mov <temp2=%edx,3036(<ap=%rdi) +mov %edx,3036(%rdi) + +# qhasm: temp1 = mem64[ap + 3052] +# asm 1: mov 3052(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3052(<ap=%rdi),>temp1=%esi +mov 3052(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3572] +# asm 1: mov 3572(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3572(<ap=%rdi),>temp2=%edx +mov 3572(%rdi),%edx + +# qhasm: mem64[ap + 3572] = temp1 +# asm 1: mov <temp1=int64#2,3572(<ap=int64#1) +# asm 2: mov <temp1=%esi,3572(<ap=%rdi) +mov %esi,3572(%rdi) + +# qhasm: mem64[ap + 3052] = temp2 +# asm 1: mov <temp2=int64#3,3052(<ap=int64#1) +# asm 2: mov <temp2=%edx,3052(<ap=%rdi) +mov %edx,3052(%rdi) + +# qhasm: temp1 = mem64[ap + 3068] +# asm 1: mov 3068(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3068(<ap=%rdi),>temp1=%esi +mov 3068(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4084] +# asm 1: mov 4084(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4084(<ap=%rdi),>temp2=%edx +mov 4084(%rdi),%edx + +# qhasm: mem64[ap + 4084] = temp1 +# asm 1: mov <temp1=int64#2,4084(<ap=int64#1) +# asm 2: mov <temp1=%esi,4084(<ap=%rdi) +mov %esi,4084(%rdi) + +# qhasm: mem64[ap + 3068] = temp2 +# asm 1: mov <temp2=int64#3,3068(<ap=int64#1) +# asm 2: mov <temp2=%edx,3068(<ap=%rdi) +mov %edx,3068(%rdi) + +# qhasm: temp1 = mem64[ap + 3100] +# asm 1: mov 3100(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3100(<ap=%rdi),>temp1=%esi +mov 3100(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3596] +# asm 1: mov 3596(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3596(<ap=%rdi),>temp2=%edx +mov 3596(%rdi),%edx + +# qhasm: mem64[ap + 3596] = temp1 +# asm 1: mov <temp1=int64#2,3596(<ap=int64#1) +# asm 2: mov <temp1=%esi,3596(<ap=%rdi) +mov %esi,3596(%rdi) + +# qhasm: mem64[ap + 3100] = temp2 +# asm 1: mov <temp2=int64#3,3100(<ap=int64#1) +# asm 2: mov <temp2=%edx,3100(<ap=%rdi) +mov %edx,3100(%rdi) + +# qhasm: temp1 = mem64[ap + 3116] +# asm 1: mov 3116(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 
3116(<ap=%rdi),>temp1=%esi +mov 3116(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3340] +# asm 1: mov 3340(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3340(<ap=%rdi),>temp2=%edx +mov 3340(%rdi),%edx + +# qhasm: mem64[ap + 3340] = temp1 +# asm 1: mov <temp1=int64#2,3340(<ap=int64#1) +# asm 2: mov <temp1=%esi,3340(<ap=%rdi) +mov %esi,3340(%rdi) + +# qhasm: mem64[ap + 3116] = temp2 +# asm 1: mov <temp2=int64#3,3116(<ap=int64#1) +# asm 2: mov <temp2=%edx,3116(<ap=%rdi) +mov %edx,3116(%rdi) + +# qhasm: temp1 = mem64[ap + 3132] +# asm 1: mov 3132(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3132(<ap=%rdi),>temp1=%esi +mov 3132(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3852] +# asm 1: mov 3852(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3852(<ap=%rdi),>temp2=%edx +mov 3852(%rdi),%edx + +# qhasm: mem64[ap + 3852] = temp1 +# asm 1: mov <temp1=int64#2,3852(<ap=int64#1) +# asm 2: mov <temp1=%esi,3852(<ap=%rdi) +mov %esi,3852(%rdi) + +# qhasm: mem64[ap + 3132] = temp2 +# asm 1: mov <temp2=int64#3,3132(<ap=int64#1) +# asm 2: mov <temp2=%edx,3132(<ap=%rdi) +mov %edx,3132(%rdi) + +# qhasm: temp1 = mem64[ap + 3148] +# asm 1: mov 3148(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3148(<ap=%rdi),>temp1=%esi +mov 3148(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3212] +# asm 1: mov 3212(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3212(<ap=%rdi),>temp2=%edx +mov 3212(%rdi),%edx + +# qhasm: mem64[ap + 3212] = temp1 +# asm 1: mov <temp1=int64#2,3212(<ap=int64#1) +# asm 2: mov <temp1=%esi,3212(<ap=%rdi) +mov %esi,3212(%rdi) + +# qhasm: mem64[ap + 3148] = temp2 +# asm 1: mov <temp2=int64#3,3148(<ap=int64#1) +# asm 2: mov <temp2=%edx,3148(<ap=%rdi) +mov %edx,3148(%rdi) + +# qhasm: temp1 = mem64[ap + 3164] +# asm 1: mov 3164(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3164(<ap=%rdi),>temp1=%esi +mov 3164(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3724] +# asm 1: mov 3724(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3724(<ap=%rdi),>temp2=%edx +mov 3724(%rdi),%edx + +# qhasm: mem64[ap + 3724] = temp1 +# asm 1: mov 
<temp1=int64#2,3724(<ap=int64#1) +# asm 2: mov <temp1=%esi,3724(<ap=%rdi) +mov %esi,3724(%rdi) + +# qhasm: mem64[ap + 3164] = temp2 +# asm 1: mov <temp2=int64#3,3164(<ap=int64#1) +# asm 2: mov <temp2=%edx,3164(<ap=%rdi) +mov %edx,3164(%rdi) + +# qhasm: temp1 = mem64[ap + 3180] +# asm 1: mov 3180(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3180(<ap=%rdi),>temp1=%esi +mov 3180(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3468] +# asm 1: mov 3468(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3468(<ap=%rdi),>temp2=%edx +mov 3468(%rdi),%edx + +# qhasm: mem64[ap + 3468] = temp1 +# asm 1: mov <temp1=int64#2,3468(<ap=int64#1) +# asm 2: mov <temp1=%esi,3468(<ap=%rdi) +mov %esi,3468(%rdi) + +# qhasm: mem64[ap + 3180] = temp2 +# asm 1: mov <temp2=int64#3,3180(<ap=int64#1) +# asm 2: mov <temp2=%edx,3180(<ap=%rdi) +mov %edx,3180(%rdi) + +# qhasm: temp1 = mem64[ap + 3196] +# asm 1: mov 3196(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3196(<ap=%rdi),>temp1=%esi +mov 3196(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3980] +# asm 1: mov 3980(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3980(<ap=%rdi),>temp2=%edx +mov 3980(%rdi),%edx + +# qhasm: mem64[ap + 3980] = temp1 +# asm 1: mov <temp1=int64#2,3980(<ap=int64#1) +# asm 2: mov <temp1=%esi,3980(<ap=%rdi) +mov %esi,3980(%rdi) + +# qhasm: mem64[ap + 3196] = temp2 +# asm 1: mov <temp2=int64#3,3196(<ap=int64#1) +# asm 2: mov <temp2=%edx,3196(<ap=%rdi) +mov %edx,3196(%rdi) + +# qhasm: temp1 = mem64[ap + 3228] +# asm 1: mov 3228(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3228(<ap=%rdi),>temp1=%esi +mov 3228(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3660] +# asm 1: mov 3660(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3660(<ap=%rdi),>temp2=%edx +mov 3660(%rdi),%edx + +# qhasm: mem64[ap + 3660] = temp1 +# asm 1: mov <temp1=int64#2,3660(<ap=int64#1) +# asm 2: mov <temp1=%esi,3660(<ap=%rdi) +mov %esi,3660(%rdi) + +# qhasm: mem64[ap + 3228] = temp2 +# asm 1: mov <temp2=int64#3,3228(<ap=int64#1) +# asm 2: mov <temp2=%edx,3228(<ap=%rdi) +mov %edx,3228(%rdi) + +# 
qhasm: temp1 = mem64[ap + 3244] +# asm 1: mov 3244(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3244(<ap=%rdi),>temp1=%esi +mov 3244(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3404] +# asm 1: mov 3404(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3404(<ap=%rdi),>temp2=%edx +mov 3404(%rdi),%edx + +# qhasm: mem64[ap + 3404] = temp1 +# asm 1: mov <temp1=int64#2,3404(<ap=int64#1) +# asm 2: mov <temp1=%esi,3404(<ap=%rdi) +mov %esi,3404(%rdi) + +# qhasm: mem64[ap + 3244] = temp2 +# asm 1: mov <temp2=int64#3,3244(<ap=int64#1) +# asm 2: mov <temp2=%edx,3244(<ap=%rdi) +mov %edx,3244(%rdi) + +# qhasm: temp1 = mem64[ap + 3260] +# asm 1: mov 3260(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3260(<ap=%rdi),>temp1=%esi +mov 3260(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3916] +# asm 1: mov 3916(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3916(<ap=%rdi),>temp2=%edx +mov 3916(%rdi),%edx + +# qhasm: mem64[ap + 3916] = temp1 +# asm 1: mov <temp1=int64#2,3916(<ap=int64#1) +# asm 2: mov <temp1=%esi,3916(<ap=%rdi) +mov %esi,3916(%rdi) + +# qhasm: mem64[ap + 3260] = temp2 +# asm 1: mov <temp2=int64#3,3260(<ap=int64#1) +# asm 2: mov <temp2=%edx,3260(<ap=%rdi) +mov %edx,3260(%rdi) + +# qhasm: temp1 = mem64[ap + 3292] +# asm 1: mov 3292(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3292(<ap=%rdi),>temp1=%esi +mov 3292(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3788] +# asm 1: mov 3788(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3788(<ap=%rdi),>temp2=%edx +mov 3788(%rdi),%edx + +# qhasm: mem64[ap + 3788] = temp1 +# asm 1: mov <temp1=int64#2,3788(<ap=int64#1) +# asm 2: mov <temp1=%esi,3788(<ap=%rdi) +mov %esi,3788(%rdi) + +# qhasm: mem64[ap + 3292] = temp2 +# asm 1: mov <temp2=int64#3,3292(<ap=int64#1) +# asm 2: mov <temp2=%edx,3292(<ap=%rdi) +mov %edx,3292(%rdi) + +# qhasm: temp1 = mem64[ap + 3308] +# asm 1: mov 3308(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3308(<ap=%rdi),>temp1=%esi +mov 3308(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3532] +# asm 1: mov 3532(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 
3532(<ap=%rdi),>temp2=%edx +mov 3532(%rdi),%edx + +# qhasm: mem64[ap + 3532] = temp1 +# asm 1: mov <temp1=int64#2,3532(<ap=int64#1) +# asm 2: mov <temp1=%esi,3532(<ap=%rdi) +mov %esi,3532(%rdi) + +# qhasm: mem64[ap + 3308] = temp2 +# asm 1: mov <temp2=int64#3,3308(<ap=int64#1) +# asm 2: mov <temp2=%edx,3308(<ap=%rdi) +mov %edx,3308(%rdi) + +# qhasm: temp1 = mem64[ap + 3324] +# asm 1: mov 3324(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3324(<ap=%rdi),>temp1=%esi +mov 3324(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4044] +# asm 1: mov 4044(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4044(<ap=%rdi),>temp2=%edx +mov 4044(%rdi),%edx + +# qhasm: mem64[ap + 4044] = temp1 +# asm 1: mov <temp1=int64#2,4044(<ap=int64#1) +# asm 2: mov <temp1=%esi,4044(<ap=%rdi) +mov %esi,4044(%rdi) + +# qhasm: mem64[ap + 3324] = temp2 +# asm 1: mov <temp2=int64#3,3324(<ap=int64#1) +# asm 2: mov <temp2=%edx,3324(<ap=%rdi) +mov %edx,3324(%rdi) + +# qhasm: temp1 = mem64[ap + 3356] +# asm 1: mov 3356(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3356(<ap=%rdi),>temp1=%esi +mov 3356(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3628] +# asm 1: mov 3628(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3628(<ap=%rdi),>temp2=%edx +mov 3628(%rdi),%edx + +# qhasm: mem64[ap + 3628] = temp1 +# asm 1: mov <temp1=int64#2,3628(<ap=int64#1) +# asm 2: mov <temp1=%esi,3628(<ap=%rdi) +mov %esi,3628(%rdi) + +# qhasm: mem64[ap + 3356] = temp2 +# asm 1: mov <temp2=int64#3,3356(<ap=int64#1) +# asm 2: mov <temp2=%edx,3356(<ap=%rdi) +mov %edx,3356(%rdi) + +# qhasm: temp1 = mem64[ap + 3388] +# asm 1: mov 3388(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3388(<ap=%rdi),>temp1=%esi +mov 3388(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3884] +# asm 1: mov 3884(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3884(<ap=%rdi),>temp2=%edx +mov 3884(%rdi),%edx + +# qhasm: mem64[ap + 3884] = temp1 +# asm 1: mov <temp1=int64#2,3884(<ap=int64#1) +# asm 2: mov <temp1=%esi,3884(<ap=%rdi) +mov %esi,3884(%rdi) + +# qhasm: mem64[ap + 3388] = temp2 +# asm 1: mov 
<temp2=int64#3,3388(<ap=int64#1) +# asm 2: mov <temp2=%edx,3388(<ap=%rdi) +mov %edx,3388(%rdi) + +# qhasm: temp1 = mem64[ap + 3420] +# asm 1: mov 3420(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3420(<ap=%rdi),>temp1=%esi +mov 3420(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3756] +# asm 1: mov 3756(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3756(<ap=%rdi),>temp2=%edx +mov 3756(%rdi),%edx + +# qhasm: mem64[ap + 3756] = temp1 +# asm 1: mov <temp1=int64#2,3756(<ap=int64#1) +# asm 2: mov <temp1=%esi,3756(<ap=%rdi) +mov %esi,3756(%rdi) + +# qhasm: mem64[ap + 3420] = temp2 +# asm 1: mov <temp2=int64#3,3420(<ap=int64#1) +# asm 2: mov <temp2=%edx,3420(<ap=%rdi) +mov %edx,3420(%rdi) + +# qhasm: temp1 = mem64[ap + 3436] +# asm 1: mov 3436(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3436(<ap=%rdi),>temp1=%esi +mov 3436(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3500] +# asm 1: mov 3500(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3500(<ap=%rdi),>temp2=%edx +mov 3500(%rdi),%edx + +# qhasm: mem64[ap + 3500] = temp1 +# asm 1: mov <temp1=int64#2,3500(<ap=int64#1) +# asm 2: mov <temp1=%esi,3500(<ap=%rdi) +mov %esi,3500(%rdi) + +# qhasm: mem64[ap + 3436] = temp2 +# asm 1: mov <temp2=int64#3,3436(<ap=int64#1) +# asm 2: mov <temp2=%edx,3436(<ap=%rdi) +mov %edx,3436(%rdi) + +# qhasm: temp1 = mem64[ap + 3452] +# asm 1: mov 3452(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3452(<ap=%rdi),>temp1=%esi +mov 3452(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4012] +# asm 1: mov 4012(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4012(<ap=%rdi),>temp2=%edx +mov 4012(%rdi),%edx + +# qhasm: mem64[ap + 4012] = temp1 +# asm 1: mov <temp1=int64#2,4012(<ap=int64#1) +# asm 2: mov <temp1=%esi,4012(<ap=%rdi) +mov %esi,4012(%rdi) + +# qhasm: mem64[ap + 3452] = temp2 +# asm 1: mov <temp2=int64#3,3452(<ap=int64#1) +# asm 2: mov <temp2=%edx,3452(<ap=%rdi) +mov %edx,3452(%rdi) + +# qhasm: temp1 = mem64[ap + 3484] +# asm 1: mov 3484(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3484(<ap=%rdi),>temp1=%esi +mov 3484(%rdi),%esi + +# 
qhasm: temp2 = mem64[ap + 3692] +# asm 1: mov 3692(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3692(<ap=%rdi),>temp2=%edx +mov 3692(%rdi),%edx + +# qhasm: mem64[ap + 3692] = temp1 +# asm 1: mov <temp1=int64#2,3692(<ap=int64#1) +# asm 2: mov <temp1=%esi,3692(<ap=%rdi) +mov %esi,3692(%rdi) + +# qhasm: mem64[ap + 3484] = temp2 +# asm 1: mov <temp2=int64#3,3484(<ap=int64#1) +# asm 2: mov <temp2=%edx,3484(<ap=%rdi) +mov %edx,3484(%rdi) + +# qhasm: temp1 = mem64[ap + 3516] +# asm 1: mov 3516(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3516(<ap=%rdi),>temp1=%esi +mov 3516(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3948] +# asm 1: mov 3948(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3948(<ap=%rdi),>temp2=%edx +mov 3948(%rdi),%edx + +# qhasm: mem64[ap + 3948] = temp1 +# asm 1: mov <temp1=int64#2,3948(<ap=int64#1) +# asm 2: mov <temp1=%esi,3948(<ap=%rdi) +mov %esi,3948(%rdi) + +# qhasm: mem64[ap + 3516] = temp2 +# asm 1: mov <temp2=int64#3,3516(<ap=int64#1) +# asm 2: mov <temp2=%edx,3516(<ap=%rdi) +mov %edx,3516(%rdi) + +# qhasm: temp1 = mem64[ap + 3548] +# asm 1: mov 3548(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3548(<ap=%rdi),>temp1=%esi +mov 3548(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3820] +# asm 1: mov 3820(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3820(<ap=%rdi),>temp2=%edx +mov 3820(%rdi),%edx + +# qhasm: mem64[ap + 3820] = temp1 +# asm 1: mov <temp1=int64#2,3820(<ap=int64#1) +# asm 2: mov <temp1=%esi,3820(<ap=%rdi) +mov %esi,3820(%rdi) + +# qhasm: mem64[ap + 3548] = temp2 +# asm 1: mov <temp2=int64#3,3548(<ap=int64#1) +# asm 2: mov <temp2=%edx,3548(<ap=%rdi) +mov %edx,3548(%rdi) + +# qhasm: temp1 = mem64[ap + 3580] +# asm 1: mov 3580(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3580(<ap=%rdi),>temp1=%esi +mov 3580(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4076] +# asm 1: mov 4076(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4076(<ap=%rdi),>temp2=%edx +mov 4076(%rdi),%edx + +# qhasm: mem64[ap + 4076] = temp1 +# asm 1: mov <temp1=int64#2,4076(<ap=int64#1) +# asm 2: mov 
<temp1=%esi,4076(<ap=%rdi) +mov %esi,4076(%rdi) + +# qhasm: mem64[ap + 3580] = temp2 +# asm 1: mov <temp2=int64#3,3580(<ap=int64#1) +# asm 2: mov <temp2=%edx,3580(<ap=%rdi) +mov %edx,3580(%rdi) + +# qhasm: temp1 = mem64[ap + 3644] +# asm 1: mov 3644(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3644(<ap=%rdi),>temp1=%esi +mov 3644(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3868] +# asm 1: mov 3868(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3868(<ap=%rdi),>temp2=%edx +mov 3868(%rdi),%edx + +# qhasm: mem64[ap + 3868] = temp1 +# asm 1: mov <temp1=int64#2,3868(<ap=int64#1) +# asm 2: mov <temp1=%esi,3868(<ap=%rdi) +mov %esi,3868(%rdi) + +# qhasm: mem64[ap + 3644] = temp2 +# asm 1: mov <temp2=int64#3,3644(<ap=int64#1) +# asm 2: mov <temp2=%edx,3644(<ap=%rdi) +mov %edx,3644(%rdi) + +# qhasm: temp1 = mem64[ap + 3676] +# asm 1: mov 3676(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3676(<ap=%rdi),>temp1=%esi +mov 3676(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3740] +# asm 1: mov 3740(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3740(<ap=%rdi),>temp2=%edx +mov 3740(%rdi),%edx + +# qhasm: mem64[ap + 3740] = temp1 +# asm 1: mov <temp1=int64#2,3740(<ap=int64#1) +# asm 2: mov <temp1=%esi,3740(<ap=%rdi) +mov %esi,3740(%rdi) + +# qhasm: mem64[ap + 3676] = temp2 +# asm 1: mov <temp2=int64#3,3676(<ap=int64#1) +# asm 2: mov <temp2=%edx,3676(<ap=%rdi) +mov %edx,3676(%rdi) + +# qhasm: temp1 = mem64[ap + 3708] +# asm 1: mov 3708(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3708(<ap=%rdi),>temp1=%esi +mov 3708(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3996] +# asm 1: mov 3996(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3996(<ap=%rdi),>temp2=%edx +mov 3996(%rdi),%edx + +# qhasm: mem64[ap + 3996] = temp1 +# asm 1: mov <temp1=int64#2,3996(<ap=int64#1) +# asm 2: mov <temp1=%esi,3996(<ap=%rdi) +mov %esi,3996(%rdi) + +# qhasm: mem64[ap + 3708] = temp2 +# asm 1: mov <temp2=int64#3,3708(<ap=int64#1) +# asm 2: mov <temp2=%edx,3708(<ap=%rdi) +mov %edx,3708(%rdi) + +# qhasm: temp1 = mem64[ap + 3772] +# asm 1: mov 
3772(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3772(<ap=%rdi),>temp1=%esi +mov 3772(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 3932] +# asm 1: mov 3932(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 3932(<ap=%rdi),>temp2=%edx +mov 3932(%rdi),%edx + +# qhasm: mem64[ap + 3932] = temp1 +# asm 1: mov <temp1=int64#2,3932(<ap=int64#1) +# asm 2: mov <temp1=%esi,3932(<ap=%rdi) +mov %esi,3932(%rdi) + +# qhasm: mem64[ap + 3772] = temp2 +# asm 1: mov <temp2=int64#3,3772(<ap=int64#1) +# asm 2: mov <temp2=%edx,3772(<ap=%rdi) +mov %edx,3772(%rdi) + +# qhasm: temp1 = mem64[ap + 3836] +# asm 1: mov 3836(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3836(<ap=%rdi),>temp1=%esi +mov 3836(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4060] +# asm 1: mov 4060(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4060(<ap=%rdi),>temp2=%edx +mov 4060(%rdi),%edx + +# qhasm: mem64[ap + 4060] = temp1 +# asm 1: mov <temp1=int64#2,4060(<ap=int64#1) +# asm 2: mov <temp1=%esi,4060(<ap=%rdi) +mov %esi,4060(%rdi) + +# qhasm: mem64[ap + 3836] = temp2 +# asm 1: mov <temp2=int64#3,3836(<ap=int64#1) +# asm 2: mov <temp2=%edx,3836(<ap=%rdi) +mov %edx,3836(%rdi) + +# qhasm: temp1 = mem64[ap + 3964] +# asm 1: mov 3964(<ap=int64#1),>temp1=int64#2 +# asm 2: mov 3964(<ap=%rdi),>temp1=%esi +mov 3964(%rdi),%esi + +# qhasm: temp2 = mem64[ap + 4028] +# asm 1: mov 4028(<ap=int64#1),>temp2=int64#3 +# asm 2: mov 4028(<ap=%rdi),>temp2=%edx +mov 4028(%rdi),%edx + +# qhasm: mem64[ap + 4028] = temp1 +# asm 1: mov <temp1=int64#2,4028(<ap=int64#1) +# asm 2: mov <temp1=%esi,4028(<ap=%rdi) +mov %esi,4028(%rdi) + +# qhasm: mem64[ap + 3964] = temp2 +# asm 1: mov <temp2=int64#3,3964(<ap=int64#1) +# asm 2: mov <temp2=%edx,3964(<ap=%rdi) +mov %edx,3964(%rdi) + +# qhasm: return +add %r11,%rsp +ret diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/cbd.s b/crypt/liboqs/kex_rlwe_newhope/avx2/cbd.s new file mode 100644 index 0000000000000000000000000000000000000000..7619a31ff038bd76c4b00507ccaa38e99da18385 --- /dev/null +++ 
b/crypt/liboqs/kex_rlwe_newhope/avx2/cbd.s @@ -0,0 +1,275 @@ + +# qhasm: int64 input_0 + +# qhasm: int64 input_1 + +# qhasm: int64 input_2 + +# qhasm: int64 input_3 + +# qhasm: int64 input_4 + +# qhasm: int64 input_5 + +# qhasm: stack64 input_6 + +# qhasm: stack64 input_7 + +# qhasm: int64 caller_r11 + +# qhasm: int64 caller_r12 + +# qhasm: int64 caller_r13 + +# qhasm: int64 caller_r14 + +# qhasm: int64 caller_r15 + +# qhasm: int64 caller_rbx + +# qhasm: int64 caller_rbp + +# qhasm: reg256 r + +# qhasm: reg256 r2 + +# qhasm: reg256 a0 + +# qhasm: reg256 b0 + +# qhasm: reg256 a1 + +# qhasm: reg256 b1 + +# qhasm: reg256 t + +# qhasm: reg256 l + +# qhasm: reg256 h + +# qhasm: reg256 _mask1 + +# qhasm: reg256 _maskffff + +# qhasm: reg256 _maskff + +# qhasm: reg256 _q8x + +# qhasm: int64 ctr + +# qhasm: enter cbd +.p2align 5 +.global _cbd +.global cbd +_cbd: +cbd: +mov %rsp,%r11 +and $31,%r11 +add $0,%r11 +sub %r11,%rsp + +# qhasm: _mask1 = mem256[mask1] +# asm 1: vmovdqu mask1,>_mask1=reg256#1 +# asm 2: vmovdqu mask1,>_mask1=%ymm0 +vmovdqu mask1,%ymm0 + +# qhasm: _maskffff = mem256[maskffff] +# asm 1: vmovdqu maskffff,>_maskffff=reg256#2 +# asm 2: vmovdqu maskffff,>_maskffff=%ymm1 +vmovdqu maskffff,%ymm1 + +# qhasm: _maskff = mem256[maskff] +# asm 1: vmovdqu maskff,>_maskff=reg256#3 +# asm 2: vmovdqu maskff,>_maskff=%ymm2 +vmovdqu maskff,%ymm2 + +# qhasm: _q8x = mem256[q8x] +# asm 1: vmovdqu q8x,>_q8x=reg256#4 +# asm 2: vmovdqu q8x,>_q8x=%ymm3 +vmovdqu q8x,%ymm3 + +# qhasm: ctr = 128 +# asm 1: mov $128,>ctr=int64#3 +# asm 2: mov $128,>ctr=%rdx +mov $128,%rdx + +# qhasm: looptop: +._looptop: + +# qhasm: r = mem256[input_1 + 0] +# asm 1: vmovupd 0(<input_1=int64#2),>r=reg256#5 +# asm 2: vmovupd 0(<input_1=%rsi),>r=%ymm4 +vmovupd 0(%rsi),%ymm4 + +# qhasm: a0 = r & _mask1 +# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>a0=reg256#6 +# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>a0=%ymm5 +vpand %ymm4,%ymm0,%ymm5 + +# qhasm: 16x r unsigned>>= 1 +# asm 1: vpsrlw 
$1,<r=reg256#5,>r=reg256#5 +# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4 +vpsrlw $1,%ymm4,%ymm4 + +# qhasm: t = r & _mask1 +# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7 +# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6 +vpand %ymm4,%ymm0,%ymm6 + +# qhasm: 16x a0 += t +# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6 +# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5 +vpaddw %ymm6,%ymm5,%ymm5 + +# qhasm: 16x r unsigned>>= 1 +# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5 +# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4 +vpsrlw $1,%ymm4,%ymm4 + +# qhasm: t = r & _mask1 +# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7 +# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6 +vpand %ymm4,%ymm0,%ymm6 + +# qhasm: 16x a0 += t +# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6 +# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5 +vpaddw %ymm6,%ymm5,%ymm5 + +# qhasm: 16x r unsigned>>= 1 +# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5 +# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4 +vpsrlw $1,%ymm4,%ymm4 + +# qhasm: t = r & _mask1 +# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7 +# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6 +vpand %ymm4,%ymm0,%ymm6 + +# qhasm: 16x a0 += t +# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6 +# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5 +vpaddw %ymm6,%ymm5,%ymm5 + +# qhasm: 16x r unsigned>>= 1 +# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5 +# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4 +vpsrlw $1,%ymm4,%ymm4 + +# qhasm: t = r & _mask1 +# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7 +# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6 +vpand %ymm4,%ymm0,%ymm6 + +# qhasm: 16x a0 += t +# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6 +# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5 +vpaddw %ymm6,%ymm5,%ymm5 + +# qhasm: 16x r unsigned>>= 1 +# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5 +# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4 +vpsrlw $1,%ymm4,%ymm4 + +# qhasm: t = r & _mask1 +# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7 +# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6 
+vpand %ymm4,%ymm0,%ymm6 + +# qhasm: 16x a0 += t +# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6 +# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5 +vpaddw %ymm6,%ymm5,%ymm5 + +# qhasm: 16x r unsigned>>= 1 +# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5 +# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4 +vpsrlw $1,%ymm4,%ymm4 + +# qhasm: t = r & _mask1 +# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7 +# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6 +vpand %ymm4,%ymm0,%ymm6 + +# qhasm: 16x a0 += t +# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6 +# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5 +vpaddw %ymm6,%ymm5,%ymm5 + +# qhasm: 16x r unsigned>>= 1 +# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5 +# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4 +vpsrlw $1,%ymm4,%ymm4 + +# qhasm: t = r & _mask1 +# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#5 +# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm4 +vpand %ymm4,%ymm0,%ymm4 + +# qhasm: 16x a0 += t +# asm 1: vpaddw <t=reg256#5,<a0=reg256#6,>a0=reg256#5 +# asm 2: vpaddw <t=%ymm4,<a0=%ymm5,>a0=%ymm4 +vpaddw %ymm4,%ymm5,%ymm4 + +# qhasm: 16x t = a0 unsigned>> 8 +# asm 1: vpsrlw $8,<a0=reg256#5,>t=reg256#6 +# asm 2: vpsrlw $8,<a0=%ymm4,>t=%ymm5 +vpsrlw $8,%ymm4,%ymm5 + +# qhasm: a0 &= _maskff +# asm 1: vpand <_maskff=reg256#3,<a0=reg256#5,<a0=reg256#5 +# asm 2: vpand <_maskff=%ymm2,<a0=%ymm4,<a0=%ymm4 +vpand %ymm2,%ymm4,%ymm4 + +# qhasm: 16x a0 += t +# asm 1: vpaddw <t=reg256#6,<a0=reg256#5,>a0=reg256#5 +# asm 2: vpaddw <t=%ymm5,<a0=%ymm4,>a0=%ymm4 +vpaddw %ymm5,%ymm4,%ymm4 + +# qhasm: 8x b0 = a0 unsigned>> 16 +# asm 1: vpsrld $16,<a0=reg256#5,>b0=reg256#6 +# asm 2: vpsrld $16,<a0=%ymm4,>b0=%ymm5 +vpsrld $16,%ymm4,%ymm5 + +# qhasm: a0 &= _maskffff +# asm 1: vpand <_maskffff=reg256#2,<a0=reg256#5,<a0=reg256#5 +# asm 2: vpand <_maskffff=%ymm1,<a0=%ymm4,<a0=%ymm4 +vpand %ymm1,%ymm4,%ymm4 + +# qhasm: 16x a0 += _q8x +# asm 1: vpaddw <_q8x=reg256#4,<a0=reg256#5,>a0=reg256#5 +# asm 2: vpaddw <_q8x=%ymm3,<a0=%ymm4,>a0=%ymm4 +vpaddw %ymm3,%ymm4,%ymm4 
+ +# qhasm: 16x a0 -= b0 +# asm 1: vpsubw <b0=reg256#6,<a0=reg256#5,>a0=reg256#5 +# asm 2: vpsubw <b0=%ymm5,<a0=%ymm4,>a0=%ymm4 +vpsubw %ymm5,%ymm4,%ymm4 + +# qhasm: mem256[input_0 + 0] = a0 +# asm 1: vmovupd <a0=reg256#5,0(<input_0=int64#1) +# asm 2: vmovupd <a0=%ymm4,0(<input_0=%rdi) +vmovupd %ymm4,0(%rdi) + +# qhasm: input_0 += 32 +# asm 1: add $32,<input_0=int64#1 +# asm 2: add $32,<input_0=%rdi +add $32,%rdi + +# qhasm: input_1 += 32 +# asm 1: add $32,<input_1=int64#2 +# asm 2: add $32,<input_1=%rsi +add $32,%rsi + +# qhasm: unsigned>? ctr -= 1 +# asm 1: sub $1,<ctr=int64#3 +# asm 2: sub $1,<ctr=%rdx +sub $1,%rdx +# comment:fp stack unchanged by jump + +# qhasm: goto looptop if unsigned> +ja ._looptop + +# qhasm: return +add %r11,%rsp +ret diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/chacha.S b/crypt/liboqs/kex_rlwe_newhope/avx2/chacha.S new file mode 100644 index 0000000000000000000000000000000000000000..4597b5a02f8f39c9aab851f118f4fc9d521bf2ae --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/chacha.S @@ -0,0 +1,1184 @@ +/* From crypto_stream/chacha20/moon/avx2/64/ + * from http://bench.cr.yp.to/supercop.html + * by Andrew Moon */ + +#define GLOBAL2(n) .globl n##_avx2; .globl _##n##_avx2 +#define GLOBAL(n) GLOBAL2(n) +#define FN2(n) .p2align 4,,15; n##_avx2:; _##n##_avx2: +#define FN(n) FN2(n) + +/* linux/elf annotations and NX indicator */ +#if defined(__linux__) && defined(__ELF__) +#define ENDFN(n) .size n##_avx2, .-n##_avx2; .type n##_avx2, @function; +#define ENDFILE() .section .note.GNU-stack,"",%progbits +#else +#define ENDFN(n) +#define ENDFILE() +#endif + +.text + +GLOBAL(chacha) +GLOBAL(xchacha) +GLOBAL(hchacha) +GLOBAL(chacha_blocks) + +/* Windows 64 calling convention fixups */ +#if defined(_WIN64) || defined(__CYGWIN64__) +FN(chacha) +subq $184, %rsp +vmovdqa %xmm6, 0(%rsp) +vmovdqa %xmm7, 16(%rsp) +vmovdqa %xmm8, 32(%rsp) +vmovdqa %xmm9, 48(%rsp) +vmovdqa %xmm10, 64(%rsp) +vmovdqa %xmm11, 80(%rsp) +vmovdqa %xmm12, 96(%rsp) +vmovdqa 
%xmm13, 112(%rsp) +vmovdqa %xmm14, 128(%rsp) +vmovdqa %xmm15, 144(%rsp) +movq %rdi, 160(%rsp) +movq %rsi, 168(%rsp) +movq %rcx, %rdi +movq %rdx, %rsi +movq %r8, %rdx +movq %r9, %rcx +movq 224(%rsp), %r8 +movq 232(%rsp), %r9 +call chacha_thunk_avx2 +vmovdqa 0(%rsp), %xmm6 +vmovdqa 16(%rsp), %xmm7 +vmovdqa 32(%rsp), %xmm8 +vmovdqa 48(%rsp), %xmm9 +vmovdqa 64(%rsp), %xmm10 +vmovdqa 80(%rsp), %xmm11 +vmovdqa 96(%rsp), %xmm12 +vmovdqa 112(%rsp), %xmm13 +vmovdqa 128(%rsp), %xmm14 +vmovdqa 144(%rsp), %xmm15 +movq 160(%rsp), %rdi +movq 168(%rsp), %rsi +addq $184, %rsp +ret +ENDFN(chacha) + +FN(xchacha) +subq $184, %rsp +vmovdqa %xmm6, 0(%rsp) +vmovdqa %xmm7, 16(%rsp) +vmovdqa %xmm8, 32(%rsp) +vmovdqa %xmm9, 48(%rsp) +vmovdqa %xmm10, 64(%rsp) +vmovdqa %xmm11, 80(%rsp) +vmovdqa %xmm12, 96(%rsp) +vmovdqa %xmm13, 112(%rsp) +vmovdqa %xmm14, 128(%rsp) +vmovdqa %xmm15, 144(%rsp) +movq %rdi, 160(%rsp) +movq %rsi, 168(%rsp) +movq %rcx, %rdi +movq %rdx, %rsi +movq %r8, %rdx +movq %r9, %rcx +movq 224(%rsp), %r8 +movq 232(%rsp), %r9 +call xchacha_thunk_avx2 +vmovdqa 0(%rsp), %xmm6 +vmovdqa 16(%rsp), %xmm7 +vmovdqa 32(%rsp), %xmm8 +vmovdqa 48(%rsp), %xmm9 +vmovdqa 64(%rsp), %xmm10 +vmovdqa 80(%rsp), %xmm11 +vmovdqa 96(%rsp), %xmm12 +vmovdqa 112(%rsp), %xmm13 +vmovdqa 128(%rsp), %xmm14 +vmovdqa 144(%rsp), %xmm15 +movq 160(%rsp), %rdi +movq 168(%rsp), %rsi +addq $184, %rsp +ret +ENDFN(xchacha) + +FN(chacha_blocks) +subq $184, %rsp +movdqa %xmm6, 0(%rsp) +movdqa %xmm7, 16(%rsp) +movdqa %xmm8, 32(%rsp) +movdqa %xmm9, 48(%rsp) +movdqa %xmm10, 64(%rsp) +movdqa %xmm11, 80(%rsp) +movdqa %xmm12, 96(%rsp) +movdqa %xmm13, 112(%rsp) +movdqa %xmm14, 128(%rsp) +movdqa %xmm15, 144(%rsp) +movq %rdi, 160(%rsp) +movq %rsi, 168(%rsp) +movq %rcx, %rdi +movq %rdx, %rsi +movq %r8, %rdx +movq %r9, %rcx +call chacha_blocks_thunk_avx2 +movdqa 0(%rsp), %xmm6 +movdqa 16(%rsp), %xmm7 +movdqa 32(%rsp), %xmm8 +movdqa 48(%rsp), %xmm9 +movdqa 64(%rsp), %xmm10 +movdqa 80(%rsp), %xmm11 +movdqa 96(%rsp), %xmm12 +movdqa 
112(%rsp), %xmm13 +movdqa 128(%rsp), %xmm14 +movdqa 144(%rsp), %xmm15 +movq 160(%rsp), %rdi +movq 168(%rsp), %rsi +addq $184, %rsp +ret +ENDFN(chacha_blocks) + +FN(hchacha) +subq $40, %rsp +movdqa %xmm6, 0(%rsp) +movq %rdi, 16(%rsp) +movq %rsi, 24(%rsp) +movq %rcx, %rdi +movq %rdx, %rsi +movq %r8, %rdx +movq %r9, %rcx +call hchacha_thunk_avx2 +movdqa 0(%rsp), %xmm6 +movq 16(%rsp), %rdi +movq 24(%rsp), %rsi +addq $40, %rsp +ret +ENDFN(hchacha) + +#define chacha chacha_thunk +#define xchacha xchacha_thunk +#define hchacha hchacha_thunk +#define chacha_blocks chacha_blocks_thunk +#endif + + +FN(chacha_blocks) +chacha_blocks_avx2_local: +pushq %rbx +pushq %rbp +pushq %r12 +pushq %r13 +pushq %r14 +movq %rsp, %rbp +andq $~63, %rsp +subq $512, %rsp +leaq C(%rip), %rax +vmovdqa 0(%rax), %xmm8 +vmovdqa 16(%rax), %xmm6 +vmovdqa 32(%rax), %xmm7 +vmovdqa 0(%rdi), %xmm9 +vmovdqa 16(%rdi), %xmm10 +vmovdqa 32(%rdi), %xmm11 +movq 48(%rdi), %rax +movq $1, %r9 +vmovdqa %xmm8, 0(%rsp) +vmovdqa %xmm9, 16(%rsp) +vmovdqa %xmm10, 32(%rsp) +vmovdqa %xmm11, 48(%rsp) +movq %rax, 64(%rsp) +vmovdqa %xmm6, 448(%rsp) +vmovdqa %xmm6, 464(%rsp) +vmovdqa %xmm7, 480(%rsp) +vmovdqa %xmm7, 496(%rsp) +cmpq $512, %rcx +jae chacha_blocks_avx2_atleast512 +cmp $256, %rcx +jae chacha_blocks_avx2_atleast256 +jmp chacha_blocks_avx2_below256 +.p2align 6,,63 +chacha_blocks_avx2_atleast512: +movq 48(%rsp), %rax +leaq 1(%rax), %r8 +leaq 2(%rax), %r9 +leaq 3(%rax), %r10 +leaq 4(%rax), %rbx +leaq 5(%rax), %r11 +leaq 6(%rax), %r12 +leaq 7(%rax), %r13 +leaq 8(%rax), %r14 +movl %eax, 128(%rsp) +movl %r8d, 4+128(%rsp) +movl %r9d, 8+128(%rsp) +movl %r10d, 12+128(%rsp) +movl %ebx, 16+128(%rsp) +movl %r11d, 20+128(%rsp) +movl %r12d, 24+128(%rsp) +movl %r13d, 28+128(%rsp) +shrq $32, %rax +shrq $32, %r8 +shrq $32, %r9 +shrq $32, %r10 +shrq $32, %rbx +shrq $32, %r11 +shrq $32, %r12 +shrq $32, %r13 +movl %eax, 160(%rsp) +movl %r8d, 4+160(%rsp) +movl %r9d, 8+160(%rsp) +movl %r10d, 12+160(%rsp) +movl %ebx, 16+160(%rsp) +movl 
%r11d, 20+160(%rsp) +movl %r12d, 24+160(%rsp) +movl %r13d, 28+160(%rsp) +movq %r14, 48(%rsp) +movq 64(%rsp), %rax +vpbroadcastd 0(%rsp), %ymm0 +vpbroadcastd 4+0(%rsp), %ymm1 +vpbroadcastd 8+0(%rsp), %ymm2 +vpbroadcastd 12+0(%rsp), %ymm3 +vpbroadcastd 16(%rsp), %ymm4 +vpbroadcastd 4+16(%rsp), %ymm5 +vpbroadcastd 8+16(%rsp), %ymm6 +vpbroadcastd 12+16(%rsp), %ymm7 +vpbroadcastd 32(%rsp), %ymm8 +vpbroadcastd 4+32(%rsp), %ymm9 +vpbroadcastd 8+32(%rsp), %ymm10 +vpbroadcastd 12+32(%rsp), %ymm11 +vpbroadcastd 8+48(%rsp), %ymm14 +vpbroadcastd 12+48(%rsp), %ymm15 +vmovdqa 128(%rsp), %ymm12 +vmovdqa 160(%rsp), %ymm13 +chacha_blocks_avx2_mainloop1: +vpaddd %ymm0, %ymm4, %ymm0 +vpaddd %ymm1, %ymm5, %ymm1 +vpxor %ymm12, %ymm0, %ymm12 +vpxor %ymm13, %ymm1, %ymm13 +vpaddd %ymm2, %ymm6, %ymm2 +vpaddd %ymm3, %ymm7, %ymm3 +vpxor %ymm14, %ymm2, %ymm14 +vpxor %ymm15, %ymm3, %ymm15 +vpshufb 448(%rsp), %ymm12, %ymm12 +vpshufb 448(%rsp), %ymm13, %ymm13 +vpaddd %ymm8, %ymm12, %ymm8 +vpaddd %ymm9, %ymm13, %ymm9 +vpshufb 448(%rsp), %ymm14, %ymm14 +vpshufb 448(%rsp), %ymm15, %ymm15 +vpaddd %ymm10, %ymm14, %ymm10 +vpaddd %ymm11, %ymm15, %ymm11 +vmovdqa %ymm12, 96(%rsp) +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm5, %ymm9, %ymm5 +vpslld $ 12, %ymm4, %ymm12 +vpsrld $20, %ymm4, %ymm4 +vpxor %ymm4, %ymm12, %ymm4 +vpslld $ 12, %ymm5, %ymm12 +vpsrld $20, %ymm5, %ymm5 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm6, %ymm10, %ymm6 +vpxor %ymm7, %ymm11, %ymm7 +vpslld $ 12, %ymm6, %ymm12 +vpsrld $20, %ymm6, %ymm6 +vpxor %ymm6, %ymm12, %ymm6 +vpslld $ 12, %ymm7, %ymm12 +vpsrld $20, %ymm7, %ymm7 +vpxor %ymm7, %ymm12, %ymm7 +vpaddd %ymm0, %ymm4, %ymm0 +vpaddd %ymm1, %ymm5, %ymm1 +vpxor 96(%rsp), %ymm0, %ymm12 +vpxor %ymm13, %ymm1, %ymm13 +vpaddd %ymm2, %ymm6, %ymm2 +vpaddd %ymm3, %ymm7, %ymm3 +vpxor %ymm14, %ymm2, %ymm14 +vpxor %ymm15, %ymm3, %ymm15 +vpshufb 480(%rsp), %ymm12, %ymm12 +vpshufb 480(%rsp), %ymm13, %ymm13 +vpaddd %ymm8, %ymm12, %ymm8 +vpaddd %ymm9, %ymm13, %ymm9 +vpshufb 480(%rsp), %ymm14, %ymm14 +vpshufb 
480(%rsp), %ymm15, %ymm15 +vpaddd %ymm10, %ymm14, %ymm10 +vpaddd %ymm11, %ymm15, %ymm11 +vmovdqa %ymm12, 96(%rsp) +vpxor %ymm4, %ymm8, %ymm4 +vpxor %ymm5, %ymm9, %ymm5 +vpslld $ 7, %ymm4, %ymm12 +vpsrld $25, %ymm4, %ymm4 +vpxor %ymm4, %ymm12, %ymm4 +vpslld $ 7, %ymm5, %ymm12 +vpsrld $25, %ymm5, %ymm5 +vpxor %ymm5, %ymm12, %ymm5 +vpxor %ymm6, %ymm10, %ymm6 +vpxor %ymm7, %ymm11, %ymm7 +vpslld $ 7, %ymm6, %ymm12 +vpsrld $25, %ymm6, %ymm6 +vpxor %ymm6, %ymm12, %ymm6 +vpslld $ 7, %ymm7, %ymm12 +vpsrld $25, %ymm7, %ymm7 +vpxor %ymm7, %ymm12, %ymm7 +vpaddd %ymm0, %ymm5, %ymm0 +vpaddd %ymm1, %ymm6, %ymm1 +vpxor %ymm15, %ymm0, %ymm15 +vpxor 96(%rsp), %ymm1, %ymm12 +vpaddd %ymm2, %ymm7, %ymm2 +vpaddd %ymm3, %ymm4, %ymm3 +vpxor %ymm13, %ymm2, %ymm13 +vpxor %ymm14, %ymm3, %ymm14 +vpshufb 448(%rsp), %ymm15, %ymm15 +vpshufb 448(%rsp), %ymm12, %ymm12 +vpaddd %ymm10, %ymm15, %ymm10 +vpaddd %ymm11, %ymm12, %ymm11 +vpshufb 448(%rsp), %ymm13, %ymm13 +vpshufb 448(%rsp), %ymm14, %ymm14 +vpaddd %ymm8, %ymm13, %ymm8 +vpaddd %ymm9, %ymm14, %ymm9 +vmovdqa %ymm15, 96(%rsp) +vpxor %ymm5, %ymm10, %ymm5 +vpxor %ymm6, %ymm11, %ymm6 +vpslld $ 12, %ymm5, %ymm15 +vpsrld $20, %ymm5, %ymm5 +vpxor %ymm5, %ymm15, %ymm5 +vpslld $ 12, %ymm6, %ymm15 +vpsrld $20, %ymm6, %ymm6 +vpxor %ymm6, %ymm15, %ymm6 +vpxor %ymm7, %ymm8, %ymm7 +vpxor %ymm4, %ymm9, %ymm4 +vpslld $ 12, %ymm7, %ymm15 +vpsrld $20, %ymm7, %ymm7 +vpxor %ymm7, %ymm15, %ymm7 +vpslld $ 12, %ymm4, %ymm15 +vpsrld $20, %ymm4, %ymm4 +vpxor %ymm4, %ymm15, %ymm4 +vpaddd %ymm0, %ymm5, %ymm0 +vpaddd %ymm1, %ymm6, %ymm1 +vpxor 96(%rsp), %ymm0, %ymm15 +vpxor %ymm12, %ymm1, %ymm12 +vpaddd %ymm2, %ymm7, %ymm2 +vpaddd %ymm3, %ymm4, %ymm3 +vpxor %ymm13, %ymm2, %ymm13 +vpxor %ymm14, %ymm3, %ymm14 +vpshufb 480(%rsp), %ymm15, %ymm15 +vpshufb 480(%rsp), %ymm12, %ymm12 +vpaddd %ymm10, %ymm15, %ymm10 +vpaddd %ymm11, %ymm12, %ymm11 +vpshufb 480(%rsp), %ymm13, %ymm13 +vpshufb 480(%rsp), %ymm14, %ymm14 +vpaddd %ymm8, %ymm13, %ymm8 +vpaddd %ymm9, %ymm14, %ymm9 
+vmovdqa %ymm15, 96(%rsp) +vpxor %ymm5, %ymm10, %ymm5 +vpxor %ymm6, %ymm11, %ymm6 +vpslld $ 7, %ymm5, %ymm15 +vpsrld $25, %ymm5, %ymm5 +vpxor %ymm5, %ymm15, %ymm5 +vpslld $ 7, %ymm6, %ymm15 +vpsrld $25, %ymm6, %ymm6 +vpxor %ymm6, %ymm15, %ymm6 +vpxor %ymm7, %ymm8, %ymm7 +vpxor %ymm4, %ymm9, %ymm4 +vpslld $ 7, %ymm7, %ymm15 +vpsrld $25, %ymm7, %ymm7 +vpxor %ymm7, %ymm15, %ymm7 +vpslld $ 7, %ymm4, %ymm15 +vpsrld $25, %ymm4, %ymm4 +vpxor %ymm4, %ymm15, %ymm4 +vmovdqa 96(%rsp), %ymm15 +subq $2, %rax +jnz chacha_blocks_avx2_mainloop1 +vmovdqa %ymm8, 192(%rsp) +vmovdqa %ymm9, 224(%rsp) +vmovdqa %ymm10, 256(%rsp) +vmovdqa %ymm11, 288(%rsp) +vmovdqa %ymm12, 320(%rsp) +vmovdqa %ymm13, 352(%rsp) +vmovdqa %ymm14, 384(%rsp) +vmovdqa %ymm15, 416(%rsp) +vpbroadcastd 0(%rsp), %ymm8 +vpbroadcastd 4+0(%rsp), %ymm9 +vpbroadcastd 8+0(%rsp), %ymm10 +vpbroadcastd 12+0(%rsp), %ymm11 +vpbroadcastd 16(%rsp), %ymm12 +vpbroadcastd 4+16(%rsp), %ymm13 +vpbroadcastd 8+16(%rsp), %ymm14 +vpbroadcastd 12+16(%rsp), %ymm15 +vpaddd %ymm8, %ymm0, %ymm0 +vpaddd %ymm9, %ymm1, %ymm1 +vpaddd %ymm10, %ymm2, %ymm2 +vpaddd %ymm11, %ymm3, %ymm3 +vpaddd %ymm12, %ymm4, %ymm4 +vpaddd %ymm13, %ymm5, %ymm5 +vpaddd %ymm14, %ymm6, %ymm6 +vpaddd %ymm15, %ymm7, %ymm7 +vpunpckldq %ymm1, %ymm0, %ymm8 +vpunpckldq %ymm3, %ymm2, %ymm9 +vpunpckhdq %ymm1, %ymm0, %ymm12 +vpunpckhdq %ymm3, %ymm2, %ymm13 +vpunpckldq %ymm5, %ymm4, %ymm10 +vpunpckldq %ymm7, %ymm6, %ymm11 +vpunpckhdq %ymm5, %ymm4, %ymm14 +vpunpckhdq %ymm7, %ymm6, %ymm15 +vpunpcklqdq %ymm9, %ymm8, %ymm0 +vpunpcklqdq %ymm11, %ymm10, %ymm1 +vpunpckhqdq %ymm9, %ymm8, %ymm2 +vpunpckhqdq %ymm11, %ymm10, %ymm3 +vpunpcklqdq %ymm13, %ymm12, %ymm4 +vpunpcklqdq %ymm15, %ymm14, %ymm5 +vpunpckhqdq %ymm13, %ymm12, %ymm6 +vpunpckhqdq %ymm15, %ymm14, %ymm7 +vperm2i128 $0x20, %ymm1, %ymm0, %ymm8 +vperm2i128 $0x20, %ymm3, %ymm2, %ymm9 +vperm2i128 $0x31, %ymm1, %ymm0, %ymm12 +vperm2i128 $0x31, %ymm3, %ymm2, %ymm13 +vperm2i128 $0x20, %ymm5, %ymm4, %ymm10 +vperm2i128 $0x20, %ymm7, 
%ymm6, %ymm11 +vperm2i128 $0x31, %ymm5, %ymm4, %ymm14 +vperm2i128 $0x31, %ymm7, %ymm6, %ymm15 +andq %rsi, %rsi +jz chacha_blocks_avx2_noinput1 +vpxor 0(%rsi), %ymm8, %ymm8 +vpxor 64(%rsi), %ymm9, %ymm9 +vpxor 128(%rsi), %ymm10, %ymm10 +vpxor 192(%rsi), %ymm11, %ymm11 +vpxor 256(%rsi), %ymm12, %ymm12 +vpxor 320(%rsi), %ymm13, %ymm13 +vpxor 384(%rsi), %ymm14, %ymm14 +vpxor 448(%rsi), %ymm15, %ymm15 +vmovdqu %ymm8, 0(%rdx) +vmovdqu %ymm9, 64(%rdx) +vmovdqu %ymm10, 128(%rdx) +vmovdqu %ymm11, 192(%rdx) +vmovdqu %ymm12, 256(%rdx) +vmovdqu %ymm13, 320(%rdx) +vmovdqu %ymm14, 384(%rdx) +vmovdqu %ymm15, 448(%rdx) +vmovdqa 192(%rsp), %ymm0 +vmovdqa 224(%rsp), %ymm1 +vmovdqa 256(%rsp), %ymm2 +vmovdqa 288(%rsp), %ymm3 +vmovdqa 320(%rsp), %ymm4 +vmovdqa 352(%rsp), %ymm5 +vmovdqa 384(%rsp), %ymm6 +vmovdqa 416(%rsp), %ymm7 +vpbroadcastd 32(%rsp), %ymm8 +vpbroadcastd 4+32(%rsp), %ymm9 +vpbroadcastd 8+32(%rsp), %ymm10 +vpbroadcastd 12+32(%rsp), %ymm11 +vmovdqa 128(%rsp), %ymm12 +vmovdqa 160(%rsp), %ymm13 +vpbroadcastd 8+48(%rsp), %ymm14 +vpbroadcastd 12+48(%rsp), %ymm15 +vpaddd %ymm8, %ymm0, %ymm0 +vpaddd %ymm9, %ymm1, %ymm1 +vpaddd %ymm10, %ymm2, %ymm2 +vpaddd %ymm11, %ymm3, %ymm3 +vpaddd %ymm12, %ymm4, %ymm4 +vpaddd %ymm13, %ymm5, %ymm5 +vpaddd %ymm14, %ymm6, %ymm6 +vpaddd %ymm15, %ymm7, %ymm7 +vpunpckldq %ymm1, %ymm0, %ymm8 +vpunpckldq %ymm3, %ymm2, %ymm9 +vpunpckhdq %ymm1, %ymm0, %ymm12 +vpunpckhdq %ymm3, %ymm2, %ymm13 +vpunpckldq %ymm5, %ymm4, %ymm10 +vpunpckldq %ymm7, %ymm6, %ymm11 +vpunpckhdq %ymm5, %ymm4, %ymm14 +vpunpckhdq %ymm7, %ymm6, %ymm15 +vpunpcklqdq %ymm9, %ymm8, %ymm0 +vpunpcklqdq %ymm11, %ymm10, %ymm1 +vpunpckhqdq %ymm9, %ymm8, %ymm2 +vpunpckhqdq %ymm11, %ymm10, %ymm3 +vpunpcklqdq %ymm13, %ymm12, %ymm4 +vpunpcklqdq %ymm15, %ymm14, %ymm5 +vpunpckhqdq %ymm13, %ymm12, %ymm6 +vpunpckhqdq %ymm15, %ymm14, %ymm7 +vperm2i128 $0x20, %ymm1, %ymm0, %ymm8 +vperm2i128 $0x20, %ymm3, %ymm2, %ymm9 +vperm2i128 $0x31, %ymm1, %ymm0, %ymm12 +vperm2i128 $0x31, %ymm3, %ymm2, %ymm13 
+vperm2i128 $0x20, %ymm5, %ymm4, %ymm10 +vperm2i128 $0x20, %ymm7, %ymm6, %ymm11 +vperm2i128 $0x31, %ymm5, %ymm4, %ymm14 +vperm2i128 $0x31, %ymm7, %ymm6, %ymm15 +vpxor 32(%rsi), %ymm8, %ymm8 +vpxor 96(%rsi), %ymm9, %ymm9 +vpxor 160(%rsi), %ymm10, %ymm10 +vpxor 224(%rsi), %ymm11, %ymm11 +vpxor 288(%rsi), %ymm12, %ymm12 +vpxor 352(%rsi), %ymm13, %ymm13 +vpxor 416(%rsi), %ymm14, %ymm14 +vpxor 480(%rsi), %ymm15, %ymm15 +vmovdqu %ymm8, 32(%rdx) +vmovdqu %ymm9, 96(%rdx) +vmovdqu %ymm10, 160(%rdx) +vmovdqu %ymm11, 224(%rdx) +vmovdqu %ymm12, 288(%rdx) +vmovdqu %ymm13, 352(%rdx) +vmovdqu %ymm14, 416(%rdx) +vmovdqu %ymm15, 480(%rdx) +addq $512, %rsi +jmp chacha_blocks_avx2_mainloop1_cont +chacha_blocks_avx2_noinput1: +vmovdqu %ymm8, 0(%rdx) +vmovdqu %ymm9, 64(%rdx) +vmovdqu %ymm10, 128(%rdx) +vmovdqu %ymm11, 192(%rdx) +vmovdqu %ymm12, 256(%rdx) +vmovdqu %ymm13, 320(%rdx) +vmovdqu %ymm14, 384(%rdx) +vmovdqu %ymm15, 448(%rdx) +vmovdqa 192(%rsp), %ymm0 +vmovdqa 224(%rsp), %ymm1 +vmovdqa 256(%rsp), %ymm2 +vmovdqa 288(%rsp), %ymm3 +vmovdqa 320(%rsp), %ymm4 +vmovdqa 352(%rsp), %ymm5 +vmovdqa 384(%rsp), %ymm6 +vmovdqa 416(%rsp), %ymm7 +vpbroadcastd 32(%rsp), %ymm8 +vpbroadcastd 4+32(%rsp), %ymm9 +vpbroadcastd 8+32(%rsp), %ymm10 +vpbroadcastd 12+32(%rsp), %ymm11 +vmovdqa 128(%rsp), %ymm12 +vmovdqa 160(%rsp), %ymm13 +vpbroadcastd 8+48(%rsp), %ymm14 +vpbroadcastd 12+48(%rsp), %ymm15 +vpaddd %ymm8, %ymm0, %ymm0 +vpaddd %ymm9, %ymm1, %ymm1 +vpaddd %ymm10, %ymm2, %ymm2 +vpaddd %ymm11, %ymm3, %ymm3 +vpaddd %ymm12, %ymm4, %ymm4 +vpaddd %ymm13, %ymm5, %ymm5 +vpaddd %ymm14, %ymm6, %ymm6 +vpaddd %ymm15, %ymm7, %ymm7 +vpunpckldq %ymm1, %ymm0, %ymm8 +vpunpckldq %ymm3, %ymm2, %ymm9 +vpunpckhdq %ymm1, %ymm0, %ymm12 +vpunpckhdq %ymm3, %ymm2, %ymm13 +vpunpckldq %ymm5, %ymm4, %ymm10 +vpunpckldq %ymm7, %ymm6, %ymm11 +vpunpckhdq %ymm5, %ymm4, %ymm14 +vpunpckhdq %ymm7, %ymm6, %ymm15 +vpunpcklqdq %ymm9, %ymm8, %ymm0 +vpunpcklqdq %ymm11, %ymm10, %ymm1 +vpunpckhqdq %ymm9, %ymm8, %ymm2 +vpunpckhqdq %ymm11, 
%ymm10, %ymm3 +vpunpcklqdq %ymm13, %ymm12, %ymm4 +vpunpcklqdq %ymm15, %ymm14, %ymm5 +vpunpckhqdq %ymm13, %ymm12, %ymm6 +vpunpckhqdq %ymm15, %ymm14, %ymm7 +vperm2i128 $0x20, %ymm1, %ymm0, %ymm8 +vperm2i128 $0x20, %ymm3, %ymm2, %ymm9 +vperm2i128 $0x31, %ymm1, %ymm0, %ymm12 +vperm2i128 $0x31, %ymm3, %ymm2, %ymm13 +vperm2i128 $0x20, %ymm5, %ymm4, %ymm10 +vperm2i128 $0x20, %ymm7, %ymm6, %ymm11 +vperm2i128 $0x31, %ymm5, %ymm4, %ymm14 +vperm2i128 $0x31, %ymm7, %ymm6, %ymm15 +vmovdqu %ymm8, 32(%rdx) +vmovdqu %ymm9, 96(%rdx) +vmovdqu %ymm10, 160(%rdx) +vmovdqu %ymm11, 224(%rdx) +vmovdqu %ymm12, 288(%rdx) +vmovdqu %ymm13, 352(%rdx) +vmovdqu %ymm14, 416(%rdx) +vmovdqu %ymm15, 480(%rdx) +chacha_blocks_avx2_mainloop1_cont: +addq $512, %rdx +subq $512, %rcx +cmp $512, %rcx +jae chacha_blocks_avx2_atleast512 +cmp $256, %rcx +jb chacha_blocks_avx2_below256_fixup +chacha_blocks_avx2_atleast256: +movq 48(%rsp), %rax +leaq 1(%rax), %r8 +leaq 2(%rax), %r9 +leaq 3(%rax), %r10 +leaq 4(%rax), %rbx +movl %eax, 128(%rsp) +movl %r8d, 4+128(%rsp) +movl %r9d, 8+128(%rsp) +movl %r10d, 12+128(%rsp) +shrq $32, %rax +shrq $32, %r8 +shrq $32, %r9 +shrq $32, %r10 +movl %eax, 160(%rsp) +movl %r8d, 4+160(%rsp) +movl %r9d, 8+160(%rsp) +movl %r10d, 12+160(%rsp) +movq %rbx, 48(%rsp) +movq 64(%rsp), %rax +vpbroadcastd 0(%rsp), %xmm0 +vpbroadcastd 4+0(%rsp), %xmm1 +vpbroadcastd 8+0(%rsp), %xmm2 +vpbroadcastd 12+0(%rsp), %xmm3 +vpbroadcastd 16(%rsp), %xmm4 +vpbroadcastd 4+16(%rsp), %xmm5 +vpbroadcastd 8+16(%rsp), %xmm6 +vpbroadcastd 12+16(%rsp), %xmm7 +vpbroadcastd 32(%rsp), %xmm8 +vpbroadcastd 4+32(%rsp), %xmm9 +vpbroadcastd 8+32(%rsp), %xmm10 +vpbroadcastd 12+32(%rsp), %xmm11 +vmovdqa 128(%rsp), %xmm12 +vmovdqa 160(%rsp), %xmm13 +vpbroadcastd 8+48(%rsp), %xmm14 +vpbroadcastd 12+48(%rsp), %xmm15 +chacha_blocks_avx2_mainloop2: +vpaddd %xmm0, %xmm4, %xmm0 +vpaddd %xmm1, %xmm5, %xmm1 +vpxor %xmm12, %xmm0, %xmm12 +vpxor %xmm13, %xmm1, %xmm13 +vpaddd %xmm2, %xmm6, %xmm2 +vpaddd %xmm3, %xmm7, %xmm3 +vpxor 
%xmm14, %xmm2, %xmm14 +vpxor %xmm15, %xmm3, %xmm15 +vpshufb 448(%rsp), %xmm12, %xmm12 +vpshufb 448(%rsp), %xmm13, %xmm13 +vpaddd %xmm8, %xmm12, %xmm8 +vpaddd %xmm9, %xmm13, %xmm9 +vpshufb 448(%rsp), %xmm14, %xmm14 +vpshufb 448(%rsp), %xmm15, %xmm15 +vpaddd %xmm10, %xmm14, %xmm10 +vpaddd %xmm11, %xmm15, %xmm11 +vmovdqa %xmm12, 96(%rsp) +vpxor %xmm4, %xmm8, %xmm4 +vpxor %xmm5, %xmm9, %xmm5 +vpslld $ 12, %xmm4, %xmm12 +vpsrld $20, %xmm4, %xmm4 +vpxor %xmm4, %xmm12, %xmm4 +vpslld $ 12, %xmm5, %xmm12 +vpsrld $20, %xmm5, %xmm5 +vpxor %xmm5, %xmm12, %xmm5 +vpxor %xmm6, %xmm10, %xmm6 +vpxor %xmm7, %xmm11, %xmm7 +vpslld $ 12, %xmm6, %xmm12 +vpsrld $20, %xmm6, %xmm6 +vpxor %xmm6, %xmm12, %xmm6 +vpslld $ 12, %xmm7, %xmm12 +vpsrld $20, %xmm7, %xmm7 +vpxor %xmm7, %xmm12, %xmm7 +vpaddd %xmm0, %xmm4, %xmm0 +vpaddd %xmm1, %xmm5, %xmm1 +vpxor 96(%rsp), %xmm0, %xmm12 +vpxor %xmm13, %xmm1, %xmm13 +vpaddd %xmm2, %xmm6, %xmm2 +vpaddd %xmm3, %xmm7, %xmm3 +vpxor %xmm14, %xmm2, %xmm14 +vpxor %xmm15, %xmm3, %xmm15 +vpshufb 480(%rsp), %xmm12, %xmm12 +vpshufb 480(%rsp), %xmm13, %xmm13 +vpaddd %xmm8, %xmm12, %xmm8 +vpaddd %xmm9, %xmm13, %xmm9 +vpshufb 480(%rsp), %xmm14, %xmm14 +vpshufb 480(%rsp), %xmm15, %xmm15 +vpaddd %xmm10, %xmm14, %xmm10 +vpaddd %xmm11, %xmm15, %xmm11 +vmovdqa %xmm12, 96(%rsp) +vpxor %xmm4, %xmm8, %xmm4 +vpxor %xmm5, %xmm9, %xmm5 +vpslld $ 7, %xmm4, %xmm12 +vpsrld $25, %xmm4, %xmm4 +vpxor %xmm4, %xmm12, %xmm4 +vpslld $ 7, %xmm5, %xmm12 +vpsrld $25, %xmm5, %xmm5 +vpxor %xmm5, %xmm12, %xmm5 +vpxor %xmm6, %xmm10, %xmm6 +vpxor %xmm7, %xmm11, %xmm7 +vpslld $ 7, %xmm6, %xmm12 +vpsrld $25, %xmm6, %xmm6 +vpxor %xmm6, %xmm12, %xmm6 +vpslld $ 7, %xmm7, %xmm12 +vpsrld $25, %xmm7, %xmm7 +vpxor %xmm7, %xmm12, %xmm7 +vpaddd %xmm0, %xmm5, %xmm0 +vpaddd %xmm1, %xmm6, %xmm1 +vpxor %xmm15, %xmm0, %xmm15 +vpxor 96(%rsp), %xmm1, %xmm12 +vpaddd %xmm2, %xmm7, %xmm2 +vpaddd %xmm3, %xmm4, %xmm3 +vpxor %xmm13, %xmm2, %xmm13 +vpxor %xmm14, %xmm3, %xmm14 +vpshufb 448(%rsp), %xmm15, %xmm15 +vpshufb 
448(%rsp), %xmm12, %xmm12 +vpaddd %xmm10, %xmm15, %xmm10 +vpaddd %xmm11, %xmm12, %xmm11 +vpshufb 448(%rsp), %xmm13, %xmm13 +vpshufb 448(%rsp), %xmm14, %xmm14 +vpaddd %xmm8, %xmm13, %xmm8 +vpaddd %xmm9, %xmm14, %xmm9 +vmovdqa %xmm15, 96(%rsp) +vpxor %xmm5, %xmm10, %xmm5 +vpxor %xmm6, %xmm11, %xmm6 +vpslld $ 12, %xmm5, %xmm15 +vpsrld $20, %xmm5, %xmm5 +vpxor %xmm5, %xmm15, %xmm5 +vpslld $ 12, %xmm6, %xmm15 +vpsrld $20, %xmm6, %xmm6 +vpxor %xmm6, %xmm15, %xmm6 +vpxor %xmm7, %xmm8, %xmm7 +vpxor %xmm4, %xmm9, %xmm4 +vpslld $ 12, %xmm7, %xmm15 +vpsrld $20, %xmm7, %xmm7 +vpxor %xmm7, %xmm15, %xmm7 +vpslld $ 12, %xmm4, %xmm15 +vpsrld $20, %xmm4, %xmm4 +vpxor %xmm4, %xmm15, %xmm4 +vpaddd %xmm0, %xmm5, %xmm0 +vpaddd %xmm1, %xmm6, %xmm1 +vpxor 96(%rsp), %xmm0, %xmm15 +vpxor %xmm12, %xmm1, %xmm12 +vpaddd %xmm2, %xmm7, %xmm2 +vpaddd %xmm3, %xmm4, %xmm3 +vpxor %xmm13, %xmm2, %xmm13 +vpxor %xmm14, %xmm3, %xmm14 +vpshufb 480(%rsp), %xmm15, %xmm15 +vpshufb 480(%rsp), %xmm12, %xmm12 +vpaddd %xmm10, %xmm15, %xmm10 +vpaddd %xmm11, %xmm12, %xmm11 +vpshufb 480(%rsp), %xmm13, %xmm13 +vpshufb 480(%rsp), %xmm14, %xmm14 +vpaddd %xmm8, %xmm13, %xmm8 +vpaddd %xmm9, %xmm14, %xmm9 +vmovdqa %xmm15, 96(%rsp) +vpxor %xmm5, %xmm10, %xmm5 +vpxor %xmm6, %xmm11, %xmm6 +vpslld $ 7, %xmm5, %xmm15 +vpsrld $25, %xmm5, %xmm5 +vpxor %xmm5, %xmm15, %xmm5 +vpslld $ 7, %xmm6, %xmm15 +vpsrld $25, %xmm6, %xmm6 +vpxor %xmm6, %xmm15, %xmm6 +vpxor %xmm7, %xmm8, %xmm7 +vpxor %xmm4, %xmm9, %xmm4 +vpslld $ 7, %xmm7, %xmm15 +vpsrld $25, %xmm7, %xmm7 +vpxor %xmm7, %xmm15, %xmm7 +vpslld $ 7, %xmm4, %xmm15 +vpsrld $25, %xmm4, %xmm4 +vpxor %xmm4, %xmm15, %xmm4 +vmovdqa 96(%rsp), %xmm15 +subq $2, %rax +jnz chacha_blocks_avx2_mainloop2 +vmovdqa %xmm8, 192(%rsp) +vmovdqa %xmm9, 208(%rsp) +vmovdqa %xmm10, 224(%rsp) +vmovdqa %xmm11, 240(%rsp) +vmovdqa %xmm12, 256(%rsp) +vmovdqa %xmm13, 272(%rsp) +vmovdqa %xmm14, 288(%rsp) +vmovdqa %xmm15, 304(%rsp) +vpbroadcastd 0(%rsp), %xmm8 +vpbroadcastd 4+0(%rsp), %xmm9 +vpbroadcastd 
8+0(%rsp), %xmm10 +vpbroadcastd 12+0(%rsp), %xmm11 +vpbroadcastd 16(%rsp), %xmm12 +vpbroadcastd 4+16(%rsp), %xmm13 +vpbroadcastd 8+16(%rsp), %xmm14 +vpbroadcastd 12+16(%rsp), %xmm15 +vpaddd %xmm8, %xmm0, %xmm0 +vpaddd %xmm9, %xmm1, %xmm1 +vpaddd %xmm10, %xmm2, %xmm2 +vpaddd %xmm11, %xmm3, %xmm3 +vpaddd %xmm12, %xmm4, %xmm4 +vpaddd %xmm13, %xmm5, %xmm5 +vpaddd %xmm14, %xmm6, %xmm6 +vpaddd %xmm15, %xmm7, %xmm7 +vpunpckldq %xmm1, %xmm0, %xmm8 +vpunpckldq %xmm3, %xmm2, %xmm9 +vpunpckhdq %xmm1, %xmm0, %xmm12 +vpunpckhdq %xmm3, %xmm2, %xmm13 +vpunpckldq %xmm5, %xmm4, %xmm10 +vpunpckldq %xmm7, %xmm6, %xmm11 +vpunpckhdq %xmm5, %xmm4, %xmm14 +vpunpckhdq %xmm7, %xmm6, %xmm15 +vpunpcklqdq %xmm9, %xmm8, %xmm0 +vpunpcklqdq %xmm11, %xmm10, %xmm1 +vpunpckhqdq %xmm9, %xmm8, %xmm2 +vpunpckhqdq %xmm11, %xmm10, %xmm3 +vpunpcklqdq %xmm13, %xmm12, %xmm4 +vpunpcklqdq %xmm15, %xmm14, %xmm5 +vpunpckhqdq %xmm13, %xmm12, %xmm6 +vpunpckhqdq %xmm15, %xmm14, %xmm7 +andq %rsi, %rsi +jz chacha_blocks_avx2_noinput2 +vpxor 0(%rsi), %xmm0, %xmm0 +vpxor 16(%rsi), %xmm1, %xmm1 +vpxor 64(%rsi), %xmm2, %xmm2 +vpxor 80(%rsi), %xmm3, %xmm3 +vpxor 128(%rsi), %xmm4, %xmm4 +vpxor 144(%rsi), %xmm5, %xmm5 +vpxor 192(%rsi), %xmm6, %xmm6 +vpxor 208(%rsi), %xmm7, %xmm7 +vmovdqu %xmm0, 0(%rdx) +vmovdqu %xmm1, 16(%rdx) +vmovdqu %xmm2, 64(%rdx) +vmovdqu %xmm3, 80(%rdx) +vmovdqu %xmm4, 128(%rdx) +vmovdqu %xmm5, 144(%rdx) +vmovdqu %xmm6, 192(%rdx) +vmovdqu %xmm7, 208(%rdx) +vmovdqa 192(%rsp), %xmm0 +vmovdqa 208(%rsp), %xmm1 +vmovdqa 224(%rsp), %xmm2 +vmovdqa 240(%rsp), %xmm3 +vmovdqa 256(%rsp), %xmm4 +vmovdqa 272(%rsp), %xmm5 +vmovdqa 288(%rsp), %xmm6 +vmovdqa 304(%rsp), %xmm7 +vpbroadcastd 32(%rsp), %xmm8 +vpbroadcastd 4+32(%rsp), %xmm9 +vpbroadcastd 8+32(%rsp), %xmm10 +vpbroadcastd 12+32(%rsp), %xmm11 +vmovdqa 128(%rsp), %xmm12 +vmovdqa 160(%rsp), %xmm13 +vpbroadcastd 8+48(%rsp), %xmm14 +vpbroadcastd 12+48(%rsp), %xmm15 +vpaddd %xmm8, %xmm0, %xmm0 +vpaddd %xmm9, %xmm1, %xmm1 +vpaddd %xmm10, %xmm2, %xmm2 +vpaddd 
%xmm11, %xmm3, %xmm3 +vpaddd %xmm12, %xmm4, %xmm4 +vpaddd %xmm13, %xmm5, %xmm5 +vpaddd %xmm14, %xmm6, %xmm6 +vpaddd %xmm15, %xmm7, %xmm7 +vpunpckldq %xmm1, %xmm0, %xmm8 +vpunpckldq %xmm3, %xmm2, %xmm9 +vpunpckhdq %xmm1, %xmm0, %xmm12 +vpunpckhdq %xmm3, %xmm2, %xmm13 +vpunpckldq %xmm5, %xmm4, %xmm10 +vpunpckldq %xmm7, %xmm6, %xmm11 +vpunpckhdq %xmm5, %xmm4, %xmm14 +vpunpckhdq %xmm7, %xmm6, %xmm15 +vpunpcklqdq %xmm9, %xmm8, %xmm0 +vpunpcklqdq %xmm11, %xmm10, %xmm1 +vpunpckhqdq %xmm9, %xmm8, %xmm2 +vpunpckhqdq %xmm11, %xmm10, %xmm3 +vpunpcklqdq %xmm13, %xmm12, %xmm4 +vpunpcklqdq %xmm15, %xmm14, %xmm5 +vpunpckhqdq %xmm13, %xmm12, %xmm6 +vpunpckhqdq %xmm15, %xmm14, %xmm7 +vpxor 32(%rsi), %xmm0, %xmm0 +vpxor 48(%rsi), %xmm1, %xmm1 +vpxor 96(%rsi), %xmm2, %xmm2 +vpxor 112(%rsi), %xmm3, %xmm3 +vpxor 160(%rsi), %xmm4, %xmm4 +vpxor 176(%rsi), %xmm5, %xmm5 +vpxor 224(%rsi), %xmm6, %xmm6 +vpxor 240(%rsi), %xmm7, %xmm7 +vmovdqu %xmm0, 32(%rdx) +vmovdqu %xmm1, 48(%rdx) +vmovdqu %xmm2, 96(%rdx) +vmovdqu %xmm3, 112(%rdx) +vmovdqu %xmm4, 160(%rdx) +vmovdqu %xmm5, 176(%rdx) +vmovdqu %xmm6, 224(%rdx) +vmovdqu %xmm7, 240(%rdx) +addq $256, %rsi +jmp chacha_blocks_avx2_mainloop2_cont +chacha_blocks_avx2_noinput2: +vmovdqu %xmm0, 0(%rdx) +vmovdqu %xmm1, 16(%rdx) +vmovdqu %xmm2, 64(%rdx) +vmovdqu %xmm3, 80(%rdx) +vmovdqu %xmm4, 128(%rdx) +vmovdqu %xmm5, 144(%rdx) +vmovdqu %xmm6, 192(%rdx) +vmovdqu %xmm7, 208(%rdx) +vmovdqa 192(%rsp), %xmm0 +vmovdqa 208(%rsp), %xmm1 +vmovdqa 224(%rsp), %xmm2 +vmovdqa 240(%rsp), %xmm3 +vmovdqa 256(%rsp), %xmm4 +vmovdqa 272(%rsp), %xmm5 +vmovdqa 288(%rsp), %xmm6 +vmovdqa 304(%rsp), %xmm7 +vpbroadcastd 32(%rsp), %xmm8 +vpbroadcastd 4+32(%rsp), %xmm9 +vpbroadcastd 8+32(%rsp), %xmm10 +vpbroadcastd 12+32(%rsp), %xmm11 +vmovdqa 128(%rsp), %xmm12 +vmovdqa 160(%rsp), %xmm13 +vpbroadcastd 8+48(%rsp), %xmm14 +vpbroadcastd 12+48(%rsp), %xmm15 +vpaddd %xmm8, %xmm0, %xmm0 +vpaddd %xmm9, %xmm1, %xmm1 +vpaddd %xmm10, %xmm2, %xmm2 +vpaddd %xmm11, %xmm3, %xmm3 +vpaddd 
%xmm12, %xmm4, %xmm4 +vpaddd %xmm13, %xmm5, %xmm5 +vpaddd %xmm14, %xmm6, %xmm6 +vpaddd %xmm15, %xmm7, %xmm7 +vpunpckldq %xmm1, %xmm0, %xmm8 +vpunpckldq %xmm3, %xmm2, %xmm9 +vpunpckhdq %xmm1, %xmm0, %xmm12 +vpunpckhdq %xmm3, %xmm2, %xmm13 +vpunpckldq %xmm5, %xmm4, %xmm10 +vpunpckldq %xmm7, %xmm6, %xmm11 +vpunpckhdq %xmm5, %xmm4, %xmm14 +vpunpckhdq %xmm7, %xmm6, %xmm15 +vpunpcklqdq %xmm9, %xmm8, %xmm0 +vpunpcklqdq %xmm11, %xmm10, %xmm1 +vpunpckhqdq %xmm9, %xmm8, %xmm2 +vpunpckhqdq %xmm11, %xmm10, %xmm3 +vpunpcklqdq %xmm13, %xmm12, %xmm4 +vpunpcklqdq %xmm15, %xmm14, %xmm5 +vpunpckhqdq %xmm13, %xmm12, %xmm6 +vpunpckhqdq %xmm15, %xmm14, %xmm7 +vmovdqu %xmm0, 32(%rdx) +vmovdqu %xmm1, 48(%rdx) +vmovdqu %xmm2, 96(%rdx) +vmovdqu %xmm3, 112(%rdx) +vmovdqu %xmm4, 160(%rdx) +vmovdqu %xmm5, 176(%rdx) +vmovdqu %xmm6, 224(%rdx) +vmovdqu %xmm7, 240(%rdx) +chacha_blocks_avx2_mainloop2_cont: +addq $256, %rdx +subq $256, %rcx +cmp $256, %rcx +jae chacha_blocks_avx2_atleast256 +chacha_blocks_avx2_below256_fixup: +vmovdqa 448(%rsp), %xmm6 +vmovdqa 480(%rsp), %xmm7 +vmovdqa 0(%rsp), %xmm8 +vmovdqa 16(%rsp), %xmm9 +vmovdqa 32(%rsp), %xmm10 +vmovdqa 48(%rsp), %xmm11 +movq $1, %r9 +chacha_blocks_avx2_below256: +vmovq %r9, %xmm5 +andq %rcx, %rcx +jz chacha_blocks_avx2_done +cmpq $64, %rcx +jae chacha_blocks_avx2_above63 +movq %rdx, %r9 +andq %rsi, %rsi +jz chacha_blocks_avx2_noinput3 +movq %rcx, %r10 +movq %rsp, %rdx +addq %r10, %rsi +addq %r10, %rdx +negq %r10 +chacha_blocks_avx2_copyinput: +movb (%rsi, %r10), %al +movb %al, (%rdx, %r10) +incq %r10 +jnz chacha_blocks_avx2_copyinput +movq %rsp, %rsi +chacha_blocks_avx2_noinput3: +movq %rsp, %rdx +chacha_blocks_avx2_above63: +vmovdqa %xmm8, %xmm0 +vmovdqa %xmm9, %xmm1 +vmovdqa %xmm10, %xmm2 +vmovdqa %xmm11, %xmm3 +movq 64(%rsp), %rax +chacha_blocks_avx2_mainloop3: +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, 
%xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm7, %xmm3, %xmm3 +vpshufd $0x93, %xmm0, %xmm0 +vpaddd %xmm2, %xmm3, %xmm2 +vpshufd $0x4e, %xmm3, %xmm3 +vpxor %xmm1, %xmm2, %xmm1 +vpshufd $0x39, %xmm2, %xmm2 +vpslld $7, %xmm1, %xmm4 +vpsrld $25, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm7, %xmm3, %xmm3 +vpshufd $0x39, %xmm0, %xmm0 +vpaddd %xmm2, %xmm3, %xmm2 +vpshufd $0x4e, %xmm3, %xmm3 +vpxor %xmm1, %xmm2, %xmm1 +vpshufd $0x93, %xmm2, %xmm2 +vpslld $7, %xmm1, %xmm4 +vpsrld $25, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +subq $2, %rax +jnz chacha_blocks_avx2_mainloop3 +vpaddd %xmm0, %xmm8, %xmm0 +vpaddd %xmm1, %xmm9, %xmm1 +vpaddd %xmm2, %xmm10, %xmm2 +vpaddd %xmm3, %xmm11, %xmm3 +andq %rsi, %rsi +jz chacha_blocks_avx2_noinput4 +vpxor 0(%rsi), %xmm0, %xmm0 +vpxor 16(%rsi), %xmm1, %xmm1 +vpxor 32(%rsi), %xmm2, %xmm2 +vpxor 48(%rsi), %xmm3, %xmm3 +addq $64, %rsi +chacha_blocks_avx2_noinput4: +vmovdqu %xmm0, 0(%rdx) +vmovdqu %xmm1, 16(%rdx) +vmovdqu %xmm2, 32(%rdx) +vmovdqu %xmm3, 48(%rdx) +vpaddq %xmm11, %xmm5, %xmm11 +cmpq $64, %rcx +jbe chacha_blocks_avx2_mainloop3_finishup +addq $64, %rdx +subq $64, %rcx +jmp chacha_blocks_avx2_below256 +chacha_blocks_avx2_mainloop3_finishup: +cmpq $64, %rcx +je chacha_blocks_avx2_done +addq %rcx, %r9 +addq %rcx, %rdx +negq %rcx +chacha_blocks_avx2_copyoutput: +movb (%rdx, %rcx), %al +movb %al, (%r9, %rcx) +incq %rcx +jnz chacha_blocks_avx2_copyoutput +chacha_blocks_avx2_done: +vmovdqa %xmm11, 32(%rdi) +movq %rbp, %rsp +popq %r14 +popq %r13 +popq %r12 +popq %rbp +popq %rbx +vzeroupper +ret +ENDFN(chacha_blocks) + + +FN(hchacha) +hchacha_avx2_local: +leaq C(%rip), %rax +vmovdqa 0(%rax), %xmm0 
+vmovdqa 16(%rax), %xmm6 +vmovdqa 32(%rax), %xmm5 +vmovdqu 0(%rdi), %xmm1 +vmovdqu 16(%rdi), %xmm2 +vmovdqu 0(%rsi), %xmm3 +hhacha_mainloop_avx2: +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm5, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $7, %xmm1, %xmm4 +vpsrld $25, %xmm1, %xmm1 +vpshufd $0x93, %xmm0, %xmm0 +vpxor %xmm1, %xmm4, %xmm1 +vpshufd $0x4e, %xmm3, %xmm3 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm6, %xmm3, %xmm3 +vpshufd $0x39, %xmm2, %xmm2 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpslld $12, %xmm1, %xmm4 +vpsrld $20, %xmm1, %xmm1 +vpxor %xmm1, %xmm4, %xmm1 +vpaddd %xmm0, %xmm1, %xmm0 +vpxor %xmm3, %xmm0, %xmm3 +vpshufb %xmm5, %xmm3, %xmm3 +vpaddd %xmm2, %xmm3, %xmm2 +vpxor %xmm1, %xmm2, %xmm1 +vpshufd $0x39, %xmm0, %xmm0 +vpslld $7, %xmm1, %xmm4 +vpshufd $0x4e, %xmm3, %xmm3 +vpsrld $25, %xmm1, %xmm1 +vpshufd $0x93, %xmm2, %xmm2 +vpxor %xmm1, %xmm4, %xmm1 +subl $2, %ecx +jne hhacha_mainloop_avx2 +vmovdqu %xmm0, (%rdx) +vmovdqu %xmm3, 16(%rdx) +ret +ENDFN(hchacha) + +FN(chacha) +pushq %rbp +movq %rsp, %rbp +subq $64, %rsp +andq $~63, %rsp +vmovdqu 0(%rdi), %xmm0 +vmovdqu 16(%rdi), %xmm1 +vmovdqa %xmm0, 0(%rsp) +vmovdqa %xmm1, 16(%rsp) +xorq %rdi, %rdi +movq %rdi, 32(%rsp) +movq 0(%rsi), %rsi +movq %rsi, 40(%rsp) +movq %r9, 48(%rsp) +movq %rsp, %rdi +movq %rdx, %rsi +movq %rcx, %rdx +movq %r8, %rcx +call chacha_blocks_avx2_local +vpxor %xmm0, %xmm0, %xmm0 +vmovdqa %xmm0, 0(%rsp) +vmovdqa %xmm0, 16(%rsp) +vmovdqa %xmm0, 32(%rsp) +movq %rbp, %rsp +popq %rbp +ret +ENDFN(chacha) + +FN(xchacha) +pushq %rbp +pushq %rbx +movq %rsp, %rbp +subq $64, %rsp +andq $~63, %rsp +movq %rsp, %rbx +xorq %rax, %rax +movq %rax, 32(%rbx) +movq 16(%rsi), %rax +movq %rax, 
40(%rbx) +movq %r9, 48(%rbx) +pushq %rdx +pushq %rcx +pushq %r8 +movq %rbx, %rdx +movq %r9, %rcx +call hchacha_avx2_local +movq %rbx, %rdi +popq %rcx +popq %rdx +popq %rsi +call chacha_blocks_avx2_local +vpxor %xmm0, %xmm0, %xmm0 +vmovdqa %xmm0, 0(%rbx) +vmovdqa %xmm0, 16(%rbx) +vmovdqa %xmm0, 32(%rbx) +movq %rbp, %rsp +popq %rbx +popq %rbp +ret +ENDFN(xchacha) + + +.section .rodata, "a" +.p2align 4,,15 +C: +.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 /* "expand 32-byte k" */ +.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 /* pshufb rotate by 16 */ +.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 /* pshufb rotate by 8 */ + +ENDFILE() diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/consts.c b/crypt/liboqs/kex_rlwe_newhope/avx2/consts.c new file mode 100644 index 0000000000000000000000000000000000000000..96c7d36a6c8b6a1ff479a7ce97f7c4f06ff3c9c2 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/consts.c @@ -0,0 +1,19 @@ +#include <stdint.h> +#include "params.h" + +uint8_t mask1[32] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; +uint32_t vrshiftsx8[8] = {0,1,2,3,4,5,6,7}; +uint32_t maskffff[8] = {0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff}; +uint16_t maskff[16] = {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff}; + +double q8[4] = {PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q}; +uint32_t q8x[8] = {PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q}; +uint32_t v1x8[8] = {1,1,1,1,1,1,1,1}; +uint32_t v3x8[8] = {3,3,3,3,3,3,3,3}; +uint32_t v2730x8[8] = {2730,2730,2730,2730,2730,2730,2730,2730}; + + +double qinv16[4] = {.00008137358613394092,.00008137358613394092,.00008137358613394092,.00008137358613394092}; +double neg2[4] = {1.,-1.,1.,-1.}; +double neg4[4] = {1.,1.,-1.,-1.}; + diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.c b/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.c new file mode 100644 index 0000000000000000000000000000000000000000..e6803cef5d2f16d06ed1e303a369dda4ea5cfb08 
--- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.c @@ -0,0 +1,9 @@ +#include "cpucycles.h" + +long long cpucycles(void) +{ + unsigned long long result; + asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" + : "=a" (result) :: "%rdx"); + return result; +} diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.h b/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.h new file mode 100644 index 0000000000000000000000000000000000000000..7aac8a45d05105cc8b426415fd5938140caa0217 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.h @@ -0,0 +1,6 @@ +#ifndef CPUCYCLES_H +#define CPUCYCLES_H + +long long cpucycles(void); + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.c b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.c new file mode 100644 index 0000000000000000000000000000000000000000..48159c22899ab3fc125a66eb7210d17b89ebb455 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.c @@ -0,0 +1,278 @@ +/* +20080913 +D. J. Bernstein +Public domain. 
+*/ + +#define blocks crypto_hashblocks_sha256 + +typedef unsigned int uint32; + +static uint32 load_bigendian(const unsigned char *x) +{ + return + (uint32) (x[3]) \ + | (((uint32) (x[2])) << 8) \ + | (((uint32) (x[1])) << 16) \ + | (((uint32) (x[0])) << 24) + ; +} + +static void store_bigendian(unsigned char *x,uint32 u) +{ + x[3] = u; u >>= 8; + x[2] = u; u >>= 8; + x[1] = u; u >>= 8; + x[0] = u; +} + +#define SHR(x,c) ((x) >> (c)) +#define ROTR(x,c) (((x) >> (c)) | ((x) << (32 - (c)))) + +#define Ch(x,y,z) ((x & y) ^ (~x & z)) +#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z)) +#define Sigma0(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22)) +#define Sigma1(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25)) +#define sigma0(x) (ROTR(x, 7) ^ ROTR(x,18) ^ SHR(x, 3)) +#define sigma1(x) (ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10)) + +#define M(w0,w14,w9,w1) w0 = sigma1(w14) + w9 + sigma0(w1) + w0; + +#define EXPAND \ + M(w0 ,w14,w9 ,w1 ) \ + M(w1 ,w15,w10,w2 ) \ + M(w2 ,w0 ,w11,w3 ) \ + M(w3 ,w1 ,w12,w4 ) \ + M(w4 ,w2 ,w13,w5 ) \ + M(w5 ,w3 ,w14,w6 ) \ + M(w6 ,w4 ,w15,w7 ) \ + M(w7 ,w5 ,w0 ,w8 ) \ + M(w8 ,w6 ,w1 ,w9 ) \ + M(w9 ,w7 ,w2 ,w10) \ + M(w10,w8 ,w3 ,w11) \ + M(w11,w9 ,w4 ,w12) \ + M(w12,w10,w5 ,w13) \ + M(w13,w11,w6 ,w14) \ + M(w14,w12,w7 ,w15) \ + M(w15,w13,w8 ,w0 ) + +#define F(w,k) \ + T1 = h + Sigma1(e) + Ch(e,f,g) + k + w; \ + T2 = Sigma0(a) + Maj(a,b,c); \ + h = g; \ + g = f; \ + f = e; \ + e = d + T1; \ + d = c; \ + c = b; \ + b = a; \ + a = T1 + T2; + +static int crypto_hashblocks_sha256(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen) +{ + uint32 state[8]; + uint32 a; + uint32 b; + uint32 c; + uint32 d; + uint32 e; + uint32 f; + uint32 g; + uint32 h; + uint32 T1; + uint32 T2; + + a = load_bigendian(statebytes + 0); state[0] = a; + b = load_bigendian(statebytes + 4); state[1] = b; + c = load_bigendian(statebytes + 8); state[2] = c; + d = load_bigendian(statebytes + 12); state[3] = d; + e = load_bigendian(statebytes + 16); state[4] = e; + f = 
load_bigendian(statebytes + 20); state[5] = f; + g = load_bigendian(statebytes + 24); state[6] = g; + h = load_bigendian(statebytes + 28); state[7] = h; + + while (inlen >= 64) { + uint32 w0 = load_bigendian(in + 0); + uint32 w1 = load_bigendian(in + 4); + uint32 w2 = load_bigendian(in + 8); + uint32 w3 = load_bigendian(in + 12); + uint32 w4 = load_bigendian(in + 16); + uint32 w5 = load_bigendian(in + 20); + uint32 w6 = load_bigendian(in + 24); + uint32 w7 = load_bigendian(in + 28); + uint32 w8 = load_bigendian(in + 32); + uint32 w9 = load_bigendian(in + 36); + uint32 w10 = load_bigendian(in + 40); + uint32 w11 = load_bigendian(in + 44); + uint32 w12 = load_bigendian(in + 48); + uint32 w13 = load_bigendian(in + 52); + uint32 w14 = load_bigendian(in + 56); + uint32 w15 = load_bigendian(in + 60); + + F(w0 ,0x428a2f98) + F(w1 ,0x71374491) + F(w2 ,0xb5c0fbcf) + F(w3 ,0xe9b5dba5) + F(w4 ,0x3956c25b) + F(w5 ,0x59f111f1) + F(w6 ,0x923f82a4) + F(w7 ,0xab1c5ed5) + F(w8 ,0xd807aa98) + F(w9 ,0x12835b01) + F(w10,0x243185be) + F(w11,0x550c7dc3) + F(w12,0x72be5d74) + F(w13,0x80deb1fe) + F(w14,0x9bdc06a7) + F(w15,0xc19bf174) + + EXPAND + + F(w0 ,0xe49b69c1) + F(w1 ,0xefbe4786) + F(w2 ,0x0fc19dc6) + F(w3 ,0x240ca1cc) + F(w4 ,0x2de92c6f) + F(w5 ,0x4a7484aa) + F(w6 ,0x5cb0a9dc) + F(w7 ,0x76f988da) + F(w8 ,0x983e5152) + F(w9 ,0xa831c66d) + F(w10,0xb00327c8) + F(w11,0xbf597fc7) + F(w12,0xc6e00bf3) + F(w13,0xd5a79147) + F(w14,0x06ca6351) + F(w15,0x14292967) + + EXPAND + + F(w0 ,0x27b70a85) + F(w1 ,0x2e1b2138) + F(w2 ,0x4d2c6dfc) + F(w3 ,0x53380d13) + F(w4 ,0x650a7354) + F(w5 ,0x766a0abb) + F(w6 ,0x81c2c92e) + F(w7 ,0x92722c85) + F(w8 ,0xa2bfe8a1) + F(w9 ,0xa81a664b) + F(w10,0xc24b8b70) + F(w11,0xc76c51a3) + F(w12,0xd192e819) + F(w13,0xd6990624) + F(w14,0xf40e3585) + F(w15,0x106aa070) + + EXPAND + + F(w0 ,0x19a4c116) + F(w1 ,0x1e376c08) + F(w2 ,0x2748774c) + F(w3 ,0x34b0bcb5) + F(w4 ,0x391c0cb3) + F(w5 ,0x4ed8aa4a) + F(w6 ,0x5b9cca4f) + F(w7 ,0x682e6ff3) + F(w8 ,0x748f82ee) + F(w9 
,0x78a5636f) + F(w10,0x84c87814) + F(w11,0x8cc70208) + F(w12,0x90befffa) + F(w13,0xa4506ceb) + F(w14,0xbef9a3f7) + F(w15,0xc67178f2) + + a += state[0]; + b += state[1]; + c += state[2]; + d += state[3]; + e += state[4]; + f += state[5]; + g += state[6]; + h += state[7]; + + state[0] = a; + state[1] = b; + state[2] = c; + state[3] = d; + state[4] = e; + state[5] = f; + state[6] = g; + state[7] = h; + + in += 64; + inlen -= 64; + } + + store_bigendian(statebytes + 0,state[0]); + store_bigendian(statebytes + 4,state[1]); + store_bigendian(statebytes + 8,state[2]); + store_bigendian(statebytes + 12,state[3]); + store_bigendian(statebytes + 16,state[4]); + store_bigendian(statebytes + 20,state[5]); + store_bigendian(statebytes + 24,state[6]); + store_bigendian(statebytes + 28,state[7]); + + return inlen; +} + +static const char iv[32] = { + 0x6a,0x09,0xe6,0x67, + 0xbb,0x67,0xae,0x85, + 0x3c,0x6e,0xf3,0x72, + 0xa5,0x4f,0xf5,0x3a, + 0x51,0x0e,0x52,0x7f, + 0x9b,0x05,0x68,0x8c, + 0x1f,0x83,0xd9,0xab, + 0x5b,0xe0,0xcd,0x19, +} ; + +int crypto_hash_sha256(unsigned char *out,const unsigned char *in,unsigned long long inlen) +{ + unsigned char h[32]; + unsigned char padded[128]; + unsigned long long i; + unsigned long long bits = inlen << 3; + + for (i = 0;i < 32;++i) h[i] = iv[i]; + + blocks(h,in,inlen); + in += inlen; + inlen &= 63; + in -= inlen; + + for (i = 0;i < inlen;++i) padded[i] = in[i]; + padded[inlen] = 0x80; + + if (inlen < 56) { + for (i = inlen + 1;i < 56;++i) padded[i] = 0; + padded[56] = bits >> 56; + padded[57] = bits >> 48; + padded[58] = bits >> 40; + padded[59] = bits >> 32; + padded[60] = bits >> 24; + padded[61] = bits >> 16; + padded[62] = bits >> 8; + padded[63] = bits; + blocks(h,padded,64); + } else { + for (i = inlen + 1;i < 120;++i) padded[i] = 0; + padded[120] = bits >> 56; + padded[121] = bits >> 48; + padded[122] = bits >> 40; + padded[123] = bits >> 32; + padded[124] = bits >> 24; + padded[125] = bits >> 16; + padded[126] = bits >> 8; + 
padded[127] = bits; + blocks(h,padded,128); + } + + for (i = 0;i < 32;++i) out[i] = h[i]; + + return 0; +} + + + diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.h b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.h new file mode 100644 index 0000000000000000000000000000000000000000..4717f0983666cc1c94fc46147144f32bb8355237 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.h @@ -0,0 +1,10 @@ +#ifndef CRYPTO_HASH_SHA256_H +#define CRYPTO_HASH_SHA256_H + +int crypto_hashblocks_sha256(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen); + +int crypto_hash_sha256(unsigned char *out,const unsigned char *in,unsigned long long inlen); + +#define crypto_hash_sha256_BYTES 32 + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream.h b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream.h new file mode 100644 index 0000000000000000000000000000000000000000..aab80525ed832315fab036649d45e38c4e1006eb --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream.h @@ -0,0 +1,16 @@ +#ifndef CRYPTO_STREAM_H +#define CRYPTO_STREAM_H + +#ifdef TESTVECTORS + #include "crypto_stream_chacha20.h" + #define CRYPTO_STREAM_KEYBYTES 32 + #define CRYPTO_STREAM_NONCEBYTES 8 + #define crypto_stream crypto_stream_chacha20 +#else + #include "crypto_stream_aes256ctr.h" + #define CRYPTO_STREAM_KEYBYTES 32 + #define CRYPTO_STREAM_NONCEBYTES 16 + #define crypto_stream crypto_stream_aes256ctr +#endif + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.c b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.c new file mode 100644 index 0000000000000000000000000000000000000000..3b745514265ff75858bb07a76567269fe6d7d06c --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.c @@ -0,0 +1,242 @@ +/* + aesenc-int.c version $Date: 2014/08/22 16:49:12 $ + AES-CTR + Romain Dolbeau + Public Domain +*/ + +#include <stdio.h> +#include <stdlib.h> +#include 
<string.h> +#include <immintrin.h> +#include "crypto_stream_aes256ctr.h" + +#ifdef __INTEL_COMPILER +#define ALIGN16 __declspec(align(16)) +#define ALIGN32 __declspec(align(32)) +#define ALIGN64 __declspec(align(64)) +#else // assume GCC +#define ALIGN16 __attribute__((aligned(16))) +#define ALIGN32 __attribute__((aligned(32))) +#define ALIGN64 __attribute__((aligned(64))) +#define _bswap64(a) __builtin_bswap64(a) +#define _bswap(a) __builtin_bswap(a) +#endif + +static inline void aesni_key256_expand(const unsigned char* key, __m128 rkeys[16]) { + __m128 key0 = _mm_loadu_ps((const float *)(key+0)); + __m128 key1 = _mm_loadu_ps((const float *)(key+16)); + __m128 temp0, temp1, temp2, temp4; + int idx = 0; + + rkeys[idx++] = key0; + temp0 = key0; + temp2 = key1; + temp4 = _mm_setzero_ps(); + + /* why single precision floating-point rather than integer instructions ? + because _mm_shuffle_ps takes two inputs, while _mm_shuffle_epi32 only + takes one - it doesn't perform the same computation... + _mm_shuffle_ps takes the lower 64 bits of the result from the first + operand, and the higher 64 bits of the result from the second operand + (in both cases, all four input floats are accessible). + I don't like the non-orthogonal naming scheme :-( + + This is all strongly inspired by the openssl assembly code. 
+ */ +#define BLOCK1(IMM) \ + temp1 = (__m128)_mm_aeskeygenassist_si128((__m128i)temp2, IMM); \ + rkeys[idx++] = temp2; \ + temp4 = _mm_shuffle_ps(temp4, temp0, 0x10); \ + temp0 = _mm_xor_ps(temp0, temp4); \ + temp4 = _mm_shuffle_ps(temp4, temp0, 0x8c); \ + temp0 = _mm_xor_ps(temp0, temp4); \ + temp1 = _mm_shuffle_ps(temp1, temp1, 0xff); \ + temp0 = _mm_xor_ps(temp0, temp1) + +#define BLOCK2(IMM) \ + temp1 = (__m128)_mm_aeskeygenassist_si128((__m128i)temp0, IMM); \ + rkeys[idx++] = temp0; \ + temp4 = _mm_shuffle_ps(temp4, temp2, 0x10); \ + temp2 = _mm_xor_ps(temp2, temp4); \ + temp4 = _mm_shuffle_ps(temp4, temp2, 0x8c); \ + temp2 = _mm_xor_ps(temp2, temp4); \ + temp1 = _mm_shuffle_ps(temp1, temp1, 0xaa); \ + temp2 = _mm_xor_ps(temp2, temp1) + + BLOCK1(0x01); + BLOCK2(0x01); + + BLOCK1(0x02); + BLOCK2(0x02); + + BLOCK1(0x04); + BLOCK2(0x04); + + BLOCK1(0x08); + BLOCK2(0x08); + + BLOCK1(0x10); + BLOCK2(0x10); + + BLOCK1(0x20); + BLOCK2(0x20); + + BLOCK1(0x40); + rkeys[idx++] = temp0; +} + +/** single, by-the-book AES encryption with AES-NI */ +static inline void aesni_encrypt1(unsigned char *out, unsigned char *n, __m128i rkeys[16]) { + __m128i nv = _mm_load_si128((const __m128i *)n); + int i; + __m128i temp = _mm_xor_si128(nv, rkeys[0]); +#pragma unroll(13) + for (i = 1 ; i < 14 ; i++) { + temp = _mm_aesenc_si128(temp, rkeys[i]); + } + temp = _mm_aesenclast_si128(temp, rkeys[14]); + _mm_store_si128((__m128i*)(out), temp); +} + +/** increment the 16-bytes nonce ; + this really should be improved somehow... + but it's not yet time-critical, because we + use the vector variant anyway */ +static inline void incle(unsigned char n[16]) { +/* unsigned long long out; */ +/* unsigned char carry; */ + unsigned long long *n_ = (unsigned long long*)n; + n_[1]++; + if (n_[1] == 0) + n_[0] ++; + /* perhaps this will be efficient on broadwell ? 
*/ + /* carry = _addcarry_u64(0, n_[1], 1ULL, &out); */ + /* carry = _addcarry_u64(carry, n_[0], 0ULL, &out); */ +} + +/** multiple-blocks-at-once AES encryption with AES-NI ; + on Haswell, aesenc as a latency of 7 and a througput of 1 + so the sequence of aesenc should be bubble-free, if you + have at least 8 blocks. Let's build an arbitratry-sized + function */ +/* Step 1 : loading the nonce */ +/* load & increment the n vector (non-vectorized, unused for now) */ +#define NVx(a) \ + __m128i nv##a = _mm_shuffle_epi8(_mm_load_si128((const __m128i *)n), _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); incle(n) +/* load the incremented n vector (vectorized, probably buggy) */ +#define NVxV_DEC(a) \ + __m128i nv##a; +#define NVxV_NOWRAP(a) \ + nv##a = _mm_shuffle_epi8(_mm_add_epi64(nv0i, _mm_set_epi64x(a,0)), _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +#define NVxV_WRAP(a) \ + __m128i ad##a = _mm_add_epi64(nv0i, _mm_set_epi64x(a,a>=wrapnumber?1:0)); \ + nv##a = _mm_shuffle_epi8(ad##a, _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) + +/* Step 2 : define value in round one (xor with subkey #0, aka key) */ +#define TEMPx(a) \ + __m128i temp##a = _mm_xor_si128(nv##a, rkeys[0]) + +/* Step 3: one round of AES */ +#define AESENCx(a) \ + temp##a = _mm_aesenc_si128(temp##a, rkeys[i]); + +/* Step 4: last round of AES */ +#define AESENCLASTx(a) \ + temp##a = _mm_aesenclast_si128(temp##a, rkeys[14]); + +/* Step 5: store result */ +#define STOREx(a) \ + _mm_store_si128((__m128i*)(out+(a*16)), temp##a); + +/* all the MAKE* macros are for automatic explicit unrolling */ +#define MAKE4(X) \ + X(0);X(1);X(2);X(3) + +#define MAKE6(X) \ + X(0);X(1);X(2);X(3); \ + X(4);X(5) + +#define MAKE7(X) \ + X(0);X(1);X(2);X(3); \ + X(4);X(5);X(6) + +#define MAKE8(X) \ + X(0);X(1);X(2);X(3); \ + X(4);X(5);X(6);X(7) + +#define MAKE10(X) \ + X(0);X(1);X(2);X(3); \ + X(4);X(5);X(6);X(7); \ + X(8);X(9) + +#define MAKE12(X) \ + X(0);X(1);X(2);X(3); \ + X(4);X(5);X(6);X(7); \ + 
X(8);X(9);X(10);X(11) + +/* create a function of unrolling N ; the MAKEN is the unrolling + macro, defined above. The N in MAKEN must match N, obviously. */ +#define FUNC(N, MAKEN) \ + static inline void aesni_encrypt##N(unsigned char *out, unsigned char *n, __m128i rkeys[16]) { \ + __m128i nv0i = _mm_load_si128((const __m128i *)n); \ + long long nl = *(long long*)&n[8]; \ + MAKEN(NVxV_DEC); \ + /* check for nonce wraparound */ \ + if ((nl < 0) && (nl + (N-1)) >= 0) { \ + int wrapnumber = (int)(N - (nl+N)); \ + MAKEN(NVxV_WRAP); \ + _mm_storeu_si128((__m128i*)n, _mm_add_epi64(nv0i, _mm_set_epi64x(N,1))); \ + } else { \ + MAKEN(NVxV_NOWRAP); \ + _mm_storeu_si128((__m128i*)n, _mm_add_epi64(nv0i, _mm_set_epi64x(N,0))); \ + } \ + int i; \ + MAKEN(TEMPx); \ + for (i = 1 ; i < 14 ; i++) { \ + MAKEN(AESENCx); \ + } \ + MAKEN(AESENCLASTx); \ + MAKEN(STOREx); \ + } + +/* and now building our unrolled function is trivial */ +FUNC(4, MAKE4) +FUNC(6, MAKE6) +FUNC(7, MAKE7) +FUNC(8, MAKE8) +FUNC(10, MAKE10) +FUNC(12, MAKE12) + +int crypto_stream_aes256ctr( +unsigned char *out, +unsigned long long outlen, +const unsigned char *n, +const unsigned char *k +) +{ + __m128 rkeys[16]; + ALIGN16 unsigned char n2[16]; + unsigned long long i, j; + aesni_key256_expand(k, rkeys); + /* n2 is in byte-reversed (i.e., native little endian) + order to make increment/testing easier */ + (*(unsigned long long*)&n2[8]) = _bswap64((*(unsigned long long*)&n[8])); + (*(unsigned long long*)&n2[0]) = _bswap64((*(unsigned long long*)&n[0])); + +#define LOOP(iter) \ + int lb = iter * 16; \ + for (i = 0 ; i < outlen ; i+= lb) { \ + ALIGN16 unsigned char outni[lb]; \ + aesni_encrypt##iter(outni, n2, (__m128i*)rkeys); \ + unsigned long long mj = lb; \ + if ((i+mj)>=outlen) \ + mj = outlen-i; \ + for (j = 0 ; j < mj ; j++) \ + out[i+j] = outni[j]; \ + } + + LOOP(8); + + return 0; +} diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.h 
b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.h new file mode 100644 index 0000000000000000000000000000000000000000..9be2881acfd348f66147f437552274944790aed9 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.h @@ -0,0 +1,6 @@ +#ifndef CRYPTO_STREAM_AES256CTR_H +#define CRYPTO_STREAM_AES256CTR_H + +int crypto_stream_aes256ctr(unsigned char *c,unsigned long long clen, const unsigned char *n, const unsigned char *k); + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.s b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.s new file mode 100644 index 0000000000000000000000000000000000000000..0b56a2bb6ac540b9b81bae9a81f8bfdeb29f94d7 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.s @@ -0,0 +1,396 @@ + .file "crypto_stream_aes256ctr.c" + .section .text.unlikely,"ax",@progbits +.LCOLDB11: + .text +.LHOTB11: + .p2align 4,,15 + .globl crypto_stream_aes256ctr + .type crypto_stream_aes256ctr, @function +crypto_stream_aes256ctr: +.LFB2248: + .cfi_startproc + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset 6, -16 + vxorps %xmm0, %xmm0, %xmm0 + movq %rsp, %rbp + .cfi_def_cfa_register 6 + pushq %rbx + subq $280, %rsp + .cfi_offset 3, -24 + movq 8(%rdx), %rax + vmovups 16(%rcx), %xmm4 + vmovups (%rcx), %xmm10 + vaeskeygenassist $1, %xmm4, %xmm2 + vmovaps %xmm4, -256(%rbp) + vshufps $255, %xmm2, %xmm2, %xmm2 + bswap %rax + movq %rax, -280(%rbp) + movq (%rdx), %rax + vshufps $16, %xmm10, %xmm0, %xmm0 + vmovaps %xmm10, -272(%rbp) + vxorps %xmm0, %xmm10, %xmm1 + vshufps $140, %xmm1, %xmm0, %xmm0 + vxorps %xmm0, %xmm1, %xmm1 + vshufps $16, %xmm4, %xmm0, %xmm0 + vxorps %xmm2, %xmm1, %xmm2 + vxorps %xmm0, %xmm4, %xmm4 + vaeskeygenassist $1, %xmm2, %xmm3 + vshufps $140, %xmm4, %xmm0, %xmm0 + vshufps $170, %xmm3, %xmm3, %xmm3 + vxorps %xmm0, %xmm4, %xmm4 + vmovaps %xmm2, -240(%rbp) + vxorps %xmm3, %xmm4, %xmm3 + vshufps $16, %xmm2, %xmm0, %xmm0 + bswap %rax + vaeskeygenassist 
$2, %xmm3, %xmm1 + vxorps %xmm0, %xmm2, %xmm2 + vshufps $255, %xmm1, %xmm1, %xmm1 + vshufps $140, %xmm2, %xmm0, %xmm0 + vmovaps %xmm3, -224(%rbp) + vxorps %xmm0, %xmm2, %xmm2 + movq %rax, -288(%rbp) + vxorps %xmm1, %xmm2, %xmm1 + vshufps $16, %xmm3, %xmm0, %xmm0 + vaeskeygenassist $2, %xmm1, %xmm4 + vxorps %xmm0, %xmm3, %xmm3 + vshufps $170, %xmm4, %xmm4, %xmm4 + vshufps $140, %xmm3, %xmm0, %xmm0 + vmovaps %xmm1, -208(%rbp) + vxorps %xmm0, %xmm3, %xmm3 + vxorps %xmm4, %xmm3, %xmm4 + vshufps $16, %xmm1, %xmm0, %xmm0 + vaeskeygenassist $4, %xmm4, %xmm2 + vxorps %xmm0, %xmm1, %xmm1 + vshufps $255, %xmm2, %xmm2, %xmm2 + vshufps $140, %xmm1, %xmm0, %xmm0 + vmovaps %xmm4, -192(%rbp) + vxorps %xmm0, %xmm1, %xmm1 + vxorps %xmm2, %xmm1, %xmm2 + vshufps $16, %xmm4, %xmm0, %xmm0 + vaeskeygenassist $4, %xmm2, %xmm3 + vxorps %xmm0, %xmm4, %xmm4 + vshufps $170, %xmm3, %xmm3, %xmm3 + vshufps $140, %xmm4, %xmm0, %xmm0 + vmovaps %xmm2, -176(%rbp) + vxorps %xmm0, %xmm4, %xmm4 + vxorps %xmm3, %xmm4, %xmm3 + vshufps $16, %xmm2, %xmm0, %xmm0 + vaeskeygenassist $8, %xmm3, %xmm1 + vxorps %xmm0, %xmm2, %xmm2 + vshufps $255, %xmm1, %xmm1, %xmm1 + vshufps $140, %xmm2, %xmm0, %xmm0 + vmovaps %xmm3, -160(%rbp) + vxorps %xmm0, %xmm2, %xmm2 + vxorps %xmm1, %xmm2, %xmm1 + vshufps $16, %xmm3, %xmm0, %xmm0 + vaeskeygenassist $8, %xmm1, %xmm4 + vxorps %xmm0, %xmm3, %xmm3 + vshufps $170, %xmm4, %xmm4, %xmm4 + vshufps $140, %xmm3, %xmm0, %xmm0 + vmovaps %xmm1, -144(%rbp) + vxorps %xmm0, %xmm3, %xmm3 + vxorps %xmm4, %xmm3, %xmm4 + vshufps $16, %xmm1, %xmm0, %xmm0 + vaeskeygenassist $16, %xmm4, %xmm2 + vxorps %xmm0, %xmm1, %xmm1 + vmovaps %xmm4, -128(%rbp) + vshufps $140, %xmm1, %xmm0, %xmm0 + vshufps $255, %xmm2, %xmm2, %xmm2 + vxorps %xmm0, %xmm1, %xmm1 + vshufps $16, %xmm4, %xmm0, %xmm0 + vxorps %xmm2, %xmm1, %xmm2 + vxorps %xmm0, %xmm4, %xmm4 + vaeskeygenassist $16, %xmm2, %xmm3 + vshufps $140, %xmm4, %xmm0, %xmm0 + vmovaps %xmm2, -112(%rbp) + vxorps %xmm0, %xmm4, %xmm4 + vshufps $170, %xmm3, 
%xmm3, %xmm3 + vshufps $16, %xmm2, %xmm0, %xmm0 + vxorps %xmm3, %xmm4, %xmm3 + vxorps %xmm0, %xmm2, %xmm2 + vaeskeygenassist $32, %xmm3, %xmm1 + vshufps $140, %xmm2, %xmm0, %xmm0 + vmovaps %xmm3, -96(%rbp) + vxorps %xmm0, %xmm2, %xmm2 + vshufps $255, %xmm1, %xmm1, %xmm1 + vshufps $16, %xmm3, %xmm0, %xmm0 + vxorps %xmm1, %xmm2, %xmm1 + vxorps %xmm0, %xmm3, %xmm3 + vaeskeygenassist $32, %xmm1, %xmm2 + vshufps $140, %xmm3, %xmm0, %xmm0 + vmovaps %xmm1, -80(%rbp) + vxorps %xmm0, %xmm3, %xmm3 + vshufps $170, %xmm2, %xmm2, %xmm2 + vshufps $16, %xmm1, %xmm0, %xmm0 + vxorps %xmm2, %xmm3, %xmm2 + vxorps %xmm0, %xmm1, %xmm1 + vaeskeygenassist $64, %xmm2, %xmm9 + vshufps $140, %xmm1, %xmm0, %xmm0 + vshufps $255, %xmm9, %xmm9, %xmm9 + vxorps %xmm0, %xmm1, %xmm0 + vmovaps %xmm2, -64(%rbp) + vxorps %xmm9, %xmm0, %xmm9 + vmovaps %xmm9, -48(%rbp) + testq %rsi, %rsi + je .L14 + vmovdqa .LC0(%rip), %xmm11 + movq %rsi, %rdx + xorl %r10d, %r10d + vmovdqa .LC3(%rip), %xmm15 + vmovdqa .LC4(%rip), %xmm14 + vmovdqa .LC5(%rip), %xmm13 + vmovdqa .LC6(%rip), %xmm12 + .p2align 4,,10 + .p2align 3 +.L11: + movq -280(%rbp), %r8 + movq %rsp, %rbx + subq $144, %rsp + leaq 15(%rsp), %rcx + vmovdqa -288(%rbp), %xmm8 + andq $-16, %rcx + leaq 7(%r8), %rax + cmpq $6, %rax + ja .L4 + negl %r8d + xorl %eax, %eax + movl $1, %r9d + vpshufb %xmm11, %xmm8, %xmm7 + cmpl $1, %r8d + setle %al + vmovq %rax, %xmm6 + xorl %eax, %eax + cmpl $2, %r8d + setle %al + vpinsrq $1, %r9, %xmm6, %xmm6 + vpaddq %xmm6, %xmm8, %xmm6 + vmovq %rax, %xmm5 + movl $2, %eax + vpshufb %xmm11, %xmm6, %xmm6 + vpinsrq $1, %rax, %xmm5, %xmm5 + xorl %eax, %eax + cmpl $3, %r8d + vpaddq %xmm5, %xmm8, %xmm5 + setle %al + vpshufb %xmm11, %xmm5, %xmm5 + vmovq %rax, %xmm4 + movl $3, %eax + vpinsrq $1, %rax, %xmm4, %xmm4 + xorl %eax, %eax + cmpl $4, %r8d + vpaddq %xmm4, %xmm8, %xmm4 + setle %al + vpshufb %xmm11, %xmm4, %xmm4 + vmovq %rax, %xmm3 + movl $4, %eax + vpinsrq $1, %rax, %xmm3, %xmm3 + xorl %eax, %eax + cmpl $5, %r8d + vpaddq %xmm3, 
%xmm8, %xmm3 + setle %al + vpshufb %xmm11, %xmm3, %xmm3 + vmovq %rax, %xmm2 + movl $5, %eax + vpinsrq $1, %rax, %xmm2, %xmm2 + xorl %eax, %eax + cmpl $6, %r8d + vpaddq %xmm2, %xmm8, %xmm2 + setle %al + vpshufb %xmm11, %xmm2, %xmm2 + vmovq %rax, %xmm1 + movl $6, %eax + vpinsrq $1, %rax, %xmm1, %xmm0 + vpaddq %xmm0, %xmm8, %xmm0 + vpaddq .LC1(%rip), %xmm8, %xmm1 + vpaddq .LC2(%rip), %xmm8, %xmm8 + vpshufb %xmm11, %xmm0, %xmm0 + vpshufb %xmm11, %xmm1, %xmm1 + vmovups %xmm8, -288(%rbp) +.L5: + vpxor %xmm10, %xmm7, %xmm7 + vpxor %xmm10, %xmm6, %xmm6 + vpxor %xmm10, %xmm5, %xmm5 + vpxor %xmm10, %xmm4, %xmm4 + vpxor %xmm10, %xmm3, %xmm3 + vpxor %xmm10, %xmm2, %xmm2 + vpxor %xmm10, %xmm0, %xmm8 + vpxor %xmm10, %xmm1, %xmm1 + leaq -272(%rbp), %rax + leaq -272(%rbp), %r11 + addq $16, %rax + leaq 224(%r11), %r8 + .p2align 4,,10 + .p2align 3 +.L6: + vmovdqa (%rax), %xmm0 + addq $16, %rax + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc %xmm0, %xmm6, %xmm6 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm4, %xmm4 + vaesenc %xmm0, %xmm3, %xmm3 + vaesenc %xmm0, %xmm2, %xmm2 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm1, %xmm1 + cmpq %r8, %rax + jne .L6 + vaesenclast %xmm9, %xmm7, %xmm7 + leaq 128(%r10), %r8 + vaesenclast %xmm9, %xmm6, %xmm6 + vaesenclast %xmm9, %xmm5, %xmm5 + vaesenclast %xmm9, %xmm4, %xmm4 + vaesenclast %xmm9, %xmm3, %xmm3 + vaesenclast %xmm9, %xmm2, %xmm2 + vaesenclast %xmm9, %xmm8, %xmm0 + vaesenclast %xmm9, %xmm1, %xmm1 + vmovaps %xmm7, (%rcx) + vmovaps %xmm6, 16(%rcx) + vmovaps %xmm5, 32(%rcx) + vmovaps %xmm4, 48(%rcx) + vmovaps %xmm3, 64(%rcx) + vmovaps %xmm2, 80(%rcx) + vmovaps %xmm0, 96(%rcx) + vmovaps %xmm1, 112(%rcx) + cmpq %r8, %rsi + jbe .L18 + movq (%rcx), %r9 + leaq (%rdi,%r10), %rax + addq $-128, %rdx + movq %r8, %r10 + movq %r9, (%rax) + movq 8(%rcx), %r9 + movq %r9, 8(%rax) + movq 16(%rcx), %r9 + movq %r9, 16(%rax) + movq 24(%rcx), %r9 + movq %r9, 24(%rax) + movq 32(%rcx), %r9 + movq %r9, 32(%rax) + movq 40(%rcx), %r9 + movq %r9, 40(%rax) + movq 
48(%rcx), %r9 + movq %r9, 48(%rax) + movq 56(%rcx), %r9 + movq %r9, 56(%rax) + movq 64(%rcx), %r9 + movq %r9, 64(%rax) + movq 72(%rcx), %r9 + movq %r9, 72(%rax) + movq 80(%rcx), %r9 + movq %r9, 80(%rax) + movq 88(%rcx), %r9 + movq %r9, 88(%rax) + movq 96(%rcx), %r9 + movq %r9, 96(%rax) + movq 104(%rcx), %r9 + movq %r9, 104(%rax) + movq 112(%rcx), %r9 + movq %r9, 112(%rax) + movq 120(%rcx), %rcx + movq %rcx, 120(%rax) + movq %rbx, %rsp + jmp .L11 + .p2align 4,,10 + .p2align 3 +.L4: + vpaddq %xmm15, %xmm8, %xmm6 + vpaddq %xmm14, %xmm8, %xmm5 + vpshufb %xmm11, %xmm8, %xmm7 + vpaddq %xmm13, %xmm8, %xmm4 + vpaddq %xmm12, %xmm8, %xmm3 + vpshufb %xmm11, %xmm6, %xmm6 + vpshufb %xmm11, %xmm5, %xmm5 + vpaddq .LC7(%rip), %xmm8, %xmm2 + vpshufb %xmm11, %xmm4, %xmm4 + vpshufb %xmm11, %xmm3, %xmm3 + vpaddq .LC8(%rip), %xmm8, %xmm0 + vpaddq .LC9(%rip), %xmm8, %xmm1 + vpaddq .LC10(%rip), %xmm8, %xmm8 + vpshufb %xmm11, %xmm2, %xmm2 + vpshufb %xmm11, %xmm0, %xmm0 + vpshufb %xmm11, %xmm1, %xmm1 + vmovups %xmm8, -288(%rbp) + jmp .L5 +.L18: + testq %rdx, %rdx + je .L16 + addq %r10, %rdi + movq %rcx, %rsi + call memcpy +.L16: + movq %rbx, %rsp +.L14: + xorl %eax, %eax + movq -8(%rbp), %rbx + leave + .cfi_def_cfa 7, 8 + ret + .cfi_endproc +.LFE2248: + .size crypto_stream_aes256ctr, .-crypto_stream_aes256ctr + .section .text.unlikely +.LCOLDE11: + .text +.LHOTE11: + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +.LC0: + .byte 7 + .byte 6 + .byte 5 + .byte 4 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 15 + .byte 14 + .byte 13 + .byte 12 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .align 16 +.LC1: + .quad 1 + .quad 7 + .align 16 +.LC2: + .quad 1 + .quad 8 + .align 16 +.LC3: + .quad 0 + .quad 1 + .align 16 +.LC4: + .quad 0 + .quad 2 + .align 16 +.LC5: + .quad 0 + .quad 3 + .align 16 +.LC6: + .quad 0 + .quad 4 + .align 16 +.LC7: + .quad 0 + .quad 5 + .align 16 +.LC8: + .quad 0 + .quad 6 + .align 16 +.LC9: + .quad 0 + .quad 7 + .align 16 +.LC10: + .quad 0 + .quad 8 + .ident "GCC: 
(Debian 4.9.2-10) 4.9.2" + .section .note.GNU-stack,"",@progbits diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.c b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.c new file mode 100644 index 0000000000000000000000000000000000000000..a3ffee1b7ba2c3454dd787e15c18ed5bccf58075 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.c @@ -0,0 +1,10 @@ +#include "crypto_stream_chacha20.h" +#include <stddef.h> + +extern void chacha_avx2(const unsigned char *k, const unsigned char *n, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds); + +int crypto_stream_chacha20(unsigned char *out, unsigned long long outlen, const unsigned char *n, const unsigned char *k) +{ + chacha_avx2(k, n, NULL, out, (size_t)outlen, 20); + return 0; +} diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.h b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.h new file mode 100644 index 0000000000000000000000000000000000000000..c0cf329052ad0b23ca28578217dfcccbebcb7c7b --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.h @@ -0,0 +1,6 @@ +#ifndef CRYPTO_STREAM_CHACHA20 +#define CRYPTO_STREAM_CHACHA20 + +int crypto_stream_chacha20(unsigned char *c,unsigned long long clen, const unsigned char *n, const unsigned char *k); + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.c b/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.c new file mode 100644 index 0000000000000000000000000000000000000000..0970e6f754ce2d14383d12a80308f8990518d138 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.c @@ -0,0 +1,21 @@ +#include "crypto_stream.h" +#include "error_correction.h" + +//See paper for details on the error reconciliation + +extern void hr(poly *c, const poly *v, unsigned char rand[32]); + +void helprec(poly *c, const poly *v, const unsigned char *seed, unsigned char nonce) +{ + unsigned char rand[32]; + unsigned char n[8]; + int i; 
+ + for(i=0;i<7;i++) + n[i] = 0; + n[7] = nonce; + + crypto_stream(rand,32,n,seed); + + hr(c, v, rand); +} diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.h b/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.h new file mode 100644 index 0000000000000000000000000000000000000000..488c10588dabaa68020bb5bd39afc146831d1a85 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.h @@ -0,0 +1,15 @@ +#ifndef ERROR_CORRECTION_H +#define ERROR_CORRECTION_H + +#include "inttypes.h" +#include "params.h" +#include "randombytes.h" +#include "crypto_stream_chacha20.h" +#include "math.h" +#include "poly.h" +#include <stdio.h> + +void helprec(poly *c, const poly *v, const unsigned char *seed, unsigned char nonce); +void rec(unsigned char *key, const poly *v, const poly *c); + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.c b/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.c new file mode 100644 index 0000000000000000000000000000000000000000..f649a7d7ba80b953248d43024a564c0544bec67d --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.c @@ -0,0 +1,415 @@ +/* Based on the public domain implementation in + * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html + * by Ronny Van Keer + * and the public domain "TweetFips202" implementation + * from https://twitter.com/tweetfips202 + * by Gilles Van Assche, Daniel J. 
Bernstein, and Peter Schwabe */ + +#include <stdint.h> +#include <assert.h> +#include "fips202.h" + +#define NROUNDS 24 +#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset))) + +static uint64_t load64(const unsigned char *x) +{ + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + +static void store64(uint8_t *x, uint64_t u) +{ + unsigned int i; + + for(i=0; i<8; ++i) { + x[i] = u; + u >>= 8; + } +} + +static const uint64_t KeccakF_RoundConstants[NROUNDS] = +{ + (uint64_t)0x0000000000000001ULL, + (uint64_t)0x0000000000008082ULL, + (uint64_t)0x800000000000808aULL, + (uint64_t)0x8000000080008000ULL, + (uint64_t)0x000000000000808bULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008009ULL, + (uint64_t)0x000000000000008aULL, + (uint64_t)0x0000000000000088ULL, + (uint64_t)0x0000000080008009ULL, + (uint64_t)0x000000008000000aULL, + (uint64_t)0x000000008000808bULL, + (uint64_t)0x800000000000008bULL, + (uint64_t)0x8000000000008089ULL, + (uint64_t)0x8000000000008003ULL, + (uint64_t)0x8000000000008002ULL, + (uint64_t)0x8000000000000080ULL, + (uint64_t)0x000000000000800aULL, + (uint64_t)0x800000008000000aULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008080ULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008008ULL +}; + +void KeccakF1600_StatePermute(uint64_t * state) +{ + int round; + + uint64_t Aba, Abe, Abi, Abo, Abu; + uint64_t Aga, Age, Agi, Ago, Agu; + uint64_t Aka, Ake, Aki, Ako, Aku; + uint64_t Ama, Ame, Ami, Amo, Amu; + uint64_t Asa, Ase, Asi, Aso, Asu; + uint64_t BCa, BCe, BCi, BCo, BCu; + uint64_t Da, De, Di, Do, Du; + uint64_t Eba, Ebe, Ebi, Ebo, Ebu; + uint64_t Ega, Ege, Egi, Ego, Egu; + uint64_t Eka, Eke, Eki, Eko, Eku; + uint64_t Ema, Eme, Emi, Emo, Emu; + uint64_t Esa, Ese, Esi, Eso, Esu; + + //copyFromState(A, state) + Aba = state[ 0]; + Abe = state[ 1]; + Abi = state[ 2]; + Abo = state[ 3]; + Abu = 
state[ 4]; + Aga = state[ 5]; + Age = state[ 6]; + Agi = state[ 7]; + Ago = state[ 8]; + Agu = state[ 9]; + Aka = state[10]; + Ake = state[11]; + Aki = state[12]; + Ako = state[13]; + Aku = state[14]; + Ama = state[15]; + Ame = state[16]; + Ami = state[17]; + Amo = state[18]; + Amu = state[19]; + Asa = state[20]; + Ase = state[21]; + Asi = state[22]; + Aso = state[23]; + Asu = state[24]; + + for( round = 0; round < NROUNDS; round += 2 ) + { + // prepareTheta + BCa = Aba^Aga^Aka^Ama^Asa; + BCe = Abe^Age^Ake^Ame^Ase; + BCi = Abi^Agi^Aki^Ami^Asi; + BCo = Abo^Ago^Ako^Amo^Aso; + BCu = Abu^Agu^Aku^Amu^Asu; + + //thetaRhoPiChiIotaPrepareTheta(round , A, E) + Da = BCu^ROL(BCe, 1); + De = BCa^ROL(BCi, 1); + Di = BCe^ROL(BCo, 1); + Do = BCi^ROL(BCu, 1); + Du = BCo^ROL(BCa, 1); + + Aba ^= Da; + BCa = Aba; + Age ^= De; + BCe = ROL(Age, 44); + Aki ^= Di; + BCi = ROL(Aki, 43); + Amo ^= Do; + BCo = ROL(Amo, 21); + Asu ^= Du; + BCu = ROL(Asu, 14); + Eba = BCa ^((~BCe)& BCi ); + Eba ^= (uint64_t)KeccakF_RoundConstants[round]; + Ebe = BCe ^((~BCi)& BCo ); + Ebi = BCi ^((~BCo)& BCu ); + Ebo = BCo ^((~BCu)& BCa ); + Ebu = BCu ^((~BCa)& BCe ); + + Abo ^= Do; + BCa = ROL(Abo, 28); + Agu ^= Du; + BCe = ROL(Agu, 20); + Aka ^= Da; + BCi = ROL(Aka, 3); + Ame ^= De; + BCo = ROL(Ame, 45); + Asi ^= Di; + BCu = ROL(Asi, 61); + Ega = BCa ^((~BCe)& BCi ); + Ege = BCe ^((~BCi)& BCo ); + Egi = BCi ^((~BCo)& BCu ); + Ego = BCo ^((~BCu)& BCa ); + Egu = BCu ^((~BCa)& BCe ); + + Abe ^= De; + BCa = ROL(Abe, 1); + Agi ^= Di; + BCe = ROL(Agi, 6); + Ako ^= Do; + BCi = ROL(Ako, 25); + Amu ^= Du; + BCo = ROL(Amu, 8); + Asa ^= Da; + BCu = ROL(Asa, 18); + Eka = BCa ^((~BCe)& BCi ); + Eke = BCe ^((~BCi)& BCo ); + Eki = BCi ^((~BCo)& BCu ); + Eko = BCo ^((~BCu)& BCa ); + Eku = BCu ^((~BCa)& BCe ); + + Abu ^= Du; + BCa = ROL(Abu, 27); + Aga ^= Da; + BCe = ROL(Aga, 36); + Ake ^= De; + BCi = ROL(Ake, 10); + Ami ^= Di; + BCo = ROL(Ami, 15); + Aso ^= Do; + BCu = ROL(Aso, 56); + Ema = BCa ^((~BCe)& BCi ); + Eme = BCe 
^((~BCi)& BCo ); + Emi = BCi ^((~BCo)& BCu ); + Emo = BCo ^((~BCu)& BCa ); + Emu = BCu ^((~BCa)& BCe ); + + Abi ^= Di; + BCa = ROL(Abi, 62); + Ago ^= Do; + BCe = ROL(Ago, 55); + Aku ^= Du; + BCi = ROL(Aku, 39); + Ama ^= Da; + BCo = ROL(Ama, 41); + Ase ^= De; + BCu = ROL(Ase, 2); + Esa = BCa ^((~BCe)& BCi ); + Ese = BCe ^((~BCi)& BCo ); + Esi = BCi ^((~BCo)& BCu ); + Eso = BCo ^((~BCu)& BCa ); + Esu = BCu ^((~BCa)& BCe ); + + // prepareTheta + BCa = Eba^Ega^Eka^Ema^Esa; + BCe = Ebe^Ege^Eke^Eme^Ese; + BCi = Ebi^Egi^Eki^Emi^Esi; + BCo = Ebo^Ego^Eko^Emo^Eso; + BCu = Ebu^Egu^Eku^Emu^Esu; + + //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) + Da = BCu^ROL(BCe, 1); + De = BCa^ROL(BCi, 1); + Di = BCe^ROL(BCo, 1); + Do = BCi^ROL(BCu, 1); + Du = BCo^ROL(BCa, 1); + + Eba ^= Da; + BCa = Eba; + Ege ^= De; + BCe = ROL(Ege, 44); + Eki ^= Di; + BCi = ROL(Eki, 43); + Emo ^= Do; + BCo = ROL(Emo, 21); + Esu ^= Du; + BCu = ROL(Esu, 14); + Aba = BCa ^((~BCe)& BCi ); + Aba ^= (uint64_t)KeccakF_RoundConstants[round+1]; + Abe = BCe ^((~BCi)& BCo ); + Abi = BCi ^((~BCo)& BCu ); + Abo = BCo ^((~BCu)& BCa ); + Abu = BCu ^((~BCa)& BCe ); + + Ebo ^= Do; + BCa = ROL(Ebo, 28); + Egu ^= Du; + BCe = ROL(Egu, 20); + Eka ^= Da; + BCi = ROL(Eka, 3); + Eme ^= De; + BCo = ROL(Eme, 45); + Esi ^= Di; + BCu = ROL(Esi, 61); + Aga = BCa ^((~BCe)& BCi ); + Age = BCe ^((~BCi)& BCo ); + Agi = BCi ^((~BCo)& BCu ); + Ago = BCo ^((~BCu)& BCa ); + Agu = BCu ^((~BCa)& BCe ); + + Ebe ^= De; + BCa = ROL(Ebe, 1); + Egi ^= Di; + BCe = ROL(Egi, 6); + Eko ^= Do; + BCi = ROL(Eko, 25); + Emu ^= Du; + BCo = ROL(Emu, 8); + Esa ^= Da; + BCu = ROL(Esa, 18); + Aka = BCa ^((~BCe)& BCi ); + Ake = BCe ^((~BCi)& BCo ); + Aki = BCi ^((~BCo)& BCu ); + Ako = BCo ^((~BCu)& BCa ); + Aku = BCu ^((~BCa)& BCe ); + + Ebu ^= Du; + BCa = ROL(Ebu, 27); + Ega ^= Da; + BCe = ROL(Ega, 36); + Eke ^= De; + BCi = ROL(Eke, 10); + Emi ^= Di; + BCo = ROL(Emi, 15); + Eso ^= Do; + BCu = ROL(Eso, 56); + Ama = BCa ^((~BCe)& BCi ); + Ame = BCe ^((~BCi)& BCo 
); + Ami = BCi ^((~BCo)& BCu ); + Amo = BCo ^((~BCu)& BCa ); + Amu = BCu ^((~BCa)& BCe ); + + Ebi ^= Di; + BCa = ROL(Ebi, 62); + Ego ^= Do; + BCe = ROL(Ego, 55); + Eku ^= Du; + BCi = ROL(Eku, 39); + Ema ^= Da; + BCo = ROL(Ema, 41); + Ese ^= De; + BCu = ROL(Ese, 2); + Asa = BCa ^((~BCe)& BCi ); + Ase = BCe ^((~BCi)& BCo ); + Asi = BCi ^((~BCo)& BCu ); + Aso = BCo ^((~BCu)& BCa ); + Asu = BCu ^((~BCa)& BCe ); + } + + //copyToState(state, A) + state[ 0] = Aba; + state[ 1] = Abe; + state[ 2] = Abi; + state[ 3] = Abo; + state[ 4] = Abu; + state[ 5] = Aga; + state[ 6] = Age; + state[ 7] = Agi; + state[ 8] = Ago; + state[ 9] = Agu; + state[10] = Aka; + state[11] = Ake; + state[12] = Aki; + state[13] = Ako; + state[14] = Aku; + state[15] = Ama; + state[16] = Ame; + state[17] = Ami; + state[18] = Amo; + state[19] = Amu; + state[20] = Asa; + state[21] = Ase; + state[22] = Asi; + state[23] = Aso; + state[24] = Asu; + + #undef round +} + +#include <string.h> +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + + +static void keccak_absorb(uint64_t *s, + unsigned int r, + const unsigned char *m, unsigned long long int mlen, + unsigned char p) +{ + unsigned long long i; + unsigned char t[200]; + + for (i = 0; i < 25; ++i) + s[i] = 0; + + while (mlen >= r) + { + for (i = 0; i < r / 8; ++i) + s[i] ^= load64(m + 8 * i); + + KeccakF1600_StatePermute(s); + mlen -= r; + m += r; + } + + for (i = 0; i < r; ++i) + t[i] = 0; + for (i = 0; i < mlen; ++i) + t[i] = m[i]; + t[i] = p; + t[r - 1] |= 128; + for (i = 0; i < r / 8; ++i) + s[i] ^= load64(t + 8 * i); +} + + +static void keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, + uint64_t *s, + unsigned int r) +{ + unsigned int i; + while(nblocks > 0) + { + KeccakF1600_StatePermute(s); + for(i=0;i<(r>>3);i++) + { + store64(h+8*i, s[i]); + } + h += r; + nblocks--; + } +} + + +void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen) +{ + keccak_absorb(s, SHAKE128_RATE, input, inputByteLen, 0x1F); +} 
+ + +void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s) +{ + keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE); +} + + +void shake128(unsigned char *output, unsigned int outputByteLen, const unsigned char *input, unsigned int inputByteLen) +{ + uint64_t s[25]; + assert(!(outputByteLen%SHAKE128_RATE)); + shake128_absorb(s, input, inputByteLen); + shake128_squeezeblocks(output, outputByteLen/SHAKE128_RATE, s); +} + + +void sha3256(unsigned char *output, const unsigned char *input, unsigned int inputByteLen) +{ + uint64_t s[25]; + unsigned char t[SHA3_256_RATE]; + int i; + + keccak_absorb(s, SHA3_256_RATE, input, inputByteLen, 0x06); + keccak_squeezeblocks(t, 1, s, SHA3_256_RATE); + for(i=0;i<32;i++) + output[i] = t[i]; +} diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.h b/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.h new file mode 100644 index 0000000000000000000000000000000000000000..b5644345035ceb8fc8c8be1ee6996b1e0aa937be --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.h @@ -0,0 +1,12 @@ +#ifndef FIPS202_H +#define FIPS202_H + +#define SHAKE128_RATE 168 +#define SHA3_256_RATE 136 + +void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen); +void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s); +void shake128(unsigned char *output, unsigned int outputByteLen, const unsigned char *input, unsigned int inputByteLen); +void sha3256(unsigned char *output, const unsigned char *input, unsigned int inputByteLen); + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/hr.s b/crypt/liboqs/kex_rlwe_newhope/avx2/hr.s new file mode 100644 index 0000000000000000000000000000000000000000..beb93fe0c422fee57c730876ab6e237374e1b245 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/hr.s @@ -0,0 +1,767 @@ + +# qhasm: int64 input_0 + +# qhasm: int64 input_1 + +# qhasm: int64 input_2 + +# qhasm: int64 input_3 + +# qhasm: int64 input_4 + +# 
qhasm: int64 input_5 + +# qhasm: stack64 input_6 + +# qhasm: stack64 input_7 + +# qhasm: int64 caller_r11 + +# qhasm: int64 caller_r12 + +# qhasm: int64 caller_r13 + +# qhasm: int64 caller_r14 + +# qhasm: int64 caller_r15 + +# qhasm: int64 caller_rbx + +# qhasm: int64 caller_rbp + +# qhasm: reg256 v + +# qhasm: reg256 v0a + +# qhasm: reg256 v0b + +# qhasm: reg256 v0c + +# qhasm: reg256 v0d + +# qhasm: reg256 v1a + +# qhasm: reg256 v1b + +# qhasm: reg256 v1c + +# qhasm: reg256 v1d + +# qhasm: reg256 vtmp0 + +# qhasm: reg256 vtmp1 + +# qhasm: reg256 vtmp2 + +# qhasm: reg256 vtmp3 + +# qhasm: reg256 k + +# qhasm: reg256 b + +# qhasm: reg256 t + +# qhasm: reg256 d + +# qhasm: reg256 c + +# qhasm: reg256 rbit + +# qhasm: reg256 qx8 + +# qhasm: reg256 _1x8 + +# qhasm: reg256 _3x8 + +# qhasm: reg256 rshifts + +# qhasm: reg256 _2730 + +# qhasm: int64 ctr + +# qhasm: enter hr +.p2align 5 +.global _hr +.global hr +_hr: +hr: +mov %rsp,%r11 +and $31,%r11 +add $0,%r11 +sub %r11,%rsp + +# qhasm: ctr = 0 +# asm 1: mov $0,>ctr=int64#4 +# asm 2: mov $0,>ctr=%rcx +mov $0,%rcx + +# qhasm: _1x8 = mem256[v1x8] +# asm 1: vmovdqu v1x8,>_1x8=reg256#1 +# asm 2: vmovdqu v1x8,>_1x8=%ymm0 +vmovdqu v1x8,%ymm0 + +# qhasm: qx8 = mem256[q8x] +# asm 1: vmovdqu q8x,>qx8=reg256#2 +# asm 2: vmovdqu q8x,>qx8=%ymm1 +vmovdqu q8x,%ymm1 + +# qhasm: looptop: +._looptop: + +# qhasm: rshifts = mem256[vrshiftsx8] +# asm 1: vmovdqu vrshiftsx8,>rshifts=reg256#3 +# asm 2: vmovdqu vrshiftsx8,>rshifts=%ymm2 +vmovdqu vrshiftsx8,%ymm2 + +# qhasm: 32x rbit = mem8[input_2 + ctr + 0] +# asm 1: vpbroadcastb 0(<input_2=int64#3,<ctr=int64#4),>rbit=reg256#4 +# asm 2: vpbroadcastb 0(<input_2=%rdx,<ctr=%rcx),>rbit=%ymm3 +vpbroadcastb 0(%rdx,%rcx),%ymm3 + +# qhasm: 8x rbit unsigned>>= rshifts +# asm 1: vpsrlvd <rshifts=reg256#3,<rbit=reg256#4,>rbit=reg256#3 +# asm 2: vpsrlvd <rshifts=%ymm2,<rbit=%ymm3,>rbit=%ymm2 +vpsrlvd %ymm2,%ymm3,%ymm2 + +# qhasm: rbit &= _1x8 +# asm 1: vpand <_1x8=reg256#1,<rbit=reg256#3,<rbit=reg256#3 
+# asm 2: vpand <_1x8=%ymm0,<rbit=%ymm2,<rbit=%ymm2 +vpand %ymm0,%ymm2,%ymm2 + +# qhasm: 8x rbit <<= 2 +# asm 1: vpslld $2,<rbit=reg256#3,>rbit=reg256#3 +# asm 2: vpslld $2,<rbit=%ymm2,>rbit=%ymm2 +vpslld $2,%ymm2,%ymm2 + +# qhasm: ctr <<= 5 +# asm 1: shl $5,<ctr=int64#4 +# asm 2: shl $5,<ctr=%rcx +shl $5,%rcx + +# qhasm: v = mem256[input_1 + ctr + 0] +# asm 1: vmovupd 0(<input_1=int64#2,<ctr=int64#4),>v=reg256#4 +# asm 2: vmovupd 0(<input_1=%rsi,<ctr=%rcx),>v=%ymm3 +vmovupd 0(%rsi,%rcx),%ymm3 + +# qhasm: 8x v <<= 3 +# asm 1: vpslld $3,<v=reg256#4,>v=reg256#4 +# asm 2: vpslld $3,<v=%ymm3,>v=%ymm3 +vpslld $3,%ymm3,%ymm3 + +# qhasm: 8x v += rbit +# asm 1: vpaddd <rbit=reg256#3,<v=reg256#4,>v=reg256#4 +# asm 2: vpaddd <rbit=%ymm2,<v=%ymm3,>v=%ymm3 +vpaddd %ymm2,%ymm3,%ymm3 + +# qhasm: 8x b = v * mem256[v2730x8] +# asm 1: vpmulld v2730x8,<v=reg256#4,>b=reg256#5 +# asm 2: vpmulld v2730x8,<v=%ymm3,>b=%ymm4 +vpmulld v2730x8,%ymm3,%ymm4 + +# qhasm: 8x t = b >> 25 +# asm 1: vpsrad $25,<b=reg256#5,>t=reg256#5 +# asm 2: vpsrad $25,<b=%ymm4,>t=%ymm4 +vpsrad $25,%ymm4,%ymm4 + +# qhasm: 8x d = t * qx8 +# asm 1: vpmulld <t=reg256#5,<qx8=reg256#2,>d=reg256#6 +# asm 2: vpmulld <t=%ymm4,<qx8=%ymm1,>d=%ymm5 +vpmulld %ymm4,%ymm1,%ymm5 + +# qhasm: 8x b = v - d +# asm 1: vpsubd <d=reg256#6,<v=reg256#4,>b=reg256#6 +# asm 2: vpsubd <d=%ymm5,<v=%ymm3,>b=%ymm5 +vpsubd %ymm5,%ymm3,%ymm5 + +# qhasm: 8x b += _1x8 +# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#6,>b=reg256#6 +# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm5,>b=%ymm5 +vpaddd %ymm0,%ymm5,%ymm5 + +# qhasm: 8x b = qx8 - b +# asm 1: vpsubd <b=reg256#6,<qx8=reg256#2,>b=reg256#6 +# asm 2: vpsubd <b=%ymm5,<qx8=%ymm1,>b=%ymm5 +vpsubd %ymm5,%ymm1,%ymm5 + +# qhasm: 8x b >>= 31 +# asm 1: vpsrad $31,<b=reg256#6,>b=reg256#6 +# asm 2: vpsrad $31,<b=%ymm5,>b=%ymm5 +vpsrad $31,%ymm5,%ymm5 + +# qhasm: 8x t -= b +# asm 1: vpsubd <b=reg256#6,<t=reg256#5,>t=reg256#5 +# asm 2: vpsubd <b=%ymm5,<t=%ymm4,>t=%ymm4 +vpsubd %ymm5,%ymm4,%ymm4 + +# qhasm: d = t & _1x8 +# asm 
1: vpand <t=reg256#5,<_1x8=reg256#1,>d=reg256#6 +# asm 2: vpand <t=%ymm4,<_1x8=%ymm0,>d=%ymm5 +vpand %ymm4,%ymm0,%ymm5 + +# qhasm: 8x v0a = t >> 1 +# asm 1: vpsrad $1,<t=reg256#5,>v0a=reg256#7 +# asm 2: vpsrad $1,<t=%ymm4,>v0a=%ymm6 +vpsrad $1,%ymm4,%ymm6 + +# qhasm: 8x v0a += d +# asm 1: vpaddd <d=reg256#6,<v0a=reg256#7,>v0a=reg256#6 +# asm 2: vpaddd <d=%ymm5,<v0a=%ymm6,>v0a=%ymm5 +vpaddd %ymm5,%ymm6,%ymm5 + +# qhasm: 8x t -= _1x8 +# asm 1: vpsubd <_1x8=reg256#1,<t=reg256#5,>t=reg256#5 +# asm 2: vpsubd <_1x8=%ymm0,<t=%ymm4,>t=%ymm4 +vpsubd %ymm0,%ymm4,%ymm4 + +# qhasm: d = t & _1x8 +# asm 1: vpand <t=reg256#5,<_1x8=reg256#1,>d=reg256#7 +# asm 2: vpand <t=%ymm4,<_1x8=%ymm0,>d=%ymm6 +vpand %ymm4,%ymm0,%ymm6 + +# qhasm: 8x v1a = t >> 1 +# asm 1: vpsrad $1,<t=reg256#5,>v1a=reg256#5 +# asm 2: vpsrad $1,<t=%ymm4,>v1a=%ymm4 +vpsrad $1,%ymm4,%ymm4 + +# qhasm: 8x v1a += d +# asm 1: vpaddd <d=reg256#7,<v1a=reg256#5,>v1a=reg256#5 +# asm 2: vpaddd <d=%ymm6,<v1a=%ymm4,>v1a=%ymm4 +vpaddd %ymm6,%ymm4,%ymm4 + +# qhasm: 8x d = v0a * qx8 +# asm 1: vpmulld <v0a=reg256#6,<qx8=reg256#2,>d=reg256#7 +# asm 2: vpmulld <v0a=%ymm5,<qx8=%ymm1,>d=%ymm6 +vpmulld %ymm5,%ymm1,%ymm6 + +# qhasm: 8x d <<= 1 +# asm 1: vpslld $1,<d=reg256#7,>d=reg256#7 +# asm 2: vpslld $1,<d=%ymm6,>d=%ymm6 +vpslld $1,%ymm6,%ymm6 + +# qhasm: 8x d = v - d +# asm 1: vpsubd <d=reg256#7,<v=reg256#4,>d=reg256#4 +# asm 2: vpsubd <d=%ymm6,<v=%ymm3,>d=%ymm3 +vpsubd %ymm6,%ymm3,%ymm3 + +# qhasm: 8x k = abs(d) +# asm 1: vpabsd <d=reg256#4,>k=reg256#4 +# asm 2: vpabsd <d=%ymm3,>k=%ymm3 +vpabsd %ymm3,%ymm3 + +# qhasm: v = mem256[input_1 + ctr + 1024] +# asm 1: vmovupd 1024(<input_1=int64#2,<ctr=int64#4),>v=reg256#7 +# asm 2: vmovupd 1024(<input_1=%rsi,<ctr=%rcx),>v=%ymm6 +vmovupd 1024(%rsi,%rcx),%ymm6 + +# qhasm: 8x v <<= 3 +# asm 1: vpslld $3,<v=reg256#7,>v=reg256#7 +# asm 2: vpslld $3,<v=%ymm6,>v=%ymm6 +vpslld $3,%ymm6,%ymm6 + +# qhasm: 8x v += rbit +# asm 1: vpaddd <rbit=reg256#3,<v=reg256#7,>v=reg256#7 +# asm 2: vpaddd 
<rbit=%ymm2,<v=%ymm6,>v=%ymm6 +vpaddd %ymm2,%ymm6,%ymm6 + +# qhasm: 8x b = v * mem256[v2730x8] +# asm 1: vpmulld v2730x8,<v=reg256#7,>b=reg256#8 +# asm 2: vpmulld v2730x8,<v=%ymm6,>b=%ymm7 +vpmulld v2730x8,%ymm6,%ymm7 + +# qhasm: 8x t = b >> 25 +# asm 1: vpsrad $25,<b=reg256#8,>t=reg256#8 +# asm 2: vpsrad $25,<b=%ymm7,>t=%ymm7 +vpsrad $25,%ymm7,%ymm7 + +# qhasm: 8x d = t * qx8 +# asm 1: vpmulld <t=reg256#8,<qx8=reg256#2,>d=reg256#9 +# asm 2: vpmulld <t=%ymm7,<qx8=%ymm1,>d=%ymm8 +vpmulld %ymm7,%ymm1,%ymm8 + +# qhasm: 8x b = v - d +# asm 1: vpsubd <d=reg256#9,<v=reg256#7,>b=reg256#9 +# asm 2: vpsubd <d=%ymm8,<v=%ymm6,>b=%ymm8 +vpsubd %ymm8,%ymm6,%ymm8 + +# qhasm: 8x b += _1x8 +# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#9,>b=reg256#9 +# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm8,>b=%ymm8 +vpaddd %ymm0,%ymm8,%ymm8 + +# qhasm: 8x b = qx8 - b +# asm 1: vpsubd <b=reg256#9,<qx8=reg256#2,>b=reg256#9 +# asm 2: vpsubd <b=%ymm8,<qx8=%ymm1,>b=%ymm8 +vpsubd %ymm8,%ymm1,%ymm8 + +# qhasm: 8x b >>= 31 +# asm 1: vpsrad $31,<b=reg256#9,>b=reg256#9 +# asm 2: vpsrad $31,<b=%ymm8,>b=%ymm8 +vpsrad $31,%ymm8,%ymm8 + +# qhasm: 8x t -= b +# asm 1: vpsubd <b=reg256#9,<t=reg256#8,>t=reg256#8 +# asm 2: vpsubd <b=%ymm8,<t=%ymm7,>t=%ymm7 +vpsubd %ymm8,%ymm7,%ymm7 + +# qhasm: d = t & _1x8 +# asm 1: vpand <t=reg256#8,<_1x8=reg256#1,>d=reg256#9 +# asm 2: vpand <t=%ymm7,<_1x8=%ymm0,>d=%ymm8 +vpand %ymm7,%ymm0,%ymm8 + +# qhasm: 8x v0b = t >> 1 +# asm 1: vpsrad $1,<t=reg256#8,>v0b=reg256#10 +# asm 2: vpsrad $1,<t=%ymm7,>v0b=%ymm9 +vpsrad $1,%ymm7,%ymm9 + +# qhasm: 8x v0b += d +# asm 1: vpaddd <d=reg256#9,<v0b=reg256#10,>v0b=reg256#9 +# asm 2: vpaddd <d=%ymm8,<v0b=%ymm9,>v0b=%ymm8 +vpaddd %ymm8,%ymm9,%ymm8 + +# qhasm: 8x t -= _1x8 +# asm 1: vpsubd <_1x8=reg256#1,<t=reg256#8,>t=reg256#8 +# asm 2: vpsubd <_1x8=%ymm0,<t=%ymm7,>t=%ymm7 +vpsubd %ymm0,%ymm7,%ymm7 + +# qhasm: d = t & _1x8 +# asm 1: vpand <t=reg256#8,<_1x8=reg256#1,>d=reg256#10 +# asm 2: vpand <t=%ymm7,<_1x8=%ymm0,>d=%ymm9 +vpand %ymm7,%ymm0,%ymm9 + +# 
qhasm: 8x v1b = t >> 1 +# asm 1: vpsrad $1,<t=reg256#8,>v1b=reg256#8 +# asm 2: vpsrad $1,<t=%ymm7,>v1b=%ymm7 +vpsrad $1,%ymm7,%ymm7 + +# qhasm: 8x v1b += d +# asm 1: vpaddd <d=reg256#10,<v1b=reg256#8,>v1b=reg256#8 +# asm 2: vpaddd <d=%ymm9,<v1b=%ymm7,>v1b=%ymm7 +vpaddd %ymm9,%ymm7,%ymm7 + +# qhasm: 8x d = v0b * qx8 +# asm 1: vpmulld <v0b=reg256#9,<qx8=reg256#2,>d=reg256#10 +# asm 2: vpmulld <v0b=%ymm8,<qx8=%ymm1,>d=%ymm9 +vpmulld %ymm8,%ymm1,%ymm9 + +# qhasm: 8x d <<= 1 +# asm 1: vpslld $1,<d=reg256#10,>d=reg256#10 +# asm 2: vpslld $1,<d=%ymm9,>d=%ymm9 +vpslld $1,%ymm9,%ymm9 + +# qhasm: 8x d = v - d +# asm 1: vpsubd <d=reg256#10,<v=reg256#7,>d=reg256#7 +# asm 2: vpsubd <d=%ymm9,<v=%ymm6,>d=%ymm6 +vpsubd %ymm9,%ymm6,%ymm6 + +# qhasm: 8x v = abs(d) +# asm 1: vpabsd <d=reg256#7,>v=reg256#7 +# asm 2: vpabsd <d=%ymm6,>v=%ymm6 +vpabsd %ymm6,%ymm6 + +# qhasm: 8x k += v +# asm 1: vpaddd <v=reg256#7,<k=reg256#4,>k=reg256#4 +# asm 2: vpaddd <v=%ymm6,<k=%ymm3,>k=%ymm3 +vpaddd %ymm6,%ymm3,%ymm3 + +# qhasm: v = mem256[input_1 + ctr + 2048] +# asm 1: vmovupd 2048(<input_1=int64#2,<ctr=int64#4),>v=reg256#7 +# asm 2: vmovupd 2048(<input_1=%rsi,<ctr=%rcx),>v=%ymm6 +vmovupd 2048(%rsi,%rcx),%ymm6 + +# qhasm: 8x v <<= 3 +# asm 1: vpslld $3,<v=reg256#7,>v=reg256#7 +# asm 2: vpslld $3,<v=%ymm6,>v=%ymm6 +vpslld $3,%ymm6,%ymm6 + +# qhasm: 8x v += rbit +# asm 1: vpaddd <rbit=reg256#3,<v=reg256#7,>v=reg256#7 +# asm 2: vpaddd <rbit=%ymm2,<v=%ymm6,>v=%ymm6 +vpaddd %ymm2,%ymm6,%ymm6 + +# qhasm: 8x b = v * mem256[v2730x8] +# asm 1: vpmulld v2730x8,<v=reg256#7,>b=reg256#10 +# asm 2: vpmulld v2730x8,<v=%ymm6,>b=%ymm9 +vpmulld v2730x8,%ymm6,%ymm9 + +# qhasm: 8x t = b >> 25 +# asm 1: vpsrad $25,<b=reg256#10,>t=reg256#10 +# asm 2: vpsrad $25,<b=%ymm9,>t=%ymm9 +vpsrad $25,%ymm9,%ymm9 + +# qhasm: 8x d = t * qx8 +# asm 1: vpmulld <t=reg256#10,<qx8=reg256#2,>d=reg256#11 +# asm 2: vpmulld <t=%ymm9,<qx8=%ymm1,>d=%ymm10 +vpmulld %ymm9,%ymm1,%ymm10 + +# qhasm: 8x b = v - d +# asm 1: vpsubd 
<d=reg256#11,<v=reg256#7,>b=reg256#11 +# asm 2: vpsubd <d=%ymm10,<v=%ymm6,>b=%ymm10 +vpsubd %ymm10,%ymm6,%ymm10 + +# qhasm: 8x b += _1x8 +# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#11,>b=reg256#11 +# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm10,>b=%ymm10 +vpaddd %ymm0,%ymm10,%ymm10 + +# qhasm: 8x b = qx8 - b +# asm 1: vpsubd <b=reg256#11,<qx8=reg256#2,>b=reg256#11 +# asm 2: vpsubd <b=%ymm10,<qx8=%ymm1,>b=%ymm10 +vpsubd %ymm10,%ymm1,%ymm10 + +# qhasm: 8x b >>= 31 +# asm 1: vpsrad $31,<b=reg256#11,>b=reg256#11 +# asm 2: vpsrad $31,<b=%ymm10,>b=%ymm10 +vpsrad $31,%ymm10,%ymm10 + +# qhasm: 8x t -= b +# asm 1: vpsubd <b=reg256#11,<t=reg256#10,>t=reg256#10 +# asm 2: vpsubd <b=%ymm10,<t=%ymm9,>t=%ymm9 +vpsubd %ymm10,%ymm9,%ymm9 + +# qhasm: d = t & _1x8 +# asm 1: vpand <t=reg256#10,<_1x8=reg256#1,>d=reg256#11 +# asm 2: vpand <t=%ymm9,<_1x8=%ymm0,>d=%ymm10 +vpand %ymm9,%ymm0,%ymm10 + +# qhasm: 8x v0c = t >> 1 +# asm 1: vpsrad $1,<t=reg256#10,>v0c=reg256#12 +# asm 2: vpsrad $1,<t=%ymm9,>v0c=%ymm11 +vpsrad $1,%ymm9,%ymm11 + +# qhasm: 8x v0c += d +# asm 1: vpaddd <d=reg256#11,<v0c=reg256#12,>v0c=reg256#11 +# asm 2: vpaddd <d=%ymm10,<v0c=%ymm11,>v0c=%ymm10 +vpaddd %ymm10,%ymm11,%ymm10 + +# qhasm: 8x t -= _1x8 +# asm 1: vpsubd <_1x8=reg256#1,<t=reg256#10,>t=reg256#10 +# asm 2: vpsubd <_1x8=%ymm0,<t=%ymm9,>t=%ymm9 +vpsubd %ymm0,%ymm9,%ymm9 + +# qhasm: d = t & _1x8 +# asm 1: vpand <t=reg256#10,<_1x8=reg256#1,>d=reg256#12 +# asm 2: vpand <t=%ymm9,<_1x8=%ymm0,>d=%ymm11 +vpand %ymm9,%ymm0,%ymm11 + +# qhasm: 8x v1c = t >> 1 +# asm 1: vpsrad $1,<t=reg256#10,>v1c=reg256#10 +# asm 2: vpsrad $1,<t=%ymm9,>v1c=%ymm9 +vpsrad $1,%ymm9,%ymm9 + +# qhasm: 8x v1c += d +# asm 1: vpaddd <d=reg256#12,<v1c=reg256#10,>v1c=reg256#10 +# asm 2: vpaddd <d=%ymm11,<v1c=%ymm9,>v1c=%ymm9 +vpaddd %ymm11,%ymm9,%ymm9 + +# qhasm: 8x d = v0c * qx8 +# asm 1: vpmulld <v0c=reg256#11,<qx8=reg256#2,>d=reg256#12 +# asm 2: vpmulld <v0c=%ymm10,<qx8=%ymm1,>d=%ymm11 +vpmulld %ymm10,%ymm1,%ymm11 + +# qhasm: 8x d <<= 1 +# asm 1: vpslld 
$1,<d=reg256#12,>d=reg256#12 +# asm 2: vpslld $1,<d=%ymm11,>d=%ymm11 +vpslld $1,%ymm11,%ymm11 + +# qhasm: 8x d = v - d +# asm 1: vpsubd <d=reg256#12,<v=reg256#7,>d=reg256#7 +# asm 2: vpsubd <d=%ymm11,<v=%ymm6,>d=%ymm6 +vpsubd %ymm11,%ymm6,%ymm6 + +# qhasm: 8x v = abs(d) +# asm 1: vpabsd <d=reg256#7,>v=reg256#7 +# asm 2: vpabsd <d=%ymm6,>v=%ymm6 +vpabsd %ymm6,%ymm6 + +# qhasm: 8x k += v +# asm 1: vpaddd <v=reg256#7,<k=reg256#4,>k=reg256#4 +# asm 2: vpaddd <v=%ymm6,<k=%ymm3,>k=%ymm3 +vpaddd %ymm6,%ymm3,%ymm3 + +# qhasm: v = mem256[input_1 + ctr + 3072] +# asm 1: vmovupd 3072(<input_1=int64#2,<ctr=int64#4),>v=reg256#7 +# asm 2: vmovupd 3072(<input_1=%rsi,<ctr=%rcx),>v=%ymm6 +vmovupd 3072(%rsi,%rcx),%ymm6 + +# qhasm: 8x v <<= 3 +# asm 1: vpslld $3,<v=reg256#7,>v=reg256#7 +# asm 2: vpslld $3,<v=%ymm6,>v=%ymm6 +vpslld $3,%ymm6,%ymm6 + +# qhasm: 8x v += rbit +# asm 1: vpaddd <rbit=reg256#3,<v=reg256#7,>v=reg256#3 +# asm 2: vpaddd <rbit=%ymm2,<v=%ymm6,>v=%ymm2 +vpaddd %ymm2,%ymm6,%ymm2 + +# qhasm: 8x b = v * mem256[v2730x8] +# asm 1: vpmulld v2730x8,<v=reg256#3,>b=reg256#7 +# asm 2: vpmulld v2730x8,<v=%ymm2,>b=%ymm6 +vpmulld v2730x8,%ymm2,%ymm6 + +# qhasm: 8x t = b >> 25 +# asm 1: vpsrad $25,<b=reg256#7,>t=reg256#7 +# asm 2: vpsrad $25,<b=%ymm6,>t=%ymm6 +vpsrad $25,%ymm6,%ymm6 + +# qhasm: 8x d = t * qx8 +# asm 1: vpmulld <t=reg256#7,<qx8=reg256#2,>d=reg256#12 +# asm 2: vpmulld <t=%ymm6,<qx8=%ymm1,>d=%ymm11 +vpmulld %ymm6,%ymm1,%ymm11 + +# qhasm: 8x b = v - d +# asm 1: vpsubd <d=reg256#12,<v=reg256#3,>b=reg256#12 +# asm 2: vpsubd <d=%ymm11,<v=%ymm2,>b=%ymm11 +vpsubd %ymm11,%ymm2,%ymm11 + +# qhasm: 8x b += _1x8 +# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#12,>b=reg256#12 +# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm11,>b=%ymm11 +vpaddd %ymm0,%ymm11,%ymm11 + +# qhasm: 8x b = qx8 - b +# asm 1: vpsubd <b=reg256#12,<qx8=reg256#2,>b=reg256#12 +# asm 2: vpsubd <b=%ymm11,<qx8=%ymm1,>b=%ymm11 +vpsubd %ymm11,%ymm1,%ymm11 + +# qhasm: 8x b >>= 31 +# asm 1: vpsrad $31,<b=reg256#12,>b=reg256#12 +# 
asm 2: vpsrad $31,<b=%ymm11,>b=%ymm11 +vpsrad $31,%ymm11,%ymm11 + +# qhasm: 8x t -= b +# asm 1: vpsubd <b=reg256#12,<t=reg256#7,>t=reg256#7 +# asm 2: vpsubd <b=%ymm11,<t=%ymm6,>t=%ymm6 +vpsubd %ymm11,%ymm6,%ymm6 + +# qhasm: d = t & _1x8 +# asm 1: vpand <t=reg256#7,<_1x8=reg256#1,>d=reg256#12 +# asm 2: vpand <t=%ymm6,<_1x8=%ymm0,>d=%ymm11 +vpand %ymm6,%ymm0,%ymm11 + +# qhasm: 8x v0d = t >> 1 +# asm 1: vpsrad $1,<t=reg256#7,>v0d=reg256#13 +# asm 2: vpsrad $1,<t=%ymm6,>v0d=%ymm12 +vpsrad $1,%ymm6,%ymm12 + +# qhasm: 8x v0d += d +# asm 1: vpaddd <d=reg256#12,<v0d=reg256#13,>v0d=reg256#12 +# asm 2: vpaddd <d=%ymm11,<v0d=%ymm12,>v0d=%ymm11 +vpaddd %ymm11,%ymm12,%ymm11 + +# qhasm: 8x t -= _1x8 +# asm 1: vpsubd <_1x8=reg256#1,<t=reg256#7,>t=reg256#7 +# asm 2: vpsubd <_1x8=%ymm0,<t=%ymm6,>t=%ymm6 +vpsubd %ymm0,%ymm6,%ymm6 + +# qhasm: d = t & _1x8 +# asm 1: vpand <t=reg256#7,<_1x8=reg256#1,>d=reg256#13 +# asm 2: vpand <t=%ymm6,<_1x8=%ymm0,>d=%ymm12 +vpand %ymm6,%ymm0,%ymm12 + +# qhasm: 8x v1d = t >> 1 +# asm 1: vpsrad $1,<t=reg256#7,>v1d=reg256#7 +# asm 2: vpsrad $1,<t=%ymm6,>v1d=%ymm6 +vpsrad $1,%ymm6,%ymm6 + +# qhasm: 8x v1d += d +# asm 1: vpaddd <d=reg256#13,<v1d=reg256#7,>v1d=reg256#7 +# asm 2: vpaddd <d=%ymm12,<v1d=%ymm6,>v1d=%ymm6 +vpaddd %ymm12,%ymm6,%ymm6 + +# qhasm: 8x d = v0d * qx8 +# asm 1: vpmulld <v0d=reg256#12,<qx8=reg256#2,>d=reg256#13 +# asm 2: vpmulld <v0d=%ymm11,<qx8=%ymm1,>d=%ymm12 +vpmulld %ymm11,%ymm1,%ymm12 + +# qhasm: 8x d <<= 1 +# asm 1: vpslld $1,<d=reg256#13,>d=reg256#13 +# asm 2: vpslld $1,<d=%ymm12,>d=%ymm12 +vpslld $1,%ymm12,%ymm12 + +# qhasm: 8x d = v - d +# asm 1: vpsubd <d=reg256#13,<v=reg256#3,>d=reg256#3 +# asm 2: vpsubd <d=%ymm12,<v=%ymm2,>d=%ymm2 +vpsubd %ymm12,%ymm2,%ymm2 + +# qhasm: 8x v = abs(d) +# asm 1: vpabsd <d=reg256#3,>v=reg256#3 +# asm 2: vpabsd <d=%ymm2,>v=%ymm2 +vpabsd %ymm2,%ymm2 + +# qhasm: 8x k += v +# asm 1: vpaddd <v=reg256#3,<k=reg256#4,>k=reg256#3 +# asm 2: vpaddd <v=%ymm2,<k=%ymm3,>k=%ymm2 +vpaddd %ymm2,%ymm3,%ymm2 + +# 
qhasm: 8x d = qx8 << 1 +# asm 1: vpslld $1,<qx8=reg256#2,>d=reg256#4 +# asm 2: vpslld $1,<qx8=%ymm1,>d=%ymm3 +vpslld $1,%ymm1,%ymm3 + +# qhasm: 8x d -= _1x8 +# asm 1: vpsubd <_1x8=reg256#1,<d=reg256#4,>d=reg256#4 +# asm 2: vpsubd <_1x8=%ymm0,<d=%ymm3,>d=%ymm3 +vpsubd %ymm0,%ymm3,%ymm3 + +# qhasm: 8x k = d - k +# asm 1: vpsubd <k=reg256#3,<d=reg256#4,>k=reg256#3 +# asm 2: vpsubd <k=%ymm2,<d=%ymm3,>k=%ymm2 +vpsubd %ymm2,%ymm3,%ymm2 + +# qhasm: 8x k >>= 31 +# asm 1: vpsrad $31,<k=reg256#3,>k=reg256#3 +# asm 2: vpsrad $31,<k=%ymm2,>k=%ymm2 +vpsrad $31,%ymm2,%ymm2 + +# qhasm: vtmp0 = v0a ^ v1a +# asm 1: vpxor <v0a=reg256#6,<v1a=reg256#5,>vtmp0=reg256#4 +# asm 2: vpxor <v0a=%ymm5,<v1a=%ymm4,>vtmp0=%ymm3 +vpxor %ymm5,%ymm4,%ymm3 + +# qhasm: vtmp0 &= k +# asm 1: vpand <k=reg256#3,<vtmp0=reg256#4,<vtmp0=reg256#4 +# asm 2: vpand <k=%ymm2,<vtmp0=%ymm3,<vtmp0=%ymm3 +vpand %ymm2,%ymm3,%ymm3 + +# qhasm: vtmp0 ^= v0a +# asm 1: vpxor <v0a=reg256#6,<vtmp0=reg256#4,<vtmp0=reg256#4 +# asm 2: vpxor <v0a=%ymm5,<vtmp0=%ymm3,<vtmp0=%ymm3 +vpxor %ymm5,%ymm3,%ymm3 + +# qhasm: vtmp1 = v0b ^ v1b +# asm 1: vpxor <v0b=reg256#9,<v1b=reg256#8,>vtmp1=reg256#5 +# asm 2: vpxor <v0b=%ymm8,<v1b=%ymm7,>vtmp1=%ymm4 +vpxor %ymm8,%ymm7,%ymm4 + +# qhasm: vtmp1 &= k +# asm 1: vpand <k=reg256#3,<vtmp1=reg256#5,<vtmp1=reg256#5 +# asm 2: vpand <k=%ymm2,<vtmp1=%ymm4,<vtmp1=%ymm4 +vpand %ymm2,%ymm4,%ymm4 + +# qhasm: vtmp1 ^= v0b +# asm 1: vpxor <v0b=reg256#9,<vtmp1=reg256#5,<vtmp1=reg256#5 +# asm 2: vpxor <v0b=%ymm8,<vtmp1=%ymm4,<vtmp1=%ymm4 +vpxor %ymm8,%ymm4,%ymm4 + +# qhasm: vtmp2 = v0c ^ v1c +# asm 1: vpxor <v0c=reg256#11,<v1c=reg256#10,>vtmp2=reg256#6 +# asm 2: vpxor <v0c=%ymm10,<v1c=%ymm9,>vtmp2=%ymm5 +vpxor %ymm10,%ymm9,%ymm5 + +# qhasm: vtmp2 &= k +# asm 1: vpand <k=reg256#3,<vtmp2=reg256#6,<vtmp2=reg256#6 +# asm 2: vpand <k=%ymm2,<vtmp2=%ymm5,<vtmp2=%ymm5 +vpand %ymm2,%ymm5,%ymm5 + +# qhasm: vtmp2 ^= v0c +# asm 1: vpxor <v0c=reg256#11,<vtmp2=reg256#6,<vtmp2=reg256#6 +# asm 2: vpxor 
<v0c=%ymm10,<vtmp2=%ymm5,<vtmp2=%ymm5 +vpxor %ymm10,%ymm5,%ymm5 + +# qhasm: vtmp3 = v0d ^ v1d +# asm 1: vpxor <v0d=reg256#12,<v1d=reg256#7,>vtmp3=reg256#7 +# asm 2: vpxor <v0d=%ymm11,<v1d=%ymm6,>vtmp3=%ymm6 +vpxor %ymm11,%ymm6,%ymm6 + +# qhasm: vtmp3 &= k +# asm 1: vpand <k=reg256#3,<vtmp3=reg256#7,<vtmp3=reg256#7 +# asm 2: vpand <k=%ymm2,<vtmp3=%ymm6,<vtmp3=%ymm6 +vpand %ymm2,%ymm6,%ymm6 + +# qhasm: vtmp3 ^= v0d +# asm 1: vpxor <v0d=reg256#12,<vtmp3=reg256#7,<vtmp3=reg256#7 +# asm 2: vpxor <v0d=%ymm11,<vtmp3=%ymm6,<vtmp3=%ymm6 +vpxor %ymm11,%ymm6,%ymm6 + +# qhasm: _3x8 = mem256[v3x8] +# asm 1: vmovdqu v3x8,>_3x8=reg256#8 +# asm 2: vmovdqu v3x8,>_3x8=%ymm7 +vmovdqu v3x8,%ymm7 + +# qhasm: 8x c = vtmp0 - vtmp3 +# asm 1: vpsubd <vtmp3=reg256#7,<vtmp0=reg256#4,>c=reg256#4 +# asm 2: vpsubd <vtmp3=%ymm6,<vtmp0=%ymm3,>c=%ymm3 +vpsubd %ymm6,%ymm3,%ymm3 + +# qhasm: c &= _3x8 +# asm 1: vpand <_3x8=reg256#8,<c=reg256#4,<c=reg256#4 +# asm 2: vpand <_3x8=%ymm7,<c=%ymm3,<c=%ymm3 +vpand %ymm7,%ymm3,%ymm3 + +# qhasm: mem256[input_0 + ctr + 0] = c +# asm 1: vmovupd <c=reg256#4,0(<input_0=int64#1,<ctr=int64#4) +# asm 2: vmovupd <c=%ymm3,0(<input_0=%rdi,<ctr=%rcx) +vmovupd %ymm3,0(%rdi,%rcx) + +# qhasm: 8x c = vtmp1 - vtmp3 +# asm 1: vpsubd <vtmp3=reg256#7,<vtmp1=reg256#5,>c=reg256#4 +# asm 2: vpsubd <vtmp3=%ymm6,<vtmp1=%ymm4,>c=%ymm3 +vpsubd %ymm6,%ymm4,%ymm3 + +# qhasm: c &= _3x8 +# asm 1: vpand <_3x8=reg256#8,<c=reg256#4,<c=reg256#4 +# asm 2: vpand <_3x8=%ymm7,<c=%ymm3,<c=%ymm3 +vpand %ymm7,%ymm3,%ymm3 + +# qhasm: mem256[input_0 + ctr + 1024] = c +# asm 1: vmovupd <c=reg256#4,1024(<input_0=int64#1,<ctr=int64#4) +# asm 2: vmovupd <c=%ymm3,1024(<input_0=%rdi,<ctr=%rcx) +vmovupd %ymm3,1024(%rdi,%rcx) + +# qhasm: 8x c = vtmp2 - vtmp3 +# asm 1: vpsubd <vtmp3=reg256#7,<vtmp2=reg256#6,>c=reg256#4 +# asm 2: vpsubd <vtmp3=%ymm6,<vtmp2=%ymm5,>c=%ymm3 +vpsubd %ymm6,%ymm5,%ymm3 + +# qhasm: c &= _3x8 +# asm 1: vpand <_3x8=reg256#8,<c=reg256#4,<c=reg256#4 +# asm 2: vpand 
<_3x8=%ymm7,<c=%ymm3,<c=%ymm3 +vpand %ymm7,%ymm3,%ymm3 + +# qhasm: mem256[input_0 + ctr + 2048] = c +# asm 1: vmovupd <c=reg256#4,2048(<input_0=int64#1,<ctr=int64#4) +# asm 2: vmovupd <c=%ymm3,2048(<input_0=%rdi,<ctr=%rcx) +vmovupd %ymm3,2048(%rdi,%rcx) + +# qhasm: 8x c = vtmp3 << 1 +# asm 1: vpslld $1,<vtmp3=reg256#7,>c=reg256#4 +# asm 2: vpslld $1,<vtmp3=%ymm6,>c=%ymm3 +vpslld $1,%ymm6,%ymm3 + +# qhasm: 8x c -= k +# asm 1: vpsubd <k=reg256#3,<c=reg256#4,>c=reg256#3 +# asm 2: vpsubd <k=%ymm2,<c=%ymm3,>c=%ymm2 +vpsubd %ymm2,%ymm3,%ymm2 + +# qhasm: c &= _3x8 +# asm 1: vpand <_3x8=reg256#8,<c=reg256#3,<c=reg256#3 +# asm 2: vpand <_3x8=%ymm7,<c=%ymm2,<c=%ymm2 +vpand %ymm7,%ymm2,%ymm2 + +# qhasm: mem256[input_0 + ctr + 3072] = c +# asm 1: vmovupd <c=reg256#3,3072(<input_0=int64#1,<ctr=int64#4) +# asm 2: vmovupd <c=%ymm2,3072(<input_0=%rdi,<ctr=%rcx) +vmovupd %ymm2,3072(%rdi,%rcx) + +# qhasm: (uint64) ctr >>= 5 +# asm 1: shr $5,<ctr=int64#4 +# asm 2: shr $5,<ctr=%rcx +shr $5,%rcx + +# qhasm: ctr += 1 +# asm 1: add $1,<ctr=int64#4 +# asm 2: add $1,<ctr=%rcx +add $1,%rcx + +# qhasm: unsigned<? 
ctr - 32 +# asm 1: cmp $32,<ctr=int64#4 +# asm 2: cmp $32,<ctr=%rcx +cmp $32,%rcx +# comment:fp stack unchanged by jump + +# qhasm: goto looptop if unsigned< +jb ._looptop + +# qhasm: return +add %r11,%rsp +ret diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.c b/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.c new file mode 100644 index 0000000000000000000000000000000000000000..b9f7a6c4f518f60608f0f616472295bcda9ec43b --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.c @@ -0,0 +1,127 @@ +#include "poly.h" +#include "randombytes.h" +#include "error_correction.h" +#include "fips202.h" + +static void encode_a(unsigned char *r, const poly *pk, const unsigned char *seed) +{ + int i; + poly_tobytes(r, pk); + for(i=0;i<NEWHOPE_SEEDBYTES;i++) + r[POLY_BYTES+i] = seed[i]; +} + +static void decode_a(poly *pk, unsigned char *seed, const unsigned char *r) +{ + int i; + poly_frombytes(pk, r); + for(i=0;i<NEWHOPE_SEEDBYTES;i++) + seed[i] = r[POLY_BYTES+i]; +} + +static void encode_b(unsigned char *r, const poly *b, const poly *c) +{ + int i; + poly_tobytes(r,b); + for(i=0;i<PARAM_N/4;i++) + r[POLY_BYTES+i] = c->coeffs[4*i] | (c->coeffs[4*i+1] << 2) | (c->coeffs[4*i+2] << 4) | (c->coeffs[4*i+3] << 6); +} + +static void decode_b(poly *b, poly *c, const unsigned char *r) +{ + int i; + poly_frombytes(b, r); + for(i=0;i<PARAM_N/4;i++) + { + c->coeffs[4*i+0] = r[POLY_BYTES+i] & 0x03; + c->coeffs[4*i+1] = (r[POLY_BYTES+i] >> 2) & 0x03; + c->coeffs[4*i+2] = (r[POLY_BYTES+i] >> 4) & 0x03; + c->coeffs[4*i+3] = (r[POLY_BYTES+i] >> 6); + } +} + +static void gen_a(poly *a, const unsigned char *seed) +{ + poly_uniform(a,seed); +} + + +// API FUNCTIONS + +void newhope_keygen(unsigned char *send, poly *sk) +{ + poly a, e, r, pk; + unsigned char seed[NEWHOPE_SEEDBYTES]; + unsigned char noiseseed[32]; + + randombytes(seed, NEWHOPE_SEEDBYTES); + sha3256(seed, seed, NEWHOPE_SEEDBYTES); /* Don't send output of system RNG */ + randombytes(noiseseed, 32); + + gen_a(&a, seed); + + 
poly_getnoise(sk,noiseseed,0); + poly_ntt(sk); + + poly_getnoise(&e,noiseseed,1); + poly_ntt(&e); + + poly_pointwise(&r,sk,&a); + poly_add(&pk,&e,&r); + + encode_a(send, &pk, seed); +} + + +void newhope_sharedb(unsigned char *sharedkey, unsigned char *send, const unsigned char *received) +{ + poly sp, ep, v, a, pka, c, epp, bp; + unsigned char seed[NEWHOPE_SEEDBYTES]; + unsigned char noiseseed[32]; + + randombytes(noiseseed, 32); + + decode_a(&pka, seed, received); + gen_a(&a, seed); + + poly_getnoise(&sp,noiseseed,0); + poly_ntt(&sp); + poly_getnoise(&ep,noiseseed,1); + poly_ntt(&ep); + + poly_pointwise(&bp, &a, &sp); + poly_add(&bp, &bp, &ep); + + poly_pointwise(&v, &pka, &sp); + poly_invntt(&v); + + poly_getnoise(&epp,noiseseed,2); + poly_add(&v, &v, &epp); + + helprec(&c, &v, noiseseed, 3); + + encode_b(send, &bp, &c); + + rec(sharedkey, &v, &c); + +#ifndef STATISTICAL_TEST + sha3256(sharedkey, sharedkey, 32); +#endif +} + + +void newhope_shareda(unsigned char *sharedkey, const poly *sk, const unsigned char *received) +{ + poly v,bp, c; + + decode_b(&bp, &c, received); + + poly_pointwise(&v,sk,&bp); + poly_invntt(&v); + + rec(sharedkey, &v, &c); + +#ifndef STATISTICAL_TEST + sha3256(sharedkey, sharedkey, 32); +#endif +} diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.h b/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.h new file mode 100644 index 0000000000000000000000000000000000000000..34e519e6b8afdd4ef583d67e1a2a9ad35c289b72 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.h @@ -0,0 +1,15 @@ +#ifndef NEWHOPE_H +#define NEWHOPE_H + +#include "poly.h" +#include "randombytes.h" +#include "crypto_stream_chacha20.h" +#include "error_correction.h" +#include <math.h> +#include <stdio.h> + +void newhope_keygen(unsigned char *send, poly *sk); +void newhope_sharedb(unsigned char *sharedkey, unsigned char *send, const unsigned char *received); +void newhope_shareda(unsigned char *sharedkey, const poly *ska, const unsigned char *received); + +#endif 
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/ntt.h b/crypt/liboqs/kex_rlwe_newhope/avx2/ntt.h new file mode 100644 index 0000000000000000000000000000000000000000..c24b1371a98ea4100305f6bbbaa48bdba1151f92 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/ntt.h @@ -0,0 +1,15 @@ +#ifndef NTT_H +#define NTT_H + +#include "inttypes.h" + +extern int32_t psis_bitrev[]; +extern int32_t psis_inv[]; +extern double omegas_double[]; +extern double omegas_inv_double[]; + +void bitrev_vector(int32_t* poly); +void pwmul_double(int32_t* poly, const int32_t* factors); +void ntt_double(int32_t*,const double*,const double*); + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/ntt_double.s b/crypt/liboqs/kex_rlwe_newhope/avx2/ntt_double.s new file mode 100644 index 0000000000000000000000000000000000000000..1ec429f7243a22ff49a471eb000cdc8e9028e699 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/ntt_double.s @@ -0,0 +1,2209 @@ + +# qhasm: int64 input_0 + +# qhasm: int64 input_1 + +# qhasm: int64 input_2 + +# qhasm: int64 input_3 + +# qhasm: int64 input_4 + +# qhasm: int64 input_5 + +# qhasm: stack64 input_6 + +# qhasm: stack64 input_7 + +# qhasm: int64 caller_r11 + +# qhasm: int64 caller_r12 + +# qhasm: int64 caller_r13 + +# qhasm: int64 caller_r14 + +# qhasm: int64 caller_r15 + +# qhasm: int64 caller_rbx + +# qhasm: int64 caller_rbp + +# qhasm: int64 ctri + +# qhasm: int64 ctrj + +# qhasm: int64 ap + +# qhasm: int64 tp + +# qhasm: int64 wp + +# qhasm: int64 pp + +# qhasm: reg256 c + +# qhasm: reg256 qinv + +# qhasm: reg256 q + +# qhasm: reg256 t0 + +# qhasm: reg256 t1 + +# qhasm: reg256 t2 + +# qhasm: reg256 t3 + +# qhasm: reg256 w + +# qhasm: reg256 a0 + +# qhasm: reg256 a1 + +# qhasm: reg256 a2 + +# qhasm: reg256 a3 + +# qhasm: reg256 r0 + +# qhasm: reg256 r1 + +# qhasm: reg256 r2 + +# qhasm: reg256 r3 + +# qhasm: enter ntt_double +.p2align 5 +.global _ntt_double +.global ntt_double +_ntt_double: +ntt_double: +mov %rsp,%r11 +and $31,%r11 +add $0,%r11 +sub 
%r11,%rsp + +# qhasm: q = mem256[q8] +# asm 1: vmovdqu q8,>q=reg256#1 +# asm 2: vmovdqu q8,>q=%ymm0 +vmovdqu q8,%ymm0 + +# qhasm: qinv = mem256[qinv16] +# asm 1: vmovdqu qinv16,>qinv=reg256#2 +# asm 2: vmovdqu qinv16,>qinv=%ymm1 +vmovdqu qinv16,%ymm1 + +# qhasm: ctrj = 64 +# asm 1: mov $64,>ctrj=int64#4 +# asm 2: mov $64,>ctrj=%rcx +mov $64,%rcx + +# qhasm: ap = input_0 +# asm 1: mov <input_0=int64#1,>ap=int64#5 +# asm 2: mov <input_0=%rdi,>ap=%r8 +mov %rdi,%r8 + +# qhasm: tp = input_2 +# asm 1: mov <input_2=int64#3,>tp=int64#6 +# asm 2: mov <input_2=%rdx,>tp=%r9 +mov %rdx,%r9 + +# qhasm: wp = input_1 + 8192 +# asm 1: lea 8192(<input_1=int64#2),>wp=int64#7 +# asm 2: lea 8192(<input_1=%rsi),>wp=%rax +lea 8192(%rsi),%rax + +# qhasm: pp = input_1 +# asm 1: mov <input_1=int64#2,>pp=int64#2 +# asm 2: mov <input_1=%rsi,>pp=%rsi +mov %rsi,%rsi + +# qhasm: a0 = (4x double)(4x int32)mem128[ap + 0] +# asm 1: vcvtdq2pd 0(<ap=int64#5),>a0=reg256#3 +# asm 2: vcvtdq2pd 0(<ap=%r8),>a0=%ymm2 +vcvtdq2pd 0(%r8),%ymm2 + +# qhasm: a1 = (4x double)(4x int32)mem128[ap + 16] +# asm 1: vcvtdq2pd 16(<ap=int64#5),>a1=reg256#4 +# asm 2: vcvtdq2pd 16(<ap=%r8),>a1=%ymm3 +vcvtdq2pd 16(%r8),%ymm3 + +# qhasm: a2 = (4x double)(4x int32)mem128[ap + 32] +# asm 1: vcvtdq2pd 32(<ap=int64#5),>a2=reg256#5 +# asm 2: vcvtdq2pd 32(<ap=%r8),>a2=%ymm4 +vcvtdq2pd 32(%r8),%ymm4 + +# qhasm: a3 = (4x double)(4x int32)mem128[ap + 48] +# asm 1: vcvtdq2pd 48(<ap=int64#5),>a3=reg256#6 +# asm 2: vcvtdq2pd 48(<ap=%r8),>a3=%ymm5 +vcvtdq2pd 48(%r8),%ymm5 + +# qhasm: r3 = mem256[neg2] +# asm 1: vmovdqu neg2,>r3=reg256#7 +# asm 2: vmovdqu neg2,>r3=%ymm6 +vmovdqu neg2,%ymm6 + +# qhasm: 4x r0 = approx a0 * r3 +# asm 1: vmulpd <a0=reg256#3,<r3=reg256#7,>r0=reg256#8 +# asm 2: vmulpd <a0=%ymm2,<r3=%ymm6,>r0=%ymm7 +vmulpd %ymm2,%ymm6,%ymm7 + +# qhasm: 4x r1 = approx a1 * r3 +# asm 1: vmulpd <a1=reg256#4,<r3=reg256#7,>r1=reg256#9 +# asm 2: vmulpd <a1=%ymm3,<r3=%ymm6,>r1=%ymm8 +vmulpd %ymm3,%ymm6,%ymm8 + +# qhasm: 4x r2 = approx 
a2 * r3 +# asm 1: vmulpd <a2=reg256#5,<r3=reg256#7,>r2=reg256#10 +# asm 2: vmulpd <a2=%ymm4,<r3=%ymm6,>r2=%ymm9 +vmulpd %ymm4,%ymm6,%ymm9 + +# qhasm: 4x r3 approx*= a3 +# asm 1: vmulpd <a3=reg256#6,<r3=reg256#7,>r3=reg256#7 +# asm 2: vmulpd <a3=%ymm5,<r3=%ymm6,>r3=%ymm6 +vmulpd %ymm5,%ymm6,%ymm6 + +# qhasm: r0[0,1,2,3] = a0[0]approx+a0[1],r0[0]approx+r0[1],a0[2]approx+a0[3],r0[2]approx+r0[3] +# asm 1: vhaddpd <r0=reg256#8,<a0=reg256#3,>r0=reg256#3 +# asm 2: vhaddpd <r0=%ymm7,<a0=%ymm2,>r0=%ymm2 +vhaddpd %ymm7,%ymm2,%ymm2 + +# qhasm: w = mem256[pp + 0] +# asm 1: vmovupd 0(<pp=int64#2),>w=reg256#8 +# asm 2: vmovupd 0(<pp=%rsi),>w=%ymm7 +vmovupd 0(%rsi),%ymm7 + +# qhasm: 4x r0 approx*= w +# asm 1: vmulpd <w=reg256#8,<r0=reg256#3,>r0=reg256#3 +# asm 2: vmulpd <w=%ymm7,<r0=%ymm2,>r0=%ymm2 +vmulpd %ymm7,%ymm2,%ymm2 + +# qhasm: a0[0,1,2,3] = r0[2,3],r0[0,1] +# asm 1: vperm2f128 $0x21,<r0=reg256#3,<r0=reg256#3,>a0=reg256#8 +# asm 2: vperm2f128 $0x21,<r0=%ymm2,<r0=%ymm2,>a0=%ymm7 +vperm2f128 $0x21,%ymm2,%ymm2,%ymm7 + +# qhasm: r1[0,1,2,3] = a1[0]approx+a1[1],r1[0]approx+r1[1],a1[2]approx+a1[3],r1[2]approx+r1[3] +# asm 1: vhaddpd <r1=reg256#9,<a1=reg256#4,>r1=reg256#4 +# asm 2: vhaddpd <r1=%ymm8,<a1=%ymm3,>r1=%ymm3 +vhaddpd %ymm8,%ymm3,%ymm3 + +# qhasm: w = mem256[pp + 32] +# asm 1: vmovupd 32(<pp=int64#2),>w=reg256#9 +# asm 2: vmovupd 32(<pp=%rsi),>w=%ymm8 +vmovupd 32(%rsi),%ymm8 + +# qhasm: 4x r1 approx*= w +# asm 1: vmulpd <w=reg256#9,<r1=reg256#4,>r1=reg256#4 +# asm 2: vmulpd <w=%ymm8,<r1=%ymm3,>r1=%ymm3 +vmulpd %ymm8,%ymm3,%ymm3 + +# qhasm: a1[0,1,2,3] = r1[2,3],r1[0,1] +# asm 1: vperm2f128 $0x21,<r1=reg256#4,<r1=reg256#4,>a1=reg256#9 +# asm 2: vperm2f128 $0x21,<r1=%ymm3,<r1=%ymm3,>a1=%ymm8 +vperm2f128 $0x21,%ymm3,%ymm3,%ymm8 + +# qhasm: r2[0,1,2,3] = a2[0]approx+a2[1],r2[0]approx+r2[1],a2[2]approx+a2[3],r2[2]approx+r2[3] +# asm 1: vhaddpd <r2=reg256#10,<a2=reg256#5,>r2=reg256#5 +# asm 2: vhaddpd <r2=%ymm9,<a2=%ymm4,>r2=%ymm4 +vhaddpd %ymm9,%ymm4,%ymm4 + +# qhasm: w = 
mem256[pp + 64] +# asm 1: vmovupd 64(<pp=int64#2),>w=reg256#10 +# asm 2: vmovupd 64(<pp=%rsi),>w=%ymm9 +vmovupd 64(%rsi),%ymm9 + +# qhasm: 4x r2 approx*= w +# asm 1: vmulpd <w=reg256#10,<r2=reg256#5,>r2=reg256#5 +# asm 2: vmulpd <w=%ymm9,<r2=%ymm4,>r2=%ymm4 +vmulpd %ymm9,%ymm4,%ymm4 + +# qhasm: a2[0,1,2,3] = r2[2,3],r2[0,1] +# asm 1: vperm2f128 $0x21,<r2=reg256#5,<r2=reg256#5,>a2=reg256#10 +# asm 2: vperm2f128 $0x21,<r2=%ymm4,<r2=%ymm4,>a2=%ymm9 +vperm2f128 $0x21,%ymm4,%ymm4,%ymm9 + +# qhasm: r3[0,1,2,3] = a3[0]approx+a3[1],r3[0]approx+r3[1],a3[2]approx+a3[3],r3[2]approx+r3[3] +# asm 1: vhaddpd <r3=reg256#7,<a3=reg256#6,>r3=reg256#6 +# asm 2: vhaddpd <r3=%ymm6,<a3=%ymm5,>r3=%ymm5 +vhaddpd %ymm6,%ymm5,%ymm5 + +# qhasm: w = mem256[pp + 96] +# asm 1: vmovupd 96(<pp=int64#2),>w=reg256#7 +# asm 2: vmovupd 96(<pp=%rsi),>w=%ymm6 +vmovupd 96(%rsi),%ymm6 + +# qhasm: 4x r3 approx*= w +# asm 1: vmulpd <w=reg256#7,<r3=reg256#6,>r3=reg256#6 +# asm 2: vmulpd <w=%ymm6,<r3=%ymm5,>r3=%ymm5 +vmulpd %ymm6,%ymm5,%ymm5 + +# qhasm: a3[0,1,2,3] = r3[2,3],r3[0,1] +# asm 1: vperm2f128 $0x21,<r3=reg256#6,<r3=reg256#6,>a3=reg256#7 +# asm 2: vperm2f128 $0x21,<r3=%ymm5,<r3=%ymm5,>a3=%ymm6 +vperm2f128 $0x21,%ymm5,%ymm5,%ymm6 + +# qhasm: c = mem256[neg4] +# asm 1: vmovdqu neg4,>c=reg256#11 +# asm 2: vmovdqu neg4,>c=%ymm10 +vmovdqu neg4,%ymm10 + +# qhasm: 4x a0 approx+= r0 * c +# asm 1: vfmadd231pd <r0=reg256#3,<c=reg256#11,<a0=reg256#8 +# asm 2: vfmadd231pd <r0=%ymm2,<c=%ymm10,<a0=%ymm7 +vfmadd231pd %ymm2,%ymm10,%ymm7 + +# qhasm: 4x a1 approx+= r1 * c +# asm 1: vfmadd231pd <r1=reg256#4,<c=reg256#11,<a1=reg256#9 +# asm 2: vfmadd231pd <r1=%ymm3,<c=%ymm10,<a1=%ymm8 +vfmadd231pd %ymm3,%ymm10,%ymm8 + +# qhasm: w = mem256[wp + 32] +# asm 1: vmovupd 32(<wp=int64#7),>w=reg256#3 +# asm 2: vmovupd 32(<wp=%rax),>w=%ymm2 +vmovupd 32(%rax),%ymm2 + +# qhasm: 4x a1 approx*= w +# asm 1: vmulpd <w=reg256#3,<a1=reg256#9,>a1=reg256#3 +# asm 2: vmulpd <w=%ymm2,<a1=%ymm8,>a1=%ymm2 +vmulpd %ymm2,%ymm8,%ymm2 + +# 
qhasm: w = mem256[wp + 64] +# asm 1: vmovupd 64(<wp=int64#7),>w=reg256#4 +# asm 2: vmovupd 64(<wp=%rax),>w=%ymm3 +vmovupd 64(%rax),%ymm3 + +# qhasm: 4x a2 approx+= r2 * c +# asm 1: vfmadd231pd <r2=reg256#5,<c=reg256#11,<a2=reg256#10 +# asm 2: vfmadd231pd <r2=%ymm4,<c=%ymm10,<a2=%ymm9 +vfmadd231pd %ymm4,%ymm10,%ymm9 + +# qhasm: 4x a2 approx*= w +# asm 1: vmulpd <w=reg256#4,<a2=reg256#10,>a2=reg256#4 +# asm 2: vmulpd <w=%ymm3,<a2=%ymm9,>a2=%ymm3 +vmulpd %ymm3,%ymm9,%ymm3 + +# qhasm: w = mem256[wp + 96] +# asm 1: vmovupd 96(<wp=int64#7),>w=reg256#5 +# asm 2: vmovupd 96(<wp=%rax),>w=%ymm4 +vmovupd 96(%rax),%ymm4 + +# qhasm: 4x a3 approx+= r3 * c +# asm 1: vfmadd231pd <r3=reg256#6,<c=reg256#11,<a3=reg256#7 +# asm 2: vfmadd231pd <r3=%ymm5,<c=%ymm10,<a3=%ymm6 +vfmadd231pd %ymm5,%ymm10,%ymm6 + +# qhasm: 4x a3 approx*= w +# asm 1: vmulpd <w=reg256#5,<a3=reg256#7,>a3=reg256#5 +# asm 2: vmulpd <w=%ymm4,<a3=%ymm6,>a3=%ymm4 +vmulpd %ymm4,%ymm6,%ymm4 + +# qhasm: 4x c = approx a1 * qinv +# asm 1: vmulpd <a1=reg256#3,<qinv=reg256#2,>c=reg256#6 +# asm 2: vmulpd <a1=%ymm2,<qinv=%ymm1,>c=%ymm5 +vmulpd %ymm2,%ymm1,%ymm5 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6 +# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5 +vroundpd $9,%ymm5,%ymm5 + +# qhasm: 4x a1 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a1=reg256#3 +# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a1=%ymm2 +vfnmadd231pd %ymm5,%ymm0,%ymm2 + +# qhasm: 4x c = approx a2 * qinv +# asm 1: vmulpd <a2=reg256#4,<qinv=reg256#2,>c=reg256#6 +# asm 2: vmulpd <a2=%ymm3,<qinv=%ymm1,>c=%ymm5 +vmulpd %ymm3,%ymm1,%ymm5 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6 +# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5 +vroundpd $9,%ymm5,%ymm5 + +# qhasm: 4x a2 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a2=reg256#4 +# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a2=%ymm3 +vfnmadd231pd %ymm5,%ymm0,%ymm3 + +# qhasm: 4x c = approx a3 * qinv +# asm 1: vmulpd 
<a3=reg256#5,<qinv=reg256#2,>c=reg256#6 +# asm 2: vmulpd <a3=%ymm4,<qinv=%ymm1,>c=%ymm5 +vmulpd %ymm4,%ymm1,%ymm5 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6 +# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5 +vroundpd $9,%ymm5,%ymm5 + +# qhasm: 4x a3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a3=reg256#5 +# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a3=%ymm4 +vfnmadd231pd %ymm5,%ymm0,%ymm4 + +# qhasm: 4x r0 = approx a0 + a1 +# asm 1: vaddpd <a0=reg256#8,<a1=reg256#3,>r0=reg256#6 +# asm 2: vaddpd <a0=%ymm7,<a1=%ymm2,>r0=%ymm5 +vaddpd %ymm7,%ymm2,%ymm5 + +# qhasm: 4x r2 = approx a2 + a3 +# asm 1: vaddpd <a2=reg256#4,<a3=reg256#5,>r2=reg256#7 +# asm 2: vaddpd <a2=%ymm3,<a3=%ymm4,>r2=%ymm6 +vaddpd %ymm3,%ymm4,%ymm6 + +# qhasm: 4x r1 = approx a0 - a1 +# asm 1: vsubpd <a1=reg256#3,<a0=reg256#8,>r1=reg256#3 +# asm 2: vsubpd <a1=%ymm2,<a0=%ymm7,>r1=%ymm2 +vsubpd %ymm2,%ymm7,%ymm2 + +# qhasm: w = mem64[wp + 136],mem64[wp + 136],mem64[wp + 136],mem64[wp + 136] +# asm 1: vbroadcastsd 136(<wp=int64#7),>w=reg256#8 +# asm 2: vbroadcastsd 136(<wp=%rax),>w=%ymm7 +vbroadcastsd 136(%rax),%ymm7 + +# qhasm: 4x r3 = approx a2 - a3 +# asm 1: vsubpd <a3=reg256#5,<a2=reg256#4,>r3=reg256#4 +# asm 2: vsubpd <a3=%ymm4,<a2=%ymm3,>r3=%ymm3 +vsubpd %ymm4,%ymm3,%ymm3 + +# qhasm: 4x r3 approx*= w +# asm 1: vmulpd <w=reg256#8,<r3=reg256#4,>r3=reg256#4 +# asm 2: vmulpd <w=%ymm7,<r3=%ymm3,>r3=%ymm3 +vmulpd %ymm7,%ymm3,%ymm3 + +# qhasm: 4x a0 = approx r0 + r2 +# asm 1: vaddpd <r0=reg256#6,<r2=reg256#7,>a0=reg256#5 +# asm 2: vaddpd <r0=%ymm5,<r2=%ymm6,>a0=%ymm4 +vaddpd %ymm5,%ymm6,%ymm4 + +# qhasm: 4x a1 = approx r1 + r3 +# asm 1: vaddpd <r1=reg256#3,<r3=reg256#4,>a1=reg256#8 +# asm 2: vaddpd <r1=%ymm2,<r3=%ymm3,>a1=%ymm7 +vaddpd %ymm2,%ymm3,%ymm7 + +# qhasm: 4x a2 = approx r0 - r2 +# asm 1: vsubpd <r2=reg256#7,<r0=reg256#6,>a2=reg256#6 +# asm 2: vsubpd <r2=%ymm6,<r0=%ymm5,>a2=%ymm5 +vsubpd %ymm6,%ymm5,%ymm5 + +# qhasm: 4x a3 = approx r1 - r3 +# asm 1: vsubpd 
<r3=reg256#4,<r1=reg256#3,>a3=reg256#3 +# asm 2: vsubpd <r3=%ymm3,<r1=%ymm2,>a3=%ymm2 +vsubpd %ymm3,%ymm2,%ymm2 + +# qhasm: mem256[tp + 0] = a0 +# asm 1: vmovupd <a0=reg256#5,0(<tp=int64#6) +# asm 2: vmovupd <a0=%ymm4,0(<tp=%r9) +vmovupd %ymm4,0(%r9) + +# qhasm: mem256[tp + 32] = a1 +# asm 1: vmovupd <a1=reg256#8,32(<tp=int64#6) +# asm 2: vmovupd <a1=%ymm7,32(<tp=%r9) +vmovupd %ymm7,32(%r9) + +# qhasm: mem256[tp + 64] = a2 +# asm 1: vmovupd <a2=reg256#6,64(<tp=int64#6) +# asm 2: vmovupd <a2=%ymm5,64(<tp=%r9) +vmovupd %ymm5,64(%r9) + +# qhasm: mem256[tp + 96] = a3 +# asm 1: vmovupd <a3=reg256#3,96(<tp=int64#6) +# asm 2: vmovupd <a3=%ymm2,96(<tp=%r9) +vmovupd %ymm2,96(%r9) + +# qhasm: ap+= 64 +# asm 1: add $64,<ap=int64#5 +# asm 2: add $64,<ap=%r8 +add $64,%r8 + +# qhasm: tp+= 128 +# asm 1: add $128,<tp=int64#6 +# asm 2: add $128,<tp=%r9 +add $128,%r9 + +# qhasm: wp+= 152 +# asm 1: add $152,<wp=int64#7 +# asm 2: add $152,<wp=%rax +add $152,%rax + +# qhasm: pp+= 128 +# asm 1: add $128,<pp=int64#2 +# asm 2: add $128,<pp=%rsi +add $128,%rsi + +# qhasm: ctrj-=1 +# asm 1: sub $1,<ctrj=int64#4 +# asm 2: sub $1,<ctrj=%rcx +sub $1,%rcx + +# qhasm: loopinreg: +._loopinreg: + +# qhasm: a0 = (4x double)(4x int32)mem128[ap + 0] +# asm 1: vcvtdq2pd 0(<ap=int64#5),>a0=reg256#3 +# asm 2: vcvtdq2pd 0(<ap=%r8),>a0=%ymm2 +vcvtdq2pd 0(%r8),%ymm2 + +# qhasm: a1 = (4x double)(4x int32)mem128[ap + 16] +# asm 1: vcvtdq2pd 16(<ap=int64#5),>a1=reg256#4 +# asm 2: vcvtdq2pd 16(<ap=%r8),>a1=%ymm3 +vcvtdq2pd 16(%r8),%ymm3 + +# qhasm: a2 = (4x double)(4x int32)mem128[ap + 32] +# asm 1: vcvtdq2pd 32(<ap=int64#5),>a2=reg256#5 +# asm 2: vcvtdq2pd 32(<ap=%r8),>a2=%ymm4 +vcvtdq2pd 32(%r8),%ymm4 + +# qhasm: a3 = (4x double)(4x int32)mem128[ap + 48] +# asm 1: vcvtdq2pd 48(<ap=int64#5),>a3=reg256#6 +# asm 2: vcvtdq2pd 48(<ap=%r8),>a3=%ymm5 +vcvtdq2pd 48(%r8),%ymm5 + +# qhasm: r3 = mem256[neg2] +# asm 1: vmovdqu neg2,>r3=reg256#7 +# asm 2: vmovdqu neg2,>r3=%ymm6 +vmovdqu neg2,%ymm6 + +# qhasm: 4x r0 = 
approx a0 * r3 +# asm 1: vmulpd <a0=reg256#3,<r3=reg256#7,>r0=reg256#8 +# asm 2: vmulpd <a0=%ymm2,<r3=%ymm6,>r0=%ymm7 +vmulpd %ymm2,%ymm6,%ymm7 + +# qhasm: 4x r1 = approx a1 * r3 +# asm 1: vmulpd <a1=reg256#4,<r3=reg256#7,>r1=reg256#9 +# asm 2: vmulpd <a1=%ymm3,<r3=%ymm6,>r1=%ymm8 +vmulpd %ymm3,%ymm6,%ymm8 + +# qhasm: 4x r2 = approx a2 * r3 +# asm 1: vmulpd <a2=reg256#5,<r3=reg256#7,>r2=reg256#10 +# asm 2: vmulpd <a2=%ymm4,<r3=%ymm6,>r2=%ymm9 +vmulpd %ymm4,%ymm6,%ymm9 + +# qhasm: 4x r3 approx*= a3 +# asm 1: vmulpd <a3=reg256#6,<r3=reg256#7,>r3=reg256#7 +# asm 2: vmulpd <a3=%ymm5,<r3=%ymm6,>r3=%ymm6 +vmulpd %ymm5,%ymm6,%ymm6 + +# qhasm: r0[0,1,2,3] = a0[0]approx+a0[1],r0[0]approx+r0[1],a0[2]approx+a0[3],r0[2]approx+r0[3] +# asm 1: vhaddpd <r0=reg256#8,<a0=reg256#3,>r0=reg256#3 +# asm 2: vhaddpd <r0=%ymm7,<a0=%ymm2,>r0=%ymm2 +vhaddpd %ymm7,%ymm2,%ymm2 + +# qhasm: w = mem256[pp + 0] +# asm 1: vmovupd 0(<pp=int64#2),>w=reg256#8 +# asm 2: vmovupd 0(<pp=%rsi),>w=%ymm7 +vmovupd 0(%rsi),%ymm7 + +# qhasm: 4x r0 approx*= w +# asm 1: vmulpd <w=reg256#8,<r0=reg256#3,>r0=reg256#3 +# asm 2: vmulpd <w=%ymm7,<r0=%ymm2,>r0=%ymm2 +vmulpd %ymm7,%ymm2,%ymm2 + +# qhasm: a0[0,1,2,3] = r0[2,3],r0[0,1] +# asm 1: vperm2f128 $0x21,<r0=reg256#3,<r0=reg256#3,>a0=reg256#8 +# asm 2: vperm2f128 $0x21,<r0=%ymm2,<r0=%ymm2,>a0=%ymm7 +vperm2f128 $0x21,%ymm2,%ymm2,%ymm7 + +# qhasm: r1[0,1,2,3] = a1[0]approx+a1[1],r1[0]approx+r1[1],a1[2]approx+a1[3],r1[2]approx+r1[3] +# asm 1: vhaddpd <r1=reg256#9,<a1=reg256#4,>r1=reg256#4 +# asm 2: vhaddpd <r1=%ymm8,<a1=%ymm3,>r1=%ymm3 +vhaddpd %ymm8,%ymm3,%ymm3 + +# qhasm: w = mem256[pp + 32] +# asm 1: vmovupd 32(<pp=int64#2),>w=reg256#9 +# asm 2: vmovupd 32(<pp=%rsi),>w=%ymm8 +vmovupd 32(%rsi),%ymm8 + +# qhasm: 4x r1 approx*= w +# asm 1: vmulpd <w=reg256#9,<r1=reg256#4,>r1=reg256#4 +# asm 2: vmulpd <w=%ymm8,<r1=%ymm3,>r1=%ymm3 +vmulpd %ymm8,%ymm3,%ymm3 + +# qhasm: a1[0,1,2,3] = r1[2,3],r1[0,1] +# asm 1: vperm2f128 $0x21,<r1=reg256#4,<r1=reg256#4,>a1=reg256#9 +# asm 
2: vperm2f128 $0x21,<r1=%ymm3,<r1=%ymm3,>a1=%ymm8 +vperm2f128 $0x21,%ymm3,%ymm3,%ymm8 + +# qhasm: r2[0,1,2,3] = a2[0]approx+a2[1],r2[0]approx+r2[1],a2[2]approx+a2[3],r2[2]approx+r2[3] +# asm 1: vhaddpd <r2=reg256#10,<a2=reg256#5,>r2=reg256#5 +# asm 2: vhaddpd <r2=%ymm9,<a2=%ymm4,>r2=%ymm4 +vhaddpd %ymm9,%ymm4,%ymm4 + +# qhasm: w = mem256[pp + 64] +# asm 1: vmovupd 64(<pp=int64#2),>w=reg256#10 +# asm 2: vmovupd 64(<pp=%rsi),>w=%ymm9 +vmovupd 64(%rsi),%ymm9 + +# qhasm: 4x r2 approx*= w +# asm 1: vmulpd <w=reg256#10,<r2=reg256#5,>r2=reg256#5 +# asm 2: vmulpd <w=%ymm9,<r2=%ymm4,>r2=%ymm4 +vmulpd %ymm9,%ymm4,%ymm4 + +# qhasm: a2[0,1,2,3] = r2[2,3],r2[0,1] +# asm 1: vperm2f128 $0x21,<r2=reg256#5,<r2=reg256#5,>a2=reg256#10 +# asm 2: vperm2f128 $0x21,<r2=%ymm4,<r2=%ymm4,>a2=%ymm9 +vperm2f128 $0x21,%ymm4,%ymm4,%ymm9 + +# qhasm: r3[0,1,2,3] = a3[0]approx+a3[1],r3[0]approx+r3[1],a3[2]approx+a3[3],r3[2]approx+r3[3] +# asm 1: vhaddpd <r3=reg256#7,<a3=reg256#6,>r3=reg256#6 +# asm 2: vhaddpd <r3=%ymm6,<a3=%ymm5,>r3=%ymm5 +vhaddpd %ymm6,%ymm5,%ymm5 + +# qhasm: w = mem256[pp + 96] +# asm 1: vmovupd 96(<pp=int64#2),>w=reg256#7 +# asm 2: vmovupd 96(<pp=%rsi),>w=%ymm6 +vmovupd 96(%rsi),%ymm6 + +# qhasm: 4x r3 approx*= w +# asm 1: vmulpd <w=reg256#7,<r3=reg256#6,>r3=reg256#6 +# asm 2: vmulpd <w=%ymm6,<r3=%ymm5,>r3=%ymm5 +vmulpd %ymm6,%ymm5,%ymm5 + +# qhasm: a3[0,1,2,3] = r3[2,3],r3[0,1] +# asm 1: vperm2f128 $0x21,<r3=reg256#6,<r3=reg256#6,>a3=reg256#7 +# asm 2: vperm2f128 $0x21,<r3=%ymm5,<r3=%ymm5,>a3=%ymm6 +vperm2f128 $0x21,%ymm5,%ymm5,%ymm6 + +# qhasm: c = mem256[neg4] +# asm 1: vmovdqu neg4,>c=reg256#11 +# asm 2: vmovdqu neg4,>c=%ymm10 +vmovdqu neg4,%ymm10 + +# qhasm: 4x a0 approx+= r0 * c +# asm 1: vfmadd231pd <r0=reg256#3,<c=reg256#11,<a0=reg256#8 +# asm 2: vfmadd231pd <r0=%ymm2,<c=%ymm10,<a0=%ymm7 +vfmadd231pd %ymm2,%ymm10,%ymm7 + +# qhasm: w = mem256[wp + 0] +# asm 1: vmovupd 0(<wp=int64#7),>w=reg256#3 +# asm 2: vmovupd 0(<wp=%rax),>w=%ymm2 +vmovupd 0(%rax),%ymm2 + +# qhasm: 4x 
a0 approx*= w +# asm 1: vmulpd <w=reg256#3,<a0=reg256#8,>a0=reg256#3 +# asm 2: vmulpd <w=%ymm2,<a0=%ymm7,>a0=%ymm2 +vmulpd %ymm2,%ymm7,%ymm2 + +# qhasm: 4x a1 approx+= r1 * c +# asm 1: vfmadd231pd <r1=reg256#4,<c=reg256#11,<a1=reg256#9 +# asm 2: vfmadd231pd <r1=%ymm3,<c=%ymm10,<a1=%ymm8 +vfmadd231pd %ymm3,%ymm10,%ymm8 + +# qhasm: w = mem256[wp + 32] +# asm 1: vmovupd 32(<wp=int64#7),>w=reg256#4 +# asm 2: vmovupd 32(<wp=%rax),>w=%ymm3 +vmovupd 32(%rax),%ymm3 + +# qhasm: 4x a1 approx*= w +# asm 1: vmulpd <w=reg256#4,<a1=reg256#9,>a1=reg256#4 +# asm 2: vmulpd <w=%ymm3,<a1=%ymm8,>a1=%ymm3 +vmulpd %ymm3,%ymm8,%ymm3 + +# qhasm: w = mem256[wp + 64] +# asm 1: vmovupd 64(<wp=int64#7),>w=reg256#8 +# asm 2: vmovupd 64(<wp=%rax),>w=%ymm7 +vmovupd 64(%rax),%ymm7 + +# qhasm: 4x a2 approx+= r2 * c +# asm 1: vfmadd231pd <r2=reg256#5,<c=reg256#11,<a2=reg256#10 +# asm 2: vfmadd231pd <r2=%ymm4,<c=%ymm10,<a2=%ymm9 +vfmadd231pd %ymm4,%ymm10,%ymm9 + +# qhasm: 4x a2 approx*= w +# asm 1: vmulpd <w=reg256#8,<a2=reg256#10,>a2=reg256#5 +# asm 2: vmulpd <w=%ymm7,<a2=%ymm9,>a2=%ymm4 +vmulpd %ymm7,%ymm9,%ymm4 + +# qhasm: w = mem256[wp + 96] +# asm 1: vmovupd 96(<wp=int64#7),>w=reg256#8 +# asm 2: vmovupd 96(<wp=%rax),>w=%ymm7 +vmovupd 96(%rax),%ymm7 + +# qhasm: 4x a3 approx+= r3 * c +# asm 1: vfmadd231pd <r3=reg256#6,<c=reg256#11,<a3=reg256#7 +# asm 2: vfmadd231pd <r3=%ymm5,<c=%ymm10,<a3=%ymm6 +vfmadd231pd %ymm5,%ymm10,%ymm6 + +# qhasm: 4x a3 approx*= w +# asm 1: vmulpd <w=reg256#8,<a3=reg256#7,>a3=reg256#6 +# asm 2: vmulpd <w=%ymm7,<a3=%ymm6,>a3=%ymm5 +vmulpd %ymm7,%ymm6,%ymm5 + +# qhasm: 4x c = approx a0 * qinv +# asm 1: vmulpd <a0=reg256#3,<qinv=reg256#2,>c=reg256#7 +# asm 2: vmulpd <a0=%ymm2,<qinv=%ymm1,>c=%ymm6 +vmulpd %ymm2,%ymm1,%ymm6 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7 +# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6 +vroundpd $9,%ymm6,%ymm6 + +# qhasm: 4x a0 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a0=reg256#3 +# asm 2: vfnmadd231pd 
<c=%ymm6,<q=%ymm0,<a0=%ymm2 +vfnmadd231pd %ymm6,%ymm0,%ymm2 + +# qhasm: 4x c = approx a1 * qinv +# asm 1: vmulpd <a1=reg256#4,<qinv=reg256#2,>c=reg256#7 +# asm 2: vmulpd <a1=%ymm3,<qinv=%ymm1,>c=%ymm6 +vmulpd %ymm3,%ymm1,%ymm6 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7 +# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6 +vroundpd $9,%ymm6,%ymm6 + +# qhasm: 4x a1 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a1=reg256#4 +# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<a1=%ymm3 +vfnmadd231pd %ymm6,%ymm0,%ymm3 + +# qhasm: 4x c = approx a2 * qinv +# asm 1: vmulpd <a2=reg256#5,<qinv=reg256#2,>c=reg256#7 +# asm 2: vmulpd <a2=%ymm4,<qinv=%ymm1,>c=%ymm6 +vmulpd %ymm4,%ymm1,%ymm6 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7 +# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6 +vroundpd $9,%ymm6,%ymm6 + +# qhasm: 4x a2 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a2=reg256#5 +# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<a2=%ymm4 +vfnmadd231pd %ymm6,%ymm0,%ymm4 + +# qhasm: 4x c = approx a3 * qinv +# asm 1: vmulpd <a3=reg256#6,<qinv=reg256#2,>c=reg256#7 +# asm 2: vmulpd <a3=%ymm5,<qinv=%ymm1,>c=%ymm6 +vmulpd %ymm5,%ymm1,%ymm6 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7 +# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6 +vroundpd $9,%ymm6,%ymm6 + +# qhasm: 4x a3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a3=reg256#6 +# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<a3=%ymm5 +vfnmadd231pd %ymm6,%ymm0,%ymm5 + +# qhasm: 4x r0 = approx a0 + a1 +# asm 1: vaddpd <a0=reg256#3,<a1=reg256#4,>r0=reg256#7 +# asm 2: vaddpd <a0=%ymm2,<a1=%ymm3,>r0=%ymm6 +vaddpd %ymm2,%ymm3,%ymm6 + +# qhasm: 4x r2 = approx a2 + a3 +# asm 1: vaddpd <a2=reg256#5,<a3=reg256#6,>r2=reg256#8 +# asm 2: vaddpd <a2=%ymm4,<a3=%ymm5,>r2=%ymm7 +vaddpd %ymm4,%ymm5,%ymm7 + +# qhasm: w = mem64[wp + 128],mem64[wp + 128],mem64[wp + 128],mem64[wp + 128] +# asm 1: vbroadcastsd 128(<wp=int64#7),>w=reg256#9 +# asm 2: vbroadcastsd 
128(<wp=%rax),>w=%ymm8 +vbroadcastsd 128(%rax),%ymm8 + +# qhasm: 4x r1 = approx a0 - a1 +# asm 1: vsubpd <a1=reg256#4,<a0=reg256#3,>r1=reg256#3 +# asm 2: vsubpd <a1=%ymm3,<a0=%ymm2,>r1=%ymm2 +vsubpd %ymm3,%ymm2,%ymm2 + +# qhasm: 4x r1 approx*= w +# asm 1: vmulpd <w=reg256#9,<r1=reg256#3,>r1=reg256#3 +# asm 2: vmulpd <w=%ymm8,<r1=%ymm2,>r1=%ymm2 +vmulpd %ymm8,%ymm2,%ymm2 + +# qhasm: w = mem64[wp + 136],mem64[wp + 136],mem64[wp + 136],mem64[wp + 136] +# asm 1: vbroadcastsd 136(<wp=int64#7),>w=reg256#4 +# asm 2: vbroadcastsd 136(<wp=%rax),>w=%ymm3 +vbroadcastsd 136(%rax),%ymm3 + +# qhasm: 4x r3 = approx a2 - a3 +# asm 1: vsubpd <a3=reg256#6,<a2=reg256#5,>r3=reg256#5 +# asm 2: vsubpd <a3=%ymm5,<a2=%ymm4,>r3=%ymm4 +vsubpd %ymm5,%ymm4,%ymm4 + +# qhasm: 4x r3 approx*= w +# asm 1: vmulpd <w=reg256#4,<r3=reg256#5,>r3=reg256#4 +# asm 2: vmulpd <w=%ymm3,<r3=%ymm4,>r3=%ymm3 +vmulpd %ymm3,%ymm4,%ymm3 + +# qhasm: 4x a0 = approx r0 + r2 +# asm 1: vaddpd <r0=reg256#7,<r2=reg256#8,>a0=reg256#5 +# asm 2: vaddpd <r0=%ymm6,<r2=%ymm7,>a0=%ymm4 +vaddpd %ymm6,%ymm7,%ymm4 + +# qhasm: 4x a1 = approx r1 + r3 +# asm 1: vaddpd <r1=reg256#3,<r3=reg256#4,>a1=reg256#6 +# asm 2: vaddpd <r1=%ymm2,<r3=%ymm3,>a1=%ymm5 +vaddpd %ymm2,%ymm3,%ymm5 + +# qhasm: w = mem64[wp + 144],mem64[wp + 144],mem64[wp + 144],mem64[wp + 144] +# asm 1: vbroadcastsd 144(<wp=int64#7),>w=reg256#9 +# asm 2: vbroadcastsd 144(<wp=%rax),>w=%ymm8 +vbroadcastsd 144(%rax),%ymm8 + +# qhasm: 4x a2 = approx r0 - r2 +# asm 1: vsubpd <r2=reg256#8,<r0=reg256#7,>a2=reg256#7 +# asm 2: vsubpd <r2=%ymm7,<r0=%ymm6,>a2=%ymm6 +vsubpd %ymm7,%ymm6,%ymm6 + +# qhasm: 4x a3 = approx r1 - r3 +# asm 1: vsubpd <r3=reg256#4,<r1=reg256#3,>a3=reg256#3 +# asm 2: vsubpd <r3=%ymm3,<r1=%ymm2,>a3=%ymm2 +vsubpd %ymm3,%ymm2,%ymm2 + +# qhasm: 4x a2 approx*= w +# asm 1: vmulpd <w=reg256#9,<a2=reg256#7,>a2=reg256#4 +# asm 2: vmulpd <w=%ymm8,<a2=%ymm6,>a2=%ymm3 +vmulpd %ymm8,%ymm6,%ymm3 + +# qhasm: 4x c = approx a2 * qinv +# asm 1: vmulpd 
<a2=reg256#4,<qinv=reg256#2,>c=reg256#7 +# asm 2: vmulpd <a2=%ymm3,<qinv=%ymm1,>c=%ymm6 +vmulpd %ymm3,%ymm1,%ymm6 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7 +# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6 +vroundpd $9,%ymm6,%ymm6 + +# qhasm: 4x a2 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a2=reg256#4 +# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<a2=%ymm3 +vfnmadd231pd %ymm6,%ymm0,%ymm3 + +# qhasm: 4x a3 approx*= w +# asm 1: vmulpd <w=reg256#9,<a3=reg256#3,>a3=reg256#3 +# asm 2: vmulpd <w=%ymm8,<a3=%ymm2,>a3=%ymm2 +vmulpd %ymm8,%ymm2,%ymm2 + +# qhasm: 4x c = approx a3 * qinv +# asm 1: vmulpd <a3=reg256#3,<qinv=reg256#2,>c=reg256#7 +# asm 2: vmulpd <a3=%ymm2,<qinv=%ymm1,>c=%ymm6 +vmulpd %ymm2,%ymm1,%ymm6 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7 +# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6 +vroundpd $9,%ymm6,%ymm6 + +# qhasm: 4x a3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a3=reg256#3 +# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<a3=%ymm2 +vfnmadd231pd %ymm6,%ymm0,%ymm2 + +# qhasm: mem256[tp + 0] = a0 +# asm 1: vmovupd <a0=reg256#5,0(<tp=int64#6) +# asm 2: vmovupd <a0=%ymm4,0(<tp=%r9) +vmovupd %ymm4,0(%r9) + +# qhasm: mem256[tp + 32] = a1 +# asm 1: vmovupd <a1=reg256#6,32(<tp=int64#6) +# asm 2: vmovupd <a1=%ymm5,32(<tp=%r9) +vmovupd %ymm5,32(%r9) + +# qhasm: mem256[tp + 64] = a2 +# asm 1: vmovupd <a2=reg256#4,64(<tp=int64#6) +# asm 2: vmovupd <a2=%ymm3,64(<tp=%r9) +vmovupd %ymm3,64(%r9) + +# qhasm: mem256[tp + 96] = a3 +# asm 1: vmovupd <a3=reg256#3,96(<tp=int64#6) +# asm 2: vmovupd <a3=%ymm2,96(<tp=%r9) +vmovupd %ymm2,96(%r9) + +# qhasm: ap+= 64 +# asm 1: add $64,<ap=int64#5 +# asm 2: add $64,<ap=%r8 +add $64,%r8 + +# qhasm: tp+= 128 +# asm 1: add $128,<tp=int64#6 +# asm 2: add $128,<tp=%r9 +add $128,%r9 + +# qhasm: wp+= 152 +# asm 1: add $152,<wp=int64#7 +# asm 2: add $152,<wp=%rax +add $152,%rax + +# qhasm: pp+= 128 +# asm 1: add $128,<pp=int64#2 +# asm 2: add $128,<pp=%rsi +add 
$128,%rsi + +# qhasm: unsigned>? ctrj-=1 +# asm 1: sub $1,<ctrj=int64#4 +# asm 2: sub $1,<ctrj=%rcx +sub $1,%rcx +# comment:fp stack unchanged by jump + +# qhasm: goto loopinreg if unsigned> +ja ._loopinreg + +# qhasm: ctri = 8 +# asm 1: mov $8,>ctri=int64#2 +# asm 2: mov $8,>ctri=%rsi +mov $8,%rsi + +# qhasm: tp = input_2 +# asm 1: mov <input_2=int64#3,>tp=int64#4 +# asm 2: mov <input_2=%rdx,>tp=%rcx +mov %rdx,%rcx + +# qhasm: ctrj = 4 +# asm 1: mov $4,>ctrj=int64#5 +# asm 2: mov $4,>ctrj=%r8 +mov $4,%r8 + +# qhasm: loop567jfirst: +._loop567jfirst: + +# qhasm: a0 = mem256[tp + 0] +# asm 1: vmovupd 0(<tp=int64#4),>a0=reg256#3 +# asm 2: vmovupd 0(<tp=%rcx),>a0=%ymm2 +vmovupd 0(%rcx),%ymm2 + +# qhasm: a1 = mem256[tp + 128] +# asm 1: vmovupd 128(<tp=int64#4),>a1=reg256#4 +# asm 2: vmovupd 128(<tp=%rcx),>a1=%ymm3 +vmovupd 128(%rcx),%ymm3 + +# qhasm: a2 = mem256[tp + 256] +# asm 1: vmovupd 256(<tp=int64#4),>a2=reg256#5 +# asm 2: vmovupd 256(<tp=%rcx),>a2=%ymm4 +vmovupd 256(%rcx),%ymm4 + +# qhasm: a3 = mem256[tp + 384] +# asm 1: vmovupd 384(<tp=int64#4),>a3=reg256#6 +# asm 2: vmovupd 384(<tp=%rcx),>a3=%ymm5 +vmovupd 384(%rcx),%ymm5 + +# qhasm: 4x r0 = approx a0 + a1 +# asm 1: vaddpd <a0=reg256#3,<a1=reg256#4,>r0=reg256#7 +# asm 2: vaddpd <a0=%ymm2,<a1=%ymm3,>r0=%ymm6 +vaddpd %ymm2,%ymm3,%ymm6 + +# qhasm: 4x r2 = approx a2 + a3 +# asm 1: vaddpd <a2=reg256#5,<a3=reg256#6,>r2=reg256#8 +# asm 2: vaddpd <a2=%ymm4,<a3=%ymm5,>r2=%ymm7 +vaddpd %ymm4,%ymm5,%ymm7 + +# qhasm: 4x r1 = approx a0 - a1 +# asm 1: vsubpd <a1=reg256#4,<a0=reg256#3,>r1=reg256#3 +# asm 2: vsubpd <a1=%ymm3,<a0=%ymm2,>r1=%ymm2 +vsubpd %ymm3,%ymm2,%ymm2 + +# qhasm: 4x r3 = approx a2 - a3 +# asm 1: vsubpd <a3=reg256#6,<a2=reg256#5,>r3=reg256#4 +# asm 2: vsubpd <a3=%ymm5,<a2=%ymm4,>r3=%ymm3 +vsubpd %ymm5,%ymm4,%ymm3 + +# qhasm: 4x a0 = approx r0 + r2 +# asm 1: vaddpd <r0=reg256#7,<r2=reg256#8,>a0=reg256#5 +# asm 2: vaddpd <r0=%ymm6,<r2=%ymm7,>a0=%ymm4 +vaddpd %ymm6,%ymm7,%ymm4 + +# qhasm: 4x a2 = approx r0 - r2 
+# asm 1: vsubpd <r2=reg256#8,<r0=reg256#7,>a2=reg256#6 +# asm 2: vsubpd <r2=%ymm7,<r0=%ymm6,>a2=%ymm5 +vsubpd %ymm7,%ymm6,%ymm5 + +# qhasm: w = mem64[wp + 8],mem64[wp + 8],mem64[wp + 8],mem64[wp + 8] +# asm 1: vbroadcastsd 8(<wp=int64#7),>w=reg256#7 +# asm 2: vbroadcastsd 8(<wp=%rax),>w=%ymm6 +vbroadcastsd 8(%rax),%ymm6 + +# qhasm: 4x r3 approx*= w +# asm 1: vmulpd <w=reg256#7,<r3=reg256#4,>r3=reg256#4 +# asm 2: vmulpd <w=%ymm6,<r3=%ymm3,>r3=%ymm3 +vmulpd %ymm6,%ymm3,%ymm3 + +# qhasm: 4x c = approx r3 * qinv +# asm 1: vmulpd <r3=reg256#4,<qinv=reg256#2,>c=reg256#7 +# asm 2: vmulpd <r3=%ymm3,<qinv=%ymm1,>c=%ymm6 +vmulpd %ymm3,%ymm1,%ymm6 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7 +# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6 +vroundpd $9,%ymm6,%ymm6 + +# qhasm: 4x r3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<r3=reg256#4 +# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<r3=%ymm3 +vfnmadd231pd %ymm6,%ymm0,%ymm3 + +# qhasm: 4x a1 = approx r1 + r3 +# asm 1: vaddpd <r1=reg256#3,<r3=reg256#4,>a1=reg256#7 +# asm 2: vaddpd <r1=%ymm2,<r3=%ymm3,>a1=%ymm6 +vaddpd %ymm2,%ymm3,%ymm6 + +# qhasm: 4x a3 = approx r1 - r3 +# asm 1: vsubpd <r3=reg256#4,<r1=reg256#3,>a3=reg256#3 +# asm 2: vsubpd <r3=%ymm3,<r1=%ymm2,>a3=%ymm2 +vsubpd %ymm3,%ymm2,%ymm2 + +# qhasm: t0 = mem256[tp + 512] +# asm 1: vmovupd 512(<tp=int64#4),>t0=reg256#4 +# asm 2: vmovupd 512(<tp=%rcx),>t0=%ymm3 +vmovupd 512(%rcx),%ymm3 + +# qhasm: t1 = mem256[tp + 640] +# asm 1: vmovupd 640(<tp=int64#4),>t1=reg256#8 +# asm 2: vmovupd 640(<tp=%rcx),>t1=%ymm7 +vmovupd 640(%rcx),%ymm7 + +# qhasm: t2 = mem256[tp + 768] +# asm 1: vmovupd 768(<tp=int64#4),>t2=reg256#9 +# asm 2: vmovupd 768(<tp=%rcx),>t2=%ymm8 +vmovupd 768(%rcx),%ymm8 + +# qhasm: t3 = mem256[tp + 896] +# asm 1: vmovupd 896(<tp=int64#4),>t3=reg256#10 +# asm 2: vmovupd 896(<tp=%rcx),>t3=%ymm9 +vmovupd 896(%rcx),%ymm9 + +# qhasm: 4x r0 = approx t0 + t1 +# asm 1: vaddpd <t0=reg256#4,<t1=reg256#8,>r0=reg256#11 +# asm 2: vaddpd 
<t0=%ymm3,<t1=%ymm7,>r0=%ymm10 +vaddpd %ymm3,%ymm7,%ymm10 + +# qhasm: 4x r2 = approx t2 + t3 +# asm 1: vaddpd <t2=reg256#9,<t3=reg256#10,>r2=reg256#12 +# asm 2: vaddpd <t2=%ymm8,<t3=%ymm9,>r2=%ymm11 +vaddpd %ymm8,%ymm9,%ymm11 + +# qhasm: 4x r1 = approx t0 - t1 +# asm 1: vsubpd <t1=reg256#8,<t0=reg256#4,>r1=reg256#4 +# asm 2: vsubpd <t1=%ymm7,<t0=%ymm3,>r1=%ymm3 +vsubpd %ymm7,%ymm3,%ymm3 + +# qhasm: 4x r3 = approx t2 - t3 +# asm 1: vsubpd <t3=reg256#10,<t2=reg256#9,>r3=reg256#8 +# asm 2: vsubpd <t3=%ymm9,<t2=%ymm8,>r3=%ymm7 +vsubpd %ymm9,%ymm8,%ymm7 + +# qhasm: 4x t0 = approx r0 + r2 +# asm 1: vaddpd <r0=reg256#11,<r2=reg256#12,>t0=reg256#9 +# asm 2: vaddpd <r0=%ymm10,<r2=%ymm11,>t0=%ymm8 +vaddpd %ymm10,%ymm11,%ymm8 + +# qhasm: 4x t2 = approx r0 - r2 +# asm 1: vsubpd <r2=reg256#12,<r0=reg256#11,>t2=reg256#10 +# asm 2: vsubpd <r2=%ymm11,<r0=%ymm10,>t2=%ymm9 +vsubpd %ymm11,%ymm10,%ymm9 + +# qhasm: w = mem64[wp + 24],mem64[wp + 24],mem64[wp + 24],mem64[wp + 24] +# asm 1: vbroadcastsd 24(<wp=int64#7),>w=reg256#11 +# asm 2: vbroadcastsd 24(<wp=%rax),>w=%ymm10 +vbroadcastsd 24(%rax),%ymm10 + +# qhasm: 4x r1 approx*= w +# asm 1: vmulpd <w=reg256#11,<r1=reg256#4,>r1=reg256#4 +# asm 2: vmulpd <w=%ymm10,<r1=%ymm3,>r1=%ymm3 +vmulpd %ymm10,%ymm3,%ymm3 + +# qhasm: 4x c = approx r1 * qinv +# asm 1: vmulpd <r1=reg256#4,<qinv=reg256#2,>c=reg256#11 +# asm 2: vmulpd <r1=%ymm3,<qinv=%ymm1,>c=%ymm10 +vmulpd %ymm3,%ymm1,%ymm10 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11 +# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10 +vroundpd $9,%ymm10,%ymm10 + +# qhasm: 4x r1 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r1=reg256#4 +# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r1=%ymm3 +vfnmadd231pd %ymm10,%ymm0,%ymm3 + +# qhasm: w = mem64[wp + 32],mem64[wp + 32],mem64[wp + 32],mem64[wp + 32] +# asm 1: vbroadcastsd 32(<wp=int64#7),>w=reg256#11 +# asm 2: vbroadcastsd 32(<wp=%rax),>w=%ymm10 +vbroadcastsd 32(%rax),%ymm10 + +# qhasm: 4x r3 approx*= w +# asm 1: 
vmulpd <w=reg256#11,<r3=reg256#8,>r3=reg256#8 +# asm 2: vmulpd <w=%ymm10,<r3=%ymm7,>r3=%ymm7 +vmulpd %ymm10,%ymm7,%ymm7 + +# qhasm: 4x c = approx r3 * qinv +# asm 1: vmulpd <r3=reg256#8,<qinv=reg256#2,>c=reg256#11 +# asm 2: vmulpd <r3=%ymm7,<qinv=%ymm1,>c=%ymm10 +vmulpd %ymm7,%ymm1,%ymm10 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11 +# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10 +vroundpd $9,%ymm10,%ymm10 + +# qhasm: 4x r3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r3=reg256#8 +# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r3=%ymm7 +vfnmadd231pd %ymm10,%ymm0,%ymm7 + +# qhasm: 4x t1 = approx r1 + r3 +# asm 1: vaddpd <r1=reg256#4,<r3=reg256#8,>t1=reg256#11 +# asm 2: vaddpd <r1=%ymm3,<r3=%ymm7,>t1=%ymm10 +vaddpd %ymm3,%ymm7,%ymm10 + +# qhasm: w = mem64[wp + 40],mem64[wp + 40],mem64[wp + 40],mem64[wp + 40] +# asm 1: vbroadcastsd 40(<wp=int64#7),>w=reg256#12 +# asm 2: vbroadcastsd 40(<wp=%rax),>w=%ymm11 +vbroadcastsd 40(%rax),%ymm11 + +# qhasm: 4x t3 = approx r1 - r3 +# asm 1: vsubpd <r3=reg256#8,<r1=reg256#4,>t3=reg256#4 +# asm 2: vsubpd <r3=%ymm7,<r1=%ymm3,>t3=%ymm3 +vsubpd %ymm7,%ymm3,%ymm3 + +# qhasm: 4x t3 approx*= w +# asm 1: vmulpd <w=reg256#12,<t3=reg256#4,>t3=reg256#4 +# asm 2: vmulpd <w=%ymm11,<t3=%ymm3,>t3=%ymm3 +vmulpd %ymm11,%ymm3,%ymm3 + +# qhasm: 4x c = approx t3 * qinv +# asm 1: vmulpd <t3=reg256#4,<qinv=reg256#2,>c=reg256#8 +# asm 2: vmulpd <t3=%ymm3,<qinv=%ymm1,>c=%ymm7 +vmulpd %ymm3,%ymm1,%ymm7 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#8,>c=reg256#8 +# asm 2: vroundpd $9,<c=%ymm7,>c=%ymm7 +vroundpd $9,%ymm7,%ymm7 + +# qhasm: 4x t3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#8,<q=reg256#1,<t3=reg256#4 +# asm 2: vfnmadd231pd <c=%ymm7,<q=%ymm0,<t3=%ymm3 +vfnmadd231pd %ymm7,%ymm0,%ymm3 + +# qhasm: 4x t2 approx*= w +# asm 1: vmulpd <w=reg256#12,<t2=reg256#10,>t2=reg256#8 +# asm 2: vmulpd <w=%ymm11,<t2=%ymm9,>t2=%ymm7 +vmulpd %ymm11,%ymm9,%ymm7 + +# qhasm: 4x c = approx t2 * qinv +# asm 1: 
vmulpd <t2=reg256#8,<qinv=reg256#2,>c=reg256#10 +# asm 2: vmulpd <t2=%ymm7,<qinv=%ymm1,>c=%ymm9 +vmulpd %ymm7,%ymm1,%ymm9 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#10,>c=reg256#10 +# asm 2: vroundpd $9,<c=%ymm9,>c=%ymm9 +vroundpd $9,%ymm9,%ymm9 + +# qhasm: 4x t2 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#10,<q=reg256#1,<t2=reg256#8 +# asm 2: vfnmadd231pd <c=%ymm9,<q=%ymm0,<t2=%ymm7 +vfnmadd231pd %ymm9,%ymm0,%ymm7 + +# qhasm: 4x r0 = approx a0 + t0 +# asm 1: vaddpd <a0=reg256#5,<t0=reg256#9,>r0=reg256#10 +# asm 2: vaddpd <a0=%ymm4,<t0=%ymm8,>r0=%ymm9 +vaddpd %ymm4,%ymm8,%ymm9 + +# qhasm: 4x r1 = approx a1 + t1 +# asm 1: vaddpd <a1=reg256#7,<t1=reg256#11,>r1=reg256#12 +# asm 2: vaddpd <a1=%ymm6,<t1=%ymm10,>r1=%ymm11 +vaddpd %ymm6,%ymm10,%ymm11 + +# qhasm: 4x r2 = approx a2 + t2 +# asm 1: vaddpd <a2=reg256#6,<t2=reg256#8,>r2=reg256#13 +# asm 2: vaddpd <a2=%ymm5,<t2=%ymm7,>r2=%ymm12 +vaddpd %ymm5,%ymm7,%ymm12 + +# qhasm: 4x r3 = approx a3 + t3 +# asm 1: vaddpd <a3=reg256#3,<t3=reg256#4,>r3=reg256#14 +# asm 2: vaddpd <a3=%ymm2,<t3=%ymm3,>r3=%ymm13 +vaddpd %ymm2,%ymm3,%ymm13 + +# qhasm: 4x a0 approx-= t0 +# asm 1: vsubpd <t0=reg256#9,<a0=reg256#5,>a0=reg256#5 +# asm 2: vsubpd <t0=%ymm8,<a0=%ymm4,>a0=%ymm4 +vsubpd %ymm8,%ymm4,%ymm4 + +# qhasm: 4x a1 approx-= t1 +# asm 1: vsubpd <t1=reg256#11,<a1=reg256#7,>a1=reg256#7 +# asm 2: vsubpd <t1=%ymm10,<a1=%ymm6,>a1=%ymm6 +vsubpd %ymm10,%ymm6,%ymm6 + +# qhasm: 4x a2 approx-= t2 +# asm 1: vsubpd <t2=reg256#8,<a2=reg256#6,>a2=reg256#6 +# asm 2: vsubpd <t2=%ymm7,<a2=%ymm5,>a2=%ymm5 +vsubpd %ymm7,%ymm5,%ymm5 + +# qhasm: 4x a3 approx-= t3 +# asm 1: vsubpd <t3=reg256#4,<a3=reg256#3,>a3=reg256#3 +# asm 2: vsubpd <t3=%ymm3,<a3=%ymm2,>a3=%ymm2 +vsubpd %ymm3,%ymm2,%ymm2 + +# qhasm: mem256[tp + 0] = r0 +# asm 1: vmovupd <r0=reg256#10,0(<tp=int64#4) +# asm 2: vmovupd <r0=%ymm9,0(<tp=%rcx) +vmovupd %ymm9,0(%rcx) + +# qhasm: mem256[tp + 128] = r1 +# asm 1: vmovupd <r1=reg256#12,128(<tp=int64#4) +# asm 2: vmovupd 
<r1=%ymm11,128(<tp=%rcx) +vmovupd %ymm11,128(%rcx) + +# qhasm: mem256[tp + 256] = r2 +# asm 1: vmovupd <r2=reg256#13,256(<tp=int64#4) +# asm 2: vmovupd <r2=%ymm12,256(<tp=%rcx) +vmovupd %ymm12,256(%rcx) + +# qhasm: mem256[tp + 384] = r3 +# asm 1: vmovupd <r3=reg256#14,384(<tp=int64#4) +# asm 2: vmovupd <r3=%ymm13,384(<tp=%rcx) +vmovupd %ymm13,384(%rcx) + +# qhasm: mem256[tp + 512] = a0 +# asm 1: vmovupd <a0=reg256#5,512(<tp=int64#4) +# asm 2: vmovupd <a0=%ymm4,512(<tp=%rcx) +vmovupd %ymm4,512(%rcx) + +# qhasm: mem256[tp + 640] = a1 +# asm 1: vmovupd <a1=reg256#7,640(<tp=int64#4) +# asm 2: vmovupd <a1=%ymm6,640(<tp=%rcx) +vmovupd %ymm6,640(%rcx) + +# qhasm: mem256[tp + 768] = a2 +# asm 1: vmovupd <a2=reg256#6,768(<tp=int64#4) +# asm 2: vmovupd <a2=%ymm5,768(<tp=%rcx) +vmovupd %ymm5,768(%rcx) + +# qhasm: mem256[tp + 896] = a3 +# asm 1: vmovupd <a3=reg256#3,896(<tp=int64#4) +# asm 2: vmovupd <a3=%ymm2,896(<tp=%rcx) +vmovupd %ymm2,896(%rcx) + +# qhasm: tp+=32 +# asm 1: add $32,<tp=int64#4 +# asm 2: add $32,<tp=%rcx +add $32,%rcx + +# qhasm: unsigned>? 
ctrj-=1 +# asm 1: sub $1,<ctrj=int64#5 +# asm 2: sub $1,<ctrj=%r8 +sub $1,%r8 +# comment:fp stack unchanged by jump + +# qhasm: goto loop567jfirst if unsigned> +ja ._loop567jfirst + +# qhasm: tp+= 896 +# asm 1: add $896,<tp=int64#4 +# asm 2: add $896,<tp=%rcx +add $896,%rcx + +# qhasm: wp+= 56 +# asm 1: add $56,<wp=int64#7 +# asm 2: add $56,<wp=%rax +add $56,%rax + +# qhasm: ctri-=1 +# asm 1: sub $1,<ctri=int64#2 +# asm 2: sub $1,<ctri=%rsi +sub $1,%rsi + +# qhasm: loop567i: +._loop567i: + +# qhasm: ctrj = 4 +# asm 1: mov $4,>ctrj=int64#5 +# asm 2: mov $4,>ctrj=%r8 +mov $4,%r8 + +# qhasm: loop567j: +._loop567j: + +# qhasm: a0 = mem256[tp + 0] +# asm 1: vmovupd 0(<tp=int64#4),>a0=reg256#3 +# asm 2: vmovupd 0(<tp=%rcx),>a0=%ymm2 +vmovupd 0(%rcx),%ymm2 + +# qhasm: a1 = mem256[tp + 128] +# asm 1: vmovupd 128(<tp=int64#4),>a1=reg256#4 +# asm 2: vmovupd 128(<tp=%rcx),>a1=%ymm3 +vmovupd 128(%rcx),%ymm3 + +# qhasm: a2 = mem256[tp + 256] +# asm 1: vmovupd 256(<tp=int64#4),>a2=reg256#5 +# asm 2: vmovupd 256(<tp=%rcx),>a2=%ymm4 +vmovupd 256(%rcx),%ymm4 + +# qhasm: a3 = mem256[tp + 384] +# asm 1: vmovupd 384(<tp=int64#4),>a3=reg256#6 +# asm 2: vmovupd 384(<tp=%rcx),>a3=%ymm5 +vmovupd 384(%rcx),%ymm5 + +# qhasm: 4x r0 = approx a0 + a1 +# asm 1: vaddpd <a0=reg256#3,<a1=reg256#4,>r0=reg256#7 +# asm 2: vaddpd <a0=%ymm2,<a1=%ymm3,>r0=%ymm6 +vaddpd %ymm2,%ymm3,%ymm6 + +# qhasm: 4x r2 = approx a2 + a3 +# asm 1: vaddpd <a2=reg256#5,<a3=reg256#6,>r2=reg256#8 +# asm 2: vaddpd <a2=%ymm4,<a3=%ymm5,>r2=%ymm7 +vaddpd %ymm4,%ymm5,%ymm7 + +# qhasm: 4x r1 = approx a0 - a1 +# asm 1: vsubpd <a1=reg256#4,<a0=reg256#3,>r1=reg256#3 +# asm 2: vsubpd <a1=%ymm3,<a0=%ymm2,>r1=%ymm2 +vsubpd %ymm3,%ymm2,%ymm2 + +# qhasm: 4x r3 = approx a2 - a3 +# asm 1: vsubpd <a3=reg256#6,<a2=reg256#5,>r3=reg256#4 +# asm 2: vsubpd <a3=%ymm5,<a2=%ymm4,>r3=%ymm3 +vsubpd %ymm5,%ymm4,%ymm3 + +# qhasm: 4x a0 = approx r0 + r2 +# asm 1: vaddpd <r0=reg256#7,<r2=reg256#8,>a0=reg256#5 +# asm 2: vaddpd 
<r0=%ymm6,<r2=%ymm7,>a0=%ymm4 +vaddpd %ymm6,%ymm7,%ymm4 + +# qhasm: 4x a2 = approx r0 - r2 +# asm 1: vsubpd <r2=reg256#8,<r0=reg256#7,>a2=reg256#6 +# asm 2: vsubpd <r2=%ymm7,<r0=%ymm6,>a2=%ymm5 +vsubpd %ymm7,%ymm6,%ymm5 + +# qhasm: w = mem64[wp + 0],mem64[wp + 0],mem64[wp + 0],mem64[wp + 0] +# asm 1: vbroadcastsd 0(<wp=int64#7),>w=reg256#7 +# asm 2: vbroadcastsd 0(<wp=%rax),>w=%ymm6 +vbroadcastsd 0(%rax),%ymm6 + +# qhasm: 4x r1 approx*= w +# asm 1: vmulpd <w=reg256#7,<r1=reg256#3,>r1=reg256#3 +# asm 2: vmulpd <w=%ymm6,<r1=%ymm2,>r1=%ymm2 +vmulpd %ymm6,%ymm2,%ymm2 + +# qhasm: 4x c = approx r1 * qinv +# asm 1: vmulpd <r1=reg256#3,<qinv=reg256#2,>c=reg256#7 +# asm 2: vmulpd <r1=%ymm2,<qinv=%ymm1,>c=%ymm6 +vmulpd %ymm2,%ymm1,%ymm6 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7 +# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6 +vroundpd $9,%ymm6,%ymm6 + +# qhasm: 4x r1 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<r1=reg256#3 +# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<r1=%ymm2 +vfnmadd231pd %ymm6,%ymm0,%ymm2 + +# qhasm: w = mem64[wp + 8],mem64[wp + 8],mem64[wp + 8],mem64[wp + 8] +# asm 1: vbroadcastsd 8(<wp=int64#7),>w=reg256#7 +# asm 2: vbroadcastsd 8(<wp=%rax),>w=%ymm6 +vbroadcastsd 8(%rax),%ymm6 + +# qhasm: 4x r3 approx*= w +# asm 1: vmulpd <w=reg256#7,<r3=reg256#4,>r3=reg256#4 +# asm 2: vmulpd <w=%ymm6,<r3=%ymm3,>r3=%ymm3 +vmulpd %ymm6,%ymm3,%ymm3 + +# qhasm: 4x c = approx r3 * qinv +# asm 1: vmulpd <r3=reg256#4,<qinv=reg256#2,>c=reg256#7 +# asm 2: vmulpd <r3=%ymm3,<qinv=%ymm1,>c=%ymm6 +vmulpd %ymm3,%ymm1,%ymm6 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7 +# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6 +vroundpd $9,%ymm6,%ymm6 + +# qhasm: 4x r3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<r3=reg256#4 +# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<r3=%ymm3 +vfnmadd231pd %ymm6,%ymm0,%ymm3 + +# qhasm: 4x a1 = approx r1 + r3 +# asm 1: vaddpd <r1=reg256#3,<r3=reg256#4,>a1=reg256#7 +# asm 2: vaddpd 
<r1=%ymm2,<r3=%ymm3,>a1=%ymm6 +vaddpd %ymm2,%ymm3,%ymm6 + +# qhasm: 4x a3 = approx r1 - r3 +# asm 1: vsubpd <r3=reg256#4,<r1=reg256#3,>a3=reg256#3 +# asm 2: vsubpd <r3=%ymm3,<r1=%ymm2,>a3=%ymm2 +vsubpd %ymm3,%ymm2,%ymm2 + +# qhasm: w = mem64[wp + 16],mem64[wp + 16],mem64[wp + 16],mem64[wp + 16] +# asm 1: vbroadcastsd 16(<wp=int64#7),>w=reg256#4 +# asm 2: vbroadcastsd 16(<wp=%rax),>w=%ymm3 +vbroadcastsd 16(%rax),%ymm3 + +# qhasm: 4x a3 approx*= w +# asm 1: vmulpd <w=reg256#4,<a3=reg256#3,>a3=reg256#3 +# asm 2: vmulpd <w=%ymm3,<a3=%ymm2,>a3=%ymm2 +vmulpd %ymm3,%ymm2,%ymm2 + +# qhasm: 4x c = approx a3 * qinv +# asm 1: vmulpd <a3=reg256#3,<qinv=reg256#2,>c=reg256#8 +# asm 2: vmulpd <a3=%ymm2,<qinv=%ymm1,>c=%ymm7 +vmulpd %ymm2,%ymm1,%ymm7 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#8,>c=reg256#8 +# asm 2: vroundpd $9,<c=%ymm7,>c=%ymm7 +vroundpd $9,%ymm7,%ymm7 + +# qhasm: 4x a3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#8,<q=reg256#1,<a3=reg256#3 +# asm 2: vfnmadd231pd <c=%ymm7,<q=%ymm0,<a3=%ymm2 +vfnmadd231pd %ymm7,%ymm0,%ymm2 + +# qhasm: 4x a2 approx*= w +# asm 1: vmulpd <w=reg256#4,<a2=reg256#6,>a2=reg256#4 +# asm 2: vmulpd <w=%ymm3,<a2=%ymm5,>a2=%ymm3 +vmulpd %ymm3,%ymm5,%ymm3 + +# qhasm: 4x c = approx a2 * qinv +# asm 1: vmulpd <a2=reg256#4,<qinv=reg256#2,>c=reg256#6 +# asm 2: vmulpd <a2=%ymm3,<qinv=%ymm1,>c=%ymm5 +vmulpd %ymm3,%ymm1,%ymm5 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6 +# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5 +vroundpd $9,%ymm5,%ymm5 + +# qhasm: 4x a2 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a2=reg256#4 +# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a2=%ymm3 +vfnmadd231pd %ymm5,%ymm0,%ymm3 + +# qhasm: t0 = mem256[tp + 512] +# asm 1: vmovupd 512(<tp=int64#4),>t0=reg256#6 +# asm 2: vmovupd 512(<tp=%rcx),>t0=%ymm5 +vmovupd 512(%rcx),%ymm5 + +# qhasm: t1 = mem256[tp + 640] +# asm 1: vmovupd 640(<tp=int64#4),>t1=reg256#8 +# asm 2: vmovupd 640(<tp=%rcx),>t1=%ymm7 +vmovupd 640(%rcx),%ymm7 + +# 
qhasm: t2 = mem256[tp + 768] +# asm 1: vmovupd 768(<tp=int64#4),>t2=reg256#9 +# asm 2: vmovupd 768(<tp=%rcx),>t2=%ymm8 +vmovupd 768(%rcx),%ymm8 + +# qhasm: t3 = mem256[tp + 896] +# asm 1: vmovupd 896(<tp=int64#4),>t3=reg256#10 +# asm 2: vmovupd 896(<tp=%rcx),>t3=%ymm9 +vmovupd 896(%rcx),%ymm9 + +# qhasm: 4x r0 = approx t0 + t1 +# asm 1: vaddpd <t0=reg256#6,<t1=reg256#8,>r0=reg256#11 +# asm 2: vaddpd <t0=%ymm5,<t1=%ymm7,>r0=%ymm10 +vaddpd %ymm5,%ymm7,%ymm10 + +# qhasm: 4x r2 = approx t2 + t3 +# asm 1: vaddpd <t2=reg256#9,<t3=reg256#10,>r2=reg256#12 +# asm 2: vaddpd <t2=%ymm8,<t3=%ymm9,>r2=%ymm11 +vaddpd %ymm8,%ymm9,%ymm11 + +# qhasm: 4x r1 = approx t0 - t1 +# asm 1: vsubpd <t1=reg256#8,<t0=reg256#6,>r1=reg256#6 +# asm 2: vsubpd <t1=%ymm7,<t0=%ymm5,>r1=%ymm5 +vsubpd %ymm7,%ymm5,%ymm5 + +# qhasm: 4x r3 = approx t2 - t3 +# asm 1: vsubpd <t3=reg256#10,<t2=reg256#9,>r3=reg256#8 +# asm 2: vsubpd <t3=%ymm9,<t2=%ymm8,>r3=%ymm7 +vsubpd %ymm9,%ymm8,%ymm7 + +# qhasm: 4x t0 = approx r0 + r2 +# asm 1: vaddpd <r0=reg256#11,<r2=reg256#12,>t0=reg256#9 +# asm 2: vaddpd <r0=%ymm10,<r2=%ymm11,>t0=%ymm8 +vaddpd %ymm10,%ymm11,%ymm8 + +# qhasm: 4x t2 = approx r0 - r2 +# asm 1: vsubpd <r2=reg256#12,<r0=reg256#11,>t2=reg256#10 +# asm 2: vsubpd <r2=%ymm11,<r0=%ymm10,>t2=%ymm9 +vsubpd %ymm11,%ymm10,%ymm9 + +# qhasm: w = mem64[wp + 24],mem64[wp + 24],mem64[wp + 24],mem64[wp + 24] +# asm 1: vbroadcastsd 24(<wp=int64#7),>w=reg256#11 +# asm 2: vbroadcastsd 24(<wp=%rax),>w=%ymm10 +vbroadcastsd 24(%rax),%ymm10 + +# qhasm: 4x r1 approx*= w +# asm 1: vmulpd <w=reg256#11,<r1=reg256#6,>r1=reg256#6 +# asm 2: vmulpd <w=%ymm10,<r1=%ymm5,>r1=%ymm5 +vmulpd %ymm10,%ymm5,%ymm5 + +# qhasm: 4x c = approx r1 * qinv +# asm 1: vmulpd <r1=reg256#6,<qinv=reg256#2,>c=reg256#11 +# asm 2: vmulpd <r1=%ymm5,<qinv=%ymm1,>c=%ymm10 +vmulpd %ymm5,%ymm1,%ymm10 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11 +# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10 +vroundpd $9,%ymm10,%ymm10 + +# qhasm: 4x r1 
approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r1=reg256#6 +# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r1=%ymm5 +vfnmadd231pd %ymm10,%ymm0,%ymm5 + +# qhasm: w = mem64[wp + 32],mem64[wp + 32],mem64[wp + 32],mem64[wp + 32] +# asm 1: vbroadcastsd 32(<wp=int64#7),>w=reg256#11 +# asm 2: vbroadcastsd 32(<wp=%rax),>w=%ymm10 +vbroadcastsd 32(%rax),%ymm10 + +# qhasm: 4x r3 approx*= w +# asm 1: vmulpd <w=reg256#11,<r3=reg256#8,>r3=reg256#8 +# asm 2: vmulpd <w=%ymm10,<r3=%ymm7,>r3=%ymm7 +vmulpd %ymm10,%ymm7,%ymm7 + +# qhasm: 4x c = approx r3 * qinv +# asm 1: vmulpd <r3=reg256#8,<qinv=reg256#2,>c=reg256#11 +# asm 2: vmulpd <r3=%ymm7,<qinv=%ymm1,>c=%ymm10 +vmulpd %ymm7,%ymm1,%ymm10 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11 +# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10 +vroundpd $9,%ymm10,%ymm10 + +# qhasm: 4x r3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r3=reg256#8 +# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r3=%ymm7 +vfnmadd231pd %ymm10,%ymm0,%ymm7 + +# qhasm: 4x t1 = approx r1 + r3 +# asm 1: vaddpd <r1=reg256#6,<r3=reg256#8,>t1=reg256#11 +# asm 2: vaddpd <r1=%ymm5,<r3=%ymm7,>t1=%ymm10 +vaddpd %ymm5,%ymm7,%ymm10 + +# qhasm: w = mem64[wp + 40],mem64[wp + 40],mem64[wp + 40],mem64[wp + 40] +# asm 1: vbroadcastsd 40(<wp=int64#7),>w=reg256#12 +# asm 2: vbroadcastsd 40(<wp=%rax),>w=%ymm11 +vbroadcastsd 40(%rax),%ymm11 + +# qhasm: 4x t3 = approx r1 - r3 +# asm 1: vsubpd <r3=reg256#8,<r1=reg256#6,>t3=reg256#6 +# asm 2: vsubpd <r3=%ymm7,<r1=%ymm5,>t3=%ymm5 +vsubpd %ymm7,%ymm5,%ymm5 + +# qhasm: 4x t3 approx*= w +# asm 1: vmulpd <w=reg256#12,<t3=reg256#6,>t3=reg256#6 +# asm 2: vmulpd <w=%ymm11,<t3=%ymm5,>t3=%ymm5 +vmulpd %ymm11,%ymm5,%ymm5 + +# qhasm: 4x c = approx t3 * qinv +# asm 1: vmulpd <t3=reg256#6,<qinv=reg256#2,>c=reg256#8 +# asm 2: vmulpd <t3=%ymm5,<qinv=%ymm1,>c=%ymm7 +vmulpd %ymm5,%ymm1,%ymm7 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#8,>c=reg256#8 +# asm 2: vroundpd $9,<c=%ymm7,>c=%ymm7 
+vroundpd $9,%ymm7,%ymm7 + +# qhasm: 4x t3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#8,<q=reg256#1,<t3=reg256#6 +# asm 2: vfnmadd231pd <c=%ymm7,<q=%ymm0,<t3=%ymm5 +vfnmadd231pd %ymm7,%ymm0,%ymm5 + +# qhasm: 4x t2 approx*= w +# asm 1: vmulpd <w=reg256#12,<t2=reg256#10,>t2=reg256#8 +# asm 2: vmulpd <w=%ymm11,<t2=%ymm9,>t2=%ymm7 +vmulpd %ymm11,%ymm9,%ymm7 + +# qhasm: 4x c = approx t2 * qinv +# asm 1: vmulpd <t2=reg256#8,<qinv=reg256#2,>c=reg256#10 +# asm 2: vmulpd <t2=%ymm7,<qinv=%ymm1,>c=%ymm9 +vmulpd %ymm7,%ymm1,%ymm9 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#10,>c=reg256#10 +# asm 2: vroundpd $9,<c=%ymm9,>c=%ymm9 +vroundpd $9,%ymm9,%ymm9 + +# qhasm: 4x t2 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#10,<q=reg256#1,<t2=reg256#8 +# asm 2: vfnmadd231pd <c=%ymm9,<q=%ymm0,<t2=%ymm7 +vfnmadd231pd %ymm9,%ymm0,%ymm7 + +# qhasm: 4x r0 = approx a0 + t0 +# asm 1: vaddpd <a0=reg256#5,<t0=reg256#9,>r0=reg256#10 +# asm 2: vaddpd <a0=%ymm4,<t0=%ymm8,>r0=%ymm9 +vaddpd %ymm4,%ymm8,%ymm9 + +# qhasm: 4x r1 = approx a1 + t1 +# asm 1: vaddpd <a1=reg256#7,<t1=reg256#11,>r1=reg256#12 +# asm 2: vaddpd <a1=%ymm6,<t1=%ymm10,>r1=%ymm11 +vaddpd %ymm6,%ymm10,%ymm11 + +# qhasm: 4x r2 = approx a2 + t2 +# asm 1: vaddpd <a2=reg256#4,<t2=reg256#8,>r2=reg256#13 +# asm 2: vaddpd <a2=%ymm3,<t2=%ymm7,>r2=%ymm12 +vaddpd %ymm3,%ymm7,%ymm12 + +# qhasm: 4x r3 = approx a3 + t3 +# asm 1: vaddpd <a3=reg256#3,<t3=reg256#6,>r3=reg256#14 +# asm 2: vaddpd <a3=%ymm2,<t3=%ymm5,>r3=%ymm13 +vaddpd %ymm2,%ymm5,%ymm13 + +# qhasm: 4x a0 approx-= t0 +# asm 1: vsubpd <t0=reg256#9,<a0=reg256#5,>a0=reg256#5 +# asm 2: vsubpd <t0=%ymm8,<a0=%ymm4,>a0=%ymm4 +vsubpd %ymm8,%ymm4,%ymm4 + +# qhasm: 4x a1 approx-= t1 +# asm 1: vsubpd <t1=reg256#11,<a1=reg256#7,>a1=reg256#7 +# asm 2: vsubpd <t1=%ymm10,<a1=%ymm6,>a1=%ymm6 +vsubpd %ymm10,%ymm6,%ymm6 + +# qhasm: 4x a2 approx-= t2 +# asm 1: vsubpd <t2=reg256#8,<a2=reg256#4,>a2=reg256#4 +# asm 2: vsubpd <t2=%ymm7,<a2=%ymm3,>a2=%ymm3 +vsubpd %ymm7,%ymm3,%ymm3 + 
+# qhasm: 4x a3 approx-= t3 +# asm 1: vsubpd <t3=reg256#6,<a3=reg256#3,>a3=reg256#3 +# asm 2: vsubpd <t3=%ymm5,<a3=%ymm2,>a3=%ymm2 +vsubpd %ymm5,%ymm2,%ymm2 + +# qhasm: w = mem64[wp + 48],mem64[wp + 48],mem64[wp + 48],mem64[wp + 48] +# asm 1: vbroadcastsd 48(<wp=int64#7),>w=reg256#6 +# asm 2: vbroadcastsd 48(<wp=%rax),>w=%ymm5 +vbroadcastsd 48(%rax),%ymm5 + +# qhasm: 4x a0 approx*= w +# asm 1: vmulpd <w=reg256#6,<a0=reg256#5,>a0=reg256#5 +# asm 2: vmulpd <w=%ymm5,<a0=%ymm4,>a0=%ymm4 +vmulpd %ymm5,%ymm4,%ymm4 + +# qhasm: 4x a1 approx*= w +# asm 1: vmulpd <w=reg256#6,<a1=reg256#7,>a1=reg256#7 +# asm 2: vmulpd <w=%ymm5,<a1=%ymm6,>a1=%ymm6 +vmulpd %ymm5,%ymm6,%ymm6 + +# qhasm: 4x a2 approx*= w +# asm 1: vmulpd <w=reg256#6,<a2=reg256#4,>a2=reg256#4 +# asm 2: vmulpd <w=%ymm5,<a2=%ymm3,>a2=%ymm3 +vmulpd %ymm5,%ymm3,%ymm3 + +# qhasm: 4x a3 approx*= w +# asm 1: vmulpd <w=reg256#6,<a3=reg256#3,>a3=reg256#3 +# asm 2: vmulpd <w=%ymm5,<a3=%ymm2,>a3=%ymm2 +vmulpd %ymm5,%ymm2,%ymm2 + +# qhasm: 4x c = approx a0 * qinv +# asm 1: vmulpd <a0=reg256#5,<qinv=reg256#2,>c=reg256#6 +# asm 2: vmulpd <a0=%ymm4,<qinv=%ymm1,>c=%ymm5 +vmulpd %ymm4,%ymm1,%ymm5 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6 +# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5 +vroundpd $9,%ymm5,%ymm5 + +# qhasm: 4x a0 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a0=reg256#5 +# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a0=%ymm4 +vfnmadd231pd %ymm5,%ymm0,%ymm4 + +# qhasm: 4x c = approx a1 * qinv +# asm 1: vmulpd <a1=reg256#7,<qinv=reg256#2,>c=reg256#6 +# asm 2: vmulpd <a1=%ymm6,<qinv=%ymm1,>c=%ymm5 +vmulpd %ymm6,%ymm1,%ymm5 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6 +# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5 +vroundpd $9,%ymm5,%ymm5 + +# qhasm: 4x a1 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a1=reg256#7 +# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a1=%ymm6 +vfnmadd231pd %ymm5,%ymm0,%ymm6 + +# qhasm: 4x c = approx a2 * qinv +# asm 1: vmulpd 
<a2=reg256#4,<qinv=reg256#2,>c=reg256#6 +# asm 2: vmulpd <a2=%ymm3,<qinv=%ymm1,>c=%ymm5 +vmulpd %ymm3,%ymm1,%ymm5 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6 +# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5 +vroundpd $9,%ymm5,%ymm5 + +# qhasm: 4x a2 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a2=reg256#4 +# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a2=%ymm3 +vfnmadd231pd %ymm5,%ymm0,%ymm3 + +# qhasm: 4x c = approx a3 * qinv +# asm 1: vmulpd <a3=reg256#3,<qinv=reg256#2,>c=reg256#6 +# asm 2: vmulpd <a3=%ymm2,<qinv=%ymm1,>c=%ymm5 +vmulpd %ymm2,%ymm1,%ymm5 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6 +# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5 +vroundpd $9,%ymm5,%ymm5 + +# qhasm: 4x a3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a3=reg256#3 +# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a3=%ymm2 +vfnmadd231pd %ymm5,%ymm0,%ymm2 + +# qhasm: mem256[tp + 0] = r0 +# asm 1: vmovupd <r0=reg256#10,0(<tp=int64#4) +# asm 2: vmovupd <r0=%ymm9,0(<tp=%rcx) +vmovupd %ymm9,0(%rcx) + +# qhasm: mem256[tp + 128] = r1 +# asm 1: vmovupd <r1=reg256#12,128(<tp=int64#4) +# asm 2: vmovupd <r1=%ymm11,128(<tp=%rcx) +vmovupd %ymm11,128(%rcx) + +# qhasm: mem256[tp + 256] = r2 +# asm 1: vmovupd <r2=reg256#13,256(<tp=int64#4) +# asm 2: vmovupd <r2=%ymm12,256(<tp=%rcx) +vmovupd %ymm12,256(%rcx) + +# qhasm: mem256[tp + 384] = r3 +# asm 1: vmovupd <r3=reg256#14,384(<tp=int64#4) +# asm 2: vmovupd <r3=%ymm13,384(<tp=%rcx) +vmovupd %ymm13,384(%rcx) + +# qhasm: mem256[tp + 512] = a0 +# asm 1: vmovupd <a0=reg256#5,512(<tp=int64#4) +# asm 2: vmovupd <a0=%ymm4,512(<tp=%rcx) +vmovupd %ymm4,512(%rcx) + +# qhasm: mem256[tp + 640] = a1 +# asm 1: vmovupd <a1=reg256#7,640(<tp=int64#4) +# asm 2: vmovupd <a1=%ymm6,640(<tp=%rcx) +vmovupd %ymm6,640(%rcx) + +# qhasm: mem256[tp + 768] = a2 +# asm 1: vmovupd <a2=reg256#4,768(<tp=int64#4) +# asm 2: vmovupd <a2=%ymm3,768(<tp=%rcx) +vmovupd %ymm3,768(%rcx) + +# qhasm: mem256[tp + 896] = a3 +# asm 1: 
vmovupd <a3=reg256#3,896(<tp=int64#4) +# asm 2: vmovupd <a3=%ymm2,896(<tp=%rcx) +vmovupd %ymm2,896(%rcx) + +# qhasm: tp+=32 +# asm 1: add $32,<tp=int64#4 +# asm 2: add $32,<tp=%rcx +add $32,%rcx + +# qhasm: unsigned>? ctrj-=1 +# asm 1: sub $1,<ctrj=int64#5 +# asm 2: sub $1,<ctrj=%r8 +sub $1,%r8 +# comment:fp stack unchanged by jump + +# qhasm: goto loop567j if unsigned> +ja ._loop567j + +# qhasm: tp+= 896 +# asm 1: add $896,<tp=int64#4 +# asm 2: add $896,<tp=%rcx +add $896,%rcx + +# qhasm: wp+= 56 +# asm 1: add $56,<wp=int64#7 +# asm 2: add $56,<wp=%rax +add $56,%rax + +# qhasm: unsigned>? ctri-=1 +# asm 1: sub $1,<ctri=int64#2 +# asm 2: sub $1,<ctri=%rsi +sub $1,%rsi +# comment:fp stack unchanged by jump + +# qhasm: goto loop567i if unsigned> +ja ._loop567i + +# qhasm: ctrj = 32 +# asm 1: mov $32,>ctrj=int64#2 +# asm 2: mov $32,>ctrj=%rsi +mov $32,%rsi + +# qhasm: tp = input_2 +# asm 1: mov <input_2=int64#3,>tp=int64#3 +# asm 2: mov <input_2=%rdx,>tp=%rdx +mov %rdx,%rdx + +# qhasm: ap = input_0 +# asm 1: mov <input_0=int64#1,>ap=int64#1 +# asm 2: mov <input_0=%rdi,>ap=%rdi +mov %rdi,%rdi + +# qhasm: loop8910j: +._loop8910j: + +# qhasm: a0 = mem256[tp + 0] +# asm 1: vmovupd 0(<tp=int64#3),>a0=reg256#3 +# asm 2: vmovupd 0(<tp=%rdx),>a0=%ymm2 +vmovupd 0(%rdx),%ymm2 + +# qhasm: a1 = mem256[tp + 1024] +# asm 1: vmovupd 1024(<tp=int64#3),>a1=reg256#4 +# asm 2: vmovupd 1024(<tp=%rdx),>a1=%ymm3 +vmovupd 1024(%rdx),%ymm3 + +# qhasm: a2 = mem256[tp + 2048] +# asm 1: vmovupd 2048(<tp=int64#3),>a2=reg256#5 +# asm 2: vmovupd 2048(<tp=%rdx),>a2=%ymm4 +vmovupd 2048(%rdx),%ymm4 + +# qhasm: a3 = mem256[tp + 3072] +# asm 1: vmovupd 3072(<tp=int64#3),>a3=reg256#6 +# asm 2: vmovupd 3072(<tp=%rdx),>a3=%ymm5 +vmovupd 3072(%rdx),%ymm5 + +# qhasm: 4x r0 = approx a0 + a1 +# asm 1: vaddpd <a0=reg256#3,<a1=reg256#4,>r0=reg256#7 +# asm 2: vaddpd <a0=%ymm2,<a1=%ymm3,>r0=%ymm6 +vaddpd %ymm2,%ymm3,%ymm6 + +# qhasm: 4x r2 = approx a2 + a3 +# asm 1: vaddpd <a2=reg256#5,<a3=reg256#6,>r2=reg256#8 
+# asm 2: vaddpd <a2=%ymm4,<a3=%ymm5,>r2=%ymm7 +vaddpd %ymm4,%ymm5,%ymm7 + +# qhasm: 4x r1 = approx a0 - a1 +# asm 1: vsubpd <a1=reg256#4,<a0=reg256#3,>r1=reg256#3 +# asm 2: vsubpd <a1=%ymm3,<a0=%ymm2,>r1=%ymm2 +vsubpd %ymm3,%ymm2,%ymm2 + +# qhasm: 4x r3 = approx a2 - a3 +# asm 1: vsubpd <a3=reg256#6,<a2=reg256#5,>r3=reg256#4 +# asm 2: vsubpd <a3=%ymm5,<a2=%ymm4,>r3=%ymm3 +vsubpd %ymm5,%ymm4,%ymm3 + +# qhasm: 4x a0 = approx r0 + r2 +# asm 1: vaddpd <r0=reg256#7,<r2=reg256#8,>a0=reg256#5 +# asm 2: vaddpd <r0=%ymm6,<r2=%ymm7,>a0=%ymm4 +vaddpd %ymm6,%ymm7,%ymm4 + +# qhasm: 4x a2 = approx r0 - r2 +# asm 1: vsubpd <r2=reg256#8,<r0=reg256#7,>a2=reg256#6 +# asm 2: vsubpd <r2=%ymm7,<r0=%ymm6,>a2=%ymm5 +vsubpd %ymm7,%ymm6,%ymm5 + +# qhasm: w = mem64[wp + 0],mem64[wp + 0],mem64[wp + 0],mem64[wp + 0] +# asm 1: vbroadcastsd 0(<wp=int64#7),>w=reg256#7 +# asm 2: vbroadcastsd 0(<wp=%rax),>w=%ymm6 +vbroadcastsd 0(%rax),%ymm6 + +# qhasm: 4x r3 approx*= w +# asm 1: vmulpd <w=reg256#7,<r3=reg256#4,>r3=reg256#4 +# asm 2: vmulpd <w=%ymm6,<r3=%ymm3,>r3=%ymm3 +vmulpd %ymm6,%ymm3,%ymm3 + +# qhasm: 4x a1 = approx r1 + r3 +# asm 1: vaddpd <r1=reg256#3,<r3=reg256#4,>a1=reg256#7 +# asm 2: vaddpd <r1=%ymm2,<r3=%ymm3,>a1=%ymm6 +vaddpd %ymm2,%ymm3,%ymm6 + +# qhasm: 4x a3 = approx r1 - r3 +# asm 1: vsubpd <r3=reg256#4,<r1=reg256#3,>a3=reg256#3 +# asm 2: vsubpd <r3=%ymm3,<r1=%ymm2,>a3=%ymm2 +vsubpd %ymm3,%ymm2,%ymm2 + +# qhasm: t0 = mem256[tp + 4096] +# asm 1: vmovupd 4096(<tp=int64#3),>t0=reg256#4 +# asm 2: vmovupd 4096(<tp=%rdx),>t0=%ymm3 +vmovupd 4096(%rdx),%ymm3 + +# qhasm: t1 = mem256[tp + 5120] +# asm 1: vmovupd 5120(<tp=int64#3),>t1=reg256#8 +# asm 2: vmovupd 5120(<tp=%rdx),>t1=%ymm7 +vmovupd 5120(%rdx),%ymm7 + +# qhasm: t2 = mem256[tp + 6144] +# asm 1: vmovupd 6144(<tp=int64#3),>t2=reg256#9 +# asm 2: vmovupd 6144(<tp=%rdx),>t2=%ymm8 +vmovupd 6144(%rdx),%ymm8 + +# qhasm: t3 = mem256[tp + 7168] +# asm 1: vmovupd 7168(<tp=int64#3),>t3=reg256#10 +# asm 2: vmovupd 7168(<tp=%rdx),>t3=%ymm9 
+vmovupd 7168(%rdx),%ymm9 + +# qhasm: 4x r0 = approx t0 + t1 +# asm 1: vaddpd <t0=reg256#4,<t1=reg256#8,>r0=reg256#11 +# asm 2: vaddpd <t0=%ymm3,<t1=%ymm7,>r0=%ymm10 +vaddpd %ymm3,%ymm7,%ymm10 + +# qhasm: 4x r2 = approx t2 + t3 +# asm 1: vaddpd <t2=reg256#9,<t3=reg256#10,>r2=reg256#12 +# asm 2: vaddpd <t2=%ymm8,<t3=%ymm9,>r2=%ymm11 +vaddpd %ymm8,%ymm9,%ymm11 + +# qhasm: 4x r1 = approx t0 - t1 +# asm 1: vsubpd <t1=reg256#8,<t0=reg256#4,>r1=reg256#4 +# asm 2: vsubpd <t1=%ymm7,<t0=%ymm3,>r1=%ymm3 +vsubpd %ymm7,%ymm3,%ymm3 + +# qhasm: 4x r3 = approx t2 - t3 +# asm 1: vsubpd <t3=reg256#10,<t2=reg256#9,>r3=reg256#8 +# asm 2: vsubpd <t3=%ymm9,<t2=%ymm8,>r3=%ymm7 +vsubpd %ymm9,%ymm8,%ymm7 + +# qhasm: 4x t0 = approx r0 + r2 +# asm 1: vaddpd <r0=reg256#11,<r2=reg256#12,>t0=reg256#9 +# asm 2: vaddpd <r0=%ymm10,<r2=%ymm11,>t0=%ymm8 +vaddpd %ymm10,%ymm11,%ymm8 + +# qhasm: 4x t2 = approx r0 - r2 +# asm 1: vsubpd <r2=reg256#12,<r0=reg256#11,>t2=reg256#10 +# asm 2: vsubpd <r2=%ymm11,<r0=%ymm10,>t2=%ymm9 +vsubpd %ymm11,%ymm10,%ymm9 + +# qhasm: w = mem64[wp + 8],mem64[wp + 8],mem64[wp + 8],mem64[wp + 8] +# asm 1: vbroadcastsd 8(<wp=int64#7),>w=reg256#11 +# asm 2: vbroadcastsd 8(<wp=%rax),>w=%ymm10 +vbroadcastsd 8(%rax),%ymm10 + +# qhasm: 4x r1 approx*= w +# asm 1: vmulpd <w=reg256#11,<r1=reg256#4,>r1=reg256#4 +# asm 2: vmulpd <w=%ymm10,<r1=%ymm3,>r1=%ymm3 +vmulpd %ymm10,%ymm3,%ymm3 + +# qhasm: 4x c = approx r1 * qinv +# asm 1: vmulpd <r1=reg256#4,<qinv=reg256#2,>c=reg256#11 +# asm 2: vmulpd <r1=%ymm3,<qinv=%ymm1,>c=%ymm10 +vmulpd %ymm3,%ymm1,%ymm10 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11 +# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10 +vroundpd $9,%ymm10,%ymm10 + +# qhasm: 4x r1 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r1=reg256#4 +# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r1=%ymm3 +vfnmadd231pd %ymm10,%ymm0,%ymm3 + +# qhasm: w = mem64[wp + 16],mem64[wp + 16],mem64[wp + 16],mem64[wp + 16] +# asm 1: vbroadcastsd 
16(<wp=int64#7),>w=reg256#11 +# asm 2: vbroadcastsd 16(<wp=%rax),>w=%ymm10 +vbroadcastsd 16(%rax),%ymm10 + +# qhasm: 4x r3 approx*= w +# asm 1: vmulpd <w=reg256#11,<r3=reg256#8,>r3=reg256#8 +# asm 2: vmulpd <w=%ymm10,<r3=%ymm7,>r3=%ymm7 +vmulpd %ymm10,%ymm7,%ymm7 + +# qhasm: 4x c = approx r3 * qinv +# asm 1: vmulpd <r3=reg256#8,<qinv=reg256#2,>c=reg256#11 +# asm 2: vmulpd <r3=%ymm7,<qinv=%ymm1,>c=%ymm10 +vmulpd %ymm7,%ymm1,%ymm10 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11 +# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10 +vroundpd $9,%ymm10,%ymm10 + +# qhasm: 4x r3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r3=reg256#8 +# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r3=%ymm7 +vfnmadd231pd %ymm10,%ymm0,%ymm7 + +# qhasm: 4x t1 = approx r1 + r3 +# asm 1: vaddpd <r1=reg256#4,<r3=reg256#8,>t1=reg256#11 +# asm 2: vaddpd <r1=%ymm3,<r3=%ymm7,>t1=%ymm10 +vaddpd %ymm3,%ymm7,%ymm10 + +# qhasm: w = mem64[wp + 24],mem64[wp + 24],mem64[wp + 24],mem64[wp + 24] +# asm 1: vbroadcastsd 24(<wp=int64#7),>w=reg256#12 +# asm 2: vbroadcastsd 24(<wp=%rax),>w=%ymm11 +vbroadcastsd 24(%rax),%ymm11 + +# qhasm: 4x t3 = approx r1 - r3 +# asm 1: vsubpd <r3=reg256#8,<r1=reg256#4,>t3=reg256#4 +# asm 2: vsubpd <r3=%ymm7,<r1=%ymm3,>t3=%ymm3 +vsubpd %ymm7,%ymm3,%ymm3 + +# qhasm: 4x t3 approx*= w +# asm 1: vmulpd <w=reg256#12,<t3=reg256#4,>t3=reg256#4 +# asm 2: vmulpd <w=%ymm11,<t3=%ymm3,>t3=%ymm3 +vmulpd %ymm11,%ymm3,%ymm3 + +# qhasm: 4x t2 approx*= w +# asm 1: vmulpd <w=reg256#12,<t2=reg256#10,>t2=reg256#8 +# asm 2: vmulpd <w=%ymm11,<t2=%ymm9,>t2=%ymm7 +vmulpd %ymm11,%ymm9,%ymm7 + +# qhasm: 4x r0 = approx a0 + t0 +# asm 1: vaddpd <a0=reg256#5,<t0=reg256#9,>r0=reg256#10 +# asm 2: vaddpd <a0=%ymm4,<t0=%ymm8,>r0=%ymm9 +vaddpd %ymm4,%ymm8,%ymm9 + +# qhasm: 4x r1 = approx a1 + t1 +# asm 1: vaddpd <a1=reg256#7,<t1=reg256#11,>r1=reg256#12 +# asm 2: vaddpd <a1=%ymm6,<t1=%ymm10,>r1=%ymm11 +vaddpd %ymm6,%ymm10,%ymm11 + +# qhasm: 4x r2 = approx a2 + t2 +# asm 1: vaddpd 
<a2=reg256#6,<t2=reg256#8,>r2=reg256#13 +# asm 2: vaddpd <a2=%ymm5,<t2=%ymm7,>r2=%ymm12 +vaddpd %ymm5,%ymm7,%ymm12 + +# qhasm: 4x r3 = approx a3 + t3 +# asm 1: vaddpd <a3=reg256#3,<t3=reg256#4,>r3=reg256#14 +# asm 2: vaddpd <a3=%ymm2,<t3=%ymm3,>r3=%ymm13 +vaddpd %ymm2,%ymm3,%ymm13 + +# qhasm: 4x a0 = approx a0 - t0 +# asm 1: vsubpd <t0=reg256#9,<a0=reg256#5,>a0=reg256#5 +# asm 2: vsubpd <t0=%ymm8,<a0=%ymm4,>a0=%ymm4 +vsubpd %ymm8,%ymm4,%ymm4 + +# qhasm: 4x a1 = approx a1 - t1 +# asm 1: vsubpd <t1=reg256#11,<a1=reg256#7,>a1=reg256#7 +# asm 2: vsubpd <t1=%ymm10,<a1=%ymm6,>a1=%ymm6 +vsubpd %ymm10,%ymm6,%ymm6 + +# qhasm: 4x a2 = approx a2 - t2 +# asm 1: vsubpd <t2=reg256#8,<a2=reg256#6,>a2=reg256#6 +# asm 2: vsubpd <t2=%ymm7,<a2=%ymm5,>a2=%ymm5 +vsubpd %ymm7,%ymm5,%ymm5 + +# qhasm: 4x a3 = approx a3 - t3 +# asm 1: vsubpd <t3=reg256#4,<a3=reg256#3,>a3=reg256#3 +# asm 2: vsubpd <t3=%ymm3,<a3=%ymm2,>a3=%ymm2 +vsubpd %ymm3,%ymm2,%ymm2 + +# qhasm: 4x c = approx r0 * qinv +# asm 1: vmulpd <r0=reg256#10,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd <r0=%ymm9,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm9,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x r0 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<r0=reg256#10 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<r0=%ymm9 +vfnmadd231pd %ymm3,%ymm0,%ymm9 + +# qhasm: 4x c = approx r1 * qinv +# asm 1: vmulpd <r1=reg256#12,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd <r1=%ymm11,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm11,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x r1 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<r1=reg256#12 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<r1=%ymm11 +vfnmadd231pd %ymm3,%ymm0,%ymm11 + +# qhasm: 4x c = approx r2 * qinv +# asm 1: vmulpd 
<r2=reg256#13,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd <r2=%ymm12,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm12,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x r2 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<r2=reg256#13 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<r2=%ymm12 +vfnmadd231pd %ymm3,%ymm0,%ymm12 + +# qhasm: 4x c = approx r3 * qinv +# asm 1: vmulpd <r3=reg256#14,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd <r3=%ymm13,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm13,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x r3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<r3=reg256#14 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<r3=%ymm13 +vfnmadd231pd %ymm3,%ymm0,%ymm13 + +# qhasm: 4x c = approx a0 * qinv +# asm 1: vmulpd <a0=reg256#5,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd <a0=%ymm4,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm4,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x a0 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a0=reg256#5 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a0=%ymm4 +vfnmadd231pd %ymm3,%ymm0,%ymm4 + +# qhasm: 4x c = approx a1 * qinv +# asm 1: vmulpd <a1=reg256#7,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd <a1=%ymm6,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm6,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x a1 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a1=reg256#7 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a1=%ymm6 +vfnmadd231pd %ymm3,%ymm0,%ymm6 + +# qhasm: 4x c = approx a2 * qinv +# asm 1: vmulpd <a2=reg256#6,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd 
<a2=%ymm5,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm5,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x a2 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a2=reg256#6 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a2=%ymm5 +vfnmadd231pd %ymm3,%ymm0,%ymm5 + +# qhasm: 4x c = approx a3 * qinv +# asm 1: vmulpd <a3=reg256#3,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd <a3=%ymm2,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm2,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x a3 approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a3=reg256#3 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a3=%ymm2 +vfnmadd231pd %ymm3,%ymm0,%ymm2 + +# qhasm: t0 = (4x int32)(4x double)r0,0,0,0,0 +# asm 1: vcvtpd2dq <r0=reg256#10,>t0=reg256#4dq +# asm 2: vcvtpd2dq <r0=%ymm9,>t0=%xmm3 +vcvtpd2dq %ymm9,%xmm3 + +# qhasm: t1 = (4x int32)(4x double)r1,0,0,0,0 +# asm 1: vcvtpd2dq <r1=reg256#12,>t1=reg256#8dq +# asm 2: vcvtpd2dq <r1=%ymm11,>t1=%xmm7 +vcvtpd2dq %ymm11,%xmm7 + +# qhasm: t2 = (4x int32)(4x double)r2,0,0,0,0 +# asm 1: vcvtpd2dq <r2=reg256#13,>t2=reg256#9dq +# asm 2: vcvtpd2dq <r2=%ymm12,>t2=%xmm8 +vcvtpd2dq %ymm12,%xmm8 + +# qhasm: t3 = (4x int32)(4x double)r3,0,0,0,0 +# asm 1: vcvtpd2dq <r3=reg256#14,>t3=reg256#10dq +# asm 2: vcvtpd2dq <r3=%ymm13,>t3=%xmm9 +vcvtpd2dq %ymm13,%xmm9 + +# qhasm: mem128[ap + 0] = t0 +# asm 1: vmovupd <t0=reg256#4dq,0(<ap=int64#1) +# asm 2: vmovupd <t0=%xmm3,0(<ap=%rdi) +vmovupd %xmm3,0(%rdi) + +# qhasm: mem128[ap + 512] = t1 +# asm 1: vmovupd <t1=reg256#8dq,512(<ap=int64#1) +# asm 2: vmovupd <t1=%xmm7,512(<ap=%rdi) +vmovupd %xmm7,512(%rdi) + +# qhasm: mem128[ap + 1024] = t2 +# asm 1: vmovupd <t2=reg256#9dq,1024(<ap=int64#1) +# asm 2: vmovupd <t2=%xmm8,1024(<ap=%rdi) +vmovupd %xmm8,1024(%rdi) + +# qhasm: mem128[ap + 1536] = t3 +# asm 1: 
vmovupd <t3=reg256#10dq,1536(<ap=int64#1) +# asm 2: vmovupd <t3=%xmm9,1536(<ap=%rdi) +vmovupd %xmm9,1536(%rdi) + +# qhasm: t0 = (4x int32)(4x double)a0,0,0,0,0 +# asm 1: vcvtpd2dq <a0=reg256#5,>t0=reg256#4dq +# asm 2: vcvtpd2dq <a0=%ymm4,>t0=%xmm3 +vcvtpd2dq %ymm4,%xmm3 + +# qhasm: t1 = (4x int32)(4x double)a1,0,0,0,0 +# asm 1: vcvtpd2dq <a1=reg256#7,>t1=reg256#5dq +# asm 2: vcvtpd2dq <a1=%ymm6,>t1=%xmm4 +vcvtpd2dq %ymm6,%xmm4 + +# qhasm: t2 = (4x int32)(4x double)a2,0,0,0,0 +# asm 1: vcvtpd2dq <a2=reg256#6,>t2=reg256#6dq +# asm 2: vcvtpd2dq <a2=%ymm5,>t2=%xmm5 +vcvtpd2dq %ymm5,%xmm5 + +# qhasm: t3 = (4x int32)(4x double)a3,0,0,0,0 +# asm 1: vcvtpd2dq <a3=reg256#3,>t3=reg256#3dq +# asm 2: vcvtpd2dq <a3=%ymm2,>t3=%xmm2 +vcvtpd2dq %ymm2,%xmm2 + +# qhasm: mem128[ap + 2048] = t0 +# asm 1: vmovupd <t0=reg256#4dq,2048(<ap=int64#1) +# asm 2: vmovupd <t0=%xmm3,2048(<ap=%rdi) +vmovupd %xmm3,2048(%rdi) + +# qhasm: mem128[ap + 2560] = t1 +# asm 1: vmovupd <t1=reg256#5dq,2560(<ap=int64#1) +# asm 2: vmovupd <t1=%xmm4,2560(<ap=%rdi) +vmovupd %xmm4,2560(%rdi) + +# qhasm: mem128[ap + 3072] = t2 +# asm 1: vmovupd <t2=reg256#6dq,3072(<ap=int64#1) +# asm 2: vmovupd <t2=%xmm5,3072(<ap=%rdi) +vmovupd %xmm5,3072(%rdi) + +# qhasm: mem128[ap + 3584] = t3 +# asm 1: vmovupd <t3=reg256#3dq,3584(<ap=int64#1) +# asm 2: vmovupd <t3=%xmm2,3584(<ap=%rdi) +vmovupd %xmm2,3584(%rdi) + +# qhasm: ap+=16 +# asm 1: add $16,<ap=int64#1 +# asm 2: add $16,<ap=%rdi +add $16,%rdi + +# qhasm: tp+=32 +# asm 1: add $32,<tp=int64#3 +# asm 2: add $32,<tp=%rdx +add $32,%rdx + +# qhasm: unsigned>? 
ctrj-=1 +# asm 1: sub $1,<ctrj=int64#2 +# asm 2: sub $1,<ctrj=%rsi +sub $1,%rsi +# comment:fp stack unchanged by jump + +# qhasm: goto loop8910j if unsigned> +ja ._loop8910j + +# qhasm: return +add %r11,%rsp +ret diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/omegas.c b/crypt/liboqs/kex_rlwe_newhope/avx2/omegas.c new file mode 100644 index 0000000000000000000000000000000000000000..8f9733158ae3b91bc48ce68f568d90dbee8a7696 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/omegas.c @@ -0,0 +1,463 @@ +double omegas_double[2300] = { + 1.0,1.0,1.0,10810.0,1.0,7143.0,1.0,4043.0,1.0,10984.0, + 1.0,722.0,1.0,5736.0,1.0,8155.0,1.0,3542.0,1.0,8785.0, + 1.0,9744.0,1.0,3621.0,1.0,10643.0,1.0,1212.0,1.0,3195.0, + 1.0,5860.0,1.0,7468.0,1.0,2639.0,1.0,9664.0,1.0,11340.0, + 1.0,11726.0,1.0,9314.0,1.0,9283.0,1.0,9545.0,1.0,5728.0, + 1.0,7698.0,1.0,5023.0,1.0,5828.0,1.0,8961.0,1.0,6512.0, + 1.0,7311.0,1.0,1351.0,1.0,2319.0,1.0,11119.0,1.0,11334.0, + 1.0,11499.0,1.0,9088.0,1.0,3014.0,1.0,5086.0,1.0,10963.0, + 1.0,4846.0,1.0,9542.0,1.0,9154.0,1.0,3712.0,1.0,4805.0, + 1.0,8736.0,1.0,11227.0,1.0,9995.0,1.0,3091.0,1.0,12208.0, + 1.0,7969.0,1.0,11289.0,1.0,9326.0,1.0,7393.0,1.0,9238.0, + 1.0,2366.0,1.0,11112.0,1.0,8034.0,1.0,10654.0,1.0,9521.0, + 1.0,12149.0,1.0,10436.0,1.0,7678.0,1.0,11563.0,1.0,1260.0, + 1.0,4388.0,1.0,4632.0,1.0,6534.0,1.0,2426.0,1.0,334.0, + 1.0,1428.0,1.0,1696.0,1.0,2013.0,1.0,9000.0,1.0,729.0, + 1.0,3241.0,1.0,2881.0,1.0,3284.0,1.0,7197.0,1.0,10200.0, + 1.0,8595.0,1.0,7110.0,1.0,10530.0,1.0,8582.0,1.0,3382.0, + 1.0,11934.0,1.0,9741.0,1.0,8058.0,1.0,3637.0,1.0,3459.0, + 1.0,145.0,1.0,6747.0,1.0,9558.0,1.0,8357.0,1.0,7399.0, + 1.0,6378.0,1.0,9447.0,1.0,480.0,1.0,1022.0,1.0,9.0, + 1.0,9821.0,1.0,339.0,1.0,5791.0,1.0,544.0,1.0,10616.0, + 1.0,4278.0,1.0,6958.0,1.0,7300.0,1.0,8112.0,1.0,8705.0, + 1.0,1381.0,1.0,9764.0,1.0,11336.0,1.0,8541.0,1.0,827.0, + 1.0,5767.0,1.0,2476.0,1.0,118.0,1.0,2197.0,1.0,7222.0, + 1.0,3949.0,1.0,8993.0,1.0,4452.0,1.0,2396.0,1.0,7935.0, + 
1.0,130.0,1.0,2837.0,1.0,6915.0,1.0,2401.0,1.0,442.0, + 1.0,7188.0,1.0,11222.0,1.0,390.0,1.0,773.0,1.0,8456.0, + 1.0,3778.0,1.0,354.0,1.0,4861.0,1.0,9377.0,1.0,5698.0, + 1.0,5012.0,1.0,9808.0,1.0,2859.0,1.0,11244.0,1.0,1017.0, + 1.0,7404.0,1.0,1632.0,1.0,7205.0,1.0,27.0,1.0,9223.0, + 1.0,8526.0,1.0,10849.0,1.0,1537.0,1.0,242.0,1.0,4714.0, + 1.0,8146.0,1.0,9611.0,1.0,3704.0,1.0,5019.0,1.0,11744.0, + 1.0,1002.0,1.0,5011.0,1.0,5088.0,1.0,8005.0,1.0,7313.0, + 1.0,10682.0,1.0,8509.0,1.0,11414.0,1.0,9852.0,1.0,3646.0, + 1.0,6022.0,1.0,2987.0,1.0,9723.0,1.0,10102.0,1.0,6250.0, + 1.0,9867.0,1.0,11224.0,1.0,2143.0,1.0,11885.0,1.0,7644.0, + 1.0,1168.0,1.0,5277.0,1.0,11082.0,1.0,3248.0,1.0,493.0, + 1.0,8193.0,1.0,6845.0,1.0,2381.0,1.0,7952.0,1.0,11854.0, + 1.0,1378.0,1.0,1912.0,1.0,2166.0,1.0,3915.0,1.0,12176.0, + 1.0,7370.0,1.0,12129.0,1.0,3149.0,1.0,12286.0,1.0,4437.0, + 1.0,3636.0,1.0,4938.0,1.0,5291.0,1.0,2704.0,1.0,10863.0, + 1.0,7635.0,1.0,1663.0,1.0,10512.0,1.0,3364.0,1.0,1689.0, + 1.0,4057.0,1.0,9018.0,1.0,9442.0,1.0,7875.0,1.0,2174.0, + 1.0,4372.0,1.0,7247.0,1.0,9984.0,1.0,4053.0,1.0,2645.0, + 1.0,5195.0,1.0,9509.0,1.0,7394.0,1.0,1484.0,1.0,9042.0, + 1.0,9603.0,1.0,8311.0,1.0,9320.0,1.0,9919.0,1.0,2865.0, + 1.0,5332.0,1.0,3510.0,1.0,1630.0,1.0,10163.0,1.0,5407.0, + 1.0,3186.0,1.0,11136.0,1.0,9405.0,1.0,10040.0,1.0,8241.0, + 1.0,9890.0,1.0,8889.0,1.0,7098.0,1.0,9153.0,1.0,9289.0, + 1.0,671.0,1.0,3016.0,1.0,243.0,1.0,6730.0,1.0,420.0, + 1.0,10111.0,1.0,1544.0,1.0,3985.0,1.0,4905.0,1.0,3531.0, + 1.0,476.0,1.0,49.0,1.0,1263.0,1.0,5915.0,1.0,1483.0, + 1.0,9789.0,1.0,10800.0,1.0,10706.0,1.0,6347.0,1.0,1512.0, + 1.0,350.0,1.0,10474.0,1.0,5383.0,1.0,5369.0,1.0,10232.0, + 1.0,9087.0,1.0,4493.0,1.0,9551.0,1.0,6421.0,1.0,6554.0, + 1.0,2655.0,1.0,9280.0,1.0,1693.0,1.0,174.0,1.0,723.0, + 1.0,10314.0,1.0,8532.0,1.0,347.0,1.0,2925.0,1.0,8974.0, + 1.0,11863.0,1.0,1858.0,1.0,4754.0,1.0,3030.0,1.0,4115.0, + 1.0,2361.0,1.0,10446.0,1.0,2908.0,1.0,218.0,1.0,3434.0, + 
1.0,8760.0,1.0,3963.0,1.0,576.0,1.0,6142.0,1.0,9842.0, + 1.0,1954.0,1.0,10238.0,1.0,9407.0,1.0,10484.0,1.0,3991.0, + 1.0,8320.0,1.0,9522.0,1.0,156.0,1.0,2281.0,1.0,5876.0, + 1.0,10258.0,1.0,5333.0,1.0,3772.0,1.0,418.0,1.0,5908.0, + 1.0,11836.0,1.0,5429.0,1.0,7515.0,1.0,7552.0,1.0,1293.0, + 1.0,295.0,1.0,6099.0,1.0,5766.0,1.0,652.0,1.0,8273.0, + 1.0,4077.0,1.0,8527.0,1.0,9370.0,1.0,325.0,1.0,10885.0, + 1.0,11143.0,1.0,11341.0,1.0,5990.0,1.0,1159.0,1.0,8561.0, + 1.0,8240.0,1.0,3329.0,1.0,4298.0,1.0,12121.0,1.0,2692.0, + 1.0,5961.0,1.0,7183.0,1.0,10327.0,1.0,1594.0,1.0,6167.0, + 1.0,9734.0,1.0,7105.0,1.0,11089.0,1.0,1360.0,1.0,3956.0, + 1.0,6170.0,1.0,5297.0,1.0,8210.0,1.0,11231.0,1.0,922.0, + 1.0,441.0,1.0,1958.0,1.0,4322.0,1.0,1112.0,1.0,2078.0, + 1.0,4046.0,1.0,709.0,1.0,9139.0,1.0,1319.0,1.0,4240.0, + 1.0,8719.0,1.0,6224.0,1.0,11454.0,1.0,2459.0,1.0,683.0, + 1.0,3656.0,1.0,12225.0,1.0,10723.0,1.0,5782.0,1.0,9341.0, + 1.0,9786.0,1.0,9166.0,1.0,10542.0,1.0,9235.0,1.0,6803.0, + 1.0,7856.0,1.0,6370.0,1.0,3834.0,1.0,7032.0,1.0,7048.0, + 1.0,9369.0,1.0,8120.0,1.0,9162.0,1.0,6821.0,1.0,1010.0, + 1.0,8807.0,1.0,787.0,1.0,5057.0,1.0,4698.0,1.0,4780.0, + 1.0,8844.0,1.0,12097.0,1.0,1321.0,1.0,4912.0,1.0,10240.0, + 1.0,677.0,1.0,6415.0,1.0,6234.0,1.0,8953.0,1.0,1323.0, + 1.0,9523.0,1.0,12237.0,1.0,3174.0,1.0,1579.0,1.0,11858.0, + 1.0,9784.0,1.0,5906.0,1.0,3957.0,1.0,9450.0,1.0,151.0, + 1.0,10162.0,1.0,12231.0,1.0,12048.0,1.0,3532.0,1.0,11286.0, + 1.0,1956.0,1.0,7280.0,1.0,11404.0,1.0,6281.0,1.0,3477.0, + 1.0,6608.0,1.0,142.0,1.0,11184.0,1.0,9445.0,1.0,3438.0, + 1.0,11314.0,1.0,4212.0,1.0,9260.0,1.0,6695.0,1.0,4782.0, + 1.0,5886.0,1.0,8076.0,1.0,504.0,1.0,2302.0,1.0,11684.0, + 1.0,11868.0,1.0,8209.0,1.0,3602.0,1.0,6068.0,1.0,8689.0, + 1.0,3263.0,1.0,6077.0,1.0,7665.0,1.0,7822.0,1.0,7500.0, + 1.0,6752.0,1.0,4749.0,1.0,4449.0,1.0,6833.0,1.0,12142.0, + 1.0,8500.0,1.0,6118.0,1.0,8471.0,1.0,1190.0,1.0,9606.0, + 1.0,3860.0,1.0,5445.0,1.0,7753.0,1.0,11239.0,1.0,5079.0, + 
1.0,9027.0,1.0,2169.0,1.0,11767.0,1.0,7965.0,1.0,4916.0, + 1.0,8214.0,1.0,5315.0,1.0,11011.0,1.0,9945.0,1.0,1973.0, + 1.0,6715.0,1.0,8775.0,1.0,11248.0,1.0,5925.0,1.0,11271.0, + 1.0,654.0,1.0,3565.0,1.0,1702.0,1.0,1987.0,1.0,6760.0, + 1.0,5206.0,1.0,3199.0,1.0,12233.0,1.0,6136.0,1.0,6427.0, + 1.0,6874.0,1.0,8646.0,1.0,4948.0,1.0,6152.0,1.0,400.0, + 1.0,10561.0,1.0,5339.0,1.0,5446.0,1.0,3710.0,1.0,6093.0, + 1.0,468.0,1.0,8301.0,1.0,316.0,1.0,11907.0,1.0,10256.0, + 1.0,8291.0,1.0,3879.0,1.0,1922.0,1.0,10930.0,1.0,6854.0, + 1.0,973.0,1.0,11035.0,1.0,1.0,1.0,1.0,1.0,1.0, + 10810.0,10810.0,1.0,1.0,7143.0,7143.0,1.0,1.0,4043.0,4043.0, + 1.0,10810.0,1.0,1.0,1.0,10984.0,10984.0,1.0,1.0,722.0, + 722.0,1.0,1.0,5736.0,5736.0,1.0,1.0,8155.0,8155.0,7143.0, + 4043.0,10810.0,1.0,1.0,3542.0,3542.0,1.0,1.0,8785.0,8785.0, + 1.0,1.0,9744.0,9744.0,1.0,1.0,3621.0,3621.0,10984.0,722.0, + 7143.0,1.0,1.0,10643.0,10643.0,1.0,1.0,1212.0,1212.0,1.0, + 1.0,3195.0,3195.0,1.0,1.0,5860.0,5860.0,5736.0,8155.0,4043.0, + 1.0,1.0,7468.0,7468.0,1.0,1.0,2639.0,2639.0,1.0,1.0, + 9664.0,9664.0,1.0,1.0,11340.0,11340.0,3542.0,8785.0,10984.0,1.0, + 1.0,11726.0,11726.0,1.0,1.0,9314.0,9314.0,1.0,1.0,9283.0, + 9283.0,1.0,1.0,9545.0,9545.0,9744.0,3621.0,722.0,1.0,1.0, + 5728.0,5728.0,1.0,1.0,7698.0,7698.0,1.0,1.0,5023.0,5023.0, + 1.0,1.0,5828.0,5828.0,10643.0,1212.0,5736.0,1.0,1.0,8961.0, + 8961.0,1.0,1.0,6512.0,6512.0,1.0,1.0,7311.0,7311.0,1.0, + 1.0,1351.0,1351.0,3195.0,5860.0,8155.0,1.0,1.0,2319.0,2319.0, + 1.0,1.0,11119.0,11119.0,1.0,1.0,11334.0,11334.0,1.0,1.0, + 11499.0,11499.0,7468.0,2639.0,3542.0,1.0,1.0,9088.0,9088.0,1.0, + 1.0,3014.0,3014.0,1.0,1.0,5086.0,5086.0,1.0,1.0,10963.0, + 10963.0,9664.0,11340.0,8785.0,1.0,1.0,4846.0,4846.0,1.0,1.0, + 9542.0,9542.0,1.0,1.0,9154.0,9154.0,1.0,1.0,3712.0,3712.0, + 11726.0,9314.0,9744.0,1.0,1.0,4805.0,4805.0,1.0,1.0,8736.0, + 8736.0,1.0,1.0,11227.0,11227.0,1.0,1.0,9995.0,9995.0,9283.0, + 9545.0,3621.0,1.0,1.0,3091.0,3091.0,1.0,1.0,12208.0,12208.0, + 
1.0,1.0,7969.0,7969.0,1.0,1.0,11289.0,11289.0,5728.0,7698.0, + 10643.0,1.0,1.0,9326.0,9326.0,1.0,1.0,7393.0,7393.0,1.0, + 1.0,9238.0,9238.0,1.0,1.0,2366.0,2366.0,5023.0,5828.0,1212.0, + 1.0,1.0,11112.0,11112.0,1.0,1.0,8034.0,8034.0,1.0,1.0, + 10654.0,10654.0,1.0,1.0,9521.0,9521.0,8961.0,6512.0,3195.0,1.0, + 1.0,12149.0,12149.0,1.0,1.0,10436.0,10436.0,1.0,1.0,7678.0, + 7678.0,1.0,1.0,11563.0,11563.0,7311.0,1351.0,5860.0,1.0,1.0, + 1260.0,1260.0,1.0,1.0,4388.0,4388.0,1.0,1.0,4632.0,4632.0, + 1.0,1.0,6534.0,6534.0,2319.0,11119.0,7468.0,1.0,1.0,2426.0, + 2426.0,1.0,1.0,334.0,334.0,1.0,1.0,1428.0,1428.0,1.0, + 1.0,1696.0,1696.0,11334.0,11499.0,2639.0,1.0,1.0,2013.0,2013.0, + 1.0,1.0,9000.0,9000.0,1.0,1.0,729.0,729.0,1.0,1.0, + 3241.0,3241.0,9088.0,3014.0,9664.0,1.0,1.0,2881.0,2881.0,1.0, + 1.0,3284.0,3284.0,1.0,1.0,7197.0,7197.0,1.0,1.0,10200.0, + 10200.0,5086.0,10963.0,11340.0,1.0,1.0,8595.0,8595.0,1.0,1.0, + 7110.0,7110.0,1.0,1.0,10530.0,10530.0,1.0,1.0,8582.0,8582.0, + 4846.0,9542.0,11726.0,1.0,1.0,3382.0,3382.0,1.0,1.0,11934.0, + 11934.0,1.0,1.0,9741.0,9741.0,1.0,1.0,8058.0,8058.0,9154.0, + 3712.0,9314.0,1.0,1.0,3637.0,3637.0,1.0,1.0,3459.0,3459.0, + 1.0,1.0,145.0,145.0,1.0,1.0,6747.0,6747.0,4805.0,8736.0, + 9283.0,1.0,1.0,9558.0,9558.0,1.0,1.0,8357.0,8357.0,1.0, + 1.0,7399.0,7399.0,1.0,1.0,6378.0,6378.0,11227.0,9995.0,9545.0, + 1.0,1.0,9447.0,9447.0,1.0,1.0,480.0,480.0,1.0,1.0, + 1022.0,1022.0,1.0,1.0,9.0,9.0,3091.0,12208.0,5728.0,1.0, + 1.0,9821.0,9821.0,1.0,1.0,339.0,339.0,1.0,1.0,5791.0, + 5791.0,1.0,1.0,544.0,544.0,7969.0,11289.0,7698.0,1.0,1.0, + 10616.0,10616.0,1.0,1.0,4278.0,4278.0,1.0,1.0,6958.0,6958.0, + 1.0,1.0,7300.0,7300.0,9326.0,7393.0,5023.0,1.0,1.0,8112.0, + 8112.0,1.0,1.0,8705.0,8705.0,1.0,1.0,1381.0,1381.0,1.0, + 1.0,9764.0,9764.0,9238.0,2366.0,5828.0,1.0,1.0,11336.0,11336.0, + 1.0,1.0,8541.0,8541.0,1.0,1.0,827.0,827.0,1.0,1.0, + 5767.0,5767.0,11112.0,8034.0,8961.0,1.0,1.0,2476.0,2476.0,1.0, + 1.0,118.0,118.0,1.0,1.0,2197.0,2197.0,1.0,1.0,7222.0, + 
7222.0,10654.0,9521.0,6512.0,1.0,1.0,3949.0,3949.0,1.0,1.0, + 8993.0,8993.0,1.0,1.0,4452.0,4452.0,1.0,1.0,2396.0,2396.0, + 12149.0,10436.0,7311.0,1.0,1.0,7935.0,7935.0,1.0,1.0,130.0, + 130.0,1.0,1.0,2837.0,2837.0,1.0,1.0,6915.0,6915.0,7678.0, + 11563.0,1351.0,1.0,1.0,2401.0,2401.0,1.0,1.0,442.0,442.0, + 1.0,1.0,7188.0,7188.0,1.0,1.0,11222.0,11222.0,1260.0,4388.0, + 2319.0,1.0,1.0,390.0,390.0,1.0,1.0,773.0,773.0,1.0, + 1.0,8456.0,8456.0,1.0,1.0,3778.0,3778.0,4632.0,6534.0,11119.0, + 1.0,1.0,354.0,354.0,1.0,1.0,4861.0,4861.0,1.0,1.0, + 9377.0,9377.0,1.0,1.0,5698.0,5698.0,2426.0,334.0,11334.0,1.0, + 1.0,5012.0,5012.0,1.0,1.0,9808.0,9808.0,1.0,1.0,2859.0, + 2859.0,1.0,1.0,11244.0,11244.0,1428.0,1696.0,11499.0,1.0,1.0, + 1017.0,1017.0,1.0,1.0,7404.0,7404.0,1.0,1.0,1632.0,1632.0, + 1.0,1.0,7205.0,7205.0,2013.0,9000.0,9088.0,1.0,1.0,27.0, + 27.0,1.0,1.0,9223.0,9223.0,1.0,1.0,8526.0,8526.0,1.0, + 1.0,10849.0,10849.0,729.0,3241.0,3014.0,1.0,1.0,1537.0,1537.0, + 1.0,1.0,242.0,242.0,1.0,1.0,4714.0,4714.0,1.0,1.0, + 8146.0,8146.0,2881.0,3284.0,5086.0,1.0,1.0,9611.0,9611.0,1.0, + 1.0,3704.0,3704.0,1.0,1.0,5019.0,5019.0,1.0,1.0,11744.0, + 11744.0,7197.0,10200.0,10963.0,1.0,1.0,1002.0,1002.0,1.0,1.0, + 5011.0,5011.0,1.0,1.0,5088.0,5088.0,1.0,1.0,8005.0,8005.0, + 8595.0,7110.0,4846.0,1.0,1.0,7313.0,7313.0,1.0,1.0,10682.0, + 10682.0,1.0,1.0,8509.0,8509.0,1.0,1.0,11414.0,11414.0,10530.0, + 8582.0,9542.0,1.0,1.0,9852.0,9852.0,1.0,1.0,3646.0,3646.0, + 1.0,1.0,6022.0,6022.0,1.0,1.0,2987.0,2987.0,3382.0,11934.0, + 9154.0,1.0,1.0,9723.0,9723.0,1.0,1.0,10102.0,10102.0,1.0, + 1.0,6250.0,6250.0,1.0,1.0,9867.0,9867.0,9741.0,8058.0,3712.0, + 1.0,1.0,11224.0,11224.0,1.0,1.0,2143.0,2143.0,1.0,1.0, + 11885.0,11885.0,1.0,1.0,7644.0,7644.0,3637.0,3459.0,4805.0,1.0, + 1.0,1168.0,1168.0,1.0,1.0,5277.0,5277.0,1.0,1.0,11082.0, + 11082.0,1.0,1.0,3248.0,3248.0,145.0,6747.0,8736.0,1.0,1.0, + 493.0,493.0,1.0,1.0,8193.0,8193.0,1.0,1.0,6845.0,6845.0, + 
1.0,1.0,2381.0,2381.0,9558.0,8357.0,11227.0,1.0,1.0,7952.0, + 7952.0,1.0,1.0,11854.0,11854.0,1.0,1.0,1378.0,1378.0,1.0, + 1.0,1912.0,1912.0,7399.0,6378.0,9995.0,1.0,1.0,2166.0,2166.0, + 1.0,1.0,3915.0,3915.0,1.0,1.0,12176.0,12176.0,1.0,1.0, + 7370.0,7370.0,9447.0,480.0,3091.0,1.0,1.0,12129.0,12129.0,1.0, + 1.0,3149.0,3149.0,1.0,1.0,12286.0,12286.0,1.0,1.0,4437.0, + 4437.0,1022.0,9.0,12208.0,1.0,1.0,3636.0,3636.0,1.0,1.0, + 4938.0,4938.0,1.0,1.0,5291.0,5291.0,1.0,1.0,2704.0,2704.0, + 9821.0,339.0,7969.0,1.0,1.0,10863.0,10863.0,1.0,1.0,7635.0, + 7635.0,1.0,1.0,1663.0,1663.0,1.0,1.0,10512.0,10512.0,5791.0, + 544.0,11289.0,1.0,1.0,3364.0,3364.0,1.0,1.0,1689.0,1689.0, + 1.0,1.0,4057.0,4057.0,1.0,1.0,9018.0,9018.0,10616.0,4278.0, + 9326.0,1.0,1.0,9442.0,9442.0,1.0,1.0,7875.0,7875.0,1.0, + 1.0,2174.0,2174.0,1.0,1.0,4372.0,4372.0,6958.0,7300.0,7393.0, + 1.0,1.0,7247.0,7247.0,1.0,1.0,9984.0,9984.0,1.0,1.0, + 4053.0,4053.0,1.0,1.0,2645.0,2645.0,8112.0,8705.0,9238.0,1.0, + 1.0,5195.0,5195.0,1.0,1.0,9509.0,9509.0,1.0,1.0,7394.0, + 7394.0,1.0,1.0,1484.0,1484.0,1381.0,9764.0,2366.0,1.0,1.0, + 9042.0,9042.0,1.0,1.0,9603.0,9603.0,1.0,1.0,8311.0,8311.0, + 1.0,1.0,9320.0,9320.0,11336.0,8541.0,11112.0,1.0,1.0,9919.0, + 9919.0,1.0,1.0,2865.0,2865.0,1.0,1.0,5332.0,5332.0,1.0, + 1.0,3510.0,3510.0,827.0,5767.0,8034.0,1.0,1.0,1630.0,1630.0, + 1.0,1.0,10163.0,10163.0,1.0,1.0,5407.0,5407.0,1.0,1.0, + 3186.0,3186.0,2476.0,118.0,10654.0,1.0,1.0,11136.0,11136.0,1.0, + 1.0,9405.0,9405.0,1.0,1.0,10040.0,10040.0,1.0,1.0,8241.0, + 8241.0,2197.0,7222.0,9521.0,1.0,1.0,9890.0,9890.0,1.0,1.0, + 8889.0,8889.0,1.0,1.0,7098.0,7098.0,1.0,1.0,9153.0,9153.0, + 3949.0,8993.0,12149.0,1.0,1.0,9289.0,9289.0,1.0,1.0,671.0, + 671.0,1.0,1.0,3016.0,3016.0,1.0,1.0,243.0,243.0,4452.0, + 2396.0,10436.0,1.0,1.0,6730.0,6730.0,1.0,1.0,420.0,420.0, + 1.0,1.0,10111.0,10111.0,1.0,1.0,1544.0,1544.0,7935.0,130.0, + 7678.0,1.0,1.0,3985.0,3985.0,1.0,1.0,4905.0,4905.0,1.0, + 
1.0,3531.0,3531.0,1.0,1.0,476.0,476.0,2837.0,6915.0,11563.0, + 1.0,10810.0,1.0,7143.0,4043.0,10810.0,1.0,10984.0,722.0,7143.0, + 5736.0,8155.0,4043.0,10810.0,3542.0,8785.0,10984.0,9744.0,3621.0,722.0, + 7143.0,10643.0,1212.0,5736.0,3195.0,5860.0,8155.0,4043.0,7468.0,2639.0, + 3542.0,9664.0,11340.0,8785.0,10984.0,11726.0,9314.0,9744.0,9283.0,9545.0, + 3621.0,722.0,5728.0,7698.0,10643.0,5023.0,5828.0,1212.0,5736.0,8961.0, + 6512.0,3195.0,7311.0,1351.0,5860.0,8155.0,10810.0,7143.0,4043.0,10810.0}; + +double omegas_inv_double[2300] = { + 1.0, 1.0,1.0,1479.0,1.0,8246.0,1.0,5146.0,1.0,4134.0,1.0, + 6553.0,1.0,11567.0,1.0,1305.0,1.0,6429.0,1.0,9094.0,1.0, + 11077.0,1.0,1646.0,1.0,8668.0,1.0,2545.0,1.0,3504.0,1.0, + 8747.0,1.0,10938.0,1.0,4978.0,1.0,5777.0,1.0,3328.0,1.0, + 6461.0,1.0,7266.0,1.0,4591.0,1.0,6561.0,1.0,2744.0,1.0, + 3006.0,1.0,2975.0,1.0,563.0,1.0,949.0,1.0,2625.0,1.0, + 9650.0,1.0,4821.0,1.0,726.0,1.0,4611.0,1.0,1853.0,1.0, + 140.0,1.0,2768.0,1.0,1635.0,1.0,4255.0,1.0,1177.0,1.0, + 9923.0,1.0,3051.0,1.0,4896.0,1.0,2963.0,1.0,1000.0,1.0, + 4320.0,1.0,81.0,1.0,9198.0,1.0,2294.0,1.0,1062.0,1.0, + 3553.0,1.0,7484.0,1.0,8577.0,1.0,3135.0,1.0,2747.0,1.0, + 7443.0,1.0,1326.0,1.0,7203.0,1.0,9275.0,1.0,3201.0,1.0, + 790.0,1.0,955.0,1.0,1170.0,1.0,9970.0,1.0,5374.0,1.0, + 9452.0,1.0,12159.0,1.0,4354.0,1.0,9893.0,1.0,7837.0,1.0, + 3296.0,1.0,8340.0,1.0,5067.0,1.0,10092.0,1.0,12171.0,1.0, + 9813.0,1.0,6522.0,1.0,11462.0,1.0,3748.0,1.0,953.0,1.0, + 2525.0,1.0,10908.0,1.0,3584.0,1.0,4177.0,1.0,4989.0,1.0, + 5331.0,1.0,8011.0,1.0,1673.0,1.0,11745.0,1.0,6498.0,1.0, + 11950.0,1.0,2468.0,1.0,12280.0,1.0,11267.0,1.0,11809.0,1.0, + 2842.0,1.0,5911.0,1.0,4890.0,1.0,3932.0,1.0,2731.0,1.0, + 5542.0,1.0,12144.0,1.0,8830.0,1.0,8652.0,1.0,4231.0,1.0, + 2548.0,1.0,355.0,1.0,8907.0,1.0,3707.0,1.0,1759.0,1.0, + 5179.0,1.0,3694.0,1.0,2089.0,1.0,5092.0,1.0,9005.0,1.0, + 9408.0,1.0,9048.0,1.0,11560.0,1.0,3289.0,1.0,10276.0,1.0, + 10593.0,1.0,10861.0,1.0,11955.0,1.0,9863.0,1.0,5755.0,1.0, 
+ 7657.0,1.0,7901.0,1.0,11029.0,1.0,11813.0,1.0,8758.0,1.0, + 7384.0,1.0,8304.0,1.0,10745.0,1.0,2178.0,1.0,11869.0,1.0, + 5559.0,1.0,12046.0,1.0,9273.0,1.0,11618.0,1.0,3000.0,1.0, + 3136.0,1.0,5191.0,1.0,3400.0,1.0,2399.0,1.0,4048.0,1.0, + 2249.0,1.0,2884.0,1.0,1153.0,1.0,9103.0,1.0,6882.0,1.0, + 2126.0,1.0,10659.0,1.0,8779.0,1.0,6957.0,1.0,9424.0,1.0, + 2370.0,1.0,2969.0,1.0,3978.0,1.0,2686.0,1.0,3247.0,1.0, + 10805.0,1.0,4895.0,1.0,2780.0,1.0,7094.0,1.0,9644.0,1.0, + 8236.0,1.0,2305.0,1.0,5042.0,1.0,7917.0,1.0,10115.0,1.0, + 4414.0,1.0,2847.0,1.0,3271.0,1.0,8232.0,1.0,10600.0,1.0, + 8925.0,1.0,1777.0,1.0,10626.0,1.0,4654.0,1.0,1426.0,1.0, + 9585.0,1.0,6998.0,1.0,7351.0,1.0,8653.0,1.0,7852.0,1.0, + 3.0,1.0,9140.0,1.0,160.0,1.0,4919.0,1.0,113.0,1.0, + 8374.0,1.0,10123.0,1.0,10377.0,1.0,10911.0,1.0,435.0,1.0, + 4337.0,1.0,9908.0,1.0,5444.0,1.0,4096.0,1.0,11796.0,1.0, + 9041.0,1.0,1207.0,1.0,7012.0,1.0,11121.0,1.0,4645.0,1.0, + 404.0,1.0,10146.0,1.0,1065.0,1.0,2422.0,1.0,6039.0,1.0, + 2187.0,1.0,2566.0,1.0,9302.0,1.0,6267.0,1.0,8643.0,1.0, + 2437.0,1.0,875.0,1.0,3780.0,1.0,1607.0,1.0,4976.0,1.0, + 4284.0,1.0,7201.0,1.0,7278.0,1.0,11287.0,1.0,545.0,1.0, + 7270.0,1.0,8585.0,1.0,2678.0,1.0,4143.0,1.0,7575.0,1.0, + 12047.0,1.0,10752.0,1.0,1440.0,1.0,3763.0,1.0,3066.0,1.0, + 12262.0,1.0,5084.0,1.0,10657.0,1.0,4885.0,1.0,11272.0,1.0, + 1045.0,1.0,9430.0,1.0,2481.0,1.0,7277.0,1.0,6591.0,1.0, + 2912.0,1.0,7428.0,1.0,11935.0,1.0,8511.0,1.0,3833.0,1.0, + 11516.0,1.0,11899.0,1.0,1067.0,1.0,5101.0,1.0,11847.0,1.0, + 9888.0,1.0,1254.0,1.0,11316.0,1.0,5435.0,1.0,1359.0,1.0, + 10367.0,1.0,8410.0,1.0,3998.0,1.0,2033.0,1.0,382.0,1.0, + 11973.0,1.0,3988.0,1.0,11821.0,1.0,6196.0,1.0,8579.0,1.0, + 6843.0,1.0,6950.0,1.0,1728.0,1.0,11889.0,1.0,6137.0,1.0, + 7341.0,1.0,3643.0,1.0,5415.0,1.0,5862.0,1.0,6153.0,1.0, + 56.0,1.0,9090.0,1.0,7083.0,1.0,5529.0,1.0,10302.0,1.0, + 10587.0,1.0,8724.0,1.0,11635.0,1.0,1018.0,1.0,6364.0,1.0, + 1041.0,1.0,3514.0,1.0,5574.0,1.0,10316.0,1.0,2344.0,1.0, + 
1278.0,1.0,6974.0,1.0,4075.0,1.0,7373.0,1.0,4324.0,1.0, + 522.0,1.0,10120.0,1.0,3262.0,1.0,7210.0,1.0,1050.0,1.0, + 4536.0,1.0,6844.0,1.0,8429.0,1.0,2683.0,1.0,11099.0,1.0, + 3818.0,1.0,6171.0,1.0,3789.0,1.0,147.0,1.0,5456.0,1.0, + 7840.0,1.0,7540.0,1.0,5537.0,1.0,4789.0,1.0,4467.0,1.0, + 4624.0,1.0,6212.0,1.0,9026.0,1.0,3600.0,1.0,6221.0,1.0, + 8687.0,1.0,4080.0,1.0,421.0,1.0,605.0,1.0,9987.0,1.0, + 11785.0,1.0,4213.0,1.0,6403.0,1.0,7507.0,1.0,5594.0,1.0, + 3029.0,1.0,8077.0,1.0,975.0,1.0,8851.0,1.0,2844.0,1.0, + 1105.0,1.0,12147.0,1.0,5681.0,1.0,8812.0,1.0,6008.0,1.0, + 885.0,1.0,5009.0,1.0,10333.0,1.0,1003.0,1.0,8757.0,1.0, + 241.0,1.0,58.0,1.0,2127.0,1.0,12138.0,1.0,2839.0,1.0, + 8332.0,1.0,6383.0,1.0,2505.0,1.0,431.0,1.0,10710.0,1.0, + 9115.0,1.0,52.0,1.0,2766.0,1.0,10966.0,1.0,3336.0,1.0, + 6055.0,1.0,5874.0,1.0,11612.0,1.0,2049.0,1.0,7377.0,1.0, + 10968.0,1.0,192.0,1.0,3445.0,1.0,7509.0,1.0,7591.0,1.0, + 7232.0,1.0,11502.0,1.0,3482.0,1.0,11279.0,1.0,5468.0,1.0, + 3127.0,1.0,4169.0,1.0,2920.0,1.0,5241.0,1.0,5257.0,1.0, + 8455.0,1.0,5919.0,1.0,4433.0,1.0,5486.0,1.0,3054.0,1.0, + 1747.0,1.0,3123.0,1.0,2503.0,1.0,2948.0,1.0,6507.0,1.0, + 1566.0,1.0,64.0,1.0,8633.0,1.0,11606.0,1.0,9830.0,1.0, + 835.0,1.0,6065.0,1.0,3570.0,1.0,8049.0,1.0,10970.0,1.0, + 3150.0,1.0,11580.0,1.0,8243.0,1.0,10211.0,1.0,11177.0,1.0, + 7967.0,1.0,10331.0,1.0,11848.0,1.0,11367.0,1.0,1058.0,1.0, + 4079.0,1.0,6992.0,1.0,6119.0,1.0,8333.0,1.0,10929.0,1.0, + 1200.0,1.0,5184.0,1.0,2555.0,1.0,6122.0,1.0,10695.0,1.0, + 1962.0,1.0,5106.0,1.0,6328.0,1.0,9597.0,1.0,168.0,1.0, + 7991.0,1.0,8960.0,1.0,4049.0,1.0,3728.0,1.0,11130.0,1.0, + 6299.0,1.0,948.0,1.0,1146.0,1.0,1404.0,1.0,11964.0,1.0, + 2919.0,1.0,3762.0,1.0,8212.0,1.0,4016.0,1.0,11637.0,1.0, + 6523.0,1.0,6190.0,1.0,11994.0,1.0,10996.0,1.0,4737.0,1.0, + 4774.0,1.0,6860.0,1.0,453.0,1.0,6381.0,1.0,11871.0,1.0, + 8517.0,1.0,6956.0,1.0,2031.0,1.0,6413.0,1.0,10008.0,1.0, + 12133.0,1.0,2767.0,1.0,3969.0,1.0,8298.0,1.0,1805.0,1.0, + 
2882.0,1.0,2051.0,1.0,10335.0,1.0,2447.0,1.0,6147.0,1.0, + 11713.0,1.0,8326.0,1.0,3529.0,1.0,8855.0,1.0,12071.0,1.0, + 9381.0,1.0,1843.0,1.0,9928.0,1.0,8174.0,1.0,9259.0,1.0, + 7535.0,1.0,10431.0,1.0,426.0,1.0,3315.0,1.0,9364.0,1.0, + 11942.0,1.0,3757.0,1.0,1975.0,1.0,11566.0,1.0,12115.0,1.0, + 10596.0,1.0,3009.0,1.0,9634.0,1.0,5735.0,1.0,5868.0,1.0, + 2738.0,1.0,7796.0,1.0,3202.0,1.0,2057.0,1.0,6920.0,1.0, + 6906.0,1.0,1815.0,1.0,11939.0,1.0,10777.0,1.0,5942.0,1.0, + 1583.0,1.0,1489.0,1.0,2500.0,1.0,10806.0,1.0,6374.0,1.0, + 11026.0,1.0,12240.0,1.0,1.0,1.0,1.0,1.0,1.0,1479.0, + 1479.0,1.0,1.0,8246.0,8246.0,1.0,1.0,5146.0,5146.0,1.0, + 1479.0,1.0,1.0,1.0,4134.0,4134.0,1.0,1.0,6553.0,6553.0, + 1.0,1.0,11567.0,11567.0,1.0,1.0,1305.0,1305.0,8246.0,5146.0, + 1479.0,1.0,1.0,6429.0,6429.0,1.0,1.0,9094.0,9094.0,1.0, + 1.0,11077.0,11077.0,1.0,1.0,1646.0,1646.0,4134.0,6553.0,8246.0, + 1.0,1.0,8668.0,8668.0,1.0,1.0,2545.0,2545.0,1.0,1.0, + 3504.0,3504.0,1.0,1.0,8747.0,8747.0,11567.0,1305.0,5146.0,1.0, + 1.0,10938.0,10938.0,1.0,1.0,4978.0,4978.0,1.0,1.0,5777.0, + 5777.0,1.0,1.0,3328.0,3328.0,6429.0,9094.0,4134.0,1.0,1.0, + 6461.0,6461.0,1.0,1.0,7266.0,7266.0,1.0,1.0,4591.0,4591.0, + 1.0,1.0,6561.0,6561.0,11077.0,1646.0,6553.0,1.0,1.0,2744.0, + 2744.0,1.0,1.0,3006.0,3006.0,1.0,1.0,2975.0,2975.0,1.0, + 1.0,563.0,563.0,8668.0,2545.0,11567.0,1.0,1.0,949.0,949.0, + 1.0,1.0,2625.0,2625.0,1.0,1.0,9650.0,9650.0,1.0,1.0, + 4821.0,4821.0,3504.0,8747.0,1305.0,1.0,1.0,726.0,726.0,1.0, + 1.0,4611.0,4611.0,1.0,1.0,1853.0,1853.0,1.0,1.0,140.0, + 140.0,10938.0,4978.0,6429.0,1.0,1.0,2768.0,2768.0,1.0,1.0, + 1635.0,1635.0,1.0,1.0,4255.0,4255.0,1.0,1.0,1177.0,1177.0, + 5777.0,3328.0,9094.0,1.0,1.0,9923.0,9923.0,1.0,1.0,3051.0, + 3051.0,1.0,1.0,4896.0,4896.0,1.0,1.0,2963.0,2963.0,6461.0, + 7266.0,11077.0,1.0,1.0,1000.0,1000.0,1.0,1.0,4320.0,4320.0, + 1.0,1.0,81.0,81.0,1.0,1.0,9198.0,9198.0,4591.0,6561.0, + 1646.0,1.0,1.0,2294.0,2294.0,1.0,1.0,1062.0,1062.0,1.0, + 
1.0,3553.0,3553.0,1.0,1.0,7484.0,7484.0,2744.0,3006.0,8668.0, + 1.0,1.0,8577.0,8577.0,1.0,1.0,3135.0,3135.0,1.0,1.0, + 2747.0,2747.0,1.0,1.0,7443.0,7443.0,2975.0,563.0,2545.0,1.0, + 1.0,1326.0,1326.0,1.0,1.0,7203.0,7203.0,1.0,1.0,9275.0, + 9275.0,1.0,1.0,3201.0,3201.0,949.0,2625.0,3504.0,1.0,1.0, + 790.0,790.0,1.0,1.0,955.0,955.0,1.0,1.0,1170.0,1170.0, + 1.0,1.0,9970.0,9970.0,9650.0,4821.0,8747.0,1.0,1.0,5374.0, + 5374.0,1.0,1.0,9452.0,9452.0,1.0,1.0,12159.0,12159.0,1.0, + 1.0,4354.0,4354.0,726.0,4611.0,10938.0,1.0,1.0,9893.0,9893.0, + 1.0,1.0,7837.0,7837.0,1.0,1.0,3296.0,3296.0,1.0,1.0, + 8340.0,8340.0,1853.0,140.0,4978.0,1.0,1.0,5067.0,5067.0,1.0, + 1.0,10092.0,10092.0,1.0,1.0,12171.0,12171.0,1.0,1.0,9813.0, + 9813.0,2768.0,1635.0,5777.0,1.0,1.0,6522.0,6522.0,1.0,1.0, + 11462.0,11462.0,1.0,1.0,3748.0,3748.0,1.0,1.0,953.0,953.0, + 4255.0,1177.0,3328.0,1.0,1.0,2525.0,2525.0,1.0,1.0,10908.0, + 10908.0,1.0,1.0,3584.0,3584.0,1.0,1.0,4177.0,4177.0,9923.0, + 3051.0,6461.0,1.0,1.0,4989.0,4989.0,1.0,1.0,5331.0,5331.0, + 1.0,1.0,8011.0,8011.0,1.0,1.0,1673.0,1673.0,4896.0,2963.0, + 7266.0,1.0,1.0,11745.0,11745.0,1.0,1.0,6498.0,6498.0,1.0, + 1.0,11950.0,11950.0,1.0,1.0,2468.0,2468.0,1000.0,4320.0,4591.0, + 1.0,1.0,12280.0,12280.0,1.0,1.0,11267.0,11267.0,1.0,1.0, + 11809.0,11809.0,1.0,1.0,2842.0,2842.0,81.0,9198.0,6561.0,1.0, + 1.0,5911.0,5911.0,1.0,1.0,4890.0,4890.0,1.0,1.0,3932.0, + 3932.0,1.0,1.0,2731.0,2731.0,2294.0,1062.0,2744.0,1.0,1.0, + 5542.0,5542.0,1.0,1.0,12144.0,12144.0,1.0,1.0,8830.0,8830.0, + 1.0,1.0,8652.0,8652.0,3553.0,7484.0,3006.0,1.0,1.0,4231.0, + 4231.0,1.0,1.0,2548.0,2548.0,1.0,1.0,355.0,355.0,1.0, + 1.0,8907.0,8907.0,8577.0,3135.0,2975.0,1.0,1.0,3707.0,3707.0, + 1.0,1.0,1759.0,1759.0,1.0,1.0,5179.0,5179.0,1.0,1.0, + 3694.0,3694.0,2747.0,7443.0,563.0,1.0,1.0,2089.0,2089.0,1.0, + 1.0,5092.0,5092.0,1.0,1.0,9005.0,9005.0,1.0,1.0,9408.0, + 9408.0,1326.0,7203.0,949.0,1.0,1.0,9048.0,9048.0,1.0,1.0, + 
11560.0,11560.0,1.0,1.0,3289.0,3289.0,1.0,1.0,10276.0,10276.0, + 9275.0,3201.0,2625.0,1.0,1.0,10593.0,10593.0,1.0,1.0,10861.0, + 10861.0,1.0,1.0,11955.0,11955.0,1.0,1.0,9863.0,9863.0,790.0, + 955.0,9650.0,1.0,1.0,5755.0,5755.0,1.0,1.0,7657.0,7657.0, + 1.0,1.0,7901.0,7901.0,1.0,1.0,11029.0,11029.0,1170.0,9970.0, + 4821.0,1.0,1.0,11813.0,11813.0,1.0,1.0,8758.0,8758.0,1.0, + 1.0,7384.0,7384.0,1.0,1.0,8304.0,8304.0,5374.0,9452.0,726.0, + 1.0,1.0,10745.0,10745.0,1.0,1.0,2178.0,2178.0,1.0,1.0, + 11869.0,11869.0,1.0,1.0,5559.0,5559.0,12159.0,4354.0,4611.0,1.0, + 1.0,12046.0,12046.0,1.0,1.0,9273.0,9273.0,1.0,1.0,11618.0, + 11618.0,1.0,1.0,3000.0,3000.0,9893.0,7837.0,1853.0,1.0,1.0, + 3136.0,3136.0,1.0,1.0,5191.0,5191.0,1.0,1.0,3400.0,3400.0, + 1.0,1.0,2399.0,2399.0,3296.0,8340.0,140.0,1.0,1.0,4048.0, + 4048.0,1.0,1.0,2249.0,2249.0,1.0,1.0,2884.0,2884.0,1.0, + 1.0,1153.0,1153.0,5067.0,10092.0,2768.0,1.0,1.0,9103.0,9103.0, + 1.0,1.0,6882.0,6882.0,1.0,1.0,2126.0,2126.0,1.0,1.0, + 10659.0,10659.0,12171.0,9813.0,1635.0,1.0,1.0,8779.0,8779.0,1.0, + 1.0,6957.0,6957.0,1.0,1.0,9424.0,9424.0,1.0,1.0,2370.0, + 2370.0,6522.0,11462.0,4255.0,1.0,1.0,2969.0,2969.0,1.0,1.0, + 3978.0,3978.0,1.0,1.0,2686.0,2686.0,1.0,1.0,3247.0,3247.0, + 3748.0,953.0,1177.0,1.0,1.0,10805.0,10805.0,1.0,1.0,4895.0, + 4895.0,1.0,1.0,2780.0,2780.0,1.0,1.0,7094.0,7094.0,2525.0, + 10908.0,9923.0,1.0,1.0,9644.0,9644.0,1.0,1.0,8236.0,8236.0, + 1.0,1.0,2305.0,2305.0,1.0,1.0,5042.0,5042.0,3584.0,4177.0, + 3051.0,1.0,1.0,7917.0,7917.0,1.0,1.0,10115.0,10115.0,1.0, + 1.0,4414.0,4414.0,1.0,1.0,2847.0,2847.0,4989.0,5331.0,4896.0, + 1.0,1.0,3271.0,3271.0,1.0,1.0,8232.0,8232.0,1.0,1.0, + 10600.0,10600.0,1.0,1.0,8925.0,8925.0,8011.0,1673.0,2963.0,1.0, + 1.0,1777.0,1777.0,1.0,1.0,10626.0,10626.0,1.0,1.0,4654.0, + 4654.0,1.0,1.0,1426.0,1426.0,11745.0,6498.0,1000.0,1.0,1.0, + 9585.0,9585.0,1.0,1.0,6998.0,6998.0,1.0,1.0,7351.0,7351.0, + 1.0,1.0,8653.0,8653.0,11950.0,2468.0,4320.0,1.0,1.0,7852.0, + 
7852.0,1.0,1.0,3.0,3.0,1.0,1.0,9140.0,9140.0,1.0, + 1.0,160.0,160.0,12280.0,11267.0,81.0,1.0,1.0,4919.0,4919.0, + 1.0,1.0,113.0,113.0,1.0,1.0,8374.0,8374.0,1.0,1.0, + 10123.0,10123.0,11809.0,2842.0,9198.0,1.0,1.0,10377.0,10377.0,1.0, + 1.0,10911.0,10911.0,1.0,1.0,435.0,435.0,1.0,1.0,4337.0, + 4337.0,5911.0,4890.0,2294.0,1.0,1.0,9908.0,9908.0,1.0,1.0, + 5444.0,5444.0,1.0,1.0,4096.0,4096.0,1.0,1.0,11796.0,11796.0, + 3932.0,2731.0,1062.0,1.0,1.0,9041.0,9041.0,1.0,1.0,1207.0, + 1207.0,1.0,1.0,7012.0,7012.0,1.0,1.0,11121.0,11121.0,5542.0, + 12144.0,3553.0,1.0,1.0,4645.0,4645.0,1.0,1.0,404.0,404.0, + 1.0,1.0,10146.0,10146.0,1.0,1.0,1065.0,1065.0,8830.0,8652.0, + 7484.0,1.0,1.0,2422.0,2422.0,1.0,1.0,6039.0,6039.0,1.0, + 1.0,2187.0,2187.0,1.0,1.0,2566.0,2566.0,4231.0,2548.0,8577.0, + 1.0,1.0,9302.0,9302.0,1.0,1.0,6267.0,6267.0,1.0,1.0, + 8643.0,8643.0,1.0,1.0,2437.0,2437.0,355.0,8907.0,3135.0,1.0, + 1.0,875.0,875.0,1.0,1.0,3780.0,3780.0,1.0,1.0,1607.0, + 1607.0,1.0,1.0,4976.0,4976.0,3707.0,1759.0,2747.0,1.0,1.0, + 4284.0,4284.0,1.0,1.0,7201.0,7201.0,1.0,1.0,7278.0,7278.0, + 1.0,1.0,11287.0,11287.0,5179.0,3694.0,7443.0,1.0,1.0,545.0, + 545.0,1.0,1.0,7270.0,7270.0,1.0,1.0,8585.0,8585.0,1.0, + 1.0,2678.0,2678.0,2089.0,5092.0,1326.0,1.0,1.0,4143.0,4143.0, + 1.0,1.0,7575.0,7575.0,1.0,1.0,12047.0,12047.0,1.0,1.0, + 10752.0,10752.0,9005.0,9408.0,7203.0,1.0,1.0,1440.0,1440.0,1.0, + 1.0,3763.0,3763.0,1.0,1.0,3066.0,3066.0,1.0,1.0,12262.0, + 12262.0,9048.0,11560.0,9275.0,1.0,1.0,5084.0,5084.0,1.0,1.0, + 10657.0,10657.0,1.0,1.0,4885.0,4885.0,1.0,1.0,11272.0,11272.0, + 3289.0,10276.0,3201.0,1.0,1.0,1045.0,1045.0,1.0,1.0,9430.0, + 9430.0,1.0,1.0,2481.0,2481.0,1.0,1.0,7277.0,7277.0,10593.0, + 10861.0,790.0,1.0,1.0,6591.0,6591.0,1.0,1.0,2912.0,2912.0, + 1.0,1.0,7428.0,7428.0,1.0,1.0,11935.0,11935.0,11955.0,9863.0, + 955.0,1.0,1.0,8511.0,8511.0,1.0,1.0,3833.0,3833.0,1.0, + 1.0,11516.0,11516.0,1.0,1.0,11899.0,11899.0,5755.0,7657.0,1170.0, + 
1.0,1.0,1067.0,1067.0,1.0,1.0,5101.0,5101.0,1.0,1.0, + 11847.0,11847.0,1.0,1.0,9888.0,9888.0,7901.0,11029.0,9970.0,1.0, + 1479.0,1.0,8246.0,5146.0,1479.0,1.0,4134.0,6553.0,8246.0,11567.0, + 1305.0,5146.0,1479.0,6429.0,9094.0,4134.0,11077.0,1646.0,6553.0,8246.0, + 8668.0,2545.0,11567.0,3504.0,8747.0,1305.0,5146.0,10938.0,4978.0,6429.0, + 5777.0,3328.0,9094.0,4134.0,6461.0,7266.0,11077.0,4591.0,6561.0,1646.0, + 6553.0,2744.0,3006.0,8668.0,2975.0,563.0,2545.0,11567.0,949.0,2625.0, + 3504.0,9650.0,4821.0,8747.0,1305.0,1479.0,8246.0,5146.0,1479.0}; diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/params.h b/crypt/liboqs/kex_rlwe_newhope/avx2/params.h new file mode 100644 index 0000000000000000000000000000000000000000..027454ffb0b9a3c44ee5c7648ca040774cf2d6b0 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/params.h @@ -0,0 +1,16 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#define PARAM_N 1024 + +#define PARAM_K 16 /* used in sampler */ +#define PARAM_Q 12289 + +#define POLY_BYTES 1792 +#define NEWHOPE_SEEDBYTES 32 +#define NEWHOPE_RECBYTES 256 + +#define NEWHOPE_SENDABYTES (POLY_BYTES + NEWHOPE_SEEDBYTES) +#define NEWHOPE_SENDBBYTES (POLY_BYTES + NEWHOPE_RECBYTES) + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/poly.c b/crypt/liboqs/kex_rlwe_newhope/avx2/poly.c new file mode 100644 index 0000000000000000000000000000000000000000..43c0df218c2533f91c821dee76c45cd80a14a398 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/poly.c @@ -0,0 +1,143 @@ +#include "poly.h" +#include "ntt.h" +#include "randombytes.h" +#include "fips202.h" +#include "crypto_stream.h" + +static uint16_t barrett_reduce(uint16_t a) +{ + uint32_t u; + + u = ((uint32_t) a * 5) >> 16; + u *= PARAM_Q; + a -= u; + return a; +} + +void poly_frombytes(poly *r, const unsigned char *a) +{ + int i; + for(i=0;i<PARAM_N/4;i++) + { + r->coeffs[4*i+0] = a[7*i+0] | (((uint16_t)a[7*i+1] & 0x3f) << 8); + r->coeffs[4*i+1] = (a[7*i+1] >> 6) | (((uint16_t)a[7*i+2]) << 2) | (((uint16_t)a[7*i+3] & 0x0f) << 
10); + r->coeffs[4*i+2] = (a[7*i+3] >> 4) | (((uint16_t)a[7*i+4]) << 4) | (((uint16_t)a[7*i+5] & 0x03) << 12); + r->coeffs[4*i+3] = (a[7*i+5] >> 2) | (((uint16_t)a[7*i+6]) << 6); + } +} + +void poly_tobytes(unsigned char *r, const poly *p) +{ + int i; + uint16_t t0,t1,t2,t3,m; + int16_t c; + for(i=0;i<PARAM_N/4;i++) + { + t0 = barrett_reduce(p->coeffs[4*i+0]); //Make sure that coefficients have only 14 bits + t1 = barrett_reduce(p->coeffs[4*i+1]); + t2 = barrett_reduce(p->coeffs[4*i+2]); + t3 = barrett_reduce(p->coeffs[4*i+3]); + + m = t0 - PARAM_Q; + c = m; + c >>= 15; + t0 = m ^ ((t0^m)&c); // <Make sure that coefficients are in [0,q] + + m = t1 - PARAM_Q; + c = m; + c >>= 15; + t1 = m ^ ((t1^m)&c); // <Make sure that coefficients are in [0,q] + + m = t2 - PARAM_Q; + c = m; + c >>= 15; + t2 = m ^ ((t2^m)&c); // <Make sure that coefficients are in [0,q] + + m = t3 - PARAM_Q; + c = m; + c >>= 15; + t3 = m ^ ((t3^m)&c); // <Make sure that coefficients are in [0,q] + + r[7*i+0] = t0 & 0xff; + r[7*i+1] = (t0 >> 8) | (t1 << 6); + r[7*i+2] = (t1 >> 2); + r[7*i+3] = (t1 >> 10) | (t2 << 4); + r[7*i+4] = (t2 >> 4); + r[7*i+5] = (t2 >> 12) | (t3 << 2); + r[7*i+6] = (t3 >> 6); + } +} + + + +void poly_uniform(poly *a, const unsigned char *seed) +{ + unsigned int pos=0, ctr=0; + uint16_t val; + uint64_t state[25]; + unsigned int nblocks=13; + uint8_t buf[SHAKE128_RATE*nblocks]; + + shake128_absorb(state, seed, NEWHOPE_SEEDBYTES); + + shake128_squeezeblocks((unsigned char *) buf, nblocks, state); + + while(ctr < PARAM_N) + { + //val = (buf[pos] | ((uint16_t) buf[pos+1] << 8)) & 0x3fff; // Specialized for q = 12889 + val = (buf[pos] | ((uint16_t) buf[pos+1] << 8)); + if(val < 5*PARAM_Q) + a->coeffs[ctr++] = val; + pos += 2; + if(pos > SHAKE128_RATE*nblocks-2) + { + nblocks=1; + shake128_squeezeblocks((unsigned char *) buf,nblocks,state); + pos = 0; + } + } +} + + +extern void cbd(poly *r, unsigned char *b); + +void poly_getnoise(poly *r, unsigned char *seed, unsigned char nonce) 
+{ +#if PARAM_K != 16 +#error "poly_getnoise in poly.c only supports k=16" +#endif + unsigned char buf[4*PARAM_N]; + unsigned char n[CRYPTO_STREAM_NONCEBYTES]; + int i; + + for(i=1;i<CRYPTO_STREAM_NONCEBYTES;i++) + n[i] = 0; + n[0] = nonce; + + crypto_stream(buf,4*PARAM_N,n,seed); + cbd(r,buf); +} + +void poly_add(poly *r, const poly *a, const poly *b) +{ + int i; + for(i=0;i<PARAM_N;i++) + r->coeffs[i] = barrett_reduce(a->coeffs[i] + b->coeffs[i]); +} + +void poly_ntt(poly *r) +{ + double __attribute__ ((aligned (32))) temp[PARAM_N]; + poly_pointwise(r, r, (poly *)psis_bitrev); + + ntt_double(r->coeffs,omegas_double,temp); +} + +void poly_invntt(poly *r) +{ + double __attribute__ ((aligned (32))) temp[PARAM_N]; + + bitrev_vector(r->coeffs); + ntt_double(r->coeffs, omegas_inv_double,temp); + poly_pointwise(r, r, (poly *)psis_inv); +} diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/poly.h b/crypt/liboqs/kex_rlwe_newhope/avx2/poly.h new file mode 100644 index 0000000000000000000000000000000000000000..bf866230f18faed2d09248c1bf846fca604cc3ef --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/poly.h @@ -0,0 +1,22 @@ +#ifndef POLY_H +#define POLY_H + +#include <stdint.h> +#include "params.h" + +typedef struct { + int32_t coeffs[PARAM_N]; +} poly __attribute__ ((aligned (32))); + +void poly_uniform(poly *a, const unsigned char *seed); +void poly_getnoise(poly *r, unsigned char *seed, unsigned char nonce); +void poly_add(poly *r, const poly *a, const poly *b); + +void poly_ntt(poly *r); +void poly_invntt(poly *r); +void poly_pointwise(poly *r, const poly *a, const poly *b); + +void poly_frombytes(poly *r, const unsigned char *a); +void poly_tobytes(unsigned char *r, const poly *p); + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/poly_pointwise.s b/crypt/liboqs/kex_rlwe_newhope/avx2/poly_pointwise.s new file mode 100644 index 0000000000000000000000000000000000000000..53d7d26aa8ae0beabbf55afed75cb65b6d16347e --- /dev/null +++ 
b/crypt/liboqs/kex_rlwe_newhope/avx2/poly_pointwise.s @@ -0,0 +1,282 @@ + +# qhasm: int64 input_0 + +# qhasm: int64 input_1 + +# qhasm: int64 input_2 + +# qhasm: int64 input_3 + +# qhasm: int64 input_4 + +# qhasm: int64 input_5 + +# qhasm: stack64 input_6 + +# qhasm: stack64 input_7 + +# qhasm: int64 caller_r11 + +# qhasm: int64 caller_r12 + +# qhasm: int64 caller_r13 + +# qhasm: int64 caller_r14 + +# qhasm: int64 caller_r15 + +# qhasm: int64 caller_rbx + +# qhasm: int64 caller_rbp + +# qhasm: int64 ctri + +# qhasm: int64 rp + +# qhasm: int64 ap + +# qhasm: int64 bp + +# qhasm: reg256 r + +# qhasm: reg256 a + +# qhasm: reg256 b + +# qhasm: reg256 q + +# qhasm: reg256 qinv + +# qhasm: reg256 c + +# qhasm: enter poly_pointwise +.p2align 5 +.global _poly_pointwise +.global poly_pointwise +_poly_pointwise: +poly_pointwise: +mov %rsp,%r11 +and $31,%r11 +add $0,%r11 +sub %r11,%rsp + +# qhasm: rp = input_0 +# asm 1: mov <input_0=int64#1,>rp=int64#1 +# asm 2: mov <input_0=%rdi,>rp=%rdi +mov %rdi,%rdi + +# qhasm: ap = input_1 +# asm 1: mov <input_1=int64#2,>ap=int64#2 +# asm 2: mov <input_1=%rsi,>ap=%rsi +mov %rsi,%rsi + +# qhasm: bp = input_2 +# asm 1: mov <input_2=int64#3,>bp=int64#3 +# asm 2: mov <input_2=%rdx,>bp=%rdx +mov %rdx,%rdx + +# qhasm: q = mem256[q8] +# asm 1: vmovdqu q8,>q=reg256#1 +# asm 2: vmovdqu q8,>q=%ymm0 +vmovdqu q8,%ymm0 + +# qhasm: qinv = mem256[qinv16] +# asm 1: vmovdqu qinv16,>qinv=reg256#2 +# asm 2: vmovdqu qinv16,>qinv=%ymm1 +vmovdqu qinv16,%ymm1 + +# qhasm: ctri = 256 +# asm 1: mov $256,>ctri=int64#4 +# asm 2: mov $256,>ctri=%rcx +mov $256,%rcx + +# qhasm: loopi: +._loopi: + +# qhasm: a = (4x double)(4x int32)mem128[ap + 0] +# asm 1: vcvtdq2pd 0(<ap=int64#2),>a=reg256#3 +# asm 2: vcvtdq2pd 0(<ap=%rsi),>a=%ymm2 +vcvtdq2pd 0(%rsi),%ymm2 + +# qhasm: b = (4x double)(4x int32)mem128[bp + 0] +# asm 1: vcvtdq2pd 0(<bp=int64#3),>b=reg256#4 +# asm 2: vcvtdq2pd 0(<bp=%rdx),>b=%ymm3 +vcvtdq2pd 0(%rdx),%ymm3 + +# qhasm: 4x a approx*= b +# asm 1: vmulpd 
<b=reg256#4,<a=reg256#3,>a=reg256#3 +# asm 2: vmulpd <b=%ymm3,<a=%ymm2,>a=%ymm2 +vmulpd %ymm3,%ymm2,%ymm2 + +# qhasm: 4x c = approx a * qinv +# asm 1: vmulpd <a=reg256#3,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd <a=%ymm2,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm2,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x a approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a=reg256#3 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a=%ymm2 +vfnmadd231pd %ymm3,%ymm0,%ymm2 + +# qhasm: a = (4x int32)(4x double)a,0,0,0,0 +# asm 1: vcvtpd2dq <a=reg256#3,>a=reg256#3dq +# asm 2: vcvtpd2dq <a=%ymm2,>a=%xmm2 +vcvtpd2dq %ymm2,%xmm2 + +# qhasm: mem128[rp + 0] = a +# asm 1: vmovupd <a=reg256#3dq,0(<rp=int64#1) +# asm 2: vmovupd <a=%xmm2,0(<rp=%rdi) +vmovupd %xmm2,0(%rdi) + +# qhasm: a = (4x double)(4x int32)mem128[ap + 16] +# asm 1: vcvtdq2pd 16(<ap=int64#2),>a=reg256#3 +# asm 2: vcvtdq2pd 16(<ap=%rsi),>a=%ymm2 +vcvtdq2pd 16(%rsi),%ymm2 + +# qhasm: b = (4x double)(4x int32)mem128[bp + 16] +# asm 1: vcvtdq2pd 16(<bp=int64#3),>b=reg256#4 +# asm 2: vcvtdq2pd 16(<bp=%rdx),>b=%ymm3 +vcvtdq2pd 16(%rdx),%ymm3 + +# qhasm: 4x a approx*= b +# asm 1: vmulpd <b=reg256#4,<a=reg256#3,>a=reg256#3 +# asm 2: vmulpd <b=%ymm3,<a=%ymm2,>a=%ymm2 +vmulpd %ymm3,%ymm2,%ymm2 + +# qhasm: 4x c = approx a * qinv +# asm 1: vmulpd <a=reg256#3,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd <a=%ymm2,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm2,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x a approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a=reg256#3 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a=%ymm2 +vfnmadd231pd %ymm3,%ymm0,%ymm2 + +# qhasm: a = (4x int32)(4x double)a,0,0,0,0 +# asm 1: vcvtpd2dq <a=reg256#3,>a=reg256#3dq +# asm 2: vcvtpd2dq <a=%ymm2,>a=%xmm2 +vcvtpd2dq %ymm2,%xmm2 + +# 
qhasm: mem128[rp + 16] = a +# asm 1: vmovupd <a=reg256#3dq,16(<rp=int64#1) +# asm 2: vmovupd <a=%xmm2,16(<rp=%rdi) +vmovupd %xmm2,16(%rdi) + +# qhasm: a = (4x double)(4x int32)mem128[ap + 32] +# asm 1: vcvtdq2pd 32(<ap=int64#2),>a=reg256#3 +# asm 2: vcvtdq2pd 32(<ap=%rsi),>a=%ymm2 +vcvtdq2pd 32(%rsi),%ymm2 + +# qhasm: b = (4x double)(4x int32)mem128[bp + 32] +# asm 1: vcvtdq2pd 32(<bp=int64#3),>b=reg256#4 +# asm 2: vcvtdq2pd 32(<bp=%rdx),>b=%ymm3 +vcvtdq2pd 32(%rdx),%ymm3 + +# qhasm: 4x a approx*= b +# asm 1: vmulpd <b=reg256#4,<a=reg256#3,>a=reg256#3 +# asm 2: vmulpd <b=%ymm3,<a=%ymm2,>a=%ymm2 +vmulpd %ymm3,%ymm2,%ymm2 + +# qhasm: 4x c = approx a * qinv +# asm 1: vmulpd <a=reg256#3,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd <a=%ymm2,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm2,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x a approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a=reg256#3 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a=%ymm2 +vfnmadd231pd %ymm3,%ymm0,%ymm2 + +# qhasm: a = (4x int32)(4x double)a,0,0,0,0 +# asm 1: vcvtpd2dq <a=reg256#3,>a=reg256#3dq +# asm 2: vcvtpd2dq <a=%ymm2,>a=%xmm2 +vcvtpd2dq %ymm2,%xmm2 + +# qhasm: mem128[rp + 32] = a +# asm 1: vmovupd <a=reg256#3dq,32(<rp=int64#1) +# asm 2: vmovupd <a=%xmm2,32(<rp=%rdi) +vmovupd %xmm2,32(%rdi) + +# qhasm: a = (4x double)(4x int32)mem128[ap + 48] +# asm 1: vcvtdq2pd 48(<ap=int64#2),>a=reg256#3 +# asm 2: vcvtdq2pd 48(<ap=%rsi),>a=%ymm2 +vcvtdq2pd 48(%rsi),%ymm2 + +# qhasm: b = (4x double)(4x int32)mem128[bp + 48] +# asm 1: vcvtdq2pd 48(<bp=int64#3),>b=reg256#4 +# asm 2: vcvtdq2pd 48(<bp=%rdx),>b=%ymm3 +vcvtdq2pd 48(%rdx),%ymm3 + +# qhasm: 4x a approx*= b +# asm 1: vmulpd <b=reg256#4,<a=reg256#3,>a=reg256#3 +# asm 2: vmulpd <b=%ymm3,<a=%ymm2,>a=%ymm2 +vmulpd %ymm3,%ymm2,%ymm2 + +# qhasm: 4x c = approx a * qinv +# asm 1: vmulpd <a=reg256#3,<qinv=reg256#2,>c=reg256#4 +# asm 2: vmulpd 
<a=%ymm2,<qinv=%ymm1,>c=%ymm3 +vmulpd %ymm2,%ymm1,%ymm3 + +# qhasm: 4x c = floor(c) +# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4 +# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3 +vroundpd $9,%ymm3,%ymm3 + +# qhasm: 4x a approx-= c * q +# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a=reg256#3 +# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a=%ymm2 +vfnmadd231pd %ymm3,%ymm0,%ymm2 + +# qhasm: a = (4x int32)(4x double)a,0,0,0,0 +# asm 1: vcvtpd2dq <a=reg256#3,>a=reg256#3dq +# asm 2: vcvtpd2dq <a=%ymm2,>a=%xmm2 +vcvtpd2dq %ymm2,%xmm2 + +# qhasm: mem128[rp + 48] = a +# asm 1: vmovupd <a=reg256#3dq,48(<rp=int64#1) +# asm 2: vmovupd <a=%xmm2,48(<rp=%rdi) +vmovupd %xmm2,48(%rdi) + +# qhasm: rp += 64 +# asm 1: add $64,<rp=int64#1 +# asm 2: add $64,<rp=%rdi +add $64,%rdi + +# qhasm: ap += 64 +# asm 1: add $64,<ap=int64#2 +# asm 2: add $64,<ap=%rsi +add $64,%rsi + +# qhasm: bp += 64 +# asm 1: add $64,<bp=int64#3 +# asm 2: add $64,<bp=%rdx +add $64,%rdx + +# qhasm: unsigned>? ctri -= 4 +# asm 1: sub $4,<ctri=int64#4 +# asm 2: sub $4,<ctri=%rcx +sub $4,%rcx +# comment:fp stack unchanged by jump + +# qhasm: goto loopi if unsigned> +ja ._loopi + +# qhasm: return +add %r11,%rsp +ret diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/precomp.c b/crypt/liboqs/kex_rlwe_newhope/avx2/precomp.c new file mode 100644 index 0000000000000000000000000000000000000000..7be60ec821bc859a0f031eb87db148b43879ff3d --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/precomp.c @@ -0,0 +1,14 @@ +#include "inttypes.h" +#include "ntt.h" +#include "params.h" + +int32_t omegas[PARAM_N/2] = 
{1,10810,7143,4043,10984,722,5736,8155,3542,8785,9744,3621,10643,1212,3195,5860,7468,2639,9664,11340,11726,9314,9283,9545,5728,7698,5023,5828,8961,6512,7311,1351,2319,11119,11334,11499,9088,3014,5086,10963,4846,9542,9154,3712,4805,8736,11227,9995,3091,12208,7969,11289,9326,7393,9238,2366,11112,8034,10654,9521,12149,10436,7678,11563,1260,4388,4632,6534,2426,334,1428,1696,2013,9000,729,3241,2881,3284,7197,10200,8595,7110,10530,8582,3382,11934,9741,8058,3637,3459,145,6747,9558,8357,7399,6378,9447,480,1022,9,9821,339,5791,544,10616,4278,6958,7300,8112,8705,1381,9764,11336,8541,827,5767,2476,118,2197,7222,3949,8993,4452,2396,7935,130,2837,6915,2401,442,7188,11222,390,773,8456,3778,354,4861,9377,5698,5012,9808,2859,11244,1017,7404,1632,7205,27,9223,8526,10849,1537,242,4714,8146,9611,3704,5019,11744,1002,5011,5088,8005,7313,10682,8509,11414,9852,3646,6022,2987,9723,10102,6250,9867,11224,2143,11885,7644,1168,5277,11082,3248,493,8193,6845,2381,7952,11854,1378,1912,2166,3915,12176,7370,12129,3149,12286,4437,3636,4938,5291,2704,10863,7635,1663,10512,3364,1689,4057,9018,9442,7875,2174,4372,7247,9984,4053,2645,5195,9509,7394,1484,9042,9603,8311,9320,9919,2865,5332,3510,1630,10163,5407,3186,11136,9405,10040,8241,9890,8889,7098,9153,9289,671,3016,243,6730,420,10111,1544,3985,4905,3531,476,49,1263,5915,1483,9789,10800,10706,6347,1512,350,10474,5383,5369,10232,9087,4493,9551,6421,6554,2655,9280,1693,174,723,10314,8532,347,2925,8974,11863,1858,4754,3030,4115,2361,10446,2908,218,3434,8760,3963,576,6142,9842,1954,10238,9407,10484,3991,8320,9522,156,2281,5876,10258,5333,3772,418,5908,11836,5429,7515,7552,1293,295,6099,5766,652,8273,4077,8527,9370,325,10885,11143,11341,5990,1159,8561,8240,3329,4298,12121,2692,5961,7183,10327,1594,6167,9734,7105,11089,1360,3956,6170,5297,8210,11231,922,441,1958,4322,1112,2078,4046,709,9139,1319,4240,8719,6224,11454,2459,683,3656,12225,10723,5782,9341,9786,9166,10542,9235,6803,7856,6370,3834,7032,7048,9369,8120,9162,6821,1010,8807,787,5057,4698,4780,8844,1
2097,1321,4912,10240,677,6415,6234,8953,1323,9523,12237,3174,1579,11858,9784,5906,3957,9450,151,10162,12231,12048,3532,11286,1956,7280,11404,6281,3477,6608,142,11184,9445,3438,11314,4212,9260,6695,4782,5886,8076,504,2302,11684,11868,8209,3602,6068,8689,3263,6077,7665,7822,7500,6752,4749,4449,6833,12142,8500,6118,8471,1190,9606,3860,5445,7753,11239,5079,9027,2169,11767,7965,4916,8214,5315,11011,9945,1973,6715,8775,11248,5925,11271,654,3565,1702,1987,6760,5206,3199,12233,6136,6427,6874,8646,4948,6152,400,10561,5339,5446,3710,6093,468,8301,316,11907,10256,8291,3879,1922,10930,6854,973,11035}; + + +int32_t omegas_inv[PARAM_N/2] = {1,1479,8246,5146,4134,6553,11567,1305,6429,9094,11077,1646,8668,2545,3504,8747,10938,4978,5777,3328,6461,7266,4591,6561,2744,3006,2975,563,949,2625,9650,4821,726,4611,1853,140,2768,1635,4255,1177,9923,3051,4896,2963,1000,4320,81,9198,2294,1062,3553,7484,8577,3135,2747,7443,1326,7203,9275,3201,790,955,1170,9970,5374,9452,12159,4354,9893,7837,3296,8340,5067,10092,12171,9813,6522,11462,3748,953,2525,10908,3584,4177,4989,5331,8011,1673,11745,6498,11950,2468,12280,11267,11809,2842,5911,4890,3932,2731,5542,12144,8830,8652,4231,2548,355,8907,3707,1759,5179,3694,2089,5092,9005,9408,9048,11560,3289,10276,10593,10861,11955,9863,5755,7657,7901,11029,11813,8758,7384,8304,10745,2178,11869,5559,12046,9273,11618,3000,3136,5191,3400,2399,4048,2249,2884,1153,9103,6882,2126,10659,8779,6957,9424,2370,2969,3978,2686,3247,10805,4895,2780,7094,9644,8236,2305,5042,7917,10115,4414,2847,3271,8232,10600,8925,1777,10626,4654,1426,9585,6998,7351,8653,7852,3,9140,160,4919,113,8374,10123,10377,10911,435,4337,9908,5444,4096,11796,9041,1207,7012,11121,4645,404,10146,1065,2422,6039,2187,2566,9302,6267,8643,2437,875,3780,1607,4976,4284,7201,7278,11287,545,7270,8585,2678,4143,7575,12047,10752,1440,3763,3066,12262,5084,10657,4885,11272,1045,9430,2481,7277,6591,2912,7428,11935,8511,3833,11516,11899,1067,5101,11847,9888,1254,11316,5435,1359,10367,8410,3998,2033,382,11973,3988,1182
1,6196,8579,6843,6950,1728,11889,6137,7341,3643,5415,5862,6153,56,9090,7083,5529,10302,10587,8724,11635,1018,6364,1041,3514,5574,10316,2344,1278,6974,4075,7373,4324,522,10120,3262,7210,1050,4536,6844,8429,2683,11099,3818,6171,3789,147,5456,7840,7540,5537,4789,4467,4624,6212,9026,3600,6221,8687,4080,421,605,9987,11785,4213,6403,7507,5594,3029,8077,975,8851,2844,1105,12147,5681,8812,6008,885,5009,10333,1003,8757,241,58,2127,12138,2839,8332,6383,2505,431,10710,9115,52,2766,10966,3336,6055,5874,11612,2049,7377,10968,192,3445,7509,7591,7232,11502,3482,11279,5468,3127,4169,2920,5241,5257,8455,5919,4433,5486,3054,1747,3123,2503,2948,6507,1566,64,8633,11606,9830,835,6065,3570,8049,10970,3150,11580,8243,10211,11177,7967,10331,11848,11367,1058,4079,6992,6119,8333,10929,1200,5184,2555,6122,10695,1962,5106,6328,9597,168,7991,8960,4049,3728,11130,6299,948,1146,1404,11964,2919,3762,8212,4016,11637,6523,6190,11994,10996,4737,4774,6860,453,6381,11871,8517,6956,2031,6413,10008,12133,2767,3969,8298,1805,2882,2051,10335,2447,6147,11713,8326,3529,8855,12071,9381,1843,9928,8174,9259,7535,10431,426,3315,9364,11942,3757,1975,11566,12115,10596,3009,9634,5735,5868,2738,7796,3202,2057,6920,6906,1815,11939,10777,5942,1583,1489,2500,10806,6374,11026,12240}; + + +int32_t psis_bitrev[PARAM_N] = 
{1,10810,7143,4043,10984,722,5736,8155,3542,8785,9744,3621,10643,1212,3195,5860,7468,2639,9664,11340,11726,9314,9283,9545,5728,7698,5023,5828,8961,6512,7311,1351,2319,11119,11334,11499,9088,3014,5086,10963,4846,9542,9154,3712,4805,8736,11227,9995,3091,12208,7969,11289,9326,7393,9238,2366,11112,8034,10654,9521,12149,10436,7678,11563,1260,4388,4632,6534,2426,334,1428,1696,2013,9000,729,3241,2881,3284,7197,10200,8595,7110,10530,8582,3382,11934,9741,8058,3637,3459,145,6747,9558,8357,7399,6378,9447,480,1022,9,9821,339,5791,544,10616,4278,6958,7300,8112,8705,1381,9764,11336,8541,827,5767,2476,118,2197,7222,3949,8993,4452,2396,7935,130,2837,6915,2401,442,7188,11222,390,773,8456,3778,354,4861,9377,5698,5012,9808,2859,11244,1017,7404,1632,7205,27,9223,8526,10849,1537,242,4714,8146,9611,3704,5019,11744,1002,5011,5088,8005,7313,10682,8509,11414,9852,3646,6022,2987,9723,10102,6250,9867,11224,2143,11885,7644,1168,5277,11082,3248,493,8193,6845,2381,7952,11854,1378,1912,2166,3915,12176,7370,12129,3149,12286,4437,3636,4938,5291,2704,10863,7635,1663,10512,3364,1689,4057,9018,9442,7875,2174,4372,7247,9984,4053,2645,5195,9509,7394,1484,9042,9603,8311,9320,9919,2865,5332,3510,1630,10163,5407,3186,11136,9405,10040,8241,9890,8889,7098,9153,9289,671,3016,243,6730,420,10111,1544,3985,4905,3531,476,49,1263,5915,1483,9789,10800,10706,6347,1512,350,10474,5383,5369,10232,9087,4493,9551,6421,6554,2655,9280,1693,174,723,10314,8532,347,2925,8974,11863,1858,4754,3030,4115,2361,10446,2908,218,3434,8760,3963,576,6142,9842,1954,10238,9407,10484,3991,8320,9522,156,2281,5876,10258,5333,3772,418,5908,11836,5429,7515,7552,1293,295,6099,5766,652,8273,4077,8527,9370,325,10885,11143,11341,5990,1159,8561,8240,3329,4298,12121,2692,5961,7183,10327,1594,6167,9734,7105,11089,1360,3956,6170,5297,8210,11231,922,441,1958,4322,1112,2078,4046,709,9139,1319,4240,8719,6224,11454,2459,683,3656,12225,10723,5782,9341,9786,9166,10542,9235,6803,7856,6370,3834,7032,7048,9369,8120,9162,6821,1010,8807,787,5057,4698,4780,8844,1
2097,1321,4912,10240,677,6415,6234,8953,1323,9523,12237,3174,1579,11858,9784,5906,3957,9450,151,10162,12231,12048,3532,11286,1956,7280,11404,6281,3477,6608,142,11184,9445,3438,11314,4212,9260,6695,4782,5886,8076,504,2302,11684,11868,8209,3602,6068,8689,3263,6077,7665,7822,7500,6752,4749,4449,6833,12142,8500,6118,8471,1190,9606,3860,5445,7753,11239,5079,9027,2169,11767,7965,4916,8214,5315,11011,9945,1973,6715,8775,11248,5925,11271,654,3565,1702,1987,6760,5206,3199,12233,6136,6427,6874,8646,4948,6152,400,10561,5339,5446,3710,6093,468,8301,316,11907,10256,8291,3879,1922,10930,6854,973,11035,7,1936,845,3723,3154,5054,3285,7929,216,50,6763,769,767,8484,10076,4153,3120,6184,6203,5646,8348,3753,3536,5370,3229,4730,10583,3929,1282,8717,2021,9457,3944,4099,5604,6759,2171,8809,11024,3007,9344,5349,2633,1406,9057,11996,4855,8520,9348,11722,6627,5289,3837,2595,3221,4273,4050,7082,844,5202,11309,11607,4590,7207,8820,6138,7846,8871,4693,2338,9996,11872,1802,1555,5103,10398,7878,10699,1223,9955,11009,614,12265,10918,11385,9804,6742,7250,881,11924,1015,10362,5461,9343,2637,7779,4684,3360,7154,63,7302,2373,3670,3808,578,5368,11839,1944,7628,11779,9667,6903,5618,10631,5789,3502,5043,826,3090,1398,3065,1506,6586,4483,6389,910,7570,11538,4518,3094,1160,4820,2730,5411,10036,1868,2478,9449,4194,3019,10506,7211,7724,4974,7119,2672,11424,1279,189,3116,10526,2209,10759,1694,8420,7866,5832,1350,10555,8474,7014,10499,11038,6879,2035,1040,10407,6164,7519,944,5287,8620,6616,9269,6883,7624,4834,2712,9461,4352,8176,72,3840,10447,3451,8195,11048,4378,6508,9244,9646,1095,2873,2827,11498,2434,11169,9754,12268,6481,874,9988,170,6639,2307,4289,11641,12139,11259,11823,3821,1681,4649,5969,2929,6026,1573,8443,3793,6226,11787,5118,2602,10388,1849,5776,9021,3795,7988,7766,457,12281,11410,9696,982,10013,4218,4390,8835,8531,7785,778,530,2626,3578,4697,8823,1701,10243,2940,9332,10808,3317,9757,139,3332,343,8841,4538,10381,7078,1866,1208,7562,10584,2450,11873,814,716,10179,2164,6873,5412,8080,9011,6296,3515,11
851,1218,5061,10753,10568,2429,8186,1373,9307,717,8700,8921,4227,4238,11677,8067,1526,11749,12164,3163,4032,6127,7449,1389,10221,4404,11943,3359,9084,5209,1092,3678,4265,10361,464,1826,2926,4489,9118,1136,3449,3708,9051,2065,5826,3495,4564,8755,3961,10533,4145,2275,2461,4267,5653,5063,8113,10771,8524,11014,5508,11113,6555,4860,1125,10844,11158,6302,6693,579,3889,9520,3114,6323,212,8314,4883,6454,3087,1417,5676,7784,2257,3744,4963,2528,9233,5102,11877,6701,6444,4924,4781,1014,11841,1327,3607,3942,7057,2717,60,3200,10754,5836,7723,2260,68,180,4138,7684,2689,10880,7070,204,5509,10821,8308,8882,463,10945,9247,9806,10235,4739,8038,6771,1226,9261,5216,11925,9929,11053,9272,7043,4475,3121,4705,1057,9689,11883,10602,146,5268,1403,1804,6094,7100,12050,9389,994,4554,4670,11777,5464,4906,3375,9998,8896,4335,7376,3528,3825,8054,9342,8307,636,5609,11667,10552,5672,4499,5598,3344,10397,8665,6565,10964,11260,10344,5959,10141,8330,5797,2442,1248,5115,4939,10975,1744,2894,8635,6599,9834,8342,338,3343,8170,1522,10138,12269,5002,4608,5163,4578,377,11914,1620,10453,11864,10104,11897,6085,8122,11251,11366,10058,6197,2800,193,506,1255,1392,5784,3276,8951,2212,9615,10347,8881,2575,1165,2776,11111,6811,3511}; +int32_t psis_inv[PARAM_N] = 
{12277,5265,9530,3117,5712,816,10650,3277,9246,4832,5957,851,10655,10300,3227,461,3577,511,73,1766,5519,2544,2119,7325,2802,5667,11343,3376,5749,6088,7892,2883,3923,2316,3842,4060,580,3594,2269,9102,6567,9716,1388,5465,7803,8137,2918,3928,9339,10112,11978,10489,3254,3976,568,8859,11799,12219,12279,10532,12038,8742,4760,680,8875,4779,7705,8123,2916,10950,6831,4487,641,10625,5029,2474,2109,5568,2551,2120,3814,4056,2335,10867,3308,11006,6839,977,10673,8547,1221,1930,7298,11576,8676,2995,3939,7585,11617,12193,5253,2506,358,8829,6528,11466,1638,234,1789,10789,6808,11506,8666,1238,3688,4038,4088,584,1839,7285,8063,4663,9444,10127,8469,4721,2430,9125,11837,1691,10775,6806,6239,6158,7902,4640,4174,5863,11371,3380,3994,11104,6853,979,3651,11055,6846,978,7162,9801,10178,1454,7230,4544,9427,8369,11729,12209,10522,10281,8491,1213,5440,9555,1365,195,3539,11039,1577,5492,11318,5128,11266,3365,7503,4583,7677,8119,4671,5934,7870,6391,913,1886,2025,5556,7816,11650,6931,9768,3151,9228,6585,7963,11671,6934,11524,6913,11521,5157,7759,2864,9187,3068,5705,815,1872,2023,289,5308,6025,7883,9904,4926,7726,8126,4672,2423,9124,3059,437,1818,7282,6307,901,7151,11555,8673,1239,177,5292,756,108,1771,253,8814,10037,4945,2462,7374,2809,5668,7832,4630,2417,5612,7824,8140,4674,7690,11632,8684,11774,1682,5507,7809,11649,10442,8514,6483,9704,6653,2706,10920,1560,3734,2289,327,7069,4521,4157,4105,2342,10868,12086,12260,3507,501,10605,1515,1972,7304,2799,3911,7581,1083,7177,6292,4410,630,90,3524,2259,7345,6316,6169,6148,6145,4389,627,10623,12051,12255,8773,6520,2687,3895,2312,5597,11333,1619,5498,2541,363,3563,509,7095,11547,12183,3496,2255,9100,1300,7208,8052,6417,7939,9912,1416,5469,6048,864,1879,2024,9067,6562,2693,7407,9836,10183,8477,1211,173,7047,8029,1147,3675,525,75,7033,8027,8169,1167,7189,1027,7169,9802,6667,2708,3898,4068,9359,1337,191,5294,6023,2616,7396,11590,8678,8262,6447,921,10665,12057,3478,4008,11106,12120,3487,9276,10103,6710,11492,8664,8260,1180,10702,5040,720,3614,5783,9604,1372,196
,28,4,10534,5016,11250,10385,12017,8739,3004,9207,6582,6207,7909,4641,663,7117,8039,2904,3926,4072,7604,6353,11441,3390,5751,11355,10400,8508,2971,2180,2067,5562,11328,6885,11517,6912,2743,3903,11091,3340,9255,10100,4954,7730,6371,9688,1384,7220,2787,9176,4822,4200,600,7108,2771,3907,9336,8356,8216,8196,4682,4180,9375,6606,7966,1138,10696,1528,5485,11317,8639,10012,6697,7979,4651,2420,7368,11586,10433,3246,7486,2825,10937,3318,474,7090,4524,5913,7867,4635,9440,11882,3453,5760,4334,9397,3098,10976,1568,224,32,10538,3261,3977,9346,10113,8467,11743,12211,3500,500,1827,261,5304,7780,2867,10943,6830,7998,11676,1668,5505,2542,9141,4817,9466,6619,11479,5151,4247,7629,4601,5924,6113,6140,9655,6646,2705,2142,306,7066,2765,395,1812,3770,11072,8604,10007,11963,1709,9022,4800,7708,9879,6678,954,5403,4283,4123,589,8862,1266,3692,2283,9104,11834,12224,7013,4513,7667,6362,4420,2387,341,7071,9788,6665,9730,1390,10732,10311,1473,1966,3792,7564,11614,10437,1491,213,1786,9033,3046,9213,10094,1442,206,1785,255,1792,256,10570,1510,7238,1034,7170,6291,7921,11665,3422,4000,2327,2088,5565,795,10647,1521,5484,2539,7385,1055,7173,8047,11683,1669,1994,3796,5809,4341,9398,11876,12230,10525,12037,12253,3506,4012,9351,4847,2448,7372,9831,3160,2207,5582,2553,7387,6322,9681,1383,10731,1533,219,5298,4268,7632,6357,9686,8406,4712,9451,10128,4958,5975,11387,8649,11769,6948,11526,12180,1740,10782,6807,2728,7412,4570,4164,4106,11120,12122,8754,11784,3439,5758,11356,6889,9762,11928,1704,1999,10819,12079,12259,7018,11536,1648,1991,2040,2047,2048,10826,12080,8748,8272,8204,1172,1923,7297,2798,7422,6327,4415,7653,6360,11442,12168,7005,8023,9924,8440,8228,2931,7441,1063,3663,5790,9605,10150,1450,8985,11817,10466,10273,12001,3470,7518,1074,1909,7295,9820,4914,702,5367,7789,8135,9940,1420,3714,11064,12114,12264,1752,5517,9566,11900,1700,3754,5803,829,1874,7290,2797,10933,5073,7747,8129,6428,6185,11417,1631,233,5300,9535,10140,11982,8734,8270,2937,10953,8587,8249,2934,9197,4825,5956,4362,9401,1343,3703,529,106
09,12049,6988,6265,895,3639,4031,4087,4095,585,10617,8539,4731,4187,9376,3095,9220,10095,10220,1460,10742,12068,1724,5513,11321,6884,2739,5658,6075,4379,11159,10372,8504,4726,9453,3106,7466,11600,10435,8513,9994,8450,9985,3182,10988,8592,2983,9204,4826,2445,5616,6069,867,3635,5786,11360,5134,2489,10889,12089,1727,7269,2794,9177,1311,5454,9557,6632,2703,9164,10087,1441,3717,531,3587,2268,324,5313,759,1864,5533,2546,7386,9833,8427,4715,11207,1601,7251,4547,11183,12131,1733,10781,10318,1474,10744,5046,4232,11138,10369,6748,964,7160,4534,7670,8118,8182,4680,11202,6867,981,8918,1274,182,26,7026,8026,11680,12202,10521,1503,7237,4545,5916,9623,8397,11733,10454,3249,9242,6587,941,1890,270,10572,6777,9746,6659,6218,6155,6146,878,1881,7291,11575,12187,1741,7271,8061,11685,6936,4502,9421,4857,4205,7623,1089,10689,1527,8996,10063,11971,10488,6765,2722,3900,9335,11867,6962,11528,5158,4248,4118,5855,2592,5637,6072,2623,7397,8079,9932,4930,5971,853,3633,519,8852,11798,3441,11025,1575,225,8810,11792,12218,3501,9278,3081,9218,4828,7712,8124,11694,12204,3499,4011,573,3593,5780,7848,9899,10192,1456,208,7052,2763,7417,11593,10434,12024,8740,11782,10461,3250,5731,7841,9898,1414,202,3540,7528,2831,2160,10842,5060,4234,4116,588,84,}; +int32_t 
omegas_montgomery[PARAM_N/2]={4091,7888,11060,11208,6960,4342,6275,9759,1591,6399,9477,5266,586,5825,7538,9710,1134,6407,1711,965,7099,7674,3743,6442,10414,8100,1885,1688,1364,10329,10164,9180,12210,6240,997,117,4783,4407,1549,7072,2829,6458,4431,8877,7144,2564,5664,4042,12189,432,10751,1237,7610,1534,3983,7863,2181,6308,8720,6570,4843,1690,14,3872,5569,9368,12163,2019,7543,2315,4673,7340,1553,1156,8401,11389,1020,2967,10772,7045,3316,11236,5285,11578,10637,10086,9493,6180,9277,6130,3323,883,10469,489,1502,2851,11061,9729,2742,12241,4970,10481,10078,1195,730,1762,3854,2030,5892,10922,9020,5274,9179,3604,3782,10206,3180,3467,4668,2446,7613,9386,834,7703,6836,3403,5351,12276,3580,1739,10820,9787,10209,4070,12250,8525,10401,2749,7338,10574,6040,943,9330,1477,6865,9668,3585,6633,12145,4063,3684,7680,8188,6902,3533,9807,6090,727,10099,7003,6945,1949,9731,10559,6057,378,7871,8763,8901,9229,8846,4551,9589,11664,7630,8821,5680,4956,6251,8388,10156,8723,2341,3159,1467,5460,8553,7783,2649,2320,9036,6188,737,3698,4699,5753,9046,3687,16,914,5186,10531,4552,1964,3509,8436,7516,5381,10733,3281,7037,1060,2895,7156,8887,5357,6409,8197,2962,6375,5064,6634,5625,278,932,10229,8927,7642,351,9298,237,5858,7692,3146,12126,7586,2053,11285,3802,5204,4602,1748,11300,340,3711,4614,300,10993,5070,10049,11616,12247,7421,10707,5746,5654,3835,5553,1224,8476,9237,3845,250,11209,4225,6326,9680,12254,4136,2778,692,8808,6410,6718,10105,10418,3759,7356,11361,8433,6437,3652,6342,8978,5391,2272,6476,7416,8418,10824,11986,5733,876,7030,2167,2436,3442,9217,8206,4858,5964,2746,7178,1434,7389,8879,10661,11457,4220,1432,10832,4328,8557,1867,9454,2416,3816,9076,686,5393,2523,4339,6115,619,937,2834,7775,3279,2363,7488,6112,5056,824,10204,11690,1113,2727,9848,896,2028,5075,2654,10464,7884,12169,5434,3070,6400,9132,11672,12153,4520,1273,9739,11468,9937,10039,9720,2262,9399,11192,315,4511,1158,6061,6751,11865,357,7367,4550,983,8534,8352,10126,7530,9253,4367,5221,3999,8777,3161,6990,4130,11652,3374,11477,1753,292
,8681,2806,10378,12188,5800,11811,3181,1988,1024,9340,2477,10928,4582,6750,3619,5503,5233,2463,8470,7650,7964,6395,1071,1272,3474,11045,3291,11344,8502,9478,9837,1253,1857,6233,4720,11561,6034,9817,3339,1797,2879,6242,5200,2114,7962,9353,11363,5475,6084,9601,4108,7323,10438,9471,1271,408,6911,3079,360,8276,11535,9156,9049,11539,850,8617,784,7919,8334,12170,1846,10213,12184,7827,11903,5600,9779,1012,721,2784,6676,6552,5348,4424,6816,8405,9959,5150,2356,5552,5267,1333,8801,9661,7308,5788,4910,909,11613,4395,8238,6686,4302,3044,2285,12249,1963,9216,4296,11918,695,4371,9793,4884,2411,10230,2650,841,3890,10231,7248,8505,11196,6688}; +int32_t omegas_inv_montgomery[PARAM_N/2]= {4091,4401,1081,1229,2530,6014,7947,5329,2579,4751,6464,11703,7023,2812,5890,10698,3109,2125,1960,10925,10601,10404,4189,1875,5847,8546,4615,5190,11324,10578,5882,11155,8417,12275,10599,7446,5719,3569,5981,10108,4426,8306,10755,4679,11052,1538,11857,100,8247,6625,9725,5145,3412,7858,5831,9460,5217,10740,7882,7506,12172,11292,6049,79,13,6938,8886,5453,4586,11455,2903,4676,9843,7621,8822,9109,2083,8507,8685,3110,7015,3269,1367,6397,10259,8435,10527,11559,11094,2211,1808,7319,48,9547,2560,1228,9438,10787,11800,1820,11406,8966,6159,3012,6109,2796,2203,1652,711,7004,1053,8973,5244,1517,9322,11269,900,3888,11133,10736,4949,7616,9974,4746,10270,126,2921,6720,6635,6543,1582,4868,42,673,2240,7219,1296,11989,7675,8578,11949,989,10541,7687,7085,8487,1004,10236,4703,163,9143,4597,6431,12052,2991,11938,4647,3362,2060,11357,12011,6664,5655,7225,5914,9327,4092,5880,6932,3402,5133,9394,11229,5252,9008,1556,6908,4773,3853,8780,10325,7737,1758,7103,11375,12273,8602,3243,6536,7590,8591,11552,6101,3253,9969,9640,4506,3736,6829,10822,9130,9948,3566,2133,3901,6038,7333,6609,3468,4659,625,2700,7738,3443,3060,3388,3526,4418,11911,6232,1730,2558,10340,5344,5286,2190,11562,6199,2482,8756,5387,4101,4609,8605,8226,144,5656,8704,2621,5424,10812,2959,11346,6249,1715,4951,9540,1888,3764,39,8219,2080,2502,1469,10550,8709,5601,1093,
3784,5041,2058,8399,11448,9639,2059,9878,7405,2496,7918,11594,371,7993,3073,10326,40,10004,9245,7987,5603,4051,7894,676,11380,7379,6501,4981,2628,3488,10956,7022,6737,9933,7139,2330,3884,5473,7865,6941,5737,5613,9505,11568,11277,2510,6689,386,4462,105,2076,10443,119,3955,4370,11505,3672,11439,750,3240,3133,754,4013,11929,9210,5378,11881,11018,2818,1851,4966,8181,2688,6205,6814,926,2936,4327,10175,7089,6047,9410,10492,8950,2472,6255,728,7569,6056,10432,11036,2452,2811,3787,945,8998,1244,8815,11017,11218,5894,4325,4639,3819,9826,7056,6786,8670,5539,7707,1361,9812,2949,11265,10301,9108,478,6489,101,1911,9483,3608,11997,10536,812,8915,637,8159,5299,9128,3512,8290,7068,7922,3036,4759,2163,3937,3755,11306,7739,4922,11932,424,5538,6228,11131,7778,11974,1097,2890,10027,2569,2250,2352,821,2550,11016,7769,136,617,3157,5889,9219,6855,120,4405,1825,9635,7214,10261,11393,2441,9562,11176,599,2085,11465,7233,6177,4801,9926,9010,4514,9455,11352,11670,6174,7950,9766,6896,11603,3213,8473,9873,2835,10422,3732,7961,1457,10857,8069,832,1628,3410,4900,10855,5111,9543,6325,7431,4083,3072,8847,9853,10122,5259,11413,6556,303,1465,3871,4873,5813,10017,6898,3311,5947,8637,5852,3856,928,4933,8530,1871,2184,5571,5879,3481,11597,9511,8153,35,2609,5963,8064,1080,12039,8444,3052,3813,11065,6736,8454}; diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.c b/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.c new file mode 100644 index 0000000000000000000000000000000000000000..dba8ecadb2d47c0f6453ffc2da183072331fe4b5 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.c @@ -0,0 +1,35 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include "randombytes.h" + +/* it's really stupid that there isn't a syscall for this */ + +static int fd = -1; + +void randombytes(unsigned char *x,unsigned long long xlen) +{ + int i; + + if (fd == -1) { + for (;;) { + fd = open("/dev/urandom",O_RDONLY); + if (fd != -1) break; + sleep(1); + } + } + + while 
(xlen > 0) { + if (xlen < 1048576) i = xlen; else i = 1048576; + + i = read(fd,x,i); + if (i < 1) { + sleep(1); + continue; + } + + x += i; + xlen -= i; + } +} diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.h b/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.h new file mode 100644 index 0000000000000000000000000000000000000000..7f2c4c0bd73d4aa79ecb6d02143cac0350189f5a --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.h @@ -0,0 +1,6 @@ +#ifndef RANDOMBYTES_H +#define RANDOMBYTES_H + +void randombytes(unsigned char *x,unsigned long long xlen); + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/rec.s b/crypt/liboqs/kex_rlwe_newhope/avx2/rec.s new file mode 100644 index 0000000000000000000000000000000000000000..d1cf925132dd5fb16d28e207695e59120e4341d3 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/rec.s @@ -0,0 +1,752 @@ + +# qhasm: int64 input_0 + +# qhasm: int64 input_1 + +# qhasm: int64 input_2 + +# qhasm: int64 input_3 + +# qhasm: int64 input_4 + +# qhasm: int64 input_5 + +# qhasm: stack64 input_6 + +# qhasm: stack64 input_7 + +# qhasm: int64 caller_r11 + +# qhasm: int64 caller_r12 + +# qhasm: int64 caller_r13 + +# qhasm: int64 caller_r14 + +# qhasm: int64 caller_r15 + +# qhasm: int64 caller_rbx + +# qhasm: int64 caller_rbp + +# qhasm: reg256 tmp0 + +# qhasm: reg256 tmp1 + +# qhasm: reg256 tmp2 + +# qhasm: reg256 tmp3 + +# qhasm: reg256 c0 + +# qhasm: reg256 c1 + +# qhasm: reg256 c2 + +# qhasm: reg256 c3 + +# qhasm: reg256 b + +# qhasm: reg256 t + +# qhasm: reg256 d + +# qhasm: reg256 c + +# qhasm: reg256 qx8 + +# qhasm: reg256 _1x8 + +# qhasm: reg256 k + +# qhasm: stack256 pg + +# qhasm: int64 pgp + +# qhasm: int64 byte + +# qhasm: int64 key + +# qhasm: int64 ctr + +# qhasm: enter rec +.p2align 5 +.global _rec +.global rec +_rec: +rec: +mov %rsp,%r11 +and $31,%r11 +add $32,%r11 +sub %r11,%rsp + +# qhasm: ctr = 0 +# asm 1: mov $0,>ctr=int64#4 +# asm 2: mov $0,>ctr=%rcx +mov $0,%rcx + +# qhasm: _1x8 = mem256[v1x8] +# asm 
1: vmovdqu v1x8,>_1x8=reg256#1 +# asm 2: vmovdqu v1x8,>_1x8=%ymm0 +vmovdqu v1x8,%ymm0 + +# qhasm: qx8 = mem256[q8x] +# asm 1: vmovdqu q8x,>qx8=reg256#2 +# asm 2: vmovdqu q8x,>qx8=%ymm1 +vmovdqu q8x,%ymm1 + +# qhasm: pgp = &pg +# asm 1: leaq <pg=stack256#1,>pgp=int64#5 +# asm 2: leaq <pg=0(%rsp),>pgp=%r8 +leaq 0(%rsp),%r8 + +# qhasm: looptop: +._looptop: + +# qhasm: ctr <<= 5 +# asm 1: shl $5,<ctr=int64#4 +# asm 2: shl $5,<ctr=%rcx +shl $5,%rcx + +# qhasm: c0 = mem256[input_2 + ctr + 0] +# asm 1: vmovupd 0(<input_2=int64#3,<ctr=int64#4),>c0=reg256#3 +# asm 2: vmovupd 0(<input_2=%rdx,<ctr=%rcx),>c0=%ymm2 +vmovupd 0(%rdx,%rcx),%ymm2 + +# qhasm: 8x c0 <<= 1 +# asm 1: vpslld $1,<c0=reg256#3,>c0=reg256#3 +# asm 2: vpslld $1,<c0=%ymm2,>c0=%ymm2 +vpslld $1,%ymm2,%ymm2 + +# qhasm: c1 = mem256[input_2 + ctr + 1024] +# asm 1: vmovupd 1024(<input_2=int64#3,<ctr=int64#4),>c1=reg256#4 +# asm 2: vmovupd 1024(<input_2=%rdx,<ctr=%rcx),>c1=%ymm3 +vmovupd 1024(%rdx,%rcx),%ymm3 + +# qhasm: 8x c1 <<= 1 +# asm 1: vpslld $1,<c1=reg256#4,>c1=reg256#4 +# asm 2: vpslld $1,<c1=%ymm3,>c1=%ymm3 +vpslld $1,%ymm3,%ymm3 + +# qhasm: c2 = mem256[input_2 + ctr + 2048] +# asm 1: vmovupd 2048(<input_2=int64#3,<ctr=int64#4),>c2=reg256#5 +# asm 2: vmovupd 2048(<input_2=%rdx,<ctr=%rcx),>c2=%ymm4 +vmovupd 2048(%rdx,%rcx),%ymm4 + +# qhasm: 8x c2 <<= 1 +# asm 1: vpslld $1,<c2=reg256#5,>c2=reg256#5 +# asm 2: vpslld $1,<c2=%ymm4,>c2=%ymm4 +vpslld $1,%ymm4,%ymm4 + +# qhasm: c3 = mem256[input_2 + ctr + 3072] +# asm 1: vmovupd 3072(<input_2=int64#3,<ctr=int64#4),>c3=reg256#6 +# asm 2: vmovupd 3072(<input_2=%rdx,<ctr=%rcx),>c3=%ymm5 +vmovupd 3072(%rdx,%rcx),%ymm5 + +# qhasm: 8x c0 += c3 +# asm 1: vpaddd <c3=reg256#6,<c0=reg256#3,>c0=reg256#3 +# asm 2: vpaddd <c3=%ymm5,<c0=%ymm2,>c0=%ymm2 +vpaddd %ymm5,%ymm2,%ymm2 + +# qhasm: 8x c1 += c3 +# asm 1: vpaddd <c3=reg256#6,<c1=reg256#4,>c1=reg256#4 +# asm 2: vpaddd <c3=%ymm5,<c1=%ymm3,>c1=%ymm3 +vpaddd %ymm5,%ymm3,%ymm3 + +# qhasm: 8x c2 += c3 +# asm 1: vpaddd 
<c3=reg256#6,<c2=reg256#5,>c2=reg256#5 +# asm 2: vpaddd <c3=%ymm5,<c2=%ymm4,>c2=%ymm4 +vpaddd %ymm5,%ymm4,%ymm4 + +# qhasm: 8x c0 *= qx8 +# asm 1: vpmulld <qx8=reg256#2,<c0=reg256#3,>c0=reg256#3 +# asm 2: vpmulld <qx8=%ymm1,<c0=%ymm2,>c0=%ymm2 +vpmulld %ymm1,%ymm2,%ymm2 + +# qhasm: 8x c1 *= qx8 +# asm 1: vpmulld <qx8=reg256#2,<c1=reg256#4,>c1=reg256#4 +# asm 2: vpmulld <qx8=%ymm1,<c1=%ymm3,>c1=%ymm3 +vpmulld %ymm1,%ymm3,%ymm3 + +# qhasm: 8x c2 *= qx8 +# asm 1: vpmulld <qx8=reg256#2,<c2=reg256#5,>c2=reg256#5 +# asm 2: vpmulld <qx8=%ymm1,<c2=%ymm4,>c2=%ymm4 +vpmulld %ymm1,%ymm4,%ymm4 + +# qhasm: 8x c3 *= qx8 +# asm 1: vpmulld <qx8=reg256#2,<c3=reg256#6,>c3=reg256#6 +# asm 2: vpmulld <qx8=%ymm1,<c3=%ymm5,>c3=%ymm5 +vpmulld %ymm1,%ymm5,%ymm5 + +# qhasm: tmp0 = mem256[input_1 + ctr + 0] +# asm 1: vmovupd 0(<input_1=int64#2,<ctr=int64#4),>tmp0=reg256#7 +# asm 2: vmovupd 0(<input_1=%rsi,<ctr=%rcx),>tmp0=%ymm6 +vmovupd 0(%rsi,%rcx),%ymm6 + +# qhasm: tmp1 = mem256[input_1 + ctr + 1024] +# asm 1: vmovupd 1024(<input_1=int64#2,<ctr=int64#4),>tmp1=reg256#8 +# asm 2: vmovupd 1024(<input_1=%rsi,<ctr=%rcx),>tmp1=%ymm7 +vmovupd 1024(%rsi,%rcx),%ymm7 + +# qhasm: tmp2 = mem256[input_1 + ctr + 2048] +# asm 1: vmovupd 2048(<input_1=int64#2,<ctr=int64#4),>tmp2=reg256#9 +# asm 2: vmovupd 2048(<input_1=%rsi,<ctr=%rcx),>tmp2=%ymm8 +vmovupd 2048(%rsi,%rcx),%ymm8 + +# qhasm: tmp3 = mem256[input_1 + ctr + 3072] +# asm 1: vmovupd 3072(<input_1=int64#2,<ctr=int64#4),>tmp3=reg256#10 +# asm 2: vmovupd 3072(<input_1=%rsi,<ctr=%rcx),>tmp3=%ymm9 +vmovupd 3072(%rsi,%rcx),%ymm9 + +# qhasm: (uint64) ctr >>= 5 +# asm 1: shr $5,<ctr=int64#4 +# asm 2: shr $5,<ctr=%rcx +shr $5,%rcx + +# qhasm: 8x tmp0 <<= 3 +# asm 1: vpslld $3,<tmp0=reg256#7,>tmp0=reg256#7 +# asm 2: vpslld $3,<tmp0=%ymm6,>tmp0=%ymm6 +vpslld $3,%ymm6,%ymm6 + +# qhasm: 8x tmp1 <<= 3 +# asm 1: vpslld $3,<tmp1=reg256#8,>tmp1=reg256#8 +# asm 2: vpslld $3,<tmp1=%ymm7,>tmp1=%ymm7 +vpslld $3,%ymm7,%ymm7 + +# qhasm: 8x tmp2 <<= 3 +# asm 1: vpslld 
$3,<tmp2=reg256#9,>tmp2=reg256#9 +# asm 2: vpslld $3,<tmp2=%ymm8,>tmp2=%ymm8 +vpslld $3,%ymm8,%ymm8 + +# qhasm: 8x tmp3 <<= 3 +# asm 1: vpslld $3,<tmp3=reg256#10,>tmp3=reg256#10 +# asm 2: vpslld $3,<tmp3=%ymm9,>tmp3=%ymm9 +vpslld $3,%ymm9,%ymm9 + +# qhasm: 8x qx8 <<= 4 +# asm 1: vpslld $4,<qx8=reg256#2,>qx8=reg256#2 +# asm 2: vpslld $4,<qx8=%ymm1,>qx8=%ymm1 +vpslld $4,%ymm1,%ymm1 + +# qhasm: 8x tmp0 += qx8 +# asm 1: vpaddd <qx8=reg256#2,<tmp0=reg256#7,>tmp0=reg256#7 +# asm 2: vpaddd <qx8=%ymm1,<tmp0=%ymm6,>tmp0=%ymm6 +vpaddd %ymm1,%ymm6,%ymm6 + +# qhasm: 8x tmp1 += qx8 +# asm 1: vpaddd <qx8=reg256#2,<tmp1=reg256#8,>tmp1=reg256#8 +# asm 2: vpaddd <qx8=%ymm1,<tmp1=%ymm7,>tmp1=%ymm7 +vpaddd %ymm1,%ymm7,%ymm7 + +# qhasm: 8x tmp2 += qx8 +# asm 1: vpaddd <qx8=reg256#2,<tmp2=reg256#9,>tmp2=reg256#9 +# asm 2: vpaddd <qx8=%ymm1,<tmp2=%ymm8,>tmp2=%ymm8 +vpaddd %ymm1,%ymm8,%ymm8 + +# qhasm: 8x tmp3 += qx8 +# asm 1: vpaddd <qx8=reg256#2,<tmp3=reg256#10,>tmp3=reg256#10 +# asm 2: vpaddd <qx8=%ymm1,<tmp3=%ymm9,>tmp3=%ymm9 +vpaddd %ymm1,%ymm9,%ymm9 + +# qhasm: 8x qx8 >>= 2 +# asm 1: vpsrad $2,<qx8=reg256#2,>qx8=reg256#2 +# asm 2: vpsrad $2,<qx8=%ymm1,>qx8=%ymm1 +vpsrad $2,%ymm1,%ymm1 + +# qhasm: 8x tmp0 -= c0 +# asm 1: vpsubd <c0=reg256#3,<tmp0=reg256#7,>tmp0=reg256#3 +# asm 2: vpsubd <c0=%ymm2,<tmp0=%ymm6,>tmp0=%ymm2 +vpsubd %ymm2,%ymm6,%ymm2 + +# qhasm: 8x tmp1 -= c1 +# asm 1: vpsubd <c1=reg256#4,<tmp1=reg256#8,>tmp1=reg256#4 +# asm 2: vpsubd <c1=%ymm3,<tmp1=%ymm7,>tmp1=%ymm3 +vpsubd %ymm3,%ymm7,%ymm3 + +# qhasm: 8x tmp2 -= c2 +# asm 1: vpsubd <c2=reg256#5,<tmp2=reg256#9,>tmp2=reg256#5 +# asm 2: vpsubd <c2=%ymm4,<tmp2=%ymm8,>tmp2=%ymm4 +vpsubd %ymm4,%ymm8,%ymm4 + +# qhasm: 8x tmp3 -= c3 +# asm 1: vpsubd <c3=reg256#6,<tmp3=reg256#10,>tmp3=reg256#6 +# asm 2: vpsubd <c3=%ymm5,<tmp3=%ymm9,>tmp3=%ymm5 +vpsubd %ymm5,%ymm9,%ymm5 + +# qhasm: 8x b = tmp0 * mem256[v2730x8] +# asm 1: vpmulld v2730x8,<tmp0=reg256#3,>b=reg256#7 +# asm 2: vpmulld v2730x8,<tmp0=%ymm2,>b=%ymm6 +vpmulld 
v2730x8,%ymm2,%ymm6 + +# qhasm: 8x t = b >> 27 +# asm 1: vpsrad $27,<b=reg256#7,>t=reg256#7 +# asm 2: vpsrad $27,<b=%ymm6,>t=%ymm6 +vpsrad $27,%ymm6,%ymm6 + +# qhasm: 8x d = t * qx8 +# asm 1: vpmulld <t=reg256#7,<qx8=reg256#2,>d=reg256#8 +# asm 2: vpmulld <t=%ymm6,<qx8=%ymm1,>d=%ymm7 +vpmulld %ymm6,%ymm1,%ymm7 + +# qhasm: 8x b = tmp0 - d +# asm 1: vpsubd <d=reg256#8,<tmp0=reg256#3,>b=reg256#8 +# asm 2: vpsubd <d=%ymm7,<tmp0=%ymm2,>b=%ymm7 +vpsubd %ymm7,%ymm2,%ymm7 + +# qhasm: 8x b += _1x8 +# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#8,>b=reg256#8 +# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm7,>b=%ymm7 +vpaddd %ymm0,%ymm7,%ymm7 + +# qhasm: 8x b = qx8 - b +# asm 1: vpsubd <b=reg256#8,<qx8=reg256#2,>b=reg256#8 +# asm 2: vpsubd <b=%ymm7,<qx8=%ymm1,>b=%ymm7 +vpsubd %ymm7,%ymm1,%ymm7 + +# qhasm: 8x b >>= 31 +# asm 1: vpsrad $31,<b=reg256#8,>b=reg256#8 +# asm 2: vpsrad $31,<b=%ymm7,>b=%ymm7 +vpsrad $31,%ymm7,%ymm7 + +# qhasm: 8x t -= b +# asm 1: vpsubd <b=reg256#8,<t=reg256#7,>t=reg256#7 +# asm 2: vpsubd <b=%ymm7,<t=%ymm6,>t=%ymm6 +vpsubd %ymm7,%ymm6,%ymm6 + +# qhasm: d = t & _1x8 +# asm 1: vpand <t=reg256#7,<_1x8=reg256#1,>d=reg256#8 +# asm 2: vpand <t=%ymm6,<_1x8=%ymm0,>d=%ymm7 +vpand %ymm6,%ymm0,%ymm7 + +# qhasm: 8x t = t >> 1 +# asm 1: vpsrad $1,<t=reg256#7,>t=reg256#7 +# asm 2: vpsrad $1,<t=%ymm6,>t=%ymm6 +vpsrad $1,%ymm6,%ymm6 + +# qhasm: 8x t += d +# asm 1: vpaddd <d=reg256#8,<t=reg256#7,>t=reg256#7 +# asm 2: vpaddd <d=%ymm7,<t=%ymm6,>t=%ymm6 +vpaddd %ymm7,%ymm6,%ymm6 + +# qhasm: 8x t *= qx8 +# asm 1: vpmulld <qx8=reg256#2,<t=reg256#7,>t=reg256#7 +# asm 2: vpmulld <qx8=%ymm1,<t=%ymm6,>t=%ymm6 +vpmulld %ymm1,%ymm6,%ymm6 + +# qhasm: 8x t <<= 1 +# asm 1: vpslld $1,<t=reg256#7,>t=reg256#7 +# asm 2: vpslld $1,<t=%ymm6,>t=%ymm6 +vpslld $1,%ymm6,%ymm6 + +# qhasm: 8x t -= tmp0 +# asm 1: vpsubd <tmp0=reg256#3,<t=reg256#7,>t=reg256#3 +# asm 2: vpsubd <tmp0=%ymm2,<t=%ymm6,>t=%ymm2 +vpsubd %ymm2,%ymm6,%ymm2 + +# qhasm: 8x k = abs(t) +# asm 1: vpabsd <t=reg256#3,>k=reg256#3 +# asm 2: 
vpabsd <t=%ymm2,>k=%ymm2 +vpabsd %ymm2,%ymm2 + +# qhasm: 8x b = tmp1 * mem256[v2730x8] +# asm 1: vpmulld v2730x8,<tmp1=reg256#4,>b=reg256#7 +# asm 2: vpmulld v2730x8,<tmp1=%ymm3,>b=%ymm6 +vpmulld v2730x8,%ymm3,%ymm6 + +# qhasm: 8x t = b >> 27 +# asm 1: vpsrad $27,<b=reg256#7,>t=reg256#7 +# asm 2: vpsrad $27,<b=%ymm6,>t=%ymm6 +vpsrad $27,%ymm6,%ymm6 + +# qhasm: 8x d = t * qx8 +# asm 1: vpmulld <t=reg256#7,<qx8=reg256#2,>d=reg256#8 +# asm 2: vpmulld <t=%ymm6,<qx8=%ymm1,>d=%ymm7 +vpmulld %ymm6,%ymm1,%ymm7 + +# qhasm: 8x b = tmp1 - d +# asm 1: vpsubd <d=reg256#8,<tmp1=reg256#4,>b=reg256#8 +# asm 2: vpsubd <d=%ymm7,<tmp1=%ymm3,>b=%ymm7 +vpsubd %ymm7,%ymm3,%ymm7 + +# qhasm: 8x b += _1x8 +# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#8,>b=reg256#8 +# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm7,>b=%ymm7 +vpaddd %ymm0,%ymm7,%ymm7 + +# qhasm: 8x b = qx8 - b +# asm 1: vpsubd <b=reg256#8,<qx8=reg256#2,>b=reg256#8 +# asm 2: vpsubd <b=%ymm7,<qx8=%ymm1,>b=%ymm7 +vpsubd %ymm7,%ymm1,%ymm7 + +# qhasm: 8x b >>= 31 +# asm 1: vpsrad $31,<b=reg256#8,>b=reg256#8 +# asm 2: vpsrad $31,<b=%ymm7,>b=%ymm7 +vpsrad $31,%ymm7,%ymm7 + +# qhasm: 8x t -= b +# asm 1: vpsubd <b=reg256#8,<t=reg256#7,>t=reg256#7 +# asm 2: vpsubd <b=%ymm7,<t=%ymm6,>t=%ymm6 +vpsubd %ymm7,%ymm6,%ymm6 + +# qhasm: d = t & _1x8 +# asm 1: vpand <t=reg256#7,<_1x8=reg256#1,>d=reg256#8 +# asm 2: vpand <t=%ymm6,<_1x8=%ymm0,>d=%ymm7 +vpand %ymm6,%ymm0,%ymm7 + +# qhasm: 8x t = t >> 1 +# asm 1: vpsrad $1,<t=reg256#7,>t=reg256#7 +# asm 2: vpsrad $1,<t=%ymm6,>t=%ymm6 +vpsrad $1,%ymm6,%ymm6 + +# qhasm: 8x t += d +# asm 1: vpaddd <d=reg256#8,<t=reg256#7,>t=reg256#7 +# asm 2: vpaddd <d=%ymm7,<t=%ymm6,>t=%ymm6 +vpaddd %ymm7,%ymm6,%ymm6 + +# qhasm: 8x t *= qx8 +# asm 1: vpmulld <qx8=reg256#2,<t=reg256#7,>t=reg256#7 +# asm 2: vpmulld <qx8=%ymm1,<t=%ymm6,>t=%ymm6 +vpmulld %ymm1,%ymm6,%ymm6 + +# qhasm: 8x t <<= 1 +# asm 1: vpslld $1,<t=reg256#7,>t=reg256#7 +# asm 2: vpslld $1,<t=%ymm6,>t=%ymm6 +vpslld $1,%ymm6,%ymm6 + +# qhasm: 8x t -= tmp1 +# asm 1: 
vpsubd <tmp1=reg256#4,<t=reg256#7,>t=reg256#4 +# asm 2: vpsubd <tmp1=%ymm3,<t=%ymm6,>t=%ymm3 +vpsubd %ymm3,%ymm6,%ymm3 + +# qhasm: 8x t = abs(t) +# asm 1: vpabsd <t=reg256#4,>t=reg256#4 +# asm 2: vpabsd <t=%ymm3,>t=%ymm3 +vpabsd %ymm3,%ymm3 + +# qhasm: 8x k += t +# asm 1: vpaddd <t=reg256#4,<k=reg256#3,>k=reg256#3 +# asm 2: vpaddd <t=%ymm3,<k=%ymm2,>k=%ymm2 +vpaddd %ymm3,%ymm2,%ymm2 + +# qhasm: 8x b = tmp2 * mem256[v2730x8] +# asm 1: vpmulld v2730x8,<tmp2=reg256#5,>b=reg256#4 +# asm 2: vpmulld v2730x8,<tmp2=%ymm4,>b=%ymm3 +vpmulld v2730x8,%ymm4,%ymm3 + +# qhasm: 8x t = b >> 27 +# asm 1: vpsrad $27,<b=reg256#4,>t=reg256#4 +# asm 2: vpsrad $27,<b=%ymm3,>t=%ymm3 +vpsrad $27,%ymm3,%ymm3 + +# qhasm: 8x d = t * qx8 +# asm 1: vpmulld <t=reg256#4,<qx8=reg256#2,>d=reg256#7 +# asm 2: vpmulld <t=%ymm3,<qx8=%ymm1,>d=%ymm6 +vpmulld %ymm3,%ymm1,%ymm6 + +# qhasm: 8x b = tmp2 - d +# asm 1: vpsubd <d=reg256#7,<tmp2=reg256#5,>b=reg256#7 +# asm 2: vpsubd <d=%ymm6,<tmp2=%ymm4,>b=%ymm6 +vpsubd %ymm6,%ymm4,%ymm6 + +# qhasm: 8x b += _1x8 +# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#7,>b=reg256#7 +# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm6,>b=%ymm6 +vpaddd %ymm0,%ymm6,%ymm6 + +# qhasm: 8x b = qx8 - b +# asm 1: vpsubd <b=reg256#7,<qx8=reg256#2,>b=reg256#7 +# asm 2: vpsubd <b=%ymm6,<qx8=%ymm1,>b=%ymm6 +vpsubd %ymm6,%ymm1,%ymm6 + +# qhasm: 8x b >>= 31 +# asm 1: vpsrad $31,<b=reg256#7,>b=reg256#7 +# asm 2: vpsrad $31,<b=%ymm6,>b=%ymm6 +vpsrad $31,%ymm6,%ymm6 + +# qhasm: 8x t -= b +# asm 1: vpsubd <b=reg256#7,<t=reg256#4,>t=reg256#4 +# asm 2: vpsubd <b=%ymm6,<t=%ymm3,>t=%ymm3 +vpsubd %ymm6,%ymm3,%ymm3 + +# qhasm: d = t & _1x8 +# asm 1: vpand <t=reg256#4,<_1x8=reg256#1,>d=reg256#7 +# asm 2: vpand <t=%ymm3,<_1x8=%ymm0,>d=%ymm6 +vpand %ymm3,%ymm0,%ymm6 + +# qhasm: 8x t = t >> 1 +# asm 1: vpsrad $1,<t=reg256#4,>t=reg256#4 +# asm 2: vpsrad $1,<t=%ymm3,>t=%ymm3 +vpsrad $1,%ymm3,%ymm3 + +# qhasm: 8x t += d +# asm 1: vpaddd <d=reg256#7,<t=reg256#4,>t=reg256#4 +# asm 2: vpaddd <d=%ymm6,<t=%ymm3,>t=%ymm3 +vpaddd 
%ymm6,%ymm3,%ymm3 + +# qhasm: 8x t *= qx8 +# asm 1: vpmulld <qx8=reg256#2,<t=reg256#4,>t=reg256#4 +# asm 2: vpmulld <qx8=%ymm1,<t=%ymm3,>t=%ymm3 +vpmulld %ymm1,%ymm3,%ymm3 + +# qhasm: 8x t <<= 1 +# asm 1: vpslld $1,<t=reg256#4,>t=reg256#4 +# asm 2: vpslld $1,<t=%ymm3,>t=%ymm3 +vpslld $1,%ymm3,%ymm3 + +# qhasm: 8x t -= tmp2 +# asm 1: vpsubd <tmp2=reg256#5,<t=reg256#4,>t=reg256#4 +# asm 2: vpsubd <tmp2=%ymm4,<t=%ymm3,>t=%ymm3 +vpsubd %ymm4,%ymm3,%ymm3 + +# qhasm: 8x t = abs(t) +# asm 1: vpabsd <t=reg256#4,>t=reg256#4 +# asm 2: vpabsd <t=%ymm3,>t=%ymm3 +vpabsd %ymm3,%ymm3 + +# qhasm: 8x k += t +# asm 1: vpaddd <t=reg256#4,<k=reg256#3,>k=reg256#3 +# asm 2: vpaddd <t=%ymm3,<k=%ymm2,>k=%ymm2 +vpaddd %ymm3,%ymm2,%ymm2 + +# qhasm: 8x b = tmp3 * mem256[v2730x8] +# asm 1: vpmulld v2730x8,<tmp3=reg256#6,>b=reg256#4 +# asm 2: vpmulld v2730x8,<tmp3=%ymm5,>b=%ymm3 +vpmulld v2730x8,%ymm5,%ymm3 + +# qhasm: 8x t = b >> 27 +# asm 1: vpsrad $27,<b=reg256#4,>t=reg256#4 +# asm 2: vpsrad $27,<b=%ymm3,>t=%ymm3 +vpsrad $27,%ymm3,%ymm3 + +# qhasm: 8x d = t * qx8 +# asm 1: vpmulld <t=reg256#4,<qx8=reg256#2,>d=reg256#5 +# asm 2: vpmulld <t=%ymm3,<qx8=%ymm1,>d=%ymm4 +vpmulld %ymm3,%ymm1,%ymm4 + +# qhasm: 8x b = tmp3 - d +# asm 1: vpsubd <d=reg256#5,<tmp3=reg256#6,>b=reg256#5 +# asm 2: vpsubd <d=%ymm4,<tmp3=%ymm5,>b=%ymm4 +vpsubd %ymm4,%ymm5,%ymm4 + +# qhasm: 8x b += _1x8 +# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#5,>b=reg256#5 +# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm4,>b=%ymm4 +vpaddd %ymm0,%ymm4,%ymm4 + +# qhasm: 8x b = qx8 - b +# asm 1: vpsubd <b=reg256#5,<qx8=reg256#2,>b=reg256#5 +# asm 2: vpsubd <b=%ymm4,<qx8=%ymm1,>b=%ymm4 +vpsubd %ymm4,%ymm1,%ymm4 + +# qhasm: 8x b >>= 31 +# asm 1: vpsrad $31,<b=reg256#5,>b=reg256#5 +# asm 2: vpsrad $31,<b=%ymm4,>b=%ymm4 +vpsrad $31,%ymm4,%ymm4 + +# qhasm: 8x t -= b +# asm 1: vpsubd <b=reg256#5,<t=reg256#4,>t=reg256#4 +# asm 2: vpsubd <b=%ymm4,<t=%ymm3,>t=%ymm3 +vpsubd %ymm4,%ymm3,%ymm3 + +# qhasm: d = t & _1x8 +# asm 1: vpand 
<t=reg256#4,<_1x8=reg256#1,>d=reg256#5 +# asm 2: vpand <t=%ymm3,<_1x8=%ymm0,>d=%ymm4 +vpand %ymm3,%ymm0,%ymm4 + +# qhasm: 8x t = t >> 1 +# asm 1: vpsrad $1,<t=reg256#4,>t=reg256#4 +# asm 2: vpsrad $1,<t=%ymm3,>t=%ymm3 +vpsrad $1,%ymm3,%ymm3 + +# qhasm: 8x t += d +# asm 1: vpaddd <d=reg256#5,<t=reg256#4,>t=reg256#4 +# asm 2: vpaddd <d=%ymm4,<t=%ymm3,>t=%ymm3 +vpaddd %ymm4,%ymm3,%ymm3 + +# qhasm: 8x t *= qx8 +# asm 1: vpmulld <qx8=reg256#2,<t=reg256#4,>t=reg256#4 +# asm 2: vpmulld <qx8=%ymm1,<t=%ymm3,>t=%ymm3 +vpmulld %ymm1,%ymm3,%ymm3 + +# qhasm: 8x t <<= 1 +# asm 1: vpslld $1,<t=reg256#4,>t=reg256#4 +# asm 2: vpslld $1,<t=%ymm3,>t=%ymm3 +vpslld $1,%ymm3,%ymm3 + +# qhasm: 8x t -= tmp3 +# asm 1: vpsubd <tmp3=reg256#6,<t=reg256#4,>t=reg256#4 +# asm 2: vpsubd <tmp3=%ymm5,<t=%ymm3,>t=%ymm3 +vpsubd %ymm5,%ymm3,%ymm3 + +# qhasm: 8x t = abs(t) +# asm 1: vpabsd <t=reg256#4,>t=reg256#4 +# asm 2: vpabsd <t=%ymm3,>t=%ymm3 +vpabsd %ymm3,%ymm3 + +# qhasm: 8x k += t +# asm 1: vpaddd <t=reg256#4,<k=reg256#3,>k=reg256#3 +# asm 2: vpaddd <t=%ymm3,<k=%ymm2,>k=%ymm2 +vpaddd %ymm3,%ymm2,%ymm2 + +# qhasm: 8x qx8 <<= 1 +# asm 1: vpslld $1,<qx8=reg256#2,>qx8=reg256#2 +# asm 2: vpslld $1,<qx8=%ymm1,>qx8=%ymm1 +vpslld $1,%ymm1,%ymm1 + +# qhasm: 8x k -= qx8 +# asm 1: vpsubd <qx8=reg256#2,<k=reg256#3,>k=reg256#3 +# asm 2: vpsubd <qx8=%ymm1,<k=%ymm2,>k=%ymm2 +vpsubd %ymm1,%ymm2,%ymm2 + +# qhasm: 8x k >>= 31 +# asm 1: vpsrad $31,<k=reg256#3,>k=reg256#3 +# asm 2: vpsrad $31,<k=%ymm2,>k=%ymm2 +vpsrad $31,%ymm2,%ymm2 + +# qhasm: k &= _1x8 +# asm 1: vpand <_1x8=reg256#1,<k=reg256#3,<k=reg256#3 +# asm 2: vpand <_1x8=%ymm0,<k=%ymm2,<k=%ymm2 +vpand %ymm0,%ymm2,%ymm2 + +# qhasm: pg = k +# asm 1: vmovapd <k=reg256#3,>pg=stack256#1 +# asm 2: vmovapd <k=%ymm2,>pg=0(%rsp) +vmovapd %ymm2,0(%rsp) + +# qhasm: key = *(uint32 *)(pgp + 28) +# asm 1: movl 28(<pgp=int64#5),>key=int64#6d +# asm 2: movl 28(<pgp=%r8),>key=%r9d +movl 28(%r8),%r9d + +# qhasm: key <<= 1 +# asm 1: shl $1,<key=int64#6 +# asm 2: shl 
$1,<key=%r9 +shl $1,%r9 + +# qhasm: byte = *(uint32 *)(pgp + 24) +# asm 1: movl 24(<pgp=int64#5),>byte=int64#7d +# asm 2: movl 24(<pgp=%r8),>byte=%eax +movl 24(%r8),%eax + +# qhasm: key |= byte +# asm 1: or <byte=int64#7,<key=int64#6 +# asm 2: or <byte=%rax,<key=%r9 +or %rax,%r9 + +# qhasm: key <<= 1 +# asm 1: shl $1,<key=int64#6 +# asm 2: shl $1,<key=%r9 +shl $1,%r9 + +# qhasm: byte = *(uint32 *)(pgp + 20) +# asm 1: movl 20(<pgp=int64#5),>byte=int64#7d +# asm 2: movl 20(<pgp=%r8),>byte=%eax +movl 20(%r8),%eax + +# qhasm: key |= byte +# asm 1: or <byte=int64#7,<key=int64#6 +# asm 2: or <byte=%rax,<key=%r9 +or %rax,%r9 + +# qhasm: key <<= 1 +# asm 1: shl $1,<key=int64#6 +# asm 2: shl $1,<key=%r9 +shl $1,%r9 + +# qhasm: byte = *(uint32 *)(pgp + 16) +# asm 1: movl 16(<pgp=int64#5),>byte=int64#7d +# asm 2: movl 16(<pgp=%r8),>byte=%eax +movl 16(%r8),%eax + +# qhasm: key |= byte +# asm 1: or <byte=int64#7,<key=int64#6 +# asm 2: or <byte=%rax,<key=%r9 +or %rax,%r9 + +# qhasm: key <<= 1 +# asm 1: shl $1,<key=int64#6 +# asm 2: shl $1,<key=%r9 +shl $1,%r9 + +# qhasm: byte = *(uint32 *)(pgp + 12) +# asm 1: movl 12(<pgp=int64#5),>byte=int64#7d +# asm 2: movl 12(<pgp=%r8),>byte=%eax +movl 12(%r8),%eax + +# qhasm: key |= byte +# asm 1: or <byte=int64#7,<key=int64#6 +# asm 2: or <byte=%rax,<key=%r9 +or %rax,%r9 + +# qhasm: key <<= 1 +# asm 1: shl $1,<key=int64#6 +# asm 2: shl $1,<key=%r9 +shl $1,%r9 + +# qhasm: byte = *(uint32 *)(pgp + 8) +# asm 1: movl 8(<pgp=int64#5),>byte=int64#7d +# asm 2: movl 8(<pgp=%r8),>byte=%eax +movl 8(%r8),%eax + +# qhasm: key |= byte +# asm 1: or <byte=int64#7,<key=int64#6 +# asm 2: or <byte=%rax,<key=%r9 +or %rax,%r9 + +# qhasm: key <<= 1 +# asm 1: shl $1,<key=int64#6 +# asm 2: shl $1,<key=%r9 +shl $1,%r9 + +# qhasm: byte = *(uint32 *)(pgp + 4) +# asm 1: movl 4(<pgp=int64#5),>byte=int64#7d +# asm 2: movl 4(<pgp=%r8),>byte=%eax +movl 4(%r8),%eax + +# qhasm: key |= byte +# asm 1: or <byte=int64#7,<key=int64#6 +# asm 2: or <byte=%rax,<key=%r9 +or 
%rax,%r9 + +# qhasm: key <<= 1 +# asm 1: shl $1,<key=int64#6 +# asm 2: shl $1,<key=%r9 +shl $1,%r9 + +# qhasm: byte = *(uint32 *)(pgp + 0) +# asm 1: movl 0(<pgp=int64#5),>byte=int64#7d +# asm 2: movl 0(<pgp=%r8),>byte=%eax +movl 0(%r8),%eax + +# qhasm: key |= byte +# asm 1: or <byte=int64#7,<key=int64#6 +# asm 2: or <byte=%rax,<key=%r9 +or %rax,%r9 + +# qhasm: mem8[input_0 + ctr + 0] = key +# asm 1: movb <key=int64#6b,0(<input_0=int64#1,<ctr=int64#4) +# asm 2: movb <key=%r9b,0(<input_0=%rdi,<ctr=%rcx) +movb %r9b,0(%rdi,%rcx) + +# qhasm: 8x qx8 >>= 3 +# asm 1: vpsrad $3,<qx8=reg256#2,>qx8=reg256#2 +# asm 2: vpsrad $3,<qx8=%ymm1,>qx8=%ymm1 +vpsrad $3,%ymm1,%ymm1 + +# qhasm: ctr += 1 +# asm 1: add $1,<ctr=int64#4 +# asm 2: add $1,<ctr=%rcx +add $1,%rcx + +# qhasm: unsigned<? ctr - 32 +# asm 1: cmp $32,<ctr=int64#4 +# asm 2: cmp $32,<ctr=%rcx +cmp $32,%rcx +# comment:fp stack unchanged by jump + +# qhasm: goto looptop if unsigned< +jb ._looptop + +# qhasm: return +add %r11,%rsp +ret diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/test/speed.c b/crypt/liboqs/kex_rlwe_newhope/avx2/test/speed.c new file mode 100644 index 0000000000000000000000000000000000000000..3b71b3aa3cc8736bc5f701549ae9a9d718af6100 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/test/speed.c @@ -0,0 +1,127 @@ +#include "../newhope.h" +#include "../poly.h" +#include "../error_correction.h" +#include "../cpucycles.h" +#include <stdlib.h> +#include <stdio.h> + +#define NTESTS 1000 + +static int cmp_llu(const void *a, const void*b) +{ + if(*(unsigned long long *)a < *(unsigned long long *)b) return -1; + if(*(unsigned long long *)a > *(unsigned long long *)b) return 1; + return 0; +} + +static unsigned long long median(unsigned long long *l, size_t llen) +{ + qsort(l,llen,sizeof(unsigned long long),cmp_llu); + + if(llen%2) return l[llen/2]; + else return (l[llen/2-1]+l[llen/2])/2; +} + +static unsigned long long average(unsigned long long *t, size_t tlen) +{ + unsigned long long acc=0; + size_t i; 
+ for(i=0;i<tlen;i++) + acc += t[i]; + return acc/(tlen); +} + +static void print_results(const char *s, unsigned long long *t, size_t tlen) +{ + size_t i; + printf("%s", s); + for(i=0;i<tlen-1;i++) + { + t[i] = t[i+1] - t[i]; + // printf("%llu ", t[i]); + } + printf("\n"); + printf("median: %llu\n", median(t, tlen)); + printf("average: %llu\n", average(t, tlen-1)); + printf("\n"); +} + + +unsigned long long t[NTESTS]; + +int main() +{ + poly sk_a; + unsigned char key_a[32], key_b[32]; + unsigned char senda[NTESTS*NEWHOPE_SENDABYTES]; + unsigned char sendb[NTESTS*NEWHOPE_SENDBBYTES]; + unsigned char seed[NEWHOPE_SEEDBYTES]; + int i; + + for(i=0; i<NTESTS; i++) + { + t[i] = cpucycles(); + randombytes(seed, NEWHOPE_SEEDBYTES); + poly_uniform(&sk_a, seed); + } + print_results("poly_uniform: ", t, NTESTS); + + for(i=0; i<NTESTS; i++) + { + t[i] = cpucycles(); + poly_ntt(&sk_a); + } + print_results("poly_ntt: ", t, NTESTS); + + for(i=0; i<NTESTS; i++) + { + t[i] = cpucycles(); + poly_invntt(&sk_a); + } + print_results("poly_invntt: ", t, NTESTS); + + for(i=0; i<NTESTS; i++) + { + t[i] = cpucycles(); + poly_getnoise(&sk_a,seed,0); + } + print_results("poly_getnoise: ", t, NTESTS); + + for(i=0; i<NTESTS; i++) + { + t[i] = cpucycles(); + helprec(&sk_a, &sk_a, seed, 0); + } + print_results("helprec: ", t, NTESTS); + + for(i=0; i<NTESTS; i++) + { + t[i] = cpucycles(); + rec(key_a, &sk_a, &sk_a); + } + print_results("rec: ", t, NTESTS); + + for(i=0; i<NTESTS; i++) + { + t[i] = cpucycles(); + newhope_keygen(senda+i*NEWHOPE_SENDABYTES, &sk_a); + } + print_results("newhope_keygen: ", t, NTESTS); + + for(i=0; i<NTESTS; i++) + { + t[i] = cpucycles(); + newhope_sharedb(key_b, sendb+i*NEWHOPE_SENDBBYTES, senda+i*NEWHOPE_SENDABYTES); + } + print_results("newhope_sharedb: ", t, NTESTS); + + for(i=0; i<NTESTS; i++) + { + t[i] = cpucycles(); + newhope_shareda(key_a, &sk_a, sendb+i*NEWHOPE_SENDBBYTES); + } + print_results("newhope_shareda: ", t, NTESTS); + + + return 0; +} diff --git 
a/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_newhope.c b/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_newhope.c new file mode 100644 index 0000000000000000000000000000000000000000..8660357ceb6ea9f9aa2c0604884458d9689e5bbe --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_newhope.c @@ -0,0 +1,121 @@ + +#include "../newhope.h" +#include "../poly.h" +#include "../randombytes.h" +#include "../crypto_stream_chacha20.h" +#include "../error_correction.h" +#include <math.h> +#include <stdio.h> +#include <string.h> + +#define NTESTS 100000 + +int compare_keys(poly *a, poly *b){ + + int i; + + for(i=0; i<256; i++){ + if (a->coeffs[i] != b->coeffs[i]){ + return -1; + } + } + return 0; +} + + +int test_keys(){ + poly sk_a; + unsigned char key_a[32], key_b[32]; + unsigned char senda[NEWHOPE_SENDABYTES]; + unsigned char sendb[NEWHOPE_SENDBBYTES]; + int i; + + + + for(i=0; i<NTESTS; i++) + { + //Alice generates a public key + newhope_keygen(senda, &sk_a); + + //Bob derives a secret key and creates a response + newhope_sharedb(key_b, sendb, senda); + + //Alice uses Bobs response to get her secre key + newhope_shareda(key_a, &sk_a, sendb); + + if(memcmp(key_a, key_b, 32)) + printf("ERROR keys\n"); + } + + return 0; +} + +int test_invalid_sk_a() +{ + poly sk_a; + unsigned char key_a[32], key_b[32]; + unsigned char senda[NEWHOPE_SENDABYTES]; + unsigned char sendb[NEWHOPE_SENDBBYTES]; + unsigned char noiseseed[32]; + int i; + + randombytes(noiseseed,32); + + for(i=0; i<NTESTS; i++) + { + //Alice generates a public key + newhope_keygen(senda, &sk_a); + + //Bob derives a secret key and creates a response + newhope_sharedb(key_b, sendb, senda); + + //Overwrite the secret key + poly_getnoise(&sk_a,noiseseed,i); + + //Alice uses Bobs response to get her secre key + newhope_shareda(key_a, &sk_a, sendb); + + if(!memcmp(key_a, key_b, 32)) + printf("ERROR invalid sk_a\n"); + } + return 0; +} + + +int test_invalid_ciphertext() +{ + poly sk_a; + unsigned char key_a[32], 
key_b[32]; + unsigned char senda[NEWHOPE_SENDABYTES]; + unsigned char sendb[NEWHOPE_SENDBBYTES]; + int i; + + for(i=0; i<10; i++) + { + //Alice generates a public key + newhope_keygen(senda, &sk_a); + + //Bob derives a secret key and creates a response + newhope_sharedb(key_b, sendb, senda); + + //Change some byte in the "ciphertext" + randombytes(sendb+42,1); + + //Alice uses Bobs response to get her secre key + newhope_shareda(key_a, &sk_a, sendb); + + if(!memcmp(key_a, key_b, 32)) + printf("ERROR invalid sendb\n"); + } + + return 0; +} + + +int main(){ + + test_keys(); + test_invalid_sk_a(); + test_invalid_ciphertext(); + return 0; +} diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_statistical.c b/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_statistical.c new file mode 100644 index 0000000000000000000000000000000000000000..3bc5a9d970712b92c63f2c97d5ce20c8106ce231 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_statistical.c @@ -0,0 +1,63 @@ + +#include "../newhope.h" +#include "../poly.h" +#include "../randombytes.h" +#include "../crypto_stream_chacha20.h" +#include "../error_correction.h" +#include <math.h> +#include <stdio.h> +#include <string.h> + +#define NRUNS 10000000UL + +int hamming[256] = { +0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, +1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, +1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, +2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, +1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, +2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, +2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, +3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, +1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, +2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, +2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, +3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, +2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, +3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, +3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, +4, 5, 5, 6, 5, 
6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; + + +static int hamming32(const unsigned char *k) +{ + int i; + int r = 0; + for(i=0;i<32;i++) + r += hamming[k[i]]; + return r; +} + +int main() +{ + poly sk_a; + unsigned char key_b[32]; + unsigned char senda[NEWHOPE_SENDABYTES]; + unsigned char sendb[NEWHOPE_SENDBBYTES]; + unsigned long i; + long t = 0; + + for(i=0;i<NRUNS;i++) + { + newhope_keygen(senda, &sk_a); + newhope_sharedb(key_b, sendb, senda); + + t += hamming32(key_b); + } + + printf("ones: %ld\n",t); + printf("zeroes: %ld\n",256*NRUNS-t); + printf("diff: %ld\n",256*NRUNS-2*t); + + return 0; +} diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/test/testvectors.c b/crypt/liboqs/kex_rlwe_newhope/avx2/test/testvectors.c new file mode 100644 index 0000000000000000000000000000000000000000..a72b5a44d22ce6af1887f622bf90290b70ebff2a --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/avx2/test/testvectors.c @@ -0,0 +1,95 @@ +/* Deterministic randombytes by Daniel J. Bernstein */ +/* taken from SUPERCOP (https://bench.cr.yp.to) */ + +#include "../newhope.h" +#include "../poly.h" +#include "../randombytes.h" +#include "../crypto_stream_chacha20.h" +#include "../error_correction.h" +#include <math.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> + +#define NTESTS 1000 + + +typedef uint32_t uint32; + +static uint32 seed[32] = { 3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3,2,3,8,4,6,2,6,4,3,3,8,3,2,7,9,5 } ; +static uint32 in[12]; +static uint32 out[8]; +static int outleft = 0; + +#define ROTATE(x,b) (((x) << (b)) | ((x) >> (32 - (b)))) +#define MUSH(i,b) x = t[i] += (((x ^ seed[i]) + sum) ^ ROTATE(x,b)); + +static void surf(void) +{ + uint32 t[12]; uint32 x; uint32 sum = 0; + int r; int i; int loop; + + for (i = 0;i < 12;++i) t[i] = in[i] ^ seed[12 + i]; + for (i = 0;i < 8;++i) out[i] = seed[24 + i]; + x = t[11]; + for (loop = 0;loop < 2;++loop) { + for (r = 0;r < 16;++r) { + sum += 0x9e3779b9; + MUSH(0,5) MUSH(1,7) MUSH(2,9) MUSH(3,13) + MUSH(4,5) MUSH(5,7) MUSH(6,9) MUSH(7,13) + 
MUSH(8,5) MUSH(9,7) MUSH(10,9) MUSH(11,13) + } + for (i = 0;i < 8;++i) out[i] ^= t[i + 4]; + } +} + +void randombytes(unsigned char *x,unsigned long long xlen) +{ + while (xlen > 0) { + if (!outleft) { + if (!++in[0]) if (!++in[1]) if (!++in[2]) ++in[3]; + surf(); + outleft = 8; + } + *x = out[--outleft]; + printf("%02x", *x); + ++x; + --xlen; + } + printf("\n"); +} + + + +int main(void) +{ + poly sk_a; + unsigned char key_a[32], key_b[32]; + unsigned char senda[NEWHOPE_SENDABYTES]; + unsigned char sendb[NEWHOPE_SENDBBYTES]; + int i,j; + + for(i=0;i<NTESTS;i++) + { + newhope_keygen(senda, &sk_a); + for(j=0;j<NEWHOPE_SENDABYTES;j++) + printf("%02x",senda[j]); + printf("\n"); + + newhope_sharedb(key_b, sendb, senda); + for(j=0;j<NEWHOPE_SENDBBYTES;j++) + printf("%02x",sendb[j]); + printf("\n"); + + newhope_shareda(key_a, &sk_a, sendb); + for(j=0;j<32;j++) + printf("%02x",key_a[j]); + printf("\n"); + for(j=0;j<32;j++) + printf("%02x",key_b[j]); + printf("\n"); + + } + + return 0; +} diff --git a/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.c b/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.c new file mode 100644 index 0000000000000000000000000000000000000000..b251a96f5737577610bd276ced6c1709d32e5c52 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.c @@ -0,0 +1,171 @@ +#if defined(WINDOWS) +#define UNUSED +// __attribute__ not supported in VS, is there something else I should define? 
+#else +#define UNUSED __attribute__((unused)) +#endif + +#include <stdlib.h> +#include <string.h> +#if !defined(WINDOWS) +#include <strings.h> +#include <unistd.h> +#endif + +#include <oqs/kex.h> +#include <oqs/rand.h> + +#include "kex_rlwe_newhope.h" +#include "newhope.c" +#include "params.h" + +#if defined(WINDOWS) +#define strdup _strdup // for strdup deprecation warning +#endif + +OQS_KEX *OQS_KEX_rlwe_newhope_new(OQS_RAND *rand) { + OQS_KEX *k = malloc(sizeof(OQS_KEX)); + if (k == NULL) { + return NULL; + } + k->method_name = strdup("RLWE NewHope"); + k->estimated_classical_security = 229; // http://eprint.iacr.org/2015/1092.pdf Table 1 NewHope dual known classical + k->estimated_quantum_security = 206; // http://eprint.iacr.org/2015/1092.pdf Table 1 NewHope dual known quantum + k->seed = NULL; + k->seed_len = 0; + k->named_parameters = 0; + k->rand = rand; + k->params = NULL; + k->alice_0 = &OQS_KEX_rlwe_newhope_alice_0; + k->bob = &OQS_KEX_rlwe_newhope_bob; + k->alice_1 = &OQS_KEX_rlwe_newhope_alice_1; + k->alice_priv_free = &OQS_KEX_rlwe_newhope_alice_priv_free; + k->free = &OQS_KEX_rlwe_newhope_free; + return k; +} + +int OQS_KEX_rlwe_newhope_alice_0(UNUSED OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) { + + int ret; + + *alice_priv = NULL; + *alice_msg = NULL; + + /* allocate public/private key pair */ + *alice_msg = malloc(NEWHOPE_SENDABYTES); + if (*alice_msg == NULL) { + goto err; + } + *alice_priv = malloc(sizeof(poly)); + if (*alice_priv == NULL) { + goto err; + } + + /* generate public/private key pair */ + keygen(*alice_msg, (poly *) (*alice_priv), k->rand); + *alice_msg_len = NEWHOPE_SENDABYTES; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*alice_msg); + *alice_msg = NULL; + free(*alice_priv); + *alice_priv = NULL; + +cleanup: + + return ret; +} + +int OQS_KEX_rlwe_newhope_bob(UNUSED OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, 
size_t *key_len) { + + int ret; + + *bob_msg = NULL; + *key = NULL; + + if (alice_msg_len != NEWHOPE_SENDABYTES) { + goto err; + } + + /* allocate message and session key */ + *bob_msg = malloc(NEWHOPE_SENDBBYTES); + if (*bob_msg == NULL) { + goto err; + } + *key = malloc(32); + if (*key == NULL) { + goto err; + } + + /* generate Bob's response */ + sharedb(*key, *bob_msg, alice_msg, k->rand); + *bob_msg_len = NEWHOPE_SENDBBYTES; + *key_len = 32; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*bob_msg); + *bob_msg = NULL; + free(*key); + *key = NULL; + +cleanup: + + return ret; +} + +int OQS_KEX_rlwe_newhope_alice_1(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + + *key = NULL; + + if (bob_msg_len != NEWHOPE_SENDBBYTES) { + goto err; + } + + /* allocate session key */ + *key = malloc(32); + if (*key == NULL) { + goto err; + } + + /* generate Alice's session key */ + shareda(*key, (poly *) alice_priv, bob_msg); + *key_len = 32; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*key); + *key = NULL; + +cleanup: + + return ret; +} + +void OQS_KEX_rlwe_newhope_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) { + if (alice_priv) { + free(alice_priv); + } +} + +void OQS_KEX_rlwe_newhope_free(OQS_KEX *k) { + if (k) { + free(k->named_parameters); + k->named_parameters = NULL; + free(k->method_name); + k->method_name = NULL; + } + free(k); +} diff --git a/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.h b/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.h new file mode 100644 index 0000000000000000000000000000000000000000..ba3d70aebfc5f1576d67b2ff397d3bae7c6ecdcd --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.h @@ -0,0 +1,24 @@ +/** + * \file kex_rlwe_newhope.h + * \brief Header for ring-LWE key exchange protocol NewHope + */ + +#ifndef __OQS_KEX_RLWE_NEWHOPE_H +#define __OQS_KEX_RLWE_NEWHOPE_H + +#include <stddef.h> +#include <stdint.h> + 
+#include <oqs/kex.h> +#include <oqs/rand.h> + +OQS_KEX *OQS_KEX_rlwe_newhope_new(OQS_RAND *rand); + +int OQS_KEX_rlwe_newhope_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len); +int OQS_KEX_rlwe_newhope_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len); +int OQS_KEX_rlwe_newhope_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + +void OQS_KEX_rlwe_newhope_alice_priv_free(OQS_KEX *k, void *alice_priv); +void OQS_KEX_rlwe_newhope_free(OQS_KEX *k); + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/newhope.c b/crypt/liboqs/kex_rlwe_newhope/newhope.c new file mode 100644 index 0000000000000000000000000000000000000000..8025273b5de5dd0a090d813c45b1727d6b7b7263 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/newhope.c @@ -0,0 +1,120 @@ +#include <stdint.h> + +#include <oqs/sha3.h> + +// clang-format off +// (order of include matters) +#include "precomp.c" +#include "poly.c" +// clang-format on + +static void encode_a(unsigned char *r, const poly *pk, + const unsigned char *seed) { + int i; + poly_tobytes(r, pk); + for (i = 0; i < NEWHOPE_SEEDBYTES; i++) { + r[POLY_BYTES + i] = seed[i]; + } +} + +static void decode_a(poly *pk, unsigned char *seed, const unsigned char *r) { + int i; + poly_frombytes(pk, r); + for (i = 0; i < NEWHOPE_SEEDBYTES; i++) { + seed[i] = r[POLY_BYTES + i]; + } +} + +static void encode_b(unsigned char *r, const poly *b, const poly *c) { + int i; + poly_tobytes(r, b); + for (i = 0; i < PARAM_N / 4; i++) { + r[POLY_BYTES + i] = c->coeffs[4 * i] | (c->coeffs[4 * i + 1] << 2) | + (c->coeffs[4 * i + 2] << 4) | + (c->coeffs[4 * i + 3] << 6); + } +} + +static void decode_b(poly *b, poly *c, const unsigned char *r) { + int i; + poly_frombytes(b, r); + for (i = 0; i < PARAM_N / 4; i++) { + c->coeffs[4 * i + 0] = r[POLY_BYTES + i] & 0x03; + c->coeffs[4 * i 
+ 1] = (r[POLY_BYTES + i] >> 2) & 0x03; + c->coeffs[4 * i + 2] = (r[POLY_BYTES + i] >> 4) & 0x03; + c->coeffs[4 * i + 3] = (r[POLY_BYTES + i] >> 6); + } +} + +static void gen_a(poly *a, const unsigned char *seed) { poly_uniform(a, seed); } + +// API FUNCTIONS + +static void keygen(unsigned char *send, poly *sk, OQS_RAND *rand) { + poly a, e, r, pk; + unsigned char seed[NEWHOPE_SEEDBYTES]; + + rand->rand_n(rand, seed, NEWHOPE_SEEDBYTES); + + gen_a(&a, seed); + + poly_getnoise(sk, rand); + poly_ntt(sk); + + poly_getnoise(&e, rand); + poly_ntt(&e); + + poly_pointwise(&r, sk, &a); + poly_add(&pk, &e, &r); + + encode_a(send, &pk, seed); +} + +static void sharedb(unsigned char *sharedkey, unsigned char *send, + const unsigned char *received, OQS_RAND *rand) { + poly sp, ep, v, a, pka, c, epp, bp; + unsigned char seed[NEWHOPE_SEEDBYTES]; + + decode_a(&pka, seed, received); + gen_a(&a, seed); + + poly_getnoise(&sp, rand); + poly_ntt(&sp); + poly_getnoise(&ep, rand); + poly_ntt(&ep); + + poly_pointwise(&bp, &a, &sp); + poly_add(&bp, &bp, &ep); + + poly_pointwise(&v, &pka, &sp); + poly_invntt(&v); + + poly_getnoise(&epp, rand); + poly_add(&v, &v, &epp); + + helprec(&c, &v, rand); + + encode_b(send, &bp, &c); + + rec(sharedkey, &v, &c); + +#ifndef STATISTICAL_TEST + OQS_SHA3_sha3256(sharedkey, sharedkey, 32); +#endif +} + +static void shareda(unsigned char *sharedkey, const poly *sk, + const unsigned char *received) { + poly v, bp, c; + + decode_b(&bp, &c, received); + + poly_pointwise(&v, sk, &bp); + poly_invntt(&v); + + rec(sharedkey, &v, &c); + +#ifndef STATISTICAL_TEST + OQS_SHA3_sha3256(sharedkey, sharedkey, 32); +#endif +} diff --git a/crypt/liboqs/kex_rlwe_newhope/params.h b/crypt/liboqs/kex_rlwe_newhope/params.h new file mode 100644 index 0000000000000000000000000000000000000000..932770e4d093f753da393d40fe5bf874b94f9dba --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/params.h @@ -0,0 +1,28 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#include <stdint.h> + +#define 
PARAM_N 1024 + +#define PARAM_K 16 /* used in sampler */ +#define PARAM_Q 12289 + +#define POLY_BYTES 1792 +#define NEWHOPE_SEEDBYTES 32 +#define NEWHOPE_RECBYTES 256 + +#define NEWHOPE_SENDABYTES (POLY_BYTES + NEWHOPE_SEEDBYTES) +#define NEWHOPE_SENDBBYTES (POLY_BYTES + NEWHOPE_RECBYTES) + +extern uint16_t bitrev_table[]; +extern uint16_t omegas_montgomery[]; +extern uint16_t omegas_inv_montgomery[]; +extern uint16_t psis_inv_montgomery[]; +extern uint16_t psis_bitrev_montgomery[]; + +#if defined(WINDOWS) +typedef unsigned __int16 uint16_t; +#endif + +#endif diff --git a/crypt/liboqs/kex_rlwe_newhope/poly.c b/crypt/liboqs/kex_rlwe_newhope/poly.c new file mode 100644 index 0000000000000000000000000000000000000000..ca5014e9618d13fc0f7dec8d29de4c0283a96efd --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/poly.c @@ -0,0 +1,339 @@ +#include "params.h" +#include <oqs/rand.h> +#include <oqs/sha3.h> + +typedef struct { + uint16_t coeffs[PARAM_N]; +#if defined(WINDOWS) +} poly; +#else +} poly __attribute__((aligned(32))); +#endif + +static const uint32_t qinv = 12287; // -inverse_mod(p,2^18) +static const uint32_t rlog = 18; + +static uint16_t montgomery_reduce(uint32_t a) { + uint32_t u; + + u = (a * qinv); + u &= ((1 << rlog) - 1); + u *= PARAM_Q; + a = a + u; + return a >> 18; +} + +static uint16_t barrett_reduce(uint16_t a) { + uint32_t u; + + u = ((uint32_t) a * 5) >> 16; + u *= PARAM_Q; + a -= u; + return a; +} + +static void bitrev_vector(uint16_t *poly) { + unsigned int i, r; + uint16_t tmp; + + for (i = 0; i < PARAM_N; i++) { + r = oqs_kex_rlwe_newhope_bitrev_table[i]; + if (i < r) { + tmp = poly[i]; + poly[i] = poly[r]; + poly[r] = tmp; + } + } +} + +static void mul_coefficients(uint16_t *poly, const uint16_t *factors) { + unsigned int i; + + for (i = 0; i < PARAM_N; i++) { + poly[i] = montgomery_reduce((poly[i] * factors[i])); + } +} + +/* GS_bo_to_no; omegas need to be in Montgomery domain */ +static void ntt(uint16_t *a, const uint16_t *omega) { + int i, 
start, j, jTwiddle, distance; + uint16_t temp, W; + + for (i = 0; i < 10; i += 2) { + // Even level + distance = (1 << i); + for (start = 0; start < distance; start++) { + jTwiddle = 0; + for (j = start; j < PARAM_N - 1; j += 2 * distance) { + W = omega[jTwiddle++]; + temp = a[j]; + a[j] = (temp + a[j + distance]); // Omit reduction (be lazy) + a[j + distance] = montgomery_reduce( + (W * ((uint32_t) temp + 3 * PARAM_Q - a[j + distance]))); + } + } + + // Odd level + distance <<= 1; + for (start = 0; start < distance; start++) { + jTwiddle = 0; + for (j = start; j < PARAM_N - 1; j += 2 * distance) { + W = omega[jTwiddle++]; + temp = a[j]; + a[j] = barrett_reduce((temp + a[j + distance])); + a[j + distance] = montgomery_reduce( + (W * ((uint32_t) temp + 3 * PARAM_Q - a[j + distance]))); + } + } + } +} + +static void poly_frombytes(poly *r, const unsigned char *a) { + int i; + for (i = 0; i < PARAM_N / 4; i++) { + r->coeffs[4 * i + 0] = + a[7 * i + 0] | (((uint16_t) a[7 * i + 1] & 0x3f) << 8); + r->coeffs[4 * i + 1] = (a[7 * i + 1] >> 6) | + (((uint16_t) a[7 * i + 2]) << 2) | + (((uint16_t) a[7 * i + 3] & 0x0f) << 10); + r->coeffs[4 * i + 2] = (a[7 * i + 3] >> 4) | + (((uint16_t) a[7 * i + 4]) << 4) | + (((uint16_t) a[7 * i + 5] & 0x03) << 12); + r->coeffs[4 * i + 3] = + (a[7 * i + 5] >> 2) | (((uint16_t) a[7 * i + 6]) << 6); + } +} + +static void poly_tobytes(unsigned char *r, const poly *p) { + int i; + uint16_t t0, t1, t2, t3, m; + int16_t c; + for (i = 0; i < PARAM_N / 4; i++) { + t0 = barrett_reduce( + p->coeffs[4 * i + 0]); // Make sure that coefficients have only 14 bits + t1 = barrett_reduce(p->coeffs[4 * i + 1]); + t2 = barrett_reduce(p->coeffs[4 * i + 2]); + t3 = barrett_reduce(p->coeffs[4 * i + 3]); + + m = t0 - PARAM_Q; + c = m; + c >>= 15; + t0 = m ^ ((t0 ^ m) & c); // <Make sure that coefficients are in [0,q] + + m = t1 - PARAM_Q; + c = m; + c >>= 15; + t1 = m ^ ((t1 ^ m) & c); // <Make sure that coefficients are in [0,q] + + m = t2 - PARAM_Q; + c = m; + 
c >>= 15; + t2 = m ^ ((t2 ^ m) & c); // <Make sure that coefficients are in [0,q] + + m = t3 - PARAM_Q; + c = m; + c >>= 15; + t3 = m ^ ((t3 ^ m) & c); // <Make sure that coefficients are in [0,q] + + r[7 * i + 0] = t0 & 0xff; + r[7 * i + 1] = (t0 >> 8) | (t1 << 6); + r[7 * i + 2] = (t1 >> 2); + r[7 * i + 3] = (t1 >> 10) | (t2 << 4); + r[7 * i + 4] = (t2 >> 4); + r[7 * i + 5] = (t2 >> 12) | (t3 << 2); + r[7 * i + 6] = (t3 >> 6); + } +} + +static void poly_uniform(poly *a, const unsigned char *seed) { + unsigned int pos = 0, ctr = 0; + uint16_t val; + uint64_t state[OQS_SHA3_STATESIZE]; + unsigned int nblocks = 16; + uint8_t buf[OQS_SHA3_SHAKE128_RATE * 16]; + + OQS_SHA3_shake128_absorb(state, seed, NEWHOPE_SEEDBYTES); + + OQS_SHA3_shake128_squeezeblocks((unsigned char *) buf, nblocks, state); + + while (ctr < PARAM_N) { + val = (buf[pos] | ((uint16_t) buf[pos + 1] << 8)) & + 0x3fff; // Specialized for q = 12889 + if (val < PARAM_Q) { + a->coeffs[ctr++] = val; + } + pos += 2; + if (pos > OQS_SHA3_SHAKE128_RATE * nblocks - 2) { + nblocks = 1; + OQS_SHA3_shake128_squeezeblocks((unsigned char *) buf, nblocks, state); + pos = 0; + } + } +} + +static void poly_getnoise(poly *r, OQS_RAND *rand) { +#if PARAM_K != 16 +#error "poly_getnoise in poly.c only supports k=16" +#endif + + unsigned char buf[4 * PARAM_N]; + uint32_t *tp, t, d, a, b; + int i, j; + + tp = (uint32_t *) buf; + + rand->rand_n(rand, buf, 4 * PARAM_N); + + for (i = 0; i < PARAM_N; i++) { + t = tp[i]; + d = 0; + for (j = 0; j < 8; j++) { + d += (t >> j) & 0x01010101; + } + a = ((d >> 8) & 0xff) + (d & 0xff); + b = (d >> 24) + ((d >> 16) & 0xff); + r->coeffs[i] = a + PARAM_Q - b; + } +} + +static void poly_pointwise(poly *r, const poly *a, const poly *b) { + int i; + uint16_t t; + for (i = 0; i < PARAM_N; i++) { + t = montgomery_reduce(3186 * + b->coeffs[i]); /* t is now in Montgomery domain */ + r->coeffs[i] = montgomery_reduce( + a->coeffs[i] * t); /* r->coeffs[i] is back in normal domain */ + } +} + 
+static void poly_add(poly *r, const poly *a, const poly *b) { + int i; + for (i = 0; i < PARAM_N; i++) { + r->coeffs[i] = barrett_reduce(a->coeffs[i] + b->coeffs[i]); + } +} + +static void poly_ntt(poly *r) { + mul_coefficients(r->coeffs, oqs_kex_rlwe_newhope_psis_bitrev_montgomery); + ntt((uint16_t *) r->coeffs, oqs_kex_rlwe_newhope_omegas_montgomery); +} + +static void poly_invntt(poly *r) { + bitrev_vector(r->coeffs); + ntt((uint16_t *) r->coeffs, oqs_kex_rlwe_newhope_omegas_inv_montgomery); + mul_coefficients(r->coeffs, oqs_kex_rlwe_newhope_psis_inv_montgomery); +} + +// Error Correction: + +static int32_t nh_abs(int32_t v) { + int32_t mask = v >> 31; + return (v ^ mask) - mask; +} + +static int32_t f(int32_t *v0, int32_t *v1, int32_t x) { + int32_t xit, t, r, b; + + // Next 6 lines compute t = x/PARAM_Q; + b = x * 2730; + t = b >> 25; + b = x - t * 12289; + b = 12288 - b; + b >>= 31; + t -= b; + + r = t & 1; + xit = (t >> 1); + *v0 = xit + r; // v0 = round(x/(2*PARAM_Q)) + + t -= 1; + r = t & 1; + *v1 = (t >> 1) + r; + + return nh_abs(x - ((*v0) * 2 * PARAM_Q)); +} + +static int32_t g(int32_t x) { + int32_t t, c, b; + + // Next 6 lines compute t = x/(4*PARAM_Q); + b = x * 2730; + t = b >> 27; + b = x - t * 49156; + b = 49155 - b; + b >>= 31; + t -= b; + + c = t & 1; + t = (t >> 1) + c; // t = round(x/(8*PARAM_Q)) + + t *= 8 * PARAM_Q; + + return nh_abs(t - x); +} + +static int16_t LDDecode(int32_t xi0, int32_t xi1, int32_t xi2, int32_t xi3) { + int32_t t; + + t = g(xi0); + t += g(xi1); + t += g(xi2); + t += g(xi3); + + t -= 8 * PARAM_Q; + t >>= 31; + return t & 1; +} + +static void helprec(poly *c, const poly *v, OQS_RAND *oqs_rand) { + int32_t v0[4], v1[4], v_tmp[4], k; + unsigned char rbit; + unsigned char rand[32]; + int i; + + oqs_rand->rand_n(oqs_rand, rand, 32); + + for (i = 0; i < 256; i++) { + rbit = (rand[i >> 3] >> (i & 7)) & 1; + + k = f(v0 + 0, v1 + 0, 8 * v->coeffs[0 + i] + 4 * rbit); + k += f(v0 + 1, v1 + 1, 8 * v->coeffs[256 + i] + 4 * rbit); + 
k += f(v0 + 2, v1 + 2, 8 * v->coeffs[512 + i] + 4 * rbit); + k += f(v0 + 3, v1 + 3, 8 * v->coeffs[768 + i] + 4 * rbit); + + k = (2 * PARAM_Q - 1 - k) >> 31; + + v_tmp[0] = ((~k) & v0[0]) ^ (k & v1[0]); + v_tmp[1] = ((~k) & v0[1]) ^ (k & v1[1]); + v_tmp[2] = ((~k) & v0[2]) ^ (k & v1[2]); + v_tmp[3] = ((~k) & v0[3]) ^ (k & v1[3]); + + c->coeffs[0 + i] = (v_tmp[0] - v_tmp[3]) & 3; + c->coeffs[256 + i] = (v_tmp[1] - v_tmp[3]) & 3; + c->coeffs[512 + i] = (v_tmp[2] - v_tmp[3]) & 3; + c->coeffs[768 + i] = (-k + 2 * v_tmp[3]) & 3; + } +} + +static void rec(unsigned char *key, const poly *v, const poly *c) { + int i; + int32_t tmp[4]; + + for (i = 0; i < 32; i++) { + key[i] = 0; + } + + for (i = 0; i < 256; i++) { + tmp[0] = 16 * PARAM_Q + 8 * (int32_t) v->coeffs[0 + i] - + PARAM_Q * (2 * c->coeffs[0 + i] + c->coeffs[768 + i]); + tmp[1] = 16 * PARAM_Q + 8 * (int32_t) v->coeffs[256 + i] - + PARAM_Q * (2 * c->coeffs[256 + i] + c->coeffs[768 + i]); + tmp[2] = 16 * PARAM_Q + 8 * (int32_t) v->coeffs[512 + i] - + PARAM_Q * (2 * c->coeffs[512 + i] + c->coeffs[768 + i]); + tmp[3] = 16 * PARAM_Q + 8 * (int32_t) v->coeffs[768 + i] - + PARAM_Q * (c->coeffs[768 + i]); + + key[i >> 3] |= LDDecode(tmp[0], tmp[1], tmp[2], tmp[3]) << (i & 7); + } +} diff --git a/crypt/liboqs/kex_rlwe_newhope/precomp.c b/crypt/liboqs/kex_rlwe_newhope/precomp.c new file mode 100644 index 0000000000000000000000000000000000000000..675b75d1c4cee5e8fd52b24691fd2feeba1ac392 --- /dev/null +++ b/crypt/liboqs/kex_rlwe_newhope/precomp.c @@ -0,0 +1,43 @@ +#include "params.h" + +uint16_t oqs_kex_rlwe_newhope_bitrev_table[1024] = { + 0, 512, 256, 768, 128, 640, 384, 896, 64, 576, 320, 832, 192, 704, 448, 960, 32, 544, 288, 800, 160, 672, 416, 928, 96, 608, 352, 864, 224, 736, 480, 992, + 16, 528, 272, 784, 144, 656, 400, 912, 80, 592, 336, 848, 208, 720, 464, 976, 48, 560, 304, 816, 176, 688, 432, 944, 112, 624, 368, 880, 240, 752, 496, 1008, + 8, 520, 264, 776, 136, 648, 392, 904, 72, 584, 328, 840, 200, 712, 456, 968, 
40, 552, 296, 808, 168, 680, 424, 936, 104, 616, 360, 872, 232, 744, 488, 1000, + 24, 536, 280, 792, 152, 664, 408, 920, 88, 600, 344, 856, 216, 728, 472, 984, 56, 568, 312, 824, 184, 696, 440, 952, 120, 632, 376, 888, 248, 760, 504, 1016, + 4, 516, 260, 772, 132, 644, 388, 900, 68, 580, 324, 836, 196, 708, 452, 964, 36, 548, 292, 804, 164, 676, 420, 932, 100, 612, 356, 868, 228, 740, 484, 996, + 20, 532, 276, 788, 148, 660, 404, 916, 84, 596, 340, 852, 212, 724, 468, 980, 52, 564, 308, 820, 180, 692, 436, 948, 116, 628, 372, 884, 244, 756, 500, 1012, + 12, 524, 268, 780, 140, 652, 396, 908, 76, 588, 332, 844, 204, 716, 460, 972, 44, 556, 300, 812, 172, 684, 428, 940, 108, 620, 364, 876, 236, 748, 492, 1004, + 28, 540, 284, 796, 156, 668, 412, 924, 92, 604, 348, 860, 220, 732, 476, 988, 60, 572, 316, 828, 188, 700, 444, 956, 124, 636, 380, 892, 252, 764, 508, 1020, + 2, 514, 258, 770, 130, 642, 386, 898, 66, 578, 322, 834, 194, 706, 450, 962, 34, 546, 290, 802, 162, 674, 418, 930, 98, 610, 354, 866, 226, 738, 482, 994, + 18, 530, 274, 786, 146, 658, 402, 914, 82, 594, 338, 850, 210, 722, 466, 978, 50, 562, 306, 818, 178, 690, 434, 946, 114, 626, 370, 882, 242, 754, 498, 1010, + 10, 522, 266, 778, 138, 650, 394, 906, 74, 586, 330, 842, 202, 714, 458, 970, 42, 554, 298, 810, 170, 682, 426, 938, 106, 618, 362, 874, 234, 746, 490, 1002, + 26, 538, 282, 794, 154, 666, 410, 922, 90, 602, 346, 858, 218, 730, 474, 986, 58, 570, 314, 826, 186, 698, 442, 954, 122, 634, 378, 890, 250, 762, 506, 1018, + 6, 518, 262, 774, 134, 646, 390, 902, 70, 582, 326, 838, 198, 710, 454, 966, 38, 550, 294, 806, 166, 678, 422, 934, 102, 614, 358, 870, 230, 742, 486, 998, + 22, 534, 278, 790, 150, 662, 406, 918, 86, 598, 342, 854, 214, 726, 470, 982, 54, 566, 310, 822, 182, 694, 438, 950, 118, 630, 374, 886, 246, 758, 502, 1014, + 14, 526, 270, 782, 142, 654, 398, 910, 78, 590, 334, 846, 206, 718, 462, 974, 46, 558, 302, 814, 174, 686, 430, 942, 110, 622, 366, 878, 238, 750, 494, 1006, + 30, 
542, 286, 798, 158, 670, 414, 926, 94, 606, 350, 862, 222, 734, 478, 990, 62, 574, 318, 830, 190, 702, 446, 958, 126, 638, 382, 894, 254, 766, 510, 1022, + 1, 513, 257, 769, 129, 641, 385, 897, 65, 577, 321, 833, 193, 705, 449, 961, 33, 545, 289, 801, 161, 673, 417, 929, 97, 609, 353, 865, 225, 737, 481, 993, + 17, 529, 273, 785, 145, 657, 401, 913, 81, 593, 337, 849, 209, 721, 465, 977, 49, 561, 305, 817, 177, 689, 433, 945, 113, 625, 369, 881, 241, 753, 497, 1009, + 9, 521, 265, 777, 137, 649, 393, 905, 73, 585, 329, 841, 201, 713, 457, 969, 41, 553, 297, 809, 169, 681, 425, 937, 105, 617, 361, 873, 233, 745, 489, 1001, + 25, 537, 281, 793, 153, 665, 409, 921, 89, 601, 345, 857, 217, 729, 473, 985, 57, 569, 313, 825, 185, 697, 441, 953, 121, 633, 377, 889, 249, 761, 505, 1017, + 5, 517, 261, 773, 133, 645, 389, 901, 69, 581, 325, 837, 197, 709, 453, 965, 37, 549, 293, 805, 165, 677, 421, 933, 101, 613, 357, 869, 229, 741, 485, 997, + 21, 533, 277, 789, 149, 661, 405, 917, 85, 597, 341, 853, 213, 725, 469, 981, 53, 565, 309, 821, 181, 693, 437, 949, 117, 629, 373, 885, 245, 757, 501, 1013, + 13, 525, 269, 781, 141, 653, 397, 909, 77, 589, 333, 845, 205, 717, 461, 973, 45, 557, 301, 813, 173, 685, 429, 941, 109, 621, 365, 877, 237, 749, 493, 1005, + 29, 541, 285, 797, 157, 669, 413, 925, 93, 605, 349, 861, 221, 733, 477, 989, 61, 573, 317, 829, 189, 701, 445, 957, 125, 637, 381, 893, 253, 765, 509, 1021, + 3, 515, 259, 771, 131, 643, 387, 899, 67, 579, 323, 835, 195, 707, 451, 963, 35, 547, 291, 803, 163, 675, 419, 931, 99, 611, 355, 867, 227, 739, 483, 995, + 19, 531, 275, 787, 147, 659, 403, 915, 83, 595, 339, 851, 211, 723, 467, 979, 51, 563, 307, 819, 179, 691, 435, 947, 115, 627, 371, 883, 243, 755, 499, 1011, + 11, 523, 267, 779, 139, 651, 395, 907, 75, 587, 331, 843, 203, 715, 459, 971, 43, 555, 299, 811, 171, 683, 427, 939, 107, 619, 363, 875, 235, 747, 491, 1003, + 27, 539, 283, 795, 155, 667, 411, 923, 91, 603, 347, 859, 219, 731, 475, 987, 59, 571, 315, 
827, 187, 699, 443, 955, 123, 635, 379, 891, 251, 763, 507, 1019, + 7, 519, 263, 775, 135, 647, 391, 903, 71, 583, 327, 839, 199, 711, 455, 967, 39, 551, 295, 807, 167, 679, 423, 935, 103, 615, 359, 871, 231, 743, 487, 999, + 23, 535, 279, 791, 151, 663, 407, 919, 87, 599, 343, 855, 215, 727, 471, 983, 55, 567, 311, 823, 183, 695, 439, 951, 119, 631, 375, 887, 247, 759, 503, 1015, + 15, 527, 271, 783, 143, 655, 399, 911, 79, 591, 335, 847, 207, 719, 463, 975, 47, 559, 303, 815, 175, 687, 431, 943, 111, 623, 367, 879, 239, 751, 495, 1007, + 31, 543, 287, 799, 159, 671, 415, 927, 95, 607, 351, 863, 223, 735, 479, 991, 63, 575, 319, 831, 191, 703, 447, 959, 127, 639, 383, 895, 255, 767, 511, 1023}; + +uint16_t oqs_kex_rlwe_newhope_omegas_montgomery[PARAM_N / 2] = {4075, 6974, 7373, 7965, 3262, 5079, 522, 2169, 6364, 1018, 1041, 8775, 2344, 11011, 5574, 1973, 4536, 1050, 6844, 3860, 3818, 6118, 2683, 1190, 4789, 7822, 7540, 6752, 5456, 4449, 3789, 12142, 11973, 382, 3988, 468, 6843, 5339, 6196, 3710, 11316, 1254, 5435, 10930, 3998, 10256, 10367, 3879, 11889, 1728, 6137, 4948, 5862, 6136, 3643, 6874, 8724, 654, 10302, 1702, 7083, 6760, 56, 3199, 9987, 605, 11785, 8076, 5594, 9260, 6403, 4782, 6212, 4624, 9026, 8689, 4080, 11868, 6221, 3602, 975, 8077, 8851, 9445, 5681, 3477, 1105, 142, 241, 12231, 1003, 3532, 5009, 1956, 6008, 11404, 7377, 2049, 10968, 12097, 7591, 5057, 3445, 4780, 2920, 7048, 3127, 8120, 11279, 6821, 11502, 8807, 12138, 2127, 2839, 3957, 431, 1579, 6383, 9784, 5874, 677, 3336, 6234, 2766, 1323, 9115, 12237, 2031, 6956, 6413, 2281, 3969, 3991, 12133, 9522, 4737, 10996, 4774, 5429, 11871, 3772, 453, 5908, 2882, 1805, 2051, 1954, 11713, 3963, 2447, 6142, 8174, 3030, 1843, 2361, 12071, 2908, 3529, 3434, 3202, 7796, 2057, 5369, 11939, 1512, 6906, 10474, 11026, 49, 10806, 5915, 1489, 9789, 5942, 10706, 10431, 7535, 426, 8974, 3757, 10314, 9364, 347, 5868, 9551, 9634, 6554, 10596, 9280, 11566, 174, 2948, 2503, 6507, 10723, 11606, 2459, 64, 3656, 8455, 5257, 
5919, 7856, 1747, 9166, 5486, 9235, 6065, 835, 3570, 4240, 11580, 4046, 10970, 9139, 1058, 8210, 11848, 922, 7967, 1958, 10211, 1112, 3728, 4049, 11130, 5990, 1404, 325, 948, 11143, 6190, 295, 11637, 5766, 8212, 8273, 2919, 8527, 6119, 6992, 8333, 1360, 2555, 6167, 1200, 7105, 7991, 3329, 9597, 12121, 5106, 5961, 10695, 10327, 3051, 9923, 4896, 9326, 81, 3091, 1000, 7969, 4611, 726, 1853, 12149, 4255, 11112, 2768, 10654, 1062, 2294, 3553, 4805, 2747, 4846, 8577, 9154, 1170, 2319, 790, 11334, 9275, 9088, 1326, 5086, 9094, 6429, 11077, 10643, 3504, 3542, 8668, 9744, 1479, 1, 8246, 7143, 11567, 10984, 4134, 5736, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, 9650, 7468, 949, 9664, 2975, 11726, 2744, 9283, 10092, 5067, 12171, 2476, 3748, 11336, 6522, 827, 9452, 5374, 12159, 7935, 3296, 3949, 9893, 4452, 10908, 2525, 3584, 8112, 8011, 10616, 4989, 6958, 11809, 9447, 12280, 1022, 11950, 9821, 11745, 5791, 5092, 2089, 9005, 2881, 3289, 2013, 9048, 729, 7901, 1260, 5755, 4632, 11955, 2426, 10593, 1428, 4890, 5911, 3932, 9558, 8830, 3637, 5542, 145, 5179, 8595, 3707, 10530, 355, 3382, 4231, 9741, 1207, 9041, 7012, 1168, 10146, 11224, 4645, 11885, 10911, 10377, 435, 7952, 4096, 493, 9908, 6845, 6039, 2422, 2187, 9723, 8643, 9852, 9302, 6022, 7278, 1002, 4284, 5088, 1607, 7313, 875, 8509, 9430, 1045, 2481, 5012, 7428, 354, 6591, 9377, 11847, 2401, 1067, 7188, 11516, 390, 8511, 8456, 7270, 545, 8585, 9611, 12047, 1537, 4143, 4714, 4885, 1017, 5084, 1632, 3066, 27, 1440, 8526, 9273, 12046, 11618, 9289, 3400, 9890, 3136, 7098, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 2249, 4048, 2884, 11136, 2126, 1630, 9103, 5407, 2686, 9042, 2969, 8311, 9424, 9919, 8779, 5332, 10626, 1777, 4654, 10863, 7351, 3636, 9585, 5291, 8374, 2166, 4919, 12176, 9140, 12129, 7852, 12286, 4895, 10805, 2780, 5195, 2305, 7247, 9644, 4053, 10600, 3364, 3271, 4057, 4414, 9442, 7917, 2174}; + +uint16_t oqs_kex_rlwe_newhope_omegas_inv_montgomery[PARAM_N / 2] = {4075, 5315, 4324, 4916, 10120, 11767, 
7210, 9027, 10316, 6715, 1278, 9945, 3514, 11248, 11271, 5925, 147, 8500, 7840, 6833, 5537, 4749, 4467, 7500, 11099, 9606, 6171, 8471, 8429, 5445, 11239, 7753, 9090, 12233, 5529, 5206, 10587, 1987, 11635, 3565, 5415, 8646, 6153, 6427, 7341, 6152, 10561, 400, 8410, 1922, 2033, 8291, 1359, 6854, 11035, 973, 8579, 6093, 6950, 5446, 11821, 8301, 11907, 316, 52, 3174, 10966, 9523, 6055, 8953, 11612, 6415, 2505, 5906, 10710, 11858, 8332, 9450, 10162, 151, 3482, 787, 5468, 1010, 4169, 9162, 5241, 9369, 7509, 8844, 7232, 4698, 192, 1321, 10240, 4912, 885, 6281, 10333, 7280, 8757, 11286, 58, 12048, 12147, 11184, 8812, 6608, 2844, 3438, 4212, 11314, 8687, 6068, 421, 8209, 3600, 3263, 7665, 6077, 7507, 5886, 3029, 6695, 4213, 504, 11684, 2302, 1962, 1594, 6328, 7183, 168, 2692, 8960, 4298, 5184, 11089, 6122, 9734, 10929, 3956, 5297, 6170, 3762, 9370, 4016, 4077, 6523, 652, 11994, 6099, 1146, 11341, 11964, 10885, 6299, 1159, 8240, 8561, 11177, 2078, 10331, 4322, 11367, 441, 4079, 11231, 3150, 1319, 8243, 709, 8049, 8719, 11454, 6224, 3054, 6803, 3123, 10542, 4433, 6370, 7032, 3834, 8633, 12225, 9830, 683, 1566, 5782, 9786, 9341, 12115, 723, 3009, 1693, 5735, 2655, 2738, 6421, 11942, 2925, 1975, 8532, 3315, 11863, 4754, 1858, 1583, 6347, 2500, 10800, 6374, 1483, 12240, 1263, 1815, 5383, 10777, 350, 6920, 10232, 4493, 9087, 8855, 8760, 9381, 218, 9928, 10446, 9259, 4115, 6147, 9842, 8326, 576, 10335, 10238, 10484, 9407, 6381, 11836, 8517, 418, 6860, 7515, 1293, 7552, 2767, 156, 8298, 8320, 10008, 5876, 5333, 10258, 10115, 4372, 2847, 7875, 8232, 9018, 8925, 1689, 8236, 2645, 5042, 9984, 7094, 9509, 1484, 7394, 3, 4437, 160, 3149, 113, 7370, 10123, 3915, 6998, 2704, 8653, 4938, 1426, 7635, 10512, 1663, 6957, 3510, 2370, 2865, 3978, 9320, 3247, 9603, 6882, 3186, 10659, 10163, 1153, 9405, 8241, 10040, 2178, 1544, 5559, 420, 8304, 4905, 476, 3531, 5191, 9153, 2399, 8889, 3000, 671, 243, 3016, 3763, 10849, 12262, 9223, 10657, 7205, 11272, 7404, 7575, 8146, 10752, 242, 2678, 3704, 
11744, 5019, 3833, 3778, 11899, 773, 5101, 11222, 9888, 442, 2912, 5698, 11935, 4861, 7277, 9808, 11244, 2859, 3780, 11414, 4976, 10682, 7201, 8005, 11287, 5011, 6267, 2987, 2437, 3646, 2566, 10102, 9867, 6250, 5444, 2381, 11796, 8193, 4337, 11854, 1912, 1378, 404, 7644, 1065, 2143, 11121, 5277, 3248, 11082, 2548, 8058, 8907, 11934, 1759, 8582, 3694, 7110, 12144, 6747, 8652, 3459, 2731, 8357, 6378, 7399, 10861, 1696, 9863, 334, 7657, 6534, 11029, 4388, 11560, 3241, 10276, 9000, 9408, 3284, 10200, 7197, 6498, 544, 2468, 339, 11267, 9, 2842, 480, 5331, 7300, 1673, 4278, 4177, 8705, 9764, 1381, 7837, 2396, 8340, 8993, 4354, 130, 6915, 2837, 11462, 5767, 953, 8541, 9813, 118, 7222, 2197, 3006, 9545, 563, 9314, 2625, 11340, 4821, 2639, 7266, 5828, 6561, 7698, 3328, 6512, 1351, 7311, 6553, 8155, 1305, 722, 5146, 4043, 12288, 10810, 2545, 3621, 8747, 8785, 1646, 1212, 5860, 3195, 7203, 10963, 3201, 3014, 955, 11499, 9970, 11119, 3135, 3712, 7443, 9542, 7484, 8736, 9995, 11227, 1635, 9521, 1177, 8034, 140, 10436, 11563, 7678, 4320, 11289, 9198, 12208, 2963, 7393, 2366, 9238}; + +uint16_t oqs_kex_rlwe_newhope_psis_bitrev_montgomery[PARAM_N] = {4075, 6974, 7373, 7965, 3262, 5079, 522, 2169, 6364, 1018, 1041, 8775, 2344, 11011, 5574, 1973, 4536, 1050, 6844, 3860, 3818, 6118, 2683, 1190, 4789, 7822, 7540, 6752, 5456, 4449, 3789, 12142, 11973, 382, 3988, 468, 6843, 5339, 6196, 3710, 11316, 1254, 5435, 10930, 3998, 10256, 10367, 3879, 11889, 1728, 6137, 4948, 5862, 6136, 3643, 6874, 8724, 654, 10302, 1702, 7083, 6760, 56, 3199, 9987, 605, 11785, 8076, 5594, 9260, 6403, 4782, 6212, 4624, 9026, 8689, 4080, 11868, 6221, 3602, 975, 8077, 8851, 9445, 5681, 3477, 1105, 142, 241, 12231, 1003, 3532, 5009, 1956, 6008, 11404, 7377, 2049, 10968, 12097, 7591, 5057, 3445, 4780, 2920, 7048, 3127, 8120, 11279, 6821, 11502, 8807, 12138, 2127, 2839, 3957, 431, 1579, 6383, 9784, 5874, 677, 3336, 6234, 2766, 1323, 9115, 12237, 2031, 6956, 6413, 2281, 3969, 3991, 12133, 9522, 4737, 10996, 4774, 
5429, 11871, 3772, 453, 5908, 2882, 1805, 2051, 1954, 11713, 3963, 2447, 6142, 8174, 3030, 1843, 2361, 12071, 2908, 3529, 3434, 3202, 7796, 2057, 5369, 11939, 1512, 6906, 10474, 11026, 49, 10806, 5915, 1489, 9789, 5942, 10706, 10431, 7535, 426, 8974, 3757, 10314, 9364, 347, 5868, 9551, 9634, 6554, 10596, 9280, 11566, 174, 2948, 2503, 6507, 10723, 11606, 2459, 64, 3656, 8455, 5257, 5919, 7856, 1747, 9166, 5486, 9235, 6065, 835, 3570, 4240, 11580, 4046, 10970, 9139, 1058, 8210, 11848, 922, 7967, 1958, 10211, 1112, 3728, 4049, 11130, 5990, 1404, 325, 948, 11143, 6190, 295, 11637, 5766, 8212, 8273, 2919, 8527, 6119, 6992, 8333, 1360, 2555, 6167, 1200, 7105, 7991, 3329, 9597, 12121, 5106, 5961, 10695, 10327, 3051, 9923, 4896, 9326, 81, 3091, 1000, 7969, 4611, 726, 1853, 12149, 4255, 11112, 2768, 10654, 1062, 2294, 3553, 4805, 2747, 4846, 8577, 9154, 1170, 2319, 790, 11334, 9275, 9088, 1326, 5086, 9094, 6429, 11077, 10643, 3504, 3542, 8668, 9744, 1479, 1, 8246, 7143, 11567, 10984, 4134, 5736, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, 9650, 7468, 949, 9664, 2975, 11726, 2744, 9283, 10092, 5067, 12171, 2476, 3748, 11336, 6522, 827, 9452, 5374, 12159, 7935, 3296, 3949, 9893, 4452, 10908, 2525, 3584, 8112, 8011, 10616, 4989, 6958, 11809, 9447, 12280, 1022, 11950, 9821, 11745, 5791, 5092, 2089, 9005, 2881, 3289, 2013, 9048, 729, 7901, 1260, 5755, 4632, 11955, 2426, 10593, 1428, 4890, 5911, 3932, 9558, 8830, 3637, 5542, 145, 5179, 8595, 3707, 10530, 355, 3382, 4231, 9741, 1207, 9041, 7012, 1168, 10146, 11224, 4645, 11885, 10911, 10377, 435, 7952, 4096, 493, 9908, 6845, 6039, 2422, 2187, 9723, 8643, 9852, 9302, 6022, 7278, 1002, 4284, 5088, 1607, 7313, 875, 8509, 9430, 1045, 2481, 5012, 7428, 354, 6591, 9377, 11847, 2401, 1067, 7188, 11516, 390, 8511, 8456, 7270, 545, 8585, 9611, 12047, 1537, 4143, 4714, 4885, 1017, 5084, 1632, 3066, 27, 1440, 8526, 9273, 12046, 11618, 9289, 3400, 9890, 3136, 7098, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 2249, 4048, 2884, 
11136, 2126, 1630, 9103, 5407, 2686, 9042, 2969, 8311, 9424, 9919, 8779, 5332, 10626, 1777, 4654, 10863, 7351, 3636, 9585, 5291, 8374, 2166, 4919, 12176, 9140, 12129, 7852, 12286, 4895, 10805, 2780, 5195, 2305, 7247, 9644, 4053, 10600, 3364, 3271, 4057, 4414, 9442, 7917, 2174, 3947, 11951, 2455, 6599, 10545, 10975, 3654, 2894, 7681, 7126, 7287, 12269, 4119, 3343, 2151, 1522, 7174, 7350, 11041, 2442, 2148, 5959, 6492, 8330, 8945, 5598, 3624, 10397, 1325, 6565, 1945, 11260, 10077, 2674, 3338, 3276, 11034, 506, 6505, 1392, 5478, 8778, 1178, 2776, 3408, 10347, 11124, 2575, 9489, 12096, 6092, 10058, 4167, 6085, 923, 11251, 11912, 4578, 10669, 11914, 425, 10453, 392, 10104, 8464, 4235, 8761, 7376, 2291, 3375, 7954, 8896, 6617, 7790, 1737, 11667, 3982, 9342, 6680, 636, 6825, 7383, 512, 4670, 2900, 12050, 7735, 994, 1687, 11883, 7021, 146, 10485, 1403, 5189, 6094, 2483, 2054, 3042, 10945, 3981, 10821, 11826, 8882, 8151, 180, 9600, 7684, 5219, 10880, 6780, 204, 11232, 2600, 7584, 3121, 3017, 11053, 7814, 7043, 4251, 4739, 11063, 6771, 7073, 9261, 2360, 11925, 1928, 11825, 8024, 3678, 3205, 3359, 11197, 5209, 8581, 3238, 8840, 1136, 9363, 1826, 3171, 4489, 7885, 346, 2068, 1389, 8257, 3163, 4840, 6127, 8062, 8921, 612, 4238, 10763, 8067, 125, 11749, 10125, 5416, 2110, 716, 9839, 10584, 11475, 11873, 3448, 343, 1908, 4538, 10423, 7078, 4727, 1208, 11572, 3589, 2982, 1373, 1721, 10753, 4103, 2429, 4209, 5412, 5993, 9011, 438, 3515, 7228, 1218, 8347, 5232, 8682, 1327, 7508, 4924, 448, 1014, 10029, 12221, 4566, 5836, 12229, 2717, 1535, 3200, 5588, 5845, 412, 5102, 7326, 3744, 3056, 2528, 7406, 8314, 9202, 6454, 6613, 1417, 10032, 7784, 1518, 3765, 4176, 5063, 9828, 2275, 6636, 4267, 6463, 2065, 7725, 3495, 8328, 8755, 8144, 10533, 5966, 12077, 9175, 9520, 5596, 6302, 8400, 579, 6781, 11014, 5734, 11113, 11164, 4860, 1131, 10844, 9068, 8016, 9694, 3837, 567, 9348, 7000, 6627, 7699, 5082, 682, 11309, 5207, 4050, 7087, 844, 7434, 3769, 293, 9057, 6940, 9344, 10883, 2633, 8190, 
3944, 5530, 5604, 3480, 2171, 9282, 11024, 2213, 8136, 3805, 767, 12239, 216, 11520, 6763, 10353, 7, 8566, 845, 7235, 3154, 4360, 3285, 10268, 2832, 3572, 1282, 7559, 3229, 8360, 10583, 6105, 3120, 6643, 6203, 8536, 8348, 6919, 3536, 9199, 10891, 11463, 5043, 1658, 5618, 8787, 5789, 4719, 751, 11379, 6389, 10783, 3065, 7806, 6586, 2622, 5386, 510, 7628, 6921, 578, 10345, 11839, 8929, 4684, 12226, 7154, 9916, 7302, 8481, 3670, 11066, 2334, 1590, 7878, 10734, 1802, 1891, 5103, 6151, 8820, 3418, 7846, 9951, 4693, 417, 9996, 9652, 4510, 2946, 5461, 365, 881, 1927, 1015, 11675, 11009, 1371, 12265, 2485, 11385, 5039, 6742, 8449, 1842, 12217, 8176, 9577, 4834, 7937, 9461, 2643, 11194, 3045, 6508, 4094, 3451, 7911, 11048, 5406, 4665, 3020, 6616, 11345, 7519, 3669, 5287, 1790, 7014, 5410, 11038, 11249, 2035, 6125, 10407, 4565, 7315, 5078, 10506, 2840, 2478, 9270, 4194, 9195, 4518, 7469, 1160, 6878, 2730, 10421, 10036, 1734, 3815, 10939, 5832, 10595, 10759, 4423, 8420, 9617, 7119, 11010, 11424, 9173, 189, 10080, 10526, 3466, 10588, 7592, 3578, 11511, 7785, 9663, 530, 12150, 8957, 2532, 3317, 9349, 10243, 1481, 9332, 3454, 3758, 7899, 4218, 2593, 11410, 2276, 982, 6513, 1849, 8494, 9021, 4523, 7988, 8, 457, 648, 150, 8000, 2307, 2301, 874, 5650, 170, 9462, 2873, 9855, 11498, 2535, 11169, 5808, 12268, 9687, 1901, 7171, 11787, 3846, 1573, 6063, 3793, 466, 11259, 10608, 3821, 6320, 4649, 6263, 2929}; + +uint16_t oqs_kex_rlwe_newhope_psis_inv_montgomery[PARAM_N] = {256, 10570, 1510, 7238, 1034, 7170, 6291, 7921, 11665, 3422, 4000, 2327, 2088, 5565, 795, 10647, 1521, 5484, 2539, 7385, 1055, 7173, 8047, 11683, 1669, 1994, 3796, 5809, 4341, 9398, 11876, 12230, 10525, 12037, 12253, 3506, 4012, 9351, 4847, 2448, 7372, 9831, 3160, 2207, 5582, 2553, 7387, 6322, 9681, 1383, 10731, 1533, 219, 5298, 4268, 7632, 6357, 9686, 8406, 4712, 9451, 10128, 4958, 5975, 11387, 8649, 11769, 6948, 11526, 12180, 1740, 10782, 6807, 2728, 7412, 4570, 4164, 4106, 11120, 12122, 8754, 11784, 3439, 5758, 
11356, 6889, 9762, 11928, 1704, 1999, 10819, 12079, 12259, 7018, 11536, 1648, 1991, 2040, 2047, 2048, 10826, 12080, 8748, 8272, 8204, 1172, 1923, 7297, 2798, 7422, 6327, 4415, 7653, 6360, 11442, 12168, 7005, 8023, 9924, 8440, 8228, 2931, 7441, 1063, 3663, 5790, 9605, 10150, 1450, 8985, 11817, 10466, 10273, 12001, 3470, 7518, 1074, 1909, 7295, 9820, 4914, 702, 5367, 7789, 8135, 9940, 1420, 3714, 11064, 12114, 12264, 1752, 5517, 9566, 11900, 1700, 3754, 5803, 829, 1874, 7290, 2797, 10933, 5073, 7747, 8129, 6428, 6185, 11417, 1631, 233, 5300, 9535, 10140, 11982, 8734, 8270, 2937, 10953, 8587, 8249, 2934, 9197, 4825, 5956, 4362, 9401, 1343, 3703, 529, 10609, 12049, 6988, 6265, 895, 3639, 4031, 4087, 4095, 585, 10617, 8539, 4731, 4187, 9376, 3095, 9220, 10095, 10220, 1460, 10742, 12068, 1724, 5513, 11321, 6884, 2739, 5658, 6075, 4379, 11159, 10372, 8504, 4726, 9453, 3106, 7466, 11600, 10435, 8513, 9994, 8450, 9985, 3182, 10988, 8592, 2983, 9204, 4826, 2445, 5616, 6069, 867, 3635, 5786, 11360, 5134, 2489, 10889, 12089, 1727, 7269, 2794, 9177, 1311, 5454, 9557, 6632, 2703, 9164, 10087, 1441, 3717, 531, 3587, 2268, 324, 5313, 759, 1864, 5533, 2546, 7386, 9833, 8427, 4715, 11207, 1601, 7251, 4547, 11183, 12131, 1733, 10781, 10318, 1474, 10744, 5046, 4232, 11138, 10369, 6748, 964, 7160, 4534, 7670, 8118, 8182, 4680, 11202, 6867, 981, 8918, 1274, 182, 26, 7026, 8026, 11680, 12202, 10521, 1503, 7237, 4545, 5916, 9623, 8397, 11733, 10454, 3249, 9242, 6587, 941, 1890, 270, 10572, 6777, 9746, 6659, 6218, 6155, 6146, 878, 1881, 7291, 11575, 12187, 1741, 7271, 8061, 11685, 6936, 4502, 9421, 4857, 4205, 7623, 1089, 10689, 1527, 8996, 10063, 11971, 10488, 6765, 2722, 3900, 9335, 11867, 6962, 11528, 5158, 4248, 4118, 5855, 2592, 5637, 6072, 2623, 7397, 8079, 9932, 4930, 5971, 853, 3633, 519, 8852, 11798, 3441, 11025, 1575, 225, 8810, 11792, 12218, 3501, 9278, 3081, 9218, 4828, 7712, 8124, 11694, 12204, 3499, 4011, 573, 3593, 5780, 7848, 9899, 10192, 1456, 208, 7052, 2763, 7417, 11593, 
10434, 12024, 8740, 11782, 10461, 3250, 5731, 7841, 9898, 1414, 202, 3540, 7528, 2831, 2160, 10842, 5060, 4234, 4116, 588, 84, 12, 7024, 2759, 9172, 6577, 11473, 1639, 9012, 3043, 7457, 6332, 11438, 1634, 1989, 9062, 11828, 8712, 11778, 12216, 10523, 6770, 9745, 10170, 4964, 9487, 6622, 946, 8913, 6540, 6201, 4397, 9406, 8366, 9973, 8447, 8229, 11709, 8695, 10020, 3187, 5722, 2573, 10901, 6824, 4486, 4152, 9371, 8361, 2950, 2177, 311, 1800, 9035, 8313, 11721, 3430, 490, 70, 10, 1757, 251, 3547, 7529, 11609, 3414, 7510, 4584, 4166, 9373, 1339, 5458, 7802, 11648, 1664, 7260, 9815, 10180, 6721, 9738, 10169, 8475, 8233, 9954, 1422, 8981, 1283, 5450, 11312, 1616, 3742, 11068, 10359, 4991, 713, 3613, 9294, 8350, 4704, 672, 96, 7036, 9783, 11931, 3460, 5761, 823, 10651, 12055, 10500, 1500, 5481, 783, 3623, 11051, 8601, 8251, 8201, 11705, 10450, 5004, 4226, 7626, 2845, 2162, 3820, 7568, 9859, 3164, 452, 10598, 1514, 5483, 6050, 6131, 4387, 7649, 8115, 6426, 918, 8909, 8295, 1185, 5436, 11310, 8638, 1234, 5443, 11311, 5127, 2488, 2111, 10835, 5059, 7745, 2862, 3920, 560, 80, 1767, 2008, 3798, 11076, 6849, 2734, 10924, 12094, 8750, 1250, 10712, 6797, 971, 7161, 1023, 8924, 4786, 7706, 4612, 4170, 7618, 6355, 4419, 5898, 11376, 10403, 10264, 6733, 4473, 639, 5358, 2521, 9138, 3061, 5704, 4326, 618, 5355, 765, 5376, 768, 7132, 4530, 9425, 3102, 9221, 6584, 11474, 10417, 10266, 12000, 6981, 6264, 4406, 2385, 7363, 4563, 4163, 7617, 9866, 3165, 9230, 11852, 10471, 5007, 5982, 11388, 5138, 734, 3616, 11050, 12112, 6997, 11533, 12181, 10518, 12036, 3475, 2252, 7344, 9827, 4915, 9480, 6621, 4457, 7659, 9872, 6677, 4465, 4149, 7615, 4599, 657, 3605, 515, 10607, 6782, 4480, 640, 1847, 3775, 5806, 2585, 5636, 9583, 1369, 10729, 8555, 10000, 11962, 5220, 7768, 8132, 8184, 9947, 1421, 203, 29, 8782, 11788, 1684, 10774, 10317, 4985, 9490, 8378, 4708, 11206, 5112, 5997, 7879, 11659, 12199, 8765, 10030, 4944, 5973, 6120, 6141, 6144, 7900, 11662, 1666, 238, 34, 3516, 5769, 9602, 8394, 9977, 
6692, 956, 10670, 6791, 9748, 11926, 8726, 11780, 5194, 742, 106, 8793, 10034, 3189, 10989, 5081, 4237, 5872, 4350, 2377, 10873, 6820, 6241, 11425, 10410, 10265, 3222, 5727, 9596, 4882, 2453, 2106, 3812, 11078, 12116, 5242, 4260, 11142, 8614, 11764, 12214, 5256, 4262, 4120, 11122, 5100, 11262, 5120, 2487, 5622, 9581, 8391, 8221, 2930, 10952, 12098, 6995, 6266, 9673, 4893, 699, 3611, 4027, 5842, 11368, 1624, 232, 8811, 8281, 1183, 169, 8802, 3013, 2186, 5579, 797, 3625, 4029, 11109, 1587, 7249, 11569, 8675, 6506, 2685, 10917, 12093, 12261, 12285, 1755, 7273, 1039, 1904, 272, 3550, 9285, 3082, 5707, 6082, 4380, 7648, 11626, 5172, 4250, 9385, 8363, 8217, 4685, 5936, 848, 8899, 6538, 934, 1889, 3781, 9318, 10109, 10222, 6727, 961, 5404, 772, 5377, 9546, 8386, 1198, 8949, 3034, 2189, 7335, 4559, 5918, 2601, 10905, 5069, 9502, 3113, 7467, 8089, 11689, 5181, 9518, 8382, 2953, 3933, 4073, 4093, 7607, 8109, 2914, 5683, 4323, 11151, 1593, 10761, 6804, 972, 3650, 2277, 5592, 4310, 7638, 9869, 4921, 703, 1856, 9043, 4803, 9464, 1352, 8971, 11815, 5199, 7765, 6376, 4422, 7654, 2849, 407, 8836, 6529, 7955, 2892, 9191, 1313, 10721, 12065, 12257, 1751, 9028, 8312, 2943, 2176, 3822, 546, 78, 8789, 11789, 10462, 12028, 6985, 4509, 9422, 1346, 5459, 4291, 613, 10621, 6784, 9747, 3148, 7472, 2823, 5670, 810, 7138, 8042, 4660, 7688, 6365, 6176, 6149, 2634, 5643, 9584, 10147, 11983, 5223, 9524, 11894, 10477, 8519, 1217, 3685, 2282, 326, 10580, 3267, 7489, 4581, 2410, 5611, 11335, 6886, 8006, 8166, 11700, 3427, 11023, 8597, 10006, 3185, 455, 65, 5276, 7776, 4622, 5927, 7869, 9902, 11948, 5218, 2501, 5624, 2559, 10899, 1557, 1978, 10816, 10323, 8497, 4725, 675, 1852, 10798, 12076, 10503, 3256, 9243, 3076, 2195, 10847, 12083, 10504, 12034, 10497}; diff --git a/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64.c b/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64.c new file mode 100644 index 0000000000000000000000000000000000000000..60dede4391839bed4b9f8bd1947550941c3afae0 --- /dev/null +++ 
b/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64.c @@ -0,0 +1,857 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral +* Diffie-Hellman key exchange. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: modular arithmetic optimized for x64 platforms +* +*********************************************************************************************/ + +#include "../SIDH_internal.h" + +// Global constants +extern const uint64_t p751[NWORDS_FIELD]; +extern const uint64_t p751p1[NWORDS_FIELD]; +extern const uint64_t p751x2[NWORDS_FIELD]; + +// Modular addition, c = a+b mod p751. +// Inputs: a, b in [0, 2*p751-1] +// Output: c in [0, 2*p751-1] +__inline void oqs_sidh_cln16_fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { + +#if (OS_TARGET == OS_WIN) + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t *) p751x2)[i], carry, c[i]); + } + mask = 0 - (digit_t) carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t *) p751x2)[i] & mask, carry, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + oqs_sidh_cln16_fpadd751_asm(a, b, c); + +#endif +} + +// Modular subtraction, c = a-b mod p751. 
+// Inputs: a, b in [0, 2*p751-1] +// Output: c in [0, 2*p751-1] +__inline void oqs_sidh_cln16_fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { + +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t) borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t *) p751x2)[i] & mask, borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + oqs_sidh_cln16_fpsub751_asm(a, b, c); + +#endif +} + +// Modular negation, a = -a mod p751. +// Input/output: a in [0, 2*p751-1] +__inline void oqs_sidh_cln16_fpneg751(digit_t *a) { + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t *) p751x2)[i], a[i], borrow, a[i]); + } +} + +// Modular division by two, c = a/2 mod p751. +// Input : a in [0, 2*p751-1] +// Output: c in [0, 2*p751-1] +void oqs_sidh_cln16_fpdiv2_751(const digit_t *a, digit_t *c) { + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p751 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t *) p751)[i] & mask, carry, c[i]); + } + + oqs_sidh_cln16_mp_shiftr1(c, NWORDS_FIELD); +} + +// Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1]. +void oqs_sidh_cln16_fpcorrection751(digit_t *a) { + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t *) p751)[i], borrow, a[i]); + } + mask = 0 - (digit_t) borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t *) p751)[i] & mask, borrow, a[i]); + } +} + +// Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. 
+void oqs_sidh_cln16_mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { + + UNREFERENCED_PARAMETER(nwords); + +#if (OS_TARGET == OS_WIN) + digit_t t = 0; + uint128_t uv = {0}; + unsigned int carry = 0; + + MULADD128(a[0], b[0], uv, carry, uv); + t += carry; + c[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[0], uv, carry, uv); + t += carry; + c[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[0], uv, carry, uv); + t += carry; + c[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[0], uv, carry, uv); + t += carry; + c[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[0], uv, carry, uv); + t += carry; + c[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[1], uv, carry, uv); + t += carry; + MULADD128(a[3], b[2], uv, carry, uv); + t += carry; + MULADD128(a[2], b[3], uv, carry, uv); + t += carry; + MULADD128(a[1], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[0], uv, carry, uv); + t += carry; + c[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[1], uv, carry, uv); + t += carry; + MULADD128(a[4], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[4], uv, carry, uv); + t += carry; + 
MULADD128(a[1], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[0], uv, carry, uv); + t += carry; + c[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[1], uv, carry, uv); + t += carry; + MULADD128(a[5], b[2], uv, carry, uv); + t += carry; + MULADD128(a[4], b[3], uv, carry, uv); + t += carry; + MULADD128(a[3], b[4], uv, carry, uv); + t += carry; + MULADD128(a[2], b[5], uv, carry, uv); + t += carry; + MULADD128(a[1], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[0], uv, carry, uv); + t += carry; + c[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[8], uv, carry, uv); + t += carry; + MULADD128(a[7], b[1], uv, carry, uv); + t += carry; + MULADD128(a[6], b[2], uv, carry, uv); + t += carry; + MULADD128(a[5], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[5], uv, carry, uv); + t += carry; + MULADD128(a[2], b[6], uv, carry, uv); + t += carry; + MULADD128(a[1], b[7], uv, carry, uv); + t += carry; + MULADD128(a[8], b[0], uv, carry, uv); + t += carry; + c[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[9], uv, carry, uv); + t += carry; + MULADD128(a[8], b[1], uv, carry, uv); + t += carry; + MULADD128(a[7], b[2], uv, carry, uv); + t += carry; + MULADD128(a[6], b[3], uv, carry, uv); + t += carry; + MULADD128(a[5], b[4], uv, carry, uv); + t += carry; + MULADD128(a[4], b[5], uv, carry, uv); + t += carry; + MULADD128(a[3], b[6], uv, carry, uv); + t += carry; + MULADD128(a[2], b[7], uv, carry, uv); + t += carry; + MULADD128(a[1], b[8], uv, carry, uv); + t += carry; + MULADD128(a[9], b[0], uv, carry, uv); + t += carry; + c[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[10], uv, carry, uv); + t += carry; + MULADD128(a[9], b[1], uv, carry, uv); + t += carry; + MULADD128(a[8], b[2], uv, carry, uv); + t += carry; + MULADD128(a[7], b[3], uv, carry, uv); + t 
+= carry; + MULADD128(a[6], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[6], uv, carry, uv); + t += carry; + MULADD128(a[3], b[7], uv, carry, uv); + t += carry; + MULADD128(a[2], b[8], uv, carry, uv); + t += carry; + MULADD128(a[1], b[9], uv, carry, uv); + t += carry; + MULADD128(a[10], b[0], uv, carry, uv); + t += carry; + c[10] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[11], uv, carry, uv); + t += carry; + MULADD128(a[10], b[1], uv, carry, uv); + t += carry; + MULADD128(a[9], b[2], uv, carry, uv); + t += carry; + MULADD128(a[8], b[3], uv, carry, uv); + t += carry; + MULADD128(a[7], b[4], uv, carry, uv); + t += carry; + MULADD128(a[6], b[5], uv, carry, uv); + t += carry; + MULADD128(a[5], b[6], uv, carry, uv); + t += carry; + MULADD128(a[4], b[7], uv, carry, uv); + t += carry; + MULADD128(a[3], b[8], uv, carry, uv); + t += carry; + MULADD128(a[2], b[9], uv, carry, uv); + t += carry; + MULADD128(a[1], b[10], uv, carry, uv); + t += carry; + MULADD128(a[11], b[0], uv, carry, uv); + t += carry; + c[11] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[1], b[11], uv, carry, uv); + t += carry; + MULADD128(a[10], b[2], uv, carry, uv); + t += carry; + MULADD128(a[9], b[3], uv, carry, uv); + t += carry; + MULADD128(a[8], b[4], uv, carry, uv); + t += carry; + MULADD128(a[7], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[7], uv, carry, uv); + t += carry; + MULADD128(a[4], b[8], uv, carry, uv); + t += carry; + MULADD128(a[3], b[9], uv, carry, uv); + t += carry; + MULADD128(a[2], b[10], uv, carry, uv); + t += carry; + MULADD128(a[11], b[1], uv, carry, uv); + t += carry; + c[12] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[2], uv, carry, uv); + t += carry; + MULADD128(a[10], b[3], uv, carry, uv); + t += carry; + MULADD128(a[9], b[4], uv, carry, uv); + t += carry; + MULADD128(a[8], 
b[5], uv, carry, uv); + t += carry; + MULADD128(a[7], b[6], uv, carry, uv); + t += carry; + MULADD128(a[6], b[7], uv, carry, uv); + t += carry; + MULADD128(a[5], b[8], uv, carry, uv); + t += carry; + MULADD128(a[4], b[9], uv, carry, uv); + t += carry; + MULADD128(a[3], b[10], uv, carry, uv); + t += carry; + MULADD128(a[2], b[11], uv, carry, uv); + t += carry; + c[13] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[3], uv, carry, uv); + t += carry; + MULADD128(a[10], b[4], uv, carry, uv); + t += carry; + MULADD128(a[9], b[5], uv, carry, uv); + t += carry; + MULADD128(a[8], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[8], uv, carry, uv); + t += carry; + MULADD128(a[5], b[9], uv, carry, uv); + t += carry; + MULADD128(a[4], b[10], uv, carry, uv); + t += carry; + MULADD128(a[3], b[11], uv, carry, uv); + t += carry; + c[14] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[4], uv, carry, uv); + t += carry; + MULADD128(a[10], b[5], uv, carry, uv); + t += carry; + MULADD128(a[9], b[6], uv, carry, uv); + t += carry; + MULADD128(a[8], b[7], uv, carry, uv); + t += carry; + MULADD128(a[7], b[8], uv, carry, uv); + t += carry; + MULADD128(a[6], b[9], uv, carry, uv); + t += carry; + MULADD128(a[5], b[10], uv, carry, uv); + t += carry; + MULADD128(a[4], b[11], uv, carry, uv); + t += carry; + c[15] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[5], uv, carry, uv); + t += carry; + MULADD128(a[10], b[6], uv, carry, uv); + t += carry; + MULADD128(a[9], b[7], uv, carry, uv); + t += carry; + MULADD128(a[8], b[8], uv, carry, uv); + t += carry; + MULADD128(a[7], b[9], uv, carry, uv); + t += carry; + MULADD128(a[6], b[10], uv, carry, uv); + t += carry; + MULADD128(a[5], b[11], uv, carry, uv); + t += carry; + c[16] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[6], uv, carry, uv); + t += carry; + MULADD128(a[10], b[7], uv, carry, uv); + 
t += carry; + MULADD128(a[9], b[8], uv, carry, uv); + t += carry; + MULADD128(a[8], b[9], uv, carry, uv); + t += carry; + MULADD128(a[7], b[10], uv, carry, uv); + t += carry; + MULADD128(a[6], b[11], uv, carry, uv); + t += carry; + c[17] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[7], uv, carry, uv); + t += carry; + MULADD128(a[10], b[8], uv, carry, uv); + t += carry; + MULADD128(a[9], b[9], uv, carry, uv); + t += carry; + MULADD128(a[8], b[10], uv, carry, uv); + t += carry; + MULADD128(a[7], b[11], uv, carry, uv); + t += carry; + c[18] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[8], uv, carry, uv); + t += carry; + MULADD128(a[10], b[9], uv, carry, uv); + t += carry; + MULADD128(a[9], b[10], uv, carry, uv); + t += carry; + MULADD128(a[8], b[11], uv, carry, uv); + t += carry; + c[19] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[9], uv, carry, uv); + t += carry; + MULADD128(a[10], b[10], uv, carry, uv); + t += carry; + MULADD128(a[9], b[11], uv, carry, uv); + t += carry; + c[20] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[10], uv, carry, uv); + t += carry; + MULADD128(a[10], b[11], uv, carry, uv); + t += carry; + c[21] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + + MULADD128(a[11], b[11], uv, carry, uv); + c[22] = uv[0]; + c[23] = uv[1]; + +#elif (OS_TARGET == OS_LINUX) + + oqs_sidh_cln16_mul751_asm(a, b, c); + +#endif +} + +// Efficient Montgomery reduction using comba and exploiting the special form of the prime p751. +// mc = ma*R^-1 mod p751x2, where R = 2^768. +// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1]. +// ma is assumed to be in Montgomery representation. 
+void oqs_sidh_cln16_rdc_mont(const oqs_sidh_cln16_dfelm_t ma, oqs_sidh_cln16_felm_t mc) { +#if (OS_TARGET == OS_WIN) + unsigned int carry; + digit_t t = 0; + uint128_t uv = {0}; + + mc[0] = ma[0]; + mc[1] = ma[1]; + mc[2] = ma[2]; + mc[3] = ma[3]; + mc[4] = ma[4]; + MUL128(mc[0], ((digit_t *) p751p1)[5], uv); + ADDC(0, uv[0], ma[5], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = 0; + + MULADD128(mc[0], ((digit_t *) p751p1)[6], uv, carry, uv); + MULADD128(mc[1], ((digit_t *) p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[6], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t *) p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t *) p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[7], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t *) p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t *) p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[8], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t *) p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t *) p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t *) p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[9], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += 
carry; + mc[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t *) p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t *) p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t *) p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[10], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[10] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t *) p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t *) p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t *) p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[11], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[11] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[1], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t *) p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t *) p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t *) p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[12], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 
0; + + MULADD128(mc[2], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t *) p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t *) p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t *) p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[13], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[3], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t *) p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t *) p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t *) p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[14], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[4], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t *) p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t *) p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t *) p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[15], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[3] = uv[0]; + uv[0] = uv[1]; + 
uv[1] = t; + t = 0; + + MULADD128(mc[5], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t *) p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t *) p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t *) p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[16], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[6], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t *) p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t *) p751p1)[6], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[17], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[7], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t *) p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t *) p751p1)[7], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[18], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[8], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t 
*) p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t *) p751p1)[8], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[19], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[9], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t *) p751p1)[9], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[20], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[10], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t *) p751p1)[10], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[21], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[11], ((digit_t *) p751p1)[11], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[22], carry, mc[10]); + ADDC(carry, uv[1], 0, carry, uv[1]); + ADDC(0, uv[1], ma[23], carry, mc[11]); + +#elif (OS_TARGET == OS_LINUX) + + oqs_sidh_cln16_rdc751_asm(ma, mc); + +#endif +} diff --git a/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64_asm.S b/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64_asm.S new file mode 100644 index 0000000000000000000000000000000000000000..8f2cb09cec54294595e53474a9d53f92abce9527 --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64_asm.S @@ -0,0 +1,2021 @@ +//******************************************************************************************* +// SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral +// Diffie-Hellman key exchange. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// +// +// Abstract: field arithmetic in x64 assembly for Linux +// +//******************************************************************************************* + +.intel_syntax noprefix + +// Registers that are used for parameter passing: +#define reg_p1 rdi +#define reg_p2 rsi +#define reg_p3 rdx + +// p751 + 1 +#define p751p1_5 0xEEB0000000000000 +#define p751p1_6 0xE3EC968549F878A8 +#define p751p1_7 0xDA959B1A13F7CC76 +#define p751p1_8 0x084E9867D6EBE876 +#define p751p1_9 0x8562B5045CB25748 +#define p751p1_10 0x0E12909F97BADC66 +#define p751p1_11 0x00006FE5D541F71C + +#define p751_0 0xFFFFFFFFFFFFFFFF +#define p751_5 0xEEAFFFFFFFFFFFFF +#define p751_6 0xE3EC968549F878A8 +#define p751_7 0xDA959B1A13F7CC76 +#define p751_8 0x084E9867D6EBE876 +#define p751_9 0x8562B5045CB25748 +#define p751_10 0x0E12909F97BADC66 +#define p751_11 0x00006FE5D541F71C + +#define p751x2_0 0xFFFFFFFFFFFFFFFE +#define p751x2_1 0xFFFFFFFFFFFFFFFF +#define p751x2_5 0xDD5FFFFFFFFFFFFF +#define p751x2_6 0xC7D92D0A93F0F151 +#define p751x2_7 0xB52B363427EF98ED +#define p751x2_8 0x109D30CFADD7D0ED +#define p751x2_9 0x0AC56A08B964AE90 +#define p751x2_10 0x1C25213F2F75B8CD +#define p751x2_11 0x0000DFCBAA83EE38 + + +.text +//*********************************************************************** +// Field addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.globl oqs_sidh_cln16_fpadd751_asm +oqs_sidh_cln16_fpadd751_asm: + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rcx, [reg_p1+64] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + adc r15, [reg_p2+56] + adc rcx, [reg_p2+64] + mov rax, [reg_p1+72] + adc rax, [reg_p2+72] + mov 
[reg_p3+72], rax + mov rax, [reg_p1+80] + adc rax, [reg_p2+80] + mov [reg_p3+80], rax + mov rax, [reg_p1+88] + adc rax, [reg_p2+88] + mov [reg_p3+88], rax + + movq rax, p751x2_0 + sub r8, rax + movq rax, p751x2_1 + sbb r9, rax + sbb r10, rax + sbb r11, rax + sbb r12, rax + movq rax, p751x2_5 + sbb r13, rax + movq rax, p751x2_6 + sbb r14, rax + movq rax, p751x2_7 + sbb r15, rax + movq rax, p751x2_8 + sbb rcx, rax + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rcx + mov r8, [reg_p3+72] + mov r9, [reg_p3+80] + mov r10, [reg_p3+88] + movq rax, p751x2_9 + sbb r8, rax + movq rax, p751x2_10 + sbb r9, rax + movq rax, p751x2_11 + sbb r10, rax + mov [reg_p3+72], r8 + mov [reg_p3+80], r9 + mov [reg_p3+88], r10 + movq rax, 0 + sbb rax, 0 + + mov rsi, p751x2_0 + and rsi, rax + mov r8, p751x2_1 + and r8, rax + movq r9, p751x2_5 + and r9, rax + movq r10, p751x2_6 + and r10, rax + movq r11, p751x2_7 + and r11, rax + movq r12, p751x2_8 + and r12, rax + movq r13, p751x2_9 + and r13, rax + movq r14, p751x2_10 + and r14, rax + movq r15, p751x2_11 + and r15, rax + + mov rax, [reg_p3] + add rax, rsi + mov [reg_p3], rax + mov rax, [reg_p3+8] + adc rax, r8 + mov [reg_p3+8], rax + mov rax, [reg_p3+16] + adc rax, r8 + mov [reg_p3+16], rax + mov rax, [reg_p3+24] + adc rax, r8 + mov [reg_p3+24], rax + mov rax, [reg_p3+32] + adc rax, r8 + mov [reg_p3+32], rax + mov rax, [reg_p3+40] + adc rax, r9 + mov [reg_p3+40], rax + mov rax, [reg_p3+48] + adc rax, r10 + mov [reg_p3+48], rax + mov rax, [reg_p3+56] + adc rax, r11 + mov [reg_p3+56], rax + mov rax, [reg_p3+64] + adc rax, r12 + mov [reg_p3+64], rax + mov rax, [reg_p3+72] + adc rax, r13 + mov [reg_p3+72], rax + mov rax, [reg_p3+80] + adc rax, r14 + mov [reg_p3+80], rax + mov rax, [reg_p3+88] + adc rax, r15 + mov [reg_p3+88], rax + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + 
+//*********************************************************************** +// Field subtraction +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] +//*********************************************************************** +.globl oqs_sidh_cln16_fpsub751_asm +oqs_sidh_cln16_fpsub751_asm: + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rcx, [reg_p1+64] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb r15, [reg_p2+56] + sbb rcx, [reg_p2+64] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rcx + mov rax, [reg_p1+72] + sbb rax, [reg_p2+72] + mov [reg_p3+72], rax + mov rax, [reg_p1+80] + sbb rax, [reg_p2+80] + mov [reg_p3+80], rax + mov rax, [reg_p1+88] + sbb rax, [reg_p2+88] + mov [reg_p3+88], rax + movq rax, 0 + sbb rax, 0 + + mov rsi, p751x2_0 + and rsi, rax + mov r8, p751x2_1 + and r8, rax + movq r9, p751x2_5 + and r9, rax + movq r10, p751x2_6 + and r10, rax + movq r11, p751x2_7 + and r11, rax + movq r12, p751x2_8 + and r12, rax + movq r13, p751x2_9 + and r13, rax + movq r14, p751x2_10 + and r14, rax + movq r15, p751x2_11 + and r15, rax + + mov rax, [reg_p3] + add rax, rsi + mov [reg_p3], rax + mov rax, [reg_p3+8] + adc rax, r8 + mov [reg_p3+8], rax + mov rax, [reg_p3+16] + adc rax, r8 + mov [reg_p3+16], rax + mov rax, [reg_p3+24] + adc rax, r8 + mov [reg_p3+24], rax + mov rax, [reg_p3+32] + adc rax, r8 + mov [reg_p3+32], rax + mov rax, [reg_p3+40] + adc rax, r9 + mov [reg_p3+40], rax + mov rax, [reg_p3+48] + adc rax, r10 + mov [reg_p3+48], rax + mov rax, [reg_p3+56] + adc rax, r11 + mov [reg_p3+56], rax + mov rax, [reg_p3+64] + 
adc rax, r12 + mov [reg_p3+64], rax + mov rax, [reg_p3+72] + adc rax, r13 + mov [reg_p3+72], rax + mov rax, [reg_p3+80] + adc rax, r14 + mov [reg_p3+80], rax + mov rax, [reg_p3+88] + adc rax, r15 + mov [reg_p3+88], rax + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Integer multiplication +// Based on Karatsuba method +// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2] +// NOTE: a=c or b=c are not allowed +//*********************************************************************** +.globl oqs_sidh_cln16_mul751_asm +oqs_sidh_cln16_mul751_asm: + push r12 + push r13 + push r14 + mov rcx, reg_p3 + + // rcx[0-5] <- AH+AL + xor rax, rax + mov r8, [reg_p1+48] + mov r9, [reg_p1+56] + mov r10, [reg_p1+64] + mov r11, [reg_p1+72] + mov r12, [reg_p1+80] + mov r13, [reg_p1+88] + add r8, [reg_p1] + adc r9, [reg_p1+8] + adc r10, [reg_p1+16] + adc r11, [reg_p1+24] + adc r12, [reg_p1+32] + adc r13, [reg_p1+40] + push r15 + mov [rcx], r8 + mov [rcx+8], r9 + mov [rcx+16], r10 + mov [rcx+24], r11 + mov [rcx+32], r12 + mov [rcx+40], r13 + sbb rax, 0 + sub rsp, 96 // Allocating space in stack + + // rcx[6-11] <- BH+BL + xor rdx, rdx + mov r8, [reg_p2+48] + mov r9, [reg_p2+56] + mov r10, [reg_p2+64] + mov r11, [reg_p2+72] + mov r12, [reg_p2+80] + mov r13, [reg_p2+88] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + mov [rcx+48], r8 + mov [rcx+56], r9 + mov [rcx+64], r10 + mov [rcx+72], r11 + mov [rcx+80], r12 + mov [rcx+88], r13 + sbb rdx, 0 + mov [rsp+80], rax + mov [rsp+88], rdx + + // (rsp[0-8],r10,r8,r9) <- (AH+AL)*(BH+BL) + mov r11, [rcx] + mov rax, r8 + mul r11 + mov [rsp], rax // c0 + mov r14, rdx + + xor r15, r15 + mov rax, r9 + mul r11 + xor r9, r9 + add r14, rax + adc r9, rdx + + mov r12, [rcx+8] + mov rax, r8 + mul r12 + add r14, rax + mov [rsp+8], r14 // c1 + adc r9, rdx + adc r15, 0 + + xor r8, r8 + mov rax, r10 
+ mul r11 + add r9, rax + mov r13, [rcx+48] + adc r15, rdx + adc r8, 0 + + mov rax, [rcx+16] + mul r13 + add r9, rax + adc r15, rdx + mov rax, [rcx+56] + adc r8, 0 + + mul r12 + add r9, rax + mov [rsp+16], r9 // c2 + adc r15, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [rcx+72] + mul r11 + add r15, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+24] + mul r13 + add r15, rax + adc r8, rdx + adc r9, 0 + + mov rax, r10 + mul r12 + add r15, rax + adc r8, rdx + adc r9, 0 + + mov r14, [rcx+16] + mov rax, [rcx+56] + mul r14 + add r15, rax + mov [rsp+24], r15 // c3 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [rcx+80] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+64] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [rcx+48] + mov rax, [rcx+32] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+72] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r13, [rcx+24] + mov rax, [rcx+56] + mul r13 + add r8, rax + mov [rsp+32], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [rcx+88] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+64] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+72] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+40] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+80] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r15, [rcx+32] + mov rax, [rcx+56] + mul r15 + add r9, rax + mov [rsp+40], r9 // c5 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [rcx+64] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+88] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+80] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r11, [rcx+40] + mov rax, [rcx+56] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+72] + mul r13 + add r10, rax + mov [rsp+48], r10 // c6 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [rcx+88] + mul r14 
+ add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+64] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+80] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+72] + mul r15 + add r8, rax + mov [rsp+56], r8 // c7 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [rcx+72] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+80] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+88] + mul r13 + add r9, rax + mov [rsp+64], r9 // c8 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [rcx+88] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+80] + mul r11 + add r10, rax // c9 + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+88] + mul r11 + add r8, rax // c10 + adc r9, rdx // c11 + + mov rax, [rsp+88] + mov rdx, [rcx] + and r12, rax + and r14, rax + and rdx, rax + and r13, rax + and r15, rax + and r11, rax + mov rax, [rsp+48] + add rdx, rax + mov rax, [rsp+56] + adc r12, rax + mov rax, [rsp+64] + adc r14, rax + adc r13, r10 + adc r15, r8 + adc r11, r9 + mov rax, [rsp+80] + mov [rsp+48], rdx + mov [rsp+56], r12 + mov [rsp+64], r14 + mov [rsp+72], r13 + mov [rsp+80], r15 + mov [rsp+88], r11 + + mov r8, [rcx+48] + mov r9, [rcx+56] + mov r10, [rcx+64] + mov r11, [rcx+72] + mov r12, [rcx+80] + mov r13, [rcx+88] + and r8, rax + and r9, rax + and r10, rax + and r11, rax + and r12, rax + and r13, rax + mov rax, [rsp+48] + add r8, rax + mov rax, [rsp+56] + adc r9, rax + mov rax, [rsp+64] + adc r10, rax + mov rax, [rsp+72] + adc r11, rax + mov rax, [rsp+80] + adc r12, rax + mov rax, [rsp+88] + adc r13, rax + mov [rsp+48], r8 + mov [rsp+56], r9 + mov [rsp+72], r11 + + // rcx[0-11] <- AL*BL + mov r11, [reg_p1] + mov rax, [reg_p2] + mul r11 + xor r9, r9 + mov [rcx], rax // c0 + mov [rsp+64], r10 + mov r8, rdx + + mov rax, [reg_p2+8] + mul r11 + xor r10, r10 + add r8, rax + mov [rsp+80], r12 + adc r9, rdx + + mov r12, [reg_p1+8] + mov rax, [reg_p2] + mul r12 + add r8, rax + mov [rcx+8], 
r8 // c1 + adc r9, rdx + mov [rsp+88], r13 + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+16] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p2] + mov rax, [reg_p1+16] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+8] + mul r12 + add r9, rax + mov [rcx+16], r9 // c2 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+24] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p1+24] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+16] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p1+16] + mov rax, [reg_p2+8] + mul r14 + add r10, rax + mov [rcx+24], r10 // c3 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+32] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+16] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p1+32] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+24] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r13, [reg_p1+24] + mov rax, [reg_p2+8] + mul r13 + add r8, rax + mov [rcx+32], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+40] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+16] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+24] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r11, [reg_p1+40] + mov rax, [reg_p2] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+32] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r15, [reg_p1+32] + mov rax, [reg_p2+8] + mul r15 + add r9, rax + mov [rcx+40], r9 // c5 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+16] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+40] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+32] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+8] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 
+ + mov rax, [reg_p2+24] + mul r13 + add r10, rax + mov [rcx+48], r10 // c6 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+40] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+16] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+32] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+24] + mul r15 + add r8, rax + mov [rcx+56], r8 // c7 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+24] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+32] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+40] + mul r13 + add r9, rax + mov [rcx+64], r9 // c8 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+40] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+32] + mul r11 + add r10, rax + mov [rcx+72], r10 // c9 + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+40] + mul r11 + add r8, rax + mov [rcx+80], r8 // c10 + adc r9, rdx + mov [rcx+88], r9 // c11 + + // rcx[12-23] <- AH*BH + mov r11, [reg_p1+48] + mov rax, [reg_p2+48] + mul r11 + xor r9, r9 + mov [rcx+96], rax // c0 + mov r8, rdx + + mov rax, [reg_p2+56] + mul r11 + xor r10, r10 + add r8, rax + adc r9, rdx + + mov r12, [reg_p1+56] + mov rax, [reg_p2+48] + mul r12 + add r8, rax + mov [rcx+104], r8 // c1 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+64] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p2+48] + mov rax, [reg_p1+64] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+56] + mul r12 + add r9, rax + mov [rcx+112], r9 // c2 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+72] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p1+72] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+64] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p1+64] + mov rax, [reg_p2+56] + mul r14 + add r10, rax + mov [rcx+120], r10 // c3 + adc r8, 
rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+80] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+64] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [reg_p1+80] + mov rax, r13 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+72] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r13, [reg_p1+72] + mov rax, [reg_p2+56] + mul r13 + add r8, rax + mov [rcx+128], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+88] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+64] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+72] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r11, [reg_p1+88] + mov rax, [reg_p2+48] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+80] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+56] + mul r15 + add r9, rax + mov [rcx+136], r9 // c5 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+64] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+88] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+80] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+56] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+72] + mul r13 + add r10, rax + mov [rcx+144], r10 // c6 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+88] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+64] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+80] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+72] + mul r15 + add r8, rax + mov [rcx+152], r8 // c7 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+72] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+80] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+88] + mul r13 + add r9, rax + mov [rcx+160], r9 // c8 + 
adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+88] + mul r15 + add r10, rax + adc r8, rdx + + mov rax, [reg_p2+80] + mul r11 + add r10, rax + mov [rcx+168], r10 // c9 + adc r8, rdx + + mov rax, [reg_p2+88] + mul r11 + add r8, rax + mov [rcx+176], r8 // c10 + adc rdx, 0 + mov [rcx+184], rdx // c11 + + // [r8-r15,rax,rdx,rdi,[rsp]] <- (AH+AL)*(BH+BL) - AL*BL + mov r8, [rsp] + sub r8, [rcx] + mov r9, [rsp+8] + sbb r9, [rcx+8] + mov r10, [rsp+16] + sbb r10, [rcx+16] + mov r11, [rsp+24] + sbb r11, [rcx+24] + mov r12, [rsp+32] + sbb r12, [rcx+32] + mov r13, [rsp+40] + sbb r13, [rcx+40] + mov r14, [rsp+48] + sbb r14, [rcx+48] + mov r15, [rsp+56] + sbb r15, [rcx+56] + mov rax, [rsp+64] + sbb rax, [rcx+64] + mov rdx, [rsp+72] + sbb rdx, [rcx+72] + mov rdi, [rsp+80] + sbb rdi, [rcx+80] + mov rsi, [rsp+88] + sbb rsi, [rcx+88] + mov [rsp], rsi + + // [r8-r15,rax,rdx,rdi,[rsp]] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH + mov rsi, [rcx+96] + sub r8, rsi + mov rsi, [rcx+104] + sbb r9, rsi + mov rsi, [rcx+112] + sbb r10, rsi + mov rsi, [rcx+120] + sbb r11, rsi + mov rsi, [rcx+128] + sbb r12, rsi + mov rsi, [rcx+136] + sbb r13, rsi + mov rsi, [rcx+144] + sbb r14, rsi + mov rsi, [rcx+152] + sbb r15, rsi + mov rsi, [rcx+160] + sbb rax, rsi + mov rsi, [rcx+168] + sbb rdx, rsi + mov rsi, [rcx+176] + sbb rdi, rsi + mov rsi, [rsp] + sbb rsi, [rcx+184] + + // Final result + add r8, [rcx+48] + mov [rcx+48], r8 + adc r9, [rcx+56] + mov [rcx+56], r9 + adc r10, [rcx+64] + mov [rcx+64], r10 + adc r11, [rcx+72] + mov [rcx+72], r11 + adc r12, [rcx+80] + mov [rcx+80], r12 + adc r13, [rcx+88] + mov [rcx+88], r13 + adc r14, [rcx+96] + mov [rcx+96], r14 + adc r15, [rcx+104] + mov [rcx+104], r15 + adc rax, [rcx+112] + mov [rcx+112], rax + adc rdx, [rcx+120] + mov [rcx+120], rdx + adc rdi, [rcx+128] + mov [rcx+128], rdi + adc rsi, [rcx+136] + mov [rcx+136], rsi + mov rax, [rcx+144] + adc rax, 0 + mov [rcx+144], rax + mov rax, [rcx+152] + adc rax, 0 + mov [rcx+152], rax + mov rax, [rcx+160] + adc rax, 0 + mov 
[rcx+160], rax + mov rax, [rcx+168] + adc rax, 0 + mov [rcx+168], rax + mov rax, [rcx+176] + adc rax, 0 + mov [rcx+176], rax + mov rax, [rcx+184] + adc rax, 0 + mov [rcx+184], rax + + add rsp, 96 // Restoring space in stack + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Montgomery reduction +// Based on comba method +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed +//*********************************************************************** +.globl oqs_sidh_cln16_rdc751_asm +oqs_sidh_cln16_rdc751_asm: + push r12 + push r13 + push r14 + push r15 + + mov r11, [reg_p1] + movq rax, p751p1_5 + mul r11 + xor r8, r8 + add rax, [reg_p1+40] + mov [reg_p2+40], rax // z5 + adc r8, rdx + + xor r9, r9 + movq rax, p751p1_6 + mul r11 + xor r10, r10 + add r8, rax + adc r9, rdx + + mov r12, [reg_p1+8] + movq rax, p751p1_5 + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+48] + mov [reg_p2+48], r8 // z6 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + movq rax, p751p1_7 + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_6 + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p1+16] + movq rax, p751p1_5 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+56] + mov [reg_p2+56], r9 // z7 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + movq rax, p751p1_8 + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_7 + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_6 + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p1+24] + movq rax, p751p1_5 + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+64] + mov [reg_p2+64], r10 // z8 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + movq rax, p751p1_9 + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_8 + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_7 + mul r13 + add r8, rax + adc 
r9, rdx + adc r10, 0 + + movq rax, p751p1_6 + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [reg_p1+32] + movq rax, p751p1_5 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+72] + mov [reg_p2+72], r8 // z9 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + movq rax, p751p1_10 + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_9 + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_8 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_7 + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_6 + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rcx, [reg_p2+40] + movq rax, p751p1_5 + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+80] + mov [reg_p2+80], r9 // z10 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + movq rax, p751p1_11 + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_10 + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_9 + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_8 + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_7 + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_6 + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r11, [reg_p2+48] + movq rax, p751p1_5 + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+88] + mov [reg_p2+88], r10 // z11 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + movq rax, p751p1_11 + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_10 + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_9 + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_8 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_7 + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_6 + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r12, [reg_p2+56] + movq rax, p751p1_5 + mul r12 + add r8, rax + adc r9, 
rdx + adc r10, 0 + add r8, [reg_p1+96] + mov [reg_p2], r8 // z0 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + movq rax, p751p1_11 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_10 + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_9 + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_8 + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_7 + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_6 + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p2+64] + movq rax, p751p1_5 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+104] + mov [reg_p2+8], r9 // z1 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + movq rax, p751p1_11 + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_10 + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_9 + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_8 + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_7 + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_6 + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p2+72] + movq rax, p751p1_5 + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+112] + mov [reg_p2+16], r10 // z2 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + movq rax, p751p1_11 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_10 + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_9 + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_8 + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_7 + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_6 + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [reg_p2+80] + movq rax, p751p1_5 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+120] + mov [reg_p2+24], r8 // z3 + adc r9, 0 + adc r10, 0 + + xor r8, 
r8 + movq rax, p751p1_11 + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_10 + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_9 + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_8 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_7 + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_6 + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rcx, [reg_p2+88] + movq rax, p751p1_5 + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+128] + mov [reg_p2+32], r9 // z4 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + movq rax, p751p1_11 + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_10 + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_9 + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_8 + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_7 + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_6 + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+136] + mov [reg_p2+40], r10 // z5 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + movq rax, p751p1_11 + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_10 + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_9 + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_8 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_7 + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+144] + mov [reg_p2+48], r8 // z6 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + movq rax, p751p1_11 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_10 + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_9 + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_8 + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+152] + mov [reg_p2+56], r9 // z7 + adc r10, 0 + adc 
r8, 0 + + xor r9, r9 + movq rax, p751p1_11 + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_10 + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_9 + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+160] + mov [reg_p2+64], r10 // z8 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + movq rax, p751p1_11 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_10 + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+168] // z9 + mov [reg_p2+72], r8 // z9 + adc r9, 0 + adc r10, 0 + + movq rax, p751p1_11 + mul rcx + add r9, rax + adc r10, rdx + add r9, [reg_p1+176] // z10 + mov [reg_p2+80], r9 // z10 + adc r10, 0 + add r10, [reg_p1+184] // z11 + mov [reg_p2+88], r10 // z11 + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// 751-bit multiprecision addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.globl oqs_sidh_cln16_mp_add751_asm +oqs_sidh_cln16_mp_add751_asm: + push r12 + push r13 + push r14 + push r15 + push rbx + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rax, [reg_p1+64] + mov rbx, [reg_p1+72] + mov rcx, [reg_p1+80] + mov rdi, [reg_p1+88] + + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + adc r15, [reg_p2+56] + adc rax, [reg_p2+64] + adc rbx, [reg_p2+72] + adc rcx, [reg_p2+80] + adc rdi, [reg_p2+88] + + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rax + mov [reg_p3+72], rbx + mov [reg_p3+80], rcx + mov [reg_p3+88], rdi + + pop 
rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// 2x751-bit multiprecision addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.globl oqs_sidh_cln16_mp_add751x2_asm +oqs_sidh_cln16_mp_add751x2_asm: + push r12 + push r13 + push r14 + push r15 + push rbx + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rax, [reg_p1+64] + mov rbx, [reg_p1+72] + mov rcx, [reg_p1+80] + + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + adc r15, [reg_p2+56] + adc rax, [reg_p2+64] + adc rbx, [reg_p2+72] + adc rcx, [reg_p2+80] + + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rax + mov [reg_p3+72], rbx + mov [reg_p3+80], rcx + mov rax, [reg_p1+88] + adc rax, [reg_p2+88] + mov [reg_p3+88], rax + + mov r8, [reg_p1+96] + mov r9, [reg_p1+104] + mov r10, [reg_p1+112] + mov r11, [reg_p1+120] + mov r12, [reg_p1+128] + mov r13, [reg_p1+136] + mov r14, [reg_p1+144] + mov r15, [reg_p1+152] + mov rax, [reg_p1+160] + mov rbx, [reg_p1+168] + mov rcx, [reg_p1+176] + mov rdi, [reg_p1+184] + + adc r8, [reg_p2+96] + adc r9, [reg_p2+104] + adc r10, [reg_p2+112] + adc r11, [reg_p2+120] + adc r12, [reg_p2+128] + adc r13, [reg_p2+136] + adc r14, [reg_p2+144] + adc r15, [reg_p2+152] + adc rax, [reg_p2+160] + adc rbx, [reg_p2+168] + adc rcx, [reg_p2+176] + adc rdi, [reg_p2+184] + + mov [reg_p3+96], r8 + mov [reg_p3+104], r9 + mov [reg_p3+112], r10 + mov [reg_p3+120], r11 + mov [reg_p3+128], r12 + mov [reg_p3+136], r13 + mov [reg_p3+144], r14 + mov [reg_p3+152], 
r15 + mov [reg_p3+160], rax + mov [reg_p3+168], rbx + mov [reg_p3+176], rcx + mov [reg_p3+184], rdi + + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret diff --git a/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64.c b/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64.c new file mode 100644 index 0000000000000000000000000000000000000000..b0df611d5f614c102a74d3ffc07263e1a50616be --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64.c @@ -0,0 +1,88 @@ +/******************************************************************************************** + * SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral + * Diffie-Hellman key exchange. + * + * Author: David Urbanik; dburbani@uwaterloo.ca + * + * Abstract: Finite field arithmetic for ARM64 using code modified from the original x86_64 + * and generic implementations by Microsoft. + * + * Most of this file is just a wrapper for the asm file. The other routines are + * direct copies of their counterparts on the AMD64 side. + * + * Modified to allow inputs in [0, 2*p751-1]. + * + *********************************************************************************************/ + +#include "../SIDH_internal.h" + +// Global constants +extern const uint64_t p751[NWORDS_FIELD]; +extern const uint64_t p751x2[NWORDS_FIELD]; + +__inline void oqs_sidh_cln16_fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modular addition, c = a+b mod p751. + // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + + oqs_sidh_cln16_fpadd751_asm(a, b, c); +} + +__inline void oqs_sidh_cln16_fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modular subtraction, c = a-b mod p751. + // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + + oqs_sidh_cln16_fpsub751_asm(a, b, c); +} + +__inline void oqs_sidh_cln16_fpneg751(digit_t *a) { // Modular negation, a = -a mod p751. 
+ // Input/output: a in [0, 2*p751-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t *) p751x2)[i], a[i], borrow, a[i]); + } +} + +void oqs_sidh_cln16_fpdiv2_751(const digit_t *a, digit_t *c) { // Modular division by two, c = a/2 mod p751. + // Input : a in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p521 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t *) p751)[i] & mask, carry, c[i]); + } + + oqs_sidh_cln16_mp_shiftr1(c, NWORDS_FIELD); +} + +void oqs_sidh_cln16_fpcorrection751(digit_t *a) { // Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t *) p751)[i], borrow, a[i]); + } + mask = 0 - (digit_t) borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t *) p751)[i] & mask, borrow, a[i]); + } +} + +void oqs_sidh_cln16_mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. + + UNREFERENCED_PARAMETER(nwords); + + oqs_sidh_cln16_mul751_asm(a, b, c); +} + +void oqs_sidh_cln16_rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751. + // mc = ma*R^-1 mod p751x2, where R = 2^768. + // If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1]. + // ma is assumed to be in Montgomery representation. 
+ + oqs_sidh_cln16_rdc751_asm(ma, mc); +} diff --git a/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64_asm.S b/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64_asm.S new file mode 100644 index 0000000000000000000000000000000000000000..b643d5e8f7a459acbe5cfa0f025ca4896bec4f9a --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64_asm.S @@ -0,0 +1,2315 @@ +//******************************************************************************************* +// SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral +// Diffie-Hellman key exchange. +// +// Author: David Urbanik; dburbani@uwaterloo.ca +// +// Abstract: Assembly optimizations for finite field arithmetic on 64-bit ARM. +// +// Modified to allow inputs in [0, 2*p751-1]. +// +//******************************************************************************************* + +.data + +// p751 + 1 +p751p1: +.quad 0xEEB0000000000000 +.quad 0xE3EC968549F878A8 +.quad 0xDA959B1A13F7CC76 +.quad 0x084E9867D6EBE876 +.quad 0x8562B5045CB25748 +.quad 0x0E12909F97BADC66 +.quad 0x00006FE5D541F71C + +// p751 +p751: +.quad 0xFFFFFFFFFFFFFFFF +.quad 0xEEAFFFFFFFFFFFFF +.quad 0xE3EC968549F878A8 +.quad 0xDA959B1A13F7CC76 +.quad 0x084E9867D6EBE876 +.quad 0x8562B5045CB25748 +.quad 0x0E12909F97BADC66 +.quad 0x00006FE5D541F71C + +// 2 * p751 +p751x2: +.quad 0xFFFFFFFFFFFFFFFE +.quad 0xFFFFFFFFFFFFFFFF +.quad 0xDD5FFFFFFFFFFFFF +.quad 0xC7D92D0A93F0F151 +.quad 0xB52B363427EF98ED +.quad 0x109D30CFADD7D0ED +.quad 0x0AC56A08B964AE90 +.quad 0x1C25213F2F75B8CD +.quad 0x0000DFCBAA83EE38 + + +.text +//*********************************************************************** +// Field addition +// Operation: c [x2] = a [x0] + b [x1] +//*********************************************************************** +.global oqs_sidh_cln16_fpadd751_asm +oqs_sidh_cln16_fpadd751_asm: + // Arguments are 3 pointers of type digit_t*, where the first two arguments are summands and the third is the result register. 
+ // These arguments are stored in x0, x1, and x2 respectively. + + // load first summand into x3 - x14 + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x0,#64] + ldp x13, x14, [x0,#80] + + // add first summand and second summand and store result in x3 - x14 + ldp x15, x16, [x1,#0] + ldp x17, x18, [x1,#16] + adds x3, x3, x15 + adcs x4, x4, x16 + adcs x5, x5, x17 + adcs x6, x6, x18 + ldp x15, x16, [x1,#32] + ldp x17, x18, [x1,#48] + adcs x7, x7, x15 + adcs x8, x8, x16 + adcs x9, x9, x17 + adcs x10, x10, x18 + ldp x15, x16, [x1,#64] + ldp x17, x18, [x1,#80] + adcs x11, x11, x15 + adcs x12, x12, x16 + adcs x13, x13, x17 + adcs x14, x14, x18 + + // subtract 2xp751 to the resut in x3 - x14 + ldr x16, p751x2 + subs x3, x3, x16 + ldr x15, p751x2 + 8 + sbcs x4, x4, x15 + sbcs x5, x5, x15 + sbcs x6, x6, x15 + sbcs x7, x7, x15 + ldr x16, p751x2 + 16 + ldr x17, p751x2 + 24 + sbcs x8, x8, x16 + ldr x18, p751x2 + 32 + sbcs x9, x9, x17 + ldr x16, p751x2 + 40 + sbcs x10, x10, x18 + ldr x17, p751x2 + 48 + sbcs x11, x11, x16 + ldr x18, p751x2 + 56 + sbcs x12, x12, x17 + ldr x15, p751x2 + 64 + sbcs x13, x13, x18 + sbcs x14, x14, x15 + sbc x15, xzr, xzr + + // add 2xp751 back but anded with the mask in x15 + ldr x16, p751x2 + and x16, x16, x15 + ldr x17, p751x2 + 8 + and x17, x17, x15 + ldr x18, p751x2 + 16 + and x18, x18, x15 + + adds x3, x3, x16 + adcs x4, x4, x17 + adcs x5, x5, x17 + adcs x6, x6, x17 + adcs x7, x7, x17 + adcs x8, x8, x18 + + ldr x16, p751x2 + 24 + and x16, x16, x15 + adcs x9, x9, x16 + + ldr x16, p751x2 + 32 + and x16, x16, x15 + ldr x17, p751x2 + 40 + and x17, x17, x15 + ldr x18, p751x2 + 48 + and x18, x18, x15 + + adcs x10, x10, x16 + adcs x11, x11, x17 + adcs x12, x12, x18 + + ldr x16, p751x2 + 56 + and x16, x16, x15 + ldr x17, p751x2 + 64 + and x17, x17, x15 + + adcs x13, x13, x16 + adcs x14, x14, x17 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + stp 
x11, x12, [x2,#64] + stp x13, x14, [x2,#80] + ret + + +//*********************************************************************** +// Field subtraction +// Operation: c [x2] = a [x0] - b [x1] +//*********************************************************************** +.global oqs_sidh_cln16_fpsub751_asm +oqs_sidh_cln16_fpsub751_asm: + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x0,#64] + ldp x13, x14, [x0,#80] + + ldp x15, x16, [x1, #0] + subs x3, x3, x15 + sbcs x4, x4, x16 + ldp x15, x16, [x1, #16] + sbcs x5, x5, x15 + sbcs x6, x6, x16 + ldp x15, x16, [x1, #32] + sbcs x7, x7, x15 + sbcs x8, x8, x16 + ldp x15, x16, [x1, #48] + sbcs x9, x9, x15 + sbcs x10, x10, x16 + ldp x15, x16, [x1, #64] + sbcs x11, x11, x15 + sbcs x12, x12, x16 + ldp x15, x16, [x1, #80] + sbcs x13, x13, x15 + sbcs x14, x14, x16 + sbc x17, xzr, xzr + + ldr x15, p751x2 + and x15, x15, x17 + ldr x16, p751x2 + 8 + and x16, x16, x17 + ldr x18, p751x2 + 16 + and x18, x18, x17 + + adds x3, x3, x15 + adcs x4, x4, x16 + adcs x5, x5, x16 + adcs x6, x6, x16 + adcs x7, x7, x16 + adcs x8, x8, x18 + + ldr x15, p751x2 + 24 + and x15, x15, x17 + ldr x16, p751x2 + 32 + and x16, x16, x17 + + adcs x9, x9, x15 + adcs x10, x10, x16 + + ldr x15, p751x2 + 40 + and x15, x15, x17 + ldr x16, p751x2 + 48 + and x16, x16, x17 + + adcs x11, x11, x15 + adcs x12, x12, x16 + + ldr x15, p751x2 + 56 + and x15, x15, x17 + ldr x16, p751x2 + 64 + and x16, x16, x17 + + adcs x13, x13, x15 + adcs x14, x14, x16 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + stp x11, x12, [x2,#64] + stp x13, x14, [x2,#80] + ret + + +//*********************************************************************** +// Integer multiplication using Comba method +// Operation: c [x2] = a [x0] * b [x1] +//*********************************************************************** +.global oqs_sidh_cln16_mul751_asm +oqs_sidh_cln16_mul751_asm: + sub sp, sp, #80 + 
stp x19, x20, [sp] + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + stp x25, x26, [sp, #48] + stp x27, x28, [sp, #64] + + ldp x3, x4, [x0, #0] + ldp x5, x6, [x1, #0] + mul x18, x3, x5 + umulh x17, x3, x5 + // c0 is now in x18 + + // a0 * b1 + mul x13, x3, x6 + umulh x14, x3, x6 + + adds x17, x17, x13 + adcs x16, x14, xzr + adcs x15, xzr, xzr + + // b0 * a1 + mul x13, x4, x5 + umulh x14, x4, x5 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // store c0 and c1 + stp x18, x17, [x2, #0] + + // load a2, a3, b2, b3 + ldp x7, x8, [x0, #16] + ldp x9, x10, [x1, #16] + + // a0 * b2 + mul x13, x3, x9 + umulh x14, x3, x9 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, xzr, xzr + + // a1 * b1 + mul x13, x4, x6 + umulh x14, x4, x6 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a2 * b0 + mul x13, x7, x5 + umulh x14, x7, x5 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // c2 is now in x16 + + // a0 * b3 + mul x13, x3, x10 + umulh x14, x3, x10 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, xzr, xzr + + // a1 * b2 + mul x13, x4, x9 + umulh x14, x4, x9 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a2 * b1 + mul x13, x7, x6 + umulh x14, x7, x6 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a3 * b0 + mul x13, x8, x5 + umulh x14, x8, x5 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // store c2 and c3 + stp x16, x15, [x2, #16] + + // a1 * b3 + mul x13, x4, x10 + umulh x14, x4, x10 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, xzr, xzr + + // a2 * b2 + mul x13, x7, x9 + umulh x14, x7, x9 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a3 * b1 + mul x13, x8, x6 + umulh x14, x8, x6 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // load a4, a5 + ldp x11, x12, [x0, #32] + + // a4 * b0 + mul x13, x11, x5 + umulh x14, x11, x5 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs 
x16, x16, xzr + + // load b4, b5 + ldp x19, x20, [x1, #32] + + // a0 * b4 + mul x13, x3, x19 + umulh x14, x3, x19 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // c4 is now in x18 + + // a0 * b5 + mul x13, x3, x20 + umulh x14, x3, x20 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, xzr, xzr + + // a1 * b4 + mul x13, x4, x19 + umulh x14, x4, x19 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a2 * b3 + mul x13, x7, x10 + umulh x14, x7, x10 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a3 * b2 + mul x13, x8, x9 + umulh x14, x8, x9 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a4 * b1 + mul x13, x11, x6 + umulh x14, x11, x6 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a5 * b0 + mul x13, x12, x5 + umulh x14, x12, x5 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // store c4 and c5 + stp x18, x17, [x2, #32] + + // load a6, a7 + ldp x21, x22, [x0, #48] + + // a6 * b0 + mul x13, x21, x5 + umulh x14, x21, x5 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, xzr, xzr + + // a5 * b1 + mul x13, x12, x6 + umulh x14, x12, x6 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a4 * b2 + mul x13, x11, x9 + umulh x14, x11, x9 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a3 * b3 + mul x13, x8, x10 + umulh x14, x8, x10 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a2 * b4 + mul x13, x7, x19 + umulh x14, x7, x19 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a1 * b5 + mul x13, x4, x20 + umulh x14, x4, x20 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // load b6, b7 + ldp x23, x24, [x1, #48] + + // a0 * b6 + mul x13, x3, x23 + umulh x14, x3, x23 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // c6 is now in x16 + + // a0 * b7 + mul x13, x3, x24 + umulh x14, x3, x24 + + adds x15, x15, 
x13 + adcs x18, x18, x14 + adcs x17, xzr, xzr + + // a1 * b6 + mul x13, x4, x23 + umulh x14, x4, x23 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a2 * b5 + mul x13, x7, x20 + umulh x14, x7, x20 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a3 * b4 + mul x13, x8, x19 + umulh x14, x8, x19 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a4 * b3 + mul x13, x11, x10 + umulh x14, x11, x10 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a5 * b2 + mul x13, x12, x9 + umulh x14, x12, x9 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a6 * b1 + mul x13, x21, x6 + umulh x14, x21, x6 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a7 * b0 + mul x13, x22, x5 + umulh x14, x22, x5 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // store c6 and c7 + stp x16, x15, [x2, #48] + + // load a8, a9 + ldp x25, x26, [x0, #64] + + // a8 * b0 + mul x13, x25, x5 + umulh x14, x25, x5 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, xzr, xzr + + // a7 * b1 + mul x13, x22, x6 + umulh x14, x22, x6 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a6 * b2 + mul x13, x21, x9 + umulh x14, x21, x9 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a5 * b3 + mul x13, x12, x10 + umulh x14, x12, x10 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a4 * b4 + mul x13, x11, x19 + umulh x14, x11, x19 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a3 * b5 + mul x13, x8, x20 + umulh x14, x8, x20 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a2 * b6 + mul x13, x7, x23 + umulh x14, x7, x23 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a1 * b7 + mul x13, x4, x24 + umulh x14, x4, x24 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // load b8, b9 + ldp x27, x28, [x1, #64] + + // 
a0 * b8 + mul x13, x3, x27 + umulh x14, x3, x27 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // c8 is now in x18 + + // a0 * b9 + mul x13, x3, x28 + umulh x14, x3, x28 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, xzr, xzr + + // a1 * b8 + mul x13, x4, x27 + umulh x14, x4, x27 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a2 * b7 + mul x13, x7, x24 + umulh x14, x7, x24 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a3 * b6 + mul x13, x8, x23 + umulh x14, x8, x23 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a4 * b5 + mul x13, x11, x20 + umulh x14, x11, x20 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a5 * b4 + mul x13, x12, x19 + umulh x14, x12, x19 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a6 * b3 + mul x13, x21, x10 + umulh x14, x21, x10 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a7 * b2 + mul x13, x22, x9 + umulh x14, x22, x9 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a8 * b1 + mul x13, x25, x6 + umulh x14, x25, x6 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a9 * b0 + mul x13, x26, x5 + umulh x14, x26, x5 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // store c8 and c9 + stp x18, x17, [x2, #64] + + // load a10, a11; a0 and a1 unloaded + ldp x3, x4, [x0, #80] + + // a10 * b0 + mul x13, x3, x5 + umulh x14, x3, x5 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, xzr, xzr + + // a9 * b1 + mul x13, x26, x6 + umulh x14, x26, x6 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a8 * b2 + mul x13, x25, x9 + umulh x14, x25, x9 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a7 * b3 + mul x13, x22, x10 + umulh x14, x22, x10 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a6 * b4 + mul x13, x21, x19 + umulh x14, x21, x19 + + 
adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a5 * b5 + mul x13, x12, x20 + umulh x14, x12, x20 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a4 * b6 + mul x13, x11, x23 + umulh x14, x11, x23 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a3 * b7 + mul x13, x8, x24 + umulh x14, x8, x24 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a2 * b8 + mul x13, x7, x27 + umulh x14, x7, x27 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // load a0, a1; b0 and b1 unloaded + ldp x5, x6, [x0, #0] + + // a1 * b9 + mul x13, x6, x28 + umulh x14, x6, x28 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // load b10, b11; a10 and a11 unloaded + ldp x3, x4, [x1, #80] + + // a0 * b10 + mul x13, x3, x5 + umulh x14, x3, x5 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // c10 now in x16 + + // a0 * b11 + mul x13, x4, x5 + umulh x14, x4, x5 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, xzr, xzr + + // a1 * b10 + mul x13, x3, x6 + umulh x14, x3, x6 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a2 * b9 + mul x13, x7, x28 + umulh x14, x7, x28 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a3 * b8 + mul x13, x8, x27 + umulh x14, x8, x27 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a4 * b7 + mul x13, x11, x24 + umulh x14, x11, x24 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a5 * b6 + mul x13, x12, x23 + umulh x14, x12, x23 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a6 * b5 + mul x13, x21, x20 + umulh x14, x21, x20 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a7 * b4 + mul x13, x22, x19 + umulh x14, x22, x19 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a8 * b3 + mul x13, x25, x10 + umulh x14, x25, x10 + + adds x15, x15, x13 + adcs x18, x18, 
x14 + adcs x17, x17, xzr + + // a9 * b2 + mul x13, x26, x9 + umulh x14, x26, x9 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // load a10, a11; b10 and b11 unloaded + ldp x3, x4, [x0, #80] + // load b0, b1; a0 and a1 unloaded + ldp x5, x6, [x1, #0] + + // a10 * b1 + mul x13, x3, x6 + umulh x14, x3, x6 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a11 * b0 + mul x13, x4, x5 + umulh x14, x4, x5 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // store c10 and c11 + stp x16, x15, [x2, #80] + + // a11 * b1 + mul x13, x4, x6 + umulh x14, x4, x6 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, xzr, xzr + + // a10 * b2 + mul x13, x9, x3 + umulh x14, x9, x3 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a9 * b3 + mul x13, x26, x10 + umulh x14, x26, x10 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a8 * b4 + mul x13, x25, x19 + umulh x14, x25, x19 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a7 * b5 + mul x13, x22, x20 + umulh x14, x22, x20 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a6 * b6 + mul x13, x21, x23 + umulh x14, x21, x23 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a5 * b7 + mul x13, x12, x24 + umulh x14, x12, x24 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a4 * b8 + mul x13, x11, x27 + umulh x14, x11, x27 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a3 * b9 + mul x13, x8, x28 + umulh x14, x8, x28 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // load b10, b11; a10 and a11 unloaded + ldp x3, x4, [x1, #80] + // load a0, a1; b0 and b1 unloaded + ldp x5, x6, [x0, #0] + + // a2 * b10 + mul x13, x7, x3 + umulh x14, x7, x3 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a1 * b11 + mul x13, x6, x4 + umulh x14, x6, x4 + + adds x18, x18, x13 + adcs x17, x17, x14 + 
adcs x16, x16, xzr + + // c12 now in x18 + + // a2 * b11 + mul x13, x7, x4 + umulh x14, x7, x4 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, xzr, xzr + + // a3 * b10 + mul x13, x8, x3 + umulh x14, x8, x3 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a4 * b9 + mul x13, x11, x28 + umulh x14, x11, x28 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a5 * b8 + mul x13, x12, x27 + umulh x14, x12, x27 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a6 * b7 + mul x13, x21, x24 + umulh x14, x21, x24 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a7 * b6 + mul x13, x22, x23 + umulh x14, x22, x23 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a8 * b5 + mul x13, x25, x20 + umulh x14, x25, x20 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a9 * b4 + mul x13, x26, x19 + umulh x14, x26, x19 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // load a10, a11; a0 and a1 unloaded + ldp x5, x6, [x0, #80] + + // a10 * b3 + mul x13, x5, x10 + umulh x14, x5, x10 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a11 * b2 + mul x13, x6, x9 + umulh x14, x6, x9 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // store c12 and c13 + stp x18, x17, [x2, #96] + + // a11 * b3 + mul x13, x6, x10 + umulh x14, x6, x10 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, xzr, xzr + + // a10 * b4 + mul x13, x5, x19 + umulh x14, x5, x19 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a9 * b5 + mul x13, x26, x20 + umulh x14, x26, x20 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a8 * b6 + mul x13, x25, x23 + umulh x14, x25, x23 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a7 * b7 + mul x13, x22, x24 + umulh x14, x22, x24 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a6 * b8 + mul 
x13, x21, x27 + umulh x14, x21, x27 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a5 * b9 + mul x13, x12, x28 + umulh x14, x12, x28 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a4 * b10 + mul x13, x11, x3 + umulh x14, x11, x3 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a3 * b11 + mul x13, x8, x4 + umulh x14, x8, x4 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // c14 is now in x16 + + // a4 * b11 + mul x13, x11, x4 + umulh x14, x11, x4 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, xzr, xzr + + // a5 * b10 + mul x13, x12, x3 + umulh x14, x12, x3 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a6 * b9 + mul x13, x21, x28 + umulh x14, x21, x28 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a7 * b8 + mul x13, x22, x27 + umulh x14, x22, x27 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a8 * b7 + mul x13, x25, x24 + umulh x14, x25, x24 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a9 * b6 + mul x13, x26, x23 + umulh x14, x26, x23 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a10 * b5 + mul x13, x5, x20 + umulh x14, x5, x20 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a11 * b4 + mul x13, x6, x19 + umulh x14, x6, x19 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // c15 is now in x15 + + // store c14 and c15 + stp x16, x15, [x2, #112] + + // a11 * b5 + mul x13, x6, x20 + umulh x14, x6, x20 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, xzr, xzr + + // a10 * b6 + mul x13, x5, x23 + umulh x14, x5, x23 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a9 * b7 + mul x13, x26, x24 + umulh x14, x26, x24 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a8 * b8 + mul x13, x25, x27 + umulh x14, x25, x27 + + adds x18, x18, x13 + adcs x17, x17, 
x14 + adcs x16, x16, xzr + + // a7 * b9 + mul x13, x22, x28 + umulh x14, x22, x28 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a6 * b10 + mul x13, x21, x3 + umulh x14, x21, x3 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a5 * b11 + mul x13, x12, x4 + umulh x14, x12, x4 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // c16 is now in x18 + + // a6 * b11 + mul x13, x21, x4 + umulh x14, x21, x4 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, xzr, xzr + + // a7 * b10 + mul x13, x22, x3 + umulh x14, x22, x3 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a8 * b9 + mul x13, x25, x28 + umulh x14, x25, x28 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a9 * b8 + mul x13, x26, x27 + umulh x14, x26, x27 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a10 * b7 + mul x13, x5, x24 + umulh x14, x5, x24 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // a11 * b6 + mul x13, x6, x23 + umulh x14, x6, x23 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // store c16 and c17 + stp x18, x17, [x2, #128] + + // a11 * b7 + mul x13, x6, x24 + umulh x14, x6, x24 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, xzr, xzr + + // a10 * b8 + mul x13, x5, x27 + umulh x14, x5, x27 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a9 * b9 + mul x13, x26, x28 + umulh x14, x26, x28 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a8 * b10 + mul x13, x25, x3 + umulh x14, x25, x3 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // a7 * b11 + mul x13, x22, x4 + umulh x14, x22, x4 + + adds x16, x16, x13 + adcs x15, x15, x14 + adcs x18, x18, xzr + + // c18 is now in x16 + + // a8 * b11 + mul x13, x25, x4 + umulh x14, x25, x4 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, xzr, xzr + + // a9 * b10 + mul x13, x26, x3 + umulh x14, x26, 
x3 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a10 * b9 + mul x13, x5, x28 + umulh x14, x5, x28 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // a11 * b8 + mul x13, x6, x27 + umulh x14, x6, x27 + + adds x15, x15, x13 + adcs x18, x18, x14 + adcs x17, x17, xzr + + // store c18 and c19 + stp x16, x15, [x2, #144] + + // a11 * b9 + mul x13, x6, x28 + umulh x14, x6, x28 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, xzr, xzr + + // a10 * b10 + mul x13, x5, x3 + umulh x14, x5, x3 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // a9 * b11 + mul x13, x26, x4 + umulh x14, x26, x4 + + adds x18, x18, x13 + adcs x17, x17, x14 + adcs x16, x16, xzr + + // c20 is now in x18 + + // a10 * b11 + mul x13, x5, x4 + umulh x14, x5, x4 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, xzr, xzr + + // a11 * b10 + mul x13, x6, x3 + umulh x14, x6, x3 + + adds x17, x17, x13 + adcs x16, x16, x14 + adcs x15, x15, xzr + + // store c20 and c21 + stp x18, x17, [x2, #160] + + // a11 * b11 + mul x13, x4, x6 + umulh x14, x4, x6 + + adds x16, x16, x13 + adcs x15, x15, x14 + + // store c22 and c23 + stp x16, x15, [x2, #176] + + ldp x19, x20, [sp] + ldp x21, x22, [sp, #16] + ldp x23, x24, [sp, #32] + ldp x25, x26, [sp, #48] + ldp x27, x28, [sp, #64] + add sp, sp, #80 + ret + + +//*********************************************************************** +// Montgomery reduction +// Based on comba method +// Operation: mc [x1] = ma [x0] +// NOTE: ma=mc is not allowed +//*********************************************************************** +.global oqs_sidh_cln16_rdc751_asm +oqs_sidh_cln16_rdc751_asm: + // ma is in x0 + // mc is in x1 + + sub sp, sp, #80 + stp x19, x20, [sp] + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + stp x25, x26, [sp, #48] + stp x27, x28, [sp, #64] + + // load the prime values into x14 through x20 + ldr x14, p751p1 + 0 + ldr x15, p751p1 + 8 + ldr x16, p751p1 + 16 + ldr x17, p751p1 + 24 + ldr 
x18, p751p1 + 32 + ldr x19, p751p1 + 40 + ldr x20, p751p1 + 48 + + // the values mc[0] through mc[11] will be held in x2 through x13 + // until the very end when they will be stored + + // load mc[0] through mc[4] and ma[5] + ldp x2, x3, [x0, #0] + ldp x4, x5, [x0, #16] + ldp x6, x21, [x0, #32] + + // ma[5] iteration + mul x22, x2, x14 + umulh x23, x2, x14 + adds x24, x22, x21 + adcs x25, x23, xzr + add x7, x24, xzr // set mc[5] + + // ma[6] iteration + + ldr x21, [x0, #48] + + mul x22, x2, x15 + umulh x23, x2, x15 + adds x25, x25, x22 + adcs x26, x23, xzr + + mul x22, x3, x14 + umulh x23, x3, x14 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, xzr, xzr + + adds x25, x25, x21 + adcs x26, x26, xzr + adcs x24, x24, xzr + add x8, x25, xzr // set mc[6] + + // ma[7] iteration + + ldr x21, [x0, #56] + mul x22, x2, x16 + umulh x23, x2, x16 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, xzr, xzr + + mul x22, x3, x15 + umulh x23, x3, x15 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x4, x14 + umulh x23, x4, x14 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + adds x26, x26, x21 + adcs x24, x24, xzr + adcs x25, x25, xzr + add x9, x26, xzr // set mc[7] + + // ma[8] iteration + + ldr x21, [x0, #64] + mul x22, x2, x17 + umulh x23, x2, x17 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, xzr, xzr + + mul x22, x3, x16 + umulh x23, x3, x16 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x4, x15 + umulh x23, x4, x15 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x5, x14 + umulh x23, x5, x14 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + adds x24, x24, x21 + adcs x25, x25, xzr + adcs x26, x26, xzr + add x10, x24, xzr // set mc[8] + + // ma[9] iteration + + ldr x21, [x0, #72] + mul x22, x2, x18 + umulh x23, x2, x18 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, xzr, xzr + + mul x22, x3, x17 + umulh x23, x3, x17 + adds x25, x25, x22 + 
adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x4, x16 + umulh x23, x4, x16 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x5, x15 + umulh x23, x5, x15 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x6, x14 + umulh x23, x6, x14 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + adds x25, x25, x21 + adcs x26, x26, xzr + adcs x24, x24, xzr + add x11, x25, xzr // set mc[9] + + // ma[10] iteration + + ldr x21, [x0, #80] + mul x22, x2, x19 + umulh x23, x2, x19 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, xzr, xzr + + mul x22, x3, x18 + umulh x23, x3, x18 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x4, x17 + umulh x23, x4, x17 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x5, x16 + umulh x23, x5, x16 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x6, x15 + umulh x23, x6, x15 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x7, x14 + umulh x23, x7, x14 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + adds x26, x26, x21 + adcs x24, x24, xzr + adcs x25, x25, xzr + add x12, x26, xzr // set mc[10] + + // ma[11] iteration + ldr x21, [x0, #88] + + mul x22, x2, x20 + umulh x23, x2, x20 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, xzr, xzr + + mul x22, x3, x19 + umulh x23, x3, x19 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x4, x18 + umulh x23, x4, x18 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x5, x17 + umulh x23, x5, x17 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x6, x16 + umulh x23, x6, x16 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x7, x15 + umulh x23, x7, x15 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x8, x14 + umulh x23, x8, x14 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, 
x26, xzr + + adds x24, x24, x21 + adcs x25, x25, xzr + adcs x26, x26, xzr + add x13, x24, xzr // set mc[11] + + // ma[12] iteration + + ldr x21, [x0, #96] + mul x22, x3, x20 + umulh x23, x3, x20 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, xzr, xzr + + mul x22, x4, x19 + umulh x23, x4, x19 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x5, x18 + umulh x23, x5, x18 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x6, x17 + umulh x23, x6, x17 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x7, x16 + umulh x23, x7, x16 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x8, x15 + umulh x23, x8, x15 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x9, x14 + umulh x23, x9, x14 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + adds x25, x25, x21 + adcs x26, x26, xzr + adcs x24, x24, xzr + add x2, x25, xzr // set mc[0] + + // ma[13] iteration + + ldr x21, [x0, #104] + mul x22, x4, x20 + umulh x23, x4, x20 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, xzr, xzr + + mul x22, x5, x19 + umulh x23, x5, x19 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x6, x18 + umulh x23, x6, x18 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x7, x17 + umulh x23, x7, x17 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x8, x16 + umulh x23, x8, x16 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x9, x15 + umulh x23, x9, x15 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x10, x14 + umulh x23, x10, x14 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + adds x26, x26, x21 + adcs x24, x24, xzr + adcs x25, x25, xzr + add x3, x26, xzr // set mc[1] + + // ma[14] iteration + + ldr x21, [x0, #112] + mul x22, x5, x20 + umulh x23, x5, x20 + adds x24, x24, x22 + adcs x25, x25, x23 + 
adcs x26, xzr, xzr + + mul x22, x6, x19 + umulh x23, x6, x19 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x7, x18 + umulh x23, x7, x18 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x8, x17 + umulh x23, x8, x17 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x9, x16 + umulh x23, x9, x16 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x10, x15 + umulh x23, x10, x15 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x11, x14 + umulh x23, x11, x14 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + adds x24, x24, x21 + adcs x25, x25, xzr + adcs x26, x26, xzr + add x4, x24, xzr // set mc[2] + + // ma[15] iteration + + ldr x21, [x0, #120] + mul x22, x6, x20 + umulh x23, x6, x20 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, xzr, xzr + + mul x22, x7, x19 + umulh x23, x7, x19 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x8, x18 + umulh x23, x8, x18 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x9, x17 + umulh x23, x9, x17 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x10, x16 + umulh x23, x10, x16 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x11, x15 + umulh x23, x11, x15 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x12, x14 + umulh x23, x12, x14 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + adds x25, x25, x21 + adcs x26, x26, xzr + adcs x24, x24, xzr + add x5, x25, xzr // set mc[3] + + // ma[16] iteration + + ldr x21, [x0, #128] + mul x22, x7, x20 + umulh x23, x7, x20 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, xzr, xzr + + mul x22, x8, x19 + umulh x23, x8, x19 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x9, x18 + umulh x23, x9, x18 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + 
mul x22, x10, x17 + umulh x23, x10, x17 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x11, x16 + umulh x23, x11, x16 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x12, x15 + umulh x23, x12, x15 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x13, x14 + umulh x23, x13, x14 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + adds x26, x26, x21 + adcs x24, x24, xzr + adcs x25, x25, xzr + add x6, x26, xzr // set mc[4] + + // ma[17] iteration + + ldr x21, [x0, #136] + mul x22, x8, x20 + umulh x23, x8, x20 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, xzr, xzr + + mul x22, x9, x19 + umulh x23, x9, x19 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x10, x18 + umulh x23, x10, x18 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x11, x17 + umulh x23, x11, x17 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x12, x16 + umulh x23, x12, x16 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x13, x15 + umulh x23, x13, x15 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + adds x24, x24, x21 + adcs x25, x25, xzr + adcs x26, x26, xzr + add x7, x24, xzr // set mc[5] + + // ma[18] iteration + + ldr x21, [x0, #144] + mul x22, x9, x20 + umulh x23, x9, x20 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, xzr, xzr + + mul x22, x10, x19 + umulh x23, x10, x19 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x11, x18 + umulh x23, x11, x18 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x12, x17 + umulh x23, x12, x17 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + mul x22, x13, x16 + umulh x23, x13, x16 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + adds x25, x25, x21 + adcs x26, x26, xzr + adcs x24, x24, xzr + add x8, x25, xzr // set mc[6] + + // ma[19] 
iteration + + ldr x21, [x0, #152] + mul x22, x10, x20 + umulh x23, x10, x20 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, xzr, xzr + + mul x22, x11, x19 + umulh x23, x11, x19 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x12, x18 + umulh x23, x12, x18 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + mul x22, x13, x17 + umulh x23, x13, x17 + adds x26, x26, x22 + adcs x24, x24, x23 + adcs x25, x25, xzr + + adds x26, x26, x21 + adcs x24, x24, xzr + adcs x25, x25, xzr + add x9, x26, xzr // set mc[7] + + // ma[20] iteration + ldr x21, [x0, #160] + + mul x22, x11, x20 + umulh x23, x11, x20 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, xzr, xzr + + mul x22, x12, x19 + umulh x23, x12, x19 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + mul x22, x13, x18 + umulh x23, x13, x18 + adds x24, x24, x22 + adcs x25, x25, x23 + adcs x26, x26, xzr + + adds x24, x24, x21 + adcs x25, x25, xzr + adcs x26, x26, xzr + add x10, x24, xzr // set mc[8] + + // ma[21] iteration + + ldr x21, [x0, #168] + mul x22, x12, x20 + umulh x23, x12, x20 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, xzr, xzr + + mul x22, x13, x19 + umulh x23, x13, x19 + adds x25, x25, x22 + adcs x26, x26, x23 + adcs x24, x24, xzr + + adds x25, x25, x21 + adcs x26, x26, xzr + adcs x24, x24, xzr + add x11, x25, xzr // set mc[9] + + // ma[22] iteration + + ldr x21, [x0, #176] + mul x22, x13, x20 + umulh x23, x13, x20 + adds x26, x26, x22 + adcs x24, x24, x23 + adds x26, x26, x21 + + ldr x21, [x0, #184] + adcs x24, x24, x21 + add x12, x26, xzr // set mc[10] + add x13, x24, xzr // set mc[11] + + stp x2, x3, [x1, #0] + stp x4, x5, [x1, #16] + stp x6, x7, [x1, #32] + stp x8, x9, [x1, #48] + stp x10, x11, [x1, #64] + stp x12, x13, [x1, #80] + + ldp x19, x20, [sp] + ldp x21, x22, [sp, #16] + ldp x23, x24, [sp, #32] + ldp x25, x26, [sp, #48] + ldp x27, x28, [sp, #64] + add sp, sp, #80 + ret + + 
+//*********************************************************************** +// 751-bit multiprecision addition +// Operation: c [x2] = a [x0] + b [x1] +//*********************************************************************** +.global oqs_sidh_cln16_mp_add751_asm +oqs_sidh_cln16_mp_add751_asm: + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x0,#64] + ldp x13, x14, [x0,#80] + + ldp x15, x16, [x1,#0] + ldp x17, x18, [x1,#16] + adds x3, x3, x15 + adcs x4, x4, x16 + adcs x5, x5, x17 + adcs x6, x6, x18 + ldp x15, x16, [x1,#32] + ldp x17, x18, [x1,#48] + adcs x7, x7, x15 + adcs x8, x8, x16 + adcs x9, x9, x17 + adcs x10, x10, x18 + ldp x15, x16, [x1,#64] + ldp x17, x18, [x1,#80] + adcs x11, x11, x15 + adcs x12, x12, x16 + adcs x13, x13, x17 + adcs x14, x14, x18 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + stp x11, x12, [x2,#64] + stp x13, x14, [x2,#80] + ret + + +//*********************************************************************** +// 2x751-bit multiprecision addition +// Operation: c [x2] = a [x0] + b [x1] +//*********************************************************************** +.global oqs_sidh_cln16_mp_add751x2_asm +oqs_sidh_cln16_mp_add751x2_asm: + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x0,#64] + ldp x13, x14, [x0,#80] + + ldp x15, x16, [x1,#0] + ldp x17, x18, [x1,#16] + adds x3, x3, x15 + adcs x4, x4, x16 + adcs x5, x5, x17 + adcs x6, x6, x18 + ldp x15, x16, [x1,#32] + ldp x17, x18, [x1,#48] + adcs x7, x7, x15 + adcs x8, x8, x16 + adcs x9, x9, x17 + adcs x10, x10, x18 + ldp x15, x16, [x1,#64] + ldp x17, x18, [x1,#80] + adcs x11, x11, x15 + adcs x12, x12, x16 + adcs x13, x13, x17 + adcs x14, x14, x18 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + stp x11, x12, [x2,#64] + stp x13, x14, [x2,#80] + + ldp x3, x4, [x0,#96] + ldp x5, x6, 
[x0,#112] + ldp x7, x8, [x0,#128] + ldp x9, x10, [x0,#144] + ldp x11, x12, [x0,#160] + ldp x13, x14, [x0,#176] + + ldp x15, x16, [x1,#96] + ldp x17, x18, [x1,#112] + adcs x3, x3, x15 + adcs x4, x4, x16 + adcs x5, x5, x17 + adcs x6, x6, x18 + ldp x15, x16, [x1,#128] + ldp x17, x18, [x1,#144] + adcs x7, x7, x15 + adcs x8, x8, x16 + adcs x9, x9, x17 + adcs x10, x10, x18 + ldp x15, x16, [x1,#160] + ldp x17, x18, [x1,#176] + adcs x11, x11, x15 + adcs x12, x12, x16 + adcs x13, x13, x17 + adcs x14, x14, x18 + + stp x3, x4, [x2,#96] + stp x5, x6, [x2,#112] + stp x7, x8, [x2,#128] + stp x9, x10, [x2,#144] + stp x11, x12, [x2,#160] + stp x13, x14, [x2,#176] + ret diff --git a/crypt/liboqs/kex_sidh_cln16/LICENSE.txt b/crypt/liboqs/kex_sidh_cln16/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..3b4ffccb0a28c6be8d8207d73b9fd3eab7155e60 --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/LICENSE.txt @@ -0,0 +1,21 @@ +SIDH Library + +Copyright (c) Microsoft Corporation +All rights reserved. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +associated documentation files (the ""Software""), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/crypt/liboqs/kex_sidh_cln16/Makefile.am b/crypt/liboqs/kex_sidh_cln16/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..b7272433963a635647761870f0fcdb90b861083d --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/Makefile.am @@ -0,0 +1,18 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libcln16.la + + +libcln16_la_SOURCES = ec_isogeny.c fpx.c kex_sidh_cln16.c SIDH.c sidh_kex.c SIDH_setup.c + +if X86_64 +libcln16_la_SOURCES += AMD64/fp_x64.c AMD64/fp_x64_asm.S +else +if ARM64 +libcln16_la_SOURCES += ARM64/fp_arm64.c ARM64/fp_arm64_asm.S +else +libcln16_la_SOURCES += generic/fp_generic.c +endif +endif + +libcln16_la_CPPFLAGS = -I../../include -I. -fPIC -w +libcln16_la_CPPFLAGS += $(AM_CPPFLAGS) diff --git a/crypt/liboqs/kex_sidh_cln16/README.txt b/crypt/liboqs/kex_sidh_cln16/README.txt new file mode 100644 index 0000000000000000000000000000000000000000..7f4e4df30356c90b051248029d0210c46f71a5bf --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/README.txt @@ -0,0 +1,79 @@ + SIDH v2.0 (C Edition) + ======================= + +The SIDH v2.0 library (C Edition) is a supersingular isogeny-based cryptography library that implements a +new suite of algorithms for a post-quantum, ephemeral Diffie-Hellman key exchange scheme [2]. + +The library was developed by Microsoft Research for experimentation purposes. + +SECURITY NOTE: the scheme is NOT secure when using static keys. + +*** THE ORIGINAL README HAS BEEN TRIMMED LEAVING ONLY THE INFO RELEVANT FOR THE OQS INTEGRATION *** + +1. CONTENTS: + -------- + +/ - Library C and header files.
+AMD64/ - Optimized implementation of the field arithmetic for x64 platforms +ARM64/ - Optimized implementation of the field arithmetic for ARMv8 platforms +generic/ - Implementation of the field arithmetic in portable C +README.txt - This readme file + + +2. CONTRIBUTIONS: + ------------- + + The field arithmetic implementation for 64-bit ARM processors (ARM64 folder) was contributed by + David Urbanik (dburbani@uwaterloo.ca). + + +3. MAIN FEATURES: + ------------- + +- Support ephemeral Diffie-Hellman key exchange. +- Support a peace-of-mind hybrid key exchange mode that adds a classical elliptic curve Diffie-Hellman + key exchange on a high-security Montgomery curve providing 384 bits of classical ECDH security. +- Protected against timing and cache-timing attacks through regular, constant-time implementation of + all operations on secret key material. +- Basic implementation of the underlying arithmetic functions using portable C to enable support on + a wide range of platforms including x64, x86 and ARM. +- Optimized implementation of the underlying arithmetic functions for x64 platforms with optional, + high-performance x64 assembly for Linux. +- Optimized implementation of the underlying arithmetic functions for 64-bit ARM platforms using assembly + for Linux. + + +4. NEW IN VERSION 2.0: + ------------------ + +- A new variant of the isogeny-based key exchange that includes a new suite of algorithms for efficient + public key compression [3]. In this variant, public keys are only 330 bytes (compare to 564 bytes + required by the original SIDH key exchange variant without compression). +- An optimized implementation of the underlying arithmetic functions for 64-bit ARM (ARMv8) platforms. + + +5. SUPPORTED PLATFORMS: + ------------------- + +SIDH v2.0 is supported on a wide range of platforms including x64, x86 and ARM devices running Windows +or Linux OS. We have tested the library with Microsoft Visual Studio 2015, GNU GCC v4.9, and clang v3.8. 
+See instructions below to choose an implementation option and compile on one of the supported platforms. + + + +REFERENCES: +---------- + +[1] Craig Costello, Patrick Longa, and Michael Naehrig. + Efficient algorithms for supersingular isogeny Diffie-Hellman. + Advances in Cryptology - CRYPTO 2016, LNCS 9814, pp. 572-601, 2016. + Extended version available at: http://eprint.iacr.org/2016/413. + +[2] David Jao and Luca DeFeo. + Towards quantum-resistant cryptosystems from supersingular elliptic curve isogenies. + PQCrypto 2011, LNCS 7071, pp. 19-34, 2011. + +[3] Craig Costello, David Jao, Patrick Longa, Michael Naehrig, Joost Renes, and David Urbanik. + Efficient compression of SIDH public keys. + Advances in Cryptology - EUROCRYPT 2017, 2017. + Preprint version available at: http://eprint.iacr.org/2016/963. \ No newline at end of file diff --git a/crypt/liboqs/kex_sidh_cln16/SIDH.c b/crypt/liboqs/kex_sidh_cln16/SIDH.c new file mode 100644 index 0000000000000000000000000000000000000000..d99d1e695cae57ac39aa4984b6acc37f65469d64 --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/SIDH.c @@ -0,0 +1,133 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral +* Diffie-Hellman key exchange. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: supersingular elliptic curve isogeny parameters +* +*********************************************************************************************/ + +#include "SIDH_internal.h" + +// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: +// -------------------------------------------------------------------------------------------------- +// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located +// at the leftmost position (i.e., little endian format). 
+// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {b, a}, with b +// in the least significant position. +// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. + +// +// Curve isogeny system "SIDHp751". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p751^2), where A=0, B=1 and C=1 +// + +CurveIsogenyStaticData CurveIsogeny_SIDHp751 = { + "SIDHp751", 768, 384, // Curve isogeny system ID, smallest multiple of 32 larger than the prime bitlength and smallest multiple of 32 larger than the order bitlength + 751, // Bitlength of the prime + // Prime p751 = 2^372*3^239-1 + {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF, 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C}, + // Base curve parameter "A" + {0}, + // Base curve parameter "C" + {1}, + // Order bitlength for Alice + 372, + // Order of Alice's subgroup + {0x0, 0x0, 0x0, 0x0, 0x0, 0x0010000000000000}, + // Order bitlength for Bob + 379, + // Power of Bob's subgroup order + 239, + // Order of Bob's subgroup + {0xC968549F878A8EEB, 0x59B1A13F7CC76E3E, 0xE9867D6EBE876DA9, 0x2B5045CB25748084, 0x2909F97BADC66856, 0x06FE5D541F71C0E1}, + // Alice's generator PA = (XPA,YPA), where XPA and YPA are defined over GF(p751) + {0x4B0346F5CCE233E9, 0x632646086CE3ACD5, 0x5661D14AB7347693, 0xA58A20449AF1F133, 0xB9AC2F40C56D6FA4, 0x8E561E008FA0E3F3, 0x6CAE096D5DB822C9, 0x83FDB7A4AD3E83E8, 0xB1317AD904386217, 0x3FA23F89F6BE06D2, 0x429C8D36FF46BCC9, 0x00003E82027A38E9, 0x12E0D620BFB341D5, 0x0F8EEA7370893430, 0x5A99EBEC3B5B8B00, 0x236C7FAC9E69F7FD, 0x0F147EF3BD0CFEC5, 0x8ED5950D80325A8D, 0x1E911F50BF3F721A, 0x163A7421DFA8378D, 0xC331B043DA010E6A, 0x5E15915A755883B7, 0xB6236F5F598D56EB, 0x00003BBF8DCD4E7E}, + // Bob's generator PB = (XPB,YPB), where XPB and YPB are defined over GF(p751) + 
{0x76ED2325DCC93103, 0xD9E1DF566C1D26D3, 0x76AECB94B919AEED, 0xD3785AAAA4D646C5, 0xCB610E30288A7770, 0x9BD3778659023B9E, 0xD5E69CF26DF23742, 0xA3AD8E17B9F9238C, 0xE145FE2D525160E0, 0xF8D5BCE859ED725D, 0x960A01AB8FF409A2, 0x00002F1D80EF06EF, 0x91479226A0687894, 0xBBC6BAF5F6BA40BB, 0x15B529122CFE3CA6, 0x7D12754F00E898A3, 0x76EBA0C8419745E9, 0x0A94F06CDFB3EADE, 0x399A6EDB2EEB2F9B, 0xE302C5129C049EEB, 0xC35892123951D4B6, 0x15445287ED1CC55D, 0x1ACAF351F09AB55A, 0x00000127A46D082A}, + // BigMont's curve parameter A24 = (A+2)/4 + 156113, + // BigMont's order, where BigMont is defined by y^2=x^3+A*x^2+x + {0xA59B73D250E58055, 0xCB063593D0BE10E1, 0xF6515CCB5D076CBB, 0x66880747EDDF5E20, 0xBA515248A6BFD4AB, 0x3B8EF00DDDDC789D, 0xB8FB25A1527E1E2A, 0xB6A566C684FDF31D, 0x0213A619F5BAFA1D, 0xA158AD41172C95D2, 0x0384A427E5EEB719, 0x00001BF975507DC7}, + // Montgomery constant Montgomery_R2 = (2^768)^2 mod p751 + {0x233046449DAD4058, 0xDB010161A696452A, 0x5E36941472E3FD8E, 0xF40BFE2082A2E706, 0x4932CCA8904F8751, 0x1F735F1F1EE7FC81, 0xA24F4D80C1048E18, 0xB56C383CCDB607C5, 0x441DD47B735F9C90, 0x5673ED2C6A6AC82A, 0x06C905261132294B, 0x000041AD830F1F35}, + // Montgomery constant -p751^-1 mod 2^768 + {0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000, 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x258C28E5D541F71C}, + // Value one in Montgomery representation + {0x00000000000249ad, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8310000000000000, 0x5527b1e4375c6c66, 0x697797bf3f4f24d0, 0xc89db7b2ac5c4e2e, 0x4ca4b439d2076956, 0x10f7926c7512c7e9, 0x00002d5b24bce5e2}}; + +// Fixed parameters for isogeny tree computation + +const unsigned int splits_Alice[SIDH_MAX_Alice] = { + 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 5, 5, 6, 7, 8, 8, 9, 9, 9, 9, 9, 9, 9, 12, + 11, 12, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 17, 17, 18, 18, 17, 21, 17, + 18, 
21, 20, 21, 21, 21, 21, 21, 22, 25, 25, 25, 26, 27, 28, 28, 29, 30, 31, + 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 35, 36, 36, 33, 36, 35, 36, 36, 35, + 36, 36, 37, 38, 38, 39, 40, 41, 42, 38, 39, 40, 41, 42, 40, 46, 42, 43, 46, + 46, 46, 46, 48, 48, 48, 48, 49, 49, 48, 53, 54, 51, 52, 53, 54, 55, 56, 57, + 58, 59, 59, 60, 62, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, + 65, 66, 67, 65, 66, 67, 66, 69, 70, 66, 67, 66, 69, 70, 69, 70, 70, 71, 72, + 71, 72, 72, 74, 74, 75, 72, 72, 74, 74, 75, 72, 72, 74, 75, 75, 72, 72, 74, + 75, 75, 77, 77, 79, 80, 80, 82}; + +const unsigned int splits_Bob[SIDH_MAX_Bob] = { + 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 7, 8, 8, 8, 8, 9, 9, 9, 9, 9, + 10, 12, 12, 12, 12, 12, 12, 13, 14, 14, 15, 16, 16, 16, 16, 16, 17, 16, 16, + 17, 19, 19, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 24, 24, 25, 27, + 27, 28, 28, 29, 28, 29, 28, 28, 28, 30, 28, 28, 28, 29, 30, 33, 33, 33, 33, + 34, 35, 37, 37, 37, 37, 38, 38, 37, 38, 38, 38, 38, 38, 39, 43, 38, 38, 38, + 38, 43, 40, 41, 42, 43, 48, 45, 46, 47, 47, 48, 49, 49, 49, 50, 51, 50, 49, + 49, 49, 49, 51, 49, 53, 50, 51, 50, 51, 51, 51, 52, 55, 55, 55, 56, 56, 56, + 56, 56, 58, 58, 61, 61, 61, 63, 63, 63, 64, 65, 65, 65, 65, 66, 66, 65, 65, + 66, 66, 66, 66, 66, 66, 66, 71, 66, 73, 66, 66, 71, 66, 73, 66, 66, 71, 66, + 73, 68, 68, 71, 71, 73, 73, 73, 75, 75, 78, 78, 78, 80, 80, 80, 81, 81, 82, + 83, 84, 85, 86, 86, 86, 86, 86, 87, 86, 88, 86, 86, 86, 86, 88, 86, 88, 86, + 86, 86, 88, 88, 86, 86, 86, 93, 90, 90, 92, 92, 92, 93, 93, 93, 93, 93, 97, + 97, 97, 97, 97, 97}; + +const uint64_t LIST[22][SIDH_NWORDS64_FIELD] = { + {0xC4EC4EC4EC4EDB72, 0xEC4EC4EC4EC4EC4E, 0x4EC4EC4EC4EC4EC4, 0xC4EC4EC4EC4EC4EC, 0xEC4EC4EC4EC4EC4E, 0x7464EC4EC4EC4EC4, + 0x40E503E18E2D8BE1, 0x4C633882E467773F, 0x998CB725CB703B25, 0x51F8F01043ABC448, 0x70A53813C7A0B43A, 0x00006D56A7157672}, + {0x276276276275B6C1, 0x6276276276276276, 0x7627627627627627, 0x2762762762762762, 0x6276276276276276, 
0x6377627627627627, + 0x2F25DD32AAF69FE5, 0xC6FBECF3EDD1AA16, 0x29C9664A396A6297, 0x0110D8C47D20DEFD, 0x1322BABB1082C8DD, 0x00000CCBE6DE8350}, + {0x093B97EBDB11A7FE, 0x5093B97EBDB11A05, 0x05093B97EBDB11A0, 0xA05093B97EBDB11A, 0x1A05093B97EBDB11, 0x6F005093B97EBDB1, + 0x7204A6634D6196D9, 0x1D6428F62F917BE5, 0x037CE7F8E9689A28, 0x913EC08959C36290, 0x03D1055241F89FDD, 0x000066963FEC58EB}, + {0x98C2BA559CF4F604, 0xA98C2BA559CF516A, 0x6A98C2BA559CF516, 0x16A98C2BA559CF51, 0x516A98C2BA559CF5, 0x1A56A98C2BA559CF, + 0xDD14E231C3FF5DDC, 0x5AB78BDF0FB0C987, 0x168ED3F1672906EC, 0xAEF17C4BE3A425E0, 0x6F1B34309268385F, 0x0000438BAFFC5E17}, + {0xA37CA5409E30BE12, 0x20D6AFD873D163ED, 0xCA5409E30BA70497, 0x6AFD873D163EDA37, 0x409E30BA7049720D, 0x7013D163EDA37CA5, + 0x196C325CFB1D98A8, 0x2A83CC98457F6BB1, 0x157AA4649C505D94, 0x556B2CFA3ED1E977, 0x9C8FB301D3BE27CD, 0x0000659B5D688370}, + {0x437158A103E247EB, 0x23A9D7BF076A48BD, 0x158A103E256DD0AF, 0x9D7BF076A48BD437, 0xA103E256DD0AF23A, 0xD3776A48BD437158, + 0xD4F7B332C1F74531, 0x6A60D92C4C627CD9, 0xC8009067FA1223C2, 0x195578D349C85ABC, 0x24DCFD2C3CE56026, 0x00001170D9C4A49E}, + {0xBBC96234E708BFC3, 0xEE2CE77DBE4CE5A9, 0x21EF6EA93828AD37, 0x66C6ED51865018AE, 0xCB18F74253FB3379, 0x6231B31A5644369D, + 0xF1831316FD5F9AD5, 0xD64412327D9D93D5, 0x2D9659AFA40085D6, 0xB872D3713E1F01AD, 0x96B929E85C90E590, 0x00002A0A122F3E1B}, + {0x751DE109156C74F6, 0xC86993912AE79AFE, 0x96234E708BDAC04C, 0xCE77DBE4CE5A9BBC, 0xF6EA93828AD37EE2, 0x51B51865018AE21E, + 0x57F8534430BDF5AF, 0xA5BA9F3225E0FA02, 0x05DBA7E2AB49759E, 0xE4706D1BDBA54763, 0xC5316BE14AF60ADD, 0x00002007A8A7A392}, + {0x2DEC0AC86E1972FF, 0xD121D09CA2E105D1, 0x258D13A0778EDFB2, 0x25140153000C1B6E, 0xA06B73718D440E30, 0xA46BFDEB49118BC0, + 0x11C799EE82EF46CF, 0xF094D7258BE44445, 0x6B087550522BC899, 0xD4380D82ADEEA2D3, 0x2AFFEB03C6970E0B, 0x00004FF89FD0E867}, + {0xF48E11E080A36CD8, 0x75AA967CF316BF89, 0xED69E3E85A6CDEA8, 0x228638171449F794, 0xD4107549BB0BC6AE, 0xB7888349726731CC, + 
0x0589577AC89D03A2, 0x79218D005004DCD2, 0xA69CB3C82106FDB8, 0xE54D908CD9B31ED9, 0x2BB46423F8B44F5D, 0x0000158FC37F2F78}, + {0xA2B8F30D2D8B2266, 0x37AE9DA734F3D4D4, 0x4BC3AC46B1EE2D59, 0xA541D219D9E660D2, 0xFD629383B8C12367, 0x0E789576DA7C1E23, + 0x2321F1135780B208, 0x059EED9A8BB7694E, 0x3EAC20CCA7C7B679, 0xADED37DC1395BAAB, 0xD701BA16F6CD4328, 0x0000250A355A8E3D}, + {0x8D08D7B596C87C8E, 0xFC2B5A576AB81FA7, 0x4ED68A1C251D1EAD, 0xA6618E345258FA06, 0xB532F4F490BD3165, 0x0987A5FDBAA88699, + 0x77E908F4AE484907, 0xC85226731C871CED, 0x6F3E5A699F216EC7, 0x70E42ADFCCD68C99, 0x2277864817AA0CAD, 0x000037F521DA6BAC}, + {0xDB72B65CA8D1D274, 0x286A73457D063FD5, 0x7355642D132BA567, 0x2A970D9461C0DC41, 0x93D2A07ED36F3BCC, 0xFD59A18D2D03447E, + 0xBC047FB33098286A, 0x153E65AE22E4D2F0, 0xBC3F628AF44DDCEB, 0xCF8C49463A2BEC5D, 0x64D31CBF9A0FAE5B, 0x00000E88DF789F48}, + {0x7E0E3CF3F602CC03, 0x240AE231C56EB636, 0x1630875FADB3CA47, 0x3FDF66239B9021FE, 0x4FA6BEA94AAE8287, 0x20BD32942BAEF1D9, + 0x3DBE52BE754CD223, 0xD46D6B986A4C461E, 0x31772CCF6AB0EC49, 0x0362808B445792BE, 0xA57068B23D5D4F04, 0x0000233188CFA1F9}, + {0x5CFEB9EE80FF8802, 0x641C991F35243E77, 0x109BF7F4D15352D9, 0xF57027C40F2AEC39, 0x78834C224A9E8F4D, 0x3B53C38C5DDA4903, + 0x2472CAD0E4A1DD20, 0x91121637EFEFBFEB, 0x555DDF1E4E875433, 0xD185E0CEBC9A6BF8, 0x247E7766FEA9846A, 0x00004E24131398C0}, + {0xAE911D5E41FDE1D5, 0x09FD291EAE9A7528, 0xD94DB04CE76D674F, 0xF269A050B317A36A, 0x1010C2464C5B488A, 0x165E22C0571F72CE, + 0xB649686CDD7FAA40, 0xC65F833CCBC8E854, 0xA1DC607E92B4EC01, 0x6A9F6EA6C5D5598C, 0xB73B45E033D20693, 0x0000126974812437}, + {0x7EF889C1569E078D, 0x8B4790D31AFC6D2F, 0x24BAD80FCF2607D2, 0x13C099586804EDD0, 0x0B219830D09F67F8, 0xFEEBDD0A795A4E0D, + 0x2C86D567D8A5A5C6, 0x29EFDB5516CD064B, 0xAFB0A05F0230B35C, 0x73FCFA65EC7C5CB4, 0x245E08DC310C14E1, 0x00001778AC2903DF}, + {0xF2BF1FF8427C7315, 0x591042D093B90137, 0x23EF8D48782832C9, 0x8DFB39E92296E3D6, 0x0C39FF556BEBDD42, 0x369F6980A4270C5D, + 0x901F9AD6FCBAA761, 
0x0E8E81D435F5FC7F, 0x9A795B9A8409D3D3, 0xD29FB9AE4384290F, 0x3B58F53DD7270C90, 0x00001E27D50D0631}, + {0x838A7C8B0026C13C, 0xD38CAB350DC1F6BD, 0x426C57FE2436E928, 0xB81B289B8792A253, 0xF8EDB68037D3FB8E, 0x677EE0B4C50C01CD, + 0xF43DCE6FED67139A, 0xF87EFEBF43D77877, 0x3EEA0E8543763A8A, 0x26E5A18357A35379, 0x55867648B9EA7D35, 0x000069DEC7A3C7DA}, + {0x91CCFD3901F3F3FE, 0x2053992393125D73, 0x2129B3A10D7FF7C0, 0x74C64B3E68087A32, 0xEE46C5739B026DF9, 0x53E7B33F97EC0300, + 0x14672E57801EC044, 0x18610440AA870975, 0xB6B9D9E0E0097AE6, 0x37AD3B922ED0F367, 0xA737A55936D5A8B8, 0x00005A30AF4F51DA}, + {0xC925488939591E52, 0x8F87728BF0ED44E9, 0xF987EF64E4365147, 0x9338B89963265410, 0x340DA16F22024645, 0x5D295419E474BDC1, + 0xBA0C2E509FC0510B, 0x957E35D641D5DDB5, 0x922F901AA4A236D8, 0xCBFA24C0F7E172E3, 0xB05A32F88CB5B9DC, 0x00001DC7A766A676}, + {0x6128F8C2B276D2A1, 0x857530A2A633CE28, 0xEB624F41494C5D1E, 0x3FA62AE33B92CCA8, 0x11BCABB4CC4FBE22, 0x91EA14743FDBAC70, + 0x9876F7DF900DC277, 0x375FD25E09091CBA, 0x580F3084B099A111, 0x58E9B3FB623FB297, 0x957732F791F6C337, 0x00000B070F784B99}}; diff --git a/crypt/liboqs/kex_sidh_cln16/SIDH.h b/crypt/liboqs/kex_sidh_cln16/SIDH.h new file mode 100644 index 0000000000000000000000000000000000000000..762ea99b60e3704075d269f3f66f45e4f91e7934 --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/SIDH.h @@ -0,0 +1,356 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral +* Diffie-Hellman key exchange. +* +* Copyright (c) Microsoft Corporation. All rights reserved. 
+* +* +* Abstract: main header file +* +*********************************************************************************************/ + +#ifndef __SIDH_H__ +#define __SIDH_H__ + +// For C++ +#ifdef __cplusplus +extern "C" { +#endif + +#include <oqs/rand.h> +#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> + +// Definition of operating system + +#define OS_WIN 1 +#define OS_LINUX 2 + +#if defined(WINDOWS) // Microsoft Windows OS +#define OS_TARGET OS_WIN +#else +#define OS_TARGET OS_LINUX +#endif + +// Definition of the targeted architecture and basic data types + +#define TARGET_AMD64 1 +#define TARGET_x86 2 +#define TARGET_ARM 3 +#define TARGET_ARM64 4 + +#if (defined(__x86_64__) || defined(__x86_64) || defined(__arch64__) || defined(_M_AMD64) || defined(_M_X64) || defined(_WIN64) || !defined(__LP64__)) +#define _AMD64_ +#elif (defined(__aarch64__)) +#define _ARM64_ +#else +#define _X86_ +#endif + +#if defined(_AMD64_) +#define TARGET TARGET_AMD64 +#define RADIX 64 +typedef uint64_t digit_t; // Unsigned 64-bit digit +typedef int64_t sdigit_t; // Signed 64-bit digit +typedef uint32_t hdigit_t; // Unsigned 32-bit digit +#define NWORDS_FIELD 12 // Number of words of a 751-bit field element +#define p751_ZERO_WORDS 5 // Number of "0" digits in the least significant part of p751 + 1 +#elif defined(_X86_) +#define TARGET TARGET_x86 +#define RADIX 32 +typedef uint32_t digit_t; // Unsigned 32-bit digit +typedef int32_t sdigit_t; // Signed 32-bit digit +typedef uint16_t hdigit_t; // Unsigned 16-bit digit +#define NWORDS_FIELD 24 +#define p751_ZERO_WORDS 11 +#elif defined(_ARM_) +#define TARGET TARGET_ARM +#define RADIX 32 +typedef uint32_t digit_t; // Unsigned 32-bit digit +typedef int32_t sdigit_t; // Signed 32-bit digit +typedef uint16_t hdigit_t; // Unsigned 16-bit digit +#define NWORDS_FIELD 24 +#define p751_ZERO_WORDS 11 +#elif defined(_ARM64_) +#define TARGET TARGET_ARM64 +#define RADIX 64 +typedef uint64_t digit_t; +typedef int64_t sdigit_t; +typedef 
uint32_t hdigit_t; +#define NWORDS_FIELD 12 +#define p751_ZERO_WORDS 5 +#else +#error-- "Unsupported ARCHITECTURE" +#endif + +#define RADIX64 64 + +// Selection of generic, portable implementation + +#if !defined(SIDH_ASM) // defined(_GENERIC_) +#define GENERIC_IMPLEMENTATION +#endif + +// Unsupported configurations + +#if (TARGET != TARGET_AMD64) && (TARGET != TARGET_ARM64) && !defined(GENERIC_IMPLEMENTATION) +#error-- "Unsupported configuration" +#endif + +// Extended datatype support + +#if defined(GENERIC_IMPLEMENTATION) +typedef uint64_t uint128_t[2]; +#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_LINUX) && (COMPILER == COMPILER_GCC || COMPILER == COMPILER_CLANG) +#define UINT128_SUPPORT +typedef unsigned uint128_t __attribute__((mode(TI))); +#elif (TARGET == TARGET_ARM64 && OS_TARGET == OS_LINUX) && (COMPILER == COMPILER_GCC || COMPILER == COMPILER_CLANG) +#define UINT128_SUPPORT +typedef unsigned uint128_t __attribute__((mode(TI))); +#elif (TARGET == TARGET_AMD64) && (OS_TARGET == OS_WIN && COMPILER == COMPILER_VC) +#define SCALAR_INTRIN_SUPPORT +typedef uint64_t uint128_t[2]; +#else +#error-- "Unsupported configuration" +#endif + +// Basic constants + +#define SIDH_NBITS_FIELD 751 +#define SIDH_MAXBITS_FIELD 768 +#define SIDH_MAXWORDS_FIELD ((SIDH_MAXBITS_FIELD + RADIX - 1) / RADIX) // Max. number of words to represent field elements +#define SIDH_NWORDS64_FIELD ((SIDH_NBITS_FIELD + 63) / 64) // Number of 64-bit words of a 751-bit field element +#define SIDH_NBITS_ORDER 384 +#define SIDH_NWORDS_ORDER ((SIDH_NBITS_ORDER + RADIX - 1) / RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. +#define SIDH_NWORDS64_ORDER ((SIDH_NBITS_ORDER + 63) / 64) // Number of 64-bit words of a 384-bit element +#define SIDH_MAXBITS_ORDER SIDH_NBITS_ORDER +#define SIDH_MAXWORDS_ORDER ((SIDH_MAXBITS_ORDER + RADIX - 1) / RADIX) // Max. number of words to represent elements in [1, oA-1] or [1, oB]. 
+ +// Basic constants for elliptic curve BigMont + +#define BIGMONT_SIDH_SIDH_NBITS_ORDER 749 +#define BIGMONT_MAXBITS_ORDER 768 +#define BIGMONT_NWORDS_ORDER ((BIGMONT_SIDH_SIDH_NBITS_ORDER + RADIX - 1) / RADIX) // Number of words of BigMont's subgroup order. +#define BIGMONT_MAXWORDS_ORDER ((BIGMONT_MAXBITS_ORDER + RADIX - 1) / RADIX) // Max. number of words to represent elements in [1, BigMont_order]. + +// Size of SIDH secret key = (CurveIsogeny_SIDHp751.owordbits + 7)/8 +#define SIDH_SECRETKEY_LEN 48 +// Size of SIDH public key = 3*2*((CurveIsogeny_SIDHp751.pwordbits + 7)/8) +#define SIDH_PUBKEY_LEN 576 +// Size of compressed SIDH public key = 3*((CurveIsogenyData->owordbits + 7)/8) + 2*((CurveIsogenyData->pwordbits + 7)/8) +#define SIDH_COMPRESSED_PUBKEY_LEN 336 +// Size of value R for decompression = 2*2*((CurveIsogenyData->pwordbits + 7)/8) +#define SIDH_COMPRESSED_R_LEN 384 +// Size of value A for decompression = 2*((CurveIsogeny_SIDHp751.pwordbits + 7)/8) +#define SIDH_COMPRESSED_A_LEN 192 +// Size of SIDH shared key = 2*PBYTES_SIDHp751 +#define SIDH_SHAREDKEY_LEN 192 + +// Definitions of the error-handling type and error codes + +typedef enum { + SIDH_CRYPTO_SUCCESS, + SIDH_CRYPTO_ERROR, + SIDH_CRYPTO_ERROR_UNKNOWN, + SIDH_CRYPTO_ERROR_INVALID_PARAMETER, + SIDH_CRYPTO_ERROR_PUBLIC_KEY_VALIDATION, + SIDH_CRYPTO_ERROR_TOO_MANY_ITERATIONS, + SIDH_CRYPTO_ERROR_END_OF_LIST +} SIDH_CRYPTO_STATUS; + +#define SIDH_CRYPTO_STATUS_TYPE_SIZE (SIDH_CRYPTO_ERROR_END_OF_LIST) + +// Definition of type for curve isogeny system identifiers. 
Currently valid value is "SIDHp751" (see SIDH.h) +typedef char CurveIsogeny_ID[10]; + +// Supersingular elliptic curve isogeny structures: + +// This data struct contains the static curve isogeny data +typedef struct +{ + CurveIsogeny_ID CurveIsogeny; // Curve isogeny system identifier, base curve defined over GF(p^2) + unsigned int pwordbits; // Smallest multiple of 32 larger than the prime bitlength + unsigned int owordbits; // Smallest multiple of 32 larger than the order bitlength + unsigned int pbits; // Bitlength of the prime p + uint64_t prime[SIDH_MAXWORDS_FIELD]; // Prime p + uint64_t A[SIDH_MAXWORDS_FIELD]; // Base curve parameter "A" + uint64_t C[SIDH_MAXWORDS_FIELD]; // Base curve parameter "C" + unsigned int oAbits; // Order bitlength for Alice + uint64_t Aorder[SIDH_MAXWORDS_ORDER]; // Order of Alice's (sub)group + unsigned int oBbits; // Order bitlength for Bob + unsigned int eB; // Power of Bob's subgroup order (i.e., oB = 3^eB) + uint64_t Border[SIDH_MAXWORDS_ORDER]; // Order of Bob's (sub)group + uint64_t PA[2 * SIDH_MAXWORDS_FIELD]; // Alice's generator PA = (XPA,YPA), where XPA and YPA are defined over GF(p) + uint64_t PB[2 * SIDH_MAXWORDS_FIELD]; // Bob's generator PB = (XPB,YPB), where XPB and YPB are defined over GF(p) + unsigned int BigMont_A24; // BigMont's curve parameter A24 = (A+2)/4 + uint64_t BigMont_order[BIGMONT_MAXWORDS_ORDER]; // BigMont's subgroup order + uint64_t Montgomery_R2[SIDH_MAXWORDS_FIELD]; // Montgomery constant (2^W)^2 mod p, using a suitable value W + uint64_t Montgomery_pp[SIDH_MAXWORDS_FIELD]; // Montgomery constant -p^-1 mod 2^W, using a suitable value W + uint64_t Montgomery_one[SIDH_MAXWORDS_FIELD]; // Value one in Montgomery representation +} CurveIsogenyStaticData, *PCurveIsogenyStaticData; + +// This data struct is initialized with the targeted curve isogeny system during setup +typedef struct +{ + CurveIsogeny_ID CurveIsogeny; // Curve isogeny system identifier, base curve defined over GF(p^2) + unsigned int 
pwordbits; // Closest multiple of 32 to prime bitlength + unsigned int owordbits; // Closest multiple of 32 to order bitlength + unsigned int pbits; // Bitlength of the prime p + digit_t *prime; // Prime p + digit_t *A; // Base curve parameter "A" + digit_t *C; // Base curve parameter "C" + unsigned int oAbits; // Order bitlength for Alice + digit_t *Aorder; // Order of Alice's (sub)group + unsigned int oBbits; // Order bitlength for Bob + unsigned int eB; // Power of Bob's subgroup order (i.e., oB = 3^eB) + digit_t *Border; // Order of Bob's (sub)group + digit_t *PA; // Alice's generator PA = (XPA,YPA), where XPA and YPA are defined over GF(p) + digit_t *PB; // Bob's generator PB = (XPB,YPB), where XPB and YPB are defined over GF(p) + unsigned int BigMont_A24; // BigMont's curve parameter A24 = (A+2)/4 + digit_t *BigMont_order; // BigMont's subgroup order + digit_t *Montgomery_R2; // Montgomery constant (2^W)^2 mod p, using a suitable value W + digit_t *Montgomery_pp; // Montgomery constant -p^-1 mod 2^W, using a suitable value W + digit_t *Montgomery_one; // Value one in Montgomery representation +} CurveIsogenyStruct, *PCurveIsogenyStruct; + +// Supported curve isogeny systems: + +// "SIDHp751", base curve: supersingular elliptic curve E: y^2 = x^3 + x +extern CurveIsogenyStaticData CurveIsogeny_SIDHp751; + +/******************** Function prototypes ***********************/ +/*************** Setup/initialization functions *****************/ + +// Dynamic allocation of memory for curve isogeny structure. +// Returns NULL on error. +PCurveIsogenyStruct oqs_sidh_cln16_curve_allocate(PCurveIsogenyStaticData CurveData); + +// Initialize curve isogeny structure pCurveIsogeny with static data extracted from pCurveIsogenyData. +// This needs to be called after allocating memory for "pCurveIsogeny" using SIDH_curve_allocate(). 
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_curve_initialize(PCurveIsogenyStruct pCurveIsogeny, PCurveIsogenyStaticData pCurveIsogenyData); + +// Free memory for curve isogeny structure +void oqs_sidh_cln16_curve_free(PCurveIsogenyStruct pCurveIsogeny); + +// Output random values in the range [1, order-1] in little endian format that can be used as private keys. +SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_mod_order(digit_t *random_digits, unsigned int AliceOrBob, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand); + +// Output random values in the range [1, BigMont_order-1] in little endian format that can be used as private keys +// to compute scalar multiplications using the elliptic curve BigMont. +SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_BigMont_mod_order(digit_t *random_digits, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand); + +// Clear "nwords" digits from memory +void oqs_sidh_cln16_clear_words(void *mem, digit_t nwords); + +// OQS INTEGRATION NOTE: the following code used to be in SIDH_api.h. It is merged here to simplify integration. + +/*********************** Ephemeral key exchange API ***********************/ + +// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. +// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016. +// Extended version available at: http://eprint.iacr.org/2016/859 + +// Alice's ephemeral key-pair generation +// It produces a private key pPrivateKeyA and computes the public key pPublicKeyA. +// The private key is an even integer in the range [2, oA-2], where oA = 2^372 (i.e., 372 bits in total). +// The public key consists of 3 elements in GF(p751^2), i.e., 564 bytes. +// CurveIsogeny must be set up in advance using SIDH_curve_initialize(). 
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralKeyGeneration_A(unsigned char *pPrivateKeyA, unsigned char *pPublicKeyA, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand);
+
+// Bob's ephemeral key-pair generation
+// It produces a private key pPrivateKeyB and computes the public key pPublicKeyB.
+// The private key is an integer in the range [1, oB-1], where oB = 3^239 (i.e., 379 bits in total).
+// The public key consists of 3 elements in GF(p751^2), i.e., 564 bytes.
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralKeyGeneration_B(unsigned char *pPrivateKeyB, unsigned char *pPublicKeyB, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand);
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key pSharedSecretA using her secret key pPrivateKeyA and Bob's public key pPublicKeyB
+// Inputs: Alice's pPrivateKeyA is an even integer in the range [2, oA-2], where oA = 2^372 (i.e., 372 bits in total).
+// Bob's pPublicKeyB consists of 3 elements in GF(p751^2), i.e., 564 bytes.
+// Output: a shared secret pSharedSecretA that consists of one element in GF(p751^2), i.e., 1502 bits in total.
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_A(const unsigned char *pPrivateKeyA, const unsigned char *pPublicKeyB, unsigned char *pSharedSecretA, PCurveIsogenyStruct CurveIsogeny);
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key pSharedSecretB using his secret key pPrivateKeyB and Alice's public key pPublicKeyA
+// Inputs: Bob's pPrivateKeyB is an integer in the range [1, oB-1], where oB = 3^239 (i.e., 379 bits in total).
+// Alice's pPublicKeyA consists of 3 elements in GF(p751^2), i.e., 564 bytes.
+// Output: a shared secret pSharedSecretB that consists of one element in GF(p751^2), i.e., 1502 bits in total.
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize(). +SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_B(const unsigned char *pPrivateKeyB, const unsigned char *pPublicKeyA, unsigned char *pSharedSecretB, PCurveIsogenyStruct CurveIsogeny); + +/*********************** Ephemeral key exchange API with compressed public keys ***********************/ + +// Alice's public key compression +// It produces a compressed output that consists of three elements in Z_orderB and one field element +// Input : Alice's public key PublicKeyA, which consists of 3 elements in GF(p751^2). +// Output: a compressed value CompressedPKA that consists of three elements in Z_orderB and one element in GF(p751^2). +// CurveIsogeny must be set up in advance using SIDH_curve_initialize(). +void oqs_sidh_cln16_PublicKeyCompression_A(const unsigned char *PublicKeyA, unsigned char *CompressedPKA, PCurveIsogenyStruct CurveIsogeny); + +// Alice's public key value decompression computed by Bob +// Inputs: Bob's private key SecretKeyB, and +// Alice's compressed public key data CompressedPKA, which consists of three elements in Z_orderB and one element in GF(p751^2), +// Output: a point point_R in coordinates (X:Z) and the curve parameter param_A in GF(p751^2). Outputs are stored in Montgomery representation. +// CurveIsogeny must be set up in advance using SIDH_curve_initialize(). +void oqs_sidh_cln16_PublicKeyADecompression_B(const unsigned char *SecretKeyB, const unsigned char *CompressedPKA, unsigned char *point_R, unsigned char *param_A, PCurveIsogenyStruct CurveIsogeny); + +// Alice's ephemeral shared secret computation +// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB +// Inputs: Alice's PrivateKeyA is an even integer in the range [2, oA-2], where oA = 2^372 (i.e., 372 bits in total). +// Bob's PublicKeyB consists of 3 elements in GF(p751^2), i.e., 564 bytes. 
+// Output: a shared secret SharedSecretA that consists of one element in GF(p751^2), i.e., 1502 bits in total. +// CurveIsogeny must be set up in advance using SIDH_curve_initialize(). +SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_Compression_A(const unsigned char *PrivateKeyA, const unsigned char *point_R, const unsigned char *param_A, unsigned char *SharedSecretA, PCurveIsogenyStruct CurveIsogeny); + +// Bob's public key compression +// It produces a compressed output that consists of three elements in Z_orderA and one field element +// Input : Bob's public key PublicKeyB, which consists of 3 elements in GF(p751^2). +// Output: a compressed value CompressedPKB that consists of three elements in Z_orderA and one element in GF(p751^2). +// CurveIsogeny must be set up in advance using SIDH_curve_initialize(). +void oqs_sidh_cln16_PublicKeyCompression_B(const unsigned char *PublicKeyB, unsigned char *CompressedPKB, PCurveIsogenyStruct CurveIsogeny); + +// Bob's public key value decompression computed by Alice +// Inputs: Alice's private key SecretKeyA, and +// Bob's compressed public key data CompressedPKB, which consists of three elements in Z_orderA and one element in GF(p751^2). +// Output: a point point_R in coordinates (X:Z) and the curve parameter param_A in GF(p751^2). Outputs are stored in Montgomery representation. +// CurveIsogeny must be set up in advance using SIDH_curve_initialize(). +void oqs_sidh_cln16_PublicKeyBDecompression_A(const unsigned char *SecretKeyA, const unsigned char *CompressedPKB, unsigned char *point_R, unsigned char *param_A, PCurveIsogenyStruct CurveIsogeny); + +// Bob's ephemeral shared secret computation +// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's decompressed data point_R and param_A +// Inputs: Bob's PrivateKeyB is an integer in the range [1, oB-1], where oB = 3^239. 
+// Alice's decompressed data consists of point_R in (X:Z) coordinates and the curve parameter param_A in GF(p751^2).
+// Output: a shared secret SharedSecretB that consists of one element in GF(p751^2).
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_Compression_B(const unsigned char *PrivateKeyB, const unsigned char *point_R, const unsigned char *param_A, unsigned char *SharedSecretB, PCurveIsogenyStruct CurveIsogeny);
+
+/*********************** Scalar multiplication API using BigMont ***********************/
+
+// BigMont's scalar multiplication using the Montgomery ladder
+// Inputs: x, the affine x-coordinate of a point P on BigMont: y^2=x^3+A*x^2+x,
+// scalar m.
+// Output: xout, the affine x-coordinate of m*(x:1)
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_BigMont_ladder(unsigned char *x, digit_t *m, unsigned char *xout, PCurveIsogenyStruct CurveIsogeny);
+
+// Encoding of keys for isogeny system "SIDHp751" (wire format):
+// ------------------------------------------------------------
+// Elements over GF(p751) are encoded in 96 octets in little endian format (i.e., the least significant octet located at the leftmost position).
+// Elements (a+b*i) over GF(p751^2), where a and b are defined over GF(p751), are encoded as {b, a}, with b in the least significant position.
+// Elements over Z_oA and Z_oB are encoded in 48 octets in little endian format.
+//
+// Private keys pPrivateKeyA and pPrivateKeyB are defined in Z_oA and Z_oB (resp.) and can have values in the range [2, 2^372-2] and [1, 3^239-1], resp.
+// In the key exchange API, they are encoded in 48 octets in little endian format.
+// Public keys pPublicKeyA and pPublicKeyB consist of four elements in GF(p751^2). In the key exchange API, they are encoded in 768 octets in little
+// endian format.
+// Shared keys pSharedSecretA and pSharedSecretB consist of one element in GF(p751^2). In the key exchange API, they are encoded in 192 octets in little +// endian format. + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/crypt/liboqs/kex_sidh_cln16/SIDH_internal.h b/crypt/liboqs/kex_sidh_cln16/SIDH_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..82a4a101b9a157c777042b23a8ff5de58b587329 --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/SIDH_internal.h @@ -0,0 +1,598 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral +* Diffie-Hellman key exchange. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: internal header file +* +*********************************************************************************************/ + +#ifndef __SIDH_INTERNAL_H__ +#define __SIDH_INTERNAL_H__ + +// For C++ +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(WINDOWS) +#define UNUSED +#else +#define UNUSED __attribute__((unused)) +#endif + +#include "SIDH.h" + +// Basic constants + +#define SIDH_ALICE 0 +#define SIDH_BOB 1 +#define SIDH_MAX_INT_POINTS_ALICE 8 +// Fixed parameters for isogeny tree computation +#define SIDH_MAX_INT_POINTS_BOB 10 +#define SIDH_MAX_Alice 185 +#define SIDH_MAX_Bob 239 + +// SIDH's basic element definitions and point representations + +typedef digit_t oqs_sidh_cln16_felm_t[NWORDS_FIELD]; // Datatype for representing 751-bit field elements (768-bit max.) +typedef digit_t oqs_sidh_cln16_dfelm_t[2 * NWORDS_FIELD]; // Datatype for representing double-precision 2x751-bit field elements (2x768-bit max.) 
+typedef oqs_sidh_cln16_felm_t oqs_sidh_cln16_f2elm_t[2]; // Datatype for representing quadratic extension field elements GF(p751^2) +typedef oqs_sidh_cln16_f2elm_t oqs_sidh_cln16_publickey_t[3]; // Datatype for representing public keys equivalent to three GF(p751^2) elements + +typedef struct { + oqs_sidh_cln16_f2elm_t x; + oqs_sidh_cln16_f2elm_t y; +} oqs_sidh_cln16_point_affine; // Point representation in affine coordinates on Montgomery curve. +typedef oqs_sidh_cln16_point_affine oqs_sidh_cln16_point_t[1]; + +typedef struct { + oqs_sidh_cln16_f2elm_t X; + oqs_sidh_cln16_f2elm_t Z; +} oqs_sidh_cln16_point_proj; // Point representation in projective XZ Montgomery coordinates. +typedef oqs_sidh_cln16_point_proj oqs_sidh_cln16_point_proj_t[1]; + +typedef struct { + oqs_sidh_cln16_f2elm_t X; + oqs_sidh_cln16_f2elm_t Y; + oqs_sidh_cln16_f2elm_t Z; +} oqs_sidh_cln16_point_full_proj; // Point representation in projective XYZ Montgomery coordinates. +typedef oqs_sidh_cln16_point_full_proj oqs_sidh_cln16_point_full_proj_t[1]; + +typedef struct { + oqs_sidh_cln16_f2elm_t X2; + oqs_sidh_cln16_f2elm_t XZ; + oqs_sidh_cln16_f2elm_t Z2; + oqs_sidh_cln16_f2elm_t YZ; +} oqs_sidh_cln16_point_ext_proj; +typedef oqs_sidh_cln16_point_ext_proj oqs_sidh_cln16_point_ext_proj_t[1]; // Point representation in extended projective XYZ Montgomery coordinates. + +typedef struct { + oqs_sidh_cln16_felm_t x; + oqs_sidh_cln16_felm_t y; +} oqs_sidh_cln16_point_basefield_affine; // Point representation in affine coordinates on Montgomery curve over the base field. +typedef oqs_sidh_cln16_point_basefield_affine oqs_sidh_cln16_point_basefield_t[1]; + +typedef struct { + oqs_sidh_cln16_felm_t X; + oqs_sidh_cln16_felm_t Z; +} oqs_sidh_cln16_point_basefield_proj; // Point representation in projective XZ Montgomery coordinates over the base field. 
+typedef oqs_sidh_cln16_point_basefield_proj oqs_sidh_cln16_point_basefield_proj_t[1]; + +// Macro definitions + +#define NBITS_TO_NBYTES(nbits) (((nbits) + 7) / 8) // Conversion macro from number of bits to number of bytes +#define NBITS_TO_NWORDS(nbits) (((nbits) + (sizeof(digit_t) * 8) - 1) / (sizeof(digit_t) * 8)) // Conversion macro from number of bits to number of computer words +#define NBYTES_TO_NWORDS(nbytes) (((nbytes) + sizeof(digit_t) - 1) / sizeof(digit_t)) // Conversion macro from number of bytes to number of computer words + +// Macro to avoid compiler warnings when detecting unreferenced parameters +#define UNREFERENCED_PARAMETER(PAR) (PAR) + +/********************** Constant-time unsigned comparisons ***********************/ + +// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise + +static __inline unsigned int is_digit_nonzero_ct(digit_t x) { // Is x != 0? + return (unsigned int) ((x | (0 - x)) >> (RADIX - 1)); +} + +static __inline unsigned int is_digit_zero_ct(digit_t x) { // Is x = 0? + return (unsigned int) (1 ^ is_digit_nonzero_ct(x)); +} + +static __inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y) { // Is x < y? 
+ return (unsigned int) ((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1)); +} + +/********************** Macros for platform-dependent operations **********************/ + +#if defined(GENERIC_IMPLEMENTATION) +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + oqs_sidh_cln16_digit_x_digit((multiplier), (multiplicand), &(lo)); + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + { \ + digit_t tempReg = (addend1) + (digit_t)(carryIn); \ + (sumOut) = (addend2) + tempReg; \ + (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); \ + } + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + { \ + digit_t tempReg = (minuend) - (subtrahend); \ + unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) &is_digit_zero_ct(tempReg))); \ + (differenceOut) = tempReg - (digit_t)(borrowIn); \ + (borrowOut) = borrowReg; \ + } + +// Shift right with flexible datatype +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift))); + +// Shift left with flexible datatype +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (DigitSize - (shift))); + +// 64x64-bit multiplication +#define MUL128(multiplier, multiplicand, product) \ + oqs_sidh_cln16_mp_mul((digit_t *) &(multiplier), (digit_t *) &(multiplicand), (digit_t *) &(product), NWORDS_FIELD / 2); + +// 128-bit addition, inputs < 2^127 +#define ADD128(addend1, addend2, addition) \ + oqs_sidh_cln16_mp_add((digit_t *) (addend1), (digit_t *) (addend2), (digit_t *) (addition), NWORDS_FIELD); + +// 128-bit addition with output carry +#define ADC128(addend1, addend2, carry, addition) \ + (carry) = oqs_sidh_cln16_mp_add((digit_t *) (addend1), (digit_t *) (addend2), (digit_t *) (addition), NWORDS_FIELD); + 
+#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_WIN) + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + (lo) = _umul128((multiplier), (multiplicand), (hi)); + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + (carryOut) = _addcarry_u64((carryIn), (addend1), (addend2), &(sumOut)); + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + (borrowOut) = _subborrow_u64((borrowIn), (minuend), (subtrahend), &(differenceOut)); + +// Digit shift right +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = __shiftright128((lowIn), (highIn), (shift)); + +// Digit shift left +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = __shiftleft128((lowIn), (highIn), (shift)); + +// 64x64-bit multiplication +#define MUL128(multiplier, multiplicand, product) \ + (product)[0] = _umul128((multiplier), (multiplicand), &(product)[1]); + +// 128-bit addition, inputs < 2^127 +#define ADD128(addend1, addend2, addition) \ + { \ + unsigned char carry = _addcarry_u64(0, (addend1)[0], (addend2)[0], &(addition)[0]); \ + _addcarry_u64(carry, (addend1)[1], (addend2)[1], &(addition)[1]); \ + } + +// 128-bit addition with output carry +#define ADC128(addend1, addend2, carry, addition) \ + (carry) = _addcarry_u64(0, (addend1)[0], (addend2)[0], &(addition)[0]); \ + (carry) = _addcarry_u64((carry), (addend1)[1], (addend2)[1], &(addition)[1]); + +// 128-bit subtraction, subtrahend < 2^127 +#define SUB128(minuend, subtrahend, difference) \ + { \ + unsigned char borrow = _subborrow_u64(0, (minuend)[0], (subtrahend)[0], &(difference)[0]); \ + _subborrow_u64(borrow, (minuend)[1], (subtrahend)[1], &(difference)[1]); \ + } + +// 128-bit right shift, max. 
shift value is 64 +#define SHIFTR128(Input, shift, shiftOut) \ + (shiftOut)[0] = __shiftright128((Input)[0], (Input)[1], (shift)); \ + (shiftOut)[1] = (Input)[1] >> (shift); + +// 128-bit left shift, max. shift value is 64 +#define SHIFTL128(Input, shift, shiftOut) \ + (shiftOut)[1] = __shiftleft128((Input)[0], (Input)[1], (shift)); \ + (shiftOut)[0] = (Input)[0] << (shift); + +#define MULADD128(multiplier, multiplicand, addend, carry, result) \ + ; \ + { \ + uint128_t product; \ + MUL128(multiplier, multiplicand, product); \ + ADC128(addend, product, carry, result); \ + } + +#elif ((TARGET == TARGET_AMD64 || TARGET == TARGET_ARM64) && OS_TARGET == OS_LINUX) + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + { \ + uint128_t tempReg = (uint128_t)(multiplier) * (uint128_t)(multiplicand); \ + *(hi) = (digit_t)(tempReg >> RADIX); \ + (lo) = (digit_t) tempReg; \ + } + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + { \ + uint128_t tempReg = (uint128_t)(addend1) + (uint128_t)(addend2) + (uint128_t)(carryIn); \ + (carryOut) = (digit_t)(tempReg >> RADIX); \ + (sumOut) = (digit_t) tempReg; \ + } + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + { \ + uint128_t tempReg = (uint128_t)(minuend) - (uint128_t)(subtrahend) - (uint128_t)(borrowIn); \ + (borrowOut) = (digit_t)(tempReg >> (sizeof(uint128_t) * 8 - 1)); \ + (differenceOut) = (digit_t) tempReg; \ + } + +// Digit shift right +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (RADIX - (shift))); + +// Digit shift left +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (RADIX - (shift))); + +#endif + +// Multiprecision multiplication selection +#if defined(GENERIC_IMPLEMENTATION) && (TARGET == TARGET_AMD64) +#define oqs_sidh_cln16_mp_mul_comba oqs_sidh_cln16_mp_mul +#else 
+#define oqs_sidh_cln16_mp_mul_schoolbook oqs_sidh_cln16_mp_mul
+#endif
+
+/**************** Function prototypes ****************/
+/************* Multiprecision functions **************/
+
+// Copy wordsize digits, c = a, where lng(a) = nwords
+void oqs_sidh_cln16_copy_words(const digit_t *a, digit_t *c, const unsigned int nwords);
+
+// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit
+unsigned int oqs_sidh_cln16_mp_add(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
+
+// 751-bit multiprecision addition, c = a+b
+void oqs_sidh_cln16_mp_add751(const digit_t *a, const digit_t *b, digit_t *c);
+void oqs_sidh_cln16_mp_add751_asm(const digit_t *a, const digit_t *b, digit_t *c);
+
+// 2x751-bit multiprecision addition, c = a+b
+void oqs_sidh_cln16_mp_add751x2(const digit_t *a, const digit_t *b, digit_t *c);
+void oqs_sidh_cln16_mp_add751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
+
+// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
+unsigned int oqs_sidh_cln16_mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
+
+// Multiprecision right shift by one
+void oqs_sidh_cln16_mp_shiftr1(digit_t *x, const unsigned int nwords);
+
+// Multiprecision left shift by one
+void oqs_sidh_cln16_mp_shiftl1(digit_t *x, const unsigned int nwords);
+
+// Digit multiplication, digit * digit -> 2-digit result
+void oqs_sidh_cln16_digit_x_digit(const digit_t a, const digit_t b, digit_t *c);
+
+// Multiprecision schoolbook multiply, c = a*b, where lng(a) = lng(b) = nwords.
+void oqs_sidh_cln16_mp_mul_schoolbook(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
+
+// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords.
+void oqs_sidh_cln16_mp_mul_comba(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords); + +void oqs_sidh_cln16_multiply(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords); + +// Montgomery multiplication modulo the group order, mc = ma*mb*r' mod order, where ma,mb,mc in [0, order-1] +void oqs_sidh_cln16_Montgomery_multiply_mod_order(const digit_t *ma, const digit_t *mb, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime); + +// (Non-constant time) Montgomery inversion modulo the curve order using a^(-1) = a^(order-2) mod order +void oqs_sidh_cln16_Montgomery_inversion_mod_order(const digit_t *ma, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime); + +void oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(const digit_t *a, digit_t *c, const digit_t *order, const digit_t *Montgomery_rprime, const digit_t *Montgomery_R2); + +// Conversion of elements in Z_r to Montgomery representation, where the order r is up to 384 bits. +void oqs_sidh_cln16_to_Montgomery_mod_order(const digit_t *a, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime, const digit_t *Montgomery_Rprime); + +// Conversion of elements in Z_r from Montgomery to standard representation, where the order is up to 384 bits. +void oqs_sidh_cln16_from_Montgomery_mod_order(const digit_t *ma, digit_t *c, const digit_t *order, const digit_t *Montgomery_rprime); + +// Inversion modulo Alice's order 2^372. +void oqs_sidh_cln16_inv_mod_orderA(const digit_t *a, digit_t *c); + +/************ Field arithmetic functions *************/ + +// Copy of a field element, c = a +void oqs_sidh_cln16_fpcopy751(const oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t c); + +// Zeroing a field element, a = 0 +void oqs_sidh_cln16_fpzero751(oqs_sidh_cln16_felm_t a); + +// Non constant-time comparison of two field elements. 
If a = b return TRUE, otherwise, return FALSE +bool oqs_sidh_cln16_fpequal751_non_constant_time(const oqs_sidh_cln16_felm_t a, const oqs_sidh_cln16_felm_t b); + +// Modular addition, c = a+b mod p751 +extern void oqs_sidh_cln16_fpadd751(const digit_t *a, const digit_t *b, digit_t *c); +extern void oqs_sidh_cln16_fpadd751_asm(const digit_t *a, const digit_t *b, digit_t *c); + +// Modular subtraction, c = a-b mod p751 +extern void oqs_sidh_cln16_fpsub751(const digit_t *a, const digit_t *b, digit_t *c); +extern void oqs_sidh_cln16_fpsub751_asm(const digit_t *a, const digit_t *b, digit_t *c); + +// Modular negation, a = -a mod p751 +extern void oqs_sidh_cln16_fpneg751(digit_t *a); + +// Modular division by two, c = a/2 mod p751. +void oqs_sidh_cln16_fpdiv2_751(const digit_t *a, digit_t *c); + +// Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1]. +void oqs_sidh_cln16_fpcorrection751(digit_t *a); + +// 751-bit Montgomery reduction, c = a mod p +void oqs_sidh_cln16_rdc_mont(const digit_t *a, digit_t *c); + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p751, where R=2^768 +void oqs_sidh_cln16_fpmul751_mont(const oqs_sidh_cln16_felm_t a, const oqs_sidh_cln16_felm_t b, oqs_sidh_cln16_felm_t c); +void oqs_sidh_cln16_mul751_asm(const oqs_sidh_cln16_felm_t a, const oqs_sidh_cln16_felm_t b, oqs_sidh_cln16_dfelm_t c); +void oqs_sidh_cln16_rdc751_asm(const oqs_sidh_cln16_dfelm_t ma, oqs_sidh_cln16_dfelm_t mc); + +// Field squaring using Montgomery arithmetic, c = a*b*R^-1 mod p751, where R=2^768 +void oqs_sidh_cln16_fpsqr751_mont(const oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t mc); + +// Conversion to Montgomery representation +void oqs_sidh_cln16_to_mont(const oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t mc); + +// Conversion from Montgomery representation to standard representation +void oqs_sidh_cln16_from_mont(const oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t c); + +// Field inversion, a = a^-1 in GF(p751) +void 
oqs_sidh_cln16_fpinv751_mont(oqs_sidh_cln16_felm_t a); + +// Field inversion, a = a^-1 in GF(p751) using the binary GCD +void oqs_sidh_cln16_fpinv751_mont_bingcd(oqs_sidh_cln16_felm_t a); + +// Chain to compute (p751-3)/4 using Montgomery arithmetic +void oqs_sidh_cln16_fpinv751_chain_mont(oqs_sidh_cln16_felm_t a); + +/************ GF(p^2) arithmetic functions *************/ + +// Copy of a GF(p751^2) element, c = a +void oqs_sidh_cln16_fp2copy751(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c); + +// Zeroing a GF(p751^2) element, a = 0 +void oqs_sidh_cln16_fp2zero751(oqs_sidh_cln16_f2elm_t a); + +// GF(p751^2) negation, a = -a in GF(p751^2) +void oqs_sidh_cln16_fp2neg751(oqs_sidh_cln16_f2elm_t a); + +// GF(p751^2) addition, c = a+b in GF(p751^2) +extern void oqs_sidh_cln16_fp2add751(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c); + +// GF(p751^2) subtraction, c = a-b in GF(p751^2) +extern void oqs_sidh_cln16_fp2sub751(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c); + +// GF(p751^2) division by two, c = a/2 in GF(p751^2) +void oqs_sidh_cln16_fp2div2_751(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c); + +// Modular correction, a = a in GF(p751^2) +void oqs_sidh_cln16_fp2correction751(oqs_sidh_cln16_f2elm_t a); + +// GF(p751^2) squaring using Montgomery arithmetic, c = a^2 in GF(p751^2) +void oqs_sidh_cln16_fp2sqr751_mont(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c); + +// GF(p751^2) multiplication using Montgomery arithmetic, c = a*b in GF(p751^2) +void oqs_sidh_cln16_fp2mul751_mont(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c); + +// Conversion of a GF(p751^2) element to Montgomery representation +void oqs_sidh_cln16_to_fp2mont(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t mc); + +// Conversion of a GF(p751^2) element from Montgomery representation to standard representation +void 
oqs_sidh_cln16_from_fp2mont(const oqs_sidh_cln16_f2elm_t ma, oqs_sidh_cln16_f2elm_t c); + +// GF(p751^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void oqs_sidh_cln16_fp2inv751_mont(oqs_sidh_cln16_f2elm_t a); + +// GF(p751^2) inversion, a = (a0-i*a1)/(a0^2+a1^2), GF(p751) inversion done using the binary GCD +void oqs_sidh_cln16_fp2inv751_mont_bingcd(oqs_sidh_cln16_f2elm_t a); + +// n-way Montgomery inversion +void oqs_sidh_cln16_mont_n_way_inv(const oqs_sidh_cln16_f2elm_t *vec, const int n, oqs_sidh_cln16_f2elm_t *out); + +// Select either x or y depending on value of option +void oqs_sidh_cln16_select_f2elm(const oqs_sidh_cln16_f2elm_t x, const oqs_sidh_cln16_f2elm_t y, oqs_sidh_cln16_f2elm_t z, const digit_t option); + +// Computes square roots of elements in (Fp2)^2 using Hamburg's trick. +void oqs_sidh_cln16_sqrt_Fp2(const oqs_sidh_cln16_f2elm_t u, oqs_sidh_cln16_f2elm_t y); + +// Computes square roots of elements in (Fp2)^2 using Hamburg's trick +void oqs_sidh_cln16_sqrt_Fp2_frac(const oqs_sidh_cln16_f2elm_t u, const oqs_sidh_cln16_f2elm_t v, oqs_sidh_cln16_f2elm_t y); + +// Cyclotomic cubing on elements of norm 1, using a^(p+1) = 1 +void oqs_sidh_cln16_cube_Fp2_cycl(oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_felm_t one); + +// Cyclotomic squaring on elements of norm 1, using a^(p+1) = 1 +void oqs_sidh_cln16_sqr_Fp2_cycl(oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_felm_t one); + +// Cyclotomic inversion, a^(p+1) = 1 => a^(-1) = a^p = a0 - i*a1 +extern void oqs_sidh_cln16_inv_Fp2_cycl(oqs_sidh_cln16_f2elm_t a); + +// Check if GF(p751^2) element is cube +bool oqs_sidh_cln16_is_cube_Fp2(oqs_sidh_cln16_f2elm_t u, PCurveIsogenyStruct CurveIsogeny); + +// Exponentiation y^t via square and multiply in the cyclotomic group. 
Exponent t is 6 bits at most +void oqs_sidh_cln16_exp6_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, const uint64_t t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res); + +// Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is 21 bits at most +void oqs_sidh_cln16_exp21_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, const uint64_t t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res); + +// Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is 84 bits at most +void oqs_sidh_cln16_exp84_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, uint64_t *t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res); + +// Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is length bits. +void oqs_sidh_cln16_exp_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, uint64_t *t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res, int length); + +/************ Elliptic curve and isogeny functions *************/ + +// Check if curve isogeny structure is NULL +bool oqs_sidh_cln16_is_CurveIsogenyStruct_null(PCurveIsogenyStruct pCurveIsogeny); + +// Swap points over the base field +void oqs_sidh_cln16_swap_points_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const digit_t option); + +// Swap points +void oqs_sidh_cln16_swap_points(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const digit_t option); + +// Computes the j-invariant of a Montgomery curve with projective constant. +void oqs_sidh_cln16_j_inv(const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t jinv); + +// Simultaneous doubling and differential addition. +void oqs_sidh_cln16_xDBLADD(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t xPQ, const oqs_sidh_cln16_f2elm_t A24); + +// Doubling of a Montgomery point in projective coordinates (X:Z). 
+void oqs_sidh_cln16_xDBL(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const oqs_sidh_cln16_f2elm_t C24); + +// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. +void oqs_sidh_cln16_xDBLe(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, const int e); + +// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings and collects a few intermediate multiples. +void oqs_sidh_cln16_xDBLe_collect(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, unsigned int left_bound, const unsigned int right_bound, const unsigned int *col, oqs_sidh_cln16_point_proj_t *pts, unsigned int *pts_index, unsigned int *npts); + +// Differential addition. +void oqs_sidh_cln16_xADD(oqs_sidh_cln16_point_proj_t P, const oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t xPQ); + +// Doubling of a Montgomery point in projective coordinates (X:Z) over the base field. +void oqs_sidh_cln16_xDBL_basefield(const oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q); + +// Simultaneous doubling and differential addition over the base field. 
+void oqs_sidh_cln16_xDBLADD_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const oqs_sidh_cln16_felm_t xPQ, const oqs_sidh_cln16_felm_t A24); + +// The Montgomery ladder +void oqs_sidh_cln16_ladder(const oqs_sidh_cln16_felm_t x, digit_t *m, oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const oqs_sidh_cln16_felm_t A24, const unsigned int order_bits, const unsigned int order_fullbits, PCurveIsogenyStruct CurveIsogeny); + +// Computes key generation entirely in the base field +SIDH_CRYPTO_STATUS oqs_sidh_cln16_secret_pt(const oqs_sidh_cln16_point_basefield_t P, const digit_t *m, const unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t R, PCurveIsogenyStruct CurveIsogeny); + +// Computes P+[m]Q via x-only arithmetic. +SIDH_CRYPTO_STATUS oqs_sidh_cln16_ladder_3_pt(const oqs_sidh_cln16_f2elm_t xP, const oqs_sidh_cln16_f2elm_t xQ, const oqs_sidh_cln16_f2elm_t xPQ, const digit_t *m, const unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t W, const oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny); + +// Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. +void oqs_sidh_cln16_get_4_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t *coeff); + +// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny +void oqs_sidh_cln16_eval_4_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t *coeff); + +// Computes first 4-isogeny computed by Alice. +void oqs_sidh_cln16_first_4_isog(oqs_sidh_cln16_point_proj_t P, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t Aout, oqs_sidh_cln16_f2elm_t Cout, PCurveIsogenyStruct CurveIsogeny); + +// Tripling of a Montgomery point in projective coordinates (X:Z). 
+void oqs_sidh_cln16_xTPL(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const oqs_sidh_cln16_f2elm_t C24); + +// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. +void oqs_sidh_cln16_xTPLe(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, const int e); + +// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings and collects a few intermediate multiples. +void oqs_sidh_cln16_xTPLe_collect(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, unsigned int left_bound, const unsigned int right_bound, const unsigned int *col, oqs_sidh_cln16_point_proj_t *pts, unsigned int *pts_index, unsigned int *npts); + +// Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. +void oqs_sidh_cln16_get_3_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C); + +// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and a point P = (X:Z). +void oqs_sidh_cln16_eval_3_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q); + +// 3-way simultaneous inversion +void oqs_sidh_cln16_inv_3_way(oqs_sidh_cln16_f2elm_t z1, oqs_sidh_cln16_f2elm_t z2, oqs_sidh_cln16_f2elm_t z3); + +// Computing the point D = (x(Q-P),z(Q-P)) +void oqs_sidh_cln16_distort_and_diff(const oqs_sidh_cln16_felm_t xP, oqs_sidh_cln16_point_proj_t d, PCurveIsogenyStruct CurveIsogeny); + +// Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. 
+void oqs_sidh_cln16_get_A(const oqs_sidh_cln16_f2elm_t xP, const oqs_sidh_cln16_f2elm_t xQ, const oqs_sidh_cln16_f2elm_t xR, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny); + +/************ Functions for compression *************/ + +// Produces points R1 and R2 as basis for E[2^372] +void oqs_sidh_cln16_generate_2_torsion_basis(const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R1, oqs_sidh_cln16_point_full_proj_t R2, PCurveIsogenyStruct CurveIsogeny); + +// Produces points R1 and R2 as basis for E[3^239] +void oqs_sidh_cln16_generate_3_torsion_basis(oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R1, oqs_sidh_cln16_point_full_proj_t R2, PCurveIsogenyStruct CurveIsogeny); + +// 2-torsion Tate pairing +void oqs_sidh_cln16_Tate_pairings_2_torsion(const oqs_sidh_cln16_point_t R1, const oqs_sidh_cln16_point_t R2, const oqs_sidh_cln16_point_t P, const oqs_sidh_cln16_point_t Q, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t *n, PCurveIsogenyStruct CurveIsogeny); + +// 3-torsion Tate pairing +void oqs_sidh_cln16_Tate_pairings_3_torsion(const oqs_sidh_cln16_point_t R1, const oqs_sidh_cln16_point_t R2, const oqs_sidh_cln16_point_t P, const oqs_sidh_cln16_point_t Q, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t *n, PCurveIsogenyStruct CurveIsogeny); + +// The Montgomery ladder, running in non constant-time +void oqs_sidh_cln16_Mont_ladder(const oqs_sidh_cln16_f2elm_t x, const digit_t *m, oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const unsigned int order_bits, const unsigned int order_fullbits, PCurveIsogenyStruct CurveIsogeny); + +// General addition +void oqs_sidh_cln16_ADD(const oqs_sidh_cln16_point_full_proj_t P, const oqs_sidh_cln16_f2elm_t QX, const oqs_sidh_cln16_f2elm_t QY, const oqs_sidh_cln16_f2elm_t QZ, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R); + +// 2-torsion Pohlig-Hellman function +void oqs_sidh_cln16_ph2(const 
oqs_sidh_cln16_point_t phiP, const oqs_sidh_cln16_point_t phiQ, const oqs_sidh_cln16_point_t PS, const oqs_sidh_cln16_point_t QS, const oqs_sidh_cln16_f2elm_t A, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, PCurveIsogenyStruct CurveIsogeny); + +// Lookup table generation for 2-torsion PH +void oqs_sidh_cln16_build_LUTs(const oqs_sidh_cln16_f2elm_t u, oqs_sidh_cln16_f2elm_t *t_ori, oqs_sidh_cln16_f2elm_t *LUT, oqs_sidh_cln16_f2elm_t *LUT_0, oqs_sidh_cln16_f2elm_t *LUT_1, oqs_sidh_cln16_f2elm_t *LUT_3, const oqs_sidh_cln16_felm_t one); + +// Pohlig-Hellman for groups of 2-power order up to 2^6 +void oqs_sidh_cln16_phn1(const oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const uint64_t a, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_i); + +// Pohlig-Hellman for groups of 2-power order 2^21 +void oqs_sidh_cln16_phn5(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k); + +// Pohlig-Hellman for groups of 2-power order 2^84 +void oqs_sidh_cln16_phn21(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k); + +// Pohlig-Hellman for groups of 2-power order 2^372 +void oqs_sidh_cln16_phn84(oqs_sidh_cln16_f2elm_t r, const oqs_sidh_cln16_f2elm_t *t_ori, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_f2elm_t *LUT_3, const oqs_sidh_cln16_felm_t one, uint64_t *alpha); + +// 3-torsion Pohlig-Hellman function +void oqs_sidh_cln16_ph3(oqs_sidh_cln16_point_t phiP, oqs_sidh_cln16_point_t phiQ, oqs_sidh_cln16_point_t PS, oqs_sidh_cln16_point_t QS, oqs_sidh_cln16_f2elm_t A, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, PCurveIsogenyStruct CurveIsogeny); + +// Lookup table generation for 3-torsion PH +void 
oqs_sidh_cln16_build_LUTs_3(oqs_sidh_cln16_f2elm_t g, oqs_sidh_cln16_f2elm_t *t_ori, oqs_sidh_cln16_f2elm_t *LUT, oqs_sidh_cln16_f2elm_t *LUT_0, oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one); + +// Pohlig-Hellman for groups of 3-power order up to 3^2 or 3^3 +void oqs_sidh_cln16_phn1_3(const oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const uint64_t a, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_i); + +// Pohlig-Hellman for groups of 3-power order up to 3^15 +void oqs_sidh_cln16_phn3(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k); + +// Pohlig-Hellman for groups of 3-power order up to 3^56 +void oqs_sidh_cln16_phn15_1(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k); + +// Pohlig-Hellman for groups of 3-power order up to 3^61 +void oqs_sidh_cln16_phn15(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k); + +// Pohlig-Hellman for groups of 3-power order up to 3^239 +void oqs_sidh_cln16_phn61(oqs_sidh_cln16_f2elm_t r, oqs_sidh_cln16_f2elm_t *t_ori, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha); + +// Recover the y-coordinates of the public key +void oqs_sidh_cln16_recover_y(const oqs_sidh_cln16_publickey_t PK, oqs_sidh_cln16_point_full_proj_t phiP, oqs_sidh_cln16_point_full_proj_t phiQ, oqs_sidh_cln16_point_full_proj_t phiX, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny); + +// Computes the input modulo 3. 
The input is assumed to be SIDH_NWORDS_ORDER long +unsigned int oqs_sidh_cln16_mod3(digit_t *a); + +// Computes R+aS +void oqs_sidh_cln16_mont_twodim_scalarmult(digit_t *a, const oqs_sidh_cln16_point_t R, const oqs_sidh_cln16_point_t S, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t A24, oqs_sidh_cln16_point_full_proj_t P, PCurveIsogenyStruct CurveIsogeny); + +void oqs_sidh_cln16_compress_2_torsion(const unsigned char *PublicKeyA, unsigned char *CompressedPKA, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, oqs_sidh_cln16_point_t R1, oqs_sidh_cln16_point_t R2, PCurveIsogenyStruct CurveIsogeny); +void oqs_sidh_cln16_compress_3_torsion(const unsigned char *PublicKeyA, unsigned char *CompressedPKA, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, oqs_sidh_cln16_point_t R1, oqs_sidh_cln16_point_t R2, PCurveIsogenyStruct CurveIsogeny); +void oqs_sidh_cln16_decompress_2_torsion(const unsigned char *SecretKey, const unsigned char *CompressedPKA, oqs_sidh_cln16_point_proj_t R, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny); +void oqs_sidh_cln16_decompress_3_torsion(const unsigned char *SecretKey, const unsigned char *CompressedPKA, oqs_sidh_cln16_point_proj_t R, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/crypt/liboqs/kex_sidh_cln16/SIDH_setup.c b/crypt/liboqs/kex_sidh_cln16/SIDH_setup.c new file mode 100644 index 0000000000000000000000000000000000000000..1a61e4dde893bfd6b31cd2738cecc7ac66b9d0b5 --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/SIDH_setup.c @@ -0,0 +1,211 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral +* Diffie-Hellman key exchange. +* +* Copyright (c) Microsoft Corporation. All rights reserved. 
+* +* +* Abstract: functions for initialization and getting randomness +* +*********************************************************************************************/ + +#include "SIDH_internal.h" +#include <stdlib.h> + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_curve_initialize(PCurveIsogenyStruct pCurveIsogeny, PCurveIsogenyStaticData pCurveIsogenyData) { // Initialize curve isogeny structure pCurveIsogeny with static data extracted from pCurveIsogenyData. + // This needs to be called after allocating memory for "pCurveIsogeny" using SIDH_curve_allocate(). + unsigned int i, pwords, owords; + + if (oqs_sidh_cln16_is_CurveIsogenyStruct_null(pCurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + for (i = 0; i < 8; i++) { // Copy 8-character identifier + pCurveIsogeny->CurveIsogeny[i] = pCurveIsogenyData->CurveIsogeny[i]; + } + pCurveIsogeny->pwordbits = pCurveIsogenyData->pwordbits; + pCurveIsogeny->owordbits = pCurveIsogenyData->owordbits; + pCurveIsogeny->pbits = pCurveIsogenyData->pbits; + pCurveIsogeny->oAbits = pCurveIsogenyData->oAbits; + pCurveIsogeny->oBbits = pCurveIsogenyData->oBbits; + pCurveIsogeny->eB = pCurveIsogenyData->eB; + pCurveIsogeny->BigMont_A24 = pCurveIsogenyData->BigMont_A24; + + pwords = (pCurveIsogeny->pwordbits + RADIX - 1) / RADIX; + owords = (pCurveIsogeny->owordbits + RADIX - 1) / RADIX; + oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->prime, pCurveIsogeny->prime, pwords); + oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->A, pCurveIsogeny->A, pwords); + oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->C, pCurveIsogeny->C, pwords); + oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->Aorder, pCurveIsogeny->Aorder, owords); + oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->Border, pCurveIsogeny->Border, owords); + oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->PA, pCurveIsogeny->PA, 2 * pwords); + oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->PB, 
pCurveIsogeny->PB, 2 * pwords); + oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->BigMont_order, pCurveIsogeny->BigMont_order, pwords); + oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->Montgomery_R2, pCurveIsogeny->Montgomery_R2, pwords); + oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->Montgomery_pp, pCurveIsogeny->Montgomery_pp, pwords); + oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->Montgomery_one, pCurveIsogeny->Montgomery_one, pwords); + + return SIDH_CRYPTO_SUCCESS; +} + +PCurveIsogenyStruct oqs_sidh_cln16_curve_allocate(PCurveIsogenyStaticData CurveData) { // Dynamic allocation of memory for curve isogeny structure. + // Returns NULL on error. + digit_t pbytes = (CurveData->pwordbits + 7) / 8; + digit_t obytes = (CurveData->owordbits + 7) / 8; + PCurveIsogenyStruct pCurveIsogeny = NULL; + + pCurveIsogeny = (PCurveIsogenyStruct) calloc(1, sizeof(CurveIsogenyStruct)); + pCurveIsogeny->prime = (digit_t *) calloc(1, pbytes); + pCurveIsogeny->A = (digit_t *) calloc(1, pbytes); + pCurveIsogeny->C = (digit_t *) calloc(1, pbytes); + pCurveIsogeny->Aorder = (digit_t *) calloc(1, obytes); + pCurveIsogeny->Border = (digit_t *) calloc(1, obytes); + pCurveIsogeny->PA = (digit_t *) calloc(1, 2 * pbytes); + pCurveIsogeny->PB = (digit_t *) calloc(1, 2 * pbytes); + pCurveIsogeny->BigMont_order = (digit_t *) calloc(1, pbytes); + pCurveIsogeny->Montgomery_R2 = (digit_t *) calloc(1, pbytes); + pCurveIsogeny->Montgomery_pp = (digit_t *) calloc(1, pbytes); + pCurveIsogeny->Montgomery_one = (digit_t *) calloc(1, pbytes); + + return pCurveIsogeny; +} + +void oqs_sidh_cln16_curve_free(PCurveIsogenyStruct pCurveIsogeny) { // Free memory for curve isogeny structure + + if (pCurveIsogeny != NULL) { + if (pCurveIsogeny->prime != NULL) + free(pCurveIsogeny->prime); + if (pCurveIsogeny->A != NULL) + free(pCurveIsogeny->A); + if (pCurveIsogeny->C != NULL) + free(pCurveIsogeny->C); + if (pCurveIsogeny->Aorder != NULL) + free(pCurveIsogeny->Aorder); + 
if (pCurveIsogeny->Border != NULL) + free(pCurveIsogeny->Border); + if (pCurveIsogeny->PA != NULL) + free(pCurveIsogeny->PA); + if (pCurveIsogeny->PB != NULL) + free(pCurveIsogeny->PB); + if (pCurveIsogeny->BigMont_order != NULL) + free(pCurveIsogeny->BigMont_order); + if (pCurveIsogeny->Montgomery_R2 != NULL) + free(pCurveIsogeny->Montgomery_R2); + if (pCurveIsogeny->Montgomery_pp != NULL) + free(pCurveIsogeny->Montgomery_pp); + if (pCurveIsogeny->Montgomery_one != NULL) + free(pCurveIsogeny->Montgomery_one); + + free(pCurveIsogeny); + } +} + +bool oqs_sidh_cln16_is_CurveIsogenyStruct_null(PCurveIsogenyStruct pCurveIsogeny) { // Check if curve isogeny structure is NULL + + if (pCurveIsogeny == NULL || pCurveIsogeny->prime == NULL || pCurveIsogeny->A == NULL || pCurveIsogeny->C == NULL || pCurveIsogeny->Aorder == NULL || pCurveIsogeny->Border == NULL || + pCurveIsogeny->PA == NULL || pCurveIsogeny->PB == NULL || pCurveIsogeny->BigMont_order == NULL || pCurveIsogeny->Montgomery_R2 == NULL || pCurveIsogeny->Montgomery_pp == NULL || + pCurveIsogeny->Montgomery_one == NULL) { + return true; + } + return false; +} + +const uint64_t Border_div3[SIDH_NWORDS_ORDER] = {0xEDCD718A828384F9, 0x733B35BFD4427A14, 0xF88229CF94D7CF38, 0x63C56C990C7C2AD6, 0xB858A87E8F4222C7, 0x254C9C6B525EAF5}; + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_mod_order(digit_t *random_digits, unsigned int AliceOrBob, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand) { // Output random values in the range [1, order-1] in little endian format that can be used as private keys. + // It makes requests of random values with length "oAbits" (when AliceOrBob = 0) or "oBbits" (when AliceOrBob = 1) to the "random_bytes" function. + // The process repeats until random value is in [0, Aorder-2] ([0, Border-2], resp.). + // If successful, the output is given in "random_digits" in the range [1, Aorder-1] ([1, Border-1], resp.). 
+ unsigned int ntry = 0, nbytes, nwords; + digit_t t1[SIDH_MAXWORDS_ORDER] = {0}, order2[SIDH_MAXWORDS_ORDER] = {0}; + unsigned char mask; + SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS; + + if (random_digits == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(pCurveIsogeny) || AliceOrBob > 1) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_clear_words((void *) random_digits, SIDH_MAXWORDS_ORDER); + t1[0] = 2; + if (AliceOrBob == SIDH_ALICE) { + nbytes = (pCurveIsogeny->oAbits + 7) / 8; // Number of random bytes to be requested + nwords = NBITS_TO_NWORDS(pCurveIsogeny->oAbits); + mask = 0x07; // Value for masking last random byte + oqs_sidh_cln16_copy_words(pCurveIsogeny->Aorder, order2, nwords); + oqs_sidh_cln16_mp_shiftr1(order2, nwords); // order/2 + oqs_sidh_cln16_mp_sub(order2, t1, order2, nwords); // order2 = order/2-2 + } else { + nbytes = (pCurveIsogeny->oBbits + 7) / 8; + nwords = NBITS_TO_NWORDS(pCurveIsogeny->oBbits); + mask = 0x03; // Value for masking last random byte + oqs_sidh_cln16_mp_sub((digit_t *) Border_div3, t1, order2, nwords); // order2 = order/3-2 + } + + do { + ntry++; + if (ntry > 100) { // Max. 
100 iterations to obtain random value in [0, order-2] + return SIDH_CRYPTO_ERROR_TOO_MANY_ITERATIONS; + } + rand->rand_n(rand, (uint8_t *) random_digits, nbytes); + ((unsigned char *) random_digits)[nbytes - 1] &= mask; // Masking last byte + } while (oqs_sidh_cln16_mp_sub(order2, random_digits, t1, nwords) == 1); + + oqs_sidh_cln16_clear_words((void *) t1, SIDH_MAXWORDS_ORDER); + t1[0] = 1; + oqs_sidh_cln16_mp_add(random_digits, t1, random_digits, nwords); + oqs_sidh_cln16_copy_words(random_digits, t1, nwords); + oqs_sidh_cln16_mp_shiftl1(random_digits, nwords); // Alice's output in the range [2, order-2] + if (AliceOrBob == SIDH_BOB) { + oqs_sidh_cln16_mp_add(random_digits, t1, random_digits, nwords); // Bob's output in the range [3, order-3] + } + + return Status; +} + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_BigMont_mod_order(digit_t *random_digits, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand) { // Output random values in the range [1, BigMont_order-1] in little endian format that can be used as private keys to compute scalar multiplications + // using the elliptic curve BigMont. + // It makes requests of random values with length "BIGMONT_SIDH_SIDH_NBITS_ORDER" to the "random_bytes" function. + // The process repeats until random value is in [0, BigMont_order-2] + // If successful, the output is given in "random_digits" in the range [1, BigMont_order-1]. + // The "random_bytes" function, which is passed through the curve isogeny structure PCurveIsogeny, should be set up in advance using SIDH_curve_initialize(). + // The caller is responsible for providing the "random_bytes" function passing random values as octets. 
+ unsigned int ntry = 0, nbytes = (BIGMONT_SIDH_SIDH_NBITS_ORDER + 7) / 8, nwords = NBITS_TO_NWORDS(BIGMONT_SIDH_SIDH_NBITS_ORDER); + digit_t t1[BIGMONT_MAXWORDS_ORDER] = {0}, order2[BIGMONT_MAXWORDS_ORDER] = {0}; + unsigned char mask; + SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS; + + if (random_digits == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(pCurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_clear_words((void *) random_digits, BIGMONT_MAXWORDS_ORDER); + t1[0] = 2; + mask = (unsigned char) (8 * nbytes - BIGMONT_SIDH_SIDH_NBITS_ORDER); + oqs_sidh_cln16_mp_sub(pCurveIsogeny->BigMont_order, t1, order2, nwords); // order2 = order-2 + mask = ((unsigned char) -1 >> mask); // Value for masking last random byte + + do { + ntry++; + if (ntry > 100) { // Max. 100 iterations to obtain random value in [0, order-2] + return SIDH_CRYPTO_ERROR_TOO_MANY_ITERATIONS; + } + rand->rand_n(rand, (uint8_t *) random_digits, nbytes); + ((unsigned char *) random_digits)[nbytes - 1] &= mask; // Masking last byte + } while (oqs_sidh_cln16_mp_sub(order2, random_digits, t1, nwords) == 1); + + oqs_sidh_cln16_clear_words((void *) t1, BIGMONT_MAXWORDS_ORDER); + t1[0] = 1; + oqs_sidh_cln16_mp_add(random_digits, t1, random_digits, nwords); // Output in the range [1, order-1] + + return Status; +} + +void oqs_sidh_cln16_clear_words(void *mem, digit_t nwords) { // Clear digits from memory. "nwords" indicates the number of digits to be zeroed. + // This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing. 
+ unsigned int i; + volatile digit_t *v = mem; + + for (i = 0; i < nwords; i++) { + v[i] = 0; + } +} diff --git a/crypt/liboqs/kex_sidh_cln16/ec_isogeny.c b/crypt/liboqs/kex_sidh_cln16/ec_isogeny.c new file mode 100644 index 0000000000000000000000000000000000000000..bdb2626c32f77ea0ee970b7d98caa05c202f6dc7 --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/ec_isogeny.c @@ -0,0 +1,2156 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman +* key exchange. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: elliptic curve and isogeny functions +* +*********************************************************************************************/ + +#include "SIDH_internal.h" +#include <math.h> + +extern const uint64_t LIST[22][SIDH_NWORDS64_FIELD]; + +void oqs_sidh_cln16_j_inv(const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t jinv) { // Computes the j-invariant of a Montgomery curve with projective constant. + // Input: A,C in GF(p^2). + // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x. 
+ oqs_sidh_cln16_f2elm_t t0, t1; + + oqs_sidh_cln16_fp2sqr751_mont(A, jinv); // jinv = A^2 + oqs_sidh_cln16_fp2sqr751_mont(C, t1); // t1 = C^2 + oqs_sidh_cln16_fp2add751(t1, t1, t0); // t0 = t1+t1 + oqs_sidh_cln16_fp2sub751(jinv, t0, t0); // t0 = jinv-t0 + oqs_sidh_cln16_fp2sub751(t0, t1, t0); // t0 = t0-t1 + oqs_sidh_cln16_fp2sub751(t0, t1, jinv); // jinv = t0-t1 + oqs_sidh_cln16_fp2sqr751_mont(t1, t1); // t1 = t1^2 + oqs_sidh_cln16_fp2mul751_mont(jinv, t1, jinv); // jinv = jinv*t1 + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2sqr751_mont(t0, t1); // t1 = t0^2 + oqs_sidh_cln16_fp2mul751_mont(t0, t1, t0); // t0 = t0*t1 + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2inv751_mont(jinv); // jinv = 1/jinv + oqs_sidh_cln16_fp2mul751_mont(jinv, t0, jinv); // jinv = t0*jinv +} + +void oqs_sidh_cln16_xDBLADD(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t xPQ, const oqs_sidh_cln16_f2elm_t A24) { // Simultaneous doubling and differential addition. + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. + // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. 
+ oqs_sidh_cln16_f2elm_t t0, t1, t2; + + oqs_sidh_cln16_fp2add751(P->X, P->Z, t0); // t0 = XP+ZP + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t1); // t1 = XP-ZP + oqs_sidh_cln16_fp2sqr751_mont(t0, P->X); // XP = (XP+ZP)^2 + oqs_sidh_cln16_fp2sub751(Q->X, Q->Z, t2); // t2 = XQ-ZQ + oqs_sidh_cln16_fp2add751(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ + oqs_sidh_cln16_fp2mul751_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) + oqs_sidh_cln16_fp2sqr751_mont(t1, P->Z); // ZP = (XP-ZP)^2 + oqs_sidh_cln16_fp2mul751_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 + oqs_sidh_cln16_fp2mul751_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 + oqs_sidh_cln16_fp2mul751_mont(t2, A24, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] + oqs_sidh_cln16_fp2sub751(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2add751(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 + oqs_sidh_cln16_fp2add751(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2mul751_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] + oqs_sidh_cln16_fp2sqr751_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fp2sqr751_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fp2mul751_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 +} + +void oqs_sidh_cln16_xDBL(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const oqs_sidh_cln16_f2elm_t C24) { // Doubling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constant A24/C24=(A/C+2)/4. + // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). 
+ oqs_sidh_cln16_f2elm_t t0, t1; + + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t0); // t0 = X1-Z1 + oqs_sidh_cln16_fp2add751(P->X, P->Z, t1); // t1 = X1+Z1 + oqs_sidh_cln16_fp2sqr751_mont(t0, t0); // t0 = (X1-Z1)^2 + oqs_sidh_cln16_fp2sqr751_mont(t1, t1); // t1 = (X1+Z1)^2 + oqs_sidh_cln16_fp2mul751_mont(C24, t0, Q->Z); // Z2 = C24*(X1-Z1)^2 + oqs_sidh_cln16_fp2mul751_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 + oqs_sidh_cln16_fp2sub751(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 + oqs_sidh_cln16_fp2mul751_mont(A24, t1, t0); // t0 = A24*[(X1+Z1)^2-(X1-Z1)^2] + oqs_sidh_cln16_fp2add751(Q->Z, t0, Q->Z); // Z2 = A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 + oqs_sidh_cln16_fp2mul751_mont(Q->Z, t1, Q->Z); // Z2 = [A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] +} + +void oqs_sidh_cln16_xDBLe(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, const int e) { // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. + // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constant A/C. + // Output: projective Montgomery x-coordinates Q <- (2^e)*P. + oqs_sidh_cln16_f2elm_t A24num, A24den; + int i; + + oqs_sidh_cln16_fp2add751(C, C, A24num); + oqs_sidh_cln16_fp2add751(A24num, A24num, A24den); + oqs_sidh_cln16_fp2add751(A24num, A, A24num); + oqs_sidh_cln16_copy_words((digit_t *) P, (digit_t *) Q, 2 * 2 * NWORDS_FIELD); + + for (i = 0; i < e; i++) { + oqs_sidh_cln16_xDBL(Q, Q, A24num, A24den); + } +} + +void oqs_sidh_cln16_xADD(oqs_sidh_cln16_point_proj_t P, const oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t xPQ) { // Differential addition. + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, and affine difference xPQ=x(P-Q). + // Output: projective Montgomery point P <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. 
+ oqs_sidh_cln16_f2elm_t t0, t1; + + oqs_sidh_cln16_fp2add751(P->X, P->Z, t0); // t0 = XP+ZP + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t1); // t1 = XP-ZP + oqs_sidh_cln16_fp2sub751(Q->X, Q->Z, P->X); // XP = XQ-ZQ + oqs_sidh_cln16_fp2add751(Q->X, Q->Z, P->Z); // ZP = XQ+ZQ + oqs_sidh_cln16_fp2mul751_mont(t0, P->X, t0); // t0 = (XP+ZP)*(XQ-ZQ) + oqs_sidh_cln16_fp2mul751_mont(t1, P->Z, t1); // t1 = (XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2sub751(t0, t1, P->Z); // ZP = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2add751(t0, t1, P->X); // XP = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2sqr751_mont(P->Z, P->Z); // ZP = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fp2sqr751_mont(P->X, P->X); // XP = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fp2mul751_mont(P->Z, xPQ, P->Z); // ZP = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 +} + +void oqs_sidh_cln16_xDBL_basefield(const oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q) { // Doubling of a Montgomery point in projective coordinates (X:Z) over the base field. + // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constant A24/C24=(A/C+2)/4. + // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). 
+ oqs_sidh_cln16_felm_t t0, t1; + + // NOTE: this function is fixed for A24=1, C24=2 + + oqs_sidh_cln16_fpsub751(P->X, P->Z, t0); // t0 = X1-Z1 + oqs_sidh_cln16_fpadd751(P->X, P->Z, t1); // t1 = X1+Z1 + oqs_sidh_cln16_fpsqr751_mont(t0, t0); // t0 = (X1-Z1)^2 + oqs_sidh_cln16_fpsqr751_mont(t1, t1); // t1 = (X1+Z1)^2 + oqs_sidh_cln16_fpadd751(t0, t0, Q->Z); // Z2 = C24*(X1-Z1)^2 + oqs_sidh_cln16_fpmul751_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 + oqs_sidh_cln16_fpsub751(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 + oqs_sidh_cln16_fpadd751(Q->Z, t1, Q->Z); // Z2 = A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 + oqs_sidh_cln16_fpmul751_mont(Q->Z, t1, Q->Z); // Z2 = [A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] +} + +void oqs_sidh_cln16_xDBLADD_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const oqs_sidh_cln16_felm_t xPQ, const oqs_sidh_cln16_felm_t A24) { // Simultaneous doubling and differential addition over the base field. + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. + // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. 
+ oqs_sidh_cln16_felm_t t0, t1, t2; + + // NOTE: this function is fixed for C24=2 + + oqs_sidh_cln16_fpadd751(P->X, P->Z, t0); // t0 = XP+ZP + oqs_sidh_cln16_fpsub751(P->X, P->Z, t1); // t1 = XP-ZP + oqs_sidh_cln16_fpsqr751_mont(t0, P->X); // XP = (XP+ZP)^2 + oqs_sidh_cln16_fpsub751(Q->X, Q->Z, t2); // t2 = XQ-ZQ + oqs_sidh_cln16_fpadd751(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ + oqs_sidh_cln16_fpmul751_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) + oqs_sidh_cln16_fpsqr751_mont(t1, P->Z); // ZP = (XP-ZP)^2 + oqs_sidh_cln16_fpmul751_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fpsub751(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 + + if (A24[0] == 1) { + oqs_sidh_cln16_fpadd751(P->Z, P->Z, P->Z); // ZP = C24*(XP-ZP)^2 + oqs_sidh_cln16_fpmul751_mont(P->X, P->Z, P->X); // XP = C24*(XP+ZP)^2*(XP-ZP)^2 + oqs_sidh_cln16_fpadd751(t2, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+C24*(XP-ZP)^2 + } else { + oqs_sidh_cln16_fpmul751_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 + oqs_sidh_cln16_fpmul751_mont(A24, t2, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] + oqs_sidh_cln16_fpadd751(P->Z, Q->X, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+C24*(XP-ZP)^2 + } + + oqs_sidh_cln16_fpsub751(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fpadd751(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fpmul751_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+C24*(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] + oqs_sidh_cln16_fpsqr751_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fpsqr751_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fpmul751_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 +} + +void oqs_sidh_cln16_ladder(const oqs_sidh_cln16_felm_t x, digit_t *m, oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const oqs_sidh_cln16_felm_t A24, const unsigned int order_bits, const unsigned int order_fullbits, 
PCurveIsogenyStruct CurveIsogeny) { // The Montgomery ladder + // Inputs: the affine x-coordinate of a point P on E: B*y^2=x^3+A*x^2+x, + // scalar m + // curve constant A24 = (A+2)/4 + // order_bits = subgroup order bitlength + // order_fullbits = smallest multiple of 32 larger than the order bitlength + // Output: Q = m*(x:1) + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). + unsigned int bit = 0, owords = NBITS_TO_NWORDS(order_fullbits); + digit_t mask; + int i; + + // Initializing with the points (1:0) and (x:1) + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) P->X); + oqs_sidh_cln16_fpzero751(P->Z); + oqs_sidh_cln16_fpcopy751(x, Q->X); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) Q->Z); + + for (i = order_fullbits - order_bits; i > 0; i--) { + oqs_sidh_cln16_mp_shiftl1(m, owords); + } + + for (i = order_bits; i > 0; i--) { + bit = (unsigned int) (m[owords - 1] >> (RADIX - 1)); + oqs_sidh_cln16_mp_shiftl1(m, owords); + mask = 0 - (digit_t) bit; + + oqs_sidh_cln16_swap_points_basefield(P, Q, mask); + oqs_sidh_cln16_xDBLADD_basefield(P, Q, x, A24); // If bit=0 then P <- 2*P and Q <- P+Q, + oqs_sidh_cln16_swap_points_basefield(P, Q, mask); // else if bit=1 then Q <- 2*Q and P <- P+Q + } +} + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_BigMont_ladder(unsigned char *x, digit_t *m, unsigned char *xout, PCurveIsogenyStruct CurveIsogeny) { // BigMont's scalar multiplication using the Montgomery ladder + // Inputs: x, the affine x-coordinate of a point P on BigMont: y^2=x^3+A*x^2+x, + // scalar m. + // Output: xout, the affine x-coordinate of m*(x:1) + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). 
+ oqs_sidh_cln16_point_basefield_proj_t P1, P2; + digit_t scalar[BIGMONT_NWORDS_ORDER]; + oqs_sidh_cln16_felm_t X, A24 = {0}; + + A24[0] = (digit_t) CurveIsogeny->BigMont_A24; + oqs_sidh_cln16_to_mont(A24, A24); // Conversion to Montgomery representation + oqs_sidh_cln16_to_mont((digit_t *) x, X); + + oqs_sidh_cln16_copy_words(m, scalar, BIGMONT_NWORDS_ORDER); + oqs_sidh_cln16_ladder(X, scalar, P1, P2, A24, BIGMONT_SIDH_SIDH_NBITS_ORDER, BIGMONT_MAXBITS_ORDER, CurveIsogeny); + + oqs_sidh_cln16_fpinv751_mont(P1->Z); + oqs_sidh_cln16_fpmul751_mont(P1->X, P1->Z, (digit_t *) xout); + oqs_sidh_cln16_from_mont((digit_t *) xout, (digit_t *) xout); // Conversion to standard representation + + return SIDH_CRYPTO_SUCCESS; +} + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_secret_pt(const oqs_sidh_cln16_point_basefield_t P, const digit_t *m, const unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t R, PCurveIsogenyStruct CurveIsogeny) { // Computes key generation entirely in the base field by exploiting a 1-dimensional Montgomery ladder in the trace zero subgroup and + // recovering the y-coordinate for the addition. All operations in the base field GF(p). + // Input: The scalar m, point P = (x,y) on E in the base field subgroup and Q = (x1,y1*i) on E in the trace-zero subgroup. + // x,y,x1,y1 are all in the base field. + // Output: R = (RX0+RX1*i)/RZ0 (the x-coordinate of P+[m]Q). 
+ unsigned int nbits; + oqs_sidh_cln16_point_basefield_t Q; + oqs_sidh_cln16_point_basefield_proj_t S, T; + digit_t *X0 = (digit_t *) S->X, *Z0 = (digit_t *) S->Z, *X1 = (digit_t *) T->X, *Z1 = (digit_t *) T->Z; + digit_t *x = (digit_t *) P->x, *y = (digit_t *) P->y, *x1 = (digit_t *) Q->x, *y1 = (digit_t *) Q->y; + digit_t scalar[SIDH_NWORDS_ORDER]; + oqs_sidh_cln16_felm_t t0, t1, t2, A24 = {0}; + digit_t *RX0 = (digit_t *) R->X[0], *RX1 = (digit_t *) R->X[1], *RZ0 = (digit_t *) R->Z[0], *RZ1 = (digit_t *) R->Z[1]; + + oqs_sidh_cln16_fpcopy751(P->x, Q->x); // Q = (-XP,YP) + oqs_sidh_cln16_fpcopy751(P->y, Q->y); + oqs_sidh_cln16_fpneg751(Q->x); + + if (AliceOrBob == SIDH_ALICE) { + nbits = CurveIsogeny->oAbits; + } else if (AliceOrBob == SIDH_BOB) { + nbits = CurveIsogeny->oBbits; + } else { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + // Setting curve constant to one (in standard representation), used in xDBLADD_basefield() in the ladder computation + A24[0] = 1; + oqs_sidh_cln16_copy_words(m, scalar, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_ladder(Q->x, scalar, S, T, A24, nbits, CurveIsogeny->owordbits, CurveIsogeny); + + //RX0 = (2*y*y1*Z0^2*Z1 + Z1*(X0*x1+Z0)*(X0+x1*Z0) - X1*(X0-x1*Z0)^2)*(2*y*y1*Z0^2*Z1 - Z1*(X0*x1+Z0)*(X0+x1*Z0) + X1*(X0-x1*Z0)^2) - 4*y1^2*Z0*Z1^2*(X0+x*Z0)*(X0-x*Z0)^2; + //RX1 = 4*y*y1*Z0^2*Z1*(Z1*(X0*x1+Z0)*(X0+x1*Z0) - X1*(X0-x1*Z0)^2); + //RZ0 = 4*y1^2*Z0^2*Z1^2*(X0-x*Z0)^2; + + oqs_sidh_cln16_fpmul751_mont(x1, Z0, RX1); + oqs_sidh_cln16_fpmul751_mont(X0, x1, RX0); + oqs_sidh_cln16_fpsub751(X0, RX1, t0); + oqs_sidh_cln16_fpadd751(X0, RX1, RX1); + oqs_sidh_cln16_fpsqr751_mont(t0, t0); + oqs_sidh_cln16_fpadd751(RX0, Z0, RX0); + oqs_sidh_cln16_fpmul751_mont(t0, X1, t0); + oqs_sidh_cln16_fpmul751_mont(RX0, RX1, RX0); + oqs_sidh_cln16_fpmul751_mont(y1, Z1, t2); + oqs_sidh_cln16_fpmul751_mont(y, Z0, t1); + oqs_sidh_cln16_fpadd751(t2, t2, t2); + oqs_sidh_cln16_fpmul751_mont(t2, Z0, RX1); + oqs_sidh_cln16_fpmul751_mont(RX0, Z1, RX0); + 
oqs_sidh_cln16_fpsub751(RX0, t0, RX0); + oqs_sidh_cln16_fpmul751_mont(t1, RX1, t1); + oqs_sidh_cln16_fpsqr751_mont(RX1, t0); + oqs_sidh_cln16_fpmul751_mont(t2, RX1, t2); + oqs_sidh_cln16_fpmul751_mont(t1, RX0, RX1); + oqs_sidh_cln16_fpadd751(t1, RX0, RZ0); + oqs_sidh_cln16_fpadd751(RX1, RX1, RX1); + oqs_sidh_cln16_fpsub751(t1, RX0, t1); + oqs_sidh_cln16_fpmul751_mont(x, Z0, RX0); + oqs_sidh_cln16_fpmul751_mont(t1, RZ0, t1); + oqs_sidh_cln16_fpsub751(X0, RX0, RZ0); + oqs_sidh_cln16_fpadd751(X0, RX0, RX0); + oqs_sidh_cln16_fpsqr751_mont(RZ0, RZ0); + oqs_sidh_cln16_fpmul751_mont(t2, RX0, t2); + oqs_sidh_cln16_fpmul751_mont(t2, RZ0, t2); + oqs_sidh_cln16_fpmul751_mont(RZ0, t0, RZ0); + oqs_sidh_cln16_fpsub751(t1, t2, RX0); + oqs_sidh_cln16_fpzero751(RZ1); + + return SIDH_CRYPTO_SUCCESS; +} + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_ladder_3_pt(const oqs_sidh_cln16_f2elm_t xP, const oqs_sidh_cln16_f2elm_t xQ, const oqs_sidh_cln16_f2elm_t xPQ, const digit_t *m, const unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t W, const oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { // Computes P+[m]Q via x-only arithmetic. Algorithm by De Feo, Jao and Plut. + // Input: three affine points xP,xQ,xPQ and Montgomery constant A. 
+ // Output: projective Montgomery x-coordinates of x(P+[m]Q)=WX/WZ + oqs_sidh_cln16_point_proj_t U = {0}, V = {0}; + oqs_sidh_cln16_f2elm_t A24, A24num, constant1 = {0}, constant2; + oqs_sidh_cln16_felm_t temp_scalar; + unsigned int bit = 0, nbits, fullbits = CurveIsogeny->owordbits; + digit_t mask; + int i; + + if (AliceOrBob == SIDH_ALICE) { + nbits = CurveIsogeny->oAbits; + } else if (AliceOrBob == SIDH_BOB) { + nbits = CurveIsogeny->oBbits; + } else { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, constant1[0]); + oqs_sidh_cln16_fp2add751(constant1, constant1, constant1); // constant = 2 + oqs_sidh_cln16_fp2add751(A, constant1, A24num); + oqs_sidh_cln16_fp2div2_751(A24num, A24); + oqs_sidh_cln16_fp2div2_751(A24, A24); + + // Initializing with the points (1:0), (xQ:1) and (xP:1) + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) U->X); + oqs_sidh_cln16_fp2copy751(xQ, V->X); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) V->Z); + oqs_sidh_cln16_fp2copy751(xP, W->X); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) W->Z); + oqs_sidh_cln16_fpzero751(W->Z[1]); + oqs_sidh_cln16_copy_words(m, temp_scalar, SIDH_NWORDS_ORDER); + + for (i = fullbits - nbits; i > 0; i--) { + oqs_sidh_cln16_mp_shiftl1(temp_scalar, SIDH_NWORDS_ORDER); + } + + for (i = nbits; i > 0; i--) { + bit = (unsigned int) (temp_scalar[SIDH_NWORDS_ORDER - 1] >> (RADIX - 1)); + oqs_sidh_cln16_mp_shiftl1(temp_scalar, SIDH_NWORDS_ORDER); + mask = 0 - (digit_t) bit; + + oqs_sidh_cln16_swap_points(W, U, mask); + oqs_sidh_cln16_swap_points(U, V, mask); + oqs_sidh_cln16_select_f2elm(xP, xQ, constant1, mask); + oqs_sidh_cln16_select_f2elm(xQ, xPQ, constant2, mask); + oqs_sidh_cln16_xADD(W, U, constant1); // If bit=0 then W <- W+U, U <- 2*U and V <- U+V, + oqs_sidh_cln16_xDBLADD(U, V, constant2, A24); // else if bit=1 then U <- U+V, V <- 2*V and W <- V+W + oqs_sidh_cln16_swap_points(U, V, 
mask); + oqs_sidh_cln16_swap_points(W, U, mask); + } + + return SIDH_CRYPTO_SUCCESS; +} + +void oqs_sidh_cln16_get_4_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t *coeff) { // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. + // Input: projective point of order four P = (X4:Z4). + // Output: the 4-isogenous Montgomery curve with projective coefficient A/C and the 5 coefficients + // that are used to evaluate the isogeny at a point in eval_4_isog(). + + oqs_sidh_cln16_fp2add751(P->X, P->Z, coeff[0]); // coeff[0] = X4+Z4 + oqs_sidh_cln16_fp2sqr751_mont(P->X, coeff[3]); // coeff[3] = X4^2 + oqs_sidh_cln16_fp2sqr751_mont(P->Z, coeff[4]); // coeff[4] = Z4^2 + oqs_sidh_cln16_fp2sqr751_mont(coeff[0], coeff[0]); // coeff[0] = (X4+Z4)^2 + oqs_sidh_cln16_fp2add751(coeff[3], coeff[4], coeff[1]); // coeff[1] = X4^2+Z4^2 + oqs_sidh_cln16_fp2sub751(coeff[3], coeff[4], coeff[2]); // coeff[2] = X4^2-Z4^2 + oqs_sidh_cln16_fp2sqr751_mont(coeff[3], coeff[3]); // coeff[3] = X4^4 + oqs_sidh_cln16_fp2sqr751_mont(coeff[4], coeff[4]); // coeff[4] = Z4^4 + oqs_sidh_cln16_fp2add751(coeff[3], coeff[3], A); // A = 2*X4^4 + oqs_sidh_cln16_fp2sub751(coeff[0], coeff[1], coeff[0]); // coeff[0] = 2*X4*Z4 = (X4+Z4)^2 - (X4^2+Z4^2) + oqs_sidh_cln16_fp2sub751(A, coeff[4], A); // A = 2*X4^4-Z4^4 + oqs_sidh_cln16_fp2copy751(coeff[4], C); // C = Z4^4 + oqs_sidh_cln16_fp2add751(A, A, A); // A = 2(2*X4^4-Z4^4) +} + +void oqs_sidh_cln16_eval_4_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t *coeff) { // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined + // by the 5 coefficients in coeff (computed in the function four_isogeny_from_projective_kernel()). + // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z). + // Output: the projective point P = phi(P) = (X:Z) in the codomain. 
+ oqs_sidh_cln16_f2elm_t t0, t1; + + oqs_sidh_cln16_fp2mul751_mont(P->X, coeff[0], P->X); // X = coeff[0]*X + oqs_sidh_cln16_fp2mul751_mont(P->Z, coeff[1], t0); // t0 = coeff[1]*Z + oqs_sidh_cln16_fp2sub751(P->X, t0, P->X); // X = X-t0 + oqs_sidh_cln16_fp2mul751_mont(P->Z, coeff[2], P->Z); // Z = coeff[2]*Z + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t0); // t0 = X-Z + oqs_sidh_cln16_fp2mul751_mont(P->Z, P->X, P->Z); // Z = X*Z + oqs_sidh_cln16_fp2sqr751_mont(t0, t0); // t0 = t0^2 + oqs_sidh_cln16_fp2add751(P->Z, P->Z, P->Z); // Z = Z+Z + oqs_sidh_cln16_fp2add751(P->Z, P->Z, P->Z); // Z = Z+Z + oqs_sidh_cln16_fp2add751(P->Z, t0, P->X); // X = t0+Z + oqs_sidh_cln16_fp2mul751_mont(P->Z, t0, P->Z); // Z = t0*Z + oqs_sidh_cln16_fp2mul751_mont(P->Z, coeff[4], P->Z); // Z = coeff[4]*Z + oqs_sidh_cln16_fp2mul751_mont(t0, coeff[4], t0); // t0 = t0*coeff[4] + oqs_sidh_cln16_fp2mul751_mont(P->X, coeff[3], t1); // t1 = X*coeff[3] + oqs_sidh_cln16_fp2sub751(t0, t1, t0); // t0 = t0-t1 + oqs_sidh_cln16_fp2mul751_mont(P->X, t0, P->X); // X = X*t0 +} + +void oqs_sidh_cln16_first_4_isog(oqs_sidh_cln16_point_proj_t P, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t Aout, oqs_sidh_cln16_f2elm_t Cout, PCurveIsogenyStruct CurveIsogeny) { // Computes first 4-isogeny computed by Alice. + // Inputs: projective point P = (X4:Z4) and curve constant A. + // Output: the projective point P = (X4:Z4) in the codomain and isogenous curve constant Aout/Cout. 
+ oqs_sidh_cln16_f2elm_t t0 = {0}, t1, t2; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, t0[0]); + oqs_sidh_cln16_fpadd751(t0[0], t0[0], t0[0]); // t0 = 2 (in Montgomery domain) + oqs_sidh_cln16_fp2sub751(A, t0, Cout); // Cout = A-2 + oqs_sidh_cln16_fpadd751(t0[0], t0[0], t1[0]); + oqs_sidh_cln16_fpadd751(t0[0], t1[0], t0[0]); // t0 = 6 (in Montgomery domain) + oqs_sidh_cln16_fp2add751(P->X, P->Z, t1); // t1 = X+Z + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t2); // t2 = X-Z + oqs_sidh_cln16_fp2sqr751_mont(t1, t1); // t1 = (X+Z)^2 + oqs_sidh_cln16_fp2add751(A, t0, Aout); // A = A+6 + oqs_sidh_cln16_fp2mul751_mont(P->X, P->Z, P->Z); // Z = X*Z + oqs_sidh_cln16_fp2neg751(P->Z); // Z = -X*Z + oqs_sidh_cln16_fp2sqr751_mont(t2, t2); // t2 = (X-Z)^2 + oqs_sidh_cln16_fp2mul751_mont(P->Z, Cout, P->Z); // Z = -C*X*Z + oqs_sidh_cln16_fp2add751(Aout, Aout, Aout); // Aout = 2*A+12 + oqs_sidh_cln16_fp2sub751(t1, P->Z, P->X); // X = (X+Z)^2+C*X*Z + oqs_sidh_cln16_fp2mul751_mont(P->Z, t2, P->Z); // Z = -C*X*Z*(X-Z)^2 + oqs_sidh_cln16_fp2mul751_mont(P->X, t1, P->X); // X = (X+Z)^2*[(X+Z)^2+C*X*Z] +} + +void oqs_sidh_cln16_xTPL(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const oqs_sidh_cln16_f2elm_t C24) { // Tripling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constant A/C. + // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3). 
+ oqs_sidh_cln16_f2elm_t t0, t1, t2, t3, t4, t5; + + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t2); // t2 = X-Z + oqs_sidh_cln16_fp2add751(P->X, P->Z, t3); // t3 = X+Z + oqs_sidh_cln16_fp2sqr751_mont(t2, t0); // t0 = t2^2 + oqs_sidh_cln16_fp2sqr751_mont(t3, t1); // t1 = t3^2 + oqs_sidh_cln16_fp2mul751_mont(t0, C24, t4); // t4 = C24*t0 + oqs_sidh_cln16_fp2mul751_mont(t1, t4, t5); // t5 = t4*t1 + oqs_sidh_cln16_fp2sub751(t1, t0, t1); // t1 = t1-t0 + oqs_sidh_cln16_fp2mul751_mont(A24, t1, t0); // t0 = A24*t1 + oqs_sidh_cln16_fp2add751(t4, t0, t4); // t4 = t4+t0 + oqs_sidh_cln16_fp2mul751_mont(t1, t4, t4); // t4 = t4*t1 + oqs_sidh_cln16_fp2add751(t5, t4, t0); // t0 = t5+t4 + oqs_sidh_cln16_fp2sub751(t5, t4, t1); // t1 = t5-t4 + oqs_sidh_cln16_fp2mul751_mont(t0, t2, t0); // t0 = t2*t0 + oqs_sidh_cln16_fp2mul751_mont(t1, t3, t1); // t1 = t3*t1 + oqs_sidh_cln16_fp2sub751(t0, t1, t4); // t4 = t0-t1 + oqs_sidh_cln16_fp2add751(t0, t1, t5); // t5 = t0+t1 + oqs_sidh_cln16_fp2sqr751_mont(t4, t4); // t4 = t4^2 + oqs_sidh_cln16_fp2sqr751_mont(t5, t5); // t5 = t5^2 + oqs_sidh_cln16_fp2mul751_mont(P->X, t4, t4); // t4 = X*t4 + oqs_sidh_cln16_fp2mul751_mont(P->Z, t5, Q->X); // X3 = Z*t5 + oqs_sidh_cln16_fp2copy751(t4, Q->Z); // Z3 = t4 +} + +void oqs_sidh_cln16_xTPLe(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, const int e) { // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. + // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constant A/C. + // Output: projective Montgomery x-coordinates Q <- (3^e)*P. 
+ oqs_sidh_cln16_f2elm_t A24, C24; + int i; + + oqs_sidh_cln16_fp2add751(C, C, A24); + oqs_sidh_cln16_fp2add751(A24, A24, C24); + oqs_sidh_cln16_fp2add751(A24, A, A24); + oqs_sidh_cln16_copy_words((digit_t *) P, (digit_t *) Q, 2 * 2 * NWORDS_FIELD); + + for (i = 0; i < e; i++) { + oqs_sidh_cln16_xTPL(Q, Q, A24, C24); + } +} + +void oqs_sidh_cln16_get_3_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C) { // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. + // Input: projective point of order three P = (X3:Z3). + // Output: the 3-isogenous Montgomery curve with projective coefficient A/C. + oqs_sidh_cln16_f2elm_t t0, t1; + + oqs_sidh_cln16_fp2sqr751_mont(P->X, t0); // t0 = X^2 + oqs_sidh_cln16_fp2add751(t0, t0, t1); // t1 = 2*t0 + oqs_sidh_cln16_fp2add751(t0, t1, t0); // t0 = t0+t1 + oqs_sidh_cln16_fp2sqr751_mont(P->Z, t1); // t1 = Z^2 + oqs_sidh_cln16_fp2sqr751_mont(t1, A); // A = t1^2 + oqs_sidh_cln16_fp2add751(t1, t1, t1); // t1 = 2*t1 + oqs_sidh_cln16_fp2add751(t1, t1, C); // C = 2*t1 + oqs_sidh_cln16_fp2sub751(t0, t1, t1); // t1 = t0-t1 + oqs_sidh_cln16_fp2mul751_mont(t0, t1, t1); // t1 = t0*t1 + oqs_sidh_cln16_fp2sub751(A, t1, A); // A = A-t1 + oqs_sidh_cln16_fp2sub751(A, t1, A); // A = A-t1 + oqs_sidh_cln16_fp2sub751(A, t1, A); // A = A-t1 + oqs_sidh_cln16_fp2mul751_mont(P->X, P->Z, t1); // t1 = X*Z // ms trade-off possible (1 mul for 1sqr + 1add + 2sub) + oqs_sidh_cln16_fp2mul751_mont(C, t1, C); // C = C*t1 +} + +void oqs_sidh_cln16_eval_3_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q) { // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and a point P = (X:Z). + // Inputs: projective points P = (X3:Z3) and Q = (X:Z). + // Output: the projective point Q <- phi(Q) = (XX:ZZ). 
+ oqs_sidh_cln16_f2elm_t t0, t1, t2; + + oqs_sidh_cln16_fp2mul751_mont(P->X, Q->X, t0); // t0 = X3*X + oqs_sidh_cln16_fp2mul751_mont(P->Z, Q->X, t1); // t1 = Z3*X + oqs_sidh_cln16_fp2mul751_mont(P->Z, Q->Z, t2); // t2 = Z3*Z + oqs_sidh_cln16_fp2sub751(t0, t2, t0); // t0 = X3*X-Z3*Z + oqs_sidh_cln16_fp2mul751_mont(P->X, Q->Z, t2); // t2 = X3*Z + oqs_sidh_cln16_fp2sub751(t1, t2, t1); // t1 = Z3*X-X3*Z + oqs_sidh_cln16_fp2sqr751_mont(t0, t0); // t0 = (X3*X-Z3*Z)^2 + oqs_sidh_cln16_fp2sqr751_mont(t1, t1); // t1 = (Z3*X-X3*Z)^2 + oqs_sidh_cln16_fp2mul751_mont(Q->X, t0, Q->X); // X = X*(X3*X-Z3*Z)^2 + oqs_sidh_cln16_fp2mul751_mont(Q->Z, t1, Q->Z); // Z = Z*(Z3*X-X3*Z)^2 +} + +void oqs_sidh_cln16_inv_3_way(oqs_sidh_cln16_f2elm_t z1, oqs_sidh_cln16_f2elm_t z2, oqs_sidh_cln16_f2elm_t z3) { // 3-way simultaneous inversion + // Input: z1,z2,z3 + // Output: 1/z1,1/z2,1/z3 (override inputs). + oqs_sidh_cln16_f2elm_t t0, t1, t2, t3; + + oqs_sidh_cln16_fp2mul751_mont(z1, z2, t0); // t0 = z1*z2 + oqs_sidh_cln16_fp2mul751_mont(z3, t0, t1); // t1 = z1*z2*z3 + oqs_sidh_cln16_fp2inv751_mont(t1); // t1 = 1/(z1*z2*z3) + oqs_sidh_cln16_fp2mul751_mont(z3, t1, t2); // t2 = 1/(z1*z2) + oqs_sidh_cln16_fp2mul751_mont(t2, z2, t3); // t3 = 1/z1 + oqs_sidh_cln16_fp2mul751_mont(t2, z1, z2); // z2 = 1/z2 + oqs_sidh_cln16_fp2mul751_mont(t0, t1, z3); // z3 = 1/z3 + oqs_sidh_cln16_fp2copy751(t3, z1); // z1 = 1/z1 +} + +void oqs_sidh_cln16_distort_and_diff(const oqs_sidh_cln16_felm_t xP, oqs_sidh_cln16_point_proj_t D, PCurveIsogenyStruct CurveIsogeny) { // Computing the point (x(Q-P),z(Q-P)) + // Input: coordinate xP of point P=(xP,yP) + // Output: the point D = (x(Q-P),z(Q-P)), where Q=tau(P). 
+ oqs_sidh_cln16_felm_t one; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one); + oqs_sidh_cln16_fpsqr751_mont(xP, D->X[0]); // XD = xP^2 + oqs_sidh_cln16_fpadd751(D->X[0], one, D->X[0]); // XD = XD+1 + oqs_sidh_cln16_fpcopy751(D->X[0], D->X[1]); // XD = XD*i + oqs_sidh_cln16_fpzero751(D->X[0]); + oqs_sidh_cln16_fpadd751(xP, xP, D->Z[0]); // ZD = xP+xP +} + +void oqs_sidh_cln16_get_A(const oqs_sidh_cln16_f2elm_t xP, const oqs_sidh_cln16_f2elm_t xQ, const oqs_sidh_cln16_f2elm_t xR, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. + // Input: the x-coordinates xP, xQ, and xR of the points P, Q and R. + // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x. + oqs_sidh_cln16_f2elm_t t0, t1, one = {0}; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_fp2add751(xP, xQ, t1); // t1 = xP+xQ + oqs_sidh_cln16_fp2mul751_mont(xP, xQ, t0); // t0 = xP*xQ + oqs_sidh_cln16_fp2mul751_mont(xR, t1, A); // A = xR*t1 + oqs_sidh_cln16_fp2add751(t0, A, A); // A = A+t0 + oqs_sidh_cln16_fp2mul751_mont(t0, xR, t0); // t0 = t0*xR + oqs_sidh_cln16_fp2sub751(A, one, A); // A = A-1 + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2add751(t1, xR, t1); // t1 = t1+xR + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2sqr751_mont(A, A); // A = A^2 + oqs_sidh_cln16_fp2inv751_mont(t0); // t0 = 1/t0 + oqs_sidh_cln16_fp2mul751_mont(A, t0, A); // A = A*t0 + oqs_sidh_cln16_fp2sub751(A, t1, A); // Afinal = A-t1 +} + +/////////////////////////////////////////////////////////////////////////////////// +/////////////// FUNCTIONS FOR COMPRESSION /////////////// + +static void get_point_notin_2E(oqs_sidh_cln16_felm_t alpha, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_felm_t one, const oqs_sidh_cln16_felm_t four, const 
oqs_sidh_cln16_felm_t value47, const oqs_sidh_cln16_felm_t value52) { // Inputs: alpha, a small integer (parsed in Fp), + // Montgomery coefficient A = A0+A1*i. + // Output: alpha such that alpha*u = alpha*(i+4) is a good x-coordinate, which means it corresponds to a point P not in [2]E. + // Then, [3^eB]P has full order 2^eA. + digit_t *A0 = (digit_t *) A[0], *A1 = (digit_t *) A[1]; + oqs_sidh_cln16_felm_t X0, X1, x0, x1, t0, sqrt, X0_temp = {0}, X1_temp = {0}, alpha52 = {0}, alpha52_2 = {0}, alpha47 = {0}, alpha47_2 = {0}; + unsigned int i; + + oqs_sidh_cln16_fpsub751(A0, A1, x0); // x0 = A0-A1 + oqs_sidh_cln16_fpadd751(x0, A0, x0); // x0 = x0+A0 + oqs_sidh_cln16_fpadd751(x0, x0, x0); + oqs_sidh_cln16_fpadd751(x0, x0, x0); + oqs_sidh_cln16_fpadd751(x0, x0, x0); // x0 = 8*x0 + oqs_sidh_cln16_fpsub751(x0, A0, X0); // X0 = x0-A0 + oqs_sidh_cln16_fpadd751(A0, A1, x1); // x1 = A0+A1 + oqs_sidh_cln16_fpadd751(x1, A1, x1); // x1 = x1+A1 + oqs_sidh_cln16_fpadd751(x1, x1, x1); + oqs_sidh_cln16_fpadd751(x1, x1, x1); + oqs_sidh_cln16_fpadd751(x1, x1, x1); // x1 = 8*x1 + oqs_sidh_cln16_fpsub751(x1, A1, X1); // X1 = x1-A1 + oqs_sidh_cln16_fpmul751_mont(alpha, value52, alpha52); // alpha52 = 52*alpha + oqs_sidh_cln16_fpmul751_mont(X0, alpha, X0_temp); // X0*alpha + oqs_sidh_cln16_fpmul751_mont(alpha52, alpha, alpha52_2); // alpha52^2 = 52*alpha^2 + oqs_sidh_cln16_fpmul751_mont(alpha, value47, alpha47); // alpha47 = 47*alpha + oqs_sidh_cln16_fpmul751_mont(X1, alpha, X1_temp); // X0*alpha + oqs_sidh_cln16_fpmul751_mont(alpha47, alpha, alpha47_2); // alpha47^2 = 47*alpha^2 + + do { + oqs_sidh_cln16_fpadd751(alpha, one, alpha); // alpha += 1 + oqs_sidh_cln16_fpadd751(X0_temp, X0, X0_temp); // X0*alpha + oqs_sidh_cln16_fpadd751(alpha52, value52, t0); // t0 = 52*alpha52 + 52 + oqs_sidh_cln16_fpadd751(alpha52, t0, alpha52); // 2*52*alpha52 + 52 + oqs_sidh_cln16_fpadd751(alpha52_2, alpha52, alpha52_2); // 52*alpha^2 = 52*alpha52^2 + 2*52*alpha52 + 52 + oqs_sidh_cln16_fpcopy751(t0, 
alpha52); // 52*alpha = 52*alpha52 + 52 + oqs_sidh_cln16_fpadd751(alpha52_2, four, x0); // 52*alpha^2 + 4 + oqs_sidh_cln16_fpadd751(X0_temp, x0, x0); // x0 = X0*alpha + 52*alpha^2 + 4 + oqs_sidh_cln16_fpadd751(X1_temp, X1, X1_temp); // X1*alpha + oqs_sidh_cln16_fpadd751(alpha47, value47, t0); // t0 = 47*alpha47 + 47 + oqs_sidh_cln16_fpadd751(alpha47, t0, alpha47); // 2*47*alpha52 + 47 + oqs_sidh_cln16_fpadd751(alpha47_2, alpha47, alpha47_2); // 47*alpha^2 = 47*alpha52^2 + 2*47*alpha52 + 47 + oqs_sidh_cln16_fpcopy751(t0, alpha47); // 47*alpha = 47*alpha52 + 47 + oqs_sidh_cln16_fpadd751(alpha47_2, one, x1); // 47*alpha^2 + 1 + oqs_sidh_cln16_fpadd751(X1_temp, x1, x1); // x0 = X0*alpha + 47*alpha^2 + 1 + oqs_sidh_cln16_fpsqr751_mont(x0, x0); // x0 = x0^2 + oqs_sidh_cln16_fpsqr751_mont(x1, x1); // x1 = x1^2 + oqs_sidh_cln16_fpsqr751_mont(alpha, t0); // t0 = alpha^2 + oqs_sidh_cln16_fpadd751(x0, x1, x0); // x0 = x0+x1 + oqs_sidh_cln16_fpmul751_mont(t0, x0, t0); // t0 = t0*x0 + oqs_sidh_cln16_fpcopy751(t0, sqrt); + for (i = 0; i < 371; i++) { // sqrt = t0^((p+1) div 2) + oqs_sidh_cln16_fpsqr751_mont(sqrt, sqrt); + } + for (i = 0; i < 239; i++) { + oqs_sidh_cln16_fpsqr751_mont(sqrt, x0); + oqs_sidh_cln16_fpmul751_mont(sqrt, x0, sqrt); + } + oqs_sidh_cln16_fpcorrection751(sqrt); + oqs_sidh_cln16_fpcorrection751(t0); + } while (oqs_sidh_cln16_fpequal751_non_constant_time(sqrt, t0) == false); +} + +void oqs_sidh_cln16_generate_2_torsion_basis(const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R1, oqs_sidh_cln16_point_full_proj_t R2, PCurveIsogenyStruct CurveIsogeny) { // Produces points R1 and R2 such that {R1, R2} is a basis for E[2^372]. + // Input: curve constant A. + // Outputs: R1 = (X1:Y1:Z1) and R2 = (X2:Y2:Z2). 
+ oqs_sidh_cln16_point_proj_t P, Q, P1 = {0}, P2 = {0}; + oqs_sidh_cln16_felm_t *X1 = (oqs_sidh_cln16_felm_t *) P1->X, *Z1 = (oqs_sidh_cln16_felm_t *) P1->Z; + oqs_sidh_cln16_felm_t *X2 = (oqs_sidh_cln16_felm_t *) P2->X, *Z2 = (oqs_sidh_cln16_felm_t *) P2->Z; + oqs_sidh_cln16_felm_t *XP = (oqs_sidh_cln16_felm_t *) P->X, *ZP = (oqs_sidh_cln16_felm_t *) P->Z; + oqs_sidh_cln16_felm_t *XQ = (oqs_sidh_cln16_felm_t *) Q->X, *ZQ = (oqs_sidh_cln16_felm_t *) Q->Z; + oqs_sidh_cln16_felm_t *Y1 = (oqs_sidh_cln16_felm_t *) R1->Y, *Y2 = (oqs_sidh_cln16_felm_t *) R2->Y; + oqs_sidh_cln16_felm_t zero, alpha = {0}; + oqs_sidh_cln16_f2elm_t t0, t1, one = {0}; + oqs_sidh_cln16_felm_t four, value47 = {0}, value52 = {0}; + + oqs_sidh_cln16_fpzero751(zero); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]); + + value47[0] = 47; + value52[0] = 52; + oqs_sidh_cln16_to_mont(value47, value47); + oqs_sidh_cln16_to_mont(value52, value52); + oqs_sidh_cln16_fpadd751(one[0], one[0], four); + oqs_sidh_cln16_fpadd751(four, four, four); + + get_point_notin_2E(alpha, A, one[0], four, value47, value52); + oqs_sidh_cln16_fpcopy751(alpha, X1[1]); + oqs_sidh_cln16_fpadd751(alpha, alpha, X1[0]); + oqs_sidh_cln16_fpadd751(X1[0], X1[0], X1[0]); // X1 = alpha*i + alpha*4 + oqs_sidh_cln16_fpcopy751(one[0], Z1[0]); // Z1 = 1 + + oqs_sidh_cln16_xTPLe(P1, P1, A, one, 239); // xTPL assumes projective constant, but this is minor + oqs_sidh_cln16_xDBLe(P1, P, A, one, 371); + + // This loop is necessary to ensure that the order of the WeilPairing is oA and not smaller. + // This ensures that we have a basis. 
+ do { + get_point_notin_2E(alpha, A, one[0], four, value47, value52); + oqs_sidh_cln16_fpcopy751(alpha, X2[1]); + oqs_sidh_cln16_fpadd751(alpha, alpha, X2[0]); + oqs_sidh_cln16_fpadd751(X2[0], X2[0], X2[0]); // X2 = alpha*i + alpha*4 + oqs_sidh_cln16_fpzero751(Z2[1]); + oqs_sidh_cln16_fpcopy751(one[0], Z2[0]); // Z2 = 1 + oqs_sidh_cln16_xTPLe(P2, P2, A, one, 239); // xTPL assumes projective constant, but this is minor + oqs_sidh_cln16_xDBLe(P2, Q, A, one, 371); + oqs_sidh_cln16_fp2mul751_mont(XP, ZQ, t0); // t0 = XP*ZQ + oqs_sidh_cln16_fp2mul751_mont(XQ, ZP, t1); // t1 = XQ*ZP + oqs_sidh_cln16_fp2sub751(t0, t1, t0); // t0 = XP*ZQ-XQ*ZP + oqs_sidh_cln16_fp2correction751(t0); + } while (oqs_sidh_cln16_fpequal751_non_constant_time(t0[0], zero) == true && oqs_sidh_cln16_fpequal751_non_constant_time(t0[1], zero) == true); + + oqs_sidh_cln16_fp2copy751(X1, R1->X); + oqs_sidh_cln16_fp2copy751(Z1, R1->Z); + oqs_sidh_cln16_fp2copy751(X2, R2->X); + oqs_sidh_cln16_fp2copy751(Z2, R2->Z); + + // Recover the y-coordinates. 
+ oqs_sidh_cln16_fp2sqr751_mont(Z1, t0); // t0 = Z1^2 + oqs_sidh_cln16_fp2mul751_mont(A, Z1, Y1); // Y1 = A*Z1 + oqs_sidh_cln16_fp2add751(X1, Y1, Y1); // Y1 = X1+Y1 + oqs_sidh_cln16_fp2mul751_mont(X1, Y1, Y1); // Y1 = Y1*X1 + oqs_sidh_cln16_fp2add751(t0, Y1, Y1); // Y1 = Y1+t0 + oqs_sidh_cln16_fp2mul751_mont(X1, Y1, Y1); // Y1 = Y1*X1 + oqs_sidh_cln16_fp2mul751_mont(t0, Z1, t0); // t0 = t0*Z1 + oqs_sidh_cln16_sqrt_Fp2_frac(Y1, t0, t1); // t1 = sqrt(Y1/t0) + + oqs_sidh_cln16_fp2sqr751_mont(Z2, t0); // t0 = Z2^2 + oqs_sidh_cln16_fp2mul751_mont(A, Z2, Y2); // Y2 = A*Z2 + oqs_sidh_cln16_fp2add751(X2, Y2, Y2); // Y2 = X2+Y2 + oqs_sidh_cln16_fp2mul751_mont(Y2, X2, Y2); // Y2 = Y2*X2 + oqs_sidh_cln16_fp2add751(t0, Y2, Y2); // Y2 = Y2+t0 + oqs_sidh_cln16_fp2mul751_mont(Y2, X2, Y2); // Y2 = Y2*X2 + oqs_sidh_cln16_fp2mul751_mont(t0, Z2, t0); // t0 = t0*Z2 + oqs_sidh_cln16_fp2mul751_mont(t1, Z1, Y1); // Y1 = t1*Z1 + oqs_sidh_cln16_sqrt_Fp2_frac(Y2, t0, t1); // t1 = sqrt(Y2/t0) + oqs_sidh_cln16_fp2mul751_mont(Z2, t1, Y2); // Y2 = t1*Z2 +} + +static uint64_t sqrt17[SIDH_NWORDS64_FIELD] = {0x89127CDB8966913D, 0xF788014C8C8401A0, 0x1A16F73884F3E3E8, 0x2E67382B560FA195, 0xDD5EE869B7F4FD81, 0x16A0849EF695EFEB, + 0x3675244609DE1963, 0x36F02976EF2EB241, 0x92D09F939A20637F, 0x41496905F2B0112C, 0xA94C09B1F7242495, 0x0000297652D36A97}; + +static void get_X_on_curve(oqs_sidh_cln16_f2elm_t A, unsigned int *r, oqs_sidh_cln16_f2elm_t x, oqs_sidh_cln16_felm_t t1, oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t b) { // Elligator2 for X + oqs_sidh_cln16_felm_t v0, v1, r0, r1, t0, t2, t3, rsq = {0}; + unsigned int i; + + oqs_sidh_cln16_fpcopy751(((oqs_sidh_cln16_felm_t *) &LIST)[(*r << 1) - 2], r1); // r1 = list[2*r-1] + oqs_sidh_cln16_fpcopy751(((oqs_sidh_cln16_felm_t *) &LIST)[(*r << 1) - 1], r0); // r0 = list[2*r] + rsq[0] = (digit_t)(*r) * (*r); // rsp = r^2 + oqs_sidh_cln16_to_mont(rsq, rsq); // Converting to Montgomery representation + oqs_sidh_cln16_fpmul751_mont(A[1], r1, t0); // t0 = 
A1*r1 + oqs_sidh_cln16_fpmul751_mont(A[0], r0, v0); // v0 = A0*r0 + oqs_sidh_cln16_fpsub751(v0, t0, v0); // v0 = v0-t0 + oqs_sidh_cln16_fpmul751_mont(A[1], r0, t0); // t0 = A1*r0 + oqs_sidh_cln16_fpmul751_mont(A[0], r1, v1); // v1 = A0*r1 + oqs_sidh_cln16_fpadd751(v1, t0, v1); // v1 = v1+t0 + oqs_sidh_cln16_fpadd751(v0, A[0], t0); // t0 = v0+A0 + oqs_sidh_cln16_fpadd751(v1, A[1], t1); // t1 = v1+A1 + oqs_sidh_cln16_fpmul751_mont(v0, v1, t2); // t2 = v0*v1 + oqs_sidh_cln16_fpadd751(t2, t2, t2); // t2 = t2+t2 + oqs_sidh_cln16_fpmul751_mont(t2, A[1], a); // a = t2*A1 + oqs_sidh_cln16_fpsub751(v0, a, a); // a = v0-a + oqs_sidh_cln16_fpmul751_mont(t2, A[0], b); // b = t2*A0 + oqs_sidh_cln16_fpadd751(b, v1, b); // b = b+v1 + oqs_sidh_cln16_fpadd751(v0, v0, t2); // t2 = v0+v0 + oqs_sidh_cln16_fpadd751(t0, t2, t2); // t2 = t2+t0 + oqs_sidh_cln16_fpsqr751_mont(v0, t3); // t3 = v0^2 + oqs_sidh_cln16_fpmul751_mont(t0, t3, t0); // t0 = t0*t3 + oqs_sidh_cln16_fpadd751(a, t0, a); // a = a+t0 + oqs_sidh_cln16_fpsqr751_mont(v1, t0); // t0 = v1^2 + oqs_sidh_cln16_fpmul751_mont(t0, t2, t2); // t2 = t0*t2 + oqs_sidh_cln16_fpsub751(a, t2, a); // a = a-t2 + oqs_sidh_cln16_fpmul751_mont(t0, t1, t0); // t0 = t0*t1 + oqs_sidh_cln16_fpsub751(b, t0, b); // b = b-t0 + oqs_sidh_cln16_fpadd751(t1, v1, t1); // t1 = t1+v1 + oqs_sidh_cln16_fpadd751(v1, t1, t1); // t1 = t1+v1 + oqs_sidh_cln16_fpmul751_mont(t3, t1, t1); // t1 = t1*t3 + oqs_sidh_cln16_fpadd751(b, t1, b); // b = t1+b + oqs_sidh_cln16_fpsqr751_mont(a, t0); // t0 = a^2 + oqs_sidh_cln16_fpsqr751_mont(b, t1); // t1 = b^2 + oqs_sidh_cln16_fpadd751(t0, t1, t0); // t0 = t0+t1 + oqs_sidh_cln16_fpcopy751(t0, t1); + for (i = 0; i < 370; i++) { // t1 = t0^((p+1) div 4) + oqs_sidh_cln16_fpsqr751_mont(t1, t1); + } + for (i = 0; i < 239; i++) { + oqs_sidh_cln16_fpsqr751_mont(t1, t2); + oqs_sidh_cln16_fpmul751_mont(t1, t2, t1); + } + oqs_sidh_cln16_fpsqr751_mont(t1, t2); // t2 = t1^2 + oqs_sidh_cln16_fpcorrection751(t0); + 
oqs_sidh_cln16_fpcorrection751(t2); + if (oqs_sidh_cln16_fpequal751_non_constant_time(t0, t2) == false) { + oqs_sidh_cln16_fpadd751(v0, v0, x[0]); // x0 = v0+v0 + oqs_sidh_cln16_fpadd751(x[0], x[0], x[0]); // x0 = x0+x0 + oqs_sidh_cln16_fpsub751(x[0], v1, x[0]); // x0 = x0-v1 + oqs_sidh_cln16_fpmul751_mont(rsq, x[0], x[0]); // x0 = rsq*x0 + oqs_sidh_cln16_fpadd751(v1, v1, x[1]); // x1 = v1+v1 + oqs_sidh_cln16_fpadd751(x[1], x[1], x[1]); // x1 = x1+x1 + oqs_sidh_cln16_fpadd751(x[1], v0, x[1]); // x1 = x1+v0 + oqs_sidh_cln16_fpmul751_mont(rsq, x[1], x[1]); // x1 = rsq*x1 + oqs_sidh_cln16_fpcopy751(a, t0); // t0 = a + oqs_sidh_cln16_fpadd751(a, a, a); // a = a+a + oqs_sidh_cln16_fpadd751(a, a, a); // a = a+a + oqs_sidh_cln16_fpsub751(a, b, a); // a = a-b + oqs_sidh_cln16_fpmul751_mont(rsq, a, a); // a = rsq*a + oqs_sidh_cln16_fpadd751(b, b, b); // b = b+b + oqs_sidh_cln16_fpadd751(b, b, b); // b = b+b + oqs_sidh_cln16_fpadd751(t0, b, b); // b = b+t0 + oqs_sidh_cln16_fpmul751_mont(rsq, b, b); // b = rsq*b + oqs_sidh_cln16_fpmul751_mont(rsq, t1, t1); // t1 = t1*rsq + oqs_sidh_cln16_fpmul751_mont(t1, (digit_t *) sqrt17, t1); // t1 = t1*sqrt17 + } else { + oqs_sidh_cln16_fpcopy751(v0, x[0]); // x0 = v0 + oqs_sidh_cln16_fpcopy751(v1, x[1]); // x1 = v1 + } +} + +static void get_pt_on_curve(oqs_sidh_cln16_f2elm_t A, unsigned int *r, oqs_sidh_cln16_f2elm_t x, oqs_sidh_cln16_f2elm_t y) { // Elligator2 + oqs_sidh_cln16_felm_t t0, t1, t2, t3, a, b; + + get_X_on_curve(A, r, x, t1, a, b); + oqs_sidh_cln16_fpadd751(a, t1, t0); // t0 = a+t1 + oqs_sidh_cln16_fpdiv2_751(t0, t0); // t0 = t0/2 + oqs_sidh_cln16_fpcopy751(t0, t1); + oqs_sidh_cln16_fpinv751_chain_mont(t1); // t1 = t0^((p-3)/4) + oqs_sidh_cln16_fpmul751_mont(t0, t1, t3); // t3 = t0*t1 + oqs_sidh_cln16_fpsqr751_mont(t3, t2); // t2 = t3^2 + oqs_sidh_cln16_fpdiv2_751(t1, t1); // t1 = t1/2 + oqs_sidh_cln16_fpmul751_mont(b, t1, t1); // t1 = t1*b + oqs_sidh_cln16_fpcorrection751(t0); + oqs_sidh_cln16_fpcorrection751(t2); + + if 
(oqs_sidh_cln16_fpequal751_non_constant_time(t0, t2) == true) { + oqs_sidh_cln16_fpcopy751(t3, y[0]); // y0 = t3 + oqs_sidh_cln16_fpcopy751(t1, y[1]); // y1 = t1; + } else { + oqs_sidh_cln16_fpneg751(t3); + oqs_sidh_cln16_fpcopy751(t1, y[0]); // y0 = t1; + oqs_sidh_cln16_fpcopy751(t3, y[1]); // y1 = -t3 + } +} + +static void get_3_torsion_elt(oqs_sidh_cln16_f2elm_t A, unsigned int *r, oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t P3, unsigned int *triples, PCurveIsogenyStruct CurveIsogeny) { + oqs_sidh_cln16_point_proj_t PP; + oqs_sidh_cln16_f2elm_t A24, C24, one = {0}; + oqs_sidh_cln16_felm_t t0, t1, t2, zero = {0}; + + *triples = 0; + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_fpadd751(one[0], one[0], C24[0]); + oqs_sidh_cln16_fpzero751(C24[1]); + + get_X_on_curve(A, r, P->X, t0, t1, t2); + oqs_sidh_cln16_fp2copy751(one, P->Z); // Z = 1 + oqs_sidh_cln16_xDBLe(P, P, A, one, 372); + + oqs_sidh_cln16_fp2copy751(P->X, PP->X); // XX = X + oqs_sidh_cln16_fp2copy751(P->Z, PP->Z); // ZZ = Z + + oqs_sidh_cln16_fp2add751(A, C24, A24); // A24 = A+2 + oqs_sidh_cln16_fpadd751(C24[0], C24[0], C24[0]); // C24 = 4 + + oqs_sidh_cln16_fp2correction751(PP->Z); + while (oqs_sidh_cln16_fpequal751_non_constant_time(PP->Z[0], zero) == false || oqs_sidh_cln16_fpequal751_non_constant_time(PP->Z[1], zero) == false) { + oqs_sidh_cln16_fp2copy751(PP->X, P3->X); // X3 = XX + oqs_sidh_cln16_fp2copy751(PP->Z, P3->Z); // Z3 = ZZ + oqs_sidh_cln16_xTPL(PP, PP, A24, C24); + (*triples)++; + oqs_sidh_cln16_fp2correction751(PP->Z); + } +} + +void oqs_sidh_cln16_generate_3_torsion_basis(oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R1, oqs_sidh_cln16_point_full_proj_t R2, PCurveIsogenyStruct CurveIsogeny) { // Produces points R1 and R2 such that {R1, R2} is a basis for E[3^239]. + // Input: curve constant A. + // Outputs: R1 = (X1:Y1:Z1) and R2 = (X2:Y2:Z2). 
+ oqs_sidh_cln16_point_proj_t R, R3, R4; + oqs_sidh_cln16_felm_t *X = (oqs_sidh_cln16_felm_t *) R->X, *Z = (oqs_sidh_cln16_felm_t *) R->Z; + oqs_sidh_cln16_felm_t *X3 = (oqs_sidh_cln16_felm_t *) R3->X, *Z3 = (oqs_sidh_cln16_felm_t *) R3->Z; + oqs_sidh_cln16_felm_t *X4 = (oqs_sidh_cln16_felm_t *) R4->X, *Z4 = (oqs_sidh_cln16_felm_t *) R4->Z; + oqs_sidh_cln16_felm_t *X1 = (oqs_sidh_cln16_felm_t *) R1->X, *Y1 = (oqs_sidh_cln16_felm_t *) R1->Y, *Z1 = (oqs_sidh_cln16_felm_t *) R1->Z; + oqs_sidh_cln16_felm_t *X2 = (oqs_sidh_cln16_felm_t *) R2->X, *Y2 = (oqs_sidh_cln16_felm_t *) R2->Y, *Z2 = (oqs_sidh_cln16_felm_t *) R2->Z; + oqs_sidh_cln16_f2elm_t u, v, c, f, t0, f0, fX, fY, Y, Y3, one = {0}; + oqs_sidh_cln16_felm_t zero = {0}; + unsigned int r = 1; + unsigned int triples = 0, pts_found = 0; + + get_3_torsion_elt(A, &r, R, R3, &triples, CurveIsogeny); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_fpzero751(zero); + + if (triples == 239) { + pts_found = 1; + oqs_sidh_cln16_fp2copy751(X, X1); // X1 = X + oqs_sidh_cln16_fp2copy751(Z, Z1); // Z1 = Z + oqs_sidh_cln16_fp2mul751_mont(A, Z1, u); // u = A*Z1 + oqs_sidh_cln16_fp2add751(u, X1, u); // u = u+X1 + oqs_sidh_cln16_fp2mul751_mont(u, X1, u); // u = u*X1 + oqs_sidh_cln16_fp2sqr751_mont(Z1, v); // v = Z1^2 + oqs_sidh_cln16_fp2add751(u, v, u); // u = u+v + oqs_sidh_cln16_fp2mul751_mont(u, X1, u); // u = u*X1 + oqs_sidh_cln16_fp2mul751_mont(v, Z1, v); // v = v*Z1 + oqs_sidh_cln16_sqrt_Fp2_frac(u, v, Y1); // Y1 = sqrt(u/v) + oqs_sidh_cln16_fp2mul751_mont(Y1, Z1, Y1); // Y1 = Y1*Z1 + } + + oqs_sidh_cln16_fp2mul751_mont(A, Z3, u); // u = A*Z3 + oqs_sidh_cln16_fp2add751(u, X3, u); // u = u+X3 + oqs_sidh_cln16_fp2mul751_mont(u, X3, u); // u = u*X3 + oqs_sidh_cln16_fp2sqr751_mont(Z3, v); // v = Z3^2 + oqs_sidh_cln16_fp2add751(u, v, u); // u = u+v + oqs_sidh_cln16_fp2mul751_mont(u, X3, u); // u = u*X3 + oqs_sidh_cln16_fp2mul751_mont(v, Z3, v); // v = v*Z3 + oqs_sidh_cln16_sqrt_Fp2_frac(u, v, Y3); 
// Y3 = sqrt(u/v) + oqs_sidh_cln16_fp2mul751_mont(Y3, Z3, Y3); // Y3 = Y3*Z3 + oqs_sidh_cln16_fp2sqr751_mont(X3, f0); // f0 = X3^2 + oqs_sidh_cln16_fp2sqr751_mont(Z3, t0); // t0 = Z3^2 + oqs_sidh_cln16_fp2mul751_mont(X3, Z3, fX); // fX = X3*Z3 + oqs_sidh_cln16_fp2mul751_mont(A, fX, fX); // fX = A*fX + oqs_sidh_cln16_fp2add751(fX, fX, fX); // fX = fX+fX + oqs_sidh_cln16_fp2add751(fX, t0, fX); // fX = fX+t0 + oqs_sidh_cln16_fp2add751(fX, f0, fX); // fX = fX+f0 + oqs_sidh_cln16_fp2add751(fX, f0, fX); // fX = fX+f0 + oqs_sidh_cln16_fp2add751(fX, f0, fX); // fX = fX+f0 + oqs_sidh_cln16_fp2sub751(t0, f0, f0); // f0 = t0-f0 + oqs_sidh_cln16_fp2mul751_mont(fX, Z3, fX); // fX = fX*Z3 + oqs_sidh_cln16_fp2mul751_mont(Y3, Z3, fY); // fY = Y3*Z3 + oqs_sidh_cln16_fp2add751(fY, fY, fY); // fY = fY+fY + oqs_sidh_cln16_fp2neg751(fY); // fY = -fY + oqs_sidh_cln16_fp2add751(fY, fY, c); // c = fY+fY + oqs_sidh_cln16_fp2mul751_mont(fY, Z3, fY); // fY = fY*Z3 + oqs_sidh_cln16_fp2mul751_mont(f0, X3, f0); // f0 = f0*X3 + oqs_sidh_cln16_fp2mul751_mont(c, Y3, c); // c = c*Y3 + oqs_sidh_cln16_fp2mul751_mont(fX, c, fX); // fX = c*fX + oqs_sidh_cln16_fp2mul751_mont(fY, c, fY); // fY = c*fY + oqs_sidh_cln16_fp2mul751_mont(f0, c, f0); // f0 = c*f0 + + do { + while (pts_found < 2) { + r++; + get_pt_on_curve(A, &r, X, Y); + oqs_sidh_cln16_fp2mul751_mont(fX, X, f); // f = fX*X + oqs_sidh_cln16_fp2mul751_mont(fY, Y, t0); // t0 = fY*Y + oqs_sidh_cln16_fp2add751(f, t0, f); // f = f+t0 + oqs_sidh_cln16_fp2add751(f, f0, f); // f = f+f0 + + if (oqs_sidh_cln16_is_cube_Fp2(f, CurveIsogeny) == false) { + oqs_sidh_cln16_fp2copy751(one, Z); // Z = 1 + oqs_sidh_cln16_xDBLe(R, R, A, one, 372); + oqs_sidh_cln16_fp2mul751_mont(A, Z, u); // u = A*Z + oqs_sidh_cln16_fp2add751(u, X, u); // u = u+X + oqs_sidh_cln16_fp2mul751_mont(u, X, u); // u = u*X + oqs_sidh_cln16_fp2sqr751_mont(Z, v); // v = Z^2 + oqs_sidh_cln16_fp2add751(u, v, u); // u = u+v + oqs_sidh_cln16_fp2mul751_mont(u, X, u); // u = u*X + 
oqs_sidh_cln16_fp2mul751_mont(v, Z, v); // v = v*Z + oqs_sidh_cln16_sqrt_Fp2_frac(u, v, Y); // Y = sqrt(u/v) + oqs_sidh_cln16_fp2mul751_mont(Y, Z, Y); // Y = Y*Z + + if (pts_found == 0) { + oqs_sidh_cln16_fp2copy751(X, X1); // X1 = X + oqs_sidh_cln16_fp2copy751(Y, Y1); // Y1 = Y + oqs_sidh_cln16_fp2copy751(Z, Z1); // Z1 = Z + oqs_sidh_cln16_xTPLe(R, R3, A, one, 238); + } else { + oqs_sidh_cln16_fp2copy751(X, X2); // X2 = X + oqs_sidh_cln16_fp2copy751(Y, Y2); // Y2 = Y + oqs_sidh_cln16_fp2copy751(Z, Z2); // Z2 = Z + oqs_sidh_cln16_xTPLe(R, R4, A, one, 238); + } + pts_found++; + } + } + oqs_sidh_cln16_fp2mul751_mont(X3, Z4, t0); + oqs_sidh_cln16_fp2mul751_mont(X4, Z3, v); + oqs_sidh_cln16_fp2sub751(t0, v, t0); + oqs_sidh_cln16_fp2correction751(t0); + pts_found--; + } while (oqs_sidh_cln16_fpequal751_non_constant_time(t0[0], zero) == true && oqs_sidh_cln16_fpequal751_non_constant_time(t0[1], zero) == true); +} + +static void dbl_and_line(const oqs_sidh_cln16_point_ext_proj_t P, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t lx, oqs_sidh_cln16_f2elm_t ly, oqs_sidh_cln16_f2elm_t l0, oqs_sidh_cln16_f2elm_t v0) { // Doubling step for computing the Tate pairing using Miller's algorithm. + // This function computes a point doubling of P and returns the corresponding line coefficients for the pairing doubling step. 
+ oqs_sidh_cln16_felm_t *X2 = (oqs_sidh_cln16_felm_t *) P->X2, *XZ = (oqs_sidh_cln16_felm_t *) P->XZ, *YZ = (oqs_sidh_cln16_felm_t *) P->YZ, *Z2 = (oqs_sidh_cln16_felm_t *) P->Z2; + oqs_sidh_cln16_f2elm_t XX2, t0; + + oqs_sidh_cln16_fp2add751(YZ, YZ, XX2); //X2_: = YZ + YZ; + oqs_sidh_cln16_fp2sqr751_mont(XX2, ly); //ly: = X2_ ^ 2; + oqs_sidh_cln16_fp2sub751(X2, Z2, l0); //l0: = X2 - Z2; + oqs_sidh_cln16_fp2sqr751_mont(l0, v0); //v0: = l0 ^ 2; + oqs_sidh_cln16_fp2mul751_mont(XX2, l0, l0); //l0: = X2_*l0; + oqs_sidh_cln16_fp2mul751_mont(XZ, l0, lx); //lx: = XZ*l0; + oqs_sidh_cln16_fp2mul751_mont(YZ, ly, XX2); //X2_: = YZ*ly; + oqs_sidh_cln16_fp2add751(XX2, lx, lx); //lx: = X2_ + lx; + oqs_sidh_cln16_fp2add751(X2, Z2, YZ); //YZ: = X2 + Z2; + oqs_sidh_cln16_fp2mul751_mont(A, YZ, YZ); //YZ: = A*YZ; + oqs_sidh_cln16_fp2add751(XZ, XZ, XX2); //X2_: = XZ + XZ; + oqs_sidh_cln16_fp2add751(XX2, YZ, YZ); //YZ: = X2_ + YZ; + oqs_sidh_cln16_fp2add751(XX2, YZ, YZ); //YZ_: = X2_ + YZ_; + oqs_sidh_cln16_fp2mul751_mont(XX2, YZ, YZ); //YZ_: = X2_*YZ_; + + oqs_sidh_cln16_fp2sqr751_mont(v0, XX2); //X2_: = v0 ^ 2; + oqs_sidh_cln16_fp2sqr751_mont(l0, t0); //XZ_: = l0 ^ 2; + oqs_sidh_cln16_fp2sqr751_mont(ly, Z2); //Z2: = ly ^ 2; + oqs_sidh_cln16_fp2add751(v0, YZ, YZ); //YZ: = v0 + YZ; + oqs_sidh_cln16_fp2mul751_mont(l0, YZ, YZ); //YZ: = l0*Y_; + + oqs_sidh_cln16_fp2mul751_mont(XZ, ly, ly); //ly: = XZ*ly; + oqs_sidh_cln16_fp2mul751_mont(X2, l0, l0); //l0: = X2*l0; + oqs_sidh_cln16_fp2mul751_mont(XZ, v0, v0); //v0: = XZ*v0; + + oqs_sidh_cln16_fp2copy751(XX2, X2); + oqs_sidh_cln16_fp2copy751(t0, XZ); +} + +static void absorb_line(const oqs_sidh_cln16_f2elm_t lx, const oqs_sidh_cln16_f2elm_t ly, const oqs_sidh_cln16_f2elm_t l0, const oqs_sidh_cln16_f2elm_t v0, const oqs_sidh_cln16_point_t P, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Absorbing line function values during Miller's algorithm. 
+ // Evaluate the line functions at the point P and multiply values into the running value n/d of the pairing value, keeping numerator n
+ // and denominator d separate.
+ oqs_sidh_cln16_felm_t *x = (oqs_sidh_cln16_felm_t *) P->x, *y = (oqs_sidh_cln16_felm_t *) P->y;
+ oqs_sidh_cln16_f2elm_t l, v;
+
+ oqs_sidh_cln16_fp2mul751_mont(lx, x, l); // l = lx*x
+ oqs_sidh_cln16_fp2mul751_mont(ly, y, v); // v = ly*y
+ oqs_sidh_cln16_fp2sub751(v, l, l); // l = v-l
+ oqs_sidh_cln16_fp2add751(l0, l, l); // l = l+l0
+ oqs_sidh_cln16_fp2mul751_mont(ly, x, v); // v = ly*x
+ oqs_sidh_cln16_fp2sub751(v, v0, v); // v = v-v0
+ oqs_sidh_cln16_fp2mul751_mont(n, l, n); // n = n*l
+ oqs_sidh_cln16_fp2mul751_mont(d, v, d); // d = d*v
+}
+
+static void square_and_absorb_line(const oqs_sidh_cln16_f2elm_t lx, const oqs_sidh_cln16_f2elm_t ly, const oqs_sidh_cln16_f2elm_t l0, const oqs_sidh_cln16_f2elm_t v0, const oqs_sidh_cln16_point_t P, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Square the running pairing value in Miller's algorithm and absorb line function values of the current Miller step.
+ oqs_sidh_cln16_fp2sqr751_mont(n, n); // n = n^2
+ oqs_sidh_cln16_fp2sqr751_mont(d, d); // d = d^2
+ absorb_line(lx, ly, l0, v0, P, n, d);
+}
+
+static void final_dbl_iteration(const oqs_sidh_cln16_point_ext_proj_t P, const oqs_sidh_cln16_f2elm_t x, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Special iteration for the final doubling step in Miller's algorithm. This is necessary since the doubling
+ // at the end of the Miller loop is an exceptional case (doubling a point of order 2). 
+ oqs_sidh_cln16_felm_t *X = (oqs_sidh_cln16_felm_t *) P->XZ, *Z = (oqs_sidh_cln16_felm_t *) P->Z2; + oqs_sidh_cln16_f2elm_t l; + + oqs_sidh_cln16_fp2sqr751_mont(n, n); // n = n^2 + oqs_sidh_cln16_fp2sqr751_mont(d, d); // d = d^2 + oqs_sidh_cln16_fp2mul751_mont(Z, d, d); // d = d*Z + oqs_sidh_cln16_fp2mul751_mont(Z, x, l); // l = Z*x + oqs_sidh_cln16_fp2sub751(l, X, l); // l = l-X + oqs_sidh_cln16_fp2mul751_mont(n, l, n); // n = n*l +} + +static void final_exponentiation_2_torsion(oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d, const oqs_sidh_cln16_f2elm_t n_inv, const oqs_sidh_cln16_f2elm_t d_inv, oqs_sidh_cln16_f2elm_t nout, PCurveIsogenyStruct CurveIsogeny) { // The final exponentiation for pairings in the 2-torsion group. Raising the value n/d to the power (p^2-1)/2^eA. + oqs_sidh_cln16_felm_t one = {0}; + unsigned int i; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one); + oqs_sidh_cln16_fp2mul751_mont(n, d_inv, n); // n = n*d_inv + //n = n^p, just call conjugation function + oqs_sidh_cln16_inv_Fp2_cycl(n); + oqs_sidh_cln16_fp2mul751_mont(d, n_inv, d); // d = d*n_inv + oqs_sidh_cln16_fp2mul751_mont(n, d, n); // n = n*d + + for (i = 0; i < 239; i++) { + oqs_sidh_cln16_cube_Fp2_cycl(n, one); + } + oqs_sidh_cln16_fp2copy751(n, nout); +} + +void oqs_sidh_cln16_Tate_pairings_2_torsion(const oqs_sidh_cln16_point_t R1, const oqs_sidh_cln16_point_t R2, const oqs_sidh_cln16_point_t P, const oqs_sidh_cln16_point_t Q, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t *n, PCurveIsogenyStruct CurveIsogeny) { // The doubling only 2-torsion Tate pairing of order 2^eA, consisting of the doubling only Miller loop and the final exponentiation.] + // Computes 5 pairings at once: e(R1, R2), e(R1, P), e(R1, Q), e(R2, P), e(R2,Q). 
+ oqs_sidh_cln16_point_ext_proj_t P1 = {0}, P2 = {0}; + oqs_sidh_cln16_f2elm_t lx1, ly1, l01, v01, lx2, ly2, l02, v02; + oqs_sidh_cln16_f2elm_t invs[10], nd[10] = {0}; + oqs_sidh_cln16_felm_t one = {0}; + unsigned int i; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one); + oqs_sidh_cln16_fp2copy751(R1->x, P1->XZ); + oqs_sidh_cln16_fp2sqr751_mont(P1->XZ, P1->X2); + oqs_sidh_cln16_fp2copy751(R1->y, P1->YZ); + oqs_sidh_cln16_fpcopy751(one, P1->Z2[0]); // P1 = (x1^2,x1,1,y1) + oqs_sidh_cln16_fp2copy751(R2->x, P2->XZ); + oqs_sidh_cln16_fp2sqr751_mont(P2->XZ, P2->X2); + oqs_sidh_cln16_fp2copy751(R2->y, P2->YZ); + oqs_sidh_cln16_fpcopy751(one, P2->Z2[0]); // P2 = (x2^2,x2,1,y2) + + for (i = 0; i < 10; i++) { // nd[i] = 1 + oqs_sidh_cln16_fpcopy751(one, nd[i][0]); + } + + for (i = 0; i < 371; i++) { + dbl_and_line(P1, A, lx1, ly1, l01, v01); // vx = ly + dbl_and_line(P2, A, lx2, ly2, l02, v02); // vx = ly + square_and_absorb_line(lx1, ly1, l01, v01, R2, nd[0], nd[5]); + square_and_absorb_line(lx1, ly1, l01, v01, P, nd[1], nd[6]); + square_and_absorb_line(lx1, ly1, l01, v01, Q, nd[2], nd[7]); + square_and_absorb_line(lx2, ly2, l02, v02, P, nd[3], nd[8]); + square_and_absorb_line(lx2, ly2, l02, v02, Q, nd[4], nd[9]); + } + + final_dbl_iteration(P1, R2->x, nd[0], nd[5]); + final_dbl_iteration(P1, P->x, nd[1], nd[6]); + final_dbl_iteration(P1, Q->x, nd[2], nd[7]); + final_dbl_iteration(P2, P->x, nd[3], nd[8]); + final_dbl_iteration(P2, Q->x, nd[4], nd[9]); + oqs_sidh_cln16_mont_n_way_inv(nd, 10, invs); + final_exponentiation_2_torsion(nd[0], nd[5], invs[0], invs[5], n[0], CurveIsogeny); + final_exponentiation_2_torsion(nd[1], nd[6], invs[1], invs[6], n[1], CurveIsogeny); + final_exponentiation_2_torsion(nd[2], nd[7], invs[2], invs[7], n[2], CurveIsogeny); + final_exponentiation_2_torsion(nd[3], nd[8], invs[3], invs[8], n[3], CurveIsogeny); + final_exponentiation_2_torsion(nd[4], nd[9], invs[4], invs[9], n[4], CurveIsogeny); +} + +static void 
tpl_and_parabola(oqs_sidh_cln16_point_ext_proj_t P, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t ly, oqs_sidh_cln16_f2elm_t lx2, oqs_sidh_cln16_f2elm_t lx1, oqs_sidh_cln16_f2elm_t lx0, oqs_sidh_cln16_f2elm_t vx, oqs_sidh_cln16_f2elm_t v0) { // Tripling step for computing the Tate pairing using Miller's algorithm. + // This function computes a point tripling of P and returns the coefficients of the corresponding parabola. + oqs_sidh_cln16_felm_t *X2 = (oqs_sidh_cln16_felm_t *) P->X2, *XZ = (oqs_sidh_cln16_felm_t *) P->XZ, *YZ = (oqs_sidh_cln16_felm_t *) P->YZ, *Z2 = (oqs_sidh_cln16_felm_t *) P->Z2; + oqs_sidh_cln16_f2elm_t AXZ, t0, t1, t2, t3, t4, tlx0, tlx1, tlx2; + + oqs_sidh_cln16_fp2add751(YZ, YZ, ly); //ly: = YZ + YZ + oqs_sidh_cln16_fp2sqr751_mont(ly, tlx2); //lx2: = ly ^ 2 + oqs_sidh_cln16_fp2mul751_mont(ly, tlx2, ly); //ly: = ly*lx2 + oqs_sidh_cln16_fp2mul751_mont(A, XZ, AXZ); //AXZ: = A*XZ + oqs_sidh_cln16_fp2add751(AXZ, Z2, t0); //t0: = AXZ + Z2 + oqs_sidh_cln16_fp2add751(t0, t0, t0); //t0: = t0 + t0 + oqs_sidh_cln16_fp2add751(X2, Z2, t1); //t1: = X2 + Z2 + oqs_sidh_cln16_fp2add751(X2, X2, t2); //t2: = X2 + X2 + oqs_sidh_cln16_fp2sub751(X2, Z2, t3); //t3: = X2 - Z2 + oqs_sidh_cln16_fp2sqr751_mont(t3, t3); //t3: = t3 ^ 2 + oqs_sidh_cln16_fp2add751(t2, t0, t4); //t4: = t2 + t0 + oqs_sidh_cln16_fp2mul751_mont(t2, t4, tlx2); //lx2: = t2*t4 + oqs_sidh_cln16_fp2sub751(tlx2, t3, tlx2); //lx2: = lx2 - t3 + oqs_sidh_cln16_fp2add751(t4, t1, tlx1); //lx1: = t4 + t1 + oqs_sidh_cln16_fp2sqr751_mont(t1, t1); //t1: = t1 ^ 2 + oqs_sidh_cln16_fp2mul751_mont(AXZ, tlx1, tlx1); //lx1: = AXZ*lx1 + oqs_sidh_cln16_fp2add751(t1, tlx1, tlx1); //lx1: = t1 + lx1 + oqs_sidh_cln16_fp2add751(tlx1, tlx1, tlx1); //lx1: = lx1 + lx1 + oqs_sidh_cln16_fp2add751(t3, tlx1, tlx1); //lx1: = t3 + lx1 + oqs_sidh_cln16_fp2mul751_mont(Z2, t0, tlx0); //lx0: = Z2*t0 + oqs_sidh_cln16_fp2sub751(t3, tlx0, tlx0); //lx0: = t3 - lx0 + oqs_sidh_cln16_fp2add751(tlx0, tlx0, tlx0); //lx0: = lx0 + lx0 
+ oqs_sidh_cln16_fp2sub751(t1, tlx0, tlx0); //lx0: = t1 - lx0 + oqs_sidh_cln16_fp2mul751_mont(Z2, tlx2, lx2); //lx2_: = Z2*lx2 + oqs_sidh_cln16_fp2mul751_mont(XZ, tlx1, lx1); //lx1_: = XZ*lx1 + oqs_sidh_cln16_fp2add751(lx1, lx1, lx1); //lx1_: = lx1_ + lx1_ + oqs_sidh_cln16_fp2mul751_mont(X2, tlx0, lx0); //lx0_: = X2*lx0 + // lx2_, lx1_, lx0_ done + oqs_sidh_cln16_fp2sqr751_mont(tlx2, t3); //t3: = lx2 ^ 2 + oqs_sidh_cln16_fp2mul751_mont(ly, t3, t2); //t2: = ly*t3 + oqs_sidh_cln16_fp2sqr751_mont(tlx0, t4); //t4: = lx0 ^ 2 + oqs_sidh_cln16_fp2sqr751_mont(t4, t0); //t0: = t4 ^ 2 + oqs_sidh_cln16_fp2mul751_mont(X2, t0, t0); //t0: = X2*t0 + oqs_sidh_cln16_fp2mul751_mont(ly, t0, X2); //X2_: = ly*t0 + oqs_sidh_cln16_fp2mul751_mont(XZ, t2, XZ); //XZ_: = XZ*t2 + oqs_sidh_cln16_fp2mul751_mont(XZ, t4, XZ); //XZ_: = XZ_*t4 + oqs_sidh_cln16_fp2mul751_mont(Z2, t2, Z2); //Z2_: = Z2*t2 + oqs_sidh_cln16_fp2mul751_mont(Z2, t3, Z2); //Z2_: = Z2_*t3 + oqs_sidh_cln16_fp2mul751_mont(tlx0, tlx1, t2); //t2: = lx0*lx1 + oqs_sidh_cln16_fp2add751(t2, t2, YZ); //YZ_: = t2 + t2 + oqs_sidh_cln16_fp2add751(YZ, t3, YZ); //YZ_: = YZ_ + t3 + oqs_sidh_cln16_fp2mul751_mont(lx0, tlx2, t2); //t2: = lx0_*lx2 + oqs_sidh_cln16_fp2mul751_mont(t2, YZ, YZ); //YZ_: = t2*YZ_ + oqs_sidh_cln16_fp2add751(t0, YZ, YZ); //YZ_: = t0 + YZ_ + oqs_sidh_cln16_fp2mul751_mont(lx2, YZ, YZ); //YZ_: = lx2_*YZ_ + oqs_sidh_cln16_fp2neg751(YZ); //YZ_: = -YZ_ + // X2_,XZ_,Z2_,YZ_ done + oqs_sidh_cln16_fp2copy751(Z2, vx); //vx: = Z2_ + oqs_sidh_cln16_fp2copy751(XZ, v0); //v0: = -XZ_ + oqs_sidh_cln16_fp2neg751(v0); + // vx,v0 done +} + +static void absorb_parab(const oqs_sidh_cln16_f2elm_t ly, const oqs_sidh_cln16_f2elm_t lx2, const oqs_sidh_cln16_f2elm_t lx1, const oqs_sidh_cln16_f2elm_t lx0, const oqs_sidh_cln16_f2elm_t vx, const oqs_sidh_cln16_f2elm_t v0, const oqs_sidh_cln16_point_t P, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Absorbing parabola function values in Miller's algorithm. 
+ // Evaluate the parabola at P and absorb the values into the running pairing value n/d, keeping numerator n and denominator d separate. + oqs_sidh_cln16_felm_t *x = (oqs_sidh_cln16_felm_t *) P->x, *y = (oqs_sidh_cln16_felm_t *) P->y; + oqs_sidh_cln16_f2elm_t ln, ld; + + oqs_sidh_cln16_fp2mul751_mont(lx0, x, ln); // ln = lx0*x + oqs_sidh_cln16_fp2mul751_mont(v0, x, ld); // ld = v0*x + oqs_sidh_cln16_fp2add751(vx, ld, ld); // ld = vx + ld + oqs_sidh_cln16_fp2mul751_mont(ld, ln, ld); // ld = ld*ln + oqs_sidh_cln16_fp2mul751_mont(d, ld, d); // d = d*ld + + oqs_sidh_cln16_fp2add751(lx1, ln, ln); // ln = lx1 + ln + oqs_sidh_cln16_fp2mul751_mont(x, ln, ln); // ln = x*ln + oqs_sidh_cln16_fp2mul751_mont(ly, y, ld); // t = ly*y + oqs_sidh_cln16_fp2add751(lx2, ln, ln); // ln = lx2 + ln + oqs_sidh_cln16_fp2add751(ld, ln, ln); // ln = t + ln + oqs_sidh_cln16_fp2mul751_mont(ln, v0, ln); // ln = ln*v0 + oqs_sidh_cln16_fp2mul751_mont(n, ln, n); // n = n*ln +} + +static void cube_and_absorb_parab(const oqs_sidh_cln16_f2elm_t ly, const oqs_sidh_cln16_f2elm_t lx2, const oqs_sidh_cln16_f2elm_t lx1, const oqs_sidh_cln16_f2elm_t lx0, const oqs_sidh_cln16_f2elm_t vx, const oqs_sidh_cln16_f2elm_t v0, const oqs_sidh_cln16_point_t P, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Cube the running pairing value in Miller's algorithm and absorb parabola function values of the current Miller step. + oqs_sidh_cln16_f2elm_t ln, ld; + + oqs_sidh_cln16_fp2sqr751_mont(n, ln); // ln = n ^ 2 + oqs_sidh_cln16_fp2mul751_mont(n, ln, n); // n = n*ln + oqs_sidh_cln16_fp2sqr751_mont(d, ld); // ld = d ^ 2 + oqs_sidh_cln16_fp2mul751_mont(d, ld, d); // d = d*ld + absorb_parab(ly, lx2, lx1, lx0, vx, v0, P, n, d); +} + +static void final_tpl(oqs_sidh_cln16_point_ext_proj_t P, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t lam, oqs_sidh_cln16_f2elm_t mu, oqs_sidh_cln16_f2elm_t D) { // Special iteration for the final tripling step in Miller's algorithm. 
This is necessary since the tripling + // at the end of the Miller loop is an exceptional case (tripling a point of order 3). Uses lines instead of + // parabolas. + oqs_sidh_cln16_felm_t *X2 = (oqs_sidh_cln16_felm_t *) P->X2, *XZ = (oqs_sidh_cln16_felm_t *) P->XZ, *YZ = (oqs_sidh_cln16_felm_t *) P->YZ, *Z2 = (oqs_sidh_cln16_felm_t *) P->Z2; + oqs_sidh_cln16_f2elm_t X, Y, Z, Y2, tX2, AX2, tXZ, tAXZ; + + oqs_sidh_cln16_fp2copy751(XZ, X); + oqs_sidh_cln16_fp2copy751(YZ, Y); + oqs_sidh_cln16_fp2copy751(Z2, Z); + + oqs_sidh_cln16_fp2sqr751_mont(X, X2); // X2 = X ^ 2 + oqs_sidh_cln16_fp2add751(X2, X2, tX2); // tX2 = X2 + X2 + oqs_sidh_cln16_fp2mul751_mont(A, X2, AX2); // AX2 = A*X2 + oqs_sidh_cln16_fp2mul751_mont(X, Z, XZ); // XZ = X*Z + oqs_sidh_cln16_fp2sqr751_mont(Y, Y2); // Y2 = Y ^ 2 + oqs_sidh_cln16_fp2add751(XZ, XZ, tXZ); // tXZ = XZ + XZ + oqs_sidh_cln16_fp2mul751_mont(A, tXZ, tAXZ); // tAXZ = A*tXZ + oqs_sidh_cln16_fp2sqr751_mont(Z, Z2); // Z2 = Z ^ 2 + oqs_sidh_cln16_fp2mul751_mont(Y, Z, YZ); // YZ = Y*Z + + oqs_sidh_cln16_fp2add751(X2, Z2, lam); // lambda = X2 + Z2 + oqs_sidh_cln16_fp2add751(lam, tX2, lam); // lambda = lambda + tX2 + oqs_sidh_cln16_fp2add751(lam, tAXZ, lam); // lambda = lambda + tAXZ + oqs_sidh_cln16_fp2sub751(tXZ, Y2, mu); // mu = tXZ - Y2 + oqs_sidh_cln16_fp2add751(mu, AX2, mu); // mu = mu + AX2 + oqs_sidh_cln16_fp2add751(YZ, YZ, D); // D = YZ + YZ +} + +static void final_tpl_iteration(const oqs_sidh_cln16_f2elm_t x, const oqs_sidh_cln16_f2elm_t y, const oqs_sidh_cln16_f2elm_t lam, const oqs_sidh_cln16_f2elm_t mu, const oqs_sidh_cln16_f2elm_t D, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Special iteration for the final tripling step in Miller's algorithm. This is necessary since the tripling + // at the end of the Miller loop is an exceptional case (tripling a point of order 3). + // Cubes the running pairing value n/d and absorbs the line function values. 
+ oqs_sidh_cln16_f2elm_t ln, ld, t; + + oqs_sidh_cln16_fp2sqr751_mont(n, ln); // ln = n ^ 2 + oqs_sidh_cln16_fp2mul751_mont(n, ln, n); // n = n*ln + oqs_sidh_cln16_fp2sqr751_mont(d, ld); // ld = d ^ 2 + oqs_sidh_cln16_fp2mul751_mont(d, ld, d); // d = d*ld + oqs_sidh_cln16_fp2sqr751_mont(x, ld); // ld = x ^ 2 + oqs_sidh_cln16_fp2mul751_mont(mu, ld, ld); // ld = mu*ld + oqs_sidh_cln16_fp2mul751_mont(lam, x, t); // t = lambda*x + oqs_sidh_cln16_fp2add751(t, ld, ln); // ln = t + ld + oqs_sidh_cln16_fp2mul751_mont(D, y, t); // t = D*y + oqs_sidh_cln16_fp2add751(t, ln, ln); // ln = t + ln + oqs_sidh_cln16_fp2mul751_mont(n, ln, n); // n = n*ln + oqs_sidh_cln16_fp2mul751_mont(d, ld, d); // d = d*ld +} + +static void final_exponentiation_3_torsion(oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d, const oqs_sidh_cln16_f2elm_t n_inv, const oqs_sidh_cln16_f2elm_t d_inv, oqs_sidh_cln16_f2elm_t nout, PCurveIsogenyStruct CurveIsogeny) { // The final exponentiation for pairings in the 3-torsion group. Raising the value n/d to the power (p^2-1)/3^eB. + oqs_sidh_cln16_felm_t one = {0}; + unsigned int i; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one); + oqs_sidh_cln16_fp2mul751_mont(n, d_inv, n); // n = n*d_inv + // n = n^p. Just call conjugation function + oqs_sidh_cln16_inv_Fp2_cycl(n); + oqs_sidh_cln16_fp2mul751_mont(d, n_inv, d); // d = d*n_inv + oqs_sidh_cln16_fp2mul751_mont(n, d, n); // n = n*d + + for (i = 0; i < 372; i++) { + oqs_sidh_cln16_sqr_Fp2_cycl(n, one); + } + oqs_sidh_cln16_fp2copy751(n, nout); +} + +void oqs_sidh_cln16_Tate_pairings_3_torsion(const oqs_sidh_cln16_point_t R1, const oqs_sidh_cln16_point_t R2, const oqs_sidh_cln16_point_t P, const oqs_sidh_cln16_point_t Q, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t *n, PCurveIsogenyStruct CurveIsogeny) { // The tripling only 3-torsion Tate pairing of order 3^eB, consisting of the tripling only Miller loop and the final exponentiation. 
+ // Computes 5 pairings at once: e(R1, R2), e(R1, P), e(R1, Q), e(R2, P), e(R2,Q). + oqs_sidh_cln16_point_ext_proj_t P1 = {0}, P2 = {0}; + oqs_sidh_cln16_f2elm_t ly, lx2, lx1, lx0, vx, v0, lam, mu, d; + oqs_sidh_cln16_f2elm_t invs[10], nd[10] = {0}; + oqs_sidh_cln16_felm_t one = {0}; + unsigned int i; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one); + oqs_sidh_cln16_fp2copy751(R1->x, P1->XZ); + oqs_sidh_cln16_fp2sqr751_mont(P1->XZ, P1->X2); + oqs_sidh_cln16_fp2copy751(R1->y, P1->YZ); + oqs_sidh_cln16_fpcopy751(one, P1->Z2[0]); // P1 = (x1^2,x1,1,y1) + oqs_sidh_cln16_fp2copy751(R2->x, P2->XZ); + oqs_sidh_cln16_fp2sqr751_mont(P2->XZ, P2->X2); + oqs_sidh_cln16_fp2copy751(R2->y, P2->YZ); + oqs_sidh_cln16_fpcopy751(one, P2->Z2[0]); // P2 = (x2^2,x2,1,y2) + + for (i = 0; i < 10; i++) { // nd[i] = 1 + oqs_sidh_cln16_fpcopy751(one, nd[i][0]); + } + + for (i = 239; i >= 2; i--) { + tpl_and_parabola(P1, A, ly, lx2, lx1, lx0, vx, v0); + cube_and_absorb_parab(ly, lx2, lx1, lx0, vx, v0, R2, nd[0], nd[5]); + cube_and_absorb_parab(ly, lx2, lx1, lx0, vx, v0, P, nd[1], nd[6]); + cube_and_absorb_parab(ly, lx2, lx1, lx0, vx, v0, Q, nd[2], nd[7]); + tpl_and_parabola(P2, A, ly, lx2, lx1, lx0, vx, v0); + cube_and_absorb_parab(ly, lx2, lx1, lx0, vx, v0, P, nd[3], nd[8]); + cube_and_absorb_parab(ly, lx2, lx1, lx0, vx, v0, Q, nd[4], nd[9]); + } + + final_tpl(P1, A, lam, mu, d); + final_tpl_iteration(R2->x, R2->y, lam, mu, d, nd[0], nd[5]); + final_tpl_iteration(P->x, P->y, lam, mu, d, nd[1], nd[6]); + final_tpl_iteration(Q->x, Q->y, lam, mu, d, nd[2], nd[7]); + final_tpl(P2, A, lam, mu, d); + final_tpl_iteration(P->x, P->y, lam, mu, d, nd[3], nd[8]); + final_tpl_iteration(Q->x, Q->y, lam, mu, d, nd[4], nd[9]); + + oqs_sidh_cln16_mont_n_way_inv(nd, 10, invs); + final_exponentiation_3_torsion(nd[0], nd[5], invs[0], invs[5], n[0], CurveIsogeny); + final_exponentiation_3_torsion(nd[1], nd[6], invs[1], invs[6], n[1], CurveIsogeny); + final_exponentiation_3_torsion(nd[2], nd[7], 
invs[2], invs[7], n[2], CurveIsogeny); + final_exponentiation_3_torsion(nd[3], nd[8], invs[3], invs[8], n[3], CurveIsogeny); + final_exponentiation_3_torsion(nd[4], nd[9], invs[4], invs[9], n[4], CurveIsogeny); +} + +void oqs_sidh_cln16_phn1(const oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const uint64_t a, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_i) { // Pohlig-Hellman for groups of 2-power order up to 2^6 + // This function solves the DLP in a subgroup of Fp2* of order 2^a, where a <= 6. + // The DL is returned in alpha which only needs a bits to store the result. + oqs_sidh_cln16_f2elm_t u, v, tmp; + oqs_sidh_cln16_felm_t zero = {0}; + uint64_t l, h; + + oqs_sidh_cln16_fp2copy751(q, u); // u = q + *alpha_i = 0; + for (l = 0; l < a - 1; l++) { + oqs_sidh_cln16_fp2copy751(u, v); // v = u + for (h = 1; h < (a - l); h++) { + oqs_sidh_cln16_sqr_Fp2_cycl(v, one); + } + oqs_sidh_cln16_fp2correction751(v); + if (oqs_sidh_cln16_fpequal751_non_constant_time(v[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(v[1], zero) == false) { + *alpha_i += ((uint64_t) 1 << l); + oqs_sidh_cln16_fp2copy751(LUT[6 - a + l], tmp); // tmp = LUT[6-a+l]; + oqs_sidh_cln16_fp2mul751_mont(u, tmp, u); + } + } + oqs_sidh_cln16_fp2correction751(u); + if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(u[1], zero) == false) { + *alpha_i += ((uint64_t) 1 << (a - 1)); + } +} + +void oqs_sidh_cln16_phn5(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k) { // Pohlig-Hellman for groups of 2-power order 2^21 + oqs_sidh_cln16_f2elm_t u, v, tmp; + oqs_sidh_cln16_felm_t zero = {0}; + uint64_t alpha_i; + uint64_t i, j; + + *alpha_k = 0; + oqs_sidh_cln16_fp2copy751(q, u); + for (i = 0; i < 4; i++) { + oqs_sidh_cln16_fp2copy751(u, v); + oqs_sidh_cln16_sqr_Fp2_cycl(v, one); + for (j = 0; j < (5 * (3 - i)); j++) 
{ + oqs_sidh_cln16_sqr_Fp2_cycl(v, one); + } + oqs_sidh_cln16_phn1(v, LUT, 5, one, &alpha_i); // u order 2^5 + *alpha_k += (alpha_i << (5 * i)); + oqs_sidh_cln16_exp6_Fp2_cycl(LUT_1[i], alpha_i, one, tmp); + oqs_sidh_cln16_fp2mul751_mont(u, tmp, u); + } + oqs_sidh_cln16_fp2correction751(u); + // Do the last part + if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(u[1], zero) == false) { // q order 2 + *alpha_k += ((uint64_t) 1 << 20); + } +} + +void oqs_sidh_cln16_phn21(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k) { // Pohlig-Hellman for groups of 2-power order 2^84 + oqs_sidh_cln16_f2elm_t u, v, tmp; + uint64_t alpha_i; + uint64_t i, j; + + alpha_k[0] = 0; + alpha_k[1] = 0; + oqs_sidh_cln16_fp2copy751(q, u); + for (i = 0; i < 3; i++) { + oqs_sidh_cln16_fp2copy751(u, v); + for (j = 0; j < 21 * (3 - i); j++) { + oqs_sidh_cln16_sqr_Fp2_cycl(v, one); + } + oqs_sidh_cln16_phn5(v, LUT, LUT_1, one, &alpha_i); // u order 2^21 + alpha_k[0] += (alpha_i << (21 * i)); + oqs_sidh_cln16_exp21_Fp2_cycl(LUT_0[i], alpha_i, one, tmp); + oqs_sidh_cln16_fp2mul751_mont(u, tmp, u); + } + oqs_sidh_cln16_phn5(u, LUT, LUT_1, one, &alpha_i); // u order 2^21 + alpha_k[0] += (alpha_i << 63); + alpha_k[1] = (alpha_i >> 1); +} + +void oqs_sidh_cln16_phn84(oqs_sidh_cln16_f2elm_t r, const oqs_sidh_cln16_f2elm_t *t_ori, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_f2elm_t *LUT_3, const oqs_sidh_cln16_felm_t one, uint64_t *alpha) { // Pohlig-Hellman for groups of 2-power order 2^372 + oqs_sidh_cln16_f2elm_t u, q, t, tmp; + uint64_t alpha_k[2], alpha_i, mask; + uint64_t i, j, k; + + for (i = 0; i < SIDH_NWORDS64_ORDER; i++) + alpha[i] = 0; + oqs_sidh_cln16_fp2copy751(r, t); + for (k = 0; k < 4; k++) { + 
oqs_sidh_cln16_fp2copy751(t, q); + for (j = 0; j < 36; j++) { + oqs_sidh_cln16_sqr_Fp2_cycl(q, one); + } + for (j = 0; j < 84 * (3 - k); j++) { + oqs_sidh_cln16_sqr_Fp2_cycl(q, one); + } + oqs_sidh_cln16_phn21(q, LUT, LUT_0, LUT_1, one, alpha_k); // q order 2^84 + alpha[k] += (alpha_k[0] << (k * 20)); + mask = ((uint64_t) 1 << (k * 20)) - 1; + alpha[k + 1] += ((alpha_k[0] >> (64 - k * 20)) & mask); + alpha[k + 1] += (alpha_k[1] << (k * 20)); + oqs_sidh_cln16_exp84_Fp2_cycl(t_ori[k], alpha_k, one, tmp); + oqs_sidh_cln16_fp2mul751_mont(t, tmp, t); + } + alpha[5] = (alpha_k[1] >> 4); + // Do the last part + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fp2copy751(t, u); + for (j = 0; j < 6 * (5 - i); j++) { + oqs_sidh_cln16_sqr_Fp2_cycl(u, one); + } + oqs_sidh_cln16_phn1(u, LUT, 6, one, &alpha_i); // u order 2^6 + alpha[5] += (alpha_i << (16 + 6 * i)); + oqs_sidh_cln16_exp6_Fp2_cycl(LUT_3[i], alpha_i, one, tmp); + oqs_sidh_cln16_fp2mul751_mont(t, tmp, t); + } +} + +void oqs_sidh_cln16_build_LUTs(const oqs_sidh_cln16_f2elm_t g, oqs_sidh_cln16_f2elm_t *t_ori, oqs_sidh_cln16_f2elm_t *LUT, oqs_sidh_cln16_f2elm_t *LUT_0, oqs_sidh_cln16_f2elm_t *LUT_1, oqs_sidh_cln16_f2elm_t *LUT_3, const oqs_sidh_cln16_felm_t one) { // Lookup table generation for 2-torsion PH in a group of order 2^372 + oqs_sidh_cln16_f2elm_t tmp; + unsigned int i, j; + + oqs_sidh_cln16_fp2copy751(g, tmp); // tmp = g + oqs_sidh_cln16_inv_Fp2_cycl(tmp); + oqs_sidh_cln16_fp2copy751(tmp, t_ori[0]); // t_ori[0] = g^(-1), order 2^372 + for (i = 0; i < 3; i++) { + for (j = 0; j < 84; j++) + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, t_ori[i + 1]); // order 2^288 & 2^204 & 2^120 + } + for (i = 0; i < 36; i++) + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, t_ori[4]); // t_ori[4], order 2^84 + // t_ori done. 
+ oqs_sidh_cln16_fp2copy751(tmp, LUT_0[0]); // LUT_0[0] = t_ori[4], order 2^84 + for (i = 0; i < 2; i++) { + for (j = 0; j < 21; j++) + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_0[i + 1]); // order 2^63 & 2^42 + } + for (j = 0; j < 6; j++) + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_3[0]); // LUT_3[0] = tmp, order 2^36 + for (j = 0; j < 6; j++) + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_3[1]); // LUT_3[1] = tmp, order 2^30 + for (j = 0; j < 6; j++) + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_3[2]); // LUT_3[2] = tmp, order 2^24 + for (j = 0; j < 3; j++) + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_0[3]); // LUT_0[3] = tmp, order 2^21 + // LUT_0 done. + oqs_sidh_cln16_fp2copy751(tmp, LUT_1[0]); // LUT_1[0] = LUT_0[3], order 2^21 + for (i = 0; i < 3; i++) + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_3[3]); // LUT_3[3] = tmp, order 2^18 + for (i = 0; i < 2; i++) + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_1[1]); // LUT_1[1] = tmp, order 2^16 + for (i = 0; i < 4; i++) + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_3[4]); // LUT_3[4] = tmp, order 2^12 + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_1[2]); // LUT_1[2] = tmp, order 2^11 + for (i = 0; i < 5; i++) + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_1[3]); // LUT_1[3] = tmp, order 2^16 & 2^11 & 2^6 + oqs_sidh_cln16_fp2copy751(tmp, LUT_3[5]); // LUT_3[5] = tmp + // LUT_1, LUT_3 done + oqs_sidh_cln16_fp2copy751(tmp, LUT[0]); // LUT = LUT_3[5] + for (i = 0; i < 4; i++) { + oqs_sidh_cln16_fp2copy751(LUT[i], LUT[i + 1]); + oqs_sidh_cln16_sqr_Fp2_cycl(LUT[i + 1], one); // order 2^5 -- 2^1 + } +} + +void oqs_sidh_cln16_ph2(const oqs_sidh_cln16_point_t phiP, const oqs_sidh_cln16_point_t phiQ, const 
oqs_sidh_cln16_point_t PS, const oqs_sidh_cln16_point_t QS, const oqs_sidh_cln16_f2elm_t A, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, PCurveIsogenyStruct CurveIsogeny) { // Pohlig-Hellman function. + // This function computes the five pairings e(QS, PS), e(QS, phiP), e(QS, phiQ), e(PS, phiP), e(PS,phiQ), + // computes the lookup tables for the Pohlig-Hellman functions, + // and then computes the discrete logarithms of the last four pairing values to the base of the first pairing value. + oqs_sidh_cln16_f2elm_t t_ori[5], n[5], LUT[5], LUT_0[4], LUT_1[4], LUT_3[6]; + oqs_sidh_cln16_felm_t one = {0}; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one); + + // Compute the pairings. + oqs_sidh_cln16_Tate_pairings_2_torsion(QS, PS, phiP, phiQ, A, n, CurveIsogeny); + + // Build the lookup tables from element n[0] of order 2^372. + oqs_sidh_cln16_build_LUTs(n[0], t_ori, LUT, LUT_0, LUT_1, LUT_3, one); + + // Finish computation + oqs_sidh_cln16_phn84(n[1], t_ori, LUT, LUT_0, LUT_1, LUT_3, one, a0); + oqs_sidh_cln16_phn84(n[3], t_ori, LUT, LUT_0, LUT_1, LUT_3, one, b0); + oqs_sidh_cln16_mp_sub(CurveIsogeny->Aorder, (digit_t *) b0, (digit_t *) b0, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_phn84(n[2], t_ori, LUT, LUT_0, LUT_1, LUT_3, one, a1); + oqs_sidh_cln16_phn84(n[4], t_ori, LUT, LUT_0, LUT_1, LUT_3, one, b1); + oqs_sidh_cln16_mp_sub(CurveIsogeny->Aorder, (digit_t *) b1, (digit_t *) b1, SIDH_NWORDS_ORDER); +} + +static void recover_os(const oqs_sidh_cln16_f2elm_t X1, const oqs_sidh_cln16_f2elm_t Z1, const oqs_sidh_cln16_f2elm_t X2, const oqs_sidh_cln16_f2elm_t Z2, const oqs_sidh_cln16_f2elm_t x, const oqs_sidh_cln16_f2elm_t y, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t X3, oqs_sidh_cln16_f2elm_t Y3, oqs_sidh_cln16_f2elm_t Z3) { + oqs_sidh_cln16_f2elm_t t0, t1, t2, t3; + + //X3 := 2*y*Z1*Z2*X1; + //Y3 := Z2*((X1+x*Z1+2*A*Z1)*(X1*x+Z1)-2*A*Z1^2)-(X1-x*Z1)^2*X2; + //Z3 := 2*y*Z1*Z2*Z1; + + oqs_sidh_cln16_fp2add751(y, y, t0); + 
oqs_sidh_cln16_fp2mul751_mont(t0, Z1, t0); + oqs_sidh_cln16_fp2mul751_mont(t0, Z2, t0); // t0 = 2*y*Z1*Z2 + oqs_sidh_cln16_fp2mul751_mont(t0, Z1, Z3); // Z3 = 2*y*Z1*Z2*Z1 + oqs_sidh_cln16_fp2mul751_mont(t0, X1, X3); // X3 = 2*y*Z1*Z2*X1 + oqs_sidh_cln16_fp2add751(A, A, t0); + oqs_sidh_cln16_fp2mul751_mont(t0, Z1, t0); // t0 = 2*A*Z1 + oqs_sidh_cln16_fp2mul751_mont(x, Z1, t1); // t1 = x*Z1 + oqs_sidh_cln16_fp2add751(X1, t1, t2); // t2 = X1+x*Z1 + oqs_sidh_cln16_fp2sub751(X1, t1, t1); // t1 = X1-x*Z1 + oqs_sidh_cln16_fp2add751(t0, t2, t3); // t3 = X1+x*Z1+2*A*Z1 + oqs_sidh_cln16_fp2mul751_mont(t0, Z1, t0); // t0 = 2*A*Z1^2 + oqs_sidh_cln16_fp2sqr751_mont(t1, t1); // t1 = (X1-x*Z1)^2 + oqs_sidh_cln16_fp2mul751_mont(x, X1, t2); // t2 = x*X1 + oqs_sidh_cln16_fp2add751(t2, Z1, t2); // t2 = X1*x+Z1 + oqs_sidh_cln16_fp2mul751_mont(t2, t3, t2); // t2 = (X1+x*Z1+2*A*Z1)*(X1*x+Z1) + oqs_sidh_cln16_fp2sub751(t2, t0, t0); // t0 = (X1+x*Z1+2*A*Z1)*(X1*x+Z1)-2*A*Z1^2 + oqs_sidh_cln16_fp2mul751_mont(t1, X2, t1); // t1 = (X1-x*Z1)^2*X2 + oqs_sidh_cln16_fp2mul751_mont(t0, Z2, t0); // t0 = Z2*[(X1+x*Z1+2*A*Z1)*(X1*x+Z1)-2*A*Z1^2] + oqs_sidh_cln16_fp2sub751(t0, t1, Y3); // Y3 = Z2*[(X1+x*Z1+2*A*Z1)*(X1*x+Z1)-2*A*Z1^2] - (X1-x*Z1)^2*X2 +} + +void oqs_sidh_cln16_recover_y(const oqs_sidh_cln16_publickey_t PK, oqs_sidh_cln16_point_full_proj_t phiP, oqs_sidh_cln16_point_full_proj_t phiQ, oqs_sidh_cln16_point_full_proj_t phiX, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { // Recover the y-coordinates of the public key + // The three resulting points are (simultaneously) correct up to sign + oqs_sidh_cln16_f2elm_t tmp, phiXY, one = {0}; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_get_A(PK[0], PK[1], PK[2], A, CurveIsogeny); // NOTE: don't have to compress this, can output in keygen + + oqs_sidh_cln16_fp2add751(PK[2], A, tmp); + oqs_sidh_cln16_fp2mul751_mont(PK[2], tmp, tmp); + oqs_sidh_cln16_fp2add751(tmp, one, tmp); + 
oqs_sidh_cln16_fp2mul751_mont(PK[2], tmp, tmp); // tmp = PK[2]^3+A*PK[2]^2+PK[2]; + oqs_sidh_cln16_sqrt_Fp2(tmp, phiXY); + oqs_sidh_cln16_fp2copy751(PK[2], phiX->X); + oqs_sidh_cln16_fp2copy751(phiXY, phiX->Y); + oqs_sidh_cln16_fp2copy751(one, phiX->Z); // phiX = [PK[2],phiXY,1]; + + recover_os(PK[1], one, PK[0], one, PK[2], phiXY, A, phiQ->X, phiQ->Y, phiQ->Z); + oqs_sidh_cln16_fp2neg751(phiXY); + recover_os(PK[0], one, PK[1], one, PK[2], phiXY, A, phiP->X, phiP->Y, phiP->Z); +} + +void oqs_sidh_cln16_compress_2_torsion(const unsigned char *PublicKeyA, unsigned char *CompressedPKA, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, oqs_sidh_cln16_point_t R1, oqs_sidh_cln16_point_t R2, PCurveIsogenyStruct CurveIsogeny) { // 2-torsion compression + oqs_sidh_cln16_point_full_proj_t P, Q, phP, phQ, phX; + oqs_sidh_cln16_point_t phiP, phiQ; + oqs_sidh_cln16_publickey_t PK; + digit_t *comp = (digit_t *) CompressedPKA; + digit_t inv[SIDH_NWORDS_ORDER]; + oqs_sidh_cln16_f2elm_t A, vec[4], Zinv[4]; + digit_t tmp[2 * SIDH_NWORDS_ORDER]; + + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[0], ((oqs_sidh_cln16_f2elm_t *) &PK)[0]); // Converting to Montgomery representation + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[1], ((oqs_sidh_cln16_f2elm_t *) &PK)[1]); + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[2], ((oqs_sidh_cln16_f2elm_t *) &PK)[2]); + + oqs_sidh_cln16_recover_y(PK, phP, phQ, phX, A, CurveIsogeny); + oqs_sidh_cln16_generate_2_torsion_basis(A, P, Q, CurveIsogeny); + oqs_sidh_cln16_fp2copy751(P->Z, vec[0]); + oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]); + oqs_sidh_cln16_fp2copy751(phP->Z, vec[2]); + oqs_sidh_cln16_fp2copy751(phQ->Z, vec[3]); + oqs_sidh_cln16_mont_n_way_inv(vec, 4, Zinv); + + oqs_sidh_cln16_fp2mul751_mont(P->X, Zinv[0], R1->x); + oqs_sidh_cln16_fp2mul751_mont(P->Y, Zinv[0], R1->y); + oqs_sidh_cln16_fp2mul751_mont(Q->X, Zinv[1], R2->x); + oqs_sidh_cln16_fp2mul751_mont(Q->Y, Zinv[1], 
R2->y); + oqs_sidh_cln16_fp2mul751_mont(phP->X, Zinv[2], phiP->x); + oqs_sidh_cln16_fp2mul751_mont(phP->Y, Zinv[2], phiP->y); + oqs_sidh_cln16_fp2mul751_mont(phQ->X, Zinv[3], phiQ->x); + oqs_sidh_cln16_fp2mul751_mont(phQ->Y, Zinv[3], phiQ->y); + + oqs_sidh_cln16_ph2(phiP, phiQ, R1, R2, A, a0, b0, a1, b1, CurveIsogeny); + + if ((a0[0] & 1) == 1) { // Storing [b1*a0inv, a1*a0inv, b0*a0inv] and setting bit384 to 0 + oqs_sidh_cln16_inv_mod_orderA((digit_t *) a0, inv); + oqs_sidh_cln16_multiply((digit_t *) b0, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[0], SIDH_NWORDS_ORDER); + comp[SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12; // Hardcoded value + oqs_sidh_cln16_multiply((digit_t *) a1, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER); + comp[2 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12; + oqs_sidh_cln16_multiply((digit_t *) b1, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[2 * SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER); + comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12; + } else { // Storing [b1*b0inv, a1*b0inv, a0*b0inv] and setting bit384 to 1 + oqs_sidh_cln16_inv_mod_orderA((digit_t *) b0, inv); + oqs_sidh_cln16_multiply((digit_t *) a0, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[0], SIDH_NWORDS_ORDER); + comp[SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12; // Hardcoded value + oqs_sidh_cln16_multiply((digit_t *) a1, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER); + comp[2 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12; + oqs_sidh_cln16_multiply((digit_t *) b1, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[2 * SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER); + comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12; + comp[3 * SIDH_NWORDS_ORDER - 1] |= (digit_t) 1 << (sizeof(digit_t) * 8 - 1); + } + + oqs_sidh_cln16_from_fp2mont(A, 
(oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER]); // Converting back from Montgomery representation +} + +void oqs_sidh_cln16_phn1_3(const oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const uint64_t a, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_i) { + oqs_sidh_cln16_f2elm_t u, v, tmp; + oqs_sidh_cln16_felm_t zero = {0}; + uint64_t l, h; + // Hardcoded powers of 3, 3^0 = 1, 3^1 = 3, 3^2 = 9 + uint64_t pow3[3] = {0x0000000000000001, 0x0000000000000003, 0x0000000000000009}; + + oqs_sidh_cln16_fp2copy751(q, u); // u = q + *alpha_i = 0; + for (l = 0; l < a - 1; l++) { + oqs_sidh_cln16_fp2copy751(u, v); // v = u + for (h = 1; h < (a - l); h++) { + oqs_sidh_cln16_cube_Fp2_cycl(v, one); + } + oqs_sidh_cln16_fp2correction751(v); + if (oqs_sidh_cln16_fpequal751_non_constant_time(v[0], LUT[3][0]) == true && oqs_sidh_cln16_fpequal751_non_constant_time(v[1], LUT[3][1]) == true) { + *alpha_i += pow3[l]; + oqs_sidh_cln16_fp2copy751(LUT[3 - a + l], tmp); // tmp = LUT[3-a+l]; + oqs_sidh_cln16_fp2mul751_mont(u, tmp, u); + } else if (oqs_sidh_cln16_fpequal751_non_constant_time(v[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(v[1], zero) == false) { + *alpha_i += pow3[l] << 1; + oqs_sidh_cln16_fp2copy751(LUT[3 - a + l], tmp); // tmp = LUT[3-a+l]; + oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2mul751_mont(u, tmp, u); + } + } + oqs_sidh_cln16_fp2correction751(u); + if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], LUT[3][0]) == true && oqs_sidh_cln16_fpequal751_non_constant_time(u[1], LUT[3][1]) == true) { + *alpha_i += pow3[a - 1]; + } else if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(u[1], zero) == false) { + *alpha_i += pow3[a - 1] << 1; + } +} + +void oqs_sidh_cln16_phn3(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k) { + oqs_sidh_cln16_f2elm_t u, v, 
tmp; + uint64_t alpha_i; + uint64_t i, j; + // Powers of 3: 3^0 = 1, 3^3 = 27, 3^6 = 729, 3^9, 3^12 + uint64_t pow3[5] = {0x0000000000000001, 0x000000000000001B, + 0x00000000000002D9, 0x0000000000004CE3, + 0x0000000000081BF1}; + + *alpha_k = 0; + oqs_sidh_cln16_fp2copy751(q, u); + for (i = 0; i < 4; i++) { + oqs_sidh_cln16_fp2copy751(u, v); + for (j = 0; j < 3 * (4 - i); j++) { + oqs_sidh_cln16_cube_Fp2_cycl(v, one); + } + oqs_sidh_cln16_phn1_3(v, LUT, 3, one, &alpha_i); // u order 3^3 + *alpha_k += alpha_i * pow3[i]; + oqs_sidh_cln16_exp6_Fp2_cycl(LUT_1[i], alpha_i, one, tmp); + oqs_sidh_cln16_fp2mul751_mont(u, tmp, u); + } + oqs_sidh_cln16_phn1_3(u, LUT, 3, one, &alpha_i); // u order 3^3 + *alpha_k += alpha_i * pow3[4]; +} + +void oqs_sidh_cln16_phn15_1(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k) { + oqs_sidh_cln16_f2elm_t u, v, tmp; + uint64_t alpha_i, alpha_n[2], alpha_tmp[4]; // alpha_tmp[4] is overkill, only taking 4 since it is the result of a mp_mul with 2-word inputs. + uint64_t i, j; + // Powers of 3: 3^0 = 1, 3^15, 3^30 + uint64_t pow3_15[3] = {0x0000000000000001, 0x0000000000DAF26B, 0x0000BB41C3CA78B9}; + // Powers of 3: 3^0 = 1, 3^3 = 27, 3^6 + uint64_t pow3_3[4] = {0x0000000000000001, 0x000000000000001B, 0x00000000000002D9, 0x0000000000004CE3}; + // Powers of 3: 3^45 split up into two words. 
+ uint64_t pow3_45[2] = {0x275329FD09495753, 0x00000000000000A0};
+
+ alpha_k[0] = 0;
+ alpha_k[1] = 0;
+ for (i = 0; i < 4; i++)
+ alpha_tmp[i] = 0;
+ oqs_sidh_cln16_fp2copy751(q, u);
+ for (i = 0; i < 3; i++) {
+ oqs_sidh_cln16_fp2copy751(u, v);
+ for (j = 0; j < 11; j++) {
+ oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+ }
+ for (j = 0; j < 15 * (2 - i); j++) {
+ oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+ }
+ oqs_sidh_cln16_phn3(v, LUT, LUT_1, one, &alpha_i); // v order 3^15
+ oqs_sidh_cln16_multiply((digit_t *) &alpha_i, (digit_t *) &pow3_15[i], (digit_t *) alpha_tmp, 64 / RADIX);
+ oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) alpha_tmp, (digit_t *) alpha_k, 2 * 64 / RADIX);
+
+ oqs_sidh_cln16_fp2copy751(LUT_0[i], v);
+ for (j = 0; j < 5; j++) {
+ oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+ }
+
+ oqs_sidh_cln16_exp_Fp2_cycl(v, &alpha_i, one, tmp, 24);
+ oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+ }
+
+ // Do the last part
+ alpha_n[0] = 0;
+ alpha_n[1] = 0;
+ for (i = 0; i < 3; i++) {
+ oqs_sidh_cln16_fp2copy751(u, v);
+ for (j = 0; j < 2; j++) {
+ oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+ }
+ for (j = 0; j < 3 * (2 - i); j++) {
+ oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+ }
+ oqs_sidh_cln16_phn1_3(v, LUT, 3, one, &alpha_i); // v order 3^3
+ alpha_n[0] += alpha_i * pow3_3[i];
+
+ oqs_sidh_cln16_fp2copy751(LUT_1[i], v);
+ for (j = 0; j < 4; j++) {
+ oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+ }
+ oqs_sidh_cln16_exp_Fp2_cycl(v, &alpha_i, one, tmp, 5);
+ oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+ }
+
+ oqs_sidh_cln16_phn1_3(u, LUT, 2, one, &alpha_i);
+ alpha_n[0] += alpha_i * pow3_3[3];
+ oqs_sidh_cln16_multiply((digit_t *) alpha_n, (digit_t *) pow3_45, (digit_t *) alpha_tmp, 2 * 64 / RADIX); // Can be optimized because alpha_n is only single precision and pow3_45 is only slightly larger than 64 bits. 
+ oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) alpha_tmp, (digit_t *) alpha_k, 2 * 64 / RADIX); +} + +void oqs_sidh_cln16_phn15(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k) { + oqs_sidh_cln16_felm_t zero = {0}; + oqs_sidh_cln16_f2elm_t u, v, tmp; + uint64_t alpha_i, alpha_n[2], alpha_tmp[4]; + uint64_t i, j; + // Powers of 3: 3^0 = 1, 3^15, 3^30 + uint64_t pow3_15[3] = {0x0000000000000001, 0x0000000000DAF26B, 0x0000BB41C3CA78B9}; + // Powers of 3: 3^45 split up into two words. + uint64_t pow3_45[2] = {0x275329FD09495753, 0x00000000000000A0}; + // Powers of 3: 3^60 split up into two words. + uint64_t pow3_60[2] = {0xCEEDA7FE92E1F5B1, 0x0000000088F924EE}; + uint64_t pow3_60_2[2] = {0x9DDB4FFD25C3EB62, 0x0000000111F249DD}; + + alpha_k[0] = 0; + alpha_k[1] = 0; + alpha_n[0] = 0; + alpha_n[1] = 0; + for (i = 0; i < 4; i++) + alpha_tmp[i] = 0; + oqs_sidh_cln16_fp2copy751(q, u); + for (i = 0; i < 3; i++) { + oqs_sidh_cln16_fp2copy751(u, v); + oqs_sidh_cln16_cube_Fp2_cycl(v, one); + for (j = 0; j < 15 * (3 - i); j++) { + oqs_sidh_cln16_cube_Fp2_cycl(v, one); + } + oqs_sidh_cln16_phn3(v, LUT, LUT_1, one, &alpha_i); // u order 3^15 + + oqs_sidh_cln16_multiply((digit_t *) &alpha_i, (digit_t *) &pow3_15[i], (digit_t *) alpha_tmp, 64 / RADIX); + oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) alpha_tmp, (digit_t *) alpha_k, 2 * 64 / RADIX); + + oqs_sidh_cln16_exp_Fp2_cycl(LUT_0[i], &alpha_i, one, tmp, 24); + oqs_sidh_cln16_fp2mul751_mont(u, tmp, u); + } + + oqs_sidh_cln16_fp2copy751(u, v); + oqs_sidh_cln16_cube_Fp2_cycl(v, one); + oqs_sidh_cln16_phn3(v, LUT, LUT_1, one, &alpha_n[0]); // u order 3^15 + + oqs_sidh_cln16_multiply((digit_t *) alpha_n, (digit_t *) pow3_45, (digit_t *) alpha_tmp, 2 * 64 / RADIX); + oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) alpha_tmp, (digit_t *) alpha_k, 2 * 64 / RADIX); + + 
oqs_sidh_cln16_exp_Fp2_cycl(LUT_0[3], &alpha_n[0], one, tmp, 24); + oqs_sidh_cln16_fp2mul751_mont(u, tmp, u); + oqs_sidh_cln16_fp2correction751(u); + if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], LUT[3][0]) == true && oqs_sidh_cln16_fpequal751_non_constant_time(u[1], LUT[3][1]) == true) { + oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) pow3_60, (digit_t *) alpha_k, 2 * 64 / RADIX); + } else if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(u[1], zero) == false) { + oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) pow3_60_2, (digit_t *) alpha_k, 2 * 64 / RADIX); + } +} + +void oqs_sidh_cln16_phn61(oqs_sidh_cln16_f2elm_t r, oqs_sidh_cln16_f2elm_t *t_ori, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha) { + oqs_sidh_cln16_f2elm_t u, v, tmp; + uint64_t alpha_k[5] = {0}, alpha_tmp[10] = {0}; + uint64_t i, k; + + uint64_t pow3_61[13] = {0x0000000000000001, 0x0000000000000000, // 3^0 = 1 + 0x6CC8F7FBB8A5E113, 0x000000019AEB6ECC, // 3^61 + 0x6878E44938606769, 0xD73A1059B8013933, // 3^(2*61) + 0x9396F76B67B7C403, 0x0000000000000002, + 0x25A79F6508B7F5CB, 0x05515FED4D025D6F, // 3^(3*61) + 0x37E2AD6FF9936EA9, 0xB69B5308880B15B6, + 0x0000000422BE6150}; + + for (i = 0; i < SIDH_NWORDS64_ORDER; i++) + alpha[i] = 0; + + oqs_sidh_cln16_fp2copy751(r, u); + for (k = 0; k < 2; k++) { + oqs_sidh_cln16_fp2copy751(u, v); + for (i = 0; i < 56; i++) { + oqs_sidh_cln16_cube_Fp2_cycl(v, one); + } + for (i = 0; i < 61 * (2 - k); i++) { + oqs_sidh_cln16_cube_Fp2_cycl(v, one); + } + oqs_sidh_cln16_phn15(v, LUT, LUT_0, LUT_1, one, alpha_k); // q order 3^61 + oqs_sidh_cln16_multiply((digit_t *) alpha_k, (digit_t *) &pow3_61[2 * k], (digit_t *) alpha_tmp, 2 * 64 / RADIX); + oqs_sidh_cln16_mp_add((digit_t *) alpha, (digit_t *) alpha_tmp, (digit_t *) alpha, 4 * 64 / RADIX); + + 
oqs_sidh_cln16_exp_Fp2_cycl(t_ori[k], alpha_k, one, tmp, 97); + oqs_sidh_cln16_fp2mul751_mont(u, tmp, u); + } + oqs_sidh_cln16_fp2copy751(u, v); + for (i = 0; i < 56; i++) { + oqs_sidh_cln16_cube_Fp2_cycl(v, one); + } + oqs_sidh_cln16_phn15(v, LUT, LUT_0, LUT_1, one, alpha_k); // q order 3^61 + oqs_sidh_cln16_multiply((digit_t *) alpha_k, (digit_t *) &pow3_61[4], (digit_t *) alpha_tmp, 4 * 64 / RADIX); + oqs_sidh_cln16_mp_add((digit_t *) alpha, (digit_t *) alpha_tmp, (digit_t *) alpha, SIDH_NWORDS_ORDER); + + oqs_sidh_cln16_exp_Fp2_cycl(t_ori[2], alpha_k, one, tmp, 97); + oqs_sidh_cln16_fp2mul751_mont(u, tmp, u); + oqs_sidh_cln16_phn15_1(u, LUT, LUT_0, LUT_1, one, alpha_k); // q order 3^56 + oqs_sidh_cln16_multiply((digit_t *) alpha_k, (digit_t *) &pow3_61[8], (digit_t *) alpha_tmp, 5 * 64 / RADIX); + oqs_sidh_cln16_mp_add((digit_t *) alpha, (digit_t *) alpha_tmp, (digit_t *) alpha, SIDH_NWORDS_ORDER); +} + +void oqs_sidh_cln16_build_LUTs_3(oqs_sidh_cln16_f2elm_t g, oqs_sidh_cln16_f2elm_t *t_ori, oqs_sidh_cln16_f2elm_t *LUT, oqs_sidh_cln16_f2elm_t *LUT_0, oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one) { // Lookup table generation for 3-torsion PH + oqs_sidh_cln16_f2elm_t tmp; + unsigned int i, j; + + // Build (small) tables + oqs_sidh_cln16_fp2copy751(g, tmp); + oqs_sidh_cln16_inv_Fp2_cycl(tmp); + oqs_sidh_cln16_fp2copy751(tmp, t_ori[0]); // t_ori[0] = g^(-1) + for (i = 0; i < 2; i++) { + for (j = 0; j < 61; j++) + oqs_sidh_cln16_cube_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, t_ori[i + 1]); + } + for (i = 0; i < 56; i++) + oqs_sidh_cln16_cube_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, t_ori[3]); + oqs_sidh_cln16_fp2copy751(tmp, LUT_0[0]); + for (i = 0; i < 5; i++) + oqs_sidh_cln16_cube_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, t_ori[4]); // t_ori done. 
+ + for (i = 0; i < 10; i++) + oqs_sidh_cln16_cube_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_0[1]); + for (i = 1; i < 3; i++) { + for (j = 0; j < 15; j++) + oqs_sidh_cln16_cube_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_0[i + 1]); + } + oqs_sidh_cln16_cube_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_1[0]); + + for (i = 0; i < 4; i++) { + for (j = 0; j < 3; j++) + oqs_sidh_cln16_cube_Fp2_cycl(tmp, one); + oqs_sidh_cln16_fp2copy751(tmp, LUT_1[i + 1]); + } + oqs_sidh_cln16_fp2copy751(tmp, LUT[0]); + for (i = 0; i < 2; i++) { + oqs_sidh_cln16_fp2copy751(LUT[i], LUT[i + 1]); + oqs_sidh_cln16_cube_Fp2_cycl(LUT[i + 1], one); + } + oqs_sidh_cln16_fp2copy751(LUT[2], LUT[3]); + oqs_sidh_cln16_inv_Fp2_cycl(LUT[3]); + oqs_sidh_cln16_fp2correction751(LUT[3]); +} + +void oqs_sidh_cln16_ph3(oqs_sidh_cln16_point_t phiP, oqs_sidh_cln16_point_t phiQ, oqs_sidh_cln16_point_t PS, oqs_sidh_cln16_point_t QS, oqs_sidh_cln16_f2elm_t A, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, PCurveIsogenyStruct CurveIsogeny) { // 3-torsion Pohlig-Hellman function + // This function computes the five pairings e(QS, PS), e(QS, phiP), e(QS, phiQ), e(PS, phiP), e(PS,phiQ), + // computes the lookup tables for the Pohlig-Hellman functions, + // and then computes the discrete logarithms of the last four pairing values to the base of the first pairing value. 
+ oqs_sidh_cln16_f2elm_t t_ori[5], n[5], LUT[4], LUT_0[4], LUT_1[5]; + oqs_sidh_cln16_felm_t one = {0}; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one); + + // Compute the pairings + oqs_sidh_cln16_Tate_pairings_3_torsion(QS, PS, phiP, phiQ, A, n, CurveIsogeny); + + // Build the look-up tables + oqs_sidh_cln16_build_LUTs_3(n[0], t_ori, LUT, LUT_0, LUT_1, one); + + // Finish computation + oqs_sidh_cln16_phn61(n[1], t_ori, LUT, LUT_0, LUT_1, one, a0); + oqs_sidh_cln16_phn61(n[3], t_ori, LUT, LUT_0, LUT_1, one, b0); + oqs_sidh_cln16_mp_sub(CurveIsogeny->Border, (digit_t *) b0, (digit_t *) b0, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_phn61(n[2], t_ori, LUT, LUT_0, LUT_1, one, a1); + oqs_sidh_cln16_phn61(n[4], t_ori, LUT, LUT_0, LUT_1, one, b1); + oqs_sidh_cln16_mp_sub(CurveIsogeny->Border, (digit_t *) b1, (digit_t *) b1, SIDH_NWORDS_ORDER); +} + +unsigned int oqs_sidh_cln16_mod3(digit_t *a) { // Computes the input modulo 3 + // The input is assumed to be SIDH_NWORDS_ORDER long + digit_t temp; + hdigit_t *val = (hdigit_t *) a, r = 0; + int i; + + for (i = (2 * SIDH_NWORDS_ORDER - 1); i >= 0; i--) { + temp = ((digit_t) r << (sizeof(hdigit_t) * 8)) | (digit_t) val[i]; + r = temp % 3; + } + + return r; +} + +void oqs_sidh_cln16_compress_3_torsion(const unsigned char *pPublicKeyB, unsigned char *CompressedPKB, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, oqs_sidh_cln16_point_t R1, oqs_sidh_cln16_point_t R2, PCurveIsogenyStruct CurveIsogeny) { // 3-torsion compression function + oqs_sidh_cln16_point_full_proj_t P, Q, phP, phQ, phX; + oqs_sidh_cln16_point_t phiP, phiQ; + oqs_sidh_cln16_publickey_t PK; + digit_t *comp = (digit_t *) CompressedPKB; + digit_t inv[SIDH_NWORDS_ORDER]; + oqs_sidh_cln16_f2elm_t A, vec[4], Zinv[4]; + uint64_t Montgomery_Rprime[SIDH_NWORDS64_ORDER] = {0x1A55482318541298, 0x070A6370DFA12A03, 0xCB1658E0E3823A40, 0xB3B7384EB5DEF3F9, 0xCBCA952F7006EA33, 0x00569EF8EC94864C}; // Value (2^384)^2 mod 3^239 + uint64_t 
Montgomery_rprime[SIDH_NWORDS64_ORDER] = {0x48062A91D3AB563D, 0x6CE572751303C2F5, 0x5D1319F3F160EC9D, 0xE35554E8C2D5623A, 0xCA29300232BC79A5, 0x8AAD843D646D78C5}; // Value -(3^239)^-1 mod 2^384 + unsigned int bit; + + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) pPublicKeyB)[0], ((oqs_sidh_cln16_f2elm_t *) &PK)[0]); // Converting to Montgomery representation + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) pPublicKeyB)[1], ((oqs_sidh_cln16_f2elm_t *) &PK)[1]); + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) pPublicKeyB)[2], ((oqs_sidh_cln16_f2elm_t *) &PK)[2]); + + oqs_sidh_cln16_recover_y(PK, phP, phQ, phX, A, CurveIsogeny); + oqs_sidh_cln16_generate_3_torsion_basis(A, P, Q, CurveIsogeny); + oqs_sidh_cln16_fp2copy751(P->Z, vec[0]); + oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]); + oqs_sidh_cln16_fp2copy751(phP->Z, vec[2]); + oqs_sidh_cln16_fp2copy751(phQ->Z, vec[3]); + oqs_sidh_cln16_mont_n_way_inv(vec, 4, Zinv); + + oqs_sidh_cln16_fp2mul751_mont(P->X, Zinv[0], R1->x); + oqs_sidh_cln16_fp2mul751_mont(P->Y, Zinv[0], R1->y); + oqs_sidh_cln16_fp2mul751_mont(Q->X, Zinv[1], R2->x); + oqs_sidh_cln16_fp2mul751_mont(Q->Y, Zinv[1], R2->y); + oqs_sidh_cln16_fp2mul751_mont(phP->X, Zinv[2], phiP->x); + oqs_sidh_cln16_fp2mul751_mont(phP->Y, Zinv[2], phiP->y); + oqs_sidh_cln16_fp2mul751_mont(phQ->X, Zinv[3], phiQ->x); + oqs_sidh_cln16_fp2mul751_mont(phQ->Y, Zinv[3], phiQ->y); + + oqs_sidh_cln16_ph3(phiP, phiQ, R1, R2, A, a0, b0, a1, b1, CurveIsogeny); + + bit = oqs_sidh_cln16_mod3((digit_t *) a0); + oqs_sidh_cln16_to_Montgomery_mod_order((digit_t *) a0, (digit_t *) a0, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to Montgomery representation + oqs_sidh_cln16_to_Montgomery_mod_order((digit_t *) a1, (digit_t *) a1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_to_Montgomery_mod_order((digit_t *) b0, (digit_t *) b0, CurveIsogeny->Border, (digit_t *) 
&Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_to_Montgomery_mod_order((digit_t *) b1, (digit_t *) b1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + + if (bit != 0) { // Storing [b1*a0inv, a1*a0inv, b0*a0inv] and setting bit384 to 0 + oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd((digit_t *) a0, inv, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) b0, inv, &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) a1, inv, &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) b1, inv, &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[0], &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1; + } else { // Storing [b1*b0inv, a1*b0inv, a0*b0inv] and setting bit384 to 1 + oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd((digit_t *) b0, inv, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) a0, inv, &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) a1, inv, &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + 
oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) b1, inv, &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[0], &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + comp[3 * SIDH_NWORDS_ORDER - 1] |= (digit_t) 1 << (sizeof(digit_t) * 8 - 1); + } + + oqs_sidh_cln16_from_fp2mont(A, (oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER]); +} + +/////////////////////////////////////////////////////////////////////////////////// +/////////////// FUNCTIONS FOR DECOMPRESSION /////////////// + +void oqs_sidh_cln16_ADD(const oqs_sidh_cln16_point_full_proj_t P, const oqs_sidh_cln16_f2elm_t QX, const oqs_sidh_cln16_f2elm_t QY, const oqs_sidh_cln16_f2elm_t QZ, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R) { // General addition. + // Input: projective Montgomery points P=(XP:YP:ZP) and Q=(XQ:YQ:ZQ). + // Output: projective Montgomery point R <- P+Q = (XQP:YQP:ZQP). 
+ oqs_sidh_cln16_f2elm_t t0, t1, t2, t3, t4, t5, t6, t7; + + oqs_sidh_cln16_fp2mul751_mont(QX, P->Z, t0); // t0 = x2*Z1 + oqs_sidh_cln16_fp2mul751_mont(P->X, QZ, t1); // t1 = X1*z2 + oqs_sidh_cln16_fp2add751(t0, t1, t2); // t2 = t0 + t1 + oqs_sidh_cln16_fp2sub751(t1, t0, t3); // t3 = t1 - t0 + oqs_sidh_cln16_fp2mul751_mont(QX, P->X, t0); // t0 = x2*X1 + oqs_sidh_cln16_fp2mul751_mont(P->Z, QZ, t1); // t1 = Z1*z2 + oqs_sidh_cln16_fp2add751(t0, t1, t4); // t4 = t0 + t1 + oqs_sidh_cln16_fp2mul751_mont(t0, A, t0); // t0 = t0*A + oqs_sidh_cln16_fp2mul751_mont(QY, P->Y, t5); // t5 = y2*Y1 + oqs_sidh_cln16_fp2sub751(t0, t5, t0); // t0 = t0 - t5 + oqs_sidh_cln16_fp2mul751_mont(t0, t1, t0); // t0 = t0*t1 + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0 + t0 + oqs_sidh_cln16_fp2mul751_mont(t2, t4, t5); // t5 = t2*t4 + oqs_sidh_cln16_fp2add751(t5, t0, t5); // t5 = t5 + t0 + oqs_sidh_cln16_fp2sqr751_mont(P->X, t0); // t0 = X1 ^ 2 + oqs_sidh_cln16_fp2sqr751_mont(P->Z, t6); // t6 = Z1 ^ 2 + oqs_sidh_cln16_fp2add751(t0, t6, t0); // t0 = t0 + t6 + oqs_sidh_cln16_fp2add751(t1, t1, t1); // t1 = t1 + t1 + oqs_sidh_cln16_fp2mul751_mont(QY, P->X, t7); // t7 = y2*X1 + oqs_sidh_cln16_fp2mul751_mont(QX, P->Y, t6); // t6 = x2*Y1 + oqs_sidh_cln16_fp2sub751(t7, t6, t7); // t7 = t7 - t6 + oqs_sidh_cln16_fp2mul751_mont(t1, t7, t1); // t1 = t1*t7 + oqs_sidh_cln16_fp2mul751_mont(A, t2, t7); // t7 = A*t2 + oqs_sidh_cln16_fp2add751(t7, t4, t4); // t4 = t4 + t7 + oqs_sidh_cln16_fp2mul751_mont(t1, t4, t4); // t4 = t1*t4 + oqs_sidh_cln16_fp2mul751_mont(QY, QZ, t1); // t1 = y2*z2 + oqs_sidh_cln16_fp2mul751_mont(t0, t1, t0); // t0 = t0*t1 + oqs_sidh_cln16_fp2sqr751_mont(QZ, t1); // t1 = z2 ^ 2 + oqs_sidh_cln16_fp2sqr751_mont(QX, t6); // t6 = x2 ^ 2 + oqs_sidh_cln16_fp2add751(t1, t6, t1); // t1 = t1 + t6 + oqs_sidh_cln16_fp2mul751_mont(P->Z, P->Y, t6); // t6 = Z1*Y1 + oqs_sidh_cln16_fp2mul751_mont(t1, t6, t1); // t1 = t1*t6 + oqs_sidh_cln16_fp2sub751(t0, t1, t0); // t0 = t0 - t1 + 
oqs_sidh_cln16_fp2mul751_mont(t2, t0, t0); // t0 = t2*t0 + oqs_sidh_cln16_fp2mul751_mont(t5, t3, R->X); // X3 = t5*t3 + oqs_sidh_cln16_fp2add751(t4, t0, R->Y); // Y3 = t4 + t0 + oqs_sidh_cln16_fp2sqr751_mont(t3, t0); // t0 = t3 ^ 2 + oqs_sidh_cln16_fp2mul751_mont(t3, t0, R->Z); // Z3 = t3*t0 +} + +void oqs_sidh_cln16_Mont_ladder(const oqs_sidh_cln16_f2elm_t x, const digit_t *m, oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const unsigned int order_bits, const unsigned int order_fullbits, PCurveIsogenyStruct CurveIsogeny) { // The Montgomery ladder, running in non constant-time + // Inputs: the affine x-coordinate of a point P on E: B*y^2=x^3+A*x^2+x, + // scalar m + // curve constant A24 = (A+2)/4 + // order_bits = subgroup order bitlength + // order_fullbits = smallest multiple of 32 larger than the order bitlength + // Output: P = m*(x:1) + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). + unsigned int bit = 0, owords = NBITS_TO_NWORDS(order_fullbits); + digit_t scalar[SIDH_NWORDS_ORDER]; + digit_t mask; + int i; + + // Initializing with the points (1:0) and (x:1) + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) P->X[0]); + oqs_sidh_cln16_fpzero751(P->X[1]); + oqs_sidh_cln16_fp2zero751(P->Z); + oqs_sidh_cln16_fp2copy751(x, Q->X); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) Q->Z[0]); + oqs_sidh_cln16_fpzero751(Q->Z[1]); + + for (i = SIDH_NWORDS_ORDER - 1; i >= 0; i--) { + scalar[i] = m[i]; + } + + for (i = order_fullbits - order_bits; i > 0; i--) { + oqs_sidh_cln16_mp_shiftl1(scalar, owords); + } + + for (i = order_bits; i > 0; i--) { + bit = (unsigned int) (scalar[owords - 1] >> (RADIX - 1)); + oqs_sidh_cln16_mp_shiftl1(scalar, owords); + mask = 0 - (digit_t) bit; + + oqs_sidh_cln16_swap_points(P, Q, mask); + oqs_sidh_cln16_xDBLADD(P, Q, x, A24); // If bit=0 then P <- 2*P and Q <- P+Q, + oqs_sidh_cln16_swap_points(P, Q, mask); // else if bit=1 
then Q <- 2*Q and P <- P+Q + } +} + +void oqs_sidh_cln16_mont_twodim_scalarmult(digit_t *a, const oqs_sidh_cln16_point_t R, const oqs_sidh_cln16_point_t S, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t A24, oqs_sidh_cln16_point_full_proj_t P, PCurveIsogenyStruct CurveIsogeny) { // Computes R+aS + oqs_sidh_cln16_point_proj_t P0, P1; + oqs_sidh_cln16_point_full_proj_t P2; + oqs_sidh_cln16_f2elm_t one = {0}; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_Mont_ladder(S->x, a, P0, P1, A24, CurveIsogeny->oBbits, CurveIsogeny->owordbits, CurveIsogeny); // Hardwired to oBbits + recover_os(P0->X, P0->Z, P1->X, P1->Z, S->x, S->y, A, P2->X, P2->Y, P2->Z); + oqs_sidh_cln16_ADD(P2, R->x, R->y, one, A, P); +} + +void oqs_sidh_cln16_decompress_2_torsion(const unsigned char *SecretKey, const unsigned char *CompressedPKB, oqs_sidh_cln16_point_proj_t R, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { // 2-torsion decompression function + oqs_sidh_cln16_point_t R1, R2; + oqs_sidh_cln16_point_full_proj_t P, Q; + digit_t *comp = (digit_t *) CompressedPKB; + oqs_sidh_cln16_f2elm_t A24, vec[2], invs[2], one = {0}; + digit_t tmp1[2 * SIDH_NWORDS_ORDER], tmp2[2 * SIDH_NWORDS_ORDER], vone[2 * SIDH_NWORDS_ORDER] = {0}, mask = (digit_t)(-1); + unsigned int bit; + + mask >>= (CurveIsogeny->owordbits - CurveIsogeny->oAbits); + vone[0] = 1; + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_to_fp2mont((oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER], A); // Converting to Montgomery representation + oqs_sidh_cln16_generate_2_torsion_basis(A, P, Q, CurveIsogeny); + + // normalize basis points + oqs_sidh_cln16_fp2copy751(P->Z, vec[0]); + oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]); + oqs_sidh_cln16_mont_n_way_inv(vec, 2, invs); + oqs_sidh_cln16_fp2mul751_mont(P->X, invs[0], R1->x); + oqs_sidh_cln16_fp2mul751_mont(P->Y, invs[0], R1->y); + oqs_sidh_cln16_fp2mul751_mont(Q->X, invs[1], R2->x); + 
oqs_sidh_cln16_fp2mul751_mont(Q->Y, invs[1], R2->y); + + oqs_sidh_cln16_fp2add751(A, one, A24); + oqs_sidh_cln16_fp2add751(A24, one, A24); + oqs_sidh_cln16_fp2div2_751(A24, A24); + oqs_sidh_cln16_fp2div2_751(A24, A24); + + bit = comp[3 * SIDH_NWORDS_ORDER - 1] >> (sizeof(digit_t) * 8 - 1); + comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1; + + if (bit == 0) { + oqs_sidh_cln16_multiply((digit_t *) SecretKey, &comp[SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(tmp1, vone, tmp1, SIDH_NWORDS_ORDER); + tmp1[SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_inv_mod_orderA(tmp1, tmp2); + oqs_sidh_cln16_multiply((digit_t *) SecretKey, &comp[2 * SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(&comp[0], tmp1, tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_multiply(tmp1, tmp2, vone, SIDH_NWORDS_ORDER); + vone[SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_mont_twodim_scalarmult(vone, R1, R2, A, A24, P, CurveIsogeny); + } else { + oqs_sidh_cln16_multiply((digit_t *) SecretKey, &comp[2 * SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(tmp1, vone, tmp1, SIDH_NWORDS_ORDER); + tmp1[SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_inv_mod_orderA(tmp1, tmp2); + oqs_sidh_cln16_multiply((digit_t *) SecretKey, &comp[SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(&comp[0], tmp1, tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_multiply(tmp1, tmp2, vone, SIDH_NWORDS_ORDER); + vone[SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_mont_twodim_scalarmult(vone, R2, R1, A, A24, P, CurveIsogeny); + } + + oqs_sidh_cln16_fp2copy751(P->X, R->X); + oqs_sidh_cln16_fp2copy751(P->Z, R->Z); +} + +void oqs_sidh_cln16_decompress_3_torsion(const unsigned char *SecretKey, const unsigned char *CompressedPKA, oqs_sidh_cln16_point_proj_t R, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { // 3-torsion decompression function + oqs_sidh_cln16_point_t R1, R2; + oqs_sidh_cln16_point_full_proj_t P, Q; + digit_t 
*comp = (digit_t *) CompressedPKA; + digit_t *SKin = (digit_t *) SecretKey; + oqs_sidh_cln16_f2elm_t A24, vec[2], invs[2], one = {0}; + digit_t t1[SIDH_NWORDS_ORDER], t2[SIDH_NWORDS_ORDER], t3[SIDH_NWORDS_ORDER], t4[SIDH_NWORDS_ORDER], vone[SIDH_NWORDS_ORDER] = {0}; + uint64_t Montgomery_Rprime[SIDH_NWORDS64_ORDER] = {0x1A55482318541298, 0x070A6370DFA12A03, 0xCB1658E0E3823A40, 0xB3B7384EB5DEF3F9, 0xCBCA952F7006EA33, 0x00569EF8EC94864C}; // Value (2^384)^2 mod 3^239 + uint64_t Montgomery_rprime[SIDH_NWORDS64_ORDER] = {0x48062A91D3AB563D, 0x6CE572751303C2F5, 0x5D1319F3F160EC9D, 0xE35554E8C2D5623A, 0xCA29300232BC79A5, 0x8AAD843D646D78C5}; // Value -(3^239)^-1 mod 2^384 + unsigned int bit; + + vone[0] = 1; + oqs_sidh_cln16_to_Montgomery_mod_order(vone, vone, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to Montgomery representation + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_to_fp2mont((oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER], A); // Converting to Montgomery representation + oqs_sidh_cln16_generate_3_torsion_basis(A, P, Q, CurveIsogeny); + + // normalize basis points + oqs_sidh_cln16_fp2copy751(P->Z, vec[0]); + oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]); + oqs_sidh_cln16_mont_n_way_inv(vec, 2, invs); + oqs_sidh_cln16_fp2mul751_mont(P->X, invs[0], R1->x); + oqs_sidh_cln16_fp2mul751_mont(P->Y, invs[0], R1->y); + oqs_sidh_cln16_fp2mul751_mont(Q->X, invs[1], R2->x); + oqs_sidh_cln16_fp2mul751_mont(Q->Y, invs[1], R2->y); + + oqs_sidh_cln16_fp2add751(A, one, A24); + oqs_sidh_cln16_fp2add751(A24, one, A24); + oqs_sidh_cln16_fp2div2_751(A24, A24); + oqs_sidh_cln16_fp2div2_751(A24, A24); + + bit = comp[3 * SIDH_NWORDS_ORDER - 1] >> (sizeof(digit_t) * 8 - 1); + comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1; + oqs_sidh_cln16_to_Montgomery_mod_order(SKin, t1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to 
Montgomery representation + oqs_sidh_cln16_to_Montgomery_mod_order(&comp[0], t2, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_to_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_to_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + + if (bit == 0) { + oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_mp_add(t3, vone, t3, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_mp_add(t2, t4, t4, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_Montgomery_multiply_mod_order(t3, t4, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_from_Montgomery_mod_order(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation + oqs_sidh_cln16_mont_twodim_scalarmult(t3, R1, R2, A, A24, P, CurveIsogeny); + } else { + oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_mp_add(t4, vone, t4, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_mp_add(t2, t3, t3, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_Montgomery_multiply_mod_order(t3, t4, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + 
oqs_sidh_cln16_from_Montgomery_mod_order(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation + oqs_sidh_cln16_mont_twodim_scalarmult(t3, R2, R1, A, A24, P, CurveIsogeny); + } + + oqs_sidh_cln16_fp2copy751(P->X, R->X); + oqs_sidh_cln16_fp2copy751(P->Z, R->Z); +} diff --git a/crypt/liboqs/kex_sidh_cln16/fpx.c b/crypt/liboqs/kex_sidh_cln16/fpx.c new file mode 100644 index 0000000000000000000000000000000000000000..c3d60404b6e0abc5f6d6ee318a224db05da32ac5 --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/fpx.c @@ -0,0 +1,1193 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral +* Diffie-Hellman key exchange. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: core functions over GF(p751^2) and field operations modulo the prime p751 +* +*********************************************************************************************/ + +#include "SIDH_internal.h" +#include <string.h> + +// Global constants +const uint64_t p751[NWORDS_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF, + 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C}; +const uint64_t p751p1[NWORDS_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000, + 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C}; +const uint64_t p751x2[NWORDS_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF, + 0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38}; +const uint64_t 
Montgomery_R2[NWORDS_FIELD] = {0x233046449DAD4058, 0xDB010161A696452A, 0x5E36941472E3FD8E, 0xF40BFE2082A2E706, 0x4932CCA8904F8751, 0x1F735F1F1EE7FC81, + 0xA24F4D80C1048E18, 0xB56C383CCDB607C5, 0x441DD47B735F9C90, 0x5673ED2C6A6AC82A, 0x06C905261132294B, 0x000041AD830F1F35}; + +/*******************************************************/ +/************* Field arithmetic functions **************/ + +__inline void oqs_sidh_cln16_fpcopy751(const oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t c) { // Copy a field element, c = a. + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) + c[i] = a[i]; +} + +__inline void oqs_sidh_cln16_fpzero751(oqs_sidh_cln16_felm_t a) { // Zero a field element, a = 0. + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) + a[i] = 0; +} + +bool oqs_sidh_cln16_fpequal751_non_constant_time(const oqs_sidh_cln16_felm_t a, const oqs_sidh_cln16_felm_t b) { // Non constant-time comparison of two field elements. If a = b return TRUE, otherwise, return FALSE. + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + if (a[i] != b[i]) + return false; + } + + return true; +} + +void oqs_sidh_cln16_to_mont(const oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t mc) { // Conversion to Montgomery representation, + // mc = a*R^2*R^(-1) mod p751 = a*R mod p751, where a in [0, p751-1]. + // The Montgomery constant R^2 mod p751 is the global value "Montgomery_R2". + + oqs_sidh_cln16_fpmul751_mont(a, (digit_t *) &Montgomery_R2, mc); +} + +void oqs_sidh_cln16_from_mont(const oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t c) { // Conversion from Montgomery representation to standard representation, + // c = ma*R^(-1) mod p751 = a mod p751, where ma in [0, p751-1]. + digit_t one[NWORDS_FIELD] = {0}; + + one[0] = 1; + oqs_sidh_cln16_fpmul751_mont(ma, one, c); + oqs_sidh_cln16_fpcorrection751(c); +} + +static __inline unsigned int is_felm_zero(const oqs_sidh_cln16_felm_t x) { // Is x = 0? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise. 
+ // SECURITY NOTE: This function does not run in constant-time. + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + if (x[i] != 0) + return false; + } + return true; +} + +static __inline unsigned int is_felm_even(const oqs_sidh_cln16_felm_t x) { // Is x even? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise. + return (unsigned int) ((x[0] & 1) ^ 1); +} + +static __inline unsigned int is_felm_lt(const oqs_sidh_cln16_felm_t x, const oqs_sidh_cln16_felm_t y) { // Is x < y? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise. + // SECURITY NOTE: This function does not run in constant-time. + int i; + + for (i = NWORDS_FIELD - 1; i >= 0; i--) { + if (x[i] < y[i]) { + return true; + } else if (x[i] > y[i]) { + return false; + } + } + return false; +} + +void oqs_sidh_cln16_copy_words(const digit_t *a, digit_t *c, const unsigned int nwords) { // Copy wordsize digits, c = a, where lng(a) = nwords. + unsigned int i; + + for (i = 0; i < nwords; i++) { + c[i] = a[i]; + } +} + +__inline unsigned int oqs_sidh_cln16_mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit. + unsigned int i, borrow = 0; + + for (i = 0; i < nwords; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + return borrow; +} + +__inline unsigned int oqs_sidh_cln16_mp_add(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. + unsigned int i, carry = 0; + + for (i = 0; i < nwords; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + return carry; +} + +__inline void oqs_sidh_cln16_mp_add751(const digit_t *a, const digit_t *b, digit_t *c) { // 751-bit multiprecision addition, c = a+b. 
+ +#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) + + oqs_sidh_cln16_mp_add(a, b, c, NWORDS_FIELD); + +#elif (OS_TARGET == OS_LINUX) + + oqs_sidh_cln16_mp_add751_asm(a, b, c); + +#endif +} + +__inline void oqs_sidh_cln16_mp_add751x2(const digit_t *a, const digit_t *b, digit_t *c) { // 2x751-bit multiprecision addition, c = a+b. + +#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) + + oqs_sidh_cln16_mp_add(a, b, c, 2 * NWORDS_FIELD); + +#elif (OS_TARGET == OS_LINUX) + + oqs_sidh_cln16_mp_add751x2_asm(a, b, c); + +#endif +} + +void oqs_sidh_cln16_mp_shiftr1(digit_t *x, const unsigned int nwords) { // Multiprecision right shift by one. + unsigned int i; + + for (i = 0; i < nwords - 1; i++) { + SHIFTR(x[i + 1], x[i], 1, x[i], RADIX); + } + x[nwords - 1] >>= 1; +} + +void oqs_sidh_cln16_mp_shiftl1(digit_t *x, const unsigned int nwords) { // Multiprecision left shift by one. + int i; + + for (i = nwords - 1; i > 0; i--) { + SHIFTL(x[i], x[i - 1], 1, x[i], RADIX); + } + x[0] <<= 1; +} + +void oqs_sidh_cln16_fpmul751_mont(const oqs_sidh_cln16_felm_t ma, const oqs_sidh_cln16_felm_t mb, oqs_sidh_cln16_felm_t mc) { // 751-bit Comba multi-precision multiplication, c = a*b mod p751. + oqs_sidh_cln16_dfelm_t temp = {0}; + + oqs_sidh_cln16_mp_mul(ma, mb, temp, NWORDS_FIELD); + oqs_sidh_cln16_rdc_mont(temp, mc); +} + +void oqs_sidh_cln16_fpsqr751_mont(const oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t mc) { // 751-bit Comba multi-precision squaring, c = a^2 mod p751. + oqs_sidh_cln16_dfelm_t temp = {0}; + + oqs_sidh_cln16_mp_mul(ma, ma, temp, NWORDS_FIELD); + oqs_sidh_cln16_rdc_mont(temp, mc); +} + +void oqs_sidh_cln16_fpinv751_chain_mont(oqs_sidh_cln16_felm_t a) { // Chain to compute a^(p751-3)/4 using Montgomery arithmetic. 
+ oqs_sidh_cln16_felm_t t[27], tt; + unsigned int i, j; + + // Precomputed table + oqs_sidh_cln16_fpsqr751_mont(a, tt); + oqs_sidh_cln16_fpmul751_mont(a, tt, t[0]); + oqs_sidh_cln16_fpmul751_mont(t[0], tt, t[1]); + oqs_sidh_cln16_fpmul751_mont(t[1], tt, t[2]); + oqs_sidh_cln16_fpmul751_mont(t[2], tt, t[3]); + oqs_sidh_cln16_fpmul751_mont(t[3], tt, t[3]); + for (i = 3; i <= 8; i++) + oqs_sidh_cln16_fpmul751_mont(t[i], tt, t[i + 1]); + oqs_sidh_cln16_fpmul751_mont(t[9], tt, t[9]); + for (i = 9; i <= 20; i++) + oqs_sidh_cln16_fpmul751_mont(t[i], tt, t[i + 1]); + oqs_sidh_cln16_fpmul751_mont(t[21], tt, t[21]); + for (i = 21; i <= 24; i++) + oqs_sidh_cln16_fpmul751_mont(t[i], tt, t[i + 1]); + oqs_sidh_cln16_fpmul751_mont(t[25], tt, t[25]); + oqs_sidh_cln16_fpmul751_mont(t[25], tt, t[26]); + + oqs_sidh_cln16_fpcopy751(a, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[20], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[11], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[8], tt, tt); + for (i = 0; i < 8; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[23], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt); + for (i = 0; i < 9; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt); + for (i = 0; i < 10; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[15], tt, tt); + for (i = 0; i < 8; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[13], tt, tt); + for (i = 0; i < 8; i++) + 
oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[26], tt, tt); + for (i = 0; i < 8; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[20], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[11], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[10], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[14], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[4], tt, tt); + for (i = 0; i < 10; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[18], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[1], tt, tt); + for (i = 0; i < 7; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[22], tt, tt); + for (i = 0; i < 10; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[6], tt, tt); + for (i = 0; i < 7; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[9], tt, tt); + for (i = 0; i < 8; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[18], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[17], tt, tt); + for (i = 0; i < 8; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(a, tt, tt); + for (i = 0; i < 10; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[16], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[7], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[0], tt, tt); 
+ for (i = 0; i < 7; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[12], tt, tt); + for (i = 0; i < 7; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[19], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[22], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[25], tt, tt); + for (i = 0; i < 7; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[10], tt, tt); + for (i = 0; i < 7; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[22], tt, tt); + for (i = 0; i < 8; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[18], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[4], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[14], tt, tt); + for (i = 0; i < 7; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[13], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[5], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[23], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[21], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt); + for (i = 0; i < 7; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[23], tt, tt); + for (i = 0; i < 8; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[12], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + 
oqs_sidh_cln16_fpmul751_mont(t[9], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[3], tt, tt); + for (i = 0; i < 7; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[13], tt, tt); + for (i = 0; i < 7; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[17], tt, tt); + for (i = 0; i < 8; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[26], tt, tt); + for (i = 0; i < 8; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[5], tt, tt); + for (i = 0; i < 8; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[8], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt); + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[11], tt, tt); + for (i = 0; i < 7; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[20], tt, tt); + for (j = 0; j < 61; j++) { + for (i = 0; i < 6; i++) + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(t[26], tt, tt); + } + oqs_sidh_cln16_fpcopy751(tt, a); +} + +void oqs_sidh_cln16_fpinv751_mont(oqs_sidh_cln16_felm_t a) { // Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p751. + oqs_sidh_cln16_felm_t tt; + + oqs_sidh_cln16_fpcopy751(a, tt); + oqs_sidh_cln16_fpinv751_chain_mont(tt); + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + oqs_sidh_cln16_fpmul751_mont(a, tt, a); +} + +static __inline void power2_setup(digit_t *x, int mark, const unsigned int nwords) { // Set up the value 2^mark. 
+ unsigned int i; + + for (i = 0; i < nwords; i++) + x[i] = 0; + + i = 0; + while (mark >= 0) { + if (mark < RADIX) { + x[i] = (digit_t) 1 << mark; + } + mark -= RADIX; + i += 1; + } +} + +static __inline void fpinv751_mont_bingcd_partial(const oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t x1, unsigned int *k) { // Partial Montgomery inversion in GF(p751) via the binary GCD algorithm. + oqs_sidh_cln16_felm_t u, v, x2; + unsigned int cwords; // number of words necessary for x1, x2 + + oqs_sidh_cln16_fpcopy751(a, u); + oqs_sidh_cln16_fpcopy751((digit_t *) &p751, v); + oqs_sidh_cln16_fpzero751(x1); + x1[0] = 1; + oqs_sidh_cln16_fpzero751(x2); + *k = 0; + + while (!is_felm_zero(v)) { + cwords = ((*k + 1) / RADIX) + 1; + if ((cwords < NWORDS_FIELD)) { + if (is_felm_even(v)) { + oqs_sidh_cln16_mp_shiftr1(v, NWORDS_FIELD); + oqs_sidh_cln16_mp_shiftl1(x1, cwords); + } else if (is_felm_even(u)) { + oqs_sidh_cln16_mp_shiftr1(u, NWORDS_FIELD); + oqs_sidh_cln16_mp_shiftl1(x2, cwords); + } else if (!is_felm_lt(v, u)) { + oqs_sidh_cln16_mp_sub(v, u, v, NWORDS_FIELD); + oqs_sidh_cln16_mp_shiftr1(v, NWORDS_FIELD); + oqs_sidh_cln16_mp_add(x1, x2, x2, cwords); + oqs_sidh_cln16_mp_shiftl1(x1, cwords); + } else { + oqs_sidh_cln16_mp_sub(u, v, u, NWORDS_FIELD); + oqs_sidh_cln16_mp_shiftr1(u, NWORDS_FIELD); + oqs_sidh_cln16_mp_add(x1, x2, x1, cwords); + oqs_sidh_cln16_mp_shiftl1(x2, cwords); + } + } else { + if (is_felm_even(v)) { + oqs_sidh_cln16_mp_shiftr1(v, NWORDS_FIELD); + oqs_sidh_cln16_mp_shiftl1(x1, NWORDS_FIELD); + } else if (is_felm_even(u)) { + oqs_sidh_cln16_mp_shiftr1(u, NWORDS_FIELD); + oqs_sidh_cln16_mp_shiftl1(x2, NWORDS_FIELD); + } else if (!is_felm_lt(v, u)) { + oqs_sidh_cln16_mp_sub(v, u, v, NWORDS_FIELD); + oqs_sidh_cln16_mp_shiftr1(v, NWORDS_FIELD); + oqs_sidh_cln16_mp_add751(x1, x2, x2); + oqs_sidh_cln16_mp_shiftl1(x1, NWORDS_FIELD); + } else { + oqs_sidh_cln16_mp_sub(u, v, u, NWORDS_FIELD); + oqs_sidh_cln16_mp_shiftr1(u, NWORDS_FIELD); + 
oqs_sidh_cln16_mp_add751(x1, x2, x1); + oqs_sidh_cln16_mp_shiftl1(x2, NWORDS_FIELD); + } + } + *k += 1; + } + + if (is_felm_lt((digit_t *) &p751, x1)) { + oqs_sidh_cln16_mp_sub(x1, (digit_t *) &p751, x1, NWORDS_FIELD); + } +} + +void oqs_sidh_cln16_fpinv751_mont_bingcd(oqs_sidh_cln16_felm_t a) { // Field inversion via the binary GCD using Montgomery arithmetic, a = a^-1*R mod p751. + // SECURITY NOTE: This function does not run in constant-time. + oqs_sidh_cln16_felm_t x, t; + unsigned int k; + + fpinv751_mont_bingcd_partial(a, x, &k); + if (k < 768) { + oqs_sidh_cln16_fpmul751_mont(x, (digit_t *) &Montgomery_R2, x); + k += 768; + } + oqs_sidh_cln16_fpmul751_mont(x, (digit_t *) &Montgomery_R2, x); + power2_setup(t, 2 * 768 - k, NWORDS_FIELD); + oqs_sidh_cln16_fpmul751_mont(x, t, a); +} + +/***********************************************/ +/************* GF(p^2) FUNCTIONS ***************/ + +void oqs_sidh_cln16_fp2copy751(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c) { // Copy a GF(p751^2) element, c = a. + oqs_sidh_cln16_fpcopy751(a[0], c[0]); + oqs_sidh_cln16_fpcopy751(a[1], c[1]); +} + +void oqs_sidh_cln16_fp2zero751(oqs_sidh_cln16_f2elm_t a) { // Zero a GF(p751^2) element, a = 0. + oqs_sidh_cln16_fpzero751(a[0]); + oqs_sidh_cln16_fpzero751(a[1]); +} + +void oqs_sidh_cln16_fp2neg751(oqs_sidh_cln16_f2elm_t a) { // GF(p751^2) negation, a = -a in GF(p751^2). + oqs_sidh_cln16_fpneg751(a[0]); + oqs_sidh_cln16_fpneg751(a[1]); +} + +__inline void oqs_sidh_cln16_fp2add751(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c) { // GF(p751^2) addition, c = a+b in GF(p751^2). + oqs_sidh_cln16_fpadd751(a[0], b[0], c[0]); + oqs_sidh_cln16_fpadd751(a[1], b[1], c[1]); +} + +__inline void oqs_sidh_cln16_fp2sub751(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c) { // GF(p751^2) subtraction, c = a-b in GF(p751^2). 
+ oqs_sidh_cln16_fpsub751(a[0], b[0], c[0]); + oqs_sidh_cln16_fpsub751(a[1], b[1], c[1]); +} + +void oqs_sidh_cln16_fp2div2_751(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c) { // GF(p751^2) division by two, c = a/2 in GF(p751^2). + oqs_sidh_cln16_fpdiv2_751(a[0], c[0]); + oqs_sidh_cln16_fpdiv2_751(a[1], c[1]); +} + +void oqs_sidh_cln16_fp2correction751(oqs_sidh_cln16_f2elm_t a) { // Modular correction, a = a in GF(p751^2). + oqs_sidh_cln16_fpcorrection751(a[0]); + oqs_sidh_cln16_fpcorrection751(a[1]); +} + +void oqs_sidh_cln16_fp2sqr751_mont(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c) { // GF(p751^2) squaring using Montgomery arithmetic, c = a^2 in GF(p751^2). + // Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p751-1] + // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p751-1] + oqs_sidh_cln16_felm_t t1, t2, t3; + + oqs_sidh_cln16_mp_add751(a[0], a[1], t1); // t1 = a0+a1 + oqs_sidh_cln16_fpsub751(a[0], a[1], t2); // t2 = a0-a1 + oqs_sidh_cln16_mp_add751(a[0], a[0], t3); // t3 = 2a0 + oqs_sidh_cln16_fpmul751_mont(t1, t2, c[0]); // c0 = (a0+a1)(a0-a1) + oqs_sidh_cln16_fpmul751_mont(t3, a[1], c[1]); // c1 = 2a0*a1 +} + +void oqs_sidh_cln16_fp2mul751_mont(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c) { // GF(p751^2) multiplication using Montgomery arithmetic, c = a*b in GF(p751^2). 
+ // Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p751-1] + // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p751-1] + oqs_sidh_cln16_felm_t t1, t2; + oqs_sidh_cln16_dfelm_t tt1, tt2, tt3; + digit_t mask; + unsigned int i, borrow; + + oqs_sidh_cln16_mp_mul(a[0], b[0], tt1, NWORDS_FIELD); // tt1 = a0*b0 + oqs_sidh_cln16_mp_mul(a[1], b[1], tt2, NWORDS_FIELD); // tt2 = a1*b1 + oqs_sidh_cln16_mp_add751(a[0], a[1], t1); // t1 = a0+a1 + oqs_sidh_cln16_mp_add751(b[0], b[1], t2); // t2 = b0+b1 + borrow = oqs_sidh_cln16_mp_sub(tt1, tt2, tt3, 2 * NWORDS_FIELD); // tt3 = a0*b0 - a1*b1 + mask = 0 - (digit_t) borrow; // if tt3 < 0 then mask = 0xFF..F, else if tt3 >= 0 then mask = 0x00..0 + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, tt3[NWORDS_FIELD + i], ((digit_t *) p751)[i] & mask, borrow, tt3[NWORDS_FIELD + i]); + } + oqs_sidh_cln16_rdc_mont(tt3, c[0]); // c[0] = a0*b0 - a1*b1 + oqs_sidh_cln16_mp_add751x2(tt1, tt2, tt1); // tt1 = a0*b0 + a1*b1 + oqs_sidh_cln16_mp_mul(t1, t2, tt2, NWORDS_FIELD); // tt2 = (a0+a1)*(b0+b1) + oqs_sidh_cln16_mp_sub(tt2, tt1, tt2, 2 * NWORDS_FIELD); // tt2 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 + oqs_sidh_cln16_rdc_mont(tt2, c[1]); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 +} + +void oqs_sidh_cln16_to_fp2mont(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t mc) { // Conversion of a GF(p751^2) element to Montgomery representation, + // mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p751^2). + + oqs_sidh_cln16_to_mont(a[0], mc[0]); + oqs_sidh_cln16_to_mont(a[1], mc[1]); +} + +void oqs_sidh_cln16_from_fp2mont(const oqs_sidh_cln16_f2elm_t ma, oqs_sidh_cln16_f2elm_t c) { // Conversion of a GF(p751^2) element from Montgomery representation to standard representation, + // c_i = ma_i*R^(-1) = a_i in GF(p751^2). 
+ + oqs_sidh_cln16_from_mont(ma[0], c[0]); + oqs_sidh_cln16_from_mont(ma[1], c[1]); +} + +void oqs_sidh_cln16_fp2inv751_mont(oqs_sidh_cln16_f2elm_t a) { // GF(p751^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2). + oqs_sidh_cln16_f2elm_t t1; + + oqs_sidh_cln16_fpsqr751_mont(a[0], t1[0]); // t10 = a0^2 + oqs_sidh_cln16_fpsqr751_mont(a[1], t1[1]); // t11 = a1^2 + oqs_sidh_cln16_fpadd751(t1[0], t1[1], t1[0]); // t10 = a0^2+a1^2 + oqs_sidh_cln16_fpinv751_mont(t1[0]); // t10 = (a0^2+a1^2)^-1 + oqs_sidh_cln16_fpneg751(a[1]); // a = a0-i*a1 + oqs_sidh_cln16_fpmul751_mont(a[0], t1[0], a[0]); + oqs_sidh_cln16_fpmul751_mont(a[1], t1[0], a[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1 +} + +void oqs_sidh_cln16_fp2inv751_mont_bingcd(oqs_sidh_cln16_f2elm_t a) { // GF(p751^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) + // This uses the binary GCD for inversion in fp and is NOT constant time!!! + oqs_sidh_cln16_f2elm_t t1; + + oqs_sidh_cln16_fpsqr751_mont(a[0], t1[0]); // t10 = a0^2 + oqs_sidh_cln16_fpsqr751_mont(a[1], t1[1]); // t11 = a1^2 + oqs_sidh_cln16_fpadd751(t1[0], t1[1], t1[0]); // t10 = a0^2+a1^2 + oqs_sidh_cln16_fpinv751_mont_bingcd(t1[0]); // t10 = (a0^2+a1^2)^-1 + oqs_sidh_cln16_fpneg751(a[1]); // a = a0-i*a1 + oqs_sidh_cln16_fpmul751_mont(a[0], t1[0], a[0]); + oqs_sidh_cln16_fpmul751_mont(a[1], t1[0], a[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1 +} + +void oqs_sidh_cln16_swap_points_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const digit_t option) { // Swap points over the base field. 
+ // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P + digit_t temp; + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + temp = option & (P->X[i] ^ Q->X[i]); + P->X[i] = temp ^ P->X[i]; + Q->X[i] = temp ^ Q->X[i]; + temp = option & (P->Z[i] ^ Q->Z[i]); + P->Z[i] = temp ^ P->Z[i]; + Q->Z[i] = temp ^ Q->Z[i]; + } +} + +void oqs_sidh_cln16_swap_points(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const digit_t option) { // Swap points. + // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P + digit_t temp; + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + temp = option & (P->X[0][i] ^ Q->X[0][i]); + P->X[0][i] = temp ^ P->X[0][i]; + Q->X[0][i] = temp ^ Q->X[0][i]; + temp = option & (P->Z[0][i] ^ Q->Z[0][i]); + P->Z[0][i] = temp ^ P->Z[0][i]; + Q->Z[0][i] = temp ^ Q->Z[0][i]; + temp = option & (P->X[1][i] ^ Q->X[1][i]); + P->X[1][i] = temp ^ P->X[1][i]; + Q->X[1][i] = temp ^ Q->X[1][i]; + temp = option & (P->Z[1][i] ^ Q->Z[1][i]); + P->Z[1][i] = temp ^ P->Z[1][i]; + Q->Z[1][i] = temp ^ Q->Z[1][i]; + } +} + +void oqs_sidh_cln16_select_f2elm(const oqs_sidh_cln16_f2elm_t x, const oqs_sidh_cln16_f2elm_t y, oqs_sidh_cln16_f2elm_t z, const digit_t option) { // Select either x or y depending on the value of option. + // If option = 0 then z <- x, else if option = 0xFF...FF then z <- y. + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + z[0][i] = (option & (x[0][i] ^ y[0][i])) ^ x[0][i]; + z[1][i] = (option & (x[1][i] ^ y[1][i])) ^ x[1][i]; + } +} + +void oqs_sidh_cln16_mont_n_way_inv(const oqs_sidh_cln16_f2elm_t *vec, const int n, oqs_sidh_cln16_f2elm_t *out) { // n-way simultaneous inversion using Montgomery's trick. + // SECURITY NOTE: This function does not run in constant time. + // Also, vec and out CANNOT be the same variable! 
+ oqs_sidh_cln16_f2elm_t t1; + int i; + + oqs_sidh_cln16_fp2copy751(vec[0], out[0]); // out[0] = vec[0] + for (i = 1; i < n; i++) { + oqs_sidh_cln16_fp2mul751_mont(out[i - 1], vec[i], out[i]); // out[i] = out[i-1]*vec[i] + } + + oqs_sidh_cln16_fp2copy751(out[n - 1], t1); // t1 = 1/out[n-1] + oqs_sidh_cln16_fp2inv751_mont_bingcd(t1); + + for (i = n - 1; i >= 1; i--) { + oqs_sidh_cln16_fp2mul751_mont(out[i - 1], t1, out[i]); // out[i] = t1*out[i-1] + oqs_sidh_cln16_fp2mul751_mont(t1, vec[i], t1); // t1 = t1*vec[i] + } + oqs_sidh_cln16_fp2copy751(t1, out[0]); // out[0] = t1 +} + +void oqs_sidh_cln16_sqrt_Fp2_frac(const oqs_sidh_cln16_f2elm_t u, const oqs_sidh_cln16_f2elm_t v, oqs_sidh_cln16_f2elm_t y) { // Computes square roots of elements in (Fp2)^2 using Hamburg's trick. + oqs_sidh_cln16_felm_t t0, t1, t2, t3, t4, t; + digit_t *u0 = (digit_t *) u[0], *u1 = (digit_t *) u[1]; + digit_t *v0 = (digit_t *) v[0], *v1 = (digit_t *) v[1]; + digit_t *y0 = (digit_t *) y[0], *y1 = (digit_t *) y[1]; + unsigned int i; + + oqs_sidh_cln16_fpsqr751_mont(v0, t0); // t0 = v0^2 + oqs_sidh_cln16_fpsqr751_mont(v1, t1); // t1 = v1^2 + oqs_sidh_cln16_fpadd751(t0, t1, t0); // t0 = t0+t1 + oqs_sidh_cln16_fpmul751_mont(u0, v0, t1); // t1 = u0*v0 + oqs_sidh_cln16_fpmul751_mont(u1, v1, t2); // t2 = u1*v1 + oqs_sidh_cln16_fpadd751(t1, t2, t1); // t1 = t1+t2 + oqs_sidh_cln16_fpmul751_mont(u1, v0, t2); // t2 = u1*v0 + oqs_sidh_cln16_fpmul751_mont(u0, v1, t3); // t3 = u0*v1 + oqs_sidh_cln16_fpsub751(t2, t3, t2); // t2 = t2-t3 + oqs_sidh_cln16_fpsqr751_mont(t1, t3); // t3 = t1^2 + oqs_sidh_cln16_fpsqr751_mont(t2, t4); // t4 = t2^2 + oqs_sidh_cln16_fpadd751(t3, t4, t3); // t3 = t3+t4 + oqs_sidh_cln16_fpcopy751(t3, t); + for (i = 0; i < 370; i++) { // t = t3^((p+1)/4) + oqs_sidh_cln16_fpsqr751_mont(t, t); + } + for (i = 0; i < 239; i++) { + oqs_sidh_cln16_fpsqr751_mont(t, t3); + oqs_sidh_cln16_fpmul751_mont(t, t3, t); + } + oqs_sidh_cln16_fpadd751(t1, t, t); // t = t+t1 + oqs_sidh_cln16_fpadd751(t, 
t, t); // t = 2*t + oqs_sidh_cln16_fpsqr751_mont(t0, t3); // t3 = t0^2 + oqs_sidh_cln16_fpmul751_mont(t0, t3, t3); // t3 = t3*t0 + oqs_sidh_cln16_fpmul751_mont(t, t3, t3); // t3 = t3*t + oqs_sidh_cln16_fpinv751_chain_mont(t3); // t3 = t3^((p-3)/4) + oqs_sidh_cln16_fpmul751_mont(t0, t3, t3); // t3 = t3*t0 + oqs_sidh_cln16_fpmul751_mont(t, t3, t1); // t1 = t*t3 + oqs_sidh_cln16_fpdiv2_751(t1, y0); // y0 = t1/2 + oqs_sidh_cln16_fpmul751_mont(t2, t3, y1); // y1 = t3*t2 + oqs_sidh_cln16_fpsqr751_mont(t1, t1); // t1 = t1^2 + oqs_sidh_cln16_fpmul751_mont(t0, t1, t1); // t1 = t1*t0 + oqs_sidh_cln16_fpcorrection751(t); + oqs_sidh_cln16_fpcorrection751(t1); + + if (oqs_sidh_cln16_fpequal751_non_constant_time(t1, t) == false) { + oqs_sidh_cln16_fpcopy751(y0, t); + oqs_sidh_cln16_fpcopy751(y1, y0); // Swap y0 and y1 + oqs_sidh_cln16_fpcopy751(t, y1); + } + + oqs_sidh_cln16_fpsqr751_mont(y0, t0); // t0 = y0^2 + oqs_sidh_cln16_fpsqr751_mont(y1, t1); // t1 = y1^2 + oqs_sidh_cln16_fpsub751(t0, t1, t0); // t0 = t0-t1 + oqs_sidh_cln16_fpmul751_mont(t0, v0, t0); // t0 = t0*v0 + oqs_sidh_cln16_fpmul751_mont(y0, y1, t1); // t1 = y0*y1 + oqs_sidh_cln16_fpmul751_mont(v1, t1, t1); // t1 = t1*v1 + oqs_sidh_cln16_fpadd751(t1, t1, t1); // t1 = t1+t1 + oqs_sidh_cln16_fpsub751(t0, t1, t0); // t0 = t0-t1 + oqs_sidh_cln16_fpcorrection751(t0); + oqs_sidh_cln16_fpcorrection751(u0); + + if (oqs_sidh_cln16_fpequal751_non_constant_time(t0, u0) == false) { + oqs_sidh_cln16_fpneg751(y1); // y1 = -y1 + } +} + +void oqs_sidh_cln16_sqrt_Fp2(const oqs_sidh_cln16_f2elm_t u, oqs_sidh_cln16_f2elm_t y) { // Computes square roots of elements in (Fp2)^2 using Hamburg's trick. 
+ oqs_sidh_cln16_felm_t t0, t1, t2, t3; + digit_t *a = (digit_t *) u[0], *b = (digit_t *) u[1]; + unsigned int i; + + oqs_sidh_cln16_fpsqr751_mont(a, t0); // t0 = a^2 + oqs_sidh_cln16_fpsqr751_mont(b, t1); // t1 = b^2 + oqs_sidh_cln16_fpadd751(t0, t1, t0); // t0 = t0+t1 + oqs_sidh_cln16_fpcopy751(t0, t1); + for (i = 0; i < 370; i++) { // t = t3^((p+1)/4) + oqs_sidh_cln16_fpsqr751_mont(t1, t1); + } + for (i = 0; i < 239; i++) { + oqs_sidh_cln16_fpsqr751_mont(t1, t0); + oqs_sidh_cln16_fpmul751_mont(t1, t0, t1); + } + oqs_sidh_cln16_fpadd751(a, t1, t0); // t0 = a+t1 + oqs_sidh_cln16_fpdiv2_751(t0, t0); // t0 = t0/2 + oqs_sidh_cln16_fpcopy751(t0, t2); + oqs_sidh_cln16_fpinv751_chain_mont(t2); // t2 = t0^((p-3)/4) + oqs_sidh_cln16_fpmul751_mont(t0, t2, t1); // t1 = t2*t0 + oqs_sidh_cln16_fpmul751_mont(t2, b, t2); // t2 = t2*b + oqs_sidh_cln16_fpdiv2_751(t2, t2); // t2 = t2/2 + oqs_sidh_cln16_fpsqr751_mont(t1, t3); // t3 = t1^2 + oqs_sidh_cln16_fpcorrection751(t0); + oqs_sidh_cln16_fpcorrection751(t3); + + if (oqs_sidh_cln16_fpequal751_non_constant_time(t0, t3) == true) { + oqs_sidh_cln16_fpcopy751(t1, y[0]); + oqs_sidh_cln16_fpcopy751(t2, y[1]); + } else { + oqs_sidh_cln16_fpneg751(t1); + oqs_sidh_cln16_fpcopy751(t2, y[0]); + oqs_sidh_cln16_fpcopy751(t1, y[1]); + } +} + +void oqs_sidh_cln16_cube_Fp2_cycl(oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_felm_t one) { // Cyclotomic cubing on elements of norm 1, using a^(p+1) = 1. 
+ oqs_sidh_cln16_felm_t t0; + + oqs_sidh_cln16_fpadd751(a[0], a[0], t0); // t0 = a0 + a0 + oqs_sidh_cln16_fpsqr751_mont(t0, t0); // t0 = t0^2 + oqs_sidh_cln16_fpsub751(t0, one, t0); // t0 = t0 - 1 + oqs_sidh_cln16_fpmul751_mont(a[1], t0, a[1]); // a1 = t0*a1 + oqs_sidh_cln16_fpsub751(t0, one, t0); + oqs_sidh_cln16_fpsub751(t0, one, t0); // t0 = t0 - 2 + oqs_sidh_cln16_fpmul751_mont(a[0], t0, a[0]); // a0 = t0*a0 +} + +void oqs_sidh_cln16_sqr_Fp2_cycl(oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_felm_t one) { // Cyclotomic squaring on elements of norm 1, using a^(p+1) = 1. + oqs_sidh_cln16_felm_t t0; + + oqs_sidh_cln16_fpadd751(a[0], a[1], t0); // t0 = a0 + a1 + oqs_sidh_cln16_fpsqr751_mont(t0, t0); // t0 = t0^2 + oqs_sidh_cln16_fpsub751(t0, one, a[1]); // a1 = t0 - 1 + oqs_sidh_cln16_fpsqr751_mont(a[0], t0); // t0 = a0^2 + oqs_sidh_cln16_fpadd751(t0, t0, t0); // t0 = t0 + t0 + oqs_sidh_cln16_fpsub751(t0, one, a[0]); // a0 = t0 - 1 +} + +__inline void oqs_sidh_cln16_inv_Fp2_cycl(oqs_sidh_cln16_f2elm_t a) { // Cyclotomic inversion, a^(p+1) = 1 => a^(-1) = a^p = a0 - i*a1. + + oqs_sidh_cln16_fpneg751(a[1]); +} + +void oqs_sidh_cln16_exp6_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, const uint64_t t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res) { // Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is 6 bits at most. + unsigned int i, bit; + + oqs_sidh_cln16_fp2zero751(res); + oqs_sidh_cln16_fpcopy751(one, res[0]); // res = 1 + + if (t != 0) { + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_sqr_Fp2_cycl(res, one); + bit = 1 & (t >> (5 - i)); + if (bit == 1) { + oqs_sidh_cln16_fp2mul751_mont(res, y, res); + } + } + } +} + +void oqs_sidh_cln16_exp21_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, const uint64_t t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res) { // Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is 21 bits at most. 
+ unsigned int i, bit; + + oqs_sidh_cln16_fp2zero751(res); + oqs_sidh_cln16_fpcopy751(one, res[0]); // res = 1 + + if (t != 0) { + for (i = 0; i < 21; i++) { + oqs_sidh_cln16_sqr_Fp2_cycl(res, one); + bit = 1 & (t >> (20 - i)); + if (bit == 1) { + oqs_sidh_cln16_fp2mul751_mont(res, y, res); + } + } + } +} + +static bool is_zero(digit_t *a, unsigned int nwords) { // Check if multiprecision element is zero. + // SECURITY NOTE: This function does not run in constant time. + unsigned int i; + + for (i = 0; i < nwords; i++) { + if (a[i] != 0) { + return false; + } + } + + return true; +} + +void oqs_sidh_cln16_exp_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, uint64_t *t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res, int length) { // Exponentiation y^t via square and multiply in the cyclotomic group. + // This function uses 64-bit digits for representing exponents. + unsigned int nword, bit, nwords = (length + 63) / 64; + int i; + + oqs_sidh_cln16_fp2zero751(res); + oqs_sidh_cln16_fpcopy751(one, res[0]); // res = 1 + + if (!is_zero((digit_t *) t, nwords)) { // Is t = 0? + for (i = length; i >= 0; i--) { + oqs_sidh_cln16_sqr_Fp2_cycl(res, one); + nword = i >> 6; + bit = 1 & (t[nword] >> (i - (nword << 6))); + if (bit == 1) { + oqs_sidh_cln16_fp2mul751_mont(res, y, res); + } + } + } +} + +void oqs_sidh_cln16_exp84_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, uint64_t *t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res) { // Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is 84 bits at most + // This function uses 64-bit digits for representing exponents. + unsigned int nword, bit, nwords = 2; + int i; + + oqs_sidh_cln16_fp2zero751(res); + oqs_sidh_cln16_fpcopy751(one, res[0]); // res = 1 + + if (!is_zero((digit_t *) t, nwords)) { // Is t = 0? 
+ for (i = 83; i >= 0; i--) { + oqs_sidh_cln16_sqr_Fp2_cycl(res, one); + nword = i >> 6; + bit = 1 & (t[nword] >> (i - (nword << 6))); + if (bit == 1) { + oqs_sidh_cln16_fp2mul751_mont(res, y, res); + } + } + } +} + +bool oqs_sidh_cln16_is_cube_Fp2(oqs_sidh_cln16_f2elm_t u, PCurveIsogenyStruct CurveIsogeny) { // Check if a GF(p751^2) element is a cube. + oqs_sidh_cln16_f2elm_t v; + oqs_sidh_cln16_felm_t t0, zero = {0}, one = {0}; + unsigned int e; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one); + oqs_sidh_cln16_fpsqr751_mont(u[0], v[0]); // v0 = u0^2 + oqs_sidh_cln16_fpsqr751_mont(u[1], v[1]); // v1 = u1^2 + oqs_sidh_cln16_fpadd751(v[0], v[1], t0); // t0 = v0+v1 + oqs_sidh_cln16_fpinv751_mont_bingcd(t0); // Fp inversion with binary Euclid + oqs_sidh_cln16_fpsub751(v[0], v[1], v[0]); // v0 = v0-v1 + oqs_sidh_cln16_fpmul751_mont(u[0], u[1], v[1]); // v1 = u0*u1 + oqs_sidh_cln16_fpadd751(v[1], v[1], v[1]); // v1 = 2*v1 + oqs_sidh_cln16_fpneg751(v[1]); // v1 = -v1 + oqs_sidh_cln16_fpmul751_mont(v[0], t0, v[0]); // v0 = v0*t0 + oqs_sidh_cln16_fpmul751_mont(v[1], t0, v[1]); // v1 = v1*t0 + + for (e = 0; e < 372; e++) { + oqs_sidh_cln16_sqr_Fp2_cycl(v, one); + } + + for (e = 0; e < 238; e++) { + oqs_sidh_cln16_cube_Fp2_cycl(v, one); + } + + oqs_sidh_cln16_fp2correction751(v); + + if (oqs_sidh_cln16_fpequal751_non_constant_time(v[0], one) == true && oqs_sidh_cln16_fpequal751_non_constant_time(v[1], zero) == true) { // v == 1? + return true; + } else { + return false; + } +} + +void oqs_sidh_cln16_multiply(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. + // NOTE: a and c CANNOT be the same variable! 
+ unsigned int i, j; + digit_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i - j], UV + 1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2 * nwords - 1; i++) { + for (j = i - nwords + 1; j < nwords; j++) { + MUL(a[j], b[i - j], UV + 1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2 * nwords - 1] = v; +} + +void oqs_sidh_cln16_Montgomery_multiply_mod_order(const digit_t *ma, const digit_t *mb, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime) { // Montgomery multiplication modulo the group order, mc = ma*mb*r' mod order, where ma,mb,mc in [0, order-1]. + // ma, mb and mc are assumed to be in Montgomery representation. + // The Montgomery constant r' = -r^(-1) mod 2^(log_2(r)) is the value "Montgomery_rprime", where r is the order. 
+ unsigned int i, cout = 0, bout = 0; + digit_t mask, P[2 * SIDH_NWORDS_ORDER], Q[2 * SIDH_NWORDS_ORDER], temp[2 * SIDH_NWORDS_ORDER]; + + oqs_sidh_cln16_multiply(ma, mb, P, SIDH_NWORDS_ORDER); // P = ma * mb + oqs_sidh_cln16_multiply(P, Montgomery_rprime, Q, SIDH_NWORDS_ORDER); // Q = P * r' mod 2^(log_2(r)) + oqs_sidh_cln16_multiply(Q, order, temp, SIDH_NWORDS_ORDER); // temp = Q * r + cout = oqs_sidh_cln16_mp_add(P, temp, temp, 2 * SIDH_NWORDS_ORDER); // (cout, temp) = P + Q * r + + for (i = 0; i < SIDH_NWORDS_ORDER; i++) { // (cout, mc) = (P + Q * r)/2^(log_2(r)) + mc[i] = temp[SIDH_NWORDS_ORDER + i]; + } + + // Final, constant-time subtraction + bout = oqs_sidh_cln16_mp_sub(mc, order, mc, SIDH_NWORDS_ORDER); // (cout, mc) = (cout, mc) - r + mask = (digit_t) cout - (digit_t) bout; // if (cout, mc) >= 0 then mask = 0x00..0, else if (cout, mc) < 0 then mask = 0xFF..F + + for (i = 0; i < SIDH_NWORDS_ORDER; i++) { // temp = mask & r + temp[i] = (order[i] & mask); + } + oqs_sidh_cln16_mp_add(mc, temp, mc, SIDH_NWORDS_ORDER); // mc = mc + (mask & r) +} + +void oqs_sidh_cln16_Montgomery_inversion_mod_order(const digit_t *ma, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime) { // (Non-constant time) Montgomery inversion modulo the curve order using a^(-1) = a^(order-2) mod order + // This function uses the sliding-window method. + sdigit_t i = 384; + unsigned int j, nwords = SIDH_NWORDS_ORDER, nbytes = (unsigned int) i / 8; + digit_t temp, bit = 0, count, mod2, k_EXPON = 5; // Fixing parameter k to 5 for the sliding windows method + digit_t modulus2[SIDH_NWORDS_ORDER] = {0}, npoints = 16; + digit_t input_a[SIDH_NWORDS_ORDER]; + digit_t table[16][SIDH_NWORDS_ORDER]; // Fixing the number of precomputed elements to 16 (assuming k = 5) + digit_t mask = (digit_t) 1 << (sizeof(digit_t) * 8 - 1); // 0x800...000 + digit_t mask2 = ~((digit_t)(-1) >> k_EXPON); // 0xF800...000, assuming k = 5 + + // SECURITY NOTE: this function does not run in constant time. 
+ + modulus2[0] = 2; + oqs_sidh_cln16_mp_sub(order, modulus2, modulus2, nwords); // modulus-2 + + // Precomputation stage + memmove((unsigned char *) &table[0], (unsigned char *) ma, nbytes); // table[0] = ma + oqs_sidh_cln16_Montgomery_multiply_mod_order(ma, ma, input_a, order, Montgomery_rprime); // ma^2 + for (j = 0; j < npoints - 1; j++) { + oqs_sidh_cln16_Montgomery_multiply_mod_order(table[j], input_a, table[j + 1], order, Montgomery_rprime); // table[j+1] = table[j] * ma^2 + } + + while (bit != 1) { // Shift (modulus-2) to the left until getting first bit 1 + i--; + temp = 0; + for (j = 0; j < nwords; j++) { + bit = (modulus2[j] & mask) >> (sizeof(digit_t) * 8 - 1); + modulus2[j] = (modulus2[j] << 1) | temp; + temp = bit; + } + } + + // Evaluation stage + memmove((unsigned char *) mc, (unsigned char *) ma, nbytes); + bit = (modulus2[nwords - 1] & mask) >> (sizeof(digit_t) * 8 - 1); + while (i > 0) { + if (bit == 0) { // Square accumulated value because bit = 0 and shift (modulus-2) one bit to the left + oqs_sidh_cln16_Montgomery_multiply_mod_order(mc, mc, mc, order, Montgomery_rprime); // mc = mc^2 + i--; + for (j = (nwords - 1); j > 0; j--) { + SHIFTL(modulus2[j], modulus2[j - 1], 1, modulus2[j], RADIX); + } + modulus2[0] = modulus2[0] << 1; + } else { // "temp" will store the longest odd bitstring with "count" bits s.t. 
temp <= 2^k - 1 + count = k_EXPON; + temp = (modulus2[nwords - 1] & mask2) >> (sizeof(digit_t) * 8 - k_EXPON); // Extracting next k bits to the left + mod2 = temp & 1; + while (mod2 == 0) { // if even then shift to the right and adjust count + temp = (temp >> 1); + mod2 = temp & 1; + count--; + } + for (j = 0; j < count; j++) { // mc = mc^count + oqs_sidh_cln16_Montgomery_multiply_mod_order(mc, mc, mc, order, Montgomery_rprime); + } + oqs_sidh_cln16_Montgomery_multiply_mod_order(mc, table[(temp - 1) >> 1], mc, order, Montgomery_rprime); // mc = mc * table[(temp-1)/2] + i = i - count; + + for (j = (nwords - 1); j > 0; j--) { // Shift (modulus-2) "count" bits to the left + SHIFTL(modulus2[j], modulus2[j - 1], count, modulus2[j], RADIX); + } + modulus2[0] = modulus2[0] << count; + } + bit = (modulus2[nwords - 1] & mask) >> (sizeof(digit_t) * 8 - 1); + } +} + +static __inline unsigned int is_zero_mod_order(const digit_t *x) { // Is x = 0? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise + // SECURITY NOTE: This function does not run in constant time. + unsigned int i; + + for (i = 0; i < SIDH_NWORDS_ORDER; i++) { + if (x[i] != 0) + return false; + } + return true; +} + +static __inline unsigned int is_even_mod_order(const digit_t *x) { // Is x even? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise. + return (unsigned int) ((x[0] & 1) ^ 1); +} + +static __inline unsigned int is_lt_mod_order(const digit_t *x, const digit_t *y) { // Is x < y? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise. + // SECURITY NOTE: This function does not run in constant time. + int i; + + for (i = SIDH_NWORDS_ORDER - 1; i >= 0; i--) { + if (x[i] < y[i]) { + return true; + } else if (x[i] > y[i]) { + return false; + } + } + return false; +} + +static __inline void Montgomery_inversion_mod_order_bingcd_partial(const digit_t *a, digit_t *x1, unsigned int *k, const digit_t *order) { // Partial Montgomery inversion modulo order. 
+ digit_t u[SIDH_NWORDS_ORDER], v[SIDH_NWORDS_ORDER], x2[SIDH_NWORDS_ORDER] = {0}; + unsigned int cwords; // number of words necessary for x1, x2 + + oqs_sidh_cln16_copy_words(a, u, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(order, v, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(x2, x1, SIDH_NWORDS_ORDER); + x1[0] = 1; + *k = 0; + + while (!is_zero_mod_order(v)) { + cwords = ((*k + 1) / RADIX) + 1; + if ((cwords < SIDH_NWORDS_ORDER)) { + if (is_even_mod_order(v)) { + oqs_sidh_cln16_mp_shiftr1(v, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_shiftl1(x1, cwords); + } else if (is_even_mod_order(u)) { + oqs_sidh_cln16_mp_shiftr1(u, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_shiftl1(x2, cwords); + } else if (!is_lt_mod_order(v, u)) { + oqs_sidh_cln16_mp_sub(v, u, v, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_shiftr1(v, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(x1, x2, x2, cwords); + oqs_sidh_cln16_mp_shiftl1(x1, cwords); + } else { + oqs_sidh_cln16_mp_sub(u, v, u, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_shiftr1(u, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(x1, x2, x1, cwords); + oqs_sidh_cln16_mp_shiftl1(x2, cwords); + } + } else { + if (is_even_mod_order(v)) { + oqs_sidh_cln16_mp_shiftr1(v, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_shiftl1(x1, SIDH_NWORDS_ORDER); + } else if (is_even_mod_order(u)) { + oqs_sidh_cln16_mp_shiftr1(u, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_shiftl1(x2, SIDH_NWORDS_ORDER); + } else if (!is_lt_mod_order(v, u)) { + oqs_sidh_cln16_mp_sub(v, u, v, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_shiftr1(v, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(x1, x2, x2, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_shiftl1(x1, SIDH_NWORDS_ORDER); + } else { + oqs_sidh_cln16_mp_sub(u, v, u, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_shiftr1(u, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(x1, x2, x1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_shiftl1(x2, SIDH_NWORDS_ORDER); + } + } + *k += 1; + } + + if (is_lt_mod_order(order, x1)) { + oqs_sidh_cln16_mp_sub(x1, order, x1, 
SIDH_NWORDS_ORDER); + } +} + +void oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(const digit_t *a, digit_t *c, const digit_t *order, const digit_t *Montgomery_rprime, const digit_t *Montgomery_Rprime) { // Montgomery inversion modulo order, a = a^(-1)*R mod order. + digit_t x[SIDH_NWORDS_ORDER], t[SIDH_NWORDS_ORDER]; + unsigned int k; + + Montgomery_inversion_mod_order_bingcd_partial(a, x, &k, order); + if (k < 384) { + oqs_sidh_cln16_Montgomery_multiply_mod_order(x, Montgomery_Rprime, x, order, Montgomery_rprime); + k += 384; + } + oqs_sidh_cln16_Montgomery_multiply_mod_order(x, Montgomery_Rprime, x, order, Montgomery_rprime); + power2_setup(t, 2 * 384 - k, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_Montgomery_multiply_mod_order(x, t, c, order, Montgomery_rprime); +} + +void oqs_sidh_cln16_to_Montgomery_mod_order(const digit_t *a, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime, const digit_t *Montgomery_Rprime) { // Conversion of elements in Z_r to Montgomery representation, where the order r is up to 384 bits. + + oqs_sidh_cln16_Montgomery_multiply_mod_order(a, Montgomery_Rprime, mc, order, Montgomery_rprime); +} + +void oqs_sidh_cln16_from_Montgomery_mod_order(const digit_t *ma, digit_t *c, const digit_t *order, const digit_t *Montgomery_rprime) { // Conversion of elements in Z_r from Montgomery to standard representation, where the order is up to 384 bits. + digit_t one[SIDH_NWORDS_ORDER] = {0}; + one[0] = 1; + + oqs_sidh_cln16_Montgomery_multiply_mod_order(ma, one, c, order, Montgomery_rprime); +} + +void oqs_sidh_cln16_inv_mod_orderA(const digit_t *a, digit_t *c) { // Inversion modulo an even integer of the form 2^m. + // Algorithm 3: Explicit Quadratic Modular inverse modulo 2^m from Dumas '12: http://arxiv.org/pdf/1209.6626.pdf + // NOTE: This function is hardwired for the current parameters using 2^372. 
+ unsigned int i, f, s = 0; + digit_t am1[SIDH_NWORDS_ORDER] = {0}; + digit_t tmp1[SIDH_NWORDS_ORDER] = {0}; + digit_t tmp2[2 * SIDH_NWORDS_ORDER] = {0}; + digit_t one[SIDH_NWORDS_ORDER] = {0}; + digit_t order[SIDH_NWORDS_ORDER] = {0}; + digit_t mask = (digit_t)(-1) >> 12; + bool equal = true; + + order[SIDH_NWORDS_ORDER - 1] = (digit_t) 1 << (sizeof(digit_t) * 8 - 12); // Load most significant digit of Alice's order + one[0] = 1; + + for (i = 0; i < SIDH_NWORDS_ORDER; i++) { + if (a[i] != one[0]) + equal = false; + } + if (equal) { + oqs_sidh_cln16_copy_words(a, c, SIDH_NWORDS_ORDER); + } else { + oqs_sidh_cln16_mp_sub(a, one, am1, SIDH_NWORDS_ORDER); // am1 = a-1 + oqs_sidh_cln16_mp_sub(order, am1, c, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(c, one, c, SIDH_NWORDS_ORDER); // c = 2^m - a + 2 + + oqs_sidh_cln16_copy_words(am1, tmp1, SIDH_NWORDS_ORDER); + while ((tmp1[0] & (digit_t) 1) == 0) { + s += 1; + oqs_sidh_cln16_mp_shiftr1(tmp1, SIDH_NWORDS_ORDER); + } + + f = 372 / s; + for (i = 1; i < f; i <<= 1) { + oqs_sidh_cln16_multiply(am1, am1, tmp2, SIDH_NWORDS_ORDER); // tmp2 = am1^2 + oqs_sidh_cln16_copy_words(tmp2, am1, SIDH_NWORDS_ORDER); + am1[SIDH_NWORDS_ORDER - 1] &= mask; // am1 = tmp2 mod 2^e + oqs_sidh_cln16_mp_add(am1, one, tmp1, SIDH_NWORDS_ORDER); // tmp1 = am1 + 1 + tmp1[SIDH_NWORDS_ORDER - 1] &= mask; // mod 2^e + oqs_sidh_cln16_multiply(c, tmp1, tmp2, SIDH_NWORDS_ORDER); // c = c*tmp1 + oqs_sidh_cln16_copy_words(tmp2, c, SIDH_NWORDS_ORDER); + c[SIDH_NWORDS_ORDER - 1] &= mask; // mod 2^e + } + } +} diff --git a/crypt/liboqs/kex_sidh_cln16/generic/fp_generic.c b/crypt/liboqs/kex_sidh_cln16/generic/fp_generic.c new file mode 100644 index 0000000000000000000000000000000000000000..9a19019bd5e712af1e29d539fd4b9ff5f54839a9 --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/generic/fp_generic.c @@ -0,0 +1,234 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based 
cryptography library for ephemeral +* Diffie-Hellman key exchange. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: portable modular arithmetic +* +*********************************************************************************************/ + +#include "../SIDH_internal.h" + +// Global constants +extern const uint64_t p751[NWORDS_FIELD]; +extern const uint64_t p751p1[NWORDS_FIELD]; +extern const uint64_t p751x2[NWORDS_FIELD]; + +__inline void oqs_sidh_cln16_fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modular addition, c = a+b mod p751. + // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t *) p751x2)[i], carry, c[i]); + } + mask = 0 - (digit_t) carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t *) p751x2)[i] & mask, carry, c[i]); + } +} + +__inline void oqs_sidh_cln16_fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modular subtraction, c = a-b mod p751. + // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t) borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t *) p751x2)[i] & mask, borrow, c[i]); + } +} + +__inline void oqs_sidh_cln16_fpneg751(digit_t *a) { // Modular negation, a = -a mod p751. + // Input/output: a in [0, 2*p751-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t *) p751x2)[i], a[i], borrow, a[i]); + } +} + +void oqs_sidh_cln16_fpdiv2_751(const digit_t *a, digit_t *c) { // Modular division by two, c = a/2 mod p751. 
+ // Input : a in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p751 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t *) p751)[i] & mask, carry, c[i]); + } + + oqs_sidh_cln16_mp_shiftr1(c, NWORDS_FIELD); +} + +void oqs_sidh_cln16_fpcorrection751(digit_t *a) { // Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t *) p751)[i], borrow, a[i]); + } + mask = 0 - (digit_t) borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t *) p751)[i] & mask, borrow, a[i]); + } +} + +void oqs_sidh_cln16_digit_x_digit(const digit_t a, const digit_t b, digit_t *c) { // Digit multiplication, digit * digit -> 2-digit result + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t) * 4), mask_high = (digit_t)(-1) << (sizeof(digit_t) * 4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al * bl; + albh = al * bh; + ahbl = ah * bl; + ahbh = ah * bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + c[0] ^= temp << (sizeof(digit_t) * 4); // C01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp & mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + +void oqs_sidh_cln16_mp_mul_schoolbook(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision schoolbook multiply, 
c = a*b, where lng(a) = lng(b) = nwords. + unsigned int i, j; + digit_t u, v, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < (2 * nwords); i++) + c[i] = 0; + + for (i = 0; i < nwords; i++) { + u = 0; + for (j = 0; j < nwords; j++) { + MUL(a[i], b[j], UV + 1, UV[0]); + ADDC(0, UV[0], u, carry, v); + u = UV[1] + carry; + ADDC(0, c[i + j], v, carry, v); + u = u + carry; + c[i + j] = v; + } + c[nwords + i] = u; + } +} + +void oqs_sidh_cln16_mp_mul_comba(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. + unsigned int i, j; + digit_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i - j], UV + 1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2 * nwords - 1; i++) { + for (j = i - nwords + 1; j < nwords; j++) { + MUL(a[j], b[i - j], UV + 1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2 * nwords - 1] = v; +} + +void oqs_sidh_cln16_rdc_mont(const oqs_sidh_cln16_dfelm_t ma, oqs_sidh_cln16_felm_t mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751. + // mc = ma*R^-1 mod p751x2, where R = 2^768. + // If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1]. + // ma is assumed to be in Montgomery representation. 
+ unsigned int i, j, carry, count = p751_ZERO_WORDS; + digit_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i - p751_ZERO_WORDS + 1)) { + MUL(mc[j], ((digit_t *) p751p1)[i - j], UV + 1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2 * NWORDS_FIELD - 1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i - NWORDS_FIELD + 1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD - count)) { + MUL(mc[j], ((digit_t *) p751p1)[i - j], UV + 1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i - NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + ADDC(0, v, ma[2 * NWORDS_FIELD - 1], carry, v); + mc[NWORDS_FIELD - 1] = v; +} diff --git a/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.c b/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.c new file mode 100644 index 0000000000000000000000000000000000000000..c3d7420b4049f116923c3eb59e70a1eb43ce79e5 --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.c @@ -0,0 +1,320 @@ +#if defined(WINDOWS) +#pragma warning(disable : 4047 4090) +#endif + +#if defined(WINDOWS) +#define UNUSED +#else +#define UNUSED __attribute__((unused)) +#endif + +#include <stdlib.h> +#include <string.h> +#if !defined(WINDOWS) +#include <strings.h> +#include <unistd.h> +#endif + +#include <oqs/kex.h> +#include <oqs/rand.h> + +#include "SIDH.h" +#include "kex_sidh_cln16.h" + +#if defined(WINDOWS) +#define strdup _strdup // for strdup deprecation warning +#endif + +static const char *P751 = "p751"; +static const char *CompressedP751 = "compressedp751"; + +static int isCompressed(const char *named_parameters) { + if 
(named_parameters != NULL && strcmp(named_parameters, CompressedP751) == 0) { + return 1; + } + + return 0; +} + +// Check if curve isogeny structure is NULL +extern bool oqs_sidh_cln16_is_CurveIsogenyStruct_null(PCurveIsogenyStruct pCurveIsogeny); + +OQS_KEX *OQS_KEX_sidh_cln16_new(OQS_RAND *rand, const char *named_parameters) { + int compressed = isCompressed(named_parameters); + OQS_KEX *k = malloc(sizeof(OQS_KEX)); + if (k == NULL) { + return NULL; + } + + // Curve isogeny system initialization + PCurveIsogenyStruct curveIsogeny = oqs_sidh_cln16_curve_allocate(&CurveIsogeny_SIDHp751); + + if (curveIsogeny == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(curveIsogeny)) { + free(k); + oqs_sidh_cln16_curve_free(curveIsogeny); + return NULL; + } + if (oqs_sidh_cln16_curve_initialize(curveIsogeny, &CurveIsogeny_SIDHp751) != SIDH_CRYPTO_SUCCESS) { + free(k); + oqs_sidh_cln16_curve_free(curveIsogeny); + return NULL; + } + k->ctx = curveIsogeny; + k->method_name = compressed ? strdup("SIDH CLN16 compressed") : strdup("SIDH CLN16"); + k->estimated_classical_security = 192; + k->estimated_quantum_security = 128; + k->seed = NULL; + k->seed_len = 0; + k->named_parameters = compressed ? 
CompressedP751 : P751; + k->rand = rand; + k->params = NULL; + k->alice_0 = &OQS_KEX_sidh_cln16_alice_0; + k->bob = &OQS_KEX_sidh_cln16_bob; + k->alice_1 = &OQS_KEX_sidh_cln16_alice_1; + k->alice_priv_free = &OQS_KEX_sidh_cln16_alice_priv_free; + k->free = &OQS_KEX_sidh_cln16_free; + + return k; +} + +int OQS_KEX_sidh_cln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) { + + int ret; + // non-compressed public key + uint8_t *alice_tmp_pub = NULL; + + if (!k || !alice_priv || !alice_msg || !alice_msg_len) { + return 0; + } + + int compressed = isCompressed(k->named_parameters); + *alice_priv = NULL; + /* alice_msg is alice's public key */ + *alice_msg = NULL; + if (compressed) { + alice_tmp_pub = malloc(SIDH_PUBKEY_LEN); + *alice_msg = malloc(SIDH_COMPRESSED_PUBKEY_LEN); + if (alice_tmp_pub == NULL || *alice_msg == NULL) { + goto err; + } + } else { + // non-compressed + *alice_msg = malloc(SIDH_PUBKEY_LEN); + if (*alice_msg == NULL) { + goto err; + } + alice_tmp_pub = *alice_msg; // point to the pub key + } + *alice_priv = malloc(SIDH_SECRETKEY_LEN); + if (*alice_priv == NULL) { + goto err; + } + + // generate Alice's key pair + if (oqs_sidh_cln16_EphemeralKeyGeneration_A((unsigned char *) *alice_priv, (unsigned char *) alice_tmp_pub, k->ctx, k->rand) != SIDH_CRYPTO_SUCCESS) { + goto err; + } + + if (compressed) { + // compress Alice's public key + oqs_sidh_cln16_PublicKeyCompression_A(alice_tmp_pub, (unsigned char *) *alice_msg, k->ctx); + *alice_msg_len = SIDH_COMPRESSED_PUBKEY_LEN; + } else { + *alice_msg_len = SIDH_PUBKEY_LEN; + alice_tmp_pub = NULL; // we don't want to double-free it + } + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*alice_msg); + *alice_msg = NULL; + free(*alice_priv); + *alice_priv = NULL; + +cleanup: + free(alice_tmp_pub); + + return ret; +} + +int OQS_KEX_sidh_cln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, 
size_t *key_len) { + + int ret; + uint8_t *bob_priv = NULL; + // non-compressed public key + uint8_t *bob_tmp_pub = NULL; + // decompression values + unsigned char *R = NULL, *A = NULL; + + if (!k || !alice_msg || !bob_msg || !bob_msg_len || !key || !key_len) { + return 0; + } + + *bob_msg = NULL; + *key = NULL; + + int compressed = isCompressed(k->named_parameters); + + if (compressed) { + if (alice_msg_len != SIDH_COMPRESSED_PUBKEY_LEN) { + goto err; + } + bob_tmp_pub = malloc(SIDH_PUBKEY_LEN); + *bob_msg = malloc(SIDH_COMPRESSED_PUBKEY_LEN); + if (bob_tmp_pub == NULL || *bob_msg == NULL) { + goto err; + } + A = malloc(SIDH_COMPRESSED_A_LEN); + if (A == NULL) { + goto err; + } + R = malloc(SIDH_COMPRESSED_R_LEN); + if (R == NULL) { + goto err; + } + } else { + if (alice_msg_len != SIDH_PUBKEY_LEN) { + goto err; + } + // non-compressed + *bob_msg = malloc(SIDH_PUBKEY_LEN); + if (*bob_msg == NULL) { + goto err; + } + bob_tmp_pub = *bob_msg; // point to the pub key + } + + bob_priv = malloc(SIDH_SECRETKEY_LEN); + if (bob_priv == NULL) { + goto err; + } + *key = malloc(SIDH_SHAREDKEY_LEN); + if (*key == NULL) { + goto err; + } + + // generate Bob's key pair + if (oqs_sidh_cln16_EphemeralKeyGeneration_B((unsigned char *) bob_priv, (unsigned char *) bob_tmp_pub, k->ctx, k->rand) != SIDH_CRYPTO_SUCCESS) { + goto err; + } + + if (compressed) { + // compress Bob's public key + oqs_sidh_cln16_PublicKeyCompression_B(bob_tmp_pub, (unsigned char *) *bob_msg, k->ctx); + *bob_msg_len = SIDH_COMPRESSED_PUBKEY_LEN; + // decompress Alice's public key + oqs_sidh_cln16_PublicKeyADecompression_B((unsigned char *) bob_priv, (unsigned char *) alice_msg, R, A, k->ctx); + // compute Bob's shared secret + if (oqs_sidh_cln16_EphemeralSecretAgreement_Compression_B((unsigned char *) bob_priv, R, A, (unsigned char *) *key, k->ctx) != SIDH_CRYPTO_SUCCESS) { + goto err; + } + } else { + *bob_msg_len = SIDH_PUBKEY_LEN; + bob_tmp_pub = NULL; // we don't want to double-free it + // compute Bob's 
shared secret + if (oqs_sidh_cln16_EphemeralSecretAgreement_B((unsigned char *) bob_priv, (unsigned char *) alice_msg, (unsigned char *) *key, k->ctx) != SIDH_CRYPTO_SUCCESS) { + goto err; + } + } + + *key_len = SIDH_SHAREDKEY_LEN; + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*bob_msg); + *bob_msg = NULL; + free(*key); + *key = NULL; + +cleanup: + free(bob_tmp_pub); + free(bob_priv); + free(A); + free(R); + + return ret; +} + +int OQS_KEX_sidh_cln16_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + // decompression values + unsigned char *R = NULL, *A = NULL; + + if (!k || !alice_priv || !bob_msg || !key || !key_len) { + return 0; + } + + *key = NULL; + + int compressed = isCompressed(k->named_parameters); + + *key = malloc(SIDH_SHAREDKEY_LEN); + if (*key == NULL) { + goto err; + } + *key_len = SIDH_SHAREDKEY_LEN; + + if (compressed) { + if (bob_msg_len != SIDH_COMPRESSED_PUBKEY_LEN) { + goto err; + } + A = malloc(SIDH_COMPRESSED_A_LEN); + if (A == NULL) { + goto err; + } + R = malloc(SIDH_COMPRESSED_R_LEN); + if (R == NULL) { + goto err; + } + // compute Alice's shared secret + oqs_sidh_cln16_PublicKeyBDecompression_A((unsigned char *) alice_priv, (unsigned char *) bob_msg, R, A, k->ctx); + if (oqs_sidh_cln16_EphemeralSecretAgreement_Compression_A((unsigned char *) alice_priv, R, A, (unsigned char *) *key, k->ctx) != SIDH_CRYPTO_SUCCESS) { + goto err; + } + } else { + if (bob_msg_len != SIDH_PUBKEY_LEN) { + goto err; + } + if (oqs_sidh_cln16_EphemeralSecretAgreement_A((unsigned char *) alice_priv, (unsigned char *) bob_msg, (unsigned char *) *key, k->ctx) != SIDH_CRYPTO_SUCCESS) { + goto err; + } + } + + ret = 1; + goto cleanup; + +err: + ret = 0; + free(*key); + *key = NULL; + +cleanup: + free(A); + free(R); + + return ret; +} + +void OQS_KEX_sidh_cln16_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) { + if (alice_priv) { + free(alice_priv); + } +} + +void 
OQS_KEX_sidh_cln16_free(OQS_KEX *k) { + if (!k) { + return; + } + oqs_sidh_cln16_curve_free((PCurveIsogenyStruct) k->ctx); + k->ctx = NULL; + free(k->method_name); + k->method_name = NULL; + free(k); +} diff --git a/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.h b/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.h new file mode 100644 index 0000000000000000000000000000000000000000..4409db7a6f9f0292ac6343066fc755ff75db21f1 --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.h @@ -0,0 +1,24 @@ +/** + * \file kex_sidh_cln16.h + * \brief Header for SIDH key exchange protocol from the Microsoft SIDH library + */ + +#ifndef __OQS_KEX_SIDH_CLN16_H +#define __OQS_KEX_SIDH_CLN16_H + +#include <stddef.h> +#include <stdint.h> + +#include <oqs/kex.h> +#include <oqs/rand.h> + +OQS_KEX *OQS_KEX_sidh_cln16_new(OQS_RAND *rand, const char *named_parameters); + +int OQS_KEX_sidh_cln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len); +int OQS_KEX_sidh_cln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len); +int OQS_KEX_sidh_cln16_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + +void OQS_KEX_sidh_cln16_alice_priv_free(OQS_KEX *k, void *alice_priv); +void OQS_KEX_sidh_cln16_free(OQS_KEX *k); + +#endif diff --git a/crypt/liboqs/kex_sidh_cln16/sidh_kex.c b/crypt/liboqs/kex_sidh_cln16/sidh_kex.c new file mode 100644 index 0000000000000000000000000000000000000000..de1b3100a6ab1a35265b1dee33a9fbbb0c84d99b --- /dev/null +++ b/crypt/liboqs/kex_sidh_cln16/sidh_kex.c @@ -0,0 +1,737 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral +* Diffie-Hellman key exchange. +* +* Copyright (c) Microsoft Corporation. All rights reserved. 
+* +* +* Abstract: ephemeral isogeny-based key exchange +* +*********************************************************************************************/ + +#include "SIDH_internal.h" + +extern const unsigned int splits_Alice[SIDH_MAX_Alice]; +extern const unsigned int splits_Bob[SIDH_MAX_Bob]; + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralKeyGeneration_A(unsigned char *PrivateKeyA, unsigned char *PublicKeyA, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand) { // Alice's ephemeral key-pair generation + // It produces a private key PrivateKeyA and computes the public key PublicKeyA. + // The private key is an even integer in the range [2, oA-2], where oA = 2^372. + // The public key consists of 3 elements in GF(p751^2). + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). + unsigned int owords = NBITS_TO_NWORDS(CurveIsogeny->owordbits), pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits); + oqs_sidh_cln16_point_basefield_t P; + oqs_sidh_cln16_point_proj_t R, phiP = {0}, phiQ = {0}, phiD = {0}, pts[SIDH_MAX_INT_POINTS_ALICE]; + oqs_sidh_cln16_publickey_t *PublicKey = (oqs_sidh_cln16_publickey_t *) PublicKeyA; + unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_ALICE], npts = 0; + oqs_sidh_cln16_f2elm_t coeff[5], A = {0}, C = {0}, Aout, Cout; + SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS; + + if (PrivateKeyA == NULL || PublicKey == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + // Choose a random even number in the range [2, oA-2] as secret key for Alice + Status = oqs_sidh_cln16_random_mod_order((digit_t *) PrivateKeyA, SIDH_ALICE, CurveIsogeny, rand); + if (Status != SIDH_CRYPTO_SUCCESS) { + oqs_sidh_cln16_clear_words((void *) PrivateKeyA, owords); + return Status; + } + + oqs_sidh_cln16_to_mont((digit_t *) CurveIsogeny->PA, (digit_t *) P); // Conversion of Alice's generators to Montgomery representation + oqs_sidh_cln16_to_mont(((digit_t *) 
CurveIsogeny->PA) + NWORDS_FIELD, ((digit_t *) P) + NWORDS_FIELD); + + Status = oqs_sidh_cln16_secret_pt(P, (digit_t *) PrivateKeyA, SIDH_ALICE, R, CurveIsogeny); + if (Status != SIDH_CRYPTO_SUCCESS) { + oqs_sidh_cln16_clear_words((void *) PrivateKeyA, owords); + return Status; + } + + oqs_sidh_cln16_copy_words((digit_t *) CurveIsogeny->PB, (digit_t *) phiP, pwords); // Copy X-coordinates from Bob's public parameters, set Z <- 1 + oqs_sidh_cln16_fpcopy751((digit_t *) CurveIsogeny->Montgomery_one, (digit_t *) phiP->Z); + oqs_sidh_cln16_to_mont((digit_t *) phiP, (digit_t *) phiP); + oqs_sidh_cln16_copy_words((digit_t *) phiP, (digit_t *) phiQ, pwords); // QB = (-XPB:1) + oqs_sidh_cln16_fpneg751(phiQ->X[0]); + oqs_sidh_cln16_fpcopy751((digit_t *) CurveIsogeny->Montgomery_one, (digit_t *) phiQ->Z); + oqs_sidh_cln16_distort_and_diff(phiP->X[0], phiD, CurveIsogeny); // DB = (x(QB-PB),z(QB-PB)) + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->A, A[0]); // Extracting curve parameters A and C + oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]); + oqs_sidh_cln16_to_mont(A[0], A[0]); + oqs_sidh_cln16_to_mont(C[0], C[0]); + + oqs_sidh_cln16_first_4_isog(phiP, A, Aout, Cout, CurveIsogeny); + oqs_sidh_cln16_first_4_isog(phiQ, A, Aout, Cout, CurveIsogeny); + oqs_sidh_cln16_first_4_isog(phiD, A, Aout, Cout, CurveIsogeny); + oqs_sidh_cln16_first_4_isog(R, A, A, C, CurveIsogeny); + + index = 0; + for (row = 1; row < SIDH_MAX_Alice; row++) { + while (index < SIDH_MAX_Alice - row) { + oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X); + oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z); + pts_index[npts] = index; + npts += 1; + m = splits_Alice[SIDH_MAX_Alice - index - row]; + oqs_sidh_cln16_xDBLe(R, R, A, C, (int) (2 * m)); + index += m; + } + oqs_sidh_cln16_get_4_isog(R, A, C, coeff); + + for (i = 0; i < npts; i++) { + oqs_sidh_cln16_eval_4_isog(pts[i], coeff); + } + oqs_sidh_cln16_eval_4_isog(phiP, coeff); + oqs_sidh_cln16_eval_4_isog(phiQ, coeff); + oqs_sidh_cln16_eval_4_isog(phiD, coeff); + + 
 oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X); + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + oqs_sidh_cln16_get_4_isog(R, A, C, coeff); + oqs_sidh_cln16_eval_4_isog(phiP, coeff); + oqs_sidh_cln16_eval_4_isog(phiQ, coeff); + oqs_sidh_cln16_eval_4_isog(phiD, coeff); + + oqs_sidh_cln16_inv_3_way(phiP->Z, phiQ->Z, phiD->Z); + oqs_sidh_cln16_fp2mul751_mont(phiP->X, phiP->Z, phiP->X); + oqs_sidh_cln16_fp2mul751_mont(phiQ->X, phiQ->Z, phiQ->X); + oqs_sidh_cln16_fp2mul751_mont(phiD->X, phiD->Z, phiD->X); + + oqs_sidh_cln16_from_fp2mont(phiP->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[0]); // Converting back to standard representation + oqs_sidh_cln16_from_fp2mont(phiQ->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[1]); + oqs_sidh_cln16_from_fp2mont(phiD->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[2]); + + // Cleanup: + oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) phiP, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) phiQ, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) phiD, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_ALICE * 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) A, 2 * pwords); + oqs_sidh_cln16_clear_words((void *) C, 2 * pwords); + oqs_sidh_cln16_clear_words((void *) coeff, 5 * 2 * pwords); + + return Status; +} + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralKeyGeneration_B(unsigned char *PrivateKeyB, unsigned char *PublicKeyB, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand) { // Bob's ephemeral key-pair generation + // It produces a private key PrivateKeyB and computes the public key PublicKeyB. + // The private key is an integer in the range [1, oB-1], where oB = 3^239. + // The public key consists of 3 elements in GF(p751^2). + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). 
+ unsigned int owords = NBITS_TO_NWORDS(CurveIsogeny->owordbits), pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits); + oqs_sidh_cln16_point_basefield_t P; + oqs_sidh_cln16_point_proj_t R, phiP = {0}, phiQ = {0}, phiD = {0}, pts[SIDH_MAX_INT_POINTS_BOB]; + oqs_sidh_cln16_publickey_t *PublicKey = (oqs_sidh_cln16_publickey_t *) PublicKeyB; + unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_BOB], npts = 0; + oqs_sidh_cln16_f2elm_t A = {0}, C = {0}; + SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS; + + if (PrivateKeyB == NULL || PublicKey == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + // Choose a random number equivalent to 0 (mod 3) in the range [3, oB-3] as secret key for Bob + Status = oqs_sidh_cln16_random_mod_order((digit_t *) PrivateKeyB, SIDH_BOB, CurveIsogeny, rand); + if (Status != SIDH_CRYPTO_SUCCESS) { + oqs_sidh_cln16_clear_words((void *) PrivateKeyB, owords); + return Status; + } + + oqs_sidh_cln16_to_mont((digit_t *) CurveIsogeny->PB, (digit_t *) P); // Conversion of Bob's generators to Montgomery representation + oqs_sidh_cln16_to_mont(((digit_t *) CurveIsogeny->PB) + NWORDS_FIELD, ((digit_t *) P) + NWORDS_FIELD); + + Status = oqs_sidh_cln16_secret_pt(P, (digit_t *) PrivateKeyB, SIDH_BOB, R, CurveIsogeny); + if (Status != SIDH_CRYPTO_SUCCESS) { + oqs_sidh_cln16_clear_words((void *) PrivateKeyB, owords); + return Status; + } + + oqs_sidh_cln16_copy_words((digit_t *) CurveIsogeny->PA, (digit_t *) phiP, pwords); // Copy X-coordinates from Alice's public parameters, set Z <- 1 + oqs_sidh_cln16_fpcopy751((digit_t *) CurveIsogeny->Montgomery_one, (digit_t *) phiP->Z); + oqs_sidh_cln16_to_mont((digit_t *) phiP, (digit_t *) phiP); // Conversion to Montgomery representation + oqs_sidh_cln16_copy_words((digit_t *) phiP, (digit_t *) phiQ, pwords); // QA = (-XPA:1) + oqs_sidh_cln16_fpneg751(phiQ->X[0]); + oqs_sidh_cln16_fpcopy751((digit_t *) CurveIsogeny->Montgomery_one, 
(digit_t *) phiQ->Z); + oqs_sidh_cln16_distort_and_diff(phiP->X[0], phiD, CurveIsogeny); // DA = (x(QA-PA),z(QA-PA)) + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->A, A[0]); // Extracting curve parameters A and C + oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]); + oqs_sidh_cln16_to_mont(A[0], A[0]); + oqs_sidh_cln16_to_mont(C[0], C[0]); + + index = 0; + for (row = 1; row < SIDH_MAX_Bob; row++) { + while (index < SIDH_MAX_Bob - row) { + oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X); + oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z); + pts_index[npts] = index; + npts += 1; + m = splits_Bob[SIDH_MAX_Bob - index - row]; + oqs_sidh_cln16_xTPLe(R, R, A, C, (int) m); + index += m; + } + oqs_sidh_cln16_get_3_isog(R, A, C); + + for (i = 0; i < npts; i++) { + oqs_sidh_cln16_eval_3_isog(R, pts[i]); + } + oqs_sidh_cln16_eval_3_isog(R, phiP); + oqs_sidh_cln16_eval_3_isog(R, phiQ); + oqs_sidh_cln16_eval_3_isog(R, phiD); + + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X); + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + oqs_sidh_cln16_get_3_isog(R, A, C); + oqs_sidh_cln16_eval_3_isog(R, phiP); + oqs_sidh_cln16_eval_3_isog(R, phiQ); + oqs_sidh_cln16_eval_3_isog(R, phiD); + + oqs_sidh_cln16_inv_3_way(phiP->Z, phiQ->Z, phiD->Z); + oqs_sidh_cln16_fp2mul751_mont(phiP->X, phiP->Z, phiP->X); + oqs_sidh_cln16_fp2mul751_mont(phiQ->X, phiQ->Z, phiQ->X); + oqs_sidh_cln16_fp2mul751_mont(phiD->X, phiD->Z, phiD->X); + + oqs_sidh_cln16_from_fp2mont(phiP->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[0]); // Converting back to standard representation + oqs_sidh_cln16_from_fp2mont(phiQ->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[1]); + oqs_sidh_cln16_from_fp2mont(phiD->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[2]); + + // Cleanup: + oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) phiP, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) phiQ, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) 
phiD, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_BOB * 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) A, 2 * pwords); + oqs_sidh_cln16_clear_words((void *) C, 2 * pwords); + + return Status; +} + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_A(const unsigned char *PrivateKeyA, const unsigned char *PublicKeyB, unsigned char *SharedSecretA, PCurveIsogenyStruct CurveIsogeny) { // Alice's ephemeral shared secret computation + // It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB + // Inputs: Alice's PrivateKeyA is an even integer in the range [2, oA-2], where oA = 2^372. + // Bob's PublicKeyB consists of 3 elements in GF(p751^2). + // Output: a shared secret SharedSecretA that consists of one element in GF(p751^2). + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). + unsigned int pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits); + unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_ALICE], npts = 0; + oqs_sidh_cln16_point_proj_t R, pts[SIDH_MAX_INT_POINTS_ALICE]; + oqs_sidh_cln16_publickey_t *PublicKey = (oqs_sidh_cln16_publickey_t *) PublicKeyB; + oqs_sidh_cln16_f2elm_t jinv, coeff[5], PKB[3], A, C = {0}; + SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS; + + if (PrivateKeyA == NULL || PublicKey == NULL || SharedSecretA == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[0], PKB[0]); // Extracting and converting Bob's public curve parameters to Montgomery representation + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[1], PKB[1]); + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[2], PKB[2]); + + oqs_sidh_cln16_get_A(PKB[0], PKB[1], PKB[2], A, CurveIsogeny); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]); + oqs_sidh_cln16_to_mont(C[0], C[0]); 
+ + Status = oqs_sidh_cln16_ladder_3_pt(PKB[0], PKB[1], PKB[2], (digit_t *) PrivateKeyA, SIDH_ALICE, R, A, CurveIsogeny); + if (Status != SIDH_CRYPTO_SUCCESS) { + return Status; + } + oqs_sidh_cln16_first_4_isog(R, A, A, C, CurveIsogeny); + + index = 0; + for (row = 1; row < SIDH_MAX_Alice; row++) { + while (index < SIDH_MAX_Alice - row) { + oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X); + oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z); + pts_index[npts] = index; + npts += 1; + m = splits_Alice[SIDH_MAX_Alice - index - row]; + oqs_sidh_cln16_xDBLe(R, R, A, C, (int) (2 * m)); + index += m; + } + oqs_sidh_cln16_get_4_isog(R, A, C, coeff); + + for (i = 0; i < npts; i++) { + oqs_sidh_cln16_eval_4_isog(pts[i], coeff); + } + + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X); + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + oqs_sidh_cln16_get_4_isog(R, A, C, coeff); + oqs_sidh_cln16_j_inv(A, C, jinv); + oqs_sidh_cln16_from_fp2mont(jinv, (oqs_sidh_cln16_felm_t *) SharedSecretA); // Converting back to standard representation + + // Cleanup: + oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_ALICE * 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) A, 2 * pwords); + oqs_sidh_cln16_clear_words((void *) C, 2 * pwords); + oqs_sidh_cln16_clear_words((void *) jinv, 2 * pwords); + oqs_sidh_cln16_clear_words((void *) coeff, 5 * 2 * pwords); + + return Status; +} + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_B(const unsigned char *PrivateKeyB, const unsigned char *PublicKeyA, unsigned char *SharedSecretB, PCurveIsogenyStruct CurveIsogeny) { // Bob's ephemeral shared secret computation + // It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA + // Inputs: Bob's PrivateKeyB is an integer in the range [1, oB-1], where oB = 3^239. 
+ // Alice's PublicKeyA consists of 3 elements in GF(p751^2). + // Output: a shared secret SharedSecretB that consists of one element in GF(p751^2). + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). + unsigned int pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits); + unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_BOB], npts = 0; + oqs_sidh_cln16_point_proj_t R, pts[SIDH_MAX_INT_POINTS_BOB]; + oqs_sidh_cln16_publickey_t *PublicKey = (oqs_sidh_cln16_publickey_t *) PublicKeyA; + oqs_sidh_cln16_f2elm_t jinv, A, PKA[3], C = {0}; + SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS; + + if (PrivateKeyB == NULL || PublicKey == NULL || SharedSecretB == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[0], PKA[0]); // Extracting and converting Alice's public curve parameters to Montgomery representation + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[1], PKA[1]); + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[2], PKA[2]); + + oqs_sidh_cln16_get_A(PKA[0], PKA[1], PKA[2], A, CurveIsogeny); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]); + oqs_sidh_cln16_to_mont(C[0], C[0]); + + Status = oqs_sidh_cln16_ladder_3_pt(PKA[0], PKA[1], PKA[2], (digit_t *) PrivateKeyB, SIDH_BOB, R, A, CurveIsogeny); + if (Status != SIDH_CRYPTO_SUCCESS) { + return Status; + } + + index = 0; + for (row = 1; row < SIDH_MAX_Bob; row++) { + while (index < SIDH_MAX_Bob - row) { + oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X); + oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z); + pts_index[npts] = index; + npts += 1; + m = splits_Bob[SIDH_MAX_Bob - index - row]; + oqs_sidh_cln16_xTPLe(R, R, A, C, (int) m); + index += m; + } + oqs_sidh_cln16_get_3_isog(R, A, C); + + for (i = 0; i < npts; i++) { + oqs_sidh_cln16_eval_3_isog(R, pts[i]); + } + + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X); + 
oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + oqs_sidh_cln16_get_3_isog(R, A, C); + oqs_sidh_cln16_j_inv(A, C, jinv); + oqs_sidh_cln16_from_fp2mont(jinv, (oqs_sidh_cln16_felm_t *) SharedSecretB); // Converting back to standard representation + + // Cleanup: + oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_BOB * 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) A, 2 * pwords); + oqs_sidh_cln16_clear_words((void *) C, 2 * pwords); + oqs_sidh_cln16_clear_words((void *) jinv, 2 * pwords); + + return Status; +} + +/////////////////////////////////////////////////////////////////////////////////// +/////////////// KEY EXCHANGE USING DECOMPRESSION /////////////// + +void oqs_sidh_cln16_PublicKeyCompression_A(const unsigned char *PublicKeyA, unsigned char *CompressedPKA, PCurveIsogenyStruct CurveIsogeny) { // Alice's public key compression + // It produces a compressed output that consists of three elements in Z_orderB and one field element + // Input : Alice's public key PublicKeyA, which consists of 3 elements in GF(p751^2). + // Output: a compressed value CompressedPKA that consists of three elements in Z_orderB and one element in GF(p751^2). + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). 
+ oqs_sidh_cln16_point_full_proj_t P, Q, phP, phQ, phX; + oqs_sidh_cln16_point_t R1, R2, phiP, phiQ; + oqs_sidh_cln16_publickey_t PK; + digit_t *comp = (digit_t *) CompressedPKA; + digit_t inv[SIDH_NWORDS_ORDER]; + oqs_sidh_cln16_f2elm_t A, vec[4], Zinv[4]; + digit_t a0[SIDH_NWORDS_ORDER], b0[SIDH_NWORDS_ORDER], a1[SIDH_NWORDS_ORDER], b1[SIDH_NWORDS_ORDER]; + uint64_t Montgomery_Rprime[SIDH_NWORDS64_ORDER] = {0x1A55482318541298, 0x070A6370DFA12A03, 0xCB1658E0E3823A40, 0xB3B7384EB5DEF3F9, 0xCBCA952F7006EA33, 0x00569EF8EC94864C}; // Value (2^384)^2 mod 3^239 + uint64_t Montgomery_rprime[SIDH_NWORDS64_ORDER] = {0x48062A91D3AB563D, 0x6CE572751303C2F5, 0x5D1319F3F160EC9D, 0xE35554E8C2D5623A, 0xCA29300232BC79A5, 0x8AAD843D646D78C5}; // Value -(3^239)^-1 mod 2^384 + unsigned int bit; + + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[0], ((oqs_sidh_cln16_f2elm_t *) &PK)[0]); // Converting to Montgomery representation + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[1], ((oqs_sidh_cln16_f2elm_t *) &PK)[1]); + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[2], ((oqs_sidh_cln16_f2elm_t *) &PK)[2]); + + oqs_sidh_cln16_recover_y(PK, phP, phQ, phX, A, CurveIsogeny); + oqs_sidh_cln16_generate_3_torsion_basis(A, P, Q, CurveIsogeny); + oqs_sidh_cln16_fp2copy751(P->Z, vec[0]); + oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]); + oqs_sidh_cln16_fp2copy751(phP->Z, vec[2]); + oqs_sidh_cln16_fp2copy751(phQ->Z, vec[3]); + oqs_sidh_cln16_mont_n_way_inv(vec, 4, Zinv); + + oqs_sidh_cln16_fp2mul751_mont(P->X, Zinv[0], R1->x); + oqs_sidh_cln16_fp2mul751_mont(P->Y, Zinv[0], R1->y); + oqs_sidh_cln16_fp2mul751_mont(Q->X, Zinv[1], R2->x); + oqs_sidh_cln16_fp2mul751_mont(Q->Y, Zinv[1], R2->y); + oqs_sidh_cln16_fp2mul751_mont(phP->X, Zinv[2], phiP->x); + oqs_sidh_cln16_fp2mul751_mont(phP->Y, Zinv[2], phiP->y); + oqs_sidh_cln16_fp2mul751_mont(phQ->X, Zinv[3], phiQ->x); + oqs_sidh_cln16_fp2mul751_mont(phQ->Y, Zinv[3], phiQ->y); + + 
oqs_sidh_cln16_ph3(phiP, phiQ, R1, R2, A, (uint64_t *) a0, (uint64_t *) b0, (uint64_t *) a1, (uint64_t *) b1, CurveIsogeny); + + bit = oqs_sidh_cln16_mod3(a0); + oqs_sidh_cln16_to_Montgomery_mod_order(a0, a0, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to Montgomery representation + oqs_sidh_cln16_to_Montgomery_mod_order(a1, a1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_to_Montgomery_mod_order(b0, b0, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_to_Montgomery_mod_order(b1, b1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + + if (bit != 0) { // Storing [b1*a0inv, a1*a0inv, b0*a0inv] and setting bit384 to 0 + oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(a0, inv, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order(b0, inv, &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order(a1, inv, &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order(b1, inv, &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[0], &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1; + } else { // Storing [b1*b0inv, a1*b0inv, a0*b0inv] and setting bit384 to 1 + 
oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(b0, inv, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order(a0, inv, &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order(a1, inv, &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order(b1, inv, &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[0], &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_from_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + comp[3 * SIDH_NWORDS_ORDER - 1] |= (digit_t) 1 << (sizeof(digit_t) * 8 - 1); + } + + oqs_sidh_cln16_from_fp2mont(A, (oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER]); +} + +void oqs_sidh_cln16_PublicKeyADecompression_B(const unsigned char *SecretKeyB, const unsigned char *CompressedPKA, unsigned char *point_R, unsigned char *param_A, PCurveIsogenyStruct CurveIsogeny) { // Alice's public key value decompression computed by Bob + // Inputs: Bob's private key SecretKeyB, and + // Alice's compressed public key data CompressedPKA, which consists of three elements in Z_orderB and one element in GF(p751^2), + // Output: a point point_R in coordinates (X:Z) and the curve parameter param_A in GF(p751^2). Outputs are stored in Montgomery representation. + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). 
+ oqs_sidh_cln16_point_t R1, R2; + oqs_sidh_cln16_point_proj_t *R = (oqs_sidh_cln16_point_proj_t *) point_R; + oqs_sidh_cln16_point_full_proj_t P, Q; + digit_t *comp = (digit_t *) CompressedPKA; + digit_t *SKin = (digit_t *) SecretKeyB; + oqs_sidh_cln16_f2elm_t A24, vec[2], invs[2], one = {0}; + oqs_sidh_cln16_felm_t *A = (oqs_sidh_cln16_felm_t *) param_A; + digit_t t1[SIDH_NWORDS_ORDER], t2[SIDH_NWORDS_ORDER], t3[SIDH_NWORDS_ORDER], t4[SIDH_NWORDS_ORDER], vone[SIDH_NWORDS_ORDER] = {0}; + uint64_t Montgomery_Rprime[SIDH_NWORDS64_ORDER] = {0x1A55482318541298, 0x070A6370DFA12A03, 0xCB1658E0E3823A40, 0xB3B7384EB5DEF3F9, 0xCBCA952F7006EA33, 0x00569EF8EC94864C}; // Value (2^384)^2 mod 3^239 + uint64_t Montgomery_rprime[SIDH_NWORDS64_ORDER] = {0x48062A91D3AB563D, 0x6CE572751303C2F5, 0x5D1319F3F160EC9D, 0xE35554E8C2D5623A, 0xCA29300232BC79A5, 0x8AAD843D646D78C5}; // Value -(3^239)^-1 mod 2^384 + unsigned int bit; + + vone[0] = 1; + oqs_sidh_cln16_to_Montgomery_mod_order(vone, vone, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to Montgomery representation + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_to_fp2mont((oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER], A); // Converting to Montgomery representation + oqs_sidh_cln16_generate_3_torsion_basis(A, P, Q, CurveIsogeny); + + // Normalize basis points + oqs_sidh_cln16_fp2copy751(P->Z, vec[0]); + oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]); + oqs_sidh_cln16_mont_n_way_inv(vec, 2, invs); + oqs_sidh_cln16_fp2mul751_mont(P->X, invs[0], R1->x); + oqs_sidh_cln16_fp2mul751_mont(P->Y, invs[0], R1->y); + oqs_sidh_cln16_fp2mul751_mont(Q->X, invs[1], R2->x); + oqs_sidh_cln16_fp2mul751_mont(Q->Y, invs[1], R2->y); + + oqs_sidh_cln16_fp2add751(A, one, A24); + oqs_sidh_cln16_fp2add751(A24, one, A24); + oqs_sidh_cln16_fp2div2_751(A24, A24); + oqs_sidh_cln16_fp2div2_751(A24, A24); + + bit = comp[3 * SIDH_NWORDS_ORDER - 1] >> (sizeof(digit_t) * 8 - 
1); + comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1; + oqs_sidh_cln16_to_Montgomery_mod_order(SKin, t1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to Montgomery representation + oqs_sidh_cln16_to_Montgomery_mod_order(&comp[0], t2, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_to_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_to_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + + if (bit == 0) { + oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_mp_add(t3, vone, t3, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_mp_add(t2, t4, t4, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_Montgomery_multiply_mod_order(t3, t4, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_from_Montgomery_mod_order(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation + oqs_sidh_cln16_mont_twodim_scalarmult(t3, R1, R2, A, A24, P, CurveIsogeny); + } else { + oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_mp_add(t4, vone, t4, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); + oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t3, t3, CurveIsogeny->Border, (digit_t *) 
&Montgomery_rprime); + oqs_sidh_cln16_mp_add(t2, t3, t3, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_Montgomery_multiply_mod_order(t3, t4, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); + oqs_sidh_cln16_from_Montgomery_mod_order(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation + oqs_sidh_cln16_mont_twodim_scalarmult(t3, R2, R1, A, A24, P, CurveIsogeny); + } + + oqs_sidh_cln16_fp2copy751(P->X, R[0]->X); + oqs_sidh_cln16_fp2copy751(P->Z, R[0]->Z); +} + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_Compression_A(const unsigned char *PrivateKeyA, const unsigned char *point_R, const unsigned char *param_A, unsigned char *SharedSecretA, PCurveIsogenyStruct CurveIsogeny) { // Alice's ephemeral shared secret computation + // It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's decompressed data point_R and param_A + // Inputs: Alice's PrivateKeyA is an even integer in the range [2, oA-2], where oA = 2^372. + // Bob's decompressed data consists of point_R in (X:Z) coordinates and the curve paramater param_A in GF(p751^2). + // Output: a shared secret SharedSecretA that consists of one element in GF(p751^2). + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). 
+ unsigned int pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits); + unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_ALICE], npts = 0; + oqs_sidh_cln16_point_proj_t R, pts[SIDH_MAX_INT_POINTS_ALICE]; + oqs_sidh_cln16_f2elm_t jinv, coeff[5], A, C = {0}; + + if (PrivateKeyA == NULL || SharedSecretA == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_fp2copy751((((oqs_sidh_cln16_point_proj_t *) point_R)[0])->X, R->X); + oqs_sidh_cln16_fp2copy751((((oqs_sidh_cln16_point_proj_t *) point_R)[0])->Z, R->Z); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]); + oqs_sidh_cln16_to_mont(C[0], C[0]); + oqs_sidh_cln16_first_4_isog(R, (oqs_sidh_cln16_felm_t *) param_A, A, C, CurveIsogeny); + + index = 0; + for (row = 1; row < SIDH_MAX_Alice; row++) { + while (index < SIDH_MAX_Alice - row) { + oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X); + oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z); + pts_index[npts] = index; + npts += 1; + m = splits_Alice[SIDH_MAX_Alice - index - row]; + oqs_sidh_cln16_xDBLe(R, R, A, C, (int) (2 * m)); + index += m; + } + oqs_sidh_cln16_get_4_isog(R, A, C, coeff); + + for (i = 0; i < npts; i++) { + oqs_sidh_cln16_eval_4_isog(pts[i], coeff); + } + + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X); + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + oqs_sidh_cln16_get_4_isog(R, A, C, coeff); + oqs_sidh_cln16_j_inv(A, C, jinv); + oqs_sidh_cln16_from_fp2mont(jinv, (oqs_sidh_cln16_felm_t *) SharedSecretA); // Converting back to standard representation + + // Cleanup: + oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_ALICE * 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) A, 2 * pwords); + oqs_sidh_cln16_clear_words((void *) C, 2 * pwords); + oqs_sidh_cln16_clear_words((void *) jinv, 2 * pwords); + oqs_sidh_cln16_clear_words((void 
*) coeff, 5 * 2 * pwords); + + return SIDH_CRYPTO_SUCCESS; +} + +void oqs_sidh_cln16_PublicKeyCompression_B(const unsigned char *PublicKeyB, unsigned char *CompressedPKB, PCurveIsogenyStruct CurveIsogeny) { // Bob's public key compression + // It produces a compressed output that consists of three elements in Z_orderA and one field element + // Input : Bob's public key PublicKeyB, which consists of 3 elements in GF(p751^2). + // Output: a compressed value CompressedPKB that consists of three elements in Z_orderA and one element in GF(p751^2). + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). + oqs_sidh_cln16_point_full_proj_t P, Q, phP, phQ, phX; + oqs_sidh_cln16_point_t R1, R2, phiP, phiQ; + oqs_sidh_cln16_publickey_t PK; + digit_t *comp = (digit_t *) CompressedPKB; + digit_t inv[SIDH_NWORDS_ORDER]; + oqs_sidh_cln16_f2elm_t A, vec[4], Zinv[4]; + digit_t a0[SIDH_NWORDS_ORDER], b0[SIDH_NWORDS_ORDER], a1[SIDH_NWORDS_ORDER], b1[SIDH_NWORDS_ORDER], tmp[2 * SIDH_NWORDS_ORDER], mask = (digit_t)(-1); + + mask >>= (CurveIsogeny->owordbits - CurveIsogeny->oAbits); + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyB)[0], ((oqs_sidh_cln16_f2elm_t *) &PK)[0]); // Converting to Montgomery representation + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyB)[1], ((oqs_sidh_cln16_f2elm_t *) &PK)[1]); + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyB)[2], ((oqs_sidh_cln16_f2elm_t *) &PK)[2]); + + oqs_sidh_cln16_recover_y(PK, phP, phQ, phX, A, CurveIsogeny); + oqs_sidh_cln16_generate_2_torsion_basis(A, P, Q, CurveIsogeny); + oqs_sidh_cln16_fp2copy751(P->Z, vec[0]); + oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]); + oqs_sidh_cln16_fp2copy751(phP->Z, vec[2]); + oqs_sidh_cln16_fp2copy751(phQ->Z, vec[3]); + oqs_sidh_cln16_mont_n_way_inv(vec, 4, Zinv); + + oqs_sidh_cln16_fp2mul751_mont(P->X, Zinv[0], R1->x); + oqs_sidh_cln16_fp2mul751_mont(P->Y, Zinv[0], R1->y); + oqs_sidh_cln16_fp2mul751_mont(Q->X, Zinv[1], R2->x); + 
oqs_sidh_cln16_fp2mul751_mont(Q->Y, Zinv[1], R2->y); + oqs_sidh_cln16_fp2mul751_mont(phP->X, Zinv[2], phiP->x); + oqs_sidh_cln16_fp2mul751_mont(phP->Y, Zinv[2], phiP->y); + oqs_sidh_cln16_fp2mul751_mont(phQ->X, Zinv[3], phiQ->x); + oqs_sidh_cln16_fp2mul751_mont(phQ->Y, Zinv[3], phiQ->y); + + oqs_sidh_cln16_ph2(phiP, phiQ, R1, R2, A, (uint64_t *) a0, (uint64_t *) b0, (uint64_t *) a1, (uint64_t *) b1, CurveIsogeny); + + if ((a0[0] & 1) == 1) { // Storing [b1*a0inv, a1*a0inv, b0*a0inv] and setting bit384 to 0 + oqs_sidh_cln16_inv_mod_orderA(a0, inv); + oqs_sidh_cln16_multiply(b0, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[0], SIDH_NWORDS_ORDER); + comp[SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_multiply(a1, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER); + comp[2 * SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_multiply(b1, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[2 * SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER); + comp[3 * SIDH_NWORDS_ORDER - 1] &= mask; + } else { // Storing [b1*b0inv, a1*b0inv, a0*b0inv] and setting bit384 to 1 + oqs_sidh_cln16_inv_mod_orderA(b0, inv); + oqs_sidh_cln16_multiply(a0, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[0], SIDH_NWORDS_ORDER); + comp[SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_multiply(a1, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER); + comp[2 * SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_multiply(b1, inv, tmp, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_copy_words(tmp, &comp[2 * SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER); + comp[3 * SIDH_NWORDS_ORDER - 1] &= mask; + comp[3 * SIDH_NWORDS_ORDER - 1] |= (digit_t) 1 << (sizeof(digit_t) * 8 - 1); + } + + oqs_sidh_cln16_from_fp2mont(A, (oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER]); // Converting back from Montgomery representation +} + +void 
oqs_sidh_cln16_PublicKeyBDecompression_A(const unsigned char *SecretKeyA, const unsigned char *CompressedPKB, unsigned char *point_R, unsigned char *param_A, PCurveIsogenyStruct CurveIsogeny) { // Bob's public key value decompression computed by Alice + // Inputs: Alice's private key SecretKeyA, and + // Bob's compressed public key data CompressedPKB, which consists of three elements in Z_orderA and one element in GF(p751^2). + // Output: a point point_R in coordinates (X:Z) and the curve parameter param_A in GF(p751^2). Outputs are stored in Montgomery representation. + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). + oqs_sidh_cln16_point_t R1, R2; + oqs_sidh_cln16_point_proj_t *R = (oqs_sidh_cln16_point_proj_t *) point_R; + oqs_sidh_cln16_point_full_proj_t P, Q; + digit_t *comp = (digit_t *) CompressedPKB; + oqs_sidh_cln16_f2elm_t A24, vec[2], invs[2], one = {0}; + oqs_sidh_cln16_felm_t *A = (oqs_sidh_cln16_felm_t *) param_A; + digit_t tmp1[2 * SIDH_NWORDS_ORDER], tmp2[2 * SIDH_NWORDS_ORDER], vone[2 * SIDH_NWORDS_ORDER] = {0}, mask = (digit_t)(-1); + unsigned int bit; + + mask >>= (CurveIsogeny->owordbits - CurveIsogeny->oAbits); + vone[0] = 1; + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_to_fp2mont((oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER], A); // Converting to Montgomery representation + oqs_sidh_cln16_generate_2_torsion_basis(A, P, Q, CurveIsogeny); + + // normalize basis points + oqs_sidh_cln16_fp2copy751(P->Z, vec[0]); + oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]); + oqs_sidh_cln16_mont_n_way_inv(vec, 2, invs); + oqs_sidh_cln16_fp2mul751_mont(P->X, invs[0], R1->x); + oqs_sidh_cln16_fp2mul751_mont(P->Y, invs[0], R1->y); + oqs_sidh_cln16_fp2mul751_mont(Q->X, invs[1], R2->x); + oqs_sidh_cln16_fp2mul751_mont(Q->Y, invs[1], R2->y); + + oqs_sidh_cln16_fp2add751(A, one, A24); + oqs_sidh_cln16_fp2add751(A24, one, A24); + oqs_sidh_cln16_fp2div2_751(A24, A24); + oqs_sidh_cln16_fp2div2_751(A24, 
A24); + + bit = comp[3 * SIDH_NWORDS_ORDER - 1] >> (sizeof(digit_t) * 8 - 1); + comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1; + + if (bit == 0) { + oqs_sidh_cln16_multiply((digit_t *) SecretKeyA, &comp[SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(tmp1, vone, tmp1, SIDH_NWORDS_ORDER); + tmp1[SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_inv_mod_orderA(tmp1, tmp2); + oqs_sidh_cln16_multiply((digit_t *) SecretKeyA, &comp[2 * SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(&comp[0], tmp1, tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_multiply(tmp1, tmp2, vone, SIDH_NWORDS_ORDER); + vone[SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_mont_twodim_scalarmult(vone, R1, R2, A, A24, P, CurveIsogeny); + } else { + oqs_sidh_cln16_multiply((digit_t *) SecretKeyA, &comp[2 * SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(tmp1, vone, tmp1, SIDH_NWORDS_ORDER); + tmp1[SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_inv_mod_orderA(tmp1, tmp2); + oqs_sidh_cln16_multiply((digit_t *) SecretKeyA, &comp[SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_mp_add(&comp[0], tmp1, tmp1, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_multiply(tmp1, tmp2, vone, SIDH_NWORDS_ORDER); + vone[SIDH_NWORDS_ORDER - 1] &= mask; + oqs_sidh_cln16_mont_twodim_scalarmult(vone, R2, R1, A, A24, P, CurveIsogeny); + } + + oqs_sidh_cln16_fp2copy751(P->X, R[0]->X); + oqs_sidh_cln16_fp2copy751(P->Z, R[0]->Z); +} + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_Compression_B(const unsigned char *PrivateKeyB, const unsigned char *point_R, const unsigned char *param_A, unsigned char *SharedSecretB, PCurveIsogenyStruct CurveIsogeny) { // Bob's ephemeral shared secret computation + // It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's decompressed data point_R and param_A + // Inputs: Bob's PrivateKeyB is an integer in the range [1, oB-1], where oB = 3^239. 
+ // Alice's decompressed data consists of point_R in (X:Z) coordinates and the curve paramater param_A in GF(p751^2). + // Output: a shared secret SharedSecretB that consists of one element in GF(p751^2). + // CurveIsogeny must be set up in advance using SIDH_curve_initialize(). + unsigned int pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits); + unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_BOB], npts = 0; + oqs_sidh_cln16_point_proj_t R, pts[SIDH_MAX_INT_POINTS_BOB]; + oqs_sidh_cln16_f2elm_t jinv, A, C = {0}; + + if (PrivateKeyB == NULL || SharedSecretB == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_fp2copy751((((oqs_sidh_cln16_point_proj_t *) point_R)[0])->X, R->X); + oqs_sidh_cln16_fp2copy751((((oqs_sidh_cln16_point_proj_t *) point_R)[0])->Z, R->Z); + oqs_sidh_cln16_fp2copy751((oqs_sidh_cln16_felm_t *) param_A, A); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]); + oqs_sidh_cln16_to_mont(C[0], C[0]); + + index = 0; + for (row = 1; row < SIDH_MAX_Bob; row++) { + while (index < SIDH_MAX_Bob - row) { + oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X); + oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z); + pts_index[npts] = index; + npts += 1; + m = splits_Bob[SIDH_MAX_Bob - index - row]; + oqs_sidh_cln16_xTPLe(R, R, A, C, (int) m); + index += m; + } + oqs_sidh_cln16_get_3_isog(R, A, C); + + for (i = 0; i < npts; i++) { + oqs_sidh_cln16_eval_3_isog(R, pts[i]); + } + + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X); + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + oqs_sidh_cln16_get_3_isog(R, A, C); + oqs_sidh_cln16_j_inv(A, C, jinv); + oqs_sidh_cln16_from_fp2mont(jinv, (oqs_sidh_cln16_felm_t *) SharedSecretB); // Converting back to standard representation + + // Cleanup: + oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_BOB * 2 * 2 * 
pwords);
+	oqs_sidh_cln16_clear_words((void *) A, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) C, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) jinv, 2 * pwords);
+
+	return SIDH_CRYPTO_SUCCESS;
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/Makefile.am b/crypt/liboqs/kex_sidh_iqc_ref/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..d634163581b9e79e0207b355faa97c2f5d60202f
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/Makefile.am
@@ -0,0 +1,11 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libsidhiqc.la
+
+
+libsidhiqc_la_SOURCES = kex_sidh_iqc_ref_params.c kex_sidh_iqc_ref.c sidh_elliptic_curve.c sidh_elliptic_curve_dlp.c sidh_isogeny.c
+libsidhiqc_la_SOURCES += sidh_private_key.c sidh_public_key.c sidh_public_key_encryption.c sidh_public_key_validation.c
+libsidhiqc_la_SOURCES += sidh_public_param.c sidh_quadratic_ext.c sidh_shared_key.c sidh_util.c
+libsidhiqc_la_CPPFLAGS = -I../../include -I. -fPIC
+libsidhiqc_la_CPPFLAGS += $(AM_CPPFLAGS) -I$(GMP_DIR)/include
+
+libsidhiqc_la_LDFLAGS = -L$(GMP_DIR)/lib $(AM_LDFLAGS)
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.c b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.c
new file mode 100644
index 0000000000000000000000000000000000000000..448d7bbc6e4dfd532b165d4ed8c6c0c3a3c11418
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.c
@@ -0,0 +1,232 @@
+#if defined(WINDOWS)
+#define UNUSED
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "sidh_elliptic_curve.h"
+#include "sidh_public_param.h"
+#include "sidh_isogeny.h"
+#include "sidh_private_key.h"
+#include "sidh_public_key.h"
+#include "sidh_shared_key.h"
+#include "kex_sidh_iqc_ref.h"
+#include "kex_sidh_iqc_ref_params.h"
+
+OQS_KEX *OQS_KEX_sidh_iqc_ref_new(OQS_RAND *rand, const char *named_parameters) {
+
+	if (named_parameters == NULL) {
+		named_parameters = "params771";
+	}
+
+	OQS_KEX *k =
malloc(sizeof(OQS_KEX)); + if (k == NULL) { + return NULL; + } + + public_params_t *params = + (public_params_t *) malloc(2 * sizeof(public_params_t)); + if (params == NULL) { + goto err; + } + + oqs_sidh_iqc_ref_public_params_init(params[0]); + oqs_sidh_iqc_ref_public_params_init(params[1]); + + const char **input = oqs_sidh_iqc_ref_params_from_name(named_parameters); + if (input == NULL) { + goto err_clear; + } + + if (!oqs_sidh_iqc_ref_public_params_read(params[0], params[1], input)) { + goto err_clear; + } + + oqs_sidh_iqc_ref_fp_init_chararacteristic(params[0]->characteristic); + + k->rand = rand; + k->method_name = strdup("SIDH IQC REFERENCE"); + k->estimated_classical_security = 192; + k->estimated_quantum_security = 128; + k->seed = NULL; + k->seed_len = 0; + k->named_parameters = strdup(named_parameters); + k->params = params; + k->ctx = NULL; + k->alice_0 = &OQS_KEX_sidh_iqc_ref_alice_0; + k->bob = &OQS_KEX_sidh_iqc_ref_bob; + k->alice_1 = &OQS_KEX_sidh_iqc_ref_alice_1; + k->alice_priv_free = &OQS_KEX_sidh_iqc_ref_alice_priv_free; + k->free = &OQS_KEX_sidh_iqc_ref_free; + + return k; + +err_clear: + oqs_sidh_iqc_ref_public_params_clear(params[0]); + oqs_sidh_iqc_ref_public_params_clear(params[1]); + +err: + free(params); + free(k); + return NULL; +} + +int OQS_KEX_sidh_iqc_ref_alice_0(OQS_KEX *k, void **alice_priv, + uint8_t **alice_msg, size_t *alice_msg_len) { + + public_params_t *params = (public_params_t *) k->params; + private_key_t Alice_private_key; + oqs_sidh_iqc_ref_private_key_init(Alice_private_key); + oqs_sidh_iqc_ref_private_key_generate(Alice_private_key, params[0]); + + public_key_t Alice_public_key; + oqs_sidh_iqc_ref_public_key_init(Alice_public_key); + point_t kernel_gen; + oqs_sidh_iqc_ref_point_init(kernel_gen); + oqs_sidh_iqc_ref_private_key_compute_kernel_gen(kernel_gen, Alice_private_key, + params[0]->P, params[0]->Q, + params[0]->le, params[0]->E); + oqs_sidh_iqc_ref_public_key_generate(Alice_public_key, kernel_gen, params[0], + 
params[1]); + + // sizes in bytes + uint32_t prime_size = (mpz_sizeinbase(characteristic, 2) + 7) / 8; + uint32_t private_key_size = 2 * prime_size; + uint32_t public_key_size = 12 * prime_size; + + *alice_priv = NULL; + *alice_msg = NULL; + *alice_priv = malloc(private_key_size); + *alice_msg = malloc(public_key_size); + *alice_msg_len = public_key_size; + + oqs_sidh_iqc_ref_private_key_to_bytes((uint8_t *) *alice_priv, + Alice_private_key, prime_size); + oqs_sidh_iqc_ref_public_key_to_bytes((uint8_t *) *alice_msg, Alice_public_key, + prime_size); + + oqs_sidh_iqc_ref_private_key_clear(Alice_private_key); + oqs_sidh_iqc_ref_public_key_clear(Alice_public_key); + oqs_sidh_iqc_ref_point_clear(kernel_gen); + + return 1; +} + +int OQS_KEX_sidh_iqc_ref_bob(OQS_KEX *k, const uint8_t *alice_msg, + UNUSED const size_t alice_msg_len, + uint8_t **bob_msg, size_t *bob_msg_len, + uint8_t **key, size_t *key_len) { + + public_params_t *params = (public_params_t *) k->params; + + private_key_t Bob_private_key; + oqs_sidh_iqc_ref_private_key_init(Bob_private_key); + oqs_sidh_iqc_ref_private_key_generate(Bob_private_key, params[1]); + + public_key_t Bob_public_key; + oqs_sidh_iqc_ref_public_key_init(Bob_public_key); + point_t kernel_gen; + oqs_sidh_iqc_ref_point_init(kernel_gen); + oqs_sidh_iqc_ref_private_key_compute_kernel_gen(kernel_gen, Bob_private_key, + params[1]->P, params[1]->Q, + params[1]->le, params[1]->E); + oqs_sidh_iqc_ref_public_key_generate(Bob_public_key, kernel_gen, params[1], + params[0]); + + // sizes in bytes + uint32_t prime_size = (mpz_sizeinbase(characteristic, 2) + 7) / 8; + uint32_t public_key_size = 12 * prime_size; + uint32_t shared_key_size = 2 * prime_size; + + *bob_msg = NULL; + *key = NULL; + *bob_msg = malloc(public_key_size); + *key = malloc(shared_key_size); + *bob_msg_len = public_key_size; + *key_len = shared_key_size; + + oqs_sidh_iqc_ref_public_key_to_bytes((uint8_t *) *bob_msg, Bob_public_key, + prime_size); + + public_key_t Alice_public_key; 
+ oqs_sidh_iqc_ref_public_key_init(Alice_public_key); + oqs_sidh_iqc_ref_bytes_to_public_key(Alice_public_key, alice_msg, prime_size); + + fp2_element_t Bob_shared_key; + oqs_sidh_iqc_ref_fp2_init(Bob_shared_key); + oqs_sidh_iqc_ref_shared_key_generate(Bob_shared_key, Alice_public_key, + Bob_private_key, params[1]); + + oqs_sidh_iqc_ref_fp2_to_bytes((uint8_t *) *key, Bob_shared_key, prime_size); + + oqs_sidh_iqc_ref_public_key_clear(Alice_public_key); + oqs_sidh_iqc_ref_private_key_clear(Bob_private_key); + oqs_sidh_iqc_ref_public_key_clear(Bob_public_key); + oqs_sidh_iqc_ref_point_clear(kernel_gen); + oqs_sidh_iqc_ref_fp2_clear(Bob_shared_key); + + return 1; +} + +int OQS_KEX_sidh_iqc_ref_alice_1(OQS_KEX *k, const void *alice_priv, + const uint8_t *bob_msg, + UNUSED const size_t bob_msg_len, uint8_t **key, + size_t *key_len) { + + public_params_t *params = (public_params_t *) k->params; + + // sizes in bytes + uint32_t prime_size = (mpz_sizeinbase(characteristic, 2) + 7) / 8; + uint32_t shared_key_size = 2 * prime_size; + + *key = NULL; + *key_len = shared_key_size; + *key = malloc(shared_key_size); + + private_key_t Alice_private_key; + oqs_sidh_iqc_ref_private_key_init(Alice_private_key); + oqs_sidh_iqc_ref_bytes_to_private_key(Alice_private_key, alice_priv, + prime_size); + + public_key_t Bob_public_key; + oqs_sidh_iqc_ref_public_key_init(Bob_public_key); + oqs_sidh_iqc_ref_bytes_to_public_key(Bob_public_key, bob_msg, prime_size); + + fp2_element_t Alice_shared_key; + oqs_sidh_iqc_ref_fp2_init(Alice_shared_key); + oqs_sidh_iqc_ref_shared_key_generate(Alice_shared_key, Bob_public_key, + Alice_private_key, params[0]); + + oqs_sidh_iqc_ref_fp2_to_bytes((uint8_t *) *key, Alice_shared_key, prime_size); + + oqs_sidh_iqc_ref_private_key_clear(Alice_private_key); + oqs_sidh_iqc_ref_public_key_clear(Bob_public_key); + oqs_sidh_iqc_ref_fp2_clear(Alice_shared_key); + + return 1; +} + +void OQS_KEX_sidh_iqc_ref_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) { + if 
(alice_priv) { + free(alice_priv); + } +} + +void OQS_KEX_sidh_iqc_ref_free(OQS_KEX *k) { + if (!k) { + return; + } + + oqs_sidh_iqc_ref_public_params_clear(((public_params_t *) (k->params))[0]); + oqs_sidh_iqc_ref_public_params_clear(((public_params_t *) (k->params))[1]); + free(k->params); + k->params = NULL; + free(k->method_name); + k->method_name = NULL; + free(k->named_parameters); + k->named_parameters = NULL; + free(k); +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.h b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.h new file mode 100644 index 0000000000000000000000000000000000000000..a14d2822088f3879f4c4a1f00d5921964efb3627 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.h @@ -0,0 +1,28 @@ + +#ifndef KEX_SIDH_IQC_REF_H +#define KEX_SIDH_IQC_REF_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stddef.h> +#include <stdint.h> + +#include <oqs/kex.h> +#include <oqs/rand.h> + +OQS_KEX *OQS_KEX_sidh_iqc_ref_new(OQS_RAND *rand, const char *named_parameters); + +int OQS_KEX_sidh_iqc_ref_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len); +int OQS_KEX_sidh_iqc_ref_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len); +int OQS_KEX_sidh_iqc_ref_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + +void OQS_KEX_sidh_iqc_ref_alice_priv_free(OQS_KEX *k, void *alice_priv); +void OQS_KEX_sidh_iqc_ref_free(OQS_KEX *k); + +#ifdef __cplusplus +} +#endif + +#endif /* KEX_SIDH_IQC_REF_H */ diff --git a/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.c b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.c new file mode 100644 index 0000000000000000000000000000000000000000..9bbe77bbf74af588eb7be5ba1b7d9ed5c85205c5 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.c @@ -0,0 +1,85 @@ +#include <stdlib.h> 
+#include <string.h> +#include "kex_sidh_iqc_ref_params.h" + +typedef struct { + const char *name; + const char *params[10]; +} params_def; + +// clang-format off +const params_def all_params[] = { + { + "params46", + { + "p : 60183678025727", + "E : y^2 = x^3 + (33377407586757 * i + 44218433491776) * x + (14267804413813 * i + 34113052821919)", + "lA: 2", + "eA: 22", + "PA: (3621292231555 * i + 37993208494088, 7444041801194 * i + 49342879615307)", + "QA: (42474562877393 * i + 53371276514445, 2096833973245 * i + 34935006825293)", + "lB: 3", + "eB: 15", + "PB: (15834791163149 * i + 48632673242917, 26787723276578 * i + 2080970701160)", + "QB: (41347477823487 * i + 16893996428645, 16353006256863 * i + 58871308637793)" + } + }, + { + "params263", + { + "p : 13278338917780691403163453935679248163066204141424819568321422575495838416502783", + "E : y^2 = x^3 + (10146232096640085910917654383121220722483913358884738813297160334128811466415525*i+12561065565697579851239386918801659303795666601356542822684985096240783059294353)*x + (5173097881985929355869345579251684505584624561073144550698251610858120795396524*i+7107679418274528586696192790945059679329002947961173384005281572895084003568218)", + "lA: 2", + "eA: 130", + "PA: (1195124728519659060317276132092013999345554256425666367370465963951595701748339*i + 12098972036709468461769702810131237350914726908853501736574286596252384974205652, 9783772475920257416467468866150378267376245694752823265285613818169901942309758*i + 11347159712348451494564706572599934965946403356550033502368700150470499448870987)", + "QA: (13205817885805264818436305084890835188490919868599289846511015770901764583677253*i + 5747572646648472262100078852868099320898697620053049578554081522615552834142382, 11801682343040573989191884352262922625922977024975963745404870899756844108073781*i + 995065035530346107238957276796927946979246210950956147759509023538740100220494)", + "lB: 3", + "eB: 81", + "PB: 
(5344800255669587458309912385997503623935901519546261901204157001079956379346933*i + 4377688844822469620769951245537289173274736372423169606270308984109645753298367, 6652276474756696057821879367411351758786745790244544252917780253177388224676512*i + 6708409928090950067466623637647088247028372838873736207829979327577754417492323)", + "QB: (5394161621076087291764603321428338049084294313968048256313378341079709241759382*i + 11839282739753708776384780179031575074752559110018400195581350405443930573103478, 13250321748367194013481592159238890438519376028036613608154243555537109237538486*i + 5018156126061581597984382235576466750307112019427938373002833669914648135622879)" + } + }, + { + "params521", + { + "p : 5646428529833603710854376801719732121889771686125102421349898409102848702003905102057129871853579459735758942656769724874115169484320460874488881525976727551", + "E : y^2 = x^3 + (749284552715987846148963973296050195126229569341142224654666772427960869882697237787535113296933915107646826510805251959952317725821624926383192023779227916*i+4450862168665197219135947325665108840719206715065697554561201799074300990784248608236935291171911258967881216685164820345027022153809546719817771293646383402)*x + (2090701186560231235295975659537182225064154823783034367876346818451609525370628921026656891712603865222854303410638520367213822274197422708317359681412801686*i+928331116130151780314451251635374082476545231185861659046556547242876069870814548070746611568992085667981514874929668958586384832118048434225604407758374282)", + "lA: 2", + "eA: 258", + "PA: (4099566244205693793351119863629118684504739011975746402268940060566068632610815266810397027797757094816929218567651253950072216325440815687610023993835084896*i + 1558017772998619899443036875935946235185689333987633624537644882488763783158554538347022310514300405167644627965823931934377803788161014061154800653636626931, 
4309963503463625615680726988334053841208952164733703323705989592325431854359074218382917880219717866947948218257035199142577782828393068620191995480863080814*i + 371139087724151319343471759858355552237686119972572871121509307705868621618190178855645217401101942092226237837619601742237974591506374361483536984282167861)", + "QA: (1068668697541208179714192612921089347931894414290359842562082470165052062241629674686530102495737378212525479245784252461983051355518227298502808569246918728*i + 3439758296201500299118396242846510199830393172149382335887091564620130903972985332523718369650346601985540123834734249105539074407495456634862920938577617312, 1377114633894100174167466575056453645918713530999472681191914854993325497527119824352424425031078252594689770391880104513192317018010057467691025379460070671*i + 4932622986840321005380766859714312144000718130204073302754586852541324804616229501269099878044960212632820224170853684078869359092914886360672352750928115581)", + "lB: 3", + "eB: 161", + "PB: (1873601143875829767876930991819826178988629054425671567488176037109831662817961473686026144306947256312476316751547084289880741326649856082832561714610850944*i + 1560175318533519875886314144935322002014985257654221707041583923868859979591849198401249634353361077499233198751747045058302746944304448268907689086502178616, 4982994975025169124121736752171094366264972439763192439008272414941290947933013927951198144521578535758162978083130741822789277050594650669549067865269480720*i + 5276260709601376725929198456951440724276715138987805447686932240717621617089316893818094566900580557346731678327745302440662762271627385800896501795127323769)", + "QB: (697646709404910236660735870475422491121726200391885074740251760174191839071888445718446637282034941836922171390196797602747505117748596104805560163856490804*i + 3625576702015594834652275264908614642470435266304155762812699923280767886451566560460365242828862442624975825786369269113406701427398318872227046324990780609, 
4066773540363717441440268891591433184568174886592697891244632377954091519594774358483000191238791345767299029423088482812653880802118173619454377886226640626*i + 4190003034380720563592676434553383980850520959077934081654167677748426947392920518786394581045699661017779519240551462796264093681413592624709890262004623150)" + } + }, + { + "params771", + { + "p : 9161191555982008052298538759697325872858383005444503030763917191888120427263653604739574602371851919945332710234806205297475768266460658683484318356498713773944703702864057467786913144364234277796785269800198817400814717913480036351", + "E : y^2 = x^3 + (834414288954992257633455994192711449929625512434805347027412475310171948875352369017186937444645005409828817983476510572586689796723205608064400704385270116308944492168385499542550868452776212626111499661118550170888024552876875729*i+6422246000772528873002015578224375300444670334298744905928223359513938843110113655634334268879522218663819121887750824098836054966064056104287198717041277477329053582144813207672147369924203318339728355843554541603191165928512397074)*x + (6952862402661321818296934608460489441319492072429008834217170925899505712694617760090534612163651714423387662001257443691685988562319647888954263195545834510820670121276853179853453135161349568882745925527747286264586000816122662211*i+1801461307959256058754493292728237821856779795303869606357346421878164668617118529630863155614277813374785952465141324739461376333648338471745996274618770379494305097783365807731152848487472399799827998470890201827624741461111844749)", + "lA: 2", + "eA: 386", + "PA: (8104593598414300086705087098908311675030394399959710332294184564762361863835814307651327024806690095568546280563692186849608460564606528773115207348687739021740029922619947714003573784888374548470687911506820492526985145356335776446*i + 
632723349492681895135768435670357424582748082370711990704098097526814363254991414123449305576539513413267774301605548017161074510859073483979044361740779150098452314120335316096416597425252882878881588818896781804191220848731008911, 6034454472695438020325443031188950458345445112930585331722482288319997086142640364760979210290700670085317874428427766978327706213014073448960161802245001428613528860063394247112544226096847944993689243523414240839321526974724280084*i + 3376800547075148066131970733541260743185153743453912100162319249600572606491084521062090319658073231669236186390422247468127880255567740549554667385892315318084299520660699776774656198376921140804260118165199828142540405774846853362)", + "QA: (2765053530820933445180998871832795313413946616218824127593215418859013295660994609348273546952174346166291903038023352135058779784133949653750586420316372475215070348272866065120539728715859798970386824706592072979142529191135265323*i + 4732630024306258879927225136904171174931421517225731042645253305380470569884989406374249571295113204909266761424593068942210539695621294791558555039840356194123377363470161894673618251481432584334320864233790205509164494505329331140, 4708584843807409676003733183136845288008597122413250473476957203240325041335536376819164132204831789648003763063194049792870695914267484368767687682889171445457695974399304884657394666807097601382128550039198416659937221320896980595*i + 6104437072476030203734870744361913380396342850775889826642334399814339224961644051070388756974954400647041041004040997989961961816982978689836414675142946310418861822777207486737128660492374354598010889893873060781304652392500710082)", + "lB: 3", + "eB: 242", + "PB: (3723895349260758944309889666952259909100424286764560948312844268916772080039091215194337476636106420561414206591006321840112505108525982835492286766266110460566937541551059011352798312867785900902951383836578444811900436603779674156*i + 
8743733696371247709279217014221258693400652288884395784267041686740452975091187425123471886102340505926375682973850553993788314479705338557349284829716634286210825839825870379330100097103593148671390855900137936801780665161878467993, 8424241650394632026078421716292833598872223719053431149109783663376931645995151278886785513466077121063748132909299925706619410172763350254899661370368971798311266097772796416940961537063436478392108549760549489321636110323643921123*i + 5374610701506876802640722880318277643810083668249556787469960695884056089879250039154916755196748541850985290281804798435130927222521567170894905772855374873224510597225929994186728464447646660504589278345473514974074045904197344273)", + "QB: (7981195513789185488304157075392399068225052449489399943063249773724560281912789833792310612686835775356813196319643714519912123781500389027567621573946130157326769787082613646934296091151487953874493791717298439146548339580934348575*i + 6959299245778867305112554827985507377113662771316265280990751282080086185858550157506531552361757479904416443825479048359163719118671413144273420888615839877660288012435298448942304481059091585291706567711227879486375625133765783910, 4336888647745442057861067196613721067586889048321014506728806821392030059558638097842307835657146159647787621123250859783441147632121339894258934458102109985684014691372520469809743302874968486912740925764328254939284436994206821845*i + 219245698132319235934495637714582743670714862281024333766283207034829039474459867538486706426384326703893620364910932534607493596118208826082598798090838576408297983032654112984263431060439529497966028364279027386883785406090014775)" + } + } +}; +// clang-format on + +const char **oqs_sidh_iqc_ref_params_from_name(const char *named_parameters) { + static const size_t elements = sizeof(all_params) / sizeof(all_params[0]); + + for (unsigned int i = 0; i < elements; ++i) { + if (0 == strcmp(all_params[i].name, named_parameters)) { + return (const char **) 
all_params[i].params; + } + } + + return NULL; +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.h b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.h new file mode 100644 index 0000000000000000000000000000000000000000..ec1c6f3d72fe3e8eab0e95e6981c520a935dbe72 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.h @@ -0,0 +1,14 @@ +#ifndef KEX_SIDH_IQC_REF_PARAMS_H +#define KEX_SIDH_IQC_REF_PARAMS_H + +#ifdef __cplusplus +extern "C" { +#endif + +const char **oqs_sidh_iqc_ref_params_from_name(const char *named_parameters); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve.c new file mode 100644 index 0000000000000000000000000000000000000000..ea944ae6f301bdbb5d8c42a78b88764ae828ea03 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve.c @@ -0,0 +1,351 @@ +#include <stdlib.h> + +#include "sidh_elliptic_curve.h" +#include "sidh_util.h" +#include <string.h> + +void oqs_sidh_iqc_ref_elliptic_curve_init(elliptic_curve_t E) { + oqs_sidh_iqc_ref_fp2_init_set_si(E->a, 0, 1); + oqs_sidh_iqc_ref_fp2_init_set_si(E->b, 0, 1); +} + +void oqs_sidh_iqc_ref_elliptic_curve_set(elliptic_curve_t E, + const elliptic_curve_t T) { + oqs_sidh_iqc_ref_fp2_set(E->a, T->a); + oqs_sidh_iqc_ref_fp2_set(E->b, T->b); +} + +void oqs_sidh_iqc_ref_elliptic_curve_set_coeffs(elliptic_curve_t E, + const fp2_element_t a, + const fp2_element_t b) { + oqs_sidh_iqc_ref_fp2_set(E->a, a); + oqs_sidh_iqc_ref_fp2_set(E->b, b); +} + +void oqs_sidh_iqc_ref_point_init(point_t P) { + oqs_sidh_iqc_ref_fp2_init(P->x); + oqs_sidh_iqc_ref_fp2_init(P->y); + oqs_sidh_iqc_ref_point_zero(P); +} + +void oqs_sidh_iqc_ref_point_set_coordinates(point_t P, + const fp2_element_t x, + const fp2_element_t y, + int z) { + oqs_sidh_iqc_ref_fp2_set(P->x, x); + oqs_sidh_iqc_ref_fp2_set(P->y, y); + P->z = z; +} + +void oqs_sidh_iqc_ref_point_set(point_t P, + const 
point_t Q) { + oqs_sidh_iqc_ref_point_set_coordinates(P, Q->x, Q->y, Q->z); +} + +void oqs_sidh_iqc_ref_point_zero(point_t P) { + oqs_sidh_iqc_ref_fp2_zero(P->x); + oqs_sidh_iqc_ref_fp2_one(P->y); + P->z = 0; +} + +int oqs_sidh_iqc_ref_point_is_zero(const point_t P) { + return P->z == 0; +} + +void oqs_sidh_iqc_ref_point_negate(point_t P, + const point_t Q) { + oqs_sidh_iqc_ref_point_set(P, Q); + oqs_sidh_iqc_ref_fp2_negate(P->y, P->y); +} + +int oqs_sidh_iqc_ref_point_has_order_2(const point_t P) { + return oqs_sidh_iqc_ref_fp2_is_zero(P->y); +} + +void oqs_sidh_iqc_ref_elliptic_curve_clear(elliptic_curve_t E) { + oqs_sidh_iqc_ref_fp2_clear(E->a); + oqs_sidh_iqc_ref_fp2_clear(E->b); +} + +void oqs_sidh_iqc_ref_point_clear(point_t P) { + oqs_sidh_iqc_ref_fp2_clear(P->x); + oqs_sidh_iqc_ref_fp2_clear(P->y); +} + +int oqs_sidh_iqc_ref_point_equals(const point_t P, + const point_t Q) { + return oqs_sidh_iqc_ref_fp2_equals(P->x, Q->x) && + oqs_sidh_iqc_ref_fp2_equals(P->y, Q->y) && + (P->z == Q->z); +} + +char *oqs_sidh_iqc_ref_elliptic_curve_get_str(const elliptic_curve_t E) { + char *result = ""; + result = oqs_sidh_iqc_ref_concat(result, "y^2 = x^3"); + if (!oqs_sidh_iqc_ref_fp2_is_zero(E->a)) { + result = oqs_sidh_iqc_ref_concat(result, " + ("); + result = oqs_sidh_iqc_ref_concat(result, oqs_sidh_iqc_ref_fp2_get_str(E->a)); + result = oqs_sidh_iqc_ref_concat(result, ")"); + result = oqs_sidh_iqc_ref_concat(result, " * x"); + } + + if (!oqs_sidh_iqc_ref_fp2_is_zero(E->b)) { + result = oqs_sidh_iqc_ref_concat(result, " + ("); + result = oqs_sidh_iqc_ref_concat(result, oqs_sidh_iqc_ref_fp2_get_str(E->b)); + result = oqs_sidh_iqc_ref_concat(result, ")"); + } + + return result; +} + +char *oqs_sidh_iqc_ref_point_get_str(const point_t P) { + char *result = ""; + result = oqs_sidh_iqc_ref_concat(result, "("); + result = oqs_sidh_iqc_ref_concat(result, oqs_sidh_iqc_ref_fp2_get_str(P->x)); + result = oqs_sidh_iqc_ref_concat(result, " : "); + result = 
oqs_sidh_iqc_ref_concat(result, oqs_sidh_iqc_ref_fp2_get_str(P->y)); + result = oqs_sidh_iqc_ref_concat(result, " : "); + result = oqs_sidh_iqc_ref_concat(result, (P->z == 1 ? "1" : "0")); + result = oqs_sidh_iqc_ref_concat(result, ")"); + + return result; +} + +void oqs_sidh_iqc_ref_point_add_with_lambda(point_t R, + const point_t P, + const point_t Q, + const fp2_element_t lambda) { + point_t result; + oqs_sidh_iqc_ref_point_init(result); + result->z = 1; + + // x_R = lambda^2 - x_P - x_Q + oqs_sidh_iqc_ref_fp2_square(result->x, lambda); + oqs_sidh_iqc_ref_fp2_sub(result->x, result->x, P->x); + oqs_sidh_iqc_ref_fp2_sub(result->x, result->x, Q->x); + + // y_R = lambda * (x_P - x_R) - y_P + oqs_sidh_iqc_ref_fp2_sub(result->y, P->x, result->x); + oqs_sidh_iqc_ref_fp2_mul(result->y, result->y, lambda); + oqs_sidh_iqc_ref_fp2_sub(result->y, result->y, P->y); + oqs_sidh_iqc_ref_point_set(R, result); + + oqs_sidh_iqc_ref_point_clear(result); +} + +void oqs_sidh_iqc_ref_point_double(point_t R, + const point_t P, + const elliptic_curve_t E) { + if (oqs_sidh_iqc_ref_point_is_zero(P)) { + oqs_sidh_iqc_ref_point_zero(R); + return; + } + + // check if the point is of order 2 + if (oqs_sidh_iqc_ref_point_has_order_2(P)) { + oqs_sidh_iqc_ref_point_zero(R); + return; + } + + fp2_element_t temp; + fp2_element_t lambda; + + oqs_sidh_iqc_ref_fp2_init(temp); + oqs_sidh_iqc_ref_fp2_init(lambda); + + // lambda = (3(x_P)^2 + a) / (2y_p) + oqs_sidh_iqc_ref_fp2_square(lambda, P->x); + oqs_sidh_iqc_ref_fp2_mul_scaler_si(lambda, lambda, 3); + oqs_sidh_iqc_ref_fp2_add(lambda, lambda, E->a); + oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp, P->y, 2); + oqs_sidh_iqc_ref_fp2_div(lambda, lambda, temp); + + oqs_sidh_iqc_ref_point_add_with_lambda(R, P, P, lambda); + + oqs_sidh_iqc_ref_fp2_clear(temp); + oqs_sidh_iqc_ref_fp2_clear(lambda); +} + +void oqs_sidh_iqc_ref_point_add(point_t R, + const point_t P, + const point_t Q, + const elliptic_curve_t E) { + if (oqs_sidh_iqc_ref_point_is_zero(P)) { + 
oqs_sidh_iqc_ref_point_set(R, Q); + return; + } + + if (oqs_sidh_iqc_ref_point_is_zero(Q)) { + oqs_sidh_iqc_ref_point_set(R, P); + return; + } + + if (oqs_sidh_iqc_ref_fp2_equals(P->x, Q->x)) { + if (oqs_sidh_iqc_ref_fp2_equals(P->y, Q->y)) { + oqs_sidh_iqc_ref_point_double(R, P, E); + return; + } + + oqs_sidh_iqc_ref_point_zero(R); + return; + } + + fp2_element_t temp; + fp2_element_t lambda; + + oqs_sidh_iqc_ref_fp2_init(temp); + oqs_sidh_iqc_ref_fp2_init(lambda); + + // lambda = (y_Q - y_P) / (x_Q - x_P) + oqs_sidh_iqc_ref_fp2_sub(lambda, Q->y, P->y); + oqs_sidh_iqc_ref_fp2_sub(temp, Q->x, P->x); + oqs_sidh_iqc_ref_fp2_div(lambda, lambda, temp); + + oqs_sidh_iqc_ref_point_add_with_lambda(R, P, Q, lambda); + + oqs_sidh_iqc_ref_fp2_clear(temp); + oqs_sidh_iqc_ref_fp2_clear(lambda); +} + +void oqs_sidh_iqc_ref_point_sub(point_t R, + const point_t P, + const point_t Q, + const elliptic_curve_t E) { + point_t temp; + oqs_sidh_iqc_ref_point_init(temp); + oqs_sidh_iqc_ref_point_negate(temp, Q); + oqs_sidh_iqc_ref_point_add(R, P, temp, E); + oqs_sidh_iqc_ref_point_clear(temp); +} + +void oqs_sidh_iqc_ref_point_mul_scaler(point_t R, + const point_t P, + const mpz_t scaler, + const elliptic_curve_t E) { + if (mpz_cmp_ui(scaler, 0) == 0) { + oqs_sidh_iqc_ref_point_zero(R); + return; + } + + if (mpz_cmp_ui(scaler, 1) == 0) { + oqs_sidh_iqc_ref_point_set(R, P); + return; + } + + point_t R0; + point_t R1; + + oqs_sidh_iqc_ref_point_init(R0); + oqs_sidh_iqc_ref_point_init(R1); + oqs_sidh_iqc_ref_point_set(R1, P); + + long num_bits = mpz_sizeinbase(scaler, 2); + for (long i = 0; i < num_bits; i++) { + if (mpz_tstbit(scaler, i) == 1) + oqs_sidh_iqc_ref_point_add(R0, R0, R1, E); + oqs_sidh_iqc_ref_point_double(R1, R1, E); + } + + if (mpz_sgn(scaler) < 0) + oqs_sidh_iqc_ref_point_negate(R0, R0); + + oqs_sidh_iqc_ref_point_set(R, R0); + oqs_sidh_iqc_ref_point_clear(R0); + oqs_sidh_iqc_ref_point_clear(R1); +} + +void oqs_sidh_iqc_ref_point_mul_scaler_si(point_t R, + const point_t P, 
+ long scaler, + const elliptic_curve_t E) { + mpz_t temp; + mpz_init_set_si(temp, scaler); + oqs_sidh_iqc_ref_point_mul_scaler(R, P, temp, E); + mpz_clear(temp); +} + +void oqs_sidh_iqc_ref_elliptic_curve_compute_j_inv(fp2_element_t j_inv, + const elliptic_curve_t E) { + fp2_element_t result; + fp2_element_t temp; + oqs_sidh_iqc_ref_fp2_init(result); + oqs_sidh_iqc_ref_fp2_init(temp); + + oqs_sidh_iqc_ref_fp2_pow_ui(temp, E->a, 3); + oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp, temp, 4); + oqs_sidh_iqc_ref_fp2_square(result, E->b); + oqs_sidh_iqc_ref_fp2_mul_scaler_si(result, result, 27); + oqs_sidh_iqc_ref_fp2_add(result, result, temp); + oqs_sidh_iqc_ref_fp2_inv(result, result); + oqs_sidh_iqc_ref_fp2_mul(result, result, temp); + oqs_sidh_iqc_ref_fp2_mul_scaler_si(result, result, 1728); + oqs_sidh_iqc_ref_fp2_set(j_inv, result); + + oqs_sidh_iqc_ref_fp2_clear(result); + oqs_sidh_iqc_ref_fp2_clear(temp); +} + +int oqs_sidh_iqc_ref_point_is_on_curve(const point_t P, + const elliptic_curve_t E) { + + if (oqs_sidh_iqc_ref_point_is_zero(P)) + return 1; + + fp2_element_t temp_x; + oqs_sidh_iqc_ref_fp2_init(temp_x); + + // compute x^3 + a * x + b = x * (x^2 + a) + b + oqs_sidh_iqc_ref_fp2_square(temp_x, P->x); + oqs_sidh_iqc_ref_fp2_add(temp_x, temp_x, E->a); + oqs_sidh_iqc_ref_fp2_mul(temp_x, temp_x, P->x); + oqs_sidh_iqc_ref_fp2_add(temp_x, temp_x, E->b); + + fp2_element_t temp_y; + oqs_sidh_iqc_ref_fp2_init(temp_y); + oqs_sidh_iqc_ref_fp2_square(temp_y, P->y); + + int result = oqs_sidh_iqc_ref_fp2_equals(temp_y, temp_x); + + oqs_sidh_iqc_ref_fp2_clear(temp_x); + oqs_sidh_iqc_ref_fp2_clear(temp_y); + + return result; +} + +void oqs_sidh_iqc_ref_elliptic_curve_random_point(point_t P, + const elliptic_curve_t E) { + point_t result; + oqs_sidh_iqc_ref_point_init(result); + result->z = 1; + + fp2_element_t temp_x; + oqs_sidh_iqc_ref_fp2_init(temp_x); + + fp2_element_t temp_y; + oqs_sidh_iqc_ref_fp2_init(temp_y); + + gmp_randstate_t randstate; + 
+ * Representation of the elliptic curve y^2 = x^3 + a * x + b
+ * @param E + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_elliptic_curve_set_coeffs(elliptic_curve_t E, + const fp2_element_t a, + const fp2_element_t b); + +/** + * Initializes the point {@code P} to the zero point (0 : 1 : 0). + * @param P + */ +void oqs_sidh_iqc_ref_point_init(point_t P); + +/** + * Sets the coordinates of the point {@code P}. + * @param P + * @param x + * @param y + * @param z + */ +void oqs_sidh_iqc_ref_point_set_coordinates(point_t P, + const fp2_element_t x, + const fp2_element_t y, + int z); + +/** + * Copies {@code Q} into {@code P} + * @param P + * @param Q + */ +void oqs_sidh_iqc_ref_point_set(point_t P, + const point_t Q); + +/** + * Sets the given point to zero. + * @param P + */ +void oqs_sidh_iqc_ref_point_zero(point_t P); + +/** + * Checks if a given point is zero. + * @param P + * @return + */ +int oqs_sidh_iqc_ref_point_is_zero(const point_t P); + +/** + * Sets {@code P} to {@code -Q} as a group element. + * @param P + * @param Q + */ +void oqs_sidh_iqc_ref_point_negate(point_t P, + const point_t Q); + +/** + * Checks if 2 * {@code P} = 0. + * @param P + * @return + */ +int oqs_sidh_iqc_ref_point_has_order_2(const point_t P); + +/** + * Frees the memory allocated to {@code E}. + * @param E + */ +void oqs_sidh_iqc_ref_elliptic_curve_clear(elliptic_curve_t E); + +/** + * Frees the memory allocated to {@code P}. + * @param P + */ +void oqs_sidh_iqc_ref_point_clear(point_t P); + +/** + * Checks if {@code P = Q}. + * @param P + * @param Q + * @return 1 if the points are equal, 0 otherwise + */ +int oqs_sidh_iqc_ref_point_equals(const point_t P, + const point_t Q); + +/** + * @param E + * @return A string representation of {@code E} + */ +char *oqs_sidh_iqc_ref_elliptic_curve_get_str(const elliptic_curve_t E); + +/** + * @param P + * @return A string representation of {@code P} + */ +char *oqs_sidh_iqc_ref_point_get_str(const point_t P); + +/** + * Sets {@code R = P + Q} on {@code E}. 
+ * @param R + * @param P + * @param Q + * @param E + */ +void oqs_sidh_iqc_ref_point_add(point_t R, + const point_t P, + const point_t Q, + const elliptic_curve_t E); + +/** + * Sets {@code R = P - Q}. + * @param R + * @param P + * @param Q + * @param E + */ +void oqs_sidh_iqc_ref_point_sub(point_t R, + const point_t P, + const point_t Q, + const elliptic_curve_t E); + +/** + * Sets {@code R = P + Q} on {@code E}. + * @param R + * @param P + * @param Q + * @param lambda The slope of the line passing through {@code P, Q} + */ +void oqs_sidh_iqc_ref_point_add_with_lambda(point_t R, + const point_t P, + const point_t Q, + const fp2_element_t lambda); + +/** + * Sets {@code R = 2 * P} on {@code E}. + * @param R + * @param P + * @param E + */ +void oqs_sidh_iqc_ref_point_double(point_t R, + const point_t P, + const elliptic_curve_t E); + +/** + * Sets {@code R = scaler * P} on {@code E}. + * @param R + * @param P + * @param scaler + * @param E + */ +void oqs_sidh_iqc_ref_point_mul_scaler(point_t R, + const point_t P, + const mpz_t scaler, + const elliptic_curve_t E); + +/** + * {@link oqs_sidh_iqc_ref_point_mul_scaler} + * @param R + * @param P + * @param scaler + * @param E + */ +void oqs_sidh_iqc_ref_point_mul_scaler_si(point_t R, + const point_t P, + long scaler, + const elliptic_curve_t E); + +/** + * Computes the j-invariant of {@code E}. + * @param j_inv + * @param E + */ +void oqs_sidh_iqc_ref_elliptic_curve_compute_j_inv(fp2_element_t j_inv, + const elliptic_curve_t E); + +/** + * Checks if the point {@code P} is on the curve {@code E}. + * @param P + * @param E + * @return 1 if the point is on the curve, 0 otherwise + */ +int oqs_sidh_iqc_ref_point_is_on_curve(const point_t P, + const elliptic_curve_t E); + +/** + * Generates a random point on the curve {@code E}. + * @param P the generated random point. 
+ * @param E + */ +void oqs_sidh_iqc_ref_elliptic_curve_random_point(point_t P, + const elliptic_curve_t E); + +#ifdef __cplusplus +} +#endif + +#endif /* CURVE_H */ diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.c new file mode 100644 index 0000000000000000000000000000000000000000..838a7dcd6a1ffbe099352f0487609a974958d5d7 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.c @@ -0,0 +1,97 @@ +#include "sidh_elliptic_curve_dlp.h" +#include <stdio.h> + +void oqs_sidh_iqc_ref_elliptic_curve_prime_power_dlp(mpz_t x, + const point_t P, + const point_t Q, + const elliptic_curve_t E, + long l, + long e) { + mpz_t exponent1; + mpz_t exponent2; + point_t temp_P; + point_t temp_Q; + point_t temp_R; + point_t PP; + + mpz_init(exponent1); + mpz_init(exponent2); + oqs_sidh_iqc_ref_point_init(temp_P); + oqs_sidh_iqc_ref_point_init(temp_Q); + oqs_sidh_iqc_ref_point_init(temp_R); + oqs_sidh_iqc_ref_point_init(PP); + + int ladic_rep[e]; + mpz_ui_pow_ui(exponent1, l, e - 1); + + // PP = l^(e - 1) * P once and for all + oqs_sidh_iqc_ref_point_mul_scaler(PP, P, exponent1, E); + + // compute the first ladic coefficient + oqs_sidh_iqc_ref_point_mul_scaler(temp_Q, Q, exponent1, E); + long ladic_coeff = oqs_sidh_iqc_ref_elliptic_curve_prime_dlp(PP, temp_Q, E, l); + + for (int j = 1; j < e; j++) { + if (ladic_coeff >= 0) { + ladic_rep[j - 1] = ladic_coeff; + } else { + break; + } + + mpz_ui_pow_ui(exponent2, l, j - 1); + mpz_mul_ui(exponent2, exponent2, ladic_rep[j - 1]); + mpz_divexact_ui(exponent1, exponent1, l); + oqs_sidh_iqc_ref_point_mul_scaler(temp_P, P, exponent2, E); + oqs_sidh_iqc_ref_point_add(temp_R, temp_R, temp_P, E); + oqs_sidh_iqc_ref_point_sub(temp_Q, Q, temp_R, E); + oqs_sidh_iqc_ref_point_mul_scaler(temp_Q, temp_Q, exponent1, E); + ladic_coeff = oqs_sidh_iqc_ref_elliptic_curve_prime_dlp(PP, temp_Q, E, l); + } + + if (ladic_coeff >= 0) { + ladic_rep[e - 1] = ladic_coeff; + 
+ // set x = l_{e - 1}l^{e - 1} + ... + l_1l + l_0 + mpz_set_ui(x, ladic_rep[e - 1]); + for (long i = e - 2; i >= 0; i--) { + mpz_mul_ui(x, x, l); + mpz_add_ui(x, x, ladic_rep[i]); + } + } else { + mpz_set_si(x, -1); + } + + mpz_clear(exponent1); + mpz_clear(exponent2); + oqs_sidh_iqc_ref_point_clear(temp_P); + oqs_sidh_iqc_ref_point_clear(temp_Q); + oqs_sidh_iqc_ref_point_clear(temp_R); + oqs_sidh_iqc_ref_point_clear(PP); +} + +long oqs_sidh_iqc_ref_elliptic_curve_prime_dlp(const point_t P, + const point_t Q, + const elliptic_curve_t E, + long l) { + if (oqs_sidh_iqc_ref_point_is_zero(Q)) + return 0; + + if (oqs_sidh_iqc_ref_point_equals(P, Q)) + return 1; + + point_t temp; + oqs_sidh_iqc_ref_point_init(temp); + oqs_sidh_iqc_ref_point_set(temp, P); + + long result = -1; + for (long i = 2; i < l; i++) { + oqs_sidh_iqc_ref_point_add(temp, temp, P, E); + if (oqs_sidh_iqc_ref_point_equals(temp, Q)) { + result = i; + break; + } + } + + oqs_sidh_iqc_ref_point_clear(temp); + return result; +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.h new file mode 100644 index 0000000000000000000000000000000000000000..8d9c6c4b232d8207364802a0626bb1d11555dbe4 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.h @@ -0,0 +1,45 @@ +#ifndef ELLIPTIC_CURVE_DLP_H +#define ELLIPTIC_CURVE_DLP_H + +#include "sidh_elliptic_curve.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Computes the discrete logarithm {@code P = x * Q} in a group of order + * {@code l^e} generated by {@code P}. The Pohlig–Hellman algorithm is used. 
+/**
+ * Computes the discrete logarithm {@code Q = x * P} in a group of order
+ * {@code l} generated by {@code P}.
+ * @param P the generator of the cyclic group
+ * @param Q an element in the group generated by {@code P}
+ * @param E
+ * @param l a prime number
+ * @return the discrete logarithm if it exists, or -1 otherwise
+ */
oqs_sidh_iqc_ref_elliptic_curve_init(isogeny->codomain); + + for (long i = 0; i < size; i++) { + oqs_sidh_iqc_ref_point_init(isogeny->partition[i]); + oqs_sidh_iqc_ref_fp2_init(isogeny->gx[i]); + oqs_sidh_iqc_ref_fp2_init(isogeny->gy[i]); + oqs_sidh_iqc_ref_fp2_init(isogeny->u[i]); + oqs_sidh_iqc_ref_fp2_init(isogeny->v[i]); + } +} + +void oqs_sidh_iqc_ref_isogeny_clear(isogeny_t isogeny) { + oqs_sidh_iqc_ref_elliptic_curve_clear(isogeny->domain); + oqs_sidh_iqc_ref_elliptic_curve_clear(isogeny->codomain); + + for (long i = 0; i < isogeny->partition_size; i++) { + oqs_sidh_iqc_ref_point_clear(isogeny->partition[i]); + oqs_sidh_iqc_ref_fp2_clear(isogeny->gx[i]); + oqs_sidh_iqc_ref_fp2_clear(isogeny->gy[i]); + oqs_sidh_iqc_ref_fp2_clear(isogeny->u[i]); + oqs_sidh_iqc_ref_fp2_clear(isogeny->v[i]); + } + + free(isogeny->partition); + free(isogeny->gx); + free(isogeny->gy); + free(isogeny->u); + free(isogeny->v); +} + +void oqs_sidh_iqc_ref_isogeny_compute(isogeny_t isogeny, + const point_t kernel_gen) { + oqs_sidh_iqc_ref_isogeny_partition_kernel(isogeny->partition, + isogeny->partition_size, + kernel_gen, + isogeny->domain); + long size = isogeny->partition_size; + + // compute gx_P = 3 * x_P^2 + a + for (long i = 0; i < size; i++) { + oqs_sidh_iqc_ref_fp2_square(isogeny->gx[i], isogeny->partition[i]->x); + oqs_sidh_iqc_ref_fp2_mul_scaler_si(isogeny->gx[i], isogeny->gx[i], 3); + oqs_sidh_iqc_ref_fp2_add(isogeny->gx[i], isogeny->gx[i], isogeny->domain->a); + } + + // compute gy_P = -2y_P + for (long i = 0; i < size; i++) { + oqs_sidh_iqc_ref_fp2_mul_scaler_si(isogeny->gy[i], isogeny->partition[i]->y, -2); + } + + // compute v_P = gx_P or 2gx_P + for (long i = 0; i < size; i++) { + if (oqs_sidh_iqc_ref_point_has_order_2(isogeny->partition[i])) + oqs_sidh_iqc_ref_fp2_set(isogeny->v[i], isogeny->gx[i]); + else + oqs_sidh_iqc_ref_fp2_mul_scaler_si(isogeny->v[i], isogeny->gx[i], 2); + } + + // compute u_P = gy_P^2 + for (long i = 0; i < size; i++) { + 
+ // clear the unused memory after shrinking
oqs_sidh_iqc_ref_fp2_clear(isogeny->v[i]); + } +} + +void oqs_sidh_iqc_ref_isogeny_evaluate_velu(point_t Q, + const isogeny_t isogeny, + const point_t P) { + + if (oqs_sidh_iqc_ref_point_is_zero(P)) { + oqs_sidh_iqc_ref_point_zero(Q); + return; + } + + long size = isogeny->partition_size; + + fp2_element_t temp1; + fp2_element_t temp2; + fp2_element_t temp3; + oqs_sidh_iqc_ref_fp2_init(temp1); + oqs_sidh_iqc_ref_fp2_init(temp2); + oqs_sidh_iqc_ref_fp2_init(temp3); + + point_t result; + oqs_sidh_iqc_ref_point_init(result); + oqs_sidh_iqc_ref_point_set(result, P); + + for (long i = 0; i < size; i++) { + oqs_sidh_iqc_ref_fp2_sub(temp1, P->x, isogeny->partition[i]->x); + + // check if the point is in the kernel + if (oqs_sidh_iqc_ref_fp2_is_zero(temp1)) { + oqs_sidh_iqc_ref_point_zero(result); + break; + } + + // 1 / (x - x_P) + oqs_sidh_iqc_ref_fp2_inv(temp1, temp1); + + // add 1 / (x - x_P) * (v_P + u_P / (x - x_P)) to x + oqs_sidh_iqc_ref_fp2_mul(temp2, isogeny->u[i], temp1); + oqs_sidh_iqc_ref_fp2_add(temp2, temp2, isogeny->v[i]); + oqs_sidh_iqc_ref_fp2_mul(temp2, temp2, temp1); + oqs_sidh_iqc_ref_fp2_add(result->x, result->x, temp2); + + // v_P * (y - y_P) - gx_P * gy_P + oqs_sidh_iqc_ref_fp2_sub(temp2, P->y, isogeny->partition[i]->y); + oqs_sidh_iqc_ref_fp2_mul(temp2, temp2, isogeny->v[i]); + oqs_sidh_iqc_ref_fp2_mul(temp3, isogeny->gx[i], isogeny->gy[i]); + oqs_sidh_iqc_ref_fp2_sub(temp2, temp2, temp3); + + // 2 * u_P * y / (x - x_P) + oqs_sidh_iqc_ref_fp2_mul(temp3, isogeny->u[i], P->y); + oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp3, temp3, 2); + oqs_sidh_iqc_ref_fp2_mul(temp3, temp3, temp1); + + oqs_sidh_iqc_ref_fp2_add(temp3, temp3, temp2); + oqs_sidh_iqc_ref_fp2_square(temp1, temp1); + oqs_sidh_iqc_ref_fp2_mul(temp3, temp3, temp1); + oqs_sidh_iqc_ref_fp2_sub(result->y, result->y, temp3); + } + + oqs_sidh_iqc_ref_point_set(Q, result); + + oqs_sidh_iqc_ref_point_clear(result); + oqs_sidh_iqc_ref_fp2_clear(temp1); + oqs_sidh_iqc_ref_fp2_clear(temp2); + 
oqs_sidh_iqc_ref_fp2_clear(temp3); +} + +void oqs_sidh_iqc_ref_isogeny_evaluate_kohel(point_t Q, + const isogeny_t isogeny, + const point_t P) { + fp2_element_t ix1; + fp2_element_t ix2; + fp2_element_t ix3; + fp2_element_t temp1; + fp2_element_t temp2; + fp2_element_t temp3; + fp2_element_t sigma1; + + oqs_sidh_iqc_ref_fp2_init(ix1); + oqs_sidh_iqc_ref_fp2_init(ix2); + oqs_sidh_iqc_ref_fp2_init(ix3); + oqs_sidh_iqc_ref_fp2_init(temp1); + oqs_sidh_iqc_ref_fp2_init(temp2); + oqs_sidh_iqc_ref_fp2_init(temp3); + oqs_sidh_iqc_ref_fp2_init(sigma1); + + point_t result; + oqs_sidh_iqc_ref_point_init(result); + oqs_sidh_iqc_ref_point_set(result, P); + + long size = isogeny->partition_size; + + for (long i = 0; i < size; i++) { + oqs_sidh_iqc_ref_fp2_add(sigma1, sigma1, isogeny->partition[i]->x); + oqs_sidh_iqc_ref_fp2_sub(temp1, P->x, isogeny->partition[i]->x); + + // check if the point is in the kernel + if (oqs_sidh_iqc_ref_fp2_is_zero(temp1)) { + oqs_sidh_iqc_ref_point_zero(result); + break; + } + + // 1 / (x - x_P) + oqs_sidh_iqc_ref_fp2_inv(temp1, temp1); + + // 1 / (x - x_P)^2 + oqs_sidh_iqc_ref_fp2_square(temp2, temp1); + + // 1 / (x - x_P)^3 + oqs_sidh_iqc_ref_fp2_mul(temp3, temp2, temp1); + + if (!oqs_sidh_iqc_ref_point_has_order_2(isogeny->partition[i])) { + oqs_sidh_iqc_ref_fp2_add(temp1, temp1, temp1); + oqs_sidh_iqc_ref_fp2_add(temp2, temp2, temp2); + oqs_sidh_iqc_ref_fp2_add(temp3, temp3, temp3); + oqs_sidh_iqc_ref_fp2_add(sigma1, sigma1, isogeny->partition[i]->x); + } + + oqs_sidh_iqc_ref_fp2_add(ix1, ix1, temp1); + oqs_sidh_iqc_ref_fp2_add(ix2, ix2, temp2); + oqs_sidh_iqc_ref_fp2_add(ix3, ix3, temp3); + } + + if (!oqs_sidh_iqc_ref_point_is_zero(result)) { + fp2_element_t u1; + fp2_element_t u2; + + oqs_sidh_iqc_ref_fp2_init(u1); + oqs_sidh_iqc_ref_fp2_init(u2); + + // 3 * x^2 + a + oqs_sidh_iqc_ref_fp2_square(u1, P->x); + oqs_sidh_iqc_ref_fp2_mul_scaler_si(u1, u1, 3); + oqs_sidh_iqc_ref_fp2_add(u1, u1, isogeny->domain->a); + + // 2 * y^2 + 
oqs_sidh_iqc_ref_fp2_square(u2, P->y); + oqs_sidh_iqc_ref_fp2_mul_scaler_si(u2, u2, 2); + + // compute the first coordinate + oqs_sidh_iqc_ref_fp2_mul_scaler_si(result->x, P->x, isogeny->kernel_size); + oqs_sidh_iqc_ref_fp2_sub(result->x, result->x, sigma1); + oqs_sidh_iqc_ref_fp2_mul(temp1, u1, ix1); + oqs_sidh_iqc_ref_fp2_sub(result->x, result->x, temp1); + oqs_sidh_iqc_ref_fp2_mul(temp1, u2, ix2); + oqs_sidh_iqc_ref_fp2_add(result->x, result->x, temp1); + + // compute the second coordinate + oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp1, P->x, -6); + oqs_sidh_iqc_ref_fp2_mul(result->y, temp1, ix1); + oqs_sidh_iqc_ref_fp2_add_ui(result->y, result->y, isogeny->kernel_size); + oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp1, u1, 3); + oqs_sidh_iqc_ref_fp2_mul(temp1, temp1, ix2); + oqs_sidh_iqc_ref_fp2_add(result->y, result->y, temp1); + oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp1, u2, -2); + oqs_sidh_iqc_ref_fp2_mul(temp1, temp1, ix3); + oqs_sidh_iqc_ref_fp2_add(result->y, result->y, temp1); + oqs_sidh_iqc_ref_fp2_mul(result->y, result->y, P->y); + + oqs_sidh_iqc_ref_fp2_clear(u1); + oqs_sidh_iqc_ref_fp2_clear(u2); + } + + oqs_sidh_iqc_ref_point_set(Q, result); + + oqs_sidh_iqc_ref_point_clear(result); + oqs_sidh_iqc_ref_fp2_clear(ix1); + oqs_sidh_iqc_ref_fp2_clear(ix2); + oqs_sidh_iqc_ref_fp2_clear(ix3); + oqs_sidh_iqc_ref_fp2_clear(temp1); + oqs_sidh_iqc_ref_fp2_clear(temp2); + oqs_sidh_iqc_ref_fp2_clear(temp3); + oqs_sidh_iqc_ref_fp2_clear(sigma1); +} + +void oqs_sidh_iqc_ref_isogeny_evaluate_naive(elliptic_curve_t E, + point_t *points, + long num_points, + const point_t kernel_gen, + long l, + long e, + long isogeny_jump) { + + point_t temp_gen; + oqs_sidh_iqc_ref_point_init(temp_gen); + oqs_sidh_iqc_ref_point_set(temp_gen, kernel_gen); + + mpz_t le; + mpz_init(le); + mpz_ui_pow_ui(le, l, e); + + long kernel_size = 0; + if (e <= isogeny_jump) + kernel_size = mpz_get_si(le); + else + kernel_size = (long) pow(l, isogeny_jump); + + isogeny_t isogeny; + 
oqs_sidh_iqc_ref_isogeny_init(isogeny, kernel_size); + oqs_sidh_iqc_ref_elliptic_curve_set(isogeny->domain, E); + + long i = 0; + while (i < e) { + mpz_divexact_ui(le, le, kernel_size); + oqs_sidh_iqc_ref_isogeny_evaluate_naive_helper(isogeny, + E, + points, + num_points, + temp_gen, + le); + i += isogeny_jump; + + if ((e - i > 0) && (e - i) < isogeny_jump) { + kernel_size = (long) pow(l, e - i); + oqs_sidh_iqc_ref_isogeny_set_kernel_size(isogeny, kernel_size); + } + } + + oqs_sidh_iqc_ref_point_clear(temp_gen); + mpz_clear(le); + oqs_sidh_iqc_ref_isogeny_clear(isogeny); +} + +void oqs_sidh_iqc_ref_isogeny_evaluate_naive_curve(elliptic_curve_t E, + const point_t kernel_gen, + long l, + long e, + long isogeny_jump) { + oqs_sidh_iqc_ref_isogeny_evaluate_naive(E, NULL, 0, kernel_gen, l, e, isogeny_jump); +} + +void oqs_sidh_iqc_ref_isogeny_evaluate_naive_helper(isogeny_t isogeny, + elliptic_curve_t E, + point_t *points, + long num_points, + point_t kernel_gen, + const mpz_t le) { + point_t K; + oqs_sidh_iqc_ref_point_init(K); + + oqs_sidh_iqc_ref_point_mul_scaler(K, kernel_gen, le, E); + oqs_sidh_iqc_ref_isogeny_compute(isogeny, K); + oqs_sidh_iqc_ref_isogeny_evaluate_kohel(kernel_gen, isogeny, kernel_gen); + + for (long i = 0; i < num_points; i++) { + oqs_sidh_iqc_ref_isogeny_evaluate_kohel(points[i], isogeny, points[i]); + } + + oqs_sidh_iqc_ref_elliptic_curve_set(E, isogeny->codomain); + oqs_sidh_iqc_ref_elliptic_curve_set(isogeny->domain, isogeny->codomain); + + oqs_sidh_iqc_ref_point_clear(K); +} + +void oqs_sidh_iqc_ref_isogeny_evaluate_strategy_rec(elliptic_curve_t E, + point_t *points, + long num_points, + point_t *kernel_gens, + long num_gens, + long l, + long e, + float ratio) { + + if (e == 1) { + isogeny_t isogeny; + + long kernel_size = (long) pow(l, e); + oqs_sidh_iqc_ref_isogeny_init(isogeny, kernel_size); + oqs_sidh_iqc_ref_elliptic_curve_set(isogeny->domain, E); + oqs_sidh_iqc_ref_isogeny_compute(isogeny, kernel_gens[num_gens - 1]); + 
oqs_sidh_iqc_ref_elliptic_curve_set(E, isogeny->codomain); + + for (long i = 0; i < num_points; i++) { + oqs_sidh_iqc_ref_isogeny_evaluate_velu(points[i], isogeny, points[i]); + } + + for (long i = 0; i < num_gens - 1; i++) { + oqs_sidh_iqc_ref_isogeny_evaluate_velu(kernel_gens[i], + isogeny, + kernel_gens[i]); + } + + oqs_sidh_iqc_ref_isogeny_clear(isogeny); + return; + } + + long r = (long) (ratio * e); + + mpz_t exponent; + mpz_init(exponent); + mpz_ui_pow_ui(exponent, l, r); + + oqs_sidh_iqc_ref_point_mul_scaler(kernel_gens[num_gens], + kernel_gens[num_gens - 1], + exponent, E); + + oqs_sidh_iqc_ref_isogeny_evaluate_strategy_rec(E, points, num_points, kernel_gens, + num_gens + 1, l, e - r, ratio); + oqs_sidh_iqc_ref_isogeny_evaluate_strategy_rec(E, points, num_points, kernel_gens, + num_gens, l, r, ratio); + mpz_clear(exponent); +} + +void oqs_sidh_iqc_ref_isogeny_evaluate_strategy(elliptic_curve_t E, + point_t *points, + long num_points, + const point_t kernel_gen, + long l, + long e, + float ratio) { + + point_t *kernel_gens = (point_t *) malloc(e * sizeof(point_t)); + for (long i = 0; i < e; i++) + oqs_sidh_iqc_ref_point_init(kernel_gens[i]); + oqs_sidh_iqc_ref_point_set(kernel_gens[0], kernel_gen); + + oqs_sidh_iqc_ref_isogeny_evaluate_strategy_rec(E, points, num_points, + kernel_gens, 1, l, e, ratio); + + for (long i = 0; i < e; i++) + oqs_sidh_iqc_ref_point_clear(kernel_gens[i]); + free(kernel_gens); +} + +void oqs_sidh_iqc_ref_isogeny_evaluate_strategy_curve(elliptic_curve_t E, + const point_t kernel_gen, + long l, + long e, + float ratio) { + oqs_sidh_iqc_ref_isogeny_evaluate_strategy(E, NULL, 0, kernel_gen, l, e, ratio); +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_isogeny.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_isogeny.h new file mode 100644 index 0000000000000000000000000000000000000000..a1f26122c7dd19079a90a3e97be31bc7f711fdb2 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_isogeny.h @@ -0,0 +1,215 @@ +#ifndef ISOGENY_H +#define 
ISOGENY_H + +#include "sidh_elliptic_curve.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Representation of an isogeny between two elliptic curves + */ +typedef struct { + // Let the kernel K of the isogeny (excluding the zero point) be the union + // of F and G such that R \in F if and only if -R \in G for all points + // R \in K. Then the partition is F. + point_t *partition; + fp2_element_t *gx; + fp2_element_t *gy; + fp2_element_t *u; + fp2_element_t *v; + elliptic_curve_t domain; + elliptic_curve_t codomain; + long partition_size; + long kernel_size; +} isogeny_struct; + +typedef isogeny_struct isogeny_t[1]; + +/** + * Initializes the isogeny {@code isogeny}. + * @param isogeny + * @param kernel_size + */ +void oqs_sidh_iqc_ref_isogeny_init(isogeny_t isogeny, + long kernel_size); + +/** + * Frees the memory allocated to {@code isogeny}. + * @param isogeny + */ +void oqs_sidh_iqc_ref_isogeny_clear(isogeny_t isogeny); + +/** + * Computes the isogeny from the kernel generated by {@code kernel_gen}. + * @param isogeny + * @param kernel_gen + */ +void oqs_sidh_iqc_ref_isogeny_compute(isogeny_t isogeny, + const point_t kernel_gen); + +/** + * Evaluates {@code isogeny} at the point {@code P}, using Velu's formulas. + * @param Q The result of the evaluation {@code isogeny(P)} + * @param isogeny + * @param P + */ +void oqs_sidh_iqc_ref_isogeny_evaluate_velu(point_t Q, + const isogeny_t isogeny, + const point_t P); + +/** + * Evaluates {@code isogeny} at the point {@code P}, using Kohel's formulas. + * @param Q The result of the evaluation {@code isogeny(P)} + * @param isogeny + * @param P + */ +void oqs_sidh_iqc_ref_isogeny_evaluate_kohel(point_t Q, + const isogeny_t isogeny, + const point_t P); + +/** + * Computes the partition for the isogeny generated by {@code kernel_gen}. + * @see isogeny_struct. 
+ * @param partition + * @param partition_size + * @param kernel_gen + * @param E + */ +void oqs_sidh_iqc_ref_isogeny_partition_kernel(point_t *partition, + long partition_size, + const point_t kernel_gen, + const elliptic_curve_t E); + +/** + * Sets the kernel size for {@code isogeny}. The new kernel size is assumed + * to be smaller than the current kernel size. + * @param isogeny + * @param kernel_size + */ +void oqs_sidh_iqc_ref_isogeny_set_kernel_size(isogeny_t isogeny, + long kernel_size); + +/** + * Computes the images of the elliptic curve {@code E} and the points + * {@code points} through the isogeny with kernel generated by the point + * {@code kernel_gen}. The size of the kernel is {@code l^e}. + * @param E + * @param points + * @param num_points + * @param kernel_gen + * @param l + * @param e the length of the chain of l-isogenies + * @param isogeny_jump the number of successive l-isogenies that should + * be computed at once. For example, if {@code isogeny_jump = 2} then a + * chain of l-isogenies of length e is computed by doing e / 2 {l^2-isogenies}. + */ +void oqs_sidh_iqc_ref_isogeny_evaluate_naive(elliptic_curve_t E, + point_t *points, + long num_points, + const point_t kernel_gen, + long l, + long e, + long isogeny_jump); + +/** + * Computes the images of the elliptic curve {@code E} through the isogeny + * with kernel generated by the point {@code kernel_gen}. + * {@link oqs_sidh_iqc_ref_isogeny_evaluate_naive} + * @param E + * @param kernel_gen + * @param l + * @param e + * @param isogeny_jump + */ +void oqs_sidh_iqc_ref_isogeny_evaluate_naive_curve(elliptic_curve_t E, + const point_t kernel_gen, + long l, + long e, + long isogeny_jump); + +/** + * A helper method for {@link oqs_sidh_iqc_ref_isogeny_evaluate_naive}. All the arguments except + * {@code num_points, le} will be pushed through the isogeny. For example + * {@code E} will be the codomain of the isogeny. This method should not be + * called directly. 
+ * @param isogeny + * @param E + * @param points + * @param num_points + * @param kernel_gen + * @param le + */ +void oqs_sidh_iqc_ref_isogeny_evaluate_naive_helper(isogeny_t isogeny, + elliptic_curve_t E, + point_t *points, + long num_points, + point_t kernel_gen, + const mpz_t le); + +/** + * The recursion for {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy}. + * @param E + * @param points see {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy} + * @param num_points see {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy} + * @param kernel_gens contains the previous kernels computed while going down + * the recursion tree. + * @param num_gens number of elements in {@code kernel_gens} + * @param l + * @param e + * @param ratio see {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy} + */ +void oqs_sidh_iqc_ref_isogeny_evaluate_strategy_rec(elliptic_curve_t E, + point_t *points, + long num_points, + point_t *kernel_gens, + long num_gens, + long l, + long e, + float ratio); + +/** + * This method implements the optimal strategy approach proposed in the paper + * De Feo, Luca, David Jao, and Jérôme Plût. "Towards quantum-resistant + * cryptosystems from supersingular elliptic curve isogenies". + * @param E + * @param points the points to be evaluated through the isogeny + * @param num_points number of points in {@code points} + * @param kernel_gen the generator of the kernel of the isogeny + * @param l + * @param e + * @param ratio a float in the range (0, 1). This indicates the portions of + * the computation that is done through point multiplication and isogeny + * evaluation. The larger values of {@code ratio} means more multiplication + * and less isogeny evaluation. 
+ */ +void oqs_sidh_iqc_ref_isogeny_evaluate_strategy(elliptic_curve_t E, + point_t *points, + long num_points, + const point_t kernel_gen, + long l, + long e, + float ratio); + +/** + * The same as {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy} except there is no point + * to evaluate through the isogeny. This method simply calls + * {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy} with {@code points = NULL, num_points = 0}. + * @param E + * @param kernel_gen + * @param l + * @param e + * @param ratio + */ +void oqs_sidh_iqc_ref_isogeny_evaluate_strategy_curve(elliptic_curve_t E, + const point_t kernel_gen, + long l, + long e, + float ratio); + +#ifdef __cplusplus +} +#endif + +#endif /* ISOGENY_H */ diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.c new file mode 100644 index 0000000000000000000000000000000000000000..007bb70018530028a22b9b98b1eebee3cd73e7f1 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.c @@ -0,0 +1,85 @@ +#include "sidh_private_key.h" +#include "sidh_util.h" +#include "sidh_public_param.h" +#include <stdio.h> + +void oqs_sidh_iqc_ref_private_key_init(private_key_t private_key) { + mpz_inits(private_key->m, private_key->n, NULL); +} + +void oqs_sidh_iqc_ref_private_key_clear(private_key_t private_key) { + mpz_clears(private_key->m, private_key->n, NULL); +} + +void oqs_sidh_iqc_ref_private_key_generate(private_key_t private_key, + const public_params_t params) { + gmp_randstate_t randstate; + gmp_randinit_default(randstate); + mpz_t seed; + mpz_init(seed); + oqs_sidh_iqc_ref_get_random_mpz(seed); + gmp_randseed(randstate, seed); + + while (1) { + mpz_urandomm(private_key->m, randstate, params->le); + mpz_urandomm(private_key->n, randstate, params->le); + + if (!mpz_divisible_ui_p(private_key->m, params->l)) + break; + + if (!mpz_divisible_ui_p(private_key->n, params->l)) { + mpz_swap(private_key->m, private_key->n); + break; + } + } + + 
gmp_randclear(randstate); + mpz_clear(seed); +} + +void oqs_sidh_iqc_ref_private_key_compute_kernel_gen( + point_t gen, const private_key_t private_key, const point_t P, + const point_t Q, const mpz_t le, const elliptic_curve_t E) { + mpz_t temp_m; + mpz_t temp_n; + mpz_init_set(temp_m, private_key->m); + mpz_init_set(temp_n, private_key->n); + + point_t result; + oqs_sidh_iqc_ref_point_init(result); + + mpz_invert(temp_m, temp_m, le); + mpz_mul(temp_n, temp_m, temp_n); + mpz_mod(temp_n, temp_n, le); + + oqs_sidh_iqc_ref_point_mul_scaler(result, Q, temp_n, E); + oqs_sidh_iqc_ref_point_add(result, result, P, E); + oqs_sidh_iqc_ref_point_set(gen, result); + + mpz_clears(temp_m, temp_n, NULL); + oqs_sidh_iqc_ref_point_clear(result); +} + +void oqs_sidh_iqc_ref_private_key_print(const private_key_t private_key) { + printf("m: %s\n", mpz_get_str(NULL, 10, private_key->m)); + printf("n: %s\n", mpz_get_str(NULL, 10, private_key->n)); +} + +void oqs_sidh_iqc_ref_private_key_to_bytes(uint8_t *bytes, + const private_key_t private_key, + long prime_size) { + for (long i = 0; i < 2 * prime_size; i++) + bytes[i] = 0; + + mpz_export(bytes, NULL, -1, 1, 0, 0, private_key->m); + mpz_export(bytes + prime_size, NULL, -1, 1, 0, 0, private_key->n); +} + +void oqs_sidh_iqc_ref_bytes_to_private_key(private_key_t private_key, + const uint8_t *bytes, + long prime_size) { + mpz_set_ui(private_key->m, 0); + mpz_set_ui(private_key->n, 0); + mpz_import(private_key->m, prime_size, -1, 1, 0, 0, bytes); + mpz_import(private_key->n, prime_size, -1, 1, 0, 0, bytes + prime_size); +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.h new file mode 100644 index 0000000000000000000000000000000000000000..b8ca5a10d78bff9790cb12420c478054b95803de --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.h @@ -0,0 +1,89 @@ +#ifndef PRIVATE_KEY_H +#define PRIVATE_KEY_H + +#include "sidh_elliptic_curve.h" +#include "sidh_public_param.h" 
+ +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Representation of the private key in oqs_sidh_iqc_ref + */ +typedef struct { + mpz_t m; + mpz_t n; +} private_key_struct; + +typedef private_key_struct private_key_t[1]; + +/** + * Initializes the private-key. + * @param private_key + */ +void oqs_sidh_iqc_ref_private_key_init(private_key_t private_key); + +/** + * Frees the memory allocated to the private-key. + * @param private_key + */ +void oqs_sidh_iqc_ref_private_key_clear(private_key_t private_key); + +/** + * Generates the private-key. It is guaranteed that {@code private_key->m} + * is coprime to {@code params->l}. + * @param private_key + * @param params + */ +void oqs_sidh_iqc_ref_private_key_generate(private_key_t private_key, + const public_params_t params); + +/** + * Computes a generator for the kernel generated by {@code gen = m * P + n * Q}. + * It is assumed that {@code m} is invertible modulo {@code le}. + * @param gen + * @param P one of the generators of the l^e torsion. + * @param Q one of the generators of the l^e torsion. + * @param private_key + * @param le + * @param E + */ +void oqs_sidh_iqc_ref_private_key_compute_kernel_gen(point_t gen, + const private_key_t private_key, + const point_t P, + const point_t Q, + const mpz_t le, + const elliptic_curve_t E); + +/** + * Converts a private-key to an array of bytes. + * @param bytes + * @param private_key + * @param prime_size + */ +void oqs_sidh_iqc_ref_private_key_to_bytes(uint8_t *bytes, + const private_key_t private_key, + long prime_size); + +/** + * Converts an array of bytes to a private-key. + * @param private_key + * @param bytes + * @param prime_size + */ +void oqs_sidh_iqc_ref_bytes_to_private_key(private_key_t private_key, + const uint8_t *bytes, + long prime_size); + +/** + * Prints {@code private_key} to the standard output. 
+ * @param private_key + */ +void oqs_sidh_iqc_ref_private_key_print(const private_key_t private_key); + +#ifdef __cplusplus +} +#endif + +#endif /* PRIVATE_KEY_H */ diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.c new file mode 100644 index 0000000000000000000000000000000000000000..77877a66979b0b21646f98391dfeff8756cc68b1 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.c @@ -0,0 +1,96 @@ +#include "sidh_public_key.h" +#include "sidh_isogeny.h" +#include "sidh_private_key.h" +#include <stdio.h> +#include <math.h> + +void oqs_sidh_iqc_ref_public_key_init(public_key_t public_key) { + oqs_sidh_iqc_ref_elliptic_curve_init(public_key->E); + oqs_sidh_iqc_ref_point_init(public_key->P); + oqs_sidh_iqc_ref_point_init(public_key->Q); +} + +void oqs_sidh_iqc_ref_public_key_clear(public_key_t public_key) { + oqs_sidh_iqc_ref_elliptic_curve_clear(public_key->E); + oqs_sidh_iqc_ref_point_clear(public_key->P); + oqs_sidh_iqc_ref_point_clear(public_key->Q); +} + +void oqs_sidh_iqc_ref_public_key_generate(public_key_t public_key, + const point_t kernel_gen, + const public_params_t paramsA, + const public_params_t paramsB) { + + point_t points[2]; + oqs_sidh_iqc_ref_point_init(points[0]); + oqs_sidh_iqc_ref_point_init(points[1]); + + oqs_sidh_iqc_ref_elliptic_curve_set(public_key->E, paramsA->E); + oqs_sidh_iqc_ref_point_set(points[0], paramsB->P); + oqs_sidh_iqc_ref_point_set(points[1], paramsB->Q); + + oqs_sidh_iqc_ref_isogeny_evaluate_strategy(public_key->E, + points, + 2, + kernel_gen, + paramsA->l, + paramsA->e, + 0.5); + + // oqs_sidh_iqc_ref_isogeny_evaluate_naive(public_key->E, + // points, + // 2, + // kernel_gen, + // paramsA->l, + // paramsA->e, + // 10); + + oqs_sidh_iqc_ref_point_set(public_key->P, points[0]); + oqs_sidh_iqc_ref_point_set(public_key->Q, points[1]); + + oqs_sidh_iqc_ref_point_clear(points[0]); + oqs_sidh_iqc_ref_point_clear(points[1]); +} + +void 
oqs_sidh_iqc_ref_public_key_print(const public_key_t public_key) { + printf("E: %s\n", oqs_sidh_iqc_ref_elliptic_curve_get_str(public_key->E)); + printf("P: %s\n", oqs_sidh_iqc_ref_point_get_str(public_key->P)); + printf("Q: %s\n", oqs_sidh_iqc_ref_point_get_str(public_key->Q)); +} + +void oqs_sidh_iqc_ref_public_key_to_bytes(uint8_t *bytes, + const public_key_t public_key, + long prime_size) { + long index = 0; + oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->E->a, prime_size); + index += 2 * prime_size; + oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->E->b, prime_size); + index += 2 * prime_size; + oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->P->x, prime_size); + index += 2 * prime_size; + oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->P->y, prime_size); + index += 2 * prime_size; + oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->Q->x, prime_size); + index += 2 * prime_size; + oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->Q->y, prime_size); +} + +void oqs_sidh_iqc_ref_bytes_to_public_key(public_key_t public_key, + const uint8_t *bytes, + long prime_size) { + long index = 0; + oqs_sidh_iqc_ref_bytes_to_fp2(public_key->E->a, bytes + index, prime_size); + index += 2 * prime_size; + oqs_sidh_iqc_ref_bytes_to_fp2(public_key->E->b, bytes + index, prime_size); + index += 2 * prime_size; + oqs_sidh_iqc_ref_bytes_to_fp2(public_key->P->x, bytes + index, prime_size); + index += 2 * prime_size; + oqs_sidh_iqc_ref_bytes_to_fp2(public_key->P->y, bytes + index, prime_size); + index += 2 * prime_size; + oqs_sidh_iqc_ref_bytes_to_fp2(public_key->Q->x, bytes + index, prime_size); + index += 2 * prime_size; + oqs_sidh_iqc_ref_bytes_to_fp2(public_key->Q->y, bytes + index, prime_size); + + public_key->P->z = 1; + public_key->Q->z = 1; +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.h new file mode 100644 index 
0000000000000000000000000000000000000000..5b55a4e49c179ef2c03782c924d876240b683ac6 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.h @@ -0,0 +1,77 @@ +#ifndef PUBLIC_KEY_H +#define PUBLIC_KEY_H + +#include "sidh_public_param.h" +#include "sidh_private_key.h" +#include "sidh_isogeny.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Representation of the public key in oqs_sidh_iqc_ref + */ +typedef struct { + elliptic_curve_t E; + point_t P; + point_t Q; +} public_key_struct; + +typedef public_key_struct public_key_t[1]; + +/** + * Initializes the public-key. + * @param public_key + */ +void oqs_sidh_iqc_ref_public_key_init(public_key_t public_key); + +/** + * Frees the memory allocated to the public-key. + * @param public_key + */ +void oqs_sidh_iqc_ref_public_key_clear(public_key_t public_key); + +/** + * Generates the public-key + * @param public_key + * @param kernel_gen a generator for the kernel of the isogeny + * @param paramsA own params + * @param paramsB other's params + */ +void oqs_sidh_iqc_ref_public_key_generate(public_key_t public_key, + const point_t kernel_gen, + const public_params_t paramsA, + const public_params_t paramsB); + +/** + * Prints {@code public_key} to the standard output. + * @param public_key + */ +void oqs_sidh_iqc_ref_public_key_print(const public_key_t public_key); + +/** + * Converts a public-key to a byte array. + * @param bytes + * @param public_key + * @param prime_size + */ +void oqs_sidh_iqc_ref_public_key_to_bytes(uint8_t *bytes, + const public_key_t public_key, + long prime_size); + +/** + * Converts a byte array to a public-key. 
+ * @param public_key + * @param bytes + * @param prime_size + */ +void oqs_sidh_iqc_ref_bytes_to_public_key(public_key_t public_key, + const uint8_t *bytes, + long prime_size); + +#ifdef __cplusplus +} +#endif + +#endif /* PUBLIC_KEY_H */ diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.c new file mode 100644 index 0000000000000000000000000000000000000000..ff43ea94c2a245579f08c10222ca7a1f2738c7c2 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.c @@ -0,0 +1,141 @@ +#include "sidh_public_key_encryption.h" +#include "sidh_public_key.h" +#include "sidh_util.h" +#include "sidh_shared_key.h" +#include <string.h> +#include <stdlib.h> +#include <stdio.h> + +void oqs_sidh_iqc_ref_public_key_ciphertext_init(ciphertext_t ciphertext) { + oqs_sidh_iqc_ref_elliptic_curve_init(ciphertext->E); + oqs_sidh_iqc_ref_point_init(ciphertext->P); + oqs_sidh_iqc_ref_point_init(ciphertext->Q); + ciphertext->size = 0; +} + +void oqs_sidh_iqc_ref_public_key_ciphertext_clear(ciphertext_t ciphertext) { + oqs_sidh_iqc_ref_elliptic_curve_clear(ciphertext->E); + oqs_sidh_iqc_ref_point_clear(ciphertext->P); + oqs_sidh_iqc_ref_point_clear(ciphertext->Q); + free(ciphertext->content); + ciphertext->size = 0; +} + +void oqs_sidh_iqc_ref_public_key_plaintext_init(plaintext_t plaintext) { + plaintext->size = 0; +} + +void oqs_sidh_iqc_ref_public_key_plaintext_clear(plaintext_t plaintext) { + plaintext->size = 0; +} + +int oqs_sidh_iqc_ref_public_key_pad_plaintext(plaintext_t result, + const plaintext_t raw) { + long key_size = oqs_sidh_iqc_ref_public_key_get_key_size(); + long max_msg_size = key_size - 1; + + if (raw->size > key_size) { + printf("\nMessage too large. 
It should be less than %ld bytes.\n", + max_msg_size); + return -1; + } + + // pad the message + char *new_content = (char *) malloc(max_msg_size); + memset(new_content, 0, max_msg_size); + memcpy(new_content, raw->content, raw->size); + + result->content = new_content; + result->size = max_msg_size; + + return 1; +} + +void oqs_sidh_iqc_ref_public_key_encrypt(ciphertext_t ciphertext, + const plaintext_t plaintext, + const public_key_t public_keyA, + const public_params_t paramsA, + const public_params_t paramsB) { + + private_key_t private_key_temp; + oqs_sidh_iqc_ref_private_key_init(private_key_temp); + oqs_sidh_iqc_ref_private_key_generate(private_key_temp, paramsB); + + point_t kernel_gen; + oqs_sidh_iqc_ref_point_init(kernel_gen); + oqs_sidh_iqc_ref_private_key_compute_kernel_gen(kernel_gen, + private_key_temp, + paramsB->P, + paramsB->Q, + paramsB->le, + paramsB->E); + + public_key_t public_key_temp; + oqs_sidh_iqc_ref_public_key_init(public_key_temp); + oqs_sidh_iqc_ref_public_key_generate(public_key_temp, kernel_gen, paramsB, paramsA); + + fp2_element_t shared_key; + oqs_sidh_iqc_ref_fp2_init(shared_key); + oqs_sidh_iqc_ref_shared_key_generate(shared_key, public_keyA, private_key_temp, paramsB); + char *hash = oqs_sidh_iqc_ref_public_key_encryption_hash(shared_key, plaintext->size); + + ciphertext->content = oqs_sidh_iqc_ref_array_xor(plaintext->content, + hash, plaintext->size); + ciphertext->size = plaintext->size; + oqs_sidh_iqc_ref_elliptic_curve_set(ciphertext->E, public_key_temp->E); + oqs_sidh_iqc_ref_point_set(ciphertext->P, public_key_temp->P); + oqs_sidh_iqc_ref_point_set(ciphertext->Q, public_key_temp->Q); + + oqs_sidh_iqc_ref_private_key_clear(private_key_temp); + oqs_sidh_iqc_ref_point_clear(kernel_gen); + oqs_sidh_iqc_ref_public_key_clear(public_key_temp); + oqs_sidh_iqc_ref_fp2_clear(shared_key); + free(hash); +} + +void oqs_sidh_iqc_ref_public_key_decrypt(plaintext_t plaintext, + const ciphertext_t ciphertext, + const private_key_t 
private_keyA, + const public_params_t paramsA) { + + public_key_t public_key_temp; + oqs_sidh_iqc_ref_public_key_init(public_key_temp); + oqs_sidh_iqc_ref_elliptic_curve_set(public_key_temp->E, ciphertext->E); + oqs_sidh_iqc_ref_point_set(public_key_temp->P, ciphertext->P); + oqs_sidh_iqc_ref_point_set(public_key_temp->Q, ciphertext->Q); + + fp2_element_t shared_key; + oqs_sidh_iqc_ref_fp2_init(shared_key); + oqs_sidh_iqc_ref_shared_key_generate(shared_key, public_key_temp, private_keyA, paramsA); + char *hash = oqs_sidh_iqc_ref_public_key_encryption_hash(shared_key, ciphertext->size); + + plaintext->content = oqs_sidh_iqc_ref_array_xor(ciphertext->content, hash, + ciphertext->size); + plaintext->size = ciphertext->size; + + oqs_sidh_iqc_ref_public_key_clear(public_key_temp); + oqs_sidh_iqc_ref_fp2_clear(shared_key); + free(hash); +} + +const mp_limb_t *mpz_limbs_read(const mpz_t x); + +char *oqs_sidh_iqc_ref_public_key_encryption_hash(const fp2_element_t value, + long size) { + // compute the size of value in chars + long size_a = mpz_size(value->a) * sizeof(mp_limb_t); + long size_b = mpz_size(value->b) * sizeof(mp_limb_t); + + char *hash = (char *) malloc(size); + + memcpy(hash, (char *) mpz_limbs_read(value->a), size_a); + memcpy(hash + size_a, (char *) mpz_limbs_read(value->b), size_b); + + return hash; +} + +long oqs_sidh_iqc_ref_public_key_get_key_size() { + // the key size is twice as large as the base prime. 
+ long key_size = 2 * mpz_size(characteristic) * sizeof(mp_limb_t); + return key_size; +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.h new file mode 100644 index 0000000000000000000000000000000000000000..c9a97107f0795d6f7125a4e591e69ca5e4b230d0 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.h @@ -0,0 +1,115 @@ +#ifndef PUBLIC_KEY_ENCRYPTION_H +#define PUBLIC_KEY_ENCRYPTION_H + +#include "sidh_elliptic_curve.h" +#include "sidh_public_key.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Representation of ciphertext in oqs_sidh_iqc_ref + */ +typedef struct { + elliptic_curve_t E; + point_t P; + point_t Q; + char *content; + + // size of the content field + long size; +} ciphertext_struct; + +/** + * Representation of plaintext in oqs_sidh_iqc_ref + */ +typedef struct { + char *content; + + // size of the content field + long size; +} plaintext_struct; + +typedef ciphertext_struct ciphertext_t[1]; +typedef plaintext_struct plaintext_t[1]; + +/** + * Initializes the ciphertext. + * @param ciphertext + */ +void oqs_sidh_iqc_ref_public_key_ciphertext_init(ciphertext_t ciphertext); + +/** + * Frees the memory allocated to {@code ciphertext}. + * @param ciphertext + */ +void oqs_sidh_iqc_ref_public_key_ciphertext_clear(ciphertext_t ciphertext); + +/** + * Initializes the plaintext. + * @param plaintext + */ +void oqs_sidh_iqc_ref_public_key_plaintext_init(plaintext_t plaintext); + +/** + * Frees the memory allocated to {@code plaintext}. + * @param plaintext + */ +void oqs_sidh_iqc_ref_public_key_plaintext_clear(plaintext_t plaintext); + +/** + * Pads a given plain text for encryption. 
+ * @param result the prepared plaintext + * @param raw the given plaintext + * @return 1 if successful, and -1 otherwise + */ +int oqs_sidh_iqc_ref_public_key_pad_plaintext(plaintext_t result, + const plaintext_t raw); + +/** + * Encrypts the {@code plaintext} using {@code public_key}. + * @param ciphertext the generated cipher + * @param plaintext + * @param public_keyA other's public-key + * @param paramsA other's public params + * @param paramsB own pubic params + */ +void oqs_sidh_iqc_ref_public_key_encrypt(ciphertext_t ciphertext, + const plaintext_t plaintext, + const public_key_t public_keyA, + const public_params_t paramsA, + const public_params_t paramsB); + +/** + * Decrypts the {@code ciphertext} using {@code private_key}. + * @param plaintext the result + * @param ciphertext the given ciphertext + * @param private_keyA + * @param paramsA the public parameters associated to the owner of + * the private-key + */ +void oqs_sidh_iqc_ref_public_key_decrypt(plaintext_t plaintext, + const ciphertext_t ciphertext, + const private_key_t private_keyA, + const public_params_t paramsA); + +/** + * Computes the hash of {@code value} + * @param value + * @param size size of the output hash + * @return the hash + */ +char *oqs_sidh_iqc_ref_public_key_encryption_hash(const fp2_element_t value, + long size); + +/** + * @return the key-size in bytes + */ +long oqs_sidh_iqc_ref_public_key_get_key_size(); + +#ifdef __cplusplus +} +#endif + +#endif /* PUBLIC_KEY_ENCRYPTION_H */ diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.c new file mode 100644 index 0000000000000000000000000000000000000000..d90f56c5ae3cb15619007fc2443eab2d4982b93e --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.c @@ -0,0 +1,89 @@ +#include "sidh_public_key_validation.h" +#include "sidh_elliptic_curve_dlp.h" +#include <stdio.h> + +int oqs_sidh_iqc_ref_public_key_is_valid(const public_key_t 
public_key, + const public_params_t params) { + if (!oqs_sidh_iqc_ref_public_key_check_order(public_key->P, public_key->E, params)) + return 0; + + if (!oqs_sidh_iqc_ref_public_key_check_order(public_key->Q, public_key->E, params)) + return 0; + + if (!oqs_sidh_iqc_ref_public_key_check_dependency(public_key, params)) + return 0; + + if (!oqs_sidh_iqc_ref_public_key_check_curve(public_key->E)) + return 0; + + return 1; +} + +int oqs_sidh_iqc_ref_public_key_check_order(const point_t P, + const elliptic_curve_t E, + const public_params_t params) { + mpz_t order; + point_t temp; + + mpz_init_set(order, params->le); + oqs_sidh_iqc_ref_point_init(temp); + + int result = 0; + mpz_divexact_ui(order, order, params->l); + oqs_sidh_iqc_ref_point_mul_scaler(temp, P, order, E); + if (!oqs_sidh_iqc_ref_point_is_zero(temp)) { + oqs_sidh_iqc_ref_point_mul_scaler_si(temp, temp, params->l, E); + if (oqs_sidh_iqc_ref_point_is_zero(temp)) + result = 1; + } + + mpz_clear(order); + oqs_sidh_iqc_ref_point_clear(temp); + return result; +} + +int oqs_sidh_iqc_ref_public_key_check_dependency(const public_key_t public_key, + const public_params_t params) { + mpz_t x; + mpz_init(x); + + int result = 0; + oqs_sidh_iqc_ref_elliptic_curve_prime_power_dlp(x, + public_key->P, + public_key->Q, + public_key->E, + params->l, + params->e); + + if (mpz_cmp_si(x, -1) == 0) { + oqs_sidh_iqc_ref_elliptic_curve_prime_power_dlp(x, + public_key->Q, + public_key->P, + public_key->E, + params->l, + params->e); + if (mpz_cmp_si(x, -1) == 0) + result = 1; + } + + mpz_clear(x); + return result; +} + +int oqs_sidh_iqc_ref_public_key_check_curve(const elliptic_curve_t E) { + point_t temp; + mpz_t exponent; + + oqs_sidh_iqc_ref_point_init(temp); + mpz_init_set(exponent, characteristic); + mpz_add_ui(exponent, exponent, 1); + + oqs_sidh_iqc_ref_elliptic_curve_random_point(temp, E); + oqs_sidh_iqc_ref_point_mul_scaler(temp, temp, exponent, E); + int result = oqs_sidh_iqc_ref_point_is_zero(temp); + + 
oqs_sidh_iqc_ref_point_clear(temp); + mpz_clear(exponent); + + return result; +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.h new file mode 100644 index 0000000000000000000000000000000000000000..40fd369b4563d36a21119d2e8d349be89edb343d --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.h @@ -0,0 +1,55 @@ +#ifndef PUBLIC_KEY_VALIDATION_H +#define PUBLIC_KEY_VALIDATION_H + +#include "sidh_elliptic_curve.h" +#include "sidh_public_key.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Check if a given public-key is valid. + * @param public_key + * @param params the other party's public parameters from which + * the public-key is generated. + * @return 1 if the public-key is valid, 0 otherwise + */ +int oqs_sidh_iqc_ref_public_key_is_valid(const public_key_t public_key, + const public_params_t params); + +/** + * Checks if {@code P} has the exact order l^e where l, e are given in + * {@code params}. + * @param P + * @param E + * @param params + * @return 1 if {@code P} has order l^e, 0 otherwise + */ +int oqs_sidh_iqc_ref_public_key_check_order(const point_t P, + const elliptic_curve_t E, + const public_params_t params); + +/** + * Checks if the two point in {@code public-key} are linearly independent. + * @param public_key + * @param params + * @return 1 if the points are linearly independent, 0 otherwise + */ +int oqs_sidh_iqc_ref_public_key_check_dependency(const public_key_t public_key, + const public_params_t params); + +/** + * Checks if a given is valid supersingular curve. A curve is considered + * valid if it has order (p + 1)^2 where p is the characteristic. The test + * is done probabilistically. + * @param E + * @return 1 if the curve is valid, 0 otherwise. 
+ */ +int oqs_sidh_iqc_ref_public_key_check_curve(const elliptic_curve_t E); + +#ifdef __cplusplus +} +#endif + +#endif /* PUBLIC_KEY_VALIDATION_H */ diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.c new file mode 100644 index 0000000000000000000000000000000000000000..260a63b80ea13cd32ae44b212a14b6b1fafbb3d8 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.c @@ -0,0 +1,75 @@ +#include <stdio.h> +#include "sidh_public_param.h" + +void oqs_sidh_iqc_ref_public_params_init(public_params_t params) { + mpz_init(params->characteristic); + oqs_sidh_iqc_ref_elliptic_curve_init(params->E); + oqs_sidh_iqc_ref_point_init(params->P); + oqs_sidh_iqc_ref_point_init(params->Q); + mpz_init(params->le); +} + +int oqs_sidh_iqc_ref_public_params_read(public_params_t paramsA, + public_params_t paramsB, + const char **input) { + fp2_element_t a; + fp2_element_t b; + oqs_sidh_iqc_ref_fp2_init(a); + oqs_sidh_iqc_ref_fp2_init(b); + + gmp_sscanf(input[0], "p : %Zd \n", paramsA->characteristic); + mpz_set(paramsB->characteristic, paramsA->characteristic); + gmp_sscanf(input[1], + "E : y^2 = x^3 + (%Zd * i + %Zd) * x + (%Zd * i + %Zd) \n", + a->a, a->b, b->a, b->b); + oqs_sidh_iqc_ref_elliptic_curve_set_coeffs(paramsA->E, a, b); + oqs_sidh_iqc_ref_elliptic_curve_set(paramsB->E, paramsA->E); + gmp_sscanf(input[2], "lA: %ld \n", ¶msA->l); + gmp_sscanf(input[3], "eA: %ld \n", ¶msA->e); + mpz_ui_pow_ui(paramsA->le, paramsA->l, paramsA->e); + gmp_sscanf(input[4], + "PA: (%Zd * i + %Zd, %Zd * i + %Zd) \n", + a->a, a->b, b->a, b->b); + oqs_sidh_iqc_ref_point_set_coordinates(paramsA->P, a, b, 1); + gmp_sscanf(input[5], + "QA: (%Zd * i + %Zd, %Zd * i + %Zd) \n", + a->a, a->b, b->a, b->b); + oqs_sidh_iqc_ref_point_set_coordinates(paramsA->Q, a, b, 1); + gmp_sscanf(input[6], "lB: %ld \n", ¶msB->l); + gmp_sscanf(input[7], "eB: %ld \n", ¶msB->e); + mpz_ui_pow_ui(paramsB->le, paramsB->l, paramsB->e); + gmp_sscanf(input[8], 
+ "PB: (%Zd * i + %Zd, %Zd * i + %Zd) \n", + a->a, a->b, b->a, b->b); + oqs_sidh_iqc_ref_point_set_coordinates(paramsB->P, a, b, 1); + gmp_sscanf(input[9], + "QB: (%Zd * i + %Zd, %Zd * i + %Zd) \n", + a->a, a->b, b->a, b->b); + oqs_sidh_iqc_ref_point_set_coordinates(paramsB->Q, a, b, 1); + + oqs_sidh_iqc_ref_fp2_clear(a); + oqs_sidh_iqc_ref_fp2_clear(b); + + return 1; +} + +void oqs_sidh_iqc_ref_public_params_print(const public_params_t params, + int print_torsion) { + if (print_torsion != 1) { + printf("p : %s\n", mpz_get_str(NULL, 10, params->characteristic)); + printf("E : %s\n", oqs_sidh_iqc_ref_elliptic_curve_get_str(params->E)); + } + + printf("lA: %ld\n", params->l); + printf("eA: %ld\n", params->e); + printf("PA: %s\n", oqs_sidh_iqc_ref_point_get_str(params->P)); + printf("QA: %s\n", oqs_sidh_iqc_ref_point_get_str(params->Q)); +} + +void oqs_sidh_iqc_ref_public_params_clear(public_params_t params) { + mpz_clear(params->characteristic); + oqs_sidh_iqc_ref_elliptic_curve_clear(params->E); + oqs_sidh_iqc_ref_point_clear(params->P); + oqs_sidh_iqc_ref_point_clear(params->Q); + mpz_clear(params->le); +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.h new file mode 100644 index 0000000000000000000000000000000000000000..7d594452706db802b254b6fee7d8f9c38a37c669 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.h @@ -0,0 +1,68 @@ +#ifndef PUBLIC_PARAM_H +#define PUBLIC_PARAM_H + +#include "sidh_elliptic_curve.h" +#include "sidh_quadratic_ext.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Representation of the public parameters in oqs_sidh_iqc_ref + */ +typedef struct { + // the characteristic + mpz_t characteristic; + + elliptic_curve_t E; + unsigned long l; + unsigned long e; + + // a generator for the l^e torsion subgroup of E + point_t P; + point_t Q; + + // l^e, precomputed + mpz_t le; + +} public_params_struct; + +typedef public_params_struct public_params_t[1]; + 
+/** + * Initializes the public parameters. + * @param params + */ +void oqs_sidh_iqc_ref_public_params_init(public_params_t params); + +/** + * Reads the public parameters from array pointed by {@code input}. + * @param paramsA + * @param paramsB + * @param input + * @return 1 if the parameters are read successfully, and 0 otherwise. + */ +int oqs_sidh_iqc_ref_public_params_read(public_params_t paramsA, + public_params_t paramsB, + const char **input); + +/** + * Prints the public parameters to the standard output. + * @param params + * @param torsion if it is 1 only the torsion parameters are printed + */ +void oqs_sidh_iqc_ref_public_params_print(const public_params_t params, + int print_torsion); + +/** + * Frees the memory allocated to {@code params}. + * @param params + */ +void oqs_sidh_iqc_ref_public_params_clear(public_params_t params); + +#ifdef __cplusplus +} +#endif + +#endif /* PUBLIC_PARAM_H */ diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.c new file mode 100644 index 0000000000000000000000000000000000000000..990f5466c9c27a7b6f30fad960ba7177c15987eb --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.c @@ -0,0 +1,426 @@ +#include "sidh_quadratic_ext.h" +#include "sidh_util.h" +#include <string.h> +#include <stdlib.h> +#include <stdio.h> + +void oqs_sidh_iqc_ref_fp_init_chararacteristic_ui(long p) { + mpz_init_set_ui(characteristic, p); +} + +void oqs_sidh_iqc_ref_fp_init_chararacteristic_str(const char *value) { + mpz_init_set_str(characteristic, value, 10); +} + +void oqs_sidh_iqc_ref_fp_init_chararacteristic(const mpz_t p) { + mpz_init_set(characteristic, p); +} + +void oqs_sidh_iqc_ref_fp_set(mpz_t x, const mpz_t a) { + mpz_mod(x, a, characteristic); +} + +void oqs_sidh_iqc_ref_fp_add(mpz_t x, const mpz_t a, const mpz_t b) { + mpz_add(x, a, b); + mpz_mod(x, x, characteristic); +} + +void oqs_sidh_iqc_ref_fp_add_ui(mpz_t x, const mpz_t a, unsigned long b) { + 
mpz_add_ui(x, a, b); + mpz_mod(x, x, characteristic); +} + +void oqs_sidh_iqc_ref_fp_sub(mpz_t x, const mpz_t a, const mpz_t b) { + mpz_sub(x, a, b); + mpz_mod(x, x, characteristic); +} + +void oqs_sidh_iqc_ref_fp_sub_ui(mpz_t x, const mpz_t a, unsigned long b) { + mpz_sub_ui(x, a, b); + mpz_mod(x, x, characteristic); +} + +void oqs_sidh_iqc_ref_fp_mul(mpz_t x, const mpz_t a, const mpz_t b) { + mpz_mul(x, a, b); + mpz_mod(x, x, characteristic); +} + +void oqs_sidh_iqc_ref_fp_mul_si(mpz_t x, const mpz_t a, long b) { + mpz_mul_si(x, a, b); + mpz_mod(x, x, characteristic); +} + +void oqs_sidh_iqc_ref_fp_inv(mpz_t x, const mpz_t a) { + mpz_invert(x, a, characteristic); +} + +void oqs_sidh_iqc_ref_fp_div(mpz_t x, const mpz_t a, const mpz_t b) { + oqs_sidh_iqc_ref_fp_inv(x, b); + oqs_sidh_iqc_ref_fp_mul(x, a, x); +} + +void oqs_sidh_iqc_ref_fp_neg(mpz_t x, const mpz_t a) { + oqs_sidh_iqc_ref_fp_sub(x, characteristic, a); +} + +void oqs_sidh_iqc_ref_fp_sqrt(mpz_t x, const mpz_t a) { + mpz_t exponent; + mpz_init(exponent); + + // compute (p + 1) / 4 + mpz_add_ui(exponent, characteristic, 1); + mpz_divexact_ui(exponent, exponent, 4); + + mpz_powm(x, a, exponent, characteristic); + mpz_clear(exponent); +} + +//////////////// fp2 methods ////////////////////////// + +void oqs_sidh_iqc_ref_fp2_init(fp2_element_t x) { mpz_inits(x->a, x->b, NULL); } + +void oqs_sidh_iqc_ref_fp2_init_set_si(fp2_element_t x, long a, long b) { + mpz_init_set_si(x->a, a); + mpz_init_set_si(x->b, b); +} + +void oqs_sidh_iqc_ref_fp2_init_set_str(fp2_element_t x, const char *a, + const char *b) { + mpz_init_set_str(x->a, a, 10); + mpz_init_set_str(x->b, b, 10); +} + +void oqs_sidh_iqc_ref_fp2_init_set(fp2_element_t x, const fp2_element_t a) { + mpz_init_set(x->a, a->a); + mpz_init_set(x->b, a->b); +} + +void oqs_sidh_iqc_ref_fp2_clear(fp2_element_t x) { + mpz_clears(x->a, x->b, NULL); +} + +void oqs_sidh_iqc_ref_fp2_set(fp2_element_t x, const fp2_element_t b) { + mpz_set(x->a, b->a); + mpz_set(x->b, 
b->b); +} + +void oqs_sidh_iqc_ref_fp2_zero(fp2_element_t x) { + mpz_set_si(x->a, 0); + mpz_set_si(x->b, 0); +} + +void oqs_sidh_iqc_ref_fp2_one(fp2_element_t x) { + mpz_set_si(x->a, 0); + mpz_set_si(x->b, 1); +} + +char *oqs_sidh_iqc_ref_fp2_get_str(const fp2_element_t a) { + + if (mpz_cmp_si(a->a, 0) == 0 && mpz_cmp_si(a->b, 0) == 0) { + return "0"; + } + + if (mpz_cmp_si(a->a, 0) == 0) { + return mpz_get_str(NULL, 10, a->b); + } + + char *result = ""; + + if (mpz_cmp_si(a->b, 0) == 0) { + result = oqs_sidh_iqc_ref_concat(result, mpz_get_str(NULL, 10, a->a)); + result = oqs_sidh_iqc_ref_concat(result, " * i"); + return result; + } + + result = oqs_sidh_iqc_ref_concat(result, mpz_get_str(NULL, 10, a->a)); + result = oqs_sidh_iqc_ref_concat(result, " * i + "); + result = oqs_sidh_iqc_ref_concat(result, mpz_get_str(NULL, 10, a->b)); + + return result; +} + +void oqs_sidh_iqc_ref_fp2_add(fp2_element_t x, const fp2_element_t a, + const fp2_element_t b) { + oqs_sidh_iqc_ref_fp_add(x->a, a->a, b->a); + oqs_sidh_iqc_ref_fp_add(x->b, a->b, b->b); +} + +void oqs_sidh_iqc_ref_fp2_add_ui(fp2_element_t x, const fp2_element_t a, + unsigned long b) { + oqs_sidh_iqc_ref_fp_add_ui(x->b, a->b, b); + oqs_sidh_iqc_ref_fp_set(x->a, a->a); +} + +void oqs_sidh_iqc_ref_fp2_sub(fp2_element_t x, const fp2_element_t a, + const fp2_element_t b) { + oqs_sidh_iqc_ref_fp_sub(x->a, a->a, b->a); + oqs_sidh_iqc_ref_fp_sub(x->b, a->b, b->b); +} + +void oqs_sidh_iqc_ref_fp2_sub_ui(fp2_element_t x, const fp2_element_t a, + unsigned long b) { + oqs_sidh_iqc_ref_fp_sub_ui(x->b, a->b, b); + oqs_sidh_iqc_ref_fp_set(x->a, a->a); +} + +void oqs_sidh_iqc_ref_fp2_mul(fp2_element_t x, const fp2_element_t a, + const fp2_element_t b) { + mpz_t temp1; + mpz_t temp2; + + mpz_init(temp1); + mpz_init(temp2); + + fp2_element_t result; + oqs_sidh_iqc_ref_fp2_init(result); + + // (a + b) * (c + d) + oqs_sidh_iqc_ref_fp_add(temp1, a->a, a->b); + oqs_sidh_iqc_ref_fp_add(temp2, b->a, b->b); + 
oqs_sidh_iqc_ref_fp_mul(result->a, temp1, temp2); + + // a * c + oqs_sidh_iqc_ref_fp_mul(temp1, a->a, b->a); + // b * d + oqs_sidh_iqc_ref_fp_mul(temp2, a->b, b->b); + + oqs_sidh_iqc_ref_fp_sub(result->a, result->a, temp1); + oqs_sidh_iqc_ref_fp_sub(result->a, result->a, temp2); + oqs_sidh_iqc_ref_fp_sub(result->b, temp2, temp1); + oqs_sidh_iqc_ref_fp2_set(x, result); + + mpz_clear(temp1); + mpz_clear(temp2); + oqs_sidh_iqc_ref_fp2_clear(result); +} + +void oqs_sidh_iqc_ref_fp2_square(fp2_element_t x, const fp2_element_t a) { + mpz_t temp1; + mpz_t temp2; + + mpz_init(temp1); + mpz_init(temp2); + + fp2_element_t result; + oqs_sidh_iqc_ref_fp2_init(result); + + // (b + a) * (b - a) + oqs_sidh_iqc_ref_fp_add(temp1, a->a, a->b); + oqs_sidh_iqc_ref_fp_sub(temp2, a->b, a->a); + oqs_sidh_iqc_ref_fp_mul(result->b, temp1, temp2); + + // 2 * a * b + oqs_sidh_iqc_ref_fp_mul(result->a, a->a, a->b); + oqs_sidh_iqc_ref_fp_mul_si(result->a, result->a, 2); + + oqs_sidh_iqc_ref_fp2_set(x, result); + + mpz_clear(temp1); + mpz_clear(temp2); + oqs_sidh_iqc_ref_fp2_clear(result); +} + +void oqs_sidh_iqc_ref_fp2_pow_ui(fp2_element_t x, const fp2_element_t a, + unsigned long n) { + mpz_t temp_n; + mpz_init_set_ui(temp_n, n); + oqs_sidh_iqc_ref_fp2_pow(x, a, temp_n); + mpz_clear(temp_n); +} + +void oqs_sidh_iqc_ref_fp2_pow(fp2_element_t x, const fp2_element_t a, + const mpz_t n) { + if (mpz_cmp_ui(n, 0) == 0) { + oqs_sidh_iqc_ref_fp2_one(x); + return; + } + + fp2_element_t temp1; + fp2_element_t temp2; + oqs_sidh_iqc_ref_fp2_init_set_si(temp1, 0, 1); + oqs_sidh_iqc_ref_fp2_init_set(temp2, a); + + long num_bits = mpz_sizeinbase(n, 2); + for (long i = 0; i < num_bits; i++) { + if (mpz_tstbit(n, i) == 1) + oqs_sidh_iqc_ref_fp2_mul(temp1, temp1, temp2); + oqs_sidh_iqc_ref_fp2_square(temp2, temp2); + } + + oqs_sidh_iqc_ref_fp2_set(x, temp1); + + oqs_sidh_iqc_ref_fp2_clear(temp1); + oqs_sidh_iqc_ref_fp2_clear(temp2); +} + +void oqs_sidh_iqc_ref_fp2_conjugate(fp2_element_t x, const 
fp2_element_t a) { + oqs_sidh_iqc_ref_fp2_set(x, a); + oqs_sidh_iqc_ref_fp_neg(x->a, x->a); +} + +void oqs_sidh_iqc_ref_fp2_negate(fp2_element_t x, const fp2_element_t a) { + oqs_sidh_iqc_ref_fp2_set(x, a); + oqs_sidh_iqc_ref_fp_neg(x->a, x->a); + oqs_sidh_iqc_ref_fp_neg(x->b, x->b); +} + +void oqs_sidh_iqc_ref_fp2_mul_scaler(fp2_element_t x, const fp2_element_t a, + const mpz_t scaler) { + oqs_sidh_iqc_ref_fp_mul(x->a, a->a, scaler); + oqs_sidh_iqc_ref_fp_mul(x->b, a->b, scaler); +} + +void oqs_sidh_iqc_ref_fp2_mul_scaler_si(fp2_element_t x, const fp2_element_t a, + long scaler) { + oqs_sidh_iqc_ref_fp_mul_si(x->a, a->a, scaler); + oqs_sidh_iqc_ref_fp_mul_si(x->b, a->b, scaler); +} + +void oqs_sidh_iqc_ref_fp2_inv(fp2_element_t x, const fp2_element_t a) { + mpz_t temp; + fp2_element_t result; + + mpz_init(temp); + oqs_sidh_iqc_ref_fp2_init(result); + + oqs_sidh_iqc_ref_fp2_conjugate(result, a); + oqs_sidh_iqc_ref_fp2_norm(temp, a); + oqs_sidh_iqc_ref_fp_inv(temp, temp); + oqs_sidh_iqc_ref_fp2_mul_scaler(result, result, temp); + oqs_sidh_iqc_ref_fp2_set(x, result); + + mpz_clear(temp); + oqs_sidh_iqc_ref_fp2_clear(result); +} + +void oqs_sidh_iqc_ref_fp2_div(fp2_element_t x, const fp2_element_t a, + const fp2_element_t b) { + fp2_element_t result; + oqs_sidh_iqc_ref_fp2_init(result); + + oqs_sidh_iqc_ref_fp2_inv(result, b); + oqs_sidh_iqc_ref_fp2_mul(result, a, result); + oqs_sidh_iqc_ref_fp2_set(x, result); + + oqs_sidh_iqc_ref_fp2_clear(result); +} + +int oqs_sidh_iqc_ref_fp2_is_zero(const fp2_element_t a) { + return !mpz_cmp_si(a->a, 0) && !mpz_cmp_si(a->b, 0); +} + +int oqs_sidh_iqc_ref_fp2_is_one(const fp2_element_t a) { + return !mpz_cmp_si(a->a, 0) && !mpz_cmp_si(a->b, 1); +} + +int oqs_sidh_iqc_ref_fp2_equals(const fp2_element_t a, const fp2_element_t b) { + return (mpz_cmp(a->a, b->a) == 0) && (mpz_cmp(a->b, b->b) == 0); +} + +void oqs_sidh_iqc_ref_fp2_random(fp2_element_t x, gmp_randstate_t randstate) { + mpz_urandomm(x->a, randstate, characteristic); + 
mpz_urandomm(x->b, randstate, characteristic); +} + +void oqs_sidh_iqc_ref_fp2_sqrt(fp2_element_t x, const fp2_element_t a) { + mpz_t exponent; + fp2_element_t temp_a; + fp2_element_t b; + fp2_element_t c; + fp2_element_t beta; + mpz_t base_root; + gmp_randstate_t randstate; + + mpz_init(exponent); + oqs_sidh_iqc_ref_fp2_init(temp_a); + oqs_sidh_iqc_ref_fp2_init(b); + oqs_sidh_iqc_ref_fp2_init(c); + oqs_sidh_iqc_ref_fp2_init(beta); + mpz_init(base_root); + gmp_randinit_default(randstate); + + // compute (p - 1) / 2 + mpz_sub_ui(exponent, characteristic, 1); + mpz_divexact_ui(exponent, exponent, 2); + + while (oqs_sidh_iqc_ref_fp2_is_zero(b)) { + oqs_sidh_iqc_ref_fp2_random(c, randstate); + oqs_sidh_iqc_ref_fp2_square(temp_a, c); + oqs_sidh_iqc_ref_fp2_mul(temp_a, temp_a, a); + + // compute 1 + temp_a^((p - 1) / 2) + oqs_sidh_iqc_ref_fp2_pow(b, temp_a, exponent); + oqs_sidh_iqc_ref_fp2_add_ui(b, b, 1); + } + + // compute temp_a * b^2 + oqs_sidh_iqc_ref_fp2_square(beta, b); + oqs_sidh_iqc_ref_fp2_mul(beta, beta, temp_a); + + // beta is now in the prime field + oqs_sidh_iqc_ref_fp_sqrt(base_root, beta->b); + oqs_sidh_iqc_ref_fp2_inv(b, b); + oqs_sidh_iqc_ref_fp2_mul_scaler(b, b, base_root); + oqs_sidh_iqc_ref_fp2_div(x, b, c); + + mpz_clear(exponent); + oqs_sidh_iqc_ref_fp2_clear(temp_a); + oqs_sidh_iqc_ref_fp2_clear(b); + oqs_sidh_iqc_ref_fp2_clear(c); + oqs_sidh_iqc_ref_fp2_clear(beta); + mpz_clear(base_root); + gmp_randclear(randstate); +} + +int oqs_sidh_iqc_ref_fp2_is_square(const fp2_element_t a) { + mpz_t exponent; + mpz_t norm; + fp2_element_t temp; + + mpz_init(exponent); + mpz_init(norm); + oqs_sidh_iqc_ref_fp2_init(temp); + + // a^((p - 1) / 2) + mpz_sub_ui(exponent, characteristic, 1); + mpz_divexact_ui(exponent, exponent, 2); + oqs_sidh_iqc_ref_fp2_pow(temp, a, exponent); + + oqs_sidh_iqc_ref_fp2_norm(norm, temp); + int result = (mpz_cmp_si(norm, 1) == 0); + + mpz_clear(exponent); + mpz_clear(norm); + oqs_sidh_iqc_ref_fp2_clear(temp); + + return result; 
+} + +void oqs_sidh_iqc_ref_fp2_norm(mpz_t x, const fp2_element_t a) { + mpz_t temp1; + mpz_t temp2; + mpz_inits(temp1, temp2, NULL); + + oqs_sidh_iqc_ref_fp_mul(temp1, a->a, a->a); + oqs_sidh_iqc_ref_fp_mul(temp2, a->b, a->b); + oqs_sidh_iqc_ref_fp_add(temp1, temp1, temp2); + + mpz_set(x, temp1); + mpz_clears(temp1, temp2, NULL); +} + +void oqs_sidh_iqc_ref_fp2_to_bytes(uint8_t *bytes, const fp2_element_t a, + long prime_size) { + for (long i = 0; i < 2 * prime_size; i++) + bytes[i] = 0; + + mpz_export(bytes, NULL, -1, 1, 0, 0, a->a); + mpz_export(bytes + prime_size, NULL, -1, 1, 0, 0, a->b); +} + +void oqs_sidh_iqc_ref_bytes_to_fp2(fp2_element_t a, const uint8_t *bytes, + long prime_size) { + oqs_sidh_iqc_ref_fp2_zero(a); + mpz_import(a->a, prime_size, -1, 1, 0, 0, bytes); + mpz_import(a->b, prime_size, -1, 1, 0, 0, bytes + prime_size); +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.h new file mode 100644 index 0000000000000000000000000000000000000000..3d9b4f7623ea61ca7c89777784ebd76cb863a14a --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.h @@ -0,0 +1,428 @@ +#ifndef FP2_H +#define FP2_H + +#include <gmp.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +mpz_t characteristic; + +/** + * Representation of elements of the quadratic extension F_(p^2) + * of F_p. We assume F_(p^2) is represented by the quotient + * F_p[X] / (X^2 + 1) which requires X^2 + 1 to be irreducible over F_p. + * The elements are therefore of the form a * i + b where i^2 = -1. 
+ */ +typedef struct { + mpz_t a; + mpz_t b; +} fp2_element_struct; + +typedef fp2_element_struct fp2_element_t[1]; + +//////////////// fp methods ////////////////////////// + +/** + * {@link oqs_sidh_iqc_ref_init_chararacteristic} + * @param p + */ +void oqs_sidh_iqc_ref_fp_init_chararacteristic_ui(long p); + +/** + * {@link oqs_sidh_iqc_ref_init_chararacteristic} + * @param value + */ +void oqs_sidh_iqc_ref_fp_init_chararacteristic_str(const char *value); + +/** + * Initializes the characteristic to {@code p}. + * @param p + */ +void oqs_sidh_iqc_ref_fp_init_chararacteristic(const mpz_t p); + +/** + * Sets {@code x = a}. + * @param x + * @param a + */ +void oqs_sidh_iqc_ref_fp_set(mpz_t x, const mpz_t a); + +/** + * Sets {@code x = a + b}. + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp_add(mpz_t x, + const mpz_t a, + const mpz_t b); + +/** + * {@link oqs_sidh_iqc_ref_fp_add}. + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp_add_ui(mpz_t x, + const mpz_t a, + unsigned long b); + +/** + * Sets {@code x = a - b}. + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp_sub(mpz_t x, + const mpz_t a, + const mpz_t b); + +/** + * {@link oqs_sidh_iqc_ref_fp_sub} + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp_sub_ui(mpz_t x, + const mpz_t a, + unsigned long b); + +/** + * Sets {@code x = a * b}. + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp_mul(mpz_t x, + const mpz_t a, + const mpz_t b); + +/** + * {@link oqs_sidh_iqc_ref_fp_mul} + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp_mul_si(mpz_t x, + const mpz_t a, + long b); + +/** + * Sets {@code x = 1 / a}. This is possible only if {@code a} is + * prime to the characteristic. + * @param x + * @param a + */ +void oqs_sidh_iqc_ref_fp_inv(mpz_t x, + const mpz_t a); + +/** + * Sets {x = a / b}. @see fp_inv. 
+ * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp_div(mpz_t x, + const mpz_t a, + const mpz_t b); + +/** + * Sets {@code x = -a}. + * @param x + * @param a + */ +void oqs_sidh_iqc_ref_fp_neg(mpz_t x, + const mpz_t a); + +/** + * Computes the square root of {@code a}. + * This method works only for p = 3 mod 4. + * @param x the square root + * @param a + */ +void oqs_sidh_iqc_ref_fp_sqrt(mpz_t x, + const mpz_t a); + +//////////////// fp2 methods ////////////////////////// + +/** + * Initializes {@code x} to zero. + * @param x + */ +void oqs_sidh_iqc_ref_fp2_init(fp2_element_t x); + +/** + * Initializes {@code x} to {@code a * i + b}. + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp2_init_set_si(fp2_element_t x, + long a, + long b); + +/** + * {@link oqs_sidh_iqc_ref_fp2_init_set_si}. + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp2_init_set_str(fp2_element_t x, + const char *a, + const char *b); + +/** + * Initializes {@code x} to {@code a}. + * @param x + * @param a + */ +void oqs_sidh_iqc_ref_fp2_init_set(fp2_element_t x, + const fp2_element_t a); + +/** + * Frees the memory allocated to {@code x}. + * @param x + */ +void oqs_sidh_iqc_ref_fp2_clear(fp2_element_t x); + +/** + * Copies {@code a} into {@code x}. + * @param x + * @param b + */ +void oqs_sidh_iqc_ref_fp2_set(fp2_element_t x, + const fp2_element_t b); + +/** + * Sets {@code a = 0} + * @param x + */ +void oqs_sidh_iqc_ref_fp2_zero(fp2_element_t x); + +/** + * Sets {@code x = 1}. + * @param x + */ +void oqs_sidh_iqc_ref_fp2_one(fp2_element_t x); + +/** + * @param a + * @return the string representation of {@code a} + */ +char *oqs_sidh_iqc_ref_fp2_get_str(const fp2_element_t a); + +/** + * Sets {@code x = a + b}. 
+ * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp2_add(fp2_element_t x, + const fp2_element_t a, + const fp2_element_t b); + +/** + * {@link oqs_sidh_iqc_ref_fp2_add} + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp2_add_ui(fp2_element_t x, + const fp2_element_t a, + unsigned long b); + +/** + * Sets {@code x = a - b}. + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp2_sub(fp2_element_t x, + const fp2_element_t a, + const fp2_element_t b); + +/** + * {@link oqs_sidh_iqc_ref_fp2_sub} + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp2_sub_ui(fp2_element_t x, + const fp2_element_t a, + unsigned long b); + +/** + * Sets {@code x = a * b}. + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp2_mul(fp2_element_t x, + const fp2_element_t a, + const fp2_element_t b); + +/** + * Sets {@code x = a^2}. + * @param x + * @param a + */ +void oqs_sidh_iqc_ref_fp2_square(fp2_element_t x, + const fp2_element_t a); + +/** + * {@link oqs_sidh_iqc_ref_fp2_pow} + */ +void oqs_sidh_iqc_ref_fp2_pow_ui(fp2_element_t x, + const fp2_element_t a, + unsigned long n); + +/** + * Sets {@code x = a^n}. + * @param x + * @param a + * @param n + */ +void oqs_sidh_iqc_ref_fp2_pow(fp2_element_t x, + const fp2_element_t a, + const mpz_t n); + +/** + * Sets {@code x = 1 / a}. + * @param x + * @param a + */ +void oqs_sidh_iqc_ref_fp2_inv(fp2_element_t x, + const fp2_element_t a); + +/** + * Sets {@code x = a / b}. + * @param x + * @param a + * @param b + */ +void oqs_sidh_iqc_ref_fp2_div(fp2_element_t x, + const fp2_element_t a, + const fp2_element_t b); + +/** + * Sets {@code x = -u * i + v} where {@code a = u * i + v}. + * @param x + * @param a + */ +void oqs_sidh_iqc_ref_fp2_conjugate(fp2_element_t x, + const fp2_element_t a); + +/** + * Sets {@code x = -a}. + * @param x + * @param a + */ +void oqs_sidh_iqc_ref_fp2_negate(fp2_element_t x, + const fp2_element_t a); + +/** + * Sets {@code x = a * scaler}. 
+ * @param x + * @param a + * @param scaler + */ +void oqs_sidh_iqc_ref_fp2_mul_scaler(fp2_element_t x, + const fp2_element_t a, + const mpz_t scaler); + +/** + * {@link oqs_sidh_iqc_ref_fp2_mul_scaler} + * @param x + * @param a + * @param scaler + */ +void oqs_sidh_iqc_ref_fp2_mul_scaler_si(fp2_element_t x, + const fp2_element_t a, + long scaler); + +/** + * Checks if {@code a} is zero. + * @param a + * @return 1 if {@code a == 0}, and 0 otherwise + */ +int oqs_sidh_iqc_ref_fp2_is_zero(const fp2_element_t a); + +/** + * Checks if {@code a} is one. + * @param a + * @return 1 if {@code a == 1}, and 0 otherwise + */ +int oqs_sidh_iqc_ref_fp2_is_one(const fp2_element_t a); + +/** + * Checks if {@code a == b}. + * @param a + * @param b + * @return 1 if {@code a == b}, and 0 otherwise. + */ +int oqs_sidh_iqc_ref_fp2_equals(const fp2_element_t a, + const fp2_element_t b); + +/** + * Generates a random element in the quadratic extension. + * @param x the generated random element + * @param randstate + */ +void oqs_sidh_iqc_ref_fp2_random(fp2_element_t x, + gmp_randstate_t randstate); + +/** + * Computes the square root of {@code a}. + * The algorithm is based on + * Doliskani & Schost, Taking Roots over High Extensions of Finite Fields, 2011. + * It works for any characteristic, but since it uses {@link oqs_sidh_iqc_ref_fp_sqrt} for + * base-case square root, it is limited to p = 3 mod 4. + * @param x the square root + * @param a + */ +void oqs_sidh_iqc_ref_fp2_sqrt(fp2_element_t x, + const fp2_element_t a); + +/** + * Checks if {@code a} is a square. + * @param a + * @return 1 if {@code a} is a square, 0 otherwise + */ +int oqs_sidh_iqc_ref_fp2_is_square(const fp2_element_t a); + +/** + * Computes the norm of {@code x = b * i + c} which is b^2 + c^2. + * @param x the computed norm + * @param a + */ +void oqs_sidh_iqc_ref_fp2_norm(mpz_t x, + const fp2_element_t a); + +/** + * Converts bytes an fp2 element to a byte array. 
+ * @param bytes + * @param a + * @param prime_size + */ +void oqs_sidh_iqc_ref_fp2_to_bytes(uint8_t *bytes, + const fp2_element_t a, + long prime_size); + +/** + * Converts a byte array to an fp2 element. + * @param a + * @param bytes + * @param prime_size + */ +void oqs_sidh_iqc_ref_bytes_to_fp2(fp2_element_t a, + const uint8_t *bytes, + long prime_size); + +#ifdef __cplusplus +} +#endif + +#endif /* FP2_H */ diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.c new file mode 100644 index 0000000000000000000000000000000000000000..1dbc7bcdd969eb9c5ea07a6cbbd0bfc20f9ce057 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.c @@ -0,0 +1,30 @@ +#include "sidh_shared_key.h" +#include "sidh_isogeny.h" + +void oqs_sidh_iqc_ref_shared_key_generate(fp2_element_t shared_key, + const public_key_t public_key, + const private_key_t private_key, + const public_params_t params) { + + point_t kernel_gen; + oqs_sidh_iqc_ref_point_init(kernel_gen); + + // compute a generator for the kernel of the isogeny + oqs_sidh_iqc_ref_private_key_compute_kernel_gen(kernel_gen, + private_key, + public_key->P, + public_key->Q, + params->le, + public_key->E); + elliptic_curve_t E; + oqs_sidh_iqc_ref_elliptic_curve_init(E); + oqs_sidh_iqc_ref_elliptic_curve_set(E, public_key->E); + + oqs_sidh_iqc_ref_isogeny_evaluate_strategy_curve(E, kernel_gen, params->l, params->e, 0.5); + // oqs_sidh_iqc_ref_isogeny_evaluate_naive_curve(E, kernel_gen, params->l, params->e, 3); + + oqs_sidh_iqc_ref_elliptic_curve_compute_j_inv(shared_key, E); + + oqs_sidh_iqc_ref_point_clear(kernel_gen); + oqs_sidh_iqc_ref_elliptic_curve_clear(E); +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.h new file mode 100644 index 0000000000000000000000000000000000000000..4b4547816f0a57c73d6db64d8cea6e017285557f --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.h @@ -0,0 +1,27 @@ 
+#ifndef SHARED_KEY_H +#define SHARED_KEY_H + +#include "sidh_private_key.h" +#include "sidh_public_key.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Generates the shared-key. + * @param shared_key the generated shared-key + * @param public_key other's public-key + * @param private_key own private-key + * @param params own parameters + */ +void oqs_sidh_iqc_ref_shared_key_generate(fp2_element_t shared_key, + const public_key_t public_key, + const private_key_t private_key, + const public_params_t params); + +#ifdef __cplusplus +} +#endif + +#endif /* SHARED_KEY_H */ diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.c new file mode 100644 index 0000000000000000000000000000000000000000..8f68a36f7dd791a27c15f31d7e542f5c31a4f45b --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.c @@ -0,0 +1,37 @@ +#include <stdlib.h> +#include <stdio.h> +#include <gmp.h> +#include <time.h> +#include <oqs/rand.h> + +#include "sidh_util.h" + +char *oqs_sidh_iqc_ref_concat(char *str1, const char *str2) { + char *temp = (char *) malloc(strlen(str1) + strlen(str2) + 1); + strcpy(temp, str1); + strcat(temp, str2); + return temp; +} + +char *oqs_sidh_iqc_ref_get_random_str(int num_bytes) { + char *rand_value = (char *) malloc(num_bytes); + OQS_RAND *rand = OQS_RAND_new(OQS_RAND_alg_urandom_chacha20); + OQS_RAND_n(rand, (uint8_t *) rand_value, num_bytes); + + return rand_value; +} + +void oqs_sidh_iqc_ref_get_random_mpz(mpz_t x) { + int num_bytes = 20; + char *a = oqs_sidh_iqc_ref_get_random_str(num_bytes); + mpz_import(x, num_bytes, 1, sizeof(char), 0, 0, a); +} + +char *oqs_sidh_iqc_ref_array_xor(const char *array1, const char *array2, + long lenght) { + char *result = (char *) malloc(lenght); + for (long i = 0; i < lenght; i++) + result[i] = array1[i] ^ array2[i]; + + return result; +} diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.h new file mode 100644 index 
0000000000000000000000000000000000000000..e0373498b5a86024a128b2d304e48de7e47b9fe4 --- /dev/null +++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.h @@ -0,0 +1,45 @@ +#ifndef UTIL_H +#define UTIL_H + +#include <string.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Concatenates two strings. + * @param str1 + * @param str2 + * @return the concatenation of {@code str1, str2} + */ +char *oqs_sidh_iqc_ref_concat(char *str1, + const char *str2); + +/** + * Generates a random char array of length {@code num_bytes}. + * @param num_bytes + * @return a random char array of length {@code num_bytes} + */ +char *oqs_sidh_iqc_ref_get_random_str(int num_bytes); + +/** + * @param x a randomly generated 160bit integer + */ +void oqs_sidh_iqc_ref_get_random_mpz(mpz_t x); + +/** + * @param array1 + * @param array2 + * @param lenght + * @return the bitwise xor of the two arrays + */ +char *oqs_sidh_iqc_ref_array_xor(const char *array1, + const char *array2, + long lenght); + +#ifdef __cplusplus +} +#endif + +#endif /* UTIL_H */ diff --git a/crypt/liboqs/sig/Makefile.am b/crypt/liboqs/sig/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..bc279ca069d4258d89a225ddb9b5e89b04014741 --- /dev/null +++ b/crypt/liboqs/sig/Makefile.am @@ -0,0 +1,8 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libsig.la + +libsig_la_SOURCES = sig.c + +libsig_la_CPPFLAGS = -I../../include +libsig_la_CPPFLAGS += $(AM_CPPFLAGS) + diff --git a/crypt/liboqs/sig/sig.c b/crypt/liboqs/sig/sig.c new file mode 100644 index 0000000000000000000000000000000000000000..bc6d49cb1091104ed120f51a93e114c56f6676e8 --- /dev/null +++ b/crypt/liboqs/sig/sig.c @@ -0,0 +1,71 @@ +#include <assert.h> +#include <oqs/common.h> +#include <oqs/sig.h> +#ifdef ENABLE_SIG_PICNIC +#include <oqs/sig_picnic.h> +#endif + +OQS_SIG *OQS_SIG_new(OQS_RAND *rand, enum OQS_SIG_algid algid) { + if (rand == NULL) { + return NULL; + } + + OQS_SIG *s = malloc(sizeof(OQS_SIG)); + if (s == NULL) { + return NULL; + } 
+ s->rand = rand; + + switch (algid) { +#ifdef ENABLE_SIG_PICNIC + case OQS_SIG_picnic_L1_FS: + case OQS_SIG_picnic_L1_UR: + case OQS_SIG_picnic_L3_FS: + case OQS_SIG_picnic_L3_UR: + case OQS_SIG_picnic_L5_FS: + case OQS_SIG_picnic_L5_UR: + case OQS_SIG_picnic_default: + if (OQS_SIG_picnic_get(s, algid) != OQS_SUCCESS) { + free(s); + return NULL; + } + break; +#endif + default: + free(s); + return NULL; + } + + return s; +} + +int OQS_SIG_keygen(const OQS_SIG *s, uint8_t *priv, uint8_t *pub) { + if (s == NULL) { + return OQS_ERROR; + } else { + return s->keygen(s, priv, pub); + } +} + +int OQS_SIG_sign(const OQS_SIG *s, const uint8_t *priv, const uint8_t *msg, const size_t msg_len, uint8_t *sig, size_t *sig_len) { + if (s == NULL) { + return OQS_ERROR; + } else { + return s->sign(s, priv, msg, msg_len, sig, sig_len); + } +} + +int OQS_SIG_verify(const OQS_SIG *s, const uint8_t *pub, const uint8_t *msg, const size_t msg_len, const uint8_t *sig, const size_t sig_len) { + if (s == NULL) { + return OQS_ERROR; + } else { + return s->verify(s, pub, msg, msg_len, sig, sig_len); + } +} + +void OQS_SIG_free(OQS_SIG *s) { + if (s == NULL) { + return; + } + free(s); +} diff --git a/crypt/liboqs/sig/sig.h b/crypt/liboqs/sig/sig.h new file mode 100644 index 0000000000000000000000000000000000000000..60d9199dc3e9f5cb6f2ca66dd5cb589f1b7e8373 --- /dev/null +++ b/crypt/liboqs/sig/sig.h @@ -0,0 +1,166 @@ +/** + * \file sig.h + * \brief Header defining the API for generic OQS Signature + */ + +#ifndef __OQS_SIG_H +#define __OQS_SIG_H + +#include <stddef.h> +#include <stdint.h> +#include <oqs/rand.h> + +/** + * Supported signature algorithms. + * Note: the Picnic algs are not wrapped with a ENABLE_SIG_PICNIC + * to avoid forcing calling apps to define the macro. The library + * compiled without the macro fails if these algid are requested. 
+ */ +enum OQS_SIG_algid { + /* Picnic sig algs */ + OQS_SIG_picnic_default, // equivalent to OQS_SIG_picnic_L1_FS + OQS_SIG_picnic_L1_FS, + OQS_SIG_picnic_L1_UR, + OQS_SIG_picnic_L3_FS, + OQS_SIG_picnic_L3_UR, + OQS_SIG_picnic_L5_FS, + OQS_SIG_picnic_L5_UR, +}; + +/** + * OQS signature object + */ +typedef struct OQS_SIG OQS_SIG; // so the code below compiles... +struct OQS_SIG { + + /** + * PRNG + */ + OQS_RAND *rand; + + /** + * Specifies the name of the signature method + */ + char *method_name; + + /** + * Classical security in terms of the number of bits provided by the + * signature method. + */ + uint16_t estimated_classical_security; + + /** + * Equivalent quantum security in terms of the number of bits provided by the + * signature method. + */ + uint16_t estimated_quantum_security; + + /** + * Private key length. + */ + uint16_t priv_key_len; + + /** + * Public key length. + */ + uint16_t pub_key_len; + + /** + * Maximum signature length. + */ + uint32_t max_sig_len; + + /** + * Opaque pointer for passing around any computation context + */ + void *ctx; + + /** + * Pointer to a function for public and private signature key generation. + * + * @param s The signature structure. + * @param priv The signer's private key. + * @param pub The signer's public key. + * @return OQS_SUCCESS on success, or OQS_ERROR on failure. + */ + int (*keygen)(const OQS_SIG *s, uint8_t *priv, uint8_t *pub); + + /** + * Pointer to a function for signature generation. + * + * @param s The signature structure. + * @param priv The signer's private key. + * @param msg The message to sign. + * @param msg_len Length of the message to sign. + * @param sig The generated signature. Must be allocated by the caller, or NULL to learn how much space is needed, as returned in sig_len. + * @param sig_len In: length of sig, out: length of the generated signature. + * @return OQS_SUCCESS on success, or OQS_ERROR on failure. 
+ */ + int (*sign)(const OQS_SIG *s, const uint8_t *priv, const uint8_t *msg, const size_t msg_len, uint8_t *sig, size_t *sig_len); + + /** + * Pointer to a function for signature verification. + * + * @param s The signature structure. + * @param pub The signer's public key. + * @param msg The signed message. + * @param msg_len Length of the signed message. + * @param sig The signature to verify. + * @param sig_len Length of the signature to verify. + @return OQS_SUCCESS on success, or OQS_ERROR on failure. + */ + int (*verify)(const OQS_SIG *s, const uint8_t *pub, const uint8_t *msg, const size_t msg_len, const uint8_t *sig, const size_t sig_len); +}; + +/** + * Instantiate a new signature object. + * + * @param rand The random number generator. + * @param algid The id of the signature algorithm to be instantiated. + * @return A new signature object on success, or NULL on failure. + */ +OQS_SIG *OQS_SIG_new(OQS_RAND *rand, enum OQS_SIG_algid algid); + +/** + * Generates a new signature key pair. + * @param s Pointer to the signature object. + * @param priv Pointer where the generated private key will be stored. Caller + * must have allocated s->priv_key_len bytes. + * @param pub Pointer where the generated public key will be stored. Caller + * must have allocated s->pub_key_len bytes. + * @return OQS_SUCCESS on success, or OQS_ERROR on failure + */ +int OQS_SIG_keygen(const OQS_SIG *s, uint8_t *priv, uint8_t *pub); + +/** + * Generates a new signature. + * @param s Pointer to the signature object. + * @param priv Pointer to the signer's private key, of expected length `s->priv_key_len` bytes. + * @param msg Pointer to the message to sign. + * @param msg_len Length of the message to sign `msg`. + * @param sig Pointer where the generated signature will be stored. Caller must have allocated `s->max_sig_len` bytes. + * @param sig_len Pointer to the length of the generated signature. 
+ * @return OQS_SUCCESS on success, or OQS_ERROR on failure + */ +int OQS_SIG_sign(const OQS_SIG *s, const uint8_t *priv, const uint8_t *msg, const size_t msg_len, uint8_t *sig, size_t *sig_len); + +/** + * Verifies a signature. + * @param s Pointer to the signature object. + * @param pub Pointer to the signer's public key, of expected length `s->pub_key_len` bytes. + * @param msg Pointer to the signed message. + * @param msg_len Length of the signed message `msg`. + * @param sig Pointer to the signature. + * @param sig_len Length of the signature. + * @return OQS_SUCCESS on success, or OQS_ERROR on failure + */ +int OQS_SIG_verify(const OQS_SIG *s, const uint8_t *pub, const uint8_t *msg, const size_t msg_len, const uint8_t *sig, const size_t sig_len); + +/** + * Frees the signature object, de-initializing the underlying library code. + * Does NOT free the rand object passed to OQS_SIG_new. + * @param s The signature object. + */ +void OQS_SIG_free(OQS_SIG *s); + +#endif diff --git a/crypt/liboqs/sig/test_sig.c b/crypt/liboqs/sig/test_sig.c new file mode 100644 index 0000000000000000000000000000000000000000..3507278866c6d39ffafe4a9183666cfe43dfbfcc --- /dev/null +++ b/crypt/liboqs/sig/test_sig.c @@ -0,0 +1,353 @@ +#if defined(WINDOWS) +#pragma warning(disable : 4244 4293) +#endif + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <oqs/common.h> +#include <oqs/sig.h> +#include <oqs/rand.h> + +#include "../ds_benchmark.h" +#include "../common/common.h" + +// TODO: add signature size to benchmark + +struct sig_testcase { + enum OQS_SIG_algid algid; + char *algid_name; + int run; + int iter; +}; + +/* Add new testcases here */ +#ifdef ENABLE_SIG_PICNIC +struct sig_testcase sig_testcases[] = { + {OQS_SIG_picnic_L1_FS, "picnic_L1_FS", 0, 10}, + {OQS_SIG_picnic_L1_UR, "picnic_L1_UR", 0, 10}, + {OQS_SIG_picnic_L3_FS, "picnic_L3_FS", 0, 10}, + {OQS_SIG_picnic_L3_UR, "picnic_L3_UR", 0, 10}, + {OQS_SIG_picnic_L5_FS, 
"picnic_L5_FS", 0, 10}, + {OQS_SIG_picnic_L5_UR, "picnic_L5_UR", 0, 10}, +}; +#endif + +#define SIG_TEST_ITERATIONS 100 +#define SIG_BENCH_SECONDS 1 + +#define PRINT_HEX_STRING(label, str, len) \ + { \ + printf("%-20s (%4zu bytes): ", (label), (size_t)(len)); \ + for (size_t i = 0; i < (len); i++) { \ + printf("%02X", ((unsigned char *) (str))[i]); \ + } \ + printf("\n"); \ + } + +#define PRINT_PARTIAL_HEX_STRING(label, str, len, sublen) \ + { \ + printf("%-20s (%4zu bytes): ", (label), (size_t)(len)); \ + for (size_t i = 0; i < (sublen); i++) { \ + printf("%02X", ((unsigned char *) (str))[i]); \ + } \ + printf("..."); \ + for (size_t i = 0; i < (sublen); i++) { \ + printf("%02X", ((unsigned char *) (str))[len - sublen + i]); \ + } \ + printf("\n"); \ + } + +static int sig_test_correctness(OQS_RAND *rand, enum OQS_SIG_algid algid, const int print) { + + int rc; + + uint8_t *priv = NULL; + uint8_t *pub = NULL; + uint8_t *msg = NULL; + size_t msg_len; + uint8_t *sig = NULL; + size_t sig_len; + + /* setup signature object */ + OQS_SIG *s = OQS_SIG_new(rand, algid); + if (s == NULL) { + eprintf("sig new failed\n"); + goto err; + } + + if (print) { + printf("================================================================================\n"); + printf("Sample computation for signature method %s\n", s->method_name); + printf("================================================================================\n"); + } + + /* key generation */ + priv = malloc(s->priv_key_len); + if (priv == NULL) { + eprintf("priv malloc failed\n"); + goto err; + } + pub = malloc(s->pub_key_len); + if (pub == NULL) { + eprintf("pub malloc failed\n"); + goto err; + } + + rc = OQS_SIG_keygen(s, priv, pub); + if (rc != 1) { + eprintf("OQS_SIG_keygen failed\n"); + goto err; + } + + if (print) { + PRINT_HEX_STRING("Private key", priv, s->priv_key_len) + PRINT_HEX_STRING("Public key", pub, s->pub_key_len) + } + + /* Generate message to sign */ + msg_len = 100; // FIXME TODO: randomize based on 
scheme's max length + msg = malloc(msg_len); + if (msg == NULL) { + eprintf("msg malloc failed\n"); + goto err; + } + OQS_RAND_n(rand, msg, msg_len); + if (print) { + PRINT_HEX_STRING("Message", msg, msg_len) + } + + /* Signature */ + sig_len = s->max_sig_len; + sig = malloc(sig_len); + if (sig == NULL) { + eprintf("sig malloc failed\n"); + goto err; + } + + rc = OQS_SIG_sign(s, priv, msg, msg_len, sig, &sig_len); + if (rc != 1) { + eprintf("OQS_SIG_sign failed\n"); + goto err; + } + + if (print) { + if (sig_len > 40) { + // only print the parts of the sig if too long + PRINT_PARTIAL_HEX_STRING("Signature", sig, sig_len, 20); + } + } + + /* Verification */ + rc = OQS_SIG_verify(s, pub, msg, msg_len, sig, sig_len); + if (rc != 1) { + eprintf("ERROR: OQS_SIG_verify failed\n"); + goto err; + } + + if (print) { + printf("Signature is valid.\n"); + printf("\n\n"); + } + + rc = 1; + goto cleanup; + +err: + rc = 0; + +cleanup: + if (msg != NULL) { + free(msg); + } + if (sig != NULL) { + free(sig); + } + if (pub != NULL) { + free(pub); + } + if (priv != NULL) { + free(priv); + } + if (s != NULL) { + OQS_SIG_free(s); + } + + return rc; +} + +static int sig_test_correctness_wrapper(OQS_RAND *rand, enum OQS_SIG_algid algid, int iterations, bool quiet) { + int ret; + ret = sig_test_correctness(rand, algid, !quiet); + if (ret != 1) { + goto err; + } + + printf("Testing correctness and randomness of signature for %d iterations\n", iterations); + for (int i = 0; i < iterations; i++) { + ret = sig_test_correctness(rand, algid, 0); + if (ret != 1) { + goto err; + } + } + printf("All signatures were valid.\n"); + printf("\n\n"); + return 1; +err: + return ret; +} + +static int sig_bench_wrapper(OQS_RAND *rand, enum OQS_SIG_algid algid, const int seconds) { + int rc; + + uint8_t *priv = NULL; + uint8_t *pub = NULL; + uint8_t *msg = NULL; + size_t msg_len; + uint8_t *sig = NULL; + size_t sig_len; + + /* setup signature object */ + OQS_SIG *s = OQS_SIG_new(rand, algid); + if (s == 
NULL) { + eprintf("sig new failed\n"); + goto err; + } + + /* key generation */ + priv = malloc(s->priv_key_len); + if (priv == NULL) { + eprintf("priv malloc failed\n"); + goto err; + } + pub = malloc(s->pub_key_len); + if (pub == NULL) { + eprintf("pub malloc failed\n"); + goto err; + } + + printf("%-30s | %10s | %14s | %15s | %10s | %16s | %10s\n", s->method_name, "", "", "", "", "", ""); + + TIME_OPERATION_SECONDS({ OQS_SIG_keygen(s, priv, pub); }, "keygen", seconds); + + OQS_SIG_keygen(s, priv, pub); + /* Generate message to sign */ + msg_len = 100; // FIXME TODO: randomize based on scheme's max length + msg = malloc(msg_len); + if (msg == NULL) { + eprintf("msg malloc failed\n"); + goto err; + } + sig_len = s->max_sig_len; + sig = malloc(sig_len); + if (sig == NULL) { + eprintf("sig malloc failed\n"); + goto err; + } + + TIME_OPERATION_SECONDS({ OQS_SIG_sign(s, priv, msg, msg_len, sig, &sig_len); sig_len = s->max_sig_len; }, "sign", seconds); + + OQS_SIG_sign(s, priv, msg, msg_len, sig, &sig_len); + TIME_OPERATION_SECONDS({ OQS_SIG_verify(s, pub, msg, msg_len, sig, sig_len); }, "verify", seconds); + + rc = 1; + goto cleanup; + +err: + rc = 0; + +cleanup: + free(priv); + free(pub); + free(msg); + free(sig); + OQS_SIG_free(s); + + return rc; +} + +#ifdef ENABLE_SIG_PICNIC +int main(int argc, char **argv) { + int success = 1; + bool run_all = true; + bool quiet = false; + bool bench = false; + OQS_RAND *rand = NULL; + size_t sig_testcases_len = sizeof(sig_testcases) / sizeof(struct sig_testcase); + for (int i = 1; i < argc; i++) { + if (argv[i][0] == '-') { + if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "-help") == 0) || (strcmp(argv[i], "--help") == 0)) { + printf("Usage: ./test_sig [options] [schemes]\n"); + printf("\nOptions:\n"); + printf(" --quiet, -q\n"); + printf(" Less verbose output\n"); + printf(" --bench, -b\n"); + printf(" Run benchmarks\n"); + printf("\nschemes:\n"); + for (size_t i = 0; i < sig_testcases_len; i++) { + printf(" %s\n", 
sig_testcases[i].algid_name); + } + return EXIT_SUCCESS; + } else if (strcmp(argv[i], "--quiet") == 0 || strcmp(argv[i], "-q") == 0) { + quiet = true; + } else if (strcmp(argv[i], "--bench") == 0 || strcmp(argv[i], "-b") == 0) { + bench = true; + } + + } else { + run_all = false; + for (size_t j = 0; j < sig_testcases_len; j++) { + if (strcmp(argv[i], sig_testcases[j].algid_name) == 0) { + sig_testcases[j].run = 1; + } + } + } + } + + /* setup RAND */ + rand = OQS_RAND_new(OQS_RAND_alg_urandom_chacha20); + if (rand == NULL) { + goto err; + } + + for (size_t i = 0; i < sig_testcases_len; i++) { + if (run_all || sig_testcases[i].run == 1) { + int num_iter = sig_testcases[i].iter; + success = sig_test_correctness_wrapper(rand, sig_testcases[i].algid, num_iter, quiet); + } + if (success != 1) { + goto err; + } + } + + if (bench) { + PRINT_TIMER_HEADER + for (size_t i = 0; i < sig_testcases_len; i++) { + if (run_all || sig_testcases[i].run == 1) { + sig_bench_wrapper(rand, sig_testcases[i].algid, SIG_BENCH_SECONDS); + } + } + PRINT_TIMER_FOOTER + } + + success = 1; + goto cleanup; + +err: + success = 0; + eprintf("ERROR!\n"); + +cleanup: + if (rand) { + OQS_RAND_free(rand); + } + return (success == 1) ? EXIT_SUCCESS : EXIT_FAILURE; +} +#else // !ENABLE_SIG_PICNIC +int main() { + printf("No signature algorithm available. 
Make sure configure was run properly; see Readme.md.\n"); + return 0; +} +#endif diff --git a/crypt/liboqs/sig_picnic/Makefile.am b/crypt/liboqs/sig_picnic/Makefile.am new file mode 100644 index 0000000000000000000000000000000000000000..fc76287b3d03da4c233b0e1b0c7d8fd0d2516aa8 --- /dev/null +++ b/crypt/liboqs/sig_picnic/Makefile.am @@ -0,0 +1,6 @@ +AUTOMAKE_OPTIONS = foreign +noinst_LTLIBRARIES = libpicnic_i.la + +libpicnic_i_la_SOURCES = sig_picnic.c +libpicnic_i_la_CPPFLAGS = -march=native -I../../include -Iexternal -Iexternal/build +libpicnic_i_la_CPPFLAGS += $(AM_CPPFLAGS) diff --git a/crypt/liboqs/sig_picnic/build_picnic.sh b/crypt/liboqs/sig_picnic/build_picnic.sh new file mode 100755 index 0000000000000000000000000000000000000000..54f8ecbbaec8d402d12e207698fbb379f9d06576 --- /dev/null +++ b/crypt/liboqs/sig_picnic/build_picnic.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +mkdir -p external/build +# check if patch has already been applied +patch -p1 -N -f --dry-run --silent -d external < oqs_sig_picnic.patch 2>/dev/null 1>/dev/null +if [ $? -eq 0 ]; +then + # apply the patch + echo Patching Picnic external + patch -p1 -N -f -d external < oqs_sig_picnic.patch +fi +cd external/build +# make picnic +cmake -UWITH_LTO -DWITH_LTO:BOOL=OFF .. 
+make + diff --git a/crypt/liboqs/sig_picnic/oqs_sig_picnic.patch b/crypt/liboqs/sig_picnic/oqs_sig_picnic.patch new file mode 100644 index 0000000000000000000000000000000000000000..597afd7a3c3b1f6ed9bd0ce952a4b18591c195be --- /dev/null +++ b/crypt/liboqs/sig_picnic/oqs_sig_picnic.patch @@ -0,0 +1,3730 @@ +diff --git a/bitstream.c b/bitstream.c +index 69f5dfd..3bbd97d 100644 +--- a/bitstream.c ++++ b/bitstream.c +@@ -13,7 +13,7 @@ + + #include "bitstream.h" + +-bitstream_value_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits) { ++bitstream_value_t oqs_sig_picnic_bitstream_get_bits(bitstream_t* bs, unsigned int num_bits) { + const uint8_t* p = &bs->buffer[bs->position / 8]; + const unsigned int skip_bits = bs->position % 8; + const unsigned int start_bits = 8 - skip_bits; +@@ -37,7 +37,7 @@ bitstream_value_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits) { + return ret; + } + +-int bitstream_put_bits(bitstream_t* bs, bitstream_value_t value, unsigned int num_bits) { ++int oqs_sig_picnic_bitstream_put_bits(bitstream_t* bs, bitstream_value_t value, unsigned int num_bits) { + const unsigned int skip_bits = bs->position % 8; + uint8_t* p = &bs->buffer[bs->position / 8]; + +diff --git a/bitstream.h b/bitstream.h +index 551c90c..134ecad 100644 +--- a/bitstream.h ++++ b/bitstream.h +@@ -19,7 +19,7 @@ typedef struct { + size_t position; + } bitstream_t; + +-bitstream_value_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits); +-int bitstream_put_bits(bitstream_t* bs, bitstream_value_t value, unsigned int num_bits); ++bitstream_value_t oqs_sig_picnic_bitstream_get_bits(bitstream_t* bs, unsigned int num_bits); ++int oqs_sig_picnic_bitstream_put_bits(bitstream_t* bs, bitstream_value_t value, unsigned int num_bits); + + #endif +diff --git a/io.c b/io.c +index 0630aab..07d8fb4 100644 +--- a/io.c ++++ b/io.c +@@ -15,7 +15,7 @@ + + #include "compat.h" + +-void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, unsigned len) { ++void 
oqs_sig_picnic_mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, unsigned len) { + const size_t word_count = len / sizeof(uint64_t); + const uint64_t* rows = &CONST_FIRST_ROW(data)[word_count - 1]; + uint64_t* wdst = (uint64_t*)dst; +@@ -25,7 +25,7 @@ void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, unsigned len) { + } + } + +-void mzd_from_char_array(mzd_local_t* result, const uint8_t* data, unsigned len) { ++void oqs_sig_picnic_mzd_from_char_array(mzd_local_t* result, const uint8_t* data, unsigned len) { + const size_t word_count = len / sizeof(uint64_t); + uint64_t* rows = &FIRST_ROW(result)[word_count - 1]; + const uint64_t* wsrc = (const uint64_t*)data; +diff --git a/io.h b/io.h +index 5544302..2799a75 100644 +--- a/io.h ++++ b/io.h +@@ -15,8 +15,8 @@ + + #include "mzd_additional.h" + +-void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, unsigned numbytes); +-void mzd_from_char_array(mzd_local_t* result, const uint8_t* data, unsigned len); ++void oqs_sig_picnic_mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, unsigned numbytes); ++void oqs_sig_picnic_mzd_from_char_array(mzd_local_t* result, const uint8_t* data, unsigned len); + + void print_hex(FILE* out, const uint8_t* data, size_t len); + +diff --git a/kdf_shake.c b/kdf_shake.c +index 14b6dbb..572b402 100644 +--- a/kdf_shake.c ++++ b/kdf_shake.c +@@ -13,7 +13,7 @@ + + #include "kdf_shake.h" + +-void hash_init(hash_context* ctx, const picnic_instance_t* pp) { ++void oqs_sig_picnic_hash_init(hash_context* ctx, const picnic_instance_t* pp) { + if (pp->security_level == 64) { + Keccak_HashInitialize_SHAKE128(ctx); + } else { +diff --git a/kdf_shake.h b/kdf_shake.h +index 82152fd..9fd44a0 100644 +--- a/kdf_shake.h ++++ b/kdf_shake.h +@@ -35,15 +35,15 @@ + + typedef Keccak_HashInstance hash_context; + +-void hash_init(hash_context* ctx, const picnic_instance_t* pp); ++void oqs_sig_picnic_hash_init(hash_context* ctx, const picnic_instance_t* pp); + +-#define hash_update(ctx, 
data, size) Keccak_HashUpdate((ctx), (data), (size) << 3) +-#define hash_final(ctx) Keccak_HashFinal((ctx), NULL) +-#define hash_squeeze(buffer, buflen, ctx) Keccak_HashSqueeze((ctx), (buffer), (buflen) << 3) ++#define hash_update(ctx, data, size) oqs_sig_picnic_Keccak_HashUpdate((ctx), (data), (size) << 3) ++#define hash_final(ctx) oqs_sig_picnic_Keccak_HashFinal((ctx), NULL) ++#define hash_squeeze(buffer, buflen, ctx) oqs_sig_picnic_Keccak_HashSqueeze((ctx), (buffer), (buflen) << 3) + + typedef Keccak_HashInstance kdf_shake_t; + +-#define kdf_shake_init(ctx, pp) hash_init((ctx), (pp)) ++#define kdf_shake_init(ctx, pp) oqs_sig_picnic_hash_init((ctx), (pp)) + #define kdf_shake_update_key(ctx, key, keylen) hash_update((ctx), (key), (keylen)) + #define kdf_shake_finalize_key(ctx) hash_final((ctx)) + #define kdf_shake_get_randomness(ctx, dst, count) hash_squeeze((dst), (count), (ctx)) +diff --git a/lowmc.c b/lowmc.c +index d4e5387..2d7a036 100644 +--- a/lowmc.c ++++ b/lowmc.c +@@ -47,46 +47,46 @@ static void sbox_layer_uint64(mzd_local_t* y, mzd_local_t const* x, mask_t const + #ifdef WITH_CUSTOM_INSTANCES + static void sbox_layer_bitsliced(mzd_local_t* out, mzd_local_t const* in, mask_t const* mask) { + mzd_local_t* buffer[6] = {NULL}; +- mzd_local_init_multiple_ex(buffer, 6, 1, in->ncols, false); ++ oqs_sig_picnic_mzd_local_init_multiple_ex(buffer, 6, 1, in->ncols, false); + + // a +- mzd_local_t* x0m = mzd_and(buffer[0], mask->x0, in); ++ mzd_local_t* x0m = oqs_sig_picnic_mzd_and(buffer[0], mask->x0, in); + // b +- mzd_local_t* x1m = mzd_and(buffer[1], mask->x1, in); ++ mzd_local_t* x1m = oqs_sig_picnic_mzd_and(buffer[1], mask->x1, in); + // c +- mzd_local_t* x2m = mzd_and(buffer[2], mask->x2, in); ++ mzd_local_t* x2m = oqs_sig_picnic_mzd_and(buffer[2], mask->x2, in); + +- mzd_shift_left(x0m, x0m, 2); +- mzd_shift_left(x1m, x1m, 1); ++ oqs_sig_picnic_mzd_shift_left(x0m, x0m, 2); ++ oqs_sig_picnic_mzd_shift_left(x1m, x1m, 1); + + // b & c +- mzd_local_t* t0 = 
mzd_and(buffer[3], x1m, x2m); ++ mzd_local_t* t0 = oqs_sig_picnic_mzd_and(buffer[3], x1m, x2m); + // c & a +- mzd_local_t* t1 = mzd_and(buffer[4], x0m, x2m); ++ mzd_local_t* t1 = oqs_sig_picnic_mzd_and(buffer[4], x0m, x2m); + // a & b +- mzd_local_t* t2 = mzd_and(buffer[5], x0m, x1m); ++ mzd_local_t* t2 = oqs_sig_picnic_mzd_and(buffer[5], x0m, x1m); + + // (b & c) ^ a +- mzd_xor(t0, t0, x0m); ++ oqs_sig_picnic_mzd_xor(t0, t0, x0m); + + // (c & a) ^ a ^ b +- mzd_xor(t1, t1, x0m); +- mzd_xor(t1, t1, x1m); ++ oqs_sig_picnic_mzd_xor(t1, t1, x0m); ++ oqs_sig_picnic_mzd_xor(t1, t1, x1m); + + // (a & b) ^ a ^ b ^c +- mzd_xor(t2, t2, x0m); +- mzd_xor(t2, t2, x1m); +- mzd_xor(t2, t2, x2m); ++ oqs_sig_picnic_mzd_xor(t2, t2, x0m); ++ oqs_sig_picnic_mzd_xor(t2, t2, x1m); ++ oqs_sig_picnic_mzd_xor(t2, t2, x2m); + +- mzd_shift_right(t0, t0, 2); +- mzd_shift_right(t1, t1, 1); ++ oqs_sig_picnic_mzd_shift_right(t0, t0, 2); ++ oqs_sig_picnic_mzd_shift_right(t1, t1, 1); + +- mzd_and(out, in, mask->mask); +- mzd_xor(out, out, t2); +- mzd_xor(out, out, t0); +- mzd_xor(out, out, t1); ++ oqs_sig_picnic_mzd_and(out, in, mask->mask); ++ oqs_sig_picnic_mzd_xor(out, out, t2); ++ oqs_sig_picnic_mzd_xor(out, out, t0); ++ oqs_sig_picnic_mzd_xor(out, out, t1); + +- mzd_local_free_multiple(buffer); ++ oqs_sig_picnic_mzd_local_free_multiple(buffer); + } + + #ifdef WITH_OPT +@@ -264,17 +264,17 @@ static sbox_layer_impl get_sbox_layer(const lowmc_t* lowmc) { + #if defined(REDUCED_LINEAR_LAYER) + static mzd_local_t* lowmc_reduced_linear_layer(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key, + mzd_local_t const* p, sbox_layer_impl sbox_layer) { +- mzd_local_t* x = mzd_local_init_ex(1, lowmc->n, false); +- mzd_local_t* y = mzd_local_init_ex(1, lowmc->n, false); +- mzd_local_t* nl_part = mzd_local_init_ex(1, lowmc->r * 32, false); ++ mzd_local_t* x = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->n, false); ++ mzd_local_t* y = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->n, false); ++ mzd_local_t* nl_part 
= oqs_sig_picnic_mzd_local_init_ex(1, lowmc->r * 32, false); + +- mzd_local_copy(x, p); ++ oqs_sig_picnic_mzd_local_copy(x, p); + #if defined(MUL_M4RI) +- mzd_addmul_vl(x, lowmc_key, lowmc->k0_lookup); +- mzd_mul_vl(nl_part, lowmc_key, lowmc->precomputed_non_linear_part_lookup); ++ oqs_sig_picnic_mzd_addmul_vl(x, lowmc_key, lowmc->k0_lookup); ++ oqs_sig_picnic_mzd_mul_vl(nl_part, lowmc_key, lowmc->precomputed_non_linear_part_lookup); + #else +- mzd_addmul_v(x, lowmc_key, lowmc->k0_matrix); +- mzd_mul_v(nl_part, lowmc_key, lowmc->precomputed_non_linear_part_matrix); ++ oqs_sig_picnic_mzd_addmul_v(x, lowmc_key, lowmc->k0_matrix); ++ oqs_sig_picnic_mzd_mul_v(nl_part, lowmc_key, lowmc->precomputed_non_linear_part_matrix); + #endif + + word mask = WORD_C(0xFFFFFFFF); +@@ -287,28 +287,28 @@ static mzd_local_t* lowmc_reduced_linear_layer(lowmc_t const* lowmc, lowmc_key_t + mask = ~mask; + + #if defined(MUL_M4RI) +- mzd_mul_vl(y, x, round->l_lookup); ++ oqs_sig_picnic_mzd_mul_vl(y, x, round->l_lookup); + #else +- mzd_mul_v(y, x, round->l_matrix); ++ oqs_sig_picnic_mzd_mul_v(y, x, round->l_matrix); + #endif +- mzd_xor(x, y, round->constant); ++ oqs_sig_picnic_mzd_xor(x, y, round->constant); + } + +- mzd_local_free(y); +- mzd_local_free(nl_part); ++ oqs_sig_picnic_mzd_local_free(y); ++ oqs_sig_picnic_mzd_local_free(nl_part); + return x; + } + #else + static mzd_local_t* lowmc_plain(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key, + mzd_local_t const* p, sbox_layer_impl sbox_layer) { +- mzd_local_t* x = mzd_local_init_ex(1, lowmc->n, false); +- mzd_local_t* y = mzd_local_init_ex(1, lowmc->n, false); ++ mzd_local_t* x = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->n, false); ++ mzd_local_t* y = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->n, false); + +- mzd_local_copy(x, p); ++ oqs_sig_picnic_mzd_local_copy(x, p); + #if defined(MUL_M4RI) +- mzd_addmul_vl(x, lowmc_key, lowmc->k0_lookup); ++ oqs_sig_picnic_mzd_addmul_vl(x, lowmc_key, lowmc->k0_lookup); + #else +- mzd_addmul_v(x, 
lowmc_key, lowmc->k0_matrix); ++ oqs_sig_picnic_mzd_addmul_v(x, lowmc_key, lowmc->k0_matrix); + #endif + + lowmc_round_t const* round = lowmc->rounds; +@@ -316,24 +316,24 @@ static mzd_local_t* lowmc_plain(lowmc_t const* lowmc, lowmc_key_t const* lowmc_k + sbox_layer(x, x, &lowmc->mask); + + #if defined(MUL_M4RI) +- mzd_mul_vl(y, x, round->l_lookup); ++ oqs_sig_picnic_mzd_mul_vl(y, x, round->l_lookup); + #else +- mzd_mul_v(y, x, round->l_matrix); ++ oqs_sig_picnic_mzd_mul_v(y, x, round->l_matrix); + #endif +- mzd_xor(x, y, round->constant); ++ oqs_sig_picnic_mzd_xor(x, y, round->constant); + #if defined(MUL_M4RI) && !defined(REDUCED_LINEAR_LAYER) +- mzd_addmul_vl(x, lowmc_key, round->k_lookup); ++ oqs_sig_picnic_mzd_addmul_vl(x, lowmc_key, round->k_lookup); + #else +- mzd_addmul_v(x, lowmc_key, round->k_matrix); ++ oqs_sig_picnic_mzd_addmul_v(x, lowmc_key, round->k_matrix); + #endif + } + +- mzd_local_free(y); ++ oqs_sig_picnic_mzd_local_free(y); + return x; + } + #endif + +-mzd_local_t* lowmc_call(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key, mzd_local_t const* p) { ++mzd_local_t* oqs_sig_picnic_lowmc_call(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key, mzd_local_t const* p) { + sbox_layer_impl sbox_layer = get_sbox_layer(lowmc); + if (!sbox_layer) { + return NULL; +diff --git a/lowmc.h b/lowmc.h +index 38eba01..ef46d82 100644 +--- a/lowmc.h ++++ b/lowmc.h +@@ -19,6 +19,6 @@ + * \param p the plaintext + * \return the ciphertext + */ +-mzd_local_t* lowmc_call(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key, mzd_local_t const* p); ++mzd_local_t* oqs_sig_picnic_lowmc_call(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key, mzd_local_t const* p); + + #endif +diff --git a/lowmc_128_128_20.c b/lowmc_128_128_20.c +index 32c87a4..c79516e 100644 +--- a/lowmc_128_128_20.c ++++ b/lowmc_128_128_20.c +@@ -5723,7 +5723,7 @@ static const mzd_local_t precomputed_round_key_matrix_non_linear_part_128_128_20 + }}; + + #endif +-const mzd_local_t* 
lowmc_128_128_20_get_linear_layer(uint32_t r) { ++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_linear_layer(uint32_t r) { + switch(r) { + default: + return NULL; +@@ -5771,7 +5771,7 @@ const mzd_local_t* lowmc_128_128_20_get_linear_layer(uint32_t r) { + } + + #if !defined(REDUCED_LINEAR_LAYER) +-const mzd_local_t* lowmc_128_128_20_get_round_key(uint32_t r) { ++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_round_key(uint32_t r) { + switch(r) { + default: + return NULL; +@@ -5822,7 +5822,7 @@ const mzd_local_t* lowmc_128_128_20_get_round_key(uint32_t r) { + #endif + + +-const mzd_local_t* lowmc_128_128_20_get_round_const(uint32_t r) { ++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_round_const(uint32_t r) { + switch(r) { + default: + return NULL; +@@ -5869,11 +5869,11 @@ const mzd_local_t* lowmc_128_128_20_get_round_const(uint32_t r) { + } + } + #if defined(REDUCED_LINEAR_LAYER) +-const mzd_local_t* lowmc_128_128_20_get_precomputed_round_key_matrix_non_linear_part(void) { ++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_precomputed_round_key_matrix_non_linear_part(void) { + return &precomputed_round_key_matrix_non_linear_part_128_128_20; + } + +-const mzd_local_t* lowmc_128_128_20_get_precomputed_round_key_matrix_linear_part(void) { ++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_precomputed_round_key_matrix_linear_part(void) { + return &precomputed_round_key_matrix_linear_part_128_128_20; + } + #endif +diff --git a/lowmc_128_128_20.h b/lowmc_128_128_20.h +index 83a1b67..8cbd325 100644 +--- a/lowmc_128_128_20.h ++++ b/lowmc_128_128_20.h +@@ -6,10 +6,10 @@ + #include "mzd_additional.h" + + +-const mzd_local_t* lowmc_128_128_20_get_linear_layer(uint32_t r); +-const mzd_local_t* lowmc_128_128_20_get_round_key(uint32_t r); +-const mzd_local_t* lowmc_128_128_20_get_round_const(uint32_t r); +-const mzd_local_t* lowmc_128_128_20_get_precomputed_round_key_matrix_non_linear_part(void); +-const mzd_local_t* 
lowmc_128_128_20_get_precomputed_round_key_matrix_linear_part(void); ++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_linear_layer(uint32_t r); ++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_round_key(uint32_t r); ++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_round_const(uint32_t r); ++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_precomputed_round_key_matrix_non_linear_part(void); ++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_precomputed_round_key_matrix_linear_part(void); + + #endif +diff --git a/lowmc_192_192_30.c b/lowmc_192_192_30.c +index 40a4eb2..7e82863 100644 +--- a/lowmc_192_192_30.c ++++ b/lowmc_192_192_30.c +@@ -12415,7 +12415,7 @@ static const mzd_local_t precomputed_round_key_matrix_non_linear_part_192_192_30 + }}; + + #endif +-const mzd_local_t* lowmc_192_192_30_get_linear_layer(uint32_t r) { ++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_linear_layer(uint32_t r) { + switch(r) { + default: + return NULL; +@@ -12483,7 +12483,7 @@ const mzd_local_t* lowmc_192_192_30_get_linear_layer(uint32_t r) { + } + + #if !defined(REDUCED_LINEAR_LAYER) +-const mzd_local_t* lowmc_192_192_30_get_round_key(uint32_t r) { ++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_round_key(uint32_t r) { + switch(r) { + default: + return NULL; +@@ -12554,7 +12554,7 @@ const mzd_local_t* lowmc_192_192_30_get_round_key(uint32_t r) { + #endif + + +-const mzd_local_t* lowmc_192_192_30_get_round_const(uint32_t r) { ++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_round_const(uint32_t r) { + switch(r) { + default: + return NULL; +@@ -12621,11 +12621,11 @@ const mzd_local_t* lowmc_192_192_30_get_round_const(uint32_t r) { + } + } + #if defined(REDUCED_LINEAR_LAYER) +-const mzd_local_t* lowmc_192_192_30_get_precomputed_round_key_matrix_non_linear_part(void) { ++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_precomputed_round_key_matrix_non_linear_part(void) { + return 
&precomputed_round_key_matrix_non_linear_part_192_192_30; + } + +-const mzd_local_t* lowmc_192_192_30_get_precomputed_round_key_matrix_linear_part(void) { ++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_precomputed_round_key_matrix_linear_part(void) { + return &precomputed_round_key_matrix_linear_part_192_192_30; + } + #endif +diff --git a/lowmc_192_192_30.h b/lowmc_192_192_30.h +index fb0169a..538166b 100644 +--- a/lowmc_192_192_30.h ++++ b/lowmc_192_192_30.h +@@ -6,10 +6,10 @@ + #include "mzd_additional.h" + + +-const mzd_local_t* lowmc_192_192_30_get_linear_layer(uint32_t r); +-const mzd_local_t* lowmc_192_192_30_get_round_key(uint32_t r); +-const mzd_local_t* lowmc_192_192_30_get_round_const(uint32_t r); +-const mzd_local_t* lowmc_192_192_30_get_precomputed_round_key_matrix_non_linear_part(void); +-const mzd_local_t* lowmc_192_192_30_get_precomputed_round_key_matrix_linear_part(void); ++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_linear_layer(uint32_t r); ++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_round_key(uint32_t r); ++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_round_const(uint32_t r); ++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_precomputed_round_key_matrix_non_linear_part(void); ++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_precomputed_round_key_matrix_linear_part(void); + + #endif +diff --git a/lowmc_256_256_38.c b/lowmc_256_256_38.c +index fd2284f..092d6f0 100644 +--- a/lowmc_256_256_38.c ++++ b/lowmc_256_256_38.c +@@ -20623,7 +20623,7 @@ static const mzd_local_t precomputed_round_key_matrix_non_linear_part_256_256_38 + }}; + + #endif +-const mzd_local_t* lowmc_256_256_38_get_linear_layer(uint32_t r) { ++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_linear_layer(uint32_t r) { + switch(r) { + default: + return NULL; +@@ -20707,7 +20707,7 @@ const mzd_local_t* lowmc_256_256_38_get_linear_layer(uint32_t r) { + } + + #if !defined(REDUCED_LINEAR_LAYER) +-const mzd_local_t* 
lowmc_256_256_38_get_round_key(uint32_t r) { ++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_round_key(uint32_t r) { + switch(r) { + default: + return NULL; +@@ -20794,7 +20794,7 @@ const mzd_local_t* lowmc_256_256_38_get_round_key(uint32_t r) { + #endif + + +-const mzd_local_t* lowmc_256_256_38_get_round_const(uint32_t r) { ++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_round_const(uint32_t r) { + switch(r) { + default: + return NULL; +@@ -20877,11 +20877,11 @@ const mzd_local_t* lowmc_256_256_38_get_round_const(uint32_t r) { + } + } + #if defined(REDUCED_LINEAR_LAYER) +-const mzd_local_t* lowmc_256_256_38_get_precomputed_round_key_matrix_non_linear_part(void) { ++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_precomputed_round_key_matrix_non_linear_part(void) { + return &precomputed_round_key_matrix_non_linear_part_256_256_38; + } + +-const mzd_local_t* lowmc_256_256_38_get_precomputed_round_key_matrix_linear_part(void) { ++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_precomputed_round_key_matrix_linear_part(void) { + return &precomputed_round_key_matrix_linear_part_256_256_38; + } + #endif +diff --git a/lowmc_256_256_38.h b/lowmc_256_256_38.h +index 6bb0c59..b0c0afc 100644 +--- a/lowmc_256_256_38.h ++++ b/lowmc_256_256_38.h +@@ -6,10 +6,10 @@ + #include "mzd_additional.h" + + +-const mzd_local_t* lowmc_256_256_38_get_linear_layer(uint32_t r); +-const mzd_local_t* lowmc_256_256_38_get_round_key(uint32_t r); +-const mzd_local_t* lowmc_256_256_38_get_round_const(uint32_t r); +-const mzd_local_t* lowmc_256_256_38_get_precomputed_round_key_matrix_non_linear_part(void); +-const mzd_local_t* lowmc_256_256_38_get_precomputed_round_key_matrix_linear_part(void); ++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_linear_layer(uint32_t r); ++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_round_key(uint32_t r); ++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_round_const(uint32_t r); ++const mzd_local_t* 
oqs_sig_picnic_lowmc_256_256_38_get_precomputed_round_key_matrix_non_linear_part(void); ++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_precomputed_round_key_matrix_linear_part(void); + + #endif +diff --git a/lowmc_pars.c b/lowmc_pars.c +index 24bfb76..6b59834 100644 +--- a/lowmc_pars.c ++++ b/lowmc_pars.c +@@ -38,20 +38,20 @@ + #include <string.h> + + static mask_t* prepare_masks(mask_t* mask, unsigned int n, unsigned int m) { +- mask->x0 = mzd_local_init(1, n); +- mask->x1 = mzd_local_init_ex(1, n, false); +- mask->x2 = mzd_local_init_ex(1, n, false); +- mask->mask = mzd_local_init(1, n); ++ mask->x0 = oqs_sig_picnic_mzd_local_init(1, n); ++ mask->x1 = oqs_sig_picnic_mzd_local_init_ex(1, n, false); ++ mask->x2 = oqs_sig_picnic_mzd_local_init_ex(1, n, false); ++ mask->mask = oqs_sig_picnic_mzd_local_init(1, n); + + const unsigned int bound = n - 3 * m; + for (unsigned int i = 0; i < bound; ++i) { +- mzd_local_write_bit(mask->mask, 0, i, 1); ++ oqs_sig_picnic_mzd_local_write_bit(mask->mask, 0, i, 1); + } + for (unsigned int i = bound; i < n; i += 3) { +- mzd_local_write_bit(mask->x0, 0, i, 1); ++ oqs_sig_picnic_mzd_local_write_bit(mask->x0, 0, i, 1); + } +- mzd_shift_left(mask->x1, mask->x0, 1); +- mzd_shift_left(mask->x2, mask->x0, 2); ++ oqs_sig_picnic_mzd_shift_left(mask->x1, mask->x0, 1); ++ oqs_sig_picnic_mzd_shift_left(mask->x2, mask->x0, 2); + + mask->x0i = FIRST_ROW(mask->x0)[n / 64 - 1]; + mask->x1i = FIRST_ROW(mask->x1)[n / 64 - 1]; +@@ -61,7 +61,7 @@ static mask_t* prepare_masks(mask_t* mask, unsigned int n, unsigned int m) { + return mask; + } + +-bool lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, unsigned int k) { ++bool oqs_sig_picnic_lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, unsigned int k) { + if (!lowmc) { + return false; + } +@@ -80,19 +80,19 @@ bool lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, + + #define LOAD_OPT(N, K, R) \ + 
lowmc->precomputed_non_linear_part_matrix = \ +- lowmc_##N##_##K##_##R##_get_precomputed_round_key_matrix_non_linear_part(); \ +- lowmc->k0_matrix = lowmc_##N##_##K##_##R##_get_precomputed_round_key_matrix_linear_part() ++ oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_precomputed_round_key_matrix_non_linear_part(); \ ++ lowmc->k0_matrix = oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_precomputed_round_key_matrix_linear_part() + + #define LOAD(N, K, R) \ +- lowmc->k0_matrix = lowmc_##N##_##K##_##R##_get_round_key(0); \ ++ lowmc->k0_matrix = oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_round_key(0); \ + for (unsigned int i = 0; i < (R); ++i) { \ +- lowmc->rounds[i].k_matrix = lowmc_##N##_##K##_##R##_get_round_key(i + 1); \ ++ lowmc->rounds[i].k_matrix = oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_round_key(i + 1); \ + } + + #define LOAD_FROM_FIXED_IMPL(N, K, R, PREC) \ + for (unsigned int i = 0; i < (R); ++i) { \ +- lowmc->rounds[i].l_matrix = lowmc_##N##_##K##_##R##_get_linear_layer(i); \ +- lowmc->rounds[i].constant = lowmc_##N##_##K##_##R##_get_round_const(i); \ ++ lowmc->rounds[i].l_matrix = oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_linear_layer(i); \ ++ lowmc->rounds[i].constant = oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_round_const(i); \ + } \ + LOAD##PREC(N, K, R); + +@@ -127,29 +127,29 @@ bool lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, + } + #endif + +- lowmc_clear(lowmc); ++ oqs_sig_picnic_lowmc_clear(lowmc); + return false; + + precomp: + + #ifdef MUL_M4RI +- lowmc->k0_lookup = mzd_precompute_matrix_lookup(lowmc->k0_matrix); ++ lowmc->k0_lookup = oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->k0_matrix); + #ifdef REDUCED_LINEAR_LAYER + lowmc->precomputed_non_linear_part_lookup = +- mzd_precompute_matrix_lookup(lowmc->precomputed_non_linear_part_matrix); ++ oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->precomputed_non_linear_part_matrix); + #endif + #endif + #ifdef MUL_M4RI + for (unsigned int i = 0; i < r; ++i) { +- 
lowmc->rounds[i].l_lookup = mzd_precompute_matrix_lookup(lowmc->rounds[i].l_matrix); ++ lowmc->rounds[i].l_lookup = oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->rounds[i].l_matrix); + #if !defined(REDUCED_LINEAR_LAYER) +- lowmc->rounds[i].k_lookup = mzd_precompute_matrix_lookup(lowmc->rounds[i].k_matrix); ++ lowmc->rounds[i].k_lookup = oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->rounds[i].k_matrix); + #endif + } + #endif + + if (!prepare_masks(&lowmc->mask, n, m)) { +- lowmc_clear(lowmc); ++ oqs_sig_picnic_lowmc_clear(lowmc); + return false; + } + +@@ -164,7 +164,7 @@ static mzd_local_t* readMZD_TStructFromFile(FILE* file) { + ret += fread(&(nrows), sizeof(uint32_t), 1, file); + ret += fread(&(ncols), sizeof(uint32_t), 1, file); + +- mzd_local_t* A = mzd_local_init_ex(nrows, ncols, false); ++ mzd_local_t* A = oqs_sig_picnic_mzd_local_init_ex(nrows, ncols, false); + for (unsigned int i = 0; i < A->nrows; i++) { + ret += fread(ROW(A, i), A->rowstride * sizeof(word), 1, file); + } +@@ -172,7 +172,7 @@ static mzd_local_t* readMZD_TStructFromFile(FILE* file) { + return A; + } + +-bool lowmc_read_file(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, ++bool oqs_sig_picnic_lowmc_read_file(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, + unsigned int k) { + if (!lowmc) { + return false; +@@ -217,40 +217,40 @@ bool lowmc_read_file(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned in + } + #endif + +-void lowmc_clear(lowmc_t* lowmc) { ++void oqs_sig_picnic_lowmc_clear(lowmc_t* lowmc) { + for (unsigned int i = 0; i < lowmc->r; ++i) { + #ifdef MUL_M4RI + #if !defined(REDUCED_LINEAR_LAYER) +- mzd_local_free(lowmc->rounds[i].k_lookup); ++ oqs_sig_picnic_mzd_local_free(lowmc->rounds[i].k_lookup); + #endif +- mzd_local_free(lowmc->rounds[i].l_lookup); ++ oqs_sig_picnic_mzd_local_free(lowmc->rounds[i].l_lookup); + #endif + if (lowmc->needs_free) { +- mzd_local_free((mzd_local_t*)lowmc->rounds[i].constant); ++ 
oqs_sig_picnic_mzd_local_free((mzd_local_t*)lowmc->rounds[i].constant); + #if !defined(REDUCED_LINEAR_LAYER) +- mzd_local_free((mzd_local_t*)lowmc->rounds[i].k_matrix); ++ oqs_sig_picnic_mzd_local_free((mzd_local_t*)lowmc->rounds[i].k_matrix); + #endif +- mzd_local_free((mzd_local_t*)lowmc->rounds[i].l_matrix); ++ oqs_sig_picnic_mzd_local_free((mzd_local_t*)lowmc->rounds[i].l_matrix); + } + } + #ifdef REDUCED_LINEAR_LAYER + if (lowmc->needs_free) { +- mzd_local_free((mzd_local_t*)lowmc->precomputed_non_linear_part_matrix); ++ oqs_sig_picnic_mzd_local_free((mzd_local_t*)lowmc->precomputed_non_linear_part_matrix); + } + #endif + #ifdef MUL_M4RI +- mzd_local_free(lowmc->k0_lookup); ++ oqs_sig_picnic_mzd_local_free(lowmc->k0_lookup); + #ifdef REDUCED_LINEAR_LAYER +- mzd_local_free(lowmc->precomputed_non_linear_part_lookup); ++ oqs_sig_picnic_mzd_local_free(lowmc->precomputed_non_linear_part_lookup); + #endif + #endif + if (lowmc->needs_free) { +- mzd_local_free((mzd_local_t*)lowmc->k0_matrix); ++ oqs_sig_picnic_mzd_local_free((mzd_local_t*)lowmc->k0_matrix); + } + free(lowmc->rounds); + +- mzd_local_free(lowmc->mask.x0); +- mzd_local_free(lowmc->mask.x1); +- mzd_local_free(lowmc->mask.x2); +- mzd_local_free(lowmc->mask.mask); ++ oqs_sig_picnic_mzd_local_free(lowmc->mask.x0); ++ oqs_sig_picnic_mzd_local_free(lowmc->mask.x1); ++ oqs_sig_picnic_mzd_local_free(lowmc->mask.x2); ++ oqs_sig_picnic_mzd_local_free(lowmc->mask.mask); + } +diff --git a/lowmc_pars.h b/lowmc_pars.h +index 0adaca8..429d98a 100644 +--- a/lowmc_pars.h ++++ b/lowmc_pars.h +@@ -72,16 +72,16 @@ typedef struct { + * + * \return parameters defining a LowMC instance (including a key) + */ +-bool lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, unsigned int k); ++bool oqs_sig_picnic_lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, unsigned int k); + + /** + * Clears the allocated LowMC parameters + * + * \param lowmc the LowMC parameters to be cleared + */ 
+-void lowmc_clear(lowmc_t* lowmc); ++void oqs_sig_picnic_lowmc_clear(lowmc_t* lowmc); + +-bool lowmc_read_file(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, ++bool oqs_sig_picnic_lowmc_read_file(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, + unsigned int k); + + #endif +diff --git a/mpc.c b/mpc.c +index db17ffb..095a901 100644 +--- a/mpc.c ++++ b/mpc.c +@@ -22,32 +22,32 @@ + + #include <string.h> + +-void mpc_clear(mzd_local_t* const* res, unsigned sc) { ++void oqs_sig_picnic_mpc_clear(mzd_local_t* const* res, unsigned sc) { + for (unsigned int i = 0; i < sc; i++) { +- mzd_local_clear(res[i]); ++ oqs_sig_picnic_mzd_local_clear(res[i]); + } + } + +-void mpc_shift_right(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count, ++void oqs_sig_picnic_mpc_shift_right(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count, + unsigned sc) { +- MPC_LOOP_CONST(mzd_shift_right, res, val, count, sc); ++ MPC_LOOP_CONST(oqs_sig_picnic_mzd_shift_right, res, val, count, sc); + } + +-void mpc_shift_left(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count, unsigned sc) { +- MPC_LOOP_CONST(mzd_shift_left, res, val, count, sc); ++void oqs_sig_picnic_mpc_shift_left(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count, unsigned sc) { ++ MPC_LOOP_CONST(oqs_sig_picnic_mzd_shift_left, res, val, count, sc); + } + +-void mpc_and_const(mzd_local_t* const* result, mzd_local_t* const* first, mzd_local_t const* second, ++void oqs_sig_picnic_mpc_and_const(mzd_local_t* const* result, mzd_local_t* const* first, mzd_local_t const* second, + unsigned sc) { +- MPC_LOOP_CONST(mzd_xor, result, first, second, sc); ++ MPC_LOOP_CONST(oqs_sig_picnic_mzd_xor, result, first, second, sc); + } + +-void mpc_xor(mzd_local_t* const* result, mzd_local_t* const* first, mzd_local_t* const* second, ++void oqs_sig_picnic_mpc_xor(mzd_local_t* const* result, mzd_local_t* const* first, mzd_local_t* const* second, + unsigned sc) { +- 
MPC_LOOP_SHARED(mzd_xor, result, first, second, sc); ++ MPC_LOOP_SHARED(oqs_sig_picnic_mzd_xor, result, first, second, sc); + } + +-void mpc_and_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, uint64_t const* r, ++void oqs_sig_picnic_mpc_and_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, uint64_t const* r, + view_t* view, unsigned viewshift) { + for (unsigned m = 0; m < SC_PROOF; ++m) { + const unsigned j = (m + 1) % SC_PROOF; +@@ -96,27 +96,27 @@ void mpc_and_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second + #ifdef WITH_SSE2 + #ifdef WITH_CUSTOM_INSTANCES + ATTRIBUTE_TARGET("sse2") +-void mpc_and_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r, ++void oqs_sig_picnic_mpc_and_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r, + view_t* view, unsigned viewshift) { + mpc_and_def(__m128i, _mm_and_si128, _mm_xor_si128, mm128_shift_right); + } + + ATTRIBUTE_TARGET("sse2") +-void mpc_and_256_sse(__m128i res[SC_PROOF][2], __m128i const first[SC_PROOF][2], ++void oqs_sig_picnic_mpc_and_256_sse(__m128i res[SC_PROOF][2], __m128i const first[SC_PROOF][2], + __m128i const second[SC_PROOF][2], __m128i const r[SC_PROOF][2], view_t* view, + unsigned viewshift) { + mpc_and_def_multiple(__m128i, mm256_and_sse, mm256_xor_sse, mm256_shift_right_sse, 2); + } + + ATTRIBUTE_TARGET("sse2") +-void mpc_and_384_sse(__m128i res[SC_PROOF][3], __m128i const first[SC_PROOF][3], ++void oqs_sig_picnic_mpc_and_384_sse(__m128i res[SC_PROOF][3], __m128i const first[SC_PROOF][3], + __m128i const second[SC_PROOF][3], __m128i const r[SC_PROOF][3], view_t* view, + unsigned viewshift) { + mpc_and_def_multiple(__m128i, mm384_and_sse, mm384_xor_sse, mm384_shift_right_sse, 3); + } + + ATTRIBUTE_TARGET("sse2") +-void mpc_and_512_sse(__m128i res[SC_PROOF][4], __m128i const first[SC_PROOF][4], ++void oqs_sig_picnic_mpc_and_512_sse(__m128i res[SC_PROOF][4], __m128i const 
first[SC_PROOF][4], + __m128i const second[SC_PROOF][4], __m128i const r[SC_PROOF][4], view_t* view, + unsigned viewshift) { + mpc_and_def_multiple(__m128i, mm512_and_sse, mm512_xor_sse, mm512_shift_right_sse, 4); +@@ -169,7 +169,7 @@ void mpc_and_512_neon(uint32x4_t res[SC_PROOF][4], uint32x4_t const first[SC_PRO + #endif + #endif + +-void mpc_and(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second, ++void oqs_sig_picnic_mpc_and(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second, + mzd_local_t* const* r, view_t* view, unsigned viewshift, mzd_local_t* const* buffer) { + mzd_local_t* b = buffer[0]; + +@@ -177,26 +177,26 @@ void mpc_and(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* co + const unsigned j = (m + 1) % SC_PROOF; + + // f[m] & s[m] +- mzd_and(res[m], first[m], second[m]); ++ oqs_sig_picnic_mzd_and(res[m], first[m], second[m]); + + // f[m + 1] & s[m] +- mzd_and(b, first[j], second[m]); +- mzd_xor(res[m], res[m], b); ++ oqs_sig_picnic_mzd_and(b, first[j], second[m]); ++ oqs_sig_picnic_mzd_xor(res[m], res[m], b); + + // f[m] & s[m + 1] +- mzd_and(b, first[m], second[j]); +- mzd_xor(res[m], res[m], b); ++ oqs_sig_picnic_mzd_and(b, first[m], second[j]); ++ oqs_sig_picnic_mzd_xor(res[m], res[m], b); + + // ... 
^ r[m] ^ r[m + 1] +- mzd_xor(res[m], res[m], r[m]); +- mzd_xor(res[m], res[m], r[j]); ++ oqs_sig_picnic_mzd_xor(res[m], res[m], r[m]); ++ oqs_sig_picnic_mzd_xor(res[m], res[m], r[j]); + } + +- mpc_shift_right(buffer, res, viewshift, SC_PROOF); +- mpc_xor(view->s, view->s, buffer, SC_PROOF); ++ oqs_sig_picnic_mpc_shift_right(buffer, res, viewshift, SC_PROOF); ++ oqs_sig_picnic_mpc_xor(view->s, view->s, buffer, SC_PROOF); + } + +-void mpc_and_verify_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, ++void oqs_sig_picnic_mpc_and_verify_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, + uint64_t const* r, view_t* view, uint64_t const mask, + unsigned viewshift) { + for (unsigned m = 0; m < (SC_VERIFY - 1); ++m) { +@@ -312,13 +312,13 @@ void mpc_and_verify_512_avx(__m256i res[SC_VERIFY][2], __m256i const first[SC_VE + + #ifdef WITH_NEON + #ifdef WITH_CUSTOM_INSTANCES +-void mpc_and_verify_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second, ++void oqs_sig_picnic_mpc_and_verify_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second, + uint32x4_t const* r, view_t* view, uint32x4_t const mask, + unsigned viewshift) { + mpc_and_verify_def(uint32x4_t, vandq_u32, veorq_u32, mm128_shift_right, mm128_shift_left); + } + +-void mpc_and_verify_256_neon(uint32x4_t res[SC_VERIFY][2], uint32x4_t const first[SC_VERIFY][2], ++void oqs_sig_picnic_mpc_and_verify_256_neon(uint32x4_t res[SC_VERIFY][2], uint32x4_t const first[SC_VERIFY][2], + uint32x4_t const second[SC_VERIFY][2], + uint32x4_t const r[SC_VERIFY][2], view_t* view, uint32x4_t const* mask, + unsigned viewshift) { +@@ -326,7 +326,7 @@ void mpc_and_verify_256_neon(uint32x4_t res[SC_VERIFY][2], uint32x4_t const firs + 2); + } + +-void mpc_and_verify_384_neon(uint32x4_t res[SC_VERIFY][3], uint32x4_t const first[SC_VERIFY][3], ++void oqs_sig_picnic_mpc_and_verify_384_neon(uint32x4_t res[SC_VERIFY][3], uint32x4_t const first[SC_VERIFY][3], + uint32x4_t const 
second[SC_VERIFY][3], + uint32x4_t const r[SC_VERIFY][3], view_t* view, uint32x4_t const* mask, + unsigned viewshift) { +@@ -334,7 +334,7 @@ void mpc_and_verify_384_neon(uint32x4_t res[SC_VERIFY][3], uint32x4_t const firs + 3); + } + +-void mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const first[SC_VERIFY][4], ++void oqs_sig_picnic_mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const first[SC_VERIFY][4], + uint32x4_t const second[SC_VERIFY][4], + uint32x4_t const r[SC_VERIFY][4], view_t* view, uint32x4_t const* mask, + unsigned viewshift) { +@@ -345,7 +345,7 @@ void mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const firs + #endif + #endif + +-void mpc_and_verify(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second, ++void oqs_sig_picnic_mpc_and_verify(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second, + mzd_local_t* const* r, view_t* view, mzd_local_t const* mask, + unsigned viewshift, mzd_local_t* const* buffer) { + mzd_local_t* b = buffer[0]; +@@ -353,29 +353,29 @@ void mpc_and_verify(mzd_local_t* const* res, mzd_local_t* const* first, mzd_loca + for (unsigned m = 0; m < (SC_VERIFY - 1); ++m) { + const unsigned j = m + 1; + +- mzd_and(res[m], first[m], second[m]); ++ oqs_sig_picnic_mzd_and(res[m], first[m], second[m]); + +- mzd_and(b, first[j], second[m]); +- mzd_xor(res[m], res[m], b); ++ oqs_sig_picnic_mzd_and(b, first[j], second[m]); ++ oqs_sig_picnic_mzd_xor(res[m], res[m], b); + +- mzd_and(b, first[m], second[j]); +- mzd_xor(res[m], res[m], b); ++ oqs_sig_picnic_mzd_and(b, first[m], second[j]); ++ oqs_sig_picnic_mzd_xor(res[m], res[m], b); + +- mzd_xor(res[m], res[m], r[m]); +- mzd_xor(res[m], res[m], r[j]); ++ oqs_sig_picnic_mzd_xor(res[m], res[m], r[m]); ++ oqs_sig_picnic_mzd_xor(res[m], res[m], r[j]); + } + + for (unsigned m = 0; m < (SC_VERIFY - 1); ++m) { +- mzd_shift_right(b, res[m], viewshift); +- mzd_xor(view->s[m], view->s[m], b); ++ 
oqs_sig_picnic_mzd_shift_right(b, res[m], viewshift); ++ oqs_sig_picnic_mzd_xor(view->s[m], view->s[m], b); + } + +- mzd_shift_left(res[SC_VERIFY - 1], view->s[SC_VERIFY - 1], viewshift); +- mzd_and(res[SC_VERIFY - 1], res[SC_VERIFY - 1], mask); ++ oqs_sig_picnic_mzd_shift_left(res[SC_VERIFY - 1], view->s[SC_VERIFY - 1], viewshift); ++ oqs_sig_picnic_mzd_and(res[SC_VERIFY - 1], res[SC_VERIFY - 1], mask); + } + +-void mpc_copy(mzd_local_t** out, mzd_local_t* const* in, unsigned sc) { ++void oqs_sig_picnic_mpc_copy(mzd_local_t** out, mzd_local_t* const* in, unsigned sc) { + for (unsigned i = 0; i < sc; ++i) { +- mzd_local_copy(out[i], in[i]); ++ oqs_sig_picnic_mzd_local_copy(out[i], in[i]); + } + } +diff --git a/mpc.h b/mpc.h +index 7539e90..8b7a2d7 100644 +--- a/mpc.h ++++ b/mpc.h +@@ -48,32 +48,32 @@ typedef view_t rvec_t; + } \ + } while (0) + +-void mpc_shift_right(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count, ++void oqs_sig_picnic_mpc_shift_right(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count, + unsigned sc) ATTR_NONNULL; + +-void mpc_shift_left(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count, ++void oqs_sig_picnic_mpc_shift_left(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count, + unsigned sc) ATTR_NONNULL; + +-void mpc_and_const(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t const* second, ++void oqs_sig_picnic_mpc_and_const(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t const* second, + unsigned sc) ATTR_NONNULL; + +-void mpc_xor(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second, ++void oqs_sig_picnic_mpc_xor(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second, + unsigned sc) ATTR_NONNULL; + +-void mpc_clear(mzd_local_t* const* res, unsigned sc) ATTR_NONNULL; ++void oqs_sig_picnic_mpc_clear(mzd_local_t* const* res, unsigned sc) ATTR_NONNULL; + +-void mpc_and(mzd_local_t* const* res, mzd_local_t* const* 
first, mzd_local_t* const* second, ++void oqs_sig_picnic_mpc_and(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second, + mzd_local_t* const* r, view_t* view, unsigned viewshift, + mzd_local_t* const* buffer) ATTR_NONNULL; + +-void mpc_and_verify(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second, ++void oqs_sig_picnic_mpc_and_verify(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second, + mzd_local_t* const* r, view_t* view, mzd_local_t const* mask, + unsigned viewshift, mzd_local_t* const* buffer) ATTR_NONNULL; + +-void mpc_and_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, uint64_t const* r, ++void oqs_sig_picnic_mpc_and_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, uint64_t const* r, + view_t* view, unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_verify_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, ++void oqs_sig_picnic_mpc_and_verify_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, + uint64_t const* r, view_t* view, uint64_t const mask, + unsigned viewshift) ATTR_NONNULL; + +@@ -81,85 +81,85 @@ void mpc_and_verify_uint64(uint64_t* res, uint64_t const* first, uint64_t const* + #include "simd.h" + #if defined(WITH_SSE2) || defined(WITH_AVX) || defined(WITH_SSE4_1) + +-void mpc_and_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r, ++void oqs_sig_picnic_mpc_and_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r, + view_t* view, unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_verify_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r, ++void oqs_sig_picnic_mpc_and_verify_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r, + view_t* view, __m128i const mask, unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_256_sse(__m128i res[SC_PROOF][2], __m128i const first[SC_PROOF][2], ++void 
oqs_sig_picnic_mpc_and_256_sse(__m128i res[SC_PROOF][2], __m128i const first[SC_PROOF][2], + __m128i const second[SC_PROOF][2], __m128i const r[SC_PROOF][2], view_t* view, + unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_verify_256_sse(__m128i res[SC_VERIFY][2], __m128i const first[SC_VERIFY][2], ++void oqs_sig_picnic_mpc_and_verify_256_sse(__m128i res[SC_VERIFY][2], __m128i const first[SC_VERIFY][2], + __m128i const second[SC_VERIFY][2], __m128i const r[SC_VERIFY][2], + view_t* view, __m128i const* mask, unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_384_sse(__m128i res[SC_PROOF][3], __m128i const first[SC_PROOF][3], ++void oqs_sig_picnic_mpc_and_384_sse(__m128i res[SC_PROOF][3], __m128i const first[SC_PROOF][3], + __m128i const second[SC_PROOF][3], __m128i const r[SC_PROOF][3], view_t* view, + unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_verify_384_sse(__m128i res[SC_VERIFY][3], __m128i const first[SC_VERIFY][3], ++void oqs_sig_picnic_mpc_and_verify_384_sse(__m128i res[SC_VERIFY][3], __m128i const first[SC_VERIFY][3], + __m128i const second[SC_VERIFY][3], __m128i const r[SC_VERIFY][3], + view_t* view, __m128i const* mask, unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_512_sse(__m128i res[SC_PROOF][4], __m128i const first[SC_PROOF][4], ++void oqs_sig_picnic_mpc_and_512_sse(__m128i res[SC_PROOF][4], __m128i const first[SC_PROOF][4], + __m128i const second[SC_PROOF][4], __m128i const r[SC_PROOF][4], view_t* view, + unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_verify_512_sse(__m128i res[SC_VERIFY][4], __m128i const first[SC_VERIFY][4], ++void oqs_sig_picnic_mpc_and_verify_512_sse(__m128i res[SC_VERIFY][4], __m128i const first[SC_VERIFY][4], + __m128i const second[SC_VERIFY][4], __m128i const r[SC_VERIFY][4], + view_t* view, __m128i const* mask, unsigned viewshift) ATTR_NONNULL; + + #endif + + #if defined(WITH_AVX2) +-void mpc_and_avx(__m256i* res, __m256i const* first, __m256i const* second, __m256i const* r, ++void 
oqs_sig_picnic_mpc_and_avx(__m256i* res, __m256i const* first, __m256i const* second, __m256i const* r, + view_t* view, unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_verify_avx(__m256i* res, __m256i const* first, __m256i const* second, __m256i const* r, ++void oqs_sig_picnic_mpc_and_verify_avx(__m256i* res, __m256i const* first, __m256i const* second, __m256i const* r, + view_t* view, __m256i const mask, unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_512_avx(__m256i res[SC_VERIFY][2], __m256i const first[SC_VERIFY][2], ++void oqs_sig_picnic_mpc_and_512_avx(__m256i res[SC_VERIFY][2], __m256i const first[SC_VERIFY][2], + __m256i const second[SC_VERIFY][2], __m256i const r[SC_VERIFY][2], + view_t* view, unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_verify_512_avx(__m256i res[SC_VERIFY][2], __m256i const first[SC_VERIFY][2], ++void oqs_sig_picnic_mpc_and_verify_512_avx(__m256i res[SC_VERIFY][2], __m256i const first[SC_VERIFY][2], + __m256i const second[SC_VERIFY][2], __m256i const r[SC_VERIFY][2], + view_t* view, __m256i const* mask, unsigned viewshift) ATTR_NONNULL; + #endif + + #ifdef WITH_NEON +-void mpc_and_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second, ++void oqs_sig_picnic_mpc_and_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second, + uint32x4_t const* r, view_t* view, unsigned viewshift); + +-void mpc_and_verify_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second, ++void oqs_sig_picnic_mpc_and_verify_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second, + uint32x4_t const* r, view_t* view, uint32x4_t const mask, + unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_256_neon(uint32x4_t res[SC_PROOF][2], uint32x4_t const first[SC_PROOF][2], ++void oqs_sig_picnic_mpc_and_256_neon(uint32x4_t res[SC_PROOF][2], uint32x4_t const first[SC_PROOF][2], + uint32x4_t const second[SC_PROOF][2], uint32x4_t const r[SC_PROOF][2], + view_t* view, unsigned viewshift); + +-void 
mpc_and_verify_256_neon(uint32x4_t res[SC_VERIFY][2], uint32x4_t const first[SC_VERIFY][2], ++void oqs_sig_picnic_mpc_and_verify_256_neon(uint32x4_t res[SC_VERIFY][2], uint32x4_t const first[SC_VERIFY][2], + uint32x4_t const second[SC_VERIFY][2], + uint32x4_t const r[SC_VERIFY][2], view_t* view, uint32x4_t const* mask, + unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_384_neon(uint32x4_t res[SC_PROOF][3], uint32x4_t const first[SC_PROOF][3], ++void oqs_sig_picnic_mpc_and_384_neon(uint32x4_t res[SC_PROOF][3], uint32x4_t const first[SC_PROOF][3], + uint32x4_t const second[SC_PROOF][3], uint32x4_t const r[SC_PROOF][3], + view_t* view, unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_verify_384_neon(uint32x4_t res[SC_VERIFY][3], uint32x4_t const first[SC_VERIFY][3], ++void oqs_sig_picnic_mpc_and_verify_384_neon(uint32x4_t res[SC_VERIFY][3], uint32x4_t const first[SC_VERIFY][3], + uint32x4_t const second[SC_VERIFY][3], + uint32x4_t const r[SC_VERIFY][3], view_t* view, uint32x4_t const* mask, + unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_512_neon(uint32x4_t res[SC_PROOF][4], uint32x4_t const first[SC_PROOF][4], ++void oqs_sig_picnic_mpc_and_512_neon(uint32x4_t res[SC_PROOF][4], uint32x4_t const first[SC_PROOF][4], + uint32x4_t const second[SC_PROOF][4], uint32x4_t const r[SC_PROOF][4], + view_t* view, unsigned viewshift) ATTR_NONNULL; + +-void mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const first[SC_VERIFY][4], ++void oqs_sig_picnic_mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const first[SC_VERIFY][4], + uint32x4_t const second[SC_VERIFY][4], + uint32x4_t const r[SC_VERIFY][4], view_t* view, uint32x4_t const* mask, + unsigned viewshift) ATTR_NONNULL; +@@ -167,6 +167,6 @@ void mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const firs + #endif + #endif + +-void mpc_copy(mzd_local_t** out, mzd_local_t* const* in, unsigned sc) ATTR_NONNULL_ARG(2); ++void oqs_sig_picnic_mpc_copy(mzd_local_t** out, 
mzd_local_t* const* in, unsigned sc) ATTR_NONNULL_ARG(2); + + #endif +diff --git a/mpc_lowmc.c b/mpc_lowmc.c +index 7e3d8fe..41ffbdc 100644 +--- a/mpc_lowmc.c ++++ b/mpc_lowmc.c +@@ -102,13 +102,13 @@ static void _mpc_sbox_layer_bitsliced_verify(mzd_local_t** out, mzd_local_t* con + mask_t const* mask, sbox_vars_t const* vars) { + bitsliced_step_1(SC_VERIFY); + +- mzd_local_clear(view->s[0]); ++ oqs_sig_picnic_mzd_local_clear(view->s[0]); + // a & b +- mpc_and_verify(vars->r0m, vars->x0s, vars->x1s, vars->r2m, view, mask->x2, 0, vars->v); ++ oqs_sig_picnic_mpc_and_verify(vars->r0m, vars->x0s, vars->x1s, vars->r2m, view, mask->x2, 0, vars->v); + // b & c +- mpc_and_verify(vars->r2m, vars->x1s, vars->x2m, vars->r1s, view, mask->x2, 1, vars->v); ++ oqs_sig_picnic_mpc_and_verify(vars->r2m, vars->x1s, vars->x2m, vars->r1s, view, mask->x2, 1, vars->v); + // c & a +- mpc_and_verify(vars->r1m, vars->x0s, vars->x2m, vars->r0s, view, mask->x2, 2, vars->v); ++ oqs_sig_picnic_mpc_and_verify(vars->r1m, vars->x0s, vars->x2m, vars->r0s, view, mask->x2, 2, vars->v); + + bitsliced_step_2(SC_VERIFY); + } +@@ -167,9 +167,9 @@ static void _mpc_sbox_layer_bitsliced_uint64(uint64_t* out, uint64_t const* in, + bitsliced_step_1_uint64(SC_PROOF); + + memset(view->t, 0, sizeof(uint64_t) * SC_PROOF); +- mpc_and_uint64(r0m, x0s, x1s, r2m, view, 0); +- mpc_and_uint64(r2m, x1s, x2m, r1s, view, 1); +- mpc_and_uint64(r1m, x0s, x2m, r0s, view, 2); ++ oqs_sig_picnic_mpc_and_uint64(r0m, x0s, x1s, r2m, view, 0); ++ oqs_sig_picnic_mpc_and_uint64(r2m, x1s, x2m, r1s, view, 1); ++ oqs_sig_picnic_mpc_and_uint64(r1m, x0s, x2m, r0s, view, 2); + + bitsliced_step_2_uint64(SC_PROOF); + } +@@ -179,9 +179,9 @@ static void _mpc_sbox_layer_bitsliced_verify_uint64(uint64_t* out, uint64_t cons + bitsliced_step_1_uint64(SC_VERIFY); + + view->t[0] = 0; +- mpc_and_verify_uint64(r0m, x0s, x1s, r2m, view, mx2, 0); +- mpc_and_verify_uint64(r2m, x1s, x2m, r1s, view, mx2, 1); +- mpc_and_verify_uint64(r1m, x0s, x2m, r0s, 
view, mx2, 2); ++ oqs_sig_picnic_mpc_and_verify_uint64(r0m, x0s, x1s, r2m, view, mx2, 0); ++ oqs_sig_picnic_mpc_and_verify_uint64(r2m, x1s, x2m, r1s, view, mx2, 1); ++ oqs_sig_picnic_mpc_and_verify_uint64(r1m, x0s, x2m, r0s, view, mx2, 2); + + bitsliced_step_2_uint64(SC_VERIFY); + } +@@ -335,10 +335,10 @@ static void _mpc_sbox_layer_bitsliced_128_sse(mzd_local_t** out, mzd_local_t* co + mask_t const* mask) { + bitsliced_mm_step_1(SC_PROOF, __m128i, _mm_and_si128, mm128_shift_left); + +- mpc_clear(view->s, SC_PROOF); +- mpc_and_sse(r0m, x0s, x1s, r2m, view, 0); +- mpc_and_sse(r2m, x1s, x2m, r1s, view, 1); +- mpc_and_sse(r1m, x0s, x2m, r0s, view, 2); ++ oqs_sig_picnic_mpc_clear(view->s, SC_PROOF); ++ oqs_sig_picnic_mpc_and_sse(r0m, x0s, x1s, r2m, view, 0); ++ oqs_sig_picnic_mpc_and_sse(r2m, x1s, x2m, r1s, view, 1); ++ oqs_sig_picnic_mpc_and_sse(r1m, x0s, x2m, r0s, view, 2); + + bitsliced_mm_step_2(SC_PROOF, __m128i, _mm_and_si128, _mm_xor_si128, mm128_shift_right); + } +@@ -349,10 +349,10 @@ static void _mpc_sbox_layer_bitsliced_verify_128_sse(mzd_local_t** out, mzd_loca + mask_t const* mask) { + bitsliced_mm_step_1(SC_VERIFY, __m128i, _mm_and_si128, mm128_shift_left); + +- mzd_local_clear(view->s[0]); +- mpc_and_verify_sse(r0m, x0s, x1s, r2m, view, mx2, 0); +- mpc_and_verify_sse(r2m, x1s, x2m, r1s, view, mx2, 1); +- mpc_and_verify_sse(r1m, x0s, x2m, r0s, view, mx2, 2); ++ oqs_sig_picnic_mzd_local_clear(view->s[0]); ++ oqs_sig_picnic_mpc_and_verify_sse(r0m, x0s, x1s, r2m, view, mx2, 0); ++ oqs_sig_picnic_mpc_and_verify_sse(r2m, x1s, x2m, r1s, view, mx2, 1); ++ oqs_sig_picnic_mpc_and_verify_sse(r1m, x0s, x2m, r0s, view, mx2, 2); + + bitsliced_mm_step_2(SC_VERIFY, __m128i, _mm_and_si128, _mm_xor_si128, mm128_shift_right); + } +@@ -363,10 +363,10 @@ static void _mpc_sbox_layer_bitsliced_256_sse(mzd_local_t** out, mzd_local_t* co + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_PROOF, __m128i, mm256_and_sse, mm256_shift_left_sse, 2); + +- 
mpc_clear(view->s, SC_PROOF); +- mpc_and_256_sse(r0m, x0s, x1s, r2m, view, 0); +- mpc_and_256_sse(r2m, x1s, x2m, r1s, view, 1); +- mpc_and_256_sse(r1m, x0s, x2m, r0s, view, 2); ++ oqs_sig_picnic_mpc_clear(view->s, SC_PROOF); ++ oqs_sig_picnic_mpc_and_256_sse(r0m, x0s, x1s, r2m, view, 0); ++ oqs_sig_picnic_mpc_and_256_sse(r2m, x1s, x2m, r1s, view, 1); ++ oqs_sig_picnic_mpc_and_256_sse(r1m, x0s, x2m, r0s, view, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_PROOF, __m128i, mm256_and_sse, mm256_xor_sse, + mm256_shift_right_sse, 2); +@@ -378,10 +378,10 @@ static void _mpc_sbox_layer_bitsliced_verify_256_sse(mzd_local_t** out, mzd_loca + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, __m128i, mm256_and_sse, mm256_shift_left_sse, 2); + +- mzd_local_clear(view->s[0]); +- mpc_and_verify_256_sse(r0m, x0s, x1s, r2m, view, mx2, 0); +- mpc_and_verify_256_sse(r2m, x1s, x2m, r1s, view, mx2, 1); +- mpc_and_verify_256_sse(r1m, x0s, x2m, r0s, view, mx2, 2); ++ oqs_sig_picnic_mzd_local_clear(view->s[0]); ++ oqs_sig_picnic_mpc_and_verify_256_sse(r0m, x0s, x1s, r2m, view, mx2, 0); ++ oqs_sig_picnic_mpc_and_verify_256_sse(r2m, x1s, x2m, r1s, view, mx2, 1); ++ oqs_sig_picnic_mpc_and_verify_256_sse(r1m, x0s, x2m, r0s, view, mx2, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, __m128i, mm256_and_sse, mm256_xor_sse, + mm256_shift_right_sse, 2); +@@ -393,10 +393,10 @@ static void _mpc_sbox_layer_bitsliced_384_sse(mzd_local_t** out, mzd_local_t* co + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_PROOF, __m128i, mm384_and_sse, mm384_shift_left_sse, 3); + +- mpc_clear(view->s, SC_PROOF); +- mpc_and_384_sse(r0m, x0s, x1s, r2m, view, 0); +- mpc_and_384_sse(r2m, x1s, x2m, r1s, view, 1); +- mpc_and_384_sse(r1m, x0s, x2m, r0s, view, 2); ++ oqs_sig_picnic_mpc_clear(view->s, SC_PROOF); ++ oqs_sig_picnic_mpc_and_384_sse(r0m, x0s, x1s, r2m, view, 0); ++ oqs_sig_picnic_mpc_and_384_sse(r2m, x1s, x2m, r1s, view, 1); ++ oqs_sig_picnic_mpc_and_384_sse(r1m, 
x0s, x2m, r0s, view, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_PROOF, __m128i, mm384_and_sse, mm384_xor_sse, + mm384_shift_right_sse, 3); +@@ -408,10 +408,10 @@ static void _mpc_sbox_layer_bitsliced_verify_384_sse(mzd_local_t** out, mzd_loca + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, __m128i, mm384_and_sse, mm384_shift_left_sse, 3); + +- mzd_local_clear(view->s[0]); +- mpc_and_verify_384_sse(r0m, x0s, x1s, r2m, view, mx2, 0); +- mpc_and_verify_384_sse(r2m, x1s, x2m, r1s, view, mx2, 1); +- mpc_and_verify_384_sse(r1m, x0s, x2m, r0s, view, mx2, 2); ++ oqs_sig_picnic_mzd_local_clear(view->s[0]); ++ oqs_sig_picnic_mpc_and_verify_384_sse(r0m, x0s, x1s, r2m, view, mx2, 0); ++ oqs_sig_picnic_mpc_and_verify_384_sse(r2m, x1s, x2m, r1s, view, mx2, 1); ++ oqs_sig_picnic_mpc_and_verify_384_sse(r1m, x0s, x2m, r0s, view, mx2, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, __m128i, mm384_and_sse, mm384_xor_sse, + mm384_shift_right_sse, 3); +@@ -423,10 +423,10 @@ static void _mpc_sbox_layer_bitsliced_512_sse(mzd_local_t** out, mzd_local_t* co + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_PROOF, __m128i, mm512_and_sse, mm512_shift_left_sse, 4); + +- mpc_clear(view->s, SC_PROOF); +- mpc_and_512_sse(r0m, x0s, x1s, r2m, view, 0); +- mpc_and_512_sse(r2m, x1s, x2m, r1s, view, 1); +- mpc_and_512_sse(r1m, x0s, x2m, r0s, view, 2); ++ oqs_sig_picnic_mpc_clear(view->s, SC_PROOF); ++ oqs_sig_picnic_mpc_and_512_sse(r0m, x0s, x1s, r2m, view, 0); ++ oqs_sig_picnic_mpc_and_512_sse(r2m, x1s, x2m, r1s, view, 1); ++ oqs_sig_picnic_mpc_and_512_sse(r1m, x0s, x2m, r0s, view, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_PROOF, __m128i, mm512_and_sse, mm512_xor_sse, + mm512_shift_right_sse, 4); +@@ -438,10 +438,10 @@ static void _mpc_sbox_layer_bitsliced_verify_512_sse(mzd_local_t** out, mzd_loca + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, __m128i, mm512_and_sse, mm512_shift_left_sse, 4); + +- 
mzd_local_clear(view->s[0]); +- mpc_and_verify_512_sse(r0m, x0s, x1s, r2m, view, mx2, 0); +- mpc_and_verify_512_sse(r2m, x1s, x2m, r1s, view, mx2, 1); +- mpc_and_verify_512_sse(r1m, x0s, x2m, r0s, view, mx2, 2); ++ oqs_sig_picnic_mzd_local_clear(view->s[0]); ++ oqs_sig_picnic_mpc_and_verify_512_sse(r0m, x0s, x1s, r2m, view, mx2, 0); ++ oqs_sig_picnic_mpc_and_verify_512_sse(r2m, x1s, x2m, r1s, view, mx2, 1); ++ oqs_sig_picnic_mpc_and_verify_512_sse(r1m, x0s, x2m, r0s, view, mx2, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, __m128i, mm512_and_sse, mm512_xor_sse, + mm512_shift_right_sse, 4); +@@ -457,10 +457,10 @@ static void _mpc_sbox_layer_bitsliced_256_avx(mzd_local_t** out, mzd_local_t* co + mask_t const* mask) { + bitsliced_mm_step_1(SC_PROOF, __m256i, _mm256_and_si256, mm256_shift_left); + +- mpc_clear(view->s, SC_PROOF); +- mpc_and_avx(r0m, x0s, x1s, r2m, view, 0); +- mpc_and_avx(r2m, x1s, x2m, r1s, view, 1); +- mpc_and_avx(r1m, x0s, x2m, r0s, view, 2); ++ oqs_sig_picnic_mpc_clear(view->s, SC_PROOF); ++ oqs_sig_picnic_mpc_and_avx(r0m, x0s, x1s, r2m, view, 0); ++ oqs_sig_picnic_mpc_and_avx(r2m, x1s, x2m, r1s, view, 1); ++ oqs_sig_picnic_mpc_and_avx(r1m, x0s, x2m, r0s, view, 2); + + bitsliced_mm_step_2(SC_PROOF, __m256i, _mm256_and_si256, _mm256_xor_si256, mm256_shift_right); + } +@@ -471,10 +471,10 @@ static void _mpc_sbox_layer_bitsliced_verify_256_avx(mzd_local_t** out, mzd_loca + mask_t const* mask) { + bitsliced_mm_step_1(SC_VERIFY, __m256i, _mm256_and_si256, mm256_shift_left); + +- mzd_local_clear(view->s[0]); +- mpc_and_verify_avx(r0m, x0s, x1s, r2m, view, mx2, 0); +- mpc_and_verify_avx(r2m, x1s, x2m, r1s, view, mx2, 1); +- mpc_and_verify_avx(r1m, x0s, x2m, r0s, view, mx2, 2); ++ oqs_sig_picnic_mzd_local_clear(view->s[0]); ++ oqs_sig_picnic_mpc_and_verify_avx(r0m, x0s, x1s, r2m, view, mx2, 0); ++ oqs_sig_picnic_mpc_and_verify_avx(r2m, x1s, x2m, r1s, view, mx2, 1); ++ oqs_sig_picnic_mpc_and_verify_avx(r1m, x0s, x2m, r0s, view, mx2, 2); + + 
bitsliced_mm_step_2(SC_VERIFY, __m256i, _mm256_and_si256, _mm256_xor_si256, mm256_shift_right); + } +@@ -485,10 +485,10 @@ static void _mpc_sbox_layer_bitsliced_512_avx(mzd_local_t** out, mzd_local_t* co + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_PROOF, __m256i, mm512_and_avx, mm512_shift_left_avx, 2); + +- mpc_clear(view->s, SC_PROOF); +- mpc_and_512_avx(r0m, x0s, x1s, r2m, view, 0); +- mpc_and_512_avx(r2m, x1s, x2m, r1s, view, 1); +- mpc_and_512_avx(r1m, x0s, x2m, r0s, view, 2); ++ oqs_sig_picnic_mpc_clear(view->s, SC_PROOF); ++ oqs_sig_picnic_mpc_and_512_avx(r0m, x0s, x1s, r2m, view, 0); ++ oqs_sig_picnic_mpc_and_512_avx(r2m, x1s, x2m, r1s, view, 1); ++ oqs_sig_picnic_mpc_and_512_avx(r1m, x0s, x2m, r0s, view, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_PROOF, __m256i, mm512_and_avx, mm512_xor_avx, + mm512_shift_right_avx, 2); +@@ -500,10 +500,10 @@ static void _mpc_sbox_layer_bitsliced_verify_512_avx(mzd_local_t** out, mzd_loca + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, __m256i, mm512_and_avx, mm512_shift_left_avx, 2); + +- mzd_local_clear(view->s[0]); +- mpc_and_verify_512_avx(r0m, x0s, x1s, r2m, view, mx2, 0); +- mpc_and_verify_512_avx(r2m, x1s, x2m, r1s, view, mx2, 1); +- mpc_and_verify_512_avx(r1m, x0s, x2m, r0s, view, mx2, 2); ++ oqs_sig_picnic_mzd_local_clear(view->s[0]); ++ oqs_sig_picnic_mpc_and_verify_512_avx(r0m, x0s, x1s, r2m, view, mx2, 0); ++ oqs_sig_picnic_mpc_and_verify_512_avx(r2m, x1s, x2m, r1s, view, mx2, 1); ++ oqs_sig_picnic_mpc_and_verify_512_avx(r1m, x0s, x2m, r0s, view, mx2, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, __m256i, mm512_and_avx, mm512_xor_avx, + mm512_shift_right_avx, 2); +@@ -518,10 +518,10 @@ static void _mpc_sbox_layer_bitsliced_128_neon(mzd_local_t** out, mzd_local_t* c + mask_t const* mask) { + bitsliced_mm_step_1(SC_PROOF, uint32x4_t, vandq_u32, mm128_shift_left); + +- mpc_clear(view->s, SC_PROOF); +- mpc_and_neon(r0m, x0s, x1s, r2m, view, 0); +- 
mpc_and_neon(r2m, x1s, x2m, r1s, view, 1); +- mpc_and_neon(r1m, x0s, x2m, r0s, view, 2); ++ oqs_sig_picnic_mpc_clear(view->s, SC_PROOF); ++ oqs_sig_picnic_mpc_and_neon(r0m, x0s, x1s, r2m, view, 0); ++ oqs_sig_picnic_mpc_and_neon(r2m, x1s, x2m, r1s, view, 1); ++ oqs_sig_picnic_mpc_and_neon(r1m, x0s, x2m, r0s, view, 2); + + bitsliced_mm_step_2(SC_PROOF, uint32x4_t, vandq_u32, veorq_u32, mm128_shift_right); + } +@@ -531,10 +531,10 @@ static void _mpc_sbox_layer_bitsliced_verify_128_neon(mzd_local_t** out, mzd_loc + mask_t const* mask) { + bitsliced_mm_step_1(SC_VERIFY, uint32x4_t, vandq_u32, mm128_shift_left); + +- mzd_local_clear(view->s[0]); +- mpc_and_verify_neon(r0m, x0s, x1s, r2m, view, mx2, 0); +- mpc_and_verify_neon(r2m, x1s, x2m, r1s, view, mx2, 1); +- mpc_and_verify_neon(r1m, x0s, x2m, r0s, view, mx2, 2); ++ oqs_sig_picnic_mzd_local_clear(view->s[0]); ++ oqs_sig_picnic_mpc_and_verify_neon(r0m, x0s, x1s, r2m, view, mx2, 0); ++ oqs_sig_picnic_mpc_and_verify_neon(r2m, x1s, x2m, r1s, view, mx2, 1); ++ oqs_sig_picnic_mpc_and_verify_neon(r1m, x0s, x2m, r0s, view, mx2, 2); + + bitsliced_mm_step_2(SC_VERIFY, uint32x4_t, vandq_u32, veorq_u32, mm128_shift_right); + } +@@ -544,10 +544,10 @@ static void _mpc_sbox_layer_bitsliced_256_neon(mzd_local_t** out, mzd_local_t* c + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_PROOF, uint32x4_t, mm256_and, mm256_shift_left, 2); + +- mpc_clear(view->s, SC_PROOF); +- mpc_and_256_neon(r0m, x0s, x1s, r2m, view, 0); +- mpc_and_256_neon(r2m, x1s, x2m, r1s, view, 1); +- mpc_and_256_neon(r1m, x0s, x2m, r0s, view, 2); ++ oqs_sig_picnic_mpc_clear(view->s, SC_PROOF); ++ oqs_sig_picnic_mpc_and_256_neon(r0m, x0s, x1s, r2m, view, 0); ++ oqs_sig_picnic_mpc_and_256_neon(r2m, x1s, x2m, r1s, view, 1); ++ oqs_sig_picnic_mpc_and_256_neon(r1m, x0s, x2m, r0s, view, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_PROOF, uint32x4_t, mm256_and, mm256_xor, mm256_shift_right, + 2); +@@ -558,10 +558,10 @@ static void 
_mpc_sbox_layer_bitsliced_verify_256_neon(mzd_local_t** out, mzd_loc + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, uint32x4_t, mm256_and, mm256_shift_left, 2); + +- mzd_local_clear(view->s[0]); +- mpc_and_verify_256_neon(r0m, x0s, x1s, r2m, view, mx2, 0); +- mpc_and_verify_256_neon(r2m, x1s, x2m, r1s, view, mx2, 1); +- mpc_and_verify_256_neon(r1m, x0s, x2m, r0s, view, mx2, 2); ++ oqs_sig_picnic_mzd_local_clear(view->s[0]); ++ oqs_sig_picnic_mpc_and_verify_256_neon(r0m, x0s, x1s, r2m, view, mx2, 0); ++ oqs_sig_picnic_mpc_and_verify_256_neon(r2m, x1s, x2m, r1s, view, mx2, 1); ++ oqs_sig_picnic_mpc_and_verify_256_neon(r1m, x0s, x2m, r0s, view, mx2, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, uint32x4_t, mm256_and, mm256_xor, + mm256_shift_right, 2); +@@ -572,10 +572,10 @@ static void _mpc_sbox_layer_bitsliced_384_neon(mzd_local_t** out, mzd_local_t* c + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_PROOF, uint32x4_t, mm384_and, mm384_shift_left, 3); + +- mpc_clear(view->s, SC_PROOF); +- mpc_and_384_neon(r0m, x0s, x1s, r2m, view, 0); +- mpc_and_384_neon(r2m, x1s, x2m, r1s, view, 1); +- mpc_and_384_neon(r1m, x0s, x2m, r0s, view, 2); ++ oqs_sig_picnic_mpc_clear(view->s, SC_PROOF); ++ oqs_sig_picnic_mpc_and_384_neon(r0m, x0s, x1s, r2m, view, 0); ++ oqs_sig_picnic_mpc_and_384_neon(r2m, x1s, x2m, r1s, view, 1); ++ oqs_sig_picnic_mpc_and_384_neon(r1m, x0s, x2m, r0s, view, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_PROOF, uint32x4_t, mm384_and, mm384_xor, mm384_shift_right, + 3); +@@ -586,10 +586,10 @@ static void _mpc_sbox_layer_bitsliced_verify_384_neon(mzd_local_t** out, mzd_loc + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, uint32x4_t, mm384_and, mm384_shift_left, 3); + +- mzd_local_clear(view->s[0]); +- mpc_and_verify_384_neon(r0m, x0s, x1s, r2m, view, mx2, 0); +- mpc_and_verify_384_neon(r2m, x1s, x2m, r1s, view, mx2, 1); +- mpc_and_verify_384_neon(r1m, x0s, x2m, r0s, view, mx2, 2); ++ 
oqs_sig_picnic_mzd_local_clear(view->s[0]); ++ oqs_sig_picnic_mpc_and_verify_384_neon(r0m, x0s, x1s, r2m, view, mx2, 0); ++ oqs_sig_picnic_mpc_and_verify_384_neon(r2m, x1s, x2m, r1s, view, mx2, 1); ++ oqs_sig_picnic_mpc_and_verify_384_neon(r1m, x0s, x2m, r0s, view, mx2, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, uint32x4_t, mm384_and, mm384_xor, + mm384_shift_right, 3); +@@ -601,10 +601,10 @@ static void _mpc_sbox_layer_bitsliced_512_neon(mzd_local_t** out, mzd_local_t* c + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_PROOF, uint32x4_t, mm512_and, mm512_shift_left, 4); + +- mpc_clear(view->s, SC_PROOF); +- mpc_and_512_neon(r0m, x0s, x1s, r2m, view, 0); +- mpc_and_512_neon(r2m, x1s, x2m, r1s, view, 1); +- mpc_and_512_neon(r1m, x0s, x2m, r0s, view, 2); ++ oqs_sig_picnic_mpc_clear(view->s, SC_PROOF); ++ oqs_sig_picnic_mpc_and_512_neon(r0m, x0s, x1s, r2m, view, 0); ++ oqs_sig_picnic_mpc_and_512_neon(r2m, x1s, x2m, r1s, view, 1); ++ oqs_sig_picnic_mpc_and_512_neon(r1m, x0s, x2m, r0s, view, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_PROOF, uint32x4_t, mm512_and, mm512_xor, mm512_shift_right, + 4); +@@ -615,10 +615,10 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc + mask_t const* mask) { + bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, uint32x4_t, mm512_and, mm512_shift_left, 4); + +- mzd_local_clear(view->s[0]); +- mpc_and_verify_512_neon(r0m, x0s, x1s, r2m, view, mx2, 0); +- mpc_and_verify_512_neon(r2m, x1s, x2m, r1s, view, mx2, 1); +- mpc_and_verify_512_neon(r1m, x0s, x2m, r0s, view, mx2, 2); ++ oqs_sig_picnic_mzd_local_clear(view->s[0]); ++ oqs_sig_picnic_mpc_and_verify_512_neon(r0m, x0s, x1s, r2m, view, mx2, 0); ++ oqs_sig_picnic_mpc_and_verify_512_neon(r2m, x1s, x2m, r1s, view, mx2, 1); ++ oqs_sig_picnic_mpc_and_verify_512_neon(r1m, x0s, x2m, r0s, view, mx2, 2); + + bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, uint32x4_t, mm512_and, mm512_xor, + mm512_shift_right, 4); +@@ -700,7 +700,7 @@ 
static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc + #define loop_optimize(sbox_args, sbox, sbox_selector, no_scr, no_scr_active, const_mat_mul_func, \ + add_func, mul_more_cols, const_addmat_mul_func, ch, shares) \ + mzd_local_t* nl_part[shares]; \ +- mzd_local_init_multiple_ex(nl_part, shares, 1, lowmc->r * 32, false); \ ++ oqs_sig_picnic_mzd_local_init_multiple_ex(nl_part, shares, 1, lowmc->r * 32, false); \ + MPC_LOOP(mul_more_cols, nl_part, lowmc_key, lowmc->precomputed_non_linear_part_##no_scr, \ + shares); \ + word mask = 0x00000000FFFFFFFF; \ +@@ -715,7 +715,7 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc + MPC_LOOP(const_mat_mul_func, x, y, round->l_##no_scr, shares); \ + MPC_IF_ELSE(add_func, x, x, round->constant, shares, ch); \ + } \ +- mzd_local_free_multiple(nl_part); ++ oqs_sig_picnic_mzd_local_free_multiple(nl_part); + + #define loop(sbox_args, sbox, sbox_selector, no_scr, no_scr_active, const_mat_mul_func, add_func, \ + mul_more_cols, const_addmat_mul_func, ch, shares) \ +@@ -739,12 +739,12 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc + #define _mpc_lowmc_call_bitsliced(ch, sbox_args, sbox, sbox_selector, no_scr, no_scr_active, \ + optimize, const_mat_mul_func, add_func, mul_more_cols, \ + const_addmat_mul_func) \ +- mpc_copy(in_out_shares->s, lowmc_key, SC_PROOF); \ ++ oqs_sig_picnic_mpc_copy(in_out_shares->s, lowmc_key, SC_PROOF); \ + ++in_out_shares; \ + VARS_##sbox_args(SC_PROOF); \ + mzd_local_t** x = in_out_shares->s; \ + mzd_local_t* y[SC_PROOF]; \ +- mzd_local_init_multiple_ex(y, SC_PROOF, 1, lowmc->n, false); \ ++ oqs_sig_picnic_mzd_local_init_multiple_ex(y, SC_PROOF, 1, lowmc->n, false); \ + \ + MPC_LOOP(const_mat_mul_func, x, lowmc_key, lowmc->k0_##no_scr, SC_PROOF); \ + MPC_IF_ELSE(add_func, x, x, p, SC_PROOF, ch); \ +@@ -754,7 +754,7 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc + 
loop##optimize(sbox_args, sbox, sbox_selector, no_scr, no_scr_active, const_mat_mul_func, \ + add_func, mul_more_cols, const_addmat_mul_func, ch, SC_PROOF) \ + VARS_FREE_##sbox_args; \ +- mzd_local_free_multiple(y); ++ oqs_sig_picnic_mzd_local_free_multiple(y); + + #define init_key mzd_local_t* const* lowmc_key = &in_out_shares->s[0]; + +@@ -767,7 +767,7 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc + VARS_##sbox_args(SC_VERIFY); \ + mzd_local_t* x[2 * SC_VERIFY]; \ + mzd_local_t** y = &x[SC_VERIFY]; \ +- mzd_local_init_multiple_ex(x, 2 * SC_VERIFY, 1, lowmc->n, false); \ ++ oqs_sig_picnic_mzd_local_init_multiple_ex(x, 2 * SC_VERIFY, 1, lowmc->n, false); \ + \ + MPC_LOOP(const_mat_mul_func, x, lowmc_key, lowmc->k0_##no_scr, SC_VERIFY); \ + MPC_IF_ELSE(add_func, x, x, p, SC_VERIFY, ch); \ +@@ -776,8 +776,8 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc + \ + loop##optimize(sbox_args, sbox, sbox_selector, no_scr, no_scr_active, const_mat_mul_func, \ + add_func, mul_more_cols, const_addmat_mul_func, ch, SC_VERIFY); \ +- mpc_copy(in_out_shares->s, x, SC_VERIFY); \ +- mzd_local_free_multiple(x); \ ++ oqs_sig_picnic_mpc_copy(in_out_shares->s, x, SC_VERIFY); \ ++ oqs_sig_picnic_mzd_local_free_multiple(x); \ + VARS_FREE_##sbox_args; + + static void mpc_lowmc_call(lowmc_t const* lowmc, mpc_lowmc_key_t* lowmc_key, mzd_local_t const* p, +@@ -787,25 +787,25 @@ static void mpc_lowmc_call(lowmc_t const* lowmc, mpc_lowmc_key_t* lowmc_key, mzd + #ifdef WITH_CUSTOM_INSTANCES + if (lowmc->m != 10) { + _mpc_lowmc_call_bitsliced(0, 6, _mpc_sbox_layer_bitsliced, mzd, lookup, noscr, _optimize, +- mzd_mul_vl_general, mzd_xor_general, mzd_mul_vl_general, ++ oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_vl_general, + mzd_addmul_vl_general); + } else + #endif + { +- _mpc_lowmc_call_bitsliced(0, 6, , uint64, lookup, noscr, _optimize, mzd_mul_vl_general, +- 
mzd_xor_general, mzd_mul_vl_general, mzd_addmul_vl_general); ++ _mpc_lowmc_call_bitsliced(0, 6, , uint64, lookup, noscr, _optimize, oqs_sig_picnic_mzd_mul_vl_general, ++ oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_vl_general, mzd_addmul_vl_general); + } + #else + #ifdef WITH_CUSTOM_INSTANCES + if (lowmc->m != 10) { + _mpc_lowmc_call_bitsliced(0, 6, _mpc_sbox_layer_bitsliced, mzd, matrix, scr, _optimize, +- mzd_mul_v_general, mzd_xor_general, mzd_mul_v_general, +- mzd_addmul_v_general); ++ oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_v_general, ++ oqs_sig_picnic_mzd_addmul_v_general); + } else + #endif + { +- _mpc_lowmc_call_bitsliced(0, 6, , uint64, matrix, scr, _optimize, mzd_mul_v_general, +- mzd_xor_general, mzd_mul_v_general, mzd_addmul_v_general); ++ _mpc_lowmc_call_bitsliced(0, 6, , uint64, matrix, scr, _optimize, oqs_sig_picnic_mzd_mul_v_general, ++ oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_addmul_v_general); + } + #endif + #else +@@ -813,23 +813,23 @@ static void mpc_lowmc_call(lowmc_t const* lowmc, mpc_lowmc_key_t* lowmc_key, mzd + #ifdef WITH_CUSTOM_INSTANCES + if (lowmc->m != 10) { + _mpc_lowmc_call_bitsliced(0, 6, _mpc_sbox_layer_bitsliced, mzd, lookup, noscr, , +- mzd_mul_vl_general, mzd_xor_general, , mzd_addmul_vl_general); ++ oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_vl_general, , mzd_addmul_vl_general); + } else + #endif + { +- _mpc_lowmc_call_bitsliced(0, 6, , uint64, lookup, noscr, , mzd_mul_vl_general, mzd_xor_general, ++ _mpc_lowmc_call_bitsliced(0, 6, , uint64, lookup, noscr, , oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_vl_general, + , mzd_addmul_vl_general); + } + #else + #ifdef WITH_CUSTOM_INSTANCES + if (lowmc->m != 10) { + _mpc_lowmc_call_bitsliced(0, 6, _mpc_sbox_layer_bitsliced, mzd, matrix, scr, , +- mzd_mul_v_general, mzd_xor_general, , mzd_addmul_v_general); ++ oqs_sig_picnic_mzd_mul_v_general, 
oqs_sig_picnic_mzd_mul_vl_general, , oqs_sig_picnic_mzd_addmul_v_general); + } else + #endif + { +- _mpc_lowmc_call_bitsliced(0, 6, , uint64, matrix, scr, , mzd_mul_v_general, mzd_xor_general, , +- mzd_addmul_v_general); ++ _mpc_lowmc_call_bitsliced(0, 6, , uint64, matrix, scr, , oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_mul_vl_general, , ++ oqs_sig_picnic_mzd_addmul_v_general); + } + #endif + #endif +@@ -842,26 +842,26 @@ static void mpc_lowmc_call_verify(lowmc_t const* lowmc, mzd_local_t const* p, vi + #ifdef WITH_CUSTOM_INSTANCES + if (lowmc->m != 10) { + _mpc_lowmc_call_bitsliced_verify_m(ch, 6, _mpc_sbox_layer_bitsliced_verify, mzd, lookup, noscr, +- _optimize, mzd_mul_vl_general, mzd_xor_general, +- mzd_mul_vl_general, mzd_addmul_vl_general); ++ _optimize, oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_vl_general, ++ oqs_sig_picnic_mzd_mul_vl_general, mzd_addmul_vl_general); + } else + #endif + { + _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, lookup, noscr, _optimize, +- mzd_mul_vl_general, mzd_xor_general, mzd_mul_vl_general, ++ oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_vl_general, + mzd_addmul_vl_general); + } + #else + #ifdef WITH_CUSTOM_INSTANCES + if (lowmc->m != 10) { + _mpc_lowmc_call_bitsliced_verify_m(ch, 6, _mpc_sbox_layer_bitsliced_verify, mzd, matrix, scr, +- _optimize, mzd_mul_v_general, mzd_xor_general, +- mzd_mul_v_general, mzd_addmul_v_general); ++ _optimize, oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_mul_vl_general, ++ oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_addmul_v_general); + } else + #endif + { +- _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, matrix, scr, _optimize, mzd_mul_v_general, +- mzd_xor_general, mzd_mul_v_general, mzd_addmul_v_general); ++ _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, matrix, scr, _optimize, oqs_sig_picnic_mzd_mul_v_general, ++ oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_v_general, 
oqs_sig_picnic_mzd_addmul_v_general); + } + #endif + #else +@@ -869,24 +869,24 @@ static void mpc_lowmc_call_verify(lowmc_t const* lowmc, mzd_local_t const* p, vi + #ifdef WITH_CUSTOM_INSTANCES + if (lowmc->m != 10) { + _mpc_lowmc_call_bitsliced_verify_m(ch, 6, _mpc_sbox_layer_bitsliced_verify, mzd, lookup, noscr, +- , mzd_mul_vl_general, mzd_xor_general, , ++ , oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_mul_vl_general, , + mzd_addmul_vl_general); + } else + #endif + { +- _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, lookup, noscr, , mzd_mul_vl_general, +- mzd_xor_general, , mzd_addmul_vl_general); ++ _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, lookup, noscr, , oqs_sig_picnic_mzd_mul_vl_general, ++ oqs_sig_picnic_mzd_mul_vl_general, , mzd_addmul_vl_general); + } + #else + #ifdef WITH_CUSTOM_INSTANCES + if (lowmc->m != 10) { + _mpc_lowmc_call_bitsliced_verify_m(ch, 6, _mpc_sbox_layer_bitsliced_verify, mzd, matrix, scr, , +- mzd_mul_v_general, mzd_xor_general, , mzd_addmul_v_general); ++ oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_mul_vl_general, , oqs_sig_picnic_mzd_addmul_v_general); + } else + #endif + { +- _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, matrix, scr, , mzd_mul_v_general, +- mzd_xor_general, , mzd_addmul_v_general); ++ _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, matrix, scr, , oqs_sig_picnic_mzd_mul_v_general, ++ mzd_xor_general, , oqs_sig_picnic_mzd_addmul_v_general); + } + #endif + #endif +@@ -1026,67 +1026,67 @@ static void mpc_lowmc_call_verify(lowmc_t const* lowmc, mzd_local_t const* p, vi + #ifdef WITH_SSE2 + mpc_lowmc_call_def(mpc_lowmc_call_128_sse, mpc_lowmc_call_verify_128_sse, + _mpc_sbox_layer_bitsliced_128_sse, _mpc_sbox_layer_bitsliced_verify_128_sse, +- mzd_mul_v_sse, mzd_mul_vl_sse_128, mzd_xor_sse, mzd_xor_sse, mzd_mul_v_sse, +- mzd_mul_vl_sse, mzd_addmul_v_sse, mzd_addmul_vl_sse_128); ++ oqs_sig_picnic_mzd_mul_v_sse, oqs_sig_picnic_mzd_mul_vl_sse_128, oqs_sig_picnic_mzd_xor_sse, 
oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_mul_v_sse, ++ oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_addmul_v_sse, oqs_sig_picnic_mzd_addmul_vl_sse_128); + mpc_lowmc_call_def(mpc_lowmc_call_256_sse, mpc_lowmc_call_verify_256_sse, + _mpc_sbox_layer_bitsliced_256_sse, _mpc_sbox_layer_bitsliced_verify_256_sse, +- mzd_mul_v_sse, mzd_mul_vl_sse, mzd_xor_sse, mzd_xor_sse, mzd_mul_v_sse, +- mzd_mul_vl_sse, mzd_addmul_v_sse, mzd_addmul_vl_sse); ++ oqs_sig_picnic_mzd_mul_v_sse, oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_mul_v_sse, ++ oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_addmul_v_sse, oqs_sig_picnic_mzd_addmul_vl_sse); + #ifdef WITH_CUSTOM_INSTANCES + mpc_lowmc_call_def(mpc_lowmc_call_384_sse, mpc_lowmc_call_verify_384_sse, + _mpc_sbox_layer_bitsliced_384_sse, _mpc_sbox_layer_bitsliced_verify_384_sse, +- mzd_mul_v_sse, mzd_mul_vl_sse, mzd_xor_sse, mzd_xor_sse, mzd_mul_v_sse, +- mzd_mul_vl_sse, mzd_addmul_v_sse, mzd_addmul_vl_sse); ++ oqs_sig_picnic_mzd_mul_v_sse, oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_mul_v_sse, ++ oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_addmul_v_sse, oqs_sig_picnic_mzd_addmul_vl_sse); + mpc_lowmc_call_def(mpc_lowmc_call_512_sse, mpc_lowmc_call_verify_512_sse, + _mpc_sbox_layer_bitsliced_512_sse, _mpc_sbox_layer_bitsliced_verify_512_sse, +- mzd_mul_v_sse, mzd_mul_vl_sse, mzd_xor_sse, mzd_xor_sse, mzd_mul_v_sse, +- mzd_mul_vl_sse, mzd_addmul_v_sse, mzd_addmul_vl_sse); ++ oqs_sig_picnic_mzd_mul_v_sse, oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_mul_v_sse, ++ oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_addmul_v_sse, oqs_sig_picnic_mzd_addmul_vl_sse); + #endif + #endif + #ifdef WITH_AVX2 + mpc_lowmc_call_def(mpc_lowmc_call_256_avx, mpc_lowmc_call_verify_256_avx, + _mpc_sbox_layer_bitsliced_256_avx, 
_mpc_sbox_layer_bitsliced_verify_256_avx, +- mzd_mul_v_avx, mzd_mul_vl_avx_256, mzd_xor_avx, mzd_xor_avx, mzd_mul_v_avx, +- mzd_mul_vl_avx, mzd_addmul_v_avx, mzd_addmul_vl_avx_256); ++ oqs_sig_picnic_mzd_mul_v_avx, oqs_sig_picnic_mzd_mul_vl_avx_256, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_mul_v_avx, ++ oqs_sig_picnic_mzd_mul_vl_avx, oqs_sig_picnic_mzd_addmul_v_avx, oqs_sig_picnic_mzd_addmul_vl_avx_256); + #ifdef WITH_CUSTOM_INSTANCES + mpc_lowmc_call_def(mpc_lowmc_call_384_avx, mpc_lowmc_call_verify_384_avx, + _mpc_sbox_layer_bitsliced_512_avx, _mpc_sbox_layer_bitsliced_verify_512_avx, +- mzd_mul_v_avx, mzd_mul_vl_avx, mzd_xor_avx, mzd_xor_avx, mzd_mul_v_avx, +- mzd_mul_vl_avx, mzd_addmul_v_avx, mzd_addmul_vl_avx); ++ oqs_sig_picnic_mzd_mul_v_avx, oqs_sig_picnic_mzd_mul_vl_avx, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_mul_v_avx, ++ oqs_sig_picnic_mzd_mul_vl_avx, oqs_sig_picnic_mzd_addmul_v_avx, oqs_sig_picnic_mzd_addmul_vl_avx); + mpc_lowmc_call_def(mpc_lowmc_call_512_avx, mpc_lowmc_call_verify_512_avx, + _mpc_sbox_layer_bitsliced_512_avx, _mpc_sbox_layer_bitsliced_verify_512_avx, +- mzd_mul_v_avx, mzd_mul_vl_avx, mzd_xor_avx, mzd_xor_avx, mzd_mul_v_avx, +- mzd_mul_vl_avx, mzd_addmul_v_avx, mzd_addmul_vl_avx); ++ oqs_sig_picnic_mzd_mul_v_avx, oqs_sig_picnic_mzd_mul_vl_avx, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_mul_v_avx, ++ oqs_sig_picnic_mzd_mul_vl_avx, oqs_sig_picnic_mzd_addmul_v_avx, oqs_sig_picnic_mzd_addmul_vl_avx); + #endif + #endif + #ifdef WITH_NEON + mpc_lowmc_call_def(mpc_lowmc_call_128_neon, mpc_lowmc_call_verify_128_neon, + _mpc_sbox_layer_bitsliced_128_neon, _mpc_sbox_layer_bitsliced_verify_128_neon, +- mzd_mul_v_neon, mzd_mul_vl_neon_128, mzd_xor_neon, mzd_xor_neon, mzd_mul_v_neon, +- mzd_mul_vl_neon_multiple_of_128, mzd_addmul_v_neon, mzd_addmul_vl_neon_128); ++ oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_128, 
oqs_sig_picnic_mzd_xor_neon, oqs_sig_picnic_mzd_xor_neon, oqs_sig_picnic_mzd_mul_v_neon, ++ oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_addmul_v_neon, oqs_sig_picnic_mzd_addmul_vl_neon_128); + mpc_lowmc_call_def(mpc_lowmc_call_256_neon, mpc_lowmc_call_verify_256_neon, + _mpc_sbox_layer_bitsliced_256_neon, _mpc_sbox_layer_bitsliced_verify_256_neon, +- mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_xor_neon, mzd_xor_neon, +- mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_addmul_v_neon, +- mzd_addmul_vl_neon); ++ oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_xor_neon, oqs_sig_picnic_mzd_xor_neon, ++ oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_addmul_v_neon, ++ oqs_sig_picnic_mzd_addmul_vl_neon); + #ifdef WITH_CUSTOM_INSTANCES + mpc_lowmc_call_def(mpc_lowmc_call_384_neon, mpc_lowmc_call_verify_384_neon, + _mpc_sbox_layer_bitsliced_384_neon, _mpc_sbox_layer_bitsliced_verify_384_neon, +- mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_xor_neon, mzd_xor_neon, +- mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_addmul_v_neon, +- mzd_addmul_vl_neon); ++ oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_xor_neon, oqs_sig_picnic_mzd_xor_neon, ++ oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_addmul_v_neon, ++ oqs_sig_picnic_mzd_addmul_vl_neon); + mpc_lowmc_call_def(mpc_lowmc_call_512_neon, mpc_lowmc_call_verify_512_neon, + _mpc_sbox_layer_bitsliced_512_neon, _mpc_sbox_layer_bitsliced_verify_512_neon, +- mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_xor_neon, mzd_xor_neon, +- mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_addmul_v_neon, +- mzd_addmul_vl_neon); ++ oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_xor_neon, oqs_sig_picnic_mzd_xor_neon, ++ 
oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_addmul_v_neon, ++ oqs_sig_picnic_mzd_addmul_vl_neon); + #endif + #endif + #endif + + static void sbox_vars_clear(sbox_vars_t* vars) { + if (vars->storage) { +- mzd_local_free_multiple(vars->storage); ++ oqs_sig_picnic_mzd_local_free_multiple(vars->storage); + free(vars->storage); + memset(vars, 0, sizeof(*vars)); + } +@@ -1094,7 +1094,7 @@ static void sbox_vars_clear(sbox_vars_t* vars) { + + static sbox_vars_t* sbox_vars_init(sbox_vars_t* vars, uint32_t n, unsigned sc) { + vars->storage = calloc(11 * sc, sizeof(mzd_local_t*)); +- mzd_local_init_multiple_ex(vars->storage, 11 * sc, 1, n, false); ++ oqs_sig_picnic_mzd_local_init_multiple_ex(vars->storage, 11 * sc, 1, n, false); + + for (unsigned int i = 0; i < sc; ++i) { + vars->x0m[i] = vars->storage[11 * i + 0]; +@@ -1119,7 +1119,7 @@ static sbox_vars_t* sbox_vars_init(sbox_vars_t* vars, uint32_t n, unsigned sc) { + #define general_or_10(l, f) f##_10 + #endif + +-lowmc_implementation_f get_lowmc_implementation(const lowmc_t* lowmc) { ++lowmc_implementation_f oqs_sig_picnic_get_lowmc_implementation(const lowmc_t* lowmc) { + #ifdef WITH_OPT + #ifdef WITH_SSE2 + if (CPU_SUPPORTS_SSE2 && lowmc->n <= 128) { +@@ -1176,7 +1176,7 @@ lowmc_implementation_f get_lowmc_implementation(const lowmc_t* lowmc) { + return mpc_lowmc_call; + } + +-lowmc_verify_implementation_f get_lowmc_verify_implementation(const lowmc_t* lowmc) { ++lowmc_verify_implementation_f oqs_sig_picnic_get_lowmc_verify_implementation(const lowmc_t* lowmc) { + #ifdef WITH_OPT + #ifdef WITH_SSE2 + if (CPU_SUPPORTS_SSE2 && lowmc->n <= 128) { +diff --git a/mpc_lowmc.h b/mpc_lowmc.h +index 39f5d95..de6b38f 100644 +--- a/mpc_lowmc.h ++++ b/mpc_lowmc.h +@@ -24,7 +24,7 @@ typedef void (*lowmc_implementation_f)(lowmc_t const*, mpc_lowmc_key_t*, mzd_loc + typedef void (*lowmc_verify_implementation_f)(lowmc_t const*, mzd_local_t const*, view_t*, + in_out_shares_t*, rvec_t*, 
unsigned int); + +-lowmc_implementation_f get_lowmc_implementation(const lowmc_t* lowmc); +-lowmc_verify_implementation_f get_lowmc_verify_implementation(const lowmc_t* lowmc); ++lowmc_implementation_f oqs_sig_picnic_get_lowmc_implementation(const lowmc_t* lowmc); ++lowmc_verify_implementation_f oqs_sig_picnic_get_lowmc_verify_implementation(const lowmc_t* lowmc); + + #endif +diff --git a/mzd_additional.c b/mzd_additional.c +index a0e362d..91d15b8 100644 +--- a/mzd_additional.c ++++ b/mzd_additional.c +@@ -55,7 +55,7 @@ static uint32_t calculate_rowstride(uint32_t width) { + // In mzd_local_init_multiple we do the same, but store n mzd_local_t instances in one + // memory block. + +-mzd_local_t* mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) { ++mzd_local_t* oqs_sig_picnic_mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) { + const uint32_t width = (c + 64 - 1) / 64; + const uint32_t rowstride = calculate_rowstride(width); + +@@ -79,11 +79,11 @@ mzd_local_t* mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) { + return A; + } + +-void mzd_local_free(mzd_local_t* v) { ++void oqs_sig_picnic_mzd_local_free(mzd_local_t* v) { + aligned_free(v); + } + +-void mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_t c, bool clear) { ++void oqs_sig_picnic_mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_t c, bool clear) { + const uint32_t width = (c + 64 - 1) / 64; + const uint32_t rowstride = calculate_rowstride(width); + +@@ -111,19 +111,19 @@ void mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_ + } + } + +-void mzd_local_free_multiple(mzd_local_t** vs) { ++void oqs_sig_picnic_mzd_local_free_multiple(mzd_local_t** vs) { + if (vs) { + aligned_free(vs[0]); + } + } + +-mzd_local_t* mzd_local_copy(mzd_local_t* dst, mzd_local_t const* src) { ++mzd_local_t* oqs_sig_picnic_mzd_local_copy(mzd_local_t* dst, mzd_local_t const* src) { + if (dst == src) { + return dst; + } + + if (!dst) { +- dst = 
mzd_local_init(src->nrows, src->ncols); ++ dst = oqs_sig_picnic_mzd_local_init(src->nrows, src->ncols); + } + + memcpy(ASSUME_ALIGNED(FIRST_ROW(dst), 32), ASSUME_ALIGNED(CONST_FIRST_ROW(src), 32), +@@ -131,13 +131,13 @@ mzd_local_t* mzd_local_copy(mzd_local_t* dst, mzd_local_t const* src) { + return dst; + } + +-void mzd_local_clear(mzd_local_t* c) { ++void oqs_sig_picnic_mzd_local_clear(mzd_local_t* c) { + memset(ASSUME_ALIGNED(FIRST_ROW(c), 32), 0, c->nrows * sizeof(word) * c->rowstride); + } + +-void mzd_shift_right(mzd_local_t* res, mzd_local_t const* val, unsigned count) { ++void oqs_sig_picnic_mzd_shift_right(mzd_local_t* res, mzd_local_t const* val, unsigned count) { + if (!count) { +- mzd_local_copy(res, val); ++ oqs_sig_picnic_mzd_local_copy(res, val); + return; + } + +@@ -154,9 +154,9 @@ void mzd_shift_right(mzd_local_t* res, mzd_local_t const* val, unsigned count) { + *resptr = *valptr >> count; + } + +-void mzd_shift_left(mzd_local_t* res, mzd_local_t const* val, unsigned count) { ++void oqs_sig_picnic_mzd_shift_left(mzd_local_t* res, mzd_local_t const* val, unsigned count) { + if (!count) { +- mzd_local_copy(res, val); ++ oqs_sig_picnic_mzd_local_copy(res, val); + return; + } + +@@ -240,7 +240,7 @@ static inline mzd_local_t* mzd_and_neon(mzd_local_t* res, mzd_local_t const* fir + #endif + #endif + +-mzd_local_t* mzd_and(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { ++mzd_local_t* oqs_sig_picnic_mzd_and(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { + #ifdef WITH_OPT + #ifdef WITH_AVX2 + if (CPU_SUPPORTS_AVX2 && first->ncols >= 256 && ((first->ncols & (word_size_bits - 1)) == 0)) { +@@ -274,7 +274,7 @@ mzd_local_t* mzd_and(mzd_local_t* res, mzd_local_t const* first, mzd_local_t con + #ifdef WITH_OPT + #ifdef WITH_SSE2 + ATTRIBUTE_TARGET("sse2") +-mzd_local_t* mzd_xor_sse(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { ++mzd_local_t* oqs_sig_picnic_mzd_xor_sse(mzd_local_t* res, 
mzd_local_t const* first, mzd_local_t const* second) { + unsigned int width = first->rowstride; + word* resptr = FIRST_ROW(res); + word const* firstptr = CONST_FIRST_ROW(first); +@@ -295,7 +295,7 @@ mzd_local_t* mzd_xor_sse(mzd_local_t* res, mzd_local_t const* first, mzd_local_t + + #ifdef WITH_AVX2 + ATTRIBUTE_TARGET("avx2") +-mzd_local_t* mzd_xor_avx(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { ++mzd_local_t* oqs_sig_picnic_mzd_xor_avx(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { + unsigned int width = first->rowstride; + word* resptr = FIRST_ROW(res); + word const* firstptr = CONST_FIRST_ROW(first); +@@ -315,7 +315,7 @@ mzd_local_t* mzd_xor_avx(mzd_local_t* res, mzd_local_t const* first, mzd_local_t + #endif + + #ifdef WITH_NEON +-inline mzd_local_t* mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first, ++inline mzd_local_t* oqs_sig_picnic_mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first, + mzd_local_t const* second) { + unsigned int width = first->rowstride; + word* resptr = FIRST_ROW(res); +@@ -336,28 +336,28 @@ inline mzd_local_t* mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first, + #endif + #endif + +-mzd_local_t* mzd_xor(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { ++mzd_local_t* oqs_sig_picnic_mzd_xor(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) { + #ifdef WITH_OPT + #ifdef WITH_AVX2 + if (CPU_SUPPORTS_AVX2 && first->ncols >= 256 && ((first->ncols & (word_size_bits - 1)) == 0)) { +- return mzd_xor_avx(res, first, second); ++ return oqs_sig_picnic_mzd_xor_avx(res, first, second); + } + #endif + #ifdef WITH_SSE2 + if (CPU_SUPPORTS_SSE2 && ((first->ncols & (word_size_bits - 1)) == 0)) { +- return mzd_xor_sse(res, first, second); ++ return oqs_sig_picnic_mzd_xor_sse(res, first, second); + } + #endif + #ifdef WITH_NEON + if (CPU_SUPPORTS_NEON && ((first->ncols & (word_size_bits - 1)) == 0)) { +- return mzd_xor_neon(res, first, second); 
++ return oqs_sig_picnic_mzd_xor_neon(res, first, second); + } + #endif + #endif +- return mzd_xor_general(res, first, second); ++ return oqs_sig_picnic_mzd_xor_general(res, first, second); + } + +-mzd_local_t* mzd_xor_general(mzd_local_t* res, mzd_local_t const* first, ++mzd_local_t* oqs_sig_picnic_mzd_xor_general(mzd_local_t* res, mzd_local_t const* first, + mzd_local_t const* second) { + unsigned int width = first->width; + word* resptr = FIRST_ROW(res); +@@ -371,37 +371,37 @@ mzd_local_t* mzd_xor_general(mzd_local_t* res, mzd_local_t const* first, + return res; + } + +-mzd_local_t* mzd_mul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) { ++mzd_local_t* oqs_sig_picnic_mzd_mul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) { + if (At->nrows != v->ncols) { + // number of columns does not match + return NULL; + } + +- mzd_local_clear(c); +- return mzd_addmul_v(c, v, At); ++ oqs_sig_picnic_mzd_local_clear(c); ++ return oqs_sig_picnic_mzd_addmul_v(c, v, At); + } + +-mzd_local_t* mzd_mul_v_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) { ++mzd_local_t* oqs_sig_picnic_mzd_mul_v_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) { + + if (At->nrows != v->ncols) { + // number of columns does not match + return NULL; + } + +- mzd_local_clear(c); +- return mzd_addmul_v_general(c, v, At); ++ oqs_sig_picnic_mzd_local_clear(c); ++ return oqs_sig_picnic_mzd_addmul_v_general(c, v, At); + } + + #ifdef WITH_OPT + #ifdef WITH_SSE2 + ATTRIBUTE_TARGET("sse2") +-mzd_local_t* mzd_mul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { +- mzd_local_clear(c); +- return mzd_addmul_v_sse(c, v, A); ++mzd_local_t* oqs_sig_picnic_mzd_mul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++ oqs_sig_picnic_mzd_local_clear(c); ++ return oqs_sig_picnic_mzd_addmul_v_sse(c, v, A); + } + + ATTRIBUTE_TARGET("sse2") +-mzd_local_t* mzd_addmul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t 
const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + word* cptr = FIRST_ROW(c); + word const* vptr = CONST_FIRST_ROW(v); + const unsigned int width = v->width; +@@ -428,13 +428,13 @@ mzd_local_t* mzd_addmul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t + + #ifdef WITH_AVX2 + ATTRIBUTE_TARGET("avx2") +-mzd_local_t* mzd_mul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { +- mzd_local_clear(c); +- return mzd_addmul_v_avx(c, v, A); ++mzd_local_t* oqs_sig_picnic_mzd_mul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++ oqs_sig_picnic_mzd_local_clear(c); ++ return oqs_sig_picnic_mzd_addmul_v_avx(c, v, A); + } + + ATTRIBUTE_TARGET("avx2") +-mzd_local_t* mzd_addmul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + word* cptr = FIRST_ROW(c); + word const* vptr = CONST_FIRST_ROW(v); + const unsigned int width = v->width; +@@ -460,12 +460,12 @@ mzd_local_t* mzd_addmul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t + #endif + + #ifdef WITH_NEON +-mzd_local_t* mzd_mul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { +- mzd_local_clear(c); +- return mzd_addmul_v_neon(c, v, A); ++mzd_local_t* oqs_sig_picnic_mzd_mul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++ oqs_sig_picnic_mzd_local_clear(c); ++ return oqs_sig_picnic_mzd_addmul_v_neon(c, v, A); + } + +-inline mzd_local_t* mzd_addmul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++inline mzd_local_t* oqs_sig_picnic_mzd_addmul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + word* cptr = FIRST_ROW(c); + word const* vptr = CONST_FIRST_ROW(v); + const unsigned int width = v->width; +@@ -491,7 +491,7 @@ inline mzd_local_t* mzd_addmul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_ + 
#endif + #endif + +-mzd_local_t* mzd_addmul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_addmul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + if (A->ncols != c->ncols || A->nrows != v->ncols) { + // number of columns does not match + return NULL; +@@ -501,26 +501,26 @@ mzd_local_t* mzd_addmul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t cons + if (A->nrows % (sizeof(word) * 8) == 0) { + #ifdef WITH_AVX2 + if (CPU_SUPPORTS_AVX2 && (A->ncols & 0xff) == 0) { +- return mzd_addmul_v_avx(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_v_avx(c, v, A); + } + #endif + #ifdef WITH_SSE2 + if (CPU_SUPPORTS_SSE2 && (A->ncols & 0x7f) == 0) { +- return mzd_addmul_v_sse(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_v_sse(c, v, A); + } + #endif + #ifdef WITH_NEON + if (CPU_SUPPORTS_NEON && (A->ncols & 0x7f) == 0) { +- return mzd_addmul_v_neon(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_v_neon(c, v, A); + } + #endif + } + #endif + +- return mzd_addmul_v_general(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_v_general(c, v, A); + } + +-mzd_local_t* mzd_addmul_v_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + + const unsigned int len = A->width; + const unsigned int rowstride = A->rowstride; +@@ -547,7 +547,7 @@ mzd_local_t* mzd_addmul_v_general(mzd_local_t* c, mzd_local_t const* v, mzd_loca + return c; + } + +-bool mzd_local_equal(mzd_local_t const* first, mzd_local_t const* second) { ++bool oqs_sig_picnic_mzd_local_equal(mzd_local_t const* first, mzd_local_t const* second) { + if (first == second) { + return true; + } +@@ -587,8 +587,8 @@ static void xor_comb(const unsigned int len, word* Brow, mzd_local_t const* A, + * Pre-compute matrices for faster mzd_addmul_v computions. 
+ * + */ +-mzd_local_t* mzd_precompute_matrix_lookup(mzd_local_t const* A) { +- mzd_local_t* B = mzd_local_init_ex(32 * A->nrows, A->ncols, true); ++mzd_local_t* oqs_sig_picnic_mzd_precompute_matrix_lookup(mzd_local_t const* A) { ++ mzd_local_t* B = oqs_sig_picnic_mzd_local_init_ex(32 * A->nrows, A->ncols, true); + + const unsigned int len = A->width; + +@@ -608,7 +608,7 @@ mzd_local_t* mzd_precompute_matrix_lookup(mzd_local_t const* A) { + #ifdef WITH_OPT + #ifdef WITH_SSE2 + ATTRIBUTE_TARGET("sse2") +-mzd_local_t* mzd_mul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + word const* vptr = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16); + const unsigned int width = v->width; + static const unsigned int moff2 = 256; +@@ -630,13 +630,13 @@ mzd_local_t* mzd_mul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_ + } + + ATTRIBUTE_TARGET("sse2") +-mzd_local_t* mzd_mul_vl_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { +- mzd_local_clear(c); +- return mzd_addmul_vl_sse(c, v, A); ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++ oqs_sig_picnic_mzd_local_clear(c); ++ return oqs_sig_picnic_mzd_addmul_vl_sse(c, v, A); + } + + ATTRIBUTE_TARGET("sse2") +-mzd_local_t* mzd_addmul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + word const* vptr = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16); + const unsigned int width = v->width; + static const unsigned int moff2 = 256; +@@ -658,7 +658,7 @@ mzd_local_t* mzd_addmul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_loc + } + + ATTRIBUTE_TARGET("sse2") +-mzd_local_t* mzd_addmul_vl_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* 
oqs_sig_picnic_mzd_addmul_vl_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + word const* vptr = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16); + const unsigned int width = v->width; + const unsigned int rowstride = A->rowstride; +@@ -683,7 +683,7 @@ mzd_local_t* mzd_addmul_vl_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t + + #ifdef WITH_AVX2 + ATTRIBUTE_TARGET("avx2") +-mzd_local_t* mzd_mul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + word const* vptr = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16); + const unsigned int width = v->width; + static const unsigned int moff2 = 256; +@@ -705,7 +705,7 @@ mzd_local_t* mzd_mul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_ + } + + ATTRIBUTE_TARGET("avx2") +-mzd_local_t* mzd_addmul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + word const* vptr = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16); + const unsigned int width = v->width; + static const unsigned int moff2 = 256; +@@ -727,13 +727,13 @@ mzd_local_t* mzd_addmul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_loc + } + + ATTRIBUTE_TARGET("avx2") +-mzd_local_t* mzd_mul_vl_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { +- mzd_local_clear(c); +- return mzd_addmul_vl_avx(c, v, A); ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++ oqs_sig_picnic_mzd_local_clear(c); ++ return oqs_sig_picnic_mzd_addmul_vl_avx(c, v, A); + } + + ATTRIBUTE_TARGET("avx2") +-mzd_local_t* mzd_addmul_vl_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + word const* vptr = 
ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16); + const unsigned int width = v->width; + const unsigned int rowstride = A->rowstride; +@@ -757,7 +757,7 @@ mzd_local_t* mzd_addmul_vl_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t + #endif + + #ifdef WITH_NEON +-mzd_local_t* mzd_mul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + + word const* vptr = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16); + const unsigned int width = v->width; +@@ -780,7 +780,7 @@ mzd_local_t* mzd_mul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_local + return c; + } + +-mzd_local_t* mzd_addmul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + word const* vptr = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16); + static const unsigned int moff2 = 256; + +@@ -804,13 +804,13 @@ mzd_local_t* mzd_addmul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_lo + return c; + } + +-mzd_local_t* mzd_mul_vl_neon_multiple_of_128(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) { +- mzd_local_clear(c); ++ oqs_sig_picnic_mzd_local_clear(c); +- return mzd_addmul_vl_neon(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_vl_neon(c, v, A); + } + +-mzd_local_t* mzd_addmul_vl_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + word const* vptr = ASSUME_ALIGNED(CONST_FIRST_ROW(v), alignof(uint32x4_t)); + const unsigned int width = v->width; + const unsigned int rowstride = A->rowstride; +@@ -835,7 +835,7 @@ mzd_local_t* mzd_addmul_vl_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_ + #endif + #endif + +-mzd_local_t* mzd_mul_vl(mzd_local_t* c, mzd_local_t const*
v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + if (A->nrows != 32 * v->ncols) { + // number of columns does not match + return NULL; +@@ -846,36 +846,36 @@ mzd_local_t* mzd_mul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* + #ifdef WITH_AVX2 + if (CPU_SUPPORTS_AVX2) { + if (A->ncols == 256) { +- return mzd_mul_vl_avx_256(c, v, A); ++ return oqs_sig_picnic_mzd_mul_vl_avx_256(c, v, A); + } + } + #endif + #ifdef WITH_SSE2 + if (CPU_SUPPORTS_SSE2) { + if (A->ncols == 128) { +- return mzd_mul_vl_sse_128(c, v, A); ++ return oqs_sig_picnic_mzd_mul_vl_sse_128(c, v, A); + } + } + #endif + #ifdef WITH_NEON + if (CPU_SUPPORTS_NEON) { + if (A->ncols == 128) { +- return mzd_mul_vl_neon_128(c, v, A); ++ return oqs_sig_picnic_mzd_mul_vl_neon_128(c, v, A); + } + } + #endif + } + #endif +- mzd_local_clear(c); +- return mzd_addmul_vl(c, v, A); ++ oqs_sig_picnic_mzd_local_clear(c); ++ return oqs_sig_picnic_mzd_addmul_vl(c, v, A); + } + +-mzd_local_t* mzd_mul_vl_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { +- mzd_local_clear(c); +- return mzd_addmul_vl_general(c, v, A); ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++ oqs_sig_picnic_mzd_local_clear(c); ++ return oqs_sig_picnic_mzd_addmul_vl_general(c, v, A); + } + +-mzd_local_t* mzd_addmul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + if (A->ncols != c->ncols || A->nrows != 32 * v->ncols) { + // number of columns does not match + return NULL; +@@ -886,39 +886,39 @@ mzd_local_t* mzd_addmul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t con + #ifdef WITH_AVX2 + if (CPU_SUPPORTS_AVX2) { + if (A->ncols == 256) { +- return mzd_addmul_vl_avx_256(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_vl_avx_256(c, v, A); + } + if 
((A->ncols & 0xff) == 0) { +- return mzd_addmul_vl_avx(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_vl_avx(c, v, A); + } + } + #endif + #ifdef WITH_SSE2 + if (CPU_SUPPORTS_SSE2) { + if (A->ncols == 128) { +- return mzd_addmul_vl_sse_128(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_vl_sse_128(c, v, A); + } + if ((A->ncols & 0x7f) == 0) { +- return mzd_addmul_vl_sse(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_vl_sse(c, v, A); + } + } + #endif + #ifdef WITH_NEON + if (CPU_SUPPORTS_NEON) { + if (A->ncols == 128) { +- return mzd_addmul_vl_neon_128(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_vl_neon_128(c, v, A); + } + if ((A->ncols & 0x7f) == 0) { +- return mzd_addmul_vl_neon(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_vl_neon(c, v, A); + } + } + #endif + } + #endif +- return mzd_addmul_vl_general(c, v, A); ++ return oqs_sig_picnic_mzd_addmul_vl_general(c, v, A); + } + +-mzd_local_t* mzd_addmul_vl_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) { + const unsigned int len = A->width; + word* cptr = FIRST_ROW(c); + word const* vptr = CONST_FIRST_ROW(v); +diff --git a/mzd_additional.h b/mzd_additional.h +index 7992128..c084e6d 100644 +--- a/mzd_additional.h ++++ b/mzd_additional.h +@@ -27,44 +27,44 @@ typedef struct { + uint64_t rows[]; + } mzd_local_t ATTR_ALIGNED(32); + +-mzd_local_t* mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) ATTR_ASSUME_ALIGNED(32); ++mzd_local_t* oqs_sig_picnic_mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) ATTR_ASSUME_ALIGNED(32); + +-#define mzd_local_init(r, c) mzd_local_init_ex(r, c, true) ++#define oqs_sig_picnic_mzd_local_init(r, c) oqs_sig_picnic_mzd_local_init_ex(r, c, true) + +-void mzd_local_free(mzd_local_t* v); ++void oqs_sig_picnic_mzd_local_free(mzd_local_t* v); + +-void mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_t c, bool clear) ++void 
oqs_sig_picnic_mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_t c, bool clear) + ATTR_NONNULL_ARG(1); + +-#define mzd_local_init_multiple(dst, n, r, c) mzd_local_init_multiple_ex(dst, n, r, c, true) ++#define oqs_sig_picnic_mzd_local_init_multiple(dst, n, r, c) oqs_sig_picnic_mzd_local_init_multiple_ex(dst, n, r, c, true) + + /** +- * mzd_free for mzd_local_init_multiple. ++ * oqs_sig_picnic_mzd_free for oqs_sig_picnic_mzd_local_init_multiple. + */ +-void mzd_local_free_multiple(mzd_local_t** vs); ++void oqs_sig_picnic_mzd_local_free_multiple(mzd_local_t** vs); + /** +- * Improved mzd_copy for specific memory layouts. ++ * Improved oqs_sig_picnic_mzd_copy for specific memory layouts. + */ +-mzd_local_t* mzd_local_copy(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL_ARG(2); ++mzd_local_t* oqs_sig_picnic_mzd_local_copy(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL_ARG(2); + +-void mzd_local_clear(mzd_local_t* c) ATTR_NONNULL; ++void oqs_sig_picnic_mzd_local_clear(mzd_local_t* c) ATTR_NONNULL; + +-void mzd_shift_right(mzd_local_t* res, mzd_local_t const* val, unsigned count) ATTR_NONNULL; ++void oqs_sig_picnic_mzd_shift_right(mzd_local_t* res, mzd_local_t const* val, unsigned count) ATTR_NONNULL; + +-void mzd_shift_left(mzd_local_t* res, mzd_local_t const* val, unsigned count) ATTR_NONNULL; ++void oqs_sig_picnic_mzd_shift_left(mzd_local_t* res, mzd_local_t const* val, unsigned count) ATTR_NONNULL; + +-mzd_local_t* mzd_and(mzd_local_t* res, mzd_local_t const* first, ++mzd_local_t* oqs_sig_picnic_mzd_and(mzd_local_t* res, mzd_local_t const* first, + mzd_local_t const* second) ATTR_NONNULL; + +-mzd_local_t* mzd_xor(mzd_local_t* res, mzd_local_t const* first, ++mzd_local_t* oqs_sig_picnic_mzd_xor(mzd_local_t* res, mzd_local_t const* first, + mzd_local_t const* second) ATTR_NONNULL; +-mzd_local_t* mzd_xor_sse(mzd_local_t* res, mzd_local_t const* first, ++mzd_local_t* oqs_sig_picnic_mzd_xor_sse(mzd_local_t* res, mzd_local_t const* 
first, + mzd_local_t const* second) ATTR_NONNULL; +-mzd_local_t* mzd_xor_avx(mzd_local_t* res, mzd_local_t const* first, ++mzd_local_t* oqs_sig_picnic_mzd_xor_avx(mzd_local_t* res, mzd_local_t const* first, + mzd_local_t const* second) ATTR_NONNULL; +-mzd_local_t* mzd_xor_general(mzd_local_t* res, mzd_local_t const* first, ++mzd_local_t* oqs_sig_picnic_mzd_xor_general(mzd_local_t* res, mzd_local_t const* first, + mzd_local_t const* second) ATTR_NONNULL; +-mzd_local_t* mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first, ++mzd_local_t* oqs_sig_picnic_mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first, + mzd_local_t const* second) ATTR_NONNULL; + + /** +@@ -77,85 +77,85 @@ mzd_local_t* mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first, + * second vector + * \returns true if both vectors are equal, false otherwise. + */ +-bool mzd_local_equal(mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL; ++bool oqs_sig_picnic_mzd_local_equal(mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL; + + /** + * Compute v * A optimized for v being a vector. 
+ */ +-mzd_local_t* mzd_mul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL; +-mzd_local_t* mzd_mul_v_general(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_mul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL; ++mzd_local_t* oqs_sig_picnic_mzd_mul_v_general(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* At) ATTR_NONNULL; +-mzd_local_t* mzd_mul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_mul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_mul_v_neon(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_mul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; ++mzd_local_t* oqs_sig_picnic_mzd_mul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL; ++mzd_local_t* oqs_sig_picnic_mzd_mul_v_neon(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; + + /** + * Compute c + v * A optimized for c and v being vectors. 
+ */ +-mzd_local_t* mzd_addmul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL; +-mzd_local_t* mzd_addmul_v_general(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_addmul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL; ++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_general(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_addmul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A); +-mzd_local_t* mzd_addmul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A); +-mzd_local_t* mzd_addmul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A); ++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A); ++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A); ++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A); + + /** + * Compute v * A optimized for v being a vector. 
+ */ +-mzd_local_t* mzd_mul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL; +-mzd_local_t* mzd_mul_vl_general(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL; ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_general(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_mul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_mul_vl_sse(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_sse(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_mul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_mul_vl_avx(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_avx(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_mul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; + +-mzd_local_t* mzd_mul_vl_neon_multiple_of_128(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; + /** + * Compute c + v * A optimized for c and v being vectors. 
+ */ +-mzd_local_t* mzd_addmul_vl(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* At) ATTR_NONNULL; +-mzd_local_t* mzd_addmul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_addmul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_addmul_vl_sse(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_sse(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_addmul_vl_avx(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_avx(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_addmul_vl_general(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_general(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_addmul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; +-mzd_local_t* mzd_addmul_vl_neon(mzd_local_t* c, mzd_local_t const* v, ++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_neon(mzd_local_t* c, mzd_local_t const* v, + mzd_local_t const* A) ATTR_NONNULL; + + /** + * Compute v * A optimized for v being a vector. + */ +-void mzd_mul_vlm(mzd_local_t** c, mzd_local_t const* const* v, mzd_local_t const* At, ++void oqs_sig_picnic_mzd_mul_vlm(mzd_local_t** c, mzd_local_t const* const* v, mzd_local_t const* At, + unsigned int sc) ATTR_NONNULL; + + /** + * Compute c + v * A optimized for c and v being vectors. 
+ */ +-void mzd_addmul_vlm(mzd_local_t** c, mzd_local_t const* const* v, mzd_local_t const* At, ++void oqs_sig_picnic_mzd_addmul_vlm(mzd_local_t** c, mzd_local_t const* const* v, mzd_local_t const* At, + unsigned int sc) ATTR_NONNULL; + + /** +- * Pre-compute matrices for faster mzd_addmul_v computions. ++ * Pre-compute matrices for faster oqs_sig_picnic_mzd_addmul_v computions. + * + */ +-mzd_local_t* mzd_precompute_matrix_lookup(mzd_local_t const* A) ATTR_NONNULL; ++mzd_local_t* oqs_sig_picnic_mzd_precompute_matrix_lookup(mzd_local_t const* A) ATTR_NONNULL; + + #define ROW(v, r) ((word*)(((uint8_t*)(v)) + 32 + (v)->rowstride * (r) * sizeof(word))) + #define CONST_ROW(v, r) \ +@@ -167,7 +167,7 @@ mzd_local_t* mzd_precompute_matrix_lookup(mzd_local_t const* A) ATTR_NONNULL; + #define WRITE_BIT(w, spot, value) \ + ((w) = (((w) & ~(UINT64_C(1) << (spot))) | (-(word)(value) & (UINT64_C(1) << (spot))))) + +-#define mzd_local_write_bit(v, r, c, b) \ ++#define oqs_sig_picnic_mzd_local_write_bit(v, r, c, b) \ + WRITE_BIT(ROW(v, r)[c / (sizeof(word) * 8)], c % (sizeof(word) * 8), b) + + #endif +diff --git a/picnic.c b/picnic.c +index 91eeed3..55cb19c 100644 +--- a/picnic.c ++++ b/picnic.c +@@ -22,7 +22,7 @@ + #include "randomness.h" + + const picnic_instance_t* picnic_instance_get(picnic_params_t param) { +- return get_instance(param); ++ return oqs_sig_picnic_get_instance(param); + } + + size_t PICNIC_CALLING_CONVENTION picnic_signature_size(picnic_params_t param) { +@@ -110,22 +110,22 @@ int PICNIC_CALLING_CONVENTION picnic_sk_to_pk(const picnic_privatekey_t* sk, + uint8_t* pk_pt = &pk->data[1]; + const uint8_t* sk_pt = &sk->data[1 + input_size]; + +- mzd_local_t* plaintext = mzd_local_init_ex(1, instance->lowmc.n, false); +- mzd_local_t* privkey = mzd_local_init_ex(1, instance->lowmc.k, false); ++ mzd_local_t* plaintext = oqs_sig_picnic_mzd_local_init_ex(1, instance->lowmc.n, false); ++ mzd_local_t* privkey = oqs_sig_picnic_mzd_local_init_ex(1, instance->lowmc.k, 
false); + +- mzd_from_char_array(plaintext, sk_pt, output_size); +- mzd_from_char_array(privkey, sk_sk, input_size); ++ oqs_sig_picnic_mzd_from_char_array(plaintext, sk_pt, output_size); ++ oqs_sig_picnic_mzd_from_char_array(privkey, sk_sk, input_size); + + // compute public key +- mzd_local_t* ciphertext = lowmc_call(&instance->lowmc, privkey, plaintext); ++ mzd_local_t* ciphertext = oqs_sig_picnic_lowmc_call(&instance->lowmc, privkey, plaintext); + + pk->data[0] = param; + memcpy(pk_pt, sk_pt, output_size); +- mzd_to_char_array(pk_c, ciphertext, output_size); ++ oqs_sig_picnic_mzd_to_char_array(pk_c, ciphertext, output_size); + +- mzd_local_free(ciphertext); +- mzd_local_free(privkey); +- mzd_local_free(plaintext); ++ oqs_sig_picnic_mzd_local_free(ciphertext); ++ oqs_sig_picnic_mzd_local_free(privkey); ++ oqs_sig_picnic_mzd_local_free(plaintext); + + return 0; + } +@@ -156,21 +156,21 @@ int PICNIC_CALLING_CONVENTION picnic_validate_keypair(const picnic_privatekey_t* + return -1; + } + +- mzd_local_t* plaintext = mzd_local_init_ex(1, instance->lowmc.n, false); +- mzd_local_t* privkey = mzd_local_init_ex(1, instance->lowmc.k, false); ++ mzd_local_t* plaintext = oqs_sig_picnic_mzd_local_init_ex(1, instance->lowmc.n, false); ++ mzd_local_t* privkey = oqs_sig_picnic_mzd_local_init_ex(1, instance->lowmc.k, false); + +- mzd_from_char_array(plaintext, sk_pt, instance->output_size); +- mzd_from_char_array(privkey, sk_sk, instance->input_size); ++ oqs_sig_picnic_mzd_from_char_array(plaintext, sk_pt, instance->output_size); ++ oqs_sig_picnic_mzd_from_char_array(privkey, sk_sk, instance->input_size); + + // compute public key +- mzd_local_t* ciphertext = lowmc_call(&instance->lowmc, privkey, plaintext); ++ mzd_local_t* ciphertext = oqs_sig_picnic_lowmc_call(&instance->lowmc, privkey, plaintext); + + uint8_t buffer[MAX_LOWMC_BLOCK_SIZE]; +- mzd_to_char_array(buffer, ciphertext, output_size); ++ oqs_sig_picnic_mzd_to_char_array(buffer, ciphertext, output_size); + +- 
mzd_local_free(ciphertext); +- mzd_local_free(privkey); +- mzd_local_free(plaintext); ++ oqs_sig_picnic_mzd_local_free(ciphertext); ++ oqs_sig_picnic_mzd_local_free(privkey); ++ oqs_sig_picnic_mzd_local_free(plaintext); + + return memcmp(buffer, pk_c, output_size); + } +@@ -195,7 +195,7 @@ int PICNIC_CALLING_CONVENTION picnic_sign(const picnic_privatekey_t* sk, const u + const uint8_t* sk_c = &sk->data[1 + input_size + output_size]; + const uint8_t* sk_pt = &sk->data[1 + input_size]; + +- return fis_sign(instance, sk_pt, sk_sk, sk_c, message, message_len, signature, signature_len) ++ return oqs_sig_picnic_fis_sign(instance, sk_pt, sk_sk, sk_c, message, message_len, signature, signature_len) + ? 0 + : -1; + } +@@ -218,7 +218,7 @@ int PICNIC_CALLING_CONVENTION picnic_verify(const picnic_publickey_t* pk, const + const uint8_t* pk_c = &pk->data[1 + output_size]; + const uint8_t* pk_pt = &pk->data[1]; + +- return fis_verify(instance, pk_pt, pk_c, message, message_len, signature, signature_len) ? 0 : -1; ++ return oqs_sig_picnic_fis_verify(instance, pk_pt, pk_c, message, message_len, signature, signature_len) ? 
0 : -1; + } + + void picnic_visualize(FILE* out, const uint8_t* public_key, size_t public_key_size, +@@ -233,7 +233,7 @@ void picnic_visualize(FILE* out, const uint8_t* public_key, size_t public_key_si + return; + } + +- visualize_signature(out, instance, msg, msglen, sig, siglen); ++ oqs_sig_picnic_visualize_signature(out, instance, msg, msglen, sig, siglen); + } + + const char* PICNIC_CALLING_CONVENTION picnic_get_param_name(picnic_params_t parameters) { +diff --git a/picnic_impl.c b/picnic_impl.c +index f1a308b..ced069e 100644 +--- a/picnic_impl.c ++++ b/picnic_impl.c +@@ -246,10 +246,10 @@ static void mzd_to_bitstream(bitstream_t* bs, const mzd_local_t* v, const size_t + const uint64_t* d = &CONST_FIRST_ROW(v)[v->width - 1]; + size_t bits = size; + for (; bits >= sizeof(uint64_t) * 8; bits -= sizeof(uint64_t) * 8, --d) { +- bitstream_put_bits(bs, *d, sizeof(uint64_t) * 8); ++ oqs_sig_picnic_bitstream_put_bits(bs, *d, sizeof(uint64_t) * 8); + } + if (bits) { +- bitstream_put_bits(bs, *d >> (sizeof(uint64_t) * 8 - bits), bits); ++ oqs_sig_picnic_bitstream_put_bits(bs, *d >> (sizeof(uint64_t) * 8 - bits), bits); + } + } + +@@ -259,10 +259,10 @@ static void mzd_from_bitstream(bitstream_t* bs, mzd_local_t* v, const size_t siz + + size_t bits = size; + for (; bits >= sizeof(uint64_t) * 8; bits -= sizeof(uint64_t) * 8, --d) { +- *d = bitstream_get_bits(bs, sizeof(uint64_t) * 8); ++ *d = oqs_sig_picnic_bitstream_get_bits(bs, sizeof(uint64_t) * 8); + } + if (bits) { +- *d = bitstream_get_bits(bs, bits) << (sizeof(uint64_t) * 8 - bits); ++ *d = oqs_sig_picnic_bitstream_get_bits(bs, bits) << (sizeof(uint64_t) * 8 - bits); + --d; + } + for (; d >= f; --d) { +@@ -271,11 +271,11 @@ static void mzd_from_bitstream(bitstream_t* bs, mzd_local_t* v, const size_t siz + } + + static void uint64_to_bitstream(bitstream_t* bs, const uint64_t v) { +- bitstream_put_bits(bs, v >> (64 - 30), 30); ++ oqs_sig_picnic_bitstream_put_bits(bs, v >> (64 - 30), 30); + } + + static uint64_t 
uint64_from_bitstream(bitstream_t* bs) { +- return bitstream_get_bits(bs, 30) << (64 - 30); ++ return oqs_sig_picnic_bitstream_get_bits(bs, 30) << (64 - 30); + } + + static void compress_view(uint8_t* dst, const picnic_instance_t* pp, const view_t* views, +@@ -356,13 +356,13 @@ static void decompress_random_tape_new(rvec_t* rvec, const picnic_instance_t* pp + } + + static void mzd_share(mzd_local_t* shared_value[SC_PROOF]) { +- mzd_xor(shared_value[2], shared_value[0], shared_value[2]); +- mzd_xor(shared_value[2], shared_value[1], shared_value[2]); ++ oqs_sig_picnic_mzd_xor(shared_value[2], shared_value[0], shared_value[2]); ++ oqs_sig_picnic_mzd_xor(shared_value[2], shared_value[1], shared_value[2]); + } + + static void mzd_unshare(mzd_local_t* dst, mzd_local_t* shared_value[SC_PROOF]) { +- mzd_xor(dst, shared_value[0], shared_value[1]); +- mzd_xor(dst, dst, shared_value[2]); ++ oqs_sig_picnic_mzd_xor(dst, shared_value[0], shared_value[1]); ++ oqs_sig_picnic_mzd_xor(dst, dst, shared_value[2]); + } + + static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key, +@@ -388,13 +388,13 @@ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key, + view_t* views = calloc(sizeof(view_t), view_count); + if (lowmc->m != 10) { + for (size_t i = 0; i < view_count; ++i) { +- mzd_local_init_multiple_ex(views[i].s, SC_PROOF, 1, lowmc_n, false); ++ oqs_sig_picnic_mzd_local_init_multiple_ex(views[i].s, SC_PROOF, 1, lowmc_n, false); + } + } + + in_out_shares_t in_out_shares[2]; +- mzd_local_init_multiple_ex(in_out_shares[0].s, SC_PROOF, 1, lowmc_k, false); +- mzd_local_init_multiple_ex(in_out_shares[1].s, SC_PROOF, 1, lowmc_n, false); ++ oqs_sig_picnic_mzd_local_init_multiple_ex(in_out_shares[0].s, SC_PROOF, 1, lowmc_k, false); ++ oqs_sig_picnic_mzd_local_init_multiple_ex(in_out_shares[1].s, SC_PROOF, 1, lowmc_n, false); + + // Generate seeds + START_TIMING; +@@ -418,7 +418,7 @@ static bool sign_impl(const picnic_instance_t* pp, const 
uint8_t* private_key, + + START_TIMING; + mzd_local_t* shared_key[SC_PROOF]; +- mzd_local_init_multiple(shared_key, SC_PROOF, 1, lowmc_k); ++ oqs_sig_picnic_mzd_local_init_multiple(shared_key, SC_PROOF, 1, lowmc_k); + END_TIMING(timing_and_size->sign.secret_sharing); + + // START_TIMING; TODO: I guess this shouldn't be here +@@ -426,7 +426,7 @@ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key, + rvec_t* rvec = calloc(sizeof(rvec_t), lowmc_r); // random tapes for and-gates + if (lowmc->m != 10) { + for (unsigned int i = 0; i < lowmc_r; ++i) { +- mzd_local_init_multiple_ex(rvec[i].s, SC_PROOF, 1, lowmc_n, false); ++ oqs_sig_picnic_mzd_local_init_multiple_ex(rvec[i].s, SC_PROOF, 1, lowmc_n, false); + } + } + +@@ -443,11 +443,11 @@ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key, + // compute sharing + for (unsigned int j = 0; j < SC_PROOF - 1; ++j) { + kdf_shake_get_randomness(&kdfs[j], round->input_shares[j], input_size); +- mzd_from_char_array(shared_key[j], round->input_shares[j], input_size); ++ oqs_sig_picnic_mzd_from_char_array(shared_key[j], round->input_shares[j], input_size); + } +- mzd_local_copy(shared_key[SC_PROOF - 1], lowmc_key); ++ oqs_sig_picnic_mzd_local_copy(shared_key[SC_PROOF - 1], lowmc_key); + mzd_share(shared_key); +- mzd_to_char_array(round->input_shares[SC_PROOF - 1], shared_key[SC_PROOF - 1], input_size); ++ oqs_sig_picnic_mzd_to_char_array(round->input_shares[SC_PROOF - 1], shared_key[SC_PROOF - 1], input_size); + + // compute random tapes + for (unsigned int j = 0; j < SC_PROOF; ++j) { +@@ -464,7 +464,7 @@ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key, + + // commitments + for (unsigned int j = 0; j < SC_PROOF; ++j) { +- mzd_to_char_array(round->output_shares[j], in_out_shares[1].s[j], output_size); ++ oqs_sig_picnic_mzd_to_char_array(round->output_shares[j], in_out_shares[1].s[j], output_size); + compress_view(round->communicated_bits[j], pp, views, 
j); + hash_commitment(pp, round, j); + } +@@ -487,17 +487,17 @@ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key, + free(tape_bytes); + if (lowmc->m != 10) { + for (unsigned n = 0; n < view_count; ++n) { +- mzd_local_free_multiple(rvec[n].s); ++ oqs_sig_picnic_mzd_local_free_multiple(rvec[n].s); + } + for (unsigned n = 0; n < view_count; ++n) { +- mzd_local_free_multiple(views[n].s); ++ oqs_sig_picnic_mzd_local_free_multiple(views[n].s); + } + } + free(views); + free(rvec); +- mzd_local_free_multiple(shared_key); +- mzd_local_free_multiple(in_out_shares[0].s); +- mzd_local_free_multiple(in_out_shares[1].s); ++ oqs_sig_picnic_mzd_local_free_multiple(shared_key); ++ oqs_sig_picnic_mzd_local_free_multiple(in_out_shares[0].s); ++ oqs_sig_picnic_mzd_local_free_multiple(in_out_shares[1].s); + proof_free(prf); + + END_TIMING(timing_and_size->sign.challenge); +@@ -528,12 +528,12 @@ static bool verify_impl(const picnic_instance_t* pp, const uint8_t* plaintext, m + } + + in_out_shares_t in_out_shares[2]; +- mzd_local_init_multiple_ex(in_out_shares[0].s, SC_VERIFY, 1, lowmc_k, false); +- mzd_local_init_multiple_ex(in_out_shares[1].s, SC_PROOF, 1, lowmc_n, false); ++ oqs_sig_picnic_mzd_local_init_multiple_ex(in_out_shares[0].s, SC_VERIFY, 1, lowmc_k, false); ++ oqs_sig_picnic_mzd_local_init_multiple_ex(in_out_shares[1].s, SC_PROOF, 1, lowmc_n, false); + view_t* views = calloc(sizeof(view_t), view_count); + if (lowmc->m != 10) { + for (size_t i = 0; i < view_count; ++i) { +- mzd_local_init_multiple_ex(views[i].s, SC_VERIFY, 1, lowmc_n, false); ++ oqs_sig_picnic_mzd_local_init_multiple_ex(views[i].s, SC_VERIFY, 1, lowmc_n, false); + } + } + +@@ -542,7 +542,7 @@ static bool verify_impl(const picnic_instance_t* pp, const uint8_t* plaintext, m + rvec_t* rvec = calloc(sizeof(rvec_t), lowmc_r); // random tapes for and-gates + if (lowmc->m != 10) { + for (unsigned int i = 0; i < lowmc_r; ++i) { +- mzd_local_init_multiple_ex(rvec[i].s, SC_VERIFY, 1, 
lowmc_n, false); ++ oqs_sig_picnic_mzd_local_init_multiple_ex(rvec[i].s, SC_VERIFY, 1, lowmc_n, false); + } + } + uint8_t* tape_bytes = malloc(view_size); +@@ -566,8 +566,8 @@ static bool verify_impl(const picnic_instance_t* pp, const uint8_t* plaintext, m + kdf_shake_get_randomness(&kdfs[1], round->input_shares[1], input_size); + } + +- mzd_from_char_array(in_out_shares[0].s[0], round->input_shares[0], input_size); +- mzd_from_char_array(in_out_shares[0].s[1], round->input_shares[1], input_size); ++ oqs_sig_picnic_mzd_from_char_array(in_out_shares[0].s[0], round->input_shares[0], input_size); ++ oqs_sig_picnic_mzd_from_char_array(in_out_shares[0].s[1], round->input_shares[1], input_size); + + // compute random tapes + for (unsigned int j = 0; j < SC_VERIFY; ++j) { +@@ -590,10 +590,10 @@ static bool verify_impl(const picnic_instance_t* pp, const uint8_t* plaintext, m + mzd_unshare(in_out_shares[1].s[2], ys); + + for (unsigned int j = 0; j < SC_VERIFY; ++j) { +- mzd_to_char_array(round->output_shares[j], in_out_shares[1].s[j], output_size); ++ oqs_sig_picnic_mzd_to_char_array(round->output_shares[j], in_out_shares[1].s[j], output_size); + hash_commitment(pp, round, j); + } +- mzd_to_char_array(round->output_shares[SC_VERIFY], in_out_shares[1].s[SC_VERIFY], output_size); ++ oqs_sig_picnic_mzd_to_char_array(round->output_shares[SC_VERIFY], in_out_shares[1].s[SC_VERIFY], output_size); + + if (transform == TRANSFORM_UR) { + for (unsigned int j = 0; j < SC_VERIFY; ++j) { +@@ -611,16 +611,16 @@ static bool verify_impl(const picnic_instance_t* pp, const uint8_t* plaintext, m + free(tape_bytes); + if (lowmc->m != 10) { + for (unsigned n = 0; n < view_count; ++n) { +- mzd_local_free_multiple(rvec[n].s); ++ oqs_sig_picnic_mzd_local_free_multiple(rvec[n].s); + } + for (unsigned n = 0; n < view_count; ++n) { +- mzd_local_free_multiple(views[n].s); ++ oqs_sig_picnic_mzd_local_free_multiple(views[n].s); + } + } + free(views); + free(rvec); +- 
mzd_local_free_multiple(in_out_shares[0].s); +- mzd_local_free_multiple(in_out_shares[1].s); ++ oqs_sig_picnic_mzd_local_free_multiple(in_out_shares[0].s); ++ oqs_sig_picnic_mzd_local_free_multiple(in_out_shares[1].s); + + proof_free(prf); + +@@ -777,42 +777,42 @@ err: + return NULL; + } + +-bool fis_sign(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* private_key, ++bool oqs_sig_picnic_fis_sign(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* private_key, + const uint8_t* public_key, const uint8_t* msg, size_t msglen, uint8_t* sig, + size_t* siglen) { +- mzd_local_t* m_plaintext = mzd_local_init_ex(1, pp->lowmc.n, false); +- mzd_local_t* m_privatekey = mzd_local_init_ex(1, pp->lowmc.k, false); ++ mzd_local_t* m_plaintext = oqs_sig_picnic_mzd_local_init_ex(1, pp->lowmc.n, false); ++ mzd_local_t* m_privatekey = oqs_sig_picnic_mzd_local_init_ex(1, pp->lowmc.k, false); + +- mzd_from_char_array(m_plaintext, plaintext, pp->output_size); +- mzd_from_char_array(m_privatekey, private_key, pp->input_size); ++ oqs_sig_picnic_mzd_from_char_array(m_plaintext, plaintext, pp->output_size); ++ oqs_sig_picnic_mzd_from_char_array(m_privatekey, private_key, pp->input_size); + + const bool result = sign_impl(pp, private_key, m_privatekey, plaintext, m_plaintext, public_key, + msg, msglen, sig, siglen); + +- mzd_local_free(m_privatekey); +- mzd_local_free(m_plaintext); ++ oqs_sig_picnic_mzd_local_free(m_privatekey); ++ oqs_sig_picnic_mzd_local_free(m_plaintext); + + return result; + } + +-bool fis_verify(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* public_key, ++bool oqs_sig_picnic_fis_verify(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* public_key, + const uint8_t* msg, size_t msglen, const uint8_t* sig, size_t siglen) { +- mzd_local_t* m_plaintext = mzd_local_init_ex(1, pp->lowmc.n, false); +- mzd_local_t* m_publickey = mzd_local_init_ex(1, pp->lowmc.n, false); ++ mzd_local_t* m_plaintext = 
oqs_sig_picnic_mzd_local_init_ex(1, pp->lowmc.n, false); ++ mzd_local_t* m_publickey = oqs_sig_picnic_mzd_local_init_ex(1, pp->lowmc.n, false); + +- mzd_from_char_array(m_plaintext, plaintext, pp->output_size); +- mzd_from_char_array(m_publickey, public_key, pp->output_size); ++ oqs_sig_picnic_mzd_from_char_array(m_plaintext, plaintext, pp->output_size); ++ oqs_sig_picnic_mzd_from_char_array(m_publickey, public_key, pp->output_size); + + const bool result = + verify_impl(pp, plaintext, m_plaintext, public_key, m_publickey, msg, msglen, sig, siglen); + +- mzd_local_free(m_publickey); +- mzd_local_free(m_plaintext); ++ oqs_sig_picnic_mzd_local_free(m_publickey); ++ oqs_sig_picnic_mzd_local_free(m_plaintext); + + return result; + } + +-void visualize_signature(FILE* out, const picnic_instance_t* pp, const uint8_t* msg, size_t msglen, ++void oqs_sig_picnic_visualize_signature(FILE* out, const picnic_instance_t* pp, const uint8_t* msg, size_t msglen, + const uint8_t* sig, size_t siglen) { + const size_t digest_size = pp->digest_size; + const size_t seed_size = pp->seed_size; +@@ -894,14 +894,14 @@ void hash_commitment(const picnic_instance_t* pp, proof_round_t* prf_round, unsi + hash_context ctx; + + // hash the seed +- hash_init(&ctx, pp); ++ oqs_sig_picnic_hash_init(&ctx, pp); + hash_update(&ctx, &HASH_PREFIX_4, sizeof(HASH_PREFIX_4)); + hash_update(&ctx, prf_round->seeds[vidx], pp->seed_size); + hash_final(&ctx); + hash_squeeze(tmp, hashlen, &ctx); + + // compute H_0(H_4(seed), view) +- hash_init(&ctx, pp); ++ oqs_sig_picnic_hash_init(&ctx, pp); + hash_update(&ctx, &HASH_PREFIX_0, sizeof(HASH_PREFIX_0)); + hash_update(&ctx, tmp, hashlen); + // hash input share +@@ -925,7 +925,7 @@ static void H3_compute(const picnic_instance_t* pp, uint8_t* hash, uint8_t* ch) + while (ch < eof) { + if (bit_idx >= digest_size_bits) { + hash_context ctx; +- hash_init(&ctx, pp); ++ oqs_sig_picnic_hash_init(&ctx, pp); + hash_update(&ctx, &HASH_PREFIX_1, sizeof(HASH_PREFIX_1)); + 
hash_update(&ctx, hash, digest_size); + hash_final(&ctx); +@@ -948,7 +948,7 @@ void fs_H3_verify(const picnic_instance_t* pp, sig_proof_t* prf, const uint8_t* + const size_t output_size = pp->output_size; + + hash_context ctx; +- hash_init(&ctx, pp); ++ oqs_sig_picnic_hash_init(&ctx, pp); + hash_update(&ctx, &HASH_PREFIX_1, sizeof(HASH_PREFIX_1)); + + // hash output shares +@@ -1048,7 +1048,7 @@ void fs_H3(const picnic_instance_t* pp, sig_proof_t* prf, const uint8_t* circuit + const size_t num_rounds = pp->num_rounds; + + hash_context ctx; +- hash_init(&ctx, pp); ++ oqs_sig_picnic_hash_init(&ctx, pp); + hash_update(&ctx, &HASH_PREFIX_1, sizeof(HASH_PREFIX_1)); + + // hash output shares +@@ -1084,7 +1084,7 @@ void unruh_G(const picnic_instance_t* pp, proof_round_t* prf_round, unsigned vid + const size_t seedlen = pp->seed_size; + + /* Hash the seed with H_5, store digest in output */ +- hash_init(&ctx, pp); ++ oqs_sig_picnic_hash_init(&ctx, pp); + hash_update(&ctx, &HASH_PREFIX_5, sizeof(HASH_PREFIX_5)); + hash_update(&ctx, prf_round->seeds[vidx], seedlen); + hash_final(&ctx); +@@ -1093,7 +1093,7 @@ void unruh_G(const picnic_instance_t* pp, proof_round_t* prf_round, unsigned vid + hash_squeeze(tmp, digest_size, &ctx); + + /* Hash H_5(seed), the view, and the length */ +- hash_init(&ctx, pp); ++ oqs_sig_picnic_hash_init(&ctx, pp); + hash_update(&ctx, tmp, digest_size); + if (include_is) { + hash_update(&ctx, prf_round->input_shares[vidx], pp->input_size); +@@ -1181,14 +1181,14 @@ static bool create_instance(picnic_instance_t* pp, picnic_params_t param, uint32 + } + #endif + if (!have_instance) { +- have_instance = lowmc_init(&pp->lowmc, m, n, r, k); ++ have_instance = oqs_sig_picnic_lowmc_init(&pp->lowmc, m, n, r, k); + } + if (!have_instance) { + return false; + } + +- pp->lowmc_impl = get_lowmc_implementation(&pp->lowmc); +- pp->lowmc_verify_impl = get_lowmc_verify_implementation(&pp->lowmc); ++ pp->lowmc_impl = oqs_sig_picnic_get_lowmc_implementation(&pp->lowmc); 
++ pp->lowmc_verify_impl = oqs_sig_picnic_get_lowmc_verify_implementation(&pp->lowmc); + + pp->params = param; + pp->transform = param_to_transform(param); +@@ -1227,10 +1227,10 @@ static bool create_instance(picnic_instance_t* pp, picnic_params_t param, uint32 + } + + static void destroy_instance(picnic_instance_t* pp) { +- lowmc_clear(&pp->lowmc); ++ oqs_sig_picnic_lowmc_clear(&pp->lowmc); + } + +-picnic_instance_t* get_instance(picnic_params_t param) { ++picnic_instance_t* oqs_sig_picnic_get_instance(picnic_params_t param) { + if (param <= PARAMETER_SET_INVALID || param >= PARAMETER_SET_MAX_INDEX) { + return NULL; + } +@@ -1262,8 +1262,8 @@ static void collapse_challenge(uint8_t* collapsed, const picnic_instance_t* pp, + bs.position = 0; + + for (unsigned int i = 0; i < pp->num_rounds; ++i) { +- bitstream_put_bits(&bs, challenge[i] & 1, 1); +- bitstream_put_bits(&bs, (challenge[i] >> 1) & 1, 1); ++ oqs_sig_picnic_bitstream_put_bits(&bs, challenge[i] & 1, 1); ++ oqs_sig_picnic_bitstream_put_bits(&bs, (challenge[i] >> 1) & 1, 1); + } + } + +@@ -1274,8 +1274,8 @@ static bool expand_challenge(uint8_t* challenge, const picnic_instance_t* pp, + bs.position = 0; + + for (unsigned int i = 0; i < pp->num_rounds; ++i) { +- uint8_t ch = bitstream_get_bits(&bs, 1); +- ch |= bitstream_get_bits(&bs, 1) << 1; ++ uint8_t ch = oqs_sig_picnic_bitstream_get_bits(&bs, 1); ++ ch |= oqs_sig_picnic_bitstream_get_bits(&bs, 1) << 1; + if (ch == 3) { + return false; + } +@@ -1283,7 +1283,7 @@ static bool expand_challenge(uint8_t* challenge, const picnic_instance_t* pp, + } + + size_t remaining_bits = (pp->collapsed_challenge_size << 3) - bs.position; +- if (remaining_bits && bitstream_get_bits(&bs, remaining_bits)) { ++ if (remaining_bits && oqs_sig_picnic_bitstream_get_bits(&bs, remaining_bits)) { + return false; + } + +diff --git a/picnic_impl.h b/picnic_impl.h +index dcc3747..18bfde5 100644 +--- a/picnic_impl.h ++++ b/picnic_impl.h +@@ -42,17 +42,17 @@ typedef struct { + transform_t 
transform; + } picnic_instance_t; + +-picnic_instance_t* get_instance(picnic_params_t param); ++picnic_instance_t* oqs_sig_picnic_get_instance(picnic_params_t param); + const picnic_instance_t* picnic_instance_get(picnic_params_t param); + +-bool fis_sign(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* private_key, ++bool oqs_sig_picnic_fis_sign(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* private_key, + const uint8_t* public_key, const uint8_t* msg, size_t msglen, uint8_t* sig, + size_t* siglen); + +-bool fis_verify(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* public_key, ++bool oqs_sig_picnic_fis_verify(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* public_key, + const uint8_t* msg, size_t msglen, const uint8_t* sig, size_t siglen); + +-void visualize_signature(FILE* out, const picnic_instance_t* pp, const uint8_t* msg, size_t msglen, ++void oqs_sig_picnic_visualize_signature(FILE* out, const picnic_instance_t* pp, const uint8_t* msg, size_t msglen, + const uint8_t* sig, size_t siglen); + + PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_get_private_key_size(picnic_params_t param); +diff --git a/sha3/KeccakHash.c b/sha3/KeccakHash.c +index bcfd1e9..6c7a0e6 100644 +--- a/sha3/KeccakHash.c ++++ b/sha3/KeccakHash.c +@@ -18,7 +18,7 @@ http://creativecommons.org/publicdomain/zero/1.0/ + + /* ---------------------------------------------------------------- */ + +-HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix) ++HashReturn oqs_sig_picnic_Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix) + { + HashReturn result; + +@@ -34,7 +34,7 @@ HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rat + + /* 
---------------------------------------------------------------- */ + +-HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, BitLength databitlen) ++HashReturn oqs_sig_picnic_Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, BitLength databitlen) + { + if ((databitlen % 8) == 0) + return (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8); +@@ -61,7 +61,7 @@ HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *d + + /* ---------------------------------------------------------------- */ + +-HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval) ++HashReturn oqs_sig_picnic_Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval) + { + HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix); + if (ret == SUCCESS) +@@ -72,7 +72,7 @@ HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval) + + /* ---------------------------------------------------------------- */ + +-HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, BitLength databitlen) ++HashReturn oqs_sig_picnic_Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, BitLength databitlen) + { + if ((databitlen % 8) != 0) + return FAIL; +diff --git a/sha3/KeccakHash.h b/sha3/KeccakHash.h +index 99347d6..1ba03a0 100644 +--- a/sha3/KeccakHash.h ++++ b/sha3/KeccakHash.h +@@ -51,31 +51,31 @@ typedef struct { + * @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation. + * @return SUCCESS if successful, FAIL otherwise. 
+ */ +-HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix); ++HashReturn oqs_sig_picnic_Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix); + + /** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard. + */ +-#define Keccak_HashInitialize_SHAKE128(hashInstance) Keccak_HashInitialize(hashInstance, 1344, 256, 0, 0x1F) ++#define Keccak_HashInitialize_SHAKE128(hashInstance) oqs_sig_picnic_Keccak_HashInitialize(hashInstance, 1344, 256, 0, 0x1F) + + /** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard. + */ +-#define Keccak_HashInitialize_SHAKE256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 0, 0x1F) ++#define Keccak_HashInitialize_SHAKE256(hashInstance) oqs_sig_picnic_Keccak_HashInitialize(hashInstance, 1088, 512, 0, 0x1F) + + /** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard. + */ +-#define Keccak_HashInitialize_SHA3_224(hashInstance) Keccak_HashInitialize(hashInstance, 1152, 448, 224, 0x06) ++#define Keccak_HashInitialize_SHA3_224(hashInstance) oqs_sig_picnic_Keccak_HashInitialize(hashInstance, 1152, 448, 224, 0x06) + + /** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard. + */ +-#define Keccak_HashInitialize_SHA3_256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 256, 0x06) ++#define Keccak_HashInitialize_SHA3_256(hashInstance) oqs_sig_picnic_Keccak_HashInitialize(hashInstance, 1088, 512, 256, 0x06) + + /** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard. 
+ */ +-#define Keccak_HashInitialize_SHA3_384(hashInstance) Keccak_HashInitialize(hashInstance, 832, 768, 384, 0x06) ++#define Keccak_HashInitialize_SHA3_384(hashInstance) oqs_sig_picnic_Keccak_HashInitialize(hashInstance, 832, 768, 384, 0x06) + + /** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard. + */ +-#define Keccak_HashInitialize_SHA3_512(hashInstance) Keccak_HashInitialize(hashInstance, 576, 1024, 512, 0x06) ++#define Keccak_HashInitialize_SHA3_512(hashInstance) oqs_sig_picnic_Keccak_HashInitialize(hashInstance, 576, 1024, 512, 0x06) + + /** + * Function to give input data to be absorbed. +@@ -87,7 +87,7 @@ HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int + * @pre In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8. + * @return SUCCESS if successful, FAIL otherwise. + */ +-HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, BitLength databitlen); ++HashReturn oqs_sig_picnic_Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, BitLength databitlen); + + /** + * Function to call after all input blocks have been input and to get +@@ -100,7 +100,7 @@ HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequenc + * @param hashval Pointer to the buffer where to store the output data. + * @return SUCCESS if successful, FAIL otherwise. + */ +-HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval); ++HashReturn oqs_sig_picnic_Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval); + + /** + * Function to squeeze output data. +@@ -111,7 +111,7 @@ HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hash + * @pre @a databitlen is a multiple of 8. + * @return SUCCESS if successful, FAIL otherwise. 
+ */ +-HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, BitLength databitlen); ++HashReturn oqs_sig_picnic_Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, BitLength databitlen); + + #endif + +diff --git a/tests/bitstream_test.c b/tests/bitstream_test.c +index 620bcc6..7bc11e4 100644 +--- a/tests/bitstream_test.c ++++ b/tests/bitstream_test.c +@@ -16,12 +16,12 @@ static int simple_test(void) { + bitstream_t bsw; + bsw.buffer = buffer; + bsw.position = 0; +- bitstream_put_bits(&bsw, v, i); ++ oqs_sig_picnic_bitstream_put_bits(&bsw, v, i); + + bitstream_t bsr; + bsr.buffer = buffer; + bsr.position = 0; +- const uint64_t r = bitstream_get_bits(&bsr, i); ++ const uint64_t r = oqs_sig_picnic_bitstream_get_bits(&bsr, i); + if (r != v) { + printf("simple_test: expected %016" PRIx64 ", got %016" PRIx64 "\n", v, r); + ret = -1; +@@ -49,19 +49,19 @@ static int test_30(void) { + bitstream_t bsw; + bsw.buffer = buffer; + bsw.position = 0; +- bitstream_put_bits(&bsw, v, 30); ++ oqs_sig_picnic_bitstream_put_bits(&bsw, v, 30); + + bitstream_t bsw2; + bsw2.buffer = buffer2; + bsw2.position = 0; + for (unsigned int i = 0; i < 30; ++i) { +- bitstream_put_bits(&bsw2, v >> (30 - i - 1), 1); ++ oqs_sig_picnic_bitstream_put_bits(&bsw2, v >> (30 - i - 1), 1); + } + + bitstream_t bsr; + bsr.buffer = buffer; + bsr.position = 0; +- uint64_t r = bitstream_get_bits(&bsr, 30); ++ uint64_t r = oqs_sig_picnic_bitstream_get_bits(&bsr, 30); + if (r != v) { + printf("test_30: expected %016" PRIx64 ", got %016" PRIx64 "\n", v, r); + ret = -1; +@@ -71,7 +71,7 @@ static int test_30(void) { + bsr2.buffer = buffer2; + bsr2.position = 0; + for (unsigned int i = 0; i < 30; ++i) { +- r = bitstream_get_bits(&bsr2, 1); ++ r = oqs_sig_picnic_bitstream_get_bits(&bsr2, 1); + const uint64_t e = (v >> (30 - i - 1)) & 0x1; + if (e != r) { + printf("test_30: expected2 %016" PRIx64 ", got %016" PRIx64 "\n", e, r); +@@ -105,18 +105,18 @@ static int 
test_multiple_30(void) { + bitstream_t bsw; + bsw.buffer = buffer; + bsw.position = 0; +- bitstream_put_bits(&bsw, v, 30); +- bitstream_put_bits(&bsw, v2, 30); ++ oqs_sig_picnic_bitstream_put_bits(&bsw, v, 30); ++ oqs_sig_picnic_bitstream_put_bits(&bsw, v2, 30); + + bitstream_t bsr; + bsr.buffer = buffer; + bsr.position = 0; +- uint64_t r = bitstream_get_bits(&bsr, 30); ++ uint64_t r = oqs_sig_picnic_bitstream_get_bits(&bsr, 30); + if (r != v) { + printf("test_multiple_30: expected %016" PRIx64 ", got %016" PRIx64 "\n", v, r); + ret = -1; + } +- r = bitstream_get_bits(&bsr, 30); ++ r = oqs_sig_picnic_bitstream_get_bits(&bsr, 30); + if (r != v2) { + printf("test_multiple_30: expected %016" PRIx64 ", got %016" PRIx64 "\n", v2, r); + ret = -1; +diff --git a/tests/lowmc_test.c b/tests/lowmc_test.c +index 753fe8a..0288c5d 100644 +--- a/tests/lowmc_test.c ++++ b/tests/lowmc_test.c +@@ -13,7 +13,7 @@ + + static int lowmc_enc_str(const picnic_params_t param, const char* key, const char* plaintext, + const char* expected) { +- picnic_instance_t* pp = get_instance(param); ++ picnic_instance_t* pp = oqs_sig_picnic_get_instance(param); + if (!pp) { + return -1; + } +@@ -27,21 +27,21 @@ static int lowmc_enc_str(const picnic_params_t param, const char* key, const cha + mzd_local_t* ctl = mzd_convert(ct); + + int ret = 0; +- mzd_local_t* ctr = lowmc_call(&pp->lowmc, skl, ptl); ++ mzd_local_t* ctr = oqs_sig_picnic_lowmc_call(&pp->lowmc, skl, ptl); + if (!ctr) { + ret = 1; + goto end; + } + +- if (!mzd_local_equal(ctr, ctl)) { ++ if (!oqs_sig_picnic_mzd_local_equal(ctr, ctl)) { + ret = 2; + } + + end: +- mzd_local_free(ctr); +- mzd_local_free(ctl); +- mzd_local_free(ptl); +- mzd_local_free(skl); ++ oqs_sig_picnic_mzd_local_free(ctr); ++ oqs_sig_picnic_mzd_local_free(ctl); ++ oqs_sig_picnic_mzd_local_free(ptl); ++ oqs_sig_picnic_mzd_local_free(skl); + mzd_free(ct); + mzd_free(pt); + mzd_free(sk); +@@ -51,35 +51,35 @@ end: + + static int lowmc_enc(const picnic_params_t param, const 
uint8_t* key, const uint8_t* plaintext, + const uint8_t* expected) { +- picnic_instance_t* pp = get_instance(param); ++ picnic_instance_t* pp = oqs_sig_picnic_get_instance(param); + if (!pp) { + return -1; + } + +- mzd_local_t* sk = mzd_local_init(1, pp->lowmc.k); +- mzd_local_t* pt = mzd_local_init(1, pp->lowmc.n); +- mzd_local_t* ct = mzd_local_init(1, pp->lowmc.n); ++ mzd_local_t* sk = oqs_sig_picnic_mzd_local_init(1, pp->lowmc.k); ++ mzd_local_t* pt = oqs_sig_picnic_mzd_local_init(1, pp->lowmc.n); ++ mzd_local_t* ct = oqs_sig_picnic_mzd_local_init(1, pp->lowmc.n); + +- mzd_from_char_array(sk, key, pp->input_size); +- mzd_from_char_array(pt, plaintext, pp->output_size); +- mzd_from_char_array(ct, expected, pp->output_size); ++ oqs_sig_picnic_mzd_from_char_array(sk, key, pp->input_size); ++ oqs_sig_picnic_mzd_from_char_array(pt, plaintext, pp->output_size); ++ oqs_sig_picnic_mzd_from_char_array(ct, expected, pp->output_size); + + int ret = 0; +- mzd_local_t* ctr = lowmc_call(&pp->lowmc, sk, pt); ++ mzd_local_t* ctr = oqs_sig_picnic_lowmc_call(&pp->lowmc, sk, pt); + if (!ctr) { + ret = 1; + goto end; + } + +- if (!mzd_local_equal(ctr, ct)) { ++ if (!oqs_sig_picnic_mzd_local_equal(ctr, ct)) { + ret = 2; + } + + end: +- mzd_local_free(ctr); +- mzd_local_free(ct); +- mzd_local_free(pt); +- mzd_local_free(sk); ++ oqs_sig_picnic_mzd_local_free(ctr); ++ oqs_sig_picnic_mzd_local_free(ct); ++ oqs_sig_picnic_mzd_local_free(pt); ++ oqs_sig_picnic_mzd_local_free(sk); + + return ret; + } +diff --git a/tests/mpc_test.c b/tests/mpc_test.c +index 57d5fda..de06da3 100644 +--- a/tests/mpc_test.c ++++ b/tests/mpc_test.c +@@ -11,34 +11,34 @@ + + static mzd_local_t** mpc_init_empty_share_vector(uint32_t n, unsigned sc) { + mzd_local_t** s = malloc(sc * sizeof(mzd_local_t*)); +- mzd_local_init_multiple(s, sc, 1, n); ++ oqs_sig_picnic_mzd_local_init_multiple(s, sc, 1, n); + return s; + } + + static mzd_local_t* mpc_reconstruct_from_share(mzd_local_t* dst, mzd_local_t** shared_vec) { + 
if (!dst) { +- dst = mzd_local_init_ex(shared_vec[0]->nrows, shared_vec[0]->ncols, false); ++ dst = oqs_sig_picnic_mzd_local_init_ex(shared_vec[0]->nrows, shared_vec[0]->ncols, false); + } + +- mzd_xor(dst, shared_vec[0], shared_vec[1]); +- return mzd_xor(dst, dst, shared_vec[2]); ++ oqs_sig_picnic_mzd_xor(dst, shared_vec[0], shared_vec[1]); ++ return oqs_sig_picnic_mzd_xor(dst, dst, shared_vec[2]); + } + + static mzd_local_t* mzd_init_random_vector(rci_t n) { +- mzd_local_t* a = mzd_local_init(1, n); ++ mzd_local_t* a = oqs_sig_picnic_mzd_local_init(1, n); + mzd_randomize_ssl(a); + return a; + } + + static mzd_local_t** mpc_init_share_vector(mzd_local_t const* v) { + mzd_local_t** s = malloc(3 * sizeof(mzd_local_t*)); +- mzd_local_init_multiple_ex(s, 3, 1, v->ncols, false); ++ oqs_sig_picnic_mzd_local_init_multiple_ex(s, 3, 1, v->ncols, false); + + mzd_randomize_ssl(s[0]); + mzd_randomize_ssl(s[1]); + +- mzd_xor(s[2], s[0], s[1]); +- mzd_xor(s[2], s[2], v); ++ oqs_sig_picnic_mzd_xor(s[2], s[0], s[1]); ++ oqs_sig_picnic_mzd_xor(s[2], s[2], v); + + return s; + } +@@ -48,37 +48,37 @@ static void test_mpc_share(void) { + mzd_local_t** s1 = mpc_init_share_vector(t1); + mzd_local_t* t1cmb = mpc_reconstruct_from_share(NULL, s1); + +- if (mzd_local_equal(t1, t1cmb)) ++ if (oqs_sig_picnic_mzd_local_equal(t1, t1cmb)) + printf("Share test successful.\n"); + +- mzd_local_free(t1); +- mzd_local_free_multiple(s1); +- mzd_local_free(t1cmb); ++ oqs_sig_picnic_mzd_local_free(t1); ++ oqs_sig_picnic_mzd_local_free_multiple(s1); ++ oqs_sig_picnic_mzd_local_free(t1cmb); + } + + static void test_mpc_add(void) { + mzd_local_t* t1 = mzd_init_random_vector(10); + mzd_local_t* t2 = mzd_init_random_vector(10); +- mzd_local_t* res = mzd_local_init(1, 10); +- mzd_xor(res, t1, t2); ++ mzd_local_t* res = oqs_sig_picnic_mzd_local_init(1, 10); ++ oqs_sig_picnic_mzd_xor(res, t1, t2); + + mzd_local_t** s1 = mpc_init_share_vector(t1); + mzd_local_t** s2 = mpc_init_share_vector(t2); + mzd_local_t** 
ress = mpc_init_empty_share_vector(10, 3); +- mpc_xor(ress, s1, s2, 3); ++ oqs_sig_picnic_mpc_xor(ress, s1, s2, 3); + + mzd_local_t* cmp = mpc_reconstruct_from_share(NULL, ress); + +- if (mzd_local_equal(res, cmp)) ++ if (oqs_sig_picnic_mzd_local_equal(res, cmp)) + printf("Shared add test successful.\n"); + +- mzd_local_free(t1); +- mzd_local_free(t2); +- mzd_local_free(res); +- mzd_local_free_multiple(s1); +- mzd_local_free_multiple(s2); +- mzd_local_free_multiple(ress); +- mzd_local_free(cmp); ++ oqs_sig_picnic_mzd_local_free(t1); ++ oqs_sig_picnic_mzd_local_free(t2); ++ oqs_sig_picnic_mzd_local_free(res); ++ oqs_sig_picnic_mzd_local_free_multiple(s1); ++ oqs_sig_picnic_mzd_local_free_multiple(s2); ++ oqs_sig_picnic_mzd_local_free_multiple(ress); ++ oqs_sig_picnic_mzd_local_free(cmp); + } + + void run_tests(void) { +diff --git a/tests/mzd_test.c b/tests/mzd_test.c +index e1243d2..264edde 100644 +--- a/tests/mzd_test.c ++++ b/tests/mzd_test.c +@@ -5,21 +5,21 @@ + + static void test_mzd_local_equal(void) { + for (unsigned int i = 0; i < 10; ++i) { +- mzd_local_t* a = mzd_local_init(1, (i + 1) * 64); ++ mzd_local_t* a = oqs_sig_picnic_mzd_local_init(1, (i + 1) * 64); + mzd_randomize_ssl(a); +- mzd_local_t* b = mzd_local_copy(NULL, a); ++ mzd_local_t* b = oqs_sig_picnic_mzd_local_copy(NULL, a); + +- if (mzd_local_equal(a, b)) { ++ if (oqs_sig_picnic_mzd_local_equal(a, b)) { + printf("equal: ok [%u]\n", (i + 1) * 64); + } + +- b = mzd_xor(b, b, a); +- if (mzd_local_equal(a, b)) { ++ b = oqs_sig_picnic_mzd_xor(b, b, a); ++ if (oqs_sig_picnic_mzd_local_equal(a, b)) { + printf("equal: ok [%u]\n", (i + 1) * 64); + } + +- mzd_local_free(a); +- mzd_local_free(b); ++ oqs_sig_picnic_mzd_local_free(a); ++ oqs_sig_picnic_mzd_local_free(b); + } + } + +@@ -44,27 +44,27 @@ static int test_mzd_mul_avx(void) { + for (unsigned int k = 0; k < 3; ++k) { + + mzd_t* r = mzd_mul_naive(c, v, A); +- mzd_local_t* rl = mzd_mul_v_avx(c2, vl, Al); ++ mzd_local_t* rl = 
oqs_sig_picnic_mzd_mul_v_avx(c2, vl, Al); + + mzd_local_t* rc = mzd_convert(r); + +- if (!mzd_local_equal(rc, rl)) { ++ if (!oqs_sig_picnic_mzd_local_equal(rc, rl)) { + printf("mul: fail [%u x %u]\n", size, size); + ret = -1; + } else { + printf("mul: ok [%u x %u]\n", size, size); + } + +- mzd_local_free(rc); ++ oqs_sig_picnic_mzd_local_free(rc); + } + + mzd_free(A); + mzd_free(v); + mzd_free(c); + +- mzd_local_free(c2); +- mzd_local_free(Al); +- mzd_local_free(vl); ++ oqs_sig_picnic_mzd_local_free(c2); ++ oqs_sig_picnic_mzd_local_free(Al); ++ oqs_sig_picnic_mzd_local_free(vl); + #endif + + return ret; +@@ -82,18 +82,18 @@ static void test_mzd_mul_vl_neon_192(void) { + mzd_randomize(v); + mzd_randomize(c); + +- mzd_local_t* Al = mzd_local_copy(NULL, A); +- mzd_local_t* All = mzd_precompute_matrix_lookup(Al); +- mzd_local_t* vl = mzd_local_copy(NULL, v); ++ mzd_local_t* Al = oqs_sig_picnic_mzd_local_copy(NULL, A); ++ mzd_local_t* All = oqs_sig_picnic_mzd_precompute_matrix_lookup(Al); ++ mzd_local_t* vl = oqs_sig_picnic_mzd_local_copy(NULL, v); + +- mzd_local_t* c2 = mzd_local_copy(NULL, c); ++ mzd_local_t* c2 = oqs_sig_picnic_mzd_local_copy(NULL, c); + + for (unsigned int k = 0; k < 3; ++k) { + + mzd_local_t* r = mzd_mul_naive(c, v, A); + mzd_local_t* rl = mzd_mul_vl_neon_multiple_of_128(c2, vl, All); + +- if (!mzd_local_equal(r, rl)) { ++ if (!oqs_sig_picnic_mzd_local_equal(r, rl)) { + printf("mul: fail [%u x %u]\n", size, size); + printf("r = "); + mzd_print(r); +@@ -108,9 +108,9 @@ static void test_mzd_mul_vl_neon_192(void) { + mzd_free(v); + mzd_free(c); + +- mzd_local_free(c2); +- mzd_local_free(Al); +- mzd_local_free(vl); ++ oqs_sig_picnic_mzd_local_free(c2); ++ oqs_sig_picnic_mzd_local_free(Al); ++ oqs_sig_picnic_mzd_local_free(vl); + } + + static void test_mzd_mul_vl_neon_256(void) { +@@ -124,18 +124,18 @@ static void test_mzd_mul_vl_neon_256(void) { + mzd_randomize(v); + mzd_randomize(c); + +- mzd_local_t* Al = mzd_local_copy(NULL, A); +- mzd_local_t* All = 
mzd_precompute_matrix_lookup(Al); +- mzd_local_t* vl = mzd_local_copy(NULL, v); ++ mzd_local_t* Al = oqs_sig_picnic_mzd_local_copy(NULL, A); ++ mzd_local_t* All = oqs_sig_picnic_mzd_precompute_matrix_lookup(Al); ++ mzd_local_t* vl = oqs_sig_picnic_mzd_local_copy(NULL, v); + +- mzd_local_t* c2 = mzd_local_copy(NULL, c); ++ mzd_local_t* c2 = oqs_sig_picnic_mzd_local_copy(NULL, c); + + for (unsigned int k = 0; k < 3; ++k) { + + mzd_local_t* r = mzd_mul_naive(c, v, A); + mzd_local_t* rl = mzd_mul_vl_neon_multiple_of_128(c2, vl, All); + +- if (!mzd_local_equal(r, rl)) { ++ if (!oqs_sig_picnic_mzd_local_equal(r, rl)) { + printf("mul: fail [%u x %u]\n", size, size); + printf("r = "); + mzd_print(r); +@@ -150,9 +150,9 @@ static void test_mzd_mul_vl_neon_256(void) { + mzd_free(v); + mzd_free(c); + +- mzd_local_free(c2); +- mzd_local_free(Al); +- mzd_local_free(vl); ++ oqs_sig_picnic_mzd_local_free(c2); ++ oqs_sig_picnic_mzd_local_free(Al); ++ oqs_sig_picnic_mzd_local_free(vl); + } + + static void test_mzd_addmul_vl_neon_192(void) { +@@ -166,19 +166,19 @@ static void test_mzd_addmul_vl_neon_192(void) { + mzd_randomize(v); + mzd_randomize(c); + +- mzd_local_t* Al = mzd_local_copy(NULL, A); +- mzd_local_t* All = mzd_precompute_matrix_lookup(Al); +- mzd_local_t* vl = mzd_local_copy(NULL, v); ++ mzd_local_t* Al = oqs_sig_picnic_mzd_local_copy(NULL, A); ++ mzd_local_t* All = oqs_sig_picnic_mzd_precompute_matrix_lookup(Al); ++ mzd_local_t* vl = oqs_sig_picnic_mzd_local_copy(NULL, v); + +- mzd_local_t* c2 = mzd_local_copy(NULL, c); +- mzd_local_t* c3 = mzd_local_copy(NULL, c); ++ mzd_local_t* c2 = oqs_sig_picnic_mzd_local_copy(NULL, c); ++ mzd_local_t* c3 = oqs_sig_picnic_mzd_local_copy(NULL, c); + + for (unsigned int k = 0; k < 3; ++k) { + + mzd_local_t* r = mzd_addmul_naive(c, v, A); + mzd_local_t* rl2 = mzd_addmul_vl_neon(c3, vl, All); + +- if (!mzd_local_equal(r, rl2)) { ++ if (!oqs_sig_picnic_mzd_local_equal(r, rl2)) { + printf("addmul2: fail [%u x %u]\n", size, size); + 
printf("r = "); + mzd_print(r); +@@ -193,9 +193,9 @@ static void test_mzd_addmul_vl_neon_192(void) { + mzd_free(v); + mzd_free(c); + +- mzd_local_free(c2); +- mzd_local_free(Al); +- mzd_local_free(vl); ++ oqs_sig_picnic_mzd_local_free(c2); ++ oqs_sig_picnic_mzd_local_free(Al); ++ oqs_sig_picnic_mzd_local_free(vl); + } + + static void test_mzd_addmul_vl_neon_256(void) { +@@ -209,19 +209,19 @@ static void test_mzd_addmul_vl_neon_256(void) { + mzd_randomize(v); + mzd_randomize(c); + +- mzd_local_t* Al = mzd_local_copy(NULL, A); +- mzd_local_t* All = mzd_precompute_matrix_lookup(Al); +- mzd_local_t* vl = mzd_local_copy(NULL, v); ++ mzd_local_t* Al = oqs_sig_picnic_mzd_local_copy(NULL, A); ++ mzd_local_t* All = oqs_sig_picnic_mzd_precompute_matrix_lookup(Al); ++ mzd_local_t* vl = oqs_sig_picnic_mzd_local_copy(NULL, v); + +- mzd_local_t* c2 = mzd_local_copy(NULL, c); +- mzd_local_t* c3 = mzd_local_copy(NULL, c); ++ mzd_local_t* c2 = oqs_sig_picnic_mzd_local_copy(NULL, c); ++ mzd_local_t* c3 = oqs_sig_picnic_mzd_local_copy(NULL, c); + + for (unsigned int k = 0; k < 3; ++k) { + + mzd_local_t* r = mzd_addmul_naive(c, v, A); + mzd_local_t* rl2 = mzd_addmul_vl_neon(c3, vl, All); + +- if (!mzd_local_equal(r, rl2)) { ++ if (!oqs_sig_picnic_mzd_local_equal(r, rl2)) { + printf("addmul2: fail [%u x %u]\n", size, size); + printf("r = "); + mzd_print(r); +@@ -236,9 +236,9 @@ static void test_mzd_addmul_vl_neon_256(void) { + mzd_free(v); + mzd_free(c); + +- mzd_local_free(c2); +- mzd_local_free(Al); +- mzd_local_free(vl); ++ oqs_sig_picnic_mzd_local_free(c2); ++ oqs_sig_picnic_mzd_local_free(Al); ++ oqs_sig_picnic_mzd_local_free(vl); + } + + #endif +@@ -255,7 +255,7 @@ static void test_mzd_mul(void) { + mzd_randomize(c); + + mzd_local_t* Al = mzd_convert(A); +- mzd_local_t* All = mzd_precompute_matrix_lookup(Al); ++ mzd_local_t* All = oqs_sig_picnic_mzd_precompute_matrix_lookup(Al); + mzd_local_t* vl = mzd_convert(v); + mzd_local_t* cl = mzd_convert(c); + mzd_local_t* cll = 
mzd_convert(c); +@@ -266,20 +266,20 @@ static void test_mzd_mul(void) { + mzd_t* c3 = mzd_transpose(NULL, c); + + for (unsigned int k = 0; k < 3; ++k) { +- mzd_local_t* r = mzd_mul_v(cl, vl, Al); +- mzd_local_t* rl = mzd_mul_vl(cll, vl, All); ++ mzd_local_t* r = oqs_sig_picnic_mzd_mul_v(cl, vl, Al); ++ mzd_local_t* rl = oqs_sig_picnic_mzd_mul_vl(cll, vl, All); + mzd_t* r2 = mzd_mul(c2, v, A, __M4RI_STRASSEN_MUL_CUTOFF); + mzd_t* r3 = mzd_mul(c3, At, vt, __M4RI_STRASSEN_MUL_CUTOFF); + +- if (!mzd_local_equal(r, rl)) { ++ if (!oqs_sig_picnic_mzd_local_equal(r, rl)) { + printf("mul: fail [%u x %u]\n", i * 64, j * 64); + } + + mzd_local_t* rc = mzd_convert(r2); +- if (!mzd_local_equal(r, rc)) { ++ if (!oqs_sig_picnic_mzd_local_equal(r, rc)) { + printf("mul: fail [%u x %u]\n", i * 64, j * 64); + } +- mzd_local_free(rc); ++ oqs_sig_picnic_mzd_local_free(rc); + + mzd_t* r4 = mzd_transpose(NULL, r3); + if (mzd_cmp(r4, r2) != 0) { +@@ -297,11 +297,11 @@ static void test_mzd_mul(void) { + mzd_free(c2); + mzd_free(c3); + +- mzd_local_free(All); +- mzd_local_free(Al); +- mzd_local_free(cll); +- mzd_local_free(cl); +- mzd_local_free(vl); ++ oqs_sig_picnic_mzd_local_free(All); ++ oqs_sig_picnic_mzd_local_free(Al); ++ oqs_sig_picnic_mzd_local_free(cll); ++ oqs_sig_picnic_mzd_local_free(cl); ++ oqs_sig_picnic_mzd_local_free(vl); + } + } + } +@@ -310,14 +310,14 @@ static void test_mzd_shift(void) { + #ifdef WITH_OPT + #ifdef WITH_SSE2 + if (CPU_SUPPORTS_SSE2) { +- mzd_local_t* v = mzd_local_init(1, 128); +- mzd_local_t* w = mzd_local_copy(NULL, v); +- mzd_local_t* r = mzd_local_copy(NULL, v); ++ mzd_local_t* v = oqs_sig_picnic_mzd_local_init(1, 128); ++ mzd_local_t* w = oqs_sig_picnic_mzd_local_copy(NULL, v); ++ mzd_local_t* r = oqs_sig_picnic_mzd_local_copy(NULL, v); + __m128i* wr = __builtin_assume_aligned(FIRST_ROW(w), 16); + + for (unsigned int i = 0; i < 32; ++i) { + mzd_randomize_ssl(v); +- mzd_local_copy(w, v); ++ oqs_sig_picnic_mzd_local_copy(w, v); + + mzd_shift_left(r, v, 
i); + *wr = mm128_shift_left(*wr, i); +@@ -329,7 +329,7 @@ static void test_mzd_shift(void) { + + for (unsigned int i = 0; i < 32; ++i) { + mzd_randomize_ssl(v); +- mzd_local_copy(w, v); ++ oqs_sig_picnic_mzd_local_copy(w, v); + + mzd_shift_right(r, v, i); + *wr = mm128_shift_right(*wr, i); +@@ -339,21 +339,21 @@ static void test_mzd_shift(void) { + } + } + +- mzd_local_free(w); +- mzd_local_free(v); +- mzd_local_free(r); ++ oqs_sig_picnic_mzd_local_free(w); ++ oqs_sig_picnic_mzd_local_free(v); ++ oqs_sig_picnic_mzd_local_free(r); + } + #endif + #ifdef WITH_AVX2 + if (CPU_SUPPORTS_AVX2) { +- mzd_local_t* v = mzd_local_init(1, 256); +- mzd_local_t* w = mzd_local_copy(NULL, v); +- mzd_local_t* r = mzd_local_copy(NULL, v); ++ mzd_local_t* v = oqs_sig_picnic_mzd_local_init(1, 256); ++ mzd_local_t* w = oqs_sig_picnic_mzd_local_copy(NULL, v); ++ mzd_local_t* r = oqs_sig_picnic_mzd_local_copy(NULL, v); + __m256i* wr = __builtin_assume_aligned(FIRST_ROW(w), 32); + + for (unsigned int i = 0; i < 32; ++i) { + mzd_randomize_ssl(v); +- mzd_local_copy(w, v); ++ oqs_sig_picnic_mzd_local_copy(w, v); + + mzd_shift_left(r, v, i); + *wr = mm256_shift_left(*wr, i); +@@ -365,7 +365,7 @@ static void test_mzd_shift(void) { + + for (unsigned int i = 0; i < 32; ++i) { + mzd_randomize_ssl(v); +- mzd_local_copy(w, v); ++ oqs_sig_picnic_mzd_local_copy(w, v); + + mzd_shift_right(r, v, i); + mm512_shift_right_avx(wr, wr, i); +@@ -375,21 +375,21 @@ static void test_mzd_shift(void) { + } + } + +- mzd_local_free(w); +- mzd_local_free(v); +- mzd_local_free(r); ++ oqs_sig_picnic_mzd_local_free(w); ++ oqs_sig_picnic_mzd_local_free(v); ++ oqs_sig_picnic_mzd_local_free(r); + } + #endif + #ifdef WITH_NEON + if (CPU_SUPPORTS_NEON) { +- mzd_local_t* v = mzd_local_init(1, 384); +- mzd_local_t* w = mzd_local_copy(NULL, v); +- mzd_local_t* r = mzd_local_copy(NULL, v); ++ mzd_local_t* v = oqs_sig_picnic_mzd_local_init(1, 384); ++ mzd_local_t* w = oqs_sig_picnic_mzd_local_copy(NULL, v); ++ mzd_local_t* r = 
oqs_sig_picnic_mzd_local_copy(NULL, v); + uint32x4_t* wr = __builtin_assume_aligned(FIRST_ROW(w), alignof(uint32x4_t)); + + for (unsigned int i = 0; i < 32; ++i) { + mzd_randomize_ssl(v); +- mzd_local_copy(w, v); ++ oqs_sig_picnic_mzd_local_copy(w, v); + + mzd_shift_left(r, v, i); + mm384_shift_left(wr, wr, i); +@@ -406,7 +406,7 @@ static void test_mzd_shift(void) { + + for (unsigned int i = 0; i < 32; ++i) { + mzd_randomize_ssl(v); +- mzd_local_copy(w, v); ++ oqs_sig_picnic_mzd_local_copy(w, v); + + mzd_shift_right(r, v, i); + mm384_shift_right(wr, wr, i); +@@ -421,9 +421,9 @@ static void test_mzd_shift(void) { + } + } + +- mzd_local_free(w); +- mzd_local_free(v); +- mzd_local_free(r); ++ oqs_sig_picnic_mzd_local_free(w); ++ oqs_sig_picnic_mzd_local_free(v); ++ oqs_sig_picnic_mzd_local_free(r); + } + #endif + #endif +diff --git a/tests/utils.c.i b/tests/utils.c.i +index 558180a..69b18af 100644 +--- a/tests/utils.c.i ++++ b/tests/utils.c.i +@@ -8,7 +8,7 @@ void mzd_randomize_ssl(mzd_local_t* val) { + } + + mzd_local_t* mzd_convert(const mzd_t* v) { +- mzd_local_t* r = mzd_local_init(v->nrows, v->ncols); ++ mzd_local_t* r = oqs_sig_picnic_mzd_local_init(v->nrows, v->ncols); + + for (rci_t i = 0; i < v->nrows; ++i) { + memcpy(ROW(r, i), v->rows[i], v->width * sizeof(word)); diff --git a/crypt/liboqs/sig_picnic/sig_picnic.c b/crypt/liboqs/sig_picnic/sig_picnic.c new file mode 100644 index 0000000000000000000000000000000000000000..3c7be8b7297cef2bccda0bafe4d4cecf1bfb61d2 --- /dev/null +++ b/crypt/liboqs/sig_picnic/sig_picnic.c @@ -0,0 +1,174 @@ +#ifdef ENABLE_SIG_PICNIC +#if defined(WINDOWS) +#define UNUSED +#else +#define UNUSED __attribute__((unused)) +#endif + +#include <string.h> +#include <oqs/common.h> +#include <oqs/sig.h> +#include <oqs/rand.h> +#include "sig_picnic.h" +#include "picnic.h" + +static char *Picnic_L1_FS_name = "Picnic_L1_FS"; +static char *Picnic_L1_UR_name = "Picnic_L1_UR"; +static char *Picnic_L3_FS_name = "Picnic_L3_FS"; +static char 
*Picnic_L3_UR_name = "Picnic_L3_UR"; +static char *Picnic_L5_FS_name = "Picnic_L5_FS"; +static char *Picnic_L5_UR_name = "Picnic_L5_UR"; +static size_t PRIV_KEY_LEN[] = { + 0, + PICNIC_PRIVATE_KEY_SIZE(Picnic_L1_FS), + PICNIC_PRIVATE_KEY_SIZE(Picnic_L1_UR), + PICNIC_PRIVATE_KEY_SIZE(Picnic_L3_FS), + PICNIC_PRIVATE_KEY_SIZE(Picnic_L3_UR), + PICNIC_PRIVATE_KEY_SIZE(Picnic_L5_FS), + PICNIC_PRIVATE_KEY_SIZE(Picnic_L5_UR)}; +static size_t PUB_KEY_LEN[] = { + 0, + PICNIC_PUBLIC_KEY_SIZE(Picnic_L1_FS), + PICNIC_PUBLIC_KEY_SIZE(Picnic_L1_UR), + PICNIC_PUBLIC_KEY_SIZE(Picnic_L3_FS), + PICNIC_PUBLIC_KEY_SIZE(Picnic_L3_UR), + PICNIC_PUBLIC_KEY_SIZE(Picnic_L5_FS), + PICNIC_PUBLIC_KEY_SIZE(Picnic_L5_UR)}; +static size_t SIG_LEN[] = { + 0, + PICNIC_SIGNATURE_SIZE_Picnic_L1_FS, + PICNIC_SIGNATURE_SIZE_Picnic_L1_UR, + PICNIC_SIGNATURE_SIZE_Picnic_L3_FS, + PICNIC_SIGNATURE_SIZE_Picnic_L3_UR, + PICNIC_SIGNATURE_SIZE_Picnic_L5_FS, + PICNIC_SIGNATURE_SIZE_Picnic_L5_UR}; + +typedef struct PICNIC_CTX { + picnic_params_t params; +} PICNIC_CTX; + +int OQS_SIG_picnic_get(OQS_SIG *s, enum OQS_SIG_algid algid) { + if (s == NULL) { + return OQS_ERROR; + } + + PICNIC_CTX *pctx = malloc(sizeof(PICNIC_CTX)); + if (pctx == NULL) { + return OQS_ERROR; + } + + // set the scheme-specific alg values + // NOTE: the key and sig len values use macros, so we can't + // parametrized with pctx->params to shorten the code. 
+ switch (algid) { + case OQS_SIG_picnic_default: + case OQS_SIG_picnic_L1_FS: + pctx->params = Picnic_L1_FS; + s->method_name = Picnic_L1_FS_name; + s->estimated_classical_security = 128; + s->estimated_quantum_security = 64; + break; + case OQS_SIG_picnic_L1_UR: + pctx->params = Picnic_L1_UR; + s->method_name = Picnic_L1_UR_name; + s->estimated_classical_security = 128; + s->estimated_quantum_security = 64; + break; + case OQS_SIG_picnic_L3_FS: + pctx->params = Picnic_L3_FS; + s->method_name = Picnic_L3_FS_name; + s->estimated_classical_security = 192; + s->estimated_quantum_security = 96; + break; + case OQS_SIG_picnic_L3_UR: + pctx->params = Picnic_L3_UR; + s->method_name = Picnic_L3_UR_name; + s->estimated_classical_security = 192; + s->estimated_quantum_security = 96; + break; + case OQS_SIG_picnic_L5_FS: + pctx->params = Picnic_L5_FS; + s->method_name = Picnic_L5_FS_name; + s->estimated_classical_security = 256; + s->estimated_quantum_security = 128; + break; + case OQS_SIG_picnic_L5_UR: + pctx->params = Picnic_L5_UR; + s->method_name = Picnic_L5_UR_name; + s->estimated_classical_security = 256; + s->estimated_quantum_security = 128; + break; + default: + return OQS_ERROR; + } + // set the ctx, sizes, and API functions + s->ctx = pctx; + s->priv_key_len = PRIV_KEY_LEN[pctx->params] + PUB_KEY_LEN[pctx->params]; // priv key also contains pub key + s->pub_key_len = PUB_KEY_LEN[pctx->params]; + s->max_sig_len = SIG_LEN[pctx->params]; + s->keygen = &OQS_SIG_picnic_keygen; + s->sign = &OQS_SIG_picnic_sign; + s->verify = &OQS_SIG_picnic_verify; + + return OQS_SUCCESS; +} + +int OQS_SIG_picnic_keygen(const OQS_SIG *s, uint8_t *priv, uint8_t *pub) { + if (s == NULL || priv == NULL || pub == NULL) { + return OQS_ERROR; + } + picnic_publickey_t pk; + picnic_privatekey_t sk; + picnic_params_t parameters = ((PICNIC_CTX *) s->ctx)->params; + int ret = picnic_keygen(parameters, &pk, &sk); + if (ret != 0) { + return OQS_ERROR; + } + // serialize the public key + int pk_len 
= picnic_write_public_key(&pk, pub, PUB_KEY_LEN[parameters]); + if ((size_t) pk_len != PUB_KEY_LEN[parameters]) { + return OQS_ERROR; + } + + // serialize the private key + int sk_len = picnic_write_private_key(&sk, priv, PRIV_KEY_LEN[parameters]); + if ((size_t) sk_len != PRIV_KEY_LEN[parameters]) { + return OQS_ERROR; + } + // wipe the private key + OQS_MEM_cleanse(&sk, sizeof(picnic_privatekey_t)); + return OQS_SUCCESS; +} + +int OQS_SIG_picnic_sign(const OQS_SIG *s, const uint8_t *priv, const uint8_t *msg, const size_t msg_len, uint8_t *sig, size_t *sig_len) { + if (s == NULL || priv == NULL || msg == NULL || sig == NULL || sig_len == NULL) { + return OQS_ERROR; + } + picnic_privatekey_t sk; + picnic_params_t parameters = ((PICNIC_CTX *) s->ctx)->params; + // deserialize the private key + if (picnic_read_private_key(&sk, priv, PRIV_KEY_LEN[parameters]) != 0) { + return OQS_ERROR; + } + if (picnic_sign(&sk, msg, msg_len, sig, sig_len) != 0) { + return OQS_ERROR; + } + return OQS_SUCCESS; +} + +int OQS_SIG_picnic_verify(UNUSED const OQS_SIG *s, const uint8_t *pub, const uint8_t *msg, const size_t msg_len, const uint8_t *sig, const size_t sig_len) { + if (pub == NULL || msg == NULL || sig == NULL) { + return OQS_ERROR; + } + picnic_publickey_t pk; + // deserialize the public key + picnic_params_t parameters = ((PICNIC_CTX *) s->ctx)->params; + if (picnic_read_public_key(&pk, pub, PUB_KEY_LEN[parameters]) != 0) { + return OQS_ERROR; + } + if (picnic_verify(&pk, msg, msg_len, sig, sig_len) != 0) { + return OQS_ERROR; + } + return OQS_SUCCESS; +} +#endif diff --git a/crypt/liboqs/sig_picnic/sig_picnic.h b/crypt/liboqs/sig_picnic/sig_picnic.h new file mode 100644 index 0000000000000000000000000000000000000000..8ffa5fd2c0113fd6d6fb41e9d5841bc7869c9822 --- /dev/null +++ b/crypt/liboqs/sig_picnic/sig_picnic.h @@ -0,0 +1,20 @@ +/** + * \file sig_picnic.h + * \brief Header for the Microsoft Picnic library + */ +#ifndef __OQS_SIG_PICNIC_H +#define __OQS_SIG_PICNIC_H + 
+#ifdef ENABLE_SIG_PICNIC +#include <stddef.h> +#include <stdint.h> + +#include <oqs/sig.h> +#include <oqs/rand.h> + +int OQS_SIG_picnic_get(OQS_SIG *sig, enum OQS_SIG_algid algid); +int OQS_SIG_picnic_keygen(const OQS_SIG *s, uint8_t *priv, uint8_t *pub); +int OQS_SIG_picnic_sign(const OQS_SIG *s, const uint8_t *priv, const uint8_t *msg, const size_t msg_len, uint8_t *sig, size_t *sig_len); +int OQS_SIG_picnic_verify(const OQS_SIG *s, const uint8_t *pub, const uint8_t *msg, const size_t msg_len, const uint8_t *sig, const size_t sig_len); +#endif +#endif