diff --git a/client/CMakeLists.txt b/client/CMakeLists.txt
index ed60b07fa14e3afcf2383721813cfb0515467b63..92e4c3b1dd1edd31d8bff6704aa9320cd2e5b9ac 100644
--- a/client/CMakeLists.txt
+++ b/client/CMakeLists.txt
@@ -1,9 +1,9 @@
 cmake_minimum_required(VERSION 2.8)
 project (dapclient)
   
-set(CLIENT_SRCS dap_client_remote.c dap_client.c)
+set(CLIENT_SRCS dap_client.c dap_client_internal.c)
  
-add_library(${PROJECT_NAME} STATIC ${CORE_SRCS})
+add_library(${PROJECT_NAME} STATIC ${CLIENT_SRCS})
 
 set(${PROJECT_NAME}_DEFINITIONS CACHE INTERNAL "${PROJECT_NAME}: Definitions" FORCE)
 
diff --git a/client/dap_client.c b/client/dap_client.c
index 62dc62e0aabeab2ea971f1071690151d19bbe2cd..ab6eb6f2192eeb1742382b11d2138bdb4b11c559 100644
--- a/client/dap_client.c
+++ b/client/dap_client.c
@@ -1,167 +1,161 @@
+#include <stddef.h>
 #include "common.h"
 
-#include "sap_client.h"
-#include "sap_client_internal.h"
+#include "dap_client.h"
+#include "dap_client_internal.h"
 
-#define LOG_TAG "sap_client"
+#define LOG_TAG "dap_client"
 
 /**
- * @brief sap_client_init
+ * @brief dap_client_init
  * @return
  */
-int sap_client_init()
+int dap_client_init()
 {
-    log_it(L_INFO, "Init SAP client module");
+    log_it(L_INFO, "Init DAP client module");
     return 0;
 }
 
 /**
- * @brief sap_client_deinit
+ * @brief dap_client_deinit
  */
-void sap_client_deinit()
+void dap_client_deinit()
 {
-    log_it(L_INFO, "Deinit SAP client module");
+    log_it(L_INFO, "Deinit DAP client module");
 }
 
 /**
- * @brief sap_client_new
+ * @brief dap_client_new
  * @param a_stage_status_callback
  * @return
  */
-sap_client_t * sap_client_new(sap_client_callback_t a_stage_status_callback)
+dap_client_t * dap_client_new(dap_client_callback_t a_stage_status_callback)
 {
 
 }
 
 /**
- * @brief sap_client_delete
+ * @brief dap_client_delete
  * @param a_client
  */
-void sap_client_delete(sap_client_t * a_client)
+void dap_client_delete(dap_client_t * a_client)
 {
 
 }
 
 /**
- * @brief sap_client_go_stage
+ * @brief dap_client_go_stage
  * @param a_client
  * @param a_stage_end
  */
-void sap_client_go_stage(sap_client_t * a_client, sap_client_stage_t a_stage_end, sap_client_callback_t a_stage_end_callback)
+void dap_client_go_stage(dap_client_t * a_client, dap_client_stage_t a_stage_end,
+                         dap_client_callback_t a_stage_end_callback)
 {
 
 }
 
 /**
- * @brief sap_client_session_request
+ * @brief dap_client_session_request
  * @param a_client
  * @param a_path
  * @param a_request
  * @param a_request_size
  * @param a_response_proc
  */
-void sap_client_session_request(sap_client_t * a_client, const char * a_path, void * a_request, size_t a_request_size,
-                                sap_client_callback_t a_response_proc)
+void dap_client_session_request(dap_client_t * a_client, const char * a_path, void * a_request, size_t a_request_size,
+                                dap_client_callback_t a_response_proc)
 {
 
 }
 
 /**
- * @brief sap_client_set_uplink
+ * @brief dap_client_set_uplink
  * @param a_client
  * @param a_addr
  * @param a_port
  */
-void sap_client_set_uplink(sap_client_t * a_client,const char* a_addr, uint16_t a_port)
+void dap_client_set_uplink(dap_client_t * a_client,const char* a_addr, uint16_t a_port)
 {
 
 }
 
 /**
- * @brief sap_client_set_credentials
+ * @brief dap_client_set_credentials
  * @param a_client
  * @param a_user
  * @param a_password
  */
-void sap_client_set_credentials(sap_client_t * a_client,const char* a_user, const char * a_password)
+void dap_client_set_credentials(dap_client_t * a_client,const char* a_user, const char * a_password)
 {
 
 }
 
 
 /**
- * @brief sap_client_error_str
+ * @brief dap_client_error_str
  * @param a_client_error
  * @return
  */
-const char * sap_client_error_str(sap_client_error_t a_client_error)
+const char * dap_client_error_str(dap_client_error_t a_client_error)
 {
     switch(a_client_error){
-        case SAP_CLIENT_ERROR_ENC_NO_KEY: return "ENC_NO_KEY";
-        case SAP_CLIENT_ERROR_ENC_WRONG_KEY: return "ENC_WRONG_KEY";
-        case SAP_CLIENT_ERROR_AUTH_WRONG_COOKIE: return "AUTH_WRONG_COOKIE";
-        case SAP_CLIENT_ERROR_AUTH_WRONG_CREDENTIALS: return "AUTH_WRONG_CREDENTIALS";
-        case SAP_CLIENT_ERROR_NETWORK_CONNECTION_TIMEOUT: return "NETWORK_CONNECTION_TIMEOUT";
-        case SAP_CLIENT_ERROR_NETWORK_CONNECTION_REFUSE: return "NETWORK_CONNECTION_REFUSE";
-        case SAP_CLIENT_ERROR_NETWORK_DISCONNECTED: return "NETWORK_DISCONNECTED";
-        case SAP_CLIENT_ERROR_STREAM_RESPONSE_WRONG: return "STREAM_RESPONSE_WRONG";
-        case SAP_CLIENT_ERROR_STREAM_RESPONSE_TIMEOUT: return "STREAM_RESPONSE_TIMEOUT";
-        case SAP_CLIENT_ERROR_STREAM_FREEZED: return "STREAM_FREEZED";
-        case SAP_CLIENT_ERROR_LICENSE: return "LICENSE_ERROR";
+        case DAP_CLIENT_ERROR_ENC_NO_KEY: return "ENC_NO_KEY";
+        case DAP_CLIENT_ERROR_ENC_WRONG_KEY: return "ENC_WRONG_KEY";
+        case DAP_CLIENT_ERROR_AUTH_WRONG_COOKIE: return "AUTH_WRONG_COOKIE";
+        case DAP_CLIENT_ERROR_AUTH_WRONG_CREDENTIALS: return "AUTH_WRONG_CREDENTIALS";
+        case DAP_CLIENT_ERROR_NETWORK_CONNECTION_TIMEOUT: return "NETWORK_CONNECTION_TIMEOUT";
+        case DAP_CLIENT_ERROR_NETWORK_CONNECTION_REFUSE: return "NETWORK_CONNECTION_REFUSE";
+        case DAP_CLIENT_ERROR_NETWORK_DISCONNECTED: return "NETWORK_DISCONNECTED";
         default : return "UNDEFINED";
     }
 }
 
 /**
- * @brief sap_client_get_stage
+ * @brief dap_client_get_stage
  * @param a_client
  * @return
  */
-sap_client_stage_t sap_client_get_stage(sap_client_t * a_client)
+dap_client_stage_t dap_client_get_stage(dap_client_t * a_client)
 {
-    return SAP_CLIENT_INTERNAL(a_client)->stage;
+    return DAP_CLIENT_INTERNAL(a_client)->stage;
 }
 
 /**
- * @brief sap_client_get_stage_status_str
+ * @brief dap_client_get_stage_status_str
  * @param a_client
  * @return
  */
-const char * sap_client_get_stage_status_str(sap_client_t *a_client)
+const char * dap_client_get_stage_status_str(dap_client_t *a_client)
 {
-    switch(SAP_CLIENT_INTERNAL(a_client)->stage_status){
-        case SAP_CLIENT_STAGE_STATUS_NONE: return "NONE";
-        case SAP_CLIENT_STAGE_STATUS_IN_PROGRESS: return "IN_PROGRESS";
-        case SAP_CLIENT_STAGE_STATUS_ERROR: return "ERROR";
-        case SAP_CLIENT_STAGE_STATUS_DONE: return "DONE";
+    switch(DAP_CLIENT_INTERNAL(a_client)->stage_status){
+        case DAP_CLIENT_STAGE_STATUS_NONE: return "NONE";
+        case DAP_CLIENT_STAGE_STATUS_IN_PROGRESS: return "IN_PROGRESS";
+        case DAP_CLIENT_STAGE_STATUS_ERROR: return "ERROR";
+        case DAP_CLIENT_STAGE_STATUS_DONE: return "DONE";
         default: return "UNDEFINED";
     }
 }
 
 /**
- * @brief sap_client_get_stage_str
+ * @brief dap_client_get_stage_str
  * @param a_client
  * @return
  */
-const char * sap_client_get_stage_str(sap_client_t * a_client)
+const char * dap_client_get_stage_str(dap_client_t * a_client)
 {
-    switch(SAP_CLIENT_INTERNAL(a_client)->stage){
-        case SAP_CLIENT_STAGE_BEGIN: return "BEGIN";
-        case SAP_CLIENT_STAGE_ENC: return "ENC";
-        case SAP_CLIENT_STAGE_AUTH: return "AUTH";
-        case SAP_CLIENT_STAGE_STREAM_CTL: return "STREAM_CTL";
-        case SAP_CLIENT_STAGE_STREAM: return "STREAM";
-        case SAP_CLIENT_STAGE_NETCONF: return "NETCONF";
-        case SAP_CLIENT_STAGE_TUNNEL: return "TUNNEL";
+    switch(DAP_CLIENT_INTERNAL(a_client)->stage){
+        case DAP_CLIENT_STAGE_BEGIN: return "BEGIN";
+        case DAP_CLIENT_STAGE_ENC: return "ENC";
+        case DAP_CLIENT_STAGE_AUTH: return "AUTH";
         default: return "UNDEFINED";
     }
 }
 /**
- * @brief sap_client_get_stage_status
+ * @brief dap_client_get_stage_status
  * @param a_client
  * @return
  */
-sap_client_stage_status_t sap_client_get_stage_status(sap_client_t * a_client)
+dap_client_stage_status_t dap_client_get_stage_status(dap_client_t * a_client)
 {
-    return SAP_CLIENT_INTERNAL(a_client)->stage_status;
+    return DAP_CLIENT_INTERNAL(a_client)->stage_status;
 }
diff --git a/client/dap_client.h b/client/dap_client.h
index a09cbadeeef4abfea666a937a78fe268a060f89d..14206d30b027a0c2acc634184f9db3f55962f4f7 100644
--- a/client/dap_client.h
+++ b/client/dap_client.h
@@ -1,5 +1,5 @@
-#ifndef _SAP_CLIENT_H_
-#define _SAP_CLIENT_H_
+#ifndef _DAP_CLIENT_H_
+#define _DAP_CLIENT_H_
 #include <stdint.h>
 
 
@@ -24,6 +24,8 @@ typedef enum dap_client_error {
     DAP_CLIENT_ERROR_UNDEFINED = 0,
     DAP_CLIENT_ERROR_ENC_NO_KEY,
     DAP_CLIENT_ERROR_ENC_WRONG_KEY,
+    DAP_CLIENT_ERROR_AUTH_WRONG_COOKIE,
+    DAP_CLIENT_ERROR_AUTH_WRONG_CREDENTIALS,
     DAP_CLIENT_ERROR_NETWORK_CONNECTION_TIMEOUT,
     DAP_CLIENT_ERROR_NETWORK_CONNECTION_REFUSE,
     DAP_CLIENT_ERROR_NETWORK_DISCONNECTED,
@@ -53,6 +55,8 @@ void dap_client_delete(dap_client_t * a_client);
 void dap_client_set_uplink(dap_client_t * a_client,const char* a_addr, uint16_t a_port);
 void dap_client_go_stage(dap_client_t * a_client, dap_client_stage_t a_stage_end, dap_client_callback_t a_stage_end_callback);
 
+void dap_client_set_credentials(dap_client_t * a_client,const char* a_user, const char * a_password);
+
 void dap_client_enc_request(dap_client_t * a_client, const char * a_path, void * a_request, size_t a_request_size,
                                 dap_client_callback_t a_response_proc);
 
diff --git a/client/dap_client_internal.c b/client/dap_client_internal.c
new file mode 100644
index 0000000000000000000000000000000000000000..a9a836a9dfc8a716a5a7086db251d3f13592f2a2
--- /dev/null
+++ b/client/dap_client_internal.c
@@ -0,0 +1 @@
+#include "dap_client_internal.h"
diff --git a/client/dap_client_internal.h b/client/dap_client_internal.h
new file mode 100644
index 0000000000000000000000000000000000000000..95c2ced41451627e4956db00ec4d12a2f333878f
--- /dev/null
+++ b/client/dap_client_internal.h
@@ -0,0 +1,23 @@
+#ifndef _DAP_CLIENT_INTERNAL_H_
+#define _DAP_CLIENT_INTERNAL_H_
+
+#include "dap_client.h"
+
+typedef struct dap_client_remote dap_client_remote_t;
+typedef struct dap_enc_key dap_enc_key_t;
+
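+// Private per-client state, reached from a dap_client_t via the DAP_CLIENT_INTERNAL() macro below.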
+typedef struct dap_client_internal
+{
+    dap_client_t * client;
+    dap_client_remote_t * es;
+
+    dap_enc_key_t * session_key;
+
+    dap_client_stage_t stage;
+    dap_client_stage_status_t stage_status;
+
+    dap_client_callback_t stage_status_callback;
+} dap_client_internal_t;
+
+#define DAP_CLIENT_INTERNAL(a) ((dap_client_internal_t*) (a)->_internal )
+#endif
diff --git a/client/dap_client_remote.c b/client/dap_client_remote.c
index d2f9d4aefae5b4c53682c575a832f71e84929afc..d730d16407adcf99d3f821a638ea0b88014a0635 100644
--- a/client/dap_client_remote.c
+++ b/client/dap_client_remote.c
@@ -25,7 +25,7 @@
 #include <unistd.h>
 #include <string.h>
 #include "common.h"
-#include "dap_server.h"
+#include "dap_loop.h"
 #include "dap_client.h"
 #include <ev.h>
 #define LOG_TAG "client"
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index b4ce8067765dbfb4775e1a1a5a9986d86ba9b1cf..ec9cd642cafbc6200ee205ab5050f28b20cd2b1b 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 2.8)
 project (dapcore)
   
-set(CORE_SRCS common.c dap_client_remote.c)
+set(CORE_SRCS dap_common.c )
  
 add_library(${PROJECT_NAME} STATIC ${CORE_SRCS})
 
diff --git a/core/common.c b/core/common.c
deleted file mode 100644
index dfa60d7da51bacdf7b8a1a0997f136f4472848fb..0000000000000000000000000000000000000000
--- a/core/common.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc
-  All rights reserved.
-
- This file is part of DAP (Deus Applications Prototypes) the open source project
-
-    DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Lesser General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    DAP is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public License
-    along with any DAP based project.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-
-#include <string.h>
-#include <stdarg.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <syslog.h>
-#include <libconfig.h>
-#include <unistd.h>
-#include "common.h"
-#include "config.h"
-#define LAST_ERROR_MAX 255
-
-#define LOG_TAG "common"
-
-char last_error[LAST_ERROR_MAX]={0};
-enum log_level log_level=DEBUG;
-FILE * lf=NULL;
-
-int common_init()
-{
-    const char * fn = (my_config.log_file)? my_config.log_file : DEF_LOG ;
-    lf=fopen(fn, "a");
-    if(lf==NULL){
-        fprintf(stderr,"Can't open log file %s to append\n", fn);
-        lf=stdout;
-        return -1;
-    }
-
-	//printf("Common init\n");
-    //    lf=fopen("/dev/stdout","a");
-	//lf=stdout;
-	//strcpy(last_error,"undefined");
-    log_it(INFO,"Common modules init (%s)", fn);
-	return 0;
-}
-
-void common_deinit()
-{
-	if(lf) fclose(lf);
-}
-
-void _log_it(const char * log_tag,enum log_level ll, const char * format,...)
-{
-// branch predictor optimization
-#if defined(__GNUC__)||defined(__GNUG__)||defined(__clang__)
-        if (__builtin_expect(!lf,0))
-#else
-        if (!lf)
-#endif
-        common_init();
-
-    va_list ap,ap2;
-
-    static pthread_mutex_t mutex=PTHREAD_MUTEX_INITIALIZER;
-
-    if(ll<log_level)
-        return;
-
-    pthread_mutex_lock(&mutex);
-    time_t t=time(NULL);
-    struct tm* tmp=localtime(&t);
-    static char s_time[1024]={0};
-    strftime(s_time,sizeof(s_time),"%x-%X",tmp);
-        
-	va_start(ap,format);
-	va_copy(ap2,ap);
-        fprintf(lf,"[%s] ",s_time);
-        printf("[%s] ",s_time);
-	/*if(ll>=ERROR){
-		vsnprintf(last_error,LAST_ERROR_MAX,format,ap);
-	}*/
-	if(ll==DEBUG){
-		fprintf(lf,"[DBG] ");
-		printf(	"\x1b[37;2m[DBG] ");
-	}else if(ll==INFO){
-		fprintf(lf,"[   ] ");
-		printf("\x1b[32;2m[   ] ");
-	}else if(ll==NOTICE){
-		fprintf(lf,"[ * ] ");
-		printf("\x1b[32m[ * ] ");
-	}else if(ll==WARNING){
-		fprintf(lf,"[WRN] ");
-		printf("\x1b[31;2m[WRN] ");
-	}else if(ll==ERROR){
-		fprintf(lf,"[ERR] ");
-        printf("\x1b[31m[ERR] ");
-	}else if(ll==CRITICAL){
-		fprintf(lf,"[!!!] ");
-		printf("\x1b[1;5;31m[!!!] ");
-        }
-    fprintf(lf,"[%8s]\t",log_tag);
-    printf("[%8s]\t",log_tag);
-
-	vfprintf(lf,format,ap);
-	vprintf(format,ap2);
-	fprintf(lf,"\n");
-	printf("\x1b[0m\n");
-	va_end(ap);
-	va_end(ap2);
-        fflush(lf);
-	fflush(stdout);
-        pthread_mutex_unlock(&mutex);
-}
-
-const char * log_error()
-{
-	return last_error;
-}
-
-#define INT_DIGITS 19		/* enough for 64 bit integer */
-
-char *itoa(int i)
-{
-  /* Room for INT_DIGITS digits, - and '\0' */
-  static char buf[INT_DIGITS + 2];
-  char *p = buf + INT_DIGITS + 1;	/* points to terminating '\0' */
-  if (i >= 0) {
-    do {
-      *--p = '0' + (i % 10);
-      i /= 10;
-    } while (i != 0);
-    return p;
-  }
-  else {			/* i < 0 */
-    do {
-      *--p = '0' - (i % 10);
-      i /= 10;
-    } while (i != 0);
-    *--p = '-';
-  }
-  return p;
-}
-
-/**
- * @brief time_to_rfc822 Convert time_t to string with RFC822 formatted date and time
- * @param out Output buffer
- * @param out_size_mac Maximum size of output buffer
- * @param t UNIX time
- * @return Length of resulting string if ok or lesser than zero if not
- */
-int time_to_rfc822(char * out, size_t out_size_max, time_t t)
-{
-    struct tm *tmp;
-    tmp=localtime(&t);
-    if(tmp== NULL){
-        log_it(ERROR,"Can't convert data from unix fromat to structured one");
-        return -2;
-    }else{
-        int ret;
-        ret=strftime(out, out_size_max,"%a, %d %b %y %T %z",tmp);
-        //free(tmp);
-        if(ret>0){
-            return ret;
-        }else{
-            log_it(ERROR,"Can't print formatted time in string");
-            return -1;
-        }
-    }
-}
-
-/**
- * @brief get_select_breaker
- * @return
- */
-static int breaker_set[2] = { -1, -1 };
-static int initialized = 0;
-static struct timespec break_latency = { tv_sec: 0, tv_nsec: 1 * 1000 * 1000 };
-int get_select_breaker()
-{
-    if (!initialized)
-    {
-    if (pipe(breaker_set) < 0) return -1;
-    else initialized = 1;
-    }
-
-    return breaker_set[0];
-}
-
-int send_select_break()
-{
-    if (!initialized) return -1;
-    char buffer[1];
-    if (write(breaker_set[1], "\0", 1) <= 0) return -1;
-    nanosleep(&break_latency, NULL);
-    if (read(breaker_set[0], buffer, 1) <= 0 || buffer[0] != '\0') return -1;
-    return 0;
-}
-
-
-void hexdump(const void* data, size_t size)
-{
-    char ascii[17];
-    size_t i, j;
-    ascii[16] = '\0';
-    for (i = 0; i < size; ++i) {
-        printf("%02X ", ((unsigned char*)data)[i]);
-        if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') {
-            ascii[i % 16] = ((unsigned char*)data)[i];
-        } else {
-            ascii[i % 16] = '.';
-        }
-        if ((i+1) % 8 == 0 || i+1 == size) {
-            printf(" ");
-            if ((i+1) % 16 == 0) {
-                printf("|  %s \n", ascii);
-            } else if (i+1 == size) {
-                ascii[(i+1) % 16] = '\0';
-                if ((i+1) % 16 <= 8) {
-                    printf(" ");
-                }
-                for (j = (i+1) % 16; j < 16; ++j) {
-                    printf("   ");
-                }
-                printf("|  %s \n", ascii);
-            }
-        }
-    }
-}
-
-/**
-* @brief get_utc_date_time
-* @param buf_out ( not less 20 bytes )
-* @return example: 2017-08-12 13:28:36
-*/
-void get_utc_date_time(char buf_out[])
-{
-    struct tm *local;
-    time_t t = time(NULL);
-    local = gmtime(&t);
-    strftime(buf_out, 20, "%Y-%m-%d %H:%M:%S", local);
-}
diff --git a/core/common.h b/core/common.h
deleted file mode 100644
index 5fd34ca0092f3a9a259cfbdbc33726be44a67798..0000000000000000000000000000000000000000
--- a/core/common.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc
-  All rights reserved.
-
- This file is part of DAP (Deus Applications Prototypes) the open source project
-
-    DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Lesser General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    DAP is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public License
-    along with any DAP based project.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef COMMON_H
-#define COMMON_H
-#include <stdarg.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <time.h>
-
-#define CALLOC(a) ((a *) calloc(1,sizeof(a)))
-#define DUP(a) (__typeof(a) ret = memcpy(ret,a,sizeof(*a)) )
-
-#define DEF_LOG  "/opt/dapserver/log/dapserver.log"
-//#define DEF_LOG  "/opt/DAP/log/confcall_server.log"
-
-enum log_level{CRITICAL=5,ERROR=4, WARNING=3,NOTICE=2,INFO=1,DEBUG=0};
-extern enum log_level log_level;
-
-extern int common_init();
-extern void common_deinit();
-
-extern void _log_it(const char * log_tag, enum log_level, const char * format,...);
-#define log_it(_log_level,...) _log_it(LOG_TAG,_log_level,##__VA_ARGS__)
-
-extern const char * log_error();
-
-extern char *itoa(int i);
-extern int time_to_rfc822(char * out, size_t out_size_max, time_t t);
-
-extern void get_utc_date_time(char buf_out[]);
-extern void hexdump(const void* data, size_t size);
-extern int send_select_break();
-extern int get_select_breaker();
-#endif
diff --git a/core/dap_common.c b/core/dap_common.c
new file mode 100644
index 0000000000000000000000000000000000000000..8f6a9ad4d7761c99a4ac2aebc39c2f72010fd78c
--- /dev/null
+++ b/core/dap_common.c
@@ -0,0 +1,304 @@
+#ifdef SAP_OS_ANDROID
+#include <android/log.h>
+#endif
+
+#ifndef _MSC_VER
+#include <unistd.h> /* 'pipe', 'read', 'write' */
+#include <pthread.h>
+#include <syslog.h>
+#elif defined(_MSC_VER)
+#include <stdio.h>
+#include <stdlib.h>
+#include <windows.h>
+#include <process.h>
+typedef HANDLE pthread_mutex_t;
+#define popen _popen
+#define pclose _pclose
+#define PTHREAD_MUTEX_INITIALIZER 0
+int pthread_mutex_lock(pthread_mutex_t *obj)
+{
+    return (( *obj = (HANDLE) CreateMutex(0, 1, 0) ) == NULL) ? 0 : 1;
+}
+int pthread_mutex_unlock(pthread_mutex_t *obj) {
+    return (ReleaseMutex(*obj) == 0) ? 0 : 1;
+}
+#endif
+#include <time.h> /* 'nanosleep' */
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include "dap_common.h"
+#define LAST_ERROR_MAX 255
+
+#define LOG_TAG "dap_common"
+
+char last_error[LAST_ERROR_MAX]={0};
+enum log_level log_level=L_DEBUG;
+static FILE * s_lf=NULL;
+
+int dap_common_init( const char * a_log_file )
+{
+    if ( a_log_file ) {
+        s_lf=fopen( a_log_file , "a");
+        if(s_lf==NULL){
+            fprintf(stderr,"Can't open log file %s to append\n", a_log_file);
+            s_lf=stdout;
+            return -1;
+        }
+    }
+
+	return 0;
+}
+
+void dap_common_deinit()
+{
+    if(s_lf) fclose(s_lf);
+}
+
+void _log_it(const char * log_tag,enum log_level ll, const char * format,...)
+{
+    if(ll<log_level)
+        return;
+
+    va_list ap;
+
+
+        
+    va_start(ap,format);
+    _vlog_it(log_tag,ll, format,ap);
+    va_end(ap);
+}
+
+void _vlog_it(const char * log_tag,enum log_level ll, const char * format,va_list ap)
+{
+    va_list ap2;
+
+    static pthread_mutex_t mutex=PTHREAD_MUTEX_INITIALIZER;
+
+    pthread_mutex_lock(&mutex);
+#ifdef SAP_OS_ANDROID
+    char buf[4096];
+    vsnprintf(buf,sizeof(buf),format,ap);
+    switch (ll) {
+        case L_INFO:
+            __android_log_write(ANDROID_LOG_INFO,SAP_BRAND,buf);
+        break;
+        case L_WARNING:
+            __android_log_write(ANDROID_LOG_WARN,SAP_BRAND,buf);
+        break;
+        case L_ERROR:
+            __android_log_write(ANDROID_LOG_ERROR,SAP_BRAND,buf);
+        break;
+        case L_CRITICAL:
+            __android_log_write(ANDROID_LOG_FATAL,SAP_BRAND,buf);
+            abort();
+        break;
+        case L_DEBUG:
+        default:
+            __android_log_write(ANDROID_LOG_DEBUG,SAP_BRAND,buf);
+    }
+#endif
+    time_t t=time(NULL);
+    struct tm* tmp=localtime(&t);
+    static char s_time[1024]={0};
+    strftime(s_time,sizeof(s_time),"%x-%X",tmp);
+
+
+    va_copy(ap2,ap);
+    if (s_lf ) fprintf(s_lf,"[%s] ",s_time);
+    printf("[%s] ",s_time);
+	/*if(ll>=ERROR){
+		vsnprintf(last_error,LAST_ERROR_MAX,format,ap);
+	}*/
+
+    if(ll==L_DEBUG){
+        if (s_lf ) fprintf(s_lf,"[DBG] ");
+		printf(	"\x1b[37;2m[DBG] ");
+    }else if(ll==L_INFO){
+        if (s_lf ) fprintf(s_lf,"[   ] ");
+		printf("\x1b[32;2m[   ] ");
+    }else if(ll==L_NOTICE){
+        if (s_lf ) fprintf(s_lf,"[ * ] ");
+		printf("\x1b[32m[ * ] ");
+    }else if(ll==L_WARNING){
+        if (s_lf ) fprintf(s_lf,"[WRN] ");
+		printf("\x1b[31;2m[WRN] ");
+    }else if(ll==L_ERROR){
+        if (s_lf ) fprintf(s_lf,"[ERR] ");
+        printf("\x1b[31m[ERR] ");
+    }else if(ll==L_CRITICAL){
+        if (s_lf ) fprintf(s_lf,"[!!!] ");
+        printf("\x1b[1;5;31m[!!!] ");
+    }
+    if (s_lf ) fprintf(s_lf,"[%8s]\t",log_tag);
+    printf("[%8s]\t",log_tag);
+
+    if (s_lf ) vfprintf(s_lf,format,ap);
+	vprintf(format,ap2);
+    if (s_lf ) fprintf(s_lf,"\n");
+	printf("\x1b[0m\n");
+	va_end(ap2);
+    if (s_lf ) fflush(s_lf);
+	fflush(stdout);
+    pthread_mutex_unlock(&mutex);
+}
+
+const char * log_error()
+{
+	return last_error;
+}
+
+#define INT_DIGITS 19		/* enough for 64 bit integer */
+
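+/* Convert an int to its decimal string representation. The result points into a
+   static buffer, so it is overwritten by the next call and is not thread-safe. */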
+char *itoa(int i)
+{
+  /* Room for INT_DIGITS digits, - and '\0' */
+  static char buf[INT_DIGITS + 2];
+  char *p = buf + INT_DIGITS + 1;	/* points to terminating '\0' */
+  if (i >= 0) {
+    do {
+      *--p = '0' + (i % 10);
+      i /= 10;
+    } while (i != 0);
+    return p;
+  }
+  else {			/* i < 0 */
+    do {
+      *--p = '0' - (i % 10);
+      i /= 10;
+    } while (i != 0);
+    *--p = '-';
+  }
+  return p;
+}
+
+/**
+ * @brief time_to_rfc822 Convert time_t to string with RFC822 formatted date and time
+ * @param out Output buffer
+ * @param out_size_max Maximum size of output buffer
+ * @param t UNIX time
+ * @return Length of resulting string if ok or lesser than zero if not
+ */
+int time_to_rfc822(char * out, size_t out_size_max, time_t t)
+{
+    struct tm *tmp;
+    tmp=localtime(&t);
+    if(tmp== NULL){
+        log_it(L_ERROR,"Can't convert date from UNIX format to structured one");
+        return -2;
+    }else{
+        int ret;
+        ret=strftime(out, out_size_max,"%a, %d %b %y %T %z",tmp);
+        //free(tmp);
+        if(ret>0){
+            return ret;
+        }else{
+            log_it(L_ERROR,"Can't print formatted time in string");
+            return -1;
+        }
+    }
+}
+
+
+
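+/* Self-pipe used to interrupt a blocking select(): get_select_breaker() returns the
+   read end to watch, send_select_break() pushes a byte through it to wake the waiter. */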
+static int breaker_set[2] = { -1, -1 };
+static int initialized = 0;
+static struct timespec break_latency = {0, 1 * 1000 * 1000 };
+#ifndef _MSC_VER
+int get_select_breaker()
+{
+    if (!initialized)
+    {
+    if (pipe(breaker_set) < 0) return -1;
+    else initialized = 1;
+    }
+
+    return breaker_set[0];
+}
+
+int send_select_break()
+{
+    if (!initialized) return -1;
+    char buffer[1];
+    if (write(breaker_set[1], "\0", 1) <= 0) return -1;
+    nanosleep(&break_latency, NULL);
+    if (read(breaker_set[0], buffer, 1) <= 0 || buffer[0] != '\0') return -1;
+    return 0;
+}
+#else
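+/* MSVC builds have no strndup(), so provide a minimal replacement here. */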
+char *strndup(const char *s, size_t n) {
+    char *p = memchr(s, '\0', n);
+    if (p != NULL)
+        n = p - s;
+    p = malloc(n + 1);
+    if (p != NULL) {
+        memcpy(p, s, n);
+        p[n] = '\0';
+    }
+    return p;
+}
+#endif
+
+#ifdef ANDROID1
+static u_long myNextRandom = 1;
+
+double atof(const char *nptr)
+{
+    return (strtod(nptr, NULL));
+}
+
+int rand(void)
+{
+    return (int)((myNextRandom = (1103515245 * myNextRandom) + 12345) % ((u_long)RAND_MAX + 1));
+}
+
+void srand(u_int seed)
+{
+    myNextRandom = seed;
+}
+
+#endif
+
+/**
+ * @brief exec_with_ret
+ * @param a_cmd
+ * @return Newly allocated string with the command's first output line; the caller must free() it
+ */
+char * exec_with_ret(const char * a_cmd)
+{
+    FILE * fp;
+    size_t buf_len = 0;
+    char buf[4096] = {0};
+    fp= popen(a_cmd, "r");
+    if (!fp) {
+        goto FIN;
+    }
+    memset(buf,0,sizeof(buf));
+    fgets(buf,sizeof(buf)-1,fp);
+    pclose(fp);
+    buf_len=strlen(buf);
+    if(buf_len && buf[buf_len-1] =='\n')buf[buf_len-1] ='\0';
+FIN:
+    return strdup(buf);
+}
+
+char * exec_with_ret_multistring(const char * a_cmd)
+{
+    FILE * fp;
+    size_t buf_len = 0;
+    char buf[4096] = {0};
+    fp= popen(a_cmd, "r");
+    if (!fp) {
+        goto FIN;
+    }
+    memset(buf,0,sizeof(buf));
+    char retbuf[4096] = {0};
+    while(fgets(buf,sizeof(buf)-1,fp)) {
+        strcat(retbuf, buf);
+    }
+    pclose(fp);
+    buf_len=strlen(retbuf);
+    if(buf_len && retbuf[buf_len-1] =='\n')retbuf[buf_len-1] ='\0';
+FIN:
+    return strdup(retbuf);
+}
diff --git a/core/dap_common.h b/core/dap_common.h
new file mode 100644
index 0000000000000000000000000000000000000000..023cdd7d8693dcecbf3f8c857ba2d094e5b80fb2
--- /dev/null
+++ b/core/dap_common.h
@@ -0,0 +1,46 @@
+#ifndef _DAP_COMMON_H_
+#define _DAP_COMMON_H_
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <time.h>
+
+#define MALLOC(a) ((a *) malloc(sizeof(a)))
+#define CALLOC(a) ((a *) calloc(1,sizeof(a)))
+#define DUP(a) (__typeof(a) ret = memcpy(ret,a,sizeof(*a)) )
+
+enum log_level{L_CRITICAL=5,L_ERROR=4, L_WARNING=3,L_NOTICE=2,L_INFO=1,L_DEBUG=0};
+extern enum log_level log_level;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int dap_common_init( const char * a_log_file );
+void dap_common_deinit();
+
+void _log_it(const char * log_tag, enum log_level, const char * format,...);
+void _vlog_it(const char * log_tag, enum log_level, const char * format, va_list ap );
+#define log_it(_log_level,...) _log_it(LOG_TAG,_log_level,##__VA_ARGS__)
+#define vlog_it(a_log_level,a_format,a_ap) _vlog_it(LOG_TAG,a_log_level,a_format,a_ap)
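+// log_it()/vlog_it() expand LOG_TAG, so each translation unit must #define LOG_TAG before using them.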
+
+const char * log_error();
+
+#ifdef __GNUC__
+char *itoa(int i);
+#elif defined(_MSC_VER)
+char *strndup(const char *s, size_t n);
+#endif
+int time_to_rfc822(char * out, size_t out_size_max, time_t t);
+
+int get_select_breaker();
+int send_select_break();
+char * exec_with_ret(const char * a_cmd);
+char * exec_with_ret_multistring(const char * a_cmd);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/crypt/CMakeLists.txt b/crypt/CMakeLists.txt
index 764bd9b2537ee3ee6cc2b63523db8e6a3b78c28c..32287691a0e3aebc8804286a59234f1aa91ee5a0 100644
--- a/crypt/CMakeLists.txt
+++ b/crypt/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 2.8)
 project (dapcrypt)
   
-set(CRYPT_SRCS enc.c  enc_fnam2.c enc_key.c  )
+set(CRYPT_SRCS dap_enc.c dap_enc_key.c  )
  
 include_directories("${dapcore_INCLUDE_DIRS}")
 add_definitions ("${dapcore_DEFINITIONS}")
diff --git a/crypt/dap_enc.c b/crypt/dap_enc.c
new file mode 100644
index 0000000000000000000000000000000000000000..5260cbbbb660b74128ad02f8873ecaaadb81f064
--- /dev/null
+++ b/crypt/dap_enc.c
@@ -0,0 +1,113 @@
+/*
+ Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc
+  All rights reserved.
+
+ This file is part of DAP (Deus Applications Prototypes) the open source project
+
+    DAP (Deus Applications Prototypes) is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    DAP is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with any DAP based project.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+
+#include <arpa/inet.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include "dap_enc.h"
+#include "dap_enc_key.h"
+#include "dap_common.h"
+
+#define LOG_TAG "dap_enc"
+
+/**
+ * @brief dap_enc_init
+ * @return
+ */
+int dap_enc_init()
+{
+    srand(time(NULL));
+
+    return 0;
+}
+
+
+/**
+ * @brief dap_enc_code Encode data with key
+ * @param key Key used for encryption
+ * @param buf  Input buffer
+ * @param buf_size Input buffer size
+ * @param buf_out Output buffer
+ * @param data_type_out Output encoding (raw or base64)
+ * @return bytes actually written in the output buffer
+ */
+size_t dap_enc_code(struct dap_enc_key * key,const void * buf,const size_t buf_size, void * buf_out, dap_enc_data_type_t data_type_out)
+{
+    //log_it(NOTICE,"In enc code");
+    if(key->enc){
+        void *proc_buf;
+        switch(data_type_out)
+        {
+            case DAP_ENC_DATA_TYPE_B64:{
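+                // base64 output is roughly 4/3 of the input size; allocating 2x leaves headroom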
+                proc_buf=calloc(1,buf_size*2);
+            }break;
+            case DAP_ENC_DATA_TYPE_RAW:{
+                proc_buf=buf_out;
+            }break;
+        }
+        size_t ret=key->enc(key,buf,buf_size,proc_buf);
+        if(data_type_out==DAP_ENC_DATA_TYPE_B64){
+            ret=enc_base64_encode(proc_buf,ret,buf_out);
+            free(proc_buf);
+            return ret;
+        }
+        return ret;
+    }else{
+        return 0;
+    }
+}
+
+/**
+ * @brief dap_enc_decode Decode data with key
+ * @param key Key used for decryption
+ * @param buf  Input buffer
+ * @param buf_size Input buffer size
+ * @param buf_out Output buffer
+ * @param data_type_in Input encoding (raw or base64)
+ * @return bytes actually written in the output buffer
+ */
+size_t dap_enc_decode(struct dap_enc_key * key,const void * buf, const size_t buf_size, void * buf_out, dap_enc_data_type_t data_type_in)
+{
+    void *proc_buf;
+    const void *proc_buf_const;
+    size_t proc_buf_size;
+    switch(data_type_in){
+        case DAP_ENC_DATA_TYPE_B64:{
+            proc_buf=calloc(1,buf_size);
+            proc_buf_size= enc_base64_decode((const char*) buf,buf_size,proc_buf);
+            proc_buf_const=proc_buf;
+        }break;
+        case DAP_ENC_DATA_TYPE_RAW:{
+            proc_buf_const=buf;
+            proc_buf_size=buf_size;
+        }break;
+    }
+
+    if(key->dec){
+        size_t ret=key->dec(key,proc_buf_const,proc_buf_size,buf_out);
+        if(data_type_in==DAP_ENC_DATA_TYPE_B64)
+            free(proc_buf);
+        return ret;
+    }else{
+        return 0;
+    }
+}
diff --git a/crypt/enc_fnam2.h b/crypt/dap_enc.h
similarity index 68%
rename from crypt/enc_fnam2.h
rename to crypt/dap_enc.h
index a287deb30a2d6623a2ea95a559b000b4abf8faea..9d83ce423f358888c33e6653e1893190dec440cc 100644
--- a/crypt/enc_fnam2.h
+++ b/crypt/dap_enc.h
@@ -18,16 +18,19 @@
     along with any DAP based project.  If not, see <http://www.gnu.org/licenses/>.
 */
 
-
-#ifndef _ENC_FNAM2_H_
-#define _ENC_FNAM2_H_
+#ifndef _DAP_ENC_H_
+#define _DAP_ENC_H_
 #include <stddef.h>
+#include <stdbool.h>
+
+#include "dap_enc_key.h"
 
-struct enc_key;
+int dap_enc_init();
 
-extern void enc_fnam2_key_new(struct enc_key * key);
+size_t dap_enc_code(struct dap_enc_key * key, const void * buf, const size_t buf_size, void * buf_out,
+                    dap_enc_data_type_t data_type_out);
+size_t dap_enc_decode(struct dap_enc_key * key, const void * buf, const size_t buf_size, void * buf_out,
+                      dap_enc_data_type_t data_type_in);
 
-extern size_t enc_fnam2_decode(struct enc_key * key, const void * in, size_t in_size,void * out);
-extern size_t enc_fnam2_encode(struct enc_key * key,const void * in, size_t in_size,void * out);
 
 #endif
diff --git a/crypt/dap_enc_aes.c b/crypt/dap_enc_aes.c
new file mode 100755
index 0000000000000000000000000000000000000000..b2ba81a5b8e0b3b73147e09b910e27442f8aacf6
--- /dev/null
+++ b/crypt/dap_enc_aes.c
@@ -0,0 +1,103 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "dap_enc_aes.h"
+#include "enc_key.h"
+#include "sap_aes.h"
+
+typedef struct enc_aes_key{
+    KeySchedule ks;
+    byte salt[SALT_LEN*2];
+} enc_aes_key_t;
+
+#define ENC_AES_KEY(a) ((enc_aes_key_t *)((a)->internal) )
+
+/**
+ * @brief enc_aes_key_new
+ * @param key
+ */
+void enc_aes_key_new(struct enc_key * key)
+{
+    char str[64];
+    size_t i;
+    for(i=0;i<sizeof(str);i++)
+        str[i]=64+rand()%30;
+    str[sizeof(str)-1]=0;
+    enc_aes_key_create(key,str);
+}
+
+/**
+ * @brief enc_aes_key_create
+ * @param key
+ * @param password_string
+ */
+void enc_aes_key_create(struct enc_key * key, const char *password_string)
+{
+    char *p1;
+    char *p2;
+    key->data= (unsigned char*) calloc(1,33);
+    key->data_size=32;
+    key->internal = calloc(1,sizeof(enc_aes_key_t) );
+    key->enc=enc_aes_encode;
+    key->dec=enc_aes_decode;
+
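+    // The password is split in half: the first half derives the AES key, the second half derives the salt.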
+    size_t p_len=strlen(password_string)/2;
+    p1= calloc(1,p_len+1);
+    p2= calloc(1,p_len+1);
+    memcpy(p1,password_string,p_len);
+    memcpy(p2,password_string+p_len,p_len);
+
+    Aes_KeyFromPassword(256,p1,key->data);
+    Aes_KeyFromPassword(256,p2,ENC_AES_KEY(key)->salt);
+    Aes_KeyExpansion( key->data , ENC_AES_KEY(key)->ks );
+    if (p1)
+    	free(p1);
+    if (p2)
+    	free(p2);
+    //Aes_GenSalt(ENC_AES_KEY(key)->salt);
+}
+
+void enc_aes_key_delete(struct enc_key *key)
+{
+    (void) key;
+}
+
+/**
+ * @brief enc_aes_decode
+ * @param key
+ * @param in
+ * @param in_size
+ * @param out
+ * @return
+ */
+size_t enc_aes_decode(struct enc_key* key, const void * in, size_t in_size,void * out)
+{
+    memcpy(out,in,in_size);
+    Aes_DecryptBlks( out,in_size,ENC_AES_KEY(key)->salt,ENC_AES_KEY(key)->ks );
+    return in_size;
+
+}
+
+/**
+ * @brief enc_aes_encode
+ * @param key
+ * @param in
+ * @param in_size
+ * @param out
+ * @return
+ */
+size_t enc_aes_encode(struct enc_key* key, const void * in, size_t in_size,void * out)
+{
+    size_t ret=(in_size%AES_BLOCKSIZE) ? ( in_size+ (AES_BLOCKSIZE- (in_size%AES_BLOCKSIZE) ) ): in_size ;
+    memcpy(out,in,in_size);
+    if(ret-in_size)
+        memset((unsigned char*)out+in_size,0,ret-in_size);
+    Aes_EncryptBlks(out,ret,ENC_AES_KEY(key)->salt,ENC_AES_KEY(key)->ks );
+    return ret;
+}
+
+
+
+
+
diff --git a/crypt/dap_enc_aes.h b/crypt/dap_enc_aes.h
new file mode 100755
index 0000000000000000000000000000000000000000..698bfdf7ac5de35666465255e6d7cee0788c3ae4
--- /dev/null
+++ b/crypt/dap_enc_aes.h
@@ -0,0 +1,15 @@
+#ifndef _ENC_AES_H_
+#define _ENC_AES_H_
+
+#include <stddef.h>
+
+struct enc_key;
+
+void enc_aes_key_new(struct enc_key * key);
+void enc_aes_key_create(struct enc_key * key, const char *password_string);
+void enc_aes_key_delete(struct enc_key *key);
+
+size_t enc_aes_decode(struct enc_key* key, const void * in, size_t in_size,void * out);
+size_t enc_aes_encode(struct enc_key* key, const void * in, size_t in_size,void * out);
+
+#endif
diff --git a/crypt/dap_enc_base64.c b/crypt/dap_enc_base64.c
new file mode 100755
index 0000000000000000000000000000000000000000..2b399c4b281eefe263f4ea274da56ad7275f0dca
--- /dev/null
+++ b/crypt/dap_enc_base64.c
@@ -0,0 +1,371 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include "dap_enc_base64.h"
+
+#define B64_TRUE	1
+#define B64_FALSE	0
+
+typedef unsigned char byte;
+
+// get the size of the result buffer required for Base-64
+// encoding/decoding.
+// sz - size of original buffer to be encoded/decoded
+// isEncoded - true (1) when encoding the original buffer;
+//				false (0) when decoding the original buffer.
+int B64_GetSize( int sz, int isEncode );
+
+// Base-64 encode the given byte array
+// outChars - buffer of length returned by GetSize(), filled upon return
+void B64_Encode( const byte* srcBytes, int srcLen, char* outChars );
+
+// Base-64 decode the given string
+// srcChars - characters to be decoded
+// outBytes - buffer of length returned by GetSize(), filled upon return
+void B64_Decode( const char* srcChars, int srcLen, byte* outBytes );
+
+// return the Base-64 encoded char for the given source byte
+char B64_EncodeByte( byte b );
+
+// return the Base-64 decoded byte for the given source char
+// <returns></returns>
+byte B64_DecodeByte( byte b );
+
+#ifndef b64_malloc
+#  define b64_malloc(ptr) malloc(ptr)
+#endif
+#ifndef b64_realloc
+#  define b64_realloc(ptr, size) realloc(ptr, size)
+#endif
+
+/**
+ * Base64 index table.
+ */
+
+static const char b64_table[] = {
+  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
+  'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+  'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
+  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
+  'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
+  'w', 'x', 'y', 'z', '0', '1', '2', '3',
+  '4', '5', '6', '7', '8', '9', '+', '/'
+};
+
+/**
+ * Encode `unsigned char *' source with `size_t' size.
+ * Returns a `char *' base64 encoded string.
+ */
+
+char *
+b64_encode (const unsigned char *, size_t);
+
+/**
+ * Decode `char *' source with `size_t' size.
+ * Returns an `unsigned char *' base64 decoded string.
+ */
+unsigned char *
+b64_decode (const char *, size_t);
+
+/**
+ * Decode `char *' source with `size_t' size.
+ * Returns an `unsigned char *' base64 decoded string plus the size of the decoded data.
+ */
+unsigned char *
+b64_decode_ex (const char *, size_t, size_t *);
+
+
+
+size_t enc_base64_decode(const char * in, size_t in_size,void * out)
+{
+    //B64_Decode( in, in_size, (byte*) out );
+    //return B64_GetSize( in_size,0 );
+    uint8_t * out_bytes = (uint8_t*) out;
+
+    int i = 0;
+    int j = 0;
+    int l = 0;
+    size_t l_size = 0;
+    unsigned char buf[3];
+    unsigned char tmp[4];
+
+    // alloc
+    //dec = (unsigned char *) b64_malloc(1);
+    if (NULL == out) { return 0; }
+
+    // parse until end of source
+    while (in_size--) {
+      // break if char is `=' or not base64 char
+      if ('=' == in[j]) { break; }
+      if (!(isalnum(in[j]) || '+' == in[j] || '/' == in[j])) { break; }
+
+      // read up to 4 bytes at a time into `tmp'
+      tmp[i++] = in[j++];
+
+      // if 4 bytes read then decode into `buf'
+      if (4 == i) {
+        // translate values in `tmp' from table
+        for (i = 0; i < 4; ++i) {
+          // find translation char in `b64_table'
+          for (l = 0; l < 64; ++l) {
+            if (tmp[i] == b64_table[l]) {
+              tmp[i] = l;
+              break;
+            }
+          }
+        }
+
+        // decode
+        buf[0] = (tmp[0] << 2) + ((tmp[1] & 0x30) >> 4);
+        buf[1] = ((tmp[1] & 0xf) << 4) + ((tmp[2] & 0x3c) >> 2);
+        buf[2] = ((tmp[2] & 0x3) << 6) + tmp[3];
+
+        // write decoded bytes to the output buffer
+          for (i = 0; i < 3; ++i) {
+            out_bytes[l_size++] = buf[i];
+          }
+
+        // reset
+        i = 0;
+      }
+    }
+
+    // remainder
+    if (i > 0) {
+      // fill `tmp' with `\0' at most 4 times
+      for (j = i; j < 4; ++j) {
+        tmp[j] = '\0';
+      }
+
+      // translate remainder
+      for (j = 0; j < 4; ++j) {
+          // find translation char in `b64_table'
+          for (l = 0; l < 64; ++l) {
+            if (tmp[j] == b64_table[l]) {
+              tmp[j] = l;
+              break;
+            }
+          }
+      }
+
+      // decode remainder
+      buf[0] = (tmp[0] << 2) + ((tmp[1] & 0x30) >> 4);
+      buf[1] = ((tmp[1] & 0xf) << 4) + ((tmp[2] & 0x3c) >> 2);
+      buf[2] = ((tmp[2] & 0x3) << 6) + tmp[3];
+
+      // write the remaining decoded bytes to the output buffer
+        for (j = 0; (j < i - 1); ++j) {
+          out_bytes[l_size++] = buf[j];
+        }
+
+    }
+
+//    out[l_size] = '\0';
+
+    return l_size;
+}
+
+size_t enc_base64_encode(const void * a_in, size_t a_in_size, char * a_out)
+{
+  int i = 0;
+  int j = 0;
+  size_t size = 0;
+  unsigned char buf[4];
+  unsigned char tmp[3];
+  const unsigned char * l_in_bytes = (const unsigned char*) a_in;
+
+  if (NULL == a_out) { return 0; }
+
+  // parse until end of source
+  while (a_in_size--) {
+    // read up to 3 bytes at a time into `tmp'
+    tmp[i++] = *(  l_in_bytes++);
+
+    // if 3 bytes read then encode into `buf'
+    if (3 == i) {
+      buf[0] = (tmp[0] & 0xfc) >> 2;
+      buf[1] = ((tmp[0] & 0x03) << 4) + ((tmp[1] & 0xf0) >> 4);
+      buf[2] = ((tmp[1] & 0x0f) << 2) + ((tmp[2] & 0xc0) >> 6);
+      buf[3] = tmp[2] & 0x3f;
+
+      for (i = 0; i < 4; ++i) {
+        a_out[size++] = b64_table[buf[i]];
+      }
+
+      // reset index
+      i = 0;
+    }
+  }
+
+  // remainder
+  if (i > 0) {
+    // fill `tmp' with `\0' at most 3 times
+    for (j = i; j < 3; ++j) {
+      tmp[j] = '\0';
+    }
+
+    // perform same codec as above
+    buf[0] = (tmp[0] & 0xfc) >> 2;
+    buf[1] = ((tmp[0] & 0x03) << 4) + ((tmp[1] & 0xf0) >> 4);
+    buf[2] = ((tmp[1] & 0x0f) << 2) + ((tmp[2] & 0xc0) >> 6);
+    buf[3] = tmp[2] & 0x3f;
+
+    // write the remaining encoded chars to the output buffer
+    for (j = 0; (j < i + 1); ++j) {
+      a_out[size++] = b64_table[buf[j]];
+    }
+
+    // while there is still a remainder
+    // append `=' to `enc'
+    while ((i++ < 3)) {
+      a_out[size++] = '=';
+    }
+  }
+
+  // Make sure we have enough space to add '\0' character at end.
+  a_out[size] = '\0';
+  return size;
+}
+
+
+// get the size of the result buffer required for Base-64
+// encoding/decoding.
+// sz - size of original buffer to be encoded/decoded
+// isEncoded - true (1) when encoding the original buffer;
+//				false (0) when decoding the original buffer.
+int B64_GetSize( int sz, int isEncode )
+{
+    int n = 0;
+
+    if( isEncode ) {
+        n = ceil ( ((double) sz) / 3.0 ) * 4.0;
+        switch( sz % 3 ) {
+        case 0: break;
+        case 1: n += 2; break;
+        case 2: n += 3; break;
+        }
+    }
+    else {
+        n = ceil ( ((double) sz) / 4.0 ) * 3.0;
+        switch( sz % 4 ) {
+        case 0: break;
+        case 1: break;
+        case 2: n += 1; break;
+        case 3: n += 2; break;
+        }
+    }
+    return n;
+}
+
+
+// Base-64 encode the given byte array
+// outChars - buffer of length returned by GetSize(), filled upon return
+void B64_Encode( const byte* srcBytes, int srcLen, char* outChars )
+{
+    byte b1, b2, b3;
+    byte* destBytes = (byte*)outChars;
+
+    // walk through the source, taking 3 bytes at a time
+    int srcNdx = 0;
+    int destNdx = 0;
+    int remaining = srcLen;
+    for( ; remaining > 2; remaining -= 3 ) {
+        b1 = srcBytes[ srcNdx++ ];
+        b2 = srcBytes[ srcNdx++ ];
+        b3 = srcBytes[ srcNdx++ ];
+        destBytes[destNdx++] = B64_EncodeByte( (byte)( b1 >> 2 ) );
+        destBytes[destNdx++] = B64_EncodeByte( (byte)( ( b1 << 4 ) | ( b2 >> 4 ) ) );
+        destBytes[destNdx++] = B64_EncodeByte( (byte)( ( b2 << 2 ) | ( b3 >> 6 ) ) );
+        destBytes[destNdx++] = B64_EncodeByte( (byte)b3 );
+    }
+
+    // process the remaining bytes
+    b2 = 0;
+    if( remaining > 0 ) {
+        b1 = srcBytes[srcNdx++];
+        if( remaining == 2 )
+            b2 = srcBytes[srcNdx++];
+
+        destBytes[destNdx++] = B64_EncodeByte( (byte)( b1 >> 2 ) );
+        destBytes[destNdx++] = B64_EncodeByte( (byte)( ( b1 << 4 ) | ( b2 >> 4 ) ) );
+        if( remaining == 2 )
+            destBytes[destNdx++] = B64_EncodeByte( (byte)( b2 << 2 ) );
+    }
+}
+
+
+// Base-64 decode the given string
+// srcChars - characters to be decoded
+// outBytes - buffer of length returned by GetSize(), filled upon return
+void B64_Decode( const char* srcChars, int srcLen, byte* outBytes )
+{
+    byte b1, b2, b3, b4;
+    const byte* srcBytes = (byte*)srcChars;
+    byte* destBytes = outBytes;
+
+    // walk through the source, taking 4 bytes at a time
+    int srcNdx = 0;
+    int destNdx = 0;
+    int remaining = srcLen;
+    for( ; remaining > 3; remaining -= 4 ) {
+        b1 = B64_DecodeByte( srcBytes[srcNdx++] );
+        b2 = B64_DecodeByte( srcBytes[srcNdx++] );
+        b3 = B64_DecodeByte( srcBytes[srcNdx++] );
+        b4 = B64_DecodeByte( srcBytes[srcNdx++] );
+
+        destBytes[destNdx++] = (byte)( ( b1 << 2 ) | ( b2 >> 4 ) );
+        destBytes[destNdx++] = (byte)( ( b2 << 4 ) | ( b3 >> 2 ) );
+        destBytes[destNdx++] = (byte)( ( b3 << 6 ) | b4 );
+    }
+
+    // process the remaining bytes
+    b2 = b3 = 0;
+    if( remaining > 0 ) {
+        b1 = B64_DecodeByte( srcBytes[srcNdx++] );
+        if( remaining > 1 )
+            b2 = B64_DecodeByte( srcBytes[srcNdx++] );
+        if( remaining == 3 )
+            b3 = B64_DecodeByte( srcBytes[srcNdx++] );
+
+        destBytes[destNdx++] = (byte)( ( b1 << 2 ) | ( b2 >> 4 ) );
+        if( remaining == 3 )
+            destBytes[destNdx++] = (byte)( ( b2 << 4 ) | ( b3 >> 2 ) );
+    }
+}
+
+
+// return the Base-64 encoded char for the given source byte
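+// Note: emits the URL-safe alphabet ('-' and '_') instead of the '+' and '/' used in b64_table above.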
+char B64_EncodeByte( byte b )
+{
+    b &= 0x3f;
+    if( b <= 25 )
+        return (byte)( b +'A' );
+    if( b <= 51 )
+        return (byte)( b - 26 + 'a' );
+    if( b <= 61 )
+        return (byte)( b - 52 + '0' );
+    if( b == 62 )
+        return (byte)'-';
+    //if( b == 63 )
+    return (byte)'_';
+}
+
+
+// return the Base-64 decoded byte for the given source char
+// <returns></returns>
+byte B64_DecodeByte( byte b )
+{
+    if (( b == '+' ) || (b =='-') )
+        return 62;
+    if( (b == '/' ) || (b == '_') )
+        return 63;
+    if( b <= '9' )
+        return (byte)( b - '0' + 52 );
+    if( b <= 'Z' )
+        return (byte)( b - 'A' );
+    return (byte)( b - 'a' + 26 );
+}
+
diff --git a/crypt/dap_enc_base64.h b/crypt/dap_enc_base64.h
new file mode 100755
index 0000000000000000000000000000000000000000..bd3658e97413b4af438702d14679816abc51f433
--- /dev/null
+++ b/crypt/dap_enc_base64.h
@@ -0,0 +1,16 @@
+#ifndef _ENC_BASE64_H_
+#define _ENC_BASE64_H_
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+size_t enc_base64_decode(const char * in, size_t in_size,void * out);
+size_t enc_base64_encode(const void * in, size_t in_size,char * out);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/crypt/enc_key.c b/crypt/dap_enc_key.c
similarity index 100%
rename from crypt/enc_key.c
rename to crypt/dap_enc_key.c
diff --git a/crypt/dap_enc_key.h b/crypt/dap_enc_key.h
new file mode 100644
index 0000000000000000000000000000000000000000..768b2c9e5aaca89167a942d506d87e2ddea38d19
--- /dev/null
+++ b/crypt/dap_enc_key.h
@@ -0,0 +1,105 @@
+/*
+ Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc
+  All rights reserved.
+
+ This file is part of DAP (Deus Applications Prototypes) the open source project
+
+    DAP (Deus Applications Prototypes) is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    DAP is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with any DAP based project.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _DAP_ENC_KEY_H_
+#define _DAP_ENC_KEY_H_
+
+#include <stddef.h>
+
+typedef enum dap_enc_data_type{DAP_ENC_DATA_TYPE_RAW,
+                               DAP_ENC_DATA_TYPE_B64,
+                               } dap_enc_data_type_t;
+
+typedef enum dap_enc_key_type{ DAP_ENC_KEY_TYPE_AES, // Symmetric AES
+
+                           DAP_ENC_KEY_rlwe_bcns15, // key exchange from the ring learning with errors problem
+                                                // (Bos, Costello, Naehrig, Stebila,
+                                                // IEEE Symposium on Security & Privacy 2015,
+                                                // https://eprint.iacr.org/2014/599)
+
+                           DAP_ENC_KEY_rlwe_newhope, // "NewHope": key exchange from the ring learning with errors problem
+                                                //  (Alkim, Ducas, Pöppelmann, Schwabe, USENIX Security 2016 )
+                                                //  Using the reference C implementation of NewHope
+                                                // from https://github.com/tpoeppelmann/newhop
+                                                // https://eprint.iacr.org/2015/1092
+
+                           DAP_ENC_KEY_rlwe_msrln16, // Microsoft Research implementation of Peikert's ring-LWE key exchange
+                                               // (Longa, Naehrig, CANS 2016, https://eprint.iacr.org/2016/504)
+                                               // based on the implementation of Alkim, Ducas, Pöppelmann, and Schwabe,
+                                               // with improvements from Longa and Naehrig,
+                                               //  https://www.microsoft.com/en-us/research/project/lattice-cryptography-library/
+
+                           DAP_ENC_KEY_lwe_frodo,  // "Frodo": key exchange from the learning with errors problem
+                                               // Bos, Costello, Ducas, Mironov, Naehrig, Nikolaenko, Raghunathan, Stebila
+                                               // ACM Conference on Computer and Communications Security 2016
+                                               // https://eprint.iacr.org/2016/659
+
+                           DAP_ENC_KEY_sidh_cln16, // Key exchange from the supersingular isogeny Diffie-Hellman problem
+                                               // (Costello, Naehrig, Longa, CRYPTO 2016, https://eprint.iacr.org/2016/413)
+                                               // using the implementation of Microsoft Research
+                                               // https://www.microsoft.com/en-us/research/project/sidh-library/
+
+                           DAP_ENC_KEY_sidh_iqc_ref, // key exchange from the supersingular isogeny Diffie-Hellman problem
+                                                 // (De Feo, Jao, Plût, J. Math. Cryptol. 8(3):209, 2014
+                                                 // https://eprint.iacr.org/2011/506
+                                                 //
+                           DAP_ENC_KEY_code_mcbits, // "McBits": key exchange from the error correcting codes,
+                                                // specifically Niederreiter's form of McEliece public key encryption
+                                                //  using hidden Goppa codes (Bernstein, Chou, Schwabe, CHES 2013, https://eprint.iacr.org/2015/610)
+                                                // using the implementation of McBits from https://www.win.tue.nl/~tchou/mcbits/
+
+                           DAP_ENC_KEY_ntru,       // NTRU: key transport using NTRU public key encryption
+                                               // (Hoffstein, Pipher, Silverman, ANTS 1998) with the EES743EP1 parameter set
+                                               //  wrapper around the implementation from the NTRU Open Source project
+                                               // https://github.com/NTRUOpenSourceProject/NTRUEncrypt)
+
+                           DAP_ENC_KEY_mlwe_kyber, // Kyber: a CCA-secure module-lattice-based key exchange mechanism
+                                               // (Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Shanck, Stehlé)
+                                               // Real World Crypto 2017, https://eprint.iacr.org/2017/634)
+                                               // using the reference C implementation of Kyber from pq-crystals/kyber
+                           DAP_ENC_KEY_sig_picnic, // signature based on zero-knowledge proof as specified in
+                                               // Post-Quantum Zero-Knowledge and Signatures from Symmetric-Key Primitives
+                                               // (Melissa Chase and David Derler and Steven Goldfeder and Claudio Orlandi
+                                               // and Sebastian Ramacher and Christian Rechberger and Daniel Slamanig and Greg Zaverucha
+                                               // https://eprint.iacr.org/2017/279.pdf), using the optimized implemenation
+                                               //  from https://github.com/IAIK/Picnic
+                         } enc_key_type_t;
+
+struct enc_key;
+typedef size_t (*enc_callback_t)(struct enc_key *, const void * , const size_t ,void *);
+
+typedef struct enc_key{
+    unsigned char * data;
+    size_t data_size;
+    enc_key_type_t type;
+
+    enc_callback_t enc;
+    enc_callback_t dec;
+
+    void * internal;
+} enc_key_t;
+
+extern enc_key_t *enc_key_new(size_t key_size,enc_key_type_t key_type);
+extern enc_key_t *enc_key_generate(enc_data_type_t v_type, rsa_key_t* key_session_pair);
+extern enc_key_t *enc_key_create(const char * key_input,enc_key_type_t v_type);
+extern void enc_key_delete(enc_key_t * key);
+extern rsa_key_t* enc_key_session_pair_create(const char* client_pub_key, u_int16_t key_len);
+
+#endif
diff --git a/crypt/enc.c b/crypt/enc.c
deleted file mode 100644
index d704dc7da7468a403a05b1bbf26270e67db9e3a6..0000000000000000000000000000000000000000
--- a/crypt/enc.c
+++ /dev/null
@@ -1,562 +0,0 @@
-/*
- Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc
-  All rights reserved.
-
- This file is part of DAP (Deus Applications Prototypes) the open source project
-
-    DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Lesser General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    DAP is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public License
-    along with any DAP based project.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <time.h>
-#include "enc.h"
-#include "enc_key.h"
-#include "common.h"
-#include <openssl/aes.h>
-#include <openssl/evp.h>
-#include <openssl/pem.h>
-#include <openssl/err.h>
-#include <openssl/rand.h>
-
-#define LOG_TAG "enc"
-
-#include <arpa/inet.h>
-
-////////////////////////////////////////// BASE64 PART
-static size_t b64_get_encodet_size(size_t in_size);
-static size_t b64_get_decodet_size(size_t in_size);
-static unsigned char b64_byte_decode(unsigned char b);
-static unsigned char b64_byte_encode(unsigned char b);
-
-static void Base64Decode(const char* in, size_t srcLen, unsigned char* out);
-static void Base64Encode(const unsigned char* in, size_t srcLen, char* out);
-
-static size_t b64_get_encodet_size(size_t in_size)
-{
-    return (in_size/3)*4 + ((in_size%3==1) ?2: (in_size%3==2) ? 3:0);
-}
-
-static size_t b64_get_decodet_size(size_t in_size)
-{
-    return (in_size/4)*3 + ((in_size%4==2) ?1: (in_size%5==3) ? 2:0);
-}
-
-static unsigned char b64_byte_decode(unsigned char b)
-{
-    if (( b == '+' ) || (b =='-') )
-        return 62;
-    if( (b == '/' ) || (b == '_') )
-        return 63;
-    if( b <= '9' )
-        return (b - '0' + 52);
-    if(b <= 'Z')
-        return (b - 'A');
-    return (b - 'a' + 26);
-}
-
-static unsigned char b64_byte_encode(unsigned char b)
-{
-    b &= 0x3f;
-    if(b <= 25)
-        return (b +'A');
-    if(b <= 51)
-        return (b - 26 + 'a');
-    if(b <= 61)
-        return (b - 52 + '0');
-    if(b == 62)
-        return '-';
-    return '_';
-}
-
-static void Base64Decode(const char* source, size_t srcLen, unsigned char* out)
-{
-    unsigned char b1, b2, b3, b4;
-    const unsigned char* srcBytes = (unsigned char*)source;
-    unsigned char* dest = out;
-
-    size_t dec_length = b64_get_decodet_size(srcLen);
-    unsigned char *buffer = (unsigned char*)malloc(dec_length + 1);
-    buffer[dec_length] = '\0';
-
-    // walk through the source, taking 4 bytes at a time
-    size_t source_index = 0;
-    size_t dest_index = 0;
-    size_t remaining = srcLen;
-    for( ; remaining > 3; remaining -= 4 ) {
-        b1 = b64_byte_decode(srcBytes[source_index++]);
-        b2 = b64_byte_decode(srcBytes[source_index++]);
-        b3 = b64_byte_decode(srcBytes[source_index++]);
-        b4 = b64_byte_decode(srcBytes[source_index++]);
-
-        dest[dest_index++] = (unsigned char)( ( b1 << 2 ) | ( b2 >> 4 ) );
-        dest[dest_index++] = (unsigned char)( ( b2 << 4 ) | ( b3 >> 2 ) );
-        dest[dest_index++] = (unsigned char)( ( b3 << 6 ) | b4 );
-    }
-
-    // process the remaining bytes
-    b2 = b3 = 0;
-    if( remaining > 0 ) {
-        b1 = b64_byte_decode( srcBytes[source_index++] );
-        if( remaining > 1 )
-            b2 = b64_byte_decode( srcBytes[source_index++] );
-        if( remaining == 3 )
-            b3 = b64_byte_decode( srcBytes[source_index++] );
-
-        dest[dest_index++] = (unsigned char)( ( b1 << 2 ) | ( b2 >> 4 ) );
-        if( remaining == 3 )
-            dest[dest_index++] = (unsigned char)( ( b2 << 4 ) | ( b3 >> 2 ) );
-    }
-}
-
-static void Base64Encode(const unsigned char* source, size_t srcLen, char* out)
-{
-    unsigned char b1, b2, b3;
-    unsigned char* dest = (unsigned char*)out;
-
-    // walk through the source, taking 3 bytes at a time
-    size_t source_index = 0;
-    size_t dest_index = 0;
-    size_t remaining = srcLen;
-    for( ; remaining > 2; remaining -= 3 ) {
-        b1 = source[ source_index++ ];
-        b2 = source[ source_index++ ];
-        b3 = source[ source_index++ ];
-        dest[dest_index++] = b64_byte_encode( (unsigned char)( b1 >> 2 ) );
-        dest[dest_index++] = b64_byte_encode( (unsigned char)( ( b1 << 4 ) | ( b2 >> 4 ) ) );
-        dest[dest_index++] = b64_byte_encode( (unsigned char)( ( b2 << 2 ) | ( b3 >> 6 ) ) );
-        dest[dest_index++] = b64_byte_encode( (unsigned char)b3 );
-    }
-
-    // process the remaining bytes
-    b2 = 0;
-    if( remaining > 0 ) {
-        b1 = source[source_index++];
-        if( remaining == 2 )
-            b2 = source[source_index++];
-
-        dest[dest_index++] = b64_byte_encode( (unsigned char)( b1 >> 2 ) );
-        dest[dest_index++] = b64_byte_encode( (unsigned char)( ( b1 << 4 ) | ( b2 >> 4 ) ) );
-        if( remaining == 2 )
-            dest[dest_index++] = b64_byte_encode( (unsigned char)( b2 << 2 ) );
-    }
-}
-
-size_t enc_base64_encode(const void * in, size_t in_size, char * out)
-{
-    size_t ret= b64_get_encodet_size(in_size);
-    Base64Encode((const unsigned char*) in, in_size, out);
-    out[ret]='\0';
-    return ret;
-}
-
-size_t enc_base64_decode(const char *in, size_t in_size, void *out)
-{
-    Base64Decode(in, in_size, (unsigned char*) out);
-    return b64_get_decodet_size(in_size);
-}
-////////////////////////////////////////////////////// end of BASE64 PART
-
-////////////////////////////////////////// AES PART
-#include <openssl/evp.h>
-#include <openssl/err.h>
-#include <openssl/rand.h>
-#include "enc_key.h"
-typedef unsigned char KeySchedule[4*(14+1)][4];
-
-static int _crypto_inited = 0;
-
-typedef struct enc_aes_key{
-    KeySchedule ks;
-    unsigned char salt[AES_BLOCK_SIZE*2];
-} enc_aes_key_t;
-
-#define ENC_AES_KEY(a) ((enc_aes_key_t *)((a)->internal) )
-
-/**
- * @brief enc_aes_key_new
- * @param key
- */
-void enc_aes_key_new(struct enc_key * key)
-{
-    char str[64];
-    int i;
-    for(i=0;i<sizeof(str);i++)
-        str[i]=64+rand()%30;
-    str[sizeof(str)-1]=0;
-    enc_aes_key_create(key,str);
-}
-
-/**
- * @brief enc_aes_key_new
- * @param key
- */
-void enc_aes_key_create(struct enc_key * key, const char *str_key)
-{
-    key->data_size = strlen(str_key);
-    key->data= (unsigned char*) malloc(key->data_size);
-    memcpy(key->data, str_key, key->data_size);
-    key->internal = calloc(1,sizeof(enc_aes_key_t) );
-    key->enc=enc_aes_encode;
-    key->dec=enc_aes_decode;
-
-}
-
-void enc_aes_key_delete(struct enc_key *key)
-{
-    (void) key;
-}
-
-
-size_t enc_aes_decode(struct enc_key* key, const void * in, size_t in_size,void * out)
-{
-    unsigned char *iv_dec = (unsigned char*)malloc(sizeof(unsigned char) *AES_BLOCK_SIZE);
-    memset(iv_dec, 0, sizeof(unsigned char) *AES_BLOCK_SIZE);
-
-    AES_KEY dec_key;
-    AES_set_decrypt_key(key->data, 256, &dec_key);
-    AES_cbc_encrypt(in, out, in_size,
-                    &dec_key,iv_dec, AES_DECRYPT);
-
-    free(iv_dec);
-
-    return in_size;
-
-}
-
-size_t enc_aes_encode(struct enc_key* key, const void * in, size_t in_size,void * out)
-{
-    size_t ret = (in_size % AES_BLOCK_SIZE) ? ( in_size+ (AES_BLOCK_SIZE- (in_size%AES_BLOCK_SIZE) ) ) : in_size ;
-
-    unsigned char *iv_enc = (unsigned char*) malloc( sizeof(unsigned char) *AES_BLOCK_SIZE);
-    memset(iv_enc, 0, sizeof(unsigned char) *AES_BLOCK_SIZE);
-
-    AES_KEY enc_key;
-    AES_set_encrypt_key(key->data, 256, &enc_key);
-    AES_cbc_encrypt(in, out, in_size, &enc_key,
-                    iv_enc, AES_ENCRYPT);
-
-    free(iv_enc);
-    return ret;
-}
-
-////////////////////////////////////////// end of AES PART
-
-/**
- * @brief enc_init
- * @return
- */
-int enc_init()
-{
-    if (_crypto_inited)
-        return 0;
-    _crypto_inited = 1;
-
-    srand(time(NULL));
-
-    ERR_load_crypto_strings();
-    OpenSSL_add_all_algorithms();
-
-    return 0;
-}
-
-
-/**
- * @brief enc_code Encode data with key
- * @param key_private Private key
- * @param buf  Input buffer
- * @param buf_size Input buffer size
- * @param buf_out Output buffer
- * @return bytes actualy written in the output buffer
- */
-size_t enc_code(struct enc_key * key,const void * buf,const size_t buf_size, void * buf_out, enc_data_type_t data_type_out)
-{
-    //log_it(NOTICE,"In enc code");
-    if(key->enc){
-        void *proc_buf;
-        switch(data_type_out)
-        {
-            case ENC_DATA_TYPE_B64:{
-                proc_buf=calloc(1,buf_size*2);
-            }break;
-            case ENC_DATA_TYPE_RAW:{
-                proc_buf=buf_out;
-            }break;
-        }
-        size_t ret=key->enc(key,buf,buf_size,proc_buf);
-        if(data_type_out==ENC_DATA_TYPE_B64){
-            ret=enc_base64_encode(proc_buf,ret,buf_out);
-            free(proc_buf);
-            return ret;
-        }
-        return ret;
-    }else{
-        return 0;
-    }
-}
-
-/**
- * @brief enc_decode Decode data with key
- * @param key_public Public key
- * @param buf  Input buffer
- * @param buf_size Input buffer size
- * @param buf_out Output buffer
- * @param buf_out_max Maximum size of output buffer
- * @return bytes actualy written in the output buffer
- */
-size_t enc_decode(struct enc_key * key,const void * buf, const size_t buf_size, void * buf_out, enc_data_type_t data_type_in)
-{
-    void *proc_buf;
-    const void *proc_buf_const;
-    size_t proc_buf_size;
-    switch(data_type_in){
-        case ENC_DATA_TYPE_B64:{
-            proc_buf=calloc(1,buf_size);
-            proc_buf_size= enc_base64_decode((const char*) buf,buf_size,proc_buf);
-            proc_buf_const=proc_buf;
-        }break;
-        case ENC_DATA_TYPE_RAW:{
-            proc_buf_const=buf;
-            proc_buf_size=buf_size;
-        }break;
-    }
-
-    if(key->dec){
-        size_t ret=key->dec(key,proc_buf_const,proc_buf_size,buf_out);
-        if(data_type_in==ENC_DATA_TYPE_B64)
-            free(proc_buf);
-        return ret;
-    }else{
-        return 0;
-    }
-}
-
-/**
- * @brief read_key_from_bio
- * @param bio
- * @return
- */
-char* read_key_from_bio(BIO * bio)
-{
-    size_t length = BIO_pending(bio);
-    char *buff = (char*)malloc((length + 1)*sizeof(char));
-    BIO_read(bio, buff, length);
-    buff[length] = '\0';
-    return buff;
-}
-
-
-/**
- * @brief bioToString
- * @param bio
- * @param string
- * @details make string from bio
- * @return
- */
-
-int bioToString(BIO *bio, unsigned char **string)
-{
-
-    if( bio == NULL)
-    {
-        log_it(ERROR,"bioToString() BIO == NULL!");
-        return -1;
-    }
-
-    size_t bioLength = BIO_pending(bio);
-
-    *string = (unsigned char*)malloc(bioLength + 1);
-
-    if(string == NULL)
-    {
-        log_it(ERROR,"bioToString failed.\n");
-        return -1;
-    }
-
-    BIO_read(bio, *string, bioLength);
-
-    (*string)[bioLength] = '\0';
-
-    BIO_free_all(bio);
-
-    return (int)bioLength;
-}
-
-/**
- * @brief enc_rsa_decode
- * @param key
- * @param in
- * @param in_size
- * @param out
- * @details decode by server local rsa key
- * @return
- */
-size_t enc_rsa_decode(struct enc_key* key, const void * in, size_t in_size,void * out)
-{
-    size_t decrypt_len;
-
-    if(in == NULL)
-    {
-         log_it(ERROR,"enc_rsa_decode failed (empty message for decode)");
-         return 0;
-    }
-
-    if(key == NULL)
-    {
-         log_it(ERROR,"enc_rsa_decode failed (empty key for decode)");
-         return 0;
-    }
-
-    if((decrypt_len = RSA_private_decrypt(in_size, (unsigned char*)in, (unsigned char*)out,
-                                             ((rsa_key_t*)key->internal)->server_key, RSA_PKCS1_PADDING)) == -1)
-    {
-            log_it(ERROR,"enc_rsa_decode failed (incorrect decode)");
-            return 0;
-    }
-
-    memset(out + decrypt_len, 0, 1);
-
-    //log_it(INFO, "Decode out = %s",out);
-
-    return decrypt_len;
-}
-
-
-/**
- * @brief enc_rsa_encode
- * @param key
- * @param in
- * @param in_size
- * @param out
- * @details encode by RSA Public key Client
- * @return
- */
-size_t enc_rsa_encode(struct enc_key* key, void * in, size_t in_size,void * out)
-{
-    size_t encrypt_len = 0;
-
-    if(in == NULL || key == NULL)
-    {
-         log_it(ERROR,"enc_rsa_encode failed");
-    }
-
-    if((encrypt_len = RSA_public_encrypt(in_size, (unsigned char*)in, (unsigned char*)out,
-                                             (RSA*)((rsa_key_t*)key->internal)->client_public_key, RSA_PKCS1_PADDING)) == -1)
-    {
-            log_it(ERROR,"enc_rsa_encode Error Encrypt");
-            return 0;
-    }
-
-    //log_it(INFO,"Encrypt Len = %d",encrypt_len);
-
-    return encrypt_len;
-}
-
-
-/**
- * @brief getRsaKeyFromString
- * @param str_key
- * @param strLen
- * @return
- */
-void setRsaPubKeyFromString(char *str_key, size_t strLen, struct enc_key * key)
-{
-    if(str_key == NULL)
-    {
-        log_it(ERROR,"getRsaKeyFromString failed");
-        return;
-    }
-
-    BIO *bio = BIO_new(BIO_s_mem());
-    BIO_write(bio, str_key,strLen);
-
-    PEM_read_bio_RSAPublicKey( bio, (void*)&key->internal, NULL, NULL);
-
-    BIO_free_all(bio);
-
-    key->enc = (void*) enc_rsa_encode;
-    key->dec = (void*) enc_rsa_decode;
-
-    if ( key == NULL)
-    {
-        log_it(ERROR,"getRsaKeyFromString failed");
-        return;
-    }
-
-}
-
-
-/**
- * @brief getStringPrivateKeyFromRsa
- * @param key
- * @param out
- * @details get string public key from RSA* key ( Allocated memory for ptr )
- * @return
- */
-size_t getStringPrivateKeyFromRsa(RSA *key, char **out)
-{
-    BIO *bio = BIO_new(BIO_s_mem());
-
-    if(key == NULL)
-    {
-        log_it(ERROR,"getStringPubKeyFromRsa failed");
-        return 0;
-    }
-
-    PEM_write_bio_RSAPrivateKey(bio,key,NULL,NULL,0,NULL,NULL);
-
-    size_t key_len = BIO_pending(bio);
-    *out = malloc(key_len + 1);
-
-    BIO_read(bio, *out, key_len);
-
-    BIO_free_all(bio);
-
-    return key_len;
-}
-
-
-/**
- * @brief getStringPubKeyFromRsa
- * @param key
- * @param out
- * @details get string public key from RSA* key ( Allocated memory for ptr )
- * @return
- */
-size_t getStringPubKeyFromRsa(RSA *key, char **out)
-{
-    BIO *bio = BIO_new(BIO_s_mem());
-
-    if(key == NULL)
-    {
-        log_it(ERROR,"getStringPubKeyFromRsa failed");
-        return 0;
-    }
-
-    PEM_write_bio_RSAPublicKey(bio, key);
-
-    size_t key_len = BIO_pending(bio);
-    *out = malloc(key_len + 1);
-
-    BIO_read(bio, *out, key_len);
-    //out[key_len] = '\0';
-
-    BIO_free_all(bio);
-
-    return key_len;
-}
-
-
diff --git a/crypt/enc.h b/crypt/enc.h
deleted file mode 100644
index c751c93360d993f95690e1c77a2a9f41eab4302d..0000000000000000000000000000000000000000
--- a/crypt/enc.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc
-  All rights reserved.
-
- This file is part of DAP (Deus Applications Prototypes) the open source project
-
-    DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Lesser General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    DAP is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public License
-    along with any DAP based project.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _ENC_H_
-#define _ENC_H_
-#include <stddef.h>
-#include <openssl/aes.h>
-#include <openssl/evp.h>
-#include <openssl/pem.h>
-#include <openssl/err.h>
-#include <openssl/rand.h>
-
-#define SA_ENC_TYPE_1 0x01
-#define RSA_KEY_LENGTH 4096
-#define PUB_EXP     3
-
-struct enc_key;
-
-typedef enum enc_data_type{ENC_DATA_TYPE_RAW, ENC_DATA_TYPE_B64, ENC_KEY_TYPE_RSA} enc_data_type_t;
-
-typedef struct rsa_session_key {
-    RSA* server_key;
-    RSA* client_public_key;
-    time_t last_time_use_key;
-} rsa_key_t;
-
-extern int enc_init();
-
-/// BASE64
-extern size_t enc_base64_decode(const char * in, size_t in_size,void * out);
-extern size_t enc_base64_encode(const void * in, size_t in_size,char * out);
-///
-
-/// AES
-#include "common.h"
-struct enc_key;
-
-extern size_t enc_rsa_decode(struct enc_key* key, const void * in, size_t in_size,void * out);
-extern size_t enc_rsa_encode(struct enc_key* key, void * in, size_t in_size,void * out);
-
-extern void setRsaPubKeyFromString(char *str_key, size_t strLen, struct enc_key * key);
-extern size_t getStringPubKeyFromRsa(RSA *key, char **out);
-extern size_t getStringPrivateKeyFromRsa(RSA *key, char **out);
-
-
-extern void enc_aes_key_new(struct enc_key * key);
-extern void enc_aes_key_create(struct enc_key * key, const char *password_string);
-extern void enc_aes_key_delete(struct enc_key *key);
-extern size_t enc_aes_decode(struct enc_key* key, const void * in, size_t in_size,void * out);
-extern size_t enc_aes_encode(struct enc_key* key, const void * in, size_t in_size,void * out);
-
-
-size_t enc_code(struct enc_key * key, const void * buf, const size_t buf_size, void * buf_out, enc_data_type_t data_type_out);
-size_t enc_decode(struct enc_key * key, const void * buf, const size_t buf_size, void * buf_out, enc_data_type_t data_type_in);
-
-
-#endif
diff --git a/crypt/enc_fnam2.c b/crypt/enc_fnam2.c
deleted file mode 100644
index 1ce583102d7b2ba6733e80c92b5cdbd04a51d9d3..0000000000000000000000000000000000000000
--- a/crypt/enc_fnam2.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc
-  All rights reserved.
-
- This file is part of DAP (Deus Applications Prototypes) the open source project
-
-    DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Lesser General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    DAP is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public License
-    along with any DAP based project.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include "enc_key.h"
-#include "enc_fnam2.h"
-
-void fnam2_crypt(int *key, int key_size,  unsigned long *num_block,  unsigned long b1,unsigned long b2,unsigned long b3,unsigned long b4, void * out );
-void fnam2_decrypt(int *key, int key_size, unsigned long *num_block,  unsigned long b1,unsigned long b2,unsigned long b3,unsigned long b4, void * out );
-
-/**
- * @brief enc_fnam2_key_new
- * @param key
- */
-void enc_fnam2_key_new(struct enc_key * key)
-{
-    size_t i;
-    for(i=0;i<key->data_size;i++)
-        key->data[i] = rand()%255;
-}
-
-
-/**
- * @brief enc_fnam2_decode
- * @param key
- * @param key_size
- * @param in
- * @param in_size
- * @param out
- * @return
- */
-size_t enc_fnam2_decode(struct enc_key * key, const void * in, size_t in_size,void * out)
-{
-    unsigned long num_block=0;
-    int key_pos=0;
-    const size_t block_size=16;
-    const unsigned char * in_ul=(const unsigned char*) in;
-
-    size_t pos;
-
-    for (pos=0;pos<= in_size-block_size; pos+=block_size){
-        fnam2_decrypt( (int *) (key->data+key_pos), block_size,&num_block, *((int*)(in_ul+pos)) ,
-                     *((int*)(in_ul+pos+4)), *((int*)(in_ul+pos+8)),*((int*)(in_ul+pos+12)),out+pos);
-        /*key_pos+=block_size;
-        if(key_pos+block_size>=key->data_size)
-            key_pos=0;*/
-    }
-
-    return pos;
-}
-
-/**
- * @brief enc_fnam2_encode
- * @param key
- * @param key_size
- * @param in
- * @param in_size
- * @param out
- * @return
- */
-size_t enc_fnam2_encode(struct enc_key * key,const void * in, size_t in_size,void * out)
-{
-    unsigned long num_block=0;
-    int key_pos=0;
-    const size_t block_size=16;
-    const unsigned char * in_ul=(const unsigned char*) in;
-
-    size_t pos;
-
-
-    for (pos=0;pos<= in_size-block_size; pos+=block_size){
-        fnam2_crypt( (int *) (key->data+key_pos), block_size,&num_block, *((int*)(in_ul+pos)) ,
-                     *((int*)(in_ul+pos+4)), *((int*)(in_ul+pos+8)),*((int*)(in_ul+pos+12)),out+pos);
-       /* key_pos+=block_size;
-        if(key_pos+block_size>=key->data_size)
-            key_pos=0;*/
-    }
-
-    if(pos<in_size){
-        char * buf = (char*) calloc(1,block_size);
-        memcpy(buf,in_ul+pos, in_size-pos);
-        fnam2_crypt(( int *)(key->data+key_pos), block_size,&num_block, *((int*)(buf)) ,
-                     *((int*)(buf+4)), *((int*)(buf+8)),*((int*)(buf+12)),out+pos);
-        pos+=block_size;
-    }
-    return pos;
-}
-
-void fnam2_crypt(int *key, int key_size,  unsigned long *num_block,  unsigned long b1,unsigned long b2,unsigned long b3,unsigned long b4, void * out )
-{
-    int subkey,i,ip,im;
-    unsigned long Num=*num_block;
-    int r;
-
-    for(r=0;r<key_size*4;r++) {
-        //Selecting the part of key for a concrete stage
-        i=r%key_size;
-        if(i==key_size) {ip=1;im=key_size-1;}
-        if(i==1) {ip=2;im=key_size;}
-        else {ip=i+1;im=i-1;}
-
-        //Generating the subkey on the basis of nmber part of a key,
-        //number of the block in a file and number of a round
-        subkey=key[i]*r+(key[im]*Num+key[ip]);
-
-        //F - function
-        b1+=(((b2>>16)^((b2<<25)+subkey))+(subkey*(~(b2<<7))));
-        b1=~b1;
-        r++;
-
-        i=r%key_size;
-        if(i==key_size) {ip=1;im=key_size-1;}
-        if(i==1) {ip=2;im=key_size;}
-        else {ip=i+1;im=i-1;}
-        subkey=key[i]*r+(key[im]*Num+key[ip]);
-        b2+=(((b3>>16)^((b3<<25)+subkey))+(subkey*(~(b3<<7))));
-        b2=~b2;
-        r++;
-
-        i=r%key_size;
-        if(i==key_size) {ip=1;im=key_size-1;}
-        if(i==1) {ip=2;im=key_size;}
-        else {ip=i+1;im=i-1;}
-        subkey=key[i]*r+(key[im]*Num+key[ip]);
-        b3+=(((b4>>16)^((b4<<25)+subkey))+(subkey*(~(b4<<7))));
-        b3=~b3;
-        r++;
-
-        i=r%key_size;
-        if(i==key_size) {ip=1;im=key_size-1;}
-        if(i==1) {ip=2;im=key_size;}
-        else {ip=i+1;im=i-1;}
-        subkey=key[i]*r+(key[im]*Num+key[ip]);
-        b4+=(((b1>>16)^((b1<<25)+subkey))+(subkey*(~(b1<<7))));
-        b4=~b4;
-    }
-    Num++;
-    *num_block=Num;
-    ((unsigned char*)out)[0]=b1;
-    ((unsigned char*)out)[1]=b2;
-    ((unsigned char*)out)[2]=b3;
-    ((unsigned char*)out)[3]=b4;
-}
-
-void fnam2_decrypt(int *key, int key_size, unsigned long *num_block,  unsigned long b1,unsigned long b2,unsigned long b3,unsigned long b4, void * out )
-{
-    int subkey,i,ip,im;
-    unsigned long Num=*num_block;
-    int r;
-    for(r=key_size*sizeof(int)-1;r>=0;r--){
-        i=r%key_size;
-        if(i==key_size) {ip=1;im=key_size-1;}
-        if(i==1) {ip=2;im=key_size;}
-        else {ip=i+1;im=i-1;}
-        subkey=key[i]*r+(key[im]*Num+key[ip]);
-        b4=~b4;
-        b4-=(((b1>>16)^((b1<<25)+subkey))+(subkey*(~(b1<<7))));
-        r--;
-
-        i=r%key_size;
-        if(i==key_size) {ip=1;im=key_size-1;}
-        if(i==1) {ip=2;im=key_size;}
-        else {ip=i+1;im=i-1;}
-        subkey=key[i]*r+(key[im]*Num+key[ip]);
-        b3=~b3;
-        b3-=(((b4>>16)^((b4<<25)+subkey))+(subkey*(~(b4<<7))));
-        r--;
-
-        i=r%key_size;
-        if(i==key_size) {ip=1;im=key_size-1;}
-        if(i==1) {ip=2;im=key_size;}
-        else {ip=i+1;im=i-1;}
-        subkey=key[i]*r+(key[im]*Num+key[ip]);
-        b2=~b2;
-        b2-=(((b3>>16)^((b3<<25)+subkey))+(subkey*(~(b3<<7))));
-        r--;
-
-        i=r%key_size;
-        if(i==key_size) {ip=1;im=key_size-1;}
-        if(i==1) {ip=2;im=key_size;}
-        else {ip=i+1;im=i-1;}
-        subkey=key[i]*r+(key[im]*Num+key[ip]);
-        b1=~b1;
-        b1-=(((b2>>16)^((b2<<25)+subkey))+(subkey*(~(b2<<7))));
-    }
-    Num++;
-    *num_block=Num;
-    ((unsigned char*)out)[0]=b1;
-    ((unsigned char*)out)[1]=b2;
-    ((unsigned char*)out)[2]=b3;
-    ((unsigned char*)out)[3]=b4;
-}
diff --git a/crypt/enc_key.h b/crypt/enc_key.h
deleted file mode 100644
index 58954225520844828204595f7b1eca1965f0510b..0000000000000000000000000000000000000000
--- a/crypt/enc_key.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- Copyright (c) 2017-2018 (c) Project "DeM Labs Inc" https://github.com/demlabsinc
-  All rights reserved.
-
- This file is part of DAP (Deus Applications Prototypes) the open source project
-
-    DAP (Deus Applicaions Prototypes) is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Lesser General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    DAP is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public License
-    along with any DAP based project.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _ENC_KEY_H_
-#define _ENC_KEY_H_
-
-#include "enc.h"
-
-#include <stddef.h>
-#include "enc_key.h"
-typedef enum enc_key_type{ENC_KEY_TYPE_FNAM2, ENC_KEY_TYPE_AES,ENC_KEY_RSA_SESSION} enc_key_type_t;
-
-struct enc_key;
-typedef size_t (*enc_callback_t)(struct enc_key *, const void * , const size_t ,void *);
-
-typedef struct enc_key{
-    unsigned char * data;
-    size_t data_size;
-    enc_key_type_t type;
-
-    enc_callback_t enc;
-    enc_callback_t dec;
-
-    void * internal;
-} enc_key_t;
-
-extern enc_key_t *enc_key_new(size_t key_size,enc_key_type_t key_type);
-extern enc_key_t *enc_key_generate(enc_data_type_t v_type, rsa_key_t* key_session_pair);
-extern enc_key_t *enc_key_create(const char * key_input,enc_key_type_t v_type);
-extern void enc_key_delete(enc_key_t * key);
-extern rsa_key_t* enc_key_session_pair_create(const char* client_pub_key, u_int16_t key_len);
-
-#endif
diff --git a/crypt/liboqs/common/common.c b/crypt/liboqs/common/common.c
new file mode 100644
index 0000000000000000000000000000000000000000..420aae263f282eded000d967e437e6e311d8ef0c
--- /dev/null
+++ b/crypt/liboqs/common/common.c
@@ -0,0 +1,28 @@
+#include <oqs/common.h>
+
+#include <string.h>
+
+#if defined(WINDOWS)
+#include <windows.h>
+#endif
+
+void OQS_MEM_cleanse(void *ptr, size_t len) {
+#if defined(WINDOWS)
+	SecureZeroMemory(ptr, len);
+#elif defined(HAVE_MEMSET_S)
+	if (0U < len && memset_s(ptr, (rsize_t) len, 0, (rsize_t) len) != 0) {
+		abort();
+	}
+#else
+	typedef void *(*memset_t)(void *, int, size_t);
+	static volatile memset_t memset_func = memset;
+	memset_func(ptr, 0, len);
+#endif
+}
+
+void OQS_MEM_secure_free(void *ptr, size_t len) {
+	if (ptr != NULL) {
+		OQS_MEM_cleanse(ptr, len);
+		free(ptr);
+	}
+}
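OQS_MEM_cleanse above routes the wipe through SecureZeroMemory, memset_s, or a volatile function pointer to memset so the store cannot be dropped as dead code, and OQS_MEM_secure_free combines that wipe with free(). A short sketch of the intended call pattern, using only these two functions; the surrounding buffer handling is illustrative.

#include <stdlib.h>
#include <string.h>

#include <oqs/common.h>

void use_secret(const unsigned char *secret, size_t secret_len)
{
    unsigned char *copy = malloc(secret_len);
    if (copy == NULL)
        return;
    memcpy(copy, secret, secret_len);

    /* ... work with the copy ... */

    /* Wipes the buffer before freeing it; the wipe survives optimization. */
    OQS_MEM_secure_free(copy, secret_len);
}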
diff --git a/crypt/liboqs/common/common.h b/crypt/liboqs/common/common.h
new file mode 100644
index 0000000000000000000000000000000000000000..010d9324d7033aaadc5c5e1da7a3ea11ea6a1bcf
--- /dev/null
+++ b/crypt/liboqs/common/common.h
@@ -0,0 +1,19 @@
+#ifndef __OQS_COMMON_H
+#define __OQS_COMMON_H
+
+#include <stdlib.h>
+
+#define OQS_SUCCESS 1
+#define OQS_ERROR 0
+
+void OQS_MEM_cleanse(void *ptr, size_t len);
+void OQS_MEM_secure_free(void *ptr, size_t len);
+
+#if __ANDROID__
+//android workaround
+#define eprintf(...) printf(__VA_ARGS__);
+#else
+#define eprintf(...) fprintf(stderr, __VA_ARGS__);
+#endif
+
+#endif
diff --git a/crypt/liboqs/config.h b/crypt/liboqs/config.h
new file mode 100644
index 0000000000000000000000000000000000000000..4d6f08aa132037e9c2dc0af8175a802e5641233a
--- /dev/null
+++ b/crypt/liboqs/config.h
@@ -0,0 +1,197 @@
+/* config.h.  Generated from config.h.in by configure.  */
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* "Define to 1 when MCBITS enabled" */
+/* #undef ENABLE_CODE_MCBITS */
+
+/* "Define to 1 when FRODO enabled" */
+#define ENABLE_KEX_LWE_FRODO 1
+
+/* "Define to 1 when KYBER enabled" */
+#define ENABLE_KEX_MLWE_KYBER 1
+
+/* "Define to 1 when NTRU enabled" */
+#define ENABLE_KEX_NTRU 1
+
+/* "Define to 1 when RLWE MSRLN16 enabled" */
+#define ENABLE_KEX_RLWE_MSRLN16 1
+
+/* "Define to 1 when RLWE NEWHOPE enabled" */
+#define ENABLE_KEX_RLWE_NEWHOPE 1
+
+/* "Define to 1 when SIDH CLN16 enabled" */
+#define ENABLE_KEX_SIDH_CLN16 1
+
+/* "Define to 1 when SIDH IQC enabled" */
+/* #undef ENABLE_SIDH_IQC_REF */
+
+/* GMP DIR used locally */
+/* #undef GMPDIR */
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#define HAVE_FCNTL_H 1
+
+/* Define to 1 if you have the `gettimeofday' function. */
+#define HAVE_GETTIMEOFDAY 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <limits.h> header file. */
+#define HAVE_LIMITS_H 1
+
+/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
+   to 0 otherwise. */
+#define HAVE_MALLOC 1
+
+/* Define to 1 if you have the `memmove' function. */
+#define HAVE_MEMMOVE 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the `memset' function. */
+#define HAVE_MEMSET 1
+
+/* Define to 1 if you have the `pow' function. */
+/* #undef HAVE_POW */
+
+/* Define to 1 if you have the `sqrt' function. */
+/* #undef HAVE_SQRT */
+
+/* Define to 1 if you have the <stddef.h> header file. */
+#define HAVE_STDDEF_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the `strdup' function. */
+#define HAVE_STRDUP 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if the system has the type `_Bool'. */
+#define HAVE__BOOL 1
+
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
+#define LT_OBJDIR ".libs/"
+
+/* M4RI DIR used locally */
+/* #undef M4RIDIR */
+
+/* OPENSSL DIR used locally */
+/* #undef OPENSSLDIR */
+
+/* Name of package */
+#define PACKAGE "liboqs"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "liboqs"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "liboqs 1.0.0"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "liboqs"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "1.0.0"
+
+/* The size of `size_t', as computed by sizeof. */
+#define SIZEOF_SIZE_T 8
+
+/* SODIUM DIR used locally */
+/* #undef SODIUMDIR */
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Version number of package */
+#define VERSION "1.0.0"
+
+/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT32_T */
+
+/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT64_T */
+
+/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+/* #undef _UINT8_T */
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+   calls it, or to nothing if 'inline' is not supported under any name.  */
+#ifndef __cplusplus
+/* #undef inline */
+#endif
+
+/* Define to the type of a signed integer type of width exactly 16 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef int16_t */
+
+/* Define to the type of a signed integer type of width exactly 32 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef int32_t */
+
+/* Define to the type of a signed integer type of width exactly 64 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef int64_t */
+
+/* Define to the type of a signed integer type of width exactly 8 bits if such
+   a type exists and the standard includes do not define it. */
+/* #undef int8_t */
+
+/* Define to rpl_malloc if the replacement function should be used. */
+/* #undef malloc */
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+/* #undef size_t */
+
+/* Define to the type of an unsigned integer type of width exactly 16 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint16_t */
+
+/* Define to the type of an unsigned integer type of width exactly 32 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint32_t */
+
+/* Define to the type of an unsigned integer type of width exactly 64 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint64_t */
+
+/* Define to the type of an unsigned integer type of width exactly 8 bits if
+   such a type exists and the standard includes do not define it. */
+/* #undef uint8_t */
diff --git a/crypt/liboqs/crypto/aes/Makefile.am b/crypt/liboqs/crypto/aes/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..1317fb2dc1fd50489dea0dd02e108d607ab8b3e1
--- /dev/null
+++ b/crypt/liboqs/crypto/aes/Makefile.am
@@ -0,0 +1,18 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libaes.la
+
+libaes_la_SOURCES = aes.c
+
+libaes_la_CPPFLAGS = -I../../../include
+if USE_OPENSSL
+libaes_la_CPPFLAGS += -I$(OPENSSL_DIR)/include
+endif
+
+if USE_AES_NI
+libaes_la_CPPFLAGS += -maes -msse2
+libaes_la_SOURCES += aes_ni.c
+endif
+
+libaes_la_SOURCES += aes_c.c
+libaes_la_CPPFLAGS += $(AM_CPPFLAGS)
+
diff --git a/crypt/liboqs/crypto/aes/aes.c b/crypt/liboqs/crypto/aes/aes.c
new file mode 100644
index 0000000000000000000000000000000000000000..c77a799ddc178c944eb79e060a5f175c31fec774
--- /dev/null
+++ b/crypt/liboqs/crypto/aes/aes.c
@@ -0,0 +1,194 @@
+#include <assert.h>
+
+#include "aes.h"
+#include "aes_local.h"
+
+void OQS_AES128_load_schedule(const uint8_t *key, void **schedule, int for_encryption) {
+#ifdef USE_OPENSSL
+	oqs_aes128_load_schedule_ossl(key, schedule, for_encryption);
+#else
+	for_encryption++; // need some dummy operation to avoid unused parameter warning
+#ifdef AES_ENABLE_NI
+	oqs_aes128_load_schedule_ni(key, schedule);
+#else
+	oqs_aes128_load_schedule_c(key, schedule);
+#endif
+#endif
+}
+
+void OQS_AES128_free_schedule(void *schedule) {
+#ifdef USE_OPENSSL
+	oqs_aes128_free_schedule_ossl(schedule);
+#else
+#ifdef AES_ENABLE_NI
+	oqs_aes128_free_schedule_ni(schedule);
+#else
+	oqs_aes128_free_schedule_c(schedule);
+#endif
+#endif
+}
+
+void OQS_AES128_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) {
+#ifdef USE_OPENSSL
+	oqs_aes128_ecb_enc_ossl(plaintext, plaintext_len, key, ciphertext);
+#else
+#ifdef AES_ENABLE_NI
+	oqs_aes128_ecb_enc_ni(plaintext, plaintext_len, key, ciphertext);
+#else
+	oqs_aes128_ecb_enc_c(plaintext, plaintext_len, key, ciphertext);
+#endif
+#endif
+}
+
+void OQS_AES128_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) {
+#ifdef USE_OPENSSL
+	oqs_aes128_ecb_dec_ossl(ciphertext, ciphertext_len, key, plaintext);
+#else
+#ifdef AES_ENABLE_NI
+	oqs_aes128_ecb_dec_ni(ciphertext, ciphertext_len, key, plaintext);
+#else
+	oqs_aes128_ecb_dec_c(ciphertext, ciphertext_len, key, plaintext);
+#endif
+#endif
+}
+
+void OQS_AES128_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) {
+#ifdef USE_OPENSSL
+	oqs_aes128_ecb_enc_sch_ossl(plaintext, plaintext_len, schedule, ciphertext);
+#else
+#ifdef AES_ENABLE_NI
+	oqs_aes128_ecb_enc_sch_ni(plaintext, plaintext_len, schedule, ciphertext);
+#else
+	oqs_aes128_ecb_enc_sch_c(plaintext, plaintext_len, schedule, ciphertext);
+#endif
+#endif
+}
+
+void OQS_AES128_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) {
+#ifdef USE_OPENSSL
+	oqs_aes128_ecb_dec_sch_ossl(ciphertext, ciphertext_len, schedule, plaintext);
+#else
+#ifdef AES_ENABLE_NI
+	oqs_aes128_ecb_dec_sch_ni(ciphertext, ciphertext_len, schedule, plaintext);
+#else
+	oqs_aes128_ecb_dec_sch_c(ciphertext, ciphertext_len, schedule, plaintext);
+#endif
+#endif
+}
+
+#ifdef AES_ENABLE_NI
+inline void oqs_aes128_ecb_enc_ni(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) {
+	void *schedule = NULL;
+	oqs_aes128_load_schedule_ni(key, &schedule);
+	oqs_aes128_ecb_enc_sch_ni(plaintext, plaintext_len, schedule, ciphertext);
+	oqs_aes128_free_schedule_ni(schedule);
+}
+#endif
+
+inline void oqs_aes128_ecb_enc_c(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) {
+	void *schedule = NULL;
+	oqs_aes128_load_schedule_c(key, &schedule);
+	oqs_aes128_ecb_enc_sch_c(plaintext, plaintext_len, schedule, ciphertext);
+	oqs_aes128_free_schedule_c(schedule);
+}
+
+#ifdef AES_ENABLE_NI
+inline void oqs_aes128_ecb_enc_sch_ni(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) {
+	assert(plaintext_len % 16 == 0);
+	for (size_t block = 0; block < plaintext_len / 16; block++) {
+		oqs_aes128_enc_ni(plaintext + (16 * block), schedule, ciphertext + (16 * block));
+	}
+}
+#endif
+
+inline void oqs_aes128_ecb_enc_sch_c(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) {
+	assert(plaintext_len % 16 == 0);
+	for (size_t block = 0; block < plaintext_len / 16; block++) {
+		oqs_aes128_enc_c(plaintext + (16 * block), schedule, ciphertext + (16 * block));
+	}
+}
+
+#ifdef AES_ENABLE_NI
+inline void oqs_aes128_ecb_dec_ni(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) {
+	void *schedule = NULL;
+	oqs_aes128_load_schedule_ni(key, &schedule);
+	oqs_aes128_ecb_dec_sch_ni(ciphertext, ciphertext_len, schedule, plaintext);
+	oqs_aes128_free_schedule_ni(schedule);
+}
+#endif
+
+inline void oqs_aes128_ecb_dec_c(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) {
+	void *schedule = NULL;
+	oqs_aes128_load_schedule_c(key, &schedule);
+	oqs_aes128_ecb_dec_sch_c(ciphertext, ciphertext_len, schedule, plaintext);
+	oqs_aes128_free_schedule_c(schedule);
+}
+
+#ifdef AES_ENABLE_NI
+inline void oqs_aes128_ecb_dec_sch_ni(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) {
+	assert(ciphertext_len % 16 == 0);
+	for (size_t block = 0; block < ciphertext_len / 16; block++) {
+		oqs_aes128_dec_ni(ciphertext + (16 * block), schedule, plaintext + (16 * block));
+	}
+}
+#endif
+
+inline void oqs_aes128_ecb_dec_sch_c(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) {
+	assert(ciphertext_len % 16 == 0);
+	for (size_t block = 0; block < ciphertext_len / 16; block++) {
+		oqs_aes128_dec_c(ciphertext + (16 * block), schedule, plaintext + (16 * block));
+	}
+}
+
+#ifdef USE_OPENSSL
+#include <openssl/evp.h>
+
+inline void oqs_aes128_load_schedule_ossl(const uint8_t *key, void **schedule, int for_encryption) {
+	EVP_CIPHER_CTX *aes_ctx = EVP_CIPHER_CTX_new();
+	assert(aes_ctx != NULL);
+	if (for_encryption) {
+		assert(1 == EVP_EncryptInit_ex(aes_ctx, EVP_aes_128_ecb(), NULL, key, NULL));
+	} else {
+		assert(1 == EVP_DecryptInit_ex(aes_ctx, EVP_aes_128_ecb(), NULL, key, NULL));
+	}
+	EVP_CIPHER_CTX_set_padding(aes_ctx, 0);
+	*schedule = aes_ctx;
+}
+
+inline void oqs_aes128_free_schedule_ossl(void *schedule) {
+	if (schedule != NULL) {
+		EVP_CIPHER_CTX_free((EVP_CIPHER_CTX *) schedule);
+	}
+}
+
+inline void oqs_aes128_ecb_enc_ossl(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) {
+	void *schedule = NULL;
+	oqs_aes128_load_schedule_ossl(key, &schedule, 1);
+	oqs_aes128_ecb_enc_sch_ossl(plaintext, plaintext_len, schedule, ciphertext);
+	oqs_aes128_free_schedule_ossl(schedule);
+}
+
+inline void oqs_aes128_ecb_dec_ossl(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) {
+	void *schedule = NULL;
+	oqs_aes128_load_schedule_ossl(key, &schedule, 0);
+	oqs_aes128_ecb_dec_sch_ossl(ciphertext, ciphertext_len, schedule, plaintext);
+	oqs_aes128_free_schedule_ossl(schedule);
+}
+
+inline void oqs_aes128_ecb_enc_sch_ossl(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) {
+	assert(plaintext_len % 16 == 0);
+	int outlen;
+	assert(1 == EVP_EncryptUpdate((EVP_CIPHER_CTX *) schedule, ciphertext, &outlen, plaintext, plaintext_len));
+	assert((size_t) outlen == plaintext_len);
+	assert(1 == EVP_EncryptFinal_ex((EVP_CIPHER_CTX *) schedule, ciphertext, &outlen));
+}
+
+inline void oqs_aes128_ecb_dec_sch_ossl(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) {
+	assert(ciphertext_len % 16 == 0);
+	int outlen;
+	assert(1 == EVP_DecryptUpdate((EVP_CIPHER_CTX *) schedule, plaintext, &outlen, ciphertext, ciphertext_len));
+	assert((size_t) outlen == ciphertext_len);
+	assert(1 == EVP_DecryptFinal_ex((EVP_CIPHER_CTX *) schedule, plaintext, &outlen));
+}
+
+#endif
diff --git a/crypt/liboqs/crypto/aes/aes.h b/crypt/liboqs/crypto/aes/aes.h
new file mode 100644
index 0000000000000000000000000000000000000000..f90574243aa0aeb8a5c0292d53a4f1a4801ebaca
--- /dev/null
+++ b/crypt/liboqs/crypto/aes/aes.h
@@ -0,0 +1,66 @@
+/**
+ * \file aes.h
+ * \brief Header defining the API for OQS AES
+ */
+
+#ifndef __OQS_AES_H
+#define __OQS_AES_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/**
+ * Function to fill a key schedule given an initial key.
+ *
+ * @param key            Initial Key.
+ * @param schedule       Abstract data structure for a key schedule.
+ * @param for_encryption 1 if the key schedule is for encryption, 0 if for decryption.
+ */
+void OQS_AES128_load_schedule(const uint8_t *key, void **schedule, int for_encryption);
+
+/**
+ * Function to free a key schedule.
+ *
+ * @param schedule       Schedule generated with OQS_AES128_load_schedule().
+ */
+void OQS_AES128_free_schedule(void *schedule);
+
+/**
+ * Function to encrypt blocks of plaintext using ECB mode.
+ * A schedule based on the key is generated and used internally.
+ *
+ * @param plaintext     Plaintext to be encrypted.
+ * @param plaintext_len Length of the plaintext in bytes. Must be a multiple of 16.
+ * @param key           Key to be used for encryption.
+ * @param ciphertext    Pointer to a block of memory at least as large as the plaintext. The result will be written here.
+ */
+void OQS_AES128_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext);
+
+/**
+ * Function to decrypt blocks of ciphertext using ECB mode.
+ * A schedule based on the key is generated and used internally.
+ *
+ * @param ciphertext     Ciphertext to be decrypted.
+ * @param ciphertext_len Length of the ciphertext in bytes. Must be a multiple of 16.
+ * @param key            Key to be used for decryption.
+ * @param plaintext      Pointer to a block of memory at least as large as the ciphertext. The result will be written here.
+ */
+void OQS_AES128_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext);
+
+/**
+ * Same as OQS_AES128_ECB_enc() except a schedule generated by
+ * OQS_AES128_load_schedule() is passed rather than a key. This is faster
+ * if the same schedule is used for multiple encryptions since it does
+ * not have to be regenerated from the key.
+ */
+void OQS_AES128_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext);
+
+/**
+ * Same as OQS_AES128_ECB_dec() except a schedule generated by
+ * OQS_AES128_load_schedule() is passed rather than a key. This is faster
+ * if the same schedule is used for multiple decryptions since it does
+ * not have to be regenerated from the key.
+ */
+void OQS_AES128_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext);
+
+#endif
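The schedule-based variants exist so one expanded key can serve many calls. A minimal sketch of that reuse pattern follows, assuming the header is included as "aes.h" and using placeholder buffer sizes.

#include <stdint.h>

#include "aes.h"   /* assumed include path for the header above */

void encrypt_two_buffers(const uint8_t key[16],
                         const uint8_t msg_a[32], uint8_t out_a[32],
                         const uint8_t msg_b[64], uint8_t out_b[64])
{
    void *schedule = NULL;
    OQS_AES128_load_schedule(key, &schedule, 1);   /* 1: schedule for encryption */

    /* Lengths must be multiples of 16; the same schedule serves both calls. */
    OQS_AES128_ECB_enc_sch(msg_a, 32, schedule, out_a);
    OQS_AES128_ECB_enc_sch(msg_b, 64, schedule, out_b);

    OQS_AES128_free_schedule(schedule);
}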
diff --git a/crypt/liboqs/crypto/aes/aes_c.c b/crypt/liboqs/crypto/aes/aes_c.c
new file mode 100644
index 0000000000000000000000000000000000000000..553b4d1d7b622bfd638977cad7e40073f534c920
--- /dev/null
+++ b/crypt/liboqs/crypto/aes/aes_c.c
@@ -0,0 +1,340 @@
+// Simple, thoroughly commented implementation of 128-bit AES / Rijndael using C
+// Chris Hulbert - chris.hulbert@gmail.com - http://splinter.com.au/blog
+// References:
+// http://en.wikipedia.org/wiki/Advanced_Encryption_Standard
+// http://en.wikipedia.org/wiki/Rijndael_key_schedule
+// http://en.wikipedia.org/wiki/Rijndael_mix_columns
+// http://en.wikipedia.org/wiki/Rijndael_S-box
+// This code is public domain, or any OSI-approved license, your choice. No warranty.
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "aes.h"
+
+typedef unsigned char byte;
+
+// Here are all the lookup tables for the row shifts, rcon, s-boxes, and galois field multiplications
+static const byte shift_rows_table[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11};
+static const byte shift_rows_table_inv[] = {0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3};
+static const byte lookup_rcon[] = {
+    0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a};
+static const byte lookup_sbox[] = {
+    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
+static const byte lookup_sbox_inv[] = {
+    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d};
+static const byte lookup_g2[] = {
+    0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
+    0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
+    0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+    0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
+    0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
+    0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
+    0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
+    0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
+    0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
+    0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
+    0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
+    0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
+    0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
+    0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
+    0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
+    0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5};
+static const byte lookup_g3[] = {
+    0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
+    0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
+    0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
+    0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
+    0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
+    0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
+    0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
+    0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
+    0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
+    0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
+    0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
+    0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
+    0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
+    0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
+    0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
+    0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a};
+static const byte lookup_g9[] = {
+    0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
+    0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
+    0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
+    0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
+    0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
+    0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
+    0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
+    0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
+    0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
+    0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
+    0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
+    0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
+    0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
+    0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
+    0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
+    0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46};
+static const byte lookup_g11[] = {
+    0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
+    0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
+    0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
+    0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
+    0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
+    0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
+    0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
+    0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
+    0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
+    0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
+    0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
+    0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
+    0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
+    0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
+    0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
+    0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3};
+static const byte lookup_g13[] = {
+    0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
+    0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
+    0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
+    0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
+    0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
+    0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
+    0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
+    0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
+    0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
+    0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
+    0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
+    0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
+    0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
+    0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
+    0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
+    0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97};
+static const byte lookup_g14[] = {
+    0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
+    0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
+    0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
+    0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
+    0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
+    0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
+    0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
+    0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
+    0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
+    0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
+    0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
+    0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
+    0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
+    0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
+    0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
+    0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d};
+
+// XOR each of the n bytes in array a with the corresponding byte of b
+static void xor (byte * a, const byte *b, int n) {
+	int i;
+	for (i = 0; i < n; i++) {
+		a[i] ^= b[i];
+	}
+}
+
+// Xor the current cipher state by a specific round key
+static void xor_round_key(byte *state, const byte *keys, int round) {
+	xor(state, keys + round * 16, 16);
+}
+
+// Apply the rijndael s-box to all elements in an array
+// http://en.wikipedia.org/wiki/Rijndael_S-box
+static void sub_bytes(byte *a, int n) {
+	int i;
+	for (i = 0; i < n; i++) {
+		a[i] = lookup_sbox[a[i]];
+	}
+}
+static void sub_bytes_inv(byte *a, int n) {
+	int i;
+	for (i = 0; i < n; i++) {
+		a[i] = lookup_sbox_inv[a[i]];
+	}
+}
+
+// Perform the core key schedule transform on 4 bytes, as part of the key expansion process
+// http://en.wikipedia.org/wiki/Rijndael_key_schedule#Key_schedule_core
+static void key_schedule_core(byte *a, int i) {
+	byte temp = a[0]; // Rotate the output eight bits to the left
+	a[0] = a[1];
+	a[1] = a[2];
+	a[2] = a[3];
+	a[3] = temp;
+	sub_bytes(a, 4);        // Apply Rijndael's S-box on all four individual bytes in the output word
+	a[0] ^= lookup_rcon[i]; // On just the first (leftmost) byte of the output word, perform the rcon operation with i
+	                        // as the input, and exclusive or the rcon output with the first byte of the output word
+}
+
+// Expand the 16-byte key to 11 round keys (176 bytes)
+// http://en.wikipedia.org/wiki/Rijndael_key_schedule#The_key_schedule
+void oqs_aes128_load_schedule_c(const uint8_t *key, void **_schedule) {
+	*_schedule = malloc(16 * 11);
+	assert(*_schedule != NULL);
+	uint8_t *schedule = (uint8_t *) *_schedule;
+	int bytes = 16;            // The count of how many bytes we've created so far
+	int i = 1;                 // The rcon iteration value i is set to 1
+	int j;                     // For repeating the second stage 3 times
+	byte t[4];                 // Temporary working area known as 't' in the Wiki article
+	memcpy(schedule, key, 16); // The first 16 bytes of the expanded key are simply the encryption key
+
+	while (bytes < 176) {                   // Until we have 176 bytes of expanded key, we do the following:
+		memcpy(t, schedule + bytes - 4, 4); // We assign the value of the previous four bytes in the expanded key to t
+		key_schedule_core(t, i);            // We perform the key schedule core on t, with i as the rcon iteration value
+		i++;                                // We increment i by 1
+		xor(t, schedule + bytes - 16, 4);   // We exclusive-or t with the four-byte block 16 bytes before the new expanded key.
+		memcpy(schedule + bytes, t, 4);     // This becomes the next 4 bytes in the expanded key
+		bytes += 4;                         // Keep track of how many expanded key bytes we've added
+
+		// We then do the following three times to create the next twelve bytes
+		for (j = 0; j < 3; j++) {
+			memcpy(t, schedule + bytes - 4, 4); // We assign the value of the previous 4 bytes in the expanded key to t
+			xor(t, schedule + bytes - 16, 4);   // We exclusive-or t with the four-byte block 16 bytes before the new expanded key
+			memcpy(schedule + bytes, t, 4);     // This becomes the next 4 bytes in the expanded key
+			bytes += 4;                         // Keep track of how many expanded key bytes we've added
+		}
+	}
+}
+
+void oqs_aes128_free_schedule_c(void *schedule) {
+	if (schedule != NULL) {
+		free(schedule);
+	}
+}
+
+// Apply the shift rows step on the 16 byte cipher state
+// http://en.wikipedia.org/wiki/Advanced_Encryption_Standard#The_ShiftRows_step
+static void shift_rows(byte *state) {
+	int i;
+	byte temp[16];
+	memcpy(temp, state, 16);
+	for (i = 0; i < 16; i++) {
+		state[i] = temp[shift_rows_table[i]];
+	}
+}
+static void shift_rows_inv(byte *state) {
+	int i;
+	byte temp[16];
+	memcpy(temp, state, 16);
+	for (i = 0; i < 16; i++) {
+		state[i] = temp[shift_rows_table_inv[i]];
+	}
+}
+
+// Perform the mix columns matrix on one column of 4 bytes
+// http://en.wikipedia.org/wiki/Rijndael_mix_columns
+static void mix_col(byte *state) {
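+	// Forward MixColumns matrix row (2 3 1 1) applied cyclically; lookup_g2 and lookup_g3 are multiply-by-2/3 tables in GF(2^8)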
+	byte a0 = state[0];
+	byte a1 = state[1];
+	byte a2 = state[2];
+	byte a3 = state[3];
+	state[0] = lookup_g2[a0] ^ lookup_g3[a1] ^ a2 ^ a3;
+	state[1] = lookup_g2[a1] ^ lookup_g3[a2] ^ a3 ^ a0;
+	state[2] = lookup_g2[a2] ^ lookup_g3[a3] ^ a0 ^ a1;
+	state[3] = lookup_g2[a3] ^ lookup_g3[a0] ^ a1 ^ a2;
+}
+
+// Perform the mix columns matrix on each column of the 16 bytes
+static void mix_cols(byte *state) {
+	mix_col(state);
+	mix_col(state + 4);
+	mix_col(state + 8);
+	mix_col(state + 12);
+}
+
+// Perform the inverse mix columns matrix on one column of 4 bytes
+// http://en.wikipedia.org/wiki/Rijndael_mix_columns
+static void mix_col_inv(byte *state) {
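+	// Inverse MixColumns matrix row (14 11 13 9) applied cyclically, using the GF(2^8) multiplication tables above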
+	byte a0 = state[0];
+	byte a1 = state[1];
+	byte a2 = state[2];
+	byte a3 = state[3];
+	state[0] = lookup_g14[a0] ^ lookup_g9[a3] ^ lookup_g13[a2] ^ lookup_g11[a1];
+	state[1] = lookup_g14[a1] ^ lookup_g9[a0] ^ lookup_g13[a3] ^ lookup_g11[a2];
+	state[2] = lookup_g14[a2] ^ lookup_g9[a1] ^ lookup_g13[a0] ^ lookup_g11[a3];
+	state[3] = lookup_g14[a3] ^ lookup_g9[a2] ^ lookup_g13[a1] ^ lookup_g11[a0];
+}
+
+// Perform the inverse mix columns matrix on each column of the 16 bytes
+static void mix_cols_inv(byte *state) {
+	mix_col_inv(state);
+	mix_col_inv(state + 4);
+	mix_col_inv(state + 8);
+	mix_col_inv(state + 12);
+}
+
+void oqs_aes128_enc_c(const uint8_t *plaintext, const void *_schedule, uint8_t *ciphertext) {
+	const uint8_t *schedule = (const uint8_t *) _schedule;
+	int i; // To count the rounds
+
+	// First Round
+	memcpy(ciphertext, plaintext, 16);
+	xor_round_key(ciphertext, schedule, 0);
+
+	// Middle rounds
+	for (i = 0; i < 9; i++) {
+		sub_bytes(ciphertext, 16);
+		shift_rows(ciphertext);
+		mix_cols(ciphertext);
+		xor_round_key(ciphertext, schedule, i + 1);
+	}
+
+	// Final Round
+	sub_bytes(ciphertext, 16);
+	shift_rows(ciphertext);
+	xor_round_key(ciphertext, schedule, 10);
+}
+
+void oqs_aes128_dec_c(const uint8_t *ciphertext, const void *_schedule, uint8_t *plaintext) {
+	const uint8_t *schedule = (const uint8_t *) _schedule;
+	int i; // To count the rounds
+
+	// Reverse the final Round
+	memcpy(plaintext, ciphertext, 16);
+	xor_round_key(plaintext, schedule, 10);
+	shift_rows_inv(plaintext);
+	sub_bytes_inv(plaintext, 16);
+
+	// Reverse the middle rounds
+	for (i = 0; i < 9; i++) {
+		xor_round_key(plaintext, schedule, 9 - i);
+		mix_cols_inv(plaintext);
+		shift_rows_inv(plaintext);
+		sub_bytes_inv(plaintext, 16);
+	}
+
+	// Reverse the first Round
+	xor_round_key(plaintext, schedule, 0);
+}
diff --git a/crypt/liboqs/crypto/aes/aes_local.h b/crypt/liboqs/crypto/aes/aes_local.h
new file mode 100644
index 0000000000000000000000000000000000000000..0b226c7845fc7a0016596f10677bf8c835a0b932
--- /dev/null
+++ b/crypt/liboqs/crypto/aes/aes_local.h
@@ -0,0 +1,39 @@
+/**
+ * \file aes_local.h
+ * \brief Header defining additional internal functions for OQS AES
+ */
+
+#ifndef __OQS_AES_LOCAL_H
+#define __OQS_AES_LOCAL_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+void oqs_aes128_load_schedule_ni(const uint8_t *key, void **schedule);
+void oqs_aes128_free_schedule_ni(void *schedule);
+void oqs_aes128_enc_ni(const uint8_t *plaintext, const void *schedule, uint8_t *ciphertext);
+void oqs_aes128_dec_ni(const uint8_t *ciphertext, const void *schedule, uint8_t *plaintext);
+void oqs_aes128_ecb_enc_ni(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext);
+void oqs_aes128_ecb_dec_ni(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext);
+void oqs_aes128_ecb_enc_sch_ni(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext);
+void oqs_aes128_ecb_dec_sch_ni(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext);
+
+void oqs_aes128_load_schedule_c(const uint8_t *key, void **schedule);
+void oqs_aes128_free_schedule_c(void *schedule);
+void oqs_aes128_enc_c(const uint8_t *plaintext, const void *schedule, uint8_t *ciphertext);
+void oqs_aes128_dec_c(const uint8_t *ciphertext, const void *schedule, uint8_t *plaintext);
+void oqs_aes128_ecb_enc_c(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext);
+void oqs_aes128_ecb_dec_c(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext);
+void oqs_aes128_ecb_enc_sch_c(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext);
+void oqs_aes128_ecb_dec_sch_c(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext);
+
+#ifdef USE_OPENSSL
+void oqs_aes128_load_schedule_ossl(const uint8_t *key, void **schedule, int for_encryption);
+void oqs_aes128_free_schedule_ossl(void *schedule);
+void oqs_aes128_ecb_enc_ossl(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext);
+void oqs_aes128_ecb_dec_ossl(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext);
+void oqs_aes128_ecb_enc_sch_ossl(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext);
+void oqs_aes128_ecb_dec_sch_ossl(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext);
+#endif
+
+#endif
diff --git a/crypt/liboqs/crypto/aes/aes_ni.c b/crypt/liboqs/crypto/aes/aes_ni.c
new file mode 100644
index 0000000000000000000000000000000000000000..86eec3b7e700d63b20ba0ff29f202f70754e4fdb
--- /dev/null
+++ b/crypt/liboqs/crypto/aes/aes_ni.c
@@ -0,0 +1,100 @@
+#if defined(WINDOWS)
+#define UNUSED
+// __attribute__((unused)) is not supported by MSVC, so UNUSED expands to nothing here
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifndef AES_ENABLE_NI
+#include <assert.h>
+void oqs_aes128_load_schedule_ni(UNUSED const uint8_t *key, UNUSED void **_schedule) {
+	assert(0);
+}
+void oqs_aes128_free_schedule_ni(UNUSED void *_schedule) {
+	assert(0);
+}
+void oqs_aes128_enc_ni(UNUSED const uint8_t *plaintext, UNUSED const void *_schedule, UNUSED uint8_t *ciphertext) {
+	assert(0);
+}
+void oqs_aes128_dec_ni(UNUSED const uint8_t *ciphertext, UNUSED const void *_schedule, UNUSED uint8_t *plaintext) {
+	assert(0);
+}
+#else
+
+#include <wmmintrin.h>
+
+static __m128i key_expand(__m128i key, __m128i keygened) {
+	key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+	key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+	key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+	// The last 4 bytes produced by aeskeygenassist hold the word we need,
+	// so broadcast it to all four 32-bit lanes and XOR it into the key
+	keygened = _mm_shuffle_epi32(keygened, _MM_SHUFFLE(3, 3, 3, 3));
+	return _mm_xor_si128(key, keygened);
+}
+
+// This macro is needed since the rcon argument to _mm_aeskeygenassist_si128
+// must be a compile-time constant
+
+#define key_exp(k, rcon) key_expand(k, _mm_aeskeygenassist_si128(k, rcon))
+
+void oqs_aes128_load_schedule_ni(const uint8_t *key, void **_schedule) {
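+	// 20 blocks of 16 bytes: round keys 0-10 for encryption plus 9 InvMixColumns-transformed keys (indices 11-19) for decryption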
+	*_schedule = malloc(20 * 16);
+	assert(*_schedule != NULL);
+	__m128i *schedule = (__m128i *) *_schedule;
+	schedule[0] = _mm_loadu_si128((const __m128i *) key);
+	schedule[1] = key_exp(schedule[0], 0x01);
+	schedule[2] = key_exp(schedule[1], 0x02);
+	schedule[3] = key_exp(schedule[2], 0x04);
+	schedule[4] = key_exp(schedule[3], 0x08);
+	schedule[5] = key_exp(schedule[4], 0x10);
+	schedule[6] = key_exp(schedule[5], 0x20);
+	schedule[7] = key_exp(schedule[6], 0x40);
+	schedule[8] = key_exp(schedule[7], 0x80);
+	schedule[9] = key_exp(schedule[8], 0x1b);
+	schedule[10] = key_exp(schedule[9], 0x36);
+	// generate decryption keys in reverse order.
+	// schedule[10] is shared by last encryption and first decryption rounds
+	// schedule[0] is shared by first encryption round and last decryption round
+	for (size_t i = 0; i < 9; i++) {
+		schedule[11 + i] = _mm_aesimc_si128(schedule[9 - i]);
+	}
+}
+
+void oqs_aes128_free_schedule_ni(void *schedule) {
+	if (schedule != NULL) {
+		free(schedule);
+	}
+}
+
+void oqs_aes128_enc_ni(const uint8_t *plaintext, const void *_schedule, uint8_t *ciphertext) {
+	__m128i *schedule = (__m128i *) _schedule;
+	__m128i m = _mm_loadu_si128((__m128i *) plaintext);
+
+	m = _mm_xor_si128(m, schedule[0]);
+	for (size_t i = 1; i < 10; i++) {
+		m = _mm_aesenc_si128(m, schedule[i]);
+	}
+	m = _mm_aesenclast_si128(m, schedule[10]);
+
+	_mm_storeu_si128((__m128i *) ciphertext, m);
+}
+
+void oqs_aes128_dec_ni(const uint8_t *ciphertext, const void *_schedule, uint8_t *plaintext) {
+	__m128i *schedule = (__m128i *) _schedule;
+	__m128i m = _mm_loadu_si128((__m128i *) ciphertext);
+
+	m = _mm_xor_si128(m, schedule[10]);
+	for (size_t i = 1; i < 10; i++) {
+		m = _mm_aesdec_si128(m, schedule[10 + i]);
+	}
+	m = _mm_aesdeclast_si128(m, schedule[0]);
+
+	_mm_storeu_si128((__m128i *) plaintext, m);
+}
+
+#endif
diff --git a/crypt/liboqs/crypto/aes/test_aes.c b/crypt/liboqs/crypto/aes/test_aes.c
new file mode 100644
index 0000000000000000000000000000000000000000..62db255bfee1ca418fb6d77bb5fff162bcc016b3
--- /dev/null
+++ b/crypt/liboqs/crypto/aes/test_aes.c
@@ -0,0 +1,276 @@
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <oqs/rand.h>
+
+#include "../../ds_benchmark.h"
+#include "../../common/common.h"
+#include "aes.h"
+#include "aes_local.h"
+
+#define BENCH_DURATION 1
+
+#define TEST_ITERATIONS 100
+
+#define TEST_REPEATEDLY(x)                                    \
+	for (int i = 0; i < TEST_ITERATIONS; i++) {               \
+		int ok = (x);                                         \
+		if (ok != EXIT_SUCCESS) {                             \
+			eprintf("Failure in %s (iteration %d)\n", #x, i); \
+			return EXIT_FAILURE;                              \
+		}                                                     \
+	}
+
+static void print_bytes(uint8_t *bytes, size_t num_bytes) {
+	for (size_t i = 0; i < num_bytes; i++) {
+		printf("%02x", (unsigned) bytes[i]);
+	}
+}
+
+static int test_aes128_correctness_c(OQS_RAND *rand) {
+	uint8_t key[16], plaintext[16], ciphertext[16], decrypted[16];
+	void *schedule = NULL;
+	OQS_RAND_n(rand, key, 16);
+	OQS_RAND_n(rand, plaintext, 16);
+	oqs_aes128_load_schedule_c(key, &schedule);
+	oqs_aes128_enc_c(plaintext, schedule, ciphertext);
+	oqs_aes128_dec_c(ciphertext, schedule, decrypted);
+	oqs_aes128_free_schedule_c(schedule);
+	if (memcmp(plaintext, decrypted, 16) == 0) {
+		return EXIT_SUCCESS;
+	} else {
+		print_bytes(plaintext, 16);
+		printf("\n");
+		print_bytes(decrypted, 16);
+		printf("\n");
+		return EXIT_FAILURE;
+	}
+}
+
+#ifdef AES_ENABLE_NI
+static int test_aes128_correctness_ni(OQS_RAND *rand) {
+	uint8_t key[16], plaintext[16], ciphertext[16], decrypted[16];
+	void *schedule = NULL;
+	OQS_RAND_n(rand, key, 16);
+	OQS_RAND_n(rand, plaintext, 16);
+	oqs_aes128_load_schedule_ni(key, &schedule);
+	oqs_aes128_enc_ni(plaintext, schedule, ciphertext);
+	oqs_aes128_dec_ni(ciphertext, schedule, decrypted);
+	oqs_aes128_free_schedule_ni(schedule);
+	if (memcmp(plaintext, decrypted, 16) == 0) {
+		return EXIT_SUCCESS;
+	} else {
+		print_bytes(plaintext, 16);
+		printf("\n");
+		print_bytes(decrypted, 16);
+		printf("\n");
+		return EXIT_FAILURE;
+	}
+}
+
+static int test_aes128_c_equals_ni(OQS_RAND *rand) {
+	uint8_t key[16], plaintext[16], ciphertext_c[16], ciphertext_ni[16];
+	void *schedule_c = NULL, *schedule_ni = NULL;
+	OQS_RAND_n(rand, key, 16);
+	OQS_RAND_n(rand, plaintext, 16);
+	oqs_aes128_load_schedule_c(key, &schedule_c);
+	oqs_aes128_load_schedule_ni(key, &schedule_ni);
+	oqs_aes128_enc_c(plaintext, schedule_c, ciphertext_c);
+	oqs_aes128_enc_ni(plaintext, schedule_ni, ciphertext_ni);
+	oqs_aes128_free_schedule_c(schedule_c);
+	oqs_aes128_free_schedule_ni(schedule_ni);
+	if (memcmp(ciphertext_c, ciphertext_ni, 16) == 0) {
+		return EXIT_SUCCESS;
+	} else {
+		print_bytes(ciphertext_c, 16);
+		printf("\n");
+		print_bytes(ciphertext_ni, 16);
+		printf("\n");
+		return EXIT_FAILURE;
+	}
+}
+
+static int test_aes128_ecb_correctness_ni(OQS_RAND *rand) {
+	uint8_t key[16], plaintext[320], ciphertext[320], decrypted[320];
+	void *schedule = NULL;
+	OQS_RAND_n(rand, key, 16);
+	OQS_RAND_n(rand, plaintext, 320);
+	oqs_aes128_load_schedule_ni(key, &schedule);
+	oqs_aes128_ecb_enc_ni(plaintext, 320, schedule, ciphertext);
+	oqs_aes128_ecb_dec_ni(ciphertext, 320, schedule, decrypted);
+	oqs_aes128_free_schedule_ni(schedule);
+	if (memcmp(plaintext, decrypted, 320) == 0) {
+		return EXIT_SUCCESS;
+	} else {
+		print_bytes(plaintext, 320);
+		printf("\n");
+		print_bytes(decrypted, 320);
+		printf("\n");
+		return EXIT_FAILURE;
+	}
+}
+#endif
+
+static int test_aes128_ecb_correctness_c(OQS_RAND *rand) {
+	uint8_t key[16], plaintext[320], ciphertext[320], decrypted[320];
+	void *schedule = NULL;
+	OQS_RAND_n(rand, key, 16);
+	OQS_RAND_n(rand, plaintext, 320);
+	oqs_aes128_load_schedule_c(key, &schedule);
+	oqs_aes128_ecb_enc_c(plaintext, 320, schedule, ciphertext);
+	oqs_aes128_ecb_dec_c(ciphertext, 320, schedule, decrypted);
+	oqs_aes128_free_schedule_c(schedule);
+	if (memcmp(plaintext, decrypted, 320) == 0) {
+		return EXIT_SUCCESS;
+	} else {
+		print_bytes(plaintext, 320);
+		printf("\n");
+		print_bytes(decrypted, 320);
+		printf("\n");
+		return EXIT_FAILURE;
+	}
+}
+
+#ifdef USE_OPENSSL
+static int test_aes128_ecb_correctness_ossl(OQS_RAND *rand) {
+	uint8_t key[16], plaintext[320], ciphertext[320], decrypted[320];
+	void *schedule = NULL;
+	OQS_RAND_n(rand, key, 16);
+	OQS_RAND_n(rand, plaintext, 320);
+	oqs_aes128_load_schedule_ossl(key, &schedule, 1);
+	oqs_aes128_ecb_enc_ossl(plaintext, 320, schedule, ciphertext);
+	oqs_aes128_free_schedule_ossl(schedule);
+	oqs_aes128_load_schedule_ossl(key, &schedule, 0);
+	oqs_aes128_ecb_dec_ossl(ciphertext, 320, schedule, decrypted);
+	oqs_aes128_free_schedule_ossl(schedule);
+	if (memcmp(plaintext, decrypted, 320) == 0) {
+		return EXIT_SUCCESS;
+	} else {
+		print_bytes(plaintext, 320);
+		printf("\n");
+		print_bytes(decrypted, 320);
+		printf("\n");
+		return EXIT_FAILURE;
+	}
+}
+#endif
+
+static void speed_aes128_c(OQS_RAND *rand) {
+	uint8_t key[16], plaintext[320], ciphertext[320], decrypted[320];
+	void *schedule = NULL;
+	OQS_RAND_n(rand, key, 16);
+	OQS_RAND_n(rand, plaintext, 320);
+	TIME_OPERATION_SECONDS({ oqs_aes128_load_schedule_c(key, &schedule); oqs_aes128_free_schedule_c(schedule); }, "oqs_aes128_load_schedule_c", BENCH_DURATION);
+
+	oqs_aes128_load_schedule_c(key, &schedule);
+	TIME_OPERATION_SECONDS(oqs_aes128_enc_c(plaintext, schedule, ciphertext), "oqs_aes128_enc_c", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_dec_c(ciphertext, schedule, decrypted), "oqs_aes128_dec_c", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_c(plaintext, 320, key, ciphertext), "oqs_aes128_ecb_enc_c", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_c(ciphertext, 320, key, decrypted), "oqs_aes128_ecb_dec_c", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_sch_c(plaintext, 320, schedule, ciphertext), "oqs_aes128_ecb_enc_sch_c", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_sch_c(ciphertext, 320, schedule, decrypted), "oqs_aes128_ecb_dec_sch_c", BENCH_DURATION);
+	oqs_aes128_free_schedule_c(schedule);
+}
+
+#ifdef AES_ENABLE_NI
+
+static void speed_aes128_ni(OQS_RAND *rand) {
+	uint8_t key[16], plaintext[320], ciphertext[320], decrypted[320];
+	void *schedule = NULL;
+	OQS_RAND_n(rand, key, 16);
+	OQS_RAND_n(rand, plaintext, 320);
+	TIME_OPERATION_SECONDS({ oqs_aes128_load_schedule_ni(key, &schedule); oqs_aes128_free_schedule_ni(schedule); }, "oqs_aes128_load_schedule_ni", BENCH_DURATION);
+
+	oqs_aes128_load_schedule_ni(key, &schedule);
+	TIME_OPERATION_SECONDS(oqs_aes128_enc_ni(plaintext, schedule, ciphertext), "oqs_aes128_enc_ni", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_dec_ni(ciphertext, schedule, decrypted), "oqs_aes128_dec_ni", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_ni(plaintext, 320, key, ciphertext), "oqs_aes128_ecb_enc_ni", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_ni(ciphertext, 320, key, decrypted), "oqs_aes128_ecb_dec_ni", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_sch_ni(plaintext, 320, schedule, ciphertext), "oqs_aes128_ecb_enc_sch_ni", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_sch_ni(ciphertext, 320, schedule, decrypted), "oqs_aes128_ecb_dec_sch_ni", BENCH_DURATION);
+	oqs_aes128_free_schedule_ni(schedule);
+}
+#endif
+
+#ifdef USE_OPENSSL
+static void speed_aes128_ossl(OQS_RAND *rand) {
+	uint8_t key[16], plaintext[320], ciphertext[320];
+	void *schedule = NULL;
+	OQS_RAND_n(rand, key, 16);
+	OQS_RAND_n(rand, plaintext, 320);
+	TIME_OPERATION_SECONDS(oqs_aes128_load_schedule_ossl(key, &schedule, 1), "oqs_aes128_load_schedule_ossl 1", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_load_schedule_ossl(key, &schedule, 0), "oqs_aes128_load_schedule_ossl 0", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_ossl(plaintext, 320, key, ciphertext), "oqs_aes128_ecb_enc_ossl", BENCH_DURATION);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_ossl(ciphertext, 320, key, plaintext), "oqs_aes128_ecb_dec_ossl", BENCH_DURATION);
+	oqs_aes128_load_schedule_ossl(key, &schedule, 1);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_enc_sch_ossl(plaintext, 320, schedule, ciphertext), "oqs_aes128_ecb_enc_sch_ossl", BENCH_DURATION);
+	oqs_aes128_load_schedule_ossl(key, &schedule, 0);
+	TIME_OPERATION_SECONDS(oqs_aes128_ecb_dec_sch_ossl(ciphertext, 320, schedule, plaintext), "oqs_aes128_ecb_dec_sch_ossl", BENCH_DURATION);
+}
+#endif
+
+int main(int argc, char **argv) {
+	int ret;
+	bool bench = false;
+
+	for (int i = 1; i < argc; i++) {
+		if (argv[i][0] == '-') {
+			if (strcmp(argv[i], "--bench") == 0 || strcmp(argv[i], "-b") == 0) {
+				bench = true;
+			} else {
+				printf("Usage: ./test_aes [options]\n");
+				printf("\nOptions:\n");
+				printf("  --bench, -b\n");
+				printf("    Run benchmarks\n");
+				if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "-help") == 0) || (strcmp(argv[i], "--help") == 0)) {
+					return EXIT_SUCCESS;
+				} else {
+					return EXIT_FAILURE;
+				}
+			}
+		}
+	}
+
+	printf("=== test_aes correctness ===\n");
+	OQS_RAND *rand = OQS_RAND_new(OQS_RAND_alg_default);
+	if (rand == NULL) {
+		eprintf("OQS_RAND_new() failed\n");
+		goto err;
+	}
+	TEST_REPEATEDLY(test_aes128_correctness_c(rand));
+#ifdef AES_ENABLE_NI
+	TEST_REPEATEDLY(test_aes128_correctness_ni(rand));
+	TEST_REPEATEDLY(test_aes128_c_equals_ni(rand));
+#endif
+	TEST_REPEATEDLY(test_aes128_ecb_correctness_c(rand));
+#ifdef AES_ENABLE_NI
+	TEST_REPEATEDLY(test_aes128_ecb_correctness_ni(rand));
+#endif
+#ifdef USE_OPENSSL
+	TEST_REPEATEDLY(test_aes128_ecb_correctness_ossl(rand));
+#endif
+	printf("Tests passed.\n\n");
+
+	if (bench) {
+		printf("=== test_aes performance ===\n");
+		PRINT_TIMER_HEADER
+		speed_aes128_c(rand);
+#ifdef AES_ENABLE_NI
+		speed_aes128_ni(rand);
+#endif
+#ifdef USE_OPENSSL
+		speed_aes128_ossl(rand);
+#endif
+		PRINT_TIMER_FOOTER
+	}
+
+	ret = EXIT_SUCCESS;
+	goto cleanup;
+err:
+	ret = EXIT_FAILURE;
+cleanup:
+	OQS_RAND_free(rand);
+	return ret;
+}
diff --git a/crypt/liboqs/crypto/rand/Makefile.am b/crypt/liboqs/crypto/rand/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..ee421b82e26c6d4c01d9093bc3ab25d645872905
--- /dev/null
+++ b/crypt/liboqs/crypto/rand/Makefile.am
@@ -0,0 +1,16 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = librand.la
+
+librand_la_SOURCES = rand.c
+
+librand_la_CPPFLAGS = -I../../../include
+if USE_OPENSSL
+librand_la_CPPFLAGS += -I$(OPENSSL_DIR)/include
+endif
+librand_la_CPPFLAGS += $(AM_CPPFLAGS)
+
+librand_la_LDFLAGS =
+if USE_OPENSSL
+librand_la_LDFLAGS += -L$(OPENSSL_DIR)/lib
+endif
+librand_la_LDFLAGS += $(AM_LDFLAGS)
diff --git a/crypt/liboqs/crypto/rand/rand.c b/crypt/liboqs/crypto/rand/rand.c
new file mode 100644
index 0000000000000000000000000000000000000000..4dfc188f165c79b0971f6c4a950da63b4ffaa76d
--- /dev/null
+++ b/crypt/liboqs/crypto/rand/rand.c
@@ -0,0 +1,183 @@
+#include <assert.h>
+#include <stdio.h>
+#include <math.h>
+#if defined(WINDOWS)
+#include <windows.h>
+#include <Wincrypt.h>
+#else
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#endif
+
+#include <oqs/rand.h>
+#include <oqs/rand_urandom_aesctr.h>
+#include <oqs/rand_urandom_chacha20.h>
+
+OQS_RAND *OQS_RAND_new(enum OQS_RAND_alg_name alg_name) {
+	switch (alg_name) {
+	case OQS_RAND_alg_default:
+	case OQS_RAND_alg_urandom_chacha20:
+		return OQS_RAND_urandom_chacha20_new();
+	case OQS_RAND_alg_urandom_aesctr:
+		return OQS_RAND_urandom_aesctr_new();
+	default:
+		assert(0);
+		return NULL; // avoid a potentially-uninitialized-variable warning in VS
+	}
+}
+
+uint8_t OQS_RAND_8(OQS_RAND *r) {
+	return r->rand_8(r);
+}
+
+uint32_t OQS_RAND_32(OQS_RAND *r) {
+	return r->rand_32(r);
+}
+
+uint64_t OQS_RAND_64(OQS_RAND *r) {
+	return r->rand_64(r);
+}
+
+void OQS_RAND_n(OQS_RAND *r, uint8_t *out, size_t n) {
+	r->rand_n(r, out, n);
+}
+
+void OQS_RAND_free(OQS_RAND *r) {
+	if (r) {
+		r->free(r);
+	}
+}
+
+#if !defined(WINDOWS)
+/* Specifying inline here breaks the Windows build, so it is applied only on other platforms */
+inline
+#endif
+void OQS_RAND_test_record_occurrence(const unsigned char b, unsigned long occurrences[256]) {
+	occurrences[b] += 1;
+}
+
+double OQS_RAND_test_statistical_distance_from_uniform(const unsigned long occurrences[256]) {
+
+	// compute total number of samples
+	unsigned long total = 0;
+	for (int i = 0; i < 256; i++) {
+		total += occurrences[i];
+	}
+
+	// compute statistical distance from uniform
+	// SD(X,Y) = 1/2 \sum_z | Pr[X=z] - Pr[Y=z] |
+	//         = 1/2 \sum_z | 1/256   - Pr[Y=z] |
+	double distance = 0.0;
+	for (int i = 0; i < 256; i++) {
+		distance += fabs(1.0 / 256.0 - (double) occurrences[i] / (double) total);
+	}
+	distance /= 2.0;
+
+	return distance;
+}
+
+// Even for a perfectly uniform generator, if the number of samples is
+// low then the std dev of the counts will be high.  So, instead, whilst
+// still assuming the number of samples isn't super-low, we calculate an
+// approximate Chi-squared statistic and back-convert to the Normal
+// distribution.  The number of sigmas is reported: -3 to +3 is pretty
+// ordinary, big negative is suspiciously-flat counts, big positive is
+// wildly-fluctuating counts.
+double OQS_RAND_zscore_deviation_from_uniform(const unsigned long occurrences[256]) {
+	double quantiles[102] = {
+	    156.7872, 158.4155, 160.0555, 161.7072, 163.3707, 165.0460, 166.7331, 168.4321,
+	    170.1430, 171.8658, 173.6006, 175.3475, 177.1064, 178.8773, 180.6604, 182.4557,
+	    184.2631, 186.0828, 187.9147, 189.7589, 191.6155, 193.4844, 195.3657, 197.2594,
+	    199.1656, 201.0843, 203.0155, 204.9593, 206.9157, 208.8847, 210.8663, 212.8607,
+	    214.8678, 216.8877, 218.9203, 220.9658, 223.0241, 225.0953, 227.1794, 229.2765,
+	    231.3866, 233.5096, 235.6457, 237.7949, 239.9572, 242.1326, 244.3212, 246.5230,
+	    248.7380, 250.9663, 253.2079, 255.4627, 257.7310, 260.0126, 262.3076, 264.6160,
+	    266.9379, 269.2733, 271.6222, 273.9846, 276.3607, 278.7503, 281.1536, 283.5705,
+	    286.0011, 288.4454, 290.9035, 293.3754, 295.8610, 298.3605, 300.8739, 303.4011,
+	    305.9422, 308.4973, 311.0663, 313.6493, 316.2463, 318.8574, 321.4825, 324.1217,
+	    326.7751, 329.4426, 332.1242, 334.8201, 337.5301, 340.2544, 342.9930, 345.7459,
+	    348.5131, 351.2947, 354.0906, 356.9009, 359.7256, 362.5648, 365.4184, 368.2866,
+	    371.1692, 374.0664, 376.9782, 379.9045, 382.8454, 385.8010}; // -5.05 to +5.05 sigma: qchisq(pnorm(seq(-5.05,5.05,length.out=102)),255)
+	unsigned long total;
+	double chsq;
+	int i;
+
+	for (total = i = 0; i < 256; i++) {
+		total += occurrences[i];
+	}
+	if (total / 256. < 5) {
+		return ZSCORE_SPARSE;
+	}
+
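+	// Pearson chi-squared statistic over the 256 bins: sum of (observed - expected)^2 / expected, with expected = total / 256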
+	for (chsq = i = 0; i < 256; i++) {
+		chsq += pow(occurrences[i] - total / 256., 2) * 256. / total;
+	}
+
+	if (chsq <= quantiles[0]) {
+		return ZSCORE_BIGNEG;
+	}
+	for (i = 1; i < 102; i++) {
+		if (chsq <= quantiles[i]) {
+			return (i - 51) / 10.0;
+		}
+	}
+	return ZSCORE_BIGPOS;
+}
+//
+
+// Convenience function for statistics reporting
+	double zscore = OQS_RAND_zscore_deviation_from_uniform(occurrences);
+	printf("%sStatistical distance from uniform: %12.10f\n", indent, OQS_RAND_test_statistical_distance_from_uniform(occurrences));
+	printf("%s   Z-score deviation from uniform: ", indent);
+	if (zscore == ZSCORE_BIGNEG) {
+		printf("less than -5.0 sigma ***\n");
+	} else if (zscore == ZSCORE_BIGPOS) {
+		printf("more than +5.0 sigma ***\n");
+	} else if (zscore == ZSCORE_SPARSE) {
+		printf("(too few data)\n");
+	} else {
+		printf("about %.1f sigma\n", zscore);
+	}
+	return;
+}
+
+int OQS_RAND_get_system_entropy(uint8_t *buf, size_t n) {
+	int result = 0;
+
+#if !defined(WINDOWS)
+	int fd = 0;
+#endif
+
+	if (!buf) {
+		goto err;
+	}
+
+#if defined(WINDOWS)
+	HCRYPTPROV hCryptProv;
+	if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT) ||
+	    !CryptGenRandom(hCryptProv, (DWORD) n, buf)) {
+		goto err;
+	}
+#else
+	fd = open("/dev/urandom", O_RDONLY);
+	if (fd <= 0) {
+		goto err;
+	}
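+	/* a single read() may return fewer than n bytes; a short read is treated as failure rather than retried */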
+	size_t r = read(fd, buf, n);
+	if (r != n) {
+		goto err;
+	}
+#endif
+	result = 1;
+
+err:
+#if !defined(WINDOWS)
+	if (fd > 0) {
+		close(fd);
+	}
+#endif
+
+	return result;
+}
diff --git a/crypt/liboqs/crypto/rand/rand.h b/crypt/liboqs/crypto/rand/rand.h
new file mode 100644
index 0000000000000000000000000000000000000000..710e449273cee3b5ea0880fc0b8033e16bdade83
--- /dev/null
+++ b/crypt/liboqs/crypto/rand/rand.h
@@ -0,0 +1,98 @@
+/**
+ * \file rand.h
+ * \brief Header defining the generic OQS PRNG
+ */
+
+#ifndef __OQS_RAND_H
+#define __OQS_RAND_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+enum OQS_RAND_alg_name {
+	OQS_RAND_alg_default,
+	OQS_RAND_alg_urandom_chacha20,
+	OQS_RAND_alg_urandom_aesctr,
+};
+
+typedef struct OQS_RAND OQS_RAND;
+
+/**
+ * OQS PRNG object
+ */
+struct OQS_RAND {
+
+	/**
+	 * Specifies the name of the random number function
+	 */
+	char *method_name;
+
+	/**
+	 * Estimated number of bits of security provided against a classical
+	 * attacker
+	 */
+	uint16_t estimated_classical_security;
+
+	/**
+	 * Estimated number of bits of security provided against a quantum
+	 * attacker
+	 */
+	uint16_t estimated_quantum_security;
+
+	/**
+	 * Pointer for storing the state of the PRNG
+	 */
+	void *ctx;
+
+	/**
+	 * Function which returns an 8-bit random unsigned integer
+	 */
+	uint8_t (*rand_8)(OQS_RAND *r);
+
+	/**
+	 * Function which returns a 32-bit random unsigned integer
+	 */
+	uint32_t (*rand_32)(OQS_RAND *r);
+
+	/**
+	 * Function which returns a 64-bit random unsigned integer
+	 */
+	uint64_t (*rand_64)(OQS_RAND *r);
+
+	/**
+	 * Function which generates n random 8-bit unsigned integers
+	 *
+	 * @param out : pointer to an array large enough to store the output integers (\f$\text{size} \geq n\f$)
+	 * @param n : number of integers to generate
+	 */
+	void (*rand_n)(OQS_RAND *r, uint8_t *out, size_t n);
+
+	/**
+	 * Pointer to a function for freeing the allocated PRNG structure
+	 *
+	 * @param r : PRNG structure
+	 *
+	 */
+	void (*free)(OQS_RAND *r);
+};
+
+OQS_RAND *OQS_RAND_new(enum OQS_RAND_alg_name alg_name);
+
+uint8_t OQS_RAND_8(OQS_RAND *r);
+uint32_t OQS_RAND_32(OQS_RAND *r);
+uint64_t OQS_RAND_64(OQS_RAND *r);
+void OQS_RAND_n(OQS_RAND *r, uint8_t *out, size_t n);
+
+void OQS_RAND_free(OQS_RAND *r);
+
+void OQS_RAND_test_record_occurrence(const unsigned char b, unsigned long occurrences[256]);
+double OQS_RAND_test_statistical_distance_from_uniform(const unsigned long occurrences[256]);
+
+#define ZSCORE_SPARSE (999.999)
+#define ZSCORE_BIGNEG (-100.0)
+#define ZSCORE_BIGPOS (+100.0)
+double OQS_RAND_zscore_deviation_from_uniform(const unsigned long occurrences[256]);
+void OQS_RAND_report_statistics(const unsigned long occurrences[256], const char *indent);
+
+int OQS_RAND_get_system_entropy(uint8_t *buf, size_t n);
+#endif
diff --git a/crypt/liboqs/crypto/rand/test_rand.c b/crypt/liboqs/crypto/rand/test_rand.c
new file mode 100644
index 0000000000000000000000000000000000000000..253a8e8ba17d3f0f059e17ae93ae94b1cb6277a3
--- /dev/null
+++ b/crypt/liboqs/crypto/rand/test_rand.c
@@ -0,0 +1,197 @@
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <oqs/rand.h>
+
+#include "../../common/common.h"
+
+struct rand_testcase {
+	enum OQS_RAND_alg_name alg_name;
+};
+
+/* Add new testcases here */
+struct rand_testcase rand_testcases[] = {
+    {OQS_RAND_alg_urandom_chacha20},
+    {OQS_RAND_alg_urandom_aesctr},
+};
+
+#define RAND_TEST_ITERATIONS 10000000L
+
+static void rand_test_distribution_8(OQS_RAND *rand, unsigned long occurrences[256], int iterations) {
+	uint8_t b;
+	for (int i = 0; i < iterations; i++) {
+		b = OQS_RAND_8(rand);
+		OQS_RAND_test_record_occurrence(b, occurrences);
+	}
+}
+
+static void rand_test_distribution_32(OQS_RAND *rand, unsigned long occurrences[256], int iterations) {
+	uint32_t x;
+	for (int i = 0; i < iterations; i++) {
+		x = OQS_RAND_32(rand);
+		uint8_t b;
+		for (size_t j = 0; j < sizeof(uint32_t); j++) {
+			b = (x >> (8 * j)) & 0xFF; // extract the j-th byte
+			OQS_RAND_test_record_occurrence(b, occurrences);
+		}
+	}
+}
+
+static void rand_test_distribution_64(OQS_RAND *rand, unsigned long occurrences[256], int iterations) {
+	uint64_t x;
+	for (int i = 0; i < iterations; i++) {
+		x = OQS_RAND_64(rand);
+		uint8_t b;
+		for (size_t j = 0; j < sizeof(uint64_t); j++) {
+			b = (x >> (8 * j)) & 0xFF; // extract the j-th byte
+			OQS_RAND_test_record_occurrence(b, occurrences);
+		}
+	}
+}
+
+static int rand_test_distribution_n(OQS_RAND *rand, unsigned long occurrences[256], int len) {
+	uint8_t *x = malloc(len);
+	if (x == NULL) {
+		return 0;
+	}
+	OQS_RAND_n(rand, x, len);
+	for (int i = 0; i < len; i++) {
+		OQS_RAND_test_record_occurrence(x[i], occurrences);
+	}
+	free(x);
+	return 1;
+}
+
+#define PRINT_HEX_STRING(label, str, len)                        \
+	{                                                            \
+		printf("%-20s (%4zu bytes):  ", (label), (size_t)(len)); \
+		for (size_t i = 0; i < (len); i++) {                     \
+			printf("%02X", ((unsigned char *) (str))[i]);        \
+		}                                                        \
+		printf("\n");                                            \
+	}
+
+static int rand_test_distribution_wrapper(enum OQS_RAND_alg_name alg_name, int iterations, bool quiet) {
+
+	OQS_RAND *rand = OQS_RAND_new(alg_name);
+	if (rand == NULL) {
+		eprintf("rand is NULL\n");
+		return 0;
+	}
+
+	if (!quiet) {
+		printf("================================================================================\n");
+		printf("Sample outputs of PRNG %s\n", rand->method_name);
+		printf("================================================================================\n");
+
+		uint8_t x[256];
+		OQS_RAND_n(rand, x, 256);
+		PRINT_HEX_STRING("OQS_RAND_n, n = 256", x, 256)
+
+		uint8_t y8 = OQS_RAND_8(rand);
+		PRINT_HEX_STRING("OQS_RAND_8", (uint8_t *) &y8, sizeof(y8));
+		y8 = OQS_RAND_8(rand);
+		PRINT_HEX_STRING("OQS_RAND_8", (uint8_t *) &y8, sizeof(y8));
+
+		uint32_t y32 = OQS_RAND_32(rand);
+		PRINT_HEX_STRING("OQS_RAND_32", (uint8_t *) &y32, sizeof(y32));
+		y32 = OQS_RAND_32(rand);
+		PRINT_HEX_STRING("OQS_RAND_32", (uint8_t *) &y32, sizeof(y32));
+
+		uint64_t y64 = OQS_RAND_64(rand);
+		PRINT_HEX_STRING("OQS_RAND_64", (uint8_t *) &y64, sizeof(y64));
+		y64 = OQS_RAND_64(rand);
+		PRINT_HEX_STRING("OQS_RAND_64", (uint8_t *) &y64, sizeof(y64));
+
+		OQS_RAND_n(rand, x, 256);
+		PRINT_HEX_STRING("OQS_RAND_n, n = 256", x, 256)
+	}
+
+	printf("================================================================================\n");
+	printf("Testing distribution of PRNG %s\n", rand->method_name);
+	printf("================================================================================\n");
+
+	unsigned long occurrences[256];
+	for (int i = 0; i < 256; i++) {
+		occurrences[i] = 0;
+	}
+
+	printf("1-byte mode for %d iterations\n", 8 * iterations);
+	rand_test_distribution_8(rand, occurrences, 8 * iterations);
+	OQS_RAND_report_statistics(occurrences, "    ");
+
+	for (int i = 0; i < 256; i++) {
+		occurrences[i] = 0;
+	}
+
+	printf("4-byte mode for %d iterations\n", 2 * iterations);
+	rand_test_distribution_32(rand, occurrences, 2 * iterations);
+	OQS_RAND_report_statistics(occurrences, "    ");
+
+	for (int i = 0; i < 256; i++) {
+		occurrences[i] = 0;
+	}
+
+	printf("8-byte mode for %d iterations\n", iterations);
+	rand_test_distribution_64(rand, occurrences, iterations);
+	OQS_RAND_report_statistics(occurrences, "    ");
+
+	for (int i = 0; i < 256; i++) {
+		occurrences[i] = 0;
+	}
+
+	printf("n-byte mode for %d bytes\n", 8 * iterations);
+	rand_test_distribution_n(rand, occurrences, 8 * iterations);
+	OQS_RAND_report_statistics(occurrences, "    ");
+
+	OQS_RAND_free(rand);
+
+	return 1;
+}
+
+int main(int argc, char **argv) {
+
+	int success;
+	bool quiet = false;
+
+	for (int i = 1; i < argc; i++) {
+		if (argv[i][0] == '-') {
+			if (strcmp(argv[i], "--quiet") == 0 || strcmp(argv[i], "-q") == 0) {
+				quiet = true;
+			} else {
+				printf("Usage: ./test_rand [options]\n");
+				printf("\nOptions:\n");
+				printf("  --quiet, -q\n");
+				printf("    Less verbose output\n");
+				if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "-help") == 0) || (strcmp(argv[i], "--help") == 0)) {
+					return EXIT_SUCCESS;
+				} else {
+					return EXIT_FAILURE;
+				}
+			}
+		}
+	}
+
+	size_t rand_testcases_len = sizeof(rand_testcases) / sizeof(struct rand_testcase);
+	for (size_t i = 0; i < rand_testcases_len; i++) {
+		success = rand_test_distribution_wrapper(rand_testcases[i].alg_name, RAND_TEST_ITERATIONS, quiet);
+		if (success != 1) {
+			goto err;
+		}
+	}
+
+	success = 1;
+	goto cleanup;
+
+err:
+	success = 0;
+	eprintf("ERROR!\n");
+
+cleanup:
+
+	return (success == 1) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/crypt/liboqs/crypto/rand_urandom_aesctr/Makefile.am b/crypt/liboqs/crypto/rand_urandom_aesctr/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..b587d3cb90341eea827d4747736ce15f8e062790
--- /dev/null
+++ b/crypt/liboqs/crypto/rand_urandom_aesctr/Makefile.am
@@ -0,0 +1,7 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = librandaesctr.la
+
+librandaesctr_la_SOURCES = rand_urandom_aesctr.c
+librandaesctr_la_CPPFLAGS = -I../../../include -I.
+librandaesctr_la_CPPFLAGS += $(AM_CPPFLAGS)
+
diff --git a/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.c b/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.c
new file mode 100644
index 0000000000000000000000000000000000000000..a10c41baf2253c0c66431da850b889cfd013a2df
--- /dev/null
+++ b/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.c
@@ -0,0 +1,142 @@
+#include <sys/types.h>
+#if defined(WINDOWS)
+#include <windows.h>
+#include <Wincrypt.h>
+#else
+#include <strings.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#endif
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h> //memcpy
+
+#include <assert.h>
+#include <oqs/aes.h>
+#include <oqs/rand.h>
+#include <oqs/rand_urandom_aesctr.h>
+
+#if defined(WINDOWS)
+#define strdup _strdup // for strdup deprecation warning
+#endif
+
+typedef struct oqs_rand_urandom_aesctr_ctx {
+	uint64_t ctr;
+	void *schedule;
+	uint8_t cache[64];
+	size_t cache_next_byte;
+} oqs_rand_urandom_aesctr_ctx;
+
+static oqs_rand_urandom_aesctr_ctx *oqs_rand_urandom_aesctr_ctx_new() {
+	oqs_rand_urandom_aesctr_ctx *rand_ctx = NULL;
+	rand_ctx = (oqs_rand_urandom_aesctr_ctx *) malloc(sizeof(oqs_rand_urandom_aesctr_ctx));
+	if (rand_ctx == NULL) {
+		goto err;
+	}
+	uint8_t key[16];
+	if (!OQS_RAND_get_system_entropy(key, 16)) {
+		goto err;
+	}
+	OQS_AES128_load_schedule(key, &rand_ctx->schedule, 1);
+	rand_ctx->cache_next_byte = 64; // cache is empty
+	rand_ctx->ctr = 0;
+	goto okay;
+err:
+	if (rand_ctx) {
+		free(rand_ctx);
+	}
+	return NULL;
+okay:
+	return rand_ctx;
+}
+
+void OQS_RAND_urandom_aesctr_n(OQS_RAND *r, uint8_t *out, size_t n) {
+	oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx;
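+	// AES-128 in counter mode: fill the output buffer with consecutive 64-bit counter values, then ECB-encrypt it in place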
+	const uint64_t num_full_blocks = n / 16;
+	uint64_t *half_blocks = (uint64_t *) out;
+	for (size_t i = 0; i < num_full_blocks; i++) {
+		half_blocks[2 * i] = rand_ctx->ctr++;
+		half_blocks[2 * i + 1] = rand_ctx->ctr++;
+	}
+	OQS_AES128_ECB_enc_sch(out, 16 * num_full_blocks, rand_ctx->schedule, out);
+	if (n % 16 > 0) {
+		uint8_t tmp_8[16];
+		uint64_t *tmp_64 = (uint64_t *) tmp_8;
+		tmp_64[0] = rand_ctx->ctr++;
+		tmp_64[1] = rand_ctx->ctr++;
+		OQS_AES128_ECB_enc_sch(tmp_8, 16, rand_ctx->schedule, tmp_8);
+		memcpy(out + 16 * num_full_blocks, tmp_8, n % 16);
+	}
+}
+
+static void OQS_RAND_urandom_aesctr_fill_cache(OQS_RAND *r) {
+	oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx;
+	OQS_RAND_urandom_aesctr_n(r, rand_ctx->cache, sizeof(rand_ctx->cache));
+	rand_ctx->cache_next_byte = 0;
+}
+
+uint8_t OQS_RAND_urandom_aesctr_8(OQS_RAND *r) {
+	oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx;
+	if (rand_ctx->cache_next_byte > sizeof(rand_ctx->cache) - 1) {
+		OQS_RAND_urandom_aesctr_fill_cache(r);
+	}
+	uint8_t out = rand_ctx->cache[rand_ctx->cache_next_byte];
+	rand_ctx->cache_next_byte += 1;
+	return out;
+}
+
+uint32_t OQS_RAND_urandom_aesctr_32(OQS_RAND *r) {
+	oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx;
+	if (rand_ctx->cache_next_byte > sizeof(rand_ctx->cache) - 4) {
+		OQS_RAND_urandom_aesctr_fill_cache(r);
+	}
+	uint32_t out;
+	memcpy(&out, &rand_ctx->cache[rand_ctx->cache_next_byte], 4);
+	rand_ctx->cache_next_byte += 4;
+	return out;
+}
+
+uint64_t OQS_RAND_urandom_aesctr_64(OQS_RAND *r) {
+	oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx;
+	if (rand_ctx->cache_next_byte > sizeof(rand_ctx->cache) - 8) {
+		OQS_RAND_urandom_aesctr_fill_cache(r);
+	}
+	uint64_t out;
+	memcpy(&out, &rand_ctx->cache[rand_ctx->cache_next_byte], 8);
+	rand_ctx->cache_next_byte += 8;
+	return out;
+}
+
+void OQS_RAND_urandom_aesctr_free(OQS_RAND *r) {
+	if (r) {
+		oqs_rand_urandom_aesctr_ctx *rand_ctx = (oqs_rand_urandom_aesctr_ctx *) r->ctx;
+		if (rand_ctx) {
+			OQS_AES128_free_schedule(rand_ctx->schedule);
+		}
+		free(r->ctx);
+		free(r->method_name);
+	}
+	free(r);
+}
+
+OQS_RAND *OQS_RAND_urandom_aesctr_new() {
+	OQS_RAND *r = malloc(sizeof(OQS_RAND));
+	if (r == NULL) {
+		return NULL;
+	}
+	r->method_name = strdup("urandom_aesctr");
+	r->ctx = oqs_rand_urandom_aesctr_ctx_new();
+	if (r->ctx == NULL || r->method_name == NULL) {
+		OQS_RAND_urandom_aesctr_free(r);
+		return NULL;
+	}
+	r->estimated_classical_security = 128;
+	r->estimated_quantum_security = 64; // Grover search
+	r->rand_8 = &OQS_RAND_urandom_aesctr_8;
+	r->rand_32 = &OQS_RAND_urandom_aesctr_32;
+	r->rand_64 = &OQS_RAND_urandom_aesctr_64;
+	r->rand_n = &OQS_RAND_urandom_aesctr_n;
+	r->free = &OQS_RAND_urandom_aesctr_free;
+	return r;
+}
diff --git a/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.h b/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.h
new file mode 100644
index 0000000000000000000000000000000000000000..d13df4f1f624071710662a588137b0fc00b58973
--- /dev/null
+++ b/crypt/liboqs/crypto/rand_urandom_aesctr/rand_urandom_aesctr.h
@@ -0,0 +1,23 @@
+/**
+ * \file rand_urandom_aesctr.h
+ * \brief Header for the urandom/AES-CTR implementation of OQS_RAND
+ */
+
+#ifndef __OQS_RAND_URANDOM_AESCTR_H
+#define __OQS_RAND_URANDOM_AESCTR_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/rand.h>
+
+OQS_RAND *OQS_RAND_urandom_aesctr_new();
+
+uint8_t OQS_RAND_urandom_aesctr_8(OQS_RAND *r);
+uint32_t OQS_RAND_urandom_aesctr_32(OQS_RAND *r);
+uint64_t OQS_RAND_urandom_aesctr_64(OQS_RAND *r);
+void OQS_RAND_urandom_aesctr_n(OQS_RAND *r, uint8_t *out, size_t n);
+
+void OQS_RAND_urandom_aesctr_free(OQS_RAND *r);
+
+#endif
diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/Makefile.am b/crypt/liboqs/crypto/rand_urandom_chacha20/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..56f5aad7e71c60e43d0d2ef61dd012cb2e79ca89
--- /dev/null
+++ b/crypt/liboqs/crypto/rand_urandom_chacha20/Makefile.am
@@ -0,0 +1,8 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = librandchacha20.la
+
+librandchacha20_la_SOURCES = rand_urandom_chacha20.c
+
+librandchacha20_la_CPPFLAGS = -I../../../include -I.
+librandchacha20_la_CPPFLAGS += $(AM_CPPFLAGS)
+
diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/external/LICENSE.txt b/crypt/liboqs/crypto/rand_urandom_chacha20/external/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d21eeeb7a8dc655a6e6202844ffdc5c191b04fff
--- /dev/null
+++ b/crypt/liboqs/crypto/rand_urandom_chacha20/external/LICENSE.txt
@@ -0,0 +1 @@
+Public domain.
\ No newline at end of file
diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/external/chacha20.c b/crypt/liboqs/crypto/rand_urandom_chacha20/external/chacha20.c
new file mode 100644
index 0000000000000000000000000000000000000000..cc61d4c74aae7bda4ff9ee532a6bd8b99715b328
--- /dev/null
+++ b/crypt/liboqs/crypto/rand_urandom_chacha20/external/chacha20.c
@@ -0,0 +1,106 @@
+/* Adapted from chacha-ref.c version 20080118, D. J. Bernstein, Public domain.
+ * http://cr.yp.to/streamciphers/timings/estreambench/submissions/salsa20/chacha8/ref/chacha.c
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "ecrypt-portable.h"
+
+#define ROTATE(v, c) (ROTL32(v, c))
+#define XOR(v, w) ((v) ^ (w))
+#define PLUS(v, w) (U32V((v) + (w)))
+#define PLUSONE(v) (PLUS((v), 1))
+
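+/* ChaCha quarter-round: four add-xor-rotate steps with rotation amounts 16, 12, 8 and 7 */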
+#define QUARTERROUND(a, b, c, d)        \
+	x[a] = PLUS(x[a], x[b]);            \
+	x[d] = ROTATE(XOR(x[d], x[a]), 16); \
+	x[c] = PLUS(x[c], x[d]);            \
+	x[b] = ROTATE(XOR(x[b], x[c]), 12); \
+	x[a] = PLUS(x[a], x[b]);            \
+	x[d] = ROTATE(XOR(x[d], x[a]), 8);  \
+	x[c] = PLUS(x[c], x[d]);            \
+	x[b] = ROTATE(XOR(x[b], x[c]), 7);
+
+static void salsa20_wordtobyte(u8 output[64], const u32 input[16]) {
+	u32 x[16];
+	int i;
+
+	for (i = 0; i < 16; ++i)
+		x[i] = input[i];
+	for (i = 8; i > 0; i -= 2) {
+		QUARTERROUND(0, 4, 8, 12)
+		QUARTERROUND(1, 5, 9, 13)
+		QUARTERROUND(2, 6, 10, 14)
+		QUARTERROUND(3, 7, 11, 15)
+		QUARTERROUND(0, 5, 10, 15)
+		QUARTERROUND(1, 6, 11, 12)
+		QUARTERROUND(2, 7, 8, 13)
+		QUARTERROUND(3, 4, 9, 14)
+	}
+	for (i = 0; i < 16; ++i)
+		x[i] = PLUS(x[i], input[i]);
+	for (i = 0; i < 16; ++i)
+		U32TO8_LITTLE(output + 4 * i, x[i]);
+}
+
+static const char sigma[16] = "expand 32-byte k";
+
+static void ECRYPT_keysetup(u32 input[16], const u8 k[32]) {
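+	/* state words 0-3 hold the "expand 32-byte k" constant, words 4-11 hold the 256-bit key */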
+	const char *constants;
+
+	input[4] = U8TO32_LITTLE(k + 0);
+	input[5] = U8TO32_LITTLE(k + 4);
+	input[6] = U8TO32_LITTLE(k + 8);
+	input[7] = U8TO32_LITTLE(k + 12);
+	k += 16;
+	constants = sigma;
+	input[8] = U8TO32_LITTLE(k + 0);
+	input[9] = U8TO32_LITTLE(k + 4);
+	input[10] = U8TO32_LITTLE(k + 8);
+	input[11] = U8TO32_LITTLE(k + 12);
+	input[0] = U8TO32_LITTLE(constants + 0);
+	input[1] = U8TO32_LITTLE(constants + 4);
+	input[2] = U8TO32_LITTLE(constants + 8);
+	input[3] = U8TO32_LITTLE(constants + 12);
+}
+
+static void ECRYPT_ivsetup(u32 input[16], const u8 iv[8]) {
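+	/* words 12-13 form the 64-bit block counter (reset to zero), words 14-15 hold the 64-bit IV */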
+	input[12] = 0;
+	input[13] = 0;
+	input[14] = U8TO32_LITTLE(iv + 0);
+	input[15] = U8TO32_LITTLE(iv + 4);
+}
+
+static void ECRYPT_encrypt_bytes(u32 input[16], const u8 *m, u8 *c, size_t bytes) {
+	u8 output[64];
+	size_t i;
+
+	if (!bytes)
+		return;
+	for (;;) {
+		salsa20_wordtobyte(output, input);
+		input[12] = PLUSONE(input[12]);
+		if (!input[12]) {
+			input[13] = PLUSONE(input[13]);
+			/* stopping at 2^70 bytes per nonce is user's responsibility */
+		}
+		if (bytes <= 64) {
+			for (i = 0; i < bytes; ++i)
+				c[i] = m[i] ^ output[i];
+			return;
+		}
+		for (i = 0; i < 64; ++i)
+			c[i] = m[i] ^ output[i];
+		bytes -= 64;
+		c += 64;
+		m += 64;
+	}
+}
+
+static void ECRYPT_keystream_bytes(u32 input[16], u8 *stream, u32 bytes) {
+	u32 i;
+	for (i = 0; i < bytes; ++i)
+		stream[i] = 0;
+	ECRYPT_encrypt_bytes(input, stream, stream, bytes);
+}
diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-config.h b/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-config.h
new file mode 100644
index 0000000000000000000000000000000000000000..6525f4af72aff68ffae19a422ea452934106ea76
--- /dev/null
+++ b/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-config.h
@@ -0,0 +1,272 @@
+/* ecrypt-config.h */
+
+/* *** Normally, it should not be necessary to edit this file. *** */
+
+#ifndef ECRYPT_CONFIG
+#define ECRYPT_CONFIG
+
+/* ------------------------------------------------------------------------- */
+
+/* Guess the endianness of the target architecture. */
+
+/* 
+ * The LITTLE endian machines:
+ */
+#if defined(__ultrix) /* Older MIPS */
+#define ECRYPT_LITTLE_ENDIAN
+#elif defined(__alpha) /* Alpha */
+#define ECRYPT_LITTLE_ENDIAN
+#elif defined(i386) /* x86 (gcc) */
+#define ECRYPT_LITTLE_ENDIAN
+#elif defined(__i386) /* x86 (gcc) */
+#define ECRYPT_LITTLE_ENDIAN
+#elif defined(_M_IX86) /* x86 (MSC, Borland) */
+#define ECRYPT_LITTLE_ENDIAN
+#elif defined(_MSC_VER) /* x86 (surely MSC) */
+#define ECRYPT_LITTLE_ENDIAN
+#elif defined(__INTEL_COMPILER) /* x86 (surely Intel compiler icl.exe) */
+#define ECRYPT_LITTLE_ENDIAN
+
+/* 
+ * The BIG endian machines: 
+ */
+#elif defined(sun) /* Newer Sparc's */
+#define ECRYPT_BIG_ENDIAN
+#elif defined(__ppc__) /* PowerPC */
+#define ECRYPT_BIG_ENDIAN
+
+/* 
+ * Finally machines with UNKNOWN endianness:
+ */
+#elif defined(_AIX) /* RS6000 */
+#define ECRYPT_UNKNOWN
+#elif defined(__hpux) /* HP-PA */
+#define ECRYPT_UNKNOWN
+#elif defined(__aux) /* 68K */
+#define ECRYPT_UNKNOWN
+#elif defined(__dgux) /* 88K (but P6 in latest boxes) */
+#define ECRYPT_UNKNOWN
+#elif defined(__sgi) /* Newer MIPS */
+#define ECRYPT_UNKNOWN
+#else /* Any other processor */
+#define ECRYPT_UNKNOWN
+#endif
+
+/* ------------------------------------------------------------------------- */
+
+/*
+ * Find minimal-width types to store 8-bit, 16-bit, 32-bit, and 64-bit
+ * integers.
+ *
+ * Note: to enable 64-bit types on 32-bit compilers, it might be
+ * necessary to switch from ISO C90 mode to ISO C99 mode (e.g., gcc
+ * -std=c99).
+ */
+
+#include <limits.h>
+
+/* --- check char --- */
+
+#if (UCHAR_MAX / 0xFU > 0xFU)
+#ifndef I8T
+#define I8T char
+#define U8C(v) (v##U)
+
+#if (UCHAR_MAX == 0xFFU)
+#define ECRYPT_I8T_IS_BYTE
+#endif
+
+#endif
+
+#if (UCHAR_MAX / 0xFFU > 0xFFU)
+#ifndef I16T
+#define I16T char
+#define U16C(v) (v##U)
+#endif
+
+#if (UCHAR_MAX / 0xFFFFU > 0xFFFFU)
+#ifndef I32T
+#define I32T char
+#define U32C(v) (v##U)
+#endif
+
+#if (UCHAR_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
+#ifndef I64T
+#define I64T char
+#define U64C(v) (v##U)
+#define ECRYPT_NATIVE64
+#endif
+
+#endif
+#endif
+#endif
+#endif
+
+/* --- check short --- */
+
+#if (USHRT_MAX / 0xFU > 0xFU)
+#ifndef I8T
+#define I8T short
+#define U8C(v) (v##U)
+
+#if (USHRT_MAX == 0xFFU)
+#define ECRYPT_I8T_IS_BYTE
+#endif
+
+#endif
+
+#if (USHRT_MAX / 0xFFU > 0xFFU)
+#ifndef I16T
+#define I16T short
+#define U16C(v) (v##U)
+#endif
+
+#if (USHRT_MAX / 0xFFFFU > 0xFFFFU)
+#ifndef I32T
+#define I32T short
+#define U32C(v) (v##U)
+#endif
+
+#if (USHRT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
+#ifndef I64T
+#define I64T short
+#define U64C(v) (v##U)
+#define ECRYPT_NATIVE64
+#endif
+
+#endif
+#endif
+#endif
+#endif
+
+/* --- check int --- */
+
+#if (UINT_MAX / 0xFU > 0xFU)
+#ifndef I8T
+#define I8T int
+#define U8C(v) (v##U)
+
+#if (UINT_MAX == 0xFFU)
+#define ECRYPT_I8T_IS_BYTE
+#endif
+
+#endif
+
+#if (UINT_MAX / 0xFFU > 0xFFU)
+#ifndef I16T
+#define I16T int
+#define U16C(v) (v##U)
+#endif
+
+#if (UINT_MAX / 0xFFFFU > 0xFFFFU)
+#ifndef I32T
+#define I32T int
+#define U32C(v) (v##U)
+#endif
+
+#if (UINT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
+#ifndef I64T
+#define I64T int
+#define U64C(v) (v##U)
+#define ECRYPT_NATIVE64
+#endif
+
+#endif
+#endif
+#endif
+#endif
+
+/* --- check long --- */
+
+#if (ULONG_MAX / 0xFUL > 0xFUL)
+#ifndef I8T
+#define I8T long
+#define U8C(v) (v##UL)
+
+#if (ULONG_MAX == 0xFFUL)
+#define ECRYPT_I8T_IS_BYTE
+#endif
+
+#endif
+
+#if (ULONG_MAX / 0xFFUL > 0xFFUL)
+#ifndef I16T
+#define I16T long
+#define U16C(v) (v##UL)
+#endif
+
+#if (ULONG_MAX / 0xFFFFUL > 0xFFFFUL)
+#ifndef I32T
+#define I32T long
+#define U32C(v) (v##UL)
+#endif
+
+#if (ULONG_MAX / 0xFFFFFFFFUL > 0xFFFFFFFFUL)
+#ifndef I64T
+#define I64T long
+#define U64C(v) (v##UL)
+#define ECRYPT_NATIVE64
+#endif
+
+#endif
+#endif
+#endif
+#endif
+
+/* --- check long long --- */
+
+#ifdef ULLONG_MAX
+
+#if (ULLONG_MAX / 0xFULL > 0xFULL)
+#ifndef I8T
+#define I8T long long
+#define U8C(v) (v##ULL)
+
+#if (ULLONG_MAX == 0xFFULL)
+#define ECRYPT_I8T_IS_BYTE
+#endif
+
+#endif
+
+#if (ULLONG_MAX / 0xFFULL > 0xFFULL)
+#ifndef I16T
+#define I16T long long
+#define U16C(v) (v##ULL)
+#endif
+
+#if (ULLONG_MAX / 0xFFFFULL > 0xFFFFULL)
+#ifndef I32T
+#define I32T long long
+#define U32C(v) (v##ULL)
+#endif
+
+#if (ULLONG_MAX / 0xFFFFFFFFULL > 0xFFFFFFFFULL)
+#ifndef I64T
+#define I64T long long
+#define U64C(v) (v##ULL)
+#endif
+
+#endif
+#endif
+#endif
+#endif
+
+#endif
+
+/* --- check __int64 --- */
+
+#ifdef _UI64_MAX
+
+#if (_UI64_MAX / 0xFFFFFFFFui64 > 0xFFFFFFFFui64)
+#ifndef I64T
+#define I64T __int64
+#define U64C(v) (v##ui64)
+#endif
+
+#endif
+
+#endif
+
+/* ------------------------------------------------------------------------- */
+
+#endif
diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-portable.h b/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-portable.h
new file mode 100644
index 0000000000000000000000000000000000000000..028ddf8e89d401f1fdb320706e6151cb59a5969d
--- /dev/null
+++ b/crypt/liboqs/crypto/rand_urandom_chacha20/external/ecrypt-portable.h
@@ -0,0 +1,295 @@
+/* ecrypt-portable.h */
+
+/*
+ * WARNING: the conversions defined below are implemented as macros,
+ * and should be used carefully. They should NOT be used with
+ * parameters which perform some action. E.g., the following two lines
+ * are not equivalent:
+ * 
+ *  1) ++x; y = ROTL32(x, n); 
+ *  2) y = ROTL32(++x, n);
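+ *
+ * In case 2) the macro expands its argument twice, e.g. ROTL32(++x, n)
+ * becomes
+ *
+ *  y = (U32V((++x) << (n)) | ((++x) >> (32 - (n))));
+ *
+ * so x is incremented twice and the behaviour is undefined.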
+ */
+
+/*
+ * *** Please do not edit this file. ***
+ *
+ * The default macros can be overridden for specific architectures by
+ * editing 'ecrypt-machine.h'.
+ */
+
+#ifndef ECRYPT_PORTABLE
+#define ECRYPT_PORTABLE
+
+#include "ecrypt-config.h"
+
+/* ------------------------------------------------------------------------- */
+
+/*
+ * The following types are defined (if available):
+ *
+ * u8:  unsigned integer type, at least 8 bits
+ * u16: unsigned integer type, at least 16 bits
+ * u32: unsigned integer type, at least 32 bits
+ * u64: unsigned integer type, at least 64 bits
+ *
+ * s8, s16, s32, s64 -> signed counterparts of u8, u16, u32, u64
+ *
+ * The selection of minimum-width integer types is taken care of by
+ * 'ecrypt-config.h'. Note: to enable 64-bit types on 32-bit
+ * compilers, it might be necessary to switch from ISO C90 mode to ISO
+ * C99 mode (e.g., gcc -std=c99).
+ */
+
+#ifdef I8T
+typedef signed I8T s8;
+typedef unsigned I8T u8;
+#endif
+
+#ifdef I16T
+typedef signed I16T s16;
+typedef unsigned I16T u16;
+#endif
+
+#ifdef I32T
+typedef signed I32T s32;
+typedef unsigned I32T u32;
+#endif
+
+#ifdef I64T
+typedef signed I64T s64;
+typedef unsigned I64T u64;
+#endif
+
+/*
+ * The following macros are used to obtain exact-width results.
+ */
+
+#define U8V(v) ((u8)(v) &U8C(0xFF))
+#define U16V(v) ((u16)(v) &U16C(0xFFFF))
+#define U32V(v) ((u32)(v) &U32C(0xFFFFFFFF))
+#define U64V(v) ((u64)(v) &U64C(0xFFFFFFFFFFFFFFFF))
+
+/* ------------------------------------------------------------------------- */
+
+/*
+ * The following macros return words with their bits rotated over n
+ * positions to the left/right.
+ */
+
+#define ECRYPT_DEFAULT_ROT
+
+#define ROTL8(v, n) \
+	(U8V((v) << (n)) | ((v) >> (8 - (n))))
+
+#define ROTL16(v, n) \
+	(U16V((v) << (n)) | ((v) >> (16 - (n))))
+
+#define ROTL32(v, n) \
+	(U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+#define ROTL64(v, n) \
+	(U64V((v) << (n)) | ((v) >> (64 - (n))))
+
+#define ROTR8(v, n) ROTL8(v, 8 - (n))
+#define ROTR16(v, n) ROTL16(v, 16 - (n))
+#define ROTR32(v, n) ROTL32(v, 32 - (n))
+#define ROTR64(v, n) ROTL64(v, 64 - (n))
+
+/* ------------------------------------------------------------------------- */
+
+/*
+ * The following macros return a word with bytes in reverse order.
+ */
+
+#define ECRYPT_DEFAULT_SWAP
+
+#define SWAP16(v) \
+	ROTL16(v, 8)
+
+#define SWAP32(v)                        \
+	((ROTL32(v, 8) & U32C(0x00FF00FF)) | \
+	 (ROTL32(v, 24) & U32C(0xFF00FF00)))
+
+#ifdef ECRYPT_NATIVE64
+#define SWAP64(v)                                 \
+	((ROTL64(v, 8) & U64C(0x000000FF000000FF)) |  \
+	 (ROTL64(v, 24) & U64C(0x0000FF000000FF00)) | \
+	 (ROTL64(v, 40) & U64C(0x00FF000000FF0000)) | \
+	 (ROTL64(v, 56) & U64C(0xFF000000FF000000)))
+#else
+#define SWAP64(v) \
+	(((u64) SWAP32(U32V(v)) << 32) | (u64) SWAP32(U32V(v >> 32)))
+#endif
+
+#define ECRYPT_DEFAULT_WTOW
+
+#ifdef ECRYPT_LITTLE_ENDIAN
+#define U16TO16_LITTLE(v) (v)
+#define U32TO32_LITTLE(v) (v)
+#define U64TO64_LITTLE(v) (v)
+
+#define U16TO16_BIG(v) SWAP16(v)
+#define U32TO32_BIG(v) SWAP32(v)
+#define U64TO64_BIG(v) SWAP64(v)
+#endif
+
+#ifdef ECRYPT_BIG_ENDIAN
+#define U16TO16_LITTLE(v) SWAP16(v)
+#define U32TO32_LITTLE(v) SWAP32(v)
+#define U64TO64_LITTLE(v) SWAP64(v)
+
+#define U16TO16_BIG(v) (v)
+#define U32TO32_BIG(v) (v)
+#define U64TO64_BIG(v) (v)
+#endif
+
+/*
+ * The following macros load words from an array of bytes with
+ * different types of endianness, and vice versa.
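+ *
+ * For example, with the portable fallbacks below, U8TO32_LITTLE(p) assembles
+ * p[0]..p[3] into a u32 with p[0] as the least significant byte, and
+ * U32TO8_LITTLE(p, v) writes v back out in the same byte order.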
+ */
+
+#define ECRYPT_DEFAULT_BTOW
+
+#if (!defined(ECRYPT_UNKNOWN) && defined(ECRYPT_I8T_IS_BYTE))
+
+#define U8TO16_LITTLE(p) U16TO16_LITTLE(((u16 *) (p))[0])
+#define U8TO32_LITTLE(p) U32TO32_LITTLE(((u32 *) (p))[0])
+#define U8TO64_LITTLE(p) U64TO64_LITTLE(((u64 *) (p))[0])
+
+#define U8TO16_BIG(p) U16TO16_BIG(((u16 *) (p))[0])
+#define U8TO32_BIG(p) U32TO32_BIG(((u32 *) (p))[0])
+#define U8TO64_BIG(p) U64TO64_BIG(((u64 *) (p))[0])
+
+#define U16TO8_LITTLE(p, v) (((u16 *) (p))[0] = U16TO16_LITTLE(v))
+#define U32TO8_LITTLE(p, v) (((u32 *) (p))[0] = U32TO32_LITTLE(v))
+#define U64TO8_LITTLE(p, v) (((u64 *) (p))[0] = U64TO64_LITTLE(v))
+
+#define U16TO8_BIG(p, v) (((u16 *) (p))[0] = U16TO16_BIG(v))
+#define U32TO8_BIG(p, v) (((u32 *) (p))[0] = U32TO32_BIG(v))
+#define U64TO8_BIG(p, v) (((u64 *) (p))[0] = U64TO64_BIG(v))
+
+#else
+
+#define U8TO16_LITTLE(p) \
+	(((u16)((p)[0])) |   \
+	 ((u16)((p)[1]) << 8))
+
+#define U8TO32_LITTLE(p)     \
+	(((u32)((p)[0])) |       \
+	 ((u32)((p)[1]) << 8) |  \
+	 ((u32)((p)[2]) << 16) | \
+	 ((u32)((p)[3]) << 24))
+
+#ifdef ECRYPT_NATIVE64
+#define U8TO64_LITTLE(p)     \
+	(((u64)((p)[0])) |       \
+	 ((u64)((p)[1]) << 8) |  \
+	 ((u64)((p)[2]) << 16) | \
+	 ((u64)((p)[3]) << 24) | \
+	 ((u64)((p)[4]) << 32) | \
+	 ((u64)((p)[5]) << 40) | \
+	 ((u64)((p)[6]) << 48) | \
+	 ((u64)((p)[7]) << 56))
+#else
+#define U8TO64_LITTLE(p) \
+	((u64) U8TO32_LITTLE(p) | ((u64) U8TO32_LITTLE((p) + 4) << 32))
+#endif
+
+#define U8TO16_BIG(p)       \
+	(((u16)((p)[0]) << 8) | \
+	 ((u16)((p)[1])))
+
+#define U8TO32_BIG(p)        \
+	(((u32)((p)[0]) << 24) | \
+	 ((u32)((p)[1]) << 16) | \
+	 ((u32)((p)[2]) << 8) |  \
+	 ((u32)((p)[3])))
+
+#ifdef ECRYPT_NATIVE64
+#define U8TO64_BIG(p)        \
+	(((u64)((p)[0]) << 56) | \
+	 ((u64)((p)[1]) << 48) | \
+	 ((u64)((p)[2]) << 40) | \
+	 ((u64)((p)[3]) << 32) | \
+	 ((u64)((p)[4]) << 24) | \
+	 ((u64)((p)[5]) << 16) | \
+	 ((u64)((p)[6]) << 8) |  \
+	 ((u64)((p)[7])))
+#else
+#define U8TO64_BIG(p) \
+	(((u64) U8TO32_BIG(p) << 32) | (u64) U8TO32_BIG((p) + 4))
+#endif
+
+#define U16TO8_LITTLE(p, v)     \
+	do {                        \
+		(p)[0] = U8V((v));      \
+		(p)[1] = U8V((v) >> 8); \
+	} while (0)
+
+#define U32TO8_LITTLE(p, v)      \
+	do {                         \
+		(p)[0] = U8V((v));       \
+		(p)[1] = U8V((v) >> 8);  \
+		(p)[2] = U8V((v) >> 16); \
+		(p)[3] = U8V((v) >> 24); \
+	} while (0)
+
+#ifdef ECRYPT_NATIVE64
+#define U64TO8_LITTLE(p, v)      \
+	do {                         \
+		(p)[0] = U8V((v));       \
+		(p)[1] = U8V((v) >> 8);  \
+		(p)[2] = U8V((v) >> 16); \
+		(p)[3] = U8V((v) >> 24); \
+		(p)[4] = U8V((v) >> 32); \
+		(p)[5] = U8V((v) >> 40); \
+		(p)[6] = U8V((v) >> 48); \
+		(p)[7] = U8V((v) >> 56); \
+	} while (0)
+#else
+#define U64TO8_LITTLE(p, v)                      \
+	do {                                         \
+		U32TO8_LITTLE((p), U32V((v)));           \
+		U32TO8_LITTLE((p) + 4, U32V((v) >> 32)); \
+	} while (0)
+#endif
+
+#define U16TO8_BIG(p, v)        \
+	do {                        \
+		(p)[0] = U8V((v));      \
+		(p)[1] = U8V((v) >> 8); \
+	} while (0)
+
+#define U32TO8_BIG(p, v)         \
+	do {                         \
+		(p)[0] = U8V((v) >> 24); \
+		(p)[1] = U8V((v) >> 16); \
+		(p)[2] = U8V((v) >> 8);  \
+		(p)[3] = U8V((v));       \
+	} while (0)
+
+#ifdef ECRYPT_NATIVE64
+#define U64TO8_BIG(p, v)         \
+	do {                         \
+		(p)[0] = U8V((v) >> 56); \
+		(p)[1] = U8V((v) >> 48); \
+		(p)[2] = U8V((v) >> 40); \
+		(p)[3] = U8V((v) >> 32); \
+		(p)[4] = U8V((v) >> 24); \
+		(p)[5] = U8V((v) >> 16); \
+		(p)[6] = U8V((v) >> 8);  \
+		(p)[7] = U8V((v));       \
+	} while (0)
+#else
+#define U64TO8_BIG(p, v)                  \
+	do {                                  \
+		U32TO8_BIG((p), U32V((v) >> 32)); \
+		U32TO8_BIG((p) + 4, U32V((v)));   \
+	} while (0)
+#endif
+
+#endif
+
+/* ------------------------------------------------------------------------- */
+
+#endif
diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.c b/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.c
new file mode 100644
index 0000000000000000000000000000000000000000..6dbfcfaebe640f2b70fd5f605edae58bd307e4b8
--- /dev/null
+++ b/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.c
@@ -0,0 +1,143 @@
+#if defined(WINDOWS)
+#pragma warning(disable : 4267)
+#endif
+
+#include <sys/types.h>
+#if defined(WINDOWS)
+#include <windows.h>
+#include <Wincrypt.h>
+#else
+#include <strings.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#endif
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <oqs/rand.h>
+#include <oqs/rand_urandom_chacha20.h>
+
+#include "external/chacha20.c"
+
+#if defined(WINDOWS)
+#define strdup _strdup // for strdup deprecation warning
+#endif
+
+typedef struct OQS_RAND_urandom_chacha20_ctx {
+	uint8_t key[32];             // ChaCha20 key, filled from system entropy
+	uint32_t nonce[2];           // 64-bit nonce, bumped before every keystream request
+	uint8_t cache[64];           // one keystream block kept around for small reads
+	size_t cache_next_byte;      // index of the next unconsumed cache byte; 64 means empty
+	uint32_t chacha20_input[16]; // ChaCha20 input block, set up from the key by ECRYPT_keysetup
+} OQS_RAND_urandom_chacha20_ctx;
+
+static OQS_RAND_urandom_chacha20_ctx *OQS_RAND_urandom_chacha20_ctx_new();
+static void OQS_RAND_urandom_chacha20_fill_cache(OQS_RAND *r);
+static void OQS_RAND_urandom_chacha20_ctx_free(void *rand_ctx);
+
+OQS_RAND *OQS_RAND_urandom_chacha20_new() {
+	OQS_RAND *r = malloc(sizeof(OQS_RAND));
+	if (r == NULL) {
+		return NULL;
+	}
+	r->method_name = strdup("urandom_chacha20");
+	r->ctx = OQS_RAND_urandom_chacha20_ctx_new();
+	if (r->ctx == NULL || r->method_name == NULL) {
+		OQS_RAND_urandom_chacha20_free(r);
+		return NULL;
+	}
+	r->estimated_classical_security = 256;
+	r->estimated_quantum_security = 128; // Grover search
+	r->rand_8 = &OQS_RAND_urandom_chacha20_8;
+	r->rand_32 = &OQS_RAND_urandom_chacha20_32;
+	r->rand_64 = &OQS_RAND_urandom_chacha20_64;
+	r->rand_n = &OQS_RAND_urandom_chacha20_n;
+	r->free = &OQS_RAND_urandom_chacha20_free;
+	return r;
+}
+
+static OQS_RAND_urandom_chacha20_ctx *OQS_RAND_urandom_chacha20_ctx_new() {
+	OQS_RAND_urandom_chacha20_ctx *rand_ctx = NULL;
+	rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) malloc(sizeof(OQS_RAND_urandom_chacha20_ctx));
+	if (rand_ctx == NULL) {
+		goto err;
+	}
+	if (!OQS_RAND_get_system_entropy(rand_ctx->key, 32)) {
+		goto err;
+	}
+	memset(rand_ctx->nonce, 0, 8);
+	rand_ctx->cache_next_byte = 64; // cache is empty
+	ECRYPT_keysetup(rand_ctx->chacha20_input, rand_ctx->key);
+	goto okay;
+err:
+	if (rand_ctx) {
+		free(rand_ctx);
+	}
+	return NULL;
+okay:
+	return rand_ctx;
+}
+
+static void OQS_RAND_urandom_chacha20_fill_cache(OQS_RAND *r) {
+	OQS_RAND_urandom_chacha20_ctx *rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) r->ctx;
+	r->rand_n(r, rand_ctx->cache, 64);
+	rand_ctx->cache_next_byte = 0;
+}
+
+uint8_t OQS_RAND_urandom_chacha20_8(OQS_RAND *r) {
+	OQS_RAND_urandom_chacha20_ctx *rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) r->ctx;
+	if (rand_ctx->cache_next_byte > 64 - 1) {
+		OQS_RAND_urandom_chacha20_fill_cache(r);
+	}
+	uint8_t out = rand_ctx->cache[rand_ctx->cache_next_byte];
+	rand_ctx->cache_next_byte += 1;
+	return out;
+}
+
+uint32_t OQS_RAND_urandom_chacha20_32(OQS_RAND *r) {
+	OQS_RAND_urandom_chacha20_ctx *rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) r->ctx;
+	if (rand_ctx->cache_next_byte > 64 - 4) {
+		OQS_RAND_urandom_chacha20_fill_cache(r);
+	}
+	uint32_t out;
+	memcpy(&out, &rand_ctx->cache[rand_ctx->cache_next_byte], 4);
+	rand_ctx->cache_next_byte += 4;
+	return out;
+}
+
+uint64_t OQS_RAND_urandom_chacha20_64(OQS_RAND *r) {
+	OQS_RAND_urandom_chacha20_ctx *rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) r->ctx;
+	if (rand_ctx->cache_next_byte > 64 - 8) {
+		OQS_RAND_urandom_chacha20_fill_cache(r);
+	}
+	uint64_t out;
+	memcpy(&out, &rand_ctx->cache[rand_ctx->cache_next_byte], 8);
+	rand_ctx->cache_next_byte += 8;
+	return out;
+}
+
+void OQS_RAND_urandom_chacha20_n(OQS_RAND *r, uint8_t *out, size_t n) {
+	OQS_RAND_urandom_chacha20_ctx *rand_ctx = (OQS_RAND_urandom_chacha20_ctx *) r->ctx;
+	rand_ctx->nonce[0]++;
+	if (rand_ctx->nonce[0] == 0) {
+		rand_ctx->nonce[1]++;
+	}
+	ECRYPT_ivsetup(rand_ctx->chacha20_input, (u8 *) rand_ctx->nonce);
+	ECRYPT_keystream_bytes(rand_ctx->chacha20_input, out, n);
+}
+
+static void OQS_RAND_urandom_chacha20_ctx_free(void *rand_ctx) {
+	free(rand_ctx);
+}
+
+void OQS_RAND_urandom_chacha20_free(OQS_RAND *r) {
+	if (r) {
+		OQS_RAND_urandom_chacha20_ctx_free(r->ctx);
+	}
+	if (r) {
+		free(r->method_name);
+	}
+	free(r);
+}
diff --git a/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.h b/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.h
new file mode 100644
index 0000000000000000000000000000000000000000..9617085187cd30d621c773dacb039ca00c44d607
--- /dev/null
+++ b/crypt/liboqs/crypto/rand_urandom_chacha20/rand_urandom_chacha20.h
@@ -0,0 +1,23 @@
+/**
+ * \file rand_urandom_chacha20.h
+ * \brief Header for the ChaCha20-based OQS_RAND implementation seeded from system entropy
+ */
+
+#ifndef __OQS_RAND_URANDOM_CHACHA20_H
+#define __OQS_RAND_URANDOM_CHACHA20_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/rand.h>
+
+OQS_RAND *OQS_RAND_urandom_chacha20_new();
+
+uint8_t OQS_RAND_urandom_chacha20_8(OQS_RAND *r);
+uint32_t OQS_RAND_urandom_chacha20_32(OQS_RAND *r);
+uint64_t OQS_RAND_urandom_chacha20_64(OQS_RAND *r);
+void OQS_RAND_urandom_chacha20_n(OQS_RAND *r, uint8_t *out, size_t n);
+
+void OQS_RAND_urandom_chacha20_free(OQS_RAND *r);
+
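+/*
+ * Minimal usage sketch (a hypothetical caller; error handling omitted):
+ *
+ *   OQS_RAND *r = OQS_RAND_urandom_chacha20_new();
+ *   uint64_t x = OQS_RAND_urandom_chacha20_64(r);  // or r->rand_64(r)
+ *   OQS_RAND_urandom_chacha20_free(r);
+ */
+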
+#endif
diff --git a/crypt/liboqs/crypto/sha3/Makefile.am b/crypt/liboqs/crypto/sha3/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..0e8f3512506582d2d684ff606a6d079c83e209ca
--- /dev/null
+++ b/crypt/liboqs/crypto/sha3/Makefile.am
@@ -0,0 +1,8 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libsha3.la
+
+libsha3_la_SOURCES = sha3.c
+
+libsha3_la_CPPFLAGS = -I../../../include -I.
+libsha3_la_CPPFLAGS += $(AM_CPPFLAGS)
+
diff --git a/crypt/liboqs/crypto/sha3/sha3.c b/crypt/liboqs/crypto/sha3/sha3.c
new file mode 100644
index 0000000000000000000000000000000000000000..1c229e6bcb52372f218e0761057946b703176173
--- /dev/null
+++ b/crypt/liboqs/crypto/sha3/sha3.c
@@ -0,0 +1,480 @@
+/* Based on the public domain implementation in
+ * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html
+ * by Ronny Van Keer
+ * and the public domain "TweetFips202" implementation
+ * from https://twitter.com/tweetfips202
+ * by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe */
+
+#if defined(WINDOWS)
+#pragma warning(disable : 4244)
+#endif
+
+#include <assert.h>
+#include <oqs/sha3.h>
+#include <stdint.h>
+
+#define SHAKE128_RATE OQS_SHA3_SHAKE128_RATE
+#define SHA3_256_RATE OQS_SHA3_SHA3_256_RATE
+#define SHA3_512_RATE OQS_SHA3_SHA3_512_RATE
+#define NROUNDS 24
+#define ROL(a, offset) ((a << offset) ^ (a >> (64 - offset)))
+
+static uint64_t load64(const unsigned char *x) {
+	unsigned long long r = 0, i;
+
+	for (i = 0; i < 8; ++i) {
+		r |= (unsigned long long) x[i] << 8 * i;
+	}
+	return r;
+}
+
+static void store64(uint8_t *x, uint64_t u) {
+	unsigned int i;
+
+	for (i = 0; i < 8; ++i) {
+		x[i] = u;
+		u >>= 8;
+	}
+}
+
+static const uint64_t KeccakF_RoundConstants[NROUNDS] = {
+    (uint64_t) 0x0000000000000001ULL,
+    (uint64_t) 0x0000000000008082ULL,
+    (uint64_t) 0x800000000000808aULL,
+    (uint64_t) 0x8000000080008000ULL,
+    (uint64_t) 0x000000000000808bULL,
+    (uint64_t) 0x0000000080000001ULL,
+    (uint64_t) 0x8000000080008081ULL,
+    (uint64_t) 0x8000000000008009ULL,
+    (uint64_t) 0x000000000000008aULL,
+    (uint64_t) 0x0000000000000088ULL,
+    (uint64_t) 0x0000000080008009ULL,
+    (uint64_t) 0x000000008000000aULL,
+    (uint64_t) 0x000000008000808bULL,
+    (uint64_t) 0x800000000000008bULL,
+    (uint64_t) 0x8000000000008089ULL,
+    (uint64_t) 0x8000000000008003ULL,
+    (uint64_t) 0x8000000000008002ULL,
+    (uint64_t) 0x8000000000000080ULL,
+    (uint64_t) 0x000000000000800aULL,
+    (uint64_t) 0x800000008000000aULL,
+    (uint64_t) 0x8000000080008081ULL,
+    (uint64_t) 0x8000000000008080ULL,
+    (uint64_t) 0x0000000080000001ULL,
+    (uint64_t) 0x8000000080008008ULL};
+
+static void KeccakF1600_StatePermute(uint64_t *state) {
+	int round;
+
+	uint64_t Aba, Abe, Abi, Abo, Abu;
+	uint64_t Aga, Age, Agi, Ago, Agu;
+	uint64_t Aka, Ake, Aki, Ako, Aku;
+	uint64_t Ama, Ame, Ami, Amo, Amu;
+	uint64_t Asa, Ase, Asi, Aso, Asu;
+	uint64_t BCa, BCe, BCi, BCo, BCu;
+	uint64_t Da, De, Di, Do, Du;
+	uint64_t Eba, Ebe, Ebi, Ebo, Ebu;
+	uint64_t Ega, Ege, Egi, Ego, Egu;
+	uint64_t Eka, Eke, Eki, Eko, Eku;
+	uint64_t Ema, Eme, Emi, Emo, Emu;
+	uint64_t Esa, Ese, Esi, Eso, Esu;
+
+	//copyFromState(A, state)
+	Aba = state[0];
+	Abe = state[1];
+	Abi = state[2];
+	Abo = state[3];
+	Abu = state[4];
+	Aga = state[5];
+	Age = state[6];
+	Agi = state[7];
+	Ago = state[8];
+	Agu = state[9];
+	Aka = state[10];
+	Ake = state[11];
+	Aki = state[12];
+	Ako = state[13];
+	Aku = state[14];
+	Ama = state[15];
+	Ame = state[16];
+	Ami = state[17];
+	Amo = state[18];
+	Amu = state[19];
+	Asa = state[20];
+	Ase = state[21];
+	Asi = state[22];
+	Aso = state[23];
+	Asu = state[24];
+
+	for (round = 0; round < NROUNDS; round += 2) {
+		//    prepareTheta
+		BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa;
+		BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase;
+		BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi;
+		BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso;
+		BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu;
+
+		//thetaRhoPiChiIotaPrepareTheta(round  , A, E)
+		Da = BCu ^ ROL(BCe, 1);
+		De = BCa ^ ROL(BCi, 1);
+		Di = BCe ^ ROL(BCo, 1);
+		Do = BCi ^ ROL(BCu, 1);
+		Du = BCo ^ ROL(BCa, 1);
+
+		Aba ^= Da;
+		BCa = Aba;
+		Age ^= De;
+		BCe = ROL(Age, 44);
+		Aki ^= Di;
+		BCi = ROL(Aki, 43);
+		Amo ^= Do;
+		BCo = ROL(Amo, 21);
+		Asu ^= Du;
+		BCu = ROL(Asu, 14);
+		Eba = BCa ^ ((~BCe) & BCi);
+		Eba ^= (uint64_t) KeccakF_RoundConstants[round];
+		Ebe = BCe ^ ((~BCi) & BCo);
+		Ebi = BCi ^ ((~BCo) & BCu);
+		Ebo = BCo ^ ((~BCu) & BCa);
+		Ebu = BCu ^ ((~BCa) & BCe);
+
+		Abo ^= Do;
+		BCa = ROL(Abo, 28);
+		Agu ^= Du;
+		BCe = ROL(Agu, 20);
+		Aka ^= Da;
+		BCi = ROL(Aka, 3);
+		Ame ^= De;
+		BCo = ROL(Ame, 45);
+		Asi ^= Di;
+		BCu = ROL(Asi, 61);
+		Ega = BCa ^ ((~BCe) & BCi);
+		Ege = BCe ^ ((~BCi) & BCo);
+		Egi = BCi ^ ((~BCo) & BCu);
+		Ego = BCo ^ ((~BCu) & BCa);
+		Egu = BCu ^ ((~BCa) & BCe);
+
+		Abe ^= De;
+		BCa = ROL(Abe, 1);
+		Agi ^= Di;
+		BCe = ROL(Agi, 6);
+		Ako ^= Do;
+		BCi = ROL(Ako, 25);
+		Amu ^= Du;
+		BCo = ROL(Amu, 8);
+		Asa ^= Da;
+		BCu = ROL(Asa, 18);
+		Eka = BCa ^ ((~BCe) & BCi);
+		Eke = BCe ^ ((~BCi) & BCo);
+		Eki = BCi ^ ((~BCo) & BCu);
+		Eko = BCo ^ ((~BCu) & BCa);
+		Eku = BCu ^ ((~BCa) & BCe);
+
+		Abu ^= Du;
+		BCa = ROL(Abu, 27);
+		Aga ^= Da;
+		BCe = ROL(Aga, 36);
+		Ake ^= De;
+		BCi = ROL(Ake, 10);
+		Ami ^= Di;
+		BCo = ROL(Ami, 15);
+		Aso ^= Do;
+		BCu = ROL(Aso, 56);
+		Ema = BCa ^ ((~BCe) & BCi);
+		Eme = BCe ^ ((~BCi) & BCo);
+		Emi = BCi ^ ((~BCo) & BCu);
+		Emo = BCo ^ ((~BCu) & BCa);
+		Emu = BCu ^ ((~BCa) & BCe);
+
+		Abi ^= Di;
+		BCa = ROL(Abi, 62);
+		Ago ^= Do;
+		BCe = ROL(Ago, 55);
+		Aku ^= Du;
+		BCi = ROL(Aku, 39);
+		Ama ^= Da;
+		BCo = ROL(Ama, 41);
+		Ase ^= De;
+		BCu = ROL(Ase, 2);
+		Esa = BCa ^ ((~BCe) & BCi);
+		Ese = BCe ^ ((~BCi) & BCo);
+		Esi = BCi ^ ((~BCo) & BCu);
+		Eso = BCo ^ ((~BCu) & BCa);
+		Esu = BCu ^ ((~BCa) & BCe);
+
+		//    prepareTheta
+		BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa;
+		BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese;
+		BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi;
+		BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso;
+		BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu;
+
+		//thetaRhoPiChiIotaPrepareTheta(round+1, E, A)
+		Da = BCu ^ ROL(BCe, 1);
+		De = BCa ^ ROL(BCi, 1);
+		Di = BCe ^ ROL(BCo, 1);
+		Do = BCi ^ ROL(BCu, 1);
+		Du = BCo ^ ROL(BCa, 1);
+
+		Eba ^= Da;
+		BCa = Eba;
+		Ege ^= De;
+		BCe = ROL(Ege, 44);
+		Eki ^= Di;
+		BCi = ROL(Eki, 43);
+		Emo ^= Do;
+		BCo = ROL(Emo, 21);
+		Esu ^= Du;
+		BCu = ROL(Esu, 14);
+		Aba = BCa ^ ((~BCe) & BCi);
+		Aba ^= (uint64_t) KeccakF_RoundConstants[round + 1];
+		Abe = BCe ^ ((~BCi) & BCo);
+		Abi = BCi ^ ((~BCo) & BCu);
+		Abo = BCo ^ ((~BCu) & BCa);
+		Abu = BCu ^ ((~BCa) & BCe);
+
+		Ebo ^= Do;
+		BCa = ROL(Ebo, 28);
+		Egu ^= Du;
+		BCe = ROL(Egu, 20);
+		Eka ^= Da;
+		BCi = ROL(Eka, 3);
+		Eme ^= De;
+		BCo = ROL(Eme, 45);
+		Esi ^= Di;
+		BCu = ROL(Esi, 61);
+		Aga = BCa ^ ((~BCe) & BCi);
+		Age = BCe ^ ((~BCi) & BCo);
+		Agi = BCi ^ ((~BCo) & BCu);
+		Ago = BCo ^ ((~BCu) & BCa);
+		Agu = BCu ^ ((~BCa) & BCe);
+
+		Ebe ^= De;
+		BCa = ROL(Ebe, 1);
+		Egi ^= Di;
+		BCe = ROL(Egi, 6);
+		Eko ^= Do;
+		BCi = ROL(Eko, 25);
+		Emu ^= Du;
+		BCo = ROL(Emu, 8);
+		Esa ^= Da;
+		BCu = ROL(Esa, 18);
+		Aka = BCa ^ ((~BCe) & BCi);
+		Ake = BCe ^ ((~BCi) & BCo);
+		Aki = BCi ^ ((~BCo) & BCu);
+		Ako = BCo ^ ((~BCu) & BCa);
+		Aku = BCu ^ ((~BCa) & BCe);
+
+		Ebu ^= Du;
+		BCa = ROL(Ebu, 27);
+		Ega ^= Da;
+		BCe = ROL(Ega, 36);
+		Eke ^= De;
+		BCi = ROL(Eke, 10);
+		Emi ^= Di;
+		BCo = ROL(Emi, 15);
+		Eso ^= Do;
+		BCu = ROL(Eso, 56);
+		Ama = BCa ^ ((~BCe) & BCi);
+		Ame = BCe ^ ((~BCi) & BCo);
+		Ami = BCi ^ ((~BCo) & BCu);
+		Amo = BCo ^ ((~BCu) & BCa);
+		Amu = BCu ^ ((~BCa) & BCe);
+
+		Ebi ^= Di;
+		BCa = ROL(Ebi, 62);
+		Ego ^= Do;
+		BCe = ROL(Ego, 55);
+		Eku ^= Du;
+		BCi = ROL(Eku, 39);
+		Ema ^= Da;
+		BCo = ROL(Ema, 41);
+		Ese ^= De;
+		BCu = ROL(Ese, 2);
+		Asa = BCa ^ ((~BCe) & BCi);
+		Ase = BCe ^ ((~BCi) & BCo);
+		Asi = BCi ^ ((~BCo) & BCu);
+		Aso = BCo ^ ((~BCu) & BCa);
+		Asu = BCu ^ ((~BCa) & BCe);
+	}
+
+	//copyToState(state, A)
+	state[0] = Aba;
+	state[1] = Abe;
+	state[2] = Abi;
+	state[3] = Abo;
+	state[4] = Abu;
+	state[5] = Aga;
+	state[6] = Age;
+	state[7] = Agi;
+	state[8] = Ago;
+	state[9] = Agu;
+	state[10] = Aka;
+	state[11] = Ake;
+	state[12] = Aki;
+	state[13] = Ako;
+	state[14] = Aku;
+	state[15] = Ama;
+	state[16] = Ame;
+	state[17] = Ami;
+	state[18] = Amo;
+	state[19] = Amu;
+	state[20] = Asa;
+	state[21] = Ase;
+	state[22] = Asi;
+	state[23] = Aso;
+	state[24] = Asu;
+
+#undef round
+}
+
+#include <string.h>
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+static void keccak_absorb(uint64_t *s,
+                          unsigned int r,
+                          const unsigned char *m, unsigned long long int mlen,
+                          unsigned char p) {
+	unsigned long long i;
+	unsigned char t[200];
+
+	for (i = 0; i < 25; ++i)
+		s[i] = 0;
+
+	while (mlen >= r) {
+		for (i = 0; i < r / 8; ++i)
+			s[i] ^= load64(m + 8 * i);
+
+		KeccakF1600_StatePermute(s);
+		mlen -= r;
+		m += r;
+	}
+
+	for (i = 0; i < r; ++i)
+		t[i] = 0;
+	for (i = 0; i < mlen; ++i)
+		t[i] = m[i];
+	t[i] = p;        /* domain-separation / first padding byte */
+	t[r - 1] |= 128; /* final 1 bit of the Keccak pad10*1 rule */
+	for (i = 0; i < r / 8; ++i)
+		s[i] ^= load64(t + 8 * i);
+}
+
+void OQS_SHA3_keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks,
+                                   uint64_t *s,
+                                   unsigned int r) {
+	unsigned int i;
+	while (nblocks > 0) {
+		KeccakF1600_StatePermute(s);
+		for (i = 0; i < (r >> 3); i++) {
+			store64(h + 8 * i, s[i]);
+		}
+		h += r;
+		nblocks--;
+	}
+}
+
+void OQS_SHA3_sha3256(unsigned char *output, const unsigned char *input, unsigned int inputByteLen) {
+	uint64_t s[25];
+	unsigned char t[SHA3_256_RATE];
+	int i;
+
+	keccak_absorb(s, SHA3_256_RATE, input, inputByteLen, 0x06);
+	OQS_SHA3_keccak_squeezeblocks(t, 1, s, SHA3_256_RATE);
+	for (i = 0; i < 32; i++)
+		output[i] = t[i];
+}
+
+void OQS_SHA3_sha3512(unsigned char *output, const unsigned char *input, unsigned int inputByteLen) {
+	uint64_t s[25];
+	unsigned char t[SHA3_512_RATE];
+	int i;
+	/* SHA3-512 uses the same 0x06 domain-separation byte as SHA3-256 (FIPS 202) */
+	keccak_absorb(s, SHA3_512_RATE, input, inputByteLen, 0x06);
+	OQS_SHA3_keccak_squeezeblocks(t, 1, s, SHA3_512_RATE);
+	for (i = 0; i < 64; i++)
+		output[i] = t[i];
+}
+
+void OQS_SHA3_shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen) {
+	keccak_absorb(s, SHAKE128_RATE, input, inputByteLen, 0x1F);
+}
+
+void OQS_SHA3_shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s) {
+	OQS_SHA3_keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE);
+}
+
+void OQS_SHA3_shake128(unsigned char *output, unsigned long long outlen,
+                       const unsigned char *input, unsigned long long inlen) {
+	uint64_t s[25];
+	unsigned char t[SHAKE128_RATE];
+	unsigned long long nblocks = outlen / SHAKE128_RATE;
+	size_t i;
+
+	for (i = 0; i < 25; ++i)
+		s[i] = 0;
+
+	/* Absorb input */
+	keccak_absorb(s, SHAKE128_RATE, input, inlen, 0x1F);
+
+	/* Squeeze output */
+	OQS_SHA3_keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE);
+
+	output += nblocks * SHAKE128_RATE;
+	outlen -= nblocks * SHAKE128_RATE;
+
+	if (outlen) {
+		OQS_SHA3_keccak_squeezeblocks(t, 1, s, SHAKE128_RATE);
+		for (i = 0; i < outlen; i++)
+			output[i] = t[i];
+	}
+}
+
+void OQS_SHA3_cshake128_simple_absorb(uint64_t s[25],
+                                      uint16_t cstm, // 2-byte domain separator
+                                      const unsigned char *in, unsigned long long inlen) {
+	unsigned char *sep = (unsigned char *) s;
+	unsigned int i;
+
+	for (i = 0; i < 25; i++)
+		s[i] = 0;
+
+	/* Absorb customization (domain-separation) string */
+	sep[0] = 0x01;
+	sep[1] = 0xa8;
+	sep[2] = 0x01;
+	sep[3] = 0x00;
+	sep[4] = 0x01;
+	sep[5] = 16; // fixed bitlen of cstm
+	sep[6] = cstm & 0xff;
+	sep[7] = cstm >> 8;
+
+	KeccakF1600_StatePermute(s);
+
+	/* Absorb input */
+	keccak_absorb(s, SHAKE128_RATE, in, inlen, 0x04);
+}
+
+void OQS_SHA3_cshake128_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s) {
+	OQS_SHA3_keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE);
+}
+
+void OQS_SHA3_cshake128_simple(unsigned char *output, unsigned long long outlen,
+                               uint16_t cstm, // 2-byte domain separator
+                               const unsigned char *in, unsigned long long inlen) {
+	uint64_t s[25];
+	unsigned char t[SHAKE128_RATE];
+	unsigned int i;
+
+	OQS_SHA3_cshake128_simple_absorb(s, cstm, in, inlen);
+
+	/* Squeeze output */
+	OQS_SHA3_keccak_squeezeblocks(output, outlen / SHAKE128_RATE, s, SHAKE128_RATE);
+	output += (outlen / SHAKE128_RATE) * SHAKE128_RATE;
+
+	if (outlen % SHAKE128_RATE) {
+		OQS_SHA3_keccak_squeezeblocks(t, 1, s, SHAKE128_RATE);
+		for (i = 0; i < outlen % SHAKE128_RATE; i++)
+			output[i] = t[i];
+	}
+}
diff --git a/crypt/liboqs/crypto/sha3/sha3.h b/crypt/liboqs/crypto/sha3/sha3.h
new file mode 100644
index 0000000000000000000000000000000000000000..5b02cb561c93c320cdd3e99ad4d6392a13e893f4
--- /dev/null
+++ b/crypt/liboqs/crypto/sha3/sha3.h
@@ -0,0 +1,35 @@
+/**
+ * \file sha3.h
+ * \brief Header defining the API for OQS SHA3
+ */
+
+#ifndef __OQS_SHA3_H
+#define __OQS_SHA3_H
+
+#include <stdint.h>
+
+#define OQS_SHA3_STATESIZE 25
+#define OQS_SHA3_SHAKE128_RATE 168
+#define OQS_SHA3_SHA3_256_RATE 136
+#define OQS_SHA3_SHA3_512_RATE 72
+
+void OQS_SHA3_keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, uint64_t *s, unsigned int r);
+void OQS_SHA3_sha3256(unsigned char *output, const unsigned char *input, unsigned int inputByteLen);
+void OQS_SHA3_sha3512(unsigned char *output, const unsigned char *input, unsigned int inputByteLen);
+
+// SHAKE128
+void OQS_SHA3_shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen);
+void OQS_SHA3_shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s);
+void OQS_SHA3_shake128(unsigned char *output, unsigned long long outlen,
+                       const unsigned char *input, unsigned long long inlen);
+
+// cSHAKE128
+void OQS_SHA3_cshake128_simple_absorb(uint64_t *s,
+                                      uint16_t cstm, // 2-byte domain separator
+                                      const unsigned char *in, unsigned long long inlen);
+void OQS_SHA3_cshake128_simple_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s);
+void OQS_SHA3_cshake128_simple(unsigned char *output, unsigned long long outlen,
+                               uint16_t cstm, // 2-byte domain separator
+                               const unsigned char *in, unsigned long long inlen);
+
+#endif
diff --git a/crypt/liboqs/ds_benchmark.h b/crypt/liboqs/ds_benchmark.h
new file mode 100644
index 0000000000000000000000000000000000000000..fc9efb02bddf8c18971673af3838494711db610e
--- /dev/null
+++ b/crypt/liboqs/ds_benchmark.h
@@ -0,0 +1,240 @@
+/********************************************************************************************
+ * ds_benchmark.h: Macros for simple benchmarking of C code.
+ *
+ * See instructions for usage below.
+ * Software originally developed by Douglas Stebila.
+ * Most recent version at https://gist.github.com/dstebila/6980008ec98209ef6075
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * For more information, please refer to <http://unlicense.org>
+ ********************************************************************************************/
+
+/** \file ds_benchmark.h
+ * Macros for simple benchmarking of C code.
+ */
+
+#if 0
+/* example code: timing two operations */
+#include "ds_benchmark.h"
+...
+DEFINE_TIMER_VARIABLES
+INITIALIZE_TIMER
+START_TIMER
+// your operation here
+STOP_TIMER
+START_TIMER
+// another operation here
+STOP_TIMER
+FINALIZE_TIMER
+PRINT_TIMER_HEADER
+PRINT_TIMER_AVG("my operation")
+PRINT_TIMER_FOOTER
+
+/* example code: average multiple runs, run for e.g. 30 seconds */
+#include "ds_benchmark.h"
+...
+PRINT_TIMER_HEADER
+TIME_OPERATION_SECONDS(MyFunction(myarg1, myarg2, ...), "my operation", 30)
+TIME_OPERATION_SECONDS(MyOtherFunction(myarg3), "my other operation", 30)
+PRINT_TIMER_FOOTER
+
+/* example code: average multiple runs, run for e.g. 100 iterations */
+#include "ds_benchmark.h"
+...
+PRINT_TIMER_HEADER
+TIME_OPERATION_ITERATIONS(MyFunction(myarg1, myarg2, ...), "my operation", 1000)
+TIME_OPERATION_ITERATIONS(MyOtherFunction(myarg3), "my other operation", 100)
+PRINT_TIMER_FOOTER
+
+/* For most accurate results:
+ *  - disable hyperthreading a.k.a. hardware multithreading
+ *    (Linux instructions: http://bench.cr.yp.to/supercop.html)
+ *    (Mac OS X instructions: Instruments -> Preferences -> CPUs -> uncheck "Hardware Multi-Threading"
+ *     http://forums.macrumors.com/showthread.php?t=1484684)
+ *  - disable TurboBoost
+ *    (Linux instructions: http://bench.cr.yp.to/supercop.html)
+ *    (Mac OS X: use http://www.rugarciap.com/turbo-boost-switcher-for-os-x/)
+ *  - run when the computer is idle (e.g., shut down all other applications, disable network access if possible, ...)
+ */
+#endif
+
+#ifndef _DS_BENCHMARK_H
+#define _DS_BENCHMARK_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#if !defined(WINDOWS)
+#include <sys/time.h>
+#endif
+#include <math.h>
+#include <time.h>
+
+#if defined(WINDOWS)
+#include <Windows.h>
+
+int gettimeofday(struct timeval *tp, struct timezone *tzp) {
+	// Note: some broken versions only have 8 trailing zeros; the correct epoch has 9 trailing zeros
+	static const uint64_t EPOCH = ((uint64_t) 116444736000000000ULL);
+
+	SYSTEMTIME system_time;
+	FILETIME file_time;
+	uint64_t time;
+
+	GetSystemTime(&system_time);
+	SystemTimeToFileTime(&system_time, &file_time);
+	time = ((uint64_t) file_time.dwLowDateTime);
+	time += ((uint64_t) file_time.dwHighDateTime) << 32;
+	tp->tv_sec = (long) ((time - EPOCH) / 10000000L);
+	tp->tv_usec = (long) (system_time.wMilliseconds * 1000);
+	return 0;
+}
+#endif
+
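+// Read a per-platform cycle counter: __rdtsc on Windows, the virtual counter
+// on AArch64, CLOCK_REALTIME nanoseconds on 32-bit ARM, and RDTSC elsewhere.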
+static uint64_t rdtsc(void) {
+#if defined(WINDOWS)
+	return __rdtsc();
+#elif defined(__aarch64__)
+	uint64_t x;
+	asm volatile("isb; mrs %0, cntvct_el0"
+	             : "=r"(x));
+	return x;
+#elif defined(__arm__)
+	struct timespec time;
+	clock_gettime(CLOCK_REALTIME, &time);
+	return (int64_t)(time.tv_sec * 1e9 + time.tv_nsec);
+#else
+	uint64_t x;
+	__asm__ volatile(".byte 0x0f, 0x31"
+	                 : "=A"(x));
+	return x;
+#endif
+}
+
+#define DEFINE_TIMER_VARIABLES                                                                              \
+	volatile uint64_t _bench_cycles_start, _bench_cycles_end;                                               \
+	uint64_t _bench_cycles_cumulative = 0;                                                                  \
+	int64_t _bench_cycles_diff;                                                                             \
+	struct timeval _bench_timeval_start, _bench_timeval_end;                                                \
+	uint64_t _bench_iterations, _bench_time_cumulative;                                                     \
+	double _bench_cycles_x, _bench_cycles_mean, _bench_cycles_delta, _bench_cycles_M2, _bench_cycles_stdev; \
+	double _bench_time_x, _bench_time_mean, _bench_time_delta, _bench_time_M2, _bench_time_stdev;
+
+#define INITIALIZE_TIMER        \
+	_bench_iterations = 0;      \
+	_bench_cycles_mean = 0.0;   \
+	_bench_cycles_M2 = 0.0;     \
+	_bench_time_cumulative = 0; \
+	_bench_time_mean = 0.0;     \
+	_bench_time_M2 = 0.0;
+
+#define START_TIMER                            \
+	gettimeofday(&_bench_timeval_start, NULL); \
+	_bench_cycles_start = rdtsc();
+
+// Mean and population standard deviation are calculated in an online way using the algorithm in
+//     http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
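+// For each sample x the update is: delta = x - mean; mean += delta / n; M2 += delta * (x - mean);
+// after n samples the population standard deviation is sqrt(M2 / n) (computed in FINALIZE_TIMER).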
+#define STOP_TIMER                                                                                                                                                          \
+	_bench_cycles_end = rdtsc();                                                                                                                                            \
+	gettimeofday(&_bench_timeval_end, NULL);                                                                                                                                \
+	_bench_iterations += 1;                                                                                                                                                 \
+	if (_bench_cycles_end < _bench_cycles_start) {                                                                                                                          \
+		_bench_cycles_end += (uint64_t) 1 << 32;                                                                                                                            \
+	}                                                                                                                                                                       \
+	_bench_cycles_diff = _bench_cycles_end;                                                                                                                                 \
+	_bench_cycles_diff -= _bench_cycles_start;                                                                                                                              \
+	_bench_cycles_cumulative += _bench_cycles_diff;                                                                                                                         \
+	_bench_cycles_x = (double) (_bench_cycles_diff);                                                                                                                        \
+	_bench_cycles_delta = _bench_cycles_x - _bench_cycles_mean;                                                                                                             \
+	_bench_cycles_mean += _bench_cycles_delta / (double) _bench_iterations;                                                                                                 \
+	_bench_cycles_M2 += _bench_cycles_delta * (_bench_cycles_x - _bench_cycles_mean);                                                                                       \
+	_bench_time_x = (double) ((_bench_timeval_end.tv_sec * 1000000 + _bench_timeval_end.tv_usec) - (_bench_timeval_start.tv_sec * 1000000 + _bench_timeval_start.tv_usec)); \
+	_bench_time_delta = _bench_time_x - _bench_time_mean;                                                                                                                   \
+	_bench_time_mean += _bench_time_delta / (double) _bench_iterations;                                                                                                     \
+	_bench_time_M2 += _bench_time_delta * (_bench_time_x - _bench_time_mean);                                                                                               \
+	_bench_time_cumulative += _bench_time_x;
+
+#define FINALIZE_TIMER                                                             \
+	if (_bench_iterations == 2) {                                                  \
+		_bench_cycles_stdev = 0.0;                                                 \
+	} else {                                                                       \
+		_bench_cycles_stdev = sqrt(_bench_cycles_M2 / (double) _bench_iterations); \
+	}                                                                              \
+	if (_bench_iterations == 2) {                                                  \
+		_bench_time_stdev = 0.0;                                                   \
+	} else {                                                                       \
+		_bench_time_stdev = sqrt(_bench_time_M2 / (double) _bench_iterations);     \
+	}
+
+#define PRINT_CURRENT_TIME                                                                \
+	{                                                                                     \
+		char _bench_time_buff[20];                                                        \
+		time_t _bench_time_now = time(0);                                                 \
+		strftime(_bench_time_buff, 20, "%Y-%m-%d %H:%M:%S", localtime(&_bench_time_now)); \
+		printf("%s", _bench_time_buff);                                                   \
+	}
+
+#define PRINT_TIMER_HEADER                                                                                                                                                                            \
+	printf("Started at ");                                                                                                                                                                            \
+	PRINT_CURRENT_TIME                                                                                                                                                                                \
+	printf("\n");                                                                                                                                                                                     \
+	printf("%-30s | %10s | %14s | %15s | %10s | %16s | %10s\n", "Operation                     ", "Iterations", "Total time (s)", "Time (us): mean", "pop. stdev", "CPU cycles: mean", "pop. stdev"); \
+	printf("%-30s | %10s:| %14s:| %15s:| %10s:| %16s:| %10s:\n", "------------------------------", "----------", "--------------", "---------------", "----------", "----------------", "----------");
+/* colons are used in above to right-align cell contents in Markdown */
+
+#define PRINT_TIMER_FOOTER \
+	printf("Ended at ");   \
+	PRINT_CURRENT_TIME     \
+	printf("\n");
+
+#define PRINT_TIMER_AVG(op_name) \
+	printf("%-30s | %10" PRIu64 " | %14.3f | %15.3f | %10.3f | %16.0f | %10.0f\n", (op_name), _bench_iterations, ((double) _bench_time_cumulative) / 1000000.0, _bench_time_mean, _bench_time_stdev, ((double) _bench_cycles_cumulative) / (double) _bench_iterations, _bench_cycles_stdev);
+
+#define TIME_OPERATION_ITERATIONS(op, op_name, it) \
+	{                                              \
+		DEFINE_TIMER_VARIABLES                     \
+		INITIALIZE_TIMER                           \
+		for (int i = 0; i < (it); i++) {           \
+			START_TIMER { op; }                    \
+			STOP_TIMER                             \
+		}                                          \
+		FINALIZE_TIMER                             \
+		PRINT_TIMER_AVG(op_name)                   \
+	}
+
+#define TIME_OPERATION_SECONDS(op, op_name, secs)                 \
+	{                                                             \
+		DEFINE_TIMER_VARIABLES                                    \
+		INITIALIZE_TIMER                                          \
+		uint64_t _bench_time_goal_usecs = 1000000 * secs;         \
+		while (_bench_time_cumulative < _bench_time_goal_usecs) { \
+			START_TIMER { op; }                                   \
+			STOP_TIMER                                            \
+		}                                                         \
+		FINALIZE_TIMER                                            \
+		PRINT_TIMER_AVG(op_name)                                  \
+	}
+
+#endif
diff --git a/crypt/liboqs/kex/Makefile.am b/crypt/liboqs/kex/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..311a3126c7281036c5e115bc9f9fd3ec8cfe7346
--- /dev/null
+++ b/crypt/liboqs/kex/Makefile.am
@@ -0,0 +1,8 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libkex.la
+
+libkex_la_SOURCES = kex.c
+
+libkex_la_CPPFLAGS = -I../../include
+libkex_la_CPPFLAGS += $(AM_CPPFLAGS)
+
diff --git a/crypt/liboqs/kex/kex.c b/crypt/liboqs/kex/kex.c
new file mode 100644
index 0000000000000000000000000000000000000000..539027c90424c4eab24b58a4274d6a19615dddab
--- /dev/null
+++ b/crypt/liboqs/kex/kex.c
@@ -0,0 +1,141 @@
+#include <assert.h>
+
+#include <oqs/kex.h>
+
+#include <oqs/kex_lwe_frodo.h>
+#include <oqs/kex_mlwe_kyber.h>
+#include <oqs/kex_ntru.h>
+#include <oqs/kex_rlwe_bcns15.h>
+#include <oqs/kex_rlwe_msrln16.h>
+#include <oqs/kex_rlwe_newhope.h>
+#include <oqs/kex_sidh_cln16.h>
+
+#ifdef ENABLE_CODE_MCBITS
+#include <oqs/kex_code_mcbits.h>
+#endif
+
+#ifdef ENABLE_SIDH_IQC_REF
+#include <oqs/kex_sidh_iqc_ref.h>
+#endif
+#ifdef ENABLE_KEX_RLWE_NEWHOPE_AVX2
+#include <oqs/kex_rlwe_newhope_avx2.h>
+#endif
+
+#define UNUSED(expr)   \
+	do {               \
+		(void) (expr); \
+	} while (0)
+
+OQS_KEX *OQS_KEX_new(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters) {
+
+	// Silence unused-argument warnings: some of these arguments are only used
+	// when the corresponding algorithms are enabled at compile time.
+	UNUSED(seed);
+	UNUSED(seed_len);
+	UNUSED(named_parameters);
+
+	switch (alg_name) {
+	case OQS_KEX_alg_default:
+		return OQS_KEX_rlwe_bcns15_new(rand);
+#ifdef ENABLE_KEX_LWE_FRODO
+	case OQS_KEX_alg_lwe_frodo:
+		return OQS_KEX_lwe_frodo_new_recommended(rand, seed, seed_len, named_parameters);
+#else
+		assert(0);
+#endif
+	case OQS_KEX_alg_code_mcbits:
+#ifdef ENABLE_CODE_MCBITS
+		return OQS_KEX_code_mcbits_new(rand);
+#else
+		assert(0);
+#endif
+#ifdef ENABLE_KEX_MLWE_KYBER
+	case OQS_KEX_alg_mlwe_kyber:
+		return OQS_KEX_mlwe_kyber_new(rand);
+#else
+		assert(0);
+#endif
+#ifndef DISABLE_NTRU_ON_WINDOWS_BY_DEFAULT
+#ifdef ENABLE_KEX_NTRU
+	case OQS_KEX_alg_ntru:
+		return OQS_KEX_ntru_new(rand);
+#else
+		assert(0);
+#endif
+#endif
+	case OQS_KEX_alg_rlwe_bcns15:
+		return OQS_KEX_rlwe_bcns15_new(rand);
+#ifdef ENABLE_KEX_RLWE_MSRLN16
+	case OQS_KEX_alg_rlwe_msrln16:
+		return OQS_KEX_rlwe_msrln16_new(rand);
+#else
+		assert(0);
+#endif
+#ifdef ENABLE_KEX_RLWE_NEWHOPE
+	case OQS_KEX_alg_rlwe_newhope:
+		return OQS_KEX_rlwe_newhope_new(rand);
+#else
+		assert(0);
+#endif
+#ifdef ENABLE_KEX_RLWE_NEWHOPE_AVX2
+	case OQS_KEX_alg_rlwe_newhope_avx2:
+		return OQS_KEX_rlwe_newhope_avx2_new(rand);
+#else
+		assert(0);
+#endif
+#ifdef ENABLE_KEX_SIDH_CLN16
+	case OQS_KEX_alg_sidh_cln16:
+		return OQS_KEX_sidh_cln16_new(rand, NULL);
+	case OQS_KEX_alg_sidh_cln16_compressed:
+		return OQS_KEX_sidh_cln16_new(rand, "compressedp751");
+#else
+		assert(0);
+#endif
+
+	case OQS_KEX_alg_sidh_iqc_ref:
+#ifdef ENABLE_SIDH_IQC_REF
+		return OQS_KEX_sidh_iqc_ref_new(rand, named_parameters);
+#else
+		assert(0);
+#endif
+	default:
+		assert(0);
+		return NULL;
+	}
+}
+
+int OQS_KEX_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) {
+	if (k == NULL) {
+		return 0;
+	} else {
+		return k->alice_0(k, alice_priv, alice_msg, alice_msg_len);
+	}
+}
+
+int OQS_KEX_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) {
+	if (k == NULL) {
+		return 0;
+	} else {
+		return k->bob(k, alice_msg, alice_msg_len, bob_msg, bob_msg_len, key, key_len);
+	}
+}
+
+int OQS_KEX_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) {
+	if (k == NULL) {
+		return 0;
+	} else {
+		return k->alice_1(k, alice_priv, bob_msg, bob_msg_len, key, key_len);
+	}
+}
+
+void OQS_KEX_alice_priv_free(OQS_KEX *k, void *alice_priv) {
+	if (k) {
+		k->alice_priv_free(k, alice_priv);
+	}
+}
+
+void OQS_KEX_free(OQS_KEX *k) {
+	if (k) {
+		k->free(k);
+	}
+}
diff --git a/crypt/liboqs/kex/kex.h b/crypt/liboqs/kex/kex.h
new file mode 100644
index 0000000000000000000000000000000000000000..0decdf874b9a1a815738832f183f4a0b6ab479d1
--- /dev/null
+++ b/crypt/liboqs/kex/kex.h
@@ -0,0 +1,163 @@
+/**
+ * \file kex.h
+ * \brief Header defining the API for generic OQS Key exchange
+ */
+
+#ifndef __OQS_KEX_H
+#define __OQS_KEX_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/rand.h>
+
+#if !defined(WINDOWS)
+#include <oqs/config.h>
+#endif
+
+enum OQS_KEX_alg_name {
+	OQS_KEX_alg_default,
+	OQS_KEX_alg_rlwe_bcns15,
+	OQS_KEX_alg_rlwe_newhope,
+	OQS_KEX_alg_rlwe_msrln16,
+	OQS_KEX_alg_lwe_frodo,
+	OQS_KEX_alg_sidh_cln16,
+	OQS_KEX_alg_sidh_cln16_compressed,
+	OQS_KEX_alg_code_mcbits,
+	OQS_KEX_alg_ntru,
+	OQS_KEX_alg_sidh_iqc_ref,
+	OQS_KEX_alg_mlwe_kyber,
+	OQS_KEX_alg_rlwe_newhope_avx2,
+};
+
+typedef struct OQS_KEX OQS_KEX;
+
+/**
+ * OQS key exchange object
+ */
+typedef struct OQS_KEX {
+
+	/**
+	 * PRNG
+	 */
+	OQS_RAND *rand;
+
+	/**
+	 * Specifies the name of the key exchange method
+	 */
+	char *method_name;
+
+	/**
+	 * Classical security in terms of the number of bits provided by the key
+	 * exchange method.
+	 */
+	uint16_t estimated_classical_security;
+
+	/**
+	 *  Equivalent quantum security in terms of the number of bits provided by the key
+	 *  exchange method.
+	 */
+	uint16_t estimated_quantum_security;
+
+	/**
+	 * An instance-specific seed, if any.
+	 */
+	uint8_t *seed;
+
+	/**
+	 * Size of instance-specific seed, if any.
+	 */
+	size_t seed_len;
+
+	/**
+	 * Named parameters for this key exchange method instance, if any.
+	 */
+	char *named_parameters;
+
+	/**
+	 * Opaque pointer for passing around instance-specific data
+	 */
+	void *params;
+
+	/**
+	 * Opaque pointer for passing around any computation context
+	 */
+	void *ctx;
+
+	/**
+	 * Pointer to a function for public and private key generation by Alice.
+	 *
+	 * @param k                Key exchange structure
+	 * @param alice_priv       Alice's private key
+	 * @param alice_msg        Alice's message (public key + optional additional data)
+	 * @param alice_msg_len    Alice's message length
+	 * @return                 1 on success, or 0 on failure
+	 */
+	int (*alice_0)(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
+
+	/**
+	 * Pointer to a function for shared key generation by Bob.
+	 *
+	 * @param k                Key exchange structure
+	 * @param alice_msg        Alice's message (public key + optional additional data)
+	 * @param alice_msg_len    Alice's message length
+	 * @param bob_msg          Bob's message (public key / encryption of shared key + optional additional data)
+	 * @param bob_msg_len      Bob's message length
+	 * @param key              Shared key
+	 * @param key_len          Shared key length
+	 * @return                 1 on success, or 0 on failure
+	 */
+	int (*bob)(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
+
+	/**
+	 * Pointer to a function for shared key generation by Alice.
+	 *
+	 * @param k                Key exchange structure
+	 * @param alice_priv       Alice's private key
+	 * @param bob_msg          Bob's message (public key / encryption of shared key + optional additional data)
+	 * @param bob_msg_len      Bob's message length
+	 * @param key              Shared key
+	 * @param key_len          Shared key length
+	 * @return                 1 on success, or 0 on failure
+	 */
+	int (*alice_1)(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
+
+	/**
+	 * Pointer to a function for freeing Alice's private key
+	 *
+	 * @param k                Key exchange structure
+	 * @param alice_priv       Alice's private key
+	 */
+	void (*alice_priv_free)(OQS_KEX *k, void *alice_priv);
+
+	/**
+	 * Pointer to a function for freeing the allocated key exchange structure
+	 *
+	 * @param k                Key exchange structure
+	 */
+	void (*free)(OQS_KEX *k);
+
+} OQS_KEX;
+
+/**
+ * Allocate a new key exchange object.
+ *
+ * @param rand               Random number generator.
+ * @param alg_name           Algorithm to be instantiated
+ * @param seed               An instance-specific seed, if any, or NULL.
+ * @param seed_len           The length of seed, or 0.
+ * @param named_parameters   Name or description of method-specific parameters
+ *                           to use for this instance (as a NULL-terminated C string),
+ *                           if any, or NULL.
+ * @return                   The object on success, or NULL on failure.
+ */
+OQS_KEX *OQS_KEX_new(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters);
+
+int OQS_KEX_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
+int OQS_KEX_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
+int OQS_KEX_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
+
+void OQS_KEX_alice_priv_free(OQS_KEX *k, void *alice_priv);
+void OQS_KEX_free(OQS_KEX *k);
+
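+/*
+ * Minimal usage sketch, given an OQS_RAND *rand (error handling omitted;
+ * mirrors the flow exercised in kex/test_kex.c):
+ *
+ *   OQS_KEX *kex = OQS_KEX_new(rand, OQS_KEX_alg_default, NULL, 0, NULL);
+ *   void *alice_priv = NULL;
+ *   uint8_t *alice_msg, *bob_msg, *alice_key, *bob_key;
+ *   size_t alice_msg_len, bob_msg_len, alice_key_len, bob_key_len;
+ *   OQS_KEX_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len);
+ *   OQS_KEX_bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &bob_key, &bob_key_len);
+ *   OQS_KEX_alice_1(kex, alice_priv, bob_msg, bob_msg_len, &alice_key, &alice_key_len);
+ *   // alice_key and bob_key now hold the same alice_key_len-byte shared secret
+ *   OQS_KEX_alice_priv_free(kex, alice_priv);
+ *   OQS_KEX_free(kex);
+ */
+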
+#endif
diff --git a/crypt/liboqs/kex/test_kex.c b/crypt/liboqs/kex/test_kex.c
new file mode 100644
index 0000000000000000000000000000000000000000..fbb32c42156be858251f3bbf03fbb61433d02604
--- /dev/null
+++ b/crypt/liboqs/kex/test_kex.c
@@ -0,0 +1,442 @@
+#if defined(WINDOWS)
+#pragma warning(disable : 4244 4293)
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+#include "../ds_benchmark.h"
+#include "../common/common.h"
+
+struct kex_testcase {
+	enum OQS_KEX_alg_name alg_name;
+	unsigned char *seed;
+	size_t seed_len;
+	char *named_parameters;
+	char *id;
+	int run;
+	int iter;
+};
+
+/* Add new testcases here */
+struct kex_testcase kex_testcases[] = {
+#ifdef ENABLE_KEX_LWE_FRODO
+    {OQS_KEX_alg_lwe_frodo, (unsigned char *) "01234567890123456", 16, "recommended", "lwe_frodo_recommended", 0, 100},
+#endif
+#ifdef ENABLE_CODE_MCBITS
+    {OQS_KEX_alg_code_mcbits, NULL, 0, NULL, "code_mcbits", 0, 25},
+#endif
+#ifdef ENABLE_KEX_MLWE_KYBER
+    {OQS_KEX_alg_mlwe_kyber, NULL, 0, NULL, "mlwe_kyber", 0, 100},
+#endif
+#ifndef DISABLE_NTRU_ON_WINDOWS_BY_DEFAULT
+#ifdef ENABLE_KEX_NTRU
+    {OQS_KEX_alg_ntru, NULL, 0, NULL, "ntru", 0, 25},
+#endif
+#endif
+    {OQS_KEX_alg_rlwe_bcns15, NULL, 0, NULL, "rlwe_bcns15", 0, 100},
+#ifdef ENABLE_KEX_RLWE_MSRLN16
+    {OQS_KEX_alg_rlwe_msrln16, NULL, 0, NULL, "rlwe_msrln16", 0, 100},
+#endif
+#ifdef ENABLE_KEX_RLWE_NEWHOPE
+    {OQS_KEX_alg_rlwe_newhope, NULL, 0, NULL, "rlwe_newhope", 0, 100},
+#endif
+#ifdef ENABLE_KEX_SIDH_CLN16
+    {OQS_KEX_alg_sidh_cln16, NULL, 0, NULL, "sidh_cln16", 0, 10},
+    {OQS_KEX_alg_sidh_cln16_compressed, NULL, 0, NULL, "sidh_cln16_compressed", 0, 10},
+#endif
+#ifdef ENABLE_SIDH_IQC_REF
+    {OQS_KEX_alg_sidh_iqc_ref, NULL, 0, "params771", "sidh_iqc_ref", 0, 10},
+#endif
+#ifdef ENABLE_KEX_RLWE_NEWHOPE_AVX2
+    {OQS_KEX_alg_rlwe_newhope_avx2, NULL, 0, NULL, "rlwe_newhope_avx2", 0, 100},
+#endif
+
+};
+
+#define KEX_TEST_ITERATIONS 100
+#define KEX_BENCH_SECONDS_DEFAULT 1
+
+#define PRINT_HEX_STRING(label, str, len)                        \
+	{                                                            \
+		printf("%-20s (%4zu bytes):  ", (label), (size_t)(len)); \
+		for (size_t i = 0; i < (len); i++) {                     \
+			printf("%02X", ((unsigned char *) (str))[i]);        \
+		}                                                        \
+		printf("\n");                                            \
+	}
+
+static int kex_test_correctness(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters, const int print, unsigned long occurrences[256]) {
+
+	OQS_KEX *kex = NULL;
+	int rc;
+
+	void *alice_priv = NULL;
+	uint8_t *alice_msg = NULL;
+	size_t alice_msg_len;
+	uint8_t *alice_key = NULL;
+	size_t alice_key_len;
+
+	uint8_t *bob_msg = NULL;
+	size_t bob_msg_len;
+	uint8_t *bob_key = NULL;
+	size_t bob_key_len;
+
+	/* setup KEX */
+	kex = OQS_KEX_new(rand, alg_name, seed, seed_len, named_parameters);
+	if (kex == NULL) {
+		eprintf("new_method failed\n");
+		goto err;
+	}
+
+	if (print) {
+		printf("================================================================================\n");
+		printf("Sample computation for key exchange method %s\n", kex->method_name);
+		printf("================================================================================\n");
+	}
+
+	/* Alice's initial message */
+	rc = OQS_KEX_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len);
+	if (rc != 1) {
+		eprintf("OQS_KEX_alice_0 failed\n");
+		goto err;
+	}
+
+	if (print) {
+		PRINT_HEX_STRING("Alice message", alice_msg, alice_msg_len)
+	}
+
+	/* Bob's response */
+	rc = OQS_KEX_bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &bob_key, &bob_key_len);
+	if (rc != 1) {
+		eprintf("OQS_KEX_bob failed\n");
+		goto err;
+	}
+
+	if (print) {
+		PRINT_HEX_STRING("Bob message", bob_msg, bob_msg_len)
+		PRINT_HEX_STRING("Bob session key", bob_key, bob_key_len)
+	}
+
+	/* Alice processes Bob's response */
+	rc = OQS_KEX_alice_1(kex, alice_priv, bob_msg, bob_msg_len, &alice_key, &alice_key_len);
+	if (rc != 1) {
+		eprintf("OQS_KEX_alice_1 failed\n");
+		goto err;
+	}
+
+	if (print) {
+		PRINT_HEX_STRING("Alice session key", alice_key, alice_key_len)
+	}
+
+	/* compare session key lengths and values */
+	if (alice_key_len != bob_key_len) {
+		eprintf("ERROR: Alice's session key and Bob's session key are different lengths (%zu vs %zu)\n", alice_key_len, bob_key_len);
+		goto err;
+	}
+	rc = memcmp(alice_key, bob_key, alice_key_len);
+	if (rc != 0) {
+		eprintf("ERROR: Alice's session key and Bob's session key are not equal\n");
+		PRINT_HEX_STRING("Alice session key", alice_key, alice_key_len)
+		PRINT_HEX_STRING("Bob session key", bob_key, bob_key_len)
+		goto err;
+	}
+	if (print) {
+		printf("Alice and Bob's session keys match.\n");
+		printf("\n\n");
+	}
+
+	/* record generated bytes for statistical analysis */
+	for (size_t i = 0; i < alice_key_len; i++) {
+		OQS_RAND_test_record_occurrence(alice_key[i], occurrences);
+	}
+
+	rc = 1;
+	goto cleanup;
+
+err:
+	rc = 0;
+
+cleanup:
+	free(alice_msg);
+	free(alice_key);
+	free(bob_msg);
+	free(bob_key);
+	OQS_KEX_alice_priv_free(kex, alice_priv);
+	OQS_KEX_free(kex);
+
+	return rc;
+}
+
+static int kex_test_correctness_wrapper(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters, int iterations, bool quiet) {
+	OQS_KEX *kex = NULL;
+	int ret;
+
+	unsigned long occurrences[256];
+	for (int i = 0; i < 256; i++) {
+		occurrences[i] = 0;
+	}
+
+	ret = kex_test_correctness(rand, alg_name, seed, seed_len, named_parameters, quiet ? 0 : 1, occurrences);
+
+	if (ret != 1) {
+		goto err;
+	}
+
+	/* setup KEX */
+	kex = OQS_KEX_new(rand, alg_name, seed, seed_len, named_parameters);
+	if (kex == NULL) {
+		goto err;
+	}
+
+	printf("================================================================================\n");
+	printf("Testing correctness and randomness of key exchange method %s (params=%s) for %d iterations\n",
+	       kex->method_name, named_parameters, iterations);
+	printf("================================================================================\n");
+	for (int i = 0; i < iterations; i++) {
+		ret = kex_test_correctness(rand, alg_name, seed, seed_len, named_parameters, 0, occurrences);
+		if (ret != 1) {
+			goto err;
+		}
+	}
+	printf("All session keys matched.\n");
+	OQS_RAND_report_statistics(occurrences, "");
+	printf("\n\n");
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+
+cleanup:
+	OQS_KEX_free(kex);
+
+	return ret;
+}
+
+static void cleanup_alice_0(OQS_KEX *kex, void *alice_priv, uint8_t *alice_msg) {
+	free(alice_msg);
+	OQS_KEX_alice_priv_free(kex, alice_priv);
+}
+
+static void cleanup_bob(uint8_t *bob_msg, uint8_t *bob_key) {
+	free(bob_msg);
+	free(bob_key);
+}
+
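+/* benchmarks each step of the exchange separately for `seconds` seconds; the
+   buffers produced inside the timed loops are released by the cleanup helpers above */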
+static int kex_bench_wrapper(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters, const size_t seconds) {
+
+	OQS_KEX *kex = NULL;
+	int rc;
+
+	void *alice_priv = NULL;
+	uint8_t *alice_msg = NULL;
+	size_t alice_msg_len;
+	uint8_t *alice_key = NULL;
+	size_t alice_key_len;
+
+	uint8_t *bob_msg = NULL;
+	size_t bob_msg_len;
+	uint8_t *bob_key = NULL;
+	size_t bob_key_len;
+
+	/* setup KEX */
+	kex = OQS_KEX_new(rand, alg_name, seed, seed_len, named_parameters);
+	if (kex == NULL) {
+		eprintf("new_method failed\n");
+		goto err;
+	}
+	printf("%-30s | %10s | %14s | %15s | %10s | %16s | %10s\n", kex->method_name, "", "", "", "", "", "");
+
+	TIME_OPERATION_SECONDS({ OQS_KEX_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len); cleanup_alice_0(kex, alice_priv, alice_msg); }, "alice 0", seconds);
+
+	OQS_KEX_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len);
+	TIME_OPERATION_SECONDS({ OQS_KEX_bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &bob_key, &bob_key_len); cleanup_bob(bob_msg, bob_key); }, "bob", seconds);
+
+	OQS_KEX_bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &bob_key, &bob_key_len);
+	TIME_OPERATION_SECONDS({ OQS_KEX_alice_1(kex, alice_priv, bob_msg, bob_msg_len, &alice_key, &alice_key_len); free(alice_key); }, "alice 1", seconds);
+	alice_key = NULL;
+
+	printf("Communication (bytes): A->B: %zu, B->A: %zu, total: %zu; classical/quantum security bits [%u:%u] \n", alice_msg_len, bob_msg_len, alice_msg_len + bob_msg_len, kex->estimated_classical_security, kex->estimated_quantum_security);
+
+	rc = 1;
+	goto cleanup;
+
+err:
+	rc = 0;
+
+cleanup:
+	free(alice_msg);
+	free(alice_key);
+	free(bob_msg);
+	free(bob_key);
+	OQS_KEX_alice_priv_free(kex, alice_priv);
+	OQS_KEX_free(kex);
+
+	return rc;
+}
+
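+/* memory benchmark: performs a single exchange, allocating only what one run
+   requires, so that peak memory usage can be measured externally */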
+static int kex_mem_bench_wrapper(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters) {
+
+	OQS_KEX *kex = NULL;
+	int rc;
+
+	void *alice_priv = NULL;
+	uint8_t *alice_msg = NULL;
+	size_t alice_msg_len;
+	uint8_t *alice_key = NULL;
+	size_t alice_key_len;
+
+	uint8_t *bob_msg = NULL;
+	size_t bob_msg_len;
+	uint8_t *bob_key = NULL;
+	size_t bob_key_len;
+
+	kex = OQS_KEX_new(rand, alg_name, seed, seed_len, named_parameters);
+	if (kex == NULL) {
+		eprintf("new_method failed\n");
+		goto err;
+	}
+
+	printf("running %s..\n", kex->method_name);
+
+	OQS_KEX_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len);
+	OQS_KEX_bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &bob_key, &bob_key_len);
+	OQS_KEX_alice_1(kex, alice_priv, bob_msg, bob_msg_len, &alice_key, &alice_key_len);
+
+	rc = 1;
+	goto cleanup;
+
+err:
+	rc = 0;
+
+cleanup:
+	free(alice_msg);
+	free(alice_key);
+	free(bob_msg);
+	free(bob_key);
+	OQS_KEX_alice_priv_free(kex, alice_priv);
+	OQS_KEX_free(kex);
+
+	return rc;
+}
+
+void print_help() {
+	printf("Usage: ./test_kex [options] [algorithms]\n");
+	printf("\nOptions:\n");
+	printf("  --quiet, -q\n");
+	printf("    Less verbose output\n");
+	printf("  --bench, -b\n");
+	printf("    Run benchmarks\n");
+	printf("  --seconds, -s [SECONDS]\n");
+	printf("    Number of seconds to run benchmarks (default==%d)\n", KEX_BENCH_SECONDS_DEFAULT);
+	printf("  --mem-bench, -m\n");
+	printf("    Run memory benchmarks (run once and allocate only what is required)\n");
+	printf("\nalgorithms:\n");
+	size_t kex_testcases_len = sizeof(kex_testcases) / sizeof(struct kex_testcase);
+	for (size_t i = 0; i < kex_testcases_len; i++) {
+		printf("  %s\n", kex_testcases[i].id);
+	}
+}
+
+int main(int argc, char **argv) {
+
+	int success = 1;
+	bool run_all = true;
+	bool quiet = false;
+	bool bench = false;
+	bool mem_bench = false;
+	size_t kex_testcases_len = sizeof(kex_testcases) / sizeof(struct kex_testcase);
+	size_t kex_bench_seconds = KEX_BENCH_SECONDS_DEFAULT;
+	for (int i = 1; i < argc; i++) {
+		if (argv[i][0] == '-') {
+			if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "-help") == 0) || (strcmp(argv[i], "--help") == 0)) {
+				print_help();
+				return EXIT_SUCCESS;
+			} else if (strcmp(argv[i], "--quiet") == 0 || strcmp(argv[i], "-q") == 0) {
+				quiet = true;
+			} else if (strcmp(argv[i], "--bench") == 0 || strcmp(argv[i], "-b") == 0) {
+				bench = true;
+			} else if (strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-s") == 0) {
+				if (++i == argc) {
+					print_help();
+					return EXIT_SUCCESS;
+				}
+				char *end;
+				long kex_bench_seconds_input = strtol(argv[i], &end, 10);
+				if (kex_bench_seconds_input < 1) {
+					print_help();
+					return EXIT_SUCCESS;
+				}
+				kex_bench_seconds = kex_bench_seconds_input;
+			} else if ((strcmp(argv[i], "--mem-bench") == 0 || strcmp(argv[i], "-m") == 0)) {
+				mem_bench = true;
+			}
+		} else {
+			run_all = false;
+			for (size_t j = 0; j < kex_testcases_len; j++) {
+				if (strcmp(argv[i], kex_testcases[j].id) == 0) {
+					kex_testcases[j].run = 1;
+				}
+			}
+		}
+	}
+
+	/* setup RAND */
+	OQS_RAND *rand = OQS_RAND_new(OQS_RAND_alg_urandom_chacha20);
+	if (rand == NULL) {
+		goto err;
+	}
+
+	if (mem_bench) {
+		for (size_t i = 0; i < kex_testcases_len; i++) {
+			if (run_all || kex_testcases[i].run == 1) {
+				success = kex_mem_bench_wrapper(rand, kex_testcases[i].alg_name, kex_testcases[i].seed, kex_testcases[i].seed_len, kex_testcases[i].named_parameters);
+			}
+			if (success != 1) {
+				goto err;
+			}
+		}
+		printf("memory benchmarks done, exiting..\n");
+		success = 1;
+		goto cleanup;
+	}
+
+	for (size_t i = 0; i < kex_testcases_len; i++) {
+		if (run_all || kex_testcases[i].run == 1) {
+			int num_iter = kex_testcases[i].iter;
+			success = kex_test_correctness_wrapper(rand, kex_testcases[i].alg_name, kex_testcases[i].seed, kex_testcases[i].seed_len, kex_testcases[i].named_parameters, num_iter, quiet);
+		}
+		if (success != 1) {
+			goto err;
+		}
+	}
+
+	if (bench) {
+		PRINT_TIMER_HEADER
+		for (size_t i = 0; i < kex_testcases_len; i++) {
+			if (run_all || kex_testcases[i].run == 1) {
+				kex_bench_wrapper(rand, kex_testcases[i].alg_name, kex_testcases[i].seed, kex_testcases[i].seed_len, kex_testcases[i].named_parameters, kex_bench_seconds);
+			}
+		}
+		PRINT_TIMER_FOOTER
+	}
+
+	success = 1;
+	goto cleanup;
+
+err:
+	success = 0;
+	eprintf("ERROR!\n");
+
+cleanup:
+	OQS_RAND_free(rand);
+
+	return (success == 1) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/crypt/liboqs/kex_code_mcbits/LICENSE.txt b/crypt/liboqs/kex_code_mcbits/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cd487c9b5f55fb037d92c8f0d509df66d4cf06fc
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/LICENSE.txt
@@ -0,0 +1,7 @@
+The files in this directory and its subdirectories (except kex_code_mcbits.*) 
+were originally written by Daniel J. Bernstein, Tung Chou, and Peter Schwabe
+(https://www.win.tue.nl/~tchou/mcbits/).
+
+According to the distribution website (https://www.win.tue.nl/~tchou/mcbits/):
+
+"The software is in the public domain."
diff --git a/crypt/liboqs/kex_code_mcbits/Makefile.am b/crypt/liboqs/kex_code_mcbits/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..2e29344b0db46d02c60f5b558286eeae5246676c
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/Makefile.am
@@ -0,0 +1,9 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libmcbits.la
+
+libmcbits_la_SOURCES = external/operations.c kex_code_mcbits.c
+
+libmcbits_la_CPPFLAGS = -I../../include -I${SODIUM_DIR}/include
+
+libmcbits_la_CPPFLAGS += $(AM_CPPFLAGS)
+
diff --git a/crypt/liboqs/kex_code_mcbits/external/api.h b/crypt/liboqs/kex_code_mcbits/external/api.h
new file mode 100755
index 0000000000000000000000000000000000000000..5d9710852ac485320dc8d96507ac2a2daa75a355
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/api.h
@@ -0,0 +1,5 @@
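+// Sizes for the mcbits parameter set: the secret key is IRR_BYTES + COND_BYTES,
+// the public key is PK_NROWS * PK_NCOLS / 8, and the ciphertext overhead is
+// SYND_BYTES plus a 16-byte Poly1305 tag (see params.h).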
+#define CRYPTO_SECRETKEYBYTES 5984
+#define CRYPTO_PUBLICKEYBYTES 311736
+#define CRYPTO_BYTES 109
+
+#define CRYPTO_VERSION "1.0"
diff --git a/crypt/liboqs/kex_code_mcbits/external/benes.c b/crypt/liboqs/kex_code_mcbits/external/benes.c
new file mode 100644
index 0000000000000000000000000000000000000000..053c4f933d748dafc84df8601ace733959aac2c5
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/benes.c
@@ -0,0 +1,64 @@
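+// Bitsliced Benes permutation network: func applies one layer of conditional
+// swaps driven by the condition bits, and benes_compact applies the complete
+// network (rev selects the forward or the inverse permutation).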
+static void func(uint64_t *bs, uint64_t *cond_ptr, int low) {
+	int i, j, x, y;
+
+	int high = 5 - low;
+
+	uint64_t diff;
+
+	//
+
+	for (j = 0; j < (1 << low); j++) {
+		x = (0 << low) + j;
+		y = (1 << low) + j;
+
+		for (i = 0; i < (1 << high); i++) {
+			diff = bs[x] ^ bs[y];
+			diff &= (*cond_ptr++);
+			bs[x] ^= diff;
+			bs[y] ^= diff;
+
+			x += (1 << (low + 1));
+			y += (1 << (low + 1));
+		}
+	}
+}
+
+static void benes_compact(uint64_t *bs, uint64_t *cond, int rev) {
+	uint64_t *cond_ptr;
+	int inc, low;
+
+	//
+
+	if (rev == 0) {
+		inc = 32;
+		cond_ptr = cond;
+	} else {
+		inc = -32;
+		cond_ptr = &cond[704];
+	}
+
+	//
+
+	for (low = 0; low <= 5; low++) {
+		func(bs, cond_ptr, low);
+		cond_ptr += inc;
+	}
+
+	transpose_64x64_compact(bs, bs);
+
+	for (low = 0; low <= 5; low++) {
+		func(bs, cond_ptr, low);
+		cond_ptr += inc;
+	}
+	for (low = 4; low >= 0; low--) {
+		func(bs, cond_ptr, low);
+		cond_ptr += inc;
+	}
+
+	transpose_64x64_compact(bs, bs);
+
+	for (low = 5; low >= 0; low--) {
+		func(bs, cond_ptr, low);
+		cond_ptr += inc;
+	}
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/bm.c b/crypt/liboqs/kex_code_mcbits/external/bm.c
new file mode 100644
index 0000000000000000000000000000000000000000..cd0f23b76443cfaf82c29f7bf81a5e52b0c6dba2
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/bm.c
@@ -0,0 +1,135 @@
+typedef uint16_t gf;
+
+static void into_vec(uint64_t *out, gf in) {
+	int i;
+
+	for (i = 0; i < GFBITS; i++) {
+		out[i] = (in >> i) & 1;
+		out[i] = -out[i];
+	}
+}
+
+static gf vec_reduce(uint64_t *prod) {
+	int i;
+
+	uint64_t tmp[GFBITS];
+	gf ret = 0;
+
+	for (i = 0; i < GFBITS; i++) {
+		tmp[i] = prod[i];
+	}
+
+	for (i = GFBITS - 1; i >= 0; i--)
+		tmp[i] ^= (tmp[i] >> 32);
+	for (i = GFBITS - 1; i >= 0; i--)
+		tmp[i] ^= (tmp[i] >> 16);
+	for (i = GFBITS - 1; i >= 0; i--)
+		tmp[i] ^= (tmp[i] >> 8);
+	for (i = GFBITS - 1; i >= 0; i--)
+		tmp[i] ^= (tmp[i] >> 4);
+	for (i = GFBITS - 1; i >= 0; i--) {
+		ret <<= 1;
+		ret |= (0x6996 >> (tmp[i] & 0xF)) & 1;
+	};
+
+	return ret;
+}
+
+static uint64_t mask_nonzero_64bit(gf a) {
+	uint64_t ret = a;
+
+	ret -= 1;
+	ret >>= 63;
+	ret -= 1;
+
+	return ret;
+}
+
+static uint64_t mask_leq_64bit(uint16_t a, uint16_t b) {
+	uint64_t a_tmp = a;
+	uint64_t b_tmp = b;
+	uint64_t ret = b_tmp - a_tmp;
+
+	ret >>= 63;
+	ret -= 1;
+
+	return ret;
+}
+
+static void vec_cmov(uint64_t *out, uint64_t *in, uint64_t mask) {
+	int i;
+
+	for (i = 0; i < GFBITS; i++)
+		out[i] = (in[i] & mask) | (out[i] & ~mask);
+}
+
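+// Berlekamp-Massey: computes the error-locator polynomial from the bitsliced
+// syndrome `in`, using the constant-time masking helpers above.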
+static void bm(uint64_t out[GFBITS], uint64_t in[][GFBITS]) {
+	uint16_t i;
+	uint16_t N, L;
+
+	uint64_t C[GFBITS], B[GFBITS], prod[GFBITS];
+	uint64_t in_tmp[GFBITS], r_vec[GFBITS], C_tmp[GFBITS];
+
+	uint64_t mask_nz, mask_leq;
+	uint16_t mask_16b;
+
+	gf d, b, b_inv, r;
+
+	// init
+
+	C[0] = 1;
+	C[0] <<= 63;
+	B[0] = 1;
+	B[0] <<= 62;
+
+	for (i = 1; i < GFBITS; i++)
+		B[i] = C[i] = 0;
+
+	b = 1;
+	L = 0;
+
+	//
+
+	for (N = 0; N < SYS_T * 2; N++) {
+		// computing d
+
+		if (N < 64)
+			for (i = 0; i < GFBITS; i++)
+				in_tmp[i] = in[0][i] << (63 - N);
+
+		else
+			for (i = 0; i < GFBITS; i++)
+				in_tmp[i] = (in[0][i] >> (N - 63)) | (in[1][i] << (127 - N));
+
+		vec_mul(prod, C, in_tmp);
+		d = vec_reduce(prod);
+
+		// 3 cases
+
+		b_inv = gf_inv(b);
+		r = gf_mul(d, b_inv);
+		into_vec(r_vec, r);
+		vec_mul(C_tmp, r_vec, B);
+
+		for (i = 0; i < GFBITS; i++)
+			C_tmp[i] ^= C[i];
+
+		mask_nz = mask_nonzero_64bit(d);
+		mask_leq = mask_leq_64bit(L * 2, N);
+		mask_16b = (mask_nz & mask_leq) & 0xFFFF;
+
+		vec_cmov(B, C, mask_nz & mask_leq);
+		vec_copy(C, C_tmp);
+
+		b = (d & mask_16b) | (b & ~mask_16b);
+		L = ((N + 1 - L) & mask_16b) | (L & ~mask_16b);
+
+		for (i = 0; i < GFBITS; i++)
+			B[i] >>= 1;
+	}
+
+	vec_copy(out, C);
+
+	for (i = 0; i < GFBITS; i++)
+		out[i] >>= 64 - (SYS_T + 1);
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/consts.data b/crypt/liboqs/kex_code_mcbits/external/consts.data
new file mode 100755
index 0000000000000000000000000000000000000000..a728344f04e0c04664655795e1fb9be363bc7863
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/consts.data
@@ -0,0 +1,888 @@
+//64
+{
+	0XF00F0FF0F00F0FF0,
+	0XF0F00F0F0F0FF0F0,
+	0X0FF00FF00FF00FF0,
+	0XAA5555AAAA5555AA,
+	0XF00F0FF0F00F0FF0,
+	0X33CCCC33CC3333CC,
+	0XFFFF0000FFFF0000,
+	0XCC33CC3333CC33CC,
+	0X33CC33CC33CC33CC,
+	0X5A5A5A5A5A5A5A5A,
+	0XFF00FF00FF00FF00,
+	0XF00F0FF0F00F0FF0,
+},
+//128
+{
+	0X3C3C3C3C3C3C3C3C,
+	0XF0F0F0F0F0F0F0F0,
+	0X5555AAAA5555AAAA,
+	0XCC3333CCCC3333CC,
+	0XC33CC33CC33CC33C,
+	0X55555555AAAAAAAA,
+	0X33333333CCCCCCCC,
+	0X00FF00FFFF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0X0000000000000000,
+	0X0000FFFFFFFF0000,
+	0XF0F00F0F0F0FF0F0,
+},
+{
+	0X3C3C3C3C3C3C3C3C,
+	0X0F0F0F0F0F0F0F0F,
+	0XAAAA5555AAAA5555,
+	0XCC3333CCCC3333CC,
+	0XC33CC33CC33CC33C,
+	0X55555555AAAAAAAA,
+	0X33333333CCCCCCCC,
+	0XFF00FF0000FF00FF,
+	0X0F0F0F0F0F0F0F0F,
+	0X0000000000000000,
+	0X0000FFFFFFFF0000,
+	0XF0F00F0F0F0FF0F0,
+},
+//256
+{
+	0XAA55AA5555AA55AA,
+	0XCC33CC3333CC33CC,
+	0X33CCCC33CC3333CC,
+	0X55555555AAAAAAAA,
+	0XFF0000FF00FFFF00,
+	0X3CC33CC3C33CC33C,
+	0X5555AAAA5555AAAA,
+	0X0FF00FF00FF00FF0,
+	0XCCCC33333333CCCC,
+	0XF0F0F0F0F0F0F0F0,
+	0X00FFFF0000FFFF00,
+	0XC33CC33CC33CC33C,
+},
+{
+	0X55AA55AAAA55AA55,
+	0XCC33CC3333CC33CC,
+	0XCC3333CC33CCCC33,
+	0X55555555AAAAAAAA,
+	0XFF0000FF00FFFF00,
+	0XC33CC33C3CC33CC3,
+	0XAAAA5555AAAA5555,
+	0XF00FF00FF00FF00F,
+	0X3333CCCCCCCC3333,
+	0X0F0F0F0F0F0F0F0F,
+	0XFF0000FFFF0000FF,
+	0XC33CC33CC33CC33C,
+},
+{
+	0XAA55AA5555AA55AA,
+	0X33CC33CCCC33CC33,
+	0XCC3333CC33CCCC33,
+	0X55555555AAAAAAAA,
+	0X00FFFF00FF0000FF,
+	0X3CC33CC3C33CC33C,
+	0X5555AAAA5555AAAA,
+	0X0FF00FF00FF00FF0,
+	0X3333CCCCCCCC3333,
+	0XF0F0F0F0F0F0F0F0,
+	0X00FFFF0000FFFF00,
+	0XC33CC33CC33CC33C,
+},
+{
+	0X55AA55AAAA55AA55,
+	0X33CC33CCCC33CC33,
+	0X33CCCC33CC3333CC,
+	0X55555555AAAAAAAA,
+	0X00FFFF00FF0000FF,
+	0XC33CC33C3CC33CC3,
+	0XAAAA5555AAAA5555,
+	0XF00FF00FF00FF00F,
+	0XCCCC33333333CCCC,
+	0X0F0F0F0F0F0F0F0F,
+	0XFF0000FFFF0000FF,
+	0XC33CC33CC33CC33C,
+},
+//512
+{
+	0X6699669999669966,
+	0X33CCCC33CC3333CC,
+	0XA5A5A5A55A5A5A5A,
+	0X3C3CC3C3C3C33C3C,
+	0XF00FF00F0FF00FF0,
+	0X55AA55AA55AA55AA,
+	0X3C3CC3C3C3C33C3C,
+	0X0F0F0F0FF0F0F0F0,
+	0X55AA55AA55AA55AA,
+	0X33CCCC33CC3333CC,
+	0XF0F0F0F0F0F0F0F0,
+	0XA55A5AA55AA5A55A,
+},
+{
+	0X9966996666996699,
+	0X33CCCC33CC3333CC,
+	0XA5A5A5A55A5A5A5A,
+	0X3C3CC3C3C3C33C3C,
+	0X0FF00FF0F00FF00F,
+	0XAA55AA55AA55AA55,
+	0X3C3CC3C3C3C33C3C,
+	0XF0F0F0F00F0F0F0F,
+	0XAA55AA55AA55AA55,
+	0XCC3333CC33CCCC33,
+	0X0F0F0F0F0F0F0F0F,
+	0XA55A5AA55AA5A55A,
+},
+{
+	0X6699669999669966,
+	0X33CCCC33CC3333CC,
+	0X5A5A5A5AA5A5A5A5,
+	0XC3C33C3C3C3CC3C3,
+	0X0FF00FF0F00FF00F,
+	0XAA55AA55AA55AA55,
+	0XC3C33C3C3C3CC3C3,
+	0X0F0F0F0FF0F0F0F0,
+	0XAA55AA55AA55AA55,
+	0X33CCCC33CC3333CC,
+	0XF0F0F0F0F0F0F0F0,
+	0XA55A5AA55AA5A55A,
+},
+{
+	0X9966996666996699,
+	0X33CCCC33CC3333CC,
+	0X5A5A5A5AA5A5A5A5,
+	0XC3C33C3C3C3CC3C3,
+	0XF00FF00F0FF00FF0,
+	0X55AA55AA55AA55AA,
+	0XC3C33C3C3C3CC3C3,
+	0XF0F0F0F00F0F0F0F,
+	0X55AA55AA55AA55AA,
+	0XCC3333CC33CCCC33,
+	0X0F0F0F0F0F0F0F0F,
+	0XA55A5AA55AA5A55A,
+},
+{
+	0X6699669999669966,
+	0XCC3333CC33CCCC33,
+	0X5A5A5A5AA5A5A5A5,
+	0X3C3CC3C3C3C33C3C,
+	0X0FF00FF0F00FF00F,
+	0X55AA55AA55AA55AA,
+	0X3C3CC3C3C3C33C3C,
+	0X0F0F0F0FF0F0F0F0,
+	0X55AA55AA55AA55AA,
+	0X33CCCC33CC3333CC,
+	0XF0F0F0F0F0F0F0F0,
+	0XA55A5AA55AA5A55A,
+},
+{
+	0X9966996666996699,
+	0XCC3333CC33CCCC33,
+	0X5A5A5A5AA5A5A5A5,
+	0X3C3CC3C3C3C33C3C,
+	0XF00FF00F0FF00FF0,
+	0XAA55AA55AA55AA55,
+	0X3C3CC3C3C3C33C3C,
+	0XF0F0F0F00F0F0F0F,
+	0XAA55AA55AA55AA55,
+	0XCC3333CC33CCCC33,
+	0X0F0F0F0F0F0F0F0F,
+	0XA55A5AA55AA5A55A,
+},
+{
+	0X6699669999669966,
+	0XCC3333CC33CCCC33,
+	0XA5A5A5A55A5A5A5A,
+	0XC3C33C3C3C3CC3C3,
+	0XF00FF00F0FF00FF0,
+	0XAA55AA55AA55AA55,
+	0XC3C33C3C3C3CC3C3,
+	0X0F0F0F0FF0F0F0F0,
+	0XAA55AA55AA55AA55,
+	0X33CCCC33CC3333CC,
+	0XF0F0F0F0F0F0F0F0,
+	0XA55A5AA55AA5A55A,
+},
+{
+	0X9966996666996699,
+	0XCC3333CC33CCCC33,
+	0XA5A5A5A55A5A5A5A,
+	0XC3C33C3C3C3CC3C3,
+	0X0FF00FF0F00FF00F,
+	0X55AA55AA55AA55AA,
+	0XC3C33C3C3C3CC3C3,
+	0XF0F0F0F00F0F0F0F,
+	0X55AA55AA55AA55AA,
+	0XCC3333CC33CCCC33,
+	0X0F0F0F0F0F0F0F0F,
+	0XA55A5AA55AA5A55A,
+},
+//1024
+{
+	0X9669699696696996,
+	0X6996699669966996,
+	0X6996699669966996,
+	0X00FFFF0000FFFF00,
+	0XFF00FF00FF00FF00,
+	0XF00FF00F0FF00FF0,
+	0XF0F00F0F0F0FF0F0,
+	0XC33C3CC33CC3C33C,
+	0XC33C3CC33CC3C33C,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X6996699669966996,
+	0X6996699669966996,
+	0X00FFFF0000FFFF00,
+	0X00FF00FF00FF00FF,
+	0X0FF00FF0F00FF00F,
+	0X0F0FF0F0F0F00F0F,
+	0X3CC3C33CC33C3CC3,
+	0X3CC3C33CC33C3CC3,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X6996699669966996,
+	0X6996699669966996,
+	0XFF0000FFFF0000FF,
+	0X00FF00FF00FF00FF,
+	0X0FF00FF0F00FF00F,
+	0X0F0FF0F0F0F00F0F,
+	0XC33C3CC33CC3C33C,
+	0XC33C3CC33CC3C33C,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X6996699669966996,
+	0X6996699669966996,
+	0XFF0000FFFF0000FF,
+	0XFF00FF00FF00FF00,
+	0XF00FF00F0FF00FF0,
+	0XF0F00F0F0F0FF0F0,
+	0X3CC3C33CC33C3CC3,
+	0X3CC3C33CC33C3CC3,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X6996699669966996,
+	0X9669966996699669,
+	0XFF0000FFFF0000FF,
+	0X00FF00FF00FF00FF,
+	0XF00FF00F0FF00FF0,
+	0XF0F00F0F0F0FF0F0,
+	0XC33C3CC33CC3C33C,
+	0XC33C3CC33CC3C33C,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X6996699669966996,
+	0X9669966996699669,
+	0XFF0000FFFF0000FF,
+	0XFF00FF00FF00FF00,
+	0X0FF00FF0F00FF00F,
+	0X0F0FF0F0F0F00F0F,
+	0X3CC3C33CC33C3CC3,
+	0X3CC3C33CC33C3CC3,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X6996699669966996,
+	0X9669966996699669,
+	0X00FFFF0000FFFF00,
+	0XFF00FF00FF00FF00,
+	0X0FF00FF0F00FF00F,
+	0X0F0FF0F0F0F00F0F,
+	0XC33C3CC33CC3C33C,
+	0XC33C3CC33CC3C33C,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X6996699669966996,
+	0X9669966996699669,
+	0X00FFFF0000FFFF00,
+	0X00FF00FF00FF00FF,
+	0XF00FF00F0FF00FF0,
+	0XF0F00F0F0F0FF0F0,
+	0X3CC3C33CC33C3CC3,
+	0X3CC3C33CC33C3CC3,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X9669966996699669,
+	0X9669966996699669,
+	0X00FFFF0000FFFF00,
+	0XFF00FF00FF00FF00,
+	0XF00FF00F0FF00FF0,
+	0XF0F00F0F0F0FF0F0,
+	0XC33C3CC33CC3C33C,
+	0XC33C3CC33CC3C33C,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X9669966996699669,
+	0X9669966996699669,
+	0X00FFFF0000FFFF00,
+	0X00FF00FF00FF00FF,
+	0X0FF00FF0F00FF00F,
+	0X0F0FF0F0F0F00F0F,
+	0X3CC3C33CC33C3CC3,
+	0X3CC3C33CC33C3CC3,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X9669966996699669,
+	0X9669966996699669,
+	0XFF0000FFFF0000FF,
+	0X00FF00FF00FF00FF,
+	0X0FF00FF0F00FF00F,
+	0X0F0FF0F0F0F00F0F,
+	0XC33C3CC33CC3C33C,
+	0XC33C3CC33CC3C33C,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X9669966996699669,
+	0X9669966996699669,
+	0XFF0000FFFF0000FF,
+	0XFF00FF00FF00FF00,
+	0XF00FF00F0FF00FF0,
+	0XF0F00F0F0F0FF0F0,
+	0X3CC3C33CC33C3CC3,
+	0X3CC3C33CC33C3CC3,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X9669966996699669,
+	0X6996699669966996,
+	0XFF0000FFFF0000FF,
+	0X00FF00FF00FF00FF,
+	0XF00FF00F0FF00FF0,
+	0XF0F00F0F0F0FF0F0,
+	0XC33C3CC33CC3C33C,
+	0XC33C3CC33CC3C33C,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X9669966996699669,
+	0X6996699669966996,
+	0XFF0000FFFF0000FF,
+	0XFF00FF00FF00FF00,
+	0X0FF00FF0F00FF00F,
+	0X0F0FF0F0F0F00F0F,
+	0X3CC3C33CC33C3CC3,
+	0X3CC3C33CC33C3CC3,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X9669966996699669,
+	0X6996699669966996,
+	0X00FFFF0000FFFF00,
+	0XFF00FF00FF00FF00,
+	0X0FF00FF0F00FF00F,
+	0X0F0FF0F0F0F00F0F,
+	0XC33C3CC33CC3C33C,
+	0XC33C3CC33CC3C33C,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+{
+	0X9669699696696996,
+	0X9669966996699669,
+	0X6996699669966996,
+	0X00FFFF0000FFFF00,
+	0X00FF00FF00FF00FF,
+	0XF00FF00F0FF00FF0,
+	0XF0F00F0F0F0FF0F0,
+	0X3CC3C33CC33C3CC3,
+	0X3CC3C33CC33C3CC3,
+	0XA55A5AA55AA5A55A,
+	0XC33C3CC33CC3C33C,
+	0X3CC3C33C3CC3C33C,
+},
+//2048
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/decrypt.c b/crypt/liboqs/kex_code_mcbits/external/decrypt.c
new file mode 100644
index 0000000000000000000000000000000000000000..6d18ccc0f987bc554fd424b9befb9b079190c469
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/decrypt.c
@@ -0,0 +1,185 @@
+static void scaling(uint64_t out[][GFBITS], uint64_t inv[][GFBITS], const unsigned char *sk, uint64_t *recv) {
+	int i, j;
+	uint64_t sk_int[GFBITS];
+
+	uint64_t eval[64][GFBITS];
+	uint64_t tmp[GFBITS];
+
+	// computing inverses
+
+	for (i = 0; i < GFBITS; i++)
+		sk_int[i] = load8(sk + i * 8);
+
+	fft(eval, sk_int);
+
+	for (i = 0; i < 64; i++)
+		vec_sq(eval[i], eval[i]);
+
+	vec_copy(inv[0], eval[0]);
+
+	for (i = 1; i < 64; i++)
+		vec_mul(inv[i], inv[i - 1], eval[i]);
+
+	vec_inv(tmp, inv[63]);
+
+	for (i = 62; i >= 0; i--) {
+		vec_mul(inv[i + 1], tmp, inv[i]);
+		vec_mul(tmp, tmp, eval[i + 1]);
+	}
+
+	vec_copy(inv[0], tmp);
+
+	//
+
+	for (i = 0; i < 64; i++)
+		for (j = 0; j < GFBITS; j++)
+			out[i][j] = inv[i][j] & recv[i];
+}
+
+static void scaling_inv(uint64_t out[][GFBITS], uint64_t inv[][GFBITS], uint64_t *recv) {
+	int i, j;
+
+	for (i = 0; i < 64; i++)
+		for (j = 0; j < GFBITS; j++)
+			out[i][j] = inv[i][j] & recv[i];
+}
+
+static void preprocess(uint64_t *recv, const unsigned char *s) {
+	int i;
+
+	for (i = 0; i < 64; i++)
+		recv[i] = 0;
+
+	for (i = 0; i < SYND_BYTES / 8; i++)
+		recv[i] = load8(s + i * 8);
+
+	for (i = SYND_BYTES % 8 - 1; i >= 0; i--) {
+		recv[SYND_BYTES / 8] <<= 8;
+		recv[SYND_BYTES / 8] |= s[SYND_BYTES / 8 * 8 + i];
+	}
+}
+
+//
+
+static void acc(uint64_t *c, uint64_t v) {
+	int i;
+
+	uint64_t carry = v;
+	uint64_t t;
+
+	for (i = 0; i < 8; i++) {
+		t = c[i] ^ carry;
+		carry = c[i] & carry;
+
+		c[i] = t;
+	}
+}
+
+static int weight(uint64_t *v) {
+	int i;
+	int w;
+
+	union {
+		uint64_t data_64[8];
+		uint8_t data_8[64];
+	} counter;
+
+	//
+
+	for (i = 0; i < 8; i++)
+		counter.data_64[i] = 0;
+
+	for (i = 0; i < 64; i++)
+		acc(counter.data_64, v[i]);
+
+	transpose_8x64(counter.data_64);
+
+	//
+
+	w = 0;
+	for (i = 0; i < 64; i++)
+		w += counter.data_8[i];
+
+	return w;
+}
+
+//
+
+static void syndrome_adjust(uint64_t in[][GFBITS]) {
+	int i;
+
+	for (i = 0; i < GFBITS; i++) {
+		in[1][i] <<= (128 - SYS_T * 2);
+		in[1][i] >>= (128 - SYS_T * 2);
+	}
+}
+
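+// Decodes syndrome s under secret key sk into the error vector e.
+// Returns 0 on success and -1 otherwise; the re-encryption and weight checks
+// are folded into the return value without branching.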
+static int decrypt(unsigned char *e, const unsigned char *sk, const unsigned char *s) {
+	int i, j;
+
+	uint64_t t;
+
+	uint64_t diff;
+
+	uint64_t inv[64][GFBITS];
+	uint64_t scaled[64][GFBITS];
+	uint64_t eval[64][GFBITS];
+
+	uint64_t error[64];
+
+	uint64_t s_priv[2][GFBITS];
+	uint64_t s_priv_cmp[2][GFBITS];
+	uint64_t locator[GFBITS];
+
+	uint64_t recv[64];
+	uint64_t cond[COND_BYTES / 8];
+
+	//
+
+	for (i = 0; i < COND_BYTES / 8; i++)
+		cond[i] = load8(sk + IRR_BYTES + i * 8);
+
+	preprocess(recv, s);
+	benes_compact(recv, cond, 1);
+	scaling(scaled, inv, sk, recv); // scaling
+	fft_tr(s_priv, scaled);         // transposed FFT
+	syndrome_adjust(s_priv);
+	bm(locator, s_priv); // Berlekamp-Massey
+	fft(eval, locator);  // FFT
+
+	for (i = 0; i < 64; i++) {
+		error[i] = vec_or(eval[i]);
+		error[i] = ~error[i];
+	}
+
+	{
+		// reencrypt
+
+		scaling_inv(scaled, inv, error);
+		fft_tr(s_priv_cmp, scaled);
+		syndrome_adjust(s_priv_cmp);
+
+		diff = 0;
+		for (i = 0; i < 2; i++)
+			for (j = 0; j < GFBITS; j++)
+				diff |= s_priv[i][j] ^ s_priv_cmp[i][j];
+
+		diff |= diff >> 32;
+		diff |= diff >> 16;
+		diff |= diff >> 8;
+		t = diff & 0xFF;
+	}
+
+	benes_compact(error, cond, 0);
+
+	for (i = 0; i < 64; i++)
+		store8(e + i * 8, error[i]);
+
+	//
+
+	t |= weight(error) ^ SYS_T;
+	t -= 1;
+	t >>= 63;
+
+	return (t - 1);
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/encrypt.c b/crypt/liboqs/kex_code_mcbits/external/encrypt.c
new file mode 100644
index 0000000000000000000000000000000000000000..130d58db503f792ab8eb9a03ef78419a2872d1c2
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/encrypt.c
@@ -0,0 +1,98 @@
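+// Samples an error vector e of Hamming weight SYS_T by drawing SYS_T random
+// indices and rejecting the whole draw until all indices are distinct.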
+static void gen_e(unsigned char *e, OQS_RAND *r) {
+	int i, j, eq;
+
+	uint16_t ind[SYS_T];
+	uint64_t e_int[64];
+	uint64_t one = 1;
+	uint64_t mask;
+	uint64_t val[SYS_T];
+
+	while (1) {
+		OQS_RAND_n(r, (uint8_t *) ind, sizeof(ind));
+
+		for (i = 0; i < SYS_T; i++)
+			ind[i] &= (1 << GFBITS) - 1;
+
+		eq = 0;
+		for (i = 1; i < SYS_T; i++)
+			for (j = 0; j < i; j++)
+				if (ind[i] == ind[j])
+					eq = 1;
+
+		if (eq == 0)
+			break;
+	}
+
+	for (j = 0; j < SYS_T; j++)
+		val[j] = one << (ind[j] & 63);
+
+	for (i = 0; i < 64; i++) {
+		e_int[i] = 0;
+
+		for (j = 0; j < SYS_T; j++) {
+			mask = i ^ (ind[j] >> 6);
+			mask -= 1;
+			mask >>= 63;
+			mask = -mask;
+
+			e_int[i] |= val[j] & mask;
+		}
+	}
+
+	for (i = 0; i < 64; i++)
+		store8(e + i * 8, e_int[i]);
+}
+
+#define C ((PK_NCOLS + 63) / 64)
+
+static void syndrome(unsigned char *s, const unsigned char *pk, const unsigned char *e) {
+	int i, j, t;
+
+	const unsigned char *e_ptr = e + SYND_BYTES;
+
+	uint64_t e_int[C];
+	uint64_t row_int[C];
+	uint64_t tmp[8];
+
+	unsigned char b;
+
+	//
+
+	memcpy(s, e, SYND_BYTES);
+
+	e_int[C - 1] = 0;
+	memcpy(e_int, e_ptr, PK_NCOLS / 8);
+
+	for (i = 0; i < PK_NROWS; i += 8) {
+		for (t = 0; t < 8; t++) {
+			row_int[C - 1] = 0;
+			memcpy(row_int, &pk[(i + t) * (PK_NCOLS / 8)], PK_NCOLS / 8);
+
+			tmp[t] = 0;
+			for (j = 0; j < C; j++)
+				tmp[t] ^= e_int[j] & row_int[j];
+		}
+
+		b = 0;
+
+		for (t = 7; t >= 0; t--)
+			tmp[t] ^= (tmp[t] >> 32);
+		for (t = 7; t >= 0; t--)
+			tmp[t] ^= (tmp[t] >> 16);
+		for (t = 7; t >= 0; t--)
+			tmp[t] ^= (tmp[t] >> 8);
+		for (t = 7; t >= 0; t--)
+			tmp[t] ^= (tmp[t] >> 4);
+		for (t = 7; t >= 0; t--) {
+			b <<= 1;
+			b |= (0x6996 >> (tmp[t] & 0xF)) & 1;
+		}
+
+		s[i / 8] ^= b;
+	}
+}
+
+static void encrypt(unsigned char *s, unsigned char *e, const unsigned char *pk, OQS_RAND *r) {
+	gen_e(e, r);
+	syndrome(s, pk, e);
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/fft.c b/crypt/liboqs/kex_code_mcbits/external/fft.c
new file mode 100644
index 0000000000000000000000000000000000000000..8e92b3401c93d8e98bfc8d4f7e81d32aa6d92fde
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/fft.c
@@ -0,0 +1,79 @@
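+// Additive FFT: evaluates the bitsliced polynomial `in` at every field element;
+// radix_conversions performs the basis conversions and butterflies the butterfly stages.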
+static void radix_conversions(uint64_t *in) {
+	int i, j, k;
+
+	const uint64_t mask[5][2] = {
+	    {0x8888888888888888, 0x4444444444444444},
+	    {0xC0C0C0C0C0C0C0C0, 0x3030303030303030},
+	    {0xF000F000F000F000, 0x0F000F000F000F00},
+	    {0xFF000000FF000000, 0x00FF000000FF0000},
+	    {0xFFFF000000000000, 0x0000FFFF00000000}};
+
+	const uint64_t s[5][GFBITS] = {
+#include "scalars.data"
+	};
+
+	//
+
+	for (j = 0; j <= 4; j++) {
+		for (i = 0; i < GFBITS; i++)
+			for (k = 4; k >= j; k--) {
+				in[i] ^= (in[i] & mask[k][0]) >> (1 << k);
+				in[i] ^= (in[i] & mask[k][1]) >> (1 << k);
+			}
+
+		vec_mul(in, in, s[j]); // scaling
+	}
+}
+
+static void butterflies(uint64_t out[][GFBITS], uint64_t *in) {
+	int i, j, k, s, b;
+
+	uint64_t tmp[GFBITS];
+	uint64_t consts[63][GFBITS] = {
+#include "consts.data"
+	};
+
+	uint64_t consts_ptr = 0;
+
+	const unsigned char reversal[64] = {
+	    0, 32, 16, 48, 8, 40, 24, 56,
+	    4, 36, 20, 52, 12, 44, 28, 60,
+	    2, 34, 18, 50, 10, 42, 26, 58,
+	    6, 38, 22, 54, 14, 46, 30, 62,
+	    1, 33, 17, 49, 9, 41, 25, 57,
+	    5, 37, 21, 53, 13, 45, 29, 61,
+	    3, 35, 19, 51, 11, 43, 27, 59,
+	    7, 39, 23, 55, 15, 47, 31, 63};
+
+	// broadcast
+
+	for (j = 0; j < 64; j++)
+		for (i = 0; i < GFBITS; i++) {
+			out[j][i] = (in[i] >> reversal[j]) & 1;
+			out[j][i] = -out[j][i];
+		}
+
+	// butterflies
+
+	for (i = 0; i <= 5; i++) {
+		s = 1 << i;
+
+		for (j = 0; j < 64; j += 2 * s) {
+			for (k = j; k < j + s; k++) {
+				vec_mul(tmp, out[k + s], consts[consts_ptr + (k - j)]);
+
+				for (b = 0; b < GFBITS; b++)
+					out[k][b] ^= tmp[b];
+				for (b = 0; b < GFBITS; b++)
+					out[k + s][b] ^= out[k][b];
+			}
+		}
+
+		consts_ptr += (1 << i);
+	}
+}
+
+static void fft(uint64_t out[][GFBITS], uint64_t *in) {
+	radix_conversions(in);
+	butterflies(out, in);
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/fft_tr.c b/crypt/liboqs/kex_code_mcbits/external/fft_tr.c
new file mode 100644
index 0000000000000000000000000000000000000000..2ed75d26a39b8b4a57725b25bab1b66c43ff2966
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/fft_tr.c
@@ -0,0 +1,249 @@
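+// Transposed additive FFT, used during decoding to compute the private syndrome.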
+#define vec_add(z, x, y)           \
+	for (b = 0; b < GFBITS; b++) { \
+		z[b] = x[b] ^ y[b];        \
+	}
+
+static void radix_conversions_tr(uint64_t in[][GFBITS]) {
+	int i, j, k;
+
+	const uint64_t mask[6][2] = {
+	    {0x2222222222222222, 0x4444444444444444},
+	    {0x0C0C0C0C0C0C0C0C, 0x3030303030303030},
+	    {0x00F000F000F000F0, 0x0F000F000F000F00},
+	    {0x0000FF000000FF00, 0x00FF000000FF0000},
+	    {0x00000000FFFF0000, 0x0000FFFF00000000},
+	    {0xFFFFFFFF00000000, 0x00000000FFFFFFFF}};
+
+	const uint64_t s[5][2][GFBITS] = {
+#include "scalars_2x.data"
+	};
+
+	//
+
+	for (j = 5; j >= 0; j--) {
+		if (j < 5) {
+			vec_mul(in[0], in[0], s[j][0]); // scaling
+			vec_mul(in[1], in[1], s[j][1]); // scaling
+		}
+
+		for (i = 0; i < GFBITS; i++)
+			for (k = j; k <= 4; k++) {
+				in[0][i] ^= (in[0][i] & mask[k][0]) << (1 << k);
+				in[0][i] ^= (in[0][i] & mask[k][1]) << (1 << k);
+
+				in[1][i] ^= (in[1][i] & mask[k][0]) << (1 << k);
+				in[1][i] ^= (in[1][i] & mask[k][1]) << (1 << k);
+			}
+
+		for (i = 0; i < GFBITS; i++) {
+			in[1][i] ^= (in[0][i] & mask[5][0]) >> 32;
+			in[1][i] ^= (in[1][i] & mask[5][1]) << 32;
+		}
+	}
+}
+
+static void butterflies_tr(uint64_t out[][GFBITS], uint64_t in[][GFBITS]) {
+	int i, j, k, s, b;
+
+	uint64_t tmp[GFBITS];
+	uint64_t pre[6][GFBITS];
+	uint64_t buf[64];
+
+	const uint64_t consts[63][GFBITS] = {
+#include "consts.data"
+	};
+
+	uint64_t consts_ptr = 63;
+
+	const unsigned char reversal[64] = {
+	    0, 32, 16, 48, 8, 40, 24, 56,
+	    4, 36, 20, 52, 12, 44, 28, 60,
+	    2, 34, 18, 50, 10, 42, 26, 58,
+	    6, 38, 22, 54, 14, 46, 30, 62,
+	    1, 33, 17, 49, 9, 41, 25, 57,
+	    5, 37, 21, 53, 13, 45, 29, 61,
+	    3, 35, 19, 51, 11, 43, 27, 59,
+	    7, 39, 23, 55, 15, 47, 31, 63};
+
+	const uint16_t beta[6] = {8, 1300, 3408, 1354, 2341, 1154};
+
+	// butterflies
+
+	for (i = 5; i >= 0; i--) {
+		s = 1 << i;
+		consts_ptr -= s;
+
+		for (j = 0; j < 64; j += 2 * s)
+			for (k = j; k < j + s; k++) {
+				vec_add(in[k], in[k], in[k + s]);
+				vec_mul(tmp, in[k], consts[consts_ptr + (k - j)]);
+				vec_add(in[k + s], in[k + s], tmp);
+			}
+	}
+
+	// transpose
+
+	for (i = 0; i < GFBITS; i++) {
+		for (j = 0; j < 64; j++)
+			buf[reversal[j]] = in[j][i];
+
+		transpose_64x64_compact(buf, buf);
+
+		for (j = 0; j < 64; j++)
+			in[j][i] = buf[j];
+	}
+
+	// broadcast
+
+	vec_copy(pre[0], in[32]);
+	vec_add(in[33], in[33], in[32]);
+	vec_copy(pre[1], in[33]);
+	vec_add(in[35], in[35], in[33]);
+	vec_add(pre[0], pre[0], in[35]);
+	vec_add(in[34], in[34], in[35]);
+	vec_copy(pre[2], in[34]);
+	vec_add(in[38], in[38], in[34]);
+	vec_add(pre[0], pre[0], in[38]);
+	vec_add(in[39], in[39], in[38]);
+	vec_add(pre[1], pre[1], in[39]);
+	vec_add(in[37], in[37], in[39]);
+	vec_add(pre[0], pre[0], in[37]);
+	vec_add(in[36], in[36], in[37]);
+	vec_copy(pre[3], in[36]);
+	vec_add(in[44], in[44], in[36]);
+	vec_add(pre[0], pre[0], in[44]);
+	vec_add(in[45], in[45], in[44]);
+	vec_add(pre[1], pre[1], in[45]);
+	vec_add(in[47], in[47], in[45]);
+	vec_add(pre[0], pre[0], in[47]);
+	vec_add(in[46], in[46], in[47]);
+	vec_add(pre[2], pre[2], in[46]);
+	vec_add(in[42], in[42], in[46]);
+	vec_add(pre[0], pre[0], in[42]);
+	vec_add(in[43], in[43], in[42]);
+	vec_add(pre[1], pre[1], in[43]);
+	vec_add(in[41], in[41], in[43]);
+	vec_add(pre[0], pre[0], in[41]);
+	vec_add(in[40], in[40], in[41]);
+	vec_copy(pre[4], in[40]);
+	vec_add(in[56], in[56], in[40]);
+	vec_add(pre[0], pre[0], in[56]);
+	vec_add(in[57], in[57], in[56]);
+	vec_add(pre[1], pre[1], in[57]);
+	vec_add(in[59], in[59], in[57]);
+	vec_add(pre[0], pre[0], in[59]);
+	vec_add(in[58], in[58], in[59]);
+	vec_add(pre[2], pre[2], in[58]);
+	vec_add(in[62], in[62], in[58]);
+	vec_add(pre[0], pre[0], in[62]);
+	vec_add(in[63], in[63], in[62]);
+	vec_add(pre[1], pre[1], in[63]);
+	vec_add(in[61], in[61], in[63]);
+	vec_add(pre[0], pre[0], in[61]);
+	vec_add(in[60], in[60], in[61]);
+	vec_add(pre[3], pre[3], in[60]);
+	vec_add(in[52], in[52], in[60]);
+	vec_add(pre[0], pre[0], in[52]);
+	vec_add(in[53], in[53], in[52]);
+	vec_add(pre[1], pre[1], in[53]);
+	vec_add(in[55], in[55], in[53]);
+	vec_add(pre[0], pre[0], in[55]);
+	vec_add(in[54], in[54], in[55]);
+	vec_add(pre[2], pre[2], in[54]);
+	vec_add(in[50], in[50], in[54]);
+	vec_add(pre[0], pre[0], in[50]);
+	vec_add(in[51], in[51], in[50]);
+	vec_add(pre[1], pre[1], in[51]);
+	vec_add(in[49], in[49], in[51]);
+	vec_add(pre[0], pre[0], in[49]);
+	vec_add(in[48], in[48], in[49]);
+	vec_copy(pre[5], in[48]);
+	vec_add(in[16], in[16], in[48]);
+	vec_add(pre[0], pre[0], in[16]);
+	vec_add(in[17], in[17], in[16]);
+	vec_add(pre[1], pre[1], in[17]);
+	vec_add(in[19], in[19], in[17]);
+	vec_add(pre[0], pre[0], in[19]);
+	vec_add(in[18], in[18], in[19]);
+	vec_add(pre[2], pre[2], in[18]);
+	vec_add(in[22], in[22], in[18]);
+	vec_add(pre[0], pre[0], in[22]);
+	vec_add(in[23], in[23], in[22]);
+	vec_add(pre[1], pre[1], in[23]);
+	vec_add(in[21], in[21], in[23]);
+	vec_add(pre[0], pre[0], in[21]);
+	vec_add(in[20], in[20], in[21]);
+	vec_add(pre[3], pre[3], in[20]);
+	vec_add(in[28], in[28], in[20]);
+	vec_add(pre[0], pre[0], in[28]);
+	vec_add(in[29], in[29], in[28]);
+	vec_add(pre[1], pre[1], in[29]);
+	vec_add(in[31], in[31], in[29]);
+	vec_add(pre[0], pre[0], in[31]);
+	vec_add(in[30], in[30], in[31]);
+	vec_add(pre[2], pre[2], in[30]);
+	vec_add(in[26], in[26], in[30]);
+	vec_add(pre[0], pre[0], in[26]);
+	vec_add(in[27], in[27], in[26]);
+	vec_add(pre[1], pre[1], in[27]);
+	vec_add(in[25], in[25], in[27]);
+	vec_add(pre[0], pre[0], in[25]);
+	vec_add(in[24], in[24], in[25]);
+	vec_add(pre[4], pre[4], in[24]);
+	vec_add(in[8], in[8], in[24]);
+	vec_add(pre[0], pre[0], in[8]);
+	vec_add(in[9], in[9], in[8]);
+	vec_add(pre[1], pre[1], in[9]);
+	vec_add(in[11], in[11], in[9]);
+	vec_add(pre[0], pre[0], in[11]);
+	vec_add(in[10], in[10], in[11]);
+	vec_add(pre[2], pre[2], in[10]);
+	vec_add(in[14], in[14], in[10]);
+	vec_add(pre[0], pre[0], in[14]);
+	vec_add(in[15], in[15], in[14]);
+	vec_add(pre[1], pre[1], in[15]);
+	vec_add(in[13], in[13], in[15]);
+	vec_add(pre[0], pre[0], in[13]);
+	vec_add(in[12], in[12], in[13]);
+	vec_add(pre[3], pre[3], in[12]);
+	vec_add(in[4], in[4], in[12]);
+	vec_add(pre[0], pre[0], in[4]);
+	vec_add(in[5], in[5], in[4]);
+	vec_add(pre[1], pre[1], in[5]);
+	vec_add(in[7], in[7], in[5]);
+	vec_add(pre[0], pre[0], in[7]);
+	vec_add(in[6], in[6], in[7]);
+	vec_add(pre[2], pre[2], in[6]);
+	vec_add(in[2], in[2], in[6]);
+	vec_add(pre[0], pre[0], in[2]);
+	vec_add(in[3], in[3], in[2]);
+	vec_add(pre[1], pre[1], in[3]);
+	vec_add(in[1], in[1], in[3]);
+
+	vec_add(pre[0], pre[0], in[1]);
+	vec_add(out[0], in[0], in[1]);
+
+	//
+
+	for (j = 0; j < GFBITS; j++) {
+		tmp[j] = (beta[0] >> j) & 1;
+		tmp[j] = -tmp[j];
+	}
+
+	vec_mul(out[1], pre[0], tmp);
+
+	for (i = 1; i < 6; i++) {
+		for (j = 0; j < GFBITS; j++) {
+			tmp[j] = (beta[i] >> j) & 1;
+			tmp[j] = -tmp[j];
+		}
+
+		vec_mul(tmp, pre[i], tmp);
+		vec_add(out[1], out[1], tmp);
+	}
+}
+
+static void fft_tr(uint64_t out[][GFBITS], uint64_t in[][GFBITS]) {
+	butterflies_tr(out, in);
+	radix_conversions_tr(out);
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/gf.c b/crypt/liboqs/kex_code_mcbits/external/gf.c
new file mode 100644
index 0000000000000000000000000000000000000000..5c15192242a5eadd7273174267844c21abc3b034
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/gf.c
@@ -0,0 +1,113 @@
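+// Arithmetic in GF(2^GFBITS): gf_mul is a carry-less multiplication followed by
+// modular reduction, gf_inv inverts via a fixed square-and-multiply chain, and
+// GF_mul multiplies polynomials over the field modulo a fixed degree-SYS_T polynomial.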
+typedef uint16_t gf;
+
+static gf gf_mul(gf in0, gf in1) {
+	int i;
+
+	uint32_t tmp;
+	uint32_t t0;
+	uint32_t t1;
+	uint32_t t;
+
+	t0 = in0;
+	t1 = in1;
+
+	tmp = t0 * (t1 & 1);
+
+	for (i = 1; i < GFBITS; i++)
+		tmp ^= (t0 * (t1 & (1 << i)));
+
+	t = tmp & 0x7FC000;
+	tmp ^= t >> 9;
+	tmp ^= t >> 12;
+
+	t = tmp & 0x3000;
+	tmp ^= t >> 9;
+	tmp ^= t >> 12;
+
+	return tmp & ((1 << GFBITS) - 1);
+}
+
+static gf gf_sq(gf in) {
+	const uint32_t B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF};
+
+	uint32_t x = in;
+	uint32_t t;
+
+	x = (x | (x << 8)) & B[3];
+	x = (x | (x << 4)) & B[2];
+	x = (x | (x << 2)) & B[1];
+	x = (x | (x << 1)) & B[0];
+
+	t = x & 0x7FC000;
+	x ^= t >> 9;
+	x ^= t >> 12;
+
+	t = x & 0x3000;
+	x ^= t >> 9;
+	x ^= t >> 12;
+
+	return x & ((1 << GFBITS) - 1);
+}
+
+static gf gf_inv(gf in) {
+	gf tmp_11;
+	gf tmp_1111;
+
+	gf out = in;
+
+	out = gf_sq(out);
+	tmp_11 = gf_mul(out, in); // 11
+
+	out = gf_sq(tmp_11);
+	out = gf_sq(out);
+	tmp_1111 = gf_mul(out, tmp_11); // 1111
+
+	out = gf_sq(tmp_1111);
+	out = gf_sq(out);
+	out = gf_sq(out);
+	out = gf_sq(out);
+	out = gf_mul(out, tmp_1111); // 11111111
+
+	out = gf_sq(out);
+	out = gf_sq(out);
+	out = gf_mul(out, tmp_11); // 1111111111
+
+	out = gf_sq(out);
+	out = gf_mul(out, in); // 11111111111
+
+	return gf_sq(out); // 111111111110
+}
+
+static gf gf_diff(gf a, gf b) {
+	uint32_t t = (uint32_t)(a ^ b);
+
+	t = ((t - 1) >> 20) ^ 0xFFF;
+
+	return (gf) t;
+}
+
+///////////////////////////////////////////////////////////
+
+static void GF_mul(gf *out, gf *in0, gf *in1) {
+	int i, j;
+
+	gf tmp[123];
+
+	for (i = 0; i < 123; i++)
+		tmp[i] = 0;
+
+	for (i = 0; i < 62; i++)
+		for (j = 0; j < 62; j++)
+			tmp[i + j] ^= gf_mul(in0[i], in1[j]);
+
+	//
+
+	for (i = 122; i >= 62; i--) {
+		tmp[i - 55] ^= gf_mul(tmp[i], (gf) 1763);
+		tmp[i - 61] ^= gf_mul(tmp[i], (gf) 1722);
+		tmp[i - 62] ^= gf_mul(tmp[i], (gf) 4033);
+	}
+
+	for (i = 0; i < 62; i++)
+		out[i] = tmp[i];
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/implementors b/crypt/liboqs/kex_code_mcbits/external/implementors
new file mode 100755
index 0000000000000000000000000000000000000000..757e33743d8a2b2c4d53f7cc88e4ed096439a205
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/implementors
@@ -0,0 +1 @@
+Tung Chou
diff --git a/crypt/liboqs/kex_code_mcbits/external/operations.c b/crypt/liboqs/kex_code_mcbits/external/operations.c
new file mode 100644
index 0000000000000000000000000000000000000000..4946e80437eceb98401df844f97138a45920d775
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/operations.c
@@ -0,0 +1,114 @@
+#ifdef ENABLE_CODE_MCBITS // exclude this file from the build when libsodium is not present (e.g. Visual Studio)
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "params.h"
+#include <oqs/rand.h>
+#include <oqs/sha3.h>
+#include <sodium/crypto_onetimeauth_poly1305.h>
+#include <sodium/crypto_stream_salsa20.h>
+#include <sodium/randombytes.h>
+
+// clang-format off
+// (order of include matters)
+#include "util.c"
+#include "transpose.c"
+#include "benes.c"
+#include "gf.c"
+#include "vec.c"
+#include "bm.c"
+#include "fft.c"
+#include "fft_tr.c"
+#include "sk_gen.c"
+#include "pk_gen.c"
+
+#include "encrypt.c"
+#include "decrypt.c"
+// clang-format on
+
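+// Hybrid encryption: encrypt() encodes a fresh random error vector e into the
+// syndrome, e is hashed to derive the symmetric keys, and the message is then
+// encrypted with Salsa20 and authenticated with Poly1305.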
+int oqs_kex_mcbits_encrypt(
+    unsigned char *c, size_t *clen,
+    const unsigned char *m, unsigned long long mlen,
+    const unsigned char *pk,
+    OQS_RAND *r) {
+	unsigned char e[1 << (GFBITS - 3)];
+	unsigned char key[64];
+	unsigned char nonce[8] = {0};
+
+//
+
+#define ct (c + SYND_BYTES)
+#define tag (ct + mlen)
+
+	encrypt(c, e, pk, r);
+
+	//crypto_hash_keccakc1024(key, e, sizeof(e)); TODO: confirm that the OQS_SHA3_sha3512 call below is an acceptable replacement
+	OQS_SHA3_sha3512(key, e, sizeof(e));
+
+	crypto_stream_salsa20_xor(ct, m, mlen, nonce, key);
+	crypto_onetimeauth_poly1305(tag, ct, mlen, key + 32);
+
+	*clen = SYND_BYTES + mlen + 16;
+
+#undef ct
+#undef tag
+
+	return 0;
+}
+
+int oqs_kex_mcbits_decrypt(
+    unsigned char *m, size_t *mlen,
+    const unsigned char *c, unsigned long long clen,
+    const unsigned char *sk) {
+	int ret;
+	int ret_verify;
+	int ret_decrypt;
+
+	unsigned char key[64];
+	unsigned char nonce[8] = {0};
+	unsigned char e[1 << (GFBITS - 3)];
+
+	//
+
+	if (clen < SYND_BYTES + 16)
+		return -1;
+	else
+		*mlen = clen - SYND_BYTES - 16;
+
+#define ct (c + SYND_BYTES)
+#define tag (ct + *mlen)
+
+	ret_decrypt = decrypt(e, sk, c);
+
+	//crypto_hash_keccakc1024(key, e, sizeof(e)); TODO: confirm that the OQS_SHA3_sha3512 call below is an acceptable replacement
+	OQS_SHA3_sha3512(key, e, sizeof(e));
+
+	ret_verify = crypto_onetimeauth_poly1305_verify(tag, ct, *mlen, key + 32);
+	crypto_stream_salsa20_xor(m, ct, *mlen, nonce, key);
+
+	ret = ret_verify | ret_decrypt;
+
+#undef ct
+#undef tag
+
+	return ret;
+}
+
+int oqs_kex_mcbits_gen_keypair(
+    unsigned char *pk,
+    unsigned char *sk,
+    OQS_RAND *r
+
+    ) {
+	while (1) {
+		sk_gen(sk, r);
+
+		if (pk_gen(pk, sk) == 0)
+			break;
+	}
+
+	return 0;
+}
+
+#endif
diff --git a/crypt/liboqs/kex_code_mcbits/external/params.h b/crypt/liboqs/kex_code_mcbits/external/params.h
new file mode 100644
index 0000000000000000000000000000000000000000..b4295d4a43c3356acaf4347f33854324201e504c
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/params.h
@@ -0,0 +1,9 @@
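+// McBits parameters: a binary Goppa code over GF(2^GFBITS) (code length 4096)
+// correcting SYS_T = 62 errors.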
+#define GFBITS 12
+#define SYS_T 62
+
+#define PK_NROWS (SYS_T * GFBITS)
+#define PK_NCOLS ((1 << GFBITS) - SYS_T * GFBITS)
+
+#define IRR_BYTES (GFBITS * 8)
+#define COND_BYTES (736 * 8)
+#define SYND_BYTES (PK_NROWS / 8)
diff --git a/crypt/liboqs/kex_code_mcbits/external/pk_gen.c b/crypt/liboqs/kex_code_mcbits/external/pk_gen.c
new file mode 100644
index 0000000000000000000000000000000000000000..c4b550a4ccd5c280f968910421de296f6ba11c10
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/pk_gen.c
@@ -0,0 +1,121 @@
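+// Derives the public key from the secret key: evaluates the secret Goppa
+// polynomial to build the parity-check matrix, applies the secret permutation,
+// and reduces to systematic form by Gaussian elimination (returns -1 if the
+// matrix cannot be brought to systematic form).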
+static int pk_gen(unsigned char *pk, const unsigned char *sk) {
+	unsigned char *pk_ptr = pk;
+
+	int i, j, k;
+	int row, c, tail;
+
+	uint64_t mat[GFBITS * SYS_T][64];
+	uint64_t mask;
+	uint64_t u;
+
+	uint64_t points[64][GFBITS] = {
+#include "points.data"
+	};
+
+	uint64_t sk_int[GFBITS];
+
+	uint64_t eval[64][GFBITS];
+	uint64_t inv[64][GFBITS];
+	uint64_t tmp[GFBITS];
+
+	uint64_t cond[COND_BYTES / 8];
+
+	// compute the inverses
+
+	for (i = 0; i < GFBITS; i++)
+		sk_int[i] = load8(sk + i * 8);
+
+	fft(eval, sk_int);
+
+	vec_copy(inv[0], eval[0]);
+
+	for (i = 1; i < 64; i++)
+		vec_mul(inv[i], inv[i - 1], eval[i]);
+
+	vec_inv(tmp, inv[63]);
+
+	for (i = 62; i >= 0; i--) {
+		vec_mul(inv[i + 1], tmp, inv[i]);
+		vec_mul(tmp, tmp, eval[i + 1]);
+	}
+
+	vec_copy(inv[0], tmp);
+
+	// fill matrix
+
+	for (j = 0; j < 64; j++)
+		for (k = 0; k < GFBITS; k++)
+			mat[k][j] = inv[j][k];
+
+	for (i = 1; i < SYS_T; i++)
+		for (j = 0; j < 64; j++) {
+			vec_mul(inv[j], inv[j], points[j]);
+
+			for (k = 0; k < GFBITS; k++)
+				mat[i * GFBITS + k][j] = inv[j][k];
+		}
+
+	// permute
+
+	for (i = 0; i < COND_BYTES / 8; i++)
+		cond[i] = load8(sk + IRR_BYTES + i * 8);
+
+	for (i = 0; i < GFBITS * SYS_T; i++)
+		benes_compact(mat[i], cond, 0);
+
+	// gaussian elimination
+
+	for (i = 0; i < (GFBITS * SYS_T + 63) / 64; i++)
+		for (j = 0; j < 64; j++) {
+			row = i * 64 + j;
+
+			if (row >= GFBITS * SYS_T)
+				break;
+
+			for (k = row + 1; k < GFBITS * SYS_T; k++) {
+				mask = mat[row][i] ^ mat[k][i];
+				mask >>= j;
+				mask &= 1;
+				mask = -mask;
+
+				for (c = 0; c < 64; c++)
+					mat[row][c] ^= mat[k][c] & mask;
+			}
+
+			if (((mat[row][i] >> j) & 1) == 0) { // return if not invertible
+				return -1;
+			}
+
+			for (k = 0; k < GFBITS * SYS_T; k++) {
+				if (k != row) {
+					mask = mat[k][i] >> j;
+					mask &= 1;
+					mask = -mask;
+
+					for (c = 0; c < 64; c++)
+						mat[k][c] ^= mat[row][c] & mask;
+				}
+			}
+		}
+
+	// store pk
+
+	tail = ((GFBITS * SYS_T) & 63) >> 3;
+
+	for (i = 0; i < GFBITS * SYS_T; i++) {
+		u = mat[i][(GFBITS * SYS_T + 63) / 64 - 1];
+
+		for (k = tail; k < 8; k++)
+			pk_ptr[k - tail] = (u >> (8 * k)) & 0xFF;
+
+		pk_ptr += 8 - tail;
+
+		for (j = (GFBITS * SYS_T + 63) / 64; j < 64; j++) {
+			store8(pk_ptr, mat[i][j]);
+
+			pk_ptr += 8;
+		}
+	}
+
+	return 0;
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/points.data b/crypt/liboqs/kex_code_mcbits/external/points.data
new file mode 100755
index 0000000000000000000000000000000000000000..7ee9f689461dad87b9c9bae9e771ad806eebb569
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/points.data
@@ -0,0 +1,896 @@
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+},
+{
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFF0000FFFF0000,
+	0XFF00FF00FF00FF00,
+	0XF0F0F0F0F0F0F0F0,
+	0XCCCCCCCCCCCCCCCC,
+	0XAAAAAAAAAAAAAAAA,
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/scalars.data b/crypt/liboqs/kex_code_mcbits/external/scalars.data
new file mode 100755
index 0000000000000000000000000000000000000000..aa8f64b95195a7b914e4281fadd55708db751a1f
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/scalars.data
@@ -0,0 +1,70 @@
+{
+	0XF3CFC030FC30F003,
+	0X3FCF0F003C00C00C,
+	0X30033CC300C0C03C,
+	0XCCFF0F3C0F30F0C0,
+	0X0300C03FF303C3F0,
+	0X3FFF3C0FF0CCCCC0,
+	0XF3FFF0C00F3C3CC0,
+	0X3003333FFFC3C000,
+	0X0FF30FFFC3FFF300,
+	0XFFC0F300F0F0CC00,
+	0XC0CFF3FCCC3CFC00,
+	0XFC3C03F0F330C000,
+},
+{
+	0X000F00000000F00F,
+	0X00000F00F00000F0,
+	0X0F00000F00000F00,
+	0XF00F00F00F000000,
+	0X00F00000000000F0,
+	0X0000000F00000000,
+	0XF00000000F00F000,
+	0X00F00F00000F0000,
+	0X0000F00000F00F00,
+	0X000F00F00F00F000,
+	0X00F00F0000000000,
+	0X0000000000F00000,
+},
+{
+	0X0000FF00FF0000FF,
+	0X0000FF000000FF00,
+	0XFF0000FF00FF0000,
+	0XFFFF0000FF000000,
+	0X00FF00FF00FF0000,
+	0X0000FFFFFF000000,
+	0X00FFFF00FF000000,
+	0XFFFFFF0000FF0000,
+	0XFFFF00FFFF00FF00,
+	0X0000FF0000000000,
+	0XFFFFFF00FF000000,
+	0X00FF000000000000,
+},
+{
+	0X000000000000FFFF,
+	0X00000000FFFF0000,
+	0X0000000000000000,
+	0XFFFF000000000000,
+	0X00000000FFFF0000,
+	0X0000FFFF00000000,
+	0X0000000000000000,
+	0X00000000FFFF0000,
+	0X0000FFFF00000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+},
+{
+	0X00000000FFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFFFFFF00000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/scalars_2x.data b/crypt/liboqs/kex_code_mcbits/external/scalars_2x.data
new file mode 100755
index 0000000000000000000000000000000000000000..e7c7fee5586e126a5ba27469fa3c7cc2ed0a1e47
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/scalars_2x.data
@@ -0,0 +1,140 @@
+{{
+	0XF3CFC030FC30F003,
+	0X3FCF0F003C00C00C,
+	0X30033CC300C0C03C,
+	0XCCFF0F3C0F30F0C0,
+	0X0300C03FF303C3F0,
+	0X3FFF3C0FF0CCCCC0,
+	0XF3FFF0C00F3C3CC0,
+	0X3003333FFFC3C000,
+	0X0FF30FFFC3FFF300,
+	0XFFC0F300F0F0CC00,
+	0XC0CFF3FCCC3CFC00,
+	0XFC3C03F0F330C000,
+},
+{
+	0X000C03C0C3C0330C,
+	0XF330CFFCC00F33C0,
+	0XCCF330F00F3C0333,
+	0XFF03FFF3FF0CF0C0,
+	0X3CC3FCF00FCC303C,
+	0X0F000C0FC30303F3,
+	0XCF0FC3FF333CCF3C,
+	0X003F3FC3C0FF333F,
+	0X3CC3F0F3CF0FF00F,
+	0XF3F33CC03FC30CC0,
+	0X3CC330CFC333F33F,
+	0X3CC0303FF3C3FFFC,
+}},
+{{
+	0X000F00000000F00F,
+	0X00000F00F00000F0,
+	0X0F00000F00000F00,
+	0XF00F00F00F000000,
+	0X00F00000000000F0,
+	0X0000000F00000000,
+	0XF00000000F00F000,
+	0X00F00F00000F0000,
+	0X0000F00000F00F00,
+	0X000F00F00F00F000,
+	0X00F00F0000000000,
+	0X0000000000F00000,
+},
+{
+	0X0F00F00F00000000,
+	0XF00000000000F000,
+	0X00000F00000000F0,
+	0X0F00F00000F00000,
+	0X000F00000F00F00F,
+	0X00F00F00F00F0000,
+	0X0F00F00000000000,
+	0X000000000F000000,
+	0X00F00000000F00F0,
+	0X0000F00F00000F00,
+	0XF00000F00000F00F,
+	0X00000F00F00F00F0,
+}},
+{{
+	0X0000FF00FF0000FF,
+	0X0000FF000000FF00,
+	0XFF0000FF00FF0000,
+	0XFFFF0000FF000000,
+	0X00FF00FF00FF0000,
+	0X0000FFFFFF000000,
+	0X00FFFF00FF000000,
+	0XFFFFFF0000FF0000,
+	0XFFFF00FFFF00FF00,
+	0X0000FF0000000000,
+	0XFFFFFF00FF000000,
+	0X00FF000000000000,
+},
+{
+	0XFF00FFFFFF000000,
+	0XFF0000FFFF000000,
+	0XFFFF00FFFF000000,
+	0XFF00FFFFFFFFFF00,
+	0X00000000FF00FF00,
+	0XFFFFFFFF00FF0000,
+	0X00FFFFFF00FF0000,
+	0XFFFF00FFFF00FFFF,
+	0XFFFF0000FFFFFFFF,
+	0XFF00000000FF0000,
+	0X000000FF00FF00FF,
+	0X00FF00FF00FFFF00,
+}},
+{{
+	0X000000000000FFFF,
+	0X00000000FFFF0000,
+	0X0000000000000000,
+	0XFFFF000000000000,
+	0X00000000FFFF0000,
+	0X0000FFFF00000000,
+	0X0000000000000000,
+	0X00000000FFFF0000,
+	0X0000FFFF00000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+},
+{
+	0X0000000000000000,
+	0XFFFF000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFF00000000FFFF,
+	0X0000000000000000,
+	0X0000FFFF00000000,
+	0XFFFF00000000FFFF,
+	0X00000000FFFF0000,
+	0X0000000000000000,
+	0XFFFF00000000FFFF,
+	0X00000000FFFF0000,
+}},
+{{
+	0X00000000FFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFFFFFF00000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0X0000000000000000,
+	0X0000000000000000,
+	0X0000000000000000,
+},
+{
+	0X0000000000000000,
+	0X0000000000000000,
+	0X00000000FFFFFFFF,
+	0XFFFFFFFF00000000,
+	0XFFFFFFFF00000000,
+	0X0000000000000000,
+	0XFFFFFFFF00000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+	0X0000000000000000,
+	0XFFFFFFFFFFFFFFFF,
+	0XFFFFFFFF00000000,
+}}
diff --git a/crypt/liboqs/kex_code_mcbits/external/sk_gen.c b/crypt/liboqs/kex_code_mcbits/external/sk_gen.c
new file mode 100644
index 0000000000000000000000000000000000000000..860c70469a04c58a7885279cea093d5fa9d63b9f
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/sk_gen.c
@@ -0,0 +1,98 @@
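+// Compute the monic degree-SYS_T minimal polynomial of the field element described by f.
+// Returns 0 on success, -1 when the candidate must be rejected (the linear system is singular).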
+static int irr_gen(gf *out, gf *f) {
+	int i, j, k, c;
+
+	gf mat[SYS_T + 1][SYS_T];
+	gf mask, inv, t;
+
+	// fill matrix
+
+	mat[0][0] = 1;
+	for (i = 1; i < SYS_T; i++)
+		mat[0][i] = 0;
+
+	for (i = 0; i < SYS_T; i++)
+		mat[1][i] = f[i];
+
+	for (j = 2; j <= SYS_T; j++)
+		GF_mul(mat[j], mat[j - 1], f);
+
+	// gaussian
+
+	for (j = 0; j < SYS_T; j++) {
+		for (k = j + 1; k < SYS_T; k++) {
+			mask = gf_diff(mat[j][j], mat[j][k]);
+
+			for (c = 0; c < SYS_T + 1; c++)
+				mat[c][j] ^= mat[c][k] & mask;
+		}
+
+		if (mat[j][j] == 0) { // return if not invertible
+			return -1;
+		}
+
+		// compute inverse
+
+		inv = gf_inv(mat[j][j]);
+
+		for (c = 0; c < SYS_T + 1; c++)
+			mat[c][j] = gf_mul(mat[c][j], inv);
+
+		// eliminate the pivot row's entries from the other columns
+
+		for (k = 0; k < SYS_T; k++) {
+			t = mat[j][k];
+
+			if (k != j) {
+				for (c = 0; c < SYS_T + 1; c++)
+					mat[c][k] ^= gf_mul(mat[c][j], t);
+			}
+		}
+	}
+
+	// the reduced last row gives the coefficients of the monic minimal polynomial
+
+	for (i = 0; i < SYS_T; i++)
+		out[i] = mat[SYS_T][i];
+
+	out[SYS_T] = 1;
+
+	return 0;
+}
+
+static void sk_gen(unsigned char *sk, OQS_RAND *r) {
+	uint64_t cond[COND_BYTES / 8];
+	uint64_t sk_int[GFBITS];
+
+	int i, j;
+
+	gf irr[SYS_T + 1];
+	gf f[SYS_T];
+
+	while (1) {
+		OQS_RAND_n(r, (uint8_t *) f, sizeof(f));
+
+		for (i = 0; i < SYS_T; i++)
+			f[i] &= (1 << GFBITS) - 1;
+
+		if (irr_gen(irr, f) == 0)
+			break;
+	}
+
+	for (i = 0; i < GFBITS; i++) {
+		sk_int[i] = 0;
+
+		for (j = SYS_T; j >= 0; j--) {
+			sk_int[i] <<= 1;
+			sk_int[i] |= (irr[j] >> i) & 1;
+		}
+
+		store8(sk + i * 8, sk_int[i]);
+	}
+
+	// random control bits for the secret support permutation (Benes network conditions)
+
+	OQS_RAND_n(r, (uint8_t *) cond, sizeof(cond));
+
+	for (i = 0; i < COND_BYTES / 8; i++)
+		store8(sk + IRR_BYTES + i * 8, cond[i]);
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/transpose.c b/crypt/liboqs/kex_code_mcbits/external/transpose.c
new file mode 100644
index 0000000000000000000000000000000000000000..c56f3b0e1285c094c1ff5da874a456ee368ee07f
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/transpose.c
@@ -0,0 +1,122 @@
+static void transpose_64x64_compact(uint64_t *out, uint64_t *in) {
+	int i, j, s, p, idx0, idx1;
+	uint64_t x, y;
+
+	const uint64_t mask[6][2] = {
+	    {0X5555555555555555, 0XAAAAAAAAAAAAAAAA},
+	    {0X3333333333333333, 0XCCCCCCCCCCCCCCCC},
+	    {0X0F0F0F0F0F0F0F0F, 0XF0F0F0F0F0F0F0F0},
+	    {0X00FF00FF00FF00FF, 0XFF00FF00FF00FF00},
+	    {0X0000FFFF0000FFFF, 0XFFFF0000FFFF0000},
+	    {0X00000000FFFFFFFF, 0XFFFFFFFF00000000}};
+
+	//
+
+	for (i = 0; i < 64; i++)
+		out[i] = in[i];
+
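+	// 6 rounds of block swaps: at stride s = 32, 16, ..., 1 exchange the off-diagonal
+	// s-bit fields between the row pair (idx0, idx1)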
+	for (j = 5; j >= 0; j--) {
+		s = 1 << j;
+
+		for (p = 0; p < 32 / s; p++) {
+			for (i = 0; i < s; i++) {
+				idx0 = p * 2 * s + i;
+				idx1 = p * 2 * s + i + s;
+
+				x = (out[idx0] & mask[j][0]) | ((out[idx1] & mask[j][0]) << s);
+				y = ((out[idx0] & mask[j][1]) >> s) | (out[idx1] & mask[j][1]);
+
+				out[idx0] = x;
+				out[idx1] = y;
+			}
+		}
+	}
+}
+
+static void transpose_8x64(uint64_t *in) {
+	const uint64_t mask[3][2] = {
+	    {0X5555555555555555, 0XAAAAAAAAAAAAAAAA},
+	    {0X3333333333333333, 0XCCCCCCCCCCCCCCCC},
+	    {0X0F0F0F0F0F0F0F0F, 0XF0F0F0F0F0F0F0F0},
+	};
+
+	uint64_t x, y;
+
+	// stride-4 stage: exchange 4-bit groups between rows i and i+4
+
+	x = (in[0] & mask[2][0]) | ((in[4] & mask[2][0]) << 4);
+	y = ((in[0] & mask[2][1]) >> 4) | (in[4] & mask[2][1]);
+
+	in[0] = x;
+	in[4] = y;
+
+	x = (in[1] & mask[2][0]) | ((in[5] & mask[2][0]) << 4);
+	y = ((in[1] & mask[2][1]) >> 4) | (in[5] & mask[2][1]);
+
+	in[1] = x;
+	in[5] = y;
+
+	x = (in[2] & mask[2][0]) | ((in[6] & mask[2][0]) << 4);
+	y = ((in[2] & mask[2][1]) >> 4) | (in[6] & mask[2][1]);
+
+	in[2] = x;
+	in[6] = y;
+
+	x = (in[3] & mask[2][0]) | ((in[7] & mask[2][0]) << 4);
+	y = ((in[3] & mask[2][1]) >> 4) | (in[7] & mask[2][1]);
+
+	in[3] = x;
+	in[7] = y;
+
+	// stride-2 stage: exchange 2-bit groups between rows i and i+2
+
+	x = (in[0] & mask[1][0]) | ((in[2] & mask[1][0]) << 2);
+	y = ((in[0] & mask[1][1]) >> 2) | (in[2] & mask[1][1]);
+
+	in[0] = x;
+	in[2] = y;
+
+	x = (in[1] & mask[1][0]) | ((in[3] & mask[1][0]) << 2);
+	y = ((in[1] & mask[1][1]) >> 2) | (in[3] & mask[1][1]);
+
+	in[1] = x;
+	in[3] = y;
+
+	x = (in[4] & mask[1][0]) | ((in[6] & mask[1][0]) << 2);
+	y = ((in[4] & mask[1][1]) >> 2) | (in[6] & mask[1][1]);
+
+	in[4] = x;
+	in[6] = y;
+
+	x = (in[5] & mask[1][0]) | ((in[7] & mask[1][0]) << 2);
+	y = ((in[5] & mask[1][1]) >> 2) | (in[7] & mask[1][1]);
+
+	in[5] = x;
+	in[7] = y;
+
+	// stride-1 stage: exchange adjacent bits between rows i and i+1
+
+	x = (in[0] & mask[0][0]) | ((in[1] & mask[0][0]) << 1);
+	y = ((in[0] & mask[0][1]) >> 1) | (in[1] & mask[0][1]);
+
+	in[0] = x;
+	in[1] = y;
+
+	x = (in[2] & mask[0][0]) | ((in[3] & mask[0][0]) << 1);
+	y = ((in[2] & mask[0][1]) >> 1) | (in[3] & mask[0][1]);
+
+	in[2] = x;
+	in[3] = y;
+
+	x = (in[4] & mask[0][0]) | ((in[5] & mask[0][0]) << 1);
+	y = ((in[4] & mask[0][1]) >> 1) | (in[5] & mask[0][1]);
+
+	in[4] = x;
+	in[5] = y;
+
+	x = (in[6] & mask[0][0]) | ((in[7] & mask[0][0]) << 1);
+	y = ((in[6] & mask[0][1]) >> 1) | (in[7] & mask[0][1]);
+
+	in[6] = x;
+	in[7] = y;
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/util.c b/crypt/liboqs/kex_code_mcbits/external/util.c
new file mode 100644
index 0000000000000000000000000000000000000000..dc557755a8c905e481a10f7376d75fdef9c54308
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/util.c
@@ -0,0 +1,22 @@
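+// store a 64-bit word into 8 bytes, little-endian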
+static void store8(unsigned char *out, uint64_t in) {
+	out[0] = (in >> 0x00) & 0xFF;
+	out[1] = (in >> 0x08) & 0xFF;
+	out[2] = (in >> 0x10) & 0xFF;
+	out[3] = (in >> 0x18) & 0xFF;
+	out[4] = (in >> 0x20) & 0xFF;
+	out[5] = (in >> 0x28) & 0xFF;
+	out[6] = (in >> 0x30) & 0xFF;
+	out[7] = (in >> 0x38) & 0xFF;
+}
+
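+// load a 64-bit word from 8 little-endian bytes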
+static uint64_t load8(const unsigned char *in) {
+	int i;
+	uint64_t ret = in[7];
+
+	for (i = 6; i >= 0; i--) {
+		ret <<= 8;
+		ret |= in[i];
+	}
+
+	return ret;
+}
diff --git a/crypt/liboqs/kex_code_mcbits/external/vec.c b/crypt/liboqs/kex_code_mcbits/external/vec.c
new file mode 100644
index 0000000000000000000000000000000000000000..2812c17308440523d705e879a8ffc30334d14c8a
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/external/vec.c
@@ -0,0 +1,322 @@
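+// Bitsliced multiplication in GF(2^12): each of the GFBITS input words carries one bit of
+// 64 independent field elements, so h = f * g is computed for all 64 lanes at once.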
+static void vec_mul(uint64_t *h, uint64_t *f, const uint64_t *g) {
+	int i;
+	uint64_t result[2 * GFBITS - 1];
+
+	//
+
+	uint64_t t1 = f[11] & g[11];
+	uint64_t t2 = f[11] & g[9];
+	uint64_t t3 = f[11] & g[10];
+	uint64_t t4 = f[9] & g[11];
+	uint64_t t5 = f[10] & g[11];
+	uint64_t t6 = f[10] & g[10];
+	uint64_t t7 = f[10] & g[9];
+	uint64_t t8 = f[9] & g[10];
+	uint64_t t9 = f[9] & g[9];
+	uint64_t t10 = t8 ^ t7;
+	uint64_t t11 = t6 ^ t4;
+	uint64_t t12 = t11 ^ t2;
+	uint64_t t13 = t5 ^ t3;
+	uint64_t t14 = f[8] & g[8];
+	uint64_t t15 = f[8] & g[6];
+	uint64_t t16 = f[8] & g[7];
+	uint64_t t17 = f[6] & g[8];
+	uint64_t t18 = f[7] & g[8];
+	uint64_t t19 = f[7] & g[7];
+	uint64_t t20 = f[7] & g[6];
+	uint64_t t21 = f[6] & g[7];
+	uint64_t t22 = f[6] & g[6];
+	uint64_t t23 = t21 ^ t20;
+	uint64_t t24 = t19 ^ t17;
+	uint64_t t25 = t24 ^ t15;
+	uint64_t t26 = t18 ^ t16;
+	uint64_t t27 = f[5] & g[5];
+	uint64_t t28 = f[5] & g[3];
+	uint64_t t29 = f[5] & g[4];
+	uint64_t t30 = f[3] & g[5];
+	uint64_t t31 = f[4] & g[5];
+	uint64_t t32 = f[4] & g[4];
+	uint64_t t33 = f[4] & g[3];
+	uint64_t t34 = f[3] & g[4];
+	uint64_t t35 = f[3] & g[3];
+	uint64_t t36 = t34 ^ t33;
+	uint64_t t37 = t32 ^ t30;
+	uint64_t t38 = t37 ^ t28;
+	uint64_t t39 = t31 ^ t29;
+	uint64_t t40 = f[2] & g[2];
+	uint64_t t41 = f[2] & g[0];
+	uint64_t t42 = f[2] & g[1];
+	uint64_t t43 = f[0] & g[2];
+	uint64_t t44 = f[1] & g[2];
+	uint64_t t45 = f[1] & g[1];
+	uint64_t t46 = f[1] & g[0];
+	uint64_t t47 = f[0] & g[1];
+	uint64_t t48 = f[0] & g[0];
+	uint64_t t49 = t47 ^ t46;
+	uint64_t t50 = t45 ^ t43;
+	uint64_t t51 = t50 ^ t41;
+	uint64_t t52 = t44 ^ t42;
+	uint64_t t53 = t52 ^ t35;
+	uint64_t t54 = t40 ^ t36;
+	uint64_t t55 = t39 ^ t22;
+	uint64_t t56 = t27 ^ t23;
+	uint64_t t57 = t26 ^ t9;
+	uint64_t t58 = t14 ^ t10;
+	uint64_t t59 = g[6] ^ g[9];
+	uint64_t t60 = g[7] ^ g[10];
+	uint64_t t61 = g[8] ^ g[11];
+	uint64_t t62 = f[6] ^ f[9];
+	uint64_t t63 = f[7] ^ f[10];
+	uint64_t t64 = f[8] ^ f[11];
+	uint64_t t65 = t64 & t61;
+	uint64_t t66 = t64 & t59;
+	uint64_t t67 = t64 & t60;
+	uint64_t t68 = t62 & t61;
+	uint64_t t69 = t63 & t61;
+	uint64_t t70 = t63 & t60;
+	uint64_t t71 = t63 & t59;
+	uint64_t t72 = t62 & t60;
+	uint64_t t73 = t62 & t59;
+	uint64_t t74 = t72 ^ t71;
+	uint64_t t75 = t70 ^ t68;
+	uint64_t t76 = t75 ^ t66;
+	uint64_t t77 = t69 ^ t67;
+	uint64_t t78 = g[0] ^ g[3];
+	uint64_t t79 = g[1] ^ g[4];
+	uint64_t t80 = g[2] ^ g[5];
+	uint64_t t81 = f[0] ^ f[3];
+	uint64_t t82 = f[1] ^ f[4];
+	uint64_t t83 = f[2] ^ f[5];
+	uint64_t t84 = t83 & t80;
+	uint64_t t85 = t83 & t78;
+	uint64_t t86 = t83 & t79;
+	uint64_t t87 = t81 & t80;
+	uint64_t t88 = t82 & t80;
+	uint64_t t89 = t82 & t79;
+	uint64_t t90 = t82 & t78;
+	uint64_t t91 = t81 & t79;
+	uint64_t t92 = t81 & t78;
+	uint64_t t93 = t91 ^ t90;
+	uint64_t t94 = t89 ^ t87;
+	uint64_t t95 = t94 ^ t85;
+	uint64_t t96 = t88 ^ t86;
+	uint64_t t97 = t53 ^ t48;
+	uint64_t t98 = t54 ^ t49;
+	uint64_t t99 = t38 ^ t51;
+	uint64_t t100 = t55 ^ t53;
+	uint64_t t101 = t56 ^ t54;
+	uint64_t t102 = t25 ^ t38;
+	uint64_t t103 = t57 ^ t55;
+	uint64_t t104 = t58 ^ t56;
+	uint64_t t105 = t12 ^ t25;
+	uint64_t t106 = t13 ^ t57;
+	uint64_t t107 = t1 ^ t58;
+	uint64_t t108 = t97 ^ t92;
+	uint64_t t109 = t98 ^ t93;
+	uint64_t t110 = t99 ^ t95;
+	uint64_t t111 = t100 ^ t96;
+	uint64_t t112 = t101 ^ t84;
+	uint64_t t113 = t103 ^ t73;
+	uint64_t t114 = t104 ^ t74;
+	uint64_t t115 = t105 ^ t76;
+	uint64_t t116 = t106 ^ t77;
+	uint64_t t117 = t107 ^ t65;
+	uint64_t t118 = g[3] ^ g[9];
+	uint64_t t119 = g[4] ^ g[10];
+	uint64_t t120 = g[5] ^ g[11];
+	uint64_t t121 = g[0] ^ g[6];
+	uint64_t t122 = g[1] ^ g[7];
+	uint64_t t123 = g[2] ^ g[8];
+	uint64_t t124 = f[3] ^ f[9];
+	uint64_t t125 = f[4] ^ f[10];
+	uint64_t t126 = f[5] ^ f[11];
+	uint64_t t127 = f[0] ^ f[6];
+	uint64_t t128 = f[1] ^ f[7];
+	uint64_t t129 = f[2] ^ f[8];
+	uint64_t t130 = t129 & t123;
+	uint64_t t131 = t129 & t121;
+	uint64_t t132 = t129 & t122;
+	uint64_t t133 = t127 & t123;
+	uint64_t t134 = t128 & t123;
+	uint64_t t135 = t128 & t122;
+	uint64_t t136 = t128 & t121;
+	uint64_t t137 = t127 & t122;
+	uint64_t t138 = t127 & t121;
+	uint64_t t139 = t137 ^ t136;
+	uint64_t t140 = t135 ^ t133;
+	uint64_t t141 = t140 ^ t131;
+	uint64_t t142 = t134 ^ t132;
+	uint64_t t143 = t126 & t120;
+	uint64_t t144 = t126 & t118;
+	uint64_t t145 = t126 & t119;
+	uint64_t t146 = t124 & t120;
+	uint64_t t147 = t125 & t120;
+	uint64_t t148 = t125 & t119;
+	uint64_t t149 = t125 & t118;
+	uint64_t t150 = t124 & t119;
+	uint64_t t151 = t124 & t118;
+	uint64_t t152 = t150 ^ t149;
+	uint64_t t153 = t148 ^ t146;
+	uint64_t t154 = t153 ^ t144;
+	uint64_t t155 = t147 ^ t145;
+	uint64_t t156 = t121 ^ t118;
+	uint64_t t157 = t122 ^ t119;
+	uint64_t t158 = t123 ^ t120;
+	uint64_t t159 = t127 ^ t124;
+	uint64_t t160 = t128 ^ t125;
+	uint64_t t161 = t129 ^ t126;
+	uint64_t t162 = t161 & t158;
+	uint64_t t163 = t161 & t156;
+	uint64_t t164 = t161 & t157;
+	uint64_t t165 = t159 & t158;
+	uint64_t t166 = t160 & t158;
+	uint64_t t167 = t160 & t157;
+	uint64_t t168 = t160 & t156;
+	uint64_t t169 = t159 & t157;
+	uint64_t t170 = t159 & t156;
+	uint64_t t171 = t169 ^ t168;
+	uint64_t t172 = t167 ^ t165;
+	uint64_t t173 = t172 ^ t163;
+	uint64_t t174 = t166 ^ t164;
+	uint64_t t175 = t142 ^ t151;
+	uint64_t t176 = t130 ^ t152;
+	uint64_t t177 = t170 ^ t175;
+	uint64_t t178 = t171 ^ t176;
+	uint64_t t179 = t173 ^ t154;
+	uint64_t t180 = t174 ^ t155;
+	uint64_t t181 = t162 ^ t143;
+	uint64_t t182 = t177 ^ t138;
+	uint64_t t183 = t178 ^ t139;
+	uint64_t t184 = t179 ^ t141;
+	uint64_t t185 = t180 ^ t175;
+	uint64_t t186 = t181 ^ t176;
+	uint64_t t187 = t111 ^ t48;
+	uint64_t t188 = t112 ^ t49;
+	uint64_t t189 = t102 ^ t51;
+	uint64_t t190 = t113 ^ t108;
+	uint64_t t191 = t114 ^ t109;
+	uint64_t t192 = t115 ^ t110;
+	uint64_t t193 = t116 ^ t111;
+	uint64_t t194 = t117 ^ t112;
+	uint64_t t195 = t12 ^ t102;
+	uint64_t t196 = t13 ^ t113;
+	uint64_t t197 = t1 ^ t114;
+	uint64_t t198 = t187 ^ t138;
+	uint64_t t199 = t188 ^ t139;
+	uint64_t t200 = t189 ^ t141;
+	uint64_t t201 = t190 ^ t182;
+	uint64_t t202 = t191 ^ t183;
+	uint64_t t203 = t192 ^ t184;
+	uint64_t t204 = t193 ^ t185;
+	uint64_t t205 = t194 ^ t186;
+	uint64_t t206 = t195 ^ t154;
+	uint64_t t207 = t196 ^ t155;
+	uint64_t t208 = t197 ^ t143;
+
+	result[0] = t48;
+	result[1] = t49;
+	result[2] = t51;
+	result[3] = t108;
+	result[4] = t109;
+	result[5] = t110;
+	result[6] = t198;
+	result[7] = t199;
+	result[8] = t200;
+	result[9] = t201;
+	result[10] = t202;
+	result[11] = t203;
+	result[12] = t204;
+	result[13] = t205;
+	result[14] = t206;
+	result[15] = t207;
+	result[16] = t208;
+	result[17] = t115;
+	result[18] = t116;
+	result[19] = t117;
+	result[20] = t12;
+	result[21] = t13;
+	result[22] = t1;
+
+	// reduce modulo the field polynomial x^12 + x^3 + 1
+
+	for (i = 2 * GFBITS - 2; i >= GFBITS; i--) {
+		result[i - 9] ^= result[i];
+		result[i - GFBITS] ^= result[i];
+	}
+
+	//
+
+	for (i = 0; i < GFBITS; i++)
+		h[i] = result[i];
+}
+
+static void vec_sq(uint64_t *out, uint64_t *in) {
+	int i;
+	uint64_t result[GFBITS];
+
+	// square: coefficients move to even powers, with the reduction by x^12 + x^3 + 1 folded in
+
+	result[0] = in[0] ^ in[6];
+	result[1] = in[11];
+	result[2] = in[1] ^ in[7];
+	result[3] = in[6];
+	result[4] = in[2] ^ in[11] ^ in[8];
+	result[5] = in[7];
+	result[6] = in[3] ^ in[9];
+	result[7] = in[8];
+	result[8] = in[4] ^ in[10];
+	result[9] = in[9];
+	result[10] = in[5] ^ in[11];
+	result[11] = in[10];
+
+	//
+
+	for (i = 0; i < GFBITS; i++)
+		out[i] = result[i];
+}
+
+static void vec_copy(uint64_t *out, const uint64_t *in) {
+	int i;
+
+	for (i = 0; i < GFBITS; i++)
+		out[i] = in[i];
+}
+
+static uint64_t vec_or(const uint64_t *in) {
+	int i;
+	uint64_t ret = in[0];
+
+	for (i = 1; i < GFBITS; i++)
+		ret |= in[i];
+
+	return ret;
+}
+
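+// lane-wise field inversion: out = in^(2^GFBITS - 2), computed with a fixed square-and-multiply chain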
+static void vec_inv(uint64_t *out, const uint64_t *in) {
+	uint64_t tmp_11[GFBITS];
+	uint64_t tmp_1111[GFBITS];
+
+	vec_copy(out, in);
+
+	vec_sq(out, out);
+	vec_mul(tmp_11, out, in); // 11
+
+	vec_sq(out, tmp_11);
+	vec_sq(out, out);
+	vec_mul(tmp_1111, out, tmp_11); // 1111
+
+	vec_sq(out, tmp_1111);
+	vec_sq(out, out);
+	vec_sq(out, out);
+	vec_sq(out, out);
+	vec_mul(out, out, tmp_1111); // 11111111
+
+	vec_sq(out, out);
+	vec_sq(out, out);
+	vec_mul(out, out, tmp_11); // 1111111111
+
+	vec_sq(out, out);
+	vec_mul(out, out, in); // 11111111111
+
+	vec_sq(out, out); // 111111111110
+}
diff --git a/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.c b/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.c
new file mode 100644
index 0000000000000000000000000000000000000000..11c37ad51758e70c7e5b012a3d405538dff0439d
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.c
@@ -0,0 +1,168 @@
+#ifdef ENABLE_CODE_MCBITS
+
+#if defined(WINDOWS)
+#define UNUSED
+// __attribute__ not supported in VS, is there something else I should define?
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#if !defined(WINDOWS)
+#include <strings.h>
+#include <unistd.h>
+#endif
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+#include "kex_code_mcbits.h"
+#include "mcbits.h"
+
+#if defined(WINDOWS)
+#define strdup _strdup // for strdup deprecation warning
+#endif
+
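+// Constructor for the McBits code-based key exchange; returns NULL on allocation failure.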
+OQS_KEX *OQS_KEX_code_mcbits_new(OQS_RAND *rand) {
+	OQS_KEX *k = malloc(sizeof(OQS_KEX));
+	if (k == NULL) {
+		return NULL;
+	}
+	k->method_name = strdup("Code McBits");
+	k->estimated_classical_security = 0; // TODO: add classical/quantum security estimates
+	k->estimated_quantum_security = 0;
+	k->seed = NULL;
+	k->seed_len = 0;
+	k->named_parameters = 0;
+	k->rand = rand;
+	k->params = NULL;
+	k->alice_0 = &OQS_KEX_code_mcbits_alice_0;
+	k->bob = &OQS_KEX_code_mcbits_bob;
+	k->alice_1 = &OQS_KEX_code_mcbits_alice_1;
+	k->alice_priv_free = &OQS_KEX_code_mcbits_alice_priv_free;
+	k->free = &OQS_KEX_code_mcbits_free;
+	return k;
+}
+
+int OQS_KEX_code_mcbits_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) {
+
+	int ret;
+
+	*alice_priv = NULL;
+	*alice_msg = NULL;
+
+	/* allocate public/private key pair */
+	*alice_msg = malloc(CRYPTO_PUBLICKEYBYTES);
+	*alice_msg_len = CRYPTO_PUBLICKEYBYTES;
+	if (*alice_msg == NULL) {
+		goto err;
+	}
+	*alice_priv = malloc(CRYPTO_SECRETKEYBYTES);
+	if (*alice_priv == NULL) {
+		goto err;
+	}
+
+	/* generate public/private key pair */
+
+	oqs_kex_mcbits_gen_keypair(*alice_msg, *alice_priv, k->rand);
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*alice_msg);
+	*alice_msg = NULL;
+	free(*alice_priv);
+	*alice_priv = NULL;
+
+cleanup:
+
+	return ret;
+}
+
+int OQS_KEX_code_mcbits_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+
+	*bob_msg = NULL;
+	*key = NULL;
+
+	if (alice_msg_len != CRYPTO_PUBLICKEYBYTES) {
+		goto err;
+	}
+
+	/* allocate message and session key */
+	*bob_msg = malloc(CRYPTO_BYTES + 32);
+	if (*bob_msg == NULL) {
+		goto err;
+	}
+	*key = malloc(32);
+	if (*key == NULL) {
+		goto err;
+	}
+	OQS_RAND_n(k->rand, *key, 32);
+	oqs_kex_mcbits_encrypt(*bob_msg, bob_msg_len, *key, 32, alice_msg, k->rand);
+	*key_len = 32;
+
+	ret = 1;
+	goto cleanup;
+err:
+	ret = 0;
+	free(*bob_msg);
+	*bob_msg = NULL;
+	free(*key);
+	*key = NULL;
+
+cleanup:
+	return ret;
+}
+
+int OQS_KEX_code_mcbits_alice_1(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+
+	*key = NULL;
+
+	if (bob_msg_len != (CRYPTO_BYTES + 32)) {
+		goto err;
+	}
+
+	/* allocate session key */
+	*key = malloc(32);
+	if (*key == NULL) {
+		goto err;
+	}
+	oqs_kex_mcbits_decrypt(*key, key_len, bob_msg, CRYPTO_BYTES + 32, alice_priv);
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*key);
+	*key = NULL;
+
+cleanup:
+
+	return ret;
+}
+
+void OQS_KEX_code_mcbits_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) {
+	if (alice_priv) {
+		free(alice_priv);
+	}
+}
+
+void OQS_KEX_code_mcbits_free(OQS_KEX *k) {
+	if (k) {
+		free(k->named_parameters);
+		k->named_parameters = NULL;
+		free(k->method_name);
+		k->method_name = NULL;
+	}
+	free(k);
+}
+
+#endif
diff --git a/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.h b/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.h
new file mode 100644
index 0000000000000000000000000000000000000000..d748a03c197ad133a956191f5427b7e924106d65
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/kex_code_mcbits.h
@@ -0,0 +1,24 @@
+/**
+ * \file kex_code_mcbits.h
+ * \brief Header for code-based key exchange protocol McBits
+ */
+
+#ifndef __OQS_KEX_CODE_MCBITS_H
+#define __OQS_KEX_CODE_MCBITS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+OQS_KEX *OQS_KEX_code_mcbits_new(OQS_RAND *rand);
+
+int OQS_KEX_code_mcbits_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
+int OQS_KEX_code_mcbits_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
+int OQS_KEX_code_mcbits_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
+
+void OQS_KEX_code_mcbits_alice_priv_free(OQS_KEX *k, void *alice_priv);
+void OQS_KEX_code_mcbits_free(OQS_KEX *k);
+
+#endif
diff --git a/crypt/liboqs/kex_code_mcbits/mcbits.h b/crypt/liboqs/kex_code_mcbits/mcbits.h
new file mode 100644
index 0000000000000000000000000000000000000000..71bf80e452a635840c86b18bcd10cf0b50ea4e3b
--- /dev/null
+++ b/crypt/liboqs/kex_code_mcbits/mcbits.h
@@ -0,0 +1,27 @@
+/**
+ * \file mcbits.h
+ * \brief Header for internal functions of the code-based key exchange protocol McBits
+ */
+
+#ifndef __OQS_MCBITS_H
+#define __OQS_MCBITS_H
+
+#include "external/api.h"
+
+int oqs_kex_mcbits_encrypt(
+    unsigned char *c, size_t *clen,
+    const unsigned char *m, unsigned long long mlen,
+    const unsigned char *pk,
+    OQS_RAND *r);
+
+int oqs_kex_mcbits_decrypt(
+    unsigned char *m, size_t *mlen,
+    const unsigned char *c, unsigned long long clen,
+    const unsigned char *sk);
+
+int oqs_kex_mcbits_gen_keypair(
+    unsigned char *pk,
+    unsigned char *sk,
+    OQS_RAND *r);
+
+#endif
diff --git a/crypt/liboqs/kex_lwe_frodo/Makefile.am b/crypt/liboqs/kex_lwe_frodo/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..d91def87281b3c115e14cab615ea915324fd7988
--- /dev/null
+++ b/crypt/liboqs/kex_lwe_frodo/Makefile.am
@@ -0,0 +1,7 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libfrodo.la
+
+libfrodo_la_SOURCES = kex_lwe_frodo.c lwe.c lwe_noise.c
+libfrodo_la_CPPFLAGS = -I../../include -I.
+libfrodo_la_CPPFLAGS += $(AM_CPPFLAGS)
+
diff --git a/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.c b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.c
new file mode 100644
index 0000000000000000000000000000000000000000..e9f6801af2c243b065b9905fad337b22763d79d6
--- /dev/null
+++ b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.c
@@ -0,0 +1,57 @@
+#if defined(WINDOWS)
+#define UNUSED
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#if !defined(WINDOWS)
+#include <strings.h>
+#include <unistd.h>
+#endif
+
+#include <oqs/common.h>
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+#include "kex_lwe_frodo.h"
+#include "local.h"
+
+#define LWE_DIV_ROUNDUP(x, y) (((x) + (y) - 1) / (y))
+
+#include <stdio.h>
+
+// pre-process code to obtain "recommended" functions
+#include "recommended.h"
+#define MACRIFY(NAME) NAME##_recommended
+#include "kex_lwe_frodo_macrify.c"
+// undefine macros to avoid any confusion later
+#include "recommended.h"
+#undef MACRIFY
+
+void OQS_KEX_lwe_frodo_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) {
+	free(alice_priv);
+}
+
+void OQS_KEX_lwe_frodo_free(OQS_KEX *k) {
+	if (!k) {
+		return;
+	}
+	if (k->params) {
+		struct oqs_kex_lwe_frodo_params *params = (struct oqs_kex_lwe_frodo_params *) k->params;
+		free(params->cdf_table);
+		params->cdf_table = NULL;
+		free(params->seed);
+		params->seed = NULL;
+		free(params->param_name);
+		params->param_name = NULL;
+		free(k->params);
+		k->params = NULL;
+	}
+	free(k->named_parameters);
+	k->named_parameters = NULL;
+	free(k->method_name);
+	k->method_name = NULL;
+	free(k);
+}
diff --git a/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.h b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.h
new file mode 100644
index 0000000000000000000000000000000000000000..cb2742d0edd2139877eed62c24e6828400422be5
--- /dev/null
+++ b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo.h
@@ -0,0 +1,24 @@
+/**
+ * \file kex_lwe_frodo.h
+ * \brief Header for LWE key exchange protocol Frodo.
+ */
+
+#ifndef __OQS_KEX_LWE_FRODO_H
+#define __OQS_KEX_LWE_FRODO_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+OQS_KEX *OQS_KEX_lwe_frodo_new_recommended(OQS_RAND *rand, const uint8_t *seed, const size_t seed_len, const char *named_parameters);
+
+int OQS_KEX_lwe_frodo_alice_0_recommended(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
+int OQS_KEX_lwe_frodo_bob_recommended(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
+int OQS_KEX_lwe_frodo_alice_1_recommended(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
+
+void OQS_KEX_lwe_frodo_alice_priv_free(OQS_KEX *k, void *alice_priv);
+void OQS_KEX_lwe_frodo_free(OQS_KEX *k);
+
+#endif
diff --git a/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo_macrify.c b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo_macrify.c
new file mode 100644
index 0000000000000000000000000000000000000000..5dcba11820244e845eda4eadd03f3373b70cba28
--- /dev/null
+++ b/crypt/liboqs/kex_lwe_frodo/kex_lwe_frodo_macrify.c
@@ -0,0 +1,260 @@
+#if defined(WINDOWS)
+#define strdup _strdup // for strdup deprecation warning
+#endif
+
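+// Constructor for the "recommended" LWE Frodo parameter set; returns NULL if the seed is
+// missing, the parameter name is unknown, or an allocation fails.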
+OQS_KEX *MACRIFY(OQS_KEX_lwe_frodo_new)(OQS_RAND *rand, const uint8_t *seed, const size_t seed_len, const char *named_parameters) {
+
+	OQS_KEX *k;
+	struct oqs_kex_lwe_frodo_params *params;
+
+	if ((seed_len == 0) || (seed == NULL)) {
+		return NULL;
+	}
+
+	k = malloc(sizeof(OQS_KEX));
+	if (k == NULL) {
+		goto err;
+	}
+	k->named_parameters = NULL;
+	k->method_name = NULL;
+
+	k->params = malloc(sizeof(struct oqs_kex_lwe_frodo_params));
+	if (NULL == k->params) {
+		goto err;
+	}
+	params = (struct oqs_kex_lwe_frodo_params *) k->params;
+	params->cdf_table = NULL;
+	params->seed = NULL;
+	params->param_name = NULL;
+
+	k->rand = rand;
+	k->ctx = NULL;
+	k->alice_priv_free = &OQS_KEX_lwe_frodo_alice_priv_free;
+	k->free = &OQS_KEX_lwe_frodo_free;
+
+	if (strcmp(named_parameters, "recommended") == 0) {
+
+		k->alice_0 = &OQS_KEX_lwe_frodo_alice_0_recommended;
+		k->bob = &OQS_KEX_lwe_frodo_bob_recommended;
+		k->alice_1 = &OQS_KEX_lwe_frodo_alice_1_recommended;
+
+		k->method_name = strdup("LWE Frodo recommended");
+		if (NULL == k->method_name) {
+			goto err;
+		}
+		k->estimated_classical_security = 144;
+		k->estimated_quantum_security = 130;
+		k->named_parameters = strdup(named_parameters);
+		if (k->named_parameters == NULL) {
+			goto err;
+		}
+
+		params->seed = malloc(seed_len);
+		if (NULL == params->seed) {
+			goto err;
+		}
+		memcpy(params->seed, seed, seed_len);
+		params->seed_len = seed_len;
+		params->param_name = strdup("recommended");
+		if (NULL == params->param_name) {
+			goto err;
+		}
+		params->log2_q = PARAMS_LOG2Q;
+		params->q = PARAMS_Q;
+		params->n = PARAMS_N;
+		params->extracted_bits = PARAMS_EXTRACTED_BITS;
+		params->nbar = PARAMS_NBAR;
+		params->key_bits = PARAMS_KEY_BITS;
+		params->rec_hint_len = PARAMS_REC_HINT_LENGTH;
+		params->pub_len = PARAMS_REC_PUB_LENGTH;
+		params->stripe_step = PARAMS_STRIPE_STEP;
+		params->sampler_num = 12;
+		params->cdf_table_len = 6;
+		params->cdf_table = malloc(params->cdf_table_len * sizeof(uint16_t));
+		if (NULL == params->cdf_table) {
+			goto err;
+		}
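+		// cumulative distribution table of the "recommended" noise distribution, used by the sampler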
+		uint16_t cdf_table_tmp[6] = {602, 1521, 1927, 2031, 2046, 2047};
+		memcpy(params->cdf_table, cdf_table_tmp, sizeof(cdf_table_tmp));
+	} else {
+		goto err;
+	}
+	return k;
+err:
+	OQS_KEX_lwe_frodo_free(k);
+	return NULL;
+}
+
+int MACRIFY(OQS_KEX_lwe_frodo_alice_0)(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) {
+
+	int ret;
+
+	struct oqs_kex_lwe_frodo_params *params = (struct oqs_kex_lwe_frodo_params *) k->params;
+
+	*alice_priv = NULL;
+	*alice_msg = NULL;
+
+	/* allocate private key, error, and outgoing message */
+	*alice_priv = malloc(PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
+	if (*alice_priv == NULL) {
+		goto err;
+	}
+	uint16_t b[PARAMS_N * PARAMS_NBAR];
+	uint16_t e[PARAMS_N * PARAMS_NBAR];
+
+	*alice_msg = malloc(PARAMS_REC_PUB_LENGTH);
+	*alice_msg_len = PARAMS_REC_PUB_LENGTH;
+	if (*alice_msg == NULL) {
+		goto err;
+	}
+
+	/* generate S and E */
+	oqs_kex_lwe_frodo_sample_n(*alice_priv, PARAMS_N * PARAMS_NBAR, params, k->rand);
+	oqs_kex_lwe_frodo_sample_n(e, PARAMS_N * PARAMS_NBAR, params, k->rand);
+
+	/* compute B = AS + E */
+	MACRIFY(oqs_kex_lwe_frodo_mul_add_as_plus_e_on_the_fly)
+	(b, *alice_priv, e, params);
+
+	oqs_kex_lwe_frodo_pack(*alice_msg, PARAMS_REC_PUB_LENGTH, b, PARAMS_N * PARAMS_NBAR, PARAMS_LOG2Q);
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	OQS_MEM_cleanse(e, sizeof(e));
+	free(*alice_msg);
+	*alice_msg = NULL;
+	free(*alice_priv);
+	*alice_priv = NULL;
+	ret = 0;
+
+cleanup:
+	return ret;
+}
+
+int MACRIFY(OQS_KEX_lwe_frodo_bob)(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+
+	struct oqs_kex_lwe_frodo_params *params = (struct oqs_kex_lwe_frodo_params *) k->params;
+
+	uint8_t *bob_rec = NULL;
+	*bob_msg = NULL;
+	*key = NULL;
+
+	/* check length of other party's public key */
+	if (alice_msg_len != PARAMS_REC_PUB_LENGTH) {
+		goto err;
+	}
+
+	/* allocate private key, errors, outgoing message, and key */
+	uint16_t bob_priv[PARAMS_N * PARAMS_NBAR];
+	uint16_t bprime[PARAMS_N * PARAMS_NBAR];
+	uint16_t eprime[PARAMS_N * PARAMS_NBAR];
+	uint16_t eprimeprime[PARAMS_N * PARAMS_NBAR];
+	uint16_t b[PARAMS_N * PARAMS_NBAR];
+	uint16_t v[PARAMS_N * PARAMS_NBAR];
+	*bob_msg = malloc(PARAMS_REC_PUB_LENGTH + PARAMS_REC_HINT_LENGTH);
+	if (*bob_msg == NULL) {
+		goto err;
+	}
+	bob_rec = *bob_msg + PARAMS_REC_PUB_LENGTH;
+	*key = malloc(PARAMS_KEY_BYTES);
+	if (*key == NULL) {
+		goto err;
+	}
+
+	/* generate S' and E' */
+	oqs_kex_lwe_frodo_sample_n(bob_priv, PARAMS_N * PARAMS_NBAR, params, k->rand);
+	oqs_kex_lwe_frodo_sample_n(eprime, PARAMS_N * PARAMS_NBAR, params, k->rand);
+
+	/* compute B' = S'A + E' */
+	MACRIFY(oqs_kex_lwe_frodo_mul_add_sa_plus_e_on_the_fly)
+	(bprime, bob_priv, eprime, params);
+
+	oqs_kex_lwe_frodo_pack(*bob_msg, PARAMS_REC_PUB_LENGTH, bprime, PARAMS_N * PARAMS_NBAR, PARAMS_LOG2Q);
+
+	/* generate E'' */
+	oqs_kex_lwe_frodo_sample_n(eprimeprime, PARAMS_NBAR * PARAMS_NBAR, params, k->rand);
+
+	/* unpack B */
+	oqs_kex_lwe_frodo_unpack(b, PARAMS_N * PARAMS_NBAR, alice_msg, alice_msg_len, PARAMS_LOG2Q);
+
+	/* compute V = S'B + E'' */
+	MACRIFY(oqs_kex_lwe_frodo_mul_add_sb_plus_e)
+	(v, b, bob_priv, eprimeprime);
+
+	/* compute C = <V>_{2^B} */
+	MACRIFY(oqs_kex_lwe_frodo_crossround2)
+	(bob_rec, v);
+
+	/* compute K = round(V)_{2^B} */
+	MACRIFY(oqs_kex_lwe_frodo_round2)
+	(*key, v);
+
+	*bob_msg_len = PARAMS_REC_PUB_LENGTH + PARAMS_REC_HINT_LENGTH;
+	*key_len = PARAMS_KEY_BYTES;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*bob_msg);
+	*bob_msg = NULL;
+	OQS_MEM_secure_free(*key, PARAMS_KEY_BYTES);
+	*key = NULL;
+
+cleanup:
+	OQS_MEM_cleanse(eprime, sizeof(eprime));
+	OQS_MEM_cleanse(eprimeprime, sizeof(eprimeprime));
+	OQS_MEM_cleanse(v, sizeof(v));
+
+	return ret;
+}
+
+int MACRIFY(OQS_KEX_lwe_frodo_alice_1)(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+	*key = NULL;
+
+	/* check length of other party's public key */
+	if (bob_msg_len != PARAMS_REC_PUB_LENGTH + PARAMS_REC_HINT_LENGTH) {
+		goto err;
+	}
+
+	/* allocate working values and session key */
+	uint16_t bprime[PARAMS_N * PARAMS_NBAR];
+	uint16_t w[PARAMS_N * PARAMS_NBAR];
+
+	*key = malloc(PARAMS_KEY_BYTES);
+	if (*key == NULL) {
+		goto err;
+	}
+
+	/* unpack B' */
+	oqs_kex_lwe_frodo_unpack(bprime, PARAMS_N * PARAMS_NBAR, bob_msg, PARAMS_REC_PUB_LENGTH, PARAMS_LOG2Q);
+
+	/* compute W = B'S */
+	MACRIFY(oqs_kex_lwe_frodo_mul_bs)
+	(w, bprime, (uint16_t *) alice_priv);
+
+	/* compute K = rec(B'S, C) */
+	const uint8_t *bob_rec = bob_msg + PARAMS_REC_PUB_LENGTH;
+	MACRIFY(oqs_kex_lwe_frodo_reconcile)
+	(*key, w, bob_rec);
+
+	*key_len = PARAMS_KEY_BYTES;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	OQS_MEM_secure_free(*key, PARAMS_KEY_BYTES);
+	*key = NULL;
+
+cleanup:
+	return ret;
+}
diff --git a/crypt/liboqs/kex_lwe_frodo/local.h b/crypt/liboqs/kex_lwe_frodo/local.h
new file mode 100644
index 0000000000000000000000000000000000000000..1d8afa47f91904b5d1ce91c237b497869debd1b3
--- /dev/null
+++ b/crypt/liboqs/kex_lwe_frodo/local.h
@@ -0,0 +1,42 @@
+#ifndef _OQS_KEX_LWE_FRODO_LOCAL_H_
+#define _OQS_KEX_LWE_FRODO_LOCAL_H_
+
+#include <stdint.h>
+
+#include <oqs/rand.h>
+
+struct oqs_kex_lwe_frodo_params {
+	uint8_t *seed;
+	size_t seed_len;
+	char *param_name;
+	uint16_t log2_q;
+	uint16_t q;
+	uint16_t n;
+	uint16_t extracted_bits;
+	uint16_t nbar;
+	uint16_t key_bits;
+	uint16_t rec_hint_len;
+	uint32_t pub_len;
+	uint16_t stripe_step;
+	int sampler_num;
+	uint16_t *cdf_table;
+	size_t cdf_table_len;
+};
+
+void oqs_kex_lwe_frodo_crossround2_recommended(unsigned char *out, const uint16_t *in);
+void oqs_kex_lwe_frodo_round2_recommended(unsigned char *out, uint16_t *in);
+void oqs_kex_lwe_frodo_reconcile_recommended(unsigned char *out, uint16_t *w, const unsigned char *hint);
+
+void oqs_kex_lwe_frodo_key_round(uint16_t *vec, const size_t length, const int b);
+void oqs_kex_lwe_frodo_key_round_hints(uint16_t *vec, const size_t length, const int b, const unsigned char *hint);
+void oqs_kex_lwe_frodo_pack(unsigned char *out, const size_t outlen, const uint16_t *in, const size_t inlen, const unsigned char lsb);
+void oqs_kex_lwe_frodo_unpack(uint16_t *out, const size_t outlen, const unsigned char *in, const size_t inlen, const unsigned char lsb);
+
+void oqs_kex_lwe_frodo_sample_n(uint16_t *s, const size_t n, struct oqs_kex_lwe_frodo_params *params, OQS_RAND *rand);
+
+void oqs_kex_lwe_frodo_mul_add_as_plus_e_on_the_fly_recommended(uint16_t *b, const uint16_t *s, const uint16_t *e, struct oqs_kex_lwe_frodo_params *params);
+void oqs_kex_lwe_frodo_mul_add_sa_plus_e_on_the_fly_recommended(uint16_t *b, const uint16_t *s, const uint16_t *e, struct oqs_kex_lwe_frodo_params *params);
+void oqs_kex_lwe_frodo_mul_add_sb_plus_e_recommended(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e);
+void oqs_kex_lwe_frodo_mul_bs_recommended(uint16_t *out, const uint16_t *b, const uint16_t *s);
+
+#endif /* _OQS_KEX_LWE_FRODO_LOCAL_H_ */
diff --git a/crypt/liboqs/kex_lwe_frodo/lwe.c b/crypt/liboqs/kex_lwe_frodo/lwe.c
new file mode 100644
index 0000000000000000000000000000000000000000..ec64b57e8a845b0e17c6667e747dfa773352c4f5
--- /dev/null
+++ b/crypt/liboqs/kex_lwe_frodo/lwe.c
@@ -0,0 +1,150 @@
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "local.h"
+
+#include <oqs/aes.h>
+
+#define min(x, y) (((x) < (y)) ? (x) : (y))
+
+// round all elements of a vector to the nearest multiple of 2^b
+void oqs_kex_lwe_frodo_key_round(uint16_t *vec, const size_t length, const int b) {
+	size_t i;
+	uint16_t negmask = ~((1 << b) - 1);
+	uint16_t half = b > 0 ? 1 << (b - 1) : 0;
+	for (i = 0; i < length; i++) {
+		vec[i] = (vec[i] + half) & negmask;
+	}
+}
+
+// Round all elements of a vector to the multiple of 2^b, with a hint for the
+// direction of rounding when close to the boundary.
+void oqs_kex_lwe_frodo_key_round_hints(uint16_t *vec, const size_t length, const int b, const unsigned char *hint) {
+	size_t i;
+	uint16_t whole = 1 << b;
+	uint16_t mask = whole - 1;
+	uint16_t negmask = ~mask;
+	uint16_t half = 1 << (b - 1);
+	uint16_t quarter = 1 << (b - 2);
+
+	for (i = 0; i < length; i++) {
+		uint16_t remainder = vec[i] & mask;
+		uint16_t use_hint = ((remainder + quarter) >> (b - 1)) & 0x1;
+
+		unsigned char h = (hint[i / 8] >> (i % 8)) % 2; // the hint
+		uint16_t shift = use_hint * (2 * h - 1) * quarter;
+
+		// if use_hint = 1 and h = 0, adding -quarter forces rounding down
+		//                     h = 1, adding quarter forces rounding up
+
+		vec[i] = (vec[i] + half + shift) & negmask;
+	}
+}
+
+// Pack the input uint16 vector into a char output vector, copying lsb bits
+// from each input element. If inlen * lsb / 8 > outlen, only outlen * 8 bits
+// are copied.
+void oqs_kex_lwe_frodo_pack(unsigned char *out, const size_t outlen, const uint16_t *in, const size_t inlen, const unsigned char lsb) {
+	memset(out, 0, outlen);
+
+	size_t i = 0;           // whole bytes already filled in
+	size_t j = 0;           // whole uint16_t already copied
+	uint16_t w = 0;         // the leftover, not yet copied
+	unsigned char bits = 0; // the number of lsb in w
+	while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
+		/*
+		in: |        |        |********|********|
+		                      ^
+		                      j
+		w : |   ****|
+		        ^
+		       bits
+		out:|**|**|**|**|**|**|**|**|* |
+		                            ^^
+		                            ib
+		*/
+		unsigned char b = 0; // bits in out[i] already filled in
+		while (b < 8) {
+			int nbits = min(8 - b, bits);
+			uint16_t mask = (1 << nbits) - 1;
+			unsigned char t = (w >> (bits - nbits)) & mask; // the bits to copy from w to out
+			out[i] += t << (8 - b - nbits);
+			b += nbits;
+			bits -= nbits;
+			w &= ~(mask << bits); // not strictly necessary; mostly for debugging
+
+			if (bits == 0) {
+				if (j < inlen) {
+					w = in[j];
+					bits = lsb;
+					j++;
+				} else {
+					break; // the input vector is exhausted
+				}
+			}
+		}
+		if (b == 8) { // out[i] is filled in
+			i++;
+		}
+	}
+}
+
+// Unpack the input char vector into a uint16_t output vector, copying lsb bits
+// for each output element from input. outlen must be at least ceil(inlen * 8 /
+// lsb).
+void oqs_kex_lwe_frodo_unpack(uint16_t *out, const size_t outlen, const unsigned char *in, const size_t inlen, const unsigned char lsb) {
+	memset(out, 0, outlen * sizeof(uint16_t));
+
+	size_t i = 0;           // whole uint16_t already filled in
+	size_t j = 0;           // whole bytes already copied
+	unsigned char w = 0;    // the leftover, not yet copied
+	unsigned char bits = 0; // the number of lsb bits of w
+	while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
+		/*
+		in: |  |  |  |  |  |  |**|**|...
+		                      ^
+		                      j
+		w : | *|
+		      ^
+		      bits
+		out:|   *****|   *****|   ***  |        |...
+		                      ^   ^
+		                      i   b
+		*/
+		unsigned char b = 0; // bits in out[i] already filled in
+		while (b < lsb) {
+			int nbits = min(lsb - b, bits);
+			uint16_t mask = (1 << nbits) - 1;
+			unsigned char t = (w >> (bits - nbits)) & mask; // the bits to copy from w to out
+			out[i] += t << (lsb - b - nbits);
+			b += nbits;
+			bits -= nbits;
+			w &= ~(mask << bits); // not strictly necessary; mostly for debugging
+
+			if (bits == 0) {
+				if (j < inlen) {
+					w = in[j];
+					bits = 8;
+					j++;
+				} else {
+					break; // the input vector is exhausted
+				}
+			}
+		}
+		if (b == lsb) { // out[i] is filled in
+			i++;
+		}
+	}
+}
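
A hypothetical round-trip check for the two packing routines above (a sketch, assuming this file is linked in and the prototypes below match the ones declared in local.h): four 12-bit values are packed into exactly 6 bytes and recovered unchanged.

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

void oqs_kex_lwe_frodo_pack(unsigned char *out, const size_t outlen, const uint16_t *in, const size_t inlen, const unsigned char lsb);
void oqs_kex_lwe_frodo_unpack(uint16_t *out, const size_t outlen, const unsigned char *in, const size_t inlen, const unsigned char lsb);

int main(void) {
	uint16_t in[4] = {0x0123, 0x0456, 0x0789, 0x0ABC}; /* 12-bit values */
	unsigned char packed[6];                           /* 4 * 12 bits = 48 bits */
	uint16_t back[4];
	oqs_kex_lwe_frodo_pack(packed, sizeof(packed), in, 4, 12);
	oqs_kex_lwe_frodo_unpack(back, 4, packed, sizeof(packed), 12);
	printf("round trip ok: %d\n", memcmp(in, back, sizeof(in)) == 0);
	return 0;
}
```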
+
+// define parameters for "recommended" parameter set
+#include "recommended.h"
+// pre-process code to obtain "recommended" functions
+#define MACRIFY(NAME) NAME##_recommended
+#include "lwe_macrify.c"
+// undefine macros to avoid any confusion later
+#include "recommended.h"
+#undef MACRIFY
diff --git a/crypt/liboqs/kex_lwe_frodo/lwe_macrify.c b/crypt/liboqs/kex_lwe_frodo/lwe_macrify.c
new file mode 100644
index 0000000000000000000000000000000000000000..f39b59ba404cc42a5fc4fe901edff7bada2fb090
--- /dev/null
+++ b/crypt/liboqs/kex_lwe_frodo/lwe_macrify.c
@@ -0,0 +1,160 @@
+// [.]_2
+void MACRIFY(oqs_kex_lwe_frodo_round2)(unsigned char *out, uint16_t *in) {
+	oqs_kex_lwe_frodo_key_round(in, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOG2Q - PARAMS_EXTRACTED_BITS);
+	for (int i = 0; i < PARAMS_NBAR * PARAMS_NBAR; i++) {
+		in[i] >>= PARAMS_LOG2Q - PARAMS_EXTRACTED_BITS; // drop bits that were zeroed out
+	}
+
+	// out should have enough space for the key
+	oqs_kex_lwe_frodo_pack(out, PARAMS_KEY_BITS / 8, in, PARAMS_NBAR * PARAMS_NBAR, PARAMS_EXTRACTED_BITS);
+}
+
+void MACRIFY(oqs_kex_lwe_frodo_crossround2)(unsigned char *out, const uint16_t *in) {
+	// out should have enough space for N_BAR * N_BAR bits
+	memset((unsigned char *) out, 0, PARAMS_REC_HINT_LENGTH);
+
+	uint16_t whole = 1 << (PARAMS_LOG2Q - PARAMS_EXTRACTED_BITS);
+	uint16_t half = whole >> 1;
+	uint16_t mask = whole - 1;
+
+	for (int i = 0; i < PARAMS_NBAR * PARAMS_NBAR; i++) {
+		uint16_t remainder = in[i] & mask;
+		out[i / 8] += (remainder >= half) << (i % 8);
+	}
+}
+
+void MACRIFY(oqs_kex_lwe_frodo_reconcile)(unsigned char *out, uint16_t *w, const unsigned char *hint) {
+	oqs_kex_lwe_frodo_key_round_hints(w, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOG2Q - PARAMS_EXTRACTED_BITS, hint);
+	for (int i = 0; i < PARAMS_NBAR * PARAMS_NBAR; i++) {
+		w[i] >>= PARAMS_LOG2Q - PARAMS_EXTRACTED_BITS; // drop bits that were zeroed out
+	}
+	oqs_kex_lwe_frodo_pack(out, PARAMS_KEY_BITS / 8, w, PARAMS_NBAR * PARAMS_NBAR, PARAMS_EXTRACTED_BITS);
+}
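
To see why the hint helps, here is a standalone single-coefficient sketch (not part of the patch) with the recommended parameters q = 2^15 and 4 extracted bits, so b = 11: Bob derives one hint bit with the crossround logic, and Alice, whose value differs from Bob's by a small error, uses it to round to the same key nibble. In this example plain rounding of her value would have given 13 instead of 12.

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
	const int b = 11; /* PARAMS_LOG2Q - PARAMS_EXTRACTED_BITS */
	const uint16_t mask = (1 << b) - 1, half = 1 << (b - 1), quarter = 1 << (b - 2);
	const uint16_t negmask = (uint16_t) ~mask;

	uint16_t v = 25576;               /* Bob's coefficient, close to a rounding boundary */
	uint16_t w = (uint16_t)(v + 100); /* Alice's noisy view of the same coefficient */

	/* Bob: key nibble by plain rounding, hint bit as in crossround2 */
	unsigned key_bob = (((v + half) & negmask) >> b) & 0xF;
	unsigned char hint = (v & mask) >= half;

	/* Alice: hint-aided rounding, as in key_round_hints for one element */
	uint16_t rem = w & mask;
	uint16_t use_hint = ((rem + quarter) >> (b - 1)) & 0x1;
	int shift = use_hint ? (hint ? quarter : -quarter) : 0;
	unsigned key_alice = (((w + half + shift) & negmask) >> b) & 0xF;

	printf("bob = %u, alice = %u\n", key_bob, key_alice); /* both print 12 */
	return 0;
}
```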
+
+// Generate-and-multiply: generate A row-wise, multiply by s on the right.
+void MACRIFY(oqs_kex_lwe_frodo_mul_add_as_plus_e_on_the_fly)(uint16_t *out, const uint16_t *s, const uint16_t *e, struct oqs_kex_lwe_frodo_params *params) {
+	// A (N x N)
+	// s,e (N x N_BAR)
+	// out = A * s + e (N x N_BAR)
+
+	memcpy(out, e, PARAMS_NBAR * PARAMS_N * sizeof(uint16_t));
+
+	// transpose s to store it in the column-major order
+	uint16_t s_transpose[PARAMS_NBAR * PARAMS_N];
+	for (int j = 0; j < PARAMS_N; j++) {
+		for (int k = 0; k < PARAMS_NBAR; k++) {
+			s_transpose[k * PARAMS_N + j] = s[j * PARAMS_NBAR + k];
+		}
+	}
+
+	assert(params->seed_len == 16);
+	void *aes_key_schedule = NULL;
+	OQS_AES128_load_schedule(params->seed, &aes_key_schedule, 1);
+
+	for (int i = 0; i < PARAMS_N; i++) {
+		uint16_t a_row[PARAMS_N] = {0};
+		// seed each 16-byte stripe of row i with the (row, column) counters
+		for (int j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) {
+			// Loading values in the little-endian order!
+			a_row[j] = i;
+			a_row[j + 1] = j;
+		}
+
+		OQS_AES128_ECB_enc_sch((uint8_t *) a_row, sizeof(a_row), aes_key_schedule, (uint8_t *) a_row);
+
+		for (int k = 0; k < PARAMS_NBAR; k++) {
+			uint16_t sum = 0;
+			for (int j = 0; j < PARAMS_N; j++) {
+				// matrix-vector multiplication happens here
+				sum += a_row[j] * s_transpose[k * PARAMS_N + j];
+			}
+			out[i * PARAMS_NBAR + k] += sum;
+			// Equivalent to %= PARAMS_Q if PARAMS_Q is a power of 2
+			out[i * PARAMS_NBAR + k] &= PARAMS_Q - 1;
+		}
+	}
+
+	OQS_AES128_free_schedule(aes_key_schedule);
+}
+
+// Generate-and-multiply: generate A column-wise, multiply by s' on the left.
+void MACRIFY(oqs_kex_lwe_frodo_mul_add_sa_plus_e_on_the_fly)(uint16_t *out, const uint16_t *s, const uint16_t *e, struct oqs_kex_lwe_frodo_params *params) {
+	// a (N x N)
+	// s',e' (N_BAR x N)
+	// out = s'a + e' (N_BAR x N)
+
+	memcpy(out, e, PARAMS_NBAR * PARAMS_N * sizeof(uint16_t));
+
+	assert(params->seed_len == 16);
+
+	void *aes_key_schedule = NULL;
+	OQS_AES128_load_schedule(params->seed, &aes_key_schedule, 1);
+
+	for (int kk = 0; kk < PARAMS_N; kk += PARAMS_STRIPE_STEP) {
+		// Go through A's columns, 8 (== PARAMS_STRIPE_STEP) columns at a time.
+		// a_cols stores 8 columns of A at a time.
+		uint16_t a_cols[PARAMS_N * PARAMS_STRIPE_STEP] = {0};
+		for (int i = 0; i < PARAMS_N; i++) {
+			// Loading values in the little-endian order!
+			a_cols[i * PARAMS_STRIPE_STEP] = i;
+			a_cols[i * PARAMS_STRIPE_STEP + 1] = kk;
+		}
+
+		OQS_AES128_ECB_enc_sch((uint8_t *) a_cols, sizeof(a_cols), aes_key_schedule, (uint8_t *) a_cols);
+
+		// transpose a_cols to have access to it in the column-major order.
+		uint16_t a_cols_t[PARAMS_N * PARAMS_STRIPE_STEP];
+		for (int i = 0; i < PARAMS_N; i++) {
+			for (int k = 0; k < PARAMS_STRIPE_STEP; k++) {
+				a_cols_t[k * PARAMS_N + i] = a_cols[i * PARAMS_STRIPE_STEP + k];
+			}
+		}
+
+		for (int i = 0; i < PARAMS_NBAR; i++) {
+			for (int k = 0; k < PARAMS_STRIPE_STEP; k++) {
+				uint16_t sum = 0;
+				for (int j = 0; j < PARAMS_N; j++) {
+					sum += s[i * PARAMS_N + j] * a_cols_t[k * PARAMS_N + j];
+				}
+				out[i * PARAMS_N + kk + k] += sum;
+				out[i * PARAMS_N + kk + k] &= PARAMS_Q - 1; // Works as long as PARAMS_Q is a power of 2
+			}
+		}
+	}
+	OQS_AES128_free_schedule(aes_key_schedule);
+}
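
Both generate-and-multiply routines avoid ever storing the full N x N matrix A: each stripe of a row (or column) is seeded with the (row, column) counters and expanded with AES-128 in ECB mode under the public seed. Below is a sketch (not part of the patch) of deriving a single row, mirroring the calls above; the <oqs/aes.h> prototypes are assumed to be exactly as used in this file.

```c
#include <stdint.h>
#include <string.h>
#include <oqs/aes.h>

#define N 752    /* PARAMS_N for the recommended set */
#define STRIPE 8 /* PARAMS_STRIPE_STEP */

static void expand_row(uint16_t a_row[N], const uint8_t seed[16], uint16_t row) {
	void *sched = NULL;
	OQS_AES128_load_schedule(seed, &sched, 1);
	memset(a_row, 0, N * sizeof(uint16_t));
	for (int j = 0; j < N; j += STRIPE) {
		a_row[j] = row;               /* each 16-byte block starts with the counters */
		a_row[j + 1] = (uint16_t) j;
	}
	/* the 94 blocks (row, j, 0, ..., 0) are encrypted in place to give the row of A */
	OQS_AES128_ECB_enc_sch((uint8_t *) a_row, N * sizeof(uint16_t), sched, (uint8_t *) a_row);
	OQS_AES128_free_schedule(sched);
}
```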
+
+// multiply by s on the right
+void MACRIFY(oqs_kex_lwe_frodo_mul_bs)(uint16_t *out, const uint16_t *b, const uint16_t *s) {
+	// b (N_BAR x N)
+	// s (N x N_BAR)
+	// out = bs
+	for (int i = 0; i < PARAMS_NBAR; i++) {
+		for (int j = 0; j < PARAMS_NBAR; j++) {
+			uint16_t sum = 0;
+			for (int k = 0; k < PARAMS_N; k++) {
+				sum += b[i * PARAMS_N + k] * s[k * PARAMS_NBAR + j];
+			}
+			out[i * PARAMS_NBAR + j] = sum & (PARAMS_Q - 1);
+		}
+	}
+}
+
+// multiply by s on the left
+void MACRIFY(oqs_kex_lwe_frodo_mul_add_sb_plus_e)(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) {
+	// b (N x N_BAR)
+	// s (N_BAR x N)
+	// e (N_BAR x N_BAR)
+	// out = sb + e
+	memcpy(out, e, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
+	for (int k = 0; k < PARAMS_NBAR; k++) {
+		for (int i = 0; i < PARAMS_NBAR; i++) {
+			uint16_t sum = 0;
+			for (int j = 0; j < PARAMS_N; j++) {
+				sum += s[k * PARAMS_N + j] * b[j * PARAMS_NBAR + i];
+			}
+			out[k * PARAMS_NBAR + i] += sum;
+			out[k * PARAMS_NBAR + i] &= PARAMS_Q - 1; // not strictly necessary, since PARAMS_Q is a power of 2
+		}
+	}
+}
diff --git a/crypt/liboqs/kex_lwe_frodo/lwe_noise.c b/crypt/liboqs/kex_lwe_frodo/lwe_noise.c
new file mode 100644
index 0000000000000000000000000000000000000000..4dc4dc4ce2c28d81f24a59bf850b84fac3d7c542
--- /dev/null
+++ b/crypt/liboqs/kex_lwe_frodo/lwe_noise.c
@@ -0,0 +1,136 @@
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <oqs/common.h>
+#include <oqs/rand.h>
+
+#include "local.h"
+
+#define RECOMMENDED_N_ARRAY_SIZE (752 * 8)
+#define RECOMMENDED_CDF_TABLE_LEN 6
+#if defined(WINDOWS)
+// MSVC does not accept variable-length arrays sized by a const parameter.
+// On Windows we therefore size these buffers using the "recommended" parameter
+// set's constants instead of the values passed in by callers. This works only
+// because there is currently a single parameter set; a general fix is needed (TODO).
+#define IS_WINDOWS(windows, nix) (windows)
+#else
+#define IS_WINDOWS(windows, nix) (nix)
+#endif
+
+static void lwe_sample_n_inverse_8(uint16_t *s, const size_t n, const uint8_t *cdf_table, const size_t cdf_table_len, OQS_RAND *rand) {
+	/* Fills vector s with n samples from the noise distribution, which requires
+	 * 8 bits per sample. The distribution is specified by its CDF. Timing is
+	 * input-independent: the whole CDF table is scanned for every sample.
+	 */
+
+	uint8_t rndvec[IS_WINDOWS(RECOMMENDED_N_ARRAY_SIZE, n)];
+	OQS_RAND_n(rand, rndvec, sizeof(rndvec));
+
+	for (size_t i = 0; i < n; ++i) {
+		uint8_t sample = 0;
+		uint8_t rnd = rndvec[i] >> 1;   // drop the least significant bit
+		uint8_t sign = rndvec[i] & 0x1; // pick the least significant bit
+
+		// No need to compare with the last value.
+		for (int64_t j = 0; j < (int64_t)(cdf_table_len - 1); j++) {
+			// Constant time comparison: 1 if cdf_table[j] < rnd, 0 otherwise.
+			// Critically uses the fact that cdf_table[j] and rnd fit in 7 bits.
+			sample += (uint8_t)(cdf_table[j] - rnd) >> 7;
+		}
+		// Assuming that sign is either 0 or 1, flips sample iff sign = 1
+		s[i] = ((-sign) ^ sample) + sign;
+	}
+	OQS_MEM_cleanse(rndvec, sizeof(rndvec));
+}
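
The sampler's inner loop relies on a branch-free comparison: because both cdf_table[j] and rnd fit in 7 bits, the subtraction borrows exactly when cdf_table[j] < rnd, and the borrow surfaces as the top bit of the 8-bit difference. A standalone check of that identity (not part of the patch):

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
	for (uint8_t a = 0; a < 128; a++) {
		for (uint8_t b = 0; b < 128; b++) {
			uint8_t ct = (uint8_t)(a - b) >> 7; /* 1 exactly when a < b */
			if (ct != (a < b)) {
				printf("mismatch at a=%u b=%u\n", (unsigned) a, (unsigned) b);
				return 1;
			}
		}
	}
	printf("branch-free '<' agrees on all 7-bit inputs\n");
	return 0;
}
```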
+
+static void lwe_sample_n_inverse_12(uint16_t *s, const size_t n, const uint16_t *cdf_table, const size_t cdf_table_len, OQS_RAND *rand) {
+	/* Fills vector s with n samples from the noise distribution, which requires
+	 * 12 bits per sample. The distribution is specified by its CDF. Timing is
+	 * input-independent: the whole CDF table is scanned for every sample.
+	 */
+
+	uint8_t rnd[3 * ((IS_WINDOWS(RECOMMENDED_N_ARRAY_SIZE, n) + 1) / 2)]; // 12 bits of uniform randomness per output element
+	OQS_RAND_n(rand, rnd, sizeof(rnd));
+
+	for (size_t i = 0; i < n; i += 2) { // two output elements at a time
+		uint8_t *pRnd = (rnd + 3 * i / 2);
+
+		uint16_t rnd1 = (((pRnd[0] << 8) + pRnd[1]) & 0xFFE0) >> 5; // first 11 bits (0..10)
+		uint16_t rnd2 = (((pRnd[1] << 8) + pRnd[2]) & 0x1FFC) >> 2; // next 11 bits (11..21)
+
+		uint8_t sample1 = 0;
+		uint8_t sample2 = 0;
+
+		// No need to compare with the last value.
+		for (size_t j = 0; j < cdf_table_len - 1; j++) {
+			// Constant time comparison: 1 if LWE_CDF_TABLE[j] < rnd1, 0 otherwise.
+			// Critically uses the fact that LWE_CDF_TABLE[j] and rnd1 fit in 15 bits.
+			sample1 += (uint16_t)(cdf_table[j] - rnd1) >> 15;
+			sample2 += (uint16_t)(cdf_table[j] - rnd2) >> 15;
+		}
+
+		uint8_t sign1 = (pRnd[2] & 0x02) >> 1; // 22nd bit
+		uint8_t sign2 = pRnd[2] & 0x01;        // 23rd bit
+
+		// Assuming that sign1 is either 0 or 1, flips sample1 iff sign1 = 1
+		s[i] = ((-sign1) ^ sample1) + sign1;
+
+		if (i + 1 < n) {
+			s[i + 1] = ((-sign2) ^ sample2) + sign2;
+		}
+	}
+	OQS_MEM_cleanse(rnd, sizeof(rnd));
+}
+
+static void lwe_sample_n_inverse_16(uint16_t *s, const size_t n, const uint16_t *cdf_table, const size_t cdf_table_len, OQS_RAND *rand) {
+	/* Fills vector s with n samples from the noise distribution, which requires
+	 * 16 bits per sample. The distribution is specified by its CDF. Timing is
+	 * input-independent: the whole CDF table is scanned for every sample.
+	 */
+
+	uint16_t rndvec[IS_WINDOWS(RECOMMENDED_N_ARRAY_SIZE, n)];
+	OQS_RAND_n(rand, (uint8_t *) rndvec, sizeof(rndvec));
+
+	for (size_t i = 0; i < n; ++i) {
+		uint8_t sample = 0;
+		uint16_t rnd = rndvec[i] >> 1;  // drop the least significant bit
+		uint8_t sign = rndvec[i] & 0x1; // pick the least significant bit
+
+		// No need to compare with the last value.
+		for (size_t j = 0; j < cdf_table_len - 1; j++) {
+			// Constant time comparison: 1 if LWE_CDF_TABLE[j] < rnd, 0 otherwise.
+			// Critically uses the fact that LWE_CDF_TABLE[j] and rnd fit in 15 bits.
+			sample += (uint16_t)(cdf_table[j] - rnd) >> 15;
+		}
+		// Assuming that sign is either 0 or 1, flips sample iff sign = 1
+		s[i] = ((-sign) ^ sample) + sign;
+	}
+	OQS_MEM_cleanse(rndvec, sizeof(rndvec));
+}
+
+void oqs_kex_lwe_frodo_sample_n(uint16_t *s, const size_t n, struct oqs_kex_lwe_frodo_params *params, OQS_RAND *rand) {
+	switch (params->sampler_num) {
+	case 8: {
+		// have to copy cdf_table from uint16_t to uint8_t
+		uint8_t cdf_table_8[IS_WINDOWS(RECOMMENDED_CDF_TABLE_LEN, params->cdf_table_len) * sizeof(uint8_t)];
+
+		for (size_t i = 0; i < params->cdf_table_len; i++) {
+			cdf_table_8[i] = (uint8_t) params->cdf_table[i];
+		}
+		lwe_sample_n_inverse_8(s, n, cdf_table_8, params->cdf_table_len, rand);
+	} break;
+	case 12:
+		lwe_sample_n_inverse_12(s, n, params->cdf_table, params->cdf_table_len, rand);
+		break;
+	case 16:
+		lwe_sample_n_inverse_16(s, n, params->cdf_table, params->cdf_table_len, rand);
+		break;
+	default:
+		assert(0); //ERROR
+		break;
+	}
+}
diff --git a/crypt/liboqs/kex_lwe_frodo/recommended.h b/crypt/liboqs/kex_lwe_frodo/recommended.h
new file mode 100644
index 0000000000000000000000000000000000000000..725a94ac53e1cbd701914038f39970fbb90360a1
--- /dev/null
+++ b/crypt/liboqs/kex_lwe_frodo/recommended.h
@@ -0,0 +1,29 @@
+// Recommended parameter set. Include this header a second time to undefine the macros.
+
+#ifndef OQS_LWE_FRODO_RECOMMENDED_H
+#define OQS_LWE_FRODO_RECOMMENDED_H
+#define PARAMS_N 752
+#define PARAMS_NBAR 8
+#define PARAMS_LOG2Q 15
+#define PARAMS_Q (1 << PARAMS_LOG2Q)
+#define PARAMS_EXTRACTED_BITS 4
+#define PARAMS_KEY_BITS 256
+#define PARAMS_KEY_BYTES (PARAMS_KEY_BITS >> 3)
+#define PARAMS_STRIPE_STEP 8
+#define LWE_DIV_ROUNDUP(x, y) (((x) + (y) - 1) / (y))
+#define PARAMS_REC_HINT_LENGTH LWE_DIV_ROUNDUP(PARAMS_NBAR * PARAMS_NBAR, 8)
+#define PARAMS_REC_PUB_LENGTH LWE_DIV_ROUNDUP(PARAMS_N * PARAMS_NBAR * PARAMS_LOG2Q, 8)
+
+#else
+
+#undef OQS_LWE_FRODO_RECOMMENDED_H
+#undef PARAMS_N
+#undef PARAMS_NBAR
+#undef PARAMS_LOG2Q
+#undef PARAMS_Q
+#undef PARAMS_EXTRACTED_BITS
+#undef PARAMS_KEY_BITS
+#undef PARAMS_STRIPE_STEP
+#undef LWE_DIV_ROUNDUP
+#undef PARAMS_REC_HINT_LENGTH
+#endif
diff --git a/crypt/liboqs/kex_mlwe_kyber/LICENSE.txt b/crypt/liboqs/kex_mlwe_kyber/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..085eb6bc97c23c738822aad8e591d0efbcccda75
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/LICENSE.txt
@@ -0,0 +1,7 @@
+The files in this directory (except kex_mlwe_kyber.*) were originally published in https://github.com/pq-crystals/kyber
+
+
+The following license applies to all files in the src/kex_mlwe_kyber directory.
+
+
+Public domain.
diff --git a/crypt/liboqs/kex_mlwe_kyber/Makefile.am b/crypt/liboqs/kex_mlwe_kyber/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..d486e89a69f502efc8e23e73f7d584c2dd76b4ac
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/Makefile.am
@@ -0,0 +1,8 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libkyber.la
+
+libkyber_la_SOURCES = kex_mlwe_kyber.c
+
+libkyber_la_CPPFLAGS = -I../../include -I.
+libkyber_la_CPPFLAGS += $(AM_CPPFLAGS)
+
diff --git a/crypt/liboqs/kex_mlwe_kyber/indcpa.c b/crypt/liboqs/kex_mlwe_kyber/indcpa.c
new file mode 100644
index 0000000000000000000000000000000000000000..cb15fe687320ad5c90d281460ac8fc208bc34f5c
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/indcpa.c
@@ -0,0 +1,179 @@
+#include "params.h"
+#include <oqs/rand.h>
+#include <oqs/sha3.h>
+
+static void pack_pk(unsigned char *r, const polyvec *pk, const unsigned char *seed) {
+	int i;
+	polyvec_compress(r, pk);
+	for (i = 0; i < KYBER_SEEDBYTES; i++)
+		r[i + KYBER_POLYVECCOMPRESSEDBYTES] = seed[i];
+}
+
+static void unpack_pk(polyvec *pk, unsigned char *seed, const unsigned char *packedpk) {
+	int i;
+	polyvec_decompress(pk, packedpk);
+
+	for (i = 0; i < KYBER_SEEDBYTES; i++)
+		seed[i] = packedpk[i + KYBER_POLYVECCOMPRESSEDBYTES];
+}
+
+static void pack_ciphertext(unsigned char *r, const polyvec *b, const poly *v) {
+	polyvec_compress(r, b);
+	poly_compress(r + KYBER_POLYVECCOMPRESSEDBYTES, v);
+}
+
+static void unpack_ciphertext(polyvec *b, poly *v, const unsigned char *c) {
+	polyvec_decompress(b, c);
+	poly_decompress(v, c + KYBER_POLYVECCOMPRESSEDBYTES);
+}
+
+static void pack_sk(unsigned char *r, const polyvec *sk) {
+	polyvec_tobytes(r, sk);
+}
+
+static void unpack_sk(polyvec *sk, const unsigned char *packedsk) {
+	polyvec_frombytes(sk, packedsk);
+}
+
+#define gen_a(A, B) gen_matrix(A, B, 0)
+#define gen_at(A, B) gen_matrix(A, B, 1)
+
+/* Generate entry a_{i,j} of matrix A as Parse(cSHAKE-128(seed)), with the domain separator encoding (i, j) */
+static void gen_matrix(polyvec *a, const unsigned char *seed, int transposed)
+{
+	unsigned int pos = 0, ctr;
+	uint16_t val;
+	unsigned int nblocks = 4;
+	uint8_t buf[OQS_SHA3_SHAKE128_RATE * 4]; // was * nblocks, but MSVC rejects variable-length arrays
+	int i, j;
+	uint16_t dsep;
+	uint64_t state[25]; // CSHAKE state
+
+	for (i = 0; i < KYBER_D; i++) {
+		for (j = 0; j < KYBER_D; j++) {
+			ctr = pos = 0;
+			if (transposed)
+				dsep = j + (i << 8);
+			else
+				dsep = i + (j << 8);
+
+			OQS_SHA3_cshake128_simple_absorb(state, dsep, seed, KYBER_SEEDBYTES);
+			OQS_SHA3_cshake128_simple_squeezeblocks(buf, nblocks, state);
+
+			while (ctr < KYBER_N) {
+				val = (buf[pos] | ((uint16_t) buf[pos + 1] << 8)) & 0x1fff;
+				if (val < KYBER_Q) {
+					a[i].vec[j].coeffs[ctr++] = val;
+				}
+				pos += 2;
+
+				if (pos > OQS_SHA3_SHAKE128_RATE * nblocks - 2) {
+					nblocks = 1;
+					OQS_SHA3_cshake128_simple_squeezeblocks(buf, nblocks, state);
+					pos = 0;
+				}
+			}
+		}
+	}
+}
+
+static void indcpa_keypair(unsigned char *pk,
+                           unsigned char *sk, OQS_RAND *rand) {
+	polyvec a[KYBER_D], e, pkpv, skpv;
+	unsigned char seed[KYBER_SEEDBYTES];
+	unsigned char noiseseed[KYBER_COINBYTES];
+	int i;
+	unsigned char nonce = 0;
+
+	rand->rand_n(rand, seed, KYBER_SEEDBYTES);
+	OQS_SHA3_shake128(seed, KYBER_SEEDBYTES, seed, KYBER_SEEDBYTES); /* Don't send output of system RNG */
+	rand->rand_n(rand, noiseseed, KYBER_COINBYTES);
+
+	gen_a(a, seed);
+
+	for (i = 0; i < KYBER_D; i++)
+		poly_getnoise(skpv.vec + i, noiseseed, nonce++);
+
+	polyvec_ntt(&skpv);
+
+	for (i = 0; i < KYBER_D; i++)
+		poly_getnoise(e.vec + i, noiseseed, nonce++);
+
+	// matrix-vector multiplication
+	for (i = 0; i < KYBER_D; i++)
+		polyvec_pointwise_acc(&pkpv.vec[i], &skpv, a + i);
+
+	polyvec_invntt(&pkpv);
+	polyvec_add(&pkpv, &pkpv, &e);
+
+	pack_sk(sk, &skpv);
+	pack_pk(pk, &pkpv, seed);
+}
+
+static void indcpa_enc(unsigned char *c,
+                       const unsigned char *m,
+                       const unsigned char *pk,
+                       const unsigned char *coins) {
+	polyvec sp, pkpv, ep, at[KYBER_D], bp;
+	poly v, k, epp;
+	unsigned char seed[KYBER_SEEDBYTES];
+	int i;
+	unsigned char nonce = 0;
+
+	unpack_pk(&pkpv, seed, pk);
+
+	poly_frommsg(&k, m);
+
+	for (i = 0; i < KYBER_D; i++)
+		bitrev_vector(pkpv.vec[i].coeffs);
+	polyvec_ntt(&pkpv);
+
+	gen_at(at, seed);
+
+	for (i = 0; i < KYBER_D; i++)
+		poly_getnoise(sp.vec + i, coins, nonce++);
+
+	polyvec_ntt(&sp);
+
+	for (i = 0; i < KYBER_D; i++)
+		poly_getnoise(ep.vec + i, coins, nonce++);
+
+	// matrix-vector multiplication
+	for (i = 0; i < KYBER_D; i++)
+		polyvec_pointwise_acc(&bp.vec[i], &sp, at + i);
+
+	polyvec_invntt(&bp);
+	polyvec_add(&bp, &bp, &ep);
+
+	polyvec_pointwise_acc(&v, &pkpv, &sp);
+	poly_invntt(&v);
+
+	poly_getnoise(&epp, coins, nonce++);
+
+	poly_add(&v, &v, &epp);
+	poly_add(&v, &v, &k);
+
+	pack_ciphertext(c, &bp, &v);
+}
+
+static void indcpa_dec(unsigned char *m,
+                       const unsigned char *c,
+                       const unsigned char *sk) {
+	polyvec bp, skpv;
+	poly v, mp;
+	size_t i;
+
+	unpack_ciphertext(&bp, &v, c);
+	unpack_sk(&skpv, sk);
+
+	for (i = 0; i < KYBER_D; i++)
+		bitrev_vector(bp.vec[i].coeffs);
+	polyvec_ntt(&bp);
+
+	polyvec_pointwise_acc(&mp, &skpv, &bp);
+	poly_invntt(&mp);
+
+	poly_sub(&mp, &mp, &v);
+
+	poly_tomsg(m, &mp);
+}
diff --git a/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.c b/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.c
new file mode 100644
index 0000000000000000000000000000000000000000..a00b5a905f2e5aa75355d9391e1e39d6ea7b0362
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.c
@@ -0,0 +1,171 @@
+#if defined(WINDOWS)
+#define UNUSED
+// __attribute__((unused)) is not supported by MSVC, so UNUSED expands to nothing there.
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#if !defined(WINDOWS)
+#include <strings.h>
+#include <unistd.h>
+#endif
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+#include "kex_mlwe_kyber.h"
+#include "kyber.c"
+#include "params.h"
+
+#if defined(WINDOWS)
+#define strdup _strdup // for strdup deprecation warning
+#endif
+
+OQS_KEX *OQS_KEX_mlwe_kyber_new(OQS_RAND *rand) {
+	OQS_KEX *k = malloc(sizeof(OQS_KEX));
+	if (k == NULL) {
+		return NULL;
+	}
+	k->method_name = strdup("MLWE Kyber");
+	k->estimated_classical_security = 178; // using https://github.com/pq-crystals/kyber/blob/master/scripts/Kyber.py
+	k->estimated_quantum_security = 161;   // using https://github.com/pq-crystals/kyber/blob/master/scripts/Kyber.py
+	k->seed = NULL;
+	k->seed_len = 0;
+	k->named_parameters = 0;
+	k->rand = rand;
+	k->params = NULL;
+	k->alice_0 = &OQS_KEX_mlwe_kyber_alice_0;
+	k->bob = &OQS_KEX_mlwe_kyber_bob;
+	k->alice_1 = &OQS_KEX_mlwe_kyber_alice_1;
+	k->alice_priv_free = &OQS_KEX_mlwe_kyber_alice_priv_free;
+	k->free = &OQS_KEX_mlwe_kyber_free;
+	return k;
+}
+
+int OQS_KEX_mlwe_kyber_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) {
+
+	int ret;
+
+	*alice_priv = NULL;
+	*alice_msg = NULL;
+
+	/* allocate public/private key pair */
+	*alice_msg = malloc(KYBER_PUBLICKEYBYTES);
+	if (*alice_msg == NULL) {
+		goto err;
+	}
+	*alice_priv = malloc(KYBER_SECRETKEYBYTES);
+	if (*alice_priv == NULL) {
+		goto err;
+	}
+
+	/* generate public/private key pair */
+	keygen(*alice_msg, (unsigned char *) *alice_priv, k->rand);
+	*alice_msg_len = KYBER_PUBLICKEYBYTES;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*alice_msg);
+	*alice_msg = NULL;
+	free(*alice_priv);
+	*alice_priv = NULL;
+
+cleanup:
+
+	return ret;
+}
+
+int OQS_KEX_mlwe_kyber_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+
+	*bob_msg = NULL;
+	*key = NULL;
+
+	if (alice_msg_len != KYBER_PUBLICKEYBYTES) {
+		goto err;
+	}
+
+	/* allocate message and session key */
+	*bob_msg = malloc(KYBER_BYTES);
+	if (*bob_msg == NULL) {
+		goto err;
+	}
+	*key = malloc(32);
+	if (*key == NULL) {
+		goto err;
+	}
+
+	/* generate Bob's response */
+	sharedb(*key, *bob_msg, alice_msg, k->rand);
+	*bob_msg_len = KYBER_BYTES;
+	*key_len = 32;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*bob_msg);
+	*bob_msg = NULL;
+	free(*key);
+	*key = NULL;
+
+cleanup:
+
+	return ret;
+}
+
+int OQS_KEX_mlwe_kyber_alice_1(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+
+	*key = NULL;
+
+	if (bob_msg_len != KYBER_BYTES) {
+		goto err;
+	}
+
+	/* allocate session key */
+	*key = malloc(32);
+	if (*key == NULL) {
+		goto err;
+	}
+
+	/* generate Alice's session key */
+	shareda(*key, (unsigned char *) alice_priv, bob_msg);
+	*key_len = 32;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*key);
+	*key = NULL;
+
+cleanup:
+
+	return ret;
+}
+
+void OQS_KEX_mlwe_kyber_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) {
+	if (alice_priv) {
+		free(alice_priv);
+	}
+}
+
+void OQS_KEX_mlwe_kyber_free(OQS_KEX *k) {
+	if (k) {
+		free(k->named_parameters);
+		k->named_parameters = NULL;
+		free(k->method_name);
+		k->method_name = NULL;
+	}
+	free(k);
+}
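
A sketch of a complete exchange through the API defined above (not part of the patch). Only functions declared in kex_mlwe_kyber.h are used; how the OQS_RAND instance is constructed is outside this file, so it is taken as a parameter here.

```c
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "kex_mlwe_kyber.h"

static int demo_kyber_kex(OQS_RAND *rand) {
	OQS_KEX *kex = OQS_KEX_mlwe_kyber_new(rand);
	void *alice_priv = NULL;
	uint8_t *alice_msg = NULL, *bob_msg = NULL, *key_a = NULL, *key_b = NULL;
	size_t alice_msg_len = 0, bob_msg_len = 0, key_a_len = 0, key_b_len = 0;
	int ok = 0;

	if (kex == NULL)
		return 0;
	if (!OQS_KEX_mlwe_kyber_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len))
		goto cleanup;
	if (!OQS_KEX_mlwe_kyber_bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &key_b, &key_b_len))
		goto cleanup;
	if (!OQS_KEX_mlwe_kyber_alice_1(kex, alice_priv, bob_msg, bob_msg_len, &key_a, &key_a_len))
		goto cleanup;
	/* both sides must derive the same 32-byte session key */
	ok = (key_a_len == key_b_len) && (memcmp(key_a, key_b, key_a_len) == 0);

cleanup:
	free(alice_msg);
	free(bob_msg);
	free(key_a);
	free(key_b);
	OQS_KEX_mlwe_kyber_alice_priv_free(kex, alice_priv);
	OQS_KEX_mlwe_kyber_free(kex);
	return ok;
}
```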
diff --git a/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.h b/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.h
new file mode 100644
index 0000000000000000000000000000000000000000..b3a64683080d4121168f98d059618e795835fd47
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/kex_mlwe_kyber.h
@@ -0,0 +1,24 @@
+/**
+ * \file kex_mlwe_kyber.h
+ * \brief Header for module-LWE key exchange protocol Kyber
+ */
+
+#ifndef __OQS_KEX_MLWE_KYBER_H
+#define __OQS_KEX_MLWE_KYBER_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+OQS_KEX *OQS_KEX_mlwe_kyber_new(OQS_RAND *rand);
+
+int OQS_KEX_mlwe_kyber_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
+int OQS_KEX_mlwe_kyber_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
+int OQS_KEX_mlwe_kyber_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
+
+void OQS_KEX_mlwe_kyber_alice_priv_free(OQS_KEX *k, void *alice_priv);
+void OQS_KEX_mlwe_kyber_free(OQS_KEX *k);
+
+#endif
diff --git a/crypt/liboqs/kex_mlwe_kyber/kyber.c b/crypt/liboqs/kex_mlwe_kyber/kyber.c
new file mode 100644
index 0000000000000000000000000000000000000000..36cc119027db28d5da6876ae4f04fcb03b5b1fc8
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/kyber.c
@@ -0,0 +1,86 @@
+#include <stdint.h>
+
+#include <oqs/sha3.h>
+#include <oqs/rand.h>
+
+// clang-format off
+// (order of include matters)
+#include "precomp.c"
+#include "reduce.c"
+#include "verify.c"
+#include "ntt.c"
+#include "poly.c"
+#include "polyvec.c"
+#include "indcpa.c"
+// clang-format on
+
+// API FUNCTIONS
+
+/* Build a CCA-secure KEM from an IND-CPA-secure encryption scheme */
+
+static void keygen(unsigned char *pk, unsigned char *sk, OQS_RAND *rand) {
+	size_t i;
+	indcpa_keypair(pk, sk, rand);
+	for (i = 0; i < KYBER_INDCPA_PUBLICKEYBYTES; i++)
+		sk[i + KYBER_INDCPA_SECRETKEYBYTES] = pk[i];
+	OQS_SHA3_shake128(sk + KYBER_SECRETKEYBYTES - 64, 32, pk, KYBER_PUBLICKEYBYTES);
+	rand->rand_n(rand, sk + KYBER_SECRETKEYBYTES - KYBER_SHAREDKEYBYTES, KYBER_SHAREDKEYBYTES); /* Value z for pseudo-random output on reject */
+}
+
+static void sharedb(unsigned char *sharedkey, unsigned char *send,
+                    const unsigned char *received, OQS_RAND *rand) {
+	unsigned char krq[96]; /* Will contain key, coins, qrom-hash */
+	unsigned char buf[64];
+	int i;
+
+	rand->rand_n(rand, buf, 32);
+	OQS_SHA3_shake128(buf, 32, buf, 32); /* Don't release system RNG output */
+
+	OQS_SHA3_shake128(buf + 32, 32, received, KYBER_PUBLICKEYBYTES); /* Multitarget countermeasure for coins + contributory KEM */
+	OQS_SHA3_shake128(krq, 96, buf, 64);
+
+	indcpa_enc(send, buf, received, krq + 32); /* coins are in krq+32 */
+
+	for (i = 0; i < 32; i++)
+		send[i + KYBER_INDCPA_BYTES] = krq[i + 64];
+
+	OQS_SHA3_shake128(krq + 32, 32, send, KYBER_BYTES); /* overwrite coins in krq with h(c) */
+	OQS_SHA3_shake128(sharedkey, 32, krq, 64);          /* hash concatenation of pre-k and h(c) to k */
+
+#ifndef STATISTICAL_TEST
+	OQS_SHA3_sha3256(sharedkey, sharedkey, 32);
+#endif
+}
+
+static void shareda(unsigned char *sharedkey, const unsigned char *sk,
+                    const unsigned char *received) {
+	int i, fail;
+	unsigned char cmp[KYBER_BYTES];
+	unsigned char buf[64];
+	unsigned char krq[96]; /* Will contain key, coins, qrom-hash */
+	const unsigned char *pk = sk + KYBER_INDCPA_SECRETKEYBYTES;
+
+	indcpa_dec(buf, received, sk);
+
+	// shake128(buf+32, 32, pk, KYBER_PUBLICKEYBYTES); /* Multitarget countermeasure for coins + contributory KEM */
+	for (i = 0; i < 32; i++) /* Save hash by storing h(pk) in sk */
+		buf[32 + i] = sk[KYBER_SECRETKEYBYTES - 64 + i];
+	OQS_SHA3_shake128(krq, 96, buf, 64);
+
+	indcpa_enc(cmp, buf, pk, krq + 32); /* coins are in krq+32 */
+
+	for (i = 0; i < 32; i++)
+		cmp[i + KYBER_INDCPA_BYTES] = krq[i + 64];
+
+	fail = verify(received, cmp, KYBER_BYTES);
+
+	OQS_SHA3_shake128(krq + 32, 32, received, KYBER_BYTES); /* overwrite coins in krq with h(c)  */
+
+	cmov(krq, sk + KYBER_SECRETKEYBYTES - KYBER_SHAREDKEYBYTES, KYBER_SHAREDKEYBYTES, fail); /* Overwrite pre-k with z on re-encryption failure */
+
+	OQS_SHA3_shake128(sharedkey, 32, krq, 64); /* hash concatenation of pre-k and h(c) to k */
+
+#ifndef STATISTICAL_TEST
+	OQS_SHA3_sha3256(sharedkey, sharedkey, 32);
+#endif
+}
diff --git a/crypt/liboqs/kex_mlwe_kyber/ntt.c b/crypt/liboqs/kex_mlwe_kyber/ntt.c
new file mode 100644
index 0000000000000000000000000000000000000000..14b60c8f7b3619ebdb4c74015487500a46b52598
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/ntt.c
@@ -0,0 +1,67 @@
+#include "params.h"
+
+static uint16_t bitrev_table[KYBER_N] = {
+    0, 128, 64, 192, 32, 160, 96, 224, 16, 144, 80, 208, 48, 176, 112, 240,
+    8, 136, 72, 200, 40, 168, 104, 232, 24, 152, 88, 216, 56, 184, 120, 248,
+    4, 132, 68, 196, 36, 164, 100, 228, 20, 148, 84, 212, 52, 180, 116, 244,
+    12, 140, 76, 204, 44, 172, 108, 236, 28, 156, 92, 220, 60, 188, 124, 252,
+    2, 130, 66, 194, 34, 162, 98, 226, 18, 146, 82, 210, 50, 178, 114, 242,
+    10, 138, 74, 202, 42, 170, 106, 234, 26, 154, 90, 218, 58, 186, 122, 250,
+    6, 134, 70, 198, 38, 166, 102, 230, 22, 150, 86, 214, 54, 182, 118, 246,
+    14, 142, 78, 206, 46, 174, 110, 238, 30, 158, 94, 222, 62, 190, 126, 254,
+    1, 129, 65, 193, 33, 161, 97, 225, 17, 145, 81, 209, 49, 177, 113, 241,
+    9, 137, 73, 201, 41, 169, 105, 233, 25, 153, 89, 217, 57, 185, 121, 249,
+    5, 133, 69, 197, 37, 165, 101, 229, 21, 149, 85, 213, 53, 181, 117, 245,
+    13, 141, 77, 205, 45, 173, 109, 237, 29, 157, 93, 221, 61, 189, 125, 253,
+    3, 131, 67, 195, 35, 163, 99, 227, 19, 147, 83, 211, 51, 179, 115, 243,
+    11, 139, 75, 203, 43, 171, 107, 235, 27, 155, 91, 219, 59, 187, 123, 251,
+    7, 135, 71, 199, 39, 167, 103, 231, 23, 151, 87, 215, 55, 183, 119, 247,
+    15, 143, 79, 207, 47, 175, 111, 239, 31, 159, 95, 223, 63, 191, 127, 255,
+};
+
+static void bitrev_vector(uint16_t *poly) {
+	unsigned int i, r;
+	uint16_t tmp;
+
+	for (i = 0; i < KYBER_N; i++) {
+		r = bitrev_table[i];
+		if (i < r) {
+			tmp = poly[i];
+			poly[i] = poly[r];
+			poly[r] = tmp;
+		}
+	}
+}
+
+static void mul_coefficients(uint16_t *poly, const uint16_t *factors) {
+	unsigned int i;
+
+	for (i = 0; i < KYBER_N; i++)
+		poly[i] = montgomery_reduce((poly[i] * factors[i]));
+}
+
+/* GS_bo_to_no; omegas need to be in Montgomery domain */
+static void ntt(uint16_t *a, const uint16_t *omega) {
+	int start, j, jTwiddle, level;
+	uint16_t temp, W;
+	uint32_t t;
+
+	for (level = 0; level < 8; level++) {
+		for (start = 0; start < (1 << level); start++) {
+			jTwiddle = 0;
+			for (j = start; j < KYBER_N - 1; j += 2 * (1 << level)) {
+				W = omega[jTwiddle++];
+				temp = a[j];
+
+				if (level & 1) // odd level
+					a[j] = barrett_reduce((temp + a[j + (1 << level)]));
+				else
+					a[j] = (temp + a[j + (1 << level)]); // Omit reduction (be lazy)
+
+				t = (W * ((uint32_t) temp + 4 * KYBER_Q - a[j + (1 << level)]));
+
+				a[j + (1 << level)] = montgomery_reduce(t);
+			}
+		}
+	}
+}
diff --git a/crypt/liboqs/kex_mlwe_kyber/params.h b/crypt/liboqs/kex_mlwe_kyber/params.h
new file mode 100644
index 0000000000000000000000000000000000000000..ba92261027ba72fb3190212a8961f2c34c193a2e
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/params.h
@@ -0,0 +1,37 @@
+#ifndef PARAMS_H
+#define PARAMS_H
+
+#define KYBER_N 256
+#define KYBER_D 3
+#define KYBER_K 4 /* used in sampler */
+#define KYBER_Q 7681
+
+#define KYBER_SEEDBYTES 32
+#define KYBER_NOISESEEDBYTES 32
+#define KYBER_COINBYTES 32
+#define KYBER_SHAREDKEYBYTES 32
+
+#define KYBER_POLYBYTES 416
+#define KYBER_POLYCOMPRESSEDBYTES 96
+#define KYBER_POLYVECBYTES (KYBER_D * KYBER_POLYBYTES)
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_D * 352)
+
+#define KYBER_INDCPA_MSGBYTES 32
+#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_SEEDBYTES)
+#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES)
+#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES)
+
+#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES)
+#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 32 + KYBER_SHAREDKEYBYTES)
+#define KYBER_BYTES (KYBER_INDCPA_BYTES + KYBER_INDCPA_MSGBYTES) /* Second part is for Targhi-Unruh */
+
+extern uint16_t oqs_kex_mlwe_kyber_omegas_montgomery[];
+extern uint16_t oqs_kex_mlwe_kyber_omegas_inv_bitrev_montgomery[];
+extern uint16_t oqs_kex_mlwe_kyber_psis_inv_montgomery[];
+extern uint16_t oqs_kex_mlwe_kyber_psis_bitrev_montgomery[];
+
+#if defined(WINDOWS)
+typedef unsigned __int16 uint16_t;
+#endif
+
+#endif
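
For reference, a tiny sketch (not part of the patch) that prints the concrete sizes these macros expand to with d = 3 and the 352-byte per-polynomial compression: 1088-byte public keys, 2400-byte secret keys and 1184-byte ciphertexts.

```c
#include <stdint.h>
#include <stdio.h>
#include "params.h"

int main(void) {
	printf("public key: %d bytes\n", KYBER_PUBLICKEYBYTES); /* 1088 */
	printf("secret key: %d bytes\n", KYBER_SECRETKEYBYTES); /* 2400 */
	printf("ciphertext: %d bytes\n", KYBER_BYTES);          /* 1184 */
	return 0;
}
```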
diff --git a/crypt/liboqs/kex_mlwe_kyber/poly.c b/crypt/liboqs/kex_mlwe_kyber/poly.c
new file mode 100644
index 0000000000000000000000000000000000000000..dc19b217cb7f24f43c22c4275f123580854bbccf
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/poly.c
@@ -0,0 +1,169 @@
+#include "params.h"
+#include <oqs/sha3.h>
+
+typedef struct {
+	uint16_t coeffs[KYBER_N];
+#if defined(WINDOWS)
+} poly;
+#else
+} poly __attribute__((aligned(32)));
+#endif
+
+/* include cbd.c */
+static uint32_t load_littleendian(const unsigned char *x) {
+	return x[0] | (((uint32_t) x[1]) << 8) | (((uint32_t) x[2]) << 16) | (((uint32_t) x[3]) << 24);
+}
+
+static void cbd(poly *r, const unsigned char *buf) {
+#if KYBER_K != 4
+#error "poly_getnoise in poly.c only supports k=4"
+#endif
+
+	uint32_t t, d, a[4], b[4];
+	int i, j;
+
+	for (i = 0; i < KYBER_N / 4; i++) {
+		t = load_littleendian(buf + 4 * i);
+		d = 0;
+		for (j = 0; j < 4; j++)
+			d += (t >> j) & 0x11111111;
+
+		a[0] = d & 0xf;
+		b[0] = (d >> 4) & 0xf;
+		a[1] = (d >> 8) & 0xf;
+		b[1] = (d >> 12) & 0xf;
+		a[2] = (d >> 16) & 0xf;
+		b[2] = (d >> 20) & 0xf;
+		a[3] = (d >> 24) & 0xf;
+		b[3] = (d >> 28);
+
+		r->coeffs[4 * i + 0] = a[0] + KYBER_Q - b[0];
+		r->coeffs[4 * i + 1] = a[1] + KYBER_Q - b[1];
+		r->coeffs[4 * i + 2] = a[2] + KYBER_Q - b[2];
+		r->coeffs[4 * i + 3] = a[3] + KYBER_Q - b[3];
+	}
+}
+/* end cbd.c */
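
The accumulation `d += (t >> j) & 0x11111111` computes, in each nibble of d, the Hamming weight of the corresponding 4 bits of t (no nibble can overflow, since the maximum count is 4), so a[k] - b[k] above is a sample from a centered binomial distribution with parameter 4. A standalone illustration (not part of the patch):

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
	uint32_t t = 0xB0000007; /* lowest nibble 0111, highest nibble 1011 */
	uint32_t d = 0;
	for (int j = 0; j < 4; j++)
		d += (t >> j) & 0x11111111;
	/* each nibble of d now holds the number of set bits in the matching nibble of t */
	printf("weight(low) = %u, weight(high) = %u\n", (unsigned) (d & 0xf), (unsigned) ((d >> 28) & 0xf));
	return 0; /* prints 3 and 3 */
}
```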
+
+static void poly_compress(unsigned char *r, const poly *a) {
+	uint32_t t[8];
+	unsigned int i, j, k = 0;
+
+	for (i = 0; i < KYBER_N; i += 8) {
+		for (j = 0; j < 8; j++)
+			t[j] = (((freeze(a->coeffs[i + j]) << 3) + KYBER_Q / 2) / KYBER_Q) & 7;
+
+		r[k] = t[0] | (t[1] << 3) | (t[2] << 6);
+		r[k + 1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7);
+		r[k + 2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5);
+		k += 3;
+	}
+}
+
+static void poly_decompress(poly *r, const unsigned char *a) {
+	unsigned int i;
+	for (i = 0; i < KYBER_N; i += 8) {
+		r->coeffs[i + 0] = (((a[0] & 7) * KYBER_Q) + 4) >> 3;
+		r->coeffs[i + 1] = ((((a[0] >> 3) & 7) * KYBER_Q) + 4) >> 3;
+		r->coeffs[i + 2] = ((((a[0] >> 6) | ((a[1] << 2) & 4)) * KYBER_Q) + 4) >> 3;
+		r->coeffs[i + 3] = ((((a[1] >> 1) & 7) * KYBER_Q) + 4) >> 3;
+		r->coeffs[i + 4] = ((((a[1] >> 4) & 7) * KYBER_Q) + 4) >> 3;
+		r->coeffs[i + 5] = ((((a[1] >> 7) | ((a[2] << 1) & 6)) * KYBER_Q) + 4) >> 3;
+		r->coeffs[i + 6] = ((((a[2] >> 2) & 7) * KYBER_Q) + 4) >> 3;
+		r->coeffs[i + 7] = ((((a[2] >> 5)) * KYBER_Q) + 4) >> 3;
+		a += 3;
+	}
+}
+
+static void poly_tobytes(unsigned char *r, const poly *a) {
+	int i, j;
+	uint16_t t[8];
+
+	for (i = 0; i < KYBER_N / 8; i++) {
+		for (j = 0; j < 8; j++)
+			t[j] = freeze(a->coeffs[8 * i + j]);
+
+		r[13 * i + 0] = t[0] & 0xff;
+		r[13 * i + 1] = (t[0] >> 8) | ((t[1] & 0x07) << 5);
+		r[13 * i + 2] = (t[1] >> 3) & 0xff;
+		r[13 * i + 3] = (t[1] >> 11) | ((t[2] & 0x3f) << 2);
+		r[13 * i + 4] = (t[2] >> 6) | ((t[3] & 0x01) << 7);
+		r[13 * i + 5] = (t[3] >> 1) & 0xff;
+		r[13 * i + 6] = (t[3] >> 9) | ((t[4] & 0x0f) << 4);
+		r[13 * i + 7] = (t[4] >> 4) & 0xff;
+		r[13 * i + 8] = (t[4] >> 12) | ((t[5] & 0x7f) << 1);
+		r[13 * i + 9] = (t[5] >> 7) | ((t[6] & 0x03) << 6);
+		r[13 * i + 10] = (t[6] >> 2) & 0xff;
+		r[13 * i + 11] = (t[6] >> 10) | ((t[7] & 0x1f) << 3);
+		r[13 * i + 12] = (t[7] >> 5);
+	}
+}
+
+static void poly_frombytes(poly *r, const unsigned char *a) {
+	int i;
+	for (i = 0; i < KYBER_N / 8; i++) {
+		r->coeffs[8 * i + 0] = a[13 * i + 0] | (((uint16_t) a[13 * i + 1] & 0x1f) << 8);
+		r->coeffs[8 * i + 1] = (a[13 * i + 1] >> 5) | (((uint16_t) a[13 * i + 2]) << 3) | (((uint16_t) a[13 * i + 3] & 0x03) << 11);
+		r->coeffs[8 * i + 2] = (a[13 * i + 3] >> 2) | (((uint16_t) a[13 * i + 4] & 0x7f) << 6);
+		r->coeffs[8 * i + 3] = (a[13 * i + 4] >> 7) | (((uint16_t) a[13 * i + 5]) << 1) | (((uint16_t) a[13 * i + 6] & 0x0f) << 9);
+		r->coeffs[8 * i + 4] = (a[13 * i + 6] >> 4) | (((uint16_t) a[13 * i + 7]) << 4) | (((uint16_t) a[13 * i + 8] & 0x01) << 12);
+		r->coeffs[8 * i + 5] = (a[13 * i + 8] >> 1) | (((uint16_t) a[13 * i + 9] & 0x3f) << 7);
+		r->coeffs[8 * i + 6] = (a[13 * i + 9] >> 6) | (((uint16_t) a[13 * i + 10]) << 2) | (((uint16_t) a[13 * i + 11] & 0x07) << 10);
+		r->coeffs[8 * i + 7] = (a[13 * i + 11] >> 3) | (((uint16_t) a[13 * i + 12]) << 5);
+	}
+}
+
+static void poly_getnoise(poly *r, const unsigned char *seed, unsigned char nonce) {
+	unsigned char buf[KYBER_N];
+
+	OQS_SHA3_cshake128_simple(buf, KYBER_N, nonce, seed, KYBER_NOISESEEDBYTES);
+
+	cbd(r, buf);
+}
+
+static void poly_ntt(poly *r) {
+	mul_coefficients(r->coeffs, oqs_kex_mlwe_kyber_psis_bitrev_montgomery);
+	ntt(r->coeffs, oqs_kex_mlwe_kyber_omegas_montgomery);
+}
+
+static void poly_invntt(poly *r) {
+	bitrev_vector(r->coeffs);
+	ntt(r->coeffs, oqs_kex_mlwe_kyber_omegas_inv_bitrev_montgomery);
+	mul_coefficients(r->coeffs, oqs_kex_mlwe_kyber_psis_inv_montgomery);
+}
+
+static void poly_add(poly *r, const poly *a, const poly *b) {
+	int i;
+	for (i = 0; i < KYBER_N; i++)
+		r->coeffs[i] = barrett_reduce(a->coeffs[i] + b->coeffs[i]);
+}
+
+static void poly_sub(poly *r, const poly *a, const poly *b) {
+	int i;
+	for (i = 0; i < KYBER_N; i++)
+		r->coeffs[i] = barrett_reduce(a->coeffs[i] + 3 * KYBER_Q - b->coeffs[i]);
+}
+
+static void poly_frommsg(poly *r, const unsigned char msg[KYBER_SHAREDKEYBYTES]) {
+	uint16_t i, j, mask;
+
+	for (i = 0; i < KYBER_SHAREDKEYBYTES; i++) {
+		for (j = 0; j < 8; j++) {
+			mask = -((msg[i] >> j) & 1);
+			r->coeffs[8 * i + j] = mask & ((KYBER_Q + 1) / 2);
+		}
+	}
+}
+
+static void poly_tomsg(unsigned char msg[KYBER_SHAREDKEYBYTES], const poly *a) {
+	uint16_t t;
+	int i, j;
+
+	for (i = 0; i < KYBER_SHAREDKEYBYTES; i++) {
+		msg[i] = 0;
+		for (j = 0; j < 8; j++) {
+			t = (((freeze(a->coeffs[8 * i + j]) << 1) + KYBER_Q / 2) / KYBER_Q) & 1;
+			msg[i] |= t << j;
+		}
+	}
+}
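
poly_frommsg and poly_tomsg encode each message bit as the coefficient 0 or (q + 1)/2 and decode by testing whether the (noisy) coefficient is closer to q/2 than to 0. A standalone single-coefficient sketch (not part of the patch):

```c
#include <stdint.h>
#include <stdio.h>

#define Q 7681 /* KYBER_Q */

int main(void) {
	for (int bit = 0; bit <= 1; bit++) {
		uint16_t c = bit ? (Q + 1) / 2 : 0;          /* encode, as in poly_frommsg */
		uint16_t noisy = (uint16_t)((c + 300) % Q);  /* decryption leaves small noise */
		int back = (((noisy << 1) + Q / 2) / Q) & 1; /* decode, as in poly_tomsg */
		printf("bit %d -> coeff %u -> bit %d\n", bit, (unsigned) noisy, back);
	}
	return 0; /* both bits survive the noise */
}
```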
diff --git a/crypt/liboqs/kex_mlwe_kyber/polyvec.c b/crypt/liboqs/kex_mlwe_kyber/polyvec.c
new file mode 100644
index 0000000000000000000000000000000000000000..4f9d08ee95500c76f102e71b833642687958d821
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/polyvec.c
@@ -0,0 +1,196 @@
+#include "params.h"
+
+typedef struct {
+	poly vec[KYBER_D];
+#if defined(WINDOWS)
+} polyvec;
+#else
+} polyvec __attribute__((aligned(32)));
+#endif
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_D * 352))
+static void polyvec_compress(unsigned char *r, const polyvec *a) {
+	int i, j, k;
+	uint16_t t[8];
+	for (i = 0; i < KYBER_D; i++) {
+		for (j = 0; j < KYBER_N / 8; j++) {
+			for (k = 0; k < 8; k++)
+				t[k] = ((((uint32_t) freeze(a->vec[i].coeffs[8 * j + k]) << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff;
+
+			r[11 * j + 0] = t[0] & 0xff;
+			r[11 * j + 1] = (t[0] >> 8) | ((t[1] & 0x1f) << 3);
+			r[11 * j + 2] = (t[1] >> 5) | ((t[2] & 0x03) << 6);
+			r[11 * j + 3] = (t[2] >> 2) & 0xff;
+			r[11 * j + 4] = (t[2] >> 10) | ((t[3] & 0x7f) << 1);
+			r[11 * j + 5] = (t[3] >> 7) | ((t[4] & 0x0f) << 4);
+			r[11 * j + 6] = (t[4] >> 4) | ((t[5] & 0x01) << 7);
+			r[11 * j + 7] = (t[5] >> 1) & 0xff;
+			r[11 * j + 8] = (t[5] >> 9) | ((t[6] & 0x3f) << 2);
+			r[11 * j + 9] = (t[6] >> 6) | ((t[7] & 0x07) << 5);
+			r[11 * j + 10] = (t[7] >> 3);
+		}
+		r += 352;
+	}
+}
+
+static void polyvec_decompress(polyvec *r, const unsigned char *a) {
+	int i, j;
+	for (i = 0; i < KYBER_D; i++) {
+		for (j = 0; j < KYBER_N / 8; j++) {
+			r->vec[i].coeffs[8 * j + 0] = (((a[11 * j + 0] | (((uint32_t) a[11 * j + 1] & 0x07) << 8)) * KYBER_Q) + 1024) >> 11;
+			r->vec[i].coeffs[8 * j + 1] = ((((a[11 * j + 1] >> 3) | (((uint32_t) a[11 * j + 2] & 0x3f) << 5)) * KYBER_Q) + 1024) >> 11;
+			r->vec[i].coeffs[8 * j + 2] = ((((a[11 * j + 2] >> 6) | (((uint32_t) a[11 * j + 3] & 0xff) << 2) | (((uint32_t) a[11 * j + 4] & 0x01) << 10)) * KYBER_Q) + 1024) >> 11;
+			r->vec[i].coeffs[8 * j + 3] = ((((a[11 * j + 4] >> 1) | (((uint32_t) a[11 * j + 5] & 0x0f) << 7)) * KYBER_Q) + 1024) >> 11;
+			r->vec[i].coeffs[8 * j + 4] = ((((a[11 * j + 5] >> 4) | (((uint32_t) a[11 * j + 6] & 0x7f) << 4)) * KYBER_Q) + 1024) >> 11;
+			r->vec[i].coeffs[8 * j + 5] = ((((a[11 * j + 6] >> 7) | (((uint32_t) a[11 * j + 7] & 0xff) << 1) | (((uint32_t) a[11 * j + 8] & 0x03) << 9)) * KYBER_Q) + 1024) >> 11;
+			r->vec[i].coeffs[8 * j + 6] = ((((a[11 * j + 8] >> 2) | (((uint32_t) a[11 * j + 9] & 0x1f) << 6)) * KYBER_Q) + 1024) >> 11;
+			r->vec[i].coeffs[8 * j + 7] = ((((a[11 * j + 9] >> 5) | (((uint32_t) a[11 * j + 10] & 0xff) << 3)) * KYBER_Q) + 1024) >> 11;
+		}
+		a += 352;
+	}
+}
+
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_D * 320))
+
+static void polyvec_compress(unsigned char *r, const polyvec *a) {
+	int i, j, k;
+	uint16_t t[4];
+	for (i = 0; i < KYBER_D; i++) {
+		for (j = 0; j < KYBER_N / 4; j++) {
+			for (k = 0; k < 4; k++)
+				t[k] = ((((uint32_t) freeze(a->vec[i].coeffs[4 * j + k]) << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff;
+
+			r[5 * j + 0] = t[0] & 0xff;
+			r[5 * j + 1] = (t[0] >> 8) | ((t[1] & 0x3f) << 2);
+			r[5 * j + 2] = (t[1] >> 6) | ((t[2] & 0x0f) << 4);
+			r[5 * j + 3] = (t[2] >> 4) | ((t[3] & 0x03) << 6);
+			r[5 * j + 4] = (t[3] >> 2);
+		}
+		r += 320;
+	}
+}
+
+static void polyvec_decompress(polyvec *r, const unsigned char *a) {
+	int i, j;
+	for (i = 0; i < KYBER_D; i++) {
+		for (j = 0; j < KYBER_N / 4; j++) {
+			r->vec[i].coeffs[4 * j + 0] = (((a[5 * j + 0] | (((uint32_t) a[5 * j + 1] & 0x03) << 8)) * KYBER_Q) + 512) >> 10;
+			r->vec[i].coeffs[4 * j + 1] = ((((a[5 * j + 1] >> 2) | (((uint32_t) a[5 * j + 2] & 0x0f) << 6)) * KYBER_Q) + 512) >> 10;
+			r->vec[i].coeffs[4 * j + 2] = ((((a[5 * j + 2] >> 4) | (((uint32_t) a[5 * j + 3] & 0x3f) << 4)) * KYBER_Q) + 512) >> 10;
+			r->vec[i].coeffs[4 * j + 3] = ((((a[5 * j + 3] >> 6) | (((uint32_t) a[5 * j + 4] & 0xff) << 2)) * KYBER_Q) + 512) >> 10;
+		}
+		a += 320;
+	}
+}
+
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_D * 288))
+
+static void polyvec_compress(unsigned char *r, const polyvec *a) {
+	int i, j, k;
+	uint16_t t[8];
+	for (i = 0; i < KYBER_D; i++) {
+		for (j = 0; j < KYBER_N / 8; j++) {
+			for (k = 0; k < 8; k++)
+				t[k] = ((((uint32_t) freeze(a->vec[i].coeffs[8 * j + k]) << 9) + KYBER_Q / 2) / KYBER_Q) & 0x1ff;
+
+			r[9 * j + 0] = t[0] & 0xff;
+			r[9 * j + 1] = (t[0] >> 8) | ((t[1] & 0x7f) << 1);
+			r[9 * j + 2] = (t[1] >> 7) | ((t[2] & 0x3f) << 2);
+			r[9 * j + 3] = (t[2] >> 6) | ((t[3] & 0x1f) << 3);
+			r[9 * j + 4] = (t[3] >> 5) | ((t[4] & 0x0f) << 4);
+			r[9 * j + 5] = (t[4] >> 4) | ((t[5] & 0x07) << 5);
+			r[9 * j + 6] = (t[5] >> 3) | ((t[6] & 0x03) << 6);
+			r[9 * j + 7] = (t[6] >> 2) | ((t[7] & 0x01) << 7);
+			r[9 * j + 8] = (t[7] >> 1);
+		}
+		r += 288;
+	}
+}
+
+static void polyvec_decompress(polyvec *r, const unsigned char *a) {
+	int i, j;
+	for (i = 0; i < KYBER_D; i++) {
+		for (j = 0; j < KYBER_N / 8; j++) {
+			r->vec[i].coeffs[8 * j + 0] = (((a[9 * j + 0] | (((uint32_t) a[9 * j + 1] & 0x01) << 8)) * KYBER_Q) + 256) >> 9;
+			r->vec[i].coeffs[8 * j + 1] = ((((a[9 * j + 1] >> 1) | (((uint32_t) a[9 * j + 2] & 0x03) << 7)) * KYBER_Q) + 256) >> 9;
+			r->vec[i].coeffs[8 * j + 2] = ((((a[9 * j + 2] >> 2) | (((uint32_t) a[9 * j + 3] & 0x07) << 6)) * KYBER_Q) + 256) >> 9;
+			r->vec[i].coeffs[8 * j + 3] = ((((a[9 * j + 3] >> 3) | (((uint32_t) a[9 * j + 4] & 0x0f) << 5)) * KYBER_Q) + 256) >> 9;
+			r->vec[i].coeffs[8 * j + 4] = ((((a[9 * j + 4] >> 4) | (((uint32_t) a[9 * j + 5] & 0x1f) << 4)) * KYBER_Q) + 256) >> 9;
+			r->vec[i].coeffs[8 * j + 5] = ((((a[9 * j + 5] >> 5) | (((uint32_t) a[9 * j + 6] & 0x3f) << 3)) * KYBER_Q) + 256) >> 9;
+			r->vec[i].coeffs[8 * j + 6] = ((((a[9 * j + 6] >> 6) | (((uint32_t) a[9 * j + 7] & 0x7f) << 2)) * KYBER_Q) + 256) >> 9;
+			r->vec[i].coeffs[8 * j + 7] = ((((a[9 * j + 7] >> 7) | (((uint32_t) a[9 * j + 8] & 0xff) << 1)) * KYBER_Q) + 256) >> 9;
+		}
+		a += 288;
+	}
+}
+
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_D * 256))
+
+static void polyvec_compress(unsigned char *r, const polyvec *a) {
+	int i, j, k;
+	uint16_t t;
+	for (i = 0; i < KYBER_D; i++) {
+		for (j = 0; j < KYBER_N; j++) {
+			r[j] = ((((uint32_t) freeze(a->vec[i].coeffs[j]) << 8) + KYBER_Q / 2) / KYBER_Q) & 0xff;
+		}
+		r += 256;
+	}
+}
+
+static void polyvec_decompress(polyvec *r, const unsigned char *a) {
+	int i, j;
+	for (i = 0; i < KYBER_D; i++) {
+		for (j = 0; j < KYBER_N; j++) {
+			r->vec[i].coeffs[j] = ((a[j] * KYBER_Q) + 128) >> 8;
+		}
+		a += 256;
+	}
+}
+
+#else
+#error "Unsupported compression of polyvec"
+#endif
+
+static void polyvec_tobytes(unsigned char *r, const polyvec *a) {
+	int i;
+	for (i = 0; i < KYBER_D; i++)
+		poly_tobytes(r + i * KYBER_POLYBYTES, &a->vec[i]);
+}
+
+static void polyvec_frombytes(polyvec *r, const unsigned char *a) {
+	int i;
+	for (i = 0; i < KYBER_D; i++)
+		poly_frombytes(&r->vec[i], a + i * KYBER_POLYBYTES);
+}
+
+static void polyvec_ntt(polyvec *r) {
+	int i;
+	for (i = 0; i < KYBER_D; i++)
+		poly_ntt(&r->vec[i]);
+}
+
+static void polyvec_invntt(polyvec *r) {
+	int i;
+	for (i = 0; i < KYBER_D; i++)
+		poly_invntt(&r->vec[i]);
+}
+
+static void polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b) {
+	int i, j;
+	uint16_t t;
+	for (j = 0; j < KYBER_N; j++) {
+		t = montgomery_reduce(4613 * (uint32_t) b->vec[0].coeffs[j]); // 4613 = 2^{2*18} % q
+		r->coeffs[j] = montgomery_reduce(a->vec[0].coeffs[j] * t);
+		for (i = 1; i < KYBER_D; i++) {
+			t = montgomery_reduce(4613 * (uint32_t) b->vec[i].coeffs[j]);
+			r->coeffs[j] += montgomery_reduce(a->vec[i].coeffs[j] * t);
+		}
+		r->coeffs[j] = barrett_reduce(r->coeffs[j]);
+	}
+}
+
+static void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) {
+	int i;
+	for (i = 0; i < KYBER_D; i++)
+		poly_add(&r->vec[i], &a->vec[i], &b->vec[i]);
+}
diff --git a/crypt/liboqs/kex_mlwe_kyber/precomp.c b/crypt/liboqs/kex_mlwe_kyber/precomp.c
new file mode 100644
index 0000000000000000000000000000000000000000..e41122c2d4b2886b945dc29188d382b15cbd4262
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/precomp.c
@@ -0,0 +1,9 @@
+#include "params.h"
+
+uint16_t oqs_kex_mlwe_kyber_omegas_montgomery[KYBER_N / 2] = {990, 7427, 2634, 6819, 578, 3281, 2143, 1095, 484, 6362, 3336, 5382, 6086, 3823, 877, 5656, 3583, 7010, 6414, 263, 1285, 291, 7143, 7338, 1581, 5134, 5184, 5932, 4042, 5775, 2468, 3, 606, 729, 5383, 962, 3240, 7548, 5129, 7653, 5929, 4965, 2461, 641, 1584, 2666, 1142, 157, 7407, 5222, 5602, 5142, 6140, 5485, 4931, 1559, 2085, 5284, 2056, 3538, 7269, 3535, 7190, 1957, 3465, 6792, 1538, 4664, 2023, 7643, 3660, 7673, 1694, 6905, 3995, 3475, 5939, 1859, 6910, 4434, 1019, 1492, 7087, 4761, 657, 4859, 5798, 2640, 1693, 2607, 2782, 5400, 6466, 1010, 957, 3851, 2121, 6392, 7319, 3367, 3659, 3375, 6430, 7583, 1549, 5856, 4773, 6084, 5544, 1650, 3997, 4390, 6722, 2915, 4245, 2635, 6128, 7676, 5737, 1616, 3457, 3132, 7196, 4702, 6239, 851, 2122, 3009};
+
+uint16_t oqs_kex_mlwe_kyber_omegas_inv_bitrev_montgomery[KYBER_N / 2] = {990, 254, 862, 5047, 6586, 5538, 4400, 7103, 2025, 6804, 3858, 1595, 2299, 4345, 1319, 7197, 7678, 5213, 1906, 3639, 1749, 2497, 2547, 6100, 343, 538, 7390, 6396, 7418, 1267, 671, 4098, 5724, 491, 4146, 412, 4143, 5625, 2397, 5596, 6122, 2750, 2196, 1541, 2539, 2079, 2459, 274, 7524, 6539, 5015, 6097, 7040, 5220, 2716, 1752, 28, 2552, 133, 4441, 6719, 2298, 6952, 7075, 4672, 5559, 6830, 1442, 2979, 485, 4549, 4224, 6065, 1944, 5, 1553, 5046, 3436, 4766, 959, 3291, 3684, 6031, 2137, 1597, 2908, 1825, 6132, 98, 1251, 4306, 4022, 4314, 362, 1289, 5560, 3830, 6724, 6671, 1215, 2281, 4899, 5074, 5988, 5041, 1883, 2822, 7024, 2920, 594, 6189, 6662, 3247, 771, 5822, 1742, 4206, 3686, 776, 5987, 8, 4021, 38, 5658, 3017, 6143, 889, 4216};
+
+uint16_t oqs_kex_mlwe_kyber_psis_bitrev_montgomery[KYBER_N] = {990, 7427, 2634, 6819, 578, 3281, 2143, 1095, 484, 6362, 3336, 5382, 6086, 3823, 877, 5656, 3583, 7010, 6414, 263, 1285, 291, 7143, 7338, 1581, 5134, 5184, 5932, 4042, 5775, 2468, 3, 606, 729, 5383, 962, 3240, 7548, 5129, 7653, 5929, 4965, 2461, 641, 1584, 2666, 1142, 157, 7407, 5222, 5602, 5142, 6140, 5485, 4931, 1559, 2085, 5284, 2056, 3538, 7269, 3535, 7190, 1957, 3465, 6792, 1538, 4664, 2023, 7643, 3660, 7673, 1694, 6905, 3995, 3475, 5939, 1859, 6910, 4434, 1019, 1492, 7087, 4761, 657, 4859, 5798, 2640, 1693, 2607, 2782, 5400, 6466, 1010, 957, 3851, 2121, 6392, 7319, 3367, 3659, 3375, 6430, 7583, 1549, 5856, 4773, 6084, 5544, 1650, 3997, 4390, 6722, 2915, 4245, 2635, 6128, 7676, 5737, 1616, 3457, 3132, 7196, 4702, 6239, 851, 2122, 3009, 7613, 7295, 2007, 323, 5112, 3716, 2289, 6442, 6965, 2713, 7126, 3401, 963, 6596, 607, 5027, 7078, 4484, 5937, 944, 2860, 2680, 5049, 1777, 5850, 3387, 6487, 6777, 4812, 4724, 7077, 186, 6848, 6793, 3463, 5877, 1174, 7116, 3077, 5945, 6591, 590, 6643, 1337, 6036, 3991, 1675, 2053, 6055, 1162, 1679, 3883, 4311, 2106, 6163, 4486, 6374, 5006, 4576, 4288, 5180, 4102, 282, 6119, 7443, 6330, 3184, 4971, 2530, 5325, 4171, 7185, 5175, 5655, 1898, 382, 7211, 43, 5965, 6073, 1730, 332, 1577, 3304, 2329, 1699, 6150, 2379, 5113, 333, 3502, 4517, 1480, 1172, 5567, 651, 925, 4573, 599, 1367, 4109, 1863, 6929, 1605, 3866, 2065, 4048, 839, 5764, 2447, 2022, 3345, 1990, 4067, 2036, 2069, 3567, 7371, 2368, 339, 6947, 2159, 654, 7327, 2768, 6676, 987, 2214};
+
+uint16_t oqs_kex_mlwe_kyber_psis_inv_montgomery[KYBER_N] = {1024, 4972, 5779, 6907, 4943, 4168, 315, 5580, 90, 497, 1123, 142, 4710, 5527, 2443, 4871, 698, 2489, 2394, 4003, 684, 2241, 2390, 7224, 5072, 2064, 4741, 1687, 6841, 482, 7441, 1235, 2126, 4742, 2802, 5744, 6287, 4933, 699, 3604, 1297, 2127, 5857, 1705, 3868, 3779, 4397, 2177, 159, 622, 2240, 1275, 640, 6948, 4572, 5277, 209, 2605, 1157, 7328, 5817, 3191, 1662, 2009, 4864, 574, 2487, 164, 6197, 4436, 7257, 3462, 4268, 4281, 3414, 4515, 3170, 1290, 2003, 5855, 7156, 6062, 7531, 1732, 3249, 4884, 7512, 3590, 1049, 2123, 1397, 6093, 3691, 6130, 6541, 3946, 6258, 3322, 1788, 4241, 4900, 2309, 1400, 1757, 400, 502, 6698, 2338, 3011, 668, 7444, 4580, 6516, 6795, 2959, 4136, 3040, 2279, 6355, 3943, 2913, 6613, 7416, 4084, 6508, 5556, 4054, 3782, 61, 6567, 2212, 779, 632, 5709, 5667, 4923, 4911, 6893, 4695, 4164, 3536, 2287, 7594, 2848, 3267, 1911, 3128, 546, 1991, 156, 4958, 5531, 6903, 483, 875, 138, 250, 2234, 2266, 7222, 2842, 4258, 812, 6703, 232, 5207, 6650, 2585, 1900, 6225, 4932, 7265, 4701, 3173, 4635, 6393, 227, 7313, 4454, 4284, 6759, 1224, 5223, 1447, 395, 2608, 4502, 4037, 189, 3348, 54, 6443, 2210, 6230, 2826, 1780, 3002, 5995, 1955, 6102, 6045, 3938, 5019, 4417, 1434, 1262, 1507, 5847, 5917, 7157, 7177, 6434, 7537, 741, 4348, 1309, 145, 374, 2236, 4496, 5028, 6771, 6923, 7421, 1978, 1023, 3857, 6876, 1102, 7451, 4704, 6518, 1344, 765, 384, 5705, 1207, 1630, 4734, 1563, 6839, 5933, 1954, 4987, 7142, 5814, 7527, 4953, 7637, 4707, 2182, 5734, 2818, 541, 4097, 5641};
diff --git a/crypt/liboqs/kex_mlwe_kyber/reduce.c b/crypt/liboqs/kex_mlwe_kyber/reduce.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c390ac21a66742fffe9ee536d48547d081f8de6
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/reduce.c
@@ -0,0 +1,36 @@
+#include "params.h"
+
+static const uint32_t qinv = 7679; // -inverse_mod(q,2^18)
+static const uint32_t rlog = 18;
+
+static uint16_t montgomery_reduce(uint32_t a) {
+	uint32_t u;
+
+	u = (a * qinv);
+	u &= ((1 << rlog) - 1);
+	u *= KYBER_Q;
+	a = a + u;
+	return a >> rlog;
+}
+
+static uint16_t barrett_reduce(uint16_t a) {
+	uint32_t u;
+
+	u = a >> 13;
+	u *= KYBER_Q;
+	a -= u;
+	return a;
+}
+
+static uint16_t freeze(uint16_t x) {
+	uint16_t m, r;
+	int16_t c;
+	r = barrett_reduce(x);
+
+	m = r - KYBER_Q;
+	c = m;
+	c >>= 15;
+	r = m ^ ((r ^ m) & c);
+
+	return r;
+}
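
A standalone sanity check (not part of the patch) that montgomery_reduce really computes a * R^{-1} mod q for R = 2^18 and q = 7681, up to the usual extra multiple of q; the reduction is copied here because the library version is file-static.

```c
#include <stdint.h>
#include <stdio.h>

#define Q 7681
static const uint32_t qinv = 7679; /* -q^{-1} mod 2^18 */
static const uint32_t rlog = 18;

static uint16_t montgomery_reduce(uint32_t a) {
	uint32_t u = (a * qinv) & ((1u << rlog) - 1);
	return (uint16_t)((a + u * Q) >> rlog);
}

int main(void) {
	/* find R mod q and its inverse by brute force */
	uint32_t r = (1u << rlog) % Q, rinv = 0;
	for (uint32_t x = 1; x < Q; x++)
		if ((r * x) % Q == 1) { rinv = x; break; }

	for (uint32_t a = 1; a < 100000; a += 7919) {
		uint32_t want = (uint32_t)(((uint64_t) a * rinv) % Q);
		uint32_t got = (uint32_t)(montgomery_reduce(a) % Q);
		if (got != want) {
			printf("mismatch at a = %u\n", (unsigned) a);
			return 1;
		}
	}
	printf("montgomery_reduce(a) == a * R^-1 (mod q) on the sampled inputs\n");
	return 0;
}
```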
diff --git a/crypt/liboqs/kex_mlwe_kyber/verify.c b/crypt/liboqs/kex_mlwe_kyber/verify.c
new file mode 100644
index 0000000000000000000000000000000000000000..5e709092872c8abe72acdd9bef22dc10cd8e3b2d
--- /dev/null
+++ b/crypt/liboqs/kex_mlwe_kyber/verify.c
@@ -0,0 +1,28 @@
+#include <string.h>
+#include <stdint.h>
+
+#if defined(WINDOWS)
+#pragma warning(disable : 4146 4244)
+#endif
+
+/* Constant-time comparison: returns 0 if the two byte arrays are equal, 1 otherwise */
+static int verify(const unsigned char *a, const unsigned char *b, size_t len) {
+	uint64_t r;
+	size_t i;
+	r = 0;
+
+	for (i = 0; i < len; i++)
+		r |= a[i] ^ b[i];
+
+	r = (-r) >> 63;
+	return r;
+}
+
+/* Constant-time conditional move: b = 1 copies x into r, b = 0 leaves r unchanged */
+static void cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b) {
+	size_t i;
+
+	b = -b;
+	for (i = 0; i < len; i++)
+		r[i] ^= b & (x[i] ^ r[i]);
+}
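
verify and cmov are the constant-time building blocks of the re-encryption check in shareda: verify produces a 0/1 "differs" flag without early exit, and cmov turns that flag into a byte mask that overwrites the pre-key with the secret value z on failure. A standalone illustration with local copies of the two helpers (sketch, not part of the patch):

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static int differs(const unsigned char *a, const unsigned char *b, size_t len) {
	uint64_t r = 0;
	for (size_t i = 0; i < len; i++)
		r |= a[i] ^ b[i];
	return (int)((-r) >> 63); /* 0 if equal, 1 otherwise, without branching on the data */
}

static void cond_move(unsigned char *r, const unsigned char *x, size_t len, unsigned char b) {
	b = -b; /* 0x00 or 0xFF */
	for (size_t i = 0; i < len; i++)
		r[i] ^= b & (x[i] ^ r[i]);
}

int main(void) {
	unsigned char prek[4] = {1, 2, 3, 4}, z[4] = {9, 9, 9, 9};
	unsigned char c[4] = {5, 6, 7, 8}, c_bad[4] = {5, 6, 7, 0};
	cond_move(prek, z, 4, (unsigned char) differs(c, c_bad, 4)); /* mismatch, so prek becomes z */
	printf("%u %u %u %u\n", (unsigned) prek[0], (unsigned) prek[1], (unsigned) prek[2], (unsigned) prek[3]); /* 9 9 9 9 */
	return 0;
}
```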
diff --git a/crypt/liboqs/kex_ntru/Makefile.am b/crypt/liboqs/kex_ntru/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..b767ea1d3aff92528e92382dd7d34899b3144f15
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/Makefile.am
@@ -0,0 +1,25 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libntru.la
+
+
+libntru_la_SOURCES = kex_ntru.c \
+	ntru_crypto_drbg.c \
+	ntru_crypto_hash.c \
+	ntru_crypto_hmac.c \
+	ntru_crypto_msbyte_uint32.c \
+	ntru_crypto_ntru_convert.c \
+	ntru_crypto_ntru_encrypt.c \
+	ntru_crypto_ntru_encrypt_key.c \
+	ntru_crypto_ntru_encrypt_param_sets.c \
+	ntru_crypto_ntru_mgf1.c \
+	ntru_crypto_ntru_poly.c \
+	ntru_crypto_sha256.c \
+	ntru_crypto_sha1.c \
+	ntru_crypto_sha2.c \
+	ntru_crypto_ntru_mult_indices.c \
+	ntru_crypto_ntru_mult_coeffs_karat.c
+
+
+libntru_la_CPPFLAGS = -I../../include
+libntru_la_CPPFLAGS += $(AM_CPPFLAGS) 
+
diff --git a/crypt/liboqs/kex_ntru/kex_ntru.c b/crypt/liboqs/kex_ntru/kex_ntru.c
new file mode 100644
index 0000000000000000000000000000000000000000..3984d00ada45027a2e872675893a20b3d7530ed8
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/kex_ntru.c
@@ -0,0 +1,242 @@
+#ifndef DISABLE_NTRU_ON_WINDOWS_BY_DEFAULT
+
+#if defined(WINDOWS)
+#define UNUSED
+// __attribute__ not supported in VS
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <fcntl.h>
+#if defined(WINDOWS)
+#include <windows.h>
+#include <Wincrypt.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <oqs/kex.h>
+#include <oqs/kex_ntru.h>
+#include <oqs/rand.h>
+
+#include <ntru_crypto.h>
+
+#if defined(WINDOWS)
+#define strdup _strdup // for strdup deprecation warning
+#endif
+
+#define NTRU_PARAMETER_SELECTION NTRU_EES743EP1
+#define NTRU_PARAMETER_SELECTION_NAME "EES743EP1"
+
+OQS_KEX *OQS_KEX_ntru_new(OQS_RAND *rand) {
+	OQS_KEX *k = malloc(sizeof(OQS_KEX));
+	if (k == NULL)
+		return NULL;
+	k->method_name = strdup("ntru " NTRU_PARAMETER_SELECTION_NAME);
+	k->estimated_classical_security = 256; // http://eprint.iacr.org/2015/708.pdf Table 3 N=743 product form search cost
+	k->estimated_quantum_security = 128;   // need justification
+	k->rand = rand;
+	k->params = NULL;
+	k->alice_0 = &OQS_KEX_ntru_alice_0;
+	k->bob = &OQS_KEX_ntru_bob;
+	k->alice_1 = &OQS_KEX_ntru_alice_1;
+	k->alice_priv_free = &OQS_KEX_ntru_alice_priv_free;
+	k->free = &OQS_KEX_ntru_free;
+	return k;
+}
+
+static uint8_t get_entropy_from_dev_urandom(ENTROPY_CMD cmd, uint8_t *out) {
+	if (cmd == INIT) {
+		return 1;
+	}
+	if (out == NULL) {
+		return 0;
+	}
+	if (cmd == GET_NUM_BYTES_PER_BYTE_OF_ENTROPY) {
+		*out = 1;
+		return 1;
+	}
+	if (cmd == GET_BYTE_OF_ENTROPY) {
+		// TODO: why is this called to get entropy bytes one by one?
+		if (!OQS_RAND_get_system_entropy(out, 1)) {
+			return 0;
+		}
+		return 1;
+	}
+	return 0;
+}
+
+typedef struct OQS_KEX_ntru_alice_priv {
+	uint16_t priv_key_len;
+	uint8_t *priv_key;
+} OQS_KEX_ntru_alice_priv;
+
+int OQS_KEX_ntru_alice_0(UNUSED OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) {
+
+	int ret = 0;
+	uint32_t rc;
+	DRBG_HANDLE drbg;
+	OQS_KEX_ntru_alice_priv *ntru_alice_priv = NULL;
+
+	*alice_priv = NULL;
+	*alice_msg = NULL;
+
+	/* initialize NTRU DRBG */
+	rc = ntru_crypto_drbg_instantiate(256, (uint8_t *) "OQS Alice", strlen("OQS Alice"), (ENTROPY_FN) &get_entropy_from_dev_urandom, &drbg);
+	if (rc != DRBG_OK)
+		return 0;
+
+	/* allocate private key */
+	ntru_alice_priv = malloc(sizeof(OQS_KEX_ntru_alice_priv));
+	if (ntru_alice_priv == NULL)
+		goto err;
+	ntru_alice_priv->priv_key = NULL;
+	*alice_priv = ntru_alice_priv;
+
+	/* calculate length of public/private keys */
+	uint16_t ntru_alice_msg_len;
+	rc = ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_PARAMETER_SELECTION, &ntru_alice_msg_len, NULL, &(ntru_alice_priv->priv_key_len), NULL);
+	if (rc != NTRU_OK)
+		goto err;
+	*alice_msg_len = (size_t) ntru_alice_msg_len;
+
+	/* allocate private key bytes */
+	ntru_alice_priv->priv_key = malloc(ntru_alice_priv->priv_key_len);
+	if (ntru_alice_priv->priv_key == NULL)
+		goto err;
+	/* allocate public key */
+	*alice_msg = malloc(*alice_msg_len);
+	if (*alice_msg == NULL)
+		goto err;
+
+	/* generate public/private key pair */
+	rc = ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_PARAMETER_SELECTION, &ntru_alice_msg_len, *alice_msg, &(ntru_alice_priv->priv_key_len), ntru_alice_priv->priv_key);
+	if (rc != NTRU_OK)
+		goto err;
+	*alice_msg_len = (size_t) ntru_alice_msg_len;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	if (ntru_alice_priv != NULL)
+		free(ntru_alice_priv->priv_key);
+	free(ntru_alice_priv);
+	*alice_priv = NULL;
+	free(*alice_msg);
+	*alice_msg = NULL;
+cleanup:
+	ntru_crypto_drbg_uninstantiate(drbg);
+
+	return ret;
+}
+
+int OQS_KEX_ntru_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+	uint32_t rc;
+	DRBG_HANDLE drbg;
+
+	*bob_msg = NULL;
+	*key = NULL;
+
+	/* initialize NTRU DRBG */
+	rc = ntru_crypto_drbg_instantiate(256, (uint8_t *) "OQS Bob", strlen("OQS Bob"), (ENTROPY_FN) &get_entropy_from_dev_urandom, &drbg);
+	if (rc != DRBG_OK)
+		return 0;
+
+	/* generate random session key */
+	*key_len = 256 / 8;
+	*key = malloc(*key_len);
+	if (*key == NULL)
+		goto err;
+	OQS_RAND_n(k->rand, *key, *key_len);
+
+	/* calculate length of ciphertext */
+	uint16_t ntru_bob_msg_len;
+	rc = ntru_crypto_ntru_encrypt(drbg, alice_msg_len, alice_msg, *key_len, *key, &ntru_bob_msg_len, NULL);
+	if (rc != NTRU_OK)
+		goto err;
+	*bob_msg_len = (size_t) ntru_bob_msg_len;
+
+	/* allocate ciphertext */
+	*bob_msg = malloc(*bob_msg_len);
+	if (*bob_msg == NULL)
+		goto err;
+
+	/* encrypt session key */
+	rc = ntru_crypto_ntru_encrypt(drbg, alice_msg_len, alice_msg, *key_len, *key, &ntru_bob_msg_len, *bob_msg);
+	if (rc != NTRU_OK)
+		goto err;
+	*bob_msg_len = (size_t) ntru_bob_msg_len;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*bob_msg);
+	*bob_msg = NULL;
+	free(*key);
+	*key = NULL;
+cleanup:
+	ntru_crypto_drbg_uninstantiate(drbg);
+
+	return ret;
+}
+
+int OQS_KEX_ntru_alice_1(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+	uint32_t rc;
+
+	*key = NULL;
+
+	OQS_KEX_ntru_alice_priv *ntru_alice_priv = (OQS_KEX_ntru_alice_priv *) alice_priv;
+
+	/* calculate session key length */
+	uint16_t ntru_key_len;
+	rc = ntru_crypto_ntru_decrypt(ntru_alice_priv->priv_key_len, ntru_alice_priv->priv_key, bob_msg_len, bob_msg, &ntru_key_len, NULL);
+	if (rc != NTRU_OK)
+		goto err;
+	*key_len = (size_t) ntru_key_len;
+
+	/* allocate session key */
+	*key = malloc(*key_len);
+	if (*key == NULL)
+		goto err;
+
+	/* decrypt session key */
+	rc = ntru_crypto_ntru_decrypt(ntru_alice_priv->priv_key_len, ntru_alice_priv->priv_key, bob_msg_len, bob_msg, &ntru_key_len, *key);
+	if (rc != NTRU_OK)
+		goto err;
+	*key_len = (size_t) ntru_key_len;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*key);
+	*key = NULL;
+cleanup:
+
+	return ret;
+}
+
+void OQS_KEX_ntru_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) {
+	if (alice_priv) {
+		OQS_KEX_ntru_alice_priv *ntru_alice_priv = (OQS_KEX_ntru_alice_priv *) alice_priv;
+		free(ntru_alice_priv->priv_key);
+	}
+	free(alice_priv);
+}
+
+void OQS_KEX_ntru_free(OQS_KEX *k) {
+	if (k)
+		free(k->method_name);
+	free(k);
+}
+
+#endif
diff --git a/crypt/liboqs/kex_ntru/kex_ntru.h b/crypt/liboqs/kex_ntru/kex_ntru.h
new file mode 100755
index 0000000000000000000000000000000000000000..517a2085b7d4c796021207a0ccaebc191c403259
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/kex_ntru.h
@@ -0,0 +1,28 @@
+/**
+ * \file kex_ntru.h
+ * \brief Header for the NTRU implementation of OQS_KEX
+ */
+
+#ifndef __OQS_KEX_NTRU_H
+#define __OQS_KEX_NTRU_H
+
+#ifndef DISABLE_NTRU_ON_WINDOWS_BY_DEFAULT
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+OQS_KEX *OQS_KEX_ntru_new(OQS_RAND *rand);
+
+int OQS_KEX_ntru_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
+int OQS_KEX_ntru_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
+int OQS_KEX_ntru_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
+
+void OQS_KEX_ntru_alice_priv_free(OQS_KEX *k, void *alice_priv);
+void OQS_KEX_ntru_free(OQS_KEX *k);
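+
+/* Illustrative three-pass flow (a sketch, not upstream documentation: error
+ * checking of the int return values and freeing of the output buffers are
+ * left to the caller, and `rand` is assumed to be a valid OQS_RAND *):
+ *
+ *     OQS_KEX *kex = OQS_KEX_ntru_new(rand);
+ *     void *alice_priv;
+ *     uint8_t *alice_msg, *bob_msg, *key_a, *key_b;
+ *     size_t alice_msg_len, bob_msg_len, key_a_len, key_b_len;
+ *
+ *     OQS_KEX_ntru_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len);
+ *     OQS_KEX_ntru_bob(kex, alice_msg, alice_msg_len,
+ *                      &bob_msg, &bob_msg_len, &key_b, &key_b_len);
+ *     OQS_KEX_ntru_alice_1(kex, alice_priv, bob_msg, bob_msg_len,
+ *                          &key_a, &key_a_len);
+ *     // key_a and key_b now hold the same shared session key
+ *
+ *     OQS_KEX_ntru_alice_priv_free(kex, alice_priv);
+ *     OQS_KEX_ntru_free(kex);
+ */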
+
+#endif
+
+#endif
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto.h b/crypt/liboqs/kex_ntru/ntru_crypto.h
new file mode 100644
index 0000000000000000000000000000000000000000..7799dd46baa77f5778f77040d50ed9a6c834eae7
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto.h
@@ -0,0 +1,340 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto.h
+ *
+ * Contents: Public header file for NTRUEncrypt.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_H
+#define NTRU_CRYPTO_H
+
+#include "ntru_crypto_platform.h"
+#include "ntru_crypto_drbg.h"
+#include "ntru_crypto_error.h"
+
+#if !defined(NTRUCALL)
+#if !defined(WIN32) || defined(NTRUCRYPTO_STATIC)
+// Linux, or a Win32 static library
+#define NTRUCALL extern uint32_t
+#elif defined(NTRUCRYPTO_EXPORTS)
+// Win32 DLL build
+#define NTRUCALL extern __declspec(dllexport) uint32_t
+#else
+// Win32 DLL import
+#define NTRUCALL extern __declspec(dllimport) uint32_t
+#endif
+#endif /* NTRUCALL */
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* __cplusplus */
+
+/* parameter set ID list */
+
+typedef enum _NTRU_ENCRYPT_PARAM_SET_ID {
+	NTRU_EES401EP1,
+	NTRU_EES449EP1,
+	NTRU_EES677EP1,
+	NTRU_EES1087EP2,
+	NTRU_EES541EP1,
+	NTRU_EES613EP1,
+	NTRU_EES887EP1,
+	NTRU_EES1171EP1,
+	NTRU_EES659EP1,
+	NTRU_EES761EP1,
+	NTRU_EES1087EP1,
+	NTRU_EES1499EP1,
+	NTRU_EES401EP2,
+	NTRU_EES439EP1,
+	NTRU_EES593EP1,
+	NTRU_EES743EP1,
+	NTRU_EES443EP1,
+	NTRU_EES587EP1,
+} NTRU_ENCRYPT_PARAM_SET_ID;
+
+/* error codes */
+
+#define NTRU_OK 0
+#define NTRU_FAIL 1
+#define NTRU_BAD_PARAMETER 2
+#define NTRU_BAD_LENGTH 3
+#define NTRU_BUFFER_TOO_SMALL 4
+#define NTRU_INVALID_PARAMETER_SET 5
+#define NTRU_BAD_PUBLIC_KEY 6
+#define NTRU_BAD_PRIVATE_KEY 7
+#define NTRU_OUT_OF_MEMORY 8
+#define NTRU_BAD_ENCODING 9
+#define NTRU_OID_NOT_RECOGNIZED 10
+#define NTRU_UNSUPPORTED_PARAM_SET 11
+
+#define NTRU_RESULT(r) ((uint32_t)((r) ? NTRU_ERROR_BASE + (r) : (r)))
+#define NTRU_RET(r) return NTRU_RESULT((r))
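+/* e.g. NTRU_RESULT(NTRU_BAD_PARAMETER) == NTRU_ERROR_BASE + 2 == 0x00003002,
+ * while NTRU_RESULT(NTRU_OK) stays 0 */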
+
+/* function declarations */
+
+/* ntru_crypto_ntru_encrypt
+ *
+ * Implements NTRU encryption (SVES) for the parameter set specified in
+ * the public key blob.
+ *
+ * Before invoking this function, a DRBG must be instantiated using
+ * ntru_crypto_drbg_instantiate() to obtain a DRBG handle, and in that
+ * instantiation the requested security strength must be at least as large
+ * as the security strength of the NTRU parameter set being used.
+ * Failure to instantiate the DRBG with the proper security strength will
+ * result in this function returning DRBG_ERROR_BASE + DRBG_BAD_LENGTH.
+ *
+ * The required minimum size of the output ciphertext buffer (ct) may be
+ * queried by invoking this function with ct = NULL.  In this case, no
+ * encryption is performed, NTRU_OK is returned, and the required minimum
+ * size for ct is returned in ct_len.
+ *
+ * When ct != NULL, at invocation *ct_len must be the size of the ct buffer.
+ * Upon return it is the actual size of the ciphertext.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if the DRBG handle is invalid.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer
+ *  (other than ct) is NULL.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument
+ *  (pubkey_blob_len or pt_len) is zero, or if pt_len exceeds the
+ *  maximum plaintext length for the parameter set.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PUBLIC_KEY if the public-key blob is
+ *  invalid (unknown format, corrupt, bad length).
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the ciphertext buffer
+ *  is too small.
+ * Returns NTRU_ERROR_BASE + NTRU_OUT_OF_MEMORY if memory needed cannot be
+ *  allocated from the heap.
+ */
+
+NTRUCALL
+ntru_crypto_ntru_encrypt(
+    DRBG_HANDLE drbg_handle,    /*     in - handle for DRBG */
+    uint16_t pubkey_blob_len,   /*     in - no. of octets in public key
+                                                 blob */
+    uint8_t const *pubkey_blob, /*     in - pointer to public key */
+    uint16_t pt_len,            /*     in - no. of octets in plaintext */
+    uint8_t const *pt,          /*     in - pointer to plaintext */
+    uint16_t *ct_len,           /* in/out - no. of octets in ct, addr for
+                                                 no. of octets in ciphertext */
+    uint8_t *ct);               /*    out - address for ciphertext */
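+
+/* Illustrative two-pass call pattern described above (a sketch; drbg, the
+ * pub/pub_len public-key blob and pt/pt_len plaintext are assumed to be
+ * set up by the caller, and error handling is only sketched):
+ *
+ *     uint16_t ct_len;
+ *     // pass 1: ct == NULL only reports the required ciphertext size
+ *     if (ntru_crypto_ntru_encrypt(drbg, pub_len, pub, pt_len, pt,
+ *                                  &ct_len, NULL) != NTRU_OK) { ... }
+ *     uint8_t *ct = malloc(ct_len);
+ *     // pass 2: encrypt; on return ct_len holds the actual ciphertext size
+ *     if (ntru_crypto_ntru_encrypt(drbg, pub_len, pub, pt_len, pt,
+ *                                  &ct_len, ct) != NTRU_OK) { ... }
+ */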
+
+/* ntru_crypto_ntru_decrypt
+ *
+ * Implements NTRU decryption (SVES) for the parameter set specified in
+ * the private key blob.
+ *
+ * The maximum size of the output plaintext may be queried by invoking
+ * this function with pt = NULL.  In this case, no decryption is performed,
+ * NTRU_OK is returned, and the maximum size the plaintext could be is
+ * returned in pt_len.
+ * Note that until the decryption is performed successfully, the actual size
+ * of the resulting plaintext cannot be known.
+ *
+ * When pt != NULL, at invocation *pt_len must be the size of the pt buffer.
+ * Upon return it is the actual size of the plaintext.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer
+ *  (other than pt) is NULL.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument
+ *  (privkey_blob) is zero, or if ct_len is invalid for the parameter set.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PRIVATE_KEY if the private-key blob is
+ *  invalid (unknown format, corrupt, bad length).
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the plaintext buffer
+ *  is too small.
+ * Returns NTRU_ERROR_BASE + NTRU_OUT_OF_MEMORY if memory needed cannot be
+ *  allocated from the heap.
+ * Returns NTRU_ERROR_BASE + NTRU_FAIL if a decryption error occurs.
+ */
+
+NTRUCALL
+ntru_crypto_ntru_decrypt(
+    uint16_t privkey_blob_len,   /*     in - no. of octets in private key
+                                                 blob */
+    uint8_t const *privkey_blob, /*     in - pointer to private key */
+    uint16_t ct_len,             /*     in - no. of octets in ciphertext */
+    uint8_t const *ct,           /*     in - pointer to ciphertext */
+    uint16_t *pt_len,            /* in/out - no. of octets in pt, addr for
+                                                 no. of octets in plaintext */
+    uint8_t *pt);                /*    out - address for plaintext */
+
+/* ntru_crypto_ntru_encrypt_keygen
+ *
+ * Implements key generation for NTRUEncrypt for the parameter set specified.
+ *
+ * Before invoking this function, a DRBG must be instantiated using
+ * ntru_crypto_drbg_instantiate() to obtain a DRBG handle, and in that
+ * instantiation the requested security strength must be at least as large
+ * as the security strength of the NTRU parameter set being used.
+ * Failure to instantiate the DRBG with the proper security strength will
+ * result in this function returning DRBG_ERROR_BASE + DRBG_BAD_LENGTH.
+ *
+ * The required minimum size of the output public-key buffer (pubkey_blob)
+ * may be queried by invoking this function with pubkey_blob = NULL.
+ * In this case, no key generation is performed, NTRU_OK is returned, and
+ * the required minimum size for pubkey_blob is returned in pubkey_blob_len.
+ *
+ * The required minimum size of the output private-key buffer (privkey_blob)
+ * may be queried by invoking this function with privkey_blob = NULL.
+ * In this case, no key generation is performed, NTRU_OK is returned, and
+ * the required minimum size for privkey_blob is returned in privkey_blob_len.
+ *
+ * The required minimum sizes of both pubkey_blob and privkey_blob may be
+ * queried as described above, in a single invocation of this function.
+ *
+ * When pubkey_blob != NULL and privkey_blob != NULL, at invocation
+ * *pubkey_blob_len must be the size of the pubkey_blob buffer and
+ * *privkey_blob_len must be the size of the privkey_blob buffer.
+ * Upon return, *pubkey_blob_len is the actual size of the public-key blob
+ * and *privkey_blob_len is the actual size of the private-key blob.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer
+ *  (other than pubkey_blob or privkey_blob) is NULL.
+ * Returns NTRU_ERROR_BASE + NTRU_INVALID_PARAMETER_SET if the parameter-set
+ *  ID is invalid.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument is invalid.
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if either the pubkey_blob
+ * buffer or the privkey_blob buffer is too small.
+ * Returns NTRU_ERROR_BASE + NTRU_OUT_OF_MEMORY if memory needed cannot be
+ *  allocated from the heap.
+ * Returns NTRU_ERROR_BASE + NTRU_FAIL if the polynomial generated for f is
+ *  not invertible in (Z/qZ)[X]/(X^N - 1), which is extremely unlikely.
+ *  Should this occur, this function should simply be invoked again.
+ */
+
+NTRUCALL
+ntru_crypto_ntru_encrypt_keygen(
+    DRBG_HANDLE drbg_handle,                /*     in - handle of DRBG */
+    NTRU_ENCRYPT_PARAM_SET_ID param_set_id, /*     in - parameter set ID */
+    uint16_t *pubkey_blob_len,              /* in/out - no. of octets in
+                                                             pubkey_blob, addr
+                                                             for no. of octets
+                                                             in pubkey_blob */
+    uint8_t *pubkey_blob,                   /*    out - address for
+                                                             public key blob */
+    uint16_t *privkey_blob_len,             /* in/out - no. of octets in
+                                                             privkey_blob, addr
+                                                             for no. of octets
+                                                             in privkey_blob */
+    uint8_t *privkey_blob);                 /*    out - address for
+                                                             private key blob */
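+
+/* Illustrative two-pass call pattern (a sketch; the same pattern is used by
+ * OQS_KEX_ntru_alice_0() in kex_ntru.c, and error checks are omitted here):
+ *
+ *     uint16_t pub_len, priv_len;
+ *     // pass 1: NULL output buffers only report the required blob sizes
+ *     ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_EES743EP1,
+ *                                     &pub_len, NULL, &priv_len, NULL);
+ *     uint8_t *pub = malloc(pub_len), *priv = malloc(priv_len);
+ *     // pass 2: generate the key pair into the allocated blobs
+ *     ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_EES743EP1,
+ *                                     &pub_len, pub, &priv_len, priv);
+ */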
+
+/* ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo
+ *
+ * DER-encodes an NTRUEncrypt public-key from a public-key blob into a
+ * SubjectPublicKeyInfo field for inclusion in an X.509 certificate.
+ *
+ * The required minimum size of the output SubjectPublicKeyInfo buffer
+ * (encoded_subjectPublicKeyInfo) may be queried by invoking this function
+ * with encoded_subjectPublicKeyInfo = NULL.  In this case, no encoding is
+ * performed, NTRU_OK is returned, and the required minimum size for
+ * encoded_subjectPublicKeyInfo is returned in encoded_subjectPublicKeyInfo_len.
+ *
+ * When encoded_subjectPublicKeyInfo != NULL, at invocation
+ * *encoded_subjectPublicKeyInfo_len must be the size of the
+ * encoded_subjectPublicKeyInfo buffer.
+ * Upon return, it is the actual size of the encoded public key.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer
+ *  (other than encoded_subjectPublicKeyInfo) is NULL.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if pubkey_blob_len is zero.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PUBLIC_KEY if the public-key blob is
+ *  invalid (unknown format, corrupt, bad length).
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the SubjectPublicKeyInfo
+ *  buffer is too small.
+ */
+
+NTRUCALL
+ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo(
+    uint16_t pubkey_blob_len,   /*     in - no. of octets in public-key
+                                                blob */
+    uint8_t const *pubkey_blob, /*     in - ptr to public-key blob */
+    uint16_t *encoded_subjectPublicKeyInfo_len,
+                                /* in/out - no. of octets in encoded info,
+                                            address for no. of octets in
+                                            encoded info */
+    uint8_t *encoded_subjectPublicKeyInfo);
+                                /*    out - address for encoded info */
+
+/* ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey
+ *
+ * Decodes a DER-encoded NTRUEncrypt public-key from a
+ * SubjectPublicKeyInfo field in an X.509 certificate and returns the
+ * public-key blob itself.
+ *
+ * The required minimum size of the output public-key buffer (pubkey_blob)
+ * may be queried by invoking this function with pubkey_blob = NULL.
+ * In this case, no decoding is performed, NTRU_OK is returned, and the
+ * required minimum size for pubkey_blob is returned in pubkey_blob_len.
+ *
+ * When pubkey_blob != NULL, at invocation *pubkey_blob_len must be the
+ * size of the pubkey_blob buffer.
+ * Upon return, it is the actual size of the public-key blob.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if the encoded data buffer
+ *  does not contain a full DER prefix and public key.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer
+ *  (other than pubkey_blob) is NULL.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_ENCODING if the encoded data is
+ *  an invalid encoding of an NTRU public key.
+ * Returns NTRU_ERROR_BASE + NTRU_OID_NOT_RECOGNIZED if the
+ *  encoded data contains an OID that identifies an object other than
+ *  an NTRU public key.
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the pubkey_blob buffer
+ *  is too small.
+ */
+
+NTRUCALL
+ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey(
+    uint8_t const *encoded_data,   /*     in - ptr to subjectPublicKeyInfo
+                                                 in the encoded data */
+    uint16_t *pubkey_blob_len,     /* in/out - no. of octets in pubkey blob,
+                                                 address for no. of octets in
+                                                 pubkey blob */
+    uint8_t *pubkey_blob,          /*    out - address for pubkey blob */
+    uint8_t **next,                /*    out - address for ptr to encoded
+                                                 data following the 
+                                                 subjectPublicKeyInfo */
+    uint32_t *remaining_data_len); /* in/out - number of bytes remaining in
+                                                    buffer *next */
+
+/* ntru_encrypt_get_param_set_name
+ *
+ * Returns pointer to null terminated parameter set name
+ * or NULL if parameter set ID is not found.
+ */
+const char *
+ntru_encrypt_get_param_set_name(
+    NTRU_ENCRYPT_PARAM_SET_ID id); /*  in - parameter-set id */
+
+#if defined(__cplusplus)
+}
+#endif /* __cplusplus */
+
+#endif /* NTRU_CRYPTO_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_drbg.c b/crypt/liboqs/kex_ntru/ntru_crypto_drbg.c
new file mode 100644
index 0000000000000000000000000000000000000000..a94cacbfe93cb529dfa072f54f5e6f8d5dbaf109
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_drbg.c
@@ -0,0 +1,849 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File:  ntru_crypto_drbg.c
+ *
+ * Contents: Implementation of a SHA-256 HMAC-based deterministic random byte
+ *           generator (HMAC_DRBG) as defined in ANSI X9.82, Part 3 - 2007.
+ *
+ * This implementation:
+ *   - allows for MAX_INSTANTIATIONS simultaneous drbg instantiations
+ *     (may be overridden on compiler command line)
+ *   - has a maximum security strength of 256 bits
+ *   - automatically uses SHA-256 for all security strengths
+ *   - allows a personalization string of length up to
+ *     HMAC_DRBG_MAX_PERS_STR_BYTES bytes
+ *   - implements reseeding
+ *   - does not implement additional input for reseeding or generation
+ *   - does not implement predictive resistance
+ *   - limits the number of bytes requested in one invocation of generate to
+ *     MAX_BYTES_PER_REQUEST
+ *   - uses a callback function to allow the caller to supply the
+ *     Get_entropy_input routine (entropy function)
+ *   - limits the number of bytes returned from the entropy function to
+ *     MAX_ENTROPY_NONCE_BYTES
+ *   - gets the nonce bytes along with the entropy input from the entropy
+ *     function
+ *   - automatically reseeds an instantiation after MAX_REQUESTS calls to
+ *     generate
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_drbg.h"
+#include "ntru_crypto_hmac.h"
+
+/************************
+ * HMAC_DRBG parameters *
+ ************************/
+
+/* Note: Combined entropy input and nonce are a total of 2 * sec_strength_bits
+ * of randomness to provide quantum resistance */
+#define HMAC_DRBG_MAX_MIN_ENTROPY_NONCE_BYTES \
+	(2 * DRBG_MAX_SEC_STRENGTH_BITS) / 8
+#define HMAC_DRBG_MAX_ENTROPY_NONCE_BYTES \
+	HMAC_DRBG_MAX_MIN_ENTROPY_NONCE_BYTES * DRBG_MAX_BYTES_PER_BYTE_OF_ENTROPY
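+/* e.g. with DRBG_MAX_SEC_STRENGTH_BITS = 256 and up to
+ * DRBG_MAX_BYTES_PER_BYTE_OF_ENTROPY = 8 bytes returned per byte of entropy,
+ * that is (2 * 256) / 8 = 64 entropy/nonce bytes at minimum and
+ * 64 * 8 = 512 bytes at most */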
+#define HMAC_DRBG_MAX_REQUESTS 0xffffffff
+
+/*******************
+ * DRBG structures *
+ *******************/
+
+/* SHA256_HMAC_DRBG state structure */
+
+typedef struct {
+	uint32_t sec_strength;          /* security strength in bits */
+	uint32_t requests_left;         /* generation requests remaining
+                                            before reseeding */
+	ENTROPY_FN entropy_fn;          /* pointer to entropy function */
+	NTRU_CRYPTO_HMAC_CTX *hmac_ctx; /* pointer to HMAC context */
+	uint8_t V[33];                  /* md_len size internal state + 1 */
+} SHA256_HMAC_DRBG_STATE;
+
+/* External DRBG state structure */
+
+typedef struct {
+	RANDOM_BYTES_FN randombytesfn;
+} EXTERNAL_DRBG_STATE;
+
+/* DRBG state structure */
+
+typedef struct {
+	uint32_t handle;
+	DRBG_TYPE type;
+	void *state;
+} DRBG_STATE;
+
+/*************
+ * DRBG DATA *
+ *************/
+
+/* array of drbg states */
+
+static DRBG_STATE drbg_state[DRBG_MAX_INSTANTIATIONS];
+
+/******************************
+ * SHA256 HMAC_DRBG functions *
+ ******************************/
+
+/* sha256_hmac_drbg_update
+ *
+ * This routine is the SHA-256 HMAC_DRBG derivation function for
+ * instantiation and reseeding, and it is also used during generation.
+ * It updates the internal state.
+ *
+ * For instantiation, provided_data1 holds the entropy input and nonce;
+ * provided_data2 holds the optional personalization string.  Combined, this
+ * is the seed material.
+ *
+ * For reseeding, provided_data1 holds the entropy input;
+ * provided_data2 is NULL (because this implementation does not support
+ * additional input).
+ *
+ * For byte generation, both provided_data1 and provided_data2 are NULL.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns HMAC errors if they occur.
+ */
+
+static uint32_t
+sha256_hmac_drbg_update(
+    SHA256_HMAC_DRBG_STATE *s,
+    uint8_t *key, /* md_len size array */
+    uint32_t md_len,
+    uint8_t const *provided_data1,
+    uint32_t provided_data1_bytes,
+    uint8_t const *provided_data2,
+    uint32_t provided_data2_bytes) {
+	uint32_t result;
+
+	/* new key = HMAC(K, V || 0x00 [|| provided data1 [|| provided data2]]) */
+
+	if ((result = ntru_crypto_hmac_init(s->hmac_ctx)) != NTRU_CRYPTO_HMAC_OK) {
+		return result;
+	}
+
+	s->V[md_len] = 0x00;
+
+	if ((result = ntru_crypto_hmac_update(s->hmac_ctx, s->V, md_len + 1)) !=
+	    NTRU_CRYPTO_HMAC_OK) {
+		return result;
+	}
+
+	if (provided_data1) {
+		if ((result = ntru_crypto_hmac_update(s->hmac_ctx, provided_data1,
+		                                      provided_data1_bytes)) != NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+
+		if (provided_data2) {
+			if ((result = ntru_crypto_hmac_update(s->hmac_ctx, provided_data2,
+			                                      provided_data2_bytes)) != NTRU_CRYPTO_HMAC_OK) {
+				return result;
+			}
+		}
+	}
+
+	if ((result = ntru_crypto_hmac_final(s->hmac_ctx, key)) !=
+	    NTRU_CRYPTO_HMAC_OK) {
+		return result;
+	}
+
+	if ((result = ntru_crypto_hmac_set_key(s->hmac_ctx, key)) !=
+	    NTRU_CRYPTO_HMAC_OK) {
+		return result;
+	}
+
+	/* new V = HMAC(K, V) */
+
+	if ((result = ntru_crypto_hmac_init(s->hmac_ctx)) != NTRU_CRYPTO_HMAC_OK) {
+		return result;
+	}
+
+	if ((result = ntru_crypto_hmac_update(s->hmac_ctx, s->V, md_len)) !=
+	    NTRU_CRYPTO_HMAC_OK) {
+		return result;
+	}
+
+	if ((result = ntru_crypto_hmac_final(s->hmac_ctx, s->V)) !=
+	    NTRU_CRYPTO_HMAC_OK) {
+		return result;
+	}
+
+	/* if provided data exists, update K and V again */
+
+	if (provided_data1) {
+		/* new key = HMAC(K, V || 0x01 || provided data1 [|| provided data2]) */
+
+		if ((result = ntru_crypto_hmac_init(s->hmac_ctx)) !=
+		    NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+
+		s->V[md_len] = 0x01;
+
+		if ((result = ntru_crypto_hmac_update(s->hmac_ctx, s->V, md_len + 1)) !=
+		    NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+
+		if ((result = ntru_crypto_hmac_update(s->hmac_ctx, provided_data1,
+		                                      provided_data1_bytes)) != NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+
+		if (provided_data2) {
+			if ((result = ntru_crypto_hmac_update(s->hmac_ctx, provided_data2,
+			                                      provided_data2_bytes)) != NTRU_CRYPTO_HMAC_OK) {
+				return result;
+			}
+		}
+
+		if ((result = ntru_crypto_hmac_final(s->hmac_ctx, key)) !=
+		    NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+
+		if ((result = ntru_crypto_hmac_set_key(s->hmac_ctx, key)) !=
+		    NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+
+		/* new V = HMAC(K, V) */
+
+		if ((result = ntru_crypto_hmac_init(s->hmac_ctx)) !=
+		    NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+
+		if ((result = ntru_crypto_hmac_update(s->hmac_ctx, s->V, md_len)) !=
+		    NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+
+		if ((result = ntru_crypto_hmac_final(s->hmac_ctx, s->V)) !=
+		    NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+	}
+
+	memset(key, 0, md_len);
+	DRBG_RET(DRBG_OK);
+}
+
+/* sha256_hmac_drbg_instantiate
+ *
+ * This routine allocates and initializes a SHA-256 HMAC_DRBG internal state. 
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_BAD_LENGTH if the personalization string is too long.
+ * Returns DRBG_OUT_OF_MEMORY if the internal state cannot be allocated.
+ * Returns errors from HASH or SHA256 if those errors occur.
+ */
+
+static uint32_t
+sha256_hmac_drbg_instantiate(
+    uint32_t sec_strength_bits, /* strength to instantiate */
+    uint8_t const *pers_str,
+    uint32_t pers_str_bytes,
+    ENTROPY_FN entropy_fn,
+    SHA256_HMAC_DRBG_STATE **state) {
+	uint8_t entropy_nonce[HMAC_DRBG_MAX_ENTROPY_NONCE_BYTES];
+	uint32_t entropy_nonce_bytes;
+	uint32_t min_bytes_of_entropy;
+	uint8_t num_bytes_per_byte_of_entropy;
+	uint8_t key[32]; /* array of md_len size */
+	SHA256_HMAC_DRBG_STATE *s;
+	uint32_t result;
+	uint32_t i;
+
+	/* check arguments */
+
+	if (pers_str_bytes > HMAC_DRBG_MAX_PERS_STR_BYTES) {
+		DRBG_RET(DRBG_BAD_LENGTH);
+	}
+
+	/* calculate number of bytes needed for the entropy input and nonce
+     * for a SHA256_HMAC_DRBG, and get them from the entropy source
+     */
+
+	if (entropy_fn(GET_NUM_BYTES_PER_BYTE_OF_ENTROPY,
+	               &num_bytes_per_byte_of_entropy) == 0) {
+		DRBG_RET(DRBG_ENTROPY_FAIL);
+	}
+
+	if ((num_bytes_per_byte_of_entropy == 0) ||
+	    (num_bytes_per_byte_of_entropy >
+	     DRBG_MAX_BYTES_PER_BYTE_OF_ENTROPY)) {
+		DRBG_RET(DRBG_ENTROPY_FAIL);
+	}
+
+	min_bytes_of_entropy = (2 * sec_strength_bits) / 8;
+	entropy_nonce_bytes = min_bytes_of_entropy * num_bytes_per_byte_of_entropy;
+
+	for (i = 0; i < entropy_nonce_bytes; i++) {
+		if (entropy_fn(GET_BYTE_OF_ENTROPY, entropy_nonce + i) == 0) {
+			DRBG_RET(DRBG_ENTROPY_FAIL);
+		}
+	}
+
+	/* allocate SHA256_HMAC_DRBG state */
+	s = (SHA256_HMAC_DRBG_STATE *) MALLOC(sizeof(SHA256_HMAC_DRBG_STATE));
+	if (s == NULL) {
+		DRBG_RET(DRBG_OUT_OF_MEMORY);
+	}
+
+	/* allocate HMAC context */
+
+	memset(key, 0, sizeof(key));
+	if ((result = ntru_crypto_hmac_create_ctx(NTRU_CRYPTO_HASH_ALGID_SHA256,
+	                                          key, sizeof(key), &s->hmac_ctx)) != NTRU_CRYPTO_HMAC_OK) {
+		FREE(s);
+		return result;
+	}
+
+	/* init and update internal state */
+
+	memset(s->V, 0x01, sizeof(s->V));
+	if ((result = sha256_hmac_drbg_update(s, key, sizeof(key),
+	                                      entropy_nonce, entropy_nonce_bytes,
+	                                      pers_str, pers_str_bytes)) != DRBG_OK) {
+		(void) ntru_crypto_hmac_destroy_ctx(s->hmac_ctx);
+		memset(s->V, 0, sizeof(s->V));
+		FREE(s);
+		memset(entropy_nonce, 0, sizeof(entropy_nonce));
+		return result;
+	}
+
+	memset(entropy_nonce, 0, sizeof(entropy_nonce));
+
+	/* init instantiation parameters */
+
+	s->sec_strength = sec_strength_bits;
+	s->requests_left = HMAC_DRBG_MAX_REQUESTS;
+	s->entropy_fn = entropy_fn;
+	*state = s;
+
+	return result;
+}
+
+/* sha256_hmac_drbg_free
+ *
+ * This routine frees a SHA-256 HMAC_DRBG internal state.
+ */
+
+static void
+sha256_hmac_drbg_free(
+    SHA256_HMAC_DRBG_STATE *s) {
+	if (s->hmac_ctx) {
+		(void) ntru_crypto_hmac_destroy_ctx(s->hmac_ctx);
+	}
+
+	memset(s->V, 0, sizeof(s->V));
+	s->sec_strength = 0;
+	s->requests_left = 0;
+	s->entropy_fn = NULL;
+	FREE(s);
+}
+
+/* sha256_hmac_drbg_reseed
+ *
+ * This function reseeds an instantiated SHA256_HMAC DRBG.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns HMAC errors if they occur.
+ */
+
+static uint32_t
+sha256_hmac_drbg_reseed(
+    SHA256_HMAC_DRBG_STATE *s) {
+	uint8_t entropy[HMAC_DRBG_MAX_ENTROPY_NONCE_BYTES];
+	uint32_t entropy_bytes;
+	uint32_t min_bytes_of_entropy;
+	uint8_t num_bytes_per_byte_of_entropy;
+	uint8_t key[32]; /* array of md_len size for sha256_hmac_drbg_update() */
+	uint32_t result;
+	uint32_t i;
+
+	/* calculate number of bytes needed for the entropy input
+     * for a SHA256_HMAC_DRBG, and get them from the entropy source
+     */
+
+	if (s->entropy_fn(GET_NUM_BYTES_PER_BYTE_OF_ENTROPY,
+	                  &num_bytes_per_byte_of_entropy) == 0) {
+		DRBG_RET(DRBG_ENTROPY_FAIL);
+	}
+
+	if ((num_bytes_per_byte_of_entropy == 0) ||
+	    (num_bytes_per_byte_of_entropy >
+	     DRBG_MAX_BYTES_PER_BYTE_OF_ENTROPY)) {
+		DRBG_RET(DRBG_ENTROPY_FAIL);
+	}
+
+	/* note: factor of 2 here is probably unnecessary, but ensures quantum
+     * resistance even if internal state is leaked prior to reseed */
+	min_bytes_of_entropy = (2 * s->sec_strength) / 8;
+	entropy_bytes = min_bytes_of_entropy * num_bytes_per_byte_of_entropy;
+
+	for (i = 0; i < entropy_bytes; i++) {
+		if (s->entropy_fn(GET_BYTE_OF_ENTROPY, entropy + i) == 0) {
+			DRBG_RET(DRBG_ENTROPY_FAIL);
+		}
+	}
+
+	/* update internal state */
+
+	if ((result = sha256_hmac_drbg_update(s, key, sizeof(key),
+	                                      entropy, entropy_bytes, NULL, 0)) != DRBG_OK) {
+		return result;
+	}
+
+	/* reset request counter */
+
+	s->requests_left = HMAC_DRBG_MAX_REQUESTS;
+	DRBG_RET(DRBG_OK);
+}
+
+/* sha256_hmac_drbg_generate
+ *
+ * This routine generates pseudorandom bytes from a SHA256_HMAC DRBG.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_BAD_LENGTH if too many bytes are requested or the requested
+ *  security strength is too large.
+ * Returns HMAC errors if they occur.
+ */
+
+static uint32_t
+sha256_hmac_drbg_generate(
+    SHA256_HMAC_DRBG_STATE *s,
+    uint32_t sec_strength_bits,
+    uint32_t num_bytes,
+    uint8_t *out) {
+	uint8_t key[32]; /* array of md_len size for sha256_hmac_drbg_update() */
+	uint32_t result;
+
+	/* check if number of bytes requested exceeds the maximum allowed */
+
+	if (num_bytes > HMAC_DRBG_MAX_BYTES_PER_REQUEST) {
+		DRBG_RET(DRBG_BAD_LENGTH);
+	}
+
+	/* check if drbg has adequate security strength */
+
+	if (sec_strength_bits > s->sec_strength) {
+		DRBG_RET(DRBG_BAD_LENGTH);
+	}
+
+	/* check if max requests have been exceeded */
+
+	if (s->requests_left == 0) {
+		if ((result = sha256_hmac_drbg_reseed(s)) != DRBG_OK) {
+			return result;
+		}
+	}
+
+	/* generate pseudorandom bytes */
+
+	while (num_bytes > 0) {
+		/* generate md_len bytes = V = HMAC(K, V) */
+
+		if ((result = ntru_crypto_hmac_init(s->hmac_ctx)) !=
+		    NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+
+		if ((result = ntru_crypto_hmac_update(s->hmac_ctx, s->V,
+		                                      sizeof(key))) != NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+
+		if ((result = ntru_crypto_hmac_final(s->hmac_ctx, s->V)) !=
+		    NTRU_CRYPTO_HMAC_OK) {
+			return result;
+		}
+
+		/* copy generated bytes to output buffer */
+
+		if (num_bytes < sizeof(key)) {
+			memcpy(out, s->V, num_bytes);
+			num_bytes = 0;
+		} else {
+			memcpy(out, s->V, sizeof(key));
+			out += sizeof(key);
+			num_bytes -= sizeof(key);
+		}
+	}
+
+	/* update internal state */
+
+	if ((result = sha256_hmac_drbg_update(s, key, sizeof(key),
+	                                      NULL, 0, NULL, 0)) != DRBG_OK) {
+		return result;
+	}
+
+	s->requests_left--;
+
+	DRBG_RET(DRBG_OK);
+}
+
+/******************
+ * DRBG functions *
+ ******************/
+
+/* drbg_get_new_drbg
+ *
+ * This routine finds an uninstantiated drbg state and returns a pointer to it.
+ *
+ * Returns a pointer to an uninstantiated drbg state if found.
+ * Returns NULL if all drbg states are instantiated.
+ */
+
+static DRBG_STATE *
+drbg_get_new_drbg() {
+	int i;
+
+	for (i = 0; i < DRBG_MAX_INSTANTIATIONS; i++) {
+		if (drbg_state[i].state == NULL) {
+			return drbg_state + i;
+		}
+	}
+
+	return NULL;
+}
+
+/* drbg_get_drbg
+ *
+ * This routine finds an instantiated drbg state given its handle, and returns
+ * a pointer to it.
+ *
+ * Returns a pointer to the drbg state if found.
+ * Returns NULL if the drbg state is not found.
+ */
+
+static DRBG_STATE *
+drbg_get_drbg(
+    DRBG_HANDLE handle) /* in/out - drbg handle */
+{
+	int i;
+
+	for (i = 0; i < DRBG_MAX_INSTANTIATIONS; i++) {
+		if ((drbg_state[i].handle == handle) && drbg_state[i].state) {
+			return drbg_state + i;
+		}
+	}
+
+	return NULL;
+}
+
+/* drbg_get_new_handle
+ *
+ * This routine gets a new, unique 32-bit handle.
+ *
+ * Returns the new DRBG handle.
+ */
+
+static DRBG_HANDLE
+drbg_get_new_handle(void) {
+	DRBG_HANDLE h = 0;
+
+	/* ensure the new handle is unique:
+     *  if it already exists, increment it
+     */
+
+	while (drbg_get_drbg(h) != NULL) {
+		++h;
+	}
+
+	return h;
+}
+
+/********************
+ * Public functions *
+ ********************/
+
+/* ntru_crypto_drbg_instantiate
+ *
+ * This routine instantiates a drbg with the requested security strength.
+ * See ANS X9.82: Part 3-2007. This routine currently returns an instance
+ * of SHA-256 HMAC_DRBG for all requested security strengths.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if an argument pointer is NULL.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_LENGTH if the security strength requested
+ *  or the personalization string is too large.
+ * Returns DRBG_ERROR_BASE + DRBG_NOT_AVAILABLE if there are no instantiation
+ *  slots available
+ * Returns DRBG_ERROR_BASE + DRBG_OUT_OF_MEMORY if the internal state cannot be
+ *  allocated from the heap.
+ */
+
+uint32_t
+ntru_crypto_drbg_instantiate(
+    uint32_t sec_strength_bits, /*  in - requested sec strength in bits */
+    uint8_t const *pers_str,    /*  in - ptr to personalization string */
+    uint32_t pers_str_bytes,    /*  in - no. personalization str bytes */
+    ENTROPY_FN entropy_fn,      /*  in - pointer to entropy function */
+    DRBG_HANDLE *handle)        /* out - address for drbg handle */
+{
+	DRBG_STATE *drbg = NULL;
+	SHA256_HMAC_DRBG_STATE *state = NULL;
+	uint32_t result;
+
+	/* check arguments */
+
+	if ((!pers_str && pers_str_bytes) || !entropy_fn || !handle) {
+		DRBG_RET(DRBG_BAD_PARAMETER);
+	}
+
+	if (sec_strength_bits > DRBG_MAX_SEC_STRENGTH_BITS) {
+		DRBG_RET(DRBG_BAD_LENGTH);
+	}
+
+	if (pers_str && (pers_str_bytes == 0)) {
+		pers_str = NULL;
+	}
+
+	/* set security strength */
+
+	if (sec_strength_bits <= 112) {
+		sec_strength_bits = 112;
+	} else if (sec_strength_bits <= 128) {
+		sec_strength_bits = 128;
+	} else if (sec_strength_bits <= 192) {
+		sec_strength_bits = 192;
+	} else {
+		sec_strength_bits = 256;
+	}
+
+	/* get an uninstantiated drbg */
+
+	if ((drbg = drbg_get_new_drbg()) == NULL) {
+		DRBG_RET(DRBG_NOT_AVAILABLE);
+	}
+
+	/* init entropy function */
+
+	if (entropy_fn(INIT, NULL) == 0) {
+		DRBG_RET(DRBG_ENTROPY_FAIL);
+	}
+
+	/* instantiate a SHA-256 HMAC_DRBG */
+
+	if ((result = sha256_hmac_drbg_instantiate(sec_strength_bits,
+	                                           pers_str, pers_str_bytes,
+	                                           entropy_fn,
+	                                           &state)) != DRBG_OK) {
+		return result;
+	}
+
+	/* init drbg state */
+
+	drbg->handle = drbg_get_new_handle();
+	drbg->type = SHA256_HMAC_DRBG;
+	drbg->state = state;
+
+	/* return drbg handle */
+
+	*handle = drbg->handle;
+	DRBG_RET(DRBG_OK);
+}
+
+/* ntru_crypto_drbg_external_instantiate
+ *
+ * This routine instruments an external DRBG so that ntru_crypto routines
+ * can call it. randombytesfn must be of type
+ * uint32_t (*randombytesfn)(uint8_t *out, uint32_t num_bytes);
+ * and should return DRBG_OK on success.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_NOT_AVAILABLE if there are no instantiation
+ *  slots available
+ * Returns DRBG_ERROR_BASE + DRBG_OUT_OF_MEMORY if the internal state cannot be
+ *  allocated from the heap.
+ */
+
+uint32_t
+ntru_crypto_drbg_external_instantiate(
+    RANDOM_BYTES_FN randombytesfn, /*  in - pointer to random bytes function */
+    DRBG_HANDLE *handle)           /* out - address for drbg handle */
+{
+	DRBG_STATE *drbg = NULL;
+	EXTERNAL_DRBG_STATE *state = NULL;
+
+	if (!randombytesfn || !handle) {
+		DRBG_RET(DRBG_BAD_PARAMETER);
+	}
+
+	/* get an uninstantiated drbg */
+
+	if ((drbg = drbg_get_new_drbg()) == NULL) {
+		DRBG_RET(DRBG_NOT_AVAILABLE);
+	}
+
+	/* instantiate an External DRBG */
+
+	state = (EXTERNAL_DRBG_STATE *) MALLOC(sizeof(EXTERNAL_DRBG_STATE));
+	if (state == NULL) {
+		DRBG_RET(DRBG_OUT_OF_MEMORY);
+	}
+
+	state->randombytesfn = randombytesfn;
+
+	/* init drbg state */
+
+	drbg->handle = drbg_get_new_handle();
+	drbg->type = EXTERNAL_DRBG;
+	drbg->state = state;
+
+	/* return drbg handle */
+
+	*handle = drbg->handle;
+
+	DRBG_RET(DRBG_OK);
+}
+
+/* ntru_crypto_drbg_uninstantiate
+ *
+ * This routine frees a drbg given its handle.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid.
+ */
+
+uint32_t
+ntru_crypto_drbg_uninstantiate(
+    DRBG_HANDLE handle) /* in - drbg handle */
+{
+	DRBG_STATE *drbg = NULL;
+
+	/* find the instantiated drbg */
+
+	if ((drbg = drbg_get_drbg(handle)) == NULL) {
+		DRBG_RET(DRBG_BAD_PARAMETER);
+	}
+
+	/* zero and free drbg state */
+
+	if (drbg->state) {
+		switch (drbg->type) {
+		case EXTERNAL_DRBG:
+			FREE(drbg->state);
+			break;
+		case SHA256_HMAC_DRBG:
+			sha256_hmac_drbg_free((SHA256_HMAC_DRBG_STATE *) drbg->state);
+			break;
+		}
+		drbg->state = NULL;
+	}
+
+	drbg->handle = 0;
+	DRBG_RET(DRBG_OK);
+}
+
+/* ntru_crypto_drbg_reseed
+ *
+ * This routine reseeds an instantiated drbg.
+ * See ANS X9.82: Part 3-2007.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid.
+ * Returns HMAC errors if they occur.
+ */
+
+uint32_t
+ntru_crypto_drbg_reseed(
+    DRBG_HANDLE handle) /* in - drbg handle */
+{
+	DRBG_STATE *drbg = NULL;
+
+	/* find the instantiated drbg */
+
+	if ((drbg = drbg_get_drbg(handle)) == NULL) {
+		DRBG_RET(DRBG_BAD_PARAMETER);
+	}
+
+	if (drbg->type == EXTERNAL_DRBG) {
+		DRBG_RET(DRBG_BAD_PARAMETER);
+	}
+
+	/* reseed the SHA-256 HMAC_DRBG */
+
+	return sha256_hmac_drbg_reseed((SHA256_HMAC_DRBG_STATE *) drbg->state);
+}
+
+/* ntru_crypto_drbg_generate
+ *
+ * This routine generates pseudorandom bytes using an instantiated drbg.
+ * If the maximum number of requests has been reached, reseeding will occur.
+ * See ANS X9.82: Part 3-2007.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid or if
+ *  an argument pointer is NULL.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_LENGTH if the security strength requested
+ *  is too large or the number of bytes requested is zero or too large.
+ * Returns HMAC errors if they occur.
+ */
+
+uint32_t
+ntru_crypto_drbg_generate(
+    DRBG_HANDLE handle,         /*  in - drbg handle */
+    uint32_t sec_strength_bits, /*  in - requested sec strength in bits */
+    uint32_t num_bytes,         /*  in - number of octets to generate */
+    uint8_t *out)               /* out - address for generated octets */
+{
+	DRBG_STATE *drbg = NULL;
+
+	/* find the instantiated drbg */
+
+	if ((drbg = drbg_get_drbg(handle)) == NULL) {
+		DRBG_RET(DRBG_BAD_PARAMETER);
+	}
+
+	/* check arguments */
+
+	if (!out) {
+		DRBG_RET(DRBG_BAD_PARAMETER);
+	}
+
+	if (num_bytes == 0) {
+		DRBG_RET(DRBG_BAD_LENGTH);
+	}
+
+	/* generate pseudorandom output from the instantiated DRBG */
+
+	switch (drbg->type) {
+	case EXTERNAL_DRBG:
+		return ((EXTERNAL_DRBG_STATE *) drbg->state)->randombytesfn(out, num_bytes);
+	case SHA256_HMAC_DRBG:
+		return sha256_hmac_drbg_generate(
+		    (SHA256_HMAC_DRBG_STATE *) drbg->state,
+		    sec_strength_bits, num_bytes, out);
+	default:
+		DRBG_RET(DRBG_BAD_PARAMETER);
+	}
+}
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_drbg.h b/crypt/liboqs/kex_ntru/ntru_crypto_drbg.h
new file mode 100644
index 0000000000000000000000000000000000000000..9fea19fe754e513f686edc500eb5ca4bf8acf436
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_drbg.h
@@ -0,0 +1,208 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File:  ntru_crypto_drbg.h
+ *
+ * Contents: Public header file for ntru_crypto_drbg.c.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_DRBG_H
+#define NTRU_CRYPTO_DRBG_H
+
+#include "ntru_crypto_platform.h"
+#include "ntru_crypto_error.h"
+
+#if !defined(NTRUCALL)
+#if !defined(WIN32) || defined(NTRUCRYPTO_STATIC)
+/* Linux, or a Win32 static library */
+#define NTRUCALL extern uint32_t
+#elif defined(NTRUCRYPTO_EXPORTS)
+/* Win32 DLL build */
+#define NTRUCALL extern __declspec(dllexport) uint32_t
+#else
+/* Win32 DLL import */
+#define NTRUCALL extern __declspec(dllimport) uint32_t
+#endif
+#endif /* NTRUCALL */
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* __cplusplus */
+
+/*******************
+ * DRBG parameters *
+ *******************/
+
+#if !defined(DRBG_MAX_INSTANTIATIONS)
+#define DRBG_MAX_INSTANTIATIONS 4
+#endif
+#define DRBG_MAX_SEC_STRENGTH_BITS 256
+#define DRBG_MAX_BYTES_PER_BYTE_OF_ENTROPY 8
+
+/************************
+ * HMAC_DRBG parameters *
+ ************************/
+
+#define HMAC_DRBG_MAX_PERS_STR_BYTES 32
+#define HMAC_DRBG_MAX_BYTES_PER_REQUEST 1024
+
+/********************
+ * type definitions *
+ ********************/
+
+typedef uint32_t DRBG_HANDLE; /* drbg handle */
+
+typedef enum { /* drbg types */
+	EXTERNAL_DRBG,
+	SHA256_HMAC_DRBG,
+} DRBG_TYPE;
+
+typedef enum { /* entropy-function commands */
+	GET_NUM_BYTES_PER_BYTE_OF_ENTROPY = 0,
+	INIT,
+	GET_BYTE_OF_ENTROPY,
+} ENTROPY_CMD;
+typedef uint8_t (*ENTROPY_FN)(                 /* get entropy function */
+                              ENTROPY_CMD cmd, /* command */
+                              uint8_t *out);   /* address for output */
+
+/* Type for external PRNG functions. Must return DRBG_OK on success */
+typedef uint32_t (*RANDOM_BYTES_FN)(                     /* random bytes function */
+                                    uint8_t *out,        /* output buffer */
+                                    uint32_t num_bytes); /* number of bytes */
+
+/***************
+ * error codes *
+ ***************/
+
+#define DRBG_OK 0x00000000            /* no errors */
+#define DRBG_OUT_OF_MEMORY 0x00000001 /* can't allocate memory */
+#define DRBG_BAD_PARAMETER 0x00000002 /* null pointer */
+#define DRBG_BAD_LENGTH 0x00000003    /* invalid no. of bytes */
+#define DRBG_NOT_AVAILABLE 0x00000004 /* no instantiation slot available */
+#define DRBG_ENTROPY_FAIL 0x00000005  /* entropy function failure */
+
+/***************
+ * error macro *
+ ***************/
+
+#define DRBG_RESULT(r) ((uint32_t)((r) ? DRBG_ERROR_BASE + (r) : (r)))
+#define DRBG_RET(r) return DRBG_RESULT(r);
+
+/*************************
+ * function declarations *
+ *************************/
+
+/* ntru_crypto_drbg_instantiate
+ *
+ * This routine instantiates a drbg with the requested security strength.
+ * See ANS X9.82: Part 3-2007.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if an argument pointer is NULL.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_LENGTH if the security strength requested
+ *  or the personalization string is too large.
+ * Returns DRBG_ERROR_BASE + DRBG_OUT_OF_MEMORY if the internal state cannot be
+ *  allocated from the heap.
+ */
+
+NTRUCALL
+ntru_crypto_drbg_instantiate(
+    uint32_t sec_strength_bits, /*  in - requested sec strength in bits */
+    uint8_t const *pers_str,    /*  in - ptr to personalization string */
+    uint32_t pers_str_bytes,    /*  in - no. personalization str bytes */
+    ENTROPY_FN entropy_fn,      /*  in - pointer to entropy function */
+    DRBG_HANDLE *handle);       /* out - address for drbg handle */
+
+/* ntru_crypto_drbg_external_instantiate
+ *
+ * This routine instruments an external DRBG so that ntru_crypto routines
+ * can call it. randombytesfn must be of type
+ * uint32_t (*randombytesfn)(uint8_t *out, uint32_t num_bytes);
+ * and should return DRBG_OK on success.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_NOT_AVAILABLE if there are no instantiation
+ *  slots available
+ * Returns DRBG_ERROR_BASE + DRBG_OUT_OF_MEMORY if the internal state cannot be
+ *  allocated from the heap.
+ */
+
+NTRUCALL
+ntru_crypto_drbg_external_instantiate(
+    RANDOM_BYTES_FN randombytesfn, /*  in - pointer to random bytes function */
+    DRBG_HANDLE *handle);          /* out - address for drbg handle */
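+
+/* Illustrative adapter (an assumption, not part of the upstream API; it
+ * presumes OQS_RAND_get_system_entropy() accepts an arbitrary byte count,
+ * as its single-byte use in kex_ntru.c suggests):
+ *
+ *     static uint32_t system_random_bytes(uint8_t *out, uint32_t num_bytes) {
+ *         return OQS_RAND_get_system_entropy(out, num_bytes)
+ *                    ? DRBG_OK : DRBG_ERROR_BASE + DRBG_ENTROPY_FAIL;
+ *     }
+ *
+ *     DRBG_HANDLE drbg;
+ *     ntru_crypto_drbg_external_instantiate(&system_random_bytes, &drbg);
+ */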
+
+/* ntru_crypto_drbg_uninstantiate
+ *
+ * This routine frees a drbg given its handle.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid.
+ */
+
+NTRUCALL
+ntru_crypto_drbg_uninstantiate(
+    DRBG_HANDLE handle); /* in - drbg handle */
+
+/* ntru_crypto_drbg_reseed
+ *
+ * This routine reseeds an instantiated drbg.
+ * See ANS X9.82: Part 3-2007.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid.
+ * Returns NTRU_CRYPTO_HMAC errors if they occur.
+ */
+
+NTRUCALL
+ntru_crypto_drbg_reseed(
+    DRBG_HANDLE handle); /* in - drbg handle */
+
+/* ntru_crypto_drbg_generate
+ *
+ * This routine generates pseudorandom bytes using an instantiated drbg.
+ * If the maximum number of requests has been reached, reseeding will occur.
+ * See ANS X9.82: Part 3-2007.
+ *
+ * Returns DRBG_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if handle is not valid or if
+ *  an argument pointer is NULL.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_LENGTH if the security strength requested
+ *  is too large or the number of bytes requested is zero or too large.
+ * Returns NTRU_CRYPTO_HMAC errors if they occur.
+ */
+
+NTRUCALL
+ntru_crypto_drbg_generate(
+    DRBG_HANDLE handle,         /*  in - drbg handle */
+    uint32_t sec_strength_bits, /*  in - requested sec strength in bits */
+    uint32_t num_bytes,         /*  in - number of octets to generate */
+    uint8_t *out);              /* out - address for generated octets */
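+
+/* Illustrative lifecycle (a sketch; my_entropy_fn stands for a caller-supplied
+ * ENTROPY_FN such as the system-entropy-backed one defined in kex_ntru.c, and
+ * return codes should be checked in real use):
+ *
+ *     DRBG_HANDLE drbg;
+ *     uint8_t buf[32];
+ *     ntru_crypto_drbg_instantiate(256, (uint8_t *) "my app", 6,
+ *                                  my_entropy_fn, &drbg);
+ *     ntru_crypto_drbg_generate(drbg, 256, sizeof(buf), buf);
+ *     ntru_crypto_drbg_uninstantiate(drbg);
+ */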
+
+#if defined(__cplusplus)
+}
+#endif /* __cplusplus */
+
+#endif /* NTRU_CRYPTO_DRBG_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_error.h b/crypt/liboqs/kex_ntru/ntru_crypto_error.h
new file mode 100644
index 0000000000000000000000000000000000000000..9252542949068cf9f642d6e6a9eea252e3233113
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_error.h
@@ -0,0 +1,42 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File:  ntru_crypto_error.h
+ *
+ * Contents: Contains base values for crypto error codes.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_ERROR_H
+#define NTRU_CRYPTO_ERROR_H
+
+/* define base values for crypto error codes */
+
+#define HASH_ERROR_BASE ((uint32_t) 0x00000100)
+#define HMAC_ERROR_BASE ((uint32_t) 0x00000200)
+#define SHA_ERROR_BASE ((uint32_t) 0x00000400)
+#define DRBG_ERROR_BASE ((uint32_t) 0x00000a00)
+#define NTRU_ERROR_BASE ((uint32_t) 0x00003000)
+#define MGF1_ERROR_BASE ((uint32_t) 0x00004100)
+
+#endif /* NTRU_CRYPTO_ERROR_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_hash.c b/crypt/liboqs/kex_ntru/ntru_crypto_hash.c
new file mode 100644
index 0000000000000000000000000000000000000000..549e2fac37b5593bb235d21088bb12171ae86c21
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_hash.c
@@ -0,0 +1,307 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_hash.c
+ *
+ * Contents: Routines implementing the hash object abstraction.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_hash.h"
+
+typedef uint32_t (*NTRU_CRYPTO_HASH_INIT_FN)(
+    void *c);
+typedef uint32_t (*NTRU_CRYPTO_HASH_UPDATE_FN)(
+    void *c,
+    void const *data,
+    uint32_t len);
+typedef uint32_t (*NTRU_CRYPTO_HASH_FINAL_FN)(
+    void *c,
+    void *md);
+typedef uint32_t (*NTRU_CRYPTO_HASH_DIGEST_FN)(
+    void const *data,
+    uint32_t len,
+    void *md);
+
+typedef struct _NTRU_CRYPTO_HASH_ALG_PARAMS {
+	uint8_t algid;
+	uint16_t block_length;
+	uint16_t digest_length;
+	NTRU_CRYPTO_HASH_INIT_FN init;
+	NTRU_CRYPTO_HASH_UPDATE_FN update;
+	NTRU_CRYPTO_HASH_FINAL_FN final;
+	NTRU_CRYPTO_HASH_DIGEST_FN digest;
+} NTRU_CRYPTO_HASH_ALG_PARAMS;
+
+static NTRU_CRYPTO_HASH_ALG_PARAMS const algs_params[] = {
+    {
+        NTRU_CRYPTO_HASH_ALGID_SHA1,
+        SHA_1_BLK_LEN,
+        SHA_1_MD_LEN,
+        (NTRU_CRYPTO_HASH_INIT_FN) SHA_1_INIT_FN,
+        (NTRU_CRYPTO_HASH_UPDATE_FN) SHA_1_UPDATE_FN,
+        (NTRU_CRYPTO_HASH_FINAL_FN) SHA_1_FINAL_FN,
+        (NTRU_CRYPTO_HASH_DIGEST_FN) SHA_1_DIGEST_FN,
+    },
+    {
+        NTRU_CRYPTO_HASH_ALGID_SHA256,
+        SHA_256_BLK_LEN,
+        SHA_256_MD_LEN,
+        (NTRU_CRYPTO_HASH_INIT_FN) SHA_256_INIT_FN,
+        (NTRU_CRYPTO_HASH_UPDATE_FN) SHA_256_UPDATE_FN,
+        (NTRU_CRYPTO_HASH_FINAL_FN) SHA_256_FINAL_FN,
+        (NTRU_CRYPTO_HASH_DIGEST_FN) SHA_256_DIGEST_FN,
+    },
+};
+
+static int const numalgs = (sizeof(algs_params) / sizeof(algs_params[0]));
+
+/* get_alg_params
+ *
+ * Return a pointer to the hash algorithm parameters for the hash algorithm
+ * specified, by looking for algid in the global algs_params table.
+ * If not found, return NULL.
+ */
+static NTRU_CRYPTO_HASH_ALG_PARAMS const *
+get_alg_params(
+    NTRU_CRYPTO_HASH_ALGID algid) /*  in - the hash algorithm to find */
+{
+	int i;
+
+	for (i = 0; i < numalgs; i++) {
+		if (algs_params[i].algid == algid) {
+			return &algs_params[i];
+		}
+	}
+
+	return NULL;
+}
+
+/* ntru_crypto_hash_set_alg
+ *
+ * Sets the hash algorithm for the hash context.  This must be called before
+ * any calls to ntru_crypto_hash_block_length(),
+ * ntru_crypto_hash_digest_length(), or ntru_crypto_hash_init() are made.
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the specified algorithm is not supported.
+ */
+
+uint32_t
+ntru_crypto_hash_set_alg(
+    NTRU_CRYPTO_HASH_ALGID algid, /*      in - hash algorithm to be used */
+    NTRU_CRYPTO_HASH_CTX *c)      /*  in/out - pointer to the hash context */
+{
+	if (!c) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER);
+	}
+
+	c->alg_params = get_alg_params(algid);
+
+	if (!c->alg_params) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG);
+	}
+
+	HASH_RET(NTRU_CRYPTO_HASH_OK);
+}
+
+/* ntru_crypto_hash_block_length
+ *
+ * Gets the number of bytes in an input block for the hash algorithm
+ * specified in the hash context.  The hash algorithm must have been set
+ * in the hash context with a call to ntru_crypto_hash_set_alg() prior to
+ * calling this function.
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set.
+ */
+
+uint32_t
+ntru_crypto_hash_block_length(
+    NTRU_CRYPTO_HASH_CTX *c, /*  in - pointer to the hash context */
+    uint16_t *blk_len)       /* out - address for block length in bytes */
+{
+	if (!c || !blk_len) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER);
+	}
+
+	if (!c->alg_params) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG);
+	}
+
+	*blk_len = c->alg_params->block_length;
+	HASH_RET(NTRU_CRYPTO_HASH_OK);
+}
+
+/* ntru_crypto_hash_digest_length
+ *
+ * Gets the number of bytes needed to hold the message digest for the
+ * hash algorithm specified in the hash context.  The algorithm must have
+ * been set in the hash context with a call to ntru_crypto_hash_set_alg() prior
+ * to calling this function.
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set.
+ */
+
+uint32_t
+ntru_crypto_hash_digest_length(
+    NTRU_CRYPTO_HASH_CTX const *c, /*  in - pointer to the hash context */
+    uint16_t *md_len)              /* out - addr for digest length in bytes */
+{
+	if (!c || !md_len) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER);
+	}
+
+	if (!c->alg_params) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG);
+	}
+
+	*md_len = c->alg_params->digest_length;
+	HASH_RET(NTRU_CRYPTO_HASH_OK);
+}
+
+/* ntru_crypto_hash_init
+ *
+ * This routine performs standard initialization of the hash state.
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set.
+ */
+
+uint32_t
+ntru_crypto_hash_init(
+    NTRU_CRYPTO_HASH_CTX *c) /* in/out - pointer to hash context */
+{
+	if (!c) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER);
+	}
+
+	if (!c->alg_params) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG);
+	}
+
+	return c->alg_params->init(&c->alg_ctx);
+}
+
+/* ntru_crypto_hash_update
+ *
+ * This routine processes input data and updates the hash calculation.
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_OVERFLOW if too much text has been fed to the
+ *         hash algorithm. The size limit is dependent on the hash algorithm,
+ *         and not all algorithms have this limit.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set.
+ */
+
+uint32_t
+ntru_crypto_hash_update(
+    NTRU_CRYPTO_HASH_CTX *c, /* in/out - pointer to hash context */
+    uint8_t const *data,     /*     in - pointer to input data */
+    uint32_t data_len)       /*     in - number of bytes of input data */
+{
+	if (!c || (data_len && !data)) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER);
+	}
+
+	if (!c->alg_params) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG);
+	}
+
+	return c->alg_params->update(&c->alg_ctx, data, data_len);
+}
+
+/* ntru_crypto_hash_final
+ *
+ * This routine completes the hash calculation and returns the message digest.
+ * 
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set.
+ */
+
+uint32_t
+ntru_crypto_hash_final(
+    NTRU_CRYPTO_HASH_CTX *c, /* in/out - pointer to hash context */
+    uint8_t *md)             /*   out  - address for message digest */
+{
+	if (!c || !md) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER);
+	}
+
+	if (!c->alg_params) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG);
+	}
+
+	return c->alg_params->final(&c->alg_ctx, md);
+}
+
+/* ntru_crypto_hash_digest
+ *
+ * This routine computes a message digest. It is assumed that the
+ * output buffer md is large enough to hold the output (see
+ * ntru_crypto_hash_digest_length)
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_OVERFLOW if too much text has been fed to the
+ *         hash algorithm. The size limit is dependent on the hash algorithm,
+ *         and not all algorithms have this limit.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the specified algorithm is not supported.
+ */
+
+uint32_t
+ntru_crypto_hash_digest(
+    NTRU_CRYPTO_HASH_ALGID algid, /*  in - the hash algorithm to use */
+    uint8_t const *data,          /*  in - pointer to input data */
+    uint32_t data_len,            /*  in - number of bytes of input data */
+    uint8_t *md)                  /* out - address for message digest */
+{
+	NTRU_CRYPTO_HASH_ALG_PARAMS const *alg_params = get_alg_params(algid);
+
+	if (!alg_params) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_ALG);
+	}
+
+	if ((data_len && !data) || !md) {
+		HASH_RET(NTRU_CRYPTO_HASH_BAD_PARAMETER);
+	}
+
+	return alg_params->digest(data, data_len, md);
+}
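As a quick orientation for the interface above, here is a minimal usage sketch that hashes the same input through the incremental path and the one-shot helper, then checks that the digests agree. It assumes only the declarations in ntru_crypto_hash.h and that SHA-256 digests fit in 32 bytes; error handling is collapsed for brevity and the input values are arbitrary.

#include <stdio.h>
#include <string.h>
#include "ntru_crypto_hash.h"

int main(void)
{
	NTRU_CRYPTO_HASH_CTX ctx;
	uint8_t const msg[] = "abc";
	uint8_t md1[32], md2[32]; /* 32 bytes is enough for a SHA-256 digest */
	uint16_t md_len = 0;

	/* incremental interface: set the algorithm, then init/update/final */
	if (ntru_crypto_hash_set_alg(NTRU_CRYPTO_HASH_ALGID_SHA256, &ctx) ||
	    ntru_crypto_hash_digest_length(&ctx, &md_len) ||
	    ntru_crypto_hash_init(&ctx) ||
	    ntru_crypto_hash_update(&ctx, msg, sizeof(msg) - 1) ||
	    ntru_crypto_hash_final(&ctx, md1)) {
		return 1;
	}

	/* one-shot interface over the same input */
	if (ntru_crypto_hash_digest(NTRU_CRYPTO_HASH_ALGID_SHA256,
	                            msg, sizeof(msg) - 1, md2)) {
		return 1;
	}

	printf("digests %s (%u bytes)\n",
	       memcmp(md1, md2, md_len) ? "differ" : "match", (unsigned) md_len);
	return 0;
}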
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_hash.h b/crypt/liboqs/kex_ntru/ntru_crypto_hash.h
new file mode 100644
index 0000000000000000000000000000000000000000..063493d7d5f325748b41db8dbba3985f1c682729
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_hash.h
@@ -0,0 +1,201 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_hash.h
+ *
+ * Contents: Definitions and declarations for the hash object abstraction.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_HASH_H
+#define NTRU_CRYPTO_HASH_H
+
+#include "ntru_crypto_platform.h"
+#include "ntru_crypto_error.h"
+#include "ntru_crypto_hash_basics.h"
+#include "ntru_crypto_sha1.h"
+#include "ntru_crypto_sha256.h"
+
+/***************
+ * error macro *
+ ***************/
+
+#define HASH_RESULT(r) ((uint32_t)((r) ? HASH_ERROR_BASE + (r) : (r)))
+#define HASH_RET(r) return HASH_RESULT(r);
+
+/*************************
+ * structure definitions *
+ *************************/
+
+/* _NTRU_CRYPTO_HASH_ALG_PARAMS
+ *
+ * An opaque forward declaration for a private structure used
+ * internally by the hash object interface.
+ */
+
+struct _NTRU_CRYPTO_HASH_ALG_PARAMS;
+
+/* NTRU_CRYPTO_HASH_CTX
+ *
+ * Hash object context information.
+ */
+
+typedef struct {
+	struct _NTRU_CRYPTO_HASH_ALG_PARAMS const *alg_params;
+	union {
+		NTRU_CRYPTO_SHA1_CTX sha1;
+		NTRU_CRYPTO_SHA2_CTX sha256;
+	} alg_ctx;
+} NTRU_CRYPTO_HASH_CTX;
+
+/*************************
+ * function declarations *
+ *************************/
+
+/* ntru_crypto_hash_set_alg
+ *
+ * Sets the hash algorithm for the hash context.  This must be called before
+ * any calls to ntru_crypto_hash_block_length(),
+ * ntru_crypto_hash_digest_length(), or ntru_crypto_hash_init() are made.
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the specified algorithm is not supported.
+ */
+
+extern uint32_t
+ntru_crypto_hash_set_alg(
+    NTRU_CRYPTO_HASH_ALGID algid, /*      in - hash algorithm to be used */
+    NTRU_CRYPTO_HASH_CTX *c);     /* in/out - pointer to the hash context */
+
+/* ntru_crypto_hash_block_length
+ *
+ * Gets the number of bytes in an input block for the hash algorithm
+ * specified in the hash context.  The hash algorithm must have been set
+ * in the hash context with a call to ntru_crypto_hash_set_alg() prior to
+ * calling this function.
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set.
+ */
+
+extern uint32_t
+ntru_crypto_hash_block_length(
+    NTRU_CRYPTO_HASH_CTX *c, /*  in - pointer to the hash context */
+    uint16_t *blk_len);      /* out - address for block length in bytes */
+
+/* ntru_crypto_hash_digest_length
+ *
+ * Gets the number of bytes needed to hold the message digest for the
+ * hash algorithm specified in the hash context.  The algorithm must have
+ * been set in the hash context with a call to ntru_crypto_hash_set_alg() prior
+ * to calling this function.
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set.
+ */
+
+extern uint32_t
+ntru_crypto_hash_digest_length(
+    NTRU_CRYPTO_HASH_CTX const *c, /*  in - pointer to the hash context */
+    uint16_t *md_len);             /* out - addr for digest length in bytes */
+
+/* ntru_crypto_hash_init
+ *
+ * This routine initializes the hash state.
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set.
+ */
+
+extern uint32_t
+ntru_crypto_hash_init(
+    NTRU_CRYPTO_HASH_CTX *c); /* in/out - pointer to hash context */
+
+/* ntru_crypto_hash_update
+ *
+ * This routine processes input data and updates the hash calculation.
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_OVERFLOW if too much text has been fed to the
+ *         hash algorithm. The size limit is dependent on the hash algorithm,
+ *         and not all algorithms have this limit.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set.
+ */
+
+extern uint32_t
+ntru_crypto_hash_update(
+    NTRU_CRYPTO_HASH_CTX *c, /* in/out - pointer to hash context */
+    uint8_t const *data,     /*     in - pointer to input data */
+    uint32_t data_len);      /*     in - number of bytes of input data */
+
+/* ntru_crypto_hash_final
+ *
+ * This routine completes the hash calculation and returns the message digest.
+ * 
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the algorithm has not been set.
+ */
+
+extern uint32_t
+ntru_crypto_hash_final(
+    NTRU_CRYPTO_HASH_CTX *c, /* in/out - pointer to hash context */
+    uint8_t *md);            /*   out  - address for message digest */
+
+/* ntru_crypto_hash_digest
+ *
+ * This routine computes a message digest. It is assumed that the
+ * output buffer md is large enough to hold the output (see
+ * ntru_crypto_hash_digest_length)
+ *
+ * Returns NTRU_CRYPTO_HASH_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HASH_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns NTRU_CRYPTO_HASH_OVERFLOW if too much text has been fed to the
+ *         hash algorithm. The size limit is dependent on the hash algorithm,
+ *         and not all algorithms have this limit.
+ * Returns NTRU_CRYPTO_HASH_BAD_ALG if the specified algorithm is not supported.
+ */
+
+extern uint32_t
+ntru_crypto_hash_digest(
+    NTRU_CRYPTO_HASH_ALGID algid, /*  in - the hash algorithm to use */
+    uint8_t const *data,          /*  in - pointer to input data */
+    uint32_t data_len,            /*  in - number of bytes of input data */
+    uint8_t *md);                 /* out - address for message digest */
+
+#endif /* NTRU_CRYPTO_HASH_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_hash_basics.h b/crypt/liboqs/kex_ntru/ntru_crypto_hash_basics.h
new file mode 100644
index 0000000000000000000000000000000000000000..703d463eba73aeeb2b8134cc1da55602ceac0f01
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_hash_basics.h
@@ -0,0 +1,67 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_hash_basics.h
+ *
+ * Contents: Common definitions for all hash algorithms.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_HASH_BASICS_H
+#define NTRU_CRYPTO_HASH_BASICS_H
+
+#include "ntru_crypto_platform.h"
+
+/**************
+ * algorithms *
+ **************/
+
+typedef enum {
+	NTRU_CRYPTO_HASH_ALGID_NONE = 0,
+	NTRU_CRYPTO_HASH_ALGID_SHA1,
+	NTRU_CRYPTO_HASH_ALGID_SHA256,
+} NTRU_CRYPTO_HASH_ALGID;
+
+/***************
+ * error codes *
+ ***************/
+
+#define NTRU_CRYPTO_HASH_OK ((uint32_t) 0x00)
+#define NTRU_CRYPTO_HASH_FAIL ((uint32_t) 0x01)
+#define NTRU_CRYPTO_HASH_BAD_PARAMETER ((uint32_t) 0x02)
+#define NTRU_CRYPTO_HASH_OVERFLOW ((uint32_t) 0x03)
+#define NTRU_CRYPTO_HASH_BAD_ALG ((uint32_t) 0x20)
+#define NTRU_CRYPTO_HASH_OUT_OF_MEMORY ((uint32_t) 0x21)
+
+/* For backward-compatibility */
+typedef uint32_t NTRU_CRYPTO_HASH_ERROR;
+
+/*********
+ * flags *
+ *********/
+
+#define HASH_DATA_ONLY 0
+#define HASH_INIT (1 << 0)
+#define HASH_FINISH (1 << 1)
+
+#endif /* NTRU_CRYPTO_HASH_BASICS_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_hmac.c b/crypt/liboqs/kex_ntru/ntru_crypto_hmac.c
new file mode 100644
index 0000000000000000000000000000000000000000..b307df9a33d80deda7e3b5acf7bbf68d31afc877
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_hmac.c
@@ -0,0 +1,319 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_hmac.c
+ *
+ * Contents: Routines implementing the HMAC hash calculation.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_hmac.h"
+
+/* HMAC context */
+
+struct _NTRU_CRYPTO_HMAC_CTX {
+	NTRU_CRYPTO_HASH_CTX hash_ctx;
+	uint8_t *k0;
+	uint16_t blk_len;
+	uint16_t md_len;
+};
+
+/* ntru_crypto_hmac_create_ctx
+ *
+ * This routine creates an HMAC context, setting the hash algorithm and
+ * the key to be used.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK if successful.
+ * Returns NTRU_CRYPTO_HMAC_BAD_ALG if the specified algorithm is not supported.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HMAC_OUT_OF_MEMORY if memory cannot be allocated.
+ */
+
+uint32_t
+ntru_crypto_hmac_create_ctx(
+    NTRU_CRYPTO_HASH_ALGID algid, /*  in - the hash algorithm to be used */
+    uint8_t const *key,           /*  in - pointer to the HMAC key */
+    uint32_t key_len,             /*  in - number of bytes in HMAC key */
+    NTRU_CRYPTO_HMAC_CTX **c)     /* out - address for pointer to HMAC
+                                               context */
+{
+	NTRU_CRYPTO_HMAC_CTX *ctx = NULL;
+	uint32_t result;
+
+	/* check parameters */
+
+	if (!c || !key) {
+		HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER);
+	}
+
+	*c = NULL;
+
+	/* allocate memory for an HMAC context */
+	if (NULL == (ctx = (NTRU_CRYPTO_HMAC_CTX *) MALLOC(sizeof(NTRU_CRYPTO_HMAC_CTX)))) {
+		HMAC_RET(NTRU_CRYPTO_HMAC_OUT_OF_MEMORY);
+	}
+
+	/* set the algorithm */
+
+	if ((result = ntru_crypto_hash_set_alg(algid, &ctx->hash_ctx))) {
+		FREE(ctx);
+		HMAC_RET(NTRU_CRYPTO_HMAC_BAD_ALG);
+	}
+
+	/* set block length and digest length */
+
+	if ((result = ntru_crypto_hash_block_length(&ctx->hash_ctx,
+	                                            &ctx->blk_len)) ||
+	    (result = ntru_crypto_hash_digest_length(&ctx->hash_ctx,
+	                                             &ctx->md_len))) {
+		FREE(ctx);
+		return result;
+	}
+
+	/* allocate memory for K0 */
+	if ((ctx->k0 = (uint8_t *) MALLOC(ctx->blk_len)) == NULL) {
+		FREE(ctx);
+		HMAC_RET(NTRU_CRYPTO_HMAC_OUT_OF_MEMORY);
+	}
+
+	/* calculate K0 and store in HMAC context */
+
+	memset(ctx->k0, 0, ctx->blk_len);
+
+	/* check if key is too large */
+
+	if (key_len > ctx->blk_len) {
+		if ((result = ntru_crypto_hash_digest(algid, key, key_len, ctx->k0))) {
+			memset(ctx->k0, 0, ctx->blk_len);
+			FREE(ctx->k0);
+			FREE(ctx);
+			return result;
+		}
+	} else {
+		memcpy(ctx->k0, key, key_len);
+	}
+
+	/* return pointer to HMAC context */
+
+	*c = ctx;
+	HMAC_RET(NTRU_CRYPTO_HMAC_OK);
+}
+
+/* ntru_crypto_hmac_destroy_ctx
+ *
+ * Destroys an HMAC context.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK if successful.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ */
+
+uint32_t
+ntru_crypto_hmac_destroy_ctx(
+    NTRU_CRYPTO_HMAC_CTX *c) /* in/out - pointer to HMAC context */
+{
+	if (!c || !c->k0) {
+		HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER);
+	}
+
+	/* clear key and release memory */
+
+	memset(c->k0, 0, c->blk_len);
+	FREE(c->k0);
+	FREE(c);
+
+	HMAC_RET(NTRU_CRYPTO_HMAC_OK);
+}
+
+/* ntru_crypto_hmac_get_md_len
+ *
+ * This routine gets the digest length of the HMAC.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK on success.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ */
+
+uint32_t
+ntru_crypto_hmac_get_md_len(
+    NTRU_CRYPTO_HMAC_CTX const *c, /*  in - pointer to HMAC context */
+    uint16_t *md_len)              /* out - address for digest length */
+{
+	/* check parameters */
+
+	if (!c || !md_len) {
+		HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER);
+	}
+
+	/* get digest length */
+
+	*md_len = c->md_len;
+	HMAC_RET(NTRU_CRYPTO_HMAC_OK);
+}
+
+/* ntru_crypto_hmac_set_key
+ *
+ * This routine sets a digest-length key into the HMAC context.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK on success.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ */
+
+uint32_t
+ntru_crypto_hmac_set_key(
+    NTRU_CRYPTO_HMAC_CTX *c, /*  in - pointer to HMAC context */
+    uint8_t const *key)      /*  in - pointer to new HMAC key */
+{
+	/* check parameters */
+
+	if (!c || !key) {
+		HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER);
+	}
+
+	/* copy key */
+
+	memcpy(c->k0, key, c->md_len);
+	HMAC_RET(NTRU_CRYPTO_HMAC_OK);
+}
+
+/* ntru_crypto_hmac_init
+ *
+ * This routine performs standard initialization of the HMAC state.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ */
+
+uint32_t
+ntru_crypto_hmac_init(
+    NTRU_CRYPTO_HMAC_CTX *c) /* in/out - pointer to HMAC context */
+{
+	uint32_t result;
+	int i;
+
+	/* check parameters */
+
+	if (!c) {
+		HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER);
+	}
+
+	/* init hash context and compute H(K0 ^ ipad) */
+
+	for (i = 0; i < c->blk_len; i++) {
+		c->k0[i] ^= 0x36; /* K0 ^ ipad */
+	}
+
+	if ((result = ntru_crypto_hash_init(&c->hash_ctx)) ||
+	    (result = ntru_crypto_hash_update(&c->hash_ctx, c->k0, c->blk_len))) {
+		return result;
+	}
+
+	HMAC_RET(NTRU_CRYPTO_HMAC_OK);
+}
+
+/* ntru_crypto_hmac_update
+ *
+ * This routine processes input data and updates the HMAC hash calculation.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_OVERFLOW if more bytes are hashed than the
+ *         underlying hash algorithm can handle.
+ */
+
+uint32_t
+ntru_crypto_hmac_update(
+    NTRU_CRYPTO_HMAC_CTX *c, /* in/out - pointer to HMAC context */
+    const uint8_t *data,     /*     in - pointer to input data */
+    uint32_t data_len)       /*     in - no. of bytes of input data */
+{
+	uint32_t result;
+
+	/* check parameters */
+
+	if (!c || (data_len && !data)) {
+		HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER);
+	}
+
+	if ((result = ntru_crypto_hash_update(&c->hash_ctx, data, data_len))) {
+		return result;
+	}
+
+	HMAC_RET(NTRU_CRYPTO_HMAC_OK);
+}
+
+/* ntru_crypto_hmac_final
+ *
+ * This routine completes the HMAC hash calculation and returns the
+ * message digest.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ */
+
+uint32_t
+ntru_crypto_hmac_final(
+    NTRU_CRYPTO_HMAC_CTX *c, /* in/out - pointer to HMAC context */
+    uint8_t *md)             /*   out - address for message digest */
+{
+	uint32_t result = NTRU_CRYPTO_HMAC_OK;
+	int i;
+
+	/* check parameters */
+
+	if (!c || !md) {
+		HMAC_RET(NTRU_CRYPTO_HMAC_BAD_PARAMETER);
+	}
+
+	/* form K0 ^ opad
+     * complete md = H((K0 ^ ipad) || data)
+     * compute  md = H((K0 ^ opad) || md)
+     * re-form K0
+     */
+
+	for (i = 0; i < c->blk_len; i++) {
+		c->k0[i] ^= (0x36 ^ 0x5c);
+	}
+
+	if ((result = ntru_crypto_hash_final(&c->hash_ctx, md)) ||
+	    (result = ntru_crypto_hash_init(&c->hash_ctx)) ||
+	    (result = ntru_crypto_hash_update(&c->hash_ctx, c->k0, c->blk_len)) ||
+	    (result = ntru_crypto_hash_update(&c->hash_ctx, md, c->md_len)) ||
+	    (result = ntru_crypto_hash_final(&c->hash_ctx, md))) {
+		/* intentionally empty: result keeps the first failure, if any,
+		 * and we fall through so that K0 is always re-formed below */
+	}
+
+	for (i = 0; i < c->blk_len; i++) {
+		c->k0[i] ^= 0x5c;
+	}
+
+	return result;
+}
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_hmac.h b/crypt/liboqs/kex_ntru/ntru_crypto_hmac.h
new file mode 100644
index 0000000000000000000000000000000000000000..8878a7ef80098cb52a13ec6423fd096bd26e5d10
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_hmac.h
@@ -0,0 +1,169 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_hmac.h
+ *
+ * Contents: Definitions and declarations for the HMAC implementation.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_HMAC_H
+#define NTRU_CRYPTO_HMAC_H
+
+#include "ntru_crypto_platform.h"
+#include "ntru_crypto_hash.h"
+
+/***************
+ * error codes *
+ ***************/
+
+#define NTRU_CRYPTO_HMAC_OK ((uint32_t) NTRU_CRYPTO_HASH_OK)
+#define NTRU_CRYPTO_HMAC_BAD_PARAMETER ((uint32_t) NTRU_CRYPTO_HASH_BAD_PARAMETER)
+#define NTRU_CRYPTO_HMAC_BAD_ALG ((uint32_t) NTRU_CRYPTO_HASH_BAD_ALG)
+#define NTRU_CRYPTO_HMAC_OUT_OF_MEMORY ((uint32_t) NTRU_CRYPTO_HASH_OUT_OF_MEMORY)
+
+#define HMAC_RESULT(e) ((uint32_t)((e) ? HMAC_ERROR_BASE + (e) : (e)))
+#define HMAC_RET(e) return HMAC_RESULT(e)
+
+/*************************
+ * structure definitions *
+ *************************/
+
+/* HMAC context structure */
+
+struct _NTRU_CRYPTO_HMAC_CTX; /* opaque forward reference */
+typedef struct _NTRU_CRYPTO_HMAC_CTX NTRU_CRYPTO_HMAC_CTX;
+
+/*************************
+ * function declarations *
+ *************************/
+
+/* ntru_crypto_hmac_create_ctx
+ *
+ * This routine creates an HMAC context, setting the hash algorithm and
+ * the key to be used.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK if successful.
+ * Returns NTRU_CRYPTO_HMAC_BAD_ALG if the specified algorithm is not supported.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HMAC_OUT_OF_MEMORY if memory cannot be allocated.
+ */
+
+extern uint32_t
+ntru_crypto_hmac_create_ctx(
+    NTRU_CRYPTO_HASH_ALGID algid, /*  in - the hash algorithm to be used */
+    uint8_t const *key,           /*  in - pointer to the HMAC key */
+    uint32_t key_len,             /*  in - number of bytes in HMAC key */
+    NTRU_CRYPTO_HMAC_CTX **c);    /* out - address for pointer to HMAC
+                                               context */
+
+/* ntru_crypto_hmac_destroy_ctx
+ *
+ * Destroys an HMAC context.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK if successful.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ */
+
+extern uint32_t
+ntru_crypto_hmac_destroy_ctx(
+    NTRU_CRYPTO_HMAC_CTX *c); /* in/out - pointer to HMAC context */
+
+/* ntru_crypto_hmac_get_md_len
+ *
+ * This routine gets the digest length of the HMAC.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK on success.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ */
+
+extern uint32_t
+ntru_crypto_hmac_get_md_len(
+    NTRU_CRYPTO_HMAC_CTX const *c, /*  in - pointer to HMAC context */
+    uint16_t *md_len);             /* out - address for digest length */
+
+/* ntru_crypto_hmac_set_key
+ *
+ * This routine sets a digest-length key into the HMAC context.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK on success.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ */
+
+extern uint32_t
+ntru_crypto_hmac_set_key(
+    NTRU_CRYPTO_HMAC_CTX *c, /*  in - pointer to HMAC context */
+    uint8_t const *key);     /*  in - pointer to new HMAC key */
+
+/* ntru_crypto_hmac_init
+ *
+ * This routine performs standard initialization of the HMAC state.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ */
+
+extern uint32_t
+ntru_crypto_hmac_init(
+    NTRU_CRYPTO_HMAC_CTX *c); /* in/out - pointer to HMAC context */
+
+/* ntru_crypto_hmac_update
+ *
+ * This routine processes input data and updates the HMAC hash calculation.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ * Returns NTRU_CRYPTO_HASH_OVERFLOW if more bytes are hashed than the
+ *         underlying hash algorithm can handle.
+ */
+
+extern uint32_t
+ntru_crypto_hmac_update(
+    NTRU_CRYPTO_HMAC_CTX *c, /* in/out - pointer to HMAC context */
+    uint8_t const *data,     /*     in - pointer to input data */
+    uint32_t data_len);      /*     in - no. of bytes of input data */
+
+/* ntru_crypto_hmac_final
+ *
+ * This routine completes the HMAC hash calculation and returns the
+ * message digest.
+ *
+ * Returns NTRU_CRYPTO_HMAC_OK on success.
+ * Returns NTRU_CRYPTO_HASH_FAIL with corrupted context.
+ * Returns NTRU_CRYPTO_HMAC_BAD_PARAMETER if inappropriate NULL pointers are
+ * passed.
+ */
+
+extern uint32_t
+ntru_crypto_hmac_final(
+    NTRU_CRYPTO_HMAC_CTX *c, /* in/out - pointer to HMAC context */
+    uint8_t *md);            /*    out - address for message digest */
+
+#endif /* NTRU_CRYPTO_HMAC_H */
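A minimal usage sketch for the HMAC context API declared above, assuming SHA-256 (32-byte digests) and eliding per-call error reporting; the key and message values are arbitrary placeholders, not part of any real protocol.

#include <stdio.h>
#include "ntru_crypto_hmac.h"

int main(void)
{
	NTRU_CRYPTO_HMAC_CTX *ctx = NULL;
	uint8_t const key[] = "hypothetical key";
	uint8_t const msg[] = "message to authenticate";
	uint8_t md[32]; /* large enough for an HMAC-SHA-256 tag */
	uint16_t md_len = 0;

	/* create a context bound to the algorithm and key */
	if (ntru_crypto_hmac_create_ctx(NTRU_CRYPTO_HASH_ALGID_SHA256,
	                                key, sizeof(key) - 1, &ctx)) {
		return 1;
	}

	/* standard init / update / final sequence */
	if (ntru_crypto_hmac_get_md_len(ctx, &md_len) ||
	    ntru_crypto_hmac_init(ctx) ||
	    ntru_crypto_hmac_update(ctx, msg, sizeof(msg) - 1) ||
	    ntru_crypto_hmac_final(ctx, md)) {
		ntru_crypto_hmac_destroy_ctx(ctx);
		return 1;
	}

	printf("HMAC tag length: %u bytes\n", (unsigned) md_len);
	return ntru_crypto_hmac_destroy_ctx(ctx) ? 1 : 0;
}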
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_msbyte_uint32.c b/crypt/liboqs/kex_ntru/ntru_crypto_msbyte_uint32.c
new file mode 100644
index 0000000000000000000000000000000000000000..12fc971ede649d20649b87284ad2580bbbbac3dd
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_msbyte_uint32.c
@@ -0,0 +1,86 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_msbyte_uint32.c
+ *
+ * Contents: Routines to convert between an array of bytes in network byte
+ *           order (most-significant byte first) and an array of uint32 words.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_msbyte_uint32.h"
+
+/* ntru_crypto_msbyte_2_uint32()
+ *
+ * This routine converts an array of bytes in network byte order to an array
+ * of uint32_t, placing the first byte in the most significant byte of the
+ * first uint32_t word.
+ *
+ * The number of bytes in the input stream MUST be at least 4 times the
+ * number of words expected in the output array.
+ */
+
+void ntru_crypto_msbyte_2_uint32(
+    uint32_t *words,      /* out - pointer to the output uint32_t array */
+    uint8_t const *bytes, /*  in - pointer to the input byte array */
+    uint32_t n)           /*  in - number of words in the output array */
+{
+	uint32_t i;
+
+	for (i = 0; i < n; i++) {
+		words[i] = ((uint32_t)(*bytes++)) << 24;
+		words[i] |= ((uint32_t)(*bytes++)) << 16;
+		words[i] |= ((uint32_t)(*bytes++)) << 8;
+		words[i] |= (uint32_t)(*bytes++);
+	}
+
+	return;
+}
+
+/* ntru_crypto_uint32_2_msbyte()
+ *
+ * This routine converts an array of uint32_t to an array of bytes in
+ * network byte order, placing the most significant byte of the first uint32_t
+ * word as the first byte of the output array.
+ *
+ * The number of bytes in the output stream will be 4 times the number of words
+ * specified in the input array.
+ */
+
+void ntru_crypto_uint32_2_msbyte(
+    uint8_t *bytes,        /* out - pointer to the output byte array */
+    uint32_t const *words, /*  in - pointer to the input uint32_t array */
+    uint32_t n)            /*  in - number of words in the input array */
+{
+	uint32_t i;
+
+	for (i = 0; i < n; i++) {
+		*bytes++ = (uint8_t)(words[i] >> 24);
+		*bytes++ = (uint8_t)(words[i] >> 16);
+		*bytes++ = (uint8_t)(words[i] >> 8);
+		*bytes++ = (uint8_t)(words[i]);
+	}
+
+	return;
+}
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_msbyte_uint32.h b/crypt/liboqs/kex_ntru/ntru_crypto_msbyte_uint32.h
new file mode 100644
index 0000000000000000000000000000000000000000..d4f3599ce7b8050c481cbf57560cee4fe1dc611c
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_msbyte_uint32.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_msbyte_uint32.h
+ *
+ * Contents: Definitions and declarations for converting between a most-
+ *           significant-first byte stream and a uint32_t array.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_MSBYTE_UINT32_H
+#define NTRU_CRYPTO_MSBYTE_UINT32_H
+
+#include "ntru_crypto_platform.h"
+
+/* ntru_crypto_msbyte_2_uint32()
+ *
+ * This routine converts an array of bytes in network byte order to an array
+ * of uint32_t, placing the first byte in the most significant byte of the
+ * first uint32_t word.
+ *
+ * The number of bytes in the input stream MUST be at least 4 times the
+ * number of words expected in the output array.
+ */
+
+extern void
+ntru_crypto_msbyte_2_uint32(
+    uint32_t *words,      /* out - pointer to the output uint32_t array */
+    uint8_t const *bytes, /*  in - pointer to the input byte array */
+    uint32_t n);          /*  in - number of words in the output array */
+
+/* ntru_crypto_uint32_2_msbyte()
+ *
+ * This routine converts an array of uint32_t to an array of bytes in
+ * network byte order, placing the most significant byte of the first uint32_t
+ * word as the first byte of the output array.
+ *
+ * The number of bytes in the output stream will be 4 times the number of words
+ * specified in the input array.
+ */
+
+extern void
+ntru_crypto_uint32_2_msbyte(
+    uint8_t *bytes,        /* out - pointer to the output byte array */
+    uint32_t const *words, /*  in - pointer to the input uint32_t array */
+    uint32_t n);           /*  in - number of words in the input array */
+
+#endif /* NTRU_CRYPTO_MSBYTE_UINT32_H */
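A small round-trip sketch for the two conversion routines above: four network-order bytes become one uint32_t word with the first byte in the most significant position, and converting back reproduces the original byte order. Buffer sizes follow the 4-bytes-per-word rule stated in the comments; the byte values are arbitrary.

#include <stdio.h>
#include <string.h>
#include "ntru_crypto_msbyte_uint32.h"

int main(void)
{
	uint8_t const bytes[8] = {0x01, 0x02, 0x03, 0x04, 0xaa, 0xbb, 0xcc, 0xdd};
	uint32_t words[2];
	uint8_t back[8];

	ntru_crypto_msbyte_2_uint32(words, bytes, 2); /* 8 bytes -> 2 words */
	ntru_crypto_uint32_2_msbyte(back, words, 2);  /* and back again */

	/* expected: words[0] == 0x01020304, words[1] == 0xaabbccdd */
	printf("words[0] = 0x%08x, words[1] = 0x%08x\n",
	       (unsigned) words[0], (unsigned) words[1]);
	printf("round trip %s\n",
	       memcmp(bytes, back, sizeof(bytes)) ? "failed" : "ok");
	return 0;
}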
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.c
new file mode 100644
index 0000000000000000000000000000000000000000..d6514056970e43c59ab1feef9dae65c2ddad3c62
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.c
@@ -0,0 +1,556 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_ntru_convert.c
+ *
+ * Contents: Conversion routines for NTRUEncrypt, including packing, unpacking,
+ *           and others.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_convert.h"
+
+/* 3-bit to 2-trit conversion tables: 2 represents -1 */
+
+static uint8_t const bits_2_trit1[] = {0, 0, 0, 1, 1, 1, 2, 2};
+static uint8_t const bits_2_trit2[] = {0, 1, 2, 0, 1, 2, 0, 1};
+
+/* ntru_bits_2_trits
+ *
+ * Each 3 bits in an array of octets is converted to 2 trits in an array
+ * of trits.
+ *
+ * The octet array may overlap the end of the trit array.
+ */
+
+void ntru_bits_2_trits(
+    uint8_t const *octets, /*  in - pointer to array of octets */
+    uint16_t num_trits,    /*  in - number of trits to produce */
+    uint8_t *trits)        /* out - address for array of trits */
+{
+	uint32_t bits24;
+	uint32_t bits3;
+	uint32_t shift;
+
+	while (num_trits >= 16) {
+		/* get next three octets */
+
+		bits24 = ((uint32_t)(*octets++)) << 16;
+		bits24 |= ((uint32_t)(*octets++)) << 8;
+		bits24 |= (uint32_t)(*octets++);
+
+		/* for each 3 bits in the three octets, output 2 trits */
+
+		bits3 = (bits24 >> 21) & 0x7;
+		*trits++ = bits_2_trit1[bits3];
+		*trits++ = bits_2_trit2[bits3];
+
+		bits3 = (bits24 >> 18) & 0x7;
+		*trits++ = bits_2_trit1[bits3];
+		*trits++ = bits_2_trit2[bits3];
+
+		bits3 = (bits24 >> 15) & 0x7;
+		*trits++ = bits_2_trit1[bits3];
+		*trits++ = bits_2_trit2[bits3];
+
+		bits3 = (bits24 >> 12) & 0x7;
+		*trits++ = bits_2_trit1[bits3];
+		*trits++ = bits_2_trit2[bits3];
+
+		bits3 = (bits24 >> 9) & 0x7;
+		*trits++ = bits_2_trit1[bits3];
+		*trits++ = bits_2_trit2[bits3];
+
+		bits3 = (bits24 >> 6) & 0x7;
+		*trits++ = bits_2_trit1[bits3];
+		*trits++ = bits_2_trit2[bits3];
+
+		bits3 = (bits24 >> 3) & 0x7;
+		*trits++ = bits_2_trit1[bits3];
+		*trits++ = bits_2_trit2[bits3];
+
+		bits3 = bits24 & 0x7;
+		*trits++ = bits_2_trit1[bits3];
+		*trits++ = bits_2_trit2[bits3];
+
+		num_trits -= 16;
+	}
+
+	if (num_trits == 0) {
+		return;
+	}
+
+	/* get three octets */
+
+	bits24 = ((uint32_t)(*octets++)) << 16;
+	bits24 |= ((uint32_t)(*octets++)) << 8;
+	bits24 |= (uint32_t)(*octets++);
+
+	shift = 21;
+	while (num_trits) {
+		/* for each 3 bits in the three octets, output up to 2 trits
+         * until all trits needed are produced
+         */
+
+		bits3 = (bits24 >> shift) & 0x7;
+		shift -= 3;
+		*trits++ = bits_2_trit1[bits3];
+
+		if (--num_trits) {
+			*trits++ = bits_2_trit2[bits3];
+			--num_trits;
+		}
+	}
+
+	return;
+}
+
+/* ntru_trits_2_bits
+ *
+ * Each 2 trits in an array of trits is converted to 3 bits, and the bits
+ * are packed in an array of octets.  A multiple of 3 octets is output.
+ * Any bits in the final octets not derived from trits are zero.
+ *
+ * Returns TRUE if all trits were valid.
+ * Returns FALSE if invalid trits were found.
+ */
+
+bool ntru_trits_2_bits(
+    uint8_t const *trits, /*  in - pointer to array of trits */
+    uint32_t num_trits,   /*  in - number of trits to convert */
+    uint8_t *octets)      /* out - address for array of octets */
+{
+	bool all_trits_valid = TRUE;
+	uint32_t bits24;
+	uint32_t bits3;
+	uint32_t shift;
+
+	while (num_trits >= 16) {
+
+		/* convert each 2 trits to 3 bits and pack */
+
+		bits3 = *trits++ * 3;
+		bits3 += *trits++;
+
+		if (bits3 > 7) {
+			bits3 = 7;
+			all_trits_valid = FALSE;
+		}
+
+		bits24 = (bits3 << 21);
+		bits3 = *trits++ * 3;
+		bits3 += *trits++;
+
+		if (bits3 > 7) {
+			bits3 = 7;
+			all_trits_valid = FALSE;
+		}
+
+		bits24 |= (bits3 << 18);
+		bits3 = *trits++ * 3;
+		bits3 += *trits++;
+
+		if (bits3 > 7) {
+			bits3 = 7;
+			all_trits_valid = FALSE;
+		}
+
+		bits24 |= (bits3 << 15);
+		bits3 = *trits++ * 3;
+		bits3 += *trits++;
+
+		if (bits3 > 7) {
+			bits3 = 7;
+			all_trits_valid = FALSE;
+		}
+
+		bits24 |= (bits3 << 12);
+		bits3 = *trits++ * 3;
+		bits3 += *trits++;
+
+		if (bits3 > 7) {
+			bits3 = 7;
+			all_trits_valid = FALSE;
+		}
+
+		bits24 |= (bits3 << 9);
+		bits3 = *trits++ * 3;
+		bits3 += *trits++;
+
+		if (bits3 > 7) {
+			bits3 = 7;
+			all_trits_valid = FALSE;
+		}
+
+		bits24 |= (bits3 << 6);
+		bits3 = *trits++ * 3;
+		bits3 += *trits++;
+
+		if (bits3 > 7) {
+			bits3 = 7;
+			all_trits_valid = FALSE;
+		}
+
+		bits24 |= (bits3 << 3);
+		bits3 = *trits++ * 3;
+		bits3 += *trits++;
+
+		if (bits3 > 7) {
+			bits3 = 7;
+			all_trits_valid = FALSE;
+		}
+
+		bits24 |= bits3;
+		num_trits -= 16;
+
+		/* output three octets */
+
+		*octets++ = (uint8_t)((bits24 >> 16) & 0xff);
+		*octets++ = (uint8_t)((bits24 >> 8) & 0xff);
+		*octets++ = (uint8_t)(bits24 & 0xff);
+	}
+
+	bits24 = 0;
+	shift = 21;
+
+	while (num_trits) {
+
+		/* convert each 2 trits to 3 bits and pack */
+
+		bits3 = *trits++ * 3;
+
+		if (--num_trits) {
+			bits3 += *trits++;
+			--num_trits;
+		}
+
+		if (bits3 > 7) {
+			bits3 = 7;
+			all_trits_valid = FALSE;
+		}
+
+		bits24 |= (bits3 << shift);
+		shift -= 3;
+	}
+
+	/* output three octets */
+
+	*octets++ = (uint8_t)((bits24 >> 16) & 0xff);
+	*octets++ = (uint8_t)((bits24 >> 8) & 0xff);
+	*octets++ = (uint8_t)(bits24 & 0xff);
+
+	return all_trits_valid;
+}
+
+/* ntru_coeffs_mod4_2_octets
+ *
+ * Takes an array of ring element coefficients mod 4 and packs the
+ * results into an octet string.
+ */
+
+void ntru_coeffs_mod4_2_octets(
+    uint16_t num_coeffs,    /*  in - number of coefficients */
+    uint16_t const *coeffs, /*  in - pointer to coefficients */
+    uint8_t *octets)        /* out - address for octets */
+{
+	uint8_t bits2;
+	int shift;
+	uint16_t i;
+
+	*octets = 0;
+	shift = 6;
+	for (i = 0; i < num_coeffs; i++) {
+		bits2 = (uint8_t)(coeffs[i] & 0x3);
+		*octets |= bits2 << shift;
+		shift -= 2;
+
+		if (shift < 0) {
+			++octets;
+			*octets = 0;
+			shift = 6;
+		}
+	}
+
+	return;
+}
+
+/* ntru_trits_2_octet
+ *
+ * Packs 5 trits in an octet, where a trit is 0, 1, or 2 (-1).
+ */
+
+void ntru_trits_2_octet(
+    uint8_t const *trits, /*  in - pointer to trits */
+    uint8_t *octet)       /* out - address for octet */
+{
+	int i;
+
+	*octet = 0;
+	for (i = 4; i >= 0; i--) {
+		*octet = (*octet * 3) + trits[i];
+	}
+
+	return;
+}
+
+/* ntru_octet_2_trits
+ *
+ * Unpacks an octet to 5 trits, where a trit is 0, 1, or 2 (-1).
+ */
+
+void ntru_octet_2_trits(
+    uint8_t octet,  /*  in - octet to be unpacked */
+    uint8_t *trits) /* out - address for trits */
+{
+	int i;
+
+	for (i = 0; i < 5; i++) {
+		trits[i] = octet % 3;
+		octet = (octet - trits[i]) / 3;
+	}
+
+	return;
+}
+
+/* ntru_indices_2_trits
+ *
+ * Converts a list of the nonzero indices of a polynomial into an array of
+ * trits.
+ */
+
+void ntru_indices_2_trits(
+    uint16_t in_len,    /*  in - no. of indices */
+    uint16_t const *in, /*  in - pointer to list of indices */
+    bool plus1,         /*  in - if list is +1 coefficients */
+    uint8_t *out)       /* out - address of output polynomial */
+{
+	uint8_t trit = plus1 ? 1 : 2;
+	uint16_t i;
+
+	for (i = 0; i < in_len; i++) {
+		out[in[i]] = trit;
+	}
+
+	return;
+}
+
+/* ntru_packed_trits_2_indices
+ *
+ * Unpacks an array of N trits and creates a list of array indices 
+ * corresponding to trits = +1, and a list of array indices corresponding to
+ * trits = -1.
+ */
+
+void ntru_packed_trits_2_indices(
+    uint8_t const *in,        /*  in - pointer to packed-trit octets */
+    uint16_t num_trits,       /*  in - no. of packed trits */
+    uint16_t *indices_plus1,  /* out - address for indices of +1 trits */
+    uint16_t *indices_minus1) /* out - address for indices of -1 trits */
+{
+	uint8_t trits[5];
+	uint16_t i = 0;
+	int j;
+
+	while (num_trits >= 5) {
+		ntru_octet_2_trits(*in++, trits);
+		num_trits -= 5;
+
+		for (j = 0; j < 5; j++, i++) {
+			if (trits[j] == 1) {
+				*indices_plus1 = i;
+				++indices_plus1;
+			} else if (trits[j] == 2) {
+				*indices_minus1 = i;
+				++indices_minus1;
+			} else {
+				;
+			}
+		}
+	}
+
+	if (num_trits) {
+		ntru_octet_2_trits(*in, trits);
+
+		for (j = 0; num_trits && (j < 5); j++, i++) {
+			if (trits[j] == 1) {
+				*indices_plus1 = i;
+				++indices_plus1;
+			} else if (trits[j] == 2) {
+				*indices_minus1 = i;
+				++indices_minus1;
+			} else {
+				;
+			}
+
+			--num_trits;
+		}
+	}
+
+	return;
+}
+
+/* ntru_indices_2_packed_trits
+ *
+ * Takes a list of array indices corresponding to elements whose values
+ * are +1 or -1, and packs the N-element array of trits described by these
+ * lists into octets, 5 trits per octet.
+ */
+
+void ntru_indices_2_packed_trits(
+    uint16_t const *indices, /*  in - pointer to indices */
+    uint16_t num_plus1,      /*  in - no. of indices for +1 trits */
+    uint16_t num_minus1,     /*  in - no. of indices for -1 trits */
+    uint16_t num_trits,      /*  in - N, no. of trits in array */
+    uint8_t *buf,            /*  in - temp buf, N octets */
+    uint8_t *out)            /* out - address for packed octets */
+{
+
+	/* convert indices to an array of trits */
+
+	memset(buf, 0, num_trits);
+	ntru_indices_2_trits(num_plus1, indices, TRUE, buf);
+	ntru_indices_2_trits(num_minus1, indices + num_plus1, FALSE, buf);
+
+	/* pack the array of trits */
+
+	while (num_trits >= 5) {
+		ntru_trits_2_octet(buf, out);
+		num_trits -= 5;
+		buf += 5;
+		++out;
+	}
+
+	if (num_trits) {
+		uint8_t trits[5];
+
+		memcpy(trits, buf, num_trits);
+		memset(trits + num_trits, 0, sizeof(trits) - num_trits);
+		ntru_trits_2_octet(trits, out);
+	}
+
+	return;
+}
+
+/* ntru_elements_2_octets
+ *
+ * Packs an array of n-bit elements into an array of
+ * ((in_len * n_bits) + 7) / 8 octets.
+ * NOTE: Assumes 8 < n_bits < 16.
+ */
+
+void ntru_elements_2_octets(
+    uint16_t in_len,    /*  in - no. of elements to be packed */
+    uint16_t const *in, /*  in - ptr to elements to be packed */
+    uint8_t n_bits,     /*  in - no. of bits in input element */
+    uint8_t *out)       /* out - addr for output octets */
+{
+	uint16_t temp;
+	uint16_t shift;
+	uint16_t i;
+
+	/* pack */
+
+	temp = 0;
+	shift = n_bits - 8;
+	i = 0;
+	while (i < in_len) {
+		/* add bits to temp to fill an octet and output the octet */
+		temp |= in[i] >> shift;
+		*out++ = (uint8_t)(temp & 0xff);
+		if (shift > 8) {
+			/* next full octet is in current input word */
+
+			shift = shift - 8;
+			temp = 0;
+		} else {
+			shift = 8 - shift;
+			/* put remaining bits of input word in temp as partial octet,
+             * and increment index to next input word
+             */
+			temp = in[i] << shift;
+			shift = n_bits - shift;
+
+			++i;
+		}
+	}
+
+	/* output any bits remaining in last input word */
+
+	if (shift != n_bits - 8) {
+		*out++ = (uint8_t)(temp & 0xff);
+	}
+
+	return;
+}
+
+/* ntru_octets_2_elements
+ *
+ * Unpacks an octet string into an array of ((in_len * 8) / n_bits)
+ * n-bit elements.  Any extra bits are discarded.
+ * NOTE: Assumes 8 < n_bits < 16.
+ */
+
+void ntru_octets_2_elements(
+    uint16_t in_len,   /*  in - no. of octets to be unpacked */
+    uint8_t const *in, /*  in - ptr to octets to be unpacked */
+    uint8_t n_bits,    /*  in - no. of bits in output element */
+    uint16_t *out)     /* out - addr for output elements */
+{
+	uint16_t temp;
+	uint16_t mask;
+	uint16_t shift;
+	uint16_t i;
+
+	/* unpack */
+
+	temp = 0;
+	mask = (1 << n_bits) - 1;
+	shift = n_bits;
+	i = 0;
+
+	while (i < in_len) {
+		if (shift > 8) {
+			/* the current octet will not fill the current element */
+
+			shift = shift - 8;
+			temp |= ((uint16_t) in[i]) << shift;
+		} else {
+			/* add bits from the current octet to fill the current element and
+             * output the element
+             */
+
+			shift = 8 - shift;
+
+			temp |= ((uint16_t) in[i]) >> shift;
+			*out++ = temp & mask;
+
+			/* add the remaining bits of the current octet to start an element */
+			shift = n_bits - shift;
+			temp = ((uint16_t) in[i]) << shift;
+		}
+		++i;
+	}
+
+	return;
+}
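To make the trit encodings above concrete: ntru_trits_2_octet packs five base-3 digits into one octet with the least significant trit first, and ntru_octet_2_trits inverts it. A minimal sketch using only the routines defined in this file; the trit values are arbitrary.

#include <stdio.h>
#include "ntru_crypto_ntru_convert.h"

int main(void)
{
	uint8_t const trits[5] = {1, 0, 2, 1, 0}; /* 2 encodes -1 */
	uint8_t octet;
	uint8_t back[5];
	int i;

	/* 1*1 + 0*3 + 2*9 + 1*27 + 0*81 = 46 */
	ntru_trits_2_octet(trits, &octet);
	ntru_octet_2_trits(octet, back);

	printf("packed octet = %u, unpacked =", (unsigned) octet);
	for (i = 0; i < 5; i++) {
		printf(" %u", (unsigned) back[i]);
	}
	printf("\n");
	return 0;
}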
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.h b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.h
new file mode 100644
index 0000000000000000000000000000000000000000..73ddd7e46fb9c19bbd0ad05bd130bed305a6441e
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_convert.h
@@ -0,0 +1,167 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_ntru_convert.h
+ *
+ * Contents: Definitions and declarations for conversion routines
+ *           for NTRUEncrypt, including packing, unpacking and others.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_NTRU_CONVERT_H
+#define NTRU_CRYPTO_NTRU_CONVERT_H
+
+#include "ntru_crypto.h"
+
+/* function declarations */
+
+/* ntru_bits_2_trits
+ *
+ * Each 3 bits in an array of octets is converted to 2 trits in an array
+ * of trits.
+ */
+
+extern void
+ntru_bits_2_trits(
+    uint8_t const *octets, /*  in - pointer to array of octets */
+    uint16_t num_trits,    /*  in - number of trits to produce */
+    uint8_t *trits);       /* out - address for array of trits */
+
+/* ntru_trits_2_bits
+ *
+ * Each 2 trits in an array of trits is converted to 3 bits, and the bits
+ * are packed in an array of octets.  A multiple of 3 octets is output.
+ * Any bits in the final octets not derived from trits are zero.
+ *
+ * Returns TRUE if all trits were valid.
+ * Returns FALSE if invalid trits were found.
+ */
+
+extern bool
+ntru_trits_2_bits(
+    uint8_t const *trits, /*  in - pointer to array of trits */
+    uint32_t num_trits,   /*  in - number of trits to convert */
+    uint8_t *octets);     /* out - address for array of octets */
+
+/* ntru_coeffs_mod4_2_octets
+ *
+ * Takes an array of coefficients mod 4 and packs the results into an
+ * octet string.
+ */
+
+extern void
+ntru_coeffs_mod4_2_octets(
+    uint16_t num_coeffs,    /*  in - number of coefficients */
+    uint16_t const *coeffs, /*  in - pointer to coefficients */
+    uint8_t *octets);       /* out - address for octets */
+
+/* ntru_trits_2_octet
+ *
+ * Packs 5 trits in an octet, where a trit is 0, 1, or 2 (-1).
+ */
+
+extern void
+ntru_trits_2_octet(
+    uint8_t const *trits, /*  in - pointer to trits */
+    uint8_t *octet);      /* out - address for octet */
+
+/* ntru_octet_2_trits
+ *
+ * Unpacks an octet to 5 trits, where a trit is 0, 1, or 2 (-1).
+ */
+
+extern void
+ntru_octet_2_trits(
+    uint8_t octet,   /*  in - octet to be unpacked */
+    uint8_t *trits); /* out - address for trits */
+
+/* ntru_indices_2_trits
+ *
+ * Converts a list of the nonzero indices of a polynomial into an array of
+ * trits.
+ */
+
+extern void
+ntru_indices_2_trits(
+    uint16_t in_len,    /*  in - no. of indices */
+    uint16_t const *in, /*  in - pointer to list of indices */
+    bool plus1,         /*  in - if list is +1 coefficients */
+    uint8_t *out);      /* out - address of output polynomial */
+
+/* ntru_packed_trits_2_indices
+ *
+ * Unpacks an array of N trits and creates a list of array indices 
+ * corresponding to trits = +1, and a list of array indices corresponding to
+ * trits = -1.
+ */
+
+extern void
+ntru_packed_trits_2_indices(
+    uint8_t const *in,         /*  in - pointer to packed-trit octets */
+    uint16_t num_trits,        /*  in - no. of packed trits */
+    uint16_t *indices_plus1,   /* out - address for indices of +1 trits */
+    uint16_t *indices_minus1); /* out - address for indices of -1 trits */
+
+/* ntru_indices_2_packed_trits
+ *
+ * Takes a list of array indices corresponding to elements whose values
+ * are +1 or -1, and packs the N-element array of trits described by these
+ * lists into octets, 5 trits per octet.
+ */
+
+extern void
+ntru_indices_2_packed_trits(
+    uint16_t const *indices, /*  in - pointer to indices */
+    uint16_t num_plus1,      /*  in - no. of indices for +1 trits */
+    uint16_t num_minus1,     /*  in - no. of indices for -1 trits */
+    uint16_t num_trits,      /*  in - N, no. of trits in array */
+    uint8_t *buf,            /*  in - temp buf, N octets */
+    uint8_t *out);           /* out - address for packed octets */
+
+/* ntru_elements_2_octets
+ *
+ * Packs an array of n-bit elements into an array of
+ * ((in_len * n_bits) + 7) / 8 octets, 8 < n_bits < 16.
+ */
+
+extern void
+ntru_elements_2_octets(
+    uint16_t in_len,    /*  in - no. of elements to be packed */
+    uint16_t const *in, /*  in - ptr to elements to be packed */
+    uint8_t n_bits,     /*  in - no. of bits in input element */
+    uint8_t *out);      /* out - addr for output octets */
+
+/* ntru_octets_2_elements
+ *
+ * Unpacks an octet string into an array of ((in_len * 8) / n_bits)
+ * n-bit elements, 8 < n_bits < 16.  Any extra bits are discarded.
+ */
+
+extern void
+ntru_octets_2_elements(
+    uint16_t in_len,   /*  in - no. of octets to be unpacked */
+    uint8_t const *in, /*  in - ptr to octets to be unpacked */
+    uint8_t n_bits,    /*  in - no. of bits in output element */
+    uint16_t *out);    /* out - addr for output elements */
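+
+/* Size example (illustrative numbers only): with q_bits = 11 and N = 743
+ * coefficients, ntru_elements_2_octets() produces (743 * 11 + 7) / 8 = 1022
+ * octets, and ntru_octets_2_elements() applied to those 1022 octets recovers
+ * (1022 * 8) / 11 = 743 elements; the 3 trailing pad bits are discarded.
+ */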
+
+#endif /* NTRU_CRYPTO_NTRU_CONVERT_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt.c
new file mode 100644
index 0000000000000000000000000000000000000000..8d93985125461345dc1b1d06a1dbc7348110411a
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt.c
@@ -0,0 +1,1395 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_ntru_encrypt.c
+ *
+ * Contents: Routines implementing NTRUEncrypt encryption and decryption and
+ *           key generation.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_encrypt_param_sets.h"
+#include "ntru_crypto_ntru_encrypt_key.h"
+#include "ntru_crypto_ntru_convert.h"
+#include "ntru_crypto_ntru_poly.h"
+#include "ntru_crypto_ntru_mgf1.h"
+#include "ntru_crypto_drbg.h"
+
+/* ntru_crypto_ntru_encrypt
+ *
+ * Implements NTRU encryption (SVES) for the parameter set specified in
+ * the public key blob.
+ *
+ * Before invoking this function, a DRBG must be instantiated using
+ * ntru_crypto_drbg_instantiate() to obtain a DRBG handle, and in that
+ * instantiation the requested security strength must be at least as large
+ * as the security strength of the NTRU parameter set being used.
+ * Failure to instantiate the DRBG with the proper security strength will
+ * result in this function returning DRBG_ERROR_BASE + DRBG_BAD_LENGTH.
+ *
+ * The required minimum size of the output ciphertext buffer (ct) may be
+ * queried by invoking this function with ct = NULL.  In this case, no
+ * encryption is performed, NTRU_OK is returned, and the required minimum
+ * size for ct is returned in ct_len.
+ *
+ * When ct != NULL, at invocation *ct_len must be the size of the ct buffer.
+ * Upon return it is the actual size of the ciphertext.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns DRBG_ERROR_BASE + DRBG_BAD_PARAMETER if the DRBG handle is invalid.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer
+ *  (other than ct) is NULL.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument
+ *  (pubkey_blob_len or pt_len) is zero, or if pt_len exceeds the
+ *  maximum plaintext length for the parameter set.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PUBLIC_KEY if the public-key blob is
+ *  invalid (unknown format, corrupt, bad length).
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the ciphertext buffer
+ *  is too small.
+ * Returns NTRU_ERROR_BASE + NTRU_NO_MEMORY if memory needed cannot be
+ *  allocated from the heap.
+ */
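+
+/* Usage sketch (illustrative only, not part of the reference API).  Assumes
+ * `drbg` was obtained from ntru_crypto_drbg_instantiate() with sufficient
+ * security strength, and that pubkey_blob/pubkey_blob_len hold a valid
+ * public-key blob:
+ *
+ *     uint16_t ct_len = 0;
+ *     uint8_t *ct;
+ *
+ *     // query the required ciphertext size
+ *     if (ntru_crypto_ntru_encrypt(drbg, pubkey_blob_len, pubkey_blob,
+ *                                  pt_len, pt, &ct_len, NULL) != NTRU_OK)
+ *         return -1;
+ *
+ *     if ((ct = malloc(ct_len)) == NULL)
+ *         return -1;
+ *
+ *     // encrypt; on success ct_len now holds the actual ciphertext length
+ *     if (ntru_crypto_ntru_encrypt(drbg, pubkey_blob_len, pubkey_blob,
+ *                                  pt_len, pt, &ct_len, ct) != NTRU_OK)
+ *         return -1;
+ */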
+
+uint32_t
+ntru_crypto_ntru_encrypt(
+    DRBG_HANDLE drbg_handle,    /*     in - handle of DRBG */
+    uint16_t pubkey_blob_len,   /*     in - no. of octets in public key
+                                                 blob */
+    uint8_t const *pubkey_blob, /*     in - pointer to public key */
+    uint16_t pt_len,            /*     in - no. of octets in plaintext */
+    uint8_t const *pt,          /*     in - pointer to plaintext */
+    uint16_t *ct_len,           /* in/out - no. of octets in ct, addr for
+                                                 no. of octets in ciphertext */
+    uint8_t *ct)                /*    out - address for ciphertext */
+{
+	NTRU_ENCRYPT_PARAM_SET *params = NULL;
+	uint8_t const *pubkey_packed = NULL;
+	uint8_t pubkey_pack_type = 0x00;
+	uint16_t packed_ct_len;
+	size_t scratch_buf_len;
+	uint32_t dr;
+	uint32_t dr1 = 0;
+	uint32_t dr2 = 0;
+	uint32_t dr3 = 0;
+	uint16_t num_scratch_polys;
+	uint16_t pad_deg;
+	uint16_t ring_mult_tmp_len;
+	uint16_t *scratch_buf = NULL;
+	uint16_t *ringel_buf = NULL;
+	uint16_t *r_buf = NULL;
+	uint8_t *b_buf = NULL;
+	uint8_t *tmp_buf = NULL;
+	bool msg_rep_good = FALSE;
+	NTRU_CRYPTO_HASH_ALGID hash_algid;
+	uint8_t md_len;
+	uint16_t mod_q_mask;
+	uint32_t result = NTRU_OK;
+
+	/* check for bad parameters */
+
+	if (!pubkey_blob || !ct_len) {
+		NTRU_RET(NTRU_BAD_PARAMETER);
+	}
+
+	if (pubkey_blob_len == 0) {
+		NTRU_RET(NTRU_BAD_LENGTH);
+	}
+
+	/* get a pointer to the parameter-set parameters, the packing type for
+     * the public key, and a pointer to the packed public key
+     */
+
+	if (!ntru_crypto_ntru_encrypt_key_parse(TRUE /* pubkey */, pubkey_blob_len,
+	                                        pubkey_blob, &pubkey_pack_type,
+	                                        NULL, &params, &pubkey_packed,
+	                                        NULL)) {
+		NTRU_RET(NTRU_BAD_PUBLIC_KEY);
+	}
+
+	if (params->q_bits <= 8 || params->q_bits >= 16 ||
+	    pubkey_pack_type != NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS) {
+		NTRU_RET(NTRU_UNSUPPORTED_PARAM_SET);
+	}
+
+	/* return the ciphertext size if requested */
+
+	packed_ct_len = (params->N * params->q_bits + 7) >> 3;
+
+	if (!ct) {
+		*ct_len = packed_ct_len;
+		NTRU_RET(NTRU_OK);
+	}
+
+	/* check the ciphertext buffer size */
+
+	if (*ct_len < packed_ct_len) {
+		NTRU_RET(NTRU_BUFFER_TOO_SMALL);
+	}
+
+	/* check that a plaintext was provided */
+
+	if (!pt) {
+		NTRU_RET(NTRU_BAD_PARAMETER);
+	}
+
+	/* check the plaintext length */
+
+	if (pt_len > params->m_len_max) {
+		NTRU_RET(NTRU_BAD_LENGTH);
+	}
+
+	/* allocate memory for all operations */
+
+	ntru_ring_mult_indices_memreq(params->N, &num_scratch_polys, &pad_deg);
+
+	if (params->is_product_form) {
+		dr1 = params->dF_r & 0xff;
+		dr2 = (params->dF_r >> 8) & 0xff;
+		dr3 = (params->dF_r >> 16) & 0xff;
+		dr = dr1 + dr2 + dr3;
+		num_scratch_polys += 1; /* mult_product_indices needs space for a
+                                   mult_indices and one intermediate result */
+	} else {
+		dr = params->dF_r;
+	}
+	ring_mult_tmp_len = num_scratch_polys * pad_deg;
+
+	scratch_buf_len = (ring_mult_tmp_len << 1) + /* X-byte temp buf for ring mult
+                                                    and other intermediate results */
+	                  (pad_deg << 1) +           /* 2N-byte buffer for ring elements
+                                                    and overflow from temp buffer */
+	                  (dr << 2) +                /* buffer for r indices */
+	                  params->b_len;             /* buffer for b */
+	scratch_buf = MALLOC(scratch_buf_len);
+	if (!scratch_buf) {
+		NTRU_RET(NTRU_OUT_OF_MEMORY);
+	}
+
+	ringel_buf = scratch_buf + ring_mult_tmp_len;
+	r_buf = ringel_buf + pad_deg;
+	b_buf = (uint8_t *) (r_buf + (dr << 1));
+	tmp_buf = (uint8_t *) scratch_buf;
+
+	/* set hash algorithm and seed length based on security strength */
+
+	if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA1) {
+		hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA1;
+		md_len = SHA_1_MD_LEN;
+	} else if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA256) {
+		hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA256;
+		md_len = SHA_256_MD_LEN;
+	} else {
+		FREE(scratch_buf);
+		NTRU_RET(NTRU_UNSUPPORTED_PARAM_SET);
+	}
+
+	/* set constants */
+
+	mod_q_mask = params->q - 1;
+
+	/* loop until a message representative with proper weight is achieved */
+
+	do {
+		uint8_t *ptr = tmp_buf;
+
+		/* get b */
+		result = ntru_crypto_drbg_generate(drbg_handle,
+		                                   params->sec_strength_len << 3,
+		                                   params->b_len, b_buf);
+
+		if (result == NTRU_OK) {
+			/* form sData (OID || m || b || hTrunc) */
+
+			memcpy(ptr, params->OID, 3);
+			ptr += 3;
+			memcpy(ptr, pt, pt_len);
+			ptr += pt_len;
+			memcpy(ptr, b_buf, params->b_len);
+			ptr += params->b_len;
+			memcpy(ptr, pubkey_packed, params->sec_strength_len);
+			ptr += params->sec_strength_len;
+
+			/* generate r */
+
+			result = ntru_gen_poly(hash_algid, md_len,
+			                       params->min_IGF_hash_calls,
+			                       (uint16_t)(ptr - tmp_buf),
+			                       tmp_buf, tmp_buf,
+			                       params->N, params->c_bits,
+			                       params->no_bias_limit,
+			                       params->is_product_form,
+			                       params->dF_r << 1, r_buf);
+		}
+
+		if (result == NTRU_OK) {
+			uint16_t pubkey_packed_len;
+
+			/* unpack the public key */
+			pubkey_packed_len = (params->N * params->q_bits + 7) >> 3;
+			ntru_octets_2_elements(pubkey_packed_len, pubkey_packed,
+			                       params->q_bits, ringel_buf);
+
+			/* form R = h * r */
+
+			if (params->is_product_form) {
+				ntru_ring_mult_product_indices(ringel_buf, (uint16_t) dr1,
+				                               (uint16_t) dr2, (uint16_t) dr3,
+				                               r_buf, params->N, params->q,
+				                               scratch_buf, ringel_buf);
+			} else {
+				ntru_ring_mult_indices(ringel_buf, (uint16_t) dr, (uint16_t) dr,
+				                       r_buf, params->N, params->q,
+				                       scratch_buf, ringel_buf);
+			}
+
+			/* form R mod 4 */
+
+			ntru_coeffs_mod4_2_octets(params->N, ringel_buf, tmp_buf);
+
+			/* form mask */
+
+			result = ntru_mgftp1(hash_algid, md_len,
+			                     params->min_MGF_hash_calls,
+			                     (params->N + 3) / 4, tmp_buf,
+			                     tmp_buf + params->N, params->N, tmp_buf);
+		}
+
+		if (result == NTRU_OK) {
+			uint8_t *Mtrin_buf = tmp_buf + params->N;
+			uint8_t *M_buf = Mtrin_buf + params->N -
+			                 (params->b_len + params->m_len_len +
+			                  params->m_len_max + 2);
+			uint16_t i;
+
+			/* form the padded message M */
+
+			ptr = M_buf;
+			memcpy(ptr, b_buf, params->b_len);
+			ptr += params->b_len;
+			if (params->m_len_len == 2)
+				*ptr++ = (uint8_t)((pt_len >> 8) & 0xff);
+			*ptr++ = (uint8_t)(pt_len & 0xff);
+			memcpy(ptr, pt, pt_len);
+			ptr += pt_len;
+
+			/* add an extra zero byte so that, even when the bit string is
+             * not a multiple of 3 bits, enough trits can still be produced
+             */
+
+			memset(ptr, 0, params->m_len_max - pt_len + 2);
+
+			/* convert M to trits (Mbin to Mtrin) */
+
+			ntru_bits_2_trits(M_buf, params->N, Mtrin_buf);
+
+			/* form the msg representative m' by adding Mtrin to mask, mod p */
+
+			for (i = 0; i < params->N; i++) {
+				tmp_buf[i] = tmp_buf[i] + Mtrin_buf[i];
+
+				if (tmp_buf[i] >= 3) {
+					tmp_buf[i] -= 3;
+				}
+			}
+
+			/* check that message representative meets minimum weight
+             * requirements
+             */
+			msg_rep_good = ntru_poly_check_min_weight(params->N, tmp_buf,
+			                                          params->min_msg_rep_wt);
+		}
+	} while ((result == NTRU_OK) && !msg_rep_good);
+
+	if (result == NTRU_OK) {
+		uint16_t i;
+
+		/* form ciphertext e by adding m' to R mod q */
+
+		for (i = 0; i < params->N; i++) {
+			if (tmp_buf[i] == 1) {
+				ringel_buf[i] = (ringel_buf[i] + 1) & mod_q_mask;
+			} else if (tmp_buf[i] == 2) {
+				ringel_buf[i] = (ringel_buf[i] - 1) & mod_q_mask;
+			} else {
+				;
+			}
+		}
+
+		/* pack ciphertext */
+
+		ntru_elements_2_octets(params->N, ringel_buf, params->q_bits, ct);
+		*ct_len = packed_ct_len;
+	}
+
+	/* cleanup */
+
+	memset(scratch_buf, 0, scratch_buf_len);
+	FREE(scratch_buf);
+
+	return result;
+}
+
+/* ntru_crypto_ntru_decrypt
+ *
+ * Implements NTRU decryption (SVES) for the parameter set specified in
+ * the private key blob.
+ *
+ * The maximum size of the output plaintext may be queried by invoking
+ * this function with pt = NULL.  In this case, no decryption is performed,
+ * NTRU_OK is returned, and the maximum size the plaintext could be is
+ * returned in pt_len.
+ * Note that until the decryption is performed successfully, the actual size
+ * of the resulting plaintext cannot be known.
+ *
+ * When pt != NULL, at invocation *pt_len must be the size of the pt buffer.
+ * Upon return it is the actual size of the plaintext.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer
+ *  (other than pt) is NULL.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument
+ *  (privkey_blob_len) is zero, or if ct_len is invalid for the parameter set.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PRIVATE_KEY if the private-key blob is
+ *  invalid (unknown format, corrupt, bad length).
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the plaintext buffer
+ *  is too small.
+ * Returns NTRU_ERROR_BASE + NTRU_NO_MEMORY if memory needed cannot be
+ *  allocated from the heap.
+ * Returns NTRU_ERROR_BASE + NTRU_FAIL if a decryption error occurs.
+ */
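+
+/* Usage sketch (illustrative): the same query-then-call pattern as for
+ * encryption.  privkey_blob/privkey_blob_len are assumed to hold a valid
+ * private-key blob and ct/ct_len a ciphertext:
+ *
+ *     uint16_t pt_len = 0;
+ *     uint8_t *pt;
+ *
+ *     // query the maximum possible plaintext size
+ *     if (ntru_crypto_ntru_decrypt(privkey_blob_len, privkey_blob,
+ *                                  ct_len, ct, &pt_len, NULL) != NTRU_OK)
+ *         return -1;
+ *
+ *     if ((pt = malloc(pt_len)) == NULL)
+ *         return -1;
+ *
+ *     // decrypt; on success pt_len now holds the actual plaintext length
+ *     if (ntru_crypto_ntru_decrypt(privkey_blob_len, privkey_blob,
+ *                                  ct_len, ct, &pt_len, pt) != NTRU_OK)
+ *         return -1;
+ */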
+
+uint32_t
+ntru_crypto_ntru_decrypt(
+    uint16_t privkey_blob_len,   /*     in - no. of octets in private key
+                                                 blob */
+    uint8_t const *privkey_blob, /*     in - pointer to private key */
+    uint16_t ct_len,             /*     in - no. of octets in ciphertext */
+    uint8_t const *ct,           /*     in - pointer to ciphertext */
+    uint16_t *pt_len,            /* in/out - no. of octets in pt, addr for
+                                                 no. of octets in plaintext */
+    uint8_t *pt)                 /*    out - address for plaintext */
+{
+	NTRU_ENCRYPT_PARAM_SET *params = NULL;
+	uint8_t const *privkey_packed = NULL;
+	uint8_t const *pubkey_packed = NULL;
+	uint8_t privkey_pack_type = 0x00;
+	uint8_t pubkey_pack_type = 0x00;
+	size_t scratch_buf_len;
+	uint32_t dF_r;
+	uint32_t dF_r1 = 0;
+	uint32_t dF_r2 = 0;
+	uint32_t dF_r3 = 0;
+	uint16_t num_scratch_polys;
+	uint16_t pad_deg;
+	uint16_t ring_mult_tmp_len;
+	uint16_t *scratch_buf = NULL;
+	uint16_t *ringel_buf1 = NULL;
+	uint16_t *ringel_buf2 = NULL;
+	uint16_t *i_buf = NULL;
+	uint8_t *m_buf = NULL;
+	uint8_t *tmp_buf = NULL;
+	uint8_t *Mtrin_buf = NULL;
+	uint8_t *M_buf = NULL;
+	uint8_t *ptr = NULL;
+	NTRU_CRYPTO_HASH_ALGID hash_algid;
+	uint8_t md_len;
+	uint16_t mod_q_mask;
+	uint16_t q_mod_p;
+	uint16_t cm_len = 0;
+	uint16_t num_zeros;
+	uint16_t i;
+	bool decryption_ok = TRUE;
+	uint32_t result = NTRU_OK;
+
+	/* check for bad parameters */
+
+	if (!privkey_blob || !pt_len) {
+		NTRU_RET(NTRU_BAD_PARAMETER);
+	}
+
+	if (privkey_blob_len == 0) {
+		NTRU_RET(NTRU_BAD_LENGTH);
+	}
+
+	/* get a pointer to the parameter-set parameters, the packing types for
+     * the public and private keys, and pointers to the packed public and
+     * private keys
+     */
+
+	if (!ntru_crypto_ntru_encrypt_key_parse(FALSE /* privkey */,
+	                                        privkey_blob_len,
+	                                        privkey_blob, &pubkey_pack_type,
+	                                        &privkey_pack_type, &params,
+	                                        &pubkey_packed, &privkey_packed)) {
+		NTRU_RET(NTRU_BAD_PRIVATE_KEY);
+	}
+
+	if (params->q_bits <= 8 || params->q_bits >= 16 ||
+	    params->N_bits <= 8 || params->N_bits >= 16 ||
+	    pubkey_pack_type != NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS ||
+	    (privkey_pack_type != NTRU_ENCRYPT_KEY_PACKED_TRITS &&
+	     privkey_pack_type != NTRU_ENCRYPT_KEY_PACKED_INDICES)) {
+		NTRU_RET(NTRU_UNSUPPORTED_PARAM_SET);
+	}
+
+	/* return the max plaintext size if requested */
+
+	if (!pt) {
+		*pt_len = params->m_len_max;
+		NTRU_RET(NTRU_OK);
+	}
+
+	/* check that a ciphertext was provided */
+
+	if (!ct) {
+		NTRU_RET(NTRU_BAD_PARAMETER);
+	}
+
+	/* cannot check the plaintext buffer size until after the plaintext
+     * is derived, if we allow plaintext buffers only as large as the
+     * actual plaintext
+     */
+
+	/* check the ciphertext length */
+
+	if (ct_len != (params->N * params->q_bits + 7) >> 3) {
+		NTRU_RET(NTRU_BAD_LENGTH);
+	}
+
+	/* allocate memory for all operations */
+
+	ntru_ring_mult_indices_memreq(params->N, &num_scratch_polys, &pad_deg);
+
+	if (params->is_product_form) {
+		dF_r1 = params->dF_r & 0xff;
+		dF_r2 = (params->dF_r >> 8) & 0xff;
+		dF_r3 = (params->dF_r >> 16) & 0xff;
+		dF_r = dF_r1 + dF_r2 + dF_r3;
+		num_scratch_polys += 1; /* mult_product_indices needs space for a
+                                   mult_indices and one intermediate result */
+	} else {
+		dF_r = params->dF_r;
+	}
+	ring_mult_tmp_len = num_scratch_polys * pad_deg;
+
+	scratch_buf_len = (ring_mult_tmp_len << 1) + /* X-byte temp buf for ring mult
+                                                    and other intermediate results */
+	                  (pad_deg << 2) +           /* 2 2N-byte bufs for ring elements
+                                                    and overflow from temp buffer */
+	                  (dF_r << 2) +              /* buffer for F, r indices */
+	                  params->m_len_max;         /* buffer for plaintext */
+
+	scratch_buf = MALLOC(scratch_buf_len);
+	if (!scratch_buf) {
+		NTRU_RET(NTRU_OUT_OF_MEMORY);
+	}
+
+	ringel_buf1 = scratch_buf + ring_mult_tmp_len;
+	ringel_buf2 = ringel_buf1 + pad_deg;
+	i_buf = ringel_buf2 + pad_deg;
+	m_buf = (uint8_t *) (i_buf + (dF_r << 1));
+	tmp_buf = (uint8_t *) scratch_buf;
+	Mtrin_buf = (uint8_t *) ringel_buf1;
+	M_buf = Mtrin_buf + params->N;
+
+	/* set hash algorithm and seed length based on security strength */
+
+	if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA1) {
+		hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA1;
+		md_len = SHA_1_MD_LEN;
+	} else if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA256) {
+		hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA256;
+		md_len = SHA_256_MD_LEN;
+	} else {
+		FREE(scratch_buf);
+		NTRU_RET(NTRU_UNSUPPORTED_PARAM_SET);
+	}
+
+	/* set constants */
+
+	mod_q_mask = params->q - 1;
+	q_mod_p = params->q % 3;
+
+	/* unpack the ciphertext */
+
+	ntru_octets_2_elements(ct_len, ct, params->q_bits, ringel_buf2);
+
+	/* unpack the private key */
+
+	if (privkey_pack_type == NTRU_ENCRYPT_KEY_PACKED_TRITS) {
+		ntru_packed_trits_2_indices(privkey_packed, params->N, i_buf,
+		                            i_buf + dF_r);
+
+	} else if (privkey_pack_type == NTRU_ENCRYPT_KEY_PACKED_INDICES) {
+		ntru_octets_2_elements(
+		    (((uint16_t) dF_r << 1) * params->N_bits + 7) >> 3,
+		    privkey_packed, params->N_bits, i_buf);
+	} else {
+		/* Unreachable due to supported parameter set check above */
+	}
+
+	/* form cm':
+     *  F * e
+     *  A = e * (1 + pF) mod q = e + pFe mod q
+     *  a = A in the range [-q/2, q/2)
+     *  cm' = a mod p
+     *
+     * first compute F*e w/o reduction mod q and store in ringel_buf1
+     */
+	if (params->is_product_form) {
+		ntru_ring_mult_product_indices(ringel_buf2, (uint16_t) dF_r1,
+		                               (uint16_t) dF_r2, (uint16_t) dF_r3,
+		                               i_buf, params->N, params->q,
+		                               scratch_buf, ringel_buf1);
+	} else {
+		ntru_ring_mult_indices(ringel_buf2, (uint16_t) dF_r, (uint16_t) dF_r,
+		                       i_buf, params->N, params->q,
+		                       scratch_buf, ringel_buf1);
+	}
+
+	/* then let ringel_buf1 = e + 3*ringel_buf1 (mod q) = e + pFe mod q
+     * lift ringel_buf1 elements to integers in the range [-q/2, q/2)
+     * let Mtrin_buf = ringel_buf1 (mod 3) = cm'
+     */
+	for (i = 0; i < params->N; i++) {
+		ringel_buf1[i] = (ringel_buf2[i] + 3 * ringel_buf1[i]) & mod_q_mask;
+
+		if (ringel_buf1[i] >= (params->q >> 1)) {
+			ringel_buf1[i] = ringel_buf1[i] - q_mod_p;
+		}
+
+		Mtrin_buf[i] = (uint8_t)(ringel_buf1[i] % 3);
+	}
+
+	/* check that the candidate message representative meets minimum weight
+     * requirements
+     */
+	if (!ntru_poly_check_min_weight(params->N,
+	                                Mtrin_buf, params->min_msg_rep_wt)) {
+		decryption_ok = FALSE;
+	}
+
+	/* form cR = e - cm' mod q */
+
+	for (i = 0; i < params->N; i++) {
+		if (Mtrin_buf[i] == 1) {
+			ringel_buf2[i] = (ringel_buf2[i] - 1) & mod_q_mask;
+		} else if (Mtrin_buf[i] == 2) {
+			ringel_buf2[i] = (ringel_buf2[i] + 1) & mod_q_mask;
+		} else {
+			;
+		}
+	}
+
+	/* form cR mod 4 */
+
+	ntru_coeffs_mod4_2_octets(params->N, ringel_buf2, tmp_buf);
+
+	/* form mask */
+
+	result = ntru_mgftp1(hash_algid, md_len,
+	                     params->min_MGF_hash_calls,
+	                     (params->N + 3) / 4, tmp_buf,
+	                     tmp_buf + params->N, params->N, tmp_buf);
+
+	if (result == NTRU_OK) {
+		/* form cMtrin by subtracting mask from cm', mod p */
+
+		for (i = 0; i < params->N; i++) {
+			Mtrin_buf[i] = Mtrin_buf[i] - tmp_buf[i];
+
+			if (Mtrin_buf[i] >= 3) {
+				Mtrin_buf[i] += 3;
+			}
+		}
+
+		/* convert cMtrin to cM (Mtrin to Mbin) */
+
+		if (!ntru_trits_2_bits(Mtrin_buf, params->N, M_buf)) {
+			decryption_ok = FALSE;
+		}
+
+		/* validate the padded message cM and copy cm to m_buf */
+
+		ptr = M_buf + params->b_len;
+
+		if (params->m_len_len == 2) {
+			cm_len = (uint16_t)(*ptr++) << 8;
+		}
+
+		cm_len |= (uint16_t)(*ptr++);
+
+		if (cm_len > params->m_len_max) {
+			cm_len = params->m_len_max;
+			decryption_ok = FALSE;
+		}
+
+		memcpy(m_buf, ptr, cm_len);
+		ptr += cm_len;
+		num_zeros = params->m_len_max - cm_len + 1;
+
+		for (i = 0; i < num_zeros; i++) {
+			if (ptr[i] != 0) {
+				decryption_ok = FALSE;
+			}
+		}
+
+		/* form sData (OID || m || b || hTrunc) */
+
+		ptr = tmp_buf;
+		memcpy(ptr, params->OID, 3);
+		ptr += 3;
+		memcpy(ptr, m_buf, cm_len);
+		ptr += cm_len;
+		memcpy(ptr, M_buf, params->b_len);
+		ptr += params->b_len;
+		memcpy(ptr, pubkey_packed, params->sec_strength_len);
+		ptr += params->sec_strength_len;
+
+		/* generate cr */
+
+		result = ntru_gen_poly(hash_algid, md_len,
+		                       params->min_IGF_hash_calls,
+		                       (uint16_t)(ptr - tmp_buf),
+		                       tmp_buf, tmp_buf,
+		                       params->N, params->c_bits,
+		                       params->no_bias_limit,
+		                       params->is_product_form,
+		                       params->dF_r << 1, i_buf);
+	}
+
+	if (result == NTRU_OK) {
+		/* unpack the public key */
+
+		{
+			uint16_t pubkey_packed_len;
+			pubkey_packed_len = (params->N * params->q_bits + 7) >> 3;
+			ntru_octets_2_elements(pubkey_packed_len, pubkey_packed,
+			                       params->q_bits, ringel_buf1);
+		}
+
+		/* form cR' = h * cr */
+
+		if (params->is_product_form) {
+			ntru_ring_mult_product_indices(ringel_buf1, (uint16_t) dF_r1,
+			                               (uint16_t) dF_r2, (uint16_t) dF_r3,
+			                               i_buf, params->N, params->q,
+			                               scratch_buf, ringel_buf1);
+		} else {
+			ntru_ring_mult_indices(ringel_buf1, (uint16_t) dF_r, (uint16_t) dF_r,
+			                       i_buf, params->N, params->q,
+			                       scratch_buf, ringel_buf1);
+		}
+
+		/* compare cR' to cR */
+
+		for (i = 0; i < params->N; i++) {
+			if (ringel_buf1[i] != ringel_buf2[i]) {
+				decryption_ok = FALSE;
+			}
+		}
+
+		/* output plaintext and plaintext length */
+
+		if (decryption_ok) {
+			if (*pt_len < cm_len) {
+				memset(scratch_buf, 0, scratch_buf_len);
+				FREE(scratch_buf);
+				NTRU_RET(NTRU_BUFFER_TOO_SMALL);
+			}
+
+			memcpy(pt, m_buf, cm_len);
+			*pt_len = cm_len;
+		}
+	}
+
+	/* cleanup */
+
+	memset(scratch_buf, 0, scratch_buf_len);
+	FREE(scratch_buf);
+
+	if (!decryption_ok) {
+		NTRU_RET(NTRU_FAIL);
+	}
+
+	return result;
+}
+
+/* ntru_crypto_ntru_encrypt_keygen
+ *
+ * Implements key generation for NTRUEncrypt for the parameter set specified.
+ *
+ * Before invoking this function, a DRBG must be instantiated using
+ * ntru_crypto_drbg_instantiate() to obtain a DRBG handle, and in that
+ * instantiation the requested security strength must be at least as large
+ * as the security strength of the NTRU parameter set being used.
+ * Failure to instantiate the DRBG with the proper security strength will
+ * result in this function returning DRBG_ERROR_BASE + DRBG_BAD_LENGTH.
+ *
+ * The required minimum size of the output public-key buffer (pubkey_blob)
+ * may be queried by invoking this function with pubkey_blob = NULL.
+ * In this case, no key generation is performed, NTRU_OK is returned, and
+ * the required minimum size for pubkey_blob is returned in pubkey_blob_len.
+ *
+ * The required minimum size of the output private-key buffer (privkey_blob)
+ * may be queried by invoking this function with privkey_blob = NULL.
+ * In this case, no key generation is performed, NTRU_OK is returned, and
+ * the required minimum size for privkey_blob is returned in privkey_blob_len.
+ *
+ * The required minimum sizes of both pubkey_blob and privkey_blob may be
+ * queried as described above, in a single invocation of this function.
+ *
+ * When pubkey_blob != NULL and privkey_blob != NULL, at invocation
+ * *pubkey_blob_len must be the size of the pubkey_blob buffer and
+ * *privkey_blob_len must be the size of the privkey_blob buffer.
+ * Upon return, *pubkey_blob_len is the actual size of the public-key blob
+ * and *privkey_blob_len is the actual size of the private-key blob.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer
+ *  (other than pubkey_blob or privkey_blob) is NULL.
+ * Returns NTRU_ERROR_BASE + NTRU_INVALID_PARAMETER_SET if the parameter-set
+ *  ID is invalid.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if a length argument is invalid.
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if either the pubkey_blob
+ * buffer or the privkey_blob buffer is too small.
+ * Returns NTRU_ERROR_BASE + NTRU_NO_MEMORY if memory needed cannot be
+ *  allocated from the heap.
+ * Returns NTRU_ERROR_BASE + NTRU_FAIL if the polynomial generated for f is
+ *  not invertible in (Z/qZ)[X]/(X^N - 1), which is extremely unlikely.
+ *  Should this occur, this function should simply be invoked again.
+ */
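+
+/* Usage sketch (illustrative; NTRU_EES401EP2 stands in for any parameter-set
+ * ID assumed to be defined in ntru_crypto_ntru_encrypt_param_sets.h, and
+ * `drbg` for a handle from ntru_crypto_drbg_instantiate()):
+ *
+ *     uint16_t pub_len = 0, priv_len = 0;
+ *     uint8_t *pub, *priv;
+ *
+ *     // query both blob sizes in a single call
+ *     ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_EES401EP2,
+ *                                     &pub_len, NULL, &priv_len, NULL);
+ *
+ *     pub = malloc(pub_len);
+ *     priv = malloc(priv_len);
+ *
+ *     if (pub && priv &&
+ *         ntru_crypto_ntru_encrypt_keygen(drbg, NTRU_EES401EP2, &pub_len, pub,
+ *                                         &priv_len, priv) == NTRU_OK) {
+ *         // pub and priv now hold the public- and private-key blobs
+ *     }
+ */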
+
+uint32_t
+ntru_crypto_ntru_encrypt_keygen(
+    DRBG_HANDLE drbg_handle,                /*     in - handle of DRBG */
+    NTRU_ENCRYPT_PARAM_SET_ID param_set_id, /*     in - parameter set ID */
+    uint16_t *pubkey_blob_len,              /* in/out - no. of octets in
+                                                             pubkey_blob, addr
+                                                             for no. of octets
+                                                             in pubkey_blob */
+    uint8_t *pubkey_blob,                   /*    out - address for
+                                                             public key blob */
+    uint16_t *privkey_blob_len,             /* in/out - no. of octets in
+                                                             privkey_blob, addr
+                                                             for no. of octets
+                                                             in privkey_blob */
+    uint8_t *privkey_blob)                  /*    out - address for
+                                                             private key blob */
+{
+	NTRU_ENCRYPT_PARAM_SET *params = NULL;
+	uint16_t public_key_blob_len;
+	uint16_t private_key_blob_len;
+	uint8_t pubkey_pack_type;
+	uint8_t privkey_pack_type;
+	size_t scratch_buf_len;
+	uint32_t dF;
+	uint32_t dF1 = 0;
+	uint32_t dF2 = 0;
+	uint32_t dF3 = 0;
+	uint16_t pad_deg;
+	uint16_t total_polys;
+	uint16_t num_scratch_polys;
+	uint16_t *scratch_buf = NULL;
+	uint16_t *ringel_buf1 = NULL;
+	uint16_t *ringel_buf2 = NULL;
+	uint16_t *F_buf = NULL;
+	uint8_t *tmp_buf = NULL;
+	uint16_t mod_q_mask;
+	NTRU_CRYPTO_HASH_ALGID hash_algid;
+	uint8_t md_len;
+	uint16_t seed_len;
+	uint32_t result = NTRU_OK;
+
+	/* get a pointer to the parameter-set parameters */
+
+	if ((params = ntru_encrypt_get_params_with_id(param_set_id)) == NULL) {
+		NTRU_RET(NTRU_INVALID_PARAMETER_SET);
+	}
+
+	/* check for bad parameters */
+
+	if (!pubkey_blob_len || !privkey_blob_len) {
+		NTRU_RET(NTRU_BAD_PARAMETER);
+	}
+
+	/* get public and private key packing types and blob lengths */
+
+	ntru_crypto_ntru_encrypt_key_get_blob_params(params, &pubkey_pack_type,
+	                                             &public_key_blob_len,
+	                                             &privkey_pack_type,
+	                                             &private_key_blob_len);
+
+	/* return the pubkey_blob size and/or privkey_blob size if requested */
+
+	if (!pubkey_blob || !privkey_blob) {
+		if (!pubkey_blob) {
+			*pubkey_blob_len = public_key_blob_len;
+		}
+
+		if (!privkey_blob) {
+			*privkey_blob_len = private_key_blob_len;
+		}
+
+		NTRU_RET(NTRU_OK);
+	}
+
+	/* check size of output buffers */
+
+	if ((*pubkey_blob_len < public_key_blob_len) ||
+	    (*privkey_blob_len < private_key_blob_len)) {
+		NTRU_RET(NTRU_BUFFER_TOO_SMALL);
+	}
+
+	/* Allocate memory for all operations. We need:
+     *  - 2 polynomials for results: ringel_buf1 and ringel_buf2.
+     *  - scratch space for ntru_ring_mult_coefficients (which is
+     *    implementation dependent) plus one additional polynomial
+     *    of the same size for ntru_ring_lift_inv_pow2_x.
+     *  - 2*dF coefficients for F
+     */
+	ntru_ring_mult_coefficients_memreq(params->N, &num_scratch_polys, &pad_deg);
+	num_scratch_polys += 1; /* ntru_ring_lift_... */
+
+	total_polys = num_scratch_polys;
+	if (params->is_product_form) {
+		dF1 = params->dF_r & 0xff;
+		dF2 = (params->dF_r >> 8) & 0xff;
+		dF3 = (params->dF_r >> 16) & 0xff;
+		dF = dF1 + dF2 + dF3;
+		/* For product-form keys we can overlap ringel_buf1 and the scratch
+         * space, since multiplication by f uses F_buf, so we only need to
+         * add room for ringel_buf2 */
+		num_scratch_polys -= 1;
+		total_polys += 1;
+	} else {
+		dF = params->dF_r;
+		total_polys += 2; /* ringel_buf{1,2} */
+	}
+
+	scratch_buf_len = ((size_t)(total_polys * pad_deg)) * sizeof(uint16_t);
+	scratch_buf_len += 2 * dF * sizeof(uint16_t);
+	scratch_buf = MALLOC(scratch_buf_len);
+	if (!scratch_buf) {
+		NTRU_RET(NTRU_OUT_OF_MEMORY);
+	}
+	memset(scratch_buf, 0, scratch_buf_len);
+
+	ringel_buf1 = scratch_buf + num_scratch_polys * pad_deg;
+	ringel_buf2 = ringel_buf1 + pad_deg;
+	F_buf = ringel_buf2 + pad_deg;
+	tmp_buf = (uint8_t *) scratch_buf;
+
+	/* set hash algorithm and seed length based on security strength */
+
+	if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA1) {
+		hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA1;
+		md_len = SHA_1_MD_LEN;
+	} else if (params->hash_algid == NTRU_CRYPTO_HASH_ALGID_SHA256) {
+		hash_algid = NTRU_CRYPTO_HASH_ALGID_SHA256;
+		md_len = SHA_256_MD_LEN;
+	} else {
+		FREE(scratch_buf);
+		NTRU_RET(NTRU_UNSUPPORTED_PARAM_SET);
+	}
+
+	seed_len = 2 * params->sec_strength_len;
+
+	/* set constants */
+
+	mod_q_mask = params->q - 1;
+
+	/* get random bytes for seed for generating trinary F
+     * as a list of indices
+     */
+
+	result = ntru_crypto_drbg_generate(drbg_handle,
+	                                   params->sec_strength_len << 3,
+	                                   seed_len, tmp_buf);
+
+	if (result == NTRU_OK) {
+
+		/* generate F */
+
+		result = ntru_gen_poly(hash_algid, md_len,
+		                       params->min_IGF_hash_calls,
+		                       seed_len, tmp_buf, tmp_buf,
+		                       params->N, params->c_bits,
+		                       params->no_bias_limit,
+		                       params->is_product_form,
+		                       params->dF_r << 1, F_buf);
+	}
+
+	if (result == NTRU_OK) {
+		uint32_t i;
+
+		memset(ringel_buf1, 0, params->N * sizeof(uint16_t));
+
+		/* form F as a ring element */
+
+		if (params->is_product_form) {
+			uint32_t dF3_offset = (dF1 + dF2) << 1;
+
+			/* form F1 as a ring element */
+
+			for (i = 0; i < dF1; i++) {
+				ringel_buf1[F_buf[i]] = 1;
+			}
+
+			for (; i < (dF1 << 1); i++) {
+				ringel_buf1[F_buf[i]] = mod_q_mask;
+			}
+
+			/* form F1 * F2 */
+
+			ntru_ring_mult_indices(ringel_buf1, (uint16_t) dF2, (uint16_t) dF2,
+			                       F_buf + (dF1 << 1), params->N, params->q,
+			                       scratch_buf, ringel_buf1);
+
+			/* form (F1 * F2) + F3 */
+
+			for (i = 0; i < dF3; i++) {
+				uint16_t index = F_buf[dF3_offset + i];
+				ringel_buf1[index] = (ringel_buf1[index] + 1) & mod_q_mask;
+			}
+
+			for (; i < (dF3 << 1); i++) {
+				uint16_t index = F_buf[dF3_offset + i];
+				ringel_buf1[index] = (ringel_buf1[index] - 1) & mod_q_mask;
+			}
+
+		} else {
+			/* form F as a ring element */
+
+			for (i = 0; i < dF; i++) {
+				ringel_buf1[F_buf[i]] = 1;
+			}
+
+			for (; i < (dF << 1); i++) {
+				ringel_buf1[F_buf[i]] = mod_q_mask;
+			}
+		}
+
+		/* form f = 1 + pF */
+
+		for (i = 0; i < params->N; i++) {
+			ringel_buf1[i] = (ringel_buf1[i] * 3) & mod_q_mask;
+		}
+
+		ringel_buf1[0] = (ringel_buf1[0] + 1) & mod_q_mask;
+
+		/* find f^-1 in (Z/2Z)[X]/(X^N - 1) */
+
+		if (!ntru_ring_inv(ringel_buf1, params->N, scratch_buf, ringel_buf2)) {
+			result = NTRU_RESULT(NTRU_FAIL);
+		}
+	}
+
+	if (result == NTRU_OK) {
+		/* lift f^-1 in (Z/2Z)[X]/(X^N - 1) to f^-1 in (Z/qZ)[X]/(X^N - 1) */
+		if (params->is_product_form) {
+			result = ntru_ring_lift_inv_pow2_product(ringel_buf2,
+			                                         (uint16_t) dF1, (uint16_t) dF2, (uint16_t) dF3,
+			                                         F_buf, params->N, params->q, scratch_buf);
+		} else {
+			result = ntru_ring_lift_inv_pow2_standard(ringel_buf2,
+			                                          ringel_buf1, params->N, params->q, scratch_buf);
+		}
+	}
+
+	if (result == NTRU_OK) {
+
+		/* get random bytes for seed for generating trinary g
+         * as a list of indices
+         */
+		result = ntru_crypto_drbg_generate(drbg_handle,
+		                                   params->sec_strength_len << 3,
+		                                   seed_len, tmp_buf);
+	}
+
+	if (result == NTRU_OK) {
+		uint16_t min_IGF_hash_calls =
+		    ((((params->dg << 2) + 2) * params->N_bits) + (md_len << 3) - 1) /
+		    (md_len << 3);
+
+		/* generate g */
+
+		result = ntru_gen_poly(hash_algid, md_len,
+		                       (uint8_t) min_IGF_hash_calls,
+		                       seed_len, tmp_buf, tmp_buf,
+		                       params->N, params->c_bits,
+		                       params->no_bias_limit, FALSE,
+		                       (params->dg << 1) + 1, ringel_buf1);
+	}
+
+	if (result == NTRU_OK) {
+		uint16_t i;
+
+		/* compute h = p * (f^-1 * g) mod q */
+
+		ntru_ring_mult_indices(ringel_buf2, params->dg + 1, params->dg,
+		                       ringel_buf1, params->N, params->q, scratch_buf,
+		                       ringel_buf2);
+
+		for (i = 0; i < params->N; i++) {
+			ringel_buf2[i] = (ringel_buf2[i] * 3) & mod_q_mask;
+		}
+
+		/* create public key blob */
+
+		result = ntru_crypto_ntru_encrypt_key_create_pubkey_blob(params,
+		                                                         ringel_buf2, pubkey_pack_type, pubkey_blob);
+		*pubkey_blob_len = public_key_blob_len;
+	}
+
+	if (result == NTRU_OK) {
+		/* create private key blob */
+		result = ntru_crypto_ntru_encrypt_key_create_privkey_blob(params,
+		                                                          ringel_buf2, F_buf, privkey_pack_type, tmp_buf, privkey_blob);
+		*privkey_blob_len = private_key_blob_len;
+	}
+
+	/* cleanup */
+
+	memset(scratch_buf, 0, scratch_buf_len);
+	FREE(scratch_buf);
+
+	return result;
+}
+
+/* DER-encoding prefix template for NTRU public keys,
+ * with parameter-set-specific fields normalized
+ */
+
+static uint8_t const der_prefix_template[] = {
+    0x30, 0x82,
+    0x00, 0x25, /* add pubkey length 2 */
+    0x30, 0x1a, 0x06, 0x0b, 0x2b, 0x06, 0x01,
+    0x04, 0x01, 0xc1, 0x16, 0x01, 0x01, 0x01,
+    0x01, /* end of NTRU OID compare */
+    0x06, 0x0b, 0x2b, 0x06, 0x01, 0x04, 0x01,
+    0xc1, 0x16, 0x01, 0x01, 0x02,
+    0x00, /* set param-set DER id 31 */
+    0x03, 0x82,
+    0x00, 0x05, /* add pubkey length 34 */
+    0x00, 0x04, 0x82,
+    0x00, 0x00, /* add pubkey length 39 */
+};
+
+/* add_16_to_8s
+ *
+ * adds a 16-bit value to a 16-bit big-endian value stored in two octets
+ */
+
+static void
+add_16_to_8s(
+    uint16_t a,
+    uint8_t *b) {
+	uint16_t tmp = ((uint16_t) b[0] << 8) + b[1];
+
+	tmp = tmp + a;
+	b[0] = (uint8_t)((tmp >> 8) & 0xff);
+	b[1] = (uint8_t)(tmp & 0xff);
+
+	return;
+}
+
+/* sub_16_from_8s
+ *
+ * subtracts a 16-bit value from a 16-bit big-endian value stored in two octets
+ */
+
+static void
+sub_16_from_8s(
+    uint16_t a,
+    uint8_t *b) {
+	uint16_t tmp = ((uint16_t) b[0] << 8) + b[1];
+
+	tmp = tmp - a;
+	b[0] = (uint8_t)((tmp >> 8) & 0xff);
+	b[1] = (uint8_t)(tmp & 0xff);
+
+	return;
+}
+
+/* ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo
+ *
+ * DER-encodes an NTRUEncrypt public-key from a public-key blob into a
+ * SubjectPublicKeyInfo field for inclusion in an X.509 certificate.
+ *
+ * The required minimum size of the output SubjectPublicKeyInfo buffer
+ * (encoded_subjectPublicKeyInfo) may be queried by invoking this function
+ * with encoded_subjectPublicKeyInfo = NULL.  In this case, no encoding is
+ * performed, NTRU_OK is returned, and the required minimum size for
+ * encoded_subjectPublicKeyInfo is returned in encoded_subjectPublicKeyInfo_len.
+ *
+ * When encoded_subjectPublicKeyInfo != NULL, at invocation
+ * *encoded_subjectPublicKeyInfo_len must be the size of the
+ * encoded_subjectPublicKeyInfo buffer.
+ * Upon return, it is the actual size of the encoded public key.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer
+ *  (other than encoded_subjectPublicKeyInfo) is NULL.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if pubkey_blob_len is zero.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PUBLIC_KEY if the public-key blob is
+ *  invalid (unknown format, corrupt, bad length).
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the SubjectPublicKeyInfo
+ *  buffer is too small.
+ */
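+
+/* Usage sketch (illustrative): the encoded size is obtained by passing a NULL
+ * output buffer, mirroring the other query-then-call functions above:
+ *
+ *     uint16_t info_len = 0;
+ *     uint8_t *info;
+ *
+ *     ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo(
+ *         pubkey_blob_len, pubkey_blob, &info_len, NULL);
+ *
+ *     if ((info = malloc(info_len)) != NULL) {
+ *         ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo(
+ *             pubkey_blob_len, pubkey_blob, &info_len, info);
+ *     }
+ */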
+
+uint32_t
+ntru_crypto_ntru_encrypt_publicKey2SubjectPublicKeyInfo(
+    uint16_t pubkey_blob_len,   /*     in - no. of octets in public-key
+                                                blob */
+    uint8_t const *pubkey_blob, /*     in - ptr to public-key blob */
+    uint16_t *encoded_subjectPublicKeyInfo_len,
+    /* in/out - no. of octets in encoded info,
+                                                address for no. of octets in
+                                                encoded info */
+    uint8_t *encoded_subjectPublicKeyInfo)
+/*    out - address for encoded info */
+{
+	NTRU_ENCRYPT_PARAM_SET *params = NULL;
+	uint8_t const *pubkey_packed = NULL;
+	uint8_t pubkey_pack_type;
+	uint16_t packed_pubkey_len;
+	uint16_t encoded_len;
+
+	/* check for bad parameters */
+
+	if (!pubkey_blob || !encoded_subjectPublicKeyInfo_len) {
+		NTRU_RET(NTRU_BAD_PARAMETER);
+	}
+
+	if (pubkey_blob_len == 0) {
+		NTRU_RET(NTRU_BAD_LENGTH);
+	}
+
+	/* get a pointer to the parameter-set parameters, the packing type for
+     * the public key, and a pointer to the packed public key
+     */
+
+	if (!ntru_crypto_ntru_encrypt_key_parse(TRUE /* pubkey */, pubkey_blob_len,
+	                                        pubkey_blob, &pubkey_pack_type,
+	                                        NULL, &params, &pubkey_packed,
+	                                        NULL)) {
+		NTRU_RET(NTRU_BAD_PUBLIC_KEY);
+	}
+
+	/* return the encoded_subjectPublicKeyInfo size if requested */
+
+	packed_pubkey_len = (params->N * params->q_bits + 7) >> 3;
+	encoded_len = sizeof(der_prefix_template) + packed_pubkey_len;
+
+	if (!encoded_subjectPublicKeyInfo) {
+		*encoded_subjectPublicKeyInfo_len = encoded_len;
+		NTRU_RET(NTRU_OK);
+	}
+
+	/* check the encoded_subjectPublicKeyInfo buffer size */
+
+	if (*encoded_subjectPublicKeyInfo_len < encoded_len) {
+		NTRU_RET(NTRU_BUFFER_TOO_SMALL);
+	}
+
+	/* form the encoded subjectPublicKey */
+
+	memcpy(encoded_subjectPublicKeyInfo, der_prefix_template,
+	       sizeof(der_prefix_template));
+
+	add_16_to_8s(packed_pubkey_len, encoded_subjectPublicKeyInfo + 2);
+	add_16_to_8s(packed_pubkey_len, encoded_subjectPublicKeyInfo + 34);
+	add_16_to_8s(packed_pubkey_len, encoded_subjectPublicKeyInfo + 39);
+	encoded_subjectPublicKeyInfo[31] = params->der_id;
+
+	memcpy(encoded_subjectPublicKeyInfo + sizeof(der_prefix_template),
+	       pubkey_packed, packed_pubkey_len);
+
+	*encoded_subjectPublicKeyInfo_len = encoded_len;
+
+	NTRU_RET(NTRU_OK);
+}
+
+/* ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey
+ *
+ * Decodes a DER-encoded NTRUEncrypt public-key from a
+ * SubjectPublicKeyInfo field in an X.509 certificate and returns the
+ * public-key blob itself.
+ *
+ * The required minimum size of the output public-key buffer (pubkey_blob)
+ * may be queried by invoking this function with pubkey_blob = NULL.
+ * In this case, no decoding is performed, NTRU_OK is returned, and the
+ * required minimum size for pubkey_blob is returned in pubkey_blob_len.
+ *
+ * When pubkey_blob != NULL, at invocation *pubkey_blob_len must be the
+ * size of the pubkey_blob buffer.
+ * Upon return, it is the actual size of the public-key blob.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_LENGTH if the encoded data buffer
+ *  does not contain a full der prefix and public key.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_PARAMETER if an argument pointer
+ *  (other than pubkey_blob) is NULL.
+ * Returns NTRU_ERROR_BASE + NTRU_BAD_ENCODING if the encoded data is
+ *  an invalid encoding of an NTRU public key.
+ * Returns NTRU_ERROR_BASE + NTRU_OID_NOT_RECOGNIZED if the
+ *  encoded data contains an OID that identifies an object other than
+ *  an NTRU public key.
+ * Returns NTRU_ERROR_BASE + NTRU_BUFFER_TOO_SMALL if the pubkey_blob buffer
+ *  is too small.
+ */
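+
+/* Usage sketch (illustrative): `encoded` points to DER data of length
+ * `encoded_len`; after a successful decoding call, next/remaining describe
+ * any data that follows the subjectPublicKeyInfo:
+ *
+ *     uint16_t blob_len = 0;
+ *     uint8_t *next = NULL;
+ *     uint32_t remaining = encoded_len;
+ *     uint8_t *blob;
+ *
+ *     // query the required public-key blob size
+ *     ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey(
+ *         encoded, &blob_len, NULL, &next, &remaining);
+ *
+ *     if ((blob = malloc(blob_len)) != NULL) {
+ *         ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey(
+ *             encoded, &blob_len, blob, &next, &remaining);
+ *     }
+ */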
+
+uint32_t
+ntru_crypto_ntru_encrypt_subjectPublicKeyInfo2PublicKey(
+    uint8_t const *encoded_data,  /*     in - ptr to subjectPublicKeyInfo
+                                                 in the encoded data */
+    uint16_t *pubkey_blob_len,    /* in/out - no. of octets in pubkey blob,
+                                                 address for no. of octets in
+                                                 pubkey blob */
+    uint8_t *pubkey_blob,         /*    out - address for pubkey blob */
+    uint8_t **next,               /*    out - address for ptr to encoded
+                                                 data following the 
+                                                 subjectPublicKeyInfo */
+    uint32_t *remaining_data_len) /* in/out - number of bytes remaining in
+                                                    buffer *next */
+{
+	NTRU_ENCRYPT_PARAM_SET *params = NULL;
+	uint8_t prefix_buf[41];
+	bool der_id_valid;
+	uint16_t packed_pubkey_len = 0;
+	uint8_t pubkey_pack_type;
+	uint16_t public_key_blob_len;
+	uint8_t *data_ptr;
+	uint32_t data_len;
+
+	/* check for bad parameters */
+
+	if (!encoded_data || !pubkey_blob_len || !next || !remaining_data_len) {
+		NTRU_RET(NTRU_BAD_PARAMETER);
+	}
+
+	data_len = *remaining_data_len;
+	if (data_len < sizeof(prefix_buf)) {
+		NTRU_RET(NTRU_BAD_LENGTH);
+	}
+
+	/* determine if data to be decoded is a valid encoding of an NTRU
+     * public key
+     */
+
+	data_ptr = (uint8_t *) encoded_data;
+	memcpy(prefix_buf, data_ptr, sizeof(prefix_buf));
+
+	/* get a pointer to the parameter-set parameters */
+
+	if ((params = ntru_encrypt_get_params_with_DER_id(data_ptr[31])) == NULL) {
+		der_id_valid = FALSE;
+
+		/* normalize the prefix-buffer data used in an NTRU OID comparison */
+
+		prefix_buf[2] = der_prefix_template[2];
+		prefix_buf[3] = der_prefix_template[3];
+
+	} else {
+		der_id_valid = TRUE;
+
+		/* normalize the prefix-buffer data for the specific parameter set */
+
+		packed_pubkey_len = (params->N * params->q_bits + 7) >> 3;
+		sub_16_from_8s(packed_pubkey_len, prefix_buf + 2);
+		sub_16_from_8s(packed_pubkey_len, prefix_buf + 34);
+		sub_16_from_8s(packed_pubkey_len, prefix_buf + 39);
+		prefix_buf[31] = 0;
+		/*prefix_buf[40] = 0; */
+	}
+
+	/* validate the DER prefix encoding */
+
+	if (!der_id_valid || memcmp(prefix_buf, der_prefix_template,
+	                            sizeof(der_prefix_template))) {
+
+		/* bad DER prefix, so determine if this is a bad NTRU encoding or an
+         * unknown OID by comparing the first 18 octets
+         */
+
+		if (memcmp(prefix_buf, der_prefix_template, 18) == 0) {
+			NTRU_RET(NTRU_OID_NOT_RECOGNIZED);
+		} else {
+			NTRU_RET(NTRU_BAD_ENCODING);
+		}
+	}
+
+	/* done with prefix */
+
+	data_ptr += sizeof(prefix_buf);
+	data_len -= sizeof(prefix_buf);
+
+	/* get public key packing type and blob length */
+
+	ntru_crypto_ntru_encrypt_key_get_blob_params(params, &pubkey_pack_type,
+	                                             &public_key_blob_len, NULL,
+	                                             NULL);
+
+	/* return the pubkey_blob size if requested */
+
+	if (!pubkey_blob) {
+		*pubkey_blob_len = public_key_blob_len;
+		NTRU_RET(NTRU_OK);
+	}
+
+	/* check size of output buffer */
+
+	if (*pubkey_blob_len < public_key_blob_len) {
+		NTRU_RET(NTRU_BUFFER_TOO_SMALL);
+	}
+
+	/* check that blob contains additional data of length packed_pubkey_len */
+	if (data_len < packed_pubkey_len) {
+		NTRU_RET(NTRU_BAD_LENGTH);
+	}
+
+	/* check that the public key pack type is supported */
+	if (pubkey_pack_type != NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS) {
+		NTRU_RET(NTRU_BAD_PUBLIC_KEY);
+	}
+
+	/* create the public-key blob */
+	ntru_crypto_ntru_encrypt_key_recreate_pubkey_blob(params, packed_pubkey_len,
+	                                                  data_ptr, pubkey_pack_type, pubkey_blob);
+	*pubkey_blob_len = public_key_blob_len;
+
+	data_ptr += packed_pubkey_len;
+	data_len -= packed_pubkey_len;
+
+	/* check whether the buffer is empty and update *next */
+	if (data_len > 0) {
+		*next = data_ptr;
+		*remaining_data_len = data_len;
+	} else {
+		*next = NULL;
+		*remaining_data_len = 0;
+	}
+
+	NTRU_RET(NTRU_OK);
+}
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.c
new file mode 100644
index 0000000000000000000000000000000000000000..5e515c006913f86636698ff6f2c41a42946aac9b
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.c
@@ -0,0 +1,392 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_ntru_encrypt_key.c
+ *
+ * Contents: Routines for exporting and importing public and private keys
+ *           for NTRUEncrypt.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_encrypt_key.h"
+
+/* ntru_crypto_ntru_encrypt_key_parse
+ *
+ * Parses an NTRUEncrypt key blob.
+ * If the blob is not corrupt, returns packing types for public and private
+ * keys, a pointer to the parameter set, a pointer to the public key, and
+ * a pointer to the private key if it exists.
+ *
+ * Returns TRUE if successful.
+ * Returns FALSE if the blob is invalid.
+ */
+
+bool ntru_crypto_ntru_encrypt_key_parse(
+    bool pubkey_parse,               /*  in - if parsing pubkey
+                                                         blob */
+    uint16_t key_blob_len,           /*  in - no. octets in key
+                                                         blob */
+    uint8_t const *key_blob,         /*  in - pointer to key blob */
+    uint8_t *pubkey_pack_type,       /* out - addr for pubkey
+                                                         packing type */
+    uint8_t *privkey_pack_type,      /* out - addr for privkey
+                                                         packing type */
+    NTRU_ENCRYPT_PARAM_SET **params, /* out - addr for ptr to
+                                                         parameter set */
+    uint8_t const **pubkey,          /* out - addr for ptr to
+                                                         packed pubkey */
+    uint8_t const **privkey)         /* out - addr for ptr to
+                                                         packed privkey */
+{
+	uint8_t tag;
+
+	/* parse key blob based on tag */
+
+	tag = key_blob[0];
+	switch (tag) {
+	case NTRU_ENCRYPT_PUBKEY_TAG:
+
+		if (!pubkey_parse) {
+			return FALSE;
+		}
+
+		break;
+
+	case NTRU_ENCRYPT_PRIVKEY_DEFAULT_TAG:
+	case NTRU_ENCRYPT_PRIVKEY_TRITS_TAG:
+	case NTRU_ENCRYPT_PRIVKEY_INDICES_TAG:
+
+		if (pubkey_parse) {
+			return FALSE;
+		}
+		break;
+
+	default:
+		return FALSE;
+		break;
+	}
+
+	switch (tag) {
+	case NTRU_ENCRYPT_PUBKEY_TAG:
+	case NTRU_ENCRYPT_PRIVKEY_DEFAULT_TAG:
+	case NTRU_ENCRYPT_PRIVKEY_TRITS_TAG:
+	case NTRU_ENCRYPT_PRIVKEY_INDICES_TAG:
+
+		/* Version 0:
+             *  byte  0:   tag
+             *  byte  1:   no. of octets in OID
+             *  bytes 2-4: OID
+             *  bytes 5- : packed pubkey
+             *             [packed privkey]
+             */
+
+		{
+			NTRU_ENCRYPT_PARAM_SET *p = NULL;
+			uint16_t pubkey_packed_len;
+
+			/* check OID length and minimum blob length for tag and OID */
+
+			if ((key_blob_len < 5) || (key_blob[1] != 3)) {
+				return FALSE;
+			}
+
+			/* get a pointer to the parameter set corresponding to the OID */
+
+			if ((p = ntru_encrypt_get_params_with_OID(key_blob + 2)) == NULL) {
+				return FALSE;
+			}
+
+			/* check blob length and assign pointers to blob fields */
+
+			pubkey_packed_len = (p->N * p->q_bits + 7) / 8;
+
+			if (pubkey_parse) /* public-key parsing */
+			{
+				if (key_blob_len != 5 + pubkey_packed_len) {
+					return FALSE;
+				}
+
+				*pubkey = key_blob + 5;
+
+			} else /* private-key parsing */
+			{
+				uint16_t privkey_packed_len;
+				uint16_t privkey_packed_trits_len = (p->N + 4) / 5;
+				uint16_t privkey_packed_indices_len;
+				uint16_t dF;
+
+				/* check packing type for product-form private keys */
+
+				if (p->is_product_form &&
+				    (tag == NTRU_ENCRYPT_PRIVKEY_TRITS_TAG)) {
+					return FALSE;
+				}
+
+				/* set packed-key length for packed indices */
+
+				if (p->is_product_form) {
+					dF = (uint16_t)((p->dF_r & 0xff) +         /* df1 */
+					                ((p->dF_r >> 8) & 0xff) +  /* df2 */
+					                ((p->dF_r >> 16) & 0xff)); /* df3 */
+				} else {
+					dF = (uint16_t) p->dF_r;
+				}
+
+				privkey_packed_indices_len = ((dF << 1) * p->N_bits + 7) >> 3;
+
+				/* set private-key packing type if defaulted */
+
+				if (tag == NTRU_ENCRYPT_PRIVKEY_DEFAULT_TAG) {
+					if (p->is_product_form ||
+					    (privkey_packed_indices_len <=
+					     privkey_packed_trits_len)) {
+						tag = NTRU_ENCRYPT_PRIVKEY_INDICES_TAG;
+					} else {
+						tag = NTRU_ENCRYPT_PRIVKEY_TRITS_TAG;
+					}
+				}
+
+				if (tag == NTRU_ENCRYPT_PRIVKEY_TRITS_TAG) {
+					privkey_packed_len = privkey_packed_trits_len;
+				} else {
+					privkey_packed_len = privkey_packed_indices_len;
+				}
+
+				if (key_blob_len != 5 + pubkey_packed_len + privkey_packed_len) {
+					return FALSE;
+				}
+
+				*pubkey = key_blob + 5;
+				*privkey = *pubkey + pubkey_packed_len;
+				*privkey_pack_type = (tag == NTRU_ENCRYPT_PRIVKEY_TRITS_TAG) ? NTRU_ENCRYPT_KEY_PACKED_TRITS : NTRU_ENCRYPT_KEY_PACKED_INDICES;
+			}
+
+			/* return parameter set pointer */
+
+			*pubkey_pack_type = NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS;
+			*params = p;
+		}
+
+	default:
+		break; /* can't get here */
+	}
+
+	return TRUE;
+}
+
+/* ntru_crypto_ntru_encrypt_key_get_blob_params
+ *
+ * Returns public and private key packing types and blob lengths given
+ * a packing format.  For now, only a default packing format exists.
+ *
+ * Only public-key params may be returned by setting privkey_pack_type
+ * and privkey_blob_len to NULL.
+ */
+
+void ntru_crypto_ntru_encrypt_key_get_blob_params(
+    NTRU_ENCRYPT_PARAM_SET const *params, /*  in - pointer to
+                                                               param set
+                                                               parameters */
+    uint8_t *pubkey_pack_type,            /* out - addr for pubkey
+                                                               packing type */
+    uint16_t *pubkey_blob_len,            /* out - addr for no. of
+                                                               bytes in
+                                                               pubkey blob */
+    uint8_t *privkey_pack_type,           /* out - addr for privkey
+                                                               packing type */
+    uint16_t *privkey_blob_len)           /* out - addr for no. of
+                                                               bytes in
+                                                               privkey blob */
+{
+	uint16_t pubkey_packed_len = (params->N * params->q_bits + 7) >> 3;
+
+	*pubkey_pack_type = NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS;
+	*pubkey_blob_len = 5 + pubkey_packed_len;
+
+	if (privkey_pack_type && privkey_blob_len) {
+		uint16_t privkey_packed_trits_len = (params->N + 4) / 5;
+		uint16_t privkey_packed_indices_len;
+		uint16_t dF;
+
+		if (params->is_product_form) {
+			dF = (uint16_t)((params->dF_r & 0xff) +         /* df1 */
+			                ((params->dF_r >> 8) & 0xff) +  /* df2 */
+			                ((params->dF_r >> 16) & 0xff)); /* df3 */
+		} else {
+			dF = (uint16_t) params->dF_r;
+		}
+
+		privkey_packed_indices_len = ((dF << 1) * params->N_bits + 7) >> 3;
+
+		if (params->is_product_form ||
+		    (privkey_packed_indices_len <= privkey_packed_trits_len)) {
+			*privkey_pack_type = NTRU_ENCRYPT_KEY_PACKED_INDICES;
+			*privkey_blob_len =
+			    5 + pubkey_packed_len + privkey_packed_indices_len;
+		} else {
+			*privkey_pack_type = NTRU_ENCRYPT_KEY_PACKED_TRITS;
+			*privkey_blob_len =
+			    5 + pubkey_packed_len + privkey_packed_trits_len;
+		}
+	}
+
+	return;
+}
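+
+/* Length example (illustrative numbers): the constant 5 above is the blob
+ * header (1-octet tag, 1-octet OID length, 3-octet OID).  For a parameter
+ * set with N = 743 and q_bits = 11, the packed public key occupies
+ * (743 * 11 + 7) >> 3 = 1022 octets, so the public-key blob is
+ * 5 + 1022 = 1027 octets; the private-key blob additionally appends the
+ * packed private key in trit or index form.
+ */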
+
+/* ntru_crypto_ntru_encrypt_key_create_pubkey_blob
+ *
+ * Returns a public key blob, packed according to the packing type provided.
+ */
+
+uint32_t
+ntru_crypto_ntru_encrypt_key_create_pubkey_blob(
+    NTRU_ENCRYPT_PARAM_SET const *params, /*  in - pointer to
+                                                               param set
+                                                               parameters */
+    uint16_t const *pubkey,               /*  in - pointer to the
+                                                               coefficients
+                                                               of the pubkey */
+    uint8_t pubkey_pack_type,             /*  in - pubkey packing
+                                                               type */
+    uint8_t *pubkey_blob)                 /* out - addr for the
+                                                               pubkey blob */
+{
+
+	switch (pubkey_pack_type) {
+	case NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS:
+		*pubkey_blob++ = NTRU_ENCRYPT_PUBKEY_TAG;
+		*pubkey_blob++ = (uint8_t) sizeof(params->OID);
+		memcpy(pubkey_blob, params->OID, sizeof(params->OID));
+		pubkey_blob += sizeof(params->OID);
+		ntru_elements_2_octets(params->N, pubkey, params->q_bits,
+		                       pubkey_blob);
+		break;
+
+	default:
+		NTRU_RET(NTRU_BAD_PARAMETER);
+	}
+
+	NTRU_RET(NTRU_OK);
+}
+
+/* ntru_crypto_ntru_encrypt_key_recreate_pubkey_blob
+ *
+ * Returns a public key blob, recreated from an already-packed public key.
+ */
+
+uint32_t
+ntru_crypto_ntru_encrypt_key_recreate_pubkey_blob(
+    NTRU_ENCRYPT_PARAM_SET const *params, /*  in - pointer to
+                                                               param set
+                                                               parameters */
+    uint16_t packed_pubkey_len,           /*  in - no. octets in
+                                                               packed pubkey */
+    uint8_t const *packed_pubkey,         /*  in - pointer to the
+                                                               packed pubkey */
+    uint8_t pubkey_pack_type,             /*  in - pubkey packing
+                                                               type */
+    uint8_t *pubkey_blob)                 /* out - addr for the
+                                                               pubkey blob */
+{
+
+	switch (pubkey_pack_type) {
+	case NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS:
+		*pubkey_blob++ = NTRU_ENCRYPT_PUBKEY_TAG;
+		*pubkey_blob++ = (uint8_t) sizeof(params->OID);
+		memcpy(pubkey_blob, params->OID, sizeof(params->OID));
+		pubkey_blob += sizeof(params->OID);
+		memcpy(pubkey_blob, packed_pubkey, packed_pubkey_len);
+		break;
+
+	default:
+		NTRU_RET(NTRU_BAD_PARAMETER);
+	}
+
+	NTRU_RET(NTRU_OK);
+}
+
+/* ntru_crypto_ntru_encrypt_key_create_privkey_blob
+ *
+ * Returns a private key blob, packed according to the packing type provided.
+ */
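+
+/* The blob written below is the 5-octet header (tag, OID length octet and
+ * 3-octet OID) followed by the packed public key and then the packed private
+ * key, matching the lengths computed by
+ * ntru_crypto_ntru_encrypt_key_get_blob_params(). */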
+
+uint32_t
+ntru_crypto_ntru_encrypt_key_create_privkey_blob(
+    NTRU_ENCRYPT_PARAM_SET const *params, /*  in - pointer to
+                                                               param set
+                                                               parameters */
+    uint16_t const *pubkey,               /*  in - pointer to the
+                                                               coefficients
+                                                               of the pubkey */
+    uint16_t const *privkey,              /*  in - pointer to the
+                                                               indices of the
+                                                               privkey */
+    uint8_t privkey_pack_type,            /*  in - privkey packing
+                                                               type */
+    uint8_t *buf,                         /*  in - temp, N bytes */
+    uint8_t *privkey_blob)                /* out - addr for the
+                                                               privkey blob */
+{
+	switch (privkey_pack_type) {
+	case NTRU_ENCRYPT_KEY_PACKED_TRITS:
+	case NTRU_ENCRYPT_KEY_PACKED_INDICES:
+
+		/* format header and packed public key */
+
+		*privkey_blob++ = NTRU_ENCRYPT_PRIVKEY_DEFAULT_TAG;
+		*privkey_blob++ = (uint8_t) sizeof(params->OID);
+		memcpy(privkey_blob, params->OID, sizeof(params->OID));
+		privkey_blob += sizeof(params->OID);
+		ntru_elements_2_octets(params->N, pubkey, params->q_bits,
+		                       privkey_blob);
+		privkey_blob += (params->N * params->q_bits + 7) >> 3;
+
+		/* add packed private key */
+
+		if (privkey_pack_type == NTRU_ENCRYPT_KEY_PACKED_TRITS) {
+			ntru_indices_2_packed_trits(privkey, (uint16_t) params->dF_r,
+			                            (uint16_t) params->dF_r,
+			                            params->N, buf, privkey_blob);
+		} else {
+			uint32_t dF;
+
+			if (params->is_product_form) {
+				dF = (params->dF_r & 0xff) +
+				     ((params->dF_r >> 8) & 0xff) +
+				     ((params->dF_r >> 16) & 0xff);
+			} else {
+				dF = params->dF_r;
+			}
+
+			ntru_elements_2_octets((uint16_t) dF << 1, privkey,
+			                       params->N_bits, privkey_blob);
+		}
+		break;
+
+	default:
+		NTRU_RET(NTRU_BAD_PARAMETER);
+	}
+
+	NTRU_RET(NTRU_OK);
+}
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.h b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.h
new file mode 100644
index 0000000000000000000000000000000000000000..d2e3b03e3f942f249f91561ae4e1194f625b64a5
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_key.h
@@ -0,0 +1,156 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+#ifndef NTRU_CRYPTO_NTRU_ENCRYPT_KEY_H
+#define NTRU_CRYPTO_NTRU_ENCRYPT_KEY_H
+
+#include "ntru_crypto_ntru_convert.h"
+#include "ntru_crypto_ntru_encrypt_param_sets.h"
+
+/* key-blob definitions */
+
+#define NTRU_ENCRYPT_PUBKEY_TAG 0x01
+#define NTRU_ENCRYPT_PRIVKEY_DEFAULT_TAG 0x02
+#define NTRU_ENCRYPT_PRIVKEY_TRITS_TAG 0xfe
+#define NTRU_ENCRYPT_PRIVKEY_INDICES_TAG 0xff
+
+/* packing types */
+
+#define NTRU_ENCRYPT_KEY_PACKED_COEFFICIENTS 0x01
+#define NTRU_ENCRYPT_KEY_PACKED_INDICES 0x02
+#define NTRU_ENCRYPT_KEY_PACKED_TRITS 0x03
+
+/* function declarations */
+
+/* ntru_crypto_ntru_encrypt_key_parse
+ *
+ * Parses an NTRUEncrypt key blob.
+ * If the blob is not corrupt, returns packing types for public and private
+ * keys, a pointer to the parameter set, a pointer to the public key, and
+ * a pointer to the private key if it exists.
+ *
+ * Returns TRUE if successful.
+ * Returns FALSE if the blob is invalid.
+ */
+
+extern bool
+ntru_crypto_ntru_encrypt_key_parse(
+    bool pubkey_parse,               /*  in - if parsing pubkey
+                                                         blob */
+    uint16_t key_blob_len,           /*  in - no. octets in key
+                                                         blob */
+    uint8_t const *key_blob,         /*  in - pointer to key blob */
+    uint8_t *pubkey_pack_type,       /* out - addr for pubkey
+                                                         packing type */
+    uint8_t *privkey_pack_type,      /* out - addr for privkey
+                                                         packing type */
+    NTRU_ENCRYPT_PARAM_SET **params, /* out - addr for ptr to
+                                                         parameter set */
+    uint8_t const **pubkey,          /* out - addr for ptr to
+                                                         packed pubkey */
+    uint8_t const **privkey);        /* out - addr for ptr to
+                                                         packed privkey */
+
+/* ntru_crypto_ntru_encrypt_key_get_blob_params
+ *
+ * Returns public and private key packing types and blob lengths given
+ * a packing format.  For now, only a default packing format exists.
+ *
+ * Only the public-key parameters are returned when privkey_pack_type
+ * and privkey_blob_len are NULL.
+ */
+
+extern void
+ntru_crypto_ntru_encrypt_key_get_blob_params(
+    NTRU_ENCRYPT_PARAM_SET const *params, /*  in - pointer to
+                                                               param set
+                                                               parameters */
+    uint8_t *pubkey_pack_type,            /* out - addr for pubkey
+                                                               packing type */
+    uint16_t *pubkey_blob_len,            /* out - addr for no. of
+                                                               bytes in
+                                                               pubkey blob */
+    uint8_t *privkey_pack_type,           /* out - addr for privkey
+                                                               packing type */
+    uint16_t *privkey_blob_len);          /* out - addr for no. of
+                                                               bytes in
+                                                               privkey blob */
+
+/* ntru_crypto_ntru_encrypt_key_create_pubkey_blob
+ *
+ * Returns a public key blob, packed according to the packing type provided.
+ */
+
+extern uint32_t
+ntru_crypto_ntru_encrypt_key_create_pubkey_blob(
+    NTRU_ENCRYPT_PARAM_SET const *params, /*  in - pointer to
+                                                               param set
+                                                               parameters */
+    uint16_t const *pubkey,               /*  in - pointer to the
+                                                               coefficients
+                                                               of the pubkey */
+    uint8_t pubkey_pack_type,             /*  in - pubkey packing
+                                                               type */
+    uint8_t *pubkey_blob);                /* out - addr for the
+                                                               pubkey blob */
+
+/* ntru_crypto_ntru_encrypt_key_recreate_pubkey_blob
+ *
+ * Returns a public key blob, recreated from an already-packed public key.
+ */
+
+extern uint32_t
+ntru_crypto_ntru_encrypt_key_recreate_pubkey_blob(
+    NTRU_ENCRYPT_PARAM_SET const *params, /*  in - pointer to
+                                                               param set
+                                                               parameters */
+    uint16_t packed_pubkey_len,           /*  in - no. octets in
+                                                               packed pubkey */
+    uint8_t const *packed_pubkey,         /*  in - pointer to the
+                                                               packed pubkey */
+    uint8_t pubkey_pack_type,             /*  in - pubkey packing
+                                                               type */
+    uint8_t *pubkey_blob);                /* out - addr for the
+                                                               pubkey blob */
+
+/* ntru_crypto_ntru_encrypt_key_create_privkey_blob
+ *
+ * Returns a private key blob, packed according to the packing type provided.
+ */
+
+extern uint32_t
+ntru_crypto_ntru_encrypt_key_create_privkey_blob(
+    NTRU_ENCRYPT_PARAM_SET const *params, /*  in - pointer to
+                                                               param set
+                                                               parameters */
+    uint16_t const *pubkey,               /*  in - pointer to the
+                                                               coefficients
+                                                               of the pubkey */
+    uint16_t const *privkey,              /*  in - pointer to the
+                                                               indices of the
+                                                               privkey */
+    uint8_t privkey_pack_type,            /*  in - privkey packing
+                                                               type */
+    uint8_t *buf,                         /*  in - temp, N bytes */
+    uint8_t *privkey_blob);               /* out - addr for the
+                                                               privkey blob */
+
+#endif /* NTRU_CRYPTO_NTRU_ENCRYPT_KEY_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.c
new file mode 100644
index 0000000000000000000000000000000000000000..389e7a21a2c3714c270c54a130f7c4cb133cf901
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.c
@@ -0,0 +1,577 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_ntru_encrypt_param_sets.c
+ *
+ * Contents: Defines the NTRUEncrypt parameter sets.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_encrypt_param_sets.h"
+
+/* parameter sets */
+
+static NTRU_ENCRYPT_PARAM_SET ntruParamSets[] = {
+
+    {
+        NTRU_EES401EP1,              /* parameter-set id */
+        "ees401ep1",                 /* human readable param set name */
+        {0x00, 0x02, 0x04},          /* OID */
+        0x22,                        /* DER id */
+        9,                           /* no. of bits in N (i.e., in an index) */
+        401,                         /* N */
+        14,                          /* security strength in octets */
+        14,                          /* no. of octets for random string b */
+        2048,                        /* q */
+        11,                          /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                       /* product form */
+        113,                         /* df, dr */
+        133,                         /* dg */
+        60,                          /* maxMsgLenBytes */
+        113,                         /* dm0 */
+        2005,                        /* 2^c - (2^c mod N) */
+        11,                          /* c */
+        1,                           /* lLen */
+        41,                          /* min. no. of hash calls for IGF-2 */
+        7,                           /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES449EP1,              /* parameter-set id */
+        "ees449ep1",                 /* human readable param set name */
+        {0x00, 0x03, 0x03},          /* OID */
+        0x23,                        /* DER id */
+        9,                           /* no. of bits in N (i.e., in an index) */
+        449,                         /* N */
+        16,                          /* security strength in octets */
+        16,                          /* no. of octets for random string b */
+        2048,                        /* q */
+        11,                          /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                       /* product form */
+        134,                         /* df, dr */
+        149,                         /* dg */
+        67,                          /* maxMsgLenBytes */
+        134,                         /* dm0 */
+        449,                         /* 2^c - (2^c mod N) */
+        9,                           /* c */
+        1,                           /* lLen */
+        47,                          /* min. no. of hash calls for IGF-2 */
+        8,                           /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES677EP1,                /* parameter-set id */
+        "ees677ep1",                   /* human readable param set name */
+        {0x00, 0x05, 0x03},            /* OID */
+        0x24,                          /* DER id */
+        10,                            /* no. of bits in N (i.e., in an index) */
+        677,                           /* N */
+        24,                            /* security strength in octets */
+        24,                            /* no. of octets for random string b */
+        2048,                          /* q */
+        11,                            /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                         /* product form */
+        157,                           /* df, dr */
+        225,                           /* dg */
+        101,                           /* maxMsgLenBytes */
+        157,                           /* dm0 */
+        2031,                          /* 2^c - (2^c mod N) */
+        11,                            /* c */
+        1,                             /* lLen */
+        32,                            /* min. no. of hash calls for IGF-2 */
+        8,                             /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES1087EP2,               /* parameter-set id */
+        "ees1087ep2",                  /* human readable param set name */
+        {0x00, 0x06, 0x03},            /* OID */
+        0x25,                          /* DER id */
+        11,                            /* no. of bits in N (i.e., in an index) */
+        1087,                          /* N */
+        32,                            /* security strength in octets */
+        32,                            /* no. of octets for random string b */
+        2048,                          /* q */
+        11,                            /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                         /* product form */
+        120,                           /* df, dr */
+        362,                           /* dg */
+        170,                           /* maxMsgLenBytes */
+        120,                           /* dm0 */
+        7609,                          /* 2^c - (2^c mod N) */
+        13,                            /* c */
+        1,                             /* lLen */
+        27,                            /* min. no. of hash calls for IGF-2 */
+        11,                            /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES541EP1,              /* parameter-set id */
+        "ees541ep1",                 /* human readable param set name */
+        {0x00, 0x02, 0x05},          /* OID */
+        0x26,                        /* DER id */
+        10,                          /* no. of bits in N (i.e., in an index) */
+        541,                         /* N */
+        14,                          /* security strength in octets */
+        14,                          /* no. of octets for random string b */
+        2048,                        /* q */
+        11,                          /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                       /* product form */
+        49,                          /* df, dr */
+        180,                         /* dg */
+        86,                          /* maxMsgLenBytes */
+        49,                          /* dm0 */
+        3787,                        /* 2^c - (2^c mod N) */
+        12,                          /* c */
+        1,                           /* lLen */
+        16,                          /* min. no. of hash calls for IGF-2 */
+        9,                           /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES613EP1,              /* parameter-set id */
+        "ees613ep1",                 /* human readable param set name */
+        {0x00, 0x03, 0x04},          /* OID */
+        0x27,                        /* DER id */
+        10,                          /* no. of bits in N (i.e., in an index) */
+        613,                         /* N */
+        16,                          /* security strength in octets */
+        16,                          /* no. of octets for random string b */
+        2048,                        /* q */
+        11,                          /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                       /* product form */
+        55,                          /* df, dr */
+        204,                         /* dg */
+        97,                          /* maxMsgLenBytes */
+        55,                          /* dm0 */
+        1839,                        /* 2^c - (2^c mod N) */
+        11,                          /* c */
+        1,                           /* lLen */
+        18,                          /* min. no. of hash calls for IGF-2 */
+        10,                          /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES887EP1,                /* parameter-set id */
+        "ees887ep1",                   /* human readable param set name */
+        {0x00, 0x05, 0x04},            /* OID */
+        0x28,                          /* DER id */
+        10,                            /* no. of bits in N (i.e., in an index) */
+        887,                           /* N */
+        24,                            /* security strength in octets */
+        24,                            /* no. of octets for random string b */
+        2048,                          /* q */
+        11,                            /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                         /* product form */
+        81,                            /* df, dr */
+        295,                           /* dg */
+        141,                           /* maxMsgLenBytes */
+        81,                            /* dm0 */
+        887,                           /* 2^c - (2^c mod N) */
+        10,                            /* c */
+        1,                             /* lLen */
+        16,                            /* min. no. of hash calls for IGF-2 */
+        9,                             /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES1171EP1,               /* parameter-set id */
+        "ees1171ep1",                  /* human readable param set name */
+        {0x00, 0x06, 0x04},            /* OID */
+        0x29,                          /* DER id */
+        11,                            /* no. of bits in N (i.e., in an index) */
+        1171,                          /* N */
+        32,                            /* security strength in octets */
+        32,                            /* no. of octets for random string b */
+        2048,                          /* q */
+        11,                            /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                         /* product form */
+        106,                           /* df, dr */
+        390,                           /* dg */
+        186,                           /* maxMsgLenBytes */
+        106,                           /* dm0 */
+        3513,                          /* 2^c - (2^c mod N) */
+        12,                            /* c */
+        1,                             /* lLen */
+        25,                            /* min. no. of hash calls for IGF-2 */
+        12,                            /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES659EP1,              /* parameter-set id */
+        "ees659ep1",                 /* human readable param set name */
+        {0x00, 0x02, 0x06},          /* OID */
+        0x2a,                        /* DER id */
+        10,                          /* no. of bits in N (i.e., in an index) */
+        659,                         /* N */
+        14,                          /* security strength in octets */
+        14,                          /* no. of octets for random string b */
+        2048,                        /* q */
+        11,                          /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                       /* product form */
+        38,                          /* df, dr */
+        219,                         /* dg */
+        108,                         /* maxMsgLenBytes */
+        38,                          /* dm0 */
+        1977,                        /* 2^c - (2^c mod N) */
+        11,                          /* c */
+        1,                           /* lLen */
+        11,                          /* min. no. of hash calls for IGF-2 */
+        10,                          /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES761EP1,              /* parameter-set id */
+        "ees761ep1",                 /* human readable param set name */
+        {0x00, 0x03, 0x05},          /* OID */
+        0x2b,                        /* DER id */
+        10,                          /* no. of bits in N (i.e., in an index) */
+        761,                         /* N */
+        16,                          /* security strength in octets */
+        16,                          /* no. of octets for random string b */
+        2048,                        /* q */
+        11,                          /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                       /* product form */
+        42,                          /* df, dr */
+        253,                         /* dg */
+        125,                         /* maxMsgLenBytes */
+        42,                          /* dm0 */
+        3805,                        /* 2^c - (2^c mod N) */
+        12,                          /* c */
+        1,                           /* lLen */
+        14,                          /* min. no. of hash calls for IGF-2 */
+        12,                          /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES1087EP1,               /* parameter-set id */
+        "ees1087ep1",                  /* human readable param set name */
+        {0x00, 0x05, 0x05},            /* OID */
+        0x2c,                          /* DER id */
+        11,                            /* no. of bits in N (i.e., in an index) */
+        1087,                          /* N */
+        24,                            /* security strength in octets */
+        24,                            /* no. of octets for random string b */
+        2048,                          /* q */
+        11,                            /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                         /* product form */
+        63,                            /* df, dr */
+        362,                           /* dg */
+        178,                           /* maxMsgLenBytes */
+        63,                            /* dm0 */
+        7609,                          /* 2^c - (2^c mod N) */
+        13,                            /* c */
+        1,                             /* lLen */
+        14,                            /* min. no. of hash calls for IGF-2 */
+        11,                            /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES1499EP1,               /* parameter-set id */
+        "ees1499ep1",                  /* human readable param set name */
+        {0x00, 0x06, 0x05},            /* OID */
+        0x2d,                          /* DER id */
+        11,                            /* no. of bits in N (i.e., in an index) */
+        1499,                          /* N */
+        32,                            /* security strength in octets */
+        32,                            /* no. of octets for random string b */
+        2048,                          /* q */
+        11,                            /* no. of bits in q (i.e., in a coeff) */
+        FALSE,                         /* product form */
+        79,                            /* df, dr */
+        499,                           /* dg */
+        247,                           /* maxMsgLenBytes */
+        79,                            /* dm0 */
+        7495,                          /* 2^c - (2^c mod N) */
+        13,                            /* c */
+        1,                             /* lLen */
+        18,                            /* min. no. of hash calls for IGF-2 */
+        14,                            /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES401EP2,              /* parameter-set id */
+        "ees401ep2",                 /* human readable param set name */
+        {0x00, 0x02, 0x10},          /* OID */
+        0x2e,                        /* DER id */
+        9,                           /* no. of bits in N (i.e., in an index) */
+        401,                         /* N */
+        14,                          /* security strength in octets */
+        14,                          /* no. of octets for random string b */
+        2048,                        /* q */
+        11,                          /* no. of bits in q (i.e., in a coeff) */
+        TRUE,                        /* product form */
+        8 + (8 << 8) + (6 << 16),    /* df, dr */
+        133,                         /* dg */
+        60,                          /* maxMsgLenBytes */
+        101,                         /* dm0 */
+        2005,                        /* 2^c - (2^c mod N) */
+        11,                          /* c */
+        1,                           /* lLen */
+        7,                           /* min. no. of hash calls for IGF-2 */
+        7,                           /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES439EP1,              /* parameter-set id */
+        "ees439ep1",                 /* human readable param set name */
+        {0x00, 0x03, 0x10},          /* OID */
+        0x2f,                        /* DER id */
+        9,                           /* no. of bits in N (i.e., in an index) */
+        439,                         /* N */
+        16,                          /* security strength in octets */
+        16,                          /* no. of octets for random string b */
+        2048,                        /* q */
+        11,                          /* no. of bits in q (i.e., in a coeff) */
+        TRUE,                        /* product form */
+        9 + (8 << 8) + (5 << 16),    /* df, dr */
+        146,                         /* dg */
+        65,                          /* maxMsgLenBytes */
+        112,                         /* dm0 */
+        439,                         /* 2^c - (2^c mod N) */
+        9,                           /* c */
+        1,                           /* lLen */
+        8,                           /* min. no. of hash calls for IGF-2 */
+        8,                           /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA1, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES593EP1,                /* parameter-set id */
+        "ees593ep1",                   /* human readable param set name */
+        {0x00, 0x05, 0x10},            /* OID */
+        0x30,                          /* DER id */
+        10,                            /* no. of bits in N (i.e., in an index) */
+        593,                           /* N */
+        24,                            /* security strength in octets */
+        24,                            /* no. of octets for random string b */
+        2048,                          /* q */
+        11,                            /* no. of bits in q (i.e., in a coeff) */
+        TRUE,                          /* product form */
+        10 + (10 << 8) + (8 << 16),    /* df, dr */
+        197,                           /* dg */
+        86,                            /* maxMsgLenBytes */
+        158,                           /* dm0 */
+        1779,                          /* 2^c - (2^c mod N) */
+        11,                            /* c */
+        1,                             /* lLen */
+        9,                             /* min. no. of hash calls for IGF-2 */
+        7,                             /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES743EP1,                /* parameter-set id */
+        "ees743ep1",                   /* human readable param set name */
+        {0x00, 0x06, 0x10},            /* OID */
+        0x31,                          /* DER id */
+        10,                            /* no. of bits in N (i.e., in an index) */
+        743,                           /* N */
+        32,                            /* security strength in octets */
+        32,                            /* no. of octets for random string b */
+        2048,                          /* q */
+        11,                            /* no. of bits in q (i.e., in a coeff) */
+        TRUE,                          /* product form */
+        11 + (11 << 8) + (15 << 16),   /* df, dr */
+        247,                           /* dg */
+        106,                           /* maxMsgLenBytes */
+        204,                           /* dm0 */
+        8173,                          /* 2^c - (2^c mod N) */
+        13,                            /* c */
+        1,                             /* lLen */
+        9,                             /* min. no. of hash calls for IGF-2 */
+        9,                             /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES443EP1,                /* parameter-set id */
+        "ees443ep1",                   /* human readable param set name */
+        {0x00, 0x03, 0x11},            /* OID */
+        0x32,                          /* DER id */
+        9,                             /* no. of bits in N (i.e., in an index) */
+        443,                           /* N */
+        16,                            /* security strength in octets */
+        32,                            /* no. of octets for random string b */
+        2048,                          /* q */
+        11,                            /* no. of bits in q (i.e., in a coeff) */
+        TRUE,                          /* product form */
+        9 + (8 << 8) + (5 << 16),      /* df, dr */
+        148,                           /* dg */
+        49,                            /* maxMsgLenBytes */
+        115,                           /* dm0 */
+        443,                           /* 2^c - (2^c mod N) */
+        9,                             /* c */
+        1,                             /* lLen */
+        5,                             /* min. no. of hash calls for IGF-2 */
+        5,                             /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1,
+                                        HMAC-DRBG, etc. */
+    },
+
+    {
+        NTRU_EES587EP1,                /* parameter-set id */
+        "ees587ep1",                   /* human readable param set name */
+        {0x00, 0x05, 0x11},            /* OID */
+        0x33,                          /* DER id */
+        10,                            /* no. of bits in N (i.e., in an index) */
+        587,                           /* N */
+        24,                            /* security strength in octets */
+        32,                            /* no. of octets for random string b  */
+        2048,                          /* q */
+        11,                            /* no. of bits in q (i.e., in a coeff) */
+        TRUE,                          /* product form */
+        10 + (10 << 8) + (8 << 16),    /* df, dr */
+        196,                           /* dg */
+        76,                            /* maxMsgLenBytes */
+        157,                           /* dm0 */
+        1761,                          /* 2^c - (2^c mod N) */
+        11,                            /* c */
+        1,                             /* lLen */
+        7,                             /* min. no. of hash calls for IGF-2 */
+        7,                             /* min. no. of hash calls for MGF-TP-1 */
+        NTRU_CRYPTO_HASH_ALGID_SHA256, /* hash function for MGF-TP-1,
+                                           HMAC-DRBG, etc. */
+    },
+};
+
+static size_t numParamSets =
+    sizeof(ntruParamSets) / sizeof(NTRU_ENCRYPT_PARAM_SET);
+
+/* functions */
+
+/* ntru_encrypt_get_params_with_id
+ *
+ * Looks up a set of NTRUEncrypt parameters based on the id of the
+ * parameter set.
+ *
+ * Returns a pointer to the parameter set parameters if successful.
+ * Returns NULL if the parameter set cannot be found.
+ */
+
+NTRU_ENCRYPT_PARAM_SET *
+ntru_encrypt_get_params_with_id(
+    NTRU_ENCRYPT_PARAM_SET_ID id) /*  in - parameter-set id */
+{
+	size_t i;
+
+	for (i = 0; i < numParamSets; i++) {
+		if (ntruParamSets[i].id == id) {
+			return &(ntruParamSets[i]);
+		}
+	}
+
+	return NULL;
+}
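+
+/* Illustrative usage (hypothetical caller, not part of this file):
+ *
+ *     NTRU_ENCRYPT_PARAM_SET *p = ntru_encrypt_get_params_with_id(NTRU_EES401EP1);
+ *
+ *     if (p != NULL) {
+ *         // p->N == 401, p->q == 2048, p->sec_strength_len == 14
+ *     }
+ */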
+
+/* ntru_encrypt_get_params_with_OID
+ *
+ * Looks up a set of NTRUEncrypt parameters based on the OID of the
+ * parameter set.
+ *
+ * Returns a pointer to the parameter set parameters if successful.
+ * Returns NULL if the parameter set cannot be found.
+ */
+
+NTRU_ENCRYPT_PARAM_SET *
+ntru_encrypt_get_params_with_OID(
+    uint8_t const *oid) /*  in - pointer to parameter-set OID */
+{
+	size_t i;
+
+	for (i = 0; i < numParamSets; i++) {
+		if (!memcmp(ntruParamSets[i].OID, oid, 3)) {
+			return &(ntruParamSets[i]);
+		}
+	}
+
+	return NULL;
+}
+
+/* ntru_encrypt_get_params_with_DER_id
+ *
+ * Looks up a set of NTRUEncrypt parameters based on the DER id of the
+ * parameter set.
+ *
+ * Returns a pointer to the parameter set parameters if successful.
+ * Returns NULL if the parameter set cannot be found.
+ */
+
+NTRU_ENCRYPT_PARAM_SET *
+ntru_encrypt_get_params_with_DER_id(
+    uint8_t der_id) /*  in - parameter-set DER id */
+{
+	size_t i;
+
+	for (i = 0; i < numParamSets; i++) {
+		if (ntruParamSets[i].der_id == der_id) {
+			return &(ntruParamSets[i]);
+		}
+	}
+	return NULL;
+}
+
+const char *
+ntru_encrypt_get_param_set_name(
+    NTRU_ENCRYPT_PARAM_SET_ID id) /*  in - parameter-set id */
+{
+	size_t i;
+
+	for (i = 0; i < numParamSets; i++) {
+		if (ntruParamSets[i].id == id) {
+			return ntruParamSets[i].name;
+		}
+	}
+
+	return NULL;
+}
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.h b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.h
new file mode 100644
index 0000000000000000000000000000000000000000..780f0ed262d311d15dbfbe64a8906e0b6edc37c6
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_encrypt_param_sets.h
@@ -0,0 +1,119 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_ntru_encrypt_param_sets.h
+ *
+ * Contents: Definitions and declarations for the NTRUEncrypt parameter sets.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_NTRU_ENCRYPT_PARAM_SETS_H
+#define NTRU_CRYPTO_NTRU_ENCRYPT_PARAM_SETS_H
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_hash_basics.h"
+
+/* structures */
+
+typedef struct _NTRU_ENCRYPT_PARAM_SET {
+	NTRU_ENCRYPT_PARAM_SET_ID id;      /* parameter-set ID */
+	const char *name;                  /* human readable param set name */
+	uint8_t const OID[3];              /* parameter-set OID */
+	uint8_t der_id;                    /* parameter-set DER id */
+	uint8_t N_bits;                    /* no. of bits in N (i.e. in
+                                                     an index) */
+	uint16_t N;                        /* ring dimension */
+	uint16_t sec_strength_len;         /* no. of octets of
+                                                     security strength */
+	uint16_t b_len;                    /* no. of octets for random
+                                                     string b */
+	uint16_t q;                        /* big modulus */
+	uint8_t q_bits;                    /* no. of bits in q (i.e. in
+                                                     a coefficient) */
+	bool is_product_form;              /* if product form used */
+	uint32_t dF_r;                     /* no. of 1 or -1 coefficients
+                                                     in ring elements F, r */
+	uint16_t dg;                       /* one less than the no. of +1
+                                                     coefficients, or the no.
+                                                     of -1 coefficients, in
+                                                     ring element g */
+	uint16_t m_len_max;                /* max no. of plaintext
+                                                     octets */
+	uint16_t min_msg_rep_wt;           /* min. message
+                                                     representative weight */
+	uint16_t no_bias_limit;            /* limit for no bias in
+                                                     IGF-2 */
+	uint8_t c_bits;                    /* no. bits in candidate for
+                                                     deriving an index in
+                                                     IGF-2 */
+	uint8_t m_len_len;                 /* no. of octets to hold
+                                                     mLenOctets */
+	uint8_t min_IGF_hash_calls;        /* min. no. of hash calls for
+                                                     IGF-2 */
+	uint8_t min_MGF_hash_calls;        /* min. no. of hash calls for
+                                                     MGF-TP-1 */
+	NTRU_CRYPTO_HASH_ALGID hash_algid; /* hash function for MGF-TP-1,
+                                                     HMAC-DRBG, etc. */
+} NTRU_ENCRYPT_PARAM_SET;
+
+/* function declarations */
+
+/* ntru_encrypt_get_params_with_id
+ *
+ * Looks up a set of NTRU Encrypt parameters based on the id of the
+ * parameter set.
+ *
+ * Returns a pointer to the parameter set parameters if successful.
+ * Returns NULL if the parameter set cannot be found.
+ */
+
+extern NTRU_ENCRYPT_PARAM_SET *
+ntru_encrypt_get_params_with_id(
+    NTRU_ENCRYPT_PARAM_SET_ID id); /*  in - parameter-set id */
+
+/* ntru_encrypt_get_params_with_OID
+ *
+ * Looks up a set of NTRU Encrypt parameters based on the OID of the
+ * parameter set.
+ *
+ * Returns a pointer to the parameter set parameters if successful.
+ * Returns NULL if the parameter set cannot be found.
+ */
+
+extern NTRU_ENCRYPT_PARAM_SET *
+ntru_encrypt_get_params_with_OID(
+    uint8_t const *oid); /*  in - pointer to parameter-set OID */
+
+/* ntru_encrypt_get_params_with_DER_id
+ *
+ * Looks up a set of NTRUEncrypt parameters based on the DER id of the
+ * parameter set.
+ *
+ * Returns a pointer to the parameter set parameters if successful.
+ * Returns NULL if the parameter set cannot be found.
+ */
+
+extern NTRU_ENCRYPT_PARAM_SET *
+ntru_encrypt_get_params_with_DER_id(
+    uint8_t der_id); /*  in - parameter-set DER id */
+
+#endif /* NTRU_CRYPTO_NTRU_ENCRYPT_PARAM_SETS_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.c
new file mode 100644
index 0000000000000000000000000000000000000000..4660ac5b7c61f91e48f76374f0bf22f6ec850ccc
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.c
@@ -0,0 +1,193 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_ntru_mgf1.c
+ *
+ * Contents: Routines implementing MGF-TP-1 and MGF-1.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_mgf1.h"
+#include "ntru_crypto_ntru_convert.h"
+
+/* ntru_mgf1
+ *
+ * Implements a basic mask-generation function, generating an arbitrary
+ * number of octets based on hashing a digest-length string concatenated
+ * with a 4-octet counter.
+ *
+ * The state (string and counter) is initialized when a seed is present.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_CRYPTO_HASH_ errors if they occur.
+ *
+ */
+
+uint32_t
+ntru_mgf1(
+    uint8_t *state,               /* in/out - pointer to the state */
+    NTRU_CRYPTO_HASH_ALGID algid, /*     in - hash algorithm ID */
+    uint8_t md_len,               /*     in - no. of octets in digest */
+    uint8_t num_calls,            /*     in - no. of hash calls */
+    uint16_t seed_len,            /*     in - no. of octets in seed */
+    uint8_t const *seed,          /*     in - pointer to seed */
+    uint8_t *out)                 /*    out - address for output */
+{
+	uint8_t *ctr = state + md_len;
+	uint32_t retcode;
+
+	/* if seed present, init state */
+
+	if (seed) {
+		if ((retcode = ntru_crypto_hash_digest(algid, seed, seed_len, state)) !=
+		    NTRU_CRYPTO_HASH_OK) {
+			return retcode;
+		}
+
+		memset(ctr, 0, 4);
+	}
+
+	/* generate output */
+
+	while (num_calls-- > 0) {
+		if ((retcode = ntru_crypto_hash_digest(algid, state, md_len + 4,
+		                                       out)) != NTRU_CRYPTO_HASH_OK) {
+			return retcode;
+		}
+
+		out += md_len;
+
+		/* increment counter */
+
+		if (++ctr[3] == 0) {
+			if (++ctr[2] == 0) {
+				if (++ctr[1] == 0) {
+					++ctr[0];
+				}
+			}
+		}
+	}
+
+	NTRU_RET(NTRU_OK);
+}
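+
+/* Buffer-sizing note (derived from the code above): "state" must hold
+ * md_len + 4 octets -- the running digest followed by a 4-octet big-endian
+ * counter -- and "out" receives num_calls * md_len octets of output. */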
+
+/* ntru_mgftp1
+ *
+ * Implements a mask-generation function for trinary polynomials,
+ * MGF-TP-1, generating an arbitrary number of octets based on hashing
+ * a digest-length string concatenated with a 4-octet counter.  From
+ * these octets, N trits are derived.
+ *
+ * The state (string and counter) is initialized when a seed is present.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_CRYPTO_HASH_ errors if they occur.
+ *
+ */
+
+uint32_t
+ntru_mgftp1(
+    NTRU_CRYPTO_HASH_ALGID hash_algid, /*  in - hash alg ID for
+                                                       MGF-TP-1 */
+    uint8_t md_len,                    /*  in - no. of octets in
+                                                       digest */
+    uint8_t min_calls,                 /*  in - minimum no. of hash
+                                                       calls */
+    uint16_t seed_len,                 /*  in - no. of octets in seed */
+    uint8_t *seed,                     /*  in - pointer to seed */
+    uint8_t *buf,                      /*  in - pointer to working
+                                                       buffer */
+    uint16_t num_trits_needed,         /*  in - no. of trits in mask */
+    uint8_t *mask)                     /* out - address for mask trits */
+{
+	uint8_t *mgf_out;
+	uint8_t *octets;
+	uint16_t octets_available;
+	uint32_t retcode;
+
+	/* generate minimum MGF1 output */
+
+	mgf_out = buf + md_len + 4;
+	if ((retcode = ntru_mgf1(buf, hash_algid, md_len, min_calls,
+	                         seed_len, seed, mgf_out)) != NTRU_OK) {
+		return retcode;
+	}
+
+	octets = mgf_out;
+	octets_available = min_calls * md_len;
+
+	/* get trits for mask */
+
+	while (num_trits_needed >= 5) {
+		/* get another octet and convert it to 5 trits */
+
+		if (octets_available == 0) {
+			if ((retcode = ntru_mgf1(buf, hash_algid, md_len, 1,
+			                         0, NULL, mgf_out)) != NTRU_OK) {
+				return retcode;
+			}
+
+			octets = mgf_out;
+			octets_available = md_len;
+		}
+
+		if (*octets < 243) {
+			ntru_octet_2_trits(*octets, mask);
+			mask += 5;
+			num_trits_needed -= 5;
+		}
+
+		octets++;
+		--octets_available;
+	}
+
+	/* get any remaining trits */
+
+	while (num_trits_needed) {
+		uint8_t trits[5];
+
+		/* get another octet and convert it to remaining trits */
+
+		if (octets_available == 0) {
+			if ((retcode = ntru_mgf1(buf, hash_algid, md_len, 1,
+			                         0, NULL, mgf_out)) != NTRU_OK) {
+				return retcode;
+			}
+
+			octets = mgf_out;
+			octets_available = md_len;
+		}
+
+		if (*octets < 243) {
+			ntru_octet_2_trits(*octets, trits);
+			memcpy(mask, trits, num_trits_needed);
+			num_trits_needed = 0;
+		} else {
+			octets++;
+			--octets_available;
+		}
+	}
+
+	NTRU_RET(NTRU_OK);
+}
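+
+/* Note (derived from the code above): octets >= 243 are rejected because
+ * 243 = 3^5, so every accepted octet maps bijectively onto exactly 5 trits
+ * via ntru_octet_2_trits(), keeping the derived trits uniformly distributed
+ * when the MGF-1 output octets are uniform. */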
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.h b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.h
new file mode 100644
index 0000000000000000000000000000000000000000..546d4bf3e5eccd2760a6b367fc8d46c589a82850
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mgf1.h
@@ -0,0 +1,90 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File:  ntru_crypto_ntru_mgf1.h
+ *
+ * Contents: Public header file for MGF-1 in the NTRU algorithm.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_NTRU_MGF1_H
+#define NTRU_CRYPTO_NTRU_MGF1_H
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_hash.h"
+
+/* function declarations */
+
+/* ntru_mgf1
+ *
+ * Implements a basic mask-generation function, generating an arbitrary
+ * number of octets based on hashing a digest-length string concatenated
+ * with a 4-octet counter.
+ *
+ * The state (string and counter) is initialized when a seed is present.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_CRYPTO_HASH_ errors if they occur.
+ *
+ */
+
+extern uint32_t
+ntru_mgf1(
+    uint8_t *state,               /* in/out - pointer to the state */
+    NTRU_CRYPTO_HASH_ALGID algid, /*     in - hash algorithm ID */
+    uint8_t md_len,               /*     in - no. of octets in digest */
+    uint8_t num_calls,            /*     in - no. of hash calls */
+    uint16_t seed_len,            /*     in - no. of octets in seed */
+    uint8_t const *seed,          /*     in - pointer to seed */
+    uint8_t *out);                /*    out - address for output */
+
+/* ntru_mgftp1
+ *
+ * Implements a mask-generation function for trinary polynomials,
+ * MGF-TP-1, generating an arbitrary number of octets based on hashing
+ * a digest-length string concatenated with a 4-octet counter.  From
+ * these octets, N trits are derived.
+ *
+ * The state (string and counter) is initialized when a seed is present.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns NTRU_CRYPTO_HASH_ errors if they occur.
+ *
+ */
+
+extern uint32_t
+ntru_mgftp1(
+    NTRU_CRYPTO_HASH_ALGID hash_algid, /*  in - hash alg ID for
+                                                       MGF-TP-1 */
+    uint8_t md_len,                    /*  in - no. of octets in
+                                                       digest */
+    uint8_t min_calls,                 /*  in - minimum no. of hash
+                                                       calls */
+    uint16_t seed_len,                 /*  in - no. of octets in seed */
+    uint8_t *seed,                     /*  in - pointer to seed */
+    uint8_t *buf,                      /*  in - pointer to working
+                                                       buffer */
+    uint16_t num_trits_needed,         /*  in - no. of trits in mask */
+    uint8_t *mask);                    /* out - address for mask trits */
+
+#endif /* NTRU_CRYPTO_NTRU_MGF1_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_karat.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_karat.c
new file mode 100644
index 0000000000000000000000000000000000000000..1fa8e7e519761cbbc8cbb5b302cede15178c7495
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_karat.c
@@ -0,0 +1,137 @@
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_poly.h"
+
+#define PAD(N) ((N + 0x000f) & 0xfff0)
+
+static void
+grade_school_mul(
+    uint16_t *res1,    /* out - a * b in Z[x], must be length 2N */
+    uint16_t const *a, /*  in - polynomial */
+    uint16_t const *b, /*  in - polynomial */
+    uint16_t const N)  /*  in - number of coefficients in a and b */
+{
+	uint16_t i;
+	uint16_t j;
+
+	for (j = 0; j < N; j++) {
+		res1[j] = a[0] * b[j];
+	}
+	for (i = 1; i < N; i++) {
+		res1[i + N - 1] = 0;
+		for (j = 0; j < N; j++) {
+			res1[i + j] += a[i] * b[j];
+		}
+	}
+	res1[2 * N - 1] = 0;
+
+	return;
+}
+
+static void
+karatsuba(
+    uint16_t *res1,    /* out - a * b in Z[x], must be length 2k */
+    uint16_t *tmp1,    /*  in - k coefficients of scratch space */
+    uint16_t const *a, /*  in - polynomial */
+    uint16_t const *b, /*  in - polynomial */
+    uint16_t const k)  /*  in - number of coefficients in a and b */
+{
+	uint16_t i;
+
+	uint16_t const p = k >> 1;
+
+	uint16_t *res2;
+	uint16_t *res3;
+	uint16_t *res4;
+	uint16_t *tmp2;
+	uint16_t const *a2;
+	uint16_t const *b2;
+
+	/* Grade school multiplication for small / odd inputs */
+	if (k <= 38 || (k & 1) != 0) {
+		grade_school_mul(res1, a, b, k);
+		return;
+	}
+
+	res2 = res1 + p;
+	res3 = res1 + k;
+	res4 = res1 + k + p;
+	tmp2 = tmp1 + p;
+	a2 = a + p;
+	b2 = b + p;
+
+	for (i = 0; i < p; i++) {
+		res1[i] = a[i] - a2[i];
+		res2[i] = b2[i] - b[i];
+	}
+
+	karatsuba(tmp1, res3, res1, res2, p);
+
+	karatsuba(res3, res1, a2, b2, p);
+
+	for (i = 0; i < p; i++) {
+		tmp1[i] += res3[i];
+	}
+
+	for (i = 0; i < p; i++) {
+		res2[i] = tmp1[i];
+		tmp2[i] += res4[i];
+		res3[i] += tmp2[i];
+	}
+
+	karatsuba(tmp1, res1, a, b, p);
+
+	for (i = 0; i < p; i++) {
+		res1[i] = tmp1[i];
+		res2[i] += tmp1[i] + tmp2[i];
+		res3[i] += tmp2[i];
+	}
+
+	return;
+}
+
+void ntru_ring_mult_coefficients_memreq(
+    uint16_t N,
+    uint16_t *tmp_polys,
+    uint16_t *poly_coeffs) {
+	if (tmp_polys) {
+		*tmp_polys = 3;
+	}
+
+	if (poly_coeffs) {
+		*poly_coeffs = PAD(N);
+	}
+}
+
+/* ntru_ring_mult_coefficients
+ *
+ * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b"
+ * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1).
+ * This is a convolution operation.
+ *
+ * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum
+ * beyond 16 bits does not matter.
+ */
+
+void ntru_ring_mult_coefficients(
+    uint16_t const *a, /*  in - pointer to polynomial a */
+    uint16_t const *b, /*  in - pointer to polynomial b */
+    uint16_t N,        /*  in - degree of (x^N - 1) */
+    uint16_t q,        /*  in - large modulus */
+    uint16_t *tmp,     /*  in - temp buffer of 3*padN elements */
+    uint16_t *c)       /* out - address for polynomial c */
+{
+	uint16_t i;
+	uint16_t q_mask = q - 1;
+
+	memset(tmp, 0, 3 * PAD(N) * sizeof(uint16_t));
+	karatsuba(tmp, tmp + 2 * PAD(N), a, b, PAD(N));
+
+	for (i = 0; i < N; i++) {
+		c[i] = (tmp[i] + tmp[i + N]) & q_mask;
+	}
+	for (; i < PAD(N); i++) {
+		c[i] = 0;
+	}
+
+	return;
+}
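The last loop above is where the ring structure enters: the Karatsuba product has up to 2N coefficients, and because x^N ≡ 1 in (Z/qZ)[X]/(X^N - 1), coefficient i+N folds back onto coefficient i before the mod-q mask. An O(N^2) reference version of the whole operation makes a convenient test oracle; this sketch is illustrative only and uses a hypothetical helper name:

    #include <stdint.h>

    /* Reference convolution in (Z/qZ)[X]/(X^N - 1) for q a power of two.
     * Educational only: quadratic time, no padding or scratch reuse. */
    static void ring_mult_reference(const uint16_t *a, const uint16_t *b,
                                    uint16_t N, uint16_t q, uint16_t *c)
    {
        uint16_t q_mask = q - 1;
        uint16_t i, j, k;

        for (k = 0; k < N; k++) {
            uint16_t sum = 0;
            for (i = 0; i < N; i++) {
                /* a[i]*x^i times b[j]*x^j contributes to x^((i+j) mod N) */
                j = (uint16_t)((k + N - i) % N);
                /* truncation mod 2^16 is harmless since q divides 2^16 */
                sum = (uint16_t)(sum + (uint32_t) a[i] * b[j]);
            }
            c[k] = sum & q_mask;
        }
    }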
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_simd.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_simd.c
new file mode 100644
index 0000000000000000000000000000000000000000..88c0177b7f47ded8edb47db4c492ad12f05a5e9d
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_coeffs_simd.c
@@ -0,0 +1,131 @@
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_poly.h"
+#include <immintrin.h>
+
+#define PAD(N) ((N + 0x0007) & 0xfff8)
+
+static void
+grade_school_mul(
+    uint16_t *res1,    /* out - a * b in Z[x], must be length 2N */
+    uint16_t const *a, /*  in - polynomial */
+    uint16_t const *b, /*  in - polynomial */
+    uint16_t const N)  /*  in - number of coefficients in a and b */
+{
+	__m128i *T;
+
+	uint16_t i;
+	uint16_t j;
+	uint16_t m;
+
+	__m128i ai8;
+	__m128i ai8h;
+	__m128i ai8l;
+	__m128i abroad[8];
+
+	__m128i cur;
+	__m128i next;
+
+	__m128i x1;
+	__m128i x2;
+
+	T = (__m128i *) res1;
+	memset(T, 0, 2 * PAD(N) * sizeof(uint16_t));
+	for (i = 0; i < PAD(N) / 8; i++) {
+		/* Broadcast each of the uint16's at a[8*i] into 8
+       copies of that value in a separate __m128i. */
+		ai8 = _mm_load_si128((__m128i *) a + i);
+		ai8h = _mm_unpackhi_epi16(ai8, ai8);
+		ai8l = _mm_unpacklo_epi16(ai8, ai8);
+		abroad[0] = _mm_shuffle_epi32(ai8h, 0xFFFF);
+		abroad[1] = _mm_shuffle_epi32(ai8h, 0xAAAA);
+		abroad[2] = _mm_shuffle_epi32(ai8h, 0x5555);
+		abroad[3] = _mm_shuffle_epi32(ai8h, 0x0000);
+
+		abroad[4] = _mm_shuffle_epi32(ai8l, 0xFFFF);
+		abroad[5] = _mm_shuffle_epi32(ai8l, 0xAAAA);
+		abroad[6] = _mm_shuffle_epi32(ai8l, 0x5555);
+		abroad[7] = _mm_shuffle_epi32(ai8l, 0x0000);
+
+		/* Load a 256 bit section of b.
+       Shift it down by 2*(m+1) bytes and multiply the
+       low 128 bits by abroad[m]. Add all 8 of these
+       values to T[i+j]. */
+		cur = _mm_setzero_si128();
+		for (j = 0; j < PAD(N) / 8; j++) {
+			next = _mm_load_si128((__m128i *) b + j);
+
+			x2 = _mm_xor_si128(x2, x2);
+			for (m = 0; m < 8; m++) {
+				cur = _mm_alignr_epi8(next, cur, 2);
+				next = _mm_srli_si128(next, 2);
+
+				x1 = _mm_mullo_epi16(cur, abroad[m]);
+				x2 = _mm_add_epi16(x2, x1);
+			}
+			x2 = _mm_add_epi16(x2, _mm_load_si128(T + i + j));
+			_mm_store_si128(T + i + j, x2);
+		}
+
+		/* Handle the last N&7 coefficients from a */
+		x2 = _mm_xor_si128(x2, x2);
+		for (m = 0; m < (N & 7); m++) {
+			cur = _mm_srli_si128(cur, 2);
+
+			x1 = _mm_mullo_epi16(cur, abroad[m]);
+			x2 = _mm_add_epi16(x2, x1);
+		}
+		_mm_store_si128(T + i + j, x2);
+	}
+
+	return;
+}
+
+/* To multiply polynomials mod x^N - 1 this mult_coefficients implementation
+ * needs scratch space of size num_polys * num_coeffs * sizeof(uint16_t) */
+void ntru_ring_mult_coefficients_memreq(
+    uint16_t N,
+    uint16_t *num_polys,
+    uint16_t *num_coeffs) {
+	if (num_polys) {
+		*num_polys = 2;
+	}
+
+	if (num_coeffs) {
+		*num_coeffs = PAD(N);
+	}
+}
+
+/* ntru_ring_mult_coefficients
+ *
+ * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b"
+ * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1).
+ * This is a convolution operation.
+ *
+ * Ring element "b" has coefficients in the range [0,N).
+ *
+ * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum
+ * beyond 16 bits does not matter.
+ */
+
+void ntru_ring_mult_coefficients(
+    uint16_t const *a, /*  in - pointer to polynomial a */
+    uint16_t const *b, /*  in - pointer to polynomial b */
+    uint16_t N,        /*  in - degree of (x^N - 1) */
+    uint16_t q,        /*  in - large modulus */
+    uint16_t *tmp,     /*  in - temp buffer of 2*PAD(N) elements */
+    uint16_t *c)       /* out - address for polynomial c */
+{
+	uint16_t i;
+	uint16_t q_mask = q - 1;
+
+	grade_school_mul(tmp, a, b, N);
+
+	for (i = 0; i < N; i++) {
+		c[i] = (tmp[i] + tmp[i + N]) & q_mask;
+	}
+	for (; i < PAD(N); i++) {
+		c[i] = 0;
+	}
+
+	return;
+}
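Because the Karatsuba and SIMD implementations report different scratch needs (3 versus 2 padded polynomials), callers are expected to size the temp buffer through the memreq query instead of hard-coding a multiple of PAD(N). A possible calling pattern -- illustrative only, and it assumes a and b are already zero-padded to pad_deg coefficients, as the library arranges internally:

    #include <stdint.h>
    #include <stdlib.h>
    #include "ntru_crypto_ntru_poly.h"

    /* Multiply two padded ring elements, sizing the scratch buffer from the
     * implementation's own report. Returns 0 on success, -1 on allocation
     * failure. */
    int mult_coefficients_example(const uint16_t *a, const uint16_t *b,
                                  uint16_t N, uint16_t q, uint16_t *c)
    {
        uint16_t num_polys;
        uint16_t pad_deg;
        uint16_t *tmp;

        ntru_ring_mult_coefficients_memreq(N, &num_polys, &pad_deg);

        tmp = malloc((size_t) num_polys * pad_deg * sizeof(uint16_t));
        if (tmp == NULL) {
            return -1;
        }

        ntru_ring_mult_coefficients(a, b, N, q, tmp, c);

        free(tmp);
        return 0;
    }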
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices.c
new file mode 100644
index 0000000000000000000000000000000000000000..f532d0172ce2927708cec764b00a58513028b573
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices.c
@@ -0,0 +1,98 @@
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_poly.h"
+
+void ntru_ring_mult_indices_memreq(
+    uint16_t N,
+    uint16_t *tmp_polys,
+    uint16_t *poly_coeffs) {
+	if (tmp_polys) {
+		*tmp_polys = 1;
+	}
+
+	if (poly_coeffs) {
+		*poly_coeffs = N;
+	}
+}
+
+/* ntru_ring_mult_indices
+ *
+ * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b"
+ * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1).
+ * This is a convolution operation.
+ *
+ * Ring element "b" is a sparse trinary polynomial with coefficients -1, 0,
+ * and 1.  It is specified by a list, bi, of its nonzero indices containing
+ * indices for the bi_P1_len +1 coefficients followed by the indices for the
+ * bi_M1_len -1 coefficients.
+ * The indices are in the range [0,N).
+ *
+ * The result array "c" may share the same memory space as input array "a",
+ * input array "b", or temp array "t".
+ *
+ * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum
+ * beyond 16 bits does not matter.
+ */
+
+void ntru_ring_mult_indices(
+    uint16_t const *a,        /*  in - pointer to ring element a */
+    uint16_t const bi_P1_len, /*  in - no. of +1 coefficients in b */
+    uint16_t const bi_M1_len, /*  in - no. of -1 coefficients in b */
+    uint16_t const *bi,       /*  in - pointer to the list of nonzero
+                                         indices of ring element b,
+                                         containing indices for the +1
+                                         coefficients followed by the
+                                         indices for -1 coefficients */
+    uint16_t const N,         /*  in - no. of coefficients in a, b, c */
+    uint16_t const q,         /*  in - large modulus */
+    uint16_t *t,              /*  in - temp buffer of N elements */
+    uint16_t *c)              /* out - address for polynomial c */
+{
+	uint16_t mod_q_mask = q - 1;
+	uint16_t i, j, k;
+
+	/* t[(i+k)%N] = sum i=0 through N-1 of a[i], for b[k] = -1 */
+
+	for (k = 0; k < N; k++) {
+		t[k] = 0;
+	}
+
+	for (j = bi_P1_len; j < bi_P1_len + bi_M1_len; j++) {
+		k = bi[j];
+
+		for (i = 0; k < N; ++i, ++k) {
+			t[k] = t[k] + a[i];
+		}
+
+		for (k = 0; i < N; ++i, ++k) {
+			t[k] = t[k] + a[i];
+		}
+	}
+
+	/* t[(i+k)%N] = -(sum i=0 through N-1 of a[i] for b[k] = -1) */
+
+	for (k = 0; k < N; k++) {
+		t[k] = -t[k];
+	}
+
+	/* t[(i+k)%N] += sum i=0 through N-1 of a[i] for b[k] = +1 */
+
+	for (j = 0; j < bi_P1_len; j++) {
+		k = bi[j];
+
+		for (i = 0; k < N; ++i, ++k) {
+			t[k] = t[k] + a[i];
+		}
+
+		for (k = 0; i < N; ++i, ++k) {
+			t[k] = t[k] + a[i];
+		}
+	}
+
+	/* c = (a * b) mod q */
+
+	for (k = 0; k < N; k++) {
+		c[k] = t[k] & mod_q_mask;
+	}
+
+	return;
+}
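Since "b" never exists here as a dense polynomial, it can help to see how the index-list form maps back to ordinary coefficients. The sketch below (an illustrative helper, not part of the patch) expands the list into a dense trinary polynomial mod q, so the sparse routine can be cross-checked against ntru_ring_mult_coefficients:

    #include <stdint.h>
    #include <string.h>

    /* Expand the sparse index-list form of trinary b into dense coefficients
     * mod q: bi holds bi_P1_len indices of +1 coefficients followed by
     * bi_M1_len indices of -1 coefficients, all in [0, N). */
    static void expand_indices(const uint16_t *bi, uint16_t bi_P1_len,
                               uint16_t bi_M1_len, uint16_t N, uint16_t q,
                               uint16_t *b_dense)
    {
        uint16_t i;

        memset(b_dense, 0, N * sizeof(uint16_t));
        for (i = 0; i < bi_P1_len; i++) {
            b_dense[bi[i]] = 1;
        }
        for (; i < bi_P1_len + bi_M1_len; i++) {
            b_dense[bi[i]] = (uint16_t)(q - 1); /* -1 mod q */
        }
    }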
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_32.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_32.c
new file mode 100644
index 0000000000000000000000000000000000000000..6712444071f2bbb9dcdbbb69b18115d3d3094a64
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_32.c
@@ -0,0 +1,152 @@
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_poly.h"
+
+/* ntru_ring_mult_indices
+ *
+ * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b"
+ * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1).
+ * This is a convolution operation.
+ *
+ * Ring element "b" is a sparse trinary polynomial with coefficients -1, 0,
+ * and 1.  It is specified by a list, bi, of its nonzero indices containing
+ * indices for the bi_P1_len +1 coefficients followed by the indices for the
+ * bi_M1_len -1 coefficients.
+ * The indices are in the range [0,N).
+ *
+ * The result array "c" may share the same memory space as input array "a",
+ * input array "b", or temp array "t".
+ *
+ * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum
+ * beyond 16 bits does not matter.
+ */
+void ntru_ring_mult_indices(
+    uint16_t const *a,        /*  in - pointer to ring element a */
+    uint16_t const bi_P1_len, /*  in - no. of +1 coefficients in b */
+    uint16_t const bi_M1_len, /*  in - no. of -1 coefficients in b */
+    uint16_t const *bi,       /*  in - pointer to the list of nonzero
+                                         indices of ring element b,
+                                         containing indices for the +1
+                                         coefficients followed by the
+                                         indices for -1 coefficients */
+    uint16_t const N,         /*  in - no. of coefficients in a, b, c */
+    uint16_t const q,         /*  in - large modulus */
+    uint16_t *t,              /*  in - temp buffer of N elements */
+    uint16_t *c)              /* out - address for polynomial c */
+{
+	uint16_t mod_q_mask;
+	uint32_t mask_interval;
+	uint16_t iA, iT, iB; /* Loop variables for the relevant arrays */
+	uint16_t mask_time;
+	uint16_t end;
+
+	uint32_t tmp1;
+	uint32_t tmp2;
+
+	end = N & 0xfffe; /* 2 * floor(N/2) */
+
+	mod_q_mask = q - 1;
+	mask_interval = ((1 << 16) / q);
+	mask_time = 0;
+
+	/* t[(i+k)%N] = sum i=0 through N-1 of a[i], for b[k] = -1 */
+	memset(t, 0, N * sizeof(uint16_t));
+	for (iB = bi_P1_len; iB < bi_P1_len + bi_M1_len; iB++) {
+		/* first half -- iT from bi[iB] to N
+                         iA from 0 to N - bi[iB] */
+		iT = bi[iB];
+
+		for (iA = 0; iT < end; iA += 2, iT += 2) {
+			memcpy(&tmp1, t + iT, sizeof tmp1);
+			memcpy(&tmp2, a + iA, sizeof tmp2);
+			tmp1 += tmp2;
+			memcpy(t + iT, &tmp1, sizeof tmp1);
+		}
+
+		if (iT < N) {
+			t[iT] += a[iA];
+			iT++;
+			iA++;
+		}
+
+		/* second half -- iT from 0 to bi[iB]
+                          iA from N - bi[iB] to N  */
+
+		for (iT = 0; iA < end; iA += 2, iT += 2) {
+			memcpy(&tmp1, t + iT, sizeof tmp1);
+			memcpy(&tmp2, a + iA, sizeof tmp2);
+			tmp1 += tmp2;
+			memcpy(t + iT, &tmp1, sizeof tmp1);
+		}
+
+		if (iA < N) {
+			t[iT] += a[iA];
+			iT++;
+			iA++;
+		}
+
+		mask_time++;
+		if (mask_time == mask_interval) {
+			for (iT = 0; iT < N; iT++) {
+				t[iT] &= mod_q_mask;
+			}
+			mask_time = 0;
+		}
+	} /* for (iB = bi_P1_len; iB < bi_P1_len + bi_M1_len; iB++) -- minus-index loop */
+
+	/* Minus everything */
+	for (iT = 0; iT < N; iT++) {
+		t[iT] = -t[iT];
+		t[iT] &= mod_q_mask;
+	}
+	mask_time = 0;
+
+	for (iB = 0; iB < bi_P1_len; iB++) {
+		/* first half -- iT from bi[iB] to N
+                         iA from 0 to N - bi[iB] */
+		iT = bi[iB];
+
+		for (iA = 0; iT < end; iA += 2, iT += 2) {
+			memcpy(&tmp1, t + iT, sizeof tmp1);
+			memcpy(&tmp2, a + iA, sizeof tmp2);
+			tmp1 += tmp2;
+			memcpy(t + iT, &tmp1, sizeof tmp1);
+		}
+
+		if (iT < N) {
+			t[iT] += a[iA];
+			iT++;
+			iA++;
+		}
+
+		/* second half -- iT from 0 to bi[iB]
+                          iA from N - bi[iB] to N  */
+		for (iT = 0; iA < end; iA += 2, iT += 2) {
+			memcpy(&tmp1, t + iT, sizeof tmp1);
+			memcpy(&tmp2, a + iA, sizeof tmp2);
+			tmp1 += tmp2;
+			memcpy(t + iT, &tmp1, sizeof tmp1);
+		}
+
+		if (iA < N) {
+			t[iT] += a[iA];
+			iT++;
+			iA++;
+		}
+
+		mask_time++;
+		if (mask_time == mask_interval) {
+			for (iT = 0; iT < N; iT++) {
+				t[iT] &= mod_q_mask;
+			}
+			mask_time = 0;
+		}
+
+	} /* for (iB = 0; iB < bi_P1_len; iB++) -- plus-index loop */
+
+	/* c = (a * b) mod q */
+	for (iT = 0; iT < N; iT++) {
+		c[iT] = t[iT] & mod_q_mask;
+	}
+
+	return;
+}
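The memcpy/uint32_t idiom in this variant adds two adjacent 16-bit coefficients of t with a single 32-bit addition; the periodic "& mod_q_mask" sweep is there to keep the 16-bit lanes small enough that a carry does not spill into the neighbouring coefficient. Reduced to its essentials (illustrative sketch, not part of the patch):

    #include <stdint.h>
    #include <string.h>

    /* Add a[0..1] into t[0..1] with one 32-bit addition. memcpy keeps the
     * accesses alignment- and aliasing-safe; as long as neither 16-bit lane
     * overflows, the packed add equals two independent 16-bit adds. */
    static void packed_add_pair(uint16_t *t, const uint16_t *a)
    {
        uint32_t x, y;

        memcpy(&x, t, sizeof x);
        memcpy(&y, a, sizeof y);
        x += y;              /* both lanes advance at once */
        memcpy(t, &x, sizeof x);
    }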
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_64.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_64.c
new file mode 100644
index 0000000000000000000000000000000000000000..76eb59e32cc10882c75fdadf139c6ea5bd6301b3
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_64.c
@@ -0,0 +1,186 @@
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_poly.h"
+
+/* ntru_ring_mult_indices
+ *
+ * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b"
+ * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1).
+ * This is a convolution operation.
+ *
+ * Ring element "b" is a sparse trinary polynomial with coefficients -1, 0,
+ * and 1.  It is specified by a list, bi, of its nonzero indices containing
+ * indices for the bi_P1_len +1 coefficients followed by the indices for the
+ * bi_M1_len -1 coefficients.
+ * The indices are in the range [0,N).
+ *
+ * The result array "c" may share the same memory space as input array "a",
+ * input array "b", or temp array "t".
+ *
+ * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum
+ * beyond 16 bits does not matter.
+ */
+
+void ntru_ring_mult_indices_64(
+    uint16_t const *a,  /*  in - pointer to ring element a */
+    uint16_t bi_P1_len, /*  in - no. of +1 coefficients in b */
+    uint16_t bi_M1_len, /*  in - no. of -1 coefficients in b */
+    uint16_t const *bi, /*  in - pointer to the list of nonzero
+                                         indices of ring element b,
+                                         containing indices for the +1
+                                         coefficients followed by the
+                                         indices for -1 coefficients */
+    uint16_t N,         /*  in - no. of coefficients in a, b, c */
+    uint16_t q,         /*  in - large modulus */
+    uint16_t *t,        /*  in - temp buffer of N elements */
+    uint16_t *c)        /* out - address for polynomial c */
+{
+	uint16_t i;
+	uint16_t mod_q_mask;
+	uint64_t full_mod_q_mask;
+	uint32_t mask_interval;
+	uint16_t iA, iT, iB; /* Loop variables for the relevant arrays */
+	uint16_t mask_time;
+	uint16_t oend[4];
+	uint16_t end;
+	uint16_t const Nmod4 = N & 3;
+
+	uint64_t tmp1;
+	uint64_t tmp2;
+
+	for (i = 0; i < 4; i++) {
+		oend[i] = (N - i) & 0xfffc; /* 4 * floor((N-i)/4) */
+	}
+
+	mod_q_mask = q - 1;
+	full_mod_q_mask = (mod_q_mask << 16) | mod_q_mask;
+	full_mod_q_mask |= (full_mod_q_mask << 32);
+	mask_interval = ((1 << 16) / q);
+
+	/* t[(i+k)%N] = sum i=0 through N-1 of a[i], for b[k] = -1 */
+
+	mask_time = 0;
+
+	memset(t, 0, N * sizeof(uint16_t));
+	for (iB = bi_P1_len; iB < bi_P1_len + bi_M1_len; iB++) {
+		/* first half -- from iT to N */
+		iT = bi[iB];
+		end = oend[iT & 3];
+
+		for (iA = 0; iT < end; iA += 4, iT += 4) {
+			memcpy(&tmp1, t + iT, sizeof tmp1);
+			memcpy(&tmp2, a + iA, sizeof tmp2);
+			tmp1 += tmp2;
+			memcpy(t + iT, &tmp1, sizeof tmp1);
+		}
+
+		while (iT < N) {
+			t[iT] += a[iA];
+			iT++;
+			iA++;
+		}
+
+		/* second half -- from 0 to start -1 */
+
+		/* at this point the first half has consumed a[0 .. N - bi[iB] - 1],
+         * so iA should be equal to N - bi[iB].
+         */
+		end = oend[iA & 3];
+
+		for (iT = 0; iA < end; iA += 4, iT += 4) {
+			memcpy(&tmp1, t + iT, sizeof tmp1);
+			memcpy(&tmp2, a + iA, sizeof tmp2);
+			tmp1 += tmp2;
+			memcpy(t + iT, &tmp1, sizeof tmp1);
+		}
+
+		while (iA < N) {
+			t[iT] += a[iA];
+			iT++;
+			iA++;
+		}
+
+		mask_time++;
+		if (mask_time == mask_interval) {
+			memcpy(&tmp1, t, sizeof tmp1);
+			tmp1 &= full_mod_q_mask;
+			memcpy(t, &tmp1, sizeof tmp1);
+
+			end = oend[Nmod4];
+			for (iT = Nmod4; iT < end; iT += 4) {
+				memcpy(&tmp1, t + iT, sizeof tmp1);
+				tmp1 &= full_mod_q_mask;
+				memcpy(t + iT, &tmp1, sizeof tmp1);
+			}
+			mask_time = 0;
+		}
+	} /* for (iB = bi_P1_len; iB < bi_P1_len + bi_M1_len; iB++) -- minus-index loop */
+
+	/* Minus everything */
+	for (iT = 0; iT < N; iT++) {
+		t[iT] = -t[iT];
+		t[iT] &= mod_q_mask;
+	}
+	mask_time = 0;
+
+	for (iB = 0; iB < bi_P1_len; iB++) {
+		/* first half -- from iT to N */
+		iT = bi[iB];
+		end = oend[iT & 3];
+
+		for (iA = 0; iT < end; iA += 4, iT += 4) {
+			memcpy(&tmp1, t + iT, sizeof tmp1);
+			memcpy(&tmp2, a + iA, sizeof tmp1);
+			tmp1 += tmp2;
+			memcpy(t + iT, &tmp1, sizeof tmp1);
+		}
+
+		while (iT < N) {
+			t[iT] += a[iA];
+			iT++;
+			iA++;
+		}
+
+		/* second half -- from 0 to start -1 */
+
+		/* at this point the first half has consumed a[0 .. N - bi[iB] - 1],
+         * so iA should be equal to N - bi[iB].
+         */
+		end = oend[iA & 3];
+
+		for (iT = 0; iA < end; iA += 4, iT += 4) {
+			memcpy(&tmp1, t + iT, sizeof tmp1);
+			memcpy(&tmp2, a + iA, sizeof tmp1);
+			tmp1 += tmp2;
+			memcpy(t + iT, &tmp1, sizeof tmp1);
+		}
+
+		while (iA < N) {
+			t[iT] += a[iA];
+			iT++;
+			iA++;
+		}
+
+		mask_time++;
+		if (mask_time == mask_interval) {
+			memcpy(&tmp1, t, sizeof tmp1);
+			tmp1 &= full_mod_q_mask;
+			memcpy(t, &tmp1, sizeof tmp1);
+
+			end = oend[Nmod4];
+			for (iT = Nmod4; iT < end; iT += 4) {
+				memcpy(&tmp1, t + iT, sizeof tmp1);
+				tmp1 &= full_mod_q_mask;
+				memcpy(t + iT, &tmp1, sizeof tmp1);
+			}
+			mask_time = 0;
+		}
+
+	} /* for (iB = 0; iB < bi_P1_len; iB++) -- plus-index loop */
+
+	/* c = (a * b) mod q */
+	for (iT = 0; iT < N; iT++) {
+		c[iT] = t[iT] & mod_q_mask;
+	}
+
+	return;
+}
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_simd.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_simd.c
new file mode 100644
index 0000000000000000000000000000000000000000..0f971923b4ce1bf887a3c3221e73fde392c0180e
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_mult_indices_simd.c
@@ -0,0 +1,149 @@
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_poly.h"
+#include <immintrin.h>
+
+#define PAD(N) ((N + 0x0007) & 0xfff8)
+
+void ntru_ring_mult_indices_memreq(
+    uint16_t N,
+    uint16_t *tmp_polys,
+    uint16_t *poly_coeffs) {
+	if (tmp_polys) {
+		*tmp_polys = 2;
+	}
+
+	if (poly_coeffs) {
+		*poly_coeffs = PAD(N);
+	}
+}
+
+/* ntru_ring_mult_indices
+ *
+ * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b"
+ * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1).
+ * This is a convolution operation.
+ *
+ * Ring element "b" is a sparse trinary polynomial with coefficients -1, 0,
+ * and 1.  It is specified by a list, bi, of its nonzero indices containing
+ * indices for the bi_P1_len +1 coefficients followed by the indices for the
+ * bi_M1_len -1 coefficients.
+ * The indices are in the range [0,N).
+ *
+ * The result array "c" may share the same memory space as input array "a",
+ * input array "b", or temp array "t".
+ *
+ * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum
+ * beyond 16 bits does not matter.
+ */
+void ntru_ring_mult_indices(
+    uint16_t const *a,        /*  in - pointer to ring element a */
+    uint16_t const bi_P1_len, /*  in - no. of +1 coefficients in b */
+    uint16_t const bi_M1_len, /*  in - no. of -1 coefficients in b */
+    uint16_t const *bi,       /*  in - pointer to the list of nonzero
+                                         indices of ring element b,
+                                         containing indices for the +1
+                                         coefficients followed by the
+                                         indices for -1 coefficients */
+    uint16_t const N,         /*  in - no. of coefficients in a, b, c */
+    uint16_t const q,         /*  in - large modulus */
+    uint16_t *t,              /*  in - temp buffer of N elements */
+    uint16_t *c)              /* out - address for polynomial c */
+{
+	__m128i *T;
+	__m128i *Tp;
+	__m128i *Ti;
+
+	uint16_t i;
+	uint16_t j;
+	uint16_t k;
+	uint16_t m;
+	uint16_t const mod_q_mask = q - 1;
+
+	__m128i a0s[8];
+	__m128i aNs[8];
+
+	__m128i neg;
+	__m128i x0;
+	__m128i x1;
+	__m128i x2;
+	__m128i x3;
+	__m128i x4;
+
+	T = (__m128i *) t;
+	memset(T, 0, 2 * PAD(N) * sizeof(uint16_t));
+
+	a0s[0] = _mm_lddqu_si128((__m128i *) a);
+	aNs[0] = _mm_lddqu_si128((__m128i *) (a + N - 8));
+	for (i = 1; i < 8; i++) {
+		a0s[i] = _mm_slli_si128(a0s[i - 1], 2);
+		aNs[i] = _mm_srli_si128(aNs[i - 1], 2);
+	}
+
+	for (i = bi_P1_len; i < bi_P1_len + bi_M1_len; i++) {
+		k = bi[i];
+		m = k & 7;
+		k /= 8;
+		Tp = T + k;
+		x2 = _mm_add_epi16(*Tp, a0s[m]);
+		_mm_store_si128(Tp, x2);
+		Tp++;
+		for (j = 8 - m; j <= (N - 8); j += 8) {
+			x3 = _mm_lddqu_si128((__m128i *) &a[j]);
+			x2 = _mm_add_epi16(*Tp, x3);
+			_mm_store_si128(Tp, x2);
+			Tp++;
+		}
+		if (j == N)
+			continue;
+		x2 = _mm_add_epi16(*Tp, aNs[j - (N - 8)]);
+		_mm_store_si128(Tp, x2);
+	}
+
+	neg = _mm_setzero_si128();
+	neg = _mm_cmpeq_epi8(neg, neg);
+	Tp = T;
+	for (i = 0; i < (2 * PAD(N)) / 8; i++) {
+		x1 = _mm_sign_epi16(*Tp, neg);
+		_mm_store_si128(Tp, x1);
+		Tp++;
+	}
+
+	for (i = 0; i < bi_P1_len; i++) {
+		k = bi[i];
+		m = k & 7;
+		k /= 8;
+		Tp = T + k;
+		x2 = _mm_add_epi16(*Tp, a0s[m]);
+		_mm_store_si128(Tp, x2);
+		Tp++;
+		for (j = 8 - m; j <= (N - 8); j += 8) {
+			x3 = _mm_lddqu_si128((__m128i *) &a[j]);
+			x2 = _mm_add_epi16(*Tp, x3);
+			_mm_store_si128(Tp, x2);
+			Tp++;
+		}
+		if (j == N)
+			continue;
+		x2 = _mm_add_epi16(*Tp, aNs[j - (N - 8)]);
+		_mm_store_si128(Tp, x2);
+	}
+
+	Ti = T;
+	Tp = (__m128i *) (((uint16_t *) T) + N);
+	x0 = _mm_set1_epi16(mod_q_mask);
+	for (j = 0; j < N; j += 8) {
+		x1 = _mm_load_si128(Ti);
+		x2 = _mm_lddqu_si128(Tp);
+		x3 = _mm_add_epi16(x1, x2);
+		x4 = _mm_and_si128(x3, x0);
+		_mm_store_si128(Ti, x4);
+		Ti++;
+		Tp++;
+	}
+	memmove(c, T, N * sizeof(uint16_t));
+	for (j = N; j < PAD(N); j++) {
+		c[j] = 0;
+	}
+
+	return;
+}
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.c b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.c
new file mode 100644
index 0000000000000000000000000000000000000000..60389dd680d1dedba01b1819a42d5ad603d08cdb
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.c
@@ -0,0 +1,547 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_ntru_poly.c
+ *
+ * Contents: Routines for generating and operating on polynomials in the
+ *           NTRU algorithm.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_ntru_poly.h"
+#include "ntru_crypto_ntru_mgf1.h"
+
+/* ntru_gen_poly
+ *
+ * Generates polynomials by creating for each polynomial, a list of the
+ * indices of the +1 coefficients followed by a list of the indices of
+ * the -1 coefficients.
+ *
+ * If a single polynomial is generated (non-product form), indices_counts
+ * contains a single value of the total number of indices (for +1 and -1
+ * coefficients combined).
+ *
+ * If multiple polynomials are generated (for product form), their lists of
+ * indices are sequentially stored in the indices buffer.  Each byte of
+ * indices_counts contains the total number of indices (for +1 and -1
+ * coefficients combined) for a single polynomial, beginning with the
+ * low-order byte for the first polynomial.  The high-order byte is unused.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns HASH_BAD_ALG if the algorithm is not supported.
+ *
+ */
+
+uint32_t
+ntru_gen_poly(
+    NTRU_CRYPTO_HASH_ALGID hash_algid, /*  in - hash algorithm ID for
+                                                      IGF-2 */
+    uint8_t md_len,                    /*  in - no. of octets in digest */
+    uint8_t min_calls,                 /*  in - minimum no. of hash
+                                                      calls */
+    uint16_t seed_len,                 /*  in - no. of octets in seed */
+    uint8_t *seed,                     /*  in - pointer to seed */
+    uint8_t *buf,                      /*  in - pointer to working
+                                                      buffer */
+    uint16_t N,                        /*  in - max index + 1 */
+    uint8_t c_bits,                    /*  in - no. bits for candidate */
+    uint16_t limit,                    /*  in - conversion to index
+                                                      limit */
+    bool is_product_form,              /*  in - if generating multiple
+                                                      polys */
+    uint32_t indices_counts,           /*  in - nos. of indices needed */
+    uint16_t *indices)                 /* out - address for indices */
+{
+	uint8_t *mgf_out;
+	uint8_t *octets;
+	uint8_t *used;
+	uint8_t num_polys;
+	uint16_t num_indices;
+	uint16_t octets_available;
+	uint16_t index_cnt = 0;
+	uint8_t left = 0;
+	uint8_t num_left = 0;
+	uint32_t retcode;
+
+	/* generate minimum MGF1 output */
+
+	mgf_out = buf + md_len + 4;
+	if ((retcode = ntru_mgf1(buf, hash_algid, md_len, min_calls,
+	                         seed_len, seed, mgf_out)) != NTRU_OK) {
+		return retcode;
+	}
+
+	octets = mgf_out;
+	octets_available = min_calls * md_len;
+
+	/* init indices counts for number of polynomials being generated */
+
+	if (is_product_form) {
+		/* number of indices for poly1 is in low byte of indices_counts,
+         * number of indices for poly2 and poly3 are in next higher bytes
+         */
+
+		num_polys = 3;
+		num_indices = (uint16_t)(indices_counts & 0xff);
+		indices_counts >>= 8;
+
+	} else {
+		/* number of indices for poly is in low 16 bits of indices_counts */
+
+		num_polys = 1;
+		num_indices = (uint16_t) indices_counts;
+	}
+
+	/* init used-index array */
+
+	used = mgf_out + octets_available;
+	memset(used, 0, N);
+
+	/* generate indices (IGF-2) for all polynomials */
+
+	while (num_polys > 0) {
+
+		/* generate indices for a single polynomial */
+
+		while (index_cnt < num_indices) {
+			uint16_t index;
+			uint8_t num_needed;
+
+			/* form next candidate value to convert to an index */
+
+			do {
+				/* use any leftover bits first */
+
+				if (num_left != 0) {
+					index = left << (c_bits - num_left);
+				} else {
+					index = 0;
+				}
+
+				/* get the rest of the bits needed from new octets */
+
+				num_needed = c_bits - num_left;
+				while (num_needed != 0) {
+					/* get another octet */
+
+					if (octets_available == 0) {
+						if ((retcode = ntru_mgf1(buf, hash_algid, md_len, 1,
+						                         0, NULL, mgf_out)) != NTRU_OK) {
+							return retcode;
+						}
+
+						octets = mgf_out;
+						octets_available = md_len;
+					}
+					left = *octets++;
+					--octets_available;
+
+					if (num_needed <= 8) {
+						/* all bits needed to fill the index are in this octet */
+
+						index |= ((uint16_t)(left)) >> (8 - num_needed);
+						num_left = 8 - num_needed;
+						num_needed = 0;
+						left &= 0xff >> (8 - num_left);
+
+					} else {
+						/* another octet will be needed after using this
+                         * whole octet
+                         */
+
+						index |= ((uint16_t) left) << (num_needed - 8);
+						num_needed -= 8;
+					}
+				}
+			} while (index >= limit);
+
+			/* form index and check if unique */
+
+			index %= N;
+
+			if (!used[index]) {
+				used[index] = 1;
+				indices[index_cnt] = index;
+				++index_cnt;
+			}
+		}
+		--num_polys;
+
+		/* init for next polynomial if another polynomial to be generated */
+
+		if (num_polys > 0) {
+			memset(used, 0, N);
+			num_indices = num_indices +
+			              (uint16_t)(indices_counts & 0xff);
+			indices_counts >>= 8;
+		}
+	}
+
+	NTRU_RET(NTRU_OK);
+}
+
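The do/while above is a rejection sampler: a candidate of c_bits bits is discarded whenever it is at or above "limit", which the parameter sets are expected to choose as the largest multiple of N representable in c_bits, so that the accepted candidates map uniformly onto [0, N) and the derived indices carry no modulo bias. The acceptance step, isolated (illustrative helper, not part of the patch):

    #include <stdint.h>

    /* Convert a c_bits-wide candidate into an index in [0, N) without bias.
     * Returns 1 and writes *index on acceptance, 0 on rejection (the caller
     * then draws another candidate). limit is assumed to be the largest
     * multiple of N that fits in c_bits. */
    static int candidate_to_index(uint16_t candidate, uint16_t limit,
                                  uint16_t N, uint16_t *index)
    {
        if (candidate >= limit) {
            return 0; /* rejecting avoids over-weighting the small residues */
        }
        *index = candidate % N;
        return 1;
    }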
+/* ntru_poly_check_min_weight
+ *
+ * Checks that the number of 0, +1, and -1 trinary ring elements meet or exceed
+ * a minimum weight.
+ */
+
+bool ntru_poly_check_min_weight(
+    uint16_t num_els, /*  in - degree of polynomial */
+    uint8_t *ringels, /*  in - pointer to trinary ring elements */
+    uint16_t min_wt)  /*  in - minimum weight */
+{
+	uint16_t wt[3];
+	uint16_t i;
+
+	wt[0] = wt[1] = wt[2] = 0;
+
+	for (i = 0; i < num_els; i++) {
+		++wt[ringels[i]];
+	}
+
+	if ((wt[0] < min_wt) || (wt[1] < min_wt) || (wt[2] < min_wt)) {
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+/* ntru_ring_mult_product_indices
+ *
+ * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b"
+ * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1).
+ * This is a convolution operation.
+ *
+ * Ring element "b" is represented by the product form b1 * b2 + b3, where
+ * b1, b2, and b3 are each a sparse trinary polynomial with coefficients -1,
+ * 0, and 1.  It is specified by a list, bi, of the nonzero indices of b1, b2,
+ * and b3, containing the indices for the +1 coefficients followed by the
+ * indices for the -1 coefficients for each polynomial in that order.
+ * The indices are in the range [0,N).
+ *
+ * The result array "c" may share the same memory space as input array "a",
+ * or input array "b".
+ *
+ * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum
+ * beyond 16 bits does not matter.
+ */
+
+void ntru_ring_mult_product_indices(
+    uint16_t const *a,      /*  in - pointer to ring element a */
+    uint16_t const b1i_len, /*  in - no. of +1 or -1 coefficients in b1 */
+    uint16_t const b2i_len, /*  in - no. of +1 or -1 coefficients in b2 */
+    uint16_t const b3i_len, /*  in - no. of +1 or -1 coefficients in b3 */
+    uint16_t const *bi,     /*  in - pointer to the list of nonzero
+                                         indices of polynomials b1, b2, b3,
+                                         containing indices for the +1
+                                         coefficients followed by the
+                                         indices for -1 coefficients for
+                                         each polynomial */
+    uint16_t const N,       /*  in - no. of coefficients in a, b, c */
+    uint16_t const q,       /*  in - large modulus */
+    uint16_t *t,            /*  in - temp buffer of 2N elements */
+    uint16_t *c)            /* out - address for polynomial c */
+{
+	uint16_t scratch_polys;
+	uint16_t poly_coeffs;
+	uint16_t *t2;
+	uint16_t mod_q_mask;
+	uint16_t i;
+
+	ntru_ring_mult_indices_memreq(N, &scratch_polys, &poly_coeffs);
+	t2 = t + scratch_polys * poly_coeffs;
+	mod_q_mask = q - 1;
+
+	/* t2 = a * b1 */
+
+	ntru_ring_mult_indices(a, b1i_len, b1i_len, bi, N, q, t, t2);
+
+	/* t2 = (a * b1) * b2 */
+
+	ntru_ring_mult_indices(t2, b2i_len, b2i_len, bi + (b1i_len << 1), N, q,
+	                       t, t2);
+
+	/* t = a * b3 */
+
+	ntru_ring_mult_indices(a, b3i_len, b3i_len,
+	                       bi + ((b1i_len + b2i_len) << 1), N, q, t, t);
+
+	/* c = (a * b1 * b2) + (a * b3) */
+
+	for (i = 0; i < N; i++) {
+		c[i] = (t2[i] + t[i]) & mod_q_mask;
+	}
+	for (; i < poly_coeffs; i++) {
+		c[i] = 0;
+	}
+
+	return;
+}
+
+/* ntru_ring_inv
+ *
+ * Finds the inverse of a polynomial, a, in (Z/2Z)[X]/(X^N - 1).
+  */
+
+bool ntru_ring_inv(
+    uint16_t *a,     /*  in - pointer to polynomial a */
+    uint16_t N,      /*  in - no. of coefficients in a */
+    uint16_t *t,     /*  in - temp buffer of 2N elements */
+    uint16_t *a_inv) /* out - address for polynomial a^-1 */
+{
+	uint8_t *b = (uint8_t *) t; /* b cannot be in a_inv since it must be
+                                       rotated and copied there as a^-1 mod 2 */
+	uint8_t *c = b + N;         /* c cannot be in a_inv since it exchanges
+                                       with b, and b cannot be in a_inv */
+	uint8_t *f = c + N;
+	uint8_t *g = (uint8_t *) a_inv; /* g needs N + 1 bytes */
+	uint16_t deg_b;
+	uint16_t deg_c;
+	uint16_t deg_f;
+	uint16_t deg_g;
+	uint16_t k = 0;
+	uint16_t i, j;
+
+	if (a == NULL || t == NULL || a_inv == NULL) {
+		return FALSE;
+	}
+
+	/* form a^-1 in (Z/2Z)[X]/(X^N - 1) */
+
+	memset(b, 0, (N << 1)); /* clear to init b, c */
+
+	/* b(X) = 1 */
+
+	b[0] = 1;
+	deg_b = 0;
+
+	/* c(X) = 0 (cleared above) */
+
+	deg_c = 0;
+
+	/* f(X) = a(X) mod 2 */
+
+	deg_f = 0;
+	j = 0;
+	for (i = 0; i < N; i++) {
+		f[i] = (uint8_t)(a[i] & 1);
+		j ^= f[i];
+		if (f[i])
+			deg_f = i;
+	}
+
+	/* Parity is zero, not invertible */
+	if (j == 0) {
+		return FALSE;
+	}
+
+	/* g(X) = X^N - 1 */
+
+	g[0] = 1;
+	memset(g + 1, 0, N - 1);
+	g[N] = 1;
+	deg_g = N;
+
+	/* until f(X) = 1 */
+
+	while (1) {
+		/* while f[0] = 0, f(X) /= X, c(X) *= X, k++ */
+
+		for (i = 0; (i <= deg_f) && (f[i] == 0); ++i)
+			;
+		if (i > deg_f)
+			return FALSE;
+		if (i) {
+			k = k + i;
+
+			f = f + i;
+			deg_f = deg_f - i;
+
+			memmove(c + i, c, deg_c + 1);
+			memset(c, 0, i);
+			deg_c = deg_c + i;
+		}
+
+		/* if f(X) = 1, done */
+
+		if (deg_f == 0) {
+			break;
+		}
+
+		/* if deg_f < deg_g, f <-> g, b <-> c */
+
+		if (deg_f < deg_g) {
+			uint8_t *x;
+
+			x = f;
+			f = g;
+			g = x;
+			deg_f ^= deg_g;
+			deg_g ^= deg_f;
+			deg_f ^= deg_g;
+			x = b;
+			b = c;
+			c = x;
+			deg_b ^= deg_c;
+			deg_c ^= deg_b;
+			deg_b ^= deg_c;
+		}
+
+		/* f(X) += g(X)
+         * might change degree of f if deg_g >= deg_f
+         */
+		for (i = 0; i <= deg_g; i++) {
+			f[i] ^= g[i];
+		}
+
+		if (deg_g == deg_f) {
+			while (deg_f > 0 && f[deg_f] == 0) {
+				--deg_f;
+			}
+		}
+
+		/* b(X) += c(X) */
+		for (i = 0; i <= deg_c; i++) {
+			b[i] ^= c[i];
+		}
+
+		if (deg_c >= deg_b) {
+			deg_b = deg_c;
+			while (deg_b > 0 && b[deg_b] == 0) {
+				--deg_b;
+			}
+		}
+	}
+
+	/* a^-1 in (Z/2Z)[X]/(X^N - 1) = b(X) shifted left k coefficients */
+
+	j = 0;
+
+	if (k >= N) {
+		k = k - N;
+	}
+
+	for (i = k; i < N; i++) {
+		a_inv[j++] = (uint16_t)(b[i]);
+	}
+
+	for (i = 0; i < k; i++) {
+		a_inv[j++] = (uint16_t)(b[i]);
+	}
+
+	return TRUE;
+}
+
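The parity test above (j accumulates the XOR of all coefficients mod 2) is a quick necessary condition for invertibility: X - 1 divides X^N - 1, so a unit in (Z/2Z)[X]/(X^N - 1) must satisfy a(1) = 1 mod 2, i.e. it must have an odd number of odd coefficients. Stated on its own (illustrative helper, not part of the patch):

    #include <stdint.h>

    /* Necessary (not sufficient) condition for a to be invertible in
     * (Z/2Z)[X]/(X^N - 1): a(1) must be 1 mod 2, i.e. the count of odd
     * coefficients must be odd; otherwise X - 1 divides a(X) mod 2. */
    static int passes_parity_screen(const uint16_t *a, uint16_t N)
    {
        uint16_t parity = 0;
        uint16_t i;

        for (i = 0; i < N; i++) {
            parity ^= (uint16_t)(a[i] & 1);
        }
        return parity == 1;
    }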
+/* ntru_ring_lift_inv_pow2_product
+ *
+ * Lifts an element of (Z/2)[x]/(x^N - 1) to (Z/q)[x]/(x^N - 1)
+ * where q is a power of 2 such that 256 < q <= 65536.
+ *
+ * inv must be padded with zeros to the degree used by
+ * ntru_ring_mult_coefficients.
+ *
+ * inv is assumed to be the inverse mod 2 of the product form element
+ * given by (1 + 3*(F1*F2 + F3)). The lift is performed in place --
+ * inv will be overwritten with the result.
+ *
+ * Requires scratch space for ntru_ring_mult_coefficients + one extra
+ * polynomial with the same padding.
+ */
+uint32_t
+ntru_ring_lift_inv_pow2_product(
+    uint16_t *inv,
+    uint16_t const dF1,
+    uint16_t const dF2,
+    uint16_t const dF3,
+    uint16_t const *F_buf,
+    uint16_t const N,
+    uint16_t const q,
+    uint16_t *t) {
+	uint16_t i;
+	uint16_t j;
+	uint16_t mod_q_mask = q - 1;
+	uint16_t padN;
+	ntru_ring_mult_coefficients_memreq(N, NULL, &padN);
+
+	for (j = 0; j < 4; ++j) /* assumes 256 < q <= 65536 */
+	{
+		/* f^-1 = f^-1 * (2 - f * f^-1) mod q */
+		ntru_ring_mult_product_indices(inv, (uint16_t) dF1,
+		                               (uint16_t) dF2, (uint16_t) dF3,
+		                               F_buf, N, q,
+		                               t, t);
+		for (i = 0; i < N; ++i) {
+			t[i] = -((inv[i] + 3 * t[i]) & mod_q_mask);
+		}
+		t[0] = t[0] + 2;
+		/* mult_indices works with degree N, mult_coefficients with padN */
+		memset(t + N, 0, (padN - N) * sizeof(uint16_t));
+
+		ntru_ring_mult_coefficients(inv, t, N, q, t + padN, inv);
+	}
+
+	NTRU_RET(NTRU_OK);
+}
+
+/* ntru_ring_lift_inv_pow2_standard
+ *
+ * Lifts an element of (Z/2)[x]/(x^N - 1) to (Z/q)[x]/(x^N - 1)
+ * where q is a power of 2 such that 256 < q <= 65536.
+ *
+ * inv must be padded with zeros to the degree used by
+ * ntru_ring_mult_coefficients.
+ *
+ * inv is assumed to be the inverse mod 2 of the trinary element f.
+ * The lift is performed in place -- inv will be overwritten with the result.
+ *
+ * Requires scratch space for ntru_ring_mult_coefficients + one extra
+ * polynomial with the same padding.
+ */
+uint32_t
+ntru_ring_lift_inv_pow2_standard(
+    uint16_t *inv,
+    uint16_t const *f,
+    uint16_t const N,
+    uint16_t const q,
+    uint16_t *t) {
+	uint16_t i;
+	uint16_t j;
+	uint16_t padN;
+	ntru_ring_mult_coefficients_memreq(N, NULL, &padN);
+
+	for (j = 0; j < 4; ++j) /* assumes 256 < q <= 65536 */
+	{
+		/* f^-1 = f^-1 * (2 - f * f^-1) mod q */
+		ntru_ring_mult_coefficients(f, inv, N, q, t, t);
+		for (i = 0; i < N; ++i) {
+			t[i] = -t[i];
+		}
+		t[0] = t[0] + 2;
+
+		ntru_ring_mult_coefficients(inv, t, N, q, t + padN, inv);
+	}
+
+	NTRU_RET(NTRU_OK);
+}
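Both lift routines rely on the same Newton (Hensel) step: if inv is correct mod 2^m, then inv * (2 - f * inv) is correct mod 2^(2m), so four iterations take an inverse mod 2 all the way to mod 2^16, which covers every q in the stated range. The integer analogue shows the doubling at a glance (illustrative sketch, not part of the patch):

    #include <stdint.h>

    /* Invert an odd 16-bit integer mod 2^16 by the same x *= (2 - a*x)
     * iteration: x is exact mod 2, then mod 2^2, 2^4, 2^8, 2^16. */
    static uint16_t inverse_mod_2pow16(uint16_t a) /* a must be odd */
    {
        uint32_t x = 1;
        int j;

        for (j = 0; j < 4; j++) {
            x = (x * (2u - (uint32_t) a * x)) & 0xFFFFu;
        }
        return (uint16_t) x;
    }

    /* e.g. (uint16_t)(inverse_mod_2pow16(12345) * 12345) == 1 */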
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.h b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.h
new file mode 100644
index 0000000000000000000000000000000000000000..6aa48c9ac32411ed6f101eca2666a3ee3231c94e
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_ntru_poly.h
@@ -0,0 +1,280 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File:  ntru_crypto_ntru_poly.h
+ *
+ * Contents: Public header file for generating and operating on polynomials
+ *           in the NTRU algorithm.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_NTRU_POLY_H
+#define NTRU_CRYPTO_NTRU_POLY_H
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_hash_basics.h"
+
+/* function declarations */
+
+/* ntru_gen_poly
+ *
+ * Generates polynomials by creating for each polynomial, a list of the
+ * indices of the +1 coefficients followed by a list of the indices of
+ * the -1 coefficients.
+ *
+ * If a single polynomial is generated (non-product form), indices_counts
+ * contains a single value of the total number of indices (for +1 and -1
+ * coefficients combined).
+ *
+ * If multiple polynomials are generated (for product form), their lists of
+ * indices are sequentially stored in the indices buffer.  Each byte of
+ * indices_counts contains the total number of indices (for +1 and -1
+ * coefficients combined) for a single polynomial, beginning with the
+ * low-order byte for the first polynomial.  The high-order byte is unused.
+ *
+ * Returns NTRU_OK if successful.
+ * Returns HASH_BAD_ALG if the algorithm is not supported.
+ *
+ */
+
+extern uint32_t
+ntru_gen_poly(
+    NTRU_CRYPTO_HASH_ALGID hash_algid, /*  in - hash algorithm ID for
+                                                      IGF-2 */
+    uint8_t md_len,                    /*  in - no. of octets in digest */
+    uint8_t min_calls,                 /*  in - minimum no. of hash
+                                                      calls */
+    uint16_t seed_len,                 /*  in - no. of octets in seed */
+    uint8_t *seed,                     /*  in - pointer to seed */
+    uint8_t *buf,                      /*  in - pointer to working
+                                                      buffer */
+    uint16_t N,                        /*  in - max index + 1 */
+    uint8_t c_bits,                    /*  in - no. bits for candidate */
+    uint16_t limit,                    /*  in - conversion to index
+                                                      limit */
+    bool is_product_form,              /*  in - if generating multiple
+                                                      polys */
+    uint32_t indices_counts,           /*  in - nos. of indices needed */
+    uint16_t *indices);                /* out - address for indices */
+
+/* ntru_poly_check_min_weight
+ *
+ * Checks that the number of 0, +1, and -1 trinary ring elements meet or exceed
+ * a minimum weight.
+ */
+
+extern bool
+ntru_poly_check_min_weight(
+    uint16_t num_els, /*  in - degree of polynomial */
+    uint8_t *ringels, /*  in - pointer to trinary ring elements */
+    uint16_t min_wt); /*  in - minimum weight */
+
+/* ntru_ring_mult_indices
+ *
+ * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b"
+ * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1).
+ * This is a convolution operation.
+ *
+ * Ring element "b" is a sparse trinary polynomial with coefficients -1, 0,
+ * and 1.  It is specified by a list, bi, of its nonzero indices containing
+ * indices for the bi_P1_len +1 coefficients followed by the indices for the
+ * bi_M1_len -1 coefficients.
+ * The indices are in the range [0,N).
+ *
+ * The result array "c" may share the same memory space as input array "a",
+ * or input array "b".
+ *
+ * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum
+ * beyond 16 bits does not matter.
+ */
+
+/* wrapper */
+extern void
+ntru_ring_mult_indices(
+    uint16_t const *a,        /*  in - pointer to ring element a */
+    uint16_t const bi_P1_len, /*  in - no. of +1 coefficients in b */
+    uint16_t const bi_M1_len, /*  in - no. of -1 coefficients in b */
+    uint16_t const *bi,       /*  in - pointer to the list of nonzero
+                                         indices of ring element b,
+                                         containing indices for the +1
+                                         coefficients followed by the
+                                         indices for -1 coefficients */
+    uint16_t const N,         /*  in - no. of coefficients in a, b, c */
+    uint16_t const q,         /*  in - large modulus */
+    uint16_t *t,              /*  in - temp buffer. Size is impl dependent.
+                                         see ntru_ring_mult_indices_memreq */
+    uint16_t *c);             /* out - address for polynomial c */
+
+/* ntru_ring_mult_product_indices
+ *
+ * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b"
+ * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1).
+ * This is a convolution operation.
+ *
+ * Ring element "b" is represented by the product form b1 * b2 + b3, where
+ * b1, b2, and b3 are each a sparse trinary polynomial with coefficients -1,
+ * 0, and 1.  It is specified by a list, bi, of the nonzero indices of b1, b2,
+ * and b3, containing the indices for the +1 coefficients followed by the
+ * indices for the -1 coefficients for each polynomial in that order.
+ * The indices are in the range [0,N).
+ *
+ * The result array "c" may share the same memory space as input array "a",
+ * or input array "b".
+ *
+ * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum
+ * beyond 16 bits does not matter.
+ */
+
+extern void
+ntru_ring_mult_product_indices(
+    uint16_t const *a,      /*  in - pointer to ring element a */
+    uint16_t const b1i_len, /*  in - no. of +1 or -1 coefficients in b1 */
+    uint16_t const b2i_len, /*  in - no. of +1 or -1 coefficients in b2 */
+    uint16_t const b3i_len, /*  in - no. of +1 or -1 coefficients in b3 */
+    uint16_t const *bi,     /*  in - pointer to the list of nonzero
+                                         indices of polynomials b1, b2, b3,
+                                         containing indices for the +1
+                                         coefficients followed by the
+                                         indices for -1 coefficients for
+                                         each polynomial */
+    uint16_t const N,       /*  in - no. of coefficients in a, b, c */
+    uint16_t const q,       /*  in - large modulus */
+    uint16_t *t,            /*  in - temp buffer. Size is impl dependent.
+                                         see ntru_ring_mult_indices_memreq */
+    uint16_t *c);           /* out - address for polynomial c */
+
+/* ntru_ring_mult_coefficients
+ *
+ * Multiplies ring element (polynomial) "a" by ring element (polynomial) "b"
+ * to produce ring element (polynomial) "c" in (Z/qZ)[X]/(X^N - 1).
+ * This is a convolution operation.
+ *
+ * This assumes q is 2^r where 8 < r < 16, so that overflow of the sum
+ * beyond 16 bits does not matter.
+ */
+
+extern void
+ntru_ring_mult_coefficients(
+    uint16_t const *a, /*  in - pointer to polynomial a */
+    uint16_t const *b, /*  in - pointer to polynomial b */
+    uint16_t N,        /*  in - degree of (x^N - 1) */
+    uint16_t q,        /*  in - large modulus */
+    uint16_t *tmp,     /*  in - temp buffer. Size is impl dependent.
+                                       see ntru_ring_mult_coefficients_memreq */
+    uint16_t *c);      /* out - address for polynomial c */
+
+/* ntru_ring_inv
+ *
+ * Finds the inverse of a polynomial, a, in (Z/2^rZ)[X]/(X^N - 1).
+ *
+ * This assumes q is 2^r where 8 < r < 16, so that operations mod q can
+ * wait until the end, and only 16-bit arrays need to be used.
+ */
+
+extern bool
+ntru_ring_inv(
+    uint16_t *a,      /*  in - pointer to polynomial a */
+    uint16_t N,       /*  in - no. of coefficients in a */
+    uint16_t *t,      /*  in - temp buffer of 2N elements */
+    uint16_t *a_inv); /* out - address for polynomial a^-1 */
+
+/* ntru_ring_lift_inv_pow2_standard
+ *
+ * Lifts an element of (Z/2)[x]/(x^N - 1) to (Z/q)[x]/(x^N - 1)
+ * where q is a power of 2 such that 256 < q <= 65536.
+ *
+ * inv must be padded with zeros to the degree used by
+ * ntru_ring_mult_coefficients.
+ *
+ * inv is assumed to be the inverse mod 2 of the trinary element f.
+ * The lift is performed in place -- inv will be overwritten with the result.
+ *
+ * Requires scratch space for ntru_ring_mult_coefficients + one extra
+ * polynomial with the same padding.
+ */
+uint32_t
+ntru_ring_lift_inv_pow2_standard(
+    uint16_t *inv,
+    uint16_t const *f,
+    uint16_t const N,
+    uint16_t const q,
+    uint16_t *t);
+
+/* ntru_ring_lift_inv_pow2_product
+ *
+ * Lifts an element of (Z/2)[x]/(x^N - 1) to (Z/q)[x]/(x^N - 1)
+ * where q is a power of 2 such that 256 < q <= 65536.
+ *
+ * inv must be padded with zeros to the degree used by
+ * ntru_ring_mult_coefficients.
+ *
+ * inv is assumed to be the inverse mod 2 of the product form element
+ * given by (1 + 3*(F1*F2 + F3)). The lift is performed in place --
+ * inv will be overwritten with the result.
+ *
+ * Requires scratch space for ntru_ring_mult_coefficients + one extra
+ * polynomial with the same padding.
+ */
+uint32_t
+ntru_ring_lift_inv_pow2_product(
+    uint16_t *inv,
+    uint16_t const dF1,
+    uint16_t const dF2,
+    uint16_t const dF3,
+    uint16_t const *F_buf,
+    uint16_t const N,
+    uint16_t const q,
+    uint16_t *t);
+
+/* ntru_ring_mult_coefficients_memreq
+ *
+ * Different implementations of ntru_ring_mult_coefficients may
+ * have different memory requirements.
+ *
+ * This gets the memory requirements of ntru_ring_mult_coefficients as
+ * a number of scratch polynomials and the number of coefficients needed
+ * per polynomial.
+ */
+void ntru_ring_mult_coefficients_memreq(
+    uint16_t N,
+    uint16_t *num_scratch_polys,
+    uint16_t *pad_deg);
+
+/* ntru_ring_mult_indices_memreq
+ *
+ * Different implementations of ntru_ring_mult_indices may
+ * have different memory requirements.
+ *
+ * This gets the memory requirements of ntru_ring_mult_indices as
+ * a number of scratch polynomials (num_scratch_polys) and the number
+ * of coefficients needed per polynomial (pad_deg).
+ *
+ * Note that ntru_ring_mult_prod_indices requires one additional polynomial
+ * of degree pad_deg for holding a temporary result.
+ */
+void ntru_ring_mult_indices_memreq(
+    uint16_t N,
+    uint16_t *num_scratch_polys,
+    uint16_t *pad_deg);
+
+#endif /* NTRU_CRYPTO_NTRU_POLY_H */
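One practical consequence of the memreq pair is how scratch space gets sized for the product-form path: ntru_ring_mult_product_indices needs whatever the underlying ntru_ring_mult_indices reports, plus one extra padded polynomial for its intermediate result. A possible allocation helper (illustrative only, not part of the patch):

    #include <stdint.h>
    #include <stdlib.h>
    #include "ntru_crypto_ntru_poly.h"

    /* Allocate scratch for ntru_ring_mult_product_indices: the scratch the
     * mult_indices implementation asks for, plus one extra pad_deg-sized
     * polynomial for the intermediate result. Caller frees. */
    uint16_t *alloc_product_indices_scratch(uint16_t N)
    {
        uint16_t scratch_polys;
        uint16_t pad_deg;

        ntru_ring_mult_indices_memreq(N, &scratch_polys, &pad_deg);
        return malloc((size_t)(scratch_polys + 1) * pad_deg * sizeof(uint16_t));
    }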
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_platform.h b/crypt/liboqs/kex_ntru/ntru_crypto_platform.h
new file mode 100644
index 0000000000000000000000000000000000000000..b18ff6a27894044836a968fda07a33beaf54929a
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_platform.h
@@ -0,0 +1,92 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_platform.h
+ *
+ * Contents: Platform-specific basic definitions.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_PLATFORM_H
+#define NTRU_CRYPTO_PLATFORM_H
+
+/* The default implementation is to use stdint.h, a part of the C99 standard.
+ * Systems that don't support this are handled on a case-by-case basis.
+ */
+
+#if defined(WIN32) && (_MSC_VER < 1600)
+
+#include <basetsd.h>
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+typedef unsigned short int uint16_t;
+typedef short int int16_t;
+typedef UINT32 uint32_t;
+typedef UINT64 uint64_t;
+
+#elif defined(linux) && defined(__KERNEL__)
+
+#include <linux/types.h>
+
+#else
+
+#include <stdint.h>
+
+#endif
+
+/* For linux kernel drivers:
+ * Use kmalloc and kfree in place of malloc / free
+ * Use BUG_ON in place of assert */
+#if defined(linux) && defined(__KERNEL__)
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#define MALLOC(size) (kmalloc(size, GFP_KERNEL))
+#define FREE(x) (kfree(x))
+
+#else
+
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#define MALLOC(size) (malloc(size))
+#define FREE(x) (free(x))
+
+#endif
+
+#if !defined(HAVE_BOOL) && !defined(__cplusplus)
+#define HAVE_BOOL
+typedef uint8_t bool;
+#endif /* HAVE_BOOL */
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#endif /* NTRU_CRYPTO_PLATFORM_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha.h b/crypt/liboqs/kex_ntru/ntru_crypto_sha.h
new file mode 100644
index 0000000000000000000000000000000000000000..4c0bb7bef4832cf165d08df800fda61dcd88c3e2
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha.h
@@ -0,0 +1,56 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_sha.h
+ *
+ * Contents: Definitions and declarations common to all SHA hash algorithms.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_SHA_H
+#define NTRU_CRYPTO_SHA_H
+
+#include "ntru_crypto_error.h"
+#include "ntru_crypto_hash_basics.h"
+
+/***************
+ * error codes *
+ ***************/
+
+#define SHA_OK ((uint32_t) NTRU_CRYPTO_HASH_OK)
+#define SHA_FAIL ((uint32_t) NTRU_CRYPTO_HASH_FAIL)
+#define SHA_BAD_PARAMETER ((uint32_t) NTRU_CRYPTO_HASH_BAD_PARAMETER)
+#define SHA_OVERFLOW ((uint32_t) NTRU_CRYPTO_HASH_OVERFLOW)
+
+#define SHA_RESULT(r) ((uint32_t)((r) ? SHA_ERROR_BASE + (r) : (r)))
+#define SHA_RET(r) return SHA_RESULT(r);
+
+/*********
+ * flags *
+ *********/
+
+#define SHA_DATA_ONLY HASH_DATA_ONLY
+#define SHA_INIT HASH_INIT
+#define SHA_FINISH HASH_FINISH
+
+#endif /* NTRU_CRYPTO_SHA_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha1.c b/crypt/liboqs/kex_ntru/ntru_crypto_sha1.c
new file mode 100644
index 0000000000000000000000000000000000000000..d17da12c30a02e1a40e7884d7e1d68d2ef1a8a95
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha1.c
@@ -0,0 +1,679 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_sha1.c
+ *
+ * Contents: Routines implementing the SHA-1 hash calculation.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_sha1.h"
+#include "ntru_crypto_msbyte_uint32.h"
+
+/* chaining state elements */
+
+#define H0 state[0]
+#define H1 state[1]
+#define H2 state[2]
+#define H3 state[3]
+#define H4 state[4]
+
+/* standard SHA-1 initialization values */
+
+#define H0_INIT 0x67452301UL
+#define H1_INIT 0xefcdab89UL
+#define H2_INIT 0x98badcfeUL
+#define H3_INIT 0x10325476UL
+#define H4_INIT 0xc3d2e1f0UL
+
+/* sha1_blk()
+ *
+ * This routine updates the current hash output (chaining state)
+ * by performing SHA-1 on a 512-bit block of data represented as sixteen
+ * 32-bit words.
+ */
+
+#define K00_19 0x5a827999UL
+#define K20_39 0x6ed9eba1UL
+#define K40_59 0x8f1bbcdcUL
+#define K60_79 0xca62c1d6UL
+
+#define RL(a, n) (((a) << (n)) | ((a) >> (32 - (n))))
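+/* RL(a, n): rotate the 32-bit value a left by n bits (0 < n < 32) */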
+
+static void
+sha1_blk(
+    uint32_t const *data, /*     in - ptr to 16 32-bit word input block */
+    uint32_t *state)      /* in/out - ptr to 5 32-bit word chaining state */
+{
+	uint32_t A, B, C, D, E;
+	uint32_t w[16];
+
+	/* init A - E */
+
+	A = H0;
+	B = H1;
+	C = H2;
+	D = H3;
+	E = H4;
+
+	/* rounds 0 - 15 */
+
+	E += RL(A, 5) + K00_19 + ((B & (C ^ D)) ^ D) + data[0];
+	B = RL(B, 30);
+	D += RL(E, 5) + K00_19 + ((A & (B ^ C)) ^ C) + data[1];
+	A = RL(A, 30);
+	C += RL(D, 5) + K00_19 + ((E & (A ^ B)) ^ B) + data[2];
+	E = RL(E, 30);
+	B += RL(C, 5) + K00_19 + ((D & (E ^ A)) ^ A) + data[3];
+	D = RL(D, 30);
+	A += RL(B, 5) + K00_19 + ((C & (D ^ E)) ^ E) + data[4];
+	C = RL(C, 30);
+	E += RL(A, 5) + K00_19 + ((B & (C ^ D)) ^ D) + data[5];
+	B = RL(B, 30);
+	D += RL(E, 5) + K00_19 + ((A & (B ^ C)) ^ C) + data[6];
+	A = RL(A, 30);
+	C += RL(D, 5) + K00_19 + ((E & (A ^ B)) ^ B) + data[7];
+	E = RL(E, 30);
+	B += RL(C, 5) + K00_19 + ((D & (E ^ A)) ^ A) + data[8];
+	D = RL(D, 30);
+	A += RL(B, 5) + K00_19 + ((C & (D ^ E)) ^ E) + data[9];
+	C = RL(C, 30);
+	E += RL(A, 5) + K00_19 + ((B & (C ^ D)) ^ D) + data[10];
+	B = RL(B, 30);
+	D += RL(E, 5) + K00_19 + ((A & (B ^ C)) ^ C) + data[11];
+	A = RL(A, 30);
+	C += RL(D, 5) + K00_19 + ((E & (A ^ B)) ^ B) + data[12];
+	E = RL(E, 30);
+	B += RL(C, 5) + K00_19 + ((D & (E ^ A)) ^ A) + data[13];
+	D = RL(D, 30);
+	A += RL(B, 5) + K00_19 + ((C & (D ^ E)) ^ E) + data[14];
+	C = RL(C, 30);
+	E += RL(A, 5) + K00_19 + ((B & (C ^ D)) ^ D) + data[15];
+	B = RL(B, 30);
+
+	/* rounds 16 - 19 */
+
+	w[0] = data[0] ^ data[2] ^ data[8] ^ data[13];
+	w[0] = RL(w[0], 1);
+	D += RL(E, 5) + K00_19 + ((A & (B ^ C)) ^ C) + w[0];
+	A = RL(A, 30);
+	w[1] = data[1] ^ data[3] ^ data[9] ^ data[14];
+	w[1] = RL(w[1], 1);
+	C += RL(D, 5) + K00_19 + ((E & (A ^ B)) ^ B) + w[1];
+	E = RL(E, 30);
+	w[2] = data[2] ^ data[4] ^ data[10] ^ data[15];
+	w[2] = RL(w[2], 1);
+	B += RL(C, 5) + K00_19 + ((D & (E ^ A)) ^ A) + w[2];
+	D = RL(D, 30);
+	w[3] = data[3] ^ data[5] ^ data[11] ^ w[0];
+	w[3] = RL(w[3], 1);
+	A += RL(B, 5) + K00_19 + ((C & (D ^ E)) ^ E) + w[3];
+	C = RL(C, 30);
+
+	/* rounds 20 - 39 */
+
+	w[4] = data[4] ^ data[6] ^ data[12] ^ w[1];
+	w[4] = RL(w[4], 1);
+	E += RL(A, 5) + K20_39 + (B ^ C ^ D) + w[4];
+	B = RL(B, 30);
+	w[5] = data[5] ^ data[7] ^ data[13] ^ w[2];
+	w[5] = RL(w[5], 1);
+	D += RL(E, 5) + K20_39 + (A ^ B ^ C) + w[5];
+	A = RL(A, 30);
+	w[6] = data[6] ^ data[8] ^ data[14] ^ w[3];
+	w[6] = RL(w[6], 1);
+	C += RL(D, 5) + K20_39 + (E ^ A ^ B) + w[6];
+	E = RL(E, 30);
+	w[7] = data[7] ^ data[9] ^ data[15] ^ w[4];
+	w[7] = RL(w[7], 1);
+	B += RL(C, 5) + K20_39 + (D ^ E ^ A) + w[7];
+	D = RL(D, 30);
+	w[8] = data[8] ^ data[10] ^ w[0] ^ w[5];
+	w[8] = RL(w[8], 1);
+	A += RL(B, 5) + K20_39 + (C ^ D ^ E) + w[8];
+	C = RL(C, 30);
+	w[9] = data[9] ^ data[11] ^ w[1] ^ w[6];
+	w[9] = RL(w[9], 1);
+	E += RL(A, 5) + K20_39 + (B ^ C ^ D) + w[9];
+	B = RL(B, 30);
+	w[10] = data[10] ^ data[12] ^ w[2] ^ w[7];
+	w[10] = RL(w[10], 1);
+	D += RL(E, 5) + K20_39 + (A ^ B ^ C) + w[10];
+	A = RL(A, 30);
+	w[11] = data[11] ^ data[13] ^ w[3] ^ w[8];
+	w[11] = RL(w[11], 1);
+	C += RL(D, 5) + K20_39 + (E ^ A ^ B) + w[11];
+	E = RL(E, 30);
+	w[12] = data[12] ^ data[14] ^ w[4] ^ w[9];
+	w[12] = RL(w[12], 1);
+	B += RL(C, 5) + K20_39 + (D ^ E ^ A) + w[12];
+	D = RL(D, 30);
+	w[13] = data[13] ^ data[15] ^ w[5] ^ w[10];
+	w[13] = RL(w[13], 1);
+	A += RL(B, 5) + K20_39 + (C ^ D ^ E) + w[13];
+	C = RL(C, 30);
+	w[14] = data[14] ^ w[0] ^ w[6] ^ w[11];
+	w[14] = RL(w[14], 1);
+	E += RL(A, 5) + K20_39 + (B ^ C ^ D) + w[14];
+	B = RL(B, 30);
+	w[15] = data[15] ^ w[1] ^ w[7] ^ w[12];
+	w[15] = RL(w[15], 1);
+	D += RL(E, 5) + K20_39 + (A ^ B ^ C) + w[15];
+	A = RL(A, 30);
+	w[0] = w[0] ^ w[2] ^ w[8] ^ w[13];
+	w[0] = RL(w[0], 1);
+	C += RL(D, 5) + K20_39 + (E ^ A ^ B) + w[0];
+	E = RL(E, 30);
+	w[1] = w[1] ^ w[3] ^ w[9] ^ w[14];
+	w[1] = RL(w[1], 1);
+	B += RL(C, 5) + K20_39 + (D ^ E ^ A) + w[1];
+	D = RL(D, 30);
+	w[2] = w[2] ^ w[4] ^ w[10] ^ w[15];
+	w[2] = RL(w[2], 1);
+	A += RL(B, 5) + K20_39 + (C ^ D ^ E) + w[2];
+	C = RL(C, 30);
+	w[3] = w[3] ^ w[5] ^ w[11] ^ w[0];
+	w[3] = RL(w[3], 1);
+	E += RL(A, 5) + K20_39 + (B ^ C ^ D) + w[3];
+	B = RL(B, 30);
+	w[4] = w[4] ^ w[6] ^ w[12] ^ w[1];
+	w[4] = RL(w[4], 1);
+	D += RL(E, 5) + K20_39 + (A ^ B ^ C) + w[4];
+	A = RL(A, 30);
+	w[5] = w[5] ^ w[7] ^ w[13] ^ w[2];
+	w[5] = RL(w[5], 1);
+	C += RL(D, 5) + K20_39 + (E ^ A ^ B) + w[5];
+	E = RL(E, 30);
+	w[6] = w[6] ^ w[8] ^ w[14] ^ w[3];
+	w[6] = RL(w[6], 1);
+	B += RL(C, 5) + K20_39 + (D ^ E ^ A) + w[6];
+	D = RL(D, 30);
+	w[7] = w[7] ^ w[9] ^ w[15] ^ w[4];
+	w[7] = RL(w[7], 1);
+	A += RL(B, 5) + K20_39 + (C ^ D ^ E) + w[7];
+	C = RL(C, 30);
+
+	/* rounds 40 - 59 */
+
+	w[8] = w[8] ^ w[10] ^ w[0] ^ w[5];
+	w[8] = RL(w[8], 1);
+	E += RL(A, 5) + K40_59 + ((B & C) | (D & (B | C))) + w[8];
+	B = RL(B, 30);
+	w[9] = w[9] ^ w[11] ^ w[1] ^ w[6];
+	w[9] = RL(w[9], 1);
+	D += RL(E, 5) + K40_59 + ((A & B) | (C & (A | B))) + w[9];
+	A = RL(A, 30);
+	w[10] = w[10] ^ w[12] ^ w[2] ^ w[7];
+	w[10] = RL(w[10], 1);
+	C += RL(D, 5) + K40_59 + ((E & A) | (B & (E | A))) + w[10];
+	E = RL(E, 30);
+	w[11] = w[11] ^ w[13] ^ w[3] ^ w[8];
+	w[11] = RL(w[11], 1);
+	B += RL(C, 5) + K40_59 + ((D & E) | (A & (D | E))) + w[11];
+	D = RL(D, 30);
+	w[12] = w[12] ^ w[14] ^ w[4] ^ w[9];
+	w[12] = RL(w[12], 1);
+	A += RL(B, 5) + K40_59 + ((C & D) | (E & (C | D))) + w[12];
+	C = RL(C, 30);
+	w[13] = w[13] ^ w[15] ^ w[5] ^ w[10];
+	w[13] = RL(w[13], 1);
+	E += RL(A, 5) + K40_59 + ((B & C) | (D & (B | C))) + w[13];
+	B = RL(B, 30);
+	w[14] = w[14] ^ w[0] ^ w[6] ^ w[11];
+	w[14] = RL(w[14], 1);
+	D += RL(E, 5) + K40_59 + ((A & B) | (C & (A | B))) + w[14];
+	A = RL(A, 30);
+	w[15] = w[15] ^ w[1] ^ w[7] ^ w[12];
+	w[15] = RL(w[15], 1);
+	C += RL(D, 5) + K40_59 + ((E & A) | (B & (E | A))) + w[15];
+	E = RL(E, 30);
+	w[0] = w[0] ^ w[2] ^ w[8] ^ w[13];
+	w[0] = RL(w[0], 1);
+	B += RL(C, 5) + K40_59 + ((D & E) | (A & (D | E))) + w[0];
+	D = RL(D, 30);
+	w[1] = w[1] ^ w[3] ^ w[9] ^ w[14];
+	w[1] = RL(w[1], 1);
+	A += RL(B, 5) + K40_59 + ((C & D) | (E & (C | D))) + w[1];
+	C = RL(C, 30);
+	w[2] = w[2] ^ w[4] ^ w[10] ^ w[15];
+	w[2] = RL(w[2], 1);
+	E += RL(A, 5) + K40_59 + ((B & C) | (D & (B | C))) + w[2];
+	B = RL(B, 30);
+	w[3] = w[3] ^ w[5] ^ w[11] ^ w[0];
+	w[3] = RL(w[3], 1);
+	D += RL(E, 5) + K40_59 + ((A & B) | (C & (A | B))) + w[3];
+	A = RL(A, 30);
+	w[4] = w[4] ^ w[6] ^ w[12] ^ w[1];
+	w[4] = RL(w[4], 1);
+	C += RL(D, 5) + K40_59 + ((E & A) | (B & (E | A))) + w[4];
+	E = RL(E, 30);
+	w[5] = w[5] ^ w[7] ^ w[13] ^ w[2];
+	w[5] = RL(w[5], 1);
+	B += RL(C, 5) + K40_59 + ((D & E) | (A & (D | E))) + w[5];
+	D = RL(D, 30);
+	w[6] = w[6] ^ w[8] ^ w[14] ^ w[3];
+	w[6] = RL(w[6], 1);
+	A += RL(B, 5) + K40_59 + ((C & D) | (E & (C | D))) + w[6];
+	C = RL(C, 30);
+	w[7] = w[7] ^ w[9] ^ w[15] ^ w[4];
+	w[7] = RL(w[7], 1);
+	E += RL(A, 5) + K40_59 + ((B & C) | (D & (B | C))) + w[7];
+	B = RL(B, 30);
+	w[8] = w[8] ^ w[10] ^ w[0] ^ w[5];
+	w[8] = RL(w[8], 1);
+	D += RL(E, 5) + K40_59 + ((A & B) | (C & (A | B))) + w[8];
+	A = RL(A, 30);
+	w[9] = w[9] ^ w[11] ^ w[1] ^ w[6];
+	w[9] = RL(w[9], 1);
+	C += RL(D, 5) + K40_59 + ((E & A) | (B & (E | A))) + w[9];
+	E = RL(E, 30);
+	w[10] = w[10] ^ w[12] ^ w[2] ^ w[7];
+	w[10] = RL(w[10], 1);
+	B += RL(C, 5) + K40_59 + ((D & E) | (A & (D | E))) + w[10];
+	D = RL(D, 30);
+	w[11] = w[11] ^ w[13] ^ w[3] ^ w[8];
+	w[11] = RL(w[11], 1);
+	A += RL(B, 5) + K40_59 + ((C & D) | (E & (C | D))) + w[11];
+	C = RL(C, 30);
+
+	/* rounds 60 - 79 */
+
+	w[12] = w[12] ^ w[14] ^ w[4] ^ w[9];
+	w[12] = RL(w[12], 1);
+	E += RL(A, 5) + K60_79 + (B ^ C ^ D) + w[12];
+	B = RL(B, 30);
+	w[13] = w[13] ^ w[15] ^ w[5] ^ w[10];
+	w[13] = RL(w[13], 1);
+	D += RL(E, 5) + K60_79 + (A ^ B ^ C) + w[13];
+	A = RL(A, 30);
+	w[14] = w[14] ^ w[0] ^ w[6] ^ w[11];
+	w[14] = RL(w[14], 1);
+	C += RL(D, 5) + K60_79 + (E ^ A ^ B) + w[14];
+	E = RL(E, 30);
+	w[15] = w[15] ^ w[1] ^ w[7] ^ w[12];
+	w[15] = RL(w[15], 1);
+	B += RL(C, 5) + K60_79 + (D ^ E ^ A) + w[15];
+	D = RL(D, 30);
+	w[0] = w[0] ^ w[2] ^ w[8] ^ w[13];
+	w[0] = RL(w[0], 1);
+	A += RL(B, 5) + K60_79 + (C ^ D ^ E) + w[0];
+	C = RL(C, 30);
+	w[1] = w[1] ^ w[3] ^ w[9] ^ w[14];
+	w[1] = RL(w[1], 1);
+	E += RL(A, 5) + K60_79 + (B ^ C ^ D) + w[1];
+	B = RL(B, 30);
+	w[2] = w[2] ^ w[4] ^ w[10] ^ w[15];
+	w[2] = RL(w[2], 1);
+	D += RL(E, 5) + K60_79 + (A ^ B ^ C) + w[2];
+	A = RL(A, 30);
+	w[3] = w[3] ^ w[5] ^ w[11] ^ w[0];
+	w[3] = RL(w[3], 1);
+	C += RL(D, 5) + K60_79 + (E ^ A ^ B) + w[3];
+	E = RL(E, 30);
+	w[4] = w[4] ^ w[6] ^ w[12] ^ w[1];
+	w[4] = RL(w[4], 1);
+	B += RL(C, 5) + K60_79 + (D ^ E ^ A) + w[4];
+	D = RL(D, 30);
+	w[5] = w[5] ^ w[7] ^ w[13] ^ w[2];
+	w[5] = RL(w[5], 1);
+	A += RL(B, 5) + K60_79 + (C ^ D ^ E) + w[5];
+	C = RL(C, 30);
+	w[6] = w[6] ^ w[8] ^ w[14] ^ w[3];
+	w[6] = RL(w[6], 1);
+	E += RL(A, 5) + K60_79 + (B ^ C ^ D) + w[6];
+	B = RL(B, 30);
+	w[7] = w[7] ^ w[9] ^ w[15] ^ w[4];
+	w[7] = RL(w[7], 1);
+	D += RL(E, 5) + K60_79 + (A ^ B ^ C) + w[7];
+	A = RL(A, 30);
+	w[8] = w[8] ^ w[10] ^ w[0] ^ w[5];
+	w[8] = RL(w[8], 1);
+	C += RL(D, 5) + K60_79 + (E ^ A ^ B) + w[8];
+	E = RL(E, 30);
+	w[9] = w[9] ^ w[11] ^ w[1] ^ w[6];
+	w[9] = RL(w[9], 1);
+	B += RL(C, 5) + K60_79 + (D ^ E ^ A) + w[9];
+	D = RL(D, 30);
+	w[10] = w[10] ^ w[12] ^ w[2] ^ w[7];
+	w[10] = RL(w[10], 1);
+	A += RL(B, 5) + K60_79 + (C ^ D ^ E) + w[10];
+	C = RL(C, 30);
+	w[11] = w[11] ^ w[13] ^ w[3] ^ w[8];
+	w[11] = RL(w[11], 1);
+	E += RL(A, 5) + K60_79 + (B ^ C ^ D) + w[11];
+	B = RL(B, 30);
+	w[12] = w[12] ^ w[14] ^ w[4] ^ w[9];
+	w[12] = RL(w[12], 1);
+	D += RL(E, 5) + K60_79 + (A ^ B ^ C) + w[12];
+	A = RL(A, 30);
+	w[13] = w[13] ^ w[15] ^ w[5] ^ w[10];
+	C += RL(D, 5) + K60_79 + (E ^ A ^ B) + RL(w[13], 1);
+	E = RL(E, 30);
+	w[14] = w[14] ^ w[0] ^ w[6] ^ w[11];
+	B += RL(C, 5) + K60_79 + (D ^ E ^ A) + RL(w[14], 1);
+	D = RL(D, 30);
+
+	/* update H0 - H4 (the final round, 79, is folded directly into these updates) */
+
+	w[15] = w[15] ^ w[1] ^ w[7] ^ w[12];
+	H0 += A + RL(B, 5) + K60_79 + (C ^ D ^ E) + RL(w[15], 1);
+	H1 += B;
+	H2 += RL(C, 30);
+	H3 += D;
+	H4 += E;
+
+	/* clear temp variables */
+
+	A = B = C = D = E = 0;
+	memset(w, 0, sizeof(w));
+}
+
+/* ntru_crypto_sha1()
+ *
+ * This routine provides all operations for a SHA-1 hash, and the use
+ * of SHA-1 for DSA signing and key generation.
+ * It may be used to initialize, update, or complete a message digest,
+ * or any combination of those actions, as determined by the SHA_INIT flag,
+ * the in_len parameter, and the SHA_FINISH flag, respectively.
+ *
+ * When in_len == 0 (no data to hash), the parameter, in, may be NULL.
+ * When the SHA_FINISH flag is not set, the parameter, md, may be NULL.
+ *
+ * Initialization is requested by setting the SHA_INIT flag.
+ * Only standard initialization is supported by this implementation:
+ * init must be NULL, and a non-NULL init (alternate initialization
+ * vector) is rejected with SHA_BAD_PARAMETER.
+ *
+ * The hash operation can be updated with any number of input bytes, including
+ * zero.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+uint32_t
+ntru_crypto_sha1(
+    NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */
+    uint32_t const *init,    /*     in - pointer to alternate */
+                             /*          initialization - may be NULL */
+    uint8_t const *in,       /*     in - pointer to input data -
+                                                may be NULL if in_len == 0 */
+    uint32_t in_len,         /*     in - number of input data bytes */
+    uint32_t flags,          /*     in - INIT, FINISH flags */
+    uint8_t *md)             /*    out - address for message digest -
+                                                may be NULL if not FINISH */
+{
+	uint32_t in_blk[16]; /* input block */
+	uint32_t space;
+	uint8_t *d = NULL;
+
+	/* check error conditions */
+
+	if (!c || (in_len && !in) || ((flags & SHA_FINISH) && !md)) {
+		SHA_RET(SHA_BAD_PARAMETER)
+	}
+
+	/* initialize context if requested */
+
+	if (flags & SHA_INIT) {
+
+		/* init chaining state */
+
+		if (!init) {
+			c->state[0] = H0_INIT; /* standard initialization */
+			c->state[1] = H1_INIT;
+			c->state[2] = H2_INIT;
+			c->state[3] = H3_INIT;
+			c->state[4] = H4_INIT;
+		} else {
+			/* Non standard initialization values are not supported */
+			SHA_RET(SHA_BAD_PARAMETER);
+		}
+
+		/* init bit count and number of unhashed data bytes */
+
+		c->num_bits_hashed[0] = 0;
+		c->num_bits_hashed[1] = 0;
+		c->unhashed_len = 0;
+	}
+
+	/* determine space left in unhashed data buffer */
+
+	if (c->unhashed_len > 63) {
+		SHA_RET(SHA_FAIL)
+	}
+
+	space = 64 - c->unhashed_len;
+
+	/* process input if it exists */
+
+	if (in_len) {
+
+		/* update count of bits hashed */
+
+		{
+			uint32_t bits0, bits1;
+
+			bits0 = in_len << 3;
+			bits1 = in_len >> 29;
+
+			if ((c->num_bits_hashed[0] += bits0) < bits0) {
+				bits1++;
+			}
+
+			if ((c->num_bits_hashed[1] += bits1) < bits1) {
+				memset((uint8_t *) c, 0, sizeof(NTRU_CRYPTO_SHA1_CTX));
+				memset((char *) in_blk, 0, sizeof(in_blk));
+				SHA_RET(SHA_OVERFLOW)
+			}
+		}
+
+		/* process input bytes */
+
+		if (in_len < space) {
+
+			/* input does not fill block buffer:
+             * add input to buffer
+             */
+
+			memcpy(c->unhashed + c->unhashed_len, in, in_len);
+			c->unhashed_len += in_len;
+
+		} else {
+			uint32_t blks;
+
+			/* input will fill block buffer:
+             *  fill unhashed data buffer,
+             *  convert to block buffer,
+             *  and process block
+             */
+
+			in_len -= space;
+
+			for (d = c->unhashed + c->unhashed_len; space; space--) {
+				*d++ = *in++;
+			}
+
+			ntru_crypto_msbyte_2_uint32(in_blk, (uint8_t const *) c->unhashed,
+			                            16);
+			sha1_blk((uint32_t const *) in_blk, c->state);
+
+			/* process any remaining full blocks */
+
+			for (blks = in_len >> 6; blks--; in += 64) {
+				ntru_crypto_msbyte_2_uint32(in_blk, in, 16);
+				sha1_blk((uint32_t const *) in_blk, c->state);
+			}
+
+			/* put any remaining input in the unhashed data buffer */
+
+			in_len &= 0x3f;
+			memcpy(c->unhashed, in, in_len);
+			c->unhashed_len = in_len;
+		}
+	}
+
+	/* complete message digest if requested */
+
+	if (flags & SHA_FINISH) {
+		space = 64 - c->unhashed_len;
+
+		/* add 0x80 padding byte to the unhashed data buffer
+         * (there is always space since the buffer can't be full)
+         */
+
+		d = c->unhashed + c->unhashed_len;
+		*d++ = 0x80;
+		space--;
+
+		/* check for space for bit count */
+
+		if (space < 8) {
+
+			/* no space for count:
+             *  fill remainder of unhashed data buffer with zeros,
+             *  convert to input block,
+             *  process block,
+             *  fill all but 8 bytes of unhashed data buffer with zeros
+             */
+
+			memset(d, 0, space);
+			ntru_crypto_msbyte_2_uint32(in_blk,
+			                            (uint8_t const *) c->unhashed, 16);
+			sha1_blk((uint32_t const *) in_blk, c->state);
+			memset(c->unhashed, 0, 56);
+
+		} else {
+
+			/* fill unhashed data buffer with zeros,
+             *  leaving space for bit count
+             */
+
+			for (space -= 8; space; space--) {
+				*d++ = 0;
+			}
+		}
+
+		/* convert partially filled unhashed data buffer to input block and
+         *  add bit count to input block
+         */
+
+		ntru_crypto_msbyte_2_uint32(in_blk, (uint8_t const *) c->unhashed,
+		                            14);
+		in_blk[14] = c->num_bits_hashed[1];
+		in_blk[15] = c->num_bits_hashed[0];
+
+		/* process last block */
+
+		sha1_blk((uint32_t const *) in_blk, c->state);
+
+		/* copy result to message digest buffer */
+
+		ntru_crypto_uint32_2_msbyte(md, c->state, 5);
+
+		/* clear context and stack variables */
+
+		memset((uint8_t *) c, 0, sizeof(NTRU_CRYPTO_SHA1_CTX));
+		memset((char *) in_blk, 0, sizeof(in_blk));
+	}
+
+	SHA_RET(SHA_OK)
+}
+
+/* ntru_crypto_sha1_init
+ *
+ * This routine performs standard initialization of the SHA-1 state.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ */
+
+uint32_t
+ntru_crypto_sha1_init(
+    NTRU_CRYPTO_SHA1_CTX *c) /* in/out - pointer to SHA-1 context */
+{
+	return ntru_crypto_sha1(c, NULL, NULL, 0, SHA_INIT, NULL);
+}
+
+/* ntru_crypto_sha1_update
+ *
+ * This routine processes input data and updates the SHA-1 hash calculation.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+uint32_t
+ntru_crypto_sha1_update(
+    NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */
+    uint8_t const *data,     /*    in - pointer to input data */
+    uint32_t data_len)       /*    in - number of bytes of input data */
+{
+	return ntru_crypto_sha1(c, NULL, data, data_len, SHA_DATA_ONLY, NULL);
+}
+
+/* ntru_crypto_sha1_final
+ *
+ * This routine completes the SHA-1 hash calculation and returns the
+ * message digest.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+uint32_t
+ntru_crypto_sha1_final(
+    NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */
+    uint8_t *md)             /*   out - address for message digest */
+{
+	return ntru_crypto_sha1(c, NULL, NULL, 0, SHA_FINISH, md);
+}
+
+/* ntru_crypto_sha1_digest
+ *
+ * This routine computes a SHA-1 message digest.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+uint32_t
+ntru_crypto_sha1_digest(
+    uint8_t const *data, /*  in - pointer to input data */
+    uint32_t data_len,   /*  in - number of bytes of input data */
+    uint8_t *md)         /* out - address for message digest */
+{
+	NTRU_CRYPTO_SHA1_CTX c;
+
+	return ntru_crypto_sha1(&c, NULL, data, data_len, SHA_INIT | SHA_FINISH, md);
+}
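+
+/* Illustrative usage sketch; the message contents and buffer names are
+ * arbitrary example values.  The digest buffer must hold 20 bytes
+ * (SHA_1_MD_LEN).
+ *
+ *     uint8_t msg[3] = {'a', 'b', 'c'};
+ *     uint8_t md[20];
+ *     NTRU_CRYPTO_SHA1_CTX ctx;
+ *
+ *     // one-shot digest
+ *     if (ntru_crypto_sha1_digest(msg, sizeof(msg), md) != SHA_OK) {
+ *         // handle error
+ *     }
+ *
+ *     // equivalent incremental hashing
+ *     if (ntru_crypto_sha1_init(&ctx) == SHA_OK &&
+ *         ntru_crypto_sha1_update(&ctx, msg, sizeof(msg)) == SHA_OK &&
+ *         ntru_crypto_sha1_final(&ctx, md) == SHA_OK) {
+ *         // md now holds SHA-1("abc")
+ *     }
+ */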
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha1.h b/crypt/liboqs/kex_ntru/ntru_crypto_sha1.h
new file mode 100644
index 0000000000000000000000000000000000000000..4a11de74ed61811fd643f95bd1c1cdaeb94705f7
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha1.h
@@ -0,0 +1,163 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_sha1.h
+ *
+ * Contents: Definitions and declarations for the SHA-1 implementation.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_SHA1_H
+#define NTRU_CRYPTO_SHA1_H
+
+#include "ntru_crypto_platform.h"
+#include "ntru_crypto_sha.h"
+
+/******************************************
+ * macros needed for generic hash objects * 
+ ******************************************/
+
+#define SHA_1_CTX_LEN sizeof(NTRU_CRYPTO_SHA1_CTX) /* no. bytes in SHA-1 ctx */
+#define SHA_1_BLK_LEN 64                         /* 64 bytes in input block */
+#define SHA_1_MD_LEN 20                          /* 20 bytes in msg digest */
+#define SHA_1_INIT_FN &ntru_crypto_sha1_init     /* init function */
+#define SHA_1_UPDATE_FN &ntru_crypto_sha1_update /* update function */
+#define SHA_1_FINAL_FN &ntru_crypto_sha1_final   /* final function */
+#define SHA_1_DIGEST_FN &ntru_crypto_sha1_digest /* digest function */
+
+/*************************
+ * structure definitions *
+ *************************/
+
+/* SHA-1 context structure */
+
+typedef struct {
+	uint32_t state[5];           /* chaining state */
+	uint32_t num_bits_hashed[2]; /* number of bits hashed */
+	uint8_t unhashed[64];        /* input data not yet hashed */
+	uint32_t unhashed_len;       /* number of bytes of unhashed input data */
+} NTRU_CRYPTO_SHA1_CTX;
+
+/*************************
+ * function declarations *
+ *************************/
+
+/* ntru_crypto_sha1()
+ *
+ * This routine provides all operations for a SHA-1 hash, and the use
+ * of SHA-1 for DSA signing and key generation.
+ * It may be used to initialize, update, or complete a message digest,
+ * or any combination of those actions, as determined by the SHA_INIT flag,
+ * the in_len parameter, and the SHA_FINISH flag, respectively.
+ *
+ * When in_len == 0 (no data to hash), the parameter, in, may be NULL.
+ * When the SHA_FINISH flag is not set, the parameter, md, may be NULL.
+ *
+ * Initialization is requested by setting the SHA_INIT flag.
+ * Only standard initialization is supported by this implementation:
+ * init must be NULL, and a non-NULL init (alternate initialization
+ * vector) is rejected with SHA_BAD_PARAMETER.
+ *
+ * The hash operation can be updated with any number of input bytes, including
+ * zero.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+extern uint32_t
+ntru_crypto_sha1(
+    NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */
+    uint32_t const *init,    /*     in - pointer to alternate */
+                             /*          initialization - may be NULL */
+    uint8_t const *in,       /*     in - pointer to input data -
+                                                may be NULL if in_len == 0 */
+    uint32_t in_len,         /*     in - number of input data bytes */
+    uint32_t flags,          /*     in - INIT, FINISH */
+    uint8_t *md);            /*    out - address for message digest -
+                                                may be NULL if not FINISH */
+
+/* ntru_crypto_sha1_init
+ *
+ * This routine performs standard initialization of the SHA-1 state.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ */
+
+extern uint32_t
+ntru_crypto_sha1_init(
+    NTRU_CRYPTO_SHA1_CTX *c); /* in/out - pointer to SHA-1 context */
+
+/* ntru_crypto_sha1_update
+ *
+ * This routine processes input data and updates the SHA-1 hash calculation.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+extern uint32_t
+ntru_crypto_sha1_update(
+    NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */
+    uint8_t const *data,     /*    in - pointer to input data */
+    uint32_t data_len);      /*    in - number of bytes of input data */
+
+/* ntru_crypto_sha1_final
+ *
+ * This routine completes the SHA-1 hash calculation and returns the
+ * message digest.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+extern uint32_t
+ntru_crypto_sha1_final(
+    NTRU_CRYPTO_SHA1_CTX *c, /* in/out - pointer to SHA-1 context */
+    uint8_t *md);            /*   out - address for message digest */
+
+/* ntru_crypto_sha1_digest
+ *
+ * This routine computes a SHA-1 message digest.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+uint32_t
+ntru_crypto_sha1_digest(
+    uint8_t const *data, /*  in - pointer to input data */
+    uint32_t data_len,   /*  in - number of bytes of input data */
+    uint8_t *md);        /* out - address for message digest */
+
+#endif /* NTRU_CRYPTO_SHA1_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha2.c b/crypt/liboqs/kex_ntru/ntru_crypto_sha2.c
new file mode 100644
index 0000000000000000000000000000000000000000..8dc54a311bcedebf91bc17f6c7b0a129ceadfda3
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha2.c
@@ -0,0 +1,570 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_sha2.c
+ *
+ * Contents: Routines implementing the SHA-256 hash calculation.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto.h"
+#include "ntru_crypto_sha2.h"
+#include "ntru_crypto_msbyte_uint32.h"
+
+/* chaining state elements */
+
+#define H0 state[0]
+#define H1 state[1]
+#define H2 state[2]
+#define H3 state[3]
+#define H4 state[4]
+#define H5 state[5]
+#define H6 state[6]
+#define H7 state[7]
+
+/* standard SHA-256 initialization values */
+
+#define H0_SHA256_INIT 0x6a09e667UL
+#define H1_SHA256_INIT 0xbb67ae85UL
+#define H2_SHA256_INIT 0x3c6ef372UL
+#define H3_SHA256_INIT 0xa54ff53aUL
+#define H4_SHA256_INIT 0x510e527fUL
+#define H5_SHA256_INIT 0x9b05688cUL
+#define H6_SHA256_INIT 0x1f83d9abUL
+#define H7_SHA256_INIT 0x5be0cd19UL
+
+/* sha2_blk()
+ *
+ * This routine updates the current hash output (chaining state)
+ * by performing SHA-256 on a 512-bit block of data represented
+ * as sixteen 32-bit words.
+ */
+
+#define RR(a, n) (((a) >> (n)) | ((a) << (32 - (n))))
+#define S0(a) (RR((a), 2) ^ RR((a), 13) ^ RR((a), 22))
+#define S1(a) (RR((a), 6) ^ RR((a), 11) ^ RR((a), 25))
+#define s0(a) (RR((a), 7) ^ RR((a), 18) ^ ((a) >> 3))
+#define s1(a) (RR((a), 17) ^ RR((a), 19) ^ ((a) >> 10))
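+/* RR is a 32-bit rotate right; S0/S1 and s0/s1 are the SHA-256 functions
+ * written as Sigma0/Sigma1 and sigma0/sigma1 in FIPS 180-4 */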
+
+static void
+sha2_blk(
+    uint32_t const *data, /*     in - ptr to 16 32-bit word input block */
+    uint32_t *state)      /* in/out - ptr to 8 32-bit word chaining state */
+{
+	uint32_t A, B, C, D, E, F, G, H;
+	uint32_t w[16];
+
+	/* init A - H */
+
+	A = H0;
+	B = H1;
+	C = H2;
+	D = H3;
+	E = H4;
+	F = H5;
+	G = H6;
+	H = H7;
+
+	/* rounds 0 - 15 */
+
+	H += S1(E) + ((E & (F ^ G)) ^ G) + 0x428A2F98UL + data[0];
+	D += H;
+	H += S0(A) + ((A & B) | (C & (A | B)));
+	G += S1(D) + ((D & (E ^ F)) ^ F) + 0x71374491UL + data[1];
+	C += G;
+	G += S0(H) + ((H & A) | (B & (H | A)));
+	F += S1(C) + ((C & (D ^ E)) ^ E) + 0xB5C0FBCFUL + data[2];
+	B += F;
+	F += S0(G) + ((G & H) | (A & (G | H)));
+	E += S1(B) + ((B & (C ^ D)) ^ D) + 0xE9B5DBA5UL + data[3];
+	A += E;
+	E += S0(F) + ((F & G) | (H & (F | G)));
+	D += S1(A) + ((A & (B ^ C)) ^ C) + 0x3956C25BUL + data[4];
+	H += D;
+	D += S0(E) + ((E & F) | (G & (E | F)));
+	C += S1(H) + ((H & (A ^ B)) ^ B) + 0x59F111F1UL + data[5];
+	G += C;
+	C += S0(D) + ((D & E) | (F & (D | E)));
+	B += S1(G) + ((G & (H ^ A)) ^ A) + 0x923F82A4UL + data[6];
+	F += B;
+	B += S0(C) + ((C & D) | (E & (C | D)));
+	A += S1(F) + ((F & (G ^ H)) ^ H) + 0xAB1C5ED5UL + data[7];
+	E += A;
+	A += S0(B) + ((B & C) | (D & (B | C)));
+	H += S1(E) + ((E & (F ^ G)) ^ G) + 0xD807AA98UL + data[8];
+	D += H;
+	H += S0(A) + ((A & B) | (C & (A | B)));
+	G += S1(D) + ((D & (E ^ F)) ^ F) + 0x12835B01UL + data[9];
+	C += G;
+	G += S0(H) + ((H & A) | (B & (H | A)));
+	F += S1(C) + ((C & (D ^ E)) ^ E) + 0x243185BEUL + data[10];
+	B += F;
+	F += S0(G) + ((G & H) | (A & (G | H)));
+	E += S1(B) + ((B & (C ^ D)) ^ D) + 0x550C7DC3UL + data[11];
+	A += E;
+	E += S0(F) + ((F & G) | (H & (F | G)));
+	D += S1(A) + ((A & (B ^ C)) ^ C) + 0x72BE5D74UL + data[12];
+	H += D;
+	D += S0(E) + ((E & F) | (G & (E | F)));
+	C += S1(H) + ((H & (A ^ B)) ^ B) + 0x80DEB1FEUL + data[13];
+	G += C;
+	C += S0(D) + ((D & E) | (F & (D | E)));
+	B += S1(G) + ((G & (H ^ A)) ^ A) + 0x9BDC06A7UL + data[14];
+	F += B;
+	B += S0(C) + ((C & D) | (E & (C | D)));
+	A += S1(F) + ((F & (G ^ H)) ^ H) + 0xC19BF174UL + data[15];
+	E += A;
+	A += S0(B) + ((B & C) | (D & (B | C)));
+
+	/* rounds 16 - 63 */
+
+	w[0] = data[0] + s0(data[1]) + data[9] + s1(data[14]);
+	H += S1(E) + ((E & (F ^ G)) ^ G) + 0xE49B69C1UL + w[0];
+	D += H;
+	H += S0(A) + ((A & B) | (C & (A | B)));
+	w[1] = data[1] + s0(data[2]) + data[10] + s1(data[15]);
+	G += S1(D) + ((D & (E ^ F)) ^ F) + 0xEFBE4786UL + w[1];
+	C += G;
+	G += S0(H) + ((H & A) | (B & (H | A)));
+	w[2] = data[2] + s0(data[3]) + data[11] + s1(w[0]);
+	F += S1(C) + ((C & (D ^ E)) ^ E) + 0x0FC19DC6UL + w[2];
+	B += F;
+	F += S0(G) + ((G & H) | (A & (G | H)));
+	w[3] = data[3] + s0(data[4]) + data[12] + s1(w[1]);
+	E += S1(B) + ((B & (C ^ D)) ^ D) + 0x240CA1CCUL + w[3];
+	A += E;
+	E += S0(F) + ((F & G) | (H & (F | G)));
+	w[4] = data[4] + s0(data[5]) + data[13] + s1(w[2]);
+	D += S1(A) + ((A & (B ^ C)) ^ C) + 0x2DE92C6FUL + w[4];
+	H += D;
+	D += S0(E) + ((E & F) | (G & (E | F)));
+	w[5] = data[5] + s0(data[6]) + data[14] + s1(w[3]);
+	C += S1(H) + ((H & (A ^ B)) ^ B) + 0x4A7484AAUL + w[5];
+	G += C;
+	C += S0(D) + ((D & E) | (F & (D | E)));
+	w[6] = data[6] + s0(data[7]) + data[15] + s1(w[4]);
+	B += S1(G) + ((G & (H ^ A)) ^ A) + 0x5CB0A9DCUL + w[6];
+	F += B;
+	B += S0(C) + ((C & D) | (E & (C | D)));
+	w[7] = data[7] + s0(data[8]) + w[0] + s1(w[5]);
+	A += S1(F) + ((F & (G ^ H)) ^ H) + 0x76F988DAUL + w[7];
+	E += A;
+	A += S0(B) + ((B & C) | (D & (B | C)));
+	w[8] = data[8] + s0(data[9]) + w[1] + s1(w[6]);
+	H += S1(E) + ((E & (F ^ G)) ^ G) + 0x983E5152UL + w[8];
+	D += H;
+	H += S0(A) + ((A & B) | (C & (A | B)));
+	w[9] = data[9] + s0(data[10]) + w[2] + s1(w[7]);
+	G += S1(D) + ((D & (E ^ F)) ^ F) + 0xA831C66DUL + w[9];
+	C += G;
+	G += S0(H) + ((H & A) | (B & (H | A)));
+	w[10] = data[10] + s0(data[11]) + w[3] + s1(w[8]);
+	F += S1(C) + ((C & (D ^ E)) ^ E) + 0xB00327C8UL + w[10];
+	B += F;
+	F += S0(G) + ((G & H) | (A & (G | H)));
+	w[11] = data[11] + s0(data[12]) + w[4] + s1(w[9]);
+	E += S1(B) + ((B & (C ^ D)) ^ D) + 0xBF597FC7UL + w[11];
+	A += E;
+	E += S0(F) + ((F & G) | (H & (F | G)));
+	w[12] = data[12] + s0(data[13]) + w[5] + s1(w[10]);
+	D += S1(A) + ((A & (B ^ C)) ^ C) + 0xC6E00BF3UL + w[12];
+	H += D;
+	D += S0(E) + ((E & F) | (G & (E | F)));
+	w[13] = data[13] + s0(data[14]) + w[6] + s1(w[11]);
+	C += S1(H) + ((H & (A ^ B)) ^ B) + 0xD5A79147UL + w[13];
+	G += C;
+	C += S0(D) + ((D & E) | (F & (D | E)));
+	w[14] = data[14] + s0(data[15]) + w[7] + s1(w[12]);
+	B += S1(G) + ((G & (H ^ A)) ^ A) + 0x06CA6351UL + w[14];
+	F += B;
+	B += S0(C) + ((C & D) | (E & (C | D)));
+	w[15] = data[15] + s0(w[0]) + w[8] + s1(w[13]);
+	A += S1(F) + ((F & (G ^ H)) ^ H) + 0x14292967UL + w[15];
+	E += A;
+	A += S0(B) + ((B & C) | (D & (B | C)));
+	w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
+	H += S1(E) + ((E & (F ^ G)) ^ G) + 0x27B70A85UL + w[0];
+	D += H;
+	H += S0(A) + ((A & B) | (C & (A | B)));
+	w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
+	G += S1(D) + ((D & (E ^ F)) ^ F) + 0x2E1B2138UL + w[1];
+	C += G;
+	G += S0(H) + ((H & A) | (B & (H | A)));
+	w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
+	F += S1(C) + ((C & (D ^ E)) ^ E) + 0x4D2C6DFCUL + w[2];
+	B += F;
+	F += S0(G) + ((G & H) | (A & (G | H)));
+	w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
+	E += S1(B) + ((B & (C ^ D)) ^ D) + 0x53380D13UL + w[3];
+	A += E;
+	E += S0(F) + ((F & G) | (H & (F | G)));
+	w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
+	D += S1(A) + ((A & (B ^ C)) ^ C) + 0x650A7354UL + w[4];
+	H += D;
+	D += S0(E) + ((E & F) | (G & (E | F)));
+	w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
+	C += S1(H) + ((H & (A ^ B)) ^ B) + 0x766A0ABBUL + w[5];
+	G += C;
+	C += S0(D) + ((D & E) | (F & (D | E)));
+	w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
+	B += S1(G) + ((G & (H ^ A)) ^ A) + 0x81C2C92EUL + w[6];
+	F += B;
+	B += S0(C) + ((C & D) | (E & (C | D)));
+	w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
+	A += S1(F) + ((F & (G ^ H)) ^ H) + 0x92722C85UL + w[7];
+	E += A;
+	A += S0(B) + ((B & C) | (D & (B | C)));
+	w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
+	H += S1(E) + ((E & (F ^ G)) ^ G) + 0xA2BFE8A1UL + w[8];
+	D += H;
+	H += S0(A) + ((A & B) | (C & (A | B)));
+	w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
+	G += S1(D) + ((D & (E ^ F)) ^ F) + 0xA81A664BUL + w[9];
+	C += G;
+	G += S0(H) + ((H & A) | (B & (H | A)));
+	w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
+	F += S1(C) + ((C & (D ^ E)) ^ E) + 0xC24B8B70UL + w[10];
+	B += F;
+	F += S0(G) + ((G & H) | (A & (G | H)));
+	w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
+	E += S1(B) + ((B & (C ^ D)) ^ D) + 0xC76C51A3UL + w[11];
+	A += E;
+	E += S0(F) + ((F & G) | (H & (F | G)));
+	w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
+	D += S1(A) + ((A & (B ^ C)) ^ C) + 0xD192E819UL + w[12];
+	H += D;
+	D += S0(E) + ((E & F) | (G & (E | F)));
+	w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
+	C += S1(H) + ((H & (A ^ B)) ^ B) + 0xD6990624UL + w[13];
+	G += C;
+	C += S0(D) + ((D & E) | (F & (D | E)));
+	w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
+	B += S1(G) + ((G & (H ^ A)) ^ A) + 0xF40E3585UL + w[14];
+	F += B;
+	B += S0(C) + ((C & D) | (E & (C | D)));
+	w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
+	A += S1(F) + ((F & (G ^ H)) ^ H) + 0x106AA070UL + w[15];
+	E += A;
+	A += S0(B) + ((B & C) | (D & (B | C)));
+	w[0] = w[0] + s0(w[1]) + w[9] + s1(w[14]);
+	H += S1(E) + ((E & (F ^ G)) ^ G) + 0x19A4C116UL + w[0];
+	D += H;
+	H += S0(A) + ((A & B) | (C & (A | B)));
+	w[1] = w[1] + s0(w[2]) + w[10] + s1(w[15]);
+	G += S1(D) + ((D & (E ^ F)) ^ F) + 0x1E376C08UL + w[1];
+	C += G;
+	G += S0(H) + ((H & A) | (B & (H | A)));
+	w[2] = w[2] + s0(w[3]) + w[11] + s1(w[0]);
+	F += S1(C) + ((C & (D ^ E)) ^ E) + 0x2748774CUL + w[2];
+	B += F;
+	F += S0(G) + ((G & H) | (A & (G | H)));
+	w[3] = w[3] + s0(w[4]) + w[12] + s1(w[1]);
+	E += S1(B) + ((B & (C ^ D)) ^ D) + 0x34B0BCB5UL + w[3];
+	A += E;
+	E += S0(F) + ((F & G) | (H & (F | G)));
+	w[4] = w[4] + s0(w[5]) + w[13] + s1(w[2]);
+	D += S1(A) + ((A & (B ^ C)) ^ C) + 0x391C0CB3UL + w[4];
+	H += D;
+	D += S0(E) + ((E & F) | (G & (E | F)));
+	w[5] = w[5] + s0(w[6]) + w[14] + s1(w[3]);
+	C += S1(H) + ((H & (A ^ B)) ^ B) + 0x4ED8AA4AUL + w[5];
+	G += C;
+	C += S0(D) + ((D & E) | (F & (D | E)));
+	w[6] = w[6] + s0(w[7]) + w[15] + s1(w[4]);
+	B += S1(G) + ((G & (H ^ A)) ^ A) + 0x5B9CCA4FUL + w[6];
+	F += B;
+	B += S0(C) + ((C & D) | (E & (C | D)));
+	w[7] = w[7] + s0(w[8]) + w[0] + s1(w[5]);
+	A += S1(F) + ((F & (G ^ H)) ^ H) + 0x682E6FF3UL + w[7];
+	E += A;
+	A += S0(B) + ((B & C) | (D & (B | C)));
+	w[8] = w[8] + s0(w[9]) + w[1] + s1(w[6]);
+	H += S1(E) + ((E & (F ^ G)) ^ G) + 0x748F82EEUL + w[8];
+	D += H;
+	H += S0(A) + ((A & B) | (C & (A | B)));
+	w[9] = w[9] + s0(w[10]) + w[2] + s1(w[7]);
+	G += S1(D) + ((D & (E ^ F)) ^ F) + 0x78A5636FUL + w[9];
+	C += G;
+	G += S0(H) + ((H & A) | (B & (H | A)));
+	w[10] = w[10] + s0(w[11]) + w[3] + s1(w[8]);
+	F += S1(C) + ((C & (D ^ E)) ^ E) + 0x84C87814UL + w[10];
+	B += F;
+	F += S0(G) + ((G & H) | (A & (G | H)));
+	w[11] = w[11] + s0(w[12]) + w[4] + s1(w[9]);
+	E += S1(B) + ((B & (C ^ D)) ^ D) + 0x8CC70208UL + w[11];
+	A += E;
+	E += S0(F) + ((F & G) | (H & (F | G)));
+	w[12] = w[12] + s0(w[13]) + w[5] + s1(w[10]);
+	D += S1(A) + ((A & (B ^ C)) ^ C) + 0x90BEFFFAUL + w[12];
+	H += D;
+	D += S0(E) + ((E & F) | (G & (E | F)));
+	w[13] = w[13] + s0(w[14]) + w[6] + s1(w[11]);
+	C += S1(H) + ((H & (A ^ B)) ^ B) + 0xA4506CEBUL + w[13];
+	G += C;
+	C += S0(D) + ((D & E) | (F & (D | E)));
+	w[14] = w[14] + s0(w[15]) + w[7] + s1(w[12]);
+	B += S1(G) + ((G & (H ^ A)) ^ A) + 0xBEF9A3F7UL + w[14];
+	F += B;
+	B += S0(C) + ((C & D) | (E & (C | D)));
+	w[15] = w[15] + s0(w[0]) + w[8] + s1(w[13]);
+	A += S1(F) + ((F & (G ^ H)) ^ H) + 0xC67178F2UL + w[15];
+	E += A;
+	A += S0(B) + ((B & C) | (D & (B | C)));
+
+	/* update H0 - H7 */
+
+	H0 += A;
+	H1 += B;
+	H2 += C;
+	H3 += D;
+	H4 += E;
+	H5 += F;
+	H6 += G;
+	H7 += H;
+
+	/* clear temp variables */
+
+	A = B = C = D = E = F = G = H = 0;
+	memset(w, 0, sizeof(w));
+}
+
+/* ntru_crypto_sha2()
+ *
+ * This routine provides all operations for a SHA-256 hash,
+ * and the use of SHA-256 for DSA signing and key generation.
+ * It may be used to initialize, update, or complete a message digest,
+ * or any combination of those actions, as determined by the SHA_INIT flag,
+ * the in_len parameter, and the SHA_FINISH flag, respectively.
+ *
+ * When in_len == 0 (no data to hash), the parameter, in, may be NULL.
+ * When the SHA_FINISH flag is not set, the parameter, md, may be NULL.
+ *
+ * Initialization is requested by setting the SHA_INIT flag.
+ * Only standard SHA-256 initialization is supported by this implementation:
+ * init must be NULL, and a non-NULL init (alternate initialization vector,
+ * e.g. for SHA-224) is rejected with SHA_BAD_PARAMETER.
+ *
+ * The hash operation can be updated with any number of input bytes, including
+ * zero.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+uint32_t
+ntru_crypto_sha2(
+    NTRU_CRYPTO_HASH_ALGID algid, /*     in - hash algorithm ID */
+    NTRU_CRYPTO_SHA2_CTX *c,      /* in/out - pointer to SHA-2 context */
+    uint32_t const *init,         /*     in - pointer to alternate */
+                                  /*          initialization - may be NULL */
+    uint8_t const *in,            /*     in - pointer to input data - */
+                                  /*          may be NULL if in_len == 0 */
+    uint32_t in_len,              /*     in - number of input data bytes */
+    uint32_t flags,               /*     in - INIT, FINISH flags */
+    uint8_t *md)                  /*    out - address for message digest -
+                                                may be NULL if not FINISH */
+{
+	uint32_t in_blk[16]; /* input block */
+	uint32_t space;
+	uint8_t *d = NULL;
+
+	/* check error conditions */
+
+	if (algid != NTRU_CRYPTO_HASH_ALGID_SHA256) {
+		SHA_RET(SHA_BAD_PARAMETER)
+	}
+
+	if (!c || (in_len && !in) || ((flags & SHA_FINISH) && !md)) {
+		SHA_RET(SHA_BAD_PARAMETER)
+	}
+
+	/* initialize context if requested */
+
+	if (flags & SHA_INIT) {
+		/* init chaining state */
+
+		if (!init) /* standard initialization */
+		{
+
+			c->state[0] = H0_SHA256_INIT; /* standard SHA-256 init */
+			c->state[1] = H1_SHA256_INIT;
+			c->state[2] = H2_SHA256_INIT;
+			c->state[3] = H3_SHA256_INIT;
+			c->state[4] = H4_SHA256_INIT;
+			c->state[5] = H5_SHA256_INIT;
+			c->state[6] = H6_SHA256_INIT;
+			c->state[7] = H7_SHA256_INIT;
+
+		} else {
+			/* Support for SHA-224 etc is disabled */
+			SHA_RET(SHA_BAD_PARAMETER);
+		}
+
+		/* init bit count and number of unhashed data bytes */
+
+		c->num_bits_hashed[0] = 0;
+		c->num_bits_hashed[1] = 0;
+		c->unhashed_len = 0;
+	}
+
+	/* determine space left in unhashed data buffer */
+
+	if (c->unhashed_len > 63) {
+		SHA_RET(SHA_FAIL)
+	}
+
+	space = 64 - c->unhashed_len;
+
+	/* process input if it exists */
+
+	if (in_len) {
+		/* update count of bits hashed */
+
+		{
+			uint32_t bits0, bits1;
+
+			bits0 = in_len << 3;
+			bits1 = in_len >> 29;
+
+			if ((c->num_bits_hashed[0] += bits0) < bits0) {
+				bits1++;
+			}
+
+			if ((c->num_bits_hashed[1] += bits1) < bits1) {
+				memset((uint8_t *) c, 0, sizeof(NTRU_CRYPTO_SHA2_CTX));
+				memset((char *) in_blk, 0, sizeof(in_blk));
+				SHA_RET(SHA_OVERFLOW)
+			}
+		}
+
+		/* process input bytes */
+
+		if (in_len < space) {
+
+			/* input does not fill block buffer:
+             * add input to buffer
+             */
+
+			memcpy(c->unhashed + c->unhashed_len, in, in_len);
+			c->unhashed_len += in_len;
+
+		} else {
+			uint32_t blks;
+
+			/* input will fill block buffer:
+             *  fill unhashed data buffer,
+             *  convert to block buffer,
+             *  and process block
+             */
+
+			in_len -= space;
+
+			for (d = c->unhashed + c->unhashed_len; space; space--) {
+				*d++ = *in++;
+			}
+
+			ntru_crypto_msbyte_2_uint32(in_blk, (uint8_t const *) c->unhashed,
+			                            16);
+			sha2_blk((uint32_t const *) in_blk, c->state);
+
+			/* process any remaining full blocks */
+
+			for (blks = in_len >> 6; blks--; in += 64) {
+				ntru_crypto_msbyte_2_uint32(in_blk, in, 16);
+				sha2_blk((uint32_t const *) in_blk, c->state);
+			}
+
+			/* put any remaining input in the unhashed data buffer */
+
+			in_len &= 0x3f;
+			memcpy(c->unhashed, in, in_len);
+			c->unhashed_len = in_len;
+		}
+	}
+
+	/* complete message digest if requested */
+
+	if (flags & SHA_FINISH) {
+		space = 64 - c->unhashed_len;
+
+		/* add 0x80 padding byte to the unhashed data buffer
+         * (there is always space since the buffer can't be full)
+         */
+
+		d = c->unhashed + c->unhashed_len;
+		*d++ = 0x80;
+		space--;
+
+		/* check for space for bit count */
+
+		if (space < 8) {
+			/* no space for count:
+             *  fill remainder of unhashed data buffer with zeros,
+             *  convert to input block,
+             *  process block,
+             *  fill all but 8 bytes of unhashed data buffer with zeros
+             */
+
+			memset(d, 0, space);
+			ntru_crypto_msbyte_2_uint32(in_blk,
+			                            (uint8_t const *) c->unhashed, 16);
+			sha2_blk((uint32_t const *) in_blk, c->state);
+			memset(c->unhashed, 0, 56);
+
+		} else {
+			/* fill unhashed data buffer with zeros,
+             *  leaving space for bit count
+             */
+
+			for (space -= 8; space; space--) {
+				*d++ = 0;
+			}
+		}
+
+		/* convert partially filled unhashed data buffer to input block and
+         *  add bit count to input block
+         */
+
+		ntru_crypto_msbyte_2_uint32(in_blk, (uint8_t const *) c->unhashed,
+		                            14);
+		in_blk[14] = c->num_bits_hashed[1];
+		in_blk[15] = c->num_bits_hashed[0];
+
+		/* process last block */
+
+		sha2_blk((uint32_t const *) in_blk, c->state);
+
+		/* copy result to message digest buffer */
+
+		ntru_crypto_uint32_2_msbyte(md, c->state, 8);
+
+		/* clear context and stack variables */
+
+		memset((uint8_t *) c, 0, sizeof(NTRU_CRYPTO_SHA2_CTX));
+		memset((char *) in_blk, 0, sizeof(in_blk));
+	}
+
+	SHA_RET(SHA_OK)
+}
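+
+/* Illustrative usage sketch of the flag-driven interface above; the message
+ * and buffer names are arbitrary example values.  A single call with
+ * SHA_INIT | SHA_FINISH performs a complete one-shot SHA-256; the digest
+ * buffer must hold 32 bytes.
+ *
+ *     NTRU_CRYPTO_SHA2_CTX ctx;
+ *     uint8_t msg[3] = {'a', 'b', 'c'};
+ *     uint8_t md[32];
+ *
+ *     if (ntru_crypto_sha2(NTRU_CRYPTO_HASH_ALGID_SHA256, &ctx, NULL,
+ *                          msg, sizeof(msg), SHA_INIT | SHA_FINISH,
+ *                          md) != SHA_OK) {
+ *         // handle error
+ *     }
+ */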
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha2.h b/crypt/liboqs/kex_ntru/ntru_crypto_sha2.h
new file mode 100644
index 0000000000000000000000000000000000000000..b674adcbd59bce9a89e82e8f74f98a096bf08a6e
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha2.h
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_sha2.h
+ *
+ * Contents: Definitions and declarations for the SHA-256 implementation.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_SHA2_H
+#define NTRU_CRYPTO_SHA2_H
+
+#include "ntru_crypto_platform.h"
+#include "ntru_crypto_sha.h"
+
+/*************************
+ * structure definitions *
+ *************************/
+
+/* SHA-256 context structure */
+
+typedef struct {
+	uint32_t state[8];           /* chaining state */
+	uint32_t num_bits_hashed[2]; /* number of bits hashed */
+	uint8_t unhashed[64];        /* input data not yet hashed */
+	uint32_t unhashed_len;       /* number of bytes of unhashed input data */
+} NTRU_CRYPTO_SHA2_CTX;
+
+/*************************
+ * function declarations *
+ *************************/
+
+/* ntru_crypto_sha2()
+ *
+ * This routine provides all operations for a SHA-256 hash,
+ * and the use of SHA-256 for DSA signing and key generation.
+ * It may be used to initialize, update, or complete a message digest,
+ * or any combination of those actions, as determined by the SHA_INIT flag,
+ * the in_len parameter, and the SHA_FINISH flag, respectively.
+ *
+ * When in_len == 0 (no data to hash), the parameter, in, may be NULL.
+ * When the SHA_FINISH flag is not set, the parameter, md, may be NULL.
+ *
+ * Initialization is requested by setting the SHA_INIT flag.
+ * Only standard SHA-256 initialization is supported by this implementation:
+ * init must be NULL, and a non-NULL init (alternate initialization vector,
+ * e.g. for SHA-224) is rejected with SHA_BAD_PARAMETER.
+ *
+ * The hash operation can be updated with any number of input bytes, including
+ * zero.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+extern uint32_t
+ntru_crypto_sha2(
+    NTRU_CRYPTO_HASH_ALGID algid, /*     in - hash algorithm ID */
+    NTRU_CRYPTO_SHA2_CTX *c,      /* in/out - pointer to SHA-2 context */
+    uint32_t const *init,         /*     in - pointer to alternate */
+                                  /*          initialization - may be NULL */
+    uint8_t const *in,            /*     in - pointer to input data -
+                                                may be NULL if in_len == 0 */
+    uint32_t in_len,              /*     in - number of input data bytes */
+    uint32_t flags,               /*     in - INIT, FINISH */
+    uint8_t *md);                 /*    out - address for message digest -
+                                                may be NULL if not FINISH */
+
+#endif /* NTRU_CRYPTO_SHA2_H */
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha256.c b/crypt/liboqs/kex_ntru/ntru_crypto_sha256.c
new file mode 100644
index 0000000000000000000000000000000000000000..544c51c53a915511f933d769e000c5ab7a2bda0e
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha256.c
@@ -0,0 +1,109 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_sha256.c
+ *
+ * Contents: Routines implementing the SHA-256 hash calculations.
+ *
+ *****************************************************************************/
+
+#include "ntru_crypto_sha256.h"
+
+/* ntru_crypto_sha256_init
+ *
+ * This routine performs standard initialization of the SHA-256 state.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ */
+
+uint32_t
+ntru_crypto_sha256_init(
+    NTRU_CRYPTO_SHA2_CTX *c) /* in/out - pointer to SHA-2 context */
+{
+	return ntru_crypto_sha2(NTRU_CRYPTO_HASH_ALGID_SHA256, c, NULL, NULL, 0,
+	                        SHA_INIT, NULL);
+}
+
+/* ntru_crypto_sha256_update
+ *
+ * This routine processes input data and updates the SHA-256 hash calculation.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+uint32_t
+ntru_crypto_sha256_update(
+    NTRU_CRYPTO_SHA2_CTX *c, /* in/out - pointer to SHA-2 context */
+    uint8_t const *data,     /*     in - pointer to input data */
+    uint32_t data_len)       /*     in - no. of bytes of input data */
+{
+	return ntru_crypto_sha2(NTRU_CRYPTO_HASH_ALGID_SHA256, c, NULL, data,
+	                        data_len, SHA_DATA_ONLY, NULL);
+}
+
+/* ntru_crypto_sha256_final
+ *
+ * This routine completes the SHA-256 hash calculation and returns the
+ * message digest.
+ * 
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+uint32_t
+ntru_crypto_sha256_final(
+    NTRU_CRYPTO_SHA2_CTX *c, /* in/out - pointer to SHA-2 context */
+    uint8_t *md)             /*    out - address for message digest */
+{
+	return ntru_crypto_sha2(NTRU_CRYPTO_HASH_ALGID_SHA256, c, NULL, NULL, 0,
+	                        SHA_FINISH, md);
+}
+
+/* ntru_crypto_sha256_digest
+ *
+ * This routine computes a SHA-256 message digest.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+uint32_t
+ntru_crypto_sha256_digest(
+    uint8_t const *data, /*  in - pointer to input data */
+    uint32_t data_len,   /*  in - number of bytes of input data */
+    uint8_t *md)         /* out - address for message digest */
+{
+	NTRU_CRYPTO_SHA2_CTX c;
+
+	return ntru_crypto_sha2(NTRU_CRYPTO_HASH_ALGID_SHA256, &c, NULL, data,
+	                        data_len, SHA_INIT | SHA_FINISH, md);
+}
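+
+/* Illustrative usage sketch of the wrappers above; the message contents and
+ * buffer names are arbitrary example values.  Incremental hashing lets the
+ * input be supplied in several pieces; the digest buffer must hold 32 bytes
+ * (SHA_256_MD_LEN).
+ *
+ *     NTRU_CRYPTO_SHA2_CTX ctx;
+ *     uint8_t part1[2] = {'a', 'b'};
+ *     uint8_t part2[1] = {'c'};
+ *     uint8_t md[32];
+ *
+ *     if (ntru_crypto_sha256_init(&ctx) == SHA_OK &&
+ *         ntru_crypto_sha256_update(&ctx, part1, sizeof(part1)) == SHA_OK &&
+ *         ntru_crypto_sha256_update(&ctx, part2, sizeof(part2)) == SHA_OK &&
+ *         ntru_crypto_sha256_final(&ctx, md) == SHA_OK) {
+ *         // md now holds SHA-256("abc")
+ *     }
+ */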
diff --git a/crypt/liboqs/kex_ntru/ntru_crypto_sha256.h b/crypt/liboqs/kex_ntru/ntru_crypto_sha256.h
new file mode 100644
index 0000000000000000000000000000000000000000..717cf0471d2601c4a213a74cac077c495767d04c
--- /dev/null
+++ b/crypt/liboqs/kex_ntru/ntru_crypto_sha256.h
@@ -0,0 +1,114 @@
+/******************************************************************************
+ * NTRU Cryptography Reference Source Code
+ *
+ * Copyright (C) 2009-2016  Security Innovation (SI)
+ *
+ * SI has dedicated the work to the public domain by waiving all of its rights
+ * to the work worldwide under copyright law, including all related and
+ * neighboring rights, to the extent allowed by law.
+ *
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * You can copy, modify, distribute and perform the work, even for commercial
+ * purposes, all without asking permission. You should have received a copy of
+ * the creative commons license (CC0 1.0 universal) along with this program.
+ * See the license file for more information. 
+ *
+ *
+ *********************************************************************************/
+
+/******************************************************************************
+ *
+ * File: ntru_crypto_sha256.h
+ *
+ * Contents: Definitions and declarations for the SHA-256 implementation.
+ *
+ *****************************************************************************/
+
+#ifndef NTRU_CRYPTO_SHA256_H
+#define NTRU_CRYPTO_SHA256_H
+
+#include "ntru_crypto_platform.h"
+#include "ntru_crypto_sha2.h"
+
+/******************************************
+ * macros needed for generic hash objects * 
+ ******************************************/
+
+#define SHA_256_CTX_LEN sizeof(NTRU_CRYPTO_SHA2_CTX) /* no. bytes in SHA-2 ctx */
+#define SHA_256_BLK_LEN 64                           /* 64 bytes in input block */
+#define SHA_256_MD_LEN 32                            /* 32 bytes in msg digest */
+#define SHA_256_INIT_FN &ntru_crypto_sha256_init     /* init function */
+#define SHA_256_UPDATE_FN &ntru_crypto_sha256_update /* update function */
+#define SHA_256_FINAL_FN &ntru_crypto_sha256_final   /* final function */
+#define SHA_256_DIGEST_FN &ntru_crypto_sha256_digest /* digest function */
+
+/*************************
+ * function declarations *
+ *************************/
+
+/* ntru_crypto_sha256_init
+ *
+ * This routine performs standard initialization of the SHA-256 state.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ */
+
+extern uint32_t
+ntru_crypto_sha256_init(
+    NTRU_CRYPTO_SHA2_CTX *c); /* in/out - pointer to SHA-2 context */
+
+/* ntru_crypto_sha256_update
+ *
+ * This routine processes input data and updates the SHA-256 hash calculation.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+extern uint32_t
+ntru_crypto_sha256_update(
+    NTRU_CRYPTO_SHA2_CTX *c, /* in/out - pointer to SHA-2 context */
+    uint8_t const *data,     /*     in - pointer to input data */
+    uint32_t data_len);      /*     in - no. of bytes of input data */
+
+/* ntru_crypto_sha256_final
+ *
+ * This routine completes the SHA-256 hash calculation and returns the
+ * message digest.
+ * 
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+extern uint32_t
+ntru_crypto_sha256_final(
+    NTRU_CRYPTO_SHA2_CTX *c, /* in/out - pointer to SHA-2 context */
+    uint8_t *md);            /*    out - address for message digest */
+
+/* ntru_crypto_sha256_digest
+ *
+ * This routine computes a SHA-256 message digest.
+ *
+ * Returns SHA_OK on success.
+ * Returns SHA_FAIL with corrupted context.
+ * Returns SHA_BAD_PARAMETER if inappropriate NULL pointers are passed.
+ * Returns SHA_OVERFLOW if more than 2^64 - 1 bytes are hashed.
+ */
+
+extern uint32_t
+ntru_crypto_sha256_digest(
+    uint8_t const *data, /*  in - pointer to input data */
+    uint32_t data_len,   /*  in - number of bytes of input data */
+    uint8_t *md);        /* out - address for message digest */
+
+#endif /* NTRU_CRYPTO_SHA256_H */
diff --git a/crypt/liboqs/kex_rlwe_bcns15/LICENSE.txt b/crypt/liboqs/kex_rlwe_bcns15/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..07f72eb90baf005fdfab0cc2e9e3a82c188bbaeb
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_bcns15/LICENSE.txt
@@ -0,0 +1,32 @@
+The files in this directory (except kex_rlwe_bcns15.*) were originally written
+by Joppe W. Bos, Craig Costello, Michael Naehrig, and Douglas Stebila
+(https://github.com/dstebila/rlwekex).
+
+
+The following license applies to all files in the src/kex_rlwe_bcns15 directory.
+
+
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org>
diff --git a/crypt/liboqs/kex_rlwe_bcns15/Makefile.am b/crypt/liboqs/kex_rlwe_bcns15/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..d2d0eff9af7a25ebecd4cd55a58ec4378669dd94
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_bcns15/Makefile.am
@@ -0,0 +1,9 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libbcns15.la
+
+
+libbcns15_la_SOURCES = fft.c kex_rlwe_bcns15.c rlwe.c rlwe_kex.c
+
+libbcns15_la_CPPFLAGS = -I../../include
+libbcns15_la_CPPFLAGS += $(AM_CPPFLAGS) 
+
diff --git a/crypt/liboqs/kex_rlwe_bcns15/fft.c b/crypt/liboqs/kex_rlwe_bcns15/fft.c
new file mode 100644
index 0000000000000000000000000000000000000000..7515209178ddd81609f192387a3e3ee6b1ed4098
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_bcns15/fft.c
@@ -0,0 +1,243 @@
+/* This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * See LICENSE for complete information.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "local.h"
+
+static void *(*volatile rlwe_memset_volatile)(void *, int, size_t) = memset;
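+
+/* Note: calling memset through a volatile function pointer is a common way to
+ * keep the compiler from optimizing away the zeroization of secret buffers in
+ * the clearing routines below. */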
+
+/* Reduction modulo p = 2^32 - 1.
+ * This is not a prime since 2^32-1 = (2^1+1)*(2^2+1)*(2^4+1)*(2^8+1)*(2^16+1).
+ * But since 2 is a unit in Z/pZ we can use it for computing FFTs in
+ * Z/pZ[X]/(X^(2^7)+1)
+ */
+
+/* Caution:
+ * We use a redundant representation where the integer 0 is represented both
+ * by 0 and 2^32-1.
+ * This approach follows the description from the paper:
+ * Joppe W. Bos, Craig Costello, Huseyin Hisil, and Kristin Lauter: Fast Cryptography in Genus 2
+ * EUROCRYPT 2013, Lecture Notes in Computer Science 7881, pp. 194-210, Springer, 2013.
+ * More specifically see: Section 3 related to Modular Addition/Subtraction.
+ */
+
+/* Compute: c = (a+b) mod (2^32-1)
+ * Let, t = a+b = t_1*2^32 + t0, where 0 <= t_1 <= 1, 0 <= t_0 < 2^32.
+ * Then t mod (2^32-1) = t0 + t1 */
+
+/* NOTE:
+ * Implementing this arithmetic in asm might significantly
+ * increase performance.
+ */
+
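+/* Illustrative example: with a = 0xFFFFFFFE and b = 5, the 32-bit sum _t wraps
+ * around to 3, so (_t < a) is 1 and modadd yields c = 4, which equals
+ * (0xFFFFFFFE + 5) mod (2^32 - 1). */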
+#define modadd(c, a, b)      \
+	do {                     \
+		uint32_t _t = a + b; \
+		c = _t + (_t < a);   \
+	} while (0)
+
+#define modsub(c, a, b) c = (a - b) - (b > a)
+
+#define modmul(c, a, b)                                                           \
+	do {                                                                          \
+		uint64_t _T = (uint64_t) a * (uint64_t) b;                                \
+		modadd(c, ((uint32_t) _T), ((uint32_t)((uint64_t) _T >> (uint64_t) 32))); \
+	} while (0)
+
+#define modmuladd(c, a, b)                                                        \
+	do {                                                                          \
+		uint64_t _T = (uint64_t) a * (uint64_t) b + c;                            \
+		modadd(c, ((uint32_t) _T), ((uint32_t)((uint64_t) _T >> (uint64_t) 32))); \
+	} while (0)
+
+#define div2(c, a) c = (uint32_t)(((uint64_t)(a) + (uint64_t)((uint32_t)(0 - ((a) &1)) & 0xFFFFFFFF)) >> 1)
+#define normalize(c, a) c = (a) + ((a) == 0xFFFFFFFF)
+
+/* Define the basic building blocks for the FFT. */
+#define SET_ZERO(x) (x) = 0
+#define add(c, a, b) modadd(c, a, b)
+#define sub(c, a, b) modsub(c, a, b)
+#define mul(c, a, b) modmul(c, a, b)
+#define moddiv2(c, a) \
+	normalize(c, a);  \
+	div2(c, c)
+#define neg(c, a)           \
+	(c) = 0xFFFFFFFF - (a); \
+	normalize(c, c)
+#define squ(c, a) mul(c, a, a)
+#define set(c, a) (c) = (a)
+
+/* Reverse the bits, approach from "Bit Twiddling Hacks"
+ * See: https://graphics.stanford.edu/~seander/bithacks.html
+ */
+static uint32_t reverse(uint32_t x) {
+	x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));
+	x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));
+	x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));
+	x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));
+	return ((x >> 16) | (x << 16));
+}
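+/* e.g. reverse(0x00000001) == 0x80000000 and reverse(0x000000F0) == 0x0F000000. */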
+
+/* Nussbaumer approach, see:
+ * H. J. Nussbaumer. Fast polynomial transform algorithms for digital convolution.
+ * IEEE Transactions on Acoustics, Speech and Signal Processing, 28(2):205-215, 1980.
+ * We followed the description from Knuth:
+ * D. E. Knuth. Seminumerical Algorithms. The Art of Computer Programming. Addison-Wesley, Reading,
+ * Massachusetts, USA, 3rd edition, 1997,
+ * Exercise 4.6.4.59.
+ */
+
+static void naive(uint32_t *z, const uint32_t *x, const uint32_t *y, unsigned int n) {
+	unsigned int i, j, k;
+	uint32_t A, B;
+
+	for (i = 0; i < n; i++) {
+		SET_ZERO(B);
+
+		mul(A, x[0], y[i]);
+
+		for (j = 1; j <= i; j++) {
+			modmuladd(A, x[j], y[i - j]);
+		}
+
+		for (k = 1; j < n; j++, k++) {
+			modmuladd(B, x[j], y[n - k]);
+		}
+		sub(z[i], A, B);
+	}
+}
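+/* Note: naive() is schoolbook multiplication of two degree-(n-1) polynomials
+ * modulo X^n + 1: accumulator A collects the non-wrapped products and
+ * accumulator B the wrapped ones, which pick up a minus sign. */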
+
+static void nussbaumer_fft(uint32_t z[1024], const uint32_t x[1024], const uint32_t y[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx) {
+	uint32_t(*X1)[64] = ctx->x1;
+	uint32_t(*Y1)[64] = ctx->y1;
+	uint32_t(*Z1)[64] = ctx->z1;
+	uint32_t *T1 = ctx->t1;
+	unsigned int i;
+	int j;
+
+	for (i = 0; i < 32; i++) {
+		for (j = 0; j < 32; j++) {
+			set(X1[i][j], x[32 * j + i]);
+			set(X1[i + 32][j], x[32 * j + i]);
+
+			set(Y1[i][j], y[32 * j + i]);
+			set(Y1[i + 32][j], y[32 * j + i]);
+		}
+	}
+
+	for (j = 4; j >= 0; j--) {
+		for (i = 0; i < (1U << (5 - j)); i++) {
+			unsigned int t, ssr = reverse(i);
+			for (t = 0; t < (1U << j); t++) {
+				unsigned int s, sr, I, L, a;
+				s = i;
+				sr = (ssr >> (32 - 5 + j));
+				sr <<= j;
+				s <<= (j + 1);
+
+				// X_i(w) = X_i(w) + w^kX_l(w) can be computed as
+				// X_ij = X_ij - X_l(j-k+r)  for  0 <= j < k
+				// X_ij = X_ij + X_l(j-k)    for  k <= j < r
+				I = s + t, L = s + t + (1 << j);
+
+				for (a = sr; a < 32; a++) {
+					set(T1[a], X1[L][a - sr]);
+				}
+				for (a = 0; a < sr; a++) {
+					neg(T1[a], X1[L][32 + a - sr]);
+				}
+
+				for (a = 0; a < 32; a++) {
+					sub(X1[L][a], X1[I][a], T1[a]);
+					add(X1[I][a], X1[I][a], T1[a]);
+				}
+
+				for (a = sr; a < 32; a++) {
+					set(T1[a], Y1[L][a - sr]);
+				}
+				for (a = 0; a < sr; a++) {
+					neg(T1[a], Y1[L][32 + a - sr]);
+				}
+
+				for (a = 0; a < 32; a++) {
+					sub(Y1[L][a], Y1[I][a], T1[a]);
+					add(Y1[I][a], Y1[I][a], T1[a]);
+				}
+			}
+		}
+	}
+
+	for (i = 0; i < 2 * 32; i++) {
+		naive(Z1[i], X1[i], Y1[i], 32);
+	}
+
+	for (j = 0; j <= (int) 5; j++) {
+		for (i = 0; i < (1U << (5 - j)); i++) {
+			unsigned int t, ssr = reverse(i);
+			for (t = 0; t < (1U << j); t++) {
+				unsigned int s, sr, A, B, a;
+				s = i;
+				sr = (ssr >> (32 - 5 + j));
+				sr <<= j;
+				s <<= (j + 1);
+
+				A = s + t;
+				B = s + t + (1 << j);
+				for (a = 0; a < 32; a++) {
+					sub(T1[a], Z1[A][a], Z1[B][a]);
+					moddiv2(T1[a], T1[a]);
+					add(Z1[A][a], Z1[A][a], Z1[B][a]);
+					moddiv2(Z1[A][a], Z1[A][a]);
+				}
+
+				// w^{-(r/m)s'} (Z_{s+t}(w)-Z_{s+t+2^j}(w))
+				for (a = 0; a < 32 - sr; a++) {
+					set(Z1[B][a], T1[a + sr]);
+				}
+				for (a = 32 - sr; a < 32; a++) {
+					neg(Z1[B][a], T1[a - (32 - sr)]);
+				}
+			}
+		}
+	}
+
+	for (i = 0; i < 32; i++) {
+		sub(z[i], Z1[i][0], Z1[32 + i][32 - 1]);
+		for (j = 1; j < 32; j++) {
+			add(z[32 * j + i], Z1[i][j], Z1[32 + i][j - 1]);
+		}
+	}
+}
+
+void oqs_kex_rlwe_bcns15_fft_mul(uint32_t z[1024], const uint32_t x[1024], const uint32_t y[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx) {
+	nussbaumer_fft(z, x, y, ctx);
+}
+
+void oqs_kex_rlwe_bcns15_fft_add(uint32_t z[1024], const uint32_t x[1024], const uint32_t y[1024]) {
+	int i;
+	for (i = 0; i < 1024; i++) {
+		add(z[i], x[i], y[i]);
+	}
+}
+
+void oqs_kex_rlwe_bcns15_fft_ctx_clear(struct oqs_kex_rlwe_bcns15_fft_ctx *ctx) {
+	if (ctx == NULL) {
+		return;
+	}
+	for (int i = 0; i < 64; i++) {
+		rlwe_memset_volatile(ctx->x1[i], 0, 64 * sizeof(uint32_t));
+		rlwe_memset_volatile(ctx->y1[i], 0, 64 * sizeof(uint32_t));
+		rlwe_memset_volatile(ctx->z1[i], 0, 64 * sizeof(uint32_t));
+	}
+	rlwe_memset_volatile(ctx->t1, 0, 64 * sizeof(uint32_t));
+}
diff --git a/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.c b/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.c
new file mode 100644
index 0000000000000000000000000000000000000000..8eee72ad308e06e1c784fc76b6ce122ef6006e58
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.c
@@ -0,0 +1,198 @@
+#if defined(WINDOWS)
+#define UNUSED
+// __attribute__ is not supported by MSVC; no equivalent is defined here, so UNUSED expands to nothing on Windows
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#if !defined(WINDOWS)
+#include <strings.h>
+#include <unistd.h>
+#endif
+
+#include <oqs/common.h>
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+#include "kex_rlwe_bcns15.h"
+#include "local.h"
+
+#include "rlwe_a.h"
+
+#if defined(WINDOWS)
+#define strdup _strdup // for strdup deprecation warning
+#endif
+
+OQS_KEX *OQS_KEX_rlwe_bcns15_new(OQS_RAND *rand) {
+
+	OQS_KEX *k = malloc(sizeof(OQS_KEX));
+	if (k == NULL) {
+		return NULL;
+	}
+
+	k->ctx = malloc(sizeof(struct oqs_kex_rlwe_bcns15_fft_ctx));
+	if (k->ctx == NULL) {
+		free(k);
+		return NULL;
+	}
+
+	k->method_name = strdup("RLWE BCNS15");
+	k->estimated_classical_security = 163;
+	k->estimated_quantum_security = 76;
+	k->seed = NULL;
+	k->seed_len = 0;
+	k->named_parameters = NULL;
+	k->rand = rand;
+	k->params = NULL;
+	k->alice_0 = &OQS_KEX_rlwe_bcns15_alice_0;
+	k->bob = &OQS_KEX_rlwe_bcns15_bob;
+	k->alice_1 = &OQS_KEX_rlwe_bcns15_alice_1;
+	k->alice_priv_free = &OQS_KEX_rlwe_bcns15_alice_priv_free;
+	k->free = &OQS_KEX_rlwe_bcns15_free;
+
+	return k;
+}
+
+int OQS_KEX_rlwe_bcns15_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) {
+
+	int ret;
+	uint32_t *alice_msg_32 = NULL;
+
+	*alice_priv = NULL;
+	*alice_msg = NULL;
+
+	/* allocate public/private key pair */
+	alice_msg_32 = malloc(1024 * sizeof(uint32_t));
+	if (alice_msg_32 == NULL) {
+		goto err;
+	}
+	*alice_priv = malloc(1024 * sizeof(uint32_t));
+	if (*alice_priv == NULL) {
+		goto err;
+	}
+
+	/* generate public/private key pair */
+	oqs_kex_rlwe_bcns15_generate_keypair(oqs_kex_rlwe_bcns15_a, (uint32_t *) *alice_priv, alice_msg_32, k->ctx, k->rand);
+	*alice_msg = (uint8_t *) alice_msg_32;
+	*alice_msg_len = 1024 * sizeof(uint32_t);
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(alice_msg_32);
+	OQS_MEM_secure_free(*alice_priv, 1024 * sizeof(uint32_t));
+	*alice_priv = NULL;
+
+cleanup:
+	return ret;
+}
+
+int OQS_KEX_rlwe_bcns15_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+
+	uint32_t *bob_priv = NULL;
+	uint64_t *key_64 = NULL;
+
+	*bob_msg = NULL;
+	*key = NULL;
+
+	if (alice_msg_len != 1024 * sizeof(uint32_t)) {
+		goto err;
+	}
+
+	bob_priv = malloc(1024 * sizeof(uint32_t));
+	if (bob_priv == NULL) {
+		goto err;
+	}
+	/* allocate message and session key */
+	*bob_msg = malloc(1024 * sizeof(uint32_t) + 16 * sizeof(uint64_t));
+	if (*bob_msg == NULL) {
+		goto err;
+	}
+	key_64 = malloc(16 * sizeof(uint64_t));
+	if (key_64 == NULL) {
+		goto err;
+	}
+
+	/* generate public/private key pair */
+	oqs_kex_rlwe_bcns15_generate_keypair(oqs_kex_rlwe_bcns15_a, bob_priv, (uint32_t *) *bob_msg, k->ctx, k->rand);
+
+	/* generate Bob's response */
+	uint8_t *bob_rec = *bob_msg + 1024 * sizeof(uint32_t);
+	oqs_kex_rlwe_bcns15_compute_key_bob((uint32_t *) alice_msg, bob_priv, (uint64_t *) bob_rec, key_64, k->ctx, k->rand);
+	*bob_msg_len = 1024 * sizeof(uint32_t) + 16 * sizeof(uint64_t);
+	*key = (uint8_t *) key_64;
+	*key_len = 16 * sizeof(uint64_t);
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*bob_msg);
+	*bob_msg = NULL;
+	OQS_MEM_secure_free(key_64, 16 * sizeof(uint64_t));
+
+cleanup:
+	OQS_MEM_secure_free(bob_priv, 1024 * sizeof(uint32_t));
+
+	return ret;
+}
+
+int OQS_KEX_rlwe_bcns15_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+
+	uint64_t *key_64 = NULL;
+
+	*key = NULL;
+
+	if (bob_msg_len != 1024 * sizeof(uint32_t) + 16 * sizeof(uint64_t)) {
+		goto err;
+	}
+
+	/* allocate session key */
+	key_64 = malloc(16 * sizeof(uint64_t));
+	if (key_64 == NULL) {
+		goto err;
+	}
+
+	/* generate Alice's session key */
+	const uint8_t *bob_rec = bob_msg + 1024 * sizeof(uint32_t);
+	oqs_kex_rlwe_bcns15_compute_key_alice((uint32_t *) bob_msg, (uint32_t *) alice_priv, (uint64_t *) bob_rec, key_64, k->ctx);
+	*key = (uint8_t *) key_64;
+	*key_len = 16 * sizeof(uint64_t);
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	OQS_MEM_secure_free(key_64, 16 * sizeof(uint64_t));
+
+cleanup:
+
+	return ret;
+}
+
+void OQS_KEX_rlwe_bcns15_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) {
+	if (alice_priv) {
+		free(alice_priv);
+	}
+}
+
+void OQS_KEX_rlwe_bcns15_free(OQS_KEX *k) {
+	if (!k) {
+		return;
+	}
+	free(k->method_name);
+	k->method_name = NULL;
+	free(k->ctx);
+	k->ctx = NULL;
+	free(k);
+}
diff --git a/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.h b/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.h
new file mode 100644
index 0000000000000000000000000000000000000000..c098da4812bb43208e44edf2f5fbdbaae1a689ab
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_bcns15/kex_rlwe_bcns15.h
@@ -0,0 +1,24 @@
+/**
+ * \file kex_rlwe_bcns15.h
+ * \brief Header for ring-LWE key exchange protocol BCNS15
+ */
+
+#ifndef __OQS_KEX_RLWE_BCNS15_H
+#define __OQS_KEX_RLWE_BCNS15_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+OQS_KEX *OQS_KEX_rlwe_bcns15_new(OQS_RAND *rand);
+
+int OQS_KEX_rlwe_bcns15_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
+int OQS_KEX_rlwe_bcns15_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
+int OQS_KEX_rlwe_bcns15_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
+
+void OQS_KEX_rlwe_bcns15_alice_priv_free(OQS_KEX *k, void *alice_priv);
+void OQS_KEX_rlwe_bcns15_free(OQS_KEX *k);
+
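+/*
+ * Typical call sequence (illustrative sketch based on the declarations above;
+ * error handling and OQS_RAND construction omitted):
+ *
+ *   OQS_KEX *k = OQS_KEX_rlwe_bcns15_new(rand);
+ *
+ *   void *alice_priv; uint8_t *alice_msg; size_t alice_msg_len;
+ *   OQS_KEX_rlwe_bcns15_alice_0(k, &alice_priv, &alice_msg, &alice_msg_len);
+ *
+ *   uint8_t *bob_msg, *bob_key; size_t bob_msg_len, bob_key_len;
+ *   OQS_KEX_rlwe_bcns15_bob(k, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len,
+ *                           &bob_key, &bob_key_len);
+ *
+ *   uint8_t *alice_key; size_t alice_key_len;
+ *   OQS_KEX_rlwe_bcns15_alice_1(k, alice_priv, bob_msg, bob_msg_len,
+ *                               &alice_key, &alice_key_len);
+ *
+ *   // alice_key and bob_key now hold the same 128-byte shared secret.
+ *   OQS_KEX_rlwe_bcns15_alice_priv_free(k, alice_priv);
+ *   OQS_KEX_rlwe_bcns15_free(k);
+ */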
+#endif
diff --git a/crypt/liboqs/kex_rlwe_bcns15/local.h b/crypt/liboqs/kex_rlwe_bcns15/local.h
new file mode 100644
index 0000000000000000000000000000000000000000..ac07fe173f7b7ec6ac9340cd43807f9e1c28f7b6
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_bcns15/local.h
@@ -0,0 +1,46 @@
+/* This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * See LICENSE for complete information.
+ */
+
+#ifndef _OQS_KEX_RLWE_BCNS15_LOCAL_H_
+#define _OQS_KEX_RLWE_BCNS15_LOCAL_H_
+
+#include <stdint.h>
+
+#include <oqs/rand.h>
+
+struct oqs_kex_rlwe_bcns15_fft_ctx {
+	uint32_t x1[64][64];
+	uint32_t y1[64][64];
+	uint32_t z1[64][64];
+	uint32_t t1[64];
+};
+
+void oqs_kex_rlwe_bcns15_fft_mul(uint32_t z[1024], const uint32_t x[1024], const uint32_t y[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx);
+void oqs_kex_rlwe_bcns15_fft_add(uint32_t z[1024], const uint32_t x[1024], const uint32_t y[1024]);
+
+void oqs_kex_rlwe_bcns15_fft_ctx_clear(struct oqs_kex_rlwe_bcns15_fft_ctx *ctx);
+
+void oqs_kex_rlwe_bcns15_sample_ct(uint32_t s[1024], OQS_RAND *rand);
+void oqs_kex_rlwe_bcns15_round2_ct(uint64_t out[16], const uint32_t in[1024]);
+void oqs_kex_rlwe_bcns15_crossround2_ct(uint64_t out[16], const uint32_t in[1024], OQS_RAND *rand);
+void oqs_kex_rlwe_bcns15_rec_ct(uint64_t out[16], const uint32_t w[1024], const uint64_t b[16]);
+
+void oqs_kex_rlwe_bcns15_sample(uint32_t s[1024], OQS_RAND *rand);
+void oqs_kex_rlwe_bcns15_round2(uint64_t out[16], const uint32_t in[1024]);
+void oqs_kex_rlwe_bcns15_crossround2(uint64_t out[16], const uint32_t in[1024], OQS_RAND *rand);
+void oqs_kex_rlwe_bcns15_rec(uint64_t out[16], const uint32_t w[1024], const uint64_t b[16]);
+
+void oqs_kex_rlwe_bcns15_a_times_s_plus_e(uint32_t out[1024], const uint32_t a[1024], const uint32_t s[1024], const uint32_t e[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *fft_ctx);
+
+void oqs_kex_rlwe_bcns15_generate_keypair(const uint32_t *a, uint32_t s[1024], uint32_t b[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx, OQS_RAND *rand);
+void oqs_kex_rlwe_bcns15_compute_key_alice(const uint32_t b[1024], const uint32_t s[1024], const uint64_t c[16], uint64_t k[16], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx);
+void oqs_kex_rlwe_bcns15_compute_key_bob(const uint32_t b[1024], const uint32_t s[1024], uint64_t c[16], uint64_t k[16], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx, OQS_RAND *rand);
+
+#endif /* _OQS_KEX_RLWE_BCNS15_LOCAL_H_ */
diff --git a/crypt/liboqs/kex_rlwe_bcns15/rlwe.c b/crypt/liboqs/kex_rlwe_bcns15/rlwe.c
new file mode 100644
index 0000000000000000000000000000000000000000..f6fc5ac4c89eafa14d77781605036eebf668a177
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_bcns15/rlwe.c
@@ -0,0 +1,296 @@
+/* This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * See LICENSE for complete information.
+ */
+
+#if defined(WINDOWS)
+#pragma warning(disable : 4146 4244 4267)
+#endif
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <oqs/rand.h>
+
+#include "local.h"
+
+#include "rlwe_table.h"
+
+#define setbit(a, x) ((a)[(x) / 64] |= (((uint64_t) 1) << (uint64_t)((x) % 64)))
+#define getbit(a, x) (((a)[(x) / 64] >> (uint64_t)((x) % 64)) & 1)
+#define clearbit(a, x) ((a)[(x) / 64] &= ((~((uint64_t) 0)) - (((uint64_t) 1) << (uint64_t)((x) % 64))))
+
+/* Auxiliary functions for constant-time comparison */
+
+/*
+ * Returns 1 if x != 0
+ * Returns 0 if x == 0
+ * x is an arbitrary unsigned 64-bit integer
+ */
+static uint64_t ct_isnonzero_u64(uint64_t x) {
+	return (x | -x) >> 63;
+}
+
+/*
+ * Returns 1 if x != y
+ * Returns 0 if x == y
+ * x and y are arbitrary unsigned 64-bit integers
+ */
+static uint64_t ct_ne_u64(uint64_t x, uint64_t y) {
+	return ((x - y) | (y - x)) >> 63;
+}
+
+/*
+ * Returns 1 if x == y
+ * Returns 0 if x != y
+ * x and y are arbitrary unsigned 64-bit integers
+ */
+static uint64_t ct_eq_u64(uint64_t x, uint64_t y) {
+	return 1 ^ ct_ne_u64(x, y);
+}
+
+/* Returns 1 if x < y
+ * Returns 0 if x >= y
+ * x and y are arbitrary unsigned 64-bit integers
+ */
+static uint64_t ct_lt_u64(uint64_t x, uint64_t y) {
+	return (x ^ ((x ^ y) | ((x - y) ^ y))) >> 63;
+}
+
+/*
+ * Returns 1 if x > y
+ * Returns 0 if x <= y
+ * x and y are arbitrary unsigned 64-bit integers
+ */
+static uint64_t ct_gt_u64(uint64_t x, uint64_t y) {
+	return ct_lt_u64(y, x);
+}
+
+/*
+ * Returns 1 if x <= y
+ * Returns 0 if x > y
+ * x and y are arbitrary unsigned 64-bit integers
+ */
+static uint64_t ct_le_u64(uint64_t x, uint64_t y) {
+	return 1 ^ ct_gt_u64(x, y);
+}
+
+/*
+ * Returns 1 if x >= y
+ * Returns 0 if x < y
+ * x and y are arbitrary unsigned 64-bit integers
+ */
+static uint64_t ct_ge_u64(uint64_t x, uint64_t y) {
+	return 1 ^ ct_lt_u64(x, y);
+}
+
+/* Returns 0xFFFF..FFFF if bit != 0
+ * Returns            0 if bit == 0
+ */
+static uint64_t ct_mask_u64(uint64_t bit) {
+	return 0 - (uint64_t) ct_isnonzero_u64(bit);
+}
+
+/* Conditionally return x or y depending on whether bit is set
+ * Equivalent to: return bit ? x : y
+ * x and y are arbitrary 64-bit unsigned integers
+ * bit must be either 0 or 1.
+ */
+static uint64_t ct_select_u64(uint64_t x, uint64_t y, uint64_t bit) {
+	uint64_t m = ct_mask_u64(bit);
+	return (x & m) | (y & ~m);
+}
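+/* e.g. ct_select_u64(7, 9, 1) == 7 and ct_select_u64(7, 9, 0) == 9. */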
+
+/* Returns 0 if a >= b
+ * Returns 1 if a < b
+ * Where a and b are both 3-limb 64-bit integers.
+ * This function runs in constant time.
+ */
+static int cmplt_ct(uint64_t *a, uint64_t *b) {
+	uint64_t r = 0; /* result */
+	uint64_t m = 0; /* mask   */
+	int i;
+	for (i = 2; i >= 0; --i) {
+		r |= ct_lt_u64(a[i], b[i]) & ~m;
+		m |= ct_mask_u64(ct_ne_u64(a[i], b[i])); /* stop when a[i] != b[i] */
+	}
+	return r & 1;
+}
+
+static uint32_t single_sample(uint64_t *in) {
+	size_t i = 0;
+
+	while (cmplt_ct(rlwe_table[i], in)) { // ~3.5 comparisons in expectation
+		i++;
+	}
+
+	return i;
+}
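+/* Note: this is inverse-transform sampling. rlwe_table (see rlwe_table.h) holds
+ * the cumulative distribution as 52 monotonically increasing 192-bit thresholds;
+ * the sample is the index of the first entry >= the random 192-bit input. */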
+
+/* We assume that e contains two random bits in the two
+ * least significant positions. */
+static uint64_t dbl(const uint32_t in, int32_t e) {
+	// sample uniformly from [-1, 0, 0, 1]
+	// Hence, 0 is sampled with twice the probability of 1
+	e = (((e >> 1) & 1) - ((int32_t)(e & 1)));
+	return (uint64_t)((((uint64_t) in) << (uint64_t) 1) - e);
+}
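+/* The two low bits of e map to: 00 -> 0, 01 -> -1, 10 -> +1, 11 -> 0, so the
+ * returned value is 2*in - e with e drawn from {-1, 0, 0, +1}. */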
+
+/* Constant time version. */
+static uint32_t single_sample_ct(uint64_t *in) {
+	uint32_t index = 0, i;
+	for (i = 0; i < 52; i++) {
+		index = ct_select_u64(index, i + 1, cmplt_ct(in, rlwe_table[i]));
+	}
+	return index;
+}
+
+void oqs_kex_rlwe_bcns15_sample_ct(uint32_t s[1024], OQS_RAND *rand) {
+	int i, j;
+	for (i = 0; i < 16; i++) {
+		uint64_t r = rand->rand_64(rand);
+		for (j = 0; j < 64; j++) {
+			uint64_t rnd[3];
+			uint32_t m;
+			uint32_t t;
+			rnd[0] = rand->rand_64(rand);
+			rnd[1] = rand->rand_64(rand);
+			rnd[2] = rand->rand_64(rand);
+			m = (r & 1);
+			r >>= 1;
+			// use the constant-time version of single_sample
+			s[i * 64 + j] = single_sample_ct(rnd);
+			t = (uint32_t) -s[i * 64 + j];
+			s[i * 64 + j] = ct_select_u64(t, s[i * 64 + j], ct_eq_u64(m, 0));
+		}
+	}
+}
+
+void oqs_kex_rlwe_bcns15_round2_ct(uint64_t out[16], const uint32_t in[1024]) {
+	int i;
+	memset(out, 0, 128);
+	for (i = 0; i < 1024; i++) {
+		uint64_t b = ct_ge_u64(in[i], 1073741824ULL) &
+		             ct_le_u64(in[i], 3221225471ULL);
+		out[i / 64] |= b << (uint64_t)(i % 64);
+	}
+}
+
+void oqs_kex_rlwe_bcns15_crossround2_ct(uint64_t out[16], const uint32_t in[1024], OQS_RAND *rand) {
+	int i, j;
+	memset(out, 0, 128);
+	for (i = 0; i < 64; i++) {
+		uint32_t e = rand->rand_32(rand);
+		for (j = 0; j < 16; j++) {
+			uint64_t dd;
+			uint64_t b;
+			dd = dbl(in[i * 16 + j], (int32_t) e);
+			e >>= 2;
+			b = (ct_ge_u64(dd, 2147483648ULL) & ct_le_u64(dd, 4294967295ULL)) |
+			    (ct_ge_u64(dd, 6442450942ULL) & ct_le_u64(dd, 8589934590ULL));
+			out[(i * 16 + j) / 64] |= (b << (uint64_t)((i * 16 + j) % 64));
+		}
+	}
+}
+
+void oqs_kex_rlwe_bcns15_rec_ct(uint64_t out[16], const uint32_t w[1024], const uint64_t b[16]) {
+	int i;
+	memset(out, 0, 128);
+	for (i = 0; i < 1024; i++) {
+		uint64_t coswi;
+		uint64_t B;
+		coswi = (((uint64_t) w[i]) << (uint64_t) 1);
+		B = (ct_eq_u64(getbit(b, i), 0) & ct_ge_u64(coswi, 3221225472ULL) &
+		     ct_le_u64(coswi, 7516192766ULL)) |
+		    (ct_eq_u64(getbit(b, i), 1) & ct_ge_u64(coswi, 1073741824ULL) &
+		     ct_le_u64(coswi, 5368709118ULL));
+		out[i / 64] |= (B << (uint64_t)(i % 64));
+	}
+}
+
+void oqs_kex_rlwe_bcns15_sample(uint32_t s[1024], OQS_RAND *rand) {
+	int i, j;
+	for (i = 0; i < 16; i++) {
+		uint64_t r = rand->rand_64(rand);
+		for (j = 0; j < 64; j++) {
+			uint64_t rnd[3];
+			int32_t m;
+			rnd[0] = rand->rand_64(rand);
+			rnd[1] = rand->rand_64(rand);
+			rnd[2] = rand->rand_64(rand);
+			m = (r & 1);
+			r >>= 1;
+			s[i * 64 + j] = single_sample(rnd);
+			if (m) {
+				s[i * 64 + j] = (uint32_t) -s[i * 64 + j];
+			}
+		}
+	}
+}
+
+void oqs_kex_rlwe_bcns15_round2(uint64_t out[16], const uint32_t in[1024]) {
+	int i;
+
+	// out should have enough space for 1024 bits (128 bytes)
+	memset(out, 0, 128);
+
+	// q/4 and 3*q/4 thresholds: 2^30 and 3*2^30 - 1, with q = 2^32 - 1
+	for (i = 0; i < 1024; i++) {
+		if (in[i] >= 1073741824 && in[i] <= 3221225471) {
+			setbit(out, i);
+		}
+	}
+}
+
+void oqs_kex_rlwe_bcns15_crossround2(uint64_t out[16], const uint32_t in[1024], OQS_RAND *rand) {
+	int i, j;
+	// out should have enough space for 1024 bits (128 bytes)
+	memset(out, 0, 128);
+
+	for (i = 0; i < 64; i++) {
+		uint32_t e = rand->rand_32(rand);
+		for (j = 0; j < 16; j++) {
+			uint64_t dd = dbl(in[i * 16 + j], (int32_t) e);
+			e >>= 2;
+			// q/2 to q and 3*q/2 to 2*q
+			if ((dd >= (uint64_t) 2147483648 && dd <= (uint64_t) 4294967295) || (dd >= (uint64_t) 6442450942 && dd <= (uint64_t) 8589934590)) {
+				setbit(out, (i * 16 + j));
+			}
+		}
+	}
+}
+
+void oqs_kex_rlwe_bcns15_rec(uint64_t out[16], const uint32_t w[1024], const uint64_t b[16]) {
+	int i;
+
+	// out should have enough space for 1024 bits
+	memset(out, 0, 128);
+
+	for (i = 0; i < 1024; i++) {
+		uint64_t coswi = (((uint64_t) w[i]) << (uint64_t) 1);
+		if (getbit(b, i) == 0) {
+			// Ceiling(2*3*q/8)..Floor(2*7*q/8)
+			if (coswi >= (uint64_t) 3221225472 && coswi <= (uint64_t) 7516192766) {
+				setbit(out, i);
+			}
+		} else {
+			// Ceiling(2*q/8)..Floor(2*5*q/8)
+			if (coswi >= (uint64_t) 1073741824 && coswi <= (uint64_t) 5368709118) {
+				setbit(out, i);
+			}
+		}
+	}
+}
+
+void oqs_kex_rlwe_bcns15_a_times_s_plus_e(uint32_t out[1024], const uint32_t a[1024], const uint32_t s[1024], const uint32_t e[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx) {
+	oqs_kex_rlwe_bcns15_fft_mul(out, a, s, ctx);
+	oqs_kex_rlwe_bcns15_fft_add(out, out, e);
+}
diff --git a/crypt/liboqs/kex_rlwe_bcns15/rlwe_a.h b/crypt/liboqs/kex_rlwe_bcns15/rlwe_a.h
new file mode 100644
index 0000000000000000000000000000000000000000..aa5eb8ebb36c3950d55ca598bf04512f7aa2afcf
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_bcns15/rlwe_a.h
@@ -0,0 +1,267 @@
+/* This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * See LICENSE for complete information.
+ */
+
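+/* Note: oqs_kex_rlwe_bcns15_a is the fixed, public ring element "a" that both
+ * parties pass to oqs_kex_rlwe_bcns15_generate_keypair(). */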
+uint32_t oqs_kex_rlwe_bcns15_a[1024] = {
+    0x29FE0191, 0xDD1A457D, 0x3534EE4B, 0x6450ED74,
+    0xBBFE9F64, 0x92BF0F31, 0x8DCF8995, 0x4C5E30D0,
+    0x9E2ED04D, 0x8C18FE0B, 0x1A70F2E7, 0x2625CD93,
+    0x0065DA14, 0x6E009722, 0xE6A70E8B, 0xAEF6EF56,
+    0x8C6C06AF, 0x9E59E953, 0x4995F67B, 0xE918EE9D,
+    0x8B4F41A7, 0x0D811041, 0xF5FE6458, 0x3C02B584,
+    0xCBCFC8FD, 0x5A01F116, 0x73408361, 0x44D3A098,
+    0xBBDEECF6, 0x90E09082, 0xF8538BA4, 0xF9600091,
+    0xD8D30FEF, 0x56201487, 0xACB2159D, 0x38F47F77,
+    0xED7A864F, 0x8FC785CA, 0x7CBD6108, 0x3CA577DE,
+    0xFF44CCC2, 0xA1385A79, 0x5C88E3AD, 0x177C46A9,
+    0xDA4A4DD8, 0x2AA3594F, 0xA4A5E629, 0x47CA6F6E,
+    0xB2DF1BC6, 0x6841B78E, 0x0823F5A8, 0xA18C7D52,
+    0x7634A0D1, 0xDA1751BA, 0x18B9D25D, 0x5B2643BC,
+    0xACC6975D, 0x48E786F4, 0x05E3ED4E, 0x4DC86568,
+    0x3F5C5F99, 0x585DBFD7, 0xEF6E0715, 0x7D36B823,
+    0x12D872CD, 0xD7B78F27, 0xDD672BF5, 0x2DC7C7EB,
+    0xA3033801, 0x50E48348, 0x9162A260, 0x0BE8F15B,
+    0xABB563EC, 0x06624C5A, 0x812BF7BC, 0x8637AC35,
+    0xF44504F3, 0xFF8577AB, 0x4A0161B0, 0x000AEB0E,
+    0x311204AF, 0x2A76831B, 0x4D903F3A, 0x97204FA9,
+    0x9EB524E3, 0x1757AFAC, 0xBA369FEC, 0xCD8F198D,
+    0x6B33C246, 0x51C13FCE, 0xB58ACC4E, 0x39ACF8DA,
+    0x7BB7EBF7, 0xEDC1449D, 0xC7B47FDB, 0x9C39148D,
+    0x4E688D7B, 0xFAD0C2C2, 0x296CE85C, 0x6045C89C,
+    0x6441C0C6, 0x50C7C83A, 0xC11764DD, 0x58D7EEA2,
+    0xE57B9D0E, 0x4E142770, 0xB8BFBB59, 0xE143EBAA,
+    0xFF60C855, 0x238727F0, 0xE35B4A5B, 0x8F96940B,
+    0x4498A6BA, 0x5911093A, 0x394DD002, 0x521B00D2,
+    0x140BDAF9, 0xEAB67207, 0x21E631A6, 0xA04AADA9,
+    0xA96A9843, 0x4B44CC9B, 0xE4D24C33, 0xC7E7AE78,
+    0xE45A6C72, 0xCBE61D3C, 0xCE5A4869, 0x10442A52,
+    0xDB11F194, 0x39FC415D, 0x7E7BDB76, 0xAE9EFA22,
+    0x25F4F262, 0x472DD0A7, 0x42EBD7A0, 0xE8038ECE,
+    0xD3DB002A, 0x8416D2EC, 0xDF88C989, 0x7FEA22D5,
+    0xC7A3F6FE, 0x37409982, 0xF45B75E2, 0x9A4AC289,
+    0x90406FD6, 0xEA1C74A5, 0x5777B39F, 0xD07F1FA3,
+    0xCE6EDA0D, 0xD150ECFB, 0xBEFF71BA, 0x50129EFC,
+    0x51CE65B9, 0xB9FB0AB8, 0x770C59CB, 0x11F2354F,
+    0x8623D4BB, 0xD6FCAFD6, 0xB2B1697C, 0x0D7067E2,
+    0x2BA5AFB9, 0xD369C585, 0x5B5E156C, 0xD8C81E6E,
+    0x80CFDF16, 0xF6F441EB, 0xC173BAF5, 0x78099E3A,
+    0xD38F027B, 0x4AC8D518, 0x8D0108A1, 0xE442B0F1,
+    0x56F9EA3C, 0xD0D6BBCA, 0x4E17DCB4, 0x69BF743B,
+    0x0CCE779F, 0xD5E59851, 0x63861EA2, 0xB1CB22C1,
+    0xBBFD2ACE, 0xDDA390D1, 0xEDF1059F, 0x04F80F89,
+    0xB13AF849, 0x58C66009, 0xE0D781C0, 0x588DC348,
+    0xA305669D, 0x0D7AF67F, 0x32BC3C38, 0xD725EFBA,
+    0xDC3D9434, 0x22BD7ED8, 0x2DFD2926, 0x4BDEAD3A,
+    0xB2D5ECE6, 0x16B05C99, 0xFEEC7104, 0xF6CAC918,
+    0x0944C774, 0xCE00633B, 0xC59DA01A, 0x41E8E924,
+    0x335DF501, 0x3049E8EE, 0x5B4B8AAC, 0xC962FC91,
+    0xD6BB22B3, 0x0AC870EB, 0xC3D99400, 0xA0CEAC28,
+    0xAF07DE1E, 0x831C2824, 0x258C5DDC, 0x779417E6,
+    0x41CB33D0, 0x4E51076A, 0xD1DB6038, 0x9E0B1C41,
+    0xA9A1F90D, 0xF27E7705, 0x75892711, 0x5D9F1175,
+    0x85CC508B, 0x5CA415BE, 0x1858C792, 0xFB18632F,
+    0xC94111EB, 0x937C0D28, 0xC2A09970, 0x386209D9,
+    0xBBDD9787, 0x2473F53A, 0xEF7E7637, 0xCFC8630B,
+    0x2BA3B7F8, 0x3C0047AD, 0x10D76FF7, 0xB1D9414D,
+    0xCEB7B902, 0xA5B543F5, 0x2E484905, 0xE0233C10,
+    0xD061A1F8, 0xCED0A901, 0xAC373CAC, 0x04281F37,
+    0x3609797F, 0xDB80964D, 0x7B49A74F, 0x7699656F,
+    0x0DCEC4BC, 0x0EC49C2D, 0xF1573A4E, 0xA3708464,
+    0x9A1E89F0, 0x6B26DEB6, 0x2329FA10, 0xCA4F2BFF,
+    0x9E012C8E, 0x788C1DFD, 0x2C758156, 0x2774C544,
+    0x150A1F7D, 0x50156D6E, 0x7B675DE1, 0x5D634703,
+    0xA7CEB801, 0x92733DAB, 0xB213C00B, 0x304A65B1,
+    0x8856CF8E, 0x7FF7DD67, 0xD0912293, 0x30064297,
+    0x663D051D, 0x01BC31B4, 0x2B1700BD, 0x39D7D18F,
+    0x1EAD5C95, 0x6FB9CD8B, 0xA09993A6, 0xB42071C0,
+    0x3C1F2195, 0x7FDF4CF8, 0xC7565A7E, 0x64703D34,
+    0x14B250EF, 0x2FA338D2, 0xAEE576DC, 0x6CCED41D,
+    0x612D0913, 0xD0680733, 0x8B4DBE8A, 0x6FFEA3D0,
+    0x46197CA2, 0xA77F916F, 0xFA5D7BD6, 0x01E22AEB,
+    0x18E462DD, 0x4EC9B937, 0xDE753212, 0x05113C94,
+    0x7786FBD4, 0xFB379F71, 0x756CF595, 0xEAADCFAB,
+    0xBBD74C2E, 0x1F234AC9, 0x85E28AEB, 0x329F7878,
+    0xD48FDE09, 0x47A60D0A, 0xAE95163F, 0x72E70995,
+    0x27F9FCBF, 0xBDCFCC41, 0x334BC498, 0xEE7931A1,
+    0xDFA6AEF4, 0x1EC5E1BF, 0x6221870F, 0xCD54AE13,
+    0x7B56EF58, 0x4847B490, 0x31640CD3, 0x10940E14,
+    0x556CC334, 0xC9E9B521, 0x499611FF, 0xBEC8D592,
+    0x44A7DCB7, 0x4AC2EABD, 0x7D387357, 0x1B76D4B6,
+    0x2EACE8C9, 0x52B2D2A4, 0x0C1F2A64, 0x50EF2B9A,
+    0x3B23F4F4, 0x8DDE415E, 0xF6B92D2D, 0x9DB0F840,
+    0xE18F309D, 0x737B7733, 0xF9F563C5, 0x3C5D4AEE,
+    0x8136B0AF, 0xC5AC5550, 0x6E93DEF9, 0x946BCCEC,
+    0x5163A273, 0xB5C72175, 0x4919EFBD, 0x222E9B68,
+    0x6E43D8EE, 0xAA039B23, 0x913FD80D, 0x42206F18,
+    0x5552C01F, 0x35B1136D, 0xFDC18279, 0x5946202B,
+    0xFAAE3A37, 0x4C764C88, 0x78075D9B, 0x844C8BA0,
+    0xCC33419E, 0x4B0832F6, 0x10D15E89, 0xEE0DD05A,
+    0x27432AF3, 0xE12CECA6, 0x60A231B3, 0xF81F258E,
+    0xE0BA44D7, 0x144F471B, 0xB4C8451E, 0x3705395C,
+    0xE8A69794, 0x3C23F27E, 0x186D2FBA, 0x3DAED36B,
+    0xF04DEFF1, 0x0CFA7BDD, 0xFEE45A4F, 0x5E9A4684,
+    0x98438C69, 0x5F1D921B, 0x7E43FD86, 0xBD0CF049,
+    0x28F47D38, 0x7DF38246, 0x8EED8923, 0xE524E7FC,
+    0x089BEC03, 0x15E3DE77, 0x78E8AE28, 0xCB79A298,
+    0x9F604E2B, 0x3C6428F7, 0xDCDEABF3, 0x33BAF60A,
+    0xBF801273, 0x247B0C3E, 0xE74A8192, 0xB45AC81D,
+    0xFC0D2ABE, 0xF17E99F5, 0x412BD1C1, 0x75DF4247,
+    0xA90FC3C0, 0xB2A99C0E, 0x0D3999D7, 0xD04543BA,
+    0x0FBC28A1, 0xEF68C7EF, 0x64327F30, 0xF11ECDBE,
+    0x4DBD312C, 0xD71CE03A, 0xAEFDAD34, 0xE1CC7315,
+    0x797A865C, 0xB9F1B1EB, 0xF7E68DFA, 0x816685B4,
+    0x9F38D44B, 0x366911C8, 0x756A7336, 0x696B8261,
+    0xC2FA21D2, 0x75085BF3, 0x2E5402B4, 0x75E6E744,
+    0xEAD80B0C, 0x4E689F68, 0x7A9452C6, 0xA5E1958A,
+    0x4B2B0A24, 0x97E0165E, 0xA4539B68, 0xF87A3096,
+    0x6543CA9D, 0x92A8D398, 0xA7D7FDB4, 0x1EA966B3,
+    0x75B50372, 0x4C63A778, 0x34E8E033, 0x87C60F82,
+    0xFC47303B, 0x8469AB86, 0x2DAADA50, 0xCFBB663F,
+    0x711C9C41, 0xE6C1C423, 0x8751BAA9, 0x861EC777,
+    0x31BCCCE1, 0xC1333271, 0x06864BEE, 0x41B50595,
+    0xD2267D30, 0x878BA5C5, 0x65267F56, 0x2118FB18,
+    0xA6DDD3DE, 0x8D309B98, 0x68928CB2, 0xFAE967DC,
+    0x3CEC52D0, 0x9CA8404B, 0xAADD68A8, 0x3AC6B1DF,
+    0xD53D67EA, 0x95C8D163, 0xB5F03F1D, 0x3A4C28A7,
+    0xE3C4B709, 0xB8EB7C65, 0xE76B42A3, 0x25E5A217,
+    0x6B6DD2B4, 0xBEFC5DF4, 0x9ACA5758, 0xC17F14D3,
+    0xB224A9D3, 0xDE1A7C8F, 0x1382911B, 0x627A2FB9,
+    0xC66AE36E, 0x02CC60EF, 0xC6800B20, 0x7A583C77,
+    0xE1CECEE8, 0xCA0001B4, 0x6A14CF16, 0xEF45DD21,
+    0x64CAA7D5, 0xFF3F1D95, 0xD328C67E, 0xC85868B1,
+    0x7FBF3FEB, 0x13D68388, 0x25373DD9, 0x8DE47EFB,
+    0x47912F26, 0x65515942, 0xC5ED711D, 0x6A368929,
+    0xA2405C50, 0xFFA9D6EB, 0xED39A0D4, 0xE456B8B5,
+    0x53283330, 0x7837FD52, 0x6EE46629, 0xCAFC9D63,
+    0xB781B08F, 0xDD61D834, 0xFB9ACF09, 0xEDA4444A,
+    0xBB6AA57F, 0xAED2385C, 0x22C9474D, 0x36E90167,
+    0xE6DF6150, 0xF1B0DA3B, 0xC3F6800E, 0x966302E0,
+    0x7DB1F627, 0xF9632186, 0xB4933075, 0x81C5C817,
+    0x878CA140, 0x4EDE8FED, 0x1AF347C1, 0xFDEB72BA,
+    0x2DA7FF9A, 0xB9BA3638, 0x2BB883F1, 0x474D1417,
+    0xC2F474A4, 0x1E2CF9F3, 0x231CB6B0, 0x7E574B53,
+    0xEDA8E1DA, 0xE1ACB7BB, 0xD1E354A6, 0x7C32B431,
+    0x8189991B, 0x25F9376A, 0x3FFA8782, 0xCD9038F1,
+    0x119EDBD1, 0x5C571840, 0x3DCA350F, 0x83923909,
+    0x9DC3CF55, 0x94D79DD0, 0xD683DE2B, 0xECF4316A,
+    0x0FFF48D4, 0x5D8076ED, 0x12B42C97, 0x2284CDB4,
+    0xCB245554, 0x3025B4D9, 0xB0075F35, 0x43A3802E,
+    0x18332B4D, 0x056C4467, 0xC597E3F7, 0x3F0EAF9D,
+    0xF48EBB9F, 0x92F62731, 0xBDB76296, 0x516D4466,
+    0x226102B3, 0x15E38046, 0xA683C4E0, 0x6C0D1962,
+    0xE20CB6CA, 0xC90C1D70, 0xD0FF8692, 0xD1419690,
+    0x2D6F1081, 0x34782E5E, 0xAE092CD5, 0x90C99193,
+    0xE97C0405, 0xEAE201DA, 0x631FB5AC, 0x279A2821,
+    0xDF47BA5B, 0xFBE587E2, 0x6810AD2D, 0xC63E94BD,
+    0x9AF36B42, 0xF14F0855, 0x946CE350, 0x7E3320E0,
+    0x34130DFF, 0x8C57C413, 0xAB0723B2, 0xF514C743,
+    0x63694BA3, 0x5665D23D, 0x6292C0B5, 0x9D768323,
+    0x2F8E447C, 0xB99A00FB, 0x6F8E5970, 0x69B3BB45,
+    0x59253E02, 0x1C518A02, 0xDD7C1232, 0xC6416C38,
+    0x77E10340, 0xCF6BEB9A, 0x006F9239, 0x0E99B50F,
+    0x863AD247, 0x75F0451A, 0x096E9094, 0xE0C2B357,
+    0x7CC81E15, 0x222759D4, 0xEE5BCFD0, 0x050F829B,
+    0x723B8FA9, 0x76143C55, 0x3B455EAF, 0xC2683EFD,
+    0xEE7874B4, 0x9BCE92F7, 0x6EED7461, 0x8E93898F,
+    0xA4EBE1D0, 0xFA4F019F, 0x1B0AD6DA, 0xA39CDE2F,
+    0x27002B33, 0x830D478D, 0x3EEA937E, 0x572E7DA3,
+    0x4BFFA4D1, 0x5E53DB0B, 0x708D21EE, 0xB003E23B,
+    0x12ED0756, 0x53CA0412, 0x73237D35, 0x438EC16B,
+    0x295177B8, 0xC85F4EE6, 0xB67FD3B4, 0x5221BC81,
+    0xD84E3094, 0x18C84200, 0x855E0795, 0x37BEC004,
+    0xDF9FAFC9, 0x60BEB6CD, 0x8645F0C5, 0xB1D2F1C3,
+    0xECDC4AE3, 0x424D17F1, 0x8429238C, 0x6155EAAB,
+    0xA17BEE21, 0x218D3637, 0x88A462CC, 0x8A1A031E,
+    0x3F671EA5, 0x9FA08639, 0xFF4A0F8E, 0x34167A7D,
+    0x1A817F54, 0x3215F21E, 0x412DD498, 0x57B633E7,
+    0xE8A2431F, 0x397BD699, 0x5A155288, 0xBB3538E8,
+    0xA49806D2, 0x49438A07, 0x24963568, 0x40414C26,
+    0xE45C08D4, 0x61D2435B, 0x2F36AEDE, 0x6580370C,
+    0x02A56A5E, 0x53B18017, 0xAF2C83FC, 0xF4C83871,
+    0xD9E5DDC3, 0x17B90B01, 0xED4A0904, 0xFA6DA26B,
+    0x35D9840D, 0xA0C505E4, 0x3396D0B5, 0xEC66B509,
+    0xC190E41C, 0x2F0CE5CF, 0x419C3E94, 0x220D42CA,
+    0x2F611F4F, 0x47906734, 0x8C2CDB17, 0xD8658F1C,
+    0x2F6745CD, 0x543D0D4F, 0x818F0469, 0x380FFDAE,
+    0xF5DD91E2, 0xAD25E46A, 0xE7039205, 0xA9F47165,
+    0xB2114C12, 0xCF7F626F, 0x54D2C9FF, 0xE4736A36,
+    0x16DB09FC, 0xE2B787BB, 0x9631709A, 0x72629F66,
+    0x819EBA08, 0x7F5D73F3, 0xA0B0B91C, 0xFEDFBA71,
+    0x252F14EE, 0xF26F8FA2, 0x92805F94, 0x43650F7F,
+    0x3051124F, 0x72CA8EAD, 0x21973E34, 0xA5B70509,
+    0xB36A41CC, 0xC52EDE5F, 0xF706A24E, 0x8AAF9F92,
+    0xADF6D99A, 0x23746D73, 0x1DA39F70, 0x9660FC8F,
+    0xA0A8CFEB, 0x83D5EFCA, 0x0AA4A72F, 0xEEF1B2DE,
+    0x00CFCC66, 0x8A145369, 0x6376CEDA, 0xA3262E2E,
+    0x3367BBA8, 0x01488C32, 0x5561A2AD, 0x40821BF2,
+    0xF0C89F61, 0xC4FAA6B3, 0xD843377A, 0x67A76555,
+    0xE8D9F1CE, 0x943034FF, 0x2BD468BD, 0xA514D935,
+    0x50CDB19D, 0xA09C7E9E, 0x6FEBEC30, 0xB1B36CF7,
+    0xCD7A30BC, 0x36C6FE0A, 0x2DF52C45, 0x45C9957F,
+    0x65076A79, 0xBF783DEE, 0x718D37F0, 0x098F9117,
+    0x9A70C430, 0x80EB1A53, 0x9F2505B1, 0x48D10D98,
+    0xB8D781E9, 0xF2376133, 0xECF25B98, 0x5A3B0E18,
+    0x2F623537, 0x9F0E34A4, 0xF1027EB6, 0xF9B16022,
+    0xBA3FEC59, 0xEF7226FD, 0x9F3058AA, 0xBB51DE0E,
+    0xD5435EA0, 0x8A6479D5, 0x077708B8, 0x9634876A,
+    0x069A260A, 0x168D9E6A, 0x9FD18E94, 0x8A7ACD53,
+    0x8E5A5869, 0x1B6F35FD, 0xA968913B, 0xC72F076B,
+    0x7DDA354C, 0x25B0297C, 0xD07219D5, 0xA66862BA,
+    0x87E8EE67, 0xFA28809B, 0x55762443, 0x31EF4956,
+    0xF4F4A511, 0x9A9378CB, 0x42ABDBDE, 0x7AA484B7,
+    0xE8EC22ED, 0xCADDEF61, 0x9D18538A, 0xA81B923E,
+    0x9C32F92A, 0x6D278E58, 0x4CDFC716, 0xAB64814F,
+    0xF832BF1A, 0xE2C1A36B, 0x20675610, 0xE78D855A,
+    0x38332C3D, 0x5AE0EAD9, 0x2E23F22D, 0x3C8683C5,
+    0xA351AF89, 0x54720D3B, 0xABC6E51F, 0x89330C8E,
+    0x600D5650, 0x197EA0C6, 0x7D502A5D, 0x3A536EA7,
+    0x7DF71F32, 0x456FE645, 0x3EF5E7A2, 0x6664BCAF,
+    0xA9D074C2, 0xE9D9E478, 0x1AE9AB77, 0xFECE7160,
+    0xC618EEEC, 0x771B0026, 0x2B54F43C, 0x145DA102,
+    0x1B3D7949, 0xBB6E2D9D, 0xDB8FDC4A, 0x25397EBA,
+    0x9228A6E9, 0x56B4C69D, 0x337B943C, 0xE35B716C,
+    0xF7FE89A1, 0x023AC20D, 0x033165C8, 0x9F13B130,
+    0xC1BAFB1D, 0xA2C42C8C, 0x58E4D431, 0xE10741E6,
+    0x2547589A, 0x8D9EF7BD, 0x7E322280, 0xF49FDDC2,
+    0xBE21A094, 0xA061178A, 0x34D9F13B, 0x694D652F,
+    0x05084A2A, 0x2767B991, 0xE8536AB4, 0xEBFADF6F,
+    0xF4C8DFAC, 0xD9967CCA, 0xE04BCF3F, 0x232B3460,
+    0x9FF6E88A, 0x6DF3A2B0, 0x0FE10E99, 0x7B059283,
+    0x067BFB57, 0x8DDA26B0, 0xB7D6652F, 0x85705248,
+    0x0826240C, 0x5DF7F52E, 0x47973463, 0xB9C22D37,
+    0x9BEB265D, 0x493AB6FD, 0x10C0FB07, 0x947C102A,
+    0x5FEC0608, 0x140E07AE, 0x8B330F43, 0x9364A649,
+    0xC9AD63EF, 0xBE4B2475, 0x1A09AC77, 0x9E40A4B0,
+    0xBA9C23E7, 0x7F4A798D, 0xE2C52D66, 0xA26EE9E0,
+    0x8C79DCE7, 0xDD7F1C3D, 0x6AE83B20, 0x073DBA03,
+    0xB1844D97, 0x16D7ED6E, 0x5E0DE0B1, 0xA497D717,
+    0xFA507AA2, 0xC332649B, 0x21419E15, 0x384D9CCC,
+    0x8B915A8B, 0xBA328FD5, 0xF99E8016, 0x545725EC,
+    0xED9840ED, 0x71E5D78A, 0x21862496, 0x6F858B6C,
+    0xF3736AE2, 0x8979FC2B, 0x5C8122D0, 0x0A20EB5A,
+    0x2278AA6E, 0x55275E74, 0x22D57650, 0xE5FFDC96,
+    0x6BA86E10, 0x4EC5BFCC, 0x05AFA305, 0xFB7FD007,
+    0x726EA097, 0xF6A349C4, 0xCB2F71E4, 0x08DD80BA,
+    0x892D0E23, 0xBD2E0A55, 0x40AC0CD3, 0xBFAF5688,
+    0x6E40A6A5, 0x6DA1BBE0, 0x969557A9, 0xFB88629B,
+    0x11F845C4, 0x5FC91C6F, 0x1B0C7E79, 0xD6946953,
+    0x27A164A0, 0x55D20869, 0x29A2182D, 0x406AA963,
+    0x74F40C59, 0x56A90570, 0x535AC9C6, 0x9521EF76,
+    0xBA38759B, 0xCD6EF76E, 0xF2181DB9, 0x7BE78DA6,
+    0xF88E4115, 0xABA7E166, 0xF60DC9B3, 0xFECA1EF3,
+    0x43DF196A, 0xCC4FC9DD, 0x428A8961, 0xCF6B4560,
+    0x87B30B57, 0x20E7BAC5, 0xBFBDCCDF, 0xF7D3F6BB,
+    0x7FC311C8, 0x2C7835B5, 0xA24F6821, 0x6A38454C,
+    0x460E42FD, 0x2B6BA832, 0xC7068C72, 0x28CDCE59,
+    0xAE82A0B4, 0x25F39572, 0x9B6C7758, 0xE0FE9EBA,
+    0xA8F03EE1, 0xD70B928E, 0x95E529D7, 0xDD91DB86,
+    0xF912BA8C, 0x7F478A6A, 0x1F017850, 0x5A717E10,
+    0xDAC243F9, 0xD235F314, 0x4F80AAE6, 0xA46364D8,
+    0xA1E3A9E9, 0x495FEFB1, 0xB9058508, 0x23A20999,
+    0x73D18118, 0xCA3EEE2A, 0x34E1C7E2, 0xAADBADBD};
diff --git a/crypt/liboqs/kex_rlwe_bcns15/rlwe_kex.c b/crypt/liboqs/kex_rlwe_bcns15/rlwe_kex.c
new file mode 100644
index 0000000000000000000000000000000000000000..7bf28e38debf14bba667a31a9dcd4644970aa34b
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_bcns15/rlwe_kex.c
@@ -0,0 +1,63 @@
+/* This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * See LICENSE for complete information.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <oqs/rand.h>
+
+#include "local.h"
+
+static void *(*volatile rlwe_memset_volatile)(void *, int, size_t) = memset;
+
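+/* Illustrative summary of the reconciliation flow implemented below: Alice
+ * computes w = b_bob * s_alice while Bob computes v = b_alice * s_bob + e'';
+ * the two differ only by small error terms. Bob derives the key as round2(v)
+ * and publishes the hint c = crossround2(v); Alice recovers the same key via
+ * rec(w, c). */
+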
+void oqs_kex_rlwe_bcns15_generate_keypair(const uint32_t *a, uint32_t s[1024], uint32_t b[1024], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx, OQS_RAND *rand) {
+	uint32_t e[1024];
+#if CONSTANT_TIME
+	oqs_kex_rlwe_bcns15_sample_ct(s, rand);
+	oqs_kex_rlwe_bcns15_sample_ct(e, rand);
+#else
+	oqs_kex_rlwe_bcns15_sample(s, rand);
+	oqs_kex_rlwe_bcns15_sample(e, rand);
+#endif
+	oqs_kex_rlwe_bcns15_a_times_s_plus_e(b, a, s, e, ctx);
+	rlwe_memset_volatile(e, 0, 1024 * sizeof(uint32_t));
+}
+
+void oqs_kex_rlwe_bcns15_compute_key_alice(const uint32_t b[1024], const uint32_t s[1024], const uint64_t c[16], uint64_t k[16], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx) {
+	uint32_t w[1024];
+	oqs_kex_rlwe_bcns15_fft_mul(w, b, s, ctx);
+#if CONSTANT_TIME
+	oqs_kex_rlwe_bcns15_rec_ct(k, w, c);
+#else
+	oqs_kex_rlwe_bcns15_rec(k, w, c);
+#endif
+	rlwe_memset_volatile(w, 0, 1024 * sizeof(uint32_t));
+}
+
+void oqs_kex_rlwe_bcns15_compute_key_bob(const uint32_t b[1024], const uint32_t s[1024], uint64_t c[16], uint64_t k[16], struct oqs_kex_rlwe_bcns15_fft_ctx *ctx, OQS_RAND *rand) {
+	uint32_t v[1024];
+	uint32_t eprimeprime[1024];
+#if CONSTANT_TIME
+	oqs_kex_rlwe_bcns15_sample_ct(eprimeprime, rand);
+#else
+	oqs_kex_rlwe_bcns15_sample(eprimeprime, rand);
+#endif
+	oqs_kex_rlwe_bcns15_a_times_s_plus_e(v, b, s, eprimeprime, ctx);
+#if CONSTANT_TIME
+	oqs_kex_rlwe_bcns15_crossround2_ct(c, v, rand);
+	oqs_kex_rlwe_bcns15_round2_ct(k, v);
+#else
+	oqs_kex_rlwe_bcns15_crossround2(c, v, rand);
+	oqs_kex_rlwe_bcns15_round2(k, v);
+#endif
+	rlwe_memset_volatile(v, 0, 1024 * sizeof(uint32_t));
+	rlwe_memset_volatile(eprimeprime, 0, 1024 * sizeof(uint32_t));
+}
diff --git a/crypt/liboqs/kex_rlwe_bcns15/rlwe_table.h b/crypt/liboqs/kex_rlwe_bcns15/rlwe_table.h
new file mode 100644
index 0000000000000000000000000000000000000000..402e4fd8b18516b5930576f4c98fe51d5ddee0dd
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_bcns15/rlwe_table.h
@@ -0,0 +1,63 @@
+/* This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * See LICENSE for complete information.
+ */
+
+static uint64_t rlwe_table[52][3] = {
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x1FFFFFFFFFFFFFFF},
+    {0xE0C81DA0D6A8BD22, 0x161ABD186DA13542, 0x5CEF2C248806C827},
+    {0x8D026C4E14BC7408, 0x4344C125B3533F22, 0x9186506BCC065F20},
+    {0x10AC7CEC7D7E2A3B, 0x5D62CE65E6217813, 0xBAAB5F82BCDB43B3},
+    {0x709C92996E94D801, 0x1411F551608E4D22, 0xD7D9769FAD23BCB1},
+    {0x6287D827008404B7, 0x7E1526D618902F20, 0xEA9BE2F4D6DDB5ED},
+    {0x34CBDC118C15F40E, 0xE7D2A13787E94674, 0xF58A99474919B8C9},
+    {0xD521F7EBBBE8C3A2, 0xE8A773D9A1EA0AAB, 0xFB5117812753B7B8},
+    {0xC3D9E58131089A6A, 0x148CB49FF716491B, 0xFE151BD0928596D3},
+    {0x2E060C4A842A27F6, 0x07E44D009ADB0049, 0xFF487508BA9F7208},
+    {0xFCEDEFCFAA887582, 0x1A5409BF5D4B039E, 0xFFC16686270CFC82},
+    {0x4FE22E5DF9FAAC20, 0xFDC99BFE0F991958, 0xFFEC8AC3C159431B},
+    {0xA36605F81B14FEDF, 0xA6FCD4C13F4AFCE0, 0xFFFA7DF4B6E92C28},
+    {0x9D1FDCFF97BBC957, 0x4B869C6286ED0BB5, 0xFFFE94BB4554B5AC},
+    {0x6B3EEBA74AAD104B, 0xEC72329E974D63C7, 0xFFFFAADE1B1CAA95},
+    {0x48C8DA4009C10760, 0x337F6316C1FF0A59, 0xFFFFEDDC1C6436DC},
+    {0x84480A71312F35E7, 0xD95E7B2CD6933C97, 0xFFFFFC7C9DC2569A},
+    {0x23C01DAC1513FA0F, 0x8E0B132AE72F729F, 0xFFFFFF61BC337FED},
+    {0x90C89D6570165907, 0x05B9D725AAEA5CAD, 0xFFFFFFE6B3CF05F7},
+    {0x692E2A94C500EC7D, 0x99E8F72C370F27A6, 0xFFFFFFFC53EA610E},
+    {0x28C2998CEAE37CC8, 0xC6E2F0D7CAFA9AB8, 0xFFFFFFFF841943DE},
+    {0xC515CF4CB0130256, 0x4745913CB4F9E4DD, 0xFFFFFFFFF12D07EC},
+    {0x39F0ECEA047D6E3A, 0xEE62D42142AC6544, 0xFFFFFFFFFE63E348},
+    {0xDF11BB25B50462D6, 0x064A0C6CC136E943, 0xFFFFFFFFFFD762C7},
+    {0xCDBA0DD69FD2EA0F, 0xC672F3A74DB0F175, 0xFFFFFFFFFFFC5E37},
+    {0xFDB966A75F3604D9, 0x6ABEF8B144723D83, 0xFFFFFFFFFFFFB48F},
+    {0x3C4FECBB600740D1, 0x697598CEADD71A15, 0xFFFFFFFFFFFFFA72},
+    {0x1574CC916D60E673, 0x12F5A30DD99D7051, 0xFFFFFFFFFFFFFFA1},
+    {0xDD3DCD1B9CB7321D, 0x4016ED3E05883572, 0xFFFFFFFFFFFFFFFA},
+    {0xB4A4E8CF3DF79A7A, 0xAF22D9AFAD5A73CF, 0xFFFFFFFFFFFFFFFF},
+    {0x91056A8196F74466, 0xFBF88681905332BA, 0xFFFFFFFFFFFFFFFF},
+    {0x965B9ED9BD366C04, 0xFFD16385AF29A51F, 0xFFFFFFFFFFFFFFFF},
+    {0xF05F75D38F2D28A3, 0xFFFE16FF8EA2B60C, 0xFFFFFFFFFFFFFFFF},
+    {0x77E35C8980421EE8, 0xFFFFEDD3C9DDC7E8, 0xFFFFFFFFFFFFFFFF},
+    {0x92783617956F140A, 0xFFFFFF63392B6E8F, 0xFFFFFFFFFFFFFFFF},
+    {0xA536DC994639AD78, 0xFFFFFFFB3592B3D1, 0xFFFFFFFFFFFFFFFF},
+    {0x8F3A871874DD9FD5, 0xFFFFFFFFDE04A5BB, 0xFFFFFFFFFFFFFFFF},
+    {0x310DE3650170B717, 0xFFFFFFFFFF257152, 0xFFFFFFFFFFFFFFFF},
+    {0x1F21A853A422F8CC, 0xFFFFFFFFFFFB057B, 0xFFFFFFFFFFFFFFFF},
+    {0x3CA9D5C6DB4EE2BA, 0xFFFFFFFFFFFFE5AD, 0xFFFFFFFFFFFFFFFF},
+    {0xCFD9CE958E59869C, 0xFFFFFFFFFFFFFF81, 0xFFFFFFFFFFFFFFFF},
+    {0xDB8E1F91D955C452, 0xFFFFFFFFFFFFFFFD, 0xFFFFFFFFFFFFFFFF},
+    {0xF78EE3A8E99E08C3, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+    {0xFFE1D7858BABDA25, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+    {0xFFFF9E52E32CAB4A, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+    {0xFFFFFEE13217574F, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+    {0xFFFFFFFD04888041, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+    {0xFFFFFFFFF8CD8A56, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+    {0xFFFFFFFFFFF04111, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+    {0xFFFFFFFFFFFFE0C5, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+    {0xFFFFFFFFFFFFFFC7, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}};
diff --git a/crypt/liboqs/kex_rlwe_msrln16/AMD64/consts.c b/crypt/liboqs/kex_rlwe_msrln16/AMD64/consts.c
new file mode 100644
index 0000000000000000000000000000000000000000..9c1260e8729881ea6adfbee8a2fde4cc415c1dad
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/AMD64/consts.c
@@ -0,0 +1,38 @@
+/****************************************************************************************
+* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: constants for the x64 assembly implementation
+*
+*****************************************************************************************/
+
+#include "../LatticeCrypto_priv.h"
+#include <stdint.h>
+
+uint32_t PRIME8x[8] = {OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q};
+uint8_t ONE32x[32] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+uint32_t MASK12x8[8] = {0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff};
+uint32_t PERM0246[4] = {0, 2, 4, 6};
+uint32_t PERM00224466[8] = {0, 0, 2, 2, 4, 4, 6, 6};
+uint32_t PERM02134657[8] = {0, 2, 1, 3, 4, 6, 5, 7};
+uint64_t PERM0145[4] = {0, 1, 4, 5};
+uint64_t PERM2367[4] = {2, 3, 6, 7};
+uint64_t MASK32[4] = {0xffffffff, 0, 0xffffffff, 0};
+uint64_t MASK42[4] = {0x3fff0000000, 0, 0x3fff0000000, 0};
+
+uint64_t MASK14_1[4] = {0x3fff, 0, 0x3fff, 0};
+uint64_t MASK14_2[4] = {0xFFFC000, 0, 0xFFFC000, 0};
+uint64_t MASK14_3[4] = {0x3FFF0000000, 0, 0x3FFF0000000, 0};
+uint64_t MASK14_4[4] = {0xFFFC0000000000, 0, 0xFFFC0000000000, 0};
+
+uint32_t ONE8x[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+uint32_t THREE8x[8] = {3, 3, 3, 3, 3, 3, 3, 3};
+uint32_t FOUR8x[8] = {4, 4, 4, 4, 4, 4, 4, 4};
+uint32_t PARAM_Q4x8[8] = {3073, 3073, 3073, 3073, 3073, 3073, 3073, 3073};
+uint32_t PARAM_3Q4x8[8] = {9217, 9217, 9217, 9217, 9217, 9217, 9217, 9217};
+uint32_t PARAM_5Q4x8[8] = {15362, 15362, 15362, 15362, 15362, 15362, 15362, 15362};
+uint32_t PARAM_7Q4x8[8] = {21506, 21506, 21506, 21506, 21506, 21506, 21506, 21506};
+uint32_t PARAM_Q2x8[8] = {6145, 6145, 6145, 6145, 6145, 6145, 6145, 6145};
+uint32_t PARAM_3Q2x8[8] = {18434, 18434, 18434, 18434, 18434, 18434, 18434, 18434};
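+
+// Note: assuming OQS_RLWE_MSRLN16_PARAMETER_Q is 12289 (as in the MSR
+// LatticeCrypto library), the constants above are ceil(q/4), ceil(3q/4),
+// ceil(5q/4), ceil(7q/4), ceil(q/2) and ceil(3q/2), broadcast across 8 lanes.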
diff --git a/crypt/liboqs/kex_rlwe_msrln16/AMD64/error_asm.S b/crypt/liboqs/kex_rlwe_msrln16/AMD64/error_asm.S
new file mode 100644
index 0000000000000000000000000000000000000000..d5d5478575293fd18c73249ba025529ac297bf5e
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/AMD64/error_asm.S
@@ -0,0 +1,436 @@
+//****************************************************************************************
+// LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
+//
+//    Copyright (c) Microsoft Corporation. All rights reserved.
+//
+//
+// Abstract: functions for error sampling and reconciliation in x64 assembly using AVX2 
+//           vector instructions for Linux 
+//
+//****************************************************************************************  
+
+.intel_syntax noprefix 
+
+// Registers that are used for parameter passing:
+#define reg_p1  rdi
+#define reg_p2  rsi
+#define reg_p3  rdx
+#define reg_p4  rcx
+#define reg_p5  r8
+
+
+.text
+//***********************************************************************
+//  Error sampling from psi_12
+//  Operation: c [reg_p2] <- sampling(a) [reg_p1]
+//*********************************************************************** 
+.globl oqs_rlwe_msrln16_error_sampling_asm
+oqs_rlwe_msrln16_error_sampling_asm:  
+  vmovdqu    ymm7, ONE32x 
+  movq       r11, 384
+  movq       r10, 32
+  movq       r8, 24
+  xor        rax, rax
+  xor        rcx, rcx
+loop1:
+  vmovdqu    ymm0, YMMWORD PTR [reg_p1+4*rax]        // sample
+  vmovdqu    ymm2, YMMWORD PTR [reg_p1+4*rax+32]     // sample
+  vmovdqu    ymm4, YMMWORD PTR [reg_p1+4*rax+64]     // sample
+  movq       r9, 2
+
+loop1b:
+  vpand      ymm1, ymm0, ymm7                        // Collecting 8 bits for first sample
+  vpsrlw     ymm0, ymm0, 1 
+  vpand      ymm3, ymm0, ymm7
+  vpaddb     ymm1, ymm1, ymm3
+  vpsrlw     ymm0, ymm0, 1 
+  vpand      ymm3, ymm0, ymm7
+  vpaddb     ymm1, ymm1, ymm3
+  vpsrlw     ymm0, ymm0, 1 
+  vpand      ymm3, ymm0, ymm7
+  vpaddb     ymm1, ymm1, ymm3
+  vpsrlw     ymm0, ymm0, 1 
+  vpand      ymm3, ymm0, ymm7
+  vpaddb     ymm1, ymm1, ymm3
+  vpsrlw     ymm0, ymm0, 1 
+  vpand      ymm3, ymm0, ymm7
+  vpaddb     ymm1, ymm1, ymm3
+  vpsrlw     ymm0, ymm0, 1 
+  vpand      ymm3, ymm0, ymm7
+  vpaddb     ymm1, ymm1, ymm3
+  vpsrlw     ymm0, ymm0, 1 
+  vpand      ymm3, ymm0, ymm7
+  vpaddb     ymm1, ymm1, ymm3
+  
+  vpand      ymm3, ymm2, ymm7                        // Adding next 4 bits
+  vpaddb     ymm1, ymm1, ymm3
+  vpsrlw     ymm2, ymm2, 1 
+  vpand      ymm3, ymm2, ymm7
+  vpaddb     ymm1, ymm1, ymm3
+  vpsrlw     ymm2, ymm2, 1 
+  vpand      ymm3, ymm2, ymm7
+  vpaddb     ymm1, ymm1, ymm3
+  vpsrlw     ymm2, ymm2, 1 
+  vpand      ymm3, ymm2, ymm7
+  vpaddb     ymm1, ymm1, ymm3
+  
+  vpsrlw     ymm2, ymm2, 1                           // Collecting 4-bits for second sample
+  vpand      ymm5, ymm2, ymm7
+  vpsrlw     ymm2, ymm2, 1 
+  vpand      ymm3, ymm2, ymm7
+  vpaddb     ymm5, ymm5, ymm3
+  vpsrlw     ymm2, ymm2, 1 
+  vpand      ymm3, ymm2, ymm7
+  vpaddb     ymm5, ymm5, ymm3
+  vpsrlw     ymm2, ymm2, 1 
+  vpand      ymm3, ymm2, ymm7
+  vpaddb     ymm5, ymm5, ymm3
+  
+  vpand      ymm3, ymm4, ymm7                        // Adding next 8 bits
+  vpaddb     ymm5, ymm5, ymm3
+  vpsrlw     ymm4, ymm4, 1 
+  vpand      ymm3, ymm4, ymm7
+  vpaddb     ymm5, ymm5, ymm3
+  vpsrlw     ymm4, ymm4, 1 
+  vpand      ymm3, ymm4, ymm7
+  vpaddb     ymm5, ymm5, ymm3
+  vpsrlw     ymm4, ymm4, 1 
+  vpand      ymm3, ymm4, ymm7
+  vpaddb     ymm5, ymm5, ymm3
+  vpsrlw     ymm4, ymm4, 1 
+  vpand      ymm3, ymm4, ymm7
+  vpaddb     ymm5, ymm5, ymm3
+  vpsrlw     ymm4, ymm4, 1 
+  vpand      ymm3, ymm4, ymm7
+  vpaddb     ymm5, ymm5, ymm3
+  vpsrlw     ymm4, ymm4, 1 
+  vpand      ymm3, ymm4, ymm7
+  vpaddb     ymm5, ymm5, ymm3
+  vpsrlw     ymm4, ymm4, 1 
+  vpand      ymm3, ymm4, ymm7
+  vpaddb     ymm5, ymm5, ymm3
+
+  vpsubb     ymm5, ymm1, ymm5
+  vpermq     ymm3, ymm5, 0x0e 
+  vpmovsxbd  ymm6, xmm5
+  vpsrldq    ymm5, ymm5, 8 
+  vpmovsxbd  ymm7, xmm5 
+  vpmovsxbd  ymm8, xmm3
+  vpsrldq    ymm3, ymm3, 8 
+  vpmovsxbd  ymm9, xmm3
+  vmovdqu    YMMWORD PTR [reg_p2+4*rcx], ymm6
+  vmovdqu    YMMWORD PTR [reg_p2+4*rcx+32], ymm7
+  vmovdqu    YMMWORD PTR [reg_p2+4*rcx+64], ymm8
+  vmovdqu    YMMWORD PTR [reg_p2+4*rcx+96], ymm9
+  
+  add        rcx, r10        // i+32
+  vpsrlw     ymm0, ymm0, 1 
+  vpsrlw     ymm2, ymm2, 1 
+  vpsrlw     ymm4, ymm4, 1 
+  dec        r9
+  jnz        loop1b
+        
+  add        rax, r8         // j+24        
+  cmp        rax, r11
+  jl         loop1
+  ret
+
+
+//***********************************************************************
+//  Reconciliation helper function
+//  Operation: c [reg_p2] <- function(a) [reg_p1]
+//             [reg_p3] points to random bits
+//*********************************************************************** 
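+// For each group of four coefficients (x[j], x[j+256], x[j+512], x[j+768]) this computes a
+// 2-bit reconciliation hint: the coefficients are doubled and blinded with one random bit
+// (2*x - rbit), two candidate quantizations v0/v1 are derived from comparisons against
+// fixed multiples of q/4 and q/2, the closer candidate is selected by the norm test, and
+// the result is reduced mod 4. This appears to follow the HelpRec step of the
+// Alkim-Ducas-Poppelmann-Schwabe key exchange referenced in LatticeCrypto_kex.c.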
+.globl oqs_rlwe_msrln16_helprec_asm
+oqs_rlwe_msrln16_helprec_asm:  
+  vmovdqu    ymm8, ONE8x 
+  movq       r11, 256
+  movq       r10, 8
+  xor        rax, rax
+  vmovdqu    ymm4, YMMWORD PTR [reg_p3]              // rbits
+loop2:
+  vmovdqu    ymm0, YMMWORD PTR [reg_p1+4*rax]        // x
+  vmovdqu    ymm1, YMMWORD PTR [reg_p1+4*rax+4*256]  // x+256
+  vmovdqu    ymm2, YMMWORD PTR [reg_p1+4*rax+4*512]  // x+512
+  vmovdqu    ymm3, YMMWORD PTR [reg_p1+4*rax+4*768]  // x+768
+
+  vpand      ymm5, ymm4, ymm8                        // Collecting 8 random bits
+  vpslld     ymm0, ymm0, 1                           // 2*x - rbits
+  vpslld     ymm1, ymm1, 1 
+  vpslld     ymm2, ymm2, 1 
+  vpslld     ymm3, ymm3, 1 
+  vpsubd     ymm0, ymm0, ymm5
+  vpsubd     ymm1, ymm1, ymm5
+  vpsubd     ymm2, ymm2, ymm5
+  vpsubd     ymm3, ymm3, ymm5
+    
+  vmovdqu    ymm15, PARAM_Q4x8 
+  vmovdqu    ymm7, FOUR8x
+  vmovdqu    ymm8, ymm7
+  vmovdqu    ymm9, ymm7
+  vmovdqu    ymm10, ymm7
+  vpsubd     ymm6, ymm0, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm7, ymm7, ymm6
+  vpsubd     ymm6, ymm1, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm8, ymm8, ymm6
+  vpsubd     ymm6, ymm2, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm9, ymm9, ymm6
+  vpsubd     ymm6, ymm3, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm10, ymm10, ymm6
+  vmovdqu    ymm15, PARAM_3Q4x8 
+  vpsubd     ymm6, ymm0, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm7, ymm7, ymm6
+  vpsubd     ymm6, ymm1, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm8, ymm8, ymm6
+  vpsubd     ymm6, ymm2, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm9, ymm9, ymm6
+  vpsubd     ymm6, ymm3, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm10, ymm10, ymm6
+  vmovdqu    ymm15, PARAM_5Q4x8 
+  vpsubd     ymm6, ymm0, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm7, ymm7, ymm6
+  vpsubd     ymm6, ymm1, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm8, ymm8, ymm6
+  vpsubd     ymm6, ymm2, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm9, ymm9, ymm6
+  vpsubd     ymm6, ymm3, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm10, ymm10, ymm6
+  vmovdqu    ymm15, PARAM_7Q4x8 
+  vpsubd     ymm6, ymm0, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm7, ymm7, ymm6                        // v0[0]
+  vpsubd     ymm6, ymm1, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm8, ymm8, ymm6                        // v0[1]
+  vpsubd     ymm6, ymm2, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm9, ymm9, ymm6                        // v0[2]
+  vpsubd     ymm6, ymm3, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm10, ymm10, ymm6                      // v0[3]  
+    
+  vmovdqu    ymm15, PARAM_Q2x8 
+  vmovdqu    ymm11, THREE8x
+  vmovdqu    ymm12, ymm11
+  vmovdqu    ymm13, ymm11
+  vmovdqu    ymm14, ymm11
+  vpsubd     ymm6, ymm0, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm11, ymm11, ymm6
+  vpsubd     ymm6, ymm1, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm12, ymm12, ymm6
+  vpsubd     ymm6, ymm2, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm13, ymm13, ymm6
+  vpsubd     ymm6, ymm3, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm14, ymm14, ymm6
+  vmovdqu    ymm15, PARAM_3Q2x8 
+  vpsubd     ymm6, ymm0, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm11, ymm11, ymm6
+  vpsubd     ymm6, ymm1, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm12, ymm12, ymm6
+  vpsubd     ymm6, ymm2, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm13, ymm13, ymm6
+  vpsubd     ymm6, ymm3, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm14, ymm14, ymm6
+  vmovdqu    ymm15, PRIME8x  
+  vpsubd     ymm6, ymm0, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm11, ymm11, ymm6                      // v1[0]
+  vpsubd     ymm6, ymm1, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm12, ymm12, ymm6                      // v1[1]
+  vpsubd     ymm6, ymm2, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm13, ymm13, ymm6                      // v1[2]
+  vpsubd     ymm6, ymm3, ymm15
+  vpsrld     ymm6, ymm6, 31 
+  vpsubd     ymm14, ymm14, ymm6                      // v1[3]
+
+  vpmulld    ymm6, ymm7, ymm15 
+  vpslld     ymm0, ymm0, 1 
+  vpsubd     ymm0, ymm0, ymm6
+  vpabsd     ymm0, ymm0
+  vpmulld    ymm6, ymm8, ymm15 
+  vpslld     ymm1, ymm1, 1 
+  vpsubd     ymm1, ymm1, ymm6
+  vpabsd     ymm1, ymm1
+  vpaddd     ymm0, ymm0, ymm1
+  vpmulld    ymm6, ymm9, ymm15 
+  vpslld     ymm2, ymm2, 1 
+  vpsubd     ymm2, ymm2, ymm6
+  vpabsd     ymm2, ymm2
+  vpaddd     ymm0, ymm0, ymm2
+  vpmulld    ymm6, ymm10, ymm15 
+  vpslld     ymm3, ymm3, 1 
+  vpsubd     ymm3, ymm3, ymm6
+  vpabsd     ymm3, ymm3
+  vpaddd     ymm0, ymm0, ymm3                        // norm
+  vpsubd     ymm0, ymm0, ymm15
+  vpsrad     ymm0, ymm0, 31                          // If norm < q then norm = 0xff...ff, else norm = 0
+  
+  vpxor      ymm7, ymm7, ymm11                       // v0[i] = (norm & (v0[i] ^ v1[i])) ^ v1[i]
+  vpand      ymm7, ymm7, ymm0
+  vpxor      ymm7, ymm7, ymm11
+  vpxor      ymm8, ymm8, ymm12
+  vpand      ymm8, ymm8, ymm0
+  vpxor      ymm8, ymm8, ymm12
+  vpxor      ymm9, ymm9, ymm13
+  vpand      ymm9, ymm9, ymm0
+  vpxor      ymm9, ymm9, ymm13
+  vpxor      ymm10, ymm10, ymm14
+  vpand      ymm10, ymm10, ymm0
+  vpxor      ymm10, ymm10, ymm14
+  
+  vmovdqu    ymm15, THREE8x
+  vmovdqu    ymm14, ONE8x
+  vpsubd     ymm7, ymm7, ymm10
+  vpand      ymm7, ymm7, ymm15
+  vpsubd     ymm8, ymm8, ymm10
+  vpand      ymm8, ymm8, ymm15
+  vpsubd     ymm9, ymm9, ymm10
+  vpand      ymm9, ymm9, ymm15 
+  vpslld     ymm10, ymm10, 1 
+  vpxor      ymm0, ymm0, ymm14
+  vpand      ymm0, ymm0, ymm14
+  vpaddd     ymm10, ymm0, ymm10
+  vpand      ymm10, ymm10, ymm15 
+  
+  vpsrld     ymm4, ymm4, 1 
+  vmovdqu    YMMWORD PTR [reg_p2+4*rax], ymm7
+  vmovdqu    YMMWORD PTR [reg_p2+4*rax+4*256], ymm8
+  vmovdqu    YMMWORD PTR [reg_p2+4*rax+4*512], ymm9
+  vmovdqu    YMMWORD PTR [reg_p2+4*rax+4*768], ymm10
+
+  add        rax, r10             // j+8 
+  add        rcx, r9
+  cmp        rax, r11             
+  jl         loop2
+  ret
+
+
+//***********************************************************************
+//  Reconciliation function
+//  Operation: c [reg_p3] <- function(a [reg_p1], b [reg_p2])
+//*********************************************************************** 
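+// For each group of four coefficients of a and the matching 2-bit hints in b, the code
+// measures the distance of 8*x minus q times a small combination of the hints from the
+// nearest multiple of 8*q and thresholds the summed distance against 8*q to obtain one key
+// bit per group; the 256 bits are packed 8 per byte into a 32-byte output
+// (cf. OQS_RLWE_MSRLN16_SHAREDKEY_BYTES in LatticeCrypto.h).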
+.globl oqs_rlwe_msrln16_rec_asm
+oqs_rlwe_msrln16_rec_asm:  
+  vpxor      ymm12, ymm12, ymm12 
+  vmovdqu    ymm15, PRIME8x   
+  vpslld     ymm14, ymm15, 2                         // 4*Q  
+  vpslld     ymm13, ymm15, 3                         // 8*Q
+  vpsubd     ymm12, ymm12, ymm13                     // -8*Q
+  vpxor      ymm11, ymm12, ymm13                     // 8*Q ^ -8*Q
+  vmovdqu    ymm10, ONE8x 
+  movq       r11, 256
+  movq       r10, 8
+  xor        rax, rax
+  xor        rcx, rcx
+loop3:
+  vmovdqu    ymm0, YMMWORD PTR [reg_p1+4*rax]        // x
+  vmovdqu    ymm1, YMMWORD PTR [reg_p1+4*rax+4*256]  // x+256
+  vmovdqu    ymm2, YMMWORD PTR [reg_p1+4*rax+4*512]  // x+512
+  vmovdqu    ymm3, YMMWORD PTR [reg_p1+4*rax+4*768]  // x+768
+  vmovdqu    ymm4, YMMWORD PTR [reg_p2+4*rax]        // rvec
+  vmovdqu    ymm5, YMMWORD PTR [reg_p2+4*rax+4*256]  // rvec+256
+  vmovdqu    ymm6, YMMWORD PTR [reg_p2+4*rax+4*512]  // rvec+512
+  vmovdqu    ymm7, YMMWORD PTR [reg_p2+4*rax+4*768]  // rvec+768
+  
+  vpslld     ymm8, ymm4, 1                           // 2*rvec[j] + rvec[j+768]
+  vpaddd     ymm4, ymm7, ymm8
+  vpslld     ymm8, ymm5, 1 
+  vpaddd     ymm5, ymm7, ymm8
+  vpslld     ymm8, ymm6, 1 
+  vpaddd     ymm6, ymm7, ymm8
+  vpmulld    ymm4, ymm4, ymm15
+  vpmulld    ymm5, ymm5, ymm15
+  vpmulld    ymm6, ymm6, ymm15
+  vpmulld    ymm7, ymm7, ymm15
+  vpslld     ymm0, ymm0, 3                           // 8*x
+  vpslld     ymm1, ymm1, 3 
+  vpslld     ymm2, ymm2, 3 
+  vpslld     ymm3, ymm3, 3 
+  vpsubd     ymm0, ymm0, ymm4                        // t[i]
+  vpsubd     ymm1, ymm1, ymm5
+  vpsubd     ymm2, ymm2, ymm6
+  vpsubd     ymm3, ymm3, ymm7
+  
+  vpsrad     ymm8, ymm0, 31                          // mask1
+  vpabsd     ymm4, ymm0
+  vpsubd     ymm4, ymm14, ymm4
+  vpsrad     ymm4, ymm4, 31                          // mask2                       
+  vpand      ymm8, ymm8, ymm11                       // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q
+  vpxor      ymm8, ymm8, ymm12
+  vpand      ymm4, ymm4, ymm8
+  vpaddd     ymm0, ymm0, ymm4
+  vpabsd     ymm0, ymm0  
+  vpsrad     ymm8, ymm1, 31                          // mask1
+  vpabsd     ymm4, ymm1
+  vpsubd     ymm4, ymm14, ymm4
+  vpsrad     ymm4, ymm4, 31                          // mask2                       
+  vpand      ymm8, ymm8, ymm11                       // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q
+  vpxor      ymm8, ymm8, ymm12
+  vpand      ymm4, ymm4, ymm8
+  vpaddd     ymm1, ymm1, ymm4
+  vpabsd     ymm1, ymm1
+  vpaddd     ymm0, ymm0, ymm1
+  vpsrad     ymm8, ymm2, 31                          // mask1
+  vpabsd     ymm4, ymm2
+  vpsubd     ymm4, ymm14, ymm4
+  vpsrad     ymm4, ymm4, 31                          // mask2                       
+  vpand      ymm8, ymm8, ymm11                       // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q
+  vpxor      ymm8, ymm8, ymm12
+  vpand      ymm4, ymm4, ymm8
+  vpaddd     ymm2, ymm2, ymm4
+  vpabsd     ymm2, ymm2
+  vpaddd     ymm0, ymm0, ymm2
+  vpsrad     ymm8, ymm3, 31                          // mask1
+  vpabsd     ymm4, ymm3
+  vpsubd     ymm4, ymm14, ymm4
+  vpsrad     ymm4, ymm4, 31                          // mask2                       
+  vpand      ymm8, ymm8, ymm11                       // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q
+  vpxor      ymm8, ymm8, ymm12
+  vpand      ymm4, ymm4, ymm8
+  vpaddd     ymm3, ymm3, ymm4
+  vpabsd     ymm3, ymm3
+  vpaddd     ymm0, ymm0, ymm3                        // norm
+
+  vpsubd     ymm0, ymm13, ymm0                       // If norm <= 8*PARAMETER_Q then result = 1, else result = 0
+  vpsrld     ymm0, ymm0, 31                            
+  vpxor      ymm0, ymm0, ymm10
+
+  vpsrlq     ymm1, ymm0, 31
+  vpor       ymm1, ymm0, ymm1 
+  vpsllq     ymm2, ymm1, 2
+  vpsrldq    ymm2, ymm2, 8
+  vpor       ymm1, ymm2, ymm1 
+  vpsllq     ymm2, ymm1, 4
+  vpermq     ymm2, ymm2, 0x56
+  vpor       ymm0, ymm1, ymm2 
+  vmovq      r9, xmm0
+  
+  mov        BYTE PTR [reg_p3+rcx], r9b
+
+  add        rax, r10             // j+8 
+  inc        rcx
+  cmp        rax, r11             
+  jl         loop3
+  ret
diff --git a/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64.c b/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64.c
new file mode 100644
index 0000000000000000000000000000000000000000..a143f849ec8a6eee4272445aa0f9fbb88f3aba3b
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64.c
@@ -0,0 +1,51 @@
+/****************************************************************************************
+* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: NTT functions and other low-level operations
+*
+*****************************************************************************************/
+
+#include "../LatticeCrypto_priv.h"
+
+void oqs_rlwe_msrln16_NTT_CT_std2rev_12289(int32_t *a, const int32_t *psi_rev, unsigned int N) {
+	oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm(a, psi_rev, N);
+}
+
+void oqs_rlwe_msrln16_INTT_GS_rev2std_12289(int32_t *a, const int32_t *omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N) {
+	oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm(a, omegainv_rev, omegainv1N_rev, Ninv, N);
+}
+
+void oqs_rlwe_msrln16_two_reduce12289(int32_t *a, unsigned int N) {
+	oqs_rlwe_msrln16_two_reduce12289_asm(a, N);
+}
+
+void oqs_rlwe_msrln16_pmul(int32_t *a, int32_t *b, int32_t *c, unsigned int N) {
+	oqs_rlwe_msrln16_pmul_asm(a, b, c, N);
+}
+
+void oqs_rlwe_msrln16_pmuladd(int32_t *a, int32_t *b, int32_t *c, int32_t *d, unsigned int N) {
+	oqs_rlwe_msrln16_pmuladd_asm(a, b, c, d, N);
+}
+
+void oqs_rlwe_msrln16_smul(int32_t *a, int32_t scalar, unsigned int N) {
+	unsigned int i;
+
+	for (i = 0; i < N; i++) {
+		a[i] = a[i] * scalar;
+	}
+}
+
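+// Modular correction: folds each coefficient into [0, p) with a branch-free conditional
+// subtract of p followed by a conditional add, the conditions being shift-derived masks.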
+void oqs_rlwe_msrln16_correction(int32_t *a, int32_t p, unsigned int N) {
+	unsigned int i;
+	int32_t mask;
+
+	for (i = 0; i < N; i++) {
+		mask = a[i] >> (4 * sizeof(int32_t) - 1);
+		a[i] += (p & mask) - p;
+		mask = a[i] >> (4 * sizeof(int32_t) - 1);
+		a[i] += (p & mask);
+	}
+}
diff --git a/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64_asm.S b/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64_asm.S
new file mode 100644
index 0000000000000000000000000000000000000000..0da17f66b147281125442c6d621d487110b93bf6
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/AMD64/ntt_x64_asm.S
@@ -0,0 +1,979 @@
+//****************************************************************************************
+// LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
+//
+//    Copyright (c) Microsoft Corporation. All rights reserved.
+//
+//
+// Abstract: NTT functions in x64 assembly using AVX2 vector instructions for Linux 
+//
+//****************************************************************************************  
+
+.intel_syntax noprefix 
+
+// Registers that are used for parameter passing:
+#define reg_p1  rdi
+#define reg_p2  rsi
+#define reg_p3  rdx
+#define reg_p4  rcx
+#define reg_p5  r8
+
+
+.text
+//***********************************************************************
+//  Forward NTT
+//  Operation: a [reg_p1] <- NTT(a) [reg_p1], 
+//             [reg_p2] points to table and 
+//             reg_p3 contains parameter n
+//*********************************************************************** 
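+// Reduction used in the butterflies below ("V = 3*c0-c1"): since q = 12289 = 3*2^12 + 1, a
+// wide product c can be split as c = c0 + 2^12*c1 with c0 its low 12 bits (MASK12x8), and
+// 3*c0 - c1 = 3*c - q*c1 is congruent to 3*c (mod q). The factor of 3 picked up by each
+// such reduction is assumed to be absorbed by the precomputed tables and the final scaling
+// constants, so only the congruence class matters at this level.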
+.globl oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm
+oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm:
+  push       r12
+  push       r13
+  push       r14
+
+// Stages m=1 -> m=32
+  mov        r9, 1            // m = 1
+  mov        rax, reg_p3 
+  mov        r12, reg_p3      
+  shr        r12, 4           // n/16
+  vmovdqu    ymm14, MASK12x8
+  vmovdqu    ymm12, PERM0246
+  mov        r14, 16
+  mov        rcx, 11
+loop1:
+  shr        rax, 1           // k = k/2
+  dec        rcx 
+  xor        rdx, rdx         // i = 0
+loop2:
+  mov        r10, rdx
+  mov        r11, rax
+  dec        r11
+  shl        r10, cl          // j1
+  add        r11, r10         // j2
+  mov        r13, r9
+  add        r13, rdx         // m+i
+  vbroadcastss ymm11, DWORD PTR [reg_p2+4*r13]   // S
+
+loop3:
+  mov        r13, r10
+  add        r13, rax         // j+k
+  vpmovsxdq  ymm1, XMMWORD PTR [reg_p1+4*r13]    // a[j+k]
+  vpmovsxdq  ymm3, XMMWORD PTR [reg_p1+4*r13+16] // a[j+k]
+  vpmovsxdq  ymm5, XMMWORD PTR [reg_p1+4*r13+32] // a[j+k]
+  vpmovsxdq  ymm7, XMMWORD PTR [reg_p1+4*r13+48] // a[j+k]
+  
+  vpmuldq    ymm1, ymm1, ymm11                   // a[j+k].S
+  vpmuldq    ymm3, ymm3, ymm11                   
+  vpmuldq    ymm5, ymm5, ymm11                   
+  vpmuldq    ymm7, ymm7, ymm11   
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*r10]    // U = a[j]
+
+  vmovdqu    ymm13, ymm1
+  vpand      ymm1, ymm14, ymm1                   // c0
+  vpsrlq     ymm13, ymm13, 12                    // c1
+  vpslld     ymm15, ymm1, 1                      // 2*c0
+  vpsubd     ymm13, ymm1, ymm13                  // c0-c1
+  vpaddd     ymm13, ymm13, ymm15                 // V = 3*c0-c1    
+  vpsubd     ymm1, ymm0, ymm13                   // a[j+k] = U - V
+  vpaddd     ymm0, ymm0, ymm13                   // a[j] = U + V   
+  vpermd     ymm1, ymm12, ymm1 
+  vpermd     ymm0, ymm12, ymm0 
+  vpmovsxdq  ymm2, XMMWORD PTR [reg_p1+4*r10+16] // U = a[j]
+
+  vmovdqu    ymm13, ymm3
+  vpand      ymm3, ymm14, ymm3                   // c0
+  vpsrlq     ymm13, ymm13, 12                    // c1
+  vpslld     ymm15, ymm3, 1                      // 2*c0
+  vpsubd     ymm13, ymm3, ymm13                  // c0-c1
+  vpaddd     ymm13, ymm13, ymm15                 // V = 3*c0-c1    
+  vpsubd     ymm3, ymm2, ymm13                   // a[j+k] = U - V
+  vpaddd     ymm2, ymm2, ymm13                   // a[j] = U + V  
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10], xmm0
+  vmovdqu    XMMWORD PTR [reg_p1+4*r13], xmm1 
+  vpermd     ymm3, ymm12, ymm3 
+  vpermd     ymm2, ymm12, ymm2 
+  vpmovsxdq  ymm4, XMMWORD PTR [reg_p1+4*r10+32] // U = a[j]
+
+  vmovdqu    ymm13, ymm5
+  vpand      ymm5, ymm14, ymm5                   // c0
+  vpsrlq     ymm13, ymm13, 12                    // c1
+  vpslld     ymm15, ymm5, 1                      // 2*c0
+  vpsubd     ymm13, ymm5, ymm13                  // c0-c1
+  vpaddd     ymm13, ymm13, ymm15                 // V = 3*c0-c1    
+  vpsubd     ymm5, ymm4, ymm13                   // a[j+k] = U - V
+  vpaddd     ymm4, ymm4, ymm13                   // a[j] = U + V  
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+16], xmm2
+  vmovdqu    XMMWORD PTR [reg_p1+4*r13+16], xmm3 
+  vpermd     ymm5, ymm12, ymm5 
+  vpermd     ymm4, ymm12, ymm4 
+  vpmovsxdq  ymm6, XMMWORD PTR [reg_p1+4*r10+48] // U = a[j]
+
+  vmovdqu    ymm13, ymm7
+  vpand      ymm7, ymm14, ymm7                   // c0
+  vpsrlq     ymm13, ymm13, 12                    // c1
+  vpslld     ymm15, ymm7, 1                      // 2*c0
+  vpsubd     ymm13, ymm7, ymm13                  // c0-c1
+  vpaddd     ymm13, ymm13, ymm15                 // V = 3*c0-c1    
+  vpsubd     ymm7, ymm6, ymm13                   // a[j+k] = U - V
+  vpaddd     ymm6, ymm6, ymm13                   // a[j] = U + V 
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+32], xmm4
+  vmovdqu    XMMWORD PTR [reg_p1+4*r13+32], xmm5  
+  vpermd     ymm6, ymm12, ymm6   
+  vpermd     ymm7, ymm12, ymm7 
+  vmovdqu    XMMWORD PTR [reg_p1+4*r13+48], xmm7
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+48], xmm6
+  
+  add        r10, r14
+  cmp        r10, r11
+  jl         loop3
+  inc        rdx
+  cmp        rdx, r9
+  jl         loop2
+  shl        r9, 1
+  cmp        r9, r12
+  jl         loop1
+   
+// Stage m=64
+  xor        rdx, rdx         // i = 0
+  xor        r10, r10         // j1 = 0
+loop4:
+  vbroadcastss ymm11, DWORD PTR [reg_p2+4*rdx+4*64] // S
+  vpmovsxdq  ymm1, XMMWORD PTR [reg_p1+4*r10+32] // a[j+k]
+  vpmovsxdq  ymm3, XMMWORD PTR [reg_p1+4*r10+48] // a[j+k]
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*r10]    // U = a[j]
+  vpmovsxdq  ymm2, XMMWORD PTR [reg_p1+4*r10+16] // U = a[j]
+  vpmuldq    ymm1, ymm1, ymm11                   // a[j+k].S
+  vpmuldq    ymm3, ymm3, ymm11                   // a[j+k].S
+
+  vmovdqu    ymm13, ymm1
+  vpand      ymm1, ymm14, ymm1                   // c0
+  vpsrlq     ymm13, ymm13, 12                    // c1
+  vpslld     ymm15, ymm1, 1                      // 2*c0
+  vpsubd     ymm13, ymm1, ymm13                  // c0-c1
+  vpaddd     ymm13, ymm13, ymm15                 // V = 3*c0-c1 
+  
+  vmovdqu    ymm10, ymm3
+  vpand      ymm3, ymm14, ymm3                   // c0
+  vpsrlq     ymm10, ymm10, 12                    // c1
+  vpslld     ymm15, ymm3, 1                      // 2*c0
+  vpsubd     ymm10, ymm3, ymm10                  // c0-c1
+  vpaddd     ymm10, ymm10, ymm15                 // V = 3*c0-c1    
+  
+  vpsubd     ymm1, ymm0, ymm13                   // a[j+k] = U - V
+  vpaddd     ymm0, ymm0, ymm13                   // a[j] = U + V    
+  vpsubd     ymm3, ymm2, ymm10                   // a[j+k] = U - V
+  vpaddd     ymm2, ymm2, ymm10                   // a[j] = U + V 
+  
+  vpermd     ymm0, ymm12, ymm0 
+  vpermd     ymm1, ymm12, ymm1 
+  vpermd     ymm2, ymm12, ymm2 
+  vpermd     ymm3, ymm12, ymm3 
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10], xmm0
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+32], xmm1
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+16], xmm2
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+48], xmm3
+  
+  add        r10, r14        // j+16 
+  inc        rdx             // i+1
+  cmp        rdx, r9
+  jl         loop4
+   
+// Stage m=128
+  shl        r9, 1
+  xor        rdx, rdx         // i = 0
+  xor        r10, r10         // j1 = 0
+  mov        r13, 8 
+loop6:
+  vbroadcastss ymm2, DWORD PTR [reg_p2+4*rdx+4*128] // S
+  vpmovsxdq  ymm1, XMMWORD PTR [reg_p1+4*r10+16] // a[j+k]
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*r10]    // U = a[j]
+  vpmuldq    ymm1, ymm1, ymm2                    // a[j+k].S
+  
+  vmovdqu    ymm3, ymm0
+  vpand      ymm0, ymm14, ymm0                   // c0
+  vpsrad     ymm3, ymm3, 12                      // c1
+  vpslld     ymm4, ymm0, 1                       // 2*c0
+  vpsubd     ymm3, ymm0, ymm3                    // c0-c1
+  vpaddd     ymm0, ymm3, ymm4                    // U = 3*c0-c1    
+  
+  vmovdqu    ymm3, ymm1
+  vpand      ymm1, ymm14, ymm1                   // c0
+  vpsrlq     ymm4, ymm3, 24                      // c2
+  vpsrad     ymm3, ymm3, 12                      // xc1
+  vpand      ymm3, ymm14, ymm3                   // c1
+  vpslld     ymm5, ymm1, 3                       // 8*c0
+  vpaddd     ymm4, ymm1, ymm4                    // c0+c2
+  vpaddd     ymm4, ymm4, ymm5                    // 9*c0+c2
+  vpslld     ymm5, ymm3, 1                       // 2*c1
+  vpaddd     ymm1, ymm0, ymm3                    // U+c1
+  vpsubd     ymm0, ymm0, ymm3                    // U-c1
+  vpsubd     ymm4, ymm4, ymm5                    // 9*c0-2*c1+c2
+  vpaddd     ymm0, ymm0, ymm4                    // U+(9*c0-3*c1+c2)
+  vpsubd     ymm1, ymm1, ymm4                    // U-(9*c0-3*c1+c2)
+  vpermd     ymm0, ymm12, ymm0 
+  vpermd     ymm1, ymm12, ymm1 
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10], xmm0
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+16], xmm1
+
+  add        r10, r13        // j+8
+  inc        rdx             // i+1
+  cmp        rdx, r9
+  jl         loop6
+
+// Stage m=256 
+  vmovdqu    ymm9, PERM02134657  
+  shl        r9, 1
+  xor        rdx, rdx         // i = 0
+  xor        r10, r10         // j1 = 0
+  mov        r14, 32
+loop7:
+  vpmovsxdq  ymm2, XMMWORD PTR [reg_p2+4*rdx+4*256]    // S = psi[m+i]->psi[m+i+3]
+  vpermq     ymm8, ymm2, 0x50   
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*r10]    // U = a[j]->a[j+3]
+  vpmovsxdq  ymm1, XMMWORD PTR [reg_p1+4*r10+16] // a[j+k]->a[j+k+3]
+  vpermq     ymm3, ymm0, 0x4e    
+  vinserti128 ymm0, ymm0, xmm1, 1                // U
+  vpblendd   ymm1, ymm1, ymm3, 15
+  vpmuldq    ymm3, ymm1, ymm8                    // a[j+k].S
+  vmovdqu    ymm4, ymm3
+  vpand      ymm3, ymm14, ymm3                   // c0
+  vpsrlq     ymm4, ymm4, 12                      // c1
+  vpslld     ymm5, ymm3, 1                       // 2*c0
+  vpsubd     ymm4, ymm3, ymm4                    // c0-c1
+  vpaddd     ymm4, ymm4, ymm5                    // V = 3*c0-c1     
+  vpsubd     ymm1, ymm0, ymm4                    // a[j+k] = U - V
+  vpaddd     ymm0, ymm0, ymm4                    // a[j] = U + V 
+  vpslldq    ymm1, ymm1, 4    
+  vpblendd   ymm0, ymm0, ymm1, 0xaa
+  vpermd     ymm0, ymm9, ymm0 
+  vmovdqu    YMMWORD PTR [reg_p1+4*r10], ymm0
+  
+  vpermq     ymm8, ymm2, 0xfa   
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*r10+32] // U = a[j]->a[j+3]
+  vpmovsxdq  ymm1, XMMWORD PTR [reg_p1+4*r10+48] // a[j+k]->a[j+k+3]
+  vpermq     ymm3, ymm0, 0x4e    
+  vinserti128 ymm0, ymm0, xmm1, 1                // U
+  vpblendd   ymm1, ymm1, ymm3, 15
+  vpmuldq    ymm3, ymm1, ymm8                    // a[j+k].S
+  vmovdqu    ymm4, ymm3
+  vpand      ymm3, ymm14, ymm3                   // c0
+  vpsrlq     ymm4, ymm4, 12                      // c1
+  vpslld     ymm5, ymm3, 1                       // 2*c0
+  vpsubd     ymm4, ymm3, ymm4                    // c0-c1
+  vpaddd     ymm4, ymm4, ymm5                    // V = 3*c0-c1     
+  vpsubd     ymm1, ymm0, ymm4                    // a[j+k] = U - V
+  vpaddd     ymm0, ymm0, ymm4                    // a[j] = U + V 
+  vpslldq    ymm1, ymm1, 4    
+  vpblendd   ymm0, ymm0, ymm1, 0xaa
+  vpermd     ymm0, ymm9, ymm0 
+  vmovdqu    YMMWORD PTR [reg_p1+4*r10+32], ymm0
+
+  vpmovsxdq  ymm2, XMMWORD PTR [reg_p2+4*rdx+4*256+16]  // S = psi[m+i]->psi[m+i+3] 
+  vpermq     ymm8, ymm2, 0x50   
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*r10+64] // U = a[j]->a[j+3]
+  vpmovsxdq  ymm1, XMMWORD PTR [reg_p1+4*r10+80] // a[j+k]->a[j+k+3]
+  vpermq     ymm3, ymm0, 0x4e    
+  vinserti128 ymm0, ymm0, xmm1, 1                // U
+  vpblendd   ymm1, ymm1, ymm3, 15
+  vpmuldq    ymm3, ymm1, ymm8                    // a[j+k].S
+  vmovdqu    ymm4, ymm3
+  vpand      ymm3, ymm14, ymm3                   // c0
+  vpsrlq     ymm4, ymm4, 12                      // c1
+  vpslld     ymm5, ymm3, 1                       // 2*c0
+  vpsubd     ymm4, ymm3, ymm4                    // c0-c1
+  vpaddd     ymm4, ymm4, ymm5                    // V = 3*c0-c1     
+  vpsubd     ymm1, ymm0, ymm4                    // a[j+k] = U - V
+  vpaddd     ymm0, ymm0, ymm4                    // a[j] = U + V 
+  vpslldq    ymm1, ymm1, 4    
+  vpblendd   ymm0, ymm0, ymm1, 0xaa
+  vpermd     ymm0, ymm9, ymm0 
+  vmovdqu    YMMWORD PTR [reg_p1+4*r10+64], ymm0
+          
+  vpermq     ymm8, ymm2, 0xfa   
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*r10+96]  // U = a[j]->a[j+3]
+  vpmovsxdq  ymm1, XMMWORD PTR [reg_p1+4*r10+112] // a[j+k]->a[j+k+3]
+  vpermq     ymm3, ymm0, 0x4e    
+  vinserti128 ymm0, ymm0, xmm1, 1                // U
+  vpblendd   ymm1, ymm1, ymm3, 15
+  vpmuldq    ymm3, ymm1, ymm8                    // a[j+k].S
+  vmovdqu    ymm4, ymm3
+  vpand      ymm3, ymm14, ymm3                   // c0
+  vpsrlq     ymm4, ymm4, 12                      // c1
+  vpslld     ymm5, ymm3, 1                       // 2*c0
+  vpsubd     ymm4, ymm3, ymm4                    // c0-c1
+  vpaddd     ymm4, ymm4, ymm5                    // V = 3*c0-c1     
+  vpsubd     ymm1, ymm0, ymm4                    // a[j+k] = U - V
+  vpaddd     ymm0, ymm0, ymm4                    // a[j] = U + V 
+  vpslldq    ymm1, ymm1, 4    
+  vpblendd   ymm0, ymm0, ymm1, 0xaa
+  vpermd     ymm0, ymm9, ymm0 
+  vmovdqu    YMMWORD PTR [reg_p1+4*r10+96], ymm0
+         
+  add        r10, r14        // j+32
+  add        rdx, r13        // i+8
+  cmp        rdx, r9
+  jl         loop7
+
+// Stage m=512
+  vmovdqu    ymm9, PERM00224466
+  shl        r9, 1            // m = n/2 
+  xor        rdx, rdx         // i = 0
+  xor        r10, r10         // j1 = 0
+  mov        r14, 4
+loop8:
+  vpmovsxdq  ymm2, XMMWORD PTR [reg_p2+4*rdx+4*512] // S
+  vmovdqu    ymm0, YMMWORD PTR [reg_p1+4*r10]    // U = a[j]
+  vmovdqu    ymm1, YMMWORD PTR [reg_p1+4*r10+4]  // a[j+k]
+  vpmuldq    ymm3, ymm1, ymm2                    // a[j+k].S
+  vmovdqu    ymm4, ymm3
+  vpand      ymm3, ymm14, ymm3                   // c0
+  vpsrlq     ymm4, ymm4, 12                      // c1
+  vpslld     ymm5, ymm3, 1                       // 2*c0
+  vpsubd     ymm4, ymm3, ymm4                    // c0-c1
+  vpaddd     ymm4, ymm4, ymm5                    // V = 3*c0-c1     
+  vpsubd     ymm1, ymm0, ymm4                    // a[j+k] = U - V
+  vpaddd     ymm0, ymm0, ymm4                    // a[j] = U + V 
+  vpermd     ymm1, ymm9, ymm1 
+  vpblendd   ymm0, ymm0, ymm1, 0xaa
+  vmovdqu    YMMWORD PTR [reg_p1+4*r10], ymm0
+  
+  add        r10, r13        // j+8
+  add        rdx, r14        // i+4
+  cmp        rdx, r9
+  jl         loop8
+
+  pop        r14
+  pop        r13
+  pop        r12
+  ret
+
+
+//***********************************************************************
+//  Inverse NTT
+//  Operation: a [reg_p1] <- INTT(a) [reg_p1], 
+//             [reg_p2] points to table
+//             reg_p3 and reg_p4 contain the scaling constants (omegainv1N_rev, Ninv) and
+//             reg_p5 contains parameter n
+//*********************************************************************** 
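+// The Gentleman-Sande (GS) butterflies below use the same "3*c0-c1" folding as the forward
+// transform. The final "Scaling step" appears to merge the last butterfly stage with the
+// scaling: a[j] is multiplied by the value in reg_p4 (Ninv) and a[j+512] by the value in
+// reg_p3 (omegainv1N_rev).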
+.globl oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm
+oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm:
+  push       r12
+  push       r13
+  push       r14
+  push       r15
+  push       rbx
+
+// Stage m=1024
+  vmovdqu    ymm9, PERM00224466
+  vmovdqu    ymm14, MASK12x8  
+  mov        r12, reg_p5           
+  shr        r12, 1          // n/2 = 512
+  xor        r15, r15        // i = 0
+  xor        r10, r10        // j1 = 0
+  mov        r13, 8
+  mov        r14, 4
+loop1b:
+  vmovdqu    ymm1, YMMWORD PTR [reg_p1+4*r10+4]       // V = a[j+k]    
+  vmovdqu    ymm0, YMMWORD PTR [reg_p1+4*r10]         // U = a[j]
+  vpmovsxdq  ymm2, XMMWORD PTR [reg_p2+4*r15+4*512]   // S
+  vpsubd     ymm3, ymm0, ymm1                         // U - V
+  vpaddd     ymm0, ymm0, ymm1                         // U + V 
+  vpmuldq    ymm3, ymm3, ymm2                         // (U - V).S
+  vmovdqu    ymm4, ymm3
+  vpand      ymm3, ymm14, ymm3                        // c0
+  vpsrlq     ymm4, ymm4, 12                           // c1
+  vpslld     ymm5, ymm3, 1                            // 2*c0
+  vpsubd     ymm4, ymm3, ymm4                         // c0-c1
+  vpaddd     ymm1, ymm4, ymm5                         // 3*c0-c1 
+  vpermd     ymm1, ymm9, ymm1 
+  vpblendd   ymm0, ymm0, ymm1, 0xaa
+  vmovdqu    YMMWORD PTR [reg_p1+4*r10], ymm0
+
+  add        r10, r13        // j+8
+  add        r15, r14        // i+4
+  cmp        r15, r12
+  jl         loop1b
+  
+// Stage m=512 
+  vmovdqu    ymm9, PERM02134657
+  vmovdqu    ymm13, PERM0145
+  vmovdqu    ymm15, PERM2367   
+  shr        r12, 1          // n/4 = 256
+  xor        r15, r15        // i = 0
+  xor        r10, r10        // j1 = 0
+  mov        r14, 32
+loop2b:
+  vpmovsxdq  ymm2, XMMWORD PTR [reg_p2+4*r15+4*256]   // S = psi[m+i]->psi[m+i+3]
+  vpermq     ymm8, ymm2, 0x50   
+  vmovdqu    ymm0, YMMWORD PTR [reg_p1+4*r10]         // U = a[j]->a[j+7]
+  vpermd     ymm1, ymm15, ymm0 
+  vpermd     ymm0, ymm13, ymm0  
+  vpsubd     ymm3, ymm0, ymm1                         // U - V
+  vpaddd     ymm0, ymm0, ymm1                         // U + V 
+  vpmuldq    ymm3, ymm3, ymm8                         // (U - V).S
+  vmovdqu    ymm4, ymm3
+  vpand      ymm3, ymm14, ymm3                        // c0
+  vpsrlq     ymm4, ymm4, 12                           // c1
+  vpslld     ymm5, ymm3, 1                            // 2*c0
+  vpsubd     ymm4, ymm3, ymm4                         // c0-c1
+  vpaddd     ymm1, ymm4, ymm5                         // 3*c0-c1
+  vpslldq    ymm1, ymm1, 4    
+  vpblendd   ymm0, ymm0, ymm1, 0xaa
+  vpermd     ymm0, ymm9, ymm0 
+  vmovdqu    YMMWORD PTR [reg_p1+4*r10], ymm0
+  
+  vpermq     ymm8, ymm2, 0xfa   
+  vmovdqu    ymm0, YMMWORD PTR [reg_p1+4*r10+32]      // U = a[j]->a[j+7]
+  vpermd     ymm1, ymm15, ymm0 
+  vpermd     ymm0, ymm13, ymm0  
+  vpsubd     ymm3, ymm0, ymm1                         // U - V
+  vpaddd     ymm0, ymm0, ymm1                         // U + V 
+  vpmuldq    ymm3, ymm3, ymm8                         // (U - V).S
+  vmovdqu    ymm4, ymm3
+  vpand      ymm3, ymm14, ymm3                        // c0
+  vpsrlq     ymm4, ymm4, 12                           // c1
+  vpslld     ymm5, ymm3, 1                            // 2*c0
+  vpsubd     ymm4, ymm3, ymm4                         // c0-c1
+  vpaddd     ymm1, ymm4, ymm5                         // 3*c0-c1
+  vpslldq    ymm1, ymm1, 4    
+  vpblendd   ymm0, ymm0, ymm1, 0xaa
+  vpermd     ymm0, ymm9, ymm0
+  vmovdqu    YMMWORD PTR [reg_p1+4*r10+32], ymm0
+
+  vpmovsxdq  ymm2, XMMWORD PTR [reg_p2+4*r15+4*256+16]// S = psi[m+i]->psi[m+i+3] 
+  vpermq     ymm8, ymm2, 0x50   
+  vmovdqu    ymm0, YMMWORD PTR [reg_p1+4*r10+64]      // U = a[j]->a[j+7]
+  vpermd     ymm1, ymm15, ymm0 
+  vpermd     ymm0, ymm13, ymm0  
+  vpsubd     ymm3, ymm0, ymm1                         // U - V
+  vpaddd     ymm0, ymm0, ymm1                         // U + V 
+  vpmuldq    ymm3, ymm3, ymm8                         // (U - V).S
+  vmovdqu    ymm4, ymm3
+  vpand      ymm3, ymm14, ymm3                        // c0
+  vpsrlq     ymm4, ymm4, 12                           // c1
+  vpslld     ymm5, ymm3, 1                            // 2*c0
+  vpsubd     ymm4, ymm3, ymm4                         // c0-c1
+  vpaddd     ymm1, ymm4, ymm5                         // 3*c0-c1
+  vpslldq    ymm1, ymm1, 4    
+  vpblendd   ymm0, ymm0, ymm1, 0xaa
+  vpermd     ymm0, ymm9, ymm0
+  vmovdqu    YMMWORD PTR [reg_p1+4*r10+64], ymm0
+         
+  vpermq     ymm8, ymm2, 0xfa   
+  vmovdqu    ymm0, YMMWORD PTR [reg_p1+4*r10+96]      // U = a[j]->a[j+7]
+  vpermd     ymm1, ymm15, ymm0 
+  vpermd     ymm0, ymm13, ymm0  
+  vpsubd     ymm3, ymm0, ymm1                         // U - V
+  vpaddd     ymm0, ymm0, ymm1                         // U + V 
+  vpmuldq    ymm3, ymm3, ymm8                         // (U - V).S
+  vmovdqu    ymm4, ymm3
+  vpand      ymm3, ymm14, ymm3                        // c0
+  vpsrlq     ymm4, ymm4, 12                           // c1
+  vpslld     ymm5, ymm3, 1                            // 2*c0
+  vpsubd     ymm4, ymm3, ymm4                         // c0-c1
+  vpaddd     ymm1, ymm4, ymm5                         // 3*c0-c1
+  vpslldq    ymm1, ymm1, 4    
+  vpblendd   ymm0, ymm0, ymm1, 0xaa
+  vpermd     ymm0, ymm9, ymm0
+  vmovdqu    YMMWORD PTR [reg_p1+4*r10+96], ymm0
+         
+  add        r10, r14        // j+32
+  add        r15, r13        // i+8
+  cmp        r15, r12
+  jl         loop2b
+     
+// Stage m=256 
+  vmovdqu    ymm12, PERM0246   
+  shr        r12, 1          // n/8 = 128
+  xor        r15, r15        // i = 0
+  xor        r10, r10        // j1 = 0
+loop3b:
+  vbroadcastss ymm2, DWORD PTR [reg_p2+4*r15+4*128]   // S
+  vpmovsxdq  ymm1, XMMWORD PTR [reg_p1+4*r10+16]      // V = a[j+k]
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*r10]         // U = a[j]
+  vpsubd     ymm3, ymm0, ymm1                         // U - V
+  vpaddd     ymm0, ymm0, ymm1                         // U + V 
+  vpmuldq    ymm3, ymm3, ymm2                         // (U - V).S
+  vmovdqu    ymm4, ymm3
+  vpand      ymm3, ymm14, ymm3                        // c0
+  vpsrlq     ymm4, ymm4, 12                           // c1
+  vpslld     ymm5, ymm3, 1                            // 2*c0
+  vpsubd     ymm4, ymm3, ymm4                         // c0-c1
+  vpaddd     ymm1, ymm4, ymm5                         // 3*c0-c1 
+  vpermd     ymm0, ymm12, ymm0 
+  vpermd     ymm1, ymm12, ymm1 
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10], xmm0
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+16], xmm1
+  
+  add        r10, r13        // j+8
+  inc        r15             // i+1
+  cmp        r15, r12
+  jl         loop3b
+     
+// Stage m=128
+  shr        r12, 1          // n/16 = 64
+  xor        r15, r15        // i = 0
+  xor        r10, r10        // j1 = 0
+  mov        r14, 16 
+loop4b:
+  vbroadcastss ymm11, DWORD PTR [reg_p2+4*r15+4*64]   // S
+  vpmovsxdq  ymm13, XMMWORD PTR [reg_p1+4*r10+32]     // V = a[j+k]
+  vpmovsxdq  ymm15, XMMWORD PTR [reg_p1+4*r10+48]     // V = a[j+k]
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*r10]         // U = a[j]
+  vpmovsxdq  ymm2, XMMWORD PTR [reg_p1+4*r10+16]      // U = a[j]
+  vpsubd     ymm1, ymm0, ymm13                        // U - V
+  vpaddd     ymm0, ymm0, ymm13                        // U + V 
+  vpsubd     ymm3, ymm2, ymm15                        // U - V
+  vpaddd     ymm2, ymm2, ymm15                        // U + V   
+  vpmuldq    ymm1, ymm1, ymm11                        // (U - V).S
+  vpmuldq    ymm3, ymm3, ymm11                        // (U - V).S
+  
+  vmovdqu    ymm13, ymm1
+  vpand      ymm1, ymm14, ymm1                        // c0
+  vpsrlq     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm1, 1                           // 2*c0
+  vpsubd     ymm13, ymm1, ymm13                       // c0-c1
+  vpaddd     ymm1, ymm13, ymm15                       // 3*c0-c1    
+
+  vmovdqu    ymm13, ymm3
+  vpand      ymm3, ymm14, ymm3                        // c0
+  vpsrlq     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm3, 1                           // 2*c0
+  vpsubd     ymm13, ymm3, ymm13                       // c0-c1
+  vpaddd     ymm3, ymm13, ymm15                       // 3*c0-c1 
+  
+  vpermd     ymm0, ymm12, ymm0 
+  vpermd     ymm1, ymm12, ymm1 
+  vpermd     ymm2, ymm12, ymm2 
+  vpermd     ymm3, ymm12, ymm3 
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10], xmm0
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+32], xmm1
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+16], xmm2
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+48], xmm3
+  
+  add        r10, r14        // j+16 
+  inc        r15             // i+1
+  cmp        r15, r12
+  jl         loop4b
+  
+// Stages m=64 -> m=4  
+  mov        r9, 5            // 5 iterations
+  mov        rax, 8 
+loop5b:
+  shl        rax, 1          // k = 2*k
+  shr        r12, 1          // m/2
+  xor        r15, r15        // i = 0
+  xor        r8, r8        
+loop6b:
+  mov        r10, r8         // Load j1
+  mov        r11, rax
+  dec        r11
+  add        r11, r10        // j2
+  mov        r13, r12
+  add        r13, r15        // m/2+i
+  vbroadcastss ymm9, DWORD PTR [reg_p2+4*r13]         // S
+  mov        rbx, 4
+
+loop7b:
+  mov        r13, r10
+  add        r13, rax         // j+k
+  vpmovsxdq  ymm10, XMMWORD PTR [reg_p1+4*r13]        // V = a[j+k]
+  vpmovsxdq  ymm11, XMMWORD PTR [reg_p1+4*r13+16]     // V = a[j+k]
+  vpmovsxdq  ymm13, XMMWORD PTR [reg_p1+4*r13+32]     // V = a[j+k]
+  vpmovsxdq  ymm15, XMMWORD PTR [reg_p1+4*r13+48]     // V = a[j+k]
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*r10]         // U = a[j]
+  vpmovsxdq  ymm2, XMMWORD PTR [reg_p1+4*r10+16]      // U = a[j]
+  vpmovsxdq  ymm4, XMMWORD PTR [reg_p1+4*r10+32]      // U = a[j]
+  vpmovsxdq  ymm6, XMMWORD PTR [reg_p1+4*r10+48]      // U = a[j]
+  
+  vpsubd     ymm1, ymm0, ymm10                        // U - V
+  vpaddd     ymm0, ymm0, ymm10                        // U + V 
+  vpsubd     ymm3, ymm2, ymm11                        // U - V
+  vpaddd     ymm2, ymm2, ymm11                        // U + V 
+  vpsubd     ymm5, ymm4, ymm13                        // U - V
+  vpaddd     ymm4, ymm4, ymm13                        // U + V 
+  vpsubd     ymm7, ymm6, ymm15                        // U - V
+  vpaddd     ymm6, ymm6, ymm15                        // U + V 
+
+  vpmuldq    ymm1, ymm1, ymm9                         // (U - V).S
+  vpmuldq    ymm3, ymm3, ymm9                   
+  vpmuldq    ymm5, ymm5, ymm9                   
+  vpmuldq    ymm7, ymm7, ymm9   
+  
+  vmovdqu    ymm13, ymm1
+  vpand      ymm1, ymm14, ymm1                        // c0
+  vpsrlq     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm1, 1                           // 2*c0
+  vpsubd     ymm13, ymm1, ymm13                       // c0-c1
+  vpaddd     ymm1, ymm13, ymm15                       // 3*c0-c1 
+
+  cmp        r9, rbx 
+  jne        skip1
+  vmovdqu    ymm13, ymm0
+  vpand      ymm0, ymm14, ymm0                        // c0
+  vpsrad     ymm13, ymm13, 12                         // c1       
+  vpslld     ymm15, ymm0, 1                           // 2*c0
+  vpsubd     ymm13, ymm0, ymm13                       // c0-c1
+  vpaddd     ymm0, ymm13, ymm15                       // 3*c0-c1
+
+  vmovdqu    ymm13, ymm1
+  vpand      ymm1, ymm14, ymm1                        // c0
+  vpsrad     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm1, 1                           // 2*c0
+  vpsubd     ymm13, ymm1, ymm13                       // c0-c1
+  vpaddd     ymm1, ymm13, ymm15                       // 3*c0-c1
+skip1:
+  vpermd     ymm1, ymm12, ymm1 
+  vpermd     ymm0, ymm12, ymm0 
+
+  vmovdqu    ymm13, ymm3
+  vpand      ymm3, ymm14, ymm3                        // c0
+  vpsrlq     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm3, 1                           // 2*c0
+  vpsubd     ymm13, ymm3, ymm13                       // c0-c1
+  vpaddd     ymm3, ymm13, ymm15                       // 3*c0-c1 
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10], xmm0
+  vmovdqu    XMMWORD PTR [reg_p1+4*r13], xmm1 
+
+  cmp        r9, rbx 
+  jne        skip2
+  vmovdqu    ymm13, ymm2
+  vpand      ymm2, ymm14, ymm2                        // c0
+  vpsrad     ymm13, ymm13, 12                         // c1       
+  vpslld     ymm15, ymm2, 1                           // 2*c0
+  vpsubd     ymm13, ymm2, ymm13                       // c0-c1
+  vpaddd     ymm2, ymm13, ymm15                       // 3*c0-c1
+
+  vmovdqu    ymm13, ymm3
+  vpand      ymm3, ymm14, ymm3                        // c0
+  vpsrad     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm3, 1                           // 2*c0
+  vpsubd     ymm13, ymm3, ymm13                       // c0-c1
+  vpaddd     ymm3, ymm13, ymm15                       // 3*c0-c1
+skip2:
+  vpermd     ymm3, ymm12, ymm3 
+  vpermd     ymm2, ymm12, ymm2 
+
+  vmovdqu    ymm13, ymm5
+  vpand      ymm5, ymm14, ymm5                        // c0
+  vpsrlq     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm5, 1                           // 2*c0
+  vpsubd     ymm13, ymm5, ymm13                       // c0-c1
+  vpaddd     ymm5, ymm13, ymm15                       // 3*c0-c1 
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+16], xmm2
+  vmovdqu    XMMWORD PTR [reg_p1+4*r13+16], xmm3 
+
+  cmp        r9, rbx 
+  jne        skip3
+  vmovdqu    ymm13, ymm4
+  vpand      ymm4, ymm14, ymm4                        // c0
+  vpsrad     ymm13, ymm13, 12                         // c1       
+  vpslld     ymm15, ymm4, 1                           // 2*c0
+  vpsubd     ymm13, ymm4, ymm13                       // c0-c1
+  vpaddd     ymm4, ymm13, ymm15                       // 3*c0-c1
+
+  vmovdqu    ymm13, ymm5
+  vpand      ymm5, ymm14, ymm5                        // c0
+  vpsrad     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm5, 1                           // 2*c0
+  vpsubd     ymm13, ymm5, ymm13                       // c0-c1
+  vpaddd     ymm5, ymm13, ymm15                       // 3*c0-c1
+skip3:
+  vpermd     ymm5, ymm12, ymm5 
+  vpermd     ymm4, ymm12, ymm4 
+
+  vmovdqu    ymm13, ymm7
+  vpand      ymm7, ymm14, ymm7                        // c0
+  vpsrlq     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm7, 1                           // 2*c0
+  vpsubd     ymm13, ymm7, ymm13                       // c0-c1
+  vpaddd     ymm7, ymm13, ymm15                       // 3*c0-c1 
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+32], xmm4
+  vmovdqu    XMMWORD PTR [reg_p1+4*r13+32], xmm5  
+
+  cmp        r9, rbx 
+  jne        skip4
+  vmovdqu    ymm13, ymm6
+  vpand      ymm6, ymm14, ymm6                        // c0
+  vpsrad     ymm13, ymm13, 12                         // c1       
+  vpslld     ymm15, ymm6, 1                           // 2*c0
+  vpsubd     ymm13, ymm6, ymm13                       // c0-c1
+  vpaddd     ymm6, ymm13, ymm15                       // 3*c0-c1
+
+  vmovdqu    ymm13, ymm7
+  vpand      ymm7, ymm14, ymm7                        // c0
+  vpsrad     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm7, 1                           // 2*c0
+  vpsubd     ymm13, ymm7, ymm13                       // c0-c1
+  vpaddd     ymm7, ymm13, ymm15                       // 3*c0-c1
+skip4:
+  vpermd     ymm7, ymm12, ymm7 
+  vpermd     ymm6, ymm12, ymm6   
+  vmovdqu    XMMWORD PTR [reg_p1+4*r13+48], xmm7
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+48], xmm6
+  
+  add        r10, r14
+  cmp        r10, r11
+  jl         loop7b
+  mov        rbx, rax
+  shl        rbx, 1          // 2*k
+  add        r8, rbx         // j1+2*k
+  inc        r15
+  cmp        r15, r12
+  jl         loop6b
+  dec        r9
+  jnz        loop5b
+       
+// Scaling step
+  shl        rax, 1          // k = 2*k = 512
+  xor        r10, r10        // j = 0
+  mov        r14, 4 
+  movq       xmm0, reg_p3
+  vbroadcastsd ymm10, xmm0                            // S = omegainv1N_rev
+  movq       xmm0, reg_p4
+  vbroadcastsd ymm11, xmm0                            // T = Ninv
+loop8b:
+  vpmovsxdq  ymm13, XMMWORD PTR [reg_p1+4*r10+4*512]  // V = a[j+k]
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*r10]         // U = a[j]
+  vpsubd     ymm1, ymm0, ymm13                        // U - V
+  vpaddd     ymm0, ymm0, ymm13                        // U + V  
+  vpmuldq    ymm1, ymm1, ymm10                        // (U - V).S
+  vpmuldq    ymm0, ymm0, ymm11                        // (U + V).T
+  
+  vmovdqu    ymm13, ymm0
+  vpand      ymm0, ymm14, ymm0                        // c0
+  vpsrlq     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm0, 1                           // 2*c0
+  vpsubd     ymm13, ymm0, ymm13                       // c0-c1
+  vpaddd     ymm0, ymm13, ymm15                       // 3*c0-c1    
+
+  vmovdqu    ymm13, ymm1
+  vpand      ymm1, ymm14, ymm1                        // c0
+  vpsrlq     ymm13, ymm13, 12                         // c1
+  vpslld     ymm15, ymm1, 1                           // 2*c0
+  vpsubd     ymm13, ymm1, ymm13                       // c0-c1
+  vpaddd     ymm1, ymm13, ymm15                       // 3*c0-c1 
+  
+  vpermd     ymm0, ymm12, ymm0 
+  vpermd     ymm1, ymm12, ymm1 
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10], xmm0
+  vmovdqu    XMMWORD PTR [reg_p1+4*r10+4*512], xmm1
+  
+  add        r10, r14        // j+4 
+  cmp        r10, rax
+  jl         loop8b  
+loop9b:
+  pop        rbx
+  pop        r15
+  pop        r14
+  pop        r13
+  pop        r12
+  ret
+
+
+//***********************************************************************
+//  Component-wise multiplication and addition
+//  Operation: d [reg_p4] <- a [reg_p1] * b [reg_p2] + c [reg_p3]
+//             reg_p5 contains parameter n
+//*********************************************************************** 
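+// The 64-bit lane results of a*b + c can exceed 32 bits, so the "3*c0-c1" folding described
+// above is applied twice before the values are permuted back into packed 32-bit form.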
+.globl oqs_rlwe_msrln16_pmuladd_asm
+oqs_rlwe_msrln16_pmuladd_asm:
+  vmovdqu    ymm5, PERM0246
+  vmovdqu    ymm6, MASK12x8 
+  xor        rax, rax
+  movq       r11, 4
+lazo2:
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*rax]   // a
+  vpmovsxdq  ymm1, XMMWORD PTR [reg_p2+4*rax]   // b
+  vpmovsxdq  ymm2, XMMWORD PTR [reg_p3+4*rax]   // c
+  vpmuldq    ymm0, ymm1, ymm0 
+  vpaddq     ymm0, ymm2, ymm0                    
+
+  vmovdqu    ymm3, ymm0
+  vpand      ymm0, ymm6, ymm0                   // c0
+  vpsrlq     ymm3, ymm3, 12                     // c1
+  vpslld     ymm4, ymm0, 1                      // 2*c0
+  vpsubd     ymm3, ymm0, ymm3                   // c0-c1
+  vpaddd     ymm0, ymm3, ymm4                   // 3*c0-c1 
+
+  vmovdqu    ymm3, ymm0
+  vpand      ymm0, ymm6, ymm0                   // c0
+  vpsrad     ymm3, ymm3, 12                     // c1       
+  vpslld     ymm4, ymm0, 1                      // 2*c0
+  vpsubd     ymm3, ymm0, ymm3                   // c0-c1
+  vpaddd     ymm0, ymm3, ymm4                   // 3*c0-c1
+
+  vpermd     ymm0, ymm5, ymm0 
+  vmovdqu    XMMWORD PTR [reg_p4+4*rax], xmm0
+
+  add        rax, r11                           // j+4
+  cmp        rax, reg_p5
+  jl         lazo2
+  ret
+
+
+//***********************************************************************
+//  Component-wise multiplication
+//  Operation: c [reg_p3] <- a [reg_p1] * b [reg_p2]
+//             reg_p4 contains parameter n
+//*********************************************************************** 
+.globl oqs_rlwe_msrln16_pmul_asm
+oqs_rlwe_msrln16_pmul_asm: 
+  vmovdqu    ymm5, PERM0246
+  vmovdqu    ymm6, MASK12x8 
+  xor        rax, rax
+  movq       r11, 4
+lazo3:
+  vpmovsxdq  ymm0, XMMWORD PTR [reg_p1+4*rax]   // a
+  vpmovsxdq  ymm1, XMMWORD PTR [reg_p2+4*rax]   // b
+  vpmuldq    ymm0, ymm1, ymm0                    
+
+  vmovdqu    ymm3, ymm0
+  vpand      ymm0, ymm6, ymm0                   // c0
+  vpsrlq     ymm3, ymm3, 12                     // c1
+  vpslld     ymm4, ymm0, 1                      // 2*c0
+  vpsubd     ymm3, ymm0, ymm3                   // c0-c1
+  vpaddd     ymm0, ymm3, ymm4                   // 3*c0-c1 
+
+  vmovdqu    ymm3, ymm0
+  vpand      ymm0, ymm6, ymm0                   // c0
+  vpsrad     ymm3, ymm3, 12                     // c1       
+  vpslld     ymm4, ymm0, 1                      // 2*c0
+  vpsubd     ymm3, ymm0, ymm3                   // c0-c1
+  vpaddd     ymm0, ymm3, ymm4                   // 3*c0-c1
+
+  vpermd     ymm0, ymm5, ymm0 
+  vmovdqu    XMMWORD PTR [reg_p3+4*rax], xmm0
+
+  add        rax, r11                           // j+4
+  cmp        rax, reg_p4
+  jl         lazo3
+  ret
+
+
+//***********************************************************************
+//  Two consecutive reductions
+//  Operation: c [reg_p1] <- a [reg_p1]
+//             reg_p2 contains parameter n
+//*********************************************************************** 
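+// Applies the "3*c0-c1" folding twice and then a branch-free correction that maps each
+// coefficient into [0, q) (conditional add of q, subtract q, conditional add of q).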
+.globl oqs_rlwe_msrln16_two_reduce12289_asm
+oqs_rlwe_msrln16_two_reduce12289_asm: 
+  vmovdqu    ymm6, MASK12x8 
+  vmovdqu    ymm7, PRIME8x
+  xor        rax, rax
+  movq       r11, 8
+lazo4:
+  vmovdqu    ymm0, YMMWORD PTR [reg_p1+4*rax]   // a
+
+  vmovdqu    ymm3, ymm0
+  vpand      ymm0, ymm6, ymm0                   // c0
+  vpsrad     ymm3, ymm3, 12                     // c1
+  vpslld     ymm4, ymm0, 1                      // 2*c0
+  vpsubd     ymm3, ymm0, ymm3                   // c0-c1
+  vpaddd     ymm0, ymm3, ymm4                   // 3*c0-c1 
+
+  vmovdqu    ymm3, ymm0
+  vpand      ymm0, ymm6, ymm0                   // c0
+  vpsrad     ymm3, ymm3, 12                     // c1       
+  vpslld     ymm4, ymm0, 1                      // 2*c0
+  vpsubd     ymm3, ymm0, ymm3                   // c0-c1
+  vpaddd     ymm0, ymm3, ymm4                   // 3*c0-c1
+
+  vpsrad     ymm2, ymm0, 31
+  vpand      ymm2, ymm7, ymm2
+  vpaddd     ymm2, ymm0, ymm2
+  vpsubd     ymm0, ymm2, ymm7
+
+  vpsrad     ymm2, ymm0, 31
+  vpand      ymm2, ymm7, ymm2
+  vpaddd     ymm0, ymm0, ymm2
+
+  vmovdqu    YMMWORD PTR [reg_p1+4*rax], ymm0
+
+  add        rax, r11                           // j+8
+  cmp        rax, reg_p2
+  jl         lazo4
+  ret
+
+
+//***********************************************************************
+//  Encoding
+//  Operation: c [reg_p2] <- a [reg_p1]
+//*********************************************************************** 
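+// Packs each group of eight coefficients (14 significant bits each, since q = 12289 < 2^14)
+// into 14 bytes, so the 1024 coefficients occupy 1792 bytes of the output buffer;
+// oqs_rlwe_msrln16_decode_asm below reverses this packing.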
+.globl oqs_rlwe_msrln16_encode_asm
+oqs_rlwe_msrln16_encode_asm: 
+  vmovdqu    ymm6, MASK32 
+  vmovdqu    ymm7, MASK42
+  mov        r9, 1024
+  xor        rax, rax
+  xor        r10, r10
+  mov        r11, 14
+  mov        rcx, 8
+lazo5:
+  vmovdqu    ymm0, YMMWORD PTR [reg_p1+4*rax]   // a
+
+  vpsrlq     ymm1, ymm0, 18  
+  vpsllq     ymm2, ymm0, 4
+  vpand      ymm0, ymm0, ymm6
+  vpsrldq    ymm2, ymm2, 5   
+  vpsrlq     ymm3, ymm1, 4
+  vpand      ymm1, ymm1, ymm6
+  vpand      ymm2, ymm2, ymm7
+  vpsrldq    ymm3, ymm3, 4 
+  vpor       ymm0, ymm0, ymm1
+  vpor       ymm0, ymm0, ymm2 
+  vpor       ymm0, ymm0, ymm3 
+  vpermq     ymm1, ymm0, 0x0e   
+
+  vmovdqu    XMMWORD PTR [reg_p2+r10], xmm0
+  vmovdqu    XMMWORD PTR [reg_p2+r10+7], xmm1
+
+  add        r10, r11
+  add        rax, rcx        // j+8
+  cmp        rax, r9
+  jl         lazo5
+  ret
+
+
+//***********************************************************************
+//  Decoding
+//  Operation: c [reg_p2] <- a [reg_p1]
+//*********************************************************************** 
+.globl oqs_rlwe_msrln16_decode_asm
+oqs_rlwe_msrln16_decode_asm: 
+  vmovdqu    ymm6, MASK14_1 
+  vmovdqu    ymm7, MASK14_2
+  vmovdqu    ymm8, MASK14_3
+  vmovdqu    ymm9, MASK14_4
+  mov        r9, 1024
+  xor        rax, rax
+  xor        r10, r10
+  mov        r11, 14
+  mov        rcx, 8
+lazo6:
+  vmovdqu    xmm0, XMMWORD PTR [reg_p1+r10]
+  vmovdqu    xmm1, XMMWORD PTR [reg_p1+r10+7]
+  vinserti128 ymm0, ymm0, xmm1, 1               
+
+  vpand      ymm1, ymm0, ymm6
+  vpand      ymm2, ymm0, ymm7
+  vpand      ymm3, ymm0, ymm8
+  vpand      ymm4, ymm0, ymm9
+   
+  vpsllq     ymm2, ymm2, 18 
+  vpsllq     ymm3, ymm3, 4
+  vpslldq    ymm3, ymm3, 4 
+  vpsrlq     ymm4, ymm4, 2
+  vpslldq    ymm4, ymm4, 7
+
+  vpor       ymm1, ymm1, ymm2 
+  vpor       ymm1, ymm1, ymm3 
+  vpor       ymm1, ymm1, ymm4 
+  
+  vmovdqu    YMMWORD PTR [reg_p2+4*rax], ymm1   
+
+  add        r10, r11
+  add        rax, rcx            // j+8
+  cmp        rax, r9
+  jl         lazo6
+  ret
diff --git a/crypt/liboqs/kex_rlwe_msrln16/LICENSE.txt b/crypt/liboqs/kex_rlwe_msrln16/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4340e43be7ba94b0f64000b9410ebccbc5f25f11
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/LICENSE.txt
@@ -0,0 +1,25 @@
+LatticeCrypto
+
+Copyright (c) Microsoft Corporation
+All rights reserved. 
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 
+associated documentation files (the ""Software""), to deal in the Software without restriction,
+including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial 
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 
+LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+The library uses the public domain implementation of SHAKE128 by the Keccak team; see the header
+of shake128.c for details.
diff --git a/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto.h b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto.h
new file mode 100644
index 0000000000000000000000000000000000000000..f921d879400f9e8cc56dfa087706afc89ab8004a
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto.h
@@ -0,0 +1,90 @@
+/***************************************************************************************
+* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: main header file
+*
+****************************************************************************************/
+
+#ifndef __LatticeCrypt_H__
+#define __LatticeCrypt_H__
+
+// For C++
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <oqs/rand.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+// NOTE: probably a better way to do this.
+#if (defined(__x86_64__) || defined(__x86_64) || defined(__arch64__) || defined(_M_AMD64) || defined(_M_X64) || defined(_WIN64) || !defined(__LP64__))
+#define RADIX 64
+typedef uint64_t digit_t; // Unsigned 64-bit digit
+typedef int64_t sdigit_t; // Signed 64-bit digit
+#else
+#define RADIX 32
+typedef uint32_t digit_t; // Unsigned 32-bit digit
+typedef int32_t sdigit_t; // Signed 32-bit digit
+
+#endif
+
+// Definitions of the error-handling type and error codes
+
+typedef enum {
+	CRYPTO_SUCCESS,                   // 0x00
+	CRYPTO_ERROR,                     // 0x01
+	CRYPTO_ERROR_DURING_TEST,         // 0x02
+	CRYPTO_ERROR_UNKNOWN,             // 0x03
+	CRYPTO_ERROR_NOT_IMPLEMENTED,     // 0x04
+	CRYPTO_ERROR_NO_MEMORY,           // 0x05
+	CRYPTO_ERROR_INVALID_PARAMETER,   // 0x06
+	CRYPTO_ERROR_SHARED_KEY,          // 0x07
+	CRYPTO_ERROR_TOO_MANY_ITERATIONS, // 0x08
+	CRYPTO_ERROR_END_OF_LIST
+} CRYPTO_STATUS;
+
+#define CRYPTO_STATUS_TYPE_SIZE (CRYPTO_ERROR_END_OF_LIST)
+
+// Basic key-exchange constants
+#define OQS_RLWE_MSRLN16_PKA_BYTES 1824     // Alice's public key size
+#define OQS_RLWE_MSRLN16_PKB_BYTES 2048     // Bob's public key size
+#define OQS_RLWE_MSRLN16_SHAREDKEY_BYTES 32 // Shared key size
+
+/******************** Function prototypes *******************/
+
+// Clear digits from memory. "nwords" indicates the number of digits to be zeroed.
+extern void oqs_rlwe_msrln16_clear_words(void *mem, digit_t nwords);
+
+/*********************** Key exchange API ***********************/
+
+// Alice's key generation
+// It produces a private key SecretKeyA and computes the public key PublicKeyA.
+// Outputs: the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total)
+//          the public key PublicKeyA that occupies 1824 bytes
+CRYPTO_STATUS oqs_rlwe_msrln16_KeyGeneration_A(int32_t *SecretKeyA, unsigned char *PublicKeyA, OQS_RAND *rand);
+
+// Bob's key generation and shared secret computation
+// It produces a private key and computes the public key PublicKeyB. In combination with Alice's public key PublicKeyA, it computes
+// the shared secret SharedSecretB.
+// Input:   Alice's public key PublicKeyA that consists of 1824 bytes
+// Outputs: the public key PublicKeyB that occupies 2048 bytes.
+//          the 256-bit shared secret SharedSecretB.
+CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_B(unsigned char *PublicKeyA, unsigned char *SharedSecretB, unsigned char *PublicKeyB, OQS_RAND *rand);
+
+// Alice's shared secret computation
+// It computes the shared secret SharedSecretA using Bob's public key PublicKeyB and Alice's private key SecretKeyA.
+// Inputs: Bob's public key PublicKeyB that consists of 2048 bytes
+//         the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total)
+// Output: the 256-bit shared secret SharedSecretA.
+CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_A(unsigned char *PublicKeyB, int32_t *SecretKeyA, unsigned char *SharedSecretA);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
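
Editorial note (not part of the patch): the three prototypes above form a complete unauthenticated key exchange. The sketch below shows how a caller might drive them end to end; it assumes this header is included, that `rand` is an already-initialized OQS_RAND * provided by the surrounding liboqs build, and the helper name example_key_exchange is hypothetical.

#include <string.h>
#include "LatticeCrypto.h"

/* Sketch only: exercises the key-exchange API declared above. */
static int example_key_exchange(OQS_RAND *rand) {
	int32_t secret_A[1024];                               /* Alice's private key (4096 bytes) */
	unsigned char pk_A[OQS_RLWE_MSRLN16_PKA_BYTES];       /* Alice -> Bob message */
	unsigned char pk_B[OQS_RLWE_MSRLN16_PKB_BYTES];       /* Bob -> Alice message */
	unsigned char ss_A[OQS_RLWE_MSRLN16_SHAREDKEY_BYTES]; /* Alice's shared secret */
	unsigned char ss_B[OQS_RLWE_MSRLN16_SHAREDKEY_BYTES]; /* Bob's shared secret */

	if (oqs_rlwe_msrln16_KeyGeneration_A(secret_A, pk_A, rand) != CRYPTO_SUCCESS)
		return 0;
	if (oqs_rlwe_msrln16_SecretAgreement_B(pk_A, ss_B, pk_B, rand) != CRYPTO_SUCCESS)
		return 0;
	if (oqs_rlwe_msrln16_SecretAgreement_A(pk_B, secret_A, ss_A) != CRYPTO_SUCCESS)
		return 0;

	/* Both sides now hold the same 32-byte shared secret. */
	return memcmp(ss_A, ss_B, OQS_RLWE_MSRLN16_SHAREDKEY_BYTES) == 0;
}
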
diff --git a/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_kex.c b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_kex.c
new file mode 100644
index 0000000000000000000000000000000000000000..5425366bf1602da99ae61172c19c42bf06482e22
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_kex.c
@@ -0,0 +1,438 @@
+/****************************************************************************************
+ * LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
+ *
+ *    Copyright (c) Microsoft Corporation. All rights reserved.
+ *
+ *
+ * Abstract: Ring-LWE key exchange
+ *           The implementation is based on the instantiation of Peikert's key exchange [1]
+ *           due to Alkim, Ducas, Poppelmann and Schwabe [2].
+ *
+ * [1] C. Peikert, "Lattice cryptography for the internet", in Post-Quantum Cryptography -
+ *     6th International Workshop (PQCrypto 2014), LNCS 8772, pp. 197-219. Springer, 2014.
+ * [2] E. Alkim, L. Ducas, T. Pöppelmann and P. Schwabe, "Post-quantum key exchange - a new
+ *     hope", IACR Cryptology ePrint Archive, Report 2015/1092, 2015.
+ *
+ ******************************************************************************************/
+
+#include "LatticeCrypto_priv.h"
+#include "oqs/rand.h"
+#include <oqs/sha3.h>
+
+extern const int32_t psi_rev_ntt1024_12289[1024];
+extern const int32_t omegainv_rev_ntt1024_12289[1024];
+extern const int32_t omegainv10N_rev_ntt1024_12289;
+extern const int32_t Ninv11_ntt1024_12289;
+
+// import external code
+#ifdef RLWE_ASM_AVX2
+#include "AMD64/consts.c"
+#include "AMD64/ntt_x64.c"
+#else
+#include "generic/ntt.c"
+#endif
+
+__inline void oqs_rlwe_msrln16_clear_words(void *mem, digit_t nwords) {
+	// Clear digits from memory. "nwords" indicates the number of digits to be zeroed.
+	// This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing.
+	unsigned int i;
+	volatile digit_t *v = mem;
+
+	for (i = 0; i < nwords; i++) {
+		v[i] = 0;
+	}
+}
+
+void oqs_rlwe_msrln16_encode_A(const uint32_t *pk, const unsigned char *seed, unsigned char *m) {
+	// Alice's message encoding
+	unsigned int i = 0, j;
+#if defined(RLWE_ASM_AVX2)
+	oqs_rlwe_msrln16_encode_asm(pk, m);
+	i = 1792;
+#else
+	for (j = 0; j < 1024; j += 4) {
+		m[i] = (unsigned char) (pk[j] & 0xFF);
+		m[i + 1] = (unsigned char) ((pk[j] >> 8) | ((pk[j + 1] & 0x03) << 6));
+		m[i + 2] = (unsigned char) ((pk[j + 1] >> 2) & 0xFF);
+		m[i + 3] = (unsigned char) ((pk[j + 1] >> 10) | ((pk[j + 2] & 0x0F) << 4));
+		m[i + 4] = (unsigned char) ((pk[j + 2] >> 4) & 0xFF);
+		m[i + 5] = (unsigned char) ((pk[j + 2] >> 12) | ((pk[j + 3] & 0x3F) << 2));
+		m[i + 6] = (unsigned char) (pk[j + 3] >> 6);
+		i += 7;
+	}
+#endif
+
+	for (j = 0; j < 32; j++) {
+		m[i + j] = seed[j];
+	}
+}
+
+void oqs_rlwe_msrln16_decode_A(const unsigned char *m, uint32_t *pk, unsigned char *seed) {
+	// Alice's message decoding
+	unsigned int i = 0, j;
+
+#if defined(RLWE_ASM_AVX2)
+	oqs_rlwe_msrln16_decode_asm(m, pk);
+	i = 1792;
+#else
+	for (j = 0; j < 1024; j += 4) {
+		pk[j] = ((uint32_t) m[i] | (((uint32_t) m[i + 1] & 0x3F) << 8));
+		pk[j + 1] = (((uint32_t) m[i + 1] >> 6) | ((uint32_t) m[i + 2] << 2) | (((uint32_t) m[i + 3] & 0x0F) << 10));
+		pk[j + 2] = (((uint32_t) m[i + 3] >> 4) | ((uint32_t) m[i + 4] << 4) | (((uint32_t) m[i + 5] & 0x03) << 12));
+		pk[j + 3] = (((uint32_t) m[i + 5] >> 2) | ((uint32_t) m[i + 6] << 6));
+		i += 7;
+	}
+#endif
+
+	for (j = 0; j < 32; j++) {
+		seed[j] = m[i + j];
+	}
+}
+
+void oqs_rlwe_msrln16_encode_B(const uint32_t *pk, const uint32_t *rvec, unsigned char *m) {
+	// Bob's message encoding
+	unsigned int i = 0, j;
+
+#if defined(RLWE_ASM_AVX2)
+	oqs_rlwe_msrln16_encode_asm(pk, m);
+#else
+	for (j = 0; j < 1024; j += 4) {
+		m[i] = (unsigned char) (pk[j] & 0xFF);
+		m[i + 1] = (unsigned char) ((pk[j] >> 8) | ((pk[j + 1] & 0x03) << 6));
+		m[i + 2] = (unsigned char) ((pk[j + 1] >> 2) & 0xFF);
+		m[i + 3] = (unsigned char) ((pk[j + 1] >> 10) | ((pk[j + 2] & 0x0F) << 4));
+		m[i + 4] = (unsigned char) ((pk[j + 2] >> 4) & 0xFF);
+		m[i + 5] = (unsigned char) ((pk[j + 2] >> 12) | ((pk[j + 3] & 0x3F) << 2));
+		m[i + 6] = (unsigned char) (pk[j + 3] >> 6);
+		i += 7;
+	}
+#endif
+
+	i = 0;
+	for (j = 0; j < 1024 / 4; j++) {
+		m[1792 + j] = (unsigned char) (rvec[i] | (rvec[i + 1] << 2) | (rvec[i + 2] << 4) | (rvec[i + 3] << 6));
+		i += 4;
+	}
+}
+
+void oqs_rlwe_msrln16_decode_B(unsigned char *m, uint32_t *pk, uint32_t *rvec) {
+	// Bob's message decoding
+	unsigned int i = 0, j;
+
+#if defined(RLWE_ASM_AVX2)
+	oqs_rlwe_msrln16_decode_asm(m, pk);
+	i = 1792;
+#else
+	for (j = 0; j < 1024; j += 4) {
+		pk[j] = ((uint32_t) m[i] | (((uint32_t) m[i + 1] & 0x3F) << 8));
+		pk[j + 1] = (((uint32_t) m[i + 1] >> 6) | ((uint32_t) m[i + 2] << 2) | (((uint32_t) m[i + 3] & 0x0F) << 10));
+		pk[j + 2] = (((uint32_t) m[i + 3] >> 4) | ((uint32_t) m[i + 4] << 4) | (((uint32_t) m[i + 5] & 0x03) << 12));
+		pk[j + 3] = (((uint32_t) m[i + 5] >> 2) | ((uint32_t) m[i + 6] << 6));
+		i += 7;
+	}
+#endif
+
+	i = 0;
+	for (j = 0; j < 1024 / 4; j++) {
+		rvec[i] = (uint32_t)(m[1792 + j] & 0x03);
+		rvec[i + 1] = (uint32_t)((m[1792 + j] >> 2) & 0x03);
+		rvec[i + 2] = (uint32_t)((m[1792 + j] >> 4) & 0x03);
+		rvec[i + 3] = (uint32_t)(m[1792 + j] >> 6);
+		i += 4;
+	}
+}
+
+static __inline uint32_t Abs(int32_t value) {
+	// Compute absolute value
+	uint32_t mask;
+
+	mask = (uint32_t)(value >> 31);
+	return ((mask ^ value) - mask);
+}
+
+CRYPTO_STATUS oqs_rlwe_msrln16_HelpRec(const uint32_t *x, uint32_t *rvec, OQS_RAND *rand) {
+	// Reconciliation helper
+	unsigned int i, j, norm;
+	unsigned char bit, random_bits[32];
+	uint32_t v0[4], v1[4];
+	// OQS integration note: call to aux API replaced with direct call to OQS_RAND
+	rand->rand_n(rand, random_bits, 32);
+
+#if defined(RLWE_ASM_AVX2)
+	oqs_rlwe_msrln16_helprec_asm(x, rvec, random_bits);
+#else
+	for (i = 0; i < 256; i++) {
+		bit = 1 & (random_bits[i >> 3] >> (i & 0x07));
+		rvec[i] = (x[i] << 1) - bit;
+		rvec[i + 256] = (x[i + 256] << 1) - bit;
+		rvec[i + 512] = (x[i + 512] << 1) - bit;
+		rvec[i + 768] = (x[i + 768] << 1) - bit;
+
+		norm = 0;
+		v0[0] = 4;
+		v0[1] = 4;
+		v0[2] = 4;
+		v0[3] = 4;
+		v1[0] = 3;
+		v1[1] = 3;
+		v1[2] = 3;
+		v1[3] = 3;
+		for (j = 0; j < 4; j++) {
+			v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q4) >> 31;
+			v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_3Q4) >> 31;
+			v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_5Q4) >> 31;
+			v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_7Q4) >> 31;
+			v1[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q2) >> 31;
+			v1[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q) >> 31;
+			v1[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_3Q2) >> 31;
+			norm += Abs(2 * rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q * v0[j]);
+		}
+
+		norm = (uint32_t)((int32_t)(norm - OQS_RLWE_MSRLN16_PARAMETER_Q) >> 31); // If norm < q then norm = 0xff...ff, else norm = 0
+		v0[0] = (norm & (v0[0] ^ v1[0])) ^ v1[0];
+		v0[1] = (norm & (v0[1] ^ v1[1])) ^ v1[1];
+		v0[2] = (norm & (v0[2] ^ v1[2])) ^ v1[2];
+		v0[3] = (norm & (v0[3] ^ v1[3])) ^ v1[3];
+		rvec[i] = (v0[0] - v0[3]) & 0x03;
+		rvec[i + 256] = (v0[1] - v0[3]) & 0x03;
+		rvec[i + 512] = (v0[2] - v0[3]) & 0x03;
+		rvec[i + 768] = ((v0[3] << 1) + (1 & ~norm)) & 0x03;
+	}
+#endif
+
+	return CRYPTO_SUCCESS;
+}
+
+static __inline uint32_t LDDecode(int32_t *t) {
+	// Low-density decoding
+	unsigned int i, norm = 0;
+	uint32_t mask1, mask2, value;
+	int32_t cneg = -8 * OQS_RLWE_MSRLN16_PARAMETER_Q;
+
+	for (i = 0; i < 4; i++) {
+		mask1 = t[i] >> 31;                                                     // If t[i] < 0 then mask1 = 0xff...ff, else mask1 = 0
+		mask2 = (4 * OQS_RLWE_MSRLN16_PARAMETER_Q - (int32_t) Abs(t[i])) >> 31; // If 4*PARAMETER_Q > Abs(t[i]) then mask2 = 0, else mask2 = 0xff...ff
+
+		value = ((mask1 & (8 * OQS_RLWE_MSRLN16_PARAMETER_Q ^ cneg)) ^ cneg);
+		norm += Abs(t[i] + (mask2 & value));
+	}
+
+	return ((8 * OQS_RLWE_MSRLN16_PARAMETER_Q - norm) >> 31) ^ 1; // If norm < 8*PARAMETER_Q then return 1, else return 0
+}
+
+void oqs_rlwe_msrln16_Rec(const uint32_t *x, const uint32_t *rvec, unsigned char *key) {
+// Reconciliation
+
+#if defined(RLWE_ASM_AVX2)
+	oqs_rlwe_msrln16_rec_asm(x, rvec, key);
+#else
+	unsigned int i;
+	uint32_t t[4];
+
+	for (i = 0; i < 32; i++) {
+		key[i] = 0;
+	}
+	for (i = 0; i < 256; i++) {
+		t[0] = 8 * x[i] - (2 * rvec[i] + rvec[i + 768]) * OQS_RLWE_MSRLN16_PARAMETER_Q;
+		t[1] = 8 * x[i + 256] - (2 * rvec[i + 256] + rvec[i + 768]) * OQS_RLWE_MSRLN16_PARAMETER_Q;
+		t[2] = 8 * x[i + 512] - (2 * rvec[i + 512] + rvec[i + 768]) * OQS_RLWE_MSRLN16_PARAMETER_Q;
+		t[3] = 8 * x[i + 768] - (rvec[i + 768]) * OQS_RLWE_MSRLN16_PARAMETER_Q;
+
+		key[i >> 3] |= (unsigned char) LDDecode((int32_t *) t) << (i & 0x07);
+	}
+#endif
+}
+
+CRYPTO_STATUS oqs_rlwe_msrln16_get_error(int32_t *e, OQS_RAND *rand) {
+	// Error sampling
+	unsigned char stream[3 * OQS_RLWE_MSRLN16_PARAMETER_N];
+	uint32_t *pstream = (uint32_t *) &stream;
+	uint32_t acc1, acc2, temp;
+	uint8_t *pacc1 = (uint8_t *) &acc1, *pacc2 = (uint8_t *) &acc2;
+	unsigned int i, j;
+
+	// OQS integration note: call to aux API replaced with direct call to OQS_RAND
+	rand->rand_n(rand, stream, 3 * OQS_RLWE_MSRLN16_PARAMETER_N);
+
+#if defined(RLWE_ASM_AVX2)
+	oqs_rlwe_msrln16_error_sampling_asm(stream, e);
+#else
+	for (i = 0; i < OQS_RLWE_MSRLN16_PARAMETER_N / 4; i++) {
+		acc1 = 0;
+		acc2 = 0;
+		for (j = 0; j < 8; j++) {
+			acc1 += (pstream[i] >> j) & 0x01010101;
+			acc2 += (pstream[i + OQS_RLWE_MSRLN16_PARAMETER_N / 4] >> j) & 0x01010101;
+		}
+		for (j = 0; j < 4; j++) {
+			temp = pstream[i + 2 * OQS_RLWE_MSRLN16_PARAMETER_N / 4] >> j;
+			acc1 += temp & 0x01010101;
+			acc2 += (temp >> 4) & 0x01010101;
+		}
+		e[2 * i] = pacc1[0] - pacc1[1];
+		e[2 * i + 1] = pacc1[2] - pacc1[3];
+		e[2 * i + OQS_RLWE_MSRLN16_PARAMETER_N / 2] = pacc2[0] - pacc2[1];
+		e[2 * i + OQS_RLWE_MSRLN16_PARAMETER_N / 2 + 1] = pacc2[2] - pacc2[3];
+	}
+#endif
+
+	return CRYPTO_SUCCESS;
+}
+
+CRYPTO_STATUS oqs_rlwe_msrln16_generate_a(uint32_t *a, const unsigned char *seed) {
+	// Generation of parameter a
+	// OQS integration note: call to aux API replaced with direct call to shake128
+	unsigned int pos = 0, ctr = 0;
+	uint16_t val;
+	unsigned int nblocks = 16;
+	uint8_t buf[OQS_SHA3_SHAKE128_RATE * 16]; // fixed at 16 blocks (rather than * nblocks) because MSVC rejects a variable-length array here
+	uint64_t state[OQS_SHA3_STATESIZE];
+	OQS_SHA3_shake128_absorb(state, seed, OQS_RLWE_MSRLN16_SEED_BYTES);
+	OQS_SHA3_shake128_squeezeblocks((unsigned char *) buf, nblocks, state);
+
+	while (ctr < OQS_RLWE_MSRLN16_PARAMETER_N) {
+		val = (buf[pos] | ((uint16_t) buf[pos + 1] << 8)) & 0x3fff;
+		if (val < OQS_RLWE_MSRLN16_PARAMETER_Q) {
+			a[ctr++] = val;
+		}
+		pos += 2;
+		if (pos > OQS_SHA3_SHAKE128_RATE * nblocks - 2) {
+			nblocks = 1;
+			OQS_SHA3_shake128_squeezeblocks((unsigned char *) buf, nblocks, state);
+			pos = 0;
+		}
+	}
+
+	return CRYPTO_SUCCESS;
+}
+
+CRYPTO_STATUS oqs_rlwe_msrln16_KeyGeneration_A(int32_t *SecretKeyA, unsigned char *PublicKeyA, OQS_RAND *rand) {
+	// Alice's key generation
+	// It produces a private key SecretKeyA and computes the public key PublicKeyA.
+	// Outputs: the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total)
+	//          the public key PublicKeyA that occupies 1824 bytes
+	// OQS integration note: randomness is supplied through the OQS_RAND argument; no separate LatticeCrypto_initialize() call is needed.
+	uint32_t a[OQS_RLWE_MSRLN16_PARAMETER_N];
+	int32_t e[OQS_RLWE_MSRLN16_PARAMETER_N];
+	unsigned char seed[OQS_RLWE_MSRLN16_SEED_BYTES];
+	CRYPTO_STATUS Status = CRYPTO_ERROR_UNKNOWN;
+
+	rand->rand_n(rand, seed, OQS_RLWE_MSRLN16_SEED_BYTES);
+	Status = oqs_rlwe_msrln16_generate_a(a, seed);
+	if (Status != CRYPTO_SUCCESS) {
+		goto cleanup;
+	}
+
+	Status = oqs_rlwe_msrln16_get_error(SecretKeyA, rand);
+	if (Status != CRYPTO_SUCCESS) {
+		goto cleanup;
+	}
+	Status = oqs_rlwe_msrln16_get_error(e, rand);
+	if (Status != CRYPTO_SUCCESS) {
+		goto cleanup;
+	}
+	oqs_rlwe_msrln16_NTT_CT_std2rev_12289(SecretKeyA, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_NTT_CT_std2rev_12289(e, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_smul(e, 3, OQS_RLWE_MSRLN16_PARAMETER_N);
+
+	oqs_rlwe_msrln16_pmuladd((int32_t *) a, SecretKeyA, e, (int32_t *) a, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_correction((int32_t *) a, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_encode_A(a, seed, PublicKeyA);
+
+cleanup:
+	oqs_rlwe_msrln16_clear_words((void *) e, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
+
+	return Status;
+}
+
+CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_B(unsigned char *PublicKeyA, unsigned char *SharedSecretB, unsigned char *PublicKeyB, OQS_RAND *rand) {
+	// Bob's key generation and shared secret computation
+	// It produces a private key and computes the public key PublicKeyB. In combination with Alice's public key PublicKeyA, it computes
+	// the shared secret SharedSecretB.
+	// Input:   Alice's public key PublicKeyA that consists of 1824 bytes
+	// Outputs: the public key PublicKeyB that occupies 2048 bytes.
+	//          the 256-bit shared secret SharedSecretB.
+	// OQS integration note: randomness is supplied through the OQS_RAND argument; no separate LatticeCrypto_initialize() call is needed.
+	uint32_t pk_A[OQS_RLWE_MSRLN16_PARAMETER_N], a[OQS_RLWE_MSRLN16_PARAMETER_N], v[OQS_RLWE_MSRLN16_PARAMETER_N], r[OQS_RLWE_MSRLN16_PARAMETER_N];
+	int32_t sk_B[OQS_RLWE_MSRLN16_PARAMETER_N], e[OQS_RLWE_MSRLN16_PARAMETER_N];
+	unsigned char seed[OQS_RLWE_MSRLN16_SEED_BYTES];
+	CRYPTO_STATUS Status = CRYPTO_ERROR_UNKNOWN;
+
+	oqs_rlwe_msrln16_decode_A(PublicKeyA, pk_A, seed);
+	Status = oqs_rlwe_msrln16_generate_a(a, seed);
+	if (Status != CRYPTO_SUCCESS) {
+		goto cleanup;
+	}
+
+	Status = oqs_rlwe_msrln16_get_error(sk_B, rand);
+	if (Status != CRYPTO_SUCCESS) {
+		goto cleanup;
+	}
+	Status = oqs_rlwe_msrln16_get_error(e, rand);
+	if (Status != CRYPTO_SUCCESS) {
+		goto cleanup;
+	}
+	oqs_rlwe_msrln16_NTT_CT_std2rev_12289(sk_B, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_NTT_CT_std2rev_12289(e, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_smul(e, 3, OQS_RLWE_MSRLN16_PARAMETER_N);
+
+	oqs_rlwe_msrln16_pmuladd((int32_t *) a, sk_B, e, (int32_t *) a, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_correction((int32_t *) a, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N);
+
+	Status = oqs_rlwe_msrln16_get_error(e, rand);
+	if (Status != CRYPTO_SUCCESS) {
+		goto cleanup;
+	}
+	oqs_rlwe_msrln16_NTT_CT_std2rev_12289(e, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_smul(e, 81, OQS_RLWE_MSRLN16_PARAMETER_N);
+
+	oqs_rlwe_msrln16_pmuladd((int32_t *) pk_A, sk_B, e, (int32_t *) v, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_INTT_GS_rev2std_12289((int32_t *) v, omegainv_rev_ntt1024_12289, omegainv10N_rev_ntt1024_12289, Ninv11_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_two_reduce12289((int32_t *) v, OQS_RLWE_MSRLN16_PARAMETER_N);
+#if !defined(RLWE_ASM_AVX2)
+	oqs_rlwe_msrln16_correction((int32_t *) v, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N);
+#endif
+
+	Status = oqs_rlwe_msrln16_HelpRec(v, r, rand);
+	if (Status != CRYPTO_SUCCESS) {
+		goto cleanup;
+	}
+	oqs_rlwe_msrln16_Rec(v, r, SharedSecretB);
+	oqs_rlwe_msrln16_encode_B(a, r, PublicKeyB);
+
+cleanup:
+	oqs_rlwe_msrln16_clear_words((void *) sk_B, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
+	oqs_rlwe_msrln16_clear_words((void *) e, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
+	oqs_rlwe_msrln16_clear_words((void *) a, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
+	oqs_rlwe_msrln16_clear_words((void *) v, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
+	oqs_rlwe_msrln16_clear_words((void *) r, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
+
+	return Status;
+}
+
+CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_A(unsigned char *PublicKeyB, int32_t *SecretKeyA, unsigned char *SharedSecretA) {
+	// Alice's shared secret computation
+	// It computes the shared secret SharedSecretA using Bob's public key PublicKeyB and Alice's private key SecretKeyA.
+	// Inputs: Bob's public key PublicKeyB that consists of 2048 bytes
+	//         the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total)
+	// Output: the 256-bit shared secret SharedSecretA.
+	uint32_t u[OQS_RLWE_MSRLN16_PARAMETER_N], r[OQS_RLWE_MSRLN16_PARAMETER_N];
+	CRYPTO_STATUS Status = CRYPTO_SUCCESS;
+
+	oqs_rlwe_msrln16_decode_B(PublicKeyB, u, r);
+
+	oqs_rlwe_msrln16_pmul(SecretKeyA, (int32_t *) u, (int32_t *) u, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_INTT_GS_rev2std_12289((int32_t *) u, omegainv_rev_ntt1024_12289, omegainv10N_rev_ntt1024_12289, Ninv11_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
+	oqs_rlwe_msrln16_two_reduce12289((int32_t *) u, OQS_RLWE_MSRLN16_PARAMETER_N);
+#if !defined(RLWE_ASM_AVX2)
+	oqs_rlwe_msrln16_correction((int32_t *) u, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N);
+#endif
+
+	oqs_rlwe_msrln16_Rec(u, r, SharedSecretA);
+
+	// Cleanup
+	oqs_rlwe_msrln16_clear_words((void *) u, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
+	oqs_rlwe_msrln16_clear_words((void *) r, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
+
+	return Status;
+}
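
Editorial note on oqs_rlwe_msrln16_get_error() above (not part of the patch): it draws 3*N bytes of randomness, i.e. 24 bits per coefficient, and for each coefficient outputs the difference between the Hamming weights of two disjoint 12-bit groups, a centered-binomial sample in [-12, 12]. The bit-sliced loop interleaves those groups across byte lanes for speed; the hypothetical scalar helper below is equivalent in distribution (though not in the exact bit-to-coefficient mapping) and is shown only to make the sampled distribution explicit.

#include <stdint.h>

/* Hypothetical scalar sketch: one error coefficient from 24 uniformly random bits. */
static int32_t sample_centered_binomial12(const unsigned char bits[3]) {
	int32_t s = 0;
	int j;
	for (j = 0; j < 12; j++)
		s += (bits[j >> 3] >> (j & 7)) & 1; /* first 12 bits count +1 each */
	for (j = 12; j < 24; j++)
		s -= (bits[j >> 3] >> (j & 7)) & 1; /* last 12 bits count -1 each */
	return s; /* value in [-12, 12] */
}
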
diff --git a/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_priv.h b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_priv.h
new file mode 100644
index 0000000000000000000000000000000000000000..5e8cd1db27abc534fb2a53aa1922233c56e52047
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/LatticeCrypto_priv.h
@@ -0,0 +1,117 @@
+/****************************************************************************************
+* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: internal header file
+*
+*****************************************************************************************/
+
+#ifndef __LatticeCrypto_priv_H__
+#define __LatticeCrypto_priv_H__
+
+// For C++
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "LatticeCrypto.h"
+#include <oqs/rand.h>
+
+// Basic constants
+#define OQS_RLWE_MSRLN16_PARAMETER_N 1024
+#define OQS_RLWE_MSRLN16_PARAMETER_Q 12289
+#define OQS_RLWE_MSRLN16_SEED_BYTES 256 / 8
+#define OQS_RLWE_MSRLN16_PARAMETER_Q4 3073
+#define OQS_RLWE_MSRLN16_PARAMETER_3Q4 9217
+#define OQS_RLWE_MSRLN16_PARAMETER_5Q4 15362
+#define OQS_RLWE_MSRLN16_PARAMETER_7Q4 21506
+#define OQS_RLWE_MSRLN16_PARAMETER_Q2 6145
+#define OQS_RLWE_MSRLN16_PARAMETER_3Q2 18434
+
+// Macro definitions
+
+#define OQS_RLWE_MSRLN16_NBITS_TO_NWORDS(nbits) (((nbits) + (sizeof(digit_t) * 8) - 1) / (sizeof(digit_t) * 8)) // Conversion macro from number of bits to number of computer words
+#define OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(nbytes) (((nbytes) + sizeof(digit_t) - 1) / sizeof(digit_t))          // Conversion macro from number of bytes to number of computer words
+
+// Macro to avoid compiler warnings when detecting unreferenced parameters
+#define OQS_RLWE_MSRLN16_UNREFERENCED_PARAMETER(PAR) (PAR)
+
+/******************** Function prototypes *******************/
+/******************* Polynomial functions *******************/
+
+// Forward NTT
+void oqs_rlwe_msrln16_NTT_CT_std2rev_12289(int32_t *a, const int32_t *psi_rev, unsigned int N);
+void oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm(int32_t *a, const int32_t *psi_rev, unsigned int N);
+
+// Inverse NTT
+void oqs_rlwe_msrln16_INTT_GS_rev2std_12289(int32_t *a, const int32_t *omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N);
+void oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm(int32_t *a, const int32_t *omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N);
+
+// Reduction modulo q
+int32_t oqs_rlwe_msrln16_reduce12289(int64_t a);
+
+// Two merged reductions modulo q
+int32_t oqs_rlwe_msrln16_reduce12289_2x(int64_t a);
+
+// Two consecutive reductions modulo q
+void oqs_rlwe_msrln16_two_reduce12289(int32_t *a, unsigned int N);
+void oqs_rlwe_msrln16_two_reduce12289_asm(int32_t *a, unsigned int N);
+
+// Correction modulo q
+void oqs_rlwe_msrln16_correction(int32_t *a, int32_t p, unsigned int N);
+
+// Component-wise multiplication
+void oqs_rlwe_msrln16_pmul(int32_t *a, int32_t *b, int32_t *c, unsigned int N);
+void oqs_rlwe_msrln16_pmul_asm(int32_t *a, int32_t *b, int32_t *c, unsigned int N);
+
+// Component-wise multiplication and addition
+void oqs_rlwe_msrln16_pmuladd(int32_t *a, int32_t *b, int32_t *c, int32_t *d, unsigned int N);
+void oqs_rlwe_msrln16_pmuladd_asm(int32_t *a, int32_t *b, int32_t *c, int32_t *d, unsigned int N);
+
+// Component-wise multiplication with scalar
+void oqs_rlwe_msrln16_smul(int32_t *a, int32_t scalar, unsigned int N);
+
+/******************* Key exchange functions *******************/
+
+// Alice's message encoding
+void oqs_rlwe_msrln16_encode_A(const uint32_t *pk, const unsigned char *seed, unsigned char *m);
+
+// Alice's message decoding
+void oqs_rlwe_msrln16_decode_A(const unsigned char *m, uint32_t *pk, unsigned char *seed);
+
+// Bob's message encoding
+void oqs_rlwe_msrln16_encode_B(const uint32_t *pk, const uint32_t *rvec, unsigned char *m);
+
+// Bob's message decoding
+void oqs_rlwe_msrln16_decode_B(unsigned char *m, uint32_t *pk, uint32_t *rvec);
+
+// Partial message encoding/decoding (assembly optimized)
+void oqs_rlwe_msrln16_encode_asm(const uint32_t *pk, unsigned char *m);
+void oqs_rlwe_msrln16_decode_asm(const unsigned char *m, uint32_t *pk);
+
+// Reconciliation helper
+CRYPTO_STATUS oqs_rlwe_msrln16_HelpRec(const uint32_t *x, uint32_t *rvec, OQS_RAND *rand);
+
+// Partial reconciliation helper (assembly optimized)
+void oqs_rlwe_msrln16_helprec_asm(const uint32_t *x, uint32_t *rvec, unsigned char *random_bits);
+
+// Reconciliation
+void oqs_rlwe_msrln16_Rec(const uint32_t *x, const uint32_t *rvec, unsigned char *key);
+void oqs_rlwe_msrln16_rec_asm(const uint32_t *x, const uint32_t *rvec, unsigned char *key);
+
+// Error sampling
+CRYPTO_STATUS oqs_rlwe_msrln16_get_error(int32_t *e, OQS_RAND *rand);
+
+// Partial error sampling (assembly optimized)
+void oqs_rlwe_msrln16_error_sampling_asm(unsigned char *stream, int32_t *e);
+
+// Generation of parameter a
+CRYPTO_STATUS oqs_rlwe_msrln16_generate_a(uint32_t *a, const unsigned char *seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_msrln16/Makefile.am b/crypt/liboqs/kex_rlwe_msrln16/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..0149e305e02d573eabafe3ed48193226cc6ad91c
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/Makefile.am
@@ -0,0 +1,8 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libmsrln16.la
+
+
+libmsrln16_la_SOURCES = kex_rlwe_msrln16.c LatticeCrypto_kex.c ntt_constants.c
+libmsrln16_la_CPPFLAGS = -I../../include
+libmsrln16_la_CPPFLAGS += $(AM_CPPFLAGS) 
+
diff --git a/crypt/liboqs/kex_rlwe_msrln16/README.txt b/crypt/liboqs/kex_rlwe_msrln16/README.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6ab71dbec2d9129b7a0b52c5644edbac74f80453
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/README.txt
@@ -0,0 +1,42 @@
+                                 LatticeCrypto v1.0 (C Edition)
+                                 ==============================
+
+LatticeCrypto is a post-quantum secure cryptography library based on the Ring-Learning with Errors (R-LWE) 
+problem. Version 1.0 of the library implements the instantiation of Peikert's key exchange [1] due to
+Alkim, Ducas, Pöppelmann and Schwabe [2], and incorporates novel techniques to provide higher performance.
+
+The library [3] was developed by Microsoft Research for experimentation purposes. 
+
+*** THE ORIGINAL README HAS BEEN TRIMMED LEAVING ONLY THE INFO RELEVANT FOR THE OQS INTEGRATION ***
+
+1. CONTENTS:
+   --------
+
+/                                              - Library C and header files                                     
+AMD64/                                         - Optimized implementation of the NTT for x64 platforms
+generic/                                       - Implementation of the NTT in portable C
+README.txt                                     - This readme file
+
+
+2. MAIN FEATURES:
+   -------------
+   
+- Supports arithmetic functions for computations in power-of-2 cyclotomic rings that are the basis for
+  implementing Ring-LWE-based cryptographic algorithms.
+- Supports a key exchange providing at least 128 bits of quantum and classical security.
+- All functions evaluating secret data have regular, constant-time execution, which provides protection 
+  against timing and cache attacks.
+- Basic implementation of the underlying arithmetic functions using portable C to enable support on
+  a wide range of platforms including x64, x86 and ARM.  
+- Optional high-performance implementation of the underlying arithmetic functions for x64 platforms on
+  Linux using assembly and AVX2 vector instructions.
+
+
+REFERENCES
+----------
+
+[1] C. Peikert, "Lattice cryptography for the internet", in Post-Quantum Cryptography - 6th International 
+    Workshop (PQCrypto 2014), LNCS 8772, pp. 197-219. Springer, 2014.
+[2] E. Alkim, L. Ducas, T. Pöppelmann and P. Schwabe, "Post-quantum key exchange - a new hope", IACR Cryp-
+    tology ePrint Archive, Report 2015/1092, 2015.
+[3] https://www.microsoft.com/en-us/research/project/lattice-cryptography-library/
diff --git a/crypt/liboqs/kex_rlwe_msrln16/generic/ntt.c b/crypt/liboqs/kex_rlwe_msrln16/generic/ntt.c
new file mode 100644
index 0000000000000000000000000000000000000000..bd7c3f4b63899a5ebd3a880689f2af7238e69cff
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/generic/ntt.c
@@ -0,0 +1,164 @@
+/****************************************************************************************
+* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: NTT functions and other polynomial operations
+*
+*****************************************************************************************/
+
+#include "../LatticeCrypto_priv.h"
+
+const uint32_t mask12 = ((uint64_t) 1 << 12) - 1;
+
+int32_t oqs_rlwe_msrln16_reduce12289(int64_t a) { // Reduction modulo q
+	int32_t c0, c1;
+
+	c0 = (int32_t)(a & mask12);
+	c1 = (int32_t)(a >> 12);
+
+	return (3 * c0 - c1);
+}
+
+int32_t oqs_rlwe_msrln16_reduce12289_2x(int64_t a) { // Two merged reductions modulo q
+	int32_t c0, c1, c2;
+
+	c0 = (int32_t)(a & mask12);
+	c1 = (int32_t)((a >> 12) & mask12);
+	c2 = (int32_t)(a >> 24);
+
+	return (9 * c0 - 3 * c1 + c2);
+}
+
+void oqs_rlwe_msrln16_NTT_CT_std2rev_12289(int32_t *a, const int32_t *psi_rev, unsigned int N) { // Forward NTT
+	unsigned int m, i, j, j1, j2, k = N;
+	int32_t S, U, V;
+
+	for (m = 1; m < 128; m = 2 * m) {
+		k = k >> 1;
+		for (i = 0; i < m; i++) {
+			j1 = 2 * i * k;
+			j2 = j1 + k - 1;
+			S = psi_rev[m + i];
+			for (j = j1; j <= j2; j++) {
+				U = a[j];
+				V = oqs_rlwe_msrln16_reduce12289((int64_t) a[j + k] * S);
+				a[j] = U + V;
+				a[j + k] = U - V;
+			}
+		}
+	}
+
+	k = 4;
+	for (i = 0; i < 128; i++) {
+		j1 = 8 * i;
+		j2 = j1 + 3;
+		S = psi_rev[i + 128];
+		for (j = j1; j <= j2; j++) {
+			U = oqs_rlwe_msrln16_reduce12289((int64_t) a[j]);
+			V = oqs_rlwe_msrln16_reduce12289_2x((int64_t) a[j + 4] * S);
+			a[j] = U + V;
+			a[j + 4] = U - V;
+		}
+	}
+
+	for (m = 256; m < N; m = 2 * m) {
+		k = k >> 1;
+		for (i = 0; i < m; i++) {
+			j1 = 2 * i * k;
+			j2 = j1 + k - 1;
+			S = psi_rev[m + i];
+			for (j = j1; j <= j2; j++) {
+				U = a[j];
+				V = oqs_rlwe_msrln16_reduce12289((int64_t) a[j + k] * S);
+				a[j] = U + V;
+				a[j + k] = U - V;
+			}
+		}
+	}
+	return;
+}
+
+void oqs_rlwe_msrln16_INTT_GS_rev2std_12289(int32_t *a, const int32_t *omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N) { // Inverse NTT
+	unsigned int m, h, i, j, j1, j2, k = 1;
+	int32_t S, U, V;
+	int64_t temp;
+
+	for (m = N; m > 2; m >>= 1) {
+		j1 = 0;
+		h = m >> 1;
+		for (i = 0; i < h; i++) {
+			j2 = j1 + k - 1;
+			S = omegainv_rev[h + i];
+			for (j = j1; j <= j2; j++) {
+				U = a[j];
+				V = a[j + k];
+				a[j] = U + V;
+				temp = (int64_t)(U - V) * S;
+				if (m == 32) {
+					a[j] = oqs_rlwe_msrln16_reduce12289((int64_t) a[j]);
+					a[j + k] = oqs_rlwe_msrln16_reduce12289_2x(temp);
+				} else {
+					a[j + k] = oqs_rlwe_msrln16_reduce12289(temp);
+				}
+			}
+			j1 = j1 + 2 * k;
+		}
+		k = 2 * k;
+	}
+	for (j = 0; j < k; j++) {
+		U = a[j];
+		V = a[j + k];
+		a[j] = oqs_rlwe_msrln16_reduce12289((int64_t)(U + V) * Ninv);
+		a[j + k] = oqs_rlwe_msrln16_reduce12289((int64_t)(U - V) * omegainv1N_rev);
+	}
+	return;
+}
+
+void oqs_rlwe_msrln16_two_reduce12289(int32_t *a, unsigned int N) { // Two consecutive reductions modulo q
+	unsigned int i;
+
+	for (i = 0; i < N; i++) {
+		a[i] = oqs_rlwe_msrln16_reduce12289((int64_t) a[i]);
+		a[i] = oqs_rlwe_msrln16_reduce12289((int64_t) a[i]);
+	}
+}
+
+void oqs_rlwe_msrln16_pmul(int32_t *a, int32_t *b, int32_t *c, unsigned int N) { // Component-wise multiplication
+	unsigned int i;
+
+	for (i = 0; i < N; i++) {
+		c[i] = oqs_rlwe_msrln16_reduce12289((int64_t) a[i] * b[i]);
+		c[i] = oqs_rlwe_msrln16_reduce12289((int64_t) c[i]);
+	}
+}
+
+void oqs_rlwe_msrln16_pmuladd(int32_t *a, int32_t *b, int32_t *c, int32_t *d, unsigned int N) { // Component-wise multiplication and addition
+	unsigned int i;
+
+	for (i = 0; i < N; i++) {
+		d[i] = oqs_rlwe_msrln16_reduce12289((int64_t) a[i] * b[i] + c[i]);
+		d[i] = oqs_rlwe_msrln16_reduce12289((int64_t) d[i]);
+	}
+}
+
+void oqs_rlwe_msrln16_smul(int32_t *a, int32_t scalar, unsigned int N) { // Component-wise multiplication with scalar
+	unsigned int i;
+
+	for (i = 0; i < N; i++) {
+		a[i] = a[i] * scalar;
+	}
+}
+
+void oqs_rlwe_msrln16_correction(int32_t *a, int32_t p, unsigned int N) { // Correction modulo q
+	unsigned int i;
+	int32_t mask;
+
+	for (i = 0; i < N; i++) {
+		mask = a[i] >> (4 * sizeof(int32_t) - 1);
+		a[i] += (p & mask) - p;
+		mask = a[i] >> (4 * sizeof(int32_t) - 1);
+		a[i] += (p & mask);
+	}
+}
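
Editorial note on the reduction routines above (not part of the patch): with q = 12289 = 3*2^12 + 1 and a = c1*2^12 + c0, we have

    3*a = 3*c1*2^12 + 3*c0 = c1*(q - 1) + 3*c0 ≡ 3*c0 - c1  (mod q),

so oqs_rlwe_msrln16_reduce12289(a) returns a value congruent to 3*a rather than a modulo q, and oqs_rlwe_msrln16_reduce12289_2x(a) is congruent to 9*a. This factor of 3 is the "prime_scale" referred to by the constants in ntt_constants.c (for example Ninv11_ntt1024_12289 = N^-1 * prime_scale^-11), and it is compensated in LatticeCrypto_kex.c by the oqs_rlwe_msrln16_smul(e, 3, ...) and oqs_rlwe_msrln16_smul(e, 81, ...) calls.
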
diff --git a/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.c b/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.c
new file mode 100644
index 0000000000000000000000000000000000000000..8bcca49055a0ac47d8bfaa73d046f4ed8d494221
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.c
@@ -0,0 +1,174 @@
+#if defined(WINDOWS)
+#define UNUSED
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#if !defined(WINDOWS)
+#include <strings.h>
+#include <unistd.h>
+#endif
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+#include "LatticeCrypto.h"
+#include "LatticeCrypto_priv.h"
+#include "kex_rlwe_msrln16.h"
+
+#if defined(WINDOWS)
+#define strdup _strdup // for strdup deprecation warning
+#endif
+
+OQS_KEX *OQS_KEX_rlwe_msrln16_new(OQS_RAND *rand) {
+
+	OQS_KEX *k = malloc(sizeof(OQS_KEX));
+	if (k == NULL) {
+		return NULL;
+	}
+
+	k->ctx = NULL;
+	k->method_name = strdup("RLWE MSR LN16");
+	k->estimated_classical_security = 128;
+	k->estimated_quantum_security = 128;
+	k->seed = NULL;
+	k->seed_len = 0;
+	k->named_parameters = NULL;
+	k->rand = rand;
+	k->params = NULL;
+	k->alice_0 = &OQS_KEX_rlwe_msrln16_alice_0;
+	k->bob = &OQS_KEX_rlwe_msrln16_bob;
+	k->alice_1 = &OQS_KEX_rlwe_msrln16_alice_1;
+	k->alice_priv_free = &OQS_KEX_rlwe_msrln16_alice_priv_free;
+	k->free = &OQS_KEX_rlwe_msrln16_free;
+
+	return k;
+}
+
+int OQS_KEX_rlwe_msrln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) {
+
+	int ret;
+
+	*alice_priv = NULL;
+	/* alice_msg is alice's public key */
+	*alice_msg = NULL;
+
+	*alice_msg = malloc(OQS_RLWE_MSRLN16_PKA_BYTES);
+	if (*alice_msg == NULL) {
+		goto err;
+	}
+	*alice_priv = malloc(1024 * sizeof(uint32_t));
+	if (*alice_priv == NULL) {
+		goto err;
+	}
+
+	if (oqs_rlwe_msrln16_KeyGeneration_A((int32_t *) *alice_priv, (unsigned char *) *alice_msg, k->rand) != CRYPTO_SUCCESS) {
+		goto err;
+	}
+	*alice_msg_len = OQS_RLWE_MSRLN16_PKA_BYTES;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*alice_msg);
+	*alice_msg = NULL;
+	free(*alice_priv);
+	*alice_priv = NULL;
+
+cleanup:
+	return ret;
+}
+
+int OQS_KEX_rlwe_msrln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+
+	*bob_msg = NULL;
+	*key = NULL;
+
+	if (alice_msg_len != OQS_RLWE_MSRLN16_PKA_BYTES) {
+		goto err;
+	}
+	*bob_msg = malloc(OQS_RLWE_MSRLN16_PKB_BYTES);
+	if (*bob_msg == NULL) {
+		goto err;
+	}
+	*key = malloc(OQS_RLWE_MSRLN16_SHAREDKEY_BYTES);
+	if (*key == NULL) {
+		goto err;
+	}
+
+	if (oqs_rlwe_msrln16_SecretAgreement_B((unsigned char *) alice_msg, (unsigned char *) *key, (unsigned char *) *bob_msg, k->rand) != CRYPTO_SUCCESS) {
+		goto err;
+	}
+
+	*key_len = OQS_RLWE_MSRLN16_SHAREDKEY_BYTES;
+	*bob_msg_len = OQS_RLWE_MSRLN16_PKB_BYTES;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*bob_msg);
+	*bob_msg = NULL;
+	free(*key);
+	*key = NULL;
+
+cleanup:
+
+	return ret;
+}
+
+int OQS_KEX_rlwe_msrln16_alice_1(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+
+	*key = NULL;
+
+	if (bob_msg_len != OQS_RLWE_MSRLN16_PKB_BYTES) {
+		goto err;
+	}
+
+	*key = malloc(OQS_RLWE_MSRLN16_SHAREDKEY_BYTES);
+	if (*key == NULL) {
+		goto err;
+	}
+
+	if (oqs_rlwe_msrln16_SecretAgreement_A((unsigned char *) bob_msg, (int32_t *) alice_priv, (unsigned char *) *key) != CRYPTO_SUCCESS) {
+		goto err;
+	}
+
+	*key_len = OQS_RLWE_MSRLN16_SHAREDKEY_BYTES;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*key);
+	*key = NULL;
+
+cleanup:
+
+	return ret;
+}
+
+void OQS_KEX_rlwe_msrln16_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) {
+	if (alice_priv) {
+		free(alice_priv);
+	}
+}
+
+void OQS_KEX_rlwe_msrln16_free(OQS_KEX *k) {
+	if (!k) {
+		return;
+	}
+	free(k->method_name);
+	k->method_name = NULL;
+	free(k);
+}
diff --git a/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.h b/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.h
new file mode 100644
index 0000000000000000000000000000000000000000..ad1ee4f5202998afed40e13de1783d237e7767ab
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/kex_rlwe_msrln16.h
@@ -0,0 +1,24 @@
+/**
+ * \file kex_rlwe_msrln16.h
+ * \brief Header for ring-LWE key exchange protocol from the Microsoft LatticeCrypto library
+ */
+
+#ifndef __OQS_KEX_RLWE_MSRLN16_H
+#define __OQS_KEX_RLWE_MSRLN16_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+OQS_KEX *OQS_KEX_rlwe_msrln16_new(OQS_RAND *rand);
+
+int OQS_KEX_rlwe_msrln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
+int OQS_KEX_rlwe_msrln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
+int OQS_KEX_rlwe_msrln16_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
+
+void OQS_KEX_rlwe_msrln16_alice_priv_free(OQS_KEX *k, void *alice_priv);
+void OQS_KEX_rlwe_msrln16_free(OQS_KEX *k);
+
+#endif
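
Editorial note (not part of the patch): these are the entry points that OQS_KEX_rlwe_msrln16_new() wires into the generic liboqs key-exchange object, following the usual alice_0 -> bob -> alice_1 flow. A minimal sketch of driving them directly, assuming an already-initialized OQS_RAND *rand and a hypothetical helper name example_oqs_kex; each call returns 1 on success and 0 on error, and all output buffers are allocated by the library and freed by the caller.

#include <stdlib.h>
#include <string.h>
#include "kex_rlwe_msrln16.h"

static int example_oqs_kex(OQS_RAND *rand) {
	OQS_KEX *kex = OQS_KEX_rlwe_msrln16_new(rand);
	void *alice_priv = NULL;
	uint8_t *alice_msg = NULL, *bob_msg = NULL, *key_a = NULL, *key_b = NULL;
	size_t alice_msg_len = 0, bob_msg_len = 0, key_a_len = 0, key_b_len = 0;
	int ok = 0;

	if (kex == NULL)
		return 0;
	/* Alice's first message (her public key) */
	if (!OQS_KEX_rlwe_msrln16_alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len))
		goto cleanup;
	/* Bob's response plus his copy of the shared key */
	if (!OQS_KEX_rlwe_msrln16_bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &key_b, &key_b_len))
		goto cleanup;
	/* Alice derives her copy of the shared key from Bob's message */
	if (!OQS_KEX_rlwe_msrln16_alice_1(kex, alice_priv, bob_msg, bob_msg_len, &key_a, &key_a_len))
		goto cleanup;
	ok = (key_a_len == key_b_len) && (memcmp(key_a, key_b, key_a_len) == 0);

cleanup:
	free(alice_msg);
	free(bob_msg);
	free(key_a);
	free(key_b);
	OQS_KEX_rlwe_msrln16_alice_priv_free(kex, alice_priv);
	OQS_KEX_rlwe_msrln16_free(kex);
	return ok;
}

On success both key buffers hold the same OQS_RLWE_MSRLN16_SHAREDKEY_BYTES (32) bytes.
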
diff --git a/crypt/liboqs/kex_rlwe_msrln16/ntt_constants.c b/crypt/liboqs/kex_rlwe_msrln16/ntt_constants.c
new file mode 100644
index 0000000000000000000000000000000000000000..828324ac67e9b5bb55bc4dc0a425674924a41c2a
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_msrln16/ntt_constants.c
@@ -0,0 +1,136 @@
+/****************************************************************************************
+* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: fixed constants for the Number Theoretic Transform (NTT)
+*
+*****************************************************************************************/
+
+#include "LatticeCrypto_priv.h"
+
+// N^-1 * prime_scale^-8
+const int32_t Ninv8_ntt1024_12289 = 8350;
+// N^-1 * prime_scale^-7 * omegainv_rev_ntt1024_12289[1]
+const int32_t omegainv7N_rev_ntt1024_12289 = 795;
+// N^-1 * prime_scale^-11
+const int32_t Ninv11_ntt1024_12289 = 2585;
+// N^-1 * prime_scale^-10 * omegainv_rev_ntt1024_12289[1]
+const int32_t omegainv10N_rev_ntt1024_12289 = 10953;
+
+// Index-reversed matrices containing powers of psi (psi_rev_nttxxx_yyy) and inverse powers of omega (omegainv_rev_nttxxx_yyy),
+// where xxx is parameter N and yyy is the prime q.
+
+const int32_t psi_rev_ntt1024_12289[1024] = {
+    8193, 493, 6845, 9908, 1378, 10377, 7952, 435, 10146, 1065, 404, 7644, 1207, 3248, 11121, 5277, 2437, 3646, 2987, 6022, 9867, 6250, 10102, 9723, 1002, 7278, 4284, 7201,
+    875, 3780, 1607, 4976, 8146, 4714, 242, 1537, 3704, 9611, 5019, 545, 5084, 10657, 4885, 11272, 3066, 12262, 3763, 10849, 2912, 5698, 11935, 4861, 7277, 9808, 11244, 2859,
+    7188, 1067, 2401, 11847, 390, 11516, 8511, 3833, 2780, 7094, 4895, 1484, 2305, 5042, 8236, 2645, 7875, 9442, 2174, 7917, 1689, 3364, 4057, 3271, 10863, 4654, 1777, 10626,
+    3636, 7351, 9585, 6998, 160, 3149, 4437, 12286, 10123, 3915, 7370, 12176, 4048, 2249, 2884, 1153, 9103, 6882, 2126, 10659, 3510, 5332, 2865, 9919, 9320, 8311, 9603, 9042,
+    3016, 12046, 9289, 11618, 7098, 3136, 9890, 3400, 2178, 1544, 5559, 420, 8304, 4905, 476, 3531, 9326, 4896, 9923, 3051, 3091, 81, 1000, 4320, 1177, 8034, 9521, 10654, 11563,
+    7678, 10436, 12149, 3014, 9088, 5086, 1326, 11119, 2319, 11334, 790, 2747, 7443, 3135, 3712, 1062, 9995, 7484, 8736, 9283, 2744, 11726, 2975, 9664, 949, 7468, 9650, 7266,
+    5828, 6561, 7698, 3328, 6512, 1351, 7311, 8155, 5736, 722, 10984, 4043, 7143, 10810, 1, 8668, 2545, 3504, 8747, 11077, 1646, 9094, 5860, 1759, 8582, 3694, 7110, 8907, 11934,
+    8058, 9741, 9558, 3932, 5911, 4890, 3637, 8830, 5542, 12144, 5755, 7657, 7901, 11029, 11955, 9863, 10861, 1696, 3284, 2881, 7197, 2089, 9000, 2013, 729, 9048, 11809, 2842,
+    11267, 9, 6498, 544, 2468, 339, 1381, 2525, 8112, 3584, 6958, 4989, 10616, 8011, 5374, 9452, 12159, 4354, 9893, 7837, 3296, 8340, 7222, 2197, 118, 2476, 5767, 827, 8541,
+    11336, 3434, 3529, 2908, 12071, 2361, 1843, 3030, 8174, 6147, 9842, 8326, 576, 10335, 10238, 10484, 9407, 11836, 5908, 418, 3772, 7515, 5429, 7552, 10996, 12133, 2767, 3969,
+    8298, 6413, 10008, 2031, 5333, 10800, 9789, 10706, 5942, 1263, 49, 5915, 10806, 11939, 10777, 1815, 5383, 3202, 4493, 6920, 10232, 1975, 8532, 2925, 347, 4754, 1858, 11863,
+    8974, 9551, 5868, 9634, 5735, 11566, 12115, 10596, 3009, 6190, 11994, 6523, 652, 3762, 9370, 4016, 4077, 8561, 4049, 5990, 11130, 11143, 948, 325, 1404, 6992, 6119, 8333,
+    10929, 1200, 5184, 2555, 6122, 1594, 10327, 7183, 5961, 2692, 12121, 4298, 3329, 5919, 4433, 8455, 7032, 1747, 3123, 3054, 6803, 5782, 10723, 9341, 2503, 683, 2459, 3656,
+    64, 4240, 3570, 835, 6065, 4046, 11580, 10970, 3150, 10331, 4322, 2078, 1112, 4079, 11231, 441, 922, 1050, 4536, 6844, 8429, 2683, 11099, 3818, 6171, 8500, 12142, 6833, 4449,
+    4749, 6752, 7500, 7822, 8214, 6974, 7965, 7373, 2169, 522, 5079, 3262, 10316, 6715, 1278, 9945, 3514, 11248, 11271, 5925, 468, 3988, 382, 11973, 5339, 6843, 6196, 8579, 2033,
+    8291, 1922, 3879, 11035, 973, 6854, 10930, 5206, 6760, 3199, 56, 3565, 654, 1702, 10302, 5862, 6153, 5415, 8646, 11889, 10561, 7341, 6152, 7232, 4698, 8844, 4780, 10240, 4912,
+    1321, 12097, 7048, 2920, 3127, 4169, 11502, 3482, 11279, 5468, 5874, 11612, 6055, 8953, 52, 3174, 10966, 9523, 151, 2127, 3957, 2839, 9784, 6383, 1579, 431, 7507, 5886, 3029,
+    6695, 4213, 504, 11684, 2302, 8689, 9026, 4624, 6212, 11868, 4080, 6221, 8687, 1003, 8757, 241, 58, 5009, 10333, 885, 6281, 3438, 9445, 11314, 8077, 6608, 3477, 142, 1105,
+    8841, 343, 4538, 1908, 1208, 4727, 7078, 10423, 10125, 6873, 11573, 10179, 416, 814, 1705, 2450, 8700, 717, 9307, 1373, 8186, 2429, 10568, 10753, 7228, 11071, 438, 8774, 5993,
+    3278, 4209, 6877, 3449, 1136, 3708, 3238, 2926, 1826, 4489, 3171, 8024, 8611, 1928, 464, 3205, 8930, 7080, 1092, 10900, 10221, 11943, 4404, 9126, 4032, 7449, 6127, 8067, 10763,
+    125, 540, 8921, 8062, 612, 8051, 12229, 9572, 9089, 10754, 10029, 68, 6453, 7723, 4781, 4924, 1014, 448, 3942, 5232, 1327, 8682, 3744, 7326, 3056, 9761, 5845, 5588, 412, 7187,
+    3975, 4883, 3087, 6454, 2257, 7784, 5676, 1417, 8400, 11710, 5596, 5987, 9175, 2769, 5966, 212, 6555, 11113, 5508, 11014, 1125, 4860, 10844, 1131, 4267, 6636, 2275, 9828, 5063,
+    4176, 3765, 1518, 8794, 4564, 10224, 5826, 3534, 3961, 4145, 10533, 506, 11034, 6505, 10897, 2674, 10077, 3338, 9013, 3511, 6811, 11111, 2776, 1165, 2575, 8881, 10347, 377,
+    4578, 11914, 10669, 10104, 392, 10453, 425, 9489, 193, 2231, 6197, 1038, 11366, 6204, 8122, 2894, 3654, 10975, 10545, 6599, 2455, 11951, 3947, 20, 5002, 5163, 4608, 8946, 8170,
+    10138, 1522, 8665, 10397, 3344, 5598, 10964, 6565, 11260, 1945, 11041, 9847, 7174, 4939, 2148, 6330, 3959, 5797, 4913, 3528, 8054, 3825, 8914, 9998, 4335, 8896, 9342, 3982,
+    6680, 11653, 7790, 6617, 1737, 622, 10485, 10886, 6195, 7100, 1687, 406, 12143, 5268, 9389, 12050, 994, 7735, 5464, 7383, 4670, 512, 364, 9929, 3028, 5216, 5518, 1226, 7550,
+    8038, 7043, 7814, 11053, 3017, 3121, 7584, 2600, 11232, 6780, 12085, 5219, 1409, 9600, 4605, 8151, 12109, 463, 8882, 8308, 10821, 9247, 10945, 9806, 2054, 6203, 6643, 3120,
+    6105, 8348, 8536, 6919, 8753, 11007, 8717, 9457, 2021, 9060, 4730, 3929, 10583, 3723, 845, 1936, 7, 5054, 3154, 3285, 4360, 3805, 11522, 2213, 4153, 12239, 12073, 5526, 769,
+    4099, 3944, 5604, 5530, 11024, 9282, 2171, 3480, 7434, 8520, 3232, 11996, 9656, 1406, 2945, 5349, 7207, 4590, 11607, 11309, 5202, 844, 7082, 4050, 8016, 9068, 9694, 8452, 7000,
+    5662, 567, 2941, 8619, 3808, 4987, 2373, 5135, 63, 7605, 3360, 11839, 10345, 578, 6921, 7628, 510, 5386, 2622, 7806, 5703, 10783, 9224, 11379, 5900, 4719, 11538, 3502, 5789,
+    10631, 5618, 826, 5043, 3090, 10891, 9951, 7596, 2293, 11872, 6151, 3469, 4443, 8871, 1555, 1802, 5103, 1891, 1223, 2334, 7878, 1590, 881, 365, 1927, 11274, 4510, 9652, 2946,
+    6828, 1280, 614, 10918, 12265, 7250, 6742, 9804, 11385, 2276, 11307, 2593, 879, 7899, 8071, 3454, 8531, 3795, 9021, 5776, 1849, 7766, 7988, 457, 8, 530, 9663, 7785, 11511, 3578,
+    7592, 10588, 3466, 8972, 9757, 3332, 139, 2046, 2940, 10808, 9332, 874, 2301, 5650, 12119, 150, 648, 8000, 9982, 9416, 2827, 2434, 11498, 6481, 12268, 9754, 11169, 11823, 11259,
+    3821, 10608, 2929, 6263, 4649, 6320, 9687, 10388, 502, 5118, 8496, 6226, 10716, 8443, 7624, 6883, 9269, 6616, 8620, 5287, 944, 7519, 6125, 1882, 11249, 10254, 5410, 1251, 1790,
+    5275, 8449, 10447, 4113, 72, 2828, 4352, 7455, 2712, 11048, 7911, 3451, 4094, 6508, 3045, 11194, 2643, 1783, 7211, 4974, 7724, 9811, 9449, 3019, 4194, 2730, 6878, 10421, 2253,
+    4518, 9195, 7469, 11129, 9173, 12100, 1763, 2209, 9617, 5170, 865, 1279, 1694, 10759, 8420, 4423, 10555, 3815, 5832, 10939};
+
+const int32_t omegainv_rev_ntt1024_12289[1024] = {
+    8193, 11796, 2381, 5444, 11854, 4337, 1912, 10911, 7012, 1168, 9041, 11082, 4645, 11885, 11224, 2143, 7313, 10682, 8509, 11414, 5088, 8005, 5011, 11287, 2566, 2187, 6039, 2422,
+    6267, 9302, 8643, 9852, 8456, 3778, 773, 11899, 442, 9888, 11222, 5101, 9430, 1045, 2481, 5012, 7428, 354, 6591, 9377, 1440, 8526, 27, 9223, 1017, 7404, 1632, 7205, 11744, 7270,
+    2678, 8585, 10752, 12047, 7575, 4143, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 8889, 2399, 9153, 5191, 671, 3000, 243, 9273, 3247, 2686, 3978, 2969, 2370, 9424, 6957,
+    8779, 1630, 10163, 5407, 3186, 11136, 9405, 10040, 8241, 113, 4919, 8374, 2166, 3, 7852, 9140, 12129, 5291, 2704, 4938, 8653, 1663, 10512, 7635, 1426, 9018, 8232, 8925, 10600, 4372,
+    10115, 2847, 4414, 9644, 4053, 7247, 9984, 10805, 7394, 5195, 9509, 953, 3748, 11462, 6522, 9813, 12171, 10092, 5067, 3949, 8993, 4452, 2396, 7935, 130, 2837, 6915, 4278, 1673, 7300,
+    5331, 8705, 4177, 9764, 10908, 11950, 9821, 11745, 5791, 12280, 1022, 9447, 480, 3241, 11560, 10276, 3289, 10200, 5092, 9408, 9005, 10593, 1428, 2426, 334, 1260, 4388, 4632, 6534,
+    145, 6747, 3459, 8652, 7399, 6378, 8357, 2731, 2548, 4231, 355, 3382, 5179, 8595, 3707, 10530, 6429, 3195, 10643, 1212, 3542, 8785, 9744, 3621, 12288, 1479, 5146, 8246, 1305, 11567,
+    6553, 4134, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, 2639, 4821, 11340, 2625, 9314, 563, 9545, 3006, 3553, 4805, 2294, 11227, 8577, 9154, 4846, 9542, 11499, 955, 9970, 1170,
+    10963, 7203, 3201, 9275, 140, 1853, 4611, 726, 1635, 2768, 4255, 11112, 7969, 11289, 12208, 9198, 9238, 2366, 7393, 2963, 11184, 12147, 8812, 5681, 4212, 975, 2844, 8851, 6008, 11404,
+    1956, 7280, 12231, 12048, 3532, 11286, 3602, 6068, 8209, 421, 6077, 7665, 3263, 3600, 9987, 605, 11785, 8076, 5594, 9260, 6403, 4782, 11858, 10710, 5906, 2505, 9450, 8332, 10162,
+    12138, 2766, 1323, 9115, 12237, 3336, 6234, 677, 6415, 6821, 1010, 8807, 787, 8120, 9162, 9369, 5241, 192, 10968, 7377, 2049, 7509, 3445, 7591, 5057, 6137, 4948, 1728, 400, 3643,
+    6874, 6136, 6427, 1987, 10587, 11635, 8724, 12233, 9090, 5529, 7083, 1359, 5435, 11316, 1254, 8410, 10367, 3998, 10256, 3710, 6093, 5446, 6950, 316, 11907, 8301, 11821, 6364, 1018,
+    1041, 8775, 2344, 11011, 5574, 1973, 9027, 7210, 11767, 10120, 4916, 4324, 5315, 4075, 4467, 4789, 5537, 7540, 7840, 5456, 147, 3789, 6118, 8471, 1190, 9606, 3860, 5445, 7753, 11239,
+    11367, 11848, 1058, 8210, 11177, 10211, 7967, 1958, 9139, 1319, 709, 8243, 6224, 11454, 8719, 8049, 12225, 8633, 9830, 11606, 9786, 2948, 1566, 6507, 5486, 9235, 9166, 10542, 5257,
+    3834, 7856, 6370, 8960, 7991, 168, 9597, 6328, 5106, 1962, 10695, 6167, 9734, 7105, 11089, 1360, 3956, 6170, 5297, 10885, 11964, 11341, 1146, 1159, 6299, 8240, 3728, 8212, 8273, 2919,
+    8527, 11637, 5766, 295, 6099, 9280, 1693, 174, 723, 6554, 2655, 6421, 2738, 3315, 426, 10431, 7535, 11942, 9364, 3757, 10314, 2057, 5369, 7796, 9087, 6906, 10474, 1512, 350, 1483,
+    6374, 12240, 11026, 6347, 1583, 2500, 1489, 6956, 10258, 2281, 5876, 3991, 8320, 9522, 156, 1293, 4737, 6860, 4774, 8517, 11871, 6381, 453, 2882, 1805, 2051, 1954, 11713, 3963, 2447,
+    6142, 4115, 9259, 10446, 9928, 218, 9381, 8760, 8855, 1350, 6457, 8474, 1734, 7866, 3869, 1530, 10595, 11010, 11424, 7119, 2672, 10080, 10526, 189, 3116, 1160, 4820, 3094, 7771, 10036,
+    1868, 5411, 9559, 8095, 9270, 2840, 2478, 4565, 7315, 5078, 10506, 9646, 1095, 9244, 5781, 8195, 8838, 4378, 1241, 9577, 4834, 7937, 9461, 12217, 8176, 1842, 3840, 7014, 10499, 11038,
+    6879, 2035, 1040, 10407, 6164, 4770, 11345, 7002, 3669, 5673, 3020, 5406, 4665, 3846, 1573, 6063, 3793, 7171, 11787, 1901, 2602, 5969, 7640, 6026, 9360, 1681, 8468, 1030, 466, 1120,
+    2535, 21, 5808, 791, 9855, 9462, 2873, 2307, 4289, 11641, 12139, 170, 6639, 9988, 11415, 2957, 1481, 9349, 10243, 12150, 8957, 2532, 3317, 8823, 1701, 4697, 8711, 778, 4504, 2626,
+    11759, 12281, 11832, 4301, 4523, 10440, 6513, 3268, 8494, 3758, 8835, 4218, 4390, 11410, 9696, 982, 10013, 904, 2485, 5547, 5039, 24, 1371, 11675, 11009, 5461, 9343, 2637, 7779, 1015,
+    10362, 11924, 11408, 10699, 4411, 9955, 11066, 10398, 7186, 10487, 10734, 3418, 7846, 8820, 6138, 417, 9996, 4693, 2338, 1398, 9199, 7246, 11463, 6671, 1658, 6500, 8787, 751, 7570,
+    6389, 910, 3065, 1506, 6586, 4483, 9667, 6903, 11779, 4661, 5368, 11711, 1944, 450, 8929, 4684, 12226, 7154, 9916, 7302, 8481, 3670, 9348, 11722, 6627, 5289, 3837, 2595, 3221, 4273,
+    8239, 5207, 11445, 7087, 980, 682, 7699, 5082, 6940, 9344, 10883, 2633, 293, 9057, 3769, 4855, 8809, 10118, 3007, 1265, 6759, 6685, 8345, 8190, 11520, 6763, 216, 50, 8136, 10076, 767,
+    8484, 7929, 9004, 9135, 7235, 12282, 10353, 11444, 8566, 1706, 8360, 7559, 3229, 10268, 2832, 3572, 1282, 3536, 5370, 3753, 3941, 6184, 9169, 5646, 6086, 10235, 2483, 1344, 3042, 1468,
+    3981, 3407, 11826, 180, 4138, 7684, 2689, 10880, 7070, 204, 5509, 1057, 9689, 4705, 9168, 9272, 1236, 4475, 5246, 4251, 4739, 11063, 6771, 7073, 9261, 2360, 11925, 11777, 7619, 4906,
+    6825, 4554, 11295, 239, 2900, 7021, 146, 11883, 10602, 5189, 6094, 1403, 1804, 11667, 10552, 5672, 4499, 636, 5609, 8307, 2947, 3393, 7954, 2291, 3375, 8464, 4235, 8761, 7376, 6492,
+    8330, 5959, 10141, 7350, 5115, 2442, 1248, 10344, 1029, 5724, 1325, 6691, 8945, 1892, 3624, 10767, 2151, 4119, 3343, 7681, 7126, 7287, 12269, 8342, 338, 9834, 5690, 1744, 1314, 8635,
+    9395, 4167, 6085, 923, 11251, 6092, 10058, 12096, 2800, 11864, 1836, 11897, 2185, 1620, 375, 7711, 11912, 1942, 3408, 9714, 11124, 9513, 1178, 5478, 8778, 3276, 8951, 2212, 9615, 1392,
+    5784, 1255, 11783, 1756, 8144, 8328, 8755, 6463, 2065, 7725, 3495, 10771, 8524, 8113, 7226, 2461, 10014, 5653, 8022, 11158, 1445, 7429, 11164, 1275, 6781, 1176, 5734, 12077, 6323, 9520,
+    3114, 6302, 6693, 579, 3889, 10872, 6613, 4505, 10032, 5835, 9202, 7406, 8314, 5102, 11877, 6701, 6444, 2528, 9233, 4963, 8545, 3607, 10962, 7057, 8347, 11841, 11275, 7365, 7508, 4566,
+    5836, 12221, 2260, 1535, 3200, 2717, 60, 4238, 11677, 4227, 3368, 11749, 12164, 1526, 4222, 6162, 4840, 8257, 3163, 7885, 346, 2068, 1389, 11197, 5209, 3359, 9084, 11825, 10361, 3678,
+    4265, 9118, 7800, 10463, 9363, 9051, 8581, 11153, 8840, 5412, 8080, 9011, 6296, 3515, 11851, 1218, 5061, 1536, 1721, 9860, 4103, 10916, 2982, 11572, 3589, 9839, 10584, 11475, 11873,
+    2110, 716, 5416, 2164, 1866, 5211, 7562, 11081, 10381, 7751, 11946, 3448};
+
+const int32_t psi_rev_ntt512_12289[512] = {
+    8193, 493, 6845, 9908, 1378, 10377, 7952, 435, 10146, 1065, 404, 7644, 1207, 3248, 11121, 5277, 2437, 3646, 2987, 6022, 9867, 6250, 10102, 9723, 1002, 7278, 4284, 7201, 875, 3780, 1607,
+    4976, 8146, 4714, 242, 1537, 3704, 9611, 5019, 545, 5084, 10657, 4885, 11272, 3066, 12262, 3763, 10849, 2912, 5698, 11935, 4861, 7277, 9808, 11244, 2859, 7188, 1067, 2401, 11847, 390,
+    11516, 8511, 3833, 2780, 7094, 4895, 1484, 2305, 5042, 8236, 2645, 7875, 9442, 2174, 7917, 1689, 3364, 4057, 3271, 10863, 4654, 1777, 10626, 3636, 7351, 9585, 6998, 160, 3149, 4437,
+    12286, 10123, 3915, 7370, 12176, 4048, 2249, 2884, 1153, 9103, 6882, 2126, 10659, 3510, 5332, 2865, 9919, 9320, 8311, 9603, 9042, 3016, 12046, 9289, 11618, 7098, 3136, 9890, 3400, 2178,
+    1544, 5559, 420, 8304, 4905, 476, 3531, 9326, 4896, 9923, 3051, 3091, 81, 1000, 4320, 1177, 8034, 9521, 10654, 11563, 7678, 10436, 12149, 3014, 9088, 5086, 1326, 11119, 2319, 11334, 790,
+    2747, 7443, 3135, 3712, 1062, 9995, 7484, 8736, 9283, 2744, 11726, 2975, 9664, 949, 7468, 9650, 7266, 5828, 6561, 7698, 3328, 6512, 1351, 7311, 8155, 5736, 722, 10984, 4043, 7143, 10810,
+    1, 8668, 2545, 3504, 8747, 11077, 1646, 9094, 5860, 1759, 8582, 3694, 7110, 8907, 11934, 8058, 9741, 9558, 3932, 5911, 4890, 3637, 8830, 5542, 12144, 5755, 7657, 7901, 11029, 11955, 9863,
+    10861, 1696, 3284, 2881, 7197, 2089, 9000, 2013, 729, 9048, 11809, 2842, 11267, 9, 6498, 544, 2468, 339, 1381, 2525, 8112, 3584, 6958, 4989, 10616, 8011, 5374, 9452, 12159, 4354, 9893,
+    7837, 3296, 8340, 7222, 2197, 118, 2476, 5767, 827, 8541, 11336, 8855, 8760, 9381, 218, 9928, 10446, 9259, 4115, 6142, 2447, 3963, 11713, 1954, 2051, 1805, 2882, 453, 6381, 11871, 8517,
+    4774, 6860, 4737, 1293, 156, 9522, 8320, 3991, 5876, 2281, 10258, 6956, 1489, 2500, 1583, 6347, 11026, 12240, 6374, 1483, 350, 1512, 10474, 6906, 9087, 7796, 5369, 2057, 10314, 3757,
+    9364, 11942, 7535, 10431, 426, 3315, 2738, 6421, 2655, 6554, 723, 174, 1693, 9280, 6099, 295, 5766, 11637, 8527, 2919, 8273, 8212, 3728, 8240, 6299, 1159, 1146, 11341, 11964, 10885, 5297,
+    6170, 3956, 1360, 11089, 7105, 9734, 6167, 10695, 1962, 5106, 6328, 9597, 168, 7991, 8960, 6370, 7856, 3834, 5257, 10542, 9166, 9235, 5486, 6507, 1566, 2948, 9786, 11606, 9830, 8633,
+    12225, 8049, 8719, 11454, 6224, 8243, 709, 1319, 9139, 1958, 7967, 10211, 11177, 8210, 1058, 11848, 11367, 11239, 7753, 5445, 3860, 9606, 1190, 8471, 6118, 3789, 147, 5456, 7840, 7540,
+    5537, 4789, 4467, 4075, 5315, 4324, 4916, 10120, 11767, 7210, 9027, 1973, 5574, 11011, 2344, 8775, 1041, 1018, 6364, 11821, 8301, 11907, 316, 6950, 5446, 6093, 3710, 10256, 3998, 10367,
+    8410, 1254, 11316, 5435, 1359, 7083, 5529, 9090, 12233, 8724, 11635, 10587, 1987, 6427, 6136, 6874, 3643, 400, 1728, 4948, 6137, 5057, 7591, 3445, 7509, 2049, 7377, 10968, 192, 5241, 9369,
+    9162, 8120, 787, 8807, 1010, 6821, 6415, 677, 6234, 3336, 12237, 9115, 1323, 2766, 12138, 10162, 8332, 9450, 2505, 5906, 10710, 11858, 4782, 6403, 9260, 5594, 8076, 11785, 605, 9987, 3600,
+    3263, 7665, 6077, 421, 8209, 6068, 3602, 11286, 3532, 12048, 12231, 7280, 1956, 11404, 6008, 8851, 2844, 975, 4212, 5681, 8812, 12147, 11184};
+
+const int32_t omegainv_rev_ntt512_12289[512] = {
+    8193, 11796, 2381, 5444, 11854, 4337, 1912, 10911, 7012, 1168, 9041, 11082, 4645, 11885, 11224, 2143, 7313, 10682, 8509, 11414, 5088, 8005, 5011, 11287, 2566, 2187, 6039, 2422, 6267, 9302,
+    8643, 9852, 8456, 3778, 773, 11899, 442, 9888, 11222, 5101, 9430, 1045, 2481, 5012, 7428, 354, 6591, 9377, 1440, 8526, 27, 9223, 1017, 7404, 1632, 7205, 11744, 7270, 2678, 8585, 10752,
+    12047, 7575, 4143, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 8889, 2399, 9153, 5191, 671, 3000, 243, 9273, 3247, 2686, 3978, 2969, 2370, 9424, 6957, 8779, 1630, 10163, 5407, 3186,
+    11136, 9405, 10040, 8241, 113, 4919, 8374, 2166, 3, 7852, 9140, 12129, 5291, 2704, 4938, 8653, 1663, 10512, 7635, 1426, 9018, 8232, 8925, 10600, 4372, 10115, 2847, 4414, 9644, 4053, 7247,
+    9984, 10805, 7394, 5195, 9509, 953, 3748, 11462, 6522, 9813, 12171, 10092, 5067, 3949, 8993, 4452, 2396, 7935, 130, 2837, 6915, 4278, 1673, 7300, 5331, 8705, 4177, 9764, 10908, 11950, 9821,
+    11745, 5791, 12280, 1022, 9447, 480, 3241, 11560, 10276, 3289, 10200, 5092, 9408, 9005, 10593, 1428, 2426, 334, 1260, 4388, 4632, 6534, 145, 6747, 3459, 8652, 7399, 6378, 8357, 2731, 2548,
+    4231, 355, 3382, 5179, 8595, 3707, 10530, 6429, 3195, 10643, 1212, 3542, 8785, 9744, 3621, 12288, 1479, 5146, 8246, 1305, 11567, 6553, 4134, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023,
+    2639, 4821, 11340, 2625, 9314, 563, 9545, 3006, 3553, 4805, 2294, 11227, 8577, 9154, 4846, 9542, 11499, 955, 9970, 1170, 10963, 7203, 3201, 9275, 140, 1853, 4611, 726, 1635, 2768, 4255,
+    11112, 7969, 11289, 12208, 9198, 9238, 2366, 7393, 2963, 1105, 142, 3477, 6608, 8077, 11314, 9445, 3438, 6281, 885, 10333, 5009, 58, 241, 8757, 1003, 8687, 6221, 4080, 11868, 6212, 4624,
+    9026, 8689, 2302, 11684, 504, 4213, 6695, 3029, 5886, 7507, 431, 1579, 6383, 9784, 2839, 3957, 2127, 151, 9523, 10966, 3174, 52, 8953, 6055, 11612, 5874, 5468, 11279, 3482, 11502, 4169,
+    3127, 2920, 7048, 12097, 1321, 4912, 10240, 4780, 8844, 4698, 7232, 6152, 7341, 10561, 11889, 8646, 5415, 6153, 5862, 10302, 1702, 654, 3565, 56, 3199, 6760, 5206, 10930, 6854, 973, 11035,
+    3879, 1922, 8291, 2033, 8579, 6196, 6843, 5339, 11973, 382, 3988, 468, 5925, 11271, 11248, 3514, 9945, 1278, 6715, 10316, 3262, 5079, 522, 2169, 7373, 7965, 6974, 8214, 7822, 7500, 6752,
+    4749, 4449, 6833, 12142, 8500, 6171, 3818, 11099, 2683, 8429, 6844, 4536, 1050, 922, 441, 11231, 4079, 1112, 2078, 4322, 10331, 3150, 10970, 11580, 4046, 6065, 835, 3570, 4240, 64, 3656,
+    2459, 683, 2503, 9341, 10723, 5782, 6803, 3054, 3123, 1747, 7032, 8455, 4433, 5919, 3329, 4298, 12121, 2692, 5961, 7183, 10327, 1594, 6122, 2555, 5184, 1200, 10929, 8333, 6119, 6992, 1404,
+    325, 948, 11143, 11130, 5990, 4049, 8561, 4077, 4016, 9370, 3762, 652, 6523, 11994, 6190, 3009, 10596, 12115, 11566, 5735, 9634, 5868, 9551, 8974, 11863, 1858, 4754, 347, 2925, 8532, 1975,
+    10232, 6920, 4493, 3202, 5383, 1815, 10777, 11939, 10806, 5915, 49, 1263, 5942, 10706, 9789, 10800, 5333, 2031, 10008, 6413, 8298, 3969, 2767, 12133, 10996, 7552, 5429, 7515, 3772, 418, 5908,
+    11836, 9407, 10484, 10238, 10335, 576, 8326, 9842, 6147, 8174, 3030, 1843, 2361, 12071, 2908, 3529, 3434};
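+
+/* Editorial note, not part of the imported upstream source: judging by their
+ * names, the two tables above appear to be precomputed twiddle factors for a
+ * 512-point NTT modulo q = 12289 -- powers of a root of unity stored in
+ * bit-reversed order for the forward transform, with omegainv_rev_ntt512_12289
+ * holding the corresponding inverse powers used by the inverse transform. */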
diff --git a/crypt/liboqs/kex_rlwe_newhope/LICENSE.txt b/crypt/liboqs/kex_rlwe_newhope/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ae012a47e6d65b106cb25938857c57975b37c368
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/LICENSE.txt
@@ -0,0 +1,9 @@
+The files in this directory (except kex_rlwe_newhope.*) were originally written 
+by Erdem Alkim, Léo Ducas, Thomas Pöppelmann, and Peter Schwabe 
+(https://github.com/tpoeppelmann/newhope).
+
+
+The following license applies to all files in the src/kex_rlwe_newhope directory.
+
+
+Public domain.
diff --git a/crypt/liboqs/kex_rlwe_newhope/Makefile.am b/crypt/liboqs/kex_rlwe_newhope/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..ae47e2c5f8fade19d0d6847a92676ff4c7a32859
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/Makefile.am
@@ -0,0 +1,8 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libnewhope.la
+
+libnewhope_la_SOURCES = kex_rlwe_newhope.c
+
+libnewhope_la_CPPFLAGS = -I../../include -I.
+libnewhope_la_CPPFLAGS += $(AM_CPPFLAGS)
+
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/Makefile.am b/crypt/liboqs/kex_rlwe_newhope/avx2/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..2efd00fb2f620cdc2e51f4ba04b10302ea2b6098
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/Makefile.am
@@ -0,0 +1,12 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libnewhope_avx2.la librevert.la
+
+libnewhope_avx2_la_SOURCES = crypto_stream_chacha20.c precomp.c
+libnewhope_avx2_la_SOURCES += crypto_hash_sha256.c chacha.S cbd.s consts.c omegas.c ntt_double.s
+libnewhope_avx2_la_SOURCES += bitrev.s crypto_stream_aes256ctr.s hr.s rec.s poly_pointwise.s kex_rlwe_newhope_avx2.c
+
+libnewhope_avx2_la_CPPFLAGS = -I../../../include -I.
+libnewhope_avx2_la_CPPFLAGS += $(AM_CPPFLAGS) -O3 -fomit-frame-pointer -msse2avx -mavx2 -march=corei7-avx
+
+librevert.la:
+	cd ../../../ &&  bash patches/cleanup-patch.sh kex_rlwe_newhope/avx2
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/bitrev.s b/crypt/liboqs/kex_rlwe_newhope/avx2/bitrev.s
new file mode 100644
index 0000000000000000000000000000000000000000..002a1eef30617801abba9ad860b6138985e72feb
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/bitrev.s
@@ -0,0 +1,9976 @@
+
+# qhasm: int64 input_0
+
+# qhasm: int64 input_1
+
+# qhasm: int64 input_2
+
+# qhasm: int64 input_3
+
+# qhasm: int64 input_4
+
+# qhasm: int64 input_5
+
+# qhasm: stack64 input_6
+
+# qhasm: stack64 input_7
+
+# qhasm: int64 caller_r11
+
+# qhasm: int64 caller_r12
+
+# qhasm: int64 caller_r13
+
+# qhasm: int64 caller_r14
+
+# qhasm: int64 caller_r15
+
+# qhasm: int64 caller_rbx
+
+# qhasm: int64 caller_rbp
+
+# qhasm: int64 temp1
+
+# qhasm: int64 temp2
+
+# qhasm: int64 ap
+
+# qhasm: enter bitrev_vector
+.p2align 5
+.global _bitrev_vector
+.global bitrev_vector
+_bitrev_vector:
+bitrev_vector:
+movq %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
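+# Editorial note, not part of the qhasm-generated output: the rest of this file
+# appears to be a fully unrolled bit-reversal permutation over a 1024-entry
+# array of 32-bit coefficients.  Each group of four movs below swaps the value
+# at byte offset 4*i with the value at byte offset 4*bitrev10(i) (e.g. index
+# 1 <-> 512, 2 <-> 256, 3 <-> 768); a swap is only emitted when i < bitrev10(i),
+# so fixed points and already-swapped indices do not appear.
+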
+# qhasm: ap = input_0
+# asm 1: mov  <input_0=int64#1,>ap=int64#1
+# asm 2: mov  <input_0=%rdi,>ap=%rdi
+mov  %rdi,%rdi
+
+# qhasm: temp1 = mem64[ap + 4]
+# asm 1: mov   4(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   4(<ap=%rdi),>temp1=%esi
+mov   4(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2048]
+# asm 1: mov   2048(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2048(<ap=%rdi),>temp2=%edx
+mov   2048(%rdi),%edx
+
+# qhasm: mem64[ap + 2048] = temp1
+# asm 1: mov   <temp1=int64#2,2048(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2048(<ap=%rdi)
+mov   %esi,2048(%rdi)
+
+# qhasm: mem64[ap + 4] = temp2
+# asm 1: mov   <temp2=int64#3,4(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,4(<ap=%rdi)
+mov   %edx,4(%rdi)
+
+# qhasm: temp1 = mem64[ap + 8]
+# asm 1: mov   8(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   8(<ap=%rdi),>temp1=%esi
+mov   8(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1024]
+# asm 1: mov   1024(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1024(<ap=%rdi),>temp2=%edx
+mov   1024(%rdi),%edx
+
+# qhasm: mem64[ap + 1024] = temp1
+# asm 1: mov   <temp1=int64#2,1024(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1024(<ap=%rdi)
+mov   %esi,1024(%rdi)
+
+# qhasm: mem64[ap + 8] = temp2
+# asm 1: mov   <temp2=int64#3,8(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,8(<ap=%rdi)
+mov   %edx,8(%rdi)
+
+# qhasm: temp1 = mem64[ap + 12]
+# asm 1: mov   12(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   12(<ap=%rdi),>temp1=%esi
+mov   12(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3072]
+# asm 1: mov   3072(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3072(<ap=%rdi),>temp2=%edx
+mov   3072(%rdi),%edx
+
+# qhasm: mem64[ap + 3072] = temp1
+# asm 1: mov   <temp1=int64#2,3072(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3072(<ap=%rdi)
+mov   %esi,3072(%rdi)
+
+# qhasm: mem64[ap + 12] = temp2
+# asm 1: mov   <temp2=int64#3,12(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,12(<ap=%rdi)
+mov   %edx,12(%rdi)
+
+# qhasm: temp1 = mem64[ap + 16]
+# asm 1: mov   16(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   16(<ap=%rdi),>temp1=%esi
+mov   16(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 512]
+# asm 1: mov   512(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   512(<ap=%rdi),>temp2=%edx
+mov   512(%rdi),%edx
+
+# qhasm: mem64[ap + 512] = temp1
+# asm 1: mov   <temp1=int64#2,512(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,512(<ap=%rdi)
+mov   %esi,512(%rdi)
+
+# qhasm: mem64[ap + 16] = temp2
+# asm 1: mov   <temp2=int64#3,16(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,16(<ap=%rdi)
+mov   %edx,16(%rdi)
+
+# qhasm: temp1 = mem64[ap + 20]
+# asm 1: mov   20(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   20(<ap=%rdi),>temp1=%esi
+mov   20(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2560]
+# asm 1: mov   2560(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2560(<ap=%rdi),>temp2=%edx
+mov   2560(%rdi),%edx
+
+# qhasm: mem64[ap + 2560] = temp1
+# asm 1: mov   <temp1=int64#2,2560(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2560(<ap=%rdi)
+mov   %esi,2560(%rdi)
+
+# qhasm: mem64[ap + 20] = temp2
+# asm 1: mov   <temp2=int64#3,20(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,20(<ap=%rdi)
+mov   %edx,20(%rdi)
+
+# qhasm: temp1 = mem64[ap + 24]
+# asm 1: mov   24(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   24(<ap=%rdi),>temp1=%esi
+mov   24(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1536]
+# asm 1: mov   1536(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1536(<ap=%rdi),>temp2=%edx
+mov   1536(%rdi),%edx
+
+# qhasm: mem64[ap + 1536] = temp1
+# asm 1: mov   <temp1=int64#2,1536(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1536(<ap=%rdi)
+mov   %esi,1536(%rdi)
+
+# qhasm: mem64[ap + 24] = temp2
+# asm 1: mov   <temp2=int64#3,24(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,24(<ap=%rdi)
+mov   %edx,24(%rdi)
+
+# qhasm: temp1 = mem64[ap + 28]
+# asm 1: mov   28(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   28(<ap=%rdi),>temp1=%esi
+mov   28(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3584]
+# asm 1: mov   3584(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3584(<ap=%rdi),>temp2=%edx
+mov   3584(%rdi),%edx
+
+# qhasm: mem64[ap + 3584] = temp1
+# asm 1: mov   <temp1=int64#2,3584(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3584(<ap=%rdi)
+mov   %esi,3584(%rdi)
+
+# qhasm: mem64[ap + 28] = temp2
+# asm 1: mov   <temp2=int64#3,28(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,28(<ap=%rdi)
+mov   %edx,28(%rdi)
+
+# qhasm: temp1 = mem64[ap + 32]
+# asm 1: mov   32(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   32(<ap=%rdi),>temp1=%esi
+mov   32(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 256]
+# asm 1: mov   256(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   256(<ap=%rdi),>temp2=%edx
+mov   256(%rdi),%edx
+
+# qhasm: mem64[ap + 256] = temp1
+# asm 1: mov   <temp1=int64#2,256(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,256(<ap=%rdi)
+mov   %esi,256(%rdi)
+
+# qhasm: mem64[ap + 32] = temp2
+# asm 1: mov   <temp2=int64#3,32(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,32(<ap=%rdi)
+mov   %edx,32(%rdi)
+
+# qhasm: temp1 = mem64[ap + 36]
+# asm 1: mov   36(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   36(<ap=%rdi),>temp1=%esi
+mov   36(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2304]
+# asm 1: mov   2304(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2304(<ap=%rdi),>temp2=%edx
+mov   2304(%rdi),%edx
+
+# qhasm: mem64[ap + 2304] = temp1
+# asm 1: mov   <temp1=int64#2,2304(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2304(<ap=%rdi)
+mov   %esi,2304(%rdi)
+
+# qhasm: mem64[ap + 36] = temp2
+# asm 1: mov   <temp2=int64#3,36(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,36(<ap=%rdi)
+mov   %edx,36(%rdi)
+
+# qhasm: temp1 = mem64[ap + 40]
+# asm 1: mov   40(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   40(<ap=%rdi),>temp1=%esi
+mov   40(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1280]
+# asm 1: mov   1280(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1280(<ap=%rdi),>temp2=%edx
+mov   1280(%rdi),%edx
+
+# qhasm: mem64[ap + 1280] = temp1
+# asm 1: mov   <temp1=int64#2,1280(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1280(<ap=%rdi)
+mov   %esi,1280(%rdi)
+
+# qhasm: mem64[ap + 40] = temp2
+# asm 1: mov   <temp2=int64#3,40(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,40(<ap=%rdi)
+mov   %edx,40(%rdi)
+
+# qhasm: temp1 = mem64[ap + 44]
+# asm 1: mov   44(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   44(<ap=%rdi),>temp1=%esi
+mov   44(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3328]
+# asm 1: mov   3328(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3328(<ap=%rdi),>temp2=%edx
+mov   3328(%rdi),%edx
+
+# qhasm: mem64[ap + 3328] = temp1
+# asm 1: mov   <temp1=int64#2,3328(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3328(<ap=%rdi)
+mov   %esi,3328(%rdi)
+
+# qhasm: mem64[ap + 44] = temp2
+# asm 1: mov   <temp2=int64#3,44(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,44(<ap=%rdi)
+mov   %edx,44(%rdi)
+
+# qhasm: temp1 = mem64[ap + 48]
+# asm 1: mov   48(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   48(<ap=%rdi),>temp1=%esi
+mov   48(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 768]
+# asm 1: mov   768(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   768(<ap=%rdi),>temp2=%edx
+mov   768(%rdi),%edx
+
+# qhasm: mem64[ap + 768] = temp1
+# asm 1: mov   <temp1=int64#2,768(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,768(<ap=%rdi)
+mov   %esi,768(%rdi)
+
+# qhasm: mem64[ap + 48] = temp2
+# asm 1: mov   <temp2=int64#3,48(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,48(<ap=%rdi)
+mov   %edx,48(%rdi)
+
+# qhasm: temp1 = mem64[ap + 52]
+# asm 1: mov   52(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   52(<ap=%rdi),>temp1=%esi
+mov   52(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2816]
+# asm 1: mov   2816(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2816(<ap=%rdi),>temp2=%edx
+mov   2816(%rdi),%edx
+
+# qhasm: mem64[ap + 2816] = temp1
+# asm 1: mov   <temp1=int64#2,2816(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2816(<ap=%rdi)
+mov   %esi,2816(%rdi)
+
+# qhasm: mem64[ap + 52] = temp2
+# asm 1: mov   <temp2=int64#3,52(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,52(<ap=%rdi)
+mov   %edx,52(%rdi)
+
+# qhasm: temp1 = mem64[ap + 56]
+# asm 1: mov   56(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   56(<ap=%rdi),>temp1=%esi
+mov   56(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1792]
+# asm 1: mov   1792(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1792(<ap=%rdi),>temp2=%edx
+mov   1792(%rdi),%edx
+
+# qhasm: mem64[ap + 1792] = temp1
+# asm 1: mov   <temp1=int64#2,1792(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1792(<ap=%rdi)
+mov   %esi,1792(%rdi)
+
+# qhasm: mem64[ap + 56] = temp2
+# asm 1: mov   <temp2=int64#3,56(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,56(<ap=%rdi)
+mov   %edx,56(%rdi)
+
+# qhasm: temp1 = mem64[ap + 60]
+# asm 1: mov   60(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   60(<ap=%rdi),>temp1=%esi
+mov   60(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3840]
+# asm 1: mov   3840(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3840(<ap=%rdi),>temp2=%edx
+mov   3840(%rdi),%edx
+
+# qhasm: mem64[ap + 3840] = temp1
+# asm 1: mov   <temp1=int64#2,3840(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3840(<ap=%rdi)
+mov   %esi,3840(%rdi)
+
+# qhasm: mem64[ap + 60] = temp2
+# asm 1: mov   <temp2=int64#3,60(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,60(<ap=%rdi)
+mov   %edx,60(%rdi)
+
+# qhasm: temp1 = mem64[ap + 64]
+# asm 1: mov   64(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   64(<ap=%rdi),>temp1=%esi
+mov   64(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 128]
+# asm 1: mov   128(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   128(<ap=%rdi),>temp2=%edx
+mov   128(%rdi),%edx
+
+# qhasm: mem64[ap + 128] = temp1
+# asm 1: mov   <temp1=int64#2,128(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,128(<ap=%rdi)
+mov   %esi,128(%rdi)
+
+# qhasm: mem64[ap + 64] = temp2
+# asm 1: mov   <temp2=int64#3,64(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,64(<ap=%rdi)
+mov   %edx,64(%rdi)
+
+# qhasm: temp1 = mem64[ap + 68]
+# asm 1: mov   68(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   68(<ap=%rdi),>temp1=%esi
+mov   68(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2176]
+# asm 1: mov   2176(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2176(<ap=%rdi),>temp2=%edx
+mov   2176(%rdi),%edx
+
+# qhasm: mem64[ap + 2176] = temp1
+# asm 1: mov   <temp1=int64#2,2176(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2176(<ap=%rdi)
+mov   %esi,2176(%rdi)
+
+# qhasm: mem64[ap + 68] = temp2
+# asm 1: mov   <temp2=int64#3,68(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,68(<ap=%rdi)
+mov   %edx,68(%rdi)
+
+# qhasm: temp1 = mem64[ap + 72]
+# asm 1: mov   72(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   72(<ap=%rdi),>temp1=%esi
+mov   72(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1152]
+# asm 1: mov   1152(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1152(<ap=%rdi),>temp2=%edx
+mov   1152(%rdi),%edx
+
+# qhasm: mem64[ap + 1152] = temp1
+# asm 1: mov   <temp1=int64#2,1152(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1152(<ap=%rdi)
+mov   %esi,1152(%rdi)
+
+# qhasm: mem64[ap + 72] = temp2
+# asm 1: mov   <temp2=int64#3,72(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,72(<ap=%rdi)
+mov   %edx,72(%rdi)
+
+# qhasm: temp1 = mem64[ap + 76]
+# asm 1: mov   76(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   76(<ap=%rdi),>temp1=%esi
+mov   76(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3200]
+# asm 1: mov   3200(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3200(<ap=%rdi),>temp2=%edx
+mov   3200(%rdi),%edx
+
+# qhasm: mem64[ap + 3200] = temp1
+# asm 1: mov   <temp1=int64#2,3200(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3200(<ap=%rdi)
+mov   %esi,3200(%rdi)
+
+# qhasm: mem64[ap + 76] = temp2
+# asm 1: mov   <temp2=int64#3,76(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,76(<ap=%rdi)
+mov   %edx,76(%rdi)
+
+# qhasm: temp1 = mem64[ap + 80]
+# asm 1: mov   80(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   80(<ap=%rdi),>temp1=%esi
+mov   80(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 640]
+# asm 1: mov   640(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   640(<ap=%rdi),>temp2=%edx
+mov   640(%rdi),%edx
+
+# qhasm: mem64[ap + 640] = temp1
+# asm 1: mov   <temp1=int64#2,640(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,640(<ap=%rdi)
+mov   %esi,640(%rdi)
+
+# qhasm: mem64[ap + 80] = temp2
+# asm 1: mov   <temp2=int64#3,80(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,80(<ap=%rdi)
+mov   %edx,80(%rdi)
+
+# qhasm: temp1 = mem64[ap + 84]
+# asm 1: mov   84(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   84(<ap=%rdi),>temp1=%esi
+mov   84(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2688]
+# asm 1: mov   2688(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2688(<ap=%rdi),>temp2=%edx
+mov   2688(%rdi),%edx
+
+# qhasm: mem64[ap + 2688] = temp1
+# asm 1: mov   <temp1=int64#2,2688(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2688(<ap=%rdi)
+mov   %esi,2688(%rdi)
+
+# qhasm: mem64[ap + 84] = temp2
+# asm 1: mov   <temp2=int64#3,84(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,84(<ap=%rdi)
+mov   %edx,84(%rdi)
+
+# qhasm: temp1 = mem64[ap + 88]
+# asm 1: mov   88(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   88(<ap=%rdi),>temp1=%esi
+mov   88(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1664]
+# asm 1: mov   1664(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1664(<ap=%rdi),>temp2=%edx
+mov   1664(%rdi),%edx
+
+# qhasm: mem64[ap + 1664] = temp1
+# asm 1: mov   <temp1=int64#2,1664(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1664(<ap=%rdi)
+mov   %esi,1664(%rdi)
+
+# qhasm: mem64[ap + 88] = temp2
+# asm 1: mov   <temp2=int64#3,88(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,88(<ap=%rdi)
+mov   %edx,88(%rdi)
+
+# qhasm: temp1 = mem64[ap + 92]
+# asm 1: mov   92(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   92(<ap=%rdi),>temp1=%esi
+mov   92(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3712]
+# asm 1: mov   3712(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3712(<ap=%rdi),>temp2=%edx
+mov   3712(%rdi),%edx
+
+# qhasm: mem64[ap + 3712] = temp1
+# asm 1: mov   <temp1=int64#2,3712(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3712(<ap=%rdi)
+mov   %esi,3712(%rdi)
+
+# qhasm: mem64[ap + 92] = temp2
+# asm 1: mov   <temp2=int64#3,92(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,92(<ap=%rdi)
+mov   %edx,92(%rdi)
+
+# qhasm: temp1 = mem64[ap + 96]
+# asm 1: mov   96(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   96(<ap=%rdi),>temp1=%esi
+mov   96(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 384]
+# asm 1: mov   384(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   384(<ap=%rdi),>temp2=%edx
+mov   384(%rdi),%edx
+
+# qhasm: mem64[ap + 384] = temp1
+# asm 1: mov   <temp1=int64#2,384(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,384(<ap=%rdi)
+mov   %esi,384(%rdi)
+
+# qhasm: mem64[ap + 96] = temp2
+# asm 1: mov   <temp2=int64#3,96(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,96(<ap=%rdi)
+mov   %edx,96(%rdi)
+
+# qhasm: temp1 = mem64[ap + 100]
+# asm 1: mov   100(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   100(<ap=%rdi),>temp1=%esi
+mov   100(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2432]
+# asm 1: mov   2432(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2432(<ap=%rdi),>temp2=%edx
+mov   2432(%rdi),%edx
+
+# qhasm: mem64[ap + 2432] = temp1
+# asm 1: mov   <temp1=int64#2,2432(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2432(<ap=%rdi)
+mov   %esi,2432(%rdi)
+
+# qhasm: mem64[ap + 100] = temp2
+# asm 1: mov   <temp2=int64#3,100(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,100(<ap=%rdi)
+mov   %edx,100(%rdi)
+
+# qhasm: temp1 = mem64[ap + 104]
+# asm 1: mov   104(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   104(<ap=%rdi),>temp1=%esi
+mov   104(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1408]
+# asm 1: mov   1408(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1408(<ap=%rdi),>temp2=%edx
+mov   1408(%rdi),%edx
+
+# qhasm: mem64[ap + 1408] = temp1
+# asm 1: mov   <temp1=int64#2,1408(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1408(<ap=%rdi)
+mov   %esi,1408(%rdi)
+
+# qhasm: mem64[ap + 104] = temp2
+# asm 1: mov   <temp2=int64#3,104(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,104(<ap=%rdi)
+mov   %edx,104(%rdi)
+
+# qhasm: temp1 = mem64[ap + 108]
+# asm 1: mov   108(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   108(<ap=%rdi),>temp1=%esi
+mov   108(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3456]
+# asm 1: mov   3456(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3456(<ap=%rdi),>temp2=%edx
+mov   3456(%rdi),%edx
+
+# qhasm: mem64[ap + 3456] = temp1
+# asm 1: mov   <temp1=int64#2,3456(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3456(<ap=%rdi)
+mov   %esi,3456(%rdi)
+
+# qhasm: mem64[ap + 108] = temp2
+# asm 1: mov   <temp2=int64#3,108(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,108(<ap=%rdi)
+mov   %edx,108(%rdi)
+
+# qhasm: temp1 = mem64[ap + 112]
+# asm 1: mov   112(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   112(<ap=%rdi),>temp1=%esi
+mov   112(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 896]
+# asm 1: mov   896(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   896(<ap=%rdi),>temp2=%edx
+mov   896(%rdi),%edx
+
+# qhasm: mem64[ap + 896] = temp1
+# asm 1: mov   <temp1=int64#2,896(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,896(<ap=%rdi)
+mov   %esi,896(%rdi)
+
+# qhasm: mem64[ap + 112] = temp2
+# asm 1: mov   <temp2=int64#3,112(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,112(<ap=%rdi)
+mov   %edx,112(%rdi)
+
+# qhasm: temp1 = mem64[ap + 116]
+# asm 1: mov   116(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   116(<ap=%rdi),>temp1=%esi
+mov   116(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2944]
+# asm 1: mov   2944(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2944(<ap=%rdi),>temp2=%edx
+mov   2944(%rdi),%edx
+
+# qhasm: mem64[ap + 2944] = temp1
+# asm 1: mov   <temp1=int64#2,2944(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2944(<ap=%rdi)
+mov   %esi,2944(%rdi)
+
+# qhasm: mem64[ap + 116] = temp2
+# asm 1: mov   <temp2=int64#3,116(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,116(<ap=%rdi)
+mov   %edx,116(%rdi)
+
+# qhasm: temp1 = mem64[ap + 120]
+# asm 1: mov   120(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   120(<ap=%rdi),>temp1=%esi
+mov   120(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1920]
+# asm 1: mov   1920(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1920(<ap=%rdi),>temp2=%edx
+mov   1920(%rdi),%edx
+
+# qhasm: mem64[ap + 1920] = temp1
+# asm 1: mov   <temp1=int64#2,1920(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1920(<ap=%rdi)
+mov   %esi,1920(%rdi)
+
+# qhasm: mem64[ap + 120] = temp2
+# asm 1: mov   <temp2=int64#3,120(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,120(<ap=%rdi)
+mov   %edx,120(%rdi)
+
+# qhasm: temp1 = mem64[ap + 124]
+# asm 1: mov   124(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   124(<ap=%rdi),>temp1=%esi
+mov   124(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3968]
+# asm 1: mov   3968(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3968(<ap=%rdi),>temp2=%edx
+mov   3968(%rdi),%edx
+
+# qhasm: mem64[ap + 3968] = temp1
+# asm 1: mov   <temp1=int64#2,3968(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3968(<ap=%rdi)
+mov   %esi,3968(%rdi)
+
+# qhasm: mem64[ap + 124] = temp2
+# asm 1: mov   <temp2=int64#3,124(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,124(<ap=%rdi)
+mov   %edx,124(%rdi)
+
+# qhasm: temp1 = mem64[ap + 132]
+# asm 1: mov   132(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   132(<ap=%rdi),>temp1=%esi
+mov   132(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2112]
+# asm 1: mov   2112(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2112(<ap=%rdi),>temp2=%edx
+mov   2112(%rdi),%edx
+
+# qhasm: mem64[ap + 2112] = temp1
+# asm 1: mov   <temp1=int64#2,2112(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2112(<ap=%rdi)
+mov   %esi,2112(%rdi)
+
+# qhasm: mem64[ap + 132] = temp2
+# asm 1: mov   <temp2=int64#3,132(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,132(<ap=%rdi)
+mov   %edx,132(%rdi)
+
+# qhasm: temp1 = mem64[ap + 136]
+# asm 1: mov   136(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   136(<ap=%rdi),>temp1=%esi
+mov   136(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1088]
+# asm 1: mov   1088(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1088(<ap=%rdi),>temp2=%edx
+mov   1088(%rdi),%edx
+
+# qhasm: mem64[ap + 1088] = temp1
+# asm 1: mov   <temp1=int64#2,1088(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1088(<ap=%rdi)
+mov   %esi,1088(%rdi)
+
+# qhasm: mem64[ap + 136] = temp2
+# asm 1: mov   <temp2=int64#3,136(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,136(<ap=%rdi)
+mov   %edx,136(%rdi)
+
+# qhasm: temp1 = mem64[ap + 140]
+# asm 1: mov   140(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   140(<ap=%rdi),>temp1=%esi
+mov   140(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3136]
+# asm 1: mov   3136(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3136(<ap=%rdi),>temp2=%edx
+mov   3136(%rdi),%edx
+
+# qhasm: mem64[ap + 3136] = temp1
+# asm 1: mov   <temp1=int64#2,3136(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3136(<ap=%rdi)
+mov   %esi,3136(%rdi)
+
+# qhasm: mem64[ap + 140] = temp2
+# asm 1: mov   <temp2=int64#3,140(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,140(<ap=%rdi)
+mov   %edx,140(%rdi)
+
+# qhasm: temp1 = mem64[ap + 144]
+# asm 1: mov   144(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   144(<ap=%rdi),>temp1=%esi
+mov   144(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 576]
+# asm 1: mov   576(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   576(<ap=%rdi),>temp2=%edx
+mov   576(%rdi),%edx
+
+# qhasm: mem64[ap + 576] = temp1
+# asm 1: mov   <temp1=int64#2,576(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,576(<ap=%rdi)
+mov   %esi,576(%rdi)
+
+# qhasm: mem64[ap + 144] = temp2
+# asm 1: mov   <temp2=int64#3,144(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,144(<ap=%rdi)
+mov   %edx,144(%rdi)
+
+# qhasm: temp1 = mem64[ap + 148]
+# asm 1: mov   148(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   148(<ap=%rdi),>temp1=%esi
+mov   148(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2624]
+# asm 1: mov   2624(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2624(<ap=%rdi),>temp2=%edx
+mov   2624(%rdi),%edx
+
+# qhasm: mem64[ap + 2624] = temp1
+# asm 1: mov   <temp1=int64#2,2624(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2624(<ap=%rdi)
+mov   %esi,2624(%rdi)
+
+# qhasm: mem64[ap + 148] = temp2
+# asm 1: mov   <temp2=int64#3,148(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,148(<ap=%rdi)
+mov   %edx,148(%rdi)
+
+# qhasm: temp1 = mem64[ap + 152]
+# asm 1: mov   152(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   152(<ap=%rdi),>temp1=%esi
+mov   152(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1600]
+# asm 1: mov   1600(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1600(<ap=%rdi),>temp2=%edx
+mov   1600(%rdi),%edx
+
+# qhasm: mem64[ap + 1600] = temp1
+# asm 1: mov   <temp1=int64#2,1600(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1600(<ap=%rdi)
+mov   %esi,1600(%rdi)
+
+# qhasm: mem64[ap + 152] = temp2
+# asm 1: mov   <temp2=int64#3,152(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,152(<ap=%rdi)
+mov   %edx,152(%rdi)
+
+# qhasm: temp1 = mem64[ap + 156]
+# asm 1: mov   156(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   156(<ap=%rdi),>temp1=%esi
+mov   156(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3648]
+# asm 1: mov   3648(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3648(<ap=%rdi),>temp2=%edx
+mov   3648(%rdi),%edx
+
+# qhasm: mem64[ap + 3648] = temp1
+# asm 1: mov   <temp1=int64#2,3648(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3648(<ap=%rdi)
+mov   %esi,3648(%rdi)
+
+# qhasm: mem64[ap + 156] = temp2
+# asm 1: mov   <temp2=int64#3,156(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,156(<ap=%rdi)
+mov   %edx,156(%rdi)
+
+# qhasm: temp1 = mem64[ap + 160]
+# asm 1: mov   160(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   160(<ap=%rdi),>temp1=%esi
+mov   160(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 320]
+# asm 1: mov   320(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   320(<ap=%rdi),>temp2=%edx
+mov   320(%rdi),%edx
+
+# qhasm: mem64[ap + 320] = temp1
+# asm 1: mov   <temp1=int64#2,320(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,320(<ap=%rdi)
+mov   %esi,320(%rdi)
+
+# qhasm: mem64[ap + 160] = temp2
+# asm 1: mov   <temp2=int64#3,160(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,160(<ap=%rdi)
+mov   %edx,160(%rdi)
+
+# qhasm: temp1 = mem64[ap + 164]
+# asm 1: mov   164(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   164(<ap=%rdi),>temp1=%esi
+mov   164(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2368]
+# asm 1: mov   2368(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2368(<ap=%rdi),>temp2=%edx
+mov   2368(%rdi),%edx
+
+# qhasm: mem64[ap + 2368] = temp1
+# asm 1: mov   <temp1=int64#2,2368(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2368(<ap=%rdi)
+mov   %esi,2368(%rdi)
+
+# qhasm: mem64[ap + 164] = temp2
+# asm 1: mov   <temp2=int64#3,164(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,164(<ap=%rdi)
+mov   %edx,164(%rdi)
+
+# qhasm: temp1 = mem64[ap + 168]
+# asm 1: mov   168(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   168(<ap=%rdi),>temp1=%esi
+mov   168(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1344]
+# asm 1: mov   1344(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1344(<ap=%rdi),>temp2=%edx
+mov   1344(%rdi),%edx
+
+# qhasm: mem64[ap + 1344] = temp1
+# asm 1: mov   <temp1=int64#2,1344(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1344(<ap=%rdi)
+mov   %esi,1344(%rdi)
+
+# qhasm: mem64[ap + 168] = temp2
+# asm 1: mov   <temp2=int64#3,168(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,168(<ap=%rdi)
+mov   %edx,168(%rdi)
+
+# qhasm: temp1 = mem64[ap + 172]
+# asm 1: mov   172(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   172(<ap=%rdi),>temp1=%esi
+mov   172(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3392]
+# asm 1: mov   3392(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3392(<ap=%rdi),>temp2=%edx
+mov   3392(%rdi),%edx
+
+# qhasm: mem64[ap + 3392] = temp1
+# asm 1: mov   <temp1=int64#2,3392(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3392(<ap=%rdi)
+mov   %esi,3392(%rdi)
+
+# qhasm: mem64[ap + 172] = temp2
+# asm 1: mov   <temp2=int64#3,172(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,172(<ap=%rdi)
+mov   %edx,172(%rdi)
+
+# qhasm: temp1 = mem64[ap + 176]
+# asm 1: mov   176(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   176(<ap=%rdi),>temp1=%esi
+mov   176(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 832]
+# asm 1: mov   832(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   832(<ap=%rdi),>temp2=%edx
+mov   832(%rdi),%edx
+
+# qhasm: mem64[ap + 832] = temp1
+# asm 1: mov   <temp1=int64#2,832(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,832(<ap=%rdi)
+mov   %esi,832(%rdi)
+
+# qhasm: mem64[ap + 176] = temp2
+# asm 1: mov   <temp2=int64#3,176(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,176(<ap=%rdi)
+mov   %edx,176(%rdi)
+
+# qhasm: temp1 = mem64[ap + 180]
+# asm 1: mov   180(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   180(<ap=%rdi),>temp1=%esi
+mov   180(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2880]
+# asm 1: mov   2880(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2880(<ap=%rdi),>temp2=%edx
+mov   2880(%rdi),%edx
+
+# qhasm: mem64[ap + 2880] = temp1
+# asm 1: mov   <temp1=int64#2,2880(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2880(<ap=%rdi)
+mov   %esi,2880(%rdi)
+
+# qhasm: mem64[ap + 180] = temp2
+# asm 1: mov   <temp2=int64#3,180(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,180(<ap=%rdi)
+mov   %edx,180(%rdi)
+
+# qhasm: temp1 = mem64[ap + 184]
+# asm 1: mov   184(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   184(<ap=%rdi),>temp1=%esi
+mov   184(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1856]
+# asm 1: mov   1856(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1856(<ap=%rdi),>temp2=%edx
+mov   1856(%rdi),%edx
+
+# qhasm: mem64[ap + 1856] = temp1
+# asm 1: mov   <temp1=int64#2,1856(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1856(<ap=%rdi)
+mov   %esi,1856(%rdi)
+
+# qhasm: mem64[ap + 184] = temp2
+# asm 1: mov   <temp2=int64#3,184(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,184(<ap=%rdi)
+mov   %edx,184(%rdi)
+
+# qhasm: temp1 = mem64[ap + 188]
+# asm 1: mov   188(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   188(<ap=%rdi),>temp1=%esi
+mov   188(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3904]
+# asm 1: mov   3904(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3904(<ap=%rdi),>temp2=%edx
+mov   3904(%rdi),%edx
+
+# qhasm: mem64[ap + 3904] = temp1
+# asm 1: mov   <temp1=int64#2,3904(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3904(<ap=%rdi)
+mov   %esi,3904(%rdi)
+
+# qhasm: mem64[ap + 188] = temp2
+# asm 1: mov   <temp2=int64#3,188(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,188(<ap=%rdi)
+mov   %edx,188(%rdi)
+
+# qhasm: temp1 = mem64[ap + 196]
+# asm 1: mov   196(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   196(<ap=%rdi),>temp1=%esi
+mov   196(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2240]
+# asm 1: mov   2240(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2240(<ap=%rdi),>temp2=%edx
+mov   2240(%rdi),%edx
+
+# qhasm: mem64[ap + 2240] = temp1
+# asm 1: mov   <temp1=int64#2,2240(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2240(<ap=%rdi)
+mov   %esi,2240(%rdi)
+
+# qhasm: mem64[ap + 196] = temp2
+# asm 1: mov   <temp2=int64#3,196(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,196(<ap=%rdi)
+mov   %edx,196(%rdi)
+
+# qhasm: temp1 = mem64[ap + 200]
+# asm 1: mov   200(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   200(<ap=%rdi),>temp1=%esi
+mov   200(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1216]
+# asm 1: mov   1216(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1216(<ap=%rdi),>temp2=%edx
+mov   1216(%rdi),%edx
+
+# qhasm: mem64[ap + 1216] = temp1
+# asm 1: mov   <temp1=int64#2,1216(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1216(<ap=%rdi)
+mov   %esi,1216(%rdi)
+
+# qhasm: mem64[ap + 200] = temp2
+# asm 1: mov   <temp2=int64#3,200(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,200(<ap=%rdi)
+mov   %edx,200(%rdi)
+
+# qhasm: temp1 = mem64[ap + 204]
+# asm 1: mov   204(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   204(<ap=%rdi),>temp1=%esi
+mov   204(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3264]
+# asm 1: mov   3264(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3264(<ap=%rdi),>temp2=%edx
+mov   3264(%rdi),%edx
+
+# qhasm: mem64[ap + 3264] = temp1
+# asm 1: mov   <temp1=int64#2,3264(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3264(<ap=%rdi)
+mov   %esi,3264(%rdi)
+
+# qhasm: mem64[ap + 204] = temp2
+# asm 1: mov   <temp2=int64#3,204(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,204(<ap=%rdi)
+mov   %edx,204(%rdi)
+
+# qhasm: temp1 = mem64[ap + 208]
+# asm 1: mov   208(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   208(<ap=%rdi),>temp1=%esi
+mov   208(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 704]
+# asm 1: mov   704(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   704(<ap=%rdi),>temp2=%edx
+mov   704(%rdi),%edx
+
+# qhasm: mem64[ap + 704] = temp1
+# asm 1: mov   <temp1=int64#2,704(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,704(<ap=%rdi)
+mov   %esi,704(%rdi)
+
+# qhasm: mem64[ap + 208] = temp2
+# asm 1: mov   <temp2=int64#3,208(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,208(<ap=%rdi)
+mov   %edx,208(%rdi)
+
+# qhasm: temp1 = mem64[ap + 212]
+# asm 1: mov   212(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   212(<ap=%rdi),>temp1=%esi
+mov   212(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2752]
+# asm 1: mov   2752(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2752(<ap=%rdi),>temp2=%edx
+mov   2752(%rdi),%edx
+
+# qhasm: mem64[ap + 2752] = temp1
+# asm 1: mov   <temp1=int64#2,2752(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2752(<ap=%rdi)
+mov   %esi,2752(%rdi)
+
+# qhasm: mem64[ap + 212] = temp2
+# asm 1: mov   <temp2=int64#3,212(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,212(<ap=%rdi)
+mov   %edx,212(%rdi)
+
+# qhasm: temp1 = mem64[ap + 216]
+# asm 1: mov   216(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   216(<ap=%rdi),>temp1=%esi
+mov   216(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1728]
+# asm 1: mov   1728(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1728(<ap=%rdi),>temp2=%edx
+mov   1728(%rdi),%edx
+
+# qhasm: mem64[ap + 1728] = temp1
+# asm 1: mov   <temp1=int64#2,1728(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1728(<ap=%rdi)
+mov   %esi,1728(%rdi)
+
+# qhasm: mem64[ap + 216] = temp2
+# asm 1: mov   <temp2=int64#3,216(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,216(<ap=%rdi)
+mov   %edx,216(%rdi)
+
+# qhasm: temp1 = mem64[ap + 220]
+# asm 1: mov   220(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   220(<ap=%rdi),>temp1=%esi
+mov   220(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3776]
+# asm 1: mov   3776(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3776(<ap=%rdi),>temp2=%edx
+mov   3776(%rdi),%edx
+
+# qhasm: mem64[ap + 3776] = temp1
+# asm 1: mov   <temp1=int64#2,3776(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3776(<ap=%rdi)
+mov   %esi,3776(%rdi)
+
+# qhasm: mem64[ap + 220] = temp2
+# asm 1: mov   <temp2=int64#3,220(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,220(<ap=%rdi)
+mov   %edx,220(%rdi)
+
+# qhasm: temp1 = mem64[ap + 224]
+# asm 1: mov   224(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   224(<ap=%rdi),>temp1=%esi
+mov   224(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 448]
+# asm 1: mov   448(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   448(<ap=%rdi),>temp2=%edx
+mov   448(%rdi),%edx
+
+# qhasm: mem64[ap + 448] = temp1
+# asm 1: mov   <temp1=int64#2,448(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,448(<ap=%rdi)
+mov   %esi,448(%rdi)
+
+# qhasm: mem64[ap + 224] = temp2
+# asm 1: mov   <temp2=int64#3,224(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,224(<ap=%rdi)
+mov   %edx,224(%rdi)
+
+# qhasm: temp1 = mem64[ap + 228]
+# asm 1: mov   228(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   228(<ap=%rdi),>temp1=%esi
+mov   228(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2496]
+# asm 1: mov   2496(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2496(<ap=%rdi),>temp2=%edx
+mov   2496(%rdi),%edx
+
+# qhasm: mem64[ap + 2496] = temp1
+# asm 1: mov   <temp1=int64#2,2496(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2496(<ap=%rdi)
+mov   %esi,2496(%rdi)
+
+# qhasm: mem64[ap + 228] = temp2
+# asm 1: mov   <temp2=int64#3,228(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,228(<ap=%rdi)
+mov   %edx,228(%rdi)
+
+# qhasm: temp1 = mem64[ap + 232]
+# asm 1: mov   232(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   232(<ap=%rdi),>temp1=%esi
+mov   232(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1472]
+# asm 1: mov   1472(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1472(<ap=%rdi),>temp2=%edx
+mov   1472(%rdi),%edx
+
+# qhasm: mem64[ap + 1472] = temp1
+# asm 1: mov   <temp1=int64#2,1472(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1472(<ap=%rdi)
+mov   %esi,1472(%rdi)
+
+# qhasm: mem64[ap + 232] = temp2
+# asm 1: mov   <temp2=int64#3,232(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,232(<ap=%rdi)
+mov   %edx,232(%rdi)
+
+# qhasm: temp1 = mem64[ap + 236]
+# asm 1: mov   236(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   236(<ap=%rdi),>temp1=%esi
+mov   236(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3520]
+# asm 1: mov   3520(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3520(<ap=%rdi),>temp2=%edx
+mov   3520(%rdi),%edx
+
+# qhasm: mem64[ap + 3520] = temp1
+# asm 1: mov   <temp1=int64#2,3520(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3520(<ap=%rdi)
+mov   %esi,3520(%rdi)
+
+# qhasm: mem64[ap + 236] = temp2
+# asm 1: mov   <temp2=int64#3,236(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,236(<ap=%rdi)
+mov   %edx,236(%rdi)
+
+# qhasm: temp1 = mem64[ap + 240]
+# asm 1: mov   240(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   240(<ap=%rdi),>temp1=%esi
+mov   240(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 960]
+# asm 1: mov   960(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   960(<ap=%rdi),>temp2=%edx
+mov   960(%rdi),%edx
+
+# qhasm: mem64[ap + 960] = temp1
+# asm 1: mov   <temp1=int64#2,960(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,960(<ap=%rdi)
+mov   %esi,960(%rdi)
+
+# qhasm: mem64[ap + 240] = temp2
+# asm 1: mov   <temp2=int64#3,240(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,240(<ap=%rdi)
+mov   %edx,240(%rdi)
+
+# qhasm: temp1 = mem64[ap + 244]
+# asm 1: mov   244(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   244(<ap=%rdi),>temp1=%esi
+mov   244(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3008]
+# asm 1: mov   3008(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3008(<ap=%rdi),>temp2=%edx
+mov   3008(%rdi),%edx
+
+# qhasm: mem64[ap + 3008] = temp1
+# asm 1: mov   <temp1=int64#2,3008(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3008(<ap=%rdi)
+mov   %esi,3008(%rdi)
+
+# qhasm: mem64[ap + 244] = temp2
+# asm 1: mov   <temp2=int64#3,244(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,244(<ap=%rdi)
+mov   %edx,244(%rdi)
+
+# qhasm: temp1 = mem64[ap + 248]
+# asm 1: mov   248(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   248(<ap=%rdi),>temp1=%esi
+mov   248(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1984]
+# asm 1: mov   1984(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1984(<ap=%rdi),>temp2=%edx
+mov   1984(%rdi),%edx
+
+# qhasm: mem64[ap + 1984] = temp1
+# asm 1: mov   <temp1=int64#2,1984(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1984(<ap=%rdi)
+mov   %esi,1984(%rdi)
+
+# qhasm: mem64[ap + 248] = temp2
+# asm 1: mov   <temp2=int64#3,248(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,248(<ap=%rdi)
+mov   %edx,248(%rdi)
+
+# qhasm: temp1 = mem64[ap + 252]
+# asm 1: mov   252(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   252(<ap=%rdi),>temp1=%esi
+mov   252(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4032]
+# asm 1: mov   4032(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4032(<ap=%rdi),>temp2=%edx
+mov   4032(%rdi),%edx
+
+# qhasm: mem64[ap + 4032] = temp1
+# asm 1: mov   <temp1=int64#2,4032(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4032(<ap=%rdi)
+mov   %esi,4032(%rdi)
+
+# qhasm: mem64[ap + 252] = temp2
+# asm 1: mov   <temp2=int64#3,252(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,252(<ap=%rdi)
+mov   %edx,252(%rdi)
+
+# qhasm: temp1 = mem64[ap + 260]
+# asm 1: mov   260(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   260(<ap=%rdi),>temp1=%esi
+mov   260(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2080]
+# asm 1: mov   2080(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2080(<ap=%rdi),>temp2=%edx
+mov   2080(%rdi),%edx
+
+# qhasm: mem64[ap + 2080] = temp1
+# asm 1: mov   <temp1=int64#2,2080(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2080(<ap=%rdi)
+mov   %esi,2080(%rdi)
+
+# qhasm: mem64[ap + 260] = temp2
+# asm 1: mov   <temp2=int64#3,260(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,260(<ap=%rdi)
+mov   %edx,260(%rdi)
+
+# qhasm: temp1 = mem64[ap + 264]
+# asm 1: mov   264(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   264(<ap=%rdi),>temp1=%esi
+mov   264(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1056]
+# asm 1: mov   1056(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1056(<ap=%rdi),>temp2=%edx
+mov   1056(%rdi),%edx
+
+# qhasm: mem64[ap + 1056] = temp1
+# asm 1: mov   <temp1=int64#2,1056(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1056(<ap=%rdi)
+mov   %esi,1056(%rdi)
+
+# qhasm: mem64[ap + 264] = temp2
+# asm 1: mov   <temp2=int64#3,264(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,264(<ap=%rdi)
+mov   %edx,264(%rdi)
+
+# qhasm: temp1 = mem64[ap + 268]
+# asm 1: mov   268(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   268(<ap=%rdi),>temp1=%esi
+mov   268(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3104]
+# asm 1: mov   3104(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3104(<ap=%rdi),>temp2=%edx
+mov   3104(%rdi),%edx
+
+# qhasm: mem64[ap + 3104] = temp1
+# asm 1: mov   <temp1=int64#2,3104(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3104(<ap=%rdi)
+mov   %esi,3104(%rdi)
+
+# qhasm: mem64[ap + 268] = temp2
+# asm 1: mov   <temp2=int64#3,268(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,268(<ap=%rdi)
+mov   %edx,268(%rdi)
+
+# qhasm: temp1 = mem64[ap + 272]
+# asm 1: mov   272(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   272(<ap=%rdi),>temp1=%esi
+mov   272(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 544]
+# asm 1: mov   544(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   544(<ap=%rdi),>temp2=%edx
+mov   544(%rdi),%edx
+
+# qhasm: mem64[ap + 544] = temp1
+# asm 1: mov   <temp1=int64#2,544(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,544(<ap=%rdi)
+mov   %esi,544(%rdi)
+
+# qhasm: mem64[ap + 272] = temp2
+# asm 1: mov   <temp2=int64#3,272(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,272(<ap=%rdi)
+mov   %edx,272(%rdi)
+
+# qhasm: temp1 = mem64[ap + 276]
+# asm 1: mov   276(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   276(<ap=%rdi),>temp1=%esi
+mov   276(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2592]
+# asm 1: mov   2592(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2592(<ap=%rdi),>temp2=%edx
+mov   2592(%rdi),%edx
+
+# qhasm: mem64[ap + 2592] = temp1
+# asm 1: mov   <temp1=int64#2,2592(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2592(<ap=%rdi)
+mov   %esi,2592(%rdi)
+
+# qhasm: mem64[ap + 276] = temp2
+# asm 1: mov   <temp2=int64#3,276(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,276(<ap=%rdi)
+mov   %edx,276(%rdi)
+
+# qhasm: temp1 = mem64[ap + 280]
+# asm 1: mov   280(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   280(<ap=%rdi),>temp1=%esi
+mov   280(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1568]
+# asm 1: mov   1568(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1568(<ap=%rdi),>temp2=%edx
+mov   1568(%rdi),%edx
+
+# qhasm: mem64[ap + 1568] = temp1
+# asm 1: mov   <temp1=int64#2,1568(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1568(<ap=%rdi)
+mov   %esi,1568(%rdi)
+
+# qhasm: mem64[ap + 280] = temp2
+# asm 1: mov   <temp2=int64#3,280(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,280(<ap=%rdi)
+mov   %edx,280(%rdi)
+
+# qhasm: temp1 = mem64[ap + 284]
+# asm 1: mov   284(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   284(<ap=%rdi),>temp1=%esi
+mov   284(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3616]
+# asm 1: mov   3616(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3616(<ap=%rdi),>temp2=%edx
+mov   3616(%rdi),%edx
+
+# qhasm: mem64[ap + 3616] = temp1
+# asm 1: mov   <temp1=int64#2,3616(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3616(<ap=%rdi)
+mov   %esi,3616(%rdi)
+
+# qhasm: mem64[ap + 284] = temp2
+# asm 1: mov   <temp2=int64#3,284(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,284(<ap=%rdi)
+mov   %edx,284(%rdi)
+
+# qhasm: temp1 = mem64[ap + 292]
+# asm 1: mov   292(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   292(<ap=%rdi),>temp1=%esi
+mov   292(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2336]
+# asm 1: mov   2336(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2336(<ap=%rdi),>temp2=%edx
+mov   2336(%rdi),%edx
+
+# qhasm: mem64[ap + 2336] = temp1
+# asm 1: mov   <temp1=int64#2,2336(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2336(<ap=%rdi)
+mov   %esi,2336(%rdi)
+
+# qhasm: mem64[ap + 292] = temp2
+# asm 1: mov   <temp2=int64#3,292(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,292(<ap=%rdi)
+mov   %edx,292(%rdi)
+
+# qhasm: temp1 = mem64[ap + 296]
+# asm 1: mov   296(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   296(<ap=%rdi),>temp1=%esi
+mov   296(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1312]
+# asm 1: mov   1312(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1312(<ap=%rdi),>temp2=%edx
+mov   1312(%rdi),%edx
+
+# qhasm: mem64[ap + 1312] = temp1
+# asm 1: mov   <temp1=int64#2,1312(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1312(<ap=%rdi)
+mov   %esi,1312(%rdi)
+
+# qhasm: mem64[ap + 296] = temp2
+# asm 1: mov   <temp2=int64#3,296(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,296(<ap=%rdi)
+mov   %edx,296(%rdi)
+
+# qhasm: temp1 = mem64[ap + 300]
+# asm 1: mov   300(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   300(<ap=%rdi),>temp1=%esi
+mov   300(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3360]
+# asm 1: mov   3360(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3360(<ap=%rdi),>temp2=%edx
+mov   3360(%rdi),%edx
+
+# qhasm: mem64[ap + 3360] = temp1
+# asm 1: mov   <temp1=int64#2,3360(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3360(<ap=%rdi)
+mov   %esi,3360(%rdi)
+
+# qhasm: mem64[ap + 300] = temp2
+# asm 1: mov   <temp2=int64#3,300(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,300(<ap=%rdi)
+mov   %edx,300(%rdi)
+
+# qhasm: temp1 = mem64[ap + 304]
+# asm 1: mov   304(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   304(<ap=%rdi),>temp1=%esi
+mov   304(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 800]
+# asm 1: mov   800(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   800(<ap=%rdi),>temp2=%edx
+mov   800(%rdi),%edx
+
+# qhasm: mem64[ap + 800] = temp1
+# asm 1: mov   <temp1=int64#2,800(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,800(<ap=%rdi)
+mov   %esi,800(%rdi)
+
+# qhasm: mem64[ap + 304] = temp2
+# asm 1: mov   <temp2=int64#3,304(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,304(<ap=%rdi)
+mov   %edx,304(%rdi)
+
+# qhasm: temp1 = mem64[ap + 308]
+# asm 1: mov   308(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   308(<ap=%rdi),>temp1=%esi
+mov   308(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2848]
+# asm 1: mov   2848(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2848(<ap=%rdi),>temp2=%edx
+mov   2848(%rdi),%edx
+
+# qhasm: mem64[ap + 2848] = temp1
+# asm 1: mov   <temp1=int64#2,2848(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2848(<ap=%rdi)
+mov   %esi,2848(%rdi)
+
+# qhasm: mem64[ap + 308] = temp2
+# asm 1: mov   <temp2=int64#3,308(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,308(<ap=%rdi)
+mov   %edx,308(%rdi)
+
+# qhasm: temp1 = mem64[ap + 312]
+# asm 1: mov   312(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   312(<ap=%rdi),>temp1=%esi
+mov   312(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1824]
+# asm 1: mov   1824(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1824(<ap=%rdi),>temp2=%edx
+mov   1824(%rdi),%edx
+
+# qhasm: mem64[ap + 1824] = temp1
+# asm 1: mov   <temp1=int64#2,1824(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1824(<ap=%rdi)
+mov   %esi,1824(%rdi)
+
+# qhasm: mem64[ap + 312] = temp2
+# asm 1: mov   <temp2=int64#3,312(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,312(<ap=%rdi)
+mov   %edx,312(%rdi)
+
+# qhasm: temp1 = mem64[ap + 316]
+# asm 1: mov   316(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   316(<ap=%rdi),>temp1=%esi
+mov   316(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3872]
+# asm 1: mov   3872(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3872(<ap=%rdi),>temp2=%edx
+mov   3872(%rdi),%edx
+
+# qhasm: mem64[ap + 3872] = temp1
+# asm 1: mov   <temp1=int64#2,3872(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3872(<ap=%rdi)
+mov   %esi,3872(%rdi)
+
+# qhasm: mem64[ap + 316] = temp2
+# asm 1: mov   <temp2=int64#3,316(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,316(<ap=%rdi)
+mov   %edx,316(%rdi)
+
+# qhasm: temp1 = mem64[ap + 324]
+# asm 1: mov   324(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   324(<ap=%rdi),>temp1=%esi
+mov   324(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2208]
+# asm 1: mov   2208(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2208(<ap=%rdi),>temp2=%edx
+mov   2208(%rdi),%edx
+
+# qhasm: mem64[ap + 2208] = temp1
+# asm 1: mov   <temp1=int64#2,2208(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2208(<ap=%rdi)
+mov   %esi,2208(%rdi)
+
+# qhasm: mem64[ap + 324] = temp2
+# asm 1: mov   <temp2=int64#3,324(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,324(<ap=%rdi)
+mov   %edx,324(%rdi)
+
+# qhasm: temp1 = mem64[ap + 328]
+# asm 1: mov   328(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   328(<ap=%rdi),>temp1=%esi
+mov   328(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1184]
+# asm 1: mov   1184(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1184(<ap=%rdi),>temp2=%edx
+mov   1184(%rdi),%edx
+
+# qhasm: mem64[ap + 1184] = temp1
+# asm 1: mov   <temp1=int64#2,1184(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1184(<ap=%rdi)
+mov   %esi,1184(%rdi)
+
+# qhasm: mem64[ap + 328] = temp2
+# asm 1: mov   <temp2=int64#3,328(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,328(<ap=%rdi)
+mov   %edx,328(%rdi)
+
+# qhasm: temp1 = mem64[ap + 332]
+# asm 1: mov   332(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   332(<ap=%rdi),>temp1=%esi
+mov   332(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3232]
+# asm 1: mov   3232(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3232(<ap=%rdi),>temp2=%edx
+mov   3232(%rdi),%edx
+
+# qhasm: mem64[ap + 3232] = temp1
+# asm 1: mov   <temp1=int64#2,3232(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3232(<ap=%rdi)
+mov   %esi,3232(%rdi)
+
+# qhasm: mem64[ap + 332] = temp2
+# asm 1: mov   <temp2=int64#3,332(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,332(<ap=%rdi)
+mov   %edx,332(%rdi)
+
+# qhasm: temp1 = mem64[ap + 336]
+# asm 1: mov   336(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   336(<ap=%rdi),>temp1=%esi
+mov   336(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 672]
+# asm 1: mov   672(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   672(<ap=%rdi),>temp2=%edx
+mov   672(%rdi),%edx
+
+# qhasm: mem64[ap + 672] = temp1
+# asm 1: mov   <temp1=int64#2,672(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,672(<ap=%rdi)
+mov   %esi,672(%rdi)
+
+# qhasm: mem64[ap + 336] = temp2
+# asm 1: mov   <temp2=int64#3,336(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,336(<ap=%rdi)
+mov   %edx,336(%rdi)
+
+# qhasm: temp1 = mem64[ap + 340]
+# asm 1: mov   340(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   340(<ap=%rdi),>temp1=%esi
+mov   340(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2720]
+# asm 1: mov   2720(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2720(<ap=%rdi),>temp2=%edx
+mov   2720(%rdi),%edx
+
+# qhasm: mem64[ap + 2720] = temp1
+# asm 1: mov   <temp1=int64#2,2720(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2720(<ap=%rdi)
+mov   %esi,2720(%rdi)
+
+# qhasm: mem64[ap + 340] = temp2
+# asm 1: mov   <temp2=int64#3,340(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,340(<ap=%rdi)
+mov   %edx,340(%rdi)
+
+# qhasm: temp1 = mem64[ap + 344]
+# asm 1: mov   344(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   344(<ap=%rdi),>temp1=%esi
+mov   344(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1696]
+# asm 1: mov   1696(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1696(<ap=%rdi),>temp2=%edx
+mov   1696(%rdi),%edx
+
+# qhasm: mem64[ap + 1696] = temp1
+# asm 1: mov   <temp1=int64#2,1696(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1696(<ap=%rdi)
+mov   %esi,1696(%rdi)
+
+# qhasm: mem64[ap + 344] = temp2
+# asm 1: mov   <temp2=int64#3,344(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,344(<ap=%rdi)
+mov   %edx,344(%rdi)
+
+# qhasm: temp1 = mem64[ap + 348]
+# asm 1: mov   348(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   348(<ap=%rdi),>temp1=%esi
+mov   348(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3744]
+# asm 1: mov   3744(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3744(<ap=%rdi),>temp2=%edx
+mov   3744(%rdi),%edx
+
+# qhasm: mem64[ap + 3744] = temp1
+# asm 1: mov   <temp1=int64#2,3744(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3744(<ap=%rdi)
+mov   %esi,3744(%rdi)
+
+# qhasm: mem64[ap + 348] = temp2
+# asm 1: mov   <temp2=int64#3,348(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,348(<ap=%rdi)
+mov   %edx,348(%rdi)
+
+# qhasm: temp1 = mem64[ap + 352]
+# asm 1: mov   352(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   352(<ap=%rdi),>temp1=%esi
+mov   352(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 416]
+# asm 1: mov   416(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   416(<ap=%rdi),>temp2=%edx
+mov   416(%rdi),%edx
+
+# qhasm: mem64[ap + 416] = temp1
+# asm 1: mov   <temp1=int64#2,416(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,416(<ap=%rdi)
+mov   %esi,416(%rdi)
+
+# qhasm: mem64[ap + 352] = temp2
+# asm 1: mov   <temp2=int64#3,352(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,352(<ap=%rdi)
+mov   %edx,352(%rdi)
+
+# qhasm: temp1 = mem64[ap + 356]
+# asm 1: mov   356(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   356(<ap=%rdi),>temp1=%esi
+mov   356(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2464]
+# asm 1: mov   2464(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2464(<ap=%rdi),>temp2=%edx
+mov   2464(%rdi),%edx
+
+# qhasm: mem64[ap + 2464] = temp1
+# asm 1: mov   <temp1=int64#2,2464(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2464(<ap=%rdi)
+mov   %esi,2464(%rdi)
+
+# qhasm: mem64[ap + 356] = temp2
+# asm 1: mov   <temp2=int64#3,356(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,356(<ap=%rdi)
+mov   %edx,356(%rdi)
+
+# qhasm: temp1 = mem64[ap + 360]
+# asm 1: mov   360(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   360(<ap=%rdi),>temp1=%esi
+mov   360(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1440]
+# asm 1: mov   1440(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1440(<ap=%rdi),>temp2=%edx
+mov   1440(%rdi),%edx
+
+# qhasm: mem64[ap + 1440] = temp1
+# asm 1: mov   <temp1=int64#2,1440(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1440(<ap=%rdi)
+mov   %esi,1440(%rdi)
+
+# qhasm: mem64[ap + 360] = temp2
+# asm 1: mov   <temp2=int64#3,360(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,360(<ap=%rdi)
+mov   %edx,360(%rdi)
+
+# qhasm: temp1 = mem64[ap + 364]
+# asm 1: mov   364(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   364(<ap=%rdi),>temp1=%esi
+mov   364(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3488]
+# asm 1: mov   3488(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3488(<ap=%rdi),>temp2=%edx
+mov   3488(%rdi),%edx
+
+# qhasm: mem64[ap + 3488] = temp1
+# asm 1: mov   <temp1=int64#2,3488(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3488(<ap=%rdi)
+mov   %esi,3488(%rdi)
+
+# qhasm: mem64[ap + 364] = temp2
+# asm 1: mov   <temp2=int64#3,364(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,364(<ap=%rdi)
+mov   %edx,364(%rdi)
+
+# qhasm: temp1 = mem64[ap + 368]
+# asm 1: mov   368(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   368(<ap=%rdi),>temp1=%esi
+mov   368(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 928]
+# asm 1: mov   928(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   928(<ap=%rdi),>temp2=%edx
+mov   928(%rdi),%edx
+
+# qhasm: mem64[ap + 928] = temp1
+# asm 1: mov   <temp1=int64#2,928(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,928(<ap=%rdi)
+mov   %esi,928(%rdi)
+
+# qhasm: mem64[ap + 368] = temp2
+# asm 1: mov   <temp2=int64#3,368(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,368(<ap=%rdi)
+mov   %edx,368(%rdi)
+
+# qhasm: temp1 = mem64[ap + 372]
+# asm 1: mov   372(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   372(<ap=%rdi),>temp1=%esi
+mov   372(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2976]
+# asm 1: mov   2976(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2976(<ap=%rdi),>temp2=%edx
+mov   2976(%rdi),%edx
+
+# qhasm: mem64[ap + 2976] = temp1
+# asm 1: mov   <temp1=int64#2,2976(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2976(<ap=%rdi)
+mov   %esi,2976(%rdi)
+
+# qhasm: mem64[ap + 372] = temp2
+# asm 1: mov   <temp2=int64#3,372(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,372(<ap=%rdi)
+mov   %edx,372(%rdi)
+
+# qhasm: temp1 = mem64[ap + 376]
+# asm 1: mov   376(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   376(<ap=%rdi),>temp1=%esi
+mov   376(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1952]
+# asm 1: mov   1952(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1952(<ap=%rdi),>temp2=%edx
+mov   1952(%rdi),%edx
+
+# qhasm: mem64[ap + 1952] = temp1
+# asm 1: mov   <temp1=int64#2,1952(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1952(<ap=%rdi)
+mov   %esi,1952(%rdi)
+
+# qhasm: mem64[ap + 376] = temp2
+# asm 1: mov   <temp2=int64#3,376(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,376(<ap=%rdi)
+mov   %edx,376(%rdi)
+
+# qhasm: temp1 = mem64[ap + 380]
+# asm 1: mov   380(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   380(<ap=%rdi),>temp1=%esi
+mov   380(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4000]
+# asm 1: mov   4000(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4000(<ap=%rdi),>temp2=%edx
+mov   4000(%rdi),%edx
+
+# qhasm: mem64[ap + 4000] = temp1
+# asm 1: mov   <temp1=int64#2,4000(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4000(<ap=%rdi)
+mov   %esi,4000(%rdi)
+
+# qhasm: mem64[ap + 380] = temp2
+# asm 1: mov   <temp2=int64#3,380(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,380(<ap=%rdi)
+mov   %edx,380(%rdi)
+
+# qhasm: temp1 = mem64[ap + 388]
+# asm 1: mov   388(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   388(<ap=%rdi),>temp1=%esi
+mov   388(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2144]
+# asm 1: mov   2144(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2144(<ap=%rdi),>temp2=%edx
+mov   2144(%rdi),%edx
+
+# qhasm: mem64[ap + 2144] = temp1
+# asm 1: mov   <temp1=int64#2,2144(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2144(<ap=%rdi)
+mov   %esi,2144(%rdi)
+
+# qhasm: mem64[ap + 388] = temp2
+# asm 1: mov   <temp2=int64#3,388(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,388(<ap=%rdi)
+mov   %edx,388(%rdi)
+
+# qhasm: temp1 = mem64[ap + 392]
+# asm 1: mov   392(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   392(<ap=%rdi),>temp1=%esi
+mov   392(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1120]
+# asm 1: mov   1120(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1120(<ap=%rdi),>temp2=%edx
+mov   1120(%rdi),%edx
+
+# qhasm: mem64[ap + 1120] = temp1
+# asm 1: mov   <temp1=int64#2,1120(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1120(<ap=%rdi)
+mov   %esi,1120(%rdi)
+
+# qhasm: mem64[ap + 392] = temp2
+# asm 1: mov   <temp2=int64#3,392(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,392(<ap=%rdi)
+mov   %edx,392(%rdi)
+
+# qhasm: temp1 = mem64[ap + 396]
+# asm 1: mov   396(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   396(<ap=%rdi),>temp1=%esi
+mov   396(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3168]
+# asm 1: mov   3168(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3168(<ap=%rdi),>temp2=%edx
+mov   3168(%rdi),%edx
+
+# qhasm: mem64[ap + 3168] = temp1
+# asm 1: mov   <temp1=int64#2,3168(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3168(<ap=%rdi)
+mov   %esi,3168(%rdi)
+
+# qhasm: mem64[ap + 396] = temp2
+# asm 1: mov   <temp2=int64#3,396(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,396(<ap=%rdi)
+mov   %edx,396(%rdi)
+
+# qhasm: temp1 = mem64[ap + 400]
+# asm 1: mov   400(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   400(<ap=%rdi),>temp1=%esi
+mov   400(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 608]
+# asm 1: mov   608(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   608(<ap=%rdi),>temp2=%edx
+mov   608(%rdi),%edx
+
+# qhasm: mem64[ap + 608] = temp1
+# asm 1: mov   <temp1=int64#2,608(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,608(<ap=%rdi)
+mov   %esi,608(%rdi)
+
+# qhasm: mem64[ap + 400] = temp2
+# asm 1: mov   <temp2=int64#3,400(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,400(<ap=%rdi)
+mov   %edx,400(%rdi)
+
+# qhasm: temp1 = mem64[ap + 404]
+# asm 1: mov   404(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   404(<ap=%rdi),>temp1=%esi
+mov   404(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2656]
+# asm 1: mov   2656(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2656(<ap=%rdi),>temp2=%edx
+mov   2656(%rdi),%edx
+
+# qhasm: mem64[ap + 2656] = temp1
+# asm 1: mov   <temp1=int64#2,2656(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2656(<ap=%rdi)
+mov   %esi,2656(%rdi)
+
+# qhasm: mem64[ap + 404] = temp2
+# asm 1: mov   <temp2=int64#3,404(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,404(<ap=%rdi)
+mov   %edx,404(%rdi)
+
+# qhasm: temp1 = mem64[ap + 408]
+# asm 1: mov   408(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   408(<ap=%rdi),>temp1=%esi
+mov   408(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1632]
+# asm 1: mov   1632(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1632(<ap=%rdi),>temp2=%edx
+mov   1632(%rdi),%edx
+
+# qhasm: mem64[ap + 1632] = temp1
+# asm 1: mov   <temp1=int64#2,1632(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1632(<ap=%rdi)
+mov   %esi,1632(%rdi)
+
+# qhasm: mem64[ap + 408] = temp2
+# asm 1: mov   <temp2=int64#3,408(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,408(<ap=%rdi)
+mov   %edx,408(%rdi)
+
+# qhasm: temp1 = mem64[ap + 412]
+# asm 1: mov   412(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   412(<ap=%rdi),>temp1=%esi
+mov   412(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3680]
+# asm 1: mov   3680(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3680(<ap=%rdi),>temp2=%edx
+mov   3680(%rdi),%edx
+
+# qhasm: mem64[ap + 3680] = temp1
+# asm 1: mov   <temp1=int64#2,3680(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3680(<ap=%rdi)
+mov   %esi,3680(%rdi)
+
+# qhasm: mem64[ap + 412] = temp2
+# asm 1: mov   <temp2=int64#3,412(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,412(<ap=%rdi)
+mov   %edx,412(%rdi)
+
+# qhasm: temp1 = mem64[ap + 420]
+# asm 1: mov   420(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   420(<ap=%rdi),>temp1=%esi
+mov   420(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2400]
+# asm 1: mov   2400(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2400(<ap=%rdi),>temp2=%edx
+mov   2400(%rdi),%edx
+
+# qhasm: mem64[ap + 2400] = temp1
+# asm 1: mov   <temp1=int64#2,2400(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2400(<ap=%rdi)
+mov   %esi,2400(%rdi)
+
+# qhasm: mem64[ap + 420] = temp2
+# asm 1: mov   <temp2=int64#3,420(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,420(<ap=%rdi)
+mov   %edx,420(%rdi)
+
+# qhasm: temp1 = mem64[ap + 424]
+# asm 1: mov   424(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   424(<ap=%rdi),>temp1=%esi
+mov   424(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1376]
+# asm 1: mov   1376(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1376(<ap=%rdi),>temp2=%edx
+mov   1376(%rdi),%edx
+
+# qhasm: mem64[ap + 1376] = temp1
+# asm 1: mov   <temp1=int64#2,1376(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1376(<ap=%rdi)
+mov   %esi,1376(%rdi)
+
+# qhasm: mem64[ap + 424] = temp2
+# asm 1: mov   <temp2=int64#3,424(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,424(<ap=%rdi)
+mov   %edx,424(%rdi)
+
+# qhasm: temp1 = mem64[ap + 428]
+# asm 1: mov   428(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   428(<ap=%rdi),>temp1=%esi
+mov   428(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3424]
+# asm 1: mov   3424(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3424(<ap=%rdi),>temp2=%edx
+mov   3424(%rdi),%edx
+
+# qhasm: mem64[ap + 3424] = temp1
+# asm 1: mov   <temp1=int64#2,3424(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3424(<ap=%rdi)
+mov   %esi,3424(%rdi)
+
+# qhasm: mem64[ap + 428] = temp2
+# asm 1: mov   <temp2=int64#3,428(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,428(<ap=%rdi)
+mov   %edx,428(%rdi)
+
+# qhasm: temp1 = mem64[ap + 432]
+# asm 1: mov   432(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   432(<ap=%rdi),>temp1=%esi
+mov   432(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 864]
+# asm 1: mov   864(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   864(<ap=%rdi),>temp2=%edx
+mov   864(%rdi),%edx
+
+# qhasm: mem64[ap + 864] = temp1
+# asm 1: mov   <temp1=int64#2,864(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,864(<ap=%rdi)
+mov   %esi,864(%rdi)
+
+# qhasm: mem64[ap + 432] = temp2
+# asm 1: mov   <temp2=int64#3,432(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,432(<ap=%rdi)
+mov   %edx,432(%rdi)
+
+# qhasm: temp1 = mem64[ap + 436]
+# asm 1: mov   436(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   436(<ap=%rdi),>temp1=%esi
+mov   436(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2912]
+# asm 1: mov   2912(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2912(<ap=%rdi),>temp2=%edx
+mov   2912(%rdi),%edx
+
+# qhasm: mem64[ap + 2912] = temp1
+# asm 1: mov   <temp1=int64#2,2912(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2912(<ap=%rdi)
+mov   %esi,2912(%rdi)
+
+# qhasm: mem64[ap + 436] = temp2
+# asm 1: mov   <temp2=int64#3,436(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,436(<ap=%rdi)
+mov   %edx,436(%rdi)
+
+# qhasm: temp1 = mem64[ap + 440]
+# asm 1: mov   440(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   440(<ap=%rdi),>temp1=%esi
+mov   440(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1888]
+# asm 1: mov   1888(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1888(<ap=%rdi),>temp2=%edx
+mov   1888(%rdi),%edx
+
+# qhasm: mem64[ap + 1888] = temp1
+# asm 1: mov   <temp1=int64#2,1888(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1888(<ap=%rdi)
+mov   %esi,1888(%rdi)
+
+# qhasm: mem64[ap + 440] = temp2
+# asm 1: mov   <temp2=int64#3,440(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,440(<ap=%rdi)
+mov   %edx,440(%rdi)
+
+# qhasm: temp1 = mem64[ap + 444]
+# asm 1: mov   444(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   444(<ap=%rdi),>temp1=%esi
+mov   444(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3936]
+# asm 1: mov   3936(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3936(<ap=%rdi),>temp2=%edx
+mov   3936(%rdi),%edx
+
+# qhasm: mem64[ap + 3936] = temp1
+# asm 1: mov   <temp1=int64#2,3936(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3936(<ap=%rdi)
+mov   %esi,3936(%rdi)
+
+# qhasm: mem64[ap + 444] = temp2
+# asm 1: mov   <temp2=int64#3,444(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,444(<ap=%rdi)
+mov   %edx,444(%rdi)
+
+# qhasm: temp1 = mem64[ap + 452]
+# asm 1: mov   452(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   452(<ap=%rdi),>temp1=%esi
+mov   452(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2272]
+# asm 1: mov   2272(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2272(<ap=%rdi),>temp2=%edx
+mov   2272(%rdi),%edx
+
+# qhasm: mem64[ap + 2272] = temp1
+# asm 1: mov   <temp1=int64#2,2272(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2272(<ap=%rdi)
+mov   %esi,2272(%rdi)
+
+# qhasm: mem64[ap + 452] = temp2
+# asm 1: mov   <temp2=int64#3,452(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,452(<ap=%rdi)
+mov   %edx,452(%rdi)
+
+# qhasm: temp1 = mem64[ap + 456]
+# asm 1: mov   456(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   456(<ap=%rdi),>temp1=%esi
+mov   456(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1248]
+# asm 1: mov   1248(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1248(<ap=%rdi),>temp2=%edx
+mov   1248(%rdi),%edx
+
+# qhasm: mem64[ap + 1248] = temp1
+# asm 1: mov   <temp1=int64#2,1248(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1248(<ap=%rdi)
+mov   %esi,1248(%rdi)
+
+# qhasm: mem64[ap + 456] = temp2
+# asm 1: mov   <temp2=int64#3,456(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,456(<ap=%rdi)
+mov   %edx,456(%rdi)
+
+# qhasm: temp1 = mem64[ap + 460]
+# asm 1: mov   460(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   460(<ap=%rdi),>temp1=%esi
+mov   460(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3296]
+# asm 1: mov   3296(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3296(<ap=%rdi),>temp2=%edx
+mov   3296(%rdi),%edx
+
+# qhasm: mem64[ap + 3296] = temp1
+# asm 1: mov   <temp1=int64#2,3296(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3296(<ap=%rdi)
+mov   %esi,3296(%rdi)
+
+# qhasm: mem64[ap + 460] = temp2
+# asm 1: mov   <temp2=int64#3,460(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,460(<ap=%rdi)
+mov   %edx,460(%rdi)
+
+# qhasm: temp1 = mem64[ap + 464]
+# asm 1: mov   464(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   464(<ap=%rdi),>temp1=%esi
+mov   464(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 736]
+# asm 1: mov   736(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   736(<ap=%rdi),>temp2=%edx
+mov   736(%rdi),%edx
+
+# qhasm: mem64[ap + 736] = temp1
+# asm 1: mov   <temp1=int64#2,736(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,736(<ap=%rdi)
+mov   %esi,736(%rdi)
+
+# qhasm: mem64[ap + 464] = temp2
+# asm 1: mov   <temp2=int64#3,464(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,464(<ap=%rdi)
+mov   %edx,464(%rdi)
+
+# qhasm: temp1 = mem64[ap + 468]
+# asm 1: mov   468(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   468(<ap=%rdi),>temp1=%esi
+mov   468(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2784]
+# asm 1: mov   2784(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2784(<ap=%rdi),>temp2=%edx
+mov   2784(%rdi),%edx
+
+# qhasm: mem64[ap + 2784] = temp1
+# asm 1: mov   <temp1=int64#2,2784(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2784(<ap=%rdi)
+mov   %esi,2784(%rdi)
+
+# qhasm: mem64[ap + 468] = temp2
+# asm 1: mov   <temp2=int64#3,468(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,468(<ap=%rdi)
+mov   %edx,468(%rdi)
+
+# qhasm: temp1 = mem64[ap + 472]
+# asm 1: mov   472(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   472(<ap=%rdi),>temp1=%esi
+mov   472(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1760]
+# asm 1: mov   1760(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1760(<ap=%rdi),>temp2=%edx
+mov   1760(%rdi),%edx
+
+# qhasm: mem64[ap + 1760] = temp1
+# asm 1: mov   <temp1=int64#2,1760(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1760(<ap=%rdi)
+mov   %esi,1760(%rdi)
+
+# qhasm: mem64[ap + 472] = temp2
+# asm 1: mov   <temp2=int64#3,472(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,472(<ap=%rdi)
+mov   %edx,472(%rdi)
+
+# qhasm: temp1 = mem64[ap + 476]
+# asm 1: mov   476(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   476(<ap=%rdi),>temp1=%esi
+mov   476(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3808]
+# asm 1: mov   3808(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3808(<ap=%rdi),>temp2=%edx
+mov   3808(%rdi),%edx
+
+# qhasm: mem64[ap + 3808] = temp1
+# asm 1: mov   <temp1=int64#2,3808(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3808(<ap=%rdi)
+mov   %esi,3808(%rdi)
+
+# qhasm: mem64[ap + 476] = temp2
+# asm 1: mov   <temp2=int64#3,476(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,476(<ap=%rdi)
+mov   %edx,476(%rdi)
+
+# qhasm: temp1 = mem64[ap + 484]
+# asm 1: mov   484(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   484(<ap=%rdi),>temp1=%esi
+mov   484(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2528]
+# asm 1: mov   2528(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2528(<ap=%rdi),>temp2=%edx
+mov   2528(%rdi),%edx
+
+# qhasm: mem64[ap + 2528] = temp1
+# asm 1: mov   <temp1=int64#2,2528(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2528(<ap=%rdi)
+mov   %esi,2528(%rdi)
+
+# qhasm: mem64[ap + 484] = temp2
+# asm 1: mov   <temp2=int64#3,484(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,484(<ap=%rdi)
+mov   %edx,484(%rdi)
+
+# qhasm: temp1 = mem64[ap + 488]
+# asm 1: mov   488(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   488(<ap=%rdi),>temp1=%esi
+mov   488(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1504]
+# asm 1: mov   1504(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1504(<ap=%rdi),>temp2=%edx
+mov   1504(%rdi),%edx
+
+# qhasm: mem64[ap + 1504] = temp1
+# asm 1: mov   <temp1=int64#2,1504(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1504(<ap=%rdi)
+mov   %esi,1504(%rdi)
+
+# qhasm: mem64[ap + 488] = temp2
+# asm 1: mov   <temp2=int64#3,488(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,488(<ap=%rdi)
+mov   %edx,488(%rdi)
+
+# qhasm: temp1 = mem64[ap + 492]
+# asm 1: mov   492(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   492(<ap=%rdi),>temp1=%esi
+mov   492(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3552]
+# asm 1: mov   3552(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3552(<ap=%rdi),>temp2=%edx
+mov   3552(%rdi),%edx
+
+# qhasm: mem64[ap + 3552] = temp1
+# asm 1: mov   <temp1=int64#2,3552(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3552(<ap=%rdi)
+mov   %esi,3552(%rdi)
+
+# qhasm: mem64[ap + 492] = temp2
+# asm 1: mov   <temp2=int64#3,492(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,492(<ap=%rdi)
+mov   %edx,492(%rdi)
+
+# qhasm: temp1 = mem64[ap + 496]
+# asm 1: mov   496(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   496(<ap=%rdi),>temp1=%esi
+mov   496(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 992]
+# asm 1: mov   992(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   992(<ap=%rdi),>temp2=%edx
+mov   992(%rdi),%edx
+
+# qhasm: mem64[ap + 992] = temp1
+# asm 1: mov   <temp1=int64#2,992(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,992(<ap=%rdi)
+mov   %esi,992(%rdi)
+
+# qhasm: mem64[ap + 496] = temp2
+# asm 1: mov   <temp2=int64#3,496(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,496(<ap=%rdi)
+mov   %edx,496(%rdi)
+
+# qhasm: temp1 = mem64[ap + 500]
+# asm 1: mov   500(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   500(<ap=%rdi),>temp1=%esi
+mov   500(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3040]
+# asm 1: mov   3040(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3040(<ap=%rdi),>temp2=%edx
+mov   3040(%rdi),%edx
+
+# qhasm: mem64[ap + 3040] = temp1
+# asm 1: mov   <temp1=int64#2,3040(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3040(<ap=%rdi)
+mov   %esi,3040(%rdi)
+
+# qhasm: mem64[ap + 500] = temp2
+# asm 1: mov   <temp2=int64#3,500(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,500(<ap=%rdi)
+mov   %edx,500(%rdi)
+
+# qhasm: temp1 = mem64[ap + 504]
+# asm 1: mov   504(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   504(<ap=%rdi),>temp1=%esi
+mov   504(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2016]
+# asm 1: mov   2016(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2016(<ap=%rdi),>temp2=%edx
+mov   2016(%rdi),%edx
+
+# qhasm: mem64[ap + 2016] = temp1
+# asm 1: mov   <temp1=int64#2,2016(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2016(<ap=%rdi)
+mov   %esi,2016(%rdi)
+
+# qhasm: mem64[ap + 504] = temp2
+# asm 1: mov   <temp2=int64#3,504(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,504(<ap=%rdi)
+mov   %edx,504(%rdi)
+
+# qhasm: temp1 = mem64[ap + 508]
+# asm 1: mov   508(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   508(<ap=%rdi),>temp1=%esi
+mov   508(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4064]
+# asm 1: mov   4064(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4064(<ap=%rdi),>temp2=%edx
+mov   4064(%rdi),%edx
+
+# qhasm: mem64[ap + 4064] = temp1
+# asm 1: mov   <temp1=int64#2,4064(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4064(<ap=%rdi)
+mov   %esi,4064(%rdi)
+
+# qhasm: mem64[ap + 508] = temp2
+# asm 1: mov   <temp2=int64#3,508(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,508(<ap=%rdi)
+mov   %edx,508(%rdi)
+
+# qhasm: temp1 = mem64[ap + 516]
+# asm 1: mov   516(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   516(<ap=%rdi),>temp1=%esi
+mov   516(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2064]
+# asm 1: mov   2064(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2064(<ap=%rdi),>temp2=%edx
+mov   2064(%rdi),%edx
+
+# qhasm: mem64[ap + 2064] = temp1
+# asm 1: mov   <temp1=int64#2,2064(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2064(<ap=%rdi)
+mov   %esi,2064(%rdi)
+
+# qhasm: mem64[ap + 516] = temp2
+# asm 1: mov   <temp2=int64#3,516(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,516(<ap=%rdi)
+mov   %edx,516(%rdi)
+
+# qhasm: temp1 = mem64[ap + 520]
+# asm 1: mov   520(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   520(<ap=%rdi),>temp1=%esi
+mov   520(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1040]
+# asm 1: mov   1040(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1040(<ap=%rdi),>temp2=%edx
+mov   1040(%rdi),%edx
+
+# qhasm: mem64[ap + 1040] = temp1
+# asm 1: mov   <temp1=int64#2,1040(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1040(<ap=%rdi)
+mov   %esi,1040(%rdi)
+
+# qhasm: mem64[ap + 520] = temp2
+# asm 1: mov   <temp2=int64#3,520(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,520(<ap=%rdi)
+mov   %edx,520(%rdi)
+
+# qhasm: temp1 = mem64[ap + 524]
+# asm 1: mov   524(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   524(<ap=%rdi),>temp1=%esi
+mov   524(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3088]
+# asm 1: mov   3088(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3088(<ap=%rdi),>temp2=%edx
+mov   3088(%rdi),%edx
+
+# qhasm: mem64[ap + 3088] = temp1
+# asm 1: mov   <temp1=int64#2,3088(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3088(<ap=%rdi)
+mov   %esi,3088(%rdi)
+
+# qhasm: mem64[ap + 524] = temp2
+# asm 1: mov   <temp2=int64#3,524(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,524(<ap=%rdi)
+mov   %edx,524(%rdi)
+
+# qhasm: temp1 = mem64[ap + 532]
+# asm 1: mov   532(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   532(<ap=%rdi),>temp1=%esi
+mov   532(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2576]
+# asm 1: mov   2576(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2576(<ap=%rdi),>temp2=%edx
+mov   2576(%rdi),%edx
+
+# qhasm: mem64[ap + 2576] = temp1
+# asm 1: mov   <temp1=int64#2,2576(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2576(<ap=%rdi)
+mov   %esi,2576(%rdi)
+
+# qhasm: mem64[ap + 532] = temp2
+# asm 1: mov   <temp2=int64#3,532(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,532(<ap=%rdi)
+mov   %edx,532(%rdi)
+
+# qhasm: temp1 = mem64[ap + 536]
+# asm 1: mov   536(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   536(<ap=%rdi),>temp1=%esi
+mov   536(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1552]
+# asm 1: mov   1552(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1552(<ap=%rdi),>temp2=%edx
+mov   1552(%rdi),%edx
+
+# qhasm: mem64[ap + 1552] = temp1
+# asm 1: mov   <temp1=int64#2,1552(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1552(<ap=%rdi)
+mov   %esi,1552(%rdi)
+
+# qhasm: mem64[ap + 536] = temp2
+# asm 1: mov   <temp2=int64#3,536(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,536(<ap=%rdi)
+mov   %edx,536(%rdi)
+
+# qhasm: temp1 = mem64[ap + 540]
+# asm 1: mov   540(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   540(<ap=%rdi),>temp1=%esi
+mov   540(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3600]
+# asm 1: mov   3600(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3600(<ap=%rdi),>temp2=%edx
+mov   3600(%rdi),%edx
+
+# qhasm: mem64[ap + 3600] = temp1
+# asm 1: mov   <temp1=int64#2,3600(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3600(<ap=%rdi)
+mov   %esi,3600(%rdi)
+
+# qhasm: mem64[ap + 540] = temp2
+# asm 1: mov   <temp2=int64#3,540(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,540(<ap=%rdi)
+mov   %edx,540(%rdi)
+
+# qhasm: temp1 = mem64[ap + 548]
+# asm 1: mov   548(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   548(<ap=%rdi),>temp1=%esi
+mov   548(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2320]
+# asm 1: mov   2320(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2320(<ap=%rdi),>temp2=%edx
+mov   2320(%rdi),%edx
+
+# qhasm: mem64[ap + 2320] = temp1
+# asm 1: mov   <temp1=int64#2,2320(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2320(<ap=%rdi)
+mov   %esi,2320(%rdi)
+
+# qhasm: mem64[ap + 548] = temp2
+# asm 1: mov   <temp2=int64#3,548(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,548(<ap=%rdi)
+mov   %edx,548(%rdi)
+
+# qhasm: temp1 = mem64[ap + 552]
+# asm 1: mov   552(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   552(<ap=%rdi),>temp1=%esi
+mov   552(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1296]
+# asm 1: mov   1296(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1296(<ap=%rdi),>temp2=%edx
+mov   1296(%rdi),%edx
+
+# qhasm: mem64[ap + 1296] = temp1
+# asm 1: mov   <temp1=int64#2,1296(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1296(<ap=%rdi)
+mov   %esi,1296(%rdi)
+
+# qhasm: mem64[ap + 552] = temp2
+# asm 1: mov   <temp2=int64#3,552(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,552(<ap=%rdi)
+mov   %edx,552(%rdi)
+
+# qhasm: temp1 = mem64[ap + 556]
+# asm 1: mov   556(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   556(<ap=%rdi),>temp1=%esi
+mov   556(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3344]
+# asm 1: mov   3344(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3344(<ap=%rdi),>temp2=%edx
+mov   3344(%rdi),%edx
+
+# qhasm: mem64[ap + 3344] = temp1
+# asm 1: mov   <temp1=int64#2,3344(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3344(<ap=%rdi)
+mov   %esi,3344(%rdi)
+
+# qhasm: mem64[ap + 556] = temp2
+# asm 1: mov   <temp2=int64#3,556(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,556(<ap=%rdi)
+mov   %edx,556(%rdi)
+
+# qhasm: temp1 = mem64[ap + 560]
+# asm 1: mov   560(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   560(<ap=%rdi),>temp1=%esi
+mov   560(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 784]
+# asm 1: mov   784(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   784(<ap=%rdi),>temp2=%edx
+mov   784(%rdi),%edx
+
+# qhasm: mem64[ap + 784] = temp1
+# asm 1: mov   <temp1=int64#2,784(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,784(<ap=%rdi)
+mov   %esi,784(%rdi)
+
+# qhasm: mem64[ap + 560] = temp2
+# asm 1: mov   <temp2=int64#3,560(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,560(<ap=%rdi)
+mov   %edx,560(%rdi)
+
+# qhasm: temp1 = mem64[ap + 564]
+# asm 1: mov   564(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   564(<ap=%rdi),>temp1=%esi
+mov   564(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2832]
+# asm 1: mov   2832(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2832(<ap=%rdi),>temp2=%edx
+mov   2832(%rdi),%edx
+
+# qhasm: mem64[ap + 2832] = temp1
+# asm 1: mov   <temp1=int64#2,2832(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2832(<ap=%rdi)
+mov   %esi,2832(%rdi)
+
+# qhasm: mem64[ap + 564] = temp2
+# asm 1: mov   <temp2=int64#3,564(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,564(<ap=%rdi)
+mov   %edx,564(%rdi)
+
+# qhasm: temp1 = mem64[ap + 568]
+# asm 1: mov   568(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   568(<ap=%rdi),>temp1=%esi
+mov   568(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1808]
+# asm 1: mov   1808(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1808(<ap=%rdi),>temp2=%edx
+mov   1808(%rdi),%edx
+
+# qhasm: mem64[ap + 1808] = temp1
+# asm 1: mov   <temp1=int64#2,1808(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1808(<ap=%rdi)
+mov   %esi,1808(%rdi)
+
+# qhasm: mem64[ap + 568] = temp2
+# asm 1: mov   <temp2=int64#3,568(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,568(<ap=%rdi)
+mov   %edx,568(%rdi)
+
+# qhasm: temp1 = mem64[ap + 572]
+# asm 1: mov   572(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   572(<ap=%rdi),>temp1=%esi
+mov   572(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3856]
+# asm 1: mov   3856(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3856(<ap=%rdi),>temp2=%edx
+mov   3856(%rdi),%edx
+
+# qhasm: mem64[ap + 3856] = temp1
+# asm 1: mov   <temp1=int64#2,3856(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3856(<ap=%rdi)
+mov   %esi,3856(%rdi)
+
+# qhasm: mem64[ap + 572] = temp2
+# asm 1: mov   <temp2=int64#3,572(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,572(<ap=%rdi)
+mov   %edx,572(%rdi)
+
+# qhasm: temp1 = mem64[ap + 580]
+# asm 1: mov   580(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   580(<ap=%rdi),>temp1=%esi
+mov   580(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2192]
+# asm 1: mov   2192(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2192(<ap=%rdi),>temp2=%edx
+mov   2192(%rdi),%edx
+
+# qhasm: mem64[ap + 2192] = temp1
+# asm 1: mov   <temp1=int64#2,2192(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2192(<ap=%rdi)
+mov   %esi,2192(%rdi)
+
+# qhasm: mem64[ap + 580] = temp2
+# asm 1: mov   <temp2=int64#3,580(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,580(<ap=%rdi)
+mov   %edx,580(%rdi)
+
+# qhasm: temp1 = mem64[ap + 584]
+# asm 1: mov   584(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   584(<ap=%rdi),>temp1=%esi
+mov   584(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1168]
+# asm 1: mov   1168(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1168(<ap=%rdi),>temp2=%edx
+mov   1168(%rdi),%edx
+
+# qhasm: mem64[ap + 1168] = temp1
+# asm 1: mov   <temp1=int64#2,1168(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1168(<ap=%rdi)
+mov   %esi,1168(%rdi)
+
+# qhasm: mem64[ap + 584] = temp2
+# asm 1: mov   <temp2=int64#3,584(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,584(<ap=%rdi)
+mov   %edx,584(%rdi)
+
+# qhasm: temp1 = mem64[ap + 588]
+# asm 1: mov   588(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   588(<ap=%rdi),>temp1=%esi
+mov   588(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3216]
+# asm 1: mov   3216(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3216(<ap=%rdi),>temp2=%edx
+mov   3216(%rdi),%edx
+
+# qhasm: mem64[ap + 3216] = temp1
+# asm 1: mov   <temp1=int64#2,3216(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3216(<ap=%rdi)
+mov   %esi,3216(%rdi)
+
+# qhasm: mem64[ap + 588] = temp2
+# asm 1: mov   <temp2=int64#3,588(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,588(<ap=%rdi)
+mov   %edx,588(%rdi)
+
+# qhasm: temp1 = mem64[ap + 592]
+# asm 1: mov   592(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   592(<ap=%rdi),>temp1=%esi
+mov   592(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 656]
+# asm 1: mov   656(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   656(<ap=%rdi),>temp2=%edx
+mov   656(%rdi),%edx
+
+# qhasm: mem64[ap + 656] = temp1
+# asm 1: mov   <temp1=int64#2,656(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,656(<ap=%rdi)
+mov   %esi,656(%rdi)
+
+# qhasm: mem64[ap + 592] = temp2
+# asm 1: mov   <temp2=int64#3,592(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,592(<ap=%rdi)
+mov   %edx,592(%rdi)
+
+# qhasm: temp1 = mem64[ap + 596]
+# asm 1: mov   596(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   596(<ap=%rdi),>temp1=%esi
+mov   596(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2704]
+# asm 1: mov   2704(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2704(<ap=%rdi),>temp2=%edx
+mov   2704(%rdi),%edx
+
+# qhasm: mem64[ap + 2704] = temp1
+# asm 1: mov   <temp1=int64#2,2704(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2704(<ap=%rdi)
+mov   %esi,2704(%rdi)
+
+# qhasm: mem64[ap + 596] = temp2
+# asm 1: mov   <temp2=int64#3,596(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,596(<ap=%rdi)
+mov   %edx,596(%rdi)
+
+# qhasm: temp1 = mem64[ap + 600]
+# asm 1: mov   600(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   600(<ap=%rdi),>temp1=%esi
+mov   600(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1680]
+# asm 1: mov   1680(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1680(<ap=%rdi),>temp2=%edx
+mov   1680(%rdi),%edx
+
+# qhasm: mem64[ap + 1680] = temp1
+# asm 1: mov   <temp1=int64#2,1680(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1680(<ap=%rdi)
+mov   %esi,1680(%rdi)
+
+# qhasm: mem64[ap + 600] = temp2
+# asm 1: mov   <temp2=int64#3,600(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,600(<ap=%rdi)
+mov   %edx,600(%rdi)
+
+# qhasm: temp1 = mem64[ap + 604]
+# asm 1: mov   604(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   604(<ap=%rdi),>temp1=%esi
+mov   604(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3728]
+# asm 1: mov   3728(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3728(<ap=%rdi),>temp2=%edx
+mov   3728(%rdi),%edx
+
+# qhasm: mem64[ap + 3728] = temp1
+# asm 1: mov   <temp1=int64#2,3728(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3728(<ap=%rdi)
+mov   %esi,3728(%rdi)
+
+# qhasm: mem64[ap + 604] = temp2
+# asm 1: mov   <temp2=int64#3,604(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,604(<ap=%rdi)
+mov   %edx,604(%rdi)
+
+# qhasm: temp1 = mem64[ap + 612]
+# asm 1: mov   612(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   612(<ap=%rdi),>temp1=%esi
+mov   612(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2448]
+# asm 1: mov   2448(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2448(<ap=%rdi),>temp2=%edx
+mov   2448(%rdi),%edx
+
+# qhasm: mem64[ap + 2448] = temp1
+# asm 1: mov   <temp1=int64#2,2448(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2448(<ap=%rdi)
+mov   %esi,2448(%rdi)
+
+# qhasm: mem64[ap + 612] = temp2
+# asm 1: mov   <temp2=int64#3,612(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,612(<ap=%rdi)
+mov   %edx,612(%rdi)
+
+# qhasm: temp1 = mem64[ap + 616]
+# asm 1: mov   616(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   616(<ap=%rdi),>temp1=%esi
+mov   616(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1424]
+# asm 1: mov   1424(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1424(<ap=%rdi),>temp2=%edx
+mov   1424(%rdi),%edx
+
+# qhasm: mem64[ap + 1424] = temp1
+# asm 1: mov   <temp1=int64#2,1424(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1424(<ap=%rdi)
+mov   %esi,1424(%rdi)
+
+# qhasm: mem64[ap + 616] = temp2
+# asm 1: mov   <temp2=int64#3,616(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,616(<ap=%rdi)
+mov   %edx,616(%rdi)
+
+# qhasm: temp1 = mem64[ap + 620]
+# asm 1: mov   620(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   620(<ap=%rdi),>temp1=%esi
+mov   620(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3472]
+# asm 1: mov   3472(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3472(<ap=%rdi),>temp2=%edx
+mov   3472(%rdi),%edx
+
+# qhasm: mem64[ap + 3472] = temp1
+# asm 1: mov   <temp1=int64#2,3472(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3472(<ap=%rdi)
+mov   %esi,3472(%rdi)
+
+# qhasm: mem64[ap + 620] = temp2
+# asm 1: mov   <temp2=int64#3,620(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,620(<ap=%rdi)
+mov   %edx,620(%rdi)
+
+# qhasm: temp1 = mem64[ap + 624]
+# asm 1: mov   624(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   624(<ap=%rdi),>temp1=%esi
+mov   624(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 912]
+# asm 1: mov   912(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   912(<ap=%rdi),>temp2=%edx
+mov   912(%rdi),%edx
+
+# qhasm: mem64[ap + 912] = temp1
+# asm 1: mov   <temp1=int64#2,912(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,912(<ap=%rdi)
+mov   %esi,912(%rdi)
+
+# qhasm: mem64[ap + 624] = temp2
+# asm 1: mov   <temp2=int64#3,624(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,624(<ap=%rdi)
+mov   %edx,624(%rdi)
+
+# qhasm: temp1 = mem64[ap + 628]
+# asm 1: mov   628(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   628(<ap=%rdi),>temp1=%esi
+mov   628(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2960]
+# asm 1: mov   2960(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2960(<ap=%rdi),>temp2=%edx
+mov   2960(%rdi),%edx
+
+# qhasm: mem64[ap + 2960] = temp1
+# asm 1: mov   <temp1=int64#2,2960(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2960(<ap=%rdi)
+mov   %esi,2960(%rdi)
+
+# qhasm: mem64[ap + 628] = temp2
+# asm 1: mov   <temp2=int64#3,628(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,628(<ap=%rdi)
+mov   %edx,628(%rdi)
+
+# qhasm: temp1 = mem64[ap + 632]
+# asm 1: mov   632(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   632(<ap=%rdi),>temp1=%esi
+mov   632(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1936]
+# asm 1: mov   1936(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1936(<ap=%rdi),>temp2=%edx
+mov   1936(%rdi),%edx
+
+# qhasm: mem64[ap + 1936] = temp1
+# asm 1: mov   <temp1=int64#2,1936(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1936(<ap=%rdi)
+mov   %esi,1936(%rdi)
+
+# qhasm: mem64[ap + 632] = temp2
+# asm 1: mov   <temp2=int64#3,632(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,632(<ap=%rdi)
+mov   %edx,632(%rdi)
+
+# qhasm: temp1 = mem64[ap + 636]
+# asm 1: mov   636(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   636(<ap=%rdi),>temp1=%esi
+mov   636(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3984]
+# asm 1: mov   3984(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3984(<ap=%rdi),>temp2=%edx
+mov   3984(%rdi),%edx
+
+# qhasm: mem64[ap + 3984] = temp1
+# asm 1: mov   <temp1=int64#2,3984(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3984(<ap=%rdi)
+mov   %esi,3984(%rdi)
+
+# qhasm: mem64[ap + 636] = temp2
+# asm 1: mov   <temp2=int64#3,636(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,636(<ap=%rdi)
+mov   %edx,636(%rdi)
+
+# qhasm: temp1 = mem64[ap + 644]
+# asm 1: mov   644(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   644(<ap=%rdi),>temp1=%esi
+mov   644(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2128]
+# asm 1: mov   2128(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2128(<ap=%rdi),>temp2=%edx
+mov   2128(%rdi),%edx
+
+# qhasm: mem64[ap + 2128] = temp1
+# asm 1: mov   <temp1=int64#2,2128(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2128(<ap=%rdi)
+mov   %esi,2128(%rdi)
+
+# qhasm: mem64[ap + 644] = temp2
+# asm 1: mov   <temp2=int64#3,644(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,644(<ap=%rdi)
+mov   %edx,644(%rdi)
+
+# qhasm: temp1 = mem64[ap + 648]
+# asm 1: mov   648(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   648(<ap=%rdi),>temp1=%esi
+mov   648(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1104]
+# asm 1: mov   1104(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1104(<ap=%rdi),>temp2=%edx
+mov   1104(%rdi),%edx
+
+# qhasm: mem64[ap + 1104] = temp1
+# asm 1: mov   <temp1=int64#2,1104(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1104(<ap=%rdi)
+mov   %esi,1104(%rdi)
+
+# qhasm: mem64[ap + 648] = temp2
+# asm 1: mov   <temp2=int64#3,648(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,648(<ap=%rdi)
+mov   %edx,648(%rdi)
+
+# qhasm: temp1 = mem64[ap + 652]
+# asm 1: mov   652(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   652(<ap=%rdi),>temp1=%esi
+mov   652(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3152]
+# asm 1: mov   3152(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3152(<ap=%rdi),>temp2=%edx
+mov   3152(%rdi),%edx
+
+# qhasm: mem64[ap + 3152] = temp1
+# asm 1: mov   <temp1=int64#2,3152(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3152(<ap=%rdi)
+mov   %esi,3152(%rdi)
+
+# qhasm: mem64[ap + 652] = temp2
+# asm 1: mov   <temp2=int64#3,652(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,652(<ap=%rdi)
+mov   %edx,652(%rdi)
+
+# qhasm: temp1 = mem64[ap + 660]
+# asm 1: mov   660(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   660(<ap=%rdi),>temp1=%esi
+mov   660(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2640]
+# asm 1: mov   2640(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2640(<ap=%rdi),>temp2=%edx
+mov   2640(%rdi),%edx
+
+# qhasm: mem64[ap + 2640] = temp1
+# asm 1: mov   <temp1=int64#2,2640(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2640(<ap=%rdi)
+mov   %esi,2640(%rdi)
+
+# qhasm: mem64[ap + 660] = temp2
+# asm 1: mov   <temp2=int64#3,660(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,660(<ap=%rdi)
+mov   %edx,660(%rdi)
+
+# qhasm: temp1 = mem64[ap + 664]
+# asm 1: mov   664(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   664(<ap=%rdi),>temp1=%esi
+mov   664(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1616]
+# asm 1: mov   1616(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1616(<ap=%rdi),>temp2=%edx
+mov   1616(%rdi),%edx
+
+# qhasm: mem64[ap + 1616] = temp1
+# asm 1: mov   <temp1=int64#2,1616(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1616(<ap=%rdi)
+mov   %esi,1616(%rdi)
+
+# qhasm: mem64[ap + 664] = temp2
+# asm 1: mov   <temp2=int64#3,664(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,664(<ap=%rdi)
+mov   %edx,664(%rdi)
+
+# qhasm: temp1 = mem64[ap + 668]
+# asm 1: mov   668(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   668(<ap=%rdi),>temp1=%esi
+mov   668(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3664]
+# asm 1: mov   3664(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3664(<ap=%rdi),>temp2=%edx
+mov   3664(%rdi),%edx
+
+# qhasm: mem64[ap + 3664] = temp1
+# asm 1: mov   <temp1=int64#2,3664(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3664(<ap=%rdi)
+mov   %esi,3664(%rdi)
+
+# qhasm: mem64[ap + 668] = temp2
+# asm 1: mov   <temp2=int64#3,668(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,668(<ap=%rdi)
+mov   %edx,668(%rdi)
+
+# qhasm: temp1 = mem64[ap + 676]
+# asm 1: mov   676(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   676(<ap=%rdi),>temp1=%esi
+mov   676(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2384]
+# asm 1: mov   2384(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2384(<ap=%rdi),>temp2=%edx
+mov   2384(%rdi),%edx
+
+# qhasm: mem64[ap + 2384] = temp1
+# asm 1: mov   <temp1=int64#2,2384(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2384(<ap=%rdi)
+mov   %esi,2384(%rdi)
+
+# qhasm: mem64[ap + 676] = temp2
+# asm 1: mov   <temp2=int64#3,676(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,676(<ap=%rdi)
+mov   %edx,676(%rdi)
+
+# qhasm: temp1 = mem64[ap + 680]
+# asm 1: mov   680(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   680(<ap=%rdi),>temp1=%esi
+mov   680(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1360]
+# asm 1: mov   1360(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1360(<ap=%rdi),>temp2=%edx
+mov   1360(%rdi),%edx
+
+# qhasm: mem64[ap + 1360] = temp1
+# asm 1: mov   <temp1=int64#2,1360(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1360(<ap=%rdi)
+mov   %esi,1360(%rdi)
+
+# qhasm: mem64[ap + 680] = temp2
+# asm 1: mov   <temp2=int64#3,680(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,680(<ap=%rdi)
+mov   %edx,680(%rdi)
+
+# qhasm: temp1 = mem64[ap + 684]
+# asm 1: mov   684(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   684(<ap=%rdi),>temp1=%esi
+mov   684(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3408]
+# asm 1: mov   3408(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3408(<ap=%rdi),>temp2=%edx
+mov   3408(%rdi),%edx
+
+# qhasm: mem64[ap + 3408] = temp1
+# asm 1: mov   <temp1=int64#2,3408(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3408(<ap=%rdi)
+mov   %esi,3408(%rdi)
+
+# qhasm: mem64[ap + 684] = temp2
+# asm 1: mov   <temp2=int64#3,684(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,684(<ap=%rdi)
+mov   %edx,684(%rdi)
+
+# qhasm: temp1 = mem64[ap + 688]
+# asm 1: mov   688(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   688(<ap=%rdi),>temp1=%esi
+mov   688(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 848]
+# asm 1: mov   848(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   848(<ap=%rdi),>temp2=%edx
+mov   848(%rdi),%edx
+
+# qhasm: mem64[ap + 848] = temp1
+# asm 1: mov   <temp1=int64#2,848(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,848(<ap=%rdi)
+mov   %esi,848(%rdi)
+
+# qhasm: mem64[ap + 688] = temp2
+# asm 1: mov   <temp2=int64#3,688(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,688(<ap=%rdi)
+mov   %edx,688(%rdi)
+
+# qhasm: temp1 = mem64[ap + 692]
+# asm 1: mov   692(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   692(<ap=%rdi),>temp1=%esi
+mov   692(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2896]
+# asm 1: mov   2896(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2896(<ap=%rdi),>temp2=%edx
+mov   2896(%rdi),%edx
+
+# qhasm: mem64[ap + 2896] = temp1
+# asm 1: mov   <temp1=int64#2,2896(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2896(<ap=%rdi)
+mov   %esi,2896(%rdi)
+
+# qhasm: mem64[ap + 692] = temp2
+# asm 1: mov   <temp2=int64#3,692(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,692(<ap=%rdi)
+mov   %edx,692(%rdi)
+
+# qhasm: temp1 = mem64[ap + 696]
+# asm 1: mov   696(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   696(<ap=%rdi),>temp1=%esi
+mov   696(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1872]
+# asm 1: mov   1872(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1872(<ap=%rdi),>temp2=%edx
+mov   1872(%rdi),%edx
+
+# qhasm: mem64[ap + 1872] = temp1
+# asm 1: mov   <temp1=int64#2,1872(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1872(<ap=%rdi)
+mov   %esi,1872(%rdi)
+
+# qhasm: mem64[ap + 696] = temp2
+# asm 1: mov   <temp2=int64#3,696(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,696(<ap=%rdi)
+mov   %edx,696(%rdi)
+
+# qhasm: temp1 = mem64[ap + 700]
+# asm 1: mov   700(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   700(<ap=%rdi),>temp1=%esi
+mov   700(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3920]
+# asm 1: mov   3920(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3920(<ap=%rdi),>temp2=%edx
+mov   3920(%rdi),%edx
+
+# qhasm: mem64[ap + 3920] = temp1
+# asm 1: mov   <temp1=int64#2,3920(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3920(<ap=%rdi)
+mov   %esi,3920(%rdi)
+
+# qhasm: mem64[ap + 700] = temp2
+# asm 1: mov   <temp2=int64#3,700(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,700(<ap=%rdi)
+mov   %edx,700(%rdi)
+
+# qhasm: temp1 = mem64[ap + 708]
+# asm 1: mov   708(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   708(<ap=%rdi),>temp1=%esi
+mov   708(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2256]
+# asm 1: mov   2256(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2256(<ap=%rdi),>temp2=%edx
+mov   2256(%rdi),%edx
+
+# qhasm: mem64[ap + 2256] = temp1
+# asm 1: mov   <temp1=int64#2,2256(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2256(<ap=%rdi)
+mov   %esi,2256(%rdi)
+
+# qhasm: mem64[ap + 708] = temp2
+# asm 1: mov   <temp2=int64#3,708(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,708(<ap=%rdi)
+mov   %edx,708(%rdi)
+
+# qhasm: temp1 = mem64[ap + 712]
+# asm 1: mov   712(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   712(<ap=%rdi),>temp1=%esi
+mov   712(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1232]
+# asm 1: mov   1232(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1232(<ap=%rdi),>temp2=%edx
+mov   1232(%rdi),%edx
+
+# qhasm: mem64[ap + 1232] = temp1
+# asm 1: mov   <temp1=int64#2,1232(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1232(<ap=%rdi)
+mov   %esi,1232(%rdi)
+
+# qhasm: mem64[ap + 712] = temp2
+# asm 1: mov   <temp2=int64#3,712(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,712(<ap=%rdi)
+mov   %edx,712(%rdi)
+
+# qhasm: temp1 = mem64[ap + 716]
+# asm 1: mov   716(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   716(<ap=%rdi),>temp1=%esi
+mov   716(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3280]
+# asm 1: mov   3280(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3280(<ap=%rdi),>temp2=%edx
+mov   3280(%rdi),%edx
+
+# qhasm: mem64[ap + 3280] = temp1
+# asm 1: mov   <temp1=int64#2,3280(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3280(<ap=%rdi)
+mov   %esi,3280(%rdi)
+
+# qhasm: mem64[ap + 716] = temp2
+# asm 1: mov   <temp2=int64#3,716(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,716(<ap=%rdi)
+mov   %edx,716(%rdi)
+
+# qhasm: temp1 = mem64[ap + 724]
+# asm 1: mov   724(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   724(<ap=%rdi),>temp1=%esi
+mov   724(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2768]
+# asm 1: mov   2768(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2768(<ap=%rdi),>temp2=%edx
+mov   2768(%rdi),%edx
+
+# qhasm: mem64[ap + 2768] = temp1
+# asm 1: mov   <temp1=int64#2,2768(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2768(<ap=%rdi)
+mov   %esi,2768(%rdi)
+
+# qhasm: mem64[ap + 724] = temp2
+# asm 1: mov   <temp2=int64#3,724(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,724(<ap=%rdi)
+mov   %edx,724(%rdi)
+
+# qhasm: temp1 = mem64[ap + 728]
+# asm 1: mov   728(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   728(<ap=%rdi),>temp1=%esi
+mov   728(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1744]
+# asm 1: mov   1744(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1744(<ap=%rdi),>temp2=%edx
+mov   1744(%rdi),%edx
+
+# qhasm: mem64[ap + 1744] = temp1
+# asm 1: mov   <temp1=int64#2,1744(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1744(<ap=%rdi)
+mov   %esi,1744(%rdi)
+
+# qhasm: mem64[ap + 728] = temp2
+# asm 1: mov   <temp2=int64#3,728(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,728(<ap=%rdi)
+mov   %edx,728(%rdi)
+
+# qhasm: temp1 = mem64[ap + 732]
+# asm 1: mov   732(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   732(<ap=%rdi),>temp1=%esi
+mov   732(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3792]
+# asm 1: mov   3792(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3792(<ap=%rdi),>temp2=%edx
+mov   3792(%rdi),%edx
+
+# qhasm: mem64[ap + 3792] = temp1
+# asm 1: mov   <temp1=int64#2,3792(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3792(<ap=%rdi)
+mov   %esi,3792(%rdi)
+
+# qhasm: mem64[ap + 732] = temp2
+# asm 1: mov   <temp2=int64#3,732(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,732(<ap=%rdi)
+mov   %edx,732(%rdi)
+
+# qhasm: temp1 = mem64[ap + 740]
+# asm 1: mov   740(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   740(<ap=%rdi),>temp1=%esi
+mov   740(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2512]
+# asm 1: mov   2512(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2512(<ap=%rdi),>temp2=%edx
+mov   2512(%rdi),%edx
+
+# qhasm: mem64[ap + 2512] = temp1
+# asm 1: mov   <temp1=int64#2,2512(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2512(<ap=%rdi)
+mov   %esi,2512(%rdi)
+
+# qhasm: mem64[ap + 740] = temp2
+# asm 1: mov   <temp2=int64#3,740(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,740(<ap=%rdi)
+mov   %edx,740(%rdi)
+
+# qhasm: temp1 = mem64[ap + 744]
+# asm 1: mov   744(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   744(<ap=%rdi),>temp1=%esi
+mov   744(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1488]
+# asm 1: mov   1488(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1488(<ap=%rdi),>temp2=%edx
+mov   1488(%rdi),%edx
+
+# qhasm: mem64[ap + 1488] = temp1
+# asm 1: mov   <temp1=int64#2,1488(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1488(<ap=%rdi)
+mov   %esi,1488(%rdi)
+
+# qhasm: mem64[ap + 744] = temp2
+# asm 1: mov   <temp2=int64#3,744(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,744(<ap=%rdi)
+mov   %edx,744(%rdi)
+
+# qhasm: temp1 = mem64[ap + 748]
+# asm 1: mov   748(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   748(<ap=%rdi),>temp1=%esi
+mov   748(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3536]
+# asm 1: mov   3536(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3536(<ap=%rdi),>temp2=%edx
+mov   3536(%rdi),%edx
+
+# qhasm: mem64[ap + 3536] = temp1
+# asm 1: mov   <temp1=int64#2,3536(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3536(<ap=%rdi)
+mov   %esi,3536(%rdi)
+
+# qhasm: mem64[ap + 748] = temp2
+# asm 1: mov   <temp2=int64#3,748(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,748(<ap=%rdi)
+mov   %edx,748(%rdi)
+
+# qhasm: temp1 = mem64[ap + 752]
+# asm 1: mov   752(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   752(<ap=%rdi),>temp1=%esi
+mov   752(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 976]
+# asm 1: mov   976(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   976(<ap=%rdi),>temp2=%edx
+mov   976(%rdi),%edx
+
+# qhasm: mem64[ap + 976] = temp1
+# asm 1: mov   <temp1=int64#2,976(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,976(<ap=%rdi)
+mov   %esi,976(%rdi)
+
+# qhasm: mem64[ap + 752] = temp2
+# asm 1: mov   <temp2=int64#3,752(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,752(<ap=%rdi)
+mov   %edx,752(%rdi)
+
+# qhasm: temp1 = mem64[ap + 756]
+# asm 1: mov   756(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   756(<ap=%rdi),>temp1=%esi
+mov   756(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3024]
+# asm 1: mov   3024(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3024(<ap=%rdi),>temp2=%edx
+mov   3024(%rdi),%edx
+
+# qhasm: mem64[ap + 3024] = temp1
+# asm 1: mov   <temp1=int64#2,3024(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3024(<ap=%rdi)
+mov   %esi,3024(%rdi)
+
+# qhasm: mem64[ap + 756] = temp2
+# asm 1: mov   <temp2=int64#3,756(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,756(<ap=%rdi)
+mov   %edx,756(%rdi)
+
+# qhasm: temp1 = mem64[ap + 760]
+# asm 1: mov   760(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   760(<ap=%rdi),>temp1=%esi
+mov   760(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2000]
+# asm 1: mov   2000(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2000(<ap=%rdi),>temp2=%edx
+mov   2000(%rdi),%edx
+
+# qhasm: mem64[ap + 2000] = temp1
+# asm 1: mov   <temp1=int64#2,2000(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2000(<ap=%rdi)
+mov   %esi,2000(%rdi)
+
+# qhasm: mem64[ap + 760] = temp2
+# asm 1: mov   <temp2=int64#3,760(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,760(<ap=%rdi)
+mov   %edx,760(%rdi)
+
+# qhasm: temp1 = mem64[ap + 764]
+# asm 1: mov   764(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   764(<ap=%rdi),>temp1=%esi
+mov   764(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4048]
+# asm 1: mov   4048(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4048(<ap=%rdi),>temp2=%edx
+mov   4048(%rdi),%edx
+
+# qhasm: mem64[ap + 4048] = temp1
+# asm 1: mov   <temp1=int64#2,4048(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4048(<ap=%rdi)
+mov   %esi,4048(%rdi)
+
+# qhasm: mem64[ap + 764] = temp2
+# asm 1: mov   <temp2=int64#3,764(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,764(<ap=%rdi)
+mov   %edx,764(%rdi)
+
+# qhasm: temp1 = mem64[ap + 772]
+# asm 1: mov   772(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   772(<ap=%rdi),>temp1=%esi
+mov   772(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2096]
+# asm 1: mov   2096(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2096(<ap=%rdi),>temp2=%edx
+mov   2096(%rdi),%edx
+
+# qhasm: mem64[ap + 2096] = temp1
+# asm 1: mov   <temp1=int64#2,2096(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2096(<ap=%rdi)
+mov   %esi,2096(%rdi)
+
+# qhasm: mem64[ap + 772] = temp2
+# asm 1: mov   <temp2=int64#3,772(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,772(<ap=%rdi)
+mov   %edx,772(%rdi)
+
+# qhasm: temp1 = mem64[ap + 776]
+# asm 1: mov   776(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   776(<ap=%rdi),>temp1=%esi
+mov   776(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1072]
+# asm 1: mov   1072(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1072(<ap=%rdi),>temp2=%edx
+mov   1072(%rdi),%edx
+
+# qhasm: mem64[ap + 1072] = temp1
+# asm 1: mov   <temp1=int64#2,1072(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1072(<ap=%rdi)
+mov   %esi,1072(%rdi)
+
+# qhasm: mem64[ap + 776] = temp2
+# asm 1: mov   <temp2=int64#3,776(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,776(<ap=%rdi)
+mov   %edx,776(%rdi)
+
+# qhasm: temp1 = mem64[ap + 780]
+# asm 1: mov   780(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   780(<ap=%rdi),>temp1=%esi
+mov   780(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3120]
+# asm 1: mov   3120(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3120(<ap=%rdi),>temp2=%edx
+mov   3120(%rdi),%edx
+
+# qhasm: mem64[ap + 3120] = temp1
+# asm 1: mov   <temp1=int64#2,3120(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3120(<ap=%rdi)
+mov   %esi,3120(%rdi)
+
+# qhasm: mem64[ap + 780] = temp2
+# asm 1: mov   <temp2=int64#3,780(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,780(<ap=%rdi)
+mov   %edx,780(%rdi)
+
+# qhasm: temp1 = mem64[ap + 788]
+# asm 1: mov   788(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   788(<ap=%rdi),>temp1=%esi
+mov   788(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2608]
+# asm 1: mov   2608(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2608(<ap=%rdi),>temp2=%edx
+mov   2608(%rdi),%edx
+
+# qhasm: mem64[ap + 2608] = temp1
+# asm 1: mov   <temp1=int64#2,2608(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2608(<ap=%rdi)
+mov   %esi,2608(%rdi)
+
+# qhasm: mem64[ap + 788] = temp2
+# asm 1: mov   <temp2=int64#3,788(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,788(<ap=%rdi)
+mov   %edx,788(%rdi)
+
+# qhasm: temp1 = mem64[ap + 792]
+# asm 1: mov   792(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   792(<ap=%rdi),>temp1=%esi
+mov   792(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1584]
+# asm 1: mov   1584(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1584(<ap=%rdi),>temp2=%edx
+mov   1584(%rdi),%edx
+
+# qhasm: mem64[ap + 1584] = temp1
+# asm 1: mov   <temp1=int64#2,1584(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1584(<ap=%rdi)
+mov   %esi,1584(%rdi)
+
+# qhasm: mem64[ap + 792] = temp2
+# asm 1: mov   <temp2=int64#3,792(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,792(<ap=%rdi)
+mov   %edx,792(%rdi)
+
+# qhasm: temp1 = mem64[ap + 796]
+# asm 1: mov   796(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   796(<ap=%rdi),>temp1=%esi
+mov   796(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3632]
+# asm 1: mov   3632(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3632(<ap=%rdi),>temp2=%edx
+mov   3632(%rdi),%edx
+
+# qhasm: mem64[ap + 3632] = temp1
+# asm 1: mov   <temp1=int64#2,3632(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3632(<ap=%rdi)
+mov   %esi,3632(%rdi)
+
+# qhasm: mem64[ap + 796] = temp2
+# asm 1: mov   <temp2=int64#3,796(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,796(<ap=%rdi)
+mov   %edx,796(%rdi)
+
+# qhasm: temp1 = mem64[ap + 804]
+# asm 1: mov   804(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   804(<ap=%rdi),>temp1=%esi
+mov   804(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2352]
+# asm 1: mov   2352(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2352(<ap=%rdi),>temp2=%edx
+mov   2352(%rdi),%edx
+
+# qhasm: mem64[ap + 2352] = temp1
+# asm 1: mov   <temp1=int64#2,2352(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2352(<ap=%rdi)
+mov   %esi,2352(%rdi)
+
+# qhasm: mem64[ap + 804] = temp2
+# asm 1: mov   <temp2=int64#3,804(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,804(<ap=%rdi)
+mov   %edx,804(%rdi)
+
+# qhasm: temp1 = mem64[ap + 808]
+# asm 1: mov   808(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   808(<ap=%rdi),>temp1=%esi
+mov   808(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1328]
+# asm 1: mov   1328(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1328(<ap=%rdi),>temp2=%edx
+mov   1328(%rdi),%edx
+
+# qhasm: mem64[ap + 1328] = temp1
+# asm 1: mov   <temp1=int64#2,1328(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1328(<ap=%rdi)
+mov   %esi,1328(%rdi)
+
+# qhasm: mem64[ap + 808] = temp2
+# asm 1: mov   <temp2=int64#3,808(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,808(<ap=%rdi)
+mov   %edx,808(%rdi)
+
+# qhasm: temp1 = mem64[ap + 812]
+# asm 1: mov   812(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   812(<ap=%rdi),>temp1=%esi
+mov   812(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3376]
+# asm 1: mov   3376(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3376(<ap=%rdi),>temp2=%edx
+mov   3376(%rdi),%edx
+
+# qhasm: mem64[ap + 3376] = temp1
+# asm 1: mov   <temp1=int64#2,3376(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3376(<ap=%rdi)
+mov   %esi,3376(%rdi)
+
+# qhasm: mem64[ap + 812] = temp2
+# asm 1: mov   <temp2=int64#3,812(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,812(<ap=%rdi)
+mov   %edx,812(%rdi)
+
+# qhasm: temp1 = mem64[ap + 820]
+# asm 1: mov   820(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   820(<ap=%rdi),>temp1=%esi
+mov   820(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2864]
+# asm 1: mov   2864(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2864(<ap=%rdi),>temp2=%edx
+mov   2864(%rdi),%edx
+
+# qhasm: mem64[ap + 2864] = temp1
+# asm 1: mov   <temp1=int64#2,2864(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2864(<ap=%rdi)
+mov   %esi,2864(%rdi)
+
+# qhasm: mem64[ap + 820] = temp2
+# asm 1: mov   <temp2=int64#3,820(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,820(<ap=%rdi)
+mov   %edx,820(%rdi)
+
+# qhasm: temp1 = mem64[ap + 824]
+# asm 1: mov   824(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   824(<ap=%rdi),>temp1=%esi
+mov   824(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1840]
+# asm 1: mov   1840(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1840(<ap=%rdi),>temp2=%edx
+mov   1840(%rdi),%edx
+
+# qhasm: mem64[ap + 1840] = temp1
+# asm 1: mov   <temp1=int64#2,1840(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1840(<ap=%rdi)
+mov   %esi,1840(%rdi)
+
+# qhasm: mem64[ap + 824] = temp2
+# asm 1: mov   <temp2=int64#3,824(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,824(<ap=%rdi)
+mov   %edx,824(%rdi)
+
+# qhasm: temp1 = mem64[ap + 828]
+# asm 1: mov   828(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   828(<ap=%rdi),>temp1=%esi
+mov   828(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3888]
+# asm 1: mov   3888(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3888(<ap=%rdi),>temp2=%edx
+mov   3888(%rdi),%edx
+
+# qhasm: mem64[ap + 3888] = temp1
+# asm 1: mov   <temp1=int64#2,3888(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3888(<ap=%rdi)
+mov   %esi,3888(%rdi)
+
+# qhasm: mem64[ap + 828] = temp2
+# asm 1: mov   <temp2=int64#3,828(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,828(<ap=%rdi)
+mov   %edx,828(%rdi)
+
+# qhasm: temp1 = mem64[ap + 836]
+# asm 1: mov   836(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   836(<ap=%rdi),>temp1=%esi
+mov   836(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2224]
+# asm 1: mov   2224(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2224(<ap=%rdi),>temp2=%edx
+mov   2224(%rdi),%edx
+
+# qhasm: mem64[ap + 2224] = temp1
+# asm 1: mov   <temp1=int64#2,2224(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2224(<ap=%rdi)
+mov   %esi,2224(%rdi)
+
+# qhasm: mem64[ap + 836] = temp2
+# asm 1: mov   <temp2=int64#3,836(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,836(<ap=%rdi)
+mov   %edx,836(%rdi)
+
+# qhasm: temp1 = mem64[ap + 840]
+# asm 1: mov   840(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   840(<ap=%rdi),>temp1=%esi
+mov   840(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1200]
+# asm 1: mov   1200(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1200(<ap=%rdi),>temp2=%edx
+mov   1200(%rdi),%edx
+
+# qhasm: mem64[ap + 1200] = temp1
+# asm 1: mov   <temp1=int64#2,1200(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1200(<ap=%rdi)
+mov   %esi,1200(%rdi)
+
+# qhasm: mem64[ap + 840] = temp2
+# asm 1: mov   <temp2=int64#3,840(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,840(<ap=%rdi)
+mov   %edx,840(%rdi)
+
+# qhasm: temp1 = mem64[ap + 844]
+# asm 1: mov   844(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   844(<ap=%rdi),>temp1=%esi
+mov   844(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3248]
+# asm 1: mov   3248(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3248(<ap=%rdi),>temp2=%edx
+mov   3248(%rdi),%edx
+
+# qhasm: mem64[ap + 3248] = temp1
+# asm 1: mov   <temp1=int64#2,3248(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3248(<ap=%rdi)
+mov   %esi,3248(%rdi)
+
+# qhasm: mem64[ap + 844] = temp2
+# asm 1: mov   <temp2=int64#3,844(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,844(<ap=%rdi)
+mov   %edx,844(%rdi)
+
+# qhasm: temp1 = mem64[ap + 852]
+# asm 1: mov   852(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   852(<ap=%rdi),>temp1=%esi
+mov   852(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2736]
+# asm 1: mov   2736(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2736(<ap=%rdi),>temp2=%edx
+mov   2736(%rdi),%edx
+
+# qhasm: mem64[ap + 2736] = temp1
+# asm 1: mov   <temp1=int64#2,2736(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2736(<ap=%rdi)
+mov   %esi,2736(%rdi)
+
+# qhasm: mem64[ap + 852] = temp2
+# asm 1: mov   <temp2=int64#3,852(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,852(<ap=%rdi)
+mov   %edx,852(%rdi)
+
+# qhasm: temp1 = mem64[ap + 856]
+# asm 1: mov   856(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   856(<ap=%rdi),>temp1=%esi
+mov   856(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1712]
+# asm 1: mov   1712(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1712(<ap=%rdi),>temp2=%edx
+mov   1712(%rdi),%edx
+
+# qhasm: mem64[ap + 1712] = temp1
+# asm 1: mov   <temp1=int64#2,1712(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1712(<ap=%rdi)
+mov   %esi,1712(%rdi)
+
+# qhasm: mem64[ap + 856] = temp2
+# asm 1: mov   <temp2=int64#3,856(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,856(<ap=%rdi)
+mov   %edx,856(%rdi)
+
+# qhasm: temp1 = mem64[ap + 860]
+# asm 1: mov   860(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   860(<ap=%rdi),>temp1=%esi
+mov   860(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3760]
+# asm 1: mov   3760(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3760(<ap=%rdi),>temp2=%edx
+mov   3760(%rdi),%edx
+
+# qhasm: mem64[ap + 3760] = temp1
+# asm 1: mov   <temp1=int64#2,3760(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3760(<ap=%rdi)
+mov   %esi,3760(%rdi)
+
+# qhasm: mem64[ap + 860] = temp2
+# asm 1: mov   <temp2=int64#3,860(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,860(<ap=%rdi)
+mov   %edx,860(%rdi)
+
+# qhasm: temp1 = mem64[ap + 868]
+# asm 1: mov   868(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   868(<ap=%rdi),>temp1=%esi
+mov   868(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2480]
+# asm 1: mov   2480(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2480(<ap=%rdi),>temp2=%edx
+mov   2480(%rdi),%edx
+
+# qhasm: mem64[ap + 2480] = temp1
+# asm 1: mov   <temp1=int64#2,2480(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2480(<ap=%rdi)
+mov   %esi,2480(%rdi)
+
+# qhasm: mem64[ap + 868] = temp2
+# asm 1: mov   <temp2=int64#3,868(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,868(<ap=%rdi)
+mov   %edx,868(%rdi)
+
+# qhasm: temp1 = mem64[ap + 872]
+# asm 1: mov   872(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   872(<ap=%rdi),>temp1=%esi
+mov   872(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1456]
+# asm 1: mov   1456(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1456(<ap=%rdi),>temp2=%edx
+mov   1456(%rdi),%edx
+
+# qhasm: mem64[ap + 1456] = temp1
+# asm 1: mov   <temp1=int64#2,1456(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1456(<ap=%rdi)
+mov   %esi,1456(%rdi)
+
+# qhasm: mem64[ap + 872] = temp2
+# asm 1: mov   <temp2=int64#3,872(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,872(<ap=%rdi)
+mov   %edx,872(%rdi)
+
+# qhasm: temp1 = mem64[ap + 876]
+# asm 1: mov   876(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   876(<ap=%rdi),>temp1=%esi
+mov   876(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3504]
+# asm 1: mov   3504(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3504(<ap=%rdi),>temp2=%edx
+mov   3504(%rdi),%edx
+
+# qhasm: mem64[ap + 3504] = temp1
+# asm 1: mov   <temp1=int64#2,3504(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3504(<ap=%rdi)
+mov   %esi,3504(%rdi)
+
+# qhasm: mem64[ap + 876] = temp2
+# asm 1: mov   <temp2=int64#3,876(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,876(<ap=%rdi)
+mov   %edx,876(%rdi)
+
+# qhasm: temp1 = mem64[ap + 880]
+# asm 1: mov   880(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   880(<ap=%rdi),>temp1=%esi
+mov   880(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 944]
+# asm 1: mov   944(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   944(<ap=%rdi),>temp2=%edx
+mov   944(%rdi),%edx
+
+# qhasm: mem64[ap + 944] = temp1
+# asm 1: mov   <temp1=int64#2,944(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,944(<ap=%rdi)
+mov   %esi,944(%rdi)
+
+# qhasm: mem64[ap + 880] = temp2
+# asm 1: mov   <temp2=int64#3,880(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,880(<ap=%rdi)
+mov   %edx,880(%rdi)
+
+# qhasm: temp1 = mem64[ap + 884]
+# asm 1: mov   884(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   884(<ap=%rdi),>temp1=%esi
+mov   884(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2992]
+# asm 1: mov   2992(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2992(<ap=%rdi),>temp2=%edx
+mov   2992(%rdi),%edx
+
+# qhasm: mem64[ap + 2992] = temp1
+# asm 1: mov   <temp1=int64#2,2992(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2992(<ap=%rdi)
+mov   %esi,2992(%rdi)
+
+# qhasm: mem64[ap + 884] = temp2
+# asm 1: mov   <temp2=int64#3,884(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,884(<ap=%rdi)
+mov   %edx,884(%rdi)
+
+# qhasm: temp1 = mem64[ap + 888]
+# asm 1: mov   888(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   888(<ap=%rdi),>temp1=%esi
+mov   888(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1968]
+# asm 1: mov   1968(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1968(<ap=%rdi),>temp2=%edx
+mov   1968(%rdi),%edx
+
+# qhasm: mem64[ap + 1968] = temp1
+# asm 1: mov   <temp1=int64#2,1968(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1968(<ap=%rdi)
+mov   %esi,1968(%rdi)
+
+# qhasm: mem64[ap + 888] = temp2
+# asm 1: mov   <temp2=int64#3,888(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,888(<ap=%rdi)
+mov   %edx,888(%rdi)
+
+# qhasm: temp1 = mem64[ap + 892]
+# asm 1: mov   892(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   892(<ap=%rdi),>temp1=%esi
+mov   892(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4016]
+# asm 1: mov   4016(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4016(<ap=%rdi),>temp2=%edx
+mov   4016(%rdi),%edx
+
+# qhasm: mem64[ap + 4016] = temp1
+# asm 1: mov   <temp1=int64#2,4016(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4016(<ap=%rdi)
+mov   %esi,4016(%rdi)
+
+# qhasm: mem64[ap + 892] = temp2
+# asm 1: mov   <temp2=int64#3,892(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,892(<ap=%rdi)
+mov   %edx,892(%rdi)
+
+# qhasm: temp1 = mem64[ap + 900]
+# asm 1: mov   900(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   900(<ap=%rdi),>temp1=%esi
+mov   900(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2160]
+# asm 1: mov   2160(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2160(<ap=%rdi),>temp2=%edx
+mov   2160(%rdi),%edx
+
+# qhasm: mem64[ap + 2160] = temp1
+# asm 1: mov   <temp1=int64#2,2160(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2160(<ap=%rdi)
+mov   %esi,2160(%rdi)
+
+# qhasm: mem64[ap + 900] = temp2
+# asm 1: mov   <temp2=int64#3,900(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,900(<ap=%rdi)
+mov   %edx,900(%rdi)
+
+# qhasm: temp1 = mem64[ap + 904]
+# asm 1: mov   904(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   904(<ap=%rdi),>temp1=%esi
+mov   904(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1136]
+# asm 1: mov   1136(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1136(<ap=%rdi),>temp2=%edx
+mov   1136(%rdi),%edx
+
+# qhasm: mem64[ap + 1136] = temp1
+# asm 1: mov   <temp1=int64#2,1136(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1136(<ap=%rdi)
+mov   %esi,1136(%rdi)
+
+# qhasm: mem64[ap + 904] = temp2
+# asm 1: mov   <temp2=int64#3,904(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,904(<ap=%rdi)
+mov   %edx,904(%rdi)
+
+# qhasm: temp1 = mem64[ap + 908]
+# asm 1: mov   908(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   908(<ap=%rdi),>temp1=%esi
+mov   908(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3184]
+# asm 1: mov   3184(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3184(<ap=%rdi),>temp2=%edx
+mov   3184(%rdi),%edx
+
+# qhasm: mem64[ap + 3184] = temp1
+# asm 1: mov   <temp1=int64#2,3184(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3184(<ap=%rdi)
+mov   %esi,3184(%rdi)
+
+# qhasm: mem64[ap + 908] = temp2
+# asm 1: mov   <temp2=int64#3,908(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,908(<ap=%rdi)
+mov   %edx,908(%rdi)
+
+# qhasm: temp1 = mem64[ap + 916]
+# asm 1: mov   916(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   916(<ap=%rdi),>temp1=%esi
+mov   916(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2672]
+# asm 1: mov   2672(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2672(<ap=%rdi),>temp2=%edx
+mov   2672(%rdi),%edx
+
+# qhasm: mem64[ap + 2672] = temp1
+# asm 1: mov   <temp1=int64#2,2672(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2672(<ap=%rdi)
+mov   %esi,2672(%rdi)
+
+# qhasm: mem64[ap + 916] = temp2
+# asm 1: mov   <temp2=int64#3,916(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,916(<ap=%rdi)
+mov   %edx,916(%rdi)
+
+# qhasm: temp1 = mem64[ap + 920]
+# asm 1: mov   920(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   920(<ap=%rdi),>temp1=%esi
+mov   920(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1648]
+# asm 1: mov   1648(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1648(<ap=%rdi),>temp2=%edx
+mov   1648(%rdi),%edx
+
+# qhasm: mem64[ap + 1648] = temp1
+# asm 1: mov   <temp1=int64#2,1648(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1648(<ap=%rdi)
+mov   %esi,1648(%rdi)
+
+# qhasm: mem64[ap + 920] = temp2
+# asm 1: mov   <temp2=int64#3,920(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,920(<ap=%rdi)
+mov   %edx,920(%rdi)
+
+# qhasm: temp1 = mem64[ap + 924]
+# asm 1: mov   924(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   924(<ap=%rdi),>temp1=%esi
+mov   924(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3696]
+# asm 1: mov   3696(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3696(<ap=%rdi),>temp2=%edx
+mov   3696(%rdi),%edx
+
+# qhasm: mem64[ap + 3696] = temp1
+# asm 1: mov   <temp1=int64#2,3696(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3696(<ap=%rdi)
+mov   %esi,3696(%rdi)
+
+# qhasm: mem64[ap + 924] = temp2
+# asm 1: mov   <temp2=int64#3,924(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,924(<ap=%rdi)
+mov   %edx,924(%rdi)
+
+# qhasm: temp1 = mem64[ap + 932]
+# asm 1: mov   932(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   932(<ap=%rdi),>temp1=%esi
+mov   932(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2416]
+# asm 1: mov   2416(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2416(<ap=%rdi),>temp2=%edx
+mov   2416(%rdi),%edx
+
+# qhasm: mem64[ap + 2416] = temp1
+# asm 1: mov   <temp1=int64#2,2416(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2416(<ap=%rdi)
+mov   %esi,2416(%rdi)
+
+# qhasm: mem64[ap + 932] = temp2
+# asm 1: mov   <temp2=int64#3,932(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,932(<ap=%rdi)
+mov   %edx,932(%rdi)
+
+# qhasm: temp1 = mem64[ap + 936]
+# asm 1: mov   936(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   936(<ap=%rdi),>temp1=%esi
+mov   936(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1392]
+# asm 1: mov   1392(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1392(<ap=%rdi),>temp2=%edx
+mov   1392(%rdi),%edx
+
+# qhasm: mem64[ap + 1392] = temp1
+# asm 1: mov   <temp1=int64#2,1392(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1392(<ap=%rdi)
+mov   %esi,1392(%rdi)
+
+# qhasm: mem64[ap + 936] = temp2
+# asm 1: mov   <temp2=int64#3,936(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,936(<ap=%rdi)
+mov   %edx,936(%rdi)
+
+# qhasm: temp1 = mem64[ap + 940]
+# asm 1: mov   940(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   940(<ap=%rdi),>temp1=%esi
+mov   940(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3440]
+# asm 1: mov   3440(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3440(<ap=%rdi),>temp2=%edx
+mov   3440(%rdi),%edx
+
+# qhasm: mem64[ap + 3440] = temp1
+# asm 1: mov   <temp1=int64#2,3440(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3440(<ap=%rdi)
+mov   %esi,3440(%rdi)
+
+# qhasm: mem64[ap + 940] = temp2
+# asm 1: mov   <temp2=int64#3,940(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,940(<ap=%rdi)
+mov   %edx,940(%rdi)
+
+# qhasm: temp1 = mem64[ap + 948]
+# asm 1: mov   948(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   948(<ap=%rdi),>temp1=%esi
+mov   948(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2928]
+# asm 1: mov   2928(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2928(<ap=%rdi),>temp2=%edx
+mov   2928(%rdi),%edx
+
+# qhasm: mem64[ap + 2928] = temp1
+# asm 1: mov   <temp1=int64#2,2928(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2928(<ap=%rdi)
+mov   %esi,2928(%rdi)
+
+# qhasm: mem64[ap + 948] = temp2
+# asm 1: mov   <temp2=int64#3,948(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,948(<ap=%rdi)
+mov   %edx,948(%rdi)
+
+# qhasm: temp1 = mem64[ap + 952]
+# asm 1: mov   952(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   952(<ap=%rdi),>temp1=%esi
+mov   952(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1904]
+# asm 1: mov   1904(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1904(<ap=%rdi),>temp2=%edx
+mov   1904(%rdi),%edx
+
+# qhasm: mem64[ap + 1904] = temp1
+# asm 1: mov   <temp1=int64#2,1904(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1904(<ap=%rdi)
+mov   %esi,1904(%rdi)
+
+# qhasm: mem64[ap + 952] = temp2
+# asm 1: mov   <temp2=int64#3,952(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,952(<ap=%rdi)
+mov   %edx,952(%rdi)
+
+# qhasm: temp1 = mem64[ap + 956]
+# asm 1: mov   956(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   956(<ap=%rdi),>temp1=%esi
+mov   956(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3952]
+# asm 1: mov   3952(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3952(<ap=%rdi),>temp2=%edx
+mov   3952(%rdi),%edx
+
+# qhasm: mem64[ap + 3952] = temp1
+# asm 1: mov   <temp1=int64#2,3952(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3952(<ap=%rdi)
+mov   %esi,3952(%rdi)
+
+# qhasm: mem64[ap + 956] = temp2
+# asm 1: mov   <temp2=int64#3,956(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,956(<ap=%rdi)
+mov   %edx,956(%rdi)
+
+# qhasm: temp1 = mem64[ap + 964]
+# asm 1: mov   964(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   964(<ap=%rdi),>temp1=%esi
+mov   964(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2288]
+# asm 1: mov   2288(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2288(<ap=%rdi),>temp2=%edx
+mov   2288(%rdi),%edx
+
+# qhasm: mem64[ap + 2288] = temp1
+# asm 1: mov   <temp1=int64#2,2288(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2288(<ap=%rdi)
+mov   %esi,2288(%rdi)
+
+# qhasm: mem64[ap + 964] = temp2
+# asm 1: mov   <temp2=int64#3,964(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,964(<ap=%rdi)
+mov   %edx,964(%rdi)
+
+# qhasm: temp1 = mem64[ap + 968]
+# asm 1: mov   968(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   968(<ap=%rdi),>temp1=%esi
+mov   968(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1264]
+# asm 1: mov   1264(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1264(<ap=%rdi),>temp2=%edx
+mov   1264(%rdi),%edx
+
+# qhasm: mem64[ap + 1264] = temp1
+# asm 1: mov   <temp1=int64#2,1264(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1264(<ap=%rdi)
+mov   %esi,1264(%rdi)
+
+# qhasm: mem64[ap + 968] = temp2
+# asm 1: mov   <temp2=int64#3,968(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,968(<ap=%rdi)
+mov   %edx,968(%rdi)
+
+# qhasm: temp1 = mem64[ap + 972]
+# asm 1: mov   972(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   972(<ap=%rdi),>temp1=%esi
+mov   972(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3312]
+# asm 1: mov   3312(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3312(<ap=%rdi),>temp2=%edx
+mov   3312(%rdi),%edx
+
+# qhasm: mem64[ap + 3312] = temp1
+# asm 1: mov   <temp1=int64#2,3312(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3312(<ap=%rdi)
+mov   %esi,3312(%rdi)
+
+# qhasm: mem64[ap + 972] = temp2
+# asm 1: mov   <temp2=int64#3,972(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,972(<ap=%rdi)
+mov   %edx,972(%rdi)
+
+# qhasm: temp1 = mem64[ap + 980]
+# asm 1: mov   980(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   980(<ap=%rdi),>temp1=%esi
+mov   980(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2800]
+# asm 1: mov   2800(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2800(<ap=%rdi),>temp2=%edx
+mov   2800(%rdi),%edx
+
+# qhasm: mem64[ap + 2800] = temp1
+# asm 1: mov   <temp1=int64#2,2800(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2800(<ap=%rdi)
+mov   %esi,2800(%rdi)
+
+# qhasm: mem64[ap + 980] = temp2
+# asm 1: mov   <temp2=int64#3,980(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,980(<ap=%rdi)
+mov   %edx,980(%rdi)
+
+# qhasm: temp1 = mem64[ap + 984]
+# asm 1: mov   984(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   984(<ap=%rdi),>temp1=%esi
+mov   984(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1776]
+# asm 1: mov   1776(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1776(<ap=%rdi),>temp2=%edx
+mov   1776(%rdi),%edx
+
+# qhasm: mem64[ap + 1776] = temp1
+# asm 1: mov   <temp1=int64#2,1776(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1776(<ap=%rdi)
+mov   %esi,1776(%rdi)
+
+# qhasm: mem64[ap + 984] = temp2
+# asm 1: mov   <temp2=int64#3,984(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,984(<ap=%rdi)
+mov   %edx,984(%rdi)
+
+# qhasm: temp1 = mem64[ap + 988]
+# asm 1: mov   988(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   988(<ap=%rdi),>temp1=%esi
+mov   988(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3824]
+# asm 1: mov   3824(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3824(<ap=%rdi),>temp2=%edx
+mov   3824(%rdi),%edx
+
+# qhasm: mem64[ap + 3824] = temp1
+# asm 1: mov   <temp1=int64#2,3824(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3824(<ap=%rdi)
+mov   %esi,3824(%rdi)
+
+# qhasm: mem64[ap + 988] = temp2
+# asm 1: mov   <temp2=int64#3,988(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,988(<ap=%rdi)
+mov   %edx,988(%rdi)
+
+# qhasm: temp1 = mem64[ap + 996]
+# asm 1: mov   996(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   996(<ap=%rdi),>temp1=%esi
+mov   996(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2544]
+# asm 1: mov   2544(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2544(<ap=%rdi),>temp2=%edx
+mov   2544(%rdi),%edx
+
+# qhasm: mem64[ap + 2544] = temp1
+# asm 1: mov   <temp1=int64#2,2544(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2544(<ap=%rdi)
+mov   %esi,2544(%rdi)
+
+# qhasm: mem64[ap + 996] = temp2
+# asm 1: mov   <temp2=int64#3,996(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,996(<ap=%rdi)
+mov   %edx,996(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1000]
+# asm 1: mov   1000(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1000(<ap=%rdi),>temp1=%esi
+mov   1000(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1520]
+# asm 1: mov   1520(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1520(<ap=%rdi),>temp2=%edx
+mov   1520(%rdi),%edx
+
+# qhasm: mem64[ap + 1520] = temp1
+# asm 1: mov   <temp1=int64#2,1520(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1520(<ap=%rdi)
+mov   %esi,1520(%rdi)
+
+# qhasm: mem64[ap + 1000] = temp2
+# asm 1: mov   <temp2=int64#3,1000(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1000(<ap=%rdi)
+mov   %edx,1000(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1004]
+# asm 1: mov   1004(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1004(<ap=%rdi),>temp1=%esi
+mov   1004(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3568]
+# asm 1: mov   3568(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3568(<ap=%rdi),>temp2=%edx
+mov   3568(%rdi),%edx
+
+# qhasm: mem64[ap + 3568] = temp1
+# asm 1: mov   <temp1=int64#2,3568(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3568(<ap=%rdi)
+mov   %esi,3568(%rdi)
+
+# qhasm: mem64[ap + 1004] = temp2
+# asm 1: mov   <temp2=int64#3,1004(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1004(<ap=%rdi)
+mov   %edx,1004(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1012]
+# asm 1: mov   1012(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1012(<ap=%rdi),>temp1=%esi
+mov   1012(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3056]
+# asm 1: mov   3056(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3056(<ap=%rdi),>temp2=%edx
+mov   3056(%rdi),%edx
+
+# qhasm: mem64[ap + 3056] = temp1
+# asm 1: mov   <temp1=int64#2,3056(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3056(<ap=%rdi)
+mov   %esi,3056(%rdi)
+
+# qhasm: mem64[ap + 1012] = temp2
+# asm 1: mov   <temp2=int64#3,1012(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1012(<ap=%rdi)
+mov   %edx,1012(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1016]
+# asm 1: mov   1016(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1016(<ap=%rdi),>temp1=%esi
+mov   1016(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2032]
+# asm 1: mov   2032(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2032(<ap=%rdi),>temp2=%edx
+mov   2032(%rdi),%edx
+
+# qhasm: mem64[ap + 2032] = temp1
+# asm 1: mov   <temp1=int64#2,2032(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2032(<ap=%rdi)
+mov   %esi,2032(%rdi)
+
+# qhasm: mem64[ap + 1016] = temp2
+# asm 1: mov   <temp2=int64#3,1016(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1016(<ap=%rdi)
+mov   %edx,1016(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1020]
+# asm 1: mov   1020(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1020(<ap=%rdi),>temp1=%esi
+mov   1020(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4080]
+# asm 1: mov   4080(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4080(<ap=%rdi),>temp2=%edx
+mov   4080(%rdi),%edx
+
+# qhasm: mem64[ap + 4080] = temp1
+# asm 1: mov   <temp1=int64#2,4080(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4080(<ap=%rdi)
+mov   %esi,4080(%rdi)
+
+# qhasm: mem64[ap + 1020] = temp2
+# asm 1: mov   <temp2=int64#3,1020(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1020(<ap=%rdi)
+mov   %edx,1020(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1028]
+# asm 1: mov   1028(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1028(<ap=%rdi),>temp1=%esi
+mov   1028(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2056]
+# asm 1: mov   2056(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2056(<ap=%rdi),>temp2=%edx
+mov   2056(%rdi),%edx
+
+# qhasm: mem64[ap + 2056] = temp1
+# asm 1: mov   <temp1=int64#2,2056(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2056(<ap=%rdi)
+mov   %esi,2056(%rdi)
+
+# qhasm: mem64[ap + 1028] = temp2
+# asm 1: mov   <temp2=int64#3,1028(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1028(<ap=%rdi)
+mov   %edx,1028(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1036]
+# asm 1: mov   1036(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1036(<ap=%rdi),>temp1=%esi
+mov   1036(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3080]
+# asm 1: mov   3080(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3080(<ap=%rdi),>temp2=%edx
+mov   3080(%rdi),%edx
+
+# qhasm: mem64[ap + 3080] = temp1
+# asm 1: mov   <temp1=int64#2,3080(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3080(<ap=%rdi)
+mov   %esi,3080(%rdi)
+
+# qhasm: mem64[ap + 1036] = temp2
+# asm 1: mov   <temp2=int64#3,1036(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1036(<ap=%rdi)
+mov   %edx,1036(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1044]
+# asm 1: mov   1044(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1044(<ap=%rdi),>temp1=%esi
+mov   1044(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2568]
+# asm 1: mov   2568(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2568(<ap=%rdi),>temp2=%edx
+mov   2568(%rdi),%edx
+
+# qhasm: mem64[ap + 2568] = temp1
+# asm 1: mov   <temp1=int64#2,2568(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2568(<ap=%rdi)
+mov   %esi,2568(%rdi)
+
+# qhasm: mem64[ap + 1044] = temp2
+# asm 1: mov   <temp2=int64#3,1044(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1044(<ap=%rdi)
+mov   %edx,1044(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1048]
+# asm 1: mov   1048(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1048(<ap=%rdi),>temp1=%esi
+mov   1048(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1544]
+# asm 1: mov   1544(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1544(<ap=%rdi),>temp2=%edx
+mov   1544(%rdi),%edx
+
+# qhasm: mem64[ap + 1544] = temp1
+# asm 1: mov   <temp1=int64#2,1544(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1544(<ap=%rdi)
+mov   %esi,1544(%rdi)
+
+# qhasm: mem64[ap + 1048] = temp2
+# asm 1: mov   <temp2=int64#3,1048(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1048(<ap=%rdi)
+mov   %edx,1048(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1052]
+# asm 1: mov   1052(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1052(<ap=%rdi),>temp1=%esi
+mov   1052(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3592]
+# asm 1: mov   3592(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3592(<ap=%rdi),>temp2=%edx
+mov   3592(%rdi),%edx
+
+# qhasm: mem64[ap + 3592] = temp1
+# asm 1: mov   <temp1=int64#2,3592(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3592(<ap=%rdi)
+mov   %esi,3592(%rdi)
+
+# qhasm: mem64[ap + 1052] = temp2
+# asm 1: mov   <temp2=int64#3,1052(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1052(<ap=%rdi)
+mov   %edx,1052(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1060]
+# asm 1: mov   1060(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1060(<ap=%rdi),>temp1=%esi
+mov   1060(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2312]
+# asm 1: mov   2312(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2312(<ap=%rdi),>temp2=%edx
+mov   2312(%rdi),%edx
+
+# qhasm: mem64[ap + 2312] = temp1
+# asm 1: mov   <temp1=int64#2,2312(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2312(<ap=%rdi)
+mov   %esi,2312(%rdi)
+
+# qhasm: mem64[ap + 1060] = temp2
+# asm 1: mov   <temp2=int64#3,1060(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1060(<ap=%rdi)
+mov   %edx,1060(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1064]
+# asm 1: mov   1064(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1064(<ap=%rdi),>temp1=%esi
+mov   1064(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1288]
+# asm 1: mov   1288(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1288(<ap=%rdi),>temp2=%edx
+mov   1288(%rdi),%edx
+
+# qhasm: mem64[ap + 1288] = temp1
+# asm 1: mov   <temp1=int64#2,1288(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1288(<ap=%rdi)
+mov   %esi,1288(%rdi)
+
+# qhasm: mem64[ap + 1064] = temp2
+# asm 1: mov   <temp2=int64#3,1064(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1064(<ap=%rdi)
+mov   %edx,1064(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1068]
+# asm 1: mov   1068(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1068(<ap=%rdi),>temp1=%esi
+mov   1068(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3336]
+# asm 1: mov   3336(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3336(<ap=%rdi),>temp2=%edx
+mov   3336(%rdi),%edx
+
+# qhasm: mem64[ap + 3336] = temp1
+# asm 1: mov   <temp1=int64#2,3336(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3336(<ap=%rdi)
+mov   %esi,3336(%rdi)
+
+# qhasm: mem64[ap + 1068] = temp2
+# asm 1: mov   <temp2=int64#3,1068(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1068(<ap=%rdi)
+mov   %edx,1068(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1076]
+# asm 1: mov   1076(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1076(<ap=%rdi),>temp1=%esi
+mov   1076(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2824]
+# asm 1: mov   2824(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2824(<ap=%rdi),>temp2=%edx
+mov   2824(%rdi),%edx
+
+# qhasm: mem64[ap + 2824] = temp1
+# asm 1: mov   <temp1=int64#2,2824(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2824(<ap=%rdi)
+mov   %esi,2824(%rdi)
+
+# qhasm: mem64[ap + 1076] = temp2
+# asm 1: mov   <temp2=int64#3,1076(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1076(<ap=%rdi)
+mov   %edx,1076(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1080]
+# asm 1: mov   1080(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1080(<ap=%rdi),>temp1=%esi
+mov   1080(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1800]
+# asm 1: mov   1800(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1800(<ap=%rdi),>temp2=%edx
+mov   1800(%rdi),%edx
+
+# qhasm: mem64[ap + 1800] = temp1
+# asm 1: mov   <temp1=int64#2,1800(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1800(<ap=%rdi)
+mov   %esi,1800(%rdi)
+
+# qhasm: mem64[ap + 1080] = temp2
+# asm 1: mov   <temp2=int64#3,1080(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1080(<ap=%rdi)
+mov   %edx,1080(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1084]
+# asm 1: mov   1084(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1084(<ap=%rdi),>temp1=%esi
+mov   1084(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3848]
+# asm 1: mov   3848(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3848(<ap=%rdi),>temp2=%edx
+mov   3848(%rdi),%edx
+
+# qhasm: mem64[ap + 3848] = temp1
+# asm 1: mov   <temp1=int64#2,3848(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3848(<ap=%rdi)
+mov   %esi,3848(%rdi)
+
+# qhasm: mem64[ap + 1084] = temp2
+# asm 1: mov   <temp2=int64#3,1084(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1084(<ap=%rdi)
+mov   %edx,1084(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1092]
+# asm 1: mov   1092(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1092(<ap=%rdi),>temp1=%esi
+mov   1092(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2184]
+# asm 1: mov   2184(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2184(<ap=%rdi),>temp2=%edx
+mov   2184(%rdi),%edx
+
+# qhasm: mem64[ap + 2184] = temp1
+# asm 1: mov   <temp1=int64#2,2184(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2184(<ap=%rdi)
+mov   %esi,2184(%rdi)
+
+# qhasm: mem64[ap + 1092] = temp2
+# asm 1: mov   <temp2=int64#3,1092(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1092(<ap=%rdi)
+mov   %edx,1092(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1096]
+# asm 1: mov   1096(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1096(<ap=%rdi),>temp1=%esi
+mov   1096(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1160]
+# asm 1: mov   1160(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1160(<ap=%rdi),>temp2=%edx
+mov   1160(%rdi),%edx
+
+# qhasm: mem64[ap + 1160] = temp1
+# asm 1: mov   <temp1=int64#2,1160(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1160(<ap=%rdi)
+mov   %esi,1160(%rdi)
+
+# qhasm: mem64[ap + 1096] = temp2
+# asm 1: mov   <temp2=int64#3,1096(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1096(<ap=%rdi)
+mov   %edx,1096(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1100]
+# asm 1: mov   1100(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1100(<ap=%rdi),>temp1=%esi
+mov   1100(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3208]
+# asm 1: mov   3208(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3208(<ap=%rdi),>temp2=%edx
+mov   3208(%rdi),%edx
+
+# qhasm: mem64[ap + 3208] = temp1
+# asm 1: mov   <temp1=int64#2,3208(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3208(<ap=%rdi)
+mov   %esi,3208(%rdi)
+
+# qhasm: mem64[ap + 1100] = temp2
+# asm 1: mov   <temp2=int64#3,1100(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1100(<ap=%rdi)
+mov   %edx,1100(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1108]
+# asm 1: mov   1108(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1108(<ap=%rdi),>temp1=%esi
+mov   1108(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2696]
+# asm 1: mov   2696(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2696(<ap=%rdi),>temp2=%edx
+mov   2696(%rdi),%edx
+
+# qhasm: mem64[ap + 2696] = temp1
+# asm 1: mov   <temp1=int64#2,2696(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2696(<ap=%rdi)
+mov   %esi,2696(%rdi)
+
+# qhasm: mem64[ap + 1108] = temp2
+# asm 1: mov   <temp2=int64#3,1108(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1108(<ap=%rdi)
+mov   %edx,1108(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1112]
+# asm 1: mov   1112(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1112(<ap=%rdi),>temp1=%esi
+mov   1112(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1672]
+# asm 1: mov   1672(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1672(<ap=%rdi),>temp2=%edx
+mov   1672(%rdi),%edx
+
+# qhasm: mem64[ap + 1672] = temp1
+# asm 1: mov   <temp1=int64#2,1672(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1672(<ap=%rdi)
+mov   %esi,1672(%rdi)
+
+# qhasm: mem64[ap + 1112] = temp2
+# asm 1: mov   <temp2=int64#3,1112(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1112(<ap=%rdi)
+mov   %edx,1112(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1116]
+# asm 1: mov   1116(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1116(<ap=%rdi),>temp1=%esi
+mov   1116(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3720]
+# asm 1: mov   3720(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3720(<ap=%rdi),>temp2=%edx
+mov   3720(%rdi),%edx
+
+# qhasm: mem64[ap + 3720] = temp1
+# asm 1: mov   <temp1=int64#2,3720(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3720(<ap=%rdi)
+mov   %esi,3720(%rdi)
+
+# qhasm: mem64[ap + 1116] = temp2
+# asm 1: mov   <temp2=int64#3,1116(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1116(<ap=%rdi)
+mov   %edx,1116(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1124]
+# asm 1: mov   1124(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1124(<ap=%rdi),>temp1=%esi
+mov   1124(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2440]
+# asm 1: mov   2440(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2440(<ap=%rdi),>temp2=%edx
+mov   2440(%rdi),%edx
+
+# qhasm: mem64[ap + 2440] = temp1
+# asm 1: mov   <temp1=int64#2,2440(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2440(<ap=%rdi)
+mov   %esi,2440(%rdi)
+
+# qhasm: mem64[ap + 1124] = temp2
+# asm 1: mov   <temp2=int64#3,1124(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1124(<ap=%rdi)
+mov   %edx,1124(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1128]
+# asm 1: mov   1128(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1128(<ap=%rdi),>temp1=%esi
+mov   1128(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1416]
+# asm 1: mov   1416(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1416(<ap=%rdi),>temp2=%edx
+mov   1416(%rdi),%edx
+
+# qhasm: mem64[ap + 1416] = temp1
+# asm 1: mov   <temp1=int64#2,1416(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1416(<ap=%rdi)
+mov   %esi,1416(%rdi)
+
+# qhasm: mem64[ap + 1128] = temp2
+# asm 1: mov   <temp2=int64#3,1128(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1128(<ap=%rdi)
+mov   %edx,1128(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1132]
+# asm 1: mov   1132(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1132(<ap=%rdi),>temp1=%esi
+mov   1132(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3464]
+# asm 1: mov   3464(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3464(<ap=%rdi),>temp2=%edx
+mov   3464(%rdi),%edx
+
+# qhasm: mem64[ap + 3464] = temp1
+# asm 1: mov   <temp1=int64#2,3464(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3464(<ap=%rdi)
+mov   %esi,3464(%rdi)
+
+# qhasm: mem64[ap + 1132] = temp2
+# asm 1: mov   <temp2=int64#3,1132(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1132(<ap=%rdi)
+mov   %edx,1132(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1140]
+# asm 1: mov   1140(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1140(<ap=%rdi),>temp1=%esi
+mov   1140(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2952]
+# asm 1: mov   2952(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2952(<ap=%rdi),>temp2=%edx
+mov   2952(%rdi),%edx
+
+# qhasm: mem64[ap + 2952] = temp1
+# asm 1: mov   <temp1=int64#2,2952(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2952(<ap=%rdi)
+mov   %esi,2952(%rdi)
+
+# qhasm: mem64[ap + 1140] = temp2
+# asm 1: mov   <temp2=int64#3,1140(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1140(<ap=%rdi)
+mov   %edx,1140(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1144]
+# asm 1: mov   1144(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1144(<ap=%rdi),>temp1=%esi
+mov   1144(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1928]
+# asm 1: mov   1928(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1928(<ap=%rdi),>temp2=%edx
+mov   1928(%rdi),%edx
+
+# qhasm: mem64[ap + 1928] = temp1
+# asm 1: mov   <temp1=int64#2,1928(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1928(<ap=%rdi)
+mov   %esi,1928(%rdi)
+
+# qhasm: mem64[ap + 1144] = temp2
+# asm 1: mov   <temp2=int64#3,1144(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1144(<ap=%rdi)
+mov   %edx,1144(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1148]
+# asm 1: mov   1148(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1148(<ap=%rdi),>temp1=%esi
+mov   1148(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3976]
+# asm 1: mov   3976(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3976(<ap=%rdi),>temp2=%edx
+mov   3976(%rdi),%edx
+
+# qhasm: mem64[ap + 3976] = temp1
+# asm 1: mov   <temp1=int64#2,3976(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3976(<ap=%rdi)
+mov   %esi,3976(%rdi)
+
+# qhasm: mem64[ap + 1148] = temp2
+# asm 1: mov   <temp2=int64#3,1148(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1148(<ap=%rdi)
+mov   %edx,1148(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1156]
+# asm 1: mov   1156(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1156(<ap=%rdi),>temp1=%esi
+mov   1156(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2120]
+# asm 1: mov   2120(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2120(<ap=%rdi),>temp2=%edx
+mov   2120(%rdi),%edx
+
+# qhasm: mem64[ap + 2120] = temp1
+# asm 1: mov   <temp1=int64#2,2120(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2120(<ap=%rdi)
+mov   %esi,2120(%rdi)
+
+# qhasm: mem64[ap + 1156] = temp2
+# asm 1: mov   <temp2=int64#3,1156(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1156(<ap=%rdi)
+mov   %edx,1156(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1164]
+# asm 1: mov   1164(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1164(<ap=%rdi),>temp1=%esi
+mov   1164(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3144]
+# asm 1: mov   3144(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3144(<ap=%rdi),>temp2=%edx
+mov   3144(%rdi),%edx
+
+# qhasm: mem64[ap + 3144] = temp1
+# asm 1: mov   <temp1=int64#2,3144(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3144(<ap=%rdi)
+mov   %esi,3144(%rdi)
+
+# qhasm: mem64[ap + 1164] = temp2
+# asm 1: mov   <temp2=int64#3,1164(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1164(<ap=%rdi)
+mov   %edx,1164(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1172]
+# asm 1: mov   1172(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1172(<ap=%rdi),>temp1=%esi
+mov   1172(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2632]
+# asm 1: mov   2632(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2632(<ap=%rdi),>temp2=%edx
+mov   2632(%rdi),%edx
+
+# qhasm: mem64[ap + 2632] = temp1
+# asm 1: mov   <temp1=int64#2,2632(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2632(<ap=%rdi)
+mov   %esi,2632(%rdi)
+
+# qhasm: mem64[ap + 1172] = temp2
+# asm 1: mov   <temp2=int64#3,1172(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1172(<ap=%rdi)
+mov   %edx,1172(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1176]
+# asm 1: mov   1176(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1176(<ap=%rdi),>temp1=%esi
+mov   1176(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1608]
+# asm 1: mov   1608(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1608(<ap=%rdi),>temp2=%edx
+mov   1608(%rdi),%edx
+
+# qhasm: mem64[ap + 1608] = temp1
+# asm 1: mov   <temp1=int64#2,1608(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1608(<ap=%rdi)
+mov   %esi,1608(%rdi)
+
+# qhasm: mem64[ap + 1176] = temp2
+# asm 1: mov   <temp2=int64#3,1176(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1176(<ap=%rdi)
+mov   %edx,1176(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1180]
+# asm 1: mov   1180(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1180(<ap=%rdi),>temp1=%esi
+mov   1180(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3656]
+# asm 1: mov   3656(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3656(<ap=%rdi),>temp2=%edx
+mov   3656(%rdi),%edx
+
+# qhasm: mem64[ap + 3656] = temp1
+# asm 1: mov   <temp1=int64#2,3656(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3656(<ap=%rdi)
+mov   %esi,3656(%rdi)
+
+# qhasm: mem64[ap + 1180] = temp2
+# asm 1: mov   <temp2=int64#3,1180(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1180(<ap=%rdi)
+mov   %edx,1180(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1188]
+# asm 1: mov   1188(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1188(<ap=%rdi),>temp1=%esi
+mov   1188(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2376]
+# asm 1: mov   2376(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2376(<ap=%rdi),>temp2=%edx
+mov   2376(%rdi),%edx
+
+# qhasm: mem64[ap + 2376] = temp1
+# asm 1: mov   <temp1=int64#2,2376(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2376(<ap=%rdi)
+mov   %esi,2376(%rdi)
+
+# qhasm: mem64[ap + 1188] = temp2
+# asm 1: mov   <temp2=int64#3,1188(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1188(<ap=%rdi)
+mov   %edx,1188(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1192]
+# asm 1: mov   1192(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1192(<ap=%rdi),>temp1=%esi
+mov   1192(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1352]
+# asm 1: mov   1352(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1352(<ap=%rdi),>temp2=%edx
+mov   1352(%rdi),%edx
+
+# qhasm: mem64[ap + 1352] = temp1
+# asm 1: mov   <temp1=int64#2,1352(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1352(<ap=%rdi)
+mov   %esi,1352(%rdi)
+
+# qhasm: mem64[ap + 1192] = temp2
+# asm 1: mov   <temp2=int64#3,1192(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1192(<ap=%rdi)
+mov   %edx,1192(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1196]
+# asm 1: mov   1196(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1196(<ap=%rdi),>temp1=%esi
+mov   1196(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3400]
+# asm 1: mov   3400(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3400(<ap=%rdi),>temp2=%edx
+mov   3400(%rdi),%edx
+
+# qhasm: mem64[ap + 3400] = temp1
+# asm 1: mov   <temp1=int64#2,3400(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3400(<ap=%rdi)
+mov   %esi,3400(%rdi)
+
+# qhasm: mem64[ap + 1196] = temp2
+# asm 1: mov   <temp2=int64#3,1196(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1196(<ap=%rdi)
+mov   %edx,1196(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1204]
+# asm 1: mov   1204(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1204(<ap=%rdi),>temp1=%esi
+mov   1204(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2888]
+# asm 1: mov   2888(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2888(<ap=%rdi),>temp2=%edx
+mov   2888(%rdi),%edx
+
+# qhasm: mem64[ap + 2888] = temp1
+# asm 1: mov   <temp1=int64#2,2888(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2888(<ap=%rdi)
+mov   %esi,2888(%rdi)
+
+# qhasm: mem64[ap + 1204] = temp2
+# asm 1: mov   <temp2=int64#3,1204(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1204(<ap=%rdi)
+mov   %edx,1204(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1208]
+# asm 1: mov   1208(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1208(<ap=%rdi),>temp1=%esi
+mov   1208(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1864]
+# asm 1: mov   1864(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1864(<ap=%rdi),>temp2=%edx
+mov   1864(%rdi),%edx
+
+# qhasm: mem64[ap + 1864] = temp1
+# asm 1: mov   <temp1=int64#2,1864(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1864(<ap=%rdi)
+mov   %esi,1864(%rdi)
+
+# qhasm: mem64[ap + 1208] = temp2
+# asm 1: mov   <temp2=int64#3,1208(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1208(<ap=%rdi)
+mov   %edx,1208(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1212]
+# asm 1: mov   1212(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1212(<ap=%rdi),>temp1=%esi
+mov   1212(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3912]
+# asm 1: mov   3912(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3912(<ap=%rdi),>temp2=%edx
+mov   3912(%rdi),%edx
+
+# qhasm: mem64[ap + 3912] = temp1
+# asm 1: mov   <temp1=int64#2,3912(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3912(<ap=%rdi)
+mov   %esi,3912(%rdi)
+
+# qhasm: mem64[ap + 1212] = temp2
+# asm 1: mov   <temp2=int64#3,1212(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1212(<ap=%rdi)
+mov   %edx,1212(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1220]
+# asm 1: mov   1220(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1220(<ap=%rdi),>temp1=%esi
+mov   1220(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2248]
+# asm 1: mov   2248(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2248(<ap=%rdi),>temp2=%edx
+mov   2248(%rdi),%edx
+
+# qhasm: mem64[ap + 2248] = temp1
+# asm 1: mov   <temp1=int64#2,2248(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2248(<ap=%rdi)
+mov   %esi,2248(%rdi)
+
+# qhasm: mem64[ap + 1220] = temp2
+# asm 1: mov   <temp2=int64#3,1220(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1220(<ap=%rdi)
+mov   %edx,1220(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1228]
+# asm 1: mov   1228(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1228(<ap=%rdi),>temp1=%esi
+mov   1228(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3272]
+# asm 1: mov   3272(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3272(<ap=%rdi),>temp2=%edx
+mov   3272(%rdi),%edx
+
+# qhasm: mem64[ap + 3272] = temp1
+# asm 1: mov   <temp1=int64#2,3272(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3272(<ap=%rdi)
+mov   %esi,3272(%rdi)
+
+# qhasm: mem64[ap + 1228] = temp2
+# asm 1: mov   <temp2=int64#3,1228(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1228(<ap=%rdi)
+mov   %edx,1228(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1236]
+# asm 1: mov   1236(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1236(<ap=%rdi),>temp1=%esi
+mov   1236(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2760]
+# asm 1: mov   2760(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2760(<ap=%rdi),>temp2=%edx
+mov   2760(%rdi),%edx
+
+# qhasm: mem64[ap + 2760] = temp1
+# asm 1: mov   <temp1=int64#2,2760(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2760(<ap=%rdi)
+mov   %esi,2760(%rdi)
+
+# qhasm: mem64[ap + 1236] = temp2
+# asm 1: mov   <temp2=int64#3,1236(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1236(<ap=%rdi)
+mov   %edx,1236(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1240]
+# asm 1: mov   1240(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1240(<ap=%rdi),>temp1=%esi
+mov   1240(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1736]
+# asm 1: mov   1736(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1736(<ap=%rdi),>temp2=%edx
+mov   1736(%rdi),%edx
+
+# qhasm: mem64[ap + 1736] = temp1
+# asm 1: mov   <temp1=int64#2,1736(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1736(<ap=%rdi)
+mov   %esi,1736(%rdi)
+
+# qhasm: mem64[ap + 1240] = temp2
+# asm 1: mov   <temp2=int64#3,1240(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1240(<ap=%rdi)
+mov   %edx,1240(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1244]
+# asm 1: mov   1244(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1244(<ap=%rdi),>temp1=%esi
+mov   1244(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3784]
+# asm 1: mov   3784(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3784(<ap=%rdi),>temp2=%edx
+mov   3784(%rdi),%edx
+
+# qhasm: mem64[ap + 3784] = temp1
+# asm 1: mov   <temp1=int64#2,3784(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3784(<ap=%rdi)
+mov   %esi,3784(%rdi)
+
+# qhasm: mem64[ap + 1244] = temp2
+# asm 1: mov   <temp2=int64#3,1244(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1244(<ap=%rdi)
+mov   %edx,1244(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1252]
+# asm 1: mov   1252(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1252(<ap=%rdi),>temp1=%esi
+mov   1252(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2504]
+# asm 1: mov   2504(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2504(<ap=%rdi),>temp2=%edx
+mov   2504(%rdi),%edx
+
+# qhasm: mem64[ap + 2504] = temp1
+# asm 1: mov   <temp1=int64#2,2504(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2504(<ap=%rdi)
+mov   %esi,2504(%rdi)
+
+# qhasm: mem64[ap + 1252] = temp2
+# asm 1: mov   <temp2=int64#3,1252(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1252(<ap=%rdi)
+mov   %edx,1252(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1256]
+# asm 1: mov   1256(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1256(<ap=%rdi),>temp1=%esi
+mov   1256(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1480]
+# asm 1: mov   1480(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1480(<ap=%rdi),>temp2=%edx
+mov   1480(%rdi),%edx
+
+# qhasm: mem64[ap + 1480] = temp1
+# asm 1: mov   <temp1=int64#2,1480(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1480(<ap=%rdi)
+mov   %esi,1480(%rdi)
+
+# qhasm: mem64[ap + 1256] = temp2
+# asm 1: mov   <temp2=int64#3,1256(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1256(<ap=%rdi)
+mov   %edx,1256(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1260]
+# asm 1: mov   1260(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1260(<ap=%rdi),>temp1=%esi
+mov   1260(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3528]
+# asm 1: mov   3528(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3528(<ap=%rdi),>temp2=%edx
+mov   3528(%rdi),%edx
+
+# qhasm: mem64[ap + 3528] = temp1
+# asm 1: mov   <temp1=int64#2,3528(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3528(<ap=%rdi)
+mov   %esi,3528(%rdi)
+
+# qhasm: mem64[ap + 1260] = temp2
+# asm 1: mov   <temp2=int64#3,1260(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1260(<ap=%rdi)
+mov   %edx,1260(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1268]
+# asm 1: mov   1268(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1268(<ap=%rdi),>temp1=%esi
+mov   1268(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3016]
+# asm 1: mov   3016(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3016(<ap=%rdi),>temp2=%edx
+mov   3016(%rdi),%edx
+
+# qhasm: mem64[ap + 3016] = temp1
+# asm 1: mov   <temp1=int64#2,3016(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3016(<ap=%rdi)
+mov   %esi,3016(%rdi)
+
+# qhasm: mem64[ap + 1268] = temp2
+# asm 1: mov   <temp2=int64#3,1268(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1268(<ap=%rdi)
+mov   %edx,1268(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1272]
+# asm 1: mov   1272(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1272(<ap=%rdi),>temp1=%esi
+mov   1272(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1992]
+# asm 1: mov   1992(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1992(<ap=%rdi),>temp2=%edx
+mov   1992(%rdi),%edx
+
+# qhasm: mem64[ap + 1992] = temp1
+# asm 1: mov   <temp1=int64#2,1992(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1992(<ap=%rdi)
+mov   %esi,1992(%rdi)
+
+# qhasm: mem64[ap + 1272] = temp2
+# asm 1: mov   <temp2=int64#3,1272(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1272(<ap=%rdi)
+mov   %edx,1272(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1276]
+# asm 1: mov   1276(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1276(<ap=%rdi),>temp1=%esi
+mov   1276(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4040]
+# asm 1: mov   4040(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4040(<ap=%rdi),>temp2=%edx
+mov   4040(%rdi),%edx
+
+# qhasm: mem64[ap + 4040] = temp1
+# asm 1: mov   <temp1=int64#2,4040(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4040(<ap=%rdi)
+mov   %esi,4040(%rdi)
+
+# qhasm: mem64[ap + 1276] = temp2
+# asm 1: mov   <temp2=int64#3,1276(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1276(<ap=%rdi)
+mov   %edx,1276(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1284]
+# asm 1: mov   1284(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1284(<ap=%rdi),>temp1=%esi
+mov   1284(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2088]
+# asm 1: mov   2088(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2088(<ap=%rdi),>temp2=%edx
+mov   2088(%rdi),%edx
+
+# qhasm: mem64[ap + 2088] = temp1
+# asm 1: mov   <temp1=int64#2,2088(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2088(<ap=%rdi)
+mov   %esi,2088(%rdi)
+
+# qhasm: mem64[ap + 1284] = temp2
+# asm 1: mov   <temp2=int64#3,1284(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1284(<ap=%rdi)
+mov   %edx,1284(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1292]
+# asm 1: mov   1292(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1292(<ap=%rdi),>temp1=%esi
+mov   1292(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3112]
+# asm 1: mov   3112(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3112(<ap=%rdi),>temp2=%edx
+mov   3112(%rdi),%edx
+
+# qhasm: mem64[ap + 3112] = temp1
+# asm 1: mov   <temp1=int64#2,3112(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3112(<ap=%rdi)
+mov   %esi,3112(%rdi)
+
+# qhasm: mem64[ap + 1292] = temp2
+# asm 1: mov   <temp2=int64#3,1292(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1292(<ap=%rdi)
+mov   %edx,1292(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1300]
+# asm 1: mov   1300(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1300(<ap=%rdi),>temp1=%esi
+mov   1300(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2600]
+# asm 1: mov   2600(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2600(<ap=%rdi),>temp2=%edx
+mov   2600(%rdi),%edx
+
+# qhasm: mem64[ap + 2600] = temp1
+# asm 1: mov   <temp1=int64#2,2600(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2600(<ap=%rdi)
+mov   %esi,2600(%rdi)
+
+# qhasm: mem64[ap + 1300] = temp2
+# asm 1: mov   <temp2=int64#3,1300(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1300(<ap=%rdi)
+mov   %edx,1300(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1304]
+# asm 1: mov   1304(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1304(<ap=%rdi),>temp1=%esi
+mov   1304(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1576]
+# asm 1: mov   1576(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1576(<ap=%rdi),>temp2=%edx
+mov   1576(%rdi),%edx
+
+# qhasm: mem64[ap + 1576] = temp1
+# asm 1: mov   <temp1=int64#2,1576(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1576(<ap=%rdi)
+mov   %esi,1576(%rdi)
+
+# qhasm: mem64[ap + 1304] = temp2
+# asm 1: mov   <temp2=int64#3,1304(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1304(<ap=%rdi)
+mov   %edx,1304(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1308]
+# asm 1: mov   1308(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1308(<ap=%rdi),>temp1=%esi
+mov   1308(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3624]
+# asm 1: mov   3624(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3624(<ap=%rdi),>temp2=%edx
+mov   3624(%rdi),%edx
+
+# qhasm: mem64[ap + 3624] = temp1
+# asm 1: mov   <temp1=int64#2,3624(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3624(<ap=%rdi)
+mov   %esi,3624(%rdi)
+
+# qhasm: mem64[ap + 1308] = temp2
+# asm 1: mov   <temp2=int64#3,1308(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1308(<ap=%rdi)
+mov   %edx,1308(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1316]
+# asm 1: mov   1316(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1316(<ap=%rdi),>temp1=%esi
+mov   1316(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2344]
+# asm 1: mov   2344(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2344(<ap=%rdi),>temp2=%edx
+mov   2344(%rdi),%edx
+
+# qhasm: mem64[ap + 2344] = temp1
+# asm 1: mov   <temp1=int64#2,2344(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2344(<ap=%rdi)
+mov   %esi,2344(%rdi)
+
+# qhasm: mem64[ap + 1316] = temp2
+# asm 1: mov   <temp2=int64#3,1316(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1316(<ap=%rdi)
+mov   %edx,1316(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1324]
+# asm 1: mov   1324(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1324(<ap=%rdi),>temp1=%esi
+mov   1324(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3368]
+# asm 1: mov   3368(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3368(<ap=%rdi),>temp2=%edx
+mov   3368(%rdi),%edx
+
+# qhasm: mem64[ap + 3368] = temp1
+# asm 1: mov   <temp1=int64#2,3368(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3368(<ap=%rdi)
+mov   %esi,3368(%rdi)
+
+# qhasm: mem64[ap + 1324] = temp2
+# asm 1: mov   <temp2=int64#3,1324(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1324(<ap=%rdi)
+mov   %edx,1324(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1332]
+# asm 1: mov   1332(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1332(<ap=%rdi),>temp1=%esi
+mov   1332(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2856]
+# asm 1: mov   2856(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2856(<ap=%rdi),>temp2=%edx
+mov   2856(%rdi),%edx
+
+# qhasm: mem64[ap + 2856] = temp1
+# asm 1: mov   <temp1=int64#2,2856(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2856(<ap=%rdi)
+mov   %esi,2856(%rdi)
+
+# qhasm: mem64[ap + 1332] = temp2
+# asm 1: mov   <temp2=int64#3,1332(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1332(<ap=%rdi)
+mov   %edx,1332(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1336]
+# asm 1: mov   1336(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1336(<ap=%rdi),>temp1=%esi
+mov   1336(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1832]
+# asm 1: mov   1832(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1832(<ap=%rdi),>temp2=%edx
+mov   1832(%rdi),%edx
+
+# qhasm: mem64[ap + 1832] = temp1
+# asm 1: mov   <temp1=int64#2,1832(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1832(<ap=%rdi)
+mov   %esi,1832(%rdi)
+
+# qhasm: mem64[ap + 1336] = temp2
+# asm 1: mov   <temp2=int64#3,1336(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1336(<ap=%rdi)
+mov   %edx,1336(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1340]
+# asm 1: mov   1340(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1340(<ap=%rdi),>temp1=%esi
+mov   1340(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3880]
+# asm 1: mov   3880(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3880(<ap=%rdi),>temp2=%edx
+mov   3880(%rdi),%edx
+
+# qhasm: mem64[ap + 3880] = temp1
+# asm 1: mov   <temp1=int64#2,3880(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3880(<ap=%rdi)
+mov   %esi,3880(%rdi)
+
+# qhasm: mem64[ap + 1340] = temp2
+# asm 1: mov   <temp2=int64#3,1340(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1340(<ap=%rdi)
+mov   %edx,1340(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1348]
+# asm 1: mov   1348(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1348(<ap=%rdi),>temp1=%esi
+mov   1348(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2216]
+# asm 1: mov   2216(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2216(<ap=%rdi),>temp2=%edx
+mov   2216(%rdi),%edx
+
+# qhasm: mem64[ap + 2216] = temp1
+# asm 1: mov   <temp1=int64#2,2216(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2216(<ap=%rdi)
+mov   %esi,2216(%rdi)
+
+# qhasm: mem64[ap + 1348] = temp2
+# asm 1: mov   <temp2=int64#3,1348(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1348(<ap=%rdi)
+mov   %edx,1348(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1356]
+# asm 1: mov   1356(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1356(<ap=%rdi),>temp1=%esi
+mov   1356(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3240]
+# asm 1: mov   3240(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3240(<ap=%rdi),>temp2=%edx
+mov   3240(%rdi),%edx
+
+# qhasm: mem64[ap + 3240] = temp1
+# asm 1: mov   <temp1=int64#2,3240(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3240(<ap=%rdi)
+mov   %esi,3240(%rdi)
+
+# qhasm: mem64[ap + 1356] = temp2
+# asm 1: mov   <temp2=int64#3,1356(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1356(<ap=%rdi)
+mov   %edx,1356(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1364]
+# asm 1: mov   1364(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1364(<ap=%rdi),>temp1=%esi
+mov   1364(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2728]
+# asm 1: mov   2728(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2728(<ap=%rdi),>temp2=%edx
+mov   2728(%rdi),%edx
+
+# qhasm: mem64[ap + 2728] = temp1
+# asm 1: mov   <temp1=int64#2,2728(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2728(<ap=%rdi)
+mov   %esi,2728(%rdi)
+
+# qhasm: mem64[ap + 1364] = temp2
+# asm 1: mov   <temp2=int64#3,1364(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1364(<ap=%rdi)
+mov   %edx,1364(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1368]
+# asm 1: mov   1368(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1368(<ap=%rdi),>temp1=%esi
+mov   1368(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1704]
+# asm 1: mov   1704(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1704(<ap=%rdi),>temp2=%edx
+mov   1704(%rdi),%edx
+
+# qhasm: mem64[ap + 1704] = temp1
+# asm 1: mov   <temp1=int64#2,1704(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1704(<ap=%rdi)
+mov   %esi,1704(%rdi)
+
+# qhasm: mem64[ap + 1368] = temp2
+# asm 1: mov   <temp2=int64#3,1368(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1368(<ap=%rdi)
+mov   %edx,1368(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1372]
+# asm 1: mov   1372(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1372(<ap=%rdi),>temp1=%esi
+mov   1372(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3752]
+# asm 1: mov   3752(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3752(<ap=%rdi),>temp2=%edx
+mov   3752(%rdi),%edx
+
+# qhasm: mem64[ap + 3752] = temp1
+# asm 1: mov   <temp1=int64#2,3752(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3752(<ap=%rdi)
+mov   %esi,3752(%rdi)
+
+# qhasm: mem64[ap + 1372] = temp2
+# asm 1: mov   <temp2=int64#3,1372(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1372(<ap=%rdi)
+mov   %edx,1372(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1380]
+# asm 1: mov   1380(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1380(<ap=%rdi),>temp1=%esi
+mov   1380(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2472]
+# asm 1: mov   2472(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2472(<ap=%rdi),>temp2=%edx
+mov   2472(%rdi),%edx
+
+# qhasm: mem64[ap + 2472] = temp1
+# asm 1: mov   <temp1=int64#2,2472(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2472(<ap=%rdi)
+mov   %esi,2472(%rdi)
+
+# qhasm: mem64[ap + 1380] = temp2
+# asm 1: mov   <temp2=int64#3,1380(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1380(<ap=%rdi)
+mov   %edx,1380(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1384]
+# asm 1: mov   1384(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1384(<ap=%rdi),>temp1=%esi
+mov   1384(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1448]
+# asm 1: mov   1448(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1448(<ap=%rdi),>temp2=%edx
+mov   1448(%rdi),%edx
+
+# qhasm: mem64[ap + 1448] = temp1
+# asm 1: mov   <temp1=int64#2,1448(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1448(<ap=%rdi)
+mov   %esi,1448(%rdi)
+
+# qhasm: mem64[ap + 1384] = temp2
+# asm 1: mov   <temp2=int64#3,1384(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1384(<ap=%rdi)
+mov   %edx,1384(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1388]
+# asm 1: mov   1388(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1388(<ap=%rdi),>temp1=%esi
+mov   1388(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3496]
+# asm 1: mov   3496(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3496(<ap=%rdi),>temp2=%edx
+mov   3496(%rdi),%edx
+
+# qhasm: mem64[ap + 3496] = temp1
+# asm 1: mov   <temp1=int64#2,3496(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3496(<ap=%rdi)
+mov   %esi,3496(%rdi)
+
+# qhasm: mem64[ap + 1388] = temp2
+# asm 1: mov   <temp2=int64#3,1388(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1388(<ap=%rdi)
+mov   %edx,1388(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1396]
+# asm 1: mov   1396(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1396(<ap=%rdi),>temp1=%esi
+mov   1396(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2984]
+# asm 1: mov   2984(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2984(<ap=%rdi),>temp2=%edx
+mov   2984(%rdi),%edx
+
+# qhasm: mem64[ap + 2984] = temp1
+# asm 1: mov   <temp1=int64#2,2984(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2984(<ap=%rdi)
+mov   %esi,2984(%rdi)
+
+# qhasm: mem64[ap + 1396] = temp2
+# asm 1: mov   <temp2=int64#3,1396(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1396(<ap=%rdi)
+mov   %edx,1396(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1400]
+# asm 1: mov   1400(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1400(<ap=%rdi),>temp1=%esi
+mov   1400(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1960]
+# asm 1: mov   1960(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1960(<ap=%rdi),>temp2=%edx
+mov   1960(%rdi),%edx
+
+# qhasm: mem64[ap + 1960] = temp1
+# asm 1: mov   <temp1=int64#2,1960(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1960(<ap=%rdi)
+mov   %esi,1960(%rdi)
+
+# qhasm: mem64[ap + 1400] = temp2
+# asm 1: mov   <temp2=int64#3,1400(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1400(<ap=%rdi)
+mov   %edx,1400(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1404]
+# asm 1: mov   1404(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1404(<ap=%rdi),>temp1=%esi
+mov   1404(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4008]
+# asm 1: mov   4008(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4008(<ap=%rdi),>temp2=%edx
+mov   4008(%rdi),%edx
+
+# qhasm: mem64[ap + 4008] = temp1
+# asm 1: mov   <temp1=int64#2,4008(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4008(<ap=%rdi)
+mov   %esi,4008(%rdi)
+
+# qhasm: mem64[ap + 1404] = temp2
+# asm 1: mov   <temp2=int64#3,1404(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1404(<ap=%rdi)
+mov   %edx,1404(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1412]
+# asm 1: mov   1412(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1412(<ap=%rdi),>temp1=%esi
+mov   1412(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2152]
+# asm 1: mov   2152(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2152(<ap=%rdi),>temp2=%edx
+mov   2152(%rdi),%edx
+
+# qhasm: mem64[ap + 2152] = temp1
+# asm 1: mov   <temp1=int64#2,2152(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2152(<ap=%rdi)
+mov   %esi,2152(%rdi)
+
+# qhasm: mem64[ap + 1412] = temp2
+# asm 1: mov   <temp2=int64#3,1412(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1412(<ap=%rdi)
+mov   %edx,1412(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1420]
+# asm 1: mov   1420(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1420(<ap=%rdi),>temp1=%esi
+mov   1420(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3176]
+# asm 1: mov   3176(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3176(<ap=%rdi),>temp2=%edx
+mov   3176(%rdi),%edx
+
+# qhasm: mem64[ap + 3176] = temp1
+# asm 1: mov   <temp1=int64#2,3176(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3176(<ap=%rdi)
+mov   %esi,3176(%rdi)
+
+# qhasm: mem64[ap + 1420] = temp2
+# asm 1: mov   <temp2=int64#3,1420(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1420(<ap=%rdi)
+mov   %edx,1420(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1428]
+# asm 1: mov   1428(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1428(<ap=%rdi),>temp1=%esi
+mov   1428(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2664]
+# asm 1: mov   2664(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2664(<ap=%rdi),>temp2=%edx
+mov   2664(%rdi),%edx
+
+# qhasm: mem64[ap + 2664] = temp1
+# asm 1: mov   <temp1=int64#2,2664(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2664(<ap=%rdi)
+mov   %esi,2664(%rdi)
+
+# qhasm: mem64[ap + 1428] = temp2
+# asm 1: mov   <temp2=int64#3,1428(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1428(<ap=%rdi)
+mov   %edx,1428(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1432]
+# asm 1: mov   1432(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1432(<ap=%rdi),>temp1=%esi
+mov   1432(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1640]
+# asm 1: mov   1640(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1640(<ap=%rdi),>temp2=%edx
+mov   1640(%rdi),%edx
+
+# qhasm: mem64[ap + 1640] = temp1
+# asm 1: mov   <temp1=int64#2,1640(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1640(<ap=%rdi)
+mov   %esi,1640(%rdi)
+
+# qhasm: mem64[ap + 1432] = temp2
+# asm 1: mov   <temp2=int64#3,1432(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1432(<ap=%rdi)
+mov   %edx,1432(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1436]
+# asm 1: mov   1436(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1436(<ap=%rdi),>temp1=%esi
+mov   1436(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3688]
+# asm 1: mov   3688(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3688(<ap=%rdi),>temp2=%edx
+mov   3688(%rdi),%edx
+
+# qhasm: mem64[ap + 3688] = temp1
+# asm 1: mov   <temp1=int64#2,3688(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3688(<ap=%rdi)
+mov   %esi,3688(%rdi)
+
+# qhasm: mem64[ap + 1436] = temp2
+# asm 1: mov   <temp2=int64#3,1436(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1436(<ap=%rdi)
+mov   %edx,1436(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1444]
+# asm 1: mov   1444(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1444(<ap=%rdi),>temp1=%esi
+mov   1444(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2408]
+# asm 1: mov   2408(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2408(<ap=%rdi),>temp2=%edx
+mov   2408(%rdi),%edx
+
+# qhasm: mem64[ap + 2408] = temp1
+# asm 1: mov   <temp1=int64#2,2408(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2408(<ap=%rdi)
+mov   %esi,2408(%rdi)
+
+# qhasm: mem64[ap + 1444] = temp2
+# asm 1: mov   <temp2=int64#3,1444(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1444(<ap=%rdi)
+mov   %edx,1444(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1452]
+# asm 1: mov   1452(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1452(<ap=%rdi),>temp1=%esi
+mov   1452(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3432]
+# asm 1: mov   3432(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3432(<ap=%rdi),>temp2=%edx
+mov   3432(%rdi),%edx
+
+# qhasm: mem64[ap + 3432] = temp1
+# asm 1: mov   <temp1=int64#2,3432(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3432(<ap=%rdi)
+mov   %esi,3432(%rdi)
+
+# qhasm: mem64[ap + 1452] = temp2
+# asm 1: mov   <temp2=int64#3,1452(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1452(<ap=%rdi)
+mov   %edx,1452(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1460]
+# asm 1: mov   1460(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1460(<ap=%rdi),>temp1=%esi
+mov   1460(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2920]
+# asm 1: mov   2920(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2920(<ap=%rdi),>temp2=%edx
+mov   2920(%rdi),%edx
+
+# qhasm: mem64[ap + 2920] = temp1
+# asm 1: mov   <temp1=int64#2,2920(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2920(<ap=%rdi)
+mov   %esi,2920(%rdi)
+
+# qhasm: mem64[ap + 1460] = temp2
+# asm 1: mov   <temp2=int64#3,1460(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1460(<ap=%rdi)
+mov   %edx,1460(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1464]
+# asm 1: mov   1464(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1464(<ap=%rdi),>temp1=%esi
+mov   1464(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1896]
+# asm 1: mov   1896(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1896(<ap=%rdi),>temp2=%edx
+mov   1896(%rdi),%edx
+
+# qhasm: mem64[ap + 1896] = temp1
+# asm 1: mov   <temp1=int64#2,1896(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1896(<ap=%rdi)
+mov   %esi,1896(%rdi)
+
+# qhasm: mem64[ap + 1464] = temp2
+# asm 1: mov   <temp2=int64#3,1464(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1464(<ap=%rdi)
+mov   %edx,1464(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1468]
+# asm 1: mov   1468(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1468(<ap=%rdi),>temp1=%esi
+mov   1468(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3944]
+# asm 1: mov   3944(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3944(<ap=%rdi),>temp2=%edx
+mov   3944(%rdi),%edx
+
+# qhasm: mem64[ap + 3944] = temp1
+# asm 1: mov   <temp1=int64#2,3944(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3944(<ap=%rdi)
+mov   %esi,3944(%rdi)
+
+# qhasm: mem64[ap + 1468] = temp2
+# asm 1: mov   <temp2=int64#3,1468(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1468(<ap=%rdi)
+mov   %edx,1468(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1476]
+# asm 1: mov   1476(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1476(<ap=%rdi),>temp1=%esi
+mov   1476(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2280]
+# asm 1: mov   2280(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2280(<ap=%rdi),>temp2=%edx
+mov   2280(%rdi),%edx
+
+# qhasm: mem64[ap + 2280] = temp1
+# asm 1: mov   <temp1=int64#2,2280(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2280(<ap=%rdi)
+mov   %esi,2280(%rdi)
+
+# qhasm: mem64[ap + 1476] = temp2
+# asm 1: mov   <temp2=int64#3,1476(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1476(<ap=%rdi)
+mov   %edx,1476(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1484]
+# asm 1: mov   1484(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1484(<ap=%rdi),>temp1=%esi
+mov   1484(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3304]
+# asm 1: mov   3304(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3304(<ap=%rdi),>temp2=%edx
+mov   3304(%rdi),%edx
+
+# qhasm: mem64[ap + 3304] = temp1
+# asm 1: mov   <temp1=int64#2,3304(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3304(<ap=%rdi)
+mov   %esi,3304(%rdi)
+
+# qhasm: mem64[ap + 1484] = temp2
+# asm 1: mov   <temp2=int64#3,1484(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1484(<ap=%rdi)
+mov   %edx,1484(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1492]
+# asm 1: mov   1492(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1492(<ap=%rdi),>temp1=%esi
+mov   1492(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2792]
+# asm 1: mov   2792(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2792(<ap=%rdi),>temp2=%edx
+mov   2792(%rdi),%edx
+
+# qhasm: mem64[ap + 2792] = temp1
+# asm 1: mov   <temp1=int64#2,2792(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2792(<ap=%rdi)
+mov   %esi,2792(%rdi)
+
+# qhasm: mem64[ap + 1492] = temp2
+# asm 1: mov   <temp2=int64#3,1492(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1492(<ap=%rdi)
+mov   %edx,1492(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1496]
+# asm 1: mov   1496(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1496(<ap=%rdi),>temp1=%esi
+mov   1496(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1768]
+# asm 1: mov   1768(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1768(<ap=%rdi),>temp2=%edx
+mov   1768(%rdi),%edx
+
+# qhasm: mem64[ap + 1768] = temp1
+# asm 1: mov   <temp1=int64#2,1768(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1768(<ap=%rdi)
+mov   %esi,1768(%rdi)
+
+# qhasm: mem64[ap + 1496] = temp2
+# asm 1: mov   <temp2=int64#3,1496(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1496(<ap=%rdi)
+mov   %edx,1496(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1500]
+# asm 1: mov   1500(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1500(<ap=%rdi),>temp1=%esi
+mov   1500(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3816]
+# asm 1: mov   3816(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3816(<ap=%rdi),>temp2=%edx
+mov   3816(%rdi),%edx
+
+# qhasm: mem64[ap + 3816] = temp1
+# asm 1: mov   <temp1=int64#2,3816(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3816(<ap=%rdi)
+mov   %esi,3816(%rdi)
+
+# qhasm: mem64[ap + 1500] = temp2
+# asm 1: mov   <temp2=int64#3,1500(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1500(<ap=%rdi)
+mov   %edx,1500(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1508]
+# asm 1: mov   1508(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1508(<ap=%rdi),>temp1=%esi
+mov   1508(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2536]
+# asm 1: mov   2536(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2536(<ap=%rdi),>temp2=%edx
+mov   2536(%rdi),%edx
+
+# qhasm: mem64[ap + 2536] = temp1
+# asm 1: mov   <temp1=int64#2,2536(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2536(<ap=%rdi)
+mov   %esi,2536(%rdi)
+
+# qhasm: mem64[ap + 1508] = temp2
+# asm 1: mov   <temp2=int64#3,1508(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1508(<ap=%rdi)
+mov   %edx,1508(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1516]
+# asm 1: mov   1516(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1516(<ap=%rdi),>temp1=%esi
+mov   1516(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3560]
+# asm 1: mov   3560(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3560(<ap=%rdi),>temp2=%edx
+mov   3560(%rdi),%edx
+
+# qhasm: mem64[ap + 3560] = temp1
+# asm 1: mov   <temp1=int64#2,3560(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3560(<ap=%rdi)
+mov   %esi,3560(%rdi)
+
+# qhasm: mem64[ap + 1516] = temp2
+# asm 1: mov   <temp2=int64#3,1516(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1516(<ap=%rdi)
+mov   %edx,1516(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1524]
+# asm 1: mov   1524(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1524(<ap=%rdi),>temp1=%esi
+mov   1524(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3048]
+# asm 1: mov   3048(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3048(<ap=%rdi),>temp2=%edx
+mov   3048(%rdi),%edx
+
+# qhasm: mem64[ap + 3048] = temp1
+# asm 1: mov   <temp1=int64#2,3048(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3048(<ap=%rdi)
+mov   %esi,3048(%rdi)
+
+# qhasm: mem64[ap + 1524] = temp2
+# asm 1: mov   <temp2=int64#3,1524(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1524(<ap=%rdi)
+mov   %edx,1524(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1528]
+# asm 1: mov   1528(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1528(<ap=%rdi),>temp1=%esi
+mov   1528(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2024]
+# asm 1: mov   2024(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2024(<ap=%rdi),>temp2=%edx
+mov   2024(%rdi),%edx
+
+# qhasm: mem64[ap + 2024] = temp1
+# asm 1: mov   <temp1=int64#2,2024(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2024(<ap=%rdi)
+mov   %esi,2024(%rdi)
+
+# qhasm: mem64[ap + 1528] = temp2
+# asm 1: mov   <temp2=int64#3,1528(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1528(<ap=%rdi)
+mov   %edx,1528(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1532]
+# asm 1: mov   1532(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1532(<ap=%rdi),>temp1=%esi
+mov   1532(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4072]
+# asm 1: mov   4072(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4072(<ap=%rdi),>temp2=%edx
+mov   4072(%rdi),%edx
+
+# qhasm: mem64[ap + 4072] = temp1
+# asm 1: mov   <temp1=int64#2,4072(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4072(<ap=%rdi)
+mov   %esi,4072(%rdi)
+
+# qhasm: mem64[ap + 1532] = temp2
+# asm 1: mov   <temp2=int64#3,1532(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1532(<ap=%rdi)
+mov   %edx,1532(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1540]
+# asm 1: mov   1540(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1540(<ap=%rdi),>temp1=%esi
+mov   1540(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2072]
+# asm 1: mov   2072(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2072(<ap=%rdi),>temp2=%edx
+mov   2072(%rdi),%edx
+
+# qhasm: mem64[ap + 2072] = temp1
+# asm 1: mov   <temp1=int64#2,2072(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2072(<ap=%rdi)
+mov   %esi,2072(%rdi)
+
+# qhasm: mem64[ap + 1540] = temp2
+# asm 1: mov   <temp2=int64#3,1540(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1540(<ap=%rdi)
+mov   %edx,1540(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1548]
+# asm 1: mov   1548(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1548(<ap=%rdi),>temp1=%esi
+mov   1548(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3096]
+# asm 1: mov   3096(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3096(<ap=%rdi),>temp2=%edx
+mov   3096(%rdi),%edx
+
+# qhasm: mem64[ap + 3096] = temp1
+# asm 1: mov   <temp1=int64#2,3096(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3096(<ap=%rdi)
+mov   %esi,3096(%rdi)
+
+# qhasm: mem64[ap + 1548] = temp2
+# asm 1: mov   <temp2=int64#3,1548(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1548(<ap=%rdi)
+mov   %edx,1548(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1556]
+# asm 1: mov   1556(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1556(<ap=%rdi),>temp1=%esi
+mov   1556(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2584]
+# asm 1: mov   2584(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2584(<ap=%rdi),>temp2=%edx
+mov   2584(%rdi),%edx
+
+# qhasm: mem64[ap + 2584] = temp1
+# asm 1: mov   <temp1=int64#2,2584(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2584(<ap=%rdi)
+mov   %esi,2584(%rdi)
+
+# qhasm: mem64[ap + 1556] = temp2
+# asm 1: mov   <temp2=int64#3,1556(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1556(<ap=%rdi)
+mov   %edx,1556(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1564]
+# asm 1: mov   1564(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1564(<ap=%rdi),>temp1=%esi
+mov   1564(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3608]
+# asm 1: mov   3608(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3608(<ap=%rdi),>temp2=%edx
+mov   3608(%rdi),%edx
+
+# qhasm: mem64[ap + 3608] = temp1
+# asm 1: mov   <temp1=int64#2,3608(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3608(<ap=%rdi)
+mov   %esi,3608(%rdi)
+
+# qhasm: mem64[ap + 1564] = temp2
+# asm 1: mov   <temp2=int64#3,1564(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1564(<ap=%rdi)
+mov   %edx,1564(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1572]
+# asm 1: mov   1572(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1572(<ap=%rdi),>temp1=%esi
+mov   1572(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2328]
+# asm 1: mov   2328(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2328(<ap=%rdi),>temp2=%edx
+mov   2328(%rdi),%edx
+
+# qhasm: mem64[ap + 2328] = temp1
+# asm 1: mov   <temp1=int64#2,2328(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2328(<ap=%rdi)
+mov   %esi,2328(%rdi)
+
+# qhasm: mem64[ap + 1572] = temp2
+# asm 1: mov   <temp2=int64#3,1572(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1572(<ap=%rdi)
+mov   %edx,1572(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1580]
+# asm 1: mov   1580(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1580(<ap=%rdi),>temp1=%esi
+mov   1580(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3352]
+# asm 1: mov   3352(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3352(<ap=%rdi),>temp2=%edx
+mov   3352(%rdi),%edx
+
+# qhasm: mem64[ap + 3352] = temp1
+# asm 1: mov   <temp1=int64#2,3352(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3352(<ap=%rdi)
+mov   %esi,3352(%rdi)
+
+# qhasm: mem64[ap + 1580] = temp2
+# asm 1: mov   <temp2=int64#3,1580(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1580(<ap=%rdi)
+mov   %edx,1580(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1588]
+# asm 1: mov   1588(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1588(<ap=%rdi),>temp1=%esi
+mov   1588(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2840]
+# asm 1: mov   2840(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2840(<ap=%rdi),>temp2=%edx
+mov   2840(%rdi),%edx
+
+# qhasm: mem64[ap + 2840] = temp1
+# asm 1: mov   <temp1=int64#2,2840(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2840(<ap=%rdi)
+mov   %esi,2840(%rdi)
+
+# qhasm: mem64[ap + 1588] = temp2
+# asm 1: mov   <temp2=int64#3,1588(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1588(<ap=%rdi)
+mov   %edx,1588(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1592]
+# asm 1: mov   1592(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1592(<ap=%rdi),>temp1=%esi
+mov   1592(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1816]
+# asm 1: mov   1816(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1816(<ap=%rdi),>temp2=%edx
+mov   1816(%rdi),%edx
+
+# qhasm: mem64[ap + 1816] = temp1
+# asm 1: mov   <temp1=int64#2,1816(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1816(<ap=%rdi)
+mov   %esi,1816(%rdi)
+
+# qhasm: mem64[ap + 1592] = temp2
+# asm 1: mov   <temp2=int64#3,1592(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1592(<ap=%rdi)
+mov   %edx,1592(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1596]
+# asm 1: mov   1596(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1596(<ap=%rdi),>temp1=%esi
+mov   1596(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3864]
+# asm 1: mov   3864(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3864(<ap=%rdi),>temp2=%edx
+mov   3864(%rdi),%edx
+
+# qhasm: mem64[ap + 3864] = temp1
+# asm 1: mov   <temp1=int64#2,3864(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3864(<ap=%rdi)
+mov   %esi,3864(%rdi)
+
+# qhasm: mem64[ap + 1596] = temp2
+# asm 1: mov   <temp2=int64#3,1596(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1596(<ap=%rdi)
+mov   %edx,1596(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1604]
+# asm 1: mov   1604(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1604(<ap=%rdi),>temp1=%esi
+mov   1604(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2200]
+# asm 1: mov   2200(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2200(<ap=%rdi),>temp2=%edx
+mov   2200(%rdi),%edx
+
+# qhasm: mem64[ap + 2200] = temp1
+# asm 1: mov   <temp1=int64#2,2200(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2200(<ap=%rdi)
+mov   %esi,2200(%rdi)
+
+# qhasm: mem64[ap + 1604] = temp2
+# asm 1: mov   <temp2=int64#3,1604(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1604(<ap=%rdi)
+mov   %edx,1604(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1612]
+# asm 1: mov   1612(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1612(<ap=%rdi),>temp1=%esi
+mov   1612(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3224]
+# asm 1: mov   3224(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3224(<ap=%rdi),>temp2=%edx
+mov   3224(%rdi),%edx
+
+# qhasm: mem64[ap + 3224] = temp1
+# asm 1: mov   <temp1=int64#2,3224(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3224(<ap=%rdi)
+mov   %esi,3224(%rdi)
+
+# qhasm: mem64[ap + 1612] = temp2
+# asm 1: mov   <temp2=int64#3,1612(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1612(<ap=%rdi)
+mov   %edx,1612(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1620]
+# asm 1: mov   1620(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1620(<ap=%rdi),>temp1=%esi
+mov   1620(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2712]
+# asm 1: mov   2712(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2712(<ap=%rdi),>temp2=%edx
+mov   2712(%rdi),%edx
+
+# qhasm: mem64[ap + 2712] = temp1
+# asm 1: mov   <temp1=int64#2,2712(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2712(<ap=%rdi)
+mov   %esi,2712(%rdi)
+
+# qhasm: mem64[ap + 1620] = temp2
+# asm 1: mov   <temp2=int64#3,1620(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1620(<ap=%rdi)
+mov   %edx,1620(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1624]
+# asm 1: mov   1624(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1624(<ap=%rdi),>temp1=%esi
+mov   1624(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1688]
+# asm 1: mov   1688(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1688(<ap=%rdi),>temp2=%edx
+mov   1688(%rdi),%edx
+
+# qhasm: mem64[ap + 1688] = temp1
+# asm 1: mov   <temp1=int64#2,1688(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1688(<ap=%rdi)
+mov   %esi,1688(%rdi)
+
+# qhasm: mem64[ap + 1624] = temp2
+# asm 1: mov   <temp2=int64#3,1624(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1624(<ap=%rdi)
+mov   %edx,1624(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1628]
+# asm 1: mov   1628(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1628(<ap=%rdi),>temp1=%esi
+mov   1628(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3736]
+# asm 1: mov   3736(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3736(<ap=%rdi),>temp2=%edx
+mov   3736(%rdi),%edx
+
+# qhasm: mem64[ap + 3736] = temp1
+# asm 1: mov   <temp1=int64#2,3736(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3736(<ap=%rdi)
+mov   %esi,3736(%rdi)
+
+# qhasm: mem64[ap + 1628] = temp2
+# asm 1: mov   <temp2=int64#3,1628(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1628(<ap=%rdi)
+mov   %edx,1628(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1636]
+# asm 1: mov   1636(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1636(<ap=%rdi),>temp1=%esi
+mov   1636(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2456]
+# asm 1: mov   2456(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2456(<ap=%rdi),>temp2=%edx
+mov   2456(%rdi),%edx
+
+# qhasm: mem64[ap + 2456] = temp1
+# asm 1: mov   <temp1=int64#2,2456(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2456(<ap=%rdi)
+mov   %esi,2456(%rdi)
+
+# qhasm: mem64[ap + 1636] = temp2
+# asm 1: mov   <temp2=int64#3,1636(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1636(<ap=%rdi)
+mov   %edx,1636(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1644]
+# asm 1: mov   1644(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1644(<ap=%rdi),>temp1=%esi
+mov   1644(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3480]
+# asm 1: mov   3480(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3480(<ap=%rdi),>temp2=%edx
+mov   3480(%rdi),%edx
+
+# qhasm: mem64[ap + 3480] = temp1
+# asm 1: mov   <temp1=int64#2,3480(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3480(<ap=%rdi)
+mov   %esi,3480(%rdi)
+
+# qhasm: mem64[ap + 1644] = temp2
+# asm 1: mov   <temp2=int64#3,1644(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1644(<ap=%rdi)
+mov   %edx,1644(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1652]
+# asm 1: mov   1652(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1652(<ap=%rdi),>temp1=%esi
+mov   1652(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2968]
+# asm 1: mov   2968(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2968(<ap=%rdi),>temp2=%edx
+mov   2968(%rdi),%edx
+
+# qhasm: mem64[ap + 2968] = temp1
+# asm 1: mov   <temp1=int64#2,2968(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2968(<ap=%rdi)
+mov   %esi,2968(%rdi)
+
+# qhasm: mem64[ap + 1652] = temp2
+# asm 1: mov   <temp2=int64#3,1652(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1652(<ap=%rdi)
+mov   %edx,1652(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1656]
+# asm 1: mov   1656(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1656(<ap=%rdi),>temp1=%esi
+mov   1656(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1944]
+# asm 1: mov   1944(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1944(<ap=%rdi),>temp2=%edx
+mov   1944(%rdi),%edx
+
+# qhasm: mem64[ap + 1944] = temp1
+# asm 1: mov   <temp1=int64#2,1944(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1944(<ap=%rdi)
+mov   %esi,1944(%rdi)
+
+# qhasm: mem64[ap + 1656] = temp2
+# asm 1: mov   <temp2=int64#3,1656(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1656(<ap=%rdi)
+mov   %edx,1656(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1660]
+# asm 1: mov   1660(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1660(<ap=%rdi),>temp1=%esi
+mov   1660(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3992]
+# asm 1: mov   3992(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3992(<ap=%rdi),>temp2=%edx
+mov   3992(%rdi),%edx
+
+# qhasm: mem64[ap + 3992] = temp1
+# asm 1: mov   <temp1=int64#2,3992(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3992(<ap=%rdi)
+mov   %esi,3992(%rdi)
+
+# qhasm: mem64[ap + 1660] = temp2
+# asm 1: mov   <temp2=int64#3,1660(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1660(<ap=%rdi)
+mov   %edx,1660(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1668]
+# asm 1: mov   1668(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1668(<ap=%rdi),>temp1=%esi
+mov   1668(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2136]
+# asm 1: mov   2136(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2136(<ap=%rdi),>temp2=%edx
+mov   2136(%rdi),%edx
+
+# qhasm: mem64[ap + 2136] = temp1
+# asm 1: mov   <temp1=int64#2,2136(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2136(<ap=%rdi)
+mov   %esi,2136(%rdi)
+
+# qhasm: mem64[ap + 1668] = temp2
+# asm 1: mov   <temp2=int64#3,1668(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1668(<ap=%rdi)
+mov   %edx,1668(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1676]
+# asm 1: mov   1676(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1676(<ap=%rdi),>temp1=%esi
+mov   1676(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3160]
+# asm 1: mov   3160(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3160(<ap=%rdi),>temp2=%edx
+mov   3160(%rdi),%edx
+
+# qhasm: mem64[ap + 3160] = temp1
+# asm 1: mov   <temp1=int64#2,3160(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3160(<ap=%rdi)
+mov   %esi,3160(%rdi)
+
+# qhasm: mem64[ap + 1676] = temp2
+# asm 1: mov   <temp2=int64#3,1676(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1676(<ap=%rdi)
+mov   %edx,1676(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1684]
+# asm 1: mov   1684(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1684(<ap=%rdi),>temp1=%esi
+mov   1684(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2648]
+# asm 1: mov   2648(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2648(<ap=%rdi),>temp2=%edx
+mov   2648(%rdi),%edx
+
+# qhasm: mem64[ap + 2648] = temp1
+# asm 1: mov   <temp1=int64#2,2648(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2648(<ap=%rdi)
+mov   %esi,2648(%rdi)
+
+# qhasm: mem64[ap + 1684] = temp2
+# asm 1: mov   <temp2=int64#3,1684(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1684(<ap=%rdi)
+mov   %edx,1684(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1692]
+# asm 1: mov   1692(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1692(<ap=%rdi),>temp1=%esi
+mov   1692(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3672]
+# asm 1: mov   3672(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3672(<ap=%rdi),>temp2=%edx
+mov   3672(%rdi),%edx
+
+# qhasm: mem64[ap + 3672] = temp1
+# asm 1: mov   <temp1=int64#2,3672(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3672(<ap=%rdi)
+mov   %esi,3672(%rdi)
+
+# qhasm: mem64[ap + 1692] = temp2
+# asm 1: mov   <temp2=int64#3,1692(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1692(<ap=%rdi)
+mov   %edx,1692(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1700]
+# asm 1: mov   1700(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1700(<ap=%rdi),>temp1=%esi
+mov   1700(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2392]
+# asm 1: mov   2392(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2392(<ap=%rdi),>temp2=%edx
+mov   2392(%rdi),%edx
+
+# qhasm: mem64[ap + 2392] = temp1
+# asm 1: mov   <temp1=int64#2,2392(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2392(<ap=%rdi)
+mov   %esi,2392(%rdi)
+
+# qhasm: mem64[ap + 1700] = temp2
+# asm 1: mov   <temp2=int64#3,1700(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1700(<ap=%rdi)
+mov   %edx,1700(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1708]
+# asm 1: mov   1708(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1708(<ap=%rdi),>temp1=%esi
+mov   1708(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3416]
+# asm 1: mov   3416(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3416(<ap=%rdi),>temp2=%edx
+mov   3416(%rdi),%edx
+
+# qhasm: mem64[ap + 3416] = temp1
+# asm 1: mov   <temp1=int64#2,3416(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3416(<ap=%rdi)
+mov   %esi,3416(%rdi)
+
+# qhasm: mem64[ap + 1708] = temp2
+# asm 1: mov   <temp2=int64#3,1708(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1708(<ap=%rdi)
+mov   %edx,1708(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1716]
+# asm 1: mov   1716(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1716(<ap=%rdi),>temp1=%esi
+mov   1716(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2904]
+# asm 1: mov   2904(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2904(<ap=%rdi),>temp2=%edx
+mov   2904(%rdi),%edx
+
+# qhasm: mem64[ap + 2904] = temp1
+# asm 1: mov   <temp1=int64#2,2904(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2904(<ap=%rdi)
+mov   %esi,2904(%rdi)
+
+# qhasm: mem64[ap + 1716] = temp2
+# asm 1: mov   <temp2=int64#3,1716(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1716(<ap=%rdi)
+mov   %edx,1716(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1720]
+# asm 1: mov   1720(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1720(<ap=%rdi),>temp1=%esi
+mov   1720(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1880]
+# asm 1: mov   1880(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1880(<ap=%rdi),>temp2=%edx
+mov   1880(%rdi),%edx
+
+# qhasm: mem64[ap + 1880] = temp1
+# asm 1: mov   <temp1=int64#2,1880(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1880(<ap=%rdi)
+mov   %esi,1880(%rdi)
+
+# qhasm: mem64[ap + 1720] = temp2
+# asm 1: mov   <temp2=int64#3,1720(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1720(<ap=%rdi)
+mov   %edx,1720(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1724]
+# asm 1: mov   1724(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1724(<ap=%rdi),>temp1=%esi
+mov   1724(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3928]
+# asm 1: mov   3928(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3928(<ap=%rdi),>temp2=%edx
+mov   3928(%rdi),%edx
+
+# qhasm: mem64[ap + 3928] = temp1
+# asm 1: mov   <temp1=int64#2,3928(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3928(<ap=%rdi)
+mov   %esi,3928(%rdi)
+
+# qhasm: mem64[ap + 1724] = temp2
+# asm 1: mov   <temp2=int64#3,1724(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1724(<ap=%rdi)
+mov   %edx,1724(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1732]
+# asm 1: mov   1732(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1732(<ap=%rdi),>temp1=%esi
+mov   1732(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2264]
+# asm 1: mov   2264(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2264(<ap=%rdi),>temp2=%edx
+mov   2264(%rdi),%edx
+
+# qhasm: mem64[ap + 2264] = temp1
+# asm 1: mov   <temp1=int64#2,2264(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2264(<ap=%rdi)
+mov   %esi,2264(%rdi)
+
+# qhasm: mem64[ap + 1732] = temp2
+# asm 1: mov   <temp2=int64#3,1732(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1732(<ap=%rdi)
+mov   %edx,1732(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1740]
+# asm 1: mov   1740(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1740(<ap=%rdi),>temp1=%esi
+mov   1740(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3288]
+# asm 1: mov   3288(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3288(<ap=%rdi),>temp2=%edx
+mov   3288(%rdi),%edx
+
+# qhasm: mem64[ap + 3288] = temp1
+# asm 1: mov   <temp1=int64#2,3288(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3288(<ap=%rdi)
+mov   %esi,3288(%rdi)
+
+# qhasm: mem64[ap + 1740] = temp2
+# asm 1: mov   <temp2=int64#3,1740(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1740(<ap=%rdi)
+mov   %edx,1740(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1748]
+# asm 1: mov   1748(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1748(<ap=%rdi),>temp1=%esi
+mov   1748(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2776]
+# asm 1: mov   2776(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2776(<ap=%rdi),>temp2=%edx
+mov   2776(%rdi),%edx
+
+# qhasm: mem64[ap + 2776] = temp1
+# asm 1: mov   <temp1=int64#2,2776(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2776(<ap=%rdi)
+mov   %esi,2776(%rdi)
+
+# qhasm: mem64[ap + 1748] = temp2
+# asm 1: mov   <temp2=int64#3,1748(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1748(<ap=%rdi)
+mov   %edx,1748(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1756]
+# asm 1: mov   1756(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1756(<ap=%rdi),>temp1=%esi
+mov   1756(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3800]
+# asm 1: mov   3800(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3800(<ap=%rdi),>temp2=%edx
+mov   3800(%rdi),%edx
+
+# qhasm: mem64[ap + 3800] = temp1
+# asm 1: mov   <temp1=int64#2,3800(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3800(<ap=%rdi)
+mov   %esi,3800(%rdi)
+
+# qhasm: mem64[ap + 1756] = temp2
+# asm 1: mov   <temp2=int64#3,1756(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1756(<ap=%rdi)
+mov   %edx,1756(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1764]
+# asm 1: mov   1764(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1764(<ap=%rdi),>temp1=%esi
+mov   1764(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2520]
+# asm 1: mov   2520(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2520(<ap=%rdi),>temp2=%edx
+mov   2520(%rdi),%edx
+
+# qhasm: mem64[ap + 2520] = temp1
+# asm 1: mov   <temp1=int64#2,2520(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2520(<ap=%rdi)
+mov   %esi,2520(%rdi)
+
+# qhasm: mem64[ap + 1764] = temp2
+# asm 1: mov   <temp2=int64#3,1764(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1764(<ap=%rdi)
+mov   %edx,1764(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1772]
+# asm 1: mov   1772(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1772(<ap=%rdi),>temp1=%esi
+mov   1772(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3544]
+# asm 1: mov   3544(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3544(<ap=%rdi),>temp2=%edx
+mov   3544(%rdi),%edx
+
+# qhasm: mem64[ap + 3544] = temp1
+# asm 1: mov   <temp1=int64#2,3544(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3544(<ap=%rdi)
+mov   %esi,3544(%rdi)
+
+# qhasm: mem64[ap + 1772] = temp2
+# asm 1: mov   <temp2=int64#3,1772(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1772(<ap=%rdi)
+mov   %edx,1772(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1780]
+# asm 1: mov   1780(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1780(<ap=%rdi),>temp1=%esi
+mov   1780(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3032]
+# asm 1: mov   3032(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3032(<ap=%rdi),>temp2=%edx
+mov   3032(%rdi),%edx
+
+# qhasm: mem64[ap + 3032] = temp1
+# asm 1: mov   <temp1=int64#2,3032(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3032(<ap=%rdi)
+mov   %esi,3032(%rdi)
+
+# qhasm: mem64[ap + 1780] = temp2
+# asm 1: mov   <temp2=int64#3,1780(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1780(<ap=%rdi)
+mov   %edx,1780(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1784]
+# asm 1: mov   1784(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1784(<ap=%rdi),>temp1=%esi
+mov   1784(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2008]
+# asm 1: mov   2008(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2008(<ap=%rdi),>temp2=%edx
+mov   2008(%rdi),%edx
+
+# qhasm: mem64[ap + 2008] = temp1
+# asm 1: mov   <temp1=int64#2,2008(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2008(<ap=%rdi)
+mov   %esi,2008(%rdi)
+
+# qhasm: mem64[ap + 1784] = temp2
+# asm 1: mov   <temp2=int64#3,1784(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1784(<ap=%rdi)
+mov   %edx,1784(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1788]
+# asm 1: mov   1788(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1788(<ap=%rdi),>temp1=%esi
+mov   1788(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4056]
+# asm 1: mov   4056(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4056(<ap=%rdi),>temp2=%edx
+mov   4056(%rdi),%edx
+
+# qhasm: mem64[ap + 4056] = temp1
+# asm 1: mov   <temp1=int64#2,4056(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4056(<ap=%rdi)
+mov   %esi,4056(%rdi)
+
+# qhasm: mem64[ap + 1788] = temp2
+# asm 1: mov   <temp2=int64#3,1788(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1788(<ap=%rdi)
+mov   %edx,1788(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1796]
+# asm 1: mov   1796(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1796(<ap=%rdi),>temp1=%esi
+mov   1796(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2104]
+# asm 1: mov   2104(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2104(<ap=%rdi),>temp2=%edx
+mov   2104(%rdi),%edx
+
+# qhasm: mem64[ap + 2104] = temp1
+# asm 1: mov   <temp1=int64#2,2104(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2104(<ap=%rdi)
+mov   %esi,2104(%rdi)
+
+# qhasm: mem64[ap + 1796] = temp2
+# asm 1: mov   <temp2=int64#3,1796(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1796(<ap=%rdi)
+mov   %edx,1796(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1804]
+# asm 1: mov   1804(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1804(<ap=%rdi),>temp1=%esi
+mov   1804(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3128]
+# asm 1: mov   3128(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3128(<ap=%rdi),>temp2=%edx
+mov   3128(%rdi),%edx
+
+# qhasm: mem64[ap + 3128] = temp1
+# asm 1: mov   <temp1=int64#2,3128(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3128(<ap=%rdi)
+mov   %esi,3128(%rdi)
+
+# qhasm: mem64[ap + 1804] = temp2
+# asm 1: mov   <temp2=int64#3,1804(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1804(<ap=%rdi)
+mov   %edx,1804(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1812]
+# asm 1: mov   1812(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1812(<ap=%rdi),>temp1=%esi
+mov   1812(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2616]
+# asm 1: mov   2616(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2616(<ap=%rdi),>temp2=%edx
+mov   2616(%rdi),%edx
+
+# qhasm: mem64[ap + 2616] = temp1
+# asm 1: mov   <temp1=int64#2,2616(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2616(<ap=%rdi)
+mov   %esi,2616(%rdi)
+
+# qhasm: mem64[ap + 1812] = temp2
+# asm 1: mov   <temp2=int64#3,1812(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1812(<ap=%rdi)
+mov   %edx,1812(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1820]
+# asm 1: mov   1820(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1820(<ap=%rdi),>temp1=%esi
+mov   1820(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3640]
+# asm 1: mov   3640(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3640(<ap=%rdi),>temp2=%edx
+mov   3640(%rdi),%edx
+
+# qhasm: mem64[ap + 3640] = temp1
+# asm 1: mov   <temp1=int64#2,3640(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3640(<ap=%rdi)
+mov   %esi,3640(%rdi)
+
+# qhasm: mem64[ap + 1820] = temp2
+# asm 1: mov   <temp2=int64#3,1820(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1820(<ap=%rdi)
+mov   %edx,1820(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1828]
+# asm 1: mov   1828(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1828(<ap=%rdi),>temp1=%esi
+mov   1828(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2360]
+# asm 1: mov   2360(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2360(<ap=%rdi),>temp2=%edx
+mov   2360(%rdi),%edx
+
+# qhasm: mem64[ap + 2360] = temp1
+# asm 1: mov   <temp1=int64#2,2360(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2360(<ap=%rdi)
+mov   %esi,2360(%rdi)
+
+# qhasm: mem64[ap + 1828] = temp2
+# asm 1: mov   <temp2=int64#3,1828(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1828(<ap=%rdi)
+mov   %edx,1828(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1836]
+# asm 1: mov   1836(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1836(<ap=%rdi),>temp1=%esi
+mov   1836(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3384]
+# asm 1: mov   3384(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3384(<ap=%rdi),>temp2=%edx
+mov   3384(%rdi),%edx
+
+# qhasm: mem64[ap + 3384] = temp1
+# asm 1: mov   <temp1=int64#2,3384(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3384(<ap=%rdi)
+mov   %esi,3384(%rdi)
+
+# qhasm: mem64[ap + 1836] = temp2
+# asm 1: mov   <temp2=int64#3,1836(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1836(<ap=%rdi)
+mov   %edx,1836(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1844]
+# asm 1: mov   1844(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1844(<ap=%rdi),>temp1=%esi
+mov   1844(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2872]
+# asm 1: mov   2872(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2872(<ap=%rdi),>temp2=%edx
+mov   2872(%rdi),%edx
+
+# qhasm: mem64[ap + 2872] = temp1
+# asm 1: mov   <temp1=int64#2,2872(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2872(<ap=%rdi)
+mov   %esi,2872(%rdi)
+
+# qhasm: mem64[ap + 1844] = temp2
+# asm 1: mov   <temp2=int64#3,1844(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1844(<ap=%rdi)
+mov   %edx,1844(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1852]
+# asm 1: mov   1852(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1852(<ap=%rdi),>temp1=%esi
+mov   1852(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3896]
+# asm 1: mov   3896(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3896(<ap=%rdi),>temp2=%edx
+mov   3896(%rdi),%edx
+
+# qhasm: mem64[ap + 3896] = temp1
+# asm 1: mov   <temp1=int64#2,3896(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3896(<ap=%rdi)
+mov   %esi,3896(%rdi)
+
+# qhasm: mem64[ap + 1852] = temp2
+# asm 1: mov   <temp2=int64#3,1852(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1852(<ap=%rdi)
+mov   %edx,1852(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1860]
+# asm 1: mov   1860(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1860(<ap=%rdi),>temp1=%esi
+mov   1860(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2232]
+# asm 1: mov   2232(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2232(<ap=%rdi),>temp2=%edx
+mov   2232(%rdi),%edx
+
+# qhasm: mem64[ap + 2232] = temp1
+# asm 1: mov   <temp1=int64#2,2232(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2232(<ap=%rdi)
+mov   %esi,2232(%rdi)
+
+# qhasm: mem64[ap + 1860] = temp2
+# asm 1: mov   <temp2=int64#3,1860(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1860(<ap=%rdi)
+mov   %edx,1860(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1868]
+# asm 1: mov   1868(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1868(<ap=%rdi),>temp1=%esi
+mov   1868(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3256]
+# asm 1: mov   3256(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3256(<ap=%rdi),>temp2=%edx
+mov   3256(%rdi),%edx
+
+# qhasm: mem64[ap + 3256] = temp1
+# asm 1: mov   <temp1=int64#2,3256(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3256(<ap=%rdi)
+mov   %esi,3256(%rdi)
+
+# qhasm: mem64[ap + 1868] = temp2
+# asm 1: mov   <temp2=int64#3,1868(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1868(<ap=%rdi)
+mov   %edx,1868(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1876]
+# asm 1: mov   1876(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1876(<ap=%rdi),>temp1=%esi
+mov   1876(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2744]
+# asm 1: mov   2744(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2744(<ap=%rdi),>temp2=%edx
+mov   2744(%rdi),%edx
+
+# qhasm: mem64[ap + 2744] = temp1
+# asm 1: mov   <temp1=int64#2,2744(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2744(<ap=%rdi)
+mov   %esi,2744(%rdi)
+
+# qhasm: mem64[ap + 1876] = temp2
+# asm 1: mov   <temp2=int64#3,1876(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1876(<ap=%rdi)
+mov   %edx,1876(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1884]
+# asm 1: mov   1884(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1884(<ap=%rdi),>temp1=%esi
+mov   1884(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3768]
+# asm 1: mov   3768(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3768(<ap=%rdi),>temp2=%edx
+mov   3768(%rdi),%edx
+
+# qhasm: mem64[ap + 3768] = temp1
+# asm 1: mov   <temp1=int64#2,3768(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3768(<ap=%rdi)
+mov   %esi,3768(%rdi)
+
+# qhasm: mem64[ap + 1884] = temp2
+# asm 1: mov   <temp2=int64#3,1884(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1884(<ap=%rdi)
+mov   %edx,1884(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1892]
+# asm 1: mov   1892(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1892(<ap=%rdi),>temp1=%esi
+mov   1892(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2488]
+# asm 1: mov   2488(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2488(<ap=%rdi),>temp2=%edx
+mov   2488(%rdi),%edx
+
+# qhasm: mem64[ap + 2488] = temp1
+# asm 1: mov   <temp1=int64#2,2488(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2488(<ap=%rdi)
+mov   %esi,2488(%rdi)
+
+# qhasm: mem64[ap + 1892] = temp2
+# asm 1: mov   <temp2=int64#3,1892(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1892(<ap=%rdi)
+mov   %edx,1892(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1900]
+# asm 1: mov   1900(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1900(<ap=%rdi),>temp1=%esi
+mov   1900(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3512]
+# asm 1: mov   3512(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3512(<ap=%rdi),>temp2=%edx
+mov   3512(%rdi),%edx
+
+# qhasm: mem64[ap + 3512] = temp1
+# asm 1: mov   <temp1=int64#2,3512(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3512(<ap=%rdi)
+mov   %esi,3512(%rdi)
+
+# qhasm: mem64[ap + 1900] = temp2
+# asm 1: mov   <temp2=int64#3,1900(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1900(<ap=%rdi)
+mov   %edx,1900(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1908]
+# asm 1: mov   1908(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1908(<ap=%rdi),>temp1=%esi
+mov   1908(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3000]
+# asm 1: mov   3000(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3000(<ap=%rdi),>temp2=%edx
+mov   3000(%rdi),%edx
+
+# qhasm: mem64[ap + 3000] = temp1
+# asm 1: mov   <temp1=int64#2,3000(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3000(<ap=%rdi)
+mov   %esi,3000(%rdi)
+
+# qhasm: mem64[ap + 1908] = temp2
+# asm 1: mov   <temp2=int64#3,1908(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1908(<ap=%rdi)
+mov   %edx,1908(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1912]
+# asm 1: mov   1912(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1912(<ap=%rdi),>temp1=%esi
+mov   1912(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 1976]
+# asm 1: mov   1976(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   1976(<ap=%rdi),>temp2=%edx
+mov   1976(%rdi),%edx
+
+# qhasm: mem64[ap + 1976] = temp1
+# asm 1: mov   <temp1=int64#2,1976(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,1976(<ap=%rdi)
+mov   %esi,1976(%rdi)
+
+# qhasm: mem64[ap + 1912] = temp2
+# asm 1: mov   <temp2=int64#3,1912(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1912(<ap=%rdi)
+mov   %edx,1912(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1916]
+# asm 1: mov   1916(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1916(<ap=%rdi),>temp1=%esi
+mov   1916(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4024]
+# asm 1: mov   4024(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4024(<ap=%rdi),>temp2=%edx
+mov   4024(%rdi),%edx
+
+# qhasm: mem64[ap + 4024] = temp1
+# asm 1: mov   <temp1=int64#2,4024(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4024(<ap=%rdi)
+mov   %esi,4024(%rdi)
+
+# qhasm: mem64[ap + 1916] = temp2
+# asm 1: mov   <temp2=int64#3,1916(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1916(<ap=%rdi)
+mov   %edx,1916(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1924]
+# asm 1: mov   1924(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1924(<ap=%rdi),>temp1=%esi
+mov   1924(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2168]
+# asm 1: mov   2168(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2168(<ap=%rdi),>temp2=%edx
+mov   2168(%rdi),%edx
+
+# qhasm: mem64[ap + 2168] = temp1
+# asm 1: mov   <temp1=int64#2,2168(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2168(<ap=%rdi)
+mov   %esi,2168(%rdi)
+
+# qhasm: mem64[ap + 1924] = temp2
+# asm 1: mov   <temp2=int64#3,1924(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1924(<ap=%rdi)
+mov   %edx,1924(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1932]
+# asm 1: mov   1932(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1932(<ap=%rdi),>temp1=%esi
+mov   1932(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3192]
+# asm 1: mov   3192(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3192(<ap=%rdi),>temp2=%edx
+mov   3192(%rdi),%edx
+
+# qhasm: mem64[ap + 3192] = temp1
+# asm 1: mov   <temp1=int64#2,3192(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3192(<ap=%rdi)
+mov   %esi,3192(%rdi)
+
+# qhasm: mem64[ap + 1932] = temp2
+# asm 1: mov   <temp2=int64#3,1932(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1932(<ap=%rdi)
+mov   %edx,1932(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1940]
+# asm 1: mov   1940(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1940(<ap=%rdi),>temp1=%esi
+mov   1940(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2680]
+# asm 1: mov   2680(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2680(<ap=%rdi),>temp2=%edx
+mov   2680(%rdi),%edx
+
+# qhasm: mem64[ap + 2680] = temp1
+# asm 1: mov   <temp1=int64#2,2680(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2680(<ap=%rdi)
+mov   %esi,2680(%rdi)
+
+# qhasm: mem64[ap + 1940] = temp2
+# asm 1: mov   <temp2=int64#3,1940(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1940(<ap=%rdi)
+mov   %edx,1940(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1948]
+# asm 1: mov   1948(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1948(<ap=%rdi),>temp1=%esi
+mov   1948(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3704]
+# asm 1: mov   3704(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3704(<ap=%rdi),>temp2=%edx
+mov   3704(%rdi),%edx
+
+# qhasm: mem64[ap + 3704] = temp1
+# asm 1: mov   <temp1=int64#2,3704(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3704(<ap=%rdi)
+mov   %esi,3704(%rdi)
+
+# qhasm: mem64[ap + 1948] = temp2
+# asm 1: mov   <temp2=int64#3,1948(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1948(<ap=%rdi)
+mov   %edx,1948(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1956]
+# asm 1: mov   1956(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1956(<ap=%rdi),>temp1=%esi
+mov   1956(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2424]
+# asm 1: mov   2424(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2424(<ap=%rdi),>temp2=%edx
+mov   2424(%rdi),%edx
+
+# qhasm: mem64[ap + 2424] = temp1
+# asm 1: mov   <temp1=int64#2,2424(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2424(<ap=%rdi)
+mov   %esi,2424(%rdi)
+
+# qhasm: mem64[ap + 1956] = temp2
+# asm 1: mov   <temp2=int64#3,1956(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1956(<ap=%rdi)
+mov   %edx,1956(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1964]
+# asm 1: mov   1964(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1964(<ap=%rdi),>temp1=%esi
+mov   1964(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3448]
+# asm 1: mov   3448(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3448(<ap=%rdi),>temp2=%edx
+mov   3448(%rdi),%edx
+
+# qhasm: mem64[ap + 3448] = temp1
+# asm 1: mov   <temp1=int64#2,3448(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3448(<ap=%rdi)
+mov   %esi,3448(%rdi)
+
+# qhasm: mem64[ap + 1964] = temp2
+# asm 1: mov   <temp2=int64#3,1964(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1964(<ap=%rdi)
+mov   %edx,1964(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1972]
+# asm 1: mov   1972(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1972(<ap=%rdi),>temp1=%esi
+mov   1972(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2936]
+# asm 1: mov   2936(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2936(<ap=%rdi),>temp2=%edx
+mov   2936(%rdi),%edx
+
+# qhasm: mem64[ap + 2936] = temp1
+# asm 1: mov   <temp1=int64#2,2936(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2936(<ap=%rdi)
+mov   %esi,2936(%rdi)
+
+# qhasm: mem64[ap + 1972] = temp2
+# asm 1: mov   <temp2=int64#3,1972(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1972(<ap=%rdi)
+mov   %edx,1972(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1980]
+# asm 1: mov   1980(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1980(<ap=%rdi),>temp1=%esi
+mov   1980(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3960]
+# asm 1: mov   3960(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3960(<ap=%rdi),>temp2=%edx
+mov   3960(%rdi),%edx
+
+# qhasm: mem64[ap + 3960] = temp1
+# asm 1: mov   <temp1=int64#2,3960(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3960(<ap=%rdi)
+mov   %esi,3960(%rdi)
+
+# qhasm: mem64[ap + 1980] = temp2
+# asm 1: mov   <temp2=int64#3,1980(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1980(<ap=%rdi)
+mov   %edx,1980(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1988]
+# asm 1: mov   1988(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1988(<ap=%rdi),>temp1=%esi
+mov   1988(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2296]
+# asm 1: mov   2296(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2296(<ap=%rdi),>temp2=%edx
+mov   2296(%rdi),%edx
+
+# qhasm: mem64[ap + 2296] = temp1
+# asm 1: mov   <temp1=int64#2,2296(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2296(<ap=%rdi)
+mov   %esi,2296(%rdi)
+
+# qhasm: mem64[ap + 1988] = temp2
+# asm 1: mov   <temp2=int64#3,1988(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1988(<ap=%rdi)
+mov   %edx,1988(%rdi)
+
+# qhasm: temp1 = mem64[ap + 1996]
+# asm 1: mov   1996(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   1996(<ap=%rdi),>temp1=%esi
+mov   1996(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3320]
+# asm 1: mov   3320(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3320(<ap=%rdi),>temp2=%edx
+mov   3320(%rdi),%edx
+
+# qhasm: mem64[ap + 3320] = temp1
+# asm 1: mov   <temp1=int64#2,3320(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3320(<ap=%rdi)
+mov   %esi,3320(%rdi)
+
+# qhasm: mem64[ap + 1996] = temp2
+# asm 1: mov   <temp2=int64#3,1996(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,1996(<ap=%rdi)
+mov   %edx,1996(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2004]
+# asm 1: mov   2004(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2004(<ap=%rdi),>temp1=%esi
+mov   2004(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2808]
+# asm 1: mov   2808(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2808(<ap=%rdi),>temp2=%edx
+mov   2808(%rdi),%edx
+
+# qhasm: mem64[ap + 2808] = temp1
+# asm 1: mov   <temp1=int64#2,2808(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2808(<ap=%rdi)
+mov   %esi,2808(%rdi)
+
+# qhasm: mem64[ap + 2004] = temp2
+# asm 1: mov   <temp2=int64#3,2004(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2004(<ap=%rdi)
+mov   %edx,2004(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2012]
+# asm 1: mov   2012(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2012(<ap=%rdi),>temp1=%esi
+mov   2012(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3832]
+# asm 1: mov   3832(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3832(<ap=%rdi),>temp2=%edx
+mov   3832(%rdi),%edx
+
+# qhasm: mem64[ap + 3832] = temp1
+# asm 1: mov   <temp1=int64#2,3832(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3832(<ap=%rdi)
+mov   %esi,3832(%rdi)
+
+# qhasm: mem64[ap + 2012] = temp2
+# asm 1: mov   <temp2=int64#3,2012(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2012(<ap=%rdi)
+mov   %edx,2012(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2020]
+# asm 1: mov   2020(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2020(<ap=%rdi),>temp1=%esi
+mov   2020(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2552]
+# asm 1: mov   2552(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2552(<ap=%rdi),>temp2=%edx
+mov   2552(%rdi),%edx
+
+# qhasm: mem64[ap + 2552] = temp1
+# asm 1: mov   <temp1=int64#2,2552(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2552(<ap=%rdi)
+mov   %esi,2552(%rdi)
+
+# qhasm: mem64[ap + 2020] = temp2
+# asm 1: mov   <temp2=int64#3,2020(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2020(<ap=%rdi)
+mov   %edx,2020(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2028]
+# asm 1: mov   2028(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2028(<ap=%rdi),>temp1=%esi
+mov   2028(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3576]
+# asm 1: mov   3576(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3576(<ap=%rdi),>temp2=%edx
+mov   3576(%rdi),%edx
+
+# qhasm: mem64[ap + 3576] = temp1
+# asm 1: mov   <temp1=int64#2,3576(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3576(<ap=%rdi)
+mov   %esi,3576(%rdi)
+
+# qhasm: mem64[ap + 2028] = temp2
+# asm 1: mov   <temp2=int64#3,2028(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2028(<ap=%rdi)
+mov   %edx,2028(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2036]
+# asm 1: mov   2036(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2036(<ap=%rdi),>temp1=%esi
+mov   2036(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3064]
+# asm 1: mov   3064(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3064(<ap=%rdi),>temp2=%edx
+mov   3064(%rdi),%edx
+
+# qhasm: mem64[ap + 3064] = temp1
+# asm 1: mov   <temp1=int64#2,3064(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3064(<ap=%rdi)
+mov   %esi,3064(%rdi)
+
+# qhasm: mem64[ap + 2036] = temp2
+# asm 1: mov   <temp2=int64#3,2036(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2036(<ap=%rdi)
+mov   %edx,2036(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2044]
+# asm 1: mov   2044(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2044(<ap=%rdi),>temp1=%esi
+mov   2044(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4088]
+# asm 1: mov   4088(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4088(<ap=%rdi),>temp2=%edx
+mov   4088(%rdi),%edx
+
+# qhasm: mem64[ap + 4088] = temp1
+# asm 1: mov   <temp1=int64#2,4088(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4088(<ap=%rdi)
+mov   %esi,4088(%rdi)
+
+# qhasm: mem64[ap + 2044] = temp2
+# asm 1: mov   <temp2=int64#3,2044(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2044(<ap=%rdi)
+mov   %edx,2044(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2060]
+# asm 1: mov   2060(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2060(<ap=%rdi),>temp1=%esi
+mov   2060(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3076]
+# asm 1: mov   3076(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3076(<ap=%rdi),>temp2=%edx
+mov   3076(%rdi),%edx
+
+# qhasm: mem64[ap + 3076] = temp1
+# asm 1: mov   <temp1=int64#2,3076(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3076(<ap=%rdi)
+mov   %esi,3076(%rdi)
+
+# qhasm: mem64[ap + 2060] = temp2
+# asm 1: mov   <temp2=int64#3,2060(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2060(<ap=%rdi)
+mov   %edx,2060(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2068]
+# asm 1: mov   2068(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2068(<ap=%rdi),>temp1=%esi
+mov   2068(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2564]
+# asm 1: mov   2564(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2564(<ap=%rdi),>temp2=%edx
+mov   2564(%rdi),%edx
+
+# qhasm: mem64[ap + 2564] = temp1
+# asm 1: mov   <temp1=int64#2,2564(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2564(<ap=%rdi)
+mov   %esi,2564(%rdi)
+
+# qhasm: mem64[ap + 2068] = temp2
+# asm 1: mov   <temp2=int64#3,2068(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2068(<ap=%rdi)
+mov   %edx,2068(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2076]
+# asm 1: mov   2076(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2076(<ap=%rdi),>temp1=%esi
+mov   2076(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3588]
+# asm 1: mov   3588(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3588(<ap=%rdi),>temp2=%edx
+mov   3588(%rdi),%edx
+
+# qhasm: mem64[ap + 3588] = temp1
+# asm 1: mov   <temp1=int64#2,3588(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3588(<ap=%rdi)
+mov   %esi,3588(%rdi)
+
+# qhasm: mem64[ap + 2076] = temp2
+# asm 1: mov   <temp2=int64#3,2076(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2076(<ap=%rdi)
+mov   %edx,2076(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2084]
+# asm 1: mov   2084(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2084(<ap=%rdi),>temp1=%esi
+mov   2084(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2308]
+# asm 1: mov   2308(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2308(<ap=%rdi),>temp2=%edx
+mov   2308(%rdi),%edx
+
+# qhasm: mem64[ap + 2308] = temp1
+# asm 1: mov   <temp1=int64#2,2308(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2308(<ap=%rdi)
+mov   %esi,2308(%rdi)
+
+# qhasm: mem64[ap + 2084] = temp2
+# asm 1: mov   <temp2=int64#3,2084(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2084(<ap=%rdi)
+mov   %edx,2084(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2092]
+# asm 1: mov   2092(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2092(<ap=%rdi),>temp1=%esi
+mov   2092(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3332]
+# asm 1: mov   3332(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3332(<ap=%rdi),>temp2=%edx
+mov   3332(%rdi),%edx
+
+# qhasm: mem64[ap + 3332] = temp1
+# asm 1: mov   <temp1=int64#2,3332(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3332(<ap=%rdi)
+mov   %esi,3332(%rdi)
+
+# qhasm: mem64[ap + 2092] = temp2
+# asm 1: mov   <temp2=int64#3,2092(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2092(<ap=%rdi)
+mov   %edx,2092(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2100]
+# asm 1: mov   2100(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2100(<ap=%rdi),>temp1=%esi
+mov   2100(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2820]
+# asm 1: mov   2820(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2820(<ap=%rdi),>temp2=%edx
+mov   2820(%rdi),%edx
+
+# qhasm: mem64[ap + 2820] = temp1
+# asm 1: mov   <temp1=int64#2,2820(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2820(<ap=%rdi)
+mov   %esi,2820(%rdi)
+
+# qhasm: mem64[ap + 2100] = temp2
+# asm 1: mov   <temp2=int64#3,2100(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2100(<ap=%rdi)
+mov   %edx,2100(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2108]
+# asm 1: mov   2108(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2108(<ap=%rdi),>temp1=%esi
+mov   2108(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3844]
+# asm 1: mov   3844(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3844(<ap=%rdi),>temp2=%edx
+mov   3844(%rdi),%edx
+
+# qhasm: mem64[ap + 3844] = temp1
+# asm 1: mov   <temp1=int64#2,3844(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3844(<ap=%rdi)
+mov   %esi,3844(%rdi)
+
+# qhasm: mem64[ap + 2108] = temp2
+# asm 1: mov   <temp2=int64#3,2108(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2108(<ap=%rdi)
+mov   %edx,2108(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2116]
+# asm 1: mov   2116(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2116(<ap=%rdi),>temp1=%esi
+mov   2116(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2180]
+# asm 1: mov   2180(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2180(<ap=%rdi),>temp2=%edx
+mov   2180(%rdi),%edx
+
+# qhasm: mem64[ap + 2180] = temp1
+# asm 1: mov   <temp1=int64#2,2180(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2180(<ap=%rdi)
+mov   %esi,2180(%rdi)
+
+# qhasm: mem64[ap + 2116] = temp2
+# asm 1: mov   <temp2=int64#3,2116(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2116(<ap=%rdi)
+mov   %edx,2116(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2124]
+# asm 1: mov   2124(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2124(<ap=%rdi),>temp1=%esi
+mov   2124(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3204]
+# asm 1: mov   3204(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3204(<ap=%rdi),>temp2=%edx
+mov   3204(%rdi),%edx
+
+# qhasm: mem64[ap + 3204] = temp1
+# asm 1: mov   <temp1=int64#2,3204(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3204(<ap=%rdi)
+mov   %esi,3204(%rdi)
+
+# qhasm: mem64[ap + 2124] = temp2
+# asm 1: mov   <temp2=int64#3,2124(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2124(<ap=%rdi)
+mov   %edx,2124(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2132]
+# asm 1: mov   2132(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2132(<ap=%rdi),>temp1=%esi
+mov   2132(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2692]
+# asm 1: mov   2692(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2692(<ap=%rdi),>temp2=%edx
+mov   2692(%rdi),%edx
+
+# qhasm: mem64[ap + 2692] = temp1
+# asm 1: mov   <temp1=int64#2,2692(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2692(<ap=%rdi)
+mov   %esi,2692(%rdi)
+
+# qhasm: mem64[ap + 2132] = temp2
+# asm 1: mov   <temp2=int64#3,2132(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2132(<ap=%rdi)
+mov   %edx,2132(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2140]
+# asm 1: mov   2140(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2140(<ap=%rdi),>temp1=%esi
+mov   2140(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3716]
+# asm 1: mov   3716(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3716(<ap=%rdi),>temp2=%edx
+mov   3716(%rdi),%edx
+
+# qhasm: mem64[ap + 3716] = temp1
+# asm 1: mov   <temp1=int64#2,3716(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3716(<ap=%rdi)
+mov   %esi,3716(%rdi)
+
+# qhasm: mem64[ap + 2140] = temp2
+# asm 1: mov   <temp2=int64#3,2140(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2140(<ap=%rdi)
+mov   %edx,2140(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2148]
+# asm 1: mov   2148(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2148(<ap=%rdi),>temp1=%esi
+mov   2148(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2436]
+# asm 1: mov   2436(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2436(<ap=%rdi),>temp2=%edx
+mov   2436(%rdi),%edx
+
+# qhasm: mem64[ap + 2436] = temp1
+# asm 1: mov   <temp1=int64#2,2436(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2436(<ap=%rdi)
+mov   %esi,2436(%rdi)
+
+# qhasm: mem64[ap + 2148] = temp2
+# asm 1: mov   <temp2=int64#3,2148(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2148(<ap=%rdi)
+mov   %edx,2148(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2156]
+# asm 1: mov   2156(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2156(<ap=%rdi),>temp1=%esi
+mov   2156(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3460]
+# asm 1: mov   3460(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3460(<ap=%rdi),>temp2=%edx
+mov   3460(%rdi),%edx
+
+# qhasm: mem64[ap + 3460] = temp1
+# asm 1: mov   <temp1=int64#2,3460(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3460(<ap=%rdi)
+mov   %esi,3460(%rdi)
+
+# qhasm: mem64[ap + 2156] = temp2
+# asm 1: mov   <temp2=int64#3,2156(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2156(<ap=%rdi)
+mov   %edx,2156(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2164]
+# asm 1: mov   2164(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2164(<ap=%rdi),>temp1=%esi
+mov   2164(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2948]
+# asm 1: mov   2948(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2948(<ap=%rdi),>temp2=%edx
+mov   2948(%rdi),%edx
+
+# qhasm: mem64[ap + 2948] = temp1
+# asm 1: mov   <temp1=int64#2,2948(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2948(<ap=%rdi)
+mov   %esi,2948(%rdi)
+
+# qhasm: mem64[ap + 2164] = temp2
+# asm 1: mov   <temp2=int64#3,2164(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2164(<ap=%rdi)
+mov   %edx,2164(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2172]
+# asm 1: mov   2172(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2172(<ap=%rdi),>temp1=%esi
+mov   2172(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3972]
+# asm 1: mov   3972(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3972(<ap=%rdi),>temp2=%edx
+mov   3972(%rdi),%edx
+
+# qhasm: mem64[ap + 3972] = temp1
+# asm 1: mov   <temp1=int64#2,3972(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3972(<ap=%rdi)
+mov   %esi,3972(%rdi)
+
+# qhasm: mem64[ap + 2172] = temp2
+# asm 1: mov   <temp2=int64#3,2172(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2172(<ap=%rdi)
+mov   %edx,2172(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2188]
+# asm 1: mov   2188(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2188(<ap=%rdi),>temp1=%esi
+mov   2188(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3140]
+# asm 1: mov   3140(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3140(<ap=%rdi),>temp2=%edx
+mov   3140(%rdi),%edx
+
+# qhasm: mem64[ap + 3140] = temp1
+# asm 1: mov   <temp1=int64#2,3140(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3140(<ap=%rdi)
+mov   %esi,3140(%rdi)
+
+# qhasm: mem64[ap + 2188] = temp2
+# asm 1: mov   <temp2=int64#3,2188(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2188(<ap=%rdi)
+mov   %edx,2188(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2196]
+# asm 1: mov   2196(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2196(<ap=%rdi),>temp1=%esi
+mov   2196(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2628]
+# asm 1: mov   2628(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2628(<ap=%rdi),>temp2=%edx
+mov   2628(%rdi),%edx
+
+# qhasm: mem64[ap + 2628] = temp1
+# asm 1: mov   <temp1=int64#2,2628(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2628(<ap=%rdi)
+mov   %esi,2628(%rdi)
+
+# qhasm: mem64[ap + 2196] = temp2
+# asm 1: mov   <temp2=int64#3,2196(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2196(<ap=%rdi)
+mov   %edx,2196(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2204]
+# asm 1: mov   2204(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2204(<ap=%rdi),>temp1=%esi
+mov   2204(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3652]
+# asm 1: mov   3652(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3652(<ap=%rdi),>temp2=%edx
+mov   3652(%rdi),%edx
+
+# qhasm: mem64[ap + 3652] = temp1
+# asm 1: mov   <temp1=int64#2,3652(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3652(<ap=%rdi)
+mov   %esi,3652(%rdi)
+
+# qhasm: mem64[ap + 2204] = temp2
+# asm 1: mov   <temp2=int64#3,2204(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2204(<ap=%rdi)
+mov   %edx,2204(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2212]
+# asm 1: mov   2212(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2212(<ap=%rdi),>temp1=%esi
+mov   2212(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2372]
+# asm 1: mov   2372(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2372(<ap=%rdi),>temp2=%edx
+mov   2372(%rdi),%edx
+
+# qhasm: mem64[ap + 2372] = temp1
+# asm 1: mov   <temp1=int64#2,2372(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2372(<ap=%rdi)
+mov   %esi,2372(%rdi)
+
+# qhasm: mem64[ap + 2212] = temp2
+# asm 1: mov   <temp2=int64#3,2212(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2212(<ap=%rdi)
+mov   %edx,2212(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2220]
+# asm 1: mov   2220(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2220(<ap=%rdi),>temp1=%esi
+mov   2220(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3396]
+# asm 1: mov   3396(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3396(<ap=%rdi),>temp2=%edx
+mov   3396(%rdi),%edx
+
+# qhasm: mem64[ap + 3396] = temp1
+# asm 1: mov   <temp1=int64#2,3396(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3396(<ap=%rdi)
+mov   %esi,3396(%rdi)
+
+# qhasm: mem64[ap + 2220] = temp2
+# asm 1: mov   <temp2=int64#3,2220(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2220(<ap=%rdi)
+mov   %edx,2220(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2228]
+# asm 1: mov   2228(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2228(<ap=%rdi),>temp1=%esi
+mov   2228(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2884]
+# asm 1: mov   2884(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2884(<ap=%rdi),>temp2=%edx
+mov   2884(%rdi),%edx
+
+# qhasm: mem64[ap + 2884] = temp1
+# asm 1: mov   <temp1=int64#2,2884(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2884(<ap=%rdi)
+mov   %esi,2884(%rdi)
+
+# qhasm: mem64[ap + 2228] = temp2
+# asm 1: mov   <temp2=int64#3,2228(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2228(<ap=%rdi)
+mov   %edx,2228(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2236]
+# asm 1: mov   2236(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2236(<ap=%rdi),>temp1=%esi
+mov   2236(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3908]
+# asm 1: mov   3908(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3908(<ap=%rdi),>temp2=%edx
+mov   3908(%rdi),%edx
+
+# qhasm: mem64[ap + 3908] = temp1
+# asm 1: mov   <temp1=int64#2,3908(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3908(<ap=%rdi)
+mov   %esi,3908(%rdi)
+
+# qhasm: mem64[ap + 2236] = temp2
+# asm 1: mov   <temp2=int64#3,2236(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2236(<ap=%rdi)
+mov   %edx,2236(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2252]
+# asm 1: mov   2252(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2252(<ap=%rdi),>temp1=%esi
+mov   2252(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3268]
+# asm 1: mov   3268(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3268(<ap=%rdi),>temp2=%edx
+mov   3268(%rdi),%edx
+
+# qhasm: mem64[ap + 3268] = temp1
+# asm 1: mov   <temp1=int64#2,3268(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3268(<ap=%rdi)
+mov   %esi,3268(%rdi)
+
+# qhasm: mem64[ap + 2252] = temp2
+# asm 1: mov   <temp2=int64#3,2252(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2252(<ap=%rdi)
+mov   %edx,2252(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2260]
+# asm 1: mov   2260(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2260(<ap=%rdi),>temp1=%esi
+mov   2260(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2756]
+# asm 1: mov   2756(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2756(<ap=%rdi),>temp2=%edx
+mov   2756(%rdi),%edx
+
+# qhasm: mem64[ap + 2756] = temp1
+# asm 1: mov   <temp1=int64#2,2756(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2756(<ap=%rdi)
+mov   %esi,2756(%rdi)
+
+# qhasm: mem64[ap + 2260] = temp2
+# asm 1: mov   <temp2=int64#3,2260(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2260(<ap=%rdi)
+mov   %edx,2260(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2268]
+# asm 1: mov   2268(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2268(<ap=%rdi),>temp1=%esi
+mov   2268(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3780]
+# asm 1: mov   3780(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3780(<ap=%rdi),>temp2=%edx
+mov   3780(%rdi),%edx
+
+# qhasm: mem64[ap + 3780] = temp1
+# asm 1: mov   <temp1=int64#2,3780(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3780(<ap=%rdi)
+mov   %esi,3780(%rdi)
+
+# qhasm: mem64[ap + 2268] = temp2
+# asm 1: mov   <temp2=int64#3,2268(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2268(<ap=%rdi)
+mov   %edx,2268(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2276]
+# asm 1: mov   2276(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2276(<ap=%rdi),>temp1=%esi
+mov   2276(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2500]
+# asm 1: mov   2500(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2500(<ap=%rdi),>temp2=%edx
+mov   2500(%rdi),%edx
+
+# qhasm: mem64[ap + 2500] = temp1
+# asm 1: mov   <temp1=int64#2,2500(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2500(<ap=%rdi)
+mov   %esi,2500(%rdi)
+
+# qhasm: mem64[ap + 2276] = temp2
+# asm 1: mov   <temp2=int64#3,2276(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2276(<ap=%rdi)
+mov   %edx,2276(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2284]
+# asm 1: mov   2284(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2284(<ap=%rdi),>temp1=%esi
+mov   2284(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3524]
+# asm 1: mov   3524(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3524(<ap=%rdi),>temp2=%edx
+mov   3524(%rdi),%edx
+
+# qhasm: mem64[ap + 3524] = temp1
+# asm 1: mov   <temp1=int64#2,3524(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3524(<ap=%rdi)
+mov   %esi,3524(%rdi)
+
+# qhasm: mem64[ap + 2284] = temp2
+# asm 1: mov   <temp2=int64#3,2284(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2284(<ap=%rdi)
+mov   %edx,2284(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2292]
+# asm 1: mov   2292(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2292(<ap=%rdi),>temp1=%esi
+mov   2292(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3012]
+# asm 1: mov   3012(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3012(<ap=%rdi),>temp2=%edx
+mov   3012(%rdi),%edx
+
+# qhasm: mem64[ap + 3012] = temp1
+# asm 1: mov   <temp1=int64#2,3012(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3012(<ap=%rdi)
+mov   %esi,3012(%rdi)
+
+# qhasm: mem64[ap + 2292] = temp2
+# asm 1: mov   <temp2=int64#3,2292(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2292(<ap=%rdi)
+mov   %edx,2292(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2300]
+# asm 1: mov   2300(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2300(<ap=%rdi),>temp1=%esi
+mov   2300(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4036]
+# asm 1: mov   4036(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4036(<ap=%rdi),>temp2=%edx
+mov   4036(%rdi),%edx
+
+# qhasm: mem64[ap + 4036] = temp1
+# asm 1: mov   <temp1=int64#2,4036(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4036(<ap=%rdi)
+mov   %esi,4036(%rdi)
+
+# qhasm: mem64[ap + 2300] = temp2
+# asm 1: mov   <temp2=int64#3,2300(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2300(<ap=%rdi)
+mov   %edx,2300(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2316]
+# asm 1: mov   2316(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2316(<ap=%rdi),>temp1=%esi
+mov   2316(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3108]
+# asm 1: mov   3108(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3108(<ap=%rdi),>temp2=%edx
+mov   3108(%rdi),%edx
+
+# qhasm: mem64[ap + 3108] = temp1
+# asm 1: mov   <temp1=int64#2,3108(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3108(<ap=%rdi)
+mov   %esi,3108(%rdi)
+
+# qhasm: mem64[ap + 2316] = temp2
+# asm 1: mov   <temp2=int64#3,2316(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2316(<ap=%rdi)
+mov   %edx,2316(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2324]
+# asm 1: mov   2324(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2324(<ap=%rdi),>temp1=%esi
+mov   2324(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2596]
+# asm 1: mov   2596(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2596(<ap=%rdi),>temp2=%edx
+mov   2596(%rdi),%edx
+
+# qhasm: mem64[ap + 2596] = temp1
+# asm 1: mov   <temp1=int64#2,2596(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2596(<ap=%rdi)
+mov   %esi,2596(%rdi)
+
+# qhasm: mem64[ap + 2324] = temp2
+# asm 1: mov   <temp2=int64#3,2324(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2324(<ap=%rdi)
+mov   %edx,2324(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2332]
+# asm 1: mov   2332(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2332(<ap=%rdi),>temp1=%esi
+mov   2332(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3620]
+# asm 1: mov   3620(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3620(<ap=%rdi),>temp2=%edx
+mov   3620(%rdi),%edx
+
+# qhasm: mem64[ap + 3620] = temp1
+# asm 1: mov   <temp1=int64#2,3620(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3620(<ap=%rdi)
+mov   %esi,3620(%rdi)
+
+# qhasm: mem64[ap + 2332] = temp2
+# asm 1: mov   <temp2=int64#3,2332(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2332(<ap=%rdi)
+mov   %edx,2332(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2348]
+# asm 1: mov   2348(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2348(<ap=%rdi),>temp1=%esi
+mov   2348(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3364]
+# asm 1: mov   3364(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3364(<ap=%rdi),>temp2=%edx
+mov   3364(%rdi),%edx
+
+# qhasm: mem64[ap + 3364] = temp1
+# asm 1: mov   <temp1=int64#2,3364(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3364(<ap=%rdi)
+mov   %esi,3364(%rdi)
+
+# qhasm: mem64[ap + 2348] = temp2
+# asm 1: mov   <temp2=int64#3,2348(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2348(<ap=%rdi)
+mov   %edx,2348(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2356]
+# asm 1: mov   2356(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2356(<ap=%rdi),>temp1=%esi
+mov   2356(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2852]
+# asm 1: mov   2852(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2852(<ap=%rdi),>temp2=%edx
+mov   2852(%rdi),%edx
+
+# qhasm: mem64[ap + 2852] = temp1
+# asm 1: mov   <temp1=int64#2,2852(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2852(<ap=%rdi)
+mov   %esi,2852(%rdi)
+
+# qhasm: mem64[ap + 2356] = temp2
+# asm 1: mov   <temp2=int64#3,2356(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2356(<ap=%rdi)
+mov   %edx,2356(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2364]
+# asm 1: mov   2364(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2364(<ap=%rdi),>temp1=%esi
+mov   2364(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3876]
+# asm 1: mov   3876(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3876(<ap=%rdi),>temp2=%edx
+mov   3876(%rdi),%edx
+
+# qhasm: mem64[ap + 3876] = temp1
+# asm 1: mov   <temp1=int64#2,3876(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3876(<ap=%rdi)
+mov   %esi,3876(%rdi)
+
+# qhasm: mem64[ap + 2364] = temp2
+# asm 1: mov   <temp2=int64#3,2364(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2364(<ap=%rdi)
+mov   %edx,2364(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2380]
+# asm 1: mov   2380(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2380(<ap=%rdi),>temp1=%esi
+mov   2380(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3236]
+# asm 1: mov   3236(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3236(<ap=%rdi),>temp2=%edx
+mov   3236(%rdi),%edx
+
+# qhasm: mem64[ap + 3236] = temp1
+# asm 1: mov   <temp1=int64#2,3236(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3236(<ap=%rdi)
+mov   %esi,3236(%rdi)
+
+# qhasm: mem64[ap + 2380] = temp2
+# asm 1: mov   <temp2=int64#3,2380(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2380(<ap=%rdi)
+mov   %edx,2380(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2388]
+# asm 1: mov   2388(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2388(<ap=%rdi),>temp1=%esi
+mov   2388(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2724]
+# asm 1: mov   2724(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2724(<ap=%rdi),>temp2=%edx
+mov   2724(%rdi),%edx
+
+# qhasm: mem64[ap + 2724] = temp1
+# asm 1: mov   <temp1=int64#2,2724(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2724(<ap=%rdi)
+mov   %esi,2724(%rdi)
+
+# qhasm: mem64[ap + 2388] = temp2
+# asm 1: mov   <temp2=int64#3,2388(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2388(<ap=%rdi)
+mov   %edx,2388(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2396]
+# asm 1: mov   2396(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2396(<ap=%rdi),>temp1=%esi
+mov   2396(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3748]
+# asm 1: mov   3748(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3748(<ap=%rdi),>temp2=%edx
+mov   3748(%rdi),%edx
+
+# qhasm: mem64[ap + 3748] = temp1
+# asm 1: mov   <temp1=int64#2,3748(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3748(<ap=%rdi)
+mov   %esi,3748(%rdi)
+
+# qhasm: mem64[ap + 2396] = temp2
+# asm 1: mov   <temp2=int64#3,2396(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2396(<ap=%rdi)
+mov   %edx,2396(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2404]
+# asm 1: mov   2404(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2404(<ap=%rdi),>temp1=%esi
+mov   2404(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2468]
+# asm 1: mov   2468(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2468(<ap=%rdi),>temp2=%edx
+mov   2468(%rdi),%edx
+
+# qhasm: mem64[ap + 2468] = temp1
+# asm 1: mov   <temp1=int64#2,2468(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2468(<ap=%rdi)
+mov   %esi,2468(%rdi)
+
+# qhasm: mem64[ap + 2404] = temp2
+# asm 1: mov   <temp2=int64#3,2404(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2404(<ap=%rdi)
+mov   %edx,2404(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2412]
+# asm 1: mov   2412(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2412(<ap=%rdi),>temp1=%esi
+mov   2412(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3492]
+# asm 1: mov   3492(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3492(<ap=%rdi),>temp2=%edx
+mov   3492(%rdi),%edx
+
+# qhasm: mem64[ap + 3492] = temp1
+# asm 1: mov   <temp1=int64#2,3492(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3492(<ap=%rdi)
+mov   %esi,3492(%rdi)
+
+# qhasm: mem64[ap + 2412] = temp2
+# asm 1: mov   <temp2=int64#3,2412(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2412(<ap=%rdi)
+mov   %edx,2412(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2420]
+# asm 1: mov   2420(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2420(<ap=%rdi),>temp1=%esi
+mov   2420(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2980]
+# asm 1: mov   2980(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2980(<ap=%rdi),>temp2=%edx
+mov   2980(%rdi),%edx
+
+# qhasm: mem64[ap + 2980] = temp1
+# asm 1: mov   <temp1=int64#2,2980(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2980(<ap=%rdi)
+mov   %esi,2980(%rdi)
+
+# qhasm: mem64[ap + 2420] = temp2
+# asm 1: mov   <temp2=int64#3,2420(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2420(<ap=%rdi)
+mov   %edx,2420(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2428]
+# asm 1: mov   2428(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2428(<ap=%rdi),>temp1=%esi
+mov   2428(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4004]
+# asm 1: mov   4004(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4004(<ap=%rdi),>temp2=%edx
+mov   4004(%rdi),%edx
+
+# qhasm: mem64[ap + 4004] = temp1
+# asm 1: mov   <temp1=int64#2,4004(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4004(<ap=%rdi)
+mov   %esi,4004(%rdi)
+
+# qhasm: mem64[ap + 2428] = temp2
+# asm 1: mov   <temp2=int64#3,2428(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2428(<ap=%rdi)
+mov   %edx,2428(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2444]
+# asm 1: mov   2444(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2444(<ap=%rdi),>temp1=%esi
+mov   2444(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3172]
+# asm 1: mov   3172(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3172(<ap=%rdi),>temp2=%edx
+mov   3172(%rdi),%edx
+
+# qhasm: mem64[ap + 3172] = temp1
+# asm 1: mov   <temp1=int64#2,3172(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3172(<ap=%rdi)
+mov   %esi,3172(%rdi)
+
+# qhasm: mem64[ap + 2444] = temp2
+# asm 1: mov   <temp2=int64#3,2444(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2444(<ap=%rdi)
+mov   %edx,2444(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2452]
+# asm 1: mov   2452(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2452(<ap=%rdi),>temp1=%esi
+mov   2452(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2660]
+# asm 1: mov   2660(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2660(<ap=%rdi),>temp2=%edx
+mov   2660(%rdi),%edx
+
+# qhasm: mem64[ap + 2660] = temp1
+# asm 1: mov   <temp1=int64#2,2660(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2660(<ap=%rdi)
+mov   %esi,2660(%rdi)
+
+# qhasm: mem64[ap + 2452] = temp2
+# asm 1: mov   <temp2=int64#3,2452(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2452(<ap=%rdi)
+mov   %edx,2452(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2460]
+# asm 1: mov   2460(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2460(<ap=%rdi),>temp1=%esi
+mov   2460(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3684]
+# asm 1: mov   3684(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3684(<ap=%rdi),>temp2=%edx
+mov   3684(%rdi),%edx
+
+# qhasm: mem64[ap + 3684] = temp1
+# asm 1: mov   <temp1=int64#2,3684(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3684(<ap=%rdi)
+mov   %esi,3684(%rdi)
+
+# qhasm: mem64[ap + 2460] = temp2
+# asm 1: mov   <temp2=int64#3,2460(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2460(<ap=%rdi)
+mov   %edx,2460(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2476]
+# asm 1: mov   2476(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2476(<ap=%rdi),>temp1=%esi
+mov   2476(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3428]
+# asm 1: mov   3428(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3428(<ap=%rdi),>temp2=%edx
+mov   3428(%rdi),%edx
+
+# qhasm: mem64[ap + 3428] = temp1
+# asm 1: mov   <temp1=int64#2,3428(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3428(<ap=%rdi)
+mov   %esi,3428(%rdi)
+
+# qhasm: mem64[ap + 2476] = temp2
+# asm 1: mov   <temp2=int64#3,2476(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2476(<ap=%rdi)
+mov   %edx,2476(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2484]
+# asm 1: mov   2484(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2484(<ap=%rdi),>temp1=%esi
+mov   2484(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2916]
+# asm 1: mov   2916(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2916(<ap=%rdi),>temp2=%edx
+mov   2916(%rdi),%edx
+
+# qhasm: mem64[ap + 2916] = temp1
+# asm 1: mov   <temp1=int64#2,2916(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2916(<ap=%rdi)
+mov   %esi,2916(%rdi)
+
+# qhasm: mem64[ap + 2484] = temp2
+# asm 1: mov   <temp2=int64#3,2484(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2484(<ap=%rdi)
+mov   %edx,2484(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2492]
+# asm 1: mov   2492(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2492(<ap=%rdi),>temp1=%esi
+mov   2492(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3940]
+# asm 1: mov   3940(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3940(<ap=%rdi),>temp2=%edx
+mov   3940(%rdi),%edx
+
+# qhasm: mem64[ap + 3940] = temp1
+# asm 1: mov   <temp1=int64#2,3940(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3940(<ap=%rdi)
+mov   %esi,3940(%rdi)
+
+# qhasm: mem64[ap + 2492] = temp2
+# asm 1: mov   <temp2=int64#3,2492(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2492(<ap=%rdi)
+mov   %edx,2492(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2508]
+# asm 1: mov   2508(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2508(<ap=%rdi),>temp1=%esi
+mov   2508(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3300]
+# asm 1: mov   3300(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3300(<ap=%rdi),>temp2=%edx
+mov   3300(%rdi),%edx
+
+# qhasm: mem64[ap + 3300] = temp1
+# asm 1: mov   <temp1=int64#2,3300(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3300(<ap=%rdi)
+mov   %esi,3300(%rdi)
+
+# qhasm: mem64[ap + 2508] = temp2
+# asm 1: mov   <temp2=int64#3,2508(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2508(<ap=%rdi)
+mov   %edx,2508(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2516]
+# asm 1: mov   2516(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2516(<ap=%rdi),>temp1=%esi
+mov   2516(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2788]
+# asm 1: mov   2788(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2788(<ap=%rdi),>temp2=%edx
+mov   2788(%rdi),%edx
+
+# qhasm: mem64[ap + 2788] = temp1
+# asm 1: mov   <temp1=int64#2,2788(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2788(<ap=%rdi)
+mov   %esi,2788(%rdi)
+
+# qhasm: mem64[ap + 2516] = temp2
+# asm 1: mov   <temp2=int64#3,2516(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2516(<ap=%rdi)
+mov   %edx,2516(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2524]
+# asm 1: mov   2524(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2524(<ap=%rdi),>temp1=%esi
+mov   2524(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3812]
+# asm 1: mov   3812(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3812(<ap=%rdi),>temp2=%edx
+mov   3812(%rdi),%edx
+
+# qhasm: mem64[ap + 3812] = temp1
+# asm 1: mov   <temp1=int64#2,3812(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3812(<ap=%rdi)
+mov   %esi,3812(%rdi)
+
+# qhasm: mem64[ap + 2524] = temp2
+# asm 1: mov   <temp2=int64#3,2524(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2524(<ap=%rdi)
+mov   %edx,2524(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2540]
+# asm 1: mov   2540(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2540(<ap=%rdi),>temp1=%esi
+mov   2540(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3556]
+# asm 1: mov   3556(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3556(<ap=%rdi),>temp2=%edx
+mov   3556(%rdi),%edx
+
+# qhasm: mem64[ap + 3556] = temp1
+# asm 1: mov   <temp1=int64#2,3556(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3556(<ap=%rdi)
+mov   %esi,3556(%rdi)
+
+# qhasm: mem64[ap + 2540] = temp2
+# asm 1: mov   <temp2=int64#3,2540(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2540(<ap=%rdi)
+mov   %edx,2540(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2548]
+# asm 1: mov   2548(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2548(<ap=%rdi),>temp1=%esi
+mov   2548(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3044]
+# asm 1: mov   3044(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3044(<ap=%rdi),>temp2=%edx
+mov   3044(%rdi),%edx
+
+# qhasm: mem64[ap + 3044] = temp1
+# asm 1: mov   <temp1=int64#2,3044(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3044(<ap=%rdi)
+mov   %esi,3044(%rdi)
+
+# qhasm: mem64[ap + 2548] = temp2
+# asm 1: mov   <temp2=int64#3,2548(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2548(<ap=%rdi)
+mov   %edx,2548(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2556]
+# asm 1: mov   2556(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2556(<ap=%rdi),>temp1=%esi
+mov   2556(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4068]
+# asm 1: mov   4068(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4068(<ap=%rdi),>temp2=%edx
+mov   4068(%rdi),%edx
+
+# qhasm: mem64[ap + 4068] = temp1
+# asm 1: mov   <temp1=int64#2,4068(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4068(<ap=%rdi)
+mov   %esi,4068(%rdi)
+
+# qhasm: mem64[ap + 2556] = temp2
+# asm 1: mov   <temp2=int64#3,2556(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2556(<ap=%rdi)
+mov   %edx,2556(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2572]
+# asm 1: mov   2572(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2572(<ap=%rdi),>temp1=%esi
+mov   2572(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3092]
+# asm 1: mov   3092(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3092(<ap=%rdi),>temp2=%edx
+mov   3092(%rdi),%edx
+
+# qhasm: mem64[ap + 3092] = temp1
+# asm 1: mov   <temp1=int64#2,3092(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3092(<ap=%rdi)
+mov   %esi,3092(%rdi)
+
+# qhasm: mem64[ap + 2572] = temp2
+# asm 1: mov   <temp2=int64#3,2572(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2572(<ap=%rdi)
+mov   %edx,2572(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2588]
+# asm 1: mov   2588(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2588(<ap=%rdi),>temp1=%esi
+mov   2588(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3604]
+# asm 1: mov   3604(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3604(<ap=%rdi),>temp2=%edx
+mov   3604(%rdi),%edx
+
+# qhasm: mem64[ap + 3604] = temp1
+# asm 1: mov   <temp1=int64#2,3604(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3604(<ap=%rdi)
+mov   %esi,3604(%rdi)
+
+# qhasm: mem64[ap + 2588] = temp2
+# asm 1: mov   <temp2=int64#3,2588(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2588(<ap=%rdi)
+mov   %edx,2588(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2604]
+# asm 1: mov   2604(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2604(<ap=%rdi),>temp1=%esi
+mov   2604(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3348]
+# asm 1: mov   3348(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3348(<ap=%rdi),>temp2=%edx
+mov   3348(%rdi),%edx
+
+# qhasm: mem64[ap + 3348] = temp1
+# asm 1: mov   <temp1=int64#2,3348(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3348(<ap=%rdi)
+mov   %esi,3348(%rdi)
+
+# qhasm: mem64[ap + 2604] = temp2
+# asm 1: mov   <temp2=int64#3,2604(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2604(<ap=%rdi)
+mov   %edx,2604(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2612]
+# asm 1: mov   2612(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2612(<ap=%rdi),>temp1=%esi
+mov   2612(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2836]
+# asm 1: mov   2836(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2836(<ap=%rdi),>temp2=%edx
+mov   2836(%rdi),%edx
+
+# qhasm: mem64[ap + 2836] = temp1
+# asm 1: mov   <temp1=int64#2,2836(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2836(<ap=%rdi)
+mov   %esi,2836(%rdi)
+
+# qhasm: mem64[ap + 2612] = temp2
+# asm 1: mov   <temp2=int64#3,2612(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2612(<ap=%rdi)
+mov   %edx,2612(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2620]
+# asm 1: mov   2620(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2620(<ap=%rdi),>temp1=%esi
+mov   2620(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3860]
+# asm 1: mov   3860(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3860(<ap=%rdi),>temp2=%edx
+mov   3860(%rdi),%edx
+
+# qhasm: mem64[ap + 3860] = temp1
+# asm 1: mov   <temp1=int64#2,3860(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3860(<ap=%rdi)
+mov   %esi,3860(%rdi)
+
+# qhasm: mem64[ap + 2620] = temp2
+# asm 1: mov   <temp2=int64#3,2620(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2620(<ap=%rdi)
+mov   %edx,2620(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2636]
+# asm 1: mov   2636(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2636(<ap=%rdi),>temp1=%esi
+mov   2636(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3220]
+# asm 1: mov   3220(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3220(<ap=%rdi),>temp2=%edx
+mov   3220(%rdi),%edx
+
+# qhasm: mem64[ap + 3220] = temp1
+# asm 1: mov   <temp1=int64#2,3220(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3220(<ap=%rdi)
+mov   %esi,3220(%rdi)
+
+# qhasm: mem64[ap + 2636] = temp2
+# asm 1: mov   <temp2=int64#3,2636(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2636(<ap=%rdi)
+mov   %edx,2636(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2644]
+# asm 1: mov   2644(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2644(<ap=%rdi),>temp1=%esi
+mov   2644(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2708]
+# asm 1: mov   2708(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2708(<ap=%rdi),>temp2=%edx
+mov   2708(%rdi),%edx
+
+# qhasm: mem64[ap + 2708] = temp1
+# asm 1: mov   <temp1=int64#2,2708(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2708(<ap=%rdi)
+mov   %esi,2708(%rdi)
+
+# qhasm: mem64[ap + 2644] = temp2
+# asm 1: mov   <temp2=int64#3,2644(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2644(<ap=%rdi)
+mov   %edx,2644(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2652]
+# asm 1: mov   2652(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2652(<ap=%rdi),>temp1=%esi
+mov   2652(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3732]
+# asm 1: mov   3732(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3732(<ap=%rdi),>temp2=%edx
+mov   3732(%rdi),%edx
+
+# qhasm: mem64[ap + 3732] = temp1
+# asm 1: mov   <temp1=int64#2,3732(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3732(<ap=%rdi)
+mov   %esi,3732(%rdi)
+
+# qhasm: mem64[ap + 2652] = temp2
+# asm 1: mov   <temp2=int64#3,2652(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2652(<ap=%rdi)
+mov   %edx,2652(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2668]
+# asm 1: mov   2668(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2668(<ap=%rdi),>temp1=%esi
+mov   2668(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3476]
+# asm 1: mov   3476(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3476(<ap=%rdi),>temp2=%edx
+mov   3476(%rdi),%edx
+
+# qhasm: mem64[ap + 3476] = temp1
+# asm 1: mov   <temp1=int64#2,3476(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3476(<ap=%rdi)
+mov   %esi,3476(%rdi)
+
+# qhasm: mem64[ap + 2668] = temp2
+# asm 1: mov   <temp2=int64#3,2668(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2668(<ap=%rdi)
+mov   %edx,2668(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2676]
+# asm 1: mov   2676(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2676(<ap=%rdi),>temp1=%esi
+mov   2676(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2964]
+# asm 1: mov   2964(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2964(<ap=%rdi),>temp2=%edx
+mov   2964(%rdi),%edx
+
+# qhasm: mem64[ap + 2964] = temp1
+# asm 1: mov   <temp1=int64#2,2964(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2964(<ap=%rdi)
+mov   %esi,2964(%rdi)
+
+# qhasm: mem64[ap + 2676] = temp2
+# asm 1: mov   <temp2=int64#3,2676(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2676(<ap=%rdi)
+mov   %edx,2676(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2684]
+# asm 1: mov   2684(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2684(<ap=%rdi),>temp1=%esi
+mov   2684(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3988]
+# asm 1: mov   3988(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3988(<ap=%rdi),>temp2=%edx
+mov   3988(%rdi),%edx
+
+# qhasm: mem64[ap + 3988] = temp1
+# asm 1: mov   <temp1=int64#2,3988(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3988(<ap=%rdi)
+mov   %esi,3988(%rdi)
+
+# qhasm: mem64[ap + 2684] = temp2
+# asm 1: mov   <temp2=int64#3,2684(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2684(<ap=%rdi)
+mov   %edx,2684(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2700]
+# asm 1: mov   2700(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2700(<ap=%rdi),>temp1=%esi
+mov   2700(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3156]
+# asm 1: mov   3156(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3156(<ap=%rdi),>temp2=%edx
+mov   3156(%rdi),%edx
+
+# qhasm: mem64[ap + 3156] = temp1
+# asm 1: mov   <temp1=int64#2,3156(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3156(<ap=%rdi)
+mov   %esi,3156(%rdi)
+
+# qhasm: mem64[ap + 2700] = temp2
+# asm 1: mov   <temp2=int64#3,2700(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2700(<ap=%rdi)
+mov   %edx,2700(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2716]
+# asm 1: mov   2716(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2716(<ap=%rdi),>temp1=%esi
+mov   2716(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3668]
+# asm 1: mov   3668(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3668(<ap=%rdi),>temp2=%edx
+mov   3668(%rdi),%edx
+
+# qhasm: mem64[ap + 3668] = temp1
+# asm 1: mov   <temp1=int64#2,3668(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3668(<ap=%rdi)
+mov   %esi,3668(%rdi)
+
+# qhasm: mem64[ap + 2716] = temp2
+# asm 1: mov   <temp2=int64#3,2716(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2716(<ap=%rdi)
+mov   %edx,2716(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2732]
+# asm 1: mov   2732(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2732(<ap=%rdi),>temp1=%esi
+mov   2732(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3412]
+# asm 1: mov   3412(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3412(<ap=%rdi),>temp2=%edx
+mov   3412(%rdi),%edx
+
+# qhasm: mem64[ap + 3412] = temp1
+# asm 1: mov   <temp1=int64#2,3412(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3412(<ap=%rdi)
+mov   %esi,3412(%rdi)
+
+# qhasm: mem64[ap + 2732] = temp2
+# asm 1: mov   <temp2=int64#3,2732(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2732(<ap=%rdi)
+mov   %edx,2732(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2740]
+# asm 1: mov   2740(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2740(<ap=%rdi),>temp1=%esi
+mov   2740(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2900]
+# asm 1: mov   2900(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2900(<ap=%rdi),>temp2=%edx
+mov   2900(%rdi),%edx
+
+# qhasm: mem64[ap + 2900] = temp1
+# asm 1: mov   <temp1=int64#2,2900(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2900(<ap=%rdi)
+mov   %esi,2900(%rdi)
+
+# qhasm: mem64[ap + 2740] = temp2
+# asm 1: mov   <temp2=int64#3,2740(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2740(<ap=%rdi)
+mov   %edx,2740(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2748]
+# asm 1: mov   2748(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2748(<ap=%rdi),>temp1=%esi
+mov   2748(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3924]
+# asm 1: mov   3924(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3924(<ap=%rdi),>temp2=%edx
+mov   3924(%rdi),%edx
+
+# qhasm: mem64[ap + 3924] = temp1
+# asm 1: mov   <temp1=int64#2,3924(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3924(<ap=%rdi)
+mov   %esi,3924(%rdi)
+
+# qhasm: mem64[ap + 2748] = temp2
+# asm 1: mov   <temp2=int64#3,2748(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2748(<ap=%rdi)
+mov   %edx,2748(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2764]
+# asm 1: mov   2764(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2764(<ap=%rdi),>temp1=%esi
+mov   2764(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3284]
+# asm 1: mov   3284(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3284(<ap=%rdi),>temp2=%edx
+mov   3284(%rdi),%edx
+
+# qhasm: mem64[ap + 3284] = temp1
+# asm 1: mov   <temp1=int64#2,3284(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3284(<ap=%rdi)
+mov   %esi,3284(%rdi)
+
+# qhasm: mem64[ap + 2764] = temp2
+# asm 1: mov   <temp2=int64#3,2764(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2764(<ap=%rdi)
+mov   %edx,2764(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2780]
+# asm 1: mov   2780(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2780(<ap=%rdi),>temp1=%esi
+mov   2780(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3796]
+# asm 1: mov   3796(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3796(<ap=%rdi),>temp2=%edx
+mov   3796(%rdi),%edx
+
+# qhasm: mem64[ap + 3796] = temp1
+# asm 1: mov   <temp1=int64#2,3796(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3796(<ap=%rdi)
+mov   %esi,3796(%rdi)
+
+# qhasm: mem64[ap + 2780] = temp2
+# asm 1: mov   <temp2=int64#3,2780(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2780(<ap=%rdi)
+mov   %edx,2780(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2796]
+# asm 1: mov   2796(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2796(<ap=%rdi),>temp1=%esi
+mov   2796(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3540]
+# asm 1: mov   3540(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3540(<ap=%rdi),>temp2=%edx
+mov   3540(%rdi),%edx
+
+# qhasm: mem64[ap + 3540] = temp1
+# asm 1: mov   <temp1=int64#2,3540(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3540(<ap=%rdi)
+mov   %esi,3540(%rdi)
+
+# qhasm: mem64[ap + 2796] = temp2
+# asm 1: mov   <temp2=int64#3,2796(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2796(<ap=%rdi)
+mov   %edx,2796(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2804]
+# asm 1: mov   2804(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2804(<ap=%rdi),>temp1=%esi
+mov   2804(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3028]
+# asm 1: mov   3028(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3028(<ap=%rdi),>temp2=%edx
+mov   3028(%rdi),%edx
+
+# qhasm: mem64[ap + 3028] = temp1
+# asm 1: mov   <temp1=int64#2,3028(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3028(<ap=%rdi)
+mov   %esi,3028(%rdi)
+
+# qhasm: mem64[ap + 2804] = temp2
+# asm 1: mov   <temp2=int64#3,2804(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2804(<ap=%rdi)
+mov   %edx,2804(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2812]
+# asm 1: mov   2812(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2812(<ap=%rdi),>temp1=%esi
+mov   2812(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4052]
+# asm 1: mov   4052(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4052(<ap=%rdi),>temp2=%edx
+mov   4052(%rdi),%edx
+
+# qhasm: mem64[ap + 4052] = temp1
+# asm 1: mov   <temp1=int64#2,4052(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4052(<ap=%rdi)
+mov   %esi,4052(%rdi)
+
+# qhasm: mem64[ap + 2812] = temp2
+# asm 1: mov   <temp2=int64#3,2812(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2812(<ap=%rdi)
+mov   %edx,2812(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2828]
+# asm 1: mov   2828(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2828(<ap=%rdi),>temp1=%esi
+mov   2828(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3124]
+# asm 1: mov   3124(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3124(<ap=%rdi),>temp2=%edx
+mov   3124(%rdi),%edx
+
+# qhasm: mem64[ap + 3124] = temp1
+# asm 1: mov   <temp1=int64#2,3124(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3124(<ap=%rdi)
+mov   %esi,3124(%rdi)
+
+# qhasm: mem64[ap + 2828] = temp2
+# asm 1: mov   <temp2=int64#3,2828(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2828(<ap=%rdi)
+mov   %edx,2828(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2844]
+# asm 1: mov   2844(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2844(<ap=%rdi),>temp1=%esi
+mov   2844(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3636]
+# asm 1: mov   3636(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3636(<ap=%rdi),>temp2=%edx
+mov   3636(%rdi),%edx
+
+# qhasm: mem64[ap + 3636] = temp1
+# asm 1: mov   <temp1=int64#2,3636(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3636(<ap=%rdi)
+mov   %esi,3636(%rdi)
+
+# qhasm: mem64[ap + 2844] = temp2
+# asm 1: mov   <temp2=int64#3,2844(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2844(<ap=%rdi)
+mov   %edx,2844(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2860]
+# asm 1: mov   2860(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2860(<ap=%rdi),>temp1=%esi
+mov   2860(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3380]
+# asm 1: mov   3380(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3380(<ap=%rdi),>temp2=%edx
+mov   3380(%rdi),%edx
+
+# qhasm: mem64[ap + 3380] = temp1
+# asm 1: mov   <temp1=int64#2,3380(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3380(<ap=%rdi)
+mov   %esi,3380(%rdi)
+
+# qhasm: mem64[ap + 2860] = temp2
+# asm 1: mov   <temp2=int64#3,2860(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2860(<ap=%rdi)
+mov   %edx,2860(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2876]
+# asm 1: mov   2876(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2876(<ap=%rdi),>temp1=%esi
+mov   2876(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3892]
+# asm 1: mov   3892(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3892(<ap=%rdi),>temp2=%edx
+mov   3892(%rdi),%edx
+
+# qhasm: mem64[ap + 3892] = temp1
+# asm 1: mov   <temp1=int64#2,3892(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3892(<ap=%rdi)
+mov   %esi,3892(%rdi)
+
+# qhasm: mem64[ap + 2876] = temp2
+# asm 1: mov   <temp2=int64#3,2876(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2876(<ap=%rdi)
+mov   %edx,2876(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2892]
+# asm 1: mov   2892(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2892(<ap=%rdi),>temp1=%esi
+mov   2892(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3252]
+# asm 1: mov   3252(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3252(<ap=%rdi),>temp2=%edx
+mov   3252(%rdi),%edx
+
+# qhasm: mem64[ap + 3252] = temp1
+# asm 1: mov   <temp1=int64#2,3252(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3252(<ap=%rdi)
+mov   %esi,3252(%rdi)
+
+# qhasm: mem64[ap + 2892] = temp2
+# asm 1: mov   <temp2=int64#3,2892(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2892(<ap=%rdi)
+mov   %edx,2892(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2908]
+# asm 1: mov   2908(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2908(<ap=%rdi),>temp1=%esi
+mov   2908(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3764]
+# asm 1: mov   3764(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3764(<ap=%rdi),>temp2=%edx
+mov   3764(%rdi),%edx
+
+# qhasm: mem64[ap + 3764] = temp1
+# asm 1: mov   <temp1=int64#2,3764(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3764(<ap=%rdi)
+mov   %esi,3764(%rdi)
+
+# qhasm: mem64[ap + 2908] = temp2
+# asm 1: mov   <temp2=int64#3,2908(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2908(<ap=%rdi)
+mov   %edx,2908(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2924]
+# asm 1: mov   2924(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2924(<ap=%rdi),>temp1=%esi
+mov   2924(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3508]
+# asm 1: mov   3508(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3508(<ap=%rdi),>temp2=%edx
+mov   3508(%rdi),%edx
+
+# qhasm: mem64[ap + 3508] = temp1
+# asm 1: mov   <temp1=int64#2,3508(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3508(<ap=%rdi)
+mov   %esi,3508(%rdi)
+
+# qhasm: mem64[ap + 2924] = temp2
+# asm 1: mov   <temp2=int64#3,2924(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2924(<ap=%rdi)
+mov   %edx,2924(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2932]
+# asm 1: mov   2932(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2932(<ap=%rdi),>temp1=%esi
+mov   2932(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 2996]
+# asm 1: mov   2996(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   2996(<ap=%rdi),>temp2=%edx
+mov   2996(%rdi),%edx
+
+# qhasm: mem64[ap + 2996] = temp1
+# asm 1: mov   <temp1=int64#2,2996(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,2996(<ap=%rdi)
+mov   %esi,2996(%rdi)
+
+# qhasm: mem64[ap + 2932] = temp2
+# asm 1: mov   <temp2=int64#3,2932(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2932(<ap=%rdi)
+mov   %edx,2932(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2940]
+# asm 1: mov   2940(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2940(<ap=%rdi),>temp1=%esi
+mov   2940(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4020]
+# asm 1: mov   4020(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4020(<ap=%rdi),>temp2=%edx
+mov   4020(%rdi),%edx
+
+# qhasm: mem64[ap + 4020] = temp1
+# asm 1: mov   <temp1=int64#2,4020(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4020(<ap=%rdi)
+mov   %esi,4020(%rdi)
+
+# qhasm: mem64[ap + 2940] = temp2
+# asm 1: mov   <temp2=int64#3,2940(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2940(<ap=%rdi)
+mov   %edx,2940(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2956]
+# asm 1: mov   2956(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2956(<ap=%rdi),>temp1=%esi
+mov   2956(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3188]
+# asm 1: mov   3188(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3188(<ap=%rdi),>temp2=%edx
+mov   3188(%rdi),%edx
+
+# qhasm: mem64[ap + 3188] = temp1
+# asm 1: mov   <temp1=int64#2,3188(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3188(<ap=%rdi)
+mov   %esi,3188(%rdi)
+
+# qhasm: mem64[ap + 2956] = temp2
+# asm 1: mov   <temp2=int64#3,2956(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2956(<ap=%rdi)
+mov   %edx,2956(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2972]
+# asm 1: mov   2972(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2972(<ap=%rdi),>temp1=%esi
+mov   2972(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3700]
+# asm 1: mov   3700(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3700(<ap=%rdi),>temp2=%edx
+mov   3700(%rdi),%edx
+
+# qhasm: mem64[ap + 3700] = temp1
+# asm 1: mov   <temp1=int64#2,3700(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3700(<ap=%rdi)
+mov   %esi,3700(%rdi)
+
+# qhasm: mem64[ap + 2972] = temp2
+# asm 1: mov   <temp2=int64#3,2972(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2972(<ap=%rdi)
+mov   %edx,2972(%rdi)
+
+# qhasm: temp1 = mem64[ap + 2988]
+# asm 1: mov   2988(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   2988(<ap=%rdi),>temp1=%esi
+mov   2988(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3444]
+# asm 1: mov   3444(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3444(<ap=%rdi),>temp2=%edx
+mov   3444(%rdi),%edx
+
+# qhasm: mem64[ap + 3444] = temp1
+# asm 1: mov   <temp1=int64#2,3444(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3444(<ap=%rdi)
+mov   %esi,3444(%rdi)
+
+# qhasm: mem64[ap + 2988] = temp2
+# asm 1: mov   <temp2=int64#3,2988(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,2988(<ap=%rdi)
+mov   %edx,2988(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3004]
+# asm 1: mov   3004(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3004(<ap=%rdi),>temp1=%esi
+mov   3004(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3956]
+# asm 1: mov   3956(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3956(<ap=%rdi),>temp2=%edx
+mov   3956(%rdi),%edx
+
+# qhasm: mem64[ap + 3956] = temp1
+# asm 1: mov   <temp1=int64#2,3956(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3956(<ap=%rdi)
+mov   %esi,3956(%rdi)
+
+# qhasm: mem64[ap + 3004] = temp2
+# asm 1: mov   <temp2=int64#3,3004(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3004(<ap=%rdi)
+mov   %edx,3004(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3020]
+# asm 1: mov   3020(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3020(<ap=%rdi),>temp1=%esi
+mov   3020(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3316]
+# asm 1: mov   3316(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3316(<ap=%rdi),>temp2=%edx
+mov   3316(%rdi),%edx
+
+# qhasm: mem64[ap + 3316] = temp1
+# asm 1: mov   <temp1=int64#2,3316(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3316(<ap=%rdi)
+mov   %esi,3316(%rdi)
+
+# qhasm: mem64[ap + 3020] = temp2
+# asm 1: mov   <temp2=int64#3,3020(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3020(<ap=%rdi)
+mov   %edx,3020(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3036]
+# asm 1: mov   3036(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3036(<ap=%rdi),>temp1=%esi
+mov   3036(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3828]
+# asm 1: mov   3828(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3828(<ap=%rdi),>temp2=%edx
+mov   3828(%rdi),%edx
+
+# qhasm: mem64[ap + 3828] = temp1
+# asm 1: mov   <temp1=int64#2,3828(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3828(<ap=%rdi)
+mov   %esi,3828(%rdi)
+
+# qhasm: mem64[ap + 3036] = temp2
+# asm 1: mov   <temp2=int64#3,3036(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3036(<ap=%rdi)
+mov   %edx,3036(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3052]
+# asm 1: mov   3052(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3052(<ap=%rdi),>temp1=%esi
+mov   3052(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3572]
+# asm 1: mov   3572(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3572(<ap=%rdi),>temp2=%edx
+mov   3572(%rdi),%edx
+
+# qhasm: mem64[ap + 3572] = temp1
+# asm 1: mov   <temp1=int64#2,3572(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3572(<ap=%rdi)
+mov   %esi,3572(%rdi)
+
+# qhasm: mem64[ap + 3052] = temp2
+# asm 1: mov   <temp2=int64#3,3052(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3052(<ap=%rdi)
+mov   %edx,3052(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3068]
+# asm 1: mov   3068(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3068(<ap=%rdi),>temp1=%esi
+mov   3068(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4084]
+# asm 1: mov   4084(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4084(<ap=%rdi),>temp2=%edx
+mov   4084(%rdi),%edx
+
+# qhasm: mem64[ap + 4084] = temp1
+# asm 1: mov   <temp1=int64#2,4084(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4084(<ap=%rdi)
+mov   %esi,4084(%rdi)
+
+# qhasm: mem64[ap + 3068] = temp2
+# asm 1: mov   <temp2=int64#3,3068(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3068(<ap=%rdi)
+mov   %edx,3068(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3100]
+# asm 1: mov   3100(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3100(<ap=%rdi),>temp1=%esi
+mov   3100(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3596]
+# asm 1: mov   3596(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3596(<ap=%rdi),>temp2=%edx
+mov   3596(%rdi),%edx
+
+# qhasm: mem64[ap + 3596] = temp1
+# asm 1: mov   <temp1=int64#2,3596(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3596(<ap=%rdi)
+mov   %esi,3596(%rdi)
+
+# qhasm: mem64[ap + 3100] = temp2
+# asm 1: mov   <temp2=int64#3,3100(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3100(<ap=%rdi)
+mov   %edx,3100(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3116]
+# asm 1: mov   3116(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3116(<ap=%rdi),>temp1=%esi
+mov   3116(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3340]
+# asm 1: mov   3340(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3340(<ap=%rdi),>temp2=%edx
+mov   3340(%rdi),%edx
+
+# qhasm: mem64[ap + 3340] = temp1
+# asm 1: mov   <temp1=int64#2,3340(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3340(<ap=%rdi)
+mov   %esi,3340(%rdi)
+
+# qhasm: mem64[ap + 3116] = temp2
+# asm 1: mov   <temp2=int64#3,3116(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3116(<ap=%rdi)
+mov   %edx,3116(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3132]
+# asm 1: mov   3132(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3132(<ap=%rdi),>temp1=%esi
+mov   3132(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3852]
+# asm 1: mov   3852(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3852(<ap=%rdi),>temp2=%edx
+mov   3852(%rdi),%edx
+
+# qhasm: mem64[ap + 3852] = temp1
+# asm 1: mov   <temp1=int64#2,3852(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3852(<ap=%rdi)
+mov   %esi,3852(%rdi)
+
+# qhasm: mem64[ap + 3132] = temp2
+# asm 1: mov   <temp2=int64#3,3132(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3132(<ap=%rdi)
+mov   %edx,3132(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3148]
+# asm 1: mov   3148(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3148(<ap=%rdi),>temp1=%esi
+mov   3148(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3212]
+# asm 1: mov   3212(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3212(<ap=%rdi),>temp2=%edx
+mov   3212(%rdi),%edx
+
+# qhasm: mem64[ap + 3212] = temp1
+# asm 1: mov   <temp1=int64#2,3212(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3212(<ap=%rdi)
+mov   %esi,3212(%rdi)
+
+# qhasm: mem64[ap + 3148] = temp2
+# asm 1: mov   <temp2=int64#3,3148(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3148(<ap=%rdi)
+mov   %edx,3148(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3164]
+# asm 1: mov   3164(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3164(<ap=%rdi),>temp1=%esi
+mov   3164(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3724]
+# asm 1: mov   3724(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3724(<ap=%rdi),>temp2=%edx
+mov   3724(%rdi),%edx
+
+# qhasm: mem64[ap + 3724] = temp1
+# asm 1: mov   <temp1=int64#2,3724(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3724(<ap=%rdi)
+mov   %esi,3724(%rdi)
+
+# qhasm: mem64[ap + 3164] = temp2
+# asm 1: mov   <temp2=int64#3,3164(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3164(<ap=%rdi)
+mov   %edx,3164(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3180]
+# asm 1: mov   3180(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3180(<ap=%rdi),>temp1=%esi
+mov   3180(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3468]
+# asm 1: mov   3468(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3468(<ap=%rdi),>temp2=%edx
+mov   3468(%rdi),%edx
+
+# qhasm: mem64[ap + 3468] = temp1
+# asm 1: mov   <temp1=int64#2,3468(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3468(<ap=%rdi)
+mov   %esi,3468(%rdi)
+
+# qhasm: mem64[ap + 3180] = temp2
+# asm 1: mov   <temp2=int64#3,3180(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3180(<ap=%rdi)
+mov   %edx,3180(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3196]
+# asm 1: mov   3196(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3196(<ap=%rdi),>temp1=%esi
+mov   3196(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3980]
+# asm 1: mov   3980(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3980(<ap=%rdi),>temp2=%edx
+mov   3980(%rdi),%edx
+
+# qhasm: mem64[ap + 3980] = temp1
+# asm 1: mov   <temp1=int64#2,3980(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3980(<ap=%rdi)
+mov   %esi,3980(%rdi)
+
+# qhasm: mem64[ap + 3196] = temp2
+# asm 1: mov   <temp2=int64#3,3196(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3196(<ap=%rdi)
+mov   %edx,3196(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3228]
+# asm 1: mov   3228(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3228(<ap=%rdi),>temp1=%esi
+mov   3228(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3660]
+# asm 1: mov   3660(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3660(<ap=%rdi),>temp2=%edx
+mov   3660(%rdi),%edx
+
+# qhasm: mem64[ap + 3660] = temp1
+# asm 1: mov   <temp1=int64#2,3660(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3660(<ap=%rdi)
+mov   %esi,3660(%rdi)
+
+# qhasm: mem64[ap + 3228] = temp2
+# asm 1: mov   <temp2=int64#3,3228(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3228(<ap=%rdi)
+mov   %edx,3228(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3244]
+# asm 1: mov   3244(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3244(<ap=%rdi),>temp1=%esi
+mov   3244(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3404]
+# asm 1: mov   3404(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3404(<ap=%rdi),>temp2=%edx
+mov   3404(%rdi),%edx
+
+# qhasm: mem64[ap + 3404] = temp1
+# asm 1: mov   <temp1=int64#2,3404(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3404(<ap=%rdi)
+mov   %esi,3404(%rdi)
+
+# qhasm: mem64[ap + 3244] = temp2
+# asm 1: mov   <temp2=int64#3,3244(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3244(<ap=%rdi)
+mov   %edx,3244(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3260]
+# asm 1: mov   3260(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3260(<ap=%rdi),>temp1=%esi
+mov   3260(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3916]
+# asm 1: mov   3916(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3916(<ap=%rdi),>temp2=%edx
+mov   3916(%rdi),%edx
+
+# qhasm: mem64[ap + 3916] = temp1
+# asm 1: mov   <temp1=int64#2,3916(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3916(<ap=%rdi)
+mov   %esi,3916(%rdi)
+
+# qhasm: mem64[ap + 3260] = temp2
+# asm 1: mov   <temp2=int64#3,3260(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3260(<ap=%rdi)
+mov   %edx,3260(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3292]
+# asm 1: mov   3292(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3292(<ap=%rdi),>temp1=%esi
+mov   3292(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3788]
+# asm 1: mov   3788(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3788(<ap=%rdi),>temp2=%edx
+mov   3788(%rdi),%edx
+
+# qhasm: mem64[ap + 3788] = temp1
+# asm 1: mov   <temp1=int64#2,3788(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3788(<ap=%rdi)
+mov   %esi,3788(%rdi)
+
+# qhasm: mem64[ap + 3292] = temp2
+# asm 1: mov   <temp2=int64#3,3292(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3292(<ap=%rdi)
+mov   %edx,3292(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3308]
+# asm 1: mov   3308(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3308(<ap=%rdi),>temp1=%esi
+mov   3308(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3532]
+# asm 1: mov   3532(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3532(<ap=%rdi),>temp2=%edx
+mov   3532(%rdi),%edx
+
+# qhasm: mem64[ap + 3532] = temp1
+# asm 1: mov   <temp1=int64#2,3532(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3532(<ap=%rdi)
+mov   %esi,3532(%rdi)
+
+# qhasm: mem64[ap + 3308] = temp2
+# asm 1: mov   <temp2=int64#3,3308(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3308(<ap=%rdi)
+mov   %edx,3308(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3324]
+# asm 1: mov   3324(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3324(<ap=%rdi),>temp1=%esi
+mov   3324(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4044]
+# asm 1: mov   4044(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4044(<ap=%rdi),>temp2=%edx
+mov   4044(%rdi),%edx
+
+# qhasm: mem64[ap + 4044] = temp1
+# asm 1: mov   <temp1=int64#2,4044(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4044(<ap=%rdi)
+mov   %esi,4044(%rdi)
+
+# qhasm: mem64[ap + 3324] = temp2
+# asm 1: mov   <temp2=int64#3,3324(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3324(<ap=%rdi)
+mov   %edx,3324(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3356]
+# asm 1: mov   3356(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3356(<ap=%rdi),>temp1=%esi
+mov   3356(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3628]
+# asm 1: mov   3628(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3628(<ap=%rdi),>temp2=%edx
+mov   3628(%rdi),%edx
+
+# qhasm: mem64[ap + 3628] = temp1
+# asm 1: mov   <temp1=int64#2,3628(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3628(<ap=%rdi)
+mov   %esi,3628(%rdi)
+
+# qhasm: mem64[ap + 3356] = temp2
+# asm 1: mov   <temp2=int64#3,3356(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3356(<ap=%rdi)
+mov   %edx,3356(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3388]
+# asm 1: mov   3388(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3388(<ap=%rdi),>temp1=%esi
+mov   3388(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3884]
+# asm 1: mov   3884(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3884(<ap=%rdi),>temp2=%edx
+mov   3884(%rdi),%edx
+
+# qhasm: mem64[ap + 3884] = temp1
+# asm 1: mov   <temp1=int64#2,3884(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3884(<ap=%rdi)
+mov   %esi,3884(%rdi)
+
+# qhasm: mem64[ap + 3388] = temp2
+# asm 1: mov   <temp2=int64#3,3388(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3388(<ap=%rdi)
+mov   %edx,3388(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3420]
+# asm 1: mov   3420(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3420(<ap=%rdi),>temp1=%esi
+mov   3420(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3756]
+# asm 1: mov   3756(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3756(<ap=%rdi),>temp2=%edx
+mov   3756(%rdi),%edx
+
+# qhasm: mem64[ap + 3756] = temp1
+# asm 1: mov   <temp1=int64#2,3756(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3756(<ap=%rdi)
+mov   %esi,3756(%rdi)
+
+# qhasm: mem64[ap + 3420] = temp2
+# asm 1: mov   <temp2=int64#3,3420(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3420(<ap=%rdi)
+mov   %edx,3420(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3436]
+# asm 1: mov   3436(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3436(<ap=%rdi),>temp1=%esi
+mov   3436(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3500]
+# asm 1: mov   3500(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3500(<ap=%rdi),>temp2=%edx
+mov   3500(%rdi),%edx
+
+# qhasm: mem64[ap + 3500] = temp1
+# asm 1: mov   <temp1=int64#2,3500(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3500(<ap=%rdi)
+mov   %esi,3500(%rdi)
+
+# qhasm: mem64[ap + 3436] = temp2
+# asm 1: mov   <temp2=int64#3,3436(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3436(<ap=%rdi)
+mov   %edx,3436(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3452]
+# asm 1: mov   3452(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3452(<ap=%rdi),>temp1=%esi
+mov   3452(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4012]
+# asm 1: mov   4012(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4012(<ap=%rdi),>temp2=%edx
+mov   4012(%rdi),%edx
+
+# qhasm: mem64[ap + 4012] = temp1
+# asm 1: mov   <temp1=int64#2,4012(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4012(<ap=%rdi)
+mov   %esi,4012(%rdi)
+
+# qhasm: mem64[ap + 3452] = temp2
+# asm 1: mov   <temp2=int64#3,3452(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3452(<ap=%rdi)
+mov   %edx,3452(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3484]
+# asm 1: mov   3484(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3484(<ap=%rdi),>temp1=%esi
+mov   3484(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3692]
+# asm 1: mov   3692(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3692(<ap=%rdi),>temp2=%edx
+mov   3692(%rdi),%edx
+
+# qhasm: mem64[ap + 3692] = temp1
+# asm 1: mov   <temp1=int64#2,3692(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3692(<ap=%rdi)
+mov   %esi,3692(%rdi)
+
+# qhasm: mem64[ap + 3484] = temp2
+# asm 1: mov   <temp2=int64#3,3484(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3484(<ap=%rdi)
+mov   %edx,3484(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3516]
+# asm 1: mov   3516(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3516(<ap=%rdi),>temp1=%esi
+mov   3516(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3948]
+# asm 1: mov   3948(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3948(<ap=%rdi),>temp2=%edx
+mov   3948(%rdi),%edx
+
+# qhasm: mem64[ap + 3948] = temp1
+# asm 1: mov   <temp1=int64#2,3948(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3948(<ap=%rdi)
+mov   %esi,3948(%rdi)
+
+# qhasm: mem64[ap + 3516] = temp2
+# asm 1: mov   <temp2=int64#3,3516(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3516(<ap=%rdi)
+mov   %edx,3516(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3548]
+# asm 1: mov   3548(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3548(<ap=%rdi),>temp1=%esi
+mov   3548(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3820]
+# asm 1: mov   3820(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3820(<ap=%rdi),>temp2=%edx
+mov   3820(%rdi),%edx
+
+# qhasm: mem64[ap + 3820] = temp1
+# asm 1: mov   <temp1=int64#2,3820(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3820(<ap=%rdi)
+mov   %esi,3820(%rdi)
+
+# qhasm: mem64[ap + 3548] = temp2
+# asm 1: mov   <temp2=int64#3,3548(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3548(<ap=%rdi)
+mov   %edx,3548(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3580]
+# asm 1: mov   3580(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3580(<ap=%rdi),>temp1=%esi
+mov   3580(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4076]
+# asm 1: mov   4076(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4076(<ap=%rdi),>temp2=%edx
+mov   4076(%rdi),%edx
+
+# qhasm: mem64[ap + 4076] = temp1
+# asm 1: mov   <temp1=int64#2,4076(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4076(<ap=%rdi)
+mov   %esi,4076(%rdi)
+
+# qhasm: mem64[ap + 3580] = temp2
+# asm 1: mov   <temp2=int64#3,3580(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3580(<ap=%rdi)
+mov   %edx,3580(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3644]
+# asm 1: mov   3644(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3644(<ap=%rdi),>temp1=%esi
+mov   3644(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3868]
+# asm 1: mov   3868(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3868(<ap=%rdi),>temp2=%edx
+mov   3868(%rdi),%edx
+
+# qhasm: mem64[ap + 3868] = temp1
+# asm 1: mov   <temp1=int64#2,3868(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3868(<ap=%rdi)
+mov   %esi,3868(%rdi)
+
+# qhasm: mem64[ap + 3644] = temp2
+# asm 1: mov   <temp2=int64#3,3644(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3644(<ap=%rdi)
+mov   %edx,3644(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3676]
+# asm 1: mov   3676(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3676(<ap=%rdi),>temp1=%esi
+mov   3676(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3740]
+# asm 1: mov   3740(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3740(<ap=%rdi),>temp2=%edx
+mov   3740(%rdi),%edx
+
+# qhasm: mem64[ap + 3740] = temp1
+# asm 1: mov   <temp1=int64#2,3740(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3740(<ap=%rdi)
+mov   %esi,3740(%rdi)
+
+# qhasm: mem64[ap + 3676] = temp2
+# asm 1: mov   <temp2=int64#3,3676(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3676(<ap=%rdi)
+mov   %edx,3676(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3708]
+# asm 1: mov   3708(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3708(<ap=%rdi),>temp1=%esi
+mov   3708(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3996]
+# asm 1: mov   3996(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3996(<ap=%rdi),>temp2=%edx
+mov   3996(%rdi),%edx
+
+# qhasm: mem64[ap + 3996] = temp1
+# asm 1: mov   <temp1=int64#2,3996(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3996(<ap=%rdi)
+mov   %esi,3996(%rdi)
+
+# qhasm: mem64[ap + 3708] = temp2
+# asm 1: mov   <temp2=int64#3,3708(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3708(<ap=%rdi)
+mov   %edx,3708(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3772]
+# asm 1: mov   3772(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3772(<ap=%rdi),>temp1=%esi
+mov   3772(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 3932]
+# asm 1: mov   3932(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   3932(<ap=%rdi),>temp2=%edx
+mov   3932(%rdi),%edx
+
+# qhasm: mem64[ap + 3932] = temp1
+# asm 1: mov   <temp1=int64#2,3932(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,3932(<ap=%rdi)
+mov   %esi,3932(%rdi)
+
+# qhasm: mem64[ap + 3772] = temp2
+# asm 1: mov   <temp2=int64#3,3772(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3772(<ap=%rdi)
+mov   %edx,3772(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3836]
+# asm 1: mov   3836(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3836(<ap=%rdi),>temp1=%esi
+mov   3836(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4060]
+# asm 1: mov   4060(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4060(<ap=%rdi),>temp2=%edx
+mov   4060(%rdi),%edx
+
+# qhasm: mem64[ap + 4060] = temp1
+# asm 1: mov   <temp1=int64#2,4060(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4060(<ap=%rdi)
+mov   %esi,4060(%rdi)
+
+# qhasm: mem64[ap + 3836] = temp2
+# asm 1: mov   <temp2=int64#3,3836(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3836(<ap=%rdi)
+mov   %edx,3836(%rdi)
+
+# qhasm: temp1 = mem64[ap + 3964]
+# asm 1: mov   3964(<ap=int64#1),>temp1=int64#2
+# asm 2: mov   3964(<ap=%rdi),>temp1=%esi
+mov   3964(%rdi),%esi
+
+# qhasm: temp2 = mem64[ap + 4028]
+# asm 1: mov   4028(<ap=int64#1),>temp2=int64#3
+# asm 2: mov   4028(<ap=%rdi),>temp2=%edx
+mov   4028(%rdi),%edx
+
+# qhasm: mem64[ap + 4028] = temp1
+# asm 1: mov   <temp1=int64#2,4028(<ap=int64#1)
+# asm 2: mov   <temp1=%esi,4028(<ap=%rdi)
+mov   %esi,4028(%rdi)
+
+# qhasm: mem64[ap + 3964] = temp2
+# asm 1: mov   <temp2=int64#3,3964(<ap=int64#1)
+# asm 2: mov   <temp2=%edx,3964(<ap=%rdi)
+mov   %edx,3964(%rdi)
+
+# qhasm: return
+add %r11,%rsp
+ret
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/cbd.s b/crypt/liboqs/kex_rlwe_newhope/avx2/cbd.s
new file mode 100644
index 0000000000000000000000000000000000000000..7619a31ff038bd76c4b00507ccaa38e99da18385
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/cbd.s
@@ -0,0 +1,275 @@
+
+# qhasm: int64 input_0
+
+# qhasm: int64 input_1
+
+# qhasm: int64 input_2
+
+# qhasm: int64 input_3
+
+# qhasm: int64 input_4
+
+# qhasm: int64 input_5
+
+# qhasm: stack64 input_6
+
+# qhasm: stack64 input_7
+
+# qhasm: int64 caller_r11
+
+# qhasm: int64 caller_r12
+
+# qhasm: int64 caller_r13
+
+# qhasm: int64 caller_r14
+
+# qhasm: int64 caller_r15
+
+# qhasm: int64 caller_rbx
+
+# qhasm: int64 caller_rbp
+
+# qhasm: reg256 r
+
+# qhasm: reg256 r2
+
+# qhasm: reg256 a0
+
+# qhasm: reg256 b0
+
+# qhasm: reg256 a1
+
+# qhasm: reg256 b1
+
+# qhasm: reg256 t
+
+# qhasm: reg256 l
+
+# qhasm: reg256 h
+
+# qhasm: reg256 _mask1
+
+# qhasm: reg256 _maskffff
+
+# qhasm: reg256 _maskff
+
+# qhasm: reg256 _q8x
+
+# qhasm: int64 ctr
+
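+# Note on the routine below: each 32-bit lane of the input randomness yields one output
+# coefficient, computed as popcount(low 16 bits) - popcount(high 16 bits) + PARAM_Q
+# (the q8x constant), i.e. a centered-binomial sample shifted into the positive range.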
+# qhasm: enter cbd
+.p2align 5
+.global _cbd
+.global cbd
+_cbd:
+cbd:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: _mask1 = mem256[mask1]
+# asm 1: vmovdqu mask1,>_mask1=reg256#1
+# asm 2: vmovdqu mask1,>_mask1=%ymm0
+vmovdqu mask1,%ymm0
+
+# qhasm: _maskffff = mem256[maskffff]
+# asm 1: vmovdqu maskffff,>_maskffff=reg256#2
+# asm 2: vmovdqu maskffff,>_maskffff=%ymm1
+vmovdqu maskffff,%ymm1
+
+# qhasm: _maskff = mem256[maskff]
+# asm 1: vmovdqu maskff,>_maskff=reg256#3
+# asm 2: vmovdqu maskff,>_maskff=%ymm2
+vmovdqu maskff,%ymm2
+
+# qhasm: _q8x  = mem256[q8x]
+# asm 1: vmovdqu q8x,>_q8x=reg256#4
+# asm 2: vmovdqu q8x,>_q8x=%ymm3
+vmovdqu q8x,%ymm3
+
+# qhasm: ctr = 128
+# asm 1: mov  $128,>ctr=int64#3
+# asm 2: mov  $128,>ctr=%rdx
+mov  $128,%rdx
+
+# qhasm: looptop:
+._looptop:
+
+# qhasm:   r  = mem256[input_1 + 0]
+# asm 1: vmovupd   0(<input_1=int64#2),>r=reg256#5
+# asm 2: vmovupd   0(<input_1=%rsi),>r=%ymm4
+vmovupd   0(%rsi),%ymm4
+
+# qhasm:   a0 = r & _mask1
+# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>a0=reg256#6
+# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>a0=%ymm5
+vpand %ymm4,%ymm0,%ymm5
+
+# qhasm:   16x r unsigned>>= 1
+# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5
+# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4
+vpsrlw $1,%ymm4,%ymm4
+
+# qhasm:   t = r & _mask1
+# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7
+# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6
+vpand %ymm4,%ymm0,%ymm6
+
+# qhasm:   16x a0 += t
+# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6
+# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5
+vpaddw %ymm6,%ymm5,%ymm5
+
+# qhasm:   16x r unsigned>>= 1
+# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5
+# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4
+vpsrlw $1,%ymm4,%ymm4
+
+# qhasm:   t = r & _mask1
+# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7
+# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6
+vpand %ymm4,%ymm0,%ymm6
+
+# qhasm:   16x a0 += t
+# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6
+# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5
+vpaddw %ymm6,%ymm5,%ymm5
+
+# qhasm:   16x r unsigned>>= 1
+# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5
+# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4
+vpsrlw $1,%ymm4,%ymm4
+
+# qhasm:   t = r & _mask1
+# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7
+# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6
+vpand %ymm4,%ymm0,%ymm6
+
+# qhasm:   16x a0 += t
+# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6
+# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5
+vpaddw %ymm6,%ymm5,%ymm5
+
+# qhasm:   16x r unsigned>>= 1
+# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5
+# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4
+vpsrlw $1,%ymm4,%ymm4
+
+# qhasm:   t = r & _mask1
+# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7
+# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6
+vpand %ymm4,%ymm0,%ymm6
+
+# qhasm:   16x a0 += t
+# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6
+# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5
+vpaddw %ymm6,%ymm5,%ymm5
+
+# qhasm:   16x r unsigned>>= 1
+# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5
+# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4
+vpsrlw $1,%ymm4,%ymm4
+
+# qhasm:   t = r & _mask1
+# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7
+# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6
+vpand %ymm4,%ymm0,%ymm6
+
+# qhasm:   16x a0 += t
+# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6
+# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5
+vpaddw %ymm6,%ymm5,%ymm5
+
+# qhasm:   16x r unsigned>>= 1
+# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5
+# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4
+vpsrlw $1,%ymm4,%ymm4
+
+# qhasm:   t = r & _mask1
+# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#7
+# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm6
+vpand %ymm4,%ymm0,%ymm6
+
+# qhasm:   16x a0 += t
+# asm 1: vpaddw <t=reg256#7,<a0=reg256#6,>a0=reg256#6
+# asm 2: vpaddw <t=%ymm6,<a0=%ymm5,>a0=%ymm5
+vpaddw %ymm6,%ymm5,%ymm5
+
+# qhasm:   16x r unsigned>>= 1
+# asm 1: vpsrlw $1,<r=reg256#5,>r=reg256#5
+# asm 2: vpsrlw $1,<r=%ymm4,>r=%ymm4
+vpsrlw $1,%ymm4,%ymm4
+
+# qhasm:   t = r & _mask1
+# asm 1: vpand <r=reg256#5,<_mask1=reg256#1,>t=reg256#5
+# asm 2: vpand <r=%ymm4,<_mask1=%ymm0,>t=%ymm4
+vpand %ymm4,%ymm0,%ymm4
+
+# qhasm:   16x a0 += t
+# asm 1: vpaddw <t=reg256#5,<a0=reg256#6,>a0=reg256#5
+# asm 2: vpaddw <t=%ymm4,<a0=%ymm5,>a0=%ymm4
+vpaddw %ymm4,%ymm5,%ymm4
+
+# qhasm:   16x t = a0 unsigned>> 8
+# asm 1: vpsrlw $8,<a0=reg256#5,>t=reg256#6
+# asm 2: vpsrlw $8,<a0=%ymm4,>t=%ymm5
+vpsrlw $8,%ymm4,%ymm5
+
+# qhasm:   a0 &= _maskff
+# asm 1: vpand <_maskff=reg256#3,<a0=reg256#5,<a0=reg256#5
+# asm 2: vpand <_maskff=%ymm2,<a0=%ymm4,<a0=%ymm4
+vpand %ymm2,%ymm4,%ymm4
+
+# qhasm:   16x a0 += t
+# asm 1: vpaddw <t=reg256#6,<a0=reg256#5,>a0=reg256#5
+# asm 2: vpaddw <t=%ymm5,<a0=%ymm4,>a0=%ymm4
+vpaddw %ymm5,%ymm4,%ymm4
+
+# qhasm:   8x b0 = a0 unsigned>> 16
+# asm 1: vpsrld $16,<a0=reg256#5,>b0=reg256#6
+# asm 2: vpsrld $16,<a0=%ymm4,>b0=%ymm5
+vpsrld $16,%ymm4,%ymm5
+
+# qhasm:   a0 &= _maskffff
+# asm 1: vpand <_maskffff=reg256#2,<a0=reg256#5,<a0=reg256#5
+# asm 2: vpand <_maskffff=%ymm1,<a0=%ymm4,<a0=%ymm4
+vpand %ymm1,%ymm4,%ymm4
+
+# qhasm:   16x a0 += _q8x
+# asm 1: vpaddw <_q8x=reg256#4,<a0=reg256#5,>a0=reg256#5
+# asm 2: vpaddw <_q8x=%ymm3,<a0=%ymm4,>a0=%ymm4
+vpaddw %ymm3,%ymm4,%ymm4
+
+# qhasm:   16x a0 -= b0
+# asm 1: vpsubw <b0=reg256#6,<a0=reg256#5,>a0=reg256#5
+# asm 2: vpsubw <b0=%ymm5,<a0=%ymm4,>a0=%ymm4
+vpsubw %ymm5,%ymm4,%ymm4
+
+# qhasm:   mem256[input_0 + 0] = a0
+# asm 1: vmovupd   <a0=reg256#5,0(<input_0=int64#1)
+# asm 2: vmovupd   <a0=%ymm4,0(<input_0=%rdi)
+vmovupd   %ymm4,0(%rdi)
+
+# qhasm:   input_0 += 32
+# asm 1: add  $32,<input_0=int64#1
+# asm 2: add  $32,<input_0=%rdi
+add  $32,%rdi
+
+# qhasm:   input_1 += 32
+# asm 1: add  $32,<input_1=int64#2
+# asm 2: add  $32,<input_1=%rsi
+add  $32,%rsi
+
+# qhasm:   unsigned>? ctr -= 1
+# asm 1: sub  $1,<ctr=int64#3
+# asm 2: sub  $1,<ctr=%rdx
+sub  $1,%rdx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto looptop if unsigned>
+ja ._looptop
+
+# qhasm: return
+add %r11,%rsp
+ret
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/chacha.S b/crypt/liboqs/kex_rlwe_newhope/avx2/chacha.S
new file mode 100644
index 0000000000000000000000000000000000000000..4597b5a02f8f39c9aab851f118f4fc9d521bf2ae
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/chacha.S
@@ -0,0 +1,1184 @@
+/* From crypto_stream/chacha20/moon/avx2/64/ 
+ * from http://bench.cr.yp.to/supercop.html
+ * by Andrew Moon */
+
+#define GLOBAL2(n) .globl n##_avx2; .globl _##n##_avx2
+#define GLOBAL(n) GLOBAL2(n)
+#define FN2(n) .p2align 4,,15; n##_avx2:; _##n##_avx2:
+#define FN(n) FN2(n)
+
+/* linux/elf annotations and NX indicator */
+#if defined(__linux__) && defined(__ELF__)
+#define ENDFN(n) .size n##_avx2, .-n##_avx2; .type n##_avx2, @function;
+#define ENDFILE() .section .note.GNU-stack,"",%progbits
+#else
+#define ENDFN(n)
+#define ENDFILE()
+#endif
+
+.text
+
+GLOBAL(chacha)
+GLOBAL(xchacha)
+GLOBAL(hchacha)
+GLOBAL(chacha_blocks)
+
+/* Windows 64 calling convention fixups */
+#if defined(_WIN64) || defined(__CYGWIN64__)
+FN(chacha)
+subq $184, %rsp
+vmovdqa %xmm6, 0(%rsp)
+vmovdqa %xmm7, 16(%rsp)
+vmovdqa %xmm8, 32(%rsp)
+vmovdqa %xmm9, 48(%rsp)
+vmovdqa %xmm10, 64(%rsp)
+vmovdqa %xmm11, 80(%rsp)
+vmovdqa %xmm12, 96(%rsp)
+vmovdqa %xmm13, 112(%rsp)
+vmovdqa %xmm14, 128(%rsp)
+vmovdqa %xmm15, 144(%rsp)
+movq %rdi, 160(%rsp)
+movq %rsi, 168(%rsp)
+movq %rcx, %rdi
+movq %rdx, %rsi
+movq %r8, %rdx
+movq %r9, %rcx
+movq 224(%rsp), %r8
+movq 232(%rsp), %r9
+call chacha_thunk_avx2
+vmovdqa 0(%rsp), %xmm6
+vmovdqa 16(%rsp), %xmm7
+vmovdqa 32(%rsp), %xmm8
+vmovdqa 48(%rsp), %xmm9
+vmovdqa 64(%rsp), %xmm10
+vmovdqa 80(%rsp), %xmm11
+vmovdqa 96(%rsp), %xmm12
+vmovdqa 112(%rsp), %xmm13
+vmovdqa 128(%rsp), %xmm14
+vmovdqa 144(%rsp), %xmm15
+movq 160(%rsp), %rdi
+movq 168(%rsp), %rsi
+addq $184, %rsp
+ret
+ENDFN(chacha)
+
+FN(xchacha)
+subq $184, %rsp
+vmovdqa %xmm6, 0(%rsp)
+vmovdqa %xmm7, 16(%rsp)
+vmovdqa %xmm8, 32(%rsp)
+vmovdqa %xmm9, 48(%rsp)
+vmovdqa %xmm10, 64(%rsp)
+vmovdqa %xmm11, 80(%rsp)
+vmovdqa %xmm12, 96(%rsp)
+vmovdqa %xmm13, 112(%rsp)
+vmovdqa %xmm14, 128(%rsp)
+vmovdqa %xmm15, 144(%rsp)
+movq %rdi, 160(%rsp)
+movq %rsi, 168(%rsp)
+movq %rcx, %rdi
+movq %rdx, %rsi
+movq %r8, %rdx
+movq %r9, %rcx
+movq 224(%rsp), %r8
+movq 232(%rsp), %r9
+call xchacha_thunk_avx2
+vmovdqa 0(%rsp), %xmm6
+vmovdqa 16(%rsp), %xmm7
+vmovdqa 32(%rsp), %xmm8
+vmovdqa 48(%rsp), %xmm9
+vmovdqa 64(%rsp), %xmm10
+vmovdqa 80(%rsp), %xmm11
+vmovdqa 96(%rsp), %xmm12
+vmovdqa 112(%rsp), %xmm13
+vmovdqa 128(%rsp), %xmm14
+vmovdqa 144(%rsp), %xmm15
+movq 160(%rsp), %rdi
+movq 168(%rsp), %rsi
+addq $184, %rsp
+ret
+ENDFN(xchacha)
+
+FN(chacha_blocks)
+subq $184, %rsp
+movdqa %xmm6, 0(%rsp)
+movdqa %xmm7, 16(%rsp)
+movdqa %xmm8, 32(%rsp)
+movdqa %xmm9, 48(%rsp)
+movdqa %xmm10, 64(%rsp)
+movdqa %xmm11, 80(%rsp)
+movdqa %xmm12, 96(%rsp)
+movdqa %xmm13, 112(%rsp)
+movdqa %xmm14, 128(%rsp)
+movdqa %xmm15, 144(%rsp)
+movq %rdi, 160(%rsp)
+movq %rsi, 168(%rsp)
+movq %rcx, %rdi
+movq %rdx, %rsi
+movq %r8, %rdx
+movq %r9, %rcx
+call chacha_blocks_thunk_avx2
+movdqa 0(%rsp), %xmm6
+movdqa 16(%rsp), %xmm7
+movdqa 32(%rsp), %xmm8
+movdqa 48(%rsp), %xmm9
+movdqa 64(%rsp), %xmm10
+movdqa 80(%rsp), %xmm11
+movdqa 96(%rsp), %xmm12
+movdqa 112(%rsp), %xmm13
+movdqa 128(%rsp), %xmm14
+movdqa 144(%rsp), %xmm15
+movq 160(%rsp), %rdi
+movq 168(%rsp), %rsi
+addq $184, %rsp
+ret
+ENDFN(chacha_blocks)
+
+FN(hchacha)
+subq $40, %rsp
+movdqa %xmm6, 0(%rsp)
+movq %rdi, 16(%rsp)
+movq %rsi, 24(%rsp)
+movq %rcx, %rdi
+movq %rdx, %rsi
+movq %r8, %rdx
+movq %r9, %rcx
+call hchacha_thunk_avx2
+movdqa 0(%rsp), %xmm6
+movq 16(%rsp), %rdi
+movq 24(%rsp), %rsi
+addq $40, %rsp
+ret
+ENDFN(hchacha)
+
+#define chacha chacha_thunk
+#define xchacha xchacha_thunk
+#define hchacha hchacha_thunk
+#define chacha_blocks chacha_blocks_thunk
+#endif
+
+
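+/* Core block routine: the 512-byte path runs eight ChaCha blocks in parallel in ymm
+   registers, the 256-byte path runs four blocks in xmm registers, and the tail path
+   handles single 64-byte blocks, bouncing partial blocks through the stack buffer. */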
+FN(chacha_blocks)
+chacha_blocks_avx2_local:
+pushq %rbx
+pushq %rbp
+pushq %r12
+pushq %r13
+pushq %r14
+movq %rsp, %rbp
+andq $~63, %rsp
+subq $512, %rsp
+leaq C(%rip), %rax
+vmovdqa 0(%rax), %xmm8
+vmovdqa 16(%rax), %xmm6
+vmovdqa 32(%rax), %xmm7
+vmovdqa 0(%rdi), %xmm9
+vmovdqa 16(%rdi), %xmm10
+vmovdqa 32(%rdi), %xmm11
+movq 48(%rdi), %rax
+movq $1, %r9
+vmovdqa %xmm8, 0(%rsp)
+vmovdqa %xmm9, 16(%rsp)
+vmovdqa %xmm10, 32(%rsp)
+vmovdqa %xmm11, 48(%rsp)
+movq %rax, 64(%rsp)
+vmovdqa %xmm6, 448(%rsp)
+vmovdqa %xmm6, 464(%rsp)
+vmovdqa %xmm7, 480(%rsp)
+vmovdqa %xmm7, 496(%rsp)
+cmpq $512, %rcx
+jae chacha_blocks_avx2_atleast512
+cmp $256, %rcx
+jae chacha_blocks_avx2_atleast256
+jmp chacha_blocks_avx2_below256
+.p2align 6,,63
+chacha_blocks_avx2_atleast512:
+movq 48(%rsp), %rax
+leaq 1(%rax), %r8
+leaq 2(%rax), %r9
+leaq 3(%rax), %r10
+leaq 4(%rax), %rbx
+leaq 5(%rax), %r11
+leaq 6(%rax), %r12
+leaq 7(%rax), %r13
+leaq 8(%rax), %r14
+movl %eax, 128(%rsp)
+movl %r8d, 4+128(%rsp)
+movl %r9d, 8+128(%rsp)
+movl %r10d, 12+128(%rsp)
+movl %ebx, 16+128(%rsp)
+movl %r11d, 20+128(%rsp)
+movl %r12d, 24+128(%rsp)
+movl %r13d, 28+128(%rsp)
+shrq $32, %rax
+shrq $32, %r8
+shrq $32, %r9
+shrq $32, %r10
+shrq $32, %rbx
+shrq $32, %r11
+shrq $32, %r12
+shrq $32, %r13
+movl %eax, 160(%rsp)
+movl %r8d, 4+160(%rsp)
+movl %r9d, 8+160(%rsp)
+movl %r10d, 12+160(%rsp)
+movl %ebx, 16+160(%rsp)
+movl %r11d, 20+160(%rsp)
+movl %r12d, 24+160(%rsp)
+movl %r13d, 28+160(%rsp)
+movq %r14, 48(%rsp)
+movq 64(%rsp), %rax
+vpbroadcastd 0(%rsp), %ymm0
+vpbroadcastd 4+0(%rsp), %ymm1
+vpbroadcastd 8+0(%rsp), %ymm2
+vpbroadcastd 12+0(%rsp), %ymm3
+vpbroadcastd 16(%rsp), %ymm4
+vpbroadcastd 4+16(%rsp), %ymm5
+vpbroadcastd 8+16(%rsp), %ymm6
+vpbroadcastd 12+16(%rsp), %ymm7
+vpbroadcastd 32(%rsp), %ymm8
+vpbroadcastd 4+32(%rsp), %ymm9
+vpbroadcastd 8+32(%rsp), %ymm10
+vpbroadcastd 12+32(%rsp), %ymm11
+vpbroadcastd 8+48(%rsp), %ymm14
+vpbroadcastd 12+48(%rsp), %ymm15
+vmovdqa 128(%rsp), %ymm12
+vmovdqa 160(%rsp), %ymm13
+chacha_blocks_avx2_mainloop1:
+vpaddd %ymm0, %ymm4, %ymm0
+vpaddd %ymm1, %ymm5, %ymm1
+vpxor %ymm12, %ymm0, %ymm12
+vpxor %ymm13, %ymm1, %ymm13
+vpaddd %ymm2, %ymm6, %ymm2
+vpaddd %ymm3, %ymm7, %ymm3
+vpxor %ymm14, %ymm2, %ymm14
+vpxor %ymm15, %ymm3, %ymm15
+vpshufb 448(%rsp), %ymm12, %ymm12
+vpshufb 448(%rsp), %ymm13, %ymm13
+vpaddd %ymm8, %ymm12, %ymm8
+vpaddd %ymm9, %ymm13, %ymm9
+vpshufb 448(%rsp), %ymm14, %ymm14
+vpshufb 448(%rsp), %ymm15, %ymm15
+vpaddd %ymm10, %ymm14, %ymm10
+vpaddd %ymm11, %ymm15, %ymm11
+vmovdqa %ymm12, 96(%rsp)
+vpxor %ymm4, %ymm8, %ymm4
+vpxor %ymm5, %ymm9, %ymm5
+vpslld $ 12, %ymm4, %ymm12
+vpsrld $20, %ymm4, %ymm4
+vpxor %ymm4, %ymm12, %ymm4
+vpslld $ 12, %ymm5, %ymm12
+vpsrld $20, %ymm5, %ymm5
+vpxor %ymm5, %ymm12, %ymm5
+vpxor %ymm6, %ymm10, %ymm6
+vpxor %ymm7, %ymm11, %ymm7
+vpslld $ 12, %ymm6, %ymm12
+vpsrld $20, %ymm6, %ymm6
+vpxor %ymm6, %ymm12, %ymm6
+vpslld $ 12, %ymm7, %ymm12
+vpsrld $20, %ymm7, %ymm7
+vpxor %ymm7, %ymm12, %ymm7
+vpaddd %ymm0, %ymm4, %ymm0
+vpaddd %ymm1, %ymm5, %ymm1
+vpxor 96(%rsp), %ymm0, %ymm12
+vpxor %ymm13, %ymm1, %ymm13
+vpaddd %ymm2, %ymm6, %ymm2
+vpaddd %ymm3, %ymm7, %ymm3
+vpxor %ymm14, %ymm2, %ymm14
+vpxor %ymm15, %ymm3, %ymm15
+vpshufb 480(%rsp), %ymm12, %ymm12
+vpshufb 480(%rsp), %ymm13, %ymm13
+vpaddd %ymm8, %ymm12, %ymm8
+vpaddd %ymm9, %ymm13, %ymm9
+vpshufb 480(%rsp), %ymm14, %ymm14
+vpshufb 480(%rsp), %ymm15, %ymm15
+vpaddd %ymm10, %ymm14, %ymm10
+vpaddd %ymm11, %ymm15, %ymm11
+vmovdqa %ymm12, 96(%rsp)
+vpxor %ymm4, %ymm8, %ymm4
+vpxor %ymm5, %ymm9, %ymm5
+vpslld $ 7, %ymm4, %ymm12
+vpsrld $25, %ymm4, %ymm4
+vpxor %ymm4, %ymm12, %ymm4
+vpslld $ 7, %ymm5, %ymm12
+vpsrld $25, %ymm5, %ymm5
+vpxor %ymm5, %ymm12, %ymm5
+vpxor %ymm6, %ymm10, %ymm6
+vpxor %ymm7, %ymm11, %ymm7
+vpslld $ 7, %ymm6, %ymm12
+vpsrld $25, %ymm6, %ymm6
+vpxor %ymm6, %ymm12, %ymm6
+vpslld $ 7, %ymm7, %ymm12
+vpsrld $25, %ymm7, %ymm7
+vpxor %ymm7, %ymm12, %ymm7
+vpaddd %ymm0, %ymm5, %ymm0
+vpaddd %ymm1, %ymm6, %ymm1
+vpxor %ymm15, %ymm0, %ymm15
+vpxor 96(%rsp), %ymm1, %ymm12
+vpaddd %ymm2, %ymm7, %ymm2
+vpaddd %ymm3, %ymm4, %ymm3
+vpxor %ymm13, %ymm2, %ymm13
+vpxor %ymm14, %ymm3, %ymm14
+vpshufb 448(%rsp), %ymm15, %ymm15
+vpshufb 448(%rsp), %ymm12, %ymm12
+vpaddd %ymm10, %ymm15, %ymm10
+vpaddd %ymm11, %ymm12, %ymm11
+vpshufb 448(%rsp), %ymm13, %ymm13
+vpshufb 448(%rsp), %ymm14, %ymm14
+vpaddd %ymm8, %ymm13, %ymm8
+vpaddd %ymm9, %ymm14, %ymm9
+vmovdqa %ymm15, 96(%rsp)
+vpxor %ymm5, %ymm10, %ymm5
+vpxor %ymm6, %ymm11, %ymm6
+vpslld $ 12, %ymm5, %ymm15
+vpsrld $20, %ymm5, %ymm5
+vpxor %ymm5, %ymm15, %ymm5
+vpslld $ 12, %ymm6, %ymm15
+vpsrld $20, %ymm6, %ymm6
+vpxor %ymm6, %ymm15, %ymm6
+vpxor %ymm7, %ymm8, %ymm7
+vpxor %ymm4, %ymm9, %ymm4
+vpslld $ 12, %ymm7, %ymm15
+vpsrld $20, %ymm7, %ymm7
+vpxor %ymm7, %ymm15, %ymm7
+vpslld $ 12, %ymm4, %ymm15
+vpsrld $20, %ymm4, %ymm4
+vpxor %ymm4, %ymm15, %ymm4
+vpaddd %ymm0, %ymm5, %ymm0
+vpaddd %ymm1, %ymm6, %ymm1
+vpxor 96(%rsp), %ymm0, %ymm15
+vpxor %ymm12, %ymm1, %ymm12
+vpaddd %ymm2, %ymm7, %ymm2
+vpaddd %ymm3, %ymm4, %ymm3
+vpxor %ymm13, %ymm2, %ymm13
+vpxor %ymm14, %ymm3, %ymm14
+vpshufb 480(%rsp), %ymm15, %ymm15
+vpshufb 480(%rsp), %ymm12, %ymm12
+vpaddd %ymm10, %ymm15, %ymm10
+vpaddd %ymm11, %ymm12, %ymm11
+vpshufb 480(%rsp), %ymm13, %ymm13
+vpshufb 480(%rsp), %ymm14, %ymm14
+vpaddd %ymm8, %ymm13, %ymm8
+vpaddd %ymm9, %ymm14, %ymm9
+vmovdqa %ymm15, 96(%rsp)
+vpxor %ymm5, %ymm10, %ymm5
+vpxor %ymm6, %ymm11, %ymm6
+vpslld $ 7, %ymm5, %ymm15
+vpsrld $25, %ymm5, %ymm5
+vpxor %ymm5, %ymm15, %ymm5
+vpslld $ 7, %ymm6, %ymm15
+vpsrld $25, %ymm6, %ymm6
+vpxor %ymm6, %ymm15, %ymm6
+vpxor %ymm7, %ymm8, %ymm7
+vpxor %ymm4, %ymm9, %ymm4
+vpslld $ 7, %ymm7, %ymm15
+vpsrld $25, %ymm7, %ymm7
+vpxor %ymm7, %ymm15, %ymm7
+vpslld $ 7, %ymm4, %ymm15
+vpsrld $25, %ymm4, %ymm4
+vpxor %ymm4, %ymm15, %ymm4
+vmovdqa 96(%rsp), %ymm15
+subq $2, %rax
+jnz chacha_blocks_avx2_mainloop1
+vmovdqa %ymm8, 192(%rsp)
+vmovdqa %ymm9, 224(%rsp)
+vmovdqa %ymm10, 256(%rsp)
+vmovdqa %ymm11, 288(%rsp)
+vmovdqa %ymm12, 320(%rsp)
+vmovdqa %ymm13, 352(%rsp)
+vmovdqa %ymm14, 384(%rsp)
+vmovdqa %ymm15, 416(%rsp)
+vpbroadcastd 0(%rsp), %ymm8
+vpbroadcastd 4+0(%rsp), %ymm9
+vpbroadcastd 8+0(%rsp), %ymm10
+vpbroadcastd 12+0(%rsp), %ymm11
+vpbroadcastd 16(%rsp), %ymm12
+vpbroadcastd 4+16(%rsp), %ymm13
+vpbroadcastd 8+16(%rsp), %ymm14
+vpbroadcastd 12+16(%rsp), %ymm15
+vpaddd %ymm8, %ymm0, %ymm0
+vpaddd %ymm9, %ymm1, %ymm1
+vpaddd %ymm10, %ymm2, %ymm2
+vpaddd %ymm11, %ymm3, %ymm3
+vpaddd %ymm12, %ymm4, %ymm4
+vpaddd %ymm13, %ymm5, %ymm5
+vpaddd %ymm14, %ymm6, %ymm6
+vpaddd %ymm15, %ymm7, %ymm7
+vpunpckldq %ymm1, %ymm0, %ymm8
+vpunpckldq %ymm3, %ymm2, %ymm9
+vpunpckhdq %ymm1, %ymm0, %ymm12
+vpunpckhdq %ymm3, %ymm2, %ymm13
+vpunpckldq %ymm5, %ymm4, %ymm10
+vpunpckldq %ymm7, %ymm6, %ymm11
+vpunpckhdq %ymm5, %ymm4, %ymm14
+vpunpckhdq %ymm7, %ymm6, %ymm15
+vpunpcklqdq %ymm9, %ymm8, %ymm0
+vpunpcklqdq %ymm11, %ymm10, %ymm1
+vpunpckhqdq %ymm9, %ymm8, %ymm2
+vpunpckhqdq %ymm11, %ymm10, %ymm3
+vpunpcklqdq %ymm13, %ymm12, %ymm4
+vpunpcklqdq %ymm15, %ymm14, %ymm5
+vpunpckhqdq %ymm13, %ymm12, %ymm6
+vpunpckhqdq %ymm15, %ymm14, %ymm7
+vperm2i128 $0x20, %ymm1, %ymm0, %ymm8
+vperm2i128 $0x20, %ymm3, %ymm2, %ymm9
+vperm2i128 $0x31, %ymm1, %ymm0, %ymm12
+vperm2i128 $0x31, %ymm3, %ymm2, %ymm13
+vperm2i128 $0x20, %ymm5, %ymm4, %ymm10
+vperm2i128 $0x20, %ymm7, %ymm6, %ymm11
+vperm2i128 $0x31, %ymm5, %ymm4, %ymm14
+vperm2i128 $0x31, %ymm7, %ymm6, %ymm15
+andq %rsi, %rsi
+jz chacha_blocks_avx2_noinput1
+vpxor 0(%rsi), %ymm8, %ymm8
+vpxor 64(%rsi), %ymm9, %ymm9
+vpxor 128(%rsi), %ymm10, %ymm10
+vpxor 192(%rsi), %ymm11, %ymm11
+vpxor 256(%rsi), %ymm12, %ymm12
+vpxor 320(%rsi), %ymm13, %ymm13
+vpxor 384(%rsi), %ymm14, %ymm14
+vpxor 448(%rsi), %ymm15, %ymm15
+vmovdqu %ymm8, 0(%rdx)
+vmovdqu %ymm9, 64(%rdx)
+vmovdqu %ymm10, 128(%rdx)
+vmovdqu %ymm11, 192(%rdx)
+vmovdqu %ymm12, 256(%rdx)
+vmovdqu %ymm13, 320(%rdx)
+vmovdqu %ymm14, 384(%rdx)
+vmovdqu %ymm15, 448(%rdx)
+vmovdqa 192(%rsp), %ymm0
+vmovdqa 224(%rsp), %ymm1
+vmovdqa 256(%rsp), %ymm2
+vmovdqa 288(%rsp), %ymm3
+vmovdqa 320(%rsp), %ymm4
+vmovdqa 352(%rsp), %ymm5
+vmovdqa 384(%rsp), %ymm6
+vmovdqa 416(%rsp), %ymm7
+vpbroadcastd 32(%rsp), %ymm8
+vpbroadcastd 4+32(%rsp), %ymm9
+vpbroadcastd 8+32(%rsp), %ymm10
+vpbroadcastd 12+32(%rsp), %ymm11
+vmovdqa 128(%rsp), %ymm12
+vmovdqa 160(%rsp), %ymm13
+vpbroadcastd 8+48(%rsp), %ymm14
+vpbroadcastd 12+48(%rsp), %ymm15
+vpaddd %ymm8, %ymm0, %ymm0
+vpaddd %ymm9, %ymm1, %ymm1
+vpaddd %ymm10, %ymm2, %ymm2
+vpaddd %ymm11, %ymm3, %ymm3
+vpaddd %ymm12, %ymm4, %ymm4
+vpaddd %ymm13, %ymm5, %ymm5
+vpaddd %ymm14, %ymm6, %ymm6
+vpaddd %ymm15, %ymm7, %ymm7
+vpunpckldq %ymm1, %ymm0, %ymm8
+vpunpckldq %ymm3, %ymm2, %ymm9
+vpunpckhdq %ymm1, %ymm0, %ymm12
+vpunpckhdq %ymm3, %ymm2, %ymm13
+vpunpckldq %ymm5, %ymm4, %ymm10
+vpunpckldq %ymm7, %ymm6, %ymm11
+vpunpckhdq %ymm5, %ymm4, %ymm14
+vpunpckhdq %ymm7, %ymm6, %ymm15
+vpunpcklqdq %ymm9, %ymm8, %ymm0
+vpunpcklqdq %ymm11, %ymm10, %ymm1
+vpunpckhqdq %ymm9, %ymm8, %ymm2
+vpunpckhqdq %ymm11, %ymm10, %ymm3
+vpunpcklqdq %ymm13, %ymm12, %ymm4
+vpunpcklqdq %ymm15, %ymm14, %ymm5
+vpunpckhqdq %ymm13, %ymm12, %ymm6
+vpunpckhqdq %ymm15, %ymm14, %ymm7
+vperm2i128 $0x20, %ymm1, %ymm0, %ymm8
+vperm2i128 $0x20, %ymm3, %ymm2, %ymm9
+vperm2i128 $0x31, %ymm1, %ymm0, %ymm12
+vperm2i128 $0x31, %ymm3, %ymm2, %ymm13
+vperm2i128 $0x20, %ymm5, %ymm4, %ymm10
+vperm2i128 $0x20, %ymm7, %ymm6, %ymm11
+vperm2i128 $0x31, %ymm5, %ymm4, %ymm14
+vperm2i128 $0x31, %ymm7, %ymm6, %ymm15
+vpxor 32(%rsi), %ymm8, %ymm8
+vpxor 96(%rsi), %ymm9, %ymm9
+vpxor 160(%rsi), %ymm10, %ymm10
+vpxor 224(%rsi), %ymm11, %ymm11
+vpxor 288(%rsi), %ymm12, %ymm12
+vpxor 352(%rsi), %ymm13, %ymm13
+vpxor 416(%rsi), %ymm14, %ymm14
+vpxor 480(%rsi), %ymm15, %ymm15
+vmovdqu %ymm8, 32(%rdx)
+vmovdqu %ymm9, 96(%rdx)
+vmovdqu %ymm10, 160(%rdx)
+vmovdqu %ymm11, 224(%rdx)
+vmovdqu %ymm12, 288(%rdx)
+vmovdqu %ymm13, 352(%rdx)
+vmovdqu %ymm14, 416(%rdx)
+vmovdqu %ymm15, 480(%rdx)
+addq $512, %rsi
+jmp chacha_blocks_avx2_mainloop1_cont
+chacha_blocks_avx2_noinput1:
+vmovdqu %ymm8, 0(%rdx)
+vmovdqu %ymm9, 64(%rdx)
+vmovdqu %ymm10, 128(%rdx)
+vmovdqu %ymm11, 192(%rdx)
+vmovdqu %ymm12, 256(%rdx)
+vmovdqu %ymm13, 320(%rdx)
+vmovdqu %ymm14, 384(%rdx)
+vmovdqu %ymm15, 448(%rdx)
+vmovdqa 192(%rsp), %ymm0
+vmovdqa 224(%rsp), %ymm1
+vmovdqa 256(%rsp), %ymm2
+vmovdqa 288(%rsp), %ymm3
+vmovdqa 320(%rsp), %ymm4
+vmovdqa 352(%rsp), %ymm5
+vmovdqa 384(%rsp), %ymm6
+vmovdqa 416(%rsp), %ymm7
+vpbroadcastd 32(%rsp), %ymm8
+vpbroadcastd 4+32(%rsp), %ymm9
+vpbroadcastd 8+32(%rsp), %ymm10
+vpbroadcastd 12+32(%rsp), %ymm11
+vmovdqa 128(%rsp), %ymm12
+vmovdqa 160(%rsp), %ymm13
+vpbroadcastd 8+48(%rsp), %ymm14
+vpbroadcastd 12+48(%rsp), %ymm15
+vpaddd %ymm8, %ymm0, %ymm0
+vpaddd %ymm9, %ymm1, %ymm1
+vpaddd %ymm10, %ymm2, %ymm2
+vpaddd %ymm11, %ymm3, %ymm3
+vpaddd %ymm12, %ymm4, %ymm4
+vpaddd %ymm13, %ymm5, %ymm5
+vpaddd %ymm14, %ymm6, %ymm6
+vpaddd %ymm15, %ymm7, %ymm7
+vpunpckldq %ymm1, %ymm0, %ymm8
+vpunpckldq %ymm3, %ymm2, %ymm9
+vpunpckhdq %ymm1, %ymm0, %ymm12
+vpunpckhdq %ymm3, %ymm2, %ymm13
+vpunpckldq %ymm5, %ymm4, %ymm10
+vpunpckldq %ymm7, %ymm6, %ymm11
+vpunpckhdq %ymm5, %ymm4, %ymm14
+vpunpckhdq %ymm7, %ymm6, %ymm15
+vpunpcklqdq %ymm9, %ymm8, %ymm0
+vpunpcklqdq %ymm11, %ymm10, %ymm1
+vpunpckhqdq %ymm9, %ymm8, %ymm2
+vpunpckhqdq %ymm11, %ymm10, %ymm3
+vpunpcklqdq %ymm13, %ymm12, %ymm4
+vpunpcklqdq %ymm15, %ymm14, %ymm5
+vpunpckhqdq %ymm13, %ymm12, %ymm6
+vpunpckhqdq %ymm15, %ymm14, %ymm7
+vperm2i128 $0x20, %ymm1, %ymm0, %ymm8
+vperm2i128 $0x20, %ymm3, %ymm2, %ymm9
+vperm2i128 $0x31, %ymm1, %ymm0, %ymm12
+vperm2i128 $0x31, %ymm3, %ymm2, %ymm13
+vperm2i128 $0x20, %ymm5, %ymm4, %ymm10
+vperm2i128 $0x20, %ymm7, %ymm6, %ymm11
+vperm2i128 $0x31, %ymm5, %ymm4, %ymm14
+vperm2i128 $0x31, %ymm7, %ymm6, %ymm15
+vmovdqu %ymm8, 32(%rdx)
+vmovdqu %ymm9, 96(%rdx)
+vmovdqu %ymm10, 160(%rdx)
+vmovdqu %ymm11, 224(%rdx)
+vmovdqu %ymm12, 288(%rdx)
+vmovdqu %ymm13, 352(%rdx)
+vmovdqu %ymm14, 416(%rdx)
+vmovdqu %ymm15, 480(%rdx)
+chacha_blocks_avx2_mainloop1_cont:
+addq $512, %rdx
+subq $512, %rcx
+cmp $512, %rcx
+jae chacha_blocks_avx2_atleast512
+cmp $256, %rcx
+jb chacha_blocks_avx2_below256_fixup
+chacha_blocks_avx2_atleast256:
+movq 48(%rsp), %rax
+leaq 1(%rax), %r8
+leaq 2(%rax), %r9
+leaq 3(%rax), %r10
+leaq 4(%rax), %rbx
+movl %eax, 128(%rsp)
+movl %r8d, 4+128(%rsp)
+movl %r9d, 8+128(%rsp)
+movl %r10d, 12+128(%rsp)
+shrq $32, %rax
+shrq $32, %r8
+shrq $32, %r9
+shrq $32, %r10
+movl %eax, 160(%rsp)
+movl %r8d, 4+160(%rsp)
+movl %r9d, 8+160(%rsp)
+movl %r10d, 12+160(%rsp)
+movq %rbx, 48(%rsp)
+movq 64(%rsp), %rax
+vpbroadcastd 0(%rsp), %xmm0
+vpbroadcastd 4+0(%rsp), %xmm1
+vpbroadcastd 8+0(%rsp), %xmm2
+vpbroadcastd 12+0(%rsp), %xmm3
+vpbroadcastd 16(%rsp), %xmm4
+vpbroadcastd 4+16(%rsp), %xmm5
+vpbroadcastd 8+16(%rsp), %xmm6
+vpbroadcastd 12+16(%rsp), %xmm7
+vpbroadcastd 32(%rsp), %xmm8
+vpbroadcastd 4+32(%rsp), %xmm9
+vpbroadcastd 8+32(%rsp), %xmm10
+vpbroadcastd 12+32(%rsp), %xmm11
+vmovdqa 128(%rsp), %xmm12
+vmovdqa 160(%rsp), %xmm13
+vpbroadcastd 8+48(%rsp), %xmm14
+vpbroadcastd 12+48(%rsp), %xmm15
+chacha_blocks_avx2_mainloop2:
+vpaddd %xmm0, %xmm4, %xmm0
+vpaddd %xmm1, %xmm5, %xmm1
+vpxor %xmm12, %xmm0, %xmm12
+vpxor %xmm13, %xmm1, %xmm13
+vpaddd %xmm2, %xmm6, %xmm2
+vpaddd %xmm3, %xmm7, %xmm3
+vpxor %xmm14, %xmm2, %xmm14
+vpxor %xmm15, %xmm3, %xmm15
+vpshufb 448(%rsp), %xmm12, %xmm12
+vpshufb 448(%rsp), %xmm13, %xmm13
+vpaddd %xmm8, %xmm12, %xmm8
+vpaddd %xmm9, %xmm13, %xmm9
+vpshufb 448(%rsp), %xmm14, %xmm14
+vpshufb 448(%rsp), %xmm15, %xmm15
+vpaddd %xmm10, %xmm14, %xmm10
+vpaddd %xmm11, %xmm15, %xmm11
+vmovdqa %xmm12, 96(%rsp)
+vpxor %xmm4, %xmm8, %xmm4
+vpxor %xmm5, %xmm9, %xmm5
+vpslld $ 12, %xmm4, %xmm12
+vpsrld $20, %xmm4, %xmm4
+vpxor %xmm4, %xmm12, %xmm4
+vpslld $ 12, %xmm5, %xmm12
+vpsrld $20, %xmm5, %xmm5
+vpxor %xmm5, %xmm12, %xmm5
+vpxor %xmm6, %xmm10, %xmm6
+vpxor %xmm7, %xmm11, %xmm7
+vpslld $ 12, %xmm6, %xmm12
+vpsrld $20, %xmm6, %xmm6
+vpxor %xmm6, %xmm12, %xmm6
+vpslld $ 12, %xmm7, %xmm12
+vpsrld $20, %xmm7, %xmm7
+vpxor %xmm7, %xmm12, %xmm7
+vpaddd %xmm0, %xmm4, %xmm0
+vpaddd %xmm1, %xmm5, %xmm1
+vpxor 96(%rsp), %xmm0, %xmm12
+vpxor %xmm13, %xmm1, %xmm13
+vpaddd %xmm2, %xmm6, %xmm2
+vpaddd %xmm3, %xmm7, %xmm3
+vpxor %xmm14, %xmm2, %xmm14
+vpxor %xmm15, %xmm3, %xmm15
+vpshufb 480(%rsp), %xmm12, %xmm12
+vpshufb 480(%rsp), %xmm13, %xmm13
+vpaddd %xmm8, %xmm12, %xmm8
+vpaddd %xmm9, %xmm13, %xmm9
+vpshufb 480(%rsp), %xmm14, %xmm14
+vpshufb 480(%rsp), %xmm15, %xmm15
+vpaddd %xmm10, %xmm14, %xmm10
+vpaddd %xmm11, %xmm15, %xmm11
+vmovdqa %xmm12, 96(%rsp)
+vpxor %xmm4, %xmm8, %xmm4
+vpxor %xmm5, %xmm9, %xmm5
+vpslld $ 7, %xmm4, %xmm12
+vpsrld $25, %xmm4, %xmm4
+vpxor %xmm4, %xmm12, %xmm4
+vpslld $ 7, %xmm5, %xmm12
+vpsrld $25, %xmm5, %xmm5
+vpxor %xmm5, %xmm12, %xmm5
+vpxor %xmm6, %xmm10, %xmm6
+vpxor %xmm7, %xmm11, %xmm7
+vpslld $ 7, %xmm6, %xmm12
+vpsrld $25, %xmm6, %xmm6
+vpxor %xmm6, %xmm12, %xmm6
+vpslld $ 7, %xmm7, %xmm12
+vpsrld $25, %xmm7, %xmm7
+vpxor %xmm7, %xmm12, %xmm7
+vpaddd %xmm0, %xmm5, %xmm0
+vpaddd %xmm1, %xmm6, %xmm1
+vpxor %xmm15, %xmm0, %xmm15
+vpxor 96(%rsp), %xmm1, %xmm12
+vpaddd %xmm2, %xmm7, %xmm2
+vpaddd %xmm3, %xmm4, %xmm3
+vpxor %xmm13, %xmm2, %xmm13
+vpxor %xmm14, %xmm3, %xmm14
+vpshufb 448(%rsp), %xmm15, %xmm15
+vpshufb 448(%rsp), %xmm12, %xmm12
+vpaddd %xmm10, %xmm15, %xmm10
+vpaddd %xmm11, %xmm12, %xmm11
+vpshufb 448(%rsp), %xmm13, %xmm13
+vpshufb 448(%rsp), %xmm14, %xmm14
+vpaddd %xmm8, %xmm13, %xmm8
+vpaddd %xmm9, %xmm14, %xmm9
+vmovdqa %xmm15, 96(%rsp)
+vpxor %xmm5, %xmm10, %xmm5
+vpxor %xmm6, %xmm11, %xmm6
+vpslld $ 12, %xmm5, %xmm15
+vpsrld $20, %xmm5, %xmm5
+vpxor %xmm5, %xmm15, %xmm5
+vpslld $ 12, %xmm6, %xmm15
+vpsrld $20, %xmm6, %xmm6
+vpxor %xmm6, %xmm15, %xmm6
+vpxor %xmm7, %xmm8, %xmm7
+vpxor %xmm4, %xmm9, %xmm4
+vpslld $ 12, %xmm7, %xmm15
+vpsrld $20, %xmm7, %xmm7
+vpxor %xmm7, %xmm15, %xmm7
+vpslld $ 12, %xmm4, %xmm15
+vpsrld $20, %xmm4, %xmm4
+vpxor %xmm4, %xmm15, %xmm4
+vpaddd %xmm0, %xmm5, %xmm0
+vpaddd %xmm1, %xmm6, %xmm1
+vpxor 96(%rsp), %xmm0, %xmm15
+vpxor %xmm12, %xmm1, %xmm12
+vpaddd %xmm2, %xmm7, %xmm2
+vpaddd %xmm3, %xmm4, %xmm3
+vpxor %xmm13, %xmm2, %xmm13
+vpxor %xmm14, %xmm3, %xmm14
+vpshufb 480(%rsp), %xmm15, %xmm15
+vpshufb 480(%rsp), %xmm12, %xmm12
+vpaddd %xmm10, %xmm15, %xmm10
+vpaddd %xmm11, %xmm12, %xmm11
+vpshufb 480(%rsp), %xmm13, %xmm13
+vpshufb 480(%rsp), %xmm14, %xmm14
+vpaddd %xmm8, %xmm13, %xmm8
+vpaddd %xmm9, %xmm14, %xmm9
+vmovdqa %xmm15, 96(%rsp)
+vpxor %xmm5, %xmm10, %xmm5
+vpxor %xmm6, %xmm11, %xmm6
+vpslld $ 7, %xmm5, %xmm15
+vpsrld $25, %xmm5, %xmm5
+vpxor %xmm5, %xmm15, %xmm5
+vpslld $ 7, %xmm6, %xmm15
+vpsrld $25, %xmm6, %xmm6
+vpxor %xmm6, %xmm15, %xmm6
+vpxor %xmm7, %xmm8, %xmm7
+vpxor %xmm4, %xmm9, %xmm4
+vpslld $ 7, %xmm7, %xmm15
+vpsrld $25, %xmm7, %xmm7
+vpxor %xmm7, %xmm15, %xmm7
+vpslld $ 7, %xmm4, %xmm15
+vpsrld $25, %xmm4, %xmm4
+vpxor %xmm4, %xmm15, %xmm4
+vmovdqa 96(%rsp), %xmm15
+subq $2, %rax
+jnz chacha_blocks_avx2_mainloop2
+vmovdqa %xmm8, 192(%rsp)
+vmovdqa %xmm9, 208(%rsp)
+vmovdqa %xmm10, 224(%rsp)
+vmovdqa %xmm11, 240(%rsp)
+vmovdqa %xmm12, 256(%rsp)
+vmovdqa %xmm13, 272(%rsp)
+vmovdqa %xmm14, 288(%rsp)
+vmovdqa %xmm15, 304(%rsp)
+vpbroadcastd 0(%rsp), %xmm8
+vpbroadcastd 4+0(%rsp), %xmm9
+vpbroadcastd 8+0(%rsp), %xmm10
+vpbroadcastd 12+0(%rsp), %xmm11
+vpbroadcastd 16(%rsp), %xmm12
+vpbroadcastd 4+16(%rsp), %xmm13
+vpbroadcastd 8+16(%rsp), %xmm14
+vpbroadcastd 12+16(%rsp), %xmm15
+vpaddd %xmm8, %xmm0, %xmm0
+vpaddd %xmm9, %xmm1, %xmm1
+vpaddd %xmm10, %xmm2, %xmm2
+vpaddd %xmm11, %xmm3, %xmm3
+vpaddd %xmm12, %xmm4, %xmm4
+vpaddd %xmm13, %xmm5, %xmm5
+vpaddd %xmm14, %xmm6, %xmm6
+vpaddd %xmm15, %xmm7, %xmm7
+vpunpckldq %xmm1, %xmm0, %xmm8
+vpunpckldq %xmm3, %xmm2, %xmm9
+vpunpckhdq %xmm1, %xmm0, %xmm12
+vpunpckhdq %xmm3, %xmm2, %xmm13
+vpunpckldq %xmm5, %xmm4, %xmm10
+vpunpckldq %xmm7, %xmm6, %xmm11
+vpunpckhdq %xmm5, %xmm4, %xmm14
+vpunpckhdq %xmm7, %xmm6, %xmm15
+vpunpcklqdq %xmm9, %xmm8, %xmm0
+vpunpcklqdq %xmm11, %xmm10, %xmm1
+vpunpckhqdq %xmm9, %xmm8, %xmm2
+vpunpckhqdq %xmm11, %xmm10, %xmm3
+vpunpcklqdq %xmm13, %xmm12, %xmm4
+vpunpcklqdq %xmm15, %xmm14, %xmm5
+vpunpckhqdq %xmm13, %xmm12, %xmm6
+vpunpckhqdq %xmm15, %xmm14, %xmm7
+andq %rsi, %rsi
+jz chacha_blocks_avx2_noinput2
+vpxor 0(%rsi), %xmm0, %xmm0
+vpxor 16(%rsi), %xmm1, %xmm1
+vpxor 64(%rsi), %xmm2, %xmm2
+vpxor 80(%rsi), %xmm3, %xmm3
+vpxor 128(%rsi), %xmm4, %xmm4
+vpxor 144(%rsi), %xmm5, %xmm5
+vpxor 192(%rsi), %xmm6, %xmm6
+vpxor 208(%rsi), %xmm7, %xmm7
+vmovdqu %xmm0, 0(%rdx)
+vmovdqu %xmm1, 16(%rdx)
+vmovdqu %xmm2, 64(%rdx)
+vmovdqu %xmm3, 80(%rdx)
+vmovdqu %xmm4, 128(%rdx)
+vmovdqu %xmm5, 144(%rdx)
+vmovdqu %xmm6, 192(%rdx)
+vmovdqu %xmm7, 208(%rdx)
+vmovdqa 192(%rsp), %xmm0
+vmovdqa 208(%rsp), %xmm1
+vmovdqa 224(%rsp), %xmm2
+vmovdqa 240(%rsp), %xmm3
+vmovdqa 256(%rsp), %xmm4
+vmovdqa 272(%rsp), %xmm5
+vmovdqa 288(%rsp), %xmm6
+vmovdqa 304(%rsp), %xmm7
+vpbroadcastd 32(%rsp), %xmm8
+vpbroadcastd 4+32(%rsp), %xmm9
+vpbroadcastd 8+32(%rsp), %xmm10
+vpbroadcastd 12+32(%rsp), %xmm11
+vmovdqa 128(%rsp), %xmm12
+vmovdqa 160(%rsp), %xmm13
+vpbroadcastd 8+48(%rsp), %xmm14
+vpbroadcastd 12+48(%rsp), %xmm15
+vpaddd %xmm8, %xmm0, %xmm0
+vpaddd %xmm9, %xmm1, %xmm1
+vpaddd %xmm10, %xmm2, %xmm2
+vpaddd %xmm11, %xmm3, %xmm3
+vpaddd %xmm12, %xmm4, %xmm4
+vpaddd %xmm13, %xmm5, %xmm5
+vpaddd %xmm14, %xmm6, %xmm6
+vpaddd %xmm15, %xmm7, %xmm7
+vpunpckldq %xmm1, %xmm0, %xmm8
+vpunpckldq %xmm3, %xmm2, %xmm9
+vpunpckhdq %xmm1, %xmm0, %xmm12
+vpunpckhdq %xmm3, %xmm2, %xmm13
+vpunpckldq %xmm5, %xmm4, %xmm10
+vpunpckldq %xmm7, %xmm6, %xmm11
+vpunpckhdq %xmm5, %xmm4, %xmm14
+vpunpckhdq %xmm7, %xmm6, %xmm15
+vpunpcklqdq %xmm9, %xmm8, %xmm0
+vpunpcklqdq %xmm11, %xmm10, %xmm1
+vpunpckhqdq %xmm9, %xmm8, %xmm2
+vpunpckhqdq %xmm11, %xmm10, %xmm3
+vpunpcklqdq %xmm13, %xmm12, %xmm4
+vpunpcklqdq %xmm15, %xmm14, %xmm5
+vpunpckhqdq %xmm13, %xmm12, %xmm6
+vpunpckhqdq %xmm15, %xmm14, %xmm7
+vpxor 32(%rsi), %xmm0, %xmm0
+vpxor 48(%rsi), %xmm1, %xmm1
+vpxor 96(%rsi), %xmm2, %xmm2
+vpxor 112(%rsi), %xmm3, %xmm3
+vpxor 160(%rsi), %xmm4, %xmm4
+vpxor 176(%rsi), %xmm5, %xmm5
+vpxor 224(%rsi), %xmm6, %xmm6
+vpxor 240(%rsi), %xmm7, %xmm7
+vmovdqu %xmm0, 32(%rdx)
+vmovdqu %xmm1, 48(%rdx)
+vmovdqu %xmm2, 96(%rdx)
+vmovdqu %xmm3, 112(%rdx)
+vmovdqu %xmm4, 160(%rdx)
+vmovdqu %xmm5, 176(%rdx)
+vmovdqu %xmm6, 224(%rdx)
+vmovdqu %xmm7, 240(%rdx)
+addq $256, %rsi
+jmp chacha_blocks_avx2_mainloop2_cont
+chacha_blocks_avx2_noinput2:
+vmovdqu %xmm0, 0(%rdx)
+vmovdqu %xmm1, 16(%rdx)
+vmovdqu %xmm2, 64(%rdx)
+vmovdqu %xmm3, 80(%rdx)
+vmovdqu %xmm4, 128(%rdx)
+vmovdqu %xmm5, 144(%rdx)
+vmovdqu %xmm6, 192(%rdx)
+vmovdqu %xmm7, 208(%rdx)
+vmovdqa 192(%rsp), %xmm0
+vmovdqa 208(%rsp), %xmm1
+vmovdqa 224(%rsp), %xmm2
+vmovdqa 240(%rsp), %xmm3
+vmovdqa 256(%rsp), %xmm4
+vmovdqa 272(%rsp), %xmm5
+vmovdqa 288(%rsp), %xmm6
+vmovdqa 304(%rsp), %xmm7
+vpbroadcastd 32(%rsp), %xmm8
+vpbroadcastd 4+32(%rsp), %xmm9
+vpbroadcastd 8+32(%rsp), %xmm10
+vpbroadcastd 12+32(%rsp), %xmm11
+vmovdqa 128(%rsp), %xmm12
+vmovdqa 160(%rsp), %xmm13
+vpbroadcastd 8+48(%rsp), %xmm14
+vpbroadcastd 12+48(%rsp), %xmm15
+vpaddd %xmm8, %xmm0, %xmm0
+vpaddd %xmm9, %xmm1, %xmm1
+vpaddd %xmm10, %xmm2, %xmm2
+vpaddd %xmm11, %xmm3, %xmm3
+vpaddd %xmm12, %xmm4, %xmm4
+vpaddd %xmm13, %xmm5, %xmm5
+vpaddd %xmm14, %xmm6, %xmm6
+vpaddd %xmm15, %xmm7, %xmm7
+vpunpckldq %xmm1, %xmm0, %xmm8
+vpunpckldq %xmm3, %xmm2, %xmm9
+vpunpckhdq %xmm1, %xmm0, %xmm12
+vpunpckhdq %xmm3, %xmm2, %xmm13
+vpunpckldq %xmm5, %xmm4, %xmm10
+vpunpckldq %xmm7, %xmm6, %xmm11
+vpunpckhdq %xmm5, %xmm4, %xmm14
+vpunpckhdq %xmm7, %xmm6, %xmm15
+vpunpcklqdq %xmm9, %xmm8, %xmm0
+vpunpcklqdq %xmm11, %xmm10, %xmm1
+vpunpckhqdq %xmm9, %xmm8, %xmm2
+vpunpckhqdq %xmm11, %xmm10, %xmm3
+vpunpcklqdq %xmm13, %xmm12, %xmm4
+vpunpcklqdq %xmm15, %xmm14, %xmm5
+vpunpckhqdq %xmm13, %xmm12, %xmm6
+vpunpckhqdq %xmm15, %xmm14, %xmm7
+vmovdqu %xmm0, 32(%rdx)
+vmovdqu %xmm1, 48(%rdx)
+vmovdqu %xmm2, 96(%rdx)
+vmovdqu %xmm3, 112(%rdx)
+vmovdqu %xmm4, 160(%rdx)
+vmovdqu %xmm5, 176(%rdx)
+vmovdqu %xmm6, 224(%rdx)
+vmovdqu %xmm7, 240(%rdx)
+chacha_blocks_avx2_mainloop2_cont:
+addq $256, %rdx
+subq $256, %rcx
+cmp $256, %rcx
+jae chacha_blocks_avx2_atleast256
+chacha_blocks_avx2_below256_fixup:
+vmovdqa 448(%rsp), %xmm6
+vmovdqa 480(%rsp), %xmm7
+vmovdqa 0(%rsp), %xmm8
+vmovdqa 16(%rsp), %xmm9
+vmovdqa 32(%rsp), %xmm10
+vmovdqa 48(%rsp), %xmm11
+movq $1, %r9
+chacha_blocks_avx2_below256:
+vmovq %r9, %xmm5
+andq %rcx, %rcx
+jz chacha_blocks_avx2_done
+cmpq $64, %rcx
+jae chacha_blocks_avx2_above63
+movq %rdx, %r9
+andq %rsi, %rsi
+jz chacha_blocks_avx2_noinput3
+movq %rcx, %r10
+movq %rsp, %rdx
+addq %r10, %rsi
+addq %r10, %rdx
+negq %r10
+chacha_blocks_avx2_copyinput:
+movb (%rsi, %r10), %al
+movb %al, (%rdx, %r10)
+incq %r10
+jnz chacha_blocks_avx2_copyinput
+movq %rsp, %rsi
+chacha_blocks_avx2_noinput3:
+movq %rsp, %rdx
+chacha_blocks_avx2_above63:
+vmovdqa %xmm8, %xmm0
+vmovdqa %xmm9, %xmm1
+vmovdqa %xmm10, %xmm2
+vmovdqa %xmm11, %xmm3
+movq 64(%rsp), %rax
+chacha_blocks_avx2_mainloop3:
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm6, %xmm3, %xmm3
+vpaddd %xmm2, %xmm3, %xmm2
+vpxor %xmm1, %xmm2, %xmm1
+vpslld $12, %xmm1, %xmm4
+vpsrld $20, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm7, %xmm3, %xmm3
+vpshufd $0x93, %xmm0, %xmm0
+vpaddd %xmm2, %xmm3, %xmm2
+vpshufd $0x4e, %xmm3, %xmm3
+vpxor %xmm1, %xmm2, %xmm1
+vpshufd $0x39, %xmm2, %xmm2
+vpslld $7, %xmm1, %xmm4
+vpsrld $25, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm6, %xmm3, %xmm3
+vpaddd %xmm2, %xmm3, %xmm2
+vpxor %xmm1, %xmm2, %xmm1
+vpslld $12, %xmm1, %xmm4
+vpsrld $20, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm7, %xmm3, %xmm3
+vpshufd $0x39, %xmm0, %xmm0
+vpaddd %xmm2, %xmm3, %xmm2
+vpshufd $0x4e, %xmm3, %xmm3
+vpxor %xmm1, %xmm2, %xmm1
+vpshufd $0x93, %xmm2, %xmm2
+vpslld $7, %xmm1, %xmm4
+vpsrld $25, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+subq $2, %rax
+jnz chacha_blocks_avx2_mainloop3
+vpaddd %xmm0, %xmm8, %xmm0
+vpaddd %xmm1, %xmm9, %xmm1
+vpaddd %xmm2, %xmm10, %xmm2
+vpaddd %xmm3, %xmm11, %xmm3
+andq %rsi, %rsi
+jz chacha_blocks_avx2_noinput4
+vpxor 0(%rsi), %xmm0, %xmm0
+vpxor 16(%rsi), %xmm1, %xmm1
+vpxor 32(%rsi), %xmm2, %xmm2
+vpxor 48(%rsi), %xmm3, %xmm3
+addq $64, %rsi
+chacha_blocks_avx2_noinput4:
+vmovdqu %xmm0, 0(%rdx)
+vmovdqu %xmm1, 16(%rdx)
+vmovdqu %xmm2, 32(%rdx)
+vmovdqu %xmm3, 48(%rdx)
+vpaddq %xmm11, %xmm5, %xmm11
+cmpq $64, %rcx
+jbe chacha_blocks_avx2_mainloop3_finishup
+addq $64, %rdx
+subq $64, %rcx
+jmp chacha_blocks_avx2_below256
+chacha_blocks_avx2_mainloop3_finishup:
+cmpq $64, %rcx
+je chacha_blocks_avx2_done
+addq %rcx, %r9
+addq %rcx, %rdx
+negq %rcx
+chacha_blocks_avx2_copyoutput:
+movb (%rdx, %rcx), %al
+movb %al, (%r9, %rcx)
+incq %rcx
+jnz chacha_blocks_avx2_copyoutput
+chacha_blocks_avx2_done:
+vmovdqa %xmm11, 32(%rdi)
+movq %rbp, %rsp
+popq %r14
+popq %r13
+popq %r12
+popq %rbp
+popq %rbx
+vzeroupper
+ret
+ENDFN(chacha_blocks)
+
+
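+/* HChaCha core: runs the double rounds over the constant/key/input rows and emits rows
+   0 and 3 of the final state, without the feed-forward addition used by the plain block
+   function, which is what the HChaCha construction calls for. */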
+FN(hchacha)
+hchacha_avx2_local:
+leaq C(%rip), %rax
+vmovdqa 0(%rax), %xmm0
+vmovdqa 16(%rax), %xmm6
+vmovdqa 32(%rax), %xmm5
+vmovdqu 0(%rdi), %xmm1
+vmovdqu 16(%rdi), %xmm2
+vmovdqu 0(%rsi), %xmm3
+hhacha_mainloop_avx2:
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm6, %xmm3, %xmm3
+vpaddd %xmm2, %xmm3, %xmm2
+vpxor %xmm1, %xmm2, %xmm1
+vpslld $12, %xmm1, %xmm4
+vpsrld $20, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm5, %xmm3, %xmm3
+vpaddd %xmm2, %xmm3, %xmm2
+vpxor %xmm1, %xmm2, %xmm1
+vpslld $7, %xmm1, %xmm4
+vpsrld $25, %xmm1, %xmm1
+vpshufd $0x93, %xmm0, %xmm0
+vpxor %xmm1, %xmm4, %xmm1
+vpshufd $0x4e, %xmm3, %xmm3
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm6, %xmm3, %xmm3
+vpshufd $0x39, %xmm2, %xmm2
+vpaddd %xmm2, %xmm3, %xmm2
+vpxor %xmm1, %xmm2, %xmm1
+vpslld $12, %xmm1, %xmm4
+vpsrld $20, %xmm1, %xmm1
+vpxor %xmm1, %xmm4, %xmm1
+vpaddd %xmm0, %xmm1, %xmm0
+vpxor %xmm3, %xmm0, %xmm3
+vpshufb %xmm5, %xmm3, %xmm3
+vpaddd %xmm2, %xmm3, %xmm2
+vpxor %xmm1, %xmm2, %xmm1
+vpshufd $0x39, %xmm0, %xmm0
+vpslld $7, %xmm1, %xmm4
+vpshufd $0x4e, %xmm3, %xmm3
+vpsrld $25, %xmm1, %xmm1
+vpshufd $0x93, %xmm2, %xmm2
+vpxor %xmm1, %xmm4, %xmm1
+subl $2, %ecx
+jne hhacha_mainloop_avx2
+vmovdqu %xmm0, (%rdx)
+vmovdqu %xmm3, 16(%rdx)
+ret
+ENDFN(hchacha)
+
+FN(chacha)
+pushq %rbp
+movq %rsp, %rbp
+subq $64, %rsp
+andq $~63, %rsp
+vmovdqu 0(%rdi), %xmm0
+vmovdqu 16(%rdi), %xmm1
+vmovdqa %xmm0, 0(%rsp)
+vmovdqa %xmm1, 16(%rsp)
+xorq %rdi, %rdi
+movq %rdi, 32(%rsp)
+movq 0(%rsi), %rsi
+movq %rsi, 40(%rsp)
+movq %r9, 48(%rsp)
+movq %rsp, %rdi
+movq %rdx, %rsi
+movq %rcx, %rdx
+movq %r8, %rcx
+call chacha_blocks_avx2_local
+vpxor %xmm0, %xmm0, %xmm0
+vmovdqa %xmm0, 0(%rsp)
+vmovdqa %xmm0, 16(%rsp)
+vmovdqa %xmm0, 32(%rsp)
+movq %rbp, %rsp
+popq %rbp
+ret
+ENDFN(chacha)
+
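+/* xchacha: run hchacha on the key and the first 16 bytes of the nonce to derive a subkey, then stream using the remaining 8 nonce bytes */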
+FN(xchacha)
+pushq %rbp
+pushq %rbx
+movq %rsp, %rbp
+subq $64, %rsp
+andq $~63, %rsp
+movq %rsp, %rbx
+xorq %rax, %rax
+movq %rax, 32(%rbx)
+movq 16(%rsi), %rax
+movq %rax, 40(%rbx)
+movq %r9, 48(%rbx)
+pushq %rdx
+pushq %rcx
+pushq %r8
+movq %rbx, %rdx
+movq %r9, %rcx
+call hchacha_avx2_local
+movq %rbx, %rdi
+popq %rcx
+popq %rdx
+popq %rsi
+call chacha_blocks_avx2_local
+vpxor %xmm0, %xmm0, %xmm0
+vmovdqa %xmm0, 0(%rbx)
+vmovdqa %xmm0, 16(%rbx)
+vmovdqa %xmm0, 32(%rbx)
+movq %rbp, %rsp
+popq %rbx
+popq %rbp
+ret
+ENDFN(xchacha)
+
+
+.section .rodata, "a"
+.p2align 4,,15
+C:
+.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 /* "expand 32-byte k" */
+.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13       /* pshufb rotate by 16 */
+.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14       /* pshufb rotate by 8 */
+
+ENDFILE()
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/consts.c b/crypt/liboqs/kex_rlwe_newhope/avx2/consts.c
new file mode 100644
index 0000000000000000000000000000000000000000..96c7d36a6c8b6a1ff479a7ce97f7c4f06ff3c9c2
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/consts.c
@@ -0,0 +1,19 @@
+#include <stdint.h>
+#include "params.h"
+
+uint8_t mask1[32] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+uint32_t vrshiftsx8[8] = {0,1,2,3,4,5,6,7};
+uint32_t maskffff[8] = {0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff};
+uint16_t maskff[16] = {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff};
+
+double q8[4] = {PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q};
+uint32_t q8x[8] = {PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q, PARAM_Q};
+uint32_t v1x8[8] = {1,1,1,1,1,1,1,1};
+uint32_t v3x8[8] = {3,3,3,3,3,3,3,3};
+uint32_t v2730x8[8] = {2730,2730,2730,2730,2730,2730,2730,2730};
+
+
+double qinv16[4] = {.00008137358613394092,.00008137358613394092,.00008137358613394092,.00008137358613394092};
+double neg2[4] = {1.,-1.,1.,-1.};
+double neg4[4] = {1.,1.,-1.,-1.};
+
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.c b/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.c
new file mode 100644
index 0000000000000000000000000000000000000000..e6803cef5d2f16d06ed1e303a369dda4ea5cfb08
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.c
@@ -0,0 +1,9 @@
+#include "cpucycles.h"
+
+long long cpucycles(void)
+{
+  unsigned long long result;
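+  /* .byte 15; .byte 49 encodes the rdtsc instruction; the 64-bit timestamp is (rdx << 32) | rax */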
+  asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
+    : "=a" (result) ::  "%rdx");
+  return result;
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.h b/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.h
new file mode 100644
index 0000000000000000000000000000000000000000..7aac8a45d05105cc8b426415fd5938140caa0217
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/cpucycles.h
@@ -0,0 +1,6 @@
+#ifndef CPUCYCLES_H
+#define CPUCYCLES_H
+
+long long cpucycles(void);
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.c b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.c
new file mode 100644
index 0000000000000000000000000000000000000000..48159c22899ab3fc125a66eb7210d17b89ebb455
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.c
@@ -0,0 +1,278 @@
+/*
+20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#define blocks crypto_hashblocks_sha256
+
+typedef unsigned int uint32;
+
+static uint32 load_bigendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[3])
+  | (((uint32) (x[2])) << 8)
+  | (((uint32) (x[1])) << 16)
+  | (((uint32) (x[0])) << 24)
+  ;
+}
+
+static void store_bigendian(unsigned char *x,uint32 u)
+{
+  x[3] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[0] = u;
+}
+
+#define SHR(x,c) ((x) >> (c))
+#define ROTR(x,c) (((x) >> (c)) | ((x) << (32 - (c))))
+
+#define Ch(x,y,z) ((x & y) ^ (~x & z))
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22))
+#define Sigma1(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25))
+#define sigma0(x) (ROTR(x, 7) ^ ROTR(x,18) ^ SHR(x, 3))
+#define sigma1(x) (ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10))
+
+#define M(w0,w14,w9,w1) w0 = sigma1(w14) + w9 + sigma0(w1) + w0;
+
+#define EXPAND \
+  M(w0 ,w14,w9 ,w1 ) \
+  M(w1 ,w15,w10,w2 ) \
+  M(w2 ,w0 ,w11,w3 ) \
+  M(w3 ,w1 ,w12,w4 ) \
+  M(w4 ,w2 ,w13,w5 ) \
+  M(w5 ,w3 ,w14,w6 ) \
+  M(w6 ,w4 ,w15,w7 ) \
+  M(w7 ,w5 ,w0 ,w8 ) \
+  M(w8 ,w6 ,w1 ,w9 ) \
+  M(w9 ,w7 ,w2 ,w10) \
+  M(w10,w8 ,w3 ,w11) \
+  M(w11,w9 ,w4 ,w12) \
+  M(w12,w10,w5 ,w13) \
+  M(w13,w11,w6 ,w14) \
+  M(w14,w12,w7 ,w15) \
+  M(w15,w13,w8 ,w0 )
+
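+/* F: one SHA-256 compression round using round constant k and message word w */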
+#define F(w,k) \
+  T1 = h + Sigma1(e) + Ch(e,f,g) + k + w; \
+  T2 = Sigma0(a) + Maj(a,b,c); \
+  h = g; \
+  g = f; \
+  f = e; \
+  e = d + T1; \
+  d = c; \
+  c = b; \
+  b = a; \
+  a = T1 + T2;
+
+static int crypto_hashblocks_sha256(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
+{
+  uint32 state[8];
+  uint32 a;
+  uint32 b;
+  uint32 c;
+  uint32 d;
+  uint32 e;
+  uint32 f;
+  uint32 g;
+  uint32 h;
+  uint32 T1;
+  uint32 T2;
+
+  a = load_bigendian(statebytes +  0); state[0] = a;
+  b = load_bigendian(statebytes +  4); state[1] = b;
+  c = load_bigendian(statebytes +  8); state[2] = c;
+  d = load_bigendian(statebytes + 12); state[3] = d;
+  e = load_bigendian(statebytes + 16); state[4] = e;
+  f = load_bigendian(statebytes + 20); state[5] = f;
+  g = load_bigendian(statebytes + 24); state[6] = g;
+  h = load_bigendian(statebytes + 28); state[7] = h;
+
+  while (inlen >= 64) {
+    uint32 w0  = load_bigendian(in +  0);
+    uint32 w1  = load_bigendian(in +  4);
+    uint32 w2  = load_bigendian(in +  8);
+    uint32 w3  = load_bigendian(in + 12);
+    uint32 w4  = load_bigendian(in + 16);
+    uint32 w5  = load_bigendian(in + 20);
+    uint32 w6  = load_bigendian(in + 24);
+    uint32 w7  = load_bigendian(in + 28);
+    uint32 w8  = load_bigendian(in + 32);
+    uint32 w9  = load_bigendian(in + 36);
+    uint32 w10 = load_bigendian(in + 40);
+    uint32 w11 = load_bigendian(in + 44);
+    uint32 w12 = load_bigendian(in + 48);
+    uint32 w13 = load_bigendian(in + 52);
+    uint32 w14 = load_bigendian(in + 56);
+    uint32 w15 = load_bigendian(in + 60);
+
+    F(w0 ,0x428a2f98)
+    F(w1 ,0x71374491)
+    F(w2 ,0xb5c0fbcf)
+    F(w3 ,0xe9b5dba5)
+    F(w4 ,0x3956c25b)
+    F(w5 ,0x59f111f1)
+    F(w6 ,0x923f82a4)
+    F(w7 ,0xab1c5ed5)
+    F(w8 ,0xd807aa98)
+    F(w9 ,0x12835b01)
+    F(w10,0x243185be)
+    F(w11,0x550c7dc3)
+    F(w12,0x72be5d74)
+    F(w13,0x80deb1fe)
+    F(w14,0x9bdc06a7)
+    F(w15,0xc19bf174)
+
+    EXPAND
+
+    F(w0 ,0xe49b69c1)
+    F(w1 ,0xefbe4786)
+    F(w2 ,0x0fc19dc6)
+    F(w3 ,0x240ca1cc)
+    F(w4 ,0x2de92c6f)
+    F(w5 ,0x4a7484aa)
+    F(w6 ,0x5cb0a9dc)
+    F(w7 ,0x76f988da)
+    F(w8 ,0x983e5152)
+    F(w9 ,0xa831c66d)
+    F(w10,0xb00327c8)
+    F(w11,0xbf597fc7)
+    F(w12,0xc6e00bf3)
+    F(w13,0xd5a79147)
+    F(w14,0x06ca6351)
+    F(w15,0x14292967)
+
+    EXPAND
+
+    F(w0 ,0x27b70a85)
+    F(w1 ,0x2e1b2138)
+    F(w2 ,0x4d2c6dfc)
+    F(w3 ,0x53380d13)
+    F(w4 ,0x650a7354)
+    F(w5 ,0x766a0abb)
+    F(w6 ,0x81c2c92e)
+    F(w7 ,0x92722c85)
+    F(w8 ,0xa2bfe8a1)
+    F(w9 ,0xa81a664b)
+    F(w10,0xc24b8b70)
+    F(w11,0xc76c51a3)
+    F(w12,0xd192e819)
+    F(w13,0xd6990624)
+    F(w14,0xf40e3585)
+    F(w15,0x106aa070)
+
+    EXPAND
+
+    F(w0 ,0x19a4c116)
+    F(w1 ,0x1e376c08)
+    F(w2 ,0x2748774c)
+    F(w3 ,0x34b0bcb5)
+    F(w4 ,0x391c0cb3)
+    F(w5 ,0x4ed8aa4a)
+    F(w6 ,0x5b9cca4f)
+    F(w7 ,0x682e6ff3)
+    F(w8 ,0x748f82ee)
+    F(w9 ,0x78a5636f)
+    F(w10,0x84c87814)
+    F(w11,0x8cc70208)
+    F(w12,0x90befffa)
+    F(w13,0xa4506ceb)
+    F(w14,0xbef9a3f7)
+    F(w15,0xc67178f2)
+
+    a += state[0];
+    b += state[1];
+    c += state[2];
+    d += state[3];
+    e += state[4];
+    f += state[5];
+    g += state[6];
+    h += state[7];
+  
+    state[0] = a;
+    state[1] = b;
+    state[2] = c;
+    state[3] = d;
+    state[4] = e;
+    state[5] = f;
+    state[6] = g;
+    state[7] = h;
+
+    in += 64;
+    inlen -= 64;
+  }
+
+  store_bigendian(statebytes +  0,state[0]);
+  store_bigendian(statebytes +  4,state[1]);
+  store_bigendian(statebytes +  8,state[2]);
+  store_bigendian(statebytes + 12,state[3]);
+  store_bigendian(statebytes + 16,state[4]);
+  store_bigendian(statebytes + 20,state[5]);
+  store_bigendian(statebytes + 24,state[6]);
+  store_bigendian(statebytes + 28,state[7]);
+
+  return inlen;
+}
+
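+/* SHA-256 initial hash values H0..H7, stored as big-endian bytes */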
+static const char iv[32] = {
+  0x6a,0x09,0xe6,0x67,
+  0xbb,0x67,0xae,0x85,
+  0x3c,0x6e,0xf3,0x72,
+  0xa5,0x4f,0xf5,0x3a,
+  0x51,0x0e,0x52,0x7f,
+  0x9b,0x05,0x68,0x8c,
+  0x1f,0x83,0xd9,0xab,
+  0x5b,0xe0,0xcd,0x19,
+} ;
+
+int crypto_hash_sha256(unsigned char *out,const unsigned char *in,unsigned long long inlen)
+{
+  unsigned char h[32];
+  unsigned char padded[128];
+  unsigned long long i;
+  unsigned long long bits = inlen << 3;
+
+  for (i = 0;i < 32;++i) h[i] = iv[i];
+
+  blocks(h,in,inlen);
+  in += inlen;
+  inlen &= 63;
+  in -= inlen;
+
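+  /* standard SHA-256 padding: append 0x80, zero-fill, then the 64-bit big-endian message length in bits */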
+  for (i = 0;i < inlen;++i) padded[i] = in[i];
+  padded[inlen] = 0x80;
+
+  if (inlen < 56) {
+    for (i = inlen + 1;i < 56;++i) padded[i] = 0;
+    padded[56] = bits >> 56;
+    padded[57] = bits >> 48;
+    padded[58] = bits >> 40;
+    padded[59] = bits >> 32;
+    padded[60] = bits >> 24;
+    padded[61] = bits >> 16;
+    padded[62] = bits >> 8;
+    padded[63] = bits;
+    blocks(h,padded,64);
+  } else {
+    for (i = inlen + 1;i < 120;++i) padded[i] = 0;
+    padded[120] = bits >> 56;
+    padded[121] = bits >> 48;
+    padded[122] = bits >> 40;
+    padded[123] = bits >> 32;
+    padded[124] = bits >> 24;
+    padded[125] = bits >> 16;
+    padded[126] = bits >> 8;
+    padded[127] = bits;
+    blocks(h,padded,128);
+  }
+
+  for (i = 0;i < 32;++i) out[i] = h[i];
+
+  return 0;
+}
+
+
+
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.h b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.h
new file mode 100644
index 0000000000000000000000000000000000000000..4717f0983666cc1c94fc46147144f32bb8355237
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_hash_sha256.h
@@ -0,0 +1,10 @@
+#ifndef CRYPTO_HASH_SHA256_H
+#define CRYPTO_HASH_SHA256_H
+
+int crypto_hashblocks_sha256(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen);
+
+int crypto_hash_sha256(unsigned char *out,const unsigned char *in,unsigned long long inlen);
+
+#define crypto_hash_sha256_BYTES 32
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream.h b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream.h
new file mode 100644
index 0000000000000000000000000000000000000000..aab80525ed832315fab036649d45e38c4e1006eb
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream.h
@@ -0,0 +1,16 @@
+#ifndef CRYPTO_STREAM_H
+#define CRYPTO_STREAM_H
+
+#ifdef TESTVECTORS
+  #include "crypto_stream_chacha20.h"
+  #define CRYPTO_STREAM_KEYBYTES 32
+  #define CRYPTO_STREAM_NONCEBYTES 8
+  #define crypto_stream crypto_stream_chacha20
+#else
+  #include "crypto_stream_aes256ctr.h"
+  #define CRYPTO_STREAM_KEYBYTES 32
+  #define CRYPTO_STREAM_NONCEBYTES 16
+  #define crypto_stream crypto_stream_aes256ctr
+#endif
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.c b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.c
new file mode 100644
index 0000000000000000000000000000000000000000..3b745514265ff75858bb07a76567269fe6d7d06c
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.c
@@ -0,0 +1,242 @@
+/*
+  aesenc-int.c version $Date: 2014/08/22 16:49:12 $
+  AES-CTR
+  Romain Dolbeau
+  Public Domain
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+#include "crypto_stream_aes256ctr.h"
+
+#ifdef __INTEL_COMPILER
+#define ALIGN16 __declspec(align(16))
+#define ALIGN32 __declspec(align(32))
+#define ALIGN64 __declspec(align(64))
+#else // assume GCC
+#define ALIGN16  __attribute__((aligned(16)))
+#define ALIGN32  __attribute__((aligned(32)))
+#define ALIGN64  __attribute__((aligned(64)))
+#define _bswap64(a) __builtin_bswap64(a)
+#define _bswap(a) __builtin_bswap(a)
+#endif
+
+static inline void aesni_key256_expand(const unsigned char* key, __m128 rkeys[16]) {
+  __m128 key0 = _mm_loadu_ps((const float *)(key+0));
+  __m128 key1 = _mm_loadu_ps((const float *)(key+16));
+  __m128 temp0, temp1, temp2, temp4;
+  int idx = 0;
+
+  rkeys[idx++] = key0;
+  temp0 = key0;
+  temp2 = key1;
+  temp4 = _mm_setzero_ps();
+
+  /* why single precision floating-point rather than integer instructions ?
+     because _mm_shuffle_ps takes two inputs, while _mm_shuffle_epi32 only
+     takes one - it doesn't perform the same computation...
+     _mm_shuffle_ps takes the lower 64 bits of the result from the first
+     operand, and the higher 64 bits of the result from the second operand
+     (in both cases, all four input floats are accessible).
+     I don't like the non-orthogonal naming scheme :-(
+     
+     This is all strongly inspired by the openssl assembly code.
+  */
+#define BLOCK1(IMM)                                                     \
+  temp1 = (__m128)_mm_aeskeygenassist_si128((__m128i)temp2, IMM);       \
+  rkeys[idx++] = temp2;                                                 \
+  temp4 = _mm_shuffle_ps(temp4, temp0, 0x10);                           \
+  temp0 = _mm_xor_ps(temp0, temp4);                                     \
+  temp4 = _mm_shuffle_ps(temp4, temp0, 0x8c);                           \
+  temp0 = _mm_xor_ps(temp0, temp4);                                     \
+  temp1 = _mm_shuffle_ps(temp1, temp1, 0xff);                           \
+  temp0 = _mm_xor_ps(temp0, temp1)
+  
+#define BLOCK2(IMM)                                                     \
+  temp1 = (__m128)_mm_aeskeygenassist_si128((__m128i)temp0, IMM);       \
+  rkeys[idx++] = temp0;                                                 \
+  temp4 = _mm_shuffle_ps(temp4, temp2, 0x10);                           \
+  temp2 = _mm_xor_ps(temp2, temp4);                                     \
+  temp4 = _mm_shuffle_ps(temp4, temp2, 0x8c);                           \
+  temp2 = _mm_xor_ps(temp2, temp4);                                     \
+  temp1 = _mm_shuffle_ps(temp1, temp1, 0xaa);                           \
+  temp2 = _mm_xor_ps(temp2, temp1)
+  
+  BLOCK1(0x01);
+  BLOCK2(0x01);
+
+  BLOCK1(0x02);
+  BLOCK2(0x02);
+
+  BLOCK1(0x04);
+  BLOCK2(0x04);
+
+  BLOCK1(0x08);
+  BLOCK2(0x08);
+
+  BLOCK1(0x10);
+  BLOCK2(0x10);
+
+  BLOCK1(0x20);
+  BLOCK2(0x20);
+
+  BLOCK1(0x40);
+  rkeys[idx++] = temp0;
+}
+
+/** single, by-the-book AES encryption with AES-NI */
+static inline void aesni_encrypt1(unsigned char *out, unsigned char *n, __m128i rkeys[16]) {
+  __m128i nv = _mm_load_si128((const __m128i *)n);
+  int i;
+  __m128i temp = _mm_xor_si128(nv, rkeys[0]);
+#pragma unroll(13)
+  for (i = 1 ; i < 14 ; i++) {
+    temp = _mm_aesenc_si128(temp, rkeys[i]);
+  }
+  temp = _mm_aesenclast_si128(temp, rkeys[14]);
+  _mm_store_si128((__m128i*)(out), temp);
+}
+
+/** increment the 16-byte nonce;
+    this really should be improved somehow...
+    but it's not yet time-critical, because we
+    use the vector variant anyway  */
+static inline void incle(unsigned char n[16]) {
+/*   unsigned long long out; */
+/*   unsigned char carry; */
+  unsigned long long *n_ = (unsigned long long*)n;
+  n_[1]++;
+  if (n_[1] == 0)
+    n_[0] ++;
+  /* perhaps this will be efficient on broadwell ? */
+  /*   carry = _addcarry_u64(0, n_[1], 1ULL, &out); */
+  /*   carry = _addcarry_u64(carry, n_[0], 0ULL, &out); */
+}
+
+/** multiple-blocks-at-once AES encryption with AES-NI ;
+    on Haswell, aesenc has a latency of 7 and a throughput of 1,
+    so the sequence of aesenc should be bubble-free if you
+    have at least 8 blocks. Let's build an arbitrary-sized
+    function */
+/* Step 1 : loading the nonce */
+/* load & increment the n vector (non-vectorized, unused for now) */
+#define NVx(a)                                                  \
+  __m128i nv##a = _mm_shuffle_epi8(_mm_load_si128((const __m128i *)n), _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); incle(n)
+/* load the incremented n vector (vectorized, probably buggy) */
+#define NVxV_DEC(a)                                                     \
+  __m128i nv##a;
+#define NVxV_NOWRAP(a)                                                  \
+  nv##a = _mm_shuffle_epi8(_mm_add_epi64(nv0i, _mm_set_epi64x(a,0)), _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))
+#define NVxV_WRAP(a)                                                    \
+  __m128i ad##a = _mm_add_epi64(nv0i, _mm_set_epi64x(a,a>=wrapnumber?1:0)); \
+  nv##a = _mm_shuffle_epi8(ad##a, _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))
+
+/* Step 2 : define value in round one (xor with subkey #0, aka key) */
+#define TEMPx(a)                                        \
+  __m128i temp##a = _mm_xor_si128(nv##a, rkeys[0])
+
+/* Step 3: one round of AES */
+#define AESENCx(a)                                      \
+  temp##a =  _mm_aesenc_si128(temp##a, rkeys[i]);
+
+/* Step 4: last round of AES */
+#define AESENCLASTx(a)                                  \
+  temp##a = _mm_aesenclast_si128(temp##a, rkeys[14]);
+
+/* Step 5: store result */
+#define STOREx(a)                                       \
+  _mm_store_si128((__m128i*)(out+(a*16)), temp##a);
+
+/* all the MAKE* macros are for automatic explicit unrolling */
+#define MAKE4(X)                                \
+  X(0);X(1);X(2);X(3)
+
+#define MAKE6(X)                                \
+  X(0);X(1);X(2);X(3);                          \
+  X(4);X(5)
+
+#define MAKE7(X)                                \
+  X(0);X(1);X(2);X(3);                          \
+  X(4);X(5);X(6)
+
+#define MAKE8(X)                                \
+  X(0);X(1);X(2);X(3);                          \
+  X(4);X(5);X(6);X(7)
+
+#define MAKE10(X)                               \
+  X(0);X(1);X(2);X(3);                          \
+  X(4);X(5);X(6);X(7);                          \
+  X(8);X(9)
+
+#define MAKE12(X)                               \
+  X(0);X(1);X(2);X(3);                          \
+  X(4);X(5);X(6);X(7);                          \
+  X(8);X(9);X(10);X(11)
+
+/* create a function with unrolling factor N; MAKEN is the unrolling
+   macro defined above. The N in MAKEN must match N, obviously. */
+#define FUNC(N, MAKEN)                          \
+  static inline void aesni_encrypt##N(unsigned char *out, unsigned char *n, __m128i rkeys[16]) { \
+    __m128i nv0i = _mm_load_si128((const __m128i *)n);                  \
+    long long nl = *(long long*)&n[8];                                  \
+    MAKEN(NVxV_DEC);                                                    \
+    /* check for nonce wraparound */                                    \
+    if ((nl < 0) && (nl + (N-1)) >= 0) {                                \
+      int wrapnumber = (int)(N - (nl+N));                               \
+      MAKEN(NVxV_WRAP);                                                 \
+      _mm_storeu_si128((__m128i*)n, _mm_add_epi64(nv0i, _mm_set_epi64x(N,1))); \
+    } else {                                                            \
+      MAKEN(NVxV_NOWRAP);                                               \
+      _mm_storeu_si128((__m128i*)n, _mm_add_epi64(nv0i, _mm_set_epi64x(N,0))); \
+    }                                                                   \
+    int i;                                                              \
+    MAKEN(TEMPx);                                                       \
+    for (i = 1 ; i < 14 ; i++) {                                        \
+      MAKEN(AESENCx);                                                   \
+    }                                                                   \
+    MAKEN(AESENCLASTx);                                                 \
+    MAKEN(STOREx);                                                      \
+  }
+
+/* and now building our unrolled function is trivial */
+FUNC(4, MAKE4)
+FUNC(6, MAKE6)
+FUNC(7, MAKE7)
+FUNC(8, MAKE8)
+FUNC(10, MAKE10)
+FUNC(12, MAKE12)
+
+int crypto_stream_aes256ctr(
+unsigned char *out,
+unsigned long long outlen,
+const unsigned char *n,
+const unsigned char *k
+)
+{
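+  /* generate outlen bytes of AES-256-CTR keystream into out, 8 blocks (128 bytes) per iteration */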
+  __m128 rkeys[16];
+  ALIGN16 unsigned char n2[16];
+  unsigned long long i, j;
+  aesni_key256_expand(k, rkeys);
+  /* n2 is in byte-reversed (i.e., native little endian)
+     order to make increment/testing easier */
+  (*(unsigned long long*)&n2[8]) = _bswap64((*(unsigned long long*)&n[8]));
+  (*(unsigned long long*)&n2[0]) = _bswap64((*(unsigned long long*)&n[0]));
+  
+#define LOOP(iter)                                       \
+  int lb = iter * 16;                                    \
+  for (i = 0 ; i < outlen ; i+= lb) {                    \
+    ALIGN16 unsigned char outni[lb];       \
+    aesni_encrypt##iter(outni, n2, (__m128i*)rkeys);     \
+    unsigned long long mj = lb;                          \
+    if ((i+mj)>=outlen)                                  \
+      mj = outlen-i;                                     \
+    for (j = 0 ; j < mj ; j++)                           \
+      out[i+j] = outni[j];                               \
+  }
+  
+  LOOP(8);
+
+  return 0;
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.h b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.h
new file mode 100644
index 0000000000000000000000000000000000000000..9be2881acfd348f66147f437552274944790aed9
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.h
@@ -0,0 +1,6 @@
+#ifndef CRYPTO_STREAM_AES256CTR_H
+#define CRYPTO_STREAM_AES256CTR_H
+
+int crypto_stream_aes256ctr(unsigned char *c,unsigned long long clen, const unsigned char *n, const unsigned char *k);
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.s b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.s
new file mode 100644
index 0000000000000000000000000000000000000000..0b56a2bb6ac540b9b81bae9a81f8bfdeb29f94d7
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_aes256ctr.s
@@ -0,0 +1,396 @@
+	.file	"crypto_stream_aes256ctr.c"
+	.section	.text.unlikely,"ax",@progbits
+.LCOLDB11:
+	.text
+.LHOTB11:
+	.p2align 4,,15
+	.globl	crypto_stream_aes256ctr
+	.type	crypto_stream_aes256ctr, @function
+crypto_stream_aes256ctr:
+.LFB2248:
+	.cfi_startproc
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset 6, -16
+	vxorps	%xmm0, %xmm0, %xmm0
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register 6
+	pushq	%rbx
+	subq	$280, %rsp
+	.cfi_offset 3, -24
+	movq	8(%rdx), %rax
+	vmovups	16(%rcx), %xmm4
+	vmovups	(%rcx), %xmm10
+	vaeskeygenassist	$1, %xmm4, %xmm2
+	vmovaps	%xmm4, -256(%rbp)
+	vshufps	$255, %xmm2, %xmm2, %xmm2
+	bswap	%rax
+	movq	%rax, -280(%rbp)
+	movq	(%rdx), %rax
+	vshufps	$16, %xmm10, %xmm0, %xmm0
+	vmovaps	%xmm10, -272(%rbp)
+	vxorps	%xmm0, %xmm10, %xmm1
+	vshufps	$140, %xmm1, %xmm0, %xmm0
+	vxorps	%xmm0, %xmm1, %xmm1
+	vshufps	$16, %xmm4, %xmm0, %xmm0
+	vxorps	%xmm2, %xmm1, %xmm2
+	vxorps	%xmm0, %xmm4, %xmm4
+	vaeskeygenassist	$1, %xmm2, %xmm3
+	vshufps	$140, %xmm4, %xmm0, %xmm0
+	vshufps	$170, %xmm3, %xmm3, %xmm3
+	vxorps	%xmm0, %xmm4, %xmm4
+	vmovaps	%xmm2, -240(%rbp)
+	vxorps	%xmm3, %xmm4, %xmm3
+	vshufps	$16, %xmm2, %xmm0, %xmm0
+	bswap	%rax
+	vaeskeygenassist	$2, %xmm3, %xmm1
+	vxorps	%xmm0, %xmm2, %xmm2
+	vshufps	$255, %xmm1, %xmm1, %xmm1
+	vshufps	$140, %xmm2, %xmm0, %xmm0
+	vmovaps	%xmm3, -224(%rbp)
+	vxorps	%xmm0, %xmm2, %xmm2
+	movq	%rax, -288(%rbp)
+	vxorps	%xmm1, %xmm2, %xmm1
+	vshufps	$16, %xmm3, %xmm0, %xmm0
+	vaeskeygenassist	$2, %xmm1, %xmm4
+	vxorps	%xmm0, %xmm3, %xmm3
+	vshufps	$170, %xmm4, %xmm4, %xmm4
+	vshufps	$140, %xmm3, %xmm0, %xmm0
+	vmovaps	%xmm1, -208(%rbp)
+	vxorps	%xmm0, %xmm3, %xmm3
+	vxorps	%xmm4, %xmm3, %xmm4
+	vshufps	$16, %xmm1, %xmm0, %xmm0
+	vaeskeygenassist	$4, %xmm4, %xmm2
+	vxorps	%xmm0, %xmm1, %xmm1
+	vshufps	$255, %xmm2, %xmm2, %xmm2
+	vshufps	$140, %xmm1, %xmm0, %xmm0
+	vmovaps	%xmm4, -192(%rbp)
+	vxorps	%xmm0, %xmm1, %xmm1
+	vxorps	%xmm2, %xmm1, %xmm2
+	vshufps	$16, %xmm4, %xmm0, %xmm0
+	vaeskeygenassist	$4, %xmm2, %xmm3
+	vxorps	%xmm0, %xmm4, %xmm4
+	vshufps	$170, %xmm3, %xmm3, %xmm3
+	vshufps	$140, %xmm4, %xmm0, %xmm0
+	vmovaps	%xmm2, -176(%rbp)
+	vxorps	%xmm0, %xmm4, %xmm4
+	vxorps	%xmm3, %xmm4, %xmm3
+	vshufps	$16, %xmm2, %xmm0, %xmm0
+	vaeskeygenassist	$8, %xmm3, %xmm1
+	vxorps	%xmm0, %xmm2, %xmm2
+	vshufps	$255, %xmm1, %xmm1, %xmm1
+	vshufps	$140, %xmm2, %xmm0, %xmm0
+	vmovaps	%xmm3, -160(%rbp)
+	vxorps	%xmm0, %xmm2, %xmm2
+	vxorps	%xmm1, %xmm2, %xmm1
+	vshufps	$16, %xmm3, %xmm0, %xmm0
+	vaeskeygenassist	$8, %xmm1, %xmm4
+	vxorps	%xmm0, %xmm3, %xmm3
+	vshufps	$170, %xmm4, %xmm4, %xmm4
+	vshufps	$140, %xmm3, %xmm0, %xmm0
+	vmovaps	%xmm1, -144(%rbp)
+	vxorps	%xmm0, %xmm3, %xmm3
+	vxorps	%xmm4, %xmm3, %xmm4
+	vshufps	$16, %xmm1, %xmm0, %xmm0
+	vaeskeygenassist	$16, %xmm4, %xmm2
+	vxorps	%xmm0, %xmm1, %xmm1
+	vmovaps	%xmm4, -128(%rbp)
+	vshufps	$140, %xmm1, %xmm0, %xmm0
+	vshufps	$255, %xmm2, %xmm2, %xmm2
+	vxorps	%xmm0, %xmm1, %xmm1
+	vshufps	$16, %xmm4, %xmm0, %xmm0
+	vxorps	%xmm2, %xmm1, %xmm2
+	vxorps	%xmm0, %xmm4, %xmm4
+	vaeskeygenassist	$16, %xmm2, %xmm3
+	vshufps	$140, %xmm4, %xmm0, %xmm0
+	vmovaps	%xmm2, -112(%rbp)
+	vxorps	%xmm0, %xmm4, %xmm4
+	vshufps	$170, %xmm3, %xmm3, %xmm3
+	vshufps	$16, %xmm2, %xmm0, %xmm0
+	vxorps	%xmm3, %xmm4, %xmm3
+	vxorps	%xmm0, %xmm2, %xmm2
+	vaeskeygenassist	$32, %xmm3, %xmm1
+	vshufps	$140, %xmm2, %xmm0, %xmm0
+	vmovaps	%xmm3, -96(%rbp)
+	vxorps	%xmm0, %xmm2, %xmm2
+	vshufps	$255, %xmm1, %xmm1, %xmm1
+	vshufps	$16, %xmm3, %xmm0, %xmm0
+	vxorps	%xmm1, %xmm2, %xmm1
+	vxorps	%xmm0, %xmm3, %xmm3
+	vaeskeygenassist	$32, %xmm1, %xmm2
+	vshufps	$140, %xmm3, %xmm0, %xmm0
+	vmovaps	%xmm1, -80(%rbp)
+	vxorps	%xmm0, %xmm3, %xmm3
+	vshufps	$170, %xmm2, %xmm2, %xmm2
+	vshufps	$16, %xmm1, %xmm0, %xmm0
+	vxorps	%xmm2, %xmm3, %xmm2
+	vxorps	%xmm0, %xmm1, %xmm1
+	vaeskeygenassist	$64, %xmm2, %xmm9
+	vshufps	$140, %xmm1, %xmm0, %xmm0
+	vshufps	$255, %xmm9, %xmm9, %xmm9
+	vxorps	%xmm0, %xmm1, %xmm0
+	vmovaps	%xmm2, -64(%rbp)
+	vxorps	%xmm9, %xmm0, %xmm9
+	vmovaps	%xmm9, -48(%rbp)
+	testq	%rsi, %rsi
+	je	.L14
+	vmovdqa	.LC0(%rip), %xmm11
+	movq	%rsi, %rdx
+	xorl	%r10d, %r10d
+	vmovdqa	.LC3(%rip), %xmm15
+	vmovdqa	.LC4(%rip), %xmm14
+	vmovdqa	.LC5(%rip), %xmm13
+	vmovdqa	.LC6(%rip), %xmm12
+	.p2align 4,,10
+	.p2align 3
+.L11:
+	movq	-280(%rbp), %r8
+	movq	%rsp, %rbx
+	subq	$144, %rsp
+	leaq	15(%rsp), %rcx
+	vmovdqa	-288(%rbp), %xmm8
+	andq	$-16, %rcx
+	leaq	7(%r8), %rax
+	cmpq	$6, %rax
+	ja	.L4
+	negl	%r8d
+	xorl	%eax, %eax
+	movl	$1, %r9d
+	vpshufb	%xmm11, %xmm8, %xmm7
+	cmpl	$1, %r8d
+	setle	%al
+	vmovq	%rax, %xmm6
+	xorl	%eax, %eax
+	cmpl	$2, %r8d
+	setle	%al
+	vpinsrq	$1, %r9, %xmm6, %xmm6
+	vpaddq	%xmm6, %xmm8, %xmm6
+	vmovq	%rax, %xmm5
+	movl	$2, %eax
+	vpshufb	%xmm11, %xmm6, %xmm6
+	vpinsrq	$1, %rax, %xmm5, %xmm5
+	xorl	%eax, %eax
+	cmpl	$3, %r8d
+	vpaddq	%xmm5, %xmm8, %xmm5
+	setle	%al
+	vpshufb	%xmm11, %xmm5, %xmm5
+	vmovq	%rax, %xmm4
+	movl	$3, %eax
+	vpinsrq	$1, %rax, %xmm4, %xmm4
+	xorl	%eax, %eax
+	cmpl	$4, %r8d
+	vpaddq	%xmm4, %xmm8, %xmm4
+	setle	%al
+	vpshufb	%xmm11, %xmm4, %xmm4
+	vmovq	%rax, %xmm3
+	movl	$4, %eax
+	vpinsrq	$1, %rax, %xmm3, %xmm3
+	xorl	%eax, %eax
+	cmpl	$5, %r8d
+	vpaddq	%xmm3, %xmm8, %xmm3
+	setle	%al
+	vpshufb	%xmm11, %xmm3, %xmm3
+	vmovq	%rax, %xmm2
+	movl	$5, %eax
+	vpinsrq	$1, %rax, %xmm2, %xmm2
+	xorl	%eax, %eax
+	cmpl	$6, %r8d
+	vpaddq	%xmm2, %xmm8, %xmm2
+	setle	%al
+	vpshufb	%xmm11, %xmm2, %xmm2
+	vmovq	%rax, %xmm1
+	movl	$6, %eax
+	vpinsrq	$1, %rax, %xmm1, %xmm0
+	vpaddq	%xmm0, %xmm8, %xmm0
+	vpaddq	.LC1(%rip), %xmm8, %xmm1
+	vpaddq	.LC2(%rip), %xmm8, %xmm8
+	vpshufb	%xmm11, %xmm0, %xmm0
+	vpshufb	%xmm11, %xmm1, %xmm1
+	vmovups	%xmm8, -288(%rbp)
+.L5:
+	vpxor	%xmm10, %xmm7, %xmm7
+	vpxor	%xmm10, %xmm6, %xmm6
+	vpxor	%xmm10, %xmm5, %xmm5
+	vpxor	%xmm10, %xmm4, %xmm4
+	vpxor	%xmm10, %xmm3, %xmm3
+	vpxor	%xmm10, %xmm2, %xmm2
+	vpxor	%xmm10, %xmm0, %xmm8
+	vpxor	%xmm10, %xmm1, %xmm1
+	leaq	-272(%rbp), %rax
+	leaq	-272(%rbp), %r11
+	addq	$16, %rax
+	leaq	224(%r11), %r8
+	.p2align 4,,10
+	.p2align 3
+.L6:
+	vmovdqa	(%rax), %xmm0
+	addq	$16, %rax
+	vaesenc	%xmm0, %xmm7, %xmm7
+	vaesenc	%xmm0, %xmm6, %xmm6
+	vaesenc	%xmm0, %xmm5, %xmm5
+	vaesenc	%xmm0, %xmm4, %xmm4
+	vaesenc	%xmm0, %xmm3, %xmm3
+	vaesenc	%xmm0, %xmm2, %xmm2
+	vaesenc	%xmm0, %xmm8, %xmm8
+	vaesenc	%xmm0, %xmm1, %xmm1
+	cmpq	%r8, %rax
+	jne	.L6
+	vaesenclast	%xmm9, %xmm7, %xmm7
+	leaq	128(%r10), %r8
+	vaesenclast	%xmm9, %xmm6, %xmm6
+	vaesenclast	%xmm9, %xmm5, %xmm5
+	vaesenclast	%xmm9, %xmm4, %xmm4
+	vaesenclast	%xmm9, %xmm3, %xmm3
+	vaesenclast	%xmm9, %xmm2, %xmm2
+	vaesenclast	%xmm9, %xmm8, %xmm0
+	vaesenclast	%xmm9, %xmm1, %xmm1
+	vmovaps	%xmm7, (%rcx)
+	vmovaps	%xmm6, 16(%rcx)
+	vmovaps	%xmm5, 32(%rcx)
+	vmovaps	%xmm4, 48(%rcx)
+	vmovaps	%xmm3, 64(%rcx)
+	vmovaps	%xmm2, 80(%rcx)
+	vmovaps	%xmm0, 96(%rcx)
+	vmovaps	%xmm1, 112(%rcx)
+	cmpq	%r8, %rsi
+	jbe	.L18
+	movq	(%rcx), %r9
+	leaq	(%rdi,%r10), %rax
+	addq	$-128, %rdx
+	movq	%r8, %r10
+	movq	%r9, (%rax)
+	movq	8(%rcx), %r9
+	movq	%r9, 8(%rax)
+	movq	16(%rcx), %r9
+	movq	%r9, 16(%rax)
+	movq	24(%rcx), %r9
+	movq	%r9, 24(%rax)
+	movq	32(%rcx), %r9
+	movq	%r9, 32(%rax)
+	movq	40(%rcx), %r9
+	movq	%r9, 40(%rax)
+	movq	48(%rcx), %r9
+	movq	%r9, 48(%rax)
+	movq	56(%rcx), %r9
+	movq	%r9, 56(%rax)
+	movq	64(%rcx), %r9
+	movq	%r9, 64(%rax)
+	movq	72(%rcx), %r9
+	movq	%r9, 72(%rax)
+	movq	80(%rcx), %r9
+	movq	%r9, 80(%rax)
+	movq	88(%rcx), %r9
+	movq	%r9, 88(%rax)
+	movq	96(%rcx), %r9
+	movq	%r9, 96(%rax)
+	movq	104(%rcx), %r9
+	movq	%r9, 104(%rax)
+	movq	112(%rcx), %r9
+	movq	%r9, 112(%rax)
+	movq	120(%rcx), %rcx
+	movq	%rcx, 120(%rax)
+	movq	%rbx, %rsp
+	jmp	.L11
+	.p2align 4,,10
+	.p2align 3
+.L4:
+	vpaddq	%xmm15, %xmm8, %xmm6
+	vpaddq	%xmm14, %xmm8, %xmm5
+	vpshufb	%xmm11, %xmm8, %xmm7
+	vpaddq	%xmm13, %xmm8, %xmm4
+	vpaddq	%xmm12, %xmm8, %xmm3
+	vpshufb	%xmm11, %xmm6, %xmm6
+	vpshufb	%xmm11, %xmm5, %xmm5
+	vpaddq	.LC7(%rip), %xmm8, %xmm2
+	vpshufb	%xmm11, %xmm4, %xmm4
+	vpshufb	%xmm11, %xmm3, %xmm3
+	vpaddq	.LC8(%rip), %xmm8, %xmm0
+	vpaddq	.LC9(%rip), %xmm8, %xmm1
+	vpaddq	.LC10(%rip), %xmm8, %xmm8
+	vpshufb	%xmm11, %xmm2, %xmm2
+	vpshufb	%xmm11, %xmm0, %xmm0
+	vpshufb	%xmm11, %xmm1, %xmm1
+	vmovups	%xmm8, -288(%rbp)
+	jmp	.L5
+.L18:
+	testq	%rdx, %rdx
+	je	.L16
+	addq	%r10, %rdi
+	movq	%rcx, %rsi
+	call	memcpy
+.L16:
+	movq	%rbx, %rsp
+.L14:
+	xorl	%eax, %eax
+	movq	-8(%rbp), %rbx
+	leave
+	.cfi_def_cfa 7, 8
+	ret
+	.cfi_endproc
+.LFE2248:
+	.size	crypto_stream_aes256ctr, .-crypto_stream_aes256ctr
+	.section	.text.unlikely
+.LCOLDE11:
+	.text
+.LHOTE11:
+	.section	.rodata.cst16,"aM",@progbits,16
+	.align 16
+.LC0:
+	.byte	7
+	.byte	6
+	.byte	5
+	.byte	4
+	.byte	3
+	.byte	2
+	.byte	1
+	.byte	0
+	.byte	15
+	.byte	14
+	.byte	13
+	.byte	12
+	.byte	11
+	.byte	10
+	.byte	9
+	.byte	8
+	.align 16
+.LC1:
+	.quad	1
+	.quad	7
+	.align 16
+.LC2:
+	.quad	1
+	.quad	8
+	.align 16
+.LC3:
+	.quad	0
+	.quad	1
+	.align 16
+.LC4:
+	.quad	0
+	.quad	2
+	.align 16
+.LC5:
+	.quad	0
+	.quad	3
+	.align 16
+.LC6:
+	.quad	0
+	.quad	4
+	.align 16
+.LC7:
+	.quad	0
+	.quad	5
+	.align 16
+.LC8:
+	.quad	0
+	.quad	6
+	.align 16
+.LC9:
+	.quad	0
+	.quad	7
+	.align 16
+.LC10:
+	.quad	0
+	.quad	8
+	.ident	"GCC: (Debian 4.9.2-10) 4.9.2"
+	.section	.note.GNU-stack,"",@progbits
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.c b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.c
new file mode 100644
index 0000000000000000000000000000000000000000..a3ffee1b7ba2c3454dd787e15c18ed5bccf58075
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.c
@@ -0,0 +1,10 @@
+#include "crypto_stream_chacha20.h"
+#include <stddef.h>
+
+extern void chacha_avx2(const unsigned char *k, const unsigned char *n, const unsigned char *in, unsigned char *out, size_t inlen, size_t rounds);
+
+int crypto_stream_chacha20(unsigned char *out, unsigned long long outlen, const unsigned char *n, const unsigned char *k) 
+{
+	chacha_avx2(k, n, NULL, out, (size_t)outlen, 20);
+	return 0;
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.h b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.h
new file mode 100644
index 0000000000000000000000000000000000000000..c0cf329052ad0b23ca28578217dfcccbebcb7c7b
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/crypto_stream_chacha20.h
@@ -0,0 +1,6 @@
+#ifndef CRYPTO_STREAM_CHACHA20
+#define CRYPTO_STREAM_CHACHA20
+
+int crypto_stream_chacha20(unsigned char *c,unsigned long long clen, const unsigned char *n, const unsigned char *k);
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.c b/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.c
new file mode 100644
index 0000000000000000000000000000000000000000..0970e6f754ce2d14383d12a80308f8990518d138
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.c
@@ -0,0 +1,21 @@
+#include "crypto_stream.h"
+#include "error_correction.h"
+
+//See paper for details on the error reconciliation
+
+extern void hr(poly *c, const poly *v, unsigned char rand[32]);
+
+void helprec(poly *c, const poly *v, const unsigned char *seed, unsigned char nonce)
+{
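+  /* expand (seed, nonce) into 32 bytes of randomness with the stream cipher, then let the vectorized hr() compute the reconciliation vector c from v */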
+  unsigned char rand[32];
+  unsigned char n[8];
+  int i;
+
+  for(i=0;i<7;i++)
+    n[i] = 0;
+  n[7] = nonce;
+
+  crypto_stream(rand,32,n,seed);
+
+  hr(c, v, rand);
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.h b/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.h
new file mode 100644
index 0000000000000000000000000000000000000000..488c10588dabaa68020bb5bd39afc146831d1a85
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/error_correction.h
@@ -0,0 +1,15 @@
+#ifndef ERROR_CORRECTION_H
+#define ERROR_CORRECTION_H
+
+#include "inttypes.h"
+#include "params.h"
+#include "randombytes.h"
+#include "crypto_stream_chacha20.h"
+#include "math.h"
+#include "poly.h"
+#include <stdio.h>
+
+void helprec(poly *c, const poly *v, const unsigned char *seed, unsigned char nonce);
+void rec(unsigned char *key, const poly *v, const poly *c);
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.c b/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.c
new file mode 100644
index 0000000000000000000000000000000000000000..f649a7d7ba80b953248d43024a564c0544bec67d
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.c
@@ -0,0 +1,415 @@
+/* Based on the public domain implementation in
+ * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html
+ * by Ronny Van Keer 
+ * and the public domain "TweetFips202" implementation
+ * from https://twitter.com/tweetfips202
+ * by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe */
+
+#include <stdint.h>
+#include <assert.h>
+#include "fips202.h"
+
+#define NROUNDS 24
+#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset)))
+
+static uint64_t load64(const unsigned char *x)
+{
+  unsigned long long r = 0, i;
+
+  for (i = 0; i < 8; ++i) {
+    r |= (unsigned long long)x[i] << 8 * i;
+  }
+  return r;
+}
+
+static void store64(uint8_t *x, uint64_t u)
+{
+  unsigned int i;
+
+  for(i=0; i<8; ++i) {
+    x[i] = u;
+    u >>= 8;
+  }
+}
+
+static const uint64_t KeccakF_RoundConstants[NROUNDS] = 
+{
+    (uint64_t)0x0000000000000001ULL,
+    (uint64_t)0x0000000000008082ULL,
+    (uint64_t)0x800000000000808aULL,
+    (uint64_t)0x8000000080008000ULL,
+    (uint64_t)0x000000000000808bULL,
+    (uint64_t)0x0000000080000001ULL,
+    (uint64_t)0x8000000080008081ULL,
+    (uint64_t)0x8000000000008009ULL,
+    (uint64_t)0x000000000000008aULL,
+    (uint64_t)0x0000000000000088ULL,
+    (uint64_t)0x0000000080008009ULL,
+    (uint64_t)0x000000008000000aULL,
+    (uint64_t)0x000000008000808bULL,
+    (uint64_t)0x800000000000008bULL,
+    (uint64_t)0x8000000000008089ULL,
+    (uint64_t)0x8000000000008003ULL,
+    (uint64_t)0x8000000000008002ULL,
+    (uint64_t)0x8000000000000080ULL,
+    (uint64_t)0x000000000000800aULL,
+    (uint64_t)0x800000008000000aULL,
+    (uint64_t)0x8000000080008081ULL,
+    (uint64_t)0x8000000000008080ULL,
+    (uint64_t)0x0000000080000001ULL,
+    (uint64_t)0x8000000080008008ULL
+};
+
+void KeccakF1600_StatePermute(uint64_t * state)
+{
+  int round;
+
+        uint64_t Aba, Abe, Abi, Abo, Abu;
+        uint64_t Aga, Age, Agi, Ago, Agu;
+        uint64_t Aka, Ake, Aki, Ako, Aku;
+        uint64_t Ama, Ame, Ami, Amo, Amu;
+        uint64_t Asa, Ase, Asi, Aso, Asu;
+        uint64_t BCa, BCe, BCi, BCo, BCu;
+        uint64_t Da, De, Di, Do, Du;
+        uint64_t Eba, Ebe, Ebi, Ebo, Ebu;
+        uint64_t Ega, Ege, Egi, Ego, Egu;
+        uint64_t Eka, Eke, Eki, Eko, Eku;
+        uint64_t Ema, Eme, Emi, Emo, Emu;
+        uint64_t Esa, Ese, Esi, Eso, Esu;
+
+        //copyFromState(A, state)
+        Aba = state[ 0];
+        Abe = state[ 1];
+        Abi = state[ 2];
+        Abo = state[ 3];
+        Abu = state[ 4];
+        Aga = state[ 5];
+        Age = state[ 6];
+        Agi = state[ 7];
+        Ago = state[ 8];
+        Agu = state[ 9];
+        Aka = state[10];
+        Ake = state[11];
+        Aki = state[12];
+        Ako = state[13];
+        Aku = state[14];
+        Ama = state[15];
+        Ame = state[16];
+        Ami = state[17];
+        Amo = state[18];
+        Amu = state[19];
+        Asa = state[20];
+        Ase = state[21];
+        Asi = state[22];
+        Aso = state[23];
+        Asu = state[24];
+
+        for( round = 0; round < NROUNDS; round += 2 )
+        {
+            //    prepareTheta
+            BCa = Aba^Aga^Aka^Ama^Asa;
+            BCe = Abe^Age^Ake^Ame^Ase;
+            BCi = Abi^Agi^Aki^Ami^Asi;
+            BCo = Abo^Ago^Ako^Amo^Aso;
+            BCu = Abu^Agu^Aku^Amu^Asu;
+
+            //thetaRhoPiChiIotaPrepareTheta(round  , A, E)
+            Da = BCu^ROL(BCe, 1);
+            De = BCa^ROL(BCi, 1);
+            Di = BCe^ROL(BCo, 1);
+            Do = BCi^ROL(BCu, 1);
+            Du = BCo^ROL(BCa, 1);
+
+            Aba ^= Da;
+            BCa = Aba;
+            Age ^= De;
+            BCe = ROL(Age, 44);
+            Aki ^= Di;
+            BCi = ROL(Aki, 43);
+            Amo ^= Do;
+            BCo = ROL(Amo, 21);
+            Asu ^= Du;
+            BCu = ROL(Asu, 14);
+            Eba =   BCa ^((~BCe)&  BCi );
+            Eba ^= (uint64_t)KeccakF_RoundConstants[round];
+            Ebe =   BCe ^((~BCi)&  BCo );
+            Ebi =   BCi ^((~BCo)&  BCu );
+            Ebo =   BCo ^((~BCu)&  BCa );
+            Ebu =   BCu ^((~BCa)&  BCe );
+
+            Abo ^= Do;
+            BCa = ROL(Abo, 28);
+            Agu ^= Du;
+            BCe = ROL(Agu, 20);
+            Aka ^= Da;
+            BCi = ROL(Aka,  3);
+            Ame ^= De;
+            BCo = ROL(Ame, 45);
+            Asi ^= Di;
+            BCu = ROL(Asi, 61);
+            Ega =   BCa ^((~BCe)&  BCi );
+            Ege =   BCe ^((~BCi)&  BCo );
+            Egi =   BCi ^((~BCo)&  BCu );
+            Ego =   BCo ^((~BCu)&  BCa );
+            Egu =   BCu ^((~BCa)&  BCe );
+
+            Abe ^= De;
+            BCa = ROL(Abe,  1);
+            Agi ^= Di;
+            BCe = ROL(Agi,  6);
+            Ako ^= Do;
+            BCi = ROL(Ako, 25);
+            Amu ^= Du;
+            BCo = ROL(Amu,  8);
+            Asa ^= Da;
+            BCu = ROL(Asa, 18);
+            Eka =   BCa ^((~BCe)&  BCi );
+            Eke =   BCe ^((~BCi)&  BCo );
+            Eki =   BCi ^((~BCo)&  BCu );
+            Eko =   BCo ^((~BCu)&  BCa );
+            Eku =   BCu ^((~BCa)&  BCe );
+
+            Abu ^= Du;
+            BCa = ROL(Abu, 27);
+            Aga ^= Da;
+            BCe = ROL(Aga, 36);
+            Ake ^= De;
+            BCi = ROL(Ake, 10);
+            Ami ^= Di;
+            BCo = ROL(Ami, 15);
+            Aso ^= Do;
+            BCu = ROL(Aso, 56);
+            Ema =   BCa ^((~BCe)&  BCi );
+            Eme =   BCe ^((~BCi)&  BCo );
+            Emi =   BCi ^((~BCo)&  BCu );
+            Emo =   BCo ^((~BCu)&  BCa );
+            Emu =   BCu ^((~BCa)&  BCe );
+
+            Abi ^= Di;
+            BCa = ROL(Abi, 62);
+            Ago ^= Do;
+            BCe = ROL(Ago, 55);
+            Aku ^= Du;
+            BCi = ROL(Aku, 39);
+            Ama ^= Da;
+            BCo = ROL(Ama, 41);
+            Ase ^= De;
+            BCu = ROL(Ase,  2);
+            Esa =   BCa ^((~BCe)&  BCi );
+            Ese =   BCe ^((~BCi)&  BCo );
+            Esi =   BCi ^((~BCo)&  BCu );
+            Eso =   BCo ^((~BCu)&  BCa );
+            Esu =   BCu ^((~BCa)&  BCe );
+
+            //    prepareTheta
+            BCa = Eba^Ega^Eka^Ema^Esa;
+            BCe = Ebe^Ege^Eke^Eme^Ese;
+            BCi = Ebi^Egi^Eki^Emi^Esi;
+            BCo = Ebo^Ego^Eko^Emo^Eso;
+            BCu = Ebu^Egu^Eku^Emu^Esu;
+
+            //thetaRhoPiChiIotaPrepareTheta(round+1, E, A)
+            Da = BCu^ROL(BCe, 1);
+            De = BCa^ROL(BCi, 1);
+            Di = BCe^ROL(BCo, 1);
+            Do = BCi^ROL(BCu, 1);
+            Du = BCo^ROL(BCa, 1);
+
+            Eba ^= Da;
+            BCa = Eba;
+            Ege ^= De;
+            BCe = ROL(Ege, 44);
+            Eki ^= Di;
+            BCi = ROL(Eki, 43);
+            Emo ^= Do;
+            BCo = ROL(Emo, 21);
+            Esu ^= Du;
+            BCu = ROL(Esu, 14);
+            Aba =   BCa ^((~BCe)&  BCi );
+            Aba ^= (uint64_t)KeccakF_RoundConstants[round+1];
+            Abe =   BCe ^((~BCi)&  BCo );
+            Abi =   BCi ^((~BCo)&  BCu );
+            Abo =   BCo ^((~BCu)&  BCa );
+            Abu =   BCu ^((~BCa)&  BCe );
+
+            Ebo ^= Do;
+            BCa = ROL(Ebo, 28);
+            Egu ^= Du;
+            BCe = ROL(Egu, 20);
+            Eka ^= Da;
+            BCi = ROL(Eka, 3);
+            Eme ^= De;
+            BCo = ROL(Eme, 45);
+            Esi ^= Di;
+            BCu = ROL(Esi, 61);
+            Aga =   BCa ^((~BCe)&  BCi );
+            Age =   BCe ^((~BCi)&  BCo );
+            Agi =   BCi ^((~BCo)&  BCu );
+            Ago =   BCo ^((~BCu)&  BCa );
+            Agu =   BCu ^((~BCa)&  BCe );
+
+            Ebe ^= De;
+            BCa = ROL(Ebe, 1);
+            Egi ^= Di;
+            BCe = ROL(Egi, 6);
+            Eko ^= Do;
+            BCi = ROL(Eko, 25);
+            Emu ^= Du;
+            BCo = ROL(Emu, 8);
+            Esa ^= Da;
+            BCu = ROL(Esa, 18);
+            Aka =   BCa ^((~BCe)&  BCi );
+            Ake =   BCe ^((~BCi)&  BCo );
+            Aki =   BCi ^((~BCo)&  BCu );
+            Ako =   BCo ^((~BCu)&  BCa );
+            Aku =   BCu ^((~BCa)&  BCe );
+
+            Ebu ^= Du;
+            BCa = ROL(Ebu, 27);
+            Ega ^= Da;
+            BCe = ROL(Ega, 36);
+            Eke ^= De;
+            BCi = ROL(Eke, 10);
+            Emi ^= Di;
+            BCo = ROL(Emi, 15);
+            Eso ^= Do;
+            BCu = ROL(Eso, 56);
+            Ama =   BCa ^((~BCe)&  BCi );
+            Ame =   BCe ^((~BCi)&  BCo );
+            Ami =   BCi ^((~BCo)&  BCu );
+            Amo =   BCo ^((~BCu)&  BCa );
+            Amu =   BCu ^((~BCa)&  BCe );
+
+            Ebi ^= Di;
+            BCa = ROL(Ebi, 62);
+            Ego ^= Do;
+            BCe = ROL(Ego, 55);
+            Eku ^= Du;
+            BCi = ROL(Eku, 39);
+            Ema ^= Da;
+            BCo = ROL(Ema, 41);
+            Ese ^= De;
+            BCu = ROL(Ese, 2);
+            Asa =   BCa ^((~BCe)&  BCi );
+            Ase =   BCe ^((~BCi)&  BCo );
+            Asi =   BCi ^((~BCo)&  BCu );
+            Aso =   BCo ^((~BCu)&  BCa );
+            Asu =   BCu ^((~BCa)&  BCe );
+        }
+
+        //copyToState(state, A)
+        state[ 0] = Aba;
+        state[ 1] = Abe;
+        state[ 2] = Abi;
+        state[ 3] = Abo;
+        state[ 4] = Abu;
+        state[ 5] = Aga;
+        state[ 6] = Age;
+        state[ 7] = Agi;
+        state[ 8] = Ago;
+        state[ 9] = Agu;
+        state[10] = Aka;
+        state[11] = Ake;
+        state[12] = Aki;
+        state[13] = Ako;
+        state[14] = Aku;
+        state[15] = Ama;
+        state[16] = Ame;
+        state[17] = Ami;
+        state[18] = Amo;
+        state[19] = Amu;
+        state[20] = Asa;
+        state[21] = Ase;
+        state[22] = Asi;
+        state[23] = Aso;
+        state[24] = Asu;
+
+        #undef    round
+}
+
+#include <string.h>
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+
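+/* zero the state, absorb full rate-sized blocks, then pad the final partial block with domain byte p and a trailing 0x80 */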
+static void keccak_absorb(uint64_t *s,
+                          unsigned int r,
+                          const unsigned char *m, unsigned long long int mlen,
+                          unsigned char p)
+{
+  unsigned long long i;
+  unsigned char t[200];
+
+  for (i = 0; i < 25; ++i)
+    s[i] = 0;
+  
+  while (mlen >= r) 
+  {
+    for (i = 0; i < r / 8; ++i)
+      s[i] ^= load64(m + 8 * i);
+    
+    KeccakF1600_StatePermute(s);
+    mlen -= r;
+    m += r;
+  }
+
+  for (i = 0; i < r; ++i)
+    t[i] = 0;
+  for (i = 0; i < mlen; ++i)
+    t[i] = m[i];
+  t[i] = p;
+  t[r - 1] |= 128;
+  for (i = 0; i < r / 8; ++i)
+    s[i] ^= load64(t + 8 * i);
+}
+
+
+static void keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks,
+                                 uint64_t *s, 
+                                 unsigned int r)
+{
+  unsigned int i;
+  while(nblocks > 0) 
+  {
+    KeccakF1600_StatePermute(s);
+    for(i=0;i<(r>>3);i++)
+    {
+      store64(h+8*i, s[i]);
+    }
+    h += r;
+    nblocks--;
+  }
+}
+
+
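+/* domain-separation bytes per FIPS 202: 0x1F for SHAKE128, 0x06 for SHA3-256 */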
+void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen)
+{
+  keccak_absorb(s, SHAKE128_RATE, input, inputByteLen, 0x1F);
+}
+
+
+void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s)
+{
+  keccak_squeezeblocks(output, nblocks, s, SHAKE128_RATE);
+}
+
+
+void shake128(unsigned char *output, unsigned int outputByteLen, const unsigned char *input, unsigned int inputByteLen)
+{
+  uint64_t s[25];
+  assert(!(outputByteLen%SHAKE128_RATE));
+  shake128_absorb(s, input, inputByteLen);
+  shake128_squeezeblocks(output, outputByteLen/SHAKE128_RATE, s);
+}
+
+
+void sha3256(unsigned char *output, const unsigned char *input, unsigned int inputByteLen)
+{
+  uint64_t s[25];
+  unsigned char t[SHA3_256_RATE];
+  int i;
+
+  keccak_absorb(s, SHA3_256_RATE, input, inputByteLen, 0x06);
+  keccak_squeezeblocks(t, 1, s, SHA3_256_RATE);
+  for(i=0;i<32;i++)
+    output[i] = t[i];
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.h b/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.h
new file mode 100644
index 0000000000000000000000000000000000000000..b5644345035ceb8fc8c8be1ee6996b1e0aa937be
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/fips202.h
@@ -0,0 +1,12 @@
+#ifndef FIPS202_H
+#define FIPS202_H
+
+#include <stdint.h>
+#define SHAKE128_RATE 168
+#define SHA3_256_RATE 136
+
+void shake128_absorb(uint64_t *s, const unsigned char *input, unsigned int inputByteLen);
+void shake128_squeezeblocks(unsigned char *output, unsigned long long nblocks, uint64_t *s);
+void shake128(unsigned char *output, unsigned int outputByteLen, const unsigned char *input, unsigned int inputByteLen);
+void sha3256(unsigned char *output, const unsigned char *input, unsigned int inputByteLen);
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/hr.s b/crypt/liboqs/kex_rlwe_newhope/avx2/hr.s
new file mode 100644
index 0000000000000000000000000000000000000000..beb93fe0c422fee57c730876ab6e237374e1b245
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/hr.s
@@ -0,0 +1,767 @@
+
+# qhasm: int64 input_0
+
+# qhasm: int64 input_1
+
+# qhasm: int64 input_2
+
+# qhasm: int64 input_3
+
+# qhasm: int64 input_4
+
+# qhasm: int64 input_5
+
+# qhasm: stack64 input_6
+
+# qhasm: stack64 input_7
+
+# qhasm: int64 caller_r11
+
+# qhasm: int64 caller_r12
+
+# qhasm: int64 caller_r13
+
+# qhasm: int64 caller_r14
+
+# qhasm: int64 caller_r15
+
+# qhasm: int64 caller_rbx
+
+# qhasm: int64 caller_rbp
+
+# qhasm: reg256 v
+
+# qhasm: reg256 v0a
+
+# qhasm: reg256 v0b
+
+# qhasm: reg256 v0c
+
+# qhasm: reg256 v0d
+
+# qhasm: reg256 v1a
+
+# qhasm: reg256 v1b
+
+# qhasm: reg256 v1c
+
+# qhasm: reg256 v1d
+
+# qhasm: reg256 vtmp0
+
+# qhasm: reg256 vtmp1
+
+# qhasm: reg256 vtmp2
+
+# qhasm: reg256 vtmp3
+
+# qhasm: reg256 k
+
+# qhasm: reg256 b
+
+# qhasm: reg256 t
+
+# qhasm: reg256 d
+
+# qhasm: reg256 c
+
+# qhasm: reg256 rbit
+
+# qhasm: reg256 qx8
+
+# qhasm: reg256 _1x8
+
+# qhasm: reg256 _3x8
+
+# qhasm: reg256 rshifts
+
+# qhasm: reg256 _2730
+
+# qhasm: int64 ctr
+
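+# hr: AVX2 inner routine for helprec(); operates on 8 coefficients per 256-bit (ymm) vector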
+# qhasm: enter hr
+.p2align 5
+.global _hr
+.global hr
+_hr:
+hr:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: ctr = 0
+# asm 1: mov  $0,>ctr=int64#4
+# asm 2: mov  $0,>ctr=%rcx
+mov  $0,%rcx
+
+# qhasm: _1x8    = mem256[v1x8]
+# asm 1: vmovdqu v1x8,>_1x8=reg256#1
+# asm 2: vmovdqu v1x8,>_1x8=%ymm0
+vmovdqu v1x8,%ymm0
+
+# qhasm: qx8     = mem256[q8x]
+# asm 1: vmovdqu q8x,>qx8=reg256#2
+# asm 2: vmovdqu q8x,>qx8=%ymm1
+vmovdqu q8x,%ymm1
+
+# qhasm: looptop:
+._looptop:
+
+# qhasm: rshifts = mem256[vrshiftsx8]
+# asm 1: vmovdqu vrshiftsx8,>rshifts=reg256#3
+# asm 2: vmovdqu vrshiftsx8,>rshifts=%ymm2
+vmovdqu vrshiftsx8,%ymm2
+
+# qhasm: 32x rbit = mem8[input_2 + ctr + 0]
+# asm 1: vpbroadcastb 0(<input_2=int64#3,<ctr=int64#4),>rbit=reg256#4
+# asm 2: vpbroadcastb 0(<input_2=%rdx,<ctr=%rcx),>rbit=%ymm3
+vpbroadcastb 0(%rdx,%rcx),%ymm3
+
+# qhasm: 8x rbit unsigned>>= rshifts
+# asm 1: vpsrlvd <rshifts=reg256#3,<rbit=reg256#4,>rbit=reg256#3
+# asm 2: vpsrlvd <rshifts=%ymm2,<rbit=%ymm3,>rbit=%ymm2
+vpsrlvd %ymm2,%ymm3,%ymm2
+
+# qhasm: rbit &= _1x8
+# asm 1: vpand <_1x8=reg256#1,<rbit=reg256#3,<rbit=reg256#3
+# asm 2: vpand <_1x8=%ymm0,<rbit=%ymm2,<rbit=%ymm2
+vpand %ymm0,%ymm2,%ymm2
+
+# qhasm: 8x rbit <<= 2
+# asm 1: vpslld $2,<rbit=reg256#3,>rbit=reg256#3
+# asm 2: vpslld $2,<rbit=%ymm2,>rbit=%ymm2
+vpslld $2,%ymm2,%ymm2
+
+# qhasm: ctr <<= 5
+# asm 1: shl  $5,<ctr=int64#4
+# asm 2: shl  $5,<ctr=%rcx
+shl  $5,%rcx
+
+# qhasm: v = mem256[input_1 + ctr + 0]
+# asm 1: vmovupd   0(<input_1=int64#2,<ctr=int64#4),>v=reg256#4
+# asm 2: vmovupd   0(<input_1=%rsi,<ctr=%rcx),>v=%ymm3
+vmovupd   0(%rsi,%rcx),%ymm3
+
+# qhasm: 8x v <<= 3
+# asm 1: vpslld $3,<v=reg256#4,>v=reg256#4
+# asm 2: vpslld $3,<v=%ymm3,>v=%ymm3
+vpslld $3,%ymm3,%ymm3
+
+# qhasm: 8x v += rbit
+# asm 1: vpaddd <rbit=reg256#3,<v=reg256#4,>v=reg256#4
+# asm 2: vpaddd <rbit=%ymm2,<v=%ymm3,>v=%ymm3
+vpaddd %ymm2,%ymm3,%ymm3
+
+# qhasm: 8x b = v * mem256[v2730x8]
+# asm 1: vpmulld v2730x8,<v=reg256#4,>b=reg256#5
+# asm 2: vpmulld v2730x8,<v=%ymm3,>b=%ymm4
+vpmulld v2730x8,%ymm3,%ymm4
+
+# qhasm: 8x t = b >> 25
+# asm 1: vpsrad $25,<b=reg256#5,>t=reg256#5
+# asm 2: vpsrad $25,<b=%ymm4,>t=%ymm4
+vpsrad $25,%ymm4,%ymm4
+
+# qhasm: 8x d = t * qx8
+# asm 1: vpmulld <t=reg256#5,<qx8=reg256#2,>d=reg256#6
+# asm 2: vpmulld <t=%ymm4,<qx8=%ymm1,>d=%ymm5
+vpmulld %ymm4,%ymm1,%ymm5
+
+# qhasm: 8x b = v - d
+# asm 1: vpsubd <d=reg256#6,<v=reg256#4,>b=reg256#6
+# asm 2: vpsubd <d=%ymm5,<v=%ymm3,>b=%ymm5
+vpsubd %ymm5,%ymm3,%ymm5
+
+# qhasm: 8x b += _1x8
+# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#6,>b=reg256#6
+# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm5,>b=%ymm5
+vpaddd %ymm0,%ymm5,%ymm5
+
+# qhasm: 8x b = qx8 - b
+# asm 1: vpsubd <b=reg256#6,<qx8=reg256#2,>b=reg256#6
+# asm 2: vpsubd <b=%ymm5,<qx8=%ymm1,>b=%ymm5
+vpsubd %ymm5,%ymm1,%ymm5
+
+# qhasm: 8x b >>= 31
+# asm 1: vpsrad $31,<b=reg256#6,>b=reg256#6
+# asm 2: vpsrad $31,<b=%ymm5,>b=%ymm5
+vpsrad $31,%ymm5,%ymm5
+
+# qhasm: 8x t -= b
+# asm 1: vpsubd <b=reg256#6,<t=reg256#5,>t=reg256#5
+# asm 2: vpsubd <b=%ymm5,<t=%ymm4,>t=%ymm4
+vpsubd %ymm5,%ymm4,%ymm4
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#5,<_1x8=reg256#1,>d=reg256#6
+# asm 2: vpand <t=%ymm4,<_1x8=%ymm0,>d=%ymm5
+vpand %ymm4,%ymm0,%ymm5
+
+# qhasm: 8x v0a = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#5,>v0a=reg256#7
+# asm 2: vpsrad $1,<t=%ymm4,>v0a=%ymm6
+vpsrad $1,%ymm4,%ymm6
+
+# qhasm: 8x v0a += d
+# asm 1: vpaddd <d=reg256#6,<v0a=reg256#7,>v0a=reg256#6
+# asm 2: vpaddd <d=%ymm5,<v0a=%ymm6,>v0a=%ymm5
+vpaddd %ymm5,%ymm6,%ymm5
+
+# qhasm: 8x t -= _1x8
+# asm 1: vpsubd <_1x8=reg256#1,<t=reg256#5,>t=reg256#5
+# asm 2: vpsubd <_1x8=%ymm0,<t=%ymm4,>t=%ymm4
+vpsubd %ymm0,%ymm4,%ymm4
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#5,<_1x8=reg256#1,>d=reg256#7
+# asm 2: vpand <t=%ymm4,<_1x8=%ymm0,>d=%ymm6
+vpand %ymm4,%ymm0,%ymm6
+
+# qhasm: 8x v1a = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#5,>v1a=reg256#5
+# asm 2: vpsrad $1,<t=%ymm4,>v1a=%ymm4
+vpsrad $1,%ymm4,%ymm4
+
+# qhasm: 8x v1a += d
+# asm 1: vpaddd <d=reg256#7,<v1a=reg256#5,>v1a=reg256#5
+# asm 2: vpaddd <d=%ymm6,<v1a=%ymm4,>v1a=%ymm4
+vpaddd %ymm6,%ymm4,%ymm4
+
+# qhasm: 8x d = v0a * qx8
+# asm 1: vpmulld <v0a=reg256#6,<qx8=reg256#2,>d=reg256#7
+# asm 2: vpmulld <v0a=%ymm5,<qx8=%ymm1,>d=%ymm6
+vpmulld %ymm5,%ymm1,%ymm6
+
+# qhasm: 8x d <<= 1
+# asm 1: vpslld $1,<d=reg256#7,>d=reg256#7
+# asm 2: vpslld $1,<d=%ymm6,>d=%ymm6
+vpslld $1,%ymm6,%ymm6
+
+# qhasm: 8x d = v - d
+# asm 1: vpsubd <d=reg256#7,<v=reg256#4,>d=reg256#4
+# asm 2: vpsubd <d=%ymm6,<v=%ymm3,>d=%ymm3
+vpsubd %ymm6,%ymm3,%ymm3
+
+# qhasm: 8x k = abs(d)
+# asm 1: vpabsd <d=reg256#4,>k=reg256#4
+# asm 2: vpabsd <d=%ymm3,>k=%ymm3
+vpabsd %ymm3,%ymm3
+
+# qhasm: v = mem256[input_1 + ctr + 1024]
+# asm 1: vmovupd   1024(<input_1=int64#2,<ctr=int64#4),>v=reg256#7
+# asm 2: vmovupd   1024(<input_1=%rsi,<ctr=%rcx),>v=%ymm6
+vmovupd   1024(%rsi,%rcx),%ymm6
+
+# qhasm: 8x v <<= 3
+# asm 1: vpslld $3,<v=reg256#7,>v=reg256#7
+# asm 2: vpslld $3,<v=%ymm6,>v=%ymm6
+vpslld $3,%ymm6,%ymm6
+
+# qhasm: 8x v += rbit
+# asm 1: vpaddd <rbit=reg256#3,<v=reg256#7,>v=reg256#7
+# asm 2: vpaddd <rbit=%ymm2,<v=%ymm6,>v=%ymm6
+vpaddd %ymm2,%ymm6,%ymm6
+
+# qhasm: 8x b = v * mem256[v2730x8]
+# asm 1: vpmulld v2730x8,<v=reg256#7,>b=reg256#8
+# asm 2: vpmulld v2730x8,<v=%ymm6,>b=%ymm7
+vpmulld v2730x8,%ymm6,%ymm7
+
+# qhasm: 8x t = b >> 25
+# asm 1: vpsrad $25,<b=reg256#8,>t=reg256#8
+# asm 2: vpsrad $25,<b=%ymm7,>t=%ymm7
+vpsrad $25,%ymm7,%ymm7
+
+# qhasm: 8x d = t * qx8
+# asm 1: vpmulld <t=reg256#8,<qx8=reg256#2,>d=reg256#9
+# asm 2: vpmulld <t=%ymm7,<qx8=%ymm1,>d=%ymm8
+vpmulld %ymm7,%ymm1,%ymm8
+
+# qhasm: 8x b = v - d
+# asm 1: vpsubd <d=reg256#9,<v=reg256#7,>b=reg256#9
+# asm 2: vpsubd <d=%ymm8,<v=%ymm6,>b=%ymm8
+vpsubd %ymm8,%ymm6,%ymm8
+
+# qhasm: 8x b += _1x8
+# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#9,>b=reg256#9
+# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm8,>b=%ymm8
+vpaddd %ymm0,%ymm8,%ymm8
+
+# qhasm: 8x b = qx8 - b
+# asm 1: vpsubd <b=reg256#9,<qx8=reg256#2,>b=reg256#9
+# asm 2: vpsubd <b=%ymm8,<qx8=%ymm1,>b=%ymm8
+vpsubd %ymm8,%ymm1,%ymm8
+
+# qhasm: 8x b >>= 31
+# asm 1: vpsrad $31,<b=reg256#9,>b=reg256#9
+# asm 2: vpsrad $31,<b=%ymm8,>b=%ymm8
+vpsrad $31,%ymm8,%ymm8
+
+# qhasm: 8x t -= b
+# asm 1: vpsubd <b=reg256#9,<t=reg256#8,>t=reg256#8
+# asm 2: vpsubd <b=%ymm8,<t=%ymm7,>t=%ymm7
+vpsubd %ymm8,%ymm7,%ymm7
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#8,<_1x8=reg256#1,>d=reg256#9
+# asm 2: vpand <t=%ymm7,<_1x8=%ymm0,>d=%ymm8
+vpand %ymm7,%ymm0,%ymm8
+
+# qhasm: 8x v0b = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#8,>v0b=reg256#10
+# asm 2: vpsrad $1,<t=%ymm7,>v0b=%ymm9
+vpsrad $1,%ymm7,%ymm9
+
+# qhasm: 8x v0b += d
+# asm 1: vpaddd <d=reg256#9,<v0b=reg256#10,>v0b=reg256#9
+# asm 2: vpaddd <d=%ymm8,<v0b=%ymm9,>v0b=%ymm8
+vpaddd %ymm8,%ymm9,%ymm8
+
+# qhasm: 8x t -= _1x8
+# asm 1: vpsubd <_1x8=reg256#1,<t=reg256#8,>t=reg256#8
+# asm 2: vpsubd <_1x8=%ymm0,<t=%ymm7,>t=%ymm7
+vpsubd %ymm0,%ymm7,%ymm7
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#8,<_1x8=reg256#1,>d=reg256#10
+# asm 2: vpand <t=%ymm7,<_1x8=%ymm0,>d=%ymm9
+vpand %ymm7,%ymm0,%ymm9
+
+# qhasm: 8x v1b = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#8,>v1b=reg256#8
+# asm 2: vpsrad $1,<t=%ymm7,>v1b=%ymm7
+vpsrad $1,%ymm7,%ymm7
+
+# qhasm: 8x v1b += d
+# asm 1: vpaddd <d=reg256#10,<v1b=reg256#8,>v1b=reg256#8
+# asm 2: vpaddd <d=%ymm9,<v1b=%ymm7,>v1b=%ymm7
+vpaddd %ymm9,%ymm7,%ymm7
+
+# qhasm: 8x d = v0b * qx8
+# asm 1: vpmulld <v0b=reg256#9,<qx8=reg256#2,>d=reg256#10
+# asm 2: vpmulld <v0b=%ymm8,<qx8=%ymm1,>d=%ymm9
+vpmulld %ymm8,%ymm1,%ymm9
+
+# qhasm: 8x d <<= 1
+# asm 1: vpslld $1,<d=reg256#10,>d=reg256#10
+# asm 2: vpslld $1,<d=%ymm9,>d=%ymm9
+vpslld $1,%ymm9,%ymm9
+
+# qhasm: 8x d = v - d
+# asm 1: vpsubd <d=reg256#10,<v=reg256#7,>d=reg256#7
+# asm 2: vpsubd <d=%ymm9,<v=%ymm6,>d=%ymm6
+vpsubd %ymm9,%ymm6,%ymm6
+
+# qhasm: 8x v = abs(d)
+# asm 1: vpabsd <d=reg256#7,>v=reg256#7
+# asm 2: vpabsd <d=%ymm6,>v=%ymm6
+vpabsd %ymm6,%ymm6
+
+# qhasm: 8x k += v
+# asm 1: vpaddd <v=reg256#7,<k=reg256#4,>k=reg256#4
+# asm 2: vpaddd <v=%ymm6,<k=%ymm3,>k=%ymm3
+vpaddd %ymm6,%ymm3,%ymm3
+
+# qhasm: v = mem256[input_1 + ctr + 2048]
+# asm 1: vmovupd   2048(<input_1=int64#2,<ctr=int64#4),>v=reg256#7
+# asm 2: vmovupd   2048(<input_1=%rsi,<ctr=%rcx),>v=%ymm6
+vmovupd   2048(%rsi,%rcx),%ymm6
+
+# qhasm: 8x v <<= 3
+# asm 1: vpslld $3,<v=reg256#7,>v=reg256#7
+# asm 2: vpslld $3,<v=%ymm6,>v=%ymm6
+vpslld $3,%ymm6,%ymm6
+
+# qhasm: 8x v += rbit
+# asm 1: vpaddd <rbit=reg256#3,<v=reg256#7,>v=reg256#7
+# asm 2: vpaddd <rbit=%ymm2,<v=%ymm6,>v=%ymm6
+vpaddd %ymm2,%ymm6,%ymm6
+
+# qhasm: 8x b = v * mem256[v2730x8]
+# asm 1: vpmulld v2730x8,<v=reg256#7,>b=reg256#10
+# asm 2: vpmulld v2730x8,<v=%ymm6,>b=%ymm9
+vpmulld v2730x8,%ymm6,%ymm9
+
+# qhasm: 8x t = b >> 25
+# asm 1: vpsrad $25,<b=reg256#10,>t=reg256#10
+# asm 2: vpsrad $25,<b=%ymm9,>t=%ymm9
+vpsrad $25,%ymm9,%ymm9
+
+# qhasm: 8x d = t * qx8
+# asm 1: vpmulld <t=reg256#10,<qx8=reg256#2,>d=reg256#11
+# asm 2: vpmulld <t=%ymm9,<qx8=%ymm1,>d=%ymm10
+vpmulld %ymm9,%ymm1,%ymm10
+
+# qhasm: 8x b = v - d
+# asm 1: vpsubd <d=reg256#11,<v=reg256#7,>b=reg256#11
+# asm 2: vpsubd <d=%ymm10,<v=%ymm6,>b=%ymm10
+vpsubd %ymm10,%ymm6,%ymm10
+
+# qhasm: 8x b += _1x8
+# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#11,>b=reg256#11
+# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm10,>b=%ymm10
+vpaddd %ymm0,%ymm10,%ymm10
+
+# qhasm: 8x b = qx8 - b
+# asm 1: vpsubd <b=reg256#11,<qx8=reg256#2,>b=reg256#11
+# asm 2: vpsubd <b=%ymm10,<qx8=%ymm1,>b=%ymm10
+vpsubd %ymm10,%ymm1,%ymm10
+
+# qhasm: 8x b >>= 31
+# asm 1: vpsrad $31,<b=reg256#11,>b=reg256#11
+# asm 2: vpsrad $31,<b=%ymm10,>b=%ymm10
+vpsrad $31,%ymm10,%ymm10
+
+# qhasm: 8x t -= b
+# asm 1: vpsubd <b=reg256#11,<t=reg256#10,>t=reg256#10
+# asm 2: vpsubd <b=%ymm10,<t=%ymm9,>t=%ymm9
+vpsubd %ymm10,%ymm9,%ymm9
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#10,<_1x8=reg256#1,>d=reg256#11
+# asm 2: vpand <t=%ymm9,<_1x8=%ymm0,>d=%ymm10
+vpand %ymm9,%ymm0,%ymm10
+
+# qhasm: 8x v0c = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#10,>v0c=reg256#12
+# asm 2: vpsrad $1,<t=%ymm9,>v0c=%ymm11
+vpsrad $1,%ymm9,%ymm11
+
+# qhasm: 8x v0c += d
+# asm 1: vpaddd <d=reg256#11,<v0c=reg256#12,>v0c=reg256#11
+# asm 2: vpaddd <d=%ymm10,<v0c=%ymm11,>v0c=%ymm10
+vpaddd %ymm10,%ymm11,%ymm10
+
+# qhasm: 8x t -= _1x8
+# asm 1: vpsubd <_1x8=reg256#1,<t=reg256#10,>t=reg256#10
+# asm 2: vpsubd <_1x8=%ymm0,<t=%ymm9,>t=%ymm9
+vpsubd %ymm0,%ymm9,%ymm9
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#10,<_1x8=reg256#1,>d=reg256#12
+# asm 2: vpand <t=%ymm9,<_1x8=%ymm0,>d=%ymm11
+vpand %ymm9,%ymm0,%ymm11
+
+# qhasm: 8x v1c = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#10,>v1c=reg256#10
+# asm 2: vpsrad $1,<t=%ymm9,>v1c=%ymm9
+vpsrad $1,%ymm9,%ymm9
+
+# qhasm: 8x v1c += d
+# asm 1: vpaddd <d=reg256#12,<v1c=reg256#10,>v1c=reg256#10
+# asm 2: vpaddd <d=%ymm11,<v1c=%ymm9,>v1c=%ymm9
+vpaddd %ymm11,%ymm9,%ymm9
+
+# qhasm: 8x d = v0c * qx8
+# asm 1: vpmulld <v0c=reg256#11,<qx8=reg256#2,>d=reg256#12
+# asm 2: vpmulld <v0c=%ymm10,<qx8=%ymm1,>d=%ymm11
+vpmulld %ymm10,%ymm1,%ymm11
+
+# qhasm: 8x d <<= 1
+# asm 1: vpslld $1,<d=reg256#12,>d=reg256#12
+# asm 2: vpslld $1,<d=%ymm11,>d=%ymm11
+vpslld $1,%ymm11,%ymm11
+
+# qhasm: 8x d = v - d
+# asm 1: vpsubd <d=reg256#12,<v=reg256#7,>d=reg256#7
+# asm 2: vpsubd <d=%ymm11,<v=%ymm6,>d=%ymm6
+vpsubd %ymm11,%ymm6,%ymm6
+
+# qhasm: 8x v = abs(d)
+# asm 1: vpabsd <d=reg256#7,>v=reg256#7
+# asm 2: vpabsd <d=%ymm6,>v=%ymm6
+vpabsd %ymm6,%ymm6
+
+# qhasm: 8x k += v
+# asm 1: vpaddd <v=reg256#7,<k=reg256#4,>k=reg256#4
+# asm 2: vpaddd <v=%ymm6,<k=%ymm3,>k=%ymm3
+vpaddd %ymm6,%ymm3,%ymm3
+
+# qhasm: v = mem256[input_1 + ctr + 3072]
+# asm 1: vmovupd   3072(<input_1=int64#2,<ctr=int64#4),>v=reg256#7
+# asm 2: vmovupd   3072(<input_1=%rsi,<ctr=%rcx),>v=%ymm6
+vmovupd   3072(%rsi,%rcx),%ymm6
+
+# qhasm: 8x v <<= 3
+# asm 1: vpslld $3,<v=reg256#7,>v=reg256#7
+# asm 2: vpslld $3,<v=%ymm6,>v=%ymm6
+vpslld $3,%ymm6,%ymm6
+
+# qhasm: 8x v += rbit
+# asm 1: vpaddd <rbit=reg256#3,<v=reg256#7,>v=reg256#3
+# asm 2: vpaddd <rbit=%ymm2,<v=%ymm6,>v=%ymm2
+vpaddd %ymm2,%ymm6,%ymm2
+
+# qhasm: 8x b = v * mem256[v2730x8]
+# asm 1: vpmulld v2730x8,<v=reg256#3,>b=reg256#7
+# asm 2: vpmulld v2730x8,<v=%ymm2,>b=%ymm6
+vpmulld v2730x8,%ymm2,%ymm6
+
+# qhasm: 8x t = b >> 25
+# asm 1: vpsrad $25,<b=reg256#7,>t=reg256#7
+# asm 2: vpsrad $25,<b=%ymm6,>t=%ymm6
+vpsrad $25,%ymm6,%ymm6
+
+# qhasm: 8x d = t * qx8
+# asm 1: vpmulld <t=reg256#7,<qx8=reg256#2,>d=reg256#12
+# asm 2: vpmulld <t=%ymm6,<qx8=%ymm1,>d=%ymm11
+vpmulld %ymm6,%ymm1,%ymm11
+
+# qhasm: 8x b = v - d
+# asm 1: vpsubd <d=reg256#12,<v=reg256#3,>b=reg256#12
+# asm 2: vpsubd <d=%ymm11,<v=%ymm2,>b=%ymm11
+vpsubd %ymm11,%ymm2,%ymm11
+
+# qhasm: 8x b += _1x8
+# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#12,>b=reg256#12
+# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm11,>b=%ymm11
+vpaddd %ymm0,%ymm11,%ymm11
+
+# qhasm: 8x b = qx8 - b
+# asm 1: vpsubd <b=reg256#12,<qx8=reg256#2,>b=reg256#12
+# asm 2: vpsubd <b=%ymm11,<qx8=%ymm1,>b=%ymm11
+vpsubd %ymm11,%ymm1,%ymm11
+
+# qhasm: 8x b >>= 31
+# asm 1: vpsrad $31,<b=reg256#12,>b=reg256#12
+# asm 2: vpsrad $31,<b=%ymm11,>b=%ymm11
+vpsrad $31,%ymm11,%ymm11
+
+# qhasm: 8x t -= b
+# asm 1: vpsubd <b=reg256#12,<t=reg256#7,>t=reg256#7
+# asm 2: vpsubd <b=%ymm11,<t=%ymm6,>t=%ymm6
+vpsubd %ymm11,%ymm6,%ymm6
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#7,<_1x8=reg256#1,>d=reg256#12
+# asm 2: vpand <t=%ymm6,<_1x8=%ymm0,>d=%ymm11
+vpand %ymm6,%ymm0,%ymm11
+
+# qhasm: 8x v0d = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#7,>v0d=reg256#13
+# asm 2: vpsrad $1,<t=%ymm6,>v0d=%ymm12
+vpsrad $1,%ymm6,%ymm12
+
+# qhasm: 8x v0d += d
+# asm 1: vpaddd <d=reg256#12,<v0d=reg256#13,>v0d=reg256#12
+# asm 2: vpaddd <d=%ymm11,<v0d=%ymm12,>v0d=%ymm11
+vpaddd %ymm11,%ymm12,%ymm11
+
+# qhasm: 8x t -= _1x8
+# asm 1: vpsubd <_1x8=reg256#1,<t=reg256#7,>t=reg256#7
+# asm 2: vpsubd <_1x8=%ymm0,<t=%ymm6,>t=%ymm6
+vpsubd %ymm0,%ymm6,%ymm6
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#7,<_1x8=reg256#1,>d=reg256#13
+# asm 2: vpand <t=%ymm6,<_1x8=%ymm0,>d=%ymm12
+vpand %ymm6,%ymm0,%ymm12
+
+# qhasm: 8x v1d = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#7,>v1d=reg256#7
+# asm 2: vpsrad $1,<t=%ymm6,>v1d=%ymm6
+vpsrad $1,%ymm6,%ymm6
+
+# qhasm: 8x v1d += d
+# asm 1: vpaddd <d=reg256#13,<v1d=reg256#7,>v1d=reg256#7
+# asm 2: vpaddd <d=%ymm12,<v1d=%ymm6,>v1d=%ymm6
+vpaddd %ymm12,%ymm6,%ymm6
+
+# qhasm: 8x d = v0d * qx8
+# asm 1: vpmulld <v0d=reg256#12,<qx8=reg256#2,>d=reg256#13
+# asm 2: vpmulld <v0d=%ymm11,<qx8=%ymm1,>d=%ymm12
+vpmulld %ymm11,%ymm1,%ymm12
+
+# qhasm: 8x d <<= 1
+# asm 1: vpslld $1,<d=reg256#13,>d=reg256#13
+# asm 2: vpslld $1,<d=%ymm12,>d=%ymm12
+vpslld $1,%ymm12,%ymm12
+
+# qhasm: 8x d = v - d
+# asm 1: vpsubd <d=reg256#13,<v=reg256#3,>d=reg256#3
+# asm 2: vpsubd <d=%ymm12,<v=%ymm2,>d=%ymm2
+vpsubd %ymm12,%ymm2,%ymm2
+
+# qhasm: 8x v = abs(d)
+# asm 1: vpabsd <d=reg256#3,>v=reg256#3
+# asm 2: vpabsd <d=%ymm2,>v=%ymm2
+vpabsd %ymm2,%ymm2
+
+# qhasm: 8x k += v
+# asm 1: vpaddd <v=reg256#3,<k=reg256#4,>k=reg256#3
+# asm 2: vpaddd <v=%ymm2,<k=%ymm3,>k=%ymm2
+vpaddd %ymm2,%ymm3,%ymm2
+
+# qhasm: 8x d = qx8 << 1
+# asm 1: vpslld $1,<qx8=reg256#2,>d=reg256#4
+# asm 2: vpslld $1,<qx8=%ymm1,>d=%ymm3
+vpslld $1,%ymm1,%ymm3
+
+# qhasm: 8x d -= _1x8
+# asm 1: vpsubd <_1x8=reg256#1,<d=reg256#4,>d=reg256#4
+# asm 2: vpsubd <_1x8=%ymm0,<d=%ymm3,>d=%ymm3
+vpsubd %ymm0,%ymm3,%ymm3
+
+# qhasm: 8x k = d - k 
+# asm 1: vpsubd <k=reg256#3,<d=reg256#4,>k=reg256#3
+# asm 2: vpsubd <k=%ymm2,<d=%ymm3,>k=%ymm2
+vpsubd %ymm2,%ymm3,%ymm2
+
+# qhasm: 8x k >>= 31
+# asm 1: vpsrad $31,<k=reg256#3,>k=reg256#3
+# asm 2: vpsrad $31,<k=%ymm2,>k=%ymm2
+vpsrad $31,%ymm2,%ymm2
+
+# qhasm: vtmp0 = v0a ^ v1a
+# asm 1: vpxor <v0a=reg256#6,<v1a=reg256#5,>vtmp0=reg256#4
+# asm 2: vpxor <v0a=%ymm5,<v1a=%ymm4,>vtmp0=%ymm3
+vpxor %ymm5,%ymm4,%ymm3
+
+# qhasm: vtmp0 &= k
+# asm 1: vpand <k=reg256#3,<vtmp0=reg256#4,<vtmp0=reg256#4
+# asm 2: vpand <k=%ymm2,<vtmp0=%ymm3,<vtmp0=%ymm3
+vpand %ymm2,%ymm3,%ymm3
+
+# qhasm: vtmp0 ^= v0a
+# asm 1: vpxor <v0a=reg256#6,<vtmp0=reg256#4,<vtmp0=reg256#4
+# asm 2: vpxor <v0a=%ymm5,<vtmp0=%ymm3,<vtmp0=%ymm3
+vpxor %ymm5,%ymm3,%ymm3
+
+# qhasm: vtmp1 = v0b ^ v1b
+# asm 1: vpxor <v0b=reg256#9,<v1b=reg256#8,>vtmp1=reg256#5
+# asm 2: vpxor <v0b=%ymm8,<v1b=%ymm7,>vtmp1=%ymm4
+vpxor %ymm8,%ymm7,%ymm4
+
+# qhasm: vtmp1 &= k
+# asm 1: vpand <k=reg256#3,<vtmp1=reg256#5,<vtmp1=reg256#5
+# asm 2: vpand <k=%ymm2,<vtmp1=%ymm4,<vtmp1=%ymm4
+vpand %ymm2,%ymm4,%ymm4
+
+# qhasm: vtmp1 ^= v0b
+# asm 1: vpxor <v0b=reg256#9,<vtmp1=reg256#5,<vtmp1=reg256#5
+# asm 2: vpxor <v0b=%ymm8,<vtmp1=%ymm4,<vtmp1=%ymm4
+vpxor %ymm8,%ymm4,%ymm4
+
+# qhasm: vtmp2 = v0c ^ v1c
+# asm 1: vpxor <v0c=reg256#11,<v1c=reg256#10,>vtmp2=reg256#6
+# asm 2: vpxor <v0c=%ymm10,<v1c=%ymm9,>vtmp2=%ymm5
+vpxor %ymm10,%ymm9,%ymm5
+
+# qhasm: vtmp2 &= k
+# asm 1: vpand <k=reg256#3,<vtmp2=reg256#6,<vtmp2=reg256#6
+# asm 2: vpand <k=%ymm2,<vtmp2=%ymm5,<vtmp2=%ymm5
+vpand %ymm2,%ymm5,%ymm5
+
+# qhasm: vtmp2 ^= v0c
+# asm 1: vpxor <v0c=reg256#11,<vtmp2=reg256#6,<vtmp2=reg256#6
+# asm 2: vpxor <v0c=%ymm10,<vtmp2=%ymm5,<vtmp2=%ymm5
+vpxor %ymm10,%ymm5,%ymm5
+
+# qhasm: vtmp3 = v0d ^ v1d
+# asm 1: vpxor <v0d=reg256#12,<v1d=reg256#7,>vtmp3=reg256#7
+# asm 2: vpxor <v0d=%ymm11,<v1d=%ymm6,>vtmp3=%ymm6
+vpxor %ymm11,%ymm6,%ymm6
+
+# qhasm: vtmp3 &= k
+# asm 1: vpand <k=reg256#3,<vtmp3=reg256#7,<vtmp3=reg256#7
+# asm 2: vpand <k=%ymm2,<vtmp3=%ymm6,<vtmp3=%ymm6
+vpand %ymm2,%ymm6,%ymm6
+
+# qhasm: vtmp3 ^= v0d
+# asm 1: vpxor <v0d=reg256#12,<vtmp3=reg256#7,<vtmp3=reg256#7
+# asm 2: vpxor <v0d=%ymm11,<vtmp3=%ymm6,<vtmp3=%ymm6
+vpxor %ymm11,%ymm6,%ymm6
+
+# qhasm: _3x8 = mem256[v3x8]
+# asm 1: vmovdqu v3x8,>_3x8=reg256#8
+# asm 2: vmovdqu v3x8,>_3x8=%ymm7
+vmovdqu v3x8,%ymm7
+
+# qhasm: 8x c = vtmp0 - vtmp3
+# asm 1: vpsubd <vtmp3=reg256#7,<vtmp0=reg256#4,>c=reg256#4
+# asm 2: vpsubd <vtmp3=%ymm6,<vtmp0=%ymm3,>c=%ymm3
+vpsubd %ymm6,%ymm3,%ymm3
+
+# qhasm:    c &= _3x8
+# asm 1: vpand <_3x8=reg256#8,<c=reg256#4,<c=reg256#4
+# asm 2: vpand <_3x8=%ymm7,<c=%ymm3,<c=%ymm3
+vpand %ymm7,%ymm3,%ymm3
+
+# qhasm: mem256[input_0 + ctr + 0] = c
+# asm 1: vmovupd   <c=reg256#4,0(<input_0=int64#1,<ctr=int64#4)
+# asm 2: vmovupd   <c=%ymm3,0(<input_0=%rdi,<ctr=%rcx)
+vmovupd   %ymm3,0(%rdi,%rcx)
+
+# qhasm: 8x c = vtmp1 - vtmp3
+# asm 1: vpsubd <vtmp3=reg256#7,<vtmp1=reg256#5,>c=reg256#4
+# asm 2: vpsubd <vtmp3=%ymm6,<vtmp1=%ymm4,>c=%ymm3
+vpsubd %ymm6,%ymm4,%ymm3
+
+# qhasm:    c &= _3x8
+# asm 1: vpand <_3x8=reg256#8,<c=reg256#4,<c=reg256#4
+# asm 2: vpand <_3x8=%ymm7,<c=%ymm3,<c=%ymm3
+vpand %ymm7,%ymm3,%ymm3
+
+# qhasm: mem256[input_0 + ctr + 1024] = c
+# asm 1: vmovupd   <c=reg256#4,1024(<input_0=int64#1,<ctr=int64#4)
+# asm 2: vmovupd   <c=%ymm3,1024(<input_0=%rdi,<ctr=%rcx)
+vmovupd   %ymm3,1024(%rdi,%rcx)
+
+# qhasm: 8x c = vtmp2 - vtmp3
+# asm 1: vpsubd <vtmp3=reg256#7,<vtmp2=reg256#6,>c=reg256#4
+# asm 2: vpsubd <vtmp3=%ymm6,<vtmp2=%ymm5,>c=%ymm3
+vpsubd %ymm6,%ymm5,%ymm3
+
+# qhasm:    c &= _3x8
+# asm 1: vpand <_3x8=reg256#8,<c=reg256#4,<c=reg256#4
+# asm 2: vpand <_3x8=%ymm7,<c=%ymm3,<c=%ymm3
+vpand %ymm7,%ymm3,%ymm3
+
+# qhasm: mem256[input_0 + ctr + 2048] = c
+# asm 1: vmovupd   <c=reg256#4,2048(<input_0=int64#1,<ctr=int64#4)
+# asm 2: vmovupd   <c=%ymm3,2048(<input_0=%rdi,<ctr=%rcx)
+vmovupd   %ymm3,2048(%rdi,%rcx)
+
+# qhasm: 8x c = vtmp3 << 1
+# asm 1: vpslld $1,<vtmp3=reg256#7,>c=reg256#4
+# asm 2: vpslld $1,<vtmp3=%ymm6,>c=%ymm3
+vpslld $1,%ymm6,%ymm3
+
+# qhasm: 8x c -= k
+# asm 1: vpsubd <k=reg256#3,<c=reg256#4,>c=reg256#3
+# asm 2: vpsubd <k=%ymm2,<c=%ymm3,>c=%ymm2
+vpsubd %ymm2,%ymm3,%ymm2
+
+# qhasm:    c &= _3x8
+# asm 1: vpand <_3x8=reg256#8,<c=reg256#3,<c=reg256#3
+# asm 2: vpand <_3x8=%ymm7,<c=%ymm2,<c=%ymm2
+vpand %ymm7,%ymm2,%ymm2
+
+# qhasm: mem256[input_0 + ctr + 3072] = c
+# asm 1: vmovupd   <c=reg256#3,3072(<input_0=int64#1,<ctr=int64#4)
+# asm 2: vmovupd   <c=%ymm2,3072(<input_0=%rdi,<ctr=%rcx)
+vmovupd   %ymm2,3072(%rdi,%rcx)
+
+# qhasm: (uint64) ctr >>= 5
+# asm 1: shr  $5,<ctr=int64#4
+# asm 2: shr  $5,<ctr=%rcx
+shr  $5,%rcx
+
+# qhasm: ctr += 1
+# asm 1: add  $1,<ctr=int64#4
+# asm 2: add  $1,<ctr=%rcx
+add  $1,%rcx
+
+# qhasm: unsigned<? ctr - 32
+# asm 1: cmp  $32,<ctr=int64#4
+# asm 2: cmp  $32,<ctr=%rcx
+cmp  $32,%rcx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto looptop if unsigned<
+jb ._looptop
+
+# qhasm: return
+add %r11,%rsp
+ret
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.c b/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.c
new file mode 100644
index 0000000000000000000000000000000000000000..b9f7a6c4f518f60608f0f616472295bcda9ec43b
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.c
@@ -0,0 +1,127 @@
+#include "poly.h"
+#include "randombytes.h"
+#include "error_correction.h"
+#include "fips202.h"
+
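+/*
+ * Message formats, as implied by the encode/decode helpers below:
+ *   Alice -> Bob: serialized public-key polynomial (POLY_BYTES bytes)
+ *                 followed by the seed for 'a' (NEWHOPE_SEEDBYTES bytes).
+ *   Bob -> Alice: serialized polynomial (POLY_BYTES bytes) followed by the
+ *                 reconciliation data, packed four 2-bit coefficients per byte.
+ */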
+static void encode_a(unsigned char *r, const poly *pk, const unsigned char *seed)
+{
+  int i;
+  poly_tobytes(r, pk);
+  for(i=0;i<NEWHOPE_SEEDBYTES;i++)
+    r[POLY_BYTES+i] = seed[i];
+}
+
+static void decode_a(poly *pk, unsigned char *seed, const unsigned char *r)
+{
+  int i;
+  poly_frombytes(pk, r);
+  for(i=0;i<NEWHOPE_SEEDBYTES;i++)
+    seed[i] = r[POLY_BYTES+i];
+}
+
+static void encode_b(unsigned char *r, const poly *b, const poly *c)
+{
+  int i;
+  poly_tobytes(r,b);
+  for(i=0;i<PARAM_N/4;i++)
+    r[POLY_BYTES+i] = c->coeffs[4*i] | (c->coeffs[4*i+1] << 2) | (c->coeffs[4*i+2] << 4) | (c->coeffs[4*i+3] << 6);
+}
+
+static void decode_b(poly *b, poly *c, const unsigned char *r)
+{
+  int i;
+  poly_frombytes(b, r);
+  for(i=0;i<PARAM_N/4;i++)
+  {
+    c->coeffs[4*i+0] =  r[POLY_BYTES+i]       & 0x03;
+    c->coeffs[4*i+1] = (r[POLY_BYTES+i] >> 2) & 0x03;
+    c->coeffs[4*i+2] = (r[POLY_BYTES+i] >> 4) & 0x03;
+    c->coeffs[4*i+3] = (r[POLY_BYTES+i] >> 6);
+  }
+}
+
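+/* Expand the public seed into the polynomial a (uniform sampling via poly_uniform). */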
+static void gen_a(poly *a, const unsigned char *seed)
+{
+    poly_uniform(a,seed);
+}
+
+
+// API FUNCTIONS 
+
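+/*
+ * Protocol flow: the initiator (Alice) calls newhope_keygen(), the responder
+ * (Bob) answers with newhope_sharedb(), and the initiator completes the
+ * exchange with newhope_shareda(); both sides end up with the same 32-byte
+ * shared key.
+ */
+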
+void newhope_keygen(unsigned char *send, poly *sk)
+{
+  poly a, e, r, pk;
+  unsigned char seed[NEWHOPE_SEEDBYTES];
+  unsigned char noiseseed[32];
+
+  randombytes(seed, NEWHOPE_SEEDBYTES);
+  sha3256(seed, seed, NEWHOPE_SEEDBYTES); /* Don't send output of system RNG */
+  randombytes(noiseseed, 32);
+
+  gen_a(&a, seed);
+
+  poly_getnoise(sk,noiseseed,0);
+  poly_ntt(sk);
+  
+  poly_getnoise(&e,noiseseed,1);
+  poly_ntt(&e);
+
+  poly_pointwise(&r,sk,&a);
+  poly_add(&pk,&e,&r);
+
+  encode_a(send, &pk, seed);
+}
+
+
+void newhope_sharedb(unsigned char *sharedkey, unsigned char *send, const unsigned char *received)
+{
+  poly sp, ep, v, a, pka, c, epp, bp;
+  unsigned char seed[NEWHOPE_SEEDBYTES];
+  unsigned char noiseseed[32];
+  
+  randombytes(noiseseed, 32);
+
+  decode_a(&pka, seed, received);
+  gen_a(&a, seed);
+
+  poly_getnoise(&sp,noiseseed,0);
+  poly_ntt(&sp);
+  poly_getnoise(&ep,noiseseed,1);
+  poly_ntt(&ep);
+
+  poly_pointwise(&bp, &a, &sp);
+  poly_add(&bp, &bp, &ep);
+  
+  poly_pointwise(&v, &pka, &sp);
+  poly_invntt(&v);
+
+  poly_getnoise(&epp,noiseseed,2);
+  poly_add(&v, &v, &epp);
+
+  helprec(&c, &v, noiseseed, 3);
+
+  encode_b(send, &bp, &c);
+  
+  rec(sharedkey, &v, &c);
+
+#ifndef STATISTICAL_TEST 
+  sha3256(sharedkey, sharedkey, 32);
+#endif
+}
+
+
+void newhope_shareda(unsigned char *sharedkey, const poly *sk, const unsigned char *received)
+{
+  poly v,bp, c;
+
+  decode_b(&bp, &c, received);
+
+  poly_pointwise(&v,sk,&bp);
+  poly_invntt(&v);
+ 
+  rec(sharedkey, &v, &c);
+
+#ifndef STATISTICAL_TEST 
+  sha3256(sharedkey, sharedkey, 32); 
+#endif
+}
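+
+/*
+ * Usage sketch (hypothetical test driver, not part of the library; error
+ * handling and RNG seeding omitted):
+ *
+ *   poly ask;
+ *   unsigned char amsg[POLY_BYTES + NEWHOPE_SEEDBYTES];   // Alice -> Bob
+ *   unsigned char bmsg[POLY_BYTES + PARAM_N/4];           // Bob -> Alice
+ *   unsigned char ka[32], kb[32];
+ *
+ *   newhope_keygen(amsg, &ask);       // Alice: generate keypair and first message
+ *   newhope_sharedb(kb, bmsg, amsg);  // Bob: derive key, produce second message
+ *   newhope_shareda(ka, &ask, bmsg);  // Alice: derive the same 32-byte key
+ */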
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.h b/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.h
new file mode 100644
index 0000000000000000000000000000000000000000..34e519e6b8afdd4ef583d67e1a2a9ad35c289b72
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/newhope.h
@@ -0,0 +1,15 @@
+#ifndef NEWHOPE_H
+#define NEWHOPE_H
+
+#include "poly.h"
+#include "randombytes.h"
+#include "crypto_stream_chacha20.h"
+#include "error_correction.h"
+#include <math.h>
+#include <stdio.h>
+
+void newhope_keygen(unsigned char *send, poly *sk);
+void newhope_sharedb(unsigned char *sharedkey, unsigned char *send, const unsigned char *received);
+void newhope_shareda(unsigned char *sharedkey, const poly *ska, const unsigned char *received);
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/ntt.h b/crypt/liboqs/kex_rlwe_newhope/avx2/ntt.h
new file mode 100644
index 0000000000000000000000000000000000000000..c24b1371a98ea4100305f6bbbaa48bdba1151f92
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/ntt.h
@@ -0,0 +1,15 @@
+#ifndef NTT_H
+#define NTT_H
+
+#include "inttypes.h"
+
+extern int32_t psis_bitrev[];
+extern int32_t psis_inv[];
+extern double omegas_double[];
+extern double omegas_inv_double[];
+
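+/* NTT helpers: bitrev_vector applies the bit-reversal permutation to a
+   coefficient vector, pwmul_double multiplies coefficients pointwise by the
+   given factors, and ntt_double is the core transform, implemented in
+   ntt_double.s using double-precision (AVX2) arithmetic. */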
+void bitrev_vector(int32_t* poly);
+void pwmul_double(int32_t* poly, const int32_t* factors);
+void ntt_double(int32_t*,const double*,const double*);
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/ntt_double.s b/crypt/liboqs/kex_rlwe_newhope/avx2/ntt_double.s
new file mode 100644
index 0000000000000000000000000000000000000000..1ec429f7243a22ff49a471eb000cdc8e9028e699
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/ntt_double.s
@@ -0,0 +1,2209 @@
+
+# qhasm: int64 input_0
+
+# qhasm: int64 input_1
+
+# qhasm: int64 input_2
+
+# qhasm: int64 input_3
+
+# qhasm: int64 input_4
+
+# qhasm: int64 input_5
+
+# qhasm: stack64 input_6
+
+# qhasm: stack64 input_7
+
+# qhasm: int64 caller_r11
+
+# qhasm: int64 caller_r12
+
+# qhasm: int64 caller_r13
+
+# qhasm: int64 caller_r14
+
+# qhasm: int64 caller_r15
+
+# qhasm: int64 caller_rbx
+
+# qhasm: int64 caller_rbp
+
+# qhasm: int64 ctri
+
+# qhasm: int64 ctrj
+
+# qhasm: int64 ap
+
+# qhasm: int64 tp
+
+# qhasm: int64 wp
+
+# qhasm: int64 pp
+
+# qhasm: reg256 c
+
+# qhasm: reg256 qinv
+
+# qhasm: reg256 q
+
+# qhasm: reg256 t0
+
+# qhasm: reg256 t1
+
+# qhasm: reg256 t2
+
+# qhasm: reg256 t3
+
+# qhasm: reg256 w
+
+# qhasm: reg256 a0
+
+# qhasm: reg256 a1
+
+# qhasm: reg256 a2
+
+# qhasm: reg256 a3
+
+# qhasm: reg256 r0
+
+# qhasm: reg256 r1
+
+# qhasm: reg256 r2
+
+# qhasm: reg256 r3
+
+# qhasm: enter ntt_double
+.p2align 5
+.global _ntt_double
+.global ntt_double
+_ntt_double:
+ntt_double:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: q = mem256[q8]
+# asm 1: vmovdqu q8,>q=reg256#1
+# asm 2: vmovdqu q8,>q=%ymm0
+vmovdqu q8,%ymm0
+
+# qhasm: qinv = mem256[qinv16]
+# asm 1: vmovdqu qinv16,>qinv=reg256#2
+# asm 2: vmovdqu qinv16,>qinv=%ymm1
+vmovdqu qinv16,%ymm1
+
+# qhasm: ctrj = 64
+# asm 1: mov  $64,>ctrj=int64#4
+# asm 2: mov  $64,>ctrj=%rcx
+mov  $64,%rcx
+
+# qhasm: ap = input_0
+# asm 1: mov  <input_0=int64#1,>ap=int64#5
+# asm 2: mov  <input_0=%rdi,>ap=%r8
+mov  %rdi,%r8
+
+# qhasm: tp = input_2
+# asm 1: mov  <input_2=int64#3,>tp=int64#6
+# asm 2: mov  <input_2=%rdx,>tp=%r9
+mov  %rdx,%r9
+
+# qhasm: wp = input_1 + 8192
+# asm 1: lea  8192(<input_1=int64#2),>wp=int64#7
+# asm 2: lea  8192(<input_1=%rsi),>wp=%rax
+lea  8192(%rsi),%rax
+
+# qhasm: pp = input_1
+# asm 1: mov  <input_1=int64#2,>pp=int64#2
+# asm 2: mov  <input_1=%rsi,>pp=%rsi
+mov  %rsi,%rsi
+
+# qhasm: a0 = (4x double)(4x int32)mem128[ap + 0]
+# asm 1: vcvtdq2pd 0(<ap=int64#5),>a0=reg256#3
+# asm 2: vcvtdq2pd 0(<ap=%r8),>a0=%ymm2
+vcvtdq2pd 0(%r8),%ymm2
+
+# qhasm: a1 = (4x double)(4x int32)mem128[ap + 16]
+# asm 1: vcvtdq2pd 16(<ap=int64#5),>a1=reg256#4
+# asm 2: vcvtdq2pd 16(<ap=%r8),>a1=%ymm3
+vcvtdq2pd 16(%r8),%ymm3
+
+# qhasm: a2 = (4x double)(4x int32)mem128[ap + 32]
+# asm 1: vcvtdq2pd 32(<ap=int64#5),>a2=reg256#5
+# asm 2: vcvtdq2pd 32(<ap=%r8),>a2=%ymm4
+vcvtdq2pd 32(%r8),%ymm4
+
+# qhasm: a3 = (4x double)(4x int32)mem128[ap + 48]
+# asm 1: vcvtdq2pd 48(<ap=int64#5),>a3=reg256#6
+# asm 2: vcvtdq2pd 48(<ap=%r8),>a3=%ymm5
+vcvtdq2pd 48(%r8),%ymm5
+
+# qhasm: r3 = mem256[neg2]
+# asm 1: vmovdqu neg2,>r3=reg256#7
+# asm 2: vmovdqu neg2,>r3=%ymm6
+vmovdqu neg2,%ymm6
+
+# qhasm: 4x r0 = approx a0 * r3
+# asm 1: vmulpd <a0=reg256#3,<r3=reg256#7,>r0=reg256#8
+# asm 2: vmulpd <a0=%ymm2,<r3=%ymm6,>r0=%ymm7
+vmulpd %ymm2,%ymm6,%ymm7
+
+# qhasm: 4x r1 = approx a1 * r3
+# asm 1: vmulpd <a1=reg256#4,<r3=reg256#7,>r1=reg256#9
+# asm 2: vmulpd <a1=%ymm3,<r3=%ymm6,>r1=%ymm8
+vmulpd %ymm3,%ymm6,%ymm8
+
+# qhasm: 4x r2 = approx a2 * r3
+# asm 1: vmulpd <a2=reg256#5,<r3=reg256#7,>r2=reg256#10
+# asm 2: vmulpd <a2=%ymm4,<r3=%ymm6,>r2=%ymm9
+vmulpd %ymm4,%ymm6,%ymm9
+
+# qhasm: 4x r3 approx*= a3
+# asm 1: vmulpd <a3=reg256#6,<r3=reg256#7,>r3=reg256#7
+# asm 2: vmulpd <a3=%ymm5,<r3=%ymm6,>r3=%ymm6
+vmulpd %ymm5,%ymm6,%ymm6
+
+# qhasm: r0[0,1,2,3] = a0[0]approx+a0[1],r0[0]approx+r0[1],a0[2]approx+a0[3],r0[2]approx+r0[3]
+# asm 1: vhaddpd <r0=reg256#8,<a0=reg256#3,>r0=reg256#3
+# asm 2: vhaddpd <r0=%ymm7,<a0=%ymm2,>r0=%ymm2
+vhaddpd %ymm7,%ymm2,%ymm2
+
+# qhasm: w = mem256[pp + 0]
+# asm 1: vmovupd   0(<pp=int64#2),>w=reg256#8
+# asm 2: vmovupd   0(<pp=%rsi),>w=%ymm7
+vmovupd   0(%rsi),%ymm7
+
+# qhasm: 4x r0 approx*= w
+# asm 1: vmulpd <w=reg256#8,<r0=reg256#3,>r0=reg256#3
+# asm 2: vmulpd <w=%ymm7,<r0=%ymm2,>r0=%ymm2
+vmulpd %ymm7,%ymm2,%ymm2
+
+# qhasm: a0[0,1,2,3] = r0[2,3],r0[0,1]
+# asm 1: vperm2f128 $0x21,<r0=reg256#3,<r0=reg256#3,>a0=reg256#8
+# asm 2: vperm2f128 $0x21,<r0=%ymm2,<r0=%ymm2,>a0=%ymm7
+vperm2f128 $0x21,%ymm2,%ymm2,%ymm7
+
+# qhasm: r1[0,1,2,3] = a1[0]approx+a1[1],r1[0]approx+r1[1],a1[2]approx+a1[3],r1[2]approx+r1[3]
+# asm 1: vhaddpd <r1=reg256#9,<a1=reg256#4,>r1=reg256#4
+# asm 2: vhaddpd <r1=%ymm8,<a1=%ymm3,>r1=%ymm3
+vhaddpd %ymm8,%ymm3,%ymm3
+
+# qhasm: w = mem256[pp + 32]
+# asm 1: vmovupd   32(<pp=int64#2),>w=reg256#9
+# asm 2: vmovupd   32(<pp=%rsi),>w=%ymm8
+vmovupd   32(%rsi),%ymm8
+
+# qhasm: 4x r1 approx*= w
+# asm 1: vmulpd <w=reg256#9,<r1=reg256#4,>r1=reg256#4
+# asm 2: vmulpd <w=%ymm8,<r1=%ymm3,>r1=%ymm3
+vmulpd %ymm8,%ymm3,%ymm3
+
+# qhasm: a1[0,1,2,3] = r1[2,3],r1[0,1]
+# asm 1: vperm2f128 $0x21,<r1=reg256#4,<r1=reg256#4,>a1=reg256#9
+# asm 2: vperm2f128 $0x21,<r1=%ymm3,<r1=%ymm3,>a1=%ymm8
+vperm2f128 $0x21,%ymm3,%ymm3,%ymm8
+
+# qhasm: r2[0,1,2,3] = a2[0]approx+a2[1],r2[0]approx+r2[1],a2[2]approx+a2[3],r2[2]approx+r2[3]
+# asm 1: vhaddpd <r2=reg256#10,<a2=reg256#5,>r2=reg256#5
+# asm 2: vhaddpd <r2=%ymm9,<a2=%ymm4,>r2=%ymm4
+vhaddpd %ymm9,%ymm4,%ymm4
+
+# qhasm: w = mem256[pp + 64]
+# asm 1: vmovupd   64(<pp=int64#2),>w=reg256#10
+# asm 2: vmovupd   64(<pp=%rsi),>w=%ymm9
+vmovupd   64(%rsi),%ymm9
+
+# qhasm: 4x r2 approx*= w
+# asm 1: vmulpd <w=reg256#10,<r2=reg256#5,>r2=reg256#5
+# asm 2: vmulpd <w=%ymm9,<r2=%ymm4,>r2=%ymm4
+vmulpd %ymm9,%ymm4,%ymm4
+
+# qhasm: a2[0,1,2,3] = r2[2,3],r2[0,1]
+# asm 1: vperm2f128 $0x21,<r2=reg256#5,<r2=reg256#5,>a2=reg256#10
+# asm 2: vperm2f128 $0x21,<r2=%ymm4,<r2=%ymm4,>a2=%ymm9
+vperm2f128 $0x21,%ymm4,%ymm4,%ymm9
+
+# qhasm: r3[0,1,2,3] = a3[0]approx+a3[1],r3[0]approx+r3[1],a3[2]approx+a3[3],r3[2]approx+r3[3]
+# asm 1: vhaddpd <r3=reg256#7,<a3=reg256#6,>r3=reg256#6
+# asm 2: vhaddpd <r3=%ymm6,<a3=%ymm5,>r3=%ymm5
+vhaddpd %ymm6,%ymm5,%ymm5
+
+# qhasm: w = mem256[pp + 96]
+# asm 1: vmovupd   96(<pp=int64#2),>w=reg256#7
+# asm 2: vmovupd   96(<pp=%rsi),>w=%ymm6
+vmovupd   96(%rsi),%ymm6
+
+# qhasm: 4x r3 approx*= w
+# asm 1: vmulpd <w=reg256#7,<r3=reg256#6,>r3=reg256#6
+# asm 2: vmulpd <w=%ymm6,<r3=%ymm5,>r3=%ymm5
+vmulpd %ymm6,%ymm5,%ymm5
+
+# qhasm: a3[0,1,2,3] = r3[2,3],r3[0,1]
+# asm 1: vperm2f128 $0x21,<r3=reg256#6,<r3=reg256#6,>a3=reg256#7
+# asm 2: vperm2f128 $0x21,<r3=%ymm5,<r3=%ymm5,>a3=%ymm6
+vperm2f128 $0x21,%ymm5,%ymm5,%ymm6
+
+# qhasm: c = mem256[neg4]
+# asm 1: vmovdqu neg4,>c=reg256#11
+# asm 2: vmovdqu neg4,>c=%ymm10
+vmovdqu neg4,%ymm10
+
+# qhasm: 4x a0 approx+= r0 * c
+# asm 1: vfmadd231pd <r0=reg256#3,<c=reg256#11,<a0=reg256#8
+# asm 2: vfmadd231pd <r0=%ymm2,<c=%ymm10,<a0=%ymm7
+vfmadd231pd %ymm2,%ymm10,%ymm7
+
+# qhasm: 4x a1 approx+= r1 * c
+# asm 1: vfmadd231pd <r1=reg256#4,<c=reg256#11,<a1=reg256#9
+# asm 2: vfmadd231pd <r1=%ymm3,<c=%ymm10,<a1=%ymm8
+vfmadd231pd %ymm3,%ymm10,%ymm8
+
+# qhasm: w = mem256[wp + 32]
+# asm 1: vmovupd   32(<wp=int64#7),>w=reg256#3
+# asm 2: vmovupd   32(<wp=%rax),>w=%ymm2
+vmovupd   32(%rax),%ymm2
+
+# qhasm: 4x a1 approx*= w
+# asm 1: vmulpd <w=reg256#3,<a1=reg256#9,>a1=reg256#3
+# asm 2: vmulpd <w=%ymm2,<a1=%ymm8,>a1=%ymm2
+vmulpd %ymm2,%ymm8,%ymm2
+
+# qhasm: w = mem256[wp + 64]
+# asm 1: vmovupd   64(<wp=int64#7),>w=reg256#4
+# asm 2: vmovupd   64(<wp=%rax),>w=%ymm3
+vmovupd   64(%rax),%ymm3
+
+# qhasm: 4x a2 approx+= r2 * c
+# asm 1: vfmadd231pd <r2=reg256#5,<c=reg256#11,<a2=reg256#10
+# asm 2: vfmadd231pd <r2=%ymm4,<c=%ymm10,<a2=%ymm9
+vfmadd231pd %ymm4,%ymm10,%ymm9
+
+# qhasm: 4x a2 approx*= w
+# asm 1: vmulpd <w=reg256#4,<a2=reg256#10,>a2=reg256#4
+# asm 2: vmulpd <w=%ymm3,<a2=%ymm9,>a2=%ymm3
+vmulpd %ymm3,%ymm9,%ymm3
+
+# qhasm: w = mem256[wp + 96]
+# asm 1: vmovupd   96(<wp=int64#7),>w=reg256#5
+# asm 2: vmovupd   96(<wp=%rax),>w=%ymm4
+vmovupd   96(%rax),%ymm4
+
+# qhasm: 4x a3 approx+= r3 * c
+# asm 1: vfmadd231pd <r3=reg256#6,<c=reg256#11,<a3=reg256#7
+# asm 2: vfmadd231pd <r3=%ymm5,<c=%ymm10,<a3=%ymm6
+vfmadd231pd %ymm5,%ymm10,%ymm6
+
+# qhasm: 4x a3 approx*= w
+# asm 1: vmulpd <w=reg256#5,<a3=reg256#7,>a3=reg256#5
+# asm 2: vmulpd <w=%ymm4,<a3=%ymm6,>a3=%ymm4
+vmulpd %ymm4,%ymm6,%ymm4
+
+# qhasm: 4x c = approx a1 * qinv
+# asm 1: vmulpd <a1=reg256#3,<qinv=reg256#2,>c=reg256#6
+# asm 2: vmulpd <a1=%ymm2,<qinv=%ymm1,>c=%ymm5
+vmulpd %ymm2,%ymm1,%ymm5
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6
+# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5
+vroundpd $9,%ymm5,%ymm5
+
+# qhasm: 4x a1 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a1=reg256#3
+# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a1=%ymm2
+vfnmadd231pd %ymm5,%ymm0,%ymm2
+
+# qhasm: 4x c = approx a2 * qinv
+# asm 1: vmulpd <a2=reg256#4,<qinv=reg256#2,>c=reg256#6
+# asm 2: vmulpd <a2=%ymm3,<qinv=%ymm1,>c=%ymm5
+vmulpd %ymm3,%ymm1,%ymm5
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6
+# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5
+vroundpd $9,%ymm5,%ymm5
+
+# qhasm: 4x a2 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a2=reg256#4
+# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a2=%ymm3
+vfnmadd231pd %ymm5,%ymm0,%ymm3
+
+# qhasm: 4x c = approx a3 * qinv
+# asm 1: vmulpd <a3=reg256#5,<qinv=reg256#2,>c=reg256#6
+# asm 2: vmulpd <a3=%ymm4,<qinv=%ymm1,>c=%ymm5
+vmulpd %ymm4,%ymm1,%ymm5
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6
+# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5
+vroundpd $9,%ymm5,%ymm5
+
+# qhasm: 4x a3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a3=reg256#5
+# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a3=%ymm4
+vfnmadd231pd %ymm5,%ymm0,%ymm4
+
+# qhasm: 4x r0 = approx a0 + a1
+# asm 1: vaddpd <a0=reg256#8,<a1=reg256#3,>r0=reg256#6
+# asm 2: vaddpd <a0=%ymm7,<a1=%ymm2,>r0=%ymm5
+vaddpd %ymm7,%ymm2,%ymm5
+
+# qhasm: 4x r2 = approx a2 + a3
+# asm 1: vaddpd <a2=reg256#4,<a3=reg256#5,>r2=reg256#7
+# asm 2: vaddpd <a2=%ymm3,<a3=%ymm4,>r2=%ymm6
+vaddpd %ymm3,%ymm4,%ymm6
+
+# qhasm: 4x r1 = approx a0 - a1
+# asm 1: vsubpd <a1=reg256#3,<a0=reg256#8,>r1=reg256#3
+# asm 2: vsubpd <a1=%ymm2,<a0=%ymm7,>r1=%ymm2
+vsubpd %ymm2,%ymm7,%ymm2
+
+# qhasm: w = mem64[wp + 136],mem64[wp + 136],mem64[wp + 136],mem64[wp + 136]
+# asm 1: vbroadcastsd 136(<wp=int64#7),>w=reg256#8
+# asm 2: vbroadcastsd 136(<wp=%rax),>w=%ymm7
+vbroadcastsd 136(%rax),%ymm7
+
+# qhasm: 4x r3 = approx a2 - a3
+# asm 1: vsubpd <a3=reg256#5,<a2=reg256#4,>r3=reg256#4
+# asm 2: vsubpd <a3=%ymm4,<a2=%ymm3,>r3=%ymm3
+vsubpd %ymm4,%ymm3,%ymm3
+
+# qhasm: 4x r3 approx*= w
+# asm 1: vmulpd <w=reg256#8,<r3=reg256#4,>r3=reg256#4
+# asm 2: vmulpd <w=%ymm7,<r3=%ymm3,>r3=%ymm3
+vmulpd %ymm7,%ymm3,%ymm3
+
+# qhasm: 4x a0 = approx r0 + r2
+# asm 1: vaddpd <r0=reg256#6,<r2=reg256#7,>a0=reg256#5
+# asm 2: vaddpd <r0=%ymm5,<r2=%ymm6,>a0=%ymm4
+vaddpd %ymm5,%ymm6,%ymm4
+
+# qhasm: 4x a1 = approx r1 + r3
+# asm 1: vaddpd <r1=reg256#3,<r3=reg256#4,>a1=reg256#8
+# asm 2: vaddpd <r1=%ymm2,<r3=%ymm3,>a1=%ymm7
+vaddpd %ymm2,%ymm3,%ymm7
+
+# qhasm: 4x a2 = approx r0 - r2
+# asm 1: vsubpd <r2=reg256#7,<r0=reg256#6,>a2=reg256#6
+# asm 2: vsubpd <r2=%ymm6,<r0=%ymm5,>a2=%ymm5
+vsubpd %ymm6,%ymm5,%ymm5
+
+# qhasm: 4x a3 = approx r1 - r3
+# asm 1: vsubpd <r3=reg256#4,<r1=reg256#3,>a3=reg256#3
+# asm 2: vsubpd <r3=%ymm3,<r1=%ymm2,>a3=%ymm2
+vsubpd %ymm3,%ymm2,%ymm2
+
+# qhasm: mem256[tp +  0] = a0
+# asm 1: vmovupd   <a0=reg256#5,0(<tp=int64#6)
+# asm 2: vmovupd   <a0=%ymm4,0(<tp=%r9)
+vmovupd   %ymm4,0(%r9)
+
+# qhasm: mem256[tp + 32] = a1
+# asm 1: vmovupd   <a1=reg256#8,32(<tp=int64#6)
+# asm 2: vmovupd   <a1=%ymm7,32(<tp=%r9)
+vmovupd   %ymm7,32(%r9)
+
+# qhasm: mem256[tp + 64] = a2
+# asm 1: vmovupd   <a2=reg256#6,64(<tp=int64#6)
+# asm 2: vmovupd   <a2=%ymm5,64(<tp=%r9)
+vmovupd   %ymm5,64(%r9)
+
+# qhasm: mem256[tp + 96] = a3
+# asm 1: vmovupd   <a3=reg256#3,96(<tp=int64#6)
+# asm 2: vmovupd   <a3=%ymm2,96(<tp=%r9)
+vmovupd   %ymm2,96(%r9)
+
+# qhasm: ap+= 64
+# asm 1: add  $64,<ap=int64#5
+# asm 2: add  $64,<ap=%r8
+add  $64,%r8
+
+# qhasm: tp+= 128
+# asm 1: add  $128,<tp=int64#6
+# asm 2: add  $128,<tp=%r9
+add  $128,%r9
+
+# qhasm: wp+= 152
+# asm 1: add  $152,<wp=int64#7
+# asm 2: add  $152,<wp=%rax
+add  $152,%rax
+
+# qhasm: pp+= 128
+# asm 1: add  $128,<pp=int64#2
+# asm 2: add  $128,<pp=%rsi
+add  $128,%rsi
+
+# qhasm: ctrj-=1
+# asm 1: sub  $1,<ctrj=int64#4
+# asm 2: sub  $1,<ctrj=%rcx
+sub  $1,%rcx
+
+# qhasm: loopinreg:
+._loopinreg:
+
+# qhasm: a0 = (4x double)(4x int32)mem128[ap + 0]
+# asm 1: vcvtdq2pd 0(<ap=int64#5),>a0=reg256#3
+# asm 2: vcvtdq2pd 0(<ap=%r8),>a0=%ymm2
+vcvtdq2pd 0(%r8),%ymm2
+
+# qhasm: a1 = (4x double)(4x int32)mem128[ap + 16]
+# asm 1: vcvtdq2pd 16(<ap=int64#5),>a1=reg256#4
+# asm 2: vcvtdq2pd 16(<ap=%r8),>a1=%ymm3
+vcvtdq2pd 16(%r8),%ymm3
+
+# qhasm: a2 = (4x double)(4x int32)mem128[ap + 32]
+# asm 1: vcvtdq2pd 32(<ap=int64#5),>a2=reg256#5
+# asm 2: vcvtdq2pd 32(<ap=%r8),>a2=%ymm4
+vcvtdq2pd 32(%r8),%ymm4
+
+# qhasm: a3 = (4x double)(4x int32)mem128[ap + 48]
+# asm 1: vcvtdq2pd 48(<ap=int64#5),>a3=reg256#6
+# asm 2: vcvtdq2pd 48(<ap=%r8),>a3=%ymm5
+vcvtdq2pd 48(%r8),%ymm5
+
+# qhasm: r3 = mem256[neg2]
+# asm 1: vmovdqu neg2,>r3=reg256#7
+# asm 2: vmovdqu neg2,>r3=%ymm6
+vmovdqu neg2,%ymm6
+
+# qhasm: 4x r0 = approx a0 * r3
+# asm 1: vmulpd <a0=reg256#3,<r3=reg256#7,>r0=reg256#8
+# asm 2: vmulpd <a0=%ymm2,<r3=%ymm6,>r0=%ymm7
+vmulpd %ymm2,%ymm6,%ymm7
+
+# qhasm: 4x r1 = approx a1 * r3
+# asm 1: vmulpd <a1=reg256#4,<r3=reg256#7,>r1=reg256#9
+# asm 2: vmulpd <a1=%ymm3,<r3=%ymm6,>r1=%ymm8
+vmulpd %ymm3,%ymm6,%ymm8
+
+# qhasm: 4x r2 = approx a2 * r3
+# asm 1: vmulpd <a2=reg256#5,<r3=reg256#7,>r2=reg256#10
+# asm 2: vmulpd <a2=%ymm4,<r3=%ymm6,>r2=%ymm9
+vmulpd %ymm4,%ymm6,%ymm9
+
+# qhasm: 4x r3 approx*= a3
+# asm 1: vmulpd <a3=reg256#6,<r3=reg256#7,>r3=reg256#7
+# asm 2: vmulpd <a3=%ymm5,<r3=%ymm6,>r3=%ymm6
+vmulpd %ymm5,%ymm6,%ymm6
+
+# qhasm: r0[0,1,2,3] = a0[0]approx+a0[1],r0[0]approx+r0[1],a0[2]approx+a0[3],r0[2]approx+r0[3]
+# asm 1: vhaddpd <r0=reg256#8,<a0=reg256#3,>r0=reg256#3
+# asm 2: vhaddpd <r0=%ymm7,<a0=%ymm2,>r0=%ymm2
+vhaddpd %ymm7,%ymm2,%ymm2
+
+# qhasm: w = mem256[pp + 0]
+# asm 1: vmovupd   0(<pp=int64#2),>w=reg256#8
+# asm 2: vmovupd   0(<pp=%rsi),>w=%ymm7
+vmovupd   0(%rsi),%ymm7
+
+# qhasm: 4x r0 approx*= w
+# asm 1: vmulpd <w=reg256#8,<r0=reg256#3,>r0=reg256#3
+# asm 2: vmulpd <w=%ymm7,<r0=%ymm2,>r0=%ymm2
+vmulpd %ymm7,%ymm2,%ymm2
+
+# qhasm: a0[0,1,2,3] = r0[2,3],r0[0,1]
+# asm 1: vperm2f128 $0x21,<r0=reg256#3,<r0=reg256#3,>a0=reg256#8
+# asm 2: vperm2f128 $0x21,<r0=%ymm2,<r0=%ymm2,>a0=%ymm7
+vperm2f128 $0x21,%ymm2,%ymm2,%ymm7
+
+# qhasm: r1[0,1,2,3] = a1[0]approx+a1[1],r1[0]approx+r1[1],a1[2]approx+a1[3],r1[2]approx+r1[3]
+# asm 1: vhaddpd <r1=reg256#9,<a1=reg256#4,>r1=reg256#4
+# asm 2: vhaddpd <r1=%ymm8,<a1=%ymm3,>r1=%ymm3
+vhaddpd %ymm8,%ymm3,%ymm3
+
+# qhasm: w = mem256[pp + 32]
+# asm 1: vmovupd   32(<pp=int64#2),>w=reg256#9
+# asm 2: vmovupd   32(<pp=%rsi),>w=%ymm8
+vmovupd   32(%rsi),%ymm8
+
+# qhasm: 4x r1 approx*= w
+# asm 1: vmulpd <w=reg256#9,<r1=reg256#4,>r1=reg256#4
+# asm 2: vmulpd <w=%ymm8,<r1=%ymm3,>r1=%ymm3
+vmulpd %ymm8,%ymm3,%ymm3
+
+# qhasm: a1[0,1,2,3] = r1[2,3],r1[0,1]
+# asm 1: vperm2f128 $0x21,<r1=reg256#4,<r1=reg256#4,>a1=reg256#9
+# asm 2: vperm2f128 $0x21,<r1=%ymm3,<r1=%ymm3,>a1=%ymm8
+vperm2f128 $0x21,%ymm3,%ymm3,%ymm8
+
+# qhasm: r2[0,1,2,3] = a2[0]approx+a2[1],r2[0]approx+r2[1],a2[2]approx+a2[3],r2[2]approx+r2[3]
+# asm 1: vhaddpd <r2=reg256#10,<a2=reg256#5,>r2=reg256#5
+# asm 2: vhaddpd <r2=%ymm9,<a2=%ymm4,>r2=%ymm4
+vhaddpd %ymm9,%ymm4,%ymm4
+
+# qhasm: w = mem256[pp + 64]
+# asm 1: vmovupd   64(<pp=int64#2),>w=reg256#10
+# asm 2: vmovupd   64(<pp=%rsi),>w=%ymm9
+vmovupd   64(%rsi),%ymm9
+
+# qhasm: 4x r2 approx*= w
+# asm 1: vmulpd <w=reg256#10,<r2=reg256#5,>r2=reg256#5
+# asm 2: vmulpd <w=%ymm9,<r2=%ymm4,>r2=%ymm4
+vmulpd %ymm9,%ymm4,%ymm4
+
+# qhasm: a2[0,1,2,3] = r2[2,3],r2[0,1]
+# asm 1: vperm2f128 $0x21,<r2=reg256#5,<r2=reg256#5,>a2=reg256#10
+# asm 2: vperm2f128 $0x21,<r2=%ymm4,<r2=%ymm4,>a2=%ymm9
+vperm2f128 $0x21,%ymm4,%ymm4,%ymm9
+
+# qhasm: r3[0,1,2,3] = a3[0]approx+a3[1],r3[0]approx+r3[1],a3[2]approx+a3[3],r3[2]approx+r3[3]
+# asm 1: vhaddpd <r3=reg256#7,<a3=reg256#6,>r3=reg256#6
+# asm 2: vhaddpd <r3=%ymm6,<a3=%ymm5,>r3=%ymm5
+vhaddpd %ymm6,%ymm5,%ymm5
+
+# qhasm: w = mem256[pp + 96]
+# asm 1: vmovupd   96(<pp=int64#2),>w=reg256#7
+# asm 2: vmovupd   96(<pp=%rsi),>w=%ymm6
+vmovupd   96(%rsi),%ymm6
+
+# qhasm: 4x r3 approx*= w
+# asm 1: vmulpd <w=reg256#7,<r3=reg256#6,>r3=reg256#6
+# asm 2: vmulpd <w=%ymm6,<r3=%ymm5,>r3=%ymm5
+vmulpd %ymm6,%ymm5,%ymm5
+
+# qhasm: a3[0,1,2,3] = r3[2,3],r3[0,1]
+# asm 1: vperm2f128 $0x21,<r3=reg256#6,<r3=reg256#6,>a3=reg256#7
+# asm 2: vperm2f128 $0x21,<r3=%ymm5,<r3=%ymm5,>a3=%ymm6
+vperm2f128 $0x21,%ymm5,%ymm5,%ymm6
+
+# qhasm: c = mem256[neg4]
+# asm 1: vmovdqu neg4,>c=reg256#11
+# asm 2: vmovdqu neg4,>c=%ymm10
+vmovdqu neg4,%ymm10
+
+# qhasm: 4x a0 approx+= r0 * c
+# asm 1: vfmadd231pd <r0=reg256#3,<c=reg256#11,<a0=reg256#8
+# asm 2: vfmadd231pd <r0=%ymm2,<c=%ymm10,<a0=%ymm7
+vfmadd231pd %ymm2,%ymm10,%ymm7
+
+# qhasm: w = mem256[wp + 0]
+# asm 1: vmovupd   0(<wp=int64#7),>w=reg256#3
+# asm 2: vmovupd   0(<wp=%rax),>w=%ymm2
+vmovupd   0(%rax),%ymm2
+
+# qhasm: 4x a0 approx*= w
+# asm 1: vmulpd <w=reg256#3,<a0=reg256#8,>a0=reg256#3
+# asm 2: vmulpd <w=%ymm2,<a0=%ymm7,>a0=%ymm2
+vmulpd %ymm2,%ymm7,%ymm2
+
+# qhasm: 4x a1 approx+= r1 * c
+# asm 1: vfmadd231pd <r1=reg256#4,<c=reg256#11,<a1=reg256#9
+# asm 2: vfmadd231pd <r1=%ymm3,<c=%ymm10,<a1=%ymm8
+vfmadd231pd %ymm3,%ymm10,%ymm8
+
+# qhasm: w = mem256[wp + 32]
+# asm 1: vmovupd   32(<wp=int64#7),>w=reg256#4
+# asm 2: vmovupd   32(<wp=%rax),>w=%ymm3
+vmovupd   32(%rax),%ymm3
+
+# qhasm: 4x a1 approx*= w
+# asm 1: vmulpd <w=reg256#4,<a1=reg256#9,>a1=reg256#4
+# asm 2: vmulpd <w=%ymm3,<a1=%ymm8,>a1=%ymm3
+vmulpd %ymm3,%ymm8,%ymm3
+
+# qhasm: w = mem256[wp + 64]
+# asm 1: vmovupd   64(<wp=int64#7),>w=reg256#8
+# asm 2: vmovupd   64(<wp=%rax),>w=%ymm7
+vmovupd   64(%rax),%ymm7
+
+# qhasm: 4x a2 approx+= r2 * c
+# asm 1: vfmadd231pd <r2=reg256#5,<c=reg256#11,<a2=reg256#10
+# asm 2: vfmadd231pd <r2=%ymm4,<c=%ymm10,<a2=%ymm9
+vfmadd231pd %ymm4,%ymm10,%ymm9
+
+# qhasm: 4x a2 approx*= w
+# asm 1: vmulpd <w=reg256#8,<a2=reg256#10,>a2=reg256#5
+# asm 2: vmulpd <w=%ymm7,<a2=%ymm9,>a2=%ymm4
+vmulpd %ymm7,%ymm9,%ymm4
+
+# qhasm: w = mem256[wp + 96]
+# asm 1: vmovupd   96(<wp=int64#7),>w=reg256#8
+# asm 2: vmovupd   96(<wp=%rax),>w=%ymm7
+vmovupd   96(%rax),%ymm7
+
+# qhasm: 4x a3 approx+= r3 * c
+# asm 1: vfmadd231pd <r3=reg256#6,<c=reg256#11,<a3=reg256#7
+# asm 2: vfmadd231pd <r3=%ymm5,<c=%ymm10,<a3=%ymm6
+vfmadd231pd %ymm5,%ymm10,%ymm6
+
+# qhasm: 4x a3 approx*= w
+# asm 1: vmulpd <w=reg256#8,<a3=reg256#7,>a3=reg256#6
+# asm 2: vmulpd <w=%ymm7,<a3=%ymm6,>a3=%ymm5
+vmulpd %ymm7,%ymm6,%ymm5
+
+# qhasm: 4x c = approx a0 * qinv
+# asm 1: vmulpd <a0=reg256#3,<qinv=reg256#2,>c=reg256#7
+# asm 2: vmulpd <a0=%ymm2,<qinv=%ymm1,>c=%ymm6
+vmulpd %ymm2,%ymm1,%ymm6
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7
+# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6
+vroundpd $9,%ymm6,%ymm6
+
+# qhasm: 4x a0 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a0=reg256#3
+# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<a0=%ymm2
+vfnmadd231pd %ymm6,%ymm0,%ymm2
+
+# qhasm: 4x c = approx a1 * qinv
+# asm 1: vmulpd <a1=reg256#4,<qinv=reg256#2,>c=reg256#7
+# asm 2: vmulpd <a1=%ymm3,<qinv=%ymm1,>c=%ymm6
+vmulpd %ymm3,%ymm1,%ymm6
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7
+# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6
+vroundpd $9,%ymm6,%ymm6
+
+# qhasm: 4x a1 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a1=reg256#4
+# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<a1=%ymm3
+vfnmadd231pd %ymm6,%ymm0,%ymm3
+
+# qhasm: 4x c = approx a2 * qinv
+# asm 1: vmulpd <a2=reg256#5,<qinv=reg256#2,>c=reg256#7
+# asm 2: vmulpd <a2=%ymm4,<qinv=%ymm1,>c=%ymm6
+vmulpd %ymm4,%ymm1,%ymm6
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7
+# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6
+vroundpd $9,%ymm6,%ymm6
+
+# qhasm: 4x a2 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a2=reg256#5
+# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<a2=%ymm4
+vfnmadd231pd %ymm6,%ymm0,%ymm4
+
+# qhasm: 4x c = approx a3 * qinv
+# asm 1: vmulpd <a3=reg256#6,<qinv=reg256#2,>c=reg256#7
+# asm 2: vmulpd <a3=%ymm5,<qinv=%ymm1,>c=%ymm6
+vmulpd %ymm5,%ymm1,%ymm6
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7
+# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6
+vroundpd $9,%ymm6,%ymm6
+
+# qhasm: 4x a3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a3=reg256#6
+# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<a3=%ymm5
+vfnmadd231pd %ymm6,%ymm0,%ymm5
+
+# qhasm: 4x r0 = approx a0 + a1
+# asm 1: vaddpd <a0=reg256#3,<a1=reg256#4,>r0=reg256#7
+# asm 2: vaddpd <a0=%ymm2,<a1=%ymm3,>r0=%ymm6
+vaddpd %ymm2,%ymm3,%ymm6
+
+# qhasm: 4x r2 = approx a2 + a3
+# asm 1: vaddpd <a2=reg256#5,<a3=reg256#6,>r2=reg256#8
+# asm 2: vaddpd <a2=%ymm4,<a3=%ymm5,>r2=%ymm7
+vaddpd %ymm4,%ymm5,%ymm7
+
+# qhasm: w = mem64[wp + 128],mem64[wp + 128],mem64[wp + 128],mem64[wp + 128]
+# asm 1: vbroadcastsd 128(<wp=int64#7),>w=reg256#9
+# asm 2: vbroadcastsd 128(<wp=%rax),>w=%ymm8
+vbroadcastsd 128(%rax),%ymm8
+
+# qhasm: 4x r1 = approx a0 - a1
+# asm 1: vsubpd <a1=reg256#4,<a0=reg256#3,>r1=reg256#3
+# asm 2: vsubpd <a1=%ymm3,<a0=%ymm2,>r1=%ymm2
+vsubpd %ymm3,%ymm2,%ymm2
+
+# qhasm: 4x r1 approx*= w
+# asm 1: vmulpd <w=reg256#9,<r1=reg256#3,>r1=reg256#3
+# asm 2: vmulpd <w=%ymm8,<r1=%ymm2,>r1=%ymm2
+vmulpd %ymm8,%ymm2,%ymm2
+
+# qhasm: w = mem64[wp + 136],mem64[wp + 136],mem64[wp + 136],mem64[wp + 136]
+# asm 1: vbroadcastsd 136(<wp=int64#7),>w=reg256#4
+# asm 2: vbroadcastsd 136(<wp=%rax),>w=%ymm3
+vbroadcastsd 136(%rax),%ymm3
+
+# qhasm: 4x r3 = approx a2 - a3
+# asm 1: vsubpd <a3=reg256#6,<a2=reg256#5,>r3=reg256#5
+# asm 2: vsubpd <a3=%ymm5,<a2=%ymm4,>r3=%ymm4
+vsubpd %ymm5,%ymm4,%ymm4
+
+# qhasm: 4x r3 approx*= w
+# asm 1: vmulpd <w=reg256#4,<r3=reg256#5,>r3=reg256#4
+# asm 2: vmulpd <w=%ymm3,<r3=%ymm4,>r3=%ymm3
+vmulpd %ymm3,%ymm4,%ymm3
+
+# qhasm: 4x a0 = approx r0 + r2
+# asm 1: vaddpd <r0=reg256#7,<r2=reg256#8,>a0=reg256#5
+# asm 2: vaddpd <r0=%ymm6,<r2=%ymm7,>a0=%ymm4
+vaddpd %ymm6,%ymm7,%ymm4
+
+# qhasm: 4x a1 = approx r1 + r3
+# asm 1: vaddpd <r1=reg256#3,<r3=reg256#4,>a1=reg256#6
+# asm 2: vaddpd <r1=%ymm2,<r3=%ymm3,>a1=%ymm5
+vaddpd %ymm2,%ymm3,%ymm5
+
+# qhasm: w = mem64[wp + 144],mem64[wp + 144],mem64[wp + 144],mem64[wp + 144]
+# asm 1: vbroadcastsd 144(<wp=int64#7),>w=reg256#9
+# asm 2: vbroadcastsd 144(<wp=%rax),>w=%ymm8
+vbroadcastsd 144(%rax),%ymm8
+
+# qhasm: 4x a2 = approx r0 - r2
+# asm 1: vsubpd <r2=reg256#8,<r0=reg256#7,>a2=reg256#7
+# asm 2: vsubpd <r2=%ymm7,<r0=%ymm6,>a2=%ymm6
+vsubpd %ymm7,%ymm6,%ymm6
+
+# qhasm: 4x a3 = approx r1 - r3
+# asm 1: vsubpd <r3=reg256#4,<r1=reg256#3,>a3=reg256#3
+# asm 2: vsubpd <r3=%ymm3,<r1=%ymm2,>a3=%ymm2
+vsubpd %ymm3,%ymm2,%ymm2
+
+# qhasm: 4x a2 approx*= w
+# asm 1: vmulpd <w=reg256#9,<a2=reg256#7,>a2=reg256#4
+# asm 2: vmulpd <w=%ymm8,<a2=%ymm6,>a2=%ymm3
+vmulpd %ymm8,%ymm6,%ymm3
+
+# qhasm: 4x c = approx a2 * qinv
+# asm 1: vmulpd <a2=reg256#4,<qinv=reg256#2,>c=reg256#7
+# asm 2: vmulpd <a2=%ymm3,<qinv=%ymm1,>c=%ymm6
+vmulpd %ymm3,%ymm1,%ymm6
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7
+# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6
+vroundpd $9,%ymm6,%ymm6
+
+# qhasm: 4x a2 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a2=reg256#4
+# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<a2=%ymm3
+vfnmadd231pd %ymm6,%ymm0,%ymm3
+
+# qhasm: 4x a3 approx*= w
+# asm 1: vmulpd <w=reg256#9,<a3=reg256#3,>a3=reg256#3
+# asm 2: vmulpd <w=%ymm8,<a3=%ymm2,>a3=%ymm2
+vmulpd %ymm8,%ymm2,%ymm2
+
+# qhasm: 4x c = approx a3 * qinv
+# asm 1: vmulpd <a3=reg256#3,<qinv=reg256#2,>c=reg256#7
+# asm 2: vmulpd <a3=%ymm2,<qinv=%ymm1,>c=%ymm6
+vmulpd %ymm2,%ymm1,%ymm6
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7
+# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6
+vroundpd $9,%ymm6,%ymm6
+
+# qhasm: 4x a3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<a3=reg256#3
+# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<a3=%ymm2
+vfnmadd231pd %ymm6,%ymm0,%ymm2
+
+# qhasm: mem256[tp +  0] = a0
+# asm 1: vmovupd   <a0=reg256#5,0(<tp=int64#6)
+# asm 2: vmovupd   <a0=%ymm4,0(<tp=%r9)
+vmovupd   %ymm4,0(%r9)
+
+# qhasm: mem256[tp + 32] = a1
+# asm 1: vmovupd   <a1=reg256#6,32(<tp=int64#6)
+# asm 2: vmovupd   <a1=%ymm5,32(<tp=%r9)
+vmovupd   %ymm5,32(%r9)
+
+# qhasm: mem256[tp + 64] = a2
+# asm 1: vmovupd   <a2=reg256#4,64(<tp=int64#6)
+# asm 2: vmovupd   <a2=%ymm3,64(<tp=%r9)
+vmovupd   %ymm3,64(%r9)
+
+# qhasm: mem256[tp + 96] = a3
+# asm 1: vmovupd   <a3=reg256#3,96(<tp=int64#6)
+# asm 2: vmovupd   <a3=%ymm2,96(<tp=%r9)
+vmovupd   %ymm2,96(%r9)
+
+# qhasm: ap+= 64
+# asm 1: add  $64,<ap=int64#5
+# asm 2: add  $64,<ap=%r8
+add  $64,%r8
+
+# qhasm: tp+= 128
+# asm 1: add  $128,<tp=int64#6
+# asm 2: add  $128,<tp=%r9
+add  $128,%r9
+
+# qhasm: wp+= 152
+# asm 1: add  $152,<wp=int64#7
+# asm 2: add  $152,<wp=%rax
+add  $152,%rax
+
+# qhasm: pp+= 128
+# asm 1: add  $128,<pp=int64#2
+# asm 2: add  $128,<pp=%rsi
+add  $128,%rsi
+
+# qhasm: unsigned>? ctrj-=1
+# asm 1: sub  $1,<ctrj=int64#4
+# asm 2: sub  $1,<ctrj=%rcx
+sub  $1,%rcx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto loopinreg if unsigned>
+ja ._loopinreg
+
+# qhasm: ctri = 8
+# asm 1: mov  $8,>ctri=int64#2
+# asm 2: mov  $8,>ctri=%rsi
+mov  $8,%rsi
+
+# qhasm: tp = input_2
+# asm 1: mov  <input_2=int64#3,>tp=int64#4
+# asm 2: mov  <input_2=%rdx,>tp=%rcx
+mov  %rdx,%rcx
+
+# qhasm: ctrj = 4
+# asm 1: mov  $4,>ctrj=int64#5
+# asm 2: mov  $4,>ctrj=%r8
+mov  $4,%r8
+
+# qhasm: loop567jfirst:
+._loop567jfirst:
+
+# qhasm: a0 = mem256[tp + 0]
+# asm 1: vmovupd   0(<tp=int64#4),>a0=reg256#3
+# asm 2: vmovupd   0(<tp=%rcx),>a0=%ymm2
+vmovupd   0(%rcx),%ymm2
+
+# qhasm: a1 = mem256[tp + 128]
+# asm 1: vmovupd   128(<tp=int64#4),>a1=reg256#4
+# asm 2: vmovupd   128(<tp=%rcx),>a1=%ymm3
+vmovupd   128(%rcx),%ymm3
+
+# qhasm: a2 = mem256[tp + 256]
+# asm 1: vmovupd   256(<tp=int64#4),>a2=reg256#5
+# asm 2: vmovupd   256(<tp=%rcx),>a2=%ymm4
+vmovupd   256(%rcx),%ymm4
+
+# qhasm: a3 = mem256[tp + 384]
+# asm 1: vmovupd   384(<tp=int64#4),>a3=reg256#6
+# asm 2: vmovupd   384(<tp=%rcx),>a3=%ymm5
+vmovupd   384(%rcx),%ymm5
+
+# qhasm: 4x r0 = approx a0 + a1
+# asm 1: vaddpd <a0=reg256#3,<a1=reg256#4,>r0=reg256#7
+# asm 2: vaddpd <a0=%ymm2,<a1=%ymm3,>r0=%ymm6
+vaddpd %ymm2,%ymm3,%ymm6
+
+# qhasm: 4x r2 = approx a2 + a3
+# asm 1: vaddpd <a2=reg256#5,<a3=reg256#6,>r2=reg256#8
+# asm 2: vaddpd <a2=%ymm4,<a3=%ymm5,>r2=%ymm7
+vaddpd %ymm4,%ymm5,%ymm7
+
+# qhasm: 4x r1 = approx a0 - a1
+# asm 1: vsubpd <a1=reg256#4,<a0=reg256#3,>r1=reg256#3
+# asm 2: vsubpd <a1=%ymm3,<a0=%ymm2,>r1=%ymm2
+vsubpd %ymm3,%ymm2,%ymm2
+
+# qhasm: 4x r3 = approx a2 - a3
+# asm 1: vsubpd <a3=reg256#6,<a2=reg256#5,>r3=reg256#4
+# asm 2: vsubpd <a3=%ymm5,<a2=%ymm4,>r3=%ymm3
+vsubpd %ymm5,%ymm4,%ymm3
+
+# qhasm: 4x a0 = approx r0 + r2
+# asm 1: vaddpd <r0=reg256#7,<r2=reg256#8,>a0=reg256#5
+# asm 2: vaddpd <r0=%ymm6,<r2=%ymm7,>a0=%ymm4
+vaddpd %ymm6,%ymm7,%ymm4
+
+# qhasm: 4x a2 = approx r0 - r2
+# asm 1: vsubpd <r2=reg256#8,<r0=reg256#7,>a2=reg256#6
+# asm 2: vsubpd <r2=%ymm7,<r0=%ymm6,>a2=%ymm5
+vsubpd %ymm7,%ymm6,%ymm5
+
+# qhasm: w = mem64[wp + 8],mem64[wp + 8],mem64[wp + 8],mem64[wp + 8]
+# asm 1: vbroadcastsd 8(<wp=int64#7),>w=reg256#7
+# asm 2: vbroadcastsd 8(<wp=%rax),>w=%ymm6
+vbroadcastsd 8(%rax),%ymm6
+
+# qhasm: 4x r3 approx*= w
+# asm 1: vmulpd <w=reg256#7,<r3=reg256#4,>r3=reg256#4
+# asm 2: vmulpd <w=%ymm6,<r3=%ymm3,>r3=%ymm3
+vmulpd %ymm6,%ymm3,%ymm3
+
+# qhasm: 4x c = approx r3 * qinv
+# asm 1: vmulpd <r3=reg256#4,<qinv=reg256#2,>c=reg256#7
+# asm 2: vmulpd <r3=%ymm3,<qinv=%ymm1,>c=%ymm6
+vmulpd %ymm3,%ymm1,%ymm6
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7
+# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6
+vroundpd $9,%ymm6,%ymm6
+
+# qhasm: 4x r3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<r3=reg256#4
+# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<r3=%ymm3
+vfnmadd231pd %ymm6,%ymm0,%ymm3
+
+# qhasm: 4x a1 = approx r1 + r3
+# asm 1: vaddpd <r1=reg256#3,<r3=reg256#4,>a1=reg256#7
+# asm 2: vaddpd <r1=%ymm2,<r3=%ymm3,>a1=%ymm6
+vaddpd %ymm2,%ymm3,%ymm6
+
+# qhasm: 4x a3 = approx r1 - r3
+# asm 1: vsubpd <r3=reg256#4,<r1=reg256#3,>a3=reg256#3
+# asm 2: vsubpd <r3=%ymm3,<r1=%ymm2,>a3=%ymm2
+vsubpd %ymm3,%ymm2,%ymm2
+
+# qhasm: t0 = mem256[tp + 512]
+# asm 1: vmovupd   512(<tp=int64#4),>t0=reg256#4
+# asm 2: vmovupd   512(<tp=%rcx),>t0=%ymm3
+vmovupd   512(%rcx),%ymm3
+
+# qhasm: t1 = mem256[tp + 640]
+# asm 1: vmovupd   640(<tp=int64#4),>t1=reg256#8
+# asm 2: vmovupd   640(<tp=%rcx),>t1=%ymm7
+vmovupd   640(%rcx),%ymm7
+
+# qhasm: t2 = mem256[tp + 768]
+# asm 1: vmovupd   768(<tp=int64#4),>t2=reg256#9
+# asm 2: vmovupd   768(<tp=%rcx),>t2=%ymm8
+vmovupd   768(%rcx),%ymm8
+
+# qhasm: t3 = mem256[tp + 896]
+# asm 1: vmovupd   896(<tp=int64#4),>t3=reg256#10
+# asm 2: vmovupd   896(<tp=%rcx),>t3=%ymm9
+vmovupd   896(%rcx),%ymm9
+
+# qhasm: 4x r0 = approx t0 + t1
+# asm 1: vaddpd <t0=reg256#4,<t1=reg256#8,>r0=reg256#11
+# asm 2: vaddpd <t0=%ymm3,<t1=%ymm7,>r0=%ymm10
+vaddpd %ymm3,%ymm7,%ymm10
+
+# qhasm: 4x r2 = approx t2 + t3
+# asm 1: vaddpd <t2=reg256#9,<t3=reg256#10,>r2=reg256#12
+# asm 2: vaddpd <t2=%ymm8,<t3=%ymm9,>r2=%ymm11
+vaddpd %ymm8,%ymm9,%ymm11
+
+# qhasm: 4x r1 = approx t0 - t1
+# asm 1: vsubpd <t1=reg256#8,<t0=reg256#4,>r1=reg256#4
+# asm 2: vsubpd <t1=%ymm7,<t0=%ymm3,>r1=%ymm3
+vsubpd %ymm7,%ymm3,%ymm3
+
+# qhasm: 4x r3 = approx t2 - t3
+# asm 1: vsubpd <t3=reg256#10,<t2=reg256#9,>r3=reg256#8
+# asm 2: vsubpd <t3=%ymm9,<t2=%ymm8,>r3=%ymm7
+vsubpd %ymm9,%ymm8,%ymm7
+
+# qhasm: 4x t0 = approx r0 + r2
+# asm 1: vaddpd <r0=reg256#11,<r2=reg256#12,>t0=reg256#9
+# asm 2: vaddpd <r0=%ymm10,<r2=%ymm11,>t0=%ymm8
+vaddpd %ymm10,%ymm11,%ymm8
+
+# qhasm: 4x t2 = approx r0 - r2
+# asm 1: vsubpd <r2=reg256#12,<r0=reg256#11,>t2=reg256#10
+# asm 2: vsubpd <r2=%ymm11,<r0=%ymm10,>t2=%ymm9
+vsubpd %ymm11,%ymm10,%ymm9
+
+# qhasm: w = mem64[wp + 24],mem64[wp + 24],mem64[wp + 24],mem64[wp + 24]
+# asm 1: vbroadcastsd 24(<wp=int64#7),>w=reg256#11
+# asm 2: vbroadcastsd 24(<wp=%rax),>w=%ymm10
+vbroadcastsd 24(%rax),%ymm10
+
+# qhasm: 4x r1 approx*= w
+# asm 1: vmulpd <w=reg256#11,<r1=reg256#4,>r1=reg256#4
+# asm 2: vmulpd <w=%ymm10,<r1=%ymm3,>r1=%ymm3
+vmulpd %ymm10,%ymm3,%ymm3
+
+# qhasm: 4x c = approx r1 * qinv
+# asm 1: vmulpd <r1=reg256#4,<qinv=reg256#2,>c=reg256#11
+# asm 2: vmulpd <r1=%ymm3,<qinv=%ymm1,>c=%ymm10
+vmulpd %ymm3,%ymm1,%ymm10
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11
+# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10
+vroundpd $9,%ymm10,%ymm10
+
+# qhasm: 4x r1 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r1=reg256#4
+# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r1=%ymm3
+vfnmadd231pd %ymm10,%ymm0,%ymm3
+
+# qhasm: w = mem64[wp + 32],mem64[wp + 32],mem64[wp + 32],mem64[wp + 32]
+# asm 1: vbroadcastsd 32(<wp=int64#7),>w=reg256#11
+# asm 2: vbroadcastsd 32(<wp=%rax),>w=%ymm10
+vbroadcastsd 32(%rax),%ymm10
+
+# qhasm: 4x r3 approx*= w
+# asm 1: vmulpd <w=reg256#11,<r3=reg256#8,>r3=reg256#8
+# asm 2: vmulpd <w=%ymm10,<r3=%ymm7,>r3=%ymm7
+vmulpd %ymm10,%ymm7,%ymm7
+
+# qhasm: 4x c = approx r3 * qinv
+# asm 1: vmulpd <r3=reg256#8,<qinv=reg256#2,>c=reg256#11
+# asm 2: vmulpd <r3=%ymm7,<qinv=%ymm1,>c=%ymm10
+vmulpd %ymm7,%ymm1,%ymm10
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11
+# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10
+vroundpd $9,%ymm10,%ymm10
+
+# qhasm: 4x r3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r3=reg256#8
+# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r3=%ymm7
+vfnmadd231pd %ymm10,%ymm0,%ymm7
+
+# qhasm: 4x t1 = approx r1 + r3
+# asm 1: vaddpd <r1=reg256#4,<r3=reg256#8,>t1=reg256#11
+# asm 2: vaddpd <r1=%ymm3,<r3=%ymm7,>t1=%ymm10
+vaddpd %ymm3,%ymm7,%ymm10
+
+# qhasm: w = mem64[wp + 40],mem64[wp + 40],mem64[wp + 40],mem64[wp + 40]
+# asm 1: vbroadcastsd 40(<wp=int64#7),>w=reg256#12
+# asm 2: vbroadcastsd 40(<wp=%rax),>w=%ymm11
+vbroadcastsd 40(%rax),%ymm11
+
+# qhasm: 4x t3 = approx r1 - r3
+# asm 1: vsubpd <r3=reg256#8,<r1=reg256#4,>t3=reg256#4
+# asm 2: vsubpd <r3=%ymm7,<r1=%ymm3,>t3=%ymm3
+vsubpd %ymm7,%ymm3,%ymm3
+
+# qhasm: 4x t3 approx*= w
+# asm 1: vmulpd <w=reg256#12,<t3=reg256#4,>t3=reg256#4
+# asm 2: vmulpd <w=%ymm11,<t3=%ymm3,>t3=%ymm3
+vmulpd %ymm11,%ymm3,%ymm3
+
+# qhasm: 4x c = approx t3 * qinv
+# asm 1: vmulpd <t3=reg256#4,<qinv=reg256#2,>c=reg256#8
+# asm 2: vmulpd <t3=%ymm3,<qinv=%ymm1,>c=%ymm7
+vmulpd %ymm3,%ymm1,%ymm7
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#8,>c=reg256#8
+# asm 2: vroundpd $9,<c=%ymm7,>c=%ymm7
+vroundpd $9,%ymm7,%ymm7
+
+# qhasm: 4x t3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#8,<q=reg256#1,<t3=reg256#4
+# asm 2: vfnmadd231pd <c=%ymm7,<q=%ymm0,<t3=%ymm3
+vfnmadd231pd %ymm7,%ymm0,%ymm3
+
+# qhasm: 4x t2 approx*= w
+# asm 1: vmulpd <w=reg256#12,<t2=reg256#10,>t2=reg256#8
+# asm 2: vmulpd <w=%ymm11,<t2=%ymm9,>t2=%ymm7
+vmulpd %ymm11,%ymm9,%ymm7
+
+# qhasm: 4x c = approx t2 * qinv
+# asm 1: vmulpd <t2=reg256#8,<qinv=reg256#2,>c=reg256#10
+# asm 2: vmulpd <t2=%ymm7,<qinv=%ymm1,>c=%ymm9
+vmulpd %ymm7,%ymm1,%ymm9
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#10,>c=reg256#10
+# asm 2: vroundpd $9,<c=%ymm9,>c=%ymm9
+vroundpd $9,%ymm9,%ymm9
+
+# qhasm: 4x t2 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#10,<q=reg256#1,<t2=reg256#8
+# asm 2: vfnmadd231pd <c=%ymm9,<q=%ymm0,<t2=%ymm7
+vfnmadd231pd %ymm9,%ymm0,%ymm7
+
+# qhasm: 4x r0 = approx a0 + t0
+# asm 1: vaddpd <a0=reg256#5,<t0=reg256#9,>r0=reg256#10
+# asm 2: vaddpd <a0=%ymm4,<t0=%ymm8,>r0=%ymm9
+vaddpd %ymm4,%ymm8,%ymm9
+
+# qhasm: 4x r1 = approx a1 + t1
+# asm 1: vaddpd <a1=reg256#7,<t1=reg256#11,>r1=reg256#12
+# asm 2: vaddpd <a1=%ymm6,<t1=%ymm10,>r1=%ymm11
+vaddpd %ymm6,%ymm10,%ymm11
+
+# qhasm: 4x r2 = approx a2 + t2
+# asm 1: vaddpd <a2=reg256#6,<t2=reg256#8,>r2=reg256#13
+# asm 2: vaddpd <a2=%ymm5,<t2=%ymm7,>r2=%ymm12
+vaddpd %ymm5,%ymm7,%ymm12
+
+# qhasm: 4x r3 = approx a3 + t3
+# asm 1: vaddpd <a3=reg256#3,<t3=reg256#4,>r3=reg256#14
+# asm 2: vaddpd <a3=%ymm2,<t3=%ymm3,>r3=%ymm13
+vaddpd %ymm2,%ymm3,%ymm13
+
+# qhasm: 4x a0 approx-= t0
+# asm 1: vsubpd <t0=reg256#9,<a0=reg256#5,>a0=reg256#5
+# asm 2: vsubpd <t0=%ymm8,<a0=%ymm4,>a0=%ymm4
+vsubpd %ymm8,%ymm4,%ymm4
+
+# qhasm: 4x a1 approx-= t1
+# asm 1: vsubpd <t1=reg256#11,<a1=reg256#7,>a1=reg256#7
+# asm 2: vsubpd <t1=%ymm10,<a1=%ymm6,>a1=%ymm6
+vsubpd %ymm10,%ymm6,%ymm6
+
+# qhasm: 4x a2 approx-= t2
+# asm 1: vsubpd <t2=reg256#8,<a2=reg256#6,>a2=reg256#6
+# asm 2: vsubpd <t2=%ymm7,<a2=%ymm5,>a2=%ymm5
+vsubpd %ymm7,%ymm5,%ymm5
+
+# qhasm: 4x a3 approx-= t3
+# asm 1: vsubpd <t3=reg256#4,<a3=reg256#3,>a3=reg256#3
+# asm 2: vsubpd <t3=%ymm3,<a3=%ymm2,>a3=%ymm2
+vsubpd %ymm3,%ymm2,%ymm2
+
+# qhasm: mem256[tp +   0] = r0
+# asm 1: vmovupd   <r0=reg256#10,0(<tp=int64#4)
+# asm 2: vmovupd   <r0=%ymm9,0(<tp=%rcx)
+vmovupd   %ymm9,0(%rcx)
+
+# qhasm: mem256[tp + 128] = r1
+# asm 1: vmovupd   <r1=reg256#12,128(<tp=int64#4)
+# asm 2: vmovupd   <r1=%ymm11,128(<tp=%rcx)
+vmovupd   %ymm11,128(%rcx)
+
+# qhasm: mem256[tp + 256] = r2
+# asm 1: vmovupd   <r2=reg256#13,256(<tp=int64#4)
+# asm 2: vmovupd   <r2=%ymm12,256(<tp=%rcx)
+vmovupd   %ymm12,256(%rcx)
+
+# qhasm: mem256[tp + 384] = r3
+# asm 1: vmovupd   <r3=reg256#14,384(<tp=int64#4)
+# asm 2: vmovupd   <r3=%ymm13,384(<tp=%rcx)
+vmovupd   %ymm13,384(%rcx)
+
+# qhasm: mem256[tp + 512] = a0
+# asm 1: vmovupd   <a0=reg256#5,512(<tp=int64#4)
+# asm 2: vmovupd   <a0=%ymm4,512(<tp=%rcx)
+vmovupd   %ymm4,512(%rcx)
+
+# qhasm: mem256[tp + 640] = a1
+# asm 1: vmovupd   <a1=reg256#7,640(<tp=int64#4)
+# asm 2: vmovupd   <a1=%ymm6,640(<tp=%rcx)
+vmovupd   %ymm6,640(%rcx)
+
+# qhasm: mem256[tp + 768] = a2
+# asm 1: vmovupd   <a2=reg256#6,768(<tp=int64#4)
+# asm 2: vmovupd   <a2=%ymm5,768(<tp=%rcx)
+vmovupd   %ymm5,768(%rcx)
+
+# qhasm: mem256[tp + 896] = a3
+# asm 1: vmovupd   <a3=reg256#3,896(<tp=int64#4)
+# asm 2: vmovupd   <a3=%ymm2,896(<tp=%rcx)
+vmovupd   %ymm2,896(%rcx)
+
+# qhasm: tp+=32
+# asm 1: add  $32,<tp=int64#4
+# asm 2: add  $32,<tp=%rcx
+add  $32,%rcx
+
+# qhasm: unsigned>? ctrj-=1
+# asm 1: sub  $1,<ctrj=int64#5
+# asm 2: sub  $1,<ctrj=%r8
+sub  $1,%r8
+# comment:fp stack unchanged by jump
+
+# qhasm: goto loop567jfirst if unsigned>
+ja ._loop567jfirst
+
+# qhasm: tp+= 896
+# asm 1: add  $896,<tp=int64#4
+# asm 2: add  $896,<tp=%rcx
+add  $896,%rcx
+
+# qhasm: wp+= 56
+# asm 1: add  $56,<wp=int64#7
+# asm 2: add  $56,<wp=%rax
+add  $56,%rax
+
+# qhasm: ctri-=1
+# asm 1: sub  $1,<ctri=int64#2
+# asm 2: sub  $1,<ctri=%rsi
+sub  $1,%rsi
+
+# qhasm: loop567i:
+._loop567i:
+
+# qhasm: ctrj = 4
+# asm 1: mov  $4,>ctrj=int64#5
+# asm 2: mov  $4,>ctrj=%r8
+mov  $4,%r8
+
+# qhasm: loop567j:
+._loop567j:
+
+# qhasm: a0 = mem256[tp + 0]
+# asm 1: vmovupd   0(<tp=int64#4),>a0=reg256#3
+# asm 2: vmovupd   0(<tp=%rcx),>a0=%ymm2
+vmovupd   0(%rcx),%ymm2
+
+# qhasm: a1 = mem256[tp + 128]
+# asm 1: vmovupd   128(<tp=int64#4),>a1=reg256#4
+# asm 2: vmovupd   128(<tp=%rcx),>a1=%ymm3
+vmovupd   128(%rcx),%ymm3
+
+# qhasm: a2 = mem256[tp + 256]
+# asm 1: vmovupd   256(<tp=int64#4),>a2=reg256#5
+# asm 2: vmovupd   256(<tp=%rcx),>a2=%ymm4
+vmovupd   256(%rcx),%ymm4
+
+# qhasm: a3 = mem256[tp + 384]
+# asm 1: vmovupd   384(<tp=int64#4),>a3=reg256#6
+# asm 2: vmovupd   384(<tp=%rcx),>a3=%ymm5
+vmovupd   384(%rcx),%ymm5
+
+# qhasm: 4x r0 = approx a0 + a1
+# asm 1: vaddpd <a0=reg256#3,<a1=reg256#4,>r0=reg256#7
+# asm 2: vaddpd <a0=%ymm2,<a1=%ymm3,>r0=%ymm6
+vaddpd %ymm2,%ymm3,%ymm6
+
+# qhasm: 4x r2 = approx a2 + a3
+# asm 1: vaddpd <a2=reg256#5,<a3=reg256#6,>r2=reg256#8
+# asm 2: vaddpd <a2=%ymm4,<a3=%ymm5,>r2=%ymm7
+vaddpd %ymm4,%ymm5,%ymm7
+
+# qhasm: 4x r1 = approx a0 - a1
+# asm 1: vsubpd <a1=reg256#4,<a0=reg256#3,>r1=reg256#3
+# asm 2: vsubpd <a1=%ymm3,<a0=%ymm2,>r1=%ymm2
+vsubpd %ymm3,%ymm2,%ymm2
+
+# qhasm: 4x r3 = approx a2 - a3
+# asm 1: vsubpd <a3=reg256#6,<a2=reg256#5,>r3=reg256#4
+# asm 2: vsubpd <a3=%ymm5,<a2=%ymm4,>r3=%ymm3
+vsubpd %ymm5,%ymm4,%ymm3
+
+# qhasm: 4x a0 = approx r0 + r2
+# asm 1: vaddpd <r0=reg256#7,<r2=reg256#8,>a0=reg256#5
+# asm 2: vaddpd <r0=%ymm6,<r2=%ymm7,>a0=%ymm4
+vaddpd %ymm6,%ymm7,%ymm4
+
+# qhasm: 4x a2 = approx r0 - r2
+# asm 1: vsubpd <r2=reg256#8,<r0=reg256#7,>a2=reg256#6
+# asm 2: vsubpd <r2=%ymm7,<r0=%ymm6,>a2=%ymm5
+vsubpd %ymm7,%ymm6,%ymm5
+
+# qhasm: w = mem64[wp + 0],mem64[wp + 0],mem64[wp + 0],mem64[wp + 0]
+# asm 1: vbroadcastsd 0(<wp=int64#7),>w=reg256#7
+# asm 2: vbroadcastsd 0(<wp=%rax),>w=%ymm6
+vbroadcastsd 0(%rax),%ymm6
+
+# qhasm: 4x r1 approx*= w
+# asm 1: vmulpd <w=reg256#7,<r1=reg256#3,>r1=reg256#3
+# asm 2: vmulpd <w=%ymm6,<r1=%ymm2,>r1=%ymm2
+vmulpd %ymm6,%ymm2,%ymm2
+
+# qhasm: 4x c = approx r1 * qinv
+# asm 1: vmulpd <r1=reg256#3,<qinv=reg256#2,>c=reg256#7
+# asm 2: vmulpd <r1=%ymm2,<qinv=%ymm1,>c=%ymm6
+vmulpd %ymm2,%ymm1,%ymm6
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7
+# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6
+vroundpd $9,%ymm6,%ymm6
+
+# qhasm: 4x r1 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<r1=reg256#3
+# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<r1=%ymm2
+vfnmadd231pd %ymm6,%ymm0,%ymm2
+
+# qhasm: w = mem64[wp + 8],mem64[wp + 8],mem64[wp + 8],mem64[wp + 8]
+# asm 1: vbroadcastsd 8(<wp=int64#7),>w=reg256#7
+# asm 2: vbroadcastsd 8(<wp=%rax),>w=%ymm6
+vbroadcastsd 8(%rax),%ymm6
+
+# qhasm: 4x r3 approx*= w
+# asm 1: vmulpd <w=reg256#7,<r3=reg256#4,>r3=reg256#4
+# asm 2: vmulpd <w=%ymm6,<r3=%ymm3,>r3=%ymm3
+vmulpd %ymm6,%ymm3,%ymm3
+
+# qhasm: 4x c = approx r3 * qinv
+# asm 1: vmulpd <r3=reg256#4,<qinv=reg256#2,>c=reg256#7
+# asm 2: vmulpd <r3=%ymm3,<qinv=%ymm1,>c=%ymm6
+vmulpd %ymm3,%ymm1,%ymm6
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#7,>c=reg256#7
+# asm 2: vroundpd $9,<c=%ymm6,>c=%ymm6
+vroundpd $9,%ymm6,%ymm6
+
+# qhasm: 4x r3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#7,<q=reg256#1,<r3=reg256#4
+# asm 2: vfnmadd231pd <c=%ymm6,<q=%ymm0,<r3=%ymm3
+vfnmadd231pd %ymm6,%ymm0,%ymm3
+
+# qhasm: 4x a1 = approx r1 + r3
+# asm 1: vaddpd <r1=reg256#3,<r3=reg256#4,>a1=reg256#7
+# asm 2: vaddpd <r1=%ymm2,<r3=%ymm3,>a1=%ymm6
+vaddpd %ymm2,%ymm3,%ymm6
+
+# qhasm: 4x a3 = approx r1 - r3
+# asm 1: vsubpd <r3=reg256#4,<r1=reg256#3,>a3=reg256#3
+# asm 2: vsubpd <r3=%ymm3,<r1=%ymm2,>a3=%ymm2
+vsubpd %ymm3,%ymm2,%ymm2
+
+# qhasm: w = mem64[wp + 16],mem64[wp + 16],mem64[wp + 16],mem64[wp + 16]
+# asm 1: vbroadcastsd 16(<wp=int64#7),>w=reg256#4
+# asm 2: vbroadcastsd 16(<wp=%rax),>w=%ymm3
+vbroadcastsd 16(%rax),%ymm3
+
+# qhasm: 4x a3 approx*= w
+# asm 1: vmulpd <w=reg256#4,<a3=reg256#3,>a3=reg256#3
+# asm 2: vmulpd <w=%ymm3,<a3=%ymm2,>a3=%ymm2
+vmulpd %ymm3,%ymm2,%ymm2
+
+# qhasm: 4x c = approx a3 * qinv
+# asm 1: vmulpd <a3=reg256#3,<qinv=reg256#2,>c=reg256#8
+# asm 2: vmulpd <a3=%ymm2,<qinv=%ymm1,>c=%ymm7
+vmulpd %ymm2,%ymm1,%ymm7
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#8,>c=reg256#8
+# asm 2: vroundpd $9,<c=%ymm7,>c=%ymm7
+vroundpd $9,%ymm7,%ymm7
+
+# qhasm: 4x a3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#8,<q=reg256#1,<a3=reg256#3
+# asm 2: vfnmadd231pd <c=%ymm7,<q=%ymm0,<a3=%ymm2
+vfnmadd231pd %ymm7,%ymm0,%ymm2
+
+# qhasm: 4x a2 approx*= w
+# asm 1: vmulpd <w=reg256#4,<a2=reg256#6,>a2=reg256#4
+# asm 2: vmulpd <w=%ymm3,<a2=%ymm5,>a2=%ymm3
+vmulpd %ymm3,%ymm5,%ymm3
+
+# qhasm: 4x c = approx a2 * qinv
+# asm 1: vmulpd <a2=reg256#4,<qinv=reg256#2,>c=reg256#6
+# asm 2: vmulpd <a2=%ymm3,<qinv=%ymm1,>c=%ymm5
+vmulpd %ymm3,%ymm1,%ymm5
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6
+# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5
+vroundpd $9,%ymm5,%ymm5
+
+# qhasm: 4x a2 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a2=reg256#4
+# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a2=%ymm3
+vfnmadd231pd %ymm5,%ymm0,%ymm3
+
+# qhasm: t0 = mem256[tp + 512]
+# asm 1: vmovupd   512(<tp=int64#4),>t0=reg256#6
+# asm 2: vmovupd   512(<tp=%rcx),>t0=%ymm5
+vmovupd   512(%rcx),%ymm5
+
+# qhasm: t1 = mem256[tp + 640]
+# asm 1: vmovupd   640(<tp=int64#4),>t1=reg256#8
+# asm 2: vmovupd   640(<tp=%rcx),>t1=%ymm7
+vmovupd   640(%rcx),%ymm7
+
+# qhasm: t2 = mem256[tp + 768]
+# asm 1: vmovupd   768(<tp=int64#4),>t2=reg256#9
+# asm 2: vmovupd   768(<tp=%rcx),>t2=%ymm8
+vmovupd   768(%rcx),%ymm8
+
+# qhasm: t3 = mem256[tp + 896]
+# asm 1: vmovupd   896(<tp=int64#4),>t3=reg256#10
+# asm 2: vmovupd   896(<tp=%rcx),>t3=%ymm9
+vmovupd   896(%rcx),%ymm9
+
+# qhasm: 4x r0 = approx t0 + t1
+# asm 1: vaddpd <t0=reg256#6,<t1=reg256#8,>r0=reg256#11
+# asm 2: vaddpd <t0=%ymm5,<t1=%ymm7,>r0=%ymm10
+vaddpd %ymm5,%ymm7,%ymm10
+
+# qhasm: 4x r2 = approx t2 + t3
+# asm 1: vaddpd <t2=reg256#9,<t3=reg256#10,>r2=reg256#12
+# asm 2: vaddpd <t2=%ymm8,<t3=%ymm9,>r2=%ymm11
+vaddpd %ymm8,%ymm9,%ymm11
+
+# qhasm: 4x r1 = approx t0 - t1
+# asm 1: vsubpd <t1=reg256#8,<t0=reg256#6,>r1=reg256#6
+# asm 2: vsubpd <t1=%ymm7,<t0=%ymm5,>r1=%ymm5
+vsubpd %ymm7,%ymm5,%ymm5
+
+# qhasm: 4x r3 = approx t2 - t3
+# asm 1: vsubpd <t3=reg256#10,<t2=reg256#9,>r3=reg256#8
+# asm 2: vsubpd <t3=%ymm9,<t2=%ymm8,>r3=%ymm7
+vsubpd %ymm9,%ymm8,%ymm7
+
+# qhasm: 4x t0 = approx r0 + r2
+# asm 1: vaddpd <r0=reg256#11,<r2=reg256#12,>t0=reg256#9
+# asm 2: vaddpd <r0=%ymm10,<r2=%ymm11,>t0=%ymm8
+vaddpd %ymm10,%ymm11,%ymm8
+
+# qhasm: 4x t2 = approx r0 - r2
+# asm 1: vsubpd <r2=reg256#12,<r0=reg256#11,>t2=reg256#10
+# asm 2: vsubpd <r2=%ymm11,<r0=%ymm10,>t2=%ymm9
+vsubpd %ymm11,%ymm10,%ymm9
+
+# qhasm: w = mem64[wp + 24],mem64[wp + 24],mem64[wp + 24],mem64[wp + 24]
+# asm 1: vbroadcastsd 24(<wp=int64#7),>w=reg256#11
+# asm 2: vbroadcastsd 24(<wp=%rax),>w=%ymm10
+vbroadcastsd 24(%rax),%ymm10
+
+# qhasm: 4x r1 approx*= w
+# asm 1: vmulpd <w=reg256#11,<r1=reg256#6,>r1=reg256#6
+# asm 2: vmulpd <w=%ymm10,<r1=%ymm5,>r1=%ymm5
+vmulpd %ymm10,%ymm5,%ymm5
+
+# qhasm: 4x c = approx r1 * qinv
+# asm 1: vmulpd <r1=reg256#6,<qinv=reg256#2,>c=reg256#11
+# asm 2: vmulpd <r1=%ymm5,<qinv=%ymm1,>c=%ymm10
+vmulpd %ymm5,%ymm1,%ymm10
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11
+# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10
+vroundpd $9,%ymm10,%ymm10
+
+# qhasm: 4x r1 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r1=reg256#6
+# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r1=%ymm5
+vfnmadd231pd %ymm10,%ymm0,%ymm5
+
+# qhasm: w = mem64[wp + 32],mem64[wp + 32],mem64[wp + 32],mem64[wp + 32]
+# asm 1: vbroadcastsd 32(<wp=int64#7),>w=reg256#11
+# asm 2: vbroadcastsd 32(<wp=%rax),>w=%ymm10
+vbroadcastsd 32(%rax),%ymm10
+
+# qhasm: 4x r3 approx*= w
+# asm 1: vmulpd <w=reg256#11,<r3=reg256#8,>r3=reg256#8
+# asm 2: vmulpd <w=%ymm10,<r3=%ymm7,>r3=%ymm7
+vmulpd %ymm10,%ymm7,%ymm7
+
+# qhasm: 4x c = approx r3 * qinv
+# asm 1: vmulpd <r3=reg256#8,<qinv=reg256#2,>c=reg256#11
+# asm 2: vmulpd <r3=%ymm7,<qinv=%ymm1,>c=%ymm10
+vmulpd %ymm7,%ymm1,%ymm10
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11
+# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10
+vroundpd $9,%ymm10,%ymm10
+
+# qhasm: 4x r3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r3=reg256#8
+# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r3=%ymm7
+vfnmadd231pd %ymm10,%ymm0,%ymm7
+
+# qhasm: 4x t1 = approx r1 + r3
+# asm 1: vaddpd <r1=reg256#6,<r3=reg256#8,>t1=reg256#11
+# asm 2: vaddpd <r1=%ymm5,<r3=%ymm7,>t1=%ymm10
+vaddpd %ymm5,%ymm7,%ymm10
+
+# qhasm: w = mem64[wp + 40],mem64[wp + 40],mem64[wp + 40],mem64[wp + 40]
+# asm 1: vbroadcastsd 40(<wp=int64#7),>w=reg256#12
+# asm 2: vbroadcastsd 40(<wp=%rax),>w=%ymm11
+vbroadcastsd 40(%rax),%ymm11
+
+# qhasm: 4x t3 = approx r1 - r3
+# asm 1: vsubpd <r3=reg256#8,<r1=reg256#6,>t3=reg256#6
+# asm 2: vsubpd <r3=%ymm7,<r1=%ymm5,>t3=%ymm5
+vsubpd %ymm7,%ymm5,%ymm5
+
+# qhasm: 4x t3 approx*= w
+# asm 1: vmulpd <w=reg256#12,<t3=reg256#6,>t3=reg256#6
+# asm 2: vmulpd <w=%ymm11,<t3=%ymm5,>t3=%ymm5
+vmulpd %ymm11,%ymm5,%ymm5
+
+# qhasm: 4x c = approx t3 * qinv
+# asm 1: vmulpd <t3=reg256#6,<qinv=reg256#2,>c=reg256#8
+# asm 2: vmulpd <t3=%ymm5,<qinv=%ymm1,>c=%ymm7
+vmulpd %ymm5,%ymm1,%ymm7
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#8,>c=reg256#8
+# asm 2: vroundpd $9,<c=%ymm7,>c=%ymm7
+vroundpd $9,%ymm7,%ymm7
+
+# qhasm: 4x t3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#8,<q=reg256#1,<t3=reg256#6
+# asm 2: vfnmadd231pd <c=%ymm7,<q=%ymm0,<t3=%ymm5
+vfnmadd231pd %ymm7,%ymm0,%ymm5
+
+# qhasm: 4x t2 approx*= w
+# asm 1: vmulpd <w=reg256#12,<t2=reg256#10,>t2=reg256#8
+# asm 2: vmulpd <w=%ymm11,<t2=%ymm9,>t2=%ymm7
+vmulpd %ymm11,%ymm9,%ymm7
+
+# qhasm: 4x c = approx t2 * qinv
+# asm 1: vmulpd <t2=reg256#8,<qinv=reg256#2,>c=reg256#10
+# asm 2: vmulpd <t2=%ymm7,<qinv=%ymm1,>c=%ymm9
+vmulpd %ymm7,%ymm1,%ymm9
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#10,>c=reg256#10
+# asm 2: vroundpd $9,<c=%ymm9,>c=%ymm9
+vroundpd $9,%ymm9,%ymm9
+
+# qhasm: 4x t2 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#10,<q=reg256#1,<t2=reg256#8
+# asm 2: vfnmadd231pd <c=%ymm9,<q=%ymm0,<t2=%ymm7
+vfnmadd231pd %ymm9,%ymm0,%ymm7
+
+# qhasm: 4x r0 = approx a0 + t0
+# asm 1: vaddpd <a0=reg256#5,<t0=reg256#9,>r0=reg256#10
+# asm 2: vaddpd <a0=%ymm4,<t0=%ymm8,>r0=%ymm9
+vaddpd %ymm4,%ymm8,%ymm9
+
+# qhasm: 4x r1 = approx a1 + t1
+# asm 1: vaddpd <a1=reg256#7,<t1=reg256#11,>r1=reg256#12
+# asm 2: vaddpd <a1=%ymm6,<t1=%ymm10,>r1=%ymm11
+vaddpd %ymm6,%ymm10,%ymm11
+
+# qhasm: 4x r2 = approx a2 + t2
+# asm 1: vaddpd <a2=reg256#4,<t2=reg256#8,>r2=reg256#13
+# asm 2: vaddpd <a2=%ymm3,<t2=%ymm7,>r2=%ymm12
+vaddpd %ymm3,%ymm7,%ymm12
+
+# qhasm: 4x r3 = approx a3 + t3
+# asm 1: vaddpd <a3=reg256#3,<t3=reg256#6,>r3=reg256#14
+# asm 2: vaddpd <a3=%ymm2,<t3=%ymm5,>r3=%ymm13
+vaddpd %ymm2,%ymm5,%ymm13
+
+# qhasm: 4x a0 approx-= t0
+# asm 1: vsubpd <t0=reg256#9,<a0=reg256#5,>a0=reg256#5
+# asm 2: vsubpd <t0=%ymm8,<a0=%ymm4,>a0=%ymm4
+vsubpd %ymm8,%ymm4,%ymm4
+
+# qhasm: 4x a1 approx-= t1
+# asm 1: vsubpd <t1=reg256#11,<a1=reg256#7,>a1=reg256#7
+# asm 2: vsubpd <t1=%ymm10,<a1=%ymm6,>a1=%ymm6
+vsubpd %ymm10,%ymm6,%ymm6
+
+# qhasm: 4x a2 approx-= t2
+# asm 1: vsubpd <t2=reg256#8,<a2=reg256#4,>a2=reg256#4
+# asm 2: vsubpd <t2=%ymm7,<a2=%ymm3,>a2=%ymm3
+vsubpd %ymm7,%ymm3,%ymm3
+
+# qhasm: 4x a3 approx-= t3
+# asm 1: vsubpd <t3=reg256#6,<a3=reg256#3,>a3=reg256#3
+# asm 2: vsubpd <t3=%ymm5,<a3=%ymm2,>a3=%ymm2
+vsubpd %ymm5,%ymm2,%ymm2
+
+# qhasm: w = mem64[wp + 48],mem64[wp + 48],mem64[wp + 48],mem64[wp + 48]
+# asm 1: vbroadcastsd 48(<wp=int64#7),>w=reg256#6
+# asm 2: vbroadcastsd 48(<wp=%rax),>w=%ymm5
+vbroadcastsd 48(%rax),%ymm5
+
+# qhasm: 4x a0 approx*= w
+# asm 1: vmulpd <w=reg256#6,<a0=reg256#5,>a0=reg256#5
+# asm 2: vmulpd <w=%ymm5,<a0=%ymm4,>a0=%ymm4
+vmulpd %ymm5,%ymm4,%ymm4
+
+# qhasm: 4x a1 approx*= w
+# asm 1: vmulpd <w=reg256#6,<a1=reg256#7,>a1=reg256#7
+# asm 2: vmulpd <w=%ymm5,<a1=%ymm6,>a1=%ymm6
+vmulpd %ymm5,%ymm6,%ymm6
+
+# qhasm: 4x a2 approx*= w
+# asm 1: vmulpd <w=reg256#6,<a2=reg256#4,>a2=reg256#4
+# asm 2: vmulpd <w=%ymm5,<a2=%ymm3,>a2=%ymm3
+vmulpd %ymm5,%ymm3,%ymm3
+
+# qhasm: 4x a3 approx*= w
+# asm 1: vmulpd <w=reg256#6,<a3=reg256#3,>a3=reg256#3
+# asm 2: vmulpd <w=%ymm5,<a3=%ymm2,>a3=%ymm2
+vmulpd %ymm5,%ymm2,%ymm2
+
+# qhasm: 4x c = approx a0 * qinv
+# asm 1: vmulpd <a0=reg256#5,<qinv=reg256#2,>c=reg256#6
+# asm 2: vmulpd <a0=%ymm4,<qinv=%ymm1,>c=%ymm5
+vmulpd %ymm4,%ymm1,%ymm5
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6
+# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5
+vroundpd $9,%ymm5,%ymm5
+
+# qhasm: 4x a0 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a0=reg256#5
+# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a0=%ymm4
+vfnmadd231pd %ymm5,%ymm0,%ymm4
+
+# qhasm: 4x c = approx a1 * qinv
+# asm 1: vmulpd <a1=reg256#7,<qinv=reg256#2,>c=reg256#6
+# asm 2: vmulpd <a1=%ymm6,<qinv=%ymm1,>c=%ymm5
+vmulpd %ymm6,%ymm1,%ymm5
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6
+# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5
+vroundpd $9,%ymm5,%ymm5
+
+# qhasm: 4x a1 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a1=reg256#7
+# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a1=%ymm6
+vfnmadd231pd %ymm5,%ymm0,%ymm6
+
+# qhasm: 4x c = approx a2 * qinv
+# asm 1: vmulpd <a2=reg256#4,<qinv=reg256#2,>c=reg256#6
+# asm 2: vmulpd <a2=%ymm3,<qinv=%ymm1,>c=%ymm5
+vmulpd %ymm3,%ymm1,%ymm5
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6
+# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5
+vroundpd $9,%ymm5,%ymm5
+
+# qhasm: 4x a2 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a2=reg256#4
+# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a2=%ymm3
+vfnmadd231pd %ymm5,%ymm0,%ymm3
+
+# qhasm: 4x c = approx a3 * qinv
+# asm 1: vmulpd <a3=reg256#3,<qinv=reg256#2,>c=reg256#6
+# asm 2: vmulpd <a3=%ymm2,<qinv=%ymm1,>c=%ymm5
+vmulpd %ymm2,%ymm1,%ymm5
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#6,>c=reg256#6
+# asm 2: vroundpd $9,<c=%ymm5,>c=%ymm5
+vroundpd $9,%ymm5,%ymm5
+
+# qhasm: 4x a3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#6,<q=reg256#1,<a3=reg256#3
+# asm 2: vfnmadd231pd <c=%ymm5,<q=%ymm0,<a3=%ymm2
+vfnmadd231pd %ymm5,%ymm0,%ymm2
+
+# qhasm: mem256[tp +   0] = r0
+# asm 1: vmovupd   <r0=reg256#10,0(<tp=int64#4)
+# asm 2: vmovupd   <r0=%ymm9,0(<tp=%rcx)
+vmovupd   %ymm9,0(%rcx)
+
+# qhasm: mem256[tp + 128] = r1
+# asm 1: vmovupd   <r1=reg256#12,128(<tp=int64#4)
+# asm 2: vmovupd   <r1=%ymm11,128(<tp=%rcx)
+vmovupd   %ymm11,128(%rcx)
+
+# qhasm: mem256[tp + 256] = r2
+# asm 1: vmovupd   <r2=reg256#13,256(<tp=int64#4)
+# asm 2: vmovupd   <r2=%ymm12,256(<tp=%rcx)
+vmovupd   %ymm12,256(%rcx)
+
+# qhasm: mem256[tp + 384] = r3
+# asm 1: vmovupd   <r3=reg256#14,384(<tp=int64#4)
+# asm 2: vmovupd   <r3=%ymm13,384(<tp=%rcx)
+vmovupd   %ymm13,384(%rcx)
+
+# qhasm: mem256[tp + 512] = a0
+# asm 1: vmovupd   <a0=reg256#5,512(<tp=int64#4)
+# asm 2: vmovupd   <a0=%ymm4,512(<tp=%rcx)
+vmovupd   %ymm4,512(%rcx)
+
+# qhasm: mem256[tp + 640] = a1
+# asm 1: vmovupd   <a1=reg256#7,640(<tp=int64#4)
+# asm 2: vmovupd   <a1=%ymm6,640(<tp=%rcx)
+vmovupd   %ymm6,640(%rcx)
+
+# qhasm: mem256[tp + 768] = a2
+# asm 1: vmovupd   <a2=reg256#4,768(<tp=int64#4)
+# asm 2: vmovupd   <a2=%ymm3,768(<tp=%rcx)
+vmovupd   %ymm3,768(%rcx)
+
+# qhasm: mem256[tp + 896] = a3
+# asm 1: vmovupd   <a3=reg256#3,896(<tp=int64#4)
+# asm 2: vmovupd   <a3=%ymm2,896(<tp=%rcx)
+vmovupd   %ymm2,896(%rcx)
+
+# qhasm: tp+=32
+# asm 1: add  $32,<tp=int64#4
+# asm 2: add  $32,<tp=%rcx
+add  $32,%rcx
+
+# qhasm: unsigned>? ctrj-=1
+# asm 1: sub  $1,<ctrj=int64#5
+# asm 2: sub  $1,<ctrj=%r8
+sub  $1,%r8
+# comment:fp stack unchanged by jump
+
+# qhasm: goto loop567j if unsigned>
+ja ._loop567j
+
+# qhasm: tp+= 896
+# asm 1: add  $896,<tp=int64#4
+# asm 2: add  $896,<tp=%rcx
+add  $896,%rcx
+
+# qhasm: wp+= 56
+# asm 1: add  $56,<wp=int64#7
+# asm 2: add  $56,<wp=%rax
+add  $56,%rax
+
+# qhasm: unsigned>? ctri-=1
+# asm 1: sub  $1,<ctri=int64#2
+# asm 2: sub  $1,<ctri=%rsi
+sub  $1,%rsi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto loop567i if unsigned>
+ja ._loop567i
+
+# qhasm: ctrj = 32
+# asm 1: mov  $32,>ctrj=int64#2
+# asm 2: mov  $32,>ctrj=%rsi
+mov  $32,%rsi
+
+# qhasm: tp = input_2
+# asm 1: mov  <input_2=int64#3,>tp=int64#3
+# asm 2: mov  <input_2=%rdx,>tp=%rdx
+mov  %rdx,%rdx
+
+# qhasm: ap = input_0
+# asm 1: mov  <input_0=int64#1,>ap=int64#1
+# asm 2: mov  <input_0=%rdi,>ap=%rdi
+mov  %rdi,%rdi
+
+# qhasm: loop8910j:
+._loop8910j:
+
+# qhasm: a0 = mem256[tp + 0]
+# asm 1: vmovupd   0(<tp=int64#3),>a0=reg256#3
+# asm 2: vmovupd   0(<tp=%rdx),>a0=%ymm2
+vmovupd   0(%rdx),%ymm2
+
+# qhasm: a1 = mem256[tp + 1024]
+# asm 1: vmovupd   1024(<tp=int64#3),>a1=reg256#4
+# asm 2: vmovupd   1024(<tp=%rdx),>a1=%ymm3
+vmovupd   1024(%rdx),%ymm3
+
+# qhasm: a2 = mem256[tp + 2048]
+# asm 1: vmovupd   2048(<tp=int64#3),>a2=reg256#5
+# asm 2: vmovupd   2048(<tp=%rdx),>a2=%ymm4
+vmovupd   2048(%rdx),%ymm4
+
+# qhasm: a3 = mem256[tp + 3072]
+# asm 1: vmovupd   3072(<tp=int64#3),>a3=reg256#6
+# asm 2: vmovupd   3072(<tp=%rdx),>a3=%ymm5
+vmovupd   3072(%rdx),%ymm5
+
+# qhasm: 4x r0 = approx a0 + a1
+# asm 1: vaddpd <a0=reg256#3,<a1=reg256#4,>r0=reg256#7
+# asm 2: vaddpd <a0=%ymm2,<a1=%ymm3,>r0=%ymm6
+vaddpd %ymm2,%ymm3,%ymm6
+
+# qhasm: 4x r2 = approx a2 + a3
+# asm 1: vaddpd <a2=reg256#5,<a3=reg256#6,>r2=reg256#8
+# asm 2: vaddpd <a2=%ymm4,<a3=%ymm5,>r2=%ymm7
+vaddpd %ymm4,%ymm5,%ymm7
+
+# qhasm: 4x r1 = approx a0 - a1
+# asm 1: vsubpd <a1=reg256#4,<a0=reg256#3,>r1=reg256#3
+# asm 2: vsubpd <a1=%ymm3,<a0=%ymm2,>r1=%ymm2
+vsubpd %ymm3,%ymm2,%ymm2
+
+# qhasm: 4x r3 = approx a2 - a3
+# asm 1: vsubpd <a3=reg256#6,<a2=reg256#5,>r3=reg256#4
+# asm 2: vsubpd <a3=%ymm5,<a2=%ymm4,>r3=%ymm3
+vsubpd %ymm5,%ymm4,%ymm3
+
+# qhasm: 4x a0 = approx r0 + r2
+# asm 1: vaddpd <r0=reg256#7,<r2=reg256#8,>a0=reg256#5
+# asm 2: vaddpd <r0=%ymm6,<r2=%ymm7,>a0=%ymm4
+vaddpd %ymm6,%ymm7,%ymm4
+
+# qhasm: 4x a2 = approx r0 - r2
+# asm 1: vsubpd <r2=reg256#8,<r0=reg256#7,>a2=reg256#6
+# asm 2: vsubpd <r2=%ymm7,<r0=%ymm6,>a2=%ymm5
+vsubpd %ymm7,%ymm6,%ymm5
+
+# qhasm: w = mem64[wp + 0],mem64[wp + 0],mem64[wp + 0],mem64[wp + 0]
+# asm 1: vbroadcastsd 0(<wp=int64#7),>w=reg256#7
+# asm 2: vbroadcastsd 0(<wp=%rax),>w=%ymm6
+vbroadcastsd 0(%rax),%ymm6
+
+# qhasm: 4x r3 approx*= w
+# asm 1: vmulpd <w=reg256#7,<r3=reg256#4,>r3=reg256#4
+# asm 2: vmulpd <w=%ymm6,<r3=%ymm3,>r3=%ymm3
+vmulpd %ymm6,%ymm3,%ymm3
+
+# qhasm: 4x a1 = approx r1 + r3
+# asm 1: vaddpd <r1=reg256#3,<r3=reg256#4,>a1=reg256#7
+# asm 2: vaddpd <r1=%ymm2,<r3=%ymm3,>a1=%ymm6
+vaddpd %ymm2,%ymm3,%ymm6
+
+# qhasm: 4x a3 = approx r1 - r3
+# asm 1: vsubpd <r3=reg256#4,<r1=reg256#3,>a3=reg256#3
+# asm 2: vsubpd <r3=%ymm3,<r1=%ymm2,>a3=%ymm2
+vsubpd %ymm3,%ymm2,%ymm2
+
+# qhasm: t0 = mem256[tp + 4096]
+# asm 1: vmovupd   4096(<tp=int64#3),>t0=reg256#4
+# asm 2: vmovupd   4096(<tp=%rdx),>t0=%ymm3
+vmovupd   4096(%rdx),%ymm3
+
+# qhasm: t1 = mem256[tp + 5120]
+# asm 1: vmovupd   5120(<tp=int64#3),>t1=reg256#8
+# asm 2: vmovupd   5120(<tp=%rdx),>t1=%ymm7
+vmovupd   5120(%rdx),%ymm7
+
+# qhasm: t2 = mem256[tp + 6144]
+# asm 1: vmovupd   6144(<tp=int64#3),>t2=reg256#9
+# asm 2: vmovupd   6144(<tp=%rdx),>t2=%ymm8
+vmovupd   6144(%rdx),%ymm8
+
+# qhasm: t3 = mem256[tp + 7168]
+# asm 1: vmovupd   7168(<tp=int64#3),>t3=reg256#10
+# asm 2: vmovupd   7168(<tp=%rdx),>t3=%ymm9
+vmovupd   7168(%rdx),%ymm9
+
+# qhasm: 4x r0 = approx t0 + t1
+# asm 1: vaddpd <t0=reg256#4,<t1=reg256#8,>r0=reg256#11
+# asm 2: vaddpd <t0=%ymm3,<t1=%ymm7,>r0=%ymm10
+vaddpd %ymm3,%ymm7,%ymm10
+
+# qhasm: 4x r2 = approx t2 + t3
+# asm 1: vaddpd <t2=reg256#9,<t3=reg256#10,>r2=reg256#12
+# asm 2: vaddpd <t2=%ymm8,<t3=%ymm9,>r2=%ymm11
+vaddpd %ymm8,%ymm9,%ymm11
+
+# qhasm: 4x r1 = approx t0 - t1
+# asm 1: vsubpd <t1=reg256#8,<t0=reg256#4,>r1=reg256#4
+# asm 2: vsubpd <t1=%ymm7,<t0=%ymm3,>r1=%ymm3
+vsubpd %ymm7,%ymm3,%ymm3
+
+# qhasm: 4x r3 = approx t2 - t3
+# asm 1: vsubpd <t3=reg256#10,<t2=reg256#9,>r3=reg256#8
+# asm 2: vsubpd <t3=%ymm9,<t2=%ymm8,>r3=%ymm7
+vsubpd %ymm9,%ymm8,%ymm7
+
+# qhasm: 4x t0 = approx r0 + r2
+# asm 1: vaddpd <r0=reg256#11,<r2=reg256#12,>t0=reg256#9
+# asm 2: vaddpd <r0=%ymm10,<r2=%ymm11,>t0=%ymm8
+vaddpd %ymm10,%ymm11,%ymm8
+
+# qhasm: 4x t2 = approx r0 - r2
+# asm 1: vsubpd <r2=reg256#12,<r0=reg256#11,>t2=reg256#10
+# asm 2: vsubpd <r2=%ymm11,<r0=%ymm10,>t2=%ymm9
+vsubpd %ymm11,%ymm10,%ymm9
+
+# qhasm: w = mem64[wp + 8],mem64[wp + 8],mem64[wp + 8],mem64[wp + 8]
+# asm 1: vbroadcastsd 8(<wp=int64#7),>w=reg256#11
+# asm 2: vbroadcastsd 8(<wp=%rax),>w=%ymm10
+vbroadcastsd 8(%rax),%ymm10
+
+# qhasm: 4x r1 approx*= w
+# asm 1: vmulpd <w=reg256#11,<r1=reg256#4,>r1=reg256#4
+# asm 2: vmulpd <w=%ymm10,<r1=%ymm3,>r1=%ymm3
+vmulpd %ymm10,%ymm3,%ymm3
+
+# qhasm: 4x c = approx r1 * qinv
+# asm 1: vmulpd <r1=reg256#4,<qinv=reg256#2,>c=reg256#11
+# asm 2: vmulpd <r1=%ymm3,<qinv=%ymm1,>c=%ymm10
+vmulpd %ymm3,%ymm1,%ymm10
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11
+# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10
+vroundpd $9,%ymm10,%ymm10
+
+# qhasm: 4x r1 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r1=reg256#4
+# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r1=%ymm3
+vfnmadd231pd %ymm10,%ymm0,%ymm3
+
+# qhasm: w = mem64[wp + 16],mem64[wp + 16],mem64[wp + 16],mem64[wp + 16]
+# asm 1: vbroadcastsd 16(<wp=int64#7),>w=reg256#11
+# asm 2: vbroadcastsd 16(<wp=%rax),>w=%ymm10
+vbroadcastsd 16(%rax),%ymm10
+
+# qhasm: 4x r3 approx*= w
+# asm 1: vmulpd <w=reg256#11,<r3=reg256#8,>r3=reg256#8
+# asm 2: vmulpd <w=%ymm10,<r3=%ymm7,>r3=%ymm7
+vmulpd %ymm10,%ymm7,%ymm7
+
+# qhasm: 4x c = approx r3 * qinv
+# asm 1: vmulpd <r3=reg256#8,<qinv=reg256#2,>c=reg256#11
+# asm 2: vmulpd <r3=%ymm7,<qinv=%ymm1,>c=%ymm10
+vmulpd %ymm7,%ymm1,%ymm10
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#11,>c=reg256#11
+# asm 2: vroundpd $9,<c=%ymm10,>c=%ymm10
+vroundpd $9,%ymm10,%ymm10
+
+# qhasm: 4x r3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#11,<q=reg256#1,<r3=reg256#8
+# asm 2: vfnmadd231pd <c=%ymm10,<q=%ymm0,<r3=%ymm7
+vfnmadd231pd %ymm10,%ymm0,%ymm7
+
+# qhasm: 4x t1 = approx r1 + r3
+# asm 1: vaddpd <r1=reg256#4,<r3=reg256#8,>t1=reg256#11
+# asm 2: vaddpd <r1=%ymm3,<r3=%ymm7,>t1=%ymm10
+vaddpd %ymm3,%ymm7,%ymm10
+
+# qhasm: w = mem64[wp + 24],mem64[wp + 24],mem64[wp + 24],mem64[wp + 24]
+# asm 1: vbroadcastsd 24(<wp=int64#7),>w=reg256#12
+# asm 2: vbroadcastsd 24(<wp=%rax),>w=%ymm11
+vbroadcastsd 24(%rax),%ymm11
+
+# qhasm: 4x t3 = approx r1 - r3
+# asm 1: vsubpd <r3=reg256#8,<r1=reg256#4,>t3=reg256#4
+# asm 2: vsubpd <r3=%ymm7,<r1=%ymm3,>t3=%ymm3
+vsubpd %ymm7,%ymm3,%ymm3
+
+# qhasm: 4x t3 approx*= w
+# asm 1: vmulpd <w=reg256#12,<t3=reg256#4,>t3=reg256#4
+# asm 2: vmulpd <w=%ymm11,<t3=%ymm3,>t3=%ymm3
+vmulpd %ymm11,%ymm3,%ymm3
+
+# qhasm: 4x t2 approx*= w
+# asm 1: vmulpd <w=reg256#12,<t2=reg256#10,>t2=reg256#8
+# asm 2: vmulpd <w=%ymm11,<t2=%ymm9,>t2=%ymm7
+vmulpd %ymm11,%ymm9,%ymm7
+
+# qhasm: 4x r0 = approx a0 + t0
+# asm 1: vaddpd <a0=reg256#5,<t0=reg256#9,>r0=reg256#10
+# asm 2: vaddpd <a0=%ymm4,<t0=%ymm8,>r0=%ymm9
+vaddpd %ymm4,%ymm8,%ymm9
+
+# qhasm: 4x r1 = approx a1 + t1
+# asm 1: vaddpd <a1=reg256#7,<t1=reg256#11,>r1=reg256#12
+# asm 2: vaddpd <a1=%ymm6,<t1=%ymm10,>r1=%ymm11
+vaddpd %ymm6,%ymm10,%ymm11
+
+# qhasm: 4x r2 = approx a2 + t2
+# asm 1: vaddpd <a2=reg256#6,<t2=reg256#8,>r2=reg256#13
+# asm 2: vaddpd <a2=%ymm5,<t2=%ymm7,>r2=%ymm12
+vaddpd %ymm5,%ymm7,%ymm12
+
+# qhasm: 4x r3 = approx a3 + t3
+# asm 1: vaddpd <a3=reg256#3,<t3=reg256#4,>r3=reg256#14
+# asm 2: vaddpd <a3=%ymm2,<t3=%ymm3,>r3=%ymm13
+vaddpd %ymm2,%ymm3,%ymm13
+
+# qhasm: 4x a0 = approx a0 - t0
+# asm 1: vsubpd <t0=reg256#9,<a0=reg256#5,>a0=reg256#5
+# asm 2: vsubpd <t0=%ymm8,<a0=%ymm4,>a0=%ymm4
+vsubpd %ymm8,%ymm4,%ymm4
+
+# qhasm: 4x a1 = approx a1 - t1
+# asm 1: vsubpd <t1=reg256#11,<a1=reg256#7,>a1=reg256#7
+# asm 2: vsubpd <t1=%ymm10,<a1=%ymm6,>a1=%ymm6
+vsubpd %ymm10,%ymm6,%ymm6
+
+# qhasm: 4x a2 = approx a2 - t2
+# asm 1: vsubpd <t2=reg256#8,<a2=reg256#6,>a2=reg256#6
+# asm 2: vsubpd <t2=%ymm7,<a2=%ymm5,>a2=%ymm5
+vsubpd %ymm7,%ymm5,%ymm5
+
+# qhasm: 4x a3 = approx a3 - t3
+# asm 1: vsubpd <t3=reg256#4,<a3=reg256#3,>a3=reg256#3
+# asm 2: vsubpd <t3=%ymm3,<a3=%ymm2,>a3=%ymm2
+vsubpd %ymm3,%ymm2,%ymm2
+
+# qhasm: 4x c = approx r0 * qinv
+# asm 1: vmulpd <r0=reg256#10,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <r0=%ymm9,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm9,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x r0 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<r0=reg256#10
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<r0=%ymm9
+vfnmadd231pd %ymm3,%ymm0,%ymm9
+
+# qhasm: 4x c = approx r1 * qinv
+# asm 1: vmulpd <r1=reg256#12,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <r1=%ymm11,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm11,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x r1 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<r1=reg256#12
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<r1=%ymm11
+vfnmadd231pd %ymm3,%ymm0,%ymm11
+
+# qhasm: 4x c = approx r2 * qinv
+# asm 1: vmulpd <r2=reg256#13,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <r2=%ymm12,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm12,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x r2 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<r2=reg256#13
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<r2=%ymm12
+vfnmadd231pd %ymm3,%ymm0,%ymm12
+
+# qhasm: 4x c = approx r3 * qinv
+# asm 1: vmulpd <r3=reg256#14,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <r3=%ymm13,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm13,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x r3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<r3=reg256#14
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<r3=%ymm13
+vfnmadd231pd %ymm3,%ymm0,%ymm13
+
+# qhasm: 4x c = approx a0 * qinv
+# asm 1: vmulpd <a0=reg256#5,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <a0=%ymm4,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm4,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x a0 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a0=reg256#5
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a0=%ymm4
+vfnmadd231pd %ymm3,%ymm0,%ymm4
+
+# qhasm: 4x c = approx a1 * qinv
+# asm 1: vmulpd <a1=reg256#7,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <a1=%ymm6,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm6,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x a1 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a1=reg256#7
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a1=%ymm6
+vfnmadd231pd %ymm3,%ymm0,%ymm6
+
+# qhasm: 4x c = approx a2 * qinv
+# asm 1: vmulpd <a2=reg256#6,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <a2=%ymm5,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm5,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x a2 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a2=reg256#6
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a2=%ymm5
+vfnmadd231pd %ymm3,%ymm0,%ymm5
+
+# qhasm: 4x c = approx a3 * qinv
+# asm 1: vmulpd <a3=reg256#3,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <a3=%ymm2,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm2,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x a3 approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a3=reg256#3
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a3=%ymm2
+vfnmadd231pd %ymm3,%ymm0,%ymm2
+
+# qhasm: t0 = (4x int32)(4x double)r0,0,0,0,0
+# asm 1: vcvtpd2dq <r0=reg256#10,>t0=reg256#4dq
+# asm 2: vcvtpd2dq <r0=%ymm9,>t0=%xmm3
+vcvtpd2dq %ymm9,%xmm3
+
+# qhasm: t1 = (4x int32)(4x double)r1,0,0,0,0
+# asm 1: vcvtpd2dq <r1=reg256#12,>t1=reg256#8dq
+# asm 2: vcvtpd2dq <r1=%ymm11,>t1=%xmm7
+vcvtpd2dq %ymm11,%xmm7
+
+# qhasm: t2 = (4x int32)(4x double)r2,0,0,0,0
+# asm 1: vcvtpd2dq <r2=reg256#13,>t2=reg256#9dq
+# asm 2: vcvtpd2dq <r2=%ymm12,>t2=%xmm8
+vcvtpd2dq %ymm12,%xmm8
+
+# qhasm: t3 = (4x int32)(4x double)r3,0,0,0,0
+# asm 1: vcvtpd2dq <r3=reg256#14,>t3=reg256#10dq
+# asm 2: vcvtpd2dq <r3=%ymm13,>t3=%xmm9
+vcvtpd2dq %ymm13,%xmm9
+
+# qhasm: mem128[ap +   0] = t0
+# asm 1: vmovupd <t0=reg256#4dq,0(<ap=int64#1)
+# asm 2: vmovupd <t0=%xmm3,0(<ap=%rdi)
+vmovupd %xmm3,0(%rdi)
+
+# qhasm: mem128[ap + 512] = t1
+# asm 1: vmovupd <t1=reg256#8dq,512(<ap=int64#1)
+# asm 2: vmovupd <t1=%xmm7,512(<ap=%rdi)
+vmovupd %xmm7,512(%rdi)
+
+# qhasm: mem128[ap + 1024] = t2
+# asm 1: vmovupd <t2=reg256#9dq,1024(<ap=int64#1)
+# asm 2: vmovupd <t2=%xmm8,1024(<ap=%rdi)
+vmovupd %xmm8,1024(%rdi)
+
+# qhasm: mem128[ap + 1536] = t3
+# asm 1: vmovupd <t3=reg256#10dq,1536(<ap=int64#1)
+# asm 2: vmovupd <t3=%xmm9,1536(<ap=%rdi)
+vmovupd %xmm9,1536(%rdi)
+
+# qhasm: t0 = (4x int32)(4x double)a0,0,0,0,0
+# asm 1: vcvtpd2dq <a0=reg256#5,>t0=reg256#4dq
+# asm 2: vcvtpd2dq <a0=%ymm4,>t0=%xmm3
+vcvtpd2dq %ymm4,%xmm3
+
+# qhasm: t1 = (4x int32)(4x double)a1,0,0,0,0
+# asm 1: vcvtpd2dq <a1=reg256#7,>t1=reg256#5dq
+# asm 2: vcvtpd2dq <a1=%ymm6,>t1=%xmm4
+vcvtpd2dq %ymm6,%xmm4
+
+# qhasm: t2 = (4x int32)(4x double)a2,0,0,0,0
+# asm 1: vcvtpd2dq <a2=reg256#6,>t2=reg256#6dq
+# asm 2: vcvtpd2dq <a2=%ymm5,>t2=%xmm5
+vcvtpd2dq %ymm5,%xmm5
+
+# qhasm: t3 = (4x int32)(4x double)a3,0,0,0,0
+# asm 1: vcvtpd2dq <a3=reg256#3,>t3=reg256#3dq
+# asm 2: vcvtpd2dq <a3=%ymm2,>t3=%xmm2
+vcvtpd2dq %ymm2,%xmm2
+
+# qhasm: mem128[ap + 2048] = t0
+# asm 1: vmovupd <t0=reg256#4dq,2048(<ap=int64#1)
+# asm 2: vmovupd <t0=%xmm3,2048(<ap=%rdi)
+vmovupd %xmm3,2048(%rdi)
+
+# qhasm: mem128[ap + 2560] = t1
+# asm 1: vmovupd <t1=reg256#5dq,2560(<ap=int64#1)
+# asm 2: vmovupd <t1=%xmm4,2560(<ap=%rdi)
+vmovupd %xmm4,2560(%rdi)
+
+# qhasm: mem128[ap + 3072] = t2
+# asm 1: vmovupd <t2=reg256#6dq,3072(<ap=int64#1)
+# asm 2: vmovupd <t2=%xmm5,3072(<ap=%rdi)
+vmovupd %xmm5,3072(%rdi)
+
+# qhasm: mem128[ap + 3584] = t3
+# asm 1: vmovupd <t3=reg256#3dq,3584(<ap=int64#1)
+# asm 2: vmovupd <t3=%xmm2,3584(<ap=%rdi)
+vmovupd %xmm2,3584(%rdi)
+
+# qhasm: ap+=16
+# asm 1: add  $16,<ap=int64#1
+# asm 2: add  $16,<ap=%rdi
+add  $16,%rdi
+
+# qhasm: tp+=32
+# asm 1: add  $32,<tp=int64#3
+# asm 2: add  $32,<tp=%rdx
+add  $32,%rdx
+
+# qhasm: unsigned>? ctrj-=1
+# asm 1: sub  $1,<ctrj=int64#2
+# asm 2: sub  $1,<ctrj=%rsi
+sub  $1,%rsi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto loop8910j if unsigned>
+ja ._loop8910j
+
+# qhasm: return
+add %r11,%rsp
+ret
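
Note on the generated file above: each instruction is preceded by its qhasm source line and the two assembler renderings, and every twiddle multiplication is followed by the same reduction triple (vmulpd by qinv, vroundpd $9, which rounds toward negative infinity, then vfnmadd231pd). A minimal scalar C sketch of that idiom is given here for orientation; it is illustrative only and not part of the patch, and it assumes q = 12289 with qinv taken simply as 1.0/q (the generated code loads its own precomputed constants).

#include <math.h>

/* Illustrative scalar model of one twiddle multiplication plus the
 * floating-point reduction used throughout the generated NTT above.
 * Not part of the patch. */
static const double q    = 12289.0;       /* PARAM_Q                          */
static const double qinv = 1.0 / 12289.0; /* stand-in for the precomputed
                                             reciprocal the real code loads   */

static double mul_w_reduce(double x, double w)
{
    double c;
    x = x * w;           /* vmulpd with the broadcast twiddle w        */
    c = floor(x * qinv); /* vmulpd by qinv, then vroundpd $9 (= floor) */
    return x - c * q;    /* vfnmadd231pd: x -= c * q                   */
}

Because all intermediate products stay far below 2^53 they are exact in double precision, so the result is exactly congruent to x*w modulo q; only its range (roughly [0, q)) is approximate, which is why the qhasm annotations mark these steps as "approx".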
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/omegas.c b/crypt/liboqs/kex_rlwe_newhope/avx2/omegas.c
new file mode 100644
index 0000000000000000000000000000000000000000..8f9733158ae3b91bc48ce68f568d90dbee8a7696
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/omegas.c
@@ -0,0 +1,463 @@
+double omegas_double[2300] = { 
+    1.0,1.0,1.0,10810.0,1.0,7143.0,1.0,4043.0,1.0,10984.0,
+    1.0,722.0,1.0,5736.0,1.0,8155.0,1.0,3542.0,1.0,8785.0,
+    1.0,9744.0,1.0,3621.0,1.0,10643.0,1.0,1212.0,1.0,3195.0,
+    1.0,5860.0,1.0,7468.0,1.0,2639.0,1.0,9664.0,1.0,11340.0,
+    1.0,11726.0,1.0,9314.0,1.0,9283.0,1.0,9545.0,1.0,5728.0,
+    1.0,7698.0,1.0,5023.0,1.0,5828.0,1.0,8961.0,1.0,6512.0,
+    1.0,7311.0,1.0,1351.0,1.0,2319.0,1.0,11119.0,1.0,11334.0,
+    1.0,11499.0,1.0,9088.0,1.0,3014.0,1.0,5086.0,1.0,10963.0,
+    1.0,4846.0,1.0,9542.0,1.0,9154.0,1.0,3712.0,1.0,4805.0,
+    1.0,8736.0,1.0,11227.0,1.0,9995.0,1.0,3091.0,1.0,12208.0,
+    1.0,7969.0,1.0,11289.0,1.0,9326.0,1.0,7393.0,1.0,9238.0,
+    1.0,2366.0,1.0,11112.0,1.0,8034.0,1.0,10654.0,1.0,9521.0,
+    1.0,12149.0,1.0,10436.0,1.0,7678.0,1.0,11563.0,1.0,1260.0,
+    1.0,4388.0,1.0,4632.0,1.0,6534.0,1.0,2426.0,1.0,334.0,
+    1.0,1428.0,1.0,1696.0,1.0,2013.0,1.0,9000.0,1.0,729.0,
+    1.0,3241.0,1.0,2881.0,1.0,3284.0,1.0,7197.0,1.0,10200.0,
+    1.0,8595.0,1.0,7110.0,1.0,10530.0,1.0,8582.0,1.0,3382.0,
+    1.0,11934.0,1.0,9741.0,1.0,8058.0,1.0,3637.0,1.0,3459.0,
+    1.0,145.0,1.0,6747.0,1.0,9558.0,1.0,8357.0,1.0,7399.0,
+    1.0,6378.0,1.0,9447.0,1.0,480.0,1.0,1022.0,1.0,9.0,
+    1.0,9821.0,1.0,339.0,1.0,5791.0,1.0,544.0,1.0,10616.0,
+    1.0,4278.0,1.0,6958.0,1.0,7300.0,1.0,8112.0,1.0,8705.0,
+    1.0,1381.0,1.0,9764.0,1.0,11336.0,1.0,8541.0,1.0,827.0,
+    1.0,5767.0,1.0,2476.0,1.0,118.0,1.0,2197.0,1.0,7222.0,
+    1.0,3949.0,1.0,8993.0,1.0,4452.0,1.0,2396.0,1.0,7935.0,
+    1.0,130.0,1.0,2837.0,1.0,6915.0,1.0,2401.0,1.0,442.0,
+    1.0,7188.0,1.0,11222.0,1.0,390.0,1.0,773.0,1.0,8456.0,
+    1.0,3778.0,1.0,354.0,1.0,4861.0,1.0,9377.0,1.0,5698.0,
+    1.0,5012.0,1.0,9808.0,1.0,2859.0,1.0,11244.0,1.0,1017.0,
+    1.0,7404.0,1.0,1632.0,1.0,7205.0,1.0,27.0,1.0,9223.0,
+    1.0,8526.0,1.0,10849.0,1.0,1537.0,1.0,242.0,1.0,4714.0,
+    1.0,8146.0,1.0,9611.0,1.0,3704.0,1.0,5019.0,1.0,11744.0,
+    1.0,1002.0,1.0,5011.0,1.0,5088.0,1.0,8005.0,1.0,7313.0,
+    1.0,10682.0,1.0,8509.0,1.0,11414.0,1.0,9852.0,1.0,3646.0,
+    1.0,6022.0,1.0,2987.0,1.0,9723.0,1.0,10102.0,1.0,6250.0,
+    1.0,9867.0,1.0,11224.0,1.0,2143.0,1.0,11885.0,1.0,7644.0,
+    1.0,1168.0,1.0,5277.0,1.0,11082.0,1.0,3248.0,1.0,493.0,
+    1.0,8193.0,1.0,6845.0,1.0,2381.0,1.0,7952.0,1.0,11854.0,
+    1.0,1378.0,1.0,1912.0,1.0,2166.0,1.0,3915.0,1.0,12176.0,
+    1.0,7370.0,1.0,12129.0,1.0,3149.0,1.0,12286.0,1.0,4437.0,
+    1.0,3636.0,1.0,4938.0,1.0,5291.0,1.0,2704.0,1.0,10863.0,
+    1.0,7635.0,1.0,1663.0,1.0,10512.0,1.0,3364.0,1.0,1689.0,
+    1.0,4057.0,1.0,9018.0,1.0,9442.0,1.0,7875.0,1.0,2174.0,
+    1.0,4372.0,1.0,7247.0,1.0,9984.0,1.0,4053.0,1.0,2645.0,
+    1.0,5195.0,1.0,9509.0,1.0,7394.0,1.0,1484.0,1.0,9042.0,
+    1.0,9603.0,1.0,8311.0,1.0,9320.0,1.0,9919.0,1.0,2865.0,
+    1.0,5332.0,1.0,3510.0,1.0,1630.0,1.0,10163.0,1.0,5407.0,
+    1.0,3186.0,1.0,11136.0,1.0,9405.0,1.0,10040.0,1.0,8241.0,
+    1.0,9890.0,1.0,8889.0,1.0,7098.0,1.0,9153.0,1.0,9289.0,
+    1.0,671.0,1.0,3016.0,1.0,243.0,1.0,6730.0,1.0,420.0,
+    1.0,10111.0,1.0,1544.0,1.0,3985.0,1.0,4905.0,1.0,3531.0,
+    1.0,476.0,1.0,49.0,1.0,1263.0,1.0,5915.0,1.0,1483.0,
+    1.0,9789.0,1.0,10800.0,1.0,10706.0,1.0,6347.0,1.0,1512.0,
+    1.0,350.0,1.0,10474.0,1.0,5383.0,1.0,5369.0,1.0,10232.0,
+    1.0,9087.0,1.0,4493.0,1.0,9551.0,1.0,6421.0,1.0,6554.0,
+    1.0,2655.0,1.0,9280.0,1.0,1693.0,1.0,174.0,1.0,723.0,
+    1.0,10314.0,1.0,8532.0,1.0,347.0,1.0,2925.0,1.0,8974.0,
+    1.0,11863.0,1.0,1858.0,1.0,4754.0,1.0,3030.0,1.0,4115.0,
+    1.0,2361.0,1.0,10446.0,1.0,2908.0,1.0,218.0,1.0,3434.0,
+    1.0,8760.0,1.0,3963.0,1.0,576.0,1.0,6142.0,1.0,9842.0,
+    1.0,1954.0,1.0,10238.0,1.0,9407.0,1.0,10484.0,1.0,3991.0,
+    1.0,8320.0,1.0,9522.0,1.0,156.0,1.0,2281.0,1.0,5876.0,
+    1.0,10258.0,1.0,5333.0,1.0,3772.0,1.0,418.0,1.0,5908.0,
+    1.0,11836.0,1.0,5429.0,1.0,7515.0,1.0,7552.0,1.0,1293.0,
+    1.0,295.0,1.0,6099.0,1.0,5766.0,1.0,652.0,1.0,8273.0,
+    1.0,4077.0,1.0,8527.0,1.0,9370.0,1.0,325.0,1.0,10885.0,
+    1.0,11143.0,1.0,11341.0,1.0,5990.0,1.0,1159.0,1.0,8561.0,
+    1.0,8240.0,1.0,3329.0,1.0,4298.0,1.0,12121.0,1.0,2692.0,
+    1.0,5961.0,1.0,7183.0,1.0,10327.0,1.0,1594.0,1.0,6167.0,
+    1.0,9734.0,1.0,7105.0,1.0,11089.0,1.0,1360.0,1.0,3956.0,
+    1.0,6170.0,1.0,5297.0,1.0,8210.0,1.0,11231.0,1.0,922.0,
+    1.0,441.0,1.0,1958.0,1.0,4322.0,1.0,1112.0,1.0,2078.0,
+    1.0,4046.0,1.0,709.0,1.0,9139.0,1.0,1319.0,1.0,4240.0,
+    1.0,8719.0,1.0,6224.0,1.0,11454.0,1.0,2459.0,1.0,683.0,
+    1.0,3656.0,1.0,12225.0,1.0,10723.0,1.0,5782.0,1.0,9341.0,
+    1.0,9786.0,1.0,9166.0,1.0,10542.0,1.0,9235.0,1.0,6803.0,
+    1.0,7856.0,1.0,6370.0,1.0,3834.0,1.0,7032.0,1.0,7048.0,
+    1.0,9369.0,1.0,8120.0,1.0,9162.0,1.0,6821.0,1.0,1010.0,
+    1.0,8807.0,1.0,787.0,1.0,5057.0,1.0,4698.0,1.0,4780.0,
+    1.0,8844.0,1.0,12097.0,1.0,1321.0,1.0,4912.0,1.0,10240.0,
+    1.0,677.0,1.0,6415.0,1.0,6234.0,1.0,8953.0,1.0,1323.0,
+    1.0,9523.0,1.0,12237.0,1.0,3174.0,1.0,1579.0,1.0,11858.0,
+    1.0,9784.0,1.0,5906.0,1.0,3957.0,1.0,9450.0,1.0,151.0,
+    1.0,10162.0,1.0,12231.0,1.0,12048.0,1.0,3532.0,1.0,11286.0,
+    1.0,1956.0,1.0,7280.0,1.0,11404.0,1.0,6281.0,1.0,3477.0,
+    1.0,6608.0,1.0,142.0,1.0,11184.0,1.0,9445.0,1.0,3438.0,
+    1.0,11314.0,1.0,4212.0,1.0,9260.0,1.0,6695.0,1.0,4782.0,
+    1.0,5886.0,1.0,8076.0,1.0,504.0,1.0,2302.0,1.0,11684.0,
+    1.0,11868.0,1.0,8209.0,1.0,3602.0,1.0,6068.0,1.0,8689.0,
+    1.0,3263.0,1.0,6077.0,1.0,7665.0,1.0,7822.0,1.0,7500.0,
+    1.0,6752.0,1.0,4749.0,1.0,4449.0,1.0,6833.0,1.0,12142.0,
+    1.0,8500.0,1.0,6118.0,1.0,8471.0,1.0,1190.0,1.0,9606.0,
+    1.0,3860.0,1.0,5445.0,1.0,7753.0,1.0,11239.0,1.0,5079.0,
+    1.0,9027.0,1.0,2169.0,1.0,11767.0,1.0,7965.0,1.0,4916.0,
+    1.0,8214.0,1.0,5315.0,1.0,11011.0,1.0,9945.0,1.0,1973.0,
+    1.0,6715.0,1.0,8775.0,1.0,11248.0,1.0,5925.0,1.0,11271.0,
+    1.0,654.0,1.0,3565.0,1.0,1702.0,1.0,1987.0,1.0,6760.0,
+    1.0,5206.0,1.0,3199.0,1.0,12233.0,1.0,6136.0,1.0,6427.0,
+    1.0,6874.0,1.0,8646.0,1.0,4948.0,1.0,6152.0,1.0,400.0,
+    1.0,10561.0,1.0,5339.0,1.0,5446.0,1.0,3710.0,1.0,6093.0,
+    1.0,468.0,1.0,8301.0,1.0,316.0,1.0,11907.0,1.0,10256.0,
+    1.0,8291.0,1.0,3879.0,1.0,1922.0,1.0,10930.0,1.0,6854.0,
+    1.0,973.0,1.0,11035.0,1.0,1.0,1.0,1.0,1.0,1.0,
+    10810.0,10810.0,1.0,1.0,7143.0,7143.0,1.0,1.0,4043.0,4043.0,
+    1.0,10810.0,1.0,1.0,1.0,10984.0,10984.0,1.0,1.0,722.0,
+    722.0,1.0,1.0,5736.0,5736.0,1.0,1.0,8155.0,8155.0,7143.0,
+    4043.0,10810.0,1.0,1.0,3542.0,3542.0,1.0,1.0,8785.0,8785.0,
+    1.0,1.0,9744.0,9744.0,1.0,1.0,3621.0,3621.0,10984.0,722.0,
+    7143.0,1.0,1.0,10643.0,10643.0,1.0,1.0,1212.0,1212.0,1.0,
+    1.0,3195.0,3195.0,1.0,1.0,5860.0,5860.0,5736.0,8155.0,4043.0,
+    1.0,1.0,7468.0,7468.0,1.0,1.0,2639.0,2639.0,1.0,1.0,
+    9664.0,9664.0,1.0,1.0,11340.0,11340.0,3542.0,8785.0,10984.0,1.0,
+    1.0,11726.0,11726.0,1.0,1.0,9314.0,9314.0,1.0,1.0,9283.0,
+    9283.0,1.0,1.0,9545.0,9545.0,9744.0,3621.0,722.0,1.0,1.0,
+    5728.0,5728.0,1.0,1.0,7698.0,7698.0,1.0,1.0,5023.0,5023.0,
+    1.0,1.0,5828.0,5828.0,10643.0,1212.0,5736.0,1.0,1.0,8961.0,
+    8961.0,1.0,1.0,6512.0,6512.0,1.0,1.0,7311.0,7311.0,1.0,
+    1.0,1351.0,1351.0,3195.0,5860.0,8155.0,1.0,1.0,2319.0,2319.0,
+    1.0,1.0,11119.0,11119.0,1.0,1.0,11334.0,11334.0,1.0,1.0,
+    11499.0,11499.0,7468.0,2639.0,3542.0,1.0,1.0,9088.0,9088.0,1.0,
+    1.0,3014.0,3014.0,1.0,1.0,5086.0,5086.0,1.0,1.0,10963.0,
+    10963.0,9664.0,11340.0,8785.0,1.0,1.0,4846.0,4846.0,1.0,1.0,
+    9542.0,9542.0,1.0,1.0,9154.0,9154.0,1.0,1.0,3712.0,3712.0,
+    11726.0,9314.0,9744.0,1.0,1.0,4805.0,4805.0,1.0,1.0,8736.0,
+    8736.0,1.0,1.0,11227.0,11227.0,1.0,1.0,9995.0,9995.0,9283.0,
+    9545.0,3621.0,1.0,1.0,3091.0,3091.0,1.0,1.0,12208.0,12208.0,
+    1.0,1.0,7969.0,7969.0,1.0,1.0,11289.0,11289.0,5728.0,7698.0,
+    10643.0,1.0,1.0,9326.0,9326.0,1.0,1.0,7393.0,7393.0,1.0,
+    1.0,9238.0,9238.0,1.0,1.0,2366.0,2366.0,5023.0,5828.0,1212.0,
+    1.0,1.0,11112.0,11112.0,1.0,1.0,8034.0,8034.0,1.0,1.0,
+    10654.0,10654.0,1.0,1.0,9521.0,9521.0,8961.0,6512.0,3195.0,1.0,
+    1.0,12149.0,12149.0,1.0,1.0,10436.0,10436.0,1.0,1.0,7678.0,
+    7678.0,1.0,1.0,11563.0,11563.0,7311.0,1351.0,5860.0,1.0,1.0,
+    1260.0,1260.0,1.0,1.0,4388.0,4388.0,1.0,1.0,4632.0,4632.0,
+    1.0,1.0,6534.0,6534.0,2319.0,11119.0,7468.0,1.0,1.0,2426.0,
+    2426.0,1.0,1.0,334.0,334.0,1.0,1.0,1428.0,1428.0,1.0,
+    1.0,1696.0,1696.0,11334.0,11499.0,2639.0,1.0,1.0,2013.0,2013.0,
+    1.0,1.0,9000.0,9000.0,1.0,1.0,729.0,729.0,1.0,1.0,
+    3241.0,3241.0,9088.0,3014.0,9664.0,1.0,1.0,2881.0,2881.0,1.0,
+    1.0,3284.0,3284.0,1.0,1.0,7197.0,7197.0,1.0,1.0,10200.0,
+    10200.0,5086.0,10963.0,11340.0,1.0,1.0,8595.0,8595.0,1.0,1.0,
+    7110.0,7110.0,1.0,1.0,10530.0,10530.0,1.0,1.0,8582.0,8582.0,
+    4846.0,9542.0,11726.0,1.0,1.0,3382.0,3382.0,1.0,1.0,11934.0,
+    11934.0,1.0,1.0,9741.0,9741.0,1.0,1.0,8058.0,8058.0,9154.0,
+    3712.0,9314.0,1.0,1.0,3637.0,3637.0,1.0,1.0,3459.0,3459.0,
+    1.0,1.0,145.0,145.0,1.0,1.0,6747.0,6747.0,4805.0,8736.0,
+    9283.0,1.0,1.0,9558.0,9558.0,1.0,1.0,8357.0,8357.0,1.0,
+    1.0,7399.0,7399.0,1.0,1.0,6378.0,6378.0,11227.0,9995.0,9545.0,
+    1.0,1.0,9447.0,9447.0,1.0,1.0,480.0,480.0,1.0,1.0,
+    1022.0,1022.0,1.0,1.0,9.0,9.0,3091.0,12208.0,5728.0,1.0,
+    1.0,9821.0,9821.0,1.0,1.0,339.0,339.0,1.0,1.0,5791.0,
+    5791.0,1.0,1.0,544.0,544.0,7969.0,11289.0,7698.0,1.0,1.0,
+    10616.0,10616.0,1.0,1.0,4278.0,4278.0,1.0,1.0,6958.0,6958.0,
+    1.0,1.0,7300.0,7300.0,9326.0,7393.0,5023.0,1.0,1.0,8112.0,
+    8112.0,1.0,1.0,8705.0,8705.0,1.0,1.0,1381.0,1381.0,1.0,
+    1.0,9764.0,9764.0,9238.0,2366.0,5828.0,1.0,1.0,11336.0,11336.0,
+    1.0,1.0,8541.0,8541.0,1.0,1.0,827.0,827.0,1.0,1.0,
+    5767.0,5767.0,11112.0,8034.0,8961.0,1.0,1.0,2476.0,2476.0,1.0,
+    1.0,118.0,118.0,1.0,1.0,2197.0,2197.0,1.0,1.0,7222.0,
+    7222.0,10654.0,9521.0,6512.0,1.0,1.0,3949.0,3949.0,1.0,1.0,
+    8993.0,8993.0,1.0,1.0,4452.0,4452.0,1.0,1.0,2396.0,2396.0,
+    12149.0,10436.0,7311.0,1.0,1.0,7935.0,7935.0,1.0,1.0,130.0,
+    130.0,1.0,1.0,2837.0,2837.0,1.0,1.0,6915.0,6915.0,7678.0,
+    11563.0,1351.0,1.0,1.0,2401.0,2401.0,1.0,1.0,442.0,442.0,
+    1.0,1.0,7188.0,7188.0,1.0,1.0,11222.0,11222.0,1260.0,4388.0,
+    2319.0,1.0,1.0,390.0,390.0,1.0,1.0,773.0,773.0,1.0,
+    1.0,8456.0,8456.0,1.0,1.0,3778.0,3778.0,4632.0,6534.0,11119.0,
+    1.0,1.0,354.0,354.0,1.0,1.0,4861.0,4861.0,1.0,1.0,
+    9377.0,9377.0,1.0,1.0,5698.0,5698.0,2426.0,334.0,11334.0,1.0,
+    1.0,5012.0,5012.0,1.0,1.0,9808.0,9808.0,1.0,1.0,2859.0,
+    2859.0,1.0,1.0,11244.0,11244.0,1428.0,1696.0,11499.0,1.0,1.0,
+    1017.0,1017.0,1.0,1.0,7404.0,7404.0,1.0,1.0,1632.0,1632.0,
+    1.0,1.0,7205.0,7205.0,2013.0,9000.0,9088.0,1.0,1.0,27.0,
+    27.0,1.0,1.0,9223.0,9223.0,1.0,1.0,8526.0,8526.0,1.0,
+    1.0,10849.0,10849.0,729.0,3241.0,3014.0,1.0,1.0,1537.0,1537.0,
+    1.0,1.0,242.0,242.0,1.0,1.0,4714.0,4714.0,1.0,1.0,
+    8146.0,8146.0,2881.0,3284.0,5086.0,1.0,1.0,9611.0,9611.0,1.0,
+    1.0,3704.0,3704.0,1.0,1.0,5019.0,5019.0,1.0,1.0,11744.0,
+    11744.0,7197.0,10200.0,10963.0,1.0,1.0,1002.0,1002.0,1.0,1.0,
+    5011.0,5011.0,1.0,1.0,5088.0,5088.0,1.0,1.0,8005.0,8005.0,
+    8595.0,7110.0,4846.0,1.0,1.0,7313.0,7313.0,1.0,1.0,10682.0,
+    10682.0,1.0,1.0,8509.0,8509.0,1.0,1.0,11414.0,11414.0,10530.0,
+    8582.0,9542.0,1.0,1.0,9852.0,9852.0,1.0,1.0,3646.0,3646.0,
+    1.0,1.0,6022.0,6022.0,1.0,1.0,2987.0,2987.0,3382.0,11934.0,
+    9154.0,1.0,1.0,9723.0,9723.0,1.0,1.0,10102.0,10102.0,1.0,
+    1.0,6250.0,6250.0,1.0,1.0,9867.0,9867.0,9741.0,8058.0,3712.0,
+    1.0,1.0,11224.0,11224.0,1.0,1.0,2143.0,2143.0,1.0,1.0,
+    11885.0,11885.0,1.0,1.0,7644.0,7644.0,3637.0,3459.0,4805.0,1.0,
+    1.0,1168.0,1168.0,1.0,1.0,5277.0,5277.0,1.0,1.0,11082.0,
+    11082.0,1.0,1.0,3248.0,3248.0,145.0,6747.0,8736.0,1.0,1.0,
+    493.0,493.0,1.0,1.0,8193.0,8193.0,1.0,1.0,6845.0,6845.0,
+    1.0,1.0,2381.0,2381.0,9558.0,8357.0,11227.0,1.0,1.0,7952.0,
+    7952.0,1.0,1.0,11854.0,11854.0,1.0,1.0,1378.0,1378.0,1.0,
+    1.0,1912.0,1912.0,7399.0,6378.0,9995.0,1.0,1.0,2166.0,2166.0,
+    1.0,1.0,3915.0,3915.0,1.0,1.0,12176.0,12176.0,1.0,1.0,
+    7370.0,7370.0,9447.0,480.0,3091.0,1.0,1.0,12129.0,12129.0,1.0,
+    1.0,3149.0,3149.0,1.0,1.0,12286.0,12286.0,1.0,1.0,4437.0,
+    4437.0,1022.0,9.0,12208.0,1.0,1.0,3636.0,3636.0,1.0,1.0,
+    4938.0,4938.0,1.0,1.0,5291.0,5291.0,1.0,1.0,2704.0,2704.0,
+    9821.0,339.0,7969.0,1.0,1.0,10863.0,10863.0,1.0,1.0,7635.0,
+    7635.0,1.0,1.0,1663.0,1663.0,1.0,1.0,10512.0,10512.0,5791.0,
+    544.0,11289.0,1.0,1.0,3364.0,3364.0,1.0,1.0,1689.0,1689.0,
+    1.0,1.0,4057.0,4057.0,1.0,1.0,9018.0,9018.0,10616.0,4278.0,
+    9326.0,1.0,1.0,9442.0,9442.0,1.0,1.0,7875.0,7875.0,1.0,
+    1.0,2174.0,2174.0,1.0,1.0,4372.0,4372.0,6958.0,7300.0,7393.0,
+    1.0,1.0,7247.0,7247.0,1.0,1.0,9984.0,9984.0,1.0,1.0,
+    4053.0,4053.0,1.0,1.0,2645.0,2645.0,8112.0,8705.0,9238.0,1.0,
+    1.0,5195.0,5195.0,1.0,1.0,9509.0,9509.0,1.0,1.0,7394.0,
+    7394.0,1.0,1.0,1484.0,1484.0,1381.0,9764.0,2366.0,1.0,1.0,
+    9042.0,9042.0,1.0,1.0,9603.0,9603.0,1.0,1.0,8311.0,8311.0,
+    1.0,1.0,9320.0,9320.0,11336.0,8541.0,11112.0,1.0,1.0,9919.0,
+    9919.0,1.0,1.0,2865.0,2865.0,1.0,1.0,5332.0,5332.0,1.0,
+    1.0,3510.0,3510.0,827.0,5767.0,8034.0,1.0,1.0,1630.0,1630.0,
+    1.0,1.0,10163.0,10163.0,1.0,1.0,5407.0,5407.0,1.0,1.0,
+    3186.0,3186.0,2476.0,118.0,10654.0,1.0,1.0,11136.0,11136.0,1.0,
+    1.0,9405.0,9405.0,1.0,1.0,10040.0,10040.0,1.0,1.0,8241.0,
+    8241.0,2197.0,7222.0,9521.0,1.0,1.0,9890.0,9890.0,1.0,1.0,
+    8889.0,8889.0,1.0,1.0,7098.0,7098.0,1.0,1.0,9153.0,9153.0,
+    3949.0,8993.0,12149.0,1.0,1.0,9289.0,9289.0,1.0,1.0,671.0,
+    671.0,1.0,1.0,3016.0,3016.0,1.0,1.0,243.0,243.0,4452.0,
+    2396.0,10436.0,1.0,1.0,6730.0,6730.0,1.0,1.0,420.0,420.0,
+    1.0,1.0,10111.0,10111.0,1.0,1.0,1544.0,1544.0,7935.0,130.0,
+    7678.0,1.0,1.0,3985.0,3985.0,1.0,1.0,4905.0,4905.0,1.0,
+    1.0,3531.0,3531.0,1.0,1.0,476.0,476.0,2837.0,6915.0,11563.0,
+    1.0,10810.0,1.0,7143.0,4043.0,10810.0,1.0,10984.0,722.0,7143.0,
+    5736.0,8155.0,4043.0,10810.0,3542.0,8785.0,10984.0,9744.0,3621.0,722.0,
+    7143.0,10643.0,1212.0,5736.0,3195.0,5860.0,8155.0,4043.0,7468.0,2639.0,
+    3542.0,9664.0,11340.0,8785.0,10984.0,11726.0,9314.0,9744.0,9283.0,9545.0,
+    3621.0,722.0,5728.0,7698.0,10643.0,5023.0,5828.0,1212.0,5736.0,8961.0,
+    6512.0,3195.0,7311.0,1351.0,5860.0,8155.0,10810.0,7143.0,4043.0,10810.0};
+
+double omegas_inv_double[2300] = { 
+    1.0, 1.0,1.0,1479.0,1.0,8246.0,1.0,5146.0,1.0,4134.0,1.0,
+    6553.0,1.0,11567.0,1.0,1305.0,1.0,6429.0,1.0,9094.0,1.0,
+    11077.0,1.0,1646.0,1.0,8668.0,1.0,2545.0,1.0,3504.0,1.0,
+    8747.0,1.0,10938.0,1.0,4978.0,1.0,5777.0,1.0,3328.0,1.0,
+    6461.0,1.0,7266.0,1.0,4591.0,1.0,6561.0,1.0,2744.0,1.0,
+    3006.0,1.0,2975.0,1.0,563.0,1.0,949.0,1.0,2625.0,1.0,
+    9650.0,1.0,4821.0,1.0,726.0,1.0,4611.0,1.0,1853.0,1.0,
+    140.0,1.0,2768.0,1.0,1635.0,1.0,4255.0,1.0,1177.0,1.0,
+    9923.0,1.0,3051.0,1.0,4896.0,1.0,2963.0,1.0,1000.0,1.0,
+    4320.0,1.0,81.0,1.0,9198.0,1.0,2294.0,1.0,1062.0,1.0,
+    3553.0,1.0,7484.0,1.0,8577.0,1.0,3135.0,1.0,2747.0,1.0,
+    7443.0,1.0,1326.0,1.0,7203.0,1.0,9275.0,1.0,3201.0,1.0,
+    790.0,1.0,955.0,1.0,1170.0,1.0,9970.0,1.0,5374.0,1.0,
+    9452.0,1.0,12159.0,1.0,4354.0,1.0,9893.0,1.0,7837.0,1.0,
+    3296.0,1.0,8340.0,1.0,5067.0,1.0,10092.0,1.0,12171.0,1.0,
+    9813.0,1.0,6522.0,1.0,11462.0,1.0,3748.0,1.0,953.0,1.0,
+    2525.0,1.0,10908.0,1.0,3584.0,1.0,4177.0,1.0,4989.0,1.0,
+    5331.0,1.0,8011.0,1.0,1673.0,1.0,11745.0,1.0,6498.0,1.0,
+    11950.0,1.0,2468.0,1.0,12280.0,1.0,11267.0,1.0,11809.0,1.0,
+    2842.0,1.0,5911.0,1.0,4890.0,1.0,3932.0,1.0,2731.0,1.0,
+    5542.0,1.0,12144.0,1.0,8830.0,1.0,8652.0,1.0,4231.0,1.0,
+    2548.0,1.0,355.0,1.0,8907.0,1.0,3707.0,1.0,1759.0,1.0,
+    5179.0,1.0,3694.0,1.0,2089.0,1.0,5092.0,1.0,9005.0,1.0,
+    9408.0,1.0,9048.0,1.0,11560.0,1.0,3289.0,1.0,10276.0,1.0,
+    10593.0,1.0,10861.0,1.0,11955.0,1.0,9863.0,1.0,5755.0,1.0,
+    7657.0,1.0,7901.0,1.0,11029.0,1.0,11813.0,1.0,8758.0,1.0,
+    7384.0,1.0,8304.0,1.0,10745.0,1.0,2178.0,1.0,11869.0,1.0,
+    5559.0,1.0,12046.0,1.0,9273.0,1.0,11618.0,1.0,3000.0,1.0,
+    3136.0,1.0,5191.0,1.0,3400.0,1.0,2399.0,1.0,4048.0,1.0,
+    2249.0,1.0,2884.0,1.0,1153.0,1.0,9103.0,1.0,6882.0,1.0,
+    2126.0,1.0,10659.0,1.0,8779.0,1.0,6957.0,1.0,9424.0,1.0,
+    2370.0,1.0,2969.0,1.0,3978.0,1.0,2686.0,1.0,3247.0,1.0,
+    10805.0,1.0,4895.0,1.0,2780.0,1.0,7094.0,1.0,9644.0,1.0,
+    8236.0,1.0,2305.0,1.0,5042.0,1.0,7917.0,1.0,10115.0,1.0,
+    4414.0,1.0,2847.0,1.0,3271.0,1.0,8232.0,1.0,10600.0,1.0,
+    8925.0,1.0,1777.0,1.0,10626.0,1.0,4654.0,1.0,1426.0,1.0,
+    9585.0,1.0,6998.0,1.0,7351.0,1.0,8653.0,1.0,7852.0,1.0,
+    3.0,1.0,9140.0,1.0,160.0,1.0,4919.0,1.0,113.0,1.0,
+    8374.0,1.0,10123.0,1.0,10377.0,1.0,10911.0,1.0,435.0,1.0,
+    4337.0,1.0,9908.0,1.0,5444.0,1.0,4096.0,1.0,11796.0,1.0,
+    9041.0,1.0,1207.0,1.0,7012.0,1.0,11121.0,1.0,4645.0,1.0,
+    404.0,1.0,10146.0,1.0,1065.0,1.0,2422.0,1.0,6039.0,1.0,
+    2187.0,1.0,2566.0,1.0,9302.0,1.0,6267.0,1.0,8643.0,1.0,
+    2437.0,1.0,875.0,1.0,3780.0,1.0,1607.0,1.0,4976.0,1.0,
+    4284.0,1.0,7201.0,1.0,7278.0,1.0,11287.0,1.0,545.0,1.0,
+    7270.0,1.0,8585.0,1.0,2678.0,1.0,4143.0,1.0,7575.0,1.0,
+    12047.0,1.0,10752.0,1.0,1440.0,1.0,3763.0,1.0,3066.0,1.0,
+    12262.0,1.0,5084.0,1.0,10657.0,1.0,4885.0,1.0,11272.0,1.0,
+    1045.0,1.0,9430.0,1.0,2481.0,1.0,7277.0,1.0,6591.0,1.0,
+    2912.0,1.0,7428.0,1.0,11935.0,1.0,8511.0,1.0,3833.0,1.0,
+    11516.0,1.0,11899.0,1.0,1067.0,1.0,5101.0,1.0,11847.0,1.0,
+    9888.0,1.0,1254.0,1.0,11316.0,1.0,5435.0,1.0,1359.0,1.0,
+    10367.0,1.0,8410.0,1.0,3998.0,1.0,2033.0,1.0,382.0,1.0,
+    11973.0,1.0,3988.0,1.0,11821.0,1.0,6196.0,1.0,8579.0,1.0,
+    6843.0,1.0,6950.0,1.0,1728.0,1.0,11889.0,1.0,6137.0,1.0,
+    7341.0,1.0,3643.0,1.0,5415.0,1.0,5862.0,1.0,6153.0,1.0,
+    56.0,1.0,9090.0,1.0,7083.0,1.0,5529.0,1.0,10302.0,1.0,
+    10587.0,1.0,8724.0,1.0,11635.0,1.0,1018.0,1.0,6364.0,1.0,
+    1041.0,1.0,3514.0,1.0,5574.0,1.0,10316.0,1.0,2344.0,1.0,
+    1278.0,1.0,6974.0,1.0,4075.0,1.0,7373.0,1.0,4324.0,1.0,
+    522.0,1.0,10120.0,1.0,3262.0,1.0,7210.0,1.0,1050.0,1.0,
+    4536.0,1.0,6844.0,1.0,8429.0,1.0,2683.0,1.0,11099.0,1.0,
+    3818.0,1.0,6171.0,1.0,3789.0,1.0,147.0,1.0,5456.0,1.0,
+    7840.0,1.0,7540.0,1.0,5537.0,1.0,4789.0,1.0,4467.0,1.0,
+    4624.0,1.0,6212.0,1.0,9026.0,1.0,3600.0,1.0,6221.0,1.0,
+    8687.0,1.0,4080.0,1.0,421.0,1.0,605.0,1.0,9987.0,1.0,
+    11785.0,1.0,4213.0,1.0,6403.0,1.0,7507.0,1.0,5594.0,1.0,
+    3029.0,1.0,8077.0,1.0,975.0,1.0,8851.0,1.0,2844.0,1.0,
+    1105.0,1.0,12147.0,1.0,5681.0,1.0,8812.0,1.0,6008.0,1.0,
+    885.0,1.0,5009.0,1.0,10333.0,1.0,1003.0,1.0,8757.0,1.0,
+    241.0,1.0,58.0,1.0,2127.0,1.0,12138.0,1.0,2839.0,1.0,
+    8332.0,1.0,6383.0,1.0,2505.0,1.0,431.0,1.0,10710.0,1.0,
+    9115.0,1.0,52.0,1.0,2766.0,1.0,10966.0,1.0,3336.0,1.0,
+    6055.0,1.0,5874.0,1.0,11612.0,1.0,2049.0,1.0,7377.0,1.0,
+    10968.0,1.0,192.0,1.0,3445.0,1.0,7509.0,1.0,7591.0,1.0,
+    7232.0,1.0,11502.0,1.0,3482.0,1.0,11279.0,1.0,5468.0,1.0,
+    3127.0,1.0,4169.0,1.0,2920.0,1.0,5241.0,1.0,5257.0,1.0,
+    8455.0,1.0,5919.0,1.0,4433.0,1.0,5486.0,1.0,3054.0,1.0,
+    1747.0,1.0,3123.0,1.0,2503.0,1.0,2948.0,1.0,6507.0,1.0,
+    1566.0,1.0,64.0,1.0,8633.0,1.0,11606.0,1.0,9830.0,1.0,
+    835.0,1.0,6065.0,1.0,3570.0,1.0,8049.0,1.0,10970.0,1.0,
+    3150.0,1.0,11580.0,1.0,8243.0,1.0,10211.0,1.0,11177.0,1.0,
+    7967.0,1.0,10331.0,1.0,11848.0,1.0,11367.0,1.0,1058.0,1.0,
+    4079.0,1.0,6992.0,1.0,6119.0,1.0,8333.0,1.0,10929.0,1.0,
+    1200.0,1.0,5184.0,1.0,2555.0,1.0,6122.0,1.0,10695.0,1.0,
+    1962.0,1.0,5106.0,1.0,6328.0,1.0,9597.0,1.0,168.0,1.0,
+    7991.0,1.0,8960.0,1.0,4049.0,1.0,3728.0,1.0,11130.0,1.0,
+    6299.0,1.0,948.0,1.0,1146.0,1.0,1404.0,1.0,11964.0,1.0,
+    2919.0,1.0,3762.0,1.0,8212.0,1.0,4016.0,1.0,11637.0,1.0,
+    6523.0,1.0,6190.0,1.0,11994.0,1.0,10996.0,1.0,4737.0,1.0,
+    4774.0,1.0,6860.0,1.0,453.0,1.0,6381.0,1.0,11871.0,1.0,
+    8517.0,1.0,6956.0,1.0,2031.0,1.0,6413.0,1.0,10008.0,1.0,
+    12133.0,1.0,2767.0,1.0,3969.0,1.0,8298.0,1.0,1805.0,1.0,
+    2882.0,1.0,2051.0,1.0,10335.0,1.0,2447.0,1.0,6147.0,1.0,
+    11713.0,1.0,8326.0,1.0,3529.0,1.0,8855.0,1.0,12071.0,1.0,
+    9381.0,1.0,1843.0,1.0,9928.0,1.0,8174.0,1.0,9259.0,1.0,
+    7535.0,1.0,10431.0,1.0,426.0,1.0,3315.0,1.0,9364.0,1.0,
+    11942.0,1.0,3757.0,1.0,1975.0,1.0,11566.0,1.0,12115.0,1.0,
+    10596.0,1.0,3009.0,1.0,9634.0,1.0,5735.0,1.0,5868.0,1.0,
+    2738.0,1.0,7796.0,1.0,3202.0,1.0,2057.0,1.0,6920.0,1.0,
+    6906.0,1.0,1815.0,1.0,11939.0,1.0,10777.0,1.0,5942.0,1.0,
+    1583.0,1.0,1489.0,1.0,2500.0,1.0,10806.0,1.0,6374.0,1.0,
+    11026.0,1.0,12240.0,1.0,1.0,1.0,1.0,1.0,1.0,1479.0,
+    1479.0,1.0,1.0,8246.0,8246.0,1.0,1.0,5146.0,5146.0,1.0,
+    1479.0,1.0,1.0,1.0,4134.0,4134.0,1.0,1.0,6553.0,6553.0,
+    1.0,1.0,11567.0,11567.0,1.0,1.0,1305.0,1305.0,8246.0,5146.0,
+    1479.0,1.0,1.0,6429.0,6429.0,1.0,1.0,9094.0,9094.0,1.0,
+    1.0,11077.0,11077.0,1.0,1.0,1646.0,1646.0,4134.0,6553.0,8246.0,
+    1.0,1.0,8668.0,8668.0,1.0,1.0,2545.0,2545.0,1.0,1.0,
+    3504.0,3504.0,1.0,1.0,8747.0,8747.0,11567.0,1305.0,5146.0,1.0,
+    1.0,10938.0,10938.0,1.0,1.0,4978.0,4978.0,1.0,1.0,5777.0,
+    5777.0,1.0,1.0,3328.0,3328.0,6429.0,9094.0,4134.0,1.0,1.0,
+    6461.0,6461.0,1.0,1.0,7266.0,7266.0,1.0,1.0,4591.0,4591.0,
+    1.0,1.0,6561.0,6561.0,11077.0,1646.0,6553.0,1.0,1.0,2744.0,
+    2744.0,1.0,1.0,3006.0,3006.0,1.0,1.0,2975.0,2975.0,1.0,
+    1.0,563.0,563.0,8668.0,2545.0,11567.0,1.0,1.0,949.0,949.0,
+    1.0,1.0,2625.0,2625.0,1.0,1.0,9650.0,9650.0,1.0,1.0,
+    4821.0,4821.0,3504.0,8747.0,1305.0,1.0,1.0,726.0,726.0,1.0,
+    1.0,4611.0,4611.0,1.0,1.0,1853.0,1853.0,1.0,1.0,140.0,
+    140.0,10938.0,4978.0,6429.0,1.0,1.0,2768.0,2768.0,1.0,1.0,
+    1635.0,1635.0,1.0,1.0,4255.0,4255.0,1.0,1.0,1177.0,1177.0,
+    5777.0,3328.0,9094.0,1.0,1.0,9923.0,9923.0,1.0,1.0,3051.0,
+    3051.0,1.0,1.0,4896.0,4896.0,1.0,1.0,2963.0,2963.0,6461.0,
+    7266.0,11077.0,1.0,1.0,1000.0,1000.0,1.0,1.0,4320.0,4320.0,
+    1.0,1.0,81.0,81.0,1.0,1.0,9198.0,9198.0,4591.0,6561.0,
+    1646.0,1.0,1.0,2294.0,2294.0,1.0,1.0,1062.0,1062.0,1.0,
+    1.0,3553.0,3553.0,1.0,1.0,7484.0,7484.0,2744.0,3006.0,8668.0,
+    1.0,1.0,8577.0,8577.0,1.0,1.0,3135.0,3135.0,1.0,1.0,
+    2747.0,2747.0,1.0,1.0,7443.0,7443.0,2975.0,563.0,2545.0,1.0,
+    1.0,1326.0,1326.0,1.0,1.0,7203.0,7203.0,1.0,1.0,9275.0,
+    9275.0,1.0,1.0,3201.0,3201.0,949.0,2625.0,3504.0,1.0,1.0,
+    790.0,790.0,1.0,1.0,955.0,955.0,1.0,1.0,1170.0,1170.0,
+    1.0,1.0,9970.0,9970.0,9650.0,4821.0,8747.0,1.0,1.0,5374.0,
+    5374.0,1.0,1.0,9452.0,9452.0,1.0,1.0,12159.0,12159.0,1.0,
+    1.0,4354.0,4354.0,726.0,4611.0,10938.0,1.0,1.0,9893.0,9893.0,
+    1.0,1.0,7837.0,7837.0,1.0,1.0,3296.0,3296.0,1.0,1.0,
+    8340.0,8340.0,1853.0,140.0,4978.0,1.0,1.0,5067.0,5067.0,1.0,
+    1.0,10092.0,10092.0,1.0,1.0,12171.0,12171.0,1.0,1.0,9813.0,
+    9813.0,2768.0,1635.0,5777.0,1.0,1.0,6522.0,6522.0,1.0,1.0,
+    11462.0,11462.0,1.0,1.0,3748.0,3748.0,1.0,1.0,953.0,953.0,
+    4255.0,1177.0,3328.0,1.0,1.0,2525.0,2525.0,1.0,1.0,10908.0,
+    10908.0,1.0,1.0,3584.0,3584.0,1.0,1.0,4177.0,4177.0,9923.0,
+    3051.0,6461.0,1.0,1.0,4989.0,4989.0,1.0,1.0,5331.0,5331.0,
+    1.0,1.0,8011.0,8011.0,1.0,1.0,1673.0,1673.0,4896.0,2963.0,
+    7266.0,1.0,1.0,11745.0,11745.0,1.0,1.0,6498.0,6498.0,1.0,
+    1.0,11950.0,11950.0,1.0,1.0,2468.0,2468.0,1000.0,4320.0,4591.0,
+    1.0,1.0,12280.0,12280.0,1.0,1.0,11267.0,11267.0,1.0,1.0,
+    11809.0,11809.0,1.0,1.0,2842.0,2842.0,81.0,9198.0,6561.0,1.0,
+    1.0,5911.0,5911.0,1.0,1.0,4890.0,4890.0,1.0,1.0,3932.0,
+    3932.0,1.0,1.0,2731.0,2731.0,2294.0,1062.0,2744.0,1.0,1.0,
+    5542.0,5542.0,1.0,1.0,12144.0,12144.0,1.0,1.0,8830.0,8830.0,
+    1.0,1.0,8652.0,8652.0,3553.0,7484.0,3006.0,1.0,1.0,4231.0,
+    4231.0,1.0,1.0,2548.0,2548.0,1.0,1.0,355.0,355.0,1.0,
+    1.0,8907.0,8907.0,8577.0,3135.0,2975.0,1.0,1.0,3707.0,3707.0,
+    1.0,1.0,1759.0,1759.0,1.0,1.0,5179.0,5179.0,1.0,1.0,
+    3694.0,3694.0,2747.0,7443.0,563.0,1.0,1.0,2089.0,2089.0,1.0,
+    1.0,5092.0,5092.0,1.0,1.0,9005.0,9005.0,1.0,1.0,9408.0,
+    9408.0,1326.0,7203.0,949.0,1.0,1.0,9048.0,9048.0,1.0,1.0,
+    11560.0,11560.0,1.0,1.0,3289.0,3289.0,1.0,1.0,10276.0,10276.0,
+    9275.0,3201.0,2625.0,1.0,1.0,10593.0,10593.0,1.0,1.0,10861.0,
+    10861.0,1.0,1.0,11955.0,11955.0,1.0,1.0,9863.0,9863.0,790.0,
+    955.0,9650.0,1.0,1.0,5755.0,5755.0,1.0,1.0,7657.0,7657.0,
+    1.0,1.0,7901.0,7901.0,1.0,1.0,11029.0,11029.0,1170.0,9970.0,
+    4821.0,1.0,1.0,11813.0,11813.0,1.0,1.0,8758.0,8758.0,1.0,
+    1.0,7384.0,7384.0,1.0,1.0,8304.0,8304.0,5374.0,9452.0,726.0,
+    1.0,1.0,10745.0,10745.0,1.0,1.0,2178.0,2178.0,1.0,1.0,
+    11869.0,11869.0,1.0,1.0,5559.0,5559.0,12159.0,4354.0,4611.0,1.0,
+    1.0,12046.0,12046.0,1.0,1.0,9273.0,9273.0,1.0,1.0,11618.0,
+    11618.0,1.0,1.0,3000.0,3000.0,9893.0,7837.0,1853.0,1.0,1.0,
+    3136.0,3136.0,1.0,1.0,5191.0,5191.0,1.0,1.0,3400.0,3400.0,
+    1.0,1.0,2399.0,2399.0,3296.0,8340.0,140.0,1.0,1.0,4048.0,
+    4048.0,1.0,1.0,2249.0,2249.0,1.0,1.0,2884.0,2884.0,1.0,
+    1.0,1153.0,1153.0,5067.0,10092.0,2768.0,1.0,1.0,9103.0,9103.0,
+    1.0,1.0,6882.0,6882.0,1.0,1.0,2126.0,2126.0,1.0,1.0,
+    10659.0,10659.0,12171.0,9813.0,1635.0,1.0,1.0,8779.0,8779.0,1.0,
+    1.0,6957.0,6957.0,1.0,1.0,9424.0,9424.0,1.0,1.0,2370.0,
+    2370.0,6522.0,11462.0,4255.0,1.0,1.0,2969.0,2969.0,1.0,1.0,
+    3978.0,3978.0,1.0,1.0,2686.0,2686.0,1.0,1.0,3247.0,3247.0,
+    3748.0,953.0,1177.0,1.0,1.0,10805.0,10805.0,1.0,1.0,4895.0,
+    4895.0,1.0,1.0,2780.0,2780.0,1.0,1.0,7094.0,7094.0,2525.0,
+    10908.0,9923.0,1.0,1.0,9644.0,9644.0,1.0,1.0,8236.0,8236.0,
+    1.0,1.0,2305.0,2305.0,1.0,1.0,5042.0,5042.0,3584.0,4177.0,
+    3051.0,1.0,1.0,7917.0,7917.0,1.0,1.0,10115.0,10115.0,1.0,
+    1.0,4414.0,4414.0,1.0,1.0,2847.0,2847.0,4989.0,5331.0,4896.0,
+    1.0,1.0,3271.0,3271.0,1.0,1.0,8232.0,8232.0,1.0,1.0,
+    10600.0,10600.0,1.0,1.0,8925.0,8925.0,8011.0,1673.0,2963.0,1.0,
+    1.0,1777.0,1777.0,1.0,1.0,10626.0,10626.0,1.0,1.0,4654.0,
+    4654.0,1.0,1.0,1426.0,1426.0,11745.0,6498.0,1000.0,1.0,1.0,
+    9585.0,9585.0,1.0,1.0,6998.0,6998.0,1.0,1.0,7351.0,7351.0,
+    1.0,1.0,8653.0,8653.0,11950.0,2468.0,4320.0,1.0,1.0,7852.0,
+    7852.0,1.0,1.0,3.0,3.0,1.0,1.0,9140.0,9140.0,1.0,
+    1.0,160.0,160.0,12280.0,11267.0,81.0,1.0,1.0,4919.0,4919.0,
+    1.0,1.0,113.0,113.0,1.0,1.0,8374.0,8374.0,1.0,1.0,
+    10123.0,10123.0,11809.0,2842.0,9198.0,1.0,1.0,10377.0,10377.0,1.0,
+    1.0,10911.0,10911.0,1.0,1.0,435.0,435.0,1.0,1.0,4337.0,
+    4337.0,5911.0,4890.0,2294.0,1.0,1.0,9908.0,9908.0,1.0,1.0,
+    5444.0,5444.0,1.0,1.0,4096.0,4096.0,1.0,1.0,11796.0,11796.0,
+    3932.0,2731.0,1062.0,1.0,1.0,9041.0,9041.0,1.0,1.0,1207.0,
+    1207.0,1.0,1.0,7012.0,7012.0,1.0,1.0,11121.0,11121.0,5542.0,
+    12144.0,3553.0,1.0,1.0,4645.0,4645.0,1.0,1.0,404.0,404.0,
+    1.0,1.0,10146.0,10146.0,1.0,1.0,1065.0,1065.0,8830.0,8652.0,
+    7484.0,1.0,1.0,2422.0,2422.0,1.0,1.0,6039.0,6039.0,1.0,
+    1.0,2187.0,2187.0,1.0,1.0,2566.0,2566.0,4231.0,2548.0,8577.0,
+    1.0,1.0,9302.0,9302.0,1.0,1.0,6267.0,6267.0,1.0,1.0,
+    8643.0,8643.0,1.0,1.0,2437.0,2437.0,355.0,8907.0,3135.0,1.0,
+    1.0,875.0,875.0,1.0,1.0,3780.0,3780.0,1.0,1.0,1607.0,
+    1607.0,1.0,1.0,4976.0,4976.0,3707.0,1759.0,2747.0,1.0,1.0,
+    4284.0,4284.0,1.0,1.0,7201.0,7201.0,1.0,1.0,7278.0,7278.0,
+    1.0,1.0,11287.0,11287.0,5179.0,3694.0,7443.0,1.0,1.0,545.0,
+    545.0,1.0,1.0,7270.0,7270.0,1.0,1.0,8585.0,8585.0,1.0,
+    1.0,2678.0,2678.0,2089.0,5092.0,1326.0,1.0,1.0,4143.0,4143.0,
+    1.0,1.0,7575.0,7575.0,1.0,1.0,12047.0,12047.0,1.0,1.0,
+    10752.0,10752.0,9005.0,9408.0,7203.0,1.0,1.0,1440.0,1440.0,1.0,
+    1.0,3763.0,3763.0,1.0,1.0,3066.0,3066.0,1.0,1.0,12262.0,
+    12262.0,9048.0,11560.0,9275.0,1.0,1.0,5084.0,5084.0,1.0,1.0,
+    10657.0,10657.0,1.0,1.0,4885.0,4885.0,1.0,1.0,11272.0,11272.0,
+    3289.0,10276.0,3201.0,1.0,1.0,1045.0,1045.0,1.0,1.0,9430.0,
+    9430.0,1.0,1.0,2481.0,2481.0,1.0,1.0,7277.0,7277.0,10593.0,
+    10861.0,790.0,1.0,1.0,6591.0,6591.0,1.0,1.0,2912.0,2912.0,
+    1.0,1.0,7428.0,7428.0,1.0,1.0,11935.0,11935.0,11955.0,9863.0,
+    955.0,1.0,1.0,8511.0,8511.0,1.0,1.0,3833.0,3833.0,1.0,
+    1.0,11516.0,11516.0,1.0,1.0,11899.0,11899.0,5755.0,7657.0,1170.0,
+    1.0,1.0,1067.0,1067.0,1.0,1.0,5101.0,5101.0,1.0,1.0,
+    11847.0,11847.0,1.0,1.0,9888.0,9888.0,7901.0,11029.0,9970.0,1.0,
+    1479.0,1.0,8246.0,5146.0,1479.0,1.0,4134.0,6553.0,8246.0,11567.0,
+    1305.0,5146.0,1479.0,6429.0,9094.0,4134.0,11077.0,1646.0,6553.0,8246.0,
+    8668.0,2545.0,11567.0,3504.0,8747.0,1305.0,5146.0,10938.0,4978.0,6429.0,
+    5777.0,3328.0,9094.0,4134.0,6461.0,7266.0,11077.0,4591.0,6561.0,1646.0,
+    6553.0,2744.0,3006.0,8668.0,2975.0,563.0,2545.0,11567.0,949.0,2625.0,
+    3504.0,9650.0,4821.0,8747.0,1305.0,1479.0,8246.0,5146.0,1479.0};
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/params.h b/crypt/liboqs/kex_rlwe_newhope/avx2/params.h
new file mode 100644
index 0000000000000000000000000000000000000000..027454ffb0b9a3c44ee5c7648ca040774cf2d6b0
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/params.h
@@ -0,0 +1,16 @@
+#ifndef PARAMS_H
+#define PARAMS_H
+
+#define PARAM_N 1024
+
+#define PARAM_K 16 /* used in sampler */
+#define PARAM_Q 12289 
+
+#define POLY_BYTES 1792
+#define NEWHOPE_SEEDBYTES 32
+#define NEWHOPE_RECBYTES 256
+
+#define NEWHOPE_SENDABYTES (POLY_BYTES + NEWHOPE_SEEDBYTES)
+#define NEWHOPE_SENDBBYTES (POLY_BYTES + NEWHOPE_RECBYTES)
+
+#endif
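
The sizes in params.h are not arbitrary: q = 12289 fits in 14 bits, and poly_tobytes() (added below) packs four 14-bit coefficients into seven bytes, so a serialized polynomial takes 1024/4*7 = 1792 bytes; the two handshake messages then add the 32-byte seed and the 256-byte reconciliation vector respectively. A minimal sketch of those relations as preprocessor checks (illustration only, not part of the patch):

    /* Illustration only, not part of the patch: spell out where the sizes
     * in params.h come from. */
    #include "params.h"

    #if POLY_BYTES != (PARAM_N / 4) * 7          /* 1024/4 * 7 = 1792 */
    #error "POLY_BYTES is expected to be PARAM_N/4 * 7 (4 coefficients of 14 bits in 7 bytes)"
    #endif
    #if NEWHOPE_SENDABYTES != 1824               /* POLY_BYTES + 32-byte seed */
    #error "unexpected NEWHOPE_SENDABYTES"
    #endif
    #if NEWHOPE_SENDBBYTES != 2048               /* POLY_BYTES + 256-byte reconciliation vector */
    #error "unexpected NEWHOPE_SENDBBYTES"
    #endif
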
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/poly.c b/crypt/liboqs/kex_rlwe_newhope/avx2/poly.c
new file mode 100644
index 0000000000000000000000000000000000000000..43c0df218c2533f91c821dee76c45cd80a14a398
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/poly.c
@@ -0,0 +1,143 @@
+#include "poly.h"
+#include "ntt.h"
+#include "randombytes.h"
+#include "fips202.h"
+#include "crypto_stream.h"
+
+static uint16_t barrett_reduce(uint16_t a)
+{
+  uint32_t u;
+
+  u = ((uint32_t) a * 5) >> 16;
+  u *= PARAM_Q;
+  a -= u;
+  return a;
+}
+
+void poly_frombytes(poly *r, const unsigned char *a)
+{
+  int i;
+  for(i=0;i<PARAM_N/4;i++)
+  {
+    r->coeffs[4*i+0] =                               a[7*i+0]        | (((uint16_t)a[7*i+1] & 0x3f) << 8);
+    r->coeffs[4*i+1] = (a[7*i+1] >> 6) | (((uint16_t)a[7*i+2]) << 2) | (((uint16_t)a[7*i+3] & 0x0f) << 10);
+    r->coeffs[4*i+2] = (a[7*i+3] >> 4) | (((uint16_t)a[7*i+4]) << 4) | (((uint16_t)a[7*i+5] & 0x03) << 12);
+    r->coeffs[4*i+3] = (a[7*i+5] >> 2) | (((uint16_t)a[7*i+6]) << 6); 
+  }
+}
+
+void poly_tobytes(unsigned char *r, const poly *p)
+{
+  int i;
+  uint16_t t0,t1,t2,t3,m;
+  int16_t c;
+  for(i=0;i<PARAM_N/4;i++)
+  {
+    t0 = barrett_reduce(p->coeffs[4*i+0]); //Make sure that coefficients have only 14 bits
+    t1 = barrett_reduce(p->coeffs[4*i+1]);
+    t2 = barrett_reduce(p->coeffs[4*i+2]);
+    t3 = barrett_reduce(p->coeffs[4*i+3]);
+
+    m = t0 - PARAM_Q;
+    c = m;
+    c >>= 15;
+    t0 = m ^ ((t0^m)&c); // Make sure that coefficients are in [0,q)
+
+    m = t1 - PARAM_Q;
+    c = m;
+    c >>= 15;
+    t1 = m ^ ((t1^m)&c); // Make sure that coefficients are in [0,q)
+
+    m = t2 - PARAM_Q;
+    c = m;
+    c >>= 15;
+    t2 = m ^ ((t2^m)&c); // Make sure that coefficients are in [0,q)
+
+    m = t3 - PARAM_Q;
+    c = m;
+    c >>= 15;
+    t3 = m ^ ((t3^m)&c); // Make sure that coefficients are in [0,q)
+
+    r[7*i+0] =  t0 & 0xff;
+    r[7*i+1] = (t0 >> 8) | (t1 << 6);
+    r[7*i+2] = (t1 >> 2);
+    r[7*i+3] = (t1 >> 10) | (t2 << 4);
+    r[7*i+4] = (t2 >> 4);
+    r[7*i+5] = (t2 >> 12) | (t3 << 2);
+    r[7*i+6] = (t3 >> 6);
+  }
+}
+
+
+
+void poly_uniform(poly *a, const unsigned char *seed)
+{
+  unsigned int pos=0, ctr=0;
+  uint16_t val;
+  uint64_t state[25];
+  unsigned int nblocks=13;
+  uint8_t buf[SHAKE128_RATE*nblocks];
+
+  shake128_absorb(state, seed, NEWHOPE_SEEDBYTES);
+  
+  shake128_squeezeblocks((unsigned char *) buf, nblocks, state);
+
+  while(ctr < PARAM_N)
+  {
+    //val = (buf[pos] | ((uint16_t) buf[pos+1] << 8)) & 0x3fff; // Specialized for q = 12289
+    val = (buf[pos] | ((uint16_t) buf[pos+1] << 8));
+    if(val < 5*PARAM_Q)
+      a->coeffs[ctr++] = val;
+    pos += 2;
+    if(pos > SHAKE128_RATE*nblocks-2)
+    {
+      nblocks=1;
+      shake128_squeezeblocks((unsigned char *) buf,nblocks,state);
+      pos = 0;
+    }
+  }
+}
+
+
+extern void cbd(poly *r, unsigned char *b);
+
+void poly_getnoise(poly *r, unsigned char *seed, unsigned char nonce)
+{
+#if PARAM_K != 16
+#error "poly_getnoise in poly.c only supports k=16"
+#endif
+  unsigned char buf[4*PARAM_N];
+  unsigned char n[CRYPTO_STREAM_NONCEBYTES];
+  int i;
+
+  for(i=1;i<CRYPTO_STREAM_NONCEBYTES;i++)
+    n[i] = 0;
+  n[0] = nonce;
+
+  crypto_stream(buf,4*PARAM_N,n,seed);
+  cbd(r,buf);
+}
+
+void poly_add(poly *r, const poly *a, const poly *b)
+{
+  int i;
+  for(i=0;i<PARAM_N;i++)
+    r->coeffs[i] = barrett_reduce(a->coeffs[i] + b->coeffs[i]);
+}
+
+void poly_ntt(poly *r)
+{
+  double __attribute__ ((aligned (32))) temp[PARAM_N];
+  poly_pointwise(r, r, (poly *)psis_bitrev);
+
+  ntt_double(r->coeffs,omegas_double,temp);
+}
+
+void poly_invntt(poly *r)
+{
+  double __attribute__ ((aligned (32))) temp[PARAM_N];
+
+  bitrev_vector(r->coeffs);
+  ntt_double(r->coeffs, omegas_inv_double,temp);
+  poly_pointwise(r, r, (poly *)psis_inv);
+}
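
poly_getnoise() above only fills a buffer with stream-cipher output (crypto_stream) and hands it to an external AVX2 cbd() routine. A scalar sketch of the centered binomial sampler psi_16 that routine is expected to implement, assuming the usual NewHope definition: 4 bytes of stream output per coefficient, coefficient = popcount of the low 16 bits minus popcount of the high 16 bits, stored non-negatively as a + q - b (illustration only, not part of the patch):

    /* Scalar sketch of the assumed behaviour of the external cbd() routine. */
    static void cbd_ref(poly *r, const unsigned char *buf)
    {
      int i, j;
      for (i = 0; i < PARAM_N; i++) {
        uint32_t t = buf[4*i] | ((uint32_t)buf[4*i+1] << 8) |
                     ((uint32_t)buf[4*i+2] << 16) | ((uint32_t)buf[4*i+3] << 24);
        uint32_t d = 0;
        for (j = 0; j < 8; j++)                 /* byte-wise popcount accumulation */
          d += (t >> j) & 0x01010101;
        uint32_t a = (d & 0xff) + ((d >> 8) & 0xff);           /* popcount of low 16 bits  */
        uint32_t b = ((d >> 16) & 0xff) + ((d >> 24) & 0xff);  /* popcount of high 16 bits */
        r->coeffs[i] = a + PARAM_Q - b;         /* centered binomial, kept non-negative */
      }
    }
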
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/poly.h b/crypt/liboqs/kex_rlwe_newhope/avx2/poly.h
new file mode 100644
index 0000000000000000000000000000000000000000..bf866230f18faed2d09248c1bf846fca604cc3ef
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/poly.h
@@ -0,0 +1,22 @@
+#ifndef POLY_H
+#define POLY_H
+
+#include <stdint.h>
+#include "params.h"
+
+typedef struct {
+  int32_t coeffs[PARAM_N];
+} poly __attribute__ ((aligned (32)));
+
+void poly_uniform(poly *a, const unsigned char *seed);
+void poly_getnoise(poly *r, unsigned char *seed, unsigned char nonce);
+void poly_add(poly *r, const poly *a, const poly *b);
+
+void poly_ntt(poly *r);
+void poly_invntt(poly *r);
+void poly_pointwise(poly *r, const poly *a, const poly *b);
+
+void poly_frombytes(poly *r, const unsigned char *a);
+void poly_tobytes(unsigned char *r, const poly *p);
+
+#endif
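
For orientation, a hypothetical round trip through the byte encoding declared here (illustration only, not part of the patch). It assumes the input polynomial's coefficients are already reduced into [0, q); under that assumption poly_tobytes()/poly_frombytes() are inverses of each other:

    #include <string.h>
    #include "poly.h"

    /* Returns 1 if p survives serialize/deserialize unchanged (expected when
     * all coefficients are already in [0, q)). */
    static int roundtrip_ok(const poly *p)
    {
      unsigned char buf[POLY_BYTES];
      poly p2;
      poly_tobytes(buf, p);        /* pack 4 coefficients into 7 bytes each */
      poly_frombytes(&p2, buf);    /* unpack 14-bit coefficients again */
      return memcmp(p->coeffs, p2.coeffs, sizeof(p2.coeffs)) == 0;
    }
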
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/poly_pointwise.s b/crypt/liboqs/kex_rlwe_newhope/avx2/poly_pointwise.s
new file mode 100644
index 0000000000000000000000000000000000000000..53d7d26aa8ae0beabbf55afed75cb65b6d16347e
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/poly_pointwise.s
@@ -0,0 +1,282 @@
+
+# qhasm: int64 input_0
+
+# qhasm: int64 input_1
+
+# qhasm: int64 input_2
+
+# qhasm: int64 input_3
+
+# qhasm: int64 input_4
+
+# qhasm: int64 input_5
+
+# qhasm: stack64 input_6
+
+# qhasm: stack64 input_7
+
+# qhasm: int64 caller_r11
+
+# qhasm: int64 caller_r12
+
+# qhasm: int64 caller_r13
+
+# qhasm: int64 caller_r14
+
+# qhasm: int64 caller_r15
+
+# qhasm: int64 caller_rbx
+
+# qhasm: int64 caller_rbp
+
+# qhasm: int64 ctri
+
+# qhasm: int64 rp
+
+# qhasm: int64 ap
+
+# qhasm: int64 bp
+
+# qhasm: reg256 r
+
+# qhasm: reg256 a
+
+# qhasm: reg256 b
+
+# qhasm: reg256 q
+
+# qhasm: reg256 qinv
+
+# qhasm: reg256 c
+
+# qhasm: enter poly_pointwise
+.p2align 5
+.global _poly_pointwise
+.global poly_pointwise
+_poly_pointwise:
+poly_pointwise:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: rp = input_0
+# asm 1: mov  <input_0=int64#1,>rp=int64#1
+# asm 2: mov  <input_0=%rdi,>rp=%rdi
+mov  %rdi,%rdi
+
+# qhasm: ap = input_1
+# asm 1: mov  <input_1=int64#2,>ap=int64#2
+# asm 2: mov  <input_1=%rsi,>ap=%rsi
+mov  %rsi,%rsi
+
+# qhasm: bp = input_2
+# asm 1: mov  <input_2=int64#3,>bp=int64#3
+# asm 2: mov  <input_2=%rdx,>bp=%rdx
+mov  %rdx,%rdx
+
+# qhasm: q = mem256[q8]
+# asm 1: vmovdqu q8,>q=reg256#1
+# asm 2: vmovdqu q8,>q=%ymm0
+vmovdqu q8,%ymm0
+
+# qhasm: qinv = mem256[qinv16]
+# asm 1: vmovdqu qinv16,>qinv=reg256#2
+# asm 2: vmovdqu qinv16,>qinv=%ymm1
+vmovdqu qinv16,%ymm1
+
+# qhasm: ctri = 256
+# asm 1: mov  $256,>ctri=int64#4
+# asm 2: mov  $256,>ctri=%rcx
+mov  $256,%rcx
+
+# qhasm: loopi:
+._loopi:
+
+# qhasm: a = (4x double)(4x int32)mem128[ap + 0]
+# asm 1: vcvtdq2pd 0(<ap=int64#2),>a=reg256#3
+# asm 2: vcvtdq2pd 0(<ap=%rsi),>a=%ymm2
+vcvtdq2pd 0(%rsi),%ymm2
+
+# qhasm: b = (4x double)(4x int32)mem128[bp + 0]
+# asm 1: vcvtdq2pd 0(<bp=int64#3),>b=reg256#4
+# asm 2: vcvtdq2pd 0(<bp=%rdx),>b=%ymm3
+vcvtdq2pd 0(%rdx),%ymm3
+
+# qhasm: 4x a approx*= b
+# asm 1: vmulpd <b=reg256#4,<a=reg256#3,>a=reg256#3
+# asm 2: vmulpd <b=%ymm3,<a=%ymm2,>a=%ymm2
+vmulpd %ymm3,%ymm2,%ymm2
+
+# qhasm: 4x c = approx a * qinv
+# asm 1: vmulpd <a=reg256#3,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <a=%ymm2,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm2,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x a approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a=reg256#3
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a=%ymm2
+vfnmadd231pd %ymm3,%ymm0,%ymm2
+
+# qhasm: a = (4x int32)(4x double)a,0,0,0,0
+# asm 1: vcvtpd2dq <a=reg256#3,>a=reg256#3dq
+# asm 2: vcvtpd2dq <a=%ymm2,>a=%xmm2
+vcvtpd2dq %ymm2,%xmm2
+
+# qhasm: mem128[rp + 0] = a
+# asm 1: vmovupd <a=reg256#3dq,0(<rp=int64#1)
+# asm 2: vmovupd <a=%xmm2,0(<rp=%rdi)
+vmovupd %xmm2,0(%rdi)
+
+# qhasm: a = (4x double)(4x int32)mem128[ap + 16]
+# asm 1: vcvtdq2pd 16(<ap=int64#2),>a=reg256#3
+# asm 2: vcvtdq2pd 16(<ap=%rsi),>a=%ymm2
+vcvtdq2pd 16(%rsi),%ymm2
+
+# qhasm: b = (4x double)(4x int32)mem128[bp + 16]
+# asm 1: vcvtdq2pd 16(<bp=int64#3),>b=reg256#4
+# asm 2: vcvtdq2pd 16(<bp=%rdx),>b=%ymm3
+vcvtdq2pd 16(%rdx),%ymm3
+
+# qhasm: 4x a approx*= b
+# asm 1: vmulpd <b=reg256#4,<a=reg256#3,>a=reg256#3
+# asm 2: vmulpd <b=%ymm3,<a=%ymm2,>a=%ymm2
+vmulpd %ymm3,%ymm2,%ymm2
+
+# qhasm: 4x c = approx a * qinv
+# asm 1: vmulpd <a=reg256#3,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <a=%ymm2,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm2,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x a approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a=reg256#3
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a=%ymm2
+vfnmadd231pd %ymm3,%ymm0,%ymm2
+
+# qhasm: a = (4x int32)(4x double)a,0,0,0,0
+# asm 1: vcvtpd2dq <a=reg256#3,>a=reg256#3dq
+# asm 2: vcvtpd2dq <a=%ymm2,>a=%xmm2
+vcvtpd2dq %ymm2,%xmm2
+
+# qhasm: mem128[rp + 16] = a
+# asm 1: vmovupd <a=reg256#3dq,16(<rp=int64#1)
+# asm 2: vmovupd <a=%xmm2,16(<rp=%rdi)
+vmovupd %xmm2,16(%rdi)
+
+# qhasm: a = (4x double)(4x int32)mem128[ap + 32]
+# asm 1: vcvtdq2pd 32(<ap=int64#2),>a=reg256#3
+# asm 2: vcvtdq2pd 32(<ap=%rsi),>a=%ymm2
+vcvtdq2pd 32(%rsi),%ymm2
+
+# qhasm: b = (4x double)(4x int32)mem128[bp + 32]
+# asm 1: vcvtdq2pd 32(<bp=int64#3),>b=reg256#4
+# asm 2: vcvtdq2pd 32(<bp=%rdx),>b=%ymm3
+vcvtdq2pd 32(%rdx),%ymm3
+
+# qhasm: 4x a approx*= b
+# asm 1: vmulpd <b=reg256#4,<a=reg256#3,>a=reg256#3
+# asm 2: vmulpd <b=%ymm3,<a=%ymm2,>a=%ymm2
+vmulpd %ymm3,%ymm2,%ymm2
+
+# qhasm: 4x c = approx a * qinv
+# asm 1: vmulpd <a=reg256#3,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <a=%ymm2,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm2,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x a approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a=reg256#3
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a=%ymm2
+vfnmadd231pd %ymm3,%ymm0,%ymm2
+
+# qhasm: a = (4x int32)(4x double)a,0,0,0,0
+# asm 1: vcvtpd2dq <a=reg256#3,>a=reg256#3dq
+# asm 2: vcvtpd2dq <a=%ymm2,>a=%xmm2
+vcvtpd2dq %ymm2,%xmm2
+
+# qhasm: mem128[rp + 32] = a
+# asm 1: vmovupd <a=reg256#3dq,32(<rp=int64#1)
+# asm 2: vmovupd <a=%xmm2,32(<rp=%rdi)
+vmovupd %xmm2,32(%rdi)
+
+# qhasm: a = (4x double)(4x int32)mem128[ap + 48]
+# asm 1: vcvtdq2pd 48(<ap=int64#2),>a=reg256#3
+# asm 2: vcvtdq2pd 48(<ap=%rsi),>a=%ymm2
+vcvtdq2pd 48(%rsi),%ymm2
+
+# qhasm: b = (4x double)(4x int32)mem128[bp + 48]
+# asm 1: vcvtdq2pd 48(<bp=int64#3),>b=reg256#4
+# asm 2: vcvtdq2pd 48(<bp=%rdx),>b=%ymm3
+vcvtdq2pd 48(%rdx),%ymm3
+
+# qhasm: 4x a approx*= b
+# asm 1: vmulpd <b=reg256#4,<a=reg256#3,>a=reg256#3
+# asm 2: vmulpd <b=%ymm3,<a=%ymm2,>a=%ymm2
+vmulpd %ymm3,%ymm2,%ymm2
+
+# qhasm: 4x c = approx a * qinv
+# asm 1: vmulpd <a=reg256#3,<qinv=reg256#2,>c=reg256#4
+# asm 2: vmulpd <a=%ymm2,<qinv=%ymm1,>c=%ymm3
+vmulpd %ymm2,%ymm1,%ymm3
+
+# qhasm: 4x c = floor(c)
+# asm 1: vroundpd $9,<c=reg256#4,>c=reg256#4
+# asm 2: vroundpd $9,<c=%ymm3,>c=%ymm3
+vroundpd $9,%ymm3,%ymm3
+
+# qhasm: 4x a approx-= c * q
+# asm 1: vfnmadd231pd <c=reg256#4,<q=reg256#1,<a=reg256#3
+# asm 2: vfnmadd231pd <c=%ymm3,<q=%ymm0,<a=%ymm2
+vfnmadd231pd %ymm3,%ymm0,%ymm2
+
+# qhasm: a = (4x int32)(4x double)a,0,0,0,0
+# asm 1: vcvtpd2dq <a=reg256#3,>a=reg256#3dq
+# asm 2: vcvtpd2dq <a=%ymm2,>a=%xmm2
+vcvtpd2dq %ymm2,%xmm2
+
+# qhasm: mem128[rp + 48] = a
+# asm 1: vmovupd <a=reg256#3dq,48(<rp=int64#1)
+# asm 2: vmovupd <a=%xmm2,48(<rp=%rdi)
+vmovupd %xmm2,48(%rdi)
+
+# qhasm: rp += 64
+# asm 1: add  $64,<rp=int64#1
+# asm 2: add  $64,<rp=%rdi
+add  $64,%rdi
+
+# qhasm: ap += 64
+# asm 1: add  $64,<ap=int64#2
+# asm 2: add  $64,<ap=%rsi
+add  $64,%rsi
+
+# qhasm: bp += 64
+# asm 1: add  $64,<bp=int64#3
+# asm 2: add  $64,<bp=%rdx
+add  $64,%rdx
+
+# qhasm: unsigned>? ctri -= 4
+# asm 1: sub  $4,<ctri=int64#4
+# asm 2: sub  $4,<ctri=%rcx
+sub  $4,%rcx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto loopi if unsigned>
+ja ._loopi
+
+# qhasm: return
+add %r11,%rsp
+ret
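
poly_pointwise.s multiplies two polynomials coefficient-wise, reducing mod q in double precision: each group of four int32 coefficients is converted to doubles (vcvtdq2pd), multiplied, scaled by a precomputed reciprocal of q (the qinv16 constant), floored (vroundpd $9 rounds toward minus infinity), and the floored quotient times q is subtracted with a fused multiply-add (vfnmadd231pd) before converting back to int32. A scalar sketch of that per-coefficient reduction (illustration only, not part of the patch; it assumes qinv16 holds roughly 1/q and that the products are small enough to be exact in a double, which holds for coefficients bounded by a few q):

    #include <math.h>
    #include <stdint.h>
    #include "params.h"

    static int32_t pointwise_ref(int32_t a, int32_t b)
    {
      double t = (double)a * (double)b;          /* exact for small inputs */
      double c = floor(t * (1.0 / PARAM_Q));     /* floored quotient, like vroundpd $9 */
      return (int32_t)(t - c * (double)PARAM_Q); /* remainder, like vfnmadd231pd; possibly
                                                    one correction off depending on the
                                                    exact reciprocal stored in qinv16 */
    }
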
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/precomp.c b/crypt/liboqs/kex_rlwe_newhope/avx2/precomp.c
new file mode 100644
index 0000000000000000000000000000000000000000..7be60ec821bc859a0f031eb87db148b43879ff3d
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/precomp.c
@@ -0,0 +1,14 @@
+#include "inttypes.h"
+#include "ntt.h"
+#include "params.h"
+
+int32_t omegas[PARAM_N/2]		= {1,10810,7143,4043,10984,722,5736,8155,3542,8785,9744,3621,10643,1212,3195,5860,7468,2639,9664,11340,11726,9314,9283,9545,5728,7698,5023,5828,8961,6512,7311,1351,2319,11119,11334,11499,9088,3014,5086,10963,4846,9542,9154,3712,4805,8736,11227,9995,3091,12208,7969,11289,9326,7393,9238,2366,11112,8034,10654,9521,12149,10436,7678,11563,1260,4388,4632,6534,2426,334,1428,1696,2013,9000,729,3241,2881,3284,7197,10200,8595,7110,10530,8582,3382,11934,9741,8058,3637,3459,145,6747,9558,8357,7399,6378,9447,480,1022,9,9821,339,5791,544,10616,4278,6958,7300,8112,8705,1381,9764,11336,8541,827,5767,2476,118,2197,7222,3949,8993,4452,2396,7935,130,2837,6915,2401,442,7188,11222,390,773,8456,3778,354,4861,9377,5698,5012,9808,2859,11244,1017,7404,1632,7205,27,9223,8526,10849,1537,242,4714,8146,9611,3704,5019,11744,1002,5011,5088,8005,7313,10682,8509,11414,9852,3646,6022,2987,9723,10102,6250,9867,11224,2143,11885,7644,1168,5277,11082,3248,493,8193,6845,2381,7952,11854,1378,1912,2166,3915,12176,7370,12129,3149,12286,4437,3636,4938,5291,2704,10863,7635,1663,10512,3364,1689,4057,9018,9442,7875,2174,4372,7247,9984,4053,2645,5195,9509,7394,1484,9042,9603,8311,9320,9919,2865,5332,3510,1630,10163,5407,3186,11136,9405,10040,8241,9890,8889,7098,9153,9289,671,3016,243,6730,420,10111,1544,3985,4905,3531,476,49,1263,5915,1483,9789,10800,10706,6347,1512,350,10474,5383,5369,10232,9087,4493,9551,6421,6554,2655,9280,1693,174,723,10314,8532,347,2925,8974,11863,1858,4754,3030,4115,2361,10446,2908,218,3434,8760,3963,576,6142,9842,1954,10238,9407,10484,3991,8320,9522,156,2281,5876,10258,5333,3772,418,5908,11836,5429,7515,7552,1293,295,6099,5766,652,8273,4077,8527,9370,325,10885,11143,11341,5990,1159,8561,8240,3329,4298,12121,2692,5961,7183,10327,1594,6167,9734,7105,11089,1360,3956,6170,5297,8210,11231,922,441,1958,4322,1112,2078,4046,709,9139,1319,4240,8719,6224,11454,2459,683,3656,12225,10723,5782,9341,9786,9166,10542,9235,6803,7856,6370,3834,7032,7048,9369,8120,9162,6821,1010,8807,787,5057,4698,4780,8844,12097,1321,4912,10240,677,6415,6234,8953,1323,9523,12237,3174,1579,11858,9784,5906,3957,9450,151,10162,12231,12048,3532,11286,1956,7280,11404,6281,3477,6608,142,11184,9445,3438,11314,4212,9260,6695,4782,5886,8076,504,2302,11684,11868,8209,3602,6068,8689,3263,6077,7665,7822,7500,6752,4749,4449,6833,12142,8500,6118,8471,1190,9606,3860,5445,7753,11239,5079,9027,2169,11767,7965,4916,8214,5315,11011,9945,1973,6715,8775,11248,5925,11271,654,3565,1702,1987,6760,5206,3199,12233,6136,6427,6874,8646,4948,6152,400,10561,5339,5446,3710,6093,468,8301,316,11907,10256,8291,3879,1922,10930,6854,973,11035};
+
+
+int32_t omegas_inv[PARAM_N/2]	= {1,1479,8246,5146,4134,6553,11567,1305,6429,9094,11077,1646,8668,2545,3504,8747,10938,4978,5777,3328,6461,7266,4591,6561,2744,3006,2975,563,949,2625,9650,4821,726,4611,1853,140,2768,1635,4255,1177,9923,3051,4896,2963,1000,4320,81,9198,2294,1062,3553,7484,8577,3135,2747,7443,1326,7203,9275,3201,790,955,1170,9970,5374,9452,12159,4354,9893,7837,3296,8340,5067,10092,12171,9813,6522,11462,3748,953,2525,10908,3584,4177,4989,5331,8011,1673,11745,6498,11950,2468,12280,11267,11809,2842,5911,4890,3932,2731,5542,12144,8830,8652,4231,2548,355,8907,3707,1759,5179,3694,2089,5092,9005,9408,9048,11560,3289,10276,10593,10861,11955,9863,5755,7657,7901,11029,11813,8758,7384,8304,10745,2178,11869,5559,12046,9273,11618,3000,3136,5191,3400,2399,4048,2249,2884,1153,9103,6882,2126,10659,8779,6957,9424,2370,2969,3978,2686,3247,10805,4895,2780,7094,9644,8236,2305,5042,7917,10115,4414,2847,3271,8232,10600,8925,1777,10626,4654,1426,9585,6998,7351,8653,7852,3,9140,160,4919,113,8374,10123,10377,10911,435,4337,9908,5444,4096,11796,9041,1207,7012,11121,4645,404,10146,1065,2422,6039,2187,2566,9302,6267,8643,2437,875,3780,1607,4976,4284,7201,7278,11287,545,7270,8585,2678,4143,7575,12047,10752,1440,3763,3066,12262,5084,10657,4885,11272,1045,9430,2481,7277,6591,2912,7428,11935,8511,3833,11516,11899,1067,5101,11847,9888,1254,11316,5435,1359,10367,8410,3998,2033,382,11973,3988,11821,6196,8579,6843,6950,1728,11889,6137,7341,3643,5415,5862,6153,56,9090,7083,5529,10302,10587,8724,11635,1018,6364,1041,3514,5574,10316,2344,1278,6974,4075,7373,4324,522,10120,3262,7210,1050,4536,6844,8429,2683,11099,3818,6171,3789,147,5456,7840,7540,5537,4789,4467,4624,6212,9026,3600,6221,8687,4080,421,605,9987,11785,4213,6403,7507,5594,3029,8077,975,8851,2844,1105,12147,5681,8812,6008,885,5009,10333,1003,8757,241,58,2127,12138,2839,8332,6383,2505,431,10710,9115,52,2766,10966,3336,6055,5874,11612,2049,7377,10968,192,3445,7509,7591,7232,11502,3482,11279,5468,3127,4169,2920,5241,5257,8455,5919,4433,5486,3054,1747,3123,2503,2948,6507,1566,64,8633,11606,9830,835,6065,3570,8049,10970,3150,11580,8243,10211,11177,7967,10331,11848,11367,1058,4079,6992,6119,8333,10929,1200,5184,2555,6122,10695,1962,5106,6328,9597,168,7991,8960,4049,3728,11130,6299,948,1146,1404,11964,2919,3762,8212,4016,11637,6523,6190,11994,10996,4737,4774,6860,453,6381,11871,8517,6956,2031,6413,10008,12133,2767,3969,8298,1805,2882,2051,10335,2447,6147,11713,8326,3529,8855,12071,9381,1843,9928,8174,9259,7535,10431,426,3315,9364,11942,3757,1975,11566,12115,10596,3009,9634,5735,5868,2738,7796,3202,2057,6920,6906,1815,11939,10777,5942,1583,1489,2500,10806,6374,11026,12240};
+
+
+int32_t psis_bitrev[PARAM_N]   = {1,10810,7143,4043,10984,722,5736,8155,3542,8785,9744,3621,10643,1212,3195,5860,7468,2639,9664,11340,11726,9314,9283,9545,5728,7698,5023,5828,8961,6512,7311,1351,2319,11119,11334,11499,9088,3014,5086,10963,4846,9542,9154,3712,4805,8736,11227,9995,3091,12208,7969,11289,9326,7393,9238,2366,11112,8034,10654,9521,12149,10436,7678,11563,1260,4388,4632,6534,2426,334,1428,1696,2013,9000,729,3241,2881,3284,7197,10200,8595,7110,10530,8582,3382,11934,9741,8058,3637,3459,145,6747,9558,8357,7399,6378,9447,480,1022,9,9821,339,5791,544,10616,4278,6958,7300,8112,8705,1381,9764,11336,8541,827,5767,2476,118,2197,7222,3949,8993,4452,2396,7935,130,2837,6915,2401,442,7188,11222,390,773,8456,3778,354,4861,9377,5698,5012,9808,2859,11244,1017,7404,1632,7205,27,9223,8526,10849,1537,242,4714,8146,9611,3704,5019,11744,1002,5011,5088,8005,7313,10682,8509,11414,9852,3646,6022,2987,9723,10102,6250,9867,11224,2143,11885,7644,1168,5277,11082,3248,493,8193,6845,2381,7952,11854,1378,1912,2166,3915,12176,7370,12129,3149,12286,4437,3636,4938,5291,2704,10863,7635,1663,10512,3364,1689,4057,9018,9442,7875,2174,4372,7247,9984,4053,2645,5195,9509,7394,1484,9042,9603,8311,9320,9919,2865,5332,3510,1630,10163,5407,3186,11136,9405,10040,8241,9890,8889,7098,9153,9289,671,3016,243,6730,420,10111,1544,3985,4905,3531,476,49,1263,5915,1483,9789,10800,10706,6347,1512,350,10474,5383,5369,10232,9087,4493,9551,6421,6554,2655,9280,1693,174,723,10314,8532,347,2925,8974,11863,1858,4754,3030,4115,2361,10446,2908,218,3434,8760,3963,576,6142,9842,1954,10238,9407,10484,3991,8320,9522,156,2281,5876,10258,5333,3772,418,5908,11836,5429,7515,7552,1293,295,6099,5766,652,8273,4077,8527,9370,325,10885,11143,11341,5990,1159,8561,8240,3329,4298,12121,2692,5961,7183,10327,1594,6167,9734,7105,11089,1360,3956,6170,5297,8210,11231,922,441,1958,4322,1112,2078,4046,709,9139,1319,4240,8719,6224,11454,2459,683,3656,12225,10723,5782,9341,9786,9166,10542,9235,6803,7856,6370,3834,7032,7048,9369,8120,9162,6821,1010,8807,787,5057,4698,4780,8844,12097,1321,4912,10240,677,6415,6234,8953,1323,9523,12237,3174,1579,11858,9784,5906,3957,9450,151,10162,12231,12048,3532,11286,1956,7280,11404,6281,3477,6608,142,11184,9445,3438,11314,4212,9260,6695,4782,5886,8076,504,2302,11684,11868,8209,3602,6068,8689,3263,6077,7665,7822,7500,6752,4749,4449,6833,12142,8500,6118,8471,1190,9606,3860,5445,7753,11239,5079,9027,2169,11767,7965,4916,8214,5315,11011,9945,1973,6715,8775,11248,5925,11271,654,3565,1702,1987,6760,5206,3199,12233,6136,6427,6874,8646,4948,6152,400,10561,5339,5446,3710,6093,468,8301,316,11907,10256,8291,3879,1922,10930,6854,973,11035,7,1936,845,3723,3154,5054,3285,7929,216,50,6763,769,767,8484,10076,4153,3120,6184,6203,5646,8348,3753,3536,5370,3229,4730,10583,3929,1282,8717,2021,9457,3944,4099,5604,6759,2171,8809,11024,3007,9344,5349,2633,1406,9057,11996,4855,8520,9348,11722,6627,5289,3837,2595,3221,4273,4050,7082,844,5202,11309,11607,4590,7207,8820,6138,7846,8871,4693,2338,9996,11872,1802,1555,5103,10398,7878,10699,1223,9955,11009,614,12265,10918,11385,9804,6742,7250,881,11924,1015,10362,5461,9343,2637,7779,4684,3360,7154,63,7302,2373,3670,3808,578,5368,11839,1944,7628,11779,9667,6903,5618,10631,5789,3502,5043,826,3090,1398,3065,1506,6586,4483,6389,910,7570,11538,4518,3094,1160,4820,2730,5411,10036,1868,2478,9449,4194,3019,10506,7211,7724,4974,7119,2672,11424,1279,189,3116,10526,2209,10759,1694,8420,7866,5832,1350,10555,8474,7014,10499,11038,6879,2035,1040,10407,6164,7519,944,5287,8620,6616,9269,6883,7624,4834,2712,9461,4352,8176,72,3840,104
47,3451,8195,11048,4378,6508,9244,9646,1095,2873,2827,11498,2434,11169,9754,12268,6481,874,9988,170,6639,2307,4289,11641,12139,11259,11823,3821,1681,4649,5969,2929,6026,1573,8443,3793,6226,11787,5118,2602,10388,1849,5776,9021,3795,7988,7766,457,12281,11410,9696,982,10013,4218,4390,8835,8531,7785,778,530,2626,3578,4697,8823,1701,10243,2940,9332,10808,3317,9757,139,3332,343,8841,4538,10381,7078,1866,1208,7562,10584,2450,11873,814,716,10179,2164,6873,5412,8080,9011,6296,3515,11851,1218,5061,10753,10568,2429,8186,1373,9307,717,8700,8921,4227,4238,11677,8067,1526,11749,12164,3163,4032,6127,7449,1389,10221,4404,11943,3359,9084,5209,1092,3678,4265,10361,464,1826,2926,4489,9118,1136,3449,3708,9051,2065,5826,3495,4564,8755,3961,10533,4145,2275,2461,4267,5653,5063,8113,10771,8524,11014,5508,11113,6555,4860,1125,10844,11158,6302,6693,579,3889,9520,3114,6323,212,8314,4883,6454,3087,1417,5676,7784,2257,3744,4963,2528,9233,5102,11877,6701,6444,4924,4781,1014,11841,1327,3607,3942,7057,2717,60,3200,10754,5836,7723,2260,68,180,4138,7684,2689,10880,7070,204,5509,10821,8308,8882,463,10945,9247,9806,10235,4739,8038,6771,1226,9261,5216,11925,9929,11053,9272,7043,4475,3121,4705,1057,9689,11883,10602,146,5268,1403,1804,6094,7100,12050,9389,994,4554,4670,11777,5464,4906,3375,9998,8896,4335,7376,3528,3825,8054,9342,8307,636,5609,11667,10552,5672,4499,5598,3344,10397,8665,6565,10964,11260,10344,5959,10141,8330,5797,2442,1248,5115,4939,10975,1744,2894,8635,6599,9834,8342,338,3343,8170,1522,10138,12269,5002,4608,5163,4578,377,11914,1620,10453,11864,10104,11897,6085,8122,11251,11366,10058,6197,2800,193,506,1255,1392,5784,3276,8951,2212,9615,10347,8881,2575,1165,2776,11111,6811,3511};
+int32_t psis_inv[PARAM_N]   = {12277,5265,9530,3117,5712,816,10650,3277,9246,4832,5957,851,10655,10300,3227,461,3577,511,73,1766,5519,2544,2119,7325,2802,5667,11343,3376,5749,6088,7892,2883,3923,2316,3842,4060,580,3594,2269,9102,6567,9716,1388,5465,7803,8137,2918,3928,9339,10112,11978,10489,3254,3976,568,8859,11799,12219,12279,10532,12038,8742,4760,680,8875,4779,7705,8123,2916,10950,6831,4487,641,10625,5029,2474,2109,5568,2551,2120,3814,4056,2335,10867,3308,11006,6839,977,10673,8547,1221,1930,7298,11576,8676,2995,3939,7585,11617,12193,5253,2506,358,8829,6528,11466,1638,234,1789,10789,6808,11506,8666,1238,3688,4038,4088,584,1839,7285,8063,4663,9444,10127,8469,4721,2430,9125,11837,1691,10775,6806,6239,6158,7902,4640,4174,5863,11371,3380,3994,11104,6853,979,3651,11055,6846,978,7162,9801,10178,1454,7230,4544,9427,8369,11729,12209,10522,10281,8491,1213,5440,9555,1365,195,3539,11039,1577,5492,11318,5128,11266,3365,7503,4583,7677,8119,4671,5934,7870,6391,913,1886,2025,5556,7816,11650,6931,9768,3151,9228,6585,7963,11671,6934,11524,6913,11521,5157,7759,2864,9187,3068,5705,815,1872,2023,289,5308,6025,7883,9904,4926,7726,8126,4672,2423,9124,3059,437,1818,7282,6307,901,7151,11555,8673,1239,177,5292,756,108,1771,253,8814,10037,4945,2462,7374,2809,5668,7832,4630,2417,5612,7824,8140,4674,7690,11632,8684,11774,1682,5507,7809,11649,10442,8514,6483,9704,6653,2706,10920,1560,3734,2289,327,7069,4521,4157,4105,2342,10868,12086,12260,3507,501,10605,1515,1972,7304,2799,3911,7581,1083,7177,6292,4410,630,90,3524,2259,7345,6316,6169,6148,6145,4389,627,10623,12051,12255,8773,6520,2687,3895,2312,5597,11333,1619,5498,2541,363,3563,509,7095,11547,12183,3496,2255,9100,1300,7208,8052,6417,7939,9912,1416,5469,6048,864,1879,2024,9067,6562,2693,7407,9836,10183,8477,1211,173,7047,8029,1147,3675,525,75,7033,8027,8169,1167,7189,1027,7169,9802,6667,2708,3898,4068,9359,1337,191,5294,6023,2616,7396,11590,8678,8262,6447,921,10665,12057,3478,4008,11106,12120,3487,9276,10103,6710,11492,8664,8260,1180,10702,5040,720,3614,5783,9604,1372,196,28,4,10534,5016,11250,10385,12017,8739,3004,9207,6582,6207,7909,4641,663,7117,8039,2904,3926,4072,7604,6353,11441,3390,5751,11355,10400,8508,2971,2180,2067,5562,11328,6885,11517,6912,2743,3903,11091,3340,9255,10100,4954,7730,6371,9688,1384,7220,2787,9176,4822,4200,600,7108,2771,3907,9336,8356,8216,8196,4682,4180,9375,6606,7966,1138,10696,1528,5485,11317,8639,10012,6697,7979,4651,2420,7368,11586,10433,3246,7486,2825,10937,3318,474,7090,4524,5913,7867,4635,9440,11882,3453,5760,4334,9397,3098,10976,1568,224,32,10538,3261,3977,9346,10113,8467,11743,12211,3500,500,1827,261,5304,7780,2867,10943,6830,7998,11676,1668,5505,2542,9141,4817,9466,6619,11479,5151,4247,7629,4601,5924,6113,6140,9655,6646,2705,2142,306,7066,2765,395,1812,3770,11072,8604,10007,11963,1709,9022,4800,7708,9879,6678,954,5403,4283,4123,589,8862,1266,3692,2283,9104,11834,12224,7013,4513,7667,6362,4420,2387,341,7071,9788,6665,9730,1390,10732,10311,1473,1966,3792,7564,11614,10437,1491,213,1786,9033,3046,9213,10094,1442,206,1785,255,1792,256,10570,1510,7238,1034,7170,6291,7921,11665,3422,4000,2327,2088,5565,795,10647,1521,5484,2539,7385,1055,7173,8047,11683,1669,1994,3796,5809,4341,9398,11876,12230,10525,12037,12253,3506,4012,9351,4847,2448,7372,9831,3160,2207,5582,2553,7387,6322,9681,1383,10731,1533,219,5298,4268,7632,6357,9686,8406,4712,9451,10128,4958,5975,11387,8649,11769,6948,11526,12180,1740,10782,6807,2728,7412,4570,4164,4106,11120,12122,8754,11784,3439,5758,11356,6889,9762,11928,1704,1999,10819,12079,12259,7018,11536,1648,1991,2040,2
047,2048,10826,12080,8748,8272,8204,1172,1923,7297,2798,7422,6327,4415,7653,6360,11442,12168,7005,8023,9924,8440,8228,2931,7441,1063,3663,5790,9605,10150,1450,8985,11817,10466,10273,12001,3470,7518,1074,1909,7295,9820,4914,702,5367,7789,8135,9940,1420,3714,11064,12114,12264,1752,5517,9566,11900,1700,3754,5803,829,1874,7290,2797,10933,5073,7747,8129,6428,6185,11417,1631,233,5300,9535,10140,11982,8734,8270,2937,10953,8587,8249,2934,9197,4825,5956,4362,9401,1343,3703,529,10609,12049,6988,6265,895,3639,4031,4087,4095,585,10617,8539,4731,4187,9376,3095,9220,10095,10220,1460,10742,12068,1724,5513,11321,6884,2739,5658,6075,4379,11159,10372,8504,4726,9453,3106,7466,11600,10435,8513,9994,8450,9985,3182,10988,8592,2983,9204,4826,2445,5616,6069,867,3635,5786,11360,5134,2489,10889,12089,1727,7269,2794,9177,1311,5454,9557,6632,2703,9164,10087,1441,3717,531,3587,2268,324,5313,759,1864,5533,2546,7386,9833,8427,4715,11207,1601,7251,4547,11183,12131,1733,10781,10318,1474,10744,5046,4232,11138,10369,6748,964,7160,4534,7670,8118,8182,4680,11202,6867,981,8918,1274,182,26,7026,8026,11680,12202,10521,1503,7237,4545,5916,9623,8397,11733,10454,3249,9242,6587,941,1890,270,10572,6777,9746,6659,6218,6155,6146,878,1881,7291,11575,12187,1741,7271,8061,11685,6936,4502,9421,4857,4205,7623,1089,10689,1527,8996,10063,11971,10488,6765,2722,3900,9335,11867,6962,11528,5158,4248,4118,5855,2592,5637,6072,2623,7397,8079,9932,4930,5971,853,3633,519,8852,11798,3441,11025,1575,225,8810,11792,12218,3501,9278,3081,9218,4828,7712,8124,11694,12204,3499,4011,573,3593,5780,7848,9899,10192,1456,208,7052,2763,7417,11593,10434,12024,8740,11782,10461,3250,5731,7841,9898,1414,202,3540,7528,2831,2160,10842,5060,4234,4116,588,84,};
+int32_t omegas_montgomery[PARAM_N/2]={4091,7888,11060,11208,6960,4342,6275,9759,1591,6399,9477,5266,586,5825,7538,9710,1134,6407,1711,965,7099,7674,3743,6442,10414,8100,1885,1688,1364,10329,10164,9180,12210,6240,997,117,4783,4407,1549,7072,2829,6458,4431,8877,7144,2564,5664,4042,12189,432,10751,1237,7610,1534,3983,7863,2181,6308,8720,6570,4843,1690,14,3872,5569,9368,12163,2019,7543,2315,4673,7340,1553,1156,8401,11389,1020,2967,10772,7045,3316,11236,5285,11578,10637,10086,9493,6180,9277,6130,3323,883,10469,489,1502,2851,11061,9729,2742,12241,4970,10481,10078,1195,730,1762,3854,2030,5892,10922,9020,5274,9179,3604,3782,10206,3180,3467,4668,2446,7613,9386,834,7703,6836,3403,5351,12276,3580,1739,10820,9787,10209,4070,12250,8525,10401,2749,7338,10574,6040,943,9330,1477,6865,9668,3585,6633,12145,4063,3684,7680,8188,6902,3533,9807,6090,727,10099,7003,6945,1949,9731,10559,6057,378,7871,8763,8901,9229,8846,4551,9589,11664,7630,8821,5680,4956,6251,8388,10156,8723,2341,3159,1467,5460,8553,7783,2649,2320,9036,6188,737,3698,4699,5753,9046,3687,16,914,5186,10531,4552,1964,3509,8436,7516,5381,10733,3281,7037,1060,2895,7156,8887,5357,6409,8197,2962,6375,5064,6634,5625,278,932,10229,8927,7642,351,9298,237,5858,7692,3146,12126,7586,2053,11285,3802,5204,4602,1748,11300,340,3711,4614,300,10993,5070,10049,11616,12247,7421,10707,5746,5654,3835,5553,1224,8476,9237,3845,250,11209,4225,6326,9680,12254,4136,2778,692,8808,6410,6718,10105,10418,3759,7356,11361,8433,6437,3652,6342,8978,5391,2272,6476,7416,8418,10824,11986,5733,876,7030,2167,2436,3442,9217,8206,4858,5964,2746,7178,1434,7389,8879,10661,11457,4220,1432,10832,4328,8557,1867,9454,2416,3816,9076,686,5393,2523,4339,6115,619,937,2834,7775,3279,2363,7488,6112,5056,824,10204,11690,1113,2727,9848,896,2028,5075,2654,10464,7884,12169,5434,3070,6400,9132,11672,12153,4520,1273,9739,11468,9937,10039,9720,2262,9399,11192,315,4511,1158,6061,6751,11865,357,7367,4550,983,8534,8352,10126,7530,9253,4367,5221,3999,8777,3161,6990,4130,11652,3374,11477,1753,292,8681,2806,10378,12188,5800,11811,3181,1988,1024,9340,2477,10928,4582,6750,3619,5503,5233,2463,8470,7650,7964,6395,1071,1272,3474,11045,3291,11344,8502,9478,9837,1253,1857,6233,4720,11561,6034,9817,3339,1797,2879,6242,5200,2114,7962,9353,11363,5475,6084,9601,4108,7323,10438,9471,1271,408,6911,3079,360,8276,11535,9156,9049,11539,850,8617,784,7919,8334,12170,1846,10213,12184,7827,11903,5600,9779,1012,721,2784,6676,6552,5348,4424,6816,8405,9959,5150,2356,5552,5267,1333,8801,9661,7308,5788,4910,909,11613,4395,8238,6686,4302,3044,2285,12249,1963,9216,4296,11918,695,4371,9793,4884,2411,10230,2650,841,3890,10231,7248,8505,11196,6688};
+int32_t omegas_inv_montgomery[PARAM_N/2]= {4091,4401,1081,1229,2530,6014,7947,5329,2579,4751,6464,11703,7023,2812,5890,10698,3109,2125,1960,10925,10601,10404,4189,1875,5847,8546,4615,5190,11324,10578,5882,11155,8417,12275,10599,7446,5719,3569,5981,10108,4426,8306,10755,4679,11052,1538,11857,100,8247,6625,9725,5145,3412,7858,5831,9460,5217,10740,7882,7506,12172,11292,6049,79,13,6938,8886,5453,4586,11455,2903,4676,9843,7621,8822,9109,2083,8507,8685,3110,7015,3269,1367,6397,10259,8435,10527,11559,11094,2211,1808,7319,48,9547,2560,1228,9438,10787,11800,1820,11406,8966,6159,3012,6109,2796,2203,1652,711,7004,1053,8973,5244,1517,9322,11269,900,3888,11133,10736,4949,7616,9974,4746,10270,126,2921,6720,6635,6543,1582,4868,42,673,2240,7219,1296,11989,7675,8578,11949,989,10541,7687,7085,8487,1004,10236,4703,163,9143,4597,6431,12052,2991,11938,4647,3362,2060,11357,12011,6664,5655,7225,5914,9327,4092,5880,6932,3402,5133,9394,11229,5252,9008,1556,6908,4773,3853,8780,10325,7737,1758,7103,11375,12273,8602,3243,6536,7590,8591,11552,6101,3253,9969,9640,4506,3736,6829,10822,9130,9948,3566,2133,3901,6038,7333,6609,3468,4659,625,2700,7738,3443,3060,3388,3526,4418,11911,6232,1730,2558,10340,5344,5286,2190,11562,6199,2482,8756,5387,4101,4609,8605,8226,144,5656,8704,2621,5424,10812,2959,11346,6249,1715,4951,9540,1888,3764,39,8219,2080,2502,1469,10550,8709,5601,1093,3784,5041,2058,8399,11448,9639,2059,9878,7405,2496,7918,11594,371,7993,3073,10326,40,10004,9245,7987,5603,4051,7894,676,11380,7379,6501,4981,2628,3488,10956,7022,6737,9933,7139,2330,3884,5473,7865,6941,5737,5613,9505,11568,11277,2510,6689,386,4462,105,2076,10443,119,3955,4370,11505,3672,11439,750,3240,3133,754,4013,11929,9210,5378,11881,11018,2818,1851,4966,8181,2688,6205,6814,926,2936,4327,10175,7089,6047,9410,10492,8950,2472,6255,728,7569,6056,10432,11036,2452,2811,3787,945,8998,1244,8815,11017,11218,5894,4325,4639,3819,9826,7056,6786,8670,5539,7707,1361,9812,2949,11265,10301,9108,478,6489,101,1911,9483,3608,11997,10536,812,8915,637,8159,5299,9128,3512,8290,7068,7922,3036,4759,2163,3937,3755,11306,7739,4922,11932,424,5538,6228,11131,7778,11974,1097,2890,10027,2569,2250,2352,821,2550,11016,7769,136,617,3157,5889,9219,6855,120,4405,1825,9635,7214,10261,11393,2441,9562,11176,599,2085,11465,7233,6177,4801,9926,9010,4514,9455,11352,11670,6174,7950,9766,6896,11603,3213,8473,9873,2835,10422,3732,7961,1457,10857,8069,832,1628,3410,4900,10855,5111,9543,6325,7431,4083,3072,8847,9853,10122,5259,11413,6556,303,1465,3871,4873,5813,10017,6898,3311,5947,8637,5852,3856,928,4933,8530,1871,2184,5571,5879,3481,11597,9511,8153,35,2609,5963,8064,1080,12039,8444,3052,3813,11065,6736,8454};
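
These arrays are the usual negacyclic NTT constants for n = 1024, q = 12289: powers of the roots of unity and their inverses (partly in bit-reversed order, as the psis_bitrev name indicates), plus Montgomery-domain copies, presumably for a non-AVX2 code path. poly_ntt() above multiplies by psis_bitrev before the forward transform and poly_invntt() multiplies by psis_inv after the inverse transform, which matches the standard NewHope formulation below; it is a sketch that assumes psi is a primitive 2048-th root of unity mod q, omega = psi^2, and that psis_inv also folds in the n^{-1} scaling:

    \hat{a}_i = \sum_{j=0}^{n-1} \psi^{j}\, a_j\, \omega^{ij} \bmod q,
    \qquad
    a_j = n^{-1}\, \psi^{-j} \sum_{i=0}^{n-1} \hat{a}_i\, \omega^{-ij} \bmod q,
    \qquad \omega = \psi^{2},\ \psi^{2n} \equiv 1 \pmod q.
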
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.c b/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.c
new file mode 100644
index 0000000000000000000000000000000000000000..dba8ecadb2d47c0f6453ffc2da183072331fe4b5
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.c
@@ -0,0 +1,35 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "randombytes.h"
+
+/* it's really stupid that there isn't a syscall for this */
+
+static int fd = -1;
+
+void randombytes(unsigned char *x,unsigned long long xlen)
+{
+  int i;
+
+  if (fd == -1) {
+    for (;;) {
+      fd = open("/dev/urandom",O_RDONLY);
+      if (fd != -1) break;
+      sleep(1);
+    }
+  }
+
+  while (xlen > 0) {
+    if (xlen < 1048576) i = xlen; else i = 1048576;
+
+    i = read(fd,x,i);
+    if (i < 1) {
+      sleep(1);
+      continue;
+    }
+
+    x += i;
+    xlen -= i;
+  }
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.h b/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.h
new file mode 100644
index 0000000000000000000000000000000000000000..7f2c4c0bd73d4aa79ecb6d02143cac0350189f5a
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/randombytes.h
@@ -0,0 +1,6 @@
+#ifndef RANDOMBYTES_H
+#define RANDOMBYTES_H
+
+void randombytes(unsigned char *x,unsigned long long xlen);
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/rec.s b/crypt/liboqs/kex_rlwe_newhope/avx2/rec.s
new file mode 100644
index 0000000000000000000000000000000000000000..d1cf925132dd5fb16d28e207695e59120e4341d3
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/rec.s
@@ -0,0 +1,752 @@
+
+# qhasm: int64 input_0
+
+# qhasm: int64 input_1
+
+# qhasm: int64 input_2
+
+# qhasm: int64 input_3
+
+# qhasm: int64 input_4
+
+# qhasm: int64 input_5
+
+# qhasm: stack64 input_6
+
+# qhasm: stack64 input_7
+
+# qhasm: int64 caller_r11
+
+# qhasm: int64 caller_r12
+
+# qhasm: int64 caller_r13
+
+# qhasm: int64 caller_r14
+
+# qhasm: int64 caller_r15
+
+# qhasm: int64 caller_rbx
+
+# qhasm: int64 caller_rbp
+
+# qhasm: reg256 tmp0
+
+# qhasm: reg256 tmp1
+
+# qhasm: reg256 tmp2
+
+# qhasm: reg256 tmp3
+
+# qhasm: reg256 c0
+
+# qhasm: reg256 c1
+
+# qhasm: reg256 c2
+
+# qhasm: reg256 c3
+
+# qhasm: reg256 b
+
+# qhasm: reg256 t
+
+# qhasm: reg256 d
+
+# qhasm: reg256 c
+
+# qhasm: reg256 qx8
+
+# qhasm: reg256 _1x8
+
+# qhasm: reg256 k
+
+# qhasm: stack256 pg
+
+# qhasm: int64 pgp
+
+# qhasm: int64 byte
+
+# qhasm: int64 key
+
+# qhasm: int64 ctr
+
+# qhasm: enter rec
+.p2align 5
+.global _rec
+.global rec
+_rec:
+rec:
+mov %rsp,%r11
+and $31,%r11
+add $32,%r11
+sub %r11,%rsp
+
+# qhasm: ctr = 0
+# asm 1: mov  $0,>ctr=int64#4
+# asm 2: mov  $0,>ctr=%rcx
+mov  $0,%rcx
+
+# qhasm: _1x8    = mem256[v1x8]
+# asm 1: vmovdqu v1x8,>_1x8=reg256#1
+# asm 2: vmovdqu v1x8,>_1x8=%ymm0
+vmovdqu v1x8,%ymm0
+
+# qhasm: qx8     = mem256[q8x]
+# asm 1: vmovdqu q8x,>qx8=reg256#2
+# asm 2: vmovdqu q8x,>qx8=%ymm1
+vmovdqu q8x,%ymm1
+
+# qhasm: pgp = &pg
+# asm 1: leaq <pg=stack256#1,>pgp=int64#5
+# asm 2: leaq <pg=0(%rsp),>pgp=%r8
+leaq 0(%rsp),%r8
+
+# qhasm: looptop:
+._looptop:
+
+# qhasm: ctr <<= 5
+# asm 1: shl  $5,<ctr=int64#4
+# asm 2: shl  $5,<ctr=%rcx
+shl  $5,%rcx
+
+# qhasm: c0 = mem256[input_2 + ctr + 0]
+# asm 1: vmovupd   0(<input_2=int64#3,<ctr=int64#4),>c0=reg256#3
+# asm 2: vmovupd   0(<input_2=%rdx,<ctr=%rcx),>c0=%ymm2
+vmovupd   0(%rdx,%rcx),%ymm2
+
+# qhasm: 8x c0 <<= 1
+# asm 1: vpslld $1,<c0=reg256#3,>c0=reg256#3
+# asm 2: vpslld $1,<c0=%ymm2,>c0=%ymm2
+vpslld $1,%ymm2,%ymm2
+
+# qhasm: c1 = mem256[input_2 + ctr + 1024]
+# asm 1: vmovupd   1024(<input_2=int64#3,<ctr=int64#4),>c1=reg256#4
+# asm 2: vmovupd   1024(<input_2=%rdx,<ctr=%rcx),>c1=%ymm3
+vmovupd   1024(%rdx,%rcx),%ymm3
+
+# qhasm: 8x c1 <<= 1
+# asm 1: vpslld $1,<c1=reg256#4,>c1=reg256#4
+# asm 2: vpslld $1,<c1=%ymm3,>c1=%ymm3
+vpslld $1,%ymm3,%ymm3
+
+# qhasm: c2 = mem256[input_2 + ctr + 2048]
+# asm 1: vmovupd   2048(<input_2=int64#3,<ctr=int64#4),>c2=reg256#5
+# asm 2: vmovupd   2048(<input_2=%rdx,<ctr=%rcx),>c2=%ymm4
+vmovupd   2048(%rdx,%rcx),%ymm4
+
+# qhasm: 8x c2 <<= 1
+# asm 1: vpslld $1,<c2=reg256#5,>c2=reg256#5
+# asm 2: vpslld $1,<c2=%ymm4,>c2=%ymm4
+vpslld $1,%ymm4,%ymm4
+
+# qhasm: c3 = mem256[input_2 + ctr + 3072]
+# asm 1: vmovupd   3072(<input_2=int64#3,<ctr=int64#4),>c3=reg256#6
+# asm 2: vmovupd   3072(<input_2=%rdx,<ctr=%rcx),>c3=%ymm5
+vmovupd   3072(%rdx,%rcx),%ymm5
+
+# qhasm: 8x c0 += c3
+# asm 1: vpaddd <c3=reg256#6,<c0=reg256#3,>c0=reg256#3
+# asm 2: vpaddd <c3=%ymm5,<c0=%ymm2,>c0=%ymm2
+vpaddd %ymm5,%ymm2,%ymm2
+
+# qhasm: 8x c1 += c3
+# asm 1: vpaddd <c3=reg256#6,<c1=reg256#4,>c1=reg256#4
+# asm 2: vpaddd <c3=%ymm5,<c1=%ymm3,>c1=%ymm3
+vpaddd %ymm5,%ymm3,%ymm3
+
+# qhasm: 8x c2 += c3
+# asm 1: vpaddd <c3=reg256#6,<c2=reg256#5,>c2=reg256#5
+# asm 2: vpaddd <c3=%ymm5,<c2=%ymm4,>c2=%ymm4
+vpaddd %ymm5,%ymm4,%ymm4
+
+# qhasm: 8x c0 *= qx8
+# asm 1: vpmulld <qx8=reg256#2,<c0=reg256#3,>c0=reg256#3
+# asm 2: vpmulld <qx8=%ymm1,<c0=%ymm2,>c0=%ymm2
+vpmulld %ymm1,%ymm2,%ymm2
+
+# qhasm: 8x c1 *= qx8
+# asm 1: vpmulld <qx8=reg256#2,<c1=reg256#4,>c1=reg256#4
+# asm 2: vpmulld <qx8=%ymm1,<c1=%ymm3,>c1=%ymm3
+vpmulld %ymm1,%ymm3,%ymm3
+
+# qhasm: 8x c2 *= qx8
+# asm 1: vpmulld <qx8=reg256#2,<c2=reg256#5,>c2=reg256#5
+# asm 2: vpmulld <qx8=%ymm1,<c2=%ymm4,>c2=%ymm4
+vpmulld %ymm1,%ymm4,%ymm4
+
+# qhasm: 8x c3 *= qx8
+# asm 1: vpmulld <qx8=reg256#2,<c3=reg256#6,>c3=reg256#6
+# asm 2: vpmulld <qx8=%ymm1,<c3=%ymm5,>c3=%ymm5
+vpmulld %ymm1,%ymm5,%ymm5
+
+# qhasm: tmp0 = mem256[input_1 + ctr + 0]
+# asm 1: vmovupd   0(<input_1=int64#2,<ctr=int64#4),>tmp0=reg256#7
+# asm 2: vmovupd   0(<input_1=%rsi,<ctr=%rcx),>tmp0=%ymm6
+vmovupd   0(%rsi,%rcx),%ymm6
+
+# qhasm: tmp1 = mem256[input_1 + ctr + 1024]
+# asm 1: vmovupd   1024(<input_1=int64#2,<ctr=int64#4),>tmp1=reg256#8
+# asm 2: vmovupd   1024(<input_1=%rsi,<ctr=%rcx),>tmp1=%ymm7
+vmovupd   1024(%rsi,%rcx),%ymm7
+
+# qhasm: tmp2 = mem256[input_1 + ctr + 2048]
+# asm 1: vmovupd   2048(<input_1=int64#2,<ctr=int64#4),>tmp2=reg256#9
+# asm 2: vmovupd   2048(<input_1=%rsi,<ctr=%rcx),>tmp2=%ymm8
+vmovupd   2048(%rsi,%rcx),%ymm8
+
+# qhasm: tmp3 = mem256[input_1 + ctr + 3072]
+# asm 1: vmovupd   3072(<input_1=int64#2,<ctr=int64#4),>tmp3=reg256#10
+# asm 2: vmovupd   3072(<input_1=%rsi,<ctr=%rcx),>tmp3=%ymm9
+vmovupd   3072(%rsi,%rcx),%ymm9
+
+# qhasm: (uint64) ctr >>= 5
+# asm 1: shr  $5,<ctr=int64#4
+# asm 2: shr  $5,<ctr=%rcx
+shr  $5,%rcx
+
+# qhasm: 8x tmp0 <<= 3
+# asm 1: vpslld $3,<tmp0=reg256#7,>tmp0=reg256#7
+# asm 2: vpslld $3,<tmp0=%ymm6,>tmp0=%ymm6
+vpslld $3,%ymm6,%ymm6
+
+# qhasm: 8x tmp1 <<= 3
+# asm 1: vpslld $3,<tmp1=reg256#8,>tmp1=reg256#8
+# asm 2: vpslld $3,<tmp1=%ymm7,>tmp1=%ymm7
+vpslld $3,%ymm7,%ymm7
+
+# qhasm: 8x tmp2 <<= 3
+# asm 1: vpslld $3,<tmp2=reg256#9,>tmp2=reg256#9
+# asm 2: vpslld $3,<tmp2=%ymm8,>tmp2=%ymm8
+vpslld $3,%ymm8,%ymm8
+
+# qhasm: 8x tmp3 <<= 3
+# asm 1: vpslld $3,<tmp3=reg256#10,>tmp3=reg256#10
+# asm 2: vpslld $3,<tmp3=%ymm9,>tmp3=%ymm9
+vpslld $3,%ymm9,%ymm9
+
+# qhasm: 8x qx8 <<= 4 
+# asm 1: vpslld $4,<qx8=reg256#2,>qx8=reg256#2
+# asm 2: vpslld $4,<qx8=%ymm1,>qx8=%ymm1
+vpslld $4,%ymm1,%ymm1
+
+# qhasm: 8x tmp0 += qx8
+# asm 1: vpaddd <qx8=reg256#2,<tmp0=reg256#7,>tmp0=reg256#7
+# asm 2: vpaddd <qx8=%ymm1,<tmp0=%ymm6,>tmp0=%ymm6
+vpaddd %ymm1,%ymm6,%ymm6
+
+# qhasm: 8x tmp1 += qx8
+# asm 1: vpaddd <qx8=reg256#2,<tmp1=reg256#8,>tmp1=reg256#8
+# asm 2: vpaddd <qx8=%ymm1,<tmp1=%ymm7,>tmp1=%ymm7
+vpaddd %ymm1,%ymm7,%ymm7
+
+# qhasm: 8x tmp2 += qx8
+# asm 1: vpaddd <qx8=reg256#2,<tmp2=reg256#9,>tmp2=reg256#9
+# asm 2: vpaddd <qx8=%ymm1,<tmp2=%ymm8,>tmp2=%ymm8
+vpaddd %ymm1,%ymm8,%ymm8
+
+# qhasm: 8x tmp3 += qx8
+# asm 1: vpaddd <qx8=reg256#2,<tmp3=reg256#10,>tmp3=reg256#10
+# asm 2: vpaddd <qx8=%ymm1,<tmp3=%ymm9,>tmp3=%ymm9
+vpaddd %ymm1,%ymm9,%ymm9
+
+# qhasm: 8x qx8 >>= 2 
+# asm 1: vpsrad $2,<qx8=reg256#2,>qx8=reg256#2
+# asm 2: vpsrad $2,<qx8=%ymm1,>qx8=%ymm1
+vpsrad $2,%ymm1,%ymm1
+
+# qhasm: 8x tmp0 -= c0
+# asm 1: vpsubd <c0=reg256#3,<tmp0=reg256#7,>tmp0=reg256#3
+# asm 2: vpsubd <c0=%ymm2,<tmp0=%ymm6,>tmp0=%ymm2
+vpsubd %ymm2,%ymm6,%ymm2
+
+# qhasm: 8x tmp1 -= c1
+# asm 1: vpsubd <c1=reg256#4,<tmp1=reg256#8,>tmp1=reg256#4
+# asm 2: vpsubd <c1=%ymm3,<tmp1=%ymm7,>tmp1=%ymm3
+vpsubd %ymm3,%ymm7,%ymm3
+
+# qhasm: 8x tmp2 -= c2
+# asm 1: vpsubd <c2=reg256#5,<tmp2=reg256#9,>tmp2=reg256#5
+# asm 2: vpsubd <c2=%ymm4,<tmp2=%ymm8,>tmp2=%ymm4
+vpsubd %ymm4,%ymm8,%ymm4
+
+# qhasm: 8x tmp3 -= c3
+# asm 1: vpsubd <c3=reg256#6,<tmp3=reg256#10,>tmp3=reg256#6
+# asm 2: vpsubd <c3=%ymm5,<tmp3=%ymm9,>tmp3=%ymm5
+vpsubd %ymm5,%ymm9,%ymm5
+
+# qhasm: 8x b = tmp0 * mem256[v2730x8]
+# asm 1: vpmulld v2730x8,<tmp0=reg256#3,>b=reg256#7
+# asm 2: vpmulld v2730x8,<tmp0=%ymm2,>b=%ymm6
+vpmulld v2730x8,%ymm2,%ymm6
+
+# qhasm: 8x t = b >> 27
+# asm 1: vpsrad $27,<b=reg256#7,>t=reg256#7
+# asm 2: vpsrad $27,<b=%ymm6,>t=%ymm6
+vpsrad $27,%ymm6,%ymm6
+
+# qhasm: 8x d = t * qx8
+# asm 1: vpmulld <t=reg256#7,<qx8=reg256#2,>d=reg256#8
+# asm 2: vpmulld <t=%ymm6,<qx8=%ymm1,>d=%ymm7
+vpmulld %ymm6,%ymm1,%ymm7
+
+# qhasm: 8x b = tmp0 - d
+# asm 1: vpsubd <d=reg256#8,<tmp0=reg256#3,>b=reg256#8
+# asm 2: vpsubd <d=%ymm7,<tmp0=%ymm2,>b=%ymm7
+vpsubd %ymm7,%ymm2,%ymm7
+
+# qhasm: 8x b += _1x8
+# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#8,>b=reg256#8
+# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm7,>b=%ymm7
+vpaddd %ymm0,%ymm7,%ymm7
+
+# qhasm: 8x b = qx8 - b
+# asm 1: vpsubd <b=reg256#8,<qx8=reg256#2,>b=reg256#8
+# asm 2: vpsubd <b=%ymm7,<qx8=%ymm1,>b=%ymm7
+vpsubd %ymm7,%ymm1,%ymm7
+
+# qhasm: 8x b >>= 31
+# asm 1: vpsrad $31,<b=reg256#8,>b=reg256#8
+# asm 2: vpsrad $31,<b=%ymm7,>b=%ymm7
+vpsrad $31,%ymm7,%ymm7
+
+# qhasm: 8x t -= b
+# asm 1: vpsubd <b=reg256#8,<t=reg256#7,>t=reg256#7
+# asm 2: vpsubd <b=%ymm7,<t=%ymm6,>t=%ymm6
+vpsubd %ymm7,%ymm6,%ymm6
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#7,<_1x8=reg256#1,>d=reg256#8
+# asm 2: vpand <t=%ymm6,<_1x8=%ymm0,>d=%ymm7
+vpand %ymm6,%ymm0,%ymm7
+
+# qhasm: 8x t = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#7,>t=reg256#7
+# asm 2: vpsrad $1,<t=%ymm6,>t=%ymm6
+vpsrad $1,%ymm6,%ymm6
+
+# qhasm: 8x t += d
+# asm 1: vpaddd <d=reg256#8,<t=reg256#7,>t=reg256#7
+# asm 2: vpaddd <d=%ymm7,<t=%ymm6,>t=%ymm6
+vpaddd %ymm7,%ymm6,%ymm6
+
+# qhasm: 8x t *= qx8
+# asm 1: vpmulld <qx8=reg256#2,<t=reg256#7,>t=reg256#7
+# asm 2: vpmulld <qx8=%ymm1,<t=%ymm6,>t=%ymm6
+vpmulld %ymm1,%ymm6,%ymm6
+
+# qhasm: 8x t <<= 1 
+# asm 1: vpslld $1,<t=reg256#7,>t=reg256#7
+# asm 2: vpslld $1,<t=%ymm6,>t=%ymm6
+vpslld $1,%ymm6,%ymm6
+
+# qhasm: 8x t -= tmp0
+# asm 1: vpsubd <tmp0=reg256#3,<t=reg256#7,>t=reg256#3
+# asm 2: vpsubd <tmp0=%ymm2,<t=%ymm6,>t=%ymm2
+vpsubd %ymm2,%ymm6,%ymm2
+
+# qhasm: 8x k = abs(t)
+# asm 1: vpabsd <t=reg256#3,>k=reg256#3
+# asm 2: vpabsd <t=%ymm2,>k=%ymm2
+vpabsd %ymm2,%ymm2
+
+# qhasm: 8x b = tmp1 * mem256[v2730x8]
+# asm 1: vpmulld v2730x8,<tmp1=reg256#4,>b=reg256#7
+# asm 2: vpmulld v2730x8,<tmp1=%ymm3,>b=%ymm6
+vpmulld v2730x8,%ymm3,%ymm6
+
+# qhasm: 8x t = b >> 27
+# asm 1: vpsrad $27,<b=reg256#7,>t=reg256#7
+# asm 2: vpsrad $27,<b=%ymm6,>t=%ymm6
+vpsrad $27,%ymm6,%ymm6
+
+# qhasm: 8x d = t * qx8
+# asm 1: vpmulld <t=reg256#7,<qx8=reg256#2,>d=reg256#8
+# asm 2: vpmulld <t=%ymm6,<qx8=%ymm1,>d=%ymm7
+vpmulld %ymm6,%ymm1,%ymm7
+
+# qhasm: 8x b = tmp1 - d
+# asm 1: vpsubd <d=reg256#8,<tmp1=reg256#4,>b=reg256#8
+# asm 2: vpsubd <d=%ymm7,<tmp1=%ymm3,>b=%ymm7
+vpsubd %ymm7,%ymm3,%ymm7
+
+# qhasm: 8x b += _1x8
+# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#8,>b=reg256#8
+# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm7,>b=%ymm7
+vpaddd %ymm0,%ymm7,%ymm7
+
+# qhasm: 8x b = qx8 - b
+# asm 1: vpsubd <b=reg256#8,<qx8=reg256#2,>b=reg256#8
+# asm 2: vpsubd <b=%ymm7,<qx8=%ymm1,>b=%ymm7
+vpsubd %ymm7,%ymm1,%ymm7
+
+# qhasm: 8x b >>= 31
+# asm 1: vpsrad $31,<b=reg256#8,>b=reg256#8
+# asm 2: vpsrad $31,<b=%ymm7,>b=%ymm7
+vpsrad $31,%ymm7,%ymm7
+
+# qhasm: 8x t -= b
+# asm 1: vpsubd <b=reg256#8,<t=reg256#7,>t=reg256#7
+# asm 2: vpsubd <b=%ymm7,<t=%ymm6,>t=%ymm6
+vpsubd %ymm7,%ymm6,%ymm6
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#7,<_1x8=reg256#1,>d=reg256#8
+# asm 2: vpand <t=%ymm6,<_1x8=%ymm0,>d=%ymm7
+vpand %ymm6,%ymm0,%ymm7
+
+# qhasm: 8x t = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#7,>t=reg256#7
+# asm 2: vpsrad $1,<t=%ymm6,>t=%ymm6
+vpsrad $1,%ymm6,%ymm6
+
+# qhasm: 8x t += d
+# asm 1: vpaddd <d=reg256#8,<t=reg256#7,>t=reg256#7
+# asm 2: vpaddd <d=%ymm7,<t=%ymm6,>t=%ymm6
+vpaddd %ymm7,%ymm6,%ymm6
+
+# qhasm: 8x t *= qx8
+# asm 1: vpmulld <qx8=reg256#2,<t=reg256#7,>t=reg256#7
+# asm 2: vpmulld <qx8=%ymm1,<t=%ymm6,>t=%ymm6
+vpmulld %ymm1,%ymm6,%ymm6
+
+# qhasm: 8x t <<= 1 
+# asm 1: vpslld $1,<t=reg256#7,>t=reg256#7
+# asm 2: vpslld $1,<t=%ymm6,>t=%ymm6
+vpslld $1,%ymm6,%ymm6
+
+# qhasm: 8x t -= tmp1
+# asm 1: vpsubd <tmp1=reg256#4,<t=reg256#7,>t=reg256#4
+# asm 2: vpsubd <tmp1=%ymm3,<t=%ymm6,>t=%ymm3
+vpsubd %ymm3,%ymm6,%ymm3
+
+# qhasm: 8x t = abs(t)
+# asm 1: vpabsd <t=reg256#4,>t=reg256#4
+# asm 2: vpabsd <t=%ymm3,>t=%ymm3
+vpabsd %ymm3,%ymm3
+
+# qhasm: 8x k += t
+# asm 1: vpaddd <t=reg256#4,<k=reg256#3,>k=reg256#3
+# asm 2: vpaddd <t=%ymm3,<k=%ymm2,>k=%ymm2
+vpaddd %ymm3,%ymm2,%ymm2
+
+# qhasm: 8x b = tmp2 * mem256[v2730x8]
+# asm 1: vpmulld v2730x8,<tmp2=reg256#5,>b=reg256#4
+# asm 2: vpmulld v2730x8,<tmp2=%ymm4,>b=%ymm3
+vpmulld v2730x8,%ymm4,%ymm3
+
+# qhasm: 8x t = b >> 27
+# asm 1: vpsrad $27,<b=reg256#4,>t=reg256#4
+# asm 2: vpsrad $27,<b=%ymm3,>t=%ymm3
+vpsrad $27,%ymm3,%ymm3
+
+# qhasm: 8x d = t * qx8
+# asm 1: vpmulld <t=reg256#4,<qx8=reg256#2,>d=reg256#7
+# asm 2: vpmulld <t=%ymm3,<qx8=%ymm1,>d=%ymm6
+vpmulld %ymm3,%ymm1,%ymm6
+
+# qhasm: 8x b = tmp2 - d
+# asm 1: vpsubd <d=reg256#7,<tmp2=reg256#5,>b=reg256#7
+# asm 2: vpsubd <d=%ymm6,<tmp2=%ymm4,>b=%ymm6
+vpsubd %ymm6,%ymm4,%ymm6
+
+# qhasm: 8x b += _1x8
+# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#7,>b=reg256#7
+# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm6,>b=%ymm6
+vpaddd %ymm0,%ymm6,%ymm6
+
+# qhasm: 8x b = qx8 - b
+# asm 1: vpsubd <b=reg256#7,<qx8=reg256#2,>b=reg256#7
+# asm 2: vpsubd <b=%ymm6,<qx8=%ymm1,>b=%ymm6
+vpsubd %ymm6,%ymm1,%ymm6
+
+# qhasm: 8x b >>= 31
+# asm 1: vpsrad $31,<b=reg256#7,>b=reg256#7
+# asm 2: vpsrad $31,<b=%ymm6,>b=%ymm6
+vpsrad $31,%ymm6,%ymm6
+
+# qhasm: 8x t -= b
+# asm 1: vpsubd <b=reg256#7,<t=reg256#4,>t=reg256#4
+# asm 2: vpsubd <b=%ymm6,<t=%ymm3,>t=%ymm3
+vpsubd %ymm6,%ymm3,%ymm3
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#4,<_1x8=reg256#1,>d=reg256#7
+# asm 2: vpand <t=%ymm3,<_1x8=%ymm0,>d=%ymm6
+vpand %ymm3,%ymm0,%ymm6
+
+# qhasm: 8x t = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#4,>t=reg256#4
+# asm 2: vpsrad $1,<t=%ymm3,>t=%ymm3
+vpsrad $1,%ymm3,%ymm3
+
+# qhasm: 8x t += d
+# asm 1: vpaddd <d=reg256#7,<t=reg256#4,>t=reg256#4
+# asm 2: vpaddd <d=%ymm6,<t=%ymm3,>t=%ymm3
+vpaddd %ymm6,%ymm3,%ymm3
+
+# qhasm: 8x t *= qx8
+# asm 1: vpmulld <qx8=reg256#2,<t=reg256#4,>t=reg256#4
+# asm 2: vpmulld <qx8=%ymm1,<t=%ymm3,>t=%ymm3
+vpmulld %ymm1,%ymm3,%ymm3
+
+# qhasm: 8x t <<= 1 
+# asm 1: vpslld $1,<t=reg256#4,>t=reg256#4
+# asm 2: vpslld $1,<t=%ymm3,>t=%ymm3
+vpslld $1,%ymm3,%ymm3
+
+# qhasm: 8x t -= tmp2
+# asm 1: vpsubd <tmp2=reg256#5,<t=reg256#4,>t=reg256#4
+# asm 2: vpsubd <tmp2=%ymm4,<t=%ymm3,>t=%ymm3
+vpsubd %ymm4,%ymm3,%ymm3
+
+# qhasm: 8x t = abs(t)
+# asm 1: vpabsd <t=reg256#4,>t=reg256#4
+# asm 2: vpabsd <t=%ymm3,>t=%ymm3
+vpabsd %ymm3,%ymm3
+
+# qhasm: 8x k += t
+# asm 1: vpaddd <t=reg256#4,<k=reg256#3,>k=reg256#3
+# asm 2: vpaddd <t=%ymm3,<k=%ymm2,>k=%ymm2
+vpaddd %ymm3,%ymm2,%ymm2
+
+# qhasm: 8x b = tmp3 * mem256[v2730x8]
+# asm 1: vpmulld v2730x8,<tmp3=reg256#6,>b=reg256#4
+# asm 2: vpmulld v2730x8,<tmp3=%ymm5,>b=%ymm3
+vpmulld v2730x8,%ymm5,%ymm3
+
+# qhasm: 8x t = b >> 27
+# asm 1: vpsrad $27,<b=reg256#4,>t=reg256#4
+# asm 2: vpsrad $27,<b=%ymm3,>t=%ymm3
+vpsrad $27,%ymm3,%ymm3
+
+# qhasm: 8x d = t * qx8
+# asm 1: vpmulld <t=reg256#4,<qx8=reg256#2,>d=reg256#5
+# asm 2: vpmulld <t=%ymm3,<qx8=%ymm1,>d=%ymm4
+vpmulld %ymm3,%ymm1,%ymm4
+
+# qhasm: 8x b = tmp3 - d
+# asm 1: vpsubd <d=reg256#5,<tmp3=reg256#6,>b=reg256#5
+# asm 2: vpsubd <d=%ymm4,<tmp3=%ymm5,>b=%ymm4
+vpsubd %ymm4,%ymm5,%ymm4
+
+# qhasm: 8x b += _1x8
+# asm 1: vpaddd <_1x8=reg256#1,<b=reg256#5,>b=reg256#5
+# asm 2: vpaddd <_1x8=%ymm0,<b=%ymm4,>b=%ymm4
+vpaddd %ymm0,%ymm4,%ymm4
+
+# qhasm: 8x b = qx8 - b
+# asm 1: vpsubd <b=reg256#5,<qx8=reg256#2,>b=reg256#5
+# asm 2: vpsubd <b=%ymm4,<qx8=%ymm1,>b=%ymm4
+vpsubd %ymm4,%ymm1,%ymm4
+
+# qhasm: 8x b >>= 31
+# asm 1: vpsrad $31,<b=reg256#5,>b=reg256#5
+# asm 2: vpsrad $31,<b=%ymm4,>b=%ymm4
+vpsrad $31,%ymm4,%ymm4
+
+# qhasm: 8x t -= b
+# asm 1: vpsubd <b=reg256#5,<t=reg256#4,>t=reg256#4
+# asm 2: vpsubd <b=%ymm4,<t=%ymm3,>t=%ymm3
+vpsubd %ymm4,%ymm3,%ymm3
+
+# qhasm:    d = t & _1x8
+# asm 1: vpand <t=reg256#4,<_1x8=reg256#1,>d=reg256#5
+# asm 2: vpand <t=%ymm3,<_1x8=%ymm0,>d=%ymm4
+vpand %ymm3,%ymm0,%ymm4
+
+# qhasm: 8x t = t >> 1 
+# asm 1: vpsrad $1,<t=reg256#4,>t=reg256#4
+# asm 2: vpsrad $1,<t=%ymm3,>t=%ymm3
+vpsrad $1,%ymm3,%ymm3
+
+# qhasm: 8x t += d
+# asm 1: vpaddd <d=reg256#5,<t=reg256#4,>t=reg256#4
+# asm 2: vpaddd <d=%ymm4,<t=%ymm3,>t=%ymm3
+vpaddd %ymm4,%ymm3,%ymm3
+
+# qhasm: 8x t *= qx8
+# asm 1: vpmulld <qx8=reg256#2,<t=reg256#4,>t=reg256#4
+# asm 2: vpmulld <qx8=%ymm1,<t=%ymm3,>t=%ymm3
+vpmulld %ymm1,%ymm3,%ymm3
+
+# qhasm: 8x t <<= 1 
+# asm 1: vpslld $1,<t=reg256#4,>t=reg256#4
+# asm 2: vpslld $1,<t=%ymm3,>t=%ymm3
+vpslld $1,%ymm3,%ymm3
+
+# qhasm: 8x t -= tmp3
+# asm 1: vpsubd <tmp3=reg256#6,<t=reg256#4,>t=reg256#4
+# asm 2: vpsubd <tmp3=%ymm5,<t=%ymm3,>t=%ymm3
+vpsubd %ymm5,%ymm3,%ymm3
+
+# qhasm: 8x t = abs(t)
+# asm 1: vpabsd <t=reg256#4,>t=reg256#4
+# asm 2: vpabsd <t=%ymm3,>t=%ymm3
+vpabsd %ymm3,%ymm3
+
+# qhasm: 8x k += t
+# asm 1: vpaddd <t=reg256#4,<k=reg256#3,>k=reg256#3
+# asm 2: vpaddd <t=%ymm3,<k=%ymm2,>k=%ymm2
+vpaddd %ymm3,%ymm2,%ymm2
+
+# qhasm: 8x qx8 <<= 1 
+# asm 1: vpslld $1,<qx8=reg256#2,>qx8=reg256#2
+# asm 2: vpslld $1,<qx8=%ymm1,>qx8=%ymm1
+vpslld $1,%ymm1,%ymm1
+
+# qhasm: 8x k -= qx8
+# asm 1: vpsubd <qx8=reg256#2,<k=reg256#3,>k=reg256#3
+# asm 2: vpsubd <qx8=%ymm1,<k=%ymm2,>k=%ymm2
+vpsubd %ymm1,%ymm2,%ymm2
+
+# qhasm: 8x k >>= 31
+# asm 1: vpsrad $31,<k=reg256#3,>k=reg256#3
+# asm 2: vpsrad $31,<k=%ymm2,>k=%ymm2
+vpsrad $31,%ymm2,%ymm2
+
+# qhasm:    k &= _1x8
+# asm 1: vpand <_1x8=reg256#1,<k=reg256#3,<k=reg256#3
+# asm 2: vpand <_1x8=%ymm0,<k=%ymm2,<k=%ymm2
+vpand %ymm0,%ymm2,%ymm2
+
+# qhasm: pg = k
+# asm 1: vmovapd <k=reg256#3,>pg=stack256#1
+# asm 2: vmovapd <k=%ymm2,>pg=0(%rsp)
+vmovapd %ymm2,0(%rsp)
+
+# qhasm: key = *(uint32 *)(pgp + 28)
+# asm 1: movl   28(<pgp=int64#5),>key=int64#6d
+# asm 2: movl   28(<pgp=%r8),>key=%r9d
+movl   28(%r8),%r9d
+
+# qhasm: key <<= 1
+# asm 1: shl  $1,<key=int64#6
+# asm 2: shl  $1,<key=%r9
+shl  $1,%r9
+
+# qhasm: byte = *(uint32 *)(pgp + 24)
+# asm 1: movl   24(<pgp=int64#5),>byte=int64#7d
+# asm 2: movl   24(<pgp=%r8),>byte=%eax
+movl   24(%r8),%eax
+
+# qhasm: key |= byte
+# asm 1: or   <byte=int64#7,<key=int64#6
+# asm 2: or   <byte=%rax,<key=%r9
+or   %rax,%r9
+
+# qhasm: key <<= 1
+# asm 1: shl  $1,<key=int64#6
+# asm 2: shl  $1,<key=%r9
+shl  $1,%r9
+
+# qhasm: byte = *(uint32 *)(pgp + 20)
+# asm 1: movl   20(<pgp=int64#5),>byte=int64#7d
+# asm 2: movl   20(<pgp=%r8),>byte=%eax
+movl   20(%r8),%eax
+
+# qhasm: key |= byte
+# asm 1: or   <byte=int64#7,<key=int64#6
+# asm 2: or   <byte=%rax,<key=%r9
+or   %rax,%r9
+
+# qhasm: key <<= 1
+# asm 1: shl  $1,<key=int64#6
+# asm 2: shl  $1,<key=%r9
+shl  $1,%r9
+
+# qhasm: byte = *(uint32 *)(pgp + 16)
+# asm 1: movl   16(<pgp=int64#5),>byte=int64#7d
+# asm 2: movl   16(<pgp=%r8),>byte=%eax
+movl   16(%r8),%eax
+
+# qhasm: key |= byte
+# asm 1: or   <byte=int64#7,<key=int64#6
+# asm 2: or   <byte=%rax,<key=%r9
+or   %rax,%r9
+
+# qhasm: key <<= 1
+# asm 1: shl  $1,<key=int64#6
+# asm 2: shl  $1,<key=%r9
+shl  $1,%r9
+
+# qhasm: byte = *(uint32 *)(pgp + 12)
+# asm 1: movl   12(<pgp=int64#5),>byte=int64#7d
+# asm 2: movl   12(<pgp=%r8),>byte=%eax
+movl   12(%r8),%eax
+
+# qhasm: key |= byte
+# asm 1: or   <byte=int64#7,<key=int64#6
+# asm 2: or   <byte=%rax,<key=%r9
+or   %rax,%r9
+
+# qhasm: key <<= 1
+# asm 1: shl  $1,<key=int64#6
+# asm 2: shl  $1,<key=%r9
+shl  $1,%r9
+
+# qhasm: byte = *(uint32 *)(pgp +  8)
+# asm 1: movl   8(<pgp=int64#5),>byte=int64#7d
+# asm 2: movl   8(<pgp=%r8),>byte=%eax
+movl   8(%r8),%eax
+
+# qhasm: key |= byte
+# asm 1: or   <byte=int64#7,<key=int64#6
+# asm 2: or   <byte=%rax,<key=%r9
+or   %rax,%r9
+
+# qhasm: key <<= 1
+# asm 1: shl  $1,<key=int64#6
+# asm 2: shl  $1,<key=%r9
+shl  $1,%r9
+
+# qhasm: byte = *(uint32 *)(pgp +  4)
+# asm 1: movl   4(<pgp=int64#5),>byte=int64#7d
+# asm 2: movl   4(<pgp=%r8),>byte=%eax
+movl   4(%r8),%eax
+
+# qhasm: key |= byte
+# asm 1: or   <byte=int64#7,<key=int64#6
+# asm 2: or   <byte=%rax,<key=%r9
+or   %rax,%r9
+
+# qhasm: key <<= 1
+# asm 1: shl  $1,<key=int64#6
+# asm 2: shl  $1,<key=%r9
+shl  $1,%r9
+
+# qhasm: byte = *(uint32 *)(pgp +  0)
+# asm 1: movl   0(<pgp=int64#5),>byte=int64#7d
+# asm 2: movl   0(<pgp=%r8),>byte=%eax
+movl   0(%r8),%eax
+
+# qhasm: key |= byte
+# asm 1: or   <byte=int64#7,<key=int64#6
+# asm 2: or   <byte=%rax,<key=%r9
+or   %rax,%r9
+
+# qhasm: mem8[input_0 + ctr + 0] = key
+# asm 1: movb   <key=int64#6b,0(<input_0=int64#1,<ctr=int64#4)
+# asm 2: movb   <key=%r9b,0(<input_0=%rdi,<ctr=%rcx)
+movb   %r9b,0(%rdi,%rcx)
+
+# qhasm: 8x qx8 >>= 3 
+# asm 1: vpsrad $3,<qx8=reg256#2,>qx8=reg256#2
+# asm 2: vpsrad $3,<qx8=%ymm1,>qx8=%ymm1
+vpsrad $3,%ymm1,%ymm1
+
+# qhasm: ctr += 1
+# asm 1: add  $1,<ctr=int64#4
+# asm 2: add  $1,<ctr=%rcx
+add  $1,%rcx
+
+# qhasm: unsigned<? ctr - 32
+# asm 1: cmp  $32,<ctr=int64#4
+# asm 2: cmp  $32,<ctr=%rcx
+cmp  $32,%rcx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto looptop if unsigned<
+jb ._looptop
+
+# qhasm: return
+add %r11,%rsp
+ret
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/test/speed.c b/crypt/liboqs/kex_rlwe_newhope/avx2/test/speed.c
new file mode 100644
index 0000000000000000000000000000000000000000..3b71b3aa3cc8736bc5f701549ae9a9d718af6100
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/test/speed.c
@@ -0,0 +1,127 @@
+#include "../newhope.h"
+#include "../poly.h"
+#include "../error_correction.h"
+#include "../cpucycles.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+#define NTESTS 1000
+
+static int cmp_llu(const void *a, const void*b)
+{
+  if(*(unsigned long long *)a < *(unsigned long long *)b) return -1;
+  if(*(unsigned long long *)a > *(unsigned long long *)b) return 1;
+  return 0;
+}
+
+static unsigned long long median(unsigned long long *l, size_t llen)
+{
+  qsort(l,llen,sizeof(unsigned long long),cmp_llu);
+
+  if(llen%2) return l[llen/2];
+  else return (l[llen/2-1]+l[llen/2])/2;
+}
+
+static unsigned long long average(unsigned long long *t, size_t tlen)
+{
+  unsigned long long acc=0;
+  size_t i;
+  for(i=0;i<tlen;i++)
+    acc += t[i];
+  return acc/(tlen);
+}
+
+static void print_results(const char *s, unsigned long long *t, size_t tlen)
+{
+  size_t i;
+  printf("%s", s);
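+  // convert the raw cycle-counter samples into per-call deltas (the last slot keeps the final raw timestamp)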
+  for(i=0;i<tlen-1;i++)
+  {
+    t[i] = t[i+1] - t[i];
+  //  printf("%llu ", t[i]);
+  }
+  printf("\n");
+  printf("median: %llu\n", median(t, tlen));
+  printf("average: %llu\n", average(t, tlen-1));
+  printf("\n");
+}
+
+
+unsigned long long t[NTESTS];
+
+int main()
+{
+  poly sk_a;
+  unsigned char key_a[32], key_b[32];
+  unsigned char senda[NTESTS*NEWHOPE_SENDABYTES];
+  unsigned char sendb[NTESTS*NEWHOPE_SENDBBYTES];
+  unsigned char seed[NEWHOPE_SEEDBYTES];
+  int i;
+
+  for(i=0; i<NTESTS; i++)
+  {
+    t[i] = cpucycles();
+    randombytes(seed, NEWHOPE_SEEDBYTES);
+    poly_uniform(&sk_a, seed);
+  }
+  print_results("poly_uniform: ", t, NTESTS);
+
+  for(i=0; i<NTESTS; i++)
+  {
+    t[i] = cpucycles();
+    poly_ntt(&sk_a);
+  }
+  print_results("poly_ntt: ", t, NTESTS);
+
+  for(i=0; i<NTESTS; i++)
+  {
+    t[i] = cpucycles();
+    poly_invntt(&sk_a);
+  }
+  print_results("poly_invntt: ", t, NTESTS);
+
+  for(i=0; i<NTESTS; i++)
+  {
+    t[i] = cpucycles();
+    poly_getnoise(&sk_a,seed,0);
+  }
+  print_results("poly_getnoise: ", t, NTESTS);
+
+  for(i=0; i<NTESTS; i++)
+  {
+    t[i] = cpucycles();
+    helprec(&sk_a, &sk_a, seed, 0);
+  }
+  print_results("helprec: ", t, NTESTS);
+
+  for(i=0; i<NTESTS; i++)
+  {
+    t[i] = cpucycles();
+    rec(key_a, &sk_a, &sk_a);
+  }
+  print_results("rec: ", t, NTESTS);
+
+  for(i=0; i<NTESTS; i++)
+  {
+    t[i] = cpucycles();
+    newhope_keygen(senda+i*NEWHOPE_SENDABYTES, &sk_a);
+  }
+  print_results("newhope_keygen: ", t, NTESTS);
+
+  for(i=0; i<NTESTS; i++)
+  {
+    t[i] = cpucycles();
+    newhope_sharedb(key_b, sendb+i*NEWHOPE_SENDBBYTES, senda+i*NEWHOPE_SENDABYTES);
+  }
+  print_results("newhope_sharedb: ", t, NTESTS);
+
+  for(i=0; i<NTESTS; i++)
+  {
+    t[i] = cpucycles();
+    newhope_shareda(key_a, &sk_a, sendb+i*NEWHOPE_SENDBBYTES);
+  }
+  print_results("newhope_shareda: ", t, NTESTS);
+    
+  
+  return 0;
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_newhope.c b/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_newhope.c
new file mode 100644
index 0000000000000000000000000000000000000000..8660357ceb6ea9f9aa2c0604884458d9689e5bbe
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_newhope.c
@@ -0,0 +1,121 @@
+
+#include "../newhope.h"
+#include "../poly.h"
+#include "../randombytes.h"
+#include "../crypto_stream_chacha20.h"
+#include "../error_correction.h"
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#define NTESTS 100000
+
+int compare_keys(poly *a, poly *b){
+
+  int i;
+
+  for(i=0; i<256; i++){
+    if (a->coeffs[i] != b->coeffs[i]){
+      return -1;
+    }
+  }
+  return 0;
+}
+
+
+int test_keys(){
+  poly sk_a;
+  unsigned char key_a[32], key_b[32];
+  unsigned char senda[NEWHOPE_SENDABYTES];
+  unsigned char sendb[NEWHOPE_SENDBBYTES];
+  int i;
+
+
+
+  for(i=0; i<NTESTS; i++)
+  {
+    //Alice generates a public key
+    newhope_keygen(senda, &sk_a);
+
+    //Bob derives a secret key and creates a response
+    newhope_sharedb(key_b, sendb, senda);
+  
+    //Alice uses Bob's response to get her secret key
+    newhope_shareda(key_a, &sk_a, sendb);
+
+    if(memcmp(key_a, key_b, 32))
+      printf("ERROR keys\n");
+  }
+
+  return 0;
+}
+
+int test_invalid_sk_a()
+{
+  poly sk_a;
+  unsigned char key_a[32], key_b[32];
+  unsigned char senda[NEWHOPE_SENDABYTES];
+  unsigned char sendb[NEWHOPE_SENDBBYTES];
+  unsigned char noiseseed[32];
+  int i;
+  
+  randombytes(noiseseed,32);
+
+  for(i=0; i<NTESTS; i++)
+  {
+    //Alice generates a public key
+    newhope_keygen(senda, &sk_a);
+
+    //Bob derives a secret key and creates a response
+    newhope_sharedb(key_b, sendb, senda);
+  
+    //Overwrite the secret key
+    poly_getnoise(&sk_a,noiseseed,i);
+
+    //Alice uses Bob's response to get her secret key
+    newhope_shareda(key_a, &sk_a, sendb);
+
+    if(!memcmp(key_a, key_b, 32))
+      printf("ERROR invalid sk_a\n");
+  }
+  return 0;
+}
+
+
+int test_invalid_ciphertext()
+{
+  poly sk_a;
+  unsigned char key_a[32], key_b[32];
+  unsigned char senda[NEWHOPE_SENDABYTES];
+  unsigned char sendb[NEWHOPE_SENDBBYTES];
+  int i;
+
+  for(i=0; i<10; i++)
+  {
+    //Alice generates a public key
+    newhope_keygen(senda, &sk_a);
+
+    //Bob derives a secret key and creates a response
+    newhope_sharedb(key_b, sendb, senda);
+
+    //Change some byte in the "ciphertext"
+    randombytes(sendb+42,1);
+  
+    //Alice uses Bob's response to get her secret key
+    newhope_shareda(key_a, &sk_a, sendb);
+
+    if(!memcmp(key_a, key_b, 32))
+      printf("ERROR invalid sendb\n");
+  }
+
+  return 0;
+}
+
+
+int main(){
+
+  test_keys();
+  test_invalid_sk_a();
+  test_invalid_ciphertext();
+  return 0;
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_statistical.c b/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_statistical.c
new file mode 100644
index 0000000000000000000000000000000000000000..3bc5a9d970712b92c63f2c97d5ce20c8106ce231
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/test/test_statistical.c
@@ -0,0 +1,63 @@
+
+#include "../newhope.h"
+#include "../poly.h"
+#include "../randombytes.h"
+#include "../crypto_stream_chacha20.h"
+#include "../error_correction.h"
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#define NRUNS 10000000UL
+
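+// popcount lookup table: number of set bits in each byte value 0..255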
+int hamming[256] = {
+0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 
+1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 
+2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
+3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 
+4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
+
+
+static int hamming32(const unsigned char *k)
+{
+  int i;
+  int r = 0;
+  for(i=0;i<32;i++)
+    r += hamming[k[i]];
+  return r;
+}
+
+int main()
+{
+  poly sk_a;
+  unsigned char key_b[32];
+  unsigned char senda[NEWHOPE_SENDABYTES];
+  unsigned char sendb[NEWHOPE_SENDBBYTES];
+  unsigned long i;
+  long t = 0;
+
+  for(i=0;i<NRUNS;i++)
+  {
+    newhope_keygen(senda, &sk_a);
+    newhope_sharedb(key_b, sendb, senda);
+
+    t += hamming32(key_b);
+  }
+
+  printf("ones:   %ld\n",t);
+  printf("zeroes: %ld\n",256*NRUNS-t);
+  printf("diff:   %ld\n",256*NRUNS-2*t);
+
+  return 0;
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/avx2/test/testvectors.c b/crypt/liboqs/kex_rlwe_newhope/avx2/test/testvectors.c
new file mode 100644
index 0000000000000000000000000000000000000000..a72b5a44d22ce6af1887f622bf90290b70ebff2a
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/avx2/test/testvectors.c
@@ -0,0 +1,95 @@
+/* Deterministic randombytes by Daniel J. Bernstein */
+/* taken from SUPERCOP (https://bench.cr.yp.to)     */
+
+#include "../newhope.h"
+#include "../poly.h"
+#include "../randombytes.h"
+#include "../crypto_stream_chacha20.h"
+#include "../error_correction.h"
+#include <math.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+
+#define NTESTS 1000
+
+
+typedef uint32_t uint32;
+
+static uint32 seed[32] = { 3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3,2,3,8,4,6,2,6,4,3,3,8,3,2,7,9,5 } ;
+static uint32 in[12];
+static uint32 out[8];
+static int outleft = 0;
+
+#define ROTATE(x,b) (((x) << (b)) | ((x) >> (32 - (b))))
+#define MUSH(i,b) x = t[i] += (((x ^ seed[i]) + sum) ^ ROTATE(x,b));
+
+static void surf(void)
+{
+  uint32 t[12]; uint32 x; uint32 sum = 0;
+  int r; int i; int loop;
+
+  for (i = 0;i < 12;++i) t[i] = in[i] ^ seed[12 + i];
+  for (i = 0;i < 8;++i) out[i] = seed[24 + i];
+  x = t[11];
+  for (loop = 0;loop < 2;++loop) {
+    for (r = 0;r < 16;++r) {
+      sum += 0x9e3779b9;
+      MUSH(0,5) MUSH(1,7) MUSH(2,9) MUSH(3,13)
+      MUSH(4,5) MUSH(5,7) MUSH(6,9) MUSH(7,13)
+      MUSH(8,5) MUSH(9,7) MUSH(10,9) MUSH(11,13)
+    }
+    for (i = 0;i < 8;++i) out[i] ^= t[i + 4];
+  }
+}
+
+void randombytes(unsigned char *x,unsigned long long xlen)
+{
+  while (xlen > 0) {
+    if (!outleft) {
+      if (!++in[0]) if (!++in[1]) if (!++in[2]) ++in[3];
+      surf();
+      outleft = 8;
+    }
+    *x = out[--outleft];
+    printf("%02x", *x);
+    ++x;
+    --xlen;
+  }
+  printf("\n");
+}
+
+
+
+int main(void)
+{
+  poly sk_a;
+  unsigned char key_a[32], key_b[32];
+  unsigned char senda[NEWHOPE_SENDABYTES];
+  unsigned char sendb[NEWHOPE_SENDBBYTES];
+  int i,j;
+
+  for(i=0;i<NTESTS;i++)
+  {
+    newhope_keygen(senda, &sk_a);
+    for(j=0;j<NEWHOPE_SENDABYTES;j++)
+      printf("%02x",senda[j]);
+    printf("\n");
+
+    newhope_sharedb(key_b, sendb, senda);
+    for(j=0;j<NEWHOPE_SENDBBYTES;j++)
+      printf("%02x",sendb[j]);
+    printf("\n");
+
+    newhope_shareda(key_a, &sk_a, sendb);
+    for(j=0;j<32;j++)
+      printf("%02x",key_a[j]);
+    printf("\n");
+    for(j=0;j<32;j++)
+      printf("%02x",key_b[j]);
+    printf("\n");
+
+  }
+
+  return 0;
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.c b/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.c
new file mode 100644
index 0000000000000000000000000000000000000000..b251a96f5737577610bd276ced6c1709d32e5c52
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.c
@@ -0,0 +1,171 @@
+#if defined(WINDOWS)
+#define UNUSED
+// __attribute__((unused)) is not supported by MSVC, so UNUSED expands to nothing on Windows
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#if !defined(WINDOWS)
+#include <strings.h>
+#include <unistd.h>
+#endif
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+#include "kex_rlwe_newhope.h"
+#include "newhope.c"
+#include "params.h"
+
+#if defined(WINDOWS)
+#define strdup _strdup // for strdup deprecation warning
+#endif
+
+OQS_KEX *OQS_KEX_rlwe_newhope_new(OQS_RAND *rand) {
+	OQS_KEX *k = malloc(sizeof(OQS_KEX));
+	if (k == NULL) {
+		return NULL;
+	}
+	k->method_name = strdup("RLWE NewHope");
+	k->estimated_classical_security = 229; // http://eprint.iacr.org/2015/1092.pdf Table 1 NewHope dual known classical
+	k->estimated_quantum_security = 206;   // http://eprint.iacr.org/2015/1092.pdf Table 1 NewHope dual known quantum
+	k->seed = NULL;
+	k->seed_len = 0;
+	k->named_parameters = 0;
+	k->rand = rand;
+	k->params = NULL;
+	k->alice_0 = &OQS_KEX_rlwe_newhope_alice_0;
+	k->bob = &OQS_KEX_rlwe_newhope_bob;
+	k->alice_1 = &OQS_KEX_rlwe_newhope_alice_1;
+	k->alice_priv_free = &OQS_KEX_rlwe_newhope_alice_priv_free;
+	k->free = &OQS_KEX_rlwe_newhope_free;
+	return k;
+}
+
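+/* Alice's first flow: allocate her secret polynomial and public message, run keygen, and return NEWHOPE_SENDABYTES bytes. */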
+int OQS_KEX_rlwe_newhope_alice_0(UNUSED OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) {
+
+	int ret;
+
+	*alice_priv = NULL;
+	*alice_msg = NULL;
+
+	/* allocate public/private key pair */
+	*alice_msg = malloc(NEWHOPE_SENDABYTES);
+	if (*alice_msg == NULL) {
+		goto err;
+	}
+	*alice_priv = malloc(sizeof(poly));
+	if (*alice_priv == NULL) {
+		goto err;
+	}
+
+	/* generate public/private key pair */
+	keygen(*alice_msg, (poly *) (*alice_priv), k->rand);
+	*alice_msg_len = NEWHOPE_SENDABYTES;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*alice_msg);
+	*alice_msg = NULL;
+	free(*alice_priv);
+	*alice_priv = NULL;
+
+cleanup:
+
+	return ret;
+}
+
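+/* Bob's flow: check the length of Alice's message, then produce his NEWHOPE_SENDBBYTES response and the 32-byte session key via sharedb. */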
+int OQS_KEX_rlwe_newhope_bob(UNUSED OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+
+	*bob_msg = NULL;
+	*key = NULL;
+
+	if (alice_msg_len != NEWHOPE_SENDABYTES) {
+		goto err;
+	}
+
+	/* allocate message and session key */
+	*bob_msg = malloc(NEWHOPE_SENDBBYTES);
+	if (*bob_msg == NULL) {
+		goto err;
+	}
+	*key = malloc(32);
+	if (*key == NULL) {
+		goto err;
+	}
+
+	/* generate Bob's response */
+	sharedb(*key, *bob_msg, alice_msg, k->rand);
+	*bob_msg_len = NEWHOPE_SENDBBYTES;
+	*key_len = 32;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*bob_msg);
+	*bob_msg = NULL;
+	free(*key);
+	*key = NULL;
+
+cleanup:
+
+	return ret;
+}
+
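+/* Alice's second flow: check Bob's message length and derive the 32-byte session key from it and her stored secret polynomial. */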
+int OQS_KEX_rlwe_newhope_alice_1(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+
+	*key = NULL;
+
+	if (bob_msg_len != NEWHOPE_SENDBBYTES) {
+		goto err;
+	}
+
+	/* allocate session key */
+	*key = malloc(32);
+	if (*key == NULL) {
+		goto err;
+	}
+
+	/* generate Alice's session key */
+	shareda(*key, (poly *) alice_priv, bob_msg);
+	*key_len = 32;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*key);
+	*key = NULL;
+
+cleanup:
+
+	return ret;
+}
+
+void OQS_KEX_rlwe_newhope_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) {
+	if (alice_priv) {
+		free(alice_priv);
+	}
+}
+
+void OQS_KEX_rlwe_newhope_free(OQS_KEX *k) {
+	if (k) {
+		free(k->named_parameters);
+		k->named_parameters = NULL;
+		free(k->method_name);
+		k->method_name = NULL;
+	}
+	free(k);
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.h b/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.h
new file mode 100644
index 0000000000000000000000000000000000000000..ba3d70aebfc5f1576d67b2ff397d3bae7c6ecdcd
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/kex_rlwe_newhope.h
@@ -0,0 +1,24 @@
+/**
+ * \file kex_rlwe_newhope.h
+ * \brief Header for ring-LWE key exchange protocol NewHope
+ */
+
+#ifndef __OQS_KEX_RLWE_NEWHOPE_H
+#define __OQS_KEX_RLWE_NEWHOPE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+OQS_KEX *OQS_KEX_rlwe_newhope_new(OQS_RAND *rand);
+
+int OQS_KEX_rlwe_newhope_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
+int OQS_KEX_rlwe_newhope_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
+int OQS_KEX_rlwe_newhope_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
+
+void OQS_KEX_rlwe_newhope_alice_priv_free(OQS_KEX *k, void *alice_priv);
+void OQS_KEX_rlwe_newhope_free(OQS_KEX *k);
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/newhope.c b/crypt/liboqs/kex_rlwe_newhope/newhope.c
new file mode 100644
index 0000000000000000000000000000000000000000..8025273b5de5dd0a090d813c45b1727d6b7b7263
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/newhope.c
@@ -0,0 +1,120 @@
+#include <stdint.h>
+
+#include <oqs/sha3.h>
+
+// clang-format off
+// (order of include matters)
+#include "precomp.c"
+#include "poly.c"
+// clang-format on
+
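+// Wire format: encode_a/decode_a pack the public polynomial (POLY_BYTES) followed by the 32-byte seed;
+// encode_b/decode_b pack Bob's polynomial followed by the reconciliation hint at 2 bits per coefficient.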
+static void encode_a(unsigned char *r, const poly *pk,
+                     const unsigned char *seed) {
+	int i;
+	poly_tobytes(r, pk);
+	for (i = 0; i < NEWHOPE_SEEDBYTES; i++) {
+		r[POLY_BYTES + i] = seed[i];
+	}
+}
+
+static void decode_a(poly *pk, unsigned char *seed, const unsigned char *r) {
+	int i;
+	poly_frombytes(pk, r);
+	for (i = 0; i < NEWHOPE_SEEDBYTES; i++) {
+		seed[i] = r[POLY_BYTES + i];
+	}
+}
+
+static void encode_b(unsigned char *r, const poly *b, const poly *c) {
+	int i;
+	poly_tobytes(r, b);
+	for (i = 0; i < PARAM_N / 4; i++) {
+		r[POLY_BYTES + i] = c->coeffs[4 * i] | (c->coeffs[4 * i + 1] << 2) |
+		                    (c->coeffs[4 * i + 2] << 4) |
+		                    (c->coeffs[4 * i + 3] << 6);
+	}
+}
+
+static void decode_b(poly *b, poly *c, const unsigned char *r) {
+	int i;
+	poly_frombytes(b, r);
+	for (i = 0; i < PARAM_N / 4; i++) {
+		c->coeffs[4 * i + 0] = r[POLY_BYTES + i] & 0x03;
+		c->coeffs[4 * i + 1] = (r[POLY_BYTES + i] >> 2) & 0x03;
+		c->coeffs[4 * i + 2] = (r[POLY_BYTES + i] >> 4) & 0x03;
+		c->coeffs[4 * i + 3] = (r[POLY_BYTES + i] >> 6);
+	}
+}
+
+static void gen_a(poly *a, const unsigned char *seed) { poly_uniform(a, seed); }
+
+// API FUNCTIONS
+
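+// keygen: derive the public polynomial a from a fresh seed, sample secret s and error e,
+// compute pk = a*s + e pointwise in the NTT domain, and send pk together with the seed; sk keeps s in NTT form.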
+static void keygen(unsigned char *send, poly *sk, OQS_RAND *rand) {
+	poly a, e, r, pk;
+	unsigned char seed[NEWHOPE_SEEDBYTES];
+
+	rand->rand_n(rand, seed, NEWHOPE_SEEDBYTES);
+
+	gen_a(&a, seed);
+
+	poly_getnoise(sk, rand);
+	poly_ntt(sk);
+
+	poly_getnoise(&e, rand);
+	poly_ntt(&e);
+
+	poly_pointwise(&r, sk, &a);
+	poly_add(&pk, &e, &r);
+
+	encode_a(send, &pk, seed);
+}
+
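+// sharedb: regenerate a from Alice's seed, send u = a*s' + e' plus the reconciliation hint c,
+// and derive the key from v = pk*s' + e'' (hashed with SHA3-256 unless STATISTICAL_TEST is defined).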
+static void sharedb(unsigned char *sharedkey, unsigned char *send,
+                    const unsigned char *received, OQS_RAND *rand) {
+	poly sp, ep, v, a, pka, c, epp, bp;
+	unsigned char seed[NEWHOPE_SEEDBYTES];
+
+	decode_a(&pka, seed, received);
+	gen_a(&a, seed);
+
+	poly_getnoise(&sp, rand);
+	poly_ntt(&sp);
+	poly_getnoise(&ep, rand);
+	poly_ntt(&ep);
+
+	poly_pointwise(&bp, &a, &sp);
+	poly_add(&bp, &bp, &ep);
+
+	poly_pointwise(&v, &pka, &sp);
+	poly_invntt(&v);
+
+	poly_getnoise(&epp, rand);
+	poly_add(&v, &v, &epp);
+
+	helprec(&c, &v, rand);
+
+	encode_b(send, &bp, &c);
+
+	rec(sharedkey, &v, &c);
+
+#ifndef STATISTICAL_TEST
+	OQS_SHA3_sha3256(sharedkey, sharedkey, 32);
+#endif
+}
+
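+// shareda: compute v' = u*s, leave the NTT domain, and recover the same key from v' and Bob's hint c.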
+static void shareda(unsigned char *sharedkey, const poly *sk,
+                    const unsigned char *received) {
+	poly v, bp, c;
+
+	decode_b(&bp, &c, received);
+
+	poly_pointwise(&v, sk, &bp);
+	poly_invntt(&v);
+
+	rec(sharedkey, &v, &c);
+
+#ifndef STATISTICAL_TEST
+	OQS_SHA3_sha3256(sharedkey, sharedkey, 32);
+#endif
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/params.h b/crypt/liboqs/kex_rlwe_newhope/params.h
new file mode 100644
index 0000000000000000000000000000000000000000..932770e4d093f753da393d40fe5bf874b94f9dba
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/params.h
@@ -0,0 +1,28 @@
+#ifndef PARAMS_H
+#define PARAMS_H
+
+#include <stdint.h>
+
+#define PARAM_N 1024
+
+#define PARAM_K 16 /* used in sampler */
+#define PARAM_Q 12289
+
+#define POLY_BYTES 1792
+#define NEWHOPE_SEEDBYTES 32
+#define NEWHOPE_RECBYTES 256
+
+#define NEWHOPE_SENDABYTES (POLY_BYTES + NEWHOPE_SEEDBYTES)
+#define NEWHOPE_SENDBBYTES (POLY_BYTES + NEWHOPE_RECBYTES)
+
+extern uint16_t oqs_kex_rlwe_newhope_bitrev_table[];
+extern uint16_t oqs_kex_rlwe_newhope_omegas_montgomery[];
+extern uint16_t oqs_kex_rlwe_newhope_omegas_inv_montgomery[];
+extern uint16_t oqs_kex_rlwe_newhope_psis_inv_montgomery[];
+extern uint16_t oqs_kex_rlwe_newhope_psis_bitrev_montgomery[];
+
+#if defined(WINDOWS)
+typedef unsigned __int16 uint16_t;
+#endif
+
+#endif
diff --git a/crypt/liboqs/kex_rlwe_newhope/poly.c b/crypt/liboqs/kex_rlwe_newhope/poly.c
new file mode 100644
index 0000000000000000000000000000000000000000..ca5014e9618d13fc0f7dec8d29de4c0283a96efd
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/poly.c
@@ -0,0 +1,339 @@
+#include "params.h"
+#include <oqs/rand.h>
+#include <oqs/sha3.h>
+
+typedef struct {
+	uint16_t coeffs[PARAM_N];
+#if defined(WINDOWS)
+} poly;
+#else
+} poly __attribute__((aligned(32)));
+#endif
+
+static const uint32_t qinv = 12287; // -inverse_mod(p,2^18)
+static const uint32_t rlog = 18;
+
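+// Montgomery reduction with R = 2^18 (qinv = -q^(-1) mod R): returns a value congruent to a * R^(-1) mod q, possibly not fully reduced.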
+static uint16_t montgomery_reduce(uint32_t a) {
+	uint32_t u;
+
+	u = (a * qinv);
+	u &= ((1 << rlog) - 1);
+	u *= PARAM_Q;
+	a = a + u;
+	return a >> 18;
+}
+
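+// Barrett-style reduction: subtract floor(a * 5 / 2^16) * q; the result is congruent to a mod q but may not be fully reduced.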
+static uint16_t barrett_reduce(uint16_t a) {
+	uint32_t u;
+
+	u = ((uint32_t) a * 5) >> 16;
+	u *= PARAM_Q;
+	a -= u;
+	return a;
+}
+
+static void bitrev_vector(uint16_t *poly) {
+	unsigned int i, r;
+	uint16_t tmp;
+
+	for (i = 0; i < PARAM_N; i++) {
+		r = oqs_kex_rlwe_newhope_bitrev_table[i];
+		if (i < r) {
+			tmp = poly[i];
+			poly[i] = poly[r];
+			poly[r] = tmp;
+		}
+	}
+}
+
+static void mul_coefficients(uint16_t *poly, const uint16_t *factors) {
+	unsigned int i;
+
+	for (i = 0; i < PARAM_N; i++) {
+		poly[i] = montgomery_reduce((poly[i] * factors[i]));
+	}
+}
+
+/* Gentleman-Sande NTT, bit-reversed order to normal order; the omega twiddle factors must be in the Montgomery domain */
+static void ntt(uint16_t *a, const uint16_t *omega) {
+	int i, start, j, jTwiddle, distance;
+	uint16_t temp, W;
+
+	for (i = 0; i < 10; i += 2) {
+		// Even level
+		distance = (1 << i);
+		for (start = 0; start < distance; start++) {
+			jTwiddle = 0;
+			for (j = start; j < PARAM_N - 1; j += 2 * distance) {
+				W = omega[jTwiddle++];
+				temp = a[j];
+				a[j] = (temp + a[j + distance]); // Omit reduction (be lazy)
+				a[j + distance] = montgomery_reduce(
+				    (W * ((uint32_t) temp + 3 * PARAM_Q - a[j + distance])));
+			}
+		}
+
+		// Odd level
+		distance <<= 1;
+		for (start = 0; start < distance; start++) {
+			jTwiddle = 0;
+			for (j = start; j < PARAM_N - 1; j += 2 * distance) {
+				W = omega[jTwiddle++];
+				temp = a[j];
+				a[j] = barrett_reduce((temp + a[j + distance]));
+				a[j + distance] = montgomery_reduce(
+				    (W * ((uint32_t) temp + 3 * PARAM_Q - a[j + distance])));
+			}
+		}
+	}
+}
+
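+// Unpack a byte string into coefficients: every 7 bytes hold four 14-bit coefficients (poly_tobytes below is the inverse).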
+static void poly_frombytes(poly *r, const unsigned char *a) {
+	int i;
+	for (i = 0; i < PARAM_N / 4; i++) {
+		r->coeffs[4 * i + 0] =
+		    a[7 * i + 0] | (((uint16_t) a[7 * i + 1] & 0x3f) << 8);
+		r->coeffs[4 * i + 1] = (a[7 * i + 1] >> 6) |
+		                       (((uint16_t) a[7 * i + 2]) << 2) |
+		                       (((uint16_t) a[7 * i + 3] & 0x0f) << 10);
+		r->coeffs[4 * i + 2] = (a[7 * i + 3] >> 4) |
+		                       (((uint16_t) a[7 * i + 4]) << 4) |
+		                       (((uint16_t) a[7 * i + 5] & 0x03) << 12);
+		r->coeffs[4 * i + 3] =
+		    (a[7 * i + 5] >> 2) | (((uint16_t) a[7 * i + 6]) << 6);
+	}
+}
+
+static void poly_tobytes(unsigned char *r, const poly *p) {
+	int i;
+	uint16_t t0, t1, t2, t3, m;
+	int16_t c;
+	for (i = 0; i < PARAM_N / 4; i++) {
+		t0 = barrett_reduce(
+		    p->coeffs[4 * i + 0]); // Make sure that coefficients have only 14 bits
+		t1 = barrett_reduce(p->coeffs[4 * i + 1]);
+		t2 = barrett_reduce(p->coeffs[4 * i + 2]);
+		t3 = barrett_reduce(p->coeffs[4 * i + 3]);
+
+		m = t0 - PARAM_Q;
+		c = m;
+		c >>= 15;
+		t0 = m ^ ((t0 ^ m) & c); // Make sure that coefficients are in [0, q-1]
+
+		m = t1 - PARAM_Q;
+		c = m;
+		c >>= 15;
+		t1 = m ^ ((t1 ^ m) & c); // Make sure that coefficients are in [0, q-1]
+
+		m = t2 - PARAM_Q;
+		c = m;
+		c >>= 15;
+		t2 = m ^ ((t2 ^ m) & c); // Make sure that coefficients are in [0, q-1]
+
+		m = t3 - PARAM_Q;
+		c = m;
+		c >>= 15;
+		t3 = m ^ ((t3 ^ m) & c); // Make sure that coefficients are in [0, q-1]
+
+		r[7 * i + 0] = t0 & 0xff;
+		r[7 * i + 1] = (t0 >> 8) | (t1 << 6);
+		r[7 * i + 2] = (t1 >> 2);
+		r[7 * i + 3] = (t1 >> 10) | (t2 << 4);
+		r[7 * i + 4] = (t2 >> 4);
+		r[7 * i + 5] = (t2 >> 12) | (t3 << 2);
+		r[7 * i + 6] = (t3 >> 6);
+	}
+}
+
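+// Expand the 32-byte public seed into a uniform polynomial: squeeze SHAKE-128 output and keep
+// 14-bit candidates only if they are below PARAM_Q (rejection sampling).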
+static void poly_uniform(poly *a, const unsigned char *seed) {
+	unsigned int pos = 0, ctr = 0;
+	uint16_t val;
+	uint64_t state[OQS_SHA3_STATESIZE];
+	unsigned int nblocks = 16;
+	uint8_t buf[OQS_SHA3_SHAKE128_RATE * 16];
+
+	OQS_SHA3_shake128_absorb(state, seed, NEWHOPE_SEEDBYTES);
+
+	OQS_SHA3_shake128_squeezeblocks((unsigned char *) buf, nblocks, state);
+
+	while (ctr < PARAM_N) {
+		val = (buf[pos] | ((uint16_t) buf[pos + 1] << 8)) &
+		      0x3fff; // Specialized for q = 12289
+		if (val < PARAM_Q) {
+			a->coeffs[ctr++] = val;
+		}
+		pos += 2;
+		if (pos > OQS_SHA3_SHAKE128_RATE * nblocks - 2) {
+			nblocks = 1;
+			OQS_SHA3_shake128_squeezeblocks((unsigned char *) buf, nblocks, state);
+			pos = 0;
+		}
+	}
+}
+
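+// Sample noise from the centered binomial distribution with k = 16: each coefficient is
+// (popcount of 16 random bits) - (popcount of another 16 random bits) + PARAM_Q, computed via a per-byte popcount trick.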
+static void poly_getnoise(poly *r, OQS_RAND *rand) {
+#if PARAM_K != 16
+#error "poly_getnoise in poly.c only supports k=16"
+#endif
+
+	unsigned char buf[4 * PARAM_N];
+	uint32_t *tp, t, d, a, b;
+	int i, j;
+
+	tp = (uint32_t *) buf;
+
+	rand->rand_n(rand, buf, 4 * PARAM_N);
+
+	for (i = 0; i < PARAM_N; i++) {
+		t = tp[i];
+		d = 0;
+		for (j = 0; j < 8; j++) {
+			d += (t >> j) & 0x01010101;
+		}
+		a = ((d >> 8) & 0xff) + (d & 0xff);
+		b = (d >> 24) + ((d >> 16) & 0xff);
+		r->coeffs[i] = a + PARAM_Q - b;
+	}
+}
+
+static void poly_pointwise(poly *r, const poly *a, const poly *b) {
+	int i;
+	uint16_t t;
+	for (i = 0; i < PARAM_N; i++) {
+		t = montgomery_reduce(3186 *
+		                      b->coeffs[i]); /* t is now in Montgomery domain */
+		r->coeffs[i] = montgomery_reduce(
+		    a->coeffs[i] * t); /* r->coeffs[i] is back in normal domain */
+	}
+}
+
+static void poly_add(poly *r, const poly *a, const poly *b) {
+	int i;
+	for (i = 0; i < PARAM_N; i++) {
+		r->coeffs[i] = barrett_reduce(a->coeffs[i] + b->coeffs[i]);
+	}
+}
+
+static void poly_ntt(poly *r) {
+	mul_coefficients(r->coeffs, oqs_kex_rlwe_newhope_psis_bitrev_montgomery);
+	ntt((uint16_t *) r->coeffs, oqs_kex_rlwe_newhope_omegas_montgomery);
+}
+
+static void poly_invntt(poly *r) {
+	bitrev_vector(r->coeffs);
+	ntt((uint16_t *) r->coeffs, oqs_kex_rlwe_newhope_omegas_inv_montgomery);
+	mul_coefficients(r->coeffs, oqs_kex_rlwe_newhope_psis_inv_montgomery);
+}
+
+// Error Correction:
+
+static int32_t nh_abs(int32_t v) {
+	int32_t mask = v >> 31;
+	return (v ^ mask) - mask;
+}
+
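+// f(): helper for helprec; computes two candidate roundings of x/(2*PARAM_Q) (v0 and v1) and returns |x - 2*PARAM_Q*v0|.
+// The constant 2730 approximates 2^25 / 12289, so b >> 25 is an initial estimate of x/PARAM_Q that the following lines correct.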
+static int32_t f(int32_t *v0, int32_t *v1, int32_t x) {
+	int32_t xit, t, r, b;
+
+	// Next 6 lines compute t = x/PARAM_Q;
+	b = x * 2730;
+	t = b >> 25;
+	b = x - t * 12289;
+	b = 12288 - b;
+	b >>= 31;
+	t -= b;
+
+	r = t & 1;
+	xit = (t >> 1);
+	*v0 = xit + r; // v0 = round(x/(2*PARAM_Q))
+
+	t -= 1;
+	r = t & 1;
+	*v1 = (t >> 1) + r;
+
+	return nh_abs(x - ((*v0) * 2 * PARAM_Q));
+}
+
+static int32_t g(int32_t x) {
+	int32_t t, c, b;
+
+	// Next 6 lines compute t = x/(4*PARAM_Q);
+	b = x * 2730;
+	t = b >> 27;
+	b = x - t * 49156;
+	b = 49155 - b;
+	b >>= 31;
+	t -= b;
+
+	c = t & 1;
+	t = (t >> 1) + c; // t = round(x/(8*PARAM_Q))
+
+	t *= 8 * PARAM_Q;
+
+	return nh_abs(t - x);
+}
+
+static int16_t LDDecode(int32_t xi0, int32_t xi1, int32_t xi2, int32_t xi3) {
+	int32_t t;
+
+	t = g(xi0);
+	t += g(xi1);
+	t += g(xi2);
+	t += g(xi3);
+
+	t -= 8 * PARAM_Q;
+	t >>= 31;
+	return t & 1;
+}
+
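+// HelpRec: for each of the 256 groups of four coefficients, emit a 2-bit-per-coefficient reconciliation hint
+// identifying the closest point of the reconciliation lattice; one fresh random bit per group implements the randomized doubling.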
+static void helprec(poly *c, const poly *v, OQS_RAND *oqs_rand) {
+	int32_t v0[4], v1[4], v_tmp[4], k;
+	unsigned char rbit;
+	unsigned char rand[32];
+	int i;
+
+	oqs_rand->rand_n(oqs_rand, rand, 32);
+
+	for (i = 0; i < 256; i++) {
+		rbit = (rand[i >> 3] >> (i & 7)) & 1;
+
+		k = f(v0 + 0, v1 + 0, 8 * v->coeffs[0 + i] + 4 * rbit);
+		k += f(v0 + 1, v1 + 1, 8 * v->coeffs[256 + i] + 4 * rbit);
+		k += f(v0 + 2, v1 + 2, 8 * v->coeffs[512 + i] + 4 * rbit);
+		k += f(v0 + 3, v1 + 3, 8 * v->coeffs[768 + i] + 4 * rbit);
+
+		k = (2 * PARAM_Q - 1 - k) >> 31;
+
+		v_tmp[0] = ((~k) & v0[0]) ^ (k & v1[0]);
+		v_tmp[1] = ((~k) & v0[1]) ^ (k & v1[1]);
+		v_tmp[2] = ((~k) & v0[2]) ^ (k & v1[2]);
+		v_tmp[3] = ((~k) & v0[3]) ^ (k & v1[3]);
+
+		c->coeffs[0 + i] = (v_tmp[0] - v_tmp[3]) & 3;
+		c->coeffs[256 + i] = (v_tmp[1] - v_tmp[3]) & 3;
+		c->coeffs[512 + i] = (v_tmp[2] - v_tmp[3]) & 3;
+		c->coeffs[768 + i] = (-k + 2 * v_tmp[3]) & 3;
+	}
+}
+
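+// Rec: combine the noisy coefficients v with the hint c, decode each group of four coefficients to one key bit
+// via LDDecode, and pack the 256 bits into the 32-byte key.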
+static void rec(unsigned char *key, const poly *v, const poly *c) {
+	int i;
+	int32_t tmp[4];
+
+	for (i = 0; i < 32; i++) {
+		key[i] = 0;
+	}
+
+	for (i = 0; i < 256; i++) {
+		tmp[0] = 16 * PARAM_Q + 8 * (int32_t) v->coeffs[0 + i] -
+		         PARAM_Q * (2 * c->coeffs[0 + i] + c->coeffs[768 + i]);
+		tmp[1] = 16 * PARAM_Q + 8 * (int32_t) v->coeffs[256 + i] -
+		         PARAM_Q * (2 * c->coeffs[256 + i] + c->coeffs[768 + i]);
+		tmp[2] = 16 * PARAM_Q + 8 * (int32_t) v->coeffs[512 + i] -
+		         PARAM_Q * (2 * c->coeffs[512 + i] + c->coeffs[768 + i]);
+		tmp[3] = 16 * PARAM_Q + 8 * (int32_t) v->coeffs[768 + i] -
+		         PARAM_Q * (c->coeffs[768 + i]);
+
+		key[i >> 3] |= LDDecode(tmp[0], tmp[1], tmp[2], tmp[3]) << (i & 7);
+	}
+}
diff --git a/crypt/liboqs/kex_rlwe_newhope/precomp.c b/crypt/liboqs/kex_rlwe_newhope/precomp.c
new file mode 100644
index 0000000000000000000000000000000000000000..675b75d1c4cee5e8fd52b24691fd2feeba1ac392
--- /dev/null
+++ b/crypt/liboqs/kex_rlwe_newhope/precomp.c
@@ -0,0 +1,43 @@
+#include "params.h"
+
+uint16_t oqs_kex_rlwe_newhope_bitrev_table[1024] = {
+    0, 512, 256, 768, 128, 640, 384, 896, 64, 576, 320, 832, 192, 704, 448, 960, 32, 544, 288, 800, 160, 672, 416, 928, 96, 608, 352, 864, 224, 736, 480, 992,
+    16, 528, 272, 784, 144, 656, 400, 912, 80, 592, 336, 848, 208, 720, 464, 976, 48, 560, 304, 816, 176, 688, 432, 944, 112, 624, 368, 880, 240, 752, 496, 1008,
+    8, 520, 264, 776, 136, 648, 392, 904, 72, 584, 328, 840, 200, 712, 456, 968, 40, 552, 296, 808, 168, 680, 424, 936, 104, 616, 360, 872, 232, 744, 488, 1000,
+    24, 536, 280, 792, 152, 664, 408, 920, 88, 600, 344, 856, 216, 728, 472, 984, 56, 568, 312, 824, 184, 696, 440, 952, 120, 632, 376, 888, 248, 760, 504, 1016,
+    4, 516, 260, 772, 132, 644, 388, 900, 68, 580, 324, 836, 196, 708, 452, 964, 36, 548, 292, 804, 164, 676, 420, 932, 100, 612, 356, 868, 228, 740, 484, 996,
+    20, 532, 276, 788, 148, 660, 404, 916, 84, 596, 340, 852, 212, 724, 468, 980, 52, 564, 308, 820, 180, 692, 436, 948, 116, 628, 372, 884, 244, 756, 500, 1012,
+    12, 524, 268, 780, 140, 652, 396, 908, 76, 588, 332, 844, 204, 716, 460, 972, 44, 556, 300, 812, 172, 684, 428, 940, 108, 620, 364, 876, 236, 748, 492, 1004,
+    28, 540, 284, 796, 156, 668, 412, 924, 92, 604, 348, 860, 220, 732, 476, 988, 60, 572, 316, 828, 188, 700, 444, 956, 124, 636, 380, 892, 252, 764, 508, 1020,
+    2, 514, 258, 770, 130, 642, 386, 898, 66, 578, 322, 834, 194, 706, 450, 962, 34, 546, 290, 802, 162, 674, 418, 930, 98, 610, 354, 866, 226, 738, 482, 994,
+    18, 530, 274, 786, 146, 658, 402, 914, 82, 594, 338, 850, 210, 722, 466, 978, 50, 562, 306, 818, 178, 690, 434, 946, 114, 626, 370, 882, 242, 754, 498, 1010,
+    10, 522, 266, 778, 138, 650, 394, 906, 74, 586, 330, 842, 202, 714, 458, 970, 42, 554, 298, 810, 170, 682, 426, 938, 106, 618, 362, 874, 234, 746, 490, 1002,
+    26, 538, 282, 794, 154, 666, 410, 922, 90, 602, 346, 858, 218, 730, 474, 986, 58, 570, 314, 826, 186, 698, 442, 954, 122, 634, 378, 890, 250, 762, 506, 1018,
+    6, 518, 262, 774, 134, 646, 390, 902, 70, 582, 326, 838, 198, 710, 454, 966, 38, 550, 294, 806, 166, 678, 422, 934, 102, 614, 358, 870, 230, 742, 486, 998,
+    22, 534, 278, 790, 150, 662, 406, 918, 86, 598, 342, 854, 214, 726, 470, 982, 54, 566, 310, 822, 182, 694, 438, 950, 118, 630, 374, 886, 246, 758, 502, 1014,
+    14, 526, 270, 782, 142, 654, 398, 910, 78, 590, 334, 846, 206, 718, 462, 974, 46, 558, 302, 814, 174, 686, 430, 942, 110, 622, 366, 878, 238, 750, 494, 1006,
+    30, 542, 286, 798, 158, 670, 414, 926, 94, 606, 350, 862, 222, 734, 478, 990, 62, 574, 318, 830, 190, 702, 446, 958, 126, 638, 382, 894, 254, 766, 510, 1022,
+    1, 513, 257, 769, 129, 641, 385, 897, 65, 577, 321, 833, 193, 705, 449, 961, 33, 545, 289, 801, 161, 673, 417, 929, 97, 609, 353, 865, 225, 737, 481, 993,
+    17, 529, 273, 785, 145, 657, 401, 913, 81, 593, 337, 849, 209, 721, 465, 977, 49, 561, 305, 817, 177, 689, 433, 945, 113, 625, 369, 881, 241, 753, 497, 1009,
+    9, 521, 265, 777, 137, 649, 393, 905, 73, 585, 329, 841, 201, 713, 457, 969, 41, 553, 297, 809, 169, 681, 425, 937, 105, 617, 361, 873, 233, 745, 489, 1001,
+    25, 537, 281, 793, 153, 665, 409, 921, 89, 601, 345, 857, 217, 729, 473, 985, 57, 569, 313, 825, 185, 697, 441, 953, 121, 633, 377, 889, 249, 761, 505, 1017,
+    5, 517, 261, 773, 133, 645, 389, 901, 69, 581, 325, 837, 197, 709, 453, 965, 37, 549, 293, 805, 165, 677, 421, 933, 101, 613, 357, 869, 229, 741, 485, 997,
+    21, 533, 277, 789, 149, 661, 405, 917, 85, 597, 341, 853, 213, 725, 469, 981, 53, 565, 309, 821, 181, 693, 437, 949, 117, 629, 373, 885, 245, 757, 501, 1013,
+    13, 525, 269, 781, 141, 653, 397, 909, 77, 589, 333, 845, 205, 717, 461, 973, 45, 557, 301, 813, 173, 685, 429, 941, 109, 621, 365, 877, 237, 749, 493, 1005,
+    29, 541, 285, 797, 157, 669, 413, 925, 93, 605, 349, 861, 221, 733, 477, 989, 61, 573, 317, 829, 189, 701, 445, 957, 125, 637, 381, 893, 253, 765, 509, 1021,
+    3, 515, 259, 771, 131, 643, 387, 899, 67, 579, 323, 835, 195, 707, 451, 963, 35, 547, 291, 803, 163, 675, 419, 931, 99, 611, 355, 867, 227, 739, 483, 995,
+    19, 531, 275, 787, 147, 659, 403, 915, 83, 595, 339, 851, 211, 723, 467, 979, 51, 563, 307, 819, 179, 691, 435, 947, 115, 627, 371, 883, 243, 755, 499, 1011,
+    11, 523, 267, 779, 139, 651, 395, 907, 75, 587, 331, 843, 203, 715, 459, 971, 43, 555, 299, 811, 171, 683, 427, 939, 107, 619, 363, 875, 235, 747, 491, 1003,
+    27, 539, 283, 795, 155, 667, 411, 923, 91, 603, 347, 859, 219, 731, 475, 987, 59, 571, 315, 827, 187, 699, 443, 955, 123, 635, 379, 891, 251, 763, 507, 1019,
+    7, 519, 263, 775, 135, 647, 391, 903, 71, 583, 327, 839, 199, 711, 455, 967, 39, 551, 295, 807, 167, 679, 423, 935, 103, 615, 359, 871, 231, 743, 487, 999,
+    23, 535, 279, 791, 151, 663, 407, 919, 87, 599, 343, 855, 215, 727, 471, 983, 55, 567, 311, 823, 183, 695, 439, 951, 119, 631, 375, 887, 247, 759, 503, 1015,
+    15, 527, 271, 783, 143, 655, 399, 911, 79, 591, 335, 847, 207, 719, 463, 975, 47, 559, 303, 815, 175, 687, 431, 943, 111, 623, 367, 879, 239, 751, 495, 1007,
+    31, 543, 287, 799, 159, 671, 415, 927, 95, 607, 351, 863, 223, 735, 479, 991, 63, 575, 319, 831, 191, 703, 447, 959, 127, 639, 383, 895, 255, 767, 511, 1023};
+
+uint16_t oqs_kex_rlwe_newhope_omegas_montgomery[PARAM_N / 2] = {4075, 6974, 7373, 7965, 3262, 5079, 522, 2169, 6364, 1018, 1041, 8775, 2344, 11011, 5574, 1973, 4536, 1050, 6844, 3860, 3818, 6118, 2683, 1190, 4789, 7822, 7540, 6752, 5456, 4449, 3789, 12142, 11973, 382, 3988, 468, 6843, 5339, 6196, 3710, 11316, 1254, 5435, 10930, 3998, 10256, 10367, 3879, 11889, 1728, 6137, 4948, 5862, 6136, 3643, 6874, 8724, 654, 10302, 1702, 7083, 6760, 56, 3199, 9987, 605, 11785, 8076, 5594, 9260, 6403, 4782, 6212, 4624, 9026, 8689, 4080, 11868, 6221, 3602, 975, 8077, 8851, 9445, 5681, 3477, 1105, 142, 241, 12231, 1003, 3532, 5009, 1956, 6008, 11404, 7377, 2049, 10968, 12097, 7591, 5057, 3445, 4780, 2920, 7048, 3127, 8120, 11279, 6821, 11502, 8807, 12138, 2127, 2839, 3957, 431, 1579, 6383, 9784, 5874, 677, 3336, 6234, 2766, 1323, 9115, 12237, 2031, 6956, 6413, 2281, 3969, 3991, 12133, 9522, 4737, 10996, 4774, 5429, 11871, 3772, 453, 5908, 2882, 1805, 2051, 1954, 11713, 3963, 2447, 6142, 8174, 3030, 1843, 2361, 12071, 2908, 3529, 3434, 3202, 7796, 2057, 5369, 11939, 1512, 6906, 10474, 11026, 49, 10806, 5915, 1489, 9789, 5942, 10706, 10431, 7535, 426, 8974, 3757, 10314, 9364, 347, 5868, 9551, 9634, 6554, 10596, 9280, 11566, 174, 2948, 2503, 6507, 10723, 11606, 2459, 64, 3656, 8455, 5257, 5919, 7856, 1747, 9166, 5486, 9235, 6065, 835, 3570, 4240, 11580, 4046, 10970, 9139, 1058, 8210, 11848, 922, 7967, 1958, 10211, 1112, 3728, 4049, 11130, 5990, 1404, 325, 948, 11143, 6190, 295, 11637, 5766, 8212, 8273, 2919, 8527, 6119, 6992, 8333, 1360, 2555, 6167, 1200, 7105, 7991, 3329, 9597, 12121, 5106, 5961, 10695, 10327, 3051, 9923, 4896, 9326, 81, 3091, 1000, 7969, 4611, 726, 1853, 12149, 4255, 11112, 2768, 10654, 1062, 2294, 3553, 4805, 2747, 4846, 8577, 9154, 1170, 2319, 790, 11334, 9275, 9088, 1326, 5086, 9094, 6429, 11077, 10643, 3504, 3542, 8668, 9744, 1479, 1, 8246, 7143, 11567, 10984, 4134, 5736, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, 9650, 7468, 949, 9664, 2975, 11726, 2744, 9283, 10092, 5067, 12171, 2476, 3748, 11336, 6522, 827, 9452, 5374, 12159, 7935, 3296, 3949, 9893, 4452, 10908, 2525, 3584, 8112, 8011, 10616, 4989, 6958, 11809, 9447, 12280, 1022, 11950, 9821, 11745, 5791, 5092, 2089, 9005, 2881, 3289, 2013, 9048, 729, 7901, 1260, 5755, 4632, 11955, 2426, 10593, 1428, 4890, 5911, 3932, 9558, 8830, 3637, 5542, 145, 5179, 8595, 3707, 10530, 355, 3382, 4231, 9741, 1207, 9041, 7012, 1168, 10146, 11224, 4645, 11885, 10911, 10377, 435, 7952, 4096, 493, 9908, 6845, 6039, 2422, 2187, 9723, 8643, 9852, 9302, 6022, 7278, 1002, 4284, 5088, 1607, 7313, 875, 8509, 9430, 1045, 2481, 5012, 7428, 354, 6591, 9377, 11847, 2401, 1067, 7188, 11516, 390, 8511, 8456, 7270, 545, 8585, 9611, 12047, 1537, 4143, 4714, 4885, 1017, 5084, 1632, 3066, 27, 1440, 8526, 9273, 12046, 11618, 9289, 3400, 9890, 3136, 7098, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 2249, 4048, 2884, 11136, 2126, 1630, 9103, 5407, 2686, 9042, 2969, 8311, 9424, 9919, 8779, 5332, 10626, 1777, 4654, 10863, 7351, 3636, 9585, 5291, 8374, 2166, 4919, 12176, 9140, 12129, 7852, 12286, 4895, 10805, 2780, 5195, 2305, 7247, 9644, 4053, 10600, 3364, 3271, 4057, 4414, 9442, 7917, 2174};
+
+uint16_t oqs_kex_rlwe_newhope_omegas_inv_montgomery[PARAM_N / 2] = {4075, 5315, 4324, 4916, 10120, 11767, 7210, 9027, 10316, 6715, 1278, 9945, 3514, 11248, 11271, 5925, 147, 8500, 7840, 6833, 5537, 4749, 4467, 7500, 11099, 9606, 6171, 8471, 8429, 5445, 11239, 7753, 9090, 12233, 5529, 5206, 10587, 1987, 11635, 3565, 5415, 8646, 6153, 6427, 7341, 6152, 10561, 400, 8410, 1922, 2033, 8291, 1359, 6854, 11035, 973, 8579, 6093, 6950, 5446, 11821, 8301, 11907, 316, 52, 3174, 10966, 9523, 6055, 8953, 11612, 6415, 2505, 5906, 10710, 11858, 8332, 9450, 10162, 151, 3482, 787, 5468, 1010, 4169, 9162, 5241, 9369, 7509, 8844, 7232, 4698, 192, 1321, 10240, 4912, 885, 6281, 10333, 7280, 8757, 11286, 58, 12048, 12147, 11184, 8812, 6608, 2844, 3438, 4212, 11314, 8687, 6068, 421, 8209, 3600, 3263, 7665, 6077, 7507, 5886, 3029, 6695, 4213, 504, 11684, 2302, 1962, 1594, 6328, 7183, 168, 2692, 8960, 4298, 5184, 11089, 6122, 9734, 10929, 3956, 5297, 6170, 3762, 9370, 4016, 4077, 6523, 652, 11994, 6099, 1146, 11341, 11964, 10885, 6299, 1159, 8240, 8561, 11177, 2078, 10331, 4322, 11367, 441, 4079, 11231, 3150, 1319, 8243, 709, 8049, 8719, 11454, 6224, 3054, 6803, 3123, 10542, 4433, 6370, 7032, 3834, 8633, 12225, 9830, 683, 1566, 5782, 9786, 9341, 12115, 723, 3009, 1693, 5735, 2655, 2738, 6421, 11942, 2925, 1975, 8532, 3315, 11863, 4754, 1858, 1583, 6347, 2500, 10800, 6374, 1483, 12240, 1263, 1815, 5383, 10777, 350, 6920, 10232, 4493, 9087, 8855, 8760, 9381, 218, 9928, 10446, 9259, 4115, 6147, 9842, 8326, 576, 10335, 10238, 10484, 9407, 6381, 11836, 8517, 418, 6860, 7515, 1293, 7552, 2767, 156, 8298, 8320, 10008, 5876, 5333, 10258, 10115, 4372, 2847, 7875, 8232, 9018, 8925, 1689, 8236, 2645, 5042, 9984, 7094, 9509, 1484, 7394, 3, 4437, 160, 3149, 113, 7370, 10123, 3915, 6998, 2704, 8653, 4938, 1426, 7635, 10512, 1663, 6957, 3510, 2370, 2865, 3978, 9320, 3247, 9603, 6882, 3186, 10659, 10163, 1153, 9405, 8241, 10040, 2178, 1544, 5559, 420, 8304, 4905, 476, 3531, 5191, 9153, 2399, 8889, 3000, 671, 243, 3016, 3763, 10849, 12262, 9223, 10657, 7205, 11272, 7404, 7575, 8146, 10752, 242, 2678, 3704, 11744, 5019, 3833, 3778, 11899, 773, 5101, 11222, 9888, 442, 2912, 5698, 11935, 4861, 7277, 9808, 11244, 2859, 3780, 11414, 4976, 10682, 7201, 8005, 11287, 5011, 6267, 2987, 2437, 3646, 2566, 10102, 9867, 6250, 5444, 2381, 11796, 8193, 4337, 11854, 1912, 1378, 404, 7644, 1065, 2143, 11121, 5277, 3248, 11082, 2548, 8058, 8907, 11934, 1759, 8582, 3694, 7110, 12144, 6747, 8652, 3459, 2731, 8357, 6378, 7399, 10861, 1696, 9863, 334, 7657, 6534, 11029, 4388, 11560, 3241, 10276, 9000, 9408, 3284, 10200, 7197, 6498, 544, 2468, 339, 11267, 9, 2842, 480, 5331, 7300, 1673, 4278, 4177, 8705, 9764, 1381, 7837, 2396, 8340, 8993, 4354, 130, 6915, 2837, 11462, 5767, 953, 8541, 9813, 118, 7222, 2197, 3006, 9545, 563, 9314, 2625, 11340, 4821, 2639, 7266, 5828, 6561, 7698, 3328, 6512, 1351, 7311, 6553, 8155, 1305, 722, 5146, 4043, 12288, 10810, 2545, 3621, 8747, 8785, 1646, 1212, 5860, 3195, 7203, 10963, 3201, 3014, 955, 11499, 9970, 11119, 3135, 3712, 7443, 9542, 7484, 8736, 9995, 11227, 1635, 9521, 1177, 8034, 140, 10436, 11563, 7678, 4320, 11289, 9198, 12208, 2963, 7393, 2366, 9238};
+
+uint16_t oqs_kex_rlwe_newhope_psis_bitrev_montgomery[PARAM_N] = {4075, 6974, 7373, 7965, 3262, 5079, 522, 2169, 6364, 1018, 1041, 8775, 2344, 11011, 5574, 1973, 4536, 1050, 6844, 3860, 3818, 6118, 2683, 1190, 4789, 7822, 7540, 6752, 5456, 4449, 3789, 12142, 11973, 382, 3988, 468, 6843, 5339, 6196, 3710, 11316, 1254, 5435, 10930, 3998, 10256, 10367, 3879, 11889, 1728, 6137, 4948, 5862, 6136, 3643, 6874, 8724, 654, 10302, 1702, 7083, 6760, 56, 3199, 9987, 605, 11785, 8076, 5594, 9260, 6403, 4782, 6212, 4624, 9026, 8689, 4080, 11868, 6221, 3602, 975, 8077, 8851, 9445, 5681, 3477, 1105, 142, 241, 12231, 1003, 3532, 5009, 1956, 6008, 11404, 7377, 2049, 10968, 12097, 7591, 5057, 3445, 4780, 2920, 7048, 3127, 8120, 11279, 6821, 11502, 8807, 12138, 2127, 2839, 3957, 431, 1579, 6383, 9784, 5874, 677, 3336, 6234, 2766, 1323, 9115, 12237, 2031, 6956, 6413, 2281, 3969, 3991, 12133, 9522, 4737, 10996, 4774, 5429, 11871, 3772, 453, 5908, 2882, 1805, 2051, 1954, 11713, 3963, 2447, 6142, 8174, 3030, 1843, 2361, 12071, 2908, 3529, 3434, 3202, 7796, 2057, 5369, 11939, 1512, 6906, 10474, 11026, 49, 10806, 5915, 1489, 9789, 5942, 10706, 10431, 7535, 426, 8974, 3757, 10314, 9364, 347, 5868, 9551, 9634, 6554, 10596, 9280, 11566, 174, 2948, 2503, 6507, 10723, 11606, 2459, 64, 3656, 8455, 5257, 5919, 7856, 1747, 9166, 5486, 9235, 6065, 835, 3570, 4240, 11580, 4046, 10970, 9139, 1058, 8210, 11848, 922, 7967, 1958, 10211, 1112, 3728, 4049, 11130, 5990, 1404, 325, 948, 11143, 6190, 295, 11637, 5766, 8212, 8273, 2919, 8527, 6119, 6992, 8333, 1360, 2555, 6167, 1200, 7105, 7991, 3329, 9597, 12121, 5106, 5961, 10695, 10327, 3051, 9923, 4896, 9326, 81, 3091, 1000, 7969, 4611, 726, 1853, 12149, 4255, 11112, 2768, 10654, 1062, 2294, 3553, 4805, 2747, 4846, 8577, 9154, 1170, 2319, 790, 11334, 9275, 9088, 1326, 5086, 9094, 6429, 11077, 10643, 3504, 3542, 8668, 9744, 1479, 1, 8246, 7143, 11567, 10984, 4134, 5736, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, 9650, 7468, 949, 9664, 2975, 11726, 2744, 9283, 10092, 5067, 12171, 2476, 3748, 11336, 6522, 827, 9452, 5374, 12159, 7935, 3296, 3949, 9893, 4452, 10908, 2525, 3584, 8112, 8011, 10616, 4989, 6958, 11809, 9447, 12280, 1022, 11950, 9821, 11745, 5791, 5092, 2089, 9005, 2881, 3289, 2013, 9048, 729, 7901, 1260, 5755, 4632, 11955, 2426, 10593, 1428, 4890, 5911, 3932, 9558, 8830, 3637, 5542, 145, 5179, 8595, 3707, 10530, 355, 3382, 4231, 9741, 1207, 9041, 7012, 1168, 10146, 11224, 4645, 11885, 10911, 10377, 435, 7952, 4096, 493, 9908, 6845, 6039, 2422, 2187, 9723, 8643, 9852, 9302, 6022, 7278, 1002, 4284, 5088, 1607, 7313, 875, 8509, 9430, 1045, 2481, 5012, 7428, 354, 6591, 9377, 11847, 2401, 1067, 7188, 11516, 390, 8511, 8456, 7270, 545, 8585, 9611, 12047, 1537, 4143, 4714, 4885, 1017, 5084, 1632, 3066, 27, 1440, 8526, 9273, 12046, 11618, 9289, 3400, 9890, 3136, 7098, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 2249, 4048, 2884, 11136, 2126, 1630, 9103, 5407, 2686, 9042, 2969, 8311, 9424, 9919, 8779, 5332, 10626, 1777, 4654, 10863, 7351, 3636, 9585, 5291, 8374, 2166, 4919, 12176, 9140, 12129, 7852, 12286, 4895, 10805, 2780, 5195, 2305, 7247, 9644, 4053, 10600, 3364, 3271, 4057, 4414, 9442, 7917, 2174, 3947, 11951, 2455, 6599, 10545, 10975, 3654, 2894, 7681, 7126, 7287, 12269, 4119, 3343, 2151, 1522, 7174, 7350, 11041, 2442, 2148, 5959, 6492, 8330, 8945, 5598, 3624, 10397, 1325, 6565, 1945, 11260, 10077, 2674, 3338, 3276, 11034, 506, 6505, 1392, 5478, 8778, 1178, 2776, 3408, 10347, 11124, 2575, 9489, 12096, 6092, 10058, 4167, 6085, 923, 11251, 11912, 4578, 10669, 
11914, 425, 10453, 392, 10104, 8464, 4235, 8761, 7376, 2291, 3375, 7954, 8896, 6617, 7790, 1737, 11667, 3982, 9342, 6680, 636, 6825, 7383, 512, 4670, 2900, 12050, 7735, 994, 1687, 11883, 7021, 146, 10485, 1403, 5189, 6094, 2483, 2054, 3042, 10945, 3981, 10821, 11826, 8882, 8151, 180, 9600, 7684, 5219, 10880, 6780, 204, 11232, 2600, 7584, 3121, 3017, 11053, 7814, 7043, 4251, 4739, 11063, 6771, 7073, 9261, 2360, 11925, 1928, 11825, 8024, 3678, 3205, 3359, 11197, 5209, 8581, 3238, 8840, 1136, 9363, 1826, 3171, 4489, 7885, 346, 2068, 1389, 8257, 3163, 4840, 6127, 8062, 8921, 612, 4238, 10763, 8067, 125, 11749, 10125, 5416, 2110, 716, 9839, 10584, 11475, 11873, 3448, 343, 1908, 4538, 10423, 7078, 4727, 1208, 11572, 3589, 2982, 1373, 1721, 10753, 4103, 2429, 4209, 5412, 5993, 9011, 438, 3515, 7228, 1218, 8347, 5232, 8682, 1327, 7508, 4924, 448, 1014, 10029, 12221, 4566, 5836, 12229, 2717, 1535, 3200, 5588, 5845, 412, 5102, 7326, 3744, 3056, 2528, 7406, 8314, 9202, 6454, 6613, 1417, 10032, 7784, 1518, 3765, 4176, 5063, 9828, 2275, 6636, 4267, 6463, 2065, 7725, 3495, 8328, 8755, 8144, 10533, 5966, 12077, 9175, 9520, 5596, 6302, 8400, 579, 6781, 11014, 5734, 11113, 11164, 4860, 1131, 10844, 9068, 8016, 9694, 3837, 567, 9348, 7000, 6627, 7699, 5082, 682, 11309, 5207, 4050, 7087, 844, 7434, 3769, 293, 9057, 6940, 9344, 10883, 2633, 8190, 3944, 5530, 5604, 3480, 2171, 9282, 11024, 2213, 8136, 3805, 767, 12239, 216, 11520, 6763, 10353, 7, 8566, 845, 7235, 3154, 4360, 3285, 10268, 2832, 3572, 1282, 7559, 3229, 8360, 10583, 6105, 3120, 6643, 6203, 8536, 8348, 6919, 3536, 9199, 10891, 11463, 5043, 1658, 5618, 8787, 5789, 4719, 751, 11379, 6389, 10783, 3065, 7806, 6586, 2622, 5386, 510, 7628, 6921, 578, 10345, 11839, 8929, 4684, 12226, 7154, 9916, 7302, 8481, 3670, 11066, 2334, 1590, 7878, 10734, 1802, 1891, 5103, 6151, 8820, 3418, 7846, 9951, 4693, 417, 9996, 9652, 4510, 2946, 5461, 365, 881, 1927, 1015, 11675, 11009, 1371, 12265, 2485, 11385, 5039, 6742, 8449, 1842, 12217, 8176, 9577, 4834, 7937, 9461, 2643, 11194, 3045, 6508, 4094, 3451, 7911, 11048, 5406, 4665, 3020, 6616, 11345, 7519, 3669, 5287, 1790, 7014, 5410, 11038, 11249, 2035, 6125, 10407, 4565, 7315, 5078, 10506, 2840, 2478, 9270, 4194, 9195, 4518, 7469, 1160, 6878, 2730, 10421, 10036, 1734, 3815, 10939, 5832, 10595, 10759, 4423, 8420, 9617, 7119, 11010, 11424, 9173, 189, 10080, 10526, 3466, 10588, 7592, 3578, 11511, 7785, 9663, 530, 12150, 8957, 2532, 3317, 9349, 10243, 1481, 9332, 3454, 3758, 7899, 4218, 2593, 11410, 2276, 982, 6513, 1849, 8494, 9021, 4523, 7988, 8, 457, 648, 150, 8000, 2307, 2301, 874, 5650, 170, 9462, 2873, 9855, 11498, 2535, 11169, 5808, 12268, 9687, 1901, 7171, 11787, 3846, 1573, 6063, 3793, 466, 11259, 10608, 3821, 6320, 4649, 6263, 2929};
+
+uint16_t oqs_kex_rlwe_newhope_psis_inv_montgomery[PARAM_N] = {256, 10570, 1510, 7238, 1034, 7170, 6291, 7921, 11665, 3422, 4000, 2327, 2088, 5565, 795, 10647, 1521, 5484, 2539, 7385, 1055, 7173, 8047, 11683, 1669, 1994, 3796, 5809, 4341, 9398, 11876, 12230, 10525, 12037, 12253, 3506, 4012, 9351, 4847, 2448, 7372, 9831, 3160, 2207, 5582, 2553, 7387, 6322, 9681, 1383, 10731, 1533, 219, 5298, 4268, 7632, 6357, 9686, 8406, 4712, 9451, 10128, 4958, 5975, 11387, 8649, 11769, 6948, 11526, 12180, 1740, 10782, 6807, 2728, 7412, 4570, 4164, 4106, 11120, 12122, 8754, 11784, 3439, 5758, 11356, 6889, 9762, 11928, 1704, 1999, 10819, 12079, 12259, 7018, 11536, 1648, 1991, 2040, 2047, 2048, 10826, 12080, 8748, 8272, 8204, 1172, 1923, 7297, 2798, 7422, 6327, 4415, 7653, 6360, 11442, 12168, 7005, 8023, 9924, 8440, 8228, 2931, 7441, 1063, 3663, 5790, 9605, 10150, 1450, 8985, 11817, 10466, 10273, 12001, 3470, 7518, 1074, 1909, 7295, 9820, 4914, 702, 5367, 7789, 8135, 9940, 1420, 3714, 11064, 12114, 12264, 1752, 5517, 9566, 11900, 1700, 3754, 5803, 829, 1874, 7290, 2797, 10933, 5073, 7747, 8129, 6428, 6185, 11417, 1631, 233, 5300, 9535, 10140, 11982, 8734, 8270, 2937, 10953, 8587, 8249, 2934, 9197, 4825, 5956, 4362, 9401, 1343, 3703, 529, 10609, 12049, 6988, 6265, 895, 3639, 4031, 4087, 4095, 585, 10617, 8539, 4731, 4187, 9376, 3095, 9220, 10095, 10220, 1460, 10742, 12068, 1724, 5513, 11321, 6884, 2739, 5658, 6075, 4379, 11159, 10372, 8504, 4726, 9453, 3106, 7466, 11600, 10435, 8513, 9994, 8450, 9985, 3182, 10988, 8592, 2983, 9204, 4826, 2445, 5616, 6069, 867, 3635, 5786, 11360, 5134, 2489, 10889, 12089, 1727, 7269, 2794, 9177, 1311, 5454, 9557, 6632, 2703, 9164, 10087, 1441, 3717, 531, 3587, 2268, 324, 5313, 759, 1864, 5533, 2546, 7386, 9833, 8427, 4715, 11207, 1601, 7251, 4547, 11183, 12131, 1733, 10781, 10318, 1474, 10744, 5046, 4232, 11138, 10369, 6748, 964, 7160, 4534, 7670, 8118, 8182, 4680, 11202, 6867, 981, 8918, 1274, 182, 26, 7026, 8026, 11680, 12202, 10521, 1503, 7237, 4545, 5916, 9623, 8397, 11733, 10454, 3249, 9242, 6587, 941, 1890, 270, 10572, 6777, 9746, 6659, 6218, 6155, 6146, 878, 1881, 7291, 11575, 12187, 1741, 7271, 8061, 11685, 6936, 4502, 9421, 4857, 4205, 7623, 1089, 10689, 1527, 8996, 10063, 11971, 10488, 6765, 2722, 3900, 9335, 11867, 6962, 11528, 5158, 4248, 4118, 5855, 2592, 5637, 6072, 2623, 7397, 8079, 9932, 4930, 5971, 853, 3633, 519, 8852, 11798, 3441, 11025, 1575, 225, 8810, 11792, 12218, 3501, 9278, 3081, 9218, 4828, 7712, 8124, 11694, 12204, 3499, 4011, 573, 3593, 5780, 7848, 9899, 10192, 1456, 208, 7052, 2763, 7417, 11593, 10434, 12024, 8740, 11782, 10461, 3250, 5731, 7841, 9898, 1414, 202, 3540, 7528, 2831, 2160, 10842, 5060, 4234, 4116, 588, 84, 12, 7024, 2759, 9172, 6577, 11473, 1639, 9012, 3043, 7457, 6332, 11438, 1634, 1989, 9062, 11828, 8712, 11778, 12216, 10523, 6770, 9745, 10170, 4964, 9487, 6622, 946, 8913, 6540, 6201, 4397, 9406, 8366, 9973, 8447, 8229, 11709, 8695, 10020, 3187, 5722, 2573, 10901, 6824, 4486, 4152, 9371, 8361, 2950, 2177, 311, 1800, 9035, 8313, 11721, 3430, 490, 70, 10, 1757, 251, 3547, 7529, 11609, 3414, 7510, 4584, 4166, 9373, 1339, 5458, 7802, 11648, 1664, 7260, 9815, 10180, 6721, 9738, 10169, 8475, 8233, 9954, 1422, 8981, 1283, 5450, 11312, 1616, 3742, 11068, 10359, 4991, 713, 3613, 9294, 8350, 4704, 672, 96, 7036, 9783, 11931, 3460, 5761, 823, 10651, 12055, 10500, 1500, 5481, 783, 3623, 11051, 8601, 8251, 8201, 11705, 10450, 5004, 4226, 7626, 2845, 2162, 3820, 7568, 9859, 3164, 452, 10598, 1514, 5483, 6050, 6131, 4387, 7649, 8115, 6426, 918, 
8909, 8295, 1185, 5436, 11310, 8638, 1234, 5443, 11311, 5127, 2488, 2111, 10835, 5059, 7745, 2862, 3920, 560, 80, 1767, 2008, 3798, 11076, 6849, 2734, 10924, 12094, 8750, 1250, 10712, 6797, 971, 7161, 1023, 8924, 4786, 7706, 4612, 4170, 7618, 6355, 4419, 5898, 11376, 10403, 10264, 6733, 4473, 639, 5358, 2521, 9138, 3061, 5704, 4326, 618, 5355, 765, 5376, 768, 7132, 4530, 9425, 3102, 9221, 6584, 11474, 10417, 10266, 12000, 6981, 6264, 4406, 2385, 7363, 4563, 4163, 7617, 9866, 3165, 9230, 11852, 10471, 5007, 5982, 11388, 5138, 734, 3616, 11050, 12112, 6997, 11533, 12181, 10518, 12036, 3475, 2252, 7344, 9827, 4915, 9480, 6621, 4457, 7659, 9872, 6677, 4465, 4149, 7615, 4599, 657, 3605, 515, 10607, 6782, 4480, 640, 1847, 3775, 5806, 2585, 5636, 9583, 1369, 10729, 8555, 10000, 11962, 5220, 7768, 8132, 8184, 9947, 1421, 203, 29, 8782, 11788, 1684, 10774, 10317, 4985, 9490, 8378, 4708, 11206, 5112, 5997, 7879, 11659, 12199, 8765, 10030, 4944, 5973, 6120, 6141, 6144, 7900, 11662, 1666, 238, 34, 3516, 5769, 9602, 8394, 9977, 6692, 956, 10670, 6791, 9748, 11926, 8726, 11780, 5194, 742, 106, 8793, 10034, 3189, 10989, 5081, 4237, 5872, 4350, 2377, 10873, 6820, 6241, 11425, 10410, 10265, 3222, 5727, 9596, 4882, 2453, 2106, 3812, 11078, 12116, 5242, 4260, 11142, 8614, 11764, 12214, 5256, 4262, 4120, 11122, 5100, 11262, 5120, 2487, 5622, 9581, 8391, 8221, 2930, 10952, 12098, 6995, 6266, 9673, 4893, 699, 3611, 4027, 5842, 11368, 1624, 232, 8811, 8281, 1183, 169, 8802, 3013, 2186, 5579, 797, 3625, 4029, 11109, 1587, 7249, 11569, 8675, 6506, 2685, 10917, 12093, 12261, 12285, 1755, 7273, 1039, 1904, 272, 3550, 9285, 3082, 5707, 6082, 4380, 7648, 11626, 5172, 4250, 9385, 8363, 8217, 4685, 5936, 848, 8899, 6538, 934, 1889, 3781, 9318, 10109, 10222, 6727, 961, 5404, 772, 5377, 9546, 8386, 1198, 8949, 3034, 2189, 7335, 4559, 5918, 2601, 10905, 5069, 9502, 3113, 7467, 8089, 11689, 5181, 9518, 8382, 2953, 3933, 4073, 4093, 7607, 8109, 2914, 5683, 4323, 11151, 1593, 10761, 6804, 972, 3650, 2277, 5592, 4310, 7638, 9869, 4921, 703, 1856, 9043, 4803, 9464, 1352, 8971, 11815, 5199, 7765, 6376, 4422, 7654, 2849, 407, 8836, 6529, 7955, 2892, 9191, 1313, 10721, 12065, 12257, 1751, 9028, 8312, 2943, 2176, 3822, 546, 78, 8789, 11789, 10462, 12028, 6985, 4509, 9422, 1346, 5459, 4291, 613, 10621, 6784, 9747, 3148, 7472, 2823, 5670, 810, 7138, 8042, 4660, 7688, 6365, 6176, 6149, 2634, 5643, 9584, 10147, 11983, 5223, 9524, 11894, 10477, 8519, 1217, 3685, 2282, 326, 10580, 3267, 7489, 4581, 2410, 5611, 11335, 6886, 8006, 8166, 11700, 3427, 11023, 8597, 10006, 3185, 455, 65, 5276, 7776, 4622, 5927, 7869, 9902, 11948, 5218, 2501, 5624, 2559, 10899, 1557, 1978, 10816, 10323, 8497, 4725, 675, 1852, 10798, 12076, 10503, 3256, 9243, 3076, 2195, 10847, 12083, 10504, 12034, 10497};
diff --git a/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64.c b/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64.c
new file mode 100644
index 0000000000000000000000000000000000000000..60dede4391839bed4b9f8bd1947550941c3afae0
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64.c
@@ -0,0 +1,857 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral 
+*       Diffie-Hellman key exchange.
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: modular arithmetic optimized for x64 platforms
+*
+*********************************************************************************************/
+
+#include "../SIDH_internal.h"
+
+// Global constants
+extern const uint64_t p751[NWORDS_FIELD];
+extern const uint64_t p751p1[NWORDS_FIELD];
+extern const uint64_t p751x2[NWORDS_FIELD];
+
+// Modular addition, c = a+b mod p751.
+// Inputs: a, b in [0, 2*p751-1]
+// Output: c in [0, 2*p751-1]
+__inline void oqs_sidh_cln16_fpadd751(const digit_t *a, const digit_t *b, digit_t *c) {
+
+#if (OS_TARGET == OS_WIN)
+	unsigned int i, carry = 0;
+	digit_t mask;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(carry, a[i], b[i], carry, c[i]);
+	}
+
+	carry = 0;
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		SUBC(carry, c[i], ((digit_t *) p751x2)[i], carry, c[i]);
+	}
+	mask = 0 - (digit_t) carry;
+
+	carry = 0;
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(carry, c[i], ((digit_t *) p751x2)[i] & mask, carry, c[i]);
+	}
+
+#elif (OS_TARGET == OS_LINUX)
+
+	oqs_sidh_cln16_fpadd751_asm(a, b, c);
+
+#endif
+}
+
+// Modular subtraction, c = a-b mod p751.
+// Inputs: a, b in [0, 2*p751-1]
+// Output: c in [0, 2*p751-1]
+__inline void oqs_sidh_cln16_fpsub751(const digit_t *a, const digit_t *b, digit_t *c) {
+
+#if (OS_TARGET == OS_WIN)
+	unsigned int i, borrow = 0;
+	digit_t mask;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		SUBC(borrow, a[i], b[i], borrow, c[i]);
+	}
+	mask = 0 - (digit_t) borrow;
+
+	borrow = 0;
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(borrow, c[i], ((digit_t *) p751x2)[i] & mask, borrow, c[i]);
+	}
+
+#elif (OS_TARGET == OS_LINUX)
+
+	oqs_sidh_cln16_fpsub751_asm(a, b, c);
+
+#endif
+}
+
+// Modular negation, a = -a mod p751.
+// Input/output: a in [0, 2*p751-1]
+__inline void oqs_sidh_cln16_fpneg751(digit_t *a) {
+	unsigned int i, borrow = 0;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		SUBC(borrow, ((digit_t *) p751x2)[i], a[i], borrow, a[i]);
+	}
+}
+
+// Modular division by two, c = a/2 mod p751.
+// Input : a in [0, 2*p751-1]
+// Output: c in [0, 2*p751-1]
+void oqs_sidh_cln16_fpdiv2_751(const digit_t *a, digit_t *c) {
+	unsigned int i, carry = 0;
+	digit_t mask;
+
+	mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p751
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(carry, a[i], ((digit_t *) p751)[i] & mask, carry, c[i]);
+	}
+
+	oqs_sidh_cln16_mp_shiftr1(c, NWORDS_FIELD);
+}
+
+// Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1].
+void oqs_sidh_cln16_fpcorrection751(digit_t *a) {
+	unsigned int i, borrow = 0;
+	digit_t mask;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		SUBC(borrow, a[i], ((digit_t *) p751)[i], borrow, a[i]);
+	}
+	mask = 0 - (digit_t) borrow;
+
+	borrow = 0;
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(borrow, a[i], ((digit_t *) p751)[i] & mask, borrow, a[i]);
+	}
+}
+
+// Multiprecision multiplication, c = a*b, where a and b are both nwords digits long.
+void oqs_sidh_cln16_mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) {
+
+	UNREFERENCED_PARAMETER(nwords);
+
+#if (OS_TARGET == OS_WIN)
+	digit_t t = 0;
+	uint128_t uv = {0};
+	unsigned int carry = 0;
+
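+	// Comba (column-wise) multiplication: each block below accumulates the partial products
+	// a[i]*b[j] with i+j = k into the 128-bit accumulator uv (t catches the overflow),
+	// then writes column k of the product to c[k].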
+	MULADD128(a[0], b[0], uv, carry, uv);
+	t += carry;
+	c[0] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[0], b[1], uv, carry, uv);
+	t += carry;
+	MULADD128(a[1], b[0], uv, carry, uv);
+	t += carry;
+	c[1] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[0], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[1], b[1], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[0], uv, carry, uv);
+	t += carry;
+	c[2] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[0], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[1], uv, carry, uv);
+	t += carry;
+	MULADD128(a[1], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[0], uv, carry, uv);
+	t += carry;
+	c[3] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[0], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[1], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[1], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[0], uv, carry, uv);
+	t += carry;
+	c[4] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[0], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[1], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[1], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[0], uv, carry, uv);
+	t += carry;
+	c[5] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[0], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[1], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[1], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[0], uv, carry, uv);
+	t += carry;
+	c[6] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[0], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[1], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[1], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[0], uv, carry, uv);
+	t += carry;
+	c[7] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[0], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[1], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[1], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[0], uv, carry, uv);
+	t += carry;
+	c[8] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[0], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[1], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[1], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[0], uv, carry, uv);
+	t += carry;
+	c[9] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[0], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[1], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[1], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[0], uv, carry, uv);
+	t += carry;
+	c[10] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[0], b[11], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[1], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[1], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[11], b[0], uv, carry, uv);
+	t += carry;
+	c[11] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[1], b[11], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[11], b[1], uv, carry, uv);
+	t += carry;
+	c[12] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[11], b[2], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[2], b[11], uv, carry, uv);
+	t += carry;
+	c[13] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[11], b[3], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[3], b[11], uv, carry, uv);
+	t += carry;
+	c[14] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[11], b[4], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[4], b[11], uv, carry, uv);
+	t += carry;
+	c[15] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[11], b[5], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[5], b[11], uv, carry, uv);
+	t += carry;
+	c[16] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[11], b[6], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[6], b[11], uv, carry, uv);
+	t += carry;
+	c[17] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[11], b[7], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[7], b[11], uv, carry, uv);
+	t += carry;
+	c[18] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[11], b[8], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[8], b[11], uv, carry, uv);
+	t += carry;
+	c[19] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[11], b[9], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[9], b[11], uv, carry, uv);
+	t += carry;
+	c[20] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(a[11], b[10], uv, carry, uv);
+	t += carry;
+	MULADD128(a[10], b[11], uv, carry, uv);
+	t += carry;
+	c[21] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+
+	MULADD128(a[11], b[11], uv, carry, uv);
+	c[22] = uv[0];
+	c[23] = uv[1];
+
+#elif (OS_TARGET == OS_LINUX)
+
+	oqs_sidh_cln16_mul751_asm(a, b, c);
+
+#endif
+}
+
+// Efficient Montgomery reduction using the Comba method and exploiting the special form of the prime p751.
+// mc = ma*R^-1 mod p751x2, where R = 2^768.
+// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
+// ma is assumed to be in Montgomery representation.
+void oqs_sidh_cln16_rdc_mont(const oqs_sidh_cln16_dfelm_t ma, oqs_sidh_cln16_felm_t mc) {
+#if (OS_TARGET == OS_WIN)
+	unsigned int carry;
+	digit_t t = 0;
+	uint128_t uv = {0};
+
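+	// p751+1 = 2^372*3^239 is divisible by 2^320, so the five lowest words of p751p1 are
+	// zero: the low words of ma pass through unchanged below and every partial product
+	// involves only p751p1[5..11].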
+	mc[0] = ma[0];
+	mc[1] = ma[1];
+	mc[2] = ma[2];
+	mc[3] = ma[3];
+	mc[4] = ma[4];
+	MUL128(mc[0], ((digit_t *) p751p1)[5], uv);
+	ADDC(0, uv[0], ma[5], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	mc[5] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = 0;
+
+	MULADD128(mc[0], ((digit_t *) p751p1)[6], uv, carry, uv);
+	MULADD128(mc[1], ((digit_t *) p751p1)[5], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[6], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[6] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[0], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[1], ((digit_t *) p751p1)[6], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[2], ((digit_t *) p751p1)[5], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[7], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[7] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[0], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[1], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[2], ((digit_t *) p751p1)[6], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[3], ((digit_t *) p751p1)[5], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[8], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[8] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[0], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[1], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[2], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[3], ((digit_t *) p751p1)[6], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[4], ((digit_t *) p751p1)[5], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[9], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[9] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[0], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[1], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[2], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[3], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[4], ((digit_t *) p751p1)[6], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[5], ((digit_t *) p751p1)[5], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[10], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[10] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[0], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[1], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[2], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[3], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[4], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[5], ((digit_t *) p751p1)[6], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[6], ((digit_t *) p751p1)[5], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[11], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[11] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[1], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[2], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[3], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[4], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[5], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[6], ((digit_t *) p751p1)[6], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[7], ((digit_t *) p751p1)[5], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[12], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[0] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[2], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[3], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[4], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[5], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[6], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[7], ((digit_t *) p751p1)[6], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[8], ((digit_t *) p751p1)[5], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[13], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[1] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[3], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[4], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[5], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[6], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[7], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[8], ((digit_t *) p751p1)[6], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[9], ((digit_t *) p751p1)[5], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[14], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[2] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[4], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[5], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[6], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[7], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[8], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[9], ((digit_t *) p751p1)[6], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[10], ((digit_t *) p751p1)[5], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[15], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[3] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[5], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[6], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[7], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[8], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[9], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[10], ((digit_t *) p751p1)[6], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[11], ((digit_t *) p751p1)[5], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[16], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[4] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[6], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[7], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[8], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[9], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[10], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[11], ((digit_t *) p751p1)[6], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[17], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[5] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[7], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[8], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[9], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[10], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[11], ((digit_t *) p751p1)[7], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[18], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[6] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[8], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[9], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[10], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[11], ((digit_t *) p751p1)[8], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[19], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[7] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[9], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[10], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[11], ((digit_t *) p751p1)[9], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[20], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[8] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[10], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	MULADD128(mc[11], ((digit_t *) p751p1)[10], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[21], carry, uv[0]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	t += carry;
+	mc[9] = uv[0];
+	uv[0] = uv[1];
+	uv[1] = t;
+	t = 0;
+
+	MULADD128(mc[11], ((digit_t *) p751p1)[11], uv, carry, uv);
+	t += carry;
+	ADDC(0, uv[0], ma[22], carry, mc[10]);
+	ADDC(carry, uv[1], 0, carry, uv[1]);
+	ADDC(0, uv[1], ma[23], carry, mc[11]);
+
+#elif (OS_TARGET == OS_LINUX)
+
+	oqs_sidh_cln16_rdc751_asm(ma, mc);
+
+#endif
+}
diff --git a/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64_asm.S b/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64_asm.S
new file mode 100644
index 0000000000000000000000000000000000000000..8f2cb09cec54294595e53474a9d53f92abce9527
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/AMD64/fp_x64_asm.S
@@ -0,0 +1,2021 @@
+//*******************************************************************************************
+// SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral
+//       Diffie-Hellman key exchange.
+//
+//    Copyright (c) Microsoft Corporation. All rights reserved.
+//
+//
+// Abstract: field arithmetic in x64 assembly for Linux 
+//
+//*******************************************************************************************  
+
+.intel_syntax noprefix 
+
+// Registers that are used for parameter passing:
+#define reg_p1  rdi
+#define reg_p2  rsi
+#define reg_p3  rdx
+
+// p751 + 1
+#define p751p1_5   0xEEB0000000000000
+#define p751p1_6   0xE3EC968549F878A8
+#define p751p1_7   0xDA959B1A13F7CC76
+#define p751p1_8   0x084E9867D6EBE876
+#define p751p1_9   0x8562B5045CB25748
+#define p751p1_10  0x0E12909F97BADC66
+#define p751p1_11  0x00006FE5D541F71C
+
+#define p751_0     0xFFFFFFFFFFFFFFFF
+#define p751_5     0xEEAFFFFFFFFFFFFF
+#define p751_6     0xE3EC968549F878A8
+#define p751_7     0xDA959B1A13F7CC76
+#define p751_8     0x084E9867D6EBE876
+#define p751_9     0x8562B5045CB25748
+#define p751_10    0x0E12909F97BADC66
+#define p751_11    0x00006FE5D541F71C
+
+#define p751x2_0   0xFFFFFFFFFFFFFFFE
+#define p751x2_1   0xFFFFFFFFFFFFFFFF
+#define p751x2_5   0xDD5FFFFFFFFFFFFF
+#define p751x2_6   0xC7D92D0A93F0F151
+#define p751x2_7   0xB52B363427EF98ED
+#define p751x2_8   0x109D30CFADD7D0ED
+#define p751x2_9   0x0AC56A08B964AE90
+#define p751x2_10  0x1C25213F2F75B8CD
+#define p751x2_11  0x0000DFCBAA83EE38
+
+
+.text
+//***********************************************************************
+//  Field addition
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
+//*********************************************************************** 
+.globl oqs_sidh_cln16_fpadd751_asm
+oqs_sidh_cln16_fpadd751_asm:
+  push   r12
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rcx, [reg_p1+64]
+  add    r8, [reg_p2] 
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40] 
+  adc    r14, [reg_p2+48] 
+  adc    r15, [reg_p2+56]
+  adc    rcx, [reg_p2+64] 
+  mov    rax, [reg_p1+72]
+  adc    rax, [reg_p2+72] 
+  mov    [reg_p3+72], rax
+  mov    rax, [reg_p1+80]
+  adc    rax, [reg_p2+80] 
+  mov    [reg_p3+80], rax
+  mov    rax, [reg_p1+88]
+  adc    rax, [reg_p2+88] 
+  mov    [reg_p3+88], rax
+
+  movq   rax, p751x2_0
+  sub    r8, rax
+  movq   rax, p751x2_1
+  sbb    r9, rax
+  sbb    r10, rax
+  sbb    r11, rax
+  sbb    r12, rax
+  movq   rax, p751x2_5
+  sbb    r13, rax
+  movq   rax, p751x2_6
+  sbb    r14, rax
+  movq   rax, p751x2_7
+  sbb    r15, rax
+  movq   rax, p751x2_8
+  sbb    rcx, rax
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15
+  mov    [reg_p3+64], rcx
+  mov    r8, [reg_p3+72]
+  mov    r9, [reg_p3+80]
+  mov    r10, [reg_p3+88]
+  movq   rax, p751x2_9
+  sbb    r8, rax
+  movq   rax, p751x2_10
+  sbb    r9, rax
+  movq   rax, p751x2_11
+  sbb    r10, rax
+  mov    [reg_p3+72], r8
+  mov    [reg_p3+80], r9
+  mov    [reg_p3+88], r10
+  movq   rax, 0
+  sbb    rax, 0
+  
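+  // rax is now 0 or all-ones depending on whether the subtraction of 2*p751 borrowed;
+  // it is used below as a mask to add 2*p751 back conditionally, without a branch.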
+  mov    rsi, p751x2_0
+  and    rsi, rax
+  mov    r8, p751x2_1
+  and    r8, rax
+  movq   r9, p751x2_5
+  and    r9, rax
+  movq   r10, p751x2_6
+  and    r10, rax
+  movq   r11, p751x2_7
+  and    r11, rax
+  movq   r12, p751x2_8
+  and    r12, rax
+  movq   r13, p751x2_9
+  and    r13, rax
+  movq   r14, p751x2_10
+  and    r14, rax
+  movq   r15, p751x2_11
+  and    r15, rax
+  
+  mov    rax, [reg_p3]
+  add    rax, rsi  
+  mov    [reg_p3], rax
+  mov    rax, [reg_p3+8]
+  adc    rax, r8 
+  mov    [reg_p3+8], rax  
+  mov    rax, [reg_p3+16]
+  adc    rax, r8 
+  mov    [reg_p3+16], rax  
+  mov    rax, [reg_p3+24]  
+  adc    rax, r8 
+  mov    [reg_p3+24], rax 
+  mov    rax, [reg_p3+32]  
+  adc    rax, r8 
+  mov    [reg_p3+32], rax 
+  mov    rax, [reg_p3+40]    
+  adc    rax, r9 
+  mov    [reg_p3+40], rax 
+  mov    rax, [reg_p3+48]   
+  adc    rax, r10 
+  mov    [reg_p3+48], rax 
+  mov    rax, [reg_p3+56]   
+  adc    rax, r11  
+  mov    [reg_p3+56], rax 
+  mov    rax, [reg_p3+64]  
+  adc    rax, r12 
+  mov    [reg_p3+64], rax 
+  mov    rax, [reg_p3+72]   
+  adc    rax, r13 
+  mov    [reg_p3+72], rax 
+  mov    rax, [reg_p3+80]   
+  adc    rax, r14 
+  mov    [reg_p3+80], rax 
+  mov    rax, [reg_p3+88]   
+  adc    rax, r15
+  mov    [reg_p3+88], rax 
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Field subtraction
+//  Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
+//*********************************************************************** 
+.globl oqs_sidh_cln16_fpsub751_asm
+oqs_sidh_cln16_fpsub751_asm:
+  push   r12
+  push   r13
+  push   r14
+  push   r15
+  
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rcx, [reg_p1+64]
+  sub    r8, [reg_p2] 
+  sbb    r9, [reg_p2+8] 
+  sbb    r10, [reg_p2+16] 
+  sbb    r11, [reg_p2+24] 
+  sbb    r12, [reg_p2+32] 
+  sbb    r13, [reg_p2+40] 
+  sbb    r14, [reg_p2+48] 
+  sbb    r15, [reg_p2+56]
+  sbb    rcx, [reg_p2+64] 
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15
+  mov    [reg_p3+64], rcx
+  mov    rax, [reg_p1+72]
+  sbb    rax, [reg_p2+72] 
+  mov    [reg_p3+72], rax
+  mov    rax, [reg_p1+80]
+  sbb    rax, [reg_p2+80] 
+  mov    [reg_p3+80], rax
+  mov    rax, [reg_p1+88]
+  sbb    rax, [reg_p2+88] 
+  mov    [reg_p3+88], rax
+  movq   rax, 0
+  sbb    rax, 0
+  
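+  // rax = borrow mask (0 or all-ones); 2*p751 is added back below only when a < b.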
+  mov    rsi, p751x2_0
+  and    rsi, rax
+  mov    r8, p751x2_1
+  and    r8, rax
+  movq   r9, p751x2_5
+  and    r9, rax
+  movq   r10, p751x2_6
+  and    r10, rax
+  movq   r11, p751x2_7
+  and    r11, rax
+  movq   r12, p751x2_8
+  and    r12, rax
+  movq   r13, p751x2_9
+  and    r13, rax
+  movq   r14, p751x2_10
+  and    r14, rax
+  movq   r15, p751x2_11
+  and    r15, rax
+  
+  mov    rax, [reg_p3]
+  add    rax, rsi  
+  mov    [reg_p3], rax
+  mov    rax, [reg_p3+8]
+  adc    rax, r8 
+  mov    [reg_p3+8], rax  
+  mov    rax, [reg_p3+16]
+  adc    rax, r8 
+  mov    [reg_p3+16], rax  
+  mov    rax, [reg_p3+24]  
+  adc    rax, r8 
+  mov    [reg_p3+24], rax 
+  mov    rax, [reg_p3+32]  
+  adc    rax, r8 
+  mov    [reg_p3+32], rax 
+  mov    rax, [reg_p3+40]    
+  adc    rax, r9 
+  mov    [reg_p3+40], rax 
+  mov    rax, [reg_p3+48]   
+  adc    rax, r10 
+  mov    [reg_p3+48], rax 
+  mov    rax, [reg_p3+56]   
+  adc    rax, r11  
+  mov    [reg_p3+56], rax 
+  mov    rax, [reg_p3+64]  
+  adc    rax, r12 
+  mov    [reg_p3+64], rax 
+  mov    rax, [reg_p3+72]   
+  adc    rax, r13 
+  mov    [reg_p3+72], rax 
+  mov    rax, [reg_p3+80]   
+  adc    rax, r14 
+  mov    [reg_p3+80], rax 
+  mov    rax, [reg_p3+88]   
+  adc    rax, r15
+  mov    [reg_p3+88], rax 
+  
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  Integer multiplication
+//  Based on the Karatsuba method
+//  Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
+//  NOTE: a=c or b=c are not allowed
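+//  One level of Karatsuba over 384-bit (6-word) halves: (AH+AL)*(BH+BL), AL*BL and
+//  AH*BH are formed with schoolbook multiplications and then combined.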
+//***********************************************************************
+.globl oqs_sidh_cln16_mul751_asm
+oqs_sidh_cln16_mul751_asm:
+  push   r12
+  push   r13
+  push   r14
+  mov    rcx, reg_p3
+  
+  // rcx[0-5] <- AH+AL
+  xor    rax, rax
+  mov    r8, [reg_p1+48]
+  mov    r9, [reg_p1+56]
+  mov    r10, [reg_p1+64]
+  mov    r11, [reg_p1+72]
+  mov    r12, [reg_p1+80]
+  mov    r13, [reg_p1+88]
+  add    r8, [reg_p1] 
+  adc    r9, [reg_p1+8] 
+  adc    r10, [reg_p1+16] 
+  adc    r11, [reg_p1+24] 
+  adc    r12, [reg_p1+32] 
+  adc    r13, [reg_p1+40] 
+  push   r15  
+  mov    [rcx], r8
+  mov    [rcx+8], r9
+  mov    [rcx+16], r10
+  mov    [rcx+24], r11
+  mov    [rcx+32], r12
+  mov    [rcx+40], r13
+  sbb    rax, 0 
+  sub    rsp, 96           // Allocating space in stack
+       
+  // rcx[6-11] <- BH+BL
+  xor    rdx, rdx
+  mov    r8, [reg_p2+48]
+  mov    r9, [reg_p2+56]
+  mov    r10, [reg_p2+64]
+  mov    r11, [reg_p2+72]
+  mov    r12, [reg_p2+80]
+  mov    r13, [reg_p2+88]
+  add    r8, [reg_p2] 
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40] 
+  mov    [rcx+48], r8
+  mov    [rcx+56], r9
+  mov    [rcx+64], r10
+  mov    [rcx+72], r11
+  mov    [rcx+80], r12
+  mov    [rcx+88], r13
+  sbb    rdx, 0 
+  mov    [rsp+80], rax
+  mov    [rsp+88], rdx
+  
+  // (rsp[0-8],r10,r8,r9) <- (AH+AL)*(BH+BL)
+  mov    r11, [rcx]
+  mov    rax, r8 
+  mul    r11
+  mov    [rsp], rax        // c0
+  mov    r14, rdx
+  
+  xor    r15, r15
+  mov    rax, r9
+  mul    r11
+  xor    r9, r9
+  add    r14, rax
+  adc    r9, rdx
+  
+  mov    r12, [rcx+8] 
+  mov    rax, r8 
+  mul    r12
+  add    r14, rax
+  mov    [rsp+8], r14      // c1 
+  adc    r9, rdx
+  adc    r15, 0
+  
+  xor    r8, r8
+  mov    rax, r10 
+  mul    r11
+  add    r9, rax
+  mov    r13, [rcx+48] 
+  adc    r15, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+16] 
+  mul    r13
+  add    r9, rax
+  adc    r15, rdx 
+  mov    rax, [rcx+56] 
+  adc    r8, 0
+  
+  mul    r12
+  add    r9, rax
+  mov    [rsp+16], r9      // c2 
+  adc    r15, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rcx+72] 
+  mul    r11
+  add    r15, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [rcx+24] 
+  mul    r13
+  add    r15, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, r10 
+  mul    r12
+  add    r15, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    r14, [rcx+16] 
+  mov    rax, [rcx+56] 
+  mul    r14
+  add    r15, rax
+  mov    [rsp+24], r15     // c3 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rcx+80] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+64] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    r15, [rcx+48] 
+  mov    rax, [rcx+32] 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+72] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    r13, [rcx+24] 
+  mov    rax, [rcx+56] 
+  mul    r13
+  add    r8, rax
+  mov    [rsp+32], r8      // c4 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rcx+88] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+64] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+72] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+40] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+80] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r15, [rcx+32] 
+  mov    rax, [rcx+56] 
+  mul    r15
+  add    r9, rax
+  mov    [rsp+40], r9      // c5 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rcx+64] 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [rcx+88] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [rcx+80] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    r11, [rcx+40] 
+  mov    rax, [rcx+56] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [rcx+72] 
+  mul    r13
+  add    r10, rax
+  mov    [rsp+48], r10     // c6 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [rcx+88] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+64] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+80]
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [rcx+72] 
+  mul    r15
+  add    r8, rax
+  mov    [rsp+56], r8      // c7 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [rcx+72] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+80] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [rcx+88] 
+  mul    r13
+  add    r9, rax
+  mov    [rsp+64], r9      // c8 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [rcx+88]
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+
+  mov    rax, [rcx+80] 
+  mul    r11
+  add    r10, rax          // c9 
+  adc    r8, rdx
+  adc    r9, 0
+
+  mov    rax, [rcx+88] 
+  mul    r11
+  add    r8, rax           // c10 
+  adc    r9, rdx           // c11 
+  
+  mov    rax, [rsp+88]
+  mov    rdx, [rcx]
+  and    r12, rax
+  and    r14, rax
+  and    rdx, rax
+  and    r13, rax
+  and    r15, rax
+  and    r11, rax
+  mov    rax, [rsp+48]
+  add    rdx, rax
+  mov    rax, [rsp+56]
+  adc    r12, rax
+  mov    rax, [rsp+64]
+  adc    r14, rax
+  adc    r13, r10
+  adc    r15, r8
+  adc    r11, r9
+  mov    rax, [rsp+80]
+  mov    [rsp+48], rdx
+  mov    [rsp+56], r12
+  mov    [rsp+64], r14
+  mov    [rsp+72], r13
+  mov    [rsp+80], r15
+  mov    [rsp+88], r11
+  
+  mov    r8, [rcx+48]
+  mov    r9, [rcx+56]
+  mov    r10, [rcx+64]
+  mov    r11, [rcx+72]
+  mov    r12, [rcx+80]
+  mov    r13, [rcx+88]
+  and    r8, rax
+  and    r9, rax
+  and    r10, rax
+  and    r11, rax
+  and    r12, rax
+  and    r13, rax
+  mov    rax, [rsp+48]
+  add    r8, rax
+  mov    rax, [rsp+56]
+  adc    r9, rax
+  mov    rax, [rsp+64]
+  adc    r10, rax
+  mov    rax, [rsp+72]
+  adc    r11, rax
+  mov    rax, [rsp+80]
+  adc    r12, rax
+  mov    rax, [rsp+88]
+  adc    r13, rax
+  mov    [rsp+48], r8
+  mov    [rsp+56], r9
+  mov    [rsp+72], r11
+  
+  // rcx[0-11] <- AL*BL
+  mov    r11, [reg_p1]
+  mov    rax, [reg_p2] 
+  mul    r11
+  xor    r9, r9
+  mov    [rcx], rax        // c0
+  mov    [rsp+64], r10
+  mov    r8, rdx
+
+  mov    rax, [reg_p2+8]
+  mul    r11
+  xor    r10, r10
+  add    r8, rax
+  mov    [rsp+80], r12
+  adc    r9, rdx
+
+  mov    r12, [reg_p1+8] 
+  mov    rax, [reg_p2] 
+  mul    r12
+  add    r8, rax
+  mov    [rcx+8], r8       // c1 
+  adc    r9, rdx
+  mov    [rsp+88], r13
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+16] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r13, [reg_p2] 
+  mov    rax, [reg_p1+16] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+8] 
+  mul    r12
+  add    r9, rax
+  mov    [rcx+16], r9      // c2 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+24] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p1+24] 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+16] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    r14, [reg_p1+16] 
+  mov    rax, [reg_p2+8] 
+  mul    r14
+  add    r10, rax
+  mov    [rcx+24], r10     // c3 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [reg_p2+32] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+16] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p1+32] 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+24] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    r13, [reg_p1+24] 
+  mov    rax, [reg_p2+8] 
+  mul    r13
+  add    r8, rax
+  mov    [rcx+32], r8      // c4 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+40] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+16] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+24] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r11, [reg_p1+40] 
+  mov    rax, [reg_p2] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+32] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r15, [reg_p1+32] 
+  mov    rax, [reg_p2+8] 
+  mul    r15
+  add    r9, rax
+  mov    [rcx+40], r9      // c5 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+16] 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+40] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+32] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+8] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+24] 
+  mul    r13
+  add    r10, rax
+  mov    [rcx+48], r10     // c6 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [reg_p2+40] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+16] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+32]
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+24] 
+  mul    r15
+  add    r8, rax
+  mov    [rcx+56], r8      // c7 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+24] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+32] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+40] 
+  mul    r13
+  add    r9, rax
+  mov    [rcx+64], r9     // c8 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+40]
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+
+  mov    rax, [reg_p2+32] 
+  mul    r11
+  add    r10, rax
+  mov    [rcx+72], r10     // c9 
+  adc    r8, rdx
+  adc    r9, 0
+
+  mov    rax, [reg_p2+40] 
+  mul    r11
+  add    r8, rax
+  mov    [rcx+80], r8      // c10 
+  adc    r9, rdx   
+  mov    [rcx+88], r9      // c11 
+
+  // rcx[12-23] <- AH*BH
+  mov    r11, [reg_p1+48]
+  mov    rax, [reg_p2+48] 
+  mul    r11
+  xor    r9, r9
+  mov    [rcx+96], rax       // c0
+  mov    r8, rdx
+
+  mov    rax, [reg_p2+56]
+  mul    r11
+  xor    r10, r10
+  add    r8, rax
+  adc    r9, rdx
+
+  mov    r12, [reg_p1+56] 
+  mov    rax, [reg_p2+48] 
+  mul    r12
+  add    r8, rax
+  mov    [rcx+104], r8      // c1 
+  adc    r9, rdx
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+64] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r13, [reg_p2+48] 
+  mov    rax, [reg_p1+64] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+56] 
+  mul    r12
+  add    r9, rax
+  mov    [rcx+112], r9     // c2 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+72] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p1+72] 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+64] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    r14, [reg_p1+64] 
+  mov    rax, [reg_p2+56] 
+  mul    r14
+  add    r10, rax
+  mov    [rcx+120], r10    // c3 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [reg_p2+80] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+64] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    r15, [reg_p1+80] 
+  mov    rax, r13 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+72] 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    r13, [reg_p1+72] 
+  mov    rax, [reg_p2+56] 
+  mul    r13
+  add    r8, rax
+  mov    [rcx+128], r8     // c4 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+88] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+64] 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+72] 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    r11, [reg_p1+88] 
+  mov    rax, [reg_p2+48] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+80] 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+56] 
+  mul    r15
+  add    r9, rax
+  mov    [rcx+136], r9     // c5 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  xor    r9, r9
+  mov    rax, [reg_p2+64] 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+88] 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+80] 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+56] 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  mov    rax, [reg_p2+72] 
+  mul    r13
+  add    r10, rax
+  mov    [rcx+144], r10    // c6 
+  adc    r8, rdx 
+  adc    r9, 0
+  
+  xor    r10, r10
+  mov    rax, [reg_p2+88] 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+64] 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+80]
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  mov    rax, [reg_p2+72] 
+  mul    r15
+  add    r8, rax
+  mov    [rcx+152], r8     // c7 
+  adc    r9, rdx 
+  adc    r10, 0
+  
+  xor    r8, r8
+  mov    rax, [reg_p2+72] 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+80] 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+88] 
+  mul    r13
+  add    r9, rax
+  mov    [rcx+160], r9     // c8 
+  adc    r10, rdx 
+  adc    r8, 0
+  
+  mov    rax, [reg_p2+88]
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+
+  mov    rax, [reg_p2+80] 
+  mul    r11
+  add    r10, rax
+  mov    [rcx+168], r10     // c9 
+  adc    r8, rdx
+
+  mov    rax, [reg_p2+88] 
+  mul    r11
+  add    r8, rax
+  mov    [rcx+176], r8      // c10 
+  adc    rdx, 0   
+  mov    [rcx+184], rdx     // c11  
+      
+  // [r8-r15,rax,rdx,rdi,[rsp]] <- (AH+AL)*(BH+BL) - AL*BL 
+  mov    r8,  [rsp]
+  sub    r8,  [rcx] 
+  mov    r9,  [rsp+8]
+  sbb    r9,  [rcx+8]
+  mov    r10, [rsp+16]
+  sbb    r10, [rcx+16]
+  mov    r11, [rsp+24]
+  sbb    r11, [rcx+24] 
+  mov    r12, [rsp+32]
+  sbb    r12, [rcx+32]
+  mov    r13, [rsp+40]
+  sbb    r13, [rcx+40] 
+  mov    r14, [rsp+48]
+  sbb    r14, [rcx+48] 
+  mov    r15, [rsp+56]
+  sbb    r15, [rcx+56] 
+  mov    rax, [rsp+64]
+  sbb    rax, [rcx+64]
+  mov    rdx, [rsp+72]
+  sbb    rdx, [rcx+72] 
+  mov    rdi, [rsp+80]
+  sbb    rdi, [rcx+80] 
+  mov    rsi, [rsp+88]
+  sbb    rsi, [rcx+88] 
+  mov    [rsp], rsi
+      
+  // [r8-r15,rax,rdx,rdi,[rsp]] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH
+  mov    rsi, [rcx+96]
+  sub    r8,  rsi 
+  mov    rsi, [rcx+104]
+  sbb    r9,  rsi
+  mov    rsi, [rcx+112]
+  sbb    r10, rsi
+  mov    rsi, [rcx+120]
+  sbb    r11, rsi 
+  mov    rsi, [rcx+128]
+  sbb    r12, rsi
+  mov    rsi, [rcx+136]
+  sbb    r13, rsi
+  mov    rsi, [rcx+144]
+  sbb    r14, rsi 
+  mov    rsi, [rcx+152]
+  sbb    r15, rsi 
+  mov    rsi, [rcx+160]
+  sbb    rax, rsi
+  mov    rsi, [rcx+168]
+  sbb    rdx, rsi
+  mov    rsi, [rcx+176] 
+  sbb    rdi, rsi
+  mov    rsi, [rsp] 
+  sbb    rsi, [rcx+184]
+      
+  // Final result
+  add    r8,  [rcx+48] 
+  mov    [rcx+48], r8
+  adc    r9,  [rcx+56]
+  mov    [rcx+56], r9
+  adc    r10, [rcx+64]
+  mov    [rcx+64], r10
+  adc    r11, [rcx+72]
+  mov    [rcx+72], r11
+  adc    r12, [rcx+80]
+  mov    [rcx+80], r12
+  adc    r13, [rcx+88]
+  mov    [rcx+88], r13
+  adc    r14, [rcx+96] 
+  mov    [rcx+96], r14
+  adc    r15, [rcx+104] 
+  mov    [rcx+104], r15
+  adc    rax, [rcx+112]
+  mov    [rcx+112], rax
+  adc    rdx, [rcx+120]
+  mov    [rcx+120], rdx
+  adc    rdi, [rcx+128]
+  mov    [rcx+128], rdi
+  adc    rsi, [rcx+136]
+  mov    [rcx+136], rsi  
+  mov    rax, [rcx+144]
+  adc    rax, 0
+  mov    [rcx+144], rax
+  mov    rax, [rcx+152]
+  adc    rax, 0
+  mov    [rcx+152], rax
+  mov    rax, [rcx+160]
+  adc    rax, 0
+  mov    [rcx+160], rax
+  mov    rax, [rcx+168]
+  adc    rax, 0
+  mov    [rcx+168], rax
+  mov    rax, [rcx+176]
+  adc    rax, 0
+  mov    [rcx+176], rax
+  mov    rax, [rcx+184]
+  adc    rax, 0
+  mov    [rcx+184], rax
+    
+  add    rsp, 96           // Restoring space in stack
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+  
+//***********************************************************************
+//  Montgomery reduction
+//  Based on the Comba method
+//  Operation: c [reg_p2] = a [reg_p1]*R^-1 mod 2*p751, where R = 2^768
+//  NOTE: a=c is not allowed
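+//  Same comba-style reduction as the C routine, with the nonzero words p751p1_5..11
+//  of p751+1 loaded as 64-bit immediates.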
+//*********************************************************************** 
+.globl oqs_sidh_cln16_rdc751_asm
+oqs_sidh_cln16_rdc751_asm:
+  push   r12
+  push   r13 
+  push   r14 
+  push   r15 
+
+  mov    r11, [reg_p1]
+  movq   rax, p751p1_5 
+  mul    r11
+  xor    r8, r8
+  add    rax, [reg_p1+40]
+  mov    [reg_p2+40], rax    // z5
+  adc    r8, rdx
+  
+  xor    r9, r9
+  movq   rax, p751p1_6 
+  mul    r11
+  xor    r10, r10
+  add    r8, rax
+  adc    r9, rdx
+
+  mov    r12, [reg_p1+8]
+  movq   rax, p751p1_5 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+48]
+  mov    [reg_p2+48], r8    // z6
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  movq   rax, p751p1_7 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_6 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    r13, [reg_p1+16]
+  movq   rax, p751p1_5 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+56]
+  mov    [reg_p2+56], r9    // z7
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  movq   rax, p751p1_8 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_7 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_6 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    r14, [reg_p1+24]
+  movq   rax, p751p1_5 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+64]
+  mov    [reg_p2+64], r10   // z8
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  movq   rax, p751p1_9 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_8 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_7 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_6 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    r15, [reg_p1+32]
+  movq   rax, p751p1_5 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+72]
+  mov    [reg_p2+72], r8    // z9
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  movq   rax, p751p1_10 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_9 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_8 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_7 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_6 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rcx, [reg_p2+40]
+  movq   rax, p751p1_5 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+80]
+  mov    [reg_p2+80], r9    // z10
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  movq   rax, p751p1_11 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_10 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_9 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_8 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_7 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_6 
+  mul    rcx
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    r11, [reg_p2+48]
+  movq   rax, p751p1_5 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+88]
+  mov    [reg_p2+88], r10    // z11
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  movq   rax, p751p1_11 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_10 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_9 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_8 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_7 
+  mul    rcx
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_6 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    r12, [reg_p2+56]
+  movq   rax, p751p1_5 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+96]
+  mov    [reg_p2], r8        // z0
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  movq   rax, p751p1_11 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  movq   rax, p751p1_10 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  movq   rax, p751p1_9
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  movq   rax, p751p1_8
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  movq   rax, p751p1_7
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+
+  movq   rax, p751p1_6
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    r13, [reg_p2+64]
+  movq   rax, p751p1_5
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+104]
+  mov    [reg_p2+8], r9      // z1
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  movq   rax, p751p1_11 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_10 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_9 
+  mul    rcx
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_8 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_7 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_6 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  mov    r14, [reg_p2+72]
+  movq   rax, p751p1_5 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+112]
+  mov    [reg_p2+16], r10    // z2
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  movq   rax, p751p1_11 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_10 
+  mul    rcx
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_9 
+  mul    r11
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_8 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_7 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_6 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  mov    r15, [reg_p2+80]
+  movq   rax, p751p1_5 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+120]
+  mov    [reg_p2+24], r8     // z3
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  movq   rax, p751p1_11 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_10 
+  mul    r11
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_9 
+  mul    r12
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_8 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_7 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_6 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  mov    rcx, [reg_p2+88]
+  movq   rax, p751p1_5 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+128]
+  mov    [reg_p2+32], r9     // z4
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  movq   rax, p751p1_11 
+  mul    r11
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_10 
+  mul    r12
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_9 
+  mul    r13
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_8 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_7 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_6 
+  mul    rcx
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+136]
+  mov    [reg_p2+40], r10    // z5
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  movq   rax, p751p1_11 
+  mul    r12
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_10 
+  mul    r13
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_9 
+  mul    r14
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_8 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  
+  movq   rax, p751p1_7 
+  mul    rcx
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+144]
+  mov    [reg_p2+48], r8     // z6
+  adc    r9, 0
+  adc    r10, 0
+  
+  xor    r8, r8
+  movq   rax, p751p1_11 
+  mul    r13
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_10 
+  mul    r14
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_9 
+  mul    r15
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  
+  movq   rax, p751p1_8 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  adc    r8, 0
+  add    r9, [reg_p1+152]
+  mov    [reg_p2+56], r9     // z7
+  adc    r10, 0
+  adc    r8, 0
+  
+  xor    r9, r9
+  movq   rax, p751p1_11 
+  mul    r14
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_10 
+  mul    r15
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  
+  movq   rax, p751p1_9 
+  mul    rcx
+  add    r10, rax
+  adc    r8, rdx
+  adc    r9, 0
+  add    r10, [reg_p1+160]
+  mov    [reg_p2+64], r10    // z8
+  adc    r8, 0
+  adc    r9, 0
+  
+  xor    r10, r10
+  movq   rax, p751p1_11 
+  mul    r15
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+
+  movq   rax, p751p1_10 
+  mul    rcx
+  add    r8, rax
+  adc    r9, rdx
+  adc    r10, 0
+  add    r8, [reg_p1+168]    // z9
+  mov    [reg_p2+72], r8     // z9
+  adc    r9, 0
+  adc    r10, 0
+  
+  movq   rax, p751p1_11 
+  mul    rcx
+  add    r9, rax
+  adc    r10, rdx
+  add    r9, [reg_p1+176]    // z10
+  mov    [reg_p2+80], r9     // z10
+  adc    r10, 0  
+  add    r10, [reg_p1+184]   // z11
+  mov    [reg_p2+88], r10    // z11
+
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  751-bit multiprecision addition
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
+//*********************************************************************** 
+.globl oqs_sidh_cln16_mp_add751_asm
+oqs_sidh_cln16_mp_add751_asm:
+  push   r12
+  push   r13
+  push   r14
+  push   r15
+  push   rbx
+  
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rax, [reg_p1+64]
+  mov    rbx, [reg_p1+72] 
+  mov    rcx, [reg_p1+80]  
+  mov    rdi, [reg_p1+88] 
+
+  add    r8, [reg_p2] 
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40] 
+  adc    r14, [reg_p2+48] 
+  adc    r15, [reg_p2+56]
+  adc    rax, [reg_p2+64] 
+  adc    rbx, [reg_p2+72]
+  adc    rcx, [reg_p2+80]
+  adc    rdi, [reg_p2+88]
+
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15
+  mov    [reg_p3+64], rax
+  mov    [reg_p3+72], rbx
+  mov    [reg_p3+80], rcx
+  mov    [reg_p3+88], rdi
+  
+  pop    rbx
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
+
+
+//***********************************************************************
+//  2x751-bit multiprecision addition
+//  Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
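+//  Note: the 2x751-bit operands span 24 limbs; the carry is propagated across both 12-limb halves.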
+//*********************************************************************** 
+.globl oqs_sidh_cln16_mp_add751x2_asm
+oqs_sidh_cln16_mp_add751x2_asm:
+  push   r12
+  push   r13
+  push   r14
+  push   r15
+  push   rbx
+  
+  mov    r8, [reg_p1]
+  mov    r9, [reg_p1+8]
+  mov    r10, [reg_p1+16]
+  mov    r11, [reg_p1+24]
+  mov    r12, [reg_p1+32]
+  mov    r13, [reg_p1+40]
+  mov    r14, [reg_p1+48]
+  mov    r15, [reg_p1+56] 
+  mov    rax, [reg_p1+64]
+  mov    rbx, [reg_p1+72] 
+  mov    rcx, [reg_p1+80] 
+
+  add    r8, [reg_p2] 
+  adc    r9, [reg_p2+8] 
+  adc    r10, [reg_p2+16] 
+  adc    r11, [reg_p2+24] 
+  adc    r12, [reg_p2+32] 
+  adc    r13, [reg_p2+40] 
+  adc    r14, [reg_p2+48] 
+  adc    r15, [reg_p2+56]
+  adc    rax, [reg_p2+64] 
+  adc    rbx, [reg_p2+72]
+  adc    rcx, [reg_p2+80]
+
+  mov    [reg_p3], r8
+  mov    [reg_p3+8], r9
+  mov    [reg_p3+16], r10
+  mov    [reg_p3+24], r11
+  mov    [reg_p3+32], r12
+  mov    [reg_p3+40], r13
+  mov    [reg_p3+48], r14
+  mov    [reg_p3+56], r15
+  mov    [reg_p3+64], rax
+  mov    [reg_p3+72], rbx
+  mov    [reg_p3+80], rcx 
+  mov    rax, [reg_p1+88] 
+  adc    rax, [reg_p2+88]
+  mov    [reg_p3+88], rax
+  
+  mov    r8, [reg_p1+96]
+  mov    r9, [reg_p1+104]
+  mov    r10, [reg_p1+112]
+  mov    r11, [reg_p1+120]
+  mov    r12, [reg_p1+128]
+  mov    r13, [reg_p1+136]
+  mov    r14, [reg_p1+144]
+  mov    r15, [reg_p1+152] 
+  mov    rax, [reg_p1+160]
+  mov    rbx, [reg_p1+168] 
+  mov    rcx, [reg_p1+176]  
+  mov    rdi, [reg_p1+184] 
+
+  adc    r8, [reg_p2+96] 
+  adc    r9, [reg_p2+104] 
+  adc    r10, [reg_p2+112] 
+  adc    r11, [reg_p2+120] 
+  adc    r12, [reg_p2+128] 
+  adc    r13, [reg_p2+136] 
+  adc    r14, [reg_p2+144] 
+  adc    r15, [reg_p2+152]
+  adc    rax, [reg_p2+160] 
+  adc    rbx, [reg_p2+168]
+  adc    rcx, [reg_p2+176]
+  adc    rdi, [reg_p2+184]
+
+  mov    [reg_p3+96], r8
+  mov    [reg_p3+104], r9
+  mov    [reg_p3+112], r10
+  mov    [reg_p3+120], r11
+  mov    [reg_p3+128], r12
+  mov    [reg_p3+136], r13
+  mov    [reg_p3+144], r14
+  mov    [reg_p3+152], r15
+  mov    [reg_p3+160], rax
+  mov    [reg_p3+168], rbx
+  mov    [reg_p3+176], rcx
+  mov    [reg_p3+184], rdi
+  
+  pop    rbx
+  pop    r15
+  pop    r14
+  pop    r13
+  pop    r12
+  ret
diff --git a/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64.c b/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64.c
new file mode 100644
index 0000000000000000000000000000000000000000..b0df611d5f614c102a74d3ffc07263e1a50616be
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64.c
@@ -0,0 +1,88 @@
+/********************************************************************************************
+ * SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral 
+ *       Diffie-Hellman key exchange.
+ *
+ * Author:   David Urbanik;  dburbani@uwaterloo.ca 
+ *
+ * Abstract: Finite field arithmetic for ARM64 using code modified from the original x86_64
+ *           and generic implementations by Microsoft.
+ *
+ *           Most of this file is just a wrapper for the asm file. The other routines are
+ *           direct copies of their counterparts on the AMD64 side.
+ *
+ *           Modified to allow inputs in [0, 2*p751-1].
+ *
+ *********************************************************************************************/
+
+#include "../SIDH_internal.h"
+
+// Global constants
+extern const uint64_t p751[NWORDS_FIELD];
+extern const uint64_t p751x2[NWORDS_FIELD];
+
+__inline void oqs_sidh_cln16_fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modular addition, c = a+b mod p751.
+	// Inputs: a, b in [0, 2*p751-1]
+	// Output: c in [0, 2*p751-1]
+
+	oqs_sidh_cln16_fpadd751_asm(a, b, c);
+}
+
+__inline void oqs_sidh_cln16_fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modular subtraction, c = a-b mod p751.
+	// Inputs: a, b in [0, 2*p751-1]
+	// Output: c in [0, 2*p751-1]
+
+	oqs_sidh_cln16_fpsub751_asm(a, b, c);
+}
+
+__inline void oqs_sidh_cln16_fpneg751(digit_t *a) { // Modular negation, a = -a mod p751.
+	// Input/output: a in [0, 2*p751-1]
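+	// Computes a = 2*p751 - a, the negation of a modulo p751 in the redundant [0, 2*p751-1] representation.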
+	unsigned int i, borrow = 0;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		SUBC(borrow, ((digit_t *) p751x2)[i], a[i], borrow, a[i]);
+	}
+}
+
+void oqs_sidh_cln16_fpdiv2_751(const digit_t *a, digit_t *c) { // Modular division by two, c = a/2 mod p751.
+	// Input : a in [0, 2*p751-1]
+	// Output: c in [0, 2*p751-1]
+	unsigned int i, carry = 0;
+	digit_t mask;
+
+	mask = 0 - (digit_t)(a[0] & 1); // If a is odd, compute a+p751
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(carry, a[i], ((digit_t *) p751)[i] & mask, carry, c[i]);
+	}
+
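+	// Since p751 is odd, a + p751 is even whenever a is odd, so the right shift below yields a/2 exactly.
+	// Small illustration with p = 7: for a = 5, (5 + 7) >> 1 = 6 and 2*6 = 12 = 5 (mod 7).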
+	oqs_sidh_cln16_mp_shiftr1(c, NWORDS_FIELD);
+}
+
+void oqs_sidh_cln16_fpcorrection751(digit_t *a) { // Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1].
+	unsigned int i, borrow = 0;
+	digit_t mask;
+
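+	// Constant-time correction: subtract p751, then add p751 back masked by the final borrow, avoiding a secret-dependent branch.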
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		SUBC(borrow, a[i], ((digit_t *) p751)[i], borrow, a[i]);
+	}
+	mask = 0 - (digit_t) borrow;
+
+	borrow = 0;
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(borrow, a[i], ((digit_t *) p751)[i] & mask, borrow, a[i]);
+	}
+}
+
+void oqs_sidh_cln16_mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords.
+
+	UNREFERENCED_PARAMETER(nwords);
+
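+	// The assembly routine uses product scanning (Comba): every partial product a[i]*b[j] with i+j == k
+	// is accumulated into a three-word carry chain before output word c[k] is stored. A rough sketch of
+	// the pattern (illustrative only, not used by the library):
+	//     for (k = 0; k < 2*n - 1; k++) {
+	//         for all (i, j) with i + j == k: (t2, t1, t0) += a[i] * b[j];
+	//         c[k] = t0; t0 = t1; t1 = t2; t2 = 0;
+	//     }
+	//     c[2*n - 1] = t0;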
+	oqs_sidh_cln16_mul751_asm(a, b, c);
+}
+
+void oqs_sidh_cln16_rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751.
+	// mc = ma*R^-1 mod p751x2, where R = 2^768.
+	// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
+	// ma is assumed to be in Montgomery representation.
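+	// The reduction exploits the special form of the prime: the five least significant 64-bit words of
+	// p751+1 are zero, so only the seven upper words of p751+1 enter the Comba accumulation.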
+
+	oqs_sidh_cln16_rdc751_asm(ma, mc);
+}
diff --git a/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64_asm.S b/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64_asm.S
new file mode 100644
index 0000000000000000000000000000000000000000..b643d5e8f7a459acbe5cfa0f025ca4896bec4f9a
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/ARM64/fp_arm64_asm.S
@@ -0,0 +1,2315 @@
+//*******************************************************************************************
+// SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral 
+//       Diffie-Hellman key exchange.
+//
+// Author:   David Urbanik;  dburbani@uwaterloo.ca 
+//
+// Abstract: Assembly optimizations for finite field arithmetic on 64-bit ARM. 
+//
+//           Modified to allow inputs in [0, 2*p751-1].
+// 
+//*******************************************************************************************
+
+.data
+
+// p751 + 1
+p751p1:
+.quad  0xEEB0000000000000
+.quad  0xE3EC968549F878A8
+.quad  0xDA959B1A13F7CC76
+.quad  0x084E9867D6EBE876
+.quad  0x8562B5045CB25748
+.quad  0x0E12909F97BADC66
+.quad  0x00006FE5D541F71C
+
+// p751
+p751:
+.quad  0xFFFFFFFFFFFFFFFF
+.quad  0xEEAFFFFFFFFFFFFF
+.quad  0xE3EC968549F878A8
+.quad  0xDA959B1A13F7CC76
+.quad  0x084E9867D6EBE876
+.quad  0x8562B5045CB25748
+.quad  0x0E12909F97BADC66
+.quad  0x00006FE5D541F71C
+
+// 2 * p751
+p751x2:
+.quad  0xFFFFFFFFFFFFFFFE
+.quad  0xFFFFFFFFFFFFFFFF
+.quad  0xDD5FFFFFFFFFFFFF
+.quad  0xC7D92D0A93F0F151
+.quad  0xB52B363427EF98ED
+.quad  0x109D30CFADD7D0ED
+.quad  0x0AC56A08B964AE90
+.quad  0x1C25213F2F75B8CD
+.quad  0x0000DFCBAA83EE38
+
+
+.text
+//***********************************************************************
+//  Field addition
+//  Operation: c [x2] = a [x0] + b [x1]
+//*********************************************************************** 
+.global oqs_sidh_cln16_fpadd751_asm
+oqs_sidh_cln16_fpadd751_asm:
+    // Arguments are three pointers of type digit_t*: the first two point to the summands and the third to the result.
+    // They are passed in x0, x1, and x2 respectively.
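+    // Algorithm outline: add the operands, subtract 2*p751, then add 2*p751 back ANDed with the borrow
+    // mask, so the result lands in [0, 2*p751-1] without a data-dependent branch.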
+
+    //  load first summand into x3 - x14
+    ldp x3, x4,   [x0,#0]
+    ldp x5, x6,   [x0,#16]
+    ldp x7, x8,   [x0,#32]
+    ldp x9, x10,  [x0,#48]
+    ldp x11, x12, [x0,#64]
+    ldp x13, x14, [x0,#80]
+
+    //  add first summand and second summand and store result in x3 - x14
+    ldp x15, x16,   [x1,#0]
+    ldp x17, x18,   [x1,#16]
+    adds x3, x3, x15
+    adcs x4, x4, x16
+    adcs x5, x5, x17
+    adcs x6, x6, x18
+    ldp x15, x16,   [x1,#32]
+    ldp x17, x18,   [x1,#48]
+    adcs x7, x7, x15
+    adcs x8, x8, x16
+    adcs x9, x9, x17
+    adcs x10, x10, x18
+    ldp x15, x16,   [x1,#64]
+    ldp x17, x18,   [x1,#80]
+    adcs x11, x11, x15
+    adcs x12, x12, x16
+    adcs x13, x13, x17
+    adcs x14, x14, x18
+    
+    //  subtract 2*p751 from the result in x3 - x14
+    ldr x16, p751x2
+    subs x3, x3, x16
+    ldr x15, p751x2 + 8
+    sbcs x4, x4, x15
+    sbcs x5, x5, x15
+    sbcs x6, x6, x15
+    sbcs x7, x7, x15
+    ldr x16, p751x2 + 16
+    ldr x17, p751x2 + 24
+    sbcs x8, x8, x16
+    ldr x18, p751x2 + 32
+    sbcs x9, x9, x17
+    ldr x16, p751x2 + 40
+    sbcs x10, x10, x18
+    ldr x17, p751x2 + 48
+    sbcs x11, x11, x16
+    ldr x18, p751x2 + 56
+    sbcs x12, x12, x17
+    ldr x15, p751x2 + 64
+    sbcs x13, x13, x18
+    sbcs x14, x14, x15
+    sbc x15, xzr, xzr
+
+    //  add 2*p751 back, ANDed with the borrow mask in x15
+    ldr x16, p751x2
+    and x16, x16, x15
+    ldr x17, p751x2 + 8
+    and x17, x17, x15
+    ldr x18, p751x2 + 16
+    and x18, x18, x15    
+
+    adds x3, x3, x16
+    adcs x4, x4, x17
+    adcs x5, x5, x17
+    adcs x6, x6, x17
+    adcs x7, x7, x17
+    adcs x8, x8, x18
+
+    ldr x16, p751x2 + 24
+    and x16, x16, x15  
+    adcs x9, x9, x16
+
+    ldr x16, p751x2 + 32
+    and x16, x16, x15
+    ldr x17, p751x2 + 40
+    and x17, x17, x15
+    ldr x18, p751x2 + 48
+    and x18, x18, x15 
+
+    adcs x10, x10, x16   
+    adcs x11, x11, x17   
+    adcs x12, x12, x18   
+
+    ldr x16, p751x2 + 56
+    and x16, x16, x15
+    ldr x17, p751x2 + 64
+    and x17, x17, x15
+
+    adcs x13, x13, x16
+    adcs x14, x14, x17
+
+    stp x3, x4,   [x2,#0]
+    stp x5, x6,   [x2,#16]
+    stp x7, x8,   [x2,#32]
+    stp x9, x10,  [x2,#48]
+    stp x11, x12, [x2,#64]
+    stp x13, x14, [x2,#80]
+    ret
+
+
+//***********************************************************************
+//  Field subtraction
+//  Operation: c [x2] = a [x0] - b [x1]
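+//  Computes a - b, then adds 2*p751 ANDed with the borrow mask so the result stays in [0, 2*p751-1].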
+//*********************************************************************** 
+.global oqs_sidh_cln16_fpsub751_asm
+oqs_sidh_cln16_fpsub751_asm:
+    ldp x3, x4,   [x0,#0]
+    ldp x5, x6,   [x0,#16]
+    ldp x7, x8,   [x0,#32]
+    ldp x9, x10,  [x0,#48]
+    ldp x11, x12, [x0,#64]
+    ldp x13, x14, [x0,#80]
+
+    ldp x15, x16, [x1, #0]
+    subs x3, x3, x15
+    sbcs x4, x4, x16
+    ldp x15, x16, [x1, #16]
+    sbcs x5, x5, x15
+    sbcs x6, x6, x16
+    ldp x15, x16, [x1, #32]
+    sbcs x7, x7, x15
+    sbcs x8, x8, x16
+    ldp x15, x16, [x1, #48]
+    sbcs x9, x9, x15
+    sbcs x10, x10, x16
+    ldp x15, x16, [x1, #64]
+    sbcs x11, x11, x15
+    sbcs x12, x12, x16
+    ldp x15, x16, [x1, #80]
+    sbcs x13, x13, x15
+    sbcs x14, x14, x16
+    sbc x17, xzr, xzr
+    
+    ldr x15, p751x2
+    and x15, x15, x17
+    ldr x16, p751x2 + 8
+    and x16, x16, x17
+    ldr x18, p751x2 + 16
+    and x18, x18, x17
+
+    adds x3, x3, x15
+    adcs x4, x4, x16
+    adcs x5, x5, x16
+    adcs x6, x6, x16
+    adcs x7, x7, x16
+    adcs x8, x8, x18
+
+    ldr x15, p751x2 + 24
+    and x15, x15, x17    
+    ldr x16, p751x2 + 32
+    and x16, x16, x17
+
+    adcs x9, x9, x15
+    adcs x10, x10, x16   
+
+    ldr x15, p751x2 + 40
+    and x15, x15, x17
+    ldr x16, p751x2 + 48
+    and x16, x16, x17 
+
+    adcs x11, x11, x15   
+    adcs x12, x12, x16   
+
+    ldr x15, p751x2 + 56
+    and x15, x15, x17
+    ldr x16, p751x2 + 64
+    and x16, x16, x17
+
+    adcs x13, x13, x15
+    adcs x14, x14, x16
+
+    stp x3, x4,   [x2,#0]
+    stp x5, x6,   [x2,#16]
+    stp x7, x8,   [x2,#32]
+    stp x9, x10,  [x2,#48]
+    stp x11, x12, [x2,#64]
+    stp x13, x14, [x2,#80]
+    ret
+
+
+//***********************************************************************
+//  Integer multiplication using Comba method
+//  Operation: c [x2] = a [x0] * b [x1]
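+//  Product-scanning (Comba) schoolbook multiplication: all partial products a_i*b_j with i+j = k are
+//  accumulated into a three-word carry chain before output word c_k is stored.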
+//***********************************************************************
+.global oqs_sidh_cln16_mul751_asm
+oqs_sidh_cln16_mul751_asm:
+    sub sp, sp, #80
+    stp x19, x20, [sp]
+    stp x21, x22, [sp, #16]
+    stp x23, x24, [sp, #32]
+    stp x25, x26, [sp, #48]
+    stp x27, x28, [sp, #64]
+
+    ldp x3, x4, [x0, #0]
+    ldp x5, x6, [x1, #0]
+    mul x18, x3, x5
+    umulh x17, x3, x5
+    //  c0 is now in x18
+
+    //  a0 * b1
+    mul x13, x3, x6
+    umulh x14, x3, x6
+
+    adds x17, x17, x13
+    adcs x16, x14, xzr
+    adcs x15, xzr, xzr
+
+    //  b0 * a1
+    mul x13, x4, x5
+    umulh x14, x4, x5
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  store c0 and c1
+    stp x18, x17, [x2, #0]
+
+    //  load a2, a3, b2, b3
+    ldp x7, x8, [x0, #16]
+    ldp x9, x10, [x1, #16]
+
+    //  a0 * b2
+    mul x13, x3, x9
+    umulh x14, x3, x9
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, xzr, xzr
+
+    //  a1 * b1
+    mul x13, x4, x6
+    umulh x14, x4, x6
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a2 * b0
+    mul x13, x7, x5
+    umulh x14, x7, x5
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  c2 is now in x16
+
+    //  a0 * b3
+    mul x13, x3, x10
+    umulh x14, x3, x10
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, xzr, xzr
+
+    //  a1 * b2
+    mul x13, x4, x9
+    umulh x14, x4, x9
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a2 * b1
+    mul x13, x7, x6
+    umulh x14, x7, x6
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a3 * b0
+    mul x13, x8, x5
+    umulh x14, x8, x5
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  store c2 and c3
+    stp x16, x15, [x2, #16]
+
+    //  a1 * b3
+    mul x13, x4, x10
+    umulh x14, x4, x10
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, xzr, xzr
+
+    //  a2 * b2
+    mul x13, x7, x9
+    umulh x14, x7, x9
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a3 * b1
+    mul x13, x8, x6
+    umulh x14, x8, x6
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  load a4, a5
+    ldp x11, x12, [x0, #32]
+    
+    //  a4 * b0
+    mul x13, x11, x5
+    umulh x14, x11, x5
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  load b4, b5
+    ldp x19, x20, [x1, #32]
+
+    //  a0 * b4
+    mul x13, x3, x19
+    umulh x14, x3, x19
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  c4 is now in x18
+
+    //  a0 * b5
+    mul x13, x3, x20
+    umulh x14, x3, x20
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, xzr, xzr
+
+    //  a1 * b4
+    mul x13, x4, x19
+    umulh x14, x4, x19
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a2 * b3
+    mul x13, x7, x10
+    umulh x14, x7, x10
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a3 * b2
+    mul x13, x8, x9
+    umulh x14, x8, x9
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a4 * b1
+    mul x13, x11, x6
+    umulh x14, x11, x6
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a5 * b0
+    mul x13, x12, x5
+    umulh x14, x12, x5
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  store c4 and c5
+    stp x18, x17, [x2, #32]
+
+    //  load a6, a7
+    ldp x21, x22, [x0, #48]
+
+    //  a6 * b0
+    mul x13, x21, x5
+    umulh x14, x21, x5
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, xzr, xzr
+
+    //  a5 * b1
+    mul x13, x12, x6
+    umulh x14, x12, x6
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+    
+    //  a4 * b2
+    mul x13, x11, x9
+    umulh x14, x11, x9
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a3 * b3
+    mul x13, x8, x10
+    umulh x14, x8, x10
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a2 * b4
+    mul x13, x7, x19
+    umulh x14, x7, x19
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a1 * b5
+    mul x13, x4, x20
+    umulh x14, x4, x20
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  load b6, b7
+    ldp x23, x24, [x1, #48]
+
+    //  a0 * b6
+    mul x13, x3, x23
+    umulh x14, x3, x23
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  c6 is now in x16
+
+    //  a0 * b7
+    mul x13, x3, x24
+    umulh x14, x3, x24
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, xzr, xzr
+
+    //  a1 * b6
+    mul x13, x4, x23
+    umulh x14, x4, x23
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a2 * b5
+    mul x13, x7, x20
+    umulh x14, x7, x20
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a3 * b4
+    mul x13, x8, x19
+    umulh x14, x8, x19
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a4 * b3
+    mul x13, x11, x10
+    umulh x14, x11, x10
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a5 * b2
+    mul x13, x12, x9
+    umulh x14, x12, x9
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a6 * b1
+    mul x13, x21, x6
+    umulh x14, x21, x6
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a7 * b0
+    mul x13, x22, x5
+    umulh x14, x22, x5
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  store c6 and c7
+    stp x16, x15, [x2, #48]
+
+    //  load a8, a9
+    ldp x25, x26, [x0, #64]
+
+    //  a8 * b0
+    mul x13, x25, x5
+    umulh x14, x25, x5
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, xzr, xzr
+
+    //  a7 * b1
+    mul x13, x22, x6
+    umulh x14, x22, x6
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a6 * b2
+    mul x13, x21, x9
+    umulh x14, x21, x9
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a5 * b3
+    mul x13, x12, x10
+    umulh x14, x12, x10
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a4 * b4
+    mul x13, x11, x19
+    umulh x14, x11, x19
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a3 * b5
+    mul x13, x8, x20
+    umulh x14, x8, x20
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a2 * b6
+    mul x13, x7, x23
+    umulh x14, x7, x23
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a1 * b7
+    mul x13, x4, x24
+    umulh x14, x4, x24
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  load b8, b9
+    ldp x27, x28, [x1, #64]
+
+    //  a0 * b8
+    mul x13, x3, x27
+    umulh x14, x3, x27
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  c8 is now in x18
+
+    //  a0 * b9
+    mul x13, x3, x28
+    umulh x14, x3, x28
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, xzr, xzr
+
+    //  a1 * b8
+    mul x13, x4, x27
+    umulh x14, x4, x27
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a2 * b7
+    mul x13, x7, x24
+    umulh x14, x7, x24
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a3 * b6
+    mul x13, x8, x23
+    umulh x14, x8, x23
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a4 * b5
+    mul x13, x11, x20
+    umulh x14, x11, x20
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a5 * b4
+    mul x13, x12, x19
+    umulh x14, x12, x19
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a6 * b3
+    mul x13, x21, x10
+    umulh x14, x21, x10
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a7 * b2
+    mul x13, x22, x9
+    umulh x14, x22, x9
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a8 * b1
+    mul x13, x25, x6
+    umulh x14, x25, x6
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a9 * b0
+    mul x13, x26, x5
+    umulh x14, x26, x5
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  store c8 and c9
+    stp x18, x17, [x2, #64]
+
+    //  load a10, a11; a0 and a1 unloaded
+    ldp x3, x4, [x0, #80]
+
+    //  a10 * b0
+    mul x13, x3, x5
+    umulh x14, x3, x5
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, xzr, xzr
+
+    //  a9 * b1
+    mul x13, x26, x6
+    umulh x14, x26, x6
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a8 * b2
+    mul x13, x25, x9
+    umulh x14, x25, x9
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a7 * b3
+    mul x13, x22, x10
+    umulh x14, x22, x10
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a6 * b4
+    mul x13, x21, x19
+    umulh x14, x21, x19
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a5 * b5
+    mul x13, x12, x20
+    umulh x14, x12, x20
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a4 * b6
+    mul x13, x11, x23
+    umulh x14, x11, x23
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a3 * b7
+    mul x13, x8, x24
+    umulh x14, x8, x24
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a2 * b8
+    mul x13, x7, x27
+    umulh x14, x7, x27
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  load a0, a1; b0 and b1 unloaded
+    ldp x5, x6, [x0, #0]
+
+    //  a1 * b9
+    mul x13, x6, x28
+    umulh x14, x6, x28
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  load b10, b11; a10 and a11 unloaded
+    ldp x3, x4, [x1, #80]
+
+    //  a0 * b10
+    mul x13, x3, x5
+    umulh x14, x3, x5
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  c10 now in x16
+
+    //  a0 * b11
+    mul x13, x4, x5
+    umulh x14, x4, x5
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, xzr, xzr
+
+    //  a1 * b10
+    mul x13, x3, x6
+    umulh x14, x3, x6
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a2 * b9
+    mul x13, x7, x28
+    umulh x14, x7, x28
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a3 * b8
+    mul x13, x8, x27
+    umulh x14, x8, x27
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a4 * b7
+    mul x13, x11, x24
+    umulh x14, x11, x24
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a5 * b6
+    mul x13, x12, x23
+    umulh x14, x12, x23
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a6 * b5
+    mul x13, x21, x20
+    umulh x14, x21, x20
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a7 * b4
+    mul x13, x22, x19
+    umulh x14, x22, x19
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a8 * b3
+    mul x13, x25, x10
+    umulh x14, x25, x10
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a9 * b2
+    mul x13, x26, x9
+    umulh x14, x26, x9
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  load a10, a11; b10 and b11 unloaded
+    ldp x3, x4, [x0, #80]
+    //  load b0, b1; a0 and a1 unloaded
+    ldp x5, x6, [x1, #0]
+
+    //  a10 * b1
+    mul x13, x3, x6
+    umulh x14, x3, x6
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a11 * b0
+    mul x13, x4, x5
+    umulh x14, x4, x5
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  store c10 and c11
+    stp x16, x15, [x2, #80]
+
+    //  a11 * b1
+    mul x13, x4, x6
+    umulh x14, x4, x6
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, xzr, xzr
+    
+    //  a10 * b2
+    mul x13, x9, x3
+    umulh x14, x9, x3
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a9 * b3
+    mul x13, x26, x10
+    umulh x14, x26, x10
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a8 * b4
+    mul x13, x25, x19
+    umulh x14, x25, x19
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a7 * b5
+    mul x13, x22, x20
+    umulh x14, x22, x20
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a6 * b6
+    mul x13, x21, x23
+    umulh x14, x21, x23
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a5 * b7
+    mul x13, x12, x24
+    umulh x14, x12, x24
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a4 * b8
+    mul x13, x11, x27
+    umulh x14, x11, x27
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a3 * b9
+    mul x13, x8, x28
+    umulh x14, x8, x28
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  load b10, b11; a10 and a11 unloaded
+    ldp x3, x4, [x1, #80]
+    //  load a0, a1; b0 and b1 unloaded
+    ldp x5, x6, [x0, #0]
+
+    //  a2 * b10
+    mul x13, x7, x3
+    umulh x14, x7, x3
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a1 * b11
+    mul x13, x6, x4
+    umulh x14, x6, x4
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  c12 now in x18
+
+    //  a2 * b11
+    mul x13, x7, x4
+    umulh x14, x7, x4
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, xzr, xzr
+
+    //  a3 * b10
+    mul x13, x8, x3
+    umulh x14, x8, x3
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a4 * b9
+    mul x13, x11, x28
+    umulh x14, x11, x28
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a5 * b8
+    mul x13, x12, x27
+    umulh x14, x12, x27
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a6 * b7
+    mul x13, x21, x24
+    umulh x14, x21, x24
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a7 * b6
+    mul x13, x22, x23
+    umulh x14, x22, x23
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a8 * b5
+    mul x13, x25, x20
+    umulh x14, x25, x20
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a9 * b4
+    mul x13, x26, x19
+    umulh x14, x26, x19
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  load a10, a11; a0 and a1 unloaded
+    ldp x5, x6, [x0, #80]
+
+    //  a10 * b3
+    mul x13, x5, x10
+    umulh x14, x5, x10
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a11 * b2
+    mul x13, x6, x9
+    umulh x14, x6, x9
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  store c12 and c13
+    stp x18, x17, [x2, #96]
+
+    //  a11 * b3
+    mul x13, x6, x10
+    umulh x14, x6, x10
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, xzr, xzr
+
+    //  a10 * b4
+    mul x13, x5, x19
+    umulh x14, x5, x19
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a9 * b5
+    mul x13, x26, x20
+    umulh x14, x26, x20
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a8 * b6
+    mul x13, x25, x23
+    umulh x14, x25, x23
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a7 * b7
+    mul x13, x22, x24
+    umulh x14, x22, x24
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a6 * b8
+    mul x13, x21, x27
+    umulh x14, x21, x27
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a5 * b9
+    mul x13, x12, x28
+    umulh x14, x12, x28
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a4 * b10
+    mul x13, x11, x3
+    umulh x14, x11, x3
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a3 * b11
+    mul x13, x8, x4
+    umulh x14, x8, x4
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  c14 is now in x16
+
+    //  a4 * b11
+    mul x13, x11, x4
+    umulh x14, x11, x4
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, xzr, xzr
+
+    //  a5 * b10
+    mul x13, x12, x3
+    umulh x14, x12, x3
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a6 * b9
+    mul x13, x21, x28
+    umulh x14, x21, x28
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a7 * b8
+    mul x13, x22, x27
+    umulh x14, x22, x27
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a8 * b7
+    mul x13, x25, x24
+    umulh x14, x25, x24
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a9 * b6
+    mul x13, x26, x23
+    umulh x14, x26, x23
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a10 * b5
+    mul x13, x5, x20
+    umulh x14, x5, x20
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a11 * b4
+    mul x13, x6, x19
+    umulh x14, x6, x19
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  c15 is now in x15
+
+    //  store c14 and c15
+    stp x16, x15, [x2, #112]
+
+    //  a11 * b5
+    mul x13, x6, x20
+    umulh x14, x6, x20
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, xzr, xzr
+
+    //  a10 * b6
+    mul x13, x5, x23
+    umulh x14, x5, x23
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a9 * b7
+    mul x13, x26, x24
+    umulh x14, x26, x24
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a8 * b8
+    mul x13, x25, x27
+    umulh x14, x25, x27
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a7 * b9
+    mul x13, x22, x28
+    umulh x14, x22, x28
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a6 * b10
+    mul x13, x21, x3
+    umulh x14, x21, x3
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a5 * b11
+    mul x13, x12, x4
+    umulh x14, x12, x4
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  c16 is now in x18
+
+    //  a6 * b11
+    mul x13, x21, x4
+    umulh x14, x21, x4
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, xzr, xzr
+
+    //  a7 * b10
+    mul x13, x22, x3
+    umulh x14, x22, x3
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a8 * b9
+    mul x13, x25, x28
+    umulh x14, x25, x28
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a9 * b8
+    mul x13, x26, x27
+    umulh x14, x26, x27
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a10 * b7
+    mul x13, x5, x24
+    umulh x14, x5, x24
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  a11 * b6
+    mul x13, x6, x23
+    umulh x14, x6, x23
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  store c16 and c17
+    stp x18, x17, [x2, #128]
+
+    //  a11 * b7
+    mul x13, x6, x24
+    umulh x14, x6, x24
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, xzr, xzr
+
+    //  a10 * b8
+    mul x13, x5, x27
+    umulh x14, x5, x27
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a9 * b9
+    mul x13, x26, x28
+    umulh x14, x26, x28
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a8 * b10
+    mul x13, x25, x3
+    umulh x14, x25, x3
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  a7 * b11
+    mul x13, x22, x4
+    umulh x14, x22, x4
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+    adcs x18, x18, xzr
+
+    //  c18 is now in x16
+
+    //  a8 * b11
+    mul x13, x25, x4
+    umulh x14, x25, x4
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, xzr, xzr
+
+    //  a9 * b10
+    mul x13, x26, x3
+    umulh x14, x26, x3
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a10 * b9
+    mul x13, x5, x28
+    umulh x14, x5, x28
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  a11 * b8
+    mul x13, x6, x27
+    umulh x14, x6, x27
+
+    adds x15, x15, x13
+    adcs x18, x18, x14
+    adcs x17, x17, xzr
+
+    //  store c18 and c19
+    stp x16, x15, [x2, #144]
+
+    //  a11 * b9
+    mul x13, x6, x28
+    umulh x14, x6, x28
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, xzr, xzr
+
+    //  a10 * b10
+    mul x13, x5, x3
+    umulh x14, x5, x3
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  a9 * b11
+    mul x13, x26, x4
+    umulh x14, x26, x4
+
+    adds x18, x18, x13
+    adcs x17, x17, x14
+    adcs x16, x16, xzr
+
+    //  c20 is now in x18
+
+    //  a10 * b11
+    mul x13, x5, x4
+    umulh x14, x5, x4
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, xzr, xzr
+
+    //  a11 * b10
+    mul x13, x6, x3
+    umulh x14, x6, x3
+
+    adds x17, x17, x13
+    adcs x16, x16, x14
+    adcs x15, x15, xzr
+
+    //  store c20 and c21
+    stp x18, x17, [x2, #160]
+
+    //  a11 * b11
+    mul x13, x4, x6
+    umulh x14, x4, x6
+
+    adds x16, x16, x13
+    adcs x15, x15, x14
+
+    //  store c22 and c23
+    stp x16, x15, [x2, #176]
+
+    ldp x19, x20, [sp]
+    ldp x21, x22, [sp, #16]
+    ldp x23, x24, [sp, #32]
+    ldp x25, x26, [sp, #48]
+    ldp x27, x28, [sp, #64]
+    add sp, sp, #80
+    ret
+
+  
+//***********************************************************************
+//  Montgomery reduction
+//  Based on comba method
+//  Operation: mc [x1] = ma [x0]
+//  NOTE: ma=mc is not allowed
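+//  The five least significant 64-bit words of p751+1 are zero, so each Comba column only needs the
+//  seven upper words of p751+1 (loaded into x14-x20 below).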
+//*********************************************************************** 
+.global oqs_sidh_cln16_rdc751_asm
+oqs_sidh_cln16_rdc751_asm:
+    //  ma is in x0
+    //  mc is in x1
+
+    sub sp, sp, #80
+    stp x19, x20, [sp]
+    stp x21, x22, [sp, #16]
+    stp x23, x24, [sp, #32]
+    stp x25, x26, [sp, #48]
+    stp x27, x28, [sp, #64]
+
+    //  load the seven non-zero words of p751+1 into x14 through x20
+    ldr x14, p751p1 + 0
+    ldr x15, p751p1 + 8
+    ldr x16, p751p1 + 16
+    ldr x17, p751p1 + 24
+    ldr x18, p751p1 + 32
+    ldr x19, p751p1 + 40
+    ldr x20, p751p1 + 48
+
+    //  the values mc[0] through mc[11] will be held in x2 through x13
+    //  until the very end when they will be stored
+
+    //  load mc[0] through mc[4] and ma[5]
+    ldp x2, x3, [x0, #0]
+    ldp x4, x5, [x0, #16]
+    ldp x6, x21, [x0, #32]
+
+    //  ma[5] iteration
+    mul x22, x2, x14   
+    umulh x23, x2, x14 
+    adds x24, x22, x21
+    adcs x25, x23, xzr
+    add x7, x24, xzr    //  set mc[5]
+
+    //  ma[6] iteration
+
+    ldr x21, [x0, #48]
+
+    mul x22, x2, x15
+    umulh x23, x2, x15
+    adds x25, x25, x22
+    adcs x26, x23, xzr
+
+    mul x22, x3, x14
+    umulh x23, x3, x14
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, xzr, xzr
+
+    adds x25, x25, x21
+    adcs x26, x26, xzr
+    adcs x24, x24, xzr
+    add x8, x25, xzr    //  set mc[6]
+
+    //  ma[7] iteration
+
+    ldr x21, [x0, #56]
+    mul x22, x2, x16
+    umulh x23, x2, x16
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, xzr, xzr
+
+    mul x22, x3, x15
+    umulh x23, x3, x15
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x4, x14
+    umulh x23, x4, x14
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    adds x26, x26, x21
+    adcs x24, x24, xzr
+    adcs x25, x25, xzr
+    add x9, x26, xzr    //  set mc[7] 
+
+    //  ma[8] iteration
+
+    ldr x21, [x0, #64]
+    mul x22, x2, x17
+    umulh x23, x2, x17
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, xzr, xzr
+
+    mul x22, x3, x16
+    umulh x23, x3, x16
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x4, x15
+    umulh x23, x4, x15
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x5, x14
+    umulh x23, x5, x14
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    adds x24, x24, x21
+    adcs x25, x25, xzr
+    adcs x26, x26, xzr
+    add x10, x24, xzr   //  set mc[8]
+
+    //  ma[9] iteration
+
+    ldr x21, [x0, #72]
+    mul x22, x2, x18
+    umulh x23, x2, x18
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, xzr, xzr
+
+    mul x22, x3, x17
+    umulh x23, x3, x17
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x4, x16
+    umulh x23, x4, x16
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x5, x15
+    umulh x23, x5, x15
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x6, x14
+    umulh x23, x6, x14
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    adds x25, x25, x21
+    adcs x26, x26, xzr
+    adcs x24, x24, xzr
+    add x11, x25, xzr   //  set mc[9]
+
+    //  ma[10] iteration
+
+    ldr x21, [x0, #80]
+    mul x22, x2, x19
+    umulh x23, x2, x19
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, xzr, xzr
+
+    mul x22, x3, x18
+    umulh x23, x3, x18
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x4, x17
+    umulh x23, x4, x17
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x5, x16
+    umulh x23, x5, x16
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x6, x15
+    umulh x23, x6, x15
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x7, x14
+    umulh x23, x7, x14
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    adds x26, x26, x21
+    adcs x24, x24, xzr
+    adcs x25, x25, xzr
+    add x12, x26, xzr   //  set mc[10]
+
+    //  ma[11] iteration
+    ldr x21, [x0, #88]
+
+    mul x22, x2, x20
+    umulh x23, x2, x20
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, xzr, xzr
+
+    mul x22, x3, x19
+    umulh x23, x3, x19
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x4, x18
+    umulh x23, x4, x18
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x5, x17
+    umulh x23, x5, x17
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x6, x16
+    umulh x23, x6, x16
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x7, x15
+    umulh x23, x7, x15
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x8, x14
+    umulh x23, x8, x14
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    adds x24, x24, x21
+    adcs x25, x25, xzr
+    adcs x26, x26, xzr
+    add x13, x24, xzr   //  set mc[11]
+
+    //  ma[12] iteration
+
+    ldr x21, [x0, #96]
+    mul x22, x3, x20
+    umulh x23, x3, x20
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, xzr, xzr
+
+    mul x22, x4, x19
+    umulh x23, x4, x19
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x5, x18
+    umulh x23, x5, x18
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x6, x17
+    umulh x23, x6, x17
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x7, x16
+    umulh x23, x7, x16
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x8, x15
+    umulh x23, x8, x15
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x9, x14
+    umulh x23, x9, x14
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    adds x25, x25, x21
+    adcs x26, x26, xzr
+    adcs x24, x24, xzr
+    add x2, x25, xzr   //  set mc[0]
+
+    //  ma[13] iteration
+
+    ldr x21, [x0, #104]
+    mul x22, x4, x20
+    umulh x23, x4, x20
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, xzr, xzr
+
+    mul x22, x5, x19
+    umulh x23, x5, x19
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x6, x18
+    umulh x23, x6, x18
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x7, x17
+    umulh x23, x7, x17
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x8, x16
+    umulh x23, x8, x16
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x9, x15
+    umulh x23, x9, x15
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x10, x14
+    umulh x23, x10, x14
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    adds x26, x26, x21
+    adcs x24, x24, xzr
+    adcs x25, x25, xzr
+    add x3, x26, xzr   //  set mc[1]
+
+    //  ma[14] iteration
+
+    ldr x21, [x0, #112]
+    mul x22, x5, x20
+    umulh x23, x5, x20
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, xzr, xzr
+
+    mul x22, x6, x19
+    umulh x23, x6, x19
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x7, x18
+    umulh x23, x7, x18
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x8, x17
+    umulh x23, x8, x17
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x9, x16
+    umulh x23, x9, x16
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x10, x15
+    umulh x23, x10, x15
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x11, x14
+    umulh x23, x11, x14
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    adds x24, x24, x21
+    adcs x25, x25, xzr
+    adcs x26, x26, xzr
+    add x4, x24, xzr   //  set mc[2]
+
+    //  ma[15] iteration
+
+    ldr x21, [x0, #120]
+    mul x22, x6, x20
+    umulh x23, x6, x20
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, xzr, xzr
+
+    mul x22, x7, x19
+    umulh x23, x7, x19
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x8, x18
+    umulh x23, x8, x18
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x9, x17
+    umulh x23, x9, x17
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x10, x16
+    umulh x23, x10, x16
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x11, x15
+    umulh x23, x11, x15
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x12, x14
+    umulh x23, x12, x14
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    adds x25, x25, x21
+    adcs x26, x26, xzr
+    adcs x24, x24, xzr
+    add x5, x25, xzr   //  set mc[3]
+
+    //  ma[16] iteration
+
+    ldr x21, [x0, #128]
+    mul x22, x7, x20
+    umulh x23, x7, x20
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, xzr, xzr
+
+    mul x22, x8, x19
+    umulh x23, x8, x19
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x9, x18
+    umulh x23, x9, x18
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x10, x17
+    umulh x23, x10, x17
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x11, x16
+    umulh x23, x11, x16
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x12, x15
+    umulh x23, x12, x15
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x13, x14
+    umulh x23, x13, x14
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    adds x26, x26, x21
+    adcs x24, x24, xzr
+    adcs x25, x25, xzr
+    add x6, x26, xzr   //  set mc[4]
+
+    //  ma[17] iteration
+
+    ldr x21, [x0, #136]
+    mul x22, x8, x20
+    umulh x23, x8, x20
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, xzr, xzr
+
+    mul x22, x9, x19
+    umulh x23, x9, x19
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x10, x18
+    umulh x23, x10, x18
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x11, x17
+    umulh x23, x11, x17
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x12, x16
+    umulh x23, x12, x16
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x13, x15
+    umulh x23, x13, x15
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    adds x24, x24, x21
+    adcs x25, x25, xzr
+    adcs x26, x26, xzr
+    add x7, x24, xzr   //  set mc[5]
+
+    //  ma[18] iteration
+
+    ldr x21, [x0, #144]
+    mul x22, x9, x20
+    umulh x23, x9, x20
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, xzr, xzr
+
+    mul x22, x10, x19
+    umulh x23, x10, x19
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x11, x18
+    umulh x23, x11, x18
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x12, x17
+    umulh x23, x12, x17
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    mul x22, x13, x16
+    umulh x23, x13, x16
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    adds x25, x25, x21
+    adcs x26, x26, xzr
+    adcs x24, x24, xzr
+    add x8, x25, xzr   //  set mc[6]
+
+    //  ma[19] iteration
+
+    ldr x21, [x0, #152]
+    mul x22, x10, x20
+    umulh x23, x10, x20
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, xzr, xzr
+
+    mul x22, x11, x19
+    umulh x23, x11, x19
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x12, x18
+    umulh x23, x12, x18
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    mul x22, x13, x17
+    umulh x23, x13, x17
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adcs x25, x25, xzr
+
+    adds x26, x26, x21
+    adcs x24, x24, xzr
+    adcs x25, x25, xzr
+    add x9, x26, xzr   //  set mc[7]
+
+    //  ma[20] iteration
+    ldr x21, [x0, #160]
+
+    mul x22, x11, x20
+    umulh x23, x11, x20
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, xzr, xzr
+
+    mul x22, x12, x19
+    umulh x23, x12, x19
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    mul x22, x13, x18
+    umulh x23, x13, x18
+    adds x24, x24, x22
+    adcs x25, x25, x23
+    adcs x26, x26, xzr
+
+    adds x24, x24, x21
+    adcs x25, x25, xzr
+    adcs x26, x26, xzr
+    add x10, x24, xzr   //  set mc[8]
+
+    //  ma[21] iteration
+
+    ldr x21, [x0, #168]
+    mul x22, x12, x20
+    umulh x23, x12, x20
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, xzr, xzr
+
+    mul x22, x13, x19
+    umulh x23, x13, x19
+    adds x25, x25, x22
+    adcs x26, x26, x23
+    adcs x24, x24, xzr
+
+    adds x25, x25, x21
+    adcs x26, x26, xzr
+    adcs x24, x24, xzr
+    add x11, x25, xzr   //  set mc[9]
+
+    //  ma[22] iteration
+
+    ldr x21, [x0, #176]
+    mul x22, x13, x20
+    umulh x23, x13, x20
+    adds x26, x26, x22
+    adcs x24, x24, x23
+    adds x26, x26, x21
+
+    ldr x21, [x0, #184]
+    adcs x24, x24, x21
+    add x12, x26, xzr   //  set mc[10]
+    add x13, x24, xzr   //  set mc[11]
+
+    stp x2, x3, [x1, #0]
+    stp x4, x5, [x1, #16]
+    stp x6, x7, [x1, #32]
+    stp x8, x9, [x1, #48]
+    stp x10, x11, [x1, #64]
+    stp x12, x13, [x1, #80]
+    
+    ldp x19, x20, [sp]
+    ldp x21, x22, [sp, #16]
+    ldp x23, x24, [sp, #32]
+    ldp x25, x26, [sp, #48]
+    ldp x27, x28, [sp, #64]
+    add sp, sp, #80
+    ret    
+
+
+//***********************************************************************
+//  751-bit multiprecision addition
+//  Operation: c [x2] = a [x0] + b [x1]
+//*********************************************************************** 
+.global oqs_sidh_cln16_mp_add751_asm
+oqs_sidh_cln16_mp_add751_asm:
+    ldp x3, x4,   [x0,#0]
+    ldp x5, x6,   [x0,#16]
+    ldp x7, x8,   [x0,#32]
+    ldp x9, x10,  [x0,#48]
+    ldp x11, x12, [x0,#64]
+    ldp x13, x14, [x0,#80]
+
+    ldp x15, x16,   [x1,#0]
+    ldp x17, x18,   [x1,#16]
+    adds x3, x3, x15
+    adcs x4, x4, x16
+    adcs x5, x5, x17
+    adcs x6, x6, x18
+    ldp x15, x16,   [x1,#32]
+    ldp x17, x18,   [x1,#48]
+    adcs x7, x7, x15
+    adcs x8, x8, x16
+    adcs x9, x9, x17
+    adcs x10, x10, x18
+    ldp x15, x16,   [x1,#64]
+    ldp x17, x18,   [x1,#80]
+    adcs x11, x11, x15
+    adcs x12, x12, x16
+    adcs x13, x13, x17
+    adcs x14, x14, x18
+
+    stp x3, x4,   [x2,#0]
+    stp x5, x6,   [x2,#16]
+    stp x7, x8,   [x2,#32]
+    stp x9, x10,  [x2,#48]
+    stp x11, x12, [x2,#64]
+    stp x13, x14, [x2,#80]
+    ret    
+
+
+//***********************************************************************
+//  2x751-bit multiprecision addition
+//  Operation: c [x2] = a [x0] + b [x1]
+//*********************************************************************** 
+.global oqs_sidh_cln16_mp_add751x2_asm
+oqs_sidh_cln16_mp_add751x2_asm:
+    ldp x3, x4,   [x0,#0]
+    ldp x5, x6,   [x0,#16]
+    ldp x7, x8,   [x0,#32]
+    ldp x9, x10,  [x0,#48]
+    ldp x11, x12, [x0,#64]
+    ldp x13, x14, [x0,#80]
+
+    ldp x15, x16,   [x1,#0]
+    ldp x17, x18,   [x1,#16]
+    adds x3, x3, x15
+    adcs x4, x4, x16
+    adcs x5, x5, x17
+    adcs x6, x6, x18
+    ldp x15, x16,   [x1,#32]
+    ldp x17, x18,   [x1,#48]
+    adcs x7, x7, x15
+    adcs x8, x8, x16
+    adcs x9, x9, x17
+    adcs x10, x10, x18
+    ldp x15, x16,   [x1,#64]
+    ldp x17, x18,   [x1,#80]
+    adcs x11, x11, x15
+    adcs x12, x12, x16
+    adcs x13, x13, x17
+    adcs x14, x14, x18
+
+    stp x3, x4,   [x2,#0]
+    stp x5, x6,   [x2,#16]
+    stp x7, x8,   [x2,#32]
+    stp x9, x10,  [x2,#48]
+    stp x11, x12, [x2,#64]
+    stp x13, x14, [x2,#80]
+	
+    ldp x3, x4,   [x0,#96]
+    ldp x5, x6,   [x0,#112]
+    ldp x7, x8,   [x0,#128]
+    ldp x9, x10,  [x0,#144]
+    ldp x11, x12, [x0,#160]
+    ldp x13, x14, [x0,#176]
+
+    ldp x15, x16,   [x1,#96]
+    ldp x17, x18,   [x1,#112]
+    adcs x3, x3, x15
+    adcs x4, x4, x16
+    adcs x5, x5, x17
+    adcs x6, x6, x18
+    ldp x15, x16,   [x1,#128]
+    ldp x17, x18,   [x1,#144]
+    adcs x7, x7, x15
+    adcs x8, x8, x16
+    adcs x9, x9, x17
+    adcs x10, x10, x18
+    ldp x15, x16,   [x1,#160]
+    ldp x17, x18,   [x1,#176]
+    adcs x11, x11, x15
+    adcs x12, x12, x16
+    adcs x13, x13, x17
+    adcs x14, x14, x18
+
+    stp x3, x4,   [x2,#96]
+    stp x5, x6,   [x2,#112]
+    stp x7, x8,   [x2,#128]
+    stp x9, x10,  [x2,#144]
+    stp x11, x12, [x2,#160]
+    stp x13, x14, [x2,#176]
+    ret
diff --git a/crypt/liboqs/kex_sidh_cln16/LICENSE.txt b/crypt/liboqs/kex_sidh_cln16/LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3b4ffccb0a28c6be8d8207d73b9fd3eab7155e60
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/LICENSE.txt
@@ -0,0 +1,21 @@
+SIDH Library
+
+Copyright (c) Microsoft Corporation
+All rights reserved. 
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 
+associated documentation files (the ""Software""), to deal in the Software without restriction,
+including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 
+and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial 
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 
+LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/crypt/liboqs/kex_sidh_cln16/Makefile.am b/crypt/liboqs/kex_sidh_cln16/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..b7272433963a635647761870f0fcdb90b861083d
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/Makefile.am
@@ -0,0 +1,18 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libcln16.la
+
+
+libcln16_la_SOURCES = ec_isogeny.c fpx.c kex_sidh_cln16.c SIDH.c sidh_kex.c SIDH_setup.c
+
+if X86_64
+libcln16_la_SOURCES += AMD64/fp_x64.c AMD64/fp_x64_asm.S
+else
+if ARM64
+libcln16_la_SOURCES += ARM64/fp_arm64.c ARM64/fp_arm64_asm.S
+else
+libcln16_la_SOURCES += generic/fp_generic.c
+endif
+endif
+
+libcln16_la_CPPFLAGS = -I../../include -I. -fPIC -w
+libcln16_la_CPPFLAGS += $(AM_CPPFLAGS) 
diff --git a/crypt/liboqs/kex_sidh_cln16/README.txt b/crypt/liboqs/kex_sidh_cln16/README.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7f4e4df30356c90b051248029d0210c46f71a5bf
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/README.txt
@@ -0,0 +1,79 @@
+                                        SIDH v2.0 (C Edition)
+                                       =======================
+
+The SIDH v2.0 library (C Edition) is a supersingular isogeny-based cryptography library that implements a
+new suite of algorithms for a post-quantum, ephemeral Diffie-Hellman key exchange scheme [2]. 
+
+The library was developed by Microsoft Research for experimentation purposes. 
+
+SECURITY NOTE: the scheme is NOT secure when using static keys.
+
+*** THE ORIGINAL README HAS BEEN TRIMMED LEAVING ONLY THE INFO RELEVANT FOR THE OQS INTEGRATION ***
+
+1. CONTENTS:
+   --------
+
+/                              - Library C and header files.                                     
+AMD64/                         - Optimized implementation of the field arithmetic for x64 platforms                                    
+ARM64/                         - Optimized implementation of the field arithmetic for ARMv8 platforms
+generic/                       - Implementation of the field arithmetic in portable C
+README.txt                     - This readme file
+
+
+2. CONTRIBUTIONS:
+   -------------
+
+   The field arithmetic implementation for 64-bit ARM processors (ARM64 folder) was contributed by 
+   David Urbanik (dburbani@uwaterloo.ca).
+
+
+3. MAIN FEATURES:
+   -------------
+   
+- Supports ephemeral Diffie-Hellman key exchange.
+- Supports a peace-of-mind hybrid key exchange mode that adds a classical elliptic curve Diffie-Hellman
+  key exchange on a high-security Montgomery curve providing 384 bits of classical ECDH security.
+- Protected against timing and cache-timing attacks through regular, constant-time implementation of 
+  all operations on secret key material.
+- Basic implementation of the underlying arithmetic functions using portable C to enable support on
+  a wide range of platforms including x64, x86 and ARM. 
+- Optimized implementation of the underlying arithmetic functions for x64 platforms with optional, 
+  high-performance x64 assembly for Linux. 
+- Optimized implementation of the underlying arithmetic functions for 64-bit ARM platforms using assembly
+  for Linux.
+
+
+4. NEW IN VERSION 2.0:
+   ------------------
+   
+- A new variant of the isogeny-based key exchange that includes a new suite of algorithms for efficient
+  public key compression [3]. In this variant, public keys are only 330 bytes (compared to the 564 bytes
+  required by the original SIDH key exchange variant without compression).  
+- An optimized implementation of the underlying arithmetic functions for 64-bit ARM (ARMv8) platforms.
+
+
+5. SUPPORTED PLATFORMS:
+   -------------------
+
+SIDH v2.0 is supported on a wide range of platforms including x64, x86 and ARM devices running Windows 
+or Linux OS. We have tested the library with Microsoft Visual Studio 2015, GNU GCC v4.9, and clang v3.8.
+See instructions below to choose an implementation option and compile on one of the supported platforms.
+
+
+
+REFERENCES:
+----------
+
+[1]   Craig Costello, Patrick Longa, and Michael Naehrig.
+      Efficient algorithms for supersingular isogeny Diffie-Hellman.      
+      Advances in Cryptology - CRYPTO 2016, LNCS 9814, pp. 572-601, 2016. 
+      Extended version available at: http://eprint.iacr.org/2016/413. 
+
+[2]   David Jao and Luca DeFeo. 
+      Towards quantum-resistant cryptosystems from supersingular elliptic curve isogenies.
+      PQCrypto 2011, LNCS 7071, pp. 19-34, 2011. 
+
+[3]   Craig Costello, David Jao, Patrick Longa, Michael Naehrig, Joost Renes, and David Urbanik.
+      Efficient compression of SIDH public keys.      
+      Advances in Cryptology - EUROCRYPT 2017, 2017. 
+      Preprint version available at: http://eprint.iacr.org/2016/963. 
\ No newline at end of file
diff --git a/crypt/liboqs/kex_sidh_cln16/SIDH.c b/crypt/liboqs/kex_sidh_cln16/SIDH.c
new file mode 100644
index 0000000000000000000000000000000000000000..d99d1e695cae57ac39aa4984b6acc37f65469d64
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/SIDH.c
@@ -0,0 +1,133 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral 
+*       Diffie-Hellman key exchange.
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: supersingular elliptic curve isogeny parameters
+*
+*********************************************************************************************/
+
+#include "SIDH_internal.h"
+
+// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points:
+// --------------------------------------------------------------------------------------------------
+// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located
+// at the leftmost position (i.e., little endian format).
+// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {b, a}, with b
+// in the least significant position.
+// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position.
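+//
+// Illustrative layout note (not part of the upstream sources): assuming the 96-octet
+// little-endian encoding of GF(p751) elements described in SIDH.h, an element a+b*i of
+// GF(p751^2) occupies 192 octets:
+//
+//     octets   0.. 95 : b, least significant octet first
+//     octets  96..191 : a, least significant octet first
+//
+// and an affine point P = (x,y) over GF(p751^2) occupies 2*192 octets, with x first.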
+
+//
+// Curve isogeny system "SIDHp751". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p751^2), where A=0, B=1 and C=1
+//
+
+CurveIsogenyStaticData CurveIsogeny_SIDHp751 = {
+    "SIDHp751", 768, 384, // Curve isogeny system ID, smallest multiple of 32 larger than the prime bitlength and smallest multiple of 32 larger than the order bitlength
+    751,                  // Bitlength of the prime
+    // Prime p751 = 2^372*3^239-1
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF, 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C},
+    // Base curve parameter "A"
+    {0},
+    // Base curve parameter "C"
+    {1},
+    // Order bitlength for Alice
+    372,
+    // Order of Alice's subgroup
+    {0x0, 0x0, 0x0, 0x0, 0x0, 0x0010000000000000},
+    // Order bitlength for Bob
+    379,
+    // Power of Bob's subgroup order
+    239,
+    // Order of Bob's subgroup
+    {0xC968549F878A8EEB, 0x59B1A13F7CC76E3E, 0xE9867D6EBE876DA9, 0x2B5045CB25748084, 0x2909F97BADC66856, 0x06FE5D541F71C0E1},
+    // Alice's generator PA = (XPA,YPA), where XPA and YPA are defined over GF(p751)
+    {0x4B0346F5CCE233E9, 0x632646086CE3ACD5, 0x5661D14AB7347693, 0xA58A20449AF1F133, 0xB9AC2F40C56D6FA4, 0x8E561E008FA0E3F3, 0x6CAE096D5DB822C9, 0x83FDB7A4AD3E83E8, 0xB1317AD904386217, 0x3FA23F89F6BE06D2, 0x429C8D36FF46BCC9, 0x00003E82027A38E9, 0x12E0D620BFB341D5, 0x0F8EEA7370893430, 0x5A99EBEC3B5B8B00, 0x236C7FAC9E69F7FD, 0x0F147EF3BD0CFEC5, 0x8ED5950D80325A8D, 0x1E911F50BF3F721A, 0x163A7421DFA8378D, 0xC331B043DA010E6A, 0x5E15915A755883B7, 0xB6236F5F598D56EB, 0x00003BBF8DCD4E7E},
+    // Bob's generator PB = (XPB,YPB), where XPB and YPB are defined over GF(p751)
+    {0x76ED2325DCC93103, 0xD9E1DF566C1D26D3, 0x76AECB94B919AEED, 0xD3785AAAA4D646C5, 0xCB610E30288A7770, 0x9BD3778659023B9E, 0xD5E69CF26DF23742, 0xA3AD8E17B9F9238C, 0xE145FE2D525160E0, 0xF8D5BCE859ED725D, 0x960A01AB8FF409A2, 0x00002F1D80EF06EF, 0x91479226A0687894, 0xBBC6BAF5F6BA40BB, 0x15B529122CFE3CA6, 0x7D12754F00E898A3, 0x76EBA0C8419745E9, 0x0A94F06CDFB3EADE, 0x399A6EDB2EEB2F9B, 0xE302C5129C049EEB, 0xC35892123951D4B6, 0x15445287ED1CC55D, 0x1ACAF351F09AB55A, 0x00000127A46D082A},
+    // BigMont's curve parameter A24 = (A+2)/4
+    156113,
+    // BigMont's order, where BigMont is defined by y^2=x^3+A*x^2+x
+    {0xA59B73D250E58055, 0xCB063593D0BE10E1, 0xF6515CCB5D076CBB, 0x66880747EDDF5E20, 0xBA515248A6BFD4AB, 0x3B8EF00DDDDC789D, 0xB8FB25A1527E1E2A, 0xB6A566C684FDF31D, 0x0213A619F5BAFA1D, 0xA158AD41172C95D2, 0x0384A427E5EEB719, 0x00001BF975507DC7},
+    // Montgomery constant Montgomery_R2 = (2^768)^2 mod p751
+    {0x233046449DAD4058, 0xDB010161A696452A, 0x5E36941472E3FD8E, 0xF40BFE2082A2E706, 0x4932CCA8904F8751, 0x1F735F1F1EE7FC81, 0xA24F4D80C1048E18, 0xB56C383CCDB607C5, 0x441DD47B735F9C90, 0x5673ED2C6A6AC82A, 0x06C905261132294B, 0x000041AD830F1F35},
+    // Montgomery constant -p751^-1 mod 2^768
+    {0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000, 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x258C28E5D541F71C},
+    // Value one in Montgomery representation
+    {0x00000000000249ad, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8310000000000000, 0x5527b1e4375c6c66, 0x697797bf3f4f24d0, 0xc89db7b2ac5c4e2e, 0x4ca4b439d2076956, 0x10f7926c7512c7e9, 0x00002d5b24bce5e2}};
+
+// Fixed parameters for isogeny tree computation
+
+const unsigned int splits_Alice[SIDH_MAX_Alice] = {
+    0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 5, 5, 6, 7, 8, 8, 9, 9, 9, 9, 9, 9, 9, 12,
+    11, 12, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 17, 17, 18, 18, 17, 21, 17,
+    18, 21, 20, 21, 21, 21, 21, 21, 22, 25, 25, 25, 26, 27, 28, 28, 29, 30, 31,
+    32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 35, 36, 36, 33, 36, 35, 36, 36, 35,
+    36, 36, 37, 38, 38, 39, 40, 41, 42, 38, 39, 40, 41, 42, 40, 46, 42, 43, 46,
+    46, 46, 46, 48, 48, 48, 48, 49, 49, 48, 53, 54, 51, 52, 53, 54, 55, 56, 57,
+    58, 59, 59, 60, 62, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+    65, 66, 67, 65, 66, 67, 66, 69, 70, 66, 67, 66, 69, 70, 69, 70, 70, 71, 72,
+    71, 72, 72, 74, 74, 75, 72, 72, 74, 74, 75, 72, 72, 74, 75, 75, 72, 72, 74,
+    75, 75, 77, 77, 79, 80, 80, 82};
+
+const unsigned int splits_Bob[SIDH_MAX_Bob] = {
+    0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 7, 8, 8, 8, 8, 9, 9, 9, 9, 9,
+    10, 12, 12, 12, 12, 12, 12, 13, 14, 14, 15, 16, 16, 16, 16, 16, 17, 16, 16,
+    17, 19, 19, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 24, 24, 25, 27,
+    27, 28, 28, 29, 28, 29, 28, 28, 28, 30, 28, 28, 28, 29, 30, 33, 33, 33, 33,
+    34, 35, 37, 37, 37, 37, 38, 38, 37, 38, 38, 38, 38, 38, 39, 43, 38, 38, 38,
+    38, 43, 40, 41, 42, 43, 48, 45, 46, 47, 47, 48, 49, 49, 49, 50, 51, 50, 49,
+    49, 49, 49, 51, 49, 53, 50, 51, 50, 51, 51, 51, 52, 55, 55, 55, 56, 56, 56,
+    56, 56, 58, 58, 61, 61, 61, 63, 63, 63, 64, 65, 65, 65, 65, 66, 66, 65, 65,
+    66, 66, 66, 66, 66, 66, 66, 71, 66, 73, 66, 66, 71, 66, 73, 66, 66, 71, 66,
+    73, 68, 68, 71, 71, 73, 73, 73, 75, 75, 78, 78, 78, 80, 80, 80, 81, 81, 82,
+    83, 84, 85, 86, 86, 86, 86, 86, 87, 86, 88, 86, 86, 86, 86, 88, 86, 88, 86,
+    86, 86, 88, 88, 86, 86, 86, 93, 90, 90, 92, 92, 92, 93, 93, 93, 93, 93, 97,
+    97, 97, 97, 97, 97};
+
+const uint64_t LIST[22][SIDH_NWORDS64_FIELD] = {
+    {0xC4EC4EC4EC4EDB72, 0xEC4EC4EC4EC4EC4E, 0x4EC4EC4EC4EC4EC4, 0xC4EC4EC4EC4EC4EC, 0xEC4EC4EC4EC4EC4E, 0x7464EC4EC4EC4EC4,
+     0x40E503E18E2D8BE1, 0x4C633882E467773F, 0x998CB725CB703B25, 0x51F8F01043ABC448, 0x70A53813C7A0B43A, 0x00006D56A7157672},
+    {0x276276276275B6C1, 0x6276276276276276, 0x7627627627627627, 0x2762762762762762, 0x6276276276276276, 0x6377627627627627,
+     0x2F25DD32AAF69FE5, 0xC6FBECF3EDD1AA16, 0x29C9664A396A6297, 0x0110D8C47D20DEFD, 0x1322BABB1082C8DD, 0x00000CCBE6DE8350},
+    {0x093B97EBDB11A7FE, 0x5093B97EBDB11A05, 0x05093B97EBDB11A0, 0xA05093B97EBDB11A, 0x1A05093B97EBDB11, 0x6F005093B97EBDB1,
+     0x7204A6634D6196D9, 0x1D6428F62F917BE5, 0x037CE7F8E9689A28, 0x913EC08959C36290, 0x03D1055241F89FDD, 0x000066963FEC58EB},
+    {0x98C2BA559CF4F604, 0xA98C2BA559CF516A, 0x6A98C2BA559CF516, 0x16A98C2BA559CF51, 0x516A98C2BA559CF5, 0x1A56A98C2BA559CF,
+     0xDD14E231C3FF5DDC, 0x5AB78BDF0FB0C987, 0x168ED3F1672906EC, 0xAEF17C4BE3A425E0, 0x6F1B34309268385F, 0x0000438BAFFC5E17},
+    {0xA37CA5409E30BE12, 0x20D6AFD873D163ED, 0xCA5409E30BA70497, 0x6AFD873D163EDA37, 0x409E30BA7049720D, 0x7013D163EDA37CA5,
+     0x196C325CFB1D98A8, 0x2A83CC98457F6BB1, 0x157AA4649C505D94, 0x556B2CFA3ED1E977, 0x9C8FB301D3BE27CD, 0x0000659B5D688370},
+    {0x437158A103E247EB, 0x23A9D7BF076A48BD, 0x158A103E256DD0AF, 0x9D7BF076A48BD437, 0xA103E256DD0AF23A, 0xD3776A48BD437158,
+     0xD4F7B332C1F74531, 0x6A60D92C4C627CD9, 0xC8009067FA1223C2, 0x195578D349C85ABC, 0x24DCFD2C3CE56026, 0x00001170D9C4A49E},
+    {0xBBC96234E708BFC3, 0xEE2CE77DBE4CE5A9, 0x21EF6EA93828AD37, 0x66C6ED51865018AE, 0xCB18F74253FB3379, 0x6231B31A5644369D,
+     0xF1831316FD5F9AD5, 0xD64412327D9D93D5, 0x2D9659AFA40085D6, 0xB872D3713E1F01AD, 0x96B929E85C90E590, 0x00002A0A122F3E1B},
+    {0x751DE109156C74F6, 0xC86993912AE79AFE, 0x96234E708BDAC04C, 0xCE77DBE4CE5A9BBC, 0xF6EA93828AD37EE2, 0x51B51865018AE21E,
+     0x57F8534430BDF5AF, 0xA5BA9F3225E0FA02, 0x05DBA7E2AB49759E, 0xE4706D1BDBA54763, 0xC5316BE14AF60ADD, 0x00002007A8A7A392},
+    {0x2DEC0AC86E1972FF, 0xD121D09CA2E105D1, 0x258D13A0778EDFB2, 0x25140153000C1B6E, 0xA06B73718D440E30, 0xA46BFDEB49118BC0,
+     0x11C799EE82EF46CF, 0xF094D7258BE44445, 0x6B087550522BC899, 0xD4380D82ADEEA2D3, 0x2AFFEB03C6970E0B, 0x00004FF89FD0E867},
+    {0xF48E11E080A36CD8, 0x75AA967CF316BF89, 0xED69E3E85A6CDEA8, 0x228638171449F794, 0xD4107549BB0BC6AE, 0xB7888349726731CC,
+     0x0589577AC89D03A2, 0x79218D005004DCD2, 0xA69CB3C82106FDB8, 0xE54D908CD9B31ED9, 0x2BB46423F8B44F5D, 0x0000158FC37F2F78},
+    {0xA2B8F30D2D8B2266, 0x37AE9DA734F3D4D4, 0x4BC3AC46B1EE2D59, 0xA541D219D9E660D2, 0xFD629383B8C12367, 0x0E789576DA7C1E23,
+     0x2321F1135780B208, 0x059EED9A8BB7694E, 0x3EAC20CCA7C7B679, 0xADED37DC1395BAAB, 0xD701BA16F6CD4328, 0x0000250A355A8E3D},
+    {0x8D08D7B596C87C8E, 0xFC2B5A576AB81FA7, 0x4ED68A1C251D1EAD, 0xA6618E345258FA06, 0xB532F4F490BD3165, 0x0987A5FDBAA88699,
+     0x77E908F4AE484907, 0xC85226731C871CED, 0x6F3E5A699F216EC7, 0x70E42ADFCCD68C99, 0x2277864817AA0CAD, 0x000037F521DA6BAC},
+    {0xDB72B65CA8D1D274, 0x286A73457D063FD5, 0x7355642D132BA567, 0x2A970D9461C0DC41, 0x93D2A07ED36F3BCC, 0xFD59A18D2D03447E,
+     0xBC047FB33098286A, 0x153E65AE22E4D2F0, 0xBC3F628AF44DDCEB, 0xCF8C49463A2BEC5D, 0x64D31CBF9A0FAE5B, 0x00000E88DF789F48},
+    {0x7E0E3CF3F602CC03, 0x240AE231C56EB636, 0x1630875FADB3CA47, 0x3FDF66239B9021FE, 0x4FA6BEA94AAE8287, 0x20BD32942BAEF1D9,
+     0x3DBE52BE754CD223, 0xD46D6B986A4C461E, 0x31772CCF6AB0EC49, 0x0362808B445792BE, 0xA57068B23D5D4F04, 0x0000233188CFA1F9},
+    {0x5CFEB9EE80FF8802, 0x641C991F35243E77, 0x109BF7F4D15352D9, 0xF57027C40F2AEC39, 0x78834C224A9E8F4D, 0x3B53C38C5DDA4903,
+     0x2472CAD0E4A1DD20, 0x91121637EFEFBFEB, 0x555DDF1E4E875433, 0xD185E0CEBC9A6BF8, 0x247E7766FEA9846A, 0x00004E24131398C0},
+    {0xAE911D5E41FDE1D5, 0x09FD291EAE9A7528, 0xD94DB04CE76D674F, 0xF269A050B317A36A, 0x1010C2464C5B488A, 0x165E22C0571F72CE,
+     0xB649686CDD7FAA40, 0xC65F833CCBC8E854, 0xA1DC607E92B4EC01, 0x6A9F6EA6C5D5598C, 0xB73B45E033D20693, 0x0000126974812437},
+    {0x7EF889C1569E078D, 0x8B4790D31AFC6D2F, 0x24BAD80FCF2607D2, 0x13C099586804EDD0, 0x0B219830D09F67F8, 0xFEEBDD0A795A4E0D,
+     0x2C86D567D8A5A5C6, 0x29EFDB5516CD064B, 0xAFB0A05F0230B35C, 0x73FCFA65EC7C5CB4, 0x245E08DC310C14E1, 0x00001778AC2903DF},
+    {0xF2BF1FF8427C7315, 0x591042D093B90137, 0x23EF8D48782832C9, 0x8DFB39E92296E3D6, 0x0C39FF556BEBDD42, 0x369F6980A4270C5D,
+     0x901F9AD6FCBAA761, 0x0E8E81D435F5FC7F, 0x9A795B9A8409D3D3, 0xD29FB9AE4384290F, 0x3B58F53DD7270C90, 0x00001E27D50D0631},
+    {0x838A7C8B0026C13C, 0xD38CAB350DC1F6BD, 0x426C57FE2436E928, 0xB81B289B8792A253, 0xF8EDB68037D3FB8E, 0x677EE0B4C50C01CD,
+     0xF43DCE6FED67139A, 0xF87EFEBF43D77877, 0x3EEA0E8543763A8A, 0x26E5A18357A35379, 0x55867648B9EA7D35, 0x000069DEC7A3C7DA},
+    {0x91CCFD3901F3F3FE, 0x2053992393125D73, 0x2129B3A10D7FF7C0, 0x74C64B3E68087A32, 0xEE46C5739B026DF9, 0x53E7B33F97EC0300,
+     0x14672E57801EC044, 0x18610440AA870975, 0xB6B9D9E0E0097AE6, 0x37AD3B922ED0F367, 0xA737A55936D5A8B8, 0x00005A30AF4F51DA},
+    {0xC925488939591E52, 0x8F87728BF0ED44E9, 0xF987EF64E4365147, 0x9338B89963265410, 0x340DA16F22024645, 0x5D295419E474BDC1,
+     0xBA0C2E509FC0510B, 0x957E35D641D5DDB5, 0x922F901AA4A236D8, 0xCBFA24C0F7E172E3, 0xB05A32F88CB5B9DC, 0x00001DC7A766A676},
+    {0x6128F8C2B276D2A1, 0x857530A2A633CE28, 0xEB624F41494C5D1E, 0x3FA62AE33B92CCA8, 0x11BCABB4CC4FBE22, 0x91EA14743FDBAC70,
+     0x9876F7DF900DC277, 0x375FD25E09091CBA, 0x580F3084B099A111, 0x58E9B3FB623FB297, 0x957732F791F6C337, 0x00000B070F784B99}};
diff --git a/crypt/liboqs/kex_sidh_cln16/SIDH.h b/crypt/liboqs/kex_sidh_cln16/SIDH.h
new file mode 100644
index 0000000000000000000000000000000000000000..762ea99b60e3704075d269f3f66f45e4f91e7934
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/SIDH.h
@@ -0,0 +1,356 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral
+*       Diffie-Hellman key exchange.
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: main header file
+*
+*********************************************************************************************/
+
+#ifndef __SIDH_H__
+#define __SIDH_H__
+
+// For C++
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <oqs/rand.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+// Definition of operating system
+
+#define OS_WIN 1
+#define OS_LINUX 2
+
+#if defined(WINDOWS) // Microsoft Windows OS
+#define OS_TARGET OS_WIN
+#else
+#define OS_TARGET OS_LINUX
+#endif
+
+// Definition of the targeted architecture and basic data types
+
+#define TARGET_AMD64 1
+#define TARGET_x86 2
+#define TARGET_ARM 3
+#define TARGET_ARM64 4
+
+#if (defined(__x86_64__) || defined(__x86_64) || defined(__arch64__) || defined(_M_AMD64) || defined(_M_X64) || defined(_WIN64) || !defined(__LP64__))
+#define _AMD64_
+#elif (defined(__aarch64__))
+#define _ARM64_
+#else
+#define _X86_
+#endif
+
+#if defined(_AMD64_)
+#define TARGET TARGET_AMD64
+#define RADIX 64
+typedef uint64_t digit_t;  // Unsigned 64-bit digit
+typedef int64_t sdigit_t;  // Signed 64-bit digit
+typedef uint32_t hdigit_t; // Unsigned 32-bit digit
+#define NWORDS_FIELD 12    // Number of words of a 751-bit field element
+#define p751_ZERO_WORDS 5  // Number of "0" digits in the least significant part of p751 + 1
+#elif defined(_X86_)
+#define TARGET TARGET_x86
+#define RADIX 32
+typedef uint32_t digit_t;  // Unsigned 32-bit digit
+typedef int32_t sdigit_t;  // Signed 32-bit digit
+typedef uint16_t hdigit_t; // Unsigned 16-bit digit
+#define NWORDS_FIELD 24
+#define p751_ZERO_WORDS 11
+#elif defined(_ARM_)
+#define TARGET TARGET_ARM
+#define RADIX 32
+typedef uint32_t digit_t;  // Unsigned 32-bit digit
+typedef int32_t sdigit_t;  // Signed 32-bit digit
+typedef uint16_t hdigit_t; // Unsigned 16-bit digit
+#define NWORDS_FIELD 24
+#define p751_ZERO_WORDS 11
+#elif defined(_ARM64_)
+#define TARGET TARGET_ARM64
+#define RADIX 64
+typedef uint64_t digit_t;
+typedef int64_t sdigit_t;
+typedef uint32_t hdigit_t;
+#define NWORDS_FIELD 12
+#define p751_ZERO_WORDS 5
+#else
+#error -- "Unsupported ARCHITECTURE"
+#endif
+
+#define RADIX64 64
+
+// Selection of generic, portable implementation
+
+#if !defined(SIDH_ASM) // defined(_GENERIC_)
+#define GENERIC_IMPLEMENTATION
+#endif
+
+// Unsupported configurations
+
+#if (TARGET != TARGET_AMD64) && (TARGET != TARGET_ARM64) && !defined(GENERIC_IMPLEMENTATION)
+#error -- "Unsupported configuration"
+#endif
+
+// Extended datatype support
+
+#if defined(GENERIC_IMPLEMENTATION)
+typedef uint64_t uint128_t[2];
+#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_LINUX) && (COMPILER == COMPILER_GCC || COMPILER == COMPILER_CLANG)
+#define UINT128_SUPPORT
+typedef unsigned uint128_t __attribute__((mode(TI)));
+#elif (TARGET == TARGET_ARM64 && OS_TARGET == OS_LINUX) && (COMPILER == COMPILER_GCC || COMPILER == COMPILER_CLANG)
+#define UINT128_SUPPORT
+typedef unsigned uint128_t __attribute__((mode(TI)));
+#elif (TARGET == TARGET_AMD64) && (OS_TARGET == OS_WIN && COMPILER == COMPILER_VC)
+#define SCALAR_INTRIN_SUPPORT
+typedef uint64_t uint128_t[2];
+#else
+#error -- "Unsupported configuration"
+#endif
+
+// Basic constants
+
+#define SIDH_NBITS_FIELD 751
+#define SIDH_MAXBITS_FIELD 768
+#define SIDH_MAXWORDS_FIELD ((SIDH_MAXBITS_FIELD + RADIX - 1) / RADIX) // Max. number of words to represent field elements
+#define SIDH_NWORDS64_FIELD ((SIDH_NBITS_FIELD + 63) / 64)             // Number of 64-bit words of a 751-bit field element
+#define SIDH_NBITS_ORDER 384
+#define SIDH_NWORDS_ORDER ((SIDH_NBITS_ORDER + RADIX - 1) / RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp.
+#define SIDH_NWORDS64_ORDER ((SIDH_NBITS_ORDER + 63) / 64)         // Number of 64-bit words of a 384-bit element
+#define SIDH_MAXBITS_ORDER SIDH_NBITS_ORDER
+#define SIDH_MAXWORDS_ORDER ((SIDH_MAXBITS_ORDER + RADIX - 1) / RADIX) // Max. number of words to represent elements in [1, oA-1] or [1, oB].
+
+// Basic constants for elliptic curve BigMont
+
+#define BIGMONT_SIDH_SIDH_NBITS_ORDER 749
+#define BIGMONT_MAXBITS_ORDER 768
+#define BIGMONT_NWORDS_ORDER ((BIGMONT_SIDH_SIDH_NBITS_ORDER + RADIX - 1) / RADIX) // Number of words of BigMont's subgroup order.
+#define BIGMONT_MAXWORDS_ORDER ((BIGMONT_MAXBITS_ORDER + RADIX - 1) / RADIX)       // Max. number of words to represent elements in [1, BigMont_order].
+
+// Size of SIDH secret key = (CurveIsogeny_SIDHp751.owordbits + 7)/8
+#define SIDH_SECRETKEY_LEN 48
+// Size of SIDH public key = 3*2*((CurveIsogeny_SIDHp751.pwordbits + 7)/8)
+#define SIDH_PUBKEY_LEN 576
+// Size of compressed SIDH public key = 3*((CurveIsogenyData->owordbits + 7)/8) + 2*((CurveIsogenyData->pwordbits + 7)/8)
+#define SIDH_COMPRESSED_PUBKEY_LEN 336
+// Size of value R for decompression = 2*2*((CurveIsogenyData->pwordbits + 7)/8)
+#define SIDH_COMPRESSED_R_LEN 384
+// Size of value A for decompression = 2*((CurveIsogeny_SIDHp751.pwordbits + 7)/8)
+#define SIDH_COMPRESSED_A_LEN 192
+// Size of SIDH shared key = 2*PBYTES_SIDHp751
+#define SIDH_SHAREDKEY_LEN 192
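+
+// Non-normative sanity check of the sizes above, assuming pwordbits = 768 and owordbits = 384
+// as given in CurveIsogeny_SIDHp751 (SIDH.c). The block is compiled out and only illustrates
+// the arithmetic behind the constants; _Static_assert requires C11.
+#if 0
+_Static_assert(SIDH_SECRETKEY_LEN == (384 + 7) / 8, "48 octets per element of Z_order");
+_Static_assert(SIDH_PUBKEY_LEN == 3 * 2 * ((768 + 7) / 8), "3 GF(p751^2) elements = 576 octets");
+_Static_assert(SIDH_COMPRESSED_PUBKEY_LEN == 3 * ((384 + 7) / 8) + 2 * ((768 + 7) / 8), "336 octets");
+_Static_assert(SIDH_COMPRESSED_R_LEN == 2 * 2 * ((768 + 7) / 8), "384 octets");
+_Static_assert(SIDH_COMPRESSED_A_LEN == 2 * ((768 + 7) / 8), "192 octets");
+_Static_assert(SIDH_SHAREDKEY_LEN == 2 * ((768 + 7) / 8), "192 octets");
+#endif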
+
+// Definitions of the error-handling type and error codes
+
+typedef enum {
+	SIDH_CRYPTO_SUCCESS,
+	SIDH_CRYPTO_ERROR,
+	SIDH_CRYPTO_ERROR_UNKNOWN,
+	SIDH_CRYPTO_ERROR_INVALID_PARAMETER,
+	SIDH_CRYPTO_ERROR_PUBLIC_KEY_VALIDATION,
+	SIDH_CRYPTO_ERROR_TOO_MANY_ITERATIONS,
+	SIDH_CRYPTO_ERROR_END_OF_LIST
+} SIDH_CRYPTO_STATUS;
+
+#define SIDH_CRYPTO_STATUS_TYPE_SIZE (SIDH_CRYPTO_ERROR_END_OF_LIST)
+
+// Definition of type for curve isogeny system identifiers. Currently valid value is "SIDHp751" (see SIDH.h)
+typedef char CurveIsogeny_ID[10];
+
+// Supersingular elliptic curve isogeny structures:
+
+// This data struct contains the static curve isogeny data
+typedef struct
+{
+	CurveIsogeny_ID CurveIsogeny;                   // Curve isogeny system identifier, base curve defined over GF(p^2)
+	unsigned int pwordbits;                         // Smallest multiple of 32 larger than the prime bitlength
+	unsigned int owordbits;                         // Smallest multiple of 32 larger than the order bitlength
+	unsigned int pbits;                             // Bitlength of the prime p
+	uint64_t prime[SIDH_MAXWORDS_FIELD];            // Prime p
+	uint64_t A[SIDH_MAXWORDS_FIELD];                // Base curve parameter "A"
+	uint64_t C[SIDH_MAXWORDS_FIELD];                // Base curve parameter "C"
+	unsigned int oAbits;                            // Order bitlength for Alice
+	uint64_t Aorder[SIDH_MAXWORDS_ORDER];           // Order of Alice's (sub)group
+	unsigned int oBbits;                            // Order bitlength for Bob
+	unsigned int eB;                                // Power of Bob's subgroup order (i.e., oB = 3^eB)
+	uint64_t Border[SIDH_MAXWORDS_ORDER];           // Order of Bob's (sub)group
+	uint64_t PA[2 * SIDH_MAXWORDS_FIELD];           // Alice's generator PA = (XPA,YPA), where XPA and YPA are defined over GF(p)
+	uint64_t PB[2 * SIDH_MAXWORDS_FIELD];           // Bob's generator PB = (XPB,YPB), where XPB and YPB are defined over GF(p)
+	unsigned int BigMont_A24;                       // BigMont's curve parameter A24 = (A+2)/4
+	uint64_t BigMont_order[BIGMONT_MAXWORDS_ORDER]; // BigMont's subgroup order
+	uint64_t Montgomery_R2[SIDH_MAXWORDS_FIELD];    // Montgomery constant (2^W)^2 mod p, using a suitable value W
+	uint64_t Montgomery_pp[SIDH_MAXWORDS_FIELD];    // Montgomery constant -p^-1 mod 2^W, using a suitable value W
+	uint64_t Montgomery_one[SIDH_MAXWORDS_FIELD];   // Value one in Montgomery representation
+} CurveIsogenyStaticData, *PCurveIsogenyStaticData;
+
+// This data struct is initialized with the targeted curve isogeny system during setup
+typedef struct
+{
+	CurveIsogeny_ID CurveIsogeny; // Curve isogeny system identifier, base curve defined over GF(p^2)
+	unsigned int pwordbits;       // Closest multiple of 32 to prime bitlength
+	unsigned int owordbits;       // Closest multiple of 32 to order bitlength
+	unsigned int pbits;           // Bitlength of the prime p
+	digit_t *prime;               // Prime p
+	digit_t *A;                   // Base curve parameter "A"
+	digit_t *C;                   // Base curve parameter "C"
+	unsigned int oAbits;          // Order bitlength for Alice
+	digit_t *Aorder;              // Order of Alice's (sub)group
+	unsigned int oBbits;          // Order bitlength for Bob
+	unsigned int eB;              // Power of Bob's subgroup order (i.e., oB = 3^eB)
+	digit_t *Border;              // Order of Bob's (sub)group
+	digit_t *PA;                  // Alice's generator PA = (XPA,YPA), where XPA and YPA are defined over GF(p)
+	digit_t *PB;                  // Bob's generator PB = (XPB,YPB), where XPB and YPB are defined over GF(p)
+	unsigned int BigMont_A24;     // BigMont's curve parameter A24 = (A+2)/4
+	digit_t *BigMont_order;       // BigMont's subgroup order
+	digit_t *Montgomery_R2;       // Montgomery constant (2^W)^2 mod p, using a suitable value W
+	digit_t *Montgomery_pp;       // Montgomery constant -p^-1 mod 2^W, using a suitable value W
+	digit_t *Montgomery_one;      // Value one in Montgomery representation
+} CurveIsogenyStruct, *PCurveIsogenyStruct;
+
+// Supported curve isogeny systems:
+
+// "SIDHp751", base curve: supersingular elliptic curve E: y^2 = x^3 + x
+extern CurveIsogenyStaticData CurveIsogeny_SIDHp751;
+
+/******************** Function prototypes ***********************/
+/*************** Setup/initialization functions *****************/
+
+// Dynamic allocation of memory for curve isogeny structure.
+// Returns NULL on error.
+PCurveIsogenyStruct oqs_sidh_cln16_curve_allocate(PCurveIsogenyStaticData CurveData);
+
+// Initialize curve isogeny structure pCurveIsogeny with static data extracted from pCurveIsogenyData.
+// This needs to be called after allocating memory for "pCurveIsogeny" using SIDH_curve_allocate().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_curve_initialize(PCurveIsogenyStruct pCurveIsogeny, PCurveIsogenyStaticData pCurveIsogenyData);
+
+// Free memory for curve isogeny structure
+void oqs_sidh_cln16_curve_free(PCurveIsogenyStruct pCurveIsogeny);
+
+// Output random values in the range [1, order-1] in little endian format that can be used as private keys.
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_mod_order(digit_t *random_digits, unsigned int AliceOrBob, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand);
+
+// Output random values in the range [1, BigMont_order-1] in little endian format that can be used as private keys
+// to compute scalar multiplications using the elliptic curve BigMont.
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_BigMont_mod_order(digit_t *random_digits, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand);
+
+// Clear "nwords" digits from memory
+void oqs_sidh_cln16_clear_words(void *mem, digit_t nwords);
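+
+// A minimal, non-normative usage sketch of the setup functions above. It is compiled out and
+// deliberately simplified (error handling is reduced to early returns); the value 0 passed for
+// AliceOrBob corresponds to SIDH_ALICE in SIDH_internal.h.
+#if 0
+static int sidh_setup_sketch(OQS_RAND *rand) {
+	PCurveIsogenyStruct curve = oqs_sidh_cln16_curve_allocate(&CurveIsogeny_SIDHp751);
+	if (curve == NULL)
+		return -1;
+	if (oqs_sidh_cln16_curve_initialize(curve, &CurveIsogeny_SIDHp751) != SIDH_CRYPTO_SUCCESS) {
+		oqs_sidh_cln16_curve_free(curve);
+		return -1;
+	}
+	digit_t sk[SIDH_MAXWORDS_ORDER] = {0};
+	// Draw a value in [1, Alice's order - 1] that can serve as a private key.
+	SIDH_CRYPTO_STATUS status = oqs_sidh_cln16_random_mod_order(sk, 0, curve, rand);
+	oqs_sidh_cln16_clear_words(sk, SIDH_MAXWORDS_ORDER);
+	oqs_sidh_cln16_curve_free(curve);
+	return status == SIDH_CRYPTO_SUCCESS ? 0 : -1;
+}
+#endif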
+
+// OQS INTEGRATION NOTE: the following code used to be in SIDH_api.h. It is merged here to simplify integration.
+
+/*********************** Ephemeral key exchange API ***********************/
+
+// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys.
+// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016.
+// Extended version available at: http://eprint.iacr.org/2016/859
+
+// Alice's ephemeral key-pair generation
+// It produces a private key pPrivateKeyA and computes the public key pPublicKeyA.
+// The private key is an even integer in the range [2, oA-2], where oA = 2^372 (i.e., 372 bits in total).
+// The public key consists of 3 elements in GF(p751^2), i.e., 564 bytes.
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralKeyGeneration_A(unsigned char *pPrivateKeyA, unsigned char *pPublicKeyA, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand);
+
+// Bob's ephemeral key-pair generation
+// It produces a private key pPrivateKeyB and computes the public key pPublicKeyB.
+// The private key is an integer in the range [1, oB-1], where oB = 3^239 (i.e., 379 bits in total).
+// The public key consists of 3 elements in GF(p751^2), i.e., 564 bytes.
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralKeyGeneration_B(unsigned char *pPrivateKeyB, unsigned char *pPublicKeyB, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand);
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key pSharedSecretA using her secret key pPrivateKeyA and Bob's public key pPublicKeyB
+// Inputs: Alice's pPrivateKeyA is an even integer in the range [2, oA-2], where oA = 2^372 (i.e., 372 bits in total).
+//         Bob's pPublicKeyB consists of 3 elements in GF(p751^2), i.e., 564 bytes.
+// Output: a shared secret pSharedSecretA that consists of one element in GF(p751^2), i.e., 1502 bits in total.
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_A(const unsigned char *pPrivateKeyA, const unsigned char *pPublicKeyB, unsigned char *pSharedSecretA, PCurveIsogenyStruct CurveIsogeny);
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key pSharedSecretB using his secret key pPrivateKeyB and Alice's public key pPublicKeyA
+// Inputs: Bob's pPrivateKeyB is an integer in the range [1, oB-1], where oB = 3^239 (i.e., 379 bits in total).
+//         Alice's pPublicKeyA consists of 3 elements in GF(p751^2), i.e., 564 bytes.
+// Output: a shared secret pSharedSecretB that consists of one element in GF(p751^2), i.e., 1502 bits in total.
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_B(const unsigned char *pPrivateKeyB, const unsigned char *pPublicKeyA, unsigned char *pSharedSecretB, PCurveIsogenyStruct CurveIsogeny);
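+
+// A minimal, non-normative sketch of the full ephemeral exchange built from the four functions
+// above. It is compiled out; buffer sizes use the SIDH_*_LEN constants defined earlier and the
+// curve structure is assumed to have been allocated and initialized as described above.
+#if 0
+#include <string.h>
+static int sidh_exchange_sketch(PCurveIsogenyStruct curve, OQS_RAND *rand) {
+	unsigned char skA[SIDH_SECRETKEY_LEN], pkA[SIDH_PUBKEY_LEN];
+	unsigned char skB[SIDH_SECRETKEY_LEN], pkB[SIDH_PUBKEY_LEN];
+	unsigned char ssA[SIDH_SHAREDKEY_LEN], ssB[SIDH_SHAREDKEY_LEN];
+
+	if (oqs_sidh_cln16_EphemeralKeyGeneration_A(skA, pkA, curve, rand) != SIDH_CRYPTO_SUCCESS)
+		return -1;
+	if (oqs_sidh_cln16_EphemeralKeyGeneration_B(skB, pkB, curve, rand) != SIDH_CRYPTO_SUCCESS)
+		return -1;
+	// Each side combines its own private key with the other side's public key.
+	if (oqs_sidh_cln16_EphemeralSecretAgreement_A(skA, pkB, ssA, curve) != SIDH_CRYPTO_SUCCESS)
+		return -1;
+	if (oqs_sidh_cln16_EphemeralSecretAgreement_B(skB, pkA, ssB, curve) != SIDH_CRYPTO_SUCCESS)
+		return -1;
+	// Both sides now hold the same shared secret (one GF(p751^2) element).
+	return memcmp(ssA, ssB, SIDH_SHAREDKEY_LEN) == 0 ? 0 : -1;
+}
+#endif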
+
+/*********************** Ephemeral key exchange API with compressed public keys ***********************/
+
+// Alice's public key compression
+// It produces a compressed output that consists of three elements in Z_orderB and one field element
+// Input : Alice's public key PublicKeyA, which consists of 3 elements in GF(p751^2).
+// Output: a compressed value CompressedPKA that consists of three elements in Z_orderB and one element in GF(p751^2).
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+void oqs_sidh_cln16_PublicKeyCompression_A(const unsigned char *PublicKeyA, unsigned char *CompressedPKA, PCurveIsogenyStruct CurveIsogeny);
+
+// Alice's public key value decompression computed by Bob
+// Inputs: Bob's private key SecretKeyB, and
+//         Alice's compressed public key data CompressedPKA, which consists of three elements in Z_orderB and one element in GF(p751^2),
+// Output: a point point_R in coordinates (X:Z) and the curve parameter param_A in GF(p751^2). Outputs are stored in Montgomery representation.
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+void oqs_sidh_cln16_PublicKeyADecompression_B(const unsigned char *SecretKeyB, const unsigned char *CompressedPKA, unsigned char *point_R, unsigned char *param_A, PCurveIsogenyStruct CurveIsogeny);
+
+// Alice's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB
+// Inputs: Alice's PrivateKeyA is an even integer in the range [2, oA-2], where oA = 2^372 (i.e., 372 bits in total).
+//         Bob's PublicKeyB consists of 3 elements in GF(p751^2), i.e., 564 bytes.
+// Output: a shared secret SharedSecretA that consists of one element in GF(p751^2), i.e., 1502 bits in total.
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_Compression_A(const unsigned char *PrivateKeyA, const unsigned char *point_R, const unsigned char *param_A, unsigned char *SharedSecretA, PCurveIsogenyStruct CurveIsogeny);
+
+// Bob's public key compression
+// It produces a compressed output that consists of three elements in Z_orderA and one field element
+// Input : Bob's public key PublicKeyB, which consists of 3 elements in GF(p751^2).
+// Output: a compressed value CompressedPKB that consists of three elements in Z_orderA and one element in GF(p751^2).
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+void oqs_sidh_cln16_PublicKeyCompression_B(const unsigned char *PublicKeyB, unsigned char *CompressedPKB, PCurveIsogenyStruct CurveIsogeny);
+
+// Bob's public key value decompression computed by Alice
+// Inputs: Alice's private key SecretKeyA, and
+//         Bob's compressed public key data CompressedPKB, which consists of three elements in Z_orderA and one element in GF(p751^2).
+// Output: a point point_R in coordinates (X:Z) and the curve parameter param_A in GF(p751^2). Outputs are stored in Montgomery representation.
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+void oqs_sidh_cln16_PublicKeyBDecompression_A(const unsigned char *SecretKeyA, const unsigned char *CompressedPKB, unsigned char *point_R, unsigned char *param_A, PCurveIsogenyStruct CurveIsogeny);
+
+// Bob's ephemeral shared secret computation
+// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's decompressed data point_R and param_A
+// Inputs: Bob's PrivateKeyB is an integer in the range [1, oB-1], where oB = 3^239.
+//         Alice's decompressed data consists of point_R in (X:Z) coordinates and the curve paramater param_A in GF(p751^2).
+// Output: a shared secret SharedSecretB that consists of one element in GF(p751^2).
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_Compression_B(const unsigned char *PrivateKeyB, const unsigned char *point_R, const unsigned char *param_A, unsigned char *SharedSecretB, PCurveIsogenyStruct CurveIsogeny);
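+
+// A minimal, non-normative sketch of the compressed flow in one direction: Alice compresses her
+// public key, Bob decompresses it with his private key and derives the shared secret. Compiled
+// out; the buffers use the SIDH_COMPRESSED_* constants defined earlier.
+#if 0
+static int sidh_compressed_sketch(const unsigned char *pkA, const unsigned char *skB,
+                                  unsigned char *ssB, PCurveIsogenyStruct curve) {
+	unsigned char compA[SIDH_COMPRESSED_PUBKEY_LEN];
+	unsigned char pointR[SIDH_COMPRESSED_R_LEN], paramA[SIDH_COMPRESSED_A_LEN];
+
+	// Compression needs no secret; it is performed by the sender of pkA.
+	oqs_sidh_cln16_PublicKeyCompression_A(pkA, compA, curve);
+	// Bob recovers point_R and the curve parameter A from the 336-octet compressed key.
+	oqs_sidh_cln16_PublicKeyADecompression_B(skB, compA, pointR, paramA, curve);
+	// The shared secret is then computed from the decompressed data.
+	return oqs_sidh_cln16_EphemeralSecretAgreement_Compression_B(skB, pointR, paramA, ssB, curve)
+	               == SIDH_CRYPTO_SUCCESS ? 0 : -1;
+}
+#endif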
+
+/*********************** Scalar multiplication API using BigMont ***********************/
+
+// BigMont's scalar multiplication using the Montgomery ladder
+// Inputs: x, the affine x-coordinate of a point P on BigMont: y^2=x^3+A*x^2+x,
+//         scalar m.
+// Output: xout, the affine x-coordinate of m*(x:1)
+// CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_BigMont_ladder(unsigned char *x, digit_t *m, unsigned char *xout, PCurveIsogenyStruct CurveIsogeny);
+
+// Encoding of keys for isogeny system "SIDHp751" (wire format):
+// ------------------------------------------------------------
+// Elements over GF(p751) are encoded in 96 octets in little endian format (i.e., the least significant octet located at the leftmost position).
+// Elements (a+b*i) over GF(p751^2), where a and b are defined over GF(p751), are encoded as {b, a}, with b in the least significant position.
+// Elements over Z_oA and Z_oB are encoded in 48 octets in little endian format.
+//
+// Private keys pPrivateKeyA and pPrivateKeyB are defined in Z_oA and Z_oB (resp.) and can have values in the range [2, 2^372-2] and [1, 3^239-1], resp.
+// In the key exchange API, they are encoded in 48 octets in little endian format.
+// Public keys pPublicKeyA and pPublicKeyB consist of four elements in GF(p751^2). In the key exchange API, they are encoded in 768 octets in little
+// endian format.
+// Shared keys pSharedSecretA and pSharedSecretB consist of one element in GF(p751^2). In the key exchange API, they are encoded in 192 octets in little
+// endian format.
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/crypt/liboqs/kex_sidh_cln16/SIDH_internal.h b/crypt/liboqs/kex_sidh_cln16/SIDH_internal.h
new file mode 100644
index 0000000000000000000000000000000000000000..82a4a101b9a157c777042b23a8ff5de58b587329
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/SIDH_internal.h
@@ -0,0 +1,598 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral 
+*       Diffie-Hellman key exchange.
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: internal header file
+*
+*********************************************************************************************/
+
+#ifndef __SIDH_INTERNAL_H__
+#define __SIDH_INTERNAL_H__
+
+// For C++
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(WINDOWS)
+#define UNUSED
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include "SIDH.h"
+
+// Basic constants
+
+#define SIDH_ALICE 0
+#define SIDH_BOB 1
+#define SIDH_MAX_INT_POINTS_ALICE 8
+// Fixed parameters for isogeny tree computation
+#define SIDH_MAX_INT_POINTS_BOB 10
+#define SIDH_MAX_Alice 185
+#define SIDH_MAX_Bob 239
+
+// SIDH's basic element definitions and point representations
+
+typedef digit_t oqs_sidh_cln16_felm_t[NWORDS_FIELD];          // Datatype for representing 751-bit field elements (768-bit max.)
+typedef digit_t oqs_sidh_cln16_dfelm_t[2 * NWORDS_FIELD];     // Datatype for representing double-precision 2x751-bit field elements (2x768-bit max.)
+typedef oqs_sidh_cln16_felm_t oqs_sidh_cln16_f2elm_t[2];      // Datatype for representing quadratic extension field elements GF(p751^2)
+typedef oqs_sidh_cln16_f2elm_t oqs_sidh_cln16_publickey_t[3]; // Datatype for representing public keys equivalent to three GF(p751^2) elements
+
+typedef struct {
+	oqs_sidh_cln16_f2elm_t x;
+	oqs_sidh_cln16_f2elm_t y;
+} oqs_sidh_cln16_point_affine; // Point representation in affine coordinates on Montgomery curve.
+typedef oqs_sidh_cln16_point_affine oqs_sidh_cln16_point_t[1];
+
+typedef struct {
+	oqs_sidh_cln16_f2elm_t X;
+	oqs_sidh_cln16_f2elm_t Z;
+} oqs_sidh_cln16_point_proj; // Point representation in projective XZ Montgomery coordinates.
+typedef oqs_sidh_cln16_point_proj oqs_sidh_cln16_point_proj_t[1];
+
+typedef struct {
+	oqs_sidh_cln16_f2elm_t X;
+	oqs_sidh_cln16_f2elm_t Y;
+	oqs_sidh_cln16_f2elm_t Z;
+} oqs_sidh_cln16_point_full_proj; // Point representation in projective XYZ Montgomery coordinates.
+typedef oqs_sidh_cln16_point_full_proj oqs_sidh_cln16_point_full_proj_t[1];
+
+typedef struct {
+	oqs_sidh_cln16_f2elm_t X2;
+	oqs_sidh_cln16_f2elm_t XZ;
+	oqs_sidh_cln16_f2elm_t Z2;
+	oqs_sidh_cln16_f2elm_t YZ;
+} oqs_sidh_cln16_point_ext_proj;
+typedef oqs_sidh_cln16_point_ext_proj oqs_sidh_cln16_point_ext_proj_t[1]; // Point representation in extended projective XYZ Montgomery coordinates.
+
+typedef struct {
+	oqs_sidh_cln16_felm_t x;
+	oqs_sidh_cln16_felm_t y;
+} oqs_sidh_cln16_point_basefield_affine; // Point representation in affine coordinates on Montgomery curve over the base field.
+typedef oqs_sidh_cln16_point_basefield_affine oqs_sidh_cln16_point_basefield_t[1];
+
+typedef struct {
+	oqs_sidh_cln16_felm_t X;
+	oqs_sidh_cln16_felm_t Z;
+} oqs_sidh_cln16_point_basefield_proj; // Point representation in projective XZ Montgomery coordinates over the base field.
+typedef oqs_sidh_cln16_point_basefield_proj oqs_sidh_cln16_point_basefield_proj_t[1];
+
+// Macro definitions
+
+#define NBITS_TO_NBYTES(nbits) (((nbits) + 7) / 8)                                             // Conversion macro from number of bits to number of bytes
+#define NBITS_TO_NWORDS(nbits) (((nbits) + (sizeof(digit_t) * 8) - 1) / (sizeof(digit_t) * 8)) // Conversion macro from number of bits to number of computer words
+#define NBYTES_TO_NWORDS(nbytes) (((nbytes) + sizeof(digit_t) - 1) / sizeof(digit_t))          // Conversion macro from number of bytes to number of computer words
+
+// Macro to avoid compiler warnings when detecting unreferenced parameters
+#define UNREFERENCED_PARAMETER(PAR) (PAR)
+
+/********************** Constant-time unsigned comparisons ***********************/
+
+// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
+
+static __inline unsigned int is_digit_nonzero_ct(digit_t x) { // Is x != 0?
+	return (unsigned int) ((x | (0 - x)) >> (RADIX - 1));
+}
+
+static __inline unsigned int is_digit_zero_ct(digit_t x) { // Is x = 0?
+	return (unsigned int) (1 ^ is_digit_nonzero_ct(x));
+}
+
+static __inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y) { // Is x < y?
+	return (unsigned int) ((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1));
+}
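+
+// Rationale (informal): for a nonzero digit x, at least one of x and its two's complement 0 - x
+// has the most significant bit set, so (x | (0 - x)) >> (RADIX - 1) evaluates to 1, while for
+// x == 0 both operands are zero and the result is 0. Likewise, is_digit_lessthan_ct() extracts
+// the borrow bit of x - y without any data-dependent branch, which is what makes these helpers
+// safe to use on secret values.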
+
+/********************** Macros for platform-dependent operations **********************/
+
+#if defined(GENERIC_IMPLEMENTATION)
+// Digit multiplication
+#define MUL(multiplier, multiplicand, hi, lo) \
+	oqs_sidh_cln16_digit_x_digit((multiplier), (multiplicand), &(lo));
+
+// Digit addition with carry
+#define ADDC(carryIn, addend1, addend2, carryOut, sumOut)                                                           \
+	{                                                                                                               \
+		digit_t tempReg = (addend1) + (digit_t)(carryIn);                                                           \
+		(sumOut) = (addend2) + tempReg;                                                                             \
+		(carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); \
+	}
+
+// Digit subtraction with borrow
+#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut)                                                       \
+	{                                                                                                                       \
+		digit_t tempReg = (minuend) - (subtrahend);                                                                         \
+		unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) &is_digit_zero_ct(tempReg))); \
+		(differenceOut) = tempReg - (digit_t)(borrowIn);                                                                    \
+		(borrowOut) = borrowReg;                                                                                            \
+	}
+
+// Shift right with flexible datatype
+#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \
+	(shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift)));
+
+// Shift left with flexible datatype
+#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \
+	(shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (DigitSize - (shift)));
+
+// 64x64-bit multiplication
+#define MUL128(multiplier, multiplicand, product) \
+	oqs_sidh_cln16_mp_mul((digit_t *) &(multiplier), (digit_t *) &(multiplicand), (digit_t *) &(product), NWORDS_FIELD / 2);
+
+// 128-bit addition, inputs < 2^127
+#define ADD128(addend1, addend2, addition) \
+	oqs_sidh_cln16_mp_add((digit_t *) (addend1), (digit_t *) (addend2), (digit_t *) (addition), NWORDS_FIELD);
+
+// 128-bit addition with output carry
+#define ADC128(addend1, addend2, carry, addition) \
+	(carry) = oqs_sidh_cln16_mp_add((digit_t *) (addend1), (digit_t *) (addend2), (digit_t *) (addition), NWORDS_FIELD);
+
+#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_WIN)
+
+// Digit multiplication
+#define MUL(multiplier, multiplicand, hi, lo) \
+	(lo) = _umul128((multiplier), (multiplicand), (hi));
+
+// Digit addition with carry
+#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \
+	(carryOut) = _addcarry_u64((carryIn), (addend1), (addend2), &(sumOut));
+
+// Digit subtraction with borrow
+#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \
+	(borrowOut) = _subborrow_u64((borrowIn), (minuend), (subtrahend), &(differenceOut));
+
+// Digit shift right
+#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \
+	(shiftOut) = __shiftright128((lowIn), (highIn), (shift));
+
+// Digit shift left
+#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \
+	(shiftOut) = __shiftleft128((lowIn), (highIn), (shift));
+
+// 64x64-bit multiplication
+#define MUL128(multiplier, multiplicand, product) \
+	(product)[0] = _umul128((multiplier), (multiplicand), &(product)[1]);
+
+// 128-bit addition, inputs < 2^127
+#define ADD128(addend1, addend2, addition)                                                  \
+	{                                                                                       \
+		unsigned char carry = _addcarry_u64(0, (addend1)[0], (addend2)[0], &(addition)[0]); \
+		_addcarry_u64(carry, (addend1)[1], (addend2)[1], &(addition)[1]);                   \
+	}
+
+// 128-bit addition with output carry
+#define ADC128(addend1, addend2, carry, addition)                           \
+	(carry) = _addcarry_u64(0, (addend1)[0], (addend2)[0], &(addition)[0]); \
+	(carry) = _addcarry_u64((carry), (addend1)[1], (addend2)[1], &(addition)[1]);
+
+// 128-bit subtraction, subtrahend < 2^127
+#define SUB128(minuend, subtrahend, difference)                                                    \
+	{                                                                                              \
+		unsigned char borrow = _subborrow_u64(0, (minuend)[0], (subtrahend)[0], &(difference)[0]); \
+		_subborrow_u64(borrow, (minuend)[1], (subtrahend)[1], &(difference)[1]);                   \
+	}
+
+// 128-bit right shift, max. shift value is 64
+#define SHIFTR128(Input, shift, shiftOut)                             \
+	(shiftOut)[0] = __shiftright128((Input)[0], (Input)[1], (shift)); \
+	(shiftOut)[1] = (Input)[1] >> (shift);
+
+// 128-bit left shift, max. shift value is 64
+#define SHIFTL128(Input, shift, shiftOut)                            \
+	(shiftOut)[1] = __shiftleft128((Input)[0], (Input)[1], (shift)); \
+	(shiftOut)[0] = (Input)[0] << (shift);
+
+#define MULADD128(multiplier, multiplicand, addend, carry, result) \
+	;                                                              \
+	{                                                              \
+		uint128_t product;                                         \
+		MUL128(multiplier, multiplicand, product);                 \
+		ADC128(addend, product, carry, result);                    \
+	}
+
+#elif ((TARGET == TARGET_AMD64 || TARGET == TARGET_ARM64) && OS_TARGET == OS_LINUX)
+
+// Digit multiplication
+#define MUL(multiplier, multiplicand, hi, lo)                                    \
+	{                                                                            \
+		uint128_t tempReg = (uint128_t)(multiplier) * (uint128_t)(multiplicand); \
+		*(hi) = (digit_t)(tempReg >> RADIX);                                     \
+		(lo) = (digit_t) tempReg;                                                \
+	}
+
+// Digit addition with carry
+#define ADDC(carryIn, addend1, addend2, carryOut, sumOut)                                       \
+	{                                                                                           \
+		uint128_t tempReg = (uint128_t)(addend1) + (uint128_t)(addend2) + (uint128_t)(carryIn); \
+		(carryOut) = (digit_t)(tempReg >> RADIX);                                               \
+		(sumOut) = (digit_t) tempReg;                                                           \
+	}
+
+// Digit subtraction with borrow
+#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut)                               \
+	{                                                                                               \
+		uint128_t tempReg = (uint128_t)(minuend) - (uint128_t)(subtrahend) - (uint128_t)(borrowIn); \
+		(borrowOut) = (digit_t)(tempReg >> (sizeof(uint128_t) * 8 - 1));                            \
+		(differenceOut) = (digit_t) tempReg;                                                        \
+	}
+
+// Digit shift right
+#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \
+	(shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (RADIX - (shift)));
+
+// Digit shift left
+#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \
+	(shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (RADIX - (shift)));
+
+#endif
+
+// Multiprecision multiplication selection
+#if defined(GENERIC_IMPLEMENTATION) && (TARGET == TARGET_AMD64)
+#define oqs_sidh_cln16_mp_mul_comba oqs_sidh_cln16_mp_mul
+#else
+#define oqs_sidh_cln16_mp_mul_schoolbook oqs_sidh_cln16_mp_mul
+#endif
+
+/**************** Function prototypes ****************/
+/************* Multiprecision functions **************/
+
+// Copy wordsize digits, c = a, where lng(a) = nwords
+void oqs_sidh_cln16_copy_words(const digit_t *a, digit_t *c, const unsigned int nwords);
+
+// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit
+unsigned int oqs_sidh_cln16_mp_add(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
+
+// 751-bit multiprecision addition, c = a+b
+void oqs_sidh_cln16_mp_add751(const digit_t *a, const digit_t *b, digit_t *c);
+void oqs_sidh_cln16_mp_add751_asm(const digit_t *a, const digit_t *b, digit_t *c);
+
+// 2x751-bit multiprecision addition, c = a+b
+void oqs_sidh_cln16_mp_add751x2(const digit_t *a, const digit_t *b, digit_t *c);
+void oqs_sidh_cln16_mp_add751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
+
+// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
+unsigned int oqs_sidh_cln16_mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
+
+// Multiprecision right shift by one
+void oqs_sidh_cln16_mp_shiftr1(digit_t *x, const unsigned int nwords);
+
+// Multiprecision left shift by one
+void oqs_sidh_cln16_mp_shiftl1(digit_t *x, const unsigned int nwords);
+
+// Digit multiplication, digit * digit -> 2-digit result
+void oqs_sidh_cln16_digit_x_digit(const digit_t a, const digit_t b, digit_t *c);
+
+// Multiprecision schoolbook multiply, c = a*b, where lng(a) = lng(b) = nwords.
+void oqs_sidh_cln16_mp_mul_schoolbook(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
+
+// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords.
+void oqs_sidh_cln16_mp_mul_comba(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
+
+void oqs_sidh_cln16_multiply(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
+
+// Montgomery multiplication modulo the group order, mc = ma*mb*r' mod order, where ma,mb,mc in [0, order-1]
+void oqs_sidh_cln16_Montgomery_multiply_mod_order(const digit_t *ma, const digit_t *mb, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime);
+
+// (Non-constant time) Montgomery inversion modulo the curve order using a^(-1) = a^(order-2) mod order
+void oqs_sidh_cln16_Montgomery_inversion_mod_order(const digit_t *ma, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime);
+
+void oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(const digit_t *a, digit_t *c, const digit_t *order, const digit_t *Montgomery_rprime, const digit_t *Montgomery_R2);
+
+// Conversion of elements in Z_r to Montgomery representation, where the order r is up to 384 bits.
+void oqs_sidh_cln16_to_Montgomery_mod_order(const digit_t *a, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime, const digit_t *Montgomery_Rprime);
+
+// Conversion of elements in Z_r from Montgomery to standard representation, where the order is up to 384 bits.
+void oqs_sidh_cln16_from_Montgomery_mod_order(const digit_t *ma, digit_t *c, const digit_t *order, const digit_t *Montgomery_rprime);
+
+// Inversion modulo Alice's order 2^372.
+void oqs_sidh_cln16_inv_mod_orderA(const digit_t *a, digit_t *c);
+
+/************ Field arithmetic functions *************/
+
+// Copy of a field element, c = a
+void oqs_sidh_cln16_fpcopy751(const oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t c);
+
+// Zeroing a field element, a = 0
+void oqs_sidh_cln16_fpzero751(oqs_sidh_cln16_felm_t a);
+
+// Non constant-time comparison of two field elements. If a = b return TRUE, otherwise, return FALSE
+bool oqs_sidh_cln16_fpequal751_non_constant_time(const oqs_sidh_cln16_felm_t a, const oqs_sidh_cln16_felm_t b);
+
+// Modular addition, c = a+b mod p751
+extern void oqs_sidh_cln16_fpadd751(const digit_t *a, const digit_t *b, digit_t *c);
+extern void oqs_sidh_cln16_fpadd751_asm(const digit_t *a, const digit_t *b, digit_t *c);
+
+// Modular subtraction, c = a-b mod p751
+extern void oqs_sidh_cln16_fpsub751(const digit_t *a, const digit_t *b, digit_t *c);
+extern void oqs_sidh_cln16_fpsub751_asm(const digit_t *a, const digit_t *b, digit_t *c);
+
+// Modular negation, a = -a mod p751
+extern void oqs_sidh_cln16_fpneg751(digit_t *a);
+
+// Modular division by two, c = a/2 mod p751.
+void oqs_sidh_cln16_fpdiv2_751(const digit_t *a, digit_t *c);
+
+// Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1].
+void oqs_sidh_cln16_fpcorrection751(digit_t *a);
+
+// 751-bit Montgomery reduction, c = a mod p
+void oqs_sidh_cln16_rdc_mont(const digit_t *a, digit_t *c);
+
+// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p751, where R=2^768
+void oqs_sidh_cln16_fpmul751_mont(const oqs_sidh_cln16_felm_t a, const oqs_sidh_cln16_felm_t b, oqs_sidh_cln16_felm_t c);
+void oqs_sidh_cln16_mul751_asm(const oqs_sidh_cln16_felm_t a, const oqs_sidh_cln16_felm_t b, oqs_sidh_cln16_dfelm_t c);
+void oqs_sidh_cln16_rdc751_asm(const oqs_sidh_cln16_dfelm_t ma, oqs_sidh_cln16_dfelm_t mc);
+
+// Field squaring using Montgomery arithmetic, mc = ma^2*R^-1 mod p751, where R=2^768
+void oqs_sidh_cln16_fpsqr751_mont(const oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t mc);
+
+// Conversion to Montgomery representation
+void oqs_sidh_cln16_to_mont(const oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t mc);
+
+// Conversion from Montgomery representation to standard representation
+void oqs_sidh_cln16_from_mont(const oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t c);
+
+// Field inversion, a = a^-1 in GF(p751)
+void oqs_sidh_cln16_fpinv751_mont(oqs_sidh_cln16_felm_t a);
+
+// Field inversion, a = a^-1 in GF(p751) using the binary GCD
+void oqs_sidh_cln16_fpinv751_mont_bingcd(oqs_sidh_cln16_felm_t a);
+
+// Chain to compute (p751-3)/4 using Montgomery arithmetic
+void oqs_sidh_cln16_fpinv751_chain_mont(oqs_sidh_cln16_felm_t a);
+
+/************ GF(p^2) arithmetic functions *************/
+
+// Copy of a GF(p751^2) element, c = a
+void oqs_sidh_cln16_fp2copy751(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c);
+
+// Zeroing a GF(p751^2) element, a = 0
+void oqs_sidh_cln16_fp2zero751(oqs_sidh_cln16_f2elm_t a);
+
+// GF(p751^2) negation, a = -a in GF(p751^2)
+void oqs_sidh_cln16_fp2neg751(oqs_sidh_cln16_f2elm_t a);
+
+// GF(p751^2) addition, c = a+b in GF(p751^2)
+extern void oqs_sidh_cln16_fp2add751(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c);
+
+// GF(p751^2) subtraction, c = a-b in GF(p751^2)
+extern void oqs_sidh_cln16_fp2sub751(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c);
+
+// GF(p751^2) division by two, c = a/2  in GF(p751^2)
+void oqs_sidh_cln16_fp2div2_751(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c);
+
+// Modular correction, a = a in GF(p751^2)
+void oqs_sidh_cln16_fp2correction751(oqs_sidh_cln16_f2elm_t a);
+
+// GF(p751^2) squaring using Montgomery arithmetic, c = a^2 in GF(p751^2)
+void oqs_sidh_cln16_fp2sqr751_mont(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c);
+
+// GF(p751^2) multiplication using Montgomery arithmetic, c = a*b in GF(p751^2)
+void oqs_sidh_cln16_fp2mul751_mont(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c);
+
+// Conversion of a GF(p751^2) element to Montgomery representation
+void oqs_sidh_cln16_to_fp2mont(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t mc);
+
+// Conversion of a GF(p751^2) element from Montgomery representation to standard representation
+void oqs_sidh_cln16_from_fp2mont(const oqs_sidh_cln16_f2elm_t ma, oqs_sidh_cln16_f2elm_t c);
+
+// GF(p751^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+void oqs_sidh_cln16_fp2inv751_mont(oqs_sidh_cln16_f2elm_t a);
+
+// GF(p751^2) inversion, a = (a0-i*a1)/(a0^2+a1^2), GF(p751) inversion done using the binary GCD
+void oqs_sidh_cln16_fp2inv751_mont_bingcd(oqs_sidh_cln16_f2elm_t a);
+
+// n-way Montgomery inversion
+void oqs_sidh_cln16_mont_n_way_inv(const oqs_sidh_cln16_f2elm_t *vec, const int n, oqs_sidh_cln16_f2elm_t *out);
+
+// Select either x or y depending on value of option
+void oqs_sidh_cln16_select_f2elm(const oqs_sidh_cln16_f2elm_t x, const oqs_sidh_cln16_f2elm_t y, oqs_sidh_cln16_f2elm_t z, const digit_t option);
+
+// Computes square roots of elements in (Fp2)^2 using Hamburg's trick.
+void oqs_sidh_cln16_sqrt_Fp2(const oqs_sidh_cln16_f2elm_t u, oqs_sidh_cln16_f2elm_t y);
+
+// Computes square roots of elements in (Fp2)^2 using Hamburg's trick
+void oqs_sidh_cln16_sqrt_Fp2_frac(const oqs_sidh_cln16_f2elm_t u, const oqs_sidh_cln16_f2elm_t v, oqs_sidh_cln16_f2elm_t y);
+
+// Cyclotomic cubing on elements of norm 1, using a^(p+1) = 1
+void oqs_sidh_cln16_cube_Fp2_cycl(oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_felm_t one);
+
+// Cyclotomic squaring on elements of norm 1, using a^(p+1) = 1
+void oqs_sidh_cln16_sqr_Fp2_cycl(oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_felm_t one);
+
+// Cyclotomic inversion, a^(p+1) = 1 => a^(-1) = a^p = a0 - i*a1
+extern void oqs_sidh_cln16_inv_Fp2_cycl(oqs_sidh_cln16_f2elm_t a);
+
+// Check if GF(p751^2) element is cube
+bool oqs_sidh_cln16_is_cube_Fp2(oqs_sidh_cln16_f2elm_t u, PCurveIsogenyStruct CurveIsogeny);
+
+// Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is 6 bits at most
+void oqs_sidh_cln16_exp6_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, const uint64_t t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res);
+
+// Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is 21 bits at most
+void oqs_sidh_cln16_exp21_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, const uint64_t t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res);
+
+// Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is 84 bits at most
+void oqs_sidh_cln16_exp84_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, uint64_t *t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res);
+
+// Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is length bits.
+void oqs_sidh_cln16_exp_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, uint64_t *t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res, int length);
+
+/************ Elliptic curve and isogeny functions *************/
+
+// Check if curve isogeny structure is NULL
+bool oqs_sidh_cln16_is_CurveIsogenyStruct_null(PCurveIsogenyStruct pCurveIsogeny);
+
+// Swap points over the base field
+void oqs_sidh_cln16_swap_points_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const digit_t option);
+
+// Swap points
+void oqs_sidh_cln16_swap_points(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const digit_t option);
+
+// Computes the j-invariant of a Montgomery curve with projective constant.
+void oqs_sidh_cln16_j_inv(const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t jinv);
+
+// Simultaneous doubling and differential addition.
+void oqs_sidh_cln16_xDBLADD(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t xPQ, const oqs_sidh_cln16_f2elm_t A24);
+
+// Doubling of a Montgomery point in projective coordinates (X:Z).
+void oqs_sidh_cln16_xDBL(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const oqs_sidh_cln16_f2elm_t C24);
+
+// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings.
+void oqs_sidh_cln16_xDBLe(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, const int e);
+
+// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings and collects a few intermediate multiples.
+void oqs_sidh_cln16_xDBLe_collect(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, unsigned int left_bound, const unsigned int right_bound, const unsigned int *col, oqs_sidh_cln16_point_proj_t *pts, unsigned int *pts_index, unsigned int *npts);
+
+// Differential addition.
+void oqs_sidh_cln16_xADD(oqs_sidh_cln16_point_proj_t P, const oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t xPQ);
+
+// Doubling of a Montgomery point in projective coordinates (X:Z) over the base field.
+void oqs_sidh_cln16_xDBL_basefield(const oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q);
+
+// Simultaneous doubling and differential addition over the base field.
+void oqs_sidh_cln16_xDBLADD_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const oqs_sidh_cln16_felm_t xPQ, const oqs_sidh_cln16_felm_t A24);
+
+// The Montgomery ladder
+void oqs_sidh_cln16_ladder(const oqs_sidh_cln16_felm_t x, digit_t *m, oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const oqs_sidh_cln16_felm_t A24, const unsigned int order_bits, const unsigned int order_fullbits, PCurveIsogenyStruct CurveIsogeny);
+
+// Computes key generation entirely in the base field
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_secret_pt(const oqs_sidh_cln16_point_basefield_t P, const digit_t *m, const unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t R, PCurveIsogenyStruct CurveIsogeny);
+
+// Computes P+[m]Q via x-only arithmetic.
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_ladder_3_pt(const oqs_sidh_cln16_f2elm_t xP, const oqs_sidh_cln16_f2elm_t xQ, const oqs_sidh_cln16_f2elm_t xPQ, const digit_t *m, const unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t W, const oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny);
+
+// Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
+void oqs_sidh_cln16_get_4_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t *coeff);
+
+// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny
+void oqs_sidh_cln16_eval_4_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t *coeff);
+
+// Computes first 4-isogeny computed by Alice.
+void oqs_sidh_cln16_first_4_isog(oqs_sidh_cln16_point_proj_t P, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t Aout, oqs_sidh_cln16_f2elm_t Cout, PCurveIsogenyStruct CurveIsogeny);
+
+// Tripling of a Montgomery point in projective coordinates (X:Z).
+void oqs_sidh_cln16_xTPL(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const oqs_sidh_cln16_f2elm_t C24);
+
+// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings.
+void oqs_sidh_cln16_xTPLe(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, const int e);
+
+// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings and collects a few intermediate multiples.
+void oqs_sidh_cln16_xTPLe_collect(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, unsigned int left_bound, const unsigned int right_bound, const unsigned int *col, oqs_sidh_cln16_point_proj_t *pts, unsigned int *pts_index, unsigned int *npts);
+
+// Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
+void oqs_sidh_cln16_get_3_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C);
+
+// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and a point P = (X:Z).
+void oqs_sidh_cln16_eval_3_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q);
+
+// 3-way simultaneous inversion
+void oqs_sidh_cln16_inv_3_way(oqs_sidh_cln16_f2elm_t z1, oqs_sidh_cln16_f2elm_t z2, oqs_sidh_cln16_f2elm_t z3);
+
+// Computing the point D = (x(Q-P),z(Q-P))
+void oqs_sidh_cln16_distort_and_diff(const oqs_sidh_cln16_felm_t xP, oqs_sidh_cln16_point_proj_t d, PCurveIsogenyStruct CurveIsogeny);
+
+// Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
+void oqs_sidh_cln16_get_A(const oqs_sidh_cln16_f2elm_t xP, const oqs_sidh_cln16_f2elm_t xQ, const oqs_sidh_cln16_f2elm_t xR, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny);
+
+/************ Functions for compression *************/
+
+// Produces points R1 and R2 as basis for E[2^372]
+void oqs_sidh_cln16_generate_2_torsion_basis(const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R1, oqs_sidh_cln16_point_full_proj_t R2, PCurveIsogenyStruct CurveIsogeny);
+
+// Produces points R1 and R2 as basis for E[3^239]
+void oqs_sidh_cln16_generate_3_torsion_basis(oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R1, oqs_sidh_cln16_point_full_proj_t R2, PCurveIsogenyStruct CurveIsogeny);
+
+// 2-torsion Tate pairing
+void oqs_sidh_cln16_Tate_pairings_2_torsion(const oqs_sidh_cln16_point_t R1, const oqs_sidh_cln16_point_t R2, const oqs_sidh_cln16_point_t P, const oqs_sidh_cln16_point_t Q, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t *n, PCurveIsogenyStruct CurveIsogeny);
+
+// 3-torsion Tate pairing
+void oqs_sidh_cln16_Tate_pairings_3_torsion(const oqs_sidh_cln16_point_t R1, const oqs_sidh_cln16_point_t R2, const oqs_sidh_cln16_point_t P, const oqs_sidh_cln16_point_t Q, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t *n, PCurveIsogenyStruct CurveIsogeny);
+
+// The Montgomery ladder, running in non constant-time
+void oqs_sidh_cln16_Mont_ladder(const oqs_sidh_cln16_f2elm_t x, const digit_t *m, oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const unsigned int order_bits, const unsigned int order_fullbits, PCurveIsogenyStruct CurveIsogeny);
+
+// General addition
+void oqs_sidh_cln16_ADD(const oqs_sidh_cln16_point_full_proj_t P, const oqs_sidh_cln16_f2elm_t QX, const oqs_sidh_cln16_f2elm_t QY, const oqs_sidh_cln16_f2elm_t QZ, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R);
+
+// 2-torsion Pohlig-Hellman function
+void oqs_sidh_cln16_ph2(const oqs_sidh_cln16_point_t phiP, const oqs_sidh_cln16_point_t phiQ, const oqs_sidh_cln16_point_t PS, const oqs_sidh_cln16_point_t QS, const oqs_sidh_cln16_f2elm_t A, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, PCurveIsogenyStruct CurveIsogeny);
+
+// Lookup table generation for 2-torsion PH
+void oqs_sidh_cln16_build_LUTs(const oqs_sidh_cln16_f2elm_t u, oqs_sidh_cln16_f2elm_t *t_ori, oqs_sidh_cln16_f2elm_t *LUT, oqs_sidh_cln16_f2elm_t *LUT_0, oqs_sidh_cln16_f2elm_t *LUT_1, oqs_sidh_cln16_f2elm_t *LUT_3, const oqs_sidh_cln16_felm_t one);
+
+// Pohlig-Hellman for groups of 2-power order up to 2^6
+void oqs_sidh_cln16_phn1(const oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const uint64_t a, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_i);
+
+// Pohlig-Hellman for groups of 2-power order 2^21
+void oqs_sidh_cln16_phn5(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k);
+
+// Pohlig-Hellman for groups of 2-power order 2^84
+void oqs_sidh_cln16_phn21(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k);
+
+// Pohlig-Hellman for groups of 2-power order 2^372
+void oqs_sidh_cln16_phn84(oqs_sidh_cln16_f2elm_t r, const oqs_sidh_cln16_f2elm_t *t_ori, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_f2elm_t *LUT_3, const oqs_sidh_cln16_felm_t one, uint64_t *alpha);
+
+// 3-torsion Pohlig-Hellman function
+void oqs_sidh_cln16_ph3(oqs_sidh_cln16_point_t phiP, oqs_sidh_cln16_point_t phiQ, oqs_sidh_cln16_point_t PS, oqs_sidh_cln16_point_t QS, oqs_sidh_cln16_f2elm_t A, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, PCurveIsogenyStruct CurveIsogeny);
+
+// Lookup table generation for 3-torsion PH
+void oqs_sidh_cln16_build_LUTs_3(oqs_sidh_cln16_f2elm_t g, oqs_sidh_cln16_f2elm_t *t_ori, oqs_sidh_cln16_f2elm_t *LUT, oqs_sidh_cln16_f2elm_t *LUT_0, oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one);
+
+// Pohlig-Hellman for groups of 3-power order up to 3^2 or 3^3
+void oqs_sidh_cln16_phn1_3(const oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const uint64_t a, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_i);
+
+// Pohlig-Hellman for groups of 3-power order up to 3^15
+void oqs_sidh_cln16_phn3(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k);
+
+// Pohlig-Hellman for groups of 3-power order up to 3^56
+void oqs_sidh_cln16_phn15_1(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k);
+
+// Pohlig-Hellman for groups of 3-power order up to 3^61
+void oqs_sidh_cln16_phn15(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k);
+
+// Pohlig-Hellman for groups of 3-power order up to 3^239
+void oqs_sidh_cln16_phn61(oqs_sidh_cln16_f2elm_t r, oqs_sidh_cln16_f2elm_t *t_ori, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha);
+
+// Recover the y-coordinates of the public key
+void oqs_sidh_cln16_recover_y(const oqs_sidh_cln16_publickey_t PK, oqs_sidh_cln16_point_full_proj_t phiP, oqs_sidh_cln16_point_full_proj_t phiQ, oqs_sidh_cln16_point_full_proj_t phiX, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny);
+
+// Computes the input modulo 3. The input is assumed to be SIDH_NWORDS_ORDER long
+unsigned int oqs_sidh_cln16_mod3(digit_t *a);
+
+// Computes R+aS
+void oqs_sidh_cln16_mont_twodim_scalarmult(digit_t *a, const oqs_sidh_cln16_point_t R, const oqs_sidh_cln16_point_t S, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t A24, oqs_sidh_cln16_point_full_proj_t P, PCurveIsogenyStruct CurveIsogeny);
+
+void oqs_sidh_cln16_compress_2_torsion(const unsigned char *PublicKeyA, unsigned char *CompressedPKA, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, oqs_sidh_cln16_point_t R1, oqs_sidh_cln16_point_t R2, PCurveIsogenyStruct CurveIsogeny);
+void oqs_sidh_cln16_compress_3_torsion(const unsigned char *PublicKeyA, unsigned char *CompressedPKA, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, oqs_sidh_cln16_point_t R1, oqs_sidh_cln16_point_t R2, PCurveIsogenyStruct CurveIsogeny);
+void oqs_sidh_cln16_decompress_2_torsion(const unsigned char *SecretKey, const unsigned char *CompressedPKA, oqs_sidh_cln16_point_proj_t R, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny);
+void oqs_sidh_cln16_decompress_3_torsion(const unsigned char *SecretKey, const unsigned char *CompressedPKA, oqs_sidh_cln16_point_proj_t R, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/crypt/liboqs/kex_sidh_cln16/SIDH_setup.c b/crypt/liboqs/kex_sidh_cln16/SIDH_setup.c
new file mode 100644
index 0000000000000000000000000000000000000000..1a61e4dde893bfd6b31cd2738cecc7ac66b9d0b5
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/SIDH_setup.c
@@ -0,0 +1,211 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral
+*       Diffie-Hellman key exchange.
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: functions for initialization and getting randomness
+*
+*********************************************************************************************/
+
+#include "SIDH_internal.h"
+#include <stdlib.h>
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_curve_initialize(PCurveIsogenyStruct pCurveIsogeny, PCurveIsogenyStaticData pCurveIsogenyData) { // Initialize curve isogeny structure pCurveIsogeny with static data extracted from pCurveIsogenyData.
+	                                                                                                                               // This needs to be called after allocating memory for "pCurveIsogeny" using oqs_sidh_cln16_curve_allocate().
+	unsigned int i, pwords, owords;
+
+	if (oqs_sidh_cln16_is_CurveIsogenyStruct_null(pCurveIsogeny)) {
+		return SIDH_CRYPTO_ERROR_INVALID_PARAMETER;
+	}
+
+	for (i = 0; i < 8; i++) { // Copy 8-character identifier
+		pCurveIsogeny->CurveIsogeny[i] = pCurveIsogenyData->CurveIsogeny[i];
+	}
+	pCurveIsogeny->pwordbits = pCurveIsogenyData->pwordbits;
+	pCurveIsogeny->owordbits = pCurveIsogenyData->owordbits;
+	pCurveIsogeny->pbits = pCurveIsogenyData->pbits;
+	pCurveIsogeny->oAbits = pCurveIsogenyData->oAbits;
+	pCurveIsogeny->oBbits = pCurveIsogenyData->oBbits;
+	pCurveIsogeny->eB = pCurveIsogenyData->eB;
+	pCurveIsogeny->BigMont_A24 = pCurveIsogenyData->BigMont_A24;
+
+	pwords = (pCurveIsogeny->pwordbits + RADIX - 1) / RADIX;
+	owords = (pCurveIsogeny->owordbits + RADIX - 1) / RADIX;
+	oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->prime, pCurveIsogeny->prime, pwords);
+	oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->A, pCurveIsogeny->A, pwords);
+	oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->C, pCurveIsogeny->C, pwords);
+	oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->Aorder, pCurveIsogeny->Aorder, owords);
+	oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->Border, pCurveIsogeny->Border, owords);
+	oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->PA, pCurveIsogeny->PA, 2 * pwords);
+	oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->PB, pCurveIsogeny->PB, 2 * pwords);
+	oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->BigMont_order, pCurveIsogeny->BigMont_order, pwords);
+	oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->Montgomery_R2, pCurveIsogeny->Montgomery_R2, pwords);
+	oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->Montgomery_pp, pCurveIsogeny->Montgomery_pp, pwords);
+	oqs_sidh_cln16_copy_words((digit_t *) pCurveIsogenyData->Montgomery_one, pCurveIsogeny->Montgomery_one, pwords);
+
+	return SIDH_CRYPTO_SUCCESS;
+}
+
+PCurveIsogenyStruct oqs_sidh_cln16_curve_allocate(PCurveIsogenyStaticData CurveData) { // Dynamic allocation of memory for curve isogeny structure.
+	                                                                                   // Returns NULL on error.
+	digit_t pbytes = (CurveData->pwordbits + 7) / 8;
+	digit_t obytes = (CurveData->owordbits + 7) / 8;
+	PCurveIsogenyStruct pCurveIsogeny = NULL;
+
+	pCurveIsogeny = (PCurveIsogenyStruct) calloc(1, sizeof(CurveIsogenyStruct));
+	if (pCurveIsogeny == NULL) // Guard against allocation failure before initializing members
+		return NULL;
+	pCurveIsogeny->prime = (digit_t *) calloc(1, pbytes);
+	pCurveIsogeny->A = (digit_t *) calloc(1, pbytes);
+	pCurveIsogeny->C = (digit_t *) calloc(1, pbytes);
+	pCurveIsogeny->Aorder = (digit_t *) calloc(1, obytes);
+	pCurveIsogeny->Border = (digit_t *) calloc(1, obytes);
+	pCurveIsogeny->PA = (digit_t *) calloc(1, 2 * pbytes);
+	pCurveIsogeny->PB = (digit_t *) calloc(1, 2 * pbytes);
+	pCurveIsogeny->BigMont_order = (digit_t *) calloc(1, pbytes);
+	pCurveIsogeny->Montgomery_R2 = (digit_t *) calloc(1, pbytes);
+	pCurveIsogeny->Montgomery_pp = (digit_t *) calloc(1, pbytes);
+	pCurveIsogeny->Montgomery_one = (digit_t *) calloc(1, pbytes);
+
+	return pCurveIsogeny;
+}
+
+void oqs_sidh_cln16_curve_free(PCurveIsogenyStruct pCurveIsogeny) { // Free memory for curve isogeny structure
+
+	if (pCurveIsogeny != NULL) {
+		if (pCurveIsogeny->prime != NULL)
+			free(pCurveIsogeny->prime);
+		if (pCurveIsogeny->A != NULL)
+			free(pCurveIsogeny->A);
+		if (pCurveIsogeny->C != NULL)
+			free(pCurveIsogeny->C);
+		if (pCurveIsogeny->Aorder != NULL)
+			free(pCurveIsogeny->Aorder);
+		if (pCurveIsogeny->Border != NULL)
+			free(pCurveIsogeny->Border);
+		if (pCurveIsogeny->PA != NULL)
+			free(pCurveIsogeny->PA);
+		if (pCurveIsogeny->PB != NULL)
+			free(pCurveIsogeny->PB);
+		if (pCurveIsogeny->BigMont_order != NULL)
+			free(pCurveIsogeny->BigMont_order);
+		if (pCurveIsogeny->Montgomery_R2 != NULL)
+			free(pCurveIsogeny->Montgomery_R2);
+		if (pCurveIsogeny->Montgomery_pp != NULL)
+			free(pCurveIsogeny->Montgomery_pp);
+		if (pCurveIsogeny->Montgomery_one != NULL)
+			free(pCurveIsogeny->Montgomery_one);
+
+		free(pCurveIsogeny);
+	}
+}
+
+bool oqs_sidh_cln16_is_CurveIsogenyStruct_null(PCurveIsogenyStruct pCurveIsogeny) { // Check if curve isogeny structure is NULL
+
+	if (pCurveIsogeny == NULL || pCurveIsogeny->prime == NULL || pCurveIsogeny->A == NULL || pCurveIsogeny->C == NULL || pCurveIsogeny->Aorder == NULL || pCurveIsogeny->Border == NULL ||
+	    pCurveIsogeny->PA == NULL || pCurveIsogeny->PB == NULL || pCurveIsogeny->BigMont_order == NULL || pCurveIsogeny->Montgomery_R2 == NULL || pCurveIsogeny->Montgomery_pp == NULL ||
+	    pCurveIsogeny->Montgomery_one == NULL) {
+		return true;
+	}
+	return false;
+}
+
+const uint64_t Border_div3[SIDH_NWORDS_ORDER] = {0xEDCD718A828384F9, 0x733B35BFD4427A14, 0xF88229CF94D7CF38, 0x63C56C990C7C2AD6, 0xB858A87E8F4222C7, 0x254C9C6B525EAF5};
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_mod_order(digit_t *random_digits, unsigned int AliceOrBob, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand) { // Output a random value in little endian format that can be used as a private key.
+	                                                                                                                                                     // It requests ceil(oAbits/8) (when AliceOrBob = 0) or ceil(oBbits/8) (when AliceOrBob = 1) random bytes from the OQS_RAND instance "rand" and masks the top byte.
+	                                                                                                                                                     // The process repeats until the sampled value is at most Aorder/2-2 (Border/3-2, resp.).
+	                                                                                                                                                     // If successful, the output in "random_digits" is an even value in [2, Aorder-2] for Alice or a multiple of 3 in [3, Border-3] for Bob.
+	unsigned int ntry = 0, nbytes, nwords;
+	digit_t t1[SIDH_MAXWORDS_ORDER] = {0}, order2[SIDH_MAXWORDS_ORDER] = {0};
+	unsigned char mask;
+	SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS;
+
+	if (random_digits == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(pCurveIsogeny) || AliceOrBob > 1) {
+		return SIDH_CRYPTO_ERROR_INVALID_PARAMETER;
+	}
+
+	oqs_sidh_cln16_clear_words((void *) random_digits, SIDH_MAXWORDS_ORDER);
+	t1[0] = 2;
+	if (AliceOrBob == SIDH_ALICE) {
+		nbytes = (pCurveIsogeny->oAbits + 7) / 8; // Number of random bytes to be requested
+		nwords = NBITS_TO_NWORDS(pCurveIsogeny->oAbits);
+		mask = 0x07; // Value for masking last random byte
+		oqs_sidh_cln16_copy_words(pCurveIsogeny->Aorder, order2, nwords);
+		oqs_sidh_cln16_mp_shiftr1(order2, nwords);         // order/2
+		oqs_sidh_cln16_mp_sub(order2, t1, order2, nwords); // order2 = order/2-2
+	} else {
+		nbytes = (pCurveIsogeny->oBbits + 7) / 8;
+		nwords = NBITS_TO_NWORDS(pCurveIsogeny->oBbits);
+		mask = 0x03;                                                        // Value for masking last random byte
+		oqs_sidh_cln16_mp_sub((digit_t *) Border_div3, t1, order2, nwords); // order2 = order/3-2
+	}
+
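+	// Rejection sampling: oqs_sidh_cln16_mp_sub() returns the borrow bit, so the loop repeats while the masked candidate still exceeds order2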
+	do {
+		ntry++;
+		if (ntry > 100) { // Max. 100 iterations to obtain random value in [0, order-2]
+			return SIDH_CRYPTO_ERROR_TOO_MANY_ITERATIONS;
+		}
+		rand->rand_n(rand, (uint8_t *) random_digits, nbytes);
+		((unsigned char *) random_digits)[nbytes - 1] &= mask; // Masking last byte
+	} while (oqs_sidh_cln16_mp_sub(order2, random_digits, t1, nwords) == 1);
+
+	oqs_sidh_cln16_clear_words((void *) t1, SIDH_MAXWORDS_ORDER);
+	t1[0] = 1;
+	oqs_sidh_cln16_mp_add(random_digits, t1, random_digits, nwords);
+	oqs_sidh_cln16_copy_words(random_digits, t1, nwords);
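+	// The shift doubles the value, making Alice's key even; the extra addition below makes
+	// Bob's key three times the value, i.e. a multiple of 3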
+	oqs_sidh_cln16_mp_shiftl1(random_digits, nwords); // Alice's output in the range [2, order-2]
+	if (AliceOrBob == SIDH_BOB) {
+		oqs_sidh_cln16_mp_add(random_digits, t1, random_digits, nwords); // Bob's output in the range [3, order-3]
+	}
+
+	return Status;
+}
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_BigMont_mod_order(digit_t *random_digits, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand) { // Output a random value in the range [1, BigMont_order-1] in little endian format that can be used as a private key for scalar multiplications
+	                                                                                                                                    // on the elliptic curve BigMont.
+	                                                                                                                                    // It requests ceil(BIGMONT_SIDH_SIDH_NBITS_ORDER/8) random bytes from the OQS_RAND instance "rand" and masks the top byte.
+	                                                                                                                                    // The process repeats until the random value is in [0, BigMont_order-2].
+	                                                                                                                                    // If successful, the output is given in "random_digits" in the range [1, BigMont_order-1].
+	unsigned int ntry = 0, nbytes = (BIGMONT_SIDH_SIDH_NBITS_ORDER + 7) / 8, nwords = NBITS_TO_NWORDS(BIGMONT_SIDH_SIDH_NBITS_ORDER);
+	digit_t t1[BIGMONT_MAXWORDS_ORDER] = {0}, order2[BIGMONT_MAXWORDS_ORDER] = {0};
+	unsigned char mask;
+	SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS;
+
+	if (random_digits == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(pCurveIsogeny)) {
+		return SIDH_CRYPTO_ERROR_INVALID_PARAMETER;
+	}
+
+	oqs_sidh_cln16_clear_words((void *) random_digits, BIGMONT_MAXWORDS_ORDER);
+	t1[0] = 2;
+	mask = (unsigned char) (8 * nbytes - BIGMONT_SIDH_SIDH_NBITS_ORDER);
+	oqs_sidh_cln16_mp_sub(pCurveIsogeny->BigMont_order, t1, order2, nwords); // order2 = order-2
+	mask = ((unsigned char) -1 >> mask);                                     // Value for masking last random byte
+
+	do {
+		ntry++;
+		if (ntry > 100) { // Max. 100 iterations to obtain random value in [0, order-2]
+			return SIDH_CRYPTO_ERROR_TOO_MANY_ITERATIONS;
+		}
+		rand->rand_n(rand, (uint8_t *) random_digits, nbytes);
+		((unsigned char *) random_digits)[nbytes - 1] &= mask; // Masking last byte
+	} while (oqs_sidh_cln16_mp_sub(order2, random_digits, t1, nwords) == 1);
+
+	oqs_sidh_cln16_clear_words((void *) t1, BIGMONT_MAXWORDS_ORDER);
+	t1[0] = 1;
+	oqs_sidh_cln16_mp_add(random_digits, t1, random_digits, nwords); // Output in the range [1, order-1]
+
+	return Status;
+}
+
+void oqs_sidh_cln16_clear_words(void *mem, digit_t nwords) { // Clear digits from memory. "nwords" indicates the number of digits to be zeroed.
+	                                                         // This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing.
+	unsigned int i;
+	volatile digit_t *v = mem;
+
+	for (i = 0; i < nwords; i++) {
+		v[i] = 0;
+	}
+}
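+
+/* A minimal usage sketch of the setup API above: allocate, initialize, draw a private key, then free.
+   The static parameter set "CurveIsogeny_SIDHp751" and the OQS_RAND instance "rand" are assumptions
+   here and would be supplied by the key-exchange layer.
+
+       digit_t secret[SIDH_MAXWORDS_ORDER] = {0};
+       PCurveIsogenyStruct curve = oqs_sidh_cln16_curve_allocate(&CurveIsogeny_SIDHp751);
+       if (!oqs_sidh_cln16_is_CurveIsogenyStruct_null(curve) &&
+           oqs_sidh_cln16_curve_initialize(curve, &CurveIsogeny_SIDHp751) == SIDH_CRYPTO_SUCCESS) {
+           oqs_sidh_cln16_random_mod_order(secret, SIDH_ALICE, curve, rand);
+       }
+       oqs_sidh_cln16_curve_free(curve);
+*/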
diff --git a/crypt/liboqs/kex_sidh_cln16/ec_isogeny.c b/crypt/liboqs/kex_sidh_cln16/ec_isogeny.c
new file mode 100644
index 0000000000000000000000000000000000000000..bdb2626c32f77ea0ee970b7d98caa05c202f6dc7
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/ec_isogeny.c
@@ -0,0 +1,2156 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman
+*       key exchange.
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: elliptic curve and isogeny functions
+*
+*********************************************************************************************/
+
+#include "SIDH_internal.h"
+#include <math.h>
+
+extern const uint64_t LIST[22][SIDH_NWORDS64_FIELD];
+
+void oqs_sidh_cln16_j_inv(const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t jinv) { // Computes the j-invariant of a Montgomery curve with projective constant.
+	                                                                                                                     // Input: A,C in GF(p^2).
+	                                                                                                                     // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x.
+	oqs_sidh_cln16_f2elm_t t0, t1;
+
+	oqs_sidh_cln16_fp2sqr751_mont(A, jinv);        // jinv = A^2
+	oqs_sidh_cln16_fp2sqr751_mont(C, t1);          // t1 = C^2
+	oqs_sidh_cln16_fp2add751(t1, t1, t0);          // t0 = t1+t1
+	oqs_sidh_cln16_fp2sub751(jinv, t0, t0);        // t0 = jinv-t0
+	oqs_sidh_cln16_fp2sub751(t0, t1, t0);          // t0 = t0-t1
+	oqs_sidh_cln16_fp2sub751(t0, t1, jinv);        // jinv = t0-t1
+	oqs_sidh_cln16_fp2sqr751_mont(t1, t1);         // t1 = t1^2
+	oqs_sidh_cln16_fp2mul751_mont(jinv, t1, jinv); // jinv = jinv*t1
+	oqs_sidh_cln16_fp2add751(t0, t0, t0);          // t0 = t0+t0
+	oqs_sidh_cln16_fp2add751(t0, t0, t0);          // t0 = t0+t0
+	oqs_sidh_cln16_fp2sqr751_mont(t0, t1);         // t1 = t0^2
+	oqs_sidh_cln16_fp2mul751_mont(t0, t1, t0);     // t0 = t0*t1
+	oqs_sidh_cln16_fp2add751(t0, t0, t0);          // t0 = t0+t0
+	oqs_sidh_cln16_fp2add751(t0, t0, t0);          // t0 = t0+t0
+	oqs_sidh_cln16_fp2inv751_mont(jinv);           // jinv = 1/jinv
+	oqs_sidh_cln16_fp2mul751_mont(jinv, t0, jinv); // jinv = t0*jinv
+}
+
+void oqs_sidh_cln16_xDBLADD(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t xPQ, const oqs_sidh_cln16_f2elm_t A24) { // Simultaneous doubling and differential addition.
+	                                                                                                                                                            // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
+	                                                                                                                                                            // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that x(Q+P)=XQP/ZQP.
+	oqs_sidh_cln16_f2elm_t t0, t1, t2;
+
+	oqs_sidh_cln16_fp2add751(P->X, P->Z, t0);        // t0 = XP+ZP
+	oqs_sidh_cln16_fp2sub751(P->X, P->Z, t1);        // t1 = XP-ZP
+	oqs_sidh_cln16_fp2sqr751_mont(t0, P->X);         // XP = (XP+ZP)^2
+	oqs_sidh_cln16_fp2sub751(Q->X, Q->Z, t2);        // t2 = XQ-ZQ
+	oqs_sidh_cln16_fp2add751(Q->X, Q->Z, Q->X);      // XQ = XQ+ZQ
+	oqs_sidh_cln16_fp2mul751_mont(t0, t2, t0);       // t0 = (XP+ZP)*(XQ-ZQ)
+	oqs_sidh_cln16_fp2sqr751_mont(t1, P->Z);         // ZP = (XP-ZP)^2
+	oqs_sidh_cln16_fp2mul751_mont(t1, Q->X, t1);     // t1 = (XP-ZP)*(XQ+ZQ)
+	oqs_sidh_cln16_fp2sub751(P->X, P->Z, t2);        // t2 = (XP+ZP)^2-(XP-ZP)^2
+	oqs_sidh_cln16_fp2mul751_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2
+	oqs_sidh_cln16_fp2mul751_mont(t2, A24, Q->X);    // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]
+	oqs_sidh_cln16_fp2sub751(t0, t1, Q->Z);          // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
+	oqs_sidh_cln16_fp2add751(Q->X, P->Z, P->Z);      // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
+	oqs_sidh_cln16_fp2add751(t0, t1, Q->X);          // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, t2, P->Z);   // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
+	oqs_sidh_cln16_fp2sqr751_mont(Q->Z, Q->Z);       // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+	oqs_sidh_cln16_fp2sqr751_mont(Q->X, Q->X);       // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
+	oqs_sidh_cln16_fp2mul751_mont(Q->Z, xPQ, Q->Z);  // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+}
+
+void oqs_sidh_cln16_xDBL(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const oqs_sidh_cln16_f2elm_t C24) { // Doubling of a Montgomery point in projective coordinates (X:Z).
+	                                                                                                                                                               // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constant A24/C24=(A/C+2)/4.
+	                                                                                                                                                               // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2).
+	oqs_sidh_cln16_f2elm_t t0, t1;
+
+	oqs_sidh_cln16_fp2sub751(P->X, P->Z, t0);      // t0 = X1-Z1
+	oqs_sidh_cln16_fp2add751(P->X, P->Z, t1);      // t1 = X1+Z1
+	oqs_sidh_cln16_fp2sqr751_mont(t0, t0);         // t0 = (X1-Z1)^2
+	oqs_sidh_cln16_fp2sqr751_mont(t1, t1);         // t1 = (X1+Z1)^2
+	oqs_sidh_cln16_fp2mul751_mont(C24, t0, Q->Z);  // Z2 = C24*(X1-Z1)^2
+	oqs_sidh_cln16_fp2mul751_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2
+	oqs_sidh_cln16_fp2sub751(t1, t0, t1);          // t1 = (X1+Z1)^2-(X1-Z1)^2
+	oqs_sidh_cln16_fp2mul751_mont(A24, t1, t0);    // t0 = A24*[(X1+Z1)^2-(X1-Z1)^2]
+	oqs_sidh_cln16_fp2add751(Q->Z, t0, Q->Z);      // Z2 = A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2
+	oqs_sidh_cln16_fp2mul751_mont(Q->Z, t1, Q->Z); // Z2 = [A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2]
+}
+
+void oqs_sidh_cln16_xDBLe(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, const int e) { // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings.
+	                                                                                                                                                                         // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constant A/C.
+	                                                                                                                                                                         // Output: projective Montgomery x-coordinates Q <- (2^e)*P.
+	oqs_sidh_cln16_f2elm_t A24num, A24den;
+	int i;
+
+	oqs_sidh_cln16_fp2add751(C, C, A24num);
+	oqs_sidh_cln16_fp2add751(A24num, A24num, A24den);
+	oqs_sidh_cln16_fp2add751(A24num, A, A24num);
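+	// A24num = A+2C and A24den = 4C, so A24num/A24den = (A/C+2)/4 as expected by xDBL()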
+	oqs_sidh_cln16_copy_words((digit_t *) P, (digit_t *) Q, 2 * 2 * NWORDS_FIELD);
+
+	for (i = 0; i < e; i++) {
+		oqs_sidh_cln16_xDBL(Q, Q, A24num, A24den);
+	}
+}
+
+void oqs_sidh_cln16_xADD(oqs_sidh_cln16_point_proj_t P, const oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t xPQ) { // Differential addition.
+	                                                                                                                             // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, and affine difference xPQ=x(P-Q).
+	                                                                                                                             // Output: projective Montgomery point P <- P+Q = (XQP:ZQP) such that x(Q+P)=XQP/ZQP.
+	oqs_sidh_cln16_f2elm_t t0, t1;
+
+	oqs_sidh_cln16_fp2add751(P->X, P->Z, t0);       // t0 = XP+ZP
+	oqs_sidh_cln16_fp2sub751(P->X, P->Z, t1);       // t1 = XP-ZP
+	oqs_sidh_cln16_fp2sub751(Q->X, Q->Z, P->X);     // XP = XQ-ZQ
+	oqs_sidh_cln16_fp2add751(Q->X, Q->Z, P->Z);     // ZP = XQ+ZQ
+	oqs_sidh_cln16_fp2mul751_mont(t0, P->X, t0);    // t0 = (XP+ZP)*(XQ-ZQ)
+	oqs_sidh_cln16_fp2mul751_mont(t1, P->Z, t1);    // t1 = (XP-ZP)*(XQ+ZQ)
+	oqs_sidh_cln16_fp2sub751(t0, t1, P->Z);         // ZP = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
+	oqs_sidh_cln16_fp2add751(t0, t1, P->X);         // XP = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
+	oqs_sidh_cln16_fp2sqr751_mont(P->Z, P->Z);      // ZP = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+	oqs_sidh_cln16_fp2sqr751_mont(P->X, P->X);      // XP = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, xPQ, P->Z); // ZP = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+}
+
+void oqs_sidh_cln16_xDBL_basefield(const oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q) { // Doubling of a Montgomery point in projective coordinates (X:Z) over the base field.
+	                                                                                                                         // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constant A24/C24=(A/C+2)/4.
+	                                                                                                                         // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2).
+	oqs_sidh_cln16_felm_t t0, t1;
+
+	// NOTE: this function is fixed for A24=1, C24=2
+
+	oqs_sidh_cln16_fpsub751(P->X, P->Z, t0);      // t0 = X1-Z1
+	oqs_sidh_cln16_fpadd751(P->X, P->Z, t1);      // t1 = X1+Z1
+	oqs_sidh_cln16_fpsqr751_mont(t0, t0);         // t0 = (X1-Z1)^2
+	oqs_sidh_cln16_fpsqr751_mont(t1, t1);         // t1 = (X1+Z1)^2
+	oqs_sidh_cln16_fpadd751(t0, t0, Q->Z);        // Z2 = C24*(X1-Z1)^2
+	oqs_sidh_cln16_fpmul751_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2
+	oqs_sidh_cln16_fpsub751(t1, t0, t1);          // t1 = (X1+Z1)^2-(X1-Z1)^2
+	oqs_sidh_cln16_fpadd751(Q->Z, t1, Q->Z);      // Z2 = A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2
+	oqs_sidh_cln16_fpmul751_mont(Q->Z, t1, Q->Z); // Z2 = [A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2]
+}
+
+void oqs_sidh_cln16_xDBLADD_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const oqs_sidh_cln16_felm_t xPQ, const oqs_sidh_cln16_felm_t A24) { // Simultaneous doubling and differential addition over the base field.
+	                                                                                                                                                                                        // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
+	                                                                                                                                                                                        // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that x(Q+P)=XQP/ZQP.
+	oqs_sidh_cln16_felm_t t0, t1, t2;
+
+	// NOTE: this function is fixed for C24=2
+
+	oqs_sidh_cln16_fpadd751(P->X, P->Z, t0);    // t0 = XP+ZP
+	oqs_sidh_cln16_fpsub751(P->X, P->Z, t1);    // t1 = XP-ZP
+	oqs_sidh_cln16_fpsqr751_mont(t0, P->X);     // XP = (XP+ZP)^2
+	oqs_sidh_cln16_fpsub751(Q->X, Q->Z, t2);    // t2 = XQ-ZQ
+	oqs_sidh_cln16_fpadd751(Q->X, Q->Z, Q->X);  // XQ = XQ+ZQ
+	oqs_sidh_cln16_fpmul751_mont(t0, t2, t0);   // t0 = (XP+ZP)*(XQ-ZQ)
+	oqs_sidh_cln16_fpsqr751_mont(t1, P->Z);     // ZP = (XP-ZP)^2
+	oqs_sidh_cln16_fpmul751_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ)
+	oqs_sidh_cln16_fpsub751(P->X, P->Z, t2);    // t2 = (XP+ZP)^2-(XP-ZP)^2
+
+	if (A24[0] == 1) {
+		oqs_sidh_cln16_fpadd751(P->Z, P->Z, P->Z);      // ZP = C24*(XP-ZP)^2
+		oqs_sidh_cln16_fpmul751_mont(P->X, P->Z, P->X); // XP = C24*(XP+ZP)^2*(XP-ZP)^2
+		oqs_sidh_cln16_fpadd751(t2, P->Z, P->Z);        // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+C24*(XP-ZP)^2
+	} else {
+		oqs_sidh_cln16_fpmul751_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2
+		oqs_sidh_cln16_fpmul751_mont(A24, t2, Q->X);    // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]
+		oqs_sidh_cln16_fpadd751(P->Z, Q->X, P->Z);      // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+C24*(XP-ZP)^2
+	}
+
+	oqs_sidh_cln16_fpsub751(t0, t1, Q->Z);         // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)
+	oqs_sidh_cln16_fpadd751(t0, t1, Q->X);         // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)
+	oqs_sidh_cln16_fpmul751_mont(P->Z, t2, P->Z);  // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+C24*(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
+	oqs_sidh_cln16_fpsqr751_mont(Q->Z, Q->Z);      // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+	oqs_sidh_cln16_fpsqr751_mont(Q->X, Q->X);      // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2
+	oqs_sidh_cln16_fpmul751_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
+}
+
+void oqs_sidh_cln16_ladder(const oqs_sidh_cln16_felm_t x, digit_t *m, oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const oqs_sidh_cln16_felm_t A24, const unsigned int order_bits, const unsigned int order_fullbits, PCurveIsogenyStruct CurveIsogeny) { // The Montgomery ladder
+	                                                                                                                                                                                                                                                                                           // Inputs: the affine x-coordinate of a point P on E: B*y^2=x^3+A*x^2+x,
+	                                                                                                                                                                                                                                                                                           //         scalar m
+	                                                                                                                                                                                                                                                                                           //         curve constant A24 = (A+2)/4
+	                                                                                                                                                                                                                                                                                           //         order_bits = subgroup order bitlength
+	                                                                                                                                                                                                                                                                                           //         order_fullbits = smallest multiple of 32 larger than the order bitlength
+	                                                                                                                                                                                                                                                                                           // Output: Q = m*(x:1)
+	                                                                                                                                                                                                                                                                                           // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	unsigned int bit = 0, owords = NBITS_TO_NWORDS(order_fullbits);
+	digit_t mask;
+	int i;
+
+	// Initializing with the points (1:0) and (x:1)
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) P->X);
+	oqs_sidh_cln16_fpzero751(P->Z);
+	oqs_sidh_cln16_fpcopy751(x, Q->X);
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) Q->Z);
+
+	for (i = order_fullbits - order_bits; i > 0; i--) {
+		oqs_sidh_cln16_mp_shiftl1(m, owords);
+	}
+
+	for (i = order_bits; i > 0; i--) {
+		bit = (unsigned int) (m[owords - 1] >> (RADIX - 1));
+		oqs_sidh_cln16_mp_shiftl1(m, owords);
+		mask = 0 - (digit_t) bit;
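+		// mask is all-ones when bit = 1 and zero when bit = 0, so the conditional swaps below select the ladder branch in constant time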
+
+		oqs_sidh_cln16_swap_points_basefield(P, Q, mask);
+		oqs_sidh_cln16_xDBLADD_basefield(P, Q, x, A24);   // If bit=0 then P <- 2*P and Q <- P+Q,
+		oqs_sidh_cln16_swap_points_basefield(P, Q, mask); // else if bit=1 then Q <- 2*Q and P <- P+Q
+	}
+}
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_BigMont_ladder(unsigned char *x, digit_t *m, unsigned char *xout, PCurveIsogenyStruct CurveIsogeny) { // BigMont's scalar multiplication using the Montgomery ladder
+	                                                                                                                                    // Inputs: x, the affine x-coordinate of a point P on BigMont: y^2=x^3+A*x^2+x,
+	                                                                                                                                    //         scalar m.
+	                                                                                                                                    // Output: xout, the affine x-coordinate of m*(x:1)
+	                                                                                                                                    // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	oqs_sidh_cln16_point_basefield_proj_t P1, P2;
+	digit_t scalar[BIGMONT_NWORDS_ORDER];
+	oqs_sidh_cln16_felm_t X, A24 = {0};
+
+	A24[0] = (digit_t) CurveIsogeny->BigMont_A24;
+	oqs_sidh_cln16_to_mont(A24, A24); // Conversion to Montgomery representation
+	oqs_sidh_cln16_to_mont((digit_t *) x, X);
+
+	oqs_sidh_cln16_copy_words(m, scalar, BIGMONT_NWORDS_ORDER);
+	oqs_sidh_cln16_ladder(X, scalar, P1, P2, A24, BIGMONT_SIDH_SIDH_NBITS_ORDER, BIGMONT_MAXBITS_ORDER, CurveIsogeny);
+
+	oqs_sidh_cln16_fpinv751_mont(P1->Z);
+	oqs_sidh_cln16_fpmul751_mont(P1->X, P1->Z, (digit_t *) xout);
+	oqs_sidh_cln16_from_mont((digit_t *) xout, (digit_t *) xout); // Conversion to standard representation
+
+	return SIDH_CRYPTO_SUCCESS;
+}
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_secret_pt(const oqs_sidh_cln16_point_basefield_t P, const digit_t *m, const unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t R, PCurveIsogenyStruct CurveIsogeny) { // Computes key generation entirely in the base field by exploiting a 1-dimensional Montgomery ladder in the trace zero subgroup and
+	                                                                                                                                                                                                      // recovering the y-coordinate for the addition. All operations in the base field GF(p).
+	                                                                                                                                                                                                      // Input:  The scalar m, point P = (x,y) on E in the base field subgroup and Q = (x1,y1*i) on E in the trace-zero subgroup.
+	                                                                                                                                                                                                      //         x,y,x1,y1 are all in the base field.
+	                                                                                                                                                                                                      // Output: R = (RX0+RX1*i)/RZ0 (the x-coordinate of P+[m]Q).
+	unsigned int nbits;
+	oqs_sidh_cln16_point_basefield_t Q;
+	oqs_sidh_cln16_point_basefield_proj_t S, T;
+	digit_t *X0 = (digit_t *) S->X, *Z0 = (digit_t *) S->Z, *X1 = (digit_t *) T->X, *Z1 = (digit_t *) T->Z;
+	digit_t *x = (digit_t *) P->x, *y = (digit_t *) P->y, *x1 = (digit_t *) Q->x, *y1 = (digit_t *) Q->y;
+	digit_t scalar[SIDH_NWORDS_ORDER];
+	oqs_sidh_cln16_felm_t t0, t1, t2, A24 = {0};
+	digit_t *RX0 = (digit_t *) R->X[0], *RX1 = (digit_t *) R->X[1], *RZ0 = (digit_t *) R->Z[0], *RZ1 = (digit_t *) R->Z[1];
+
+	oqs_sidh_cln16_fpcopy751(P->x, Q->x); // Q = (-XP,YP)
+	oqs_sidh_cln16_fpcopy751(P->y, Q->y);
+	oqs_sidh_cln16_fpneg751(Q->x);
+
+	if (AliceOrBob == SIDH_ALICE) {
+		nbits = CurveIsogeny->oAbits;
+	} else if (AliceOrBob == SIDH_BOB) {
+		nbits = CurveIsogeny->oBbits;
+	} else {
+		return SIDH_CRYPTO_ERROR_INVALID_PARAMETER;
+	}
+
+	// Setting curve constant to one (in standard representation), used in xDBLADD_basefield() in the ladder computation
+	A24[0] = 1;
+	oqs_sidh_cln16_copy_words(m, scalar, SIDH_NWORDS_ORDER);
+	oqs_sidh_cln16_ladder(Q->x, scalar, S, T, A24, nbits, CurveIsogeny->owordbits, CurveIsogeny);
+
+	//RX0 = (2*y*y1*Z0^2*Z1 + Z1*(X0*x1+Z0)*(X0+x1*Z0) - X1*(X0-x1*Z0)^2)*(2*y*y1*Z0^2*Z1 - Z1*(X0*x1+Z0)*(X0+x1*Z0) + X1*(X0-x1*Z0)^2) - 4*y1^2*Z0*Z1^2*(X0+x*Z0)*(X0-x*Z0)^2;
+	//RX1 = 4*y*y1*Z0^2*Z1*(Z1*(X0*x1+Z0)*(X0+x1*Z0) - X1*(X0-x1*Z0)^2);
+	//RZ0 = 4*y1^2*Z0^2*Z1^2*(X0-x*Z0)^2;
+
+	oqs_sidh_cln16_fpmul751_mont(x1, Z0, RX1);
+	oqs_sidh_cln16_fpmul751_mont(X0, x1, RX0);
+	oqs_sidh_cln16_fpsub751(X0, RX1, t0);
+	oqs_sidh_cln16_fpadd751(X0, RX1, RX1);
+	oqs_sidh_cln16_fpsqr751_mont(t0, t0);
+	oqs_sidh_cln16_fpadd751(RX0, Z0, RX0);
+	oqs_sidh_cln16_fpmul751_mont(t0, X1, t0);
+	oqs_sidh_cln16_fpmul751_mont(RX0, RX1, RX0);
+	oqs_sidh_cln16_fpmul751_mont(y1, Z1, t2);
+	oqs_sidh_cln16_fpmul751_mont(y, Z0, t1);
+	oqs_sidh_cln16_fpadd751(t2, t2, t2);
+	oqs_sidh_cln16_fpmul751_mont(t2, Z0, RX1);
+	oqs_sidh_cln16_fpmul751_mont(RX0, Z1, RX0);
+	oqs_sidh_cln16_fpsub751(RX0, t0, RX0);
+	oqs_sidh_cln16_fpmul751_mont(t1, RX1, t1);
+	oqs_sidh_cln16_fpsqr751_mont(RX1, t0);
+	oqs_sidh_cln16_fpmul751_mont(t2, RX1, t2);
+	oqs_sidh_cln16_fpmul751_mont(t1, RX0, RX1);
+	oqs_sidh_cln16_fpadd751(t1, RX0, RZ0);
+	oqs_sidh_cln16_fpadd751(RX1, RX1, RX1);
+	oqs_sidh_cln16_fpsub751(t1, RX0, t1);
+	oqs_sidh_cln16_fpmul751_mont(x, Z0, RX0);
+	oqs_sidh_cln16_fpmul751_mont(t1, RZ0, t1);
+	oqs_sidh_cln16_fpsub751(X0, RX0, RZ0);
+	oqs_sidh_cln16_fpadd751(X0, RX0, RX0);
+	oqs_sidh_cln16_fpsqr751_mont(RZ0, RZ0);
+	oqs_sidh_cln16_fpmul751_mont(t2, RX0, t2);
+	oqs_sidh_cln16_fpmul751_mont(t2, RZ0, t2);
+	oqs_sidh_cln16_fpmul751_mont(RZ0, t0, RZ0);
+	oqs_sidh_cln16_fpsub751(t1, t2, RX0);
+	oqs_sidh_cln16_fpzero751(RZ1);
+
+	return SIDH_CRYPTO_SUCCESS;
+}
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_ladder_3_pt(const oqs_sidh_cln16_f2elm_t xP, const oqs_sidh_cln16_f2elm_t xQ, const oqs_sidh_cln16_f2elm_t xPQ, const digit_t *m, const unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t W, const oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { // Computes P+[m]Q via x-only arithmetic. Algorithm by De Feo, Jao and Plut.
+	                                                                                                                                                                                                                                                                                                  // Input:  three affine points xP,xQ,xPQ and Montgomery constant A.
+	                                                                                                                                                                                                                                                                                                  // Output: projective Montgomery x-coordinates of x(P+[m]Q)=WX/WZ
+	oqs_sidh_cln16_point_proj_t U = {0}, V = {0};
+	oqs_sidh_cln16_f2elm_t A24, A24num, constant1 = {0}, constant2;
+	oqs_sidh_cln16_felm_t temp_scalar;
+	unsigned int bit = 0, nbits, fullbits = CurveIsogeny->owordbits;
+	digit_t mask;
+	int i;
+
+	if (AliceOrBob == SIDH_ALICE) {
+		nbits = CurveIsogeny->oAbits;
+	} else if (AliceOrBob == SIDH_BOB) {
+		nbits = CurveIsogeny->oBbits;
+	} else {
+		return SIDH_CRYPTO_ERROR_INVALID_PARAMETER;
+	}
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, constant1[0]);
+	oqs_sidh_cln16_fp2add751(constant1, constant1, constant1); // constant = 2
+	oqs_sidh_cln16_fp2add751(A, constant1, A24num);
+	oqs_sidh_cln16_fp2div2_751(A24num, A24);
+	oqs_sidh_cln16_fp2div2_751(A24, A24);
+
+	// Initializing with the points (1:0), (xQ:1) and (xP:1)
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) U->X);
+	oqs_sidh_cln16_fp2copy751(xQ, V->X);
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) V->Z);
+	oqs_sidh_cln16_fp2copy751(xP, W->X);
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) W->Z);
+	oqs_sidh_cln16_fpzero751(W->Z[1]);
+	oqs_sidh_cln16_copy_words(m, temp_scalar, SIDH_NWORDS_ORDER);
+
+	for (i = fullbits - nbits; i > 0; i--) {
+		oqs_sidh_cln16_mp_shiftl1(temp_scalar, SIDH_NWORDS_ORDER);
+	}
+
+	for (i = nbits; i > 0; i--) {
+		bit = (unsigned int) (temp_scalar[SIDH_NWORDS_ORDER - 1] >> (RADIX - 1));
+		oqs_sidh_cln16_mp_shiftl1(temp_scalar, SIDH_NWORDS_ORDER);
+		mask = 0 - (digit_t) bit;
+
+		oqs_sidh_cln16_swap_points(W, U, mask);
+		oqs_sidh_cln16_swap_points(U, V, mask);
+		oqs_sidh_cln16_select_f2elm(xP, xQ, constant1, mask);
+		oqs_sidh_cln16_select_f2elm(xQ, xPQ, constant2, mask);
+		oqs_sidh_cln16_xADD(W, U, constant1);         // If bit=0 then W <- W+U, U <- 2*U and V <- U+V,
+		oqs_sidh_cln16_xDBLADD(U, V, constant2, A24); // else if bit=1 then U <- U+V, V <- 2*V and W <- V+W
+		oqs_sidh_cln16_swap_points(U, V, mask);
+		oqs_sidh_cln16_swap_points(W, U, mask);
+	}
+
+	return SIDH_CRYPTO_SUCCESS;
+}
+
+void oqs_sidh_cln16_get_4_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t *coeff) { // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
+	                                                                                                                                                     // Input:  projective point of order four P = (X4:Z4).
+	                                                                                                                                                     // Output: the 4-isogenous Montgomery curve with projective coefficient A/C and the 5 coefficients
+	                                                                                                                                                     //         that are used to evaluate the isogeny at a point in eval_4_isog().
+
+	oqs_sidh_cln16_fp2add751(P->X, P->Z, coeff[0]);         // coeff[0] = X4+Z4
+	oqs_sidh_cln16_fp2sqr751_mont(P->X, coeff[3]);          // coeff[3] = X4^2
+	oqs_sidh_cln16_fp2sqr751_mont(P->Z, coeff[4]);          // coeff[4] = Z4^2
+	oqs_sidh_cln16_fp2sqr751_mont(coeff[0], coeff[0]);      // coeff[0] = (X4+Z4)^2
+	oqs_sidh_cln16_fp2add751(coeff[3], coeff[4], coeff[1]); // coeff[1] = X4^2+Z4^2
+	oqs_sidh_cln16_fp2sub751(coeff[3], coeff[4], coeff[2]); // coeff[2] = X4^2-Z4^2
+	oqs_sidh_cln16_fp2sqr751_mont(coeff[3], coeff[3]);      // coeff[3] = X4^4
+	oqs_sidh_cln16_fp2sqr751_mont(coeff[4], coeff[4]);      // coeff[4] = Z4^4
+	oqs_sidh_cln16_fp2add751(coeff[3], coeff[3], A);        // A = 2*X4^4
+	oqs_sidh_cln16_fp2sub751(coeff[0], coeff[1], coeff[0]); // coeff[0] = 2*X4*Z4 = (X4+Z4)^2 - (X4^2+Z4^2)
+	oqs_sidh_cln16_fp2sub751(A, coeff[4], A);               // A = 2*X4^4-Z4^4
+	oqs_sidh_cln16_fp2copy751(coeff[4], C);                 // C = Z4^4
+	oqs_sidh_cln16_fp2add751(A, A, A);                      // A = 2(2*X4^4-Z4^4)
+}
+
+void oqs_sidh_cln16_eval_4_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t *coeff) { // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined
+	                                                                                            // by the 5 coefficients in coeff (computed in oqs_sidh_cln16_get_4_isog()).
+	                                                                                            // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z).
+	                                                                                            // Output: the projective point P = phi(P) = (X:Z) in the codomain.
+	oqs_sidh_cln16_f2elm_t t0, t1;
+
+	oqs_sidh_cln16_fp2mul751_mont(P->X, coeff[0], P->X); // X = coeff[0]*X
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, coeff[1], t0);   // t0 = coeff[1]*Z
+	oqs_sidh_cln16_fp2sub751(P->X, t0, P->X);            // X = X-t0
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, coeff[2], P->Z); // Z = coeff[2]*Z
+	oqs_sidh_cln16_fp2sub751(P->X, P->Z, t0);            // t0 = X-Z
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, P->X, P->Z);     // Z = X*Z
+	oqs_sidh_cln16_fp2sqr751_mont(t0, t0);               // t0 = t0^2
+	oqs_sidh_cln16_fp2add751(P->Z, P->Z, P->Z);          // Z = Z+Z
+	oqs_sidh_cln16_fp2add751(P->Z, P->Z, P->Z);          // Z = Z+Z
+	oqs_sidh_cln16_fp2add751(P->Z, t0, P->X);            // X = t0+Z
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, t0, P->Z);       // Z = t0*Z
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, coeff[4], P->Z); // Z = coeff[4]*Z
+	oqs_sidh_cln16_fp2mul751_mont(t0, coeff[4], t0);     // t0 = t0*coeff[4]
+	oqs_sidh_cln16_fp2mul751_mont(P->X, coeff[3], t1);   // t1 = X*coeff[3]
+	oqs_sidh_cln16_fp2sub751(t0, t1, t0);                // t0 = t0-t1
+	oqs_sidh_cln16_fp2mul751_mont(P->X, t0, P->X);       // X = X*t0
+}
+
+void oqs_sidh_cln16_first_4_isog(oqs_sidh_cln16_point_proj_t P, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t Aout, oqs_sidh_cln16_f2elm_t Cout, PCurveIsogenyStruct CurveIsogeny) { // Computes first 4-isogeny computed by Alice.
+	                                                                                                                                                                                          // Inputs: projective point P = (X4:Z4) and curve constant A.
+	                                                                                                                                                                                          // Output: the projective point P = (X4:Z4) in the codomain and isogenous curve constant Aout/Cout.
+	oqs_sidh_cln16_f2elm_t t0 = {0}, t1, t2;
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, t0[0]);
+	oqs_sidh_cln16_fpadd751(t0[0], t0[0], t0[0]); // t0 = 2 (in Montgomery domain)
+	oqs_sidh_cln16_fp2sub751(A, t0, Cout);        // Cout = A-2
+	oqs_sidh_cln16_fpadd751(t0[0], t0[0], t1[0]);
+	oqs_sidh_cln16_fpadd751(t0[0], t1[0], t0[0]);    // t0 = 6 (in Montgomery domain)
+	oqs_sidh_cln16_fp2add751(P->X, P->Z, t1);        // t1 = X+Z
+	oqs_sidh_cln16_fp2sub751(P->X, P->Z, t2);        // t2 = X-Z
+	oqs_sidh_cln16_fp2sqr751_mont(t1, t1);           // t1 = (X+Z)^2
+	oqs_sidh_cln16_fp2add751(A, t0, Aout);           // A = A+6
+	oqs_sidh_cln16_fp2mul751_mont(P->X, P->Z, P->Z); // Z = X*Z
+	oqs_sidh_cln16_fp2neg751(P->Z);                  // Z = -X*Z
+	oqs_sidh_cln16_fp2sqr751_mont(t2, t2);           // t2 = (X-Z)^2
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, Cout, P->Z); // Z = -C*X*Z
+	oqs_sidh_cln16_fp2add751(Aout, Aout, Aout);      // Aout = 2*A+12
+	oqs_sidh_cln16_fp2sub751(t1, P->Z, P->X);        // X = (X+Z)^2+C*X*Z
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, t2, P->Z);   // Z = -C*X*Z*(X-Z)^2
+	oqs_sidh_cln16_fp2mul751_mont(P->X, t1, P->X);   // X = (X+Z)^2*[(X+Z)^2+C*X*Z]
+}
+
+void oqs_sidh_cln16_xTPL(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const oqs_sidh_cln16_f2elm_t C24) { // Tripling of a Montgomery point in projective coordinates (X:Z).
+	                                                                                                                                                               // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constant A/C.
+	                                                                                                                                                               // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3).
+	oqs_sidh_cln16_f2elm_t t0, t1, t2, t3, t4, t5;
+
+	oqs_sidh_cln16_fp2sub751(P->X, P->Z, t2);      // t2 = X-Z
+	oqs_sidh_cln16_fp2add751(P->X, P->Z, t3);      // t3 = X+Z
+	oqs_sidh_cln16_fp2sqr751_mont(t2, t0);         // t0 = t2^2
+	oqs_sidh_cln16_fp2sqr751_mont(t3, t1);         // t1 = t3^2
+	oqs_sidh_cln16_fp2mul751_mont(t0, C24, t4);    // t4 = C24*t0
+	oqs_sidh_cln16_fp2mul751_mont(t1, t4, t5);     // t5 = t4*t1
+	oqs_sidh_cln16_fp2sub751(t1, t0, t1);          // t1 = t1-t0
+	oqs_sidh_cln16_fp2mul751_mont(A24, t1, t0);    // t0 = A24*t1
+	oqs_sidh_cln16_fp2add751(t4, t0, t4);          // t4 = t4+t0
+	oqs_sidh_cln16_fp2mul751_mont(t1, t4, t4);     // t4 = t4*t1
+	oqs_sidh_cln16_fp2add751(t5, t4, t0);          // t0 = t5+t4
+	oqs_sidh_cln16_fp2sub751(t5, t4, t1);          // t1 = t5-t4
+	oqs_sidh_cln16_fp2mul751_mont(t0, t2, t0);     // t0 = t2*t0
+	oqs_sidh_cln16_fp2mul751_mont(t1, t3, t1);     // t1 = t3*t1
+	oqs_sidh_cln16_fp2sub751(t0, t1, t4);          // t4 = t0-t1
+	oqs_sidh_cln16_fp2add751(t0, t1, t5);          // t5 = t0+t1
+	oqs_sidh_cln16_fp2sqr751_mont(t4, t4);         // t4 = t4^2
+	oqs_sidh_cln16_fp2sqr751_mont(t5, t5);         // t5 = t5^2
+	oqs_sidh_cln16_fp2mul751_mont(P->X, t4, t4);   // t4 = X*t4
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, t5, Q->X); // X3 = Z*t5
+	oqs_sidh_cln16_fp2copy751(t4, Q->Z);           // Z3 = t4
+}
+
+void oqs_sidh_cln16_xTPLe(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t C, const int e) { // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings.
+	                                                                                                                                                                         // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constant A/C.
+	                                                                                                                                                                         // Output: projective Montgomery x-coordinates Q <- (3^e)*P.
+	oqs_sidh_cln16_f2elm_t A24, C24;
+	int i;
+
+	oqs_sidh_cln16_fp2add751(C, C, A24);
+	oqs_sidh_cln16_fp2add751(A24, A24, C24);
+	oqs_sidh_cln16_fp2add751(A24, A, A24);
+	oqs_sidh_cln16_copy_words((digit_t *) P, (digit_t *) Q, 2 * 2 * NWORDS_FIELD);
+
+	for (i = 0; i < e; i++) {
+		oqs_sidh_cln16_xTPL(Q, Q, A24, C24);
+	}
+}
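+
+// With the curve constant given projectively as A/C, xTPLe precomputes A24 = A + 2*C
+// and C24 = 4*C from three field additions and then applies oqs_sidh_cln16_xTPL
+// e times in place, so Q = [3^e]P is obtained without any field inversion.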
+
+void oqs_sidh_cln16_get_3_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C) { // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
+	                                                                                                                      // Input:  projective point of order three P = (X3:Z3).
+	                                                                                                                      // Output: the 3-isogenous Montgomery curve with projective coefficient A/C.
+	oqs_sidh_cln16_f2elm_t t0, t1;
+
+	oqs_sidh_cln16_fp2sqr751_mont(P->X, t0);       // t0 = X^2
+	oqs_sidh_cln16_fp2add751(t0, t0, t1);          // t1 = 2*t0
+	oqs_sidh_cln16_fp2add751(t0, t1, t0);          // t0 = t0+t1
+	oqs_sidh_cln16_fp2sqr751_mont(P->Z, t1);       // t1 = Z^2
+	oqs_sidh_cln16_fp2sqr751_mont(t1, A);          // A = t1^2
+	oqs_sidh_cln16_fp2add751(t1, t1, t1);          // t1 = 2*t1
+	oqs_sidh_cln16_fp2add751(t1, t1, C);           // C = 2*t1
+	oqs_sidh_cln16_fp2sub751(t0, t1, t1);          // t1 = t0-t1
+	oqs_sidh_cln16_fp2mul751_mont(t0, t1, t1);     // t1 = t0*t1
+	oqs_sidh_cln16_fp2sub751(A, t1, A);            // A = A-t1
+	oqs_sidh_cln16_fp2sub751(A, t1, A);            // A = A-t1
+	oqs_sidh_cln16_fp2sub751(A, t1, A);            // A = A-t1
+	oqs_sidh_cln16_fp2mul751_mont(P->X, P->Z, t1); // t1 = X*Z    // ms trade-off possible (1 mul for 1sqr + 1add + 2sub)
+	oqs_sidh_cln16_fp2mul751_mont(C, t1, C);       // C = C*t1
+}
+
+void oqs_sidh_cln16_eval_3_isog(const oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q) { // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and a point P = (X:Z).
+	                                                                                                  // Inputs: projective points P = (X3:Z3) and Q = (X:Z).
+	                                                                                                  // Output: the projective point Q <- phi(Q) = (XX:ZZ).
+	oqs_sidh_cln16_f2elm_t t0, t1, t2;
+
+	oqs_sidh_cln16_fp2mul751_mont(P->X, Q->X, t0); // t0 = X3*X
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, Q->X, t1); // t1 = Z3*X
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, Q->Z, t2); // t2 = Z3*Z
+	oqs_sidh_cln16_fp2sub751(t0, t2, t0);          // t0 = X3*X-Z3*Z
+	oqs_sidh_cln16_fp2mul751_mont(P->X, Q->Z, t2); // t2 = X3*Z
+	oqs_sidh_cln16_fp2sub751(t1, t2, t1);          // t1 = Z3*X-X3*Z
+	oqs_sidh_cln16_fp2sqr751_mont(t0, t0);         // t0 = (X3*X-Z3*Z)^2
+	oqs_sidh_cln16_fp2sqr751_mont(t1, t1);         // t1 = (Z3*X-X3*Z)^2
+	oqs_sidh_cln16_fp2mul751_mont(Q->X, t0, Q->X); // X = X*(X3*X-Z3*Z)^2
+	oqs_sidh_cln16_fp2mul751_mont(Q->Z, t1, Q->Z); // Z = Z*(Z3*X-X3*Z)^2
+}
+
+void oqs_sidh_cln16_inv_3_way(oqs_sidh_cln16_f2elm_t z1, oqs_sidh_cln16_f2elm_t z2, oqs_sidh_cln16_f2elm_t z3) { // 3-way simultaneous inversion
+	                                                                                                             // Input:  z1,z2,z3
+	                                                                                                             // Output: 1/z1,1/z2,1/z3 (override inputs).
+	oqs_sidh_cln16_f2elm_t t0, t1, t2, t3;
+
+	oqs_sidh_cln16_fp2mul751_mont(z1, z2, t0); // t0 = z1*z2
+	oqs_sidh_cln16_fp2mul751_mont(z3, t0, t1); // t1 = z1*z2*z3
+	oqs_sidh_cln16_fp2inv751_mont(t1);         // t1 = 1/(z1*z2*z3)
+	oqs_sidh_cln16_fp2mul751_mont(z3, t1, t2); // t2 = 1/(z1*z2)
+	oqs_sidh_cln16_fp2mul751_mont(t2, z2, t3); // t3 = 1/z1
+	oqs_sidh_cln16_fp2mul751_mont(t2, z1, z2); // z2 = 1/z2
+	oqs_sidh_cln16_fp2mul751_mont(t0, t1, z3); // z3 = 1/z3
+	oqs_sidh_cln16_fp2copy751(t3, z1);         // z1 = 1/z1
+}
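+
+// The routine above is Montgomery's simultaneous-inversion trick: with
+// t1 = 1/(z1*z2*z3), a single field inversion yields all three inverses as
+// 1/z1 = t1*z2*z3, 1/z2 = t1*z1*z3 and 1/z3 = t1*z1*z2, trading two inversions
+// for a few multiplications.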
+
+void oqs_sidh_cln16_distort_and_diff(const oqs_sidh_cln16_felm_t xP, oqs_sidh_cln16_point_proj_t D, PCurveIsogenyStruct CurveIsogeny) { // Computing the point (x(Q-P),z(Q-P))
+	                                                                                                                                    // Input:  coordinate xP of point P=(xP,yP)
+	                                                                                                                                    // Output: the point D = (x(Q-P),z(Q-P)), where Q=tau(P).
+	oqs_sidh_cln16_felm_t one;
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one);
+	oqs_sidh_cln16_fpsqr751_mont(xP, D->X[0]);      // XD = xP^2
+	oqs_sidh_cln16_fpadd751(D->X[0], one, D->X[0]); // XD = XD+1
+	oqs_sidh_cln16_fpcopy751(D->X[0], D->X[1]);     // XD = XD*i
+	oqs_sidh_cln16_fpzero751(D->X[0]);
+	oqs_sidh_cln16_fpadd751(xP, xP, D->Z[0]); // ZD = xP+xP
+}
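+
+// In affine terms the result above is x(Q-P) = i*(xP^2 + 1)/(2*xP) for Q = tau(P),
+// stored projectively as (XD:ZD) = (i*(xP^2 + 1) : 2*xP); only the nonzero Fp
+// components of D are written.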
+
+void oqs_sidh_cln16_get_A(const oqs_sidh_cln16_f2elm_t xP, const oqs_sidh_cln16_f2elm_t xQ, const oqs_sidh_cln16_f2elm_t xR, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A.
+	                                                                                                                                                                                       // Input:  the x-coordinates xP, xQ, and xR of the points P, Q and R.
+	                                                                                                                                                                                       // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x.
+	oqs_sidh_cln16_f2elm_t t0, t1, one = {0};
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]);
+	oqs_sidh_cln16_fp2add751(xP, xQ, t1);      // t1 = xP+xQ
+	oqs_sidh_cln16_fp2mul751_mont(xP, xQ, t0); // t0 = xP*xQ
+	oqs_sidh_cln16_fp2mul751_mont(xR, t1, A);  // A = xR*t1
+	oqs_sidh_cln16_fp2add751(t0, A, A);        // A = A+t0
+	oqs_sidh_cln16_fp2mul751_mont(t0, xR, t0); // t0 = t0*xR
+	oqs_sidh_cln16_fp2sub751(A, one, A);       // A = A-1
+	oqs_sidh_cln16_fp2add751(t0, t0, t0);      // t0 = t0+t0
+	oqs_sidh_cln16_fp2add751(t1, xR, t1);      // t1 = t1+xR
+	oqs_sidh_cln16_fp2add751(t0, t0, t0);      // t0 = t0+t0
+	oqs_sidh_cln16_fp2sqr751_mont(A, A);       // A = A^2
+	oqs_sidh_cln16_fp2inv751_mont(t0);         // t0 = 1/t0
+	oqs_sidh_cln16_fp2mul751_mont(A, t0, A);   // A = A*t0
+	oqs_sidh_cln16_fp2sub751(A, t1, A);        // Afinal = A-t1
+}
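+
+// The sequence above evaluates the closed-form expression
+//     A = (xP*xQ + xP*xR + xQ*xR - 1)^2 / (4*xP*xQ*xR) - xP - xQ - xR,
+// the unique Montgomery coefficient consistent with R = Q - P for the given
+// x-coordinates.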
+
+///////////////////////////////////////////////////////////////////////////////////
+///////////////              FUNCTIONS FOR COMPRESSION              ///////////////
+
+static void get_point_notin_2E(oqs_sidh_cln16_felm_t alpha, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_felm_t one, const oqs_sidh_cln16_felm_t four, const oqs_sidh_cln16_felm_t value47, const oqs_sidh_cln16_felm_t value52) { // Inputs: alpha, a small integer (parsed in Fp),
+	                                                                                                                                                                                                                                       //         Montgomery coefficient A = A0+A1*i.
+	                                                                                                                                                                                                                                       // Output: alpha such that alpha*u = alpha*(i+4) is a good x-coordinate, which means it corresponds to a point P not in [2]E.
+	                                                                                                                                                                                                                                       //         Then, [3^eB]P has full order 2^eA.
+	digit_t *A0 = (digit_t *) A[0], *A1 = (digit_t *) A[1];
+	oqs_sidh_cln16_felm_t X0, X1, x0, x1, t0, sqrt, X0_temp = {0}, X1_temp = {0}, alpha52 = {0}, alpha52_2 = {0}, alpha47 = {0}, alpha47_2 = {0};
+	unsigned int i;
+
+	oqs_sidh_cln16_fpsub751(A0, A1, x0); // x0 = A0-A1
+	oqs_sidh_cln16_fpadd751(x0, A0, x0); // x0 = x0+A0
+	oqs_sidh_cln16_fpadd751(x0, x0, x0);
+	oqs_sidh_cln16_fpadd751(x0, x0, x0);
+	oqs_sidh_cln16_fpadd751(x0, x0, x0); // x0 = 8*x0
+	oqs_sidh_cln16_fpsub751(x0, A0, X0); // X0 = x0-A0
+	oqs_sidh_cln16_fpadd751(A0, A1, x1); // x1 = A0+A1
+	oqs_sidh_cln16_fpadd751(x1, A1, x1); // x1 = x1+A1
+	oqs_sidh_cln16_fpadd751(x1, x1, x1);
+	oqs_sidh_cln16_fpadd751(x1, x1, x1);
+	oqs_sidh_cln16_fpadd751(x1, x1, x1);                     // x1 = 8*x1
+	oqs_sidh_cln16_fpsub751(x1, A1, X1);                     // X1 = x1-A1
+	oqs_sidh_cln16_fpmul751_mont(alpha, value52, alpha52);   // alpha52 = 52*alpha
+	oqs_sidh_cln16_fpmul751_mont(X0, alpha, X0_temp);        // X0*alpha
+	oqs_sidh_cln16_fpmul751_mont(alpha52, alpha, alpha52_2); // alpha52^2 = 52*alpha^2
+	oqs_sidh_cln16_fpmul751_mont(alpha, value47, alpha47);   // alpha47 = 47*alpha
+	oqs_sidh_cln16_fpmul751_mont(X1, alpha, X1_temp);        // X1*alpha
+	oqs_sidh_cln16_fpmul751_mont(alpha47, alpha, alpha47_2); // alpha47^2 = 47*alpha^2
+
+	do {
+		oqs_sidh_cln16_fpadd751(alpha, one, alpha);             // alpha += 1
+		oqs_sidh_cln16_fpadd751(X0_temp, X0, X0_temp);          // X0*alpha
+		oqs_sidh_cln16_fpadd751(alpha52, value52, t0);          // t0 = 52*alpha (alpha52 still held 52*(alpha-1))
+		oqs_sidh_cln16_fpadd751(alpha52, t0, alpha52);          // alpha52 = 52*(2*alpha-1), the increment of 52*alpha^2
+		oqs_sidh_cln16_fpadd751(alpha52_2, alpha52, alpha52_2); // alpha52_2 = 52*alpha^2
+		oqs_sidh_cln16_fpcopy751(t0, alpha52);                  // alpha52 = 52*alpha
+		oqs_sidh_cln16_fpadd751(alpha52_2, four, x0);           // 52*alpha^2 + 4
+		oqs_sidh_cln16_fpadd751(X0_temp, x0, x0);               // x0 = X0*alpha + 52*alpha^2 + 4
+		oqs_sidh_cln16_fpadd751(X1_temp, X1, X1_temp);          // X1*alpha
+		oqs_sidh_cln16_fpadd751(alpha47, value47, t0);          // t0 = 47*alpha (alpha47 still held 47*(alpha-1))
+		oqs_sidh_cln16_fpadd751(alpha47, t0, alpha47);          // alpha47 = 47*(2*alpha-1), the increment of 47*alpha^2
+		oqs_sidh_cln16_fpadd751(alpha47_2, alpha47, alpha47_2); // alpha47_2 = 47*alpha^2
+		oqs_sidh_cln16_fpcopy751(t0, alpha47);                  // alpha47 = 47*alpha
+		oqs_sidh_cln16_fpadd751(alpha47_2, one, x1);            // 47*alpha^2 + 1
+		oqs_sidh_cln16_fpadd751(X1_temp, x1, x1);               // x1 = X1*alpha + 47*alpha^2 + 1
+		oqs_sidh_cln16_fpsqr751_mont(x0, x0);                   // x0 = x0^2
+		oqs_sidh_cln16_fpsqr751_mont(x1, x1);                   // x1 = x1^2
+		oqs_sidh_cln16_fpsqr751_mont(alpha, t0);                // t0 = alpha^2
+		oqs_sidh_cln16_fpadd751(x0, x1, x0);                    // x0 = x0+x1
+		oqs_sidh_cln16_fpmul751_mont(t0, x0, t0);               // t0 = t0*x0
+		oqs_sidh_cln16_fpcopy751(t0, sqrt);
+		for (i = 0; i < 371; i++) { // sqrt = t0^((p+1) div 2)
+			oqs_sidh_cln16_fpsqr751_mont(sqrt, sqrt);
+		}
+		for (i = 0; i < 239; i++) {
+			oqs_sidh_cln16_fpsqr751_mont(sqrt, x0);
+			oqs_sidh_cln16_fpmul751_mont(sqrt, x0, sqrt);
+		}
+		oqs_sidh_cln16_fpcorrection751(sqrt);
+		oqs_sidh_cln16_fpcorrection751(t0);
+	} while (oqs_sidh_cln16_fpequal751_non_constant_time(sqrt, t0) == false);
+}
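+
+// The do-while loop above keeps incrementing alpha until the candidate passes a
+// quadratic-residuosity test in Fp: for the prime p = 2^372*3^239 - 1 used here,
+// (p+1)/2 = 2^371*3^239, so sqrt = t0^((p+1)/2) is computed with 371 squarings
+// followed by 239 square-and-multiply (cubing) steps, and alpha is accepted
+// exactly when sqrt equals t0, i.e. when t0 is a square.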
+
+void oqs_sidh_cln16_generate_2_torsion_basis(const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R1, oqs_sidh_cln16_point_full_proj_t R2, PCurveIsogenyStruct CurveIsogeny) { // Produces points R1 and R2 such that {R1, R2} is a basis for E[2^372].
+	                                                                                                                                                                                       // Input:   curve constant A.
+	                                                                                                                                                                                       // Outputs: R1 = (X1:Y1:Z1) and R2 = (X2:Y2:Z2).
+	oqs_sidh_cln16_point_proj_t P, Q, P1 = {0}, P2 = {0};
+	oqs_sidh_cln16_felm_t *X1 = (oqs_sidh_cln16_felm_t *) P1->X, *Z1 = (oqs_sidh_cln16_felm_t *) P1->Z;
+	oqs_sidh_cln16_felm_t *X2 = (oqs_sidh_cln16_felm_t *) P2->X, *Z2 = (oqs_sidh_cln16_felm_t *) P2->Z;
+	oqs_sidh_cln16_felm_t *XP = (oqs_sidh_cln16_felm_t *) P->X, *ZP = (oqs_sidh_cln16_felm_t *) P->Z;
+	oqs_sidh_cln16_felm_t *XQ = (oqs_sidh_cln16_felm_t *) Q->X, *ZQ = (oqs_sidh_cln16_felm_t *) Q->Z;
+	oqs_sidh_cln16_felm_t *Y1 = (oqs_sidh_cln16_felm_t *) R1->Y, *Y2 = (oqs_sidh_cln16_felm_t *) R2->Y;
+	oqs_sidh_cln16_felm_t zero, alpha = {0};
+	oqs_sidh_cln16_f2elm_t t0, t1, one = {0};
+	oqs_sidh_cln16_felm_t four, value47 = {0}, value52 = {0};
+
+	oqs_sidh_cln16_fpzero751(zero);
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]);
+
+	value47[0] = 47;
+	value52[0] = 52;
+	oqs_sidh_cln16_to_mont(value47, value47);
+	oqs_sidh_cln16_to_mont(value52, value52);
+	oqs_sidh_cln16_fpadd751(one[0], one[0], four);
+	oqs_sidh_cln16_fpadd751(four, four, four);
+
+	get_point_notin_2E(alpha, A, one[0], four, value47, value52);
+	oqs_sidh_cln16_fpcopy751(alpha, X1[1]);
+	oqs_sidh_cln16_fpadd751(alpha, alpha, X1[0]);
+	oqs_sidh_cln16_fpadd751(X1[0], X1[0], X1[0]); // X1 = alpha*i + alpha*4
+	oqs_sidh_cln16_fpcopy751(one[0], Z1[0]);      // Z1 = 1
+
+	oqs_sidh_cln16_xTPLe(P1, P1, A, one, 239); // xTPL assumes projective constant, but this is minor
+	oqs_sidh_cln16_xDBLe(P1, P, A, one, 371);
+
+	// This loop is necessary to ensure that the order of the WeilPairing is oA and not smaller.
+	// This ensures that we have a basis.
+	do {
+		get_point_notin_2E(alpha, A, one[0], four, value47, value52);
+		oqs_sidh_cln16_fpcopy751(alpha, X2[1]);
+		oqs_sidh_cln16_fpadd751(alpha, alpha, X2[0]);
+		oqs_sidh_cln16_fpadd751(X2[0], X2[0], X2[0]); // X2 = alpha*i + alpha*4
+		oqs_sidh_cln16_fpzero751(Z2[1]);
+		oqs_sidh_cln16_fpcopy751(one[0], Z2[0]);   // Z2 = 1
+		oqs_sidh_cln16_xTPLe(P2, P2, A, one, 239); // xTPL assumes projective constant, but this is minor
+		oqs_sidh_cln16_xDBLe(P2, Q, A, one, 371);
+		oqs_sidh_cln16_fp2mul751_mont(XP, ZQ, t0); // t0 = XP*ZQ
+		oqs_sidh_cln16_fp2mul751_mont(XQ, ZP, t1); // t1 = XQ*ZP
+		oqs_sidh_cln16_fp2sub751(t0, t1, t0);      // t0 = XP*ZQ-XQ*ZP
+		oqs_sidh_cln16_fp2correction751(t0);
+	} while (oqs_sidh_cln16_fpequal751_non_constant_time(t0[0], zero) == true && oqs_sidh_cln16_fpequal751_non_constant_time(t0[1], zero) == true);
+
+	oqs_sidh_cln16_fp2copy751(X1, R1->X);
+	oqs_sidh_cln16_fp2copy751(Z1, R1->Z);
+	oqs_sidh_cln16_fp2copy751(X2, R2->X);
+	oqs_sidh_cln16_fp2copy751(Z2, R2->Z);
+
+	// Recover the y-coordinates.
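+	// On the Montgomery curve y^2 = x^3 + A*x^2 + x with x = X/Z, one has
+	// y^2 = X*(X^2 + A*X*Z + Z^2)/Z^3; the sequence below builds that fraction
+	// (numerator in Y1/Y2, denominator in t0) and sets Y = Z*sqrt(numerator/denominator).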
+	oqs_sidh_cln16_fp2sqr751_mont(Z1, t0);     // t0 = Z1^2
+	oqs_sidh_cln16_fp2mul751_mont(A, Z1, Y1);  // Y1 = A*Z1
+	oqs_sidh_cln16_fp2add751(X1, Y1, Y1);      // Y1 = X1+Y1
+	oqs_sidh_cln16_fp2mul751_mont(X1, Y1, Y1); // Y1 = Y1*X1
+	oqs_sidh_cln16_fp2add751(t0, Y1, Y1);      // Y1 = Y1+t0
+	oqs_sidh_cln16_fp2mul751_mont(X1, Y1, Y1); // Y1 = Y1*X1
+	oqs_sidh_cln16_fp2mul751_mont(t0, Z1, t0); // t0 = t0*Z1
+	oqs_sidh_cln16_sqrt_Fp2_frac(Y1, t0, t1);  // t1 = sqrt(Y1/t0)
+
+	oqs_sidh_cln16_fp2sqr751_mont(Z2, t0);     // t0 = Z2^2
+	oqs_sidh_cln16_fp2mul751_mont(A, Z2, Y2);  // Y2 = A*Z2
+	oqs_sidh_cln16_fp2add751(X2, Y2, Y2);      // Y2 = X2+Y2
+	oqs_sidh_cln16_fp2mul751_mont(Y2, X2, Y2); // Y2 = Y2*X2
+	oqs_sidh_cln16_fp2add751(t0, Y2, Y2);      // Y2 = Y2+t0
+	oqs_sidh_cln16_fp2mul751_mont(Y2, X2, Y2); // Y2 = Y2*X2
+	oqs_sidh_cln16_fp2mul751_mont(t0, Z2, t0); // t0 = t0*Z2
+	oqs_sidh_cln16_fp2mul751_mont(t1, Z1, Y1); // Y1 = t1*Z1
+	oqs_sidh_cln16_sqrt_Fp2_frac(Y2, t0, t1);  // t1 = sqrt(Y2/t0)
+	oqs_sidh_cln16_fp2mul751_mont(Z2, t1, Y2); // Y2 = t1*Z2
+}
+
+static uint64_t sqrt17[SIDH_NWORDS64_FIELD] = {0x89127CDB8966913D, 0xF788014C8C8401A0, 0x1A16F73884F3E3E8, 0x2E67382B560FA195, 0xDD5EE869B7F4FD81, 0x16A0849EF695EFEB,
+                                               0x3675244609DE1963, 0x36F02976EF2EB241, 0x92D09F939A20637F, 0x41496905F2B0112C, 0xA94C09B1F7242495, 0x0000297652D36A97};
+
+static void get_X_on_curve(oqs_sidh_cln16_f2elm_t A, unsigned int *r, oqs_sidh_cln16_f2elm_t x, oqs_sidh_cln16_felm_t t1, oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t b) { // Elligator2 for X
+	oqs_sidh_cln16_felm_t v0, v1, r0, r1, t0, t2, t3, rsq = {0};
+	unsigned int i;
+
+	oqs_sidh_cln16_fpcopy751(((oqs_sidh_cln16_felm_t *) &LIST)[(*r << 1) - 2], r1); // r1 = list[2*r-1]
+	oqs_sidh_cln16_fpcopy751(((oqs_sidh_cln16_felm_t *) &LIST)[(*r << 1) - 1], r0); // r0 = list[2*r]
+	rsq[0] = (digit_t)(*r) * (*r);                                                  // rsq = r^2
+	oqs_sidh_cln16_to_mont(rsq, rsq);                                               // Converting to Montgomery representation
+	oqs_sidh_cln16_fpmul751_mont(A[1], r1, t0);                                     // t0 = A1*r1
+	oqs_sidh_cln16_fpmul751_mont(A[0], r0, v0);                                     // v0 = A0*r0
+	oqs_sidh_cln16_fpsub751(v0, t0, v0);                                            // v0 = v0-t0
+	oqs_sidh_cln16_fpmul751_mont(A[1], r0, t0);                                     // t0 = A1*r0
+	oqs_sidh_cln16_fpmul751_mont(A[0], r1, v1);                                     // v1 = A0*r1
+	oqs_sidh_cln16_fpadd751(v1, t0, v1);                                            // v1 = v1+t0
+	oqs_sidh_cln16_fpadd751(v0, A[0], t0);                                          // t0 = v0+A0
+	oqs_sidh_cln16_fpadd751(v1, A[1], t1);                                          // t1 = v1+A1
+	oqs_sidh_cln16_fpmul751_mont(v0, v1, t2);                                       // t2 = v0*v1
+	oqs_sidh_cln16_fpadd751(t2, t2, t2);                                            // t2 = t2+t2
+	oqs_sidh_cln16_fpmul751_mont(t2, A[1], a);                                      // a = t2*A1
+	oqs_sidh_cln16_fpsub751(v0, a, a);                                              // a = v0-a
+	oqs_sidh_cln16_fpmul751_mont(t2, A[0], b);                                      // b = t2*A0
+	oqs_sidh_cln16_fpadd751(b, v1, b);                                              // b = b+v1
+	oqs_sidh_cln16_fpadd751(v0, v0, t2);                                            // t2 = v0+v0
+	oqs_sidh_cln16_fpadd751(t0, t2, t2);                                            // t2 = t2+t0
+	oqs_sidh_cln16_fpsqr751_mont(v0, t3);                                           // t3 = v0^2
+	oqs_sidh_cln16_fpmul751_mont(t0, t3, t0);                                       // t0 = t0*t3
+	oqs_sidh_cln16_fpadd751(a, t0, a);                                              // a = a+t0
+	oqs_sidh_cln16_fpsqr751_mont(v1, t0);                                           // t0 = v1^2
+	oqs_sidh_cln16_fpmul751_mont(t0, t2, t2);                                       // t2 = t0*t2
+	oqs_sidh_cln16_fpsub751(a, t2, a);                                              // a = a-t2
+	oqs_sidh_cln16_fpmul751_mont(t0, t1, t0);                                       // t0 = t0*t1
+	oqs_sidh_cln16_fpsub751(b, t0, b);                                              // b = b-t0
+	oqs_sidh_cln16_fpadd751(t1, v1, t1);                                            // t1 = t1+v1
+	oqs_sidh_cln16_fpadd751(v1, t1, t1);                                            // t1 = t1+v1
+	oqs_sidh_cln16_fpmul751_mont(t3, t1, t1);                                       // t1 = t1*t3
+	oqs_sidh_cln16_fpadd751(b, t1, b);                                              // b = t1+b
+	oqs_sidh_cln16_fpsqr751_mont(a, t0);                                            // t0 = a^2
+	oqs_sidh_cln16_fpsqr751_mont(b, t1);                                            // t1 = b^2
+	oqs_sidh_cln16_fpadd751(t0, t1, t0);                                            // t0 = t0+t1
+	oqs_sidh_cln16_fpcopy751(t0, t1);
+	for (i = 0; i < 370; i++) { // t1 = t0^((p+1) div 4)
+		oqs_sidh_cln16_fpsqr751_mont(t1, t1);
+	}
+	for (i = 0; i < 239; i++) {
+		oqs_sidh_cln16_fpsqr751_mont(t1, t2);
+		oqs_sidh_cln16_fpmul751_mont(t1, t2, t1);
+	}
+	oqs_sidh_cln16_fpsqr751_mont(t1, t2); // t2 = t1^2
+	oqs_sidh_cln16_fpcorrection751(t0);
+	oqs_sidh_cln16_fpcorrection751(t2);
+	if (oqs_sidh_cln16_fpequal751_non_constant_time(t0, t2) == false) {
+		oqs_sidh_cln16_fpadd751(v0, v0, x[0]);                    // x0 = v0+v0
+		oqs_sidh_cln16_fpadd751(x[0], x[0], x[0]);                // x0 = x0+x0
+		oqs_sidh_cln16_fpsub751(x[0], v1, x[0]);                  // x0 = x0-v1
+		oqs_sidh_cln16_fpmul751_mont(rsq, x[0], x[0]);            // x0 = rsq*x0
+		oqs_sidh_cln16_fpadd751(v1, v1, x[1]);                    // x1 = v1+v1
+		oqs_sidh_cln16_fpadd751(x[1], x[1], x[1]);                // x1 = x1+x1
+		oqs_sidh_cln16_fpadd751(x[1], v0, x[1]);                  // x1 = x1+v0
+		oqs_sidh_cln16_fpmul751_mont(rsq, x[1], x[1]);            // x1 = rsq*x1
+		oqs_sidh_cln16_fpcopy751(a, t0);                          // t0 = a
+		oqs_sidh_cln16_fpadd751(a, a, a);                         // a = a+a
+		oqs_sidh_cln16_fpadd751(a, a, a);                         // a = a+a
+		oqs_sidh_cln16_fpsub751(a, b, a);                         // a = a-b
+		oqs_sidh_cln16_fpmul751_mont(rsq, a, a);                  // a = rsq*a
+		oqs_sidh_cln16_fpadd751(b, b, b);                         // b = b+b
+		oqs_sidh_cln16_fpadd751(b, b, b);                         // b = b+b
+		oqs_sidh_cln16_fpadd751(t0, b, b);                        // b = b+t0
+		oqs_sidh_cln16_fpmul751_mont(rsq, b, b);                  // b = rsq*b
+		oqs_sidh_cln16_fpmul751_mont(rsq, t1, t1);                // t1 = t1*rsq
+		oqs_sidh_cln16_fpmul751_mont(t1, (digit_t *) sqrt17, t1); // t1 = t1*sqrt17
+	} else {
+		oqs_sidh_cln16_fpcopy751(v0, x[0]); // x0 = v0
+		oqs_sidh_cln16_fpcopy751(v1, x[1]); // x1 = v1
+	}
+}
+
+static void get_pt_on_curve(oqs_sidh_cln16_f2elm_t A, unsigned int *r, oqs_sidh_cln16_f2elm_t x, oqs_sidh_cln16_f2elm_t y) { // Elligator2
+	oqs_sidh_cln16_felm_t t0, t1, t2, t3, a, b;
+
+	get_X_on_curve(A, r, x, t1, a, b);
+	oqs_sidh_cln16_fpadd751(a, t1, t0); // t0 = a+t1
+	oqs_sidh_cln16_fpdiv2_751(t0, t0);  // t0 = t0/2
+	oqs_sidh_cln16_fpcopy751(t0, t1);
+	oqs_sidh_cln16_fpinv751_chain_mont(t1);   // t1 = t0^((p-3)/4)
+	oqs_sidh_cln16_fpmul751_mont(t0, t1, t3); // t3 = t0*t1
+	oqs_sidh_cln16_fpsqr751_mont(t3, t2);     // t2 = t3^2
+	oqs_sidh_cln16_fpdiv2_751(t1, t1);        // t1 = t1/2
+	oqs_sidh_cln16_fpmul751_mont(b, t1, t1);  // t1 = t1*b
+	oqs_sidh_cln16_fpcorrection751(t0);
+	oqs_sidh_cln16_fpcorrection751(t2);
+
+	if (oqs_sidh_cln16_fpequal751_non_constant_time(t0, t2) == true) {
+		oqs_sidh_cln16_fpcopy751(t3, y[0]); // y0 = t3
+		oqs_sidh_cln16_fpcopy751(t1, y[1]); // y1 = t1;
+	} else {
+		oqs_sidh_cln16_fpneg751(t3);
+		oqs_sidh_cln16_fpcopy751(t1, y[0]); // y0 = t1;
+		oqs_sidh_cln16_fpcopy751(t3, y[1]); // y1 = -t3
+	}
+}
+
+static void get_3_torsion_elt(oqs_sidh_cln16_f2elm_t A, unsigned int *r, oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t P3, unsigned int *triples, PCurveIsogenyStruct CurveIsogeny) {
+	oqs_sidh_cln16_point_proj_t PP;
+	oqs_sidh_cln16_f2elm_t A24, C24, one = {0};
+	oqs_sidh_cln16_felm_t t0, t1, t2, zero = {0};
+
+	*triples = 0;
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]);
+	oqs_sidh_cln16_fpadd751(one[0], one[0], C24[0]);
+	oqs_sidh_cln16_fpzero751(C24[1]);
+
+	get_X_on_curve(A, r, P->X, t0, t1, t2);
+	oqs_sidh_cln16_fp2copy751(one, P->Z); // Z = 1
+	oqs_sidh_cln16_xDBLe(P, P, A, one, 372);
+
+	oqs_sidh_cln16_fp2copy751(P->X, PP->X); // XX = X
+	oqs_sidh_cln16_fp2copy751(P->Z, PP->Z); // ZZ = Z
+
+	oqs_sidh_cln16_fp2add751(A, C24, A24);           // A24 = A+2
+	oqs_sidh_cln16_fpadd751(C24[0], C24[0], C24[0]); // C24 = 4
+
+	oqs_sidh_cln16_fp2correction751(PP->Z);
+	while (oqs_sidh_cln16_fpequal751_non_constant_time(PP->Z[0], zero) == false || oqs_sidh_cln16_fpequal751_non_constant_time(PP->Z[1], zero) == false) {
+		oqs_sidh_cln16_fp2copy751(PP->X, P3->X); // X3 = XX
+		oqs_sidh_cln16_fp2copy751(PP->Z, P3->Z); // Z3 = ZZ
+		oqs_sidh_cln16_xTPL(PP, PP, A24, C24);
+		(*triples)++;
+		oqs_sidh_cln16_fp2correction751(PP->Z);
+	}
+}
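+
+// After the 372 doublings clear the 2-part, P has order dividing 3^239; the while
+// loop then triples PP until it reaches the identity (Z = 0), so on exit P3 holds
+// the last nonzero multiple -- a point of exact order 3 -- and P has order
+// 3^(*triples).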
+
+void oqs_sidh_cln16_generate_3_torsion_basis(oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R1, oqs_sidh_cln16_point_full_proj_t R2, PCurveIsogenyStruct CurveIsogeny) { // Produces points R1 and R2 such that {R1, R2} is a basis for E[3^239].
+	                                                                                                                                                                                 // Input:   curve constant A.
+	                                                                                                                                                                                 // Outputs: R1 = (X1:Y1:Z1) and R2 = (X2:Y2:Z2).
+	oqs_sidh_cln16_point_proj_t R, R3, R4;
+	oqs_sidh_cln16_felm_t *X = (oqs_sidh_cln16_felm_t *) R->X, *Z = (oqs_sidh_cln16_felm_t *) R->Z;
+	oqs_sidh_cln16_felm_t *X3 = (oqs_sidh_cln16_felm_t *) R3->X, *Z3 = (oqs_sidh_cln16_felm_t *) R3->Z;
+	oqs_sidh_cln16_felm_t *X4 = (oqs_sidh_cln16_felm_t *) R4->X, *Z4 = (oqs_sidh_cln16_felm_t *) R4->Z;
+	oqs_sidh_cln16_felm_t *X1 = (oqs_sidh_cln16_felm_t *) R1->X, *Y1 = (oqs_sidh_cln16_felm_t *) R1->Y, *Z1 = (oqs_sidh_cln16_felm_t *) R1->Z;
+	oqs_sidh_cln16_felm_t *X2 = (oqs_sidh_cln16_felm_t *) R2->X, *Y2 = (oqs_sidh_cln16_felm_t *) R2->Y, *Z2 = (oqs_sidh_cln16_felm_t *) R2->Z;
+	oqs_sidh_cln16_f2elm_t u, v, c, f, t0, f0, fX, fY, Y, Y3, one = {0};
+	oqs_sidh_cln16_felm_t zero = {0};
+	unsigned int r = 1;
+	unsigned int triples = 0, pts_found = 0;
+
+	get_3_torsion_elt(A, &r, R, R3, &triples, CurveIsogeny);
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]);
+	oqs_sidh_cln16_fpzero751(zero);
+
+	if (triples == 239) {
+		pts_found = 1;
+		oqs_sidh_cln16_fp2copy751(X, X1);          // X1 = X
+		oqs_sidh_cln16_fp2copy751(Z, Z1);          // Z1 = Z
+		oqs_sidh_cln16_fp2mul751_mont(A, Z1, u);   // u = A*Z1
+		oqs_sidh_cln16_fp2add751(u, X1, u);        // u = u+X1
+		oqs_sidh_cln16_fp2mul751_mont(u, X1, u);   // u = u*X1
+		oqs_sidh_cln16_fp2sqr751_mont(Z1, v);      // v = Z1^2
+		oqs_sidh_cln16_fp2add751(u, v, u);         // u = u+v
+		oqs_sidh_cln16_fp2mul751_mont(u, X1, u);   // u = u*X1
+		oqs_sidh_cln16_fp2mul751_mont(v, Z1, v);   // v = v*Z1
+		oqs_sidh_cln16_sqrt_Fp2_frac(u, v, Y1);    // Y1 = sqrt(u/v)
+		oqs_sidh_cln16_fp2mul751_mont(Y1, Z1, Y1); // Y1 = Y1*Z1
+	}
+
+	oqs_sidh_cln16_fp2mul751_mont(A, Z3, u);   // u = A*Z3
+	oqs_sidh_cln16_fp2add751(u, X3, u);        // u = u+X3
+	oqs_sidh_cln16_fp2mul751_mont(u, X3, u);   // u = u*X3
+	oqs_sidh_cln16_fp2sqr751_mont(Z3, v);      // v = Z3^2
+	oqs_sidh_cln16_fp2add751(u, v, u);         // u = u+v
+	oqs_sidh_cln16_fp2mul751_mont(u, X3, u);   // u = u*X3
+	oqs_sidh_cln16_fp2mul751_mont(v, Z3, v);   // v = v*Z3
+	oqs_sidh_cln16_sqrt_Fp2_frac(u, v, Y3);    // Y3 = sqrt(u/v)
+	oqs_sidh_cln16_fp2mul751_mont(Y3, Z3, Y3); // Y3 = Y3*Z3
+	oqs_sidh_cln16_fp2sqr751_mont(X3, f0);     // f0 = X3^2
+	oqs_sidh_cln16_fp2sqr751_mont(Z3, t0);     // t0 = Z3^2
+	oqs_sidh_cln16_fp2mul751_mont(X3, Z3, fX); // fX = X3*Z3
+	oqs_sidh_cln16_fp2mul751_mont(A, fX, fX);  // fX = A*fX
+	oqs_sidh_cln16_fp2add751(fX, fX, fX);      // fX = fX+fX
+	oqs_sidh_cln16_fp2add751(fX, t0, fX);      // fX = fX+t0
+	oqs_sidh_cln16_fp2add751(fX, f0, fX);      // fX = fX+f0
+	oqs_sidh_cln16_fp2add751(fX, f0, fX);      // fX = fX+f0
+	oqs_sidh_cln16_fp2add751(fX, f0, fX);      // fX = fX+f0
+	oqs_sidh_cln16_fp2sub751(t0, f0, f0);      // f0 = t0-f0
+	oqs_sidh_cln16_fp2mul751_mont(fX, Z3, fX); // fX = fX*Z3
+	oqs_sidh_cln16_fp2mul751_mont(Y3, Z3, fY); // fY = Y3*Z3
+	oqs_sidh_cln16_fp2add751(fY, fY, fY);      // fY = fY+fY
+	oqs_sidh_cln16_fp2neg751(fY);              // fY = -fY
+	oqs_sidh_cln16_fp2add751(fY, fY, c);       // c = fY+fY
+	oqs_sidh_cln16_fp2mul751_mont(fY, Z3, fY); // fY = fY*Z3
+	oqs_sidh_cln16_fp2mul751_mont(f0, X3, f0); // f0 = f0*X3
+	oqs_sidh_cln16_fp2mul751_mont(c, Y3, c);   // c = c*Y3
+	oqs_sidh_cln16_fp2mul751_mont(fX, c, fX);  // fX = c*fX
+	oqs_sidh_cln16_fp2mul751_mont(fY, c, fY);  // fY = c*fY
+	oqs_sidh_cln16_fp2mul751_mont(f0, c, f0);  // f0 = c*f0
+
+	do {
+		while (pts_found < 2) {
+			r++;
+			get_pt_on_curve(A, &r, X, Y);
+			oqs_sidh_cln16_fp2mul751_mont(fX, X, f);  // f = fX*X
+			oqs_sidh_cln16_fp2mul751_mont(fY, Y, t0); // t0 = fY*Y
+			oqs_sidh_cln16_fp2add751(f, t0, f);       // f = f+t0
+			oqs_sidh_cln16_fp2add751(f, f0, f);       // f = f+f0
+
+			if (oqs_sidh_cln16_is_cube_Fp2(f, CurveIsogeny) == false) {
+				oqs_sidh_cln16_fp2copy751(one, Z); // Z = 1
+				oqs_sidh_cln16_xDBLe(R, R, A, one, 372);
+				oqs_sidh_cln16_fp2mul751_mont(A, Z, u); // u = A*Z
+				oqs_sidh_cln16_fp2add751(u, X, u);      // u = u+X
+				oqs_sidh_cln16_fp2mul751_mont(u, X, u); // u = u*X
+				oqs_sidh_cln16_fp2sqr751_mont(Z, v);    // v = Z^2
+				oqs_sidh_cln16_fp2add751(u, v, u);      // u = u+v
+				oqs_sidh_cln16_fp2mul751_mont(u, X, u); // u = u*X
+				oqs_sidh_cln16_fp2mul751_mont(v, Z, v); // v = v*Z
+				oqs_sidh_cln16_sqrt_Fp2_frac(u, v, Y);  // Y = sqrt(u/v)
+				oqs_sidh_cln16_fp2mul751_mont(Y, Z, Y); // Y = Y*Z
+
+				if (pts_found == 0) {
+					oqs_sidh_cln16_fp2copy751(X, X1); // X1 = X
+					oqs_sidh_cln16_fp2copy751(Y, Y1); // Y1 = Y
+					oqs_sidh_cln16_fp2copy751(Z, Z1); // Z1 = Z
+					oqs_sidh_cln16_xTPLe(R, R3, A, one, 238);
+				} else {
+					oqs_sidh_cln16_fp2copy751(X, X2); // X2 = X
+					oqs_sidh_cln16_fp2copy751(Y, Y2); // Y2 = Y
+					oqs_sidh_cln16_fp2copy751(Z, Z2); // Z2 = Z
+					oqs_sidh_cln16_xTPLe(R, R4, A, one, 238);
+				}
+				pts_found++;
+			}
+		}
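+		// {R1, R2} generates the full 3^239-torsion precisely when the order-3 multiples
+		// of the two candidates (R3 and R4) have distinct x-coordinates; this is tested
+		// projectively below via X3*Z4 - X4*Z3 != 0. If they coincide, pts_found is
+		// decremented and a fresh second candidate is drawn.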
+		oqs_sidh_cln16_fp2mul751_mont(X3, Z4, t0);
+		oqs_sidh_cln16_fp2mul751_mont(X4, Z3, v);
+		oqs_sidh_cln16_fp2sub751(t0, v, t0);
+		oqs_sidh_cln16_fp2correction751(t0);
+		pts_found--;
+	} while (oqs_sidh_cln16_fpequal751_non_constant_time(t0[0], zero) == true && oqs_sidh_cln16_fpequal751_non_constant_time(t0[1], zero) == true);
+}
+
+static void dbl_and_line(const oqs_sidh_cln16_point_ext_proj_t P, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t lx, oqs_sidh_cln16_f2elm_t ly, oqs_sidh_cln16_f2elm_t l0, oqs_sidh_cln16_f2elm_t v0) { // Doubling step for computing the Tate pairing using Miller's algorithm.
+	                                                                                                                                                                                                            // This function computes a point doubling of P and returns the corresponding line coefficients for the pairing doubling step.
+	oqs_sidh_cln16_felm_t *X2 = (oqs_sidh_cln16_felm_t *) P->X2, *XZ = (oqs_sidh_cln16_felm_t *) P->XZ, *YZ = (oqs_sidh_cln16_felm_t *) P->YZ, *Z2 = (oqs_sidh_cln16_felm_t *) P->Z2;
+	oqs_sidh_cln16_f2elm_t XX2, t0;
+
+	oqs_sidh_cln16_fp2add751(YZ, YZ, XX2);      //X2_: = YZ + YZ;
+	oqs_sidh_cln16_fp2sqr751_mont(XX2, ly);     //ly: = X2_ ^ 2;
+	oqs_sidh_cln16_fp2sub751(X2, Z2, l0);       //l0: = X2 - Z2;
+	oqs_sidh_cln16_fp2sqr751_mont(l0, v0);      //v0: = l0 ^ 2;
+	oqs_sidh_cln16_fp2mul751_mont(XX2, l0, l0); //l0: = X2_*l0;
+	oqs_sidh_cln16_fp2mul751_mont(XZ, l0, lx);  //lx: = XZ*l0;
+	oqs_sidh_cln16_fp2mul751_mont(YZ, ly, XX2); //X2_: = YZ*ly;
+	oqs_sidh_cln16_fp2add751(XX2, lx, lx);      //lx: = X2_ + lx;
+	oqs_sidh_cln16_fp2add751(X2, Z2, YZ);       //YZ: = X2 + Z2;
+	oqs_sidh_cln16_fp2mul751_mont(A, YZ, YZ);   //YZ: = A*YZ;
+	oqs_sidh_cln16_fp2add751(XZ, XZ, XX2);      //X2_: = XZ + XZ;
+	oqs_sidh_cln16_fp2add751(XX2, YZ, YZ);      //YZ: = X2_ + YZ;
+	oqs_sidh_cln16_fp2add751(XX2, YZ, YZ);      //YZ_: = X2_ + YZ_;
+	oqs_sidh_cln16_fp2mul751_mont(XX2, YZ, YZ); //YZ_: = X2_*YZ_;
+
+	oqs_sidh_cln16_fp2sqr751_mont(v0, XX2);    //X2_: = v0 ^ 2;
+	oqs_sidh_cln16_fp2sqr751_mont(l0, t0);     //XZ_: = l0 ^ 2;
+	oqs_sidh_cln16_fp2sqr751_mont(ly, Z2);     //Z2: = ly ^ 2;
+	oqs_sidh_cln16_fp2add751(v0, YZ, YZ);      //YZ: = v0 + YZ;
+	oqs_sidh_cln16_fp2mul751_mont(l0, YZ, YZ); //YZ: = l0*YZ;
+
+	oqs_sidh_cln16_fp2mul751_mont(XZ, ly, ly); //ly: = XZ*ly;
+	oqs_sidh_cln16_fp2mul751_mont(X2, l0, l0); //l0: = X2*l0;
+	oqs_sidh_cln16_fp2mul751_mont(XZ, v0, v0); //v0: = XZ*v0;
+
+	oqs_sidh_cln16_fp2copy751(XX2, X2);
+	oqs_sidh_cln16_fp2copy751(t0, XZ);
+}
+
+static void absorb_line(const oqs_sidh_cln16_f2elm_t lx, const oqs_sidh_cln16_f2elm_t ly, const oqs_sidh_cln16_f2elm_t l0, const oqs_sidh_cln16_f2elm_t v0, const oqs_sidh_cln16_point_t P, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Absorbing line function values during Miller's algorithm.
+	                                                                                                                                                                                                                                              // Evaluate the line functions at the point P and multiply values into the running value n/d of the pairing value, keeping numerator n
+	                                                                                                                                                                                                                                              // and denominator d separate.
+	oqs_sidh_cln16_felm_t *x = (oqs_sidh_cln16_felm_t *) P->x, *y = (oqs_sidh_cln16_felm_t *) P->y;
+	oqs_sidh_cln16_f2elm_t l, v;
+
+	oqs_sidh_cln16_fp2mul751_mont(lx, x, l); // l = lx*x
+	oqs_sidh_cln16_fp2mul751_mont(ly, y, v); // v = ly*y
+	oqs_sidh_cln16_fp2sub751(v, l, l);       // l = v-l
+	oqs_sidh_cln16_fp2add751(l0, l, l);      // l = l+l0
+	oqs_sidh_cln16_fp2mul751_mont(ly, x, v); // v = ly*x
+	oqs_sidh_cln16_fp2sub751(v, v0, v);      // v = v-v0
+	oqs_sidh_cln16_fp2mul751_mont(n, l, n);  // n = n*l
+	oqs_sidh_cln16_fp2mul751_mont(d, v, d);  // d = d*v
+}
+
+static void square_and_absorb_line(const oqs_sidh_cln16_f2elm_t lx, const oqs_sidh_cln16_f2elm_t ly, const oqs_sidh_cln16_f2elm_t l0, const oqs_sidh_cln16_f2elm_t v0, const oqs_sidh_cln16_point_t P, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Square the running pairing value in Miller's algorithm and absorb line function values of the current Miller step.
+	oqs_sidh_cln16_fp2sqr751_mont(n, n);                                                                                                                                                                                                                     // n = n^2
+	oqs_sidh_cln16_fp2sqr751_mont(d, d);                                                                                                                                                                                                                     // d = d^2
+	absorb_line(lx, ly, l0, v0, P, n, d);
+}
+
+static void final_dbl_iteration(const oqs_sidh_cln16_point_ext_proj_t P, const oqs_sidh_cln16_f2elm_t x, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Special iteration for the final doubling step in Miller's algorithm. This is necessary since the doubling
+	                                                                                                                                                           // at the end of the Miller loop is an exceptional case (doubling a point of order 2).
+	oqs_sidh_cln16_felm_t *X = (oqs_sidh_cln16_felm_t *) P->XZ, *Z = (oqs_sidh_cln16_felm_t *) P->Z2;
+	oqs_sidh_cln16_f2elm_t l;
+
+	oqs_sidh_cln16_fp2sqr751_mont(n, n);    // n = n^2
+	oqs_sidh_cln16_fp2sqr751_mont(d, d);    // d = d^2
+	oqs_sidh_cln16_fp2mul751_mont(Z, d, d); // d = d*Z
+	oqs_sidh_cln16_fp2mul751_mont(Z, x, l); // l = Z*x
+	oqs_sidh_cln16_fp2sub751(l, X, l);      // l = l-X
+	oqs_sidh_cln16_fp2mul751_mont(n, l, n); // n = n*l
+}
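+
+// In the last Miller iteration the point being doubled has order 2, so its double is
+// the point at infinity and the line degenerates into the vertical through (X:Z);
+// the routine above therefore absorbs the factor Z*x - X into the numerator and Z
+// into the denominator instead of a full line/vertical pair.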
+
+static void final_exponentiation_2_torsion(oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d, const oqs_sidh_cln16_f2elm_t n_inv, const oqs_sidh_cln16_f2elm_t d_inv, oqs_sidh_cln16_f2elm_t nout, PCurveIsogenyStruct CurveIsogeny) { // The final exponentiation for pairings in the 2-torsion group. Raising the value n/d to the power (p^2-1)/2^eA.
+	oqs_sidh_cln16_felm_t one = {0};
+	unsigned int i;
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one);
+	oqs_sidh_cln16_fp2mul751_mont(n, d_inv, n); // n = n*d_inv
+	//n = n^p, just call conjugation function
+	oqs_sidh_cln16_inv_Fp2_cycl(n);
+	oqs_sidh_cln16_fp2mul751_mont(d, n_inv, d); // d = d*n_inv
+	oqs_sidh_cln16_fp2mul751_mont(n, d, n);     // n = n*d
+
+	for (i = 0; i < 239; i++) {
+		oqs_sidh_cln16_cube_Fp2_cycl(n, one);
+	}
+	oqs_sidh_cln16_fp2copy751(n, nout);
+}
+
+void oqs_sidh_cln16_Tate_pairings_2_torsion(const oqs_sidh_cln16_point_t R1, const oqs_sidh_cln16_point_t R2, const oqs_sidh_cln16_point_t P, const oqs_sidh_cln16_point_t Q, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t *n, PCurveIsogenyStruct CurveIsogeny) { // The doubling only 2-torsion Tate pairing of order 2^eA, consisting of the doubling only Miller loop and the final exponentiation.
+	                                                                                                                                                                                                                                                                         // Computes 5 pairings at once: e(R1, R2), e(R1, P), e(R1, Q), e(R2, P), e(R2,Q).
+	oqs_sidh_cln16_point_ext_proj_t P1 = {0}, P2 = {0};
+	oqs_sidh_cln16_f2elm_t lx1, ly1, l01, v01, lx2, ly2, l02, v02;
+	oqs_sidh_cln16_f2elm_t invs[10], nd[10] = {0};
+	oqs_sidh_cln16_felm_t one = {0};
+	unsigned int i;
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one);
+	oqs_sidh_cln16_fp2copy751(R1->x, P1->XZ);
+	oqs_sidh_cln16_fp2sqr751_mont(P1->XZ, P1->X2);
+	oqs_sidh_cln16_fp2copy751(R1->y, P1->YZ);
+	oqs_sidh_cln16_fpcopy751(one, P1->Z2[0]); // P1 = (x1^2,x1,1,y1)
+	oqs_sidh_cln16_fp2copy751(R2->x, P2->XZ);
+	oqs_sidh_cln16_fp2sqr751_mont(P2->XZ, P2->X2);
+	oqs_sidh_cln16_fp2copy751(R2->y, P2->YZ);
+	oqs_sidh_cln16_fpcopy751(one, P2->Z2[0]); // P2 = (x2^2,x2,1,y2)
+
+	for (i = 0; i < 10; i++) { // nd[i] = 1
+		oqs_sidh_cln16_fpcopy751(one, nd[i][0]);
+	}
+
+	for (i = 0; i < 371; i++) {
+		dbl_and_line(P1, A, lx1, ly1, l01, v01); // vx = ly
+		dbl_and_line(P2, A, lx2, ly2, l02, v02); // vx = ly
+		square_and_absorb_line(lx1, ly1, l01, v01, R2, nd[0], nd[5]);
+		square_and_absorb_line(lx1, ly1, l01, v01, P, nd[1], nd[6]);
+		square_and_absorb_line(lx1, ly1, l01, v01, Q, nd[2], nd[7]);
+		square_and_absorb_line(lx2, ly2, l02, v02, P, nd[3], nd[8]);
+		square_and_absorb_line(lx2, ly2, l02, v02, Q, nd[4], nd[9]);
+	}
+
+	final_dbl_iteration(P1, R2->x, nd[0], nd[5]);
+	final_dbl_iteration(P1, P->x, nd[1], nd[6]);
+	final_dbl_iteration(P1, Q->x, nd[2], nd[7]);
+	final_dbl_iteration(P2, P->x, nd[3], nd[8]);
+	final_dbl_iteration(P2, Q->x, nd[4], nd[9]);
+	oqs_sidh_cln16_mont_n_way_inv(nd, 10, invs);
+	final_exponentiation_2_torsion(nd[0], nd[5], invs[0], invs[5], n[0], CurveIsogeny);
+	final_exponentiation_2_torsion(nd[1], nd[6], invs[1], invs[6], n[1], CurveIsogeny);
+	final_exponentiation_2_torsion(nd[2], nd[7], invs[2], invs[7], n[2], CurveIsogeny);
+	final_exponentiation_2_torsion(nd[3], nd[8], invs[3], invs[8], n[3], CurveIsogeny);
+	final_exponentiation_2_torsion(nd[4], nd[9], invs[4], invs[9], n[4], CurveIsogeny);
+}
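+
+// The Miller loop above performs 371 generic doubling steps and final_dbl_iteration
+// supplies the exceptional last one, matching the group order 2^372. Numerators
+// nd[0..4] and denominators nd[5..9] of the five pairings are kept separate so that
+// a single 10-way simultaneous inversion suffices before the final exponentiations.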
+
+static void tpl_and_parabola(oqs_sidh_cln16_point_ext_proj_t P, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t ly, oqs_sidh_cln16_f2elm_t lx2, oqs_sidh_cln16_f2elm_t lx1, oqs_sidh_cln16_f2elm_t lx0, oqs_sidh_cln16_f2elm_t vx, oqs_sidh_cln16_f2elm_t v0) { // Tripling step for computing the Tate pairing using Miller's algorithm.
+	                                                                                                                                                                                                                                                                   // This function computes a point tripling of P and returns the coefficients of the corresponding parabola.
+	oqs_sidh_cln16_felm_t *X2 = (oqs_sidh_cln16_felm_t *) P->X2, *XZ = (oqs_sidh_cln16_felm_t *) P->XZ, *YZ = (oqs_sidh_cln16_felm_t *) P->YZ, *Z2 = (oqs_sidh_cln16_felm_t *) P->Z2;
+	oqs_sidh_cln16_f2elm_t AXZ, t0, t1, t2, t3, t4, tlx0, tlx1, tlx2;
+
+	oqs_sidh_cln16_fp2add751(YZ, YZ, ly);           //ly: = YZ + YZ
+	oqs_sidh_cln16_fp2sqr751_mont(ly, tlx2);        //lx2: = ly ^ 2
+	oqs_sidh_cln16_fp2mul751_mont(ly, tlx2, ly);    //ly: = ly*lx2
+	oqs_sidh_cln16_fp2mul751_mont(A, XZ, AXZ);      //AXZ: = A*XZ
+	oqs_sidh_cln16_fp2add751(AXZ, Z2, t0);          //t0: = AXZ + Z2
+	oqs_sidh_cln16_fp2add751(t0, t0, t0);           //t0: = t0 + t0
+	oqs_sidh_cln16_fp2add751(X2, Z2, t1);           //t1: = X2 + Z2
+	oqs_sidh_cln16_fp2add751(X2, X2, t2);           //t2: = X2 + X2
+	oqs_sidh_cln16_fp2sub751(X2, Z2, t3);           //t3: = X2 - Z2
+	oqs_sidh_cln16_fp2sqr751_mont(t3, t3);          //t3: = t3 ^ 2
+	oqs_sidh_cln16_fp2add751(t2, t0, t4);           //t4: = t2 + t0
+	oqs_sidh_cln16_fp2mul751_mont(t2, t4, tlx2);    //lx2: = t2*t4
+	oqs_sidh_cln16_fp2sub751(tlx2, t3, tlx2);       //lx2: = lx2 - t3
+	oqs_sidh_cln16_fp2add751(t4, t1, tlx1);         //lx1: = t4 + t1
+	oqs_sidh_cln16_fp2sqr751_mont(t1, t1);          //t1: = t1 ^ 2
+	oqs_sidh_cln16_fp2mul751_mont(AXZ, tlx1, tlx1); //lx1: = AXZ*lx1
+	oqs_sidh_cln16_fp2add751(t1, tlx1, tlx1);       //lx1: = t1 + lx1
+	oqs_sidh_cln16_fp2add751(tlx1, tlx1, tlx1);     //lx1: = lx1 + lx1
+	oqs_sidh_cln16_fp2add751(t3, tlx1, tlx1);       //lx1: = t3 + lx1
+	oqs_sidh_cln16_fp2mul751_mont(Z2, t0, tlx0);    //lx0: = Z2*t0
+	oqs_sidh_cln16_fp2sub751(t3, tlx0, tlx0);       //lx0: = t3 - lx0
+	oqs_sidh_cln16_fp2add751(tlx0, tlx0, tlx0);     //lx0: = lx0 + lx0
+	oqs_sidh_cln16_fp2sub751(t1, tlx0, tlx0);       //lx0: = t1 - lx0
+	oqs_sidh_cln16_fp2mul751_mont(Z2, tlx2, lx2);   //lx2_: = Z2*lx2
+	oqs_sidh_cln16_fp2mul751_mont(XZ, tlx1, lx1);   //lx1_: = XZ*lx1
+	oqs_sidh_cln16_fp2add751(lx1, lx1, lx1);        //lx1_: = lx1_ + lx1_
+	oqs_sidh_cln16_fp2mul751_mont(X2, tlx0, lx0);   //lx0_: = X2*lx0
+	                                                // lx2_, lx1_, lx0_ done
+	oqs_sidh_cln16_fp2sqr751_mont(tlx2, t3);        //t3: = lx2 ^ 2
+	oqs_sidh_cln16_fp2mul751_mont(ly, t3, t2);      //t2: = ly*t3
+	oqs_sidh_cln16_fp2sqr751_mont(tlx0, t4);        //t4: = lx0 ^ 2
+	oqs_sidh_cln16_fp2sqr751_mont(t4, t0);          //t0: = t4 ^ 2
+	oqs_sidh_cln16_fp2mul751_mont(X2, t0, t0);      //t0: = X2*t0
+	oqs_sidh_cln16_fp2mul751_mont(ly, t0, X2);      //X2_: = ly*t0
+	oqs_sidh_cln16_fp2mul751_mont(XZ, t2, XZ);      //XZ_: = XZ*t2
+	oqs_sidh_cln16_fp2mul751_mont(XZ, t4, XZ);      //XZ_: = XZ_*t4
+	oqs_sidh_cln16_fp2mul751_mont(Z2, t2, Z2);      //Z2_: = Z2*t2
+	oqs_sidh_cln16_fp2mul751_mont(Z2, t3, Z2);      //Z2_: = Z2_*t3
+	oqs_sidh_cln16_fp2mul751_mont(tlx0, tlx1, t2);  //t2: = lx0*lx1
+	oqs_sidh_cln16_fp2add751(t2, t2, YZ);           //YZ_: = t2 + t2
+	oqs_sidh_cln16_fp2add751(YZ, t3, YZ);           //YZ_: = YZ_ + t3
+	oqs_sidh_cln16_fp2mul751_mont(lx0, tlx2, t2);   //t2: = lx0_*lx2
+	oqs_sidh_cln16_fp2mul751_mont(t2, YZ, YZ);      //YZ_: = t2*YZ_
+	oqs_sidh_cln16_fp2add751(t0, YZ, YZ);           //YZ_: = t0 + YZ_
+	oqs_sidh_cln16_fp2mul751_mont(lx2, YZ, YZ);     //YZ_: = lx2_*YZ_
+	oqs_sidh_cln16_fp2neg751(YZ);                   //YZ_: = -YZ_
+	                                                // X2_,XZ_,Z2_,YZ_ done
+	oqs_sidh_cln16_fp2copy751(Z2, vx);              //vx: = Z2_
+	oqs_sidh_cln16_fp2copy751(XZ, v0);              //v0: = -XZ_
+	oqs_sidh_cln16_fp2neg751(v0);
+	// vx,v0 done
+}
+
+static void absorb_parab(const oqs_sidh_cln16_f2elm_t ly, const oqs_sidh_cln16_f2elm_t lx2, const oqs_sidh_cln16_f2elm_t lx1, const oqs_sidh_cln16_f2elm_t lx0, const oqs_sidh_cln16_f2elm_t vx, const oqs_sidh_cln16_f2elm_t v0, const oqs_sidh_cln16_point_t P, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Absorbing parabola function values in Miller's algorithm.
+	                                                                                                                                                                                                                                                                                                                    // Evaluate the parabola at P and absorb the values into the running pairing value n/d, keeping numerator n and denominator d separate.
+	oqs_sidh_cln16_felm_t *x = (oqs_sidh_cln16_felm_t *) P->x, *y = (oqs_sidh_cln16_felm_t *) P->y;
+	oqs_sidh_cln16_f2elm_t ln, ld;
+
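+	// Net effect of the sequence below: n is multiplied by v0*(lx0*x^2 + lx1*x + lx2 + ly*y)
+	// and d by lx0*x*(vx + v0*x); ln and ld are only scratch values.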
+	oqs_sidh_cln16_fp2mul751_mont(lx0, x, ln); // ln = lx0*x
+	oqs_sidh_cln16_fp2mul751_mont(v0, x, ld);  // ld = v0*x
+	oqs_sidh_cln16_fp2add751(vx, ld, ld);      // ld = vx + ld
+	oqs_sidh_cln16_fp2mul751_mont(ld, ln, ld); // ld = ld*ln
+	oqs_sidh_cln16_fp2mul751_mont(d, ld, d);   // d = d*ld
+
+	oqs_sidh_cln16_fp2add751(lx1, ln, ln);     // ln = lx1 + ln
+	oqs_sidh_cln16_fp2mul751_mont(x, ln, ln);  // ln = x*ln
+	oqs_sidh_cln16_fp2mul751_mont(ly, y, ld);  // t = ly*y
+	oqs_sidh_cln16_fp2add751(lx2, ln, ln);     // ln = lx2 + ln
+	oqs_sidh_cln16_fp2add751(ld, ln, ln);      // ln = t + ln
+	oqs_sidh_cln16_fp2mul751_mont(ln, v0, ln); // ln = ln*v0
+	oqs_sidh_cln16_fp2mul751_mont(n, ln, n);   // n = n*ln
+}
+
+static void cube_and_absorb_parab(const oqs_sidh_cln16_f2elm_t ly, const oqs_sidh_cln16_f2elm_t lx2, const oqs_sidh_cln16_f2elm_t lx1, const oqs_sidh_cln16_f2elm_t lx0, const oqs_sidh_cln16_f2elm_t vx, const oqs_sidh_cln16_f2elm_t v0, const oqs_sidh_cln16_point_t P, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Cube the running pairing value in Miller's algorithm and absorb parabola function values of the current Miller step.
+	oqs_sidh_cln16_f2elm_t ln, ld;
+
+	oqs_sidh_cln16_fp2sqr751_mont(n, ln);    // ln = n ^ 2
+	oqs_sidh_cln16_fp2mul751_mont(n, ln, n); // n = n*ln
+	oqs_sidh_cln16_fp2sqr751_mont(d, ld);    // ld = d ^ 2
+	oqs_sidh_cln16_fp2mul751_mont(d, ld, d); // d = d*ld
+	absorb_parab(ly, lx2, lx1, lx0, vx, v0, P, n, d);
+}
+
+static void final_tpl(oqs_sidh_cln16_point_ext_proj_t P, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t lam, oqs_sidh_cln16_f2elm_t mu, oqs_sidh_cln16_f2elm_t D) { // Special iteration for the final tripling step in Miller's algorithm. This is necessary since the tripling
+	                                                                                                                                                                        // at the end of the Miller loop is an exceptional case (tripling a point of order 3). Uses lines instead of
+	                                                                                                                                                                        // parabolas.
+	oqs_sidh_cln16_felm_t *X2 = (oqs_sidh_cln16_felm_t *) P->X2, *XZ = (oqs_sidh_cln16_felm_t *) P->XZ, *YZ = (oqs_sidh_cln16_felm_t *) P->YZ, *Z2 = (oqs_sidh_cln16_felm_t *) P->Z2;
+	oqs_sidh_cln16_f2elm_t X, Y, Z, Y2, tX2, AX2, tXZ, tAXZ;
+
+	oqs_sidh_cln16_fp2copy751(XZ, X);
+	oqs_sidh_cln16_fp2copy751(YZ, Y);
+	oqs_sidh_cln16_fp2copy751(Z2, Z);
+
+	oqs_sidh_cln16_fp2sqr751_mont(X, X2);        // X2 = X ^ 2
+	oqs_sidh_cln16_fp2add751(X2, X2, tX2);       // tX2 = X2 + X2
+	oqs_sidh_cln16_fp2mul751_mont(A, X2, AX2);   // AX2 = A*X2
+	oqs_sidh_cln16_fp2mul751_mont(X, Z, XZ);     // XZ = X*Z
+	oqs_sidh_cln16_fp2sqr751_mont(Y, Y2);        // Y2 = Y ^ 2
+	oqs_sidh_cln16_fp2add751(XZ, XZ, tXZ);       // tXZ = XZ + XZ
+	oqs_sidh_cln16_fp2mul751_mont(A, tXZ, tAXZ); // tAXZ = A*tXZ
+	oqs_sidh_cln16_fp2sqr751_mont(Z, Z2);        // Z2 = Z ^ 2
+	oqs_sidh_cln16_fp2mul751_mont(Y, Z, YZ);     // YZ = Y*Z
+
+	oqs_sidh_cln16_fp2add751(X2, Z2, lam);    // lambda = X2 + Z2
+	oqs_sidh_cln16_fp2add751(lam, tX2, lam);  // lambda = lambda + tX2
+	oqs_sidh_cln16_fp2add751(lam, tAXZ, lam); // lambda = lambda + tAXZ
+	oqs_sidh_cln16_fp2sub751(tXZ, Y2, mu);    // mu = tXZ - Y2
+	oqs_sidh_cln16_fp2add751(mu, AX2, mu);    // mu = mu + AX2
+	oqs_sidh_cln16_fp2add751(YZ, YZ, D);      // D = YZ + YZ
+}
+
+static void final_tpl_iteration(const oqs_sidh_cln16_f2elm_t x, const oqs_sidh_cln16_f2elm_t y, const oqs_sidh_cln16_f2elm_t lam, const oqs_sidh_cln16_f2elm_t mu, const oqs_sidh_cln16_f2elm_t D, oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d) { // Special iteration for the final tripling step in Miller's algorithm. This is necessary since the tripling
+	                                                                                                                                                                                                                                                     // at the end of the Miller loop is an exceptional case (tripling a point of order 3).
+	                                                                                                                                                                                                                                                     // Cubes the running pairing value n/d and absorbs the line function values.
+	oqs_sidh_cln16_f2elm_t ln, ld, t;
+
+	oqs_sidh_cln16_fp2sqr751_mont(n, ln);      // ln = n ^ 2
+	oqs_sidh_cln16_fp2mul751_mont(n, ln, n);   // n = n*ln
+	oqs_sidh_cln16_fp2sqr751_mont(d, ld);      // ld = d ^ 2
+	oqs_sidh_cln16_fp2mul751_mont(d, ld, d);   // d = d*ld
+	oqs_sidh_cln16_fp2sqr751_mont(x, ld);      // ld = x ^ 2
+	oqs_sidh_cln16_fp2mul751_mont(mu, ld, ld); // ld = mu*ld
+	oqs_sidh_cln16_fp2mul751_mont(lam, x, t);  // t = lambda*x
+	oqs_sidh_cln16_fp2add751(t, ld, ln);       // ln = t + ld
+	oqs_sidh_cln16_fp2mul751_mont(D, y, t);    // t = D*y
+	oqs_sidh_cln16_fp2add751(t, ln, ln);       // ln = t + ln
+	oqs_sidh_cln16_fp2mul751_mont(n, ln, n);   // n = n*ln
+	oqs_sidh_cln16_fp2mul751_mont(d, ld, d);   // d = d*ld
+}
+
+static void final_exponentiation_3_torsion(oqs_sidh_cln16_f2elm_t n, oqs_sidh_cln16_f2elm_t d, const oqs_sidh_cln16_f2elm_t n_inv, const oqs_sidh_cln16_f2elm_t d_inv, oqs_sidh_cln16_f2elm_t nout, PCurveIsogenyStruct CurveIsogeny) { // The final exponentiation for pairings in the 3-torsion group. Raising the value n/d to the power (p^2-1)/3^eB.
+	oqs_sidh_cln16_felm_t one = {0};
+	unsigned int i;
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one);
+	oqs_sidh_cln16_fp2mul751_mont(n, d_inv, n); // n = n*d_inv
+	                                            // n = n^p. Just call conjugation function
+	oqs_sidh_cln16_inv_Fp2_cycl(n);
+	oqs_sidh_cln16_fp2mul751_mont(d, n_inv, d); // d = d*n_inv
+	oqs_sidh_cln16_fp2mul751_mont(n, d, n);     // n = n*d
+
+	for (i = 0; i < 372; i++) {
+		oqs_sidh_cln16_sqr_Fp2_cycl(n, one);
+	}
+	oqs_sidh_cln16_fp2copy751(n, nout);
+}
+
+void oqs_sidh_cln16_Tate_pairings_3_torsion(const oqs_sidh_cln16_point_t R1, const oqs_sidh_cln16_point_t R2, const oqs_sidh_cln16_point_t P, const oqs_sidh_cln16_point_t Q, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t *n, PCurveIsogenyStruct CurveIsogeny) { // The tripling only 3-torsion Tate pairing of order 3^eB, consisting of the tripling only Miller loop and the final exponentiation.
+	                                                                                                                                                                                                                                                                         // Computes 5 pairings at once: e(R1, R2), e(R1, P), e(R1, Q), e(R2, P), e(R2,Q).
+	oqs_sidh_cln16_point_ext_proj_t P1 = {0}, P2 = {0};
+	oqs_sidh_cln16_f2elm_t ly, lx2, lx1, lx0, vx, v0, lam, mu, d;
+	oqs_sidh_cln16_f2elm_t invs[10], nd[10] = {0};
+	oqs_sidh_cln16_felm_t one = {0};
+	unsigned int i;
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one);
+	oqs_sidh_cln16_fp2copy751(R1->x, P1->XZ);
+	oqs_sidh_cln16_fp2sqr751_mont(P1->XZ, P1->X2);
+	oqs_sidh_cln16_fp2copy751(R1->y, P1->YZ);
+	oqs_sidh_cln16_fpcopy751(one, P1->Z2[0]); // P1 = (x1^2,x1,1,y1)
+	oqs_sidh_cln16_fp2copy751(R2->x, P2->XZ);
+	oqs_sidh_cln16_fp2sqr751_mont(P2->XZ, P2->X2);
+	oqs_sidh_cln16_fp2copy751(R2->y, P2->YZ);
+	oqs_sidh_cln16_fpcopy751(one, P2->Z2[0]); // P2 = (x2^2,x2,1,y2)
+
+	for (i = 0; i < 10; i++) { // nd[i] = 1
+		oqs_sidh_cln16_fpcopy751(one, nd[i][0]);
+	}
+
+	for (i = 239; i >= 2; i--) {
+		tpl_and_parabola(P1, A, ly, lx2, lx1, lx0, vx, v0);
+		cube_and_absorb_parab(ly, lx2, lx1, lx0, vx, v0, R2, nd[0], nd[5]);
+		cube_and_absorb_parab(ly, lx2, lx1, lx0, vx, v0, P, nd[1], nd[6]);
+		cube_and_absorb_parab(ly, lx2, lx1, lx0, vx, v0, Q, nd[2], nd[7]);
+		tpl_and_parabola(P2, A, ly, lx2, lx1, lx0, vx, v0);
+		cube_and_absorb_parab(ly, lx2, lx1, lx0, vx, v0, P, nd[3], nd[8]);
+		cube_and_absorb_parab(ly, lx2, lx1, lx0, vx, v0, Q, nd[4], nd[9]);
+	}
+
+	final_tpl(P1, A, lam, mu, d);
+	final_tpl_iteration(R2->x, R2->y, lam, mu, d, nd[0], nd[5]);
+	final_tpl_iteration(P->x, P->y, lam, mu, d, nd[1], nd[6]);
+	final_tpl_iteration(Q->x, Q->y, lam, mu, d, nd[2], nd[7]);
+	final_tpl(P2, A, lam, mu, d);
+	final_tpl_iteration(P->x, P->y, lam, mu, d, nd[3], nd[8]);
+	final_tpl_iteration(Q->x, Q->y, lam, mu, d, nd[4], nd[9]);
+
+	oqs_sidh_cln16_mont_n_way_inv(nd, 10, invs);
+	final_exponentiation_3_torsion(nd[0], nd[5], invs[0], invs[5], n[0], CurveIsogeny);
+	final_exponentiation_3_torsion(nd[1], nd[6], invs[1], invs[6], n[1], CurveIsogeny);
+	final_exponentiation_3_torsion(nd[2], nd[7], invs[2], invs[7], n[2], CurveIsogeny);
+	final_exponentiation_3_torsion(nd[3], nd[8], invs[3], invs[8], n[3], CurveIsogeny);
+	final_exponentiation_3_torsion(nd[4], nd[9], invs[4], invs[9], n[4], CurveIsogeny);
+}
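+
+// Note: the loop above performs 238 shared point triplings (i = 239 down to 2) and the
+// final tripling is handled separately by final_tpl/final_tpl_iteration, matching the
+// pairing order 3^239. The parabola coefficients obtained from tripling P1 are reused by
+// the three pairings with first argument R1, and those from P2 by the two pairings with
+// first argument R2, which is how the five pairings are batched.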
+
+void oqs_sidh_cln16_phn1(const oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const uint64_t a, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_i) { // Pohlig-Hellman for groups of 2-power order up to 2^6
+	                                                                                                                                                                // This function solves the DLP in a subgroup of Fp2* of order 2^a, where a <= 6.
+	                                                                                                                                                                // The DL is returned in alpha which only needs a bits to store the result.
+	oqs_sidh_cln16_f2elm_t u, v, tmp;
+	oqs_sidh_cln16_felm_t zero = {0};
+	uint64_t l, h;
+
+	oqs_sidh_cln16_fp2copy751(q, u); // u = q
+	*alpha_i = 0;
+	for (l = 0; l < a - 1; l++) {
+		oqs_sidh_cln16_fp2copy751(u, v); // v = u
+		for (h = 1; h < (a - l); h++) {
+			oqs_sidh_cln16_sqr_Fp2_cycl(v, one);
+		}
+		oqs_sidh_cln16_fp2correction751(v);
+		if (oqs_sidh_cln16_fpequal751_non_constant_time(v[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(v[1], zero) == false) {
+			*alpha_i += ((uint64_t) 1 << l);
+			oqs_sidh_cln16_fp2copy751(LUT[6 - a + l], tmp); // tmp = LUT[6-a+l];
+			oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+		}
+	}
+	oqs_sidh_cln16_fp2correction751(u);
+	if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(u[1], zero) == false) {
+		*alpha_i += ((uint64_t) 1 << (a - 1));
+	}
+}
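+
+// Worked example of the bit-by-bit recovery above (sketch): take a = 3 and q = g^alpha
+// with alpha = 5 = 0b101. For l = 0 the inner loop squares a copy of u down to
+// v = u^(2^2) = g^(4*alpha), which differs from 1 exactly when alpha is odd, so bit 0 is
+// recorded and u is multiplied by the matching LUT entry (a power of g^(-1)) to clear it.
+// The next iterations test the higher bits the same way, and the final comparison
+// outside the loop reads off bit a-1.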
+
+void oqs_sidh_cln16_phn5(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k) { // Pohlig-Hellman for groups of 2-power order 2^21
+	oqs_sidh_cln16_f2elm_t u, v, tmp;
+	oqs_sidh_cln16_felm_t zero = {0};
+	uint64_t alpha_i;
+	uint64_t i, j;
+
+	*alpha_k = 0;
+	oqs_sidh_cln16_fp2copy751(q, u);
+	for (i = 0; i < 4; i++) {
+		oqs_sidh_cln16_fp2copy751(u, v);
+		oqs_sidh_cln16_sqr_Fp2_cycl(v, one);
+		for (j = 0; j < (5 * (3 - i)); j++) {
+			oqs_sidh_cln16_sqr_Fp2_cycl(v, one);
+		}
+		oqs_sidh_cln16_phn1(v, LUT, 5, one, &alpha_i); // v order 2^5
+		*alpha_k += (alpha_i << (5 * i));
+		oqs_sidh_cln16_exp6_Fp2_cycl(LUT_1[i], alpha_i, one, tmp);
+		oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+	}
+	oqs_sidh_cln16_fp2correction751(u);
+	// Do the last part
+	if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(u[1], zero) == false) { // q order 2
+		*alpha_k += ((uint64_t) 1 << 20);
+	}
+}
+
+void oqs_sidh_cln16_phn21(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k) { // Pohlig-Hellman for groups of 2-power order 2^84
+	oqs_sidh_cln16_f2elm_t u, v, tmp;
+	uint64_t alpha_i;
+	uint64_t i, j;
+
+	alpha_k[0] = 0;
+	alpha_k[1] = 0;
+	oqs_sidh_cln16_fp2copy751(q, u);
+	for (i = 0; i < 3; i++) {
+		oqs_sidh_cln16_fp2copy751(u, v);
+		for (j = 0; j < 21 * (3 - i); j++) {
+			oqs_sidh_cln16_sqr_Fp2_cycl(v, one);
+		}
+		oqs_sidh_cln16_phn5(v, LUT, LUT_1, one, &alpha_i); // v order 2^21
+		alpha_k[0] += (alpha_i << (21 * i));
+		oqs_sidh_cln16_exp21_Fp2_cycl(LUT_0[i], alpha_i, one, tmp);
+		oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+	}
+	oqs_sidh_cln16_phn5(u, LUT, LUT_1, one, &alpha_i); // u order 2^21
+	alpha_k[0] += (alpha_i << 63);
+	alpha_k[1] = (alpha_i >> 1);
+}
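+
+// The 84-bit discrete log is split as 84 = 4*21: three 21-bit chunks are recovered in the
+// loop (placed at bit offsets 0, 21 and 42 of alpha_k[0]) and a fourth by the final phn5
+// call. That last chunk starts at bit offset 63 and therefore straddles the two 64-bit
+// words, hence the split into alpha_k[0] (its lowest bit) and alpha_k[1] (its top 20 bits).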
+
+void oqs_sidh_cln16_phn84(oqs_sidh_cln16_f2elm_t r, const oqs_sidh_cln16_f2elm_t *t_ori, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_f2elm_t *LUT_3, const oqs_sidh_cln16_felm_t one, uint64_t *alpha) { // Pohlig-Hellman for groups of 2-power order 2^372
+	oqs_sidh_cln16_f2elm_t u, q, t, tmp;
+	uint64_t alpha_k[2], alpha_i, mask;
+	uint64_t i, j, k;
+
+	for (i = 0; i < SIDH_NWORDS64_ORDER; i++)
+		alpha[i] = 0;
+	oqs_sidh_cln16_fp2copy751(r, t);
+	for (k = 0; k < 4; k++) {
+		oqs_sidh_cln16_fp2copy751(t, q);
+		for (j = 0; j < 36; j++) {
+			oqs_sidh_cln16_sqr_Fp2_cycl(q, one);
+		}
+		for (j = 0; j < 84 * (3 - k); j++) {
+			oqs_sidh_cln16_sqr_Fp2_cycl(q, one);
+		}
+		oqs_sidh_cln16_phn21(q, LUT, LUT_0, LUT_1, one, alpha_k); // q order 2^84
+		alpha[k] += (alpha_k[0] << (k * 20));
+		mask = ((uint64_t) 1 << (k * 20)) - 1;
+		alpha[k + 1] += ((alpha_k[0] >> (64 - k * 20)) & mask);
+		alpha[k + 1] += (alpha_k[1] << (k * 20));
+		oqs_sidh_cln16_exp84_Fp2_cycl(t_ori[k], alpha_k, one, tmp);
+		oqs_sidh_cln16_fp2mul751_mont(t, tmp, t);
+	}
+	alpha[5] = (alpha_k[1] >> 4);
+	// Do the last part
+	for (i = 0; i < 6; i++) {
+		oqs_sidh_cln16_fp2copy751(t, u);
+		for (j = 0; j < 6 * (5 - i); j++) {
+			oqs_sidh_cln16_sqr_Fp2_cycl(u, one);
+		}
+		oqs_sidh_cln16_phn1(u, LUT, 6, one, &alpha_i); // u order 2^6
+		alpha[5] += (alpha_i << (16 + 6 * i));
+		oqs_sidh_cln16_exp6_Fp2_cycl(LUT_3[i], alpha_i, one, tmp);
+		oqs_sidh_cln16_fp2mul751_mont(t, tmp, t);
+	}
+}
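+
+// The 372-bit discrete log is split as 372 = 4*84 + 36. Chunk k of 84 bits starts at bit
+// 84*k = 64*k + 20*k, i.e. in word k at an in-word offset of 20*k, which is what the
+// shift/mask arithmetic on alpha_k above implements. The remaining 36 bits are recovered
+// by the trailing loop as six 6-bit pieces starting at bit 336 = 5*64 + 16, hence the
+// accumulation into alpha[5] at offsets 16 + 6*i.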
+
+void oqs_sidh_cln16_build_LUTs(const oqs_sidh_cln16_f2elm_t g, oqs_sidh_cln16_f2elm_t *t_ori, oqs_sidh_cln16_f2elm_t *LUT, oqs_sidh_cln16_f2elm_t *LUT_0, oqs_sidh_cln16_f2elm_t *LUT_1, oqs_sidh_cln16_f2elm_t *LUT_3, const oqs_sidh_cln16_felm_t one) { // Lookup table generation for 2-torsion PH in a group of order 2^372
+	oqs_sidh_cln16_f2elm_t tmp;
+	unsigned int i, j;
+
+	oqs_sidh_cln16_fp2copy751(g, tmp); // tmp = g
+	oqs_sidh_cln16_inv_Fp2_cycl(tmp);
+	oqs_sidh_cln16_fp2copy751(tmp, t_ori[0]); // t_ori[0] = g^(-1), order 2^372
+	for (i = 0; i < 3; i++) {
+		for (j = 0; j < 84; j++)
+			oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+		oqs_sidh_cln16_fp2copy751(tmp, t_ori[i + 1]); // order 2^288 & 2^204 & 2^120
+	}
+	for (i = 0; i < 36; i++)
+		oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, t_ori[4]); // t_ori[4], order 2^84
+	                                          // t_ori done.
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_0[0]); // LUT_0[0] = t_ori[4], order 2^84
+	for (i = 0; i < 2; i++) {
+		for (j = 0; j < 21; j++)
+			oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+		oqs_sidh_cln16_fp2copy751(tmp, LUT_0[i + 1]); // order 2^63 & 2^42
+	}
+	for (j = 0; j < 6; j++)
+		oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_3[0]); // LUT_3[0] = tmp, order 2^36
+	for (j = 0; j < 6; j++)
+		oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_3[1]); // LUT_3[1] = tmp, order 2^30
+	for (j = 0; j < 6; j++)
+		oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_3[2]); // LUT_3[2] = tmp, order 2^24
+	for (j = 0; j < 3; j++)
+		oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_0[3]); // LUT_0[3] = tmp, order 2^21
+	                                          // LUT_0 done.
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_1[0]); // LUT_1[0] = LUT_0[3], order 2^21
+	for (i = 0; i < 3; i++)
+		oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_3[3]); // LUT_3[3] = tmp, order 2^18
+	for (i = 0; i < 2; i++)
+		oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_1[1]); // LUT_1[1] = tmp, order 2^16
+	for (i = 0; i < 4; i++)
+		oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_3[4]); // LUT_3[4] = tmp, order 2^12
+	oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_1[2]); // LUT_1[2] = tmp, order 2^11
+	for (i = 0; i < 5; i++)
+		oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_1[3]); // LUT_1[3] = tmp, order 2^16 & 2^11 & 2^6
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_3[5]); // LUT_3[5] = tmp
+	                                          // LUT_1, LUT_3 done
+	oqs_sidh_cln16_fp2copy751(tmp, LUT[0]);   // LUT = LUT_3[5]
+	for (i = 0; i < 4; i++) {
+		oqs_sidh_cln16_fp2copy751(LUT[i], LUT[i + 1]);
+		oqs_sidh_cln16_sqr_Fp2_cycl(LUT[i + 1], one); // order 2^5 -- 2^1
+	}
+}
+
+void oqs_sidh_cln16_ph2(const oqs_sidh_cln16_point_t phiP, const oqs_sidh_cln16_point_t phiQ, const oqs_sidh_cln16_point_t PS, const oqs_sidh_cln16_point_t QS, const oqs_sidh_cln16_f2elm_t A, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, PCurveIsogenyStruct CurveIsogeny) { // Pohlig-Hellman function.
+	                                                                                                                                                                                                                                                                                        // This function computes the five pairings e(QS, PS), e(QS, phiP), e(QS, phiQ), e(PS, phiP), e(PS,phiQ),
+	                                                                                                                                                                                                                                                                                        // computes the lookup tables for the Pohlig-Hellman functions,
+	                                                                                                                                                                                                                                                                                        // and then computes the discrete logarithms of the last four pairing values to the base of the first pairing value.
+	oqs_sidh_cln16_f2elm_t t_ori[5], n[5], LUT[5], LUT_0[4], LUT_1[4], LUT_3[6];
+	oqs_sidh_cln16_felm_t one = {0};
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one);
+
+	// Compute the pairings.
+	oqs_sidh_cln16_Tate_pairings_2_torsion(QS, PS, phiP, phiQ, A, n, CurveIsogeny);
+
+	// Build the lookup tables from element n[0] of order 2^372.
+	oqs_sidh_cln16_build_LUTs(n[0], t_ori, LUT, LUT_0, LUT_1, LUT_3, one);
+
+	// Finish computation
+	oqs_sidh_cln16_phn84(n[1], t_ori, LUT, LUT_0, LUT_1, LUT_3, one, a0);
+	oqs_sidh_cln16_phn84(n[3], t_ori, LUT, LUT_0, LUT_1, LUT_3, one, b0);
+	oqs_sidh_cln16_mp_sub(CurveIsogeny->Aorder, (digit_t *) b0, (digit_t *) b0, SIDH_NWORDS_ORDER);
+	oqs_sidh_cln16_phn84(n[2], t_ori, LUT, LUT_0, LUT_1, LUT_3, one, a1);
+	oqs_sidh_cln16_phn84(n[4], t_ori, LUT, LUT_0, LUT_1, LUT_3, one, b1);
+	oqs_sidh_cln16_mp_sub(CurveIsogeny->Aorder, (digit_t *) b1, (digit_t *) b1, SIDH_NWORDS_ORDER);
+}
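+
+// Sketch of the underlying relations: writing phiP = [a0]PS + [b0]QS and
+// phiQ = [a1]PS + [b1]QS over the 2^372-torsion basis (PS, QS) supplied by the caller,
+// bilinearity (together with e(PS,PS) = e(QS,QS) = 1 and e(PS,QS) = e(QS,PS)^(-1), which
+// hold for the reduced Tate pairings used in this compression scheme) gives
+// n[1] = n[0]^a0, n[2] = n[0]^a1, n[3] = n[0]^(-b0) and n[4] = n[0]^(-b1). The four
+// discrete logs to the base n[0] are therefore a0, a1, -b0, -b1, and the two mp_sub
+// calls by the group order flip the signs of b0 and b1.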
+
+static void recover_os(const oqs_sidh_cln16_f2elm_t X1, const oqs_sidh_cln16_f2elm_t Z1, const oqs_sidh_cln16_f2elm_t X2, const oqs_sidh_cln16_f2elm_t Z2, const oqs_sidh_cln16_f2elm_t x, const oqs_sidh_cln16_f2elm_t y, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t X3, oqs_sidh_cln16_f2elm_t Y3, oqs_sidh_cln16_f2elm_t Z3) {
+	oqs_sidh_cln16_f2elm_t t0, t1, t2, t3;
+
+	//X3 := 2*y*Z1*Z2*X1;
+	//Y3 := Z2*((X1+x*Z1+2*A*Z1)*(X1*x+Z1)-2*A*Z1^2)-(X1-x*Z1)^2*X2;
+	//Z3 := 2*y*Z1*Z2*Z1;
+
+	oqs_sidh_cln16_fp2add751(y, y, t0);
+	oqs_sidh_cln16_fp2mul751_mont(t0, Z1, t0);
+	oqs_sidh_cln16_fp2mul751_mont(t0, Z2, t0); // t0 = 2*y*Z1*Z2
+	oqs_sidh_cln16_fp2mul751_mont(t0, Z1, Z3); // Z3 = 2*y*Z1*Z2*Z1
+	oqs_sidh_cln16_fp2mul751_mont(t0, X1, X3); // X3 = 2*y*Z1*Z2*X1
+	oqs_sidh_cln16_fp2add751(A, A, t0);
+	oqs_sidh_cln16_fp2mul751_mont(t0, Z1, t0); // t0 = 2*A*Z1
+	oqs_sidh_cln16_fp2mul751_mont(x, Z1, t1);  // t1 = x*Z1
+	oqs_sidh_cln16_fp2add751(X1, t1, t2);      // t2 = X1+x*Z1
+	oqs_sidh_cln16_fp2sub751(X1, t1, t1);      // t1 = X1-x*Z1
+	oqs_sidh_cln16_fp2add751(t0, t2, t3);      // t3 = X1+x*Z1+2*A*Z1
+	oqs_sidh_cln16_fp2mul751_mont(t0, Z1, t0); // t0 = 2*A*Z1^2
+	oqs_sidh_cln16_fp2sqr751_mont(t1, t1);     // t1 = (X1-x*Z1)^2
+	oqs_sidh_cln16_fp2mul751_mont(x, X1, t2);  // t2 = x*X1
+	oqs_sidh_cln16_fp2add751(t2, Z1, t2);      // t2 = X1*x+Z1
+	oqs_sidh_cln16_fp2mul751_mont(t2, t3, t2); // t2 = (X1+x*Z1+2*A*Z1)*(X1*x+Z1)
+	oqs_sidh_cln16_fp2sub751(t2, t0, t0);      // t0 = (X1+x*Z1+2*A*Z1)*(X1*x+Z1)-2*A*Z1^2
+	oqs_sidh_cln16_fp2mul751_mont(t1, X2, t1); // t1 = (X1-x*Z1)^2*X2
+	oqs_sidh_cln16_fp2mul751_mont(t0, Z2, t0); // t0 = Z2*[(X1+x*Z1+2*A*Z1)*(X1*x+Z1)-2*A*Z1^2]
+	oqs_sidh_cln16_fp2sub751(t0, t1, Y3);      // Y3 = Z2*[(X1+x*Z1+2*A*Z1)*(X1*x+Z1)-2*A*Z1^2] - (X1-x*Z1)^2*X2
+}
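+
+// This is the usual Okeya-Sakurai-style y-coordinate recovery on a Montgomery curve
+// (hence the name): given x-only representations (X1:Z1) of a point T and (X2:Z2) of
+// T + S, together with the affine point S = (x, y) and the curve coefficient A, it
+// returns full projective coordinates (X3:Y3:Z3) for T.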
+
+void oqs_sidh_cln16_recover_y(const oqs_sidh_cln16_publickey_t PK, oqs_sidh_cln16_point_full_proj_t phiP, oqs_sidh_cln16_point_full_proj_t phiQ, oqs_sidh_cln16_point_full_proj_t phiX, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { // Recover the y-coordinates of the public key
+	                                                                                                                                                                                                                                                  // The three resulting points are (simultaneously) correct up to sign
+	oqs_sidh_cln16_f2elm_t tmp, phiXY, one = {0};
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]);
+	oqs_sidh_cln16_get_A(PK[0], PK[1], PK[2], A, CurveIsogeny); // NOTE: don't have to compress this, can output in keygen
+
+	oqs_sidh_cln16_fp2add751(PK[2], A, tmp);
+	oqs_sidh_cln16_fp2mul751_mont(PK[2], tmp, tmp);
+	oqs_sidh_cln16_fp2add751(tmp, one, tmp);
+	oqs_sidh_cln16_fp2mul751_mont(PK[2], tmp, tmp); // tmp = PK[2]^3+A*PK[2]^2+PK[2];
+	oqs_sidh_cln16_sqrt_Fp2(tmp, phiXY);
+	oqs_sidh_cln16_fp2copy751(PK[2], phiX->X);
+	oqs_sidh_cln16_fp2copy751(phiXY, phiX->Y);
+	oqs_sidh_cln16_fp2copy751(one, phiX->Z); // phiX = [PK[2],phiXY,1];
+
+	recover_os(PK[1], one, PK[0], one, PK[2], phiXY, A, phiQ->X, phiQ->Y, phiQ->Z);
+	oqs_sidh_cln16_fp2neg751(phiXY);
+	recover_os(PK[0], one, PK[1], one, PK[2], phiXY, A, phiP->X, phiP->Y, phiP->Z);
+}
+
+void oqs_sidh_cln16_compress_2_torsion(const unsigned char *PublicKeyA, unsigned char *CompressedPKA, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, oqs_sidh_cln16_point_t R1, oqs_sidh_cln16_point_t R2, PCurveIsogenyStruct CurveIsogeny) { // 2-torsion compression
+	oqs_sidh_cln16_point_full_proj_t P, Q, phP, phQ, phX;
+	oqs_sidh_cln16_point_t phiP, phiQ;
+	oqs_sidh_cln16_publickey_t PK;
+	digit_t *comp = (digit_t *) CompressedPKA;
+	digit_t inv[SIDH_NWORDS_ORDER];
+	oqs_sidh_cln16_f2elm_t A, vec[4], Zinv[4];
+	digit_t tmp[2 * SIDH_NWORDS_ORDER];
+
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[0], ((oqs_sidh_cln16_f2elm_t *) &PK)[0]); // Converting to Montgomery representation
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[1], ((oqs_sidh_cln16_f2elm_t *) &PK)[1]);
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[2], ((oqs_sidh_cln16_f2elm_t *) &PK)[2]);
+
+	oqs_sidh_cln16_recover_y(PK, phP, phQ, phX, A, CurveIsogeny);
+	oqs_sidh_cln16_generate_2_torsion_basis(A, P, Q, CurveIsogeny);
+	oqs_sidh_cln16_fp2copy751(P->Z, vec[0]);
+	oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]);
+	oqs_sidh_cln16_fp2copy751(phP->Z, vec[2]);
+	oqs_sidh_cln16_fp2copy751(phQ->Z, vec[3]);
+	oqs_sidh_cln16_mont_n_way_inv(vec, 4, Zinv);
+
+	oqs_sidh_cln16_fp2mul751_mont(P->X, Zinv[0], R1->x);
+	oqs_sidh_cln16_fp2mul751_mont(P->Y, Zinv[0], R1->y);
+	oqs_sidh_cln16_fp2mul751_mont(Q->X, Zinv[1], R2->x);
+	oqs_sidh_cln16_fp2mul751_mont(Q->Y, Zinv[1], R2->y);
+	oqs_sidh_cln16_fp2mul751_mont(phP->X, Zinv[2], phiP->x);
+	oqs_sidh_cln16_fp2mul751_mont(phP->Y, Zinv[2], phiP->y);
+	oqs_sidh_cln16_fp2mul751_mont(phQ->X, Zinv[3], phiQ->x);
+	oqs_sidh_cln16_fp2mul751_mont(phQ->Y, Zinv[3], phiQ->y);
+
+	oqs_sidh_cln16_ph2(phiP, phiQ, R1, R2, A, a0, b0, a1, b1, CurveIsogeny);
+
+	if ((a0[0] & 1) == 1) { // Storing [b1*a0inv, a1*a0inv, b0*a0inv] and setting bit384 to 0
+		oqs_sidh_cln16_inv_mod_orderA((digit_t *) a0, inv);
+		oqs_sidh_cln16_multiply((digit_t *) b0, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[0], SIDH_NWORDS_ORDER);
+		comp[SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12; // Hardcoded value
+		oqs_sidh_cln16_multiply((digit_t *) a1, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER);
+		comp[2 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12;
+		oqs_sidh_cln16_multiply((digit_t *) b1, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[2 * SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER);
+		comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12;
+	} else { // Storing [b1*b0inv, a1*b0inv, a0*b0inv] and setting bit384 to 1
+		oqs_sidh_cln16_inv_mod_orderA((digit_t *) b0, inv);
+		oqs_sidh_cln16_multiply((digit_t *) a0, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[0], SIDH_NWORDS_ORDER);
+		comp[SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12; // Hardcoded value
+		oqs_sidh_cln16_multiply((digit_t *) a1, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER);
+		comp[2 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12;
+		oqs_sidh_cln16_multiply((digit_t *) b1, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[2 * SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER);
+		comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 12;
+		comp[3 * SIDH_NWORDS_ORDER - 1] |= (digit_t) 1 << (sizeof(digit_t) * 8 - 1);
+	}
+
+	oqs_sidh_cln16_from_fp2mont(A, (oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER]); // Converting back from Montgomery representation
+}
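+
+// Layout of CompressedPKA as written above (N = SIDH_NWORDS_ORDER, in digit_t words):
+// comp[0..N-1], comp[N..2N-1] and comp[2N..3N-1] hold the three quotients
+// b0/a0, a1/a0, b1/a0 (or a0/b0, a1/b0, b1/b0 when a0 is even), each masked down to
+// 372 bits; the most significant bit of the third block records which of the two cases
+// was used; the curve coefficient A follows at comp[3N] in standard (non-Montgomery)
+// representation.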
+
+void oqs_sidh_cln16_phn1_3(const oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const uint64_t a, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_i) {
+	oqs_sidh_cln16_f2elm_t u, v, tmp;
+	oqs_sidh_cln16_felm_t zero = {0};
+	uint64_t l, h;
+	// Hardcoded powers of 3, 3^0 = 1, 3^1 = 3, 3^2 = 9
+	uint64_t pow3[3] = {0x0000000000000001, 0x0000000000000003, 0x0000000000000009};
+
+	oqs_sidh_cln16_fp2copy751(q, u); // u = q
+	*alpha_i = 0;
+	for (l = 0; l < a - 1; l++) {
+		oqs_sidh_cln16_fp2copy751(u, v); // v = u
+		for (h = 1; h < (a - l); h++) {
+			oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		}
+		oqs_sidh_cln16_fp2correction751(v);
+		if (oqs_sidh_cln16_fpequal751_non_constant_time(v[0], LUT[3][0]) == true && oqs_sidh_cln16_fpequal751_non_constant_time(v[1], LUT[3][1]) == true) {
+			*alpha_i += pow3[l];
+			oqs_sidh_cln16_fp2copy751(LUT[3 - a + l], tmp); // tmp = LUT[3-a+l];
+			oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+		} else if (oqs_sidh_cln16_fpequal751_non_constant_time(v[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(v[1], zero) == false) {
+			*alpha_i += pow3[l] << 1;
+			oqs_sidh_cln16_fp2copy751(LUT[3 - a + l], tmp); // tmp = LUT[3-a+l];
+			oqs_sidh_cln16_sqr_Fp2_cycl(tmp, one);
+			oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+		}
+	}
+	oqs_sidh_cln16_fp2correction751(u);
+	if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], LUT[3][0]) == true && oqs_sidh_cln16_fpequal751_non_constant_time(u[1], LUT[3][1]) == true) {
+		*alpha_i += pow3[a - 1];
+	} else if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(u[1], zero) == false) {
+		*alpha_i += pow3[a - 1] << 1;
+	}
+}
+
+void oqs_sidh_cln16_phn3(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k) {
+	oqs_sidh_cln16_f2elm_t u, v, tmp;
+	uint64_t alpha_i;
+	uint64_t i, j;
+	// Powers of 3: 3^0 = 1, 3^3 = 27, 3^6 = 729, 3^9, 3^12
+	uint64_t pow3[5] = {0x0000000000000001, 0x000000000000001B,
+	                    0x00000000000002D9, 0x0000000000004CE3,
+	                    0x0000000000081BF1};
+
+	*alpha_k = 0;
+	oqs_sidh_cln16_fp2copy751(q, u);
+	for (i = 0; i < 4; i++) {
+		oqs_sidh_cln16_fp2copy751(u, v);
+		for (j = 0; j < 3 * (4 - i); j++) {
+			oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		}
+		oqs_sidh_cln16_phn1_3(v, LUT, 3, one, &alpha_i); // v order 3^3
+		*alpha_k += alpha_i * pow3[i];
+		oqs_sidh_cln16_exp6_Fp2_cycl(LUT_1[i], alpha_i, one, tmp);
+		oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+	}
+	oqs_sidh_cln16_phn1_3(u, LUT, 3, one, &alpha_i); // u order 3^3
+	*alpha_k += alpha_i * pow3[4];
+}
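+
+// Here 15 = 5*3: four base-3^3 digits are recovered in the loop and a fifth by the final
+// call, each weighted by the corresponding power of 3 from pow3[] (3^0, 3^3, ..., 3^12)
+// when accumulated into alpha_k.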
+
+void oqs_sidh_cln16_phn15_1(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k) {
+	oqs_sidh_cln16_f2elm_t u, v, tmp;
+	uint64_t alpha_i, alpha_n[2], alpha_tmp[4]; // alpha_tmp[4] is overkill, only taking 4 since it is the result of a mp_mul with 2-word inputs.
+	uint64_t i, j;
+	// Powers of 3: 3^0 = 1, 3^15, 3^30
+	uint64_t pow3_15[3] = {0x0000000000000001, 0x0000000000DAF26B, 0x0000BB41C3CA78B9};
+	// Powers of 3: 3^0 = 1, 3^3 = 27, 3^6 = 729, 3^9 = 19683
+	uint64_t pow3_3[4] = {0x0000000000000001, 0x000000000000001B, 0x00000000000002D9, 0x0000000000004CE3};
+	// Powers of 3: 3^45 split up into two words.
+	uint64_t pow3_45[2] = {0x275329FD09495753, 0x00000000000000A0};
+
+	alpha_k[0] = 0;
+	alpha_k[1] = 0;
+	for (i = 0; i < 4; i++)
+		alpha_tmp[i] = 0;
+	oqs_sidh_cln16_fp2copy751(q, u);
+	for (i = 0; i < 3; i++) {
+		oqs_sidh_cln16_fp2copy751(u, v);
+		for (j = 0; j < 11; j++) {
+			oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		}
+		for (j = 0; j < 15 * (2 - i); j++) {
+			oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		}
+		oqs_sidh_cln16_phn3(v, LUT, LUT_1, one, &alpha_i); // v order 3^15
+		oqs_sidh_cln16_multiply((digit_t *) &alpha_i, (digit_t *) &pow3_15[i], (digit_t *) alpha_tmp, 64 / RADIX);
+		oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) alpha_tmp, (digit_t *) alpha_k, 2 * 64 / RADIX);
+
+		oqs_sidh_cln16_fp2copy751(LUT_0[i], v);
+		for (j = 0; j < 5; j++) {
+			oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		}
+
+		oqs_sidh_cln16_exp_Fp2_cycl(v, &alpha_i, one, tmp, 24);
+		oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+	}
+
+	// Do the last part
+	alpha_n[0] = 0;
+	alpha_n[1] = 0;
+	for (i = 0; i < 3; i++) {
+		oqs_sidh_cln16_fp2copy751(u, v);
+		for (j = 0; j < 2; j++) {
+			oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		}
+		for (j = 0; j < 3 * (2 - i); j++) {
+			oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		}
+		oqs_sidh_cln16_phn1_3(v, LUT, 3, one, &alpha_i); // v order 3^3
+		alpha_n[0] += alpha_i * pow3_3[i];
+
+		oqs_sidh_cln16_fp2copy751(LUT_1[i], v);
+		for (j = 0; j < 4; j++) {
+			oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		}
+		oqs_sidh_cln16_exp_Fp2_cycl(v, &alpha_i, one, tmp, 5);
+		oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+	}
+
+	oqs_sidh_cln16_phn1_3(u, LUT, 2, one, &alpha_i);
+	alpha_n[0] += alpha_i * pow3_3[3];
+	oqs_sidh_cln16_multiply((digit_t *) alpha_n, (digit_t *) pow3_45, (digit_t *) alpha_tmp, 2 * 64 / RADIX); // Can be optimized because alpha_n is only single precision and pow3_45 is only slightly larger than 64 bits.
+	oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) alpha_tmp, (digit_t *) alpha_k, 2 * 64 / RADIX);
+}
+
+void oqs_sidh_cln16_phn15(oqs_sidh_cln16_f2elm_t q, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha_k) {
+	oqs_sidh_cln16_felm_t zero = {0};
+	oqs_sidh_cln16_f2elm_t u, v, tmp;
+	uint64_t alpha_i, alpha_n[2], alpha_tmp[4];
+	uint64_t i, j;
+	// Powers of 3: 3^0 = 1, 3^15, 3^30
+	uint64_t pow3_15[3] = {0x0000000000000001, 0x0000000000DAF26B, 0x0000BB41C3CA78B9};
+	// Powers of 3: 3^45 split up into two words.
+	uint64_t pow3_45[2] = {0x275329FD09495753, 0x00000000000000A0};
+	// Powers of 3: 3^60 and 2*3^60, each split up into two words.
+	uint64_t pow3_60[2] = {0xCEEDA7FE92E1F5B1, 0x0000000088F924EE};
+	uint64_t pow3_60_2[2] = {0x9DDB4FFD25C3EB62, 0x0000000111F249DD};
+
+	alpha_k[0] = 0;
+	alpha_k[1] = 0;
+	alpha_n[0] = 0;
+	alpha_n[1] = 0;
+	for (i = 0; i < 4; i++)
+		alpha_tmp[i] = 0;
+	oqs_sidh_cln16_fp2copy751(q, u);
+	for (i = 0; i < 3; i++) {
+		oqs_sidh_cln16_fp2copy751(u, v);
+		oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		for (j = 0; j < 15 * (3 - i); j++) {
+			oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		}
+		oqs_sidh_cln16_phn3(v, LUT, LUT_1, one, &alpha_i); // v order 3^15
+
+		oqs_sidh_cln16_multiply((digit_t *) &alpha_i, (digit_t *) &pow3_15[i], (digit_t *) alpha_tmp, 64 / RADIX);
+		oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) alpha_tmp, (digit_t *) alpha_k, 2 * 64 / RADIX);
+
+		oqs_sidh_cln16_exp_Fp2_cycl(LUT_0[i], &alpha_i, one, tmp, 24);
+		oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+	}
+
+	oqs_sidh_cln16_fp2copy751(u, v);
+	oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+	oqs_sidh_cln16_phn3(v, LUT, LUT_1, one, &alpha_n[0]); // v order 3^15
+
+	oqs_sidh_cln16_multiply((digit_t *) alpha_n, (digit_t *) pow3_45, (digit_t *) alpha_tmp, 2 * 64 / RADIX);
+	oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) alpha_tmp, (digit_t *) alpha_k, 2 * 64 / RADIX);
+
+	oqs_sidh_cln16_exp_Fp2_cycl(LUT_0[3], &alpha_n[0], one, tmp, 24);
+	oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+	oqs_sidh_cln16_fp2correction751(u);
+	if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], LUT[3][0]) == true && oqs_sidh_cln16_fpequal751_non_constant_time(u[1], LUT[3][1]) == true) {
+		oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) pow3_60, (digit_t *) alpha_k, 2 * 64 / RADIX);
+	} else if (oqs_sidh_cln16_fpequal751_non_constant_time(u[0], one) == false || oqs_sidh_cln16_fpequal751_non_constant_time(u[1], zero) == false) {
+		oqs_sidh_cln16_mp_add((digit_t *) alpha_k, (digit_t *) pow3_60_2, (digit_t *) alpha_k, 2 * 64 / RADIX);
+	}
+}
+
+void oqs_sidh_cln16_phn61(oqs_sidh_cln16_f2elm_t r, oqs_sidh_cln16_f2elm_t *t_ori, const oqs_sidh_cln16_f2elm_t *LUT, const oqs_sidh_cln16_f2elm_t *LUT_0, const oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one, uint64_t *alpha) {
+	oqs_sidh_cln16_f2elm_t u, v, tmp;
+	uint64_t alpha_k[5] = {0}, alpha_tmp[10] = {0};
+	uint64_t i, k;
+
+	uint64_t pow3_61[13] = {0x0000000000000001, 0x0000000000000000, // 3^0 = 1
+	                        0x6CC8F7FBB8A5E113, 0x000000019AEB6ECC, // 3^61
+	                        0x6878E44938606769, 0xD73A1059B8013933, // 3^(2*61)
+	                        0x9396F76B67B7C403, 0x0000000000000002,
+	                        0x25A79F6508B7F5CB, 0x05515FED4D025D6F, // 3^(3*61)
+	                        0x37E2AD6FF9936EA9, 0xB69B5308880B15B6,
+	                        0x0000000422BE6150};
+
+	for (i = 0; i < SIDH_NWORDS64_ORDER; i++)
+		alpha[i] = 0;
+
+	oqs_sidh_cln16_fp2copy751(r, u);
+	for (k = 0; k < 2; k++) {
+		oqs_sidh_cln16_fp2copy751(u, v);
+		for (i = 0; i < 56; i++) {
+			oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		}
+		for (i = 0; i < 61 * (2 - k); i++) {
+			oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+		}
+		oqs_sidh_cln16_phn15(v, LUT, LUT_0, LUT_1, one, alpha_k); // v order 3^61
+		oqs_sidh_cln16_multiply((digit_t *) alpha_k, (digit_t *) &pow3_61[2 * k], (digit_t *) alpha_tmp, 2 * 64 / RADIX);
+		oqs_sidh_cln16_mp_add((digit_t *) alpha, (digit_t *) alpha_tmp, (digit_t *) alpha, 4 * 64 / RADIX);
+
+		oqs_sidh_cln16_exp_Fp2_cycl(t_ori[k], alpha_k, one, tmp, 97);
+		oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+	}
+	oqs_sidh_cln16_fp2copy751(u, v);
+	for (i = 0; i < 56; i++) {
+		oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+	}
+	oqs_sidh_cln16_phn15(v, LUT, LUT_0, LUT_1, one, alpha_k); // v order 3^61
+	oqs_sidh_cln16_multiply((digit_t *) alpha_k, (digit_t *) &pow3_61[4], (digit_t *) alpha_tmp, 4 * 64 / RADIX);
+	oqs_sidh_cln16_mp_add((digit_t *) alpha, (digit_t *) alpha_tmp, (digit_t *) alpha, SIDH_NWORDS_ORDER);
+
+	oqs_sidh_cln16_exp_Fp2_cycl(t_ori[2], alpha_k, one, tmp, 97);
+	oqs_sidh_cln16_fp2mul751_mont(u, tmp, u);
+	oqs_sidh_cln16_phn15_1(u, LUT, LUT_0, LUT_1, one, alpha_k); // u order 3^56
+	oqs_sidh_cln16_multiply((digit_t *) alpha_k, (digit_t *) &pow3_61[8], (digit_t *) alpha_tmp, 5 * 64 / RADIX);
+	oqs_sidh_cln16_mp_add((digit_t *) alpha, (digit_t *) alpha_tmp, (digit_t *) alpha, SIDH_NWORDS_ORDER);
+}
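+
+// The discrete log modulo 3^239 is split as 239 = 3*61 + 56: two 61-trit chunks in the
+// loop and a third right after it (each recovered by phn15 and weighted by 3^0, 3^61 and
+// 3^(2*61) from pow3_61), followed by a final 56-trit chunk recovered by phn15_1 and
+// weighted by 3^(3*61). Inside those routines the chunking continues as 61 = 4*15 + 1
+// and 56 = 3*15 + 3*3 + 2.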
+
+void oqs_sidh_cln16_build_LUTs_3(oqs_sidh_cln16_f2elm_t g, oqs_sidh_cln16_f2elm_t *t_ori, oqs_sidh_cln16_f2elm_t *LUT, oqs_sidh_cln16_f2elm_t *LUT_0, oqs_sidh_cln16_f2elm_t *LUT_1, const oqs_sidh_cln16_felm_t one) { // Lookup table generation for 3-torsion PH
+	oqs_sidh_cln16_f2elm_t tmp;
+	unsigned int i, j;
+
+	// Build (small) tables
+	oqs_sidh_cln16_fp2copy751(g, tmp);
+	oqs_sidh_cln16_inv_Fp2_cycl(tmp);
+	oqs_sidh_cln16_fp2copy751(tmp, t_ori[0]); // t_ori[0] = g^(-1)
+	for (i = 0; i < 2; i++) {
+		for (j = 0; j < 61; j++)
+			oqs_sidh_cln16_cube_Fp2_cycl(tmp, one);
+		oqs_sidh_cln16_fp2copy751(tmp, t_ori[i + 1]);
+	}
+	for (i = 0; i < 56; i++)
+		oqs_sidh_cln16_cube_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, t_ori[3]);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_0[0]);
+	for (i = 0; i < 5; i++)
+		oqs_sidh_cln16_cube_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, t_ori[4]); // t_ori done.
+
+	for (i = 0; i < 10; i++)
+		oqs_sidh_cln16_cube_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_0[1]);
+	for (i = 1; i < 3; i++) {
+		for (j = 0; j < 15; j++)
+			oqs_sidh_cln16_cube_Fp2_cycl(tmp, one);
+		oqs_sidh_cln16_fp2copy751(tmp, LUT_0[i + 1]);
+	}
+	oqs_sidh_cln16_cube_Fp2_cycl(tmp, one);
+	oqs_sidh_cln16_fp2copy751(tmp, LUT_1[0]);
+
+	for (i = 0; i < 4; i++) {
+		for (j = 0; j < 3; j++)
+			oqs_sidh_cln16_cube_Fp2_cycl(tmp, one);
+		oqs_sidh_cln16_fp2copy751(tmp, LUT_1[i + 1]);
+	}
+	oqs_sidh_cln16_fp2copy751(tmp, LUT[0]);
+	for (i = 0; i < 2; i++) {
+		oqs_sidh_cln16_fp2copy751(LUT[i], LUT[i + 1]);
+		oqs_sidh_cln16_cube_Fp2_cycl(LUT[i + 1], one);
+	}
+	oqs_sidh_cln16_fp2copy751(LUT[2], LUT[3]);
+	oqs_sidh_cln16_inv_Fp2_cycl(LUT[3]);
+	oqs_sidh_cln16_fp2correction751(LUT[3]);
+}
+
+void oqs_sidh_cln16_ph3(oqs_sidh_cln16_point_t phiP, oqs_sidh_cln16_point_t phiQ, oqs_sidh_cln16_point_t PS, oqs_sidh_cln16_point_t QS, oqs_sidh_cln16_f2elm_t A, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, PCurveIsogenyStruct CurveIsogeny) { // 3-torsion Pohlig-Hellman function
+	                                                                                                                                                                                                                                                          // This function computes the five pairings e(QS, PS), e(QS, phiP), e(QS, phiQ), e(PS, phiP), e(PS,phiQ),
+	                                                                                                                                                                                                                                                          // computes the lookup tables for the Pohlig-Hellman functions,
+	                                                                                                                                                                                                                                                          // and then computes the discrete logarithms of the last four pairing values to the base of the first pairing value.
+	oqs_sidh_cln16_f2elm_t t_ori[5], n[5], LUT[4], LUT_0[4], LUT_1[5];
+	oqs_sidh_cln16_felm_t one = {0};
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one);
+
+	// Compute the pairings
+	oqs_sidh_cln16_Tate_pairings_3_torsion(QS, PS, phiP, phiQ, A, n, CurveIsogeny);
+
+	// Build the look-up tables
+	oqs_sidh_cln16_build_LUTs_3(n[0], t_ori, LUT, LUT_0, LUT_1, one);
+
+	// Finish computation
+	oqs_sidh_cln16_phn61(n[1], t_ori, LUT, LUT_0, LUT_1, one, a0);
+	oqs_sidh_cln16_phn61(n[3], t_ori, LUT, LUT_0, LUT_1, one, b0);
+	oqs_sidh_cln16_mp_sub(CurveIsogeny->Border, (digit_t *) b0, (digit_t *) b0, SIDH_NWORDS_ORDER);
+	oqs_sidh_cln16_phn61(n[2], t_ori, LUT, LUT_0, LUT_1, one, a1);
+	oqs_sidh_cln16_phn61(n[4], t_ori, LUT, LUT_0, LUT_1, one, b1);
+	oqs_sidh_cln16_mp_sub(CurveIsogeny->Border, (digit_t *) b1, (digit_t *) b1, SIDH_NWORDS_ORDER);
+}
+
+unsigned int oqs_sidh_cln16_mod3(digit_t *a) { // Computes the input modulo 3
+	                                           // The input is assumed to be SIDH_NWORDS_ORDER long
+	digit_t temp;
+	hdigit_t *val = (hdigit_t *) a, r = 0;
+	int i;
+
+	for (i = (2 * SIDH_NWORDS_ORDER - 1); i >= 0; i--) {
+		temp = ((digit_t) r << (sizeof(hdigit_t) * 8)) | (digit_t) val[i];
+		r = temp % 3;
+	}
+
+	return r;
+}
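+
+// The reduction proceeds half-digit by half-digit from the most significant end, i.e.
+// r = (r*2^(8*sizeof(hdigit_t)) + val[i]) mod 3. Since 2^32 = 4^16 = 1 (mod 3) (and
+// likewise 2^16 = 1 (mod 3) for 32-bit digits), the half-digit radix is congruent to 1
+// modulo 3, so this amounts to summing the half-digits modulo 3; working with half-digits
+// keeps temp from overflowing a digit_t.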
+
+void oqs_sidh_cln16_compress_3_torsion(const unsigned char *pPublicKeyB, unsigned char *CompressedPKB, uint64_t *a0, uint64_t *b0, uint64_t *a1, uint64_t *b1, oqs_sidh_cln16_point_t R1, oqs_sidh_cln16_point_t R2, PCurveIsogenyStruct CurveIsogeny) { // 3-torsion compression function
+	oqs_sidh_cln16_point_full_proj_t P, Q, phP, phQ, phX;
+	oqs_sidh_cln16_point_t phiP, phiQ;
+	oqs_sidh_cln16_publickey_t PK;
+	digit_t *comp = (digit_t *) CompressedPKB;
+	digit_t inv[SIDH_NWORDS_ORDER];
+	oqs_sidh_cln16_f2elm_t A, vec[4], Zinv[4];
+	uint64_t Montgomery_Rprime[SIDH_NWORDS64_ORDER] = {0x1A55482318541298, 0x070A6370DFA12A03, 0xCB1658E0E3823A40, 0xB3B7384EB5DEF3F9, 0xCBCA952F7006EA33, 0x00569EF8EC94864C}; // Value (2^384)^2 mod 3^239
+	uint64_t Montgomery_rprime[SIDH_NWORDS64_ORDER] = {0x48062A91D3AB563D, 0x6CE572751303C2F5, 0x5D1319F3F160EC9D, 0xE35554E8C2D5623A, 0xCA29300232BC79A5, 0x8AAD843D646D78C5}; // Value -(3^239)^-1 mod 2^384
+	unsigned int bit;
+
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) pPublicKeyB)[0], ((oqs_sidh_cln16_f2elm_t *) &PK)[0]); // Converting to Montgomery representation
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) pPublicKeyB)[1], ((oqs_sidh_cln16_f2elm_t *) &PK)[1]);
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) pPublicKeyB)[2], ((oqs_sidh_cln16_f2elm_t *) &PK)[2]);
+
+	oqs_sidh_cln16_recover_y(PK, phP, phQ, phX, A, CurveIsogeny);
+	oqs_sidh_cln16_generate_3_torsion_basis(A, P, Q, CurveIsogeny);
+	oqs_sidh_cln16_fp2copy751(P->Z, vec[0]);
+	oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]);
+	oqs_sidh_cln16_fp2copy751(phP->Z, vec[2]);
+	oqs_sidh_cln16_fp2copy751(phQ->Z, vec[3]);
+	oqs_sidh_cln16_mont_n_way_inv(vec, 4, Zinv);
+
+	oqs_sidh_cln16_fp2mul751_mont(P->X, Zinv[0], R1->x);
+	oqs_sidh_cln16_fp2mul751_mont(P->Y, Zinv[0], R1->y);
+	oqs_sidh_cln16_fp2mul751_mont(Q->X, Zinv[1], R2->x);
+	oqs_sidh_cln16_fp2mul751_mont(Q->Y, Zinv[1], R2->y);
+	oqs_sidh_cln16_fp2mul751_mont(phP->X, Zinv[2], phiP->x);
+	oqs_sidh_cln16_fp2mul751_mont(phP->Y, Zinv[2], phiP->y);
+	oqs_sidh_cln16_fp2mul751_mont(phQ->X, Zinv[3], phiQ->x);
+	oqs_sidh_cln16_fp2mul751_mont(phQ->Y, Zinv[3], phiQ->y);
+
+	oqs_sidh_cln16_ph3(phiP, phiQ, R1, R2, A, a0, b0, a1, b1, CurveIsogeny);
+
+	bit = oqs_sidh_cln16_mod3((digit_t *) a0);
+	oqs_sidh_cln16_to_Montgomery_mod_order((digit_t *) a0, (digit_t *) a0, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to Montgomery representation
+	oqs_sidh_cln16_to_Montgomery_mod_order((digit_t *) a1, (digit_t *) a1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+	oqs_sidh_cln16_to_Montgomery_mod_order((digit_t *) b0, (digit_t *) b0, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+	oqs_sidh_cln16_to_Montgomery_mod_order((digit_t *) b1, (digit_t *) b1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+
+	if (bit != 0) { // Storing [b1*a0inv, a1*a0inv, b0*a0inv] and setting bit384 to 0
+		oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd((digit_t *) a0, inv, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) b0, inv, &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) a1, inv, &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) b1, inv, &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[0], &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1;
+	} else { // Storing [b1*b0inv, a1*b0inv, a0*b0inv] and setting bit384 to 1
+		oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd((digit_t *) b0, inv, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) a0, inv, &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) a1, inv, &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order((digit_t *) b1, inv, &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[0], &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		comp[3 * SIDH_NWORDS_ORDER - 1] |= (digit_t) 1 << (sizeof(digit_t) * 8 - 1);
+	}
+
+	oqs_sidh_cln16_from_fp2mont(A, (oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER]);
+}
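+
+// Same structure as the 2-torsion case, except that the scalars live modulo 3^239
+// (CurveIsogeny->Border), so the inversion and multiplications are carried out in
+// Montgomery form modulo the order using the local constants Montgomery_rprime and
+// Montgomery_Rprime. The test "bit != 0" checks that a0 is not divisible by 3, i.e. that
+// a0 is invertible modulo 3^239, and thereby decides whether a0 or b0 gets inverted; the
+// choice is again recorded in the top bit of the third scalar block.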
+
+///////////////////////////////////////////////////////////////////////////////////
+///////////////             FUNCTIONS FOR DECOMPRESSION             ///////////////
+
+void oqs_sidh_cln16_ADD(const oqs_sidh_cln16_point_full_proj_t P, const oqs_sidh_cln16_f2elm_t QX, const oqs_sidh_cln16_f2elm_t QY, const oqs_sidh_cln16_f2elm_t QZ, const oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_point_full_proj_t R) { // General addition.
+	                                                                                                                                                                                                                                       // Input: projective Montgomery points P=(XP:YP:ZP) and Q=(XQ:YQ:ZQ).
+	                                                                                                                                                                                                                                       // Output: projective Montgomery point R <- P+Q = (XQP:YQP:ZQP).
+	oqs_sidh_cln16_f2elm_t t0, t1, t2, t3, t4, t5, t6, t7;
+
+	oqs_sidh_cln16_fp2mul751_mont(QX, P->Z, t0);   // t0 = x2*Z1
+	oqs_sidh_cln16_fp2mul751_mont(P->X, QZ, t1);   // t1 = X1*z2
+	oqs_sidh_cln16_fp2add751(t0, t1, t2);          // t2 = t0 + t1
+	oqs_sidh_cln16_fp2sub751(t1, t0, t3);          // t3 = t1 - t0
+	oqs_sidh_cln16_fp2mul751_mont(QX, P->X, t0);   // t0 = x2*X1
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, QZ, t1);   // t1 = Z1*z2
+	oqs_sidh_cln16_fp2add751(t0, t1, t4);          // t4 = t0 + t1
+	oqs_sidh_cln16_fp2mul751_mont(t0, A, t0);      // t0 = t0*A
+	oqs_sidh_cln16_fp2mul751_mont(QY, P->Y, t5);   // t5 = y2*Y1
+	oqs_sidh_cln16_fp2sub751(t0, t5, t0);          // t0 = t0 - t5
+	oqs_sidh_cln16_fp2mul751_mont(t0, t1, t0);     // t0 = t0*t1
+	oqs_sidh_cln16_fp2add751(t0, t0, t0);          // t0 = t0 + t0
+	oqs_sidh_cln16_fp2mul751_mont(t2, t4, t5);     // t5 = t2*t4
+	oqs_sidh_cln16_fp2add751(t5, t0, t5);          // t5 = t5 + t0
+	oqs_sidh_cln16_fp2sqr751_mont(P->X, t0);       // t0 = X1 ^ 2
+	oqs_sidh_cln16_fp2sqr751_mont(P->Z, t6);       // t6 = Z1 ^ 2
+	oqs_sidh_cln16_fp2add751(t0, t6, t0);          // t0 = t0 + t6
+	oqs_sidh_cln16_fp2add751(t1, t1, t1);          // t1 = t1 + t1
+	oqs_sidh_cln16_fp2mul751_mont(QY, P->X, t7);   // t7 = y2*X1
+	oqs_sidh_cln16_fp2mul751_mont(QX, P->Y, t6);   // t6 = x2*Y1
+	oqs_sidh_cln16_fp2sub751(t7, t6, t7);          // t7 = t7 - t6
+	oqs_sidh_cln16_fp2mul751_mont(t1, t7, t1);     // t1 = t1*t7
+	oqs_sidh_cln16_fp2mul751_mont(A, t2, t7);      // t7 = A*t2
+	oqs_sidh_cln16_fp2add751(t7, t4, t4);          // t4 = t4 + t7
+	oqs_sidh_cln16_fp2mul751_mont(t1, t4, t4);     // t4 = t1*t4
+	oqs_sidh_cln16_fp2mul751_mont(QY, QZ, t1);     // t1 = y2*z2
+	oqs_sidh_cln16_fp2mul751_mont(t0, t1, t0);     // t0 = t0*t1
+	oqs_sidh_cln16_fp2sqr751_mont(QZ, t1);         // t1 = z2 ^ 2
+	oqs_sidh_cln16_fp2sqr751_mont(QX, t6);         // t6 = x2 ^ 2
+	oqs_sidh_cln16_fp2add751(t1, t6, t1);          // t1 = t1 + t6
+	oqs_sidh_cln16_fp2mul751_mont(P->Z, P->Y, t6); // t6 = Z1*Y1
+	oqs_sidh_cln16_fp2mul751_mont(t1, t6, t1);     // t1 = t1*t6
+	oqs_sidh_cln16_fp2sub751(t0, t1, t0);          // t0 = t0 - t1
+	oqs_sidh_cln16_fp2mul751_mont(t2, t0, t0);     // t0 = t2*t0
+	oqs_sidh_cln16_fp2mul751_mont(t5, t3, R->X);   // X3 = t5*t3
+	oqs_sidh_cln16_fp2add751(t4, t0, R->Y);        // Y3 = t4 + t0
+	oqs_sidh_cln16_fp2sqr751_mont(t3, t0);         // t0 = t3 ^ 2
+	oqs_sidh_cln16_fp2mul751_mont(t3, t0, R->Z);   // Z3 = t3*t0
+}
+
+void oqs_sidh_cln16_Mont_ladder(const oqs_sidh_cln16_f2elm_t x, const digit_t *m, oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const oqs_sidh_cln16_f2elm_t A24, const unsigned int order_bits, const unsigned int order_fullbits, PCurveIsogenyStruct CurveIsogeny) { // The Montgomery ladder, running in non constant-time
+	                                                                                                                                                                                                                                                                                    // Inputs: the affine x-coordinate of a point P on E: B*y^2=x^3+A*x^2+x,
+	                                                                                                                                                                                                                                                                                    //         scalar m
+	                                                                                                                                                                                                                                                                                    //         curve constant A24 = (A+2)/4
+	                                                                                                                                                                                                                                                                                    //         order_bits = subgroup order bitlength
+	                                                                                                                                                                                                                                                                                    //         order_fullbits = smallest multiple of 32 larger than the order bitlength
+	                                                                                                                                                                                                                                                                                    // Output: P = m*(x:1)
+	                                                                                                                                                                                                                                                                                    // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	unsigned int bit = 0, owords = NBITS_TO_NWORDS(order_fullbits);
+	digit_t scalar[SIDH_NWORDS_ORDER];
+	digit_t mask;
+	int i;
+
+	// Initializing with the points (1:0) and (x:1)
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) P->X[0]);
+	oqs_sidh_cln16_fpzero751(P->X[1]);
+	oqs_sidh_cln16_fp2zero751(P->Z);
+	oqs_sidh_cln16_fp2copy751(x, Q->X);
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, (digit_t *) Q->Z[0]);
+	oqs_sidh_cln16_fpzero751(Q->Z[1]);
+
+	for (i = SIDH_NWORDS_ORDER - 1; i >= 0; i--) {
+		scalar[i] = m[i];
+	}
+
+	for (i = order_fullbits - order_bits; i > 0; i--) {
+		oqs_sidh_cln16_mp_shiftl1(scalar, owords);
+	}
+
+	for (i = order_bits; i > 0; i--) {
+		bit = (unsigned int) (scalar[owords - 1] >> (RADIX - 1));
+		oqs_sidh_cln16_mp_shiftl1(scalar, owords);
+		mask = 0 - (digit_t) bit;
+
+		oqs_sidh_cln16_swap_points(P, Q, mask);
+		oqs_sidh_cln16_xDBLADD(P, Q, x, A24);   // If bit=0 then P <- 2*P and Q <- P+Q,
+		oqs_sidh_cln16_swap_points(P, Q, mask); // else if bit=1 then Q <- 2*Q and P <- P+Q
+	}
+}
+
+void oqs_sidh_cln16_mont_twodim_scalarmult(digit_t *a, const oqs_sidh_cln16_point_t R, const oqs_sidh_cln16_point_t S, const oqs_sidh_cln16_f2elm_t A, const oqs_sidh_cln16_f2elm_t A24, oqs_sidh_cln16_point_full_proj_t P, PCurveIsogenyStruct CurveIsogeny) { // Computes R+aS
+	oqs_sidh_cln16_point_proj_t P0, P1;
+	oqs_sidh_cln16_point_full_proj_t P2;
+	oqs_sidh_cln16_f2elm_t one = {0};
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]);
+	oqs_sidh_cln16_Mont_ladder(S->x, a, P0, P1, A24, CurveIsogeny->oBbits, CurveIsogeny->owordbits, CurveIsogeny); // Hardwired to oBbits
+	recover_os(P0->X, P0->Z, P1->X, P1->Z, S->x, S->y, A, P2->X, P2->Y, P2->Z);
+	oqs_sidh_cln16_ADD(P2, R->x, R->y, one, A, P);
+}
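+
+// Sketch of why this works: the ladder maintains the invariant P1 - P0 = (x:1) = S, so
+// after the loop P0 = [a]S and P1 = [a]S + S. recover_os then uses this pair together
+// with the affine coordinates of S to rebuild the y-coordinate of [a]S, and the full
+// addition ADD computes R + [a]S.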
+
+void oqs_sidh_cln16_decompress_2_torsion(const unsigned char *SecretKey, const unsigned char *CompressedPKB, oqs_sidh_cln16_point_proj_t R, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { // 2-torsion decompression function
+	oqs_sidh_cln16_point_t R1, R2;
+	oqs_sidh_cln16_point_full_proj_t P, Q;
+	digit_t *comp = (digit_t *) CompressedPKB;
+	oqs_sidh_cln16_f2elm_t A24, vec[2], invs[2], one = {0};
+	digit_t tmp1[2 * SIDH_NWORDS_ORDER], tmp2[2 * SIDH_NWORDS_ORDER], vone[2 * SIDH_NWORDS_ORDER] = {0}, mask = (digit_t)(-1);
+	unsigned int bit;
+
+	mask >>= (CurveIsogeny->owordbits - CurveIsogeny->oAbits);
+	vone[0] = 1;
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]);
+	oqs_sidh_cln16_to_fp2mont((oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER], A); // Converting to Montgomery representation
+	oqs_sidh_cln16_generate_2_torsion_basis(A, P, Q, CurveIsogeny);
+
+	// normalize basis points
+	oqs_sidh_cln16_fp2copy751(P->Z, vec[0]);
+	oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]);
+	oqs_sidh_cln16_mont_n_way_inv(vec, 2, invs);
+	oqs_sidh_cln16_fp2mul751_mont(P->X, invs[0], R1->x);
+	oqs_sidh_cln16_fp2mul751_mont(P->Y, invs[0], R1->y);
+	oqs_sidh_cln16_fp2mul751_mont(Q->X, invs[1], R2->x);
+	oqs_sidh_cln16_fp2mul751_mont(Q->Y, invs[1], R2->y);
+
+	oqs_sidh_cln16_fp2add751(A, one, A24);
+	oqs_sidh_cln16_fp2add751(A24, one, A24);
+	oqs_sidh_cln16_fp2div2_751(A24, A24);
+	oqs_sidh_cln16_fp2div2_751(A24, A24);
+
+	bit = comp[3 * SIDH_NWORDS_ORDER - 1] >> (sizeof(digit_t) * 8 - 1);
+	comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1;
+
+	if (bit == 0) {
+		oqs_sidh_cln16_multiply((digit_t *) SecretKey, &comp[SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_mp_add(tmp1, vone, tmp1, SIDH_NWORDS_ORDER);
+		tmp1[SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_inv_mod_orderA(tmp1, tmp2);
+		oqs_sidh_cln16_multiply((digit_t *) SecretKey, &comp[2 * SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_mp_add(&comp[0], tmp1, tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_multiply(tmp1, tmp2, vone, SIDH_NWORDS_ORDER);
+		vone[SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_mont_twodim_scalarmult(vone, R1, R2, A, A24, P, CurveIsogeny);
+	} else {
+		oqs_sidh_cln16_multiply((digit_t *) SecretKey, &comp[2 * SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_mp_add(tmp1, vone, tmp1, SIDH_NWORDS_ORDER);
+		tmp1[SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_inv_mod_orderA(tmp1, tmp2);
+		oqs_sidh_cln16_multiply((digit_t *) SecretKey, &comp[SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_mp_add(&comp[0], tmp1, tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_multiply(tmp1, tmp2, vone, SIDH_NWORDS_ORDER);
+		vone[SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_mont_twodim_scalarmult(vone, R2, R1, A, A24, P, CurveIsogeny);
+	}
+
+	oqs_sidh_cln16_fp2copy751(P->X, R->X);
+	oqs_sidh_cln16_fp2copy751(P->Z, R->Z);
+}
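+
+// Idea (bit = 0 case): the compressed words are [b0/a0, a1/a0, b1/a0] modulo 2^372, and
+// with the secret key m the kernel point satisfies
+//   phiP + [m]phiQ = [a0 + m*a1]R1 + [b0 + m*b1]R2,
+// so a generator of the same subgroup is R1 + [t]R2 with t = (b0 + m*b1)/(a0 + m*a1),
+// which is the value assembled as tmp1*tmp2 (stored in vone) and passed to
+// mont_twodim_scalarmult. The bit = 1 branch is symmetric with the roles of a0/b0 and
+// R1/R2 swapped.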
+
+void oqs_sidh_cln16_decompress_3_torsion(const unsigned char *SecretKey, const unsigned char *CompressedPKA, oqs_sidh_cln16_point_proj_t R, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { // 3-torsion decompression function
+	oqs_sidh_cln16_point_t R1, R2;
+	oqs_sidh_cln16_point_full_proj_t P, Q;
+	digit_t *comp = (digit_t *) CompressedPKA;
+	digit_t *SKin = (digit_t *) SecretKey;
+	oqs_sidh_cln16_f2elm_t A24, vec[2], invs[2], one = {0};
+	digit_t t1[SIDH_NWORDS_ORDER], t2[SIDH_NWORDS_ORDER], t3[SIDH_NWORDS_ORDER], t4[SIDH_NWORDS_ORDER], vone[SIDH_NWORDS_ORDER] = {0};
+	uint64_t Montgomery_Rprime[SIDH_NWORDS64_ORDER] = {0x1A55482318541298, 0x070A6370DFA12A03, 0xCB1658E0E3823A40, 0xB3B7384EB5DEF3F9, 0xCBCA952F7006EA33, 0x00569EF8EC94864C}; // Value (2^384)^2 mod 3^239
+	uint64_t Montgomery_rprime[SIDH_NWORDS64_ORDER] = {0x48062A91D3AB563D, 0x6CE572751303C2F5, 0x5D1319F3F160EC9D, 0xE35554E8C2D5623A, 0xCA29300232BC79A5, 0x8AAD843D646D78C5}; // Value -(3^239)^-1 mod 2^384
+	unsigned int bit;
+
+	vone[0] = 1;
+	oqs_sidh_cln16_to_Montgomery_mod_order(vone, vone, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to Montgomery representation
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]);
+	oqs_sidh_cln16_to_fp2mont((oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER], A); // Converting to Montgomery representation
+	oqs_sidh_cln16_generate_3_torsion_basis(A, P, Q, CurveIsogeny);
+
+	// normalize basis points
+	oqs_sidh_cln16_fp2copy751(P->Z, vec[0]);
+	oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]);
+	oqs_sidh_cln16_mont_n_way_inv(vec, 2, invs);
+	oqs_sidh_cln16_fp2mul751_mont(P->X, invs[0], R1->x);
+	oqs_sidh_cln16_fp2mul751_mont(P->Y, invs[0], R1->y);
+	oqs_sidh_cln16_fp2mul751_mont(Q->X, invs[1], R2->x);
+	oqs_sidh_cln16_fp2mul751_mont(Q->Y, invs[1], R2->y);
+
+	oqs_sidh_cln16_fp2add751(A, one, A24);
+	oqs_sidh_cln16_fp2add751(A24, one, A24);
+	oqs_sidh_cln16_fp2div2_751(A24, A24);
+	oqs_sidh_cln16_fp2div2_751(A24, A24);
+
+	bit = comp[3 * SIDH_NWORDS_ORDER - 1] >> (sizeof(digit_t) * 8 - 1);
+	comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1;
+	oqs_sidh_cln16_to_Montgomery_mod_order(SKin, t1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to Montgomery representation
+	oqs_sidh_cln16_to_Montgomery_mod_order(&comp[0], t2, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+	oqs_sidh_cln16_to_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+	oqs_sidh_cln16_to_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+
+	if (bit == 0) {
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_mp_add(t3, vone, t3, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_mp_add(t2, t4, t4, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t3, t4, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation
+		oqs_sidh_cln16_mont_twodim_scalarmult(t3, R1, R2, A, A24, P, CurveIsogeny);
+	} else {
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_mp_add(t4, vone, t4, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_mp_add(t2, t3, t3, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t3, t4, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation
+		oqs_sidh_cln16_mont_twodim_scalarmult(t3, R2, R1, A, A24, P, CurveIsogeny);
+	}
+
+	oqs_sidh_cln16_fp2copy751(P->X, R->X);
+	oqs_sidh_cln16_fp2copy751(P->Z, R->Z);
+}
diff --git a/crypt/liboqs/kex_sidh_cln16/fpx.c b/crypt/liboqs/kex_sidh_cln16/fpx.c
new file mode 100644
index 0000000000000000000000000000000000000000..c3d60404b6e0abc5f6d6ee318a224db05da32ac5
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/fpx.c
@@ -0,0 +1,1193 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral  
+*       Diffie-Hellman key exchange.
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: core functions over GF(p751^2) and field operations modulo the prime p751
+*
+*********************************************************************************************/
+
+#include "SIDH_internal.h"
+#include <string.h>
+
+// Global constants
+const uint64_t p751[NWORDS_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF,
+                                     0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C};
+const uint64_t p751p1[NWORDS_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000,
+                                       0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C};
+const uint64_t p751x2[NWORDS_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF,
+                                       0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38};
+const uint64_t Montgomery_R2[NWORDS_FIELD] = {0x233046449DAD4058, 0xDB010161A696452A, 0x5E36941472E3FD8E, 0xF40BFE2082A2E706, 0x4932CCA8904F8751, 0x1F735F1F1EE7FC81,
+                                              0xA24F4D80C1048E18, 0xB56C383CCDB607C5, 0x441DD47B735F9C90, 0x5673ED2C6A6AC82A, 0x06C905261132294B, 0x000041AD830F1F35};
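+
+// These constants refer to the SIDH prime p751 = 2^372*3^239 - 1: p751p1 = p751 + 1,
+// p751x2 = 2*p751, and Montgomery_R2 = (2^768)^2 mod p751, the constant used to convert
+// field elements into Montgomery representation with radix R = 2^768.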
+
+/*******************************************************/
+/************* Field arithmetic functions **************/
+
+__inline void oqs_sidh_cln16_fpcopy751(const oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t c) { // Copy a field element, c = a.
+	unsigned int i;
+
+	for (i = 0; i < NWORDS_FIELD; i++)
+		c[i] = a[i];
+}
+
+__inline void oqs_sidh_cln16_fpzero751(oqs_sidh_cln16_felm_t a) { // Zero a field element, a = 0.
+	unsigned int i;
+
+	for (i = 0; i < NWORDS_FIELD; i++)
+		a[i] = 0;
+}
+
+bool oqs_sidh_cln16_fpequal751_non_constant_time(const oqs_sidh_cln16_felm_t a, const oqs_sidh_cln16_felm_t b) { // Non constant-time comparison of two field elements. If a = b return TRUE, otherwise, return FALSE.
+	unsigned int i;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		if (a[i] != b[i])
+			return false;
+	}
+
+	return true;
+}
+
+void oqs_sidh_cln16_to_mont(const oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t mc) { // Conversion to Montgomery representation,
+	                                                                                   // mc = a*R^2*R^(-1) mod p751 = a*R mod p751, where a in [0, p751-1].
+	                                                                                   // The Montgomery constant R^2 mod p751 is the global value "Montgomery_R2".
+
+	oqs_sidh_cln16_fpmul751_mont(a, (digit_t *) &Montgomery_R2, mc);
+}
+
+void oqs_sidh_cln16_from_mont(const oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t c) { // Conversion from Montgomery representation to standard representation,
+	                                                                                     // c = ma*R^(-1) mod p751 = a mod p751, where ma in [0, p751-1].
+	digit_t one[NWORDS_FIELD] = {0};
+
+	one[0] = 1;
+	oqs_sidh_cln16_fpmul751_mont(ma, one, c);
+	oqs_sidh_cln16_fpcorrection751(c);
+}
+
+static __inline unsigned int is_felm_zero(const oqs_sidh_cln16_felm_t x) { // Is x = 0? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
+	                                                                       // SECURITY NOTE: This function does not run in constant-time.
+	unsigned int i;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		if (x[i] != 0)
+			return false;
+	}
+	return true;
+}
+
+static __inline unsigned int is_felm_even(const oqs_sidh_cln16_felm_t x) { // Is x even? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
+	return (unsigned int) ((x[0] & 1) ^ 1);
+}
+
+static __inline unsigned int is_felm_lt(const oqs_sidh_cln16_felm_t x, const oqs_sidh_cln16_felm_t y) { // Is x < y? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
+	                                                                                                    // SECURITY NOTE: This function does not run in constant-time.
+	int i;
+
+	for (i = NWORDS_FIELD - 1; i >= 0; i--) {
+		if (x[i] < y[i]) {
+			return true;
+		} else if (x[i] > y[i]) {
+			return false;
+		}
+	}
+	return false;
+}
+
+void oqs_sidh_cln16_copy_words(const digit_t *a, digit_t *c, const unsigned int nwords) { // Copy wordsize digits, c = a, where lng(a) = nwords.
+	unsigned int i;
+
+	for (i = 0; i < nwords; i++) {
+		c[i] = a[i];
+	}
+}
+
+__inline unsigned int oqs_sidh_cln16_mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit.
+	unsigned int i, borrow = 0;
+
+	for (i = 0; i < nwords; i++) {
+		SUBC(borrow, a[i], b[i], borrow, c[i]);
+	}
+
+	return borrow;
+}
+
+__inline unsigned int oqs_sidh_cln16_mp_add(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit.
+	unsigned int i, carry = 0;
+
+	for (i = 0; i < nwords; i++) {
+		ADDC(carry, a[i], b[i], carry, c[i]);
+	}
+
+	return carry;
+}
+
+__inline void oqs_sidh_cln16_mp_add751(const digit_t *a, const digit_t *b, digit_t *c) { // 751-bit multiprecision addition, c = a+b.
+
+#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION)
+
+	oqs_sidh_cln16_mp_add(a, b, c, NWORDS_FIELD);
+
+#elif (OS_TARGET == OS_LINUX)
+
+	oqs_sidh_cln16_mp_add751_asm(a, b, c);
+
+#endif
+}
+
+__inline void oqs_sidh_cln16_mp_add751x2(const digit_t *a, const digit_t *b, digit_t *c) { // 2x751-bit multiprecision addition, c = a+b.
+
+#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION)
+
+	oqs_sidh_cln16_mp_add(a, b, c, 2 * NWORDS_FIELD);
+
+#elif (OS_TARGET == OS_LINUX)
+
+	oqs_sidh_cln16_mp_add751x2_asm(a, b, c);
+
+#endif
+}
+
+void oqs_sidh_cln16_mp_shiftr1(digit_t *x, const unsigned int nwords) { // Multiprecision right shift by one.
+	unsigned int i;
+
+	for (i = 0; i < nwords - 1; i++) {
+		SHIFTR(x[i + 1], x[i], 1, x[i], RADIX);
+	}
+	x[nwords - 1] >>= 1;
+}
+
+void oqs_sidh_cln16_mp_shiftl1(digit_t *x, const unsigned int nwords) { // Multiprecision left shift by one.
+	int i;
+
+	for (i = nwords - 1; i > 0; i--) {
+		SHIFTL(x[i], x[i - 1], 1, x[i], RADIX);
+	}
+	x[0] <<= 1;
+}
+
+void oqs_sidh_cln16_fpmul751_mont(const oqs_sidh_cln16_felm_t ma, const oqs_sidh_cln16_felm_t mb, oqs_sidh_cln16_felm_t mc) { // 751-bit Comba multi-precision multiplication, c = a*b mod p751.
+	oqs_sidh_cln16_dfelm_t temp = {0};
+
+	oqs_sidh_cln16_mp_mul(ma, mb, temp, NWORDS_FIELD);
+	oqs_sidh_cln16_rdc_mont(temp, mc);
+}
+
+void oqs_sidh_cln16_fpsqr751_mont(const oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t mc) { // 751-bit Comba multi-precision squaring, c = a^2 mod p751.
+	oqs_sidh_cln16_dfelm_t temp = {0};
+
+	oqs_sidh_cln16_mp_mul(ma, ma, temp, NWORDS_FIELD);
+	oqs_sidh_cln16_rdc_mont(temp, mc);
+}
+
+void oqs_sidh_cln16_fpinv751_chain_mont(oqs_sidh_cln16_felm_t a) { // Addition chain to compute a^((p751-3)/4) using Montgomery arithmetic.
+	oqs_sidh_cln16_felm_t t[27], tt;
+	unsigned int i, j;
+
+	// Precomputed table
+	oqs_sidh_cln16_fpsqr751_mont(a, tt);
+	oqs_sidh_cln16_fpmul751_mont(a, tt, t[0]);
+	oqs_sidh_cln16_fpmul751_mont(t[0], tt, t[1]);
+	oqs_sidh_cln16_fpmul751_mont(t[1], tt, t[2]);
+	oqs_sidh_cln16_fpmul751_mont(t[2], tt, t[3]);
+	oqs_sidh_cln16_fpmul751_mont(t[3], tt, t[3]);
+	for (i = 3; i <= 8; i++)
+		oqs_sidh_cln16_fpmul751_mont(t[i], tt, t[i + 1]);
+	oqs_sidh_cln16_fpmul751_mont(t[9], tt, t[9]);
+	for (i = 9; i <= 20; i++)
+		oqs_sidh_cln16_fpmul751_mont(t[i], tt, t[i + 1]);
+	oqs_sidh_cln16_fpmul751_mont(t[21], tt, t[21]);
+	for (i = 21; i <= 24; i++)
+		oqs_sidh_cln16_fpmul751_mont(t[i], tt, t[i + 1]);
+	oqs_sidh_cln16_fpmul751_mont(t[25], tt, t[25]);
+	oqs_sidh_cln16_fpmul751_mont(t[25], tt, t[26]);
+
+	oqs_sidh_cln16_fpcopy751(a, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[20], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[24], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[11], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[8], tt, tt);
+	for (i = 0; i < 8; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[23], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt);
+	for (i = 0; i < 9; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt);
+	for (i = 0; i < 10; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[15], tt, tt);
+	for (i = 0; i < 8; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[13], tt, tt);
+	for (i = 0; i < 8; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[26], tt, tt);
+	for (i = 0; i < 8; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[20], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[11], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[10], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[14], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[4], tt, tt);
+	for (i = 0; i < 10; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[18], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[1], tt, tt);
+	for (i = 0; i < 7; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[22], tt, tt);
+	for (i = 0; i < 10; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[6], tt, tt);
+	for (i = 0; i < 7; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[24], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[9], tt, tt);
+	for (i = 0; i < 8; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[18], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[17], tt, tt);
+	for (i = 0; i < 8; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(a, tt, tt);
+	for (i = 0; i < 10; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[16], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[7], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[0], tt, tt);
+	for (i = 0; i < 7; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[12], tt, tt);
+	for (i = 0; i < 7; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[19], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[22], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[25], tt, tt);
+	for (i = 0; i < 7; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[10], tt, tt);
+	for (i = 0; i < 7; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[22], tt, tt);
+	for (i = 0; i < 8; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[18], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[4], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[14], tt, tt);
+	for (i = 0; i < 7; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[13], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[5], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[23], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[21], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt);
+	for (i = 0; i < 7; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[23], tt, tt);
+	for (i = 0; i < 8; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[12], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[9], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[3], tt, tt);
+	for (i = 0; i < 7; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[13], tt, tt);
+	for (i = 0; i < 7; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[17], tt, tt);
+	for (i = 0; i < 8; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[26], tt, tt);
+	for (i = 0; i < 8; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[5], tt, tt);
+	for (i = 0; i < 8; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[8], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt);
+	for (i = 0; i < 6; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[11], tt, tt);
+	for (i = 0; i < 7; i++)
+		oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(t[20], tt, tt);
+	for (j = 0; j < 61; j++) {
+		for (i = 0; i < 6; i++)
+			oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+		oqs_sidh_cln16_fpmul751_mont(t[26], tt, tt);
+	}
+	oqs_sidh_cln16_fpcopy751(tt, a);
+}
+
+void oqs_sidh_cln16_fpinv751_mont(oqs_sidh_cln16_felm_t a) { // Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p751.
+	oqs_sidh_cln16_felm_t tt;
+
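+	// The addition chain produces tt = a^((p751-3)/4); two squarings give a^(p751-3), and the final
+	// multiplication by a yields a^(p751-2) = a^(-1) mod p751 (Fermat's little theorem).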
+	oqs_sidh_cln16_fpcopy751(a, tt);
+	oqs_sidh_cln16_fpinv751_chain_mont(tt);
+	oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpsqr751_mont(tt, tt);
+	oqs_sidh_cln16_fpmul751_mont(a, tt, a);
+}
+
+static __inline void power2_setup(digit_t *x, int mark, const unsigned int nwords) { // Set up the value 2^mark.
+	unsigned int i;
+
+	for (i = 0; i < nwords; i++)
+		x[i] = 0;
+
+	i = 0;
+	while (mark >= 0) {
+		if (mark < RADIX) {
+			x[i] = (digit_t) 1 << mark;
+		}
+		mark -= RADIX;
+		i += 1;
+	}
+}
+
+static __inline void fpinv751_mont_bingcd_partial(const oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t x1, unsigned int *k) { // Partial Montgomery inversion in GF(p751) via the binary GCD algorithm.
+	oqs_sidh_cln16_felm_t u, v, x2;
+	unsigned int cwords; // number of words necessary for x1, x2
+
+	oqs_sidh_cln16_fpcopy751(a, u);
+	oqs_sidh_cln16_fpcopy751((digit_t *) &p751, v);
+	oqs_sidh_cln16_fpzero751(x1);
+	x1[0] = 1;
+	oqs_sidh_cln16_fpzero751(x2);
+	*k = 0;
+
+	while (!is_felm_zero(v)) {
+		cwords = ((*k + 1) / RADIX) + 1;
+		if ((cwords < NWORDS_FIELD)) {
+			if (is_felm_even(v)) {
+				oqs_sidh_cln16_mp_shiftr1(v, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_shiftl1(x1, cwords);
+			} else if (is_felm_even(u)) {
+				oqs_sidh_cln16_mp_shiftr1(u, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_shiftl1(x2, cwords);
+			} else if (!is_felm_lt(v, u)) {
+				oqs_sidh_cln16_mp_sub(v, u, v, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_shiftr1(v, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_add(x1, x2, x2, cwords);
+				oqs_sidh_cln16_mp_shiftl1(x1, cwords);
+			} else {
+				oqs_sidh_cln16_mp_sub(u, v, u, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_shiftr1(u, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_add(x1, x2, x1, cwords);
+				oqs_sidh_cln16_mp_shiftl1(x2, cwords);
+			}
+		} else {
+			if (is_felm_even(v)) {
+				oqs_sidh_cln16_mp_shiftr1(v, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_shiftl1(x1, NWORDS_FIELD);
+			} else if (is_felm_even(u)) {
+				oqs_sidh_cln16_mp_shiftr1(u, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_shiftl1(x2, NWORDS_FIELD);
+			} else if (!is_felm_lt(v, u)) {
+				oqs_sidh_cln16_mp_sub(v, u, v, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_shiftr1(v, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_add751(x1, x2, x2);
+				oqs_sidh_cln16_mp_shiftl1(x1, NWORDS_FIELD);
+			} else {
+				oqs_sidh_cln16_mp_sub(u, v, u, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_shiftr1(u, NWORDS_FIELD);
+				oqs_sidh_cln16_mp_add751(x1, x2, x1);
+				oqs_sidh_cln16_mp_shiftl1(x2, NWORDS_FIELD);
+			}
+		}
+		*k += 1;
+	}
+
+	if (is_felm_lt((digit_t *) &p751, x1)) {
+		oqs_sidh_cln16_mp_sub(x1, (digit_t *) &p751, x1, NWORDS_FIELD);
+	}
+}
+
+void oqs_sidh_cln16_fpinv751_mont_bingcd(oqs_sidh_cln16_felm_t a) { // Field inversion via the binary GCD using Montgomery arithmetic, a = a^-1*R mod p751.
+	                                                                // SECURITY NOTE: This function does not run in constant-time.
+	oqs_sidh_cln16_felm_t x, t;
+	unsigned int k;
+
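+	// The partial binary GCD returns x = a^(-1)*2^k mod p751 (an "almost inverse"); the multiplications
+	// by R^2 and by 2^(2*768-k) below remove the stray power of two so that the output is in the Montgomery domain.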
+	fpinv751_mont_bingcd_partial(a, x, &k);
+	if (k < 768) {
+		oqs_sidh_cln16_fpmul751_mont(x, (digit_t *) &Montgomery_R2, x);
+		k += 768;
+	}
+	oqs_sidh_cln16_fpmul751_mont(x, (digit_t *) &Montgomery_R2, x);
+	power2_setup(t, 2 * 768 - k, NWORDS_FIELD);
+	oqs_sidh_cln16_fpmul751_mont(x, t, a);
+}
+
+/***********************************************/
+/************* GF(p^2) FUNCTIONS ***************/
+
+void oqs_sidh_cln16_fp2copy751(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c) { // Copy a GF(p751^2) element, c = a.
+	oqs_sidh_cln16_fpcopy751(a[0], c[0]);
+	oqs_sidh_cln16_fpcopy751(a[1], c[1]);
+}
+
+void oqs_sidh_cln16_fp2zero751(oqs_sidh_cln16_f2elm_t a) { // Zero a GF(p751^2) element, a = 0.
+	oqs_sidh_cln16_fpzero751(a[0]);
+	oqs_sidh_cln16_fpzero751(a[1]);
+}
+
+void oqs_sidh_cln16_fp2neg751(oqs_sidh_cln16_f2elm_t a) { // GF(p751^2) negation, a = -a in GF(p751^2).
+	oqs_sidh_cln16_fpneg751(a[0]);
+	oqs_sidh_cln16_fpneg751(a[1]);
+}
+
+__inline void oqs_sidh_cln16_fp2add751(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c) { // GF(p751^2) addition, c = a+b in GF(p751^2).
+	oqs_sidh_cln16_fpadd751(a[0], b[0], c[0]);
+	oqs_sidh_cln16_fpadd751(a[1], b[1], c[1]);
+}
+
+__inline void oqs_sidh_cln16_fp2sub751(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c) { // GF(p751^2) subtraction, c = a-b in GF(p751^2).
+	oqs_sidh_cln16_fpsub751(a[0], b[0], c[0]);
+	oqs_sidh_cln16_fpsub751(a[1], b[1], c[1]);
+}
+
+void oqs_sidh_cln16_fp2div2_751(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c) { // GF(p751^2) division by two, c = a/2  in GF(p751^2).
+	oqs_sidh_cln16_fpdiv2_751(a[0], c[0]);
+	oqs_sidh_cln16_fpdiv2_751(a[1], c[1]);
+}
+
+void oqs_sidh_cln16_fp2correction751(oqs_sidh_cln16_f2elm_t a) { // Modular correction, a = a in GF(p751^2).
+	oqs_sidh_cln16_fpcorrection751(a[0]);
+	oqs_sidh_cln16_fpcorrection751(a[1]);
+}
+
+void oqs_sidh_cln16_fp2sqr751_mont(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c) { // GF(p751^2) squaring using Montgomery arithmetic, c = a^2 in GF(p751^2).
+	                                                                                           // Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p751-1]
+	                                                                                           // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p751-1]
+	oqs_sidh_cln16_felm_t t1, t2, t3;
+
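+	// Complex squaring: (a0 + a1*i)^2 = (a0+a1)*(a0-a1) + (2*a0*a1)*i, so only two base-field multiplications are needed.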
+	oqs_sidh_cln16_mp_add751(a[0], a[1], t1);     // t1 = a0+a1
+	oqs_sidh_cln16_fpsub751(a[0], a[1], t2);      // t2 = a0-a1
+	oqs_sidh_cln16_mp_add751(a[0], a[0], t3);     // t3 = 2a0
+	oqs_sidh_cln16_fpmul751_mont(t1, t2, c[0]);   // c0 = (a0+a1)(a0-a1)
+	oqs_sidh_cln16_fpmul751_mont(t3, a[1], c[1]); // c1 = 2a0*a1
+}
+
+void oqs_sidh_cln16_fp2mul751_mont(const oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c) { // GF(p751^2) multiplication using Montgomery arithmetic, c = a*b in GF(p751^2).
+	                                                                                                                           // Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p751-1]
+	                                                                                                                           // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p751-1]
+	oqs_sidh_cln16_felm_t t1, t2;
+	oqs_sidh_cln16_dfelm_t tt1, tt2, tt3;
+	digit_t mask;
+	unsigned int i, borrow;
+
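+	// Karatsuba-style: c0 = a0*b0 - a1*b1 and c1 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1,
+	// so the GF(p751^2) product costs three base-field multiplications.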
+	oqs_sidh_cln16_mp_mul(a[0], b[0], tt1, NWORDS_FIELD);            // tt1 = a0*b0
+	oqs_sidh_cln16_mp_mul(a[1], b[1], tt2, NWORDS_FIELD);            // tt2 = a1*b1
+	oqs_sidh_cln16_mp_add751(a[0], a[1], t1);                        // t1 = a0+a1
+	oqs_sidh_cln16_mp_add751(b[0], b[1], t2);                        // t2 = b0+b1
+	borrow = oqs_sidh_cln16_mp_sub(tt1, tt2, tt3, 2 * NWORDS_FIELD); // tt3 = a0*b0 - a1*b1
+	mask = 0 - (digit_t) borrow;                                     // if tt3 < 0 then mask = 0xFF..F, else if tt3 >= 0 then mask = 0x00..0
+	borrow = 0;
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(borrow, tt3[NWORDS_FIELD + i], ((digit_t *) p751)[i] & mask, borrow, tt3[NWORDS_FIELD + i]);
+	}
+	oqs_sidh_cln16_rdc_mont(tt3, c[0]);                     // c[0] = a0*b0 - a1*b1
+	oqs_sidh_cln16_mp_add751x2(tt1, tt2, tt1);              // tt1 = a0*b0 + a1*b1
+	oqs_sidh_cln16_mp_mul(t1, t2, tt2, NWORDS_FIELD);       // tt2 = (a0+a1)*(b0+b1)
+	oqs_sidh_cln16_mp_sub(tt2, tt1, tt2, 2 * NWORDS_FIELD); // tt2 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
+	oqs_sidh_cln16_rdc_mont(tt2, c[1]);                     // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1
+}
+
+void oqs_sidh_cln16_to_fp2mont(const oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t mc) { // Conversion of a GF(p751^2) element to Montgomery representation,
+	                                                                                        // mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p751^2).
+
+	oqs_sidh_cln16_to_mont(a[0], mc[0]);
+	oqs_sidh_cln16_to_mont(a[1], mc[1]);
+}
+
+void oqs_sidh_cln16_from_fp2mont(const oqs_sidh_cln16_f2elm_t ma, oqs_sidh_cln16_f2elm_t c) { // Conversion of a GF(p751^2) element from Montgomery representation to standard representation,
+	                                                                                          // c_i = ma_i*R^(-1) = a_i in GF(p751^2).
+
+	oqs_sidh_cln16_from_mont(ma[0], c[0]);
+	oqs_sidh_cln16_from_mont(ma[1], c[1]);
+}
+
+void oqs_sidh_cln16_fp2inv751_mont(oqs_sidh_cln16_f2elm_t a) { // GF(p751^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2).
+	oqs_sidh_cln16_f2elm_t t1;
+
+	oqs_sidh_cln16_fpsqr751_mont(a[0], t1[0]);    // t10 = a0^2
+	oqs_sidh_cln16_fpsqr751_mont(a[1], t1[1]);    // t11 = a1^2
+	oqs_sidh_cln16_fpadd751(t1[0], t1[1], t1[0]); // t10 = a0^2+a1^2
+	oqs_sidh_cln16_fpinv751_mont(t1[0]);          // t10 = (a0^2+a1^2)^-1
+	oqs_sidh_cln16_fpneg751(a[1]);                // a = a0-i*a1
+	oqs_sidh_cln16_fpmul751_mont(a[0], t1[0], a[0]);
+	oqs_sidh_cln16_fpmul751_mont(a[1], t1[0], a[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1
+}
+
+void oqs_sidh_cln16_fp2inv751_mont_bingcd(oqs_sidh_cln16_f2elm_t a) { // GF(p751^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2)
+	// This uses the binary GCD for inversion in fp and is NOT constant time!!!
+	oqs_sidh_cln16_f2elm_t t1;
+
+	oqs_sidh_cln16_fpsqr751_mont(a[0], t1[0]);    // t10 = a0^2
+	oqs_sidh_cln16_fpsqr751_mont(a[1], t1[1]);    // t11 = a1^2
+	oqs_sidh_cln16_fpadd751(t1[0], t1[1], t1[0]); // t10 = a0^2+a1^2
+	oqs_sidh_cln16_fpinv751_mont_bingcd(t1[0]);   // t10 = (a0^2+a1^2)^-1
+	oqs_sidh_cln16_fpneg751(a[1]);                // a = a0-i*a1
+	oqs_sidh_cln16_fpmul751_mont(a[0], t1[0], a[0]);
+	oqs_sidh_cln16_fpmul751_mont(a[1], t1[0], a[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1
+}
+
+void oqs_sidh_cln16_swap_points_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, const digit_t option) { // Swap points over the base field.
+	                                                                                                                                                // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
+	digit_t temp;
+	unsigned int i;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		temp = option & (P->X[i] ^ Q->X[i]);
+		P->X[i] = temp ^ P->X[i];
+		Q->X[i] = temp ^ Q->X[i];
+		temp = option & (P->Z[i] ^ Q->Z[i]);
+		P->Z[i] = temp ^ P->Z[i];
+		Q->Z[i] = temp ^ Q->Z[i];
+	}
+}
+
+void oqs_sidh_cln16_swap_points(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, const digit_t option) { // Swap points.
+	                                                                                                                  // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P
+	digit_t temp;
+	unsigned int i;
+
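+	// Constant-time conditional swap: option is either all-zero or all-one bits and is used as a mask on the XOR differences.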
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		temp = option & (P->X[0][i] ^ Q->X[0][i]);
+		P->X[0][i] = temp ^ P->X[0][i];
+		Q->X[0][i] = temp ^ Q->X[0][i];
+		temp = option & (P->Z[0][i] ^ Q->Z[0][i]);
+		P->Z[0][i] = temp ^ P->Z[0][i];
+		Q->Z[0][i] = temp ^ Q->Z[0][i];
+		temp = option & (P->X[1][i] ^ Q->X[1][i]);
+		P->X[1][i] = temp ^ P->X[1][i];
+		Q->X[1][i] = temp ^ Q->X[1][i];
+		temp = option & (P->Z[1][i] ^ Q->Z[1][i]);
+		P->Z[1][i] = temp ^ P->Z[1][i];
+		Q->Z[1][i] = temp ^ Q->Z[1][i];
+	}
+}
+
+void oqs_sidh_cln16_select_f2elm(const oqs_sidh_cln16_f2elm_t x, const oqs_sidh_cln16_f2elm_t y, oqs_sidh_cln16_f2elm_t z, const digit_t option) { // Select either x or y depending on the value of option.
+	                                                                                                                                               // If option = 0 then z <- x, else if option = 0xFF...FF then z <- y.
+	unsigned int i;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		z[0][i] = (option & (x[0][i] ^ y[0][i])) ^ x[0][i];
+		z[1][i] = (option & (x[1][i] ^ y[1][i])) ^ x[1][i];
+	}
+}
+
+void oqs_sidh_cln16_mont_n_way_inv(const oqs_sidh_cln16_f2elm_t *vec, const int n, oqs_sidh_cln16_f2elm_t *out) { // n-way simultaneous inversion using Montgomery's trick.
+	                                                                                                              // SECURITY NOTE: This function does not run in constant time.
+	                                                                                                              //       Also, vec and out CANNOT be the same variable!
+	oqs_sidh_cln16_f2elm_t t1;
+	int i;
+
+	oqs_sidh_cln16_fp2copy751(vec[0], out[0]); // out[0] = vec[0]
+	for (i = 1; i < n; i++) {
+		oqs_sidh_cln16_fp2mul751_mont(out[i - 1], vec[i], out[i]); // out[i] = out[i-1]*vec[i]
+	}
+
+	oqs_sidh_cln16_fp2copy751(out[n - 1], t1);
+	oqs_sidh_cln16_fp2inv751_mont_bingcd(t1); // t1 = 1/out[n-1]
+
+	for (i = n - 1; i >= 1; i--) {
+		oqs_sidh_cln16_fp2mul751_mont(out[i - 1], t1, out[i]); // out[i] = t1*out[i-1]
+		oqs_sidh_cln16_fp2mul751_mont(t1, vec[i], t1);         // t1 = t1*vec[i]
+	}
+	oqs_sidh_cln16_fp2copy751(t1, out[0]); // out[0] = t1
+}
+
+void oqs_sidh_cln16_sqrt_Fp2_frac(const oqs_sidh_cln16_f2elm_t u, const oqs_sidh_cln16_f2elm_t v, oqs_sidh_cln16_f2elm_t y) { // Computes the square root y of the fraction u/v for square elements of GF(p751^2), using Hamburg's trick.
+	oqs_sidh_cln16_felm_t t0, t1, t2, t3, t4, t;
+	digit_t *u0 = (digit_t *) u[0], *u1 = (digit_t *) u[1];
+	digit_t *v0 = (digit_t *) v[0], *v1 = (digit_t *) v[1];
+	digit_t *y0 = (digit_t *) y[0], *y1 = (digit_t *) y[1];
+	unsigned int i;
+
+	oqs_sidh_cln16_fpsqr751_mont(v0, t0);     // t0 = v0^2
+	oqs_sidh_cln16_fpsqr751_mont(v1, t1);     // t1 = v1^2
+	oqs_sidh_cln16_fpadd751(t0, t1, t0);      // t0 = t0+t1
+	oqs_sidh_cln16_fpmul751_mont(u0, v0, t1); // t1 = u0*v0
+	oqs_sidh_cln16_fpmul751_mont(u1, v1, t2); // t2 = u1*v1
+	oqs_sidh_cln16_fpadd751(t1, t2, t1);      // t1 = t1+t2
+	oqs_sidh_cln16_fpmul751_mont(u1, v0, t2); // t2 = u1*v0
+	oqs_sidh_cln16_fpmul751_mont(u0, v1, t3); // t3 = u0*v1
+	oqs_sidh_cln16_fpsub751(t2, t3, t2);      // t2 = t2-t3
+	oqs_sidh_cln16_fpsqr751_mont(t1, t3);     // t3 = t1^2
+	oqs_sidh_cln16_fpsqr751_mont(t2, t4);     // t4 = t2^2
+	oqs_sidh_cln16_fpadd751(t3, t4, t3);      // t3 = t3+t4
+	oqs_sidh_cln16_fpcopy751(t3, t);
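+	// (p751+1)/4 = 2^370 * 3^239, so the exponentiation is done as 370 squarings followed by 239 cubings.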
+	for (i = 0; i < 370; i++) { // t = t3^((p+1)/4)
+		oqs_sidh_cln16_fpsqr751_mont(t, t);
+	}
+	for (i = 0; i < 239; i++) {
+		oqs_sidh_cln16_fpsqr751_mont(t, t3);
+		oqs_sidh_cln16_fpmul751_mont(t, t3, t);
+	}
+	oqs_sidh_cln16_fpadd751(t1, t, t);        // t = t+t1
+	oqs_sidh_cln16_fpadd751(t, t, t);         // t = 2*t
+	oqs_sidh_cln16_fpsqr751_mont(t0, t3);     // t3 = t0^2
+	oqs_sidh_cln16_fpmul751_mont(t0, t3, t3); // t3 = t3*t0
+	oqs_sidh_cln16_fpmul751_mont(t, t3, t3);  // t3 = t3*t
+	oqs_sidh_cln16_fpinv751_chain_mont(t3);   // t3 = t3^((p-3)/4)
+	oqs_sidh_cln16_fpmul751_mont(t0, t3, t3); // t3 = t3*t0
+	oqs_sidh_cln16_fpmul751_mont(t, t3, t1);  // t1 = t*t3
+	oqs_sidh_cln16_fpdiv2_751(t1, y0);        // y0 = t1/2
+	oqs_sidh_cln16_fpmul751_mont(t2, t3, y1); // y1 = t3*t2
+	oqs_sidh_cln16_fpsqr751_mont(t1, t1);     // t1 = t1^2
+	oqs_sidh_cln16_fpmul751_mont(t0, t1, t1); // t1 = t1*t0
+	oqs_sidh_cln16_fpcorrection751(t);
+	oqs_sidh_cln16_fpcorrection751(t1);
+
+	if (oqs_sidh_cln16_fpequal751_non_constant_time(t1, t) == false) {
+		oqs_sidh_cln16_fpcopy751(y0, t);
+		oqs_sidh_cln16_fpcopy751(y1, y0); // Swap y0 and y1
+		oqs_sidh_cln16_fpcopy751(t, y1);
+	}
+
+	oqs_sidh_cln16_fpsqr751_mont(y0, t0);     // t0 = y0^2
+	oqs_sidh_cln16_fpsqr751_mont(y1, t1);     // t1 = y1^2
+	oqs_sidh_cln16_fpsub751(t0, t1, t0);      // t0 = t0-t1
+	oqs_sidh_cln16_fpmul751_mont(t0, v0, t0); // t0 = t0*v0
+	oqs_sidh_cln16_fpmul751_mont(y0, y1, t1); // t1 = y0*y1
+	oqs_sidh_cln16_fpmul751_mont(v1, t1, t1); // t1 = t1*v1
+	oqs_sidh_cln16_fpadd751(t1, t1, t1);      // t1 = t1+t1
+	oqs_sidh_cln16_fpsub751(t0, t1, t0);      // t0 = t0-t1
+	oqs_sidh_cln16_fpcorrection751(t0);
+	oqs_sidh_cln16_fpcorrection751(u0);
+
+	if (oqs_sidh_cln16_fpequal751_non_constant_time(t0, u0) == false) {
+		oqs_sidh_cln16_fpneg751(y1); // y1 = -y1
+	}
+}
+
+void oqs_sidh_cln16_sqrt_Fp2(const oqs_sidh_cln16_f2elm_t u, oqs_sidh_cln16_f2elm_t y) { // Computes a square root y of a square element u of GF(p751^2), using Hamburg's trick.
+	oqs_sidh_cln16_felm_t t0, t1, t2, t3;
+	digit_t *a = (digit_t *) u[0], *b = (digit_t *) u[1];
+	unsigned int i;
+
+	oqs_sidh_cln16_fpsqr751_mont(a, t0); // t0 = a^2
+	oqs_sidh_cln16_fpsqr751_mont(b, t1); // t1 = b^2
+	oqs_sidh_cln16_fpadd751(t0, t1, t0); // t0 = t0+t1
+	oqs_sidh_cln16_fpcopy751(t0, t1);
+	for (i = 0; i < 370; i++) { // t1 = t0^((p+1)/4)
+		oqs_sidh_cln16_fpsqr751_mont(t1, t1);
+	}
+	for (i = 0; i < 239; i++) {
+		oqs_sidh_cln16_fpsqr751_mont(t1, t0);
+		oqs_sidh_cln16_fpmul751_mont(t1, t0, t1);
+	}
+	oqs_sidh_cln16_fpadd751(a, t1, t0); // t0 = a+t1
+	oqs_sidh_cln16_fpdiv2_751(t0, t0);  // t0 = t0/2
+	oqs_sidh_cln16_fpcopy751(t0, t2);
+	oqs_sidh_cln16_fpinv751_chain_mont(t2);   // t2 = t0^((p-3)/4)
+	oqs_sidh_cln16_fpmul751_mont(t0, t2, t1); // t1 = t2*t0
+	oqs_sidh_cln16_fpmul751_mont(t2, b, t2);  // t2 = t2*b
+	oqs_sidh_cln16_fpdiv2_751(t2, t2);        // t2 = t2/2
+	oqs_sidh_cln16_fpsqr751_mont(t1, t3);     // t3 = t1^2
+	oqs_sidh_cln16_fpcorrection751(t0);
+	oqs_sidh_cln16_fpcorrection751(t3);
+
+	if (oqs_sidh_cln16_fpequal751_non_constant_time(t0, t3) == true) {
+		oqs_sidh_cln16_fpcopy751(t1, y[0]);
+		oqs_sidh_cln16_fpcopy751(t2, y[1]);
+	} else {
+		oqs_sidh_cln16_fpneg751(t1);
+		oqs_sidh_cln16_fpcopy751(t2, y[0]);
+		oqs_sidh_cln16_fpcopy751(t1, y[1]);
+	}
+}
+
+void oqs_sidh_cln16_cube_Fp2_cycl(oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_felm_t one) { // Cyclotomic cubing on elements of norm 1, using a^(p+1) = 1.
+	oqs_sidh_cln16_felm_t t0;
+
+	oqs_sidh_cln16_fpadd751(a[0], a[0], t0);      // t0 = a0 + a0
+	oqs_sidh_cln16_fpsqr751_mont(t0, t0);         // t0 = t0^2
+	oqs_sidh_cln16_fpsub751(t0, one, t0);         // t0 = t0 - 1
+	oqs_sidh_cln16_fpmul751_mont(a[1], t0, a[1]); // a1 = t0*a1
+	oqs_sidh_cln16_fpsub751(t0, one, t0);
+	oqs_sidh_cln16_fpsub751(t0, one, t0);         // t0 = t0 - 2
+	oqs_sidh_cln16_fpmul751_mont(a[0], t0, a[0]); // a0 = t0*a0
+}
+
+void oqs_sidh_cln16_sqr_Fp2_cycl(oqs_sidh_cln16_f2elm_t a, const oqs_sidh_cln16_felm_t one) { // Cyclotomic squaring on elements of norm 1, using a^(p+1) = 1.
+	oqs_sidh_cln16_felm_t t0;
+
+	oqs_sidh_cln16_fpadd751(a[0], a[1], t0); // t0 = a0 + a1
+	oqs_sidh_cln16_fpsqr751_mont(t0, t0);    // t0 = t0^2
+	oqs_sidh_cln16_fpsub751(t0, one, a[1]);  // a1 = t0 - 1
+	oqs_sidh_cln16_fpsqr751_mont(a[0], t0);  // t0 = a0^2
+	oqs_sidh_cln16_fpadd751(t0, t0, t0);     // t0 = t0 + t0
+	oqs_sidh_cln16_fpsub751(t0, one, a[0]);  // a0 = t0 - 1
+}
+
+__inline void oqs_sidh_cln16_inv_Fp2_cycl(oqs_sidh_cln16_f2elm_t a) { // Cyclotomic inversion, a^(p+1) = 1 => a^(-1) = a^p = a0 - i*a1.
+
+	oqs_sidh_cln16_fpneg751(a[1]);
+}
+
+void oqs_sidh_cln16_exp6_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, const uint64_t t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res) { // Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is 6 bits at most.
+	unsigned int i, bit;
+
+	oqs_sidh_cln16_fp2zero751(res);
+	oqs_sidh_cln16_fpcopy751(one, res[0]); // res = 1
+
+	if (t != 0) {
+		for (i = 0; i < 6; i++) {
+			oqs_sidh_cln16_sqr_Fp2_cycl(res, one);
+			bit = 1 & (t >> (5 - i));
+			if (bit == 1) {
+				oqs_sidh_cln16_fp2mul751_mont(res, y, res);
+			}
+		}
+	}
+}
+
+void oqs_sidh_cln16_exp21_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, const uint64_t t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res) { // Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is 21 bits at most.
+	unsigned int i, bit;
+
+	oqs_sidh_cln16_fp2zero751(res);
+	oqs_sidh_cln16_fpcopy751(one, res[0]); // res = 1
+
+	if (t != 0) {
+		for (i = 0; i < 21; i++) {
+			oqs_sidh_cln16_sqr_Fp2_cycl(res, one);
+			bit = 1 & (t >> (20 - i));
+			if (bit == 1) {
+				oqs_sidh_cln16_fp2mul751_mont(res, y, res);
+			}
+		}
+	}
+}
+
+static bool is_zero(digit_t *a, unsigned int nwords) { // Check if multiprecision element is zero.
+	                                                   // SECURITY NOTE: This function does not run in constant time.
+	unsigned int i;
+
+	for (i = 0; i < nwords; i++) {
+		if (a[i] != 0) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+void oqs_sidh_cln16_exp_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, uint64_t *t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res, int length) { // Exponentiation y^t via square and multiply in the cyclotomic group.
+	                                                                                                                                                     // This function uses 64-bit digits for representing exponents.
+	unsigned int nword, bit, nwords = (length + 63) / 64;
+	int i;
+
+	oqs_sidh_cln16_fp2zero751(res);
+	oqs_sidh_cln16_fpcopy751(one, res[0]); // res = 1
+
+	if (!is_zero((digit_t *) t, nwords)) { // Is t = 0?
+		for (i = length; i >= 0; i--) {
+			oqs_sidh_cln16_sqr_Fp2_cycl(res, one);
+			nword = i >> 6;
+			bit = 1 & (t[nword] >> (i - (nword << 6)));
+			if (bit == 1) {
+				oqs_sidh_cln16_fp2mul751_mont(res, y, res);
+			}
+		}
+	}
+}
+
+void oqs_sidh_cln16_exp84_Fp2_cycl(const oqs_sidh_cln16_f2elm_t y, uint64_t *t, const oqs_sidh_cln16_felm_t one, oqs_sidh_cln16_f2elm_t res) { // Exponentiation y^t via square and multiply in the cyclotomic group. Exponent t is 84 bits at most
+	                                                                                                                                           // This function uses 64-bit digits for representing exponents.
+	unsigned int nword, bit, nwords = 2;
+	int i;
+
+	oqs_sidh_cln16_fp2zero751(res);
+	oqs_sidh_cln16_fpcopy751(one, res[0]); // res = 1
+
+	if (!is_zero((digit_t *) t, nwords)) { // Is t = 0?
+		for (i = 83; i >= 0; i--) {
+			oqs_sidh_cln16_sqr_Fp2_cycl(res, one);
+			nword = i >> 6;
+			bit = 1 & (t[nword] >> (i - (nword << 6)));
+			if (bit == 1) {
+				oqs_sidh_cln16_fp2mul751_mont(res, y, res);
+			}
+		}
+	}
+}
+
+bool oqs_sidh_cln16_is_cube_Fp2(oqs_sidh_cln16_f2elm_t u, PCurveIsogenyStruct CurveIsogeny) { // Check if a GF(p751^2) element is a cube.
+	oqs_sidh_cln16_f2elm_t v;
+	oqs_sidh_cln16_felm_t t0, zero = {0}, one = {0};
+	unsigned int e;
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one);
+	oqs_sidh_cln16_fpsqr751_mont(u[0], v[0]);       // v0 = u0^2
+	oqs_sidh_cln16_fpsqr751_mont(u[1], v[1]);       // v1 = u1^2
+	oqs_sidh_cln16_fpadd751(v[0], v[1], t0);        // t0 = v0+v1
+	oqs_sidh_cln16_fpinv751_mont_bingcd(t0);        // Fp inversion with binary Euclid
+	oqs_sidh_cln16_fpsub751(v[0], v[1], v[0]);      // v0 = v0-v1
+	oqs_sidh_cln16_fpmul751_mont(u[0], u[1], v[1]); // v1 = u0*u1
+	oqs_sidh_cln16_fpadd751(v[1], v[1], v[1]);      // v1 = 2*v1
+	oqs_sidh_cln16_fpneg751(v[1]);                  // v1 = -v1
+	oqs_sidh_cln16_fpmul751_mont(v[0], t0, v[0]);   // v0 = v0*t0
+	oqs_sidh_cln16_fpmul751_mont(v[1], t0, v[1]);   // v1 = v1*t0
+
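+	// At this point v = u^(p751-1) (the conjugate of u over its norm). Raising v to 2^372 * 3^238 = (p751+1)/3
+	// gives u^((p751^2-1)/3), which equals 1 exactly when u is a cube in GF(p751^2).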
+	for (e = 0; e < 372; e++) {
+		oqs_sidh_cln16_sqr_Fp2_cycl(v, one);
+	}
+
+	for (e = 0; e < 238; e++) {
+		oqs_sidh_cln16_cube_Fp2_cycl(v, one);
+	}
+
+	oqs_sidh_cln16_fp2correction751(v);
+
+	if (oqs_sidh_cln16_fpequal751_non_constant_time(v[0], one) == true && oqs_sidh_cln16_fpequal751_non_constant_time(v[1], zero) == true) { // v == 1?
+		return true;
+	} else {
+		return false;
+	}
+}
+
+void oqs_sidh_cln16_multiply(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords.
+	                                                                                                      // NOTE: a and c CANNOT be the same variable!
+	unsigned int i, j;
+	digit_t t = 0, u = 0, v = 0, UV[2];
+	unsigned int carry = 0;
+
+	for (i = 0; i < nwords; i++) {
+		for (j = 0; j <= i; j++) {
+			MUL(a[j], b[i - j], UV + 1, UV[0]);
+			ADDC(0, UV[0], v, carry, v);
+			ADDC(carry, UV[1], u, carry, u);
+			t += carry;
+		}
+		c[i] = v;
+		v = u;
+		u = t;
+		t = 0;
+	}
+
+	for (i = nwords; i < 2 * nwords - 1; i++) {
+		for (j = i - nwords + 1; j < nwords; j++) {
+			MUL(a[j], b[i - j], UV + 1, UV[0]);
+			ADDC(0, UV[0], v, carry, v);
+			ADDC(carry, UV[1], u, carry, u);
+			t += carry;
+		}
+		c[i] = v;
+		v = u;
+		u = t;
+		t = 0;
+	}
+	c[2 * nwords - 1] = v;
+}
+
+void oqs_sidh_cln16_Montgomery_multiply_mod_order(const digit_t *ma, const digit_t *mb, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime) { // Montgomery multiplication modulo the group order, mc = ma*mb*r' mod order, where ma,mb,mc in [0, order-1].
+	                                                                                                                                                           // ma, mb and mc are assumed to be in Montgomery representation.
+	                                                                                                                                                           // The Montgomery constant r' = -r^(-1) mod 2^(log_2(r)) is the value "Montgomery_rprime", where r is the order.
+	unsigned int i, cout = 0, bout = 0;
+	digit_t mask, P[2 * SIDH_NWORDS_ORDER], Q[2 * SIDH_NWORDS_ORDER], temp[2 * SIDH_NWORDS_ORDER];
+
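+	// Standard Montgomery reduction: Q = P*r' (only its low 384 bits are used), then mc = (P + Q*order)/2^384,
+	// followed by a constant-time final subtraction of the order.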
+	oqs_sidh_cln16_multiply(ma, mb, P, SIDH_NWORDS_ORDER);               // P = ma * mb
+	oqs_sidh_cln16_multiply(P, Montgomery_rprime, Q, SIDH_NWORDS_ORDER); // Q = P * r' mod 2^(log_2(r))
+	oqs_sidh_cln16_multiply(Q, order, temp, SIDH_NWORDS_ORDER);          // temp = Q * r
+	cout = oqs_sidh_cln16_mp_add(P, temp, temp, 2 * SIDH_NWORDS_ORDER);  // (cout, temp) = P + Q * r
+
+	for (i = 0; i < SIDH_NWORDS_ORDER; i++) { // (cout, mc) = (P + Q * r)/2^(log_2(r))
+		mc[i] = temp[SIDH_NWORDS_ORDER + i];
+	}
+
+	// Final, constant-time subtraction
+	bout = oqs_sidh_cln16_mp_sub(mc, order, mc, SIDH_NWORDS_ORDER); // (cout, mc) = (cout, mc) - r
+	mask = (digit_t) cout - (digit_t) bout;                         // if (cout, mc) >= 0 then mask = 0x00..0, else if (cout, mc) < 0 then mask = 0xFF..F
+
+	for (i = 0; i < SIDH_NWORDS_ORDER; i++) { // temp = mask & r
+		temp[i] = (order[i] & mask);
+	}
+	oqs_sidh_cln16_mp_add(mc, temp, mc, SIDH_NWORDS_ORDER); //  mc = mc + (mask & r)
+}
+
+void oqs_sidh_cln16_Montgomery_inversion_mod_order(const digit_t *ma, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime) { // (Non-constant time) Montgomery inversion modulo the curve order using a^(-1) = a^(order-2) mod order
+	                                                                                                                                         // This function uses the sliding-window method.
+	sdigit_t i = 384;
+	unsigned int j, nwords = SIDH_NWORDS_ORDER, nbytes = (unsigned int) i / 8;
+	digit_t temp, bit = 0, count, mod2, k_EXPON = 5; // Fixing parameter k to 5 for the sliding-window method
+	digit_t modulus2[SIDH_NWORDS_ORDER] = {0}, npoints = 16;
+	digit_t input_a[SIDH_NWORDS_ORDER];
+	digit_t table[16][SIDH_NWORDS_ORDER];                    // Fixing the number of precomputed elements to 16 (assuming k = 5)
+	digit_t mask = (digit_t) 1 << (sizeof(digit_t) * 8 - 1); // 0x800...000
+	digit_t mask2 = ~((digit_t)(-1) >> k_EXPON);             // 0xF800...000, assuming k = 5
+
+	// SECURITY NOTE: this function does not run in constant time.
+
+	modulus2[0] = 2;
+	oqs_sidh_cln16_mp_sub(order, modulus2, modulus2, nwords); // modulus-2
+
+	// Precomputation stage
+	memmove((unsigned char *) &table[0], (unsigned char *) ma, nbytes);                      // table[0] = ma
+	oqs_sidh_cln16_Montgomery_multiply_mod_order(ma, ma, input_a, order, Montgomery_rprime); // ma^2
+	for (j = 0; j < npoints - 1; j++) {
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(table[j], input_a, table[j + 1], order, Montgomery_rprime); // table[j+1] = table[j] * ma^2
+	}
+
+	while (bit != 1) { // Shift (modulus-2) to the left until getting first bit 1
+		i--;
+		temp = 0;
+		for (j = 0; j < nwords; j++) {
+			bit = (modulus2[j] & mask) >> (sizeof(digit_t) * 8 - 1);
+			modulus2[j] = (modulus2[j] << 1) | temp;
+			temp = bit;
+		}
+	}
+
+	// Evaluation stage
+	memmove((unsigned char *) mc, (unsigned char *) ma, nbytes);
+	bit = (modulus2[nwords - 1] & mask) >> (sizeof(digit_t) * 8 - 1);
+	while (i > 0) {
+		if (bit == 0) {                                                                         // Square accumulated value because bit = 0 and shift (modulus-2) one bit to the left
+			oqs_sidh_cln16_Montgomery_multiply_mod_order(mc, mc, mc, order, Montgomery_rprime); // mc = mc^2
+			i--;
+			for (j = (nwords - 1); j > 0; j--) {
+				SHIFTL(modulus2[j], modulus2[j - 1], 1, modulus2[j], RADIX);
+			}
+			modulus2[0] = modulus2[0] << 1;
+		} else { // "temp" will store the longest odd bitstring with "count" bits s.t. temp <= 2^k - 1
+			count = k_EXPON;
+			temp = (modulus2[nwords - 1] & mask2) >> (sizeof(digit_t) * 8 - k_EXPON); // Extracting next k bits to the left
+			mod2 = temp & 1;
+			while (mod2 == 0) { // if even then shift to the right and adjust count
+				temp = (temp >> 1);
+				mod2 = temp & 1;
+				count--;
+			}
+			for (j = 0; j < count; j++) { // mc = mc^count
+				oqs_sidh_cln16_Montgomery_multiply_mod_order(mc, mc, mc, order, Montgomery_rprime);
+			}
+			oqs_sidh_cln16_Montgomery_multiply_mod_order(mc, table[(temp - 1) >> 1], mc, order, Montgomery_rprime); // mc = mc * table[(temp-1)/2]
+			i = i - count;
+
+			for (j = (nwords - 1); j > 0; j--) { // Shift (modulus-2) "count" bits to the left
+				SHIFTL(modulus2[j], modulus2[j - 1], count, modulus2[j], RADIX);
+			}
+			modulus2[0] = modulus2[0] << count;
+		}
+		bit = (modulus2[nwords - 1] & mask) >> (sizeof(digit_t) * 8 - 1);
+	}
+}
+
+static __inline unsigned int is_zero_mod_order(const digit_t *x) { // Is x = 0? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise
+	                                                               // SECURITY NOTE: This function does not run in constant time.
+	unsigned int i;
+
+	for (i = 0; i < SIDH_NWORDS_ORDER; i++) {
+		if (x[i] != 0)
+			return false;
+	}
+	return true;
+}
+
+static __inline unsigned int is_even_mod_order(const digit_t *x) { // Is x even? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
+	return (unsigned int) ((x[0] & 1) ^ 1);
+}
+
+static __inline unsigned int is_lt_mod_order(const digit_t *x, const digit_t *y) { // Is x < y? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise.
+	                                                                               // SECURITY NOTE: This function does not run in constant time.
+	int i;
+
+	for (i = SIDH_NWORDS_ORDER - 1; i >= 0; i--) {
+		if (x[i] < y[i]) {
+			return true;
+		} else if (x[i] > y[i]) {
+			return false;
+		}
+	}
+	return false;
+}
+
+static __inline void Montgomery_inversion_mod_order_bingcd_partial(const digit_t *a, digit_t *x1, unsigned int *k, const digit_t *order) { // Partial Montgomery inversion modulo order.
+	digit_t u[SIDH_NWORDS_ORDER], v[SIDH_NWORDS_ORDER], x2[SIDH_NWORDS_ORDER] = {0};
+	unsigned int cwords; // number of words necessary for x1, x2
+
+	oqs_sidh_cln16_copy_words(a, u, SIDH_NWORDS_ORDER);
+	oqs_sidh_cln16_copy_words(order, v, SIDH_NWORDS_ORDER);
+	oqs_sidh_cln16_copy_words(x2, x1, SIDH_NWORDS_ORDER);
+	x1[0] = 1;
+	*k = 0;
+
+	while (!is_zero_mod_order(v)) {
+		cwords = ((*k + 1) / RADIX) + 1;
+		if ((cwords < SIDH_NWORDS_ORDER)) {
+			if (is_even_mod_order(v)) {
+				oqs_sidh_cln16_mp_shiftr1(v, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_shiftl1(x1, cwords);
+			} else if (is_even_mod_order(u)) {
+				oqs_sidh_cln16_mp_shiftr1(u, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_shiftl1(x2, cwords);
+			} else if (!is_lt_mod_order(v, u)) {
+				oqs_sidh_cln16_mp_sub(v, u, v, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_shiftr1(v, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_add(x1, x2, x2, cwords);
+				oqs_sidh_cln16_mp_shiftl1(x1, cwords);
+			} else {
+				oqs_sidh_cln16_mp_sub(u, v, u, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_shiftr1(u, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_add(x1, x2, x1, cwords);
+				oqs_sidh_cln16_mp_shiftl1(x2, cwords);
+			}
+		} else {
+			if (is_even_mod_order(v)) {
+				oqs_sidh_cln16_mp_shiftr1(v, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_shiftl1(x1, SIDH_NWORDS_ORDER);
+			} else if (is_even_mod_order(u)) {
+				oqs_sidh_cln16_mp_shiftr1(u, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_shiftl1(x2, SIDH_NWORDS_ORDER);
+			} else if (!is_lt_mod_order(v, u)) {
+				oqs_sidh_cln16_mp_sub(v, u, v, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_shiftr1(v, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_add(x1, x2, x2, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_shiftl1(x1, SIDH_NWORDS_ORDER);
+			} else {
+				oqs_sidh_cln16_mp_sub(u, v, u, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_shiftr1(u, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_add(x1, x2, x1, SIDH_NWORDS_ORDER);
+				oqs_sidh_cln16_mp_shiftl1(x2, SIDH_NWORDS_ORDER);
+			}
+		}
+		*k += 1;
+	}
+
+	if (is_lt_mod_order(order, x1)) {
+		oqs_sidh_cln16_mp_sub(x1, order, x1, SIDH_NWORDS_ORDER);
+	}
+}
+
+void oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(const digit_t *a, digit_t *c, const digit_t *order, const digit_t *Montgomery_rprime, const digit_t *Montgomery_Rprime) { // Montgomery inversion modulo order, a = a^(-1)*R mod order.
+	digit_t x[SIDH_NWORDS_ORDER], t[SIDH_NWORDS_ORDER];
+	unsigned int k;
+
+	Montgomery_inversion_mod_order_bingcd_partial(a, x, &k, order);
+	if (k < 384) {
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(x, Montgomery_Rprime, x, order, Montgomery_rprime);
+		k += 384;
+	}
+	oqs_sidh_cln16_Montgomery_multiply_mod_order(x, Montgomery_Rprime, x, order, Montgomery_rprime);
+	power2_setup(t, 2 * 384 - k, SIDH_NWORDS_ORDER);
+	oqs_sidh_cln16_Montgomery_multiply_mod_order(x, t, c, order, Montgomery_rprime);
+}
+
+void oqs_sidh_cln16_to_Montgomery_mod_order(const digit_t *a, digit_t *mc, const digit_t *order, const digit_t *Montgomery_rprime, const digit_t *Montgomery_Rprime) { // Conversion of elements in Z_r to Montgomery representation, where the order r is up to 384 bits.
+
+	oqs_sidh_cln16_Montgomery_multiply_mod_order(a, Montgomery_Rprime, mc, order, Montgomery_rprime);
+}
+
+void oqs_sidh_cln16_from_Montgomery_mod_order(const digit_t *ma, digit_t *c, const digit_t *order, const digit_t *Montgomery_rprime) { // Conversion of elements in Z_r from Montgomery to standard representation, where the order is up to 384 bits.
+	digit_t one[SIDH_NWORDS_ORDER] = {0};
+	one[0] = 1;
+
+	oqs_sidh_cln16_Montgomery_multiply_mod_order(ma, one, c, order, Montgomery_rprime);
+}
+
+void oqs_sidh_cln16_inv_mod_orderA(const digit_t *a, digit_t *c) { // Inversion modulo an even integer of the form 2^m.
+	                                                               // Algorithm 3: Explicit Quadratic Modular inverse modulo 2^m from Dumas '12: http://arxiv.org/pdf/1209.6626.pdf
+	                                                               // NOTE: This function is hardwired for the current parameters using 2^372.
+	unsigned int i, f, s = 0;
+	digit_t am1[SIDH_NWORDS_ORDER] = {0};
+	digit_t tmp1[SIDH_NWORDS_ORDER] = {0};
+	digit_t tmp2[2 * SIDH_NWORDS_ORDER] = {0};
+	digit_t one[SIDH_NWORDS_ORDER] = {0};
+	digit_t order[SIDH_NWORDS_ORDER] = {0};
+	digit_t mask = (digit_t)(-1) >> 12;
+	bool equal = true;
+
+	order[SIDH_NWORDS_ORDER - 1] = (digit_t) 1 << (sizeof(digit_t) * 8 - 12); // Load most significant digit of Alice's order
+	one[0] = 1;
+
+	for (i = 0; i < SIDH_NWORDS_ORDER; i++) {
+		if (a[i] != one[0])
+			equal = false;
+	}
+	if (equal) {
+		oqs_sidh_cln16_copy_words(a, c, SIDH_NWORDS_ORDER);
+	} else {
+		oqs_sidh_cln16_mp_sub(a, one, am1, SIDH_NWORDS_ORDER); // am1 = a-1
+		oqs_sidh_cln16_mp_sub(order, am1, c, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_mp_add(c, one, c, SIDH_NWORDS_ORDER); // c = 2^m - a + 2
+
+		oqs_sidh_cln16_copy_words(am1, tmp1, SIDH_NWORDS_ORDER);
+		while ((tmp1[0] & (digit_t) 1) == 0) {
+			s += 1;
+			oqs_sidh_cln16_mp_shiftr1(tmp1, SIDH_NWORDS_ORDER);
+		}
+
+		f = 372 / s;
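+		// Each pass squares am1 and multiplies c by (am1 + 1) modulo 2^372, roughly doubling the number of
+		// correct low-order bits of the inverse (Dumas' quadratic iteration).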
+		for (i = 1; i < f; i <<= 1) {
+			oqs_sidh_cln16_multiply(am1, am1, tmp2, SIDH_NWORDS_ORDER); // tmp2 = am1^2
+			oqs_sidh_cln16_copy_words(tmp2, am1, SIDH_NWORDS_ORDER);
+			am1[SIDH_NWORDS_ORDER - 1] &= mask;                        // am1 = tmp2 mod 2^e
+			oqs_sidh_cln16_mp_add(am1, one, tmp1, SIDH_NWORDS_ORDER);  // tmp1 = am1 + 1
+			tmp1[SIDH_NWORDS_ORDER - 1] &= mask;                       // mod 2^e
+			oqs_sidh_cln16_multiply(c, tmp1, tmp2, SIDH_NWORDS_ORDER); // c = c*tmp1
+			oqs_sidh_cln16_copy_words(tmp2, c, SIDH_NWORDS_ORDER);
+			c[SIDH_NWORDS_ORDER - 1] &= mask; // mod 2^e
+		}
+	}
+}
diff --git a/crypt/liboqs/kex_sidh_cln16/generic/fp_generic.c b/crypt/liboqs/kex_sidh_cln16/generic/fp_generic.c
new file mode 100644
index 0000000000000000000000000000000000000000..9a19019bd5e712af1e29d539fd4b9ff5f54839a9
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/generic/fp_generic.c
@@ -0,0 +1,234 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral
+*       Diffie-Hellman key exchange.
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: portable modular arithmetic
+*
+*********************************************************************************************/
+
+#include "../SIDH_internal.h"
+
+// Global constants
+extern const uint64_t p751[NWORDS_FIELD];
+extern const uint64_t p751p1[NWORDS_FIELD];
+extern const uint64_t p751x2[NWORDS_FIELD];
+
+__inline void oqs_sidh_cln16_fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modular addition, c = a+b mod p751.
+	                                                                                    // Inputs: a, b in [0, 2*p751-1]
+	                                                                                    // Output: c in [0, 2*p751-1]
+	unsigned int i, carry = 0;
+	digit_t mask;
+
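+	// Add, then subtract 2*p751; if that subtraction borrows, the borrow-derived mask adds 2*p751 back,
+	// keeping the result in [0, 2*p751-1] without a data-dependent branch.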
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(carry, a[i], b[i], carry, c[i]);
+	}
+
+	carry = 0;
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		SUBC(carry, c[i], ((digit_t *) p751x2)[i], carry, c[i]);
+	}
+	mask = 0 - (digit_t) carry;
+
+	carry = 0;
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(carry, c[i], ((digit_t *) p751x2)[i] & mask, carry, c[i]);
+	}
+}
+
+__inline void oqs_sidh_cln16_fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modular subtraction, c = a-b mod p751.
+	                                                                                    // Inputs: a, b in [0, 2*p751-1]
+	                                                                                    // Output: c in [0, 2*p751-1]
+	unsigned int i, borrow = 0;
+	digit_t mask;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		SUBC(borrow, a[i], b[i], borrow, c[i]);
+	}
+	mask = 0 - (digit_t) borrow;
+
+	borrow = 0;
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(borrow, c[i], ((digit_t *) p751x2)[i] & mask, borrow, c[i]);
+	}
+}
+
+__inline void oqs_sidh_cln16_fpneg751(digit_t *a) { // Modular negation, a = -a mod p751.
+	                                                // Input/output: a in [0, 2*p751-1]
+	unsigned int i, borrow = 0;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		SUBC(borrow, ((digit_t *) p751x2)[i], a[i], borrow, a[i]);
+	}
+}
+
+void oqs_sidh_cln16_fpdiv2_751(const digit_t *a, digit_t *c) { // Modular division by two, c = a/2 mod p751.
+	                                                           // Input : a in [0, 2*p751-1]
+	                                                           // Output: c in [0, 2*p751-1]
+	unsigned int i, carry = 0;
+	digit_t mask;
+
+	mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p751
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(carry, a[i], ((digit_t *) p751)[i] & mask, carry, c[i]);
+	}
+
+	oqs_sidh_cln16_mp_shiftr1(c, NWORDS_FIELD);
+}
+
+void oqs_sidh_cln16_fpcorrection751(digit_t *a) { // Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1].
+	unsigned int i, borrow = 0;
+	digit_t mask;
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		SUBC(borrow, a[i], ((digit_t *) p751)[i], borrow, a[i]);
+	}
+	mask = 0 - (digit_t) borrow;
+
+	borrow = 0;
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		ADDC(borrow, a[i], ((digit_t *) p751)[i] & mask, borrow, a[i]);
+	}
+}
+
+void oqs_sidh_cln16_digit_x_digit(const digit_t a, const digit_t b, digit_t *c) { // Digit multiplication, digit * digit -> 2-digit result
+	register digit_t al, ah, bl, bh, temp;
+	digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry;
+	digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t) * 4), mask_high = (digit_t)(-1) << (sizeof(digit_t) * 4);
+
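+	// Schoolbook digit multiplication: split a and b into half-digits and accumulate the four partial products.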
+	al = a & mask_low;               // Low part
+	ah = a >> (sizeof(digit_t) * 4); // High part
+	bl = b & mask_low;
+	bh = b >> (sizeof(digit_t) * 4);
+
+	albl = al * bl;
+	albh = al * bh;
+	ahbl = ah * bl;
+	ahbh = ah * bh;
+	c[0] = albl & mask_low; // C00
+
+	res1 = albl >> (sizeof(digit_t) * 4);
+	res2 = ahbl & mask_low;
+	res3 = albh & mask_low;
+	temp = res1 + res2 + res3;
+	carry = temp >> (sizeof(digit_t) * 4);
+	c[0] ^= temp << (sizeof(digit_t) * 4); // C01
+
+	res1 = ahbl >> (sizeof(digit_t) * 4);
+	res2 = albh >> (sizeof(digit_t) * 4);
+	res3 = ahbh & mask_low;
+	temp = res1 + res2 + res3 + carry;
+	c[1] = temp & mask_low; // C10
+	carry = temp & mask_high;
+	c[1] ^= (ahbh & mask_high) + carry; // C11
+}
+
+void oqs_sidh_cln16_mp_mul_schoolbook(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision schoolbook multiply, c = a*b, where lng(a) = lng(b) = nwords.
+	unsigned int i, j;
+	digit_t u, v, UV[2];
+	unsigned int carry = 0;
+
+	for (i = 0; i < (2 * nwords); i++)
+		c[i] = 0;
+
+	for (i = 0; i < nwords; i++) {
+		u = 0;
+		for (j = 0; j < nwords; j++) {
+			MUL(a[i], b[j], UV + 1, UV[0]);
+			ADDC(0, UV[0], u, carry, v);
+			u = UV[1] + carry;
+			ADDC(0, c[i + j], v, carry, v);
+			u = u + carry;
+			c[i + j] = v;
+		}
+		c[nwords + i] = u;
+	}
+}
+
+void oqs_sidh_cln16_mp_mul_comba(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords.
+	unsigned int i, j;
+	digit_t t = 0, u = 0, v = 0, UV[2];
+	unsigned int carry = 0;
+
+	for (i = 0; i < nwords; i++) {
+		for (j = 0; j <= i; j++) {
+			MUL(a[j], b[i - j], UV + 1, UV[0]);
+			ADDC(0, UV[0], v, carry, v);
+			ADDC(carry, UV[1], u, carry, u);
+			t += carry;
+		}
+		c[i] = v;
+		v = u;
+		u = t;
+		t = 0;
+	}
+
+	for (i = nwords; i < 2 * nwords - 1; i++) {
+		for (j = i - nwords + 1; j < nwords; j++) {
+			MUL(a[j], b[i - j], UV + 1, UV[0]);
+			ADDC(0, UV[0], v, carry, v);
+			ADDC(carry, UV[1], u, carry, u);
+			t += carry;
+		}
+		c[i] = v;
+		v = u;
+		u = t;
+		t = 0;
+	}
+	c[2 * nwords - 1] = v;
+}
+
+void oqs_sidh_cln16_rdc_mont(const oqs_sidh_cln16_dfelm_t ma, oqs_sidh_cln16_felm_t mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751.
+	                                                                                      // mc = ma*R^-1 mod p751x2, where R = 2^768.
+	                                                                                      // If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
+	                                                                                      // ma is assumed to be in Montgomery representation.
+	unsigned int i, j, carry, count = p751_ZERO_WORDS;
+	digit_t UV[2], t = 0, u = 0, v = 0;
+
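+	// The low p751_ZERO_WORDS digits of p751+1 are zero, so partial products involving them are skipped;
+	// this is what makes the reduction cheaper than a generic Montgomery reduction.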
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		mc[i] = 0;
+	}
+
+	for (i = 0; i < NWORDS_FIELD; i++) {
+		for (j = 0; j < i; j++) {
+			if (j < (i - p751_ZERO_WORDS + 1)) {
+				MUL(mc[j], ((digit_t *) p751p1)[i - j], UV + 1, UV[0]);
+				ADDC(0, UV[0], v, carry, v);
+				ADDC(carry, UV[1], u, carry, u);
+				t += carry;
+			}
+		}
+		ADDC(0, v, ma[i], carry, v);
+		ADDC(carry, u, 0, carry, u);
+		t += carry;
+		mc[i] = v;
+		v = u;
+		u = t;
+		t = 0;
+	}
+
+	for (i = NWORDS_FIELD; i < 2 * NWORDS_FIELD - 1; i++) {
+		if (count > 0) {
+			count -= 1;
+		}
+		for (j = i - NWORDS_FIELD + 1; j < NWORDS_FIELD; j++) {
+			if (j < (NWORDS_FIELD - count)) {
+				MUL(mc[j], ((digit_t *) p751p1)[i - j], UV + 1, UV[0]);
+				ADDC(0, UV[0], v, carry, v);
+				ADDC(carry, UV[1], u, carry, u);
+				t += carry;
+			}
+		}
+		ADDC(0, v, ma[i], carry, v);
+		ADDC(carry, u, 0, carry, u);
+		t += carry;
+		mc[i - NWORDS_FIELD] = v;
+		v = u;
+		u = t;
+		t = 0;
+	}
+	ADDC(0, v, ma[2 * NWORDS_FIELD - 1], carry, v);
+	mc[NWORDS_FIELD - 1] = v;
+}
diff --git a/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.c b/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.c
new file mode 100644
index 0000000000000000000000000000000000000000..c3d7420b4049f116923c3eb59e70a1eb43ce79e5
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.c
@@ -0,0 +1,320 @@
+#if defined(WINDOWS)
+#pragma warning(disable : 4047 4090)
+#endif
+
+#if defined(WINDOWS)
+#define UNUSED
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#if !defined(WINDOWS)
+#include <strings.h>
+#include <unistd.h>
+#endif
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+#include "SIDH.h"
+#include "kex_sidh_cln16.h"
+
+#if defined(WINDOWS)
+#define strdup _strdup // for strdup deprecation warning
+#endif
+
+static const char *P751 = "p751";
+static const char *CompressedP751 = "compressedp751";
+
+static int isCompressed(const char *named_parameters) {
+	if (named_parameters != NULL && strcmp(named_parameters, CompressedP751) == 0) {
+		return 1;
+	}
+
+	return 0;
+}
+
+// Check if curve isogeny structure is NULL
+extern bool oqs_sidh_cln16_is_CurveIsogenyStruct_null(PCurveIsogenyStruct pCurveIsogeny);
+
+OQS_KEX *OQS_KEX_sidh_cln16_new(OQS_RAND *rand, const char *named_parameters) {
+	int compressed = isCompressed(named_parameters);
+	OQS_KEX *k = malloc(sizeof(OQS_KEX));
+	if (k == NULL) {
+		return NULL;
+	}
+
+	// Curve isogeny system initialization
+	PCurveIsogenyStruct curveIsogeny = oqs_sidh_cln16_curve_allocate(&CurveIsogeny_SIDHp751);
+
+	if (curveIsogeny == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(curveIsogeny)) {
+		free(k);
+		oqs_sidh_cln16_curve_free(curveIsogeny);
+		return NULL;
+	}
+	if (oqs_sidh_cln16_curve_initialize(curveIsogeny, &CurveIsogeny_SIDHp751) != SIDH_CRYPTO_SUCCESS) {
+		free(k);
+		oqs_sidh_cln16_curve_free(curveIsogeny);
+		return NULL;
+	}
+	k->ctx = curveIsogeny;
+	k->method_name = compressed ? strdup("SIDH CLN16 compressed") : strdup("SIDH CLN16");
+	k->estimated_classical_security = 192;
+	k->estimated_quantum_security = 128;
+	k->seed = NULL;
+	k->seed_len = 0;
+	k->named_parameters = compressed ? CompressedP751 : P751;
+	k->rand = rand;
+	k->params = NULL;
+	k->alice_0 = &OQS_KEX_sidh_cln16_alice_0;
+	k->bob = &OQS_KEX_sidh_cln16_bob;
+	k->alice_1 = &OQS_KEX_sidh_cln16_alice_1;
+	k->alice_priv_free = &OQS_KEX_sidh_cln16_alice_priv_free;
+	k->free = &OQS_KEX_sidh_cln16_free;
+
+	return k;
+}
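+
+// Typical usage, as a sketch (error handling omitted; variable names are illustrative).
+// The function pointers set above drive the three-pass exchange declared in
+// kex_sidh_cln16.h:
+//
+//     OQS_KEX *kex = OQS_KEX_sidh_cln16_new(rand, "p751");
+//     kex->alice_0(kex, &alice_priv, &alice_msg, &alice_msg_len);
+//     kex->bob(kex, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &bob_key, &bob_key_len);
+//     kex->alice_1(kex, alice_priv, bob_msg, bob_msg_len, &alice_key, &alice_key_len);
+//     // alice_key and bob_key now hold the same SIDH_SHAREDKEY_LEN-byte shared secret
+//     kex->alice_priv_free(kex, alice_priv);
+//     kex->free(kex);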
+
+int OQS_KEX_sidh_cln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) {
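+	// On success (return 1) the caller owns *alice_priv and *alice_msg; on failure
+	// (return 0) anything allocated here is freed and both pointers are reset to NULL.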
+
+	int ret;
+	// non-compressed public key
+	uint8_t *alice_tmp_pub = NULL;
+
+	if (!k || !alice_priv || !alice_msg || !alice_msg_len) {
+		return 0;
+	}
+
+	int compressed = isCompressed(k->named_parameters);
+	*alice_priv = NULL;
+	/* alice_msg is alice's public key */
+	*alice_msg = NULL;
+	if (compressed) {
+		alice_tmp_pub = malloc(SIDH_PUBKEY_LEN);
+		*alice_msg = malloc(SIDH_COMPRESSED_PUBKEY_LEN);
+		if (alice_tmp_pub == NULL || *alice_msg == NULL) {
+			goto err;
+		}
+	} else {
+		// non-compressed
+		*alice_msg = malloc(SIDH_PUBKEY_LEN);
+		if (*alice_msg == NULL) {
+			goto err;
+		}
+		alice_tmp_pub = *alice_msg; // point to the pub key
+	}
+	*alice_priv = malloc(SIDH_SECRETKEY_LEN);
+	if (*alice_priv == NULL) {
+		goto err;
+	}
+
+	// generate Alice's key pair
+	if (oqs_sidh_cln16_EphemeralKeyGeneration_A((unsigned char *) *alice_priv, (unsigned char *) alice_tmp_pub, k->ctx, k->rand) != SIDH_CRYPTO_SUCCESS) {
+		goto err;
+	}
+
+	if (compressed) {
+		// compress Alice's public key
+		oqs_sidh_cln16_PublicKeyCompression_A(alice_tmp_pub, (unsigned char *) *alice_msg, k->ctx);
+		*alice_msg_len = SIDH_COMPRESSED_PUBKEY_LEN;
+	} else {
+		*alice_msg_len = SIDH_PUBKEY_LEN;
+		alice_tmp_pub = NULL; // we don't want to double-free it
+	}
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	if (alice_tmp_pub == *alice_msg) {
+		alice_tmp_pub = NULL; // in the non-compressed case it aliases *alice_msg; avoid a double free in cleanup
+	}
+	free(*alice_msg);
+	*alice_msg = NULL;
+	free(*alice_priv);
+	*alice_priv = NULL;
+
+cleanup:
+	free(alice_tmp_pub);
+
+	return ret;
+}
+
+int OQS_KEX_sidh_cln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+	uint8_t *bob_priv = NULL;
+	// non-compressed public key
+	uint8_t *bob_tmp_pub = NULL;
+	// decompression values
+	unsigned char *R = NULL, *A = NULL;
+
+	if (!k || !alice_msg || !bob_msg || !bob_msg_len || !key || !key_len) {
+		return 0;
+	}
+
+	*bob_msg = NULL;
+	*key = NULL;
+
+	int compressed = isCompressed(k->named_parameters);
+
+	if (compressed) {
+		if (alice_msg_len != SIDH_COMPRESSED_PUBKEY_LEN) {
+			goto err;
+		}
+		bob_tmp_pub = malloc(SIDH_PUBKEY_LEN);
+		*bob_msg = malloc(SIDH_COMPRESSED_PUBKEY_LEN);
+		if (bob_tmp_pub == NULL || *bob_msg == NULL) {
+			goto err;
+		}
+		A = malloc(SIDH_COMPRESSED_A_LEN);
+		if (A == NULL) {
+			goto err;
+		}
+		R = malloc(SIDH_COMPRESSED_R_LEN);
+		if (R == NULL) {
+			goto err;
+		}
+	} else {
+		if (alice_msg_len != SIDH_PUBKEY_LEN) {
+			goto err;
+		}
+		// non-compressed
+		*bob_msg = malloc(SIDH_PUBKEY_LEN);
+		if (*bob_msg == NULL) {
+			goto err;
+		}
+		bob_tmp_pub = *bob_msg; // point to the pub key
+	}
+
+	bob_priv = malloc(SIDH_SECRETKEY_LEN);
+	if (bob_priv == NULL) {
+		goto err;
+	}
+	*key = malloc(SIDH_SHAREDKEY_LEN);
+	if (*key == NULL) {
+		goto err;
+	}
+
+	// generate Bob's key pair
+	if (oqs_sidh_cln16_EphemeralKeyGeneration_B((unsigned char *) bob_priv, (unsigned char *) bob_tmp_pub, k->ctx, k->rand) != SIDH_CRYPTO_SUCCESS) {
+		goto err;
+	}
+
+	if (compressed) {
+		// compress Bob's public key
+		oqs_sidh_cln16_PublicKeyCompression_B(bob_tmp_pub, (unsigned char *) *bob_msg, k->ctx);
+		*bob_msg_len = SIDH_COMPRESSED_PUBKEY_LEN;
+		// decompress Alice's public key
+		oqs_sidh_cln16_PublicKeyADecompression_B((unsigned char *) bob_priv, (unsigned char *) alice_msg, R, A, k->ctx);
+		// compute Bob's shared secret
+		if (oqs_sidh_cln16_EphemeralSecretAgreement_Compression_B((unsigned char *) bob_priv, R, A, (unsigned char *) *key, k->ctx) != SIDH_CRYPTO_SUCCESS) {
+			goto err;
+		}
+	} else {
+		*bob_msg_len = SIDH_PUBKEY_LEN;
+		bob_tmp_pub = NULL; // we don't want to double-free it
+		// compute Bob's shared secret
+		if (oqs_sidh_cln16_EphemeralSecretAgreement_B((unsigned char *) bob_priv, (unsigned char *) alice_msg, (unsigned char *) *key, k->ctx) != SIDH_CRYPTO_SUCCESS) {
+			goto err;
+		}
+	}
+
+	*key_len = SIDH_SHAREDKEY_LEN;
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	if (bob_tmp_pub == *bob_msg) {
+		bob_tmp_pub = NULL; // in the non-compressed case it aliases *bob_msg; avoid a double free in cleanup
+	}
+	free(*bob_msg);
+	*bob_msg = NULL;
+	free(*key);
+	*key = NULL;
+
+cleanup:
+	free(bob_tmp_pub);
+	free(bob_priv);
+	free(A);
+	free(R);
+
+	return ret;
+}
+
+int OQS_KEX_sidh_cln16_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) {
+
+	int ret;
+	// decompression values
+	unsigned char *R = NULL, *A = NULL;
+
+	if (!k || !alice_priv || !bob_msg || !key || !key_len) {
+		return 0;
+	}
+
+	*key = NULL;
+
+	int compressed = isCompressed(k->named_parameters);
+
+	*key = malloc(SIDH_SHAREDKEY_LEN);
+	if (*key == NULL) {
+		goto err;
+	}
+	*key_len = SIDH_SHAREDKEY_LEN;
+
+	if (compressed) {
+		if (bob_msg_len != SIDH_COMPRESSED_PUBKEY_LEN) {
+			goto err;
+		}
+		A = malloc(SIDH_COMPRESSED_A_LEN);
+		if (A == NULL) {
+			goto err;
+		}
+		R = malloc(SIDH_COMPRESSED_R_LEN);
+		if (R == NULL) {
+			goto err;
+		}
+		// compute Alice's shared secret
+		oqs_sidh_cln16_PublicKeyBDecompression_A((unsigned char *) alice_priv, (unsigned char *) bob_msg, R, A, k->ctx);
+		if (oqs_sidh_cln16_EphemeralSecretAgreement_Compression_A((unsigned char *) alice_priv, R, A, (unsigned char *) *key, k->ctx) != SIDH_CRYPTO_SUCCESS) {
+			goto err;
+		}
+	} else {
+		if (bob_msg_len != SIDH_PUBKEY_LEN) {
+			goto err;
+		}
+		if (oqs_sidh_cln16_EphemeralSecretAgreement_A((unsigned char *) alice_priv, (unsigned char *) bob_msg, (unsigned char *) *key, k->ctx) != SIDH_CRYPTO_SUCCESS) {
+			goto err;
+		}
+	}
+
+	ret = 1;
+	goto cleanup;
+
+err:
+	ret = 0;
+	free(*key);
+	*key = NULL;
+
+cleanup:
+	free(A);
+	free(R);
+
+	return ret;
+}
+
+void OQS_KEX_sidh_cln16_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) {
+	if (alice_priv) {
+		free(alice_priv);
+	}
+}
+
+void OQS_KEX_sidh_cln16_free(OQS_KEX *k) {
+	if (!k) {
+		return;
+	}
+	oqs_sidh_cln16_curve_free((PCurveIsogenyStruct) k->ctx);
+	k->ctx = NULL;
+	free(k->method_name);
+	k->method_name = NULL;
+	free(k);
+}
diff --git a/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.h b/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.h
new file mode 100644
index 0000000000000000000000000000000000000000..4409db7a6f9f0292ac6343066fc755ff75db21f1
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/kex_sidh_cln16.h
@@ -0,0 +1,24 @@
+/**
+ * \file kex_sidh_cln16.h
+ * \brief Header for SIDH key exchange protocol from the Microsoft SIDH library
+ */
+
+#ifndef __OQS_KEX_SIDH_CLN16_H
+#define __OQS_KEX_SIDH_CLN16_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+OQS_KEX *OQS_KEX_sidh_cln16_new(OQS_RAND *rand, const char *named_parameters);
+
+int OQS_KEX_sidh_cln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
+int OQS_KEX_sidh_cln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
+int OQS_KEX_sidh_cln16_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
+
+void OQS_KEX_sidh_cln16_alice_priv_free(OQS_KEX *k, void *alice_priv);
+void OQS_KEX_sidh_cln16_free(OQS_KEX *k);
+
+#endif
diff --git a/crypt/liboqs/kex_sidh_cln16/sidh_kex.c b/crypt/liboqs/kex_sidh_cln16/sidh_kex.c
new file mode 100644
index 0000000000000000000000000000000000000000..de1b3100a6ab1a35265b1dee33a9fbbb0c84d99b
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_cln16/sidh_kex.c
@@ -0,0 +1,737 @@
+/********************************************************************************************
+* SIDH: an efficient supersingular isogeny-based cryptography library for ephemeral  
+*       Diffie-Hellman key exchange.
+*
+*    Copyright (c) Microsoft Corporation. All rights reserved.
+*
+*
+* Abstract: ephemeral isogeny-based key exchange
+*
+*********************************************************************************************/
+
+#include "SIDH_internal.h"
+
+extern const unsigned int splits_Alice[SIDH_MAX_Alice];
+extern const unsigned int splits_Bob[SIDH_MAX_Bob];
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralKeyGeneration_A(unsigned char *PrivateKeyA, unsigned char *PublicKeyA, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand) { // Alice's ephemeral key-pair generation
+	                                                                                                                                                                  // It produces a private key PrivateKeyA and computes the public key PublicKeyA.
+	                                                                                                                                                                  // The private key is an even integer in the range [2, oA-2], where oA = 2^372.
+	                                                                                                                                                                  // The public key consists of 3 elements in GF(p751^2).
+	                                                                                                                                                                  // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	unsigned int owords = NBITS_TO_NWORDS(CurveIsogeny->owordbits), pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits);
+	oqs_sidh_cln16_point_basefield_t P;
+	oqs_sidh_cln16_point_proj_t R, phiP = {0}, phiQ = {0}, phiD = {0}, pts[SIDH_MAX_INT_POINTS_ALICE];
+	oqs_sidh_cln16_publickey_t *PublicKey = (oqs_sidh_cln16_publickey_t *) PublicKeyA;
+	unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_ALICE], npts = 0;
+	oqs_sidh_cln16_f2elm_t coeff[5], A = {0}, C = {0}, Aout, Cout;
+	SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS;
+
+	if (PrivateKeyA == NULL || PublicKey == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) {
+		return SIDH_CRYPTO_ERROR_INVALID_PARAMETER;
+	}
+
+	// Choose a random even number in the range [2, oA-2] as secret key for Alice
+	Status = oqs_sidh_cln16_random_mod_order((digit_t *) PrivateKeyA, SIDH_ALICE, CurveIsogeny, rand);
+	if (Status != SIDH_CRYPTO_SUCCESS) {
+		oqs_sidh_cln16_clear_words((void *) PrivateKeyA, owords);
+		return Status;
+	}
+
+	oqs_sidh_cln16_to_mont((digit_t *) CurveIsogeny->PA, (digit_t *) P); // Conversion of Alice's generators to Montgomery representation
+	oqs_sidh_cln16_to_mont(((digit_t *) CurveIsogeny->PA) + NWORDS_FIELD, ((digit_t *) P) + NWORDS_FIELD);
+
+	Status = oqs_sidh_cln16_secret_pt(P, (digit_t *) PrivateKeyA, SIDH_ALICE, R, CurveIsogeny);
+	if (Status != SIDH_CRYPTO_SUCCESS) {
+		oqs_sidh_cln16_clear_words((void *) PrivateKeyA, owords);
+		return Status;
+	}
+
+	oqs_sidh_cln16_copy_words((digit_t *) CurveIsogeny->PB, (digit_t *) phiP, pwords); // Copy X-coordinates from Bob's public parameters, set Z <- 1
+	oqs_sidh_cln16_fpcopy751((digit_t *) CurveIsogeny->Montgomery_one, (digit_t *) phiP->Z);
+	oqs_sidh_cln16_to_mont((digit_t *) phiP, (digit_t *) phiP);
+	oqs_sidh_cln16_copy_words((digit_t *) phiP, (digit_t *) phiQ, pwords); // QB = (-XPB:1)
+	oqs_sidh_cln16_fpneg751(phiQ->X[0]);
+	oqs_sidh_cln16_fpcopy751((digit_t *) CurveIsogeny->Montgomery_one, (digit_t *) phiQ->Z);
+	oqs_sidh_cln16_distort_and_diff(phiP->X[0], phiD, CurveIsogeny); // DB = (x(QB-PB),z(QB-PB))
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->A, A[0]); // Extracting curve parameters A and C
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]);
+	oqs_sidh_cln16_to_mont(A[0], A[0]);
+	oqs_sidh_cln16_to_mont(C[0], C[0]);
+
+	oqs_sidh_cln16_first_4_isog(phiP, A, Aout, Cout, CurveIsogeny);
+	oqs_sidh_cln16_first_4_isog(phiQ, A, Aout, Cout, CurveIsogeny);
+	oqs_sidh_cln16_first_4_isog(phiD, A, Aout, Cout, CurveIsogeny);
+	oqs_sidh_cln16_first_4_isog(R, A, A, C, CurveIsogeny);
+
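+	// Main isogeny walk: the 2^372-isogeny is computed as a chain of 4-isogenies.
+	// pts/pts_index form a stack of intermediate points, splits_Alice encodes the
+	// traversal strategy, and phiP/phiQ/phiD are pushed through every 4-isogeny so
+	// that their images on the final curve form the public key.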
+	index = 0;
+	for (row = 1; row < SIDH_MAX_Alice; row++) {
+		while (index < SIDH_MAX_Alice - row) {
+			oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X);
+			oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z);
+			pts_index[npts] = index;
+			npts += 1;
+			m = splits_Alice[SIDH_MAX_Alice - index - row];
+			oqs_sidh_cln16_xDBLe(R, R, A, C, (int) (2 * m));
+			index += m;
+		}
+		oqs_sidh_cln16_get_4_isog(R, A, C, coeff);
+
+		for (i = 0; i < npts; i++) {
+			oqs_sidh_cln16_eval_4_isog(pts[i], coeff);
+		}
+		oqs_sidh_cln16_eval_4_isog(phiP, coeff);
+		oqs_sidh_cln16_eval_4_isog(phiQ, coeff);
+		oqs_sidh_cln16_eval_4_isog(phiD, coeff);
+
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X);
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z);
+		index = pts_index[npts - 1];
+		npts -= 1;
+	}
+
+	oqs_sidh_cln16_get_4_isog(R, A, C, coeff);
+	oqs_sidh_cln16_eval_4_isog(phiP, coeff);
+	oqs_sidh_cln16_eval_4_isog(phiQ, coeff);
+	oqs_sidh_cln16_eval_4_isog(phiD, coeff);
+
+	oqs_sidh_cln16_inv_3_way(phiP->Z, phiQ->Z, phiD->Z);
+	oqs_sidh_cln16_fp2mul751_mont(phiP->X, phiP->Z, phiP->X);
+	oqs_sidh_cln16_fp2mul751_mont(phiQ->X, phiQ->Z, phiQ->X);
+	oqs_sidh_cln16_fp2mul751_mont(phiD->X, phiD->Z, phiD->X);
+
+	oqs_sidh_cln16_from_fp2mont(phiP->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[0]); // Converting back to standard representation
+	oqs_sidh_cln16_from_fp2mont(phiQ->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[1]);
+	oqs_sidh_cln16_from_fp2mont(phiD->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[2]);
+
+	// Cleanup:
+	oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) phiP, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) phiQ, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) phiD, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_ALICE * 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) A, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) C, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) coeff, 5 * 2 * pwords);
+
+	return Status;
+}
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralKeyGeneration_B(unsigned char *PrivateKeyB, unsigned char *PublicKeyB, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand) { // Bob's ephemeral key-pair generation
+	                                                                                                                                                                  // It produces a private key PrivateKeyB and computes the public key PublicKeyB.
+	                                                                                                                                                                  // The private key is an integer in the range [1, oB-1], where oB = 3^239.
+	                                                                                                                                                                  // The public key consists of 3 elements in GF(p751^2).
+	                                                                                                                                                                  // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	unsigned int owords = NBITS_TO_NWORDS(CurveIsogeny->owordbits), pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits);
+	oqs_sidh_cln16_point_basefield_t P;
+	oqs_sidh_cln16_point_proj_t R, phiP = {0}, phiQ = {0}, phiD = {0}, pts[SIDH_MAX_INT_POINTS_BOB];
+	oqs_sidh_cln16_publickey_t *PublicKey = (oqs_sidh_cln16_publickey_t *) PublicKeyB;
+	unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_BOB], npts = 0;
+	oqs_sidh_cln16_f2elm_t A = {0}, C = {0};
+	SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS;
+
+	if (PrivateKeyB == NULL || PublicKey == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) {
+		return SIDH_CRYPTO_ERROR_INVALID_PARAMETER;
+	}
+
+	// Choose a random number equivalent to 0 (mod 3) in the range [3, oB-3] as secret key for Bob
+	Status = oqs_sidh_cln16_random_mod_order((digit_t *) PrivateKeyB, SIDH_BOB, CurveIsogeny, rand);
+	if (Status != SIDH_CRYPTO_SUCCESS) {
+		oqs_sidh_cln16_clear_words((void *) PrivateKeyB, owords);
+		return Status;
+	}
+
+	oqs_sidh_cln16_to_mont((digit_t *) CurveIsogeny->PB, (digit_t *) P); // Conversion of Bob's generators to Montgomery representation
+	oqs_sidh_cln16_to_mont(((digit_t *) CurveIsogeny->PB) + NWORDS_FIELD, ((digit_t *) P) + NWORDS_FIELD);
+
+	Status = oqs_sidh_cln16_secret_pt(P, (digit_t *) PrivateKeyB, SIDH_BOB, R, CurveIsogeny);
+	if (Status != SIDH_CRYPTO_SUCCESS) {
+		oqs_sidh_cln16_clear_words((void *) PrivateKeyB, owords);
+		return Status;
+	}
+
+	oqs_sidh_cln16_copy_words((digit_t *) CurveIsogeny->PA, (digit_t *) phiP, pwords); // Copy X-coordinates from Alice's public parameters, set Z <- 1
+	oqs_sidh_cln16_fpcopy751((digit_t *) CurveIsogeny->Montgomery_one, (digit_t *) phiP->Z);
+	oqs_sidh_cln16_to_mont((digit_t *) phiP, (digit_t *) phiP);            // Conversion to Montgomery representation
+	oqs_sidh_cln16_copy_words((digit_t *) phiP, (digit_t *) phiQ, pwords); // QA = (-XPA:1)
+	oqs_sidh_cln16_fpneg751(phiQ->X[0]);
+	oqs_sidh_cln16_fpcopy751((digit_t *) CurveIsogeny->Montgomery_one, (digit_t *) phiQ->Z);
+	oqs_sidh_cln16_distort_and_diff(phiP->X[0], phiD, CurveIsogeny); // DA = (x(QA-PA),z(QA-PA))
+
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->A, A[0]); // Extracting curve parameters A and C
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]);
+	oqs_sidh_cln16_to_mont(A[0], A[0]);
+	oqs_sidh_cln16_to_mont(C[0], C[0]);
+
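+	// Same strategy-based walk as on Alice's side, but with point triplings (xTPLe)
+	// and 3-isogenies following splits_Bob.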
+	index = 0;
+	for (row = 1; row < SIDH_MAX_Bob; row++) {
+		while (index < SIDH_MAX_Bob - row) {
+			oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X);
+			oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z);
+			pts_index[npts] = index;
+			npts += 1;
+			m = splits_Bob[SIDH_MAX_Bob - index - row];
+			oqs_sidh_cln16_xTPLe(R, R, A, C, (int) m);
+			index += m;
+		}
+		oqs_sidh_cln16_get_3_isog(R, A, C);
+
+		for (i = 0; i < npts; i++) {
+			oqs_sidh_cln16_eval_3_isog(R, pts[i]);
+		}
+		oqs_sidh_cln16_eval_3_isog(R, phiP);
+		oqs_sidh_cln16_eval_3_isog(R, phiQ);
+		oqs_sidh_cln16_eval_3_isog(R, phiD);
+
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X);
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z);
+		index = pts_index[npts - 1];
+		npts -= 1;
+	}
+
+	oqs_sidh_cln16_get_3_isog(R, A, C);
+	oqs_sidh_cln16_eval_3_isog(R, phiP);
+	oqs_sidh_cln16_eval_3_isog(R, phiQ);
+	oqs_sidh_cln16_eval_3_isog(R, phiD);
+
+	oqs_sidh_cln16_inv_3_way(phiP->Z, phiQ->Z, phiD->Z);
+	oqs_sidh_cln16_fp2mul751_mont(phiP->X, phiP->Z, phiP->X);
+	oqs_sidh_cln16_fp2mul751_mont(phiQ->X, phiQ->Z, phiQ->X);
+	oqs_sidh_cln16_fp2mul751_mont(phiD->X, phiD->Z, phiD->X);
+
+	oqs_sidh_cln16_from_fp2mont(phiP->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[0]); // Converting back to standard representation
+	oqs_sidh_cln16_from_fp2mont(phiQ->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[1]);
+	oqs_sidh_cln16_from_fp2mont(phiD->X, ((oqs_sidh_cln16_f2elm_t *) PublicKey)[2]);
+
+	// Cleanup:
+	oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) phiP, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) phiQ, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) phiD, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_BOB * 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) A, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) C, 2 * pwords);
+
+	return Status;
+}
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_A(const unsigned char *PrivateKeyA, const unsigned char *PublicKeyB, unsigned char *SharedSecretA, PCurveIsogenyStruct CurveIsogeny) { // Alice's ephemeral shared secret computation
+	                                                                                                                                                                                              // It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB
+	                                                                                                                                                                                              // Inputs: Alice's PrivateKeyA is an even integer in the range [2, oA-2], where oA = 2^372.
+	                                                                                                                                                                                              //         Bob's PublicKeyB consists of 3 elements in GF(p751^2).
+	                                                                                                                                                                                              // Output: a shared secret SharedSecretA that consists of one element in GF(p751^2).
+	                                                                                                                                                                                              // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	unsigned int pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits);
+	unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_ALICE], npts = 0;
+	oqs_sidh_cln16_point_proj_t R, pts[SIDH_MAX_INT_POINTS_ALICE];
+	oqs_sidh_cln16_publickey_t *PublicKey = (oqs_sidh_cln16_publickey_t *) PublicKeyB;
+	oqs_sidh_cln16_f2elm_t jinv, coeff[5], PKB[3], A, C = {0};
+	SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS;
+
+	if (PrivateKeyA == NULL || PublicKey == NULL || SharedSecretA == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) {
+		return SIDH_CRYPTO_ERROR_INVALID_PARAMETER;
+	}
+
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[0], PKB[0]); // Extracting and converting Bob's public curve parameters to Montgomery representation
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[1], PKB[1]);
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[2], PKB[2]);
+
+	oqs_sidh_cln16_get_A(PKB[0], PKB[1], PKB[2], A, CurveIsogeny);
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]);
+	oqs_sidh_cln16_to_mont(C[0], C[0]);
+
+	Status = oqs_sidh_cln16_ladder_3_pt(PKB[0], PKB[1], PKB[2], (digit_t *) PrivateKeyA, SIDH_ALICE, R, A, CurveIsogeny);
+	if (Status != SIDH_CRYPTO_SUCCESS) {
+		return Status;
+	}
+	oqs_sidh_cln16_first_4_isog(R, A, A, C, CurveIsogeny);
+
+	index = 0;
+	for (row = 1; row < SIDH_MAX_Alice; row++) {
+		while (index < SIDH_MAX_Alice - row) {
+			oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X);
+			oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z);
+			pts_index[npts] = index;
+			npts += 1;
+			m = splits_Alice[SIDH_MAX_Alice - index - row];
+			oqs_sidh_cln16_xDBLe(R, R, A, C, (int) (2 * m));
+			index += m;
+		}
+		oqs_sidh_cln16_get_4_isog(R, A, C, coeff);
+
+		for (i = 0; i < npts; i++) {
+			oqs_sidh_cln16_eval_4_isog(pts[i], coeff);
+		}
+
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X);
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z);
+		index = pts_index[npts - 1];
+		npts -= 1;
+	}
+
+	oqs_sidh_cln16_get_4_isog(R, A, C, coeff);
+	oqs_sidh_cln16_j_inv(A, C, jinv);
+	oqs_sidh_cln16_from_fp2mont(jinv, (oqs_sidh_cln16_felm_t *) SharedSecretA); // Converting back to standard representation
+
+	// Cleanup:
+	oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_ALICE * 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) A, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) C, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) jinv, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) coeff, 5 * 2 * pwords);
+
+	return Status;
+}
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_B(const unsigned char *PrivateKeyB, const unsigned char *PublicKeyA, unsigned char *SharedSecretB, PCurveIsogenyStruct CurveIsogeny) { // Bob's ephemeral shared secret computation
+	                                                                                                                                                                                              // It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA
+	                                                                                                                                                                                              // Inputs: Bob's PrivateKeyB is an integer in the range [1, oB-1], where oB = 3^239.
+	                                                                                                                                                                                              //         Alice's PublicKeyA consists of 3 elements in GF(p751^2).
+	                                                                                                                                                                                              // Output: a shared secret SharedSecretB that consists of one element in GF(p751^2).
+	                                                                                                                                                                                              // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	unsigned int pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits);
+	unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_BOB], npts = 0;
+	oqs_sidh_cln16_point_proj_t R, pts[SIDH_MAX_INT_POINTS_BOB];
+	oqs_sidh_cln16_publickey_t *PublicKey = (oqs_sidh_cln16_publickey_t *) PublicKeyA;
+	oqs_sidh_cln16_f2elm_t jinv, A, PKA[3], C = {0};
+	SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS;
+
+	if (PrivateKeyB == NULL || PublicKey == NULL || SharedSecretB == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) {
+		return SIDH_CRYPTO_ERROR_INVALID_PARAMETER;
+	}
+
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[0], PKA[0]); // Extracting and converting Alice's public curve parameters to Montgomery representation
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[1], PKA[1]);
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKey)[2], PKA[2]);
+
+	oqs_sidh_cln16_get_A(PKA[0], PKA[1], PKA[2], A, CurveIsogeny);
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]);
+	oqs_sidh_cln16_to_mont(C[0], C[0]);
+
+	Status = oqs_sidh_cln16_ladder_3_pt(PKA[0], PKA[1], PKA[2], (digit_t *) PrivateKeyB, SIDH_BOB, R, A, CurveIsogeny);
+	if (Status != SIDH_CRYPTO_SUCCESS) {
+		return Status;
+	}
+
+	index = 0;
+	for (row = 1; row < SIDH_MAX_Bob; row++) {
+		while (index < SIDH_MAX_Bob - row) {
+			oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X);
+			oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z);
+			pts_index[npts] = index;
+			npts += 1;
+			m = splits_Bob[SIDH_MAX_Bob - index - row];
+			oqs_sidh_cln16_xTPLe(R, R, A, C, (int) m);
+			index += m;
+		}
+		oqs_sidh_cln16_get_3_isog(R, A, C);
+
+		for (i = 0; i < npts; i++) {
+			oqs_sidh_cln16_eval_3_isog(R, pts[i]);
+		}
+
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X);
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z);
+		index = pts_index[npts - 1];
+		npts -= 1;
+	}
+
+	oqs_sidh_cln16_get_3_isog(R, A, C);
+	oqs_sidh_cln16_j_inv(A, C, jinv);
+	oqs_sidh_cln16_from_fp2mont(jinv, (oqs_sidh_cln16_felm_t *) SharedSecretB); // Converting back to standard representation
+
+	// Cleanup:
+	oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_BOB * 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) A, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) C, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) jinv, 2 * pwords);
+
+	return Status;
+}
+
+///////////////////////////////////////////////////////////////////////////////////
+///////////////          KEY EXCHANGE USING DECOMPRESSION           ///////////////
+
+void oqs_sidh_cln16_PublicKeyCompression_A(const unsigned char *PublicKeyA, unsigned char *CompressedPKA, PCurveIsogenyStruct CurveIsogeny) { // Alice's public key compression
+	                                                                                                                                          // It produces a compressed output that consists of three elements in Z_orderB and one field element
+	                                                                                                                                          // Input : Alice's public key PublicKeyA, which consists of 3 elements in GF(p751^2).
+	                                                                                                                                          // Output: a compressed value CompressedPKA that consists of three elements in Z_orderB and one element in GF(p751^2).
+	                                                                                                                                          // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	oqs_sidh_cln16_point_full_proj_t P, Q, phP, phQ, phX;
+	oqs_sidh_cln16_point_t R1, R2, phiP, phiQ;
+	oqs_sidh_cln16_publickey_t PK;
+	digit_t *comp = (digit_t *) CompressedPKA;
+	digit_t inv[SIDH_NWORDS_ORDER];
+	oqs_sidh_cln16_f2elm_t A, vec[4], Zinv[4];
+	digit_t a0[SIDH_NWORDS_ORDER], b0[SIDH_NWORDS_ORDER], a1[SIDH_NWORDS_ORDER], b1[SIDH_NWORDS_ORDER];
+	uint64_t Montgomery_Rprime[SIDH_NWORDS64_ORDER] = {0x1A55482318541298, 0x070A6370DFA12A03, 0xCB1658E0E3823A40, 0xB3B7384EB5DEF3F9, 0xCBCA952F7006EA33, 0x00569EF8EC94864C}; // Value (2^384)^2 mod 3^239
+	uint64_t Montgomery_rprime[SIDH_NWORDS64_ORDER] = {0x48062A91D3AB563D, 0x6CE572751303C2F5, 0x5D1319F3F160EC9D, 0xE35554E8C2D5623A, 0xCA29300232BC79A5, 0x8AAD843D646D78C5}; // Value -(3^239)^-1 mod 2^384
+	unsigned int bit;
+
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[0], ((oqs_sidh_cln16_f2elm_t *) &PK)[0]); // Converting to Montgomery representation
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[1], ((oqs_sidh_cln16_f2elm_t *) &PK)[1]);
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyA)[2], ((oqs_sidh_cln16_f2elm_t *) &PK)[2]);
+
+	oqs_sidh_cln16_recover_y(PK, phP, phQ, phX, A, CurveIsogeny);
+	oqs_sidh_cln16_generate_3_torsion_basis(A, P, Q, CurveIsogeny);
+	oqs_sidh_cln16_fp2copy751(P->Z, vec[0]);
+	oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]);
+	oqs_sidh_cln16_fp2copy751(phP->Z, vec[2]);
+	oqs_sidh_cln16_fp2copy751(phQ->Z, vec[3]);
+	oqs_sidh_cln16_mont_n_way_inv(vec, 4, Zinv);
+
+	oqs_sidh_cln16_fp2mul751_mont(P->X, Zinv[0], R1->x);
+	oqs_sidh_cln16_fp2mul751_mont(P->Y, Zinv[0], R1->y);
+	oqs_sidh_cln16_fp2mul751_mont(Q->X, Zinv[1], R2->x);
+	oqs_sidh_cln16_fp2mul751_mont(Q->Y, Zinv[1], R2->y);
+	oqs_sidh_cln16_fp2mul751_mont(phP->X, Zinv[2], phiP->x);
+	oqs_sidh_cln16_fp2mul751_mont(phP->Y, Zinv[2], phiP->y);
+	oqs_sidh_cln16_fp2mul751_mont(phQ->X, Zinv[3], phiQ->x);
+	oqs_sidh_cln16_fp2mul751_mont(phQ->Y, Zinv[3], phiQ->y);
+
+	oqs_sidh_cln16_ph3(phiP, phiQ, R1, R2, A, (uint64_t *) a0, (uint64_t *) b0, (uint64_t *) a1, (uint64_t *) b1, CurveIsogeny);
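+	// (a0, b0) and (a1, b1) are the discrete-log coefficients expressing phiP and phiQ
+	// in the 3^239-torsion basis (R1, R2). Only ratios of these scalars (normalized by
+	// a0 or b0, whichever is invertible) plus the curve coefficient A are stored below,
+	// which is what shrinks the public key.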
+
+	bit = oqs_sidh_cln16_mod3(a0);
+	oqs_sidh_cln16_to_Montgomery_mod_order(a0, a0, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to Montgomery representation
+	oqs_sidh_cln16_to_Montgomery_mod_order(a1, a1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+	oqs_sidh_cln16_to_Montgomery_mod_order(b0, b0, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+	oqs_sidh_cln16_to_Montgomery_mod_order(b1, b1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+
+	if (bit != 0) { // Storing [b1*a0inv, a1*a0inv, b0*a0inv] and setting bit384 to 0
+		oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(a0, inv, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(b0, inv, &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(a1, inv, &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(b1, inv, &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[0], &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1;
+	} else { // Storing [b1*b0inv, a1*b0inv, a0*b0inv] and setting bit384 to 1
+		oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(b0, inv, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(a0, inv, &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(a1, inv, &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(b1, inv, &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[0], &comp[0], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], &comp[SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], &comp[2 * SIDH_NWORDS_ORDER], CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		comp[3 * SIDH_NWORDS_ORDER - 1] |= (digit_t) 1 << (sizeof(digit_t) * 8 - 1);
+	}
+
+	oqs_sidh_cln16_from_fp2mont(A, (oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER]);
+}
+
+void oqs_sidh_cln16_PublicKeyADecompression_B(const unsigned char *SecretKeyB, const unsigned char *CompressedPKA, unsigned char *point_R, unsigned char *param_A, PCurveIsogenyStruct CurveIsogeny) { // Alice's public key value decompression computed by Bob
+	                                                                                                                                                                                                   // Inputs: Bob's private key SecretKeyB, and
+	                                                                                                                                                                                                   //         Alice's compressed public key data CompressedPKA, which consists of three elements in Z_orderB and one element in GF(p751^2),
+	                                                                                                                                                                                                   // Output: a point point_R in coordinates (X:Z) and the curve parameter param_A in GF(p751^2). Outputs are stored in Montgomery representation.
+	                                                                                                                                                                                                   // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	oqs_sidh_cln16_point_t R1, R2;
+	oqs_sidh_cln16_point_proj_t *R = (oqs_sidh_cln16_point_proj_t *) point_R;
+	oqs_sidh_cln16_point_full_proj_t P, Q;
+	digit_t *comp = (digit_t *) CompressedPKA;
+	digit_t *SKin = (digit_t *) SecretKeyB;
+	oqs_sidh_cln16_f2elm_t A24, vec[2], invs[2], one = {0};
+	oqs_sidh_cln16_felm_t *A = (oqs_sidh_cln16_felm_t *) param_A;
+	digit_t t1[SIDH_NWORDS_ORDER], t2[SIDH_NWORDS_ORDER], t3[SIDH_NWORDS_ORDER], t4[SIDH_NWORDS_ORDER], vone[SIDH_NWORDS_ORDER] = {0};
+	uint64_t Montgomery_Rprime[SIDH_NWORDS64_ORDER] = {0x1A55482318541298, 0x070A6370DFA12A03, 0xCB1658E0E3823A40, 0xB3B7384EB5DEF3F9, 0xCBCA952F7006EA33, 0x00569EF8EC94864C}; // Value (2^384)^2 mod 3^239
+	uint64_t Montgomery_rprime[SIDH_NWORDS64_ORDER] = {0x48062A91D3AB563D, 0x6CE572751303C2F5, 0x5D1319F3F160EC9D, 0xE35554E8C2D5623A, 0xCA29300232BC79A5, 0x8AAD843D646D78C5}; // Value -(3^239)^-1 mod 2^384
+	unsigned int bit;
+
+	vone[0] = 1;
+	oqs_sidh_cln16_to_Montgomery_mod_order(vone, vone, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to Montgomery representation
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]);
+	oqs_sidh_cln16_to_fp2mont((oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER], A); // Converting to Montgomery representation
+	oqs_sidh_cln16_generate_3_torsion_basis(A, P, Q, CurveIsogeny);
+
+	// Normalize basis points
+	oqs_sidh_cln16_fp2copy751(P->Z, vec[0]);
+	oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]);
+	oqs_sidh_cln16_mont_n_way_inv(vec, 2, invs);
+	oqs_sidh_cln16_fp2mul751_mont(P->X, invs[0], R1->x);
+	oqs_sidh_cln16_fp2mul751_mont(P->Y, invs[0], R1->y);
+	oqs_sidh_cln16_fp2mul751_mont(Q->X, invs[1], R2->x);
+	oqs_sidh_cln16_fp2mul751_mont(Q->Y, invs[1], R2->y);
+
+	oqs_sidh_cln16_fp2add751(A, one, A24);
+	oqs_sidh_cln16_fp2add751(A24, one, A24);
+	oqs_sidh_cln16_fp2div2_751(A24, A24);
+	oqs_sidh_cln16_fp2div2_751(A24, A24);
+
+	bit = comp[3 * SIDH_NWORDS_ORDER - 1] >> (sizeof(digit_t) * 8 - 1);
+	comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1;
+	oqs_sidh_cln16_to_Montgomery_mod_order(SKin, t1, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime); // Converting to Montgomery representation
+	oqs_sidh_cln16_to_Montgomery_mod_order(&comp[0], t2, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+	oqs_sidh_cln16_to_Montgomery_mod_order(&comp[SIDH_NWORDS_ORDER], t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+	oqs_sidh_cln16_to_Montgomery_mod_order(&comp[2 * SIDH_NWORDS_ORDER], t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+
+	if (bit == 0) {
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_mp_add(t3, vone, t3, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_mp_add(t2, t4, t4, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t3, t4, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation
+		oqs_sidh_cln16_mont_twodim_scalarmult(t3, R1, R2, A, A24, P, CurveIsogeny);
+	} else {
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_mp_add(t4, vone, t4, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_Montgomery_inversion_mod_order_bingcd(t4, t4, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime, (digit_t *) &Montgomery_Rprime);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t1, t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_mp_add(t2, t3, t3, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_Montgomery_multiply_mod_order(t3, t4, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime);
+		oqs_sidh_cln16_from_Montgomery_mod_order(t3, t3, CurveIsogeny->Border, (digit_t *) &Montgomery_rprime); // Converting back from Montgomery representation
+		oqs_sidh_cln16_mont_twodim_scalarmult(t3, R2, R1, A, A24, P, CurveIsogeny);
+	}
+
+	oqs_sidh_cln16_fp2copy751(P->X, R[0]->X);
+	oqs_sidh_cln16_fp2copy751(P->Z, R[0]->Z);
+}
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_Compression_A(const unsigned char *PrivateKeyA, const unsigned char *point_R, const unsigned char *param_A, unsigned char *SharedSecretA, PCurveIsogenyStruct CurveIsogeny) { // Alice's ephemeral shared secret computation
+	                                                                                                                                                                                                                                     // It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's decompressed data point_R and param_A
+	                                                                                                                                                                                                                                     // Inputs: Alice's PrivateKeyA is an even integer in the range [2, oA-2], where oA = 2^372.
+	                                                                                                                                                                                                                                     //         Bob's decompressed data consists of point_R in (X:Z) coordinates and the curve parameter param_A in GF(p751^2).
+	                                                                                                                                                                                                                                     // Output: a shared secret SharedSecretA that consists of one element in GF(p751^2).
+	                                                                                                                                                                                                                                     // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	unsigned int pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits);
+	unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_ALICE], npts = 0;
+	oqs_sidh_cln16_point_proj_t R, pts[SIDH_MAX_INT_POINTS_ALICE];
+	oqs_sidh_cln16_f2elm_t jinv, coeff[5], A, C = {0};
+
+	if (PrivateKeyA == NULL || SharedSecretA == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) {
+		return SIDH_CRYPTO_ERROR_INVALID_PARAMETER;
+	}
+
+	oqs_sidh_cln16_fp2copy751((((oqs_sidh_cln16_point_proj_t *) point_R)[0])->X, R->X);
+	oqs_sidh_cln16_fp2copy751((((oqs_sidh_cln16_point_proj_t *) point_R)[0])->Z, R->Z);
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]);
+	oqs_sidh_cln16_to_mont(C[0], C[0]);
+	oqs_sidh_cln16_first_4_isog(R, (oqs_sidh_cln16_felm_t *) param_A, A, C, CurveIsogeny);
+
+	index = 0;
+	for (row = 1; row < SIDH_MAX_Alice; row++) {
+		while (index < SIDH_MAX_Alice - row) {
+			oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X);
+			oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z);
+			pts_index[npts] = index;
+			npts += 1;
+			m = splits_Alice[SIDH_MAX_Alice - index - row];
+			oqs_sidh_cln16_xDBLe(R, R, A, C, (int) (2 * m));
+			index += m;
+		}
+		oqs_sidh_cln16_get_4_isog(R, A, C, coeff);
+
+		for (i = 0; i < npts; i++) {
+			oqs_sidh_cln16_eval_4_isog(pts[i], coeff);
+		}
+
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X);
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z);
+		index = pts_index[npts - 1];
+		npts -= 1;
+	}
+
+	oqs_sidh_cln16_get_4_isog(R, A, C, coeff);
+	oqs_sidh_cln16_j_inv(A, C, jinv);
+	oqs_sidh_cln16_from_fp2mont(jinv, (oqs_sidh_cln16_felm_t *) SharedSecretA); // Converting back to standard representation
+
+	// Cleanup:
+	oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_ALICE * 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) A, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) C, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) jinv, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) coeff, 5 * 2 * pwords);
+
+	return SIDH_CRYPTO_SUCCESS;
+}
+
+void oqs_sidh_cln16_PublicKeyCompression_B(const unsigned char *PublicKeyB, unsigned char *CompressedPKB, PCurveIsogenyStruct CurveIsogeny) { // Bob's public key compression
+	                                                                                                                                          // It produces a compressed output that consists of three elements in Z_orderA and one field element
+	                                                                                                                                          // Input : Bob's public key PublicKeyB, which consists of 3 elements in GF(p751^2).
+	                                                                                                                                          // Output: a compressed value CompressedPKB that consists of three elements in Z_orderA and one element in GF(p751^2).
+	                                                                                                                                          // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	oqs_sidh_cln16_point_full_proj_t P, Q, phP, phQ, phX;
+	oqs_sidh_cln16_point_t R1, R2, phiP, phiQ;
+	oqs_sidh_cln16_publickey_t PK;
+	digit_t *comp = (digit_t *) CompressedPKB;
+	digit_t inv[SIDH_NWORDS_ORDER];
+	oqs_sidh_cln16_f2elm_t A, vec[4], Zinv[4];
+	digit_t a0[SIDH_NWORDS_ORDER], b0[SIDH_NWORDS_ORDER], a1[SIDH_NWORDS_ORDER], b1[SIDH_NWORDS_ORDER], tmp[2 * SIDH_NWORDS_ORDER], mask = (digit_t)(-1);
+
+	mask >>= (CurveIsogeny->owordbits - CurveIsogeny->oAbits);
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyB)[0], ((oqs_sidh_cln16_f2elm_t *) &PK)[0]); // Converting to Montgomery representation
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyB)[1], ((oqs_sidh_cln16_f2elm_t *) &PK)[1]);
+	oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *) PublicKeyB)[2], ((oqs_sidh_cln16_f2elm_t *) &PK)[2]);
+
+	oqs_sidh_cln16_recover_y(PK, phP, phQ, phX, A, CurveIsogeny);
+	oqs_sidh_cln16_generate_2_torsion_basis(A, P, Q, CurveIsogeny);
+	oqs_sidh_cln16_fp2copy751(P->Z, vec[0]);
+	oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]);
+	oqs_sidh_cln16_fp2copy751(phP->Z, vec[2]);
+	oqs_sidh_cln16_fp2copy751(phQ->Z, vec[3]);
+	oqs_sidh_cln16_mont_n_way_inv(vec, 4, Zinv);
+
+	oqs_sidh_cln16_fp2mul751_mont(P->X, Zinv[0], R1->x);
+	oqs_sidh_cln16_fp2mul751_mont(P->Y, Zinv[0], R1->y);
+	oqs_sidh_cln16_fp2mul751_mont(Q->X, Zinv[1], R2->x);
+	oqs_sidh_cln16_fp2mul751_mont(Q->Y, Zinv[1], R2->y);
+	oqs_sidh_cln16_fp2mul751_mont(phP->X, Zinv[2], phiP->x);
+	oqs_sidh_cln16_fp2mul751_mont(phP->Y, Zinv[2], phiP->y);
+	oqs_sidh_cln16_fp2mul751_mont(phQ->X, Zinv[3], phiQ->x);
+	oqs_sidh_cln16_fp2mul751_mont(phQ->Y, Zinv[3], phiQ->y);
+
+	oqs_sidh_cln16_ph2(phiP, phiQ, R1, R2, A, (uint64_t *) a0, (uint64_t *) b0, (uint64_t *) a1, (uint64_t *) b1, CurveIsogeny);
+
+	if ((a0[0] & 1) == 1) { // Storing [b1*a0inv, a1*a0inv, b0*a0inv] and setting bit384 to 0
+		oqs_sidh_cln16_inv_mod_orderA(a0, inv);
+		oqs_sidh_cln16_multiply(b0, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[0], SIDH_NWORDS_ORDER);
+		comp[SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_multiply(a1, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER);
+		comp[2 * SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_multiply(b1, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[2 * SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER);
+		comp[3 * SIDH_NWORDS_ORDER - 1] &= mask;
+	} else { // Storing [b1*b0inv, a1*b0inv, a0*b0inv] and setting bit384 to 1
+		oqs_sidh_cln16_inv_mod_orderA(b0, inv);
+		oqs_sidh_cln16_multiply(a0, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[0], SIDH_NWORDS_ORDER);
+		comp[SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_multiply(a1, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER);
+		comp[2 * SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_multiply(b1, inv, tmp, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_copy_words(tmp, &comp[2 * SIDH_NWORDS_ORDER], SIDH_NWORDS_ORDER);
+		comp[3 * SIDH_NWORDS_ORDER - 1] &= mask;
+		comp[3 * SIDH_NWORDS_ORDER - 1] |= (digit_t) 1 << (sizeof(digit_t) * 8 - 1);
+	}
+
+	oqs_sidh_cln16_from_fp2mont(A, (oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER]); // Converting back from Montgomery representation
+}
+
+void oqs_sidh_cln16_PublicKeyBDecompression_A(const unsigned char *SecretKeyA, const unsigned char *CompressedPKB, unsigned char *point_R, unsigned char *param_A, PCurveIsogenyStruct CurveIsogeny) { // Bob's public key value decompression computed by Alice
+	                                                                                                                                                                                                   // Inputs: Alice's private key SecretKeyA, and
+	                                                                                                                                                                                                   //         Bob's compressed public key data CompressedPKB, which consists of three elements in Z_orderA and one element in GF(p751^2).
+	                                                                                                                                                                                                   // Output: a point point_R in coordinates (X:Z) and the curve parameter param_A in GF(p751^2). Outputs are stored in Montgomery representation.
+	                                                                                                                                                                                                   // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	oqs_sidh_cln16_point_t R1, R2;
+	oqs_sidh_cln16_point_proj_t *R = (oqs_sidh_cln16_point_proj_t *) point_R;
+	oqs_sidh_cln16_point_full_proj_t P, Q;
+	digit_t *comp = (digit_t *) CompressedPKB;
+	oqs_sidh_cln16_f2elm_t A24, vec[2], invs[2], one = {0};
+	oqs_sidh_cln16_felm_t *A = (oqs_sidh_cln16_felm_t *) param_A;
+	digit_t tmp1[2 * SIDH_NWORDS_ORDER], tmp2[2 * SIDH_NWORDS_ORDER], vone[2 * SIDH_NWORDS_ORDER] = {0}, mask = (digit_t)(-1);
+	unsigned int bit;
+
+	mask >>= (CurveIsogeny->owordbits - CurveIsogeny->oAbits);
+	vone[0] = 1;
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]);
+	oqs_sidh_cln16_to_fp2mont((oqs_sidh_cln16_felm_t *) &comp[3 * SIDH_NWORDS_ORDER], A); // Converting to Montgomery representation
+	oqs_sidh_cln16_generate_2_torsion_basis(A, P, Q, CurveIsogeny);
+
+	// normalize basis points
+	oqs_sidh_cln16_fp2copy751(P->Z, vec[0]);
+	oqs_sidh_cln16_fp2copy751(Q->Z, vec[1]);
+	oqs_sidh_cln16_mont_n_way_inv(vec, 2, invs);
+	oqs_sidh_cln16_fp2mul751_mont(P->X, invs[0], R1->x);
+	oqs_sidh_cln16_fp2mul751_mont(P->Y, invs[0], R1->y);
+	oqs_sidh_cln16_fp2mul751_mont(Q->X, invs[1], R2->x);
+	oqs_sidh_cln16_fp2mul751_mont(Q->Y, invs[1], R2->y);
+
+	oqs_sidh_cln16_fp2add751(A, one, A24);
+	oqs_sidh_cln16_fp2add751(A24, one, A24);
+	oqs_sidh_cln16_fp2div2_751(A24, A24);
+	oqs_sidh_cln16_fp2div2_751(A24, A24);
+
+	bit = comp[3 * SIDH_NWORDS_ORDER - 1] >> (sizeof(digit_t) * 8 - 1);
+	comp[3 * SIDH_NWORDS_ORDER - 1] &= (digit_t)(-1) >> 1;
+
+	if (bit == 0) {
+		oqs_sidh_cln16_multiply((digit_t *) SecretKeyA, &comp[SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_mp_add(tmp1, vone, tmp1, SIDH_NWORDS_ORDER);
+		tmp1[SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_inv_mod_orderA(tmp1, tmp2);
+		oqs_sidh_cln16_multiply((digit_t *) SecretKeyA, &comp[2 * SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_mp_add(&comp[0], tmp1, tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_multiply(tmp1, tmp2, vone, SIDH_NWORDS_ORDER);
+		vone[SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_mont_twodim_scalarmult(vone, R1, R2, A, A24, P, CurveIsogeny);
+	} else {
+		oqs_sidh_cln16_multiply((digit_t *) SecretKeyA, &comp[2 * SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_mp_add(tmp1, vone, tmp1, SIDH_NWORDS_ORDER);
+		tmp1[SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_inv_mod_orderA(tmp1, tmp2);
+		oqs_sidh_cln16_multiply((digit_t *) SecretKeyA, &comp[SIDH_NWORDS_ORDER], tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_mp_add(&comp[0], tmp1, tmp1, SIDH_NWORDS_ORDER);
+		oqs_sidh_cln16_multiply(tmp1, tmp2, vone, SIDH_NWORDS_ORDER);
+		vone[SIDH_NWORDS_ORDER - 1] &= mask;
+		oqs_sidh_cln16_mont_twodim_scalarmult(vone, R2, R1, A, A24, P, CurveIsogeny);
+	}
+
+	oqs_sidh_cln16_fp2copy751(P->X, R[0]->X);
+	oqs_sidh_cln16_fp2copy751(P->Z, R[0]->Z);
+}
+
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_EphemeralSecretAgreement_Compression_B(const unsigned char *PrivateKeyB, const unsigned char *point_R, const unsigned char *param_A, unsigned char *SharedSecretB, PCurveIsogenyStruct CurveIsogeny) { // Bob's ephemeral shared secret computation
+	                                                                                                                                                                                                                                     // It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's decompressed data point_R and param_A
+	                                                                                                                                                                                                                                     // Inputs: Bob's PrivateKeyB is an integer in the range [1, oB-1], where oB = 3^239.
+	                                                                                                                                                                                                                                     //         Alice's decompressed data consists of point_R in (X:Z) coordinates and the curve parameter param_A in GF(p751^2).
+	                                                                                                                                                                                                                                     // Output: a shared secret SharedSecretB that consists of one element in GF(p751^2).
+	                                                                                                                                                                                                                                     // CurveIsogeny must be set up in advance using SIDH_curve_initialize().
+	unsigned int pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits);
+	unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_BOB], npts = 0;
+	oqs_sidh_cln16_point_proj_t R, pts[SIDH_MAX_INT_POINTS_BOB];
+	oqs_sidh_cln16_f2elm_t jinv, A, C = {0};
+
+	if (PrivateKeyB == NULL || SharedSecretB == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) {
+		return SIDH_CRYPTO_ERROR_INVALID_PARAMETER;
+	}
+
+	oqs_sidh_cln16_fp2copy751((((oqs_sidh_cln16_point_proj_t *) point_R)[0])->X, R->X);
+	oqs_sidh_cln16_fp2copy751((((oqs_sidh_cln16_point_proj_t *) point_R)[0])->Z, R->Z);
+	oqs_sidh_cln16_fp2copy751((oqs_sidh_cln16_felm_t *) param_A, A);
+	oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]);
+	oqs_sidh_cln16_to_mont(C[0], C[0]);
+
+	index = 0;
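+	// Walk down the 3^eB-isogeny tree: splits_Bob gives the number of point
+	// triplings to apply before each 3-isogeny, and pts[] caches intermediate
+	// points so they can be pushed through the isogenies computed later.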
+	for (row = 1; row < SIDH_MAX_Bob; row++) {
+		while (index < SIDH_MAX_Bob - row) {
+			oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X);
+			oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z);
+			pts_index[npts] = index;
+			npts += 1;
+			m = splits_Bob[SIDH_MAX_Bob - index - row];
+			oqs_sidh_cln16_xTPLe(R, R, A, C, (int) m);
+			index += m;
+		}
+		oqs_sidh_cln16_get_3_isog(R, A, C);
+
+		for (i = 0; i < npts; i++) {
+			oqs_sidh_cln16_eval_3_isog(R, pts[i]);
+		}
+
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X);
+		oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z);
+		index = pts_index[npts - 1];
+		npts -= 1;
+	}
+
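+	// Last 3-isogeny: the shared secret is the j-invariant of the final curve.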
+	oqs_sidh_cln16_get_3_isog(R, A, C);
+	oqs_sidh_cln16_j_inv(A, C, jinv);
+	oqs_sidh_cln16_from_fp2mont(jinv, (oqs_sidh_cln16_felm_t *) SharedSecretB); // Converting back to standard representation
+
+	// Cleanup:
+	oqs_sidh_cln16_clear_words((void *) R, 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) pts, SIDH_MAX_INT_POINTS_BOB * 2 * 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) A, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) C, 2 * pwords);
+	oqs_sidh_cln16_clear_words((void *) jinv, 2 * pwords);
+
+	return SIDH_CRYPTO_SUCCESS;
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/Makefile.am b/crypt/liboqs/kex_sidh_iqc_ref/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..d634163581b9e79e0207b355faa97c2f5d60202f
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/Makefile.am
@@ -0,0 +1,11 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libsidhiqc.la
+
+
+libsidhiqc_la_SOURCES = kex_sidh_iqc_ref_params.c kex_sidh_iqc_ref.c sidh_elliptic_curve.c sidh_elliptic_curve_dlp.c sidh_isogeny.c
+libsidhiqc_la_SOURCES += sidh_private_key.c sidh_public_key.c sidh_public_key_encryption.c sidh_public_key_validation.c
+libsidhiqc_la_SOURCES += sidh_public_param.c sidh_quadratic_ext.c sidh_shared_key.c sidh_util.c
+libsidhiqc_la_CPPFLAGS = -I../../include -I. -fPIC
+libsidhiqc_la_CPPFLAGS += $(AM_CPPFLAGS) -I$(GMP_DIR)/include
+
+libsidhiqc_la_LDFLAGS = -L$(GMP_DIR)/lib $(AM_LDFLAGS)
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.c b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.c
new file mode 100644
index 0000000000000000000000000000000000000000..448d7bbc6e4dfd532b165d4ed8c6c0c3a3c11418
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.c
@@ -0,0 +1,232 @@
+#if defined(WINDOWS)
+#define UNUSED
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "sidh_elliptic_curve.h"
+#include "sidh_public_param.h"
+#include "sidh_isogeny.h"
+#include "sidh_private_key.h"
+#include "sidh_public_key.h"
+#include "sidh_shared_key.h"
+#include "kex_sidh_iqc_ref.h"
+#include "kex_sidh_iqc_ref_params.h"
+
+OQS_KEX *OQS_KEX_sidh_iqc_ref_new(OQS_RAND *rand, const char *named_parameters) {
+
+	if (named_parameters == NULL) {
+		named_parameters = "params771";
+	}
+
+	OQS_KEX *k = malloc(sizeof(OQS_KEX));
+	if (k == NULL) {
+		return NULL;
+	}
+
+	public_params_t *params =
+	    (public_params_t *) malloc(2 * sizeof(public_params_t));
+	if (params == NULL) {
+		goto err;
+	}
+
+	oqs_sidh_iqc_ref_public_params_init(params[0]);
+	oqs_sidh_iqc_ref_public_params_init(params[1]);
+
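+	// params[0] holds the parameters used on Alice's side and params[1] those
+	// used on Bob's side (see alice_0/bob below); both are parsed from the
+	// textual parameter set returned by oqs_sidh_iqc_ref_params_from_name.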
+	const char **input = oqs_sidh_iqc_ref_params_from_name(named_parameters);
+	if (input == NULL) {
+		goto err_clear;
+	}
+
+	if (!oqs_sidh_iqc_ref_public_params_read(params[0], params[1], input)) {
+		goto err_clear;
+	}
+
+	oqs_sidh_iqc_ref_fp_init_chararacteristic(params[0]->characteristic);
+
+	k->rand = rand;
+	k->method_name = strdup("SIDH IQC REFERENCE");
+	k->estimated_classical_security = 192;
+	k->estimated_quantum_security = 128;
+	k->seed = NULL;
+	k->seed_len = 0;
+	k->named_parameters = strdup(named_parameters);
+	k->params = params;
+	k->ctx = NULL;
+	k->alice_0 = &OQS_KEX_sidh_iqc_ref_alice_0;
+	k->bob = &OQS_KEX_sidh_iqc_ref_bob;
+	k->alice_1 = &OQS_KEX_sidh_iqc_ref_alice_1;
+	k->alice_priv_free = &OQS_KEX_sidh_iqc_ref_alice_priv_free;
+	k->free = &OQS_KEX_sidh_iqc_ref_free;
+
+	return k;
+
+err_clear:
+	oqs_sidh_iqc_ref_public_params_clear(params[0]);
+	oqs_sidh_iqc_ref_public_params_clear(params[1]);
+
+err:
+	free(params);
+	free(k);
+	return NULL;
+}
+
+int OQS_KEX_sidh_iqc_ref_alice_0(OQS_KEX *k, void **alice_priv,
+                                 uint8_t **alice_msg, size_t *alice_msg_len) {
+
+	public_params_t *params = (public_params_t *) k->params;
+	private_key_t Alice_private_key;
+	oqs_sidh_iqc_ref_private_key_init(Alice_private_key);
+	oqs_sidh_iqc_ref_private_key_generate(Alice_private_key, params[0]);
+
+	public_key_t Alice_public_key;
+	oqs_sidh_iqc_ref_public_key_init(Alice_public_key);
+	point_t kernel_gen;
+	oqs_sidh_iqc_ref_point_init(kernel_gen);
+	oqs_sidh_iqc_ref_private_key_compute_kernel_gen(kernel_gen, Alice_private_key,
+	                                                params[0]->P, params[0]->Q,
+	                                                params[0]->le, params[0]->E);
+	oqs_sidh_iqc_ref_public_key_generate(Alice_public_key, kernel_gen, params[0],
+	                                     params[1]);
+
+	// sizes in bytes
+	uint32_t prime_size = (mpz_sizeinbase(characteristic, 2) + 7) / 8;
+	uint32_t private_key_size = 2 * prime_size;
+	uint32_t public_key_size = 12 * prime_size;
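+	// every GF(p^2) element serializes to 2 * prime_size bytes, so the public
+	// key (12 * prime_size) amounts to six such elements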
+
+	*alice_priv = NULL;
+	*alice_msg = NULL;
+	*alice_priv = malloc(private_key_size);
+	*alice_msg = malloc(public_key_size);
+	*alice_msg_len = public_key_size;
+
+	oqs_sidh_iqc_ref_private_key_to_bytes((uint8_t *) *alice_priv,
+	                                      Alice_private_key, prime_size);
+	oqs_sidh_iqc_ref_public_key_to_bytes((uint8_t *) *alice_msg, Alice_public_key,
+	                                     prime_size);
+
+	oqs_sidh_iqc_ref_private_key_clear(Alice_private_key);
+	oqs_sidh_iqc_ref_public_key_clear(Alice_public_key);
+	oqs_sidh_iqc_ref_point_clear(kernel_gen);
+
+	return 1;
+}
+
+int OQS_KEX_sidh_iqc_ref_bob(OQS_KEX *k, const uint8_t *alice_msg,
+                             UNUSED const size_t alice_msg_len,
+                             uint8_t **bob_msg, size_t *bob_msg_len,
+                             uint8_t **key, size_t *key_len) {
+
+	public_params_t *params = (public_params_t *) k->params;
+
+	private_key_t Bob_private_key;
+	oqs_sidh_iqc_ref_private_key_init(Bob_private_key);
+	oqs_sidh_iqc_ref_private_key_generate(Bob_private_key, params[1]);
+
+	public_key_t Bob_public_key;
+	oqs_sidh_iqc_ref_public_key_init(Bob_public_key);
+	point_t kernel_gen;
+	oqs_sidh_iqc_ref_point_init(kernel_gen);
+	oqs_sidh_iqc_ref_private_key_compute_kernel_gen(kernel_gen, Bob_private_key,
+	                                                params[1]->P, params[1]->Q,
+	                                                params[1]->le, params[1]->E);
+	oqs_sidh_iqc_ref_public_key_generate(Bob_public_key, kernel_gen, params[1],
+	                                     params[0]);
+
+	// sizes in bytes
+	uint32_t prime_size = (mpz_sizeinbase(characteristic, 2) + 7) / 8;
+	uint32_t public_key_size = 12 * prime_size;
+	uint32_t shared_key_size = 2 * prime_size;
+
+	*bob_msg = NULL;
+	*key = NULL;
+	*bob_msg = malloc(public_key_size);
+	*key = malloc(shared_key_size);
+	*bob_msg_len = public_key_size;
+	*key_len = shared_key_size;
+
+	oqs_sidh_iqc_ref_public_key_to_bytes((uint8_t *) *bob_msg, Bob_public_key,
+	                                     prime_size);
+
+	public_key_t Alice_public_key;
+	oqs_sidh_iqc_ref_public_key_init(Alice_public_key);
+	oqs_sidh_iqc_ref_bytes_to_public_key(Alice_public_key, alice_msg, prime_size);
+
+	fp2_element_t Bob_shared_key;
+	oqs_sidh_iqc_ref_fp2_init(Bob_shared_key);
+	oqs_sidh_iqc_ref_shared_key_generate(Bob_shared_key, Alice_public_key,
+	                                     Bob_private_key, params[1]);
+
+	oqs_sidh_iqc_ref_fp2_to_bytes((uint8_t *) *key, Bob_shared_key, prime_size);
+
+	oqs_sidh_iqc_ref_public_key_clear(Alice_public_key);
+	oqs_sidh_iqc_ref_private_key_clear(Bob_private_key);
+	oqs_sidh_iqc_ref_public_key_clear(Bob_public_key);
+	oqs_sidh_iqc_ref_point_clear(kernel_gen);
+	oqs_sidh_iqc_ref_fp2_clear(Bob_shared_key);
+
+	return 1;
+}
+
+int OQS_KEX_sidh_iqc_ref_alice_1(OQS_KEX *k, const void *alice_priv,
+                                 const uint8_t *bob_msg,
+                                 UNUSED const size_t bob_msg_len, uint8_t **key,
+                                 size_t *key_len) {
+
+	public_params_t *params = (public_params_t *) k->params;
+
+	// sizes in bytes
+	uint32_t prime_size = (mpz_sizeinbase(characteristic, 2) + 7) / 8;
+	uint32_t shared_key_size = 2 * prime_size;
+
+	*key = NULL;
+	*key_len = shared_key_size;
+	*key = malloc(shared_key_size);
+
+	private_key_t Alice_private_key;
+	oqs_sidh_iqc_ref_private_key_init(Alice_private_key);
+	oqs_sidh_iqc_ref_bytes_to_private_key(Alice_private_key, alice_priv,
+	                                      prime_size);
+
+	public_key_t Bob_public_key;
+	oqs_sidh_iqc_ref_public_key_init(Bob_public_key);
+	oqs_sidh_iqc_ref_bytes_to_public_key(Bob_public_key, bob_msg, prime_size);
+
+	fp2_element_t Alice_shared_key;
+	oqs_sidh_iqc_ref_fp2_init(Alice_shared_key);
+	oqs_sidh_iqc_ref_shared_key_generate(Alice_shared_key, Bob_public_key,
+	                                     Alice_private_key, params[0]);
+
+	oqs_sidh_iqc_ref_fp2_to_bytes((uint8_t *) *key, Alice_shared_key, prime_size);
+
+	oqs_sidh_iqc_ref_private_key_clear(Alice_private_key);
+	oqs_sidh_iqc_ref_public_key_clear(Bob_public_key);
+	oqs_sidh_iqc_ref_fp2_clear(Alice_shared_key);
+
+	return 1;
+}
+
+void OQS_KEX_sidh_iqc_ref_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) {
+	if (alice_priv) {
+		free(alice_priv);
+	}
+}
+
+void OQS_KEX_sidh_iqc_ref_free(OQS_KEX *k) {
+	if (!k) {
+		return;
+	}
+
+	oqs_sidh_iqc_ref_public_params_clear(((public_params_t *) (k->params))[0]);
+	oqs_sidh_iqc_ref_public_params_clear(((public_params_t *) (k->params))[1]);
+	free(k->params);
+	k->params = NULL;
+	free(k->method_name);
+	k->method_name = NULL;
+	free(k->named_parameters);
+	k->named_parameters = NULL;
+	free(k);
+}
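For orientation, here is a minimal sketch of how this three-pass API is typically driven: Alice sends her public key, Bob replies with his and derives a key, and Alice derives the same key from Bob's message. It is an illustration, not part of the patch; the OQS_RAND instance is assumed to be created by the caller, and the sketch relies only on the return convention (1 on success) visible in the functions above.

#include <stdlib.h>
#include <string.h>
#include "kex_sidh_iqc_ref.h"

/* Usage sketch (not part of the patch): run one key exchange and check that
   both sides agree on the shared key. */
static int sketch_run_kex(OQS_RAND *rand) {
	OQS_KEX *k = OQS_KEX_sidh_iqc_ref_new(rand, "params771");
	if (k == NULL)
		return 0;

	void *alice_priv = NULL;
	uint8_t *alice_msg = NULL, *bob_msg = NULL, *key_a = NULL, *key_b = NULL;
	size_t alice_msg_len = 0, bob_msg_len = 0, key_a_len = 0, key_b_len = 0;
	int ok = 0;

	if (!OQS_KEX_sidh_iqc_ref_alice_0(k, &alice_priv, &alice_msg, &alice_msg_len))
		goto cleanup;
	if (!OQS_KEX_sidh_iqc_ref_bob(k, alice_msg, alice_msg_len, &bob_msg, &bob_msg_len, &key_b, &key_b_len))
		goto cleanup;
	if (!OQS_KEX_sidh_iqc_ref_alice_1(k, alice_priv, bob_msg, bob_msg_len, &key_a, &key_a_len))
		goto cleanup;

	ok = (key_a_len == key_b_len) && (memcmp(key_a, key_b, key_a_len) == 0);

cleanup:
	free(alice_msg);
	free(bob_msg);
	free(key_a);
	free(key_b);
	OQS_KEX_sidh_iqc_ref_alice_priv_free(k, alice_priv);
	OQS_KEX_sidh_iqc_ref_free(k);
	return ok;
}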
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.h b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.h
new file mode 100644
index 0000000000000000000000000000000000000000..a14d2822088f3879f4c4a1f00d5921964efb3627
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref.h
@@ -0,0 +1,28 @@
+
+#ifndef KEX_SIDH_IQC_REF_H
+#define KEX_SIDH_IQC_REF_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/kex.h>
+#include <oqs/rand.h>
+
+OQS_KEX *OQS_KEX_sidh_iqc_ref_new(OQS_RAND *rand, const char *named_parameters);
+
+int OQS_KEX_sidh_iqc_ref_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
+int OQS_KEX_sidh_iqc_ref_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
+int OQS_KEX_sidh_iqc_ref_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
+
+void OQS_KEX_sidh_iqc_ref_alice_priv_free(OQS_KEX *k, void *alice_priv);
+void OQS_KEX_sidh_iqc_ref_free(OQS_KEX *k);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* KEX_SIDH_IQC_REF_H */
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.c b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.c
new file mode 100644
index 0000000000000000000000000000000000000000..9bbe77bbf74af588eb7be5ba1b7d9ed5c85205c5
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.c
@@ -0,0 +1,85 @@
+#include <stdlib.h>
+#include <string.h>
+#include "kex_sidh_iqc_ref_params.h"
+
+typedef struct {
+	const char *name;
+	const char *params[10];
+} params_def;
+
+// clang-format off
+const params_def all_params[] = {
+    {
+        "params46",
+        {
+            "p :  60183678025727",
+            "E :  y^2 = x^3 + (33377407586757 * i + 44218433491776) * x + (14267804413813 * i + 34113052821919)",
+            "lA:  2",
+            "eA:  22",
+            "PA:  (3621292231555 * i + 37993208494088, 7444041801194 * i + 49342879615307)",
+            "QA:  (42474562877393 * i + 53371276514445, 2096833973245 * i + 34935006825293)",
+            "lB:  3",
+            "eB:  15",
+            "PB:  (15834791163149 * i + 48632673242917, 26787723276578 * i + 2080970701160)",
+            "QB:  (41347477823487 * i + 16893996428645, 16353006256863 * i + 58871308637793)"
+        }
+    },
+    {
+        "params263",
+        {
+            "p :  13278338917780691403163453935679248163066204141424819568321422575495838416502783",
+            "E :  y^2 = x^3 + (10146232096640085910917654383121220722483913358884738813297160334128811466415525*i+12561065565697579851239386918801659303795666601356542822684985096240783059294353)*x + (5173097881985929355869345579251684505584624561073144550698251610858120795396524*i+7107679418274528586696192790945059679329002947961173384005281572895084003568218)",
+            "lA:  2",
+            "eA:  130",
+            "PA:  (1195124728519659060317276132092013999345554256425666367370465963951595701748339*i + 12098972036709468461769702810131237350914726908853501736574286596252384974205652, 9783772475920257416467468866150378267376245694752823265285613818169901942309758*i + 11347159712348451494564706572599934965946403356550033502368700150470499448870987)",
+            "QA:  (13205817885805264818436305084890835188490919868599289846511015770901764583677253*i + 5747572646648472262100078852868099320898697620053049578554081522615552834142382, 11801682343040573989191884352262922625922977024975963745404870899756844108073781*i + 995065035530346107238957276796927946979246210950956147759509023538740100220494)",
+            "lB:  3",
+            "eB:  81",
+            "PB:  (5344800255669587458309912385997503623935901519546261901204157001079956379346933*i + 4377688844822469620769951245537289173274736372423169606270308984109645753298367, 6652276474756696057821879367411351758786745790244544252917780253177388224676512*i + 6708409928090950067466623637647088247028372838873736207829979327577754417492323)",
+            "QB:  (5394161621076087291764603321428338049084294313968048256313378341079709241759382*i + 11839282739753708776384780179031575074752559110018400195581350405443930573103478, 13250321748367194013481592159238890438519376028036613608154243555537109237538486*i + 5018156126061581597984382235576466750307112019427938373002833669914648135622879)"
+        }
+    },
+    {
+        "params521",
+        {
+            "p :  5646428529833603710854376801719732121889771686125102421349898409102848702003905102057129871853579459735758942656769724874115169484320460874488881525976727551",
+            "E :  y^2 = x^3 + (749284552715987846148963973296050195126229569341142224654666772427960869882697237787535113296933915107646826510805251959952317725821624926383192023779227916*i+4450862168665197219135947325665108840719206715065697554561201799074300990784248608236935291171911258967881216685164820345027022153809546719817771293646383402)*x + (2090701186560231235295975659537182225064154823783034367876346818451609525370628921026656891712603865222854303410638520367213822274197422708317359681412801686*i+928331116130151780314451251635374082476545231185861659046556547242876069870814548070746611568992085667981514874929668958586384832118048434225604407758374282)",
+            "lA:  2",
+            "eA:  258",
+            "PA:  (4099566244205693793351119863629118684504739011975746402268940060566068632610815266810397027797757094816929218567651253950072216325440815687610023993835084896*i + 1558017772998619899443036875935946235185689333987633624537644882488763783158554538347022310514300405167644627965823931934377803788161014061154800653636626931, 4309963503463625615680726988334053841208952164733703323705989592325431854359074218382917880219717866947948218257035199142577782828393068620191995480863080814*i + 371139087724151319343471759858355552237686119972572871121509307705868621618190178855645217401101942092226237837619601742237974591506374361483536984282167861)",
+            "QA:  (1068668697541208179714192612921089347931894414290359842562082470165052062241629674686530102495737378212525479245784252461983051355518227298502808569246918728*i + 3439758296201500299118396242846510199830393172149382335887091564620130903972985332523718369650346601985540123834734249105539074407495456634862920938577617312, 1377114633894100174167466575056453645918713530999472681191914854993325497527119824352424425031078252594689770391880104513192317018010057467691025379460070671*i + 4932622986840321005380766859714312144000718130204073302754586852541324804616229501269099878044960212632820224170853684078869359092914886360672352750928115581)",
+            "lB:  3",
+            "eB:  161",
+            "PB:  (1873601143875829767876930991819826178988629054425671567488176037109831662817961473686026144306947256312476316751547084289880741326649856082832561714610850944*i + 1560175318533519875886314144935322002014985257654221707041583923868859979591849198401249634353361077499233198751747045058302746944304448268907689086502178616, 4982994975025169124121736752171094366264972439763192439008272414941290947933013927951198144521578535758162978083130741822789277050594650669549067865269480720*i + 5276260709601376725929198456951440724276715138987805447686932240717621617089316893818094566900580557346731678327745302440662762271627385800896501795127323769)",
+            "QB:  (697646709404910236660735870475422491121726200391885074740251760174191839071888445718446637282034941836922171390196797602747505117748596104805560163856490804*i + 3625576702015594834652275264908614642470435266304155762812699923280767886451566560460365242828862442624975825786369269113406701427398318872227046324990780609, 4066773540363717441440268891591433184568174886592697891244632377954091519594774358483000191238791345767299029423088482812653880802118173619454377886226640626*i + 4190003034380720563592676434553383980850520959077934081654167677748426947392920518786394581045699661017779519240551462796264093681413592624709890262004623150)"
+        }
+    },
+    {
+        "params771",
+        {
+            "p :  9161191555982008052298538759697325872858383005444503030763917191888120427263653604739574602371851919945332710234806205297475768266460658683484318356498713773944703702864057467786913144364234277796785269800198817400814717913480036351",
+            "E :  y^2 = x^3 + (834414288954992257633455994192711449929625512434805347027412475310171948875352369017186937444645005409828817983476510572586689796723205608064400704385270116308944492168385499542550868452776212626111499661118550170888024552876875729*i+6422246000772528873002015578224375300444670334298744905928223359513938843110113655634334268879522218663819121887750824098836054966064056104287198717041277477329053582144813207672147369924203318339728355843554541603191165928512397074)*x + (6952862402661321818296934608460489441319492072429008834217170925899505712694617760090534612163651714423387662001257443691685988562319647888954263195545834510820670121276853179853453135161349568882745925527747286264586000816122662211*i+1801461307959256058754493292728237821856779795303869606357346421878164668617118529630863155614277813374785952465141324739461376333648338471745996274618770379494305097783365807731152848487472399799827998470890201827624741461111844749)",
+            "lA:  2",
+            "eA:  386",
+            "PA:  (8104593598414300086705087098908311675030394399959710332294184564762361863835814307651327024806690095568546280563692186849608460564606528773115207348687739021740029922619947714003573784888374548470687911506820492526985145356335776446*i + 632723349492681895135768435670357424582748082370711990704098097526814363254991414123449305576539513413267774301605548017161074510859073483979044361740779150098452314120335316096416597425252882878881588818896781804191220848731008911, 6034454472695438020325443031188950458345445112930585331722482288319997086142640364760979210290700670085317874428427766978327706213014073448960161802245001428613528860063394247112544226096847944993689243523414240839321526974724280084*i + 3376800547075148066131970733541260743185153743453912100162319249600572606491084521062090319658073231669236186390422247468127880255567740549554667385892315318084299520660699776774656198376921140804260118165199828142540405774846853362)",
+            "QA:  (2765053530820933445180998871832795313413946616218824127593215418859013295660994609348273546952174346166291903038023352135058779784133949653750586420316372475215070348272866065120539728715859798970386824706592072979142529191135265323*i + 4732630024306258879927225136904171174931421517225731042645253305380470569884989406374249571295113204909266761424593068942210539695621294791558555039840356194123377363470161894673618251481432584334320864233790205509164494505329331140, 4708584843807409676003733183136845288008597122413250473476957203240325041335536376819164132204831789648003763063194049792870695914267484368767687682889171445457695974399304884657394666807097601382128550039198416659937221320896980595*i + 6104437072476030203734870744361913380396342850775889826642334399814339224961644051070388756974954400647041041004040997989961961816982978689836414675142946310418861822777207486737128660492374354598010889893873060781304652392500710082)",
+            "lB:  3",
+            "eB:  242",
+            "PB:  (3723895349260758944309889666952259909100424286764560948312844268916772080039091215194337476636106420561414206591006321840112505108525982835492286766266110460566937541551059011352798312867785900902951383836578444811900436603779674156*i + 8743733696371247709279217014221258693400652288884395784267041686740452975091187425123471886102340505926375682973850553993788314479705338557349284829716634286210825839825870379330100097103593148671390855900137936801780665161878467993, 8424241650394632026078421716292833598872223719053431149109783663376931645995151278886785513466077121063748132909299925706619410172763350254899661370368971798311266097772796416940961537063436478392108549760549489321636110323643921123*i + 5374610701506876802640722880318277643810083668249556787469960695884056089879250039154916755196748541850985290281804798435130927222521567170894905772855374873224510597225929994186728464447646660504589278345473514974074045904197344273)",
+            "QB:  (7981195513789185488304157075392399068225052449489399943063249773724560281912789833792310612686835775356813196319643714519912123781500389027567621573946130157326769787082613646934296091151487953874493791717298439146548339580934348575*i + 6959299245778867305112554827985507377113662771316265280990751282080086185858550157506531552361757479904416443825479048359163719118671413144273420888615839877660288012435298448942304481059091585291706567711227879486375625133765783910, 4336888647745442057861067196613721067586889048321014506728806821392030059558638097842307835657146159647787621123250859783441147632121339894258934458102109985684014691372520469809743302874968486912740925764328254939284436994206821845*i + 219245698132319235934495637714582743670714862281024333766283207034829039474459867538486706426384326703893620364910932534607493596118208826082598798090838576408297983032654112984263431060439529497966028364279027386883785406090014775)"
+        }
+    }
+};
+// clang-format on
+
+const char **oqs_sidh_iqc_ref_params_from_name(const char *named_parameters) {
+	static const size_t elements = sizeof(all_params) / sizeof(all_params[0]);
+
+	for (unsigned int i = 0; i < elements; ++i) {
+		if (0 == strcmp(all_params[i].name, named_parameters)) {
+			return (const char **) all_params[i].params;
+		}
+	}
+
+	return NULL;
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.h b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.h
new file mode 100644
index 0000000000000000000000000000000000000000..ec1c6f3d72fe3e8eab0e95e6981c520a935dbe72
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/kex_sidh_iqc_ref_params.h
@@ -0,0 +1,14 @@
+#ifndef KEX_SIDH_IQC_REF_PARAMS_H
+#define KEX_SIDH_IQC_REF_PARAMS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const char **oqs_sidh_iqc_ref_params_from_name(const char *named_parameters);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve.c
new file mode 100644
index 0000000000000000000000000000000000000000..ea944ae6f301bdbb5d8c42a78b88764ae828ea03
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve.c
@@ -0,0 +1,351 @@
+#include <stdlib.h>
+
+#include "sidh_elliptic_curve.h"
+#include "sidh_util.h"
+#include <string.h>
+
+void oqs_sidh_iqc_ref_elliptic_curve_init(elliptic_curve_t E) {
+	oqs_sidh_iqc_ref_fp2_init_set_si(E->a, 0, 1);
+	oqs_sidh_iqc_ref_fp2_init_set_si(E->b, 0, 1);
+}
+
+void oqs_sidh_iqc_ref_elliptic_curve_set(elliptic_curve_t E,
+                                         const elliptic_curve_t T) {
+	oqs_sidh_iqc_ref_fp2_set(E->a, T->a);
+	oqs_sidh_iqc_ref_fp2_set(E->b, T->b);
+}
+
+void oqs_sidh_iqc_ref_elliptic_curve_set_coeffs(elliptic_curve_t E,
+                                                const fp2_element_t a,
+                                                const fp2_element_t b) {
+	oqs_sidh_iqc_ref_fp2_set(E->a, a);
+	oqs_sidh_iqc_ref_fp2_set(E->b, b);
+}
+
+void oqs_sidh_iqc_ref_point_init(point_t P) {
+	oqs_sidh_iqc_ref_fp2_init(P->x);
+	oqs_sidh_iqc_ref_fp2_init(P->y);
+	oqs_sidh_iqc_ref_point_zero(P);
+}
+
+void oqs_sidh_iqc_ref_point_set_coordinates(point_t P,
+                                            const fp2_element_t x,
+                                            const fp2_element_t y,
+                                            int z) {
+	oqs_sidh_iqc_ref_fp2_set(P->x, x);
+	oqs_sidh_iqc_ref_fp2_set(P->y, y);
+	P->z = z;
+}
+
+void oqs_sidh_iqc_ref_point_set(point_t P,
+                                const point_t Q) {
+	oqs_sidh_iqc_ref_point_set_coordinates(P, Q->x, Q->y, Q->z);
+}
+
+void oqs_sidh_iqc_ref_point_zero(point_t P) {
+	oqs_sidh_iqc_ref_fp2_zero(P->x);
+	oqs_sidh_iqc_ref_fp2_one(P->y);
+	P->z = 0;
+}
+
+int oqs_sidh_iqc_ref_point_is_zero(const point_t P) {
+	return P->z == 0;
+}
+
+void oqs_sidh_iqc_ref_point_negate(point_t P,
+                                   const point_t Q) {
+	oqs_sidh_iqc_ref_point_set(P, Q);
+	oqs_sidh_iqc_ref_fp2_negate(P->y, P->y);
+}
+
+int oqs_sidh_iqc_ref_point_has_order_2(const point_t P) {
+	return oqs_sidh_iqc_ref_fp2_is_zero(P->y);
+}
+
+void oqs_sidh_iqc_ref_elliptic_curve_clear(elliptic_curve_t E) {
+	oqs_sidh_iqc_ref_fp2_clear(E->a);
+	oqs_sidh_iqc_ref_fp2_clear(E->b);
+}
+
+void oqs_sidh_iqc_ref_point_clear(point_t P) {
+	oqs_sidh_iqc_ref_fp2_clear(P->x);
+	oqs_sidh_iqc_ref_fp2_clear(P->y);
+}
+
+int oqs_sidh_iqc_ref_point_equals(const point_t P,
+                                  const point_t Q) {
+	return oqs_sidh_iqc_ref_fp2_equals(P->x, Q->x) &&
+	       oqs_sidh_iqc_ref_fp2_equals(P->y, Q->y) &&
+	       (P->z == Q->z);
+}
+
+char *oqs_sidh_iqc_ref_elliptic_curve_get_str(const elliptic_curve_t E) {
+	char *result = "";
+	result = oqs_sidh_iqc_ref_concat(result, "y^2 = x^3");
+	if (!oqs_sidh_iqc_ref_fp2_is_zero(E->a)) {
+		result = oqs_sidh_iqc_ref_concat(result, " + (");
+		result = oqs_sidh_iqc_ref_concat(result, oqs_sidh_iqc_ref_fp2_get_str(E->a));
+		result = oqs_sidh_iqc_ref_concat(result, ")");
+		result = oqs_sidh_iqc_ref_concat(result, " * x");
+	}
+
+	if (!oqs_sidh_iqc_ref_fp2_is_zero(E->b)) {
+		result = oqs_sidh_iqc_ref_concat(result, " + (");
+		result = oqs_sidh_iqc_ref_concat(result, oqs_sidh_iqc_ref_fp2_get_str(E->b));
+		result = oqs_sidh_iqc_ref_concat(result, ")");
+	}
+
+	return result;
+}
+
+char *oqs_sidh_iqc_ref_point_get_str(const point_t P) {
+	char *result = "";
+	result = oqs_sidh_iqc_ref_concat(result, "(");
+	result = oqs_sidh_iqc_ref_concat(result, oqs_sidh_iqc_ref_fp2_get_str(P->x));
+	result = oqs_sidh_iqc_ref_concat(result, " : ");
+	result = oqs_sidh_iqc_ref_concat(result, oqs_sidh_iqc_ref_fp2_get_str(P->y));
+	result = oqs_sidh_iqc_ref_concat(result, " : ");
+	result = oqs_sidh_iqc_ref_concat(result, (P->z == 1 ? "1" : "0"));
+	result = oqs_sidh_iqc_ref_concat(result, ")");
+
+	return result;
+}
+
+void oqs_sidh_iqc_ref_point_add_with_lambda(point_t R,
+                                            const point_t P,
+                                            const point_t Q,
+                                            const fp2_element_t lambda) {
+	point_t result;
+	oqs_sidh_iqc_ref_point_init(result);
+	result->z = 1;
+
+	// x_R = lambda^2 - x_P - x_Q
+	oqs_sidh_iqc_ref_fp2_square(result->x, lambda);
+	oqs_sidh_iqc_ref_fp2_sub(result->x, result->x, P->x);
+	oqs_sidh_iqc_ref_fp2_sub(result->x, result->x, Q->x);
+
+	// y_R = lambda * (x_P - x_R) - y_P
+	oqs_sidh_iqc_ref_fp2_sub(result->y, P->x, result->x);
+	oqs_sidh_iqc_ref_fp2_mul(result->y, result->y, lambda);
+	oqs_sidh_iqc_ref_fp2_sub(result->y, result->y, P->y);
+	oqs_sidh_iqc_ref_point_set(R, result);
+
+	oqs_sidh_iqc_ref_point_clear(result);
+}
+
+void oqs_sidh_iqc_ref_point_double(point_t R,
+                                   const point_t P,
+                                   const elliptic_curve_t E) {
+	if (oqs_sidh_iqc_ref_point_is_zero(P)) {
+		oqs_sidh_iqc_ref_point_zero(R);
+		return;
+	}
+
+	// check if the point is of order 2
+	if (oqs_sidh_iqc_ref_point_has_order_2(P)) {
+		oqs_sidh_iqc_ref_point_zero(R);
+		return;
+	}
+
+	fp2_element_t temp;
+	fp2_element_t lambda;
+
+	oqs_sidh_iqc_ref_fp2_init(temp);
+	oqs_sidh_iqc_ref_fp2_init(lambda);
+
+	// lambda = (3(x_P)^2 + a) / (2y_p)
+	oqs_sidh_iqc_ref_fp2_square(lambda, P->x);
+	oqs_sidh_iqc_ref_fp2_mul_scaler_si(lambda, lambda, 3);
+	oqs_sidh_iqc_ref_fp2_add(lambda, lambda, E->a);
+	oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp, P->y, 2);
+	oqs_sidh_iqc_ref_fp2_div(lambda, lambda, temp);
+
+	oqs_sidh_iqc_ref_point_add_with_lambda(R, P, P, lambda);
+
+	oqs_sidh_iqc_ref_fp2_clear(temp);
+	oqs_sidh_iqc_ref_fp2_clear(lambda);
+}
+
+void oqs_sidh_iqc_ref_point_add(point_t R,
+                                const point_t P,
+                                const point_t Q,
+                                const elliptic_curve_t E) {
+	if (oqs_sidh_iqc_ref_point_is_zero(P)) {
+		oqs_sidh_iqc_ref_point_set(R, Q);
+		return;
+	}
+
+	if (oqs_sidh_iqc_ref_point_is_zero(Q)) {
+		oqs_sidh_iqc_ref_point_set(R, P);
+		return;
+	}
+
+	if (oqs_sidh_iqc_ref_fp2_equals(P->x, Q->x)) {
+		if (oqs_sidh_iqc_ref_fp2_equals(P->y, Q->y)) {
+			oqs_sidh_iqc_ref_point_double(R, P, E);
+			return;
+		}
+
+		oqs_sidh_iqc_ref_point_zero(R);
+		return;
+	}
+
+	fp2_element_t temp;
+	fp2_element_t lambda;
+
+	oqs_sidh_iqc_ref_fp2_init(temp);
+	oqs_sidh_iqc_ref_fp2_init(lambda);
+
+	// lambda = (y_Q - y_P) / (x_Q - x_P)
+	oqs_sidh_iqc_ref_fp2_sub(lambda, Q->y, P->y);
+	oqs_sidh_iqc_ref_fp2_sub(temp, Q->x, P->x);
+	oqs_sidh_iqc_ref_fp2_div(lambda, lambda, temp);
+
+	oqs_sidh_iqc_ref_point_add_with_lambda(R, P, Q, lambda);
+
+	oqs_sidh_iqc_ref_fp2_clear(temp);
+	oqs_sidh_iqc_ref_fp2_clear(lambda);
+}
+
+void oqs_sidh_iqc_ref_point_sub(point_t R,
+                                const point_t P,
+                                const point_t Q,
+                                const elliptic_curve_t E) {
+	point_t temp;
+	oqs_sidh_iqc_ref_point_init(temp);
+	oqs_sidh_iqc_ref_point_negate(temp, Q);
+	oqs_sidh_iqc_ref_point_add(R, P, temp, E);
+	oqs_sidh_iqc_ref_point_clear(temp);
+}
+
+void oqs_sidh_iqc_ref_point_mul_scaler(point_t R,
+                                       const point_t P,
+                                       const mpz_t scaler,
+                                       const elliptic_curve_t E) {
+	if (mpz_cmp_ui(scaler, 0) == 0) {
+		oqs_sidh_iqc_ref_point_zero(R);
+		return;
+	}
+
+	if (mpz_cmp_ui(scaler, 1) == 0) {
+		oqs_sidh_iqc_ref_point_set(R, P);
+		return;
+	}
+
+	point_t R0;
+	point_t R1;
+
+	oqs_sidh_iqc_ref_point_init(R0);
+	oqs_sidh_iqc_ref_point_init(R1);
+	oqs_sidh_iqc_ref_point_set(R1, P);
+
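+	// LSB-first double-and-add: R1 runs through 2^i * P while R0 accumulates
+	// the bits of |scaler|; the sign is applied at the end.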
+	long num_bits = mpz_sizeinbase(scaler, 2);
+	for (long i = 0; i < num_bits; i++) {
+		if (mpz_tstbit(scaler, i) == 1)
+			oqs_sidh_iqc_ref_point_add(R0, R0, R1, E);
+		oqs_sidh_iqc_ref_point_double(R1, R1, E);
+	}
+
+	if (mpz_sgn(scaler) < 0)
+		oqs_sidh_iqc_ref_point_negate(R0, R0);
+
+	oqs_sidh_iqc_ref_point_set(R, R0);
+	oqs_sidh_iqc_ref_point_clear(R0);
+	oqs_sidh_iqc_ref_point_clear(R1);
+}
+
+void oqs_sidh_iqc_ref_point_mul_scaler_si(point_t R,
+                                          const point_t P,
+                                          long scaler,
+                                          const elliptic_curve_t E) {
+	mpz_t temp;
+	mpz_init_set_si(temp, scaler);
+	oqs_sidh_iqc_ref_point_mul_scaler(R, P, temp, E);
+	mpz_clear(temp);
+}
+
+void oqs_sidh_iqc_ref_elliptic_curve_compute_j_inv(fp2_element_t j_inv,
+                                                   const elliptic_curve_t E) {
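+	// j(E) = 1728 * 4a^3 / (4a^3 + 27b^2) for E: y^2 = x^3 + a*x + b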
+	fp2_element_t result;
+	fp2_element_t temp;
+	oqs_sidh_iqc_ref_fp2_init(result);
+	oqs_sidh_iqc_ref_fp2_init(temp);
+
+	oqs_sidh_iqc_ref_fp2_pow_ui(temp, E->a, 3);
+	oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp, temp, 4);
+	oqs_sidh_iqc_ref_fp2_square(result, E->b);
+	oqs_sidh_iqc_ref_fp2_mul_scaler_si(result, result, 27);
+	oqs_sidh_iqc_ref_fp2_add(result, result, temp);
+	oqs_sidh_iqc_ref_fp2_inv(result, result);
+	oqs_sidh_iqc_ref_fp2_mul(result, result, temp);
+	oqs_sidh_iqc_ref_fp2_mul_scaler_si(result, result, 1728);
+	oqs_sidh_iqc_ref_fp2_set(j_inv, result);
+
+	oqs_sidh_iqc_ref_fp2_clear(result);
+	oqs_sidh_iqc_ref_fp2_clear(temp);
+}
+
+int oqs_sidh_iqc_ref_point_is_on_curve(const point_t P,
+                                       const elliptic_curve_t E) {
+
+	if (oqs_sidh_iqc_ref_point_is_zero(P))
+		return 1;
+
+	fp2_element_t temp_x;
+	oqs_sidh_iqc_ref_fp2_init(temp_x);
+
+	// compute x^3 + a * x + b = x * (x^2 + a) + b
+	oqs_sidh_iqc_ref_fp2_square(temp_x, P->x);
+	oqs_sidh_iqc_ref_fp2_add(temp_x, temp_x, E->a);
+	oqs_sidh_iqc_ref_fp2_mul(temp_x, temp_x, P->x);
+	oqs_sidh_iqc_ref_fp2_add(temp_x, temp_x, E->b);
+
+	fp2_element_t temp_y;
+	oqs_sidh_iqc_ref_fp2_init(temp_y);
+	oqs_sidh_iqc_ref_fp2_square(temp_y, P->y);
+
+	int result = oqs_sidh_iqc_ref_fp2_equals(temp_y, temp_x);
+
+	oqs_sidh_iqc_ref_fp2_clear(temp_x);
+	oqs_sidh_iqc_ref_fp2_clear(temp_y);
+
+	return result;
+}
+
+void oqs_sidh_iqc_ref_elliptic_curve_random_point(point_t P,
+                                                  const elliptic_curve_t E) {
+	point_t result;
+	oqs_sidh_iqc_ref_point_init(result);
+	result->z = 1;
+
+	fp2_element_t temp_x;
+	oqs_sidh_iqc_ref_fp2_init(temp_x);
+
+	fp2_element_t temp_y;
+	oqs_sidh_iqc_ref_fp2_init(temp_y);
+
+	gmp_randstate_t randstate;
+	gmp_randinit_default(randstate);
+
+	while (1) {
+		oqs_sidh_iqc_ref_fp2_random(result->x, randstate);
+
+		// compute x^3 + a * x + b = x * (x^2 + a) + b
+		oqs_sidh_iqc_ref_fp2_square(temp_x, result->x);
+		oqs_sidh_iqc_ref_fp2_add(temp_x, temp_x, E->a);
+		oqs_sidh_iqc_ref_fp2_mul(temp_x, temp_x, result->x);
+		oqs_sidh_iqc_ref_fp2_add(temp_x, temp_x, E->b);
+
+		if (oqs_sidh_iqc_ref_fp2_is_square(temp_x)) {
+			oqs_sidh_iqc_ref_fp2_sqrt(result->y, temp_x);
+			break;
+		}
+	}
+
+	oqs_sidh_iqc_ref_point_set(P, result);
+
+	oqs_sidh_iqc_ref_point_clear(result);
+	oqs_sidh_iqc_ref_fp2_clear(temp_x);
+	oqs_sidh_iqc_ref_fp2_clear(temp_y);
+	gmp_randclear(randstate);
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve.h
new file mode 100644
index 0000000000000000000000000000000000000000..3ccbcef101b228ca6c8a2d2d5ba99ddeef69140d
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve.h
@@ -0,0 +1,242 @@
+#ifndef CURVE_H
+#define CURVE_H
+
+#include "sidh_quadratic_ext.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Representation of the elliptic curve y^2 = x^3 + a * x + b
+ */
+typedef struct {
+	fp2_element_t a;
+	fp2_element_t b;
+} elliptic_curve_struct;
+
+typedef elliptic_curve_struct elliptic_curve_t[1];
+
+/**
+ * Representation of a point in the standard affine chart D+(z) of the
+ * projective plane
+ */
+typedef struct {
+	fp2_element_t x;
+	fp2_element_t y;
+	int z;
+} point_struct;
+
+typedef point_struct point_t[1];
+
+/**
+ * Initializes the input curve to y^2 = x^3 + x + 1.
+ * @param E
+ */
+void oqs_sidh_iqc_ref_elliptic_curve_init(elliptic_curve_t E);
+
+/**
+ * Copies T into E
+ * @param E
+ * @param T
+ */
+void oqs_sidh_iqc_ref_elliptic_curve_set(elliptic_curve_t E,
+                                         const elliptic_curve_t T);
+
+/**
+ * Sets the coefficients of E: y^2 = x^3 + a * x + b.
+ * @param E
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_elliptic_curve_set_coeffs(elliptic_curve_t E,
+                                                const fp2_element_t a,
+                                                const fp2_element_t b);
+
+/**
+ * Initializes the point {@code P} to the zero point (0 : 1 : 0).
+ * @param P
+ */
+void oqs_sidh_iqc_ref_point_init(point_t P);
+
+/**
+ * Sets the coordinates of the point {@code P}.
+ * @param P
+ * @param x
+ * @param y
+ * @param z
+ */
+void oqs_sidh_iqc_ref_point_set_coordinates(point_t P,
+                                            const fp2_element_t x,
+                                            const fp2_element_t y,
+                                            int z);
+
+/**
+ * Copies {@code Q} into {@code P}
+ * @param P
+ * @param Q
+ */
+void oqs_sidh_iqc_ref_point_set(point_t P,
+                                const point_t Q);
+
+/**
+ * Sets the given point to zero.
+ * @param P
+ */
+void oqs_sidh_iqc_ref_point_zero(point_t P);
+
+/**
+ * Checks if a given point is zero.
+ * @param P
+ * @return
+ */
+int oqs_sidh_iqc_ref_point_is_zero(const point_t P);
+
+/**
+ * Sets {@code P} to {@code -Q} as a group element.
+ * @param P
+ * @param Q
+ */
+void oqs_sidh_iqc_ref_point_negate(point_t P,
+                                   const point_t Q);
+
+/**
+ * Checks if 2 * {@code P} = 0.
+ * @param P
+ * @return
+ */
+int oqs_sidh_iqc_ref_point_has_order_2(const point_t P);
+
+/**
+ * Frees the memory allocated to {@code E}.
+ * @param E
+ */
+void oqs_sidh_iqc_ref_elliptic_curve_clear(elliptic_curve_t E);
+
+/**
+ * Frees the memory allocated to {@code P}.
+ * @param P
+ */
+void oqs_sidh_iqc_ref_point_clear(point_t P);
+
+/**
+ * Checks if {@code P = Q}.
+ * @param P
+ * @param Q
+ * @return 1 if the points are equal, 0 otherwise
+ */
+int oqs_sidh_iqc_ref_point_equals(const point_t P,
+                                  const point_t Q);
+
+/**
+ * @param E
+ * @return A string representation of {@code E}
+ */
+char *oqs_sidh_iqc_ref_elliptic_curve_get_str(const elliptic_curve_t E);
+
+/**
+ * @param P
+ * @return A string representation of {@code P}
+ */
+char *oqs_sidh_iqc_ref_point_get_str(const point_t P);
+
+/**
+ * Sets {@code R = P + Q} on {@code E}.
+ * @param R
+ * @param P
+ * @param Q
+ * @param E
+ */
+void oqs_sidh_iqc_ref_point_add(point_t R,
+                                const point_t P,
+                                const point_t Q,
+                                const elliptic_curve_t E);
+
+/**
+ * Sets {@code R = P - Q}.
+ * @param R
+ * @param P
+ * @param Q
+ * @param E
+ */
+void oqs_sidh_iqc_ref_point_sub(point_t R,
+                                const point_t P,
+                                const point_t Q,
+                                const elliptic_curve_t E);
+
+/**
+ * Sets {@code R = P + Q} on {@code E}.
+ * @param R
+ * @param P
+ * @param Q
+ * @param lambda The slope of the line passing through {@code P, Q}
+ */
+void oqs_sidh_iqc_ref_point_add_with_lambda(point_t R,
+                                            const point_t P,
+                                            const point_t Q,
+                                            const fp2_element_t lambda);
+
+/**
+ * Sets {@code R = 2 * P} on {@code E}.
+ * @param R
+ * @param P
+ * @param E
+ */
+void oqs_sidh_iqc_ref_point_double(point_t R,
+                                   const point_t P,
+                                   const elliptic_curve_t E);
+
+/**
+ * Sets {@code R = scaler * P} on {@code E}.
+ * @param R
+ * @param P
+ * @param scaler
+ * @param E
+ */
+void oqs_sidh_iqc_ref_point_mul_scaler(point_t R,
+                                       const point_t P,
+                                       const mpz_t scaler,
+                                       const elliptic_curve_t E);
+
+/**
+ * {@link oqs_sidh_iqc_ref_point_mul_scaler}
+ * @param R
+ * @param P
+ * @param scaler
+ * @param E
+ */
+void oqs_sidh_iqc_ref_point_mul_scaler_si(point_t R,
+                                          const point_t P,
+                                          long scaler,
+                                          const elliptic_curve_t E);
+
+/**
+ * Computes the j-invariant of {@code E}.
+ * @param j_inv
+ * @param E
+ */
+void oqs_sidh_iqc_ref_elliptic_curve_compute_j_inv(fp2_element_t j_inv,
+                                                   const elliptic_curve_t E);
+
+/**
+ * Checks if the point {@code P} is on the curve {@code E}.
+ * @param P
+ * @param E
+ * @return 1 if the point is on the curve, 0 otherwise
+ */
+int oqs_sidh_iqc_ref_point_is_on_curve(const point_t P,
+                                       const elliptic_curve_t E);
+
+/**
+ * Generates a random point on the curve {@code E}.
+ * @param P the generated random point.
+ * @param E
+ */
+void oqs_sidh_iqc_ref_elliptic_curve_random_point(point_t P,
+                                                  const elliptic_curve_t E);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CURVE_H */
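As a quick illustration of the affine point arithmetic declared above, the sketch below initializes the default curve y^2 = x^3 + x + 1, draws a random point and checks that a small scalar multiple stays on the curve. It is only a usage sketch, not part of the patch: it assumes the GF(p^2) arithmetic is bound to a characteristic via oqs_sidh_iqc_ref_fp_init_chararacteristic (as done in kex_sidh_iqc_ref.c) and that this function is visible through the included headers; the prime p is supplied by the caller.

#include <gmp.h>
#include "sidh_elliptic_curve.h"

/* Usage sketch (not part of the patch): exercise the point arithmetic on the
   default curve y^2 = x^3 + x + 1 over GF(p^2). */
static int sketch_check_point_arith(mpz_t p) {
	/* assumed to be declared by the quadratic-extension header */
	oqs_sidh_iqc_ref_fp_init_chararacteristic(p);

	elliptic_curve_t E;
	oqs_sidh_iqc_ref_elliptic_curve_init(E); /* y^2 = x^3 + x + 1 */

	point_t P, R;
	oqs_sidh_iqc_ref_point_init(P);
	oqs_sidh_iqc_ref_point_init(R);

	oqs_sidh_iqc_ref_elliptic_curve_random_point(P, E);
	oqs_sidh_iqc_ref_point_mul_scaler_si(R, P, 5, E); /* R = 5 * P */

	int ok = oqs_sidh_iqc_ref_point_is_on_curve(R, E);

	oqs_sidh_iqc_ref_point_clear(P);
	oqs_sidh_iqc_ref_point_clear(R);
	oqs_sidh_iqc_ref_elliptic_curve_clear(E);
	return ok;
}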
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.c
new file mode 100644
index 0000000000000000000000000000000000000000..838a7dcd6a1ffbe099352f0487609a974958d5d7
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.c
@@ -0,0 +1,97 @@
+#include "sidh_elliptic_curve_dlp.h"
+#include <stdio.h>
+
+void oqs_sidh_iqc_ref_elliptic_curve_prime_power_dlp(mpz_t x,
+                                                     const point_t P,
+                                                     const point_t Q,
+                                                     const elliptic_curve_t E,
+                                                     long l,
+                                                     long e) {
+	mpz_t exponent1;
+	mpz_t exponent2;
+	point_t temp_P;
+	point_t temp_Q;
+	point_t temp_R;
+	point_t PP;
+
+	mpz_init(exponent1);
+	mpz_init(exponent2);
+	oqs_sidh_iqc_ref_point_init(temp_P);
+	oqs_sidh_iqc_ref_point_init(temp_Q);
+	oqs_sidh_iqc_ref_point_init(temp_R);
+	oqs_sidh_iqc_ref_point_init(PP);
+
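+	// Pohlig–Hellman: recover the base-l digits of x one at a time, each time
+	// projecting into the order-l subgroup via multiplication by a power of l.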
+	int ladic_rep[e];
+	mpz_ui_pow_ui(exponent1, l, e - 1);
+
+	// PP = l^(e - 1) * P once and for all
+	oqs_sidh_iqc_ref_point_mul_scaler(PP, P, exponent1, E);
+
+	// compute the first ladic coefficient
+	oqs_sidh_iqc_ref_point_mul_scaler(temp_Q, Q, exponent1, E);
+	long ladic_coeff = oqs_sidh_iqc_ref_elliptic_curve_prime_dlp(PP, temp_Q, E, l);
+
+	for (int j = 1; j < e; j++) {
+		if (ladic_coeff >= 0) {
+			ladic_rep[j - 1] = ladic_coeff;
+		} else {
+			break;
+		}
+
+		mpz_ui_pow_ui(exponent2, l, j - 1);
+		mpz_mul_ui(exponent2, exponent2, ladic_rep[j - 1]);
+		mpz_divexact_ui(exponent1, exponent1, l);
+		oqs_sidh_iqc_ref_point_mul_scaler(temp_P, P, exponent2, E);
+		oqs_sidh_iqc_ref_point_add(temp_R, temp_R, temp_P, E);
+		oqs_sidh_iqc_ref_point_sub(temp_Q, Q, temp_R, E);
+		oqs_sidh_iqc_ref_point_mul_scaler(temp_Q, temp_Q, exponent1, E);
+		ladic_coeff = oqs_sidh_iqc_ref_elliptic_curve_prime_dlp(PP, temp_Q, E, l);
+	}
+
+	if (ladic_coeff >= 0) {
+		ladic_rep[e - 1] = ladic_coeff;
+
+		// set x = l_{e - 1}l^{e - 1} + ... + l_1l + l_0
+		mpz_set_ui(x, ladic_rep[e - 1]);
+		for (long i = e - 2; i >= 0; i--) {
+			mpz_mul_ui(x, x, l);
+			mpz_add_ui(x, x, ladic_rep[i]);
+		}
+	} else {
+		mpz_set_si(x, -1);
+	}
+
+	mpz_clear(exponent1);
+	mpz_clear(exponent2);
+	oqs_sidh_iqc_ref_point_clear(temp_P);
+	oqs_sidh_iqc_ref_point_clear(temp_Q);
+	oqs_sidh_iqc_ref_point_clear(temp_R);
+	oqs_sidh_iqc_ref_point_clear(PP);
+}
+
+long oqs_sidh_iqc_ref_elliptic_curve_prime_dlp(const point_t P,
+                                               const point_t Q,
+                                               const elliptic_curve_t E,
+                                               long l) {
+	if (oqs_sidh_iqc_ref_point_is_zero(Q))
+		return 0;
+
+	if (oqs_sidh_iqc_ref_point_equals(P, Q))
+		return 1;
+
+	point_t temp;
+	oqs_sidh_iqc_ref_point_init(temp);
+	oqs_sidh_iqc_ref_point_set(temp, P);
+
+	long result = -1;
+	for (long i = 2; i < l; i++) {
+		oqs_sidh_iqc_ref_point_add(temp, temp, P, E);
+		if (oqs_sidh_iqc_ref_point_equals(temp, Q)) {
+			result = i;
+			break;
+		}
+	}
+
+	oqs_sidh_iqc_ref_point_clear(temp);
+	return result;
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.h
new file mode 100644
index 0000000000000000000000000000000000000000..8d9c6c4b232d8207364802a0626bb1d11555dbe4
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_elliptic_curve_dlp.h
@@ -0,0 +1,45 @@
+#ifndef ELLIPTIC_CURVE_DLP_H
+#define ELLIPTIC_CURVE_DLP_H
+
+#include "sidh_elliptic_curve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Computes the discrete logarithm {@code Q = x * P} in a group of order
+ * {@code l^e} generated by {@code P}. The Pohlig–Hellman algorithm is used.
+ * @param x the discrete logarithm if it exists, or -1 otherwise
+ * @param P the generator of the cyclic group
+ * @param Q an element of the group generated by {@code P}
+ * @param E
+ * @param l a prime number
+ * @param e a positive integer
+ */
+void oqs_sidh_iqc_ref_elliptic_curve_prime_power_dlp(mpz_t x,
+                                                     const point_t P,
+                                                     const point_t Q,
+                                                     const elliptic_curve_t E,
+                                                     long l,
+                                                     long e);
+
+/**
+ * Computes the discrete logarithm {@code Q = x * P} in a group of order
+ * {@code l} generated by {@code P}.
+ * @param P the generator of the cyclic group
+ * @param Q an element of the group generated by {@code P}
+ * @param E
+ * @param l a prime number
+ * @return the discrete logarithm if it exists, or -1 otherwise
+ */
+long oqs_sidh_iqc_ref_elliptic_curve_prime_dlp(const point_t P,
+                                               const point_t Q,
+                                               const elliptic_curve_t E,
+                                               long l);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ELLIPTIC_CURVE_DLP_H */
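A small sketch, not part of the patch, of how the prime-power routine above is meant to be used: recover x from Q = x * P and verify the answer by scalar multiplication. P is assumed to generate a subgroup of order l^e on E, and the field arithmetic is assumed to be initialized already.

#include <gmp.h>
#include "sidh_elliptic_curve_dlp.h"

/* Usage sketch (not part of the patch): solve Q = x * P and verify x. */
static int sketch_check_dlp(const point_t P, const point_t Q,
                            const elliptic_curve_t E, long l, long e) {
	mpz_t x;
	mpz_init(x);
	oqs_sidh_iqc_ref_elliptic_curve_prime_power_dlp(x, P, Q, E, l, e);

	int ok = 0;
	if (mpz_sgn(x) >= 0) { /* -1 signals "no logarithm found" */
		point_t check;
		oqs_sidh_iqc_ref_point_init(check);
		oqs_sidh_iqc_ref_point_mul_scaler(check, P, x, E);
		ok = oqs_sidh_iqc_ref_point_equals(check, Q);
		oqs_sidh_iqc_ref_point_clear(check);
	}

	mpz_clear(x);
	return ok;
}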
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_isogeny.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_isogeny.c
new file mode 100644
index 0000000000000000000000000000000000000000..e5005f8adbed475eee7502821ad72067634e128c
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_isogeny.c
@@ -0,0 +1,470 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "sidh_isogeny.h"
+#include <math.h>
+
+void oqs_sidh_iqc_ref_isogeny_init(isogeny_t isogeny,
+                                   long kernel_size) {
+	isogeny->kernel_size = 0;
+	isogeny->partition_size = 0;
+	oqs_sidh_iqc_ref_isogeny_set_kernel_size(isogeny, kernel_size);
+	long size = isogeny->partition_size;
+	isogeny->partition = (point_t *) malloc(size * sizeof(point_t));
+	isogeny->gx = (fp2_element_t *) malloc(size * sizeof(fp2_element_t));
+	isogeny->gy = (fp2_element_t *) malloc(size * sizeof(fp2_element_t));
+	isogeny->u = (fp2_element_t *) malloc(size * sizeof(fp2_element_t));
+	isogeny->v = (fp2_element_t *) malloc(size * sizeof(fp2_element_t));
+
+	oqs_sidh_iqc_ref_elliptic_curve_init(isogeny->domain);
+	oqs_sidh_iqc_ref_elliptic_curve_init(isogeny->codomain);
+
+	for (long i = 0; i < size; i++) {
+		oqs_sidh_iqc_ref_point_init(isogeny->partition[i]);
+		oqs_sidh_iqc_ref_fp2_init(isogeny->gx[i]);
+		oqs_sidh_iqc_ref_fp2_init(isogeny->gy[i]);
+		oqs_sidh_iqc_ref_fp2_init(isogeny->u[i]);
+		oqs_sidh_iqc_ref_fp2_init(isogeny->v[i]);
+	}
+}
+
+void oqs_sidh_iqc_ref_isogeny_clear(isogeny_t isogeny) {
+	oqs_sidh_iqc_ref_elliptic_curve_clear(isogeny->domain);
+	oqs_sidh_iqc_ref_elliptic_curve_clear(isogeny->codomain);
+
+	for (long i = 0; i < isogeny->partition_size; i++) {
+		oqs_sidh_iqc_ref_point_clear(isogeny->partition[i]);
+		oqs_sidh_iqc_ref_fp2_clear(isogeny->gx[i]);
+		oqs_sidh_iqc_ref_fp2_clear(isogeny->gy[i]);
+		oqs_sidh_iqc_ref_fp2_clear(isogeny->u[i]);
+		oqs_sidh_iqc_ref_fp2_clear(isogeny->v[i]);
+	}
+
+	free(isogeny->partition);
+	free(isogeny->gx);
+	free(isogeny->gy);
+	free(isogeny->u);
+	free(isogeny->v);
+}
+
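+// Computes the isogeny whose kernel is generated by kernel_gen using Vélu's
+// formulas: partition[] holds one representative of every {P, -P} pair in the
+// kernel, and gx, gy, u, v are the per-point quantities entering the formulas
+// for the codomain coefficients a' = a - 5*sum(v), b' = b - 7*sum(u + v*x).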
+void oqs_sidh_iqc_ref_isogeny_compute(isogeny_t isogeny,
+                                      const point_t kernel_gen) {
+	oqs_sidh_iqc_ref_isogeny_partition_kernel(isogeny->partition,
+	                                          isogeny->partition_size,
+	                                          kernel_gen,
+	                                          isogeny->domain);
+	long size = isogeny->partition_size;
+
+	// compute gx_P = 3 * x_P^2 + a
+	for (long i = 0; i < size; i++) {
+		oqs_sidh_iqc_ref_fp2_square(isogeny->gx[i], isogeny->partition[i]->x);
+		oqs_sidh_iqc_ref_fp2_mul_scaler_si(isogeny->gx[i], isogeny->gx[i], 3);
+		oqs_sidh_iqc_ref_fp2_add(isogeny->gx[i], isogeny->gx[i], isogeny->domain->a);
+	}
+
+	// compute gy_P = -2y_P
+	for (long i = 0; i < size; i++) {
+		oqs_sidh_iqc_ref_fp2_mul_scaler_si(isogeny->gy[i], isogeny->partition[i]->y, -2);
+	}
+
+	// compute v_P = gx_P or 2gx_P
+	for (long i = 0; i < size; i++) {
+		if (oqs_sidh_iqc_ref_point_has_order_2(isogeny->partition[i]))
+			oqs_sidh_iqc_ref_fp2_set(isogeny->v[i], isogeny->gx[i]);
+		else
+			oqs_sidh_iqc_ref_fp2_mul_scaler_si(isogeny->v[i], isogeny->gx[i], 2);
+	}
+
+	// compute u_P = gy_P^2
+	for (long i = 0; i < size; i++) {
+		oqs_sidh_iqc_ref_fp2_square(isogeny->u[i], isogeny->gy[i]);
+	}
+
+	// compute the codomain curve
+	fp2_element_t v;
+	fp2_element_t w;
+	fp2_element_t temp;
+	oqs_sidh_iqc_ref_fp2_init(v);
+	oqs_sidh_iqc_ref_fp2_init(w);
+	oqs_sidh_iqc_ref_fp2_init(temp);
+
+	for (long i = 0; i < size; i++) {
+		oqs_sidh_iqc_ref_fp2_add(v, v, isogeny->v[i]);
+		oqs_sidh_iqc_ref_fp2_mul(temp, isogeny->v[i], isogeny->partition[i]->x);
+		oqs_sidh_iqc_ref_fp2_add(temp, isogeny->u[i], temp);
+		oqs_sidh_iqc_ref_fp2_add(w, w, temp);
+	}
+
+	oqs_sidh_iqc_ref_fp2_mul_scaler_si(v, v, 5);
+	oqs_sidh_iqc_ref_fp2_sub(v, isogeny->domain->a, v);
+	oqs_sidh_iqc_ref_fp2_mul_scaler_si(w, w, 7);
+	oqs_sidh_iqc_ref_fp2_sub(w, isogeny->domain->b, w);
+	oqs_sidh_iqc_ref_elliptic_curve_set_coeffs(isogeny->codomain, v, w);
+
+	oqs_sidh_iqc_ref_fp2_clear(v);
+	oqs_sidh_iqc_ref_fp2_clear(w);
+	oqs_sidh_iqc_ref_fp2_clear(temp);
+}
+
+void oqs_sidh_iqc_ref_isogeny_partition_kernel(point_t *partition,
+                                               long partition_size,
+                                               const point_t kernel_gen,
+                                               const elliptic_curve_t E) {
+	oqs_sidh_iqc_ref_point_set(partition[0], kernel_gen);
+	for (long i = 1; i < partition_size; i++) {
+		oqs_sidh_iqc_ref_point_add(partition[i], partition[i - 1], kernel_gen, E);
+	}
+}
+
+void oqs_sidh_iqc_ref_isogeny_set_kernel_size(isogeny_t isogeny,
+                                              long kernel_size) {
+	long current_size = isogeny->kernel_size;
+	if (current_size != 0 && current_size <= kernel_size)
+		return;
+
+	current_size = isogeny->partition_size;
+	isogeny->kernel_size = kernel_size;
+
+	if (kernel_size % 2 == 0)
+		isogeny->partition_size = kernel_size / 2;
+	else
+		isogeny->partition_size = (kernel_size - 1) / 2;
+
+	// clear the unused memory after shrinking
+	for (long i = isogeny->partition_size; i < current_size; i++) {
+		oqs_sidh_iqc_ref_point_clear(isogeny->partition[i]);
+		oqs_sidh_iqc_ref_fp2_clear(isogeny->gx[i]);
+		oqs_sidh_iqc_ref_fp2_clear(isogeny->gy[i]);
+		oqs_sidh_iqc_ref_fp2_clear(isogeny->u[i]);
+		oqs_sidh_iqc_ref_fp2_clear(isogeny->v[i]);
+	}
+}
+
+void oqs_sidh_iqc_ref_isogeny_evaluate_velu(point_t Q,
+                                            const isogeny_t isogeny,
+                                            const point_t P) {
+
+	if (oqs_sidh_iqc_ref_point_is_zero(P)) {
+		oqs_sidh_iqc_ref_point_zero(Q);
+		return;
+	}
+
+	long size = isogeny->partition_size;
+
+	fp2_element_t temp1;
+	fp2_element_t temp2;
+	fp2_element_t temp3;
+	oqs_sidh_iqc_ref_fp2_init(temp1);
+	oqs_sidh_iqc_ref_fp2_init(temp2);
+	oqs_sidh_iqc_ref_fp2_init(temp3);
+
+	point_t result;
+	oqs_sidh_iqc_ref_point_init(result);
+	oqs_sidh_iqc_ref_point_set(result, P);
+
+	for (long i = 0; i < size; i++) {
+		oqs_sidh_iqc_ref_fp2_sub(temp1, P->x, isogeny->partition[i]->x);
+
+		// check if the point is in the kernel
+		if (oqs_sidh_iqc_ref_fp2_is_zero(temp1)) {
+			oqs_sidh_iqc_ref_point_zero(result);
+			break;
+		}
+
+		// 1 / (x - x_P)
+		oqs_sidh_iqc_ref_fp2_inv(temp1, temp1);
+
+		// add 1 / (x - x_P) * (v_P + u_P / (x - x_P)) to x
+		oqs_sidh_iqc_ref_fp2_mul(temp2, isogeny->u[i], temp1);
+		oqs_sidh_iqc_ref_fp2_add(temp2, temp2, isogeny->v[i]);
+		oqs_sidh_iqc_ref_fp2_mul(temp2, temp2, temp1);
+		oqs_sidh_iqc_ref_fp2_add(result->x, result->x, temp2);
+
+		// v_P * (y - y_P) - gx_P * gy_P
+		oqs_sidh_iqc_ref_fp2_sub(temp2, P->y, isogeny->partition[i]->y);
+		oqs_sidh_iqc_ref_fp2_mul(temp2, temp2, isogeny->v[i]);
+		oqs_sidh_iqc_ref_fp2_mul(temp3, isogeny->gx[i], isogeny->gy[i]);
+		oqs_sidh_iqc_ref_fp2_sub(temp2, temp2, temp3);
+
+		// 2 * u_P * y / (x - x_P)
+		oqs_sidh_iqc_ref_fp2_mul(temp3, isogeny->u[i], P->y);
+		oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp3, temp3, 2);
+		oqs_sidh_iqc_ref_fp2_mul(temp3, temp3, temp1);
+
+		oqs_sidh_iqc_ref_fp2_add(temp3, temp3, temp2);
+		oqs_sidh_iqc_ref_fp2_square(temp1, temp1);
+		oqs_sidh_iqc_ref_fp2_mul(temp3, temp3, temp1);
+		oqs_sidh_iqc_ref_fp2_sub(result->y, result->y, temp3);
+	}
+
+	oqs_sidh_iqc_ref_point_set(Q, result);
+
+	oqs_sidh_iqc_ref_point_clear(result);
+	oqs_sidh_iqc_ref_fp2_clear(temp1);
+	oqs_sidh_iqc_ref_fp2_clear(temp2);
+	oqs_sidh_iqc_ref_fp2_clear(temp3);
+}
+
+void oqs_sidh_iqc_ref_isogeny_evaluate_kohel(point_t Q,
+                                             const isogeny_t isogeny,
+                                             const point_t P) {
+	fp2_element_t ix1;
+	fp2_element_t ix2;
+	fp2_element_t ix3;
+	fp2_element_t temp1;
+	fp2_element_t temp2;
+	fp2_element_t temp3;
+	fp2_element_t sigma1;
+
+	oqs_sidh_iqc_ref_fp2_init(ix1);
+	oqs_sidh_iqc_ref_fp2_init(ix2);
+	oqs_sidh_iqc_ref_fp2_init(ix3);
+	oqs_sidh_iqc_ref_fp2_init(temp1);
+	oqs_sidh_iqc_ref_fp2_init(temp2);
+	oqs_sidh_iqc_ref_fp2_init(temp3);
+	oqs_sidh_iqc_ref_fp2_init(sigma1);
+
+	point_t result;
+	oqs_sidh_iqc_ref_point_init(result);
+	oqs_sidh_iqc_ref_point_set(result, P);
+
+	long size = isogeny->partition_size;
+
+	for (long i = 0; i < size; i++) {
+		oqs_sidh_iqc_ref_fp2_add(sigma1, sigma1, isogeny->partition[i]->x);
+		oqs_sidh_iqc_ref_fp2_sub(temp1, P->x, isogeny->partition[i]->x);
+
+		// check if the point is in the kernel
+		if (oqs_sidh_iqc_ref_fp2_is_zero(temp1)) {
+			oqs_sidh_iqc_ref_point_zero(result);
+			break;
+		}
+
+		// 1 / (x - x_P)
+		oqs_sidh_iqc_ref_fp2_inv(temp1, temp1);
+
+		// 1 / (x - x_P)^2
+		oqs_sidh_iqc_ref_fp2_square(temp2, temp1);
+
+		// 1 / (x - x_P)^3
+		oqs_sidh_iqc_ref_fp2_mul(temp3, temp2, temp1);
+
+		if (!oqs_sidh_iqc_ref_point_has_order_2(isogeny->partition[i])) {
+			oqs_sidh_iqc_ref_fp2_add(temp1, temp1, temp1);
+			oqs_sidh_iqc_ref_fp2_add(temp2, temp2, temp2);
+			oqs_sidh_iqc_ref_fp2_add(temp3, temp3, temp3);
+			oqs_sidh_iqc_ref_fp2_add(sigma1, sigma1, isogeny->partition[i]->x);
+		}
+
+		oqs_sidh_iqc_ref_fp2_add(ix1, ix1, temp1);
+		oqs_sidh_iqc_ref_fp2_add(ix2, ix2, temp2);
+		oqs_sidh_iqc_ref_fp2_add(ix3, ix3, temp3);
+	}
+
+	if (!oqs_sidh_iqc_ref_point_is_zero(result)) {
+		fp2_element_t u1;
+		fp2_element_t u2;
+
+		oqs_sidh_iqc_ref_fp2_init(u1);
+		oqs_sidh_iqc_ref_fp2_init(u2);
+
+		// 3 * x^2 + a
+		oqs_sidh_iqc_ref_fp2_square(u1, P->x);
+		oqs_sidh_iqc_ref_fp2_mul_scaler_si(u1, u1, 3);
+		oqs_sidh_iqc_ref_fp2_add(u1, u1, isogeny->domain->a);
+
+		// 2 * y^2
+		oqs_sidh_iqc_ref_fp2_square(u2, P->y);
+		oqs_sidh_iqc_ref_fp2_mul_scaler_si(u2, u2, 2);
+
+		// compute the first coordinate
+		oqs_sidh_iqc_ref_fp2_mul_scaler_si(result->x, P->x, isogeny->kernel_size);
+		oqs_sidh_iqc_ref_fp2_sub(result->x, result->x, sigma1);
+		oqs_sidh_iqc_ref_fp2_mul(temp1, u1, ix1);
+		oqs_sidh_iqc_ref_fp2_sub(result->x, result->x, temp1);
+		oqs_sidh_iqc_ref_fp2_mul(temp1, u2, ix2);
+		oqs_sidh_iqc_ref_fp2_add(result->x, result->x, temp1);
+
+		// compute the second coordinate
+		oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp1, P->x, -6);
+		oqs_sidh_iqc_ref_fp2_mul(result->y, temp1, ix1);
+		oqs_sidh_iqc_ref_fp2_add_ui(result->y, result->y, isogeny->kernel_size);
+		oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp1, u1, 3);
+		oqs_sidh_iqc_ref_fp2_mul(temp1, temp1, ix2);
+		oqs_sidh_iqc_ref_fp2_add(result->y, result->y, temp1);
+		oqs_sidh_iqc_ref_fp2_mul_scaler_si(temp1, u2, -2);
+		oqs_sidh_iqc_ref_fp2_mul(temp1, temp1, ix3);
+		oqs_sidh_iqc_ref_fp2_add(result->y, result->y, temp1);
+		oqs_sidh_iqc_ref_fp2_mul(result->y, result->y, P->y);
+
+		oqs_sidh_iqc_ref_fp2_clear(u1);
+		oqs_sidh_iqc_ref_fp2_clear(u2);
+	}
+
+	oqs_sidh_iqc_ref_point_set(Q, result);
+
+	oqs_sidh_iqc_ref_point_clear(result);
+	oqs_sidh_iqc_ref_fp2_clear(ix1);
+	oqs_sidh_iqc_ref_fp2_clear(ix2);
+	oqs_sidh_iqc_ref_fp2_clear(ix3);
+	oqs_sidh_iqc_ref_fp2_clear(temp1);
+	oqs_sidh_iqc_ref_fp2_clear(temp2);
+	oqs_sidh_iqc_ref_fp2_clear(temp3);
+	oqs_sidh_iqc_ref_fp2_clear(sigma1);
+}
+
+void oqs_sidh_iqc_ref_isogeny_evaluate_naive(elliptic_curve_t E,
+                                             point_t *points,
+                                             long num_points,
+                                             const point_t kernel_gen,
+                                             long l,
+                                             long e,
+                                             long isogeny_jump) {
+
+	point_t temp_gen;
+	oqs_sidh_iqc_ref_point_init(temp_gen);
+	oqs_sidh_iqc_ref_point_set(temp_gen, kernel_gen);
+
+	mpz_t le;
+	mpz_init(le);
+	mpz_ui_pow_ui(le, l, e);
+
+	long kernel_size = 0;
+	if (e <= isogeny_jump)
+		kernel_size = mpz_get_si(le);
+	else
+		kernel_size = (long) pow(l, isogeny_jump);
+
+	isogeny_t isogeny;
+	oqs_sidh_iqc_ref_isogeny_init(isogeny, kernel_size);
+	oqs_sidh_iqc_ref_elliptic_curve_set(isogeny->domain, E);
+
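+	// Walk the chain of e l-isogenies in blocks of isogeny_jump steps; le is
+	// reduced each round so that [le] * temp_gen generates the kernel of the
+	// next block.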
+	long i = 0;
+	while (i < e) {
+		mpz_divexact_ui(le, le, kernel_size);
+		oqs_sidh_iqc_ref_isogeny_evaluate_naive_helper(isogeny,
+		                                               E,
+		                                               points,
+		                                               num_points,
+		                                               temp_gen,
+		                                               le);
+		i += isogeny_jump;
+
+		if ((e - i > 0) && (e - i) < isogeny_jump) {
+			kernel_size = (long) pow(l, e - i);
+			oqs_sidh_iqc_ref_isogeny_set_kernel_size(isogeny, kernel_size);
+		}
+	}
+
+	oqs_sidh_iqc_ref_point_clear(temp_gen);
+	mpz_clear(le);
+	oqs_sidh_iqc_ref_isogeny_clear(isogeny);
+}
+
+void oqs_sidh_iqc_ref_isogeny_evaluate_naive_curve(elliptic_curve_t E,
+                                                   const point_t kernel_gen,
+                                                   long l,
+                                                   long e,
+                                                   long isogeny_jump) {
+	oqs_sidh_iqc_ref_isogeny_evaluate_naive(E, NULL, 0, kernel_gen, l, e, isogeny_jump);
+}
+
+void oqs_sidh_iqc_ref_isogeny_evaluate_naive_helper(isogeny_t isogeny,
+                                                    elliptic_curve_t E,
+                                                    point_t *points,
+                                                    long num_points,
+                                                    point_t kernel_gen,
+                                                    const mpz_t le) {
+	point_t K;
+	oqs_sidh_iqc_ref_point_init(K);
+
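+	// K = [le] * kernel_gen generates the kernel of the current block; push
+	// kernel_gen and all the points through the corresponding isogeny.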
+	oqs_sidh_iqc_ref_point_mul_scaler(K, kernel_gen, le, E);
+	oqs_sidh_iqc_ref_isogeny_compute(isogeny, K);
+	oqs_sidh_iqc_ref_isogeny_evaluate_kohel(kernel_gen, isogeny, kernel_gen);
+
+	for (long i = 0; i < num_points; i++) {
+		oqs_sidh_iqc_ref_isogeny_evaluate_kohel(points[i], isogeny, points[i]);
+	}
+
+	oqs_sidh_iqc_ref_elliptic_curve_set(E, isogeny->codomain);
+	oqs_sidh_iqc_ref_elliptic_curve_set(isogeny->domain, isogeny->codomain);
+
+	oqs_sidh_iqc_ref_point_clear(K);
+}
+
+void oqs_sidh_iqc_ref_isogeny_evaluate_strategy_rec(elliptic_curve_t E,
+                                                    point_t *points,
+                                                    long num_points,
+                                                    point_t *kernel_gens,
+                                                    long num_gens,
+                                                    long l,
+                                                    long e,
+                                                    float ratio) {
+
+	if (e == 1) {
+		isogeny_t isogeny;
+
+		long kernel_size = (long) pow(l, e);
+		oqs_sidh_iqc_ref_isogeny_init(isogeny, kernel_size);
+		oqs_sidh_iqc_ref_elliptic_curve_set(isogeny->domain, E);
+		oqs_sidh_iqc_ref_isogeny_compute(isogeny, kernel_gens[num_gens - 1]);
+		oqs_sidh_iqc_ref_elliptic_curve_set(E, isogeny->codomain);
+
+		for (long i = 0; i < num_points; i++) {
+			oqs_sidh_iqc_ref_isogeny_evaluate_velu(points[i], isogeny, points[i]);
+		}
+
+		for (long i = 0; i < num_gens - 1; i++) {
+			oqs_sidh_iqc_ref_isogeny_evaluate_velu(kernel_gens[i],
+			                                       isogeny,
+			                                       kernel_gens[i]);
+		}
+
+		oqs_sidh_iqc_ref_isogeny_clear(isogeny);
+		return;
+	}
+
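+	// Descend the strategy tree: [l^r] * gen generates the kernel of the first
+	// e - r l-isogenies. Evaluate that half recursively first, then handle the
+	// remaining r steps with the images of the stored generators.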
+	long r = (long) (ratio * e);
+
+	mpz_t exponent;
+	mpz_init(exponent);
+	mpz_ui_pow_ui(exponent, l, r);
+
+	oqs_sidh_iqc_ref_point_mul_scaler(kernel_gens[num_gens],
+	                                  kernel_gens[num_gens - 1],
+	                                  exponent, E);
+
+	oqs_sidh_iqc_ref_isogeny_evaluate_strategy_rec(E, points, num_points, kernel_gens,
+	                                               num_gens + 1, l, e - r, ratio);
+	oqs_sidh_iqc_ref_isogeny_evaluate_strategy_rec(E, points, num_points, kernel_gens,
+	                                               num_gens, l, r, ratio);
+	mpz_clear(exponent);
+}
+
+void oqs_sidh_iqc_ref_isogeny_evaluate_strategy(elliptic_curve_t E,
+                                                point_t *points,
+                                                long num_points,
+                                                const point_t kernel_gen,
+                                                long l,
+                                                long e,
+                                                float ratio) {
+
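+	// The recursion below stores at most e kernel generators at a time, so the
+	// whole array is allocated up front.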
+	point_t *kernel_gens = (point_t *) malloc(e * sizeof(point_t));
+	for (long i = 0; i < e; i++)
+		oqs_sidh_iqc_ref_point_init(kernel_gens[i]);
+	oqs_sidh_iqc_ref_point_set(kernel_gens[0], kernel_gen);
+
+	oqs_sidh_iqc_ref_isogeny_evaluate_strategy_rec(E, points, num_points,
+	                                               kernel_gens, 1, l, e, ratio);
+
+	for (long i = 0; i < e; i++)
+		oqs_sidh_iqc_ref_point_clear(kernel_gens[i]);
+	free(kernel_gens);
+}
+
+void oqs_sidh_iqc_ref_isogeny_evaluate_strategy_curve(elliptic_curve_t E,
+                                                      const point_t kernel_gen,
+                                                      long l,
+                                                      long e,
+                                                      float ratio) {
+	oqs_sidh_iqc_ref_isogeny_evaluate_strategy(E, NULL, 0, kernel_gen, l, e, ratio);
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_isogeny.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_isogeny.h
new file mode 100644
index 0000000000000000000000000000000000000000..a1f26122c7dd19079a90a3e97be31bc7f711fdb2
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_isogeny.h
@@ -0,0 +1,215 @@
+#ifndef ISOGENY_H
+#define ISOGENY_H
+
+#include "sidh_elliptic_curve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Representation of an isogeny between two elliptic curves
+ */
+typedef struct {
+	// Let the kernel K of the isogeny (excluding the zero point) be the union
+	// of F and G such that R \in F if and only if -R \in G for all points
+	// R \in K. Then the partition is F.
+	point_t *partition;
+	fp2_element_t *gx;
+	fp2_element_t *gy;
+	fp2_element_t *u;
+	fp2_element_t *v;
+	elliptic_curve_t domain;
+	elliptic_curve_t codomain;
+	long partition_size;
+	long kernel_size;
+} isogeny_struct;
+
+typedef isogeny_struct isogeny_t[1];
+
+/**
+ * Initializes the isogeny {@code isogeny}.
+ * @param isogeny
+ * @param kernel_size
+ */
+void oqs_sidh_iqc_ref_isogeny_init(isogeny_t isogeny,
+                                   long kernel_size);
+
+/**
+ * Frees the memory allocated to {@code isogeny}.
+ * @param isogeny
+ */
+void oqs_sidh_iqc_ref_isogeny_clear(isogeny_t isogeny);
+
+/**
+ * Computes the isogeny from the kernel generated by {@code kernel_gen}.
+ * @param isogeny
+ * @param kernel_gen
+ */
+void oqs_sidh_iqc_ref_isogeny_compute(isogeny_t isogeny,
+                                      const point_t kernel_gen);
+
+/**
+ * Evaluates {@code isogeny} at the point {@code P}, using Velu's formulas.
+ * @param Q The result of the evaluation {@code isogeny(P)}
+ * @param isogeny
+ * @param P
+ */
+void oqs_sidh_iqc_ref_isogeny_evaluate_velu(point_t Q,
+                                            const isogeny_t isogeny,
+                                            const point_t P);
+
+/**
+ * Evaluates {@code isogeny} at the point {@code P}, using Kohel's formulas.
+ * @param Q The result of the evaluation {@code isogeny(P)}
+ * @param isogeny
+ * @param P
+ */
+void oqs_sidh_iqc_ref_isogeny_evaluate_kohel(point_t Q,
+                                             const isogeny_t isogeny,
+                                             const point_t P);
+
+/**
+ * Computes the partition for the isogeny generated by {@code kernel_gen}.
+ * @see isogeny_struct.
+ * @param partition
+ * @param partition_size
+ * @param kernel_gen
+ * @param E
+ */
+void oqs_sidh_iqc_ref_isogeny_partition_kernel(point_t *partition,
+                                               long partition_size,
+                                               const point_t kernel_gen,
+                                               const elliptic_curve_t E);
+
+/**
+ * Sets the kernel size for {@code isogeny}. The new kernel size is assumed
+ * to be smaller than the current kernel size.
+ * @param isogeny
+ * @param kernel_size
+ */
+void oqs_sidh_iqc_ref_isogeny_set_kernel_size(isogeny_t isogeny,
+                                              long kernel_size);
+
+/**
+ * Computes the images of the elliptic curve {@code E} and the points
+ * {@code points} through the isogeny with kernel generated by the point
+ * {@code kernel_gen}. The size of the kernel is {@code l^e}.
+ * @param E
+ * @param points
+ * @param num_points
+ * @param kernel_gen
+ * @param l
+ * @param e the length of the chain of l-isogenies
+ * @param isogeny_jump the number of successive l-isogenies that should
+ * be computed at once. For example, if {@code isogeny_jump = 2} then a
+ * chain of l-isogenies of length e is computed as e / 2 isogenies of degree l^2.
+ */
+void oqs_sidh_iqc_ref_isogeny_evaluate_naive(elliptic_curve_t E,
+                                             point_t *points,
+                                             long num_points,
+                                             const point_t kernel_gen,
+                                             long l,
+                                             long e,
+                                             long isogeny_jump);
+
+/**
+ * Computes the image of the elliptic curve {@code E} through the isogeny
+ * with kernel generated by the point {@code kernel_gen}.
+ * See {@link oqs_sidh_iqc_ref_isogeny_evaluate_naive}.
+ * @param E
+ * @param kernel_gen
+ * @param l
+ * @param e
+ * @param isogeny_jump
+ */
+void oqs_sidh_iqc_ref_isogeny_evaluate_naive_curve(elliptic_curve_t E,
+                                                   const point_t kernel_gen,
+                                                   long l,
+                                                   long e,
+                                                   long isogeny_jump);
+
+/**
+ * A helper method for {@link oqs_sidh_iqc_ref_isogeny_evaluate_naive}. All the arguments except
+ * {@code num_points} and {@code le} are pushed through the isogeny; for
+ * example, {@code E} is replaced by the codomain of the isogeny. This method
+ * should not be called directly.
+ * @param isogeny
+ * @param E
+ * @param points
+ * @param num_points
+ * @param kernel_gen
+ * @param le
+ */
+void oqs_sidh_iqc_ref_isogeny_evaluate_naive_helper(isogeny_t isogeny,
+                                                    elliptic_curve_t E,
+                                                    point_t *points,
+                                                    long num_points,
+                                                    point_t kernel_gen,
+                                                    const mpz_t le);
+
+/**
+ * The recursion for {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy}.
+ * @param E
+ * @param points see {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy}
+ * @param num_points see {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy}
+ * @param kernel_gens holds the kernel generators computed so far while going
+ * down the recursion tree.
+ * @param num_gens number of elements in {@code kernel_gens}
+ * @param l
+ * @param e
+ * @param ratio see {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy}
+ */
+void oqs_sidh_iqc_ref_isogeny_evaluate_strategy_rec(elliptic_curve_t E,
+                                                    point_t *points,
+                                                    long num_points,
+                                                    point_t *kernel_gens,
+                                                    long num_gens,
+                                                    long l,
+                                                    long e,
+                                                    float ratio);
+
+/**
+ * This method implements the optimal strategy approach proposed in the paper
+ * De Feo, Luca, David Jao, and Jérôme Plût. "Towards quantum-resistant
+ * cryptosystems from supersingular elliptic curve isogenies".
+ * @param E
+ * @param points the points to be evaluated through the isogeny
+ * @param num_points number of points in {@code points}
+ * @param kernel_gen the generator of the kernel of the isogeny
+ * @param l
+ * @param e
+ * @param ratio a float in the range (0, 1) that controls how the computation
+ * is split between point multiplications and isogeny evaluations. Larger
+ * values of {@code ratio} mean more multiplication and less isogeny
+ * evaluation.
+ */
+void oqs_sidh_iqc_ref_isogeny_evaluate_strategy(elliptic_curve_t E,
+                                                point_t *points,
+                                                long num_points,
+                                                const point_t kernel_gen,
+                                                long l,
+                                                long e,
+                                                float ratio);
+
+/**
+ * The same as {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy} except that there are no
+ * points to evaluate through the isogeny. This method simply calls
+ * {@link oqs_sidh_iqc_ref_isogeny_evaluate_strategy} with {@code points = NULL, num_points = 0}.
+ * @param E
+ * @param kernel_gen
+ * @param l
+ * @param e
+ * @param ratio
+ */
+void oqs_sidh_iqc_ref_isogeny_evaluate_strategy_curve(elliptic_curve_t E,
+                                                      const point_t kernel_gen,
+                                                      long l,
+                                                      long e,
+                                                      float ratio);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ISOGENY_H */
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.c
new file mode 100644
index 0000000000000000000000000000000000000000..007bb70018530028a22b9b98b1eebee3cd73e7f1
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.c
@@ -0,0 +1,85 @@
+#include "sidh_private_key.h"
+#include "sidh_util.h"
+#include "sidh_public_param.h"
+#include <stdio.h>
+
+void oqs_sidh_iqc_ref_private_key_init(private_key_t private_key) {
+	mpz_inits(private_key->m, private_key->n, NULL);
+}
+
+void oqs_sidh_iqc_ref_private_key_clear(private_key_t private_key) {
+	mpz_clears(private_key->m, private_key->n, NULL);
+}
+
+void oqs_sidh_iqc_ref_private_key_generate(private_key_t private_key,
+                                           const public_params_t params) {
+	gmp_randstate_t randstate;
+	gmp_randinit_default(randstate);
+	mpz_t seed;
+	mpz_init(seed);
+	oqs_sidh_iqc_ref_get_random_mpz(seed);
+	gmp_randseed(randstate, seed);
+
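+	// Sample (m, n) until at least one of them is coprime to l, swapping so
+	// that m is the coprime one; this keeps m invertible modulo l^e.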
+	while (1) {
+		mpz_urandomm(private_key->m, randstate, params->le);
+		mpz_urandomm(private_key->n, randstate, params->le);
+
+		if (!mpz_divisible_ui_p(private_key->m, params->l))
+			break;
+
+		if (!mpz_divisible_ui_p(private_key->n, params->l)) {
+			mpz_swap(private_key->m, private_key->n);
+			break;
+		}
+	}
+
+	gmp_randclear(randstate);
+	mpz_clear(seed);
+}
+
+void oqs_sidh_iqc_ref_private_key_compute_kernel_gen(
+    point_t gen, const private_key_t private_key, const point_t P,
+    const point_t Q, const mpz_t le, const elliptic_curve_t E) {
+	mpz_t temp_m;
+	mpz_t temp_n;
+	mpz_init_set(temp_m, private_key->m);
+	mpz_init_set(temp_n, private_key->n);
+
+	point_t result;
+	oqs_sidh_iqc_ref_point_init(result);
+
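+	// Since m is invertible mod le, <m * P + n * Q> = <P + (n / m) * Q>, so the
+	// generator is computed as P + ((m^-1 * n) mod le) * Q.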
+	mpz_invert(temp_m, temp_m, le);
+	mpz_mul(temp_n, temp_m, temp_n);
+	mpz_mod(temp_n, temp_n, le);
+
+	oqs_sidh_iqc_ref_point_mul_scaler(result, Q, temp_n, E);
+	oqs_sidh_iqc_ref_point_add(result, result, P, E);
+	oqs_sidh_iqc_ref_point_set(gen, result);
+
+	mpz_clears(temp_m, temp_n, NULL);
+	oqs_sidh_iqc_ref_point_clear(result);
+}
+
+void oqs_sidh_iqc_ref_private_key_print(const private_key_t private_key) {
+	printf("m: %s\n", mpz_get_str(NULL, 10, private_key->m));
+	printf("n: %s\n", mpz_get_str(NULL, 10, private_key->n));
+}
+
+void oqs_sidh_iqc_ref_private_key_to_bytes(uint8_t *bytes,
+                                           const private_key_t private_key,
+                                           long prime_size) {
+	for (long i = 0; i < 2 * prime_size; i++)
+		bytes[i] = 0;
+
+	mpz_export(bytes, NULL, -1, 1, 0, 0, private_key->m);
+	mpz_export(bytes + prime_size, NULL, -1, 1, 0, 0, private_key->n);
+}
+
+void oqs_sidh_iqc_ref_bytes_to_private_key(private_key_t private_key,
+                                           const uint8_t *bytes,
+                                           long prime_size) {
+	mpz_set_ui(private_key->m, 0);
+	mpz_set_ui(private_key->n, 0);
+	mpz_import(private_key->m, prime_size, -1, 1, 0, 0, bytes);
+	mpz_import(private_key->n, prime_size, -1, 1, 0, 0, bytes + prime_size);
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.h
new file mode 100644
index 0000000000000000000000000000000000000000..b8ca5a10d78bff9790cb12420c478054b95803de
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_private_key.h
@@ -0,0 +1,89 @@
+#ifndef PRIVATE_KEY_H
+#define PRIVATE_KEY_H
+
+#include "sidh_elliptic_curve.h"
+#include "sidh_public_param.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Representation of the private key in oqs_sidh_iqc_ref
+ */
+typedef struct {
+	mpz_t m;
+	mpz_t n;
+} private_key_struct;
+
+typedef private_key_struct private_key_t[1];
+
+/**
+ * Initializes the private-key.
+ * @param private_key
+ */
+void oqs_sidh_iqc_ref_private_key_init(private_key_t private_key);
+
+/**
+ * Frees the memory allocated to the private-key.
+ * @param private_key
+ */
+void oqs_sidh_iqc_ref_private_key_clear(private_key_t private_key);
+
+/**
+ * Generates the private-key. It is guaranteed that {@code private_key->m}
+ * is coprime to {@code params->l}.
+ * @param private_key
+ * @param params
+ */
+void oqs_sidh_iqc_ref_private_key_generate(private_key_t private_key,
+                                           const public_params_t params);
+
+/**
+ * Computes a generator for the kernel, {@code gen = m * P + n * Q}.
+ * It is assumed that {@code m} is invertible modulo {@code le}.
+ * @param gen
+ * @param P one of the generators of the l^e torsion.
+ * @param Q one of the generators of the l^e torsion.
+ * @param private_key
+ * @param le
+ * @param E
+ */
+void oqs_sidh_iqc_ref_private_key_compute_kernel_gen(point_t gen,
+                                                     const private_key_t private_key,
+                                                     const point_t P,
+                                                     const point_t Q,
+                                                     const mpz_t le,
+                                                     const elliptic_curve_t E);
+
+/**
+ * Converts a private-key to an array of bytes.
+ * @param bytes
+ * @param private_key
+ * @param prime_size
+ */
+void oqs_sidh_iqc_ref_private_key_to_bytes(uint8_t *bytes,
+                                           const private_key_t private_key,
+                                           long prime_size);
+
+/**
+ * Converts an array of bytes to a private-key.
+ * @param private_key
+ * @param bytes
+ * @param prime_size
+ */
+void oqs_sidh_iqc_ref_bytes_to_private_key(private_key_t private_key,
+                                           const uint8_t *bytes,
+                                           long prime_size);
+
+/**
+ * Prints {@code private_key} to the standard output.
+ * @param private_key
+ */
+void oqs_sidh_iqc_ref_private_key_print(const private_key_t private_key);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PRIVATE_KEY_H */
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.c
new file mode 100644
index 0000000000000000000000000000000000000000..77877a66979b0b21646f98391dfeff8756cc68b1
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.c
@@ -0,0 +1,96 @@
+#include "sidh_public_key.h"
+#include "sidh_isogeny.h"
+#include "sidh_private_key.h"
+#include <stdio.h>
+#include <math.h>
+
+void oqs_sidh_iqc_ref_public_key_init(public_key_t public_key) {
+	oqs_sidh_iqc_ref_elliptic_curve_init(public_key->E);
+	oqs_sidh_iqc_ref_point_init(public_key->P);
+	oqs_sidh_iqc_ref_point_init(public_key->Q);
+}
+
+void oqs_sidh_iqc_ref_public_key_clear(public_key_t public_key) {
+	oqs_sidh_iqc_ref_elliptic_curve_clear(public_key->E);
+	oqs_sidh_iqc_ref_point_clear(public_key->P);
+	oqs_sidh_iqc_ref_point_clear(public_key->Q);
+}
+
+void oqs_sidh_iqc_ref_public_key_generate(public_key_t public_key,
+                                          const point_t kernel_gen,
+                                          const public_params_t paramsA,
+                                          const public_params_t paramsB) {
+
+	point_t points[2];
+	oqs_sidh_iqc_ref_point_init(points[0]);
+	oqs_sidh_iqc_ref_point_init(points[1]);
+
+	oqs_sidh_iqc_ref_elliptic_curve_set(public_key->E, paramsA->E);
+	oqs_sidh_iqc_ref_point_set(points[0], paramsB->P);
+	oqs_sidh_iqc_ref_point_set(points[1], paramsB->Q);
+
+	oqs_sidh_iqc_ref_isogeny_evaluate_strategy(public_key->E,
+	                                           points,
+	                                           2,
+	                                           kernel_gen,
+	                                           paramsA->l,
+	                                           paramsA->e,
+	                                           0.5);
+
+	//        oqs_sidh_iqc_ref_isogeny_evaluate_naive(public_key->E,
+	//                               points,
+	//                               2,
+	//                               kernel_gen,
+	//                               paramsA->l,
+	//                               paramsA->e,
+	//                               10);
+
+	oqs_sidh_iqc_ref_point_set(public_key->P, points[0]);
+	oqs_sidh_iqc_ref_point_set(public_key->Q, points[1]);
+
+	oqs_sidh_iqc_ref_point_clear(points[0]);
+	oqs_sidh_iqc_ref_point_clear(points[1]);
+}
+
+void oqs_sidh_iqc_ref_public_key_print(const public_key_t public_key) {
+	printf("E: %s\n", oqs_sidh_iqc_ref_elliptic_curve_get_str(public_key->E));
+	printf("P: %s\n", oqs_sidh_iqc_ref_point_get_str(public_key->P));
+	printf("Q: %s\n", oqs_sidh_iqc_ref_point_get_str(public_key->Q));
+}
+
+void oqs_sidh_iqc_ref_public_key_to_bytes(uint8_t *bytes,
+                                          const public_key_t public_key,
+                                          long prime_size) {
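+	// Serialize E->a, E->b, P->x, P->y, Q->x and Q->y consecutively, each
+	// occupying 2 * prime_size bytes.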
+	long index = 0;
+	oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->E->a, prime_size);
+	index += 2 * prime_size;
+	oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->E->b, prime_size);
+	index += 2 * prime_size;
+	oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->P->x, prime_size);
+	index += 2 * prime_size;
+	oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->P->y, prime_size);
+	index += 2 * prime_size;
+	oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->Q->x, prime_size);
+	index += 2 * prime_size;
+	oqs_sidh_iqc_ref_fp2_to_bytes(bytes + index, public_key->Q->y, prime_size);
+}
+
+void oqs_sidh_iqc_ref_bytes_to_public_key(public_key_t public_key,
+                                          const uint8_t *bytes,
+                                          long prime_size) {
+	long index = 0;
+	oqs_sidh_iqc_ref_bytes_to_fp2(public_key->E->a, bytes + index, prime_size);
+	index += 2 * prime_size;
+	oqs_sidh_iqc_ref_bytes_to_fp2(public_key->E->b, bytes + index, prime_size);
+	index += 2 * prime_size;
+	oqs_sidh_iqc_ref_bytes_to_fp2(public_key->P->x, bytes + index, prime_size);
+	index += 2 * prime_size;
+	oqs_sidh_iqc_ref_bytes_to_fp2(public_key->P->y, bytes + index, prime_size);
+	index += 2 * prime_size;
+	oqs_sidh_iqc_ref_bytes_to_fp2(public_key->Q->x, bytes + index, prime_size);
+	index += 2 * prime_size;
+	oqs_sidh_iqc_ref_bytes_to_fp2(public_key->Q->y, bytes + index, prime_size);
+
+	public_key->P->z = 1;
+	public_key->Q->z = 1;
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.h
new file mode 100644
index 0000000000000000000000000000000000000000..5b55a4e49c179ef2c03782c924d876240b683ac6
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key.h
@@ -0,0 +1,77 @@
+#ifndef PUBLIC_KEY_H
+#define PUBLIC_KEY_H
+
+#include "sidh_public_param.h"
+#include "sidh_private_key.h"
+#include "sidh_isogeny.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Representation of the public key in oqs_sidh_iqc_ref
+ */
+typedef struct {
+	elliptic_curve_t E;
+	point_t P;
+	point_t Q;
+} public_key_struct;
+
+typedef public_key_struct public_key_t[1];
+
+/**
+ * Initializes the public-key.
+ * @param public_key
+ */
+void oqs_sidh_iqc_ref_public_key_init(public_key_t public_key);
+
+/**
+ * Frees the memory allocated to the public-key.
+ * @param public_key
+ */
+void oqs_sidh_iqc_ref_public_key_clear(public_key_t public_key);
+
+/**
+ * Generates the public-key
+ * @param public_key
+ * @param kernel_gen a generator for the kernel of the isogeny
+ * @param paramsA own params
+ * @param paramsB other's params
+ */
+void oqs_sidh_iqc_ref_public_key_generate(public_key_t public_key,
+                                          const point_t kernel_gen,
+                                          const public_params_t paramsA,
+                                          const public_params_t paramsB);
+
+/**
+ * Prints {@code public_key} to the standard output.
+ * @param public_key
+ */
+void oqs_sidh_iqc_ref_public_key_print(const public_key_t public_key);
+
+/**
+ * Converts a public-key to a byte array.
+ * @param bytes
+ * @param public_key
+ * @param prime_size
+ */
+void oqs_sidh_iqc_ref_public_key_to_bytes(uint8_t *bytes,
+                                          const public_key_t public_key,
+                                          long prime_size);
+
+/**
+ * Converts a byte array to a public-key.
+ * @param public_key
+ * @param bytes
+ * @param prime_size
+ */
+void oqs_sidh_iqc_ref_bytes_to_public_key(public_key_t public_key,
+                                          const uint8_t *bytes,
+                                          long prime_size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PUBLIC_KEY_H */
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.c
new file mode 100644
index 0000000000000000000000000000000000000000..ff43ea94c2a245579f08c10222ca7a1f2738c7c2
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.c
@@ -0,0 +1,141 @@
+#include "sidh_public_key_encryption.h"
+#include "sidh_public_key.h"
+#include "sidh_util.h"
+#include "sidh_shared_key.h"
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+void oqs_sidh_iqc_ref_public_key_ciphertext_init(ciphertext_t ciphertext) {
+	oqs_sidh_iqc_ref_elliptic_curve_init(ciphertext->E);
+	oqs_sidh_iqc_ref_point_init(ciphertext->P);
+	oqs_sidh_iqc_ref_point_init(ciphertext->Q);
+	ciphertext->size = 0;
+}
+
+void oqs_sidh_iqc_ref_public_key_ciphertext_clear(ciphertext_t ciphertext) {
+	oqs_sidh_iqc_ref_elliptic_curve_clear(ciphertext->E);
+	oqs_sidh_iqc_ref_point_clear(ciphertext->P);
+	oqs_sidh_iqc_ref_point_clear(ciphertext->Q);
+	free(ciphertext->content);
+	ciphertext->size = 0;
+}
+
+void oqs_sidh_iqc_ref_public_key_plaintext_init(plaintext_t plaintext) {
+	plaintext->size = 0;
+}
+
+void oqs_sidh_iqc_ref_public_key_plaintext_clear(plaintext_t plaintext) {
+	plaintext->size = 0;
+}
+
+int oqs_sidh_iqc_ref_public_key_pad_plaintext(plaintext_t result,
+                                              const plaintext_t raw) {
+	long key_size = oqs_sidh_iqc_ref_public_key_get_key_size();
+	long max_msg_size = key_size - 1;
+
+	if (raw->size > key_size) {
+		printf("\nMessage too large. It should be less than %ld bytes.\n",
+		       max_msg_size);
+		return -1;
+	}
+
+	// pad the message
+	char *new_content = (char *) malloc(max_msg_size);
+	memset(new_content, 0, max_msg_size);
+	memcpy(new_content, raw->content, raw->size);
+
+	result->content = new_content;
+	result->size = max_msg_size;
+
+	return 1;
+}
+
+void oqs_sidh_iqc_ref_public_key_encrypt(ciphertext_t ciphertext,
+                                         const plaintext_t plaintext,
+                                         const public_key_t public_keyA,
+                                         const public_params_t paramsA,
+                                         const public_params_t paramsB) {
+
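+	// Generate an ephemeral key pair on side B, derive the shared secret from
+	// A's public key, and XOR the plaintext with a keystream derived from that
+	// secret. The ephemeral public key is shipped inside the ciphertext.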
+	private_key_t private_key_temp;
+	oqs_sidh_iqc_ref_private_key_init(private_key_temp);
+	oqs_sidh_iqc_ref_private_key_generate(private_key_temp, paramsB);
+
+	point_t kernel_gen;
+	oqs_sidh_iqc_ref_point_init(kernel_gen);
+	oqs_sidh_iqc_ref_private_key_compute_kernel_gen(kernel_gen,
+	                                                private_key_temp,
+	                                                paramsB->P,
+	                                                paramsB->Q,
+	                                                paramsB->le,
+	                                                paramsB->E);
+
+	public_key_t public_key_temp;
+	oqs_sidh_iqc_ref_public_key_init(public_key_temp);
+	oqs_sidh_iqc_ref_public_key_generate(public_key_temp, kernel_gen, paramsB, paramsA);
+
+	fp2_element_t shared_key;
+	oqs_sidh_iqc_ref_fp2_init(shared_key);
+	oqs_sidh_iqc_ref_shared_key_generate(shared_key, public_keyA, private_key_temp, paramsB);
+	char *hash = oqs_sidh_iqc_ref_public_key_encryption_hash(shared_key, plaintext->size);
+
+	ciphertext->content = oqs_sidh_iqc_ref_array_xor(plaintext->content,
+	                                                 hash, plaintext->size);
+	ciphertext->size = plaintext->size;
+	oqs_sidh_iqc_ref_elliptic_curve_set(ciphertext->E, public_key_temp->E);
+	oqs_sidh_iqc_ref_point_set(ciphertext->P, public_key_temp->P);
+	oqs_sidh_iqc_ref_point_set(ciphertext->Q, public_key_temp->Q);
+
+	oqs_sidh_iqc_ref_private_key_clear(private_key_temp);
+	oqs_sidh_iqc_ref_point_clear(kernel_gen);
+	oqs_sidh_iqc_ref_public_key_clear(public_key_temp);
+	oqs_sidh_iqc_ref_fp2_clear(shared_key);
+	free(hash);
+}
+
+void oqs_sidh_iqc_ref_public_key_decrypt(plaintext_t plaintext,
+                                         const ciphertext_t ciphertext,
+                                         const private_key_t private_keyA,
+                                         const public_params_t paramsA) {
+
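+	// Rebuild the sender's ephemeral public key from the ciphertext, recompute
+	// the shared secret with A's private key, and XOR to recover the plaintext.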
+	public_key_t public_key_temp;
+	oqs_sidh_iqc_ref_public_key_init(public_key_temp);
+	oqs_sidh_iqc_ref_elliptic_curve_set(public_key_temp->E, ciphertext->E);
+	oqs_sidh_iqc_ref_point_set(public_key_temp->P, ciphertext->P);
+	oqs_sidh_iqc_ref_point_set(public_key_temp->Q, ciphertext->Q);
+
+	fp2_element_t shared_key;
+	oqs_sidh_iqc_ref_fp2_init(shared_key);
+	oqs_sidh_iqc_ref_shared_key_generate(shared_key, public_key_temp, private_keyA, paramsA);
+	char *hash = oqs_sidh_iqc_ref_public_key_encryption_hash(shared_key, ciphertext->size);
+
+	plaintext->content = oqs_sidh_iqc_ref_array_xor(ciphertext->content, hash,
+	                                                ciphertext->size);
+	plaintext->size = ciphertext->size;
+
+	oqs_sidh_iqc_ref_public_key_clear(public_key_temp);
+	oqs_sidh_iqc_ref_fp2_clear(shared_key);
+	free(hash);
+}
+
+const mp_limb_t *mpz_limbs_read(const mpz_t x);
+
+char *oqs_sidh_iqc_ref_public_key_encryption_hash(const fp2_element_t value,
+                                                  long size) {
+	// compute the size of value in chars
+	long size_a = mpz_size(value->a) * sizeof(mp_limb_t);
+	long size_b = mpz_size(value->b) * sizeof(mp_limb_t);
+
+	char *hash = (char *) malloc(size);
+
+	memcpy(hash, (char *) mpz_limbs_read(value->a), size_a);
+	memcpy(hash + size_a, (char *) mpz_limbs_read(value->b), size_b);
+
+	return hash;
+}
+
+long oqs_sidh_iqc_ref_public_key_get_key_size() {
+	// the key size is twice the size of the base prime in bytes.
+	long key_size = 2 * mpz_size(characteristic) * sizeof(mp_limb_t);
+	return key_size;
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.h
new file mode 100644
index 0000000000000000000000000000000000000000..c9a97107f0795d6f7125a4e591e69ca5e4b230d0
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_encryption.h
@@ -0,0 +1,115 @@
+#ifndef PUBLIC_KEY_ENCRYPTION_H
+#define PUBLIC_KEY_ENCRYPTION_H
+
+#include "sidh_elliptic_curve.h"
+#include "sidh_public_key.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Representation of ciphertext in oqs_sidh_iqc_ref
+ */
+typedef struct {
+	elliptic_curve_t E;
+	point_t P;
+	point_t Q;
+	char *content;
+
+	// size of the content field
+	long size;
+} ciphertext_struct;
+
+/**
+ * Representation of plaintext in oqs_sidh_iqc_ref
+ */
+typedef struct {
+	char *content;
+
+	// size of the content field
+	long size;
+} plaintext_struct;
+
+typedef ciphertext_struct ciphertext_t[1];
+typedef plaintext_struct plaintext_t[1];
+
+/**
+ * Initializes the ciphertext.
+ * @param ciphertext
+ */
+void oqs_sidh_iqc_ref_public_key_ciphertext_init(ciphertext_t ciphertext);
+
+/**
+ * Frees the memory allocated to {@code ciphertext}.
+ * @param ciphertext
+ */
+void oqs_sidh_iqc_ref_public_key_ciphertext_clear(ciphertext_t ciphertext);
+
+/**
+ * Initializes the plaintext.
+ * @param plaintext
+ */
+void oqs_sidh_iqc_ref_public_key_plaintext_init(plaintext_t plaintext);
+
+/**
+ * Frees the memory allocated to {@code plaintext}.
+ * @param plaintext
+ */
+void oqs_sidh_iqc_ref_public_key_plaintext_clear(plaintext_t plaintext);
+
+/**
+ * Pads a given plaintext for encryption.
+ * @param result the prepared plaintext
+ * @param raw the given plaintext
+ * @return 1 if successful, and -1 otherwise
+ */
+int oqs_sidh_iqc_ref_public_key_pad_plaintext(plaintext_t result,
+                                              const plaintext_t raw);
+
+/**
+ * Encrypts the {@code plaintext} using {@code public_key}.
+ * @param ciphertext the generated cipher
+ * @param plaintext
+ * @param public_keyA other's public-key
+ * @param paramsA other's public params
+ * @param paramsB own public params
+ */
+void oqs_sidh_iqc_ref_public_key_encrypt(ciphertext_t ciphertext,
+                                         const plaintext_t plaintext,
+                                         const public_key_t public_keyA,
+                                         const public_params_t paramsA,
+                                         const public_params_t paramsB);
+
+/**
+ * Decrypts the {@code ciphertext} using {@code private_key}.
+ * @param plaintext the result
+ * @param ciphertext the given ciphertext
+ * @param private_keyA
+ * @param paramsA the public parameters associated to the owner of
+ * the private-key
+ */
+void oqs_sidh_iqc_ref_public_key_decrypt(plaintext_t plaintext,
+                                         const ciphertext_t ciphertext,
+                                         const private_key_t private_keyA,
+                                         const public_params_t paramsA);
+
+/**
+ * Computes the hash of {@code value}
+ * @param value
+ * @param size size of the output hash
+ * @return the hash
+ */
+char *oqs_sidh_iqc_ref_public_key_encryption_hash(const fp2_element_t value,
+                                                  long size);
+
+/**
+ * @return the key-size in bytes
+ */
+long oqs_sidh_iqc_ref_public_key_get_key_size();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PUBLIC_KEY_ENCRYPTION_H */
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.c
new file mode 100644
index 0000000000000000000000000000000000000000..d90f56c5ae3cb15619007fc2443eab2d4982b93e
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.c
@@ -0,0 +1,89 @@
+#include "sidh_public_key_validation.h"
+#include "sidh_elliptic_curve_dlp.h"
+#include <stdio.h>
+
+int oqs_sidh_iqc_ref_public_key_is_valid(const public_key_t public_key,
+                                         const public_params_t params) {
+	if (!oqs_sidh_iqc_ref_public_key_check_order(public_key->P, public_key->E, params))
+		return 0;
+
+	if (!oqs_sidh_iqc_ref_public_key_check_order(public_key->Q, public_key->E, params))
+		return 0;
+
+	if (!oqs_sidh_iqc_ref_public_key_check_dependency(public_key, params))
+		return 0;
+
+	if (!oqs_sidh_iqc_ref_public_key_check_curve(public_key->E))
+		return 0;
+
+	return 1;
+}
+
+int oqs_sidh_iqc_ref_public_key_check_order(const point_t P,
+                                            const elliptic_curve_t E,
+                                            const public_params_t params) {
+	mpz_t order;
+	point_t temp;
+
+	mpz_init_set(order, params->le);
+	oqs_sidh_iqc_ref_point_init(temp);
+
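+	// P has exact order l^e iff [l^(e-1)]P is nonzero and [l^e]P is zero.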
+	int result = 0;
+	mpz_divexact_ui(order, order, params->l);
+	oqs_sidh_iqc_ref_point_mul_scaler(temp, P, order, E);
+	if (!oqs_sidh_iqc_ref_point_is_zero(temp)) {
+		oqs_sidh_iqc_ref_point_mul_scaler_si(temp, temp, params->l, E);
+		if (oqs_sidh_iqc_ref_point_is_zero(temp))
+			result = 1;
+	}
+
+	mpz_clear(order);
+	oqs_sidh_iqc_ref_point_clear(temp);
+	return result;
+}
+
+int oqs_sidh_iqc_ref_public_key_check_dependency(const public_key_t public_key,
+                                                 const public_params_t params) {
+	mpz_t x;
+	mpz_init(x);
+
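+	// The points are treated as independent when neither is a scalar multiple
+	// of the other, i.e. both discrete-log computations fail.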
+	int result = 0;
+	oqs_sidh_iqc_ref_elliptic_curve_prime_power_dlp(x,
+	                                                public_key->P,
+	                                                public_key->Q,
+	                                                public_key->E,
+	                                                params->l,
+	                                                params->e);
+
+	if (mpz_cmp_si(x, -1) == 0) {
+		oqs_sidh_iqc_ref_elliptic_curve_prime_power_dlp(x,
+		                                                public_key->Q,
+		                                                public_key->P,
+		                                                public_key->E,
+		                                                params->l,
+		                                                params->e);
+		if (mpz_cmp_si(x, -1) == 0)
+			result = 1;
+	}
+
+	mpz_clear(x);
+	return result;
+}
+
+int oqs_sidh_iqc_ref_public_key_check_curve(const elliptic_curve_t E) {
+	point_t temp;
+	mpz_t exponent;
+
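+	// Probabilistic test: on a curve of group order (p + 1)^2 every point is
+	// killed by multiplication by p + 1.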
+	oqs_sidh_iqc_ref_point_init(temp);
+	mpz_init_set(exponent, characteristic);
+	mpz_add_ui(exponent, exponent, 1);
+
+	oqs_sidh_iqc_ref_elliptic_curve_random_point(temp, E);
+	oqs_sidh_iqc_ref_point_mul_scaler(temp, temp, exponent, E);
+	int result = oqs_sidh_iqc_ref_point_is_zero(temp);
+
+	oqs_sidh_iqc_ref_point_clear(temp);
+	mpz_clear(exponent);
+
+	return result;
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.h
new file mode 100644
index 0000000000000000000000000000000000000000..40fd369b4563d36a21119d2e8d349be89edb343d
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_key_validation.h
@@ -0,0 +1,55 @@
+#ifndef PUBLIC_KEY_VALIDATION_H
+#define PUBLIC_KEY_VALIDATION_H
+
+#include "sidh_elliptic_curve.h"
+#include "sidh_public_key.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Checks if a given public-key is valid.
+ * @param public_key
+ * @param params the other party's public parameters from which
+ * the public-key is generated.
+ * @return 1 if the public-key is valid, 0 otherwise
+ */
+int oqs_sidh_iqc_ref_public_key_is_valid(const public_key_t public_key,
+                                         const public_params_t params);
+
+/**
+ * Checks if {@code P} has the exact order l^e where l, e are given in
+ * {@code params}.
+ * @param P
+ * @param E
+ * @param params
+ * @return 1 if {@code P} has order l^e, 0 otherwise
+ */
+int oqs_sidh_iqc_ref_public_key_check_order(const point_t P,
+                                            const elliptic_curve_t E,
+                                            const public_params_t params);
+
+/**
+ * Checks if the two points in {@code public_key} are linearly independent.
+ * @param public_key
+ * @param params
+ * @return 1 if the points are linearly independent, 0 otherwise
+ */
+int oqs_sidh_iqc_ref_public_key_check_dependency(const public_key_t public_key,
+                                                 const public_params_t params);
+
+/**
+ * Checks if a given curve is a valid supersingular curve. A curve is considered
+ * valid if it has order (p + 1)^2 where p is the characteristic. The test
+ * is done probabilistically.
+ * @param E
+ * @return 1 if the curve is valid, 0 otherwise.
+ */
+int oqs_sidh_iqc_ref_public_key_check_curve(const elliptic_curve_t E);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PUBLIC_KEY_VALIDATION_H */
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.c
new file mode 100644
index 0000000000000000000000000000000000000000..260a63b80ea13cd32ae44b212a14b6b1fafbb3d8
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.c
@@ -0,0 +1,75 @@
+#include <stdio.h>
+#include "sidh_public_param.h"
+
+void oqs_sidh_iqc_ref_public_params_init(public_params_t params) {
+	mpz_init(params->characteristic);
+	oqs_sidh_iqc_ref_elliptic_curve_init(params->E);
+	oqs_sidh_iqc_ref_point_init(params->P);
+	oqs_sidh_iqc_ref_point_init(params->Q);
+	mpz_init(params->le);
+}
+
+int oqs_sidh_iqc_ref_public_params_read(public_params_t paramsA,
+                                        public_params_t paramsB,
+                                        const char **input) {
+	fp2_element_t a;
+	fp2_element_t b;
+	oqs_sidh_iqc_ref_fp2_init(a);
+	oqs_sidh_iqc_ref_fp2_init(b);
+
+	gmp_sscanf(input[0], "p : %Zd \n", paramsA->characteristic);
+	mpz_set(paramsB->characteristic, paramsA->characteristic);
+	gmp_sscanf(input[1],
+	           "E : y^2 = x^3 + (%Zd * i + %Zd) * x + (%Zd * i + %Zd) \n",
+	           a->a, a->b, b->a, b->b);
+	oqs_sidh_iqc_ref_elliptic_curve_set_coeffs(paramsA->E, a, b);
+	oqs_sidh_iqc_ref_elliptic_curve_set(paramsB->E, paramsA->E);
+	gmp_sscanf(input[2], "lA: %ld \n", &paramsA->l);
+	gmp_sscanf(input[3], "eA: %ld \n", &paramsA->e);
+	mpz_ui_pow_ui(paramsA->le, paramsA->l, paramsA->e);
+	gmp_sscanf(input[4],
+	           "PA: (%Zd * i + %Zd, %Zd * i + %Zd) \n",
+	           a->a, a->b, b->a, b->b);
+	oqs_sidh_iqc_ref_point_set_coordinates(paramsA->P, a, b, 1);
+	gmp_sscanf(input[5],
+	           "QA: (%Zd * i + %Zd, %Zd * i + %Zd) \n",
+	           a->a, a->b, b->a, b->b);
+	oqs_sidh_iqc_ref_point_set_coordinates(paramsA->Q, a, b, 1);
+	gmp_sscanf(input[6], "lB: %ld \n", &paramsB->l);
+	gmp_sscanf(input[7], "eB: %ld \n", &paramsB->e);
+	mpz_ui_pow_ui(paramsB->le, paramsB->l, paramsB->e);
+	gmp_sscanf(input[8],
+	           "PB: (%Zd * i + %Zd, %Zd * i + %Zd) \n",
+	           a->a, a->b, b->a, b->b);
+	oqs_sidh_iqc_ref_point_set_coordinates(paramsB->P, a, b, 1);
+	gmp_sscanf(input[9],
+	           "QB: (%Zd * i + %Zd, %Zd * i + %Zd) \n",
+	           a->a, a->b, b->a, b->b);
+	oqs_sidh_iqc_ref_point_set_coordinates(paramsB->Q, a, b, 1);
+
+	oqs_sidh_iqc_ref_fp2_clear(a);
+	oqs_sidh_iqc_ref_fp2_clear(b);
+
+	return 1;
+}
+
+void oqs_sidh_iqc_ref_public_params_print(const public_params_t params,
+                                          int print_torsion) {
+	if (print_torsion != 1) {
+		printf("p : %s\n", mpz_get_str(NULL, 10, params->characteristic));
+		printf("E : %s\n", oqs_sidh_iqc_ref_elliptic_curve_get_str(params->E));
+	}
+
+	printf("lA: %ld\n", params->l);
+	printf("eA: %ld\n", params->e);
+	printf("PA: %s\n", oqs_sidh_iqc_ref_point_get_str(params->P));
+	printf("QA: %s\n", oqs_sidh_iqc_ref_point_get_str(params->Q));
+}
+
+void oqs_sidh_iqc_ref_public_params_clear(public_params_t params) {
+	mpz_clear(params->characteristic);
+	oqs_sidh_iqc_ref_elliptic_curve_clear(params->E);
+	oqs_sidh_iqc_ref_point_clear(params->P);
+	oqs_sidh_iqc_ref_point_clear(params->Q);
+	mpz_clear(params->le);
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.h
new file mode 100644
index 0000000000000000000000000000000000000000..7d594452706db802b254b6fee7d8f9c38a37c669
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_public_param.h
@@ -0,0 +1,68 @@
+#ifndef PUBLIC_PARAM_H
+#define PUBLIC_PARAM_H
+
+#include "sidh_elliptic_curve.h"
+#include "sidh_quadratic_ext.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Representation of the public parameters in oqs_sidh_iqc_ref
+ */
+typedef struct {
+	// the characteristic
+	mpz_t characteristic;
+
+	elliptic_curve_t E;
+	unsigned long l;
+	unsigned long e;
+
+	// a generator for the l^e torsion subgroup of E
+	point_t P;
+	point_t Q;
+
+	// l^e, precomputed
+	mpz_t le;
+
+} public_params_struct;
+
+typedef public_params_struct public_params_t[1];
+
+/**
+ * Initializes the public parameters.
+ * @param params
+ */
+void oqs_sidh_iqc_ref_public_params_init(public_params_t params);
+
+/**
+ * Reads the public parameters from the array pointed to by {@code input}.
+ * @param paramsA
+ * @param paramsB
+ * @param input
+ * @return 1 if the parameters are read successfully, and 0 otherwise.
+ */
+int oqs_sidh_iqc_ref_public_params_read(public_params_t paramsA,
+                                        public_params_t paramsB,
+                                        const char **input);
+
+/**
+ * Prints the public parameters to the standard output.
+ * @param params
+ * @param print_torsion if it is 1, only the torsion parameters are printed
+ */
+void oqs_sidh_iqc_ref_public_params_print(const public_params_t params,
+                                          int print_torsion);
+
+/**
+ * Frees the memory allocated to {@code params}.
+ * @param params
+ */
+void oqs_sidh_iqc_ref_public_params_clear(public_params_t params);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PUBLIC_PARAM_H */
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.c
new file mode 100644
index 0000000000000000000000000000000000000000..990f5466c9c27a7b6f30fad960ba7177c15987eb
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.c
@@ -0,0 +1,426 @@
+#include "sidh_quadratic_ext.h"
+#include "sidh_util.h"
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+void oqs_sidh_iqc_ref_fp_init_chararacteristic_ui(long p) {
+	mpz_init_set_ui(characteristic, p);
+}
+
+void oqs_sidh_iqc_ref_fp_init_chararacteristic_str(const char *value) {
+	mpz_init_set_str(characteristic, value, 10);
+}
+
+void oqs_sidh_iqc_ref_fp_init_chararacteristic(const mpz_t p) {
+	mpz_init_set(characteristic, p);
+}
+
+void oqs_sidh_iqc_ref_fp_set(mpz_t x, const mpz_t a) {
+	mpz_mod(x, a, characteristic);
+}
+
+void oqs_sidh_iqc_ref_fp_add(mpz_t x, const mpz_t a, const mpz_t b) {
+	mpz_add(x, a, b);
+	mpz_mod(x, x, characteristic);
+}
+
+void oqs_sidh_iqc_ref_fp_add_ui(mpz_t x, const mpz_t a, unsigned long b) {
+	mpz_add_ui(x, a, b);
+	mpz_mod(x, x, characteristic);
+}
+
+void oqs_sidh_iqc_ref_fp_sub(mpz_t x, const mpz_t a, const mpz_t b) {
+	mpz_sub(x, a, b);
+	mpz_mod(x, x, characteristic);
+}
+
+void oqs_sidh_iqc_ref_fp_sub_ui(mpz_t x, const mpz_t a, unsigned long b) {
+	mpz_sub_ui(x, a, b);
+	mpz_mod(x, x, characteristic);
+}
+
+void oqs_sidh_iqc_ref_fp_mul(mpz_t x, const mpz_t a, const mpz_t b) {
+	mpz_mul(x, a, b);
+	mpz_mod(x, x, characteristic);
+}
+
+void oqs_sidh_iqc_ref_fp_mul_si(mpz_t x, const mpz_t a, long b) {
+	mpz_mul_si(x, a, b);
+	mpz_mod(x, x, characteristic);
+}
+
+void oqs_sidh_iqc_ref_fp_inv(mpz_t x, const mpz_t a) {
+	mpz_invert(x, a, characteristic);
+}
+
+void oqs_sidh_iqc_ref_fp_div(mpz_t x, const mpz_t a, const mpz_t b) {
+	oqs_sidh_iqc_ref_fp_inv(x, b);
+	oqs_sidh_iqc_ref_fp_mul(x, a, x);
+}
+
+void oqs_sidh_iqc_ref_fp_neg(mpz_t x, const mpz_t a) {
+	oqs_sidh_iqc_ref_fp_sub(x, characteristic, a);
+}
+
+void oqs_sidh_iqc_ref_fp_sqrt(mpz_t x, const mpz_t a) {
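+	// Assumes p = 3 (mod 4): a^((p + 1) / 4) is a square root of a whenever a
+	// is a quadratic residue mod p.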
+	mpz_t exponent;
+	mpz_init(exponent);
+
+	// compute (p + 1) / 4
+	mpz_add_ui(exponent, characteristic, 1);
+	mpz_divexact_ui(exponent, exponent, 4);
+
+	mpz_powm(x, a, exponent, characteristic);
+	mpz_clear(exponent);
+}
+
+//////////////// fp2 methods //////////////////////////
+
+void oqs_sidh_iqc_ref_fp2_init(fp2_element_t x) { mpz_inits(x->a, x->b, NULL); }
+
+void oqs_sidh_iqc_ref_fp2_init_set_si(fp2_element_t x, long a, long b) {
+	mpz_init_set_si(x->a, a);
+	mpz_init_set_si(x->b, b);
+}
+
+void oqs_sidh_iqc_ref_fp2_init_set_str(fp2_element_t x, const char *a,
+                                       const char *b) {
+	mpz_init_set_str(x->a, a, 10);
+	mpz_init_set_str(x->b, b, 10);
+}
+
+void oqs_sidh_iqc_ref_fp2_init_set(fp2_element_t x, const fp2_element_t a) {
+	mpz_init_set(x->a, a->a);
+	mpz_init_set(x->b, a->b);
+}
+
+void oqs_sidh_iqc_ref_fp2_clear(fp2_element_t x) {
+	mpz_clears(x->a, x->b, NULL);
+}
+
+void oqs_sidh_iqc_ref_fp2_set(fp2_element_t x, const fp2_element_t b) {
+	mpz_set(x->a, b->a);
+	mpz_set(x->b, b->b);
+}
+
+void oqs_sidh_iqc_ref_fp2_zero(fp2_element_t x) {
+	mpz_set_si(x->a, 0);
+	mpz_set_si(x->b, 0);
+}
+
+void oqs_sidh_iqc_ref_fp2_one(fp2_element_t x) {
+	mpz_set_si(x->a, 0);
+	mpz_set_si(x->b, 1);
+}
+
+char *oqs_sidh_iqc_ref_fp2_get_str(const fp2_element_t a) {
+
+	if (mpz_cmp_si(a->a, 0) == 0 && mpz_cmp_si(a->b, 0) == 0) {
+		return "0";
+	}
+
+	if (mpz_cmp_si(a->a, 0) == 0) {
+		return mpz_get_str(NULL, 10, a->b);
+	}
+
+	char *result = "";
+
+	if (mpz_cmp_si(a->b, 0) == 0) {
+		result = oqs_sidh_iqc_ref_concat(result, mpz_get_str(NULL, 10, a->a));
+		result = oqs_sidh_iqc_ref_concat(result, " * i");
+		return result;
+	}
+
+	result = oqs_sidh_iqc_ref_concat(result, mpz_get_str(NULL, 10, a->a));
+	result = oqs_sidh_iqc_ref_concat(result, " * i + ");
+	result = oqs_sidh_iqc_ref_concat(result, mpz_get_str(NULL, 10, a->b));
+
+	return result;
+}
+
+void oqs_sidh_iqc_ref_fp2_add(fp2_element_t x, const fp2_element_t a,
+                              const fp2_element_t b) {
+	oqs_sidh_iqc_ref_fp_add(x->a, a->a, b->a);
+	oqs_sidh_iqc_ref_fp_add(x->b, a->b, b->b);
+}
+
+void oqs_sidh_iqc_ref_fp2_add_ui(fp2_element_t x, const fp2_element_t a,
+                                 unsigned long b) {
+	oqs_sidh_iqc_ref_fp_add_ui(x->b, a->b, b);
+	oqs_sidh_iqc_ref_fp_set(x->a, a->a);
+}
+
+void oqs_sidh_iqc_ref_fp2_sub(fp2_element_t x, const fp2_element_t a,
+                              const fp2_element_t b) {
+	oqs_sidh_iqc_ref_fp_sub(x->a, a->a, b->a);
+	oqs_sidh_iqc_ref_fp_sub(x->b, a->b, b->b);
+}
+
+void oqs_sidh_iqc_ref_fp2_sub_ui(fp2_element_t x, const fp2_element_t a,
+                                 unsigned long b) {
+	oqs_sidh_iqc_ref_fp_sub_ui(x->b, a->b, b);
+	oqs_sidh_iqc_ref_fp_set(x->a, a->a);
+}
+
+void oqs_sidh_iqc_ref_fp2_mul(fp2_element_t x, const fp2_element_t a,
+                              const fp2_element_t b) {
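+	// Multiplication in Fp[i]/(i^2 + 1) using three base-field multiplications:
+	// for (a*i + b)(c*i + d), the i-part is (a + b)(c + d) - ac - bd and the
+	// constant part is bd - ac.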
+	mpz_t temp1;
+	mpz_t temp2;
+
+	mpz_init(temp1);
+	mpz_init(temp2);
+
+	fp2_element_t result;
+	oqs_sidh_iqc_ref_fp2_init(result);
+
+	// (a + b) * (c + d)
+	oqs_sidh_iqc_ref_fp_add(temp1, a->a, a->b);
+	oqs_sidh_iqc_ref_fp_add(temp2, b->a, b->b);
+	oqs_sidh_iqc_ref_fp_mul(result->a, temp1, temp2);
+
+	// a * c
+	oqs_sidh_iqc_ref_fp_mul(temp1, a->a, b->a);
+	// b * d
+	oqs_sidh_iqc_ref_fp_mul(temp2, a->b, b->b);
+
+	oqs_sidh_iqc_ref_fp_sub(result->a, result->a, temp1);
+	oqs_sidh_iqc_ref_fp_sub(result->a, result->a, temp2);
+	oqs_sidh_iqc_ref_fp_sub(result->b, temp2, temp1);
+	oqs_sidh_iqc_ref_fp2_set(x, result);
+
+	mpz_clear(temp1);
+	mpz_clear(temp2);
+	oqs_sidh_iqc_ref_fp2_clear(result);
+}
+
+void oqs_sidh_iqc_ref_fp2_square(fp2_element_t x, const fp2_element_t a) {
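+	// Squaring in Fp[i]/(i^2 + 1): (a*i + b)^2 = (2ab)*i + (b + a)(b - a).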
+	mpz_t temp1;
+	mpz_t temp2;
+
+	mpz_init(temp1);
+	mpz_init(temp2);
+
+	fp2_element_t result;
+	oqs_sidh_iqc_ref_fp2_init(result);
+
+	// (b + a) * (b - a)
+	oqs_sidh_iqc_ref_fp_add(temp1, a->a, a->b);
+	oqs_sidh_iqc_ref_fp_sub(temp2, a->b, a->a);
+	oqs_sidh_iqc_ref_fp_mul(result->b, temp1, temp2);
+
+	// 2 * a * b
+	oqs_sidh_iqc_ref_fp_mul(result->a, a->a, a->b);
+	oqs_sidh_iqc_ref_fp_mul_si(result->a, result->a, 2);
+
+	oqs_sidh_iqc_ref_fp2_set(x, result);
+
+	mpz_clear(temp1);
+	mpz_clear(temp2);
+	oqs_sidh_iqc_ref_fp2_clear(result);
+}
+
+void oqs_sidh_iqc_ref_fp2_pow_ui(fp2_element_t x, const fp2_element_t a,
+                                 unsigned long n) {
+	mpz_t temp_n;
+	mpz_init_set_ui(temp_n, n);
+	oqs_sidh_iqc_ref_fp2_pow(x, a, temp_n);
+	mpz_clear(temp_n);
+}
+
+void oqs_sidh_iqc_ref_fp2_pow(fp2_element_t x, const fp2_element_t a,
+                              const mpz_t n) {
+	if (mpz_cmp_ui(n, 0) == 0) {
+		oqs_sidh_iqc_ref_fp2_one(x);
+		return;
+	}
+
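+	// Right-to-left binary exponentiation: temp2 holds a^(2^i) while temp1
+	// accumulates the product over the set bits of n.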
+	fp2_element_t temp1;
+	fp2_element_t temp2;
+	oqs_sidh_iqc_ref_fp2_init_set_si(temp1, 0, 1);
+	oqs_sidh_iqc_ref_fp2_init_set(temp2, a);
+
+	long num_bits = mpz_sizeinbase(n, 2);
+	for (long i = 0; i < num_bits; i++) {
+		if (mpz_tstbit(n, i) == 1)
+			oqs_sidh_iqc_ref_fp2_mul(temp1, temp1, temp2);
+		oqs_sidh_iqc_ref_fp2_square(temp2, temp2);
+	}
+
+	oqs_sidh_iqc_ref_fp2_set(x, temp1);
+
+	oqs_sidh_iqc_ref_fp2_clear(temp1);
+	oqs_sidh_iqc_ref_fp2_clear(temp2);
+}
+
+void oqs_sidh_iqc_ref_fp2_conjugate(fp2_element_t x, const fp2_element_t a) {
+	oqs_sidh_iqc_ref_fp2_set(x, a);
+	oqs_sidh_iqc_ref_fp_neg(x->a, x->a);
+}
+
+void oqs_sidh_iqc_ref_fp2_negate(fp2_element_t x, const fp2_element_t a) {
+	oqs_sidh_iqc_ref_fp2_set(x, a);
+	oqs_sidh_iqc_ref_fp_neg(x->a, x->a);
+	oqs_sidh_iqc_ref_fp_neg(x->b, x->b);
+}
+
+void oqs_sidh_iqc_ref_fp2_mul_scaler(fp2_element_t x, const fp2_element_t a,
+                                     const mpz_t scaler) {
+	oqs_sidh_iqc_ref_fp_mul(x->a, a->a, scaler);
+	oqs_sidh_iqc_ref_fp_mul(x->b, a->b, scaler);
+}
+
+void oqs_sidh_iqc_ref_fp2_mul_scaler_si(fp2_element_t x, const fp2_element_t a,
+                                        long scaler) {
+	oqs_sidh_iqc_ref_fp_mul_si(x->a, a->a, scaler);
+	oqs_sidh_iqc_ref_fp_mul_si(x->b, a->b, scaler);
+}
+
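+/*
+ * Inversion via the conjugate: for x = a*i + b the norm N(x) = x * conj(x)
+ * = a^2 + b^2 lies in F_p, so 1/x = conj(x) / N(x) and only a single
+ * inversion in F_p is required.
+ */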
+void oqs_sidh_iqc_ref_fp2_inv(fp2_element_t x, const fp2_element_t a) {
+	mpz_t temp;
+	fp2_element_t result;
+
+	mpz_init(temp);
+	oqs_sidh_iqc_ref_fp2_init(result);
+
+	oqs_sidh_iqc_ref_fp2_conjugate(result, a);
+	oqs_sidh_iqc_ref_fp2_norm(temp, a);
+	oqs_sidh_iqc_ref_fp_inv(temp, temp);
+	oqs_sidh_iqc_ref_fp2_mul_scaler(result, result, temp);
+	oqs_sidh_iqc_ref_fp2_set(x, result);
+
+	mpz_clear(temp);
+	oqs_sidh_iqc_ref_fp2_clear(result);
+}
+
+void oqs_sidh_iqc_ref_fp2_div(fp2_element_t x, const fp2_element_t a,
+                              const fp2_element_t b) {
+	fp2_element_t result;
+	oqs_sidh_iqc_ref_fp2_init(result);
+
+	oqs_sidh_iqc_ref_fp2_inv(result, b);
+	oqs_sidh_iqc_ref_fp2_mul(result, a, result);
+	oqs_sidh_iqc_ref_fp2_set(x, result);
+
+	oqs_sidh_iqc_ref_fp2_clear(result);
+}
+
+int oqs_sidh_iqc_ref_fp2_is_zero(const fp2_element_t a) {
+	return !mpz_cmp_si(a->a, 0) && !mpz_cmp_si(a->b, 0);
+}
+
+int oqs_sidh_iqc_ref_fp2_is_one(const fp2_element_t a) {
+	return !mpz_cmp_si(a->a, 0) && !mpz_cmp_si(a->b, 1);
+}
+
+int oqs_sidh_iqc_ref_fp2_equals(const fp2_element_t a, const fp2_element_t b) {
+	return (mpz_cmp(a->a, b->a) == 0) && (mpz_cmp(a->b, b->b) == 0);
+}
+
+void oqs_sidh_iqc_ref_fp2_random(fp2_element_t x, gmp_randstate_t randstate) {
+	mpz_urandomm(x->a, randstate, characteristic);
+	mpz_urandomm(x->b, randstate, characteristic);
+}
+
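+/*
+ * Square root following Doliskani & Schost: pick a random c until
+ * b = 1 + (c^2 * a)^((p - 1) / 2) is non-zero.  Then beta = (c^2 * a) * b^2
+ * lies in the prime field, and sqrt(a) = sqrt(beta) / (b * c), so only one
+ * base-field square root (computed here for p = 3 mod 4) is needed.
+ */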
+void oqs_sidh_iqc_ref_fp2_sqrt(fp2_element_t x, const fp2_element_t a) {
+	mpz_t exponent;
+	fp2_element_t temp_a;
+	fp2_element_t b;
+	fp2_element_t c;
+	fp2_element_t beta;
+	mpz_t base_root;
+	gmp_randstate_t randstate;
+
+	mpz_init(exponent);
+	oqs_sidh_iqc_ref_fp2_init(temp_a);
+	oqs_sidh_iqc_ref_fp2_init(b);
+	oqs_sidh_iqc_ref_fp2_init(c);
+	oqs_sidh_iqc_ref_fp2_init(beta);
+	mpz_init(base_root);
+	gmp_randinit_default(randstate);
+
+	// compute (p - 1) / 2
+	mpz_sub_ui(exponent, characteristic, 1);
+	mpz_divexact_ui(exponent, exponent, 2);
+
+	while (oqs_sidh_iqc_ref_fp2_is_zero(b)) {
+		oqs_sidh_iqc_ref_fp2_random(c, randstate);
+		oqs_sidh_iqc_ref_fp2_square(temp_a, c);
+		oqs_sidh_iqc_ref_fp2_mul(temp_a, temp_a, a);
+
+		// compute 1 + temp_a^((p - 1) / 2)
+		oqs_sidh_iqc_ref_fp2_pow(b, temp_a, exponent);
+		oqs_sidh_iqc_ref_fp2_add_ui(b, b, 1);
+	}
+
+	// compute temp_a * b^2
+	oqs_sidh_iqc_ref_fp2_square(beta, b);
+	oqs_sidh_iqc_ref_fp2_mul(beta, beta, temp_a);
+
+	// beta is now in the prime field
+	oqs_sidh_iqc_ref_fp_sqrt(base_root, beta->b);
+	oqs_sidh_iqc_ref_fp2_inv(b, b);
+	oqs_sidh_iqc_ref_fp2_mul_scaler(b, b, base_root);
+	oqs_sidh_iqc_ref_fp2_div(x, b, c);
+
+	mpz_clear(exponent);
+	oqs_sidh_iqc_ref_fp2_clear(temp_a);
+	oqs_sidh_iqc_ref_fp2_clear(b);
+	oqs_sidh_iqc_ref_fp2_clear(c);
+	oqs_sidh_iqc_ref_fp2_clear(beta);
+	mpz_clear(base_root);
+	gmp_randclear(randstate);
+}
+
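+/*
+ * For p = 3 mod 4 the conjugate is the Frobenius map y -> y^p, so the norm is
+ * N(y) = y^(p + 1).  The check below therefore computes
+ * N(a^((p - 1) / 2)) = a^((p^2 - 1) / 2), which equals 1 exactly when a is a
+ * non-zero square in F_(p^2).
+ */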
+int oqs_sidh_iqc_ref_fp2_is_square(const fp2_element_t a) {
+	mpz_t exponent;
+	mpz_t norm;
+	fp2_element_t temp;
+
+	mpz_init(exponent);
+	mpz_init(norm);
+	oqs_sidh_iqc_ref_fp2_init(temp);
+
+	// a^((p - 1) / 2)
+	mpz_sub_ui(exponent, characteristic, 1);
+	mpz_divexact_ui(exponent, exponent, 2);
+	oqs_sidh_iqc_ref_fp2_pow(temp, a, exponent);
+
+	oqs_sidh_iqc_ref_fp2_norm(norm, temp);
+	int result = (mpz_cmp_si(norm, 1) == 0);
+
+	mpz_clear(exponent);
+	mpz_clear(norm);
+	oqs_sidh_iqc_ref_fp2_clear(temp);
+
+	return result;
+}
+
+void oqs_sidh_iqc_ref_fp2_norm(mpz_t x, const fp2_element_t a) {
+	mpz_t temp1;
+	mpz_t temp2;
+	mpz_inits(temp1, temp2, NULL);
+
+	oqs_sidh_iqc_ref_fp_mul(temp1, a->a, a->a);
+	oqs_sidh_iqc_ref_fp_mul(temp2, a->b, a->b);
+	oqs_sidh_iqc_ref_fp_add(temp1, temp1, temp2);
+
+	mpz_set(x, temp1);
+	mpz_clears(temp1, temp2, NULL);
+}
+
+void oqs_sidh_iqc_ref_fp2_to_bytes(uint8_t *bytes, const fp2_element_t a,
+                                   long prime_size) {
+	for (long i = 0; i < 2 * prime_size; i++)
+		bytes[i] = 0;
+
+	mpz_export(bytes, NULL, -1, 1, 0, 0, a->a);
+	mpz_export(bytes + prime_size, NULL, -1, 1, 0, 0, a->b);
+}
+
+void oqs_sidh_iqc_ref_bytes_to_fp2(fp2_element_t a, const uint8_t *bytes,
+                                   long prime_size) {
+	oqs_sidh_iqc_ref_fp2_zero(a);
+	mpz_import(a->a, prime_size, -1, 1, 0, 0, bytes);
+	mpz_import(a->b, prime_size, -1, 1, 0, 0, bytes + prime_size);
+}
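+
+/*
+ * Minimal usage sketch of the F_(p^2) API in this file.  It is illustrative
+ * only and kept out of the build by the SIDH_FP2_EXAMPLE guard, which is not
+ * defined anywhere: with p = 23 it computes (2i + 3) * (5i + 7) = 6i + 11 and
+ * then inverts the product.
+ */
+#ifdef SIDH_FP2_EXAMPLE
+static void fp2_example(void) {
+	oqs_sidh_iqc_ref_fp_init_chararacteristic_ui(23);
+
+	fp2_element_t x, y, z;
+	oqs_sidh_iqc_ref_fp2_init_set_si(x, 2, 3);
+	oqs_sidh_iqc_ref_fp2_init_set_si(y, 5, 7);
+	oqs_sidh_iqc_ref_fp2_init(z);
+
+	oqs_sidh_iqc_ref_fp2_mul(z, x, y); /* z = 6*i + 11 (mod 23) */
+	oqs_sidh_iqc_ref_fp2_inv(z, z);    /* z = 1 / (6*i + 11)    */
+
+	oqs_sidh_iqc_ref_fp2_clear(x);
+	oqs_sidh_iqc_ref_fp2_clear(y);
+	oqs_sidh_iqc_ref_fp2_clear(z);
+}
+#endif /* SIDH_FP2_EXAMPLE */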
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.h
new file mode 100644
index 0000000000000000000000000000000000000000..3d9b4f7623ea61ca7c89777784ebd76cb863a14a
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_quadratic_ext.h
@@ -0,0 +1,428 @@
+#ifndef FP2_H
+#define FP2_H
+
+#include <gmp.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
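+/* The characteristic p of the base field F_p; set by the
+   oqs_sidh_iqc_ref_fp_init_chararacteristic* functions below. */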
+mpz_t characteristic;
+
+/**
+ * Representation of elements of the quadratic extension F_(p^2)
+ * of F_p. We assume F_(p^2) is represented by the quotient
+ * F_p[X] / (X^2 + 1) which requires X^2 + 1 to be irreducible over F_p.
+ * The elements are therefore of the form a * i + b where i^2 = -1.
+ */
+typedef struct {
+	mpz_t a;
+	mpz_t b;
+} fp2_element_struct;
+
+typedef fp2_element_struct fp2_element_t[1];
+
+//////////////// fp methods //////////////////////////
+
+/**
+ * {@link oqs_sidh_iqc_ref_fp_init_chararacteristic}
+ * @param p
+ */
+void oqs_sidh_iqc_ref_fp_init_chararacteristic_ui(long p);
+
+/**
+ * {@link oqs_sidh_iqc_ref_fp_init_chararacteristic}
+ * @param value
+ */
+void oqs_sidh_iqc_ref_fp_init_chararacteristic_str(const char *value);
+
+/**
+ * Initializes the characteristic to {@code p}.
+ * @param p
+ */
+void oqs_sidh_iqc_ref_fp_init_chararacteristic(const mpz_t p);
+
+/**
+ * Sets {@code x = a}.
+ * @param x
+ * @param a
+ */
+void oqs_sidh_iqc_ref_fp_set(mpz_t x, const mpz_t a);
+
+/**
+ * Sets {@code x = a + b}.
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp_add(mpz_t x,
+                             const mpz_t a,
+                             const mpz_t b);
+
+/**
+ * {@link oqs_sidh_iqc_ref_fp_add}.
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp_add_ui(mpz_t x,
+                                const mpz_t a,
+                                unsigned long b);
+
+/**
+ * Sets {@code x = a - b}.
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp_sub(mpz_t x,
+                             const mpz_t a,
+                             const mpz_t b);
+
+/**
+ * {@link oqs_sidh_iqc_ref_fp_sub}
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp_sub_ui(mpz_t x,
+                                const mpz_t a,
+                                unsigned long b);
+
+/**
+ * Sets {@code x = a * b}.
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp_mul(mpz_t x,
+                             const mpz_t a,
+                             const mpz_t b);
+
+/**
+ * {@link oqs_sidh_iqc_ref_fp_mul}
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp_mul_si(mpz_t x,
+                                const mpz_t a,
+                                long b);
+
+/**
+ * Sets {@code x = 1 / a}. This is possible only if {@code a} is
+ * prime to the characteristic.
+ * @param x
+ * @param a
+ */
+void oqs_sidh_iqc_ref_fp_inv(mpz_t x,
+                             const mpz_t a);
+
+/**
+ * Sets {@code x = a / b}. @see oqs_sidh_iqc_ref_fp_inv
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp_div(mpz_t x,
+                             const mpz_t a,
+                             const mpz_t b);
+
+/**
+ * Sets {@code x = -a}.
+ * @param x
+ * @param a
+ */
+void oqs_sidh_iqc_ref_fp_neg(mpz_t x,
+                             const mpz_t a);
+
+/**
+ * Computes the square root of {@code a}.
+ * This method works only for p = 3 mod 4.
+ * @param x the square root
+ * @param a
+ */
+void oqs_sidh_iqc_ref_fp_sqrt(mpz_t x,
+                              const mpz_t a);
+
+//////////////// fp2 methods //////////////////////////
+
+/**
+ * Initializes {@code x} to zero.
+ * @param x
+ */
+void oqs_sidh_iqc_ref_fp2_init(fp2_element_t x);
+
+/**
+ * Initializes {@code x} to {@code a * i + b}.
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp2_init_set_si(fp2_element_t x,
+                                      long a,
+                                      long b);
+
+/**
+ * {@link oqs_sidh_iqc_ref_fp2_init_set_si}.
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp2_init_set_str(fp2_element_t x,
+                                       const char *a,
+                                       const char *b);
+
+/**
+ * Initializes {@code x} to {@code a}.
+ * @param x
+ * @param a
+ */
+void oqs_sidh_iqc_ref_fp2_init_set(fp2_element_t x,
+                                   const fp2_element_t a);
+
+/**
+ * Frees the memory allocated to {@code x}.
+ * @param x
+ */
+void oqs_sidh_iqc_ref_fp2_clear(fp2_element_t x);
+
+/**
+ * Copies {@code b} into {@code x}.
+ * @param x
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp2_set(fp2_element_t x,
+                              const fp2_element_t b);
+
+/**
+ * Sets {@code x = 0}.
+ * @param x
+ */
+void oqs_sidh_iqc_ref_fp2_zero(fp2_element_t x);
+
+/**
+ * Sets {@code x = 1}.
+ * @param x
+ */
+void oqs_sidh_iqc_ref_fp2_one(fp2_element_t x);
+
+/**
+ * @param a
+ * @return the string representation of {@code a}
+ */
+char *oqs_sidh_iqc_ref_fp2_get_str(const fp2_element_t a);
+
+/**
+ * Sets {@code x = a + b}.
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp2_add(fp2_element_t x,
+                              const fp2_element_t a,
+                              const fp2_element_t b);
+
+/**
+ * {@link oqs_sidh_iqc_ref_fp2_add}
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp2_add_ui(fp2_element_t x,
+                                 const fp2_element_t a,
+                                 unsigned long b);
+
+/**
+ * Sets {@code x = a - b}.
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp2_sub(fp2_element_t x,
+                              const fp2_element_t a,
+                              const fp2_element_t b);
+
+/**
+ * {@link oqs_sidh_iqc_ref_fp2_sub}
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp2_sub_ui(fp2_element_t x,
+                                 const fp2_element_t a,
+                                 unsigned long b);
+
+/**
+ * Sets {@code x = a * b}.
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp2_mul(fp2_element_t x,
+                              const fp2_element_t a,
+                              const fp2_element_t b);
+
+/**
+ * Sets {@code x = a^2}.
+ * @param x
+ * @param a
+ */
+void oqs_sidh_iqc_ref_fp2_square(fp2_element_t x,
+                                 const fp2_element_t a);
+
+/**
+ * {@link oqs_sidh_iqc_ref_fp2_pow}
+ */
+void oqs_sidh_iqc_ref_fp2_pow_ui(fp2_element_t x,
+                                 const fp2_element_t a,
+                                 unsigned long n);
+
+/**
+ * Sets {@code x = a^n}.
+ * @param x
+ * @param a
+ * @param n
+ */
+void oqs_sidh_iqc_ref_fp2_pow(fp2_element_t x,
+                              const fp2_element_t a,
+                              const mpz_t n);
+
+/**
+ * Sets {@code x = 1 / a}.
+ * @param x
+ * @param a
+ */
+void oqs_sidh_iqc_ref_fp2_inv(fp2_element_t x,
+                              const fp2_element_t a);
+
+/**
+ * Sets {@code x = a / b}.
+ * @param x
+ * @param a
+ * @param b
+ */
+void oqs_sidh_iqc_ref_fp2_div(fp2_element_t x,
+                              const fp2_element_t a,
+                              const fp2_element_t b);
+
+/**
+ * Sets {@code x = -u * i + v} where {@code a = u * i + v}.
+ * @param x
+ * @param a
+ */
+void oqs_sidh_iqc_ref_fp2_conjugate(fp2_element_t x,
+                                    const fp2_element_t a);
+
+/**
+ * Sets {@code x = -a}.
+ * @param x
+ * @param a
+ */
+void oqs_sidh_iqc_ref_fp2_negate(fp2_element_t x,
+                                 const fp2_element_t a);
+
+/**
+ * Sets {@code x = a * scaler}.
+ * @param x
+ * @param a
+ * @param scaler
+ */
+void oqs_sidh_iqc_ref_fp2_mul_scaler(fp2_element_t x,
+                                     const fp2_element_t a,
+                                     const mpz_t scaler);
+
+/**
+ * {@link oqs_sidh_iqc_ref_fp2_mul_scaler}
+ * @param x
+ * @param a
+ * @param scaler
+ */
+void oqs_sidh_iqc_ref_fp2_mul_scaler_si(fp2_element_t x,
+                                        const fp2_element_t a,
+                                        long scaler);
+
+/**
+ * Checks if {@code a} is zero.
+ * @param a
+ * @return 1 if {@code a == 0}, and 0 otherwise
+ */
+int oqs_sidh_iqc_ref_fp2_is_zero(const fp2_element_t a);
+
+/**
+ * Checks if {@code a} is one.
+ * @param a
+ * @return 1 if {@code a == 1}, and 0 otherwise
+ */
+int oqs_sidh_iqc_ref_fp2_is_one(const fp2_element_t a);
+
+/**
+ * Checks if {@code a == b}.
+ * @param a
+ * @param b
+ * @return 1 if {@code a == b}, and 0 otherwise.
+ */
+int oqs_sidh_iqc_ref_fp2_equals(const fp2_element_t a,
+                                const fp2_element_t b);
+
+/**
+ * Generates a random element in the quadratic extension.
+ * @param x the generated random element
+ * @param randstate
+ */
+void oqs_sidh_iqc_ref_fp2_random(fp2_element_t x,
+                                 gmp_randstate_t randstate);
+
+/**
+ * Computes the square root of {@code a}.
+ * The algorithm is based on
+ * Doliskani & Schost, Taking Roots over High Extensions of Finite Fields, 2011.
+ * It works for any characteristic, but since it uses {@link oqs_sidh_iqc_ref_fp_sqrt} for
+ * base-case square root, it is limited to p = 3 mod 4.
+ * @param x the square root
+ * @param a
+ */
+void oqs_sidh_iqc_ref_fp2_sqrt(fp2_element_t x,
+                               const fp2_element_t a);
+
+/**
+ * Checks if {@code a} is a square.
+ * @param a
+ * @return 1 if {@code a} is a square, 0 otherwise
+ */
+int oqs_sidh_iqc_ref_fp2_is_square(const fp2_element_t a);
+
+/**
+ * Computes the norm of {@code a = b * i + c}, which is b^2 + c^2.
+ * @param x the computed norm
+ * @param a
+ */
+void oqs_sidh_iqc_ref_fp2_norm(mpz_t x,
+                               const fp2_element_t a);
+
+/**
+ * Converts an fp2 element to a byte array.
+ * @param bytes
+ * @param a
+ * @param prime_size
+ */
+void oqs_sidh_iqc_ref_fp2_to_bytes(uint8_t *bytes,
+                                   const fp2_element_t a,
+                                   long prime_size);
+
+/**
+ * Converts a byte array to an fp2 element.
+ * @param a
+ * @param bytes
+ * @param prime_size
+ */
+void oqs_sidh_iqc_ref_bytes_to_fp2(fp2_element_t a,
+                                   const uint8_t *bytes,
+                                   long prime_size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* FP2_H */
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.c
new file mode 100644
index 0000000000000000000000000000000000000000..1dbc7bcdd969eb9c5ea07a6cbbd0bfc20f9ce057
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.c
@@ -0,0 +1,30 @@
+#include "sidh_shared_key.h"
+#include "sidh_isogeny.h"
+
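+/*
+ * The shared key is the j-invariant of the image curve: a kernel generator is
+ * derived from the caller's private key and the other party's public torsion
+ * points P, Q on the curve E, the isogeny with that kernel is applied to E,
+ * and both parties obtain isomorphic curves, hence the same j-invariant.
+ */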
+void oqs_sidh_iqc_ref_shared_key_generate(fp2_element_t shared_key,
+                                          const public_key_t public_key,
+                                          const private_key_t private_key,
+                                          const public_params_t params) {
+
+	point_t kernel_gen;
+	oqs_sidh_iqc_ref_point_init(kernel_gen);
+
+	// compute a generator for the kernel of the isogeny
+	oqs_sidh_iqc_ref_private_key_compute_kernel_gen(kernel_gen,
+	                                                private_key,
+	                                                public_key->P,
+	                                                public_key->Q,
+	                                                params->le,
+	                                                public_key->E);
+	elliptic_curve_t E;
+	oqs_sidh_iqc_ref_elliptic_curve_init(E);
+	oqs_sidh_iqc_ref_elliptic_curve_set(E, public_key->E);
+
+	oqs_sidh_iqc_ref_isogeny_evaluate_strategy_curve(E, kernel_gen, params->l, params->e, 0.5);
+	//    oqs_sidh_iqc_ref_isogeny_evaluate_naive_curve(E, kernel_gen, params->l, params->e, 3);
+
+	oqs_sidh_iqc_ref_elliptic_curve_compute_j_inv(shared_key, E);
+
+	oqs_sidh_iqc_ref_point_clear(kernel_gen);
+	oqs_sidh_iqc_ref_elliptic_curve_clear(E);
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.h
new file mode 100644
index 0000000000000000000000000000000000000000..4b4547816f0a57c73d6db64d8cea6e017285557f
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_shared_key.h
@@ -0,0 +1,27 @@
+#ifndef SHARED_KEY_H
+#define SHARED_KEY_H
+
+#include "sidh_private_key.h"
+#include "sidh_public_key.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Generates the shared key.
+ * @param shared_key the generated shared key
+ * @param public_key the other party's public key
+ * @param private_key the caller's own private key
+ * @param params the caller's own parameters
+ */
+void oqs_sidh_iqc_ref_shared_key_generate(fp2_element_t shared_key,
+                                          const public_key_t public_key,
+                                          const private_key_t private_key,
+                                          const public_params_t params);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SHARED_KEY_H */
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.c b/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.c
new file mode 100644
index 0000000000000000000000000000000000000000..8f68a36f7dd791a27c15f31d7e542f5c31a4f45b
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.c
@@ -0,0 +1,37 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <gmp.h>
+#include <time.h>
+#include <oqs/rand.h>
+
+#include "sidh_util.h"
+
+char *oqs_sidh_iqc_ref_concat(char *str1, const char *str2) {
+	char *temp = (char *) malloc(strlen(str1) + strlen(str2) + 1);
+	strcpy(temp, str1);
+	strcat(temp, str2);
+	return temp;
+}
+
+char *oqs_sidh_iqc_ref_get_random_str(int num_bytes) {
+	char *rand_value = (char *) malloc(num_bytes);
+	OQS_RAND *rand = OQS_RAND_new(OQS_RAND_alg_urandom_chacha20);
+	OQS_RAND_n(rand, (uint8_t *) rand_value, num_bytes);
+	OQS_RAND_free(rand);
+
+	return rand_value;
+}
+
+void oqs_sidh_iqc_ref_get_random_mpz(mpz_t x) {
+	int num_bytes = 20;
+	char *a = oqs_sidh_iqc_ref_get_random_str(num_bytes);
+	mpz_import(x, num_bytes, 1, sizeof(char), 0, 0, a);
+	free(a);
+}
+
+char *oqs_sidh_iqc_ref_array_xor(const char *array1, const char *array2,
+                                 long length) {
+	char *result = (char *) malloc(length);
+	for (long i = 0; i < length; i++)
+		result[i] = array1[i] ^ array2[i];
+
+	return result;
+}
diff --git a/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.h b/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..e0373498b5a86024a128b2d304e48de7e47b9fe4
--- /dev/null
+++ b/crypt/liboqs/kex_sidh_iqc_ref/sidh_util.h
@@ -0,0 +1,45 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Concatenates two strings.
+ * @param str1
+ * @param str2
+ * @return the concatenation of {@code str1, str2}
+ */
+char *oqs_sidh_iqc_ref_concat(char *str1,
+                              const char *str2);
+
+/**
+ * Generates a random char array of length {@code num_bytes}.
+ * @param num_bytes
+ * @return a random char array of length {@code num_bytes}
+ */
+char *oqs_sidh_iqc_ref_get_random_str(int num_bytes);
+
+/**
+ * @param x set to a randomly generated 160-bit integer
+ */
+void oqs_sidh_iqc_ref_get_random_mpz(mpz_t x);
+
+/**
+ * @param array1
+ * @param array2
+ * @param length
+ * @return the bitwise xor of the two arrays
+ */
+char *oqs_sidh_iqc_ref_array_xor(const char *array1,
+                                 const char *array2,
+                                 long length);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* UTIL_H */
diff --git a/crypt/liboqs/sig/Makefile.am b/crypt/liboqs/sig/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..bc279ca069d4258d89a225ddb9b5e89b04014741
--- /dev/null
+++ b/crypt/liboqs/sig/Makefile.am
@@ -0,0 +1,8 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libsig.la
+
+libsig_la_SOURCES = sig.c
+
+libsig_la_CPPFLAGS = -I../../include
+libsig_la_CPPFLAGS += $(AM_CPPFLAGS)
+
diff --git a/crypt/liboqs/sig/sig.c b/crypt/liboqs/sig/sig.c
new file mode 100644
index 0000000000000000000000000000000000000000..bc6d49cb1091104ed120f51a93e114c56f6676e8
--- /dev/null
+++ b/crypt/liboqs/sig/sig.c
@@ -0,0 +1,71 @@
+#include <assert.h>
+#include <oqs/common.h>
+#include <oqs/sig.h>
+#ifdef ENABLE_SIG_PICNIC
+#include <oqs/sig_picnic.h>
+#endif
+
+OQS_SIG *OQS_SIG_new(OQS_RAND *rand, enum OQS_SIG_algid algid) {
+	if (rand == NULL) {
+		return NULL;
+	}
+
+	OQS_SIG *s = malloc(sizeof(OQS_SIG));
+	if (s == NULL) {
+		return NULL;
+	}
+	s->rand = rand;
+
+	switch (algid) {
+#ifdef ENABLE_SIG_PICNIC
+	case OQS_SIG_picnic_L1_FS:
+	case OQS_SIG_picnic_L1_UR:
+	case OQS_SIG_picnic_L3_FS:
+	case OQS_SIG_picnic_L3_UR:
+	case OQS_SIG_picnic_L5_FS:
+	case OQS_SIG_picnic_L5_UR:
+	case OQS_SIG_picnic_default:
+		if (OQS_SIG_picnic_get(s, algid) != OQS_SUCCESS) {
+			free(s);
+			return NULL;
+		}
+		break;
+#endif
+	default:
+		free(s);
+		return NULL;
+	}
+
+	return s;
+}
+
+int OQS_SIG_keygen(const OQS_SIG *s, uint8_t *priv, uint8_t *pub) {
+	if (s == NULL) {
+		return OQS_ERROR;
+	} else {
+		return s->keygen(s, priv, pub);
+	}
+}
+
+int OQS_SIG_sign(const OQS_SIG *s, const uint8_t *priv, const uint8_t *msg, const size_t msg_len, uint8_t *sig, size_t *sig_len) {
+	if (s == NULL) {
+		return OQS_ERROR;
+	} else {
+		return s->sign(s, priv, msg, msg_len, sig, sig_len);
+	}
+}
+
+int OQS_SIG_verify(const OQS_SIG *s, const uint8_t *pub, const uint8_t *msg, const size_t msg_len, const uint8_t *sig, const size_t sig_len) {
+	if (s == NULL) {
+		return OQS_ERROR;
+	} else {
+		return s->verify(s, pub, msg, msg_len, sig, sig_len);
+	}
+}
+
+void OQS_SIG_free(OQS_SIG *s) {
+	if (s == NULL) {
+		return;
+	}
+	free(s);
+}
diff --git a/crypt/liboqs/sig/sig.h b/crypt/liboqs/sig/sig.h
new file mode 100644
index 0000000000000000000000000000000000000000..60d9199dc3e9f5cb6f2ca66dd5cb589f1b7e8373
--- /dev/null
+++ b/crypt/liboqs/sig/sig.h
@@ -0,0 +1,166 @@
+/**
+ * \file sig.h
+ * \brief Header defining the API for generic OQS signature schemes
+ */
+
+#ifndef __OQS_SIG_H
+#define __OQS_SIG_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <oqs/rand.h>
+
+/**
+ * Supported signature algorithms.
+ * Note: the Picnic algs are not wrapped in an ENABLE_SIG_PICNIC guard, to
+ *       avoid forcing calling apps to define the macro. A library compiled
+ *       without the macro will fail if these algids are requested.
+ */
+enum OQS_SIG_algid {
+	/* Picnic sig algs */
+	OQS_SIG_picnic_default, // equivalent to OQS_SIG_picnic_L1_FS
+	OQS_SIG_picnic_L1_FS,
+	OQS_SIG_picnic_L1_UR,
+	OQS_SIG_picnic_L3_FS,
+	OQS_SIG_picnic_L3_UR,
+	OQS_SIG_picnic_L5_FS,
+	OQS_SIG_picnic_L5_UR,
+};
+
+/**
+ * OQS signature object
+ */
+typedef struct OQS_SIG OQS_SIG; // forward declaration so the callback types below can refer to OQS_SIG
+struct OQS_SIG {
+
+	/**
+	 * PRNG
+	 */
+	OQS_RAND *rand;
+
+	/**
+	 * Specifies the name of the signature method
+	 */
+	char *method_name;
+
+	/**
+	 * Classical security in terms of the number of bits provided by the
+	 * signature method.
+	 */
+	uint16_t estimated_classical_security;
+
+	/**
+	 *  Equivalent quantum security in terms of the number of bits provided by the
+	 *  signature method.
+	 */
+	uint16_t estimated_quantum_security;
+
+	/**
+	 *  Private key length.
+	 */
+	uint16_t priv_key_len;
+
+	/**
+	 *  Public key length.
+	 */
+	uint16_t pub_key_len;
+
+	/**
+	 *  Maximum signature length.
+	 */
+	uint32_t max_sig_len;
+
+	/**
+	 * Opaque pointer for passing around any computation context
+	 */
+	void *ctx;
+
+	/**
+	 * Pointer to a function for public and private signature key generation.
+	 *
+	 * @param s                The signature structure.
+	 * @param priv             The signer's private key.
+	 * @param pub              The signer's public key.
+	 * @return                 OQS_SUCCESS on success, or OQS_ERROR on failure.
+	 */
+	int (*keygen)(const OQS_SIG *s, uint8_t *priv, uint8_t *pub);
+
+	/**
+	 * Pointer to a function for signature generation.
+	 *
+	 * @param s                The signature structure.
+	 * @param priv             The signer's private key.
+	 * @param msg              The message to sign.
+	 * @param msg_len          Length of the message to sign.
+	 * @param sig              The generated signature. Must be allocated by the caller, or NULL to learn how much space is needed, as returned in sig_len.
+	 * @param sig_len          In: length of sig, out: length of the generated signature.
+	 * @return                 OQS_SUCCESS on success, or OQS_ERROR on failure.
+	 */
+	int (*sign)(const OQS_SIG *s, const uint8_t *priv, const uint8_t *msg, const size_t msg_len, uint8_t *sig, size_t *sig_len);
+
+	/**
+	 * Pointer to a function for signature verification.
+	 *
+	 * @param s                The signature structure.
+	 * @param pub              The signer's public key.
+	 * @param msg              The signed message.
+	 * @param msg_len          Length of the signed message.
+	 * @param sig              The signature to verify.
+	 * @param sig_len          Length of the signature to verify.
+	 * @return                 OQS_SUCCESS on success, or OQS_ERROR on failure.
+	 */
+	int (*verify)(const OQS_SIG *s, const uint8_t *pub, const uint8_t *msg, const size_t msg_len, const uint8_t *sig, const size_t sig_len);
+};
+
+/**
+ * Instantiate a new signature object.
+ *
+ * @param rand               The random number generator.
+ * @param algid              The id of the signature algorithm to be instantiated.
+ * @return                   A new signature object on success, or NULL on failure.
+ */
+OQS_SIG *OQS_SIG_new(OQS_RAND *rand, enum OQS_SIG_algid algid);
+
+/**
+ * Generates a new signature key pair.
+ * @param s                  Pointer to the signature object.
+ * @param priv               Pointer where the generated private key will be stored. Caller 
+ *                           must have allocated s->priv_key_len bytes.
+ * @param pub                Pointer where the generated public key will be stored. Caller 
+ *                           must have allocated s->pub_key_len bytes.
+ * @return                   OQS_SUCCESS on success, or OQS_ERROR on failure
+ */
+int OQS_SIG_keygen(const OQS_SIG *s, uint8_t *priv, uint8_t *pub);
+
+/**
+ * Generates a new signature.
+ * @param s         Pointer to the signature object.
+ * @param priv      Pointer to the signer's private key, of expected length `s->priv_key_len` bytes.
+ * @param msg       Pointer to the message to sign.
+ * @param msg_len   Length of the message to sign `msg`.
+ * @param sig       Pointer where the generated signature will be stored. Caller must have allocated `s->max_sig_len` bytes.
+ * @param sig_len   Pointer to the signature length; in: the allocated length of `sig`, out: the length of the generated signature.
+ * @return          OQS_SUCCESS on success, or OQS_ERROR on failure
+ */
+int OQS_SIG_sign(const OQS_SIG *s, const uint8_t *priv, const uint8_t *msg, const size_t msg_len, uint8_t *sig, size_t *sig_len);
+
+/**
+ * Verifies a signature.
+ * @param s         Pointer to the signature object.
+ * @param pub       Pointer to the signer's public key, of expected length `s->pub_key_len` bytes.
+ * @param msg       Pointer to the signed message.
+ * @param msg_len   Length of the signed message `msg`.
+ * @param sig       Pointer to the signature.
+ * @param sig_len   Length of the signature. 
+ * @return          OQS_SUCCESS on success, or OQS_ERROR on failure
+ */
+int OQS_SIG_verify(const OQS_SIG *s, const uint8_t *pub, const uint8_t *msg, const size_t msg_len, const uint8_t *sig, const size_t sig_len);
+
+/**
+ * Frees the signature object, de-initializing the underlying library code.
+ * Does NOT free the rand object passed to OQS_SIG_new.
+ * @param s          The signature object.
+ */
+void OQS_SIG_free(OQS_SIG *s);
+
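+/*
+ * Typical call sequence (illustrative sketch; error handling omitted, and
+ * msg / msg_len stand for the caller's message buffer):
+ *
+ *   OQS_RAND *rand = OQS_RAND_new(OQS_RAND_alg_urandom_chacha20);
+ *   OQS_SIG *s = OQS_SIG_new(rand, OQS_SIG_picnic_default);
+ *
+ *   uint8_t *priv = malloc(s->priv_key_len);
+ *   uint8_t *pub = malloc(s->pub_key_len);
+ *   OQS_SIG_keygen(s, priv, pub);
+ *
+ *   uint8_t *sig = malloc(s->max_sig_len);
+ *   size_t sig_len = s->max_sig_len;
+ *   OQS_SIG_sign(s, priv, msg, msg_len, sig, &sig_len);
+ *   OQS_SIG_verify(s, pub, msg, msg_len, sig, sig_len);
+ *
+ *   free(priv); free(pub); free(sig);
+ *   OQS_SIG_free(s);
+ *   OQS_RAND_free(rand);
+ */
+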
+#endif
diff --git a/crypt/liboqs/sig/test_sig.c b/crypt/liboqs/sig/test_sig.c
new file mode 100644
index 0000000000000000000000000000000000000000..3507278866c6d39ffafe4a9183666cfe43dfbfcc
--- /dev/null
+++ b/crypt/liboqs/sig/test_sig.c
@@ -0,0 +1,353 @@
+#if defined(WINDOWS)
+#pragma warning(disable : 4244 4293)
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <oqs/common.h>
+#include <oqs/sig.h>
+#include <oqs/rand.h>
+
+#include "../ds_benchmark.h"
+#include "../common/common.h"
+
+// TODO: add signature size to benchmark
+
+struct sig_testcase {
+	enum OQS_SIG_algid algid;
+	char *algid_name;
+	int run;
+	int iter;
+};
+
+/* Add new testcases here */
+#ifdef ENABLE_SIG_PICNIC
+struct sig_testcase sig_testcases[] = {
+    {OQS_SIG_picnic_L1_FS, "picnic_L1_FS", 0, 10},
+    {OQS_SIG_picnic_L1_UR, "picnic_L1_UR", 0, 10},
+    {OQS_SIG_picnic_L3_FS, "picnic_L3_FS", 0, 10},
+    {OQS_SIG_picnic_L3_UR, "picnic_L3_UR", 0, 10},
+    {OQS_SIG_picnic_L5_FS, "picnic_L5_FS", 0, 10},
+    {OQS_SIG_picnic_L5_UR, "picnic_L5_UR", 0, 10},
+};
+#endif
+
+#define SIG_TEST_ITERATIONS 100
+#define SIG_BENCH_SECONDS 1
+
+#define PRINT_HEX_STRING(label, str, len)                        \
+	{                                                            \
+		printf("%-20s (%4zu bytes):  ", (label), (size_t)(len)); \
+		for (size_t i = 0; i < (len); i++) {                     \
+			printf("%02X", ((unsigned char *) (str))[i]);        \
+		}                                                        \
+		printf("\n");                                            \
+	}
+
+#define PRINT_PARTIAL_HEX_STRING(label, str, len, sublen)                \
+	{                                                                    \
+		printf("%-20s (%4zu bytes):  ", (label), (size_t)(len));         \
+		for (size_t i = 0; i < (sublen); i++) {                          \
+			printf("%02X", ((unsigned char *) (str))[i]);                \
+		}                                                                \
+		printf("...");                                                   \
+		for (size_t i = 0; i < (sublen); i++) {                          \
+			printf("%02X", ((unsigned char *) (str))[len - sublen + i]); \
+		}                                                                \
+		printf("\n");                                                    \
+	}
+
+static int sig_test_correctness(OQS_RAND *rand, enum OQS_SIG_algid algid, const int print) {
+
+	int rc;
+
+	uint8_t *priv = NULL;
+	uint8_t *pub = NULL;
+	uint8_t *msg = NULL;
+	size_t msg_len;
+	uint8_t *sig = NULL;
+	size_t sig_len;
+
+	/* setup signature object */
+	OQS_SIG *s = OQS_SIG_new(rand, algid);
+	if (s == NULL) {
+		eprintf("sig new failed\n");
+		goto err;
+	}
+
+	if (print) {
+		printf("================================================================================\n");
+		printf("Sample computation for signature method %s\n", s->method_name);
+		printf("================================================================================\n");
+	}
+
+	/* key generation */
+	priv = malloc(s->priv_key_len);
+	if (priv == NULL) {
+		eprintf("priv malloc failed\n");
+		goto err;
+	}
+	pub = malloc(s->pub_key_len);
+	if (pub == NULL) {
+		eprintf("pub malloc failed\n");
+		goto err;
+	}
+
+	rc = OQS_SIG_keygen(s, priv, pub);
+	if (rc != 1) {
+		eprintf("OQS_SIG_keygen failed\n");
+		goto err;
+	}
+
+	if (print) {
+		PRINT_HEX_STRING("Private key", priv, s->priv_key_len)
+		PRINT_HEX_STRING("Public key", pub, s->pub_key_len)
+	}
+
+	/* Generate message to sign */
+	msg_len = 100; // FIXME TODO: randomize based on scheme's max length
+	msg = malloc(msg_len);
+	if (msg == NULL) {
+		eprintf("msg malloc failed\n");
+		goto err;
+	}
+	OQS_RAND_n(rand, msg, msg_len);
+	if (print) {
+		PRINT_HEX_STRING("Message", msg, msg_len)
+	}
+
+	/* Signature */
+	sig_len = s->max_sig_len;
+	sig = malloc(sig_len);
+	if (sig == NULL) {
+		eprintf("sig malloc failed\n");
+		goto err;
+	}
+
+	rc = OQS_SIG_sign(s, priv, msg, msg_len, sig, &sig_len);
+	if (rc != 1) {
+		eprintf("OQS_SIG_sign failed\n");
+		goto err;
+	}
+
+	if (print) {
+		if (sig_len > 40) {
+			// print only part of the signature when it is long
+			PRINT_PARTIAL_HEX_STRING("Signature", sig, sig_len, 20);
+		}
+	}
+
+	/* Verification */
+	rc = OQS_SIG_verify(s, pub, msg, msg_len, sig, sig_len);
+	if (rc != 1) {
+		eprintf("ERROR: OQS_SIG_verify failed\n");
+		goto err;
+	}
+
+	if (print) {
+		printf("Signature is valid.\n");
+		printf("\n\n");
+	}
+
+	rc = 1;
+	goto cleanup;
+
+err:
+	rc = 0;
+
+cleanup:
+	if (msg != NULL) {
+		free(msg);
+	}
+	if (sig != NULL) {
+		free(sig);
+	}
+	if (pub != NULL) {
+		free(pub);
+	}
+	if (priv != NULL) {
+		free(priv);
+	}
+	if (s != NULL) {
+		OQS_SIG_free(s);
+	}
+
+	return rc;
+}
+
+static int sig_test_correctness_wrapper(OQS_RAND *rand, enum OQS_SIG_algid algid, int iterations, bool quiet) {
+	int ret;
+	ret = sig_test_correctness(rand, algid, !quiet);
+	if (ret != 1) {
+		goto err;
+	}
+
+	printf("Testing correctness and randomness of signature for %d iterations\n", iterations);
+	for (int i = 0; i < iterations; i++) {
+		ret = sig_test_correctness(rand, algid, 0);
+		if (ret != 1) {
+			goto err;
+		}
+	}
+	printf("All signatures were valid.\n");
+	printf("\n\n");
+	return 1;
+err:
+	return ret;
+}
+
+static int sig_bench_wrapper(OQS_RAND *rand, enum OQS_SIG_algid algid, const int seconds) {
+	int rc;
+
+	uint8_t *priv = NULL;
+	uint8_t *pub = NULL;
+	uint8_t *msg = NULL;
+	size_t msg_len;
+	uint8_t *sig = NULL;
+	size_t sig_len;
+
+	/* setup signature object */
+	OQS_SIG *s = OQS_SIG_new(rand, algid);
+	if (s == NULL) {
+		eprintf("sig new failed\n");
+		goto err;
+	}
+
+	/* key generation */
+	priv = malloc(s->priv_key_len);
+	if (priv == NULL) {
+		eprintf("priv malloc failed\n");
+		goto err;
+	}
+	pub = malloc(s->pub_key_len);
+	if (pub == NULL) {
+		eprintf("pub malloc failed\n");
+		goto err;
+	}
+
+	printf("%-30s | %10s | %14s | %15s | %10s | %16s | %10s\n", s->method_name, "", "", "", "", "", "");
+
+	TIME_OPERATION_SECONDS({ OQS_SIG_keygen(s, priv, pub); }, "keygen", seconds);
+
+	OQS_SIG_keygen(s, priv, pub);
+	/* Generate message to sign */
+	msg_len = 100; // FIXME TODO: randomize based on scheme's max length
+	msg = malloc(msg_len);
+	if (msg == NULL) {
+		eprintf("msg malloc failed\n");
+		goto err;
+	}
+	sig_len = s->max_sig_len;
+	sig = malloc(sig_len);
+	if (sig == NULL) {
+		eprintf("sig malloc failed\n");
+		goto err;
+	}
+
+	TIME_OPERATION_SECONDS({ OQS_SIG_sign(s, priv, msg, msg_len, sig, &sig_len); sig_len = s->max_sig_len; }, "sign", seconds);
+
+	OQS_SIG_sign(s, priv, msg, msg_len, sig, &sig_len);
+	TIME_OPERATION_SECONDS({ OQS_SIG_verify(s, pub, msg, msg_len, sig, sig_len); }, "verify", seconds);
+
+	rc = 1;
+	goto cleanup;
+
+err:
+	rc = 0;
+
+cleanup:
+	free(priv);
+	free(pub);
+	free(msg);
+	free(sig);
+	OQS_SIG_free(s);
+
+	return rc;
+}
+
+#ifdef ENABLE_SIG_PICNIC
+int main(int argc, char **argv) {
+	int success = 1;
+	bool run_all = true;
+	bool quiet = false;
+	bool bench = false;
+	OQS_RAND *rand = NULL;
+	size_t sig_testcases_len = sizeof(sig_testcases) / sizeof(struct sig_testcase);
+	for (int i = 1; i < argc; i++) {
+		if (argv[i][0] == '-') {
+			if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "-help") == 0) || (strcmp(argv[i], "--help") == 0)) {
+				printf("Usage: ./test_sig [options] [schemes]\n");
+				printf("\nOptions:\n");
+				printf("  --quiet, -q\n");
+				printf("    Less verbose output\n");
+				printf("  --bench, -b\n");
+				printf("    Run benchmarks\n");
+				printf("\nschemes:\n");
+				for (size_t i = 0; i < sig_testcases_len; i++) {
+					printf("  %s\n", sig_testcases[i].algid_name);
+				}
+				return EXIT_SUCCESS;
+			} else if (strcmp(argv[i], "--quiet") == 0 || strcmp(argv[i], "-q") == 0) {
+				quiet = true;
+			} else if (strcmp(argv[i], "--bench") == 0 || strcmp(argv[i], "-b") == 0) {
+				bench = true;
+			}
+
+		} else {
+			run_all = false;
+			for (size_t j = 0; j < sig_testcases_len; j++) {
+				if (strcmp(argv[i], sig_testcases[j].algid_name) == 0) {
+					sig_testcases[j].run = 1;
+				}
+			}
+		}
+	}
+
+	/* setup RAND */
+	rand = OQS_RAND_new(OQS_RAND_alg_urandom_chacha20);
+	if (rand == NULL) {
+		goto err;
+	}
+
+	for (size_t i = 0; i < sig_testcases_len; i++) {
+		if (run_all || sig_testcases[i].run == 1) {
+			int num_iter = sig_testcases[i].iter;
+			success = sig_test_correctness_wrapper(rand, sig_testcases[i].algid, num_iter, quiet);
+		}
+		if (success != 1) {
+			goto err;
+		}
+	}
+
+	if (bench) {
+		PRINT_TIMER_HEADER
+		for (size_t i = 0; i < sig_testcases_len; i++) {
+			if (run_all || sig_testcases[i].run == 1) {
+				sig_bench_wrapper(rand, sig_testcases[i].algid, SIG_BENCH_SECONDS);
+			}
+		}
+		PRINT_TIMER_FOOTER
+	}
+
+	success = 1;
+	goto cleanup;
+
+err:
+	success = 0;
+	eprintf("ERROR!\n");
+
+cleanup:
+	if (rand) {
+		OQS_RAND_free(rand);
+	}
+	return (success == 1) ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+#else // !ENABLE_SIG_PICNIC
+int main() {
+	printf("No signature algorithm available. Make sure configure was run properly; see Readme.md.\n");
+	return 0;
+}
+#endif
diff --git a/crypt/liboqs/sig_picnic/Makefile.am b/crypt/liboqs/sig_picnic/Makefile.am
new file mode 100644
index 0000000000000000000000000000000000000000..fc76287b3d03da4c233b0e1b0c7d8fd0d2516aa8
--- /dev/null
+++ b/crypt/liboqs/sig_picnic/Makefile.am
@@ -0,0 +1,6 @@
+AUTOMAKE_OPTIONS = foreign
+noinst_LTLIBRARIES = libpicnic_i.la
+
+libpicnic_i_la_SOURCES = sig_picnic.c
+libpicnic_i_la_CPPFLAGS = -march=native  -I../../include -Iexternal -Iexternal/build
+libpicnic_i_la_CPPFLAGS += $(AM_CPPFLAGS) 
diff --git a/crypt/liboqs/sig_picnic/build_picnic.sh b/crypt/liboqs/sig_picnic/build_picnic.sh
new file mode 100755
index 0000000000000000000000000000000000000000..54f8ecbbaec8d402d12e207698fbb379f9d06576
--- /dev/null
+++ b/crypt/liboqs/sig_picnic/build_picnic.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+mkdir -p external/build
+# check if patch has already been applied
+patch -p1 -N -f --dry-run --silent -d external < oqs_sig_picnic.patch 2>/dev/null 1>/dev/null
+if [ $? -eq 0 ];
+then
+    # apply the patch
+    echo Patching Picnic external
+    patch -p1 -N -f -d external < oqs_sig_picnic.patch
+fi
+cd external/build
+# make picnic
+cmake -UWITH_LTO -DWITH_LTO:BOOL=OFF ..
+make
+
diff --git a/crypt/liboqs/sig_picnic/oqs_sig_picnic.patch b/crypt/liboqs/sig_picnic/oqs_sig_picnic.patch
new file mode 100644
index 0000000000000000000000000000000000000000..597afd7a3c3b1f6ed9bd0ce952a4b18591c195be
--- /dev/null
+++ b/crypt/liboqs/sig_picnic/oqs_sig_picnic.patch
@@ -0,0 +1,3730 @@
+diff --git a/bitstream.c b/bitstream.c
+index 69f5dfd..3bbd97d 100644
+--- a/bitstream.c
++++ b/bitstream.c
+@@ -13,7 +13,7 @@
+ 
+ #include "bitstream.h"
+ 
+-bitstream_value_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits) {
++bitstream_value_t oqs_sig_picnic_bitstream_get_bits(bitstream_t* bs, unsigned int num_bits) {
+   const uint8_t* p              = &bs->buffer[bs->position / 8];
+   const unsigned int skip_bits  = bs->position % 8;
+   const unsigned int start_bits = 8 - skip_bits;
+@@ -37,7 +37,7 @@ bitstream_value_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits) {
+   return ret;
+ }
+ 
+-int bitstream_put_bits(bitstream_t* bs, bitstream_value_t value, unsigned int num_bits) {
++int oqs_sig_picnic_bitstream_put_bits(bitstream_t* bs, bitstream_value_t value, unsigned int num_bits) {
+   const unsigned int skip_bits = bs->position % 8;
+   uint8_t* p                   = &bs->buffer[bs->position / 8];
+ 
+diff --git a/bitstream.h b/bitstream.h
+index 551c90c..134ecad 100644
+--- a/bitstream.h
++++ b/bitstream.h
+@@ -19,7 +19,7 @@ typedef struct {
+   size_t position;
+ } bitstream_t;
+ 
+-bitstream_value_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits);
+-int bitstream_put_bits(bitstream_t* bs, bitstream_value_t value, unsigned int num_bits);
++bitstream_value_t oqs_sig_picnic_bitstream_get_bits(bitstream_t* bs, unsigned int num_bits);
++int oqs_sig_picnic_bitstream_put_bits(bitstream_t* bs, bitstream_value_t value, unsigned int num_bits);
+ 
+ #endif
+diff --git a/io.c b/io.c
+index 0630aab..07d8fb4 100644
+--- a/io.c
++++ b/io.c
+@@ -15,7 +15,7 @@
+ 
+ #include "compat.h"
+ 
+-void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, unsigned len) {
++void oqs_sig_picnic_mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, unsigned len) {
+   const size_t word_count = len / sizeof(uint64_t);
+   const uint64_t* rows    = &CONST_FIRST_ROW(data)[word_count - 1];
+   uint64_t* wdst          = (uint64_t*)dst;
+@@ -25,7 +25,7 @@ void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, unsigned len) {
+   }
+ }
+ 
+-void mzd_from_char_array(mzd_local_t* result, const uint8_t* data, unsigned len) {
++void oqs_sig_picnic_mzd_from_char_array(mzd_local_t* result, const uint8_t* data, unsigned len) {
+   const size_t word_count = len / sizeof(uint64_t);
+   uint64_t* rows          = &FIRST_ROW(result)[word_count - 1];
+   const uint64_t* wsrc    = (const uint64_t*)data;
+diff --git a/io.h b/io.h
+index 5544302..2799a75 100644
+--- a/io.h
++++ b/io.h
+@@ -15,8 +15,8 @@
+ 
+ #include "mzd_additional.h"
+ 
+-void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, unsigned numbytes);
+-void mzd_from_char_array(mzd_local_t* result, const uint8_t* data, unsigned len);
++void oqs_sig_picnic_mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, unsigned numbytes);
++void oqs_sig_picnic_mzd_from_char_array(mzd_local_t* result, const uint8_t* data, unsigned len);
+ 
+ void print_hex(FILE* out, const uint8_t* data, size_t len);
+ 
+diff --git a/kdf_shake.c b/kdf_shake.c
+index 14b6dbb..572b402 100644
+--- a/kdf_shake.c
++++ b/kdf_shake.c
+@@ -13,7 +13,7 @@
+ 
+ #include "kdf_shake.h"
+ 
+-void hash_init(hash_context* ctx, const picnic_instance_t* pp) {
++void oqs_sig_picnic_hash_init(hash_context* ctx, const picnic_instance_t* pp) {
+   if (pp->security_level == 64) {
+     Keccak_HashInitialize_SHAKE128(ctx);
+   } else {
+diff --git a/kdf_shake.h b/kdf_shake.h
+index 82152fd..9fd44a0 100644
+--- a/kdf_shake.h
++++ b/kdf_shake.h
+@@ -35,15 +35,15 @@
+ 
+ typedef Keccak_HashInstance hash_context;
+ 
+-void hash_init(hash_context* ctx, const picnic_instance_t* pp);
++void oqs_sig_picnic_hash_init(hash_context* ctx, const picnic_instance_t* pp);
+ 
+-#define hash_update(ctx, data, size) Keccak_HashUpdate((ctx), (data), (size) << 3)
+-#define hash_final(ctx) Keccak_HashFinal((ctx), NULL)
+-#define hash_squeeze(buffer, buflen, ctx) Keccak_HashSqueeze((ctx), (buffer), (buflen) << 3)
++#define hash_update(ctx, data, size) oqs_sig_picnic_Keccak_HashUpdate((ctx), (data), (size) << 3)
++#define hash_final(ctx) oqs_sig_picnic_Keccak_HashFinal((ctx), NULL)
++#define hash_squeeze(buffer, buflen, ctx) oqs_sig_picnic_Keccak_HashSqueeze((ctx), (buffer), (buflen) << 3)
+ 
+ typedef Keccak_HashInstance kdf_shake_t;
+ 
+-#define kdf_shake_init(ctx, pp) hash_init((ctx), (pp))
++#define kdf_shake_init(ctx, pp) oqs_sig_picnic_hash_init((ctx), (pp))
+ #define kdf_shake_update_key(ctx, key, keylen) hash_update((ctx), (key), (keylen))
+ #define kdf_shake_finalize_key(ctx) hash_final((ctx))
+ #define kdf_shake_get_randomness(ctx, dst, count) hash_squeeze((dst), (count), (ctx))
+diff --git a/lowmc.c b/lowmc.c
+index d4e5387..2d7a036 100644
+--- a/lowmc.c
++++ b/lowmc.c
+@@ -47,46 +47,46 @@ static void sbox_layer_uint64(mzd_local_t* y, mzd_local_t const* x, mask_t const
+ #ifdef WITH_CUSTOM_INSTANCES
+ static void sbox_layer_bitsliced(mzd_local_t* out, mzd_local_t const* in, mask_t const* mask) {
+   mzd_local_t* buffer[6] = {NULL};
+-  mzd_local_init_multiple_ex(buffer, 6, 1, in->ncols, false);
++  oqs_sig_picnic_mzd_local_init_multiple_ex(buffer, 6, 1, in->ncols, false);
+ 
+   // a
+-  mzd_local_t* x0m = mzd_and(buffer[0], mask->x0, in);
++  mzd_local_t* x0m = oqs_sig_picnic_mzd_and(buffer[0], mask->x0, in);
+   // b
+-  mzd_local_t* x1m = mzd_and(buffer[1], mask->x1, in);
++  mzd_local_t* x1m = oqs_sig_picnic_mzd_and(buffer[1], mask->x1, in);
+   // c
+-  mzd_local_t* x2m = mzd_and(buffer[2], mask->x2, in);
++  mzd_local_t* x2m = oqs_sig_picnic_mzd_and(buffer[2], mask->x2, in);
+ 
+-  mzd_shift_left(x0m, x0m, 2);
+-  mzd_shift_left(x1m, x1m, 1);
++  oqs_sig_picnic_mzd_shift_left(x0m, x0m, 2);
++  oqs_sig_picnic_mzd_shift_left(x1m, x1m, 1);
+ 
+   // b & c
+-  mzd_local_t* t0 = mzd_and(buffer[3], x1m, x2m);
++  mzd_local_t* t0 = oqs_sig_picnic_mzd_and(buffer[3], x1m, x2m);
+   // c & a
+-  mzd_local_t* t1 = mzd_and(buffer[4], x0m, x2m);
++  mzd_local_t* t1 = oqs_sig_picnic_mzd_and(buffer[4], x0m, x2m);
+   // a & b
+-  mzd_local_t* t2 = mzd_and(buffer[5], x0m, x1m);
++  mzd_local_t* t2 = oqs_sig_picnic_mzd_and(buffer[5], x0m, x1m);
+ 
+   // (b & c) ^ a
+-  mzd_xor(t0, t0, x0m);
++  oqs_sig_picnic_mzd_xor(t0, t0, x0m);
+ 
+   // (c & a) ^ a ^ b
+-  mzd_xor(t1, t1, x0m);
+-  mzd_xor(t1, t1, x1m);
++  oqs_sig_picnic_mzd_xor(t1, t1, x0m);
++  oqs_sig_picnic_mzd_xor(t1, t1, x1m);
+ 
+   // (a & b) ^ a ^ b ^c
+-  mzd_xor(t2, t2, x0m);
+-  mzd_xor(t2, t2, x1m);
+-  mzd_xor(t2, t2, x2m);
++  oqs_sig_picnic_mzd_xor(t2, t2, x0m);
++  oqs_sig_picnic_mzd_xor(t2, t2, x1m);
++  oqs_sig_picnic_mzd_xor(t2, t2, x2m);
+ 
+-  mzd_shift_right(t0, t0, 2);
+-  mzd_shift_right(t1, t1, 1);
++  oqs_sig_picnic_mzd_shift_right(t0, t0, 2);
++  oqs_sig_picnic_mzd_shift_right(t1, t1, 1);
+ 
+-  mzd_and(out, in, mask->mask);
+-  mzd_xor(out, out, t2);
+-  mzd_xor(out, out, t0);
+-  mzd_xor(out, out, t1);
++  oqs_sig_picnic_mzd_and(out, in, mask->mask);
++  oqs_sig_picnic_mzd_xor(out, out, t2);
++  oqs_sig_picnic_mzd_xor(out, out, t0);
++  oqs_sig_picnic_mzd_xor(out, out, t1);
+ 
+-  mzd_local_free_multiple(buffer);
++  oqs_sig_picnic_mzd_local_free_multiple(buffer);
+ }
+ 
+ #ifdef WITH_OPT
+@@ -264,17 +264,17 @@ static sbox_layer_impl get_sbox_layer(const lowmc_t* lowmc) {
+ #if defined(REDUCED_LINEAR_LAYER)
+ static mzd_local_t* lowmc_reduced_linear_layer(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key,
+                                                mzd_local_t const* p, sbox_layer_impl sbox_layer) {
+-  mzd_local_t* x       = mzd_local_init_ex(1, lowmc->n, false);
+-  mzd_local_t* y       = mzd_local_init_ex(1, lowmc->n, false);
+-  mzd_local_t* nl_part = mzd_local_init_ex(1, lowmc->r * 32, false);
++  mzd_local_t* x       = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->n, false);
++  mzd_local_t* y       = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->n, false);
++  mzd_local_t* nl_part = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->r * 32, false);
+ 
+-  mzd_local_copy(x, p);
++  oqs_sig_picnic_mzd_local_copy(x, p);
+ #if defined(MUL_M4RI)
+-  mzd_addmul_vl(x, lowmc_key, lowmc->k0_lookup);
+-  mzd_mul_vl(nl_part, lowmc_key, lowmc->precomputed_non_linear_part_lookup);
++  oqs_sig_picnic_mzd_addmul_vl(x, lowmc_key, lowmc->k0_lookup);
++  oqs_sig_picnic_mzd_mul_vl(nl_part, lowmc_key, lowmc->precomputed_non_linear_part_lookup);
+ #else
+-  mzd_addmul_v(x, lowmc_key, lowmc->k0_matrix);
+-  mzd_mul_v(nl_part, lowmc_key, lowmc->precomputed_non_linear_part_matrix);
++  oqs_sig_picnic_mzd_addmul_v(x, lowmc_key, lowmc->k0_matrix);
++  oqs_sig_picnic_mzd_mul_v(nl_part, lowmc_key, lowmc->precomputed_non_linear_part_matrix);
+ #endif
+ 
+   word mask                  = WORD_C(0xFFFFFFFF);
+@@ -287,28 +287,28 @@ static mzd_local_t* lowmc_reduced_linear_layer(lowmc_t const* lowmc, lowmc_key_t
+     mask = ~mask;
+ 
+ #if defined(MUL_M4RI)
+-    mzd_mul_vl(y, x, round->l_lookup);
++    oqs_sig_picnic_mzd_mul_vl(y, x, round->l_lookup);
+ #else
+-    mzd_mul_v(y, x, round->l_matrix);
++    oqs_sig_picnic_mzd_mul_v(y, x, round->l_matrix);
+ #endif
+-    mzd_xor(x, y, round->constant);
++    oqs_sig_picnic_mzd_xor(x, y, round->constant);
+   }
+ 
+-  mzd_local_free(y);
+-  mzd_local_free(nl_part);
++  oqs_sig_picnic_mzd_local_free(y);
++  oqs_sig_picnic_mzd_local_free(nl_part);
+   return x;
+ }
+ #else
+ static mzd_local_t* lowmc_plain(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key,
+                                 mzd_local_t const* p, sbox_layer_impl sbox_layer) {
+-  mzd_local_t* x = mzd_local_init_ex(1, lowmc->n, false);
+-  mzd_local_t* y = mzd_local_init_ex(1, lowmc->n, false);
++  mzd_local_t* x = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->n, false);
++  mzd_local_t* y = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->n, false);
+ 
+-  mzd_local_copy(x, p);
++  oqs_sig_picnic_mzd_local_copy(x, p);
+ #if defined(MUL_M4RI)
+-  mzd_addmul_vl(x, lowmc_key, lowmc->k0_lookup);
++  oqs_sig_picnic_mzd_addmul_vl(x, lowmc_key, lowmc->k0_lookup);
+ #else
+-  mzd_addmul_v(x, lowmc_key, lowmc->k0_matrix);
++  oqs_sig_picnic_mzd_addmul_v(x, lowmc_key, lowmc->k0_matrix);
+ #endif
+ 
+   lowmc_round_t const* round = lowmc->rounds;
+@@ -316,24 +316,24 @@ static mzd_local_t* lowmc_plain(lowmc_t const* lowmc, lowmc_key_t const* lowmc_k
+     sbox_layer(x, x, &lowmc->mask);
+ 
+ #if defined(MUL_M4RI)
+-    mzd_mul_vl(y, x, round->l_lookup);
++    oqs_sig_picnic_mzd_mul_vl(y, x, round->l_lookup);
+ #else
+-    mzd_mul_v(y, x, round->l_matrix);
++    oqs_sig_picnic_mzd_mul_v(y, x, round->l_matrix);
+ #endif
+-    mzd_xor(x, y, round->constant);
++    oqs_sig_picnic_mzd_xor(x, y, round->constant);
+ #if defined(MUL_M4RI) && !defined(REDUCED_LINEAR_LAYER)
+-    mzd_addmul_vl(x, lowmc_key, round->k_lookup);
++    oqs_sig_picnic_mzd_addmul_vl(x, lowmc_key, round->k_lookup);
+ #else
+-    mzd_addmul_v(x, lowmc_key, round->k_matrix);
++    oqs_sig_picnic_mzd_addmul_v(x, lowmc_key, round->k_matrix);
+ #endif
+   }
+ 
+-  mzd_local_free(y);
++  oqs_sig_picnic_mzd_local_free(y);
+   return x;
+ }
+ #endif
+ 
+-mzd_local_t* lowmc_call(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key, mzd_local_t const* p) {
++mzd_local_t* oqs_sig_picnic_lowmc_call(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key, mzd_local_t const* p) {
+   sbox_layer_impl sbox_layer = get_sbox_layer(lowmc);
+   if (!sbox_layer) {
+     return NULL;
+diff --git a/lowmc.h b/lowmc.h
+index 38eba01..ef46d82 100644
+--- a/lowmc.h
++++ b/lowmc.h
+@@ -19,6 +19,6 @@
+  * \param  p     the plaintext
+  * \return       the ciphertext
+  */
+-mzd_local_t* lowmc_call(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key, mzd_local_t const* p);
++mzd_local_t* oqs_sig_picnic_lowmc_call(lowmc_t const* lowmc, lowmc_key_t const* lowmc_key, mzd_local_t const* p);
+ 
+ #endif
+diff --git a/lowmc_128_128_20.c b/lowmc_128_128_20.c
+index 32c87a4..c79516e 100644
+--- a/lowmc_128_128_20.c
++++ b/lowmc_128_128_20.c
+@@ -5723,7 +5723,7 @@ static const mzd_local_t precomputed_round_key_matrix_non_linear_part_128_128_20
+ }};
+ 
+ #endif
+-const mzd_local_t* lowmc_128_128_20_get_linear_layer(uint32_t r) {
++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_linear_layer(uint32_t r) {
+   switch(r) {
+     default:
+       return NULL;
+@@ -5771,7 +5771,7 @@ const mzd_local_t* lowmc_128_128_20_get_linear_layer(uint32_t r) {
+ }
+ 
+ #if !defined(REDUCED_LINEAR_LAYER)
+-const mzd_local_t* lowmc_128_128_20_get_round_key(uint32_t r) {
++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_round_key(uint32_t r) {
+   switch(r) {
+     default:
+       return NULL;
+@@ -5822,7 +5822,7 @@ const mzd_local_t* lowmc_128_128_20_get_round_key(uint32_t r) {
+ #endif
+ 
+ 
+-const mzd_local_t* lowmc_128_128_20_get_round_const(uint32_t r) {
++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_round_const(uint32_t r) {
+   switch(r) {
+     default:
+       return NULL;
+@@ -5869,11 +5869,11 @@ const mzd_local_t* lowmc_128_128_20_get_round_const(uint32_t r) {
+   }
+ }
+ #if defined(REDUCED_LINEAR_LAYER)
+-const mzd_local_t* lowmc_128_128_20_get_precomputed_round_key_matrix_non_linear_part(void) {
++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_precomputed_round_key_matrix_non_linear_part(void) {
+   return &precomputed_round_key_matrix_non_linear_part_128_128_20;
+ }
+ 
+-const mzd_local_t* lowmc_128_128_20_get_precomputed_round_key_matrix_linear_part(void) {
++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_precomputed_round_key_matrix_linear_part(void) {
+   return &precomputed_round_key_matrix_linear_part_128_128_20;
+ }
+ #endif
+diff --git a/lowmc_128_128_20.h b/lowmc_128_128_20.h
+index 83a1b67..8cbd325 100644
+--- a/lowmc_128_128_20.h
++++ b/lowmc_128_128_20.h
+@@ -6,10 +6,10 @@
+ #include "mzd_additional.h"
+ 
+ 
+-const mzd_local_t* lowmc_128_128_20_get_linear_layer(uint32_t r);
+-const mzd_local_t* lowmc_128_128_20_get_round_key(uint32_t r);
+-const mzd_local_t* lowmc_128_128_20_get_round_const(uint32_t r);
+-const mzd_local_t* lowmc_128_128_20_get_precomputed_round_key_matrix_non_linear_part(void);
+-const mzd_local_t* lowmc_128_128_20_get_precomputed_round_key_matrix_linear_part(void);
++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_linear_layer(uint32_t r);
++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_round_key(uint32_t r);
++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_round_const(uint32_t r);
++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_precomputed_round_key_matrix_non_linear_part(void);
++const mzd_local_t* oqs_sig_picnic_lowmc_128_128_20_get_precomputed_round_key_matrix_linear_part(void);
+ 
+ #endif
+diff --git a/lowmc_192_192_30.c b/lowmc_192_192_30.c
+index 40a4eb2..7e82863 100644
+--- a/lowmc_192_192_30.c
++++ b/lowmc_192_192_30.c
+@@ -12415,7 +12415,7 @@ static const mzd_local_t precomputed_round_key_matrix_non_linear_part_192_192_30
+ }};
+ 
+ #endif
+-const mzd_local_t* lowmc_192_192_30_get_linear_layer(uint32_t r) {
++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_linear_layer(uint32_t r) {
+   switch(r) {
+     default:
+       return NULL;
+@@ -12483,7 +12483,7 @@ const mzd_local_t* lowmc_192_192_30_get_linear_layer(uint32_t r) {
+ }
+ 
+ #if !defined(REDUCED_LINEAR_LAYER)
+-const mzd_local_t* lowmc_192_192_30_get_round_key(uint32_t r) {
++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_round_key(uint32_t r) {
+   switch(r) {
+     default:
+       return NULL;
+@@ -12554,7 +12554,7 @@ const mzd_local_t* lowmc_192_192_30_get_round_key(uint32_t r) {
+ #endif
+ 
+ 
+-const mzd_local_t* lowmc_192_192_30_get_round_const(uint32_t r) {
++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_round_const(uint32_t r) {
+   switch(r) {
+     default:
+       return NULL;
+@@ -12621,11 +12621,11 @@ const mzd_local_t* lowmc_192_192_30_get_round_const(uint32_t r) {
+   }
+ }
+ #if defined(REDUCED_LINEAR_LAYER)
+-const mzd_local_t* lowmc_192_192_30_get_precomputed_round_key_matrix_non_linear_part(void) {
++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_precomputed_round_key_matrix_non_linear_part(void) {
+   return &precomputed_round_key_matrix_non_linear_part_192_192_30;
+ }
+ 
+-const mzd_local_t* lowmc_192_192_30_get_precomputed_round_key_matrix_linear_part(void) {
++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_precomputed_round_key_matrix_linear_part(void) {
+   return &precomputed_round_key_matrix_linear_part_192_192_30;
+ }
+ #endif
+diff --git a/lowmc_192_192_30.h b/lowmc_192_192_30.h
+index fb0169a..538166b 100644
+--- a/lowmc_192_192_30.h
++++ b/lowmc_192_192_30.h
+@@ -6,10 +6,10 @@
+ #include "mzd_additional.h"
+ 
+ 
+-const mzd_local_t* lowmc_192_192_30_get_linear_layer(uint32_t r);
+-const mzd_local_t* lowmc_192_192_30_get_round_key(uint32_t r);
+-const mzd_local_t* lowmc_192_192_30_get_round_const(uint32_t r);
+-const mzd_local_t* lowmc_192_192_30_get_precomputed_round_key_matrix_non_linear_part(void);
+-const mzd_local_t* lowmc_192_192_30_get_precomputed_round_key_matrix_linear_part(void);
++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_linear_layer(uint32_t r);
++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_round_key(uint32_t r);
++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_round_const(uint32_t r);
++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_precomputed_round_key_matrix_non_linear_part(void);
++const mzd_local_t* oqs_sig_picnic_lowmc_192_192_30_get_precomputed_round_key_matrix_linear_part(void);
+ 
+ #endif
+diff --git a/lowmc_256_256_38.c b/lowmc_256_256_38.c
+index fd2284f..092d6f0 100644
+--- a/lowmc_256_256_38.c
++++ b/lowmc_256_256_38.c
+@@ -20623,7 +20623,7 @@ static const mzd_local_t precomputed_round_key_matrix_non_linear_part_256_256_38
+ }};
+ 
+ #endif
+-const mzd_local_t* lowmc_256_256_38_get_linear_layer(uint32_t r) {
++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_linear_layer(uint32_t r) {
+   switch(r) {
+     default:
+       return NULL;
+@@ -20707,7 +20707,7 @@ const mzd_local_t* lowmc_256_256_38_get_linear_layer(uint32_t r) {
+ }
+ 
+ #if !defined(REDUCED_LINEAR_LAYER)
+-const mzd_local_t* lowmc_256_256_38_get_round_key(uint32_t r) {
++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_round_key(uint32_t r) {
+   switch(r) {
+     default:
+       return NULL;
+@@ -20794,7 +20794,7 @@ const mzd_local_t* lowmc_256_256_38_get_round_key(uint32_t r) {
+ #endif
+ 
+ 
+-const mzd_local_t* lowmc_256_256_38_get_round_const(uint32_t r) {
++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_round_const(uint32_t r) {
+   switch(r) {
+     default:
+       return NULL;
+@@ -20877,11 +20877,11 @@ const mzd_local_t* lowmc_256_256_38_get_round_const(uint32_t r) {
+   }
+ }
+ #if defined(REDUCED_LINEAR_LAYER)
+-const mzd_local_t* lowmc_256_256_38_get_precomputed_round_key_matrix_non_linear_part(void) {
++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_precomputed_round_key_matrix_non_linear_part(void) {
+   return &precomputed_round_key_matrix_non_linear_part_256_256_38;
+ }
+ 
+-const mzd_local_t* lowmc_256_256_38_get_precomputed_round_key_matrix_linear_part(void) {
++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_precomputed_round_key_matrix_linear_part(void) {
+   return &precomputed_round_key_matrix_linear_part_256_256_38;
+ }
+ #endif
+diff --git a/lowmc_256_256_38.h b/lowmc_256_256_38.h
+index 6bb0c59..b0c0afc 100644
+--- a/lowmc_256_256_38.h
++++ b/lowmc_256_256_38.h
+@@ -6,10 +6,10 @@
+ #include "mzd_additional.h"
+ 
+ 
+-const mzd_local_t* lowmc_256_256_38_get_linear_layer(uint32_t r);
+-const mzd_local_t* lowmc_256_256_38_get_round_key(uint32_t r);
+-const mzd_local_t* lowmc_256_256_38_get_round_const(uint32_t r);
+-const mzd_local_t* lowmc_256_256_38_get_precomputed_round_key_matrix_non_linear_part(void);
+-const mzd_local_t* lowmc_256_256_38_get_precomputed_round_key_matrix_linear_part(void);
++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_linear_layer(uint32_t r);
++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_round_key(uint32_t r);
++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_round_const(uint32_t r);
++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_precomputed_round_key_matrix_non_linear_part(void);
++const mzd_local_t* oqs_sig_picnic_lowmc_256_256_38_get_precomputed_round_key_matrix_linear_part(void);
+ 
+ #endif
+diff --git a/lowmc_pars.c b/lowmc_pars.c
+index 24bfb76..6b59834 100644
+--- a/lowmc_pars.c
++++ b/lowmc_pars.c
+@@ -38,20 +38,20 @@
+ #include <string.h>
+ 
+ static mask_t* prepare_masks(mask_t* mask, unsigned int n, unsigned int m) {
+-  mask->x0   = mzd_local_init(1, n);
+-  mask->x1   = mzd_local_init_ex(1, n, false);
+-  mask->x2   = mzd_local_init_ex(1, n, false);
+-  mask->mask = mzd_local_init(1, n);
++  mask->x0   = oqs_sig_picnic_mzd_local_init(1, n);
++  mask->x1   = oqs_sig_picnic_mzd_local_init_ex(1, n, false);
++  mask->x2   = oqs_sig_picnic_mzd_local_init_ex(1, n, false);
++  mask->mask = oqs_sig_picnic_mzd_local_init(1, n);
+ 
+   const unsigned int bound = n - 3 * m;
+   for (unsigned int i = 0; i < bound; ++i) {
+-    mzd_local_write_bit(mask->mask, 0, i, 1);
++    oqs_sig_picnic_mzd_local_write_bit(mask->mask, 0, i, 1);
+   }
+   for (unsigned int i = bound; i < n; i += 3) {
+-    mzd_local_write_bit(mask->x0, 0, i, 1);
++    oqs_sig_picnic_mzd_local_write_bit(mask->x0, 0, i, 1);
+   }
+-  mzd_shift_left(mask->x1, mask->x0, 1);
+-  mzd_shift_left(mask->x2, mask->x0, 2);
++  oqs_sig_picnic_mzd_shift_left(mask->x1, mask->x0, 1);
++  oqs_sig_picnic_mzd_shift_left(mask->x2, mask->x0, 2);
+ 
+   mask->x0i   = FIRST_ROW(mask->x0)[n / 64 - 1];
+   mask->x1i   = FIRST_ROW(mask->x1)[n / 64 - 1];
+@@ -61,7 +61,7 @@ static mask_t* prepare_masks(mask_t* mask, unsigned int n, unsigned int m) {
+   return mask;
+ }
+ 
+-bool lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, unsigned int k) {
++bool oqs_sig_picnic_lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, unsigned int k) {
+   if (!lowmc) {
+     return false;
+   }
+@@ -80,19 +80,19 @@ bool lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r,
+ 
+ #define LOAD_OPT(N, K, R)                                                                          \
+   lowmc->precomputed_non_linear_part_matrix =                                                      \
+-      lowmc_##N##_##K##_##R##_get_precomputed_round_key_matrix_non_linear_part();                  \
+-  lowmc->k0_matrix = lowmc_##N##_##K##_##R##_get_precomputed_round_key_matrix_linear_part()
++      oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_precomputed_round_key_matrix_non_linear_part();                  \
++  lowmc->k0_matrix = oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_precomputed_round_key_matrix_linear_part()
+ 
+ #define LOAD(N, K, R)                                                                              \
+-  lowmc->k0_matrix = lowmc_##N##_##K##_##R##_get_round_key(0);                                     \
++  lowmc->k0_matrix = oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_round_key(0);                                     \
+   for (unsigned int i = 0; i < (R); ++i) {                                                         \
+-    lowmc->rounds[i].k_matrix = lowmc_##N##_##K##_##R##_get_round_key(i + 1);                      \
++    lowmc->rounds[i].k_matrix = oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_round_key(i + 1);                      \
+   }
+ 
+ #define LOAD_FROM_FIXED_IMPL(N, K, R, PREC)                                                        \
+   for (unsigned int i = 0; i < (R); ++i) {                                                         \
+-    lowmc->rounds[i].l_matrix = lowmc_##N##_##K##_##R##_get_linear_layer(i);                       \
+-    lowmc->rounds[i].constant = lowmc_##N##_##K##_##R##_get_round_const(i);                        \
++    lowmc->rounds[i].l_matrix = oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_linear_layer(i);                       \
++    lowmc->rounds[i].constant = oqs_sig_picnic_lowmc_##N##_##K##_##R##_get_round_const(i);                        \
+   }                                                                                                \
+   LOAD##PREC(N, K, R);
+ 
+@@ -127,29 +127,29 @@ bool lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r,
+   }
+ #endif
+ 
+-  lowmc_clear(lowmc);
++  oqs_sig_picnic_lowmc_clear(lowmc);
+   return false;
+ 
+ precomp:
+ 
+ #ifdef MUL_M4RI
+-  lowmc->k0_lookup = mzd_precompute_matrix_lookup(lowmc->k0_matrix);
++  lowmc->k0_lookup = oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->k0_matrix);
+ #ifdef REDUCED_LINEAR_LAYER
+   lowmc->precomputed_non_linear_part_lookup =
+-      mzd_precompute_matrix_lookup(lowmc->precomputed_non_linear_part_matrix);
++      oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->precomputed_non_linear_part_matrix);
+ #endif
+ #endif
+ #ifdef MUL_M4RI
+   for (unsigned int i = 0; i < r; ++i) {
+-    lowmc->rounds[i].l_lookup = mzd_precompute_matrix_lookup(lowmc->rounds[i].l_matrix);
++    lowmc->rounds[i].l_lookup = oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->rounds[i].l_matrix);
+ #if !defined(REDUCED_LINEAR_LAYER)
+-    lowmc->rounds[i].k_lookup = mzd_precompute_matrix_lookup(lowmc->rounds[i].k_matrix);
++    lowmc->rounds[i].k_lookup = oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->rounds[i].k_matrix);
+ #endif
+   }
+ #endif
+ 
+   if (!prepare_masks(&lowmc->mask, n, m)) {
+-    lowmc_clear(lowmc);
++    oqs_sig_picnic_lowmc_clear(lowmc);
+     return false;
+   }
+ 
+@@ -164,7 +164,7 @@ static mzd_local_t* readMZD_TStructFromFile(FILE* file) {
+   ret += fread(&(nrows), sizeof(uint32_t), 1, file);
+   ret += fread(&(ncols), sizeof(uint32_t), 1, file);
+ 
+-  mzd_local_t* A = mzd_local_init_ex(nrows, ncols, false);
++  mzd_local_t* A = oqs_sig_picnic_mzd_local_init_ex(nrows, ncols, false);
+   for (unsigned int i = 0; i < A->nrows; i++) {
+     ret += fread(ROW(A, i), A->rowstride * sizeof(word), 1, file);
+   }
+@@ -172,7 +172,7 @@ static mzd_local_t* readMZD_TStructFromFile(FILE* file) {
+   return A;
+ }
+ 
+-bool lowmc_read_file(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r,
++bool oqs_sig_picnic_lowmc_read_file(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r,
+                      unsigned int k) {
+   if (!lowmc) {
+     return false;
+@@ -217,40 +217,40 @@ bool lowmc_read_file(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned in
+ }
+ #endif
+ 
+-void lowmc_clear(lowmc_t* lowmc) {
++void oqs_sig_picnic_lowmc_clear(lowmc_t* lowmc) {
+   for (unsigned int i = 0; i < lowmc->r; ++i) {
+ #ifdef MUL_M4RI
+ #if !defined(REDUCED_LINEAR_LAYER)
+-    mzd_local_free(lowmc->rounds[i].k_lookup);
++    oqs_sig_picnic_mzd_local_free(lowmc->rounds[i].k_lookup);
+ #endif
+-    mzd_local_free(lowmc->rounds[i].l_lookup);
++    oqs_sig_picnic_mzd_local_free(lowmc->rounds[i].l_lookup);
+ #endif
+     if (lowmc->needs_free) {
+-      mzd_local_free((mzd_local_t*)lowmc->rounds[i].constant);
++      oqs_sig_picnic_mzd_local_free((mzd_local_t*)lowmc->rounds[i].constant);
+ #if !defined(REDUCED_LINEAR_LAYER)
+-      mzd_local_free((mzd_local_t*)lowmc->rounds[i].k_matrix);
++      oqs_sig_picnic_mzd_local_free((mzd_local_t*)lowmc->rounds[i].k_matrix);
+ #endif
+-      mzd_local_free((mzd_local_t*)lowmc->rounds[i].l_matrix);
++      oqs_sig_picnic_mzd_local_free((mzd_local_t*)lowmc->rounds[i].l_matrix);
+     }
+   }
+ #ifdef REDUCED_LINEAR_LAYER
+   if (lowmc->needs_free) {
+-    mzd_local_free((mzd_local_t*)lowmc->precomputed_non_linear_part_matrix);
++    oqs_sig_picnic_mzd_local_free((mzd_local_t*)lowmc->precomputed_non_linear_part_matrix);
+   }
+ #endif
+ #ifdef MUL_M4RI
+-  mzd_local_free(lowmc->k0_lookup);
++  oqs_sig_picnic_mzd_local_free(lowmc->k0_lookup);
+ #ifdef REDUCED_LINEAR_LAYER
+-  mzd_local_free(lowmc->precomputed_non_linear_part_lookup);
++  oqs_sig_picnic_mzd_local_free(lowmc->precomputed_non_linear_part_lookup);
+ #endif
+ #endif
+   if (lowmc->needs_free) {
+-    mzd_local_free((mzd_local_t*)lowmc->k0_matrix);
++    oqs_sig_picnic_mzd_local_free((mzd_local_t*)lowmc->k0_matrix);
+   }
+   free(lowmc->rounds);
+ 
+-  mzd_local_free(lowmc->mask.x0);
+-  mzd_local_free(lowmc->mask.x1);
+-  mzd_local_free(lowmc->mask.x2);
+-  mzd_local_free(lowmc->mask.mask);
++  oqs_sig_picnic_mzd_local_free(lowmc->mask.x0);
++  oqs_sig_picnic_mzd_local_free(lowmc->mask.x1);
++  oqs_sig_picnic_mzd_local_free(lowmc->mask.x2);
++  oqs_sig_picnic_mzd_local_free(lowmc->mask.mask);
+ }
+diff --git a/lowmc_pars.h b/lowmc_pars.h
+index 0adaca8..429d98a 100644
+--- a/lowmc_pars.h
++++ b/lowmc_pars.h
+@@ -72,16 +72,16 @@ typedef struct {
+  *
+  * \return parameters defining a LowMC instance (including a key)
+  */
+-bool lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, unsigned int k);
++bool oqs_sig_picnic_lowmc_init(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r, unsigned int k);
+ 
+ /**
+  * Clears the allocated LowMC parameters
+  *
+  * \param lowmc the LowMC parameters to be cleared
+  */
+-void lowmc_clear(lowmc_t* lowmc);
++void oqs_sig_picnic_lowmc_clear(lowmc_t* lowmc);
+ 
+-bool lowmc_read_file(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r,
++bool oqs_sig_picnic_lowmc_read_file(lowmc_t* lowmc, unsigned int m, unsigned int n, unsigned int r,
+                      unsigned int k);
+ 
+ #endif
+diff --git a/mpc.c b/mpc.c
+index db17ffb..095a901 100644
+--- a/mpc.c
++++ b/mpc.c
+@@ -22,32 +22,32 @@
+ 
+ #include <string.h>
+ 
+-void mpc_clear(mzd_local_t* const* res, unsigned sc) {
++void oqs_sig_picnic_mpc_clear(mzd_local_t* const* res, unsigned sc) {
+   for (unsigned int i = 0; i < sc; i++) {
+-    mzd_local_clear(res[i]);
++    oqs_sig_picnic_mzd_local_clear(res[i]);
+   }
+ }
+ 
+-void mpc_shift_right(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count,
++void oqs_sig_picnic_mpc_shift_right(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count,
+                      unsigned sc) {
+-  MPC_LOOP_CONST(mzd_shift_right, res, val, count, sc);
++  MPC_LOOP_CONST(oqs_sig_picnic_mzd_shift_right, res, val, count, sc);
+ }
+ 
+-void mpc_shift_left(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count, unsigned sc) {
+-  MPC_LOOP_CONST(mzd_shift_left, res, val, count, sc);
++void oqs_sig_picnic_mpc_shift_left(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count, unsigned sc) {
++  MPC_LOOP_CONST(oqs_sig_picnic_mzd_shift_left, res, val, count, sc);
+ }
+ 
+-void mpc_and_const(mzd_local_t* const* result, mzd_local_t* const* first, mzd_local_t const* second,
++void oqs_sig_picnic_mpc_and_const(mzd_local_t* const* result, mzd_local_t* const* first, mzd_local_t const* second,
+                    unsigned sc) {
+-  MPC_LOOP_CONST(mzd_xor, result, first, second, sc);
++  MPC_LOOP_CONST(oqs_sig_picnic_mzd_xor, result, first, second, sc);
+ }
+ 
+-void mpc_xor(mzd_local_t* const* result, mzd_local_t* const* first, mzd_local_t* const* second,
++void oqs_sig_picnic_mpc_xor(mzd_local_t* const* result, mzd_local_t* const* first, mzd_local_t* const* second,
+              unsigned sc) {
+-  MPC_LOOP_SHARED(mzd_xor, result, first, second, sc);
++  MPC_LOOP_SHARED(oqs_sig_picnic_mzd_xor, result, first, second, sc);
+ }
+ 
+-void mpc_and_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, uint64_t const* r,
++void oqs_sig_picnic_mpc_and_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, uint64_t const* r,
+                     view_t* view, unsigned viewshift) {
+   for (unsigned m = 0; m < SC_PROOF; ++m) {
+     const unsigned j = (m + 1) % SC_PROOF;
+@@ -96,27 +96,27 @@ void mpc_and_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second
+ #ifdef WITH_SSE2
+ #ifdef WITH_CUSTOM_INSTANCES
+ ATTRIBUTE_TARGET("sse2")
+-void mpc_and_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r,
++void oqs_sig_picnic_mpc_and_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r,
+                  view_t* view, unsigned viewshift) {
+   mpc_and_def(__m128i, _mm_and_si128, _mm_xor_si128, mm128_shift_right);
+ }
+ 
+ ATTRIBUTE_TARGET("sse2")
+-void mpc_and_256_sse(__m128i res[SC_PROOF][2], __m128i const first[SC_PROOF][2],
++void oqs_sig_picnic_mpc_and_256_sse(__m128i res[SC_PROOF][2], __m128i const first[SC_PROOF][2],
+                      __m128i const second[SC_PROOF][2], __m128i const r[SC_PROOF][2], view_t* view,
+                      unsigned viewshift) {
+   mpc_and_def_multiple(__m128i, mm256_and_sse, mm256_xor_sse, mm256_shift_right_sse, 2);
+ }
+ 
+ ATTRIBUTE_TARGET("sse2")
+-void mpc_and_384_sse(__m128i res[SC_PROOF][3], __m128i const first[SC_PROOF][3],
++void oqs_sig_picnic_mpc_and_384_sse(__m128i res[SC_PROOF][3], __m128i const first[SC_PROOF][3],
+                      __m128i const second[SC_PROOF][3], __m128i const r[SC_PROOF][3], view_t* view,
+                      unsigned viewshift) {
+   mpc_and_def_multiple(__m128i, mm384_and_sse, mm384_xor_sse, mm384_shift_right_sse, 3);
+ }
+ 
+ ATTRIBUTE_TARGET("sse2")
+-void mpc_and_512_sse(__m128i res[SC_PROOF][4], __m128i const first[SC_PROOF][4],
++void oqs_sig_picnic_mpc_and_512_sse(__m128i res[SC_PROOF][4], __m128i const first[SC_PROOF][4],
+                      __m128i const second[SC_PROOF][4], __m128i const r[SC_PROOF][4], view_t* view,
+                      unsigned viewshift) {
+   mpc_and_def_multiple(__m128i, mm512_and_sse, mm512_xor_sse, mm512_shift_right_sse, 4);
+@@ -169,7 +169,7 @@ void mpc_and_512_neon(uint32x4_t res[SC_PROOF][4], uint32x4_t const first[SC_PRO
+ #endif
+ #endif
+ 
+-void mpc_and(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second,
++void oqs_sig_picnic_mpc_and(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second,
+              mzd_local_t* const* r, view_t* view, unsigned viewshift, mzd_local_t* const* buffer) {
+   mzd_local_t* b = buffer[0];
+ 
+@@ -177,26 +177,26 @@ void mpc_and(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* co
+     const unsigned j = (m + 1) % SC_PROOF;
+ 
+     // f[m] & s[m]
+-    mzd_and(res[m], first[m], second[m]);
++    oqs_sig_picnic_mzd_and(res[m], first[m], second[m]);
+ 
+     // f[m + 1] & s[m]
+-    mzd_and(b, first[j], second[m]);
+-    mzd_xor(res[m], res[m], b);
++    oqs_sig_picnic_mzd_and(b, first[j], second[m]);
++    oqs_sig_picnic_mzd_xor(res[m], res[m], b);
+ 
+     // f[m] & s[m + 1]
+-    mzd_and(b, first[m], second[j]);
+-    mzd_xor(res[m], res[m], b);
++    oqs_sig_picnic_mzd_and(b, first[m], second[j]);
++    oqs_sig_picnic_mzd_xor(res[m], res[m], b);
+ 
+     // ... ^ r[m] ^ r[m + 1]
+-    mzd_xor(res[m], res[m], r[m]);
+-    mzd_xor(res[m], res[m], r[j]);
++    oqs_sig_picnic_mzd_xor(res[m], res[m], r[m]);
++    oqs_sig_picnic_mzd_xor(res[m], res[m], r[j]);
+   }
+ 
+-  mpc_shift_right(buffer, res, viewshift, SC_PROOF);
+-  mpc_xor(view->s, view->s, buffer, SC_PROOF);
++  oqs_sig_picnic_mpc_shift_right(buffer, res, viewshift, SC_PROOF);
++  oqs_sig_picnic_mpc_xor(view->s, view->s, buffer, SC_PROOF);
+ }
+ 
+-void mpc_and_verify_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second,
++void oqs_sig_picnic_mpc_and_verify_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second,
+                            uint64_t const* r, view_t* view, uint64_t const mask,
+                            unsigned viewshift) {
+   for (unsigned m = 0; m < (SC_VERIFY - 1); ++m) {
+@@ -312,13 +312,13 @@ void mpc_and_verify_512_avx(__m256i res[SC_VERIFY][2], __m256i const first[SC_VE
+ 
+ #ifdef WITH_NEON
+ #ifdef WITH_CUSTOM_INSTANCES
+-void mpc_and_verify_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second,
++void oqs_sig_picnic_mpc_and_verify_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second,
+                          uint32x4_t const* r, view_t* view, uint32x4_t const mask,
+                          unsigned viewshift) {
+   mpc_and_verify_def(uint32x4_t, vandq_u32, veorq_u32, mm128_shift_right, mm128_shift_left);
+ }
+ 
+-void mpc_and_verify_256_neon(uint32x4_t res[SC_VERIFY][2], uint32x4_t const first[SC_VERIFY][2],
++void oqs_sig_picnic_mpc_and_verify_256_neon(uint32x4_t res[SC_VERIFY][2], uint32x4_t const first[SC_VERIFY][2],
+                              uint32x4_t const second[SC_VERIFY][2],
+                              uint32x4_t const r[SC_VERIFY][2], view_t* view, uint32x4_t const* mask,
+                              unsigned viewshift) {
+@@ -326,7 +326,7 @@ void mpc_and_verify_256_neon(uint32x4_t res[SC_VERIFY][2], uint32x4_t const firs
+                               2);
+ }
+ 
+-void mpc_and_verify_384_neon(uint32x4_t res[SC_VERIFY][3], uint32x4_t const first[SC_VERIFY][3],
++void oqs_sig_picnic_mpc_and_verify_384_neon(uint32x4_t res[SC_VERIFY][3], uint32x4_t const first[SC_VERIFY][3],
+                              uint32x4_t const second[SC_VERIFY][3],
+                              uint32x4_t const r[SC_VERIFY][3], view_t* view, uint32x4_t const* mask,
+                              unsigned viewshift) {
+@@ -334,7 +334,7 @@ void mpc_and_verify_384_neon(uint32x4_t res[SC_VERIFY][3], uint32x4_t const firs
+                               3);
+ }
+ 
+-void mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const first[SC_VERIFY][4],
++void oqs_sig_picnic_mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const first[SC_VERIFY][4],
+                              uint32x4_t const second[SC_VERIFY][4],
+                              uint32x4_t const r[SC_VERIFY][4], view_t* view, uint32x4_t const* mask,
+                              unsigned viewshift) {
+@@ -345,7 +345,7 @@ void mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const firs
+ #endif
+ #endif
+ 
+-void mpc_and_verify(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second,
++void oqs_sig_picnic_mpc_and_verify(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second,
+                     mzd_local_t* const* r, view_t* view, mzd_local_t const* mask,
+                     unsigned viewshift, mzd_local_t* const* buffer) {
+   mzd_local_t* b = buffer[0];
+@@ -353,29 +353,29 @@ void mpc_and_verify(mzd_local_t* const* res, mzd_local_t* const* first, mzd_loca
+   for (unsigned m = 0; m < (SC_VERIFY - 1); ++m) {
+     const unsigned j = m + 1;
+ 
+-    mzd_and(res[m], first[m], second[m]);
++    oqs_sig_picnic_mzd_and(res[m], first[m], second[m]);
+ 
+-    mzd_and(b, first[j], second[m]);
+-    mzd_xor(res[m], res[m], b);
++    oqs_sig_picnic_mzd_and(b, first[j], second[m]);
++    oqs_sig_picnic_mzd_xor(res[m], res[m], b);
+ 
+-    mzd_and(b, first[m], second[j]);
+-    mzd_xor(res[m], res[m], b);
++    oqs_sig_picnic_mzd_and(b, first[m], second[j]);
++    oqs_sig_picnic_mzd_xor(res[m], res[m], b);
+ 
+-    mzd_xor(res[m], res[m], r[m]);
+-    mzd_xor(res[m], res[m], r[j]);
++    oqs_sig_picnic_mzd_xor(res[m], res[m], r[m]);
++    oqs_sig_picnic_mzd_xor(res[m], res[m], r[j]);
+   }
+ 
+   for (unsigned m = 0; m < (SC_VERIFY - 1); ++m) {
+-    mzd_shift_right(b, res[m], viewshift);
+-    mzd_xor(view->s[m], view->s[m], b);
++    oqs_sig_picnic_mzd_shift_right(b, res[m], viewshift);
++    oqs_sig_picnic_mzd_xor(view->s[m], view->s[m], b);
+   }
+ 
+-  mzd_shift_left(res[SC_VERIFY - 1], view->s[SC_VERIFY - 1], viewshift);
+-  mzd_and(res[SC_VERIFY - 1], res[SC_VERIFY - 1], mask);
++  oqs_sig_picnic_mzd_shift_left(res[SC_VERIFY - 1], view->s[SC_VERIFY - 1], viewshift);
++  oqs_sig_picnic_mzd_and(res[SC_VERIFY - 1], res[SC_VERIFY - 1], mask);
+ }
+ 
+-void mpc_copy(mzd_local_t** out, mzd_local_t* const* in, unsigned sc) {
++void oqs_sig_picnic_mpc_copy(mzd_local_t** out, mzd_local_t* const* in, unsigned sc) {
+   for (unsigned i = 0; i < sc; ++i) {
+-    mzd_local_copy(out[i], in[i]);
++    oqs_sig_picnic_mzd_local_copy(out[i], in[i]);
+   }
+ }
+diff --git a/mpc.h b/mpc.h
+index 7539e90..8b7a2d7 100644
+--- a/mpc.h
++++ b/mpc.h
+@@ -48,32 +48,32 @@ typedef view_t rvec_t;
+     }                                                                                              \
+   } while (0)
+ 
+-void mpc_shift_right(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count,
++void oqs_sig_picnic_mpc_shift_right(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count,
+                      unsigned sc) ATTR_NONNULL;
+ 
+-void mpc_shift_left(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count,
++void oqs_sig_picnic_mpc_shift_left(mzd_local_t* const* res, mzd_local_t* const* val, unsigned count,
+                     unsigned sc) ATTR_NONNULL;
+ 
+-void mpc_and_const(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t const* second,
++void oqs_sig_picnic_mpc_and_const(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t const* second,
+                    unsigned sc) ATTR_NONNULL;
+ 
+-void mpc_xor(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second,
++void oqs_sig_picnic_mpc_xor(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second,
+              unsigned sc) ATTR_NONNULL;
+ 
+-void mpc_clear(mzd_local_t* const* res, unsigned sc) ATTR_NONNULL;
++void oqs_sig_picnic_mpc_clear(mzd_local_t* const* res, unsigned sc) ATTR_NONNULL;
+ 
+-void mpc_and(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second,
++void oqs_sig_picnic_mpc_and(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second,
+              mzd_local_t* const* r, view_t* view, unsigned viewshift,
+              mzd_local_t* const* buffer) ATTR_NONNULL;
+ 
+-void mpc_and_verify(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second,
++void oqs_sig_picnic_mpc_and_verify(mzd_local_t* const* res, mzd_local_t* const* first, mzd_local_t* const* second,
+                     mzd_local_t* const* r, view_t* view, mzd_local_t const* mask,
+                     unsigned viewshift, mzd_local_t* const* buffer) ATTR_NONNULL;
+ 
+-void mpc_and_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, uint64_t const* r,
++void oqs_sig_picnic_mpc_and_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second, uint64_t const* r,
+                     view_t* view, unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_verify_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second,
++void oqs_sig_picnic_mpc_and_verify_uint64(uint64_t* res, uint64_t const* first, uint64_t const* second,
+                            uint64_t const* r, view_t* view, uint64_t const mask,
+                            unsigned viewshift) ATTR_NONNULL;
+ 
+@@ -81,85 +81,85 @@ void mpc_and_verify_uint64(uint64_t* res, uint64_t const* first, uint64_t const*
+ #include "simd.h"
+ #if defined(WITH_SSE2) || defined(WITH_AVX) || defined(WITH_SSE4_1)
+ 
+-void mpc_and_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r,
++void oqs_sig_picnic_mpc_and_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r,
+                  view_t* view, unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_verify_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r,
++void oqs_sig_picnic_mpc_and_verify_sse(__m128i* res, __m128i const* first, __m128i const* second, __m128i const* r,
+                         view_t* view, __m128i const mask, unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_256_sse(__m128i res[SC_PROOF][2], __m128i const first[SC_PROOF][2],
++void oqs_sig_picnic_mpc_and_256_sse(__m128i res[SC_PROOF][2], __m128i const first[SC_PROOF][2],
+                      __m128i const second[SC_PROOF][2], __m128i const r[SC_PROOF][2], view_t* view,
+                      unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_verify_256_sse(__m128i res[SC_VERIFY][2], __m128i const first[SC_VERIFY][2],
++void oqs_sig_picnic_mpc_and_verify_256_sse(__m128i res[SC_VERIFY][2], __m128i const first[SC_VERIFY][2],
+                             __m128i const second[SC_VERIFY][2], __m128i const r[SC_VERIFY][2],
+                             view_t* view, __m128i const* mask, unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_384_sse(__m128i res[SC_PROOF][3], __m128i const first[SC_PROOF][3],
++void oqs_sig_picnic_mpc_and_384_sse(__m128i res[SC_PROOF][3], __m128i const first[SC_PROOF][3],
+                      __m128i const second[SC_PROOF][3], __m128i const r[SC_PROOF][3], view_t* view,
+                      unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_verify_384_sse(__m128i res[SC_VERIFY][3], __m128i const first[SC_VERIFY][3],
++void oqs_sig_picnic_mpc_and_verify_384_sse(__m128i res[SC_VERIFY][3], __m128i const first[SC_VERIFY][3],
+                             __m128i const second[SC_VERIFY][3], __m128i const r[SC_VERIFY][3],
+                             view_t* view, __m128i const* mask, unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_512_sse(__m128i res[SC_PROOF][4], __m128i const first[SC_PROOF][4],
++void oqs_sig_picnic_mpc_and_512_sse(__m128i res[SC_PROOF][4], __m128i const first[SC_PROOF][4],
+                      __m128i const second[SC_PROOF][4], __m128i const r[SC_PROOF][4], view_t* view,
+                      unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_verify_512_sse(__m128i res[SC_VERIFY][4], __m128i const first[SC_VERIFY][4],
++void oqs_sig_picnic_mpc_and_verify_512_sse(__m128i res[SC_VERIFY][4], __m128i const first[SC_VERIFY][4],
+                             __m128i const second[SC_VERIFY][4], __m128i const r[SC_VERIFY][4],
+                             view_t* view, __m128i const* mask, unsigned viewshift) ATTR_NONNULL;
+ 
+ #endif
+ 
+ #if defined(WITH_AVX2)
+-void mpc_and_avx(__m256i* res, __m256i const* first, __m256i const* second, __m256i const* r,
++void oqs_sig_picnic_mpc_and_avx(__m256i* res, __m256i const* first, __m256i const* second, __m256i const* r,
+                  view_t* view, unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_verify_avx(__m256i* res, __m256i const* first, __m256i const* second, __m256i const* r,
++void oqs_sig_picnic_mpc_and_verify_avx(__m256i* res, __m256i const* first, __m256i const* second, __m256i const* r,
+                         view_t* view, __m256i const mask, unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_512_avx(__m256i res[SC_VERIFY][2], __m256i const first[SC_VERIFY][2],
++void oqs_sig_picnic_mpc_and_512_avx(__m256i res[SC_VERIFY][2], __m256i const first[SC_VERIFY][2],
+                      __m256i const second[SC_VERIFY][2], __m256i const r[SC_VERIFY][2],
+                      view_t* view, unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_verify_512_avx(__m256i res[SC_VERIFY][2], __m256i const first[SC_VERIFY][2],
++void oqs_sig_picnic_mpc_and_verify_512_avx(__m256i res[SC_VERIFY][2], __m256i const first[SC_VERIFY][2],
+                             __m256i const second[SC_VERIFY][2], __m256i const r[SC_VERIFY][2],
+                             view_t* view, __m256i const* mask, unsigned viewshift) ATTR_NONNULL;
+ #endif
+ 
+ #ifdef WITH_NEON
+-void mpc_and_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second,
++void oqs_sig_picnic_mpc_and_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second,
+                   uint32x4_t const* r, view_t* view, unsigned viewshift);
+ 
+-void mpc_and_verify_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second,
++void oqs_sig_picnic_mpc_and_verify_neon(uint32x4_t* res, uint32x4_t const* first, uint32x4_t const* second,
+                          uint32x4_t const* r, view_t* view, uint32x4_t const mask,
+                          unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_256_neon(uint32x4_t res[SC_PROOF][2], uint32x4_t const first[SC_PROOF][2],
++void oqs_sig_picnic_mpc_and_256_neon(uint32x4_t res[SC_PROOF][2], uint32x4_t const first[SC_PROOF][2],
+                       uint32x4_t const second[SC_PROOF][2], uint32x4_t const r[SC_PROOF][2],
+                       view_t* view, unsigned viewshift);
+ 
+-void mpc_and_verify_256_neon(uint32x4_t res[SC_VERIFY][2], uint32x4_t const first[SC_VERIFY][2],
++void oqs_sig_picnic_mpc_and_verify_256_neon(uint32x4_t res[SC_VERIFY][2], uint32x4_t const first[SC_VERIFY][2],
+                              uint32x4_t const second[SC_VERIFY][2],
+                              uint32x4_t const r[SC_VERIFY][2], view_t* view, uint32x4_t const* mask,
+                              unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_384_neon(uint32x4_t res[SC_PROOF][3], uint32x4_t const first[SC_PROOF][3],
++void oqs_sig_picnic_mpc_and_384_neon(uint32x4_t res[SC_PROOF][3], uint32x4_t const first[SC_PROOF][3],
+                       uint32x4_t const second[SC_PROOF][3], uint32x4_t const r[SC_PROOF][3],
+                       view_t* view, unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_verify_384_neon(uint32x4_t res[SC_VERIFY][3], uint32x4_t const first[SC_VERIFY][3],
++void oqs_sig_picnic_mpc_and_verify_384_neon(uint32x4_t res[SC_VERIFY][3], uint32x4_t const first[SC_VERIFY][3],
+                              uint32x4_t const second[SC_VERIFY][3],
+                              uint32x4_t const r[SC_VERIFY][3], view_t* view, uint32x4_t const* mask,
+                              unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_512_neon(uint32x4_t res[SC_PROOF][4], uint32x4_t const first[SC_PROOF][4],
++void oqs_sig_picnic_mpc_and_512_neon(uint32x4_t res[SC_PROOF][4], uint32x4_t const first[SC_PROOF][4],
+                       uint32x4_t const second[SC_PROOF][4], uint32x4_t const r[SC_PROOF][4],
+                       view_t* view, unsigned viewshift) ATTR_NONNULL;
+ 
+-void mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const first[SC_VERIFY][4],
++void oqs_sig_picnic_mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const first[SC_VERIFY][4],
+                              uint32x4_t const second[SC_VERIFY][4],
+                              uint32x4_t const r[SC_VERIFY][4], view_t* view, uint32x4_t const* mask,
+                              unsigned viewshift) ATTR_NONNULL;
+@@ -167,6 +167,6 @@ void mpc_and_verify_512_neon(uint32x4_t res[SC_VERIFY][4], uint32x4_t const firs
+ #endif
+ #endif
+ 
+-void mpc_copy(mzd_local_t** out, mzd_local_t* const* in, unsigned sc) ATTR_NONNULL_ARG(2);
++void oqs_sig_picnic_mpc_copy(mzd_local_t** out, mzd_local_t* const* in, unsigned sc) ATTR_NONNULL_ARG(2);
+ 
+ #endif
+diff --git a/mpc_lowmc.c b/mpc_lowmc.c
+index 7e3d8fe..41ffbdc 100644
+--- a/mpc_lowmc.c
++++ b/mpc_lowmc.c
+@@ -102,13 +102,13 @@ static void _mpc_sbox_layer_bitsliced_verify(mzd_local_t** out, mzd_local_t* con
+                                              mask_t const* mask, sbox_vars_t const* vars) {
+   bitsliced_step_1(SC_VERIFY);
+ 
+-  mzd_local_clear(view->s[0]);
++  oqs_sig_picnic_mzd_local_clear(view->s[0]);
+   // a & b
+-  mpc_and_verify(vars->r0m, vars->x0s, vars->x1s, vars->r2m, view, mask->x2, 0, vars->v);
++  oqs_sig_picnic_mpc_and_verify(vars->r0m, vars->x0s, vars->x1s, vars->r2m, view, mask->x2, 0, vars->v);
+   // b & c
+-  mpc_and_verify(vars->r2m, vars->x1s, vars->x2m, vars->r1s, view, mask->x2, 1, vars->v);
++  oqs_sig_picnic_mpc_and_verify(vars->r2m, vars->x1s, vars->x2m, vars->r1s, view, mask->x2, 1, vars->v);
+   // c & a
+-  mpc_and_verify(vars->r1m, vars->x0s, vars->x2m, vars->r0s, view, mask->x2, 2, vars->v);
++  oqs_sig_picnic_mpc_and_verify(vars->r1m, vars->x0s, vars->x2m, vars->r0s, view, mask->x2, 2, vars->v);
+ 
+   bitsliced_step_2(SC_VERIFY);
+ }
+@@ -167,9 +167,9 @@ static void _mpc_sbox_layer_bitsliced_uint64(uint64_t* out, uint64_t const* in,
+   bitsliced_step_1_uint64(SC_PROOF);
+ 
+   memset(view->t, 0, sizeof(uint64_t) * SC_PROOF);
+-  mpc_and_uint64(r0m, x0s, x1s, r2m, view, 0);
+-  mpc_and_uint64(r2m, x1s, x2m, r1s, view, 1);
+-  mpc_and_uint64(r1m, x0s, x2m, r0s, view, 2);
++  oqs_sig_picnic_mpc_and_uint64(r0m, x0s, x1s, r2m, view, 0);
++  oqs_sig_picnic_mpc_and_uint64(r2m, x1s, x2m, r1s, view, 1);
++  oqs_sig_picnic_mpc_and_uint64(r1m, x0s, x2m, r0s, view, 2);
+ 
+   bitsliced_step_2_uint64(SC_PROOF);
+ }
+@@ -179,9 +179,9 @@ static void _mpc_sbox_layer_bitsliced_verify_uint64(uint64_t* out, uint64_t cons
+   bitsliced_step_1_uint64(SC_VERIFY);
+ 
+   view->t[0] = 0;
+-  mpc_and_verify_uint64(r0m, x0s, x1s, r2m, view, mx2, 0);
+-  mpc_and_verify_uint64(r2m, x1s, x2m, r1s, view, mx2, 1);
+-  mpc_and_verify_uint64(r1m, x0s, x2m, r0s, view, mx2, 2);
++  oqs_sig_picnic_mpc_and_verify_uint64(r0m, x0s, x1s, r2m, view, mx2, 0);
++  oqs_sig_picnic_mpc_and_verify_uint64(r2m, x1s, x2m, r1s, view, mx2, 1);
++  oqs_sig_picnic_mpc_and_verify_uint64(r1m, x0s, x2m, r0s, view, mx2, 2);
+ 
+   bitsliced_step_2_uint64(SC_VERIFY);
+ }
+@@ -335,10 +335,10 @@ static void _mpc_sbox_layer_bitsliced_128_sse(mzd_local_t** out, mzd_local_t* co
+                                               mask_t const* mask) {
+   bitsliced_mm_step_1(SC_PROOF, __m128i, _mm_and_si128, mm128_shift_left);
+ 
+-  mpc_clear(view->s, SC_PROOF);
+-  mpc_and_sse(r0m, x0s, x1s, r2m, view, 0);
+-  mpc_and_sse(r2m, x1s, x2m, r1s, view, 1);
+-  mpc_and_sse(r1m, x0s, x2m, r0s, view, 2);
++  oqs_sig_picnic_mpc_clear(view->s, SC_PROOF);
++  oqs_sig_picnic_mpc_and_sse(r0m, x0s, x1s, r2m, view, 0);
++  oqs_sig_picnic_mpc_and_sse(r2m, x1s, x2m, r1s, view, 1);
++  oqs_sig_picnic_mpc_and_sse(r1m, x0s, x2m, r0s, view, 2);
+ 
+   bitsliced_mm_step_2(SC_PROOF, __m128i, _mm_and_si128, _mm_xor_si128, mm128_shift_right);
+ }
+@@ -349,10 +349,10 @@ static void _mpc_sbox_layer_bitsliced_verify_128_sse(mzd_local_t** out, mzd_loca
+                                                      mask_t const* mask) {
+   bitsliced_mm_step_1(SC_VERIFY, __m128i, _mm_and_si128, mm128_shift_left);
+ 
+-  mzd_local_clear(view->s[0]);
+-  mpc_and_verify_sse(r0m, x0s, x1s, r2m, view, mx2, 0);
+-  mpc_and_verify_sse(r2m, x1s, x2m, r1s, view, mx2, 1);
+-  mpc_and_verify_sse(r1m, x0s, x2m, r0s, view, mx2, 2);
++  oqs_sig_picnic_mzd_local_clear(view->s[0]);
++  oqs_sig_picnic_mpc_and_verify_sse(r0m, x0s, x1s, r2m, view, mx2, 0);
++  oqs_sig_picnic_mpc_and_verify_sse(r2m, x1s, x2m, r1s, view, mx2, 1);
++  oqs_sig_picnic_mpc_and_verify_sse(r1m, x0s, x2m, r0s, view, mx2, 2);
+ 
+   bitsliced_mm_step_2(SC_VERIFY, __m128i, _mm_and_si128, _mm_xor_si128, mm128_shift_right);
+ }
+@@ -363,10 +363,10 @@ static void _mpc_sbox_layer_bitsliced_256_sse(mzd_local_t** out, mzd_local_t* co
+                                               mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_PROOF, __m128i, mm256_and_sse, mm256_shift_left_sse, 2);
+ 
+-  mpc_clear(view->s, SC_PROOF);
+-  mpc_and_256_sse(r0m, x0s, x1s, r2m, view, 0);
+-  mpc_and_256_sse(r2m, x1s, x2m, r1s, view, 1);
+-  mpc_and_256_sse(r1m, x0s, x2m, r0s, view, 2);
++  oqs_sig_picnic_mpc_clear(view->s, SC_PROOF);
++  oqs_sig_picnic_mpc_and_256_sse(r0m, x0s, x1s, r2m, view, 0);
++  oqs_sig_picnic_mpc_and_256_sse(r2m, x1s, x2m, r1s, view, 1);
++  oqs_sig_picnic_mpc_and_256_sse(r1m, x0s, x2m, r0s, view, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_PROOF, __m128i, mm256_and_sse, mm256_xor_sse,
+                                       mm256_shift_right_sse, 2);
+@@ -378,10 +378,10 @@ static void _mpc_sbox_layer_bitsliced_verify_256_sse(mzd_local_t** out, mzd_loca
+                                                      mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, __m128i, mm256_and_sse, mm256_shift_left_sse, 2);
+ 
+-  mzd_local_clear(view->s[0]);
+-  mpc_and_verify_256_sse(r0m, x0s, x1s, r2m, view, mx2, 0);
+-  mpc_and_verify_256_sse(r2m, x1s, x2m, r1s, view, mx2, 1);
+-  mpc_and_verify_256_sse(r1m, x0s, x2m, r0s, view, mx2, 2);
++  oqs_sig_picnic_mzd_local_clear(view->s[0]);
++  oqs_sig_picnic_mpc_and_verify_256_sse(r0m, x0s, x1s, r2m, view, mx2, 0);
++  oqs_sig_picnic_mpc_and_verify_256_sse(r2m, x1s, x2m, r1s, view, mx2, 1);
++  oqs_sig_picnic_mpc_and_verify_256_sse(r1m, x0s, x2m, r0s, view, mx2, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, __m128i, mm256_and_sse, mm256_xor_sse,
+                                       mm256_shift_right_sse, 2);
+@@ -393,10 +393,10 @@ static void _mpc_sbox_layer_bitsliced_384_sse(mzd_local_t** out, mzd_local_t* co
+                                               mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_PROOF, __m128i, mm384_and_sse, mm384_shift_left_sse, 3);
+ 
+-  mpc_clear(view->s, SC_PROOF);
+-  mpc_and_384_sse(r0m, x0s, x1s, r2m, view, 0);
+-  mpc_and_384_sse(r2m, x1s, x2m, r1s, view, 1);
+-  mpc_and_384_sse(r1m, x0s, x2m, r0s, view, 2);
++  oqs_sig_picnic_mpc_clear(view->s, SC_PROOF);
++  oqs_sig_picnic_mpc_and_384_sse(r0m, x0s, x1s, r2m, view, 0);
++  oqs_sig_picnic_mpc_and_384_sse(r2m, x1s, x2m, r1s, view, 1);
++  oqs_sig_picnic_mpc_and_384_sse(r1m, x0s, x2m, r0s, view, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_PROOF, __m128i, mm384_and_sse, mm384_xor_sse,
+                                       mm384_shift_right_sse, 3);
+@@ -408,10 +408,10 @@ static void _mpc_sbox_layer_bitsliced_verify_384_sse(mzd_local_t** out, mzd_loca
+                                                      mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, __m128i, mm384_and_sse, mm384_shift_left_sse, 3);
+ 
+-  mzd_local_clear(view->s[0]);
+-  mpc_and_verify_384_sse(r0m, x0s, x1s, r2m, view, mx2, 0);
+-  mpc_and_verify_384_sse(r2m, x1s, x2m, r1s, view, mx2, 1);
+-  mpc_and_verify_384_sse(r1m, x0s, x2m, r0s, view, mx2, 2);
++  oqs_sig_picnic_mzd_local_clear(view->s[0]);
++  oqs_sig_picnic_mpc_and_verify_384_sse(r0m, x0s, x1s, r2m, view, mx2, 0);
++  oqs_sig_picnic_mpc_and_verify_384_sse(r2m, x1s, x2m, r1s, view, mx2, 1);
++  oqs_sig_picnic_mpc_and_verify_384_sse(r1m, x0s, x2m, r0s, view, mx2, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, __m128i, mm384_and_sse, mm384_xor_sse,
+                                       mm384_shift_right_sse, 3);
+@@ -423,10 +423,10 @@ static void _mpc_sbox_layer_bitsliced_512_sse(mzd_local_t** out, mzd_local_t* co
+                                               mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_PROOF, __m128i, mm512_and_sse, mm512_shift_left_sse, 4);
+ 
+-  mpc_clear(view->s, SC_PROOF);
+-  mpc_and_512_sse(r0m, x0s, x1s, r2m, view, 0);
+-  mpc_and_512_sse(r2m, x1s, x2m, r1s, view, 1);
+-  mpc_and_512_sse(r1m, x0s, x2m, r0s, view, 2);
++  oqs_sig_picnic_mpc_clear(view->s, SC_PROOF);
++  oqs_sig_picnic_mpc_and_512_sse(r0m, x0s, x1s, r2m, view, 0);
++  oqs_sig_picnic_mpc_and_512_sse(r2m, x1s, x2m, r1s, view, 1);
++  oqs_sig_picnic_mpc_and_512_sse(r1m, x0s, x2m, r0s, view, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_PROOF, __m128i, mm512_and_sse, mm512_xor_sse,
+                                       mm512_shift_right_sse, 4);
+@@ -438,10 +438,10 @@ static void _mpc_sbox_layer_bitsliced_verify_512_sse(mzd_local_t** out, mzd_loca
+                                                      mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, __m128i, mm512_and_sse, mm512_shift_left_sse, 4);
+ 
+-  mzd_local_clear(view->s[0]);
+-  mpc_and_verify_512_sse(r0m, x0s, x1s, r2m, view, mx2, 0);
+-  mpc_and_verify_512_sse(r2m, x1s, x2m, r1s, view, mx2, 1);
+-  mpc_and_verify_512_sse(r1m, x0s, x2m, r0s, view, mx2, 2);
++  oqs_sig_picnic_mzd_local_clear(view->s[0]);
++  oqs_sig_picnic_mpc_and_verify_512_sse(r0m, x0s, x1s, r2m, view, mx2, 0);
++  oqs_sig_picnic_mpc_and_verify_512_sse(r2m, x1s, x2m, r1s, view, mx2, 1);
++  oqs_sig_picnic_mpc_and_verify_512_sse(r1m, x0s, x2m, r0s, view, mx2, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, __m128i, mm512_and_sse, mm512_xor_sse,
+                                       mm512_shift_right_sse, 4);
+@@ -457,10 +457,10 @@ static void _mpc_sbox_layer_bitsliced_256_avx(mzd_local_t** out, mzd_local_t* co
+                                               mask_t const* mask) {
+   bitsliced_mm_step_1(SC_PROOF, __m256i, _mm256_and_si256, mm256_shift_left);
+ 
+-  mpc_clear(view->s, SC_PROOF);
+-  mpc_and_avx(r0m, x0s, x1s, r2m, view, 0);
+-  mpc_and_avx(r2m, x1s, x2m, r1s, view, 1);
+-  mpc_and_avx(r1m, x0s, x2m, r0s, view, 2);
++  oqs_sig_picnic_mpc_clear(view->s, SC_PROOF);
++  oqs_sig_picnic_mpc_and_avx(r0m, x0s, x1s, r2m, view, 0);
++  oqs_sig_picnic_mpc_and_avx(r2m, x1s, x2m, r1s, view, 1);
++  oqs_sig_picnic_mpc_and_avx(r1m, x0s, x2m, r0s, view, 2);
+ 
+   bitsliced_mm_step_2(SC_PROOF, __m256i, _mm256_and_si256, _mm256_xor_si256, mm256_shift_right);
+ }
+@@ -471,10 +471,10 @@ static void _mpc_sbox_layer_bitsliced_verify_256_avx(mzd_local_t** out, mzd_loca
+                                                      mask_t const* mask) {
+   bitsliced_mm_step_1(SC_VERIFY, __m256i, _mm256_and_si256, mm256_shift_left);
+ 
+-  mzd_local_clear(view->s[0]);
+-  mpc_and_verify_avx(r0m, x0s, x1s, r2m, view, mx2, 0);
+-  mpc_and_verify_avx(r2m, x1s, x2m, r1s, view, mx2, 1);
+-  mpc_and_verify_avx(r1m, x0s, x2m, r0s, view, mx2, 2);
++  oqs_sig_picnic_mzd_local_clear(view->s[0]);
++  oqs_sig_picnic_mpc_and_verify_avx(r0m, x0s, x1s, r2m, view, mx2, 0);
++  oqs_sig_picnic_mpc_and_verify_avx(r2m, x1s, x2m, r1s, view, mx2, 1);
++  oqs_sig_picnic_mpc_and_verify_avx(r1m, x0s, x2m, r0s, view, mx2, 2);
+ 
+   bitsliced_mm_step_2(SC_VERIFY, __m256i, _mm256_and_si256, _mm256_xor_si256, mm256_shift_right);
+ }
+@@ -485,10 +485,10 @@ static void _mpc_sbox_layer_bitsliced_512_avx(mzd_local_t** out, mzd_local_t* co
+                                               mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_PROOF, __m256i, mm512_and_avx, mm512_shift_left_avx, 2);
+ 
+-  mpc_clear(view->s, SC_PROOF);
+-  mpc_and_512_avx(r0m, x0s, x1s, r2m, view, 0);
+-  mpc_and_512_avx(r2m, x1s, x2m, r1s, view, 1);
+-  mpc_and_512_avx(r1m, x0s, x2m, r0s, view, 2);
++  oqs_sig_picnic_mpc_clear(view->s, SC_PROOF);
++  oqs_sig_picnic_mpc_and_512_avx(r0m, x0s, x1s, r2m, view, 0);
++  oqs_sig_picnic_mpc_and_512_avx(r2m, x1s, x2m, r1s, view, 1);
++  oqs_sig_picnic_mpc_and_512_avx(r1m, x0s, x2m, r0s, view, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_PROOF, __m256i, mm512_and_avx, mm512_xor_avx,
+                                       mm512_shift_right_avx, 2);
+@@ -500,10 +500,10 @@ static void _mpc_sbox_layer_bitsliced_verify_512_avx(mzd_local_t** out, mzd_loca
+                                                      mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, __m256i, mm512_and_avx, mm512_shift_left_avx, 2);
+ 
+-  mzd_local_clear(view->s[0]);
+-  mpc_and_verify_512_avx(r0m, x0s, x1s, r2m, view, mx2, 0);
+-  mpc_and_verify_512_avx(r2m, x1s, x2m, r1s, view, mx2, 1);
+-  mpc_and_verify_512_avx(r1m, x0s, x2m, r0s, view, mx2, 2);
++  oqs_sig_picnic_mzd_local_clear(view->s[0]);
++  oqs_sig_picnic_mpc_and_verify_512_avx(r0m, x0s, x1s, r2m, view, mx2, 0);
++  oqs_sig_picnic_mpc_and_verify_512_avx(r2m, x1s, x2m, r1s, view, mx2, 1);
++  oqs_sig_picnic_mpc_and_verify_512_avx(r1m, x0s, x2m, r0s, view, mx2, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, __m256i, mm512_and_avx, mm512_xor_avx,
+                                       mm512_shift_right_avx, 2);
+@@ -518,10 +518,10 @@ static void _mpc_sbox_layer_bitsliced_128_neon(mzd_local_t** out, mzd_local_t* c
+                                                mask_t const* mask) {
+   bitsliced_mm_step_1(SC_PROOF, uint32x4_t, vandq_u32, mm128_shift_left);
+ 
+-  mpc_clear(view->s, SC_PROOF);
+-  mpc_and_neon(r0m, x0s, x1s, r2m, view, 0);
+-  mpc_and_neon(r2m, x1s, x2m, r1s, view, 1);
+-  mpc_and_neon(r1m, x0s, x2m, r0s, view, 2);
++  oqs_sig_picnic_mpc_clear(view->s, SC_PROOF);
++  oqs_sig_picnic_mpc_and_neon(r0m, x0s, x1s, r2m, view, 0);
++  oqs_sig_picnic_mpc_and_neon(r2m, x1s, x2m, r1s, view, 1);
++  oqs_sig_picnic_mpc_and_neon(r1m, x0s, x2m, r0s, view, 2);
+ 
+   bitsliced_mm_step_2(SC_PROOF, uint32x4_t, vandq_u32, veorq_u32, mm128_shift_right);
+ }
+@@ -531,10 +531,10 @@ static void _mpc_sbox_layer_bitsliced_verify_128_neon(mzd_local_t** out, mzd_loc
+                                                       mask_t const* mask) {
+   bitsliced_mm_step_1(SC_VERIFY, uint32x4_t, vandq_u32, mm128_shift_left);
+ 
+-  mzd_local_clear(view->s[0]);
+-  mpc_and_verify_neon(r0m, x0s, x1s, r2m, view, mx2, 0);
+-  mpc_and_verify_neon(r2m, x1s, x2m, r1s, view, mx2, 1);
+-  mpc_and_verify_neon(r1m, x0s, x2m, r0s, view, mx2, 2);
++  oqs_sig_picnic_mzd_local_clear(view->s[0]);
++  oqs_sig_picnic_mpc_and_verify_neon(r0m, x0s, x1s, r2m, view, mx2, 0);
++  oqs_sig_picnic_mpc_and_verify_neon(r2m, x1s, x2m, r1s, view, mx2, 1);
++  oqs_sig_picnic_mpc_and_verify_neon(r1m, x0s, x2m, r0s, view, mx2, 2);
+ 
+   bitsliced_mm_step_2(SC_VERIFY, uint32x4_t, vandq_u32, veorq_u32, mm128_shift_right);
+ }
+@@ -544,10 +544,10 @@ static void _mpc_sbox_layer_bitsliced_256_neon(mzd_local_t** out, mzd_local_t* c
+                                                mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_PROOF, uint32x4_t, mm256_and, mm256_shift_left, 2);
+ 
+-  mpc_clear(view->s, SC_PROOF);
+-  mpc_and_256_neon(r0m, x0s, x1s, r2m, view, 0);
+-  mpc_and_256_neon(r2m, x1s, x2m, r1s, view, 1);
+-  mpc_and_256_neon(r1m, x0s, x2m, r0s, view, 2);
++  oqs_sig_picnic_mpc_clear(view->s, SC_PROOF);
++  oqs_sig_picnic_mpc_and_256_neon(r0m, x0s, x1s, r2m, view, 0);
++  oqs_sig_picnic_mpc_and_256_neon(r2m, x1s, x2m, r1s, view, 1);
++  oqs_sig_picnic_mpc_and_256_neon(r1m, x0s, x2m, r0s, view, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_PROOF, uint32x4_t, mm256_and, mm256_xor, mm256_shift_right,
+                                       2);
+@@ -558,10 +558,10 @@ static void _mpc_sbox_layer_bitsliced_verify_256_neon(mzd_local_t** out, mzd_loc
+                                                       mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, uint32x4_t, mm256_and, mm256_shift_left, 2);
+ 
+-  mzd_local_clear(view->s[0]);
+-  mpc_and_verify_256_neon(r0m, x0s, x1s, r2m, view, mx2, 0);
+-  mpc_and_verify_256_neon(r2m, x1s, x2m, r1s, view, mx2, 1);
+-  mpc_and_verify_256_neon(r1m, x0s, x2m, r0s, view, mx2, 2);
++  oqs_sig_picnic_mzd_local_clear(view->s[0]);
++  oqs_sig_picnic_mpc_and_verify_256_neon(r0m, x0s, x1s, r2m, view, mx2, 0);
++  oqs_sig_picnic_mpc_and_verify_256_neon(r2m, x1s, x2m, r1s, view, mx2, 1);
++  oqs_sig_picnic_mpc_and_verify_256_neon(r1m, x0s, x2m, r0s, view, mx2, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, uint32x4_t, mm256_and, mm256_xor,
+                                       mm256_shift_right, 2);
+@@ -572,10 +572,10 @@ static void _mpc_sbox_layer_bitsliced_384_neon(mzd_local_t** out, mzd_local_t* c
+                                                mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_PROOF, uint32x4_t, mm384_and, mm384_shift_left, 3);
+ 
+-  mpc_clear(view->s, SC_PROOF);
+-  mpc_and_384_neon(r0m, x0s, x1s, r2m, view, 0);
+-  mpc_and_384_neon(r2m, x1s, x2m, r1s, view, 1);
+-  mpc_and_384_neon(r1m, x0s, x2m, r0s, view, 2);
++  oqs_sig_picnic_mpc_clear(view->s, SC_PROOF);
++  oqs_sig_picnic_mpc_and_384_neon(r0m, x0s, x1s, r2m, view, 0);
++  oqs_sig_picnic_mpc_and_384_neon(r2m, x1s, x2m, r1s, view, 1);
++  oqs_sig_picnic_mpc_and_384_neon(r1m, x0s, x2m, r0s, view, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_PROOF, uint32x4_t, mm384_and, mm384_xor, mm384_shift_right,
+                                       3);
+@@ -586,10 +586,10 @@ static void _mpc_sbox_layer_bitsliced_verify_384_neon(mzd_local_t** out, mzd_loc
+                                                       mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, uint32x4_t, mm384_and, mm384_shift_left, 3);
+ 
+-  mzd_local_clear(view->s[0]);
+-  mpc_and_verify_384_neon(r0m, x0s, x1s, r2m, view, mx2, 0);
+-  mpc_and_verify_384_neon(r2m, x1s, x2m, r1s, view, mx2, 1);
+-  mpc_and_verify_384_neon(r1m, x0s, x2m, r0s, view, mx2, 2);
++  oqs_sig_picnic_mzd_local_clear(view->s[0]);
++  oqs_sig_picnic_mpc_and_verify_384_neon(r0m, x0s, x1s, r2m, view, mx2, 0);
++  oqs_sig_picnic_mpc_and_verify_384_neon(r2m, x1s, x2m, r1s, view, mx2, 1);
++  oqs_sig_picnic_mpc_and_verify_384_neon(r1m, x0s, x2m, r0s, view, mx2, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, uint32x4_t, mm384_and, mm384_xor,
+                                       mm384_shift_right, 3);
+@@ -601,10 +601,10 @@ static void _mpc_sbox_layer_bitsliced_512_neon(mzd_local_t** out, mzd_local_t* c
+                                                mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_PROOF, uint32x4_t, mm512_and, mm512_shift_left, 4);
+ 
+-  mpc_clear(view->s, SC_PROOF);
+-  mpc_and_512_neon(r0m, x0s, x1s, r2m, view, 0);
+-  mpc_and_512_neon(r2m, x1s, x2m, r1s, view, 1);
+-  mpc_and_512_neon(r1m, x0s, x2m, r0s, view, 2);
++  oqs_sig_picnic_mpc_clear(view->s, SC_PROOF);
++  oqs_sig_picnic_mpc_and_512_neon(r0m, x0s, x1s, r2m, view, 0);
++  oqs_sig_picnic_mpc_and_512_neon(r2m, x1s, x2m, r1s, view, 1);
++  oqs_sig_picnic_mpc_and_512_neon(r1m, x0s, x2m, r0s, view, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_PROOF, uint32x4_t, mm512_and, mm512_xor, mm512_shift_right,
+                                       4);
+@@ -615,10 +615,10 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc
+                                                       mask_t const* mask) {
+   bitsliced_mm_step_1_multiple_of_128(SC_VERIFY, uint32x4_t, mm512_and, mm512_shift_left, 4);
+ 
+-  mzd_local_clear(view->s[0]);
+-  mpc_and_verify_512_neon(r0m, x0s, x1s, r2m, view, mx2, 0);
+-  mpc_and_verify_512_neon(r2m, x1s, x2m, r1s, view, mx2, 1);
+-  mpc_and_verify_512_neon(r1m, x0s, x2m, r0s, view, mx2, 2);
++  oqs_sig_picnic_mzd_local_clear(view->s[0]);
++  oqs_sig_picnic_mpc_and_verify_512_neon(r0m, x0s, x1s, r2m, view, mx2, 0);
++  oqs_sig_picnic_mpc_and_verify_512_neon(r2m, x1s, x2m, r1s, view, mx2, 1);
++  oqs_sig_picnic_mpc_and_verify_512_neon(r1m, x0s, x2m, r0s, view, mx2, 2);
+ 
+   bitsliced_mm_step_2_multiple_of_128(SC_VERIFY, uint32x4_t, mm512_and, mm512_xor,
+                                       mm512_shift_right, 4);
+@@ -700,7 +700,7 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc
+ #define loop_optimize(sbox_args, sbox, sbox_selector, no_scr, no_scr_active, const_mat_mul_func,   \
+                       add_func, mul_more_cols, const_addmat_mul_func, ch, shares)                  \
+   mzd_local_t* nl_part[shares];                                                                    \
+-  mzd_local_init_multiple_ex(nl_part, shares, 1, lowmc->r * 32, false);                            \
++  oqs_sig_picnic_mzd_local_init_multiple_ex(nl_part, shares, 1, lowmc->r * 32, false);             \
+   MPC_LOOP(mul_more_cols, nl_part, lowmc_key, lowmc->precomputed_non_linear_part_##no_scr,         \
+            shares);                                                                                \
+   word mask = 0x00000000FFFFFFFF;                                                                  \
+@@ -715,7 +715,7 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc
+     MPC_LOOP(const_mat_mul_func, x, y, round->l_##no_scr, shares);                                 \
+     MPC_IF_ELSE(add_func, x, x, round->constant, shares, ch);                                      \
+   }                                                                                                \
+-  mzd_local_free_multiple(nl_part);
++  oqs_sig_picnic_mzd_local_free_multiple(nl_part);
+ 
+ #define loop(sbox_args, sbox, sbox_selector, no_scr, no_scr_active, const_mat_mul_func, add_func,  \
+              mul_more_cols, const_addmat_mul_func, ch, shares)                                     \
+@@ -739,12 +739,12 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc
+ #define _mpc_lowmc_call_bitsliced(ch, sbox_args, sbox, sbox_selector, no_scr, no_scr_active,       \
+                                   optimize, const_mat_mul_func, add_func, mul_more_cols,           \
+                                   const_addmat_mul_func)                                           \
+-  mpc_copy(in_out_shares->s, lowmc_key, SC_PROOF);                                                 \
++  oqs_sig_picnic_mpc_copy(in_out_shares->s, lowmc_key, SC_PROOF);                                                 \
+   ++in_out_shares;                                                                                 \
+   VARS_##sbox_args(SC_PROOF);                                                                      \
+   mzd_local_t** x = in_out_shares->s;                                                              \
+   mzd_local_t* y[SC_PROOF];                                                                        \
+-  mzd_local_init_multiple_ex(y, SC_PROOF, 1, lowmc->n, false);                                     \
++  oqs_sig_picnic_mzd_local_init_multiple_ex(y, SC_PROOF, 1, lowmc->n, false);                      \
+                                                                                                    \
+   MPC_LOOP(const_mat_mul_func, x, lowmc_key, lowmc->k0_##no_scr, SC_PROOF);                        \
+   MPC_IF_ELSE(add_func, x, x, p, SC_PROOF, ch);                                                    \
+@@ -754,7 +754,7 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc
+   loop##optimize(sbox_args, sbox, sbox_selector, no_scr, no_scr_active, const_mat_mul_func,        \
+                  add_func, mul_more_cols, const_addmat_mul_func, ch, SC_PROOF)                     \
+       VARS_FREE_##sbox_args;                                                                       \
+-  mzd_local_free_multiple(y);
++  oqs_sig_picnic_mzd_local_free_multiple(y);
+ 
+ #define init_key mzd_local_t* const* lowmc_key = &in_out_shares->s[0];
+ 
+@@ -767,7 +767,7 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc
+   VARS_##sbox_args(SC_VERIFY);                                                                     \
+   mzd_local_t* x[2 * SC_VERIFY];                                                                   \
+   mzd_local_t** y = &x[SC_VERIFY];                                                                 \
+-  mzd_local_init_multiple_ex(x, 2 * SC_VERIFY, 1, lowmc->n, false);                                \
++  oqs_sig_picnic_mzd_local_init_multiple_ex(x, 2 * SC_VERIFY, 1, lowmc->n, false);                 \
+                                                                                                    \
+   MPC_LOOP(const_mat_mul_func, x, lowmc_key, lowmc->k0_##no_scr, SC_VERIFY);                       \
+   MPC_IF_ELSE(add_func, x, x, p, SC_VERIFY, ch);                                                   \
+@@ -776,8 +776,8 @@ static void _mpc_sbox_layer_bitsliced_verify_512_neon(mzd_local_t** out, mzd_loc
+                                                                                                    \
+   loop##optimize(sbox_args, sbox, sbox_selector, no_scr, no_scr_active, const_mat_mul_func,        \
+                  add_func, mul_more_cols, const_addmat_mul_func, ch, SC_VERIFY);                   \
+-  mpc_copy(in_out_shares->s, x, SC_VERIFY);                                                        \
+-  mzd_local_free_multiple(x);                                                                      \
++  oqs_sig_picnic_mpc_copy(in_out_shares->s, x, SC_VERIFY);                                                        \
++  oqs_sig_picnic_mzd_local_free_multiple(x);                                                       \
+   VARS_FREE_##sbox_args;
+ 
+ static void mpc_lowmc_call(lowmc_t const* lowmc, mpc_lowmc_key_t* lowmc_key, mzd_local_t const* p,
+@@ -787,25 +787,25 @@ static void mpc_lowmc_call(lowmc_t const* lowmc, mpc_lowmc_key_t* lowmc_key, mzd
+ #ifdef WITH_CUSTOM_INSTANCES
+   if (lowmc->m != 10) {
+     _mpc_lowmc_call_bitsliced(0, 6, _mpc_sbox_layer_bitsliced, mzd, lookup, noscr, _optimize,
+-                              mzd_mul_vl_general, mzd_xor_general, mzd_mul_vl_general,
++                              oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_xor_general, oqs_sig_picnic_mzd_mul_vl_general,
+                               mzd_addmul_vl_general);
+   } else
+ #endif
+   {
+-    _mpc_lowmc_call_bitsliced(0, 6, , uint64, lookup, noscr, _optimize, mzd_mul_vl_general,
+-                              mzd_xor_general, mzd_mul_vl_general, mzd_addmul_vl_general);
++    _mpc_lowmc_call_bitsliced(0, 6, , uint64, lookup, noscr, _optimize, oqs_sig_picnic_mzd_mul_vl_general,
++                              oqs_sig_picnic_mzd_xor_general, oqs_sig_picnic_mzd_mul_vl_general, mzd_addmul_vl_general);
+   }
+ #else
+ #ifdef WITH_CUSTOM_INSTANCES
+   if (lowmc->m != 10) {
+     _mpc_lowmc_call_bitsliced(0, 6, _mpc_sbox_layer_bitsliced, mzd, matrix, scr, _optimize,
+-                              mzd_mul_v_general, mzd_xor_general, mzd_mul_v_general,
+-                              mzd_addmul_v_general);
++                              oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_xor_general, oqs_sig_picnic_mzd_mul_v_general,
++                              oqs_sig_picnic_mzd_addmul_v_general);
+   } else
+ #endif
+   {
+-    _mpc_lowmc_call_bitsliced(0, 6, , uint64, matrix, scr, _optimize, mzd_mul_v_general,
+-                              mzd_xor_general, mzd_mul_v_general, mzd_addmul_v_general);
++    _mpc_lowmc_call_bitsliced(0, 6, , uint64, matrix, scr, _optimize, oqs_sig_picnic_mzd_mul_v_general,
++                              oqs_sig_picnic_mzd_xor_general, oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_addmul_v_general);
+   }
+ #endif
+ #else
+@@ -813,23 +813,23 @@ static void mpc_lowmc_call(lowmc_t const* lowmc, mpc_lowmc_key_t* lowmc_key, mzd
+ #ifdef WITH_CUSTOM_INSTANCES
+   if (lowmc->m != 10) {
+     _mpc_lowmc_call_bitsliced(0, 6, _mpc_sbox_layer_bitsliced, mzd, lookup, noscr, ,
+-                              mzd_mul_vl_general, mzd_xor_general, , mzd_addmul_vl_general);
++                              oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_xor_general, , mzd_addmul_vl_general);
+   } else
+ #endif
+   {
+-    _mpc_lowmc_call_bitsliced(0, 6, , uint64, lookup, noscr, , mzd_mul_vl_general, mzd_xor_general,
++    _mpc_lowmc_call_bitsliced(0, 6, , uint64, lookup, noscr, , oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_xor_general,
+                               , mzd_addmul_vl_general);
+   }
+ #else
+ #ifdef WITH_CUSTOM_INSTANCES
+   if (lowmc->m != 10) {
+     _mpc_lowmc_call_bitsliced(0, 6, _mpc_sbox_layer_bitsliced, mzd, matrix, scr, ,
+-                              mzd_mul_v_general, mzd_xor_general, , mzd_addmul_v_general);
++                              oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_xor_general, , oqs_sig_picnic_mzd_addmul_v_general);
+   } else
+ #endif
+   {
+-    _mpc_lowmc_call_bitsliced(0, 6, , uint64, matrix, scr, , mzd_mul_v_general, mzd_xor_general, ,
+-                              mzd_addmul_v_general);
++    _mpc_lowmc_call_bitsliced(0, 6, , uint64, matrix, scr, , oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_xor_general, ,
++                              oqs_sig_picnic_mzd_addmul_v_general);
+   }
+ #endif
+ #endif
+@@ -842,26 +842,26 @@ static void mpc_lowmc_call_verify(lowmc_t const* lowmc, mzd_local_t const* p, vi
+ #ifdef WITH_CUSTOM_INSTANCES
+   if (lowmc->m != 10) {
+     _mpc_lowmc_call_bitsliced_verify_m(ch, 6, _mpc_sbox_layer_bitsliced_verify, mzd, lookup, noscr,
+-                                       _optimize, mzd_mul_vl_general, mzd_xor_general,
+-                                       mzd_mul_vl_general, mzd_addmul_vl_general);
++                                       _optimize, oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_xor_general,
++                                       oqs_sig_picnic_mzd_mul_vl_general, mzd_addmul_vl_general);
+   } else
+ #endif
+   {
+     _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, lookup, noscr, _optimize,
+-                                       mzd_mul_vl_general, mzd_xor_general, mzd_mul_vl_general,
++                                       oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_xor_general, oqs_sig_picnic_mzd_mul_vl_general,
+                                        mzd_addmul_vl_general);
+   }
+ #else
+ #ifdef WITH_CUSTOM_INSTANCES
+   if (lowmc->m != 10) {
+     _mpc_lowmc_call_bitsliced_verify_m(ch, 6, _mpc_sbox_layer_bitsliced_verify, mzd, matrix, scr,
+-                                       _optimize, mzd_mul_v_general, mzd_xor_general,
+-                                       mzd_mul_v_general, mzd_addmul_v_general);
++                                       _optimize, oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_xor_general,
++                                       oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_addmul_v_general);
+   } else
+ #endif
+   {
+-    _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, matrix, scr, _optimize, mzd_mul_v_general,
+-                                       mzd_xor_general, mzd_mul_v_general, mzd_addmul_v_general);
++    _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, matrix, scr, _optimize, oqs_sig_picnic_mzd_mul_v_general,
++                                       oqs_sig_picnic_mzd_xor_general, oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_addmul_v_general);
+   }
+ #endif
+ #else
+@@ -869,24 +869,24 @@ static void mpc_lowmc_call_verify(lowmc_t const* lowmc, mzd_local_t const* p, vi
+ #ifdef WITH_CUSTOM_INSTANCES
+   if (lowmc->m != 10) {
+     _mpc_lowmc_call_bitsliced_verify_m(ch, 6, _mpc_sbox_layer_bitsliced_verify, mzd, lookup, noscr,
+-                                       , mzd_mul_vl_general, mzd_xor_general, ,
++                                       , oqs_sig_picnic_mzd_mul_vl_general, oqs_sig_picnic_mzd_xor_general, ,
+                                        mzd_addmul_vl_general);
+   } else
+ #endif
+   {
+-    _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, lookup, noscr, , mzd_mul_vl_general,
+-                                       mzd_xor_general, , mzd_addmul_vl_general);
++    _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, lookup, noscr, , oqs_sig_picnic_mzd_mul_vl_general,
++                                       oqs_sig_picnic_mzd_xor_general, , mzd_addmul_vl_general);
+   }
+ #else
+ #ifdef WITH_CUSTOM_INSTANCES
+   if (lowmc->m != 10) {
+     _mpc_lowmc_call_bitsliced_verify_m(ch, 6, _mpc_sbox_layer_bitsliced_verify, mzd, matrix, scr, ,
+-                                       mzd_mul_v_general, mzd_xor_general, , mzd_addmul_v_general);
++                                       oqs_sig_picnic_mzd_mul_v_general, oqs_sig_picnic_mzd_xor_general, , oqs_sig_picnic_mzd_addmul_v_general);
+   } else
+ #endif
+   {
+-    _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, matrix, scr, , mzd_mul_v_general,
+-                                       mzd_xor_general, , mzd_addmul_v_general);
++    _mpc_lowmc_call_bitsliced_verify_m(ch, 6, , uint64, matrix, scr, , oqs_sig_picnic_mzd_mul_v_general,
++                                       oqs_sig_picnic_mzd_xor_general, , oqs_sig_picnic_mzd_addmul_v_general);
+   }
+ #endif
+ #endif
+@@ -1026,67 +1026,67 @@ static void mpc_lowmc_call_verify(lowmc_t const* lowmc, mzd_local_t const* p, vi
+ #ifdef WITH_SSE2
+ mpc_lowmc_call_def(mpc_lowmc_call_128_sse, mpc_lowmc_call_verify_128_sse,
+                    _mpc_sbox_layer_bitsliced_128_sse, _mpc_sbox_layer_bitsliced_verify_128_sse,
+-                   mzd_mul_v_sse, mzd_mul_vl_sse_128, mzd_xor_sse, mzd_xor_sse, mzd_mul_v_sse,
+-                   mzd_mul_vl_sse, mzd_addmul_v_sse, mzd_addmul_vl_sse_128);
++                   oqs_sig_picnic_mzd_mul_v_sse, oqs_sig_picnic_mzd_mul_vl_sse_128, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_mul_v_sse,
++                   oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_addmul_v_sse, oqs_sig_picnic_mzd_addmul_vl_sse_128);
+ mpc_lowmc_call_def(mpc_lowmc_call_256_sse, mpc_lowmc_call_verify_256_sse,
+                    _mpc_sbox_layer_bitsliced_256_sse, _mpc_sbox_layer_bitsliced_verify_256_sse,
+-                   mzd_mul_v_sse, mzd_mul_vl_sse, mzd_xor_sse, mzd_xor_sse, mzd_mul_v_sse,
+-                   mzd_mul_vl_sse, mzd_addmul_v_sse, mzd_addmul_vl_sse);
++                   oqs_sig_picnic_mzd_mul_v_sse, oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_mul_v_sse,
++                   oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_addmul_v_sse, oqs_sig_picnic_mzd_addmul_vl_sse);
+ #ifdef WITH_CUSTOM_INSTANCES
+ mpc_lowmc_call_def(mpc_lowmc_call_384_sse, mpc_lowmc_call_verify_384_sse,
+                    _mpc_sbox_layer_bitsliced_384_sse, _mpc_sbox_layer_bitsliced_verify_384_sse,
+-                   mzd_mul_v_sse, mzd_mul_vl_sse, mzd_xor_sse, mzd_xor_sse, mzd_mul_v_sse,
+-                   mzd_mul_vl_sse, mzd_addmul_v_sse, mzd_addmul_vl_sse);
++                   oqs_sig_picnic_mzd_mul_v_sse, oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_mul_v_sse,
++                   oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_addmul_v_sse, oqs_sig_picnic_mzd_addmul_vl_sse);
+ mpc_lowmc_call_def(mpc_lowmc_call_512_sse, mpc_lowmc_call_verify_512_sse,
+                    _mpc_sbox_layer_bitsliced_512_sse, _mpc_sbox_layer_bitsliced_verify_512_sse,
+-                   mzd_mul_v_sse, mzd_mul_vl_sse, mzd_xor_sse, mzd_xor_sse, mzd_mul_v_sse,
+-                   mzd_mul_vl_sse, mzd_addmul_v_sse, mzd_addmul_vl_sse);
++                   oqs_sig_picnic_mzd_mul_v_sse, oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_xor_sse, oqs_sig_picnic_mzd_mul_v_sse,
++                   oqs_sig_picnic_mzd_mul_vl_sse, oqs_sig_picnic_mzd_addmul_v_sse, oqs_sig_picnic_mzd_addmul_vl_sse);
+ #endif
+ #endif
+ #ifdef WITH_AVX2
+ mpc_lowmc_call_def(mpc_lowmc_call_256_avx, mpc_lowmc_call_verify_256_avx,
+                    _mpc_sbox_layer_bitsliced_256_avx, _mpc_sbox_layer_bitsliced_verify_256_avx,
+-                   mzd_mul_v_avx, mzd_mul_vl_avx_256, mzd_xor_avx, mzd_xor_avx, mzd_mul_v_avx,
+-                   mzd_mul_vl_avx, mzd_addmul_v_avx, mzd_addmul_vl_avx_256);
++                   oqs_sig_picnic_mzd_mul_v_avx, oqs_sig_picnic_mzd_mul_vl_avx_256, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_mul_v_avx,
++                   oqs_sig_picnic_mzd_mul_vl_avx, oqs_sig_picnic_mzd_addmul_v_avx, oqs_sig_picnic_mzd_addmul_vl_avx_256);
+ #ifdef WITH_CUSTOM_INSTANCES
+ mpc_lowmc_call_def(mpc_lowmc_call_384_avx, mpc_lowmc_call_verify_384_avx,
+                    _mpc_sbox_layer_bitsliced_512_avx, _mpc_sbox_layer_bitsliced_verify_512_avx,
+-                   mzd_mul_v_avx, mzd_mul_vl_avx, mzd_xor_avx, mzd_xor_avx, mzd_mul_v_avx,
+-                   mzd_mul_vl_avx, mzd_addmul_v_avx, mzd_addmul_vl_avx);
++                   oqs_sig_picnic_mzd_mul_v_avx, oqs_sig_picnic_mzd_mul_vl_avx, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_mul_v_avx,
++                   oqs_sig_picnic_mzd_mul_vl_avx, oqs_sig_picnic_mzd_addmul_v_avx, oqs_sig_picnic_mzd_addmul_vl_avx);
+ mpc_lowmc_call_def(mpc_lowmc_call_512_avx, mpc_lowmc_call_verify_512_avx,
+                    _mpc_sbox_layer_bitsliced_512_avx, _mpc_sbox_layer_bitsliced_verify_512_avx,
+-                   mzd_mul_v_avx, mzd_mul_vl_avx, mzd_xor_avx, mzd_xor_avx, mzd_mul_v_avx,
+-                   mzd_mul_vl_avx, mzd_addmul_v_avx, mzd_addmul_vl_avx);
++                   oqs_sig_picnic_mzd_mul_v_avx, oqs_sig_picnic_mzd_mul_vl_avx, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_xor_avx, oqs_sig_picnic_mzd_mul_v_avx,
++                   oqs_sig_picnic_mzd_mul_vl_avx, oqs_sig_picnic_mzd_addmul_v_avx, oqs_sig_picnic_mzd_addmul_vl_avx);
+ #endif
+ #endif
+ #ifdef WITH_NEON
+ mpc_lowmc_call_def(mpc_lowmc_call_128_neon, mpc_lowmc_call_verify_128_neon,
+                    _mpc_sbox_layer_bitsliced_128_neon, _mpc_sbox_layer_bitsliced_verify_128_neon,
+-                   mzd_mul_v_neon, mzd_mul_vl_neon_128, mzd_xor_neon, mzd_xor_neon, mzd_mul_v_neon,
+-                   mzd_mul_vl_neon_multiple_of_128, mzd_addmul_v_neon, mzd_addmul_vl_neon_128);
++                   oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_128, oqs_sig_picnic_mzd_xor_neon, oqs_sig_picnic_mzd_xor_neon, oqs_sig_picnic_mzd_mul_v_neon,
++                   oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_addmul_v_neon, oqs_sig_picnic_mzd_addmul_vl_neon_128);
+ mpc_lowmc_call_def(mpc_lowmc_call_256_neon, mpc_lowmc_call_verify_256_neon,
+                    _mpc_sbox_layer_bitsliced_256_neon, _mpc_sbox_layer_bitsliced_verify_256_neon,
+-                   mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_xor_neon, mzd_xor_neon,
+-                   mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_addmul_v_neon,
+-                   mzd_addmul_vl_neon);
++                   oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_xor_neon, oqs_sig_picnic_mzd_xor_neon,
++                   oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_addmul_v_neon,
++                   oqs_sig_picnic_mzd_addmul_vl_neon);
+ #ifdef WITH_CUSTOM_INSTANCES
+ mpc_lowmc_call_def(mpc_lowmc_call_384_neon, mpc_lowmc_call_verify_384_neon,
+                    _mpc_sbox_layer_bitsliced_384_neon, _mpc_sbox_layer_bitsliced_verify_384_neon,
+-                   mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_xor_neon, mzd_xor_neon,
+-                   mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_addmul_v_neon,
+-                   mzd_addmul_vl_neon);
++                   oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_xor_neon, oqs_sig_picnic_mzd_xor_neon,
++                   oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_addmul_v_neon,
++                   oqs_sig_picnic_mzd_addmul_vl_neon);
+ mpc_lowmc_call_def(mpc_lowmc_call_512_neon, mpc_lowmc_call_verify_512_neon,
+                    _mpc_sbox_layer_bitsliced_512_neon, _mpc_sbox_layer_bitsliced_verify_512_neon,
+-                   mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_xor_neon, mzd_xor_neon,
+-                   mzd_mul_v_neon, mzd_mul_vl_neon_multiple_of_128, mzd_addmul_v_neon,
+-                   mzd_addmul_vl_neon);
++                   oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_xor_neon, oqs_sig_picnic_mzd_xor_neon,
++                   oqs_sig_picnic_mzd_mul_v_neon, oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128, oqs_sig_picnic_mzd_addmul_v_neon,
++                   oqs_sig_picnic_mzd_addmul_vl_neon);
+ #endif
+ #endif
+ #endif
+ 
+ static void sbox_vars_clear(sbox_vars_t* vars) {
+   if (vars->storage) {
+-    mzd_local_free_multiple(vars->storage);
++    oqs_sig_picnic_mzd_local_free_multiple(vars->storage);
+     free(vars->storage);
+     memset(vars, 0, sizeof(*vars));
+   }
+@@ -1094,7 +1094,7 @@ static void sbox_vars_clear(sbox_vars_t* vars) {
+ 
+ static sbox_vars_t* sbox_vars_init(sbox_vars_t* vars, uint32_t n, unsigned sc) {
+   vars->storage = calloc(11 * sc, sizeof(mzd_local_t*));
+-  mzd_local_init_multiple_ex(vars->storage, 11 * sc, 1, n, false);
++  oqs_sig_picnic_mzd_local_init_multiple_ex(vars->storage, 11 * sc, 1, n, false);
+ 
+   for (unsigned int i = 0; i < sc; ++i) {
+     vars->x0m[i] = vars->storage[11 * i + 0];
+@@ -1119,7 +1119,7 @@ static sbox_vars_t* sbox_vars_init(sbox_vars_t* vars, uint32_t n, unsigned sc) {
+ #define general_or_10(l, f) f##_10
+ #endif
+ 
+-lowmc_implementation_f get_lowmc_implementation(const lowmc_t* lowmc) {
++lowmc_implementation_f oqs_sig_picnic_get_lowmc_implementation(const lowmc_t* lowmc) {
+ #ifdef WITH_OPT
+ #ifdef WITH_SSE2
+   if (CPU_SUPPORTS_SSE2 && lowmc->n <= 128) {
+@@ -1176,7 +1176,7 @@ lowmc_implementation_f get_lowmc_implementation(const lowmc_t* lowmc) {
+   return mpc_lowmc_call;
+ }
+ 
+-lowmc_verify_implementation_f get_lowmc_verify_implementation(const lowmc_t* lowmc) {
++lowmc_verify_implementation_f oqs_sig_picnic_get_lowmc_verify_implementation(const lowmc_t* lowmc) {
+ #ifdef WITH_OPT
+ #ifdef WITH_SSE2
+   if (CPU_SUPPORTS_SSE2 && lowmc->n <= 128) {
+diff --git a/mpc_lowmc.h b/mpc_lowmc.h
+index 39f5d95..de6b38f 100644
+--- a/mpc_lowmc.h
++++ b/mpc_lowmc.h
+@@ -24,7 +24,7 @@ typedef void (*lowmc_implementation_f)(lowmc_t const*, mpc_lowmc_key_t*, mzd_loc
+ typedef void (*lowmc_verify_implementation_f)(lowmc_t const*, mzd_local_t const*, view_t*,
+                                               in_out_shares_t*, rvec_t*, unsigned int);
+ 
+-lowmc_implementation_f get_lowmc_implementation(const lowmc_t* lowmc);
+-lowmc_verify_implementation_f get_lowmc_verify_implementation(const lowmc_t* lowmc);
++lowmc_implementation_f oqs_sig_picnic_get_lowmc_implementation(const lowmc_t* lowmc);
++lowmc_verify_implementation_f oqs_sig_picnic_get_lowmc_verify_implementation(const lowmc_t* lowmc);
+ 
+ #endif
+diff --git a/mzd_additional.c b/mzd_additional.c
+index a0e362d..91d15b8 100644
+--- a/mzd_additional.c
++++ b/mzd_additional.c
+@@ -55,7 +55,7 @@ static uint32_t calculate_rowstride(uint32_t width) {
+ // In mzd_local_init_multiple we do the same, but store n mzd_local_t instances in one
+ // memory block.
+ 
+-mzd_local_t* mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) {
++mzd_local_t* oqs_sig_picnic_mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) {
+   const uint32_t width     = (c + 64 - 1) / 64;
+   const uint32_t rowstride = calculate_rowstride(width);
+ 
+@@ -79,11 +79,11 @@ mzd_local_t* mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) {
+   return A;
+ }
+ 
+-void mzd_local_free(mzd_local_t* v) {
++void oqs_sig_picnic_mzd_local_free(mzd_local_t* v) {
+   aligned_free(v);
+ }
+ 
+-void mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_t c, bool clear) {
++void oqs_sig_picnic_mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_t c, bool clear) {
+   const uint32_t width     = (c + 64 - 1) / 64;
+   const uint32_t rowstride = calculate_rowstride(width);
+ 
+@@ -111,19 +111,19 @@ void mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_
+   }
+ }
+ 
+-void mzd_local_free_multiple(mzd_local_t** vs) {
++void oqs_sig_picnic_mzd_local_free_multiple(mzd_local_t** vs) {
+   if (vs) {
+     aligned_free(vs[0]);
+   }
+ }
+ 
+-mzd_local_t* mzd_local_copy(mzd_local_t* dst, mzd_local_t const* src) {
++mzd_local_t* oqs_sig_picnic_mzd_local_copy(mzd_local_t* dst, mzd_local_t const* src) {
+   if (dst == src) {
+     return dst;
+   }
+ 
+   if (!dst) {
+-    dst = mzd_local_init(src->nrows, src->ncols);
++    dst = oqs_sig_picnic_mzd_local_init(src->nrows, src->ncols);
+   }
+ 
+   memcpy(ASSUME_ALIGNED(FIRST_ROW(dst), 32), ASSUME_ALIGNED(CONST_FIRST_ROW(src), 32),
+@@ -131,13 +131,13 @@ mzd_local_t* mzd_local_copy(mzd_local_t* dst, mzd_local_t const* src) {
+   return dst;
+ }
+ 
+-void mzd_local_clear(mzd_local_t* c) {
++void oqs_sig_picnic_mzd_local_clear(mzd_local_t* c) {
+   memset(ASSUME_ALIGNED(FIRST_ROW(c), 32), 0, c->nrows * sizeof(word) * c->rowstride);
+ }
+ 
+-void mzd_shift_right(mzd_local_t* res, mzd_local_t const* val, unsigned count) {
++void oqs_sig_picnic_mzd_shift_right(mzd_local_t* res, mzd_local_t const* val, unsigned count) {
+   if (!count) {
+-    mzd_local_copy(res, val);
++    oqs_sig_picnic_mzd_local_copy(res, val);
+     return;
+   }
+ 
+@@ -154,9 +154,9 @@ void mzd_shift_right(mzd_local_t* res, mzd_local_t const* val, unsigned count) {
+   *resptr = *valptr >> count;
+ }
+ 
+-void mzd_shift_left(mzd_local_t* res, mzd_local_t const* val, unsigned count) {
++void oqs_sig_picnic_mzd_shift_left(mzd_local_t* res, mzd_local_t const* val, unsigned count) {
+   if (!count) {
+-    mzd_local_copy(res, val);
++    oqs_sig_picnic_mzd_local_copy(res, val);
+     return;
+   }
+ 
+@@ -240,7 +240,7 @@ static inline mzd_local_t* mzd_and_neon(mzd_local_t* res, mzd_local_t const* fir
+ #endif
+ #endif
+ 
+-mzd_local_t* mzd_and(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
++mzd_local_t* oqs_sig_picnic_mzd_and(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
+ #ifdef WITH_OPT
+ #ifdef WITH_AVX2
+   if (CPU_SUPPORTS_AVX2 && first->ncols >= 256 && ((first->ncols & (word_size_bits - 1)) == 0)) {
+@@ -274,7 +274,7 @@ mzd_local_t* mzd_and(mzd_local_t* res, mzd_local_t const* first, mzd_local_t con
+ #ifdef WITH_OPT
+ #ifdef WITH_SSE2
+ ATTRIBUTE_TARGET("sse2")
+-mzd_local_t* mzd_xor_sse(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
++mzd_local_t* oqs_sig_picnic_mzd_xor_sse(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
+   unsigned int width    = first->rowstride;
+   word* resptr          = FIRST_ROW(res);
+   word const* firstptr  = CONST_FIRST_ROW(first);
+@@ -295,7 +295,7 @@ mzd_local_t* mzd_xor_sse(mzd_local_t* res, mzd_local_t const* first, mzd_local_t
+ 
+ #ifdef WITH_AVX2
+ ATTRIBUTE_TARGET("avx2")
+-mzd_local_t* mzd_xor_avx(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
++mzd_local_t* oqs_sig_picnic_mzd_xor_avx(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
+   unsigned int width    = first->rowstride;
+   word* resptr          = FIRST_ROW(res);
+   word const* firstptr  = CONST_FIRST_ROW(first);
+@@ -315,7 +315,7 @@ mzd_local_t* mzd_xor_avx(mzd_local_t* res, mzd_local_t const* first, mzd_local_t
+ #endif
+ 
+ #ifdef WITH_NEON
+-inline mzd_local_t* mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first,
++inline mzd_local_t* oqs_sig_picnic_mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first,
+                                  mzd_local_t const* second) {
+   unsigned int width    = first->rowstride;
+   word* resptr          = FIRST_ROW(res);
+@@ -336,28 +336,28 @@ inline mzd_local_t* mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first,
+ #endif
+ #endif
+ 
+-mzd_local_t* mzd_xor(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
++mzd_local_t* oqs_sig_picnic_mzd_xor(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) {
+ #ifdef WITH_OPT
+ #ifdef WITH_AVX2
+   if (CPU_SUPPORTS_AVX2 && first->ncols >= 256 && ((first->ncols & (word_size_bits - 1)) == 0)) {
+-    return mzd_xor_avx(res, first, second);
++    return oqs_sig_picnic_mzd_xor_avx(res, first, second);
+   }
+ #endif
+ #ifdef WITH_SSE2
+   if (CPU_SUPPORTS_SSE2 && ((first->ncols & (word_size_bits - 1)) == 0)) {
+-    return mzd_xor_sse(res, first, second);
++    return oqs_sig_picnic_mzd_xor_sse(res, first, second);
+   }
+ #endif
+ #ifdef WITH_NEON
+   if (CPU_SUPPORTS_NEON && ((first->ncols & (word_size_bits - 1)) == 0)) {
+-    return mzd_xor_neon(res, first, second);
++    return oqs_sig_picnic_mzd_xor_neon(res, first, second);
+   }
+ #endif
+ #endif
+-  return mzd_xor_general(res, first, second);
++  return oqs_sig_picnic_mzd_xor_general(res, first, second);
+ }
+ 
+-mzd_local_t* mzd_xor_general(mzd_local_t* res, mzd_local_t const* first,
++mzd_local_t* oqs_sig_picnic_mzd_xor_general(mzd_local_t* res, mzd_local_t const* first,
+                              mzd_local_t const* second) {
+   unsigned int width    = first->width;
+   word* resptr          = FIRST_ROW(res);
+@@ -371,37 +371,37 @@ mzd_local_t* mzd_xor_general(mzd_local_t* res, mzd_local_t const* first,
+   return res;
+ }
+ 
+-mzd_local_t* mzd_mul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) {
++mzd_local_t* oqs_sig_picnic_mzd_mul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) {
+   if (At->nrows != v->ncols) {
+     // number of columns does not match
+     return NULL;
+   }
+ 
+-  mzd_local_clear(c);
+-  return mzd_addmul_v(c, v, At);
++  oqs_sig_picnic_mzd_local_clear(c);
++  return oqs_sig_picnic_mzd_addmul_v(c, v, At);
+ }
+ 
+-mzd_local_t* mzd_mul_v_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) {
++mzd_local_t* oqs_sig_picnic_mzd_mul_v_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) {
+ 
+   if (At->nrows != v->ncols) {
+     // number of columns does not match
+     return NULL;
+   }
+ 
+-  mzd_local_clear(c);
+-  return mzd_addmul_v_general(c, v, At);
++  oqs_sig_picnic_mzd_local_clear(c);
++  return oqs_sig_picnic_mzd_addmul_v_general(c, v, At);
+ }
+ 
+ #ifdef WITH_OPT
+ #ifdef WITH_SSE2
+ ATTRIBUTE_TARGET("sse2")
+-mzd_local_t* mzd_mul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+-  mzd_local_clear(c);
+-  return mzd_addmul_v_sse(c, v, A);
++mzd_local_t* oqs_sig_picnic_mzd_mul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++  oqs_sig_picnic_mzd_local_clear(c);
++  return oqs_sig_picnic_mzd_addmul_v_sse(c, v, A);
+ }
+ 
+ ATTRIBUTE_TARGET("sse2")
+-mzd_local_t* mzd_addmul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   word* cptr                    = FIRST_ROW(c);
+   word const* vptr              = CONST_FIRST_ROW(v);
+   const unsigned int width      = v->width;
+@@ -428,13 +428,13 @@ mzd_local_t* mzd_addmul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t
+ 
+ #ifdef WITH_AVX2
+ ATTRIBUTE_TARGET("avx2")
+-mzd_local_t* mzd_mul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+-  mzd_local_clear(c);
+-  return mzd_addmul_v_avx(c, v, A);
++mzd_local_t* oqs_sig_picnic_mzd_mul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++  oqs_sig_picnic_mzd_local_clear(c);
++  return oqs_sig_picnic_mzd_addmul_v_avx(c, v, A);
+ }
+ 
+ ATTRIBUTE_TARGET("avx2")
+-mzd_local_t* mzd_addmul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   word* cptr                    = FIRST_ROW(c);
+   word const* vptr              = CONST_FIRST_ROW(v);
+   const unsigned int width      = v->width;
+@@ -460,12 +460,12 @@ mzd_local_t* mzd_addmul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t
+ #endif
+ 
+ #ifdef WITH_NEON
+-mzd_local_t* mzd_mul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+-  mzd_local_clear(c);
+-  return mzd_addmul_v_neon(c, v, A);
++mzd_local_t* oqs_sig_picnic_mzd_mul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++  oqs_sig_picnic_mzd_local_clear(c);
++  return oqs_sig_picnic_mzd_addmul_v_neon(c, v, A);
+ }
+ 
+-inline mzd_local_t* mzd_addmul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++inline mzd_local_t* oqs_sig_picnic_mzd_addmul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   word* cptr                    = FIRST_ROW(c);
+   word const* vptr              = CONST_FIRST_ROW(v);
+   const unsigned int width      = v->width;
+@@ -491,7 +491,7 @@ inline mzd_local_t* mzd_addmul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_
+ #endif
+ #endif
+ 
+-mzd_local_t* mzd_addmul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   if (A->ncols != c->ncols || A->nrows != v->ncols) {
+     // number of columns does not match
+     return NULL;
+@@ -501,26 +501,26 @@ mzd_local_t* mzd_addmul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t cons
+   if (A->nrows % (sizeof(word) * 8) == 0) {
+ #ifdef WITH_AVX2
+     if (CPU_SUPPORTS_AVX2 && (A->ncols & 0xff) == 0) {
+-      return mzd_addmul_v_avx(c, v, A);
++      return oqs_sig_picnic_mzd_addmul_v_avx(c, v, A);
+     }
+ #endif
+ #ifdef WITH_SSE2
+     if (CPU_SUPPORTS_SSE2 && (A->ncols & 0x7f) == 0) {
+-      return mzd_addmul_v_sse(c, v, A);
++      return oqs_sig_picnic_mzd_addmul_v_sse(c, v, A);
+     }
+ #endif
+ #ifdef WITH_NEON
+     if (CPU_SUPPORTS_NEON && (A->ncols & 0x7f) == 0) {
+-      return mzd_addmul_v_neon(c, v, A);
++      return oqs_sig_picnic_mzd_addmul_v_neon(c, v, A);
+     }
+ #endif
+   }
+ #endif
+ 
+-  return mzd_addmul_v_general(c, v, A);
++  return oqs_sig_picnic_mzd_addmul_v_general(c, v, A);
+ }
+ 
+-mzd_local_t* mzd_addmul_v_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+ 
+   const unsigned int len       = A->width;
+   const unsigned int rowstride = A->rowstride;
+@@ -547,7 +547,7 @@ mzd_local_t* mzd_addmul_v_general(mzd_local_t* c, mzd_local_t const* v, mzd_loca
+   return c;
+ }
+ 
+-bool mzd_local_equal(mzd_local_t const* first, mzd_local_t const* second) {
++bool oqs_sig_picnic_mzd_local_equal(mzd_local_t const* first, mzd_local_t const* second) {
+   if (first == second) {
+     return true;
+   }
+@@ -587,8 +587,8 @@ static void xor_comb(const unsigned int len, word* Brow, mzd_local_t const* A,
+  * Pre-compute matrices for faster mzd_addmul_v computions.
+  *
+  */
+-mzd_local_t* mzd_precompute_matrix_lookup(mzd_local_t const* A) {
+-  mzd_local_t* B = mzd_local_init_ex(32 * A->nrows, A->ncols, true);
++mzd_local_t* oqs_sig_picnic_mzd_precompute_matrix_lookup(mzd_local_t const* A) {
++  mzd_local_t* B = oqs_sig_picnic_mzd_local_init_ex(32 * A->nrows, A->ncols, true);
+ 
+   const unsigned int len = A->width;
+ 
+@@ -608,7 +608,7 @@ mzd_local_t* mzd_precompute_matrix_lookup(mzd_local_t const* A) {
+ #ifdef WITH_OPT
+ #ifdef WITH_SSE2
+ ATTRIBUTE_TARGET("sse2")
+-mzd_local_t* mzd_mul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   word const* vptr                = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16);
+   const unsigned int width        = v->width;
+   static const unsigned int moff2 = 256;
+@@ -630,13 +630,13 @@ mzd_local_t* mzd_mul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_
+ }
+ 
+ ATTRIBUTE_TARGET("sse2")
+-mzd_local_t* mzd_mul_vl_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+-  mzd_local_clear(c);
+-  return mzd_addmul_vl_sse(c, v, A);
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++  oqs_sig_picnic_mzd_local_clear(c);
++  return oqs_sig_picnic_mzd_addmul_vl_sse(c, v, A);
+ }
+ 
+ ATTRIBUTE_TARGET("sse2")
+-mzd_local_t* mzd_addmul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   word const* vptr                = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16);
+   const unsigned int width        = v->width;
+   static const unsigned int moff2 = 256;
+@@ -658,7 +658,7 @@ mzd_local_t* mzd_addmul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v, mzd_loc
+ }
+ 
+ ATTRIBUTE_TARGET("sse2")
+-mzd_local_t* mzd_addmul_vl_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   word const* vptr              = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16);
+   const unsigned int width      = v->width;
+   const unsigned int rowstride  = A->rowstride;
+@@ -683,7 +683,7 @@ mzd_local_t* mzd_addmul_vl_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t
+ 
+ #ifdef WITH_AVX2
+ ATTRIBUTE_TARGET("avx2")
+-mzd_local_t* mzd_mul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   word const* vptr                = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16);
+   const unsigned int width        = v->width;
+   static const unsigned int moff2 = 256;
+@@ -705,7 +705,7 @@ mzd_local_t* mzd_mul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_
+ }
+ 
+ ATTRIBUTE_TARGET("avx2")
+-mzd_local_t* mzd_addmul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   word const* vptr                = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16);
+   const unsigned int width        = v->width;
+   static const unsigned int moff2 = 256;
+@@ -727,13 +727,13 @@ mzd_local_t* mzd_addmul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v, mzd_loc
+ }
+ 
+ ATTRIBUTE_TARGET("avx2")
+-mzd_local_t* mzd_mul_vl_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+-  mzd_local_clear(c);
+-  return mzd_addmul_vl_avx(c, v, A);
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++  oqs_sig_picnic_mzd_local_clear(c);
++  return oqs_sig_picnic_mzd_addmul_vl_avx(c, v, A);
+ }
+ 
+ ATTRIBUTE_TARGET("avx2")
+-mzd_local_t* mzd_addmul_vl_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   word const* vptr              = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16);
+   const unsigned int width      = v->width;
+   const unsigned int rowstride  = A->rowstride;
+@@ -757,7 +757,7 @@ mzd_local_t* mzd_addmul_vl_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t
+ #endif
+ 
+ #ifdef WITH_NEON
+-mzd_local_t* mzd_mul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+ 
+   word const* vptr                = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16);
+   const unsigned int width        = v->width;
+@@ -780,7 +780,7 @@ mzd_local_t* mzd_mul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_local
+   return c;
+ }
+ 
+-mzd_local_t* mzd_addmul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   word const* vptr                = ASSUME_ALIGNED(CONST_FIRST_ROW(v), 16);
+   static const unsigned int moff2 = 256;
+ 
+@@ -804,13 +804,13 @@ mzd_local_t* mzd_addmul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v, mzd_lo
+   return c;
+ }
+ 
+-mzd_local_t* mzd_mul_vl_neon_multiple_of_128(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128(mzd_local_t* c, mzd_local_t const* v,
+                                              mzd_local_t const* A) {
+-  mzd_local_clear(c);
++  oqs_sig_picnic_mzd_local_clear(c);
+-  return mzd_addmul_vl_neon(c, v, A);
++  return oqs_sig_picnic_mzd_addmul_vl_neon(c, v, A);
+ }
+ 
+-mzd_local_t* mzd_addmul_vl_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   word const* vptr              = ASSUME_ALIGNED(CONST_FIRST_ROW(v), alignof(uint32x4_t));
+   const unsigned int width      = v->width;
+   const unsigned int rowstride  = A->rowstride;
+@@ -835,7 +835,7 @@ mzd_local_t* mzd_addmul_vl_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_
+ #endif
+ #endif
+ 
+-mzd_local_t* mzd_mul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   if (A->nrows != 32 * v->ncols) {
+     // number of columns does not match
+     return NULL;
+@@ -846,36 +846,36 @@ mzd_local_t* mzd_mul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const*
+ #ifdef WITH_AVX2
+     if (CPU_SUPPORTS_AVX2) {
+       if (A->ncols == 256) {
+-        return mzd_mul_vl_avx_256(c, v, A);
++        return oqs_sig_picnic_mzd_mul_vl_avx_256(c, v, A);
+       }
+     }
+ #endif
+ #ifdef WITH_SSE2
+     if (CPU_SUPPORTS_SSE2) {
+       if (A->ncols == 128) {
+-        return mzd_mul_vl_sse_128(c, v, A);
++        return oqs_sig_picnic_mzd_mul_vl_sse_128(c, v, A);
+       }
+     }
+ #endif
+ #ifdef WITH_NEON
+     if (CPU_SUPPORTS_NEON) {
+       if (A->ncols == 128) {
+-        return mzd_mul_vl_neon_128(c, v, A);
++        return oqs_sig_picnic_mzd_mul_vl_neon_128(c, v, A);
+       }
+     }
+ #endif
+   }
+ #endif
+-  mzd_local_clear(c);
+-  return mzd_addmul_vl(c, v, A);
++  oqs_sig_picnic_mzd_local_clear(c);
++  return oqs_sig_picnic_mzd_addmul_vl(c, v, A);
+ }
+ 
+-mzd_local_t* mzd_mul_vl_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+-  mzd_local_clear(c);
+-  return mzd_addmul_vl_general(c, v, A);
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++  oqs_sig_picnic_mzd_local_clear(c);
++  return oqs_sig_picnic_mzd_addmul_vl_general(c, v, A);
+ }
+ 
+-mzd_local_t* mzd_addmul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   if (A->ncols != c->ncols || A->nrows != 32 * v->ncols) {
+     // number of columns does not match
+     return NULL;
+@@ -886,39 +886,39 @@ mzd_local_t* mzd_addmul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t con
+ #ifdef WITH_AVX2
+     if (CPU_SUPPORTS_AVX2) {
+       if (A->ncols == 256) {
+-        return mzd_addmul_vl_avx_256(c, v, A);
++        return oqs_sig_picnic_mzd_addmul_vl_avx_256(c, v, A);
+       }
+       if ((A->ncols & 0xff) == 0) {
+-        return mzd_addmul_vl_avx(c, v, A);
++        return oqs_sig_picnic_mzd_addmul_vl_avx(c, v, A);
+       }
+     }
+ #endif
+ #ifdef WITH_SSE2
+     if (CPU_SUPPORTS_SSE2) {
+       if (A->ncols == 128) {
+-        return mzd_addmul_vl_sse_128(c, v, A);
++        return oqs_sig_picnic_mzd_addmul_vl_sse_128(c, v, A);
+       }
+       if ((A->ncols & 0x7f) == 0) {
+-        return mzd_addmul_vl_sse(c, v, A);
++        return oqs_sig_picnic_mzd_addmul_vl_sse(c, v, A);
+       }
+     }
+ #endif
+ #ifdef WITH_NEON
+     if (CPU_SUPPORTS_NEON) {
+       if (A->ncols == 128) {
+-        return mzd_addmul_vl_neon_128(c, v, A);
++        return oqs_sig_picnic_mzd_addmul_vl_neon_128(c, v, A);
+       }
+       if ((A->ncols & 0x7f) == 0) {
+-        return mzd_addmul_vl_neon(c, v, A);
++        return oqs_sig_picnic_mzd_addmul_vl_neon(c, v, A);
+       }
+     }
+ #endif
+   }
+ #endif
+-  return mzd_addmul_vl_general(c, v, A);
++  return oqs_sig_picnic_mzd_addmul_vl_general(c, v, A);
+ }
+ 
+-mzd_local_t* mzd_addmul_vl_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_general(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) {
+   const unsigned int len   = A->width;
+   word* cptr               = FIRST_ROW(c);
+   word const* vptr         = CONST_FIRST_ROW(v);
+diff --git a/mzd_additional.h b/mzd_additional.h
+index 7992128..c084e6d 100644
+--- a/mzd_additional.h
++++ b/mzd_additional.h
+@@ -27,44 +27,44 @@ typedef struct {
+   uint64_t rows[];
+ } mzd_local_t ATTR_ALIGNED(32);
+ 
+-mzd_local_t* mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) ATTR_ASSUME_ALIGNED(32);
++mzd_local_t* oqs_sig_picnic_mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) ATTR_ASSUME_ALIGNED(32);
+ 
+-#define mzd_local_init(r, c) mzd_local_init_ex(r, c, true)
++#define oqs_sig_picnic_mzd_local_init(r, c) oqs_sig_picnic_mzd_local_init_ex(r, c, true)
+ 
+-void mzd_local_free(mzd_local_t* v);
++void oqs_sig_picnic_mzd_local_free(mzd_local_t* v);
+ 
+-void mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_t c, bool clear)
++void oqs_sig_picnic_mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_t c, bool clear)
+     ATTR_NONNULL_ARG(1);
+ 
+-#define mzd_local_init_multiple(dst, n, r, c) mzd_local_init_multiple_ex(dst, n, r, c, true)
++#define oqs_sig_picnic_mzd_local_init_multiple(dst, n, r, c) oqs_sig_picnic_mzd_local_init_multiple_ex(dst, n, r, c, true)
+ 
+ /**
+- * mzd_free for mzd_local_init_multiple.
++ * oqs_sig_picnic_mzd_free for oqs_sig_picnic_mzd_local_init_multiple.
+  */
+-void mzd_local_free_multiple(mzd_local_t** vs);
++void oqs_sig_picnic_mzd_local_free_multiple(mzd_local_t** vs);
+ /**
+- * Improved mzd_copy for specific memory layouts.
++ * Improved oqs_sig_picnic_mzd_copy for specific memory layouts.
+  */
+-mzd_local_t* mzd_local_copy(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL_ARG(2);
++mzd_local_t* oqs_sig_picnic_mzd_local_copy(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL_ARG(2);
+ 
+-void mzd_local_clear(mzd_local_t* c) ATTR_NONNULL;
++void oqs_sig_picnic_mzd_local_clear(mzd_local_t* c) ATTR_NONNULL;
+ 
+-void mzd_shift_right(mzd_local_t* res, mzd_local_t const* val, unsigned count) ATTR_NONNULL;
++void oqs_sig_picnic_mzd_shift_right(mzd_local_t* res, mzd_local_t const* val, unsigned count) ATTR_NONNULL;
+ 
+-void mzd_shift_left(mzd_local_t* res, mzd_local_t const* val, unsigned count) ATTR_NONNULL;
++void oqs_sig_picnic_mzd_shift_left(mzd_local_t* res, mzd_local_t const* val, unsigned count) ATTR_NONNULL;
+ 
+-mzd_local_t* mzd_and(mzd_local_t* res, mzd_local_t const* first,
++mzd_local_t* oqs_sig_picnic_mzd_and(mzd_local_t* res, mzd_local_t const* first,
+                      mzd_local_t const* second) ATTR_NONNULL;
+ 
+-mzd_local_t* mzd_xor(mzd_local_t* res, mzd_local_t const* first,
++mzd_local_t* oqs_sig_picnic_mzd_xor(mzd_local_t* res, mzd_local_t const* first,
+                      mzd_local_t const* second) ATTR_NONNULL;
+-mzd_local_t* mzd_xor_sse(mzd_local_t* res, mzd_local_t const* first,
++mzd_local_t* oqs_sig_picnic_mzd_xor_sse(mzd_local_t* res, mzd_local_t const* first,
+                          mzd_local_t const* second) ATTR_NONNULL;
+-mzd_local_t* mzd_xor_avx(mzd_local_t* res, mzd_local_t const* first,
++mzd_local_t* oqs_sig_picnic_mzd_xor_avx(mzd_local_t* res, mzd_local_t const* first,
+                          mzd_local_t const* second) ATTR_NONNULL;
+-mzd_local_t* mzd_xor_general(mzd_local_t* res, mzd_local_t const* first,
++mzd_local_t* oqs_sig_picnic_mzd_xor_general(mzd_local_t* res, mzd_local_t const* first,
+                              mzd_local_t const* second) ATTR_NONNULL;
+-mzd_local_t* mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first,
++mzd_local_t* oqs_sig_picnic_mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first,
+                           mzd_local_t const* second) ATTR_NONNULL;
+ 
+ /**
+@@ -77,85 +77,85 @@ mzd_local_t* mzd_xor_neon(mzd_local_t* res, mzd_local_t const* first,
+  *          second vector
+  * \returns true if both vectors are equal, false otherwise.
+  */
+-bool mzd_local_equal(mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
++bool oqs_sig_picnic_mzd_local_equal(mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
+ 
+ /**
+  * Compute v * A optimized for v being a vector.
+  */
+-mzd_local_t* mzd_mul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
+-mzd_local_t* mzd_mul_v_general(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_mul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
++mzd_local_t* oqs_sig_picnic_mzd_mul_v_general(mzd_local_t* c, mzd_local_t const* v,
+                                mzd_local_t const* At) ATTR_NONNULL;
+-mzd_local_t* mzd_mul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_mul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_mul_v_neon(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_mul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
++mzd_local_t* oqs_sig_picnic_mzd_mul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
++mzd_local_t* oqs_sig_picnic_mzd_mul_v_neon(mzd_local_t* c, mzd_local_t const* v,
+                             mzd_local_t const* A) ATTR_NONNULL;
+ 
+ /**
+  * Compute c + v * A optimized for c and v being vectors.
+  */
+-mzd_local_t* mzd_addmul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
+-mzd_local_t* mzd_addmul_v_general(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_addmul_v(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_general(mzd_local_t* c, mzd_local_t const* v,
+                                   mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_addmul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A);
+-mzd_local_t* mzd_addmul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A);
+-mzd_local_t* mzd_addmul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A);
++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_sse(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A);
++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_avx(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A);
++mzd_local_t* oqs_sig_picnic_mzd_addmul_v_neon(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A);
+ 
+ /**
+  * Compute v * A optimized for v being a vector.
+  */
+-mzd_local_t* mzd_mul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
+-mzd_local_t* mzd_mul_vl_general(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_general(mzd_local_t* c, mzd_local_t const* v,
+                                 mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_mul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v,
+                                 mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_mul_vl_sse(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_sse(mzd_local_t* c, mzd_local_t const* v,
+                             mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_mul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v,
+                                 mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_mul_vl_avx(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_avx(mzd_local_t* c, mzd_local_t const* v,
+                             mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_mul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v,
+                                  mzd_local_t const* A) ATTR_NONNULL;
+ 
+-mzd_local_t* mzd_mul_vl_neon_multiple_of_128(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_mul_vl_neon_multiple_of_128(mzd_local_t* c, mzd_local_t const* v,
+                                              mzd_local_t const* A) ATTR_NONNULL;
+ /**
+  * Compute c + v * A optimized for c and v being vectors.
+  */
+-mzd_local_t* mzd_addmul_vl(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl(mzd_local_t* c, mzd_local_t const* v,
+                            mzd_local_t const* At) ATTR_NONNULL;
+-mzd_local_t* mzd_addmul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_sse_128(mzd_local_t* c, mzd_local_t const* v,
+                                    mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_addmul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_avx_256(mzd_local_t* c, mzd_local_t const* v,
+                                    mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_addmul_vl_sse(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_sse(mzd_local_t* c, mzd_local_t const* v,
+                                mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_addmul_vl_avx(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_avx(mzd_local_t* c, mzd_local_t const* v,
+                                mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_addmul_vl_general(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_general(mzd_local_t* c, mzd_local_t const* v,
+                                    mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_addmul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_neon_128(mzd_local_t* c, mzd_local_t const* v,
+                                     mzd_local_t const* A) ATTR_NONNULL;
+-mzd_local_t* mzd_addmul_vl_neon(mzd_local_t* c, mzd_local_t const* v,
++mzd_local_t* oqs_sig_picnic_mzd_addmul_vl_neon(mzd_local_t* c, mzd_local_t const* v,
+                                 mzd_local_t const* A) ATTR_NONNULL;
+ 
+ /**
+  * Compute v * A optimized for v being a vector.
+  */
+-void mzd_mul_vlm(mzd_local_t** c, mzd_local_t const* const* v, mzd_local_t const* At,
++void oqs_sig_picnic_mzd_mul_vlm(mzd_local_t** c, mzd_local_t const* const* v, mzd_local_t const* At,
+                  unsigned int sc) ATTR_NONNULL;
+ 
+ /**
+  * Compute c + v * A optimized for c and v being vectors.
+  */
+-void mzd_addmul_vlm(mzd_local_t** c, mzd_local_t const* const* v, mzd_local_t const* At,
++void oqs_sig_picnic_mzd_addmul_vlm(mzd_local_t** c, mzd_local_t const* const* v, mzd_local_t const* At,
+                     unsigned int sc) ATTR_NONNULL;
+ 
+ /**
+- * Pre-compute matrices for faster mzd_addmul_v computions.
++ * Pre-compute matrices for faster oqs_sig_picnic_mzd_addmul_v computions.
+  *
+  */
+-mzd_local_t* mzd_precompute_matrix_lookup(mzd_local_t const* A) ATTR_NONNULL;
++mzd_local_t* oqs_sig_picnic_mzd_precompute_matrix_lookup(mzd_local_t const* A) ATTR_NONNULL;
+ 
+ #define ROW(v, r) ((word*)(((uint8_t*)(v)) + 32 + (v)->rowstride * (r) * sizeof(word)))
+ #define CONST_ROW(v, r)                                                                            \
+@@ -167,7 +167,7 @@ mzd_local_t* mzd_precompute_matrix_lookup(mzd_local_t const* A) ATTR_NONNULL;
+ #define WRITE_BIT(w, spot, value)                                                                  \
+   ((w) = (((w) & ~(UINT64_C(1) << (spot))) | (-(word)(value) & (UINT64_C(1) << (spot)))))
+ 
+-#define mzd_local_write_bit(v, r, c, b)                                                            \
++#define oqs_sig_picnic_mzd_local_write_bit(v, r, c, b)                                                            \
+   WRITE_BIT(ROW(v, r)[c / (sizeof(word) * 8)], c % (sizeof(word) * 8), b)
+ 
+ #endif
+diff --git a/picnic.c b/picnic.c
+index 91eeed3..55cb19c 100644
+--- a/picnic.c
++++ b/picnic.c
+@@ -22,7 +22,7 @@
+ #include "randomness.h"
+ 
+ const picnic_instance_t* picnic_instance_get(picnic_params_t param) {
+-  return get_instance(param);
++  return oqs_sig_picnic_get_instance(param);
+ }
+ 
+ size_t PICNIC_CALLING_CONVENTION picnic_signature_size(picnic_params_t param) {
+@@ -110,22 +110,22 @@ int PICNIC_CALLING_CONVENTION picnic_sk_to_pk(const picnic_privatekey_t* sk,
+   uint8_t* pk_pt       = &pk->data[1];
+   const uint8_t* sk_pt = &sk->data[1 + input_size];
+ 
+-  mzd_local_t* plaintext = mzd_local_init_ex(1, instance->lowmc.n, false);
+-  mzd_local_t* privkey   = mzd_local_init_ex(1, instance->lowmc.k, false);
++  mzd_local_t* plaintext = oqs_sig_picnic_mzd_local_init_ex(1, instance->lowmc.n, false);
++  mzd_local_t* privkey   = oqs_sig_picnic_mzd_local_init_ex(1, instance->lowmc.k, false);
+ 
+-  mzd_from_char_array(plaintext, sk_pt, output_size);
+-  mzd_from_char_array(privkey, sk_sk, input_size);
++  oqs_sig_picnic_mzd_from_char_array(plaintext, sk_pt, output_size);
++  oqs_sig_picnic_mzd_from_char_array(privkey, sk_sk, input_size);
+ 
+   // compute public key
+-  mzd_local_t* ciphertext = lowmc_call(&instance->lowmc, privkey, plaintext);
++  mzd_local_t* ciphertext = oqs_sig_picnic_lowmc_call(&instance->lowmc, privkey, plaintext);
+ 
+   pk->data[0] = param;
+   memcpy(pk_pt, sk_pt, output_size);
+-  mzd_to_char_array(pk_c, ciphertext, output_size);
++  oqs_sig_picnic_mzd_to_char_array(pk_c, ciphertext, output_size);
+ 
+-  mzd_local_free(ciphertext);
+-  mzd_local_free(privkey);
+-  mzd_local_free(plaintext);
++  oqs_sig_picnic_mzd_local_free(ciphertext);
++  oqs_sig_picnic_mzd_local_free(privkey);
++  oqs_sig_picnic_mzd_local_free(plaintext);
+ 
+   return 0;
+ }
+@@ -156,21 +156,21 @@ int PICNIC_CALLING_CONVENTION picnic_validate_keypair(const picnic_privatekey_t*
+     return -1;
+   }
+ 
+-  mzd_local_t* plaintext = mzd_local_init_ex(1, instance->lowmc.n, false);
+-  mzd_local_t* privkey   = mzd_local_init_ex(1, instance->lowmc.k, false);
++  mzd_local_t* plaintext = oqs_sig_picnic_mzd_local_init_ex(1, instance->lowmc.n, false);
++  mzd_local_t* privkey   = oqs_sig_picnic_mzd_local_init_ex(1, instance->lowmc.k, false);
+ 
+-  mzd_from_char_array(plaintext, sk_pt, instance->output_size);
+-  mzd_from_char_array(privkey, sk_sk, instance->input_size);
++  oqs_sig_picnic_mzd_from_char_array(plaintext, sk_pt, instance->output_size);
++  oqs_sig_picnic_mzd_from_char_array(privkey, sk_sk, instance->input_size);
+ 
+   // compute public key
+-  mzd_local_t* ciphertext = lowmc_call(&instance->lowmc, privkey, plaintext);
++  mzd_local_t* ciphertext = oqs_sig_picnic_lowmc_call(&instance->lowmc, privkey, plaintext);
+ 
+   uint8_t buffer[MAX_LOWMC_BLOCK_SIZE];
+-  mzd_to_char_array(buffer, ciphertext, output_size);
++  oqs_sig_picnic_mzd_to_char_array(buffer, ciphertext, output_size);
+ 
+-  mzd_local_free(ciphertext);
+-  mzd_local_free(privkey);
+-  mzd_local_free(plaintext);
++  oqs_sig_picnic_mzd_local_free(ciphertext);
++  oqs_sig_picnic_mzd_local_free(privkey);
++  oqs_sig_picnic_mzd_local_free(plaintext);
+ 
+   return memcmp(buffer, pk_c, output_size);
+ }
+@@ -195,7 +195,7 @@ int PICNIC_CALLING_CONVENTION picnic_sign(const picnic_privatekey_t* sk, const u
+   const uint8_t* sk_c  = &sk->data[1 + input_size + output_size];
+   const uint8_t* sk_pt = &sk->data[1 + input_size];
+ 
+-  return fis_sign(instance, sk_pt, sk_sk, sk_c, message, message_len, signature, signature_len)
++  return oqs_sig_picnic_fis_sign(instance, sk_pt, sk_sk, sk_c, message, message_len, signature, signature_len)
+              ? 0
+              : -1;
+ }
+@@ -218,7 +218,7 @@ int PICNIC_CALLING_CONVENTION picnic_verify(const picnic_publickey_t* pk, const
+   const uint8_t* pk_c  = &pk->data[1 + output_size];
+   const uint8_t* pk_pt = &pk->data[1];
+ 
+-  return fis_verify(instance, pk_pt, pk_c, message, message_len, signature, signature_len) ? 0 : -1;
++  return oqs_sig_picnic_fis_verify(instance, pk_pt, pk_c, message, message_len, signature, signature_len) ? 0 : -1;
+ }
+ 
+ void picnic_visualize(FILE* out, const uint8_t* public_key, size_t public_key_size,
+@@ -233,7 +233,7 @@ void picnic_visualize(FILE* out, const uint8_t* public_key, size_t public_key_si
+     return;
+   }
+ 
+-  visualize_signature(out, instance, msg, msglen, sig, siglen);
++  oqs_sig_picnic_visualize_signature(out, instance, msg, msglen, sig, siglen);
+ }
+ 
+ const char* PICNIC_CALLING_CONVENTION picnic_get_param_name(picnic_params_t parameters) {
+diff --git a/picnic_impl.c b/picnic_impl.c
+index f1a308b..ced069e 100644
+--- a/picnic_impl.c
++++ b/picnic_impl.c
+@@ -246,10 +246,10 @@ static void mzd_to_bitstream(bitstream_t* bs, const mzd_local_t* v, const size_t
+   const uint64_t* d = &CONST_FIRST_ROW(v)[v->width - 1];
+   size_t bits       = size;
+   for (; bits >= sizeof(uint64_t) * 8; bits -= sizeof(uint64_t) * 8, --d) {
+-    bitstream_put_bits(bs, *d, sizeof(uint64_t) * 8);
++    oqs_sig_picnic_bitstream_put_bits(bs, *d, sizeof(uint64_t) * 8);
+   }
+   if (bits) {
+-    bitstream_put_bits(bs, *d >> (sizeof(uint64_t) * 8 - bits), bits);
++    oqs_sig_picnic_bitstream_put_bits(bs, *d >> (sizeof(uint64_t) * 8 - bits), bits);
+   }
+ }
+ 
+@@ -259,10 +259,10 @@ static void mzd_from_bitstream(bitstream_t* bs, mzd_local_t* v, const size_t siz
+ 
+   size_t bits = size;
+   for (; bits >= sizeof(uint64_t) * 8; bits -= sizeof(uint64_t) * 8, --d) {
+-    *d = bitstream_get_bits(bs, sizeof(uint64_t) * 8);
++    *d = oqs_sig_picnic_bitstream_get_bits(bs, sizeof(uint64_t) * 8);
+   }
+   if (bits) {
+-    *d = bitstream_get_bits(bs, bits) << (sizeof(uint64_t) * 8 - bits);
++    *d = oqs_sig_picnic_bitstream_get_bits(bs, bits) << (sizeof(uint64_t) * 8 - bits);
+     --d;
+   }
+   for (; d >= f; --d) {
+@@ -271,11 +271,11 @@ static void mzd_from_bitstream(bitstream_t* bs, mzd_local_t* v, const size_t siz
+ }
+ 
+ static void uint64_to_bitstream(bitstream_t* bs, const uint64_t v) {
+-  bitstream_put_bits(bs, v >> (64 - 30), 30);
++  oqs_sig_picnic_bitstream_put_bits(bs, v >> (64 - 30), 30);
+ }
+ 
+ static uint64_t uint64_from_bitstream(bitstream_t* bs) {
+-  return bitstream_get_bits(bs, 30) << (64 - 30);
++  return oqs_sig_picnic_bitstream_get_bits(bs, 30) << (64 - 30);
+ }
+ 
+ static void compress_view(uint8_t* dst, const picnic_instance_t* pp, const view_t* views,
+@@ -356,13 +356,13 @@ static void decompress_random_tape_new(rvec_t* rvec, const picnic_instance_t* pp
+ }
+ 
+ static void mzd_share(mzd_local_t* shared_value[SC_PROOF]) {
+-  mzd_xor(shared_value[2], shared_value[0], shared_value[2]);
+-  mzd_xor(shared_value[2], shared_value[1], shared_value[2]);
++  oqs_sig_picnic_mzd_xor(shared_value[2], shared_value[0], shared_value[2]);
++  oqs_sig_picnic_mzd_xor(shared_value[2], shared_value[1], shared_value[2]);
+ }
+ 
+ static void mzd_unshare(mzd_local_t* dst, mzd_local_t* shared_value[SC_PROOF]) {
+-  mzd_xor(dst, shared_value[0], shared_value[1]);
+-  mzd_xor(dst, dst, shared_value[2]);
++  oqs_sig_picnic_mzd_xor(dst, shared_value[0], shared_value[1]);
++  oqs_sig_picnic_mzd_xor(dst, dst, shared_value[2]);
+ }
+ 
+ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key,
+@@ -388,13 +388,13 @@ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key,
+   view_t* views    = calloc(sizeof(view_t), view_count);
+   if (lowmc->m != 10) {
+     for (size_t i = 0; i < view_count; ++i) {
+-      mzd_local_init_multiple_ex(views[i].s, SC_PROOF, 1, lowmc_n, false);
++      oqs_sig_picnic_mzd_local_init_multiple_ex(views[i].s, SC_PROOF, 1, lowmc_n, false);
+     }
+   }
+ 
+   in_out_shares_t in_out_shares[2];
+-  mzd_local_init_multiple_ex(in_out_shares[0].s, SC_PROOF, 1, lowmc_k, false);
+-  mzd_local_init_multiple_ex(in_out_shares[1].s, SC_PROOF, 1, lowmc_n, false);
++  oqs_sig_picnic_mzd_local_init_multiple_ex(in_out_shares[0].s, SC_PROOF, 1, lowmc_k, false);
++  oqs_sig_picnic_mzd_local_init_multiple_ex(in_out_shares[1].s, SC_PROOF, 1, lowmc_n, false);
+ 
+   // Generate seeds
+   START_TIMING;
+@@ -418,7 +418,7 @@ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key,
+ 
+   START_TIMING;
+   mzd_local_t* shared_key[SC_PROOF];
+-  mzd_local_init_multiple(shared_key, SC_PROOF, 1, lowmc_k);
++  oqs_sig_picnic_mzd_local_init_multiple(shared_key, SC_PROOF, 1, lowmc_k);
+   END_TIMING(timing_and_size->sign.secret_sharing);
+ 
+   // START_TIMING; TODO: I guess this shouldn't be here
+@@ -426,7 +426,7 @@ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key,
+   rvec_t* rvec = calloc(sizeof(rvec_t), lowmc_r); // random tapes for and-gates
+   if (lowmc->m != 10) {
+     for (unsigned int i = 0; i < lowmc_r; ++i) {
+-      mzd_local_init_multiple_ex(rvec[i].s, SC_PROOF, 1, lowmc_n, false);
++      oqs_sig_picnic_mzd_local_init_multiple_ex(rvec[i].s, SC_PROOF, 1, lowmc_n, false);
+     }
+   }
+ 
+@@ -443,11 +443,11 @@ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key,
+     // compute sharing
+     for (unsigned int j = 0; j < SC_PROOF - 1; ++j) {
+       kdf_shake_get_randomness(&kdfs[j], round->input_shares[j], input_size);
+-      mzd_from_char_array(shared_key[j], round->input_shares[j], input_size);
++      oqs_sig_picnic_mzd_from_char_array(shared_key[j], round->input_shares[j], input_size);
+     }
+-    mzd_local_copy(shared_key[SC_PROOF - 1], lowmc_key);
++    oqs_sig_picnic_mzd_local_copy(shared_key[SC_PROOF - 1], lowmc_key);
+     mzd_share(shared_key);
+-    mzd_to_char_array(round->input_shares[SC_PROOF - 1], shared_key[SC_PROOF - 1], input_size);
++    oqs_sig_picnic_mzd_to_char_array(round->input_shares[SC_PROOF - 1], shared_key[SC_PROOF - 1], input_size);
+ 
+     // compute random tapes
+     for (unsigned int j = 0; j < SC_PROOF; ++j) {
+@@ -464,7 +464,7 @@ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key,
+ 
+     // commitments
+     for (unsigned int j = 0; j < SC_PROOF; ++j) {
+-      mzd_to_char_array(round->output_shares[j], in_out_shares[1].s[j], output_size);
++      oqs_sig_picnic_mzd_to_char_array(round->output_shares[j], in_out_shares[1].s[j], output_size);
+       compress_view(round->communicated_bits[j], pp, views, j);
+       hash_commitment(pp, round, j);
+     }
+@@ -487,17 +487,17 @@ static bool sign_impl(const picnic_instance_t* pp, const uint8_t* private_key,
+   free(tape_bytes);
+   if (lowmc->m != 10) {
+     for (unsigned n = 0; n < view_count; ++n) {
+-      mzd_local_free_multiple(rvec[n].s);
++      oqs_sig_picnic_mzd_local_free_multiple(rvec[n].s);
+     }
+     for (unsigned n = 0; n < view_count; ++n) {
+-      mzd_local_free_multiple(views[n].s);
++      oqs_sig_picnic_mzd_local_free_multiple(views[n].s);
+     }
+   }
+   free(views);
+   free(rvec);
+-  mzd_local_free_multiple(shared_key);
+-  mzd_local_free_multiple(in_out_shares[0].s);
+-  mzd_local_free_multiple(in_out_shares[1].s);
++  oqs_sig_picnic_mzd_local_free_multiple(shared_key);
++  oqs_sig_picnic_mzd_local_free_multiple(in_out_shares[0].s);
++  oqs_sig_picnic_mzd_local_free_multiple(in_out_shares[1].s);
+   proof_free(prf);
+ 
+   END_TIMING(timing_and_size->sign.challenge);
+@@ -528,12 +528,12 @@ static bool verify_impl(const picnic_instance_t* pp, const uint8_t* plaintext, m
+   }
+ 
+   in_out_shares_t in_out_shares[2];
+-  mzd_local_init_multiple_ex(in_out_shares[0].s, SC_VERIFY, 1, lowmc_k, false);
+-  mzd_local_init_multiple_ex(in_out_shares[1].s, SC_PROOF, 1, lowmc_n, false);
++  oqs_sig_picnic_mzd_local_init_multiple_ex(in_out_shares[0].s, SC_VERIFY, 1, lowmc_k, false);
++  oqs_sig_picnic_mzd_local_init_multiple_ex(in_out_shares[1].s, SC_PROOF, 1, lowmc_n, false);
+   view_t* views = calloc(sizeof(view_t), view_count);
+   if (lowmc->m != 10) {
+     for (size_t i = 0; i < view_count; ++i) {
+-      mzd_local_init_multiple_ex(views[i].s, SC_VERIFY, 1, lowmc_n, false);
++      oqs_sig_picnic_mzd_local_init_multiple_ex(views[i].s, SC_VERIFY, 1, lowmc_n, false);
+     }
+   }
+ 
+@@ -542,7 +542,7 @@ static bool verify_impl(const picnic_instance_t* pp, const uint8_t* plaintext, m
+   rvec_t* rvec = calloc(sizeof(rvec_t), lowmc_r); // random tapes for and-gates
+   if (lowmc->m != 10) {
+     for (unsigned int i = 0; i < lowmc_r; ++i) {
+-      mzd_local_init_multiple_ex(rvec[i].s, SC_VERIFY, 1, lowmc_n, false);
++      oqs_sig_picnic_mzd_local_init_multiple_ex(rvec[i].s, SC_VERIFY, 1, lowmc_n, false);
+     }
+   }
+   uint8_t* tape_bytes = malloc(view_size);
+@@ -566,8 +566,8 @@ static bool verify_impl(const picnic_instance_t* pp, const uint8_t* plaintext, m
+       kdf_shake_get_randomness(&kdfs[1], round->input_shares[1], input_size);
+     }
+ 
+-    mzd_from_char_array(in_out_shares[0].s[0], round->input_shares[0], input_size);
+-    mzd_from_char_array(in_out_shares[0].s[1], round->input_shares[1], input_size);
++    oqs_sig_picnic_mzd_from_char_array(in_out_shares[0].s[0], round->input_shares[0], input_size);
++    oqs_sig_picnic_mzd_from_char_array(in_out_shares[0].s[1], round->input_shares[1], input_size);
+ 
+     // compute random tapes
+     for (unsigned int j = 0; j < SC_VERIFY; ++j) {
+@@ -590,10 +590,10 @@ static bool verify_impl(const picnic_instance_t* pp, const uint8_t* plaintext, m
+     mzd_unshare(in_out_shares[1].s[2], ys);
+ 
+     for (unsigned int j = 0; j < SC_VERIFY; ++j) {
+-      mzd_to_char_array(round->output_shares[j], in_out_shares[1].s[j], output_size);
++      oqs_sig_picnic_mzd_to_char_array(round->output_shares[j], in_out_shares[1].s[j], output_size);
+       hash_commitment(pp, round, j);
+     }
+-    mzd_to_char_array(round->output_shares[SC_VERIFY], in_out_shares[1].s[SC_VERIFY], output_size);
++    oqs_sig_picnic_mzd_to_char_array(round->output_shares[SC_VERIFY], in_out_shares[1].s[SC_VERIFY], output_size);
+ 
+     if (transform == TRANSFORM_UR) {
+       for (unsigned int j = 0; j < SC_VERIFY; ++j) {
+@@ -611,16 +611,16 @@ static bool verify_impl(const picnic_instance_t* pp, const uint8_t* plaintext, m
+   free(tape_bytes);
+   if (lowmc->m != 10) {
+     for (unsigned n = 0; n < view_count; ++n) {
+-      mzd_local_free_multiple(rvec[n].s);
++      oqs_sig_picnic_mzd_local_free_multiple(rvec[n].s);
+     }
+     for (unsigned n = 0; n < view_count; ++n) {
+-      mzd_local_free_multiple(views[n].s);
++      oqs_sig_picnic_mzd_local_free_multiple(views[n].s);
+     }
+   }
+   free(views);
+   free(rvec);
+-  mzd_local_free_multiple(in_out_shares[0].s);
+-  mzd_local_free_multiple(in_out_shares[1].s);
++  oqs_sig_picnic_mzd_local_free_multiple(in_out_shares[0].s);
++  oqs_sig_picnic_mzd_local_free_multiple(in_out_shares[1].s);
+ 
+   proof_free(prf);
+ 
+@@ -777,42 +777,42 @@ err:
+   return NULL;
+ }
+ 
+-bool fis_sign(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* private_key,
++bool oqs_sig_picnic_fis_sign(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* private_key,
+               const uint8_t* public_key, const uint8_t* msg, size_t msglen, uint8_t* sig,
+               size_t* siglen) {
+-  mzd_local_t* m_plaintext  = mzd_local_init_ex(1, pp->lowmc.n, false);
+-  mzd_local_t* m_privatekey = mzd_local_init_ex(1, pp->lowmc.k, false);
++  mzd_local_t* m_plaintext  = oqs_sig_picnic_mzd_local_init_ex(1, pp->lowmc.n, false);
++  mzd_local_t* m_privatekey = oqs_sig_picnic_mzd_local_init_ex(1, pp->lowmc.k, false);
+ 
+-  mzd_from_char_array(m_plaintext, plaintext, pp->output_size);
+-  mzd_from_char_array(m_privatekey, private_key, pp->input_size);
++  oqs_sig_picnic_mzd_from_char_array(m_plaintext, plaintext, pp->output_size);
++  oqs_sig_picnic_mzd_from_char_array(m_privatekey, private_key, pp->input_size);
+ 
+   const bool result = sign_impl(pp, private_key, m_privatekey, plaintext, m_plaintext, public_key,
+                                 msg, msglen, sig, siglen);
+ 
+-  mzd_local_free(m_privatekey);
+-  mzd_local_free(m_plaintext);
++  oqs_sig_picnic_mzd_local_free(m_privatekey);
++  oqs_sig_picnic_mzd_local_free(m_plaintext);
+ 
+   return result;
+ }
+ 
+-bool fis_verify(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* public_key,
++bool oqs_sig_picnic_fis_verify(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* public_key,
+                 const uint8_t* msg, size_t msglen, const uint8_t* sig, size_t siglen) {
+-  mzd_local_t* m_plaintext = mzd_local_init_ex(1, pp->lowmc.n, false);
+-  mzd_local_t* m_publickey = mzd_local_init_ex(1, pp->lowmc.n, false);
++  mzd_local_t* m_plaintext = oqs_sig_picnic_mzd_local_init_ex(1, pp->lowmc.n, false);
++  mzd_local_t* m_publickey = oqs_sig_picnic_mzd_local_init_ex(1, pp->lowmc.n, false);
+ 
+-  mzd_from_char_array(m_plaintext, plaintext, pp->output_size);
+-  mzd_from_char_array(m_publickey, public_key, pp->output_size);
++  oqs_sig_picnic_mzd_from_char_array(m_plaintext, plaintext, pp->output_size);
++  oqs_sig_picnic_mzd_from_char_array(m_publickey, public_key, pp->output_size);
+ 
+   const bool result =
+       verify_impl(pp, plaintext, m_plaintext, public_key, m_publickey, msg, msglen, sig, siglen);
+ 
+-  mzd_local_free(m_publickey);
+-  mzd_local_free(m_plaintext);
++  oqs_sig_picnic_mzd_local_free(m_publickey);
++  oqs_sig_picnic_mzd_local_free(m_plaintext);
+ 
+   return result;
+ }
+ 
+-void visualize_signature(FILE* out, const picnic_instance_t* pp, const uint8_t* msg, size_t msglen,
++void oqs_sig_picnic_visualize_signature(FILE* out, const picnic_instance_t* pp, const uint8_t* msg, size_t msglen,
+                          const uint8_t* sig, size_t siglen) {
+   const size_t digest_size    = pp->digest_size;
+   const size_t seed_size      = pp->seed_size;
+@@ -894,14 +894,14 @@ void hash_commitment(const picnic_instance_t* pp, proof_round_t* prf_round, unsi
+   hash_context ctx;
+ 
+   // hash the seed
+-  hash_init(&ctx, pp);
++  oqs_sig_picnic_hash_init(&ctx, pp);
+   hash_update(&ctx, &HASH_PREFIX_4, sizeof(HASH_PREFIX_4));
+   hash_update(&ctx, prf_round->seeds[vidx], pp->seed_size);
+   hash_final(&ctx);
+   hash_squeeze(tmp, hashlen, &ctx);
+ 
+   // compute H_0(H_4(seed), view)
+-  hash_init(&ctx, pp);
++  oqs_sig_picnic_hash_init(&ctx, pp);
+   hash_update(&ctx, &HASH_PREFIX_0, sizeof(HASH_PREFIX_0));
+   hash_update(&ctx, tmp, hashlen);
+   // hash input share
+@@ -925,7 +925,7 @@ static void H3_compute(const picnic_instance_t* pp, uint8_t* hash, uint8_t* ch)
+   while (ch < eof) {
+     if (bit_idx >= digest_size_bits) {
+       hash_context ctx;
+-      hash_init(&ctx, pp);
++      oqs_sig_picnic_hash_init(&ctx, pp);
+       hash_update(&ctx, &HASH_PREFIX_1, sizeof(HASH_PREFIX_1));
+       hash_update(&ctx, hash, digest_size);
+       hash_final(&ctx);
+@@ -948,7 +948,7 @@ void fs_H3_verify(const picnic_instance_t* pp, sig_proof_t* prf, const uint8_t*
+   const size_t output_size = pp->output_size;
+ 
+   hash_context ctx;
+-  hash_init(&ctx, pp);
++  oqs_sig_picnic_hash_init(&ctx, pp);
+   hash_update(&ctx, &HASH_PREFIX_1, sizeof(HASH_PREFIX_1));
+ 
+   // hash output shares
+@@ -1048,7 +1048,7 @@ void fs_H3(const picnic_instance_t* pp, sig_proof_t* prf, const uint8_t* circuit
+   const size_t num_rounds = pp->num_rounds;
+ 
+   hash_context ctx;
+-  hash_init(&ctx, pp);
++  oqs_sig_picnic_hash_init(&ctx, pp);
+   hash_update(&ctx, &HASH_PREFIX_1, sizeof(HASH_PREFIX_1));
+ 
+   // hash output shares
+@@ -1084,7 +1084,7 @@ void unruh_G(const picnic_instance_t* pp, proof_round_t* prf_round, unsigned vid
+   const size_t seedlen     = pp->seed_size;
+ 
+   /* Hash the seed with H_5, store digest in output */
+-  hash_init(&ctx, pp);
++  oqs_sig_picnic_hash_init(&ctx, pp);
+   hash_update(&ctx, &HASH_PREFIX_5, sizeof(HASH_PREFIX_5));
+   hash_update(&ctx, prf_round->seeds[vidx], seedlen);
+   hash_final(&ctx);
+@@ -1093,7 +1093,7 @@ void unruh_G(const picnic_instance_t* pp, proof_round_t* prf_round, unsigned vid
+   hash_squeeze(tmp, digest_size, &ctx);
+ 
+   /* Hash H_5(seed), the view, and the length */
+-  hash_init(&ctx, pp);
++  oqs_sig_picnic_hash_init(&ctx, pp);
+   hash_update(&ctx, tmp, digest_size);
+   if (include_is) {
+     hash_update(&ctx, prf_round->input_shares[vidx], pp->input_size);
+@@ -1181,14 +1181,14 @@ static bool create_instance(picnic_instance_t* pp, picnic_params_t param, uint32
+   }
+ #endif
+   if (!have_instance) {
+-    have_instance = lowmc_init(&pp->lowmc, m, n, r, k);
++    have_instance = oqs_sig_picnic_lowmc_init(&pp->lowmc, m, n, r, k);
+   }
+   if (!have_instance) {
+     return false;
+   }
+ 
+-  pp->lowmc_impl        = get_lowmc_implementation(&pp->lowmc);
+-  pp->lowmc_verify_impl = get_lowmc_verify_implementation(&pp->lowmc);
++  pp->lowmc_impl        = oqs_sig_picnic_get_lowmc_implementation(&pp->lowmc);
++  pp->lowmc_verify_impl = oqs_sig_picnic_get_lowmc_verify_implementation(&pp->lowmc);
+ 
+   pp->params         = param;
+   pp->transform      = param_to_transform(param);
+@@ -1227,10 +1227,10 @@ static bool create_instance(picnic_instance_t* pp, picnic_params_t param, uint32
+ }
+ 
+ static void destroy_instance(picnic_instance_t* pp) {
+-  lowmc_clear(&pp->lowmc);
++  oqs_sig_picnic_lowmc_clear(&pp->lowmc);
+ }
+ 
+-picnic_instance_t* get_instance(picnic_params_t param) {
++picnic_instance_t* oqs_sig_picnic_get_instance(picnic_params_t param) {
+   if (param <= PARAMETER_SET_INVALID || param >= PARAMETER_SET_MAX_INDEX) {
+     return NULL;
+   }
+@@ -1262,8 +1262,8 @@ static void collapse_challenge(uint8_t* collapsed, const picnic_instance_t* pp,
+   bs.position = 0;
+ 
+   for (unsigned int i = 0; i < pp->num_rounds; ++i) {
+-    bitstream_put_bits(&bs, challenge[i] & 1, 1);
+-    bitstream_put_bits(&bs, (challenge[i] >> 1) & 1, 1);
++    oqs_sig_picnic_bitstream_put_bits(&bs, challenge[i] & 1, 1);
++    oqs_sig_picnic_bitstream_put_bits(&bs, (challenge[i] >> 1) & 1, 1);
+   }
+ }
+ 
+@@ -1274,8 +1274,8 @@ static bool expand_challenge(uint8_t* challenge, const picnic_instance_t* pp,
+   bs.position = 0;
+ 
+   for (unsigned int i = 0; i < pp->num_rounds; ++i) {
+-    uint8_t ch = bitstream_get_bits(&bs, 1);
+-    ch |= bitstream_get_bits(&bs, 1) << 1;
++    uint8_t ch = oqs_sig_picnic_bitstream_get_bits(&bs, 1);
++    ch |= oqs_sig_picnic_bitstream_get_bits(&bs, 1) << 1;
+     if (ch == 3) {
+       return false;
+     }
+@@ -1283,7 +1283,7 @@ static bool expand_challenge(uint8_t* challenge, const picnic_instance_t* pp,
+   }
+ 
+   size_t remaining_bits = (pp->collapsed_challenge_size << 3) - bs.position;
+-  if (remaining_bits && bitstream_get_bits(&bs, remaining_bits)) {
++  if (remaining_bits && oqs_sig_picnic_bitstream_get_bits(&bs, remaining_bits)) {
+     return false;
+   }
+ 
+diff --git a/picnic_impl.h b/picnic_impl.h
+index dcc3747..18bfde5 100644
+--- a/picnic_impl.h
++++ b/picnic_impl.h
+@@ -42,17 +42,17 @@ typedef struct {
+   transform_t transform;
+ } picnic_instance_t;
+ 
+-picnic_instance_t* get_instance(picnic_params_t param);
++picnic_instance_t* oqs_sig_picnic_get_instance(picnic_params_t param);
+ const picnic_instance_t* picnic_instance_get(picnic_params_t param);
+ 
+-bool fis_sign(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* private_key,
++bool oqs_sig_picnic_fis_sign(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* private_key,
+               const uint8_t* public_key, const uint8_t* msg, size_t msglen, uint8_t* sig,
+               size_t* siglen);
+ 
+-bool fis_verify(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* public_key,
++bool oqs_sig_picnic_fis_verify(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* public_key,
+                 const uint8_t* msg, size_t msglen, const uint8_t* sig, size_t siglen);
+ 
+-void visualize_signature(FILE* out, const picnic_instance_t* pp, const uint8_t* msg, size_t msglen,
++void oqs_sig_picnic_visualize_signature(FILE* out, const picnic_instance_t* pp, const uint8_t* msg, size_t msglen,
+                          const uint8_t* sig, size_t siglen);
+ 
+ PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_get_private_key_size(picnic_params_t param);
+diff --git a/sha3/KeccakHash.c b/sha3/KeccakHash.c
+index bcfd1e9..6c7a0e6 100644
+--- a/sha3/KeccakHash.c
++++ b/sha3/KeccakHash.c
+@@ -18,7 +18,7 @@ http://creativecommons.org/publicdomain/zero/1.0/
+ 
+ /* ---------------------------------------------------------------- */
+ 
+-HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix)
++HashReturn oqs_sig_picnic_Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix)
+ {
+     HashReturn result;
+ 
+@@ -34,7 +34,7 @@ HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rat
+ 
+ /* ---------------------------------------------------------------- */
+ 
+-HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, BitLength databitlen)
++HashReturn oqs_sig_picnic_Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, BitLength databitlen)
+ {
+     if ((databitlen % 8) == 0)
+         return (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8);
+@@ -61,7 +61,7 @@ HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *d
+ 
+ /* ---------------------------------------------------------------- */
+ 
+-HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval)
++HashReturn oqs_sig_picnic_Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval)
+ {
+     HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix);
+     if (ret == SUCCESS)
+@@ -72,7 +72,7 @@ HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval)
+ 
+ /* ---------------------------------------------------------------- */
+ 
+-HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, BitLength databitlen)
++HashReturn oqs_sig_picnic_Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, BitLength databitlen)
+ {
+     if ((databitlen % 8) != 0)
+         return FAIL;
+diff --git a/sha3/KeccakHash.h b/sha3/KeccakHash.h
+index 99347d6..1ba03a0 100644
+--- a/sha3/KeccakHash.h
++++ b/sha3/KeccakHash.h
+@@ -51,31 +51,31 @@ typedef struct {
+   * @pre    One must have r+c=1600 and the rate a multiple of 8 bits in this implementation.
+   * @return SUCCESS if successful, FAIL otherwise.
+   */
+-HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix);
++HashReturn oqs_sig_picnic_Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix);
+ 
+ /** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard.
+   */
+-#define Keccak_HashInitialize_SHAKE128(hashInstance)        Keccak_HashInitialize(hashInstance, 1344,  256,   0, 0x1F)
++#define Keccak_HashInitialize_SHAKE128(hashInstance)        oqs_sig_picnic_Keccak_HashInitialize(hashInstance, 1344,  256,   0, 0x1F)
+ 
+ /** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard.
+   */
+-#define Keccak_HashInitialize_SHAKE256(hashInstance)        Keccak_HashInitialize(hashInstance, 1088,  512,   0, 0x1F)
++#define Keccak_HashInitialize_SHAKE256(hashInstance)        oqs_sig_picnic_Keccak_HashInitialize(hashInstance, 1088,  512,   0, 0x1F)
+ 
+ /** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard.
+   */
+-#define Keccak_HashInitialize_SHA3_224(hashInstance)        Keccak_HashInitialize(hashInstance, 1152,  448, 224, 0x06)
++#define Keccak_HashInitialize_SHA3_224(hashInstance)        oqs_sig_picnic_Keccak_HashInitialize(hashInstance, 1152,  448, 224, 0x06)
+ 
+ /** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard.
+   */
+-#define Keccak_HashInitialize_SHA3_256(hashInstance)        Keccak_HashInitialize(hashInstance, 1088,  512, 256, 0x06)
++#define Keccak_HashInitialize_SHA3_256(hashInstance)        oqs_sig_picnic_Keccak_HashInitialize(hashInstance, 1088,  512, 256, 0x06)
+ 
+ /** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard.
+   */
+-#define Keccak_HashInitialize_SHA3_384(hashInstance)        Keccak_HashInitialize(hashInstance,  832,  768, 384, 0x06)
++#define Keccak_HashInitialize_SHA3_384(hashInstance)        oqs_sig_picnic_Keccak_HashInitialize(hashInstance,  832,  768, 384, 0x06)
+ 
+ /** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard.
+   */
+-#define Keccak_HashInitialize_SHA3_512(hashInstance)        Keccak_HashInitialize(hashInstance,  576, 1024, 512, 0x06)
++#define Keccak_HashInitialize_SHA3_512(hashInstance)        oqs_sig_picnic_Keccak_HashInitialize(hashInstance,  576, 1024, 512, 0x06)
+ 
+ /**
+   * Function to give input data to be absorbed.
+@@ -87,7 +87,7 @@ HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int
+   * @pre    In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8.
+   * @return SUCCESS if successful, FAIL otherwise.
+   */
+-HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, BitLength databitlen);
++HashReturn oqs_sig_picnic_Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, BitLength databitlen);
+ 
+ /**
+   * Function to call after all input blocks have been input and to get
+@@ -100,7 +100,7 @@ HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequenc
+   * @param  hashval     Pointer to the buffer where to store the output data.
+   * @return SUCCESS if successful, FAIL otherwise.
+   */
+-HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval);
++HashReturn oqs_sig_picnic_Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval);
+ 
+  /**
+   * Function to squeeze output data.
+@@ -111,7 +111,7 @@ HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hash
+   * @pre    @a databitlen is a multiple of 8.
+   * @return SUCCESS if successful, FAIL otherwise.
+   */
+-HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, BitLength databitlen);
++HashReturn oqs_sig_picnic_Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, BitLength databitlen);
+ 
+ #endif
+ 
+diff --git a/tests/bitstream_test.c b/tests/bitstream_test.c
+index 620bcc6..7bc11e4 100644
+--- a/tests/bitstream_test.c
++++ b/tests/bitstream_test.c
+@@ -16,12 +16,12 @@ static int simple_test(void) {
+     bitstream_t bsw;
+     bsw.buffer = buffer;
+     bsw.position = 0;
+-    bitstream_put_bits(&bsw, v, i);
++    oqs_sig_picnic_bitstream_put_bits(&bsw, v, i);
+ 
+     bitstream_t bsr;
+     bsr.buffer = buffer;
+     bsr.position = 0;
+-    const uint64_t r = bitstream_get_bits(&bsr, i);
++    const uint64_t r = oqs_sig_picnic_bitstream_get_bits(&bsr, i);
+     if (r != v) {
+       printf("simple_test: expected %016" PRIx64 ", got %016" PRIx64 "\n", v, r);
+       ret = -1;
+@@ -49,19 +49,19 @@ static int test_30(void) {
+   bitstream_t bsw;
+   bsw.buffer = buffer;
+   bsw.position = 0;
+-  bitstream_put_bits(&bsw, v, 30);
++  oqs_sig_picnic_bitstream_put_bits(&bsw, v, 30);
+ 
+   bitstream_t bsw2;
+   bsw2.buffer = buffer2;
+   bsw2.position = 0;
+   for (unsigned int i = 0; i < 30; ++i) {
+-    bitstream_put_bits(&bsw2, v >> (30 - i - 1), 1);
++    oqs_sig_picnic_bitstream_put_bits(&bsw2, v >> (30 - i - 1), 1);
+   }
+ 
+   bitstream_t bsr;
+   bsr.buffer = buffer;
+   bsr.position = 0;
+-  uint64_t r = bitstream_get_bits(&bsr, 30);
++  uint64_t r = oqs_sig_picnic_bitstream_get_bits(&bsr, 30);
+   if (r != v) {
+     printf("test_30: expected %016" PRIx64 ", got %016" PRIx64 "\n", v, r);
+     ret = -1;
+@@ -71,7 +71,7 @@ static int test_30(void) {
+   bsr2.buffer = buffer2;
+   bsr2.position = 0;
+   for (unsigned int i = 0; i < 30; ++i) {
+-    r = bitstream_get_bits(&bsr2, 1);
++    r = oqs_sig_picnic_bitstream_get_bits(&bsr2, 1);
+     const uint64_t e = (v >> (30 - i - 1)) & 0x1;
+     if (e != r) {
+       printf("test_30: expected2 %016" PRIx64 ", got %016" PRIx64 "\n", e, r);
+@@ -105,18 +105,18 @@ static int test_multiple_30(void) {
+   bitstream_t bsw;
+   bsw.buffer = buffer;
+   bsw.position = 0;
+-  bitstream_put_bits(&bsw, v, 30);
+-  bitstream_put_bits(&bsw, v2, 30);
++  oqs_sig_picnic_bitstream_put_bits(&bsw, v, 30);
++  oqs_sig_picnic_bitstream_put_bits(&bsw, v2, 30);
+ 
+   bitstream_t bsr;
+   bsr.buffer = buffer;
+   bsr.position = 0;
+-  uint64_t r = bitstream_get_bits(&bsr, 30);
++  uint64_t r = oqs_sig_picnic_bitstream_get_bits(&bsr, 30);
+   if (r != v) {
+     printf("test_multiple_30: expected %016" PRIx64 ", got %016" PRIx64 "\n", v, r);
+     ret = -1;
+   }
+-  r = bitstream_get_bits(&bsr, 30);
++  r = oqs_sig_picnic_bitstream_get_bits(&bsr, 30);
+   if (r != v2) {
+     printf("test_multiple_30: expected %016" PRIx64 ", got %016" PRIx64 "\n", v2, r);
+     ret = -1;
+diff --git a/tests/lowmc_test.c b/tests/lowmc_test.c
+index 753fe8a..0288c5d 100644
+--- a/tests/lowmc_test.c
++++ b/tests/lowmc_test.c
+@@ -13,7 +13,7 @@
+ 
+ static int lowmc_enc_str(const picnic_params_t param, const char* key, const char* plaintext,
+                          const char* expected) {
+-  picnic_instance_t* pp = get_instance(param);
++  picnic_instance_t* pp = oqs_sig_picnic_get_instance(param);
+   if (!pp) {
+     return -1;
+   }
+@@ -27,21 +27,21 @@ static int lowmc_enc_str(const picnic_params_t param, const char* key, const cha
+   mzd_local_t* ctl = mzd_convert(ct);
+ 
+   int ret          = 0;
+-  mzd_local_t* ctr = lowmc_call(&pp->lowmc, skl, ptl);
++  mzd_local_t* ctr = oqs_sig_picnic_lowmc_call(&pp->lowmc, skl, ptl);
+   if (!ctr) {
+     ret = 1;
+     goto end;
+   }
+ 
+-  if (!mzd_local_equal(ctr, ctl)) {
++  if (!oqs_sig_picnic_mzd_local_equal(ctr, ctl)) {
+     ret = 2;
+   }
+ 
+ end:
+-  mzd_local_free(ctr);
+-  mzd_local_free(ctl);
+-  mzd_local_free(ptl);
+-  mzd_local_free(skl);
++  oqs_sig_picnic_mzd_local_free(ctr);
++  oqs_sig_picnic_mzd_local_free(ctl);
++  oqs_sig_picnic_mzd_local_free(ptl);
++  oqs_sig_picnic_mzd_local_free(skl);
+   mzd_free(ct);
+   mzd_free(pt);
+   mzd_free(sk);
+@@ -51,35 +51,35 @@ end:
+ 
+ static int lowmc_enc(const picnic_params_t param, const uint8_t* key, const uint8_t* plaintext,
+                      const uint8_t* expected) {
+-  picnic_instance_t* pp = get_instance(param);
++  picnic_instance_t* pp = oqs_sig_picnic_get_instance(param);
+   if (!pp) {
+     return -1;
+   }
+ 
+-  mzd_local_t* sk = mzd_local_init(1, pp->lowmc.k);
+-  mzd_local_t* pt = mzd_local_init(1, pp->lowmc.n);
+-  mzd_local_t* ct = mzd_local_init(1, pp->lowmc.n);
++  mzd_local_t* sk = oqs_sig_picnic_mzd_local_init(1, pp->lowmc.k);
++  mzd_local_t* pt = oqs_sig_picnic_mzd_local_init(1, pp->lowmc.n);
++  mzd_local_t* ct = oqs_sig_picnic_mzd_local_init(1, pp->lowmc.n);
+ 
+-  mzd_from_char_array(sk, key, pp->input_size);
+-  mzd_from_char_array(pt, plaintext, pp->output_size);
+-  mzd_from_char_array(ct, expected, pp->output_size);
++  oqs_sig_picnic_mzd_from_char_array(sk, key, pp->input_size);
++  oqs_sig_picnic_mzd_from_char_array(pt, plaintext, pp->output_size);
++  oqs_sig_picnic_mzd_from_char_array(ct, expected, pp->output_size);
+ 
+   int ret          = 0;
+-  mzd_local_t* ctr = lowmc_call(&pp->lowmc, sk, pt);
++  mzd_local_t* ctr = oqs_sig_picnic_lowmc_call(&pp->lowmc, sk, pt);
+   if (!ctr) {
+     ret = 1;
+     goto end;
+   }
+ 
+-  if (!mzd_local_equal(ctr, ct)) {
++  if (!oqs_sig_picnic_mzd_local_equal(ctr, ct)) {
+     ret = 2;
+   }
+ 
+ end:
+-  mzd_local_free(ctr);
+-  mzd_local_free(ct);
+-  mzd_local_free(pt);
+-  mzd_local_free(sk);
++  oqs_sig_picnic_mzd_local_free(ctr);
++  oqs_sig_picnic_mzd_local_free(ct);
++  oqs_sig_picnic_mzd_local_free(pt);
++  oqs_sig_picnic_mzd_local_free(sk);
+ 
+   return ret;
+ }
+diff --git a/tests/mpc_test.c b/tests/mpc_test.c
+index 57d5fda..de06da3 100644
+--- a/tests/mpc_test.c
++++ b/tests/mpc_test.c
+@@ -11,34 +11,34 @@
+ 
+ static mzd_local_t** mpc_init_empty_share_vector(uint32_t n, unsigned sc) {
+   mzd_local_t** s = malloc(sc * sizeof(mzd_local_t*));
+-  mzd_local_init_multiple(s, sc, 1, n);
++  oqs_sig_picnic_mzd_local_init_multiple(s, sc, 1, n);
+   return s;
+ }
+ 
+ static mzd_local_t* mpc_reconstruct_from_share(mzd_local_t* dst, mzd_local_t** shared_vec) {
+   if (!dst) {
+-    dst = mzd_local_init_ex(shared_vec[0]->nrows, shared_vec[0]->ncols, false);
++    dst = oqs_sig_picnic_mzd_local_init_ex(shared_vec[0]->nrows, shared_vec[0]->ncols, false);
+   }
+ 
+-  mzd_xor(dst, shared_vec[0], shared_vec[1]);
+-  return mzd_xor(dst, dst, shared_vec[2]);
++  oqs_sig_picnic_mzd_xor(dst, shared_vec[0], shared_vec[1]);
++  return oqs_sig_picnic_mzd_xor(dst, dst, shared_vec[2]);
+ }
+ 
+ static mzd_local_t* mzd_init_random_vector(rci_t n) {
+-  mzd_local_t* a = mzd_local_init(1, n);
++  mzd_local_t* a = oqs_sig_picnic_mzd_local_init(1, n);
+   mzd_randomize_ssl(a);
+   return a;
+ }
+ 
+ static mzd_local_t** mpc_init_share_vector(mzd_local_t const* v) {
+   mzd_local_t** s = malloc(3 * sizeof(mzd_local_t*));
+-  mzd_local_init_multiple_ex(s, 3, 1, v->ncols, false);
++  oqs_sig_picnic_mzd_local_init_multiple_ex(s, 3, 1, v->ncols, false);
+ 
+   mzd_randomize_ssl(s[0]);
+   mzd_randomize_ssl(s[1]);
+ 
+-  mzd_xor(s[2], s[0], s[1]);
+-  mzd_xor(s[2], s[2], v);
++  oqs_sig_picnic_mzd_xor(s[2], s[0], s[1]);
++  oqs_sig_picnic_mzd_xor(s[2], s[2], v);
+ 
+   return s;
+ }
+@@ -48,37 +48,37 @@ static void test_mpc_share(void) {
+   mzd_local_t** s1   = mpc_init_share_vector(t1);
+   mzd_local_t* t1cmb = mpc_reconstruct_from_share(NULL, s1);
+ 
+-  if (mzd_local_equal(t1, t1cmb))
++  if (oqs_sig_picnic_mzd_local_equal(t1, t1cmb))
+     printf("Share test successful.\n");
+ 
+-  mzd_local_free(t1);
+-  mzd_local_free_multiple(s1);
+-  mzd_local_free(t1cmb);
++  oqs_sig_picnic_mzd_local_free(t1);
++  oqs_sig_picnic_mzd_local_free_multiple(s1);
++  oqs_sig_picnic_mzd_local_free(t1cmb);
+ }
+ 
+ static void test_mpc_add(void) {
+   mzd_local_t* t1  = mzd_init_random_vector(10);
+   mzd_local_t* t2  = mzd_init_random_vector(10);
+-  mzd_local_t* res = mzd_local_init(1, 10);
+-  mzd_xor(res, t1, t2);
++  mzd_local_t* res = oqs_sig_picnic_mzd_local_init(1, 10);
++  oqs_sig_picnic_mzd_xor(res, t1, t2);
+ 
+   mzd_local_t** s1   = mpc_init_share_vector(t1);
+   mzd_local_t** s2   = mpc_init_share_vector(t2);
+   mzd_local_t** ress = mpc_init_empty_share_vector(10, 3);
+-  mpc_xor(ress, s1, s2, 3);
++  oqs_sig_picnic_mpc_xor(ress, s1, s2, 3);
+ 
+   mzd_local_t* cmp = mpc_reconstruct_from_share(NULL, ress);
+ 
+-  if (mzd_local_equal(res, cmp))
++  if (oqs_sig_picnic_mzd_local_equal(res, cmp))
+     printf("Shared add test successful.\n");
+ 
+-  mzd_local_free(t1);
+-  mzd_local_free(t2);
+-  mzd_local_free(res);
+-  mzd_local_free_multiple(s1);
+-  mzd_local_free_multiple(s2);
+-  mzd_local_free_multiple(ress);
+-  mzd_local_free(cmp);
++  oqs_sig_picnic_mzd_local_free(t1);
++  oqs_sig_picnic_mzd_local_free(t2);
++  oqs_sig_picnic_mzd_local_free(res);
++  oqs_sig_picnic_mzd_local_free_multiple(s1);
++  oqs_sig_picnic_mzd_local_free_multiple(s2);
++  oqs_sig_picnic_mzd_local_free_multiple(ress);
++  oqs_sig_picnic_mzd_local_free(cmp);
+ }
+ 
+ void run_tests(void) {
+diff --git a/tests/mzd_test.c b/tests/mzd_test.c
+index e1243d2..264edde 100644
+--- a/tests/mzd_test.c
++++ b/tests/mzd_test.c
+@@ -5,21 +5,21 @@
+ 
+ static void test_mzd_local_equal(void) {
+   for (unsigned int i = 0; i < 10; ++i) {
+-    mzd_local_t* a = mzd_local_init(1, (i + 1) * 64);
++    mzd_local_t* a = oqs_sig_picnic_mzd_local_init(1, (i + 1) * 64);
+     mzd_randomize_ssl(a);
+-    mzd_local_t* b = mzd_local_copy(NULL, a);
++    mzd_local_t* b = oqs_sig_picnic_mzd_local_copy(NULL, a);
+ 
+-    if (mzd_local_equal(a, b)) {
++    if (oqs_sig_picnic_mzd_local_equal(a, b)) {
+       printf("equal: ok [%u]\n", (i + 1) * 64);
+     }
+ 
+-    b = mzd_xor(b, b, a);
+-    if (mzd_local_equal(a, b)) {
++    b = oqs_sig_picnic_mzd_xor(b, b, a);
++    if (oqs_sig_picnic_mzd_local_equal(a, b)) {
+       printf("equal: ok [%u]\n", (i + 1) * 64);
+     }
+ 
+-    mzd_local_free(a);
+-    mzd_local_free(b);
++    oqs_sig_picnic_mzd_local_free(a);
++    oqs_sig_picnic_mzd_local_free(b);
+   }
+ }
+ 
+@@ -44,27 +44,27 @@ static int test_mzd_mul_avx(void) {
+   for (unsigned int k = 0; k < 3; ++k) {
+ 
+     mzd_t* r  = mzd_mul_naive(c, v, A);
+-    mzd_local_t* rl = mzd_mul_v_avx(c2, vl, Al);
++    mzd_local_t* rl = oqs_sig_picnic_mzd_mul_v_avx(c2, vl, Al);
+ 
+     mzd_local_t* rc = mzd_convert(r);
+ 
+-    if (!mzd_local_equal(rc, rl)) {
++    if (!oqs_sig_picnic_mzd_local_equal(rc, rl)) {
+       printf("mul: fail [%u x %u]\n", size, size);
+       ret = -1;
+     } else {
+       printf("mul: ok [%u x %u]\n", size, size);
+     }
+ 
+-    mzd_local_free(rc);
++    oqs_sig_picnic_mzd_local_free(rc);
+   }
+ 
+   mzd_free(A);
+   mzd_free(v);
+   mzd_free(c);
+ 
+-  mzd_local_free(c2);
+-  mzd_local_free(Al);
+-  mzd_local_free(vl);
++  oqs_sig_picnic_mzd_local_free(c2);
++  oqs_sig_picnic_mzd_local_free(Al);
++  oqs_sig_picnic_mzd_local_free(vl);
+ #endif
+ 
+   return ret;
+@@ -82,18 +82,18 @@ static void test_mzd_mul_vl_neon_192(void) {
+   mzd_randomize(v);
+   mzd_randomize(c);
+ 
+-  mzd_local_t* Al  = mzd_local_copy(NULL, A);
+-  mzd_local_t* All = mzd_precompute_matrix_lookup(Al);
+-  mzd_local_t* vl  = mzd_local_copy(NULL, v);
++  mzd_local_t* Al  = oqs_sig_picnic_mzd_local_copy(NULL, A);
++  mzd_local_t* All = oqs_sig_picnic_mzd_precompute_matrix_lookup(Al);
++  mzd_local_t* vl  = oqs_sig_picnic_mzd_local_copy(NULL, v);
+ 
+-  mzd_local_t* c2 = mzd_local_copy(NULL, c);
++  mzd_local_t* c2 = oqs_sig_picnic_mzd_local_copy(NULL, c);
+ 
+   for (unsigned int k = 0; k < 3; ++k) {
+ 
+     mzd_local_t* r  = mzd_mul_naive(c, v, A);
+     mzd_local_t* rl = mzd_mul_vl_neon_multiple_of_128(c2, vl, All);
+ 
+-    if (!mzd_local_equal(r, rl)) {
++    if (!oqs_sig_picnic_mzd_local_equal(r, rl)) {
+       printf("mul: fail [%u x %u]\n", size, size);
+       printf("r =  ");
+       mzd_print(r);
+@@ -108,9 +108,9 @@ static void test_mzd_mul_vl_neon_192(void) {
+   mzd_free(v);
+   mzd_free(c);
+ 
+-  mzd_local_free(c2);
+-  mzd_local_free(Al);
+-  mzd_local_free(vl);
++  oqs_sig_picnic_mzd_local_free(c2);
++  oqs_sig_picnic_mzd_local_free(Al);
++  oqs_sig_picnic_mzd_local_free(vl);
+ }
+ 
+ static void test_mzd_mul_vl_neon_256(void) {
+@@ -124,18 +124,18 @@ static void test_mzd_mul_vl_neon_256(void) {
+   mzd_randomize(v);
+   mzd_randomize(c);
+ 
+-  mzd_local_t* Al  = mzd_local_copy(NULL, A);
+-  mzd_local_t* All = mzd_precompute_matrix_lookup(Al);
+-  mzd_local_t* vl  = mzd_local_copy(NULL, v);
++  mzd_local_t* Al  = oqs_sig_picnic_mzd_local_copy(NULL, A);
++  mzd_local_t* All = oqs_sig_picnic_mzd_precompute_matrix_lookup(Al);
++  mzd_local_t* vl  = oqs_sig_picnic_mzd_local_copy(NULL, v);
+ 
+-  mzd_local_t* c2 = mzd_local_copy(NULL, c);
++  mzd_local_t* c2 = oqs_sig_picnic_mzd_local_copy(NULL, c);
+ 
+   for (unsigned int k = 0; k < 3; ++k) {
+ 
+     mzd_local_t* r  = mzd_mul_naive(c, v, A);
+     mzd_local_t* rl = mzd_mul_vl_neon_multiple_of_128(c2, vl, All);
+ 
+-    if (!mzd_local_equal(r, rl)) {
++    if (!oqs_sig_picnic_mzd_local_equal(r, rl)) {
+       printf("mul: fail [%u x %u]\n", size, size);
+       printf("r =  ");
+       mzd_print(r);
+@@ -150,9 +150,9 @@ static void test_mzd_mul_vl_neon_256(void) {
+   mzd_free(v);
+   mzd_free(c);
+ 
+-  mzd_local_free(c2);
+-  mzd_local_free(Al);
+-  mzd_local_free(vl);
++  oqs_sig_picnic_mzd_local_free(c2);
++  oqs_sig_picnic_mzd_local_free(Al);
++  oqs_sig_picnic_mzd_local_free(vl);
+ }
+ 
+ static void test_mzd_addmul_vl_neon_192(void) {
+@@ -166,19 +166,19 @@ static void test_mzd_addmul_vl_neon_192(void) {
+   mzd_randomize(v);
+   mzd_randomize(c);
+ 
+-  mzd_local_t* Al  = mzd_local_copy(NULL, A);
+-  mzd_local_t* All = mzd_precompute_matrix_lookup(Al);
+-  mzd_local_t* vl  = mzd_local_copy(NULL, v);
++  mzd_local_t* Al  = oqs_sig_picnic_mzd_local_copy(NULL, A);
++  mzd_local_t* All = oqs_sig_picnic_mzd_precompute_matrix_lookup(Al);
++  mzd_local_t* vl  = oqs_sig_picnic_mzd_local_copy(NULL, v);
+ 
+-  mzd_local_t* c2 = mzd_local_copy(NULL, c);
+-  mzd_local_t* c3 = mzd_local_copy(NULL, c);
++  mzd_local_t* c2 = oqs_sig_picnic_mzd_local_copy(NULL, c);
++  mzd_local_t* c3 = oqs_sig_picnic_mzd_local_copy(NULL, c);
+ 
+   for (unsigned int k = 0; k < 3; ++k) {
+ 
+     mzd_local_t* r   = mzd_addmul_naive(c, v, A);
+     mzd_local_t* rl2 = mzd_addmul_vl_neon(c3, vl, All);
+ 
+-    if (!mzd_local_equal(r, rl2)) {
++    if (!oqs_sig_picnic_mzd_local_equal(r, rl2)) {
+       printf("addmul2: fail [%u x %u]\n", size, size);
+       printf("r =  ");
+       mzd_print(r);
+@@ -193,9 +193,9 @@ static void test_mzd_addmul_vl_neon_192(void) {
+   mzd_free(v);
+   mzd_free(c);
+ 
+-  mzd_local_free(c2);
+-  mzd_local_free(Al);
+-  mzd_local_free(vl);
++  oqs_sig_picnic_mzd_local_free(c2);
++  oqs_sig_picnic_mzd_local_free(Al);
++  oqs_sig_picnic_mzd_local_free(vl);
+ }
+ 
+ static void test_mzd_addmul_vl_neon_256(void) {
+@@ -209,19 +209,19 @@ static void test_mzd_addmul_vl_neon_256(void) {
+   mzd_randomize(v);
+   mzd_randomize(c);
+ 
+-  mzd_local_t* Al  = mzd_local_copy(NULL, A);
+-  mzd_local_t* All = mzd_precompute_matrix_lookup(Al);
+-  mzd_local_t* vl  = mzd_local_copy(NULL, v);
++  mzd_local_t* Al  = oqs_sig_picnic_mzd_local_copy(NULL, A);
++  mzd_local_t* All = oqs_sig_picnic_mzd_precompute_matrix_lookup(Al);
++  mzd_local_t* vl  = oqs_sig_picnic_mzd_local_copy(NULL, v);
+ 
+-  mzd_local_t* c2 = mzd_local_copy(NULL, c);
+-  mzd_local_t* c3 = mzd_local_copy(NULL, c);
++  mzd_local_t* c2 = oqs_sig_picnic_mzd_local_copy(NULL, c);
++  mzd_local_t* c3 = oqs_sig_picnic_mzd_local_copy(NULL, c);
+ 
+   for (unsigned int k = 0; k < 3; ++k) {
+ 
+     mzd_local_t* r   = mzd_addmul_naive(c, v, A);
+     mzd_local_t* rl2 = mzd_addmul_vl_neon(c3, vl, All);
+ 
+-    if (!mzd_local_equal(r, rl2)) {
++    if (!oqs_sig_picnic_mzd_local_equal(r, rl2)) {
+       printf("addmul2: fail [%u x %u]\n", size, size);
+       printf("r =  ");
+       mzd_print(r);
+@@ -236,9 +236,9 @@ static void test_mzd_addmul_vl_neon_256(void) {
+   mzd_free(v);
+   mzd_free(c);
+ 
+-  mzd_local_free(c2);
+-  mzd_local_free(Al);
+-  mzd_local_free(vl);
++  oqs_sig_picnic_mzd_local_free(c2);
++  oqs_sig_picnic_mzd_local_free(Al);
++  oqs_sig_picnic_mzd_local_free(vl);
+ }
+ 
+ #endif
+@@ -255,7 +255,7 @@ static void test_mzd_mul(void) {
+       mzd_randomize(c);
+ 
+       mzd_local_t* Al  = mzd_convert(A);
+-      mzd_local_t* All = mzd_precompute_matrix_lookup(Al);
++      mzd_local_t* All = oqs_sig_picnic_mzd_precompute_matrix_lookup(Al);
+       mzd_local_t* vl  = mzd_convert(v);
+       mzd_local_t* cl  = mzd_convert(c);
+       mzd_local_t* cll = mzd_convert(c);
+@@ -266,20 +266,20 @@ static void test_mzd_mul(void) {
+       mzd_t* c3 = mzd_transpose(NULL, c);
+ 
+       for (unsigned int k = 0; k < 3; ++k) {
+-        mzd_local_t* r  = mzd_mul_v(cl, vl, Al);
+-        mzd_local_t* rl = mzd_mul_vl(cll, vl, All);
++        mzd_local_t* r  = oqs_sig_picnic_mzd_mul_v(cl, vl, Al);
++        mzd_local_t* rl = oqs_sig_picnic_mzd_mul_vl(cll, vl, All);
+         mzd_t* r2 = mzd_mul(c2, v, A, __M4RI_STRASSEN_MUL_CUTOFF);
+         mzd_t* r3 = mzd_mul(c3, At, vt, __M4RI_STRASSEN_MUL_CUTOFF);
+ 
+-        if (!mzd_local_equal(r, rl)) {
++        if (!oqs_sig_picnic_mzd_local_equal(r, rl)) {
+           printf("mul: fail [%u x %u]\n", i * 64, j * 64);
+         }
+ 
+         mzd_local_t* rc = mzd_convert(r2);
+-        if (!mzd_local_equal(r, rc)) {
++        if (!oqs_sig_picnic_mzd_local_equal(r, rc)) {
+           printf("mul: fail [%u x %u]\n", i * 64, j * 64);
+         }
+-        mzd_local_free(rc);
++        oqs_sig_picnic_mzd_local_free(rc);
+ 
+         mzd_t* r4 = mzd_transpose(NULL, r3);
+         if (mzd_cmp(r4, r2) != 0) {
+@@ -297,11 +297,11 @@ static void test_mzd_mul(void) {
+       mzd_free(c2);
+       mzd_free(c3);
+ 
+-      mzd_local_free(All);
+-      mzd_local_free(Al);
+-      mzd_local_free(cll);
+-      mzd_local_free(cl);
+-      mzd_local_free(vl);
++      oqs_sig_picnic_mzd_local_free(All);
++      oqs_sig_picnic_mzd_local_free(Al);
++      oqs_sig_picnic_mzd_local_free(cll);
++      oqs_sig_picnic_mzd_local_free(cl);
++      oqs_sig_picnic_mzd_local_free(vl);
+     }
+   }
+ }
+@@ -310,14 +310,14 @@ static void test_mzd_shift(void) {
+ #ifdef WITH_OPT
+ #ifdef WITH_SSE2
+   if (CPU_SUPPORTS_SSE2) {
+-    mzd_local_t* v = mzd_local_init(1, 128);
+-    mzd_local_t* w = mzd_local_copy(NULL, v);
+-    mzd_local_t* r = mzd_local_copy(NULL, v);
++    mzd_local_t* v = oqs_sig_picnic_mzd_local_init(1, 128);
++    mzd_local_t* w = oqs_sig_picnic_mzd_local_copy(NULL, v);
++    mzd_local_t* r = oqs_sig_picnic_mzd_local_copy(NULL, v);
+     __m128i* wr    = __builtin_assume_aligned(FIRST_ROW(w), 16);
+ 
+     for (unsigned int i = 0; i < 32; ++i) {
+       mzd_randomize_ssl(v);
+-      mzd_local_copy(w, v);
++      oqs_sig_picnic_mzd_local_copy(w, v);
+ 
+       mzd_shift_left(r, v, i);
+       *wr = mm128_shift_left(*wr, i);
+@@ -329,7 +329,7 @@ static void test_mzd_shift(void) {
+ 
+     for (unsigned int i = 0; i < 32; ++i) {
+       mzd_randomize_ssl(v);
+-      mzd_local_copy(w, v);
++      oqs_sig_picnic_mzd_local_copy(w, v);
+ 
+       mzd_shift_right(r, v, i);
+       *wr = mm128_shift_right(*wr, i);
+@@ -339,21 +339,21 @@ static void test_mzd_shift(void) {
+       }
+     }
+ 
+-    mzd_local_free(w);
+-    mzd_local_free(v);
+-    mzd_local_free(r);
++    oqs_sig_picnic_mzd_local_free(w);
++    oqs_sig_picnic_mzd_local_free(v);
++    oqs_sig_picnic_mzd_local_free(r);
+   }
+ #endif
+ #ifdef WITH_AVX2
+   if (CPU_SUPPORTS_AVX2) {
+-    mzd_local_t* v = mzd_local_init(1, 256);
+-    mzd_local_t* w = mzd_local_copy(NULL, v);
+-    mzd_local_t* r = mzd_local_copy(NULL, v);
++    mzd_local_t* v = oqs_sig_picnic_mzd_local_init(1, 256);
++    mzd_local_t* w = oqs_sig_picnic_mzd_local_copy(NULL, v);
++    mzd_local_t* r = oqs_sig_picnic_mzd_local_copy(NULL, v);
+     __m256i* wr    = __builtin_assume_aligned(FIRST_ROW(w), 32);
+ 
+     for (unsigned int i = 0; i < 32; ++i) {
+       mzd_randomize_ssl(v);
+-      mzd_local_copy(w, v);
++      oqs_sig_picnic_mzd_local_copy(w, v);
+ 
+       mzd_shift_left(r, v, i);
+       *wr = mm256_shift_left(*wr, i);
+@@ -365,7 +365,7 @@ static void test_mzd_shift(void) {
+ 
+     for (unsigned int i = 0; i < 32; ++i) {
+       mzd_randomize_ssl(v);
+-      mzd_local_copy(w, v);
++      oqs_sig_picnic_mzd_local_copy(w, v);
+ 
+       mzd_shift_right(r, v, i);
+       mm512_shift_right_avx(wr, wr, i);
+@@ -375,21 +375,21 @@ static void test_mzd_shift(void) {
+       }
+     }
+ 
+-    mzd_local_free(w);
+-    mzd_local_free(v);
+-    mzd_local_free(r);
++    oqs_sig_picnic_mzd_local_free(w);
++    oqs_sig_picnic_mzd_local_free(v);
++    oqs_sig_picnic_mzd_local_free(r);
+   }
+ #endif
+ #ifdef WITH_NEON
+   if (CPU_SUPPORTS_NEON) {
+-    mzd_local_t* v = mzd_local_init(1, 384);
+-    mzd_local_t* w = mzd_local_copy(NULL, v);
+-    mzd_local_t* r = mzd_local_copy(NULL, v);
++    mzd_local_t* v = oqs_sig_picnic_mzd_local_init(1, 384);
++    mzd_local_t* w = oqs_sig_picnic_mzd_local_copy(NULL, v);
++    mzd_local_t* r = oqs_sig_picnic_mzd_local_copy(NULL, v);
+     uint32x4_t* wr = __builtin_assume_aligned(FIRST_ROW(w), alignof(uint32x4_t));
+ 
+     for (unsigned int i = 0; i < 32; ++i) {
+       mzd_randomize_ssl(v);
+-      mzd_local_copy(w, v);
++      oqs_sig_picnic_mzd_local_copy(w, v);
+ 
+       mzd_shift_left(r, v, i);
+       mm384_shift_left(wr, wr, i);
+@@ -406,7 +406,7 @@ static void test_mzd_shift(void) {
+ 
+     for (unsigned int i = 0; i < 32; ++i) {
+       mzd_randomize_ssl(v);
+-      mzd_local_copy(w, v);
++      oqs_sig_picnic_mzd_local_copy(w, v);
+ 
+       mzd_shift_right(r, v, i);
+       mm384_shift_right(wr, wr, i);
+@@ -421,9 +421,9 @@ static void test_mzd_shift(void) {
+       }
+     }
+ 
+-    mzd_local_free(w);
+-    mzd_local_free(v);
+-    mzd_local_free(r);
++    oqs_sig_picnic_mzd_local_free(w);
++    oqs_sig_picnic_mzd_local_free(v);
++    oqs_sig_picnic_mzd_local_free(r);
+   }
+ #endif
+ #endif
+diff --git a/tests/utils.c.i b/tests/utils.c.i
+index 558180a..69b18af 100644
+--- a/tests/utils.c.i
++++ b/tests/utils.c.i
+@@ -8,7 +8,7 @@ void mzd_randomize_ssl(mzd_local_t* val) {
+ }
+ 
+ mzd_local_t* mzd_convert(const mzd_t* v) {
+-  mzd_local_t* r = mzd_local_init(v->nrows, v->ncols);
++  mzd_local_t* r = oqs_sig_picnic_mzd_local_init(v->nrows, v->ncols);
+ 
+   for (rci_t i = 0; i < v->nrows; ++i) {
+     memcpy(ROW(r, i), v->rows[i], v->width * sizeof(word));
diff --git a/crypt/liboqs/sig_picnic/sig_picnic.c b/crypt/liboqs/sig_picnic/sig_picnic.c
new file mode 100644
index 0000000000000000000000000000000000000000..3c7be8b7297cef2bccda0bafe4d4cecf1bfb61d2
--- /dev/null
+++ b/crypt/liboqs/sig_picnic/sig_picnic.c
@@ -0,0 +1,174 @@
+#ifdef ENABLE_SIG_PICNIC
+#if defined(WINDOWS)
+#define UNUSED
+#else
+#define UNUSED __attribute__((unused))
+#endif
+
+#include <string.h>
+#include <oqs/common.h>
+#include <oqs/sig.h>
+#include <oqs/rand.h>
+#include "sig_picnic.h"
+#include "picnic.h"
+
+static char *Picnic_L1_FS_name = "Picnic_L1_FS";
+static char *Picnic_L1_UR_name = "Picnic_L1_UR";
+static char *Picnic_L3_FS_name = "Picnic_L3_FS";
+static char *Picnic_L3_UR_name = "Picnic_L3_UR";
+static char *Picnic_L5_FS_name = "Picnic_L5_FS";
+static char *Picnic_L5_UR_name = "Picnic_L5_UR";
+static size_t PRIV_KEY_LEN[] = {
+    0,
+    PICNIC_PRIVATE_KEY_SIZE(Picnic_L1_FS),
+    PICNIC_PRIVATE_KEY_SIZE(Picnic_L1_UR),
+    PICNIC_PRIVATE_KEY_SIZE(Picnic_L3_FS),
+    PICNIC_PRIVATE_KEY_SIZE(Picnic_L3_UR),
+    PICNIC_PRIVATE_KEY_SIZE(Picnic_L5_FS),
+    PICNIC_PRIVATE_KEY_SIZE(Picnic_L5_UR)};
+static size_t PUB_KEY_LEN[] = {
+    0,
+    PICNIC_PUBLIC_KEY_SIZE(Picnic_L1_FS),
+    PICNIC_PUBLIC_KEY_SIZE(Picnic_L1_UR),
+    PICNIC_PUBLIC_KEY_SIZE(Picnic_L3_FS),
+    PICNIC_PUBLIC_KEY_SIZE(Picnic_L3_UR),
+    PICNIC_PUBLIC_KEY_SIZE(Picnic_L5_FS),
+    PICNIC_PUBLIC_KEY_SIZE(Picnic_L5_UR)};
+static size_t SIG_LEN[] = {
+    0,
+    PICNIC_SIGNATURE_SIZE_Picnic_L1_FS,
+    PICNIC_SIGNATURE_SIZE_Picnic_L1_UR,
+    PICNIC_SIGNATURE_SIZE_Picnic_L3_FS,
+    PICNIC_SIGNATURE_SIZE_Picnic_L3_UR,
+    PICNIC_SIGNATURE_SIZE_Picnic_L5_FS,
+    PICNIC_SIGNATURE_SIZE_Picnic_L5_UR};
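+// Note (editorial, assumption based on the checks in picnic_impl.c above): these
+// arrays are indexed directly by picnic_params_t, so index 0 corresponds to
+// PARAMETER_SET_INVALID and is never used.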
+
+typedef struct PICNIC_CTX {
+	picnic_params_t params;
+} PICNIC_CTX;
+
+int OQS_SIG_picnic_get(OQS_SIG *s, enum OQS_SIG_algid algid) {
+	if (s == NULL) {
+		return OQS_ERROR;
+	}
+
+	PICNIC_CTX *pctx = malloc(sizeof(PICNIC_CTX));
+	if (pctx == NULL) {
+		return OQS_ERROR;
+	}
+
+	// set the scheme-specific alg values
+	// NOTE: the key and sig len values use macros, so we can't
+	//       parametrize them with pctx->params to shorten the code.
+	switch (algid) {
+	case OQS_SIG_picnic_default:
+	case OQS_SIG_picnic_L1_FS:
+		pctx->params = Picnic_L1_FS;
+		s->method_name = Picnic_L1_FS_name;
+		s->estimated_classical_security = 128;
+		s->estimated_quantum_security = 64;
+		break;
+	case OQS_SIG_picnic_L1_UR:
+		pctx->params = Picnic_L1_UR;
+		s->method_name = Picnic_L1_UR_name;
+		s->estimated_classical_security = 128;
+		s->estimated_quantum_security = 64;
+		break;
+	case OQS_SIG_picnic_L3_FS:
+		pctx->params = Picnic_L3_FS;
+		s->method_name = Picnic_L3_FS_name;
+		s->estimated_classical_security = 192;
+		s->estimated_quantum_security = 96;
+		break;
+	case OQS_SIG_picnic_L3_UR:
+		pctx->params = Picnic_L3_UR;
+		s->method_name = Picnic_L3_UR_name;
+		s->estimated_classical_security = 192;
+		s->estimated_quantum_security = 96;
+		break;
+	case OQS_SIG_picnic_L5_FS:
+		pctx->params = Picnic_L5_FS;
+		s->method_name = Picnic_L5_FS_name;
+		s->estimated_classical_security = 256;
+		s->estimated_quantum_security = 128;
+		break;
+	case OQS_SIG_picnic_L5_UR:
+		pctx->params = Picnic_L5_UR;
+		s->method_name = Picnic_L5_UR_name;
+		s->estimated_classical_security = 256;
+		s->estimated_quantum_security = 128;
+		break;
+	default:
+		free(pctx);
+		return OQS_ERROR;
+	}
+	// set the ctx, sizes, and API functions
+	s->ctx = pctx;
+	s->priv_key_len = PRIV_KEY_LEN[pctx->params] + PUB_KEY_LEN[pctx->params]; // priv key also contains pub key
+	s->pub_key_len = PUB_KEY_LEN[pctx->params];
+	s->max_sig_len = SIG_LEN[pctx->params];
+	s->keygen = &OQS_SIG_picnic_keygen;
+	s->sign = &OQS_SIG_picnic_sign;
+	s->verify = &OQS_SIG_picnic_verify;
+
+	return OQS_SUCCESS;
+}
+
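+/* Illustrative usage sketch (documentation only, not part of the API changes in
+ * this patch; field and function names are taken from the code above, while the
+ * message buffer and error handling are placeholders):
+ *
+ *   OQS_SIG s;
+ *   if (OQS_SIG_picnic_get(&s, OQS_SIG_picnic_L1_FS) != OQS_SUCCESS) { ... }
+ *   uint8_t *priv = malloc(s.priv_key_len), *pub = malloc(s.pub_key_len);
+ *   uint8_t *sig  = malloc(s.max_sig_len);
+ *   size_t sig_len = s.max_sig_len;   // buffer size in, signature length out
+ *   if (s.keygen(&s, priv, pub) == OQS_SUCCESS &&
+ *       s.sign(&s, priv, msg, msg_len, sig, &sig_len) == OQS_SUCCESS &&
+ *       s.verify(&s, pub, msg, msg_len, sig, sig_len) == OQS_SUCCESS) {
+ *     // keygen/sign/verify round trip succeeded
+ *   }
+ */
+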
+int OQS_SIG_picnic_keygen(const OQS_SIG *s, uint8_t *priv, uint8_t *pub) {
+	if (s == NULL || priv == NULL || pub == NULL) {
+		return OQS_ERROR;
+	}
+	picnic_publickey_t pk;
+	picnic_privatekey_t sk;
+	picnic_params_t parameters = ((PICNIC_CTX *) s->ctx)->params;
+	int ret = picnic_keygen(parameters, &pk, &sk);
+	if (ret != 0) {
+		return OQS_ERROR;
+	}
+	// serialize the public key
+	int pk_len = picnic_write_public_key(&pk, pub, PUB_KEY_LEN[parameters]);
+	if ((size_t) pk_len != PUB_KEY_LEN[parameters]) {
+		return OQS_ERROR;
+	}
+
+	// serialize the private key
+	int sk_len = picnic_write_private_key(&sk, priv, PRIV_KEY_LEN[parameters]);
+	if ((size_t) sk_len != PRIV_KEY_LEN[parameters]) {
+		return OQS_ERROR;
+	}
+	// wipe the private key
+	OQS_MEM_cleanse(&sk, sizeof(picnic_privatekey_t));
+	return OQS_SUCCESS;
+}
+
+int OQS_SIG_picnic_sign(const OQS_SIG *s, const uint8_t *priv, const uint8_t *msg, const size_t msg_len, uint8_t *sig, size_t *sig_len) {
+	if (s == NULL || priv == NULL || msg == NULL || sig == NULL || sig_len == NULL) {
+		return OQS_ERROR;
+	}
+	picnic_privatekey_t sk;
+	picnic_params_t parameters = ((PICNIC_CTX *) s->ctx)->params;
+	// deserialize the private key
+	if (picnic_read_private_key(&sk, priv, PRIV_KEY_LEN[parameters]) != 0) {
+		return OQS_ERROR;
+	}
+	if (picnic_sign(&sk, msg, msg_len, sig, sig_len) != 0) {
+		return OQS_ERROR;
+	}
+	return OQS_SUCCESS;
+}
+
+int OQS_SIG_picnic_verify(const OQS_SIG *s, const uint8_t *pub, const uint8_t *msg, const size_t msg_len, const uint8_t *sig, const size_t sig_len) {
+	if (s == NULL || pub == NULL || msg == NULL || sig == NULL) {
+		return OQS_ERROR;
+	}
+	picnic_publickey_t pk;
+	// deserialize the public key
+	picnic_params_t parameters = ((PICNIC_CTX *) s->ctx)->params;
+	if (picnic_read_public_key(&pk, pub, PUB_KEY_LEN[parameters]) != 0) {
+		return OQS_ERROR;
+	}
+	if (picnic_verify(&pk, msg, msg_len, sig, sig_len) != 0) {
+		return OQS_ERROR;
+	}
+	return OQS_SUCCESS;
+}
+#endif
diff --git a/crypt/liboqs/sig_picnic/sig_picnic.h b/crypt/liboqs/sig_picnic/sig_picnic.h
new file mode 100644
index 0000000000000000000000000000000000000000..8ffa5fd2c0113fd6d6fb41e9d5841bc7869c9822
--- /dev/null
+++ b/crypt/liboqs/sig_picnic/sig_picnic.h
@@ -0,0 +1,20 @@
+/**
+ * \file sig_picnic.h
+ * \brief Header for the Microsoft Picnic library
+ */
+#ifndef __OQS_SIG_PICNIC_H
+#define __OQS_SIG_PICNIC_H
+
+#ifdef ENABLE_SIG_PICNIC
+#include <stddef.h>
+#include <stdint.h>
+
+#include <oqs/sig.h>
+#include <oqs/rand.h>
+
+/* Populates `sig` with the sizes and function pointers for the Picnic variant selected by `algid`. */
+int OQS_SIG_picnic_get(OQS_SIG *sig, enum OQS_SIG_algid algid);
+/* Generates a Picnic key pair and serializes it into `priv` and `pub`. */
+int OQS_SIG_picnic_keygen(const OQS_SIG *s, uint8_t *priv, uint8_t *pub);
+/* Signs `msg` with the serialized private key `priv`; the signature length is returned via `sig_len`. */
+int OQS_SIG_picnic_sign(const OQS_SIG *s, const uint8_t *priv, const uint8_t *msg, const size_t msg_len, uint8_t *sig, size_t *sig_len);
+/* Verifies `sig` over `msg` against the serialized public key `pub`. */
+int OQS_SIG_picnic_verify(const OQS_SIG *s, const uint8_t *pub, const uint8_t *msg, const size_t msg_len, const uint8_t *sig, const size_t sig_len);
+#endif
+#endif