BoringSSL: Roll generated files forward.

This allows rolling boringssl forward to mid-December at:

d519bf6be0b447fb80fbc539d4bff4479b5482a2

R=johnmccutchan@google.com

Review-Url: https://codereview.chromium.org/2569253003 .
diff --git a/BUILD.generated.gni b/BUILD.generated.gni
index d1ac9be..6836fab 100644
--- a/BUILD.generated.gni
+++ b/BUILD.generated.gni
@@ -7,10 +7,11 @@
 crypto_sources = [
   "err_data.c",
   "src/crypto/aes/aes.c",
+  "src/crypto/aes/internal.h",
+  "src/crypto/aes/key_wrap.c",
   "src/crypto/aes/mode_wrappers.c",
   "src/crypto/asn1/a_bitstr.c",
   "src/crypto/asn1/a_bool.c",
-  "src/crypto/asn1/a_bytes.c",
   "src/crypto/asn1/a_d2i_fp.c",
   "src/crypto/asn1/a_dup.c",
   "src/crypto/asn1/a_enum.c",
@@ -27,6 +28,7 @@
   "src/crypto/asn1/a_utctm.c",
   "src/crypto/asn1/a_utf8.c",
   "src/crypto/asn1/asn1_lib.c",
+  "src/crypto/asn1/asn1_locl.h",
   "src/crypto/asn1/asn1_par.c",
   "src/crypto/asn1/asn_pack.c",
   "src/crypto/asn1/f_enum.c",
@@ -49,6 +51,7 @@
   "src/crypto/bio/fd.c",
   "src/crypto/bio/file.c",
   "src/crypto/bio/hexdump.c",
+  "src/crypto/bio/internal.h",
   "src/crypto/bio/pair.c",
   "src/crypto/bio/printf.c",
   "src/crypto/bio/socket.c",
@@ -64,12 +67,15 @@
   "src/crypto/bn/exponentiation.c",
   "src/crypto/bn/gcd.c",
   "src/crypto/bn/generic.c",
+  "src/crypto/bn/internal.h",
   "src/crypto/bn/kronecker.c",
   "src/crypto/bn/montgomery.c",
+  "src/crypto/bn/montgomery_inv.c",
   "src/crypto/bn/mul.c",
   "src/crypto/bn/prime.c",
   "src/crypto/bn/random.c",
   "src/crypto/bn/rsaz_exp.c",
+  "src/crypto/bn/rsaz_exp.h",
   "src/crypto/bn/shift.c",
   "src/crypto/bn/sqrt.c",
   "src/crypto/buf/buf.c",
@@ -77,6 +83,7 @@
   "src/crypto/bytestring/ber.c",
   "src/crypto/bytestring/cbb.c",
   "src/crypto/bytestring/cbs.c",
+  "src/crypto/bytestring/internal.h",
   "src/crypto/chacha/chacha.c",
   "src/crypto/cipher/aead.c",
   "src/crypto/cipher/cipher.c",
@@ -89,34 +96,45 @@
   "src/crypto/cipher/e_rc4.c",
   "src/crypto/cipher/e_ssl3.c",
   "src/crypto/cipher/e_tls.c",
+  "src/crypto/cipher/internal.h",
   "src/crypto/cipher/tls_cbc.c",
   "src/crypto/cmac/cmac.c",
   "src/crypto/conf/conf.c",
+  "src/crypto/conf/conf_def.h",
+  "src/crypto/conf/internal.h",
   "src/crypto/cpu-aarch64-linux.c",
   "src/crypto/cpu-arm-linux.c",
   "src/crypto/cpu-arm.c",
   "src/crypto/cpu-intel.c",
+  "src/crypto/cpu-ppc64le.c",
   "src/crypto/crypto.c",
   "src/crypto/curve25519/curve25519.c",
+  "src/crypto/curve25519/internal.h",
   "src/crypto/curve25519/spake25519.c",
   "src/crypto/curve25519/x25519-x86_64.c",
   "src/crypto/des/des.c",
+  "src/crypto/des/internal.h",
   "src/crypto/dh/check.c",
   "src/crypto/dh/dh.c",
   "src/crypto/dh/dh_asn1.c",
   "src/crypto/dh/params.c",
   "src/crypto/digest/digest.c",
   "src/crypto/digest/digests.c",
+  "src/crypto/digest/internal.h",
+  "src/crypto/digest/md32_common.h",
   "src/crypto/dsa/dsa.c",
   "src/crypto/dsa/dsa_asn1.c",
   "src/crypto/ec/ec.c",
   "src/crypto/ec/ec_asn1.c",
   "src/crypto/ec/ec_key.c",
   "src/crypto/ec/ec_montgomery.c",
+  "src/crypto/ec/internal.h",
   "src/crypto/ec/oct.c",
   "src/crypto/ec/p224-64.c",
   "src/crypto/ec/p256-64.c",
+  "src/crypto/ec/p256-x86_64-table.h",
   "src/crypto/ec/p256-x86_64.c",
+  "src/crypto/ec/p256-x86_64.h",
   "src/crypto/ec/simple.c",
   "src/crypto/ec/util-64.c",
   "src/crypto/ec/wnaf.c",
@@ -129,6 +147,7 @@
   "src/crypto/evp/evp.c",
   "src/crypto/evp/evp_asn1.c",
   "src/crypto/evp/evp_ctx.c",
+  "src/crypto/evp/internal.h",
   "src/crypto/evp/p_dsa_asn1.c",
   "src/crypto/evp/p_ec.c",
   "src/crypto/evp/p_ec_asn1.c",
@@ -140,6 +159,7 @@
   "src/crypto/ex_data.c",
   "src/crypto/hkdf/hkdf.c",
   "src/crypto/hmac/hmac.c",
+  "src/crypto/internal.h",
   "src/crypto/lhash/lhash.c",
   "src/crypto/md4/md4.c",
   "src/crypto/md5/md5.c",
@@ -148,15 +168,13 @@
   "src/crypto/modes/cfb.c",
   "src/crypto/modes/ctr.c",
   "src/crypto/modes/gcm.c",
+  "src/crypto/modes/internal.h",
   "src/crypto/modes/ofb.c",
-  "src/crypto/newhope/error_correction.c",
-  "src/crypto/newhope/newhope.c",
-  "src/crypto/newhope/ntt.c",
-  "src/crypto/newhope/poly.c",
-  "src/crypto/newhope/precomp.c",
-  "src/crypto/newhope/reduce.c",
+  "src/crypto/modes/polyval.c",
   "src/crypto/obj/obj.c",
+  "src/crypto/obj/obj_dat.h",
   "src/crypto/obj/obj_xref.c",
+  "src/crypto/obj/obj_xref.h",
   "src/crypto/pem/pem_all.c",
   "src/crypto/pem/pem_info.c",
   "src/crypto/pem/pem_lib.c",
@@ -165,14 +183,19 @@
   "src/crypto/pem/pem_pkey.c",
   "src/crypto/pem/pem_x509.c",
   "src/crypto/pem/pem_xaux.c",
+  "src/crypto/pkcs8/internal.h",
   "src/crypto/pkcs8/p5_pbe.c",
   "src/crypto/pkcs8/p5_pbev2.c",
   "src/crypto/pkcs8/p8_pkey.c",
   "src/crypto/pkcs8/pkcs8.c",
+  "src/crypto/poly1305/internal.h",
   "src/crypto/poly1305/poly1305.c",
   "src/crypto/poly1305/poly1305_arm.c",
   "src/crypto/poly1305/poly1305_vec.c",
+  "src/crypto/pool/internal.h",
+  "src/crypto/pool/pool.c",
   "src/crypto/rand/deterministic.c",
+  "src/crypto/rand/internal.h",
   "src/crypto/rand/rand.c",
   "src/crypto/rand/urandom.c",
   "src/crypto/rand/windows.c",
@@ -180,10 +203,12 @@
   "src/crypto/refcount_c11.c",
   "src/crypto/refcount_lock.c",
   "src/crypto/rsa/blinding.c",
+  "src/crypto/rsa/internal.h",
   "src/crypto/rsa/padding.c",
   "src/crypto/rsa/rsa.c",
   "src/crypto/rsa/rsa_asn1.c",
   "src/crypto/rsa/rsa_impl.c",
+  "src/crypto/sha/sha1-altivec.c",
   "src/crypto/sha/sha1.c",
   "src/crypto/sha/sha256.c",
   "src/crypto/sha/sha512.c",
@@ -201,13 +226,16 @@
   "src/crypto/x509/asn1_gen.c",
   "src/crypto/x509/by_dir.c",
   "src/crypto/x509/by_file.c",
+  "src/crypto/x509/charmap.h",
   "src/crypto/x509/i2d_pr.c",
+  "src/crypto/x509/internal.h",
   "src/crypto/x509/pkcs7.c",
   "src/crypto/x509/rsa_pss.c",
   "src/crypto/x509/t_crl.c",
   "src/crypto/x509/t_req.c",
   "src/crypto/x509/t_x509.c",
   "src/crypto/x509/t_x509a.c",
+  "src/crypto/x509/vpm_int.h",
   "src/crypto/x509/x509.c",
   "src/crypto/x509/x509_att.c",
   "src/crypto/x509/x509_cmp.c",
@@ -244,8 +272,10 @@
   "src/crypto/x509/x_val.c",
   "src/crypto/x509/x_x509.c",
   "src/crypto/x509/x_x509a.c",
+  "src/crypto/x509v3/ext_dat.h",
   "src/crypto/x509v3/pcy_cache.c",
   "src/crypto/x509v3/pcy_data.c",
+  "src/crypto/x509v3/pcy_int.h",
   "src/crypto/x509v3/pcy_lib.c",
   "src/crypto/x509v3/pcy_map.c",
   "src/crypto/x509v3/pcy_node.c",
@@ -276,9 +306,82 @@
   "src/crypto/x509v3/v3_skey.c",
   "src/crypto/x509v3/v3_sxnet.c",
   "src/crypto/x509v3/v3_utl.c",
+  "src/include/openssl/aead.h",
+  "src/include/openssl/aes.h",
+  "src/include/openssl/arm_arch.h",
+  "src/include/openssl/asn1.h",
+  "src/include/openssl/asn1_mac.h",
+  "src/include/openssl/asn1t.h",
+  "src/include/openssl/base.h",
+  "src/include/openssl/base64.h",
+  "src/include/openssl/bio.h",
+  "src/include/openssl/blowfish.h",
+  "src/include/openssl/bn.h",
+  "src/include/openssl/buf.h",
+  "src/include/openssl/buffer.h",
+  "src/include/openssl/bytestring.h",
+  "src/include/openssl/cast.h",
+  "src/include/openssl/chacha.h",
+  "src/include/openssl/cipher.h",
+  "src/include/openssl/cmac.h",
+  "src/include/openssl/conf.h",
+  "src/include/openssl/cpu.h",
+  "src/include/openssl/crypto.h",
+  "src/include/openssl/curve25519.h",
+  "src/include/openssl/des.h",
+  "src/include/openssl/dh.h",
+  "src/include/openssl/digest.h",
+  "src/include/openssl/dsa.h",
+  "src/include/openssl/ec.h",
+  "src/include/openssl/ec_key.h",
+  "src/include/openssl/ecdh.h",
+  "src/include/openssl/ecdsa.h",
+  "src/include/openssl/engine.h",
+  "src/include/openssl/err.h",
+  "src/include/openssl/evp.h",
+  "src/include/openssl/ex_data.h",
+  "src/include/openssl/hkdf.h",
+  "src/include/openssl/hmac.h",
+  "src/include/openssl/lhash.h",
+  "src/include/openssl/lhash_macros.h",
+  "src/include/openssl/md4.h",
+  "src/include/openssl/md5.h",
+  "src/include/openssl/mem.h",
+  "src/include/openssl/nid.h",
+  "src/include/openssl/obj.h",
+  "src/include/openssl/obj_mac.h",
+  "src/include/openssl/objects.h",
+  "src/include/openssl/opensslconf.h",
+  "src/include/openssl/opensslv.h",
+  "src/include/openssl/ossl_typ.h",
+  "src/include/openssl/pem.h",
+  "src/include/openssl/pkcs12.h",
+  "src/include/openssl/pkcs7.h",
+  "src/include/openssl/pkcs8.h",
+  "src/include/openssl/poly1305.h",
+  "src/include/openssl/pool.h",
+  "src/include/openssl/rand.h",
+  "src/include/openssl/rc4.h",
+  "src/include/openssl/ripemd.h",
+  "src/include/openssl/rsa.h",
+  "src/include/openssl/safestack.h",
+  "src/include/openssl/sha.h",
+  "src/include/openssl/srtp.h",
+  "src/include/openssl/stack.h",
+  "src/include/openssl/stack_macros.h",
+  "src/include/openssl/thread.h",
+  "src/include/openssl/time_support.h",
+  "src/include/openssl/type_check.h",
+  "src/include/openssl/x509.h",
+  "src/include/openssl/x509_vfy.h",
+  "src/include/openssl/x509v3.h",
 ]
 
 ssl_sources = [
+  "src/include/openssl/dtls1.h",
+  "src/include/openssl/ssl.h",
+  "src/include/openssl/ssl3.h",
+  "src/include/openssl/tls1.h",
   "src/ssl/custom_extensions.c",
   "src/ssl/d1_both.c",
   "src/ssl/d1_lib.c",
@@ -288,6 +391,7 @@
   "src/ssl/dtls_record.c",
   "src/ssl/handshake_client.c",
   "src/ssl/handshake_server.c",
+  "src/ssl/internal.h",
   "src/ssl/s3_both.c",
   "src/ssl/s3_enc.c",
   "src/ssl/s3_lib.c",
@@ -305,6 +409,10 @@
   "src/ssl/ssl_stat.c",
   "src/ssl/t1_enc.c",
   "src/ssl/t1_lib.c",
+  "src/ssl/tls13_both.c",
+  "src/ssl/tls13_client.c",
+  "src/ssl/tls13_enc.c",
+  "src/ssl/tls13_server.c",
   "src/ssl/tls_method.c",
   "src/ssl/tls_record.c",
 ]
@@ -334,6 +442,11 @@
   "src/crypto/poly1305/poly1305_arm_asm.S",
 ]
 
+crypto_sources_linux_ppc64le = [
+  "linux-ppc64le/crypto/aes/aesp8-ppc.S",
+  "linux-ppc64le/crypto/modes/ghashp8-ppc.S",
+]
+
 crypto_sources_linux_x86 = [
   "linux-x86/crypto/aes/aes-586.S",
   "linux-x86/crypto/aes/aesni-x86.S",
@@ -344,7 +457,6 @@
   "linux-x86/crypto/chacha/chacha-x86.S",
   "linux-x86/crypto/md5/md5-586.S",
   "linux-x86/crypto/modes/ghash-x86.S",
-  "linux-x86/crypto/rc4/rc4-586.S",
   "linux-x86/crypto/sha/sha1-586.S",
   "linux-x86/crypto/sha/sha256-586.S",
   "linux-x86/crypto/sha/sha512-586.S",
@@ -365,7 +477,6 @@
   "linux-x86_64/crypto/modes/aesni-gcm-x86_64.S",
   "linux-x86_64/crypto/modes/ghash-x86_64.S",
   "linux-x86_64/crypto/rand/rdrand-x86_64.S",
-  "linux-x86_64/crypto/rc4/rc4-x86_64.S",
   "linux-x86_64/crypto/sha/sha1-x86_64.S",
   "linux-x86_64/crypto/sha/sha256-x86_64.S",
   "linux-x86_64/crypto/sha/sha512-x86_64.S",
@@ -382,7 +493,6 @@
   "mac-x86/crypto/chacha/chacha-x86.S",
   "mac-x86/crypto/md5/md5-586.S",
   "mac-x86/crypto/modes/ghash-x86.S",
-  "mac-x86/crypto/rc4/rc4-586.S",
   "mac-x86/crypto/sha/sha1-586.S",
   "mac-x86/crypto/sha/sha256-586.S",
   "mac-x86/crypto/sha/sha512-586.S",
@@ -403,7 +513,6 @@
   "mac-x86_64/crypto/modes/aesni-gcm-x86_64.S",
   "mac-x86_64/crypto/modes/ghash-x86_64.S",
   "mac-x86_64/crypto/rand/rdrand-x86_64.S",
-  "mac-x86_64/crypto/rc4/rc4-x86_64.S",
   "mac-x86_64/crypto/sha/sha1-x86_64.S",
   "mac-x86_64/crypto/sha/sha256-x86_64.S",
   "mac-x86_64/crypto/sha/sha512-x86_64.S",
@@ -420,7 +529,6 @@
   "win-x86/crypto/chacha/chacha-x86.asm",
   "win-x86/crypto/md5/md5-586.asm",
   "win-x86/crypto/modes/ghash-x86.asm",
-  "win-x86/crypto/rc4/rc4-586.asm",
   "win-x86/crypto/sha/sha1-586.asm",
   "win-x86/crypto/sha/sha256-586.asm",
   "win-x86/crypto/sha/sha512-586.asm",
@@ -441,7 +549,6 @@
   "win-x86_64/crypto/modes/aesni-gcm-x86_64.asm",
   "win-x86_64/crypto/modes/ghash-x86_64.asm",
   "win-x86_64/crypto/rand/rdrand-x86_64.asm",
-  "win-x86_64/crypto/rc4/rc4-x86_64.asm",
   "win-x86_64/crypto/sha/sha1-x86_64.asm",
   "win-x86_64/crypto/sha/sha256-x86_64.asm",
   "win-x86_64/crypto/sha/sha512-x86_64.asm",
@@ -454,5 +561,7 @@
   "privkey",
   "read_pem",
   "server",
+  "session",
   "spki",
+  "ssl_ctx_api",
 ]
diff --git a/BUILD.generated_tests.gni b/BUILD.generated_tests.gni
index 16bddff..2889b32 100644
--- a/BUILD.generated_tests.gni
+++ b/BUILD.generated_tests.gni
@@ -8,12 +8,10 @@
   "src/crypto/test/file_test.cc",
   "src/crypto/test/file_test.h",
   "src/crypto/test/malloc.cc",
-  "src/crypto/test/scoped_types.h",
   "src/crypto/test/test_util.cc",
   "src/crypto/test/test_util.h",
   "src/ssl/test/async_bio.h",
   "src/ssl/test/packeted_bio.h",
-  "src/ssl/test/scoped_types.h",
   "src/ssl/test/test_config.h",
 ]
 
@@ -246,6 +244,42 @@
     deps = invoker.deps
   }
 
+  executable("boringssl_p256-x86_64_test") {
+    sources = [
+      "src/crypto/ec/p256-x86_64_test.cc",
+    ]
+    sources += _test_support_sources
+    if (defined(invoker.configs_exclude)) {
+      configs -= invoker.configs_exclude
+    }
+    configs += invoker.configs
+    deps = invoker.deps
+  }
+
+  executable("boringssl_ecdh_test") {
+    sources = [
+      "src/crypto/ecdh/ecdh_test.cc",
+    ]
+    sources += _test_support_sources
+    if (defined(invoker.configs_exclude)) {
+      configs -= invoker.configs_exclude
+    }
+    configs += invoker.configs
+    deps = invoker.deps
+  }
+
+  executable("boringssl_ecdsa_sign_test") {
+    sources = [
+      "src/crypto/ecdsa/ecdsa_sign_test.cc",
+    ]
+    sources += _test_support_sources
+    if (defined(invoker.configs_exclude)) {
+      configs -= invoker.configs_exclude
+    }
+    configs += invoker.configs
+    deps = invoker.deps
+  }
+
   executable("boringssl_ecdsa_test") {
     sources = [
       "src/crypto/ecdsa/ecdsa_test.cc",
@@ -258,6 +292,18 @@
     deps = invoker.deps
   }
 
+  executable("boringssl_ecdsa_verify_test") {
+    sources = [
+      "src/crypto/ecdsa/ecdsa_verify_test.cc",
+    ]
+    sources += _test_support_sources
+    if (defined(invoker.configs_exclude)) {
+      configs -= invoker.configs_exclude
+    }
+    configs += invoker.configs
+    deps = invoker.deps
+  }
+
   executable("boringssl_err_test") {
     sources = [
       "src/crypto/err/err_test.cc",
@@ -344,43 +390,7 @@
 
   executable("boringssl_gcm_test") {
     sources = [
-      "src/crypto/modes/gcm_test.c",
-    ]
-    sources += _test_support_sources
-    if (defined(invoker.configs_exclude)) {
-      configs -= invoker.configs_exclude
-    }
-    configs += invoker.configs
-    deps = invoker.deps
-  }
-
-  executable("boringssl_newhope_statistical_test") {
-    sources = [
-      "src/crypto/newhope/newhope_statistical_test.cc",
-    ]
-    sources += _test_support_sources
-    if (defined(invoker.configs_exclude)) {
-      configs -= invoker.configs_exclude
-    }
-    configs += invoker.configs
-    deps = invoker.deps
-  }
-
-  executable("boringssl_newhope_test") {
-    sources = [
-      "src/crypto/newhope/newhope_test.cc",
-    ]
-    sources += _test_support_sources
-    if (defined(invoker.configs_exclude)) {
-      configs -= invoker.configs_exclude
-    }
-    configs += invoker.configs
-    deps = invoker.deps
-  }
-
-  executable("boringssl_newhope_vectors_test") {
-    sources = [
-      "src/crypto/newhope/newhope_vectors_test.cc",
+      "src/crypto/modes/gcm_test.cc",
     ]
     sources += _test_support_sources
     if (defined(invoker.configs_exclude)) {
@@ -438,6 +448,18 @@
     deps = invoker.deps
   }
 
+  executable("boringssl_pool_test") {
+    sources = [
+      "src/crypto/pool/pool_test.cc",
+    ]
+    sources += _test_support_sources
+    if (defined(invoker.configs_exclude)) {
+      configs -= invoker.configs_exclude
+    }
+    configs += invoker.configs
+    deps = invoker.deps
+  }
+
   executable("boringssl_refcount_test") {
     sources = [
       "src/crypto/refcount_test.c",
@@ -551,7 +573,10 @@
       ":boringssl_digest_test",
       ":boringssl_dsa_test",
       ":boringssl_ec_test",
+      ":boringssl_ecdh_test",
+      ":boringssl_ecdsa_sign_test",
       ":boringssl_ecdsa_test",
+      ":boringssl_ecdsa_verify_test",
       ":boringssl_ed25519_test",
       ":boringssl_err_test",
       ":boringssl_evp_extra_test",
@@ -561,15 +586,14 @@
       ":boringssl_hkdf_test",
       ":boringssl_hmac_test",
       ":boringssl_lhash_test",
-      ":boringssl_newhope_statistical_test",
-      ":boringssl_newhope_test",
-      ":boringssl_newhope_vectors_test",
       ":boringssl_obj_test",
+      ":boringssl_p256-x86_64_test",
       ":boringssl_pbkdf_test",
       ":boringssl_pkcs12_test",
       ":boringssl_pkcs7_test",
       ":boringssl_pkcs8_test",
       ":boringssl_poly1305_test",
+      ":boringssl_pool_test",
       ":boringssl_refcount_test",
       ":boringssl_rsa_test",
       ":boringssl_spake25519_test",
diff --git a/README b/README
index 2b8b5a5..b1c5469 100644
--- a/README
+++ b/README
@@ -1,6 +1,6 @@
 This repository contains the files generated by boringssl for its build.
-It also contains this file and the files BUILD.gn, boringssl_dart.gyp, and
-boringssl_configurations.gypi.
+It also contains this file and the files BUILD.gn, boringssl_dart.gyp,
+boringssl_configurations.gypi, and codereview.settings.
 
 The generated source is for boringssl from:
 
@@ -8,7 +8,7 @@
 
 at revision:
 
-8d343b44bbab829d1a28fdef650ca95f7db4412e
+d519bf6be0b447fb80fbc539d4bff4479b5482a2
 
 To roll boringssl forward, delete all but this file and the above mentioned
 files, checkout the new boringssl into a subdirectory called src/, and run the
diff --git a/boringssl.gypi b/boringssl.gypi
index 2c7e7c7..f0cf2f7 100644
--- a/boringssl.gypi
+++ b/boringssl.gypi
@@ -7,6 +7,10 @@
 {
   'variables': {
     'boringssl_ssl_sources': [
+      'src/include/openssl/dtls1.h',
+      'src/include/openssl/ssl.h',
+      'src/include/openssl/ssl3.h',
+      'src/include/openssl/tls1.h',
       'src/ssl/custom_extensions.c',
       'src/ssl/d1_both.c',
       'src/ssl/d1_lib.c',
@@ -16,6 +20,7 @@
       'src/ssl/dtls_record.c',
       'src/ssl/handshake_client.c',
       'src/ssl/handshake_server.c',
+      'src/ssl/internal.h',
       'src/ssl/s3_both.c',
       'src/ssl/s3_enc.c',
       'src/ssl/s3_lib.c',
@@ -33,16 +38,21 @@
       'src/ssl/ssl_stat.c',
       'src/ssl/t1_enc.c',
       'src/ssl/t1_lib.c',
+      'src/ssl/tls13_both.c',
+      'src/ssl/tls13_client.c',
+      'src/ssl/tls13_enc.c',
+      'src/ssl/tls13_server.c',
       'src/ssl/tls_method.c',
       'src/ssl/tls_record.c',
     ],
     'boringssl_crypto_sources': [
       'err_data.c',
       'src/crypto/aes/aes.c',
+      'src/crypto/aes/internal.h',
+      'src/crypto/aes/key_wrap.c',
       'src/crypto/aes/mode_wrappers.c',
       'src/crypto/asn1/a_bitstr.c',
       'src/crypto/asn1/a_bool.c',
-      'src/crypto/asn1/a_bytes.c',
       'src/crypto/asn1/a_d2i_fp.c',
       'src/crypto/asn1/a_dup.c',
       'src/crypto/asn1/a_enum.c',
@@ -59,6 +69,7 @@
       'src/crypto/asn1/a_utctm.c',
       'src/crypto/asn1/a_utf8.c',
       'src/crypto/asn1/asn1_lib.c',
+      'src/crypto/asn1/asn1_locl.h',
       'src/crypto/asn1/asn1_par.c',
       'src/crypto/asn1/asn_pack.c',
       'src/crypto/asn1/f_enum.c',
@@ -81,6 +92,7 @@
       'src/crypto/bio/fd.c',
       'src/crypto/bio/file.c',
       'src/crypto/bio/hexdump.c',
+      'src/crypto/bio/internal.h',
       'src/crypto/bio/pair.c',
       'src/crypto/bio/printf.c',
       'src/crypto/bio/socket.c',
@@ -96,12 +108,15 @@
       'src/crypto/bn/exponentiation.c',
       'src/crypto/bn/gcd.c',
       'src/crypto/bn/generic.c',
+      'src/crypto/bn/internal.h',
       'src/crypto/bn/kronecker.c',
       'src/crypto/bn/montgomery.c',
+      'src/crypto/bn/montgomery_inv.c',
       'src/crypto/bn/mul.c',
       'src/crypto/bn/prime.c',
       'src/crypto/bn/random.c',
       'src/crypto/bn/rsaz_exp.c',
+      'src/crypto/bn/rsaz_exp.h',
       'src/crypto/bn/shift.c',
       'src/crypto/bn/sqrt.c',
       'src/crypto/buf/buf.c',
@@ -109,6 +124,7 @@
       'src/crypto/bytestring/ber.c',
       'src/crypto/bytestring/cbb.c',
       'src/crypto/bytestring/cbs.c',
+      'src/crypto/bytestring/internal.h',
       'src/crypto/chacha/chacha.c',
       'src/crypto/cipher/aead.c',
       'src/crypto/cipher/cipher.c',
@@ -121,34 +137,45 @@
       'src/crypto/cipher/e_rc4.c',
       'src/crypto/cipher/e_ssl3.c',
       'src/crypto/cipher/e_tls.c',
+      'src/crypto/cipher/internal.h',
       'src/crypto/cipher/tls_cbc.c',
       'src/crypto/cmac/cmac.c',
       'src/crypto/conf/conf.c',
+      'src/crypto/conf/conf_def.h',
+      'src/crypto/conf/internal.h',
       'src/crypto/cpu-aarch64-linux.c',
       'src/crypto/cpu-arm-linux.c',
       'src/crypto/cpu-arm.c',
       'src/crypto/cpu-intel.c',
+      'src/crypto/cpu-ppc64le.c',
       'src/crypto/crypto.c',
       'src/crypto/curve25519/curve25519.c',
+      'src/crypto/curve25519/internal.h',
       'src/crypto/curve25519/spake25519.c',
       'src/crypto/curve25519/x25519-x86_64.c',
       'src/crypto/des/des.c',
+      'src/crypto/des/internal.h',
       'src/crypto/dh/check.c',
       'src/crypto/dh/dh.c',
       'src/crypto/dh/dh_asn1.c',
       'src/crypto/dh/params.c',
       'src/crypto/digest/digest.c',
       'src/crypto/digest/digests.c',
+      'src/crypto/digest/internal.h',
+      'src/crypto/digest/md32_common.h',
       'src/crypto/dsa/dsa.c',
       'src/crypto/dsa/dsa_asn1.c',
       'src/crypto/ec/ec.c',
       'src/crypto/ec/ec_asn1.c',
       'src/crypto/ec/ec_key.c',
       'src/crypto/ec/ec_montgomery.c',
+      'src/crypto/ec/internal.h',
       'src/crypto/ec/oct.c',
       'src/crypto/ec/p224-64.c',
       'src/crypto/ec/p256-64.c',
+      'src/crypto/ec/p256-x86_64-table.h',
       'src/crypto/ec/p256-x86_64.c',
+      'src/crypto/ec/p256-x86_64.h',
       'src/crypto/ec/simple.c',
       'src/crypto/ec/util-64.c',
       'src/crypto/ec/wnaf.c',
@@ -161,6 +188,7 @@
       'src/crypto/evp/evp.c',
       'src/crypto/evp/evp_asn1.c',
       'src/crypto/evp/evp_ctx.c',
+      'src/crypto/evp/internal.h',
       'src/crypto/evp/p_dsa_asn1.c',
       'src/crypto/evp/p_ec.c',
       'src/crypto/evp/p_ec_asn1.c',
@@ -172,6 +200,7 @@
       'src/crypto/ex_data.c',
       'src/crypto/hkdf/hkdf.c',
       'src/crypto/hmac/hmac.c',
+      'src/crypto/internal.h',
       'src/crypto/lhash/lhash.c',
       'src/crypto/md4/md4.c',
       'src/crypto/md5/md5.c',
@@ -180,15 +209,13 @@
       'src/crypto/modes/cfb.c',
       'src/crypto/modes/ctr.c',
       'src/crypto/modes/gcm.c',
+      'src/crypto/modes/internal.h',
       'src/crypto/modes/ofb.c',
-      'src/crypto/newhope/error_correction.c',
-      'src/crypto/newhope/newhope.c',
-      'src/crypto/newhope/ntt.c',
-      'src/crypto/newhope/poly.c',
-      'src/crypto/newhope/precomp.c',
-      'src/crypto/newhope/reduce.c',
+      'src/crypto/modes/polyval.c',
       'src/crypto/obj/obj.c',
+      'src/crypto/obj/obj_dat.h',
       'src/crypto/obj/obj_xref.c',
+      'src/crypto/obj/obj_xref.h',
       'src/crypto/pem/pem_all.c',
       'src/crypto/pem/pem_info.c',
       'src/crypto/pem/pem_lib.c',
@@ -197,14 +224,19 @@
       'src/crypto/pem/pem_pkey.c',
       'src/crypto/pem/pem_x509.c',
       'src/crypto/pem/pem_xaux.c',
+      'src/crypto/pkcs8/internal.h',
       'src/crypto/pkcs8/p5_pbe.c',
       'src/crypto/pkcs8/p5_pbev2.c',
       'src/crypto/pkcs8/p8_pkey.c',
       'src/crypto/pkcs8/pkcs8.c',
+      'src/crypto/poly1305/internal.h',
       'src/crypto/poly1305/poly1305.c',
       'src/crypto/poly1305/poly1305_arm.c',
       'src/crypto/poly1305/poly1305_vec.c',
+      'src/crypto/pool/internal.h',
+      'src/crypto/pool/pool.c',
       'src/crypto/rand/deterministic.c',
+      'src/crypto/rand/internal.h',
       'src/crypto/rand/rand.c',
       'src/crypto/rand/urandom.c',
       'src/crypto/rand/windows.c',
@@ -212,10 +244,12 @@
       'src/crypto/refcount_c11.c',
       'src/crypto/refcount_lock.c',
       'src/crypto/rsa/blinding.c',
+      'src/crypto/rsa/internal.h',
       'src/crypto/rsa/padding.c',
       'src/crypto/rsa/rsa.c',
       'src/crypto/rsa/rsa_asn1.c',
       'src/crypto/rsa/rsa_impl.c',
+      'src/crypto/sha/sha1-altivec.c',
       'src/crypto/sha/sha1.c',
       'src/crypto/sha/sha256.c',
       'src/crypto/sha/sha512.c',
@@ -233,13 +267,16 @@
       'src/crypto/x509/asn1_gen.c',
       'src/crypto/x509/by_dir.c',
       'src/crypto/x509/by_file.c',
+      'src/crypto/x509/charmap.h',
       'src/crypto/x509/i2d_pr.c',
+      'src/crypto/x509/internal.h',
       'src/crypto/x509/pkcs7.c',
       'src/crypto/x509/rsa_pss.c',
       'src/crypto/x509/t_crl.c',
       'src/crypto/x509/t_req.c',
       'src/crypto/x509/t_x509.c',
       'src/crypto/x509/t_x509a.c',
+      'src/crypto/x509/vpm_int.h',
       'src/crypto/x509/x509.c',
       'src/crypto/x509/x509_att.c',
       'src/crypto/x509/x509_cmp.c',
@@ -276,8 +313,10 @@
       'src/crypto/x509/x_val.c',
       'src/crypto/x509/x_x509.c',
       'src/crypto/x509/x_x509a.c',
+      'src/crypto/x509v3/ext_dat.h',
       'src/crypto/x509v3/pcy_cache.c',
       'src/crypto/x509v3/pcy_data.c',
+      'src/crypto/x509v3/pcy_int.h',
       'src/crypto/x509v3/pcy_lib.c',
       'src/crypto/x509v3/pcy_map.c',
       'src/crypto/x509v3/pcy_node.c',
@@ -308,6 +347,75 @@
       'src/crypto/x509v3/v3_skey.c',
       'src/crypto/x509v3/v3_sxnet.c',
       'src/crypto/x509v3/v3_utl.c',
+      'src/include/openssl/aead.h',
+      'src/include/openssl/aes.h',
+      'src/include/openssl/arm_arch.h',
+      'src/include/openssl/asn1.h',
+      'src/include/openssl/asn1_mac.h',
+      'src/include/openssl/asn1t.h',
+      'src/include/openssl/base.h',
+      'src/include/openssl/base64.h',
+      'src/include/openssl/bio.h',
+      'src/include/openssl/blowfish.h',
+      'src/include/openssl/bn.h',
+      'src/include/openssl/buf.h',
+      'src/include/openssl/buffer.h',
+      'src/include/openssl/bytestring.h',
+      'src/include/openssl/cast.h',
+      'src/include/openssl/chacha.h',
+      'src/include/openssl/cipher.h',
+      'src/include/openssl/cmac.h',
+      'src/include/openssl/conf.h',
+      'src/include/openssl/cpu.h',
+      'src/include/openssl/crypto.h',
+      'src/include/openssl/curve25519.h',
+      'src/include/openssl/des.h',
+      'src/include/openssl/dh.h',
+      'src/include/openssl/digest.h',
+      'src/include/openssl/dsa.h',
+      'src/include/openssl/ec.h',
+      'src/include/openssl/ec_key.h',
+      'src/include/openssl/ecdh.h',
+      'src/include/openssl/ecdsa.h',
+      'src/include/openssl/engine.h',
+      'src/include/openssl/err.h',
+      'src/include/openssl/evp.h',
+      'src/include/openssl/ex_data.h',
+      'src/include/openssl/hkdf.h',
+      'src/include/openssl/hmac.h',
+      'src/include/openssl/lhash.h',
+      'src/include/openssl/lhash_macros.h',
+      'src/include/openssl/md4.h',
+      'src/include/openssl/md5.h',
+      'src/include/openssl/mem.h',
+      'src/include/openssl/nid.h',
+      'src/include/openssl/obj.h',
+      'src/include/openssl/obj_mac.h',
+      'src/include/openssl/objects.h',
+      'src/include/openssl/opensslconf.h',
+      'src/include/openssl/opensslv.h',
+      'src/include/openssl/ossl_typ.h',
+      'src/include/openssl/pem.h',
+      'src/include/openssl/pkcs12.h',
+      'src/include/openssl/pkcs7.h',
+      'src/include/openssl/pkcs8.h',
+      'src/include/openssl/poly1305.h',
+      'src/include/openssl/pool.h',
+      'src/include/openssl/rand.h',
+      'src/include/openssl/rc4.h',
+      'src/include/openssl/ripemd.h',
+      'src/include/openssl/rsa.h',
+      'src/include/openssl/safestack.h',
+      'src/include/openssl/sha.h',
+      'src/include/openssl/srtp.h',
+      'src/include/openssl/stack.h',
+      'src/include/openssl/stack_macros.h',
+      'src/include/openssl/thread.h',
+      'src/include/openssl/time_support.h',
+      'src/include/openssl/type_check.h',
+      'src/include/openssl/x509.h',
+      'src/include/openssl/x509_vfy.h',
+      'src/include/openssl/x509v3.h',
     ],
     'boringssl_linux_aarch64_sources': [
       'linux-aarch64/crypto/aes/aesv8-armx64.S',
@@ -332,6 +440,10 @@
       'src/crypto/curve25519/asm/x25519-asm-arm.S',
       'src/crypto/poly1305/poly1305_arm_asm.S',
     ],
+    'boringssl_linux_ppc64le_sources': [
+      'linux-ppc64le/crypto/aes/aesp8-ppc.S',
+      'linux-ppc64le/crypto/modes/ghashp8-ppc.S',
+    ],
     'boringssl_linux_x86_sources': [
       'linux-x86/crypto/aes/aes-586.S',
       'linux-x86/crypto/aes/aesni-x86.S',
@@ -342,7 +454,6 @@
       'linux-x86/crypto/chacha/chacha-x86.S',
       'linux-x86/crypto/md5/md5-586.S',
       'linux-x86/crypto/modes/ghash-x86.S',
-      'linux-x86/crypto/rc4/rc4-586.S',
       'linux-x86/crypto/sha/sha1-586.S',
       'linux-x86/crypto/sha/sha256-586.S',
       'linux-x86/crypto/sha/sha512-586.S',
@@ -362,7 +473,6 @@
       'linux-x86_64/crypto/modes/aesni-gcm-x86_64.S',
       'linux-x86_64/crypto/modes/ghash-x86_64.S',
       'linux-x86_64/crypto/rand/rdrand-x86_64.S',
-      'linux-x86_64/crypto/rc4/rc4-x86_64.S',
       'linux-x86_64/crypto/sha/sha1-x86_64.S',
       'linux-x86_64/crypto/sha/sha256-x86_64.S',
       'linux-x86_64/crypto/sha/sha512-x86_64.S',
@@ -378,7 +488,6 @@
       'mac-x86/crypto/chacha/chacha-x86.S',
       'mac-x86/crypto/md5/md5-586.S',
       'mac-x86/crypto/modes/ghash-x86.S',
-      'mac-x86/crypto/rc4/rc4-586.S',
       'mac-x86/crypto/sha/sha1-586.S',
       'mac-x86/crypto/sha/sha256-586.S',
       'mac-x86/crypto/sha/sha512-586.S',
@@ -398,7 +507,6 @@
       'mac-x86_64/crypto/modes/aesni-gcm-x86_64.S',
       'mac-x86_64/crypto/modes/ghash-x86_64.S',
       'mac-x86_64/crypto/rand/rdrand-x86_64.S',
-      'mac-x86_64/crypto/rc4/rc4-x86_64.S',
       'mac-x86_64/crypto/sha/sha1-x86_64.S',
       'mac-x86_64/crypto/sha/sha256-x86_64.S',
       'mac-x86_64/crypto/sha/sha512-x86_64.S',
@@ -414,7 +522,6 @@
       'win-x86/crypto/chacha/chacha-x86.asm',
       'win-x86/crypto/md5/md5-586.asm',
       'win-x86/crypto/modes/ghash-x86.asm',
-      'win-x86/crypto/rc4/rc4-586.asm',
       'win-x86/crypto/sha/sha1-586.asm',
       'win-x86/crypto/sha/sha256-586.asm',
       'win-x86/crypto/sha/sha512-586.asm',
@@ -434,7 +541,6 @@
       'win-x86_64/crypto/modes/aesni-gcm-x86_64.asm',
       'win-x86_64/crypto/modes/ghash-x86_64.asm',
       'win-x86_64/crypto/rand/rdrand-x86_64.asm',
-      'win-x86_64/crypto/rc4/rc4-x86_64.asm',
       'win-x86_64/crypto/sha/sha1-x86_64.asm',
       'win-x86_64/crypto/sha/sha256-x86_64.asm',
       'win-x86_64/crypto/sha/sha512-x86_64.asm',
diff --git a/boringssl_tests.gypi b/boringssl_tests.gypi
index 1076214..36d428d 100644
--- a/boringssl_tests.gypi
+++ b/boringssl_tests.gypi
@@ -273,6 +273,48 @@
       'msvs_disabled_warnings': [ 4267, ],
     },
     {
+      'target_name': 'boringssl_p256-x86_64_test',
+      'type': 'executable',
+      'dependencies': [
+        'boringssl.gyp:boringssl',
+      ],
+      'sources': [
+        'src/crypto/ec/p256-x86_64_test.cc',
+        '<@(boringssl_test_support_sources)',
+      ],
+      # TODO(davidben): Fix size_t truncations in BoringSSL.
+      # https://crbug.com/429039
+      'msvs_disabled_warnings': [ 4267, ],
+    },
+    {
+      'target_name': 'boringssl_ecdh_test',
+      'type': 'executable',
+      'dependencies': [
+        'boringssl.gyp:boringssl',
+      ],
+      'sources': [
+        'src/crypto/ecdh/ecdh_test.cc',
+        '<@(boringssl_test_support_sources)',
+      ],
+      # TODO(davidben): Fix size_t truncations in BoringSSL.
+      # https://crbug.com/429039
+      'msvs_disabled_warnings': [ 4267, ],
+    },
+    {
+      'target_name': 'boringssl_ecdsa_sign_test',
+      'type': 'executable',
+      'dependencies': [
+        'boringssl.gyp:boringssl',
+      ],
+      'sources': [
+        'src/crypto/ecdsa/ecdsa_sign_test.cc',
+        '<@(boringssl_test_support_sources)',
+      ],
+      # TODO(davidben): Fix size_t truncations in BoringSSL.
+      # https://crbug.com/429039
+      'msvs_disabled_warnings': [ 4267, ],
+    },
+    {
       'target_name': 'boringssl_ecdsa_test',
       'type': 'executable',
       'dependencies': [
@@ -287,6 +329,20 @@
       'msvs_disabled_warnings': [ 4267, ],
     },
     {
+      'target_name': 'boringssl_ecdsa_verify_test',
+      'type': 'executable',
+      'dependencies': [
+        'boringssl.gyp:boringssl',
+      ],
+      'sources': [
+        'src/crypto/ecdsa/ecdsa_verify_test.cc',
+        '<@(boringssl_test_support_sources)',
+      ],
+      # TODO(davidben): Fix size_t truncations in BoringSSL.
+      # https://crbug.com/429039
+      'msvs_disabled_warnings': [ 4267, ],
+    },
+    {
       'target_name': 'boringssl_err_test',
       'type': 'executable',
       'dependencies': [
@@ -391,49 +447,7 @@
         'boringssl.gyp:boringssl',
       ],
       'sources': [
-        'src/crypto/modes/gcm_test.c',
-        '<@(boringssl_test_support_sources)',
-      ],
-      # TODO(davidben): Fix size_t truncations in BoringSSL.
-      # https://crbug.com/429039
-      'msvs_disabled_warnings': [ 4267, ],
-    },
-    {
-      'target_name': 'boringssl_newhope_statistical_test',
-      'type': 'executable',
-      'dependencies': [
-        'boringssl.gyp:boringssl',
-      ],
-      'sources': [
-        'src/crypto/newhope/newhope_statistical_test.cc',
-        '<@(boringssl_test_support_sources)',
-      ],
-      # TODO(davidben): Fix size_t truncations in BoringSSL.
-      # https://crbug.com/429039
-      'msvs_disabled_warnings': [ 4267, ],
-    },
-    {
-      'target_name': 'boringssl_newhope_test',
-      'type': 'executable',
-      'dependencies': [
-        'boringssl.gyp:boringssl',
-      ],
-      'sources': [
-        'src/crypto/newhope/newhope_test.cc',
-        '<@(boringssl_test_support_sources)',
-      ],
-      # TODO(davidben): Fix size_t truncations in BoringSSL.
-      # https://crbug.com/429039
-      'msvs_disabled_warnings': [ 4267, ],
-    },
-    {
-      'target_name': 'boringssl_newhope_vectors_test',
-      'type': 'executable',
-      'dependencies': [
-        'boringssl.gyp:boringssl',
-      ],
-      'sources': [
-        'src/crypto/newhope/newhope_vectors_test.cc',
+        'src/crypto/modes/gcm_test.cc',
         '<@(boringssl_test_support_sources)',
       ],
       # TODO(davidben): Fix size_t truncations in BoringSSL.
@@ -497,6 +511,20 @@
       'msvs_disabled_warnings': [ 4267, ],
     },
     {
+      'target_name': 'boringssl_pool_test',
+      'type': 'executable',
+      'dependencies': [
+        'boringssl.gyp:boringssl',
+      ],
+      'sources': [
+        'src/crypto/pool/pool_test.cc',
+        '<@(boringssl_test_support_sources)',
+      ],
+      # TODO(davidben): Fix size_t truncations in BoringSSL.
+      # https://crbug.com/429039
+      'msvs_disabled_warnings': [ 4267, ],
+    },
+    {
       'target_name': 'boringssl_refcount_test',
       'type': 'executable',
       'dependencies': [
@@ -614,12 +642,10 @@
       'src/crypto/test/file_test.cc',
       'src/crypto/test/file_test.h',
       'src/crypto/test/malloc.cc',
-      'src/crypto/test/scoped_types.h',
       'src/crypto/test/test_util.cc',
       'src/crypto/test/test_util.h',
       'src/ssl/test/async_bio.h',
       'src/ssl/test/packeted_bio.h',
-      'src/ssl/test/scoped_types.h',
       'src/ssl/test/test_config.h',
     ],
     'boringssl_test_targets': [
@@ -638,7 +664,10 @@
       'boringssl_digest_test',
       'boringssl_dsa_test',
       'boringssl_ec_test',
+      'boringssl_ecdh_test',
+      'boringssl_ecdsa_sign_test',
       'boringssl_ecdsa_test',
+      'boringssl_ecdsa_verify_test',
       'boringssl_ed25519_test',
       'boringssl_err_test',
       'boringssl_evp_extra_test',
@@ -648,15 +677,14 @@
       'boringssl_hkdf_test',
       'boringssl_hmac_test',
       'boringssl_lhash_test',
-      'boringssl_newhope_statistical_test',
-      'boringssl_newhope_test',
-      'boringssl_newhope_vectors_test',
       'boringssl_obj_test',
+      'boringssl_p256-x86_64_test',
       'boringssl_pbkdf_test',
       'boringssl_pkcs12_test',
       'boringssl_pkcs7_test',
       'boringssl_pkcs8_test',
       'boringssl_poly1305_test',
+      'boringssl_pool_test',
       'boringssl_refcount_test',
       'boringssl_rsa_test',
       'boringssl_spake25519_test',
diff --git a/codereview.settings b/codereview.settings
new file mode 100644
index 0000000..ccbccd5
--- /dev/null
+++ b/codereview.settings
@@ -0,0 +1,4 @@
+# This file is used by gcl to get repository specific information.
+CODE_REVIEW_SERVER: http://codereview.chromium.org
+VIEW_VC: https://github.com/dart-lang/boringssl_gen/commit/
+CC_LIST: reviews@dartlang.org
diff --git a/err_data.c b/err_data.c
index d685679..e75a0ca 100644
--- a/err_data.c
+++ b/err_data.c
@@ -178,42 +178,42 @@
     0x28340c19,
     0x283480ac,
     0x283500ea,
-    0x2c3227cb,
-    0x2c32a7d9,
-    0x2c3327eb,
-    0x2c33a7fd,
-    0x2c342811,
-    0x2c34a823,
-    0x2c35283e,
-    0x2c35a850,
-    0x2c362863,
+    0x2c32299a,
+    0x2c32a9a8,
+    0x2c3329ba,
+    0x2c33a9cc,
+    0x2c3429e0,
+    0x2c34a9f2,
+    0x2c352a0d,
+    0x2c35aa1f,
+    0x2c362a32,
     0x2c36832d,
-    0x2c372870,
-    0x2c37a882,
-    0x2c382895,
-    0x2c38a8ac,
-    0x2c3928ba,
-    0x2c39a8ca,
-    0x2c3a28dc,
-    0x2c3aa8f0,
-    0x2c3b2901,
-    0x2c3ba920,
-    0x2c3c2934,
-    0x2c3ca94a,
-    0x2c3d2963,
-    0x2c3da980,
-    0x2c3e2991,
-    0x2c3ea99f,
-    0x2c3f29b7,
-    0x2c3fa9cf,
-    0x2c4029dc,
+    0x2c372a3f,
+    0x2c37aa51,
+    0x2c382a64,
+    0x2c38aa7b,
+    0x2c392a89,
+    0x2c39aa99,
+    0x2c3a2aab,
+    0x2c3aaabf,
+    0x2c3b2ad0,
+    0x2c3baaef,
+    0x2c3c2b03,
+    0x2c3cab19,
+    0x2c3d2b32,
+    0x2c3dab4f,
+    0x2c3e2b60,
+    0x2c3eab6e,
+    0x2c3f2b86,
+    0x2c3fab9e,
+    0x2c402bab,
     0x2c4090e7,
-    0x2c4129ed,
-    0x2c41aa00,
+    0x2c412bbc,
+    0x2c41abcf,
     0x2c4210c0,
-    0x2c42aa11,
+    0x2c42abe0,
     0x2c430720,
-    0x2c43a912,
+    0x2c43aae1,
     0x30320000,
     0x30328015,
     0x3033001f,
@@ -366,169 +366,188 @@
     0x403b9861,
     0x403c0064,
     0x403c8083,
-    0x403d186d,
-    0x403d9883,
-    0x403e1892,
-    0x403e98a5,
-    0x403f18bf,
-    0x403f98cd,
-    0x404018e2,
-    0x404098f6,
-    0x40411913,
-    0x4041992e,
-    0x40421947,
-    0x4042995a,
-    0x4043196e,
-    0x40439986,
-    0x4044199d,
+    0x403d18aa,
+    0x403d98c0,
+    0x403e18cf,
+    0x403e98e2,
+    0x403f18fc,
+    0x403f990a,
+    0x4040191f,
+    0x40409933,
+    0x40411950,
+    0x4041996b,
+    0x40421984,
+    0x40429997,
+    0x404319ab,
+    0x404399c3,
+    0x404419da,
     0x404480ac,
-    0x404519b2,
-    0x404599c4,
-    0x404619e8,
-    0x40469a08,
-    0x40471a16,
-    0x40479a3d,
-    0x40481a52,
-    0x40489a6b,
-    0x40491a82,
-    0x40499a9c,
-    0x404a1ab3,
-    0x404a9ad1,
-    0x404b1ae9,
-    0x404b9b00,
-    0x404c1b16,
-    0x404c9b28,
-    0x404d1b49,
-    0x404d9b6b,
-    0x404e1b7f,
-    0x404e9b8c,
-    0x404f1ba3,
-    0x404f9bb3,
-    0x40501bdd,
-    0x40509bf1,
-    0x40511c0c,
-    0x40519c1c,
-    0x40521c33,
-    0x40529c45,
-    0x40531c5d,
-    0x40539c70,
-    0x40541c85,
-    0x40549ca8,
-    0x40551cb6,
-    0x40559cd3,
-    0x40561ce0,
-    0x40569cf9,
-    0x40571d11,
-    0x40579d24,
-    0x40581d39,
-    0x40589d4b,
-    0x40591d7a,
-    0x40599d93,
-    0x405a1da7,
-    0x405a9db7,
-    0x405b1dcf,
-    0x405b9de0,
-    0x405c1df3,
-    0x405c9e04,
-    0x405d1e11,
-    0x405d9e28,
-    0x405e1e48,
+    0x404519ef,
+    0x40459a01,
+    0x40461a25,
+    0x40469a45,
+    0x40471a53,
+    0x40479a7a,
+    0x40481ab7,
+    0x40489ad0,
+    0x40491ae7,
+    0x40499b01,
+    0x404a1b18,
+    0x404a9b36,
+    0x404b1b4e,
+    0x404b9b65,
+    0x404c1b7b,
+    0x404c9b8d,
+    0x404d1bae,
+    0x404d9bd0,
+    0x404e1be4,
+    0x404e9bf1,
+    0x404f1c1e,
+    0x404f9c47,
+    0x40501c82,
+    0x40509c96,
+    0x40511cb1,
+    0x40519cc1,
+    0x40521cd8,
+    0x40529cfc,
+    0x40531d14,
+    0x40539d27,
+    0x40541d3c,
+    0x40549d5f,
+    0x40551d6d,
+    0x40559d8a,
+    0x40561d97,
+    0x40569db0,
+    0x40571dc8,
+    0x40579ddb,
+    0x40581df0,
+    0x40589e17,
+    0x40591e46,
+    0x40599e73,
+    0x405a1e87,
+    0x405a9e97,
+    0x405b1eaf,
+    0x405b9ec0,
+    0x405c1ed3,
+    0x405c9ef4,
+    0x405d1f01,
+    0x405d9f18,
+    0x405e1f56,
     0x405e8a95,
-    0x405f1e69,
-    0x405f9e76,
-    0x40601e84,
-    0x40609ea6,
-    0x40611ece,
-    0x40619ee3,
-    0x40621efa,
-    0x40629f0b,
-    0x40631f1c,
-    0x40639f31,
-    0x40641f48,
-    0x40649f59,
-    0x40651f74,
-    0x40659f8b,
-    0x40661fa3,
-    0x40669fcd,
-    0x40671ff8,
-    0x4067a019,
-    0x4068202c,
-    0x4068a04d,
-    0x4069207f,
-    0x4069a0ad,
-    0x406a20ce,
-    0x406aa0ee,
-    0x406b2276,
-    0x406ba299,
-    0x406c22af,
-    0x406ca4db,
-    0x406d250a,
-    0x406da532,
-    0x406e254b,
-    0x406ea563,
-    0x406f2582,
-    0x406fa597,
-    0x407025aa,
-    0x4070a5c7,
+    0x405f1f77,
+    0x405f9f84,
+    0x40601f92,
+    0x40609fb4,
+    0x40611ff8,
+    0x4061a030,
+    0x40622047,
+    0x4062a058,
+    0x40632069,
+    0x4063a07e,
+    0x40642095,
+    0x4064a0c1,
+    0x406520dc,
+    0x4065a0f3,
+    0x4066210b,
+    0x4066a135,
+    0x40672160,
+    0x4067a181,
+    0x40682194,
+    0x4068a1b5,
+    0x406921e7,
+    0x4069a215,
+    0x406a2236,
+    0x406aa256,
+    0x406b23de,
+    0x406ba401,
+    0x406c2417,
+    0x406ca679,
+    0x406d26a8,
+    0x406da6d0,
+    0x406e26fe,
+    0x406ea732,
+    0x406f2751,
+    0x406fa766,
+    0x40702779,
+    0x4070a796,
     0x40710800,
-    0x4071a5d9,
-    0x407225ec,
-    0x4072a605,
-    0x4073261d,
+    0x4071a7a8,
+    0x407227bb,
+    0x4072a7d4,
+    0x407327ec,
     0x4073936d,
-    0x40742631,
-    0x4074a64b,
-    0x4075265c,
-    0x4075a670,
-    0x4076267e,
+    0x40742800,
+    0x4074a81a,
+    0x4075282b,
+    0x4075a83f,
+    0x4076284d,
     0x407691aa,
-    0x407726a3,
-    0x4077a6c5,
-    0x407826e0,
-    0x4078a719,
-    0x40792730,
-    0x4079a746,
-    0x407a2752,
-    0x407aa765,
-    0x407b277a,
-    0x407ba78c,
-    0x407c27a1,
-    0x407ca7aa,
-    0x407d2068,
-    0x407d9bc3,
-    0x407e26f5,
-    0x407e9d5b,
-    0x407f1a2a,
-    0x41f421a1,
-    0x41f92233,
-    0x41fe2126,
-    0x41fea302,
-    0x41ff23f3,
-    0x420321ba,
-    0x420821dc,
-    0x4208a218,
-    0x4209210a,
-    0x4209a252,
-    0x420a2161,
-    0x420aa141,
-    0x420b2181,
-    0x420ba1fa,
-    0x420c240f,
-    0x420ca2cf,
-    0x420d22e9,
-    0x420da320,
-    0x4212233a,
-    0x421723d6,
-    0x4217a37c,
-    0x421c239e,
-    0x421f2359,
-    0x42212426,
-    0x422623b9,
-    0x422b24bf,
-    0x422ba488,
-    0x422c24a7,
-    0x422ca462,
-    0x422d2441,
+    0x40772872,
+    0x4077a894,
+    0x407828af,
+    0x4078a8e8,
+    0x407928ff,
+    0x4079a915,
+    0x407a2921,
+    0x407aa934,
+    0x407b2949,
+    0x407ba95b,
+    0x407c2970,
+    0x407ca979,
+    0x407d21d0,
+    0x407d9c57,
+    0x407e28c4,
+    0x407e9e27,
+    0x407f1a67,
+    0x407f9887,
+    0x40801c2e,
+    0x40809a8f,
+    0x40811cea,
+    0x40819c08,
+    0x408226e9,
+    0x4082986d,
+    0x40831e02,
+    0x4083a0a6,
+    0x40841aa3,
+    0x40849e5f,
+    0x40851ee4,
+    0x40859fdc,
+    0x40861f38,
+    0x40869c71,
+    0x40872716,
+    0x4087a00d,
+    0x41f42309,
+    0x41f9239b,
+    0x41fe228e,
+    0x41fea46a,
+    0x41ff255b,
+    0x42032322,
+    0x42082344,
+    0x4208a380,
+    0x42092272,
+    0x4209a3ba,
+    0x420a22c9,
+    0x420aa2a9,
+    0x420b22e9,
+    0x420ba362,
+    0x420c2577,
+    0x420ca437,
+    0x420d2451,
+    0x420da488,
+    0x421224a2,
+    0x4217253e,
+    0x4217a4e4,
+    0x421c2506,
+    0x421f24c1,
+    0x4221258e,
+    0x42262521,
+    0x422b265d,
+    0x422ba60b,
+    0x422c2645,
+    0x422ca5ca,
+    0x422d25a9,
+    0x422da62a,
+    0x422e25f0,
     0x4432072b,
     0x4432873a,
     0x44330746,
@@ -571,69 +590,69 @@
     0x4c3d136d,
     0x4c3d937c,
     0x4c3e1389,
-    0x50322a23,
-    0x5032aa32,
-    0x50332a3d,
-    0x5033aa4d,
-    0x50342a66,
-    0x5034aa80,
-    0x50352a8e,
-    0x5035aaa4,
-    0x50362ab6,
-    0x5036aacc,
-    0x50372ae5,
-    0x5037aaf8,
-    0x50382b10,
-    0x5038ab21,
-    0x50392b36,
-    0x5039ab4a,
-    0x503a2b6a,
-    0x503aab80,
-    0x503b2b98,
-    0x503babaa,
-    0x503c2bc6,
-    0x503cabdd,
-    0x503d2bf6,
-    0x503dac0c,
-    0x503e2c19,
-    0x503eac2f,
-    0x503f2c41,
+    0x50322bf2,
+    0x5032ac01,
+    0x50332c0c,
+    0x5033ac1c,
+    0x50342c35,
+    0x5034ac4f,
+    0x50352c5d,
+    0x5035ac73,
+    0x50362c85,
+    0x5036ac9b,
+    0x50372cb4,
+    0x5037acc7,
+    0x50382cdf,
+    0x5038acf0,
+    0x50392d05,
+    0x5039ad19,
+    0x503a2d39,
+    0x503aad4f,
+    0x503b2d67,
+    0x503bad79,
+    0x503c2d95,
+    0x503cadac,
+    0x503d2dc5,
+    0x503daddb,
+    0x503e2de8,
+    0x503eadfe,
+    0x503f2e10,
     0x503f8382,
-    0x50402c54,
-    0x5040ac64,
-    0x50412c7e,
-    0x5041ac8d,
-    0x50422ca7,
-    0x5042acc4,
-    0x50432cd4,
-    0x5043ace4,
-    0x50442cf3,
+    0x50402e23,
+    0x5040ae33,
+    0x50412e4d,
+    0x5041ae5c,
+    0x50422e76,
+    0x5042ae93,
+    0x50432ea3,
+    0x5043aeb3,
+    0x50442ec2,
     0x5044843f,
-    0x50452d07,
-    0x5045ad25,
-    0x50462d38,
-    0x5046ad4e,
-    0x50472d60,
-    0x5047ad75,
-    0x50482d9b,
-    0x5048ada9,
-    0x50492dbc,
-    0x5049add1,
-    0x504a2de7,
-    0x504aadf7,
-    0x504b2e17,
-    0x504bae2a,
-    0x504c2e4d,
-    0x504cae7b,
-    0x504d2e8d,
-    0x504daeaa,
-    0x504e2ec5,
-    0x504eaee1,
-    0x504f2ef3,
-    0x504faf0a,
-    0x50502f19,
+    0x50452ed6,
+    0x5045aef4,
+    0x50462f07,
+    0x5046af1d,
+    0x50472f2f,
+    0x5047af44,
+    0x50482f6a,
+    0x5048af78,
+    0x50492f8b,
+    0x5049afa0,
+    0x504a2fb6,
+    0x504aafc6,
+    0x504b2fe6,
+    0x504baff9,
+    0x504c301c,
+    0x504cb04a,
+    0x504d305c,
+    0x504db079,
+    0x504e3094,
+    0x504eb0b0,
+    0x504f30c2,
+    0x504fb0d9,
+    0x505030e8,
     0x505086ef,
-    0x50512f2c,
+    0x505130fb,
     0x58320ec9,
     0x68320e8b,
     0x68328c25,
@@ -994,6 +1013,8 @@
     "BAD_SSL_FILETYPE\0"
     "BAD_WRITE_RETRY\0"
     "BIO_NOT_SET\0"
+    "BLOCK_CIPHER_PAD_IS_WRONG\0"
+    "BUFFERED_MESSAGES_ON_CIPHER_CHANGE\0"
     "CA_DN_LENGTH_MISMATCH\0"
     "CA_DN_TOO_LONG\0"
     "CCS_RECEIVED_EARLY\0"
@@ -1016,6 +1037,8 @@
     "DIGEST_CHECK_FAILED\0"
     "DOWNGRADE_DETECTED\0"
     "DTLS_MESSAGE_TOO_BIG\0"
+    "DUPLICATE_EXTENSION\0"
+    "DUPLICATE_KEY_SHARE\0"
     "ECC_CERT_NOT_FOR_SIGNING\0"
     "EMS_STATE_INCONSISTENT\0"
     "ENCRYPTED_LENGTH_TOO_LONG\0"
@@ -1030,14 +1053,18 @@
     "HTTPS_PROXY_REQUEST\0"
     "HTTP_REQUEST\0"
     "INAPPROPRIATE_FALLBACK\0"
+    "INVALID_ALPN_PROTOCOL\0"
     "INVALID_COMMAND\0"
+    "INVALID_COMPRESSION_LIST\0"
     "INVALID_MESSAGE\0"
     "INVALID_OUTER_RECORD_TYPE\0"
+    "INVALID_SCT_LIST\0"
     "INVALID_SSL_SESSION\0"
     "INVALID_TICKET_KEYS_LENGTH\0"
     "LENGTH_MISMATCH\0"
     "LIBRARY_HAS_NO_CIPHERS\0"
     "MISSING_EXTENSION\0"
+    "MISSING_KEY_SHARE\0"
     "MISSING_RSA_CERTIFICATE\0"
     "MISSING_TMP_DH_KEY\0"
     "MISSING_TMP_ECDH_KEY\0"
@@ -1050,30 +1077,37 @@
     "NO_CERTIFICATE_SET\0"
     "NO_CIPHERS_AVAILABLE\0"
     "NO_CIPHERS_PASSED\0"
+    "NO_CIPHERS_SPECIFIED\0"
     "NO_CIPHER_MATCH\0"
     "NO_COMMON_SIGNATURE_ALGORITHMS\0"
     "NO_COMPRESSION_SPECIFIED\0"
+    "NO_GROUPS_SPECIFIED\0"
     "NO_METHOD_SPECIFIED\0"
     "NO_P256_SUPPORT\0"
     "NO_PRIVATE_KEY_ASSIGNED\0"
     "NO_RENEGOTIATION\0"
     "NO_REQUIRED_DIGEST\0"
     "NO_SHARED_CIPHER\0"
+    "NO_SHARED_GROUP\0"
     "NULL_SSL_CTX\0"
     "NULL_SSL_METHOD_PASSED\0"
     "OLD_SESSION_CIPHER_NOT_RETURNED\0"
+    "OLD_SESSION_PRF_HASH_MISMATCH\0"
     "OLD_SESSION_VERSION_NOT_RETURNED\0"
     "PARSE_TLSEXT\0"
     "PATH_TOO_LONG\0"
     "PEER_DID_NOT_RETURN_A_CERTIFICATE\0"
     "PEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPE\0"
+    "PRE_SHARED_KEY_MUST_BE_LAST\0"
     "PROTOCOL_IS_SHUTDOWN\0"
+    "PSK_IDENTITY_BINDER_COUNT_MISMATCH\0"
     "PSK_IDENTITY_NOT_FOUND\0"
     "PSK_NO_CLIENT_CB\0"
     "PSK_NO_SERVER_CB\0"
     "READ_TIMEOUT_EXPIRED\0"
     "RECORD_LENGTH_MISMATCH\0"
     "RECORD_TOO_LARGE\0"
+    "RENEGOTIATION_EMS_MISMATCH\0"
     "RENEGOTIATION_ENCODING_ERR\0"
     "RENEGOTIATION_MISMATCH\0"
     "REQUIRED_CIPHER_MISSING\0"
@@ -1118,13 +1152,17 @@
     "TLSV1_ALERT_USER_CANCELLED\0"
     "TLSV1_BAD_CERTIFICATE_HASH_VALUE\0"
     "TLSV1_BAD_CERTIFICATE_STATUS_RESPONSE\0"
+    "TLSV1_CERTIFICATE_REQUIRED\0"
     "TLSV1_CERTIFICATE_UNOBTAINABLE\0"
+    "TLSV1_UNKNOWN_PSK_IDENTITY\0"
     "TLSV1_UNRECOGNIZED_NAME\0"
     "TLSV1_UNSUPPORTED_EXTENSION\0"
     "TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST\0"
     "TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG\0"
     "TOO_MANY_EMPTY_FRAGMENTS\0"
+    "TOO_MANY_KEY_UPDATES\0"
     "TOO_MANY_WARNING_ALERTS\0"
+    "TOO_MUCH_SKIPPED_EARLY_DATA\0"
     "UNABLE_TO_FIND_ECDH_PARAMETERS\0"
     "UNEXPECTED_EXTENSION\0"
     "UNEXPECTED_MESSAGE\0"
diff --git a/linux-aarch64/crypto/aes/aesv8-armx64.S b/linux-aarch64/crypto/aes/aesv8-armx64.S
index 3e8cb16..51e2464 100644
--- a/linux-aarch64/crypto/aes/aesv8-armx64.S
+++ b/linux-aarch64/crypto/aes/aesv8-armx64.S
@@ -3,7 +3,7 @@
 
 #if __ARM_MAX_ARCH__>=7
 .text
-#if !defined(__clang__)
+#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
 .arch	armv8-a+crypto
 #endif
 .align	5
@@ -12,11 +12,11 @@
 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
 .long	0x1b,0x1b,0x1b,0x1b
 
-.globl	aes_v8_set_encrypt_key
-.hidden	aes_v8_set_encrypt_key
-.type	aes_v8_set_encrypt_key,%function
+.globl	aes_hw_set_encrypt_key
+.hidden	aes_hw_set_encrypt_key
+.type	aes_hw_set_encrypt_key,%function
 .align	5
-aes_v8_set_encrypt_key:
+aes_hw_set_encrypt_key:
 .Lenc_key:
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
@@ -178,13 +178,13 @@
 	mov	x0,x3			// return value
 	ldr	x29,[sp],#16
 	ret
-.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
+.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
 
-.globl	aes_v8_set_decrypt_key
-.hidden	aes_v8_set_decrypt_key
-.type	aes_v8_set_decrypt_key,%function
+.globl	aes_hw_set_decrypt_key
+.hidden	aes_hw_set_decrypt_key
+.type	aes_hw_set_decrypt_key,%function
 .align	5
-aes_v8_set_decrypt_key:
+aes_hw_set_decrypt_key:
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	bl	.Lenc_key
@@ -219,12 +219,12 @@
 .Ldec_key_abort:
 	ldp	x29,x30,[sp],#16
 	ret
-.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
-.globl	aes_v8_encrypt
-.hidden	aes_v8_encrypt
-.type	aes_v8_encrypt,%function
+.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
+.globl	aes_hw_encrypt
+.hidden	aes_hw_encrypt
+.type	aes_hw_encrypt,%function
 .align	5
-aes_v8_encrypt:
+aes_hw_encrypt:
 	ldr	w3,[x2,#240]
 	ld1	{v0.4s},[x2],#16
 	ld1	{v2.16b},[x0]
@@ -249,12 +249,12 @@
 
 	st1	{v2.16b},[x1]
 	ret
-.size	aes_v8_encrypt,.-aes_v8_encrypt
-.globl	aes_v8_decrypt
-.hidden	aes_v8_decrypt
-.type	aes_v8_decrypt,%function
+.size	aes_hw_encrypt,.-aes_hw_encrypt
+.globl	aes_hw_decrypt
+.hidden	aes_hw_decrypt
+.type	aes_hw_decrypt,%function
 .align	5
-aes_v8_decrypt:
+aes_hw_decrypt:
 	ldr	w3,[x2,#240]
 	ld1	{v0.4s},[x2],#16
 	ld1	{v2.16b},[x0]
@@ -279,12 +279,12 @@
 
 	st1	{v2.16b},[x1]
 	ret
-.size	aes_v8_decrypt,.-aes_v8_decrypt
-.globl	aes_v8_cbc_encrypt
-.hidden	aes_v8_cbc_encrypt
-.type	aes_v8_cbc_encrypt,%function
+.size	aes_hw_decrypt,.-aes_hw_decrypt
+.globl	aes_hw_cbc_encrypt
+.hidden	aes_hw_cbc_encrypt
+.type	aes_hw_cbc_encrypt,%function
 .align	5
-aes_v8_cbc_encrypt:
+aes_hw_cbc_encrypt:
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	subs	x2,x2,#16
@@ -570,12 +570,12 @@
 .Lcbc_abort:
 	ldr	x29,[sp],#16
 	ret
-.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
-.globl	aes_v8_ctr32_encrypt_blocks
-.hidden	aes_v8_ctr32_encrypt_blocks
-.type	aes_v8_ctr32_encrypt_blocks,%function
+.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
+.globl	aes_hw_ctr32_encrypt_blocks
+.hidden	aes_hw_ctr32_encrypt_blocks
+.type	aes_hw_ctr32_encrypt_blocks,%function
 .align	5
-aes_v8_ctr32_encrypt_blocks:
+aes_hw_ctr32_encrypt_blocks:
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	ldr	w5,[x3,#240]
@@ -752,6 +752,6 @@
 .Lctr32_done:
 	ldr	x29,[sp],#16
 	ret
-.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
+.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
 #endif
 #endif
diff --git a/linux-aarch64/crypto/modes/ghashv8-armx64.S b/linux-aarch64/crypto/modes/ghashv8-armx64.S
index f39f3ba..89d780f 100644
--- a/linux-aarch64/crypto/modes/ghashv8-armx64.S
+++ b/linux-aarch64/crypto/modes/ghashv8-armx64.S
@@ -2,7 +2,7 @@
 #include <openssl/arm_arch.h>
 
 .text
-#if !defined(__clang__)
+#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
 .arch	armv8-a+crypto
 #endif
 .globl	gcm_init_v8
diff --git a/linux-arm/crypto/aes/aesv8-armx32.S b/linux-arm/crypto/aes/aesv8-armx32.S
index 95a2ea4..c2f6b68 100644
--- a/linux-arm/crypto/aes/aesv8-armx32.S
+++ b/linux-arm/crypto/aes/aesv8-armx32.S
@@ -12,11 +12,11 @@
 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
 .long	0x1b,0x1b,0x1b,0x1b
 
-.globl	aes_v8_set_encrypt_key
-.hidden	aes_v8_set_encrypt_key
-.type	aes_v8_set_encrypt_key,%function
+.globl	aes_hw_set_encrypt_key
+.hidden	aes_hw_set_encrypt_key
+.type	aes_hw_set_encrypt_key,%function
 .align	5
-aes_v8_set_encrypt_key:
+aes_hw_set_encrypt_key:
 .Lenc_key:
 	mov	r3,#-1
 	cmp	r0,#0
@@ -181,13 +181,13 @@
 	mov	r0,r3			@ return value
 
 	bx	lr
-.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
+.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
 
-.globl	aes_v8_set_decrypt_key
-.hidden	aes_v8_set_decrypt_key
-.type	aes_v8_set_decrypt_key,%function
+.globl	aes_hw_set_decrypt_key
+.hidden	aes_hw_set_decrypt_key
+.type	aes_hw_set_decrypt_key,%function
 .align	5
-aes_v8_set_decrypt_key:
+aes_hw_set_decrypt_key:
 	stmdb	sp!,{r4,lr}
 	bl	.Lenc_key
 
@@ -220,12 +220,12 @@
 	eor	r0,r0,r0		@ return value
 .Ldec_key_abort:
 	ldmia	sp!,{r4,pc}
-.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
-.globl	aes_v8_encrypt
-.hidden	aes_v8_encrypt
-.type	aes_v8_encrypt,%function
+.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
+.globl	aes_hw_encrypt
+.hidden	aes_hw_encrypt
+.type	aes_hw_encrypt,%function
 .align	5
-aes_v8_encrypt:
+aes_hw_encrypt:
 	ldr	r3,[r2,#240]
 	vld1.32	{q0},[r2]!
 	vld1.8	{q2},[r0]
@@ -250,12 +250,12 @@
 
 	vst1.8	{q2},[r1]
 	bx	lr
-.size	aes_v8_encrypt,.-aes_v8_encrypt
-.globl	aes_v8_decrypt
-.hidden	aes_v8_decrypt
-.type	aes_v8_decrypt,%function
+.size	aes_hw_encrypt,.-aes_hw_encrypt
+.globl	aes_hw_decrypt
+.hidden	aes_hw_decrypt
+.type	aes_hw_decrypt,%function
 .align	5
-aes_v8_decrypt:
+aes_hw_decrypt:
 	ldr	r3,[r2,#240]
 	vld1.32	{q0},[r2]!
 	vld1.8	{q2},[r0]
@@ -280,12 +280,12 @@
 
 	vst1.8	{q2},[r1]
 	bx	lr
-.size	aes_v8_decrypt,.-aes_v8_decrypt
-.globl	aes_v8_cbc_encrypt
-.hidden	aes_v8_cbc_encrypt
-.type	aes_v8_cbc_encrypt,%function
+.size	aes_hw_decrypt,.-aes_hw_decrypt
+.globl	aes_hw_cbc_encrypt
+.hidden	aes_hw_cbc_encrypt
+.type	aes_hw_cbc_encrypt,%function
 .align	5
-aes_v8_cbc_encrypt:
+aes_hw_cbc_encrypt:
 	mov	ip,sp
 	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
@@ -573,12 +573,12 @@
 .Lcbc_abort:
 	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
 	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
-.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
-.globl	aes_v8_ctr32_encrypt_blocks
-.hidden	aes_v8_ctr32_encrypt_blocks
-.type	aes_v8_ctr32_encrypt_blocks,%function
+.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
+.globl	aes_hw_ctr32_encrypt_blocks
+.hidden	aes_hw_ctr32_encrypt_blocks
+.type	aes_hw_ctr32_encrypt_blocks,%function
 .align	5
-aes_v8_ctr32_encrypt_blocks:
+aes_hw_ctr32_encrypt_blocks:
 	mov	ip,sp
 	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
@@ -757,6 +757,6 @@
 .Lctr32_done:
 	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
-.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
+.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
 #endif
 #endif
diff --git a/linux-arm/crypto/aes/bsaes-armv7.S b/linux-arm/crypto/aes/bsaes-armv7.S
index abb414d..1db7bbe 100644
--- a/linux-arm/crypto/aes/bsaes-armv7.S
+++ b/linux-arm/crypto/aes/bsaes-armv7.S
@@ -1843,8 +1843,6 @@
 	b	.Lxts_enc_done
 .align	4
 .Lxts_enc_6:
-	vst1.64	{q14}, [r0,:128]		@ next round tweak
-
 	veor	q4, q4, q12
 #ifndef	BSAES_ASM_EXTENDED_KEY
 	add	r4, sp, #0x90			@ pass key schedule
@@ -1880,8 +1878,6 @@
 
 .align	5
 .Lxts_enc_5:
-	vst1.64	{q13}, [r0,:128]		@ next round tweak
-
 	veor	q3, q3, q11
 #ifndef	BSAES_ASM_EXTENDED_KEY
 	add	r4, sp, #0x90			@ pass key schedule
@@ -1910,8 +1906,6 @@
 	b	.Lxts_enc_done
 .align	4
 .Lxts_enc_4:
-	vst1.64	{q12}, [r0,:128]		@ next round tweak
-
 	veor	q2, q2, q10
 #ifndef	BSAES_ASM_EXTENDED_KEY
 	add	r4, sp, #0x90			@ pass key schedule
@@ -1937,8 +1931,6 @@
 	b	.Lxts_enc_done
 .align	4
 .Lxts_enc_3:
-	vst1.64	{q11}, [r0,:128]		@ next round tweak
-
 	veor	q1, q1, q9
 #ifndef	BSAES_ASM_EXTENDED_KEY
 	add	r4, sp, #0x90			@ pass key schedule
@@ -1963,8 +1955,6 @@
 	b	.Lxts_enc_done
 .align	4
 .Lxts_enc_2:
-	vst1.64	{q10}, [r0,:128]		@ next round tweak
-
 	veor	q0, q0, q8
 #ifndef	BSAES_ASM_EXTENDED_KEY
 	add	r4, sp, #0x90			@ pass key schedule
@@ -1987,7 +1977,7 @@
 .align	4
 .Lxts_enc_1:
 	mov	r0, sp
-	veor	q0, q8
+	veor	q0, q0, q8
 	mov	r1, sp
 	vst1.8	{q0}, [sp,:128]
 	mov	r2, r10
@@ -2376,8 +2366,6 @@
 	b	.Lxts_dec_done
 .align	4
 .Lxts_dec_5:
-	vst1.64	{q13}, [r0,:128]		@ next round tweak
-
 	veor	q3, q3, q11
 #ifndef	BSAES_ASM_EXTENDED_KEY
 	add	r4, sp, #0x90			@ pass key schedule
@@ -2406,8 +2394,6 @@
 	b	.Lxts_dec_done
 .align	4
 .Lxts_dec_4:
-	vst1.64	{q12}, [r0,:128]		@ next round tweak
-
 	veor	q2, q2, q10
 #ifndef	BSAES_ASM_EXTENDED_KEY
 	add	r4, sp, #0x90			@ pass key schedule
@@ -2433,8 +2419,6 @@
 	b	.Lxts_dec_done
 .align	4
 .Lxts_dec_3:
-	vst1.64	{q11}, [r0,:128]		@ next round tweak
-
 	veor	q1, q1, q9
 #ifndef	BSAES_ASM_EXTENDED_KEY
 	add	r4, sp, #0x90			@ pass key schedule
@@ -2459,8 +2443,6 @@
 	b	.Lxts_dec_done
 .align	4
 .Lxts_dec_2:
-	vst1.64	{q10}, [r0,:128]		@ next round tweak
-
 	veor	q0, q0, q8
 #ifndef	BSAES_ASM_EXTENDED_KEY
 	add	r4, sp, #0x90			@ pass key schedule
@@ -2483,12 +2465,12 @@
 .align	4
 .Lxts_dec_1:
 	mov	r0, sp
-	veor	q0, q8
+	veor	q0, q0, q8
 	mov	r1, sp
 	vst1.8	{q0}, [sp,:128]
+	mov	r5, r2			@ preserve magic
 	mov	r2, r10
 	mov	r4, r3				@ preserve fp
-	mov	r5, r2			@ preserve magic
 
 	bl	AES_decrypt
 
diff --git a/linux-arm/crypto/sha/sha256-armv4.S b/linux-arm/crypto/sha/sha256-armv4.S
index 6040041..f37fd7c 100644
--- a/linux-arm/crypto/sha/sha256-armv4.S
+++ b/linux-arm/crypto/sha/sha256-armv4.S
@@ -1,4 +1,11 @@
 #if defined(__arm__)
+@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+@
+@ Licensed under the OpenSSL license (the "License").  You may not use
+@ this file except in compliance with the License.  You can obtain a copy
+@ in the file LICENSE in the source distribution or at
+@ https://www.openssl.org/source/license.html
+
 
 @ ====================================================================
 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -45,16 +52,11 @@
 #endif
 
 .text
-#if __ARM_ARCH__<7
-.code	32
-#else
+#if defined(__thumb2__)
 .syntax	unified
-# if defined(__thumb2__) && !defined(__APPLE__)
-#  define adrl adr
 .thumb
-# else
+#else
 .code	32
-# endif
 #endif
 
 .type	K256,%object
@@ -89,10 +91,10 @@
 .type	sha256_block_data_order,%function
 sha256_block_data_order:
 .Lsha256_block_data_order:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
 	sub	r3,pc,#8		@ sha256_block_data_order
 #else
-	adr	r3,sha256_block_data_order
+	adr	r3,.Lsha256_block_data_order
 #endif
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 	ldr	r12,.LOPENSSL_armcap
@@ -1878,13 +1880,14 @@
 .globl	sha256_block_data_order_neon
 .hidden	sha256_block_data_order_neon
 .type	sha256_block_data_order_neon,%function
-.align	4
+.align	5
+.skip	16
 sha256_block_data_order_neon:
 .LNEON:
 	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
 
 	sub	r11,sp,#16*4+16
-	adrl	r14,K256
+	adr	r14,K256
 	bic	r11,r11,#15		@ align for 128-bit stores
 	mov	r12,sp
 	mov	sp,r11			@ alloca
@@ -2660,7 +2663,7 @@
 #endif
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 
-# if defined(__thumb2__) && !defined(__APPLE__)
+# if defined(__thumb2__)
 #  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
 # else
 #  define INST(a,b,c,d)	.byte	a,b,c,d
@@ -2671,16 +2674,11 @@
 sha256_block_data_order_armv8:
 .LARMv8:
 	vld1.32	{q0,q1},[r0]
-# ifdef	__APPLE__
 	sub	r3,r3,#256+32
-# elif	defined(__thumb2__)
-	adr	r3,.LARMv8
-	sub	r3,r3,#.LARMv8-K256
-# else
-	adrl	r3,K256
-# endif
 	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
+	b	.Loop_v8
 
+.align	4
 .Loop_v8:
 	vld1.8	{q8,q9},[r1]!
 	vld1.8	{q10,q11},[r1]!
diff --git a/linux-ppc64le/crypto/aes/aesp8-ppc.S b/linux-ppc64le/crypto/aes/aesp8-ppc.S
new file mode 100644
index 0000000..3424ea6
--- /dev/null
+++ b/linux-ppc64le/crypto/aes/aesp8-ppc.S
@@ -0,0 +1,3633 @@
+.machine	"any"
+
+.text
+
+.align	7
+rcon:
+.byte	0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01
+.byte	0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b
+.byte	0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d
+.byte	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+Lconsts:
+	mflr	0
+	bcl	20,31,$+4
+	mflr	6
+	addi	6,6,-0x48
+	mtlr	0
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,0,0
+.byte	65,69,83,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	2
+
+.globl	aes_hw_set_encrypt_key
+.align	5
+aes_hw_set_encrypt_key:
+Lset_encrypt_key:
+	mflr	11
+	std	11,16(1)
+
+	li	6,-1
+	cmpldi	3,0
+	beq-	Lenc_key_abort
+	cmpldi	5,0
+	beq-	Lenc_key_abort
+	li	6,-2
+	cmpwi	4,128
+	blt-	Lenc_key_abort
+	cmpwi	4,256
+	bgt-	Lenc_key_abort
+	andi.	0,4,0x3f
+	bne-	Lenc_key_abort
+
+	lis	0,0xfff0
+	mfspr	12,256
+	mtspr	256,0
+
+	bl	Lconsts
+	mtlr	11
+
+	neg	9,3
+	lvx	1,0,3
+	addi	3,3,15
+	lvsr	3,0,9
+	li	8,0x20
+	cmpwi	4,192
+	lvx	2,0,3
+	vspltisb	5,0x0f
+	lvx	4,0,6
+	vxor	3,3,5
+	lvx	5,8,6
+	addi	6,6,0x10
+	vperm	1,1,2,3
+	li	7,8
+	vxor	0,0,0
+	mtctr	7
+
+	lvsl	8,0,5
+	vspltisb	9,-1
+	lvx	10,0,5
+	vperm	9,9,0,8
+
+	blt	Loop128
+	addi	3,3,8
+	beq	L192
+	addi	3,3,8
+	b	L256
+
+.align	4
+Loop128:
+	vperm	3,1,1,5
+	vsldoi	6,0,1,12
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	.long	0x10632509
+	stvx	7,0,5
+	addi	5,5,16
+
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vadduwm	4,4,4
+	vxor	1,1,3
+	bc	16,0,Loop128
+
+	lvx	4,0,6
+
+	vperm	3,1,1,5
+	vsldoi	6,0,1,12
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	.long	0x10632509
+	stvx	7,0,5
+	addi	5,5,16
+
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vadduwm	4,4,4
+	vxor	1,1,3
+
+	vperm	3,1,1,5
+	vsldoi	6,0,1,12
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	.long	0x10632509
+	stvx	7,0,5
+	addi	5,5,16
+
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vxor	1,1,3
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	stvx	7,0,5
+
+	addi	3,5,15
+	addi	5,5,0x50
+
+	li	8,10
+	b	Ldone
+
+.align	4
+L192:
+	lvx	6,0,3
+	li	7,4
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	stvx	7,0,5
+	addi	5,5,16
+	vperm	2,2,6,3
+	vspltisb	3,8
+	mtctr	7
+	vsububm	5,5,3
+
+Loop192:
+	vperm	3,2,2,5
+	vsldoi	6,0,1,12
+	.long	0x10632509
+
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+
+	vsldoi	7,0,2,8
+	vspltw	6,1,3
+	vxor	6,6,2
+	vsldoi	2,0,2,12
+	vadduwm	4,4,4
+	vxor	2,2,6
+	vxor	1,1,3
+	vxor	2,2,3
+	vsldoi	7,7,1,8
+
+	vperm	3,2,2,5
+	vsldoi	6,0,1,12
+	vperm	11,7,7,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	.long	0x10632509
+	stvx	7,0,5
+	addi	5,5,16
+
+	vsldoi	7,1,2,8
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vperm	11,7,7,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	stvx	7,0,5
+	addi	5,5,16
+
+	vspltw	6,1,3
+	vxor	6,6,2
+	vsldoi	2,0,2,12
+	vadduwm	4,4,4
+	vxor	2,2,6
+	vxor	1,1,3
+	vxor	2,2,3
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	stvx	7,0,5
+	addi	3,5,15
+	addi	5,5,16
+	bc	16,0,Loop192
+
+	li	8,12
+	addi	5,5,0x20
+	b	Ldone
+
+.align	4
+L256:
+	lvx	6,0,3
+	li	7,7
+	li	8,14
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	stvx	7,0,5
+	addi	5,5,16
+	vperm	2,2,6,3
+	mtctr	7
+
+Loop256:
+	vperm	3,2,2,5
+	vsldoi	6,0,1,12
+	vperm	11,2,2,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	.long	0x10632509
+	stvx	7,0,5
+	addi	5,5,16
+
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vsldoi	6,0,6,12
+	vxor	1,1,6
+	vadduwm	4,4,4
+	vxor	1,1,3
+	vperm	11,1,1,8
+	vsel	7,10,11,9
+	vor	10,11,11
+	stvx	7,0,5
+	addi	3,5,15
+	addi	5,5,16
+	bdz	Ldone
+
+	vspltw	3,1,3
+	vsldoi	6,0,2,12
+	.long	0x106305C8
+
+	vxor	2,2,6
+	vsldoi	6,0,6,12
+	vxor	2,2,6
+	vsldoi	6,0,6,12
+	vxor	2,2,6
+
+	vxor	2,2,3
+	b	Loop256
+
+.align	4
+Ldone:
+	lvx	2,0,3
+	vsel	2,10,2,9
+	stvx	2,0,3
+	li	6,0
+	mtspr	256,12
+	stw	8,0(5)
+
+Lenc_key_abort:
+	mr	3,6
+	blr	
+.long	0
+.byte	0,12,0x14,1,0,0,3,0
+.long	0
+
+
+.globl	aes_hw_set_decrypt_key
+.align	5
+aes_hw_set_decrypt_key:
+	stdu	1,-64(1)
+	mflr	10
+	std	10,64+16(1)
+	bl	Lset_encrypt_key
+	mtlr	10
+
+	cmpwi	3,0
+	bne-	Ldec_key_abort
+
+	slwi	7,8,4
+	subi	3,5,240
+	srwi	8,8,1
+	add	5,3,7
+	mtctr	8
+
+Ldeckey:
+	lwz	0, 0(3)
+	lwz	6, 4(3)
+	lwz	7, 8(3)
+	lwz	8, 12(3)
+	addi	3,3,16
+	lwz	9, 0(5)
+	lwz	10,4(5)
+	lwz	11,8(5)
+	lwz	12,12(5)
+	stw	0, 0(5)
+	stw	6, 4(5)
+	stw	7, 8(5)
+	stw	8, 12(5)
+	subi	5,5,16
+	stw	9, -16(3)
+	stw	10,-12(3)
+	stw	11,-8(3)
+	stw	12,-4(3)
+	bc	16,0,Ldeckey
+
+	xor	3,3,3
+Ldec_key_abort:
+	addi	1,1,64
+	blr	
+.long	0
+.byte	0,12,4,1,0x80,0,3,0
+.long	0
+
+.globl	aes_hw_encrypt
+.align	5
+aes_hw_encrypt:
+	lwz	6,240(5)
+	lis	0,0xfc00
+	mfspr	12,256
+	li	7,15
+	mtspr	256,0
+
+	lvx	0,0,3
+	neg	11,4
+	lvx	1,7,3
+	lvsl	2,0,3
+	vspltisb	4,0x0f
+	lvsr	3,0,11
+	vxor	2,2,4
+	li	7,16
+	vperm	0,0,1,2
+	lvx	1,0,5
+	lvsr	5,0,5
+	srwi	6,6,1
+	lvx	2,7,5
+	addi	7,7,16
+	subi	6,6,1
+	vperm	1,2,1,5
+
+	vxor	0,0,1
+	lvx	1,7,5
+	addi	7,7,16
+	mtctr	6
+
+Loop_enc:
+	vperm	2,1,2,5
+	.long	0x10001508
+	lvx	2,7,5
+	addi	7,7,16
+	vperm	1,2,1,5
+	.long	0x10000D08
+	lvx	1,7,5
+	addi	7,7,16
+	bc	16,0,Loop_enc
+
+	vperm	2,1,2,5
+	.long	0x10001508
+	lvx	2,7,5
+	vperm	1,2,1,5
+	.long	0x10000D09
+
+	vspltisb	2,-1
+	vxor	1,1,1
+	li	7,15
+	vperm	2,2,1,3
+	vxor	3,3,4
+	lvx	1,0,4
+	vperm	0,0,0,3
+	vsel	1,1,0,2
+	lvx	4,7,4
+	stvx	1,0,4
+	vsel	0,0,4,2
+	stvx	0,7,4
+
+	mtspr	256,12
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,3,0
+.long	0
+
+.globl	aes_hw_decrypt
+.align	5
+aes_hw_decrypt:
+	lwz	6,240(5)
+	lis	0,0xfc00
+	mfspr	12,256
+	li	7,15
+	mtspr	256,0
+
+	lvx	0,0,3
+	neg	11,4
+	lvx	1,7,3
+	lvsl	2,0,3
+	vspltisb	4,0x0f
+	lvsr	3,0,11
+	vxor	2,2,4
+	li	7,16
+	vperm	0,0,1,2
+	lvx	1,0,5
+	lvsr	5,0,5
+	srwi	6,6,1
+	lvx	2,7,5
+	addi	7,7,16
+	subi	6,6,1
+	vperm	1,2,1,5
+
+	vxor	0,0,1
+	lvx	1,7,5
+	addi	7,7,16
+	mtctr	6
+
+Loop_dec:
+	vperm	2,1,2,5
+	.long	0x10001548
+	lvx	2,7,5
+	addi	7,7,16
+	vperm	1,2,1,5
+	.long	0x10000D48
+	lvx	1,7,5
+	addi	7,7,16
+	bc	16,0,Loop_dec
+
+	vperm	2,1,2,5
+	.long	0x10001548
+	lvx	2,7,5
+	vperm	1,2,1,5
+	.long	0x10000D49
+
+	vspltisb	2,-1
+	vxor	1,1,1
+	li	7,15
+	vperm	2,2,1,3
+	vxor	3,3,4
+	lvx	1,0,4
+	vperm	0,0,0,3
+	vsel	1,1,0,2
+	lvx	4,7,4
+	stvx	1,0,4
+	vsel	0,0,4,2
+	stvx	0,7,4
+
+	mtspr	256,12
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,3,0
+.long	0
+
+.globl	aes_hw_cbc_encrypt
+.align	5
+aes_hw_cbc_encrypt:
+	cmpldi	5,16
+	bclr	14,0
+
+	cmpwi	8,0
+	lis	0,0xffe0
+	mfspr	12,256
+	mtspr	256,0
+
+	li	10,15
+	vxor	0,0,0
+	vspltisb	3,0x0f
+
+	lvx	4,0,7
+	lvsl	6,0,7
+	lvx	5,10,7
+	vxor	6,6,3
+	vperm	4,4,5,6
+
+	neg	11,3
+	lvsr	10,0,6
+	lwz	9,240(6)
+
+	lvsr	6,0,11
+	lvx	5,0,3
+	addi	3,3,15
+	vxor	6,6,3
+
+	lvsl	8,0,4
+	vspltisb	9,-1
+	lvx	7,0,4
+	vperm	9,9,0,8
+	vxor	8,8,3
+
+	srwi	9,9,1
+	li	10,16
+	subi	9,9,1
+	beq	Lcbc_dec
+
+Lcbc_enc:
+	vor	2,5,5
+	lvx	5,0,3
+	addi	3,3,16
+	mtctr	9
+	subi	5,5,16
+
+	lvx	0,0,6
+	vperm	2,2,5,6
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	vxor	2,2,0
+	lvx	0,10,6
+	addi	10,10,16
+	vxor	2,2,4
+
+Loop_cbc_enc:
+	vperm	1,0,1,10
+	.long	0x10420D08
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	.long	0x10420508
+	lvx	0,10,6
+	addi	10,10,16
+	bc	16,0,Loop_cbc_enc
+
+	vperm	1,0,1,10
+	.long	0x10420D08
+	lvx	1,10,6
+	li	10,16
+	vperm	0,1,0,10
+	.long	0x10820509
+	cmpldi	5,16
+
+	vperm	3,4,4,8
+	vsel	2,7,3,9
+	vor	7,3,3
+	stvx	2,0,4
+	addi	4,4,16
+	bge	Lcbc_enc
+
+	b	Lcbc_done
+
+.align	4
+Lcbc_dec:
+	cmpldi	5,128
+	bge	_aesp8_cbc_decrypt8x
+	vor	3,5,5
+	lvx	5,0,3
+	addi	3,3,16
+	mtctr	9
+	subi	5,5,16
+
+	lvx	0,0,6
+	vperm	3,3,5,6
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	vxor	2,3,0
+	lvx	0,10,6
+	addi	10,10,16
+
+Loop_cbc_dec:
+	vperm	1,0,1,10
+	.long	0x10420D48
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	.long	0x10420548
+	lvx	0,10,6
+	addi	10,10,16
+	bc	16,0,Loop_cbc_dec
+
+	vperm	1,0,1,10
+	.long	0x10420D48
+	lvx	1,10,6
+	li	10,16
+	vperm	0,1,0,10
+	.long	0x10420549
+	cmpldi	5,16
+
+	vxor	2,2,4
+	vor	4,3,3
+	vperm	3,2,2,8
+	vsel	2,7,3,9
+	vor	7,3,3
+	stvx	2,0,4
+	addi	4,4,16
+	bge	Lcbc_dec
+
+Lcbc_done:
+	addi	4,4,-1
+	lvx	2,0,4
+	vsel	2,7,2,9
+	stvx	2,0,4
+
+	neg	8,7
+	li	10,15
+	vxor	0,0,0
+	vspltisb	9,-1
+	vspltisb	3,0x0f
+	lvsr	8,0,8
+	vperm	9,9,0,8
+	vxor	8,8,3
+	lvx	7,0,7
+	vperm	4,4,4,8
+	vsel	2,7,4,9
+	lvx	5,10,7
+	stvx	2,0,7
+	vsel	2,4,5,9
+	stvx	2,10,7
+
+	mtspr	256,12
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,6,0
+.long	0
+.align	5
+_aesp8_cbc_decrypt8x:
+	stdu	1,-448(1)
+	li	10,207
+	li	11,223
+	stvx	20,10,1
+	addi	10,10,32
+	stvx	21,11,1
+	addi	11,11,32
+	stvx	22,10,1
+	addi	10,10,32
+	stvx	23,11,1
+	addi	11,11,32
+	stvx	24,10,1
+	addi	10,10,32
+	stvx	25,11,1
+	addi	11,11,32
+	stvx	26,10,1
+	addi	10,10,32
+	stvx	27,11,1
+	addi	11,11,32
+	stvx	28,10,1
+	addi	10,10,32
+	stvx	29,11,1
+	addi	11,11,32
+	stvx	30,10,1
+	stvx	31,11,1
+	li	0,-1
+	stw	12,396(1)
+	li	8,0x10
+	std	26,400(1)
+	li	26,0x20
+	std	27,408(1)
+	li	27,0x30
+	std	28,416(1)
+	li	28,0x40
+	std	29,424(1)
+	li	29,0x50
+	std	30,432(1)
+	li	30,0x60
+	std	31,440(1)
+	li	31,0x70
+	mtspr	256,0
+
+	subi	9,9,3
+	subi	5,5,128
+
+	lvx	23,0,6
+	lvx	30,8,6
+	addi	6,6,0x20
+	lvx	31,0,6
+	vperm	23,30,23,10
+	addi	11,1,64+15
+	mtctr	9
+
+Load_cbc_dec_key:
+	vperm	24,31,30,10
+	lvx	30,8,6
+	addi	6,6,0x20
+	stvx	24,0,11
+	vperm	25,30,31,10
+	lvx	31,0,6
+	stvx	25,8,11
+	addi	11,11,0x20
+	bc	16,0,Load_cbc_dec_key
+
+	lvx	26,8,6
+	vperm	24,31,30,10
+	lvx	27,26,6
+	stvx	24,0,11
+	vperm	25,26,31,10
+	lvx	28,27,6
+	stvx	25,8,11
+	addi	11,1,64+15
+	vperm	26,27,26,10
+	lvx	29,28,6
+	vperm	27,28,27,10
+	lvx	30,29,6
+	vperm	28,29,28,10
+	lvx	31,30,6
+	vperm	29,30,29,10
+	lvx	14,31,6
+	vperm	30,31,30,10
+	lvx	24,0,11
+	vperm	31,14,31,10
+	lvx	25,8,11
+
+
+
+	subi	3,3,15
+
+	li	10,8
+	.long	0x7C001E99
+	lvsl	6,0,10
+	vspltisb	3,0x0f
+	.long	0x7C281E99
+	vxor	6,6,3
+	.long	0x7C5A1E99
+	vperm	0,0,0,6
+	.long	0x7C7B1E99
+	vperm	1,1,1,6
+	.long	0x7D5C1E99
+	vperm	2,2,2,6
+	vxor	14,0,23
+	.long	0x7D7D1E99
+	vperm	3,3,3,6
+	vxor	15,1,23
+	.long	0x7D9E1E99
+	vperm	10,10,10,6
+	vxor	16,2,23
+	.long	0x7DBF1E99
+	addi	3,3,0x80
+	vperm	11,11,11,6
+	vxor	17,3,23
+	vperm	12,12,12,6
+	vxor	18,10,23
+	vperm	13,13,13,6
+	vxor	19,11,23
+	vxor	20,12,23
+	vxor	21,13,23
+
+	mtctr	9
+	b	Loop_cbc_dec8x
+.align	5
+Loop_cbc_dec8x:
+	.long	0x11CEC548
+	.long	0x11EFC548
+	.long	0x1210C548
+	.long	0x1231C548
+	.long	0x1252C548
+	.long	0x1273C548
+	.long	0x1294C548
+	.long	0x12B5C548
+	lvx	24,26,11
+	addi	11,11,0x20
+
+	.long	0x11CECD48
+	.long	0x11EFCD48
+	.long	0x1210CD48
+	.long	0x1231CD48
+	.long	0x1252CD48
+	.long	0x1273CD48
+	.long	0x1294CD48
+	.long	0x12B5CD48
+	lvx	25,8,11
+	bc	16,0,Loop_cbc_dec8x
+
+	subic	5,5,128
+	.long	0x11CEC548
+	.long	0x11EFC548
+	.long	0x1210C548
+	.long	0x1231C548
+	.long	0x1252C548
+	.long	0x1273C548
+	.long	0x1294C548
+	.long	0x12B5C548
+
+	subfe.	0,0,0
+	.long	0x11CECD48
+	.long	0x11EFCD48
+	.long	0x1210CD48
+	.long	0x1231CD48
+	.long	0x1252CD48
+	.long	0x1273CD48
+	.long	0x1294CD48
+	.long	0x12B5CD48
+
+	and	0,0,5
+	.long	0x11CED548
+	.long	0x11EFD548
+	.long	0x1210D548
+	.long	0x1231D548
+	.long	0x1252D548
+	.long	0x1273D548
+	.long	0x1294D548
+	.long	0x12B5D548
+
+	add	3,3,0
+
+
+
+	.long	0x11CEDD48
+	.long	0x11EFDD48
+	.long	0x1210DD48
+	.long	0x1231DD48
+	.long	0x1252DD48
+	.long	0x1273DD48
+	.long	0x1294DD48
+	.long	0x12B5DD48
+
+	addi	11,1,64+15
+	.long	0x11CEE548
+	.long	0x11EFE548
+	.long	0x1210E548
+	.long	0x1231E548
+	.long	0x1252E548
+	.long	0x1273E548
+	.long	0x1294E548
+	.long	0x12B5E548
+	lvx	24,0,11
+
+	.long	0x11CEED48
+	.long	0x11EFED48
+	.long	0x1210ED48
+	.long	0x1231ED48
+	.long	0x1252ED48
+	.long	0x1273ED48
+	.long	0x1294ED48
+	.long	0x12B5ED48
+	lvx	25,8,11
+
+	.long	0x11CEF548
+	vxor	4,4,31
+	.long	0x11EFF548
+	vxor	0,0,31
+	.long	0x1210F548
+	vxor	1,1,31
+	.long	0x1231F548
+	vxor	2,2,31
+	.long	0x1252F548
+	vxor	3,3,31
+	.long	0x1273F548
+	vxor	10,10,31
+	.long	0x1294F548
+	vxor	11,11,31
+	.long	0x12B5F548
+	vxor	12,12,31
+
+	.long	0x11CE2549
+	.long	0x11EF0549
+	.long	0x7C001E99
+	.long	0x12100D49
+	.long	0x7C281E99
+	.long	0x12311549
+	vperm	0,0,0,6
+	.long	0x7C5A1E99
+	.long	0x12521D49
+	vperm	1,1,1,6
+	.long	0x7C7B1E99
+	.long	0x12735549
+	vperm	2,2,2,6
+	.long	0x7D5C1E99
+	.long	0x12945D49
+	vperm	3,3,3,6
+	.long	0x7D7D1E99
+	.long	0x12B56549
+	vperm	10,10,10,6
+	.long	0x7D9E1E99
+	vor	4,13,13
+	vperm	11,11,11,6
+	.long	0x7DBF1E99
+	addi	3,3,0x80
+
+	vperm	14,14,14,6
+	vperm	15,15,15,6
+	.long	0x7DC02799
+	vperm	12,12,12,6
+	vxor	14,0,23
+	vperm	16,16,16,6
+	.long	0x7DE82799
+	vperm	13,13,13,6
+	vxor	15,1,23
+	vperm	17,17,17,6
+	.long	0x7E1A2799
+	vxor	16,2,23
+	vperm	18,18,18,6
+	.long	0x7E3B2799
+	vxor	17,3,23
+	vperm	19,19,19,6
+	.long	0x7E5C2799
+	vxor	18,10,23
+	vperm	20,20,20,6
+	.long	0x7E7D2799
+	vxor	19,11,23
+	vperm	21,21,21,6
+	.long	0x7E9E2799
+	vxor	20,12,23
+	.long	0x7EBF2799
+	addi	4,4,0x80
+	vxor	21,13,23
+
+	mtctr	9
+	beq	Loop_cbc_dec8x
+
+	addic.	5,5,128
+	beq	Lcbc_dec8x_done
+	nop	
+	nop	
+
+Loop_cbc_dec8x_tail:
+	.long	0x11EFC548
+	.long	0x1210C548
+	.long	0x1231C548
+	.long	0x1252C548
+	.long	0x1273C548
+	.long	0x1294C548
+	.long	0x12B5C548
+	lvx	24,26,11
+	addi	11,11,0x20
+
+	.long	0x11EFCD48
+	.long	0x1210CD48
+	.long	0x1231CD48
+	.long	0x1252CD48
+	.long	0x1273CD48
+	.long	0x1294CD48
+	.long	0x12B5CD48
+	lvx	25,8,11
+	bc	16,0,Loop_cbc_dec8x_tail
+
+	.long	0x11EFC548
+	.long	0x1210C548
+	.long	0x1231C548
+	.long	0x1252C548
+	.long	0x1273C548
+	.long	0x1294C548
+	.long	0x12B5C548
+
+	.long	0x11EFCD48
+	.long	0x1210CD48
+	.long	0x1231CD48
+	.long	0x1252CD48
+	.long	0x1273CD48
+	.long	0x1294CD48
+	.long	0x12B5CD48
+
+	.long	0x11EFD548
+	.long	0x1210D548
+	.long	0x1231D548
+	.long	0x1252D548
+	.long	0x1273D548
+	.long	0x1294D548
+	.long	0x12B5D548
+
+	.long	0x11EFDD48
+	.long	0x1210DD48
+	.long	0x1231DD48
+	.long	0x1252DD48
+	.long	0x1273DD48
+	.long	0x1294DD48
+	.long	0x12B5DD48
+
+	.long	0x11EFE548
+	.long	0x1210E548
+	.long	0x1231E548
+	.long	0x1252E548
+	.long	0x1273E548
+	.long	0x1294E548
+	.long	0x12B5E548
+
+	.long	0x11EFED48
+	.long	0x1210ED48
+	.long	0x1231ED48
+	.long	0x1252ED48
+	.long	0x1273ED48
+	.long	0x1294ED48
+	.long	0x12B5ED48
+
+	.long	0x11EFF548
+	vxor	4,4,31
+	.long	0x1210F548
+	vxor	1,1,31
+	.long	0x1231F548
+	vxor	2,2,31
+	.long	0x1252F548
+	vxor	3,3,31
+	.long	0x1273F548
+	vxor	10,10,31
+	.long	0x1294F548
+	vxor	11,11,31
+	.long	0x12B5F548
+	vxor	12,12,31
+
+	cmplwi	5,32
+	blt	Lcbc_dec8x_one
+	nop	
+	beq	Lcbc_dec8x_two
+	cmplwi	5,64
+	blt	Lcbc_dec8x_three
+	nop	
+	beq	Lcbc_dec8x_four
+	cmplwi	5,96
+	blt	Lcbc_dec8x_five
+	nop	
+	beq	Lcbc_dec8x_six
+
+Lcbc_dec8x_seven:
+	.long	0x11EF2549
+	.long	0x12100D49
+	.long	0x12311549
+	.long	0x12521D49
+	.long	0x12735549
+	.long	0x12945D49
+	.long	0x12B56549
+	vor	4,13,13
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	vperm	18,18,18,6
+	.long	0x7E3A2799
+	vperm	19,19,19,6
+	.long	0x7E5B2799
+	vperm	20,20,20,6
+	.long	0x7E7C2799
+	vperm	21,21,21,6
+	.long	0x7E9D2799
+	.long	0x7EBE2799
+	addi	4,4,0x70
+	b	Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_six:
+	.long	0x12102549
+	.long	0x12311549
+	.long	0x12521D49
+	.long	0x12735549
+	.long	0x12945D49
+	.long	0x12B56549
+	vor	4,13,13
+
+	vperm	16,16,16,6
+	vperm	17,17,17,6
+	.long	0x7E002799
+	vperm	18,18,18,6
+	.long	0x7E282799
+	vperm	19,19,19,6
+	.long	0x7E5A2799
+	vperm	20,20,20,6
+	.long	0x7E7B2799
+	vperm	21,21,21,6
+	.long	0x7E9C2799
+	.long	0x7EBD2799
+	addi	4,4,0x60
+	b	Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_five:
+	.long	0x12312549
+	.long	0x12521D49
+	.long	0x12735549
+	.long	0x12945D49
+	.long	0x12B56549
+	vor	4,13,13
+
+	vperm	17,17,17,6
+	vperm	18,18,18,6
+	.long	0x7E202799
+	vperm	19,19,19,6
+	.long	0x7E482799
+	vperm	20,20,20,6
+	.long	0x7E7A2799
+	vperm	21,21,21,6
+	.long	0x7E9B2799
+	.long	0x7EBC2799
+	addi	4,4,0x50
+	b	Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_four:
+	.long	0x12522549
+	.long	0x12735549
+	.long	0x12945D49
+	.long	0x12B56549
+	vor	4,13,13
+
+	vperm	18,18,18,6
+	vperm	19,19,19,6
+	.long	0x7E402799
+	vperm	20,20,20,6
+	.long	0x7E682799
+	vperm	21,21,21,6
+	.long	0x7E9A2799
+	.long	0x7EBB2799
+	addi	4,4,0x40
+	b	Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_three:
+	.long	0x12732549
+	.long	0x12945D49
+	.long	0x12B56549
+	vor	4,13,13
+
+	vperm	19,19,19,6
+	vperm	20,20,20,6
+	.long	0x7E602799
+	vperm	21,21,21,6
+	.long	0x7E882799
+	.long	0x7EBA2799
+	addi	4,4,0x30
+	b	Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_two:
+	.long	0x12942549
+	.long	0x12B56549
+	vor	4,13,13
+
+	vperm	20,20,20,6
+	vperm	21,21,21,6
+	.long	0x7E802799
+	.long	0x7EA82799
+	addi	4,4,0x20
+	b	Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_one:
+	.long	0x12B52549
+	vor	4,13,13
+
+	vperm	21,21,21,6
+	.long	0x7EA02799
+	addi	4,4,0x10
+
+Lcbc_dec8x_done:
+	vperm	4,4,4,6
+	.long	0x7C803F99
+
+	li	10,79
+	li	11,95
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+
+	mtspr	256,12
+	lvx	20,10,1
+	addi	10,10,32
+	lvx	21,11,1
+	addi	11,11,32
+	lvx	22,10,1
+	addi	10,10,32
+	lvx	23,11,1
+	addi	11,11,32
+	lvx	24,10,1
+	addi	10,10,32
+	lvx	25,11,1
+	addi	11,11,32
+	lvx	26,10,1
+	addi	10,10,32
+	lvx	27,11,1
+	addi	11,11,32
+	lvx	28,10,1
+	addi	10,10,32
+	lvx	29,11,1
+	addi	11,11,32
+	lvx	30,10,1
+	lvx	31,11,1
+	ld	26,400(1)
+	ld	27,408(1)
+	ld	28,416(1)
+	ld	29,424(1)
+	ld	30,432(1)
+	ld	31,440(1)
+	addi	1,1,448
+	blr	
+.long	0
+.byte	0,12,0x04,0,0x80,6,6,0
+.long	0
+
+.globl	aes_hw_ctr32_encrypt_blocks
+.align	5
+aes_hw_ctr32_encrypt_blocks:
+	cmpldi	5,1
+	bclr	14,0
+
+	lis	0,0xfff0
+	mfspr	12,256
+	mtspr	256,0
+
+	li	10,15
+	vxor	0,0,0
+	vspltisb	3,0x0f
+
+	lvx	4,0,7
+	lvsl	6,0,7
+	lvx	5,10,7
+	vspltisb	11,1
+	vxor	6,6,3
+	vperm	4,4,5,6
+	vsldoi	11,0,11,1
+
+	neg	11,3
+	lvsr	10,0,6
+	lwz	9,240(6)
+
+	lvsr	6,0,11
+	lvx	5,0,3
+	addi	3,3,15
+	vxor	6,6,3
+
+	srwi	9,9,1
+	li	10,16
+	subi	9,9,1
+
+	cmpldi	5,8
+	bge	_aesp8_ctr32_encrypt8x
+
+	lvsl	8,0,4
+	vspltisb	9,-1
+	lvx	7,0,4
+	vperm	9,9,0,8
+	vxor	8,8,3
+
+	lvx	0,0,6
+	mtctr	9
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	vxor	2,4,0
+	lvx	0,10,6
+	addi	10,10,16
+	b	Loop_ctr32_enc
+
+.align	5
+Loop_ctr32_enc:
+	vperm	1,0,1,10
+	.long	0x10420D08
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	0,1,0,10
+	.long	0x10420508
+	lvx	0,10,6
+	addi	10,10,16
+	bc	16,0,Loop_ctr32_enc
+
+	vadduwm	4,4,11
+	vor	3,5,5
+	lvx	5,0,3
+	addi	3,3,16
+	subic.	5,5,1
+
+	vperm	1,0,1,10
+	.long	0x10420D08
+	lvx	1,10,6
+	vperm	3,3,5,6
+	li	10,16
+	vperm	1,1,0,10
+	lvx	0,0,6
+	vxor	3,3,1
+	.long	0x10421D09
+
+	lvx	1,10,6
+	addi	10,10,16
+	vperm	2,2,2,8
+	vsel	3,7,2,9
+	mtctr	9
+	vperm	0,1,0,10
+	vor	7,2,2
+	vxor	2,4,0
+	lvx	0,10,6
+	addi	10,10,16
+	stvx	3,0,4
+	addi	4,4,16
+	bne	Loop_ctr32_enc
+
+	addi	4,4,-1
+	lvx	2,0,4
+	vsel	2,7,2,9
+	stvx	2,0,4
+
+	mtspr	256,12
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,6,0
+.long	0
+.align	5
+_aesp8_ctr32_encrypt8x:
+	stdu	1,-448(1)
+	li	10,207
+	li	11,223
+	stvx	20,10,1
+	addi	10,10,32
+	stvx	21,11,1
+	addi	11,11,32
+	stvx	22,10,1
+	addi	10,10,32
+	stvx	23,11,1
+	addi	11,11,32
+	stvx	24,10,1
+	addi	10,10,32
+	stvx	25,11,1
+	addi	11,11,32
+	stvx	26,10,1
+	addi	10,10,32
+	stvx	27,11,1
+	addi	11,11,32
+	stvx	28,10,1
+	addi	10,10,32
+	stvx	29,11,1
+	addi	11,11,32
+	stvx	30,10,1
+	stvx	31,11,1
+	li	0,-1
+	stw	12,396(1)
+	li	8,0x10
+	std	26,400(1)
+	li	26,0x20
+	std	27,408(1)
+	li	27,0x30
+	std	28,416(1)
+	li	28,0x40
+	std	29,424(1)
+	li	29,0x50
+	std	30,432(1)
+	li	30,0x60
+	std	31,440(1)
+	li	31,0x70
+	mtspr	256,0
+
+	subi	9,9,3
+
+	lvx	23,0,6
+	lvx	30,8,6
+	addi	6,6,0x20
+	lvx	31,0,6
+	vperm	23,30,23,10
+	addi	11,1,64+15
+	mtctr	9
+
+Load_ctr32_enc_key:
+	vperm	24,31,30,10
+	lvx	30,8,6
+	addi	6,6,0x20
+	stvx	24,0,11
+	vperm	25,30,31,10
+	lvx	31,0,6
+	stvx	25,8,11
+	addi	11,11,0x20
+	bc	16,0,Load_ctr32_enc_key
+
+	lvx	26,8,6
+	vperm	24,31,30,10
+	lvx	27,26,6
+	stvx	24,0,11
+	vperm	25,26,31,10
+	lvx	28,27,6
+	stvx	25,8,11
+	addi	11,1,64+15
+	vperm	26,27,26,10
+	lvx	29,28,6
+	vperm	27,28,27,10
+	lvx	30,29,6
+	vperm	28,29,28,10
+	lvx	31,30,6
+	vperm	29,30,29,10
+	lvx	15,31,6
+	vperm	30,31,30,10
+	lvx	24,0,11
+	vperm	31,15,31,10
+	lvx	25,8,11
+
+	vadduwm	7,11,11
+	subi	3,3,15
+	sldi	5,5,4
+
+	vadduwm	16,4,11
+	vadduwm	17,4,7
+	vxor	15,4,23
+	li	10,8
+	vadduwm	18,16,7
+	vxor	16,16,23
+	lvsl	6,0,10
+	vadduwm	19,17,7
+	vxor	17,17,23
+	vspltisb	3,0x0f
+	vadduwm	20,18,7
+	vxor	18,18,23
+	vxor	6,6,3
+	vadduwm	21,19,7
+	vxor	19,19,23
+	vadduwm	22,20,7
+	vxor	20,20,23
+	vadduwm	4,21,7
+	vxor	21,21,23
+	vxor	22,22,23
+
+	mtctr	9
+	b	Loop_ctr32_enc8x
+.align	5
+Loop_ctr32_enc8x:
+	.long	0x11EFC508
+	.long	0x1210C508
+	.long	0x1231C508
+	.long	0x1252C508
+	.long	0x1273C508
+	.long	0x1294C508
+	.long	0x12B5C508
+	.long	0x12D6C508
+Loop_ctr32_enc8x_middle:
+	lvx	24,26,11
+	addi	11,11,0x20
+
+	.long	0x11EFCD08
+	.long	0x1210CD08
+	.long	0x1231CD08
+	.long	0x1252CD08
+	.long	0x1273CD08
+	.long	0x1294CD08
+	.long	0x12B5CD08
+	.long	0x12D6CD08
+	lvx	25,8,11
+	bc	16,0,Loop_ctr32_enc8x
+
+	subic	11,5,256
+	.long	0x11EFC508
+	.long	0x1210C508
+	.long	0x1231C508
+	.long	0x1252C508
+	.long	0x1273C508
+	.long	0x1294C508
+	.long	0x12B5C508
+	.long	0x12D6C508
+
+	subfe	0,0,0
+	.long	0x11EFCD08
+	.long	0x1210CD08
+	.long	0x1231CD08
+	.long	0x1252CD08
+	.long	0x1273CD08
+	.long	0x1294CD08
+	.long	0x12B5CD08
+	.long	0x12D6CD08
+
+	and	0,0,11
+	addi	11,1,64+15
+	.long	0x11EFD508
+	.long	0x1210D508
+	.long	0x1231D508
+	.long	0x1252D508
+	.long	0x1273D508
+	.long	0x1294D508
+	.long	0x12B5D508
+	.long	0x12D6D508
+	lvx	24,0,11
+
+	subic	5,5,129
+	.long	0x11EFDD08
+	addi	5,5,1
+	.long	0x1210DD08
+	.long	0x1231DD08
+	.long	0x1252DD08
+	.long	0x1273DD08
+	.long	0x1294DD08
+	.long	0x12B5DD08
+	.long	0x12D6DD08
+	lvx	25,8,11
+
+	.long	0x11EFE508
+	.long	0x7C001E99
+	.long	0x1210E508
+	.long	0x7C281E99
+	.long	0x1231E508
+	.long	0x7C5A1E99
+	.long	0x1252E508
+	.long	0x7C7B1E99
+	.long	0x1273E508
+	.long	0x7D5C1E99
+	.long	0x1294E508
+	.long	0x7D9D1E99
+	.long	0x12B5E508
+	.long	0x7DBE1E99
+	.long	0x12D6E508
+	.long	0x7DDF1E99
+	addi	3,3,0x80
+
+	.long	0x11EFED08
+	vperm	0,0,0,6
+	.long	0x1210ED08
+	vperm	1,1,1,6
+	.long	0x1231ED08
+	vperm	2,2,2,6
+	.long	0x1252ED08
+	vperm	3,3,3,6
+	.long	0x1273ED08
+	vperm	10,10,10,6
+	.long	0x1294ED08
+	vperm	12,12,12,6
+	.long	0x12B5ED08
+	vperm	13,13,13,6
+	.long	0x12D6ED08
+	vperm	14,14,14,6
+
+	add	3,3,0
+
+
+
+	subfe.	0,0,0
+	.long	0x11EFF508
+	vxor	0,0,31
+	.long	0x1210F508
+	vxor	1,1,31
+	.long	0x1231F508
+	vxor	2,2,31
+	.long	0x1252F508
+	vxor	3,3,31
+	.long	0x1273F508
+	vxor	10,10,31
+	.long	0x1294F508
+	vxor	12,12,31
+	.long	0x12B5F508
+	vxor	13,13,31
+	.long	0x12D6F508
+	vxor	14,14,31
+
+	bne	Lctr32_enc8x_break
+
+	.long	0x100F0509
+	.long	0x10300D09
+	vadduwm	16,4,11
+	.long	0x10511509
+	vadduwm	17,4,7
+	vxor	15,4,23
+	.long	0x10721D09
+	vadduwm	18,16,7
+	vxor	16,16,23
+	.long	0x11535509
+	vadduwm	19,17,7
+	vxor	17,17,23
+	.long	0x11946509
+	vadduwm	20,18,7
+	vxor	18,18,23
+	.long	0x11B56D09
+	vadduwm	21,19,7
+	vxor	19,19,23
+	.long	0x11D67509
+	vadduwm	22,20,7
+	vxor	20,20,23
+	vperm	0,0,0,6
+	vadduwm	4,21,7
+	vxor	21,21,23
+	vperm	1,1,1,6
+	vxor	22,22,23
+	mtctr	9
+
+	.long	0x11EFC508
+	.long	0x7C002799
+	vperm	2,2,2,6
+	.long	0x1210C508
+	.long	0x7C282799
+	vperm	3,3,3,6
+	.long	0x1231C508
+	.long	0x7C5A2799
+	vperm	10,10,10,6
+	.long	0x1252C508
+	.long	0x7C7B2799
+	vperm	12,12,12,6
+	.long	0x1273C508
+	.long	0x7D5C2799
+	vperm	13,13,13,6
+	.long	0x1294C508
+	.long	0x7D9D2799
+	vperm	14,14,14,6
+	.long	0x12B5C508
+	.long	0x7DBE2799
+	.long	0x12D6C508
+	.long	0x7DDF2799
+	addi	4,4,0x80
+
+	b	Loop_ctr32_enc8x_middle
+
+.align	5
+Lctr32_enc8x_break:
+	cmpwi	5,-0x60
+	blt	Lctr32_enc8x_one
+	nop	
+	beq	Lctr32_enc8x_two
+	cmpwi	5,-0x40
+	blt	Lctr32_enc8x_three
+	nop	
+	beq	Lctr32_enc8x_four
+	cmpwi	5,-0x20
+	blt	Lctr32_enc8x_five
+	nop	
+	beq	Lctr32_enc8x_six
+	cmpwi	5,0x00
+	blt	Lctr32_enc8x_seven
+
+Lctr32_enc8x_eight:
+	.long	0x11EF0509
+	.long	0x12100D09
+	.long	0x12311509
+	.long	0x12521D09
+	.long	0x12735509
+	.long	0x12946509
+	.long	0x12B56D09
+	.long	0x12D67509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	vperm	18,18,18,6
+	.long	0x7E3A2799
+	vperm	19,19,19,6
+	.long	0x7E5B2799
+	vperm	20,20,20,6
+	.long	0x7E7C2799
+	vperm	21,21,21,6
+	.long	0x7E9D2799
+	vperm	22,22,22,6
+	.long	0x7EBE2799
+	.long	0x7EDF2799
+	addi	4,4,0x80
+	b	Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_seven:
+	.long	0x11EF0D09
+	.long	0x12101509
+	.long	0x12311D09
+	.long	0x12525509
+	.long	0x12736509
+	.long	0x12946D09
+	.long	0x12B57509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	vperm	18,18,18,6
+	.long	0x7E3A2799
+	vperm	19,19,19,6
+	.long	0x7E5B2799
+	vperm	20,20,20,6
+	.long	0x7E7C2799
+	vperm	21,21,21,6
+	.long	0x7E9D2799
+	.long	0x7EBE2799
+	addi	4,4,0x70
+	b	Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_six:
+	.long	0x11EF1509
+	.long	0x12101D09
+	.long	0x12315509
+	.long	0x12526509
+	.long	0x12736D09
+	.long	0x12947509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	vperm	18,18,18,6
+	.long	0x7E3A2799
+	vperm	19,19,19,6
+	.long	0x7E5B2799
+	vperm	20,20,20,6
+	.long	0x7E7C2799
+	.long	0x7E9D2799
+	addi	4,4,0x60
+	b	Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_five:
+	.long	0x11EF1D09
+	.long	0x12105509
+	.long	0x12316509
+	.long	0x12526D09
+	.long	0x12737509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	vperm	18,18,18,6
+	.long	0x7E3A2799
+	vperm	19,19,19,6
+	.long	0x7E5B2799
+	.long	0x7E7C2799
+	addi	4,4,0x50
+	b	Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_four:
+	.long	0x11EF5509
+	.long	0x12106509
+	.long	0x12316D09
+	.long	0x12527509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	vperm	18,18,18,6
+	.long	0x7E3A2799
+	.long	0x7E5B2799
+	addi	4,4,0x40
+	b	Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_three:
+	.long	0x11EF6509
+	.long	0x12106D09
+	.long	0x12317509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	vperm	17,17,17,6
+	.long	0x7E082799
+	.long	0x7E3A2799
+	addi	4,4,0x30
+	b	Lcbc_dec8x_done
+
+.align	5
+Lctr32_enc8x_two:
+	.long	0x11EF6D09
+	.long	0x12107509
+
+	vperm	15,15,15,6
+	vperm	16,16,16,6
+	.long	0x7DE02799
+	.long	0x7E082799
+	addi	4,4,0x20
+	b	Lcbc_dec8x_done
+
+.align	5
+Lctr32_enc8x_one:
+	.long	0x11EF7509
+
+	vperm	15,15,15,6
+	.long	0x7DE02799
+	addi	4,4,0x10
+
+Lctr32_enc8x_done:
+	li	10,79
+	li	11,95
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+	stvx	6,10,1
+	addi	10,10,32
+	stvx	6,11,1
+	addi	11,11,32
+
+	mtspr	256,12
+	lvx	20,10,1
+	addi	10,10,32
+	lvx	21,11,1
+	addi	11,11,32
+	lvx	22,10,1
+	addi	10,10,32
+	lvx	23,11,1
+	addi	11,11,32
+	lvx	24,10,1
+	addi	10,10,32
+	lvx	25,11,1
+	addi	11,11,32
+	lvx	26,10,1
+	addi	10,10,32
+	lvx	27,11,1
+	addi	11,11,32
+	lvx	28,10,1
+	addi	10,10,32
+	lvx	29,11,1
+	addi	11,11,32
+	lvx	30,10,1
+	lvx	31,11,1
+	ld	26,400(1)
+	ld	27,408(1)
+	ld	28,416(1)
+	ld	29,424(1)
+	ld	30,432(1)
+	ld	31,440(1)
+	addi	1,1,448
+	blr	
+.long	0
+.byte	0,12,0x04,0,0x80,6,6,0
+.long	0
+
+.globl	aes_hw_xts_encrypt
+.align	5
+aes_hw_xts_encrypt:
+	mr	10,3
+	li	3,-1
+	cmpldi	5,16
+	bclr	14,0
+
+	lis	0,0xfff0
+	mfspr	12,256
+	li	11,0
+	mtspr	256,0
+
+	vspltisb	9,0x07
+	lvsl	6,11,11
+	vspltisb	11,0x0f
+	vxor	6,6,9
+
+	li	3,15
+	lvx	8,0,8
+	lvsl	5,0,8
+	lvx	4,3,8
+	vxor	5,5,11
+	vperm	8,8,4,5
+
+	neg	11,10
+	lvsr	5,0,11
+	lvx	2,0,10
+	addi	10,10,15
+	vxor	5,5,11
+
+	cmpldi	7,0
+	beq	Lxts_enc_no_key2
+
+	lvsr	7,0,7
+	lwz	9,240(7)
+	srwi	9,9,1
+	subi	9,9,1
+	li	3,16
+
+	lvx	0,0,7
+	lvx	1,3,7
+	addi	3,3,16
+	vperm	0,1,0,7
+	vxor	8,8,0
+	lvx	0,3,7
+	addi	3,3,16
+	mtctr	9
+
+Ltweak_xts_enc:
+	vperm	1,0,1,7
+	.long	0x11080D08
+	lvx	1,3,7
+	addi	3,3,16
+	vperm	0,1,0,7
+	.long	0x11080508
+	lvx	0,3,7
+	addi	3,3,16
+	bc	16,0,Ltweak_xts_enc
+
+	vperm	1,0,1,7
+	.long	0x11080D08
+	lvx	1,3,7
+	vperm	0,1,0,7
+	.long	0x11080509
+
+	li	8,0
+	b	Lxts_enc
+
+Lxts_enc_no_key2:
+	li	3,-16
+	and	5,5,3
+
+
+Lxts_enc:
+	lvx	4,0,10
+	addi	10,10,16
+
+	lvsr	7,0,6
+	lwz	9,240(6)
+	srwi	9,9,1
+	subi	9,9,1
+	li	3,16
+
+	vslb	10,9,9
+	vor	10,10,9
+	vspltisb	11,1
+	vsldoi	10,10,11,15
+
+	cmpldi	5,96
+	bge	_aesp8_xts_encrypt6x
+
+	andi.	7,5,15
+	subic	0,5,32
+	subi	7,7,16
+	subfe	0,0,0
+	and	0,0,7
+	add	10,10,0
+
+	lvx	0,0,6
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	2,2,4,5
+	vperm	0,1,0,7
+	vxor	2,2,8
+	vxor	2,2,0
+	lvx	0,3,6
+	addi	3,3,16
+	mtctr	9
+	b	Loop_xts_enc
+
+.align	5
+Loop_xts_enc:
+	vperm	1,0,1,7
+	.long	0x10420D08
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	0,1,0,7
+	.long	0x10420508
+	lvx	0,3,6
+	addi	3,3,16
+	bc	16,0,Loop_xts_enc
+
+	vperm	1,0,1,7
+	.long	0x10420D08
+	lvx	1,3,6
+	li	3,16
+	vperm	0,1,0,7
+	vxor	0,0,8
+	.long	0x10620509
+
+	vperm	11,3,3,6
+
+	.long	0x7D602799
+
+	addi	4,4,16
+
+	subic.	5,5,16
+	beq	Lxts_enc_done
+
+	vor	2,4,4
+	lvx	4,0,10
+	addi	10,10,16
+	lvx	0,0,6
+	lvx	1,3,6
+	addi	3,3,16
+
+	subic	0,5,32
+	subfe	0,0,0
+	and	0,0,7
+	add	10,10,0
+
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	8,8,11
+
+	vperm	2,2,4,5
+	vperm	0,1,0,7
+	vxor	2,2,8
+	vxor	3,3,0
+	vxor	2,2,0
+	lvx	0,3,6
+	addi	3,3,16
+
+	mtctr	9
+	cmpldi	5,16
+	bge	Loop_xts_enc
+
+	vxor	3,3,8
+	lvsr	5,0,5
+	vxor	4,4,4
+	vspltisb	11,-1
+	vperm	4,4,11,5
+	vsel	2,2,3,4
+
+	subi	11,4,17
+	subi	4,4,16
+	mtctr	5
+	li	5,16
+Loop_xts_enc_steal:
+	lbzu	0,1(11)
+	stb	0,16(11)
+	bc	16,0,Loop_xts_enc_steal
+
+	mtctr	9
+	b	Loop_xts_enc
+
+Lxts_enc_done:
+	cmpldi	8,0
+	beq	Lxts_enc_ret
+
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	8,8,11
+
+	vperm	8,8,8,6
+	.long	0x7D004799
+
+Lxts_enc_ret:
+	mtspr	256,12
+	li	3,0
+	blr	
+.long	0
+.byte	0,12,0x04,0,0x80,6,6,0
+.long	0
+
+
+.globl	aes_hw_xts_decrypt
+.align	5
+aes_hw_xts_decrypt:
+	mr	10,3
+	li	3,-1
+	cmpldi	5,16
+	bclr	14,0
+
+	lis	0,0xfff8
+	mfspr	12,256
+	li	11,0
+	mtspr	256,0
+
+	andi.	0,5,15
+	neg	0,0
+	andi.	0,0,16
+	sub	5,5,0
+
+	vspltisb	9,0x07
+	lvsl	6,11,11
+	vspltisb	11,0x0f
+	vxor	6,6,9
+
+	li	3,15
+	lvx	8,0,8
+	lvsl	5,0,8
+	lvx	4,3,8
+	vxor	5,5,11
+	vperm	8,8,4,5
+
+	neg	11,10
+	lvsr	5,0,11
+	lvx	2,0,10
+	addi	10,10,15
+	vxor	5,5,11
+
+	cmpldi	7,0
+	beq	Lxts_dec_no_key2
+
+	lvsr	7,0,7
+	lwz	9,240(7)
+	srwi	9,9,1
+	subi	9,9,1
+	li	3,16
+
+	lvx	0,0,7
+	lvx	1,3,7
+	addi	3,3,16
+	vperm	0,1,0,7
+	vxor	8,8,0
+	lvx	0,3,7
+	addi	3,3,16
+	mtctr	9
+
+Ltweak_xts_dec:
+	vperm	1,0,1,7
+	.long	0x11080D08
+	lvx	1,3,7
+	addi	3,3,16
+	vperm	0,1,0,7
+	.long	0x11080508
+	lvx	0,3,7
+	addi	3,3,16
+	bc	16,0,Ltweak_xts_dec
+
+	vperm	1,0,1,7
+	.long	0x11080D08
+	lvx	1,3,7
+	vperm	0,1,0,7
+	.long	0x11080509
+
+	li	8,0
+	b	Lxts_dec
+
+Lxts_dec_no_key2:
+	neg	3,5
+	andi.	3,3,15
+	add	5,5,3
+
+
+Lxts_dec:
+	lvx	4,0,10
+	addi	10,10,16
+
+	lvsr	7,0,6
+	lwz	9,240(6)
+	srwi	9,9,1
+	subi	9,9,1
+	li	3,16
+
+	vslb	10,9,9
+	vor	10,10,9
+	vspltisb	11,1
+	vsldoi	10,10,11,15
+
+	cmpldi	5,96
+	bge	_aesp8_xts_decrypt6x
+
+	lvx	0,0,6
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	2,2,4,5
+	vperm	0,1,0,7
+	vxor	2,2,8
+	vxor	2,2,0
+	lvx	0,3,6
+	addi	3,3,16
+	mtctr	9
+
+	cmpldi	5,16
+	blt	Ltail_xts_dec
+
+
+.align	5
+Loop_xts_dec:
+	vperm	1,0,1,7
+	.long	0x10420D48
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	0,1,0,7
+	.long	0x10420548
+	lvx	0,3,6
+	addi	3,3,16
+	bc	16,0,Loop_xts_dec
+
+	vperm	1,0,1,7
+	.long	0x10420D48
+	lvx	1,3,6
+	li	3,16
+	vperm	0,1,0,7
+	vxor	0,0,8
+	.long	0x10620549
+
+	vperm	11,3,3,6
+
+	.long	0x7D602799
+
+	addi	4,4,16
+
+	subic.	5,5,16
+	beq	Lxts_dec_done
+
+	vor	2,4,4
+	lvx	4,0,10
+	addi	10,10,16
+	lvx	0,0,6
+	lvx	1,3,6
+	addi	3,3,16
+
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	8,8,11
+
+	vperm	2,2,4,5
+	vperm	0,1,0,7
+	vxor	2,2,8
+	vxor	2,2,0
+	lvx	0,3,6
+	addi	3,3,16
+
+	mtctr	9
+	cmpldi	5,16
+	bge	Loop_xts_dec
+
+Ltail_xts_dec:
+	vsrab	11,8,9
+	vaddubm	12,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	12,12,11
+
+	subi	10,10,16
+	add	10,10,5
+
+	vxor	2,2,8
+	vxor	2,2,12
+
+Loop_xts_dec_short:
+	vperm	1,0,1,7
+	.long	0x10420D48
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	0,1,0,7
+	.long	0x10420548
+	lvx	0,3,6
+	addi	3,3,16
+	bc	16,0,Loop_xts_dec_short
+
+	vperm	1,0,1,7
+	.long	0x10420D48
+	lvx	1,3,6
+	li	3,16
+	vperm	0,1,0,7
+	vxor	0,0,12
+	.long	0x10620549
+
+	vperm	11,3,3,6
+
+	.long	0x7D602799
+
+
+	vor	2,4,4
+	lvx	4,0,10
+
+	lvx	0,0,6
+	lvx	1,3,6
+	addi	3,3,16
+	vperm	2,2,4,5
+	vperm	0,1,0,7
+
+	lvsr	5,0,5
+	vxor	4,4,4
+	vspltisb	11,-1
+	vperm	4,4,11,5
+	vsel	2,2,3,4
+
+	vxor	0,0,8
+	vxor	2,2,0
+	lvx	0,3,6
+	addi	3,3,16
+
+	subi	11,4,1
+	mtctr	5
+	li	5,16
+Loop_xts_dec_steal:
+	lbzu	0,1(11)
+	stb	0,16(11)
+	bc	16,0,Loop_xts_dec_steal
+
+	mtctr	9
+	b	Loop_xts_dec
+
+Lxts_dec_done:
+	cmpldi	8,0
+	beq	Lxts_dec_ret
+
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	8,8,11
+
+	vperm	8,8,8,6
+	.long	0x7D004799
+
+Lxts_dec_ret:
+	mtspr	256,12
+	li	3,0
+	blr	
+.long	0
+.byte	0,12,0x04,0,0x80,6,6,0
+.long	0
+
+.align	5
+_aesp8_xts_encrypt6x:
+	stdu	1,-448(1)
+	mflr	11
+	li	7,207
+	li	3,223
+	std	11,464(1)
+	stvx	20,7,1
+	addi	7,7,32
+	stvx	21,3,1
+	addi	3,3,32
+	stvx	22,7,1
+	addi	7,7,32
+	stvx	23,3,1
+	addi	3,3,32
+	stvx	24,7,1
+	addi	7,7,32
+	stvx	25,3,1
+	addi	3,3,32
+	stvx	26,7,1
+	addi	7,7,32
+	stvx	27,3,1
+	addi	3,3,32
+	stvx	28,7,1
+	addi	7,7,32
+	stvx	29,3,1
+	addi	3,3,32
+	stvx	30,7,1
+	stvx	31,3,1
+	li	0,-1
+	stw	12,396(1)
+	li	3,0x10
+	std	26,400(1)
+	li	26,0x20
+	std	27,408(1)
+	li	27,0x30
+	std	28,416(1)
+	li	28,0x40
+	std	29,424(1)
+	li	29,0x50
+	std	30,432(1)
+	li	30,0x60
+	std	31,440(1)
+	li	31,0x70
+	mtspr	256,0
+
+	subi	9,9,3
+
+	lvx	23,0,6
+	lvx	30,3,6
+	addi	6,6,0x20
+	lvx	31,0,6
+	vperm	23,30,23,7
+	addi	7,1,64+15
+	mtctr	9
+
+Load_xts_enc_key:
+	vperm	24,31,30,7
+	lvx	30,3,6
+	addi	6,6,0x20
+	stvx	24,0,7
+	vperm	25,30,31,7
+	lvx	31,0,6
+	stvx	25,3,7
+	addi	7,7,0x20
+	bc	16,0,Load_xts_enc_key
+
+	lvx	26,3,6
+	vperm	24,31,30,7
+	lvx	27,26,6
+	stvx	24,0,7
+	vperm	25,26,31,7
+	lvx	28,27,6
+	stvx	25,3,7
+	addi	7,1,64+15
+	vperm	26,27,26,7
+	lvx	29,28,6
+	vperm	27,28,27,7
+	lvx	30,29,6
+	vperm	28,29,28,7
+	lvx	31,30,6
+	vperm	29,30,29,7
+	lvx	22,31,6
+	vperm	30,31,30,7
+	lvx	24,0,7
+	vperm	31,22,31,7
+	lvx	25,3,7
+
+	vperm	0,2,4,5
+	subi	10,10,31
+	vxor	17,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	7,0,17
+	vxor	8,8,11
+
+	.long	0x7C235699
+	vxor	18,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	1,1,1,6
+	vand	11,11,10
+	vxor	12,1,18
+	vxor	8,8,11
+
+	.long	0x7C5A5699
+	andi.	31,5,15
+	vxor	19,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	2,2,2,6
+	vand	11,11,10
+	vxor	13,2,19
+	vxor	8,8,11
+
+	.long	0x7C7B5699
+	sub	5,5,31
+	vxor	20,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	3,3,3,6
+	vand	11,11,10
+	vxor	14,3,20
+	vxor	8,8,11
+
+	.long	0x7C9C5699
+	subi	5,5,0x60
+	vxor	21,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	4,4,4,6
+	vand	11,11,10
+	vxor	15,4,21
+	vxor	8,8,11
+
+	.long	0x7CBD5699
+	addi	10,10,0x60
+	vxor	22,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	5,5,5,6
+	vand	11,11,10
+	vxor	16,5,22
+	vxor	8,8,11
+
+	vxor	31,31,23
+	mtctr	9
+	b	Loop_xts_enc6x
+
+.align	5
+Loop_xts_enc6x:
+	.long	0x10E7C508
+	.long	0x118CC508
+	.long	0x11ADC508
+	.long	0x11CEC508
+	.long	0x11EFC508
+	.long	0x1210C508
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD08
+	.long	0x118CCD08
+	.long	0x11ADCD08
+	.long	0x11CECD08
+	.long	0x11EFCD08
+	.long	0x1210CD08
+	lvx	25,3,7
+	bc	16,0,Loop_xts_enc6x
+
+	subic	5,5,96
+	vxor	0,17,31
+	.long	0x10E7C508
+	.long	0x118CC508
+	vsrab	11,8,9
+	vxor	17,8,23
+	vaddubm	8,8,8
+	.long	0x11ADC508
+	.long	0x11CEC508
+	vsldoi	11,11,11,15
+	.long	0x11EFC508
+	.long	0x1210C508
+
+	subfe.	0,0,0
+	vand	11,11,10
+	.long	0x10E7CD08
+	.long	0x118CCD08
+	vxor	8,8,11
+	.long	0x11ADCD08
+	.long	0x11CECD08
+	vxor	1,18,31
+	vsrab	11,8,9
+	vxor	18,8,23
+	.long	0x11EFCD08
+	.long	0x1210CD08
+
+	and	0,0,5
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x10E7D508
+	.long	0x118CD508
+	vand	11,11,10
+	.long	0x11ADD508
+	.long	0x11CED508
+	vxor	8,8,11
+	.long	0x11EFD508
+	.long	0x1210D508
+
+	add	10,10,0
+
+
+
+	vxor	2,19,31
+	vsrab	11,8,9
+	vxor	19,8,23
+	vaddubm	8,8,8
+	.long	0x10E7DD08
+	.long	0x118CDD08
+	vsldoi	11,11,11,15
+	.long	0x11ADDD08
+	.long	0x11CEDD08
+	vand	11,11,10
+	.long	0x11EFDD08
+	.long	0x1210DD08
+
+	addi	7,1,64+15
+	vxor	8,8,11
+	.long	0x10E7E508
+	.long	0x118CE508
+	vxor	3,20,31
+	vsrab	11,8,9
+	vxor	20,8,23
+	.long	0x11ADE508
+	.long	0x11CEE508
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x11EFE508
+	.long	0x1210E508
+	lvx	24,0,7
+	vand	11,11,10
+
+	.long	0x10E7ED08
+	.long	0x118CED08
+	vxor	8,8,11
+	.long	0x11ADED08
+	.long	0x11CEED08
+	vxor	4,21,31
+	vsrab	11,8,9
+	vxor	21,8,23
+	.long	0x11EFED08
+	.long	0x1210ED08
+	lvx	25,3,7
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+
+	.long	0x10E7F508
+	.long	0x118CF508
+	vand	11,11,10
+	.long	0x11ADF508
+	.long	0x11CEF508
+	vxor	8,8,11
+	.long	0x11EFF508
+	.long	0x1210F508
+	vxor	5,22,31
+	vsrab	11,8,9
+	vxor	22,8,23
+
+	.long	0x10E70509
+	.long	0x7C005699
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x118C0D09
+	.long	0x7C235699
+	.long	0x11AD1509
+	vperm	0,0,0,6
+	.long	0x7C5A5699
+	vand	11,11,10
+	.long	0x11CE1D09
+	vperm	1,1,1,6
+	.long	0x7C7B5699
+	.long	0x11EF2509
+	vperm	2,2,2,6
+	.long	0x7C9C5699
+	vxor	8,8,11
+	.long	0x11702D09
+
+	vperm	3,3,3,6
+	.long	0x7CBD5699
+	addi	10,10,0x60
+	vperm	4,4,4,6
+	vperm	5,5,5,6
+
+	vperm	7,7,7,6
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	7,0,17
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vxor	12,1,18
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	vxor	13,2,19
+	vperm	15,15,15,6
+	.long	0x7DDB2799
+	vxor	14,3,20
+	vperm	16,11,11,6
+	.long	0x7DFC2799
+	vxor	15,4,21
+	.long	0x7E1D2799
+
+	vxor	16,5,22
+	addi	4,4,0x60
+
+	mtctr	9
+	beq	Loop_xts_enc6x
+
+	addic.	5,5,0x60
+	beq	Lxts_enc6x_zero
+	cmpwi	5,0x20
+	blt	Lxts_enc6x_one
+	nop	
+	beq	Lxts_enc6x_two
+	cmpwi	5,0x40
+	blt	Lxts_enc6x_three
+	nop	
+	beq	Lxts_enc6x_four
+
+Lxts_enc6x_five:
+	vxor	7,1,17
+	vxor	12,2,18
+	vxor	13,3,19
+	vxor	14,4,20
+	vxor	15,5,21
+
+	bl	_aesp8_xts_enc5x
+
+	vperm	7,7,7,6
+	vor	17,22,22
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	vxor	11,15,22
+	vperm	15,15,15,6
+	.long	0x7DDB2799
+	.long	0x7DFC2799
+	addi	4,4,0x50
+	bne	Lxts_enc6x_steal
+	b	Lxts_enc6x_done
+
+.align	4
+Lxts_enc6x_four:
+	vxor	7,2,17
+	vxor	12,3,18
+	vxor	13,4,19
+	vxor	14,5,20
+	vxor	15,15,15
+
+	bl	_aesp8_xts_enc5x
+
+	vperm	7,7,7,6
+	vor	17,21,21
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vxor	11,14,21
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	.long	0x7DDB2799
+	addi	4,4,0x40
+	bne	Lxts_enc6x_steal
+	b	Lxts_enc6x_done
+
+.align	4
+Lxts_enc6x_three:
+	vxor	7,3,17
+	vxor	12,4,18
+	vxor	13,5,19
+	vxor	14,14,14
+	vxor	15,15,15
+
+	bl	_aesp8_xts_enc5x
+
+	vperm	7,7,7,6
+	vor	17,20,20
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	11,13,20
+	vperm	13,13,13,6
+	.long	0x7D832799
+	.long	0x7DBA2799
+	addi	4,4,0x30
+	bne	Lxts_enc6x_steal
+	b	Lxts_enc6x_done
+
+.align	4
+Lxts_enc6x_two:
+	vxor	7,4,17
+	vxor	12,5,18
+	vxor	13,13,13
+	vxor	14,14,14
+	vxor	15,15,15
+
+	bl	_aesp8_xts_enc5x
+
+	vperm	7,7,7,6
+	vor	17,19,19
+	vxor	11,12,19
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	.long	0x7D832799
+	addi	4,4,0x20
+	bne	Lxts_enc6x_steal
+	b	Lxts_enc6x_done
+
+.align	4
+Lxts_enc6x_one:
+	vxor	7,5,17
+	nop	
+Loop_xts_enc1x:
+	.long	0x10E7C508
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD08
+	lvx	25,3,7
+	bc	16,0,Loop_xts_enc1x
+
+	add	10,10,31
+	cmpwi	31,0
+	.long	0x10E7C508
+
+	subi	10,10,16
+	.long	0x10E7CD08
+
+	lvsr	5,0,31
+	.long	0x10E7D508
+
+	.long	0x7C005699
+	.long	0x10E7DD08
+
+	addi	7,1,64+15
+	.long	0x10E7E508
+	lvx	24,0,7
+
+	.long	0x10E7ED08
+	lvx	25,3,7
+	vxor	17,17,31
+
+	vperm	0,0,0,6
+	.long	0x10E7F508
+
+	vperm	0,0,0,5
+	.long	0x10E78D09
+
+	vor	17,18,18
+	vxor	11,7,18
+	vperm	7,7,7,6
+	.long	0x7CE02799
+	addi	4,4,0x10
+	bne	Lxts_enc6x_steal
+	b	Lxts_enc6x_done
+
+.align	4
+Lxts_enc6x_zero:
+	cmpwi	31,0
+	beq	Lxts_enc6x_done
+
+	add	10,10,31
+	subi	10,10,16
+	.long	0x7C005699
+	lvsr	5,0,31
+	vperm	0,0,0,6
+	vperm	0,0,0,5
+	vxor	11,11,17
+Lxts_enc6x_steal:
+	vxor	0,0,17
+	vxor	7,7,7
+	vspltisb	12,-1
+	vperm	7,7,12,5
+	vsel	7,0,11,7
+
+	subi	30,4,17
+	subi	4,4,16
+	mtctr	31
+Loop_xts_enc6x_steal:
+	lbzu	0,1(30)
+	stb	0,16(30)
+	bc	16,0,Loop_xts_enc6x_steal
+
+	li	31,0
+	mtctr	9
+	b	Loop_xts_enc1x
+
+.align	4
+Lxts_enc6x_done:
+	cmpldi	8,0
+	beq	Lxts_enc6x_ret
+
+	vxor	8,17,23
+	vperm	8,8,8,6
+	.long	0x7D004799
+
+Lxts_enc6x_ret:
+	mtlr	11
+	li	10,79
+	li	11,95
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+
+	mtspr	256,12
+	lvx	20,10,1
+	addi	10,10,32
+	lvx	21,11,1
+	addi	11,11,32
+	lvx	22,10,1
+	addi	10,10,32
+	lvx	23,11,1
+	addi	11,11,32
+	lvx	24,10,1
+	addi	10,10,32
+	lvx	25,11,1
+	addi	11,11,32
+	lvx	26,10,1
+	addi	10,10,32
+	lvx	27,11,1
+	addi	11,11,32
+	lvx	28,10,1
+	addi	10,10,32
+	lvx	29,11,1
+	addi	11,11,32
+	lvx	30,10,1
+	lvx	31,11,1
+	ld	26,400(1)
+	ld	27,408(1)
+	ld	28,416(1)
+	ld	29,424(1)
+	ld	30,432(1)
+	ld	31,440(1)
+	addi	1,1,448
+	blr	
+.long	0
+.byte	0,12,0x04,1,0x80,6,6,0
+.long	0
+
+.align	5
+_aesp8_xts_enc5x:
+	.long	0x10E7C508
+	.long	0x118CC508
+	.long	0x11ADC508
+	.long	0x11CEC508
+	.long	0x11EFC508
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD08
+	.long	0x118CCD08
+	.long	0x11ADCD08
+	.long	0x11CECD08
+	.long	0x11EFCD08
+	lvx	25,3,7
+	bc	16,0,_aesp8_xts_enc5x
+
+	add	10,10,31
+	cmpwi	31,0
+	.long	0x10E7C508
+	.long	0x118CC508
+	.long	0x11ADC508
+	.long	0x11CEC508
+	.long	0x11EFC508
+
+	subi	10,10,16
+	.long	0x10E7CD08
+	.long	0x118CCD08
+	.long	0x11ADCD08
+	.long	0x11CECD08
+	.long	0x11EFCD08
+	vxor	17,17,31
+
+	.long	0x10E7D508
+	lvsr	5,0,31
+	.long	0x118CD508
+	.long	0x11ADD508
+	.long	0x11CED508
+	.long	0x11EFD508
+	vxor	1,18,31
+
+	.long	0x10E7DD08
+	.long	0x7C005699
+	.long	0x118CDD08
+	.long	0x11ADDD08
+	.long	0x11CEDD08
+	.long	0x11EFDD08
+	vxor	2,19,31
+
+	addi	7,1,64+15
+	.long	0x10E7E508
+	.long	0x118CE508
+	.long	0x11ADE508
+	.long	0x11CEE508
+	.long	0x11EFE508
+	lvx	24,0,7
+	vxor	3,20,31
+
+	.long	0x10E7ED08
+	vperm	0,0,0,6
+	.long	0x118CED08
+	.long	0x11ADED08
+	.long	0x11CEED08
+	.long	0x11EFED08
+	lvx	25,3,7
+	vxor	4,21,31
+
+	.long	0x10E7F508
+	vperm	0,0,0,5
+	.long	0x118CF508
+	.long	0x11ADF508
+	.long	0x11CEF508
+	.long	0x11EFF508
+
+	.long	0x10E78D09
+	.long	0x118C0D09
+	.long	0x11AD1509
+	.long	0x11CE1D09
+	.long	0x11EF2509
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,0,0
+
+.align	5
+_aesp8_xts_decrypt6x:
+	stdu	1,-448(1)
+	mflr	11
+	li	7,207
+	li	3,223
+	std	11,464(1)
+	stvx	20,7,1
+	addi	7,7,32
+	stvx	21,3,1
+	addi	3,3,32
+	stvx	22,7,1
+	addi	7,7,32
+	stvx	23,3,1
+	addi	3,3,32
+	stvx	24,7,1
+	addi	7,7,32
+	stvx	25,3,1
+	addi	3,3,32
+	stvx	26,7,1
+	addi	7,7,32
+	stvx	27,3,1
+	addi	3,3,32
+	stvx	28,7,1
+	addi	7,7,32
+	stvx	29,3,1
+	addi	3,3,32
+	stvx	30,7,1
+	stvx	31,3,1
+	li	0,-1
+	stw	12,396(1)
+	li	3,0x10
+	std	26,400(1)
+	li	26,0x20
+	std	27,408(1)
+	li	27,0x30
+	std	28,416(1)
+	li	28,0x40
+	std	29,424(1)
+	li	29,0x50
+	std	30,432(1)
+	li	30,0x60
+	std	31,440(1)
+	li	31,0x70
+	mtspr	256,0
+
+	subi	9,9,3
+
+	lvx	23,0,6
+	lvx	30,3,6
+	addi	6,6,0x20
+	lvx	31,0,6
+	vperm	23,30,23,7
+	addi	7,1,64+15
+	mtctr	9
+
+Load_xts_dec_key:
+	vperm	24,31,30,7
+	lvx	30,3,6
+	addi	6,6,0x20
+	stvx	24,0,7
+	vperm	25,30,31,7
+	lvx	31,0,6
+	stvx	25,3,7
+	addi	7,7,0x20
+	bc	16,0,Load_xts_dec_key
+
+	lvx	26,3,6
+	vperm	24,31,30,7
+	lvx	27,26,6
+	stvx	24,0,7
+	vperm	25,26,31,7
+	lvx	28,27,6
+	stvx	25,3,7
+	addi	7,1,64+15
+	vperm	26,27,26,7
+	lvx	29,28,6
+	vperm	27,28,27,7
+	lvx	30,29,6
+	vperm	28,29,28,7
+	lvx	31,30,6
+	vperm	29,30,29,7
+	lvx	22,31,6
+	vperm	30,31,30,7
+	lvx	24,0,7
+	vperm	31,22,31,7
+	lvx	25,3,7
+
+	vperm	0,2,4,5
+	subi	10,10,31
+	vxor	17,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vand	11,11,10
+	vxor	7,0,17
+	vxor	8,8,11
+
+	.long	0x7C235699
+	vxor	18,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	1,1,1,6
+	vand	11,11,10
+	vxor	12,1,18
+	vxor	8,8,11
+
+	.long	0x7C5A5699
+	andi.	31,5,15
+	vxor	19,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	2,2,2,6
+	vand	11,11,10
+	vxor	13,2,19
+	vxor	8,8,11
+
+	.long	0x7C7B5699
+	sub	5,5,31
+	vxor	20,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	3,3,3,6
+	vand	11,11,10
+	vxor	14,3,20
+	vxor	8,8,11
+
+	.long	0x7C9C5699
+	subi	5,5,0x60
+	vxor	21,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	4,4,4,6
+	vand	11,11,10
+	vxor	15,4,21
+	vxor	8,8,11
+
+	.long	0x7CBD5699
+	addi	10,10,0x60
+	vxor	22,8,23
+	vsrab	11,8,9
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	vperm	5,5,5,6
+	vand	11,11,10
+	vxor	16,5,22
+	vxor	8,8,11
+
+	vxor	31,31,23
+	mtctr	9
+	b	Loop_xts_dec6x
+
+.align	5
+Loop_xts_dec6x:
+	.long	0x10E7C548
+	.long	0x118CC548
+	.long	0x11ADC548
+	.long	0x11CEC548
+	.long	0x11EFC548
+	.long	0x1210C548
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD48
+	.long	0x118CCD48
+	.long	0x11ADCD48
+	.long	0x11CECD48
+	.long	0x11EFCD48
+	.long	0x1210CD48
+	lvx	25,3,7
+	bc	16,0,Loop_xts_dec6x
+
+	subic	5,5,96
+	vxor	0,17,31
+	.long	0x10E7C548
+	.long	0x118CC548
+	vsrab	11,8,9
+	vxor	17,8,23
+	vaddubm	8,8,8
+	.long	0x11ADC548
+	.long	0x11CEC548
+	vsldoi	11,11,11,15
+	.long	0x11EFC548
+	.long	0x1210C548
+
+	subfe.	0,0,0
+	vand	11,11,10
+	.long	0x10E7CD48
+	.long	0x118CCD48
+	vxor	8,8,11
+	.long	0x11ADCD48
+	.long	0x11CECD48
+	vxor	1,18,31
+	vsrab	11,8,9
+	vxor	18,8,23
+	.long	0x11EFCD48
+	.long	0x1210CD48
+
+	and	0,0,5
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x10E7D548
+	.long	0x118CD548
+	vand	11,11,10
+	.long	0x11ADD548
+	.long	0x11CED548
+	vxor	8,8,11
+	.long	0x11EFD548
+	.long	0x1210D548
+
+	add	10,10,0
+
+
+
+	vxor	2,19,31
+	vsrab	11,8,9
+	vxor	19,8,23
+	vaddubm	8,8,8
+	.long	0x10E7DD48
+	.long	0x118CDD48
+	vsldoi	11,11,11,15
+	.long	0x11ADDD48
+	.long	0x11CEDD48
+	vand	11,11,10
+	.long	0x11EFDD48
+	.long	0x1210DD48
+
+	addi	7,1,64+15
+	vxor	8,8,11
+	.long	0x10E7E548
+	.long	0x118CE548
+	vxor	3,20,31
+	vsrab	11,8,9
+	vxor	20,8,23
+	.long	0x11ADE548
+	.long	0x11CEE548
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x11EFE548
+	.long	0x1210E548
+	lvx	24,0,7
+	vand	11,11,10
+
+	.long	0x10E7ED48
+	.long	0x118CED48
+	vxor	8,8,11
+	.long	0x11ADED48
+	.long	0x11CEED48
+	vxor	4,21,31
+	vsrab	11,8,9
+	vxor	21,8,23
+	.long	0x11EFED48
+	.long	0x1210ED48
+	lvx	25,3,7
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+
+	.long	0x10E7F548
+	.long	0x118CF548
+	vand	11,11,10
+	.long	0x11ADF548
+	.long	0x11CEF548
+	vxor	8,8,11
+	.long	0x11EFF548
+	.long	0x1210F548
+	vxor	5,22,31
+	vsrab	11,8,9
+	vxor	22,8,23
+
+	.long	0x10E70549
+	.long	0x7C005699
+	vaddubm	8,8,8
+	vsldoi	11,11,11,15
+	.long	0x118C0D49
+	.long	0x7C235699
+	.long	0x11AD1549
+	vperm	0,0,0,6
+	.long	0x7C5A5699
+	vand	11,11,10
+	.long	0x11CE1D49
+	vperm	1,1,1,6
+	.long	0x7C7B5699
+	.long	0x11EF2549
+	vperm	2,2,2,6
+	.long	0x7C9C5699
+	vxor	8,8,11
+	.long	0x12102D49
+	vperm	3,3,3,6
+	.long	0x7CBD5699
+	addi	10,10,0x60
+	vperm	4,4,4,6
+	vperm	5,5,5,6
+
+	vperm	7,7,7,6
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	7,0,17
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vxor	12,1,18
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	vxor	13,2,19
+	vperm	15,15,15,6
+	.long	0x7DDB2799
+	vxor	14,3,20
+	vperm	16,16,16,6
+	.long	0x7DFC2799
+	vxor	15,4,21
+	.long	0x7E1D2799
+	vxor	16,5,22
+	addi	4,4,0x60
+
+	mtctr	9
+	beq	Loop_xts_dec6x
+
+	addic.	5,5,0x60
+	beq	Lxts_dec6x_zero
+	cmpwi	5,0x20
+	blt	Lxts_dec6x_one
+	nop	
+	beq	Lxts_dec6x_two
+	cmpwi	5,0x40
+	blt	Lxts_dec6x_three
+	nop	
+	beq	Lxts_dec6x_four
+
+Lxts_dec6x_five:
+	vxor	7,1,17
+	vxor	12,2,18
+	vxor	13,3,19
+	vxor	14,4,20
+	vxor	15,5,21
+
+	bl	_aesp8_xts_dec5x
+
+	vperm	7,7,7,6
+	vor	17,22,22
+	vxor	18,8,23
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	7,0,18
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	vperm	15,15,15,6
+	.long	0x7DDB2799
+	.long	0x7DFC2799
+	addi	4,4,0x50
+	bne	Lxts_dec6x_steal
+	b	Lxts_dec6x_done
+
+.align	4
+Lxts_dec6x_four:
+	vxor	7,2,17
+	vxor	12,3,18
+	vxor	13,4,19
+	vxor	14,5,20
+	vxor	15,15,15
+
+	bl	_aesp8_xts_dec5x
+
+	vperm	7,7,7,6
+	vor	17,21,21
+	vor	18,22,22
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	7,0,22
+	vperm	13,13,13,6
+	.long	0x7D832799
+	vperm	14,14,14,6
+	.long	0x7DBA2799
+	.long	0x7DDB2799
+	addi	4,4,0x40
+	bne	Lxts_dec6x_steal
+	b	Lxts_dec6x_done
+
+.align	4
+Lxts_dec6x_three:
+	vxor	7,3,17
+	vxor	12,4,18
+	vxor	13,5,19
+	vxor	14,14,14
+	vxor	15,15,15
+
+	bl	_aesp8_xts_dec5x
+
+	vperm	7,7,7,6
+	vor	17,20,20
+	vor	18,21,21
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	7,0,21
+	vperm	13,13,13,6
+	.long	0x7D832799
+	.long	0x7DBA2799
+	addi	4,4,0x30
+	bne	Lxts_dec6x_steal
+	b	Lxts_dec6x_done
+
+.align	4
+Lxts_dec6x_two:
+	vxor	7,4,17
+	vxor	12,5,18
+	vxor	13,13,13
+	vxor	14,14,14
+	vxor	15,15,15
+
+	bl	_aesp8_xts_dec5x
+
+	vperm	7,7,7,6
+	vor	17,19,19
+	vor	18,20,20
+	vperm	12,12,12,6
+	.long	0x7CE02799
+	vxor	7,0,20
+	.long	0x7D832799
+	addi	4,4,0x20
+	bne	Lxts_dec6x_steal
+	b	Lxts_dec6x_done
+
+.align	4
+Lxts_dec6x_one:
+	vxor	7,5,17
+	nop	
+Loop_xts_dec1x:
+	.long	0x10E7C548
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD48
+	lvx	25,3,7
+	bc	16,0,Loop_xts_dec1x
+
+	subi	0,31,1
+	.long	0x10E7C548
+
+	andi.	0,0,16
+	cmpwi	31,0
+	.long	0x10E7CD48
+
+	sub	10,10,0
+	.long	0x10E7D548
+
+	.long	0x7C005699
+	.long	0x10E7DD48
+
+	addi	7,1,64+15
+	.long	0x10E7E548
+	lvx	24,0,7
+
+	.long	0x10E7ED48
+	lvx	25,3,7
+	vxor	17,17,31
+
+	vperm	0,0,0,6
+	.long	0x10E7F548
+
+	mtctr	9
+	.long	0x10E78D49
+
+	vor	17,18,18
+	vor	18,19,19
+	vperm	7,7,7,6
+	.long	0x7CE02799
+	addi	4,4,0x10
+	vxor	7,0,19
+	bne	Lxts_dec6x_steal
+	b	Lxts_dec6x_done
+
+.align	4
+Lxts_dec6x_zero:
+	cmpwi	31,0
+	beq	Lxts_dec6x_done
+
+	.long	0x7C005699
+	vperm	0,0,0,6
+	vxor	7,0,18
+Lxts_dec6x_steal:
+	.long	0x10E7C548
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD48
+	lvx	25,3,7
+	bc	16,0,Lxts_dec6x_steal
+
+	add	10,10,31
+	.long	0x10E7C548
+
+	cmpwi	31,0
+	.long	0x10E7CD48
+
+	.long	0x7C005699
+	.long	0x10E7D548
+
+	lvsr	5,0,31
+	.long	0x10E7DD48
+
+	addi	7,1,64+15
+	.long	0x10E7E548
+	lvx	24,0,7
+
+	.long	0x10E7ED48
+	lvx	25,3,7
+	vxor	18,18,31
+
+	vperm	0,0,0,6
+	.long	0x10E7F548
+
+	vperm	0,0,0,5
+	.long	0x11679549
+
+	vperm	7,11,11,6
+	.long	0x7CE02799
+
+
+	vxor	7,7,7
+	vspltisb	12,-1
+	vperm	7,7,12,5
+	vsel	7,0,11,7
+	vxor	7,7,17
+
+	subi	30,4,1
+	mtctr	31
+Loop_xts_dec6x_steal:
+	lbzu	0,1(30)
+	stb	0,16(30)
+	bc	16,0,Loop_xts_dec6x_steal
+
+	li	31,0
+	mtctr	9
+	b	Loop_xts_dec1x
+
+.align	4
+Lxts_dec6x_done:
+	cmpldi	8,0
+	beq	Lxts_dec6x_ret
+
+	vxor	8,17,23
+	vperm	8,8,8,6
+	.long	0x7D004799
+
+Lxts_dec6x_ret:
+	mtlr	11
+	li	10,79
+	li	11,95
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+	stvx	9,10,1
+	addi	10,10,32
+	stvx	9,11,1
+	addi	11,11,32
+
+	mtspr	256,12
+	lvx	20,10,1
+	addi	10,10,32
+	lvx	21,11,1
+	addi	11,11,32
+	lvx	22,10,1
+	addi	10,10,32
+	lvx	23,11,1
+	addi	11,11,32
+	lvx	24,10,1
+	addi	10,10,32
+	lvx	25,11,1
+	addi	11,11,32
+	lvx	26,10,1
+	addi	10,10,32
+	lvx	27,11,1
+	addi	11,11,32
+	lvx	28,10,1
+	addi	10,10,32
+	lvx	29,11,1
+	addi	11,11,32
+	lvx	30,10,1
+	lvx	31,11,1
+	ld	26,400(1)
+	ld	27,408(1)
+	ld	28,416(1)
+	ld	29,424(1)
+	ld	30,432(1)
+	ld	31,440(1)
+	addi	1,1,448
+	blr	
+.long	0
+.byte	0,12,0x04,1,0x80,6,6,0
+.long	0
+
+.align	5
+_aesp8_xts_dec5x:
+	.long	0x10E7C548
+	.long	0x118CC548
+	.long	0x11ADC548
+	.long	0x11CEC548
+	.long	0x11EFC548
+	lvx	24,26,7
+	addi	7,7,0x20
+
+	.long	0x10E7CD48
+	.long	0x118CCD48
+	.long	0x11ADCD48
+	.long	0x11CECD48
+	.long	0x11EFCD48
+	lvx	25,3,7
+	bc	16,0,_aesp8_xts_dec5x
+
+	subi	0,31,1
+	.long	0x10E7C548
+	.long	0x118CC548
+	.long	0x11ADC548
+	.long	0x11CEC548
+	.long	0x11EFC548
+
+	andi.	0,0,16
+	cmpwi	31,0
+	.long	0x10E7CD48
+	.long	0x118CCD48
+	.long	0x11ADCD48
+	.long	0x11CECD48
+	.long	0x11EFCD48
+	vxor	17,17,31
+
+	sub	10,10,0
+	.long	0x10E7D548
+	.long	0x118CD548
+	.long	0x11ADD548
+	.long	0x11CED548
+	.long	0x11EFD548
+	vxor	1,18,31
+
+	.long	0x10E7DD48
+	.long	0x7C005699
+	.long	0x118CDD48
+	.long	0x11ADDD48
+	.long	0x11CEDD48
+	.long	0x11EFDD48
+	vxor	2,19,31
+
+	addi	7,1,64+15
+	.long	0x10E7E548
+	.long	0x118CE548
+	.long	0x11ADE548
+	.long	0x11CEE548
+	.long	0x11EFE548
+	lvx	24,0,7
+	vxor	3,20,31
+
+	.long	0x10E7ED48
+	vperm	0,0,0,6
+	.long	0x118CED48
+	.long	0x11ADED48
+	.long	0x11CEED48
+	.long	0x11EFED48
+	lvx	25,3,7
+	vxor	4,21,31
+
+	.long	0x10E7F548
+	.long	0x118CF548
+	.long	0x11ADF548
+	.long	0x11CEF548
+	.long	0x11EFF548
+
+	.long	0x10E78D49
+	.long	0x118C0D49
+	.long	0x11AD1549
+	.long	0x11CE1D49
+	.long	0x11EF2549
+	mtctr	9
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,0,0
diff --git a/linux-ppc64le/crypto/modes/ghashp8-ppc.S b/linux-ppc64le/crypto/modes/ghashp8-ppc.S
new file mode 100644
index 0000000..69ae1a5
--- /dev/null
+++ b/linux-ppc64le/crypto/modes/ghashp8-ppc.S
@@ -0,0 +1,565 @@
+.machine	"any"
+
+.text
+
+.globl	gcm_init_p8
+.align	5
+gcm_init_p8:
+	li	0,-4096
+	li	8,0x10
+	mfspr	12,256
+	li	9,0x20
+	mtspr	256,0
+	li	10,0x30
+	.long	0x7D202699
+
+	vspltisb	8,-16
+	vspltisb	5,1
+	vaddubm	8,8,8
+	vxor	4,4,4
+	vor	8,8,5
+	vsldoi	8,8,4,15
+	vsldoi	6,4,5,1
+	vaddubm	8,8,8
+	vspltisb	7,7
+	vor	8,8,6
+	vspltb	6,9,0
+	vsl	9,9,5
+	vsrab	6,6,7
+	vand	6,6,8
+	vxor	3,9,6
+
+	vsldoi	9,3,3,8
+	vsldoi	8,4,8,8
+	vsldoi	11,4,9,8
+	vsldoi	10,9,4,8
+
+	.long	0x7D001F99
+	.long	0x7D681F99
+	li	8,0x40
+	.long	0x7D291F99
+	li	9,0x50
+	.long	0x7D4A1F99
+	li	10,0x60
+
+	.long	0x10035CC8
+	.long	0x10234CC8
+	.long	0x104354C8
+
+	.long	0x10E044C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8
+	vxor	0,0,7
+
+	vsldoi	6,0,0,8
+	.long	0x100044C8
+	vxor	6,6,2
+	vxor	16,0,6
+
+	vsldoi	17,16,16,8
+	vsldoi	19,4,17,8
+	vsldoi	18,17,4,8
+
+	.long	0x7E681F99
+	li	8,0x70
+	.long	0x7E291F99
+	li	9,0x80
+	.long	0x7E4A1F99
+	li	10,0x90
+	.long	0x10039CC8
+	.long	0x11B09CC8
+	.long	0x10238CC8
+	.long	0x11D08CC8
+	.long	0x104394C8
+	.long	0x11F094C8
+
+	.long	0x10E044C8
+	.long	0x114D44C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vsldoi	11,14,4,8
+	vsldoi	9,4,14,8
+	vxor	0,0,5
+	vxor	2,2,6
+	vxor	13,13,11
+	vxor	15,15,9
+
+	vsldoi	0,0,0,8
+	vsldoi	13,13,13,8
+	vxor	0,0,7
+	vxor	13,13,10
+
+	vsldoi	6,0,0,8
+	vsldoi	9,13,13,8
+	.long	0x100044C8
+	.long	0x11AD44C8
+	vxor	6,6,2
+	vxor	9,9,15
+	vxor	0,0,6
+	vxor	13,13,9
+
+	vsldoi	9,0,0,8
+	vsldoi	17,13,13,8
+	vsldoi	11,4,9,8
+	vsldoi	10,9,4,8
+	vsldoi	19,4,17,8
+	vsldoi	18,17,4,8
+
+	.long	0x7D681F99
+	li	8,0xa0
+	.long	0x7D291F99
+	li	9,0xb0
+	.long	0x7D4A1F99
+	li	10,0xc0
+	.long	0x7E681F99
+	.long	0x7E291F99
+	.long	0x7E4A1F99
+
+	mtspr	256,12
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,2,0
+.long	0
+
+.globl	gcm_gmult_p8
+.align	5
+gcm_gmult_p8:
+	lis	0,0xfff8
+	li	8,0x10
+	mfspr	12,256
+	li	9,0x20
+	mtspr	256,0
+	li	10,0x30
+	.long	0x7C601E99
+
+	.long	0x7D682699
+	lvsl	12,0,0
+	.long	0x7D292699
+	vspltisb	5,0x07
+	.long	0x7D4A2699
+	vxor	12,12,5
+	.long	0x7D002699
+	vperm	3,3,3,12
+	vxor	4,4,4
+
+	.long	0x10035CC8
+	.long	0x10234CC8
+	.long	0x104354C8
+
+	.long	0x10E044C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8
+	vxor	0,0,7
+
+	vsldoi	6,0,0,8
+	.long	0x100044C8
+	vxor	6,6,2
+	vxor	0,0,6
+
+	vperm	0,0,0,12
+	.long	0x7C001F99
+
+	mtspr	256,12
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,2,0
+.long	0
+
+
+.globl	gcm_ghash_p8
+.align	5
+gcm_ghash_p8:
+	li	0,-4096
+	li	8,0x10
+	mfspr	12,256
+	li	9,0x20
+	mtspr	256,0
+	li	10,0x30
+	.long	0x7C001E99
+
+	.long	0x7D682699
+	li	8,0x40
+	lvsl	12,0,0
+	.long	0x7D292699
+	li	9,0x50
+	vspltisb	5,0x07
+	.long	0x7D4A2699
+	li	10,0x60
+	vxor	12,12,5
+	.long	0x7D002699
+	vperm	0,0,0,12
+	vxor	4,4,4
+
+	cmpldi	6,64
+	bge	Lgcm_ghash_p8_4x
+
+	.long	0x7C602E99
+	addi	5,5,16
+	subic.	6,6,16
+	vperm	3,3,3,12
+	vxor	3,3,0
+	beq	Lshort
+
+	.long	0x7E682699
+	li	8,16
+	.long	0x7E292699
+	add	9,5,6
+	.long	0x7E4A2699
+
+
+.align	5
+Loop_2x:
+	.long	0x7E002E99
+	vperm	16,16,16,12
+
+	subic	6,6,32
+	.long	0x10039CC8
+	.long	0x11B05CC8
+	subfe	0,0,0
+	.long	0x10238CC8
+	.long	0x11D04CC8
+	and	0,0,6
+	.long	0x104394C8
+	.long	0x11F054C8
+	add	5,5,0
+
+	vxor	0,0,13
+	vxor	1,1,14
+
+	.long	0x10E044C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vxor	2,2,15
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8
+	vxor	0,0,7
+	.long	0x7C682E99
+	addi	5,5,32
+
+	vsldoi	6,0,0,8
+	.long	0x100044C8
+	vperm	3,3,3,12
+	vxor	6,6,2
+	vxor	3,3,6
+	vxor	3,3,0
+	cmpld	9,5
+	bgt	Loop_2x
+
+	cmplwi	6,0
+	bne	Leven
+
+Lshort:
+	.long	0x10035CC8
+	.long	0x10234CC8
+	.long	0x104354C8
+
+	.long	0x10E044C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8
+	vxor	0,0,7
+
+	vsldoi	6,0,0,8
+	.long	0x100044C8
+	vxor	6,6,2
+
+Leven:
+	vxor	0,0,6
+	vperm	0,0,0,12
+	.long	0x7C001F99
+
+	mtspr	256,12
+	blr	
+.long	0
+.byte	0,12,0x14,0,0,0,4,0
+.long	0
+.align	5
+.gcm_ghash_p8_4x:
+Lgcm_ghash_p8_4x:
+	stdu	1,-256(1)
+	li	10,63
+	li	11,79
+	stvx	20,10,1
+	addi	10,10,32
+	stvx	21,11,1
+	addi	11,11,32
+	stvx	22,10,1
+	addi	10,10,32
+	stvx	23,11,1
+	addi	11,11,32
+	stvx	24,10,1
+	addi	10,10,32
+	stvx	25,11,1
+	addi	11,11,32
+	stvx	26,10,1
+	addi	10,10,32
+	stvx	27,11,1
+	addi	11,11,32
+	stvx	28,10,1
+	addi	10,10,32
+	stvx	29,11,1
+	addi	11,11,32
+	stvx	30,10,1
+	li	10,0x60
+	stvx	31,11,1
+	li	0,-1
+	stw	12,252(1)
+	mtspr	256,0
+
+	lvsl	5,0,8
+
+	li	8,0x70
+	.long	0x7E292699
+	li	9,0x80
+	vspltisb	6,8
+
+	li	10,0x90
+	.long	0x7EE82699
+	li	8,0xa0
+	.long	0x7F092699
+	li	9,0xb0
+	.long	0x7F2A2699
+	li	10,0xc0
+	.long	0x7FA82699
+	li	8,0x10
+	.long	0x7FC92699
+	li	9,0x20
+	.long	0x7FEA2699
+	li	10,0x30
+
+	vsldoi	7,4,6,8
+	vaddubm	18,5,7
+	vaddubm	19,6,18
+
+	srdi	6,6,4
+
+	.long	0x7C602E99
+	.long	0x7E082E99
+	subic.	6,6,8
+	.long	0x7EC92E99
+	.long	0x7F8A2E99
+	addi	5,5,0x40
+	vperm	3,3,3,12
+	vperm	16,16,16,12
+	vperm	22,22,22,12
+	vperm	28,28,28,12
+
+	vxor	2,3,0
+
+	.long	0x11B0BCC8
+	.long	0x11D0C4C8
+	.long	0x11F0CCC8
+
+	vperm	11,17,9,18
+	vperm	5,22,28,19
+	vperm	10,17,9,19
+	vperm	6,22,28,18
+	.long	0x12B68CC8
+	.long	0x12855CC8
+	.long	0x137C4CC8
+	.long	0x134654C8
+
+	vxor	21,21,14
+	vxor	20,20,13
+	vxor	27,27,21
+	vxor	26,26,15
+
+	blt	Ltail_4x
+
+Loop_4x:
+	.long	0x7C602E99
+	.long	0x7E082E99
+	subic.	6,6,4
+	.long	0x7EC92E99
+	.long	0x7F8A2E99
+	addi	5,5,0x40
+	vperm	16,16,16,12
+	vperm	22,22,22,12
+	vperm	28,28,28,12
+	vperm	3,3,3,12
+
+	.long	0x1002ECC8
+	.long	0x1022F4C8
+	.long	0x1042FCC8
+	.long	0x11B0BCC8
+	.long	0x11D0C4C8
+	.long	0x11F0CCC8
+
+	vxor	0,0,20
+	vxor	1,1,27
+	vxor	2,2,26
+	vperm	5,22,28,19
+	vperm	6,22,28,18
+
+	.long	0x10E044C8
+	.long	0x12855CC8
+	.long	0x134654C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8
+	vxor	0,0,7
+
+	vsldoi	6,0,0,8
+	.long	0x12B68CC8
+	.long	0x137C4CC8
+	.long	0x100044C8
+
+	vxor	20,20,13
+	vxor	26,26,15
+	vxor	2,2,3
+	vxor	21,21,14
+	vxor	2,2,6
+	vxor	27,27,21
+	vxor	2,2,0
+	bge	Loop_4x
+
+Ltail_4x:
+	.long	0x1002ECC8
+	.long	0x1022F4C8
+	.long	0x1042FCC8
+
+	vxor	0,0,20
+	vxor	1,1,27
+
+	.long	0x10E044C8
+
+	vsldoi	5,1,4,8
+	vsldoi	6,4,1,8
+	vxor	2,2,26
+	vxor	0,0,5
+	vxor	2,2,6
+
+	vsldoi	0,0,0,8
+	vxor	0,0,7
+
+	vsldoi	6,0,0,8
+	.long	0x100044C8
+	vxor	6,6,2
+	vxor	0,0,6
+
+	addic.	6,6,4
+	beq	Ldone_4x
+
+	.long	0x7C602E99
+	cmpldi	6,2
+	li	6,-4
+	blt	Lone
+	.long	0x7E082E99
+	beq	Ltwo
+
+Lthree:
+	.long	0x7EC92E99
+	vperm	3,3,3,12
+	vperm	16,16,16,12
+	vperm	22,22,22,12
+
+	vxor	2,3,0
+	vor	29,23,23
+	vor	30, 24, 24
+	vor	31,25,25
+
+	vperm	5,16,22,19
+	vperm	6,16,22,18
+	.long	0x12B08CC8
+	.long	0x13764CC8
+	.long	0x12855CC8
+	.long	0x134654C8
+
+	vxor	27,27,21
+	b	Ltail_4x
+
+.align	4
+Ltwo:
+	vperm	3,3,3,12
+	vperm	16,16,16,12
+
+	vxor	2,3,0
+	vperm	5,4,16,19
+	vperm	6,4,16,18
+
+	vsldoi	29,4,17,8
+	vor	30, 17, 17
+	vsldoi	31,17,4,8
+
+	.long	0x12855CC8
+	.long	0x13704CC8
+	.long	0x134654C8
+
+	b	Ltail_4x
+
+.align	4
+Lone:
+	vperm	3,3,3,12
+
+	vsldoi	29,4,9,8
+	vor	30, 9, 9
+	vsldoi	31,9,4,8
+
+	vxor	2,3,0
+	vxor	20,20,20
+	vxor	27,27,27
+	vxor	26,26,26
+
+	b	Ltail_4x
+
+Ldone_4x:
+	vperm	0,0,0,12
+	.long	0x7C001F99
+
+	li	10,63
+	li	11,79
+	mtspr	256,12
+	lvx	20,10,1
+	addi	10,10,32
+	lvx	21,11,1
+	addi	11,11,32
+	lvx	22,10,1
+	addi	10,10,32
+	lvx	23,11,1
+	addi	11,11,32
+	lvx	24,10,1
+	addi	10,10,32
+	lvx	25,11,1
+	addi	11,11,32
+	lvx	26,10,1
+	addi	10,10,32
+	lvx	27,11,1
+	addi	11,11,32
+	lvx	28,10,1
+	addi	10,10,32
+	lvx	29,11,1
+	addi	11,11,32
+	lvx	30,10,1
+	lvx	31,11,1
+	addi	1,1,256
+	blr	
+.long	0
+.byte	0,12,0x04,0,0x80,0,4,0
+.long	0
+
+
+.byte	71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	2
+.align	2
diff --git a/linux-x86/crypto/rc4/rc4-586.S b/linux-x86/crypto/rc4/rc4-586.S
deleted file mode 100644
index d245589..0000000
--- a/linux-x86/crypto/rc4/rc4-586.S
+++ /dev/null
@@ -1,350 +0,0 @@
-#if defined(__i386__)
-.file	"rc4-586.S"
-.text
-.globl	asm_RC4
-.hidden	asm_RC4
-.type	asm_RC4,@function
-.align	16
-asm_RC4:
-.L_asm_RC4_begin:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-	movl	20(%esp),%edi
-	movl	24(%esp),%edx
-	movl	28(%esp),%esi
-	movl	32(%esp),%ebp
-	xorl	%eax,%eax
-	xorl	%ebx,%ebx
-	cmpl	$0,%edx
-	je	.L000abort
-	movb	(%edi),%al
-	movb	4(%edi),%bl
-	addl	$8,%edi
-	leal	(%esi,%edx,1),%ecx
-	subl	%esi,%ebp
-	movl	%ecx,24(%esp)
-	incb	%al
-	cmpl	$-1,256(%edi)
-	je	.L001RC4_CHAR
-	movl	(%edi,%eax,4),%ecx
-	andl	$-4,%edx
-	jz	.L002loop1
-	movl	%ebp,32(%esp)
-	testl	$-8,%edx
-	jz	.L003go4loop4
-	call	.L004PIC_me_up
-.L004PIC_me_up:
-	popl	%ebp
-	leal	OPENSSL_ia32cap_P-.L004PIC_me_up(%ebp),%ebp
-	btl	$26,(%ebp)
-	jnc	.L003go4loop4
-	movl	32(%esp),%ebp
-	andl	$-8,%edx
-	leal	-8(%esi,%edx,1),%edx
-	movl	%edx,-4(%edi)
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	movq	(%esi),%mm0
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm2
-	jmp	.L005loop_mmx_enter
-.align	16
-.L006loop_mmx:
-	addb	%cl,%bl
-	psllq	$56,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movq	(%esi),%mm0
-	movq	%mm2,-8(%ebp,%esi,1)
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm2
-.L005loop_mmx_enter:
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm0,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$8,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$16,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$24,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$32,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$40,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$48,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	movl	%ebx,%edx
-	xorl	%ebx,%ebx
-	movb	%dl,%bl
-	cmpl	-4(%edi),%esi
-	leal	8(%esi),%esi
-	jb	.L006loop_mmx
-	psllq	$56,%mm1
-	pxor	%mm1,%mm2
-	movq	%mm2,-8(%ebp,%esi,1)
-	emms
-	cmpl	24(%esp),%esi
-	je	.L007done
-	jmp	.L002loop1
-.align	16
-.L003go4loop4:
-	leal	-4(%esi,%edx,1),%edx
-	movl	%edx,28(%esp)
-.L008loop4:
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	addl	%ecx,%edx
-	incb	%al
-	andl	$255,%edx
-	movl	(%edi,%eax,4),%ecx
-	movl	(%edi,%edx,4),%ebp
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	addl	%ecx,%edx
-	incb	%al
-	andl	$255,%edx
-	rorl	$8,%ebp
-	movl	(%edi,%eax,4),%ecx
-	orl	(%edi,%edx,4),%ebp
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	addl	%ecx,%edx
-	incb	%al
-	andl	$255,%edx
-	rorl	$8,%ebp
-	movl	(%edi,%eax,4),%ecx
-	orl	(%edi,%edx,4),%ebp
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	addl	%ecx,%edx
-	incb	%al
-	andl	$255,%edx
-	rorl	$8,%ebp
-	movl	32(%esp),%ecx
-	orl	(%edi,%edx,4),%ebp
-	rorl	$8,%ebp
-	xorl	(%esi),%ebp
-	cmpl	28(%esp),%esi
-	movl	%ebp,(%ecx,%esi,1)
-	leal	4(%esi),%esi
-	movl	(%edi,%eax,4),%ecx
-	jb	.L008loop4
-	cmpl	24(%esp),%esi
-	je	.L007done
-	movl	32(%esp),%ebp
-.align	16
-.L002loop1:
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	addl	%ecx,%edx
-	incb	%al
-	andl	$255,%edx
-	movl	(%edi,%edx,4),%edx
-	xorb	(%esi),%dl
-	leal	1(%esi),%esi
-	movl	(%edi,%eax,4),%ecx
-	cmpl	24(%esp),%esi
-	movb	%dl,-1(%ebp,%esi,1)
-	jb	.L002loop1
-	jmp	.L007done
-.align	16
-.L001RC4_CHAR:
-	movzbl	(%edi,%eax,1),%ecx
-.L009cloop1:
-	addb	%cl,%bl
-	movzbl	(%edi,%ebx,1),%edx
-	movb	%cl,(%edi,%ebx,1)
-	movb	%dl,(%edi,%eax,1)
-	addb	%cl,%dl
-	movzbl	(%edi,%edx,1),%edx
-	addb	$1,%al
-	xorb	(%esi),%dl
-	leal	1(%esi),%esi
-	movzbl	(%edi,%eax,1),%ecx
-	cmpl	24(%esp),%esi
-	movb	%dl,-1(%ebp,%esi,1)
-	jb	.L009cloop1
-.L007done:
-	decb	%al
-	movl	%ebx,-4(%edi)
-	movb	%al,-8(%edi)
-.L000abort:
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	asm_RC4,.-.L_asm_RC4_begin
-.globl	asm_RC4_set_key
-.hidden	asm_RC4_set_key
-.type	asm_RC4_set_key,@function
-.align	16
-asm_RC4_set_key:
-.L_asm_RC4_set_key_begin:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-	movl	20(%esp),%edi
-	movl	24(%esp),%ebp
-	movl	28(%esp),%esi
-	call	.L010PIC_me_up
-.L010PIC_me_up:
-	popl	%edx
-	leal	OPENSSL_ia32cap_P-.L010PIC_me_up(%edx),%edx
-	leal	8(%edi),%edi
-	leal	(%esi,%ebp,1),%esi
-	negl	%ebp
-	xorl	%eax,%eax
-	movl	%ebp,-4(%edi)
-	btl	$20,(%edx)
-	jc	.L011c1stloop
-.align	16
-.L012w1stloop:
-	movl	%eax,(%edi,%eax,4)
-	addb	$1,%al
-	jnc	.L012w1stloop
-	xorl	%ecx,%ecx
-	xorl	%edx,%edx
-.align	16
-.L013w2ndloop:
-	movl	(%edi,%ecx,4),%eax
-	addb	(%esi,%ebp,1),%dl
-	addb	%al,%dl
-	addl	$1,%ebp
-	movl	(%edi,%edx,4),%ebx
-	jnz	.L014wnowrap
-	movl	-4(%edi),%ebp
-.L014wnowrap:
-	movl	%eax,(%edi,%edx,4)
-	movl	%ebx,(%edi,%ecx,4)
-	addb	$1,%cl
-	jnc	.L013w2ndloop
-	jmp	.L015exit
-.align	16
-.L011c1stloop:
-	movb	%al,(%edi,%eax,1)
-	addb	$1,%al
-	jnc	.L011c1stloop
-	xorl	%ecx,%ecx
-	xorl	%edx,%edx
-	xorl	%ebx,%ebx
-.align	16
-.L016c2ndloop:
-	movb	(%edi,%ecx,1),%al
-	addb	(%esi,%ebp,1),%dl
-	addb	%al,%dl
-	addl	$1,%ebp
-	movb	(%edi,%edx,1),%bl
-	jnz	.L017cnowrap
-	movl	-4(%edi),%ebp
-.L017cnowrap:
-	movb	%al,(%edi,%edx,1)
-	movb	%bl,(%edi,%ecx,1)
-	addb	$1,%cl
-	jnc	.L016c2ndloop
-	movl	$-1,256(%edi)
-.L015exit:
-	xorl	%eax,%eax
-	movl	%eax,-8(%edi)
-	movl	%eax,-4(%edi)
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.size	asm_RC4_set_key,.-.L_asm_RC4_set_key_begin
-#endif
diff --git a/linux-x86_64/crypto/ec/p256-x86_64-asm.S b/linux-x86_64/crypto/ec/p256-x86_64-asm.S
index 4abce6f..e059dd6 100644
--- a/linux-x86_64/crypto/ec/p256-x86_64-asm.S
+++ b/linux-x86_64/crypto/ec/p256-x86_64-asm.S
@@ -24,6 +24,7 @@
 	pushq	%r13
 
 	movq	0(%rsi),%r8
+	xorq	%r13,%r13
 	movq	8(%rsi),%r9
 	addq	%r8,%r8
 	movq	16(%rsi),%r10
@@ -34,7 +35,7 @@
 	adcq	%r10,%r10
 	adcq	%r11,%r11
 	movq	%r9,%rdx
-	sbbq	%r13,%r13
+	adcq	$0,%r13
 
 	subq	0(%rsi),%r8
 	movq	%r10,%rcx
@@ -42,14 +43,14 @@
 	sbbq	16(%rsi),%r10
 	movq	%r11,%r12
 	sbbq	24(%rsi),%r11
-	testq	%r13,%r13
+	sbbq	$0,%r13
 
-	cmovzq	%rax,%r8
-	cmovzq	%rdx,%r9
+	cmovcq	%rax,%r8
+	cmovcq	%rdx,%r9
 	movq	%r8,0(%rdi)
-	cmovzq	%rcx,%r10
+	cmovcq	%rcx,%r10
 	movq	%r9,8(%rdi)
-	cmovzq	%r12,%r11
+	cmovcq	%r12,%r11
 	movq	%r10,16(%rdi)
 	movq	%r11,24(%rdi)
 
@@ -625,6 +626,8 @@
 	movq	%r9,%rsi
 	adcq	$0,%rdx
 
+
+
 	subq	$-1,%r8
 	movq	%r10,%rax
 	sbbq	%r12,%r9
@@ -765,13 +768,14 @@
 .type	__ecp_nistz256_add_toq,@function
 .align	32
 __ecp_nistz256_add_toq:
+	xorq	%r11,%r11
 	addq	0(%rbx),%r12
 	adcq	8(%rbx),%r13
 	movq	%r12,%rax
 	adcq	16(%rbx),%r8
 	adcq	24(%rbx),%r9
 	movq	%r13,%rbp
-	sbbq	%r11,%r11
+	adcq	$0,%r11
 
 	subq	$-1,%r12
 	movq	%r8,%rcx
@@ -779,14 +783,14 @@
 	sbbq	$0,%r8
 	movq	%r9,%r10
 	sbbq	%r15,%r9
-	testq	%r11,%r11
+	sbbq	$0,%r11
 
-	cmovzq	%rax,%r12
-	cmovzq	%rbp,%r13
+	cmovcq	%rax,%r12
+	cmovcq	%rbp,%r13
 	movq	%r12,0(%rdi)
-	cmovzq	%rcx,%r8
+	cmovcq	%rcx,%r8
 	movq	%r13,8(%rdi)
-	cmovzq	%r10,%r9
+	cmovcq	%r10,%r9
 	movq	%r8,16(%rdi)
 	movq	%r9,24(%rdi)
 
@@ -854,13 +858,14 @@
 .type	__ecp_nistz256_mul_by_2q,@function
 .align	32
 __ecp_nistz256_mul_by_2q:
+	xorq	%r11,%r11
 	addq	%r12,%r12
 	adcq	%r13,%r13
 	movq	%r12,%rax
 	adcq	%r8,%r8
 	adcq	%r9,%r9
 	movq	%r13,%rbp
-	sbbq	%r11,%r11
+	adcq	$0,%r11
 
 	subq	$-1,%r12
 	movq	%r8,%rcx
@@ -868,14 +873,14 @@
 	sbbq	$0,%r8
 	movq	%r9,%r10
 	sbbq	%r15,%r9
-	testq	%r11,%r11
+	sbbq	$0,%r11
 
-	cmovzq	%rax,%r12
-	cmovzq	%rbp,%r13
+	cmovcq	%rax,%r12
+	cmovcq	%rbp,%r13
 	movq	%r12,0(%rdi)
-	cmovzq	%rcx,%r8
+	cmovcq	%rcx,%r8
 	movq	%r13,8(%rdi)
-	cmovzq	%r10,%r9
+	cmovcq	%r10,%r9
 	movq	%r8,16(%rdi)
 	movq	%r9,24(%rdi)
 
@@ -1107,16 +1112,14 @@
 	movq	%rdx,%rsi
 	movdqa	%xmm0,384(%rsp)
 	movdqa	%xmm1,384+16(%rsp)
-	por	%xmm0,%xmm1
 	movdqa	%xmm2,416(%rsp)
 	movdqa	%xmm3,416+16(%rsp)
-	por	%xmm2,%xmm3
 	movdqa	%xmm4,448(%rsp)
 	movdqa	%xmm5,448+16(%rsp)
-	por	%xmm1,%xmm3
+	por	%xmm4,%xmm5
 
 	movdqu	0(%rsi),%xmm0
-	pshufd	$0xb1,%xmm3,%xmm5
+	pshufd	$0xb1,%xmm5,%xmm3
 	movdqu	16(%rsi),%xmm1
 	movdqu	32(%rsi),%xmm2
 	por	%xmm3,%xmm5
@@ -1128,14 +1131,14 @@
 	movdqa	%xmm0,480(%rsp)
 	pshufd	$0x1e,%xmm5,%xmm4
 	movdqa	%xmm1,480+16(%rsp)
-	por	%xmm0,%xmm1
-.byte	102,72,15,110,199
+	movdqu	64(%rsi),%xmm0
+	movdqu	80(%rsi),%xmm1
 	movdqa	%xmm2,512(%rsp)
 	movdqa	%xmm3,512+16(%rsp)
-	por	%xmm2,%xmm3
 	por	%xmm4,%xmm5
 	pxor	%xmm4,%xmm4
-	por	%xmm1,%xmm3
+	por	%xmm0,%xmm1
+.byte	102,72,15,110,199
 
 	leaq	64-0(%rsi),%rsi
 	movq	%rax,544+0(%rsp)
@@ -1146,8 +1149,8 @@
 	call	__ecp_nistz256_sqr_montq
 
 	pcmpeqd	%xmm4,%xmm5
-	pshufd	$0xb1,%xmm3,%xmm4
-	por	%xmm3,%xmm4
+	pshufd	$0xb1,%xmm1,%xmm4
+	por	%xmm1,%xmm4
 	pshufd	$0,%xmm5,%xmm5
 	pshufd	$0x1e,%xmm4,%xmm3
 	por	%xmm3,%xmm4
@@ -1330,6 +1333,7 @@
 
 
 
+	xorq	%r11,%r11
 	addq	%r12,%r12
 	leaq	96(%rsp),%rsi
 	adcq	%r13,%r13
@@ -1337,7 +1341,7 @@
 	adcq	%r8,%r8
 	adcq	%r9,%r9
 	movq	%r13,%rbp
-	sbbq	%r11,%r11
+	adcq	$0,%r11
 
 	subq	$-1,%r12
 	movq	%r8,%rcx
@@ -1345,15 +1349,15 @@
 	sbbq	$0,%r8
 	movq	%r9,%r10
 	sbbq	%r15,%r9
-	testq	%r11,%r11
+	sbbq	$0,%r11
 
-	cmovzq	%rax,%r12
+	cmovcq	%rax,%r12
 	movq	0(%rsi),%rax
-	cmovzq	%rbp,%r13
+	cmovcq	%rbp,%r13
 	movq	8(%rsi),%rbp
-	cmovzq	%rcx,%r8
+	cmovcq	%rcx,%r8
 	movq	16(%rsi),%rcx
-	cmovzq	%r10,%r9
+	cmovcq	%r10,%r9
 	movq	24(%rsi),%r10
 
 	call	__ecp_nistz256_subq
@@ -1508,16 +1512,14 @@
 	movq	64+24(%rsi),%r8
 	movdqa	%xmm0,320(%rsp)
 	movdqa	%xmm1,320+16(%rsp)
-	por	%xmm0,%xmm1
 	movdqa	%xmm2,352(%rsp)
 	movdqa	%xmm3,352+16(%rsp)
-	por	%xmm2,%xmm3
 	movdqa	%xmm4,384(%rsp)
 	movdqa	%xmm5,384+16(%rsp)
-	por	%xmm1,%xmm3
+	por	%xmm4,%xmm5
 
 	movdqu	0(%rbx),%xmm0
-	pshufd	$0xb1,%xmm3,%xmm5
+	pshufd	$0xb1,%xmm5,%xmm3
 	movdqu	16(%rbx),%xmm1
 	movdqu	32(%rbx),%xmm2
 	por	%xmm3,%xmm5
@@ -1635,6 +1637,7 @@
 
 
 
+	xorq	%r11,%r11
 	addq	%r12,%r12
 	leaq	192(%rsp),%rsi
 	adcq	%r13,%r13
@@ -1642,7 +1645,7 @@
 	adcq	%r8,%r8
 	adcq	%r9,%r9
 	movq	%r13,%rbp
-	sbbq	%r11,%r11
+	adcq	$0,%r11
 
 	subq	$-1,%r12
 	movq	%r8,%rcx
@@ -1650,15 +1653,15 @@
 	sbbq	$0,%r8
 	movq	%r9,%r10
 	sbbq	%r15,%r9
-	testq	%r11,%r11
+	sbbq	$0,%r11
 
-	cmovzq	%rax,%r12
+	cmovcq	%rax,%r12
 	movq	0(%rsi),%rax
-	cmovzq	%rbp,%r13
+	cmovcq	%rbp,%r13
 	movq	8(%rsi),%rbp
-	cmovzq	%rcx,%r8
+	cmovcq	%rcx,%r8
 	movq	16(%rsi),%rcx
-	cmovzq	%r10,%r9
+	cmovcq	%r10,%r9
 	movq	24(%rsi),%r10
 
 	call	__ecp_nistz256_subq
diff --git a/linux-x86_64/crypto/rc4/rc4-x86_64.S b/linux-x86_64/crypto/rc4/rc4-x86_64.S
deleted file mode 100644
index c4d1002..0000000
--- a/linux-x86_64/crypto/rc4/rc4-x86_64.S
+++ /dev/null
@@ -1,596 +0,0 @@
-#if defined(__x86_64__)
-.text	
-.extern	OPENSSL_ia32cap_P
-.hidden OPENSSL_ia32cap_P
-
-.globl	asm_RC4
-.hidden asm_RC4
-.type	asm_RC4,@function
-.align	16
-asm_RC4:
-	orq	%rsi,%rsi
-	jne	.Lentry
-	.byte	0xf3,0xc3
-.Lentry:
-	pushq	%rbx
-	pushq	%r12
-	pushq	%r13
-.Lprologue:
-	movq	%rsi,%r11
-	movq	%rdx,%r12
-	movq	%rcx,%r13
-	xorq	%r10,%r10
-	xorq	%rcx,%rcx
-
-	leaq	8(%rdi),%rdi
-	movb	-8(%rdi),%r10b
-	movb	-4(%rdi),%cl
-	cmpl	$-1,256(%rdi)
-	je	.LRC4_CHAR
-	movl	OPENSSL_ia32cap_P(%rip),%r8d
-	xorq	%rbx,%rbx
-	incb	%r10b
-	subq	%r10,%rbx
-	subq	%r12,%r13
-	movl	(%rdi,%r10,4),%eax
-	testq	$-16,%r11
-	jz	.Lloop1
-	btl	$30,%r8d
-	jc	.Lintel
-	andq	$7,%rbx
-	leaq	1(%r10),%rsi
-	jz	.Loop8
-	subq	%rbx,%r11
-.Loop8_warmup:
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	%edx,(%rdi,%r10,4)
-	addb	%dl,%al
-	incb	%r10b
-	movl	(%rdi,%rax,4),%edx
-	movl	(%rdi,%r10,4),%eax
-	xorb	(%r12),%dl
-	movb	%dl,(%r12,%r13,1)
-	leaq	1(%r12),%r12
-	decq	%rbx
-	jnz	.Loop8_warmup
-
-	leaq	1(%r10),%rsi
-	jmp	.Loop8
-.align	16
-.Loop8:
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	0(%rdi,%rsi,4),%ebx
-	rorq	$8,%r8
-	movl	%edx,0(%rdi,%r10,4)
-	addb	%al,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%bl,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	movl	4(%rdi,%rsi,4),%eax
-	rorq	$8,%r8
-	movl	%edx,4(%rdi,%r10,4)
-	addb	%bl,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	8(%rdi,%rsi,4),%ebx
-	rorq	$8,%r8
-	movl	%edx,8(%rdi,%r10,4)
-	addb	%al,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%bl,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	movl	12(%rdi,%rsi,4),%eax
-	rorq	$8,%r8
-	movl	%edx,12(%rdi,%r10,4)
-	addb	%bl,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	16(%rdi,%rsi,4),%ebx
-	rorq	$8,%r8
-	movl	%edx,16(%rdi,%r10,4)
-	addb	%al,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%bl,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	movl	20(%rdi,%rsi,4),%eax
-	rorq	$8,%r8
-	movl	%edx,20(%rdi,%r10,4)
-	addb	%bl,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	24(%rdi,%rsi,4),%ebx
-	rorq	$8,%r8
-	movl	%edx,24(%rdi,%r10,4)
-	addb	%al,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	$8,%sil
-	addb	%bl,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	movl	-4(%rdi,%rsi,4),%eax
-	rorq	$8,%r8
-	movl	%edx,28(%rdi,%r10,4)
-	addb	%bl,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	$8,%r10b
-	rorq	$8,%r8
-	subq	$8,%r11
-
-	xorq	(%r12),%r8
-	movq	%r8,(%r12,%r13,1)
-	leaq	8(%r12),%r12
-
-	testq	$-8,%r11
-	jnz	.Loop8
-	cmpq	$0,%r11
-	jne	.Lloop1
-	jmp	.Lexit
-
-.align	16
-.Lintel:
-	testq	$-32,%r11
-	jz	.Lloop1
-	andq	$15,%rbx
-	jz	.Loop16_is_hot
-	subq	%rbx,%r11
-.Loop16_warmup:
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	%edx,(%rdi,%r10,4)
-	addb	%dl,%al
-	incb	%r10b
-	movl	(%rdi,%rax,4),%edx
-	movl	(%rdi,%r10,4),%eax
-	xorb	(%r12),%dl
-	movb	%dl,(%r12,%r13,1)
-	leaq	1(%r12),%r12
-	decq	%rbx
-	jnz	.Loop16_warmup
-
-	movq	%rcx,%rbx
-	xorq	%rcx,%rcx
-	movb	%bl,%cl
-
-.Loop16_is_hot:
-	leaq	(%rdi,%r10,4),%rsi
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	pxor	%xmm0,%xmm0
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	4(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,0(%rsi)
-	addb	%bl,%cl
-	pinsrw	$0,(%rdi,%rax,4),%xmm0
-	jmp	.Loop16_enter
-.align	16
-.Loop16:
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	pxor	%xmm0,%xmm2
-	psllq	$8,%xmm1
-	pxor	%xmm0,%xmm0
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	4(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,0(%rsi)
-	pxor	%xmm1,%xmm2
-	addb	%bl,%cl
-	pinsrw	$0,(%rdi,%rax,4),%xmm0
-	movdqu	%xmm2,(%r12,%r13,1)
-	leaq	16(%r12),%r12
-.Loop16_enter:
-	movl	(%rdi,%rcx,4),%edx
-	pxor	%xmm1,%xmm1
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	8(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,4(%rsi)
-	addb	%al,%cl
-	pinsrw	$0,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	12(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,8(%rsi)
-	addb	%bl,%cl
-	pinsrw	$1,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	16(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,12(%rsi)
-	addb	%al,%cl
-	pinsrw	$1,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	20(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,16(%rsi)
-	addb	%bl,%cl
-	pinsrw	$2,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	24(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,20(%rsi)
-	addb	%al,%cl
-	pinsrw	$2,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	28(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,24(%rsi)
-	addb	%bl,%cl
-	pinsrw	$3,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	32(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,28(%rsi)
-	addb	%al,%cl
-	pinsrw	$3,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	36(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,32(%rsi)
-	addb	%bl,%cl
-	pinsrw	$4,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	40(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,36(%rsi)
-	addb	%al,%cl
-	pinsrw	$4,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	44(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,40(%rsi)
-	addb	%bl,%cl
-	pinsrw	$5,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	48(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,44(%rsi)
-	addb	%al,%cl
-	pinsrw	$5,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	52(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,48(%rsi)
-	addb	%bl,%cl
-	pinsrw	$6,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	56(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,52(%rsi)
-	addb	%al,%cl
-	pinsrw	$6,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	60(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,56(%rsi)
-	addb	%bl,%cl
-	pinsrw	$7,(%rdi,%rax,4),%xmm0
-	addb	$16,%r10b
-	movdqu	(%r12),%xmm2
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movzbl	%bl,%ebx
-	movl	%edx,60(%rsi)
-	leaq	(%rdi,%r10,4),%rsi
-	pinsrw	$7,(%rdi,%rbx,4),%xmm1
-	movl	(%rsi),%eax
-	movq	%rcx,%rbx
-	xorq	%rcx,%rcx
-	subq	$16,%r11
-	movb	%bl,%cl
-	testq	$-16,%r11
-	jnz	.Loop16
-
-	psllq	$8,%xmm1
-	pxor	%xmm0,%xmm2
-	pxor	%xmm1,%xmm2
-	movdqu	%xmm2,(%r12,%r13,1)
-	leaq	16(%r12),%r12
-
-	cmpq	$0,%r11
-	jne	.Lloop1
-	jmp	.Lexit
-
-.align	16
-.Lloop1:
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	%edx,(%rdi,%r10,4)
-	addb	%dl,%al
-	incb	%r10b
-	movl	(%rdi,%rax,4),%edx
-	movl	(%rdi,%r10,4),%eax
-	xorb	(%r12),%dl
-	movb	%dl,(%r12,%r13,1)
-	leaq	1(%r12),%r12
-	decq	%r11
-	jnz	.Lloop1
-	jmp	.Lexit
-
-.align	16
-.LRC4_CHAR:
-	addb	$1,%r10b
-	movzbl	(%rdi,%r10,1),%eax
-	testq	$-8,%r11
-	jz	.Lcloop1
-	jmp	.Lcloop8
-.align	16
-.Lcloop8:
-	movl	(%r12),%r8d
-	movl	4(%r12),%r9d
-	addb	%al,%cl
-	leaq	1(%r10),%rsi
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%sil,%esi
-	movzbl	(%rdi,%rsi,1),%ebx
-	movb	%al,(%rdi,%rcx,1)
-	cmpq	%rsi,%rcx
-	movb	%dl,(%rdi,%r10,1)
-	jne	.Lcmov0
-	movq	%rax,%rbx
-.Lcmov0:
-	addb	%al,%dl
-	xorb	(%rdi,%rdx,1),%r8b
-	rorl	$8,%r8d
-	addb	%bl,%cl
-	leaq	1(%rsi),%r10
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%r10b,%r10d
-	movzbl	(%rdi,%r10,1),%eax
-	movb	%bl,(%rdi,%rcx,1)
-	cmpq	%r10,%rcx
-	movb	%dl,(%rdi,%rsi,1)
-	jne	.Lcmov1
-	movq	%rbx,%rax
-.Lcmov1:
-	addb	%bl,%dl
-	xorb	(%rdi,%rdx,1),%r8b
-	rorl	$8,%r8d
-	addb	%al,%cl
-	leaq	1(%r10),%rsi
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%sil,%esi
-	movzbl	(%rdi,%rsi,1),%ebx
-	movb	%al,(%rdi,%rcx,1)
-	cmpq	%rsi,%rcx
-	movb	%dl,(%rdi,%r10,1)
-	jne	.Lcmov2
-	movq	%rax,%rbx
-.Lcmov2:
-	addb	%al,%dl
-	xorb	(%rdi,%rdx,1),%r8b
-	rorl	$8,%r8d
-	addb	%bl,%cl
-	leaq	1(%rsi),%r10
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%r10b,%r10d
-	movzbl	(%rdi,%r10,1),%eax
-	movb	%bl,(%rdi,%rcx,1)
-	cmpq	%r10,%rcx
-	movb	%dl,(%rdi,%rsi,1)
-	jne	.Lcmov3
-	movq	%rbx,%rax
-.Lcmov3:
-	addb	%bl,%dl
-	xorb	(%rdi,%rdx,1),%r8b
-	rorl	$8,%r8d
-	addb	%al,%cl
-	leaq	1(%r10),%rsi
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%sil,%esi
-	movzbl	(%rdi,%rsi,1),%ebx
-	movb	%al,(%rdi,%rcx,1)
-	cmpq	%rsi,%rcx
-	movb	%dl,(%rdi,%r10,1)
-	jne	.Lcmov4
-	movq	%rax,%rbx
-.Lcmov4:
-	addb	%al,%dl
-	xorb	(%rdi,%rdx,1),%r9b
-	rorl	$8,%r9d
-	addb	%bl,%cl
-	leaq	1(%rsi),%r10
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%r10b,%r10d
-	movzbl	(%rdi,%r10,1),%eax
-	movb	%bl,(%rdi,%rcx,1)
-	cmpq	%r10,%rcx
-	movb	%dl,(%rdi,%rsi,1)
-	jne	.Lcmov5
-	movq	%rbx,%rax
-.Lcmov5:
-	addb	%bl,%dl
-	xorb	(%rdi,%rdx,1),%r9b
-	rorl	$8,%r9d
-	addb	%al,%cl
-	leaq	1(%r10),%rsi
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%sil,%esi
-	movzbl	(%rdi,%rsi,1),%ebx
-	movb	%al,(%rdi,%rcx,1)
-	cmpq	%rsi,%rcx
-	movb	%dl,(%rdi,%r10,1)
-	jne	.Lcmov6
-	movq	%rax,%rbx
-.Lcmov6:
-	addb	%al,%dl
-	xorb	(%rdi,%rdx,1),%r9b
-	rorl	$8,%r9d
-	addb	%bl,%cl
-	leaq	1(%rsi),%r10
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%r10b,%r10d
-	movzbl	(%rdi,%r10,1),%eax
-	movb	%bl,(%rdi,%rcx,1)
-	cmpq	%r10,%rcx
-	movb	%dl,(%rdi,%rsi,1)
-	jne	.Lcmov7
-	movq	%rbx,%rax
-.Lcmov7:
-	addb	%bl,%dl
-	xorb	(%rdi,%rdx,1),%r9b
-	rorl	$8,%r9d
-	leaq	-8(%r11),%r11
-	movl	%r8d,(%r13)
-	leaq	8(%r12),%r12
-	movl	%r9d,4(%r13)
-	leaq	8(%r13),%r13
-
-	testq	$-8,%r11
-	jnz	.Lcloop8
-	cmpq	$0,%r11
-	jne	.Lcloop1
-	jmp	.Lexit
-.align	16
-.Lcloop1:
-	addb	%al,%cl
-	movzbl	%cl,%ecx
-	movzbl	(%rdi,%rcx,1),%edx
-	movb	%al,(%rdi,%rcx,1)
-	movb	%dl,(%rdi,%r10,1)
-	addb	%al,%dl
-	addb	$1,%r10b
-	movzbl	%dl,%edx
-	movzbl	%r10b,%r10d
-	movzbl	(%rdi,%rdx,1),%edx
-	movzbl	(%rdi,%r10,1),%eax
-	xorb	(%r12),%dl
-	leaq	1(%r12),%r12
-	movb	%dl,(%r13)
-	leaq	1(%r13),%r13
-	subq	$1,%r11
-	jnz	.Lcloop1
-	jmp	.Lexit
-
-.align	16
-.Lexit:
-	subb	$1,%r10b
-	movl	%r10d,-8(%rdi)
-	movl	%ecx,-4(%rdi)
-
-	movq	(%rsp),%r13
-	movq	8(%rsp),%r12
-	movq	16(%rsp),%rbx
-	addq	$24,%rsp
-.Lepilogue:
-	.byte	0xf3,0xc3
-.size	asm_RC4,.-asm_RC4
-.globl	asm_RC4_set_key
-.hidden asm_RC4_set_key
-.type	asm_RC4_set_key,@function
-.align	16
-asm_RC4_set_key:
-	leaq	8(%rdi),%rdi
-	leaq	(%rdx,%rsi,1),%rdx
-	negq	%rsi
-	movq	%rsi,%rcx
-	xorl	%eax,%eax
-	xorq	%r9,%r9
-	xorq	%r10,%r10
-	xorq	%r11,%r11
-
-	movl	OPENSSL_ia32cap_P(%rip),%r8d
-	btl	$20,%r8d
-	jc	.Lc1stloop
-	jmp	.Lw1stloop
-
-.align	16
-.Lw1stloop:
-	movl	%eax,(%rdi,%rax,4)
-	addb	$1,%al
-	jnc	.Lw1stloop
-
-	xorq	%r9,%r9
-	xorq	%r8,%r8
-.align	16
-.Lw2ndloop:
-	movl	(%rdi,%r9,4),%r10d
-	addb	(%rdx,%rsi,1),%r8b
-	addb	%r10b,%r8b
-	addq	$1,%rsi
-	movl	(%rdi,%r8,4),%r11d
-	cmovzq	%rcx,%rsi
-	movl	%r10d,(%rdi,%r8,4)
-	movl	%r11d,(%rdi,%r9,4)
-	addb	$1,%r9b
-	jnc	.Lw2ndloop
-	jmp	.Lexit_key
-
-.align	16
-.Lc1stloop:
-	movb	%al,(%rdi,%rax,1)
-	addb	$1,%al
-	jnc	.Lc1stloop
-
-	xorq	%r9,%r9
-	xorq	%r8,%r8
-.align	16
-.Lc2ndloop:
-	movb	(%rdi,%r9,1),%r10b
-	addb	(%rdx,%rsi,1),%r8b
-	addb	%r10b,%r8b
-	addq	$1,%rsi
-	movb	(%rdi,%r8,1),%r11b
-	jnz	.Lcnowrap
-	movq	%rcx,%rsi
-.Lcnowrap:
-	movb	%r10b,(%rdi,%r8,1)
-	movb	%r11b,(%rdi,%r9,1)
-	addb	$1,%r9b
-	jnc	.Lc2ndloop
-	movl	$-1,256(%rdi)
-
-.align	16
-.Lexit_key:
-	xorl	%eax,%eax
-	movl	%eax,-8(%rdi)
-	movl	%eax,-4(%rdi)
-	.byte	0xf3,0xc3
-.size	asm_RC4_set_key,.-asm_RC4_set_key
-#endif
diff --git a/mac-x86/crypto/rc4/rc4-586.S b/mac-x86/crypto/rc4/rc4-586.S
deleted file mode 100644
index dcddc58..0000000
--- a/mac-x86/crypto/rc4/rc4-586.S
+++ /dev/null
@@ -1,350 +0,0 @@
-#if defined(__i386__)
-.file	"rc4-586.S"
-.text
-.globl	_asm_RC4
-.private_extern	_asm_RC4
-.align	4
-_asm_RC4:
-L_asm_RC4_begin:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-	movl	20(%esp),%edi
-	movl	24(%esp),%edx
-	movl	28(%esp),%esi
-	movl	32(%esp),%ebp
-	xorl	%eax,%eax
-	xorl	%ebx,%ebx
-	cmpl	$0,%edx
-	je	L000abort
-	movb	(%edi),%al
-	movb	4(%edi),%bl
-	addl	$8,%edi
-	leal	(%esi,%edx,1),%ecx
-	subl	%esi,%ebp
-	movl	%ecx,24(%esp)
-	incb	%al
-	cmpl	$-1,256(%edi)
-	je	L001RC4_CHAR
-	movl	(%edi,%eax,4),%ecx
-	andl	$-4,%edx
-	jz	L002loop1
-	movl	%ebp,32(%esp)
-	testl	$-8,%edx
-	jz	L003go4loop4
-	call	L004PIC_me_up
-L004PIC_me_up:
-	popl	%ebp
-	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-L004PIC_me_up(%ebp),%ebp
-	btl	$26,(%ebp)
-	jnc	L003go4loop4
-	movl	32(%esp),%ebp
-	andl	$-8,%edx
-	leal	-8(%esi,%edx,1),%edx
-	movl	%edx,-4(%edi)
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	movq	(%esi),%mm0
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm2
-	jmp	L005loop_mmx_enter
-.align	4,0x90
-L006loop_mmx:
-	addb	%cl,%bl
-	psllq	$56,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movq	(%esi),%mm0
-	movq	%mm2,-8(%ebp,%esi,1)
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm2
-L005loop_mmx_enter:
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm0,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$8,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$16,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$24,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$32,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$40,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	addb	%cl,%bl
-	psllq	$48,%mm1
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	incl	%eax
-	addl	%ecx,%edx
-	movzbl	%al,%eax
-	movzbl	%dl,%edx
-	pxor	%mm1,%mm2
-	movl	(%edi,%eax,4),%ecx
-	movd	(%edi,%edx,4),%mm1
-	movl	%ebx,%edx
-	xorl	%ebx,%ebx
-	movb	%dl,%bl
-	cmpl	-4(%edi),%esi
-	leal	8(%esi),%esi
-	jb	L006loop_mmx
-	psllq	$56,%mm1
-	pxor	%mm1,%mm2
-	movq	%mm2,-8(%ebp,%esi,1)
-	emms
-	cmpl	24(%esp),%esi
-	je	L007done
-	jmp	L002loop1
-.align	4,0x90
-L003go4loop4:
-	leal	-4(%esi,%edx,1),%edx
-	movl	%edx,28(%esp)
-L008loop4:
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	addl	%ecx,%edx
-	incb	%al
-	andl	$255,%edx
-	movl	(%edi,%eax,4),%ecx
-	movl	(%edi,%edx,4),%ebp
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	addl	%ecx,%edx
-	incb	%al
-	andl	$255,%edx
-	rorl	$8,%ebp
-	movl	(%edi,%eax,4),%ecx
-	orl	(%edi,%edx,4),%ebp
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	addl	%ecx,%edx
-	incb	%al
-	andl	$255,%edx
-	rorl	$8,%ebp
-	movl	(%edi,%eax,4),%ecx
-	orl	(%edi,%edx,4),%ebp
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	addl	%ecx,%edx
-	incb	%al
-	andl	$255,%edx
-	rorl	$8,%ebp
-	movl	32(%esp),%ecx
-	orl	(%edi,%edx,4),%ebp
-	rorl	$8,%ebp
-	xorl	(%esi),%ebp
-	cmpl	28(%esp),%esi
-	movl	%ebp,(%ecx,%esi,1)
-	leal	4(%esi),%esi
-	movl	(%edi,%eax,4),%ecx
-	jb	L008loop4
-	cmpl	24(%esp),%esi
-	je	L007done
-	movl	32(%esp),%ebp
-.align	4,0x90
-L002loop1:
-	addb	%cl,%bl
-	movl	(%edi,%ebx,4),%edx
-	movl	%ecx,(%edi,%ebx,4)
-	movl	%edx,(%edi,%eax,4)
-	addl	%ecx,%edx
-	incb	%al
-	andl	$255,%edx
-	movl	(%edi,%edx,4),%edx
-	xorb	(%esi),%dl
-	leal	1(%esi),%esi
-	movl	(%edi,%eax,4),%ecx
-	cmpl	24(%esp),%esi
-	movb	%dl,-1(%ebp,%esi,1)
-	jb	L002loop1
-	jmp	L007done
-.align	4,0x90
-L001RC4_CHAR:
-	movzbl	(%edi,%eax,1),%ecx
-L009cloop1:
-	addb	%cl,%bl
-	movzbl	(%edi,%ebx,1),%edx
-	movb	%cl,(%edi,%ebx,1)
-	movb	%dl,(%edi,%eax,1)
-	addb	%cl,%dl
-	movzbl	(%edi,%edx,1),%edx
-	addb	$1,%al
-	xorb	(%esi),%dl
-	leal	1(%esi),%esi
-	movzbl	(%edi,%eax,1),%ecx
-	cmpl	24(%esp),%esi
-	movb	%dl,-1(%ebp,%esi,1)
-	jb	L009cloop1
-L007done:
-	decb	%al
-	movl	%ebx,-4(%edi)
-	movb	%al,-8(%edi)
-L000abort:
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.globl	_asm_RC4_set_key
-.private_extern	_asm_RC4_set_key
-.align	4
-_asm_RC4_set_key:
-L_asm_RC4_set_key_begin:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%esi
-	pushl	%edi
-	movl	20(%esp),%edi
-	movl	24(%esp),%ebp
-	movl	28(%esp),%esi
-	call	L010PIC_me_up
-L010PIC_me_up:
-	popl	%edx
-	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%edx),%edx
-	leal	8(%edi),%edi
-	leal	(%esi,%ebp,1),%esi
-	negl	%ebp
-	xorl	%eax,%eax
-	movl	%ebp,-4(%edi)
-	btl	$20,(%edx)
-	jc	L011c1stloop
-.align	4,0x90
-L012w1stloop:
-	movl	%eax,(%edi,%eax,4)
-	addb	$1,%al
-	jnc	L012w1stloop
-	xorl	%ecx,%ecx
-	xorl	%edx,%edx
-.align	4,0x90
-L013w2ndloop:
-	movl	(%edi,%ecx,4),%eax
-	addb	(%esi,%ebp,1),%dl
-	addb	%al,%dl
-	addl	$1,%ebp
-	movl	(%edi,%edx,4),%ebx
-	jnz	L014wnowrap
-	movl	-4(%edi),%ebp
-L014wnowrap:
-	movl	%eax,(%edi,%edx,4)
-	movl	%ebx,(%edi,%ecx,4)
-	addb	$1,%cl
-	jnc	L013w2ndloop
-	jmp	L015exit
-.align	4,0x90
-L011c1stloop:
-	movb	%al,(%edi,%eax,1)
-	addb	$1,%al
-	jnc	L011c1stloop
-	xorl	%ecx,%ecx
-	xorl	%edx,%edx
-	xorl	%ebx,%ebx
-.align	4,0x90
-L016c2ndloop:
-	movb	(%edi,%ecx,1),%al
-	addb	(%esi,%ebp,1),%dl
-	addb	%al,%dl
-	addl	$1,%ebp
-	movb	(%edi,%edx,1),%bl
-	jnz	L017cnowrap
-	movl	-4(%edi),%ebp
-L017cnowrap:
-	movb	%al,(%edi,%edx,1)
-	movb	%bl,(%edi,%ecx,1)
-	addb	$1,%cl
-	jnc	L016c2ndloop
-	movl	$-1,256(%edi)
-L015exit:
-	xorl	%eax,%eax
-	movl	%eax,-8(%edi)
-	movl	%eax,-4(%edi)
-	popl	%edi
-	popl	%esi
-	popl	%ebx
-	popl	%ebp
-	ret
-.section __IMPORT,__pointers,non_lazy_symbol_pointers
-L_OPENSSL_ia32cap_P$non_lazy_ptr:
-.indirect_symbol	_OPENSSL_ia32cap_P
-.long	0
-#endif
diff --git a/mac-x86_64/crypto/ec/p256-x86_64-asm.S b/mac-x86_64/crypto/ec/p256-x86_64-asm.S
index 1cd0cc3..97fb75a 100644
--- a/mac-x86_64/crypto/ec/p256-x86_64-asm.S
+++ b/mac-x86_64/crypto/ec/p256-x86_64-asm.S
@@ -23,6 +23,7 @@
 	pushq	%r13
 
 	movq	0(%rsi),%r8
+	xorq	%r13,%r13
 	movq	8(%rsi),%r9
 	addq	%r8,%r8
 	movq	16(%rsi),%r10
@@ -33,7 +34,7 @@
 	adcq	%r10,%r10
 	adcq	%r11,%r11
 	movq	%r9,%rdx
-	sbbq	%r13,%r13
+	adcq	$0,%r13
 
 	subq	0(%rsi),%r8
 	movq	%r10,%rcx
@@ -41,14 +42,14 @@
 	sbbq	16(%rsi),%r10
 	movq	%r11,%r12
 	sbbq	24(%rsi),%r11
-	testq	%r13,%r13
+	sbbq	$0,%r13
 
-	cmovzq	%rax,%r8
-	cmovzq	%rdx,%r9
+	cmovcq	%rax,%r8
+	cmovcq	%rdx,%r9
 	movq	%r8,0(%rdi)
-	cmovzq	%rcx,%r10
+	cmovcq	%rcx,%r10
 	movq	%r9,8(%rdi)
-	cmovzq	%r12,%r11
+	cmovcq	%r12,%r11
 	movq	%r10,16(%rdi)
 	movq	%r11,24(%rdi)
 
@@ -624,6 +625,8 @@
 	movq	%r9,%rsi
 	adcq	$0,%rdx
 
+
+
 	subq	$-1,%r8
 	movq	%r10,%rax
 	sbbq	%r12,%r9
@@ -764,13 +767,14 @@
 
 .p2align	5
 __ecp_nistz256_add_toq:
+	xorq	%r11,%r11
 	addq	0(%rbx),%r12
 	adcq	8(%rbx),%r13
 	movq	%r12,%rax
 	adcq	16(%rbx),%r8
 	adcq	24(%rbx),%r9
 	movq	%r13,%rbp
-	sbbq	%r11,%r11
+	adcq	$0,%r11
 
 	subq	$-1,%r12
 	movq	%r8,%rcx
@@ -778,14 +782,14 @@
 	sbbq	$0,%r8
 	movq	%r9,%r10
 	sbbq	%r15,%r9
-	testq	%r11,%r11
+	sbbq	$0,%r11
 
-	cmovzq	%rax,%r12
-	cmovzq	%rbp,%r13
+	cmovcq	%rax,%r12
+	cmovcq	%rbp,%r13
 	movq	%r12,0(%rdi)
-	cmovzq	%rcx,%r8
+	cmovcq	%rcx,%r8
 	movq	%r13,8(%rdi)
-	cmovzq	%r10,%r9
+	cmovcq	%r10,%r9
 	movq	%r8,16(%rdi)
 	movq	%r9,24(%rdi)
 
@@ -853,13 +857,14 @@
 
 .p2align	5
 __ecp_nistz256_mul_by_2q:
+	xorq	%r11,%r11
 	addq	%r12,%r12
 	adcq	%r13,%r13
 	movq	%r12,%rax
 	adcq	%r8,%r8
 	adcq	%r9,%r9
 	movq	%r13,%rbp
-	sbbq	%r11,%r11
+	adcq	$0,%r11
 
 	subq	$-1,%r12
 	movq	%r8,%rcx
@@ -867,14 +872,14 @@
 	sbbq	$0,%r8
 	movq	%r9,%r10
 	sbbq	%r15,%r9
-	testq	%r11,%r11
+	sbbq	$0,%r11
 
-	cmovzq	%rax,%r12
-	cmovzq	%rbp,%r13
+	cmovcq	%rax,%r12
+	cmovcq	%rbp,%r13
 	movq	%r12,0(%rdi)
-	cmovzq	%rcx,%r8
+	cmovcq	%rcx,%r8
 	movq	%r13,8(%rdi)
-	cmovzq	%r10,%r9
+	cmovcq	%r10,%r9
 	movq	%r8,16(%rdi)
 	movq	%r9,24(%rdi)
 
@@ -1106,16 +1111,14 @@
 	movq	%rdx,%rsi
 	movdqa	%xmm0,384(%rsp)
 	movdqa	%xmm1,384+16(%rsp)
-	por	%xmm0,%xmm1
 	movdqa	%xmm2,416(%rsp)
 	movdqa	%xmm3,416+16(%rsp)
-	por	%xmm2,%xmm3
 	movdqa	%xmm4,448(%rsp)
 	movdqa	%xmm5,448+16(%rsp)
-	por	%xmm1,%xmm3
+	por	%xmm4,%xmm5
 
 	movdqu	0(%rsi),%xmm0
-	pshufd	$0xb1,%xmm3,%xmm5
+	pshufd	$0xb1,%xmm5,%xmm3
 	movdqu	16(%rsi),%xmm1
 	movdqu	32(%rsi),%xmm2
 	por	%xmm3,%xmm5
@@ -1127,14 +1130,14 @@
 	movdqa	%xmm0,480(%rsp)
 	pshufd	$0x1e,%xmm5,%xmm4
 	movdqa	%xmm1,480+16(%rsp)
-	por	%xmm0,%xmm1
-.byte	102,72,15,110,199
+	movdqu	64(%rsi),%xmm0
+	movdqu	80(%rsi),%xmm1
 	movdqa	%xmm2,512(%rsp)
 	movdqa	%xmm3,512+16(%rsp)
-	por	%xmm2,%xmm3
 	por	%xmm4,%xmm5
 	pxor	%xmm4,%xmm4
-	por	%xmm1,%xmm3
+	por	%xmm0,%xmm1
+.byte	102,72,15,110,199
 
 	leaq	64-0(%rsi),%rsi
 	movq	%rax,544+0(%rsp)
@@ -1145,8 +1148,8 @@
 	call	__ecp_nistz256_sqr_montq
 
 	pcmpeqd	%xmm4,%xmm5
-	pshufd	$0xb1,%xmm3,%xmm4
-	por	%xmm3,%xmm4
+	pshufd	$0xb1,%xmm1,%xmm4
+	por	%xmm1,%xmm4
 	pshufd	$0,%xmm5,%xmm5
 	pshufd	$0x1e,%xmm4,%xmm3
 	por	%xmm3,%xmm4
@@ -1329,6 +1332,7 @@
 
 
 
+	xorq	%r11,%r11
 	addq	%r12,%r12
 	leaq	96(%rsp),%rsi
 	adcq	%r13,%r13
@@ -1336,7 +1340,7 @@
 	adcq	%r8,%r8
 	adcq	%r9,%r9
 	movq	%r13,%rbp
-	sbbq	%r11,%r11
+	adcq	$0,%r11
 
 	subq	$-1,%r12
 	movq	%r8,%rcx
@@ -1344,15 +1348,15 @@
 	sbbq	$0,%r8
 	movq	%r9,%r10
 	sbbq	%r15,%r9
-	testq	%r11,%r11
+	sbbq	$0,%r11
 
-	cmovzq	%rax,%r12
+	cmovcq	%rax,%r12
 	movq	0(%rsi),%rax
-	cmovzq	%rbp,%r13
+	cmovcq	%rbp,%r13
 	movq	8(%rsi),%rbp
-	cmovzq	%rcx,%r8
+	cmovcq	%rcx,%r8
 	movq	16(%rsi),%rcx
-	cmovzq	%r10,%r9
+	cmovcq	%r10,%r9
 	movq	24(%rsi),%r10
 
 	call	__ecp_nistz256_subq
@@ -1507,16 +1511,14 @@
 	movq	64+24(%rsi),%r8
 	movdqa	%xmm0,320(%rsp)
 	movdqa	%xmm1,320+16(%rsp)
-	por	%xmm0,%xmm1
 	movdqa	%xmm2,352(%rsp)
 	movdqa	%xmm3,352+16(%rsp)
-	por	%xmm2,%xmm3
 	movdqa	%xmm4,384(%rsp)
 	movdqa	%xmm5,384+16(%rsp)
-	por	%xmm1,%xmm3
+	por	%xmm4,%xmm5
 
 	movdqu	0(%rbx),%xmm0
-	pshufd	$0xb1,%xmm3,%xmm5
+	pshufd	$0xb1,%xmm5,%xmm3
 	movdqu	16(%rbx),%xmm1
 	movdqu	32(%rbx),%xmm2
 	por	%xmm3,%xmm5
@@ -1634,6 +1636,7 @@
 
 
 
+	xorq	%r11,%r11
 	addq	%r12,%r12
 	leaq	192(%rsp),%rsi
 	adcq	%r13,%r13
@@ -1641,7 +1644,7 @@
 	adcq	%r8,%r8
 	adcq	%r9,%r9
 	movq	%r13,%rbp
-	sbbq	%r11,%r11
+	adcq	$0,%r11
 
 	subq	$-1,%r12
 	movq	%r8,%rcx
@@ -1649,15 +1652,15 @@
 	sbbq	$0,%r8
 	movq	%r9,%r10
 	sbbq	%r15,%r9
-	testq	%r11,%r11
+	sbbq	$0,%r11
 
-	cmovzq	%rax,%r12
+	cmovcq	%rax,%r12
 	movq	0(%rsi),%rax
-	cmovzq	%rbp,%r13
+	cmovcq	%rbp,%r13
 	movq	8(%rsi),%rbp
-	cmovzq	%rcx,%r8
+	cmovcq	%rcx,%r8
 	movq	16(%rsi),%rcx
-	cmovzq	%r10,%r9
+	cmovcq	%r10,%r9
 	movq	24(%rsi),%r10
 
 	call	__ecp_nistz256_subq
diff --git a/mac-x86_64/crypto/rc4/rc4-x86_64.S b/mac-x86_64/crypto/rc4/rc4-x86_64.S
deleted file mode 100644
index 7808184..0000000
--- a/mac-x86_64/crypto/rc4/rc4-x86_64.S
+++ /dev/null
@@ -1,595 +0,0 @@
-#if defined(__x86_64__)
-.text	
-
-
-.globl	_asm_RC4
-.private_extern _asm_RC4
-
-.p2align	4
-_asm_RC4:
-	orq	%rsi,%rsi
-	jne	L$entry
-	.byte	0xf3,0xc3
-L$entry:
-	pushq	%rbx
-	pushq	%r12
-	pushq	%r13
-L$prologue:
-	movq	%rsi,%r11
-	movq	%rdx,%r12
-	movq	%rcx,%r13
-	xorq	%r10,%r10
-	xorq	%rcx,%rcx
-
-	leaq	8(%rdi),%rdi
-	movb	-8(%rdi),%r10b
-	movb	-4(%rdi),%cl
-	cmpl	$-1,256(%rdi)
-	je	L$RC4_CHAR
-	movl	_OPENSSL_ia32cap_P(%rip),%r8d
-	xorq	%rbx,%rbx
-	incb	%r10b
-	subq	%r10,%rbx
-	subq	%r12,%r13
-	movl	(%rdi,%r10,4),%eax
-	testq	$-16,%r11
-	jz	L$loop1
-	btl	$30,%r8d
-	jc	L$intel
-	andq	$7,%rbx
-	leaq	1(%r10),%rsi
-	jz	L$oop8
-	subq	%rbx,%r11
-L$oop8_warmup:
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	%edx,(%rdi,%r10,4)
-	addb	%dl,%al
-	incb	%r10b
-	movl	(%rdi,%rax,4),%edx
-	movl	(%rdi,%r10,4),%eax
-	xorb	(%r12),%dl
-	movb	%dl,(%r12,%r13,1)
-	leaq	1(%r12),%r12
-	decq	%rbx
-	jnz	L$oop8_warmup
-
-	leaq	1(%r10),%rsi
-	jmp	L$oop8
-.p2align	4
-L$oop8:
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	0(%rdi,%rsi,4),%ebx
-	rorq	$8,%r8
-	movl	%edx,0(%rdi,%r10,4)
-	addb	%al,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%bl,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	movl	4(%rdi,%rsi,4),%eax
-	rorq	$8,%r8
-	movl	%edx,4(%rdi,%r10,4)
-	addb	%bl,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	8(%rdi,%rsi,4),%ebx
-	rorq	$8,%r8
-	movl	%edx,8(%rdi,%r10,4)
-	addb	%al,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%bl,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	movl	12(%rdi,%rsi,4),%eax
-	rorq	$8,%r8
-	movl	%edx,12(%rdi,%r10,4)
-	addb	%bl,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	16(%rdi,%rsi,4),%ebx
-	rorq	$8,%r8
-	movl	%edx,16(%rdi,%r10,4)
-	addb	%al,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%bl,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	movl	20(%rdi,%rsi,4),%eax
-	rorq	$8,%r8
-	movl	%edx,20(%rdi,%r10,4)
-	addb	%bl,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	24(%rdi,%rsi,4),%ebx
-	rorq	$8,%r8
-	movl	%edx,24(%rdi,%r10,4)
-	addb	%al,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	$8,%sil
-	addb	%bl,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	movl	-4(%rdi,%rsi,4),%eax
-	rorq	$8,%r8
-	movl	%edx,28(%rdi,%r10,4)
-	addb	%bl,%dl
-	movb	(%rdi,%rdx,4),%r8b
-	addb	$8,%r10b
-	rorq	$8,%r8
-	subq	$8,%r11
-
-	xorq	(%r12),%r8
-	movq	%r8,(%r12,%r13,1)
-	leaq	8(%r12),%r12
-
-	testq	$-8,%r11
-	jnz	L$oop8
-	cmpq	$0,%r11
-	jne	L$loop1
-	jmp	L$exit
-
-.p2align	4
-L$intel:
-	testq	$-32,%r11
-	jz	L$loop1
-	andq	$15,%rbx
-	jz	L$oop16_is_hot
-	subq	%rbx,%r11
-L$oop16_warmup:
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	%edx,(%rdi,%r10,4)
-	addb	%dl,%al
-	incb	%r10b
-	movl	(%rdi,%rax,4),%edx
-	movl	(%rdi,%r10,4),%eax
-	xorb	(%r12),%dl
-	movb	%dl,(%r12,%r13,1)
-	leaq	1(%r12),%r12
-	decq	%rbx
-	jnz	L$oop16_warmup
-
-	movq	%rcx,%rbx
-	xorq	%rcx,%rcx
-	movb	%bl,%cl
-
-L$oop16_is_hot:
-	leaq	(%rdi,%r10,4),%rsi
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	pxor	%xmm0,%xmm0
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	4(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,0(%rsi)
-	addb	%bl,%cl
-	pinsrw	$0,(%rdi,%rax,4),%xmm0
-	jmp	L$oop16_enter
-.p2align	4
-L$oop16:
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	pxor	%xmm0,%xmm2
-	psllq	$8,%xmm1
-	pxor	%xmm0,%xmm0
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	4(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,0(%rsi)
-	pxor	%xmm1,%xmm2
-	addb	%bl,%cl
-	pinsrw	$0,(%rdi,%rax,4),%xmm0
-	movdqu	%xmm2,(%r12,%r13,1)
-	leaq	16(%r12),%r12
-L$oop16_enter:
-	movl	(%rdi,%rcx,4),%edx
-	pxor	%xmm1,%xmm1
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	8(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,4(%rsi)
-	addb	%al,%cl
-	pinsrw	$0,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	12(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,8(%rsi)
-	addb	%bl,%cl
-	pinsrw	$1,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	16(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,12(%rsi)
-	addb	%al,%cl
-	pinsrw	$1,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	20(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,16(%rsi)
-	addb	%bl,%cl
-	pinsrw	$2,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	24(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,20(%rsi)
-	addb	%al,%cl
-	pinsrw	$2,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	28(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,24(%rsi)
-	addb	%bl,%cl
-	pinsrw	$3,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	32(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,28(%rsi)
-	addb	%al,%cl
-	pinsrw	$3,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	36(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,32(%rsi)
-	addb	%bl,%cl
-	pinsrw	$4,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	40(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,36(%rsi)
-	addb	%al,%cl
-	pinsrw	$4,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	44(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,40(%rsi)
-	addb	%bl,%cl
-	pinsrw	$5,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	48(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,44(%rsi)
-	addb	%al,%cl
-	pinsrw	$5,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	52(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,48(%rsi)
-	addb	%bl,%cl
-	pinsrw	$6,(%rdi,%rax,4),%xmm0
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movl	56(%rsi),%eax
-	movzbl	%bl,%ebx
-	movl	%edx,52(%rsi)
-	addb	%al,%cl
-	pinsrw	$6,(%rdi,%rbx,4),%xmm1
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	addb	%dl,%al
-	movl	60(%rsi),%ebx
-	movzbl	%al,%eax
-	movl	%edx,56(%rsi)
-	addb	%bl,%cl
-	pinsrw	$7,(%rdi,%rax,4),%xmm0
-	addb	$16,%r10b
-	movdqu	(%r12),%xmm2
-	movl	(%rdi,%rcx,4),%edx
-	movl	%ebx,(%rdi,%rcx,4)
-	addb	%dl,%bl
-	movzbl	%bl,%ebx
-	movl	%edx,60(%rsi)
-	leaq	(%rdi,%r10,4),%rsi
-	pinsrw	$7,(%rdi,%rbx,4),%xmm1
-	movl	(%rsi),%eax
-	movq	%rcx,%rbx
-	xorq	%rcx,%rcx
-	subq	$16,%r11
-	movb	%bl,%cl
-	testq	$-16,%r11
-	jnz	L$oop16
-
-	psllq	$8,%xmm1
-	pxor	%xmm0,%xmm2
-	pxor	%xmm1,%xmm2
-	movdqu	%xmm2,(%r12,%r13,1)
-	leaq	16(%r12),%r12
-
-	cmpq	$0,%r11
-	jne	L$loop1
-	jmp	L$exit
-
-.p2align	4
-L$loop1:
-	addb	%al,%cl
-	movl	(%rdi,%rcx,4),%edx
-	movl	%eax,(%rdi,%rcx,4)
-	movl	%edx,(%rdi,%r10,4)
-	addb	%dl,%al
-	incb	%r10b
-	movl	(%rdi,%rax,4),%edx
-	movl	(%rdi,%r10,4),%eax
-	xorb	(%r12),%dl
-	movb	%dl,(%r12,%r13,1)
-	leaq	1(%r12),%r12
-	decq	%r11
-	jnz	L$loop1
-	jmp	L$exit
-
-.p2align	4
-L$RC4_CHAR:
-	addb	$1,%r10b
-	movzbl	(%rdi,%r10,1),%eax
-	testq	$-8,%r11
-	jz	L$cloop1
-	jmp	L$cloop8
-.p2align	4
-L$cloop8:
-	movl	(%r12),%r8d
-	movl	4(%r12),%r9d
-	addb	%al,%cl
-	leaq	1(%r10),%rsi
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%sil,%esi
-	movzbl	(%rdi,%rsi,1),%ebx
-	movb	%al,(%rdi,%rcx,1)
-	cmpq	%rsi,%rcx
-	movb	%dl,(%rdi,%r10,1)
-	jne	L$cmov0
-	movq	%rax,%rbx
-L$cmov0:
-	addb	%al,%dl
-	xorb	(%rdi,%rdx,1),%r8b
-	rorl	$8,%r8d
-	addb	%bl,%cl
-	leaq	1(%rsi),%r10
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%r10b,%r10d
-	movzbl	(%rdi,%r10,1),%eax
-	movb	%bl,(%rdi,%rcx,1)
-	cmpq	%r10,%rcx
-	movb	%dl,(%rdi,%rsi,1)
-	jne	L$cmov1
-	movq	%rbx,%rax
-L$cmov1:
-	addb	%bl,%dl
-	xorb	(%rdi,%rdx,1),%r8b
-	rorl	$8,%r8d
-	addb	%al,%cl
-	leaq	1(%r10),%rsi
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%sil,%esi
-	movzbl	(%rdi,%rsi,1),%ebx
-	movb	%al,(%rdi,%rcx,1)
-	cmpq	%rsi,%rcx
-	movb	%dl,(%rdi,%r10,1)
-	jne	L$cmov2
-	movq	%rax,%rbx
-L$cmov2:
-	addb	%al,%dl
-	xorb	(%rdi,%rdx,1),%r8b
-	rorl	$8,%r8d
-	addb	%bl,%cl
-	leaq	1(%rsi),%r10
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%r10b,%r10d
-	movzbl	(%rdi,%r10,1),%eax
-	movb	%bl,(%rdi,%rcx,1)
-	cmpq	%r10,%rcx
-	movb	%dl,(%rdi,%rsi,1)
-	jne	L$cmov3
-	movq	%rbx,%rax
-L$cmov3:
-	addb	%bl,%dl
-	xorb	(%rdi,%rdx,1),%r8b
-	rorl	$8,%r8d
-	addb	%al,%cl
-	leaq	1(%r10),%rsi
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%sil,%esi
-	movzbl	(%rdi,%rsi,1),%ebx
-	movb	%al,(%rdi,%rcx,1)
-	cmpq	%rsi,%rcx
-	movb	%dl,(%rdi,%r10,1)
-	jne	L$cmov4
-	movq	%rax,%rbx
-L$cmov4:
-	addb	%al,%dl
-	xorb	(%rdi,%rdx,1),%r9b
-	rorl	$8,%r9d
-	addb	%bl,%cl
-	leaq	1(%rsi),%r10
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%r10b,%r10d
-	movzbl	(%rdi,%r10,1),%eax
-	movb	%bl,(%rdi,%rcx,1)
-	cmpq	%r10,%rcx
-	movb	%dl,(%rdi,%rsi,1)
-	jne	L$cmov5
-	movq	%rbx,%rax
-L$cmov5:
-	addb	%bl,%dl
-	xorb	(%rdi,%rdx,1),%r9b
-	rorl	$8,%r9d
-	addb	%al,%cl
-	leaq	1(%r10),%rsi
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%sil,%esi
-	movzbl	(%rdi,%rsi,1),%ebx
-	movb	%al,(%rdi,%rcx,1)
-	cmpq	%rsi,%rcx
-	movb	%dl,(%rdi,%r10,1)
-	jne	L$cmov6
-	movq	%rax,%rbx
-L$cmov6:
-	addb	%al,%dl
-	xorb	(%rdi,%rdx,1),%r9b
-	rorl	$8,%r9d
-	addb	%bl,%cl
-	leaq	1(%rsi),%r10
-	movzbl	(%rdi,%rcx,1),%edx
-	movzbl	%r10b,%r10d
-	movzbl	(%rdi,%r10,1),%eax
-	movb	%bl,(%rdi,%rcx,1)
-	cmpq	%r10,%rcx
-	movb	%dl,(%rdi,%rsi,1)
-	jne	L$cmov7
-	movq	%rbx,%rax
-L$cmov7:
-	addb	%bl,%dl
-	xorb	(%rdi,%rdx,1),%r9b
-	rorl	$8,%r9d
-	leaq	-8(%r11),%r11
-	movl	%r8d,(%r13)
-	leaq	8(%r12),%r12
-	movl	%r9d,4(%r13)
-	leaq	8(%r13),%r13
-
-	testq	$-8,%r11
-	jnz	L$cloop8
-	cmpq	$0,%r11
-	jne	L$cloop1
-	jmp	L$exit
-.p2align	4
-L$cloop1:
-	addb	%al,%cl
-	movzbl	%cl,%ecx
-	movzbl	(%rdi,%rcx,1),%edx
-	movb	%al,(%rdi,%rcx,1)
-	movb	%dl,(%rdi,%r10,1)
-	addb	%al,%dl
-	addb	$1,%r10b
-	movzbl	%dl,%edx
-	movzbl	%r10b,%r10d
-	movzbl	(%rdi,%rdx,1),%edx
-	movzbl	(%rdi,%r10,1),%eax
-	xorb	(%r12),%dl
-	leaq	1(%r12),%r12
-	movb	%dl,(%r13)
-	leaq	1(%r13),%r13
-	subq	$1,%r11
-	jnz	L$cloop1
-	jmp	L$exit
-
-.p2align	4
-L$exit:
-	subb	$1,%r10b
-	movl	%r10d,-8(%rdi)
-	movl	%ecx,-4(%rdi)
-
-	movq	(%rsp),%r13
-	movq	8(%rsp),%r12
-	movq	16(%rsp),%rbx
-	addq	$24,%rsp
-L$epilogue:
-	.byte	0xf3,0xc3
-
-.globl	_asm_RC4_set_key
-.private_extern _asm_RC4_set_key
-
-.p2align	4
-_asm_RC4_set_key:
-	leaq	8(%rdi),%rdi
-	leaq	(%rdx,%rsi,1),%rdx
-	negq	%rsi
-	movq	%rsi,%rcx
-	xorl	%eax,%eax
-	xorq	%r9,%r9
-	xorq	%r10,%r10
-	xorq	%r11,%r11
-
-	movl	_OPENSSL_ia32cap_P(%rip),%r8d
-	btl	$20,%r8d
-	jc	L$c1stloop
-	jmp	L$w1stloop
-
-.p2align	4
-L$w1stloop:
-	movl	%eax,(%rdi,%rax,4)
-	addb	$1,%al
-	jnc	L$w1stloop
-
-	xorq	%r9,%r9
-	xorq	%r8,%r8
-.p2align	4
-L$w2ndloop:
-	movl	(%rdi,%r9,4),%r10d
-	addb	(%rdx,%rsi,1),%r8b
-	addb	%r10b,%r8b
-	addq	$1,%rsi
-	movl	(%rdi,%r8,4),%r11d
-	cmovzq	%rcx,%rsi
-	movl	%r10d,(%rdi,%r8,4)
-	movl	%r11d,(%rdi,%r9,4)
-	addb	$1,%r9b
-	jnc	L$w2ndloop
-	jmp	L$exit_key
-
-.p2align	4
-L$c1stloop:
-	movb	%al,(%rdi,%rax,1)
-	addb	$1,%al
-	jnc	L$c1stloop
-
-	xorq	%r9,%r9
-	xorq	%r8,%r8
-.p2align	4
-L$c2ndloop:
-	movb	(%rdi,%r9,1),%r10b
-	addb	(%rdx,%rsi,1),%r8b
-	addb	%r10b,%r8b
-	addq	$1,%rsi
-	movb	(%rdi,%r8,1),%r11b
-	jnz	L$cnowrap
-	movq	%rcx,%rsi
-L$cnowrap:
-	movb	%r10b,(%rdi,%r8,1)
-	movb	%r11b,(%rdi,%r9,1)
-	addb	$1,%r9b
-	jnc	L$c2ndloop
-	movl	$-1,256(%rdi)
-
-.p2align	4
-L$exit_key:
-	xorl	%eax,%eax
-	movl	%eax,-8(%rdi)
-	movl	%eax,-4(%rdi)
-	.byte	0xf3,0xc3
-
-#endif
diff --git a/win-x86/crypto/rc4/rc4-586.asm b/win-x86/crypto/rc4/rc4-586.asm
deleted file mode 100644
index 0bab2be..0000000
--- a/win-x86/crypto/rc4/rc4-586.asm
+++ /dev/null
@@ -1,353 +0,0 @@
-%ifidn __OUTPUT_FORMAT__,obj
-section	code	use32 class=code align=64
-%elifidn __OUTPUT_FORMAT__,win32
-%ifdef __YASM_VERSION_ID__
-%if __YASM_VERSION_ID__ < 01010000h
-%error yasm version 1.1.0 or later needed.
-%endif
-; Yasm automatically includes .00 and complains about redefining it.
-; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
-%else
-$@feat.00 equ 1
-%endif
-section	.text	code align=64
-%else
-section	.text	code
-%endif
-;extern	_OPENSSL_ia32cap_P
-global	_asm_RC4
-align	16
-_asm_RC4:
-L$_asm_RC4_begin:
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-	mov	edi,DWORD [20+esp]
-	mov	edx,DWORD [24+esp]
-	mov	esi,DWORD [28+esp]
-	mov	ebp,DWORD [32+esp]
-	xor	eax,eax
-	xor	ebx,ebx
-	cmp	edx,0
-	je	NEAR L$000abort
-	mov	al,BYTE [edi]
-	mov	bl,BYTE [4+edi]
-	add	edi,8
-	lea	ecx,[edx*1+esi]
-	sub	ebp,esi
-	mov	DWORD [24+esp],ecx
-	inc	al
-	cmp	DWORD [256+edi],-1
-	je	NEAR L$001RC4_CHAR
-	mov	ecx,DWORD [eax*4+edi]
-	and	edx,-4
-	jz	NEAR L$002loop1
-	mov	DWORD [32+esp],ebp
-	test	edx,-8
-	jz	NEAR L$003go4loop4
-	lea	ebp,[_OPENSSL_ia32cap_P]
-	bt	DWORD [ebp],26
-	jnc	NEAR L$003go4loop4
-	mov	ebp,DWORD [32+esp]
-	and	edx,-8
-	lea	edx,[edx*1+esi-8]
-	mov	DWORD [edi-4],edx
-	add	bl,cl
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	inc	eax
-	add	edx,ecx
-	movzx	eax,al
-	movzx	edx,dl
-	movq	mm0,[esi]
-	mov	ecx,DWORD [eax*4+edi]
-	movd	mm2,DWORD [edx*4+edi]
-	jmp	NEAR L$004loop_mmx_enter
-align	16
-L$005loop_mmx:
-	add	bl,cl
-	psllq	mm1,56
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	inc	eax
-	add	edx,ecx
-	movzx	eax,al
-	movzx	edx,dl
-	pxor	mm2,mm1
-	movq	mm0,[esi]
-	movq	[esi*1+ebp-8],mm2
-	mov	ecx,DWORD [eax*4+edi]
-	movd	mm2,DWORD [edx*4+edi]
-L$004loop_mmx_enter:
-	add	bl,cl
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	inc	eax
-	add	edx,ecx
-	movzx	eax,al
-	movzx	edx,dl
-	pxor	mm2,mm0
-	mov	ecx,DWORD [eax*4+edi]
-	movd	mm1,DWORD [edx*4+edi]
-	add	bl,cl
-	psllq	mm1,8
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	inc	eax
-	add	edx,ecx
-	movzx	eax,al
-	movzx	edx,dl
-	pxor	mm2,mm1
-	mov	ecx,DWORD [eax*4+edi]
-	movd	mm1,DWORD [edx*4+edi]
-	add	bl,cl
-	psllq	mm1,16
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	inc	eax
-	add	edx,ecx
-	movzx	eax,al
-	movzx	edx,dl
-	pxor	mm2,mm1
-	mov	ecx,DWORD [eax*4+edi]
-	movd	mm1,DWORD [edx*4+edi]
-	add	bl,cl
-	psllq	mm1,24
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	inc	eax
-	add	edx,ecx
-	movzx	eax,al
-	movzx	edx,dl
-	pxor	mm2,mm1
-	mov	ecx,DWORD [eax*4+edi]
-	movd	mm1,DWORD [edx*4+edi]
-	add	bl,cl
-	psllq	mm1,32
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	inc	eax
-	add	edx,ecx
-	movzx	eax,al
-	movzx	edx,dl
-	pxor	mm2,mm1
-	mov	ecx,DWORD [eax*4+edi]
-	movd	mm1,DWORD [edx*4+edi]
-	add	bl,cl
-	psllq	mm1,40
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	inc	eax
-	add	edx,ecx
-	movzx	eax,al
-	movzx	edx,dl
-	pxor	mm2,mm1
-	mov	ecx,DWORD [eax*4+edi]
-	movd	mm1,DWORD [edx*4+edi]
-	add	bl,cl
-	psllq	mm1,48
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	inc	eax
-	add	edx,ecx
-	movzx	eax,al
-	movzx	edx,dl
-	pxor	mm2,mm1
-	mov	ecx,DWORD [eax*4+edi]
-	movd	mm1,DWORD [edx*4+edi]
-	mov	edx,ebx
-	xor	ebx,ebx
-	mov	bl,dl
-	cmp	esi,DWORD [edi-4]
-	lea	esi,[8+esi]
-	jb	NEAR L$005loop_mmx
-	psllq	mm1,56
-	pxor	mm2,mm1
-	movq	[esi*1+ebp-8],mm2
-	emms
-	cmp	esi,DWORD [24+esp]
-	je	NEAR L$006done
-	jmp	NEAR L$002loop1
-align	16
-L$003go4loop4:
-	lea	edx,[edx*1+esi-4]
-	mov	DWORD [28+esp],edx
-L$007loop4:
-	add	bl,cl
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	add	edx,ecx
-	inc	al
-	and	edx,255
-	mov	ecx,DWORD [eax*4+edi]
-	mov	ebp,DWORD [edx*4+edi]
-	add	bl,cl
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	add	edx,ecx
-	inc	al
-	and	edx,255
-	ror	ebp,8
-	mov	ecx,DWORD [eax*4+edi]
-	or	ebp,DWORD [edx*4+edi]
-	add	bl,cl
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	add	edx,ecx
-	inc	al
-	and	edx,255
-	ror	ebp,8
-	mov	ecx,DWORD [eax*4+edi]
-	or	ebp,DWORD [edx*4+edi]
-	add	bl,cl
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	add	edx,ecx
-	inc	al
-	and	edx,255
-	ror	ebp,8
-	mov	ecx,DWORD [32+esp]
-	or	ebp,DWORD [edx*4+edi]
-	ror	ebp,8
-	xor	ebp,DWORD [esi]
-	cmp	esi,DWORD [28+esp]
-	mov	DWORD [esi*1+ecx],ebp
-	lea	esi,[4+esi]
-	mov	ecx,DWORD [eax*4+edi]
-	jb	NEAR L$007loop4
-	cmp	esi,DWORD [24+esp]
-	je	NEAR L$006done
-	mov	ebp,DWORD [32+esp]
-align	16
-L$002loop1:
-	add	bl,cl
-	mov	edx,DWORD [ebx*4+edi]
-	mov	DWORD [ebx*4+edi],ecx
-	mov	DWORD [eax*4+edi],edx
-	add	edx,ecx
-	inc	al
-	and	edx,255
-	mov	edx,DWORD [edx*4+edi]
-	xor	dl,BYTE [esi]
-	lea	esi,[1+esi]
-	mov	ecx,DWORD [eax*4+edi]
-	cmp	esi,DWORD [24+esp]
-	mov	BYTE [esi*1+ebp-1],dl
-	jb	NEAR L$002loop1
-	jmp	NEAR L$006done
-align	16
-L$001RC4_CHAR:
-	movzx	ecx,BYTE [eax*1+edi]
-L$008cloop1:
-	add	bl,cl
-	movzx	edx,BYTE [ebx*1+edi]
-	mov	BYTE [ebx*1+edi],cl
-	mov	BYTE [eax*1+edi],dl
-	add	dl,cl
-	movzx	edx,BYTE [edx*1+edi]
-	add	al,1
-	xor	dl,BYTE [esi]
-	lea	esi,[1+esi]
-	movzx	ecx,BYTE [eax*1+edi]
-	cmp	esi,DWORD [24+esp]
-	mov	BYTE [esi*1+ebp-1],dl
-	jb	NEAR L$008cloop1
-L$006done:
-	dec	al
-	mov	DWORD [edi-4],ebx
-	mov	BYTE [edi-8],al
-L$000abort:
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-global	_asm_RC4_set_key
-align	16
-_asm_RC4_set_key:
-L$_asm_RC4_set_key_begin:
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-	mov	edi,DWORD [20+esp]
-	mov	ebp,DWORD [24+esp]
-	mov	esi,DWORD [28+esp]
-	lea	edx,[_OPENSSL_ia32cap_P]
-	lea	edi,[8+edi]
-	lea	esi,[ebp*1+esi]
-	neg	ebp
-	xor	eax,eax
-	mov	DWORD [edi-4],ebp
-	bt	DWORD [edx],20
-	jc	NEAR L$009c1stloop
-align	16
-L$010w1stloop:
-	mov	DWORD [eax*4+edi],eax
-	add	al,1
-	jnc	NEAR L$010w1stloop
-	xor	ecx,ecx
-	xor	edx,edx
-align	16
-L$011w2ndloop:
-	mov	eax,DWORD [ecx*4+edi]
-	add	dl,BYTE [ebp*1+esi]
-	add	dl,al
-	add	ebp,1
-	mov	ebx,DWORD [edx*4+edi]
-	jnz	NEAR L$012wnowrap
-	mov	ebp,DWORD [edi-4]
-L$012wnowrap:
-	mov	DWORD [edx*4+edi],eax
-	mov	DWORD [ecx*4+edi],ebx
-	add	cl,1
-	jnc	NEAR L$011w2ndloop
-	jmp	NEAR L$013exit
-align	16
-L$009c1stloop:
-	mov	BYTE [eax*1+edi],al
-	add	al,1
-	jnc	NEAR L$009c1stloop
-	xor	ecx,ecx
-	xor	edx,edx
-	xor	ebx,ebx
-align	16
-L$014c2ndloop:
-	mov	al,BYTE [ecx*1+edi]
-	add	dl,BYTE [ebp*1+esi]
-	add	dl,al
-	add	ebp,1
-	mov	bl,BYTE [edx*1+edi]
-	jnz	NEAR L$015cnowrap
-	mov	ebp,DWORD [edi-4]
-L$015cnowrap:
-	mov	BYTE [edx*1+edi],al
-	mov	BYTE [ecx*1+edi],bl
-	add	cl,1
-	jnc	NEAR L$014c2ndloop
-	mov	DWORD [256+edi],-1
-L$013exit:
-	xor	eax,eax
-	mov	DWORD [edi-8],eax
-	mov	DWORD [edi-4],eax
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-segment	.bss
-common	_OPENSSL_ia32cap_P 16
diff --git a/win-x86_64/crypto/ec/p256-x86_64-asm.asm b/win-x86_64/crypto/ec/p256-x86_64-asm.asm
index a2e4075..cbcf883 100644
--- a/win-x86_64/crypto/ec/p256-x86_64-asm.asm
+++ b/win-x86_64/crypto/ec/p256-x86_64-asm.asm
@@ -35,6 +35,7 @@
 	push	r13
 
 	mov	r8,QWORD[rsi]
+	xor	r13,r13
 	mov	r9,QWORD[8+rsi]
 	add	r8,r8
 	mov	r10,QWORD[16+rsi]
@@ -45,7 +46,7 @@
 	adc	r10,r10
 	adc	r11,r11
 	mov	rdx,r9
-	sbb	r13,r13
+	adc	r13,0
 
 	sub	r8,QWORD[rsi]
 	mov	rcx,r10
@@ -53,14 +54,14 @@
 	sbb	r10,QWORD[16+rsi]
 	mov	r12,r11
 	sbb	r11,QWORD[24+rsi]
-	test	r13,r13
+	sbb	r13,0
 
-	cmovz	r8,rax
-	cmovz	r9,rdx
+	cmovc	r8,rax
+	cmovc	r9,rdx
 	mov	QWORD[rdi],r8
-	cmovz	r10,rcx
+	cmovc	r10,rcx
 	mov	QWORD[8+rdi],r9
-	cmovz	r11,r12
+	cmovc	r11,r12
 	mov	QWORD[16+rdi],r10
 	mov	QWORD[24+rdi],r11
 
@@ -673,6 +674,8 @@
 	mov	rsi,r9
 	adc	rdx,0
 
+
+
 	sub	r8,-1
 	mov	rax,r10
 	sbb	r9,r12
@@ -873,13 +876,14 @@
 
 ALIGN	32
 __ecp_nistz256_add_toq:
+	xor	r11,r11
 	add	r12,QWORD[rbx]
 	adc	r13,QWORD[8+rbx]
 	mov	rax,r12
 	adc	r8,QWORD[16+rbx]
 	adc	r9,QWORD[24+rbx]
 	mov	rbp,r13
-	sbb	r11,r11
+	adc	r11,0
 
 	sub	r12,-1
 	mov	rcx,r8
@@ -887,14 +891,14 @@
 	sbb	r8,0
 	mov	r10,r9
 	sbb	r9,r15
-	test	r11,r11
+	sbb	r11,0
 
-	cmovz	r12,rax
-	cmovz	r13,rbp
+	cmovc	r12,rax
+	cmovc	r13,rbp
 	mov	QWORD[rdi],r12
-	cmovz	r8,rcx
+	cmovc	r8,rcx
 	mov	QWORD[8+rdi],r13
-	cmovz	r9,r10
+	cmovc	r9,r10
 	mov	QWORD[16+rdi],r8
 	mov	QWORD[24+rdi],r9
 
@@ -962,13 +966,14 @@
 
 ALIGN	32
 __ecp_nistz256_mul_by_2q:
+	xor	r11,r11
 	add	r12,r12
 	adc	r13,r13
 	mov	rax,r12
 	adc	r8,r8
 	adc	r9,r9
 	mov	rbp,r13
-	sbb	r11,r11
+	adc	r11,0
 
 	sub	r12,-1
 	mov	rcx,r8
@@ -976,14 +981,14 @@
 	sbb	r8,0
 	mov	r10,r9
 	sbb	r9,r15
-	test	r11,r11
+	sbb	r11,0
 
-	cmovz	r12,rax
-	cmovz	r13,rbp
+	cmovc	r12,rax
+	cmovc	r13,rbp
 	mov	QWORD[rdi],r12
-	cmovz	r8,rcx
+	cmovc	r8,rcx
 	mov	QWORD[8+rdi],r13
-	cmovz	r9,r10
+	cmovc	r9,r10
 	mov	QWORD[16+rdi],r8
 	mov	QWORD[24+rdi],r9
 
@@ -1232,16 +1237,14 @@
 	mov	rsi,rdx
 	movdqa	XMMWORD[384+rsp],xmm0
 	movdqa	XMMWORD[(384+16)+rsp],xmm1
-	por	xmm1,xmm0
 	movdqa	XMMWORD[416+rsp],xmm2
 	movdqa	XMMWORD[(416+16)+rsp],xmm3
-	por	xmm3,xmm2
 	movdqa	XMMWORD[448+rsp],xmm4
 	movdqa	XMMWORD[(448+16)+rsp],xmm5
-	por	xmm3,xmm1
+	por	xmm5,xmm4
 
 	movdqu	xmm0,XMMWORD[rsi]
-	pshufd	xmm5,xmm3,0xb1
+	pshufd	xmm3,xmm5,0xb1
 	movdqu	xmm1,XMMWORD[16+rsi]
 	movdqu	xmm2,XMMWORD[32+rsi]
 	por	xmm5,xmm3
@@ -1253,14 +1256,14 @@
 	movdqa	XMMWORD[480+rsp],xmm0
 	pshufd	xmm4,xmm5,0x1e
 	movdqa	XMMWORD[(480+16)+rsp],xmm1
-	por	xmm1,xmm0
-DB	102,72,15,110,199
+	movdqu	xmm0,XMMWORD[64+rsi]
+	movdqu	xmm1,XMMWORD[80+rsi]
 	movdqa	XMMWORD[512+rsp],xmm2
 	movdqa	XMMWORD[(512+16)+rsp],xmm3
-	por	xmm3,xmm2
 	por	xmm5,xmm4
 	pxor	xmm4,xmm4
-	por	xmm3,xmm1
+	por	xmm1,xmm0
+DB	102,72,15,110,199
 
 	lea	rsi,[((64-0))+rsi]
 	mov	QWORD[((544+0))+rsp],rax
@@ -1271,8 +1274,8 @@
 	call	__ecp_nistz256_sqr_montq
 
 	pcmpeqd	xmm5,xmm4
-	pshufd	xmm4,xmm3,0xb1
-	por	xmm4,xmm3
+	pshufd	xmm4,xmm1,0xb1
+	por	xmm4,xmm1
 	pshufd	xmm5,xmm5,0
 	pshufd	xmm3,xmm4,0x1e
 	por	xmm4,xmm3
@@ -1455,6 +1458,7 @@
 
 
 
+	xor	r11,r11
 	add	r12,r12
 	lea	rsi,[96+rsp]
 	adc	r13,r13
@@ -1462,7 +1466,7 @@
 	adc	r8,r8
 	adc	r9,r9
 	mov	rbp,r13
-	sbb	r11,r11
+	adc	r11,0
 
 	sub	r12,-1
 	mov	rcx,r8
@@ -1470,15 +1474,15 @@
 	sbb	r8,0
 	mov	r10,r9
 	sbb	r9,r15
-	test	r11,r11
+	sbb	r11,0
 
-	cmovz	r12,rax
+	cmovc	r12,rax
 	mov	rax,QWORD[rsi]
-	cmovz	r13,rbp
+	cmovc	r13,rbp
 	mov	rbp,QWORD[8+rsi]
-	cmovz	r8,rcx
+	cmovc	r8,rcx
 	mov	rcx,QWORD[16+rsi]
-	cmovz	r9,r10
+	cmovc	r9,r10
 	mov	r10,QWORD[24+rsi]
 
 	call	__ecp_nistz256_subq
@@ -1643,16 +1647,14 @@
 	mov	r8,QWORD[((64+24))+rsi]
 	movdqa	XMMWORD[320+rsp],xmm0
 	movdqa	XMMWORD[(320+16)+rsp],xmm1
-	por	xmm1,xmm0
 	movdqa	XMMWORD[352+rsp],xmm2
 	movdqa	XMMWORD[(352+16)+rsp],xmm3
-	por	xmm3,xmm2
 	movdqa	XMMWORD[384+rsp],xmm4
 	movdqa	XMMWORD[(384+16)+rsp],xmm5
-	por	xmm3,xmm1
+	por	xmm5,xmm4
 
 	movdqu	xmm0,XMMWORD[rbx]
-	pshufd	xmm5,xmm3,0xb1
+	pshufd	xmm3,xmm5,0xb1
 	movdqu	xmm1,XMMWORD[16+rbx]
 	movdqu	xmm2,XMMWORD[32+rbx]
 	por	xmm5,xmm3
@@ -1770,6 +1772,7 @@
 
 
 
+	xor	r11,r11
 	add	r12,r12
 	lea	rsi,[192+rsp]
 	adc	r13,r13
@@ -1777,7 +1780,7 @@
 	adc	r8,r8
 	adc	r9,r9
 	mov	rbp,r13
-	sbb	r11,r11
+	adc	r11,0
 
 	sub	r12,-1
 	mov	rcx,r8
@@ -1785,15 +1788,15 @@
 	sbb	r8,0
 	mov	r10,r9
 	sbb	r9,r15
-	test	r11,r11
+	sbb	r11,0
 
-	cmovz	r12,rax
+	cmovc	r12,rax
 	mov	rax,QWORD[rsi]
-	cmovz	r13,rbp
+	cmovc	r13,rbp
 	mov	rbp,QWORD[8+rsi]
-	cmovz	r8,rcx
+	cmovc	r8,rcx
 	mov	rcx,QWORD[16+rsi]
-	cmovz	r9,r10
+	cmovc	r9,r10
 	mov	r10,QWORD[24+rsi]
 
 	call	__ecp_nistz256_subq
diff --git a/win-x86_64/crypto/rc4/rc4-x86_64.asm b/win-x86_64/crypto/rc4/rc4-x86_64.asm
deleted file mode 100644
index c7c3b7b..0000000
--- a/win-x86_64/crypto/rc4/rc4-x86_64.asm
+++ /dev/null
@@ -1,741 +0,0 @@
-default	rel
-%define XMMWORD
-%define YMMWORD
-%define ZMMWORD
-section	.text code align=64
-
-EXTERN	OPENSSL_ia32cap_P
-
-global	asm_RC4
-
-ALIGN	16
-asm_RC4:
-	mov	QWORD[8+rsp],rdi	;WIN64 prologue
-	mov	QWORD[16+rsp],rsi
-	mov	rax,rsp
-$L$SEH_begin_asm_RC4:
-	mov	rdi,rcx
-	mov	rsi,rdx
-	mov	rdx,r8
-	mov	rcx,r9
-
-
-	or	rsi,rsi
-	jne	NEAR $L$entry
-	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
-	mov	rsi,QWORD[16+rsp]
-	DB	0F3h,0C3h		;repret
-$L$entry:
-	push	rbx
-	push	r12
-	push	r13
-$L$prologue:
-	mov	r11,rsi
-	mov	r12,rdx
-	mov	r13,rcx
-	xor	r10,r10
-	xor	rcx,rcx
-
-	lea	rdi,[8+rdi]
-	mov	r10b,BYTE[((-8))+rdi]
-	mov	cl,BYTE[((-4))+rdi]
-	cmp	DWORD[256+rdi],-1
-	je	NEAR $L$RC4_CHAR
-	mov	r8d,DWORD[OPENSSL_ia32cap_P]
-	xor	rbx,rbx
-	inc	r10b
-	sub	rbx,r10
-	sub	r13,r12
-	mov	eax,DWORD[r10*4+rdi]
-	test	r11,-16
-	jz	NEAR $L$loop1
-	bt	r8d,30
-	jc	NEAR $L$intel
-	and	rbx,7
-	lea	rsi,[1+r10]
-	jz	NEAR $L$oop8
-	sub	r11,rbx
-$L$oop8_warmup:
-	add	cl,al
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	mov	DWORD[r10*4+rdi],edx
-	add	al,dl
-	inc	r10b
-	mov	edx,DWORD[rax*4+rdi]
-	mov	eax,DWORD[r10*4+rdi]
-	xor	dl,BYTE[r12]
-	mov	BYTE[r13*1+r12],dl
-	lea	r12,[1+r12]
-	dec	rbx
-	jnz	NEAR $L$oop8_warmup
-
-	lea	rsi,[1+r10]
-	jmp	NEAR $L$oop8
-ALIGN	16
-$L$oop8:
-	add	cl,al
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	mov	ebx,DWORD[rsi*4+rdi]
-	ror	r8,8
-	mov	DWORD[r10*4+rdi],edx
-	add	dl,al
-	mov	r8b,BYTE[rdx*4+rdi]
-	add	cl,bl
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],ebx
-	mov	eax,DWORD[4+rsi*4+rdi]
-	ror	r8,8
-	mov	DWORD[4+r10*4+rdi],edx
-	add	dl,bl
-	mov	r8b,BYTE[rdx*4+rdi]
-	add	cl,al
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	mov	ebx,DWORD[8+rsi*4+rdi]
-	ror	r8,8
-	mov	DWORD[8+r10*4+rdi],edx
-	add	dl,al
-	mov	r8b,BYTE[rdx*4+rdi]
-	add	cl,bl
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],ebx
-	mov	eax,DWORD[12+rsi*4+rdi]
-	ror	r8,8
-	mov	DWORD[12+r10*4+rdi],edx
-	add	dl,bl
-	mov	r8b,BYTE[rdx*4+rdi]
-	add	cl,al
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	mov	ebx,DWORD[16+rsi*4+rdi]
-	ror	r8,8
-	mov	DWORD[16+r10*4+rdi],edx
-	add	dl,al
-	mov	r8b,BYTE[rdx*4+rdi]
-	add	cl,bl
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],ebx
-	mov	eax,DWORD[20+rsi*4+rdi]
-	ror	r8,8
-	mov	DWORD[20+r10*4+rdi],edx
-	add	dl,bl
-	mov	r8b,BYTE[rdx*4+rdi]
-	add	cl,al
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	mov	ebx,DWORD[24+rsi*4+rdi]
-	ror	r8,8
-	mov	DWORD[24+r10*4+rdi],edx
-	add	dl,al
-	mov	r8b,BYTE[rdx*4+rdi]
-	add	sil,8
-	add	cl,bl
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],ebx
-	mov	eax,DWORD[((-4))+rsi*4+rdi]
-	ror	r8,8
-	mov	DWORD[28+r10*4+rdi],edx
-	add	dl,bl
-	mov	r8b,BYTE[rdx*4+rdi]
-	add	r10b,8
-	ror	r8,8
-	sub	r11,8
-
-	xor	r8,QWORD[r12]
-	mov	QWORD[r13*1+r12],r8
-	lea	r12,[8+r12]
-
-	test	r11,-8
-	jnz	NEAR $L$oop8
-	cmp	r11,0
-	jne	NEAR $L$loop1
-	jmp	NEAR $L$exit
-
-ALIGN	16
-$L$intel:
-	test	r11,-32
-	jz	NEAR $L$loop1
-	and	rbx,15
-	jz	NEAR $L$oop16_is_hot
-	sub	r11,rbx
-$L$oop16_warmup:
-	add	cl,al
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	mov	DWORD[r10*4+rdi],edx
-	add	al,dl
-	inc	r10b
-	mov	edx,DWORD[rax*4+rdi]
-	mov	eax,DWORD[r10*4+rdi]
-	xor	dl,BYTE[r12]
-	mov	BYTE[r13*1+r12],dl
-	lea	r12,[1+r12]
-	dec	rbx
-	jnz	NEAR $L$oop16_warmup
-
-	mov	rbx,rcx
-	xor	rcx,rcx
-	mov	cl,bl
-
-$L$oop16_is_hot:
-	lea	rsi,[r10*4+rdi]
-	add	cl,al
-	mov	edx,DWORD[rcx*4+rdi]
-	pxor	xmm0,xmm0
-	mov	DWORD[rcx*4+rdi],eax
-	add	al,dl
-	mov	ebx,DWORD[4+rsi]
-	movzx	eax,al
-	mov	DWORD[rsi],edx
-	add	cl,bl
-	pinsrw	xmm0,WORD[rax*4+rdi],0
-	jmp	NEAR $L$oop16_enter
-ALIGN	16
-$L$oop16:
-	add	cl,al
-	mov	edx,DWORD[rcx*4+rdi]
-	pxor	xmm2,xmm0
-	psllq	xmm1,8
-	pxor	xmm0,xmm0
-	mov	DWORD[rcx*4+rdi],eax
-	add	al,dl
-	mov	ebx,DWORD[4+rsi]
-	movzx	eax,al
-	mov	DWORD[rsi],edx
-	pxor	xmm2,xmm1
-	add	cl,bl
-	pinsrw	xmm0,WORD[rax*4+rdi],0
-	movdqu	XMMWORD[r13*1+r12],xmm2
-	lea	r12,[16+r12]
-$L$oop16_enter:
-	mov	edx,DWORD[rcx*4+rdi]
-	pxor	xmm1,xmm1
-	mov	DWORD[rcx*4+rdi],ebx
-	add	bl,dl
-	mov	eax,DWORD[8+rsi]
-	movzx	ebx,bl
-	mov	DWORD[4+rsi],edx
-	add	cl,al
-	pinsrw	xmm1,WORD[rbx*4+rdi],0
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	add	al,dl
-	mov	ebx,DWORD[12+rsi]
-	movzx	eax,al
-	mov	DWORD[8+rsi],edx
-	add	cl,bl
-	pinsrw	xmm0,WORD[rax*4+rdi],1
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],ebx
-	add	bl,dl
-	mov	eax,DWORD[16+rsi]
-	movzx	ebx,bl
-	mov	DWORD[12+rsi],edx
-	add	cl,al
-	pinsrw	xmm1,WORD[rbx*4+rdi],1
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	add	al,dl
-	mov	ebx,DWORD[20+rsi]
-	movzx	eax,al
-	mov	DWORD[16+rsi],edx
-	add	cl,bl
-	pinsrw	xmm0,WORD[rax*4+rdi],2
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],ebx
-	add	bl,dl
-	mov	eax,DWORD[24+rsi]
-	movzx	ebx,bl
-	mov	DWORD[20+rsi],edx
-	add	cl,al
-	pinsrw	xmm1,WORD[rbx*4+rdi],2
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	add	al,dl
-	mov	ebx,DWORD[28+rsi]
-	movzx	eax,al
-	mov	DWORD[24+rsi],edx
-	add	cl,bl
-	pinsrw	xmm0,WORD[rax*4+rdi],3
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],ebx
-	add	bl,dl
-	mov	eax,DWORD[32+rsi]
-	movzx	ebx,bl
-	mov	DWORD[28+rsi],edx
-	add	cl,al
-	pinsrw	xmm1,WORD[rbx*4+rdi],3
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	add	al,dl
-	mov	ebx,DWORD[36+rsi]
-	movzx	eax,al
-	mov	DWORD[32+rsi],edx
-	add	cl,bl
-	pinsrw	xmm0,WORD[rax*4+rdi],4
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],ebx
-	add	bl,dl
-	mov	eax,DWORD[40+rsi]
-	movzx	ebx,bl
-	mov	DWORD[36+rsi],edx
-	add	cl,al
-	pinsrw	xmm1,WORD[rbx*4+rdi],4
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	add	al,dl
-	mov	ebx,DWORD[44+rsi]
-	movzx	eax,al
-	mov	DWORD[40+rsi],edx
-	add	cl,bl
-	pinsrw	xmm0,WORD[rax*4+rdi],5
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],ebx
-	add	bl,dl
-	mov	eax,DWORD[48+rsi]
-	movzx	ebx,bl
-	mov	DWORD[44+rsi],edx
-	add	cl,al
-	pinsrw	xmm1,WORD[rbx*4+rdi],5
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	add	al,dl
-	mov	ebx,DWORD[52+rsi]
-	movzx	eax,al
-	mov	DWORD[48+rsi],edx
-	add	cl,bl
-	pinsrw	xmm0,WORD[rax*4+rdi],6
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],ebx
-	add	bl,dl
-	mov	eax,DWORD[56+rsi]
-	movzx	ebx,bl
-	mov	DWORD[52+rsi],edx
-	add	cl,al
-	pinsrw	xmm1,WORD[rbx*4+rdi],6
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	add	al,dl
-	mov	ebx,DWORD[60+rsi]
-	movzx	eax,al
-	mov	DWORD[56+rsi],edx
-	add	cl,bl
-	pinsrw	xmm0,WORD[rax*4+rdi],7
-	add	r10b,16
-	movdqu	xmm2,XMMWORD[r12]
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],ebx
-	add	bl,dl
-	movzx	ebx,bl
-	mov	DWORD[60+rsi],edx
-	lea	rsi,[r10*4+rdi]
-	pinsrw	xmm1,WORD[rbx*4+rdi],7
-	mov	eax,DWORD[rsi]
-	mov	rbx,rcx
-	xor	rcx,rcx
-	sub	r11,16
-	mov	cl,bl
-	test	r11,-16
-	jnz	NEAR $L$oop16
-
-	psllq	xmm1,8
-	pxor	xmm2,xmm0
-	pxor	xmm2,xmm1
-	movdqu	XMMWORD[r13*1+r12],xmm2
-	lea	r12,[16+r12]
-
-	cmp	r11,0
-	jne	NEAR $L$loop1
-	jmp	NEAR $L$exit
-
-ALIGN	16
-$L$loop1:
-	add	cl,al
-	mov	edx,DWORD[rcx*4+rdi]
-	mov	DWORD[rcx*4+rdi],eax
-	mov	DWORD[r10*4+rdi],edx
-	add	al,dl
-	inc	r10b
-	mov	edx,DWORD[rax*4+rdi]
-	mov	eax,DWORD[r10*4+rdi]
-	xor	dl,BYTE[r12]
-	mov	BYTE[r13*1+r12],dl
-	lea	r12,[1+r12]
-	dec	r11
-	jnz	NEAR $L$loop1
-	jmp	NEAR $L$exit
-
-ALIGN	16
-$L$RC4_CHAR:
-	add	r10b,1
-	movzx	eax,BYTE[r10*1+rdi]
-	test	r11,-8
-	jz	NEAR $L$cloop1
-	jmp	NEAR $L$cloop8
-ALIGN	16
-$L$cloop8:
-	mov	r8d,DWORD[r12]
-	mov	r9d,DWORD[4+r12]
-	add	cl,al
-	lea	rsi,[1+r10]
-	movzx	edx,BYTE[rcx*1+rdi]
-	movzx	esi,sil
-	movzx	ebx,BYTE[rsi*1+rdi]
-	mov	BYTE[rcx*1+rdi],al
-	cmp	rcx,rsi
-	mov	BYTE[r10*1+rdi],dl
-	jne	NEAR $L$cmov0
-	mov	rbx,rax
-$L$cmov0:
-	add	dl,al
-	xor	r8b,BYTE[rdx*1+rdi]
-	ror	r8d,8
-	add	cl,bl
-	lea	r10,[1+rsi]
-	movzx	edx,BYTE[rcx*1+rdi]
-	movzx	r10d,r10b
-	movzx	eax,BYTE[r10*1+rdi]
-	mov	BYTE[rcx*1+rdi],bl
-	cmp	rcx,r10
-	mov	BYTE[rsi*1+rdi],dl
-	jne	NEAR $L$cmov1
-	mov	rax,rbx
-$L$cmov1:
-	add	dl,bl
-	xor	r8b,BYTE[rdx*1+rdi]
-	ror	r8d,8
-	add	cl,al
-	lea	rsi,[1+r10]
-	movzx	edx,BYTE[rcx*1+rdi]
-	movzx	esi,sil
-	movzx	ebx,BYTE[rsi*1+rdi]
-	mov	BYTE[rcx*1+rdi],al
-	cmp	rcx,rsi
-	mov	BYTE[r10*1+rdi],dl
-	jne	NEAR $L$cmov2
-	mov	rbx,rax
-$L$cmov2:
-	add	dl,al
-	xor	r8b,BYTE[rdx*1+rdi]
-	ror	r8d,8
-	add	cl,bl
-	lea	r10,[1+rsi]
-	movzx	edx,BYTE[rcx*1+rdi]
-	movzx	r10d,r10b
-	movzx	eax,BYTE[r10*1+rdi]
-	mov	BYTE[rcx*1+rdi],bl
-	cmp	rcx,r10
-	mov	BYTE[rsi*1+rdi],dl
-	jne	NEAR $L$cmov3
-	mov	rax,rbx
-$L$cmov3:
-	add	dl,bl
-	xor	r8b,BYTE[rdx*1+rdi]
-	ror	r8d,8
-	add	cl,al
-	lea	rsi,[1+r10]
-	movzx	edx,BYTE[rcx*1+rdi]
-	movzx	esi,sil
-	movzx	ebx,BYTE[rsi*1+rdi]
-	mov	BYTE[rcx*1+rdi],al
-	cmp	rcx,rsi
-	mov	BYTE[r10*1+rdi],dl
-	jne	NEAR $L$cmov4
-	mov	rbx,rax
-$L$cmov4:
-	add	dl,al
-	xor	r9b,BYTE[rdx*1+rdi]
-	ror	r9d,8
-	add	cl,bl
-	lea	r10,[1+rsi]
-	movzx	edx,BYTE[rcx*1+rdi]
-	movzx	r10d,r10b
-	movzx	eax,BYTE[r10*1+rdi]
-	mov	BYTE[rcx*1+rdi],bl
-	cmp	rcx,r10
-	mov	BYTE[rsi*1+rdi],dl
-	jne	NEAR $L$cmov5
-	mov	rax,rbx
-$L$cmov5:
-	add	dl,bl
-	xor	r9b,BYTE[rdx*1+rdi]
-	ror	r9d,8
-	add	cl,al
-	lea	rsi,[1+r10]
-	movzx	edx,BYTE[rcx*1+rdi]
-	movzx	esi,sil
-	movzx	ebx,BYTE[rsi*1+rdi]
-	mov	BYTE[rcx*1+rdi],al
-	cmp	rcx,rsi
-	mov	BYTE[r10*1+rdi],dl
-	jne	NEAR $L$cmov6
-	mov	rbx,rax
-$L$cmov6:
-	add	dl,al
-	xor	r9b,BYTE[rdx*1+rdi]
-	ror	r9d,8
-	add	cl,bl
-	lea	r10,[1+rsi]
-	movzx	edx,BYTE[rcx*1+rdi]
-	movzx	r10d,r10b
-	movzx	eax,BYTE[r10*1+rdi]
-	mov	BYTE[rcx*1+rdi],bl
-	cmp	rcx,r10
-	mov	BYTE[rsi*1+rdi],dl
-	jne	NEAR $L$cmov7
-	mov	rax,rbx
-$L$cmov7:
-	add	dl,bl
-	xor	r9b,BYTE[rdx*1+rdi]
-	ror	r9d,8
-	lea	r11,[((-8))+r11]
-	mov	DWORD[r13],r8d
-	lea	r12,[8+r12]
-	mov	DWORD[4+r13],r9d
-	lea	r13,[8+r13]
-
-	test	r11,-8
-	jnz	NEAR $L$cloop8
-	cmp	r11,0
-	jne	NEAR $L$cloop1
-	jmp	NEAR $L$exit
-ALIGN	16
-$L$cloop1:
-	add	cl,al
-	movzx	ecx,cl
-	movzx	edx,BYTE[rcx*1+rdi]
-	mov	BYTE[rcx*1+rdi],al
-	mov	BYTE[r10*1+rdi],dl
-	add	dl,al
-	add	r10b,1
-	movzx	edx,dl
-	movzx	r10d,r10b
-	movzx	edx,BYTE[rdx*1+rdi]
-	movzx	eax,BYTE[r10*1+rdi]
-	xor	dl,BYTE[r12]
-	lea	r12,[1+r12]
-	mov	BYTE[r13],dl
-	lea	r13,[1+r13]
-	sub	r11,1
-	jnz	NEAR $L$cloop1
-	jmp	NEAR $L$exit
-
-ALIGN	16
-$L$exit:
-	sub	r10b,1
-	mov	DWORD[((-8))+rdi],r10d
-	mov	DWORD[((-4))+rdi],ecx
-
-	mov	r13,QWORD[rsp]
-	mov	r12,QWORD[8+rsp]
-	mov	rbx,QWORD[16+rsp]
-	add	rsp,24
-$L$epilogue:
-	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
-	mov	rsi,QWORD[16+rsp]
-	DB	0F3h,0C3h		;repret
-$L$SEH_end_asm_RC4:
-global	asm_RC4_set_key
-
-ALIGN	16
-asm_RC4_set_key:
-	mov	QWORD[8+rsp],rdi	;WIN64 prologue
-	mov	QWORD[16+rsp],rsi
-	mov	rax,rsp
-$L$SEH_begin_asm_RC4_set_key:
-	mov	rdi,rcx
-	mov	rsi,rdx
-	mov	rdx,r8
-
-
-	lea	rdi,[8+rdi]
-	lea	rdx,[rsi*1+rdx]
-	neg	rsi
-	mov	rcx,rsi
-	xor	eax,eax
-	xor	r9,r9
-	xor	r10,r10
-	xor	r11,r11
-
-	mov	r8d,DWORD[OPENSSL_ia32cap_P]
-	bt	r8d,20
-	jc	NEAR $L$c1stloop
-	jmp	NEAR $L$w1stloop
-
-ALIGN	16
-$L$w1stloop:
-	mov	DWORD[rax*4+rdi],eax
-	add	al,1
-	jnc	NEAR $L$w1stloop
-
-	xor	r9,r9
-	xor	r8,r8
-ALIGN	16
-$L$w2ndloop:
-	mov	r10d,DWORD[r9*4+rdi]
-	add	r8b,BYTE[rsi*1+rdx]
-	add	r8b,r10b
-	add	rsi,1
-	mov	r11d,DWORD[r8*4+rdi]
-	cmovz	rsi,rcx
-	mov	DWORD[r8*4+rdi],r10d
-	mov	DWORD[r9*4+rdi],r11d
-	add	r9b,1
-	jnc	NEAR $L$w2ndloop
-	jmp	NEAR $L$exit_key
-
-ALIGN	16
-$L$c1stloop:
-	mov	BYTE[rax*1+rdi],al
-	add	al,1
-	jnc	NEAR $L$c1stloop
-
-	xor	r9,r9
-	xor	r8,r8
-ALIGN	16
-$L$c2ndloop:
-	mov	r10b,BYTE[r9*1+rdi]
-	add	r8b,BYTE[rsi*1+rdx]
-	add	r8b,r10b
-	add	rsi,1
-	mov	r11b,BYTE[r8*1+rdi]
-	jnz	NEAR $L$cnowrap
-	mov	rsi,rcx
-$L$cnowrap:
-	mov	BYTE[r8*1+rdi],r10b
-	mov	BYTE[r9*1+rdi],r11b
-	add	r9b,1
-	jnc	NEAR $L$c2ndloop
-	mov	DWORD[256+rdi],-1
-
-ALIGN	16
-$L$exit_key:
-	xor	eax,eax
-	mov	DWORD[((-8))+rdi],eax
-	mov	DWORD[((-4))+rdi],eax
-	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
-	mov	rsi,QWORD[16+rsp]
-	DB	0F3h,0C3h		;repret
-$L$SEH_end_asm_RC4_set_key:
-EXTERN	__imp_RtlVirtualUnwind
-
-ALIGN	16
-stream_se_handler:
-	push	rsi
-	push	rdi
-	push	rbx
-	push	rbp
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	pushfq
-	sub	rsp,64
-
-	mov	rax,QWORD[120+r8]
-	mov	rbx,QWORD[248+r8]
-
-	lea	r10,[$L$prologue]
-	cmp	rbx,r10
-	jb	NEAR $L$in_prologue
-
-	mov	rax,QWORD[152+r8]
-
-	lea	r10,[$L$epilogue]
-	cmp	rbx,r10
-	jae	NEAR $L$in_prologue
-
-	lea	rax,[24+rax]
-
-	mov	rbx,QWORD[((-8))+rax]
-	mov	r12,QWORD[((-16))+rax]
-	mov	r13,QWORD[((-24))+rax]
-	mov	QWORD[144+r8],rbx
-	mov	QWORD[216+r8],r12
-	mov	QWORD[224+r8],r13
-
-$L$in_prologue:
-	mov	rdi,QWORD[8+rax]
-	mov	rsi,QWORD[16+rax]
-	mov	QWORD[152+r8],rax
-	mov	QWORD[168+r8],rsi
-	mov	QWORD[176+r8],rdi
-
-	jmp	NEAR $L$common_seh_exit
-
-
-
-ALIGN	16
-key_se_handler:
-	push	rsi
-	push	rdi
-	push	rbx
-	push	rbp
-	push	r12
-	push	r13
-	push	r14
-	push	r15
-	pushfq
-	sub	rsp,64
-
-	mov	rax,QWORD[152+r8]
-	mov	rdi,QWORD[8+rax]
-	mov	rsi,QWORD[16+rax]
-	mov	QWORD[168+r8],rsi
-	mov	QWORD[176+r8],rdi
-
-$L$common_seh_exit:
-
-	mov	rdi,QWORD[40+r9]
-	mov	rsi,r8
-	mov	ecx,154
-	DD	0xa548f3fc
-
-	mov	rsi,r9
-	xor	rcx,rcx
-	mov	rdx,QWORD[8+rsi]
-	mov	r8,QWORD[rsi]
-	mov	r9,QWORD[16+rsi]
-	mov	r10,QWORD[40+rsi]
-	lea	r11,[56+rsi]
-	lea	r12,[24+rsi]
-	mov	QWORD[32+rsp],r10
-	mov	QWORD[40+rsp],r11
-	mov	QWORD[48+rsp],r12
-	mov	QWORD[56+rsp],rcx
-	call	QWORD[__imp_RtlVirtualUnwind]
-
-	mov	eax,1
-	add	rsp,64
-	popfq
-	pop	r15
-	pop	r14
-	pop	r13
-	pop	r12
-	pop	rbp
-	pop	rbx
-	pop	rdi
-	pop	rsi
-	DB	0F3h,0C3h		;repret
-
-
-section	.pdata rdata align=4
-ALIGN	4
-	DD	$L$SEH_begin_asm_RC4 wrt ..imagebase
-	DD	$L$SEH_end_asm_RC4 wrt ..imagebase
-	DD	$L$SEH_info_asm_RC4 wrt ..imagebase
-
-	DD	$L$SEH_begin_asm_RC4_set_key wrt ..imagebase
-	DD	$L$SEH_end_asm_RC4_set_key wrt ..imagebase
-	DD	$L$SEH_info_asm_RC4_set_key wrt ..imagebase
-
-section	.xdata rdata align=8
-ALIGN	8
-$L$SEH_info_asm_RC4:
-DB	9,0,0,0
-	DD	stream_se_handler wrt ..imagebase
-$L$SEH_info_asm_RC4_set_key:
-DB	9,0,0,0
-	DD	key_se_handler wrt ..imagebase