BoringSSL: Roll generated files forward.
This allows rolling boringssl forward to mid-December at:
d519bf6be0b447fb80fbc539d4bff4479b5482a2
R=johnmccutchan@google.com
Review-Url: https://codereview.chromium.org/2569253003 .
diff --git a/BUILD.generated.gni b/BUILD.generated.gni
index d1ac9be..6836fab 100644
--- a/BUILD.generated.gni
+++ b/BUILD.generated.gni
@@ -7,10 +7,11 @@
crypto_sources = [
"err_data.c",
"src/crypto/aes/aes.c",
+ "src/crypto/aes/internal.h",
+ "src/crypto/aes/key_wrap.c",
"src/crypto/aes/mode_wrappers.c",
"src/crypto/asn1/a_bitstr.c",
"src/crypto/asn1/a_bool.c",
- "src/crypto/asn1/a_bytes.c",
"src/crypto/asn1/a_d2i_fp.c",
"src/crypto/asn1/a_dup.c",
"src/crypto/asn1/a_enum.c",
@@ -27,6 +28,7 @@
"src/crypto/asn1/a_utctm.c",
"src/crypto/asn1/a_utf8.c",
"src/crypto/asn1/asn1_lib.c",
+ "src/crypto/asn1/asn1_locl.h",
"src/crypto/asn1/asn1_par.c",
"src/crypto/asn1/asn_pack.c",
"src/crypto/asn1/f_enum.c",
@@ -49,6 +51,7 @@
"src/crypto/bio/fd.c",
"src/crypto/bio/file.c",
"src/crypto/bio/hexdump.c",
+ "src/crypto/bio/internal.h",
"src/crypto/bio/pair.c",
"src/crypto/bio/printf.c",
"src/crypto/bio/socket.c",
@@ -64,12 +67,15 @@
"src/crypto/bn/exponentiation.c",
"src/crypto/bn/gcd.c",
"src/crypto/bn/generic.c",
+ "src/crypto/bn/internal.h",
"src/crypto/bn/kronecker.c",
"src/crypto/bn/montgomery.c",
+ "src/crypto/bn/montgomery_inv.c",
"src/crypto/bn/mul.c",
"src/crypto/bn/prime.c",
"src/crypto/bn/random.c",
"src/crypto/bn/rsaz_exp.c",
+ "src/crypto/bn/rsaz_exp.h",
"src/crypto/bn/shift.c",
"src/crypto/bn/sqrt.c",
"src/crypto/buf/buf.c",
@@ -77,6 +83,7 @@
"src/crypto/bytestring/ber.c",
"src/crypto/bytestring/cbb.c",
"src/crypto/bytestring/cbs.c",
+ "src/crypto/bytestring/internal.h",
"src/crypto/chacha/chacha.c",
"src/crypto/cipher/aead.c",
"src/crypto/cipher/cipher.c",
@@ -89,34 +96,45 @@
"src/crypto/cipher/e_rc4.c",
"src/crypto/cipher/e_ssl3.c",
"src/crypto/cipher/e_tls.c",
+ "src/crypto/cipher/internal.h",
"src/crypto/cipher/tls_cbc.c",
"src/crypto/cmac/cmac.c",
"src/crypto/conf/conf.c",
+ "src/crypto/conf/conf_def.h",
+ "src/crypto/conf/internal.h",
"src/crypto/cpu-aarch64-linux.c",
"src/crypto/cpu-arm-linux.c",
"src/crypto/cpu-arm.c",
"src/crypto/cpu-intel.c",
+ "src/crypto/cpu-ppc64le.c",
"src/crypto/crypto.c",
"src/crypto/curve25519/curve25519.c",
+ "src/crypto/curve25519/internal.h",
"src/crypto/curve25519/spake25519.c",
"src/crypto/curve25519/x25519-x86_64.c",
"src/crypto/des/des.c",
+ "src/crypto/des/internal.h",
"src/crypto/dh/check.c",
"src/crypto/dh/dh.c",
"src/crypto/dh/dh_asn1.c",
"src/crypto/dh/params.c",
"src/crypto/digest/digest.c",
"src/crypto/digest/digests.c",
+ "src/crypto/digest/internal.h",
+ "src/crypto/digest/md32_common.h",
"src/crypto/dsa/dsa.c",
"src/crypto/dsa/dsa_asn1.c",
"src/crypto/ec/ec.c",
"src/crypto/ec/ec_asn1.c",
"src/crypto/ec/ec_key.c",
"src/crypto/ec/ec_montgomery.c",
+ "src/crypto/ec/internal.h",
"src/crypto/ec/oct.c",
"src/crypto/ec/p224-64.c",
"src/crypto/ec/p256-64.c",
+ "src/crypto/ec/p256-x86_64-table.h",
"src/crypto/ec/p256-x86_64.c",
+ "src/crypto/ec/p256-x86_64.h",
"src/crypto/ec/simple.c",
"src/crypto/ec/util-64.c",
"src/crypto/ec/wnaf.c",
@@ -129,6 +147,7 @@
"src/crypto/evp/evp.c",
"src/crypto/evp/evp_asn1.c",
"src/crypto/evp/evp_ctx.c",
+ "src/crypto/evp/internal.h",
"src/crypto/evp/p_dsa_asn1.c",
"src/crypto/evp/p_ec.c",
"src/crypto/evp/p_ec_asn1.c",
@@ -140,6 +159,7 @@
"src/crypto/ex_data.c",
"src/crypto/hkdf/hkdf.c",
"src/crypto/hmac/hmac.c",
+ "src/crypto/internal.h",
"src/crypto/lhash/lhash.c",
"src/crypto/md4/md4.c",
"src/crypto/md5/md5.c",
@@ -148,15 +168,13 @@
"src/crypto/modes/cfb.c",
"src/crypto/modes/ctr.c",
"src/crypto/modes/gcm.c",
+ "src/crypto/modes/internal.h",
"src/crypto/modes/ofb.c",
- "src/crypto/newhope/error_correction.c",
- "src/crypto/newhope/newhope.c",
- "src/crypto/newhope/ntt.c",
- "src/crypto/newhope/poly.c",
- "src/crypto/newhope/precomp.c",
- "src/crypto/newhope/reduce.c",
+ "src/crypto/modes/polyval.c",
"src/crypto/obj/obj.c",
+ "src/crypto/obj/obj_dat.h",
"src/crypto/obj/obj_xref.c",
+ "src/crypto/obj/obj_xref.h",
"src/crypto/pem/pem_all.c",
"src/crypto/pem/pem_info.c",
"src/crypto/pem/pem_lib.c",
@@ -165,14 +183,19 @@
"src/crypto/pem/pem_pkey.c",
"src/crypto/pem/pem_x509.c",
"src/crypto/pem/pem_xaux.c",
+ "src/crypto/pkcs8/internal.h",
"src/crypto/pkcs8/p5_pbe.c",
"src/crypto/pkcs8/p5_pbev2.c",
"src/crypto/pkcs8/p8_pkey.c",
"src/crypto/pkcs8/pkcs8.c",
+ "src/crypto/poly1305/internal.h",
"src/crypto/poly1305/poly1305.c",
"src/crypto/poly1305/poly1305_arm.c",
"src/crypto/poly1305/poly1305_vec.c",
+ "src/crypto/pool/internal.h",
+ "src/crypto/pool/pool.c",
"src/crypto/rand/deterministic.c",
+ "src/crypto/rand/internal.h",
"src/crypto/rand/rand.c",
"src/crypto/rand/urandom.c",
"src/crypto/rand/windows.c",
@@ -180,10 +203,12 @@
"src/crypto/refcount_c11.c",
"src/crypto/refcount_lock.c",
"src/crypto/rsa/blinding.c",
+ "src/crypto/rsa/internal.h",
"src/crypto/rsa/padding.c",
"src/crypto/rsa/rsa.c",
"src/crypto/rsa/rsa_asn1.c",
"src/crypto/rsa/rsa_impl.c",
+ "src/crypto/sha/sha1-altivec.c",
"src/crypto/sha/sha1.c",
"src/crypto/sha/sha256.c",
"src/crypto/sha/sha512.c",
@@ -201,13 +226,16 @@
"src/crypto/x509/asn1_gen.c",
"src/crypto/x509/by_dir.c",
"src/crypto/x509/by_file.c",
+ "src/crypto/x509/charmap.h",
"src/crypto/x509/i2d_pr.c",
+ "src/crypto/x509/internal.h",
"src/crypto/x509/pkcs7.c",
"src/crypto/x509/rsa_pss.c",
"src/crypto/x509/t_crl.c",
"src/crypto/x509/t_req.c",
"src/crypto/x509/t_x509.c",
"src/crypto/x509/t_x509a.c",
+ "src/crypto/x509/vpm_int.h",
"src/crypto/x509/x509.c",
"src/crypto/x509/x509_att.c",
"src/crypto/x509/x509_cmp.c",
@@ -244,8 +272,10 @@
"src/crypto/x509/x_val.c",
"src/crypto/x509/x_x509.c",
"src/crypto/x509/x_x509a.c",
+ "src/crypto/x509v3/ext_dat.h",
"src/crypto/x509v3/pcy_cache.c",
"src/crypto/x509v3/pcy_data.c",
+ "src/crypto/x509v3/pcy_int.h",
"src/crypto/x509v3/pcy_lib.c",
"src/crypto/x509v3/pcy_map.c",
"src/crypto/x509v3/pcy_node.c",
@@ -276,9 +306,82 @@
"src/crypto/x509v3/v3_skey.c",
"src/crypto/x509v3/v3_sxnet.c",
"src/crypto/x509v3/v3_utl.c",
+ "src/include/openssl/aead.h",
+ "src/include/openssl/aes.h",
+ "src/include/openssl/arm_arch.h",
+ "src/include/openssl/asn1.h",
+ "src/include/openssl/asn1_mac.h",
+ "src/include/openssl/asn1t.h",
+ "src/include/openssl/base.h",
+ "src/include/openssl/base64.h",
+ "src/include/openssl/bio.h",
+ "src/include/openssl/blowfish.h",
+ "src/include/openssl/bn.h",
+ "src/include/openssl/buf.h",
+ "src/include/openssl/buffer.h",
+ "src/include/openssl/bytestring.h",
+ "src/include/openssl/cast.h",
+ "src/include/openssl/chacha.h",
+ "src/include/openssl/cipher.h",
+ "src/include/openssl/cmac.h",
+ "src/include/openssl/conf.h",
+ "src/include/openssl/cpu.h",
+ "src/include/openssl/crypto.h",
+ "src/include/openssl/curve25519.h",
+ "src/include/openssl/des.h",
+ "src/include/openssl/dh.h",
+ "src/include/openssl/digest.h",
+ "src/include/openssl/dsa.h",
+ "src/include/openssl/ec.h",
+ "src/include/openssl/ec_key.h",
+ "src/include/openssl/ecdh.h",
+ "src/include/openssl/ecdsa.h",
+ "src/include/openssl/engine.h",
+ "src/include/openssl/err.h",
+ "src/include/openssl/evp.h",
+ "src/include/openssl/ex_data.h",
+ "src/include/openssl/hkdf.h",
+ "src/include/openssl/hmac.h",
+ "src/include/openssl/lhash.h",
+ "src/include/openssl/lhash_macros.h",
+ "src/include/openssl/md4.h",
+ "src/include/openssl/md5.h",
+ "src/include/openssl/mem.h",
+ "src/include/openssl/nid.h",
+ "src/include/openssl/obj.h",
+ "src/include/openssl/obj_mac.h",
+ "src/include/openssl/objects.h",
+ "src/include/openssl/opensslconf.h",
+ "src/include/openssl/opensslv.h",
+ "src/include/openssl/ossl_typ.h",
+ "src/include/openssl/pem.h",
+ "src/include/openssl/pkcs12.h",
+ "src/include/openssl/pkcs7.h",
+ "src/include/openssl/pkcs8.h",
+ "src/include/openssl/poly1305.h",
+ "src/include/openssl/pool.h",
+ "src/include/openssl/rand.h",
+ "src/include/openssl/rc4.h",
+ "src/include/openssl/ripemd.h",
+ "src/include/openssl/rsa.h",
+ "src/include/openssl/safestack.h",
+ "src/include/openssl/sha.h",
+ "src/include/openssl/srtp.h",
+ "src/include/openssl/stack.h",
+ "src/include/openssl/stack_macros.h",
+ "src/include/openssl/thread.h",
+ "src/include/openssl/time_support.h",
+ "src/include/openssl/type_check.h",
+ "src/include/openssl/x509.h",
+ "src/include/openssl/x509_vfy.h",
+ "src/include/openssl/x509v3.h",
]
ssl_sources = [
+ "src/include/openssl/dtls1.h",
+ "src/include/openssl/ssl.h",
+ "src/include/openssl/ssl3.h",
+ "src/include/openssl/tls1.h",
"src/ssl/custom_extensions.c",
"src/ssl/d1_both.c",
"src/ssl/d1_lib.c",
@@ -288,6 +391,7 @@
"src/ssl/dtls_record.c",
"src/ssl/handshake_client.c",
"src/ssl/handshake_server.c",
+ "src/ssl/internal.h",
"src/ssl/s3_both.c",
"src/ssl/s3_enc.c",
"src/ssl/s3_lib.c",
@@ -305,6 +409,10 @@
"src/ssl/ssl_stat.c",
"src/ssl/t1_enc.c",
"src/ssl/t1_lib.c",
+ "src/ssl/tls13_both.c",
+ "src/ssl/tls13_client.c",
+ "src/ssl/tls13_enc.c",
+ "src/ssl/tls13_server.c",
"src/ssl/tls_method.c",
"src/ssl/tls_record.c",
]
@@ -334,6 +442,11 @@
"src/crypto/poly1305/poly1305_arm_asm.S",
]
+crypto_sources_linux_ppc64le = [
+ "linux-ppc64le/crypto/aes/aesp8-ppc.S",
+ "linux-ppc64le/crypto/modes/ghashp8-ppc.S",
+]
+
crypto_sources_linux_x86 = [
"linux-x86/crypto/aes/aes-586.S",
"linux-x86/crypto/aes/aesni-x86.S",
@@ -344,7 +457,6 @@
"linux-x86/crypto/chacha/chacha-x86.S",
"linux-x86/crypto/md5/md5-586.S",
"linux-x86/crypto/modes/ghash-x86.S",
- "linux-x86/crypto/rc4/rc4-586.S",
"linux-x86/crypto/sha/sha1-586.S",
"linux-x86/crypto/sha/sha256-586.S",
"linux-x86/crypto/sha/sha512-586.S",
@@ -365,7 +477,6 @@
"linux-x86_64/crypto/modes/aesni-gcm-x86_64.S",
"linux-x86_64/crypto/modes/ghash-x86_64.S",
"linux-x86_64/crypto/rand/rdrand-x86_64.S",
- "linux-x86_64/crypto/rc4/rc4-x86_64.S",
"linux-x86_64/crypto/sha/sha1-x86_64.S",
"linux-x86_64/crypto/sha/sha256-x86_64.S",
"linux-x86_64/crypto/sha/sha512-x86_64.S",
@@ -382,7 +493,6 @@
"mac-x86/crypto/chacha/chacha-x86.S",
"mac-x86/crypto/md5/md5-586.S",
"mac-x86/crypto/modes/ghash-x86.S",
- "mac-x86/crypto/rc4/rc4-586.S",
"mac-x86/crypto/sha/sha1-586.S",
"mac-x86/crypto/sha/sha256-586.S",
"mac-x86/crypto/sha/sha512-586.S",
@@ -403,7 +513,6 @@
"mac-x86_64/crypto/modes/aesni-gcm-x86_64.S",
"mac-x86_64/crypto/modes/ghash-x86_64.S",
"mac-x86_64/crypto/rand/rdrand-x86_64.S",
- "mac-x86_64/crypto/rc4/rc4-x86_64.S",
"mac-x86_64/crypto/sha/sha1-x86_64.S",
"mac-x86_64/crypto/sha/sha256-x86_64.S",
"mac-x86_64/crypto/sha/sha512-x86_64.S",
@@ -420,7 +529,6 @@
"win-x86/crypto/chacha/chacha-x86.asm",
"win-x86/crypto/md5/md5-586.asm",
"win-x86/crypto/modes/ghash-x86.asm",
- "win-x86/crypto/rc4/rc4-586.asm",
"win-x86/crypto/sha/sha1-586.asm",
"win-x86/crypto/sha/sha256-586.asm",
"win-x86/crypto/sha/sha512-586.asm",
@@ -441,7 +549,6 @@
"win-x86_64/crypto/modes/aesni-gcm-x86_64.asm",
"win-x86_64/crypto/modes/ghash-x86_64.asm",
"win-x86_64/crypto/rand/rdrand-x86_64.asm",
- "win-x86_64/crypto/rc4/rc4-x86_64.asm",
"win-x86_64/crypto/sha/sha1-x86_64.asm",
"win-x86_64/crypto/sha/sha256-x86_64.asm",
"win-x86_64/crypto/sha/sha512-x86_64.asm",
@@ -454,5 +561,7 @@
"privkey",
"read_pem",
"server",
+ "session",
"spki",
+ "ssl_ctx_api",
]
diff --git a/BUILD.generated_tests.gni b/BUILD.generated_tests.gni
index 16bddff..2889b32 100644
--- a/BUILD.generated_tests.gni
+++ b/BUILD.generated_tests.gni
@@ -8,12 +8,10 @@
"src/crypto/test/file_test.cc",
"src/crypto/test/file_test.h",
"src/crypto/test/malloc.cc",
- "src/crypto/test/scoped_types.h",
"src/crypto/test/test_util.cc",
"src/crypto/test/test_util.h",
"src/ssl/test/async_bio.h",
"src/ssl/test/packeted_bio.h",
- "src/ssl/test/scoped_types.h",
"src/ssl/test/test_config.h",
]
@@ -246,6 +244,42 @@
deps = invoker.deps
}
+ executable("boringssl_p256-x86_64_test") {
+ sources = [
+ "src/crypto/ec/p256-x86_64_test.cc",
+ ]
+ sources += _test_support_sources
+ if (defined(invoker.configs_exclude)) {
+ configs -= invoker.configs_exclude
+ }
+ configs += invoker.configs
+ deps = invoker.deps
+ }
+
+ executable("boringssl_ecdh_test") {
+ sources = [
+ "src/crypto/ecdh/ecdh_test.cc",
+ ]
+ sources += _test_support_sources
+ if (defined(invoker.configs_exclude)) {
+ configs -= invoker.configs_exclude
+ }
+ configs += invoker.configs
+ deps = invoker.deps
+ }
+
+ executable("boringssl_ecdsa_sign_test") {
+ sources = [
+ "src/crypto/ecdsa/ecdsa_sign_test.cc",
+ ]
+ sources += _test_support_sources
+ if (defined(invoker.configs_exclude)) {
+ configs -= invoker.configs_exclude
+ }
+ configs += invoker.configs
+ deps = invoker.deps
+ }
+
executable("boringssl_ecdsa_test") {
sources = [
"src/crypto/ecdsa/ecdsa_test.cc",
@@ -258,6 +292,18 @@
deps = invoker.deps
}
+ executable("boringssl_ecdsa_verify_test") {
+ sources = [
+ "src/crypto/ecdsa/ecdsa_verify_test.cc",
+ ]
+ sources += _test_support_sources
+ if (defined(invoker.configs_exclude)) {
+ configs -= invoker.configs_exclude
+ }
+ configs += invoker.configs
+ deps = invoker.deps
+ }
+
executable("boringssl_err_test") {
sources = [
"src/crypto/err/err_test.cc",
@@ -344,43 +390,7 @@
executable("boringssl_gcm_test") {
sources = [
- "src/crypto/modes/gcm_test.c",
- ]
- sources += _test_support_sources
- if (defined(invoker.configs_exclude)) {
- configs -= invoker.configs_exclude
- }
- configs += invoker.configs
- deps = invoker.deps
- }
-
- executable("boringssl_newhope_statistical_test") {
- sources = [
- "src/crypto/newhope/newhope_statistical_test.cc",
- ]
- sources += _test_support_sources
- if (defined(invoker.configs_exclude)) {
- configs -= invoker.configs_exclude
- }
- configs += invoker.configs
- deps = invoker.deps
- }
-
- executable("boringssl_newhope_test") {
- sources = [
- "src/crypto/newhope/newhope_test.cc",
- ]
- sources += _test_support_sources
- if (defined(invoker.configs_exclude)) {
- configs -= invoker.configs_exclude
- }
- configs += invoker.configs
- deps = invoker.deps
- }
-
- executable("boringssl_newhope_vectors_test") {
- sources = [
- "src/crypto/newhope/newhope_vectors_test.cc",
+ "src/crypto/modes/gcm_test.cc",
]
sources += _test_support_sources
if (defined(invoker.configs_exclude)) {
@@ -438,6 +448,18 @@
deps = invoker.deps
}
+ executable("boringssl_pool_test") {
+ sources = [
+ "src/crypto/pool/pool_test.cc",
+ ]
+ sources += _test_support_sources
+ if (defined(invoker.configs_exclude)) {
+ configs -= invoker.configs_exclude
+ }
+ configs += invoker.configs
+ deps = invoker.deps
+ }
+
executable("boringssl_refcount_test") {
sources = [
"src/crypto/refcount_test.c",
@@ -551,7 +573,10 @@
":boringssl_digest_test",
":boringssl_dsa_test",
":boringssl_ec_test",
+ ":boringssl_ecdh_test",
+ ":boringssl_ecdsa_sign_test",
":boringssl_ecdsa_test",
+ ":boringssl_ecdsa_verify_test",
":boringssl_ed25519_test",
":boringssl_err_test",
":boringssl_evp_extra_test",
@@ -561,15 +586,14 @@
":boringssl_hkdf_test",
":boringssl_hmac_test",
":boringssl_lhash_test",
- ":boringssl_newhope_statistical_test",
- ":boringssl_newhope_test",
- ":boringssl_newhope_vectors_test",
":boringssl_obj_test",
+ ":boringssl_p256-x86_64_test",
":boringssl_pbkdf_test",
":boringssl_pkcs12_test",
":boringssl_pkcs7_test",
":boringssl_pkcs8_test",
":boringssl_poly1305_test",
+ ":boringssl_pool_test",
":boringssl_refcount_test",
":boringssl_rsa_test",
":boringssl_spake25519_test",
diff --git a/README b/README
index 2b8b5a5..b1c5469 100644
--- a/README
+++ b/README
@@ -1,6 +1,6 @@
This repository contains the files generated by boringssl for its build.
-It also contains this file and the files BUILD.gn, boringssl_dart.gyp, and
-boringssl_configurations.gypi.
+It also contains this file and the files BUILD.gn, boringssl_dart.gyp,
+boringssl_configurations.gypi, and codereview.settings.
The generated source is for boringssl from:
@@ -8,7 +8,7 @@
at revision:
-8d343b44bbab829d1a28fdef650ca95f7db4412e
+d519bf6be0b447fb80fbc539d4bff4479b5482a2
To roll boringssl forward, delete all but this file and the above mentioned
files, checkout the new boringssl into a subdirectory called src/, and run the
diff --git a/boringssl.gypi b/boringssl.gypi
index 2c7e7c7..f0cf2f7 100644
--- a/boringssl.gypi
+++ b/boringssl.gypi
@@ -7,6 +7,10 @@
{
'variables': {
'boringssl_ssl_sources': [
+ 'src/include/openssl/dtls1.h',
+ 'src/include/openssl/ssl.h',
+ 'src/include/openssl/ssl3.h',
+ 'src/include/openssl/tls1.h',
'src/ssl/custom_extensions.c',
'src/ssl/d1_both.c',
'src/ssl/d1_lib.c',
@@ -16,6 +20,7 @@
'src/ssl/dtls_record.c',
'src/ssl/handshake_client.c',
'src/ssl/handshake_server.c',
+ 'src/ssl/internal.h',
'src/ssl/s3_both.c',
'src/ssl/s3_enc.c',
'src/ssl/s3_lib.c',
@@ -33,16 +38,21 @@
'src/ssl/ssl_stat.c',
'src/ssl/t1_enc.c',
'src/ssl/t1_lib.c',
+ 'src/ssl/tls13_both.c',
+ 'src/ssl/tls13_client.c',
+ 'src/ssl/tls13_enc.c',
+ 'src/ssl/tls13_server.c',
'src/ssl/tls_method.c',
'src/ssl/tls_record.c',
],
'boringssl_crypto_sources': [
'err_data.c',
'src/crypto/aes/aes.c',
+ 'src/crypto/aes/internal.h',
+ 'src/crypto/aes/key_wrap.c',
'src/crypto/aes/mode_wrappers.c',
'src/crypto/asn1/a_bitstr.c',
'src/crypto/asn1/a_bool.c',
- 'src/crypto/asn1/a_bytes.c',
'src/crypto/asn1/a_d2i_fp.c',
'src/crypto/asn1/a_dup.c',
'src/crypto/asn1/a_enum.c',
@@ -59,6 +69,7 @@
'src/crypto/asn1/a_utctm.c',
'src/crypto/asn1/a_utf8.c',
'src/crypto/asn1/asn1_lib.c',
+ 'src/crypto/asn1/asn1_locl.h',
'src/crypto/asn1/asn1_par.c',
'src/crypto/asn1/asn_pack.c',
'src/crypto/asn1/f_enum.c',
@@ -81,6 +92,7 @@
'src/crypto/bio/fd.c',
'src/crypto/bio/file.c',
'src/crypto/bio/hexdump.c',
+ 'src/crypto/bio/internal.h',
'src/crypto/bio/pair.c',
'src/crypto/bio/printf.c',
'src/crypto/bio/socket.c',
@@ -96,12 +108,15 @@
'src/crypto/bn/exponentiation.c',
'src/crypto/bn/gcd.c',
'src/crypto/bn/generic.c',
+ 'src/crypto/bn/internal.h',
'src/crypto/bn/kronecker.c',
'src/crypto/bn/montgomery.c',
+ 'src/crypto/bn/montgomery_inv.c',
'src/crypto/bn/mul.c',
'src/crypto/bn/prime.c',
'src/crypto/bn/random.c',
'src/crypto/bn/rsaz_exp.c',
+ 'src/crypto/bn/rsaz_exp.h',
'src/crypto/bn/shift.c',
'src/crypto/bn/sqrt.c',
'src/crypto/buf/buf.c',
@@ -109,6 +124,7 @@
'src/crypto/bytestring/ber.c',
'src/crypto/bytestring/cbb.c',
'src/crypto/bytestring/cbs.c',
+ 'src/crypto/bytestring/internal.h',
'src/crypto/chacha/chacha.c',
'src/crypto/cipher/aead.c',
'src/crypto/cipher/cipher.c',
@@ -121,34 +137,45 @@
'src/crypto/cipher/e_rc4.c',
'src/crypto/cipher/e_ssl3.c',
'src/crypto/cipher/e_tls.c',
+ 'src/crypto/cipher/internal.h',
'src/crypto/cipher/tls_cbc.c',
'src/crypto/cmac/cmac.c',
'src/crypto/conf/conf.c',
+ 'src/crypto/conf/conf_def.h',
+ 'src/crypto/conf/internal.h',
'src/crypto/cpu-aarch64-linux.c',
'src/crypto/cpu-arm-linux.c',
'src/crypto/cpu-arm.c',
'src/crypto/cpu-intel.c',
+ 'src/crypto/cpu-ppc64le.c',
'src/crypto/crypto.c',
'src/crypto/curve25519/curve25519.c',
+ 'src/crypto/curve25519/internal.h',
'src/crypto/curve25519/spake25519.c',
'src/crypto/curve25519/x25519-x86_64.c',
'src/crypto/des/des.c',
+ 'src/crypto/des/internal.h',
'src/crypto/dh/check.c',
'src/crypto/dh/dh.c',
'src/crypto/dh/dh_asn1.c',
'src/crypto/dh/params.c',
'src/crypto/digest/digest.c',
'src/crypto/digest/digests.c',
+ 'src/crypto/digest/internal.h',
+ 'src/crypto/digest/md32_common.h',
'src/crypto/dsa/dsa.c',
'src/crypto/dsa/dsa_asn1.c',
'src/crypto/ec/ec.c',
'src/crypto/ec/ec_asn1.c',
'src/crypto/ec/ec_key.c',
'src/crypto/ec/ec_montgomery.c',
+ 'src/crypto/ec/internal.h',
'src/crypto/ec/oct.c',
'src/crypto/ec/p224-64.c',
'src/crypto/ec/p256-64.c',
+ 'src/crypto/ec/p256-x86_64-table.h',
'src/crypto/ec/p256-x86_64.c',
+ 'src/crypto/ec/p256-x86_64.h',
'src/crypto/ec/simple.c',
'src/crypto/ec/util-64.c',
'src/crypto/ec/wnaf.c',
@@ -161,6 +188,7 @@
'src/crypto/evp/evp.c',
'src/crypto/evp/evp_asn1.c',
'src/crypto/evp/evp_ctx.c',
+ 'src/crypto/evp/internal.h',
'src/crypto/evp/p_dsa_asn1.c',
'src/crypto/evp/p_ec.c',
'src/crypto/evp/p_ec_asn1.c',
@@ -172,6 +200,7 @@
'src/crypto/ex_data.c',
'src/crypto/hkdf/hkdf.c',
'src/crypto/hmac/hmac.c',
+ 'src/crypto/internal.h',
'src/crypto/lhash/lhash.c',
'src/crypto/md4/md4.c',
'src/crypto/md5/md5.c',
@@ -180,15 +209,13 @@
'src/crypto/modes/cfb.c',
'src/crypto/modes/ctr.c',
'src/crypto/modes/gcm.c',
+ 'src/crypto/modes/internal.h',
'src/crypto/modes/ofb.c',
- 'src/crypto/newhope/error_correction.c',
- 'src/crypto/newhope/newhope.c',
- 'src/crypto/newhope/ntt.c',
- 'src/crypto/newhope/poly.c',
- 'src/crypto/newhope/precomp.c',
- 'src/crypto/newhope/reduce.c',
+ 'src/crypto/modes/polyval.c',
'src/crypto/obj/obj.c',
+ 'src/crypto/obj/obj_dat.h',
'src/crypto/obj/obj_xref.c',
+ 'src/crypto/obj/obj_xref.h',
'src/crypto/pem/pem_all.c',
'src/crypto/pem/pem_info.c',
'src/crypto/pem/pem_lib.c',
@@ -197,14 +224,19 @@
'src/crypto/pem/pem_pkey.c',
'src/crypto/pem/pem_x509.c',
'src/crypto/pem/pem_xaux.c',
+ 'src/crypto/pkcs8/internal.h',
'src/crypto/pkcs8/p5_pbe.c',
'src/crypto/pkcs8/p5_pbev2.c',
'src/crypto/pkcs8/p8_pkey.c',
'src/crypto/pkcs8/pkcs8.c',
+ 'src/crypto/poly1305/internal.h',
'src/crypto/poly1305/poly1305.c',
'src/crypto/poly1305/poly1305_arm.c',
'src/crypto/poly1305/poly1305_vec.c',
+ 'src/crypto/pool/internal.h',
+ 'src/crypto/pool/pool.c',
'src/crypto/rand/deterministic.c',
+ 'src/crypto/rand/internal.h',
'src/crypto/rand/rand.c',
'src/crypto/rand/urandom.c',
'src/crypto/rand/windows.c',
@@ -212,10 +244,12 @@
'src/crypto/refcount_c11.c',
'src/crypto/refcount_lock.c',
'src/crypto/rsa/blinding.c',
+ 'src/crypto/rsa/internal.h',
'src/crypto/rsa/padding.c',
'src/crypto/rsa/rsa.c',
'src/crypto/rsa/rsa_asn1.c',
'src/crypto/rsa/rsa_impl.c',
+ 'src/crypto/sha/sha1-altivec.c',
'src/crypto/sha/sha1.c',
'src/crypto/sha/sha256.c',
'src/crypto/sha/sha512.c',
@@ -233,13 +267,16 @@
'src/crypto/x509/asn1_gen.c',
'src/crypto/x509/by_dir.c',
'src/crypto/x509/by_file.c',
+ 'src/crypto/x509/charmap.h',
'src/crypto/x509/i2d_pr.c',
+ 'src/crypto/x509/internal.h',
'src/crypto/x509/pkcs7.c',
'src/crypto/x509/rsa_pss.c',
'src/crypto/x509/t_crl.c',
'src/crypto/x509/t_req.c',
'src/crypto/x509/t_x509.c',
'src/crypto/x509/t_x509a.c',
+ 'src/crypto/x509/vpm_int.h',
'src/crypto/x509/x509.c',
'src/crypto/x509/x509_att.c',
'src/crypto/x509/x509_cmp.c',
@@ -276,8 +313,10 @@
'src/crypto/x509/x_val.c',
'src/crypto/x509/x_x509.c',
'src/crypto/x509/x_x509a.c',
+ 'src/crypto/x509v3/ext_dat.h',
'src/crypto/x509v3/pcy_cache.c',
'src/crypto/x509v3/pcy_data.c',
+ 'src/crypto/x509v3/pcy_int.h',
'src/crypto/x509v3/pcy_lib.c',
'src/crypto/x509v3/pcy_map.c',
'src/crypto/x509v3/pcy_node.c',
@@ -308,6 +347,75 @@
'src/crypto/x509v3/v3_skey.c',
'src/crypto/x509v3/v3_sxnet.c',
'src/crypto/x509v3/v3_utl.c',
+ 'src/include/openssl/aead.h',
+ 'src/include/openssl/aes.h',
+ 'src/include/openssl/arm_arch.h',
+ 'src/include/openssl/asn1.h',
+ 'src/include/openssl/asn1_mac.h',
+ 'src/include/openssl/asn1t.h',
+ 'src/include/openssl/base.h',
+ 'src/include/openssl/base64.h',
+ 'src/include/openssl/bio.h',
+ 'src/include/openssl/blowfish.h',
+ 'src/include/openssl/bn.h',
+ 'src/include/openssl/buf.h',
+ 'src/include/openssl/buffer.h',
+ 'src/include/openssl/bytestring.h',
+ 'src/include/openssl/cast.h',
+ 'src/include/openssl/chacha.h',
+ 'src/include/openssl/cipher.h',
+ 'src/include/openssl/cmac.h',
+ 'src/include/openssl/conf.h',
+ 'src/include/openssl/cpu.h',
+ 'src/include/openssl/crypto.h',
+ 'src/include/openssl/curve25519.h',
+ 'src/include/openssl/des.h',
+ 'src/include/openssl/dh.h',
+ 'src/include/openssl/digest.h',
+ 'src/include/openssl/dsa.h',
+ 'src/include/openssl/ec.h',
+ 'src/include/openssl/ec_key.h',
+ 'src/include/openssl/ecdh.h',
+ 'src/include/openssl/ecdsa.h',
+ 'src/include/openssl/engine.h',
+ 'src/include/openssl/err.h',
+ 'src/include/openssl/evp.h',
+ 'src/include/openssl/ex_data.h',
+ 'src/include/openssl/hkdf.h',
+ 'src/include/openssl/hmac.h',
+ 'src/include/openssl/lhash.h',
+ 'src/include/openssl/lhash_macros.h',
+ 'src/include/openssl/md4.h',
+ 'src/include/openssl/md5.h',
+ 'src/include/openssl/mem.h',
+ 'src/include/openssl/nid.h',
+ 'src/include/openssl/obj.h',
+ 'src/include/openssl/obj_mac.h',
+ 'src/include/openssl/objects.h',
+ 'src/include/openssl/opensslconf.h',
+ 'src/include/openssl/opensslv.h',
+ 'src/include/openssl/ossl_typ.h',
+ 'src/include/openssl/pem.h',
+ 'src/include/openssl/pkcs12.h',
+ 'src/include/openssl/pkcs7.h',
+ 'src/include/openssl/pkcs8.h',
+ 'src/include/openssl/poly1305.h',
+ 'src/include/openssl/pool.h',
+ 'src/include/openssl/rand.h',
+ 'src/include/openssl/rc4.h',
+ 'src/include/openssl/ripemd.h',
+ 'src/include/openssl/rsa.h',
+ 'src/include/openssl/safestack.h',
+ 'src/include/openssl/sha.h',
+ 'src/include/openssl/srtp.h',
+ 'src/include/openssl/stack.h',
+ 'src/include/openssl/stack_macros.h',
+ 'src/include/openssl/thread.h',
+ 'src/include/openssl/time_support.h',
+ 'src/include/openssl/type_check.h',
+ 'src/include/openssl/x509.h',
+ 'src/include/openssl/x509_vfy.h',
+ 'src/include/openssl/x509v3.h',
],
'boringssl_linux_aarch64_sources': [
'linux-aarch64/crypto/aes/aesv8-armx64.S',
@@ -332,6 +440,10 @@
'src/crypto/curve25519/asm/x25519-asm-arm.S',
'src/crypto/poly1305/poly1305_arm_asm.S',
],
+ 'boringssl_linux_ppc64le_sources': [
+ 'linux-ppc64le/crypto/aes/aesp8-ppc.S',
+ 'linux-ppc64le/crypto/modes/ghashp8-ppc.S',
+ ],
'boringssl_linux_x86_sources': [
'linux-x86/crypto/aes/aes-586.S',
'linux-x86/crypto/aes/aesni-x86.S',
@@ -342,7 +454,6 @@
'linux-x86/crypto/chacha/chacha-x86.S',
'linux-x86/crypto/md5/md5-586.S',
'linux-x86/crypto/modes/ghash-x86.S',
- 'linux-x86/crypto/rc4/rc4-586.S',
'linux-x86/crypto/sha/sha1-586.S',
'linux-x86/crypto/sha/sha256-586.S',
'linux-x86/crypto/sha/sha512-586.S',
@@ -362,7 +473,6 @@
'linux-x86_64/crypto/modes/aesni-gcm-x86_64.S',
'linux-x86_64/crypto/modes/ghash-x86_64.S',
'linux-x86_64/crypto/rand/rdrand-x86_64.S',
- 'linux-x86_64/crypto/rc4/rc4-x86_64.S',
'linux-x86_64/crypto/sha/sha1-x86_64.S',
'linux-x86_64/crypto/sha/sha256-x86_64.S',
'linux-x86_64/crypto/sha/sha512-x86_64.S',
@@ -378,7 +488,6 @@
'mac-x86/crypto/chacha/chacha-x86.S',
'mac-x86/crypto/md5/md5-586.S',
'mac-x86/crypto/modes/ghash-x86.S',
- 'mac-x86/crypto/rc4/rc4-586.S',
'mac-x86/crypto/sha/sha1-586.S',
'mac-x86/crypto/sha/sha256-586.S',
'mac-x86/crypto/sha/sha512-586.S',
@@ -398,7 +507,6 @@
'mac-x86_64/crypto/modes/aesni-gcm-x86_64.S',
'mac-x86_64/crypto/modes/ghash-x86_64.S',
'mac-x86_64/crypto/rand/rdrand-x86_64.S',
- 'mac-x86_64/crypto/rc4/rc4-x86_64.S',
'mac-x86_64/crypto/sha/sha1-x86_64.S',
'mac-x86_64/crypto/sha/sha256-x86_64.S',
'mac-x86_64/crypto/sha/sha512-x86_64.S',
@@ -414,7 +522,6 @@
'win-x86/crypto/chacha/chacha-x86.asm',
'win-x86/crypto/md5/md5-586.asm',
'win-x86/crypto/modes/ghash-x86.asm',
- 'win-x86/crypto/rc4/rc4-586.asm',
'win-x86/crypto/sha/sha1-586.asm',
'win-x86/crypto/sha/sha256-586.asm',
'win-x86/crypto/sha/sha512-586.asm',
@@ -434,7 +541,6 @@
'win-x86_64/crypto/modes/aesni-gcm-x86_64.asm',
'win-x86_64/crypto/modes/ghash-x86_64.asm',
'win-x86_64/crypto/rand/rdrand-x86_64.asm',
- 'win-x86_64/crypto/rc4/rc4-x86_64.asm',
'win-x86_64/crypto/sha/sha1-x86_64.asm',
'win-x86_64/crypto/sha/sha256-x86_64.asm',
'win-x86_64/crypto/sha/sha512-x86_64.asm',
diff --git a/boringssl_tests.gypi b/boringssl_tests.gypi
index 1076214..36d428d 100644
--- a/boringssl_tests.gypi
+++ b/boringssl_tests.gypi
@@ -273,6 +273,48 @@
'msvs_disabled_warnings': [ 4267, ],
},
{
+ 'target_name': 'boringssl_p256-x86_64_test',
+ 'type': 'executable',
+ 'dependencies': [
+ 'boringssl.gyp:boringssl',
+ ],
+ 'sources': [
+ 'src/crypto/ec/p256-x86_64_test.cc',
+ '<@(boringssl_test_support_sources)',
+ ],
+ # TODO(davidben): Fix size_t truncations in BoringSSL.
+ # https://crbug.com/429039
+ 'msvs_disabled_warnings': [ 4267, ],
+ },
+ {
+ 'target_name': 'boringssl_ecdh_test',
+ 'type': 'executable',
+ 'dependencies': [
+ 'boringssl.gyp:boringssl',
+ ],
+ 'sources': [
+ 'src/crypto/ecdh/ecdh_test.cc',
+ '<@(boringssl_test_support_sources)',
+ ],
+ # TODO(davidben): Fix size_t truncations in BoringSSL.
+ # https://crbug.com/429039
+ 'msvs_disabled_warnings': [ 4267, ],
+ },
+ {
+ 'target_name': 'boringssl_ecdsa_sign_test',
+ 'type': 'executable',
+ 'dependencies': [
+ 'boringssl.gyp:boringssl',
+ ],
+ 'sources': [
+ 'src/crypto/ecdsa/ecdsa_sign_test.cc',
+ '<@(boringssl_test_support_sources)',
+ ],
+ # TODO(davidben): Fix size_t truncations in BoringSSL.
+ # https://crbug.com/429039
+ 'msvs_disabled_warnings': [ 4267, ],
+ },
+ {
'target_name': 'boringssl_ecdsa_test',
'type': 'executable',
'dependencies': [
@@ -287,6 +329,20 @@
'msvs_disabled_warnings': [ 4267, ],
},
{
+ 'target_name': 'boringssl_ecdsa_verify_test',
+ 'type': 'executable',
+ 'dependencies': [
+ 'boringssl.gyp:boringssl',
+ ],
+ 'sources': [
+ 'src/crypto/ecdsa/ecdsa_verify_test.cc',
+ '<@(boringssl_test_support_sources)',
+ ],
+ # TODO(davidben): Fix size_t truncations in BoringSSL.
+ # https://crbug.com/429039
+ 'msvs_disabled_warnings': [ 4267, ],
+ },
+ {
'target_name': 'boringssl_err_test',
'type': 'executable',
'dependencies': [
@@ -391,49 +447,7 @@
'boringssl.gyp:boringssl',
],
'sources': [
- 'src/crypto/modes/gcm_test.c',
- '<@(boringssl_test_support_sources)',
- ],
- # TODO(davidben): Fix size_t truncations in BoringSSL.
- # https://crbug.com/429039
- 'msvs_disabled_warnings': [ 4267, ],
- },
- {
- 'target_name': 'boringssl_newhope_statistical_test',
- 'type': 'executable',
- 'dependencies': [
- 'boringssl.gyp:boringssl',
- ],
- 'sources': [
- 'src/crypto/newhope/newhope_statistical_test.cc',
- '<@(boringssl_test_support_sources)',
- ],
- # TODO(davidben): Fix size_t truncations in BoringSSL.
- # https://crbug.com/429039
- 'msvs_disabled_warnings': [ 4267, ],
- },
- {
- 'target_name': 'boringssl_newhope_test',
- 'type': 'executable',
- 'dependencies': [
- 'boringssl.gyp:boringssl',
- ],
- 'sources': [
- 'src/crypto/newhope/newhope_test.cc',
- '<@(boringssl_test_support_sources)',
- ],
- # TODO(davidben): Fix size_t truncations in BoringSSL.
- # https://crbug.com/429039
- 'msvs_disabled_warnings': [ 4267, ],
- },
- {
- 'target_name': 'boringssl_newhope_vectors_test',
- 'type': 'executable',
- 'dependencies': [
- 'boringssl.gyp:boringssl',
- ],
- 'sources': [
- 'src/crypto/newhope/newhope_vectors_test.cc',
+ 'src/crypto/modes/gcm_test.cc',
'<@(boringssl_test_support_sources)',
],
# TODO(davidben): Fix size_t truncations in BoringSSL.
@@ -497,6 +511,20 @@
'msvs_disabled_warnings': [ 4267, ],
},
{
+ 'target_name': 'boringssl_pool_test',
+ 'type': 'executable',
+ 'dependencies': [
+ 'boringssl.gyp:boringssl',
+ ],
+ 'sources': [
+ 'src/crypto/pool/pool_test.cc',
+ '<@(boringssl_test_support_sources)',
+ ],
+ # TODO(davidben): Fix size_t truncations in BoringSSL.
+ # https://crbug.com/429039
+ 'msvs_disabled_warnings': [ 4267, ],
+ },
+ {
'target_name': 'boringssl_refcount_test',
'type': 'executable',
'dependencies': [
@@ -614,12 +642,10 @@
'src/crypto/test/file_test.cc',
'src/crypto/test/file_test.h',
'src/crypto/test/malloc.cc',
- 'src/crypto/test/scoped_types.h',
'src/crypto/test/test_util.cc',
'src/crypto/test/test_util.h',
'src/ssl/test/async_bio.h',
'src/ssl/test/packeted_bio.h',
- 'src/ssl/test/scoped_types.h',
'src/ssl/test/test_config.h',
],
'boringssl_test_targets': [
@@ -638,7 +664,10 @@
'boringssl_digest_test',
'boringssl_dsa_test',
'boringssl_ec_test',
+ 'boringssl_ecdh_test',
+ 'boringssl_ecdsa_sign_test',
'boringssl_ecdsa_test',
+ 'boringssl_ecdsa_verify_test',
'boringssl_ed25519_test',
'boringssl_err_test',
'boringssl_evp_extra_test',
@@ -648,15 +677,14 @@
'boringssl_hkdf_test',
'boringssl_hmac_test',
'boringssl_lhash_test',
- 'boringssl_newhope_statistical_test',
- 'boringssl_newhope_test',
- 'boringssl_newhope_vectors_test',
'boringssl_obj_test',
+ 'boringssl_p256-x86_64_test',
'boringssl_pbkdf_test',
'boringssl_pkcs12_test',
'boringssl_pkcs7_test',
'boringssl_pkcs8_test',
'boringssl_poly1305_test',
+ 'boringssl_pool_test',
'boringssl_refcount_test',
'boringssl_rsa_test',
'boringssl_spake25519_test',
diff --git a/codereview.settings b/codereview.settings
new file mode 100644
index 0000000..ccbccd5
--- /dev/null
+++ b/codereview.settings
@@ -0,0 +1,4 @@
+# This file is used by gcl to get repository specific information.
+CODE_REVIEW_SERVER: http://codereview.chromium.org
+VIEW_VC: https://github.com/dart-lang/boringssl_gen/commit/
+CC_LIST: reviews@dartlang.org
diff --git a/err_data.c b/err_data.c
index d685679..e75a0ca 100644
--- a/err_data.c
+++ b/err_data.c
@@ -178,42 +178,42 @@
0x28340c19,
0x283480ac,
0x283500ea,
- 0x2c3227cb,
- 0x2c32a7d9,
- 0x2c3327eb,
- 0x2c33a7fd,
- 0x2c342811,
- 0x2c34a823,
- 0x2c35283e,
- 0x2c35a850,
- 0x2c362863,
+ 0x2c32299a,
+ 0x2c32a9a8,
+ 0x2c3329ba,
+ 0x2c33a9cc,
+ 0x2c3429e0,
+ 0x2c34a9f2,
+ 0x2c352a0d,
+ 0x2c35aa1f,
+ 0x2c362a32,
0x2c36832d,
- 0x2c372870,
- 0x2c37a882,
- 0x2c382895,
- 0x2c38a8ac,
- 0x2c3928ba,
- 0x2c39a8ca,
- 0x2c3a28dc,
- 0x2c3aa8f0,
- 0x2c3b2901,
- 0x2c3ba920,
- 0x2c3c2934,
- 0x2c3ca94a,
- 0x2c3d2963,
- 0x2c3da980,
- 0x2c3e2991,
- 0x2c3ea99f,
- 0x2c3f29b7,
- 0x2c3fa9cf,
- 0x2c4029dc,
+ 0x2c372a3f,
+ 0x2c37aa51,
+ 0x2c382a64,
+ 0x2c38aa7b,
+ 0x2c392a89,
+ 0x2c39aa99,
+ 0x2c3a2aab,
+ 0x2c3aaabf,
+ 0x2c3b2ad0,
+ 0x2c3baaef,
+ 0x2c3c2b03,
+ 0x2c3cab19,
+ 0x2c3d2b32,
+ 0x2c3dab4f,
+ 0x2c3e2b60,
+ 0x2c3eab6e,
+ 0x2c3f2b86,
+ 0x2c3fab9e,
+ 0x2c402bab,
0x2c4090e7,
- 0x2c4129ed,
- 0x2c41aa00,
+ 0x2c412bbc,
+ 0x2c41abcf,
0x2c4210c0,
- 0x2c42aa11,
+ 0x2c42abe0,
0x2c430720,
- 0x2c43a912,
+ 0x2c43aae1,
0x30320000,
0x30328015,
0x3033001f,
@@ -366,169 +366,188 @@
0x403b9861,
0x403c0064,
0x403c8083,
- 0x403d186d,
- 0x403d9883,
- 0x403e1892,
- 0x403e98a5,
- 0x403f18bf,
- 0x403f98cd,
- 0x404018e2,
- 0x404098f6,
- 0x40411913,
- 0x4041992e,
- 0x40421947,
- 0x4042995a,
- 0x4043196e,
- 0x40439986,
- 0x4044199d,
+ 0x403d18aa,
+ 0x403d98c0,
+ 0x403e18cf,
+ 0x403e98e2,
+ 0x403f18fc,
+ 0x403f990a,
+ 0x4040191f,
+ 0x40409933,
+ 0x40411950,
+ 0x4041996b,
+ 0x40421984,
+ 0x40429997,
+ 0x404319ab,
+ 0x404399c3,
+ 0x404419da,
0x404480ac,
- 0x404519b2,
- 0x404599c4,
- 0x404619e8,
- 0x40469a08,
- 0x40471a16,
- 0x40479a3d,
- 0x40481a52,
- 0x40489a6b,
- 0x40491a82,
- 0x40499a9c,
- 0x404a1ab3,
- 0x404a9ad1,
- 0x404b1ae9,
- 0x404b9b00,
- 0x404c1b16,
- 0x404c9b28,
- 0x404d1b49,
- 0x404d9b6b,
- 0x404e1b7f,
- 0x404e9b8c,
- 0x404f1ba3,
- 0x404f9bb3,
- 0x40501bdd,
- 0x40509bf1,
- 0x40511c0c,
- 0x40519c1c,
- 0x40521c33,
- 0x40529c45,
- 0x40531c5d,
- 0x40539c70,
- 0x40541c85,
- 0x40549ca8,
- 0x40551cb6,
- 0x40559cd3,
- 0x40561ce0,
- 0x40569cf9,
- 0x40571d11,
- 0x40579d24,
- 0x40581d39,
- 0x40589d4b,
- 0x40591d7a,
- 0x40599d93,
- 0x405a1da7,
- 0x405a9db7,
- 0x405b1dcf,
- 0x405b9de0,
- 0x405c1df3,
- 0x405c9e04,
- 0x405d1e11,
- 0x405d9e28,
- 0x405e1e48,
+ 0x404519ef,
+ 0x40459a01,
+ 0x40461a25,
+ 0x40469a45,
+ 0x40471a53,
+ 0x40479a7a,
+ 0x40481ab7,
+ 0x40489ad0,
+ 0x40491ae7,
+ 0x40499b01,
+ 0x404a1b18,
+ 0x404a9b36,
+ 0x404b1b4e,
+ 0x404b9b65,
+ 0x404c1b7b,
+ 0x404c9b8d,
+ 0x404d1bae,
+ 0x404d9bd0,
+ 0x404e1be4,
+ 0x404e9bf1,
+ 0x404f1c1e,
+ 0x404f9c47,
+ 0x40501c82,
+ 0x40509c96,
+ 0x40511cb1,
+ 0x40519cc1,
+ 0x40521cd8,
+ 0x40529cfc,
+ 0x40531d14,
+ 0x40539d27,
+ 0x40541d3c,
+ 0x40549d5f,
+ 0x40551d6d,
+ 0x40559d8a,
+ 0x40561d97,
+ 0x40569db0,
+ 0x40571dc8,
+ 0x40579ddb,
+ 0x40581df0,
+ 0x40589e17,
+ 0x40591e46,
+ 0x40599e73,
+ 0x405a1e87,
+ 0x405a9e97,
+ 0x405b1eaf,
+ 0x405b9ec0,
+ 0x405c1ed3,
+ 0x405c9ef4,
+ 0x405d1f01,
+ 0x405d9f18,
+ 0x405e1f56,
0x405e8a95,
- 0x405f1e69,
- 0x405f9e76,
- 0x40601e84,
- 0x40609ea6,
- 0x40611ece,
- 0x40619ee3,
- 0x40621efa,
- 0x40629f0b,
- 0x40631f1c,
- 0x40639f31,
- 0x40641f48,
- 0x40649f59,
- 0x40651f74,
- 0x40659f8b,
- 0x40661fa3,
- 0x40669fcd,
- 0x40671ff8,
- 0x4067a019,
- 0x4068202c,
- 0x4068a04d,
- 0x4069207f,
- 0x4069a0ad,
- 0x406a20ce,
- 0x406aa0ee,
- 0x406b2276,
- 0x406ba299,
- 0x406c22af,
- 0x406ca4db,
- 0x406d250a,
- 0x406da532,
- 0x406e254b,
- 0x406ea563,
- 0x406f2582,
- 0x406fa597,
- 0x407025aa,
- 0x4070a5c7,
+ 0x405f1f77,
+ 0x405f9f84,
+ 0x40601f92,
+ 0x40609fb4,
+ 0x40611ff8,
+ 0x4061a030,
+ 0x40622047,
+ 0x4062a058,
+ 0x40632069,
+ 0x4063a07e,
+ 0x40642095,
+ 0x4064a0c1,
+ 0x406520dc,
+ 0x4065a0f3,
+ 0x4066210b,
+ 0x4066a135,
+ 0x40672160,
+ 0x4067a181,
+ 0x40682194,
+ 0x4068a1b5,
+ 0x406921e7,
+ 0x4069a215,
+ 0x406a2236,
+ 0x406aa256,
+ 0x406b23de,
+ 0x406ba401,
+ 0x406c2417,
+ 0x406ca679,
+ 0x406d26a8,
+ 0x406da6d0,
+ 0x406e26fe,
+ 0x406ea732,
+ 0x406f2751,
+ 0x406fa766,
+ 0x40702779,
+ 0x4070a796,
0x40710800,
- 0x4071a5d9,
- 0x407225ec,
- 0x4072a605,
- 0x4073261d,
+ 0x4071a7a8,
+ 0x407227bb,
+ 0x4072a7d4,
+ 0x407327ec,
0x4073936d,
- 0x40742631,
- 0x4074a64b,
- 0x4075265c,
- 0x4075a670,
- 0x4076267e,
+ 0x40742800,
+ 0x4074a81a,
+ 0x4075282b,
+ 0x4075a83f,
+ 0x4076284d,
0x407691aa,
- 0x407726a3,
- 0x4077a6c5,
- 0x407826e0,
- 0x4078a719,
- 0x40792730,
- 0x4079a746,
- 0x407a2752,
- 0x407aa765,
- 0x407b277a,
- 0x407ba78c,
- 0x407c27a1,
- 0x407ca7aa,
- 0x407d2068,
- 0x407d9bc3,
- 0x407e26f5,
- 0x407e9d5b,
- 0x407f1a2a,
- 0x41f421a1,
- 0x41f92233,
- 0x41fe2126,
- 0x41fea302,
- 0x41ff23f3,
- 0x420321ba,
- 0x420821dc,
- 0x4208a218,
- 0x4209210a,
- 0x4209a252,
- 0x420a2161,
- 0x420aa141,
- 0x420b2181,
- 0x420ba1fa,
- 0x420c240f,
- 0x420ca2cf,
- 0x420d22e9,
- 0x420da320,
- 0x4212233a,
- 0x421723d6,
- 0x4217a37c,
- 0x421c239e,
- 0x421f2359,
- 0x42212426,
- 0x422623b9,
- 0x422b24bf,
- 0x422ba488,
- 0x422c24a7,
- 0x422ca462,
- 0x422d2441,
+ 0x40772872,
+ 0x4077a894,
+ 0x407828af,
+ 0x4078a8e8,
+ 0x407928ff,
+ 0x4079a915,
+ 0x407a2921,
+ 0x407aa934,
+ 0x407b2949,
+ 0x407ba95b,
+ 0x407c2970,
+ 0x407ca979,
+ 0x407d21d0,
+ 0x407d9c57,
+ 0x407e28c4,
+ 0x407e9e27,
+ 0x407f1a67,
+ 0x407f9887,
+ 0x40801c2e,
+ 0x40809a8f,
+ 0x40811cea,
+ 0x40819c08,
+ 0x408226e9,
+ 0x4082986d,
+ 0x40831e02,
+ 0x4083a0a6,
+ 0x40841aa3,
+ 0x40849e5f,
+ 0x40851ee4,
+ 0x40859fdc,
+ 0x40861f38,
+ 0x40869c71,
+ 0x40872716,
+ 0x4087a00d,
+ 0x41f42309,
+ 0x41f9239b,
+ 0x41fe228e,
+ 0x41fea46a,
+ 0x41ff255b,
+ 0x42032322,
+ 0x42082344,
+ 0x4208a380,
+ 0x42092272,
+ 0x4209a3ba,
+ 0x420a22c9,
+ 0x420aa2a9,
+ 0x420b22e9,
+ 0x420ba362,
+ 0x420c2577,
+ 0x420ca437,
+ 0x420d2451,
+ 0x420da488,
+ 0x421224a2,
+ 0x4217253e,
+ 0x4217a4e4,
+ 0x421c2506,
+ 0x421f24c1,
+ 0x4221258e,
+ 0x42262521,
+ 0x422b265d,
+ 0x422ba60b,
+ 0x422c2645,
+ 0x422ca5ca,
+ 0x422d25a9,
+ 0x422da62a,
+ 0x422e25f0,
0x4432072b,
0x4432873a,
0x44330746,
@@ -571,69 +590,69 @@
0x4c3d136d,
0x4c3d937c,
0x4c3e1389,
- 0x50322a23,
- 0x5032aa32,
- 0x50332a3d,
- 0x5033aa4d,
- 0x50342a66,
- 0x5034aa80,
- 0x50352a8e,
- 0x5035aaa4,
- 0x50362ab6,
- 0x5036aacc,
- 0x50372ae5,
- 0x5037aaf8,
- 0x50382b10,
- 0x5038ab21,
- 0x50392b36,
- 0x5039ab4a,
- 0x503a2b6a,
- 0x503aab80,
- 0x503b2b98,
- 0x503babaa,
- 0x503c2bc6,
- 0x503cabdd,
- 0x503d2bf6,
- 0x503dac0c,
- 0x503e2c19,
- 0x503eac2f,
- 0x503f2c41,
+ 0x50322bf2,
+ 0x5032ac01,
+ 0x50332c0c,
+ 0x5033ac1c,
+ 0x50342c35,
+ 0x5034ac4f,
+ 0x50352c5d,
+ 0x5035ac73,
+ 0x50362c85,
+ 0x5036ac9b,
+ 0x50372cb4,
+ 0x5037acc7,
+ 0x50382cdf,
+ 0x5038acf0,
+ 0x50392d05,
+ 0x5039ad19,
+ 0x503a2d39,
+ 0x503aad4f,
+ 0x503b2d67,
+ 0x503bad79,
+ 0x503c2d95,
+ 0x503cadac,
+ 0x503d2dc5,
+ 0x503daddb,
+ 0x503e2de8,
+ 0x503eadfe,
+ 0x503f2e10,
0x503f8382,
- 0x50402c54,
- 0x5040ac64,
- 0x50412c7e,
- 0x5041ac8d,
- 0x50422ca7,
- 0x5042acc4,
- 0x50432cd4,
- 0x5043ace4,
- 0x50442cf3,
+ 0x50402e23,
+ 0x5040ae33,
+ 0x50412e4d,
+ 0x5041ae5c,
+ 0x50422e76,
+ 0x5042ae93,
+ 0x50432ea3,
+ 0x5043aeb3,
+ 0x50442ec2,
0x5044843f,
- 0x50452d07,
- 0x5045ad25,
- 0x50462d38,
- 0x5046ad4e,
- 0x50472d60,
- 0x5047ad75,
- 0x50482d9b,
- 0x5048ada9,
- 0x50492dbc,
- 0x5049add1,
- 0x504a2de7,
- 0x504aadf7,
- 0x504b2e17,
- 0x504bae2a,
- 0x504c2e4d,
- 0x504cae7b,
- 0x504d2e8d,
- 0x504daeaa,
- 0x504e2ec5,
- 0x504eaee1,
- 0x504f2ef3,
- 0x504faf0a,
- 0x50502f19,
+ 0x50452ed6,
+ 0x5045aef4,
+ 0x50462f07,
+ 0x5046af1d,
+ 0x50472f2f,
+ 0x5047af44,
+ 0x50482f6a,
+ 0x5048af78,
+ 0x50492f8b,
+ 0x5049afa0,
+ 0x504a2fb6,
+ 0x504aafc6,
+ 0x504b2fe6,
+ 0x504baff9,
+ 0x504c301c,
+ 0x504cb04a,
+ 0x504d305c,
+ 0x504db079,
+ 0x504e3094,
+ 0x504eb0b0,
+ 0x504f30c2,
+ 0x504fb0d9,
+ 0x505030e8,
0x505086ef,
- 0x50512f2c,
+ 0x505130fb,
0x58320ec9,
0x68320e8b,
0x68328c25,
@@ -994,6 +1013,8 @@
"BAD_SSL_FILETYPE\0"
"BAD_WRITE_RETRY\0"
"BIO_NOT_SET\0"
+ "BLOCK_CIPHER_PAD_IS_WRONG\0"
+ "BUFFERED_MESSAGES_ON_CIPHER_CHANGE\0"
"CA_DN_LENGTH_MISMATCH\0"
"CA_DN_TOO_LONG\0"
"CCS_RECEIVED_EARLY\0"
@@ -1016,6 +1037,8 @@
"DIGEST_CHECK_FAILED\0"
"DOWNGRADE_DETECTED\0"
"DTLS_MESSAGE_TOO_BIG\0"
+ "DUPLICATE_EXTENSION\0"
+ "DUPLICATE_KEY_SHARE\0"
"ECC_CERT_NOT_FOR_SIGNING\0"
"EMS_STATE_INCONSISTENT\0"
"ENCRYPTED_LENGTH_TOO_LONG\0"
@@ -1030,14 +1053,18 @@
"HTTPS_PROXY_REQUEST\0"
"HTTP_REQUEST\0"
"INAPPROPRIATE_FALLBACK\0"
+ "INVALID_ALPN_PROTOCOL\0"
"INVALID_COMMAND\0"
+ "INVALID_COMPRESSION_LIST\0"
"INVALID_MESSAGE\0"
"INVALID_OUTER_RECORD_TYPE\0"
+ "INVALID_SCT_LIST\0"
"INVALID_SSL_SESSION\0"
"INVALID_TICKET_KEYS_LENGTH\0"
"LENGTH_MISMATCH\0"
"LIBRARY_HAS_NO_CIPHERS\0"
"MISSING_EXTENSION\0"
+ "MISSING_KEY_SHARE\0"
"MISSING_RSA_CERTIFICATE\0"
"MISSING_TMP_DH_KEY\0"
"MISSING_TMP_ECDH_KEY\0"
@@ -1050,30 +1077,37 @@
"NO_CERTIFICATE_SET\0"
"NO_CIPHERS_AVAILABLE\0"
"NO_CIPHERS_PASSED\0"
+ "NO_CIPHERS_SPECIFIED\0"
"NO_CIPHER_MATCH\0"
"NO_COMMON_SIGNATURE_ALGORITHMS\0"
"NO_COMPRESSION_SPECIFIED\0"
+ "NO_GROUPS_SPECIFIED\0"
"NO_METHOD_SPECIFIED\0"
"NO_P256_SUPPORT\0"
"NO_PRIVATE_KEY_ASSIGNED\0"
"NO_RENEGOTIATION\0"
"NO_REQUIRED_DIGEST\0"
"NO_SHARED_CIPHER\0"
+ "NO_SHARED_GROUP\0"
"NULL_SSL_CTX\0"
"NULL_SSL_METHOD_PASSED\0"
"OLD_SESSION_CIPHER_NOT_RETURNED\0"
+ "OLD_SESSION_PRF_HASH_MISMATCH\0"
"OLD_SESSION_VERSION_NOT_RETURNED\0"
"PARSE_TLSEXT\0"
"PATH_TOO_LONG\0"
"PEER_DID_NOT_RETURN_A_CERTIFICATE\0"
"PEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPE\0"
+ "PRE_SHARED_KEY_MUST_BE_LAST\0"
"PROTOCOL_IS_SHUTDOWN\0"
+ "PSK_IDENTITY_BINDER_COUNT_MISMATCH\0"
"PSK_IDENTITY_NOT_FOUND\0"
"PSK_NO_CLIENT_CB\0"
"PSK_NO_SERVER_CB\0"
"READ_TIMEOUT_EXPIRED\0"
"RECORD_LENGTH_MISMATCH\0"
"RECORD_TOO_LARGE\0"
+ "RENEGOTIATION_EMS_MISMATCH\0"
"RENEGOTIATION_ENCODING_ERR\0"
"RENEGOTIATION_MISMATCH\0"
"REQUIRED_CIPHER_MISSING\0"
@@ -1118,13 +1152,17 @@
"TLSV1_ALERT_USER_CANCELLED\0"
"TLSV1_BAD_CERTIFICATE_HASH_VALUE\0"
"TLSV1_BAD_CERTIFICATE_STATUS_RESPONSE\0"
+ "TLSV1_CERTIFICATE_REQUIRED\0"
"TLSV1_CERTIFICATE_UNOBTAINABLE\0"
+ "TLSV1_UNKNOWN_PSK_IDENTITY\0"
"TLSV1_UNRECOGNIZED_NAME\0"
"TLSV1_UNSUPPORTED_EXTENSION\0"
"TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST\0"
"TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG\0"
"TOO_MANY_EMPTY_FRAGMENTS\0"
+ "TOO_MANY_KEY_UPDATES\0"
"TOO_MANY_WARNING_ALERTS\0"
+ "TOO_MUCH_SKIPPED_EARLY_DATA\0"
"UNABLE_TO_FIND_ECDH_PARAMETERS\0"
"UNEXPECTED_EXTENSION\0"
"UNEXPECTED_MESSAGE\0"
diff --git a/linux-aarch64/crypto/aes/aesv8-armx64.S b/linux-aarch64/crypto/aes/aesv8-armx64.S
index 3e8cb16..51e2464 100644
--- a/linux-aarch64/crypto/aes/aesv8-armx64.S
+++ b/linux-aarch64/crypto/aes/aesv8-armx64.S
@@ -3,7 +3,7 @@
#if __ARM_MAX_ARCH__>=7
.text
-#if !defined(__clang__)
+#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
.arch armv8-a+crypto
#endif
.align 5
@@ -12,11 +12,11 @@
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
-.globl aes_v8_set_encrypt_key
-.hidden aes_v8_set_encrypt_key
-.type aes_v8_set_encrypt_key,%function
+.globl aes_hw_set_encrypt_key
+.hidden aes_hw_set_encrypt_key
+.type aes_hw_set_encrypt_key,%function
.align 5
-aes_v8_set_encrypt_key:
+aes_hw_set_encrypt_key:
.Lenc_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
@@ -178,13 +178,13 @@
mov x0,x3 // return value
ldr x29,[sp],#16
ret
-.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
+.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
-.globl aes_v8_set_decrypt_key
-.hidden aes_v8_set_decrypt_key
-.type aes_v8_set_decrypt_key,%function
+.globl aes_hw_set_decrypt_key
+.hidden aes_hw_set_decrypt_key
+.type aes_hw_set_decrypt_key,%function
.align 5
-aes_v8_set_decrypt_key:
+aes_hw_set_decrypt_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl .Lenc_key
@@ -219,12 +219,12 @@
.Ldec_key_abort:
ldp x29,x30,[sp],#16
ret
-.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
-.globl aes_v8_encrypt
-.hidden aes_v8_encrypt
-.type aes_v8_encrypt,%function
+.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
+.globl aes_hw_encrypt
+.hidden aes_hw_encrypt
+.type aes_hw_encrypt,%function
.align 5
-aes_v8_encrypt:
+aes_hw_encrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@@ -249,12 +249,12 @@
st1 {v2.16b},[x1]
ret
-.size aes_v8_encrypt,.-aes_v8_encrypt
-.globl aes_v8_decrypt
-.hidden aes_v8_decrypt
-.type aes_v8_decrypt,%function
+.size aes_hw_encrypt,.-aes_hw_encrypt
+.globl aes_hw_decrypt
+.hidden aes_hw_decrypt
+.type aes_hw_decrypt,%function
.align 5
-aes_v8_decrypt:
+aes_hw_decrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
@@ -279,12 +279,12 @@
st1 {v2.16b},[x1]
ret
-.size aes_v8_decrypt,.-aes_v8_decrypt
-.globl aes_v8_cbc_encrypt
-.hidden aes_v8_cbc_encrypt
-.type aes_v8_cbc_encrypt,%function
+.size aes_hw_decrypt,.-aes_hw_decrypt
+.globl aes_hw_cbc_encrypt
+.hidden aes_hw_cbc_encrypt
+.type aes_hw_cbc_encrypt,%function
.align 5
-aes_v8_cbc_encrypt:
+aes_hw_cbc_encrypt:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
@@ -570,12 +570,12 @@
.Lcbc_abort:
ldr x29,[sp],#16
ret
-.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
-.globl aes_v8_ctr32_encrypt_blocks
-.hidden aes_v8_ctr32_encrypt_blocks
-.type aes_v8_ctr32_encrypt_blocks,%function
+.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
+.globl aes_hw_ctr32_encrypt_blocks
+.hidden aes_hw_ctr32_encrypt_blocks
+.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
-aes_v8_ctr32_encrypt_blocks:
+aes_hw_ctr32_encrypt_blocks:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
@@ -752,6 +752,6 @@
.Lctr32_done:
ldr x29,[sp],#16
ret
-.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
+.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
diff --git a/linux-aarch64/crypto/modes/ghashv8-armx64.S b/linux-aarch64/crypto/modes/ghashv8-armx64.S
index f39f3ba..89d780f 100644
--- a/linux-aarch64/crypto/modes/ghashv8-armx64.S
+++ b/linux-aarch64/crypto/modes/ghashv8-armx64.S
@@ -2,7 +2,7 @@
#include <openssl/arm_arch.h>
.text
-#if !defined(__clang__)
+#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
.arch armv8-a+crypto
#endif
.globl gcm_init_v8
diff --git a/linux-arm/crypto/aes/aesv8-armx32.S b/linux-arm/crypto/aes/aesv8-armx32.S
index 95a2ea4..c2f6b68 100644
--- a/linux-arm/crypto/aes/aesv8-armx32.S
+++ b/linux-arm/crypto/aes/aesv8-armx32.S
@@ -12,11 +12,11 @@
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
-.globl aes_v8_set_encrypt_key
-.hidden aes_v8_set_encrypt_key
-.type aes_v8_set_encrypt_key,%function
+.globl aes_hw_set_encrypt_key
+.hidden aes_hw_set_encrypt_key
+.type aes_hw_set_encrypt_key,%function
.align 5
-aes_v8_set_encrypt_key:
+aes_hw_set_encrypt_key:
.Lenc_key:
mov r3,#-1
cmp r0,#0
@@ -181,13 +181,13 @@
mov r0,r3 @ return value
bx lr
-.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
+.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
-.globl aes_v8_set_decrypt_key
-.hidden aes_v8_set_decrypt_key
-.type aes_v8_set_decrypt_key,%function
+.globl aes_hw_set_decrypt_key
+.hidden aes_hw_set_decrypt_key
+.type aes_hw_set_decrypt_key,%function
.align 5
-aes_v8_set_decrypt_key:
+aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl .Lenc_key
@@ -220,12 +220,12 @@
eor r0,r0,r0 @ return value
.Ldec_key_abort:
ldmia sp!,{r4,pc}
-.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
-.globl aes_v8_encrypt
-.hidden aes_v8_encrypt
-.type aes_v8_encrypt,%function
+.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
+.globl aes_hw_encrypt
+.hidden aes_hw_encrypt
+.type aes_hw_encrypt,%function
.align 5
-aes_v8_encrypt:
+aes_hw_encrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
@@ -250,12 +250,12 @@
vst1.8 {q2},[r1]
bx lr
-.size aes_v8_encrypt,.-aes_v8_encrypt
-.globl aes_v8_decrypt
-.hidden aes_v8_decrypt
-.type aes_v8_decrypt,%function
+.size aes_hw_encrypt,.-aes_hw_encrypt
+.globl aes_hw_decrypt
+.hidden aes_hw_decrypt
+.type aes_hw_decrypt,%function
.align 5
-aes_v8_decrypt:
+aes_hw_decrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
@@ -280,12 +280,12 @@
vst1.8 {q2},[r1]
bx lr
-.size aes_v8_decrypt,.-aes_v8_decrypt
-.globl aes_v8_cbc_encrypt
-.hidden aes_v8_cbc_encrypt
-.type aes_v8_cbc_encrypt,%function
+.size aes_hw_decrypt,.-aes_hw_decrypt
+.globl aes_hw_cbc_encrypt
+.hidden aes_hw_cbc_encrypt
+.type aes_hw_cbc_encrypt,%function
.align 5
-aes_v8_cbc_encrypt:
+aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
@@ -573,12 +573,12 @@
.Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
-.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
-.globl aes_v8_ctr32_encrypt_blocks
-.hidden aes_v8_ctr32_encrypt_blocks
-.type aes_v8_ctr32_encrypt_blocks,%function
+.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
+.globl aes_hw_ctr32_encrypt_blocks
+.hidden aes_hw_ctr32_encrypt_blocks
+.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
-aes_v8_ctr32_encrypt_blocks:
+aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
@@ -757,6 +757,6 @@
.Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
-.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
+.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
diff --git a/linux-arm/crypto/aes/bsaes-armv7.S b/linux-arm/crypto/aes/bsaes-armv7.S
index abb414d..1db7bbe 100644
--- a/linux-arm/crypto/aes/bsaes-armv7.S
+++ b/linux-arm/crypto/aes/bsaes-armv7.S
@@ -1843,8 +1843,6 @@
b .Lxts_enc_done
.align 4
.Lxts_enc_6:
- vst1.64 {q14}, [r0,:128] @ next round tweak
-
veor q4, q4, q12
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1880,8 +1878,6 @@
.align 5
.Lxts_enc_5:
- vst1.64 {q13}, [r0,:128] @ next round tweak
-
veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1910,8 +1906,6 @@
b .Lxts_enc_done
.align 4
.Lxts_enc_4:
- vst1.64 {q12}, [r0,:128] @ next round tweak
-
veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1937,8 +1931,6 @@
b .Lxts_enc_done
.align 4
.Lxts_enc_3:
- vst1.64 {q11}, [r0,:128] @ next round tweak
-
veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1963,8 +1955,6 @@
b .Lxts_enc_done
.align 4
.Lxts_enc_2:
- vst1.64 {q10}, [r0,:128] @ next round tweak
-
veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -1987,7 +1977,7 @@
.align 4
.Lxts_enc_1:
mov r0, sp
- veor q0, q8
+ veor q0, q0, q8
mov r1, sp
vst1.8 {q0}, [sp,:128]
mov r2, r10
@@ -2376,8 +2366,6 @@
b .Lxts_dec_done
.align 4
.Lxts_dec_5:
- vst1.64 {q13}, [r0,:128] @ next round tweak
-
veor q3, q3, q11
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2406,8 +2394,6 @@
b .Lxts_dec_done
.align 4
.Lxts_dec_4:
- vst1.64 {q12}, [r0,:128] @ next round tweak
-
veor q2, q2, q10
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2433,8 +2419,6 @@
b .Lxts_dec_done
.align 4
.Lxts_dec_3:
- vst1.64 {q11}, [r0,:128] @ next round tweak
-
veor q1, q1, q9
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2459,8 +2443,6 @@
b .Lxts_dec_done
.align 4
.Lxts_dec_2:
- vst1.64 {q10}, [r0,:128] @ next round tweak
-
veor q0, q0, q8
#ifndef BSAES_ASM_EXTENDED_KEY
add r4, sp, #0x90 @ pass key schedule
@@ -2483,12 +2465,12 @@
.align 4
.Lxts_dec_1:
mov r0, sp
- veor q0, q8
+ veor q0, q0, q8
mov r1, sp
vst1.8 {q0}, [sp,:128]
+ mov r5, r2 @ preserve magic
mov r2, r10
mov r4, r3 @ preserve fp
- mov r5, r2 @ preserve magic
bl AES_decrypt
diff --git a/linux-arm/crypto/sha/sha256-armv4.S b/linux-arm/crypto/sha/sha256-armv4.S
index 6040041..f37fd7c 100644
--- a/linux-arm/crypto/sha/sha256-armv4.S
+++ b/linux-arm/crypto/sha/sha256-armv4.S
@@ -1,4 +1,11 @@
#if defined(__arm__)
+@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
+@
+@ Licensed under the OpenSSL license (the "License"). You may not use
+@ this file except in compliance with the License. You can obtain a copy
+@ in the file LICENSE in the source distribution or at
+@ https://www.openssl.org/source/license.html
+
@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -45,16 +52,11 @@
#endif
.text
-#if __ARM_ARCH__<7
-.code 32
-#else
+#if defined(__thumb2__)
.syntax unified
-# if defined(__thumb2__) && !defined(__APPLE__)
-# define adrl adr
.thumb
-# else
+#else
.code 32
-# endif
#endif
.type K256,%object
@@ -89,10 +91,10 @@
.type sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
sub r3,pc,#8 @ sha256_block_data_order
#else
- adr r3,sha256_block_data_order
+ adr r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
ldr r12,.LOPENSSL_armcap
@@ -1878,13 +1880,14 @@
.globl sha256_block_data_order_neon
.hidden sha256_block_data_order_neon
.type sha256_block_data_order_neon,%function
-.align 4
+.align 5
+.skip 16
sha256_block_data_order_neon:
.LNEON:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
sub r11,sp,#16*4+16
- adrl r14,K256
+ adr r14,K256
bic r11,r11,#15 @ align for 128-bit stores
mov r12,sp
mov sp,r11 @ alloca
@@ -2660,7 +2663,7 @@
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-# if defined(__thumb2__) && !defined(__APPLE__)
+# if defined(__thumb2__)
# define INST(a,b,c,d) .byte c,d|0xc,a,b
# else
# define INST(a,b,c,d) .byte a,b,c,d
@@ -2671,16 +2674,11 @@
sha256_block_data_order_armv8:
.LARMv8:
vld1.32 {q0,q1},[r0]
-# ifdef __APPLE__
sub r3,r3,#256+32
-# elif defined(__thumb2__)
- adr r3,.LARMv8
- sub r3,r3,#.LARMv8-K256
-# else
- adrl r3,K256
-# endif
add r2,r1,r2,lsl#6 @ len to point at the end of inp
+ b .Loop_v8
+.align 4
.Loop_v8:
vld1.8 {q8,q9},[r1]!
vld1.8 {q10,q11},[r1]!
diff --git a/linux-ppc64le/crypto/aes/aesp8-ppc.S b/linux-ppc64le/crypto/aes/aesp8-ppc.S
new file mode 100644
index 0000000..3424ea6
--- /dev/null
+++ b/linux-ppc64le/crypto/aes/aesp8-ppc.S
@@ -0,0 +1,3633 @@
+.machine "any"
+
+.text
+
+.align 7
+rcon:
+.byte 0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01
+.byte 0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1b
+.byte 0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d,0x0c,0x0f,0x0e,0x0d
+.byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+Lconsts:
+ mflr 0
+ bcl 20,31,$+4
+ mflr 6
+ addi 6,6,-0x48
+ mtlr 0
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.byte 65,69,83,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
+
+.globl aes_hw_set_encrypt_key
+.align 5
+aes_hw_set_encrypt_key:
+Lset_encrypt_key:
+ mflr 11
+ std 11,16(1)
+
+ li 6,-1
+ cmpldi 3,0
+ beq- Lenc_key_abort
+ cmpldi 5,0
+ beq- Lenc_key_abort
+ li 6,-2
+ cmpwi 4,128
+ blt- Lenc_key_abort
+ cmpwi 4,256
+ bgt- Lenc_key_abort
+ andi. 0,4,0x3f
+ bne- Lenc_key_abort
+
+ lis 0,0xfff0
+ mfspr 12,256
+ mtspr 256,0
+
+ bl Lconsts
+ mtlr 11
+
+ neg 9,3
+ lvx 1,0,3
+ addi 3,3,15
+ lvsr 3,0,9
+ li 8,0x20
+ cmpwi 4,192
+ lvx 2,0,3
+ vspltisb 5,0x0f
+ lvx 4,0,6
+ vxor 3,3,5
+ lvx 5,8,6
+ addi 6,6,0x10
+ vperm 1,1,2,3
+ li 7,8
+ vxor 0,0,0
+ mtctr 7
+
+ lvsl 8,0,5
+ vspltisb 9,-1
+ lvx 10,0,5
+ vperm 9,9,0,8
+
+ blt Loop128
+ addi 3,3,8
+ beq L192
+ addi 3,3,8
+ b L256
+
+.align 4
+Loop128:
+ vperm 3,1,1,5
+ vsldoi 6,0,1,12
+ vperm 11,1,1,8
+ vsel 7,10,11,9
+ vor 10,11,11
+ .long 0x10632509
+ stvx 7,0,5
+ addi 5,5,16
+
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vxor 1,1,6
+ vadduwm 4,4,4
+ vxor 1,1,3
+ bc 16,0,Loop128
+
+ lvx 4,0,6
+
+ vperm 3,1,1,5
+ vsldoi 6,0,1,12
+ vperm 11,1,1,8
+ vsel 7,10,11,9
+ vor 10,11,11
+ .long 0x10632509
+ stvx 7,0,5
+ addi 5,5,16
+
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vxor 1,1,6
+ vadduwm 4,4,4
+ vxor 1,1,3
+
+ vperm 3,1,1,5
+ vsldoi 6,0,1,12
+ vperm 11,1,1,8
+ vsel 7,10,11,9
+ vor 10,11,11
+ .long 0x10632509
+ stvx 7,0,5
+ addi 5,5,16
+
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vxor 1,1,6
+ vxor 1,1,3
+ vperm 11,1,1,8
+ vsel 7,10,11,9
+ vor 10,11,11
+ stvx 7,0,5
+
+ addi 3,5,15
+ addi 5,5,0x50
+
+ li 8,10
+ b Ldone
+
+.align 4
+L192:
+ lvx 6,0,3
+ li 7,4
+ vperm 11,1,1,8
+ vsel 7,10,11,9
+ vor 10,11,11
+ stvx 7,0,5
+ addi 5,5,16
+ vperm 2,2,6,3
+ vspltisb 3,8
+ mtctr 7
+ vsububm 5,5,3
+
+Loop192:
+ vperm 3,2,2,5
+ vsldoi 6,0,1,12
+ .long 0x10632509
+
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vxor 1,1,6
+
+ vsldoi 7,0,2,8
+ vspltw 6,1,3
+ vxor 6,6,2
+ vsldoi 2,0,2,12
+ vadduwm 4,4,4
+ vxor 2,2,6
+ vxor 1,1,3
+ vxor 2,2,3
+ vsldoi 7,7,1,8
+
+ vperm 3,2,2,5
+ vsldoi 6,0,1,12
+ vperm 11,7,7,8
+ vsel 7,10,11,9
+ vor 10,11,11
+ .long 0x10632509
+ stvx 7,0,5
+ addi 5,5,16
+
+ vsldoi 7,1,2,8
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vperm 11,7,7,8
+ vsel 7,10,11,9
+ vor 10,11,11
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vxor 1,1,6
+ stvx 7,0,5
+ addi 5,5,16
+
+ vspltw 6,1,3
+ vxor 6,6,2
+ vsldoi 2,0,2,12
+ vadduwm 4,4,4
+ vxor 2,2,6
+ vxor 1,1,3
+ vxor 2,2,3
+ vperm 11,1,1,8
+ vsel 7,10,11,9
+ vor 10,11,11
+ stvx 7,0,5
+ addi 3,5,15
+ addi 5,5,16
+ bc 16,0,Loop192
+
+ li 8,12
+ addi 5,5,0x20
+ b Ldone
+
+.align 4
+L256:
+ lvx 6,0,3
+ li 7,7
+ li 8,14
+ vperm 11,1,1,8
+ vsel 7,10,11,9
+ vor 10,11,11
+ stvx 7,0,5
+ addi 5,5,16
+ vperm 2,2,6,3
+ mtctr 7
+
+Loop256:
+ vperm 3,2,2,5
+ vsldoi 6,0,1,12
+ vperm 11,2,2,8
+ vsel 7,10,11,9
+ vor 10,11,11
+ .long 0x10632509
+ stvx 7,0,5
+ addi 5,5,16
+
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vxor 1,1,6
+ vsldoi 6,0,6,12
+ vxor 1,1,6
+ vadduwm 4,4,4
+ vxor 1,1,3
+ vperm 11,1,1,8
+ vsel 7,10,11,9
+ vor 10,11,11
+ stvx 7,0,5
+ addi 3,5,15
+ addi 5,5,16
+ bdz Ldone
+
+ vspltw 3,1,3
+ vsldoi 6,0,2,12
+ .long 0x106305C8
+
+ vxor 2,2,6
+ vsldoi 6,0,6,12
+ vxor 2,2,6
+ vsldoi 6,0,6,12
+ vxor 2,2,6
+
+ vxor 2,2,3
+ b Loop256
+
+.align 4
+Ldone:
+ lvx 2,0,3
+ vsel 2,10,2,9
+ stvx 2,0,3
+ li 6,0
+ mtspr 256,12
+ stw 8,0(5)
+
+Lenc_key_abort:
+ mr 3,6
+ blr
+.long 0
+.byte 0,12,0x14,1,0,0,3,0
+.long 0
+
+
+.globl aes_hw_set_decrypt_key
+.align 5
+aes_hw_set_decrypt_key:
+ stdu 1,-64(1)
+ mflr 10
+ std 10,64+16(1)
+ bl Lset_encrypt_key
+ mtlr 10
+
+ cmpwi 3,0
+ bne- Ldec_key_abort
+
+ slwi 7,8,4
+ subi 3,5,240
+ srwi 8,8,1
+ add 5,3,7
+ mtctr 8
+
+Ldeckey:
+ lwz 0, 0(3)
+ lwz 6, 4(3)
+ lwz 7, 8(3)
+ lwz 8, 12(3)
+ addi 3,3,16
+ lwz 9, 0(5)
+ lwz 10,4(5)
+ lwz 11,8(5)
+ lwz 12,12(5)
+ stw 0, 0(5)
+ stw 6, 4(5)
+ stw 7, 8(5)
+ stw 8, 12(5)
+ subi 5,5,16
+ stw 9, -16(3)
+ stw 10,-12(3)
+ stw 11,-8(3)
+ stw 12,-4(3)
+ bc 16,0,Ldeckey
+
+ xor 3,3,3
+Ldec_key_abort:
+ addi 1,1,64
+ blr
+.long 0
+.byte 0,12,4,1,0x80,0,3,0
+.long 0
+
+.globl aes_hw_encrypt
+.align 5
+aes_hw_encrypt:
+ lwz 6,240(5)
+ lis 0,0xfc00
+ mfspr 12,256
+ li 7,15
+ mtspr 256,0
+
+ lvx 0,0,3
+ neg 11,4
+ lvx 1,7,3
+ lvsl 2,0,3
+ vspltisb 4,0x0f
+ lvsr 3,0,11
+ vxor 2,2,4
+ li 7,16
+ vperm 0,0,1,2
+ lvx 1,0,5
+ lvsr 5,0,5
+ srwi 6,6,1
+ lvx 2,7,5
+ addi 7,7,16
+ subi 6,6,1
+ vperm 1,2,1,5
+
+ vxor 0,0,1
+ lvx 1,7,5
+ addi 7,7,16
+ mtctr 6
+
+Loop_enc:
+ vperm 2,1,2,5
+ .long 0x10001508
+ lvx 2,7,5
+ addi 7,7,16
+ vperm 1,2,1,5
+ .long 0x10000D08
+ lvx 1,7,5
+ addi 7,7,16
+ bc 16,0,Loop_enc
+
+ vperm 2,1,2,5
+ .long 0x10001508
+ lvx 2,7,5
+ vperm 1,2,1,5
+ .long 0x10000D09
+
+ vspltisb 2,-1
+ vxor 1,1,1
+ li 7,15
+ vperm 2,2,1,3
+ vxor 3,3,4
+ lvx 1,0,4
+ vperm 0,0,0,3
+ vsel 1,1,0,2
+ lvx 4,7,4
+ stvx 1,0,4
+ vsel 0,0,4,2
+ stvx 0,7,4
+
+ mtspr 256,12
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,3,0
+.long 0
+
+.globl aes_hw_decrypt
+.align 5
+aes_hw_decrypt:
+ lwz 6,240(5)
+ lis 0,0xfc00
+ mfspr 12,256
+ li 7,15
+ mtspr 256,0
+
+ lvx 0,0,3
+ neg 11,4
+ lvx 1,7,3
+ lvsl 2,0,3
+ vspltisb 4,0x0f
+ lvsr 3,0,11
+ vxor 2,2,4
+ li 7,16
+ vperm 0,0,1,2
+ lvx 1,0,5
+ lvsr 5,0,5
+ srwi 6,6,1
+ lvx 2,7,5
+ addi 7,7,16
+ subi 6,6,1
+ vperm 1,2,1,5
+
+ vxor 0,0,1
+ lvx 1,7,5
+ addi 7,7,16
+ mtctr 6
+
+Loop_dec:
+ vperm 2,1,2,5
+ .long 0x10001548
+ lvx 2,7,5
+ addi 7,7,16
+ vperm 1,2,1,5
+ .long 0x10000D48
+ lvx 1,7,5
+ addi 7,7,16
+ bc 16,0,Loop_dec
+
+ vperm 2,1,2,5
+ .long 0x10001548
+ lvx 2,7,5
+ vperm 1,2,1,5
+ .long 0x10000D49
+
+ vspltisb 2,-1
+ vxor 1,1,1
+ li 7,15
+ vperm 2,2,1,3
+ vxor 3,3,4
+ lvx 1,0,4
+ vperm 0,0,0,3
+ vsel 1,1,0,2
+ lvx 4,7,4
+ stvx 1,0,4
+ vsel 0,0,4,2
+ stvx 0,7,4
+
+ mtspr 256,12
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,3,0
+.long 0
+
+.globl aes_hw_cbc_encrypt
+.align 5
+aes_hw_cbc_encrypt:
+ cmpldi 5,16
+ bclr 14,0
+
+ cmpwi 8,0
+ lis 0,0xffe0
+ mfspr 12,256
+ mtspr 256,0
+
+ li 10,15
+ vxor 0,0,0
+ vspltisb 3,0x0f
+
+ lvx 4,0,7
+ lvsl 6,0,7
+ lvx 5,10,7
+ vxor 6,6,3
+ vperm 4,4,5,6
+
+ neg 11,3
+ lvsr 10,0,6
+ lwz 9,240(6)
+
+ lvsr 6,0,11
+ lvx 5,0,3
+ addi 3,3,15
+ vxor 6,6,3
+
+ lvsl 8,0,4
+ vspltisb 9,-1
+ lvx 7,0,4
+ vperm 9,9,0,8
+ vxor 8,8,3
+
+ srwi 9,9,1
+ li 10,16
+ subi 9,9,1
+ beq Lcbc_dec
+
+Lcbc_enc:
+ vor 2,5,5
+ lvx 5,0,3
+ addi 3,3,16
+ mtctr 9
+ subi 5,5,16
+
+ lvx 0,0,6
+ vperm 2,2,5,6
+ lvx 1,10,6
+ addi 10,10,16
+ vperm 0,1,0,10
+ vxor 2,2,0
+ lvx 0,10,6
+ addi 10,10,16
+ vxor 2,2,4
+
+Loop_cbc_enc:
+ vperm 1,0,1,10
+ .long 0x10420D08
+ lvx 1,10,6
+ addi 10,10,16
+ vperm 0,1,0,10
+ .long 0x10420508
+ lvx 0,10,6
+ addi 10,10,16
+ bc 16,0,Loop_cbc_enc
+
+ vperm 1,0,1,10
+ .long 0x10420D08
+ lvx 1,10,6
+ li 10,16
+ vperm 0,1,0,10
+ .long 0x10820509
+ cmpldi 5,16
+
+ vperm 3,4,4,8
+ vsel 2,7,3,9
+ vor 7,3,3
+ stvx 2,0,4
+ addi 4,4,16
+ bge Lcbc_enc
+
+ b Lcbc_done
+
+.align 4
+Lcbc_dec:
+ cmpldi 5,128
+ bge _aesp8_cbc_decrypt8x
+ vor 3,5,5
+ lvx 5,0,3
+ addi 3,3,16
+ mtctr 9
+ subi 5,5,16
+
+ lvx 0,0,6
+ vperm 3,3,5,6
+ lvx 1,10,6
+ addi 10,10,16
+ vperm 0,1,0,10
+ vxor 2,3,0
+ lvx 0,10,6
+ addi 10,10,16
+
+Loop_cbc_dec:
+ vperm 1,0,1,10
+ .long 0x10420D48
+ lvx 1,10,6
+ addi 10,10,16
+ vperm 0,1,0,10
+ .long 0x10420548
+ lvx 0,10,6
+ addi 10,10,16
+ bc 16,0,Loop_cbc_dec
+
+ vperm 1,0,1,10
+ .long 0x10420D48
+ lvx 1,10,6
+ li 10,16
+ vperm 0,1,0,10
+ .long 0x10420549
+ cmpldi 5,16
+
+ vxor 2,2,4
+ vor 4,3,3
+ vperm 3,2,2,8
+ vsel 2,7,3,9
+ vor 7,3,3
+ stvx 2,0,4
+ addi 4,4,16
+ bge Lcbc_dec
+
+Lcbc_done:
+ addi 4,4,-1
+ lvx 2,0,4
+ vsel 2,7,2,9
+ stvx 2,0,4
+
+ neg 8,7
+ li 10,15
+ vxor 0,0,0
+ vspltisb 9,-1
+ vspltisb 3,0x0f
+ lvsr 8,0,8
+ vperm 9,9,0,8
+ vxor 8,8,3
+ lvx 7,0,7
+ vperm 4,4,4,8
+ vsel 2,7,4,9
+ lvx 5,10,7
+ stvx 2,0,7
+ vsel 2,4,5,9
+ stvx 2,10,7
+
+ mtspr 256,12
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,6,0
+.long 0
+.align 5
+_aesp8_cbc_decrypt8x:            # 8x-interleaved AES-CBC decrypt core; r3=in, r4=out, r5=len, r6=key sched, r9=loop count, v4=IV
+	stdu 1,-448(1)           # allocate 448-byte frame
+	li 10,207
+	li 11,223
+	stvx 20,10,1             # save non-volatile VRs v20..v31 in the frame
+	addi 10,10,32
+	stvx 21,11,1
+	addi 11,11,32
+	stvx 22,10,1
+	addi 10,10,32
+	stvx 23,11,1
+	addi 11,11,32
+	stvx 24,10,1
+	addi 10,10,32
+	stvx 25,11,1
+	addi 11,11,32
+	stvx 26,10,1
+	addi 10,10,32
+	stvx 27,11,1
+	addi 11,11,32
+	stvx 28,10,1
+	addi 10,10,32
+	stvx 29,11,1
+	addi 11,11,32
+	stvx 30,10,1
+	stvx 31,11,1
+	li 0,-1
+	stw 12,396(1)            # save caller's vrsave
+	li 8,0x10
+	std 26,400(1)            # save non-volatile GPRs r26..r31; r26..r31 become offsets 0x20..0x70
+	li 26,0x20
+	std 27,408(1)
+	li 27,0x30
+	std 28,416(1)
+	li 28,0x40
+	std 29,424(1)
+	li 29,0x50
+	std 30,432(1)
+	li 30,0x60
+	std 31,440(1)
+	li 31,0x70
+	mtspr 256,0              # vrsave = -1: all VRs in use
+
+	subi 9,9,3
+	subi 5,5,128             # bias length by 8 blocks
+
+	lvx 23,0,6
+	lvx 30,8,6
+	addi 6,6,0x20
+	lvx 31,0,6
+	vperm 23,30,23,10        # v10 = alignment permute for the (possibly unaligned) key schedule
+	addi 11,1,64+15
+	mtctr 9
+
+Load_cbc_dec_key:                # copy aligned round keys into the stack buffer at 1+64+15
+	vperm 24,31,30,10
+	lvx 30,8,6
+	addi 6,6,0x20
+	stvx 24,0,11
+	vperm 25,30,31,10
+	lvx 31,0,6
+	stvx 25,8,11
+	addi 11,11,0x20
+	bc 16,0,Load_cbc_dec_key # bdnz
+
+	lvx 26,8,6
+	vperm 24,31,30,10
+	lvx 27,26,6
+	stvx 24,0,11
+	vperm 25,26,31,10
+	lvx 28,27,6
+	stvx 25,8,11
+	addi 11,1,64+15
+	vperm 26,27,26,10        # last rounds stay resident in v26..v31
+	lvx 29,28,6
+	vperm 27,28,27,10
+	lvx 30,29,6
+	vperm 28,29,28,10
+	lvx 31,30,6
+	vperm 29,30,29,10
+	lvx 14,31,6
+	vperm 30,31,30,10
+	lvx 24,0,11
+	vperm 31,14,31,10
+	lvx 25,8,11
+
+
+
+	subi 3,3,15
+
+	li 10,8
+	.long 0x7C001E99         # lxvd2x: .long opcodes are POWER8 loads/stores/crypto ops encoded for old assemblers
+	lvsl 6,0,10
+	vspltisb 3,0x0f
+	.long 0x7C281E99
+	vxor 6,6,3               # v6 = little-endian byte-swap permute
+	.long 0x7C5A1E99
+	vperm 0,0,0,6
+	.long 0x7C7B1E99
+	vperm 1,1,1,6
+	.long 0x7D5C1E99
+	vperm 2,2,2,6
+	vxor 14,0,23             # XOR round-0 key into each of the 8 ciphertext blocks
+	.long 0x7D7D1E99
+	vperm 3,3,3,6
+	vxor 15,1,23
+	.long 0x7D9E1E99
+	vperm 10,10,10,6
+	vxor 16,2,23
+	.long 0x7DBF1E99
+	addi 3,3,0x80
+	vperm 11,11,11,6
+	vxor 17,3,23
+	vperm 12,12,12,6
+	vxor 18,10,23
+	vperm 13,13,13,6
+	vxor 19,11,23
+	vxor 20,12,23
+	vxor 21,13,23
+
+	mtctr 9
+	b Loop_cbc_dec8x
+.align 5
+Loop_cbc_dec8x:                  # two vncipher rounds for all 8 blocks per iteration
+	.long 0x11CEC548         # vncipher v14..v21 with v24
+	.long 0x11EFC548
+	.long 0x1210C548
+	.long 0x1231C548
+	.long 0x1252C548
+	.long 0x1273C548
+	.long 0x1294C548
+	.long 0x12B5C548
+	lvx 24,26,11
+	addi 11,11,0x20
+
+	.long 0x11CECD48         # vncipher v14..v21 with v25
+	.long 0x11EFCD48
+	.long 0x1210CD48
+	.long 0x1231CD48
+	.long 0x1252CD48
+	.long 0x1273CD48
+	.long 0x1294CD48
+	.long 0x12B5CD48
+	lvx 25,8,11
+	bc 16,0,Loop_cbc_dec8x
+
+	subic 5,5,128            # len -= 128, record borrow
+	.long 0x11CEC548
+	.long 0x11EFC548
+	.long 0x1210C548
+	.long 0x1231C548
+	.long 0x1252C548
+	.long 0x1273C548
+	.long 0x1294C548
+	.long 0x12B5C548
+
+	subfe. 0,0,0             # r0 = 0 or -1 from the borrow: full-8-blocks mask
+	.long 0x11CECD48
+	.long 0x11EFCD48
+	.long 0x1210CD48
+	.long 0x1231CD48
+	.long 0x1252CD48
+	.long 0x1273CD48
+	.long 0x1294CD48
+	.long 0x12B5CD48
+
+	and 0,0,5
+	.long 0x11CED548
+	.long 0x11EFD548
+	.long 0x1210D548
+	.long 0x1231D548
+	.long 0x1252D548
+	.long 0x1273D548
+	.long 0x1294D548
+	.long 0x12B5D548
+
+	add 3,3,0                # rewind input pointer on the short final pass
+
+
+
+	.long 0x11CEDD48
+	.long 0x11EFDD48
+	.long 0x1210DD48
+	.long 0x1231DD48
+	.long 0x1252DD48
+	.long 0x1273DD48
+	.long 0x1294DD48
+	.long 0x12B5DD48
+
+	addi 11,1,64+15
+	.long 0x11CEE548
+	.long 0x11EFE548
+	.long 0x1210E548
+	.long 0x1231E548
+	.long 0x1252E548
+	.long 0x1273E548
+	.long 0x1294E548
+	.long 0x12B5E548
+	lvx 24,0,11
+
+	.long 0x11CEED48
+	.long 0x11EFED48
+	.long 0x1210ED48
+	.long 0x1231ED48
+	.long 0x1252ED48
+	.long 0x1273ED48
+	.long 0x1294ED48
+	.long 0x12B5ED48
+	lvx 25,8,11
+
+	.long 0x11CEF548
+	vxor 4,4,31              # fold last round key into the CBC chaining values
+	.long 0x11EFF548
+	vxor 0,0,31
+	.long 0x1210F548
+	vxor 1,1,31
+	.long 0x1231F548
+	vxor 2,2,31
+	.long 0x1252F548
+	vxor 3,3,31
+	.long 0x1273F548
+	vxor 10,10,31
+	.long 0x1294F548
+	vxor 11,11,31
+	.long 0x12B5F548
+	vxor 12,12,31
+
+	.long 0x11CE2549         # vncipherlast, second operand = previous ciphertext (CBC chaining)
+	.long 0x11EF0549
+	.long 0x7C001E99
+	.long 0x12100D49
+	.long 0x7C281E99
+	.long 0x12311549
+	vperm 0,0,0,6
+	.long 0x7C5A1E99
+	.long 0x12521D49
+	vperm 1,1,1,6
+	.long 0x7C7B1E99
+	.long 0x12735549
+	vperm 2,2,2,6
+	.long 0x7D5C1E99
+	.long 0x12945D49
+	vperm 3,3,3,6
+	.long 0x7D7D1E99
+	.long 0x12B56549
+	vperm 10,10,10,6
+	.long 0x7D9E1E99
+	vor 4,13,13              # carry last ciphertext block forward as next IV
+	vperm 11,11,11,6
+	.long 0x7DBF1E99
+	addi 3,3,0x80
+
+	vperm 14,14,14,6
+	vperm 15,15,15,6
+	.long 0x7DC02799         # stxvd2x: store 8 plaintext blocks
+	vperm 12,12,12,6
+	vxor 14,0,23
+	vperm 16,16,16,6
+	.long 0x7DE82799
+	vperm 13,13,13,6
+	vxor 15,1,23
+	vperm 17,17,17,6
+	.long 0x7E1A2799
+	vxor 16,2,23
+	vperm 18,18,18,6
+	.long 0x7E3B2799
+	vxor 17,3,23
+	vperm 19,19,19,6
+	.long 0x7E5C2799
+	vxor 18,10,23
+	vperm 20,20,20,6
+	.long 0x7E7D2799
+	vxor 19,11,23
+	vperm 21,21,21,6
+	.long 0x7E9E2799
+	vxor 20,12,23
+	.long 0x7EBF2799
+	addi 4,4,0x80
+	vxor 21,13,23
+
+	mtctr 9
+	beq Loop_cbc_dec8x       # full 8 blocks remain: loop again
+
+	addic. 5,5,128           # un-bias length; <=0 means nothing left
+	beq Lcbc_dec8x_done
+	nop
+	nop
+
+Loop_cbc_dec8x_tail:             # 1..7 blocks remain (block 14 excluded)
+	.long 0x11EFC548
+	.long 0x1210C548
+	.long 0x1231C548
+	.long 0x1252C548
+	.long 0x1273C548
+	.long 0x1294C548
+	.long 0x12B5C548
+	lvx 24,26,11
+	addi 11,11,0x20
+
+	.long 0x11EFCD48
+	.long 0x1210CD48
+	.long 0x1231CD48
+	.long 0x1252CD48
+	.long 0x1273CD48
+	.long 0x1294CD48
+	.long 0x12B5CD48
+	lvx 25,8,11
+	bc 16,0,Loop_cbc_dec8x_tail
+
+	.long 0x11EFC548
+	.long 0x1210C548
+	.long 0x1231C548
+	.long 0x1252C548
+	.long 0x1273C548
+	.long 0x1294C548
+	.long 0x12B5C548
+
+	.long 0x11EFCD48
+	.long 0x1210CD48
+	.long 0x1231CD48
+	.long 0x1252CD48
+	.long 0x1273CD48
+	.long 0x1294CD48
+	.long 0x12B5CD48
+
+	.long 0x11EFD548
+	.long 0x1210D548
+	.long 0x1231D548
+	.long 0x1252D548
+	.long 0x1273D548
+	.long 0x1294D548
+	.long 0x12B5D548
+
+	.long 0x11EFDD48
+	.long 0x1210DD48
+	.long 0x1231DD48
+	.long 0x1252DD48
+	.long 0x1273DD48
+	.long 0x1294DD48
+	.long 0x12B5DD48
+
+	.long 0x11EFE548
+	.long 0x1210E548
+	.long 0x1231E548
+	.long 0x1252E548
+	.long 0x1273E548
+	.long 0x1294E548
+	.long 0x12B5E548
+
+	.long 0x11EFED48
+	.long 0x1210ED48
+	.long 0x1231ED48
+	.long 0x1252ED48
+	.long 0x1273ED48
+	.long 0x1294ED48
+	.long 0x12B5ED48
+
+	.long 0x11EFF548
+	vxor 4,4,31
+	.long 0x1210F548
+	vxor 1,1,31
+	.long 0x1231F548
+	vxor 2,2,31
+	.long 0x1252F548
+	vxor 3,3,31
+	.long 0x1273F548
+	vxor 10,10,31
+	.long 0x1294F548
+	vxor 11,11,31
+	.long 0x12B5F548
+	vxor 12,12,31
+
+	cmplwi 5,32              # dispatch on remaining byte count
+	blt Lcbc_dec8x_one
+	nop
+	beq Lcbc_dec8x_two
+	cmplwi 5,64
+	blt Lcbc_dec8x_three
+	nop
+	beq Lcbc_dec8x_four
+	cmplwi 5,96
+	blt Lcbc_dec8x_five
+	nop
+	beq Lcbc_dec8x_six
+
+Lcbc_dec8x_seven:
+	.long 0x11EF2549         # vncipherlast for the 7 live blocks
+	.long 0x12100D49
+	.long 0x12311549
+	.long 0x12521D49
+	.long 0x12735549
+	.long 0x12945D49
+	.long 0x12B56549
+	vor 4,13,13
+
+	vperm 15,15,15,6
+	vperm 16,16,16,6
+	.long 0x7DE02799
+	vperm 17,17,17,6
+	.long 0x7E082799
+	vperm 18,18,18,6
+	.long 0x7E3A2799
+	vperm 19,19,19,6
+	.long 0x7E5B2799
+	vperm 20,20,20,6
+	.long 0x7E7C2799
+	vperm 21,21,21,6
+	.long 0x7E9D2799
+	.long 0x7EBE2799
+	addi 4,4,0x70
+	b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_six:
+	.long 0x12102549
+	.long 0x12311549
+	.long 0x12521D49
+	.long 0x12735549
+	.long 0x12945D49
+	.long 0x12B56549
+	vor 4,13,13
+
+	vperm 16,16,16,6
+	vperm 17,17,17,6
+	.long 0x7E002799
+	vperm 18,18,18,6
+	.long 0x7E282799
+	vperm 19,19,19,6
+	.long 0x7E5A2799
+	vperm 20,20,20,6
+	.long 0x7E7B2799
+	vperm 21,21,21,6
+	.long 0x7E9C2799
+	.long 0x7EBD2799
+	addi 4,4,0x60
+	b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_five:
+	.long 0x12312549
+	.long 0x12521D49
+	.long 0x12735549
+	.long 0x12945D49
+	.long 0x12B56549
+	vor 4,13,13
+
+	vperm 17,17,17,6
+	vperm 18,18,18,6
+	.long 0x7E202799
+	vperm 19,19,19,6
+	.long 0x7E482799
+	vperm 20,20,20,6
+	.long 0x7E7A2799
+	vperm 21,21,21,6
+	.long 0x7E9B2799
+	.long 0x7EBC2799
+	addi 4,4,0x50
+	b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_four:
+	.long 0x12522549
+	.long 0x12735549
+	.long 0x12945D49
+	.long 0x12B56549
+	vor 4,13,13
+
+	vperm 18,18,18,6
+	vperm 19,19,19,6
+	.long 0x7E402799
+	vperm 20,20,20,6
+	.long 0x7E682799
+	vperm 21,21,21,6
+	.long 0x7E9A2799
+	.long 0x7EBB2799
+	addi 4,4,0x40
+	b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_three:
+	.long 0x12732549
+	.long 0x12945D49
+	.long 0x12B56549
+	vor 4,13,13
+
+	vperm 19,19,19,6
+	vperm 20,20,20,6
+	.long 0x7E602799
+	vperm 21,21,21,6
+	.long 0x7E882799
+	.long 0x7EBA2799
+	addi 4,4,0x30
+	b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_two:
+	.long 0x12942549
+	.long 0x12B56549
+	vor 4,13,13
+
+	vperm 20,20,20,6
+	vperm 21,21,21,6
+	.long 0x7E802799
+	.long 0x7EA82799
+	addi 4,4,0x20
+	b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_one:
+	.long 0x12B52549
+	vor 4,13,13
+
+	vperm 21,21,21,6
+	.long 0x7EA02799
+	addi 4,4,0x10
+
+Lcbc_dec8x_done:
+	vperm 4,4,4,6
+	.long 0x7C803F99         # store updated IV back through r7
+
+	li 10,79                 # scrub the stack key buffer (overwrite with v6)
+	li 11,95
+	stvx 6,10,1
+	addi 10,10,32
+	stvx 6,11,1
+	addi 11,11,32
+	stvx 6,10,1
+	addi 10,10,32
+	stvx 6,11,1
+	addi 11,11,32
+	stvx 6,10,1
+	addi 10,10,32
+	stvx 6,11,1
+	addi 11,11,32
+	stvx 6,10,1
+	addi 10,10,32
+	stvx 6,11,1
+	addi 11,11,32
+
+	mtspr 256,12             # restore vrsave, then non-volatile VRs and GPRs
+	lvx 20,10,1
+	addi 10,10,32
+	lvx 21,11,1
+	addi 11,11,32
+	lvx 22,10,1
+	addi 10,10,32
+	lvx 23,11,1
+	addi 11,11,32
+	lvx 24,10,1
+	addi 10,10,32
+	lvx 25,11,1
+	addi 11,11,32
+	lvx 26,10,1
+	addi 10,10,32
+	lvx 27,11,1
+	addi 11,11,32
+	lvx 28,10,1
+	addi 10,10,32
+	lvx 29,11,1
+	addi 11,11,32
+	lvx 30,10,1
+	lvx 31,11,1
+	ld 26,400(1)
+	ld 27,408(1)
+	ld 28,416(1)
+	ld 29,424(1)
+	ld 30,432(1)
+	ld 31,440(1)
+	addi 1,1,448
+	blr
+.long 0
+.byte 0,12,0x04,0,0x80,6,6,0     # AIX/ELFv1-style traceback table
+.long 0
+
+.globl aes_hw_ctr32_encrypt_blocks
+.align 5
+aes_hw_ctr32_encrypt_blocks:     # AES-CTR (32-bit counter): r3=in, r4=out, r5=nblocks, r6=key sched, r7=counter block
+	cmpldi 5,1
+	bclr 14,0                # bltlr: return if fewer than 1 block
+
+	lis 0,0xfff0
+	mfspr 12,256             # save vrsave
+	mtspr 256,0
+
+	li 10,15
+	vxor 0,0,0
+	vspltisb 3,0x0f
+
+	lvx 4,0,7                # load (possibly unaligned) counter block into v4
+	lvsl 6,0,7
+	lvx 5,10,7
+	vspltisb 11,1
+	vxor 6,6,3
+	vperm 4,4,5,6
+	vsldoi 11,0,11,1         # v11 = 1 in the low counter word
+
+	neg 11,3
+	lvsr 10,0,6
+	lwz 9,240(6)             # rounds from key schedule
+
+	lvsr 6,0,11
+	lvx 5,0,3
+	addi 3,3,15
+	vxor 6,6,3
+
+	srwi 9,9,1               # loop count = rounds/2 - 1
+	li 10,16
+	subi 9,9,1
+
+	cmpldi 5,8
+	bge _aesp8_ctr32_encrypt8x  # 8+ blocks: use the wide core
+
+	lvsl 8,0,4               # output alignment mask
+	vspltisb 9,-1
+	lvx 7,0,4
+	vperm 9,9,0,8
+	vxor 8,8,3
+
+	lvx 0,0,6
+	mtctr 9
+	lvx 1,10,6
+	addi 10,10,16
+	vperm 0,1,0,10
+	vxor 2,4,0               # counter XOR round-0 key
+	lvx 0,10,6
+	addi 10,10,16
+	b Loop_ctr32_enc
+
+.align 5
+Loop_ctr32_enc:                  # two vcipher rounds per iteration (.long = vcipher encoded)
+	vperm 1,0,1,10
+	.long 0x10420D08
+	lvx 1,10,6
+	addi 10,10,16
+	vperm 0,1,0,10
+	.long 0x10420508
+	lvx 0,10,6
+	addi 10,10,16
+	bc 16,0,Loop_ctr32_enc
+
+	vadduwm 4,4,11           # bump 32-bit counter word
+	vor 3,5,5
+	lvx 5,0,3
+	addi 3,3,16
+	subic. 5,5,1
+
+	vperm 1,0,1,10
+	.long 0x10420D08
+	lvx 1,10,6
+	vperm 3,3,5,6            # align next input block
+	li 10,16
+	vperm 1,1,0,10
+	lvx 0,0,6
+	vxor 3,3,1               # fold last round key into plaintext
+	.long 0x10421D09         # vcipherlast: keystream XOR plaintext
+
+	lvx 1,10,6
+	addi 10,10,16
+	vperm 2,2,2,8
+	vsel 3,7,2,9             # merge with unaligned output edges
+	mtctr 9
+	vperm 0,1,0,10
+	vor 7,2,2
+	vxor 2,4,0               # restart cipher on the incremented counter
+	lvx 0,10,6
+	addi 10,10,16
+	stvx 3,0,4
+	addi 4,4,16
+	bne Loop_ctr32_enc
+
+	addi 4,4,-1              # flush the last partial output vector
+	lvx 2,0,4
+	vsel 2,7,2,9
+	stvx 2,0,4
+
+	mtspr 256,12             # restore vrsave
+	blr
+.long 0
+.byte 0,12,0x14,0,0,0,6,0        # traceback table
+.long 0
+.align 5
+_aesp8_ctr32_encrypt8x:          # 8x-interleaved AES-CTR core; same register contract as the scalar path above
+	stdu 1,-448(1)           # 448-byte frame, identical layout to _aesp8_cbc_decrypt8x
+	li 10,207
+	li 11,223
+	stvx 20,10,1             # save non-volatile VRs v20..v31
+	addi 10,10,32
+	stvx 21,11,1
+	addi 11,11,32
+	stvx 22,10,1
+	addi 10,10,32
+	stvx 23,11,1
+	addi 11,11,32
+	stvx 24,10,1
+	addi 10,10,32
+	stvx 25,11,1
+	addi 11,11,32
+	stvx 26,10,1
+	addi 10,10,32
+	stvx 27,11,1
+	addi 11,11,32
+	stvx 28,10,1
+	addi 10,10,32
+	stvx 29,11,1
+	addi 11,11,32
+	stvx 30,10,1
+	stvx 31,11,1
+	li 0,-1
+	stw 12,396(1)            # save vrsave
+	li 8,0x10
+	std 26,400(1)            # save r26..r31; they become offsets 0x20..0x70
+	li 26,0x20
+	std 27,408(1)
+	li 27,0x30
+	std 28,416(1)
+	li 28,0x40
+	std 29,424(1)
+	li 29,0x50
+	std 30,432(1)
+	li 30,0x60
+	std 31,440(1)
+	li 31,0x70
+	mtspr 256,0
+
+	subi 9,9,3
+
+	lvx 23,0,6               # load and align the round keys
+	lvx 30,8,6
+	addi 6,6,0x20
+	lvx 31,0,6
+	vperm 23,30,23,10
+	addi 11,1,64+15
+	mtctr 9
+
+Load_ctr32_enc_key:              # spill aligned round keys to the stack buffer
+	vperm 24,31,30,10
+	lvx 30,8,6
+	addi 6,6,0x20
+	stvx 24,0,11
+	vperm 25,30,31,10
+	lvx 31,0,6
+	stvx 25,8,11
+	addi 11,11,0x20
+	bc 16,0,Load_ctr32_enc_key
+
+	lvx 26,8,6
+	vperm 24,31,30,10
+	lvx 27,26,6
+	stvx 24,0,11
+	vperm 25,26,31,10
+	lvx 28,27,6
+	stvx 25,8,11
+	addi 11,1,64+15
+	vperm 26,27,26,10
+	lvx 29,28,6
+	vperm 27,28,27,10
+	lvx 30,29,6
+	vperm 28,29,28,10
+	lvx 31,30,6
+	vperm 29,30,29,10
+	lvx 15,31,6
+	vperm 30,31,30,10
+	lvx 24,0,11
+	vperm 31,15,31,10
+	lvx 25,8,11
+
+	vadduwm 7,11,11          # v7 = counter increment of 2
+	subi 3,3,15
+	sldi 5,5,4               # length in bytes
+
+	vadduwm 16,4,11          # materialize 8 consecutive counter blocks v15..v22
+	vadduwm 17,4,7
+	vxor 15,4,23             # each XORed with round-0 key
+	li 10,8
+	vadduwm 18,16,7
+	vxor 16,16,23
+	lvsl 6,0,10
+	vadduwm 19,17,7
+	vxor 17,17,23
+	vspltisb 3,0x0f
+	vadduwm 20,18,7
+	vxor 18,18,23
+	vxor 6,6,3
+	vadduwm 21,19,7
+	vxor 19,19,23
+	vadduwm 22,20,7
+	vxor 20,20,23
+	vadduwm 4,21,7
+	vxor 21,21,23
+	vxor 22,22,23
+
+	mtctr 9
+	b Loop_ctr32_enc8x
+.align 5
+Loop_ctr32_enc8x:                # vcipher rounds for 8 counter blocks
+	.long 0x11EFC508
+	.long 0x1210C508
+	.long 0x1231C508
+	.long 0x1252C508
+	.long 0x1273C508
+	.long 0x1294C508
+	.long 0x12B5C508
+	.long 0x12D6C508
+Loop_ctr32_enc8x_middle:
+	lvx 24,26,11
+	addi 11,11,0x20
+
+	.long 0x11EFCD08
+	.long 0x1210CD08
+	.long 0x1231CD08
+	.long 0x1252CD08
+	.long 0x1273CD08
+	.long 0x1294CD08
+	.long 0x12B5CD08
+	.long 0x12D6CD08
+	lvx 25,8,11
+	bc 16,0,Loop_ctr32_enc8x
+
+	subic 11,5,256           # borrow set iff fewer than 256 bytes remain
+	.long 0x11EFC508
+	.long 0x1210C508
+	.long 0x1231C508
+	.long 0x1252C508
+	.long 0x1273C508
+	.long 0x1294C508
+	.long 0x12B5C508
+	.long 0x12D6C508
+
+	subfe 0,0,0              # r0 = 0/-1 mask from that borrow
+	.long 0x11EFCD08
+	.long 0x1210CD08
+	.long 0x1231CD08
+	.long 0x1252CD08
+	.long 0x1273CD08
+	.long 0x1294CD08
+	.long 0x12B5CD08
+	.long 0x12D6CD08
+
+	and 0,0,11
+	addi 11,1,64+15
+	.long 0x11EFD508
+	.long 0x1210D508
+	.long 0x1231D508
+	.long 0x1252D508
+	.long 0x1273D508
+	.long 0x1294D508
+	.long 0x12B5D508
+	.long 0x12D6D508
+	lvx 24,0,11
+
+	subic 5,5,129            # len -= 128 (extra -1/+1 to set the borrow for exactly 128)
+	.long 0x11EFDD08
+	addi 5,5,1
+	.long 0x1210DD08
+	.long 0x1231DD08
+	.long 0x1252DD08
+	.long 0x1273DD08
+	.long 0x1294DD08
+	.long 0x12B5DD08
+	.long 0x12D6DD08
+	lvx 25,8,11
+
+	.long 0x11EFE508
+	.long 0x7C001E99         # lxvd2x: load next 8 input blocks while ciphering
+	.long 0x1210E508
+	.long 0x7C281E99
+	.long 0x1231E508
+	.long 0x7C5A1E99
+	.long 0x1252E508
+	.long 0x7C7B1E99
+	.long 0x1273E508
+	.long 0x7D5C1E99
+	.long 0x1294E508
+	.long 0x7D9D1E99
+	.long 0x12B5E508
+	.long 0x7DBE1E99
+	.long 0x12D6E508
+	.long 0x7DDF1E99
+	addi 3,3,0x80
+
+	.long 0x11EFED08
+	vperm 0,0,0,6
+	.long 0x1210ED08
+	vperm 1,1,1,6
+	.long 0x1231ED08
+	vperm 2,2,2,6
+	.long 0x1252ED08
+	vperm 3,3,3,6
+	.long 0x1273ED08
+	vperm 10,10,10,6
+	.long 0x1294ED08
+	vperm 12,12,12,6
+	.long 0x12B5ED08
+	vperm 13,13,13,6
+	.long 0x12D6ED08
+	vperm 14,14,14,6
+
+	add 3,3,0                # rewind input on the final short pass
+
+
+
+	subfe. 0,0,0             # nonzero iff this was the last full pass
+	.long 0x11EFF508
+	vxor 0,0,31              # fold last round key into the plaintext blocks
+	.long 0x1210F508
+	vxor 1,1,31
+	.long 0x1231F508
+	vxor 2,2,31
+	.long 0x1252F508
+	vxor 3,3,31
+	.long 0x1273F508
+	vxor 10,10,31
+	.long 0x1294F508
+	vxor 12,12,31
+	.long 0x12B5F508
+	vxor 13,13,31
+	.long 0x12D6F508
+	vxor 14,14,31
+
+	bne Lctr32_enc8x_break   # <128 bytes left: handle tail
+
+	.long 0x100F0509         # vcipherlast: keystream XOR plaintext for all 8
+	.long 0x10300D09
+	vadduwm 16,4,11          # regenerate the next 8 counter blocks in parallel
+	.long 0x10511509
+	vadduwm 17,4,7
+	vxor 15,4,23
+	.long 0x10721D09
+	vadduwm 18,16,7
+	vxor 16,16,23
+	.long 0x11535509
+	vadduwm 19,17,7
+	vxor 17,17,23
+	.long 0x11946509
+	vadduwm 20,18,7
+	vxor 18,18,23
+	.long 0x11B56D09
+	vadduwm 21,19,7
+	vxor 19,19,23
+	.long 0x11D67509
+	vadduwm 22,20,7
+	vxor 20,20,23
+	vperm 0,0,0,6
+	vadduwm 4,21,7
+	vxor 21,21,23
+	vperm 1,1,1,6
+	vxor 22,22,23
+	mtctr 9
+
+	.long 0x11EFC508         # first round of next batch overlapped with stores
+	.long 0x7C002799
+	vperm 2,2,2,6
+	.long 0x1210C508
+	.long 0x7C282799
+	vperm 3,3,3,6
+	.long 0x1231C508
+	.long 0x7C5A2799
+	vperm 10,10,10,6
+	.long 0x1252C508
+	.long 0x7C7B2799
+	vperm 12,12,12,6
+	.long 0x1273C508
+	.long 0x7D5C2799
+	vperm 13,13,13,6
+	.long 0x1294C508
+	.long 0x7D9D2799
+	vperm 14,14,14,6
+	.long 0x12B5C508
+	.long 0x7DBE2799
+	.long 0x12D6C508
+	.long 0x7DDF2799
+	addi 4,4,0x80
+
+	b Loop_ctr32_enc8x_middle
+
+.align 5
+Lctr32_enc8x_break:              # dispatch on remaining length (biased negative)
+	cmpwi 5,-0x60
+	blt Lctr32_enc8x_one
+	nop
+	beq Lctr32_enc8x_two
+	cmpwi 5,-0x40
+	blt Lctr32_enc8x_three
+	nop
+	beq Lctr32_enc8x_four
+	cmpwi 5,-0x20
+	blt Lctr32_enc8x_five
+	nop
+	beq Lctr32_enc8x_six
+	cmpwi 5,0x00
+	blt Lctr32_enc8x_seven
+
+Lctr32_enc8x_eight:
+	.long 0x11EF0509         # vcipherlast on 8 blocks
+	.long 0x12100D09
+	.long 0x12311509
+	.long 0x12521D09
+	.long 0x12735509
+	.long 0x12946509
+	.long 0x12B56D09
+	.long 0x12D67509
+
+	vperm 15,15,15,6
+	vperm 16,16,16,6
+	.long 0x7DE02799         # stxvd2x output stores
+	vperm 17,17,17,6
+	.long 0x7E082799
+	vperm 18,18,18,6
+	.long 0x7E3A2799
+	vperm 19,19,19,6
+	.long 0x7E5B2799
+	vperm 20,20,20,6
+	.long 0x7E7C2799
+	vperm 21,21,21,6
+	.long 0x7E9D2799
+	vperm 22,22,22,6
+	.long 0x7EBE2799
+	.long 0x7EDF2799
+	addi 4,4,0x80
+	b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_seven:
+	.long 0x11EF0D09
+	.long 0x12101509
+	.long 0x12311D09
+	.long 0x12525509
+	.long 0x12736509
+	.long 0x12946D09
+	.long 0x12B57509
+
+	vperm 15,15,15,6
+	vperm 16,16,16,6
+	.long 0x7DE02799
+	vperm 17,17,17,6
+	.long 0x7E082799
+	vperm 18,18,18,6
+	.long 0x7E3A2799
+	vperm 19,19,19,6
+	.long 0x7E5B2799
+	vperm 20,20,20,6
+	.long 0x7E7C2799
+	vperm 21,21,21,6
+	.long 0x7E9D2799
+	.long 0x7EBE2799
+	addi 4,4,0x70
+	b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_six:
+	.long 0x11EF1509
+	.long 0x12101D09
+	.long 0x12315509
+	.long 0x12526509
+	.long 0x12736D09
+	.long 0x12947509
+
+	vperm 15,15,15,6
+	vperm 16,16,16,6
+	.long 0x7DE02799
+	vperm 17,17,17,6
+	.long 0x7E082799
+	vperm 18,18,18,6
+	.long 0x7E3A2799
+	vperm 19,19,19,6
+	.long 0x7E5B2799
+	vperm 20,20,20,6
+	.long 0x7E7C2799
+	.long 0x7E9D2799
+	addi 4,4,0x60
+	b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_five:
+	.long 0x11EF1D09
+	.long 0x12105509
+	.long 0x12316509
+	.long 0x12526D09
+	.long 0x12737509
+
+	vperm 15,15,15,6
+	vperm 16,16,16,6
+	.long 0x7DE02799
+	vperm 17,17,17,6
+	.long 0x7E082799
+	vperm 18,18,18,6
+	.long 0x7E3A2799
+	vperm 19,19,19,6
+	.long 0x7E5B2799
+	.long 0x7E7C2799
+	addi 4,4,0x50
+	b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_four:
+	.long 0x11EF5509
+	.long 0x12106509
+	.long 0x12316D09
+	.long 0x12527509
+
+	vperm 15,15,15,6
+	vperm 16,16,16,6
+	.long 0x7DE02799
+	vperm 17,17,17,6
+	.long 0x7E082799
+	vperm 18,18,18,6
+	.long 0x7E3A2799
+	.long 0x7E5B2799
+	addi 4,4,0x40
+	b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_three:
+	.long 0x11EF6509
+	.long 0x12106D09
+	.long 0x12317509
+
+	vperm 15,15,15,6
+	vperm 16,16,16,6
+	.long 0x7DE02799
+	vperm 17,17,17,6
+	.long 0x7E082799
+	.long 0x7E3A2799
+	addi 4,4,0x30
+	b Lcbc_dec8x_done        # NOTE(review): jumps into the CBC epilogue (same frame layout; also stores v4 to r7) — matches upstream perlasm, verify intentional
+
+.align 5
+Lctr32_enc8x_two:
+	.long 0x11EF6D09
+	.long 0x12107509
+
+	vperm 15,15,15,6
+	vperm 16,16,16,6
+	.long 0x7DE02799
+	.long 0x7E082799
+	addi 4,4,0x20
+	b Lcbc_dec8x_done        # NOTE(review): same cross-routine epilogue reuse as above — matches upstream perlasm
+
+.align 5
+Lctr32_enc8x_one:
+	.long 0x11EF7509
+
+	vperm 15,15,15,6
+	.long 0x7DE02799
+	addi 4,4,0x10
+
+Lctr32_enc8x_done:
+	li 10,79                 # scrub the stack key buffer
+	li 11,95
+	stvx 6,10,1
+	addi 10,10,32
+	stvx 6,11,1
+	addi 11,11,32
+	stvx 6,10,1
+	addi 10,10,32
+	stvx 6,11,1
+	addi 11,11,32
+	stvx 6,10,1
+	addi 10,10,32
+	stvx 6,11,1
+	addi 11,11,32
+	stvx 6,10,1
+	addi 10,10,32
+	stvx 6,11,1
+	addi 11,11,32
+
+	mtspr 256,12             # restore vrsave, VRs, GPRs
+	lvx 20,10,1
+	addi 10,10,32
+	lvx 21,11,1
+	addi 11,11,32
+	lvx 22,10,1
+	addi 10,10,32
+	lvx 23,11,1
+	addi 11,11,32
+	lvx 24,10,1
+	addi 10,10,32
+	lvx 25,11,1
+	addi 11,11,32
+	lvx 26,10,1
+	addi 10,10,32
+	lvx 27,11,1
+	addi 11,11,32
+	lvx 28,10,1
+	addi 10,10,32
+	lvx 29,11,1
+	addi 11,11,32
+	lvx 30,10,1
+	lvx 31,11,1
+	ld 26,400(1)
+	ld 27,408(1)
+	ld 28,416(1)
+	ld 29,424(1)
+	ld 30,432(1)
+	ld 31,440(1)
+	addi 1,1,448
+	blr
+.long 0
+.byte 0,12,0x04,0,0x80,6,6,0     # traceback table
+.long 0
+
+.globl aes_hw_xts_encrypt
+.align 5
+aes_hw_xts_encrypt:              # AES-XTS encrypt: r3=in, r4=out, r5=len, r6=key1, r7=key2 (may be 0), r8=iv/tweak
+	mr 10,3                  # keep input pointer in r10; r3 becomes return value
+	li 3,-1
+	cmpldi 5,16
+	bclr 14,0                # return -1 if len < 16
+
+	lis 0,0xfff0
+	mfspr 12,256             # save vrsave
+	li 11,0
+	mtspr 256,0
+
+	vspltisb 9,0x07
+	lvsl 6,11,11
+	vspltisb 11,0x0f
+	vxor 6,6,9               # v6 = endian-swap permute
+
+	li 3,15
+	lvx 8,0,8                # load (possibly unaligned) tweak into v8
+	lvsl 5,0,8
+	lvx 4,3,8
+	vxor 5,5,11
+	vperm 8,8,4,5
+
+	neg 11,10
+	lvsr 5,0,11
+	lvx 2,0,10
+	addi 10,10,15
+	vxor 5,5,11
+
+	cmpldi 7,0               # key2 == NULL: tweak already encrypted by caller
+	beq Lxts_enc_no_key2
+
+	lvsr 7,0,7
+	lwz 9,240(7)             # rounds of key2
+	srwi 9,9,1
+	subi 9,9,1
+	li 3,16
+
+	lvx 0,0,7
+	lvx 1,3,7
+	addi 3,3,16
+	vperm 0,1,0,7
+	vxor 8,8,0
+	lvx 0,3,7
+	addi 3,3,16
+	mtctr 9
+
+Ltweak_xts_enc:                  # encrypt the tweak with key2 (.long = vcipher)
+	vperm 1,0,1,7
+	.long 0x11080D08
+	lvx 1,3,7
+	addi 3,3,16
+	vperm 0,1,0,7
+	.long 0x11080508
+	lvx 0,3,7
+	addi 3,3,16
+	bc 16,0,Ltweak_xts_enc
+
+	vperm 1,0,1,7
+	.long 0x11080D08
+	lvx 1,3,7
+	vperm 0,1,0,7
+	.long 0x11080509         # vcipherlast finishes the tweak
+
+	li 8,0                   # r8 = 0: do not write tweak back at the end
+	b Lxts_enc
+
+Lxts_enc_no_key2:
+	li 3,-16
+	and 5,5,3                # round length down to a whole number of blocks
+
+
+Lxts_enc:
+	lvx 4,0,10
+	addi 10,10,16
+
+	lvsr 7,0,6
+	lwz 9,240(6)             # rounds of key1
+	srwi 9,9,1
+	subi 9,9,1
+	li 3,16
+
+	vslb 10,9,9              # v10 = 0x87 polynomial mask for tweak doubling
+	vor 10,10,9
+	vspltisb 11,1
+	vsldoi 10,10,11,15
+
+	cmpldi 5,96
+	bge _aesp8_xts_encrypt6x # >= 96 bytes: 6x-wide core
+
+	andi. 7,5,15             # tail bytes (ciphertext stealing)
+	subic 0,5,32
+	subi 7,7,16
+	subfe 0,0,0
+	and 0,0,7
+	add 10,10,0
+
+	lvx 0,0,6
+	lvx 1,3,6
+	addi 3,3,16
+	vperm 2,2,4,5
+	vperm 0,1,0,7
+	vxor 2,2,8               # plaintext XOR tweak
+	vxor 2,2,0               # XOR round-0 key
+	lvx 0,3,6
+	addi 3,3,16
+	mtctr 9
+	b Loop_xts_enc
+
+.align 5
+Loop_xts_enc:                    # two vcipher rounds per iteration
+	vperm 1,0,1,7
+	.long 0x10420D08
+	lvx 1,3,6
+	addi 3,3,16
+	vperm 0,1,0,7
+	.long 0x10420508
+	lvx 0,3,6
+	addi 3,3,16
+	bc 16,0,Loop_xts_enc
+
+	vperm 1,0,1,7
+	.long 0x10420D08
+	lvx 1,3,6
+	li 3,16
+	vperm 0,1,0,7
+	vxor 0,0,8               # fold tweak into last round key
+	.long 0x10620509         # vcipherlast
+
+	vperm 11,3,3,6
+
+	.long 0x7D602799         # stxvd2x: store ciphertext block
+
+	addi 4,4,16
+
+	subic. 5,5,16
+	beq Lxts_enc_done
+
+	vor 2,4,4
+	lvx 4,0,10
+	addi 10,10,16
+	lvx 0,0,6
+	lvx 1,3,6
+	addi 3,3,16
+
+	subic 0,5,32
+	subfe 0,0,0
+	and 0,0,7
+	add 10,10,0
+
+	vsrab 11,8,9             # advance tweak: GF(2^128) doubling with 0x87 feedback
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	vand 11,11,10
+	vxor 8,8,11
+
+	vperm 2,2,4,5
+	vperm 0,1,0,7
+	vxor 2,2,8
+	vxor 3,3,0               # mask output for the stealing path
+	vxor 2,2,0
+	lvx 0,3,6
+	addi 3,3,16
+
+	mtctr 9
+	cmpldi 5,16
+	bge Loop_xts_enc
+
+	vxor 3,3,8               # < 16 bytes left: ciphertext stealing
+	lvsr 5,0,5
+	vxor 4,4,4
+	vspltisb 11,-1
+	vperm 4,4,11,5
+	vsel 2,2,3,4
+
+	subi 11,4,17
+	subi 4,4,16
+	mtctr 5
+	li 5,16
+Loop_xts_enc_steal:              # copy the partial final block byte by byte
+	lbzu 0,1(11)
+	stb 0,16(11)
+	bc 16,0,Loop_xts_enc_steal
+
+	mtctr 9
+	b Loop_xts_enc           # one more pass for the stolen block
+
+Lxts_enc_done:
+	cmpldi 8,0               # r8 != 0: write updated tweak back through r8
+	beq Lxts_enc_ret
+
+	vsrab 11,8,9             # final tweak doubling
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	vand 11,11,10
+	vxor 8,8,11
+
+	vperm 8,8,8,6
+	.long 0x7D004799         # store tweak
+
+Lxts_enc_ret:
+	mtspr 256,12             # restore vrsave
+	li 3,0                   # return 0
+	blr
+.long 0
+.byte 0,12,0x04,0,0x80,6,6,0     # traceback table
+.long 0
+
+
+.globl aes_hw_xts_decrypt
+.align 5
+aes_hw_xts_decrypt:              # AES-XTS decrypt: r3=in, r4=out, r5=len, r6=key1, r7=key2 (may be 0), r8=iv/tweak
+	mr 10,3
+	li 3,-1
+	cmpldi 5,16
+	bclr 14,0                # return -1 if len < 16
+
+	lis 0,0xfff8
+	mfspr 12,256             # save vrsave
+	li 11,0
+	mtspr 256,0
+
+	andi. 0,5,15             # if len is ragged, hold back one extra block for stealing
+	neg 0,0
+	andi. 0,0,16
+	sub 5,5,0
+
+	vspltisb 9,0x07
+	lvsl 6,11,11
+	vspltisb 11,0x0f
+	vxor 6,6,9               # v6 = endian-swap permute
+
+	li 3,15
+	lvx 8,0,8                # load (possibly unaligned) tweak
+	lvsl 5,0,8
+	lvx 4,3,8
+	vxor 5,5,11
+	vperm 8,8,4,5
+
+	neg 11,10
+	lvsr 5,0,11
+	lvx 2,0,10
+	addi 10,10,15
+	vxor 5,5,11
+
+	cmpldi 7,0               # key2 == NULL: tweak already encrypted by caller
+	beq Lxts_dec_no_key2
+
+	lvsr 7,0,7
+	lwz 9,240(7)
+	srwi 9,9,1
+	subi 9,9,1
+	li 3,16
+
+	lvx 0,0,7
+	lvx 1,3,7
+	addi 3,3,16
+	vperm 0,1,0,7
+	vxor 8,8,0
+	lvx 0,3,7
+	addi 3,3,16
+	mtctr 9
+
+Ltweak_xts_dec:                  # encrypt the tweak with key2 (tweak is always encrypted, even for decrypt)
+	vperm 1,0,1,7
+	.long 0x11080D08
+	lvx 1,3,7
+	addi 3,3,16
+	vperm 0,1,0,7
+	.long 0x11080508
+	lvx 0,3,7
+	addi 3,3,16
+	bc 16,0,Ltweak_xts_dec
+
+	vperm 1,0,1,7
+	.long 0x11080D08
+	lvx 1,3,7
+	vperm 0,1,0,7
+	.long 0x11080509
+
+	li 8,0                   # do not write tweak back at the end
+	b Lxts_dec
+
+Lxts_dec_no_key2:
+	neg 3,5
+	andi. 3,3,15
+	add 5,5,3                # round length up to whole blocks
+
+
+Lxts_dec:
+	lvx 4,0,10
+	addi 10,10,16
+
+	lvsr 7,0,6
+	lwz 9,240(6)             # rounds of key1
+	srwi 9,9,1
+	subi 9,9,1
+	li 3,16
+
+	vslb 10,9,9              # v10 = 0x87 polynomial mask
+	vor 10,10,9
+	vspltisb 11,1
+	vsldoi 10,10,11,15
+
+	cmpldi 5,96
+	bge _aesp8_xts_decrypt6x # >= 96 bytes: 6x-wide core
+
+	lvx 0,0,6
+	lvx 1,3,6
+	addi 3,3,16
+	vperm 2,2,4,5
+	vperm 0,1,0,7
+	vxor 2,2,8               # ciphertext XOR tweak XOR round-0 key
+	vxor 2,2,0
+	lvx 0,3,6
+	addi 3,3,16
+	mtctr 9
+
+	cmpldi 5,16
+	blt Ltail_xts_dec        # sub-block tail needs the shifted tweak
+
+
+.align 5
+Loop_xts_dec:                    # two vncipher rounds per iteration
+	vperm 1,0,1,7
+	.long 0x10420D48
+	lvx 1,3,6
+	addi 3,3,16
+	vperm 0,1,0,7
+	.long 0x10420548
+	lvx 0,3,6
+	addi 3,3,16
+	bc 16,0,Loop_xts_dec
+
+	vperm 1,0,1,7
+	.long 0x10420D48
+	lvx 1,3,6
+	li 3,16
+	vperm 0,1,0,7
+	vxor 0,0,8
+	.long 0x10620549         # vncipherlast
+
+	vperm 11,3,3,6
+
+	.long 0x7D602799         # store plaintext block
+
+	addi 4,4,16
+
+	subic. 5,5,16
+	beq Lxts_dec_done
+
+	vor 2,4,4
+	lvx 4,0,10
+	addi 10,10,16
+	lvx 0,0,6
+	lvx 1,3,6
+	addi 3,3,16
+
+	vsrab 11,8,9             # advance tweak (GF(2^128) doubling)
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	vand 11,11,10
+	vxor 8,8,11
+
+	vperm 2,2,4,5
+	vperm 0,1,0,7
+	vxor 2,2,8
+	vxor 2,2,0
+	lvx 0,3,6
+	addi 3,3,16
+
+	mtctr 9
+	cmpldi 5,16
+	bge Loop_xts_dec
+
+Ltail_xts_dec:                   # last full block uses the NEXT tweak (v12); stealing follows
+	vsrab 11,8,9
+	vaddubm 12,8,8
+	vsldoi 11,11,11,15
+	vand 11,11,10
+	vxor 12,12,11
+
+	subi 10,10,16
+	add 10,10,5
+
+	vxor 2,2,8               # undo tweak-8, apply tweak-12
+	vxor 2,2,12
+
+Loop_xts_dec_short:
+	vperm 1,0,1,7
+	.long 0x10420D48
+	lvx 1,3,6
+	addi 3,3,16
+	vperm 0,1,0,7
+	.long 0x10420548
+	lvx 0,3,6
+	addi 3,3,16
+	bc 16,0,Loop_xts_dec_short
+
+	vperm 1,0,1,7
+	.long 0x10420D48
+	lvx 1,3,6
+	li 3,16
+	vperm 0,1,0,7
+	vxor 0,0,12
+	.long 0x10620549
+
+	vperm 11,3,3,6
+
+	.long 0x7D602799
+
+
+	vor 2,4,4
+	lvx 4,0,10
+
+	lvx 0,0,6
+	lvx 1,3,6
+	addi 3,3,16
+	vperm 2,2,4,5
+	vperm 0,1,0,7
+
+	lvsr 5,0,5               # build mask for the partial block
+	vxor 4,4,4
+	vspltisb 11,-1
+	vperm 4,4,11,5
+	vsel 2,2,3,4
+
+	vxor 0,0,8
+	vxor 2,2,0
+	lvx 0,3,6
+	addi 3,3,16
+
+	subi 11,4,1
+	mtctr 5
+	li 5,16
+Loop_xts_dec_steal:              # ciphertext stealing: copy tail bytes
+	lbzu 0,1(11)
+	stb 0,16(11)
+	bc 16,0,Loop_xts_dec_steal
+
+	mtctr 9
+	b Loop_xts_dec           # decrypt the reassembled block with the earlier tweak
+
+Lxts_dec_done:
+	cmpldi 8,0               # r8 != 0: write advanced tweak back
+	beq Lxts_dec_ret
+
+	vsrab 11,8,9
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	vand 11,11,10
+	vxor 8,8,11
+
+	vperm 8,8,8,6
+	.long 0x7D004799
+
+Lxts_dec_ret:
+	mtspr 256,12             # restore vrsave
+	li 3,0                   # return 0
+	blr
+.long 0
+.byte 0,12,0x04,0,0x80,6,6,0     # traceback table
+.long 0
+
+.align 5
+_aesp8_xts_encrypt6x:            # 6x-interleaved XTS encrypt core; r10=in, r4=out, r5=len, r6=key1, r9=loop count, v8=tweak
+	stdu 1,-448(1)           # 448-byte frame; LR saved because this routine calls _aesp8_xts_enc5x
+	mflr 11
+	li 7,207
+	li 3,223
+	std 11,464(1)            # save LR in caller's frame
+	stvx 20,7,1              # save non-volatile VRs v20..v31
+	addi 7,7,32
+	stvx 21,3,1
+	addi 3,3,32
+	stvx 22,7,1
+	addi 7,7,32
+	stvx 23,3,1
+	addi 3,3,32
+	stvx 24,7,1
+	addi 7,7,32
+	stvx 25,3,1
+	addi 3,3,32
+	stvx 26,7,1
+	addi 7,7,32
+	stvx 27,3,1
+	addi 3,3,32
+	stvx 28,7,1
+	addi 7,7,32
+	stvx 29,3,1
+	addi 3,3,32
+	stvx 30,7,1
+	stvx 31,3,1
+	li 0,-1
+	stw 12,396(1)            # save vrsave
+	li 3,0x10
+	std 26,400(1)            # save r26..r31; they become offsets 0x20..0x70
+	li 26,0x20
+	std 27,408(1)
+	li 27,0x30
+	std 28,416(1)
+	li 28,0x40
+	std 29,424(1)
+	li 29,0x50
+	std 30,432(1)
+	li 30,0x60
+	std 31,440(1)
+	li 31,0x70
+	mtspr 256,0
+
+	subi 9,9,3
+
+	lvx 23,0,6               # load/align round keys
+	lvx 30,3,6
+	addi 6,6,0x20
+	lvx 31,0,6
+	vperm 23,30,23,7
+	addi 7,1,64+15
+	mtctr 9
+
+Load_xts_enc_key:                # spill aligned round keys to the stack buffer
+	vperm 24,31,30,7
+	lvx 30,3,6
+	addi 6,6,0x20
+	stvx 24,0,7
+	vperm 25,30,31,7
+	lvx 31,0,6
+	stvx 25,3,7
+	addi 7,7,0x20
+	bc 16,0,Load_xts_enc_key
+
+	lvx 26,3,6
+	vperm 24,31,30,7
+	lvx 27,26,6
+	stvx 24,0,7
+	vperm 25,26,31,7
+	lvx 28,27,6
+	stvx 25,3,7
+	addi 7,1,64+15
+	vperm 26,27,26,7
+	lvx 29,28,6
+	vperm 27,28,27,7
+	lvx 30,29,6
+	vperm 28,29,28,7
+	lvx 31,30,6
+	vperm 29,30,29,7
+	lvx 22,31,6
+	vperm 30,31,30,7
+	lvx 24,0,7
+	vperm 31,22,31,7
+	lvx 25,3,7
+
+	vperm 0,2,4,5            # derive 6 tweaks v17..v22, doubling v8 each time
+	subi 10,10,31
+	vxor 17,8,23
+	vsrab 11,8,9
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	vand 11,11,10
+	vxor 7,0,17              # block 1 XOR (tweak XOR round-0 key)
+	vxor 8,8,11
+
+	.long 0x7C235699         # lxvd2x: load next input block
+	vxor 18,8,23
+	vsrab 11,8,9
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	vperm 1,1,1,6
+	vand 11,11,10
+	vxor 12,1,18
+	vxor 8,8,11
+
+	.long 0x7C5A5699
+	andi. 31,5,15            # r31 = tail bytes for ciphertext stealing
+	vxor 19,8,23
+	vsrab 11,8,9
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	vperm 2,2,2,6
+	vand 11,11,10
+	vxor 13,2,19
+	vxor 8,8,11
+
+	.long 0x7C7B5699
+	sub 5,5,31
+	vxor 20,8,23
+	vsrab 11,8,9
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	vperm 3,3,3,6
+	vand 11,11,10
+	vxor 14,3,20
+	vxor 8,8,11
+
+	.long 0x7C9C5699
+	subi 5,5,0x60
+	vxor 21,8,23
+	vsrab 11,8,9
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	vperm 4,4,4,6
+	vand 11,11,10
+	vxor 15,4,21
+	vxor 8,8,11
+
+	.long 0x7CBD5699
+	addi 10,10,0x60
+	vxor 22,8,23
+	vsrab 11,8,9
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	vperm 5,5,5,6
+	vand 11,11,10
+	vxor 16,5,22
+	vxor 8,8,11
+
+	vxor 31,31,23            # pre-fold round-0 key into the last round key
+	mtctr 9
+	b Loop_xts_enc6x
+
+.align 5
+Loop_xts_enc6x:                  # two vcipher rounds for 6 blocks per iteration
+	.long 0x10E7C508
+	.long 0x118CC508
+	.long 0x11ADC508
+	.long 0x11CEC508
+	.long 0x11EFC508
+	.long 0x1210C508
+	lvx 24,26,7
+	addi 7,7,0x20
+
+	.long 0x10E7CD08
+	.long 0x118CCD08
+	.long 0x11ADCD08
+	.long 0x11CECD08
+	.long 0x11EFCD08
+	.long 0x1210CD08
+	lvx 25,3,7
+	bc 16,0,Loop_xts_enc6x
+
+	subic 5,5,96             # len -= 96; borrow = short final pass
+	vxor 0,17,31             # prepare per-block last-round keys (tweak XOR last key)
+	.long 0x10E7C508
+	.long 0x118CC508
+	vsrab 11,8,9             # keep doubling tweaks for the next batch
+	vxor 17,8,23
+	vaddubm 8,8,8
+	.long 0x11ADC508
+	.long 0x11CEC508
+	vsldoi 11,11,11,15
+	.long 0x11EFC508
+	.long 0x1210C508
+
+	subfe. 0,0,0             # r0 = 0/-1 mask
+	vand 11,11,10
+	.long 0x10E7CD08
+	.long 0x118CCD08
+	vxor 8,8,11
+	.long 0x11ADCD08
+	.long 0x11CECD08
+	vxor 1,18,31
+	vsrab 11,8,9
+	vxor 18,8,23
+	.long 0x11EFCD08
+	.long 0x1210CD08
+
+	and 0,0,5
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	.long 0x10E7D508
+	.long 0x118CD508
+	vand 11,11,10
+	.long 0x11ADD508
+	.long 0x11CED508
+	vxor 8,8,11
+	.long 0x11EFD508
+	.long 0x1210D508
+
+	add 10,10,0              # rewind input on short pass
+
+
+
+	vxor 2,19,31
+	vsrab 11,8,9
+	vxor 19,8,23
+	vaddubm 8,8,8
+	.long 0x10E7DD08
+	.long 0x118CDD08
+	vsldoi 11,11,11,15
+	.long 0x11ADDD08
+	.long 0x11CEDD08
+	vand 11,11,10
+	.long 0x11EFDD08
+	.long 0x1210DD08
+
+	addi 7,1,64+15
+	vxor 8,8,11
+	.long 0x10E7E508
+	.long 0x118CE508
+	vxor 3,20,31
+	vsrab 11,8,9
+	vxor 20,8,23
+	.long 0x11ADE508
+	.long 0x11CEE508
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	.long 0x11EFE508
+	.long 0x1210E508
+	lvx 24,0,7
+	vand 11,11,10
+
+	.long 0x10E7ED08
+	.long 0x118CED08
+	vxor 8,8,11
+	.long 0x11ADED08
+	.long 0x11CEED08
+	vxor 4,21,31
+	vsrab 11,8,9
+	vxor 21,8,23
+	.long 0x11EFED08
+	.long 0x1210ED08
+	lvx 25,3,7
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+
+	.long 0x10E7F508
+	.long 0x118CF508
+	vand 11,11,10
+	.long 0x11ADF508
+	.long 0x11CEF508
+	vxor 8,8,11
+	.long 0x11EFF508
+	.long 0x1210F508
+	vxor 5,22,31
+	vsrab 11,8,9
+	vxor 22,8,23
+
+	.long 0x10E70509         # vcipherlast with per-block tweaked last key
+	.long 0x7C005699         # overlap: load the next 6 input blocks
+	vaddubm 8,8,8
+	vsldoi 11,11,11,15
+	.long 0x118C0D09
+	.long 0x7C235699
+	.long 0x11AD1509
+	vperm 0,0,0,6
+	.long 0x7C5A5699
+	vand 11,11,10
+	.long 0x11CE1D09
+	vperm 1,1,1,6
+	.long 0x7C7B5699
+	.long 0x11EF2509
+	vperm 2,2,2,6
+	.long 0x7C9C5699
+	vxor 8,8,11
+	.long 0x11702D09         # note: result goes to v11, consumed below as block 6 output
+
+	vperm 3,3,3,6
+	.long 0x7CBD5699
+	addi 10,10,0x60
+	vperm 4,4,4,6
+	vperm 5,5,5,6
+
+	vperm 7,7,7,6
+	vperm 12,12,12,6
+	.long 0x7CE02799         # store 6 ciphertext blocks
+	vxor 7,0,17              # and immediately start the next batch
+	vperm 13,13,13,6
+	.long 0x7D832799
+	vxor 12,1,18
+	vperm 14,14,14,6
+	.long 0x7DBA2799
+	vxor 13,2,19
+	vperm 15,15,15,6
+	.long 0x7DDB2799
+	vxor 14,3,20
+	vperm 16,11,11,6
+	.long 0x7DFC2799
+	vxor 15,4,21
+	.long 0x7E1D2799
+
+	vxor 16,5,22
+	addi 4,4,0x60
+
+	mtctr 9
+	beq Loop_xts_enc6x       # full 6 blocks remain
+
+	addic. 5,5,0x60          # un-bias length
+	beq Lxts_enc6x_zero
+	cmpwi 5,0x20             # dispatch on 1..5 remaining blocks
+	blt Lxts_enc6x_one
+	nop
+	beq Lxts_enc6x_two
+	cmpwi 5,0x40
+	blt Lxts_enc6x_three
+	nop
+	beq Lxts_enc6x_four
+
+Lxts_enc6x_five:
+	vxor 7,1,17              # re-pair 5 blocks with tweaks and run the 5x helper
+	vxor 12,2,18
+	vxor 13,3,19
+	vxor 14,4,20
+	vxor 15,5,21
+
+	bl _aesp8_xts_enc5x
+
+	vperm 7,7,7,6
+	vor 17,22,22             # next tweak
+	vperm 12,12,12,6
+	.long 0x7CE02799
+	vperm 13,13,13,6
+	.long 0x7D832799
+	vperm 14,14,14,6
+	.long 0x7DBA2799
+	vxor 11,15,22            # keep last ciphertext for stealing
+	vperm 15,15,15,6
+	.long 0x7DDB2799
+	.long 0x7DFC2799
+	addi 4,4,0x50
+	bne Lxts_enc6x_steal
+	b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_four:
+	vxor 7,2,17
+	vxor 12,3,18
+	vxor 13,4,19
+	vxor 14,5,20
+	vxor 15,15,15            # unused lane zeroed
+
+	bl _aesp8_xts_enc5x
+
+	vperm 7,7,7,6
+	vor 17,21,21
+	vperm 12,12,12,6
+	.long 0x7CE02799
+	vperm 13,13,13,6
+	.long 0x7D832799
+	vxor 11,14,21
+	vperm 14,14,14,6
+	.long 0x7DBA2799
+	.long 0x7DDB2799
+	addi 4,4,0x40
+	bne Lxts_enc6x_steal
+	b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_three:
+	vxor 7,3,17
+	vxor 12,4,18
+	vxor 13,5,19
+	vxor 14,14,14
+	vxor 15,15,15
+
+	bl _aesp8_xts_enc5x
+
+	vperm 7,7,7,6
+	vor 17,20,20
+	vperm 12,12,12,6
+	.long 0x7CE02799
+	vxor 11,13,20
+	vperm 13,13,13,6
+	.long 0x7D832799
+	.long 0x7DBA2799
+	addi 4,4,0x30
+	bne Lxts_enc6x_steal
+	b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_two:
+	vxor 7,4,17
+	vxor 12,5,18
+	vxor 13,13,13
+	vxor 14,14,14
+	vxor 15,15,15
+
+	bl _aesp8_xts_enc5x
+
+	vperm 7,7,7,6
+	vor 17,19,19
+	vxor 11,12,19
+	vperm 12,12,12,6
+	.long 0x7CE02799
+	.long 0x7D832799
+	addi 4,4,0x20
+	bne Lxts_enc6x_steal
+	b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_one:
+	vxor 7,5,17
+	nop
+Loop_xts_enc1x:                  # single-block round loop with the spilled keys
+	.long 0x10E7C508
+	lvx 24,26,7
+	addi 7,7,0x20
+
+	.long 0x10E7CD08
+	lvx 25,3,7
+	bc 16,0,Loop_xts_enc1x
+
+	add 10,10,31             # position input for possible stealing tail
+	cmpwi 31,0
+	.long 0x10E7C508
+
+	subi 10,10,16
+	.long 0x10E7CD08
+
+	lvsr 5,0,31
+	.long 0x10E7D508
+
+	.long 0x7C005699
+	.long 0x10E7DD08
+
+	addi 7,1,64+15
+	.long 0x10E7E508
+	lvx 24,0,7
+
+	.long 0x10E7ED08
+	lvx 25,3,7
+	vxor 17,17,31
+
+	vperm 0,0,0,6
+	.long 0x10E7F508
+
+	vperm 0,0,0,5
+	.long 0x10E78D09
+
+	vor 17,18,18
+	vxor 11,7,18
+	vperm 7,7,7,6
+	.long 0x7CE02799
+	addi 4,4,0x10
+	bne Lxts_enc6x_steal
+	b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_zero:
+	cmpwi 31,0
+	beq Lxts_enc6x_done
+
+	add 10,10,31
+	subi 10,10,16
+	.long 0x7C005699
+	lvsr 5,0,31
+	vperm 0,0,0,6
+	vperm 0,0,0,5
+	vxor 11,11,17
+Lxts_enc6x_steal:                # ciphertext stealing for the ragged tail
+	vxor 0,0,17
+	vxor 7,7,7
+	vspltisb 12,-1
+	vperm 7,7,12,5
+	vsel 7,0,11,7            # splice partial plaintext with stolen ciphertext
+
+	subi 30,4,17
+	subi 4,4,16
+	mtctr 31
+Loop_xts_enc6x_steal:            # copy tail bytes
+	lbzu 0,1(30)
+	stb 0,16(30)
+	bc 16,0,Loop_xts_enc6x_steal
+
+	li 31,0
+	mtctr 9
+	b Loop_xts_enc1x         # encrypt the spliced block
+
+.align 4
+Lxts_enc6x_done:
+	cmpldi 8,0               # write updated tweak back if requested
+	beq Lxts_enc6x_ret
+
+	vxor 8,17,23             # un-fold round-0 key from the saved tweak
+	vperm 8,8,8,6
+	.long 0x7D004799
+
+Lxts_enc6x_ret:
+	mtlr 11
+	li 10,79                 # scrub the stack key buffer
+	li 11,95
+	stvx 9,10,1
+	addi 10,10,32
+	stvx 9,11,1
+	addi 11,11,32
+	stvx 9,10,1
+	addi 10,10,32
+	stvx 9,11,1
+	addi 11,11,32
+	stvx 9,10,1
+	addi 10,10,32
+	stvx 9,11,1
+	addi 11,11,32
+	stvx 9,10,1
+	addi 10,10,32
+	stvx 9,11,1
+	addi 11,11,32
+
+	mtspr 256,12             # restore vrsave, VRs, GPRs
+	lvx 20,10,1
+	addi 10,10,32
+	lvx 21,11,1
+	addi 11,11,32
+	lvx 22,10,1
+	addi 10,10,32
+	lvx 23,11,1
+	addi 11,11,32
+	lvx 24,10,1
+	addi 10,10,32
+	lvx 25,11,1
+	addi 11,11,32
+	lvx 26,10,1
+	addi 10,10,32
+	lvx 27,11,1
+	addi 11,11,32
+	lvx 28,10,1
+	addi 10,10,32
+	lvx 29,11,1
+	addi 11,11,32
+	lvx 30,10,1
+	lvx 31,11,1
+	ld 26,400(1)
+	ld 27,408(1)
+	ld 28,416(1)
+	ld 29,424(1)
+	ld 30,432(1)
+	ld 31,440(1)
+	addi 1,1,448
+	blr
+.long 0
+.byte 0,12,0x04,1,0x80,6,6,0     # traceback table (LR saved)
+.long 0
+
+.align 5
+_aesp8_xts_enc5x:                # encrypt 5 blocks (v7,v12..v15) with keys spilled at 1+64+15; caller preloads CTR; entry label doubles as the round-loop target
+	.long 0x10E7C508         # vcipher each of the 5 blocks with v24
+	.long 0x118CC508
+	.long 0x11ADC508
+	.long 0x11CEC508
+	.long 0x11EFC508
+	lvx 24,26,7
+	addi 7,7,0x20
+
+	.long 0x10E7CD08         # vcipher with v25
+	.long 0x118CCD08
+	.long 0x11ADCD08
+	.long 0x11CECD08
+	.long 0x11EFCD08
+	lvx 25,3,7
+	bc 16,0,_aesp8_xts_enc5x # bdnz back to the label above
+
+	add 10,10,31             # position input for a possible stealing tail
+	cmpwi 31,0
+	.long 0x10E7C508
+	.long 0x118CC508
+	.long 0x11ADC508
+	.long 0x11CEC508
+	.long 0x11EFC508
+
+	subi 10,10,16
+	.long 0x10E7CD08
+	.long 0x118CCD08
+	.long 0x11ADCD08
+	.long 0x11CECD08
+	.long 0x11EFCD08
+	vxor 17,17,31            # fold last round key into each tweak
+
+	.long 0x10E7D508
+	lvsr 5,0,31
+	.long 0x118CD508
+	.long 0x11ADD508
+	.long 0x11CED508
+	.long 0x11EFD508
+	vxor 1,18,31
+
+	.long 0x10E7DD08
+	.long 0x7C005699         # load the (possible) tail block for the caller
+	.long 0x118CDD08
+	.long 0x11ADDD08
+	.long 0x11CEDD08
+	.long 0x11EFDD08
+	vxor 2,19,31
+
+	addi 7,1,64+15           # rewind key pointer for the caller
+	.long 0x10E7E508
+	.long 0x118CE508
+	.long 0x11ADE508
+	.long 0x11CEE508
+	.long 0x11EFE508
+	lvx 24,0,7
+	vxor 3,20,31
+
+	.long 0x10E7ED08
+	vperm 0,0,0,6
+	.long 0x118CED08
+	.long 0x11ADED08
+	.long 0x11CEED08
+	.long 0x11EFED08
+	lvx 25,3,7
+	vxor 4,21,31
+
+	.long 0x10E7F508
+	vperm 0,0,0,5
+	.long 0x118CF508
+	.long 0x11ADF508
+	.long 0x11CEF508
+	.long 0x11EFF508
+
+	.long 0x10E78D09         # vcipherlast with the tweaked last keys
+	.long 0x118C0D09
+	.long 0x11AD1509
+	.long 0x11CE1D09
+	.long 0x11EF2509
+	blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0        # traceback table (leaf)
+
+.align 5
+_aesp8_xts_decrypt6x:
+ stdu 1,-448(1)
+ mflr 11
+ li 7,207
+ li 3,223
+ std 11,464(1)
+ stvx 20,7,1
+ addi 7,7,32
+ stvx 21,3,1
+ addi 3,3,32
+ stvx 22,7,1
+ addi 7,7,32
+ stvx 23,3,1
+ addi 3,3,32
+ stvx 24,7,1
+ addi 7,7,32
+ stvx 25,3,1
+ addi 3,3,32
+ stvx 26,7,1
+ addi 7,7,32
+ stvx 27,3,1
+ addi 3,3,32
+ stvx 28,7,1
+ addi 7,7,32
+ stvx 29,3,1
+ addi 3,3,32
+ stvx 30,7,1
+ stvx 31,3,1
+ li 0,-1
+ stw 12,396(1)
+ li 3,0x10
+ std 26,400(1)
+ li 26,0x20
+ std 27,408(1)
+ li 27,0x30
+ std 28,416(1)
+ li 28,0x40
+ std 29,424(1)
+ li 29,0x50
+ std 30,432(1)
+ li 30,0x60
+ std 31,440(1)
+ li 31,0x70
+ mtspr 256,0
+
+ subi 9,9,3
+
+ lvx 23,0,6
+ lvx 30,3,6
+ addi 6,6,0x20
+ lvx 31,0,6
+ vperm 23,30,23,7
+ addi 7,1,64+15
+ mtctr 9
+
+Load_xts_dec_key:
+ vperm 24,31,30,7
+ lvx 30,3,6
+ addi 6,6,0x20
+ stvx 24,0,7
+ vperm 25,30,31,7
+ lvx 31,0,6
+ stvx 25,3,7
+ addi 7,7,0x20
+ bc 16,0,Load_xts_dec_key
+
+ lvx 26,3,6
+ vperm 24,31,30,7
+ lvx 27,26,6
+ stvx 24,0,7
+ vperm 25,26,31,7
+ lvx 28,27,6
+ stvx 25,3,7
+ addi 7,1,64+15
+ vperm 26,27,26,7
+ lvx 29,28,6
+ vperm 27,28,27,7
+ lvx 30,29,6
+ vperm 28,29,28,7
+ lvx 31,30,6
+ vperm 29,30,29,7
+ lvx 22,31,6
+ vperm 30,31,30,7
+ lvx 24,0,7
+ vperm 31,22,31,7
+ lvx 25,3,7
+
+ vperm 0,2,4,5
+ subi 10,10,31
+ vxor 17,8,23
+ vsrab 11,8,9
+ vaddubm 8,8,8
+ vsldoi 11,11,11,15
+ vand 11,11,10
+ vxor 7,0,17
+ vxor 8,8,11
+
+ .long 0x7C235699
+ vxor 18,8,23
+ vsrab 11,8,9
+ vaddubm 8,8,8
+ vsldoi 11,11,11,15
+ vperm 1,1,1,6
+ vand 11,11,10
+ vxor 12,1,18
+ vxor 8,8,11
+
+ .long 0x7C5A5699
+ andi. 31,5,15
+ vxor 19,8,23
+ vsrab 11,8,9
+ vaddubm 8,8,8
+ vsldoi 11,11,11,15
+ vperm 2,2,2,6
+ vand 11,11,10
+ vxor 13,2,19
+ vxor 8,8,11
+
+ .long 0x7C7B5699
+ sub 5,5,31
+ vxor 20,8,23
+ vsrab 11,8,9
+ vaddubm 8,8,8
+ vsldoi 11,11,11,15
+ vperm 3,3,3,6
+ vand 11,11,10
+ vxor 14,3,20
+ vxor 8,8,11
+
+ .long 0x7C9C5699
+ subi 5,5,0x60
+ vxor 21,8,23
+ vsrab 11,8,9
+ vaddubm 8,8,8
+ vsldoi 11,11,11,15
+ vperm 4,4,4,6
+ vand 11,11,10
+ vxor 15,4,21
+ vxor 8,8,11
+
+ .long 0x7CBD5699
+ addi 10,10,0x60
+ vxor 22,8,23
+ vsrab 11,8,9
+ vaddubm 8,8,8
+ vsldoi 11,11,11,15
+ vperm 5,5,5,6
+ vand 11,11,10
+ vxor 16,5,22
+ vxor 8,8,11
+
+ vxor 31,31,23
+ mtctr 9
+ b Loop_xts_dec6x
+
+.align 5
+Loop_xts_dec6x:
+ .long 0x10E7C548
+ .long 0x118CC548
+ .long 0x11ADC548
+ .long 0x11CEC548
+ .long 0x11EFC548
+ .long 0x1210C548
+ lvx 24,26,7
+ addi 7,7,0x20
+
+ .long 0x10E7CD48
+ .long 0x118CCD48
+ .long 0x11ADCD48
+ .long 0x11CECD48
+ .long 0x11EFCD48
+ .long 0x1210CD48
+ lvx 25,3,7
+ bc 16,0,Loop_xts_dec6x
+
+ subic 5,5,96
+ vxor 0,17,31
+ .long 0x10E7C548
+ .long 0x118CC548
+ vsrab 11,8,9
+ vxor 17,8,23
+ vaddubm 8,8,8
+ .long 0x11ADC548
+ .long 0x11CEC548
+ vsldoi 11,11,11,15
+ .long 0x11EFC548
+ .long 0x1210C548
+
+ subfe. 0,0,0
+ vand 11,11,10
+ .long 0x10E7CD48
+ .long 0x118CCD48
+ vxor 8,8,11
+ .long 0x11ADCD48
+ .long 0x11CECD48
+ vxor 1,18,31
+ vsrab 11,8,9
+ vxor 18,8,23
+ .long 0x11EFCD48
+ .long 0x1210CD48
+
+ and 0,0,5
+ vaddubm 8,8,8
+ vsldoi 11,11,11,15
+ .long 0x10E7D548
+ .long 0x118CD548
+ vand 11,11,10
+ .long 0x11ADD548
+ .long 0x11CED548
+ vxor 8,8,11
+ .long 0x11EFD548
+ .long 0x1210D548
+
+ add 10,10,0
+
+
+
+ vxor 2,19,31
+ vsrab 11,8,9
+ vxor 19,8,23
+ vaddubm 8,8,8
+ .long 0x10E7DD48
+ .long 0x118CDD48
+ vsldoi 11,11,11,15
+ .long 0x11ADDD48
+ .long 0x11CEDD48
+ vand 11,11,10
+ .long 0x11EFDD48
+ .long 0x1210DD48
+
+ addi 7,1,64+15
+ vxor 8,8,11
+ .long 0x10E7E548
+ .long 0x118CE548
+ vxor 3,20,31
+ vsrab 11,8,9
+ vxor 20,8,23
+ .long 0x11ADE548
+ .long 0x11CEE548
+ vaddubm 8,8,8
+ vsldoi 11,11,11,15
+ .long 0x11EFE548
+ .long 0x1210E548
+ lvx 24,0,7
+ vand 11,11,10
+
+ .long 0x10E7ED48
+ .long 0x118CED48
+ vxor 8,8,11
+ .long 0x11ADED48
+ .long 0x11CEED48
+ vxor 4,21,31
+ vsrab 11,8,9
+ vxor 21,8,23
+ .long 0x11EFED48
+ .long 0x1210ED48
+ lvx 25,3,7
+ vaddubm 8,8,8
+ vsldoi 11,11,11,15
+
+ .long 0x10E7F548
+ .long 0x118CF548
+ vand 11,11,10
+ .long 0x11ADF548
+ .long 0x11CEF548
+ vxor 8,8,11
+ .long 0x11EFF548
+ .long 0x1210F548
+ vxor 5,22,31
+ vsrab 11,8,9
+ vxor 22,8,23
+
+ .long 0x10E70549
+ .long 0x7C005699
+ vaddubm 8,8,8
+ vsldoi 11,11,11,15
+ .long 0x118C0D49
+ .long 0x7C235699
+ .long 0x11AD1549
+ vperm 0,0,0,6
+ .long 0x7C5A5699
+ vand 11,11,10
+ .long 0x11CE1D49
+ vperm 1,1,1,6
+ .long 0x7C7B5699
+ .long 0x11EF2549
+ vperm 2,2,2,6
+ .long 0x7C9C5699
+ vxor 8,8,11
+ .long 0x12102D49
+ vperm 3,3,3,6
+ .long 0x7CBD5699
+ addi 10,10,0x60
+ vperm 4,4,4,6
+ vperm 5,5,5,6
+
+ vperm 7,7,7,6
+ vperm 12,12,12,6
+ .long 0x7CE02799
+ vxor 7,0,17
+ vperm 13,13,13,6
+ .long 0x7D832799
+ vxor 12,1,18
+ vperm 14,14,14,6
+ .long 0x7DBA2799
+ vxor 13,2,19
+ vperm 15,15,15,6
+ .long 0x7DDB2799
+ vxor 14,3,20
+ vperm 16,16,16,6
+ .long 0x7DFC2799
+ vxor 15,4,21
+ .long 0x7E1D2799
+ vxor 16,5,22
+ addi 4,4,0x60
+
+ mtctr 9
+ beq Loop_xts_dec6x
+
+ addic. 5,5,0x60
+ beq Lxts_dec6x_zero
+ cmpwi 5,0x20
+ blt Lxts_dec6x_one
+ nop
+ beq Lxts_dec6x_two
+ cmpwi 5,0x40
+ blt Lxts_dec6x_three
+ nop
+ beq Lxts_dec6x_four
+
+Lxts_dec6x_five:
+ vxor 7,1,17
+ vxor 12,2,18
+ vxor 13,3,19
+ vxor 14,4,20
+ vxor 15,5,21
+
+ bl _aesp8_xts_dec5x
+
+ vperm 7,7,7,6
+ vor 17,22,22
+ vxor 18,8,23
+ vperm 12,12,12,6
+ .long 0x7CE02799
+ vxor 7,0,18
+ vperm 13,13,13,6
+ .long 0x7D832799
+ vperm 14,14,14,6
+ .long 0x7DBA2799
+ vperm 15,15,15,6
+ .long 0x7DDB2799
+ .long 0x7DFC2799
+ addi 4,4,0x50
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_four:
+ vxor 7,2,17
+ vxor 12,3,18
+ vxor 13,4,19
+ vxor 14,5,20
+ vxor 15,15,15
+
+ bl _aesp8_xts_dec5x
+
+ vperm 7,7,7,6
+ vor 17,21,21
+ vor 18,22,22
+ vperm 12,12,12,6
+ .long 0x7CE02799
+ vxor 7,0,22
+ vperm 13,13,13,6
+ .long 0x7D832799
+ vperm 14,14,14,6
+ .long 0x7DBA2799
+ .long 0x7DDB2799
+ addi 4,4,0x40
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_three:
+ vxor 7,3,17
+ vxor 12,4,18
+ vxor 13,5,19
+ vxor 14,14,14
+ vxor 15,15,15
+
+ bl _aesp8_xts_dec5x
+
+ vperm 7,7,7,6
+ vor 17,20,20
+ vor 18,21,21
+ vperm 12,12,12,6
+ .long 0x7CE02799
+ vxor 7,0,21
+ vperm 13,13,13,6
+ .long 0x7D832799
+ .long 0x7DBA2799
+ addi 4,4,0x30
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_two:
+ vxor 7,4,17
+ vxor 12,5,18
+ vxor 13,13,13
+ vxor 14,14,14
+ vxor 15,15,15
+
+ bl _aesp8_xts_dec5x
+
+ vperm 7,7,7,6
+ vor 17,19,19
+ vor 18,20,20
+ vperm 12,12,12,6
+ .long 0x7CE02799
+ vxor 7,0,20
+ .long 0x7D832799
+ addi 4,4,0x20
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_one:
+ vxor 7,5,17
+ nop
+Loop_xts_dec1x:
+ .long 0x10E7C548
+ lvx 24,26,7
+ addi 7,7,0x20
+
+ .long 0x10E7CD48
+ lvx 25,3,7
+ bc 16,0,Loop_xts_dec1x
+
+ subi 0,31,1
+ .long 0x10E7C548
+
+ andi. 0,0,16
+ cmpwi 31,0
+ .long 0x10E7CD48
+
+ sub 10,10,0
+ .long 0x10E7D548
+
+ .long 0x7C005699
+ .long 0x10E7DD48
+
+ addi 7,1,64+15
+ .long 0x10E7E548
+ lvx 24,0,7
+
+ .long 0x10E7ED48
+ lvx 25,3,7
+ vxor 17,17,31
+
+ vperm 0,0,0,6
+ .long 0x10E7F548
+
+ mtctr 9
+ .long 0x10E78D49
+
+ vor 17,18,18
+ vor 18,19,19
+ vperm 7,7,7,6
+ .long 0x7CE02799
+ addi 4,4,0x10
+ vxor 7,0,19
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_zero:
+ cmpwi 31,0
+ beq Lxts_dec6x_done
+
+ .long 0x7C005699
+ vperm 0,0,0,6
+ vxor 7,0,18
+Lxts_dec6x_steal:
+ .long 0x10E7C548
+ lvx 24,26,7
+ addi 7,7,0x20
+
+ .long 0x10E7CD48
+ lvx 25,3,7
+ bc 16,0,Lxts_dec6x_steal
+
+ add 10,10,31
+ .long 0x10E7C548
+
+ cmpwi 31,0
+ .long 0x10E7CD48
+
+ .long 0x7C005699
+ .long 0x10E7D548
+
+ lvsr 5,0,31
+ .long 0x10E7DD48
+
+ addi 7,1,64+15
+ .long 0x10E7E548
+ lvx 24,0,7
+
+ .long 0x10E7ED48
+ lvx 25,3,7
+ vxor 18,18,31
+
+ vperm 0,0,0,6
+ .long 0x10E7F548
+
+ vperm 0,0,0,5
+ .long 0x11679549
+
+ vperm 7,11,11,6
+ .long 0x7CE02799
+
+
+ vxor 7,7,7
+ vspltisb 12,-1
+ vperm 7,7,12,5
+ vsel 7,0,11,7
+ vxor 7,7,17
+
+ subi 30,4,1
+ mtctr 31
+Loop_xts_dec6x_steal:
+ lbzu 0,1(30)
+ stb 0,16(30)
+ bc 16,0,Loop_xts_dec6x_steal
+
+ li 31,0
+ mtctr 9
+ b Loop_xts_dec1x
+
+.align 4
+Lxts_dec6x_done:
+ cmpldi 8,0
+ beq Lxts_dec6x_ret
+
+ vxor 8,17,23
+ vperm 8,8,8,6
+ .long 0x7D004799
+
+Lxts_dec6x_ret:
+ mtlr 11
+ li 10,79
+ li 11,95
+ stvx 9,10,1
+ addi 10,10,32
+ stvx 9,11,1
+ addi 11,11,32
+ stvx 9,10,1
+ addi 10,10,32
+ stvx 9,11,1
+ addi 11,11,32
+ stvx 9,10,1
+ addi 10,10,32
+ stvx 9,11,1
+ addi 11,11,32
+ stvx 9,10,1
+ addi 10,10,32
+ stvx 9,11,1
+ addi 11,11,32
+
+ mtspr 256,12
+ lvx 20,10,1
+ addi 10,10,32
+ lvx 21,11,1
+ addi 11,11,32
+ lvx 22,10,1
+ addi 10,10,32
+ lvx 23,11,1
+ addi 11,11,32
+ lvx 24,10,1
+ addi 10,10,32
+ lvx 25,11,1
+ addi 11,11,32
+ lvx 26,10,1
+ addi 10,10,32
+ lvx 27,11,1
+ addi 11,11,32
+ lvx 28,10,1
+ addi 10,10,32
+ lvx 29,11,1
+ addi 11,11,32
+ lvx 30,10,1
+ lvx 31,11,1
+ ld 26,400(1)
+ ld 27,408(1)
+ ld 28,416(1)
+ ld 29,424(1)
+ ld 30,432(1)
+ ld 31,440(1)
+ addi 1,1,448
+ blr
+.long 0
+.byte 0,12,0x04,1,0x80,6,6,0
+.long 0
+
+.align 5
+_aesp8_xts_dec5x:
+ .long 0x10E7C548
+ .long 0x118CC548
+ .long 0x11ADC548
+ .long 0x11CEC548
+ .long 0x11EFC548
+ lvx 24,26,7
+ addi 7,7,0x20
+
+ .long 0x10E7CD48
+ .long 0x118CCD48
+ .long 0x11ADCD48
+ .long 0x11CECD48
+ .long 0x11EFCD48
+ lvx 25,3,7
+ bc 16,0,_aesp8_xts_dec5x
+
+ subi 0,31,1
+ .long 0x10E7C548
+ .long 0x118CC548
+ .long 0x11ADC548
+ .long 0x11CEC548
+ .long 0x11EFC548
+
+ andi. 0,0,16
+ cmpwi 31,0
+ .long 0x10E7CD48
+ .long 0x118CCD48
+ .long 0x11ADCD48
+ .long 0x11CECD48
+ .long 0x11EFCD48
+ vxor 17,17,31
+
+ sub 10,10,0
+ .long 0x10E7D548
+ .long 0x118CD548
+ .long 0x11ADD548
+ .long 0x11CED548
+ .long 0x11EFD548
+ vxor 1,18,31
+
+ .long 0x10E7DD48
+ .long 0x7C005699
+ .long 0x118CDD48
+ .long 0x11ADDD48
+ .long 0x11CEDD48
+ .long 0x11EFDD48
+ vxor 2,19,31
+
+ addi 7,1,64+15
+ .long 0x10E7E548
+ .long 0x118CE548
+ .long 0x11ADE548
+ .long 0x11CEE548
+ .long 0x11EFE548
+ lvx 24,0,7
+ vxor 3,20,31
+
+ .long 0x10E7ED48
+ vperm 0,0,0,6
+ .long 0x118CED48
+ .long 0x11ADED48
+ .long 0x11CEED48
+ .long 0x11EFED48
+ lvx 25,3,7
+ vxor 4,21,31
+
+ .long 0x10E7F548
+ .long 0x118CF548
+ .long 0x11ADF548
+ .long 0x11CEF548
+ .long 0x11EFF548
+
+ .long 0x10E78D49
+ .long 0x118C0D49
+ .long 0x11AD1549
+ .long 0x11CE1D49
+ .long 0x11EF2549
+ mtctr 9
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
diff --git a/linux-ppc64le/crypto/modes/ghashp8-ppc.S b/linux-ppc64le/crypto/modes/ghashp8-ppc.S
new file mode 100644
index 0000000..69ae1a5
--- /dev/null
+++ b/linux-ppc64le/crypto/modes/ghashp8-ppc.S
@@ -0,0 +1,565 @@
+.machine "any"
+
+.text
+
+.globl gcm_init_p8
+.align 5
+gcm_init_p8:
+ li 0,-4096
+ li 8,0x10
+ mfspr 12,256
+ li 9,0x20
+ mtspr 256,0
+ li 10,0x30
+ .long 0x7D202699
+
+ vspltisb 8,-16
+ vspltisb 5,1
+ vaddubm 8,8,8
+ vxor 4,4,4
+ vor 8,8,5
+ vsldoi 8,8,4,15
+ vsldoi 6,4,5,1
+ vaddubm 8,8,8
+ vspltisb 7,7
+ vor 8,8,6
+ vspltb 6,9,0
+ vsl 9,9,5
+ vsrab 6,6,7
+ vand 6,6,8
+ vxor 3,9,6
+
+ vsldoi 9,3,3,8
+ vsldoi 8,4,8,8
+ vsldoi 11,4,9,8
+ vsldoi 10,9,4,8
+
+ .long 0x7D001F99
+ .long 0x7D681F99
+ li 8,0x40
+ .long 0x7D291F99
+ li 9,0x50
+ .long 0x7D4A1F99
+ li 10,0x60
+
+ .long 0x10035CC8
+ .long 0x10234CC8
+ .long 0x104354C8
+
+ .long 0x10E044C8
+
+ vsldoi 5,1,4,8
+ vsldoi 6,4,1,8
+ vxor 0,0,5
+ vxor 2,2,6
+
+ vsldoi 0,0,0,8
+ vxor 0,0,7
+
+ vsldoi 6,0,0,8
+ .long 0x100044C8
+ vxor 6,6,2
+ vxor 16,0,6
+
+ vsldoi 17,16,16,8
+ vsldoi 19,4,17,8
+ vsldoi 18,17,4,8
+
+ .long 0x7E681F99
+ li 8,0x70
+ .long 0x7E291F99
+ li 9,0x80
+ .long 0x7E4A1F99
+ li 10,0x90
+ .long 0x10039CC8
+ .long 0x11B09CC8
+ .long 0x10238CC8
+ .long 0x11D08CC8
+ .long 0x104394C8
+ .long 0x11F094C8
+
+ .long 0x10E044C8
+ .long 0x114D44C8
+
+ vsldoi 5,1,4,8
+ vsldoi 6,4,1,8
+ vsldoi 11,14,4,8
+ vsldoi 9,4,14,8
+ vxor 0,0,5
+ vxor 2,2,6
+ vxor 13,13,11
+ vxor 15,15,9
+
+ vsldoi 0,0,0,8
+ vsldoi 13,13,13,8
+ vxor 0,0,7
+ vxor 13,13,10
+
+ vsldoi 6,0,0,8
+ vsldoi 9,13,13,8
+ .long 0x100044C8
+ .long 0x11AD44C8
+ vxor 6,6,2
+ vxor 9,9,15
+ vxor 0,0,6
+ vxor 13,13,9
+
+ vsldoi 9,0,0,8
+ vsldoi 17,13,13,8
+ vsldoi 11,4,9,8
+ vsldoi 10,9,4,8
+ vsldoi 19,4,17,8
+ vsldoi 18,17,4,8
+
+ .long 0x7D681F99
+ li 8,0xa0
+ .long 0x7D291F99
+ li 9,0xb0
+ .long 0x7D4A1F99
+ li 10,0xc0
+ .long 0x7E681F99
+ .long 0x7E291F99
+ .long 0x7E4A1F99
+
+ mtspr 256,12
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,2,0
+.long 0
+
+.globl gcm_gmult_p8
+.align 5
+gcm_gmult_p8:
+ lis 0,0xfff8
+ li 8,0x10
+ mfspr 12,256
+ li 9,0x20
+ mtspr 256,0
+ li 10,0x30
+ .long 0x7C601E99
+
+ .long 0x7D682699
+ lvsl 12,0,0
+ .long 0x7D292699
+ vspltisb 5,0x07
+ .long 0x7D4A2699
+ vxor 12,12,5
+ .long 0x7D002699
+ vperm 3,3,3,12
+ vxor 4,4,4
+
+ .long 0x10035CC8
+ .long 0x10234CC8
+ .long 0x104354C8
+
+ .long 0x10E044C8
+
+ vsldoi 5,1,4,8
+ vsldoi 6,4,1,8
+ vxor 0,0,5
+ vxor 2,2,6
+
+ vsldoi 0,0,0,8
+ vxor 0,0,7
+
+ vsldoi 6,0,0,8
+ .long 0x100044C8
+ vxor 6,6,2
+ vxor 0,0,6
+
+ vperm 0,0,0,12
+ .long 0x7C001F99
+
+ mtspr 256,12
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,2,0
+.long 0
+
+
+.globl gcm_ghash_p8
+.align 5
+gcm_ghash_p8:
+ li 0,-4096
+ li 8,0x10
+ mfspr 12,256
+ li 9,0x20
+ mtspr 256,0
+ li 10,0x30
+ .long 0x7C001E99
+
+ .long 0x7D682699
+ li 8,0x40
+ lvsl 12,0,0
+ .long 0x7D292699
+ li 9,0x50
+ vspltisb 5,0x07
+ .long 0x7D4A2699
+ li 10,0x60
+ vxor 12,12,5
+ .long 0x7D002699
+ vperm 0,0,0,12
+ vxor 4,4,4
+
+ cmpldi 6,64
+ bge Lgcm_ghash_p8_4x
+
+ .long 0x7C602E99
+ addi 5,5,16
+ subic. 6,6,16
+ vperm 3,3,3,12
+ vxor 3,3,0
+ beq Lshort
+
+ .long 0x7E682699
+ li 8,16
+ .long 0x7E292699
+ add 9,5,6
+ .long 0x7E4A2699
+
+
+.align 5
+Loop_2x:
+ .long 0x7E002E99
+ vperm 16,16,16,12
+
+ subic 6,6,32
+ .long 0x10039CC8
+ .long 0x11B05CC8
+ subfe 0,0,0
+ .long 0x10238CC8
+ .long 0x11D04CC8
+ and 0,0,6
+ .long 0x104394C8
+ .long 0x11F054C8
+ add 5,5,0
+
+ vxor 0,0,13
+ vxor 1,1,14
+
+ .long 0x10E044C8
+
+ vsldoi 5,1,4,8
+ vsldoi 6,4,1,8
+ vxor 2,2,15
+ vxor 0,0,5
+ vxor 2,2,6
+
+ vsldoi 0,0,0,8
+ vxor 0,0,7
+ .long 0x7C682E99
+ addi 5,5,32
+
+ vsldoi 6,0,0,8
+ .long 0x100044C8
+ vperm 3,3,3,12
+ vxor 6,6,2
+ vxor 3,3,6
+ vxor 3,3,0
+ cmpld 9,5
+ bgt Loop_2x
+
+ cmplwi 6,0
+ bne Leven
+
+Lshort:
+ .long 0x10035CC8
+ .long 0x10234CC8
+ .long 0x104354C8
+
+ .long 0x10E044C8
+
+ vsldoi 5,1,4,8
+ vsldoi 6,4,1,8
+ vxor 0,0,5
+ vxor 2,2,6
+
+ vsldoi 0,0,0,8
+ vxor 0,0,7
+
+ vsldoi 6,0,0,8
+ .long 0x100044C8
+ vxor 6,6,2
+
+Leven:
+ vxor 0,0,6
+ vperm 0,0,0,12
+ .long 0x7C001F99
+
+ mtspr 256,12
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,4,0
+.long 0
+.align 5
+.gcm_ghash_p8_4x:
+Lgcm_ghash_p8_4x:
+ stdu 1,-256(1)
+ li 10,63
+ li 11,79
+ stvx 20,10,1
+ addi 10,10,32
+ stvx 21,11,1
+ addi 11,11,32
+ stvx 22,10,1
+ addi 10,10,32
+ stvx 23,11,1
+ addi 11,11,32
+ stvx 24,10,1
+ addi 10,10,32
+ stvx 25,11,1
+ addi 11,11,32
+ stvx 26,10,1
+ addi 10,10,32
+ stvx 27,11,1
+ addi 11,11,32
+ stvx 28,10,1
+ addi 10,10,32
+ stvx 29,11,1
+ addi 11,11,32
+ stvx 30,10,1
+ li 10,0x60
+ stvx 31,11,1
+ li 0,-1
+ stw 12,252(1)
+ mtspr 256,0
+
+ lvsl 5,0,8
+
+ li 8,0x70
+ .long 0x7E292699
+ li 9,0x80
+ vspltisb 6,8
+
+ li 10,0x90
+ .long 0x7EE82699
+ li 8,0xa0
+ .long 0x7F092699
+ li 9,0xb0
+ .long 0x7F2A2699
+ li 10,0xc0
+ .long 0x7FA82699
+ li 8,0x10
+ .long 0x7FC92699
+ li 9,0x20
+ .long 0x7FEA2699
+ li 10,0x30
+
+ vsldoi 7,4,6,8
+ vaddubm 18,5,7
+ vaddubm 19,6,18
+
+ srdi 6,6,4
+
+ .long 0x7C602E99
+ .long 0x7E082E99
+ subic. 6,6,8
+ .long 0x7EC92E99
+ .long 0x7F8A2E99
+ addi 5,5,0x40
+ vperm 3,3,3,12
+ vperm 16,16,16,12
+ vperm 22,22,22,12
+ vperm 28,28,28,12
+
+ vxor 2,3,0
+
+ .long 0x11B0BCC8
+ .long 0x11D0C4C8
+ .long 0x11F0CCC8
+
+ vperm 11,17,9,18
+ vperm 5,22,28,19
+ vperm 10,17,9,19
+ vperm 6,22,28,18
+ .long 0x12B68CC8
+ .long 0x12855CC8
+ .long 0x137C4CC8
+ .long 0x134654C8
+
+ vxor 21,21,14
+ vxor 20,20,13
+ vxor 27,27,21
+ vxor 26,26,15
+
+ blt Ltail_4x
+
+Loop_4x:
+ .long 0x7C602E99
+ .long 0x7E082E99
+ subic. 6,6,4
+ .long 0x7EC92E99
+ .long 0x7F8A2E99
+ addi 5,5,0x40
+ vperm 16,16,16,12
+ vperm 22,22,22,12
+ vperm 28,28,28,12
+ vperm 3,3,3,12
+
+ .long 0x1002ECC8
+ .long 0x1022F4C8
+ .long 0x1042FCC8
+ .long 0x11B0BCC8
+ .long 0x11D0C4C8
+ .long 0x11F0CCC8
+
+ vxor 0,0,20
+ vxor 1,1,27
+ vxor 2,2,26
+ vperm 5,22,28,19
+ vperm 6,22,28,18
+
+ .long 0x10E044C8
+ .long 0x12855CC8
+ .long 0x134654C8
+
+ vsldoi 5,1,4,8
+ vsldoi 6,4,1,8
+ vxor 0,0,5
+ vxor 2,2,6
+
+ vsldoi 0,0,0,8
+ vxor 0,0,7
+
+ vsldoi 6,0,0,8
+ .long 0x12B68CC8
+ .long 0x137C4CC8
+ .long 0x100044C8
+
+ vxor 20,20,13
+ vxor 26,26,15
+ vxor 2,2,3
+ vxor 21,21,14
+ vxor 2,2,6
+ vxor 27,27,21
+ vxor 2,2,0
+ bge Loop_4x
+
+Ltail_4x:
+ .long 0x1002ECC8
+ .long 0x1022F4C8
+ .long 0x1042FCC8
+
+ vxor 0,0,20
+ vxor 1,1,27
+
+ .long 0x10E044C8
+
+ vsldoi 5,1,4,8
+ vsldoi 6,4,1,8
+ vxor 2,2,26
+ vxor 0,0,5
+ vxor 2,2,6
+
+ vsldoi 0,0,0,8
+ vxor 0,0,7
+
+ vsldoi 6,0,0,8
+ .long 0x100044C8
+ vxor 6,6,2
+ vxor 0,0,6
+
+ addic. 6,6,4
+ beq Ldone_4x
+
+ .long 0x7C602E99
+ cmpldi 6,2
+ li 6,-4
+ blt Lone
+ .long 0x7E082E99
+ beq Ltwo
+
+Lthree:
+ .long 0x7EC92E99
+ vperm 3,3,3,12
+ vperm 16,16,16,12
+ vperm 22,22,22,12
+
+ vxor 2,3,0
+ vor 29,23,23
+ vor 30, 24, 24
+ vor 31,25,25
+
+ vperm 5,16,22,19
+ vperm 6,16,22,18
+ .long 0x12B08CC8
+ .long 0x13764CC8
+ .long 0x12855CC8
+ .long 0x134654C8
+
+ vxor 27,27,21
+ b Ltail_4x
+
+.align 4
+Ltwo:
+ vperm 3,3,3,12
+ vperm 16,16,16,12
+
+ vxor 2,3,0
+ vperm 5,4,16,19
+ vperm 6,4,16,18
+
+ vsldoi 29,4,17,8
+ vor 30, 17, 17
+ vsldoi 31,17,4,8
+
+ .long 0x12855CC8
+ .long 0x13704CC8
+ .long 0x134654C8
+
+ b Ltail_4x
+
+.align 4
+Lone:
+ vperm 3,3,3,12
+
+ vsldoi 29,4,9,8
+ vor 30, 9, 9
+ vsldoi 31,9,4,8
+
+ vxor 2,3,0
+ vxor 20,20,20
+ vxor 27,27,27
+ vxor 26,26,26
+
+ b Ltail_4x
+
+Ldone_4x:
+ vperm 0,0,0,12
+ .long 0x7C001F99
+
+ li 10,63
+ li 11,79
+ mtspr 256,12
+ lvx 20,10,1
+ addi 10,10,32
+ lvx 21,11,1
+ addi 11,11,32
+ lvx 22,10,1
+ addi 10,10,32
+ lvx 23,11,1
+ addi 11,11,32
+ lvx 24,10,1
+ addi 10,10,32
+ lvx 25,11,1
+ addi 11,11,32
+ lvx 26,10,1
+ addi 10,10,32
+ lvx 27,11,1
+ addi 11,11,32
+ lvx 28,10,1
+ addi 10,10,32
+ lvx 29,11,1
+ addi 11,11,32
+ lvx 30,10,1
+ lvx 31,11,1
+ addi 1,1,256
+ blr
+.long 0
+.byte 0,12,0x04,0,0x80,0,4,0
+.long 0
+
+
+.byte 71,72,65,83,72,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
+.align 2
diff --git a/linux-x86/crypto/rc4/rc4-586.S b/linux-x86/crypto/rc4/rc4-586.S
deleted file mode 100644
index d245589..0000000
--- a/linux-x86/crypto/rc4/rc4-586.S
+++ /dev/null
@@ -1,350 +0,0 @@
-#if defined(__i386__)
-.file "rc4-586.S"
-.text
-.globl asm_RC4
-.hidden asm_RC4
-.type asm_RC4,@function
-.align 16
-asm_RC4:
-.L_asm_RC4_begin:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- movl 20(%esp),%edi
- movl 24(%esp),%edx
- movl 28(%esp),%esi
- movl 32(%esp),%ebp
- xorl %eax,%eax
- xorl %ebx,%ebx
- cmpl $0,%edx
- je .L000abort
- movb (%edi),%al
- movb 4(%edi),%bl
- addl $8,%edi
- leal (%esi,%edx,1),%ecx
- subl %esi,%ebp
- movl %ecx,24(%esp)
- incb %al
- cmpl $-1,256(%edi)
- je .L001RC4_CHAR
- movl (%edi,%eax,4),%ecx
- andl $-4,%edx
- jz .L002loop1
- movl %ebp,32(%esp)
- testl $-8,%edx
- jz .L003go4loop4
- call .L004PIC_me_up
-.L004PIC_me_up:
- popl %ebp
- leal OPENSSL_ia32cap_P-.L004PIC_me_up(%ebp),%ebp
- btl $26,(%ebp)
- jnc .L003go4loop4
- movl 32(%esp),%ebp
- andl $-8,%edx
- leal -8(%esi,%edx,1),%edx
- movl %edx,-4(%edi)
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- movq (%esi),%mm0
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm2
- jmp .L005loop_mmx_enter
-.align 16
-.L006loop_mmx:
- addb %cl,%bl
- psllq $56,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movq (%esi),%mm0
- movq %mm2,-8(%ebp,%esi,1)
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm2
-.L005loop_mmx_enter:
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm0,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $8,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $16,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $24,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $32,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $40,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $48,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- movl %ebx,%edx
- xorl %ebx,%ebx
- movb %dl,%bl
- cmpl -4(%edi),%esi
- leal 8(%esi),%esi
- jb .L006loop_mmx
- psllq $56,%mm1
- pxor %mm1,%mm2
- movq %mm2,-8(%ebp,%esi,1)
- emms
- cmpl 24(%esp),%esi
- je .L007done
- jmp .L002loop1
-.align 16
-.L003go4loop4:
- leal -4(%esi,%edx,1),%edx
- movl %edx,28(%esp)
-.L008loop4:
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- addl %ecx,%edx
- incb %al
- andl $255,%edx
- movl (%edi,%eax,4),%ecx
- movl (%edi,%edx,4),%ebp
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- addl %ecx,%edx
- incb %al
- andl $255,%edx
- rorl $8,%ebp
- movl (%edi,%eax,4),%ecx
- orl (%edi,%edx,4),%ebp
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- addl %ecx,%edx
- incb %al
- andl $255,%edx
- rorl $8,%ebp
- movl (%edi,%eax,4),%ecx
- orl (%edi,%edx,4),%ebp
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- addl %ecx,%edx
- incb %al
- andl $255,%edx
- rorl $8,%ebp
- movl 32(%esp),%ecx
- orl (%edi,%edx,4),%ebp
- rorl $8,%ebp
- xorl (%esi),%ebp
- cmpl 28(%esp),%esi
- movl %ebp,(%ecx,%esi,1)
- leal 4(%esi),%esi
- movl (%edi,%eax,4),%ecx
- jb .L008loop4
- cmpl 24(%esp),%esi
- je .L007done
- movl 32(%esp),%ebp
-.align 16
-.L002loop1:
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- addl %ecx,%edx
- incb %al
- andl $255,%edx
- movl (%edi,%edx,4),%edx
- xorb (%esi),%dl
- leal 1(%esi),%esi
- movl (%edi,%eax,4),%ecx
- cmpl 24(%esp),%esi
- movb %dl,-1(%ebp,%esi,1)
- jb .L002loop1
- jmp .L007done
-.align 16
-.L001RC4_CHAR:
- movzbl (%edi,%eax,1),%ecx
-.L009cloop1:
- addb %cl,%bl
- movzbl (%edi,%ebx,1),%edx
- movb %cl,(%edi,%ebx,1)
- movb %dl,(%edi,%eax,1)
- addb %cl,%dl
- movzbl (%edi,%edx,1),%edx
- addb $1,%al
- xorb (%esi),%dl
- leal 1(%esi),%esi
- movzbl (%edi,%eax,1),%ecx
- cmpl 24(%esp),%esi
- movb %dl,-1(%ebp,%esi,1)
- jb .L009cloop1
-.L007done:
- decb %al
- movl %ebx,-4(%edi)
- movb %al,-8(%edi)
-.L000abort:
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size asm_RC4,.-.L_asm_RC4_begin
-.globl asm_RC4_set_key
-.hidden asm_RC4_set_key
-.type asm_RC4_set_key,@function
-.align 16
-asm_RC4_set_key:
-.L_asm_RC4_set_key_begin:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- movl 20(%esp),%edi
- movl 24(%esp),%ebp
- movl 28(%esp),%esi
- call .L010PIC_me_up
-.L010PIC_me_up:
- popl %edx
- leal OPENSSL_ia32cap_P-.L010PIC_me_up(%edx),%edx
- leal 8(%edi),%edi
- leal (%esi,%ebp,1),%esi
- negl %ebp
- xorl %eax,%eax
- movl %ebp,-4(%edi)
- btl $20,(%edx)
- jc .L011c1stloop
-.align 16
-.L012w1stloop:
- movl %eax,(%edi,%eax,4)
- addb $1,%al
- jnc .L012w1stloop
- xorl %ecx,%ecx
- xorl %edx,%edx
-.align 16
-.L013w2ndloop:
- movl (%edi,%ecx,4),%eax
- addb (%esi,%ebp,1),%dl
- addb %al,%dl
- addl $1,%ebp
- movl (%edi,%edx,4),%ebx
- jnz .L014wnowrap
- movl -4(%edi),%ebp
-.L014wnowrap:
- movl %eax,(%edi,%edx,4)
- movl %ebx,(%edi,%ecx,4)
- addb $1,%cl
- jnc .L013w2ndloop
- jmp .L015exit
-.align 16
-.L011c1stloop:
- movb %al,(%edi,%eax,1)
- addb $1,%al
- jnc .L011c1stloop
- xorl %ecx,%ecx
- xorl %edx,%edx
- xorl %ebx,%ebx
-.align 16
-.L016c2ndloop:
- movb (%edi,%ecx,1),%al
- addb (%esi,%ebp,1),%dl
- addb %al,%dl
- addl $1,%ebp
- movb (%edi,%edx,1),%bl
- jnz .L017cnowrap
- movl -4(%edi),%ebp
-.L017cnowrap:
- movb %al,(%edi,%edx,1)
- movb %bl,(%edi,%ecx,1)
- addb $1,%cl
- jnc .L016c2ndloop
- movl $-1,256(%edi)
-.L015exit:
- xorl %eax,%eax
- movl %eax,-8(%edi)
- movl %eax,-4(%edi)
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size asm_RC4_set_key,.-.L_asm_RC4_set_key_begin
-#endif
diff --git a/linux-x86_64/crypto/ec/p256-x86_64-asm.S b/linux-x86_64/crypto/ec/p256-x86_64-asm.S
index 4abce6f..e059dd6 100644
--- a/linux-x86_64/crypto/ec/p256-x86_64-asm.S
+++ b/linux-x86_64/crypto/ec/p256-x86_64-asm.S
@@ -24,6 +24,7 @@
pushq %r13
movq 0(%rsi),%r8
+ xorq %r13,%r13
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
@@ -34,7 +35,7 @@
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
- sbbq %r13,%r13
+ adcq $0,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
@@ -42,14 +43,14 @@
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -625,6 +626,8 @@
movq %r9,%rsi
adcq $0,%rdx
+
+
subq $-1,%r8
movq %r10,%rax
sbbq %r12,%r9
@@ -765,13 +768,14 @@
.type __ecp_nistz256_add_toq,@function
.align 32
__ecp_nistz256_add_toq:
+ xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -779,14 +783,14 @@
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -854,13 +858,14 @@
.type __ecp_nistz256_mul_by_2q,@function
.align 32
__ecp_nistz256_mul_by_2q:
+ xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -868,14 +873,14 @@
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -1107,16 +1112,14 @@
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@@ -1128,14 +1131,14 @@
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
- por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
- por %xmm1,%xmm3
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
@@ -1146,8 +1149,8 @@
call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm3,%xmm4
- por %xmm3,%xmm4
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@@ -1330,6 +1333,7 @@
+ xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@@ -1337,7 +1341,7 @@
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1345,15 +1349,15 @@
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@@ -1508,16 +1512,14 @@
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@@ -1635,6 +1637,7 @@
+ xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@@ -1642,7 +1645,7 @@
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1650,15 +1653,15 @@
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
diff --git a/linux-x86_64/crypto/rc4/rc4-x86_64.S b/linux-x86_64/crypto/rc4/rc4-x86_64.S
deleted file mode 100644
index c4d1002..0000000
--- a/linux-x86_64/crypto/rc4/rc4-x86_64.S
+++ /dev/null
@@ -1,596 +0,0 @@
-#if defined(__x86_64__)
-.text
-.extern OPENSSL_ia32cap_P
-.hidden OPENSSL_ia32cap_P
-
-.globl asm_RC4
-.hidden asm_RC4
-.type asm_RC4,@function
-.align 16
-asm_RC4:
- orq %rsi,%rsi
- jne .Lentry
- .byte 0xf3,0xc3
-.Lentry:
- pushq %rbx
- pushq %r12
- pushq %r13
-.Lprologue:
- movq %rsi,%r11
- movq %rdx,%r12
- movq %rcx,%r13
- xorq %r10,%r10
- xorq %rcx,%rcx
-
- leaq 8(%rdi),%rdi
- movb -8(%rdi),%r10b
- movb -4(%rdi),%cl
- cmpl $-1,256(%rdi)
- je .LRC4_CHAR
- movl OPENSSL_ia32cap_P(%rip),%r8d
- xorq %rbx,%rbx
- incb %r10b
- subq %r10,%rbx
- subq %r12,%r13
- movl (%rdi,%r10,4),%eax
- testq $-16,%r11
- jz .Lloop1
- btl $30,%r8d
- jc .Lintel
- andq $7,%rbx
- leaq 1(%r10),%rsi
- jz .Loop8
- subq %rbx,%r11
-.Loop8_warmup:
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl %edx,(%rdi,%r10,4)
- addb %dl,%al
- incb %r10b
- movl (%rdi,%rax,4),%edx
- movl (%rdi,%r10,4),%eax
- xorb (%r12),%dl
- movb %dl,(%r12,%r13,1)
- leaq 1(%r12),%r12
- decq %rbx
- jnz .Loop8_warmup
-
- leaq 1(%r10),%rsi
- jmp .Loop8
-.align 16
-.Loop8:
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl 0(%rdi,%rsi,4),%ebx
- rorq $8,%r8
- movl %edx,0(%rdi,%r10,4)
- addb %al,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %bl,%cl
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- movl 4(%rdi,%rsi,4),%eax
- rorq $8,%r8
- movl %edx,4(%rdi,%r10,4)
- addb %bl,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl 8(%rdi,%rsi,4),%ebx
- rorq $8,%r8
- movl %edx,8(%rdi,%r10,4)
- addb %al,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %bl,%cl
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- movl 12(%rdi,%rsi,4),%eax
- rorq $8,%r8
- movl %edx,12(%rdi,%r10,4)
- addb %bl,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl 16(%rdi,%rsi,4),%ebx
- rorq $8,%r8
- movl %edx,16(%rdi,%r10,4)
- addb %al,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %bl,%cl
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- movl 20(%rdi,%rsi,4),%eax
- rorq $8,%r8
- movl %edx,20(%rdi,%r10,4)
- addb %bl,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl 24(%rdi,%rsi,4),%ebx
- rorq $8,%r8
- movl %edx,24(%rdi,%r10,4)
- addb %al,%dl
- movb (%rdi,%rdx,4),%r8b
- addb $8,%sil
- addb %bl,%cl
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- movl -4(%rdi,%rsi,4),%eax
- rorq $8,%r8
- movl %edx,28(%rdi,%r10,4)
- addb %bl,%dl
- movb (%rdi,%rdx,4),%r8b
- addb $8,%r10b
- rorq $8,%r8
- subq $8,%r11
-
- xorq (%r12),%r8
- movq %r8,(%r12,%r13,1)
- leaq 8(%r12),%r12
-
- testq $-8,%r11
- jnz .Loop8
- cmpq $0,%r11
- jne .Lloop1
- jmp .Lexit
-
-.align 16
-.Lintel:
- testq $-32,%r11
- jz .Lloop1
- andq $15,%rbx
- jz .Loop16_is_hot
- subq %rbx,%r11
-.Loop16_warmup:
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl %edx,(%rdi,%r10,4)
- addb %dl,%al
- incb %r10b
- movl (%rdi,%rax,4),%edx
- movl (%rdi,%r10,4),%eax
- xorb (%r12),%dl
- movb %dl,(%r12,%r13,1)
- leaq 1(%r12),%r12
- decq %rbx
- jnz .Loop16_warmup
-
- movq %rcx,%rbx
- xorq %rcx,%rcx
- movb %bl,%cl
-
-.Loop16_is_hot:
- leaq (%rdi,%r10,4),%rsi
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- pxor %xmm0,%xmm0
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 4(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,0(%rsi)
- addb %bl,%cl
- pinsrw $0,(%rdi,%rax,4),%xmm0
- jmp .Loop16_enter
-.align 16
-.Loop16:
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- pxor %xmm0,%xmm2
- psllq $8,%xmm1
- pxor %xmm0,%xmm0
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 4(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,0(%rsi)
- pxor %xmm1,%xmm2
- addb %bl,%cl
- pinsrw $0,(%rdi,%rax,4),%xmm0
- movdqu %xmm2,(%r12,%r13,1)
- leaq 16(%r12),%r12
-.Loop16_enter:
- movl (%rdi,%rcx,4),%edx
- pxor %xmm1,%xmm1
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 8(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,4(%rsi)
- addb %al,%cl
- pinsrw $0,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 12(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,8(%rsi)
- addb %bl,%cl
- pinsrw $1,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 16(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,12(%rsi)
- addb %al,%cl
- pinsrw $1,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 20(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,16(%rsi)
- addb %bl,%cl
- pinsrw $2,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 24(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,20(%rsi)
- addb %al,%cl
- pinsrw $2,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 28(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,24(%rsi)
- addb %bl,%cl
- pinsrw $3,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 32(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,28(%rsi)
- addb %al,%cl
- pinsrw $3,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 36(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,32(%rsi)
- addb %bl,%cl
- pinsrw $4,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 40(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,36(%rsi)
- addb %al,%cl
- pinsrw $4,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 44(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,40(%rsi)
- addb %bl,%cl
- pinsrw $5,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 48(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,44(%rsi)
- addb %al,%cl
- pinsrw $5,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 52(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,48(%rsi)
- addb %bl,%cl
- pinsrw $6,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 56(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,52(%rsi)
- addb %al,%cl
- pinsrw $6,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 60(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,56(%rsi)
- addb %bl,%cl
- pinsrw $7,(%rdi,%rax,4),%xmm0
- addb $16,%r10b
- movdqu (%r12),%xmm2
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movzbl %bl,%ebx
- movl %edx,60(%rsi)
- leaq (%rdi,%r10,4),%rsi
- pinsrw $7,(%rdi,%rbx,4),%xmm1
- movl (%rsi),%eax
- movq %rcx,%rbx
- xorq %rcx,%rcx
- subq $16,%r11
- movb %bl,%cl
- testq $-16,%r11
- jnz .Loop16
-
- psllq $8,%xmm1
- pxor %xmm0,%xmm2
- pxor %xmm1,%xmm2
- movdqu %xmm2,(%r12,%r13,1)
- leaq 16(%r12),%r12
-
- cmpq $0,%r11
- jne .Lloop1
- jmp .Lexit
-
-.align 16
-.Lloop1:
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl %edx,(%rdi,%r10,4)
- addb %dl,%al
- incb %r10b
- movl (%rdi,%rax,4),%edx
- movl (%rdi,%r10,4),%eax
- xorb (%r12),%dl
- movb %dl,(%r12,%r13,1)
- leaq 1(%r12),%r12
- decq %r11
- jnz .Lloop1
- jmp .Lexit
-
-.align 16
-.LRC4_CHAR:
- addb $1,%r10b
- movzbl (%rdi,%r10,1),%eax
- testq $-8,%r11
- jz .Lcloop1
- jmp .Lcloop8
-.align 16
-.Lcloop8:
- movl (%r12),%r8d
- movl 4(%r12),%r9d
- addb %al,%cl
- leaq 1(%r10),%rsi
- movzbl (%rdi,%rcx,1),%edx
- movzbl %sil,%esi
- movzbl (%rdi,%rsi,1),%ebx
- movb %al,(%rdi,%rcx,1)
- cmpq %rsi,%rcx
- movb %dl,(%rdi,%r10,1)
- jne .Lcmov0
- movq %rax,%rbx
-.Lcmov0:
- addb %al,%dl
- xorb (%rdi,%rdx,1),%r8b
- rorl $8,%r8d
- addb %bl,%cl
- leaq 1(%rsi),%r10
- movzbl (%rdi,%rcx,1),%edx
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%eax
- movb %bl,(%rdi,%rcx,1)
- cmpq %r10,%rcx
- movb %dl,(%rdi,%rsi,1)
- jne .Lcmov1
- movq %rbx,%rax
-.Lcmov1:
- addb %bl,%dl
- xorb (%rdi,%rdx,1),%r8b
- rorl $8,%r8d
- addb %al,%cl
- leaq 1(%r10),%rsi
- movzbl (%rdi,%rcx,1),%edx
- movzbl %sil,%esi
- movzbl (%rdi,%rsi,1),%ebx
- movb %al,(%rdi,%rcx,1)
- cmpq %rsi,%rcx
- movb %dl,(%rdi,%r10,1)
- jne .Lcmov2
- movq %rax,%rbx
-.Lcmov2:
- addb %al,%dl
- xorb (%rdi,%rdx,1),%r8b
- rorl $8,%r8d
- addb %bl,%cl
- leaq 1(%rsi),%r10
- movzbl (%rdi,%rcx,1),%edx
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%eax
- movb %bl,(%rdi,%rcx,1)
- cmpq %r10,%rcx
- movb %dl,(%rdi,%rsi,1)
- jne .Lcmov3
- movq %rbx,%rax
-.Lcmov3:
- addb %bl,%dl
- xorb (%rdi,%rdx,1),%r8b
- rorl $8,%r8d
- addb %al,%cl
- leaq 1(%r10),%rsi
- movzbl (%rdi,%rcx,1),%edx
- movzbl %sil,%esi
- movzbl (%rdi,%rsi,1),%ebx
- movb %al,(%rdi,%rcx,1)
- cmpq %rsi,%rcx
- movb %dl,(%rdi,%r10,1)
- jne .Lcmov4
- movq %rax,%rbx
-.Lcmov4:
- addb %al,%dl
- xorb (%rdi,%rdx,1),%r9b
- rorl $8,%r9d
- addb %bl,%cl
- leaq 1(%rsi),%r10
- movzbl (%rdi,%rcx,1),%edx
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%eax
- movb %bl,(%rdi,%rcx,1)
- cmpq %r10,%rcx
- movb %dl,(%rdi,%rsi,1)
- jne .Lcmov5
- movq %rbx,%rax
-.Lcmov5:
- addb %bl,%dl
- xorb (%rdi,%rdx,1),%r9b
- rorl $8,%r9d
- addb %al,%cl
- leaq 1(%r10),%rsi
- movzbl (%rdi,%rcx,1),%edx
- movzbl %sil,%esi
- movzbl (%rdi,%rsi,1),%ebx
- movb %al,(%rdi,%rcx,1)
- cmpq %rsi,%rcx
- movb %dl,(%rdi,%r10,1)
- jne .Lcmov6
- movq %rax,%rbx
-.Lcmov6:
- addb %al,%dl
- xorb (%rdi,%rdx,1),%r9b
- rorl $8,%r9d
- addb %bl,%cl
- leaq 1(%rsi),%r10
- movzbl (%rdi,%rcx,1),%edx
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%eax
- movb %bl,(%rdi,%rcx,1)
- cmpq %r10,%rcx
- movb %dl,(%rdi,%rsi,1)
- jne .Lcmov7
- movq %rbx,%rax
-.Lcmov7:
- addb %bl,%dl
- xorb (%rdi,%rdx,1),%r9b
- rorl $8,%r9d
- leaq -8(%r11),%r11
- movl %r8d,(%r13)
- leaq 8(%r12),%r12
- movl %r9d,4(%r13)
- leaq 8(%r13),%r13
-
- testq $-8,%r11
- jnz .Lcloop8
- cmpq $0,%r11
- jne .Lcloop1
- jmp .Lexit
-.align 16
-.Lcloop1:
- addb %al,%cl
- movzbl %cl,%ecx
- movzbl (%rdi,%rcx,1),%edx
- movb %al,(%rdi,%rcx,1)
- movb %dl,(%rdi,%r10,1)
- addb %al,%dl
- addb $1,%r10b
- movzbl %dl,%edx
- movzbl %r10b,%r10d
- movzbl (%rdi,%rdx,1),%edx
- movzbl (%rdi,%r10,1),%eax
- xorb (%r12),%dl
- leaq 1(%r12),%r12
- movb %dl,(%r13)
- leaq 1(%r13),%r13
- subq $1,%r11
- jnz .Lcloop1
- jmp .Lexit
-
-.align 16
-.Lexit:
- subb $1,%r10b
- movl %r10d,-8(%rdi)
- movl %ecx,-4(%rdi)
-
- movq (%rsp),%r13
- movq 8(%rsp),%r12
- movq 16(%rsp),%rbx
- addq $24,%rsp
-.Lepilogue:
- .byte 0xf3,0xc3
-.size asm_RC4,.-asm_RC4
-.globl asm_RC4_set_key
-.hidden asm_RC4_set_key
-.type asm_RC4_set_key,@function
-.align 16
-asm_RC4_set_key:
- leaq 8(%rdi),%rdi
- leaq (%rdx,%rsi,1),%rdx
- negq %rsi
- movq %rsi,%rcx
- xorl %eax,%eax
- xorq %r9,%r9
- xorq %r10,%r10
- xorq %r11,%r11
-
- movl OPENSSL_ia32cap_P(%rip),%r8d
- btl $20,%r8d
- jc .Lc1stloop
- jmp .Lw1stloop
-
-.align 16
-.Lw1stloop:
- movl %eax,(%rdi,%rax,4)
- addb $1,%al
- jnc .Lw1stloop
-
- xorq %r9,%r9
- xorq %r8,%r8
-.align 16
-.Lw2ndloop:
- movl (%rdi,%r9,4),%r10d
- addb (%rdx,%rsi,1),%r8b
- addb %r10b,%r8b
- addq $1,%rsi
- movl (%rdi,%r8,4),%r11d
- cmovzq %rcx,%rsi
- movl %r10d,(%rdi,%r8,4)
- movl %r11d,(%rdi,%r9,4)
- addb $1,%r9b
- jnc .Lw2ndloop
- jmp .Lexit_key
-
-.align 16
-.Lc1stloop:
- movb %al,(%rdi,%rax,1)
- addb $1,%al
- jnc .Lc1stloop
-
- xorq %r9,%r9
- xorq %r8,%r8
-.align 16
-.Lc2ndloop:
- movb (%rdi,%r9,1),%r10b
- addb (%rdx,%rsi,1),%r8b
- addb %r10b,%r8b
- addq $1,%rsi
- movb (%rdi,%r8,1),%r11b
- jnz .Lcnowrap
- movq %rcx,%rsi
-.Lcnowrap:
- movb %r10b,(%rdi,%r8,1)
- movb %r11b,(%rdi,%r9,1)
- addb $1,%r9b
- jnc .Lc2ndloop
- movl $-1,256(%rdi)
-
-.align 16
-.Lexit_key:
- xorl %eax,%eax
- movl %eax,-8(%rdi)
- movl %eax,-4(%rdi)
- .byte 0xf3,0xc3
-.size asm_RC4_set_key,.-asm_RC4_set_key
-#endif
diff --git a/mac-x86/crypto/rc4/rc4-586.S b/mac-x86/crypto/rc4/rc4-586.S
deleted file mode 100644
index dcddc58..0000000
--- a/mac-x86/crypto/rc4/rc4-586.S
+++ /dev/null
@@ -1,350 +0,0 @@
-#if defined(__i386__)
-.file "rc4-586.S"
-.text
-.globl _asm_RC4
-.private_extern _asm_RC4
-.align 4
-_asm_RC4:
-L_asm_RC4_begin:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- movl 20(%esp),%edi
- movl 24(%esp),%edx
- movl 28(%esp),%esi
- movl 32(%esp),%ebp
- xorl %eax,%eax
- xorl %ebx,%ebx
- cmpl $0,%edx
- je L000abort
- movb (%edi),%al
- movb 4(%edi),%bl
- addl $8,%edi
- leal (%esi,%edx,1),%ecx
- subl %esi,%ebp
- movl %ecx,24(%esp)
- incb %al
- cmpl $-1,256(%edi)
- je L001RC4_CHAR
- movl (%edi,%eax,4),%ecx
- andl $-4,%edx
- jz L002loop1
- movl %ebp,32(%esp)
- testl $-8,%edx
- jz L003go4loop4
- call L004PIC_me_up
-L004PIC_me_up:
- popl %ebp
- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L004PIC_me_up(%ebp),%ebp
- btl $26,(%ebp)
- jnc L003go4loop4
- movl 32(%esp),%ebp
- andl $-8,%edx
- leal -8(%esi,%edx,1),%edx
- movl %edx,-4(%edi)
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- movq (%esi),%mm0
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm2
- jmp L005loop_mmx_enter
-.align 4,0x90
-L006loop_mmx:
- addb %cl,%bl
- psllq $56,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movq (%esi),%mm0
- movq %mm2,-8(%ebp,%esi,1)
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm2
-L005loop_mmx_enter:
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm0,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $8,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $16,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $24,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $32,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $40,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- addb %cl,%bl
- psllq $48,%mm1
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- incl %eax
- addl %ecx,%edx
- movzbl %al,%eax
- movzbl %dl,%edx
- pxor %mm1,%mm2
- movl (%edi,%eax,4),%ecx
- movd (%edi,%edx,4),%mm1
- movl %ebx,%edx
- xorl %ebx,%ebx
- movb %dl,%bl
- cmpl -4(%edi),%esi
- leal 8(%esi),%esi
- jb L006loop_mmx
- psllq $56,%mm1
- pxor %mm1,%mm2
- movq %mm2,-8(%ebp,%esi,1)
- emms
- cmpl 24(%esp),%esi
- je L007done
- jmp L002loop1
-.align 4,0x90
-L003go4loop4:
- leal -4(%esi,%edx,1),%edx
- movl %edx,28(%esp)
-L008loop4:
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- addl %ecx,%edx
- incb %al
- andl $255,%edx
- movl (%edi,%eax,4),%ecx
- movl (%edi,%edx,4),%ebp
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- addl %ecx,%edx
- incb %al
- andl $255,%edx
- rorl $8,%ebp
- movl (%edi,%eax,4),%ecx
- orl (%edi,%edx,4),%ebp
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- addl %ecx,%edx
- incb %al
- andl $255,%edx
- rorl $8,%ebp
- movl (%edi,%eax,4),%ecx
- orl (%edi,%edx,4),%ebp
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- addl %ecx,%edx
- incb %al
- andl $255,%edx
- rorl $8,%ebp
- movl 32(%esp),%ecx
- orl (%edi,%edx,4),%ebp
- rorl $8,%ebp
- xorl (%esi),%ebp
- cmpl 28(%esp),%esi
- movl %ebp,(%ecx,%esi,1)
- leal 4(%esi),%esi
- movl (%edi,%eax,4),%ecx
- jb L008loop4
- cmpl 24(%esp),%esi
- je L007done
- movl 32(%esp),%ebp
-.align 4,0x90
-L002loop1:
- addb %cl,%bl
- movl (%edi,%ebx,4),%edx
- movl %ecx,(%edi,%ebx,4)
- movl %edx,(%edi,%eax,4)
- addl %ecx,%edx
- incb %al
- andl $255,%edx
- movl (%edi,%edx,4),%edx
- xorb (%esi),%dl
- leal 1(%esi),%esi
- movl (%edi,%eax,4),%ecx
- cmpl 24(%esp),%esi
- movb %dl,-1(%ebp,%esi,1)
- jb L002loop1
- jmp L007done
-.align 4,0x90
-L001RC4_CHAR:
- movzbl (%edi,%eax,1),%ecx
-L009cloop1:
- addb %cl,%bl
- movzbl (%edi,%ebx,1),%edx
- movb %cl,(%edi,%ebx,1)
- movb %dl,(%edi,%eax,1)
- addb %cl,%dl
- movzbl (%edi,%edx,1),%edx
- addb $1,%al
- xorb (%esi),%dl
- leal 1(%esi),%esi
- movzbl (%edi,%eax,1),%ecx
- cmpl 24(%esp),%esi
- movb %dl,-1(%ebp,%esi,1)
- jb L009cloop1
-L007done:
- decb %al
- movl %ebx,-4(%edi)
- movb %al,-8(%edi)
-L000abort:
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.globl _asm_RC4_set_key
-.private_extern _asm_RC4_set_key
-.align 4
-_asm_RC4_set_key:
-L_asm_RC4_set_key_begin:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- movl 20(%esp),%edi
- movl 24(%esp),%ebp
- movl 28(%esp),%esi
- call L010PIC_me_up
-L010PIC_me_up:
- popl %edx
- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%edx),%edx
- leal 8(%edi),%edi
- leal (%esi,%ebp,1),%esi
- negl %ebp
- xorl %eax,%eax
- movl %ebp,-4(%edi)
- btl $20,(%edx)
- jc L011c1stloop
-.align 4,0x90
-L012w1stloop:
- movl %eax,(%edi,%eax,4)
- addb $1,%al
- jnc L012w1stloop
- xorl %ecx,%ecx
- xorl %edx,%edx
-.align 4,0x90
-L013w2ndloop:
- movl (%edi,%ecx,4),%eax
- addb (%esi,%ebp,1),%dl
- addb %al,%dl
- addl $1,%ebp
- movl (%edi,%edx,4),%ebx
- jnz L014wnowrap
- movl -4(%edi),%ebp
-L014wnowrap:
- movl %eax,(%edi,%edx,4)
- movl %ebx,(%edi,%ecx,4)
- addb $1,%cl
- jnc L013w2ndloop
- jmp L015exit
-.align 4,0x90
-L011c1stloop:
- movb %al,(%edi,%eax,1)
- addb $1,%al
- jnc L011c1stloop
- xorl %ecx,%ecx
- xorl %edx,%edx
- xorl %ebx,%ebx
-.align 4,0x90
-L016c2ndloop:
- movb (%edi,%ecx,1),%al
- addb (%esi,%ebp,1),%dl
- addb %al,%dl
- addl $1,%ebp
- movb (%edi,%edx,1),%bl
- jnz L017cnowrap
- movl -4(%edi),%ebp
-L017cnowrap:
- movb %al,(%edi,%edx,1)
- movb %bl,(%edi,%ecx,1)
- addb $1,%cl
- jnc L016c2ndloop
- movl $-1,256(%edi)
-L015exit:
- xorl %eax,%eax
- movl %eax,-8(%edi)
- movl %eax,-4(%edi)
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.section __IMPORT,__pointers,non_lazy_symbol_pointers
-L_OPENSSL_ia32cap_P$non_lazy_ptr:
-.indirect_symbol _OPENSSL_ia32cap_P
-.long 0
-#endif
diff --git a/mac-x86_64/crypto/ec/p256-x86_64-asm.S b/mac-x86_64/crypto/ec/p256-x86_64-asm.S
index 1cd0cc3..97fb75a 100644
--- a/mac-x86_64/crypto/ec/p256-x86_64-asm.S
+++ b/mac-x86_64/crypto/ec/p256-x86_64-asm.S
@@ -23,6 +23,7 @@
pushq %r13
movq 0(%rsi),%r8
+ xorq %r13,%r13
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
@@ -33,7 +34,7 @@
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
- sbbq %r13,%r13
+ adcq $0,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
@@ -41,14 +42,14 @@
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -624,6 +625,8 @@
movq %r9,%rsi
adcq $0,%rdx
+
+
subq $-1,%r8
movq %r10,%rax
sbbq %r12,%r9
@@ -764,13 +767,14 @@
.p2align 5
__ecp_nistz256_add_toq:
+ xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -778,14 +782,14 @@
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -853,13 +857,14 @@
.p2align 5
__ecp_nistz256_mul_by_2q:
+ xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -867,14 +872,14 @@
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -1106,16 +1111,14 @@
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@@ -1127,14 +1130,14 @@
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
- por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
- por %xmm1,%xmm3
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
@@ -1145,8 +1148,8 @@
call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm3,%xmm4
- por %xmm3,%xmm4
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@@ -1329,6 +1332,7 @@
+ xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@@ -1336,7 +1340,7 @@
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1344,15 +1348,15 @@
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@@ -1507,16 +1511,14 @@
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@@ -1634,6 +1636,7 @@
+ xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@@ -1641,7 +1644,7 @@
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1649,15 +1652,15 @@
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
diff --git a/mac-x86_64/crypto/rc4/rc4-x86_64.S b/mac-x86_64/crypto/rc4/rc4-x86_64.S
deleted file mode 100644
index 7808184..0000000
--- a/mac-x86_64/crypto/rc4/rc4-x86_64.S
+++ /dev/null
@@ -1,595 +0,0 @@
-#if defined(__x86_64__)
-.text
-
-
-.globl _asm_RC4
-.private_extern _asm_RC4
-
-.p2align 4
-_asm_RC4:
- orq %rsi,%rsi
- jne L$entry
- .byte 0xf3,0xc3
-L$entry:
- pushq %rbx
- pushq %r12
- pushq %r13
-L$prologue:
- movq %rsi,%r11
- movq %rdx,%r12
- movq %rcx,%r13
- xorq %r10,%r10
- xorq %rcx,%rcx
-
- leaq 8(%rdi),%rdi
- movb -8(%rdi),%r10b
- movb -4(%rdi),%cl
- cmpl $-1,256(%rdi)
- je L$RC4_CHAR
- movl _OPENSSL_ia32cap_P(%rip),%r8d
- xorq %rbx,%rbx
- incb %r10b
- subq %r10,%rbx
- subq %r12,%r13
- movl (%rdi,%r10,4),%eax
- testq $-16,%r11
- jz L$loop1
- btl $30,%r8d
- jc L$intel
- andq $7,%rbx
- leaq 1(%r10),%rsi
- jz L$oop8
- subq %rbx,%r11
-L$oop8_warmup:
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl %edx,(%rdi,%r10,4)
- addb %dl,%al
- incb %r10b
- movl (%rdi,%rax,4),%edx
- movl (%rdi,%r10,4),%eax
- xorb (%r12),%dl
- movb %dl,(%r12,%r13,1)
- leaq 1(%r12),%r12
- decq %rbx
- jnz L$oop8_warmup
-
- leaq 1(%r10),%rsi
- jmp L$oop8
-.p2align 4
-L$oop8:
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl 0(%rdi,%rsi,4),%ebx
- rorq $8,%r8
- movl %edx,0(%rdi,%r10,4)
- addb %al,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %bl,%cl
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- movl 4(%rdi,%rsi,4),%eax
- rorq $8,%r8
- movl %edx,4(%rdi,%r10,4)
- addb %bl,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl 8(%rdi,%rsi,4),%ebx
- rorq $8,%r8
- movl %edx,8(%rdi,%r10,4)
- addb %al,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %bl,%cl
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- movl 12(%rdi,%rsi,4),%eax
- rorq $8,%r8
- movl %edx,12(%rdi,%r10,4)
- addb %bl,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl 16(%rdi,%rsi,4),%ebx
- rorq $8,%r8
- movl %edx,16(%rdi,%r10,4)
- addb %al,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %bl,%cl
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- movl 20(%rdi,%rsi,4),%eax
- rorq $8,%r8
- movl %edx,20(%rdi,%r10,4)
- addb %bl,%dl
- movb (%rdi,%rdx,4),%r8b
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl 24(%rdi,%rsi,4),%ebx
- rorq $8,%r8
- movl %edx,24(%rdi,%r10,4)
- addb %al,%dl
- movb (%rdi,%rdx,4),%r8b
- addb $8,%sil
- addb %bl,%cl
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- movl -4(%rdi,%rsi,4),%eax
- rorq $8,%r8
- movl %edx,28(%rdi,%r10,4)
- addb %bl,%dl
- movb (%rdi,%rdx,4),%r8b
- addb $8,%r10b
- rorq $8,%r8
- subq $8,%r11
-
- xorq (%r12),%r8
- movq %r8,(%r12,%r13,1)
- leaq 8(%r12),%r12
-
- testq $-8,%r11
- jnz L$oop8
- cmpq $0,%r11
- jne L$loop1
- jmp L$exit
-
-.p2align 4
-L$intel:
- testq $-32,%r11
- jz L$loop1
- andq $15,%rbx
- jz L$oop16_is_hot
- subq %rbx,%r11
-L$oop16_warmup:
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl %edx,(%rdi,%r10,4)
- addb %dl,%al
- incb %r10b
- movl (%rdi,%rax,4),%edx
- movl (%rdi,%r10,4),%eax
- xorb (%r12),%dl
- movb %dl,(%r12,%r13,1)
- leaq 1(%r12),%r12
- decq %rbx
- jnz L$oop16_warmup
-
- movq %rcx,%rbx
- xorq %rcx,%rcx
- movb %bl,%cl
-
-L$oop16_is_hot:
- leaq (%rdi,%r10,4),%rsi
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- pxor %xmm0,%xmm0
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 4(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,0(%rsi)
- addb %bl,%cl
- pinsrw $0,(%rdi,%rax,4),%xmm0
- jmp L$oop16_enter
-.p2align 4
-L$oop16:
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- pxor %xmm0,%xmm2
- psllq $8,%xmm1
- pxor %xmm0,%xmm0
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 4(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,0(%rsi)
- pxor %xmm1,%xmm2
- addb %bl,%cl
- pinsrw $0,(%rdi,%rax,4),%xmm0
- movdqu %xmm2,(%r12,%r13,1)
- leaq 16(%r12),%r12
-L$oop16_enter:
- movl (%rdi,%rcx,4),%edx
- pxor %xmm1,%xmm1
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 8(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,4(%rsi)
- addb %al,%cl
- pinsrw $0,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 12(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,8(%rsi)
- addb %bl,%cl
- pinsrw $1,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 16(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,12(%rsi)
- addb %al,%cl
- pinsrw $1,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 20(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,16(%rsi)
- addb %bl,%cl
- pinsrw $2,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 24(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,20(%rsi)
- addb %al,%cl
- pinsrw $2,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 28(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,24(%rsi)
- addb %bl,%cl
- pinsrw $3,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 32(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,28(%rsi)
- addb %al,%cl
- pinsrw $3,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 36(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,32(%rsi)
- addb %bl,%cl
- pinsrw $4,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 40(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,36(%rsi)
- addb %al,%cl
- pinsrw $4,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 44(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,40(%rsi)
- addb %bl,%cl
- pinsrw $5,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 48(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,44(%rsi)
- addb %al,%cl
- pinsrw $5,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 52(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,48(%rsi)
- addb %bl,%cl
- pinsrw $6,(%rdi,%rax,4),%xmm0
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movl 56(%rsi),%eax
- movzbl %bl,%ebx
- movl %edx,52(%rsi)
- addb %al,%cl
- pinsrw $6,(%rdi,%rbx,4),%xmm1
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- addb %dl,%al
- movl 60(%rsi),%ebx
- movzbl %al,%eax
- movl %edx,56(%rsi)
- addb %bl,%cl
- pinsrw $7,(%rdi,%rax,4),%xmm0
- addb $16,%r10b
- movdqu (%r12),%xmm2
- movl (%rdi,%rcx,4),%edx
- movl %ebx,(%rdi,%rcx,4)
- addb %dl,%bl
- movzbl %bl,%ebx
- movl %edx,60(%rsi)
- leaq (%rdi,%r10,4),%rsi
- pinsrw $7,(%rdi,%rbx,4),%xmm1
- movl (%rsi),%eax
- movq %rcx,%rbx
- xorq %rcx,%rcx
- subq $16,%r11
- movb %bl,%cl
- testq $-16,%r11
- jnz L$oop16
-
- psllq $8,%xmm1
- pxor %xmm0,%xmm2
- pxor %xmm1,%xmm2
- movdqu %xmm2,(%r12,%r13,1)
- leaq 16(%r12),%r12
-
- cmpq $0,%r11
- jne L$loop1
- jmp L$exit
-
-.p2align 4
-L$loop1:
- addb %al,%cl
- movl (%rdi,%rcx,4),%edx
- movl %eax,(%rdi,%rcx,4)
- movl %edx,(%rdi,%r10,4)
- addb %dl,%al
- incb %r10b
- movl (%rdi,%rax,4),%edx
- movl (%rdi,%r10,4),%eax
- xorb (%r12),%dl
- movb %dl,(%r12,%r13,1)
- leaq 1(%r12),%r12
- decq %r11
- jnz L$loop1
- jmp L$exit
-
-.p2align 4
-L$RC4_CHAR:
- addb $1,%r10b
- movzbl (%rdi,%r10,1),%eax
- testq $-8,%r11
- jz L$cloop1
- jmp L$cloop8
-.p2align 4
-L$cloop8:
- movl (%r12),%r8d
- movl 4(%r12),%r9d
- addb %al,%cl
- leaq 1(%r10),%rsi
- movzbl (%rdi,%rcx,1),%edx
- movzbl %sil,%esi
- movzbl (%rdi,%rsi,1),%ebx
- movb %al,(%rdi,%rcx,1)
- cmpq %rsi,%rcx
- movb %dl,(%rdi,%r10,1)
- jne L$cmov0
- movq %rax,%rbx
-L$cmov0:
- addb %al,%dl
- xorb (%rdi,%rdx,1),%r8b
- rorl $8,%r8d
- addb %bl,%cl
- leaq 1(%rsi),%r10
- movzbl (%rdi,%rcx,1),%edx
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%eax
- movb %bl,(%rdi,%rcx,1)
- cmpq %r10,%rcx
- movb %dl,(%rdi,%rsi,1)
- jne L$cmov1
- movq %rbx,%rax
-L$cmov1:
- addb %bl,%dl
- xorb (%rdi,%rdx,1),%r8b
- rorl $8,%r8d
- addb %al,%cl
- leaq 1(%r10),%rsi
- movzbl (%rdi,%rcx,1),%edx
- movzbl %sil,%esi
- movzbl (%rdi,%rsi,1),%ebx
- movb %al,(%rdi,%rcx,1)
- cmpq %rsi,%rcx
- movb %dl,(%rdi,%r10,1)
- jne L$cmov2
- movq %rax,%rbx
-L$cmov2:
- addb %al,%dl
- xorb (%rdi,%rdx,1),%r8b
- rorl $8,%r8d
- addb %bl,%cl
- leaq 1(%rsi),%r10
- movzbl (%rdi,%rcx,1),%edx
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%eax
- movb %bl,(%rdi,%rcx,1)
- cmpq %r10,%rcx
- movb %dl,(%rdi,%rsi,1)
- jne L$cmov3
- movq %rbx,%rax
-L$cmov3:
- addb %bl,%dl
- xorb (%rdi,%rdx,1),%r8b
- rorl $8,%r8d
- addb %al,%cl
- leaq 1(%r10),%rsi
- movzbl (%rdi,%rcx,1),%edx
- movzbl %sil,%esi
- movzbl (%rdi,%rsi,1),%ebx
- movb %al,(%rdi,%rcx,1)
- cmpq %rsi,%rcx
- movb %dl,(%rdi,%r10,1)
- jne L$cmov4
- movq %rax,%rbx
-L$cmov4:
- addb %al,%dl
- xorb (%rdi,%rdx,1),%r9b
- rorl $8,%r9d
- addb %bl,%cl
- leaq 1(%rsi),%r10
- movzbl (%rdi,%rcx,1),%edx
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%eax
- movb %bl,(%rdi,%rcx,1)
- cmpq %r10,%rcx
- movb %dl,(%rdi,%rsi,1)
- jne L$cmov5
- movq %rbx,%rax
-L$cmov5:
- addb %bl,%dl
- xorb (%rdi,%rdx,1),%r9b
- rorl $8,%r9d
- addb %al,%cl
- leaq 1(%r10),%rsi
- movzbl (%rdi,%rcx,1),%edx
- movzbl %sil,%esi
- movzbl (%rdi,%rsi,1),%ebx
- movb %al,(%rdi,%rcx,1)
- cmpq %rsi,%rcx
- movb %dl,(%rdi,%r10,1)
- jne L$cmov6
- movq %rax,%rbx
-L$cmov6:
- addb %al,%dl
- xorb (%rdi,%rdx,1),%r9b
- rorl $8,%r9d
- addb %bl,%cl
- leaq 1(%rsi),%r10
- movzbl (%rdi,%rcx,1),%edx
- movzbl %r10b,%r10d
- movzbl (%rdi,%r10,1),%eax
- movb %bl,(%rdi,%rcx,1)
- cmpq %r10,%rcx
- movb %dl,(%rdi,%rsi,1)
- jne L$cmov7
- movq %rbx,%rax
-L$cmov7:
- addb %bl,%dl
- xorb (%rdi,%rdx,1),%r9b
- rorl $8,%r9d
- leaq -8(%r11),%r11
- movl %r8d,(%r13)
- leaq 8(%r12),%r12
- movl %r9d,4(%r13)
- leaq 8(%r13),%r13
-
- testq $-8,%r11
- jnz L$cloop8
- cmpq $0,%r11
- jne L$cloop1
- jmp L$exit
-.p2align 4
-L$cloop1:
- addb %al,%cl
- movzbl %cl,%ecx
- movzbl (%rdi,%rcx,1),%edx
- movb %al,(%rdi,%rcx,1)
- movb %dl,(%rdi,%r10,1)
- addb %al,%dl
- addb $1,%r10b
- movzbl %dl,%edx
- movzbl %r10b,%r10d
- movzbl (%rdi,%rdx,1),%edx
- movzbl (%rdi,%r10,1),%eax
- xorb (%r12),%dl
- leaq 1(%r12),%r12
- movb %dl,(%r13)
- leaq 1(%r13),%r13
- subq $1,%r11
- jnz L$cloop1
- jmp L$exit
-
-.p2align 4
-L$exit:
- subb $1,%r10b
- movl %r10d,-8(%rdi)
- movl %ecx,-4(%rdi)
-
- movq (%rsp),%r13
- movq 8(%rsp),%r12
- movq 16(%rsp),%rbx
- addq $24,%rsp
-L$epilogue:
- .byte 0xf3,0xc3
-
-.globl _asm_RC4_set_key
-.private_extern _asm_RC4_set_key
-
-.p2align 4
-_asm_RC4_set_key:
- leaq 8(%rdi),%rdi
- leaq (%rdx,%rsi,1),%rdx
- negq %rsi
- movq %rsi,%rcx
- xorl %eax,%eax
- xorq %r9,%r9
- xorq %r10,%r10
- xorq %r11,%r11
-
- movl _OPENSSL_ia32cap_P(%rip),%r8d
- btl $20,%r8d
- jc L$c1stloop
- jmp L$w1stloop
-
-.p2align 4
-L$w1stloop:
- movl %eax,(%rdi,%rax,4)
- addb $1,%al
- jnc L$w1stloop
-
- xorq %r9,%r9
- xorq %r8,%r8
-.p2align 4
-L$w2ndloop:
- movl (%rdi,%r9,4),%r10d
- addb (%rdx,%rsi,1),%r8b
- addb %r10b,%r8b
- addq $1,%rsi
- movl (%rdi,%r8,4),%r11d
- cmovzq %rcx,%rsi
- movl %r10d,(%rdi,%r8,4)
- movl %r11d,(%rdi,%r9,4)
- addb $1,%r9b
- jnc L$w2ndloop
- jmp L$exit_key
-
-.p2align 4
-L$c1stloop:
- movb %al,(%rdi,%rax,1)
- addb $1,%al
- jnc L$c1stloop
-
- xorq %r9,%r9
- xorq %r8,%r8
-.p2align 4
-L$c2ndloop:
- movb (%rdi,%r9,1),%r10b
- addb (%rdx,%rsi,1),%r8b
- addb %r10b,%r8b
- addq $1,%rsi
- movb (%rdi,%r8,1),%r11b
- jnz L$cnowrap
- movq %rcx,%rsi
-L$cnowrap:
- movb %r10b,(%rdi,%r8,1)
- movb %r11b,(%rdi,%r9,1)
- addb $1,%r9b
- jnc L$c2ndloop
- movl $-1,256(%rdi)
-
-.p2align 4
-L$exit_key:
- xorl %eax,%eax
- movl %eax,-8(%rdi)
- movl %eax,-4(%rdi)
- .byte 0xf3,0xc3
-
-#endif
diff --git a/win-x86/crypto/rc4/rc4-586.asm b/win-x86/crypto/rc4/rc4-586.asm
deleted file mode 100644
index 0bab2be..0000000
--- a/win-x86/crypto/rc4/rc4-586.asm
+++ /dev/null
@@ -1,353 +0,0 @@
-%ifidn __OUTPUT_FORMAT__,obj
-section code use32 class=code align=64
-%elifidn __OUTPUT_FORMAT__,win32
-%ifdef __YASM_VERSION_ID__
-%if __YASM_VERSION_ID__ < 01010000h
-%error yasm version 1.1.0 or later needed.
-%endif
-; Yasm automatically includes .00 and complains about redefining it.
-; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
-%else
-$@feat.00 equ 1
-%endif
-section .text code align=64
-%else
-section .text code
-%endif
-;extern _OPENSSL_ia32cap_P
-global _asm_RC4
-align 16
-_asm_RC4:
-L$_asm_RC4_begin:
- push ebp
- push ebx
- push esi
- push edi
- mov edi,DWORD [20+esp]
- mov edx,DWORD [24+esp]
- mov esi,DWORD [28+esp]
- mov ebp,DWORD [32+esp]
- xor eax,eax
- xor ebx,ebx
- cmp edx,0
- je NEAR L$000abort
- mov al,BYTE [edi]
- mov bl,BYTE [4+edi]
- add edi,8
- lea ecx,[edx*1+esi]
- sub ebp,esi
- mov DWORD [24+esp],ecx
- inc al
- cmp DWORD [256+edi],-1
- je NEAR L$001RC4_CHAR
- mov ecx,DWORD [eax*4+edi]
- and edx,-4
- jz NEAR L$002loop1
- mov DWORD [32+esp],ebp
- test edx,-8
- jz NEAR L$003go4loop4
- lea ebp,[_OPENSSL_ia32cap_P]
- bt DWORD [ebp],26
- jnc NEAR L$003go4loop4
- mov ebp,DWORD [32+esp]
- and edx,-8
- lea edx,[edx*1+esi-8]
- mov DWORD [edi-4],edx
- add bl,cl
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- inc eax
- add edx,ecx
- movzx eax,al
- movzx edx,dl
- movq mm0,[esi]
- mov ecx,DWORD [eax*4+edi]
- movd mm2,DWORD [edx*4+edi]
- jmp NEAR L$004loop_mmx_enter
-align 16
-L$005loop_mmx:
- add bl,cl
- psllq mm1,56
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- inc eax
- add edx,ecx
- movzx eax,al
- movzx edx,dl
- pxor mm2,mm1
- movq mm0,[esi]
- movq [esi*1+ebp-8],mm2
- mov ecx,DWORD [eax*4+edi]
- movd mm2,DWORD [edx*4+edi]
-L$004loop_mmx_enter:
- add bl,cl
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- inc eax
- add edx,ecx
- movzx eax,al
- movzx edx,dl
- pxor mm2,mm0
- mov ecx,DWORD [eax*4+edi]
- movd mm1,DWORD [edx*4+edi]
- add bl,cl
- psllq mm1,8
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- inc eax
- add edx,ecx
- movzx eax,al
- movzx edx,dl
- pxor mm2,mm1
- mov ecx,DWORD [eax*4+edi]
- movd mm1,DWORD [edx*4+edi]
- add bl,cl
- psllq mm1,16
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- inc eax
- add edx,ecx
- movzx eax,al
- movzx edx,dl
- pxor mm2,mm1
- mov ecx,DWORD [eax*4+edi]
- movd mm1,DWORD [edx*4+edi]
- add bl,cl
- psllq mm1,24
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- inc eax
- add edx,ecx
- movzx eax,al
- movzx edx,dl
- pxor mm2,mm1
- mov ecx,DWORD [eax*4+edi]
- movd mm1,DWORD [edx*4+edi]
- add bl,cl
- psllq mm1,32
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- inc eax
- add edx,ecx
- movzx eax,al
- movzx edx,dl
- pxor mm2,mm1
- mov ecx,DWORD [eax*4+edi]
- movd mm1,DWORD [edx*4+edi]
- add bl,cl
- psllq mm1,40
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- inc eax
- add edx,ecx
- movzx eax,al
- movzx edx,dl
- pxor mm2,mm1
- mov ecx,DWORD [eax*4+edi]
- movd mm1,DWORD [edx*4+edi]
- add bl,cl
- psllq mm1,48
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- inc eax
- add edx,ecx
- movzx eax,al
- movzx edx,dl
- pxor mm2,mm1
- mov ecx,DWORD [eax*4+edi]
- movd mm1,DWORD [edx*4+edi]
- mov edx,ebx
- xor ebx,ebx
- mov bl,dl
- cmp esi,DWORD [edi-4]
- lea esi,[8+esi]
- jb NEAR L$005loop_mmx
- psllq mm1,56
- pxor mm2,mm1
- movq [esi*1+ebp-8],mm2
- emms
- cmp esi,DWORD [24+esp]
- je NEAR L$006done
- jmp NEAR L$002loop1
-align 16
-L$003go4loop4:
- lea edx,[edx*1+esi-4]
- mov DWORD [28+esp],edx
-L$007loop4:
- add bl,cl
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- add edx,ecx
- inc al
- and edx,255
- mov ecx,DWORD [eax*4+edi]
- mov ebp,DWORD [edx*4+edi]
- add bl,cl
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- add edx,ecx
- inc al
- and edx,255
- ror ebp,8
- mov ecx,DWORD [eax*4+edi]
- or ebp,DWORD [edx*4+edi]
- add bl,cl
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- add edx,ecx
- inc al
- and edx,255
- ror ebp,8
- mov ecx,DWORD [eax*4+edi]
- or ebp,DWORD [edx*4+edi]
- add bl,cl
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- add edx,ecx
- inc al
- and edx,255
- ror ebp,8
- mov ecx,DWORD [32+esp]
- or ebp,DWORD [edx*4+edi]
- ror ebp,8
- xor ebp,DWORD [esi]
- cmp esi,DWORD [28+esp]
- mov DWORD [esi*1+ecx],ebp
- lea esi,[4+esi]
- mov ecx,DWORD [eax*4+edi]
- jb NEAR L$007loop4
- cmp esi,DWORD [24+esp]
- je NEAR L$006done
- mov ebp,DWORD [32+esp]
-align 16
-L$002loop1:
- add bl,cl
- mov edx,DWORD [ebx*4+edi]
- mov DWORD [ebx*4+edi],ecx
- mov DWORD [eax*4+edi],edx
- add edx,ecx
- inc al
- and edx,255
- mov edx,DWORD [edx*4+edi]
- xor dl,BYTE [esi]
- lea esi,[1+esi]
- mov ecx,DWORD [eax*4+edi]
- cmp esi,DWORD [24+esp]
- mov BYTE [esi*1+ebp-1],dl
- jb NEAR L$002loop1
- jmp NEAR L$006done
-align 16
-L$001RC4_CHAR:
- movzx ecx,BYTE [eax*1+edi]
-L$008cloop1:
- add bl,cl
- movzx edx,BYTE [ebx*1+edi]
- mov BYTE [ebx*1+edi],cl
- mov BYTE [eax*1+edi],dl
- add dl,cl
- movzx edx,BYTE [edx*1+edi]
- add al,1
- xor dl,BYTE [esi]
- lea esi,[1+esi]
- movzx ecx,BYTE [eax*1+edi]
- cmp esi,DWORD [24+esp]
- mov BYTE [esi*1+ebp-1],dl
- jb NEAR L$008cloop1
-L$006done:
- dec al
- mov DWORD [edi-4],ebx
- mov BYTE [edi-8],al
-L$000abort:
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-global _asm_RC4_set_key
-align 16
-_asm_RC4_set_key:
-L$_asm_RC4_set_key_begin:
- push ebp
- push ebx
- push esi
- push edi
- mov edi,DWORD [20+esp]
- mov ebp,DWORD [24+esp]
- mov esi,DWORD [28+esp]
- lea edx,[_OPENSSL_ia32cap_P]
- lea edi,[8+edi]
- lea esi,[ebp*1+esi]
- neg ebp
- xor eax,eax
- mov DWORD [edi-4],ebp
- bt DWORD [edx],20
- jc NEAR L$009c1stloop
-align 16
-L$010w1stloop:
- mov DWORD [eax*4+edi],eax
- add al,1
- jnc NEAR L$010w1stloop
- xor ecx,ecx
- xor edx,edx
-align 16
-L$011w2ndloop:
- mov eax,DWORD [ecx*4+edi]
- add dl,BYTE [ebp*1+esi]
- add dl,al
- add ebp,1
- mov ebx,DWORD [edx*4+edi]
- jnz NEAR L$012wnowrap
- mov ebp,DWORD [edi-4]
-L$012wnowrap:
- mov DWORD [edx*4+edi],eax
- mov DWORD [ecx*4+edi],ebx
- add cl,1
- jnc NEAR L$011w2ndloop
- jmp NEAR L$013exit
-align 16
-L$009c1stloop:
- mov BYTE [eax*1+edi],al
- add al,1
- jnc NEAR L$009c1stloop
- xor ecx,ecx
- xor edx,edx
- xor ebx,ebx
-align 16
-L$014c2ndloop:
- mov al,BYTE [ecx*1+edi]
- add dl,BYTE [ebp*1+esi]
- add dl,al
- add ebp,1
- mov bl,BYTE [edx*1+edi]
- jnz NEAR L$015cnowrap
- mov ebp,DWORD [edi-4]
-L$015cnowrap:
- mov BYTE [edx*1+edi],al
- mov BYTE [ecx*1+edi],bl
- add cl,1
- jnc NEAR L$014c2ndloop
- mov DWORD [256+edi],-1
-L$013exit:
- xor eax,eax
- mov DWORD [edi-8],eax
- mov DWORD [edi-4],eax
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-segment .bss
-common _OPENSSL_ia32cap_P 16
diff --git a/win-x86_64/crypto/ec/p256-x86_64-asm.asm b/win-x86_64/crypto/ec/p256-x86_64-asm.asm
index a2e4075..cbcf883 100644
--- a/win-x86_64/crypto/ec/p256-x86_64-asm.asm
+++ b/win-x86_64/crypto/ec/p256-x86_64-asm.asm
@@ -35,6 +35,7 @@
push r13
mov r8,QWORD[rsi]
+ xor r13,r13
mov r9,QWORD[8+rsi]
add r8,r8
mov r10,QWORD[16+rsi]
@@ -45,7 +46,7 @@
adc r10,r10
adc r11,r11
mov rdx,r9
- sbb r13,r13
+ adc r13,0
sub r8,QWORD[rsi]
mov rcx,r10
@@ -53,14 +54,14 @@
sbb r10,QWORD[16+rsi]
mov r12,r11
sbb r11,QWORD[24+rsi]
- test r13,r13
+ sbb r13,0
- cmovz r8,rax
- cmovz r9,rdx
+ cmovc r8,rax
+ cmovc r9,rdx
mov QWORD[rdi],r8
- cmovz r10,rcx
+ cmovc r10,rcx
mov QWORD[8+rdi],r9
- cmovz r11,r12
+ cmovc r11,r12
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
@@ -673,6 +674,8 @@
mov rsi,r9
adc rdx,0
+
+
sub r8,-1
mov rax,r10
sbb r9,r12
@@ -873,13 +876,14 @@
ALIGN 32
__ecp_nistz256_add_toq:
+ xor r11,r11
add r12,QWORD[rbx]
adc r13,QWORD[8+rbx]
mov rax,r12
adc r8,QWORD[16+rbx]
adc r9,QWORD[24+rbx]
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -887,14 +891,14 @@
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
- cmovz r13,rbp
+ cmovc r12,rax
+ cmovc r13,rbp
mov QWORD[rdi],r12
- cmovz r8,rcx
+ cmovc r8,rcx
mov QWORD[8+rdi],r13
- cmovz r9,r10
+ cmovc r9,r10
mov QWORD[16+rdi],r8
mov QWORD[24+rdi],r9
@@ -962,13 +966,14 @@
ALIGN 32
__ecp_nistz256_mul_by_2q:
+ xor r11,r11
add r12,r12
adc r13,r13
mov rax,r12
adc r8,r8
adc r9,r9
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -976,14 +981,14 @@
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
- cmovz r13,rbp
+ cmovc r12,rax
+ cmovc r13,rbp
mov QWORD[rdi],r12
- cmovz r8,rcx
+ cmovc r8,rcx
mov QWORD[8+rdi],r13
- cmovz r9,r10
+ cmovc r9,r10
mov QWORD[16+rdi],r8
mov QWORD[24+rdi],r9
@@ -1232,16 +1237,14 @@
mov rsi,rdx
movdqa XMMWORD[384+rsp],xmm0
movdqa XMMWORD[(384+16)+rsp],xmm1
- por xmm1,xmm0
movdqa XMMWORD[416+rsp],xmm2
movdqa XMMWORD[(416+16)+rsp],xmm3
- por xmm3,xmm2
movdqa XMMWORD[448+rsp],xmm4
movdqa XMMWORD[(448+16)+rsp],xmm5
- por xmm3,xmm1
+ por xmm5,xmm4
movdqu xmm0,XMMWORD[rsi]
- pshufd xmm5,xmm3,0xb1
+ pshufd xmm3,xmm5,0xb1
movdqu xmm1,XMMWORD[16+rsi]
movdqu xmm2,XMMWORD[32+rsi]
por xmm5,xmm3
@@ -1253,14 +1256,14 @@
movdqa XMMWORD[480+rsp],xmm0
pshufd xmm4,xmm5,0x1e
movdqa XMMWORD[(480+16)+rsp],xmm1
- por xmm1,xmm0
-DB 102,72,15,110,199
+ movdqu xmm0,XMMWORD[64+rsi]
+ movdqu xmm1,XMMWORD[80+rsi]
movdqa XMMWORD[512+rsp],xmm2
movdqa XMMWORD[(512+16)+rsp],xmm3
- por xmm3,xmm2
por xmm5,xmm4
pxor xmm4,xmm4
- por xmm3,xmm1
+ por xmm1,xmm0
+DB 102,72,15,110,199
lea rsi,[((64-0))+rsi]
mov QWORD[((544+0))+rsp],rax
@@ -1271,8 +1274,8 @@
call __ecp_nistz256_sqr_montq
pcmpeqd xmm5,xmm4
- pshufd xmm4,xmm3,0xb1
- por xmm4,xmm3
+ pshufd xmm4,xmm1,0xb1
+ por xmm4,xmm1
pshufd xmm5,xmm5,0
pshufd xmm3,xmm4,0x1e
por xmm4,xmm3
@@ -1455,6 +1458,7 @@
+ xor r11,r11
add r12,r12
lea rsi,[96+rsp]
adc r13,r13
@@ -1462,7 +1466,7 @@
adc r8,r8
adc r9,r9
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -1470,15 +1474,15 @@
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
+ cmovc r12,rax
mov rax,QWORD[rsi]
- cmovz r13,rbp
+ cmovc r13,rbp
mov rbp,QWORD[8+rsi]
- cmovz r8,rcx
+ cmovc r8,rcx
mov rcx,QWORD[16+rsi]
- cmovz r9,r10
+ cmovc r9,r10
mov r10,QWORD[24+rsi]
call __ecp_nistz256_subq
@@ -1643,16 +1647,14 @@
mov r8,QWORD[((64+24))+rsi]
movdqa XMMWORD[320+rsp],xmm0
movdqa XMMWORD[(320+16)+rsp],xmm1
- por xmm1,xmm0
movdqa XMMWORD[352+rsp],xmm2
movdqa XMMWORD[(352+16)+rsp],xmm3
- por xmm3,xmm2
movdqa XMMWORD[384+rsp],xmm4
movdqa XMMWORD[(384+16)+rsp],xmm5
- por xmm3,xmm1
+ por xmm5,xmm4
movdqu xmm0,XMMWORD[rbx]
- pshufd xmm5,xmm3,0xb1
+ pshufd xmm3,xmm5,0xb1
movdqu xmm1,XMMWORD[16+rbx]
movdqu xmm2,XMMWORD[32+rbx]
por xmm5,xmm3
@@ -1770,6 +1772,7 @@
+ xor r11,r11
add r12,r12
lea rsi,[192+rsp]
adc r13,r13
@@ -1777,7 +1780,7 @@
adc r8,r8
adc r9,r9
mov rbp,r13
- sbb r11,r11
+ adc r11,0
sub r12,-1
mov rcx,r8
@@ -1785,15 +1788,15 @@
sbb r8,0
mov r10,r9
sbb r9,r15
- test r11,r11
+ sbb r11,0
- cmovz r12,rax
+ cmovc r12,rax
mov rax,QWORD[rsi]
- cmovz r13,rbp
+ cmovc r13,rbp
mov rbp,QWORD[8+rsi]
- cmovz r8,rcx
+ cmovc r8,rcx
mov rcx,QWORD[16+rsi]
- cmovz r9,r10
+ cmovc r9,r10
mov r10,QWORD[24+rsi]
call __ecp_nistz256_subq
diff --git a/win-x86_64/crypto/rc4/rc4-x86_64.asm b/win-x86_64/crypto/rc4/rc4-x86_64.asm
deleted file mode 100644
index c7c3b7b..0000000
--- a/win-x86_64/crypto/rc4/rc4-x86_64.asm
+++ /dev/null
@@ -1,741 +0,0 @@
-default rel
-%define XMMWORD
-%define YMMWORD
-%define ZMMWORD
-section .text code align=64
-
-EXTERN OPENSSL_ia32cap_P
-
-global asm_RC4
-
-ALIGN 16
-asm_RC4:
- mov QWORD[8+rsp],rdi ;WIN64 prologue
- mov QWORD[16+rsp],rsi
- mov rax,rsp
-$L$SEH_begin_asm_RC4:
- mov rdi,rcx
- mov rsi,rdx
- mov rdx,r8
- mov rcx,r9
-
-
- or rsi,rsi
- jne NEAR $L$entry
- mov rdi,QWORD[8+rsp] ;WIN64 epilogue
- mov rsi,QWORD[16+rsp]
- DB 0F3h,0C3h ;repret
-$L$entry:
- push rbx
- push r12
- push r13
-$L$prologue:
- mov r11,rsi
- mov r12,rdx
- mov r13,rcx
- xor r10,r10
- xor rcx,rcx
-
- lea rdi,[8+rdi]
- mov r10b,BYTE[((-8))+rdi]
- mov cl,BYTE[((-4))+rdi]
- cmp DWORD[256+rdi],-1
- je NEAR $L$RC4_CHAR
- mov r8d,DWORD[OPENSSL_ia32cap_P]
- xor rbx,rbx
- inc r10b
- sub rbx,r10
- sub r13,r12
- mov eax,DWORD[r10*4+rdi]
- test r11,-16
- jz NEAR $L$loop1
- bt r8d,30
- jc NEAR $L$intel
- and rbx,7
- lea rsi,[1+r10]
- jz NEAR $L$oop8
- sub r11,rbx
-$L$oop8_warmup:
- add cl,al
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- mov DWORD[r10*4+rdi],edx
- add al,dl
- inc r10b
- mov edx,DWORD[rax*4+rdi]
- mov eax,DWORD[r10*4+rdi]
- xor dl,BYTE[r12]
- mov BYTE[r13*1+r12],dl
- lea r12,[1+r12]
- dec rbx
- jnz NEAR $L$oop8_warmup
-
- lea rsi,[1+r10]
- jmp NEAR $L$oop8
-ALIGN 16
-$L$oop8:
- add cl,al
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- mov ebx,DWORD[rsi*4+rdi]
- ror r8,8
- mov DWORD[r10*4+rdi],edx
- add dl,al
- mov r8b,BYTE[rdx*4+rdi]
- add cl,bl
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],ebx
- mov eax,DWORD[4+rsi*4+rdi]
- ror r8,8
- mov DWORD[4+r10*4+rdi],edx
- add dl,bl
- mov r8b,BYTE[rdx*4+rdi]
- add cl,al
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- mov ebx,DWORD[8+rsi*4+rdi]
- ror r8,8
- mov DWORD[8+r10*4+rdi],edx
- add dl,al
- mov r8b,BYTE[rdx*4+rdi]
- add cl,bl
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],ebx
- mov eax,DWORD[12+rsi*4+rdi]
- ror r8,8
- mov DWORD[12+r10*4+rdi],edx
- add dl,bl
- mov r8b,BYTE[rdx*4+rdi]
- add cl,al
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- mov ebx,DWORD[16+rsi*4+rdi]
- ror r8,8
- mov DWORD[16+r10*4+rdi],edx
- add dl,al
- mov r8b,BYTE[rdx*4+rdi]
- add cl,bl
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],ebx
- mov eax,DWORD[20+rsi*4+rdi]
- ror r8,8
- mov DWORD[20+r10*4+rdi],edx
- add dl,bl
- mov r8b,BYTE[rdx*4+rdi]
- add cl,al
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- mov ebx,DWORD[24+rsi*4+rdi]
- ror r8,8
- mov DWORD[24+r10*4+rdi],edx
- add dl,al
- mov r8b,BYTE[rdx*4+rdi]
- add sil,8
- add cl,bl
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],ebx
- mov eax,DWORD[((-4))+rsi*4+rdi]
- ror r8,8
- mov DWORD[28+r10*4+rdi],edx
- add dl,bl
- mov r8b,BYTE[rdx*4+rdi]
- add r10b,8
- ror r8,8
- sub r11,8
-
- xor r8,QWORD[r12]
- mov QWORD[r13*1+r12],r8
- lea r12,[8+r12]
-
- test r11,-8
- jnz NEAR $L$oop8
- cmp r11,0
- jne NEAR $L$loop1
- jmp NEAR $L$exit
-
-ALIGN 16
-$L$intel:
- test r11,-32
- jz NEAR $L$loop1
- and rbx,15
- jz NEAR $L$oop16_is_hot
- sub r11,rbx
-$L$oop16_warmup:
- add cl,al
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- mov DWORD[r10*4+rdi],edx
- add al,dl
- inc r10b
- mov edx,DWORD[rax*4+rdi]
- mov eax,DWORD[r10*4+rdi]
- xor dl,BYTE[r12]
- mov BYTE[r13*1+r12],dl
- lea r12,[1+r12]
- dec rbx
- jnz NEAR $L$oop16_warmup
-
- mov rbx,rcx
- xor rcx,rcx
- mov cl,bl
-
-$L$oop16_is_hot:
- lea rsi,[r10*4+rdi]
- add cl,al
- mov edx,DWORD[rcx*4+rdi]
- pxor xmm0,xmm0
- mov DWORD[rcx*4+rdi],eax
- add al,dl
- mov ebx,DWORD[4+rsi]
- movzx eax,al
- mov DWORD[rsi],edx
- add cl,bl
- pinsrw xmm0,WORD[rax*4+rdi],0
- jmp NEAR $L$oop16_enter
-ALIGN 16
-$L$oop16:
- add cl,al
- mov edx,DWORD[rcx*4+rdi]
- pxor xmm2,xmm0
- psllq xmm1,8
- pxor xmm0,xmm0
- mov DWORD[rcx*4+rdi],eax
- add al,dl
- mov ebx,DWORD[4+rsi]
- movzx eax,al
- mov DWORD[rsi],edx
- pxor xmm2,xmm1
- add cl,bl
- pinsrw xmm0,WORD[rax*4+rdi],0
- movdqu XMMWORD[r13*1+r12],xmm2
- lea r12,[16+r12]
-$L$oop16_enter:
- mov edx,DWORD[rcx*4+rdi]
- pxor xmm1,xmm1
- mov DWORD[rcx*4+rdi],ebx
- add bl,dl
- mov eax,DWORD[8+rsi]
- movzx ebx,bl
- mov DWORD[4+rsi],edx
- add cl,al
- pinsrw xmm1,WORD[rbx*4+rdi],0
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- add al,dl
- mov ebx,DWORD[12+rsi]
- movzx eax,al
- mov DWORD[8+rsi],edx
- add cl,bl
- pinsrw xmm0,WORD[rax*4+rdi],1
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],ebx
- add bl,dl
- mov eax,DWORD[16+rsi]
- movzx ebx,bl
- mov DWORD[12+rsi],edx
- add cl,al
- pinsrw xmm1,WORD[rbx*4+rdi],1
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- add al,dl
- mov ebx,DWORD[20+rsi]
- movzx eax,al
- mov DWORD[16+rsi],edx
- add cl,bl
- pinsrw xmm0,WORD[rax*4+rdi],2
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],ebx
- add bl,dl
- mov eax,DWORD[24+rsi]
- movzx ebx,bl
- mov DWORD[20+rsi],edx
- add cl,al
- pinsrw xmm1,WORD[rbx*4+rdi],2
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- add al,dl
- mov ebx,DWORD[28+rsi]
- movzx eax,al
- mov DWORD[24+rsi],edx
- add cl,bl
- pinsrw xmm0,WORD[rax*4+rdi],3
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],ebx
- add bl,dl
- mov eax,DWORD[32+rsi]
- movzx ebx,bl
- mov DWORD[28+rsi],edx
- add cl,al
- pinsrw xmm1,WORD[rbx*4+rdi],3
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- add al,dl
- mov ebx,DWORD[36+rsi]
- movzx eax,al
- mov DWORD[32+rsi],edx
- add cl,bl
- pinsrw xmm0,WORD[rax*4+rdi],4
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],ebx
- add bl,dl
- mov eax,DWORD[40+rsi]
- movzx ebx,bl
- mov DWORD[36+rsi],edx
- add cl,al
- pinsrw xmm1,WORD[rbx*4+rdi],4
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- add al,dl
- mov ebx,DWORD[44+rsi]
- movzx eax,al
- mov DWORD[40+rsi],edx
- add cl,bl
- pinsrw xmm0,WORD[rax*4+rdi],5
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],ebx
- add bl,dl
- mov eax,DWORD[48+rsi]
- movzx ebx,bl
- mov DWORD[44+rsi],edx
- add cl,al
- pinsrw xmm1,WORD[rbx*4+rdi],5
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- add al,dl
- mov ebx,DWORD[52+rsi]
- movzx eax,al
- mov DWORD[48+rsi],edx
- add cl,bl
- pinsrw xmm0,WORD[rax*4+rdi],6
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],ebx
- add bl,dl
- mov eax,DWORD[56+rsi]
- movzx ebx,bl
- mov DWORD[52+rsi],edx
- add cl,al
- pinsrw xmm1,WORD[rbx*4+rdi],6
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- add al,dl
- mov ebx,DWORD[60+rsi]
- movzx eax,al
- mov DWORD[56+rsi],edx
- add cl,bl
- pinsrw xmm0,WORD[rax*4+rdi],7
- add r10b,16
- movdqu xmm2,XMMWORD[r12]
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],ebx
- add bl,dl
- movzx ebx,bl
- mov DWORD[60+rsi],edx
- lea rsi,[r10*4+rdi]
- pinsrw xmm1,WORD[rbx*4+rdi],7
- mov eax,DWORD[rsi]
- mov rbx,rcx
- xor rcx,rcx
- sub r11,16
- mov cl,bl
- test r11,-16
- jnz NEAR $L$oop16
-
- psllq xmm1,8
- pxor xmm2,xmm0
- pxor xmm2,xmm1
- movdqu XMMWORD[r13*1+r12],xmm2
- lea r12,[16+r12]
-
- cmp r11,0
- jne NEAR $L$loop1
- jmp NEAR $L$exit
-
-ALIGN 16
-$L$loop1:
- add cl,al
- mov edx,DWORD[rcx*4+rdi]
- mov DWORD[rcx*4+rdi],eax
- mov DWORD[r10*4+rdi],edx
- add al,dl
- inc r10b
- mov edx,DWORD[rax*4+rdi]
- mov eax,DWORD[r10*4+rdi]
- xor dl,BYTE[r12]
- mov BYTE[r13*1+r12],dl
- lea r12,[1+r12]
- dec r11
- jnz NEAR $L$loop1
- jmp NEAR $L$exit
-
-ALIGN 16
-$L$RC4_CHAR:
- add r10b,1
- movzx eax,BYTE[r10*1+rdi]
- test r11,-8
- jz NEAR $L$cloop1
- jmp NEAR $L$cloop8
-ALIGN 16
-$L$cloop8:
- mov r8d,DWORD[r12]
- mov r9d,DWORD[4+r12]
- add cl,al
- lea rsi,[1+r10]
- movzx edx,BYTE[rcx*1+rdi]
- movzx esi,sil
- movzx ebx,BYTE[rsi*1+rdi]
- mov BYTE[rcx*1+rdi],al
- cmp rcx,rsi
- mov BYTE[r10*1+rdi],dl
- jne NEAR $L$cmov0
- mov rbx,rax
-$L$cmov0:
- add dl,al
- xor r8b,BYTE[rdx*1+rdi]
- ror r8d,8
- add cl,bl
- lea r10,[1+rsi]
- movzx edx,BYTE[rcx*1+rdi]
- movzx r10d,r10b
- movzx eax,BYTE[r10*1+rdi]
- mov BYTE[rcx*1+rdi],bl
- cmp rcx,r10
- mov BYTE[rsi*1+rdi],dl
- jne NEAR $L$cmov1
- mov rax,rbx
-$L$cmov1:
- add dl,bl
- xor r8b,BYTE[rdx*1+rdi]
- ror r8d,8
- add cl,al
- lea rsi,[1+r10]
- movzx edx,BYTE[rcx*1+rdi]
- movzx esi,sil
- movzx ebx,BYTE[rsi*1+rdi]
- mov BYTE[rcx*1+rdi],al
- cmp rcx,rsi
- mov BYTE[r10*1+rdi],dl
- jne NEAR $L$cmov2
- mov rbx,rax
-$L$cmov2:
- add dl,al
- xor r8b,BYTE[rdx*1+rdi]
- ror r8d,8
- add cl,bl
- lea r10,[1+rsi]
- movzx edx,BYTE[rcx*1+rdi]
- movzx r10d,r10b
- movzx eax,BYTE[r10*1+rdi]
- mov BYTE[rcx*1+rdi],bl
- cmp rcx,r10
- mov BYTE[rsi*1+rdi],dl
- jne NEAR $L$cmov3
- mov rax,rbx
-$L$cmov3:
- add dl,bl
- xor r8b,BYTE[rdx*1+rdi]
- ror r8d,8
- add cl,al
- lea rsi,[1+r10]
- movzx edx,BYTE[rcx*1+rdi]
- movzx esi,sil
- movzx ebx,BYTE[rsi*1+rdi]
- mov BYTE[rcx*1+rdi],al
- cmp rcx,rsi
- mov BYTE[r10*1+rdi],dl
- jne NEAR $L$cmov4
- mov rbx,rax
-$L$cmov4:
- add dl,al
- xor r9b,BYTE[rdx*1+rdi]
- ror r9d,8
- add cl,bl
- lea r10,[1+rsi]
- movzx edx,BYTE[rcx*1+rdi]
- movzx r10d,r10b
- movzx eax,BYTE[r10*1+rdi]
- mov BYTE[rcx*1+rdi],bl
- cmp rcx,r10
- mov BYTE[rsi*1+rdi],dl
- jne NEAR $L$cmov5
- mov rax,rbx
-$L$cmov5:
- add dl,bl
- xor r9b,BYTE[rdx*1+rdi]
- ror r9d,8
- add cl,al
- lea rsi,[1+r10]
- movzx edx,BYTE[rcx*1+rdi]
- movzx esi,sil
- movzx ebx,BYTE[rsi*1+rdi]
- mov BYTE[rcx*1+rdi],al
- cmp rcx,rsi
- mov BYTE[r10*1+rdi],dl
- jne NEAR $L$cmov6
- mov rbx,rax
-$L$cmov6:
- add dl,al
- xor r9b,BYTE[rdx*1+rdi]
- ror r9d,8
- add cl,bl
- lea r10,[1+rsi]
- movzx edx,BYTE[rcx*1+rdi]
- movzx r10d,r10b
- movzx eax,BYTE[r10*1+rdi]
- mov BYTE[rcx*1+rdi],bl
- cmp rcx,r10
- mov BYTE[rsi*1+rdi],dl
- jne NEAR $L$cmov7
- mov rax,rbx
-$L$cmov7:
- add dl,bl
- xor r9b,BYTE[rdx*1+rdi]
- ror r9d,8
- lea r11,[((-8))+r11]
- mov DWORD[r13],r8d
- lea r12,[8+r12]
- mov DWORD[4+r13],r9d
- lea r13,[8+r13]
-
- test r11,-8
- jnz NEAR $L$cloop8
- cmp r11,0
- jne NEAR $L$cloop1
- jmp NEAR $L$exit
-ALIGN 16
-$L$cloop1:
- add cl,al
- movzx ecx,cl
- movzx edx,BYTE[rcx*1+rdi]
- mov BYTE[rcx*1+rdi],al
- mov BYTE[r10*1+rdi],dl
- add dl,al
- add r10b,1
- movzx edx,dl
- movzx r10d,r10b
- movzx edx,BYTE[rdx*1+rdi]
- movzx eax,BYTE[r10*1+rdi]
- xor dl,BYTE[r12]
- lea r12,[1+r12]
- mov BYTE[r13],dl
- lea r13,[1+r13]
- sub r11,1
- jnz NEAR $L$cloop1
- jmp NEAR $L$exit
-
-ALIGN 16
-$L$exit:
- sub r10b,1
- mov DWORD[((-8))+rdi],r10d
- mov DWORD[((-4))+rdi],ecx
-
- mov r13,QWORD[rsp]
- mov r12,QWORD[8+rsp]
- mov rbx,QWORD[16+rsp]
- add rsp,24
-$L$epilogue:
- mov rdi,QWORD[8+rsp] ;WIN64 epilogue
- mov rsi,QWORD[16+rsp]
- DB 0F3h,0C3h ;repret
-$L$SEH_end_asm_RC4:
-global asm_RC4_set_key
-
-ALIGN 16
-asm_RC4_set_key:
- mov QWORD[8+rsp],rdi ;WIN64 prologue
- mov QWORD[16+rsp],rsi
- mov rax,rsp
-$L$SEH_begin_asm_RC4_set_key:
- mov rdi,rcx
- mov rsi,rdx
- mov rdx,r8
-
-
- lea rdi,[8+rdi]
- lea rdx,[rsi*1+rdx]
- neg rsi
- mov rcx,rsi
- xor eax,eax
- xor r9,r9
- xor r10,r10
- xor r11,r11
-
- mov r8d,DWORD[OPENSSL_ia32cap_P]
- bt r8d,20
- jc NEAR $L$c1stloop
- jmp NEAR $L$w1stloop
-
-ALIGN 16
-$L$w1stloop:
- mov DWORD[rax*4+rdi],eax
- add al,1
- jnc NEAR $L$w1stloop
-
- xor r9,r9
- xor r8,r8
-ALIGN 16
-$L$w2ndloop:
- mov r10d,DWORD[r9*4+rdi]
- add r8b,BYTE[rsi*1+rdx]
- add r8b,r10b
- add rsi,1
- mov r11d,DWORD[r8*4+rdi]
- cmovz rsi,rcx
- mov DWORD[r8*4+rdi],r10d
- mov DWORD[r9*4+rdi],r11d
- add r9b,1
- jnc NEAR $L$w2ndloop
- jmp NEAR $L$exit_key
-
-ALIGN 16
-$L$c1stloop:
- mov BYTE[rax*1+rdi],al
- add al,1
- jnc NEAR $L$c1stloop
-
- xor r9,r9
- xor r8,r8
-ALIGN 16
-$L$c2ndloop:
- mov r10b,BYTE[r9*1+rdi]
- add r8b,BYTE[rsi*1+rdx]
- add r8b,r10b
- add rsi,1
- mov r11b,BYTE[r8*1+rdi]
- jnz NEAR $L$cnowrap
- mov rsi,rcx
-$L$cnowrap:
- mov BYTE[r8*1+rdi],r10b
- mov BYTE[r9*1+rdi],r11b
- add r9b,1
- jnc NEAR $L$c2ndloop
- mov DWORD[256+rdi],-1
-
-ALIGN 16
-$L$exit_key:
- xor eax,eax
- mov DWORD[((-8))+rdi],eax
- mov DWORD[((-4))+rdi],eax
- mov rdi,QWORD[8+rsp] ;WIN64 epilogue
- mov rsi,QWORD[16+rsp]
- DB 0F3h,0C3h ;repret
-$L$SEH_end_asm_RC4_set_key:
-EXTERN __imp_RtlVirtualUnwind
-
-ALIGN 16
-stream_se_handler:
- push rsi
- push rdi
- push rbx
- push rbp
- push r12
- push r13
- push r14
- push r15
- pushfq
- sub rsp,64
-
- mov rax,QWORD[120+r8]
- mov rbx,QWORD[248+r8]
-
- lea r10,[$L$prologue]
- cmp rbx,r10
- jb NEAR $L$in_prologue
-
- mov rax,QWORD[152+r8]
-
- lea r10,[$L$epilogue]
- cmp rbx,r10
- jae NEAR $L$in_prologue
-
- lea rax,[24+rax]
-
- mov rbx,QWORD[((-8))+rax]
- mov r12,QWORD[((-16))+rax]
- mov r13,QWORD[((-24))+rax]
- mov QWORD[144+r8],rbx
- mov QWORD[216+r8],r12
- mov QWORD[224+r8],r13
-
-$L$in_prologue:
- mov rdi,QWORD[8+rax]
- mov rsi,QWORD[16+rax]
- mov QWORD[152+r8],rax
- mov QWORD[168+r8],rsi
- mov QWORD[176+r8],rdi
-
- jmp NEAR $L$common_seh_exit
-
-
-
-ALIGN 16
-key_se_handler:
- push rsi
- push rdi
- push rbx
- push rbp
- push r12
- push r13
- push r14
- push r15
- pushfq
- sub rsp,64
-
- mov rax,QWORD[152+r8]
- mov rdi,QWORD[8+rax]
- mov rsi,QWORD[16+rax]
- mov QWORD[168+r8],rsi
- mov QWORD[176+r8],rdi
-
-$L$common_seh_exit:
-
- mov rdi,QWORD[40+r9]
- mov rsi,r8
- mov ecx,154
- DD 0xa548f3fc
-
- mov rsi,r9
- xor rcx,rcx
- mov rdx,QWORD[8+rsi]
- mov r8,QWORD[rsi]
- mov r9,QWORD[16+rsi]
- mov r10,QWORD[40+rsi]
- lea r11,[56+rsi]
- lea r12,[24+rsi]
- mov QWORD[32+rsp],r10
- mov QWORD[40+rsp],r11
- mov QWORD[48+rsp],r12
- mov QWORD[56+rsp],rcx
- call QWORD[__imp_RtlVirtualUnwind]
-
- mov eax,1
- add rsp,64
- popfq
- pop r15
- pop r14
- pop r13
- pop r12
- pop rbp
- pop rbx
- pop rdi
- pop rsi
- DB 0F3h,0C3h ;repret
-
-
-section .pdata rdata align=4
-ALIGN 4
- DD $L$SEH_begin_asm_RC4 wrt ..imagebase
- DD $L$SEH_end_asm_RC4 wrt ..imagebase
- DD $L$SEH_info_asm_RC4 wrt ..imagebase
-
- DD $L$SEH_begin_asm_RC4_set_key wrt ..imagebase
- DD $L$SEH_end_asm_RC4_set_key wrt ..imagebase
- DD $L$SEH_info_asm_RC4_set_key wrt ..imagebase
-
-section .xdata rdata align=8
-ALIGN 8
-$L$SEH_info_asm_RC4:
-DB 9,0,0,0
- DD stream_se_handler wrt ..imagebase
-$L$SEH_info_asm_RC4_set_key:
-DB 9,0,0,0
- DD key_se_handler wrt ..imagebase